Index: lib/Target/AArch64/AArch64CallingConvention.td
===================================================================
--- lib/Target/AArch64/AArch64CallingConvention.td
+++ lib/Target/AArch64/AArch64CallingConvention.td
@@ -288,6 +288,14 @@
                                            D8,  D9,  D10, D11,
                                            D12, D13, D14, D15)>;
 
+// Win64 has unwinding codes for an (FP,LR) pair: save_fplr and save_fplr_x.
+// FP is listed before LR here so that the frame lowering logic generates
+// (FP,LR) pairs, and not (LR,FP) pairs.
+def CSR_Win_AArch64_AAPCS : CalleeSavedRegs<(add FP, LR, X19, X20, X21, X22,
+                                               X23, X24, X25, X26, X27, X28,
+                                               D8, D9, D10, D11,
+                                               D12, D13, D14, D15)>;
+
 // Constructors and destructors return 'this' in the iOS 64-bit C++ ABI; since
 // 'this' and the pointer return value are both passed in X0 in these cases,
 // this can be partially modelled by treating X0 as a callee-saved register;
Index: lib/Target/AArch64/AArch64FrameLowering.cpp
===================================================================
--- lib/Target/AArch64/AArch64FrameLowering.cpp
+++ lib/Target/AArch64/AArch64FrameLowering.cpp
@@ -115,11 +115,13 @@
 #include "llvm/CodeGen/TargetInstrInfo.h"
 #include "llvm/CodeGen/TargetRegisterInfo.h"
 #include "llvm/CodeGen/TargetSubtargetInfo.h"
+#include "llvm/CodeGen/WinEHFuncInfo.h"
 #include "llvm/IR/Attributes.h"
 #include "llvm/IR/CallingConv.h"
 #include "llvm/IR/DataLayout.h"
 #include "llvm/IR/DebugLoc.h"
 #include "llvm/IR/Function.h"
+#include "llvm/MC/MCAsmInfo.h"
 #include "llvm/MC/MCDwarf.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Debug.h"
@@ -154,6 +156,31 @@
 /// displacements (see estimateRSStackSizeLimit(), isAArch64FrameOffsetLegal()).
 static const unsigned DefaultSafeSPDisplacement = 255;
 
+// Returns true if Opc is one of the AArch64::SEH_* opcodes listed below.
+static bool isSEHOpcode(unsigned Opc) {
+  switch (Opc) {
+  case AArch64::SEH_StackAlloc:
+  case AArch64::SEH_SaveFPLR:
+  case AArch64::SEH_SaveFPLR_X:
+  case AArch64::SEH_SaveReg:
+  case AArch64::SEH_SaveReg_X:
+  case AArch64::SEH_SaveRegP:
+  case AArch64::SEH_SaveRegP_X:
+  case AArch64::SEH_SaveFReg:
+  case AArch64::SEH_SaveFReg_X:
+  case AArch64::SEH_SaveFRegP:
+  case AArch64::SEH_SaveFRegP_X:
+  case AArch64::SEH_SetFP:
+  case AArch64::SEH_AddFP:
+  case AArch64::SEH_Nop:
+  case AArch64::SEH_PrologEnd:
+  case AArch64::SEH_EpilogStart:
+  case AArch64::SEH_EpilogEnd:
+    return true;
+  }
+  return false;
+}
+
 /// Look at each instruction that references stack frames and return the stack
 /// size limit beyond which some of these instructions will require a scratch
 /// register during their expansion later.
@@ -434,12 +461,160 @@
   return true;
 }
 
+// Given a callee-save load or store instruction at MBBI, build the matching
+// Windows SEH unwind pseudo-instruction, tag it with Flag and insert it right
+// after MBBI. The last operand of MBBI is read as an immediate scaled by 8.
+// Returns the iterator produced by MachineBasicBlock::insertAfter.
+static MachineBasicBlock::iterator InsertSEH(MachineBasicBlock::iterator MBBI,
+                                             const TargetInstrInfo &TII,
+                                             MachineInstr::MIFlag Flag) {
+  unsigned Opc = MBBI->getOpcode();
+  MachineBasicBlock *MBB = MBBI->getParent();
+  MachineFunction &MF = *MBB->getParent();
+  const AArch64RegisterInfo *RegInfo = static_cast<const AArch64RegisterInfo *>(
+      MF.getSubtarget().getRegisterInfo());
+  DebugLoc DL = MBBI->getDebugLoc();
+  unsigned ImmIdx = MBBI->getNumOperands() - 1;
+  int Imm = MBBI->getOperand(ImmIdx).getImm();
+  // All offsets are scaled, so account for it.
+  Imm *= 8;
+  MachineInstrBuilder MIB;
+
+  switch (Opc) {
+  default:
+    // Unlike assert(false), this cannot fall through in release builds.
+    llvm_unreachable("No SEH Opcode for this instruction");
+  case AArch64::STPDpre:
+  case AArch64::LDPDpost: {
+    if (Opc == AArch64::LDPDpost)
+      Imm = -Imm;
+    unsigned Reg0 = MBBI->getOperand(1).getReg();
+    unsigned Reg1 = MBBI->getOperand(2).getReg();
+    MIB = BuildMI(MF, DL, TII.get(AArch64::SEH_SaveFRegP_X))
+              .addImm(Reg0 - AArch64::D0)
+              .addImm(Reg1 - AArch64::D0)
+              .addImm(Imm)
+              .setMIFlag(Flag);
+    break;
+  }
+  case AArch64::STPXpre:
+  case AArch64::LDPXpost: {
+    if (Opc == AArch64::LDPXpost)
+      Imm = -Imm;
+    unsigned Reg0 = MBBI->getOperand(1).getReg();
+    unsigned Reg1 = MBBI->getOperand(2).getReg();
+    if (Reg0 == AArch64::FP && Reg1 == AArch64::LR)
+      MIB = BuildMI(MF, DL, TII.get(AArch64::SEH_SaveFPLR_X))
+                .addImm(Imm)
+                .setMIFlag(Flag);
+    else
+      MIB = BuildMI(MF, DL, TII.get(AArch64::SEH_SaveRegP_X))
+                .addImm(Reg0 - AArch64::X0)
+                .addImm(Reg1 - AArch64::X0)
+                .addImm(Imm)
+                .setMIFlag(Flag);
+    break;
+  }
+  case AArch64::STRDpre:
+  case AArch64::LDRDpost: {
+    if (Opc == AArch64::LDRDpost)
+      Imm = -Imm;
+    unsigned Reg = MBBI->getOperand(1).getReg() - AArch64::D0;
+    MIB = BuildMI(MF, DL, TII.get(AArch64::SEH_SaveFReg_X))
+              .addImm(Reg)
+              .addImm(Imm)
+              .setMIFlag(Flag);
+    break;
+  }
+  case AArch64::STRXpre:
+  case AArch64::LDRXpost: {
+    if (Opc == AArch64::LDRXpost)
+      Imm = -Imm;
+    unsigned Reg = RegInfo->getDwarfRegNum(MBBI->getOperand(1).getReg(), true);
+    MIB = BuildMI(MF, DL, TII.get(AArch64::SEH_SaveReg_X))
+              .addImm(Reg)
+              .addImm(Imm)
+              .setMIFlag(Flag);
+    break;
+  }
+  case AArch64::STPDi:
+  case AArch64::LDPDi: {
+    unsigned Reg0 = MBBI->getOperand(0).getReg();
+    unsigned Reg1 = MBBI->getOperand(1).getReg();
+    MIB = BuildMI(MF, DL, TII.get(AArch64::SEH_SaveFRegP))
+              .addImm(Reg0 - AArch64::D0)
+              .addImm(Reg1 - AArch64::D0)
+              .addImm(Imm)
+              .setMIFlag(Flag);
+    break;
+  }
+  case AArch64::STPXi:
+  case AArch64::LDPXi: {
+    unsigned Reg0 = MBBI->getOperand(0).getReg();
+    unsigned Reg1 = MBBI->getOperand(1).getReg();
+    // An (FP,LR) pair is described by its own dedicated SEH opcode.
+    if (Reg0 == AArch64::FP && Reg1 == AArch64::LR)
+      MIB = BuildMI(MF, DL, TII.get(AArch64::SEH_SaveFPLR))
+                .addImm(Imm)
+                .setMIFlag(Flag);
+    else
+      MIB = BuildMI(MF, DL, TII.get(AArch64::SEH_SaveRegP))
+                .addImm(Reg0 - AArch64::X0)
+                .addImm(Reg1 - AArch64::X0)
+                .addImm(Imm)
+                .setMIFlag(Flag);
+    break;
+  }
+  case AArch64::STRXui:
+  case AArch64::LDRXui: {
+    unsigned Reg = RegInfo->getDwarfRegNum(MBBI->getOperand(0).getReg(), true);
+    MIB = BuildMI(MF, DL, TII.get(AArch64::SEH_SaveReg))
+              .addImm(Reg)
+              .addImm(Imm)
+              .setMIFlag(Flag);
+    break;
+  }
+  case AArch64::STRDui:
+  case AArch64::LDRDui: {
+    unsigned Reg = MBBI->getOperand(0).getReg() - AArch64::D0;
+    MIB = BuildMI(MF, DL, TII.get(AArch64::SEH_SaveFReg))
+              .addImm(Reg)
+              .addImm(Imm)
+              .setMIFlag(Flag);
+    break;
+  }
+  }
+  return MBB->insertAfter(MBBI, MIB);
+}
+
+// Adjust the SEH opcode at MBBI that is associated with a save/restore
+// instruction by adding LocalStackSize to its last (immediate) operand.
+static void fixupSEHOpcode(MachineBasicBlock::iterator MBBI,
+                           unsigned LocalStackSize) {
+  switch (MBBI->getOpcode()) {
+  case AArch64::SEH_SaveFPLR:
+  case AArch64::SEH_SaveRegP:
+  case AArch64::SEH_SaveReg:
+  case AArch64::SEH_SaveFRegP:
+  case AArch64::SEH_SaveFReg: {
+    // The last operand of these opcodes is treated as the offset.
+    MachineOperand &Offset = MBBI->getOperand(MBBI->getNumOperands() - 1);
+    Offset.setImm(Offset.getImm() + LocalStackSize);
+    return;
+  }
+  default:
+    assert(false && "Fix the offset in the SEH instruction");
+    return;
+  }
+}
+
 // Convert callee-save register save/restore instruction to do stack pointer
 // decrement/increment to allocate/deallocate the callee-save stack area by
 // converting store/load to use pre/post increment version.
 static MachineBasicBlock::iterator convertCalleeSaveRestoreToSPPrePostIncDec(
     MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
-    const DebugLoc &DL, const TargetInstrInfo *TII, int CSStackSizeInc) {
+    const DebugLoc &DL, const TargetInstrInfo *TII, int CSStackSizeInc,
+    bool NeedsWinCFI, bool InProlog = true) {
   // Ignore instructions that do not operate on SP, i.e. shadow call stack
   // instructions.
   while (MBBI->getOpcode() == AArch64::STRXpost ||
@@ -483,6 +658,13 @@
     break;
   }
 
+  // Get rid of the SEH code associated with the old instruction.
+  if (NeedsWinCFI) {
+    auto SEH = std::next(MBBI);
+    if (isSEHOpcode(SEH->getOpcode()))
+      SEH->eraseFromParent();
+  }
+
   MachineInstrBuilder MIB = BuildMI(MBB, MBBI, DL, TII->get(NewOpc));
   MIB.addReg(AArch64::SP, RegState::Define);
 
@@ -507,15 +689,24 @@
   MIB.setMIFlags(MBBI->getFlags());
   MIB.setMemRefs(MBBI->memoperands());
 
+  // Generate a new SEH code that corresponds to the new instruction.
+  if (NeedsWinCFI)
+    InsertSEH(*MIB, *TII,
+	      InProlog ? MachineInstr::FrameSetup : MachineInstr::FrameDestroy);
+
   return std::prev(MBB.erase(MBBI));
 }
 
 // Fixup callee-save register save/restore instructions to take into account
 // combined SP bump by adding the local stack size to the stack offsets.
 static void fixupCalleeSaveRestoreStackOffset(MachineInstr &MI,
-                                              unsigned LocalStackSize) {
+                                              unsigned LocalStackSize,
+                                              bool NeedsWinCFI) {
   unsigned Opc = MI.getOpcode();
 
+  if (isSEHOpcode(Opc))
+    return;
+
   // Ignore instructions that do not operate on SP, i.e. shadow call stack
   // instructions.
   if (Opc == AArch64::STRXpost || Opc == AArch64::LDRXpre) {
@@ -538,6 +729,13 @@
   // All generated opcodes have scaled offsets.
   assert(LocalStackSize % 8 == 0);
   OffsetOpnd.setImm(OffsetOpnd.getImm() + LocalStackSize / 8);
+
+  if (NeedsWinCFI) {
+    auto MBBI = std::next(MachineBasicBlock::iterator(MI));
+    assert(MBBI != MI.getParent()->end() && "Expecting a valid instruction");
+    assert(isSEHOpcode(MBBI->getOpcode()) && "Expecting a SEH opcode");
+    fixupSEHOpcode(MBBI, LocalStackSize);
+  }
 }
 
 static void adaptForLdStOpt(MachineBasicBlock &MBB,
@@ -584,6 +782,9 @@
   AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
   bool needsFrameMoves = MMI.hasDebugInfo() || F.needsUnwindTableEntry();
   bool HasFP = hasFP(MF);
+  bool NeedsWinCFI = MF.getTarget().getMCAsmInfo()->usesWindowsCFI() &&
+                     F.needsUnwindTableEntry();
+  MF.setHasWinCFI(NeedsWinCFI);
 
   // At this point, we're going to decide whether or not the function uses a
   // redzone. In most cases, the function doesn't have a redzone so let's
@@ -599,6 +800,19 @@
         .setMIFlag(MachineInstr::FrameSetup);
   }
 
+  // The very last FrameSetup instruction indicates the end of prologue. Emit a
+  // SEH opcode indicating the prologue end.
+  if (NeedsWinCFI) {
+    MachineBasicBlock::iterator LastFrameSetupI = MBB.begin();
+    while (LastFrameSetupI != MBB.end() &&
+           LastFrameSetupI->getFlag(MachineInstr::FrameSetup))
+      ++LastFrameSetupI;
+    DebugLoc NewDL =
+      (MBB.end() == LastFrameSetupI) ? DL : LastFrameSetupI->getDebugLoc();
+    BuildMI(MBB, LastFrameSetupI, DL, TII->get(AArch64::SEH_PrologEnd))
+        .setMIFlag(MachineInstr::FrameSetup);
+  }
+
   // All calls are tail calls in GHC calling conv, and functions have no
   // prologue/epilogue.
   if (MF.getFunction().getCallingConv() == CallingConv::GHC)
@@ -607,7 +821,6 @@
   int NumBytes = (int)MFI.getStackSize();
   if (!AFI->hasStackFrame() && !windowsRequiresStackProbe(MF, NumBytes)) {
     assert(!HasFP && "unexpected function without stack frame but with FP");
-
     // All of the stack allocation is for locals.
     AFI->setLocalStackSize(NumBytes);
 
@@ -620,7 +833,7 @@
       ++NumRedZoneFunctions;
     } else {
       emitFrameOffset(MBB, MBBI, DL, AArch64::SP, AArch64::SP, -NumBytes, TII,
-                      MachineInstr::FrameSetup);
+                      MachineInstr::FrameSetup, false, NeedsWinCFI);
 
       // Label used to tie together the PROLOG_LABEL and the MachineMoves.
       MCSymbol *FrameLabel = MMI.getContext().createTempSymbol();
@@ -641,15 +854,15 @@
   auto PrologueSaveSize = AFI->getCalleeSavedStackSize() + FixedObject;
   // All of the remaining stack allocations are for locals.
   AFI->setLocalStackSize(NumBytes - PrologueSaveSize);
-
   bool CombineSPBump = shouldCombineCSRLocalStackBump(MF, NumBytes);
   if (CombineSPBump) {
     emitFrameOffset(MBB, MBBI, DL, AArch64::SP, AArch64::SP, -NumBytes, TII,
-                    MachineInstr::FrameSetup);
+                    MachineInstr::FrameSetup, false, NeedsWinCFI);
     NumBytes = 0;
   } else if (PrologueSaveSize != 0) {
     MBBI = convertCalleeSaveRestoreToSPPrePostIncDec(MBB, MBBI, DL, TII,
-                                                     -PrologueSaveSize);
+                                                     -PrologueSaveSize,
+						     NeedsWinCFI);
     NumBytes -= PrologueSaveSize;
   }
   assert(NumBytes >= 0 && "Negative stack allocation size!?");
@@ -660,9 +873,11 @@
   MachineBasicBlock::iterator End = MBB.end();
   while (MBBI != End && MBBI->getFlag(MachineInstr::FrameSetup)) {
     if (CombineSPBump)
-      fixupCalleeSaveRestoreStackOffset(*MBBI, AFI->getLocalStackSize());
+      fixupCalleeSaveRestoreStackOffset(*MBBI, AFI->getLocalStackSize(),
+                                        NeedsWinCFI);
     ++MBBI;
   }
+
   if (HasFP) {
     // Only set up FP if we actually need to. Frame pointer is fp =
     // sp - fixedobject - 16.
@@ -675,7 +890,7 @@
     // Note: All stores of callee-saved registers are marked as "FrameSetup".
     // This code marks the instruction(s) that set the FP also.
     emitFrameOffset(MBB, MBBI, DL, AArch64::FP, AArch64::SP, FPOffset, TII,
-                    MachineInstr::FrameSetup);
+                    MachineInstr::FrameSetup, false, NeedsWinCFI);
   }
 
   if (windowsRequiresStackProbe(MF, NumBytes)) {
@@ -733,7 +948,7 @@
       // the correct value here, as NumBytes also includes padding bytes,
       // which shouldn't be counted here.
       emitFrameOffset(MBB, MBBI, DL, scratchSPReg, AArch64::SP, -NumBytes, TII,
-                      MachineInstr::FrameSetup);
+                      MachineInstr::FrameSetup, false, NeedsWinCFI);
 
     if (NeedsRealignment) {
       const unsigned Alignment = MFI.getMaxAlignment();
@@ -756,6 +971,10 @@
           .addReg(scratchSPReg, RegState::Kill)
           .addImm(andMaskEncoded);
       AFI->setStackRealigned(true);
+      if (NeedsWinCFI)
+        BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_StackAlloc)).
+                addImm(NumBytes & andMaskEncoded).
+	        setMIFlag(MachineInstr::FrameSetup);
     }
   }
 
@@ -769,6 +988,9 @@
   if (RegInfo->hasBasePointer(MF)) {
     TII->copyPhysReg(MBB, MBBI, DL, RegInfo->getBaseRegister(), AArch64::SP,
                      false);
+    if (NeedsWinCFI)
+      BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_Nop)).
+              setMIFlag(MachineInstr::FrameSetup);
   }
 
   if (needsFrameMoves) {
@@ -898,14 +1120,22 @@
   const TargetInstrInfo *TII = Subtarget.getInstrInfo();
   DebugLoc DL;
   bool IsTailCallReturn = false;
+  bool NeedsWinCFI = MF.getTarget().getMCAsmInfo()->usesWindowsCFI() &&
+	             MF.getFunction().needsUnwindTableEntry();
+
   if (MBB.end() != MBBI) {
     DL = MBBI->getDebugLoc();
     unsigned RetOpcode = MBBI->getOpcode();
     IsTailCallReturn = RetOpcode == AArch64::TCRETURNdi ||
       RetOpcode == AArch64::TCRETURNri;
   }
+
+  if (NeedsWinCFI)
+    BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_EpilogEnd))
+        .setMIFlag(MachineInstr::FrameDestroy);
+
   int NumBytes = MFI.getStackSize();
-  const AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
+  AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
 
   // All calls are tail calls in GHC calling conv, and functions have no
   // prologue/epilogue.
@@ -970,13 +1200,16 @@
 
   if (!CombineSPBump && PrologueSaveSize != 0) {
     MachineBasicBlock::iterator Pop = std::prev(MBB.getFirstTerminator());
+    while (isSEHOpcode(Pop->getOpcode()))
+      Pop = std::prev(Pop);
     // Converting the last ldp to a post-index ldp is valid only if the last
     // ldp's offset is 0.
     const MachineOperand &OffsetOp = Pop->getOperand(Pop->getNumOperands() - 1);
     // If the offset is 0, convert it to a post-index ldp.
     if (OffsetOp.getImm() == 0) {
       convertCalleeSaveRestoreToSPPrePostIncDec(MBB, Pop, DL, TII,
-                                                PrologueSaveSize);
+                                                PrologueSaveSize, NeedsWinCFI,
+                                                false);
     } else {
       // If not, make sure to emit an add after the last ldp.
       // We're doing this by transfering the size to be restored from the
@@ -996,15 +1229,20 @@
     if (!LastPopI->getFlag(MachineInstr::FrameDestroy)) {
       ++LastPopI;
       break;
-    } else if (CombineSPBump)
-      fixupCalleeSaveRestoreStackOffset(*LastPopI, AFI->getLocalStackSize());
+    } else if (CombineSPBump && !isSEHOpcode(LastPopI->getOpcode()))
+      fixupCalleeSaveRestoreStackOffset(*LastPopI, AFI->getLocalStackSize(),
+                                        NeedsWinCFI);
   }
 
+  if (NeedsWinCFI)
+    BuildMI(MBB, LastPopI, DL, TII->get(AArch64::SEH_EpilogStart)).
+            setMIFlag(MachineInstr::FrameDestroy);
+
   // If there is a single SP update, insert it before the ret and we're done.
   if (CombineSPBump) {
     emitFrameOffset(MBB, MBB.getFirstTerminator(), DL, AArch64::SP, AArch64::SP,
                     NumBytes + AfterCSRPopSize, TII,
-                    MachineInstr::FrameDestroy);
+                    MachineInstr::FrameDestroy, false, NeedsWinCFI);
     return;
   }
 
@@ -1032,7 +1270,8 @@
       adaptForLdStOpt(MBB, MBB.getFirstTerminator(), LastPopI);
 
     emitFrameOffset(MBB, LastPopI, DL, AArch64::SP, AArch64::SP,
-                    StackRestoreBytes, TII, MachineInstr::FrameDestroy);
+                    StackRestoreBytes, TII, MachineInstr::FrameDestroy,
+                    false, NeedsWinCFI);
     if (Done)
       return;
 
@@ -1046,10 +1285,10 @@
   if (MFI.hasVarSizedObjects() || AFI->isStackRealigned())
     emitFrameOffset(MBB, LastPopI, DL, AArch64::SP, AArch64::FP,
                     -AFI->getCalleeSavedStackSize() + 16, TII,
-                    MachineInstr::FrameDestroy);
+                    MachineInstr::FrameDestroy, false, NeedsWinCFI);
   else if (NumBytes)
     emitFrameOffset(MBB, LastPopI, DL, AArch64::SP, AArch64::SP, NumBytes, TII,
-                    MachineInstr::FrameDestroy);
+                    MachineInstr::FrameDestroy, false, NeedsWinCFI);
 
   // This must be placed after the callee-save restore code because that code
   // assumes the SP is at the same location as it was after the callee-save save
@@ -1070,7 +1309,8 @@
     adaptForLdStOpt(MBB, FirstSPPopI, LastPopI);
 
     emitFrameOffset(MBB, FirstSPPopI, DL, AArch64::SP, AArch64::SP,
-                    AfterCSRPopSize, TII, MachineInstr::FrameDestroy);
+                    AfterCSRPopSize, TII, MachineInstr::FrameDestroy,
+                    false, NeedsWinCFI);
   }
 }
 
@@ -1196,6 +1436,20 @@
            Attrs.hasAttrSomewhere(Attribute::SwiftError));
 }
 
+// Returns true when Reg1 and Reg2 must not be combined into a register pair.
+// If we are generating register pairs for a Windows function that requires
+// EH support, then only consecutive registers are paired: there are no
+// unwind opcodes for saves/restores of non-consecutive register pairs.
+// The unwind opcodes are save_regp, save_regp_x, save_fregp, save_fregp_x.
+// https://docs.microsoft.com/en-us/cpp/build/arm64-exception-handling
+static bool invalidateWindowsRegisterPairing(unsigned Reg1, unsigned Reg2,
+                                             bool NeedsWinCFI) {
+  // "Consecutive" means Reg2 is numbered immediately after Reg1.
+  const bool IsConsecutive = Reg2 == Reg1 + 1;
+  // Pairing is only restricted when Windows CFI is being generated.
+  return NeedsWinCFI && !IsConsecutive;
+}
+
 namespace {
 
 struct RegPairInfo {
@@ -1220,6 +1474,8 @@
   if (CSI.empty())
     return;
 
+  bool NeedsWinCFI = MF.getTarget().getMCAsmInfo()->usesWindowsCFI() &&
+                     MF.getFunction().needsUnwindTableEntry();
   AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
   MachineFrameInfo &MFI = MF.getFrameInfo();
   CallingConv::ID CC = MF.getFunction().getCallingConv();
@@ -1233,6 +1489,11 @@
          "Odd number of callee-saved regs to spill!");
   int Offset = AFI->getCalleeSavedStackSize();
 
+  // On Win64, we don't necessarily pair all registers or all registers except
+  // possibly one because the unwind opcodes for stack unwinding require that
+  // only consecutive registers are paired.  This flag makes sure that the
+  // padding below is done only once.
+  bool FixupDone = false;
   for (unsigned i = 0; i < Count; ++i) {
     RegPairInfo RPI;
     RPI.Reg1 = CSI[i].getReg();
@@ -1241,11 +1502,11 @@
            AArch64::FPR64RegClass.contains(RPI.Reg1));
     RPI.IsGPR = AArch64::GPR64RegClass.contains(RPI.Reg1);
 
-    // Add the next reg to the pair if it is in the same register class.
     if (i + 1 < Count) {
       unsigned NextReg = CSI[i + 1].getReg();
-      if ((RPI.IsGPR && AArch64::GPR64RegClass.contains(NextReg)) ||
-          (!RPI.IsGPR && AArch64::FPR64RegClass.contains(NextReg)))
+      if (((RPI.IsGPR && AArch64::GPR64RegClass.contains(NextReg)) ||
+          (!RPI.IsGPR && AArch64::FPR64RegClass.contains(NextReg))) &&
+          !invalidateWindowsRegisterPairing(RPI.Reg1, NextReg, NeedsWinCFI))
         RPI.Reg2 = NextReg;
     }
 
@@ -1279,9 +1540,11 @@
 
     RPI.FrameIdx = CSI[i].getFrameIdx();
 
-    if (Count * 8 != AFI->getCalleeSavedStackSize() && !RPI.isPaired()) {
+    if (Count * 8 != AFI->getCalleeSavedStackSize() && !RPI.isPaired() &&
+        !FixupDone) {
       // Round up size of non-pair to pair size if we need to pad the
       // callee-save area to ensure 16-byte alignment.
+      FixupDone = NeedsWinCFI;
       Offset -= 16;
       assert(MFI.getObjectAlignment(RPI.FrameIdx) <= 16);
       MFI.setObjectAlignment(RPI.FrameIdx, 16);
@@ -1305,6 +1568,8 @@
     const TargetRegisterInfo *TRI) const {
   MachineFunction &MF = *MBB.getParent();
   const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
+  bool NeedsWinCFI = MF.getTarget().getMCAsmInfo()->usesWindowsCFI() &&
+                     MF.getFunction().needsUnwindTableEntry();
   DebugLoc DL;
   SmallVector<RegPairInfo, 8> RegPairs;
 
@@ -1353,9 +1618,21 @@
                if (RPI.isPaired()) dbgs() << ", " << RPI.FrameIdx + 1;
                dbgs() << ")\n");
 
+    assert((!NeedsWinCFI || !(Reg1 == AArch64::LR && Reg2 == AArch64::FP)) &&
+           "Windows unwdinding requires a consecutive (FP,LR) pair");
+
     MachineInstrBuilder MIB = BuildMI(MBB, MI, DL, TII.get(StrOpc));
     if (!MRI.isReserved(Reg1))
       MBB.addLiveIn(Reg1);
+
+    // Windows unwinding codes require that gprs be consecutive if they are paired.
+    if (NeedsWinCFI) {
+      MIB.addReg(Reg1, getPrologueDeath(MF, Reg1));
+      MIB.addMemOperand(MF.getMachineMemOperand(
+        MachinePointerInfo::getFixedStack(MF, RPI.FrameIdx),
+        MachineMemOperand::MOStore, 8, 8));
+    }
+
     if (RPI.isPaired()) {
       if (!MRI.isReserved(Reg2))
         MBB.addLiveIn(Reg2);
@@ -1364,13 +1641,21 @@
           MachinePointerInfo::getFixedStack(MF, RPI.FrameIdx + 1),
           MachineMemOperand::MOStore, 8, 8));
     }
-    MIB.addReg(Reg1, getPrologueDeath(MF, Reg1))
-        .addReg(AArch64::SP)
-        .addImm(RPI.Offset) // [sp, #offset*8], where factor*8 is implicit
-        .setMIFlag(MachineInstr::FrameSetup);
-    MIB.addMemOperand(MF.getMachineMemOperand(
-        MachinePointerInfo::getFixedStack(MF, RPI.FrameIdx),
-        MachineMemOperand::MOStore, 8, 8));
+
+    if (NeedsWinCFI) {
+      MIB.addReg(AArch64::SP)
+         .addImm(RPI.Offset) // [sp, #offset*8], where factor*8 is implicit
+         .setMIFlag(MachineInstr::FrameSetup);
+      InsertSEH(MIB, TII, MachineInstr::FrameSetup);
+    } else {
+      MIB.addReg(Reg1, getPrologueDeath(MF, Reg1))
+          .addReg(AArch64::SP)
+          .addImm(RPI.Offset) // [sp, #offset*8], where factor*8 is implicit
+          .setMIFlag(MachineInstr::FrameSetup);
+      MIB.addMemOperand(MF.getMachineMemOperand(
+          MachinePointerInfo::getFixedStack(MF, RPI.FrameIdx),
+          MachineMemOperand::MOStore, 8, 8));
+    }
   }
   return true;
 }
@@ -1383,6 +1668,8 @@
   const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
   DebugLoc DL;
   SmallVector<RegPairInfo, 8> RegPairs;
+  bool NeedsWinCFI = MF.getTarget().getMCAsmInfo()->usesWindowsCFI() &&
+	             MF.getFunction().needsUnwindTableEntry();
 
   if (MI != MBB.end())
     DL = MI->getDebugLoc();
@@ -1415,19 +1702,36 @@
                dbgs() << ")\n");
 
     MachineInstrBuilder MIB = BuildMI(MBB, MI, DL, TII.get(LdrOpc));
+
+    if (NeedsWinCFI) {
+      MIB.addReg(Reg1, getDefRegState(true));
+      MIB.addMemOperand(MF.getMachineMemOperand(
+          MachinePointerInfo::getFixedStack(MF, RPI.FrameIdx),
+          MachineMemOperand::MOLoad, 8, 8));
+    }
+
     if (RPI.isPaired()) {
       MIB.addReg(Reg2, getDefRegState(true));
       MIB.addMemOperand(MF.getMachineMemOperand(
           MachinePointerInfo::getFixedStack(MF, RPI.FrameIdx + 1),
           MachineMemOperand::MOLoad, 8, 8));
     }
-    MIB.addReg(Reg1, getDefRegState(true))
-        .addReg(AArch64::SP)
-        .addImm(RPI.Offset) // [sp, #offset*8] where the factor*8 is implicit
-        .setMIFlag(MachineInstr::FrameDestroy);
-    MIB.addMemOperand(MF.getMachineMemOperand(
-        MachinePointerInfo::getFixedStack(MF, RPI.FrameIdx),
-        MachineMemOperand::MOLoad, 8, 8));
+
+
+    if (NeedsWinCFI) {
+      MIB.addReg(AArch64::SP)
+         .addImm(RPI.Offset) // [sp, #offset*8] where the factor*8 is implicit
+         .setMIFlag(MachineInstr::FrameDestroy);
+      InsertSEH(MIB, TII, MachineInstr::FrameDestroy);
+    } else {
+      MIB.addReg(Reg1, getDefRegState(true))
+         .addReg(AArch64::SP)
+         .addImm(RPI.Offset) // [sp, #offset*8] where the factor*8 is implicit
+         .setMIFlag(MachineInstr::FrameDestroy);
+      MIB.addMemOperand(MF.getMachineMemOperand(
+          MachinePointerInfo::getFixedStack(MF, RPI.FrameIdx),
+          MachineMemOperand::MOLoad, 8, 8));
+    }
   };
 
   if (ReverseCSRRestoreSeq)
Index: lib/Target/AArch64/AArch64InstrInfo.h
===================================================================
--- lib/Target/AArch64/AArch64InstrInfo.h
+++ lib/Target/AArch64/AArch64InstrInfo.h
@@ -286,7 +286,7 @@
                      const DebugLoc &DL, unsigned DestReg, unsigned SrcReg,
                      int Offset, const TargetInstrInfo *TII,
                      MachineInstr::MIFlag = MachineInstr::NoFlags,
-                     bool SetNZCV = false);
+                     bool SetNZCV = false, bool NeedsWinCFI = false);
 
 /// rewriteAArch64FrameIndex - Rewrite MI to access 'Offset' bytes from the
 /// FP. Return false if the offset could not be handled directly in MI, and
Index: lib/Target/AArch64/AArch64InstrInfo.cpp
===================================================================
--- lib/Target/AArch64/AArch64InstrInfo.cpp
+++ lib/Target/AArch64/AArch64InstrInfo.cpp
@@ -2952,7 +2952,8 @@
                            MachineBasicBlock::iterator MBBI, const DebugLoc &DL,
                            unsigned DestReg, unsigned SrcReg, int Offset,
                            const TargetInstrInfo *TII,
-                           MachineInstr::MIFlag Flag, bool SetNZCV) {
+                           MachineInstr::MIFlag Flag, bool SetNZCV,
+                           bool NeedsWinCFI) {
   if (DestReg == SrcReg && Offset == 0)
     return;
 
@@ -3007,6 +3008,21 @@
       .addImm(Offset)
       .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0))
       .setMIFlag(Flag);
+
+  if (NeedsWinCFI) {
+    if ((DestReg == AArch64::FP && SrcReg == AArch64::SP) ||
+        (SrcReg == AArch64::FP && DestReg == AArch64::SP)) {
+      if (Offset == 0)
+        BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_SetFP)).
+                setMIFlag(Flag);
+      else
+        BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_AddFP)).
+                addImm(Offset).setMIFlag(Flag);
+    } else if (DestReg == AArch64::SP) {
+      BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_StackAlloc)).
+              addImm(Offset).setMIFlag(Flag);
+    }
+  }
 }
 
 MachineInstr *AArch64InstrInfo::foldMemoryOperandImpl(
Index: lib/Target/AArch64/AArch64RegisterInfo.cpp
===================================================================
--- lib/Target/AArch64/AArch64RegisterInfo.cpp
+++ lib/Target/AArch64/AArch64RegisterInfo.cpp
@@ -42,6 +42,8 @@
 const MCPhysReg *
 AArch64RegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
   assert(MF && "Invalid MachineFunction pointer.");
+  if (MF->getSubtarget<AArch64Subtarget>().isTargetWindows())
+    return CSR_Win_AArch64_AAPCS_SaveList;
   if (MF->getFunction().getCallingConv() == CallingConv::GHC)
     // GHC set of callee saved regs is empty as all those regs are
     // used for passing STG regs around
Index: test/CodeGen/AArch64/win64_vararg.ll
===================================================================
--- test/CodeGen/AArch64/win64_vararg.ll
+++ test/CodeGen/AArch64/win64_vararg.ll
@@ -104,7 +104,7 @@
 
 ; CHECK-LABEL: fp
 ; CHECK: str     x21, [sp, #-96]!
-; CHECK: stp     x20, x19, [sp, #16]
+; CHECK: stp     x19, x20, [sp, #16]
 ; CHECK: stp     x29, x30, [sp, #32]
 ; CHECK: add     x29, sp, #32
 ; CHECK: add     x8, x29, #24
@@ -125,7 +125,7 @@
 ; CHECK: mov     x4, xzr
 ; CHECK: bl      __stdio_common_vsprintf
 ; CHECK: ldp     x29, x30, [sp, #32]
-; CHECK: ldp     x20, x19, [sp, #16]
+; CHECK: ldp     x19, x20, [sp, #16]
 ; CHECK: cmp     w0, #0
 ; CHECK: csinv   w0, w0, wzr, ge
 ; CHECK: ldr     x21, [sp], #96
@@ -151,8 +151,8 @@
 
 ; CHECK-LABEL: vla
 ; CHECK: str     x23, [sp, #-112]!
-; CHECK: stp     x22, x21, [sp, #16]
-; CHECK: stp     x20, x19, [sp, #32]
+; CHECK: stp     x21, x22, [sp, #16]
+; CHECK: stp     x19, x20, [sp, #32]
 ; CHECK: stp     x29, x30, [sp, #48]
 ; CHECK: add     x29, sp, #48
 ; CHECK: add     x8, x29, #16
@@ -183,8 +183,8 @@
 ; CHECK: mov     sp, [[REG2]]
 ; CHECK: sub     sp, x29, #48
 ; CHECK: ldp     x29, x30, [sp, #48]
-; CHECK: ldp     x20, x19, [sp, #32]
-; CHECK: ldp     x22, x21, [sp, #16]
+; CHECK: ldp     x19, x20, [sp, #32]
+; CHECK: ldp     x21, x22, [sp, #16]
 ; CHECK: ldr     x23, [sp], #112
 ; CHECK: ret
 define void @vla(i32, i8*, ...) local_unnamed_addr {
@@ -212,8 +212,9 @@
 
 ; CHECK-LABEL: snprintf
 ; CHECK: sub     sp,  sp, #96
-; CHECK: stp     x21, x20, [sp, #16]
-; CHECK: stp     x19, x30, [sp, #32]
+; CHECK: str     x21, [sp, #16]
+; CHECK: stp     x19, x20, [sp, #24]
+; CHECK: str     x30, [sp, #40]
 ; CHECK: add     x8, sp, #56
 ; CHECK: mov     x19, x2
 ; CHECK: mov     x20, x1
@@ -231,8 +232,9 @@
 ; CHECK: mov     x3, x19
 ; CHECK: mov     x4, xzr
 ; CHECK: bl      __stdio_common_vsprintf
-; CHECK: ldp     x19, x30, [sp, #32]
-; CHECK: ldp     x21, x20, [sp, #16]
+; CHECK: ldr     x30, [sp, #40]
+; CHECK: ldp     x19, x20, [sp, #24]
+; CHECK: ldr     x21, [sp, #16]
 ; CHECK: cmp     w0, #0
 ; CHECK: csinv   w0, w0, wzr, ge
 ; CHECK: add     sp, sp, #96
Index: test/CodeGen/AArch64/wineh-pei.mir
===================================================================
--- /dev/null
+++ test/CodeGen/AArch64/wineh-pei.mir
@@ -0,0 +1,75 @@
+# RUN: llc -o - %s -mtriple=aarch64-windows -start-before=prologepilog -stop-after=prologepilog | FileCheck %s
+
+# Test Win64 unwind opcodes generated by the PEI.
+
+# CHECK: frame-setup SEH_SaveRegP_X 27, 28, -80
+# CHECK: frame-setup SEH_SaveRegP 25, 26, 16
+# CHECK: frame-setup SEH_SaveRegP 23, 24, 32
+# CHECK: frame-setup SEH_SaveRegP 21, 22, 48
+# CHECK: frame-setup SEH_SaveRegP 19, 20, 64
+# CHECK: frame-setup SEH_PrologEnd
+# CHECK: frame-destroy SEH_EpilogStart
+# CHECK: frame-destroy SEH_SaveRegP 19, 20, 64
+# CHECK: frame-destroy SEH_SaveRegP 21, 22, 48
+# CHECK: frame-destroy SEH_SaveRegP 23, 24, 32
+# CHECK: frame-destroy SEH_SaveRegP 25, 26, 16
+# CHECK: frame-destroy SEH_SaveRegP_X 27, 28, -80
+# CHECK: frame-destroy SEH_EpilogEnd
+---
+name:            test
+alignment:       2
+tracksRegLiveness: true
+liveins:
+  - { reg: '$w0' }
+frameInfo:
+  maxCallFrameSize: 0
+  hasOpaqueSPAdjustment: true
+body:             |
+  bb.0.entry:
+    liveins: $x0, $x1
+
+    $x19 = ADDXrr $x0, killed $x1
+    $x20 = ADDXrr $x19, killed $x0
+    $x21 = ADDXrr $x20, killed $x19
+    $x22 = ADDXrr $x21, killed $x20
+    $x23 = ADDXrr $x22, killed $x21
+    $x24 = ADDXrr $x23, killed $x22
+    $x25 = ADDXrr $x24, killed $x23
+    $x26 = ADDXrr $x25, killed $x24
+    $x27 = ADDXrr $x26, killed $x25
+    $x28 = ADDXrr $x27, killed $x26
+    $x0 = COPY $x28
+    RET_ReallyLR implicit $x0
+...
+
+# CHECK: frame-setup SEH_SaveRegP_X 27, 28, -64
+# CHECK: frame-setup SEH_SaveRegP 25, 26, 16
+# CHECK: frame-setup SEH_SaveReg 23, 32
+# CHECK: frame-setup SEH_SaveRegP 21, 22, 40
+# CHECK: frame-setup SEH_SaveReg 19, 56
+# CHECK: frame-setup SEH_PrologEnd
+---
+name:            test2
+alignment:       2
+tracksRegLiveness: true
+liveins:
+  - { reg: '$w0' }
+frameInfo:
+  maxCallFrameSize: 0
+  hasOpaqueSPAdjustment: true
+body:             |
+  bb.0.entry:
+    liveins: $x0, $x1
+
+    $x19 = ADDXrr $x0, killed $x1
+    $x21 = ADDXrr $x19, $x19
+    $x22 = ADDXrr $x21, killed $x19
+    $x23 = ADDXrr $x22, killed $x21
+    $x25 = ADDXrr $x23, $x23
+    $x26 = ADDXrr $x25, killed $x23
+    $x27 = ADDXrr $x26, killed $x25
+    $x28 = ADDXrr $x27, killed $x26
+    $x0 = COPY $x28
+    RET_ReallyLR implicit $x0
+...
+