Index: lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp =================================================================== --- lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp +++ lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp @@ -103,22 +103,6 @@ } } -/// Helper function which extracts the specified 16-bit chunk from a -/// 64-bit value. -static uint64_t getChunk(uint64_t Imm, unsigned ChunkIdx) { - assert(ChunkIdx < 4 && "Out of range chunk index specified!"); - - return (Imm >> (ChunkIdx * 16)) & 0xFFFF; -} - -/// Check whether the given 16-bit chunk replicated to full 64-bit width -/// can be materialized with an ORR instruction. -static bool canUseOrr(uint64_t Chunk, uint64_t &Encoding) { - Chunk = (Chunk << 48) | (Chunk << 32) | (Chunk << 16) | Chunk; - - return AArch64_AM::processLogicalImmediate(Chunk, 64, Encoding); -} - /// Check for identical 16-bit chunks within the constant and if so /// materialize them with a single ORR instruction. The remaining one or two /// 16-bit chunks will be materialized with MOVK instructions. @@ -130,121 +114,70 @@ MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI, const AArch64InstrInfo *TII) { - using CountMap = DenseMap; - - CountMap Counts; - - // Scan the constant and count how often every chunk occurs. - for (unsigned Idx = 0; Idx < 4; ++Idx) - ++Counts[getChunk(UImm, Idx)]; - - // Traverse the chunks to find one which occurs more than once. - for (CountMap::const_iterator Chunk = Counts.begin(), End = Counts.end(); - Chunk != End; ++Chunk) { - const uint64_t ChunkVal = Chunk->first; - const unsigned Count = Chunk->second; - - uint64_t Encoding = 0; - - // We are looking for chunks which have two or three instances and can be - // materialized with an ORR instruction. - if ((Count != 2 && Count != 3) || !canUseOrr(ChunkVal, Encoding)) - continue; - - const bool CountThree = Count == 3; - // Create the ORR-immediate instruction. 
- MachineInstrBuilder MIB = - BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::ORRXri)) - .add(MI.getOperand(0)) - .addReg(AArch64::XZR) - .addImm(Encoding); - - const unsigned DstReg = MI.getOperand(0).getReg(); - const bool DstIsDead = MI.getOperand(0).isDead(); - - unsigned ShiftAmt = 0; - uint64_t Imm16 = 0; - // Find the first chunk not materialized with the ORR instruction. - for (; ShiftAmt < 64; ShiftAmt += 16) { - Imm16 = (UImm >> ShiftAmt) & 0xFFFF; - - if (Imm16 != ChunkVal) - break; - } + AArch64_AM::ReplicableChunk RC = AArch64_AM::replicableWithChunks(UImm); + if (!RC.hasValue()) + return false; + const unsigned Count = std::get<0>(RC.getValue()); + const uint64_t ChunkVal = std::get<1>(RC.getValue()); + const uint64_t Encoding = std::get<2>(RC.getValue()); - // Create the first MOVK instruction. - MachineInstrBuilder MIB1 = - BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::MOVKXi)) - .addReg(DstReg, - RegState::Define | getDeadRegState(DstIsDead && CountThree)) - .addReg(DstReg) - .addImm(Imm16) - .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, ShiftAmt)); + const bool CountThree = Count == 3; + // Create the ORR-immediate instruction. + MachineInstrBuilder MIB = + BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::ORRXri)) + .add(MI.getOperand(0)) + .addReg(AArch64::XZR) + .addImm(Encoding); - // In case we have three instances the whole constant is now materialized - // and we can exit. - if (CountThree) { - transferImpOps(MI, MIB, MIB1); - MI.eraseFromParent(); - return true; - } + const unsigned DstReg = MI.getOperand(0).getReg(); + const bool DstIsDead = MI.getOperand(0).isDead(); - // Find the remaining chunk which needs to be materialized. - for (ShiftAmt += 16; ShiftAmt < 64; ShiftAmt += 16) { - Imm16 = (UImm >> ShiftAmt) & 0xFFFF; + unsigned ShiftAmt = 0; + uint64_t Imm16 = 0; + // Find the first chunk not materialized with the ORR instruction. 
+ for (; ShiftAmt < 64; ShiftAmt += 16) { + Imm16 = (UImm >> ShiftAmt) & 0xFFFF; - if (Imm16 != ChunkVal) - break; - } + if (Imm16 != ChunkVal) + break; + } - // Create the second MOVK instruction. - MachineInstrBuilder MIB2 = - BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::MOVKXi)) - .addReg(DstReg, RegState::Define | getDeadRegState(DstIsDead)) - .addReg(DstReg) - .addImm(Imm16) - .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, ShiftAmt)); + // Create the first MOVK instruction. + MachineInstrBuilder MIB1 = + BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::MOVKXi)) + .addReg(DstReg, + RegState::Define | getDeadRegState(DstIsDead && CountThree)) + .addReg(DstReg) + .addImm(Imm16) + .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, ShiftAmt)); - transferImpOps(MI, MIB, MIB2); + // In case we have three instances the whole constant is now materialized + // and we can exit. + if (CountThree) { + transferImpOps(MI, MIB, MIB1); MI.eraseFromParent(); return true; } - return false; -} + // Find the remaining chunk which needs to be materialized. + for (ShiftAmt += 16; ShiftAmt < 64; ShiftAmt += 16) { + Imm16 = (UImm >> ShiftAmt) & 0xFFFF; -/// Check whether this chunk matches the pattern '1...0...'. This pattern -/// starts a contiguous sequence of ones if we look at the bits from the LSB -/// towards the MSB. -static bool isStartChunk(uint64_t Chunk) { - if (Chunk == 0 || Chunk == std::numeric_limits::max()) - return false; - - return isMask_64(~Chunk); -} - -/// Check whether this chunk matches the pattern '0...1...' This pattern -/// ends a contiguous sequence of ones if we look at the bits from the LSB -/// towards the MSB. -static bool isEndChunk(uint64_t Chunk) { - if (Chunk == 0 || Chunk == std::numeric_limits::max()) - return false; - - return isMask_64(Chunk); -} - -/// Clear or set all bits in the chunk at the given index. 
-static uint64_t updateImm(uint64_t Imm, unsigned Idx, bool Clear) { - const uint64_t Mask = 0xFFFF; + if (Imm16 != ChunkVal) + break; + } - if (Clear) - // Clear chunk in the immediate. - Imm &= ~(Mask << (Idx * 16)); - else - // Set all bits in the immediate for the particular chunk. - Imm |= Mask << (Idx * 16); + // Create the second MOVK instruction. + MachineInstrBuilder MIB2 = + BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::MOVKXi)) + .addReg(DstReg, RegState::Define | getDeadRegState(DstIsDead)) + .addReg(DstReg) + .addImm(Imm16) + .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, ShiftAmt)); - return Imm; + transferImpOps(MI, MIB, MIB2); + MI.eraseFromParent(); + return true; } /// Check whether the constant contains a sequence of contiguous ones, @@ -264,73 +197,13 @@ MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI, const AArch64InstrInfo *TII) { - const int NotSet = -1; - const uint64_t Mask = 0xFFFF; - - int StartIdx = NotSet; - int EndIdx = NotSet; - // Try to find the chunks which start/end a contiguous sequence of ones. - for (int Idx = 0; Idx < 4; ++Idx) { - int64_t Chunk = getChunk(UImm, Idx); - // Sign extend the 16-bit chunk to 64-bit. - Chunk = (Chunk << 48) >> 48; - - if (isStartChunk(Chunk)) - StartIdx = Idx; - else if (isEndChunk(Chunk)) - EndIdx = Idx; - } - - // Early exit in case we can't find a start/end chunk. - if (StartIdx == NotSet || EndIdx == NotSet) + AArch64_AM::SequenceOneIdx SOI = AArch64_AM::sequenceOfOnes(UImm); + if (!SOI.hasValue()) return false; - - // Outside of the contiguous sequence of ones everything needs to be zero. - uint64_t Outside = 0; - // Chunks between the start and end chunk need to have all their bits set. - uint64_t Inside = Mask; - - // If our contiguous sequence of ones wraps around from the MSB into the LSB, - // just swap indices and pretend we are materializing a contiguous sequence - // of zeros surrounded by a contiguous sequence of ones. 
- if (StartIdx > EndIdx) { - std::swap(StartIdx, EndIdx); - std::swap(Outside, Inside); - } - - uint64_t OrrImm = UImm; - int FirstMovkIdx = NotSet; - int SecondMovkIdx = NotSet; - - // Find out which chunks we need to patch up to obtain a contiguous sequence - // of ones. - for (int Idx = 0; Idx < 4; ++Idx) { - const uint64_t Chunk = getChunk(UImm, Idx); - - // Check whether we are looking at a chunk which is not part of the - // contiguous sequence of ones. - if ((Idx < StartIdx || EndIdx < Idx) && Chunk != Outside) { - OrrImm = updateImm(OrrImm, Idx, Outside == 0); - - // Remember the index we need to patch. - if (FirstMovkIdx == NotSet) - FirstMovkIdx = Idx; - else - SecondMovkIdx = Idx; - - // Check whether we are looking a chunk which is part of the contiguous - // sequence of ones. - } else if (Idx > StartIdx && Idx < EndIdx && Chunk != Inside) { - OrrImm = updateImm(OrrImm, Idx, Inside != Mask); - - // Remember the index we need to patch. - if (FirstMovkIdx == NotSet) - FirstMovkIdx = Idx; - else - SecondMovkIdx = Idx; - } - } - assert(FirstMovkIdx != NotSet && "Constant materializable with single ORR!"); + const uint64_t OrrImm = std::get<0>(SOI.getValue()); + const int FirstMovkIdx = std::get<1>(SOI.getValue()); + const int SecondMovkIdx = std::get<2>(SOI.getValue()); + const int NotSet = -1; // Create the ORR-immediate instruction. 
uint64_t Encoding = 0; @@ -351,7 +224,7 @@ .addReg(DstReg, RegState::Define | getDeadRegState(DstIsDead && SingleMovk)) .addReg(DstReg) - .addImm(getChunk(UImm, FirstMovkIdx)) + .addImm(AArch64_AM::getChunk(UImm, FirstMovkIdx)) .addImm( AArch64_AM::getShifterImm(AArch64_AM::LSL, FirstMovkIdx * 16)); @@ -367,7 +240,7 @@ BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::MOVKXi)) .addReg(DstReg, RegState::Define | getDeadRegState(DstIsDead)) .addReg(DstReg) - .addImm(getChunk(UImm, SecondMovkIdx)) + .addImm(AArch64_AM::getChunk(UImm, SecondMovkIdx)) .addImm( AArch64_AM::getShifterImm(AArch64_AM::LSL, SecondMovkIdx * 16)); @@ -459,7 +332,7 @@ .addImm(Encoding); // Create the MOVK instruction. - const unsigned Imm16 = getChunk(UImm, Shift / 16); + const unsigned Imm16 = AArch64_AM::getChunk(UImm, Shift / 16); const unsigned DstReg = MI.getOperand(0).getReg(); const bool DstIsDead = MI.getOperand(0).isDead(); MachineInstrBuilder MIB1 = Index: lib/Target/AArch64/AArch64ISelLowering.cpp =================================================================== --- lib/Target/AArch64/AArch64ISelLowering.cpp +++ lib/Target/AArch64/AArch64ISelLowering.cpp @@ -5394,15 +5394,25 @@ IsLegal = AArch64_AM::getFP32Imm(ImmInt) != -1 || Imm.isPosZero(); else if (VT == MVT::f16 && Subtarget->hasFullFP16()) IsLegal = AArch64_AM::getFP16Imm(ImmInt) != -1 || Imm.isPosZero(); - // TODO: fmov h0, w0 is also legal, however on't have an isel pattern to + // TODO: fmov h0, w0 is also legal, however we don't have an isel pattern to // generate that fmov. // If we can not materialize in immediate field for fmov, check if the // value can be encoded as the immediate operand of a logical instruction. // The immediate value will be created with either MOVZ, MOVN, or ORR. - if (!IsLegal && (VT == MVT::f64 || VT == MVT::f32)) - IsLegal = AArch64_AM::isAnyMOVWMovAlias(ImmInt.getZExtValue(), - VT.getSizeInBits()); + // The cost is actually exactly the same for mov+fmov vs. 
adrp+ldr; however
+  // the mov+fmov sequence is always better because of the reduced cache
+  // pressure. The timings are still the same if you consider movw+movk+fmov
+  // vs. adrp+ldr (it's one instruction longer, but the movw+movk is fused).
+  // So we limit up to 2 instructions at most.
+  if (!IsLegal && (VT == MVT::f64 || VT == MVT::f32)) {
+    // For f64 it is not obviously worthwhile to emit a five-instruction
+    // sequence vs. a two-instruction constant-pool load. So we limit to a
+    // maximum of 2 moves to match the adrp+ldr cost.
+    int NumInst = AArch64_AM::getExpandImmCost(ImmInt.getZExtValue(),
+                                               VT.getSizeInBits());
+    IsLegal = NumInst <= (forCodeSize ? 1 : 2);
+  }

   LLVM_DEBUG(dbgs() << (IsLegal ? "Legal " : "Illegal ") << VT.getEVTString()
                     << " imm value: "; Imm.dump(););
Index: lib/Target/AArch64/MCTargetDesc/AArch64AddressingModes.h
===================================================================
--- lib/Target/AArch64/MCTargetDesc/AArch64AddressingModes.h
+++ lib/Target/AArch64/MCTargetDesc/AArch64AddressingModes.h
@@ -16,8 +16,11 @@
 #include "llvm/ADT/APFloat.h"
 #include "llvm/ADT/APInt.h"
 #include "llvm/ADT/bit.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/Optional.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/MathExtras.h"
+#include <tuple>
 #include <cassert>

 namespace llvm {
@@ -841,6 +843,245 @@
   return isAnyMOVZMovAlias(Value, RegWidth);
 }

+/// Helper function which extracts the specified 16-bit chunk from a
+/// 64-bit value.
+inline static uint64_t getChunk(uint64_t Imm, unsigned ChunkIdx) {
+  assert(ChunkIdx < 4 && "Out of range chunk index specified!");
+
+  return (Imm >> (ChunkIdx * 16)) & 0xFFFF;
+}
+
+/// Check whether the given 16-bit chunk replicated to full 64-bit width
+/// can be materialized with an ORR instruction.
+inline static bool canUseOrr(uint64_t Chunk, uint64_t &Encoding) {
+  Chunk = (Chunk << 48) | (Chunk << 32) | (Chunk << 16) | Chunk;
+
+  return AArch64_AM::processLogicalImmediate(Chunk, 64, Encoding);
+}
+
+/// Check whether this chunk matches the pattern '1...0...'. This pattern
+/// starts a contiguous sequence of ones if we look at the bits from the LSB
+/// towards the MSB.
+inline static bool isStartChunk(uint64_t Chunk) {
+  if (Chunk == 0 || Chunk == std::numeric_limits<uint64_t>::max())
+    return false;
+
+  return isMask_64(~Chunk);
+}
+
+/// Check whether this chunk matches the pattern '0...1...' This pattern
+/// ends a contiguous sequence of ones if we look at the bits from the LSB
+/// towards the MSB.
+inline static bool isEndChunk(uint64_t Chunk) {
+  if (Chunk == 0 || Chunk == std::numeric_limits<uint64_t>::max())
+    return false;
+
+  return isMask_64(Chunk);
+}
+
+typedef Optional<std::tuple<unsigned, uint64_t, uint64_t>> ReplicableChunk;
+
+// Check for identical 16-bit chunks within the constant that can be
+// materialized with a single ORR instruction plus MOVK for the remaining
+// chunks.
+inline static ReplicableChunk replicableWithChunks(uint64_t UImm) {
+  using CountMap = DenseMap<uint64_t, unsigned>;
+
+  CountMap Counts;
+
+  // Scan the constant and count how often every chunk occurs.
+  for (unsigned Idx = 0; Idx < 4; ++Idx)
+    ++Counts[getChunk(UImm, Idx)];
+
+  for (CountMap::const_iterator Chunk = Counts.begin(), End = Counts.end();
+       Chunk != End; ++Chunk) {
+    const uint64_t ChunkVal = Chunk->first;
+    const unsigned Count = Chunk->second;
+
+    uint64_t Encoding = 0;
+
+    // We are looking for chunks which have two or three instances and can be
+    // materialized with an ORR instruction.
+    if ((Count != 2 && Count != 3) || !canUseOrr(ChunkVal, Encoding))
+      continue;
+
+    return std::make_tuple(Count, ChunkVal, Encoding);
+  }
+
+  return ReplicableChunk();
+}
+
+inline static bool isReplicableWithChunks(uint64_t UImm, unsigned &Count) {
+  AArch64_AM::ReplicableChunk RC = AArch64_AM::replicableWithChunks(UImm);
+  if (!RC.hasValue())
+    return false;
+  // With three identical chunks the ORR covers them all and one MOVK finishes
+  // the constant (2 instructions); otherwise two MOVKs follow the ORR (3).
+  Count = std::get<0>(RC.getValue()) == 3 ? 2 : 3;
+  return true;
+}
+
+
+typedef Optional<std::tuple<uint64_t, int, int>> SequenceOneIdx;
+
+/// Clear or set all bits in the chunk at the given index.
+static inline uint64_t updateImm(uint64_t Imm, unsigned Idx, bool Clear) {
+  const uint64_t Mask = 0xFFFF;
+
+  if (Clear)
+    // Clear chunk in the immediate.
+    Imm &= ~(Mask << (Idx * 16));
+  else
+    // Set all bits in the immediate for the particular chunk.
+    Imm |= Mask << (Idx * 16);
+
+  return Imm;
+}
+
+// Check if the constant contains a sequence of contiguous ones, which might
+// be interrupted by one or two chunks that can be materialized with an
+// ORR instruction plus MOVK instructions.
+inline static SequenceOneIdx sequenceOfOnes(uint64_t UImm) {
+  const int NotSet = -1;
+  const uint64_t Mask = 0xFFFF;
+
+  int StartIdx = NotSet;
+  int EndIdx = NotSet;
+
+  // Try to find the chunks which start/end a contiguous sequence of ones.
+  for (int Idx = 0; Idx < 4; ++Idx) {
+    int64_t Chunk = getChunk(UImm, Idx);
+    // Sign extend the 16-bit chunk to 64-bit.
+    Chunk = (Chunk << 48) >> 48;
+    if (isStartChunk(Chunk))
+      StartIdx = Idx;
+    else if (isEndChunk(Chunk))
+      EndIdx = Idx;
+  }
+
+  // Early exit in case we can't find a start/end chunk.
+  if (StartIdx == NotSet || EndIdx == NotSet)
+    return SequenceOneIdx();
+
+  // Outside of the contiguous sequence of ones everything needs to be zero.
+  uint64_t Outside = 0;
+  // Chunks between the start and end chunk need to have all their bits set.
+  uint64_t Inside = Mask;
+
+  // If our contiguous sequence of ones wraps around from the MSB into the LSB,
+  // just swap indices and pretend we are materializing a contiguous sequence
+  // of zeros surrounded by a contiguous sequence of ones.
+  if (StartIdx > EndIdx) {
+    std::swap(StartIdx, EndIdx);
+    std::swap(Outside, Inside);
+  }
+
+  uint64_t OrrImm = UImm;
+  int FirstMovkIdx = NotSet;
+  int SecondMovkIdx = NotSet;
+
+  // Find out which chunks we need to patch up to obtain a contiguous sequence
+  // of ones.
+  for (int Idx = 0; Idx < 4; ++Idx) {
+    const uint64_t Chunk = AArch64_AM::getChunk(UImm, Idx);
+
+    // Check whether we are looking at a chunk which is not part of the
+    // contiguous sequence of ones.
+    if ((Idx < StartIdx || EndIdx < Idx) && Chunk != Outside) {
+      OrrImm = updateImm(OrrImm, Idx, Outside == 0);
+
+      // Remember the index we need to patch.
+      if (FirstMovkIdx == NotSet)
+        FirstMovkIdx = Idx;
+      else
+        SecondMovkIdx = Idx;
+
+      // Check whether we are looking at a chunk which is part of the
+      // contiguous sequence of ones.
+    } else if (Idx > StartIdx && Idx < EndIdx && Chunk != Inside) {
+      OrrImm = updateImm(OrrImm, Idx, Inside != Mask);
+
+      // Remember the index we need to patch.
+      if (FirstMovkIdx == NotSet)
+        FirstMovkIdx = Idx;
+      else
+        SecondMovkIdx = Idx;
+    }
+  }
+  assert(FirstMovkIdx != NotSet && "Constant materializable with single ORR!");
+
+  return std::make_tuple(OrrImm, FirstMovkIdx, SecondMovkIdx);
+}
+
+
+static inline bool isSequenceOfOnes(uint64_t UImm, unsigned &Count) {
+  SequenceOneIdx SOI = sequenceOfOnes(UImm);
+  if (!SOI.hasValue())
+    return false;
+  const bool SingleMovk = std::get<2>(SOI.getValue()) == -1;
+  Count = SingleMovk ? 2 : 3;
+  return true;
+}
+
+// Return the number of instructions required to materialize the constant.
+// It follows the strategy used in AArch64ExpandPseudo::expandMOVImm.
+static inline int getExpandImmCost(uint64_t Imm, unsigned BitSize) { + const unsigned Mask = 0xFFFF; + uint64_t Encoding; + + if (Imm == 0) + return 0; + + // Single ORR. + uint64_t UImm = Imm << (64 - BitSize) >> (64 - BitSize); + if (processLogicalImmediate(UImm, BitSize, Encoding)) + return 1; + + // Scan the immediate and count the number of 16-bit chunks which are either + // all ones or all zeros. + unsigned OneChunks = 0; + unsigned ZeroChunks = 0; + for (unsigned Shift = 0; Shift < BitSize; Shift += 16) { + const unsigned Chunk = (Imm >> Shift) & Mask; + if (Chunk == Mask) + OneChunks++; + else if (Chunk == 0) + ZeroChunks++; + } + + // MOVZ/MOVN followed by MOVK. + if (OneChunks >= (BitSize / 16) - 2 || ZeroChunks >= (BitSize / 16) - 2) + return 2; + + // All 32-bit immediates can be expanded with a MOVZ/MOVK pair. + + // 64-bit ORR followed by MOVK. + for (unsigned Shift = 0; Shift < BitSize; Shift += 16) { + uint64_t ShiftedMask = (0xFFFFULL << Shift); + uint64_t ZeroChunk = UImm & ~ShiftedMask; + uint64_t OneChunk = UImm | ShiftedMask; + uint64_t RotatedImm = (UImm << 32) | (UImm >> 32); + uint64_t ReplicateChunk = ZeroChunk | (RotatedImm & ShiftedMask); + if (processLogicalImmediate(ZeroChunk, BitSize, Encoding) || + processLogicalImmediate(OneChunk, BitSize, Encoding) || + processLogicalImmediate(ReplicateChunk, BitSize, Encoding)) { + return 2; + } + } + + // MOVZ/MOVN followed by two MOVK; + if (OneChunks || ZeroChunks) + return 3; + + unsigned Count; + if (BitSize == 64 && isReplicableWithChunks(UImm, Count)) + return Count; + if (BitSize == 64 && isSequenceOfOnes(UImm, Count)) + return Count; + + return 4; +} + } // end namespace AArch64_AM } // end namespace llvm Index: test/CodeGen/AArch64/arm64-fp-imm.ll =================================================================== --- test/CodeGen/AArch64/arm64-fp-imm.ll +++ test/CodeGen/AArch64/arm64-fp-imm.ll @@ -10,12 +10,11 @@ ret double 0x400921FB54442D18 } -; CHECK: literal4 -; CHECK: .long 
1078530011
 define float @bar() {
 ; CHECK: _bar:
-; CHECK:  adrp x[[REG:[0-9]+]], lCPI1_0@PAGE
-; CHECK:  ldr s0, [x[[REG]], lCPI1_0@PAGEOFF]
+; CHECK:  mov [[REG:w[0-9]+]], #4059
+; CHECK:  movk [[REG]], #16457, lsl #16
+; CHECK:  fmov s0, [[REG]]
 ; CHECK-NEXT:  ret
   ret float 0x400921FB60000000
 }
Index: test/CodeGen/AArch64/fpimm.ll
===================================================================
--- test/CodeGen/AArch64/fpimm.ll
+++ test/CodeGen/AArch64/fpimm.ll
@@ -45,6 +45,13 @@
 ; TINY-DAG: mov [[X128:x[0-9]+]], #4638707616191610880
 ; TINY-DAG: fmov {{d[0-9]+}}, [[X128]]

+; 64-bit ORR followed by MOVK.
+; CHECK-DAG: mov [[XFP0:x[0-9]+]], #1082331758844
+; CHECK-DAG: movk [[XFP0]], #64764, lsl #16
+; CHECK-DAG: fmov {{d[0-9]+}}, [[XFP0]]
+  %newval3 = fadd double %val, 0xFCFCFC00FC
+  store volatile double %newval3, double* @varf64
+
 ; CHECK: ret
 ; TINY: ret
   ret void
@@ -54,8 +61,9 @@
 ; LARGE: mov [[REG:w[0-9]+]], #4059
 ; LARGE-NEXT: movk [[REG]], #16457, lsl #16
 ; LARGE-NEXT: fmov s0, [[REG]]
-; TINY-LABEL: check_float2
-; TINY: ldr s0, .LCPI2_0
+; TINY-LABEL: check_float2
+; TINY: mov [[REG:w[0-9]+]], #4059
+; TINY-NEXT: movk [[REG]], #16457, lsl #16
 define float @check_float2() {
   ret float 3.14159274101257324218750
 }
Index: test/CodeGen/AArch64/literal_pools_float.ll
===================================================================
--- test/CodeGen/AArch64/literal_pools_float.ll
+++ test/CodeGen/AArch64/literal_pools_float.ll
@@ -31,16 +31,19 @@
   %doubleval = load double, double* @vardouble
   %newdouble = fadd double %doubleval, 129.0

-; CHECK: adrp x[[LITBASE:[0-9]+]], [[CURLIT:.LCPI[0-9]+_[0-9]+]]
-; CHECK: ldr [[LIT129:d[0-9]+]], [x[[LITBASE]], {{#?}}:lo12:[[CURLIT]]]
+; CHECK: mov [[W129:x[0-9]+]], #35184372088832
+; CHECK: movk [[W129]], #16480, lsl #48
+; CHECK: fmov {{d[0-9]+}}, [[W129]]
 ; CHECK-NOFP-NOT: ldr {{d[0-9]+}},
 ; CHECK-NOFP-NOT: fadd

-; CHECK-TINY: ldr [[LIT129:d[0-9]+]], [[CURLIT:.LCPI[0-9]+_[0-9]+]]
+; CHECK-TINY: mov [[W129:x[0-9]+]],
#35184372088832 +; CHECK-TINY: movk [[W129]], #16480, lsl #48 +; CHECK-TINY: fmov {{d[0-9]+}}, [[W129]] ; CHECK-NOFP-TINY-NOT: ldr {{d[0-9]+}}, ; CHECK-NOFP-TINY-NOT: fadd -; CHECK-LARGE: movz x[[LITADDR:[0-9]+]], #:abs_g0_nc:[[CURLIT:.LCPI[0-9]+_[0-9]+]] +; CHECK-LARGE: movz x[[LITADDR:[0-9]+]], #:abs_g0_nc:[[CURLIT:vardouble]] ; CHECK-LARGE: movk x[[LITADDR]], #:abs_g1_nc:[[CURLIT]] ; CHECK-LARGE: movk x[[LITADDR]], #:abs_g2_nc:[[CURLIT]] ; CHECK-LARGE: movk x[[LITADDR]], #:abs_g3:[[CURLIT]] Index: test/CodeGen/AArch64/win_cst_pool.ll =================================================================== --- test/CodeGen/AArch64/win_cst_pool.ll +++ test/CodeGen/AArch64/win_cst_pool.ll @@ -2,22 +2,22 @@ ; RUN: llc < %s -mtriple=aarch64-win32-gnu | FileCheck -check-prefix=MINGW %s define double @double() { - ret double 0x0000000000800001 + ret double 0x2000000000800001 } -; CHECK: .globl __real@0000000000800001 -; CHECK-NEXT: .section .rdata,"dr",discard,__real@0000000000800001 +; CHECK: .globl __real@2000000000800001 +; CHECK-NEXT: .section .rdata,"dr",discard,__real@2000000000800001 ; CHECK-NEXT: .p2align 3 -; CHECK-NEXT: __real@0000000000800001: -; CHECK-NEXT: .xword 8388609 +; CHECK-NEXT: __real@2000000000800001: +; CHECK-NEXT: .xword 2305843009222082561 ; CHECK: double: -; CHECK: adrp x8, __real@0000000000800001 -; CHECK-NEXT: ldr d0, [x8, __real@0000000000800001] +; CHECK: adrp x8, __real@2000000000800001 +; CHECK-NEXT: ldr d0, [x8, __real@2000000000800001] ; CHECK-NEXT: ret ; MINGW: .section .rdata,"dr" ; MINGW-NEXT: .p2align 3 ; MINGW-NEXT: [[LABEL:\.LC.*]]: -; MINGW-NEXT: .xword 8388609 +; MINGW-NEXT: .xword 2305843009222082561 ; MINGW: double: ; MINGW: adrp x8, [[LABEL]] ; MINGW-NEXT: ldr d0, [x8, [[LABEL]]]