Index: lib/Target/SystemZ/SystemZISelDAGToDAG.cpp
===================================================================
--- lib/Target/SystemZ/SystemZISelDAGToDAG.cpp
+++ lib/Target/SystemZ/SystemZISelDAGToDAG.cpp
@@ -304,6 +304,8 @@
   void splitLargeImmediate(unsigned Opcode, SDNode *Node, SDValue Op0,
                            uint64_t UpperVal, uint64_t LowerVal);
 
+  bool tryReplicateConstantSplat(BuildVectorSDNode *BVN);
+
   // Try to use gather instruction Opcode to implement vector insertion N.
   bool tryGather(SDNode *N, unsigned Opcode);
 
@@ -1132,6 +1134,48 @@
   SelectCode(Or.getNode());
 }
 
+// Try to implement the constant-splat BUILD_VECTOR node BVN with a
+// SystemZISD::REPLICATE node (for a sign-extended 16-bit element value)
+// or a SystemZISD::ROTATE_MASK node (for a contiguous or wraparound bit
+// mask), bitcast to the BUILD_VECTOR's type, and select the result.
+// Return true on success; otherwise leave the node alone and return false.
+bool SystemZDAGToDAGISel::tryReplicateConstantSplat(BuildVectorSDNode *BVN) {
+  const SystemZInstrInfo *TII = getInstrInfo();
+  int64_t ReplicatedImm;
+  unsigned RotateStart, RotateEnd;
+  MVT VecVT;
+  if (!SystemZTargetLowering::analyzeBVNForConstantReplication(
+          BVN, ReplicatedImm, RotateStart, RotateEnd, VecVT, TII))
+    return false;
+
+  SDLoc DL(BVN);
+  EVT VT = BVN->getValueType(0);
+  SDValue Op;
+  SDValue BitCast;
+  if (ReplicatedImm != INT64_MAX) {
+    Op = CurDAG->getNode(SystemZISD::REPLICATE, DL, VecVT,
+                         CurDAG->getConstant(ReplicatedImm, DL, MVT::i32, false,
+                                             true /*isOpaque*/));
+    BitCast = CurDAG->getNode(ISD::BITCAST, DL, VT, Op);
+  } else {
+    Op = CurDAG->getNode(
+        SystemZISD::ROTATE_MASK, DL, VecVT,
+        CurDAG->getConstant(RotateStart, DL, MVT::i32, false,
+                            true /*isOpaque*/),
+        CurDAG->getConstant(RotateEnd, DL, MVT::i32, false, true /*isOpaque*/));
+    BitCast = CurDAG->getNode(ISD::BITCAST, DL, VT, Op);
+  }
+
+  ReplaceNode(BVN, BitCast.getNode());
+  SelectCode(BitCast.getNode());
+  if (Op != BitCast) {
+    assert(!Op.use_empty() && "Expected bitcasted SDValue to remain in DAG");
+    SelectCode(Op.getNode());
+  }
+
+  return true;
+}
+
 bool SystemZDAGToDAGISel::tryGather(SDNode *N, unsigned Opcode) {
   SDValue ElemV = N->getOperand(2);
   auto *ElemN = dyn_cast<ConstantSDNode>(ElemV);
@@ -1538,6 +1582,8 @@
       ReplaceNode(Node, Res);
       return;
     }
+    if (tryReplicateConstantSplat(BVN))
+      return;
     break;
   }
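Illustration (not part of the patch): the decision tryReplicateConstantSplat encodes. A splat element that fits a sign-extended 16-bit immediate becomes a SystemZISD::REPLICATE (VREPI); otherwise, if its bits form a single run of ones, it becomes a SystemZISD::ROTATE_MASK (VGM). isContiguousRun below is a simplified, hypothetical stand-in for TII->isRxSBGMask, which additionally accepts runs that wrap around the element.

    #include <cassert>
    #include <cstdint>

    // True if Value, viewed as a Bits-wide element, is a single run of ones,
    // i.e. something VECTOR GENERATE MASK can produce without wraparound.
    // Values that fit a signed 16-bit immediate (e.g. a splat of 1) take the
    // REPLICATE path first and never reach this check.
    static bool isContiguousRun(uint64_t Value, unsigned Bits) {
      Value &= (Bits == 64 ? ~0ULL : ((1ULL << Bits) - 1));
      if (Value == 0)
        return false;
      uint64_t Run = Value >> __builtin_ctzll(Value); // strip trailing zeros
      return (Run & (Run + 1)) == 0;                  // remaining bits all ones
    }

    int main() {
      // A v4i32 splat of 0xFFFF0000 is not a 16-bit immediate, but its bits
      // are one contiguous run, so ROTATE_MASK (VGMF 0, 15) can materialize it.
      assert(isContiguousRun(0xFFFF0000u, 32));
      // A v4i32 splat of 0x12345678 fits neither form, so lowerBUILD_VECTOR
      // falls back to loading the constant from memory.
      assert(!isContiguousRun(0x12345678u, 32));
      return 0;
    }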
Index: lib/Target/SystemZ/SystemZISelLowering.h
===================================================================
--- lib/Target/SystemZ/SystemZISelLowering.h
+++ lib/Target/SystemZ/SystemZISelLowering.h
@@ -15,6 +15,7 @@
 #define LLVM_LIB_TARGET_SYSTEMZ_SYSTEMZISELLOWERING_H
 
 #include "SystemZ.h"
+#include "SystemZInstrInfo.h"
 #include "llvm/CodeGen/MachineBasicBlock.h"
 #include "llvm/CodeGen/SelectionDAG.h"
 #include "llvm/CodeGen/TargetLowering.h"
@@ -513,6 +514,11 @@
   }
 
   static bool tryBuildVectorByteMask(BuildVectorSDNode *BVN, uint64_t &Mask);
+  static bool analyzeBVNForConstantReplication(BuildVectorSDNode *BVN,
+                                               int64_t &ReplicatedImm,
+                                               unsigned &RotateStart,
+                                               unsigned &RotateEnd, MVT &VecVT,
+                                               const SystemZInstrInfo *TII);
 
 private:
   const SystemZSubtarget &Subtarget;
@@ -638,6 +644,8 @@
   MachineBasicBlock *emitLoadAndTestCmp0(MachineInstr &MI,
                                          MachineBasicBlock *MBB,
                                          unsigned Opcode) const;
+  MachineBasicBlock *emitFPScalarImm(MachineInstr &MI,
+                                     MachineBasicBlock *MBB) const;
 
   const TargetRegisterClass *getRepRegClassFor(MVT VT) const override;
 };
Index: lib/Target/SystemZ/SystemZISelLowering.cpp
===================================================================
--- lib/Target/SystemZ/SystemZISelLowering.cpp
+++ lib/Target/SystemZ/SystemZISelLowering.cpp
@@ -577,9 +577,25 @@
   return false;
 }
 
+static bool analyzeFPImm(const APFloat &Imm, unsigned &Start, unsigned &End,
+                         const SystemZInstrInfo *TII) {
+  APInt IntImm = Imm.bitcastToAPInt();
+  if (IntImm.getActiveBits() > 64)
+    return false;
+
+  // See if this immediate could be generated with VGM.
+  return TII->isRxSBGMask(uint64_t(IntImm.getZExtValue()), 64, Start, End);
+}
+
 bool SystemZTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT) const {
   // We can load zero using LZ?R and negative zero using LZ?R;LC?BR.
-  return Imm.isZero() || Imm.isNegZero();
+  if (Imm.isZero() || Imm.isNegZero())
+    return true;
+
+  const SystemZInstrInfo *TII =
+      static_cast<const SystemZInstrInfo *>(Subtarget.getInstrInfo());
+  unsigned Start, End;
+  return analyzeFPImm(Imm, Start, End, TII);
 }
 
 bool SystemZTargetLowering::isLegalICmpImmediate(int64_t Imm) const {
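Illustration (not part of the patch): which scalar doubles the new isFPImmLegal accepts and the VGMG operands analyzeFPImm would compute for them. maskRange is a simplified, hypothetical helper that only handles non-wrapping runs of ones (isRxSBGMask also allows wraparound); for 0.5 it yields the same I2/I3 pair, 2 and 10, that the custom inserter added later in this patch passes to VGMG.

    #include <cstdint>
    #include <cstdio>
    #include <cstring>

    // For a double whose bits form one non-wrapping run of ones, return the
    // SystemZ-style bit positions (0 = most significant bit) of its ends.
    static bool maskRange(double D, unsigned &Start, unsigned &End) {
      uint64_t Bits;
      std::memcpy(&Bits, &D, sizeof Bits);
      if (Bits == 0)
        return false;
      unsigned Lead = __builtin_clzll(Bits);  // zeros above the run
      unsigned Trail = __builtin_ctzll(Bits); // zeros below the run
      uint64_t Run = (~0ULL >> Lead) & (~0ULL << Trail);
      if (Bits != Run)
        return false; // more than one run of ones
      Start = Lead;
      End = 63 - Trail;
      return true;
    }

    int main() {
      unsigned S, E;
      if (maskRange(0.5, S, E))  // bit pattern 0x3FE0000000000000
        std::printf("0.5  -> VGMG %u, %u\n", S, E); // VGMG 2, 10
      if (maskRange(-2.0, S, E)) // bit pattern 0xC000000000000000
        std::printf("-2.0 -> VGMG %u, %u\n", S, E); // VGMG 0, 1
      return 0;
    }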
@@ -4288,49 +4304,6 @@
   return true;
 }
 
-// Try to load a vector constant in which BitsPerElement-bit value Value
-// is replicated to fill the vector. VT is the type of the resulting
-// constant, which may have elements of a different size from BitsPerElement.
-// Return the SDValue of the constant on success, otherwise return
-// an empty value.
-static SDValue tryBuildVectorReplicate(SelectionDAG &DAG,
-                                       const SystemZInstrInfo *TII,
-                                       const SDLoc &DL, EVT VT, uint64_t Value,
-                                       unsigned BitsPerElement) {
-  // Signed 16-bit values can be replicated using VREPI.
-  // Mark the constants as opaque or DAGCombiner will convert back to
-  // BUILD_VECTOR.
-  int64_t SignedValue = SignExtend64(Value, BitsPerElement);
-  if (isInt<16>(SignedValue)) {
-    MVT VecVT = MVT::getVectorVT(MVT::getIntegerVT(BitsPerElement),
-                                 SystemZ::VectorBits / BitsPerElement);
-    SDValue Op = DAG.getNode(
-        SystemZISD::REPLICATE, DL, VecVT,
-        DAG.getConstant(SignedValue, DL, MVT::i32, false, true /*isOpaque*/));
-    return DAG.getNode(ISD::BITCAST, DL, VT, Op);
-  }
-  // See whether rotating the constant left some N places gives a value that
-  // is one less than a power of 2 (i.e. all zeros followed by all ones).
-  // If so we can use VGM.
-  unsigned Start, End;
-  if (TII->isRxSBGMask(Value, BitsPerElement, Start, End)) {
-    // isRxSBGMask returns the bit numbers for a full 64-bit value,
-    // with 0 denoting 1 << 63 and 63 denoting 1. Convert them to
-    // bit numbers for an BitsPerElement value, so that 0 denotes
-    // 1 << (BitsPerElement-1).
-    Start -= 64 - BitsPerElement;
-    End -= 64 - BitsPerElement;
-    MVT VecVT = MVT::getVectorVT(MVT::getIntegerVT(BitsPerElement),
-                                 SystemZ::VectorBits / BitsPerElement);
-    SDValue Op = DAG.getNode(
-        SystemZISD::ROTATE_MASK, DL, VecVT,
-        DAG.getConstant(Start, DL, MVT::i32, false, true /*isOpaque*/),
-        DAG.getConstant(End, DL, MVT::i32, false, true /*isOpaque*/));
-    return DAG.getNode(ISD::BITCAST, DL, VT, Op);
-  }
-  return SDValue();
-}
-
 // If a BUILD_VECTOR contains some EXTRACT_VECTOR_ELTs, it's usually
 // better to use VECTOR_SHUFFLEs on them, only using BUILD_VECTOR for
 // the non-EXTRACT_VECTOR_ELT elements.  See if the given BUILD_VECTOR
@@ -4529,10 +4502,55 @@
   return Result;
 }
 
+bool SystemZTargetLowering::analyzeBVNForConstantReplication(
+    BuildVectorSDNode *BVN, int64_t &ReplicatedImm, unsigned &RotateStart,
+    unsigned &RotateEnd, MVT &VecVT, const SystemZInstrInfo *TII) {
+  APInt SplatBits, SplatUndef;
+  unsigned SplatBitSize;
+  bool HasAnyUndefs;
+  if (!(BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs,
+                             8, true) &&
+        SplatBitSize <= 64))
+    return false;
+  VecVT = MVT::getVectorVT(MVT::getIntegerVT(SplatBitSize),
+                           SystemZ::VectorBits / SplatBitSize);
+  ReplicatedImm = INT64_MAX;
+  auto tryValue = [&](uint64_t Value) -> bool {
+    int64_t SignedValue = SignExtend64(Value, SplatBitSize);
+    if (isInt<16>(SignedValue)) {
+      ReplicatedImm = SignedValue;
+      return true;
+    }
+    if (TII->isRxSBGMask(Value, SplatBitSize, RotateStart, RotateEnd)) {
+      RotateStart -= 64 - SplatBitSize;
+      RotateEnd -= 64 - SplatBitSize;
+      return true;
+    }
+    return false;
+  };
+
+  // First try assuming that any undefined bits above the highest set bit
+  // and below the lowest set bit are 1s. This increases the likelihood of
+  // being able to use a sign-extended element value in VECTOR REPLICATE
+  // IMMEDIATE or a wraparound mask in VECTOR GENERATE MASK.
+  uint64_t SplatBitsZ = SplatBits.getZExtValue();
+  uint64_t SplatUndefZ = SplatUndef.getZExtValue();
+  uint64_t Lower =
+      (SplatUndefZ & ((uint64_t(1) << findFirstSet(SplatBitsZ)) - 1));
+  uint64_t Upper =
+      (SplatUndefZ & ~((uint64_t(1) << findLastSet(SplatBitsZ)) - 1));
+  if (tryValue(SplatBitsZ | Upper | Lower))
+    return true;
+
+  // Now try assuming that any undefined bits between the first and
+  // last defined set bits are set. This increases the chances of
+  // using a non-wraparound mask.
+  uint64_t Middle = SplatUndefZ & ~Upper & ~Lower;
+  return tryValue(SplatBitsZ | Middle);
+}
+
 SDValue SystemZTargetLowering::lowerBUILD_VECTOR(SDValue Op,
                                                  SelectionDAG &DAG) const {
-  const SystemZInstrInfo *TII =
-      static_cast<const SystemZInstrInfo *>(Subtarget.getInstrInfo());
   auto *BVN = cast<BuildVectorSDNode>(Op.getNode());
   SDLoc DL(Op);
   EVT VT = Op.getValueType();
@@ -4548,37 +4566,14 @@
     return Op;
 
   // Try using some form of replication.
-  APInt SplatBits, SplatUndef;
-  unsigned SplatBitSize;
-  bool HasAnyUndefs;
-  if (BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs,
-                           8, true) &&
-      SplatBitSize <= 64) {
-    // First try assuming that any undefined bits above the highest set bit
-    // and below the lowest set bit are 1s. This increases the likelihood of
-    // being able to use a sign-extended element value in VECTOR REPLICATE
-    // IMMEDIATE or a wraparound mask in VECTOR GENERATE MASK.
-    uint64_t SplatBitsZ = SplatBits.getZExtValue();
-    uint64_t SplatUndefZ = SplatUndef.getZExtValue();
-    uint64_t Lower = (SplatUndefZ
-                      & ((uint64_t(1) << findFirstSet(SplatBitsZ)) - 1));
-    uint64_t Upper = (SplatUndefZ
-                      & ~((uint64_t(1) << findLastSet(SplatBitsZ)) - 1));
-    uint64_t Value = SplatBitsZ | Upper | Lower;
-    SDValue Op = tryBuildVectorReplicate(DAG, TII, DL, VT, Value,
-                                         SplatBitSize);
-    if (Op.getNode())
-      return Op;
-
-    // Now try assuming that any undefined bits between the first and
-    // last defined set bits are set. This increases the chances of
-    // using a non-wraparound mask.
-    uint64_t Middle = SplatUndefZ & ~Upper & ~Lower;
-    Value = SplatBitsZ | Middle;
-    Op = tryBuildVectorReplicate(DAG, TII, DL, VT, Value, SplatBitSize);
-    if (Op.getNode())
-      return Op;
-  }
+  const SystemZInstrInfo *TII =
+      static_cast<const SystemZInstrInfo *>(Subtarget.getInstrInfo());
+  int64_t ReplicatedImm;
+  unsigned RotateStart, RotateEnd;
+  MVT VecVT;
+  if (analyzeBVNForConstantReplication(BVN, ReplicatedImm, RotateStart,
                                       RotateEnd, VecVT, TII))
+    return Op;
 
   // Fall back to loading it from memory.
   return SDValue();
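Illustration (not part of the patch), using made-up SplatBits/SplatUndef values: the first tryValue attempt above assumes that undefined bits outside the defined run are ones, which can turn a partially undefined splat into a plain VECTOR REPLICATE IMMEDIATE. Suppose the analysis is looking at a 64-bit splat value whose byte 1 is defined as 0xFF and whose remaining bytes are undefined.

    #include <cstdint>
    #include <cstdio>

    int main() {
      // Made-up analysis results for the splat described above.
      uint64_t SplatBits = 0x000000000000FF00ull;  // defined one bits
      uint64_t SplatUndef = 0xFFFFFFFFFFFF00FFull; // undefined bit positions

      // Same arithmetic as the first tryValue attempt in the patch:
      // findFirstSet(SplatBits) == 8 and findLastSet(SplatBits) == 15, so
      // undef bits below bit 8 and above bit 15 are assumed to be ones.
      uint64_t Lower = SplatUndef & ((uint64_t(1) << 8) - 1);
      uint64_t Upper = SplatUndef & ~((uint64_t(1) << 15) - 1);
      uint64_t Value = SplatBits | Upper | Lower;

      // Prints 0xffffffffffffffff: the value sign-extends to -1, which fits
      // the 16-bit REPLICATE immediate, so no constant-pool load is needed.
      std::printf("0x%016llx\n", (unsigned long long)Value);
      return 0;
    }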
@@ -7172,6 +7167,34 @@
   return MBB;
 }
 
+MachineBasicBlock *SystemZTargetLowering::emitFPScalarImm(
+    MachineInstr &MI, MachineBasicBlock *MBB) const {
+  MachineFunction &MF = *MBB->getParent();
+  MachineRegisterInfo *MRI = &MF.getRegInfo();
+  const SystemZInstrInfo *TII =
+      static_cast<const SystemZInstrInfo *>(Subtarget.getInstrInfo());
+
+  APFloat Imm = MI.getOperand(1).getFPImm()->getValueAPF();
+  assert(!Imm.isZero() && !Imm.isNegZero() && "Expected non-zero FP immediate");
+  unsigned Start, End;
+  bool Success = analyzeFPImm(Imm, Start, End, TII);
+  assert(Success && "Can't build FP immediate.");
+
+  unsigned DstReg = MI.getOperand(0).getReg();
+  unsigned SubRegIdx = (MRI->getRegClass(DstReg) == &SystemZ::FP32BitRegClass ?
+                        SystemZ::subreg_h32 : SystemZ::subreg_h64);
+  unsigned VReg = MRI->createVirtualRegister(&SystemZ::VF128BitRegClass);
+  DebugLoc DL = MI.getDebugLoc();
+  BuildMI(*MBB, MI, DL, TII->get(SystemZ::VGMG), VReg)
+    .addImm(Start)
+    .addImm(End);
+  BuildMI(*MBB, MI, DL, TII->get(TargetOpcode::COPY), DstReg)
+    .addReg(VReg, RegState::Kill, SubRegIdx);
+
+  MI.eraseFromParent();
+  return MBB;
+}
+
 MachineBasicBlock *SystemZTargetLowering::EmitInstrWithCustomInserter(
     MachineInstr &MI, MachineBasicBlock *MBB) const {
   switch (MI.getOpcode()) {
@@ -7436,6 +7459,10 @@
   case TargetOpcode::PATCHPOINT:
     return emitPatchPoint(MI, MBB);
 
+  case SystemZ::FP32ScalarImmPseudo:
+  case SystemZ::FP64ScalarImmPseudo:
+    return emitFPScalarImm(MI, MBB);
+
   default:
     llvm_unreachable("Unexpected instr type to insert");
   }
Index: lib/Target/SystemZ/SystemZInstrFP.td
===================================================================
--- lib/Target/SystemZ/SystemZInstrFP.td
+++ lib/Target/SystemZ/SystemZInstrFP.td
@@ -41,6 +41,15 @@
   def LZXR : InherentRRE<"lzxr", 0xB376, FP128, fpimm0>;
 }
 
+// Load scalar floating-point immediate with a VGM.
+let isAsCheapAsAMove = 1, isMoveImm = 1, usesCustomInserter = 1,
+    hasNoSchedulingInfo = 1 in {
+  def FP32ScalarImmPseudo : Pseudo<(outs FP32:$R1), (ins FP32:$Imm),
+                                   [(set FP32:$R1, (fpimm:$Imm))]>;
+  def FP64ScalarImmPseudo : Pseudo<(outs FP64:$R1), (ins FP64:$Imm),
+                                   [(set FP64:$R1, (fpimm:$Imm))]>;
+}
+
 // Moves between two floating-point registers.
 def LER : UnaryRR <"ler", 0x38, null_frag, FP32, FP32>;
 def LDR : UnaryRR <"ldr", 0x28, null_frag, FP64, FP64>;
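Illustration (not part of the patch): the intended effect on a trivial function, assuming the FP64 pseudo is selected for its constant. 0.5 has bit pattern 0x3FE0000000000000, a contiguous run of ones at SystemZ bit positions 2 through 10, so analyzeFPImm reports it legal and emitFPScalarImm can materialize it with one VGMG plus a subregister copy rather than a constant-pool load.

    // A sketch of the expected lowering under this patch (not verified output):
    //   VGMG  %vN, 2, 10           ; 0x3FE0000000000000 in each doubleword
    //   COPY  %fM, %vN:subreg_h64  ; move the doubleword into the FP register
    // Previously the constant would have been loaded from the constant pool.
    double half() { return 0.5; }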