Index: include/llvm/CodeGen/SelectionDAGNodes.h
===================================================================
--- include/llvm/CodeGen/SelectionDAGNodes.h
+++ include/llvm/CodeGen/SelectionDAGNodes.h
@@ -650,7 +650,7 @@
   }
 
   /// Test if this node is a strict floating point pseudo-op.
-  bool isStrictFPOpcode() {
+  bool isStrictFPOpcode() const {
     switch (NodeType) {
       default:
         return false;
@@ -1363,6 +1363,7 @@
            N->getOpcode() == ISD::MGATHER             ||
            N->getOpcode() == ISD::MSCATTER            ||
            N->isMemIntrinsic()                        ||
+           N->isStrictFPOpcode()                      ||
            N->isTargetMemoryOpcode();
   }
 };
@@ -1430,6 +1431,7 @@
     // We lower some target intrinsics to their target opcode
     // early, so a node with a target opcode can be of this class
     return N->isMemIntrinsic()             ||
+           N->isStrictFPOpcode()           ||
            N->getOpcode() == ISD::PREFETCH ||
            N->isTargetMemoryOpcode();
   }
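
With strict FP opcodes included in both classof implementations, a strict node built through getMemIntrinsicNode can be inspected as a memory node by generic DAG code. A minimal sketch of the kind of query this enables (illustrative only, not part of the patch; assumes some SDNode *N):

  // Both casts now succeed for strict FP nodes, exposing the memory
  // operand that models the floating-point status word.
  if (auto *M = dyn_cast<MemSDNode>(N)) {
    MachineMemOperand *MMO = M->getMemOperand();
    // The MMO is what keeps strict FP ops ordered with respect to other
    // accesses to the FP status location.
    (void)MMO;
  }
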
Index: include/llvm/Target/TargetSelectionDAG.td
===================================================================
--- include/llvm/Target/TargetSelectionDAG.td
+++ include/llvm/Target/TargetSelectionDAG.td
@@ -446,6 +446,51 @@
 def f16_to_fp  : SDNode<"ISD::FP16_TO_FP" , SDTIntToFPOp>;
 def fp_to_f16  : SDNode<"ISD::FP_TO_FP16" , SDTFPToIntOp>;
 
+def strict_fadd       : SDNode<"ISD::STRICT_FADD",
+                               SDTFPBinOp, [SDNPHasChain, SDNPMemOperand,
+                                            SDNPCommutative]>;
+def strict_fsub       : SDNode<"ISD::STRICT_FSUB",
+                               SDTFPBinOp, [SDNPHasChain, SDNPMemOperand]>;
+def strict_fmul       : SDNode<"ISD::STRICT_FMUL",
+                               SDTFPBinOp, [SDNPHasChain, SDNPMemOperand,
+                                            SDNPCommutative]>;
+def strict_fdiv       : SDNode<"ISD::STRICT_FDIV",
+                               SDTFPBinOp, [SDNPHasChain, SDNPMemOperand]>;
+def strict_frem       : SDNode<"ISD::STRICT_FREM",
+                               SDTFPBinOp, [SDNPHasChain, SDNPMemOperand]>;
+def strict_fma        : SDNode<"ISD::STRICT_FMA",
+                               SDTFPTernaryOp, [SDNPHasChain, SDNPMemOperand]>;
+def strict_fsqrt      : SDNode<"ISD::STRICT_FSQRT",
+                               SDTFPUnaryOp, [SDNPHasChain, SDNPMemOperand]>;
+def strict_fsin       : SDNode<"ISD::STRICT_FSIN",
+                               SDTFPUnaryOp, [SDNPHasChain, SDNPMemOperand]>;
+def strict_fcos       : SDNode<"ISD::STRICT_FCOS",
+                               SDTFPUnaryOp, [SDNPHasChain, SDNPMemOperand]>;
+def strict_fexp2      : SDNode<"ISD::STRICT_FEXP2",
+                               SDTFPUnaryOp, [SDNPHasChain, SDNPMemOperand]>;
+def strict_fpow       : SDNode<"ISD::STRICT_FPOW",
+                               SDTFPBinOp, [SDNPHasChain, SDNPMemOperand]>;
+def strict_flog2      : SDNode<"ISD::STRICT_FLOG2",
+                               SDTFPUnaryOp, [SDNPHasChain, SDNPMemOperand]>;
+def strict_frint      : SDNode<"ISD::STRICT_FRINT",
+                               SDTFPUnaryOp, [SDNPHasChain, SDNPMemOperand]>;
+def strict_fnearbyint : SDNode<"ISD::STRICT_FNEARBYINT",
+                               SDTFPUnaryOp, [SDNPHasChain, SDNPMemOperand]>;
+def strict_fceil      : SDNode<"ISD::STRICT_FCEIL",
+                               SDTFPUnaryOp, [SDNPHasChain, SDNPMemOperand]>;
+def strict_ffloor     : SDNode<"ISD::STRICT_FFLOOR",
+                               SDTFPUnaryOp, [SDNPHasChain, SDNPMemOperand]>;
+def strict_fround     : SDNode<"ISD::STRICT_FROUND",
+                               SDTFPUnaryOp, [SDNPHasChain, SDNPMemOperand]>;
+def strict_ftrunc     : SDNode<"ISD::STRICT_FTRUNC",
+                               SDTFPUnaryOp, [SDNPHasChain, SDNPMemOperand]>;
+def strict_fminnum    : SDNode<"ISD::STRICT_FMINNUM",
+                               SDTFPBinOp, [SDNPCommutative, SDNPAssociative,
+                                            SDNPHasChain, SDNPMemOperand]>;
+def strict_fmaxnum    : SDNode<"ISD::STRICT_FMAXNUM",
+                               SDTFPBinOp, [SDNPCommutative, SDNPAssociative,
+                                            SDNPHasChain, SDNPMemOperand]>;
+
 def setcc      : SDNode<"ISD::SETCC"      , SDTSetCC>;
 def select     : SDNode<"ISD::SELECT"     , SDTSelect>;
 def vselect    : SDNode<"ISD::VSELECT"    , SDTVSelect>;
@@ -1145,6 +1190,68 @@
 def setne  : PatFrag<(ops node:$lhs, node:$rhs),
                      (setcc node:$lhs, node:$rhs, SETNE)>;
 
+// Convenience fragments to match both strict and non-strict FP operations.
+def any_fadd       : PatFrags<(ops node:$lhs, node:$rhs),
+                              [(strict_fadd node:$lhs, node:$rhs),
+                               (fadd node:$lhs, node:$rhs)]>;
+def any_fsub       : PatFrags<(ops node:$lhs, node:$rhs),
+                              [(strict_fsub node:$lhs, node:$rhs),
+                               (fsub node:$lhs, node:$rhs)]>;
+def any_fmul       : PatFrags<(ops node:$lhs, node:$rhs),
+                              [(strict_fmul node:$lhs, node:$rhs),
+                               (fmul node:$lhs, node:$rhs)]>;
+def any_fdiv       : PatFrags<(ops node:$lhs, node:$rhs),
+                              [(strict_fdiv node:$lhs, node:$rhs),
+                               (fdiv node:$lhs, node:$rhs)]>;
+def any_frem       : PatFrags<(ops node:$lhs, node:$rhs),
+                              [(strict_frem node:$lhs, node:$rhs),
+                               (frem node:$lhs, node:$rhs)]>;
+def any_fma        : PatFrags<(ops node:$src1, node:$src2, node:$src3),
+                              [(strict_fma node:$src1, node:$src2, node:$src3),
+                               (fma node:$src1, node:$src2, node:$src3)]>;
+def any_fsqrt      : PatFrags<(ops node:$src),
+                              [(strict_fsqrt node:$src),
+                               (fsqrt node:$src)]>;
+def any_fsin       : PatFrags<(ops node:$src),
+                              [(strict_fsin node:$src),
+                               (fsin node:$src)]>;
+def any_fcos       : PatFrags<(ops node:$src),
+                              [(strict_fcos node:$src),
+                               (fcos node:$src)]>;
+def any_fexp2      : PatFrags<(ops node:$src),
+                              [(strict_fexp2 node:$src),
+                               (fexp2 node:$src)]>;
+def any_fpow       : PatFrags<(ops node:$lhs, node:$rhs),
+                              [(strict_fpow node:$lhs, node:$rhs),
+                               (fpow node:$lhs, node:$rhs)]>;
+def any_flog2      : PatFrags<(ops node:$src),
+                              [(strict_flog2 node:$src),
+                               (flog2 node:$src)]>;
+def any_frint      : PatFrags<(ops node:$src),
+                              [(strict_frint node:$src),
+                               (frint node:$src)]>;
+def any_fnearbyint : PatFrags<(ops node:$src),
+                              [(strict_fnearbyint node:$src),
+                               (fnearbyint node:$src)]>;
+def any_fceil      : PatFrags<(ops node:$src),
+                              [(strict_fceil node:$src),
+                               (fceil node:$src)]>;
+def any_ffloor     : PatFrags<(ops node:$src),
+                              [(strict_ffloor node:$src),
+                               (ffloor node:$src)]>;
+def any_fround     : PatFrags<(ops node:$src),
+                              [(strict_fround node:$src),
+                               (fround node:$src)]>;
+def any_ftrunc     : PatFrags<(ops node:$src),
+                              [(strict_ftrunc node:$src),
+                               (ftrunc node:$src)]>;
+def any_fmaxnum    : PatFrags<(ops node:$lhs, node:$rhs),
+                              [(strict_fmaxnum node:$lhs, node:$rhs),
+                               (fmaxnum node:$lhs, node:$rhs)]>;
+def any_fminnum    : PatFrags<(ops node:$lhs, node:$rhs),
+                              [(strict_fminnum node:$lhs, node:$rhs),
+                               (fminnum node:$lhs, node:$rhs)]>;
+
 multiclass binary_atomic_op_ord<SDNode atomic_op> {
   def #NAME#_monotonic : PatFrag<(ops node:$ptr, node:$val),
       (!cast<SDPatternOperator>(#NAME) node:$ptr, node:$val)> {
Index: lib/CodeGen/SelectionDAG/SelectionDAG.cpp
===================================================================
--- lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -6307,6 +6307,30 @@
           Opcode == ISD::PREFETCH ||
           Opcode == ISD::LIFETIME_START ||
           Opcode == ISD::LIFETIME_END ||
+          Opcode == ISD::STRICT_FADD ||
+          Opcode == ISD::STRICT_FSUB ||
+          Opcode == ISD::STRICT_FMUL ||
+          Opcode == ISD::STRICT_FDIV ||
+          Opcode == ISD::STRICT_FREM ||
+          Opcode == ISD::STRICT_FMA ||
+          Opcode == ISD::STRICT_FSQRT ||
+          Opcode == ISD::STRICT_FPOW ||
+          Opcode == ISD::STRICT_FPOWI ||
+          Opcode == ISD::STRICT_FSIN ||
+          Opcode == ISD::STRICT_FCOS ||
+          Opcode == ISD::STRICT_FEXP ||
+          Opcode == ISD::STRICT_FEXP2 ||
+          Opcode == ISD::STRICT_FLOG ||
+          Opcode == ISD::STRICT_FLOG10 ||
+          Opcode == ISD::STRICT_FLOG2 ||
+          Opcode == ISD::STRICT_FRINT ||
+          Opcode == ISD::STRICT_FNEARBYINT ||
+          Opcode == ISD::STRICT_FCEIL ||
+          Opcode == ISD::STRICT_FFLOOR ||
+          Opcode == ISD::STRICT_FROUND ||
+          Opcode == ISD::STRICT_FTRUNC ||
+          Opcode == ISD::STRICT_FMAXNUM ||
+          Opcode == ISD::STRICT_FMINNUM ||
           ((int)Opcode <= std::numeric_limits<int>::max() &&
            (int)Opcode >= ISD::FIRST_TARGET_MEMORY_OPCODE)) &&
          "Opcode is not a memory-accessing opcode!");
Index: lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
===================================================================
--- lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -6333,6 +6333,7 @@
 
 void SelectionDAGBuilder::visitConstrainedFPIntrinsic(
     const ConstrainedFPIntrinsic &FPI) {
+  MachineFunction &MF = DAG.getMachineFunction();
   SDLoc sdl = getCurSDLoc();
   unsigned Opcode;
   switch (FPI.getIntrinsicID()) {
@@ -6417,19 +6418,27 @@
   ValueVTs.push_back(MVT::Other); // Out chain
 
   SDVTList VTs = DAG.getVTList(ValueVTs);
+  EVT MemVT = EVT::getIntegerVT(*Context, 8);
+  MachinePointerInfo MPInfo = MachinePointerInfo::getFPStatus(MF);
   SDValue Result;
   if (FPI.isUnaryOp())
-    Result = DAG.getNode(Opcode, sdl, VTs,
-                         { Chain, getValue(FPI.getArgOperand(0)) });
+    Result = DAG.getMemIntrinsicNode(Opcode, sdl, VTs,
+                                     { Chain,
+                                       getValue(FPI.getArgOperand(0)) },
+                                     MemVT, MPInfo);
   else if (FPI.isTernaryOp())
-    Result = DAG.getNode(Opcode, sdl, VTs,
-                         { Chain, getValue(FPI.getArgOperand(0)),
-                                  getValue(FPI.getArgOperand(1)),
-                                  getValue(FPI.getArgOperand(2)) });
+    Result = DAG.getMemIntrinsicNode(Opcode, sdl, VTs,
+                                     { Chain,
+                                       getValue(FPI.getArgOperand(0)),
+                                       getValue(FPI.getArgOperand(1)),
+                                       getValue(FPI.getArgOperand(2)) },
+                                     MemVT, MPInfo);
   else
-    Result = DAG.getNode(Opcode, sdl, VTs,
-                         { Chain, getValue(FPI.getArgOperand(0)),
-                           getValue(FPI.getArgOperand(1))  });
+    Result = DAG.getMemIntrinsicNode(Opcode, sdl, VTs,
+                                     { Chain,
+                                       getValue(FPI.getArgOperand(0)),
+                                       getValue(FPI.getArgOperand(1)) },
+                                     MemVT, MPInfo);
 
   assert(Result.getNode()->getNumValues() == 2);
   SDValue OutChain = Result.getValue(1);
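
The three branches above differ only in how many value operands they copy; the intrinsic's trailing rounding-mode and exception-behavior arguments are deliberately not lowered as node operands. A condensed, functionally equivalent sketch of the common logic (illustrative only):

  // Chain first, then the 1-3 value operands, then build one
  // memory-intrinsic node whose MMO covers a 1-byte FP-status location.
  unsigned NumVals = FPI.isUnaryOp() ? 1 : FPI.isTernaryOp() ? 3 : 2;
  SmallVector<SDValue, 4> Ops;
  Ops.push_back(Chain);
  for (unsigned I = 0; I != NumVals; ++I)
    Ops.push_back(getValue(FPI.getArgOperand(I)));
  Result = DAG.getMemIntrinsicNode(Opcode, sdl, VTs, Ops, MemVT, MPInfo);
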
Index: lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
===================================================================
--- lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
+++ lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
@@ -1092,16 +1092,14 @@
 #endif
 
      // When we are using non-default rounding modes or FP exception behavior,
-      // FP operations are represented by StrictFP pseudo-operations.  They
-      // need to be simplified here so that the target-specific instruction
-      // selectors know how to handle them.
-      //
-      // If the current node is a strict FP pseudo-op, the isStrictFPOp()
-      // function will provide the corresponding normal FP opcode to which the
-      // node should be mutated.
-      //
-      // FIXME: The backends need a way to handle FP constraints.
-      if (Node->isStrictFPOpcode())
+      // FP operations are represented by StrictFP pseudo-operations.  For
+      // targets that do not (yet) understand strict FP operations directly,
+      // we mutate them into the corresponding normal FP opcodes at this
+      // point, which allows them to be handled by the existing
+      // target-specific instruction selectors.
+      if (Node->isStrictFPOpcode() &&
+          (TLI->getOperationAction(Node->getOpcode(), Node->getValueType(0))
+           != TargetLowering::Legal))
         Node = CurDAG->mutateStrictFPToFP(Node);
 
       LLVM_DEBUG(dbgs() << "\nISEL: Starting selection on root node: ";
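
Targets opt in to keeping strict nodes per opcode and value type; anything left at the default action is mutated to its non-strict twin before selection. A sketch of the opt-in, mirroring what the SystemZ hunks below do in the SystemZTargetLowering constructor:

  // In a target's TargetLowering constructor (sketch):
  setOperationAction(ISD::STRICT_FADD, MVT::f64, Legal); // keep strict node
  // STRICT_* opcodes not marked Legal stay at the TargetLoweringBase
  // default (Expand) and are rewritten to FADD etc. by the check above.
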
Index: lib/CodeGen/TargetLoweringBase.cpp
===================================================================
--- lib/CodeGen/TargetLoweringBase.cpp
+++ lib/CodeGen/TargetLoweringBase.cpp
@@ -652,6 +652,32 @@
       setOperationAction(ISD::ZERO_EXTEND_VECTOR_INREG, VT, Expand);
     }
 
+    // Constrained floating-point operations default to expand.
+    setOperationAction(ISD::STRICT_FADD, VT, Expand);
+    setOperationAction(ISD::STRICT_FSUB, VT, Expand);
+    setOperationAction(ISD::STRICT_FMUL, VT, Expand);
+    setOperationAction(ISD::STRICT_FDIV, VT, Expand);
+    setOperationAction(ISD::STRICT_FREM, VT, Expand);
+    setOperationAction(ISD::STRICT_FMA, VT, Expand);
+    setOperationAction(ISD::STRICT_FSQRT, VT, Expand);
+    setOperationAction(ISD::STRICT_FPOW, VT, Expand);
+    setOperationAction(ISD::STRICT_FPOWI, VT, Expand);
+    setOperationAction(ISD::STRICT_FSIN, VT, Expand);
+    setOperationAction(ISD::STRICT_FCOS, VT, Expand);
+    setOperationAction(ISD::STRICT_FEXP, VT, Expand);
+    setOperationAction(ISD::STRICT_FEXP2, VT, Expand);
+    setOperationAction(ISD::STRICT_FLOG, VT, Expand);
+    setOperationAction(ISD::STRICT_FLOG10, VT, Expand);
+    setOperationAction(ISD::STRICT_FLOG2, VT, Expand);
+    setOperationAction(ISD::STRICT_FRINT, VT, Expand);
+    setOperationAction(ISD::STRICT_FNEARBYINT, VT, Expand);
+    setOperationAction(ISD::STRICT_FCEIL, VT, Expand);
+    setOperationAction(ISD::STRICT_FFLOOR, VT, Expand);
+    setOperationAction(ISD::STRICT_FROUND, VT, Expand);
+    setOperationAction(ISD::STRICT_FTRUNC, VT, Expand);
+    setOperationAction(ISD::STRICT_FMAXNUM, VT, Expand);
+    setOperationAction(ISD::STRICT_FMINNUM, VT, Expand);
+
     // For most targets @llvm.get.dynamic.area.offset just returns 0.
     setOperationAction(ISD::GET_DYNAMIC_AREA_OFFSET, VT, Expand);
   }
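
These defaults make the SelectionDAGISel check above conservative: a freshly constructed TargetLowering reports Expand for every strict opcode, so strict nodes survive to selection only where a target explicitly overrides the action. An illustrative check (assumed snippet, not part of the patch):

  // Without a target override, strict ops report Expand and will be
  // mutated back to their non-strict forms before instruction selection.
  assert(TLI.getOperationAction(ISD::STRICT_FSQRT, MVT::f32) ==
             TargetLowering::Expand &&
         "strict FP ops default to Expand");
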
Index: lib/Target/SystemZ/SystemZISelLowering.cpp
===================================================================
--- lib/Target/SystemZ/SystemZISelLowering.cpp
+++ lib/Target/SystemZ/SystemZISelLowering.cpp
@@ -401,6 +401,22 @@
       setOperationAction(ISD::FSINCOS, VT, Expand);
       setOperationAction(ISD::FREM, VT, Expand);
       setOperationAction(ISD::FPOW, VT, Expand);
+
+      // Handle constrained floating-point operations.
+      setOperationAction(ISD::STRICT_FADD, VT, Legal);
+      setOperationAction(ISD::STRICT_FSUB, VT, Legal);
+      setOperationAction(ISD::STRICT_FMUL, VT, Legal);
+      setOperationAction(ISD::STRICT_FDIV, VT, Legal);
+      setOperationAction(ISD::STRICT_FMA, VT, Legal);
+      setOperationAction(ISD::STRICT_FSQRT, VT, Legal);
+      setOperationAction(ISD::STRICT_FRINT, VT, Legal);
+      if (Subtarget.hasFPExtension()) {
+        setOperationAction(ISD::STRICT_FNEARBYINT, VT, Legal);
+        setOperationAction(ISD::STRICT_FFLOOR, VT, Legal);
+        setOperationAction(ISD::STRICT_FCEIL, VT, Legal);
+        setOperationAction(ISD::STRICT_FROUND, VT, Legal);
+        setOperationAction(ISD::STRICT_FTRUNC, VT, Legal);
+      }
     }
   }
 
@@ -432,6 +448,20 @@
     setOperationAction(ISD::FCEIL, MVT::v2f64, Legal);
     setOperationAction(ISD::FTRUNC, MVT::v2f64, Legal);
     setOperationAction(ISD::FROUND, MVT::v2f64, Legal);
+
+    // Handle constrained floating-point operations.
+    setOperationAction(ISD::STRICT_FADD, MVT::v2f64, Legal);
+    setOperationAction(ISD::STRICT_FSUB, MVT::v2f64, Legal);
+    setOperationAction(ISD::STRICT_FMUL, MVT::v2f64, Legal);
+    setOperationAction(ISD::STRICT_FMA, MVT::v2f64, Legal);
+    setOperationAction(ISD::STRICT_FDIV, MVT::v2f64, Legal);
+    setOperationAction(ISD::STRICT_FSQRT, MVT::v2f64, Legal);
+    setOperationAction(ISD::STRICT_FRINT, MVT::v2f64, Legal);
+    setOperationAction(ISD::STRICT_FNEARBYINT, MVT::v2f64, Legal);
+    setOperationAction(ISD::STRICT_FFLOOR, MVT::v2f64, Legal);
+    setOperationAction(ISD::STRICT_FCEIL, MVT::v2f64, Legal);
+    setOperationAction(ISD::STRICT_FTRUNC, MVT::v2f64, Legal);
+    setOperationAction(ISD::STRICT_FROUND, MVT::v2f64, Legal);
   }
 
   // The vector enhancements facility 1 has instructions for these.
@@ -475,6 +505,25 @@
     setOperationAction(ISD::FMAXIMUM, MVT::f128, Legal);
     setOperationAction(ISD::FMINNUM, MVT::f128, Legal);
     setOperationAction(ISD::FMINIMUM, MVT::f128, Legal);
+
+    // Handle constrained floating-point operations.
+    setOperationAction(ISD::STRICT_FADD, MVT::v4f32, Legal);
+    setOperationAction(ISD::STRICT_FSUB, MVT::v4f32, Legal);
+    setOperationAction(ISD::STRICT_FMUL, MVT::v4f32, Legal);
+    setOperationAction(ISD::STRICT_FMA, MVT::v4f32, Legal);
+    setOperationAction(ISD::STRICT_FDIV, MVT::v4f32, Legal);
+    setOperationAction(ISD::STRICT_FSQRT, MVT::v4f32, Legal);
+    setOperationAction(ISD::STRICT_FRINT, MVT::v4f32, Legal);
+    setOperationAction(ISD::STRICT_FNEARBYINT, MVT::v4f32, Legal);
+    setOperationAction(ISD::STRICT_FFLOOR, MVT::v4f32, Legal);
+    setOperationAction(ISD::STRICT_FCEIL, MVT::v4f32, Legal);
+    setOperationAction(ISD::STRICT_FROUND, MVT::v4f32, Legal);
+    setOperationAction(ISD::STRICT_FTRUNC, MVT::v4f32, Legal);
+    for (auto VT : { MVT::f32, MVT::f64, MVT::f128,
+                     MVT::v4f32, MVT::v2f64 }) {
+      setOperationAction(ISD::STRICT_FMAXNUM, VT, Legal);
+      setOperationAction(ISD::STRICT_FMINNUM, VT, Legal);
+    }
   }
 
   // We have fused multiply-addition for f32 and f64 but not f128.
Index: lib/Target/SystemZ/SystemZInstrDFP.td
===================================================================
--- lib/Target/SystemZ/SystemZInstrDFP.td
+++ lib/Target/SystemZ/SystemZInstrDFP.td
@@ -20,7 +20,7 @@
 //===----------------------------------------------------------------------===//
 
 // Load and test.
-let Defs = [CC] in {
+let Uses = [FPC], Defs = [CC] in {
   def LTDTR : UnaryRRE<"ltdtr", 0xB3D6, null_frag, FP64,  FP64>;
   def LTXTR : UnaryRRE<"ltxtr", 0xB3DE, null_frag, FP128, FP128>;
 }
@@ -32,25 +32,31 @@
 
 // Convert floating-point values to narrower representations.  The destination
 // of LDXTR is a 128-bit value, but only the first register of the pair is used.
-def LEDTR : TernaryRRFe<"ledtr", 0xB3D5, FP32,  FP64>;
-def LDXTR : TernaryRRFe<"ldxtr", 0xB3DD, FP128, FP128>;
+let Uses = [FPC] in {
+  def LEDTR : TernaryRRFe<"ledtr", 0xB3D5, FP32,  FP64>;
+  def LDXTR : TernaryRRFe<"ldxtr", 0xB3DD, FP128, FP128>;
+}
 
 // Extend floating-point values to wider representations.
-def LDETR : BinaryRRFd<"ldetr", 0xB3D4, FP64,  FP32>;
-def LXDTR : BinaryRRFd<"lxdtr", 0xB3DC, FP128, FP64>;
+let Uses = [FPC] in {
+  def LDETR : BinaryRRFd<"ldetr", 0xB3D4, FP64,  FP32>;
+  def LXDTR : BinaryRRFd<"lxdtr", 0xB3DC, FP128, FP64>;
+}
 
 // Convert a signed integer value to a floating-point one.
-def CDGTR : UnaryRRE<"cdgtr", 0xB3F1, null_frag, FP64,  GR64>;
-def CXGTR : UnaryRRE<"cxgtr", 0xB3F9, null_frag, FP128, GR64>;
-let Predicates = [FeatureFPExtension] in {
-  def CDGTRA : TernaryRRFe<"cdgtra", 0xB3F1, FP64,  GR64>;
-  def CXGTRA : TernaryRRFe<"cxgtra", 0xB3F9, FP128, GR64>;
-  def CDFTR : TernaryRRFe<"cdftr", 0xB951, FP64,  GR32>;
-  def CXFTR : TernaryRRFe<"cxftr", 0xB959, FP128, GR32>;
+let Uses = [FPC] in {
+  def CDGTR : UnaryRRE<"cdgtr", 0xB3F1, null_frag, FP64,  GR64>;
+  def CXGTR : UnaryRRE<"cxgtr", 0xB3F9, null_frag, FP128, GR64>;
+  let Predicates = [FeatureFPExtension] in {
+    def CDGTRA : TernaryRRFe<"cdgtra", 0xB3F1, FP64,  GR64>;
+    def CXGTRA : TernaryRRFe<"cxgtra", 0xB3F9, FP128, GR64>;
+    def CDFTR : TernaryRRFe<"cdftr", 0xB951, FP64,  GR32>;
+    def CXFTR : TernaryRRFe<"cxftr", 0xB959, FP128, GR32>;
+  }
 }
 
 // Convert an unsigned integer value to a floating-point one.
-let Predicates = [FeatureFPExtension] in {
+let Uses = [FPC], Predicates = [FeatureFPExtension] in {
   def CDLGTR : TernaryRRFe<"cdlgtr", 0xB952, FP64,  GR64>;
   def CXLGTR : TernaryRRFe<"cxlgtr", 0xB95A, FP128, GR64>;
   def CDLFTR : TernaryRRFe<"cdlftr", 0xB953, FP64,  GR32>;
@@ -58,7 +64,7 @@
 }
 
 // Convert a floating-point value to a signed integer value.
-let Defs = [CC] in {
+let Uses = [FPC], Defs = [CC] in {
   def CGDTR : BinaryRRFe<"cgdtr", 0xB3E1, GR64, FP64>;
   def CGXTR : BinaryRRFe<"cgxtr", 0xB3E9, GR64, FP128>;
   let Predicates = [FeatureFPExtension] in {
@@ -70,7 +76,7 @@
 }
 
 // Convert a floating-point value to an unsigned integer value.
-let Defs = [CC] in {
+let Uses = [FPC], Defs = [CC] in {
   let Predicates = [FeatureFPExtension] in {
     def CLGDTR : TernaryRRFe<"clgdtr", 0xB942, GR64, FP64>;
     def CLGXTR : TernaryRRFe<"clgxtr", 0xB94A, GR64, FP128>;
@@ -108,7 +114,7 @@
 }
 
 // Perform floating-point operation.
-let Defs = [CC, R1L, F0Q], Uses = [R0L, F4Q] in
+let Defs = [CC, R1L, F0Q], Uses = [FPC, R0L, F4Q] in
   def PFPO : SideEffectInherentE<"pfpo", 0x010A>;
 
 
@@ -118,8 +124,10 @@
 
 // Round to an integer, with the second operand (M3) specifying the rounding
 // mode.  M4 can be set to 4 to suppress detection of inexact conditions.
-def FIDTR : TernaryRRFe<"fidtr", 0xB3D7, FP64,  FP64>;
-def FIXTR : TernaryRRFe<"fixtr", 0xB3DF, FP128, FP128>;
+let Uses = [FPC] in {
+  def FIDTR : TernaryRRFe<"fidtr", 0xB3D7, FP64,  FP64>;
+  def FIXTR : TernaryRRFe<"fixtr", 0xB3DF, FP128, FP128>;
+}
 
 // Extract biased exponent.
 def EEDTR : UnaryRRE<"eedtr", 0xB3E5, null_frag, FP64,  FP64>;
@@ -135,7 +143,7 @@
 //===----------------------------------------------------------------------===//
 
 // Addition.
-let Defs = [CC] in {
+let Uses = [FPC], Defs = [CC] in {
   let isCommutable = 1 in {
     def ADTR : BinaryRRFa<"adtr", 0xB3D2, null_frag, FP64,  FP64,  FP64>;
     def AXTR : BinaryRRFa<"axtr", 0xB3DA, null_frag, FP128, FP128, FP128>;
@@ -147,7 +155,7 @@
 }
 
 // Subtraction.
-let Defs = [CC] in {
+let Uses = [FPC], Defs = [CC] in {
   def SDTR : BinaryRRFa<"sdtr", 0xB3D3, null_frag, FP64,  FP64,  FP64>;
   def SXTR : BinaryRRFa<"sxtr", 0xB3DB, null_frag, FP128, FP128, FP128>;
   let Predicates = [FeatureFPExtension] in {
@@ -157,30 +165,38 @@
 }
 
 // Multiplication.
-let isCommutable = 1 in {
-  def MDTR : BinaryRRFa<"mdtr", 0xB3D0, null_frag, FP64,  FP64,  FP64>;
-  def MXTR : BinaryRRFa<"mxtr", 0xB3D8, null_frag, FP128, FP128, FP128>;
-}
-let Predicates = [FeatureFPExtension] in {
-  def MDTRA : TernaryRRFa<"mdtra", 0xB3D0, FP64,  FP64,  FP64>;
-  def MXTRA : TernaryRRFa<"mxtra", 0xB3D8, FP128, FP128, FP128>;
+let Uses = [FPC] in {
+  let isCommutable = 1 in {
+    def MDTR : BinaryRRFa<"mdtr", 0xB3D0, null_frag, FP64,  FP64,  FP64>;
+    def MXTR : BinaryRRFa<"mxtr", 0xB3D8, null_frag, FP128, FP128, FP128>;
+  }
+  let Predicates = [FeatureFPExtension] in {
+    def MDTRA : TernaryRRFa<"mdtra", 0xB3D0, FP64,  FP64,  FP64>;
+    def MXTRA : TernaryRRFa<"mxtra", 0xB3D8, FP128, FP128, FP128>;
+  }
 }
 
 // Division.
-def DDTR : BinaryRRFa<"ddtr", 0xB3D1, null_frag, FP64,  FP64,  FP64>;
-def DXTR : BinaryRRFa<"dxtr", 0xB3D9, null_frag, FP128, FP128, FP128>;
-let Predicates = [FeatureFPExtension] in {
-  def DDTRA : TernaryRRFa<"ddtra", 0xB3D1, FP64,  FP64,  FP64>;
-  def DXTRA : TernaryRRFa<"dxtra", 0xB3D9, FP128, FP128, FP128>;
+let Uses = [FPC] in {
+  def DDTR : BinaryRRFa<"ddtr", 0xB3D1, null_frag, FP64,  FP64,  FP64>;
+  def DXTR : BinaryRRFa<"dxtr", 0xB3D9, null_frag, FP128, FP128, FP128>;
+  let Predicates = [FeatureFPExtension] in {
+    def DDTRA : TernaryRRFa<"ddtra", 0xB3D1, FP64,  FP64,  FP64>;
+    def DXTRA : TernaryRRFa<"dxtra", 0xB3D9, FP128, FP128, FP128>;
+  }
 }
 
 // Quantize.
-def QADTR : TernaryRRFb<"qadtr", 0xB3F5, FP64,  FP64,  FP64>;
-def QAXTR : TernaryRRFb<"qaxtr", 0xB3FD, FP128, FP128, FP128>;
+let Uses = [FPC] in {
+  def QADTR : TernaryRRFb<"qadtr", 0xB3F5, FP64,  FP64,  FP64>;
+  def QAXTR : TernaryRRFb<"qaxtr", 0xB3FD, FP128, FP128, FP128>;
+}
 
 // Reround.
-def RRDTR : TernaryRRFb<"rrdtr", 0xB3F7, FP64,  FP64,  FP64>;
-def RRXTR : TernaryRRFb<"rrxtr", 0xB3FF, FP128, FP128, FP128>;
+let Uses = [FPC] in {
+  def RRDTR : TernaryRRFb<"rrdtr", 0xB3F7, FP64,  FP64,  FP64>;
+  def RRXTR : TernaryRRFb<"rrxtr", 0xB3FF, FP128, FP128, FP128>;
+}
 
 // Shift significand left/right.
 def SLDT : BinaryRXF<"sldt", 0xED40, null_frag, FP64,  FP64,  null_frag, 0>;
@@ -198,13 +214,13 @@
 //===----------------------------------------------------------------------===//
 
 // Compare.
-let Defs = [CC] in {
+let Uses = [FPC], Defs = [CC] in {
   def CDTR : CompareRRE<"cdtr", 0xB3E4, null_frag, FP64,  FP64>;
   def CXTR : CompareRRE<"cxtr", 0xB3EC, null_frag, FP128, FP128>;
 }
 
 // Compare and signal.
-let Defs = [CC] in {
+let Uses = [FPC], Defs = [CC] in {
   def KDTR : CompareRRE<"kdtr", 0xB3E0, null_frag, FP64,  FP64>;
   def KXTR : CompareRRE<"kxtr", 0xB3E8, null_frag, FP128, FP128>;
 }
Index: lib/Target/SystemZ/SystemZInstrFP.td
===================================================================
--- lib/Target/SystemZ/SystemZInstrFP.td
+++ lib/Target/SystemZ/SystemZInstrFP.td
@@ -53,7 +53,7 @@
 
 // Moves between two floating-point registers that also set the condition
 // codes.
-let Defs = [CC], CCValues = 0xF, CompareZeroCCMask = 0xF in {
+let Uses = [FPC], Defs = [CC], CCValues = 0xF, CompareZeroCCMask = 0xF in {
   defm LTEBR : LoadAndTestRRE<"ltebr", 0xB302, FP32>;
   defm LTDBR : LoadAndTestRRE<"ltdbr", 0xB312, FP64>;
   defm LTXBR : LoadAndTestRRE<"ltxbr", 0xB342, FP128>;
@@ -69,7 +69,7 @@
 
 // Use a normal load-and-test for compare against zero when vector
 // support is available (via a pseudo to simplify instruction selection).
-let Defs = [CC], usesCustomInserter = 1, hasNoSchedulingInfo = 1 in {
+let Uses = [FPC], Defs = [CC], usesCustomInserter = 1, hasNoSchedulingInfo = 1 in {
   def LTEBRCompare_VecPseudo : Pseudo<(outs), (ins FP32:$R1, FP32:$R2), []>;
   def LTDBRCompare_VecPseudo : Pseudo<(outs), (ins FP64:$R1, FP64:$R2), []>;
   def LTXBRCompare_VecPseudo : Pseudo<(outs), (ins FP128:$R1, FP128:$R2), []>;
@@ -174,16 +174,18 @@
 // Convert floating-point values to narrower representations, rounding
 // according to the current mode.  The destination of LEXBR and LDXBR
 // is a 128-bit value, but only the first register of the pair is used.
-def LEDBR : UnaryRRE<"ledbr", 0xB344, fpround,    FP32,  FP64>;
-def LEXBR : UnaryRRE<"lexbr", 0xB346, null_frag, FP128, FP128>;
-def LDXBR : UnaryRRE<"ldxbr", 0xB345, null_frag, FP128, FP128>;
-
-def LEDBRA : TernaryRRFe<"ledbra", 0xB344, FP32,  FP64>,
-             Requires<[FeatureFPExtension]>;
-def LEXBRA : TernaryRRFe<"lexbra", 0xB346, FP128, FP128>,
-             Requires<[FeatureFPExtension]>;
-def LDXBRA : TernaryRRFe<"ldxbra", 0xB345, FP128, FP128>,
-             Requires<[FeatureFPExtension]>;
+let Uses = [FPC] in {
+  def LEDBR : UnaryRRE<"ledbr", 0xB344, fpround,    FP32,  FP64>;
+  def LEXBR : UnaryRRE<"lexbr", 0xB346, null_frag, FP128, FP128>;
+  def LDXBR : UnaryRRE<"ldxbr", 0xB345, null_frag, FP128, FP128>;
+
+  def LEDBRA : TernaryRRFe<"ledbra", 0xB344, FP32,  FP64>,
+               Requires<[FeatureFPExtension]>;
+  def LEXBRA : TernaryRRFe<"lexbra", 0xB346, FP128, FP128>,
+               Requires<[FeatureFPExtension]>;
+  def LDXBRA : TernaryRRFe<"ldxbra", 0xB345, FP128, FP128>,
+               Requires<[FeatureFPExtension]>;
+}
 
 let Predicates = [FeatureNoVectorEnhancements1] in {
   def : Pat<(f32 (fpround FP128:$src)),
@@ -193,18 +195,22 @@
 }
 
 // Extend register floating-point values to wider representations.
-def LDEBR : UnaryRRE<"ldebr", 0xB304, fpextend,  FP64,  FP32>;
-def LXEBR : UnaryRRE<"lxebr", 0xB306, null_frag, FP128, FP32>;
-def LXDBR : UnaryRRE<"lxdbr", 0xB305, null_frag, FP128, FP64>;
+let Uses = [FPC] in {
+  def LDEBR : UnaryRRE<"ldebr", 0xB304, fpextend,  FP64,  FP32>;
+  def LXEBR : UnaryRRE<"lxebr", 0xB306, null_frag, FP128, FP32>;
+  def LXDBR : UnaryRRE<"lxdbr", 0xB305, null_frag, FP128, FP64>;
+}
 let Predicates = [FeatureNoVectorEnhancements1] in {
   def : Pat<(f128 (fpextend (f32 FP32:$src))), (LXEBR FP32:$src)>;
   def : Pat<(f128 (fpextend (f64 FP64:$src))), (LXDBR FP64:$src)>;
 }
 
 // Extend memory floating-point values to wider representations.
-def LDEB : UnaryRXE<"ldeb", 0xED04, extloadf32, FP64,  4>;
-def LXEB : UnaryRXE<"lxeb", 0xED06, null_frag,  FP128, 4>;
-def LXDB : UnaryRXE<"lxdb", 0xED05, null_frag,  FP128, 8>;
+let Uses = [FPC] in {
+  def LDEB : UnaryRXE<"ldeb", 0xED04, extloadf32, FP64,  4>;
+  def LXEB : UnaryRXE<"lxeb", 0xED06, null_frag,  FP128, 4>;
+  def LXDB : UnaryRXE<"lxdb", 0xED05, null_frag,  FP128, 8>;
+}
 let Predicates = [FeatureNoVectorEnhancements1] in {
   def : Pat<(f128 (extloadf32 bdxaddr12only:$src)),
             (LXEB bdxaddr12only:$src)>;
@@ -213,17 +219,19 @@
 }
 
 // Convert a signed integer register value to a floating-point one.
-def CEFBR : UnaryRRE<"cefbr", 0xB394, sint_to_fp, FP32,  GR32>;
-def CDFBR : UnaryRRE<"cdfbr", 0xB395, sint_to_fp, FP64,  GR32>;
-def CXFBR : UnaryRRE<"cxfbr", 0xB396, sint_to_fp, FP128, GR32>;
-
-def CEGBR : UnaryRRE<"cegbr", 0xB3A4, sint_to_fp, FP32,  GR64>;
-def CDGBR : UnaryRRE<"cdgbr", 0xB3A5, sint_to_fp, FP64,  GR64>;
-def CXGBR : UnaryRRE<"cxgbr", 0xB3A6, sint_to_fp, FP128, GR64>;
+let Uses = [FPC] in {
+  def CEFBR : UnaryRRE<"cefbr", 0xB394, sint_to_fp, FP32,  GR32>;
+  def CDFBR : UnaryRRE<"cdfbr", 0xB395, sint_to_fp, FP64,  GR32>;
+  def CXFBR : UnaryRRE<"cxfbr", 0xB396, sint_to_fp, FP128, GR32>;
+
+  def CEGBR : UnaryRRE<"cegbr", 0xB3A4, sint_to_fp, FP32,  GR64>;
+  def CDGBR : UnaryRRE<"cdgbr", 0xB3A5, sint_to_fp, FP64,  GR64>;
+  def CXGBR : UnaryRRE<"cxgbr", 0xB3A6, sint_to_fp, FP128, GR64>;
+}
 
 // The FP extension feature provides versions of the above that allow
 // specifying rounding mode and inexact-exception suppression flags.
-let Predicates = [FeatureFPExtension] in {
+let Uses = [FPC], Predicates = [FeatureFPExtension] in {
   def CEFBRA : TernaryRRFe<"cefbra", 0xB394, FP32,  GR32>;
   def CDFBRA : TernaryRRFe<"cdfbra", 0xB395, FP64,  GR32>;
   def CXFBRA : TernaryRRFe<"cxfbra", 0xB396, FP128, GR32>;
@@ -235,13 +243,15 @@
 
 // Convert an unsigned integer register value to a floating-point one.
 let Predicates = [FeatureFPExtension] in {
-  def CELFBR : TernaryRRFe<"celfbr", 0xB390, FP32,  GR32>;
-  def CDLFBR : TernaryRRFe<"cdlfbr", 0xB391, FP64,  GR32>;
-  def CXLFBR : TernaryRRFe<"cxlfbr", 0xB392, FP128, GR32>;
-
-  def CELGBR : TernaryRRFe<"celgbr", 0xB3A0, FP32,  GR64>;
-  def CDLGBR : TernaryRRFe<"cdlgbr", 0xB3A1, FP64,  GR64>;
-  def CXLGBR : TernaryRRFe<"cxlgbr", 0xB3A2, FP128, GR64>;
+  let Uses = [FPC] in {
+    def CELFBR : TernaryRRFe<"celfbr", 0xB390, FP32,  GR32>;
+    def CDLFBR : TernaryRRFe<"cdlfbr", 0xB391, FP64,  GR32>;
+    def CXLFBR : TernaryRRFe<"cxlfbr", 0xB392, FP128, GR32>;
+
+    def CELGBR : TernaryRRFe<"celgbr", 0xB3A0, FP32,  GR64>;
+    def CDLGBR : TernaryRRFe<"cdlgbr", 0xB3A1, FP64,  GR64>;
+    def CXLGBR : TernaryRRFe<"cxlgbr", 0xB3A2, FP128, GR64>;
+  }
 
   def : Pat<(f32  (uint_to_fp GR32:$src)), (CELFBR 0, GR32:$src, 0)>;
   def : Pat<(f64  (uint_to_fp GR32:$src)), (CDLFBR 0, GR32:$src, 0)>;
@@ -254,7 +264,7 @@
 
 // Convert a floating-point register value to a signed integer value,
 // with the second operand (modifier M3) specifying the rounding mode.
-let Defs = [CC] in {
+let Uses = [FPC], Defs = [CC] in {
   def CFEBR : BinaryRRFe<"cfebr", 0xB398, GR32, FP32>;
   def CFDBR : BinaryRRFe<"cfdbr", 0xB399, GR32, FP64>;
   def CFXBR : BinaryRRFe<"cfxbr", 0xB39A, GR32, FP128>;
@@ -275,7 +285,7 @@
 
 // The FP extension feature provides versions of the above that also
 // allow specifying the inexact-exception suppression flag.
-let Predicates = [FeatureFPExtension], Defs = [CC] in {
+let Uses = [FPC], Predicates = [FeatureFPExtension], Defs = [CC] in {
   def CFEBRA : TernaryRRFe<"cfebra", 0xB398, GR32, FP32>;
   def CFDBRA : TernaryRRFe<"cfdbra", 0xB399, GR32, FP64>;
   def CFXBRA : TernaryRRFe<"cfxbra", 0xB39A, GR32, FP128>;
@@ -287,7 +297,7 @@
 
 // Convert a floating-point register value to an unsigned integer value.
 let Predicates = [FeatureFPExtension] in {
-  let Defs = [CC] in {
+  let Uses = [FPC], Defs = [CC] in {
     def CLFEBR : TernaryRRFe<"clfebr", 0xB39C, GR32, FP32>;
     def CLFDBR : TernaryRRFe<"clfdbr", 0xB39D, GR32, FP64>;
     def CLFXBR : TernaryRRFe<"clfxbr", 0xB39E, GR32, FP128>;
@@ -353,59 +363,65 @@
   def LNDFR_32 : UnaryRRE<"lndfr", 0xB371, fnabs, FP32,  FP32>;
 
 // Square root.
-def SQEBR : UnaryRRE<"sqebr", 0xB314, fsqrt, FP32,  FP32>;
-def SQDBR : UnaryRRE<"sqdbr", 0xB315, fsqrt, FP64,  FP64>;
-def SQXBR : UnaryRRE<"sqxbr", 0xB316, fsqrt, FP128, FP128>;
+let Uses = [FPC], mayAccessMemory = 1 in {
+  def SQEBR : UnaryRRE<"sqebr", 0xB314, any_fsqrt, FP32,  FP32>;
+  def SQDBR : UnaryRRE<"sqdbr", 0xB315, any_fsqrt, FP64,  FP64>;
+  def SQXBR : UnaryRRE<"sqxbr", 0xB316, any_fsqrt, FP128, FP128>;
 
-def SQEB : UnaryRXE<"sqeb", 0xED14, loadu<fsqrt>, FP32, 4>;
-def SQDB : UnaryRXE<"sqdb", 0xED15, loadu<fsqrt>, FP64, 8>;
+  def SQEB : UnaryRXE<"sqeb", 0xED14, loadu<any_fsqrt>, FP32, 4>;
+  def SQDB : UnaryRXE<"sqdb", 0xED15, loadu<any_fsqrt>, FP64, 8>;
+}
 
 // Round to an integer, with the second operand (modifier M3) specifying
 // the rounding mode.  These forms always check for inexact conditions.
-def FIEBR : BinaryRRFe<"fiebr", 0xB357, FP32,  FP32>;
-def FIDBR : BinaryRRFe<"fidbr", 0xB35F, FP64,  FP64>;
-def FIXBR : BinaryRRFe<"fixbr", 0xB347, FP128, FP128>;
+let Uses = [FPC], mayAccessMemory = 1 in {
+  def FIEBR : BinaryRRFe<"fiebr", 0xB357, FP32,  FP32>;
+  def FIDBR : BinaryRRFe<"fidbr", 0xB35F, FP64,  FP64>;
+  def FIXBR : BinaryRRFe<"fixbr", 0xB347, FP128, FP128>;
+}
 
 // frint rounds according to the current mode (modifier 0) and detects
 // inexact conditions.
-def : Pat<(frint FP32:$src),  (FIEBR 0, FP32:$src)>;
-def : Pat<(frint FP64:$src),  (FIDBR 0, FP64:$src)>;
-def : Pat<(frint FP128:$src), (FIXBR 0, FP128:$src)>;
+def : Pat<(any_frint FP32:$src),  (FIEBR 0, FP32:$src)>;
+def : Pat<(any_frint FP64:$src),  (FIDBR 0, FP64:$src)>;
+def : Pat<(any_frint FP128:$src), (FIXBR 0, FP128:$src)>;
 
 let Predicates = [FeatureFPExtension] in {
   // Extended forms of the FIxBR instructions.  M4 can be set to 4
   // to suppress detection of inexact conditions.
-  def FIEBRA : TernaryRRFe<"fiebra", 0xB357, FP32,  FP32>;
-  def FIDBRA : TernaryRRFe<"fidbra", 0xB35F, FP64,  FP64>;
-  def FIXBRA : TernaryRRFe<"fixbra", 0xB347, FP128, FP128>;
+  let Uses = [FPC], mayAccessMemory = 1 in {
+    def FIEBRA : TernaryRRFe<"fiebra", 0xB357, FP32,  FP32>;
+    def FIDBRA : TernaryRRFe<"fidbra", 0xB35F, FP64,  FP64>;
+    def FIXBRA : TernaryRRFe<"fixbra", 0xB347, FP128, FP128>;
+  }
 
   // fnearbyint is like frint but does not detect inexact conditions.
-  def : Pat<(fnearbyint FP32:$src),  (FIEBRA 0, FP32:$src,  4)>;
-  def : Pat<(fnearbyint FP64:$src),  (FIDBRA 0, FP64:$src,  4)>;
-  def : Pat<(fnearbyint FP128:$src), (FIXBRA 0, FP128:$src, 4)>;
+  def : Pat<(any_fnearbyint FP32:$src),  (FIEBRA 0, FP32:$src,  4)>;
+  def : Pat<(any_fnearbyint FP64:$src),  (FIDBRA 0, FP64:$src,  4)>;
+  def : Pat<(any_fnearbyint FP128:$src), (FIXBRA 0, FP128:$src, 4)>;
 
   // floor is no longer allowed to raise an inexact condition,
   // so restrict it to the cases where the condition can be suppressed.
   // Mode 7 is round towards -inf.
-  def : Pat<(ffloor FP32:$src),  (FIEBRA 7, FP32:$src,  4)>;
-  def : Pat<(ffloor FP64:$src),  (FIDBRA 7, FP64:$src,  4)>;
-  def : Pat<(ffloor FP128:$src), (FIXBRA 7, FP128:$src, 4)>;
+  def : Pat<(any_ffloor FP32:$src),  (FIEBRA 7, FP32:$src,  4)>;
+  def : Pat<(any_ffloor FP64:$src),  (FIDBRA 7, FP64:$src,  4)>;
+  def : Pat<(any_ffloor FP128:$src), (FIXBRA 7, FP128:$src, 4)>;
 
   // Same idea for ceil, where mode 6 is round towards +inf.
-  def : Pat<(fceil FP32:$src),  (FIEBRA 6, FP32:$src,  4)>;
-  def : Pat<(fceil FP64:$src),  (FIDBRA 6, FP64:$src,  4)>;
-  def : Pat<(fceil FP128:$src), (FIXBRA 6, FP128:$src, 4)>;
+  def : Pat<(any_fceil FP32:$src),  (FIEBRA 6, FP32:$src,  4)>;
+  def : Pat<(any_fceil FP64:$src),  (FIDBRA 6, FP64:$src,  4)>;
+  def : Pat<(any_fceil FP128:$src), (FIXBRA 6, FP128:$src, 4)>;
 
   // Same idea for trunc, where mode 5 is round towards zero.
-  def : Pat<(ftrunc FP32:$src),  (FIEBRA 5, FP32:$src,  4)>;
-  def : Pat<(ftrunc FP64:$src),  (FIDBRA 5, FP64:$src,  4)>;
-  def : Pat<(ftrunc FP128:$src), (FIXBRA 5, FP128:$src, 4)>;
+  def : Pat<(any_ftrunc FP32:$src),  (FIEBRA 5, FP32:$src,  4)>;
+  def : Pat<(any_ftrunc FP64:$src),  (FIDBRA 5, FP64:$src,  4)>;
+  def : Pat<(any_ftrunc FP128:$src), (FIXBRA 5, FP128:$src, 4)>;
 
   // Same idea for round, where mode 1 is round towards nearest with
   // ties away from zero.
-  def : Pat<(fround FP32:$src),  (FIEBRA 1, FP32:$src,  4)>;
-  def : Pat<(fround FP64:$src),  (FIDBRA 1, FP64:$src,  4)>;
-  def : Pat<(fround FP128:$src), (FIXBRA 1, FP128:$src, 4)>;
+  def : Pat<(any_fround FP32:$src),  (FIEBRA 1, FP32:$src,  4)>;
+  def : Pat<(any_fround FP64:$src),  (FIDBRA 1, FP64:$src,  4)>;
+  def : Pat<(any_fround FP128:$src), (FIXBRA 1, FP128:$src, 4)>;
 }
 
 //===----------------------------------------------------------------------===//
@@ -413,87 +429,102 @@
 //===----------------------------------------------------------------------===//
 
 // Addition.
-let Defs = [CC], CCValues = 0xF, CompareZeroCCMask = 0xF in {
+let Uses = [FPC], mayAccessMemory = 1,
+    Defs = [CC], CCValues = 0xF, CompareZeroCCMask = 0xF in {
   let isCommutable = 1 in {
-    def AEBR : BinaryRRE<"aebr", 0xB30A, fadd, FP32,  FP32>;
-    def ADBR : BinaryRRE<"adbr", 0xB31A, fadd, FP64,  FP64>;
-    def AXBR : BinaryRRE<"axbr", 0xB34A, fadd, FP128, FP128>;
+    def AEBR : BinaryRRE<"aebr", 0xB30A, any_fadd, FP32,  FP32>;
+    def ADBR : BinaryRRE<"adbr", 0xB31A, any_fadd, FP64,  FP64>;
+    def AXBR : BinaryRRE<"axbr", 0xB34A, any_fadd, FP128, FP128>;
   }
-  def AEB : BinaryRXE<"aeb", 0xED0A, fadd, FP32, load, 4>;
-  def ADB : BinaryRXE<"adb", 0xED1A, fadd, FP64, load, 8>;
+  def AEB : BinaryRXE<"aeb", 0xED0A, any_fadd, FP32, load, 4>;
+  def ADB : BinaryRXE<"adb", 0xED1A, any_fadd, FP64, load, 8>;
 }
 
 // Subtraction.
-let Defs = [CC], CCValues = 0xF, CompareZeroCCMask = 0xF in {
-  def SEBR : BinaryRRE<"sebr", 0xB30B, fsub, FP32,  FP32>;
-  def SDBR : BinaryRRE<"sdbr", 0xB31B, fsub, FP64,  FP64>;
-  def SXBR : BinaryRRE<"sxbr", 0xB34B, fsub, FP128, FP128>;
+let Uses = [FPC], mayAccessMemory = 1,
+    Defs = [CC], CCValues = 0xF, CompareZeroCCMask = 0xF in {
+  def SEBR : BinaryRRE<"sebr", 0xB30B, any_fsub, FP32,  FP32>;
+  def SDBR : BinaryRRE<"sdbr", 0xB31B, any_fsub, FP64,  FP64>;
+  def SXBR : BinaryRRE<"sxbr", 0xB34B, any_fsub, FP128, FP128>;
 
-  def SEB : BinaryRXE<"seb",  0xED0B, fsub, FP32, load, 4>;
-  def SDB : BinaryRXE<"sdb",  0xED1B, fsub, FP64, load, 8>;
+  def SEB : BinaryRXE<"seb",  0xED0B, any_fsub, FP32, load, 4>;
+  def SDB : BinaryRXE<"sdb",  0xED1B, any_fsub, FP64, load, 8>;
 }
 
 // Multiplication.
-let isCommutable = 1 in {
-  def MEEBR : BinaryRRE<"meebr", 0xB317, fmul, FP32,  FP32>;
-  def MDBR  : BinaryRRE<"mdbr",  0xB31C, fmul, FP64,  FP64>;
-  def MXBR  : BinaryRRE<"mxbr",  0xB34C, fmul, FP128, FP128>;
+let Uses = [FPC], mayAccessMemory = 1 in {
+  let isCommutable = 1 in {
+    def MEEBR : BinaryRRE<"meebr", 0xB317, any_fmul, FP32,  FP32>;
+    def MDBR  : BinaryRRE<"mdbr",  0xB31C, any_fmul, FP64,  FP64>;
+    def MXBR  : BinaryRRE<"mxbr",  0xB34C, any_fmul, FP128, FP128>;
+  }
+  def MEEB : BinaryRXE<"meeb", 0xED17, any_fmul, FP32, load, 4>;
+  def MDB  : BinaryRXE<"mdb",  0xED1C, any_fmul, FP64, load, 8>;
 }
-def MEEB : BinaryRXE<"meeb", 0xED17, fmul, FP32, load, 4>;
-def MDB  : BinaryRXE<"mdb",  0xED1C, fmul, FP64, load, 8>;
 
 // f64 multiplication of two FP32 registers.
-def MDEBR : BinaryRRE<"mdebr", 0xB30C, null_frag, FP64, FP32>;
-def : Pat<(fmul (f64 (fpextend FP32:$src1)), (f64 (fpextend FP32:$src2))),
+let Uses = [FPC], mayAccessMemory = 1 in
+  def MDEBR : BinaryRRE<"mdebr", 0xB30C, null_frag, FP64, FP32>;
+def : Pat<(any_fmul (f64 (fpextend FP32:$src1)), (f64 (fpextend FP32:$src2))),
           (MDEBR (INSERT_SUBREG (f64 (IMPLICIT_DEF)),
                                 FP32:$src1, subreg_h32), FP32:$src2)>;
 
 // f64 multiplication of an FP32 register and an f32 memory.
-def MDEB : BinaryRXE<"mdeb", 0xED0C, null_frag, FP64, load, 4>;
-def : Pat<(fmul (f64 (fpextend FP32:$src1)),
-                (f64 (extloadf32 bdxaddr12only:$addr))),
+let Uses = [FPC], mayAccessMemory = 1 in
+  def MDEB : BinaryRXE<"mdeb", 0xED0C, null_frag, FP64, load, 4>;
+def : Pat<(any_fmul (f64 (fpextend FP32:$src1)),
+                    (f64 (extloadf32 bdxaddr12only:$addr))),
           (MDEB (INSERT_SUBREG (f64 (IMPLICIT_DEF)), FP32:$src1, subreg_h32),
                 bdxaddr12only:$addr)>;
 
 // f128 multiplication of two FP64 registers.
-def MXDBR : BinaryRRE<"mxdbr", 0xB307, null_frag, FP128, FP64>;
+let Uses = [FPC], mayAccessMemory = 1 in
+  def MXDBR : BinaryRRE<"mxdbr", 0xB307, null_frag, FP128, FP64>;
 let Predicates = [FeatureNoVectorEnhancements1] in
-  def : Pat<(fmul (f128 (fpextend FP64:$src1)), (f128 (fpextend FP64:$src2))),
+  def : Pat<(any_fmul (f128 (fpextend FP64:$src1)),
+                      (f128 (fpextend FP64:$src2))),
             (MXDBR (INSERT_SUBREG (f128 (IMPLICIT_DEF)),
                                   FP64:$src1, subreg_h64), FP64:$src2)>;
 
 // f128 multiplication of an FP64 register and an f64 memory.
-def MXDB : BinaryRXE<"mxdb", 0xED07, null_frag, FP128, load, 8>;
+let Uses = [FPC], mayAccessMemory = 1 in
+  def MXDB : BinaryRXE<"mxdb", 0xED07, null_frag, FP128, load, 8>;
 let Predicates = [FeatureNoVectorEnhancements1] in
-  def : Pat<(fmul (f128 (fpextend FP64:$src1)),
-                  (f128 (extloadf64 bdxaddr12only:$addr))),
+  def : Pat<(any_fmul (f128 (fpextend FP64:$src1)),
+                      (f128 (extloadf64 bdxaddr12only:$addr))),
             (MXDB (INSERT_SUBREG (f128 (IMPLICIT_DEF)), FP64:$src1, subreg_h64),
                   bdxaddr12only:$addr)>;
 
 // Fused multiply-add.
-def MAEBR : TernaryRRD<"maebr", 0xB30E, z_fma, FP32, FP32>;
-def MADBR : TernaryRRD<"madbr", 0xB31E, z_fma, FP64, FP64>;
+let Uses = [FPC], mayAccessMemory = 1 in {
+  def MAEBR : TernaryRRD<"maebr", 0xB30E, z_any_fma, FP32, FP32>;
+  def MADBR : TernaryRRD<"madbr", 0xB31E, z_any_fma, FP64, FP64>;
 
-def MAEB : TernaryRXF<"maeb", 0xED0E, z_fma, FP32, FP32, load, 4>;
-def MADB : TernaryRXF<"madb", 0xED1E, z_fma, FP64, FP64, load, 8>;
+  def MAEB : TernaryRXF<"maeb", 0xED0E, z_any_fma, FP32, FP32, load, 4>;
+  def MADB : TernaryRXF<"madb", 0xED1E, z_any_fma, FP64, FP64, load, 8>;
+}
 
 // Fused multiply-subtract.
-def MSEBR : TernaryRRD<"msebr", 0xB30F, z_fms, FP32, FP32>;
-def MSDBR : TernaryRRD<"msdbr", 0xB31F, z_fms, FP64, FP64>;
+let Uses = [FPC], mayAccessMemory = 1 in {
+  def MSEBR : TernaryRRD<"msebr", 0xB30F, z_any_fms, FP32, FP32>;
+  def MSDBR : TernaryRRD<"msdbr", 0xB31F, z_any_fms, FP64, FP64>;
 
-def MSEB : TernaryRXF<"mseb", 0xED0F, z_fms, FP32, FP32, load, 4>;
-def MSDB : TernaryRXF<"msdb", 0xED1F, z_fms, FP64, FP64, load, 8>;
+  def MSEB : TernaryRXF<"mseb", 0xED0F, z_any_fms, FP32, FP32, load, 4>;
+  def MSDB : TernaryRXF<"msdb", 0xED1F, z_any_fms, FP64, FP64, load, 8>;
+}
 
 // Division.
-def DEBR : BinaryRRE<"debr", 0xB30D, fdiv, FP32,  FP32>;
-def DDBR : BinaryRRE<"ddbr", 0xB31D, fdiv, FP64,  FP64>;
-def DXBR : BinaryRRE<"dxbr", 0xB34D, fdiv, FP128, FP128>;
+let Uses = [FPC], mayAccessMemory = 1 in {
+  def DEBR : BinaryRRE<"debr", 0xB30D, any_fdiv, FP32,  FP32>;
+  def DDBR : BinaryRRE<"ddbr", 0xB31D, any_fdiv, FP64,  FP64>;
+  def DXBR : BinaryRRE<"dxbr", 0xB34D, any_fdiv, FP128, FP128>;
 
-def DEB : BinaryRXE<"deb", 0xED0D, fdiv, FP32, load, 4>;
-def DDB : BinaryRXE<"ddb", 0xED1D, fdiv, FP64, load, 8>;
+  def DEB : BinaryRXE<"deb", 0xED0D, any_fdiv, FP32, load, 4>;
+  def DDB : BinaryRXE<"ddb", 0xED1D, any_fdiv, FP64, load, 8>;
+}
 
 // Divide to integer.
-let Defs = [CC] in {
+let Uses = [FPC], Defs = [CC] in {
   def DIEBR : TernaryRRFb<"diebr", 0xB353, FP32, FP32, FP32>;
   def DIDBR : TernaryRRFb<"didbr", 0xB35B, FP64, FP64, FP64>;
 }
@@ -502,7 +533,7 @@
 // Comparisons
 //===----------------------------------------------------------------------===//
 
-let Defs = [CC], CCValues = 0xF in {
+let Uses = [FPC], Defs = [CC], CCValues = 0xF in {
   def CEBR : CompareRRE<"cebr", 0xB309, z_fcmp, FP32,  FP32>;
   def CDBR : CompareRRE<"cdbr", 0xB319, z_fcmp, FP64,  FP64>;
   def CXBR : CompareRRE<"cxbr", 0xB349, z_fcmp, FP128, FP128>;
@@ -532,20 +563,28 @@
 let hasSideEffects = 1 in {
   let mayLoad = 1, mayStore = 1 in {
     // TODO: EFPC and SFPC do not touch memory at all
-    def EFPC  : InherentRRE<"efpc", 0xB38C, GR32, int_s390_efpc>;
-    def STFPC : StoreInherentS<"stfpc", 0xB29C, storei<int_s390_efpc>, 4>;
-
-    def SFPC : SideEffectUnaryRRE<"sfpc", 0xB384, GR32, int_s390_sfpc>;
-    def LFPC : SideEffectUnaryS<"lfpc", 0xB29D, loadu<int_s390_sfpc>, 4>;
+    let Uses = [FPC] in {
+      def EFPC  : InherentRRE<"efpc", 0xB38C, GR32, int_s390_efpc>;
+      def STFPC : StoreInherentS<"stfpc", 0xB29C, storei<int_s390_efpc>, 4>;
+    }
+
+    let Defs = [FPC] in {
+      def SFPC : SideEffectUnaryRRE<"sfpc", 0xB384, GR32, int_s390_sfpc>;
+      def LFPC : SideEffectUnaryS<"lfpc", 0xB29D, loadu<int_s390_sfpc>, 4>;
+    }
   }
 
-  def SFASR : SideEffectUnaryRRE<"sfasr", 0xB385, GR32, null_frag>;
-  def LFAS  : SideEffectUnaryS<"lfas", 0xB2BD, null_frag, 4>;
+  let Defs = [FPC] in {
+    def SFASR : SideEffectUnaryRRE<"sfasr", 0xB385, GR32, null_frag>;
+    def LFAS  : SideEffectUnaryS<"lfas", 0xB2BD, null_frag, 4>;
+  }
 
-  def SRNMB : SideEffectAddressS<"srnmb", 0xB2B8, null_frag, shift12only>,
-              Requires<[FeatureFPExtension]>;
-  def SRNM  : SideEffectAddressS<"srnm", 0xB299, null_frag, shift12only>;
-  def SRNMT : SideEffectAddressS<"srnmt", 0xB2B9, null_frag, shift12only>;
+  let Uses = [FPC], Defs = [FPC] in {
+    def SRNMB : SideEffectAddressS<"srnmb", 0xB2B8, null_frag, shift12only>,
+                Requires<[FeatureFPExtension]>;
+    def SRNM  : SideEffectAddressS<"srnm", 0xB299, null_frag, shift12only>;
+    def SRNMT : SideEffectAddressS<"srnmt", 0xB2B9, null_frag, shift12only>;
+  }
 }
 
 //===----------------------------------------------------------------------===//
Index: lib/Target/SystemZ/SystemZInstrVector.td
===================================================================
--- lib/Target/SystemZ/SystemZInstrVector.td
+++ lib/Target/SystemZ/SystemZInstrVector.td
@@ -925,104 +925,128 @@
 // See comments in SystemZInstrFP.td for the suppression flags and
 // rounding modes.
 multiclass VectorRounding<Instruction insn, TypedReg tr> {
-  def : FPConversion<insn, frint,      tr, tr, 0, 0>;
-  def : FPConversion<insn, fnearbyint, tr, tr, 4, 0>;
-  def : FPConversion<insn, ffloor,     tr, tr, 4, 7>;
-  def : FPConversion<insn, fceil,      tr, tr, 4, 6>;
-  def : FPConversion<insn, ftrunc,     tr, tr, 4, 5>;
-  def : FPConversion<insn, fround,     tr, tr, 4, 1>;
+  def : FPConversion<insn, any_frint,      tr, tr, 0, 0>;
+  def : FPConversion<insn, any_fnearbyint, tr, tr, 4, 0>;
+  def : FPConversion<insn, any_ffloor,     tr, tr, 4, 7>;
+  def : FPConversion<insn, any_fceil,      tr, tr, 4, 6>;
+  def : FPConversion<insn, any_ftrunc,     tr, tr, 4, 5>;
+  def : FPConversion<insn, any_fround,     tr, tr, 4, 1>;
 }
 
 let Predicates = [FeatureVector] in {
   // Add.
-  def VFA   : BinaryVRRcFloatGeneric<"vfa", 0xE7E3>;
-  def VFADB : BinaryVRRc<"vfadb", 0xE7E3, fadd, v128db, v128db, 3, 0>;
-  def WFADB : BinaryVRRc<"wfadb", 0xE7E3, fadd, v64db, v64db, 3, 8>;
-  let Predicates = [FeatureVectorEnhancements1] in {
-    def VFASB : BinaryVRRc<"vfasb", 0xE7E3, fadd, v128sb, v128sb, 2, 0>;
-    def WFASB : BinaryVRRc<"wfasb", 0xE7E3, fadd, v32sb, v32sb, 2, 8>;
-    def WFAXB : BinaryVRRc<"wfaxb", 0xE7E3, fadd, v128xb, v128xb, 4, 8>;
+  let Uses = [FPC], mayAccessMemory = 1 in {
+    def VFA   : BinaryVRRcFloatGeneric<"vfa", 0xE7E3>;
+    def VFADB : BinaryVRRc<"vfadb", 0xE7E3, any_fadd, v128db, v128db, 3, 0>;
+    def WFADB : BinaryVRRc<"wfadb", 0xE7E3, any_fadd, v64db, v64db, 3, 8>;
+    let Predicates = [FeatureVectorEnhancements1] in {
+      def VFASB : BinaryVRRc<"vfasb", 0xE7E3, any_fadd, v128sb, v128sb, 2, 0>;
+      def WFASB : BinaryVRRc<"wfasb", 0xE7E3, any_fadd, v32sb, v32sb, 2, 8>;
+      def WFAXB : BinaryVRRc<"wfaxb", 0xE7E3, any_fadd, v128xb, v128xb, 4, 8>;
+    }
   }
 
   // Convert from fixed 64-bit.
-  def VCDG  : TernaryVRRaFloatGeneric<"vcdg", 0xE7C3>;
-  def VCDGB : TernaryVRRa<"vcdgb", 0xE7C3, null_frag, v128db, v128g, 3, 0>;
-  def WCDGB : TernaryVRRa<"wcdgb", 0xE7C3, null_frag, v64db, v64g, 3, 8>;
+  let Uses = [FPC] in {
+    def VCDG  : TernaryVRRaFloatGeneric<"vcdg", 0xE7C3>;
+    def VCDGB : TernaryVRRa<"vcdgb", 0xE7C3, null_frag, v128db, v128g, 3, 0>;
+    def WCDGB : TernaryVRRa<"wcdgb", 0xE7C3, null_frag, v64db, v64g, 3, 8>;
+  }
   def : FPConversion<VCDGB, sint_to_fp, v128db, v128g, 0, 0>;
 
   // Convert from logical 64-bit.
-  def VCDLG  : TernaryVRRaFloatGeneric<"vcdlg", 0xE7C1>;
-  def VCDLGB : TernaryVRRa<"vcdlgb", 0xE7C1, null_frag, v128db, v128g, 3, 0>;
-  def WCDLGB : TernaryVRRa<"wcdlgb", 0xE7C1, null_frag, v64db, v64g, 3, 8>;
+  let Uses = [FPC] in {
+    def VCDLG  : TernaryVRRaFloatGeneric<"vcdlg", 0xE7C1>;
+    def VCDLGB : TernaryVRRa<"vcdlgb", 0xE7C1, null_frag, v128db, v128g, 3, 0>;
+    def WCDLGB : TernaryVRRa<"wcdlgb", 0xE7C1, null_frag, v64db, v64g, 3, 8>;
+  }
   def : FPConversion<VCDLGB, uint_to_fp, v128db, v128g, 0, 0>;
 
   // Convert to fixed 64-bit.
-  def VCGD  : TernaryVRRaFloatGeneric<"vcgd", 0xE7C2>;
-  def VCGDB : TernaryVRRa<"vcgdb", 0xE7C2, null_frag, v128g, v128db, 3, 0>;
-  def WCGDB : TernaryVRRa<"wcgdb", 0xE7C2, null_frag, v64g, v64db, 3, 8>;
+  let Uses = [FPC] in {
+    def VCGD  : TernaryVRRaFloatGeneric<"vcgd", 0xE7C2>;
+    def VCGDB : TernaryVRRa<"vcgdb", 0xE7C2, null_frag, v128g, v128db, 3, 0>;
+    def WCGDB : TernaryVRRa<"wcgdb", 0xE7C2, null_frag, v64g, v64db, 3, 8>;
+  }
   // Rounding mode should agree with SystemZInstrFP.td.
   def : FPConversion<VCGDB, fp_to_sint, v128g, v128db, 0, 5>;
 
   // Convert to logical 64-bit.
-  def VCLGD  : TernaryVRRaFloatGeneric<"vclgd", 0xE7C0>;
-  def VCLGDB : TernaryVRRa<"vclgdb", 0xE7C0, null_frag, v128g, v128db, 3, 0>;
-  def WCLGDB : TernaryVRRa<"wclgdb", 0xE7C0, null_frag, v64g, v64db, 3, 8>;
+  let Uses = [FPC] in {
+    def VCLGD  : TernaryVRRaFloatGeneric<"vclgd", 0xE7C0>;
+    def VCLGDB : TernaryVRRa<"vclgdb", 0xE7C0, null_frag, v128g, v128db, 3, 0>;
+    def WCLGDB : TernaryVRRa<"wclgdb", 0xE7C0, null_frag, v64g, v64db, 3, 8>;
+  }
   // Rounding mode should agree with SystemZInstrFP.td.
   def : FPConversion<VCLGDB, fp_to_uint, v128g, v128db, 0, 5>;
 
   // Divide.
-  def VFD   : BinaryVRRcFloatGeneric<"vfd", 0xE7E5>;
-  def VFDDB : BinaryVRRc<"vfddb", 0xE7E5, fdiv, v128db, v128db, 3, 0>;
-  def WFDDB : BinaryVRRc<"wfddb", 0xE7E5, fdiv, v64db, v64db, 3, 8>;
-  let Predicates = [FeatureVectorEnhancements1] in {
-    def VFDSB : BinaryVRRc<"vfdsb", 0xE7E5, fdiv, v128sb, v128sb, 2, 0>;
-    def WFDSB : BinaryVRRc<"wfdsb", 0xE7E5, fdiv, v32sb, v32sb, 2, 8>;
-    def WFDXB : BinaryVRRc<"wfdxb", 0xE7E5, fdiv, v128xb, v128xb, 4, 8>;
+  let Uses = [FPC], mayAccessMemory = 1 in {
+    def VFD   : BinaryVRRcFloatGeneric<"vfd", 0xE7E5>;
+    def VFDDB : BinaryVRRc<"vfddb", 0xE7E5, any_fdiv, v128db, v128db, 3, 0>;
+    def WFDDB : BinaryVRRc<"wfddb", 0xE7E5, any_fdiv, v64db, v64db, 3, 8>;
+    let Predicates = [FeatureVectorEnhancements1] in {
+      def VFDSB : BinaryVRRc<"vfdsb", 0xE7E5, any_fdiv, v128sb, v128sb, 2, 0>;
+      def WFDSB : BinaryVRRc<"wfdsb", 0xE7E5, any_fdiv, v32sb, v32sb, 2, 8>;
+      def WFDXB : BinaryVRRc<"wfdxb", 0xE7E5, any_fdiv, v128xb, v128xb, 4, 8>;
+    }
   }
 
   // Load FP integer.
-  def VFI   : TernaryVRRaFloatGeneric<"vfi", 0xE7C7>;
-  def VFIDB : TernaryVRRa<"vfidb", 0xE7C7, int_s390_vfidb, v128db, v128db, 3, 0>;
-  def WFIDB : TernaryVRRa<"wfidb", 0xE7C7, null_frag, v64db, v64db, 3, 8>;
+  let Uses = [FPC], mayAccessMemory = 1 in {
+    def VFI   : TernaryVRRaFloatGeneric<"vfi", 0xE7C7>;
+    def VFIDB : TernaryVRRa<"vfidb", 0xE7C7, int_s390_vfidb, v128db, v128db, 3, 0>;
+    def WFIDB : TernaryVRRa<"wfidb", 0xE7C7, null_frag, v64db, v64db, 3, 8>;
+  }
   defm : VectorRounding<VFIDB, v128db>;
   defm : VectorRounding<WFIDB, v64db>;
   let Predicates = [FeatureVectorEnhancements1] in {
-    def VFISB : TernaryVRRa<"vfisb", 0xE7C7, int_s390_vfisb, v128sb, v128sb, 2, 0>;
-    def WFISB : TernaryVRRa<"wfisb", 0xE7C7, null_frag, v32sb, v32sb, 2, 8>;
-    def WFIXB : TernaryVRRa<"wfixb", 0xE7C7, null_frag, v128xb, v128xb, 4, 8>;
+    let Uses = [FPC], mayAccessMemory = 1 in {
+      def VFISB : TernaryVRRa<"vfisb", 0xE7C7, int_s390_vfisb, v128sb, v128sb, 2, 0>;
+      def WFISB : TernaryVRRa<"wfisb", 0xE7C7, null_frag, v32sb, v32sb, 2, 8>;
+      def WFIXB : TernaryVRRa<"wfixb", 0xE7C7, null_frag, v128xb, v128xb, 4, 8>;
+    }
     defm : VectorRounding<VFISB, v128sb>;
     defm : VectorRounding<WFISB, v32sb>;
     defm : VectorRounding<WFIXB, v128xb>;
   }
 
   // Load lengthened.
-  def VLDE  : UnaryVRRaFloatGeneric<"vlde", 0xE7C4>;
-  def VLDEB : UnaryVRRa<"vldeb", 0xE7C4, z_vextend, v128db, v128sb, 2, 0>;
-  def WLDEB : UnaryVRRa<"wldeb", 0xE7C4, fpextend, v64db, v32sb, 2, 8>;
+  let Uses = [FPC] in {
+    def VLDE  : UnaryVRRaFloatGeneric<"vlde", 0xE7C4>;
+    def VLDEB : UnaryVRRa<"vldeb", 0xE7C4, z_vextend, v128db, v128sb, 2, 0>;
+    def WLDEB : UnaryVRRa<"wldeb", 0xE7C4, fpextend, v64db, v32sb, 2, 8>;
+  }
   let Predicates = [FeatureVectorEnhancements1] in {
-    let isAsmParserOnly = 1 in {
-      def VFLL  : UnaryVRRaFloatGeneric<"vfll", 0xE7C4>;
-      def VFLLS : UnaryVRRa<"vflls", 0xE7C4, null_frag, v128db, v128sb, 2, 0>;
-      def WFLLS : UnaryVRRa<"wflls", 0xE7C4, null_frag, v64db, v32sb, 2, 8>;
+    let Uses = [FPC] in {
+      let isAsmParserOnly = 1 in {
+        def VFLL  : UnaryVRRaFloatGeneric<"vfll", 0xE7C4>;
+        def VFLLS : UnaryVRRa<"vflls", 0xE7C4, null_frag, v128db, v128sb, 2, 0>;
+        def WFLLS : UnaryVRRa<"wflls", 0xE7C4, null_frag, v64db, v32sb, 2, 8>;
+      }
+      def WFLLD : UnaryVRRa<"wflld", 0xE7C4, fpextend, v128xb, v64db, 3, 8>;
     }
-    def WFLLD : UnaryVRRa<"wflld", 0xE7C4, fpextend, v128xb, v64db, 3, 8>;
     def : Pat<(f128 (fpextend (f32 VR32:$src))),
               (WFLLD (WLDEB VR32:$src))>;
   }
 
   // Load rounded.
-  def VLED  : TernaryVRRaFloatGeneric<"vled", 0xE7C5>;
-  def VLEDB : TernaryVRRa<"vledb", 0xE7C5, null_frag, v128sb, v128db, 3, 0>;
-  def WLEDB : TernaryVRRa<"wledb", 0xE7C5, null_frag, v32sb, v64db, 3, 8>;
+  let Uses = [FPC] in {
+    def VLED  : TernaryVRRaFloatGeneric<"vled", 0xE7C5>;
+    def VLEDB : TernaryVRRa<"vledb", 0xE7C5, null_frag, v128sb, v128db, 3, 0>;
+    def WLEDB : TernaryVRRa<"wledb", 0xE7C5, null_frag, v32sb, v64db, 3, 8>;
+  }
   def : Pat<(v4f32 (z_vround (v2f64 VR128:$src))), (VLEDB VR128:$src, 0, 0)>;
   def : FPConversion<WLEDB, fpround, v32sb, v64db, 0, 0>;
   let Predicates = [FeatureVectorEnhancements1] in {
-    let isAsmParserOnly = 1 in {
-      def VFLR  : TernaryVRRaFloatGeneric<"vflr", 0xE7C5>;
-      def VFLRD : TernaryVRRa<"vflrd", 0xE7C5, null_frag, v128sb, v128db, 3, 0>;
-      def WFLRD : TernaryVRRa<"wflrd", 0xE7C5, null_frag, v32sb, v64db, 3, 8>;
+    let Uses = [FPC] in {
+      let isAsmParserOnly = 1 in {
+        def VFLR  : TernaryVRRaFloatGeneric<"vflr", 0xE7C5>;
+        def VFLRD : TernaryVRRa<"vflrd", 0xE7C5, null_frag, v128sb, v128db, 3, 0>;
+        def WFLRD : TernaryVRRa<"wflrd", 0xE7C5, null_frag, v32sb, v64db, 3, 8>;
+      }
+      def WFLRX : TernaryVRRa<"wflrx", 0xE7C5, null_frag, v64db, v128xb, 4, 8>;
     }
-    def WFLRX : TernaryVRRa<"wflrx", 0xE7C5, null_frag, v64db, v128xb, 4, 8>;
     def : FPConversion<WFLRX, fpround, v64db, v128xb, 0, 0>;
     def : Pat<(f32 (fpround (f128 VR128:$src))),
               (WLEDB (WFLRX VR128:$src, 0, 3), 0, 0)>;
@@ -1030,21 +1054,23 @@
 
   // Maximum.
   multiclass VectorMax<Instruction insn, TypedReg tr> {
-    def : FPMinMax<insn, fmaxnum, tr, 4>;
+    def : FPMinMax<insn, any_fmaxnum, tr, 4>;
     def : FPMinMax<insn, fmaximum, tr, 1>;
   }
   let Predicates = [FeatureVectorEnhancements1] in {
-    def VFMAX   : TernaryVRRcFloatGeneric<"vfmax", 0xE7EF>;
-    def VFMAXDB : TernaryVRRcFloat<"vfmaxdb", 0xE7EF, int_s390_vfmaxdb,
-                                   v128db, v128db, 3, 0>;
-    def WFMAXDB : TernaryVRRcFloat<"wfmaxdb", 0xE7EF, null_frag,
-                                   v64db, v64db, 3, 8>;
-    def VFMAXSB : TernaryVRRcFloat<"vfmaxsb", 0xE7EF, int_s390_vfmaxsb,
-                                   v128sb, v128sb, 2, 0>;
-    def WFMAXSB : TernaryVRRcFloat<"wfmaxsb", 0xE7EF, null_frag,
-                                   v32sb, v32sb, 2, 8>;
-    def WFMAXXB : TernaryVRRcFloat<"wfmaxxb", 0xE7EF, null_frag,
-                                   v128xb, v128xb, 4, 8>;
+    let Uses = [FPC] in {
+      def VFMAX   : TernaryVRRcFloatGeneric<"vfmax", 0xE7EF>;
+      def VFMAXDB : TernaryVRRcFloat<"vfmaxdb", 0xE7EF, int_s390_vfmaxdb,
+                                     v128db, v128db, 3, 0>;
+      def WFMAXDB : TernaryVRRcFloat<"wfmaxdb", 0xE7EF, null_frag,
+                                     v64db, v64db, 3, 8>;
+      def VFMAXSB : TernaryVRRcFloat<"vfmaxsb", 0xE7EF, int_s390_vfmaxsb,
+                                     v128sb, v128sb, 2, 0>;
+      def WFMAXSB : TernaryVRRcFloat<"wfmaxsb", 0xE7EF, null_frag,
+                                     v32sb, v32sb, 2, 8>;
+      def WFMAXXB : TernaryVRRcFloat<"wfmaxxb", 0xE7EF, null_frag,
+                                     v128xb, v128xb, 4, 8>;
+    }
     defm : VectorMax<VFMAXDB, v128db>;
     defm : VectorMax<WFMAXDB, v64db>;
     defm : VectorMax<VFMAXSB, v128sb>;
@@ -1054,21 +1080,23 @@
 
   // Minimum.
   multiclass VectorMin<Instruction insn, TypedReg tr> {
-    def : FPMinMax<insn, fminnum, tr, 4>;
+    def : FPMinMax<insn, any_fminnum, tr, 4>;
     def : FPMinMax<insn, fminimum, tr, 1>;
   }
   let Predicates = [FeatureVectorEnhancements1] in {
-    def VFMIN   : TernaryVRRcFloatGeneric<"vfmin", 0xE7EE>;
-    def VFMINDB : TernaryVRRcFloat<"vfmindb", 0xE7EE, int_s390_vfmindb,
-                                   v128db, v128db, 3, 0>;
-    def WFMINDB : TernaryVRRcFloat<"wfmindb", 0xE7EE, null_frag,
-                                   v64db, v64db, 3, 8>;
-    def VFMINSB : TernaryVRRcFloat<"vfminsb", 0xE7EE, int_s390_vfminsb,
-                                   v128sb, v128sb, 2, 0>;
-    def WFMINSB : TernaryVRRcFloat<"wfminsb", 0xE7EE, null_frag,
-                                   v32sb, v32sb, 2, 8>;
-    def WFMINXB : TernaryVRRcFloat<"wfminxb", 0xE7EE, null_frag,
-                                   v128xb, v128xb, 4, 8>;
+    let Uses = [FPC] in {
+      def VFMIN   : TernaryVRRcFloatGeneric<"vfmin", 0xE7EE>;
+      def VFMINDB : TernaryVRRcFloat<"vfmindb", 0xE7EE, int_s390_vfmindb,
+                                     v128db, v128db, 3, 0>;
+      def WFMINDB : TernaryVRRcFloat<"wfmindb", 0xE7EE, null_frag,
+                                     v64db, v64db, 3, 8>;
+      def VFMINSB : TernaryVRRcFloat<"vfminsb", 0xE7EE, int_s390_vfminsb,
+                                     v128sb, v128sb, 2, 0>;
+      def WFMINSB : TernaryVRRcFloat<"wfminsb", 0xE7EE, null_frag,
+                                     v32sb, v32sb, 2, 8>;
+      def WFMINXB : TernaryVRRcFloat<"wfminxb", 0xE7EE, null_frag,
+                                     v128xb, v128xb, 4, 8>;
+    }
     defm : VectorMin<VFMINDB, v128db>;
     defm : VectorMin<WFMINDB, v64db>;
     defm : VectorMin<VFMINSB, v128sb>;
@@ -1077,53 +1105,61 @@
   }
 
   // Multiply.
-  def VFM   : BinaryVRRcFloatGeneric<"vfm", 0xE7E7>;
-  def VFMDB : BinaryVRRc<"vfmdb", 0xE7E7, fmul, v128db, v128db, 3, 0>;
-  def WFMDB : BinaryVRRc<"wfmdb", 0xE7E7, fmul, v64db, v64db, 3, 8>;
-  let Predicates = [FeatureVectorEnhancements1] in {
-    def VFMSB : BinaryVRRc<"vfmsb", 0xE7E7, fmul, v128sb, v128sb, 2, 0>;
-    def WFMSB : BinaryVRRc<"wfmsb", 0xE7E7, fmul, v32sb, v32sb, 2, 8>;
-    def WFMXB : BinaryVRRc<"wfmxb", 0xE7E7, fmul, v128xb, v128xb, 4, 8>;
+  let Uses = [FPC], mayAccessMemory = 1 in {
+    def VFM   : BinaryVRRcFloatGeneric<"vfm", 0xE7E7>;
+    def VFMDB : BinaryVRRc<"vfmdb", 0xE7E7, any_fmul, v128db, v128db, 3, 0>;
+    def WFMDB : BinaryVRRc<"wfmdb", 0xE7E7, any_fmul, v64db, v64db, 3, 8>;
+    let Predicates = [FeatureVectorEnhancements1] in {
+      def VFMSB : BinaryVRRc<"vfmsb", 0xE7E7, any_fmul, v128sb, v128sb, 2, 0>;
+      def WFMSB : BinaryVRRc<"wfmsb", 0xE7E7, any_fmul, v32sb, v32sb, 2, 8>;
+      def WFMXB : BinaryVRRc<"wfmxb", 0xE7E7, any_fmul, v128xb, v128xb, 4, 8>;
+    }
   }
 
   // Multiply and add.
-  def VFMA   : TernaryVRReFloatGeneric<"vfma", 0xE78F>;
-  def VFMADB : TernaryVRRe<"vfmadb", 0xE78F, fma, v128db, v128db, 0, 3>;
-  def WFMADB : TernaryVRRe<"wfmadb", 0xE78F, fma, v64db, v64db, 8, 3>;
-  let Predicates = [FeatureVectorEnhancements1] in {
-    def VFMASB : TernaryVRRe<"vfmasb", 0xE78F, fma, v128sb, v128sb, 0, 2>;
-    def WFMASB : TernaryVRRe<"wfmasb", 0xE78F, fma, v32sb, v32sb, 8, 2>;
-    def WFMAXB : TernaryVRRe<"wfmaxb", 0xE78F, fma, v128xb, v128xb, 8, 4>;
+  let Uses = [FPC], mayAccessMemory = 1 in {
+    def VFMA   : TernaryVRReFloatGeneric<"vfma", 0xE78F>;
+    def VFMADB : TernaryVRRe<"vfmadb", 0xE78F, any_fma, v128db, v128db, 0, 3>;
+    def WFMADB : TernaryVRRe<"wfmadb", 0xE78F, any_fma, v64db, v64db, 8, 3>;
+    let Predicates = [FeatureVectorEnhancements1] in {
+      def VFMASB : TernaryVRRe<"vfmasb", 0xE78F, any_fma, v128sb, v128sb, 0, 2>;
+      def WFMASB : TernaryVRRe<"wfmasb", 0xE78F, any_fma, v32sb, v32sb, 8, 2>;
+      def WFMAXB : TernaryVRRe<"wfmaxb", 0xE78F, any_fma, v128xb, v128xb, 8, 4>;
+    }
   }
 
   // Multiply and subtract.
-  def VFMS   : TernaryVRReFloatGeneric<"vfms", 0xE78E>;
-  def VFMSDB : TernaryVRRe<"vfmsdb", 0xE78E, fms, v128db, v128db, 0, 3>;
-  def WFMSDB : TernaryVRRe<"wfmsdb", 0xE78E, fms, v64db, v64db, 8, 3>;
-  let Predicates = [FeatureVectorEnhancements1] in {
-    def VFMSSB : TernaryVRRe<"vfmssb", 0xE78E, fms, v128sb, v128sb, 0, 2>;
-    def WFMSSB : TernaryVRRe<"wfmssb", 0xE78E, fms, v32sb, v32sb, 8, 2>;
-    def WFMSXB : TernaryVRRe<"wfmsxb", 0xE78E, fms, v128xb, v128xb, 8, 4>;
+  let Uses = [FPC], mayAccessMemory = 1 in {
+    def VFMS   : TernaryVRReFloatGeneric<"vfms", 0xE78E>;
+    def VFMSDB : TernaryVRRe<"vfmsdb", 0xE78E, any_fms, v128db, v128db, 0, 3>;
+    def WFMSDB : TernaryVRRe<"wfmsdb", 0xE78E, any_fms, v64db, v64db, 8, 3>;
+    let Predicates = [FeatureVectorEnhancements1] in {
+      def VFMSSB : TernaryVRRe<"vfmssb", 0xE78E, any_fms, v128sb, v128sb, 0, 2>;
+      def WFMSSB : TernaryVRRe<"wfmssb", 0xE78E, any_fms, v32sb, v32sb, 8, 2>;
+      def WFMSXB : TernaryVRRe<"wfmsxb", 0xE78E, any_fms, v128xb, v128xb, 8, 4>;
+    }
   }
 
   // Negative multiply and add.
-  let Predicates = [FeatureVectorEnhancements1] in {
+  let Uses = [FPC], mayAccessMemory = 1,
+      Predicates = [FeatureVectorEnhancements1] in {
     def VFNMA   : TernaryVRReFloatGeneric<"vfnma", 0xE79F>;
-    def VFNMADB : TernaryVRRe<"vfnmadb", 0xE79F, fnma, v128db, v128db, 0, 3>;
-    def WFNMADB : TernaryVRRe<"wfnmadb", 0xE79F, fnma, v64db, v64db, 8, 3>;
-    def VFNMASB : TernaryVRRe<"vfnmasb", 0xE79F, fnma, v128sb, v128sb, 0, 2>;
-    def WFNMASB : TernaryVRRe<"wfnmasb", 0xE79F, fnma, v32sb, v32sb, 8, 2>;
-    def WFNMAXB : TernaryVRRe<"wfnmaxb", 0xE79F, fnma, v128xb, v128xb, 8, 4>;
+    def VFNMADB : TernaryVRRe<"vfnmadb", 0xE79F, any_fnma, v128db, v128db, 0, 3>;
+    def WFNMADB : TernaryVRRe<"wfnmadb", 0xE79F, any_fnma, v64db, v64db, 8, 3>;
+    def VFNMASB : TernaryVRRe<"vfnmasb", 0xE79F, any_fnma, v128sb, v128sb, 0, 2>;
+    def WFNMASB : TernaryVRRe<"wfnmasb", 0xE79F, any_fnma, v32sb, v32sb, 8, 2>;
+    def WFNMAXB : TernaryVRRe<"wfnmaxb", 0xE79F, any_fnma, v128xb, v128xb, 8, 4>;
   }
 
   // Negative multiply and subtract.
-  let Predicates = [FeatureVectorEnhancements1] in {
+  let Uses = [FPC], mayAccessMemory = 1,
+      Predicates = [FeatureVectorEnhancements1] in {
     def VFNMS   : TernaryVRReFloatGeneric<"vfnms", 0xE79E>;
-    def VFNMSDB : TernaryVRRe<"vfnmsdb", 0xE79E, fnms, v128db, v128db, 0, 3>;
-    def WFNMSDB : TernaryVRRe<"wfnmsdb", 0xE79E, fnms, v64db, v64db, 8, 3>;
-    def VFNMSSB : TernaryVRRe<"vfnmssb", 0xE79E, fnms, v128sb, v128sb, 0, 2>;
-    def WFNMSSB : TernaryVRRe<"wfnmssb", 0xE79E, fnms, v32sb, v32sb, 8, 2>;
-    def WFNMSXB : TernaryVRRe<"wfnmsxb", 0xE79E, fnms, v128xb, v128xb, 8, 4>;
+    def VFNMSDB : TernaryVRRe<"vfnmsdb", 0xE79E, any_fnms, v128db, v128db, 0, 3>;
+    def WFNMSDB : TernaryVRRe<"wfnmsdb", 0xE79E, any_fnms, v64db, v64db, 8, 3>;
+    def VFNMSSB : TernaryVRRe<"vfnmssb", 0xE79E, any_fnms, v128sb, v128sb, 0, 2>;
+    def WFNMSSB : TernaryVRRe<"wfnmssb", 0xE79E, any_fnms, v32sb, v32sb, 8, 2>;
+    def WFNMSXB : TernaryVRRe<"wfnmsxb", 0xE79E, any_fnms, v128xb, v128xb, 8, 4>;
   }
 
   // Perform sign operation.
@@ -1164,23 +1200,27 @@
   }
 
   // Square root.
-  def VFSQ   : UnaryVRRaFloatGeneric<"vfsq", 0xE7CE>;
-  def VFSQDB : UnaryVRRa<"vfsqdb", 0xE7CE, fsqrt, v128db, v128db, 3, 0>;
-  def WFSQDB : UnaryVRRa<"wfsqdb", 0xE7CE, fsqrt, v64db, v64db, 3, 8>;
-  let Predicates = [FeatureVectorEnhancements1] in {
-    def VFSQSB : UnaryVRRa<"vfsqsb", 0xE7CE, fsqrt, v128sb, v128sb, 2, 0>;
-    def WFSQSB : UnaryVRRa<"wfsqsb", 0xE7CE, fsqrt, v32sb, v32sb, 2, 8>;
-    def WFSQXB : UnaryVRRa<"wfsqxb", 0xE7CE, fsqrt, v128xb, v128xb, 4, 8>;
+  let Uses = [FPC], mayAccessMemory = 1 in {
+    def VFSQ   : UnaryVRRaFloatGeneric<"vfsq", 0xE7CE>;
+    def VFSQDB : UnaryVRRa<"vfsqdb", 0xE7CE, any_fsqrt, v128db, v128db, 3, 0>;
+    def WFSQDB : UnaryVRRa<"wfsqdb", 0xE7CE, any_fsqrt, v64db, v64db, 3, 8>;
+    let Predicates = [FeatureVectorEnhancements1] in {
+      def VFSQSB : UnaryVRRa<"vfsqsb", 0xE7CE, any_fsqrt, v128sb, v128sb, 2, 0>;
+      def WFSQSB : UnaryVRRa<"wfsqsb", 0xE7CE, any_fsqrt, v32sb, v32sb, 2, 8>;
+      def WFSQXB : UnaryVRRa<"wfsqxb", 0xE7CE, any_fsqrt, v128xb, v128xb, 4, 8>;
+    }
   }
 
   // Subtract.
-  def VFS   : BinaryVRRcFloatGeneric<"vfs", 0xE7E2>;
-  def VFSDB : BinaryVRRc<"vfsdb", 0xE7E2, fsub, v128db, v128db, 3, 0>;
-  def WFSDB : BinaryVRRc<"wfsdb", 0xE7E2, fsub, v64db, v64db, 3, 8>;
-  let Predicates = [FeatureVectorEnhancements1] in {
-    def VFSSB : BinaryVRRc<"vfssb", 0xE7E2, fsub, v128sb, v128sb, 2, 0>;
-    def WFSSB : BinaryVRRc<"wfssb", 0xE7E2, fsub, v32sb, v32sb, 2, 8>;
-    def WFSXB : BinaryVRRc<"wfsxb", 0xE7E2, fsub, v128xb, v128xb, 4, 8>;
+  let Uses = [FPC], mayAccessMemory = 1 in {
+    def VFS   : BinaryVRRcFloatGeneric<"vfs", 0xE7E2>;
+    def VFSDB : BinaryVRRc<"vfsdb", 0xE7E2, any_fsub, v128db, v128db, 3, 0>;
+    def WFSDB : BinaryVRRc<"wfsdb", 0xE7E2, any_fsub, v64db, v64db, 3, 8>;
+    let Predicates = [FeatureVectorEnhancements1] in {
+      def VFSSB : BinaryVRRc<"vfssb", 0xE7E2, any_fsub, v128sb, v128sb, 2, 0>;
+      def WFSSB : BinaryVRRc<"wfssb", 0xE7E2, any_fsub, v32sb, v32sb, 2, 8>;
+      def WFSXB : BinaryVRRc<"wfsxb", 0xE7E2, any_fsub, v128xb, v128xb, 4, 8>;
+    }
   }
 
   // Test data class immediate.
@@ -1202,7 +1242,7 @@
 
 let Predicates = [FeatureVector] in {
   // Compare scalar.
-  let Defs = [CC] in {
+  let Uses = [FPC], Defs = [CC] in {
     def WFC   : CompareVRRaFloatGeneric<"wfc", 0xE7CB>;
     def WFCDB : CompareVRRa<"wfcdb", 0xE7CB, z_fcmp, v64db, 3>;
     let Predicates = [FeatureVectorEnhancements1] in {
@@ -1212,7 +1252,7 @@
   }
 
   // Compare and signal scalar.
-  let Defs = [CC] in {
+  let Uses = [FPC], Defs = [CC] in {
     def WFK   : CompareVRRaFloatGeneric<"wfk", 0xE7CA>;
     def WFKDB : CompareVRRa<"wfkdb", 0xE7CA, null_frag, v64db, 3>;
     let Predicates = [FeatureVectorEnhancements1] in {
@@ -1222,22 +1262,24 @@
   }
 
   // Compare equal.
-  def  VFCE   : BinaryVRRcSPairFloatGeneric<"vfce", 0xE7E8>;
-  defm VFCEDB : BinaryVRRcSPair<"vfcedb", 0xE7E8, z_vfcmpe, z_vfcmpes,
-                                v128g, v128db, 3, 0>;
-  defm WFCEDB : BinaryVRRcSPair<"wfcedb", 0xE7E8, null_frag, null_frag,
-                                v64g, v64db, 3, 8>;
-  let Predicates = [FeatureVectorEnhancements1] in {
-    defm VFCESB : BinaryVRRcSPair<"vfcesb", 0xE7E8, z_vfcmpe, z_vfcmpes,
-                                  v128f, v128sb, 2, 0>;
-    defm WFCESB : BinaryVRRcSPair<"wfcesb", 0xE7E8, null_frag, null_frag,
-                                  v32f, v32sb, 2, 8>;
-    defm WFCEXB : BinaryVRRcSPair<"wfcexb", 0xE7E8, null_frag, null_frag,
-                                  v128q, v128xb, 4, 8>;
+  let Uses = [FPC] in {
+    def  VFCE   : BinaryVRRcSPairFloatGeneric<"vfce", 0xE7E8>;
+    defm VFCEDB : BinaryVRRcSPair<"vfcedb", 0xE7E8, z_vfcmpe, z_vfcmpes,
+                                  v128g, v128db, 3, 0>;
+    defm WFCEDB : BinaryVRRcSPair<"wfcedb", 0xE7E8, null_frag, null_frag,
+                                  v64g, v64db, 3, 8>;
+    let Predicates = [FeatureVectorEnhancements1] in {
+      defm VFCESB : BinaryVRRcSPair<"vfcesb", 0xE7E8, z_vfcmpe, z_vfcmpes,
+                                    v128f, v128sb, 2, 0>;
+      defm WFCESB : BinaryVRRcSPair<"wfcesb", 0xE7E8, null_frag, null_frag,
+                                    v32f, v32sb, 2, 8>;
+      defm WFCEXB : BinaryVRRcSPair<"wfcexb", 0xE7E8, null_frag, null_frag,
+                                    v128q, v128xb, 4, 8>;
+    }
   }
 
   // Compare and signal equal.
-  let Predicates = [FeatureVectorEnhancements1] in {
+  let Uses = [FPC], Predicates = [FeatureVectorEnhancements1] in {
     defm VFKEDB : BinaryVRRcSPair<"vfkedb", 0xE7E8, null_frag, null_frag,
                                   v128g, v128db, 3, 4>;
     defm WFKEDB : BinaryVRRcSPair<"wfkedb", 0xE7E8, null_frag, null_frag,
@@ -1251,22 +1293,24 @@
   }
 
   // Compare high.
-  def  VFCH   : BinaryVRRcSPairFloatGeneric<"vfch", 0xE7EB>;
-  defm VFCHDB : BinaryVRRcSPair<"vfchdb", 0xE7EB, z_vfcmph, z_vfcmphs,
-                                v128g, v128db, 3, 0>;
-  defm WFCHDB : BinaryVRRcSPair<"wfchdb", 0xE7EB, null_frag, null_frag,
-                                v64g, v64db, 3, 8>;
-  let Predicates = [FeatureVectorEnhancements1] in {
-    defm VFCHSB : BinaryVRRcSPair<"vfchsb", 0xE7EB, z_vfcmph, z_vfcmphs,
-                                  v128f, v128sb, 2, 0>;
-    defm WFCHSB : BinaryVRRcSPair<"wfchsb", 0xE7EB, null_frag, null_frag,
-                                  v32f, v32sb, 2, 8>;
-    defm WFCHXB : BinaryVRRcSPair<"wfchxb", 0xE7EB, null_frag, null_frag,
-                                  v128q, v128xb, 4, 8>;
+  let Uses = [FPC] in {
+    def  VFCH   : BinaryVRRcSPairFloatGeneric<"vfch", 0xE7EB>;
+    defm VFCHDB : BinaryVRRcSPair<"vfchdb", 0xE7EB, z_vfcmph, z_vfcmphs,
+                                  v128g, v128db, 3, 0>;
+    defm WFCHDB : BinaryVRRcSPair<"wfchdb", 0xE7EB, null_frag, null_frag,
+                                  v64g, v64db, 3, 8>;
+    let Predicates = [FeatureVectorEnhancements1] in {
+      defm VFCHSB : BinaryVRRcSPair<"vfchsb", 0xE7EB, z_vfcmph, z_vfcmphs,
+                                    v128f, v128sb, 2, 0>;
+      defm WFCHSB : BinaryVRRcSPair<"wfchsb", 0xE7EB, null_frag, null_frag,
+                                    v32f, v32sb, 2, 8>;
+      defm WFCHXB : BinaryVRRcSPair<"wfchxb", 0xE7EB, null_frag, null_frag,
+                                    v128q, v128xb, 4, 8>;
+    }
   }
 
   // Compare and signal high.
-  let Predicates = [FeatureVectorEnhancements1] in {
+  let Uses = [FPC], Predicates = [FeatureVectorEnhancements1] in {
     defm VFKHDB : BinaryVRRcSPair<"vfkhdb", 0xE7EB, null_frag, null_frag,
                                   v128g, v128db, 3, 4>;
     defm WFKHDB : BinaryVRRcSPair<"wfkhdb", 0xE7EB, null_frag, null_frag,
@@ -1280,22 +1324,24 @@
   }
 
   // Compare high or equal.
-  def  VFCHE   : BinaryVRRcSPairFloatGeneric<"vfche", 0xE7EA>;
-  defm VFCHEDB : BinaryVRRcSPair<"vfchedb", 0xE7EA, z_vfcmphe, z_vfcmphes,
-                                 v128g, v128db, 3, 0>;
-  defm WFCHEDB : BinaryVRRcSPair<"wfchedb", 0xE7EA, null_frag, null_frag,
-                                 v64g, v64db, 3, 8>;
-  let Predicates = [FeatureVectorEnhancements1] in {
-    defm VFCHESB : BinaryVRRcSPair<"vfchesb", 0xE7EA, z_vfcmphe, z_vfcmphes,
-                                   v128f, v128sb, 2, 0>;
-    defm WFCHESB : BinaryVRRcSPair<"wfchesb", 0xE7EA, null_frag, null_frag,
-                                   v32f, v32sb, 2, 8>;
-    defm WFCHEXB : BinaryVRRcSPair<"wfchexb", 0xE7EA, null_frag, null_frag,
-                                   v128q, v128xb, 4, 8>;
+  let Uses = [FPC] in {
+    def  VFCHE   : BinaryVRRcSPairFloatGeneric<"vfche", 0xE7EA>;
+    defm VFCHEDB : BinaryVRRcSPair<"vfchedb", 0xE7EA, z_vfcmphe, z_vfcmphes,
+                                   v128g, v128db, 3, 0>;
+    defm WFCHEDB : BinaryVRRcSPair<"wfchedb", 0xE7EA, null_frag, null_frag,
+                                   v64g, v64db, 3, 8>;
+    let Predicates = [FeatureVectorEnhancements1] in {
+      defm VFCHESB : BinaryVRRcSPair<"vfchesb", 0xE7EA, z_vfcmphe, z_vfcmphes,
+                                     v128f, v128sb, 2, 0>;
+      defm WFCHESB : BinaryVRRcSPair<"wfchesb", 0xE7EA, null_frag, null_frag,
+                                     v32f, v32sb, 2, 8>;
+      defm WFCHEXB : BinaryVRRcSPair<"wfchexb", 0xE7EA, null_frag, null_frag,
+                                     v128q, v128xb, 4, 8>;
+    }
   }
 
   // Compare and signal high or equal.
-  let Predicates = [FeatureVectorEnhancements1] in {
+  let Uses = [FPC], Predicates = [FeatureVectorEnhancements1] in {
     defm VFKHEDB : BinaryVRRcSPair<"vfkhedb", 0xE7EA, null_frag, null_frag,
                                    v128g, v128db, 3, 4>;
     defm WFKHEDB : BinaryVRRcSPair<"wfkhedb", 0xE7EA, null_frag, null_frag,
Index: lib/Target/SystemZ/SystemZOperators.td
===================================================================
--- lib/Target/SystemZ/SystemZOperators.td
+++ lib/Target/SystemZ/SystemZOperators.td
@@ -663,21 +663,21 @@
                        (sub node:$src1, node:$src2)]>;
 
 // Fused multiply-subtract, using the natural operand order.
-def fms : PatFrag<(ops node:$src1, node:$src2, node:$src3),
-                  (fma node:$src1, node:$src2, (fneg node:$src3))>;
+def any_fms : PatFrag<(ops node:$src1, node:$src2, node:$src3),
+                      (any_fma node:$src1, node:$src2, (fneg node:$src3))>;
 
 // Fused multiply-add and multiply-subtract, but with the order of the
 // operands matching SystemZ's MA and MS instructions.
-def z_fma : PatFrag<(ops node:$src1, node:$src2, node:$src3),
-                    (fma node:$src2, node:$src3, node:$src1)>;
-def z_fms : PatFrag<(ops node:$src1, node:$src2, node:$src3),
-                    (fma node:$src2, node:$src3, (fneg node:$src1))>;
+def z_any_fma : PatFrag<(ops node:$src1, node:$src2, node:$src3),
+                        (any_fma node:$src2, node:$src3, node:$src1)>;
+def z_any_fms : PatFrag<(ops node:$src1, node:$src2, node:$src3),
+                        (any_fma node:$src2, node:$src3, (fneg node:$src1))>;
 
 // Negative fused multiply-add and multiply-subtract.
-def fnma : PatFrag<(ops node:$src1, node:$src2, node:$src3),
-                   (fneg (fma node:$src1, node:$src2, node:$src3))>;
-def fnms : PatFrag<(ops node:$src1, node:$src2, node:$src3),
-                   (fneg (fms node:$src1, node:$src2, node:$src3))>;
+def any_fnma : PatFrag<(ops node:$src1, node:$src2, node:$src3),
+                       (fneg (any_fma node:$src1, node:$src2, node:$src3))>;
+def any_fnms : PatFrag<(ops node:$src1, node:$src2, node:$src3),
+                       (fneg (any_fms node:$src1, node:$src2, node:$src3))>;
 
 // Floating-point negative absolute.
 def fnabs : PatFrag<(ops node:$ptr), (fneg (fabs node:$ptr))>;
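
For reference, the any_* fragments used above are assumed to match either the
plain SelectionDAG node or its STRICT_ counterpart, so a single instruction
pattern covers both IR forms. A minimal IR sketch of the two inputs that
should select the same fused multiply-add (function names are illustrative):

declare double @llvm.fma.f64(double, double, double)
declare double @llvm.experimental.constrained.fma.f64(double, double, double,
                                                      metadata, metadata)

; Plain form: matched through the fma side of any_fma.
define double @fma_plain(double %a, double %b, double %c) {
  %res = call double @llvm.fma.f64(double %a, double %b, double %c)
  ret double %res
}

; Strict form: lowered to STRICT_FMA and matched through the same pattern.
define double @fma_strict(double %a, double %b, double %c) {
  %res = call double @llvm.experimental.constrained.fma.f64(
                        double %a, double %b, double %c,
                        metadata !"round.dynamic",
                        metadata !"fpexcept.strict")
  ret double %res
}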
Index: lib/Target/SystemZ/SystemZRegisterInfo.cpp
===================================================================
--- lib/Target/SystemZ/SystemZRegisterInfo.cpp
+++ lib/Target/SystemZ/SystemZRegisterInfo.cpp
@@ -156,6 +156,9 @@
   Reserved.set(SystemZ::A0);
   Reserved.set(SystemZ::A1);
 
+  // FPC is the floating-point control register.
+  Reserved.set(SystemZ::FPC);
+
   return Reserved;
 }
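
Reserving FPC excludes it from register allocation, which is what lets it
appear as an implicit use on nearly every floating-point instruction without
liveness bookkeeping. A hypothetical sketch of how other code would observe
this through the standard MachineRegisterInfo API:

#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"

// Sketch only: FPC is never allocated, so passes can rely on it being
// reserved and treat it purely as an implicit operand.
static bool isFPCReserved(const llvm::MachineFunction &MF) {
  const llvm::MachineRegisterInfo &MRI = MF.getRegInfo();
  return MRI.isReserved(SystemZ::FPC); // reflects getReservedRegs() above
}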
 
Index: lib/Target/SystemZ/SystemZRegisterInfo.td
===================================================================
--- lib/Target/SystemZ/SystemZRegisterInfo.td
+++ lib/Target/SystemZ/SystemZRegisterInfo.td
@@ -296,6 +296,13 @@
 let isAllocatable = 0, CopyCost = -1 in
   def CCR : RegisterClass<"SystemZ", [i32], 32, (add CC)>;
 
+// The floating-point control register.
+// Note: We only model the current rounding modes and the IEEE exception
+// masks; the IEEE status flags and the DXC are not modeled here.
+def FPC : SystemZReg<"fpc">;
+let isAllocatable = 0 in
+  def FPCRegs : RegisterClass<"SystemZ", [i32], 32, (add FPC)>;
+
 // Access registers.
 class ACR32<bits<16> num, string n> : SystemZReg<n> {
   let HWEncoding = num;
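
The counterpart, not shown in this excerpt, is that instructions which write
the control register need Defs = [FPC] so that floating-point operations
cannot be scheduled across them (see the sfpc test in fp-strict-alias.ll
below). A hypothetical sketch in the style of the SystemZ instruction files;
the class name and pattern operand are assumptions:

// Hypothetical: SET FPC (opcode 0xB384) replaces the whole control
// register, so it must define FPC; llvm.s390.sfpc would map here.
let Defs = [FPC] in
  def SFPC : SideEffectUnaryRRE<"sfpc", 0xB384, GR32, int_s390_sfpc>;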
Index: test/CodeGen/SystemZ/RAbasic-invalid-LR-update.mir
===================================================================
--- test/CodeGen/SystemZ/RAbasic-invalid-LR-update.mir
+++ test/CodeGen/SystemZ/RAbasic-invalid-LR-update.mir
@@ -181,11 +181,11 @@
     J %bb.3
   
   bb.3:
-    WFCDB undef %46, %45, implicit-def $cc
+    WFCDB undef %46, %45, implicit-def $cc, implicit $fpc
     %48 = IPM implicit killed $cc
     %48 = AFIMux %48, 268435456, implicit-def dead $cc
     %6 = RISBMux undef %6, %48, 31, 159, 35
-    WFCDB undef %50, %45, implicit-def $cc
+    WFCDB undef %50, %45, implicit-def $cc, implicit $fpc
     BRC 15, 6, %bb.1, implicit killed $cc
     J %bb.4
   
Index: test/CodeGen/SystemZ/clear-liverange-spillreg.mir
===================================================================
--- test/CodeGen/SystemZ/clear-liverange-spillreg.mir
+++ test/CodeGen/SystemZ/clear-liverange-spillreg.mir
@@ -401,7 +401,7 @@
     BRC 14, 6, %bb.29, implicit killed $cc
   
   bb.28:
-    %130 = CDFBR %60
+    %130 = CDFBR %60, implicit $fpc
     J %bb.30
   
   bb.29:
Index: test/CodeGen/SystemZ/fp-cmp-07.mir
===================================================================
--- test/CodeGen/SystemZ/fp-cmp-07.mir
+++ test/CodeGen/SystemZ/fp-cmp-07.mir
@@ -30,7 +30,7 @@
   bb.0.entry:
     liveins: $f0s, $r2d
 
-    LTEBRCompare $f0s, $f0s, implicit-def $cc
+    LTEBRCompare $f0s, $f0s, implicit-def $cc, implicit $fpc
     $f2s = LER $f0s
     INLINEASM &"blah $0", 1, 9, $f2s
     CondReturn 15, 4, implicit $f0s, implicit $cc
Index: test/CodeGen/SystemZ/fp-conv-17.mir
===================================================================
--- test/CodeGen/SystemZ/fp-conv-17.mir
+++ test/CodeGen/SystemZ/fp-conv-17.mir
@@ -163,39 +163,39 @@
     STE %16, %1, 0, $noreg :: (volatile store 4 into %ir.ptr2)
     STE %17, %1, 0, $noreg :: (volatile store 4 into %ir.ptr2)
     STE %18, %1, 0, $noreg :: (volatile store 4 into %ir.ptr2)
-    %19 = LDEBR %2
+    %19 = LDEBR %2, implicit $fpc
     STD %19, %0, 0, $noreg :: (volatile store 8 into %ir.ptr1)
-    %20 = LDEBR %3
+    %20 = LDEBR %3, implicit $fpc
     STD %20, %0, 0, $noreg :: (volatile store 8 into %ir.ptr1)
-    %21 = LDEBR %4
+    %21 = LDEBR %4, implicit $fpc
     STD %21, %0, 0, $noreg :: (volatile store 8 into %ir.ptr1)
-    %22 = LDEBR %5
+    %22 = LDEBR %5, implicit $fpc
     STD %22, %0, 0, $noreg :: (volatile store 8 into %ir.ptr1)
-    %23 = LDEBR %6
+    %23 = LDEBR %6, implicit $fpc
     STD %23, %0, 0, $noreg :: (volatile store 8 into %ir.ptr1)
-    %24 = LDEBR %7
+    %24 = LDEBR %7, implicit $fpc
     STD %24, %0, 0, $noreg :: (volatile store 8 into %ir.ptr1)
-    %25 = LDEBR %8
+    %25 = LDEBR %8, implicit $fpc
     STD %25, %0, 0, $noreg :: (volatile store 8 into %ir.ptr1)
-    %26 = LDEBR %9
+    %26 = LDEBR %9, implicit $fpc
     STD %26, %0, 0, $noreg :: (volatile store 8 into %ir.ptr1)
-    %27 = LDEBR %10
+    %27 = LDEBR %10, implicit $fpc
     STD %27, %0, 0, $noreg :: (volatile store 8 into %ir.ptr1)
-    %28 = LDEBR %11
+    %28 = LDEBR %11, implicit $fpc
     STD %28, %0, 0, $noreg :: (volatile store 8 into %ir.ptr1)
-    %29 = LDEBR %12
+    %29 = LDEBR %12, implicit $fpc
     STD %29, %0, 0, $noreg :: (volatile store 8 into %ir.ptr1)
-    %30 = LDEBR %13
+    %30 = LDEBR %13, implicit $fpc
     STD %30, %0, 0, $noreg :: (volatile store 8 into %ir.ptr1)
-    %31 = LDEBR %14
+    %31 = LDEBR %14, implicit $fpc
     STD %31, %0, 0, $noreg :: (volatile store 8 into %ir.ptr1)
-    %32 = LDEBR %15
+    %32 = LDEBR %15, implicit $fpc
     STD %32, %0, 0, $noreg :: (volatile store 8 into %ir.ptr1)
-    %33 = LDEBR %16
+    %33 = LDEBR %16, implicit $fpc
     STD %33, %0, 0, $noreg :: (volatile store 8 into %ir.ptr1)
-    %34 = LDEBR %17
+    %34 = LDEBR %17, implicit $fpc
     STD %34, %0, 0, $noreg :: (volatile store 8 into %ir.ptr1)
-    %35 = LDEBR %18
+    %35 = LDEBR %18, implicit $fpc
     STD %35, %0, 0, $noreg :: (volatile store 8 into %ir.ptr1)
     Return
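
These MIR test updates follow mechanically from the new implicit operand:
once an instruction's description lists FPC in Uses, the MIR parser and
verifier expect the operand to be spelled out. A minimal sketch of the
accepted form (virtual register numbers taken from the test above):

# With Uses = [FPC] on the conversion, the bare '%19 = LDEBR %2' form is
# no longer accepted; the implicit use must be explicit in .mir files.
%19 = LDEBR %2, implicit $fpc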
 
Index: test/CodeGen/SystemZ/fp-strict-add-01.ll
===================================================================
--- test/CodeGen/SystemZ/fp-strict-add-01.ll
+++ test/CodeGen/SystemZ/fp-strict-add-01.ll
@@ -0,0 +1,173 @@
+; Test strict 32-bit floating-point addition.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 \
+; RUN:   | FileCheck -check-prefix=CHECK -check-prefix=CHECK-SCALAR %s
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z14 | FileCheck %s
+
+declare float @foo()
+declare float @llvm.experimental.constrained.fadd.f32(float, float, metadata, metadata)
+
+; Check register addition.
+define float @f1(float %f1, float %f2) {
+; CHECK-LABEL: f1:
+; CHECK: aebr %f0, %f2
+; CHECK: br %r14
+  %res = call float @llvm.experimental.constrained.fadd.f32(
+                        float %f1, float %f2,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+  ret float %res
+}
+
+; Check the low end of the AEB range.
+define float @f2(float %f1, float *%ptr) {
+; CHECK-LABEL: f2:
+; CHECK: aeb %f0, 0(%r2)
+; CHECK: br %r14
+  %f2 = load float, float *%ptr
+  %res = call float @llvm.experimental.constrained.fadd.f32(
+                        float %f1, float %f2,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+  ret float %res
+}
+
+; Check the high end of the aligned AEB range.
+define float @f3(float %f1, float *%base) {
+; CHECK-LABEL: f3:
+; CHECK: aeb %f0, 4092(%r2)
+; CHECK: br %r14
+  %ptr = getelementptr float, float *%base, i64 1023
+  %f2 = load float, float *%ptr
+  %res = call float @llvm.experimental.constrained.fadd.f32(
+                        float %f1, float %f2,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+  ret float %res
+}
+
+; Check the next word up, which needs separate address logic.
+; Other sequences besides this one would be OK.
+define float @f4(float %f1, float *%base) {
+; CHECK-LABEL: f4:
+; CHECK: aghi %r2, 4096
+; CHECK: aeb %f0, 0(%r2)
+; CHECK: br %r14
+  %ptr = getelementptr float, float *%base, i64 1024
+  %f2 = load float, float *%ptr
+  %res = call float @llvm.experimental.constrained.fadd.f32(
+                        float %f1, float %f2,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+  ret float %res
+}
+
+; Check negative displacements, which also need separate address logic.
+define float @f5(float %f1, float *%base) {
+; CHECK-LABEL: f5:
+; CHECK: aghi %r2, -4
+; CHECK: aeb %f0, 0(%r2)
+; CHECK: br %r14
+  %ptr = getelementptr float, float *%base, i64 -1
+  %f2 = load float, float *%ptr
+  %res = call float @llvm.experimental.constrained.fadd.f32(
+                        float %f1, float %f2,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+  ret float %res
+}
+
+; Check that AEB allows indices.
+define float @f6(float %f1, float *%base, i64 %index) {
+; CHECK-LABEL: f6:
+; CHECK: sllg %r1, %r3, 2
+; CHECK: aeb %f0, 400(%r1,%r2)
+; CHECK: br %r14
+  %ptr1 = getelementptr float, float *%base, i64 %index
+  %ptr2 = getelementptr float, float *%ptr1, i64 100
+  %f2 = load float, float *%ptr2
+  %res = call float @llvm.experimental.constrained.fadd.f32(
+                        float %f1, float %f2,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+  ret float %res
+}
+
+; Check that additions of spilled values can use AEB rather than AEBR.
+define float @f7(float *%ptr0) {
+; CHECK-LABEL: f7:
+; CHECK: brasl %r14, foo@PLT
+; CHECK-SCALAR: aeb %f0, 16{{[04]}}(%r15)
+; CHECK: br %r14
+  %ptr1 = getelementptr float, float *%ptr0, i64 2
+  %ptr2 = getelementptr float, float *%ptr0, i64 4
+  %ptr3 = getelementptr float, float *%ptr0, i64 6
+  %ptr4 = getelementptr float, float *%ptr0, i64 8
+  %ptr5 = getelementptr float, float *%ptr0, i64 10
+  %ptr6 = getelementptr float, float *%ptr0, i64 12
+  %ptr7 = getelementptr float, float *%ptr0, i64 14
+  %ptr8 = getelementptr float, float *%ptr0, i64 16
+  %ptr9 = getelementptr float, float *%ptr0, i64 18
+  %ptr10 = getelementptr float, float *%ptr0, i64 20
+
+  %val0 = load float, float *%ptr0
+  %val1 = load float, float *%ptr1
+  %val2 = load float, float *%ptr2
+  %val3 = load float, float *%ptr3
+  %val4 = load float, float *%ptr4
+  %val5 = load float, float *%ptr5
+  %val6 = load float, float *%ptr6
+  %val7 = load float, float *%ptr7
+  %val8 = load float, float *%ptr8
+  %val9 = load float, float *%ptr9
+  %val10 = load float, float *%ptr10
+
+  %ret = call float @foo()
+
+  %add0 = call float @llvm.experimental.constrained.fadd.f32(
+                        float %ret, float %val0,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+  %add1 = call float @llvm.experimental.constrained.fadd.f32(
+                        float %add0, float %val1,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+  %add2 = call float @llvm.experimental.constrained.fadd.f32(
+                        float %add1, float %val2,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+  %add3 = call float @llvm.experimental.constrained.fadd.f32(
+                        float %add2, float %val3,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+  %add4 = call float @llvm.experimental.constrained.fadd.f32(
+                        float %add3, float %val4,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+  %add5 = call float @llvm.experimental.constrained.fadd.f32(
+                        float %add4, float %val5,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+  %add6 = call float @llvm.experimental.constrained.fadd.f32(
+                        float %add5, float %val6,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+  %add7 = call float @llvm.experimental.constrained.fadd.f32(
+                        float %add6, float %val7,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+  %add8 = call float @llvm.experimental.constrained.fadd.f32(
+                        float %add7, float %val8,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+  %add9 = call float @llvm.experimental.constrained.fadd.f32(
+                        float %add8, float %val9,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+  %add10 = call float @llvm.experimental.constrained.fadd.f32(
+                        float %add9, float %val10,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+
+  ret float %add10
+}
Index: test/CodeGen/SystemZ/fp-strict-add-02.ll
===================================================================
--- test/CodeGen/SystemZ/fp-strict-add-02.ll
+++ test/CodeGen/SystemZ/fp-strict-add-02.ll
@@ -0,0 +1,172 @@
+; Test strict 64-bit floating-point addition.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 \
+; RUN:   | FileCheck -check-prefix=CHECK -check-prefix=CHECK-SCALAR %s
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 -verify-machineinstrs | FileCheck %s
+declare double @foo()
+declare double @llvm.experimental.constrained.fadd.f64(double, double, metadata, metadata)
+
+; Check register addition.
+define double @f1(double %f1, double %f2) {
+; CHECK-LABEL: f1:
+; CHECK: adbr %f0, %f2
+; CHECK: br %r14
+  %res = call double @llvm.experimental.constrained.fadd.f64(
+                        double %f1, double %f2,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+  ret double %res
+}
+
+; Check the low end of the ADB range.
+define double @f2(double %f1, double *%ptr) {
+; CHECK-LABEL: f2:
+; CHECK: adb %f0, 0(%r2)
+; CHECK: br %r14
+  %f2 = load double, double *%ptr
+  %res = call double @llvm.experimental.constrained.fadd.f64(
+                        double %f1, double %f2,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+  ret double %res
+}
+
+; Check the high end of the aligned ADB range.
+define double @f3(double %f1, double *%base) {
+; CHECK-LABEL: f3:
+; CHECK: adb %f0, 4088(%r2)
+; CHECK: br %r14
+  %ptr = getelementptr double, double *%base, i64 511
+  %f2 = load double, double *%ptr
+  %res = call double @llvm.experimental.constrained.fadd.f64(
+                        double %f1, double %f2,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+  ret double %res
+}
+
+; Check the next doubleword up, which needs separate address logic.
+; Other sequences besides this one would be OK.
+define double @f4(double %f1, double *%base) {
+; CHECK-LABEL: f4:
+; CHECK: aghi %r2, 4096
+; CHECK: adb %f0, 0(%r2)
+; CHECK: br %r14
+  %ptr = getelementptr double, double *%base, i64 512
+  %f2 = load double, double *%ptr
+  %res = call double @llvm.experimental.constrained.fadd.f64(
+                        double %f1, double %f2,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+  ret double %res
+}
+
+; Check negative displacements, which also need separate address logic.
+define double @f5(double %f1, double *%base) {
+; CHECK-LABEL: f5:
+; CHECK: aghi %r2, -8
+; CHECK: adb %f0, 0(%r2)
+; CHECK: br %r14
+  %ptr = getelementptr double, double *%base, i64 -1
+  %f2 = load double, double *%ptr
+  %res = call double @llvm.experimental.constrained.fadd.f64(
+                        double %f1, double %f2,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+  ret double %res
+}
+
+; Check that ADB allows indices.
+define double @f6(double %f1, double *%base, i64 %index) {
+; CHECK-LABEL: f6:
+; CHECK: sllg %r1, %r3, 3
+; CHECK: adb %f0, 800(%r1,%r2)
+; CHECK: br %r14
+  %ptr1 = getelementptr double, double *%base, i64 %index
+  %ptr2 = getelementptr double, double *%ptr1, i64 100
+  %f2 = load double, double *%ptr2
+  %res = call double @llvm.experimental.constrained.fadd.f64(
+                        double %f1, double %f2,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+  ret double %res
+}
+
+; Check that additions of spilled values can use ADB rather than ADBR.
+define double @f7(double *%ptr0) {
+; CHECK-LABEL: f7:
+; CHECK: brasl %r14, foo@PLT
+; CHECK-SCALAR: adb %f0, 160(%r15)
+; CHECK: br %r14
+  %ptr1 = getelementptr double, double *%ptr0, i64 2
+  %ptr2 = getelementptr double, double *%ptr0, i64 4
+  %ptr3 = getelementptr double, double *%ptr0, i64 6
+  %ptr4 = getelementptr double, double *%ptr0, i64 8
+  %ptr5 = getelementptr double, double *%ptr0, i64 10
+  %ptr6 = getelementptr double, double *%ptr0, i64 12
+  %ptr7 = getelementptr double, double *%ptr0, i64 14
+  %ptr8 = getelementptr double, double *%ptr0, i64 16
+  %ptr9 = getelementptr double, double *%ptr0, i64 18
+  %ptr10 = getelementptr double, double *%ptr0, i64 20
+
+  %val0 = load double, double *%ptr0
+  %val1 = load double, double *%ptr1
+  %val2 = load double, double *%ptr2
+  %val3 = load double, double *%ptr3
+  %val4 = load double, double *%ptr4
+  %val5 = load double, double *%ptr5
+  %val6 = load double, double *%ptr6
+  %val7 = load double, double *%ptr7
+  %val8 = load double, double *%ptr8
+  %val9 = load double, double *%ptr9
+  %val10 = load double, double *%ptr10
+
+  %ret = call double @foo()
+
+  %add0 = call double @llvm.experimental.constrained.fadd.f64(
+                        double %ret, double %val0,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+  %add1 = call double @llvm.experimental.constrained.fadd.f64(
+                        double %add0, double %val1,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+  %add2 = call double @llvm.experimental.constrained.fadd.f64(
+                        double %add1, double %val2,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+  %add3 = call double @llvm.experimental.constrained.fadd.f64(
+                        double %add2, double %val3,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+  %add4 = call double @llvm.experimental.constrained.fadd.f64(
+                        double %add3, double %val4,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+  %add5 = call double @llvm.experimental.constrained.fadd.f64(
+                        double %add4, double %val5,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+  %add6 = call double @llvm.experimental.constrained.fadd.f64(
+                        double %add5, double %val6,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+  %add7 = call double @llvm.experimental.constrained.fadd.f64(
+                        double %add6, double %val7,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+  %add8 = call double @llvm.experimental.constrained.fadd.f64(
+                        double %add7, double %val8,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+  %add9 = call double @llvm.experimental.constrained.fadd.f64(
+                        double %add8, double %val9,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+  %add10 = call double @llvm.experimental.constrained.fadd.f64(
+                        double %add9, double %val10,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+
+  ret double %add10
+}
Index: test/CodeGen/SystemZ/fp-strict-add-03.ll
===================================================================
--- test/CodeGen/SystemZ/fp-strict-add-03.ll
+++ test/CodeGen/SystemZ/fp-strict-add-03.ll
@@ -0,0 +1,25 @@
+; Test strict 128-bit floating-point addition.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+
+declare fp128 @llvm.experimental.constrained.fadd.f128(fp128, fp128, metadata, metadata)
+
+; There is no memory form of 128-bit addition.
+define void @f1(fp128 *%ptr, float %f2) {
+; CHECK-LABEL: f1:
+; CHECK-DAG: lxebr %f0, %f0
+; CHECK-DAG: ld %f1, 0(%r2)
+; CHECK-DAG: ld %f3, 8(%r2)
+; CHECK: axbr %f0, %f1
+; CHECK: std %f0, 0(%r2)
+; CHECK: std %f2, 8(%r2)
+; CHECK: br %r14
+  %f1 = load fp128, fp128 *%ptr
+  %f2x = fpext float %f2 to fp128
+  %sum = call fp128 @llvm.experimental.constrained.fadd.f128(
+                        fp128 %f1, fp128 %f2x,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+  store fp128 %sum, fp128 *%ptr
+  ret void
+}
Index: test/CodeGen/SystemZ/fp-strict-add-04.ll
===================================================================
--- test/CodeGen/SystemZ/fp-strict-add-04.ll
+++ test/CodeGen/SystemZ/fp-strict-add-04.ll
@@ -0,0 +1,22 @@
+; Test strict 128-bit floating-point addition on z14.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z14 | FileCheck %s
+
+declare fp128 @llvm.experimental.constrained.fadd.f128(fp128, fp128, metadata, metadata)
+
+define void @f1(fp128 *%ptr1, fp128 *%ptr2) {
+; CHECK-LABEL: f1:
+; CHECK-DAG: vl [[REG1:%v[0-9]+]], 0(%r2)
+; CHECK-DAG: vl [[REG2:%v[0-9]+]], 0(%r3)
+; CHECK: wfaxb [[RES:%v[0-9]+]], [[REG1]], [[REG2]]
+; CHECK: vst [[RES]], 0(%r2)
+; CHECK: br %r14
+  %f1 = load fp128, fp128 *%ptr1
+  %f2 = load fp128, fp128 *%ptr2
+  %sum = call fp128 @llvm.experimental.constrained.fadd.f128(
+                        fp128 %f1, fp128 %f2,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+  store fp128 %sum, fp128 *%ptr1
+  ret void
+}
Index: test/CodeGen/SystemZ/fp-strict-alias.ll
===================================================================
--- test/CodeGen/SystemZ/fp-strict-alias.ll
+++ test/CodeGen/SystemZ/fp-strict-alias.ll
@@ -0,0 +1,99 @@
+; Verify that strict FP operations are not rescheduled.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
+
+declare float @llvm.experimental.constrained.fadd.f32(float, float, metadata, metadata)
+declare float @llvm.experimental.constrained.fsub.f32(float, float, metadata, metadata)
+declare float @llvm.experimental.constrained.sqrt.f32(float, metadata, metadata)
+declare float @llvm.sqrt.f32(float)
+declare void @llvm.s390.sfpc(i32)
+
+; For non-strict operations, we expect the post-RA scheduler to
+; separate the two square root instructions on z13.
+define void @f1(float %f1, float %f2, float %f3, float %f4, float *%ptr0) {
+; CHECK-LABEL: f1:
+; CHECK: sqebr
+; CHECK: {{aebr|sebr}}
+; CHECK: sqebr
+; CHECK: br %r14
+
+  %add = fadd float %f1, %f2
+  %sub = fsub float %f3, %f4
+  %sqrt1 = call float @llvm.sqrt.f32(float %f2)
+  %sqrt2 = call float @llvm.sqrt.f32(float %f4)
+
+  %ptr1 = getelementptr float, float *%ptr0, i64 1
+  %ptr2 = getelementptr float, float *%ptr0, i64 2
+  %ptr3 = getelementptr float, float *%ptr0, i64 3
+
+  store float %add, float *%ptr0
+  store float %sub, float *%ptr1
+  store float %sqrt1, float *%ptr2
+  store float %sqrt2, float *%ptr3
+
+  ret void
+}
+
+; But for strict operations, this must not happen.
+define void @f2(float %f1, float %f2, float %f3, float %f4, float *%ptr0) {
+; CHECK-LABEL: f2:
+; CHECK: sqebr
+; CHECK-NEXT: sqebr
+; CHECK: br %r14
+
+  %add = call float @llvm.experimental.constrained.fadd.f32(
+                        float %f1, float %f2,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+  %sub = call float @llvm.experimental.constrained.fsub.f32(
+                        float %f3, float %f4,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+  %sqrt1 = call float @llvm.experimental.constrained.sqrt.f32(
+                        float %f2,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+  %sqrt2 = call float @llvm.experimental.constrained.sqrt.f32(
+                        float %f4,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+
+  %ptr1 = getelementptr float, float *%ptr0, i64 1
+  %ptr2 = getelementptr float, float *%ptr0, i64 2
+  %ptr3 = getelementptr float, float *%ptr0, i64 3
+
+  store float %add, float *%ptr0
+  store float %sub, float *%ptr1
+  store float %sqrt1, float *%ptr2
+  store float %sqrt2, float *%ptr3
+
+  ret void
+}
+
+; Also, even non-strict operations must not be scheduled across an SFPC.
+define void @f3(float %f1, float %f2, float %f3, float %f4, float *%ptr0) {
+; CHECK-LABEL: f3:
+; CHECK: {{aebr|sebr}}
+; CHECK: {{aebr|sebr}}
+; CHECK: sfpc
+; CHECK: sqebr
+; CHECK: sqebr
+; CHECK: br %r14
+
+  %add = fadd float %f1, %f2
+  %sub = fsub float %f3, %f4
+  call void @llvm.s390.sfpc(i32 0)
+  %sqrt1 = call float @llvm.sqrt.f32(float %f2)
+  %sqrt2 = call float @llvm.sqrt.f32(float %f4)
+
+  %ptr1 = getelementptr float, float *%ptr0, i64 1
+  %ptr2 = getelementptr float, float *%ptr0, i64 2
+  %ptr3 = getelementptr float, float *%ptr0, i64 3
+
+  store float %add, float *%ptr0
+  store float %sub, float *%ptr1
+  store float %sqrt1, float *%ptr2
+  store float %sqrt2, float *%ptr3
+
+  ret void
+}
Index: test/CodeGen/SystemZ/fp-strict-div-01.ll
===================================================================
--- test/CodeGen/SystemZ/fp-strict-div-01.ll
+++ test/CodeGen/SystemZ/fp-strict-div-01.ll
@@ -0,0 +1,173 @@
+; Test strict 32-bit floating-point division.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 \
+; RUN:   | FileCheck -check-prefix=CHECK -check-prefix=CHECK-SCALAR %s
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z14 | FileCheck %s
+
+declare float @foo()
+declare float @llvm.experimental.constrained.fdiv.f32(float, float, metadata, metadata)
+
+; Check register division.
+define float @f1(float %f1, float %f2) {
+; CHECK-LABEL: f1:
+; CHECK: debr %f0, %f2
+; CHECK: br %r14
+  %res = call float @llvm.experimental.constrained.fdiv.f32(
+                        float %f1, float %f2,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+  ret float %res
+}
+
+; Check the low end of the DEB range.
+define float @f2(float %f1, float *%ptr) {
+; CHECK-LABEL: f2:
+; CHECK: deb %f0, 0(%r2)
+; CHECK: br %r14
+  %f2 = load float, float *%ptr
+  %res = call float @llvm.experimental.constrained.fdiv.f32(
+                        float %f1, float %f2,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+  ret float %res
+}
+
+; Check the high end of the aligned DEB range.
+define float @f3(float %f1, float *%base) {
+; CHECK-LABEL: f3:
+; CHECK: deb %f0, 4092(%r2)
+; CHECK: br %r14
+  %ptr = getelementptr float, float *%base, i64 1023
+  %f2 = load float, float *%ptr
+  %res = call float @llvm.experimental.constrained.fdiv.f32(
+                        float %f1, float %f2,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+  ret float %res
+}
+
+; Check the next word up, which needs separate address logic.
+; Other sequences besides this one would be OK.
+define float @f4(float %f1, float *%base) {
+; CHECK-LABEL: f4:
+; CHECK: aghi %r2, 4096
+; CHECK: deb %f0, 0(%r2)
+; CHECK: br %r14
+  %ptr = getelementptr float, float *%base, i64 1024
+  %f2 = load float, float *%ptr
+  %res = call float @llvm.experimental.constrained.fdiv.f32(
+                        float %f1, float %f2,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+  ret float %res
+}
+
+; Check negative displacements, which also need separate address logic.
+define float @f5(float %f1, float *%base) {
+; CHECK-LABEL: f5:
+; CHECK: aghi %r2, -4
+; CHECK: deb %f0, 0(%r2)
+; CHECK: br %r14
+  %ptr = getelementptr float, float *%base, i64 -1
+  %f2 = load float, float *%ptr
+  %res = call float @llvm.experimental.constrained.fdiv.f32(
+                        float %f1, float %f2,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+  ret float %res
+}
+
+; Check that DEB allows indices.
+define float @f6(float %f1, float *%base, i64 %index) {
+; CHECK-LABEL: f6:
+; CHECK: sllg %r1, %r3, 2
+; CHECK: deb %f0, 400(%r1,%r2)
+; CHECK: br %r14
+  %ptr1 = getelementptr float, float *%base, i64 %index
+  %ptr2 = getelementptr float, float *%ptr1, i64 100
+  %f2 = load float, float *%ptr2
+  %res = call float @llvm.experimental.constrained.fdiv.f32(
+                        float %f1, float %f2,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+  ret float %res
+}
+
+; Check that divisions of spilled values can use DEB rather than DEBR.
+define float @f7(float *%ptr0) {
+; CHECK-LABEL: f7:
+; CHECK: brasl %r14, foo@PLT
+; CHECK-SCALAR: deb %f0, 16{{[04]}}(%r15)
+; CHECK: br %r14
+  %ptr1 = getelementptr float, float *%ptr0, i64 2
+  %ptr2 = getelementptr float, float *%ptr0, i64 4
+  %ptr3 = getelementptr float, float *%ptr0, i64 6
+  %ptr4 = getelementptr float, float *%ptr0, i64 8
+  %ptr5 = getelementptr float, float *%ptr0, i64 10
+  %ptr6 = getelementptr float, float *%ptr0, i64 12
+  %ptr7 = getelementptr float, float *%ptr0, i64 14
+  %ptr8 = getelementptr float, float *%ptr0, i64 16
+  %ptr9 = getelementptr float, float *%ptr0, i64 18
+  %ptr10 = getelementptr float, float *%ptr0, i64 20
+
+  %val0 = load float, float *%ptr0
+  %val1 = load float, float *%ptr1
+  %val2 = load float, float *%ptr2
+  %val3 = load float, float *%ptr3
+  %val4 = load float, float *%ptr4
+  %val5 = load float, float *%ptr5
+  %val6 = load float, float *%ptr6
+  %val7 = load float, float *%ptr7
+  %val8 = load float, float *%ptr8
+  %val9 = load float, float *%ptr9
+  %val10 = load float, float *%ptr10
+
+  %ret = call float @foo()
+
+  %div0 = call float @llvm.experimental.constrained.fdiv.f32(
+                        float %ret, float %val0,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+  %div1 = call float @llvm.experimental.constrained.fdiv.f32(
+                        float %div0, float %val1,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+  %div2 = call float @llvm.experimental.constrained.fdiv.f32(
+                        float %div1, float %val2,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+  %div3 = call float @llvm.experimental.constrained.fdiv.f32(
+                        float %div2, float %val3,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+  %div4 = call float @llvm.experimental.constrained.fdiv.f32(
+                        float %div3, float %val4,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+  %div5 = call float @llvm.experimental.constrained.fdiv.f32(
+                        float %div4, float %val5,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+  %div6 = call float @llvm.experimental.constrained.fdiv.f32(
+                        float %div5, float %val6,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+  %div7 = call float @llvm.experimental.constrained.fdiv.f32(
+                        float %div6, float %val7,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+  %div8 = call float @llvm.experimental.constrained.fdiv.f32(
+                        float %div7, float %val8,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+  %div9 = call float @llvm.experimental.constrained.fdiv.f32(
+                        float %div8, float %val9,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+  %div10 = call float @llvm.experimental.constrained.fdiv.f32(
+                        float %div9, float %val10,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+
+  ret float %div10
+}
Index: test/CodeGen/SystemZ/fp-strict-div-02.ll
===================================================================
--- test/CodeGen/SystemZ/fp-strict-div-02.ll
+++ test/CodeGen/SystemZ/fp-strict-div-02.ll
@@ -0,0 +1,173 @@
+; Test strict 64-bit floating-point division.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 \
+; RUN:   | FileCheck -check-prefix=CHECK -check-prefix=CHECK-SCALAR %s
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
+
+declare double @foo()
+declare double @llvm.experimental.constrained.fdiv.f64(double, double, metadata, metadata)
+
+; Check register division.
+define double @f1(double %f1, double %f2) {
+; CHECK-LABEL: f1:
+; CHECK: ddbr %f0, %f2
+; CHECK: br %r14
+  %res = call double @llvm.experimental.constrained.fdiv.f64(
+                        double %f1, double %f2,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+  ret double %res
+}
+
+; Check the low end of the DDB range.
+define double @f2(double %f1, double *%ptr) {
+; CHECK-LABEL: f2:
+; CHECK: ddb %f0, 0(%r2)
+; CHECK: br %r14
+  %f2 = load double, double *%ptr
+  %res = call double @llvm.experimental.constrained.fdiv.f64(
+                        double %f1, double %f2,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+  ret double %res
+}
+
+; Check the high end of the aligned DDB range.
+define double @f3(double %f1, double *%base) {
+; CHECK-LABEL: f3:
+; CHECK: ddb %f0, 4088(%r2)
+; CHECK: br %r14
+  %ptr = getelementptr double, double *%base, i64 511
+  %f2 = load double, double *%ptr
+  %res = call double @llvm.experimental.constrained.fdiv.f64(
+                        double %f1, double %f2,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+  ret double %res
+}
+
+; Check the next doubleword up, which needs separate address logic.
+; Other sequences besides this one would be OK.
+define double @f4(double %f1, double *%base) {
+; CHECK-LABEL: f4:
+; CHECK: aghi %r2, 4096
+; CHECK: ddb %f0, 0(%r2)
+; CHECK: br %r14
+  %ptr = getelementptr double, double *%base, i64 512
+  %f2 = load double, double *%ptr
+  %res = call double @llvm.experimental.constrained.fdiv.f64(
+                        double %f1, double %f2,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+  ret double %res
+}
+
+; Check negative displacements, which also need separate address logic.
+define double @f5(double %f1, double *%base) {
+; CHECK-LABEL: f5:
+; CHECK: aghi %r2, -8
+; CHECK: ddb %f0, 0(%r2)
+; CHECK: br %r14
+  %ptr = getelementptr double, double *%base, i64 -1
+  %f2 = load double, double *%ptr
+  %res = call double @llvm.experimental.constrained.fdiv.f64(
+                        double %f1, double %f2,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+  ret double %res
+}
+
+; Check that DDB allows indices.
+define double @f6(double %f1, double *%base, i64 %index) {
+; CHECK-LABEL: f6:
+; CHECK: sllg %r1, %r3, 3
+; CHECK: ddb %f0, 800(%r1,%r2)
+; CHECK: br %r14
+  %ptr1 = getelementptr double, double *%base, i64 %index
+  %ptr2 = getelementptr double, double *%ptr1, i64 100
+  %f2 = load double, double *%ptr2
+  %res = call double @llvm.experimental.constrained.fdiv.f64(
+                        double %f1, double %f2,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+  ret double %res
+}
+
+; Check that divisions of spilled values can use DDB rather than DDBR.
+define double @f7(double *%ptr0) {
+; CHECK-LABEL: f7:
+; CHECK: brasl %r14, foo@PLT
+; CHECK-SCALAR: ddb %f0, 160(%r15)
+; CHECK: br %r14
+  %ptr1 = getelementptr double, double *%ptr0, i64 2
+  %ptr2 = getelementptr double, double *%ptr0, i64 4
+  %ptr3 = getelementptr double, double *%ptr0, i64 6
+  %ptr4 = getelementptr double, double *%ptr0, i64 8
+  %ptr5 = getelementptr double, double *%ptr0, i64 10
+  %ptr6 = getelementptr double, double *%ptr0, i64 12
+  %ptr7 = getelementptr double, double *%ptr0, i64 14
+  %ptr8 = getelementptr double, double *%ptr0, i64 16
+  %ptr9 = getelementptr double, double *%ptr0, i64 18
+  %ptr10 = getelementptr double, double *%ptr0, i64 20
+
+  %val0 = load double, double *%ptr0
+  %val1 = load double, double *%ptr1
+  %val2 = load double, double *%ptr2
+  %val3 = load double, double *%ptr3
+  %val4 = load double, double *%ptr4
+  %val5 = load double, double *%ptr5
+  %val6 = load double, double *%ptr6
+  %val7 = load double, double *%ptr7
+  %val8 = load double, double *%ptr8
+  %val9 = load double, double *%ptr9
+  %val10 = load double, double *%ptr10
+
+  %ret = call double @foo()
+
+  %div0 = call double @llvm.experimental.constrained.fdiv.f64(
+                        double %ret, double %val0,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+  %div1 = call double @llvm.experimental.constrained.fdiv.f64(
+                        double %div0, double %val1,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+  %div2 = call double @llvm.experimental.constrained.fdiv.f64(
+                        double %div1, double %val2,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+  %div3 = call double @llvm.experimental.constrained.fdiv.f64(
+                        double %div2, double %val3,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+  %div4 = call double @llvm.experimental.constrained.fdiv.f64(
+                        double %div3, double %val4,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+  %div5 = call double @llvm.experimental.constrained.fdiv.f64(
+                        double %div4, double %val5,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+  %div6 = call double @llvm.experimental.constrained.fdiv.f64(
+                        double %div5, double %val6,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+  %div7 = call double @llvm.experimental.constrained.fdiv.f64(
+                        double %div6, double %val7,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+  %div8 = call double @llvm.experimental.constrained.fdiv.f64(
+                        double %div7, double %val8,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+  %div9 = call double @llvm.experimental.constrained.fdiv.f64(
+                        double %div8, double %val9,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+  %div10 = call double @llvm.experimental.constrained.fdiv.f64(
+                        double %div9, double %val10,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+
+  ret double %div10
+}
Index: test/CodeGen/SystemZ/fp-strict-div-03.ll
===================================================================
--- test/CodeGen/SystemZ/fp-strict-div-03.ll
+++ test/CodeGen/SystemZ/fp-strict-div-03.ll
@@ -0,0 +1,25 @@
+; Test strict 128-bit floating-point division.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+
+declare fp128 @llvm.experimental.constrained.fdiv.f128(fp128, fp128, metadata, metadata)
+
+; There is no memory form of 128-bit division.
+define void @f1(fp128 *%ptr, float %f2) {
+; CHECK-LABEL: f1:
+; CHECK-DAG: lxebr %f0, %f0
+; CHECK-DAG: ld %f1, 0(%r2)
+; CHECK-DAG: ld %f3, 8(%r2)
+; CHECK: dxbr %f1, %f0
+; CHECK: std %f1, 0(%r2)
+; CHECK: std %f3, 8(%r2)
+; CHECK: br %r14
+  %f1 = load fp128, fp128 *%ptr
+  %f2x = fpext float %f2 to fp128
+  %res = call fp128 @llvm.experimental.constrained.fdiv.f128(
+                        fp128 %f1, fp128 %f2x,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+  store fp128 %res, fp128 *%ptr
+  ret void
+}
Index: test/CodeGen/SystemZ/fp-strict-div-04.ll
===================================================================
--- test/CodeGen/SystemZ/fp-strict-div-04.ll
+++ test/CodeGen/SystemZ/fp-strict-div-04.ll
@@ -0,0 +1,22 @@
+; Test strict 128-bit floating-point division on z14.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z14 | FileCheck %s
+
+declare fp128 @llvm.experimental.constrained.fdiv.f128(fp128, fp128, metadata, metadata)
+
+define void @f1(fp128 *%ptr1, fp128 *%ptr2) {
+; CHECK-LABEL: f1:
+; CHECK-DAG: vl [[REG1:%v[0-9]+]], 0(%r2)
+; CHECK-DAG: vl [[REG2:%v[0-9]+]], 0(%r3)
+; CHECK: wfdxb [[RES:%v[0-9]+]], [[REG1]], [[REG2]]
+; CHECK: vst [[RES]], 0(%r2)
+; CHECK: br %r14
+  %f1 = load fp128, fp128 *%ptr1
+  %f2 = load fp128, fp128 *%ptr2
+  %res = call fp128 @llvm.experimental.constrained.fdiv.f128(
+                        fp128 %f1, fp128 %f2,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+  store fp128 %res, fp128 *%ptr1
+  ret void
+}
Index: test/CodeGen/SystemZ/fp-strict-mul-01.ll
===================================================================
--- test/CodeGen/SystemZ/fp-strict-mul-01.ll
+++ test/CodeGen/SystemZ/fp-strict-mul-01.ll
@@ -0,0 +1,174 @@
+; Test strict multiplication of two f32s, producing an f32 result.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 \
+; RUN:   | FileCheck -check-prefix=CHECK -check-prefix=CHECK-SCALAR %s
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z14 | FileCheck %s
+
+declare float @foo()
+declare float @llvm.experimental.constrained.fmul.f32(float, float, metadata, metadata)
+
+; Check register multiplication.
+define float @f1(float %f1, float %f2) {
+; CHECK-LABEL: f1:
+; CHECK: meebr %f0, %f2
+; CHECK: br %r14
+  %res = call float @llvm.experimental.constrained.fmul.f32(
+                        float %f1, float %f2,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+  ret float %res
+}
+
+; Check the low end of the MEEB range.
+define float @f2(float %f1, float *%ptr) {
+; CHECK-LABEL: f2:
+; CHECK: meeb %f0, 0(%r2)
+; CHECK: br %r14
+  %f2 = load float, float *%ptr
+  %res = call float @llvm.experimental.constrained.fmul.f32(
+                        float %f1, float %f2,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+  ret float %res
+}
+
+; Check the high end of the aligned MEEB range.
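+; The 12-bit unsigned displacement covers 0-4095, so 1023 * 4 = 4092 fits.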
+define float @f3(float %f1, float *%base) {
+; CHECK-LABEL: f3:
+; CHECK: meeb %f0, 4092(%r2)
+; CHECK: br %r14
+  %ptr = getelementptr float, float *%base, i64 1023
+  %f2 = load float, float *%ptr
+  %res = call float @llvm.experimental.constrained.fmul.f32(
+                        float %f1, float %f2,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+  ret float %res
+}
+
+; Check the next word up, which needs separate address logic.
+; Other sequences besides this one would be OK.
+define float @f4(float %f1, float *%base) {
+; CHECK-LABEL: f4:
+; CHECK: aghi %r2, 4096
+; CHECK: meeb %f0, 0(%r2)
+; CHECK: br %r14
+  %ptr = getelementptr float, float *%base, i64 1024
+  %f2 = load float, float *%ptr
+  %res = call float @llvm.experimental.constrained.fmul.f32(
+                        float %f1, float %f2,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+  ret float %res
+}
+
+; Check negative displacements, which also need separate address logic.
+define float @f5(float %f1, float *%base) {
+; CHECK-LABEL: f5:
+; CHECK: aghi %r2, -4
+; CHECK: meeb %f0, 0(%r2)
+; CHECK: br %r14
+  %ptr = getelementptr float, float *%base, i64 -1
+  %f2 = load float, float *%ptr
+  %res = call float @llvm.experimental.constrained.fmul.f32(
+                        float %f1, float %f2,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+  ret float %res
+}
+
+; Check that MEEB allows indices.
+define float @f6(float %f1, float *%base, i64 %index) {
+; CHECK-LABEL: f6:
+; CHECK: sllg %r1, %r3, 2
+; CHECK: meeb %f0, 400(%r1,%r2)
+; CHECK: br %r14
+  %ptr1 = getelementptr float, float *%base, i64 %index
+  %ptr2 = getelementptr float, float *%ptr1, i64 100
+  %f2 = load float, float *%ptr2
+  %res = call float @llvm.experimental.constrained.fmul.f32(
+                        float %f1, float %f2,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+  ret float %res
+}
+
+; Check that multiplications of spilled values can use MEEB rather than MEEBR.
+define float @f7(float *%ptr0) {
+; CHECK-LABEL: f7:
+; CHECK: brasl %r14, foo@PLT
+; CHECK-SCALAR: meeb %f0, 16{{[04]}}(%r15)
+; CHECK: br %r14
+  %ptr1 = getelementptr float, float *%ptr0, i64 2
+  %ptr2 = getelementptr float, float *%ptr0, i64 4
+  %ptr3 = getelementptr float, float *%ptr0, i64 6
+  %ptr4 = getelementptr float, float *%ptr0, i64 8
+  %ptr5 = getelementptr float, float *%ptr0, i64 10
+  %ptr6 = getelementptr float, float *%ptr0, i64 12
+  %ptr7 = getelementptr float, float *%ptr0, i64 14
+  %ptr8 = getelementptr float, float *%ptr0, i64 16
+  %ptr9 = getelementptr float, float *%ptr0, i64 18
+  %ptr10 = getelementptr float, float *%ptr0, i64 20
+
+  %val0 = load float, float *%ptr0
+  %val1 = load float, float *%ptr1
+  %val2 = load float, float *%ptr2
+  %val3 = load float, float *%ptr3
+  %val4 = load float, float *%ptr4
+  %val5 = load float, float *%ptr5
+  %val6 = load float, float *%ptr6
+  %val7 = load float, float *%ptr7
+  %val8 = load float, float *%ptr8
+  %val9 = load float, float *%ptr9
+  %val10 = load float, float *%ptr10
+
+  %ret = call float @foo()
+
+  %mul0 = call float @llvm.experimental.constrained.fmul.f32(
+                        float %ret, float %val0,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+  %mul1 = call float @llvm.experimental.constrained.fmul.f32(
+                        float %mul0, float %val1,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+  %mul2 = call float @llvm.experimental.constrained.fmul.f32(
+                        float %mul1, float %val2,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+  %mul3 = call float @llvm.experimental.constrained.fmul.f32(
+                        float %mul2, float %val3,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+  %mul4 = call float @llvm.experimental.constrained.fmul.f32(
+                        float %mul3, float %val4,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+  %mul5 = call float @llvm.experimental.constrained.fmul.f32(
+                        float %mul4, float %val5,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+  %mul6 = call float @llvm.experimental.constrained.fmul.f32(
+                        float %mul5, float %val6,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+  %mul7 = call float @llvm.experimental.constrained.fmul.f32(
+                        float %mul6, float %val7,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+  %mul8 = call float @llvm.experimental.constrained.fmul.f32(
+                        float %mul7, float %val8,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+  %mul9 = call float @llvm.experimental.constrained.fmul.f32(
+                        float %mul8, float %val9,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+  %mul10 = call float @llvm.experimental.constrained.fmul.f32(
+                        float %mul9, float %val10,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+
+  ret float %mul10
+}
Index: test/CodeGen/SystemZ/fp-strict-mul-02.ll
===================================================================
--- test/CodeGen/SystemZ/fp-strict-mul-02.ll
+++ test/CodeGen/SystemZ/fp-strict-mul-02.ll
@@ -0,0 +1,284 @@
+; Test strict multiplication of two f32s, producing an f64 result.
+; FIXME: we do not have a strict version of fpext yet
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+
+declare float @foo()
+declare double @llvm.experimental.constrained.fmul.f64(double, double, metadata, metadata)
+
+; Check register multiplication.
+define double @f1(float %f1, float %f2) {
+; CHECK-LABEL: f1:
+; CHECK: mdebr %f0, %f2
+; CHECK: br %r14
+  %f1x = fpext float %f1 to double
+  %f2x = fpext float %f2 to double
+  %res = call double @llvm.experimental.constrained.fmul.f64(
+                        double %f1x, double %f2x,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+  ret double %res
+}
+
+; Check the low end of the MDEB range.
+define double @f2(float %f1, float *%ptr) {
+; CHECK-LABEL: f2:
+; CHECK: mdeb %f0, 0(%r2)
+; CHECK: br %r14
+  %f2 = load float, float *%ptr
+  %f1x = fpext float %f1 to double
+  %f2x = fpext float %f2 to double
+  %res = call double @llvm.experimental.constrained.fmul.f64(
+                        double %f1x, double %f2x,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+  ret double %res
+}
+
+; Check the high end of the aligned MDEB range.
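+; The 12-bit unsigned displacement covers 0-4095, so 1023 * 4 = 4092 fits.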
+define double @f3(float %f1, float *%base) {
+; CHECK-LABEL: f3:
+; CHECK: mdeb %f0, 4092(%r2)
+; CHECK: br %r14
+  %ptr = getelementptr float, float *%base, i64 1023
+  %f2 = load float, float *%ptr
+  %f1x = fpext float %f1 to double
+  %f2x = fpext float %f2 to double
+  %res = call double @llvm.experimental.constrained.fmul.f64(
+                        double %f1x, double %f2x,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+  ret double %res
+}
+
+; Check the next word up, which needs separate address logic.
+; Other sequences besides this one would be OK.
+define double @f4(float %f1, float *%base) {
+; CHECK-LABEL: f4:
+; CHECK: aghi %r2, 4096
+; CHECK: mdeb %f0, 0(%r2)
+; CHECK: br %r14
+  %ptr = getelementptr float, float *%base, i64 1024
+  %f2 = load float, float *%ptr
+  %f1x = fpext float %f1 to double
+  %f2x = fpext float %f2 to double
+  %res = call double @llvm.experimental.constrained.fmul.f64(
+                        double %f1x, double %f2x,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+  ret double %res
+}
+
+; Check negative displacements, which also need separate address logic.
+define double @f5(float %f1, float *%base) {
+; CHECK-LABEL: f5:
+; CHECK: aghi %r2, -4
+; CHECK: mdeb %f0, 0(%r2)
+; CHECK: br %r14
+  %ptr = getelementptr float, float *%base, i64 -1
+  %f2 = load float, float *%ptr
+  %f1x = fpext float %f1 to double
+  %f2x = fpext float %f2 to double
+  %res = call double @llvm.experimental.constrained.fmul.f64(
+                        double %f1x, double %f2x,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+  ret double %res
+}
+
+; Check that MDEB allows indices.
+define double @f6(float %f1, float *%base, i64 %index) {
+; CHECK-LABEL: f6:
+; CHECK: sllg %r1, %r3, 2
+; CHECK: mdeb %f0, 400(%r1,%r2)
+; CHECK: br %r14
+  %ptr1 = getelementptr float, float *%base, i64 %index
+  %ptr2 = getelementptr float, float *%ptr1, i64 100
+  %f2 = load float, float *%ptr2
+  %f1x = fpext float %f1 to double
+  %f2x = fpext float %f2 to double
+  %res = call double @llvm.experimental.constrained.fmul.f64(
+                        double %f1x, double %f2x,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+  ret double %res
+}
+
+; Check that multiplications of spilled values can use MDEB rather than MDEBR.
+define float @f7(float *%ptr0) {
+; CHECK-LABEL: f7:
+; CHECK: brasl %r14, foo@PLT
+; CHECK: mdeb %f0, 16{{[04]}}(%r15)
+; CHECK: br %r14
+  %ptr1 = getelementptr float, float *%ptr0, i64 2
+  %ptr2 = getelementptr float, float *%ptr0, i64 4
+  %ptr3 = getelementptr float, float *%ptr0, i64 6
+  %ptr4 = getelementptr float, float *%ptr0, i64 8
+  %ptr5 = getelementptr float, float *%ptr0, i64 10
+  %ptr6 = getelementptr float, float *%ptr0, i64 12
+  %ptr7 = getelementptr float, float *%ptr0, i64 14
+  %ptr8 = getelementptr float, float *%ptr0, i64 16
+  %ptr9 = getelementptr float, float *%ptr0, i64 18
+  %ptr10 = getelementptr float, float *%ptr0, i64 20
+
+  %val0 = load float, float *%ptr0
+  %val1 = load float, float *%ptr1
+  %val2 = load float, float *%ptr2
+  %val3 = load float, float *%ptr3
+  %val4 = load float, float *%ptr4
+  %val5 = load float, float *%ptr5
+  %val6 = load float, float *%ptr6
+  %val7 = load float, float *%ptr7
+  %val8 = load float, float *%ptr8
+  %val9 = load float, float *%ptr9
+  %val10 = load float, float *%ptr10
+
+  %frob0 = fadd float %val0, %val0
+  %frob1 = fadd float %val1, %val1
+  %frob2 = fadd float %val2, %val2
+  %frob3 = fadd float %val3, %val3
+  %frob4 = fadd float %val4, %val4
+  %frob5 = fadd float %val5, %val5
+  %frob6 = fadd float %val6, %val6
+  %frob7 = fadd float %val7, %val7
+  %frob8 = fadd float %val8, %val8
+  %frob9 = fadd float %val9, %val9
+  %frob10 = fadd float %val10, %val10
+
+  store float %frob0, float *%ptr0
+  store float %frob1, float *%ptr1
+  store float %frob2, float *%ptr2
+  store float %frob3, float *%ptr3
+  store float %frob4, float *%ptr4
+  store float %frob5, float *%ptr5
+  store float %frob6, float *%ptr6
+  store float %frob7, float *%ptr7
+  store float %frob8, float *%ptr8
+  store float %frob9, float *%ptr9
+  store float %frob10, float *%ptr10
+
+  %ret = call float @foo()
+
+  %accext0 = fpext float %ret to double
+  %ext0 = fpext float %frob0 to double
+  %mul0 = call double @llvm.experimental.constrained.fmul.f64(
+                        double %accext0, double %ext0,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+  %extra0 = call double @llvm.experimental.constrained.fmul.f64(
+                        double %mul0, double 1.01,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+  %trunc0 = fptrunc double %extra0 to float
+
+  %accext1 = fpext float %trunc0 to double
+  %ext1 = fpext float %frob1 to double
+  %mul1 = call double @llvm.experimental.constrained.fmul.f64(
+                        double %accext1, double %ext1,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+  %extra1 = call double @llvm.experimental.constrained.fmul.f64(
+                        double %mul1, double 1.11,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+  %trunc1 = fptrunc double %extra1 to float
+
+  %accext2 = fpext float %trunc1 to double
+  %ext2 = fpext float %frob2 to double
+  %mul2 = call double @llvm.experimental.constrained.fmul.f64(
+                        double %accext2, double %ext2,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+  %extra2 = call double @llvm.experimental.constrained.fmul.f64(
+                        double %mul2, double 1.21,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+  %trunc2 = fptrunc double %extra2 to float
+
+  %accext3 = fpext float %trunc2 to double
+  %ext3 = fpext float %frob3 to double
+  %mul3 = call double @llvm.experimental.constrained.fmul.f64(
+                        double %accext3, double %ext3,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+  %extra3 = call double @llvm.experimental.constrained.fmul.f64(
+                        double %mul3, double 1.31,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+  %trunc3 = fptrunc double %extra3 to float
+
+  %accext4 = fpext float %trunc3 to double
+  %ext4 = fpext float %frob4 to double
+  %mul4 = call double @llvm.experimental.constrained.fmul.f64(
+                        double %accext4, double %ext4,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+  %extra4 = call double @llvm.experimental.constrained.fmul.f64(
+                        double %mul4, double 1.41,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+  %trunc4 = fptrunc double %extra4 to float
+
+  %accext5 = fpext float %trunc4 to double
+  %ext5 = fpext float %frob5 to double
+  %mul5 = call double @llvm.experimental.constrained.fmul.f64(
+                        double %accext5, double %ext5,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+  %extra5 = call double @llvm.experimental.constrained.fmul.f64(
+                        double %mul5, double 1.51,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+  %trunc5 = fptrunc double %extra5 to float
+
+  %accext6 = fpext float %trunc5 to double
+  %ext6 = fpext float %frob6 to double
+  %mul6 = call double @llvm.experimental.constrained.fmul.f64(
+                        double %accext6, double %ext6,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+  %extra6 = call double @llvm.experimental.constrained.fmul.f64(
+                        double %mul6, double 1.61,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+  %trunc6 = fptrunc double %extra6 to float
+
+  %accext7 = fpext float %trunc6 to double
+  %ext7 = fpext float %frob7 to double
+  %mul7 = call double @llvm.experimental.constrained.fmul.f64(
+                        double %accext7, double %ext7,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+  %extra7 = call double @llvm.experimental.constrained.fmul.f64(
+                        double %mul7, double 1.71,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+  %trunc7 = fptrunc double %extra7 to float
+
+  %accext8 = fpext float %trunc7 to double
+  %ext8 = fpext float %frob8 to double
+  %mul8 = call double @llvm.experimental.constrained.fmul.f64(
+                        double %accext8, double %ext8,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+  %extra8 = call double @llvm.experimental.constrained.fmul.f64(
+                        double %mul8, double 1.81,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+  %trunc8 = fptrunc double %extra8 to float
+
+  %accext9 = fpext float %trunc8 to double
+  %ext9 = fpext float %frob9 to double
+  %mul9 = call double @llvm.experimental.constrained.fmul.f64(
+                        double %accext9, double %ext9,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+  %extra9 = call double @llvm.experimental.constrained.fmul.f64(
+                        double %mul9, double 1.91,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+  %trunc9 = fptrunc double %extra9 to float
+
+  ret float %trunc9
+}
Index: test/CodeGen/SystemZ/fp-strict-mul-03.ll
===================================================================
--- test/CodeGen/SystemZ/fp-strict-mul-03.ll
+++ test/CodeGen/SystemZ/fp-strict-mul-03.ll
@@ -0,0 +1,174 @@
+; Test strict multiplication of two f64s, producing an f64 result.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 \
+; RUN:   | FileCheck -check-prefix=CHECK -check-prefix=CHECK-SCALAR %s
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
+
+declare double @foo()
+declare double @llvm.experimental.constrained.fmul.f64(double, double, metadata, metadata)
+
+; Check register multiplication.
+define double @f1(double %f1, double %f2) {
+; CHECK-LABEL: f1:
+; CHECK: mdbr %f0, %f2
+; CHECK: br %r14
+  %res = call double @llvm.experimental.constrained.fmul.f64(
+                        double %f1, double %f2,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+  ret double %res
+}
+
+; Check the low end of the MDB range.
+define double @f2(double %f1, double *%ptr) {
+; CHECK-LABEL: f2:
+; CHECK: mdb %f0, 0(%r2)
+; CHECK: br %r14
+  %f2 = load double, double *%ptr
+  %res = call double @llvm.experimental.constrained.fmul.f64(
+                        double %f1, double %f2,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+  ret double %res
+}
+
+; Check the high end of the aligned MDB range.
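+; The 12-bit unsigned displacement covers 0-4095, so 511 * 8 = 4088 fits.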
+define double @f3(double %f1, double *%base) {
+; CHECK-LABEL: f3:
+; CHECK: mdb %f0, 4088(%r2)
+; CHECK: br %r14
+  %ptr = getelementptr double, double *%base, i64 511
+  %f2 = load double, double *%ptr
+  %res = call double @llvm.experimental.constrained.fmul.f64(
+                        double %f1, double %f2,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+  ret double %res
+}
+
+; Check the next doubleword up, which needs separate address logic.
+; Other sequences besides this one would be OK.
+define double @f4(double %f1, double *%base) {
+; CHECK-LABEL: f4:
+; CHECK: aghi %r2, 4096
+; CHECK: mdb %f0, 0(%r2)
+; CHECK: br %r14
+  %ptr = getelementptr double, double *%base, i64 512
+  %f2 = load double, double *%ptr
+  %res = call double @llvm.experimental.constrained.fmul.f64(
+                        double %f1, double %f2,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+  ret double %res
+}
+
+; Check negative displacements, which also need separate address logic.
+define double @f5(double %f1, double *%base) {
+; CHECK-LABEL: f5:
+; CHECK: aghi %r2, -8
+; CHECK: mdb %f0, 0(%r2)
+; CHECK: br %r14
+  %ptr = getelementptr double, double *%base, i64 -1
+  %f2 = load double, double *%ptr
+  %res = call double @llvm.experimental.constrained.fmul.f64(
+                        double %f1, double %f2,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+  ret double %res
+}
+
+; Check that MDB allows indices.
+define double @f6(double %f1, double *%base, i64 %index) {
+; CHECK-LABEL: f6:
+; CHECK: sllg %r1, %r3, 3
+; CHECK: mdb %f0, 800(%r1,%r2)
+; CHECK: br %r14
+  %ptr1 = getelementptr double, double *%base, i64 %index
+  %ptr2 = getelementptr double, double *%ptr1, i64 100
+  %f2 = load double, double *%ptr2
+  %res = call double @llvm.experimental.constrained.fmul.f64(
+                        double %f1, double %f2,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+  ret double %res
+}
+
+; Check that multiplications of spilled values can use MDB rather than MDBR.
+define double @f7(double *%ptr0) {
+; CHECK-LABEL: f7:
+; CHECK: brasl %r14, foo@PLT
+; CHECK-SCALAR: mdb %f0, 160(%r15)
+; CHECK: br %r14
+  %ptr1 = getelementptr double, double *%ptr0, i64 2
+  %ptr2 = getelementptr double, double *%ptr0, i64 4
+  %ptr3 = getelementptr double, double *%ptr0, i64 6
+  %ptr4 = getelementptr double, double *%ptr0, i64 8
+  %ptr5 = getelementptr double, double *%ptr0, i64 10
+  %ptr6 = getelementptr double, double *%ptr0, i64 12
+  %ptr7 = getelementptr double, double *%ptr0, i64 14
+  %ptr8 = getelementptr double, double *%ptr0, i64 16
+  %ptr9 = getelementptr double, double *%ptr0, i64 18
+  %ptr10 = getelementptr double, double *%ptr0, i64 20
+
+  %val0 = load double, double *%ptr0
+  %val1 = load double, double *%ptr1
+  %val2 = load double, double *%ptr2
+  %val3 = load double, double *%ptr3
+  %val4 = load double, double *%ptr4
+  %val5 = load double, double *%ptr5
+  %val6 = load double, double *%ptr6
+  %val7 = load double, double *%ptr7
+  %val8 = load double, double *%ptr8
+  %val9 = load double, double *%ptr9
+  %val10 = load double, double *%ptr10
+
+  %ret = call double @foo()
+
+  %mul0 = call double @llvm.experimental.constrained.fmul.f64(
+                        double %ret, double %val0,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+  %mul1 = call double @llvm.experimental.constrained.fmul.f64(
+                        double %mul0, double %val1,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+  %mul2 = call double @llvm.experimental.constrained.fmul.f64(
+                        double %mul1, double %val2,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+  %mul3 = call double @llvm.experimental.constrained.fmul.f64(
+                        double %mul2, double %val3,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+  %mul4 = call double @llvm.experimental.constrained.fmul.f64(
+                        double %mul3, double %val4,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+  %mul5 = call double @llvm.experimental.constrained.fmul.f64(
+                        double %mul4, double %val5,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+  %mul6 = call double @llvm.experimental.constrained.fmul.f64(
+                        double %mul5, double %val6,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+  %mul7 = call double @llvm.experimental.constrained.fmul.f64(
+                        double %mul6, double %val7,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+  %mul8 = call double @llvm.experimental.constrained.fmul.f64(
+                        double %mul7, double %val8,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+  %mul9 = call double @llvm.experimental.constrained.fmul.f64(
+                        double %mul8, double %val9,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+  %mul10 = call double @llvm.experimental.constrained.fmul.f64(
+                        double %mul9, double %val10,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+
+  ret double %mul10
+}
Index: test/CodeGen/SystemZ/fp-strict-mul-04.ll
===================================================================
--- test/CodeGen/SystemZ/fp-strict-mul-04.ll
+++ test/CodeGen/SystemZ/fp-strict-mul-04.ll
@@ -0,0 +1,315 @@
+; Test strict multiplication of two f64s, producing an f128 result.
+; FIXME: we do not have a strict version of fpext yet
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+
+declare fp128 @llvm.experimental.constrained.fmul.f128(fp128, fp128, metadata, metadata)
+
+declare double @foo()
+
+; Check register multiplication.  "mxdbr %f0, %f2" is not valid from LLVM's
+; point of view, because %f2 is the low register of the FP128 %f0.  Pass the
+; multiplier in %f4 instead.
+define void @f1(double %f1, double %dummy, double %f2, fp128 *%dst) {
+; CHECK-LABEL: f1:
+; CHECK: mxdbr %f0, %f4
+; CHECK: std %f0, 0(%r2)
+; CHECK: std %f2, 8(%r2)
+; CHECK: br %r14
+  %f1x = fpext double %f1 to fp128
+  %f2x = fpext double %f2 to fp128
+  %res = call fp128 @llvm.experimental.constrained.fmul.f128(
+                        fp128 %f1x, fp128 %f2x,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+  store fp128 %res, fp128 *%dst
+  ret void
+}
+
+; Check the low end of the MXDB range.
+define void @f2(double %f1, double *%ptr, fp128 *%dst) {
+; CHECK-LABEL: f2:
+; CHECK: mxdb %f0, 0(%r2)
+; CHECK: std %f0, 0(%r3)
+; CHECK: std %f2, 8(%r3)
+; CHECK: br %r14
+  %f2 = load double, double *%ptr
+  %f1x = fpext double %f1 to fp128
+  %f2x = fpext double %f2 to fp128
+  %res = call fp128 @llvm.experimental.constrained.fmul.f128(
+                        fp128 %f1x, fp128 %f2x,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+  store fp128 %res, fp128 *%dst
+  ret void
+}
+
+; Check the high end of the aligned MXDB range.
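+; The 12-bit unsigned displacement covers 0-4095, so 511 * 8 = 4088 fits.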
+define void @f3(double %f1, double *%base, fp128 *%dst) {
+; CHECK-LABEL: f3:
+; CHECK: mxdb %f0, 4088(%r2)
+; CHECK: std %f0, 0(%r3)
+; CHECK: std %f2, 8(%r3)
+; CHECK: br %r14
+  %ptr = getelementptr double, double *%base, i64 511
+  %f2 = load double, double *%ptr
+  %f1x = fpext double %f1 to fp128
+  %f2x = fpext double %f2 to fp128
+  %res = call fp128 @llvm.experimental.constrained.fmul.f128(
+                        fp128 %f1x, fp128 %f2x,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+  store fp128 %res, fp128 *%dst
+  ret void
+}
+
+; Check the next doubleword up, which needs separate address logic.
+; Other sequences besides this one would be OK.
+define void @f4(double %f1, double *%base, fp128 *%dst) {
+; CHECK-LABEL: f4:
+; CHECK: aghi %r2, 4096
+; CHECK: mxdb %f0, 0(%r2)
+; CHECK: std %f0, 0(%r3)
+; CHECK: std %f2, 8(%r3)
+; CHECK: br %r14
+  %ptr = getelementptr double, double *%base, i64 512
+  %f2 = load double, double *%ptr
+  %f1x = fpext double %f1 to fp128
+  %f2x = fpext double %f2 to fp128
+  %res = call fp128 @llvm.experimental.constrained.fmul.f128(
+                        fp128 %f1x, fp128 %f2x,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+  store fp128 %res, fp128 *%dst
+  ret void
+}
+
+; Check negative displacements, which also need separate address logic.
+define void @f5(double %f1, double *%base, fp128 *%dst) {
+; CHECK-LABEL: f5:
+; CHECK: aghi %r2, -8
+; CHECK: mxdb %f0, 0(%r2)
+; CHECK: std %f0, 0(%r3)
+; CHECK: std %f2, 8(%r3)
+; CHECK: br %r14
+  %ptr = getelementptr double, double *%base, i64 -1
+  %f2 = load double, double *%ptr
+  %f1x = fpext double %f1 to fp128
+  %f2x = fpext double %f2 to fp128
+  %res = call fp128 @llvm.experimental.constrained.fmul.f128(
+                        fp128 %f1x, fp128 %f2x,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+  store fp128 %res, fp128 *%dst
+  ret void
+}
+
+; Check that MXDB allows indices.
+define void @f6(double %f1, double *%base, i64 %index, fp128 *%dst) {
+; CHECK-LABEL: f6:
+; CHECK: sllg %r1, %r3, 3
+; CHECK: mxdb %f0, 800(%r1,%r2)
+; CHECK: std %f0, 0(%r4)
+; CHECK: std %f2, 8(%r4)
+; CHECK: br %r14
+  %ptr1 = getelementptr double, double *%base, i64 %index
+  %ptr2 = getelementptr double, double *%ptr1, i64 100
+  %f2 = load double, double *%ptr2
+  %f1x = fpext double %f1 to fp128
+  %f2x = fpext double %f2 to fp128
+  %res = call fp128 @llvm.experimental.constrained.fmul.f128(
+                        fp128 %f1x, fp128 %f2x,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+  store fp128 %res, fp128 *%dst
+  ret void
+}
+
+; Check that multiplications of spilled values can use MXDB rather than MXDBR.
+define double @f7(double *%ptr0) {
+; CHECK-LABEL: f7:
+; CHECK: brasl %r14, foo@PLT
+; CHECK: mxdb %f0, 160(%r15)
+; CHECK: br %r14
+  %ptr1 = getelementptr double, double *%ptr0, i64 2
+  %ptr2 = getelementptr double, double *%ptr0, i64 4
+  %ptr3 = getelementptr double, double *%ptr0, i64 6
+  %ptr4 = getelementptr double, double *%ptr0, i64 8
+  %ptr5 = getelementptr double, double *%ptr0, i64 10
+  %ptr6 = getelementptr double, double *%ptr0, i64 12
+  %ptr7 = getelementptr double, double *%ptr0, i64 14
+  %ptr8 = getelementptr double, double *%ptr0, i64 16
+  %ptr9 = getelementptr double, double *%ptr0, i64 18
+  %ptr10 = getelementptr double, double *%ptr0, i64 20
+
+  %val0 = load double, double *%ptr0
+  %val1 = load double, double *%ptr1
+  %val2 = load double, double *%ptr2
+  %val3 = load double, double *%ptr3
+  %val4 = load double, double *%ptr4
+  %val5 = load double, double *%ptr5
+  %val6 = load double, double *%ptr6
+  %val7 = load double, double *%ptr7
+  %val8 = load double, double *%ptr8
+  %val9 = load double, double *%ptr9
+  %val10 = load double, double *%ptr10
+
+  %frob0 = fadd double %val0, %val0
+  %frob1 = fadd double %val1, %val1
+  %frob2 = fadd double %val2, %val2
+  %frob3 = fadd double %val3, %val3
+  %frob4 = fadd double %val4, %val4
+  %frob5 = fadd double %val5, %val5
+  %frob6 = fadd double %val6, %val6
+  %frob7 = fadd double %val7, %val7
+  %frob8 = fadd double %val8, %val8
+  %frob9 = fadd double %val9, %val9
+  %frob10 = fadd double %val10, %val10
+
+  store double %frob0, double *%ptr0
+  store double %frob1, double *%ptr1
+  store double %frob2, double *%ptr2
+  store double %frob3, double *%ptr3
+  store double %frob4, double *%ptr4
+  store double %frob5, double *%ptr5
+  store double %frob6, double *%ptr6
+  store double %frob7, double *%ptr7
+  store double %frob8, double *%ptr8
+  store double %frob9, double *%ptr9
+  store double %frob10, double *%ptr10
+
+  %ret = call double @foo()
+
+  %accext0 = fpext double %ret to fp128
+  %ext0 = fpext double %frob0 to fp128
+  %mul0 = call fp128 @llvm.experimental.constrained.fmul.f128(
+                        fp128 %accext0, fp128 %ext0,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+  %const0 = fpext double 1.01 to fp128
+  %extra0 = call fp128 @llvm.experimental.constrained.fmul.f128(
+                        fp128 %mul0, fp128 %const0,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+  %trunc0 = fptrunc fp128 %extra0 to double
+
+  %accext1 = fpext double %trunc0 to fp128
+  %ext1 = fpext double %frob1 to fp128
+  %mul1 = call fp128 @llvm.experimental.constrained.fmul.f128(
+                        fp128 %accext1, fp128 %ext1,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+  %const1 = fpext double 1.11 to fp128
+  %extra1 = call fp128 @llvm.experimental.constrained.fmul.f128(
+                        fp128 %mul1, fp128 %const1,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+  %trunc1 = fptrunc fp128 %extra1 to double
+
+  %accext2 = fpext double %trunc1 to fp128
+  %ext2 = fpext double %frob2 to fp128
+  %mul2 = call fp128 @llvm.experimental.constrained.fmul.f128(
+                        fp128 %accext2, fp128 %ext2,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+  %const2 = fpext double 1.21 to fp128
+  %extra2 = call fp128 @llvm.experimental.constrained.fmul.f128(
+                        fp128 %mul2, fp128 %const2,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+  %trunc2 = fptrunc fp128 %extra2 to double
+
+  %accext3 = fpext double %trunc2 to fp128
+  %ext3 = fpext double %frob3 to fp128
+  %mul3 = call fp128 @llvm.experimental.constrained.fmul.f128(
+                        fp128 %accext3, fp128 %ext3,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+  %const3 = fpext double 1.31 to fp128
+  %extra3 = call fp128 @llvm.experimental.constrained.fmul.f128(
+                        fp128 %mul3, fp128 %const3,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+  %trunc3 = fptrunc fp128 %extra3 to double
+
+  %accext4 = fpext double %trunc3 to fp128
+  %ext4 = fpext double %frob4 to fp128
+  %mul4 = call fp128 @llvm.experimental.constrained.fmul.f128(
+                        fp128 %accext4, fp128 %ext4,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+  %const4 = fpext double 1.41 to fp128
+  %extra4 = call fp128 @llvm.experimental.constrained.fmul.f128(
+                        fp128 %mul4, fp128 %const4,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+  %trunc4 = fptrunc fp128 %extra4 to double
+
+  %accext5 = fpext double %trunc4 to fp128
+  %ext5 = fpext double %frob5 to fp128
+  %mul5 = call fp128 @llvm.experimental.constrained.fmul.f128(
+                        fp128 %accext5, fp128 %ext5,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+  %const5 = fpext double 1.51 to fp128
+  %extra5 = call fp128 @llvm.experimental.constrained.fmul.f128(
+                        fp128 %mul5, fp128 %const5,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+  %trunc5 = fptrunc fp128 %extra5 to double
+
+  %accext6 = fpext double %trunc5 to fp128
+  %ext6 = fpext double %frob6 to fp128
+  %mul6 = call fp128 @llvm.experimental.constrained.fmul.f128(
+                        fp128 %accext6, fp128 %ext6,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+  %const6 = fpext double 1.61 to fp128
+  %extra6 = call fp128 @llvm.experimental.constrained.fmul.f128(
+                        fp128 %mul6, fp128 %const6,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+  %trunc6 = fptrunc fp128 %extra6 to double
+
+  %accext7 = fpext double %trunc6 to fp128
+  %ext7 = fpext double %frob7 to fp128
+  %mul7 = call fp128 @llvm.experimental.constrained.fmul.f128(
+                        fp128 %accext7, fp128 %ext7,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+  %const7 = fpext double 1.71 to fp128
+  %extra7 = call fp128 @llvm.experimental.constrained.fmul.f128(
+                        fp128 %mul7, fp128 %const7,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+  %trunc7 = fptrunc fp128 %extra7 to double
+
+  %accext8 = fpext double %trunc7 to fp128
+  %ext8 = fpext double %frob8 to fp128
+  %mul8 = call fp128 @llvm.experimental.constrained.fmul.f128(
+                        fp128 %accext8, fp128 %ext8,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+  %const8 = fpext double 1.81 to fp128
+  %extra8 = call fp128 @llvm.experimental.constrained.fmul.f128(
+                        fp128 %mul8, fp128 %const8,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+  %trunc8 = fptrunc fp128 %extra8 to double
+
+  %accext9 = fpext double %trunc8 to fp128
+  %ext9 = fpext double %frob9 to fp128
+  %mul9 = call fp128 @llvm.experimental.constrained.fmul.f128(
+                        fp128 %accext9, fp128 %ext9,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+  %const9 = fpext double 1.91 to fp128
+  %extra9 = call fp128 @llvm.experimental.constrained.fmul.f128(
+                        fp128 %mul9, fp128 %const9,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+  %trunc9 = fptrunc fp128 %extra9 to double
+
+  ret double %trunc9
+}
Index: test/CodeGen/SystemZ/fp-strict-mul-05.ll
===================================================================
--- test/CodeGen/SystemZ/fp-strict-mul-05.ll
+++ test/CodeGen/SystemZ/fp-strict-mul-05.ll
@@ -0,0 +1,25 @@
+; Test strict multiplication of two f128s.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+
+declare fp128 @llvm.experimental.constrained.fmul.f128(fp128, fp128, metadata, metadata)
+
+; There is no memory form of 128-bit multiplication.
+define void @f1(fp128 *%ptr, float %f2) {
+; CHECK-LABEL: f1:
+; CHECK-DAG: lxebr %f0, %f0
+; CHECK-DAG: ld %f1, 0(%r2)
+; CHECK-DAG: ld %f3, 8(%r2)
+; CHECK: mxbr %f0, %f1
+; CHECK: std %f0, 0(%r2)
+; CHECK: std %f2, 8(%r2)
+; CHECK: br %r14
+  %f1 = load fp128, fp128 *%ptr
+  %f2x = fpext float %f2 to fp128
+  %res = call fp128 @llvm.experimental.constrained.fmul.f128(
+                        fp128 %f1, fp128 %f2x,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+  store fp128 %res, fp128 *%ptr
+  ret void
+}
Index: test/CodeGen/SystemZ/fp-strict-mul-06.ll
===================================================================
--- test/CodeGen/SystemZ/fp-strict-mul-06.ll
+++ test/CodeGen/SystemZ/fp-strict-mul-06.ll
@@ -0,0 +1,139 @@
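+; Test strict 32-bit floating-point multiply-and-add.
+;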
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 \
+; RUN:   | FileCheck -check-prefix=CHECK -check-prefix=CHECK-SCALAR %s
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z14 \
+; RUN:   | FileCheck -check-prefix=CHECK -check-prefix=CHECK-VECTOR %s
+
+declare float @llvm.experimental.constrained.fma.f32(float, float, float, metadata, metadata)
+
+define float @f1(float %f1, float %f2, float %acc) {
+; CHECK-LABEL: f1:
+; CHECK-SCALAR: maebr %f4, %f0, %f2
+; CHECK-SCALAR: ler %f0, %f4
+; CHECK-VECTOR: wfmasb %f0, %f0, %f2, %f4
+; CHECK: br %r14
+  %res = call float @llvm.experimental.constrained.fma.f32 (
+                        float %f1, float %f2, float %acc,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+  ret float %res
+}
+
+define float @f2(float %f1, float *%ptr, float %acc) {
+; CHECK-LABEL: f2:
+; CHECK: maeb %f2, %f0, 0(%r2)
+; CHECK-SCALAR: ler %f0, %f2
+; CHECK-VECTOR: ldr %f0, %f2
+; CHECK: br %r14
+  %f2 = load float, float *%ptr
+  %res = call float @llvm.experimental.constrained.fma.f32 (
+                        float %f1, float %f2, float %acc,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+  ret float %res
+}
+
+define float @f3(float %f1, float *%base, float %acc) {
+; CHECK-LABEL: f3:
+; CHECK: maeb %f2, %f0, 4092(%r2)
+; CHECK-SCALAR: ler %f0, %f2
+; CHECK-VECTOR: ldr %f0, %f2
+; CHECK: br %r14
+  %ptr = getelementptr float, float *%base, i64 1023
+  %f2 = load float, float *%ptr
+  %res = call float @llvm.experimental.constrained.fma.f32 (
+                        float %f1, float %f2, float %acc,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+  ret float %res
+}
+
+define float @f4(float %f1, float *%base, float %acc) {
+; The important thing here is that we don't generate an out-of-range
+; displacement.  Other sequences besides this one would be OK.
+;
+; CHECK-LABEL: f4:
+; CHECK: aghi %r2, 4096
+; CHECK: maeb %f2, %f0, 0(%r2)
+; CHECK-SCALAR: ler %f0, %f2
+; CHECK-VECTOR: ldr %f0, %f2
+; CHECK: br %r14
+  %ptr = getelementptr float, float *%base, i64 1024
+  %f2 = load float, float *%ptr
+  %res = call float @llvm.experimental.constrained.fma.f32 (
+                        float %f1, float %f2, float %acc,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+  ret float %res
+}
+
+define float @f5(float %f1, float *%base, float %acc) {
+; Here too the important thing is that we don't generate an out-of-range
+; displacement.  Other sequences besides this one would be OK.
+;
+; CHECK-LABEL: f5:
+; CHECK: aghi %r2, -4
+; CHECK: maeb %f2, %f0, 0(%r2)
+; CHECK-SCALAR: ler %f0, %f2
+; CHECK-VECTOR: ldr %f0, %f2
+; CHECK: br %r14
+  %ptr = getelementptr float, float *%base, i64 -1
+  %f2 = load float, float *%ptr
+  %res = call float @llvm.experimental.constrained.fma.f32 (
+                        float %f1, float %f2, float %acc,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+  ret float %res
+}
+
+define float @f6(float %f1, float *%base, i64 %index, float %acc) {
+; CHECK-LABEL: f6:
+; CHECK: sllg %r1, %r3, 2
+; CHECK: maeb %f2, %f0, 0(%r1,%r2)
+; CHECK-SCALAR: ler %f0, %f2
+; CHECK-VECTOR: ldr %f0, %f2
+; CHECK: br %r14
+  %ptr = getelementptr float, float *%base, i64 %index
+  %f2 = load float, float *%ptr
+  %res = call float @llvm.experimental.constrained.fma.f32 (
+                        float %f1, float %f2, float %acc,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+  ret float %res
+}
+
+define float @f7(float %f1, float *%base, i64 %index, float %acc) {
+; CHECK-LABEL: f7:
+; CHECK: sllg %r1, %r3, 2
+; CHECK: maeb %f2, %f0, 4092({{%r1,%r2|%r2,%r1}})
+; CHECK-SCALAR: ler %f0, %f2
+; CHECK-VECTOR: ldr %f0, %f2
+; CHECK: br %r14
+  %index2 = add i64 %index, 1023
+  %ptr = getelementptr float, float *%base, i64 %index2
+  %f2 = load float, float *%ptr
+  %res = call float @llvm.experimental.constrained.fma.f32 (
+                        float %f1, float %f2, float %acc,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+  ret float %res
+}
+
+define float @f8(float %f1, float *%base, i64 %index, float %acc) {
+; CHECK-LABEL: f8:
+; CHECK: sllg %r1, %r3, 2
+; CHECK: lay %r1, 4096({{%r1,%r2|%r2,%r1}})
+; CHECK: maeb %f2, %f0, 0(%r1)
+; CHECK-SCALAR: ler %f0, %f2
+; CHECK-VECTOR: ldr %f0, %f2
+; CHECK: br %r14
+  %index2 = add i64 %index, 1024
+  %ptr = getelementptr float, float *%base, i64 %index2
+  %f2 = load float, float *%ptr
+  %res = call float @llvm.experimental.constrained.fma.f32 (
+                        float %f1, float %f2, float %acc,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+  ret float %res
+}
Index: test/CodeGen/SystemZ/fp-strict-mul-07.ll
===================================================================
--- test/CodeGen/SystemZ/fp-strict-mul-07.ll
+++ test/CodeGen/SystemZ/fp-strict-mul-07.ll
@@ -0,0 +1,132 @@
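+; Test strict 64-bit floating-point multiply-and-add.
+;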
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 \
+; RUN:   | FileCheck -check-prefix=CHECK -check-prefix=CHECK-SCALAR %s
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 \
+; RUN:   | FileCheck -check-prefix=CHECK -check-prefix=CHECK-VECTOR %s
+
+declare double @llvm.experimental.constrained.fma.f64(double, double, double, metadata, metadata)
+
+define double @f1(double %f1, double %f2, double %acc) {
+; CHECK-LABEL: f1:
+; CHECK-SCALAR: madbr %f4, %f0, %f2
+; CHECK-SCALAR: ldr %f0, %f4
+; CHECK-VECTOR: wfmadb %f0, %f0, %f2, %f4
+; CHECK: br %r14
+  %res = call double @llvm.experimental.constrained.fma.f64 (
+                        double %f1, double %f2, double %acc,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+  ret double %res
+}
+
+define double @f2(double %f1, double *%ptr, double %acc) {
+; CHECK-LABEL: f2:
+; CHECK: madb %f2, %f0, 0(%r2)
+; CHECK: ldr %f0, %f2
+; CHECK: br %r14
+  %f2 = load double, double *%ptr
+  %res = call double @llvm.experimental.constrained.fma.f64 (
+                        double %f1, double %f2, double %acc,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+  ret double %res
+}
+
+define double @f3(double %f1, double *%base, double %acc) {
+; CHECK-LABEL: f3:
+; CHECK: madb %f2, %f0, 4088(%r2)
+; CHECK: ldr %f0, %f2
+; CHECK: br %r14
+  %ptr = getelementptr double, double *%base, i64 511
+  %f2 = load double, double *%ptr
+  %res = call double @llvm.experimental.constrained.fma.f64 (
+                        double %f1, double %f2, double %acc,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+  ret double %res
+}
+
+define double @f4(double %f1, double *%base, double %acc) {
+; The important thing here is that we don't generate an out-of-range
+; displacement.  Other sequences besides this one would be OK.
+;
+; CHECK-LABEL: f4:
+; CHECK: aghi %r2, 4096
+; CHECK: madb %f2, %f0, 0(%r2)
+; CHECK: ldr %f0, %f2
+; CHECK: br %r14
+  %ptr = getelementptr double, double *%base, i64 512
+  %f2 = load double, double *%ptr
+  %res = call double @llvm.experimental.constrained.fma.f64 (
+                        double %f1, double %f2, double %acc,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+  ret double %res
+}
+
+define double @f5(double %f1, double *%base, double %acc) {
+; Here too the important thing is that we don't generate an out-of-range
+; displacement.  Other sequences besides this one would be OK.
+;
+; CHECK-LABEL: f5:
+; CHECK: aghi %r2, -8
+; CHECK: madb %f2, %f0, 0(%r2)
+; CHECK: ldr %f0, %f2
+; CHECK: br %r14
+  %ptr = getelementptr double, double *%base, i64 -1
+  %f2 = load double, double *%ptr
+  %res = call double @llvm.experimental.constrained.fma.f64 (
+                        double %f1, double %f2, double %acc,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+  ret double %res
+}
+
+define double @f6(double %f1, double *%base, i64 %index, double %acc) {
+; CHECK-LABEL: f6:
+; CHECK: sllg %r1, %r3, 3
+; CHECK: madb %f2, %f0, 0(%r1,%r2)
+; CHECK: ldr %f0, %f2
+; CHECK: br %r14
+  %ptr = getelementptr double, double *%base, i64 %index
+  %f2 = load double, double *%ptr
+  %res = call double @llvm.experimental.constrained.fma.f64 (
+                        double %f1, double %f2, double %acc,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+  ret double %res
+}
+
+define double @f7(double %f1, double *%base, i64 %index, double %acc) {
+; CHECK-LABEL: f7:
+; CHECK: sllg %r1, %r3, 3
+; CHECK: madb %f2, %f0, 4088({{%r1,%r2|%r2,%r1}})
+; CHECK: ldr %f0, %f2
+; CHECK: br %r14
+  %index2 = add i64 %index, 511
+  %ptr = getelementptr double, double *%base, i64 %index2
+  %f2 = load double, double *%ptr
+  %res = call double @llvm.experimental.constrained.fma.f64 (
+                        double %f1, double %f2, double %acc,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+  ret double %res
+}
+
+define double @f8(double %f1, double *%base, i64 %index, double %acc) {
+; CHECK-LABEL: f8:
+; CHECK: sllg %r1, %r3, 3
+; CHECK: lay %r1, 4096({{%r1,%r2|%r2,%r1}})
+; CHECK: madb %f2, %f0, 0(%r1)
+; CHECK: ldr %f0, %f2
+; CHECK: br %r14
+  %index2 = add i64 %index, 512
+  %ptr = getelementptr double, double *%base, i64 %index2
+  %f2 = load double, double *%ptr
+  %res = call double @llvm.experimental.constrained.fma.f64 (
+                        double %f1, double %f2, double %acc,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+  ret double %res
+}
Index: test/CodeGen/SystemZ/fp-strict-mul-08.ll
===================================================================
--- test/CodeGen/SystemZ/fp-strict-mul-08.ll
+++ test/CodeGen/SystemZ/fp-strict-mul-08.ll
@@ -0,0 +1,147 @@
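+; Test strict 32-bit floating-point multiply-and-subtract.
+;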
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 \
+; RUN:   | FileCheck -check-prefix=CHECK -check-prefix=CHECK-SCALAR %s
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z14 \
+; RUN:   | FileCheck -check-prefix=CHECK -check-prefix=CHECK-VECTOR %s
+
+declare float @llvm.experimental.constrained.fma.f32(float, float, float, metadata, metadata)
+
+define float @f1(float %f1, float %f2, float %acc) {
+; CHECK-LABEL: f1:
+; CHECK-SCALAR: msebr %f4, %f0, %f2
+; CHECK-SCALAR: ler %f0, %f4
+; CHECK-VECTOR: wfmssb %f0, %f0, %f2, %f4
+; CHECK: br %r14
+  %negacc = fsub float -0.0, %acc
+  %res = call float @llvm.experimental.constrained.fma.f32 (
+                        float %f1, float %f2, float %negacc,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+  ret float %res
+}
+
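+; Check the low end of the MSEB range.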
+define float @f2(float %f1, float *%ptr, float %acc) {
+; CHECK-LABEL: f2:
+; CHECK: mseb %f2, %f0, 0(%r2)
+; CHECK-SCALAR: ler %f0, %f2
+; CHECK-VECTOR: ldr %f0, %f2
+; CHECK: br %r14
+  %f2 = load float, float *%ptr
+  %negacc = fsub float -0.0, %acc
+  %res = call float @llvm.experimental.constrained.fma.f32 (
+                        float %f1, float %f2, float %negacc,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+  ret float %res
+}
+
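+; Check the high end of the aligned MSEB range.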
+define float @f3(float %f1, float *%base, float %acc) {
+; CHECK-LABEL: f3:
+; CHECK: mseb %f2, %f0, 4092(%r2)
+; CHECK-SCALAR: ler %f0, %f2
+; CHECK-VECTOR: ldr %f0, %f2
+; CHECK: br %r14
+  %ptr = getelementptr float, float *%base, i64 1023
+  %f2 = load float, float *%ptr
+  %negacc = fsub float -0.0, %acc
+  %res = call float @llvm.experimental.constrained.fma.f32 (
+                        float %f1, float %f2, float %negacc,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+  ret float %res
+}
+
+define float @f4(float %f1, float *%base, float %acc) {
+; The important thing here is that we don't generate an out-of-range
+; displacement.  Other sequences besides this one would be OK.
+;
+; CHECK-LABEL: f4:
+; CHECK: aghi %r2, 4096
+; CHECK: mseb %f2, %f0, 0(%r2)
+; CHECK-SCALAR: ler %f0, %f2
+; CHECK-VECTOR: ldr %f0, %f2
+; CHECK: br %r14
+  %ptr = getelementptr float, float *%base, i64 1024
+  %f2 = load float, float *%ptr
+  %negacc = fsub float -0.0, %acc
+  %res = call float @llvm.experimental.constrained.fma.f32 (
+                        float %f1, float %f2, float %negacc,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+  ret float %res
+}
+
+define float @f5(float %f1, float *%base, float %acc) {
+; Here too the important thing is that we don't generate an out-of-range
+; displacement.  Other sequences besides this one would be OK.
+;
+; CHECK-LABEL: f5:
+; CHECK: aghi %r2, -4
+; CHECK: mseb %f2, %f0, 0(%r2)
+; CHECK-SCALAR: ler %f0, %f2
+; CHECK-VECTOR: ldr %f0, %f2
+; CHECK: br %r14
+  %ptr = getelementptr float, float *%base, i64 -1
+  %f2 = load float, float *%ptr
+  %negacc = fsub float -0.0, %acc
+  %res = call float @llvm.experimental.constrained.fma.f32 (
+                        float %f1, float %f2, float %negacc,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+  ret float %res
+}
+
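+; Check that MSEB allows indices.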
+define float @f6(float %f1, float *%base, i64 %index, float %acc) {
+; CHECK-LABEL: f6:
+; CHECK: sllg %r1, %r3, 2
+; CHECK: mseb %f2, %f0, 0(%r1,%r2)
+; CHECK-SCALAR: ler %f0, %f2
+; CHECK-VECTOR: ldr %f0, %f2
+; CHECK: br %r14
+  %ptr = getelementptr float, float *%base, i64 %index
+  %f2 = load float, float *%ptr
+  %negacc = fsub float -0.0, %acc
+  %res = call float @llvm.experimental.constrained.fma.f32 (
+                        float %f1, float %f2, float %negacc,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+  ret float %res
+}
+
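+; Check the high end of the aligned MSEB range when an index is involved.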
+define float @f7(float %f1, float *%base, i64 %index, float %acc) {
+; CHECK-LABEL: f7:
+; CHECK: sllg %r1, %r3, 2
+; CHECK: mseb %f2, %f0, 4092({{%r1,%r2|%r2,%r1}})
+; CHECK-SCALAR: ler %f0, %f2
+; CHECK-VECTOR: ldr %f0, %f2
+; CHECK: br %r14
+  %index2 = add i64 %index, 1023
+  %ptr = getelementptr float, float *%base, i64 %index2
+  %f2 = load float, float *%ptr
+  %negacc = fsub float -0.0, %acc
+  %res = call float @llvm.experimental.constrained.fma.f32 (
+                        float %f1, float %f2, float %negacc,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+  ret float %res
+}
+
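+; Check the next word up with an index, which needs separate address logic.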
+define float @f8(float %f1, float *%base, i64 %index, float %acc) {
+; CHECK-LABEL: f8:
+; CHECK: sllg %r1, %r3, 2
+; CHECK: lay %r1, 4096({{%r1,%r2|%r2,%r1}})
+; CHECK: mseb %f2, %f0, 0(%r1)
+; CHECK-SCALAR: ler %f0, %f2
+; CHECK-VECTOR: ldr %f0, %f2
+; CHECK: br %r14
+  %index2 = add i64 %index, 1024
+  %ptr = getelementptr float, float *%base, i64 %index2
+  %f2 = load float, float *%ptr
+  %negacc = fsub float -0.0, %acc
+  %res = call float @llvm.experimental.constrained.fma.f32 (
+                        float %f1, float %f2, float %negacc,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+  ret float %res
+}
Index: test/CodeGen/SystemZ/fp-strict-mul-09.ll
===================================================================
--- test/CodeGen/SystemZ/fp-strict-mul-09.ll
+++ test/CodeGen/SystemZ/fp-strict-mul-09.ll
@@ -0,0 +1,146 @@
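+; Test strict 64-bit floating-point multiply-and-subtract.
+;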
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 \
+; RUN:   | FileCheck -check-prefix=CHECK -check-prefix=CHECK-SCALAR %s
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 \
+; RUN:   | FileCheck -check-prefix=CHECK -check-prefix=CHECK-VECTOR %s
+
+declare double @llvm.experimental.constrained.fma.f64(double %f1, double %f2, double %f3, metadata, metadata)
+
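+; Check register multiply-and-subtract.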
+define double @f1(double %f1, double %f2, double %acc) {
+; CHECK-LABEL: f1:
+; CHECK-SCALAR: msdbr %f4, %f0, %f2
+; CHECK-SCALAR: ldr %f0, %f4
+; CHECK-VECTOR: wfmsdb %f0, %f0, %f2, %f4
+; CHECK: br %r14
+  %negacc = fsub double -0.0, %acc
+  %res = call double @llvm.experimental.constrained.fma.f64 (
+                        double %f1, double %f2, double %negacc,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+  ret double %res
+}
+
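+; Check the low end of the MSDB range.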
+define double @f2(double %f1, double *%ptr, double %acc) {
+; CHECK-LABEL: f2:
+; CHECK: msdb %f2, %f0, 0(%r2)
+; CHECK: ldr %f0, %f2
+; CHECK: br %r14
+  %f2 = load double, double *%ptr
+  %negacc = fsub double -0.0, %acc
+  %res = call double @llvm.experimental.constrained.fma.f64 (
+                        double %f1, double %f2, double %negacc,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+  ret double %res
+}
+
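+; Check the high end of the aligned MSDB range.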
+define double @f3(double %f1, double *%base, double %acc) {
+; CHECK-LABEL: f3:
+; CHECK: msdb %f2, %f0, 4088(%r2)
+; CHECK: ldr %f0, %f2
+; CHECK: br %r14
+  %ptr = getelementptr double, double *%base, i64 511
+  %f2 = load double, double *%ptr
+  %negacc = fsub double -0.0, %acc
+  %res = call double @llvm.experimental.constrained.fma.f64 (
+                        double %f1, double %f2, double %negacc,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+  ret double %res
+}
+
+define double @f4(double %f1, double *%base, double %acc) {
+; The important thing here is that we don't generate an out-of-range
+; displacement.  Other sequences besides this one would be OK.
+;
+; CHECK-LABEL: f4:
+; CHECK: aghi %r2, 4096
+; CHECK: msdb %f2, %f0, 0(%r2)
+; CHECK: ldr %f0, %f2
+; CHECK: br %r14
+  %ptr = getelementptr double, double *%base, i64 512
+  %f2 = load double, double *%ptr
+  %negacc = fsub double -0.0, %acc
+  %res = call double @llvm.experimental.constrained.fma.f64 (
+                        double %f1, double %f2, double %negacc,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+  ret double %res
+}
+
+define double @f5(double %f1, double *%base, double %acc) {
+; Here too the important thing is that we don't generate an out-of-range
+; displacement.  Other sequences besides this one would be OK.
+;
+; CHECK-LABEL: f5:
+; CHECK: aghi %r2, -8
+; CHECK: msdb %f2, %f0, 0(%r2)
+; CHECK: ldr %f0, %f2
+; CHECK: br %r14
+  %ptr = getelementptr double, double *%base, i64 -1
+  %f2 = load double, double *%ptr
+  %negacc = fsub double -0.0, %acc
+  %res = call double @llvm.experimental.constrained.fma.f64 (
+                        double %f1, double %f2, double %negacc,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+  ret double %res
+}
+
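+; Check that MSDB allows indices.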
+define double @f6(double %f1, double *%base, i64 %index, double %acc) {
+; CHECK-LABEL: f6:
+; CHECK: sllg %r1, %r3, 3
+; CHECK: msdb %f2, %f0, 0(%r1,%r2)
+; CHECK: ldr %f0, %f2
+; CHECK: br %r14
+  %ptr = getelementptr double, double *%base, i64 %index
+  %f2 = load double, double *%ptr
+  %negacc = fsub double -0.0, %acc
+  %res = call double @llvm.experimental.constrained.fma.f64 (
+                        double %f1, double %f2, double %negacc,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+  ret double %res
+}
+
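+; Check the high end of the aligned MSDB range when an index is involved.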
+define double @f7(double %f1, double *%base, i64 %index, double %acc) {
+; CHECK-LABEL: f7:
+; CHECK: sllg %r1, %r3, 3
+; CHECK: msdb %f2, %f0, 4088({{%r1,%r2|%r2,%r1}})
+; CHECK: ldr %f0, %f2
+; CHECK: br %r14
+  %index2 = add i64 %index, 511
+  %ptr = getelementptr double, double *%base, i64 %index2
+  %f2 = load double, double *%ptr
+  %negacc = fsub double -0.0, %acc
+  %res = call double @llvm.experimental.constrained.fma.f64 (
+                        double %f1, double %f2, double %negacc,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+  ret double %res
+}
+
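+; Check the next doubleword up with an index, which needs separate address logic.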
+define double @f8(double %f1, double *%base, i64 %index, double %acc) {
+; CHECK-LABEL: f8:
+; CHECK: sllg %r1, %r3, 3
+; CHECK: lay %r1, 4096({{%r1,%r2|%r2,%r1}})
+; CHECK: msdb %f2, %f0, 0(%r1)
+; CHECK: ldr %f0, %f2
+; CHECK: br %r14
+  %index2 = add i64 %index, 512
+  %ptr = getelementptr double, double *%base, i64 %index2
+  %f2 = load double, double *%ptr
+  %negacc = fsub double -0.0, %acc
+  %res = call double @llvm.experimental.constrained.fma.f64 (
+                        double %f1, double %f2, double %negacc,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+  ret double %res
+}
Index: test/CodeGen/SystemZ/fp-strict-mul-10.ll
===================================================================
--- test/CodeGen/SystemZ/fp-strict-mul-10.ll
+++ test/CodeGen/SystemZ/fp-strict-mul-10.ll
@@ -0,0 +1,61 @@
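+; Test strict negated floating-point multiply-and-add and multiply-and-subtract
+; on z14.
+;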
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z14 | FileCheck %s
+
+declare double @llvm.experimental.constrained.fma.f64(double %f1, double %f2, double %f3, metadata, metadata)
+declare float @llvm.experimental.constrained.fma.f32(float %f1, float %f2, float %f3, metadata, metadata)
+
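+; Check negated fused multiply-and-add for f64.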
+define double @f1(double %f1, double %f2, double %acc) {
+; CHECK-LABEL: f1:
+; CHECK: wfnmadb %f0, %f0, %f2, %f4
+; CHECK: br %r14
+  %res = call double @llvm.experimental.constrained.fma.f64 (
+                        double %f1, double %f2, double %acc,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+  %negres = fsub double -0.0, %res
+  ret double %negres
+}
+
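+; Check negated fused multiply-and-subtract for f64.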
+define double @f2(double %f1, double %f2, double %acc) {
+; CHECK-LABEL: f2:
+; CHECK: wfnmsdb %f0, %f0, %f2, %f4
+; CHECK: br %r14
+  %negacc = fsub double -0.0, %acc
+  %res = call double @llvm.experimental.constrained.fma.f64 (
+                        double %f1, double %f2, double %negacc,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+  %negres = fsub double -0.0, %res
+  ret double %negres
+}
+
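+; Check negated fused multiply-and-add for f32.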
+define float @f3(float %f1, float %f2, float %acc) {
+; CHECK-LABEL: f3:
+; CHECK: wfnmasb %f0, %f0, %f2, %f4
+; CHECK: br %r14
+  %res = call float @llvm.experimental.constrained.fma.f32 (
+                        float %f1, float %f2, float %acc,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+  %negres = fsub float -0.0, %res
+  ret float %negres
+}
+
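+; Check negated fused multiply-and-subtract for f32.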
+define float @f4(float %f1, float %f2, float %acc) {
+; CHECK-LABEL: f4:
+; CHECK: wfnmssb %f0, %f0, %f2, %f4
+; CHECK: br %r14
+  %negacc = fsub float -0.0, %acc
+  %res = call float @llvm.experimental.constrained.fma.f32 (
+                        float %f1, float %f2, float %negacc,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+  %negres = fsub float -0.0, %res
+  ret float %negres
+}
+
Index: test/CodeGen/SystemZ/fp-strict-mul-11.ll
===================================================================
--- test/CodeGen/SystemZ/fp-strict-mul-11.ll
+++ test/CodeGen/SystemZ/fp-strict-mul-11.ll
@@ -0,0 +1,40 @@
+; Test strict 128-bit floating-point multiplication on z14.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z14 | FileCheck %s
+
+declare fp128 @llvm.experimental.constrained.fmul.f128(fp128, fp128, metadata, metadata)
+
+define void @f1(fp128 *%ptr1, fp128 *%ptr2) {
+; CHECK-LABEL: f1:
+; CHECK-DAG: vl [[REG1:%v[0-9]+]], 0(%r2)
+; CHECK-DAG: vl [[REG2:%v[0-9]+]], 0(%r3)
+; CHECK: wfmxb [[RES:%v[0-9]+]], [[REG1]], [[REG2]]
+; CHECK: vst [[RES]], 0(%r2)
+; CHECK: br %r14
+  %f1 = load fp128, fp128 *%ptr1
+  %f2 = load fp128, fp128 *%ptr2
+  %sum = call fp128 @llvm.experimental.constrained.fmul.f128(
+                        fp128 %f1, fp128 %f2,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+  store fp128 %sum, fp128 *%ptr1
+  ret void
+}
+
+define void @f2(double %f1, double %f2, fp128 *%dst) {
+; CHECK-LABEL: f2:
+; CHECK-DAG: wflld [[REG1:%v[0-9]+]], %f0
+; CHECK-DAG: wflld [[REG2:%v[0-9]+]], %f2
+; CHECK: wfmxb [[RES:%v[0-9]+]], [[REG1]], [[REG2]]
+; CHECK: vst [[RES]], 0(%r2)
+; CHECK: br %r14
+  %f1x = fpext double %f1 to fp128
+  %f2x = fpext double %f2 to fp128
+  %res = call fp128 @llvm.experimental.constrained.fmul.f128(
+                        fp128 %f1x, fp128 %f2x,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+  store fp128 %res, fp128 *%dst
+  ret void
+}
+
Index: test/CodeGen/SystemZ/fp-strict-round-01.ll
===================================================================
--- test/CodeGen/SystemZ/fp-strict-round-01.ll
+++ test/CodeGen/SystemZ/fp-strict-round-01.ll
@@ -0,0 +1,250 @@
+; Test strict rounding functions for z10.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 | FileCheck %s
+
+; Test rint for f32.
+declare float @llvm.experimental.constrained.rint.f32(float, metadata, metadata)
+define float @f1(float %f) {
+; CHECK-LABEL: f1:
+; CHECK: fiebr %f0, 0, %f0
+; CHECK: br %r14
+  %res = call float @llvm.experimental.constrained.rint.f32(
+                        float %f,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+  ret float %res
+}
+
+; Test rint for f64.
+declare double @llvm.experimental.constrained.rint.f64(double, metadata, metadata)
+define double @f2(double %f) {
+; CHECK-LABEL: f2:
+; CHECK: fidbr %f0, 0, %f0
+; CHECK: br %r14
+  %res = call double @llvm.experimental.constrained.rint.f64(
+                        double %f,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+  ret double %res
+}
+
+; Test rint for f128.
+declare fp128 @llvm.experimental.constrained.rint.f128(fp128, metadata, metadata)
+define void @f3(fp128 *%ptr) {
+; CHECK-LABEL: f3:
+; CHECK: fixbr %f0, 0, %f0
+; CHECK: br %r14
+  %src = load fp128, fp128 *%ptr
+  %res = call fp128 @llvm.experimental.constrained.rint.f128(
+                        fp128 %src,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+  store fp128 %res, fp128 *%ptr
+  ret void
+}
+
+; Test nearbyint for f32.
+declare float @llvm.experimental.constrained.nearbyint.f32(float, metadata, metadata)
+define float @f4(float %f) {
+; CHECK-LABEL: f4:
+; CHECK: brasl %r14, nearbyintf@PLT
+; CHECK: br %r14
+  %res = call float @llvm.experimental.constrained.nearbyint.f32(
+                        float %f,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+  ret float %res
+}
+
+; Test nearbyint for f64.
+declare double @llvm.experimental.constrained.nearbyint.f64(double, metadata, metadata)
+define double @f5(double %f) {
+; CHECK-LABEL: f5:
+; CHECK: brasl %r14, nearbyint@PLT
+; CHECK: br %r14
+  %res = call double @llvm.experimental.constrained.nearbyint.f64(
+                        double %f,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+  ret double %res
+}
+
+; Test nearbyint for f128.
+declare fp128 @llvm.experimental.constrained.nearbyint.f128(fp128, metadata, metadata)
+define void @f6(fp128 *%ptr) {
+; CHECK-LABEL: f6:
+; CHECK: brasl %r14, nearbyintl@PLT
+; CHECK: br %r14
+  %src = load fp128, fp128 *%ptr
+  %res = call fp128 @llvm.experimental.constrained.nearbyint.f128(
+                        fp128 %src,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+  store fp128 %res, fp128 *%ptr
+  ret void
+}
+
+; Test floor for f32.
+declare float @llvm.experimental.constrained.floor.f32(float, metadata, metadata)
+define float @f7(float %f) {
+; CHECK-LABEL: f7:
+; CHECK: brasl %r14, floorf@PLT
+; CHECK: br %r14
+  %res = call float @llvm.experimental.constrained.floor.f32(
+                        float %f,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+  ret float %res
+}
+
+; Test floor for f64.
+declare double @llvm.experimental.constrained.floor.f64(double, metadata, metadata)
+define double @f8(double %f) {
+; CHECK-LABEL: f8:
+; CHECK: brasl %r14, floor@PLT
+; CHECK: br %r14
+  %res = call double @llvm.experimental.constrained.floor.f64(
+                        double %f,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+  ret double %res
+}
+
+; Test floor for f128.
+declare fp128 @llvm.experimental.constrained.floor.f128(fp128, metadata, metadata)
+define void @f9(fp128 *%ptr) {
+; CHECK-LABEL: f9:
+; CHECK: brasl %r14, floorl@PLT
+; CHECK: br %r14
+  %src = load fp128, fp128 *%ptr
+  %res = call fp128 @llvm.experimental.constrained.floor.f128(
+                        fp128 %src,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+  store fp128 %res, fp128 *%ptr
+  ret void
+}
+
+; Test ceil for f32.
+declare float @llvm.experimental.constrained.ceil.f32(float, metadata, metadata)
+define float @f10(float %f) {
+; CHECK-LABEL: f10:
+; CHECK: brasl %r14, ceilf@PLT
+; CHECK: br %r14
+  %res = call float @llvm.experimental.constrained.ceil.f32(
+                        float %f,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+  ret float %res
+}
+
+; Test ceil for f64.
+declare double @llvm.experimental.constrained.ceil.f64(double, metadata, metadata)
+define double @f11(double %f) {
+; CHECK-LABEL: f11:
+; CHECK: brasl %r14, ceil@PLT
+; CHECK: br %r14
+  %res = call double @llvm.experimental.constrained.ceil.f64(
+                        double %f,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+  ret double %res
+}
+
+; Test ceil for f128.
+declare fp128 @llvm.experimental.constrained.ceil.f128(fp128, metadata, metadata)
+define void @f12(fp128 *%ptr) {
+; CHECK-LABEL: f12:
+; CHECK: brasl %r14, ceill@PLT
+; CHECK: br %r14
+  %src = load fp128, fp128 *%ptr
+  %res = call fp128 @llvm.experimental.constrained.ceil.f128(
+                        fp128 %src,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+  store fp128 %res, fp128 *%ptr
+  ret void
+}
+
+; Test trunc for f32.
+declare float @llvm.experimental.constrained.trunc.f32(float, metadata, metadata)
+define float @f13(float %f) {
+; CHECK-LABEL: f13:
+; CHECK: brasl %r14, truncf@PLT
+; CHECK: br %r14
+  %res = call float @llvm.experimental.constrained.trunc.f32(
+                        float %f,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+  ret float %res
+}
+
+; Test trunc for f64.
+declare double @llvm.experimental.constrained.trunc.f64(double, metadata, metadata)
+define double @f14(double %f) {
+; CHECK-LABEL: f14:
+; CHECK: brasl %r14, trunc@PLT
+; CHECK: br %r14
+  %res = call double @llvm.experimental.constrained.trunc.f64(
+                        double %f,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+  ret double %res
+}
+
+; Test trunc for f128.
+declare fp128 @llvm.experimental.constrained.trunc.f128(fp128, metadata, metadata)
+define void @f15(fp128 *%ptr) {
+; CHECK-LABEL: f15:
+; CHECK: brasl %r14, truncl@PLT
+; CHECK: br %r14
+  %src = load fp128, fp128 *%ptr
+  %res = call fp128 @llvm.experimental.constrained.trunc.f128(
+                        fp128 %src,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+  store fp128 %res, fp128 *%ptr
+  ret void
+}
+
+; Test round for f32.
+declare float @llvm.experimental.constrained.round.f32(float, metadata, metadata)
+define float @f16(float %f) {
+; CHECK-LABEL: f16:
+; CHECK: brasl %r14, roundf@PLT
+; CHECK: br %r14
+  %res = call float @llvm.experimental.constrained.round.f32(
+                        float %f,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+  ret float %res
+}
+
+; Test round for f64.
+declare double @llvm.experimental.constrained.round.f64(double, metadata, metadata)
+define double @f17(double %f) {
+; CHECK-LABEL: f17:
+; CHECK: brasl %r14, round@PLT
+; CHECK: br %r14
+  %res = call double @llvm.experimental.constrained.round.f64(
+                        double %f,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+  ret double %res
+}
+
+; Test round for f128.
+declare fp128 @llvm.experimental.constrained.round.f128(fp128, metadata, metadata)
+define void @f18(fp128 *%ptr) {
+; CHECK-LABEL: f18:
+; CHECK: brasl %r14, roundl@PLT
+; CHECK: br %r14
+  %src = load fp128, fp128 *%ptr
+  %res = call fp128 @llvm.experimental.constrained.round.f128(
+                        fp128 %src,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+  store fp128 %res, fp128 *%ptr
+  ret void
+}
+
Index: test/CodeGen/SystemZ/fp-strict-round-02.ll
===================================================================
--- test/CodeGen/SystemZ/fp-strict-round-02.ll
+++ test/CodeGen/SystemZ/fp-strict-round-02.ll
@@ -0,0 +1,254 @@
+; Test strict rounding functions for z196 and above.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z196 \
+; RUN:   | FileCheck -check-prefix=CHECK -check-prefix=CHECK-SCALAR %s
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 \
+; RUN:   | FileCheck -check-prefix=CHECK -check-prefix=CHECK-VECTOR %s
+
+; Test rint for f32.
+declare float @llvm.experimental.constrained.rint.f32(float, metadata, metadata)
+define float @f1(float %f) {
+; CHECK-LABEL: f1:
+; CHECK: fiebr %f0, 0, %f0
+; CHECK: br %r14
+  %res = call float @llvm.experimental.constrained.rint.f32(
+                        float %f,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+  ret float %res
+}
+
+; Test rint for f64.
+declare double @llvm.experimental.constrained.rint.f64(double, metadata, metadata)
+define double @f2(double %f) {
+; CHECK-LABEL: f2:
+; CHECK-SCALAR: fidbr %f0, 0, %f0
+; CHECK-VECTOR: fidbra %f0, 0, %f0, 0
+; CHECK: br %r14
+  %res = call double @llvm.experimental.constrained.rint.f64(
+                        double %f,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+  ret double %res
+}
+
+; Test rint for f128.
+declare fp128 @llvm.experimental.constrained.rint.f128(fp128, metadata, metadata)
+define void @f3(fp128 *%ptr) {
+; CHECK-LABEL: f3:
+; CHECK: fixbr %f0, 0, %f0
+; CHECK: br %r14
+  %src = load fp128, fp128 *%ptr
+  %res = call fp128 @llvm.experimental.constrained.rint.f128(
+                        fp128 %src,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+  store fp128 %res, fp128 *%ptr
+  ret void
+}
+
+; Test nearbyint for f32.
+declare float @llvm.experimental.constrained.nearbyint.f32(float, metadata, metadata)
+define float @f4(float %f) {
+; CHECK-LABEL: f4:
+; CHECK: fiebra %f0, 0, %f0, 4
+; CHECK: br %r14
+  %res = call float @llvm.experimental.constrained.nearbyint.f32(
+                        float %f,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+  ret float %res
+}
+
+; Test nearbyint for f64.
+declare double @llvm.experimental.constrained.nearbyint.f64(double, metadata, metadata)
+define double @f5(double %f) {
+; CHECK-LABEL: f5:
+; CHECK: fidbra %f0, 0, %f0, 4
+; CHECK: br %r14
+  %res = call double @llvm.experimental.constrained.nearbyint.f64(
+                        double %f,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+  ret double %res
+}
+
+; Test nearbyint for f128.
+declare fp128 @llvm.experimental.constrained.nearbyint.f128(fp128, metadata, metadata)
+define void @f6(fp128 *%ptr) {
+; CHECK-LABEL: f6:
+; CHECK: fixbra %f0, 0, %f0, 4
+; CHECK: br %r14
+  %src = load fp128, fp128 *%ptr
+  %res = call fp128 @llvm.experimental.constrained.nearbyint.f128(
+                        fp128 %src,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+  store fp128 %res, fp128 *%ptr
+  ret void
+}
+
+; Test floor for f32.
+declare float @llvm.experimental.constrained.floor.f32(float, metadata, metadata)
+define float @f7(float %f) {
+; CHECK-LABEL: f7:
+; CHECK: fiebra %f0, 7, %f0, 4
+; CHECK: br %r14
+  %res = call float @llvm.experimental.constrained.floor.f32(
+                        float %f,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+  ret float %res
+}
+
+; Test floor for f64.
+declare double @llvm.experimental.constrained.floor.f64(double, metadata, metadata)
+define double @f8(double %f) {
+; CHECK-LABEL: f8:
+; CHECK: fidbra %f0, 7, %f0, 4
+; CHECK: br %r14
+  %res = call double @llvm.experimental.constrained.floor.f64(
+                        double %f,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+  ret double %res
+}
+
+; Test floor for f128.
+declare fp128 @llvm.experimental.constrained.floor.f128(fp128, metadata, metadata)
+define void @f9(fp128 *%ptr) {
+; CHECK-LABEL: f9:
+; CHECK: fixbra %f0, 7, %f0, 4
+; CHECK: br %r14
+  %src = load fp128, fp128 *%ptr
+  %res = call fp128 @llvm.experimental.constrained.floor.f128(
+                        fp128 %src,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+  store fp128 %res, fp128 *%ptr
+  ret void
+}
+
+; Test ceil for f32.
+declare float @llvm.experimental.constrained.ceil.f32(float, metadata, metadata)
+define float @f10(float %f) {
+; CHECK-LABEL: f10:
+; CHECK: fiebra %f0, 6, %f0, 4
+; CHECK: br %r14
+  %res = call float @llvm.experimental.constrained.ceil.f32(
+                        float %f,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+  ret float %res
+}
+
+; Test ceil for f64.
+declare double @llvm.experimental.constrained.ceil.f64(double, metadata, metadata)
+define double @f11(double %f) {
+; CHECK-LABEL: f11:
+; CHECK: fidbra %f0, 6, %f0, 4
+; CHECK: br %r14
+  %res = call double @llvm.experimental.constrained.ceil.f64(
+                        double %f,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+  ret double %res
+}
+
+; Test ceil for f128.
+declare fp128 @llvm.experimental.constrained.ceil.f128(fp128, metadata, metadata)
+define void @f12(fp128 *%ptr) {
+; CHECK-LABEL: f12:
+; CHECK: fixbra %f0, 6, %f0, 4
+; CHECK: br %r14
+  %src = load fp128, fp128 *%ptr
+  %res = call fp128 @llvm.experimental.constrained.ceil.f128(
+                        fp128 %src,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+  store fp128 %res, fp128 *%ptr
+  ret void
+}
+
+; Test trunc for f32.
+declare float @llvm.experimental.constrained.trunc.f32(float, metadata, metadata)
+define float @f13(float %f) {
+; CHECK-LABEL: f13:
+; CHECK: fiebra %f0, 5, %f0, 4
+; CHECK: br %r14
+  %res = call float @llvm.experimental.constrained.trunc.f32(
+                        float %f,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+  ret float %res
+}
+
+; Test trunc for f64.
+declare double @llvm.experimental.constrained.trunc.f64(double, metadata, metadata)
+define double @f14(double %f) {
+; CHECK-LABEL: f14:
+; CHECK: fidbra %f0, 5, %f0, 4
+; CHECK: br %r14
+  %res = call double @llvm.experimental.constrained.trunc.f64(
+                        double %f,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+  ret double %res
+}
+
+; Test trunc for f128.
+declare fp128 @llvm.experimental.constrained.trunc.f128(fp128, metadata, metadata)
+define void @f15(fp128 *%ptr) {
+; CHECK-LABEL: f15:
+; CHECK: fixbra %f0, 5, %f0, 4
+; CHECK: br %r14
+  %src = load fp128, fp128 *%ptr
+  %res = call fp128 @llvm.experimental.constrained.trunc.f128(
+                        fp128 %src,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+  store fp128 %res, fp128 *%ptr
+  ret void
+}
+
+; Test round for f32.
+declare float @llvm.experimental.constrained.round.f32(float, metadata, metadata)
+define float @f16(float %f) {
+; CHECK-LABEL: f16:
+; CHECK: fiebra %f0, 1, %f0, 4
+; CHECK: br %r14
+  %res = call float @llvm.experimental.constrained.round.f32(
+                        float %f,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+  ret float %res
+}
+
+; Test round for f64.
+declare double @llvm.experimental.constrained.round.f64(double, metadata, metadata)
+define double @f17(double %f) {
+; CHECK-LABEL: f17:
+; CHECK: fidbra %f0, 1, %f0, 4
+; CHECK: br %r14
+  %res = call double @llvm.experimental.constrained.round.f64(
+                        double %f,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+  ret double %res
+}
+
+; Test round for f128.
+declare fp128 @llvm.experimental.constrained.round.f128(fp128, metadata, metadata)
+define void @f18(fp128 *%ptr) {
+; CHECK-LABEL: f18:
+; CHECK: fixbra %f0, 1, %f0, 4
+; CHECK: br %r14
+  %src = load fp128, fp128 *%ptr
+  %res = call fp128 @llvm.experimental.constrained.round.f128(
+                        fp128 %src,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+  store fp128 %res, fp128 *%ptr
+  ret void
+}
+
Index: test/CodeGen/SystemZ/fp-strict-round-03.ll
===================================================================
--- test/CodeGen/SystemZ/fp-strict-round-03.ll
+++ test/CodeGen/SystemZ/fp-strict-round-03.ll
@@ -0,0 +1,262 @@
+; Test strict rounding functions for z14 and above.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z14 | FileCheck %s
+
+; Test rint for f32.
+declare float @llvm.experimental.constrained.rint.f32(float, metadata, metadata)
+define float @f1(float %f) {
+; CHECK-LABEL: f1:
+; CHECK: fiebra %f0, 0, %f0, 0
+; CHECK: br %r14
+  %res = call float @llvm.experimental.constrained.rint.f32(
+                        float %f,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+  ret float %res
+}
+
+; Test rint for f64.
+declare double @llvm.experimental.constrained.rint.f64(double, metadata, metadata)
+define double @f2(double %f) {
+; CHECK-LABEL: f2:
+; CHECK: fidbra %f0, 0, %f0, 0
+; CHECK: br %r14
+  %res = call double @llvm.experimental.constrained.rint.f64(
+                        double %f,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+  ret double %res
+}
+
+; Test rint for f128.
+declare fp128 @llvm.experimental.constrained.rint.f128(fp128, metadata, metadata)
+define void @f3(fp128 *%ptr) {
+; CHECK-LABEL: f3:
+; CHECK: vl [[REG:%v[0-9]+]], 0(%r2)
+; CHECK: wfixb [[RES:%v[0-9]+]], [[REG]], 0, 0
+; CHECK: vst [[RES]], 0(%r2)
+; CHECK: br %r14
+  %src = load fp128, fp128 *%ptr
+  %res = call fp128 @llvm.experimental.constrained.rint.f128(
+                        fp128 %src,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+  store fp128 %res, fp128 *%ptr
+  ret void
+}
+
+; Test nearbyint for f32.
+declare float @llvm.experimental.constrained.nearbyint.f32(float, metadata, metadata)
+define float @f4(float %f) {
+; CHECK-LABEL: f4:
+; CHECK: fiebra %f0, 0, %f0, 4
+; CHECK: br %r14
+  %res = call float @llvm.experimental.constrained.nearbyint.f32(
+                        float %f,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+  ret float %res
+}
+
+; Test nearbyint for f64.
+declare double @llvm.experimental.constrained.nearbyint.f64(double, metadata, metadata)
+define double @f5(double %f) {
+; CHECK-LABEL: f5:
+; CHECK: fidbra %f0, 0, %f0, 4
+; CHECK: br %r14
+  %res = call double @llvm.experimental.constrained.nearbyint.f64(
+                        double %f,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+  ret double %res
+}
+
+; Test nearbyint for f128.
+declare fp128 @llvm.experimental.constrained.nearbyint.f128(fp128, metadata, metadata)
+define void @f6(fp128 *%ptr) {
+; CHECK-LABEL: f6:
+; CHECK: vl [[REG:%v[0-9]+]], 0(%r2)
+; CHECK: wfixb [[RES:%v[0-9]+]], [[REG]], 4, 0
+; CHECK: vst [[RES]], 0(%r2)
+; CHECK: br %r14
+  %src = load fp128, fp128 *%ptr
+  %res = call fp128 @llvm.experimental.constrained.nearbyint.f128(
+                        fp128 %src,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+  store fp128 %res, fp128 *%ptr
+  ret void
+}
+
+; Test floor for f32.
+declare float @llvm.experimental.constrained.floor.f32(float, metadata, metadata)
+define float @f7(float %f) {
+; CHECK-LABEL: f7:
+; CHECK: fiebra %f0, 7, %f0, 4
+; CHECK: br %r14
+  %res = call float @llvm.experimental.constrained.floor.f32(
+                        float %f,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+  ret float %res
+}
+
+; Test floor for f64.
+declare double @llvm.experimental.constrained.floor.f64(double, metadata, metadata)
+define double @f8(double %f) {
+; CHECK-LABEL: f8:
+; CHECK: fidbra %f0, 7, %f0, 4
+; CHECK: br %r14
+  %res = call double @llvm.experimental.constrained.floor.f64(
+                        double %f,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+  ret double %res
+}
+
+; Test floor for f128.
+declare fp128 @llvm.experimental.constrained.floor.f128(fp128, metadata, metadata)
+define void @f9(fp128 *%ptr) {
+; CHECK-LABEL: f9:
+; CHECK: vl [[REG:%v[0-9]+]], 0(%r2)
+; CHECK: wfixb [[RES:%v[0-9]+]], [[REG]], 4, 7
+; CHECK: vst [[RES]], 0(%r2)
+; CHECK: br %r14
+  %src = load fp128, fp128 *%ptr
+  %res = call fp128 @llvm.experimental.constrained.floor.f128(
+                        fp128 %src,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+  store fp128 %res, fp128 *%ptr
+  ret void
+}
+
+; Test ceil for f32.
+declare float @llvm.experimental.constrained.ceil.f32(float, metadata, metadata)
+define float @f10(float %f) {
+; CHECK-LABEL: f10:
+; CHECK: fiebra %f0, 6, %f0, 4
+; CHECK: br %r14
+  %res = call float @llvm.experimental.constrained.ceil.f32(
+                        float %f,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+  ret float %res
+}
+
+; Test ceil for f64.
+declare double @llvm.experimental.constrained.ceil.f64(double, metadata, metadata)
+define double @f11(double %f) {
+; CHECK-LABEL: f11:
+; CHECK: fidbra %f0, 6, %f0, 4
+; CHECK: br %r14
+  %res = call double @llvm.experimental.constrained.ceil.f64(
+                        double %f,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+  ret double %res
+}
+
+; Test ceil for f128.
+declare fp128 @llvm.experimental.constrained.ceil.f128(fp128, metadata, metadata)
+define void @f12(fp128 *%ptr) {
+; CHECK-LABEL: f12:
+; CHECK: vl [[REG:%v[0-9]+]], 0(%r2)
+; CHECK: wfixb [[RES:%v[0-9]+]], [[REG]], 4, 6
+; CHECK: vst [[RES]], 0(%r2)
+; CHECK: br %r14
+  %src = load fp128, fp128 *%ptr
+  %res = call fp128 @llvm.experimental.constrained.ceil.f128(
+                        fp128 %src,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+  store fp128 %res, fp128 *%ptr
+  ret void
+}
+
+; Test trunc for f32.
+declare float @llvm.experimental.constrained.trunc.f32(float, metadata, metadata)
+define float @f13(float %f) {
+; CHECK-LABEL: f13:
+; CHECK: fiebra %f0, 5, %f0, 4
+; CHECK: br %r14
+  %res = call float @llvm.experimental.constrained.trunc.f32(
+                        float %f,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+  ret float %res
+}
+
+; Test trunc for f64.
+declare double @llvm.experimental.constrained.trunc.f64(double, metadata, metadata)
+define double @f14(double %f) {
+; CHECK-LABEL: f14:
+; CHECK: fidbra %f0, 5, %f0, 4
+; CHECK: br %r14
+  %res = call double @llvm.experimental.constrained.trunc.f64(
+                        double %f,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+  ret double %res
+}
+
+; Test trunc for f128.
+declare fp128 @llvm.experimental.constrained.trunc.f128(fp128, metadata, metadata)
+define void @f15(fp128 *%ptr) {
+; CHECK-LABEL: f15:
+; CHECK: vl [[REG:%v[0-9]+]], 0(%r2)
+; CHECK: wfixb [[RES:%v[0-9]+]], [[REG]], 4, 5
+; CHECK: vst [[RES]], 0(%r2)
+; CHECK: br %r14
+  %src = load fp128, fp128 *%ptr
+  %res = call fp128 @llvm.experimental.constrained.trunc.f128(
+                        fp128 %src,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+  store fp128 %res, fp128 *%ptr
+  ret void
+}
+
+; Test round for f32.
+declare float @llvm.experimental.constrained.round.f32(float, metadata, metadata)
+define float @f16(float %f) {
+; CHECK-LABEL: f16:
+; CHECK: fiebra %f0, 1, %f0, 4
+; CHECK: br %r14
+  %res = call float @llvm.experimental.constrained.round.f32(
+                        float %f,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+  ret float %res
+}
+
+; Test round for f64.
+declare double @llvm.experimental.constrained.round.f64(double, metadata, metadata)
+define double @f17(double %f) {
+; CHECK-LABEL: f17:
+; CHECK: fidbra %f0, 1, %f0, 4
+; CHECK: br %r14
+  %res = call double @llvm.experimental.constrained.round.f64(
+                        double %f,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+  ret double %res
+}
+
+; Test round for f128.
+declare fp128 @llvm.experimental.constrained.round.f128(fp128, metadata, metadata)
+define void @f18(fp128 *%ptr) {
+; CHECK-LABEL: f18:
+; CHECK: vl [[REG:%v[0-9]+]], 0(%r2)
+; CHECK: wfixb [[RES:%v[0-9]+]], [[REG]], 4, 1
+; CHECK: vst [[RES]], 0(%r2)
+; CHECK: br %r14
+  %src = load fp128, fp128 *%ptr
+  %res = call fp128 @llvm.experimental.constrained.round.f128(
+                        fp128 %src,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+  store fp128 %res, fp128 *%ptr
+  ret void
+}
+
Index: test/CodeGen/SystemZ/fp-strict-sqrt-01.ll
===================================================================
--- test/CodeGen/SystemZ/fp-strict-sqrt-01.ll
+++ test/CodeGen/SystemZ/fp-strict-sqrt-01.ll
@@ -0,0 +1,94 @@
+; Test strict 32-bit square root.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 \
+; RUN:   | FileCheck -check-prefix=CHECK -check-prefix=CHECK-SCALAR %s
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z14 | FileCheck %s
+
+declare float @llvm.experimental.constrained.sqrt.f32(float, metadata, metadata)
+
+; Check register square root.
+define float @f1(float %val) {
+; CHECK-LABEL: f1:
+; CHECK: sqebr %f0, %f0
+; CHECK: br %r14
+  %res = call float @llvm.experimental.constrained.sqrt.f32(
+                        float %val,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+  ret float %res
+}
+
+; Check the low end of the SQEB range.
+define float @f2(float *%ptr) {
+; CHECK-LABEL: f2:
+; CHECK: sqeb %f0, 0(%r2)
+; CHECK: br %r14
+  %val = load float, float *%ptr
+  %res = call float @llvm.experimental.constrained.sqrt.f32(
+                        float %val,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+  ret float %res
+}
+
+; Check the high end of the aligned SQEB range.
+define float @f3(float *%base) {
+; CHECK-LABEL: f3:
+; CHECK: sqeb %f0, 4092(%r2)
+; CHECK: br %r14
+  %ptr = getelementptr float, float *%base, i64 1023
+  %val = load float, float *%ptr
+  %res = call float @llvm.experimental.constrained.sqrt.f32(
+                        float %val,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+  ret float %res
+}
+
+; Check the next word up, which needs separate address logic.
+; Other sequences besides this one would be OK.
+define float @f4(float *%base) {
+; CHECK-LABEL: f4:
+; CHECK: aghi %r2, 4096
+; CHECK: sqeb %f0, 0(%r2)
+; CHECK: br %r14
+  %ptr = getelementptr float, float *%base, i64 1024
+  %val = load float, float *%ptr
+  %res = call float @llvm.experimental.constrained.sqrt.f32(
+                        float %val,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+  ret float %res
+}
+
+; Check negative displacements, which also need separate address logic.
+define float @f5(float *%base) {
+; CHECK-LABEL: f5:
+; CHECK: aghi %r2, -4
+; CHECK: sqeb %f0, 0(%r2)
+; CHECK: br %r14
+  %ptr = getelementptr float, float *%base, i64 -1
+  %val = load float, float *%ptr
+  %res = call float @llvm.experimental.constrained.sqrt.f32(
+                        float %val,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+  ret float %res
+}
+
+; Check that SQEB allows indices.
+define float @f6(float *%base, i64 %index) {
+; CHECK-LABEL: f6:
+; CHECK: sllg %r1, %r3, 2
+; CHECK: sqeb %f0, 400(%r1,%r2)
+; CHECK: br %r14
+  %ptr1 = getelementptr float, float *%base, i64 %index
+  %ptr2 = getelementptr float, float *%ptr1, i64 100
+  %val = load float, float *%ptr2
+  %res = call float @llvm.experimental.constrained.sqrt.f32(
+                        float %val,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+  ret float %res
+}
+
Index: test/CodeGen/SystemZ/fp-strict-sqrt-02.ll
===================================================================
--- test/CodeGen/SystemZ/fp-strict-sqrt-02.ll
+++ test/CodeGen/SystemZ/fp-strict-sqrt-02.ll
@@ -0,0 +1,94 @@
+; Test strict 64-bit square root.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 \
+; RUN:   | FileCheck -check-prefix=CHECK -check-prefix=CHECK-SCALAR %s
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
+
+declare double @llvm.experimental.constrained.sqrt.f64(double, metadata, metadata)
+
+; Check register square root.
+define double @f1(double %val) {
+; CHECK-LABEL: f1:
+; CHECK: sqdbr %f0, %f0
+; CHECK: br %r14
+  %res = call double @llvm.experimental.constrained.sqrt.f64(
+                        double %val,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+  ret double %res
+}
+
+; Check the low end of the SQDB range.
+define double @f2(double *%ptr) {
+; CHECK-LABEL: f2:
+; CHECK: sqdb %f0, 0(%r2)
+; CHECK: br %r14
+  %val = load double, double *%ptr
+  %res = call double @llvm.experimental.constrained.sqrt.f64(
+                        double %val,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+  ret double %res
+}
+
+; Check the high end of the aligned SQDB range.
+define double @f3(double *%base) {
+; CHECK-LABEL: f3:
+; CHECK: sqdb %f0, 4088(%r2)
+; CHECK: br %r14
+  %ptr = getelementptr double, double *%base, i64 511
+  %val = load double, double *%ptr
+  %res = call double @llvm.experimental.constrained.sqrt.f64(
+                        double %val,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+  ret double %res
+}
+
+; Check the next doubleword up, which needs separate address logic.
+; Other sequences besides this one would be OK.
+define double @f4(double *%base) {
+; CHECK-LABEL: f4:
+; CHECK: aghi %r2, 4096
+; CHECK: sqdb %f0, 0(%r2)
+; CHECK: br %r14
+  %ptr = getelementptr double, double *%base, i64 512
+  %val = load double, double *%ptr
+  %res = call double @llvm.experimental.constrained.sqrt.f64(
+                        double %val,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+  ret double %res
+}
+
+; Check negative displacements, which also need separate address logic.
+define double @f5(double *%base) {
+; CHECK-LABEL: f5:
+; CHECK: aghi %r2, -8
+; CHECK: sqdb %f0, 0(%r2)
+; CHECK: br %r14
+  %ptr = getelementptr double, double *%base, i64 -1
+  %val = load double, double *%ptr
+  %res = call double @llvm.experimental.constrained.sqrt.f64(
+                        double %val,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+  ret double %res
+}
+
+; Check that SQDB allows indices.
+define double @f6(double *%base, i64 %index) {
+; CHECK-LABEL: f6:
+; CHECK: sllg %r1, %r3, 3
+; CHECK: sqdb %f0, 800(%r1,%r2)
+; CHECK: br %r14
+  %ptr1 = getelementptr double, double *%base, i64 %index
+  %ptr2 = getelementptr double, double *%ptr1, i64 100
+  %val = load double, double *%ptr2
+  %res = call double @llvm.experimental.constrained.sqrt.f64(
+                        double %val,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+  ret double %res
+}
+
Index: test/CodeGen/SystemZ/fp-strict-sqrt-03.ll
===================================================================
--- test/CodeGen/SystemZ/fp-strict-sqrt-03.ll
+++ test/CodeGen/SystemZ/fp-strict-sqrt-03.ll
@@ -0,0 +1,23 @@
+; Test strict 128-bit square root.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+
+declare fp128 @llvm.experimental.constrained.sqrt.f128(fp128, metadata, metadata)
+
+; There's no memory form of SQXBR.
+define void @f1(fp128 *%ptr) {
+; CHECK-LABEL: f1:
+; CHECK: ld %f0, 0(%r2)
+; CHECK: ld %f2, 8(%r2)
+; CHECK: sqxbr %f0, %f0
+; CHECK: std %f0, 0(%r2)
+; CHECK: std %f2, 8(%r2)
+; CHECK: br %r14
+  %orig = load fp128, fp128 *%ptr
+  %sqrt = call fp128 @llvm.experimental.constrained.sqrt.f128(
+                        fp128 %orig,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+  store fp128 %sqrt, fp128 *%ptr
+  ret void
+}
Index: test/CodeGen/SystemZ/fp-strict-sqrt-04.ll
===================================================================
--- test/CodeGen/SystemZ/fp-strict-sqrt-04.ll
+++ test/CodeGen/SystemZ/fp-strict-sqrt-04.ll
@@ -0,0 +1,20 @@
+; Test strict 128-bit floating-point square root on z14.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z14 | FileCheck %s
+
+declare fp128 @llvm.experimental.constrained.sqrt.f128(fp128, metadata, metadata)
+
+define void @f1(fp128 *%ptr) {
+; CHECK-LABEL: f1:
+; CHECK-DAG: vl [[REG:%v[0-9]+]], 0(%r2)
+; CHECK: wfsqxb [[RES:%v[0-9]+]], [[REG]]
+; CHECK: vst [[RES]], 0(%r2)
+; CHECK: br %r14
+  %f = load fp128, fp128 *%ptr
+  %res = call fp128 @llvm.experimental.constrained.sqrt.f128(
+                        fp128 %f,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+  store fp128 %res, fp128 *%ptr
+  ret void
+}
Index: test/CodeGen/SystemZ/fp-strict-sub-01.ll
===================================================================
--- test/CodeGen/SystemZ/fp-strict-sub-01.ll
+++ test/CodeGen/SystemZ/fp-strict-sub-01.ll
@@ -0,0 +1,173 @@
+; Test strict 32-bit floating-point subtraction.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 \
+; RUN:   | FileCheck -check-prefix=CHECK -check-prefix=CHECK-SCALAR %s
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z14 | FileCheck %s
+
+declare float @foo()
+declare float @llvm.experimental.constrained.fsub.f32(float, float, metadata, metadata)
+
+; Check register subtraction.
+define float @f1(float %f1, float %f2) {
+; CHECK-LABEL: f1:
+; CHECK: sebr %f0, %f2
+; CHECK: br %r14
+  %res = call float @llvm.experimental.constrained.fsub.f32(
+                        float %f1, float %f2,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+  ret float %res
+}
+
+; Check the low end of the SEB range.
+define float @f2(float %f1, float *%ptr) {
+; CHECK-LABEL: f2:
+; CHECK: seb %f0, 0(%r2)
+; CHECK: br %r14
+  %f2 = load float, float *%ptr
+  %res = call float @llvm.experimental.constrained.fsub.f32(
+                        float %f1, float %f2,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+  ret float %res
+}
+
+; Check the high end of the aligned SEB range.
+define float @f3(float %f1, float *%base) {
+; CHECK-LABEL: f3:
+; CHECK: seb %f0, 4092(%r2)
+; CHECK: br %r14
+  %ptr = getelementptr float, float *%base, i64 1023
+  %f2 = load float, float *%ptr
+  %res = call float @llvm.experimental.constrained.fsub.f32(
+                        float %f1, float %f2,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+  ret float %res
+}
+
+; Check the next word up, which needs separate address logic.
+; Other sequences besides this one would be OK.
+define float @f4(float %f1, float *%base) {
+; CHECK-LABEL: f4:
+; CHECK: aghi %r2, 4096
+; CHECK: seb %f0, 0(%r2)
+; CHECK: br %r14
+  %ptr = getelementptr float, float *%base, i64 1024
+  %f2 = load float, float *%ptr
+  %res = call float @llvm.experimental.constrained.fsub.f32(
+                        float %f1, float %f2,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+  ret float %res
+}
+
+; Check negative displacements, which also need separate address logic.
+define float @f5(float %f1, float *%base) {
+; CHECK-LABEL: f5:
+; CHECK: aghi %r2, -4
+; CHECK: seb %f0, 0(%r2)
+; CHECK: br %r14
+  %ptr = getelementptr float, float *%base, i64 -1
+  %f2 = load float, float *%ptr
+  %res = call float @llvm.experimental.constrained.fsub.f32(
+                        float %f1, float %f2,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+  ret float %res
+}
+
+; Check that SEB allows indices.
+define float @f6(float %f1, float *%base, i64 %index) {
+; CHECK-LABEL: f6:
+; CHECK: sllg %r1, %r3, 2
+; CHECK: seb %f0, 400(%r1,%r2)
+; CHECK: br %r14
+  %ptr1 = getelementptr float, float *%base, i64 %index
+  %ptr2 = getelementptr float, float *%ptr1, i64 100
+  %f2 = load float, float *%ptr2
+  %res = call float @llvm.experimental.constrained.fsub.f32(
+                        float %f1, float %f2,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+  ret float %res
+}
+
+; Check that subtractions of spilled values can use SEB rather than SEBR.
+define float @f7(float *%ptr0) {
+; CHECK-LABEL: f7:
+; CHECK: brasl %r14, foo@PLT
+; CHECK-SCALAR: seb %f0, 16{{[04]}}(%r15)
+; CHECK: br %r14
+  %ptr1 = getelementptr float, float *%ptr0, i64 2
+  %ptr2 = getelementptr float, float *%ptr0, i64 4
+  %ptr3 = getelementptr float, float *%ptr0, i64 6
+  %ptr4 = getelementptr float, float *%ptr0, i64 8
+  %ptr5 = getelementptr float, float *%ptr0, i64 10
+  %ptr6 = getelementptr float, float *%ptr0, i64 12
+  %ptr7 = getelementptr float, float *%ptr0, i64 14
+  %ptr8 = getelementptr float, float *%ptr0, i64 16
+  %ptr9 = getelementptr float, float *%ptr0, i64 18
+  %ptr10 = getelementptr float, float *%ptr0, i64 20
+
+  %val0 = load float, float *%ptr0
+  %val1 = load float, float *%ptr1
+  %val2 = load float, float *%ptr2
+  %val3 = load float, float *%ptr3
+  %val4 = load float, float *%ptr4
+  %val5 = load float, float *%ptr5
+  %val6 = load float, float *%ptr6
+  %val7 = load float, float *%ptr7
+  %val8 = load float, float *%ptr8
+  %val9 = load float, float *%ptr9
+  %val10 = load float, float *%ptr10
+
+  %ret = call float @foo()
+
+  %sub0 = call float @llvm.experimental.constrained.fsub.f32(
+                        float %ret, float %val0,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+  %sub1 = call float @llvm.experimental.constrained.fsub.f32(
+                        float %sub0, float %val1,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+  %sub2 = call float @llvm.experimental.constrained.fsub.f32(
+                        float %sub1, float %val2,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+  %sub3 = call float @llvm.experimental.constrained.fsub.f32(
+                        float %sub2, float %val3,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+  %sub4 = call float @llvm.experimental.constrained.fsub.f32(
+                        float %sub3, float %val4,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+  %sub5 = call float @llvm.experimental.constrained.fsub.f32(
+                        float %sub4, float %val5,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+  %sub6 = call float @llvm.experimental.constrained.fsub.f32(
+                        float %sub5, float %val6,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+  %sub7 = call float @llvm.experimental.constrained.fsub.f32(
+                        float %sub6, float %val7,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+  %sub8 = call float @llvm.experimental.constrained.fsub.f32(
+                        float %sub7, float %val8,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+  %sub9 = call float @llvm.experimental.constrained.fsub.f32(
+                        float %sub8, float %val9,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+  %sub10 = call float @llvm.experimental.constrained.fsub.f32(
+                        float %sub9, float %val10,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+
+  ret float %sub10
+}
Index: test/CodeGen/SystemZ/fp-strict-sub-02.ll
===================================================================
--- test/CodeGen/SystemZ/fp-strict-sub-02.ll
+++ test/CodeGen/SystemZ/fp-strict-sub-02.ll
@@ -0,0 +1,173 @@
+; Test strict 64-bit floating-point subtraction.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 \
+; RUN:   | FileCheck -check-prefix=CHECK -check-prefix=CHECK-SCALAR %s
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
+
+declare double @foo()
+declare double @llvm.experimental.constrained.fsub.f64(double, double, metadata, metadata)
+
+; Check register subtraction.
+define double @f1(double %f1, double %f2) {
+; CHECK-LABEL: f1:
+; CHECK: sdbr %f0, %f2
+; CHECK: br %r14
+  %res = call double @llvm.experimental.constrained.fsub.f64(
+                        double %f1, double %f2,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+  ret double %res
+}
+
+; Check the low end of the SDB range.
+define double @f2(double %f1, double *%ptr) {
+; CHECK-LABEL: f2:
+; CHECK: sdb %f0, 0(%r2)
+; CHECK: br %r14
+  %f2 = load double, double *%ptr
+  %res = call double @llvm.experimental.constrained.fsub.f64(
+                        double %f1, double %f2,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+  ret double %res
+}
+
+; Check the high end of the aligned SDB range.
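+; SDB has a 12-bit unsigned displacement field, so 4088 (511 * 8) is the
+; highest in-range offset for an aligned doubleword.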
+define double @f3(double %f1, double *%base) {
+; CHECK-LABEL: f3:
+; CHECK: sdb %f0, 4088(%r2)
+; CHECK: br %r14
+  %ptr = getelementptr double, double *%base, i64 511
+  %f2 = load double, double *%ptr
+  %res = call double @llvm.experimental.constrained.fsub.f64(
+                        double %f1, double %f2,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+  ret double %res
+}
+
+; Check the next doubleword up, which needs separate address logic.
+; Other sequences besides this one would be OK.
+define double @f4(double %f1, double *%base) {
+; CHECK-LABEL: f4:
+; CHECK: aghi %r2, 4096
+; CHECK: sdb %f0, 0(%r2)
+; CHECK: br %r14
+  %ptr = getelementptr double, double *%base, i64 512
+  %f2 = load double, double *%ptr
+  %res = call double @llvm.experimental.constrained.fsub.f64(
+                        double %f1, double %f2,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+  ret double %res
+}
+
+; Check negative displacements, which also need separate address logic.
+define double @f5(double %f1, double *%base) {
+; CHECK-LABEL: f5:
+; CHECK: aghi %r2, -8
+; CHECK: sdb %f0, 0(%r2)
+; CHECK: br %r14
+  %ptr = getelementptr double, double *%base, i64 -1
+  %f2 = load double, double *%ptr
+  %res = call double @llvm.experimental.constrained.fsub.f64(
+                        double %f1, double %f2,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+  ret double %res
+}
+
+; Check that SDB allows indices.
+define double @f6(double %f1, double *%base, i64 %index) {
+; CHECK-LABEL: f6:
+; CHECK: sllg %r1, %r3, 3
+; CHECK: sdb %f0, 800(%r1,%r2)
+; CHECK: br %r14
+  %ptr1 = getelementptr double, double *%base, i64 %index
+  %ptr2 = getelementptr double, double *%ptr1, i64 100
+  %f2 = load double, double *%ptr2
+  %res = call double @llvm.experimental.constrained.fsub.f64(
+                        double %f1, double %f2,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+  ret double %res
+}
+
+; Check that subtractions of spilled values can use SDB rather than SDBR.
+define double @f7(double *%ptr0) {
+; CHECK-LABEL: f7:
+; CHECK: brasl %r14, foo@PLT
+; CHECK-SCALAR: sdb %f0, 16{{[04]}}(%r15)
+; CHECK: br %r14
+  %ptr1 = getelementptr double, double *%ptr0, i64 2
+  %ptr2 = getelementptr double, double *%ptr0, i64 4
+  %ptr3 = getelementptr double, double *%ptr0, i64 6
+  %ptr4 = getelementptr double, double *%ptr0, i64 8
+  %ptr5 = getelementptr double, double *%ptr0, i64 10
+  %ptr6 = getelementptr double, double *%ptr0, i64 12
+  %ptr7 = getelementptr double, double *%ptr0, i64 14
+  %ptr8 = getelementptr double, double *%ptr0, i64 16
+  %ptr9 = getelementptr double, double *%ptr0, i64 18
+  %ptr10 = getelementptr double, double *%ptr0, i64 20
+
+  %val0 = load double, double *%ptr0
+  %val1 = load double, double *%ptr1
+  %val2 = load double, double *%ptr2
+  %val3 = load double, double *%ptr3
+  %val4 = load double, double *%ptr4
+  %val5 = load double, double *%ptr5
+  %val6 = load double, double *%ptr6
+  %val7 = load double, double *%ptr7
+  %val8 = load double, double *%ptr8
+  %val9 = load double, double *%ptr9
+  %val10 = load double, double *%ptr10
+
+  %ret = call double @foo()
+
+  %sub0 = call double @llvm.experimental.constrained.fsub.f64(
+                        double %ret, double %val0,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+  %sub1 = call double @llvm.experimental.constrained.fsub.f64(
+                        double %sub0, double %val1,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+  %sub2 = call double @llvm.experimental.constrained.fsub.f64(
+                        double %sub1, double %val2,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+  %sub3 = call double @llvm.experimental.constrained.fsub.f64(
+                        double %sub2, double %val3,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+  %sub4 = call double @llvm.experimental.constrained.fsub.f64(
+                        double %sub3, double %val4,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+  %sub5 = call double @llvm.experimental.constrained.fsub.f64(
+                        double %sub4, double %val5,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+  %sub6 = call double @llvm.experimental.constrained.fsub.f64(
+                        double %sub5, double %val6,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+  %sub7 = call double @llvm.experimental.constrained.fsub.f64(
+                        double %sub6, double %val7,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+  %sub8 = call double @llvm.experimental.constrained.fsub.f64(
+                        double %sub7, double %val8,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+  %sub9 = call double @llvm.experimental.constrained.fsub.f64(
+                        double %sub8, double %val9,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+  %sub10 = call double @llvm.experimental.constrained.fsub.f64(
+                        double %sub9, double %val10,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+
+  ret double %sub10
+}
Index: test/CodeGen/SystemZ/fp-strict-sub-03.ll
===================================================================
--- test/CodeGen/SystemZ/fp-strict-sub-03.ll
+++ test/CodeGen/SystemZ/fp-strict-sub-03.ll
@@ -0,0 +1,26 @@
+; Test strict 128-bit floating-point subtraction.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+
+declare fp128 @llvm.experimental.constrained.fsub.f128(fp128, fp128, metadata, metadata)
+
+; There is no memory form of 128-bit subtraction.
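+; The 128-bit value is kept in the floating-point register pair %f1/%f3.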
+define void @f1(fp128 *%ptr, float %f2) {
+; CHECK-LABEL: f1:
+; CHECK-DAG: lxebr %f0, %f0
+; CHECK-DAG: ld %f1, 0(%r2)
+; CHECK-DAG: ld %f3, 8(%r2)
+; CHECK: sxbr %f1, %f0
+; CHECK: std %f1, 0(%r2)
+; CHECK: std %f3, 8(%r2)
+; CHECK: br %r14
+  %f1 = load fp128, fp128 *%ptr
+  %f2x = fpext float %f2 to fp128
+  %diff = call fp128 @llvm.experimental.constrained.fsub.f128(
+                        fp128 %f1, fp128 %f2x,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+  store fp128 %diff, fp128 *%ptr
+  ret void
+}
Index: test/CodeGen/SystemZ/fp-strict-sub-04.ll
===================================================================
--- test/CodeGen/SystemZ/fp-strict-sub-04.ll
+++ test/CodeGen/SystemZ/fp-strict-sub-04.ll
@@ -0,0 +1,22 @@
+; Test strict 128-bit floating-point subtraction on z14.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z14 | FileCheck %s
+
+declare fp128 @llvm.experimental.constrained.fsub.f128(fp128, fp128, metadata, metadata)
+
+define void @f1(fp128 *%ptr1, fp128 *%ptr2) {
+; CHECK-LABEL: f1:
+; CHECK-DAG: vl [[REG1:%v[0-9]+]], 0(%r2)
+; CHECK-DAG: vl [[REG2:%v[0-9]+]], 0(%r3)
+; CHECK: wfsxb [[RES:%v[0-9]+]], [[REG1]], [[REG2]]
+; CHECK: vst [[RES]], 0(%r2)
+; CHECK: br %r14
+  %f1 = load fp128, fp128 *%ptr1
+  %f2 = load fp128, fp128 *%ptr2
+  %diff = call fp128 @llvm.experimental.constrained.fsub.f128(
+                        fp128 %f1, fp128 %f2,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+  store fp128 %diff, fp128 *%ptr1
+  ret void
+}
Index: test/CodeGen/SystemZ/vec-strict-add-01.ll
===================================================================
--- test/CodeGen/SystemZ/vec-strict-add-01.ll
+++ test/CodeGen/SystemZ/vec-strict-add-01.ll
@@ -0,0 +1,36 @@
+; Test strict vector addition.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
+
+declare double @llvm.experimental.constrained.fadd.f64(double, double, metadata, metadata)
+declare <2 x double> @llvm.experimental.constrained.fadd.v2f64(<2 x double>, <2 x double>, metadata, metadata)
+
+; Test a v2f64 addition.
+define <2 x double> @f5(<2 x double> %dummy, <2 x double> %val1,
+                        <2 x double> %val2) {
+; CHECK-LABEL: f5:
+; CHECK: vfadb %v24, %v26, %v28
+; CHECK: br %r14
+  %ret = call <2 x double> @llvm.experimental.constrained.fadd.v2f64(
+                        <2 x double> %val1, <2 x double> %val2,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+  ret <2 x double> %ret
+}
+
+; Test an f64 addition that uses vector registers.
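+; The element-0 extracts are free: each FP register overlays element 0 of
+; the corresponding vector register, so the scalar w-form instruction reads
+; %v24 and %v26 directly.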
+define double @f6(<2 x double> %val1, <2 x double> %val2) {
+; CHECK-LABEL: f6:
+; CHECK: wfadb %f0, %v24, %v26
+; CHECK: br %r14
+  %scalar1 = extractelement <2 x double> %val1, i32 0
+  %scalar2 = extractelement <2 x double> %val2, i32 0
+  %ret = call double @llvm.experimental.constrained.fadd.f64(
+                        double %scalar1, double %scalar2,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+  ret double %ret
+}
Index: test/CodeGen/SystemZ/vec-strict-add-02.ll
===================================================================
--- test/CodeGen/SystemZ/vec-strict-add-02.ll
+++ test/CodeGen/SystemZ/vec-strict-add-02.ll
@@ -0,0 +1,33 @@
+; Test strict vector addition on z14.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z14 | FileCheck %s
+
+declare float @llvm.experimental.constrained.fadd.f32(float, float, metadata, metadata)
+declare <4 x float> @llvm.experimental.constrained.fadd.v4f32(<4 x float>, <4 x float>, metadata, metadata)
+
+; Test a v4f32 addition.
+define <4 x float> @f1(<4 x float> %dummy, <4 x float> %val1,
+                       <4 x float> %val2) {
+; CHECK-LABEL: f1:
+; CHECK: vfasb %v24, %v26, %v28
+; CHECK: br %r14
+  %ret = call <4 x float> @llvm.experimental.constrained.fadd.v4f32(
+                        <4 x float> %val1, <4 x float> %val2,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+  ret <4 x float> %ret
+}
+
+; Test an f32 addition that uses vector registers.
+define float @f2(<4 x float> %val1, <4 x float> %val2) {
+; CHECK-LABEL: f2:
+; CHECK: wfasb %f0, %v24, %v26
+; CHECK: br %r14
+  %scalar1 = extractelement <4 x float> %val1, i32 0
+  %scalar2 = extractelement <4 x float> %val2, i32 0
+  %ret = call float @llvm.experimental.constrained.fadd.f32(
+                        float %scalar1, float %scalar2,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+  ret float %ret
+}
Index: test/CodeGen/SystemZ/vec-strict-div-01.ll
===================================================================
--- test/CodeGen/SystemZ/vec-strict-div-01.ll
+++ test/CodeGen/SystemZ/vec-strict-div-01.ll
@@ -0,0 +1,33 @@
+; Test strict vector division.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
+
+declare double @llvm.experimental.constrained.fdiv.f64(double, double, metadata, metadata)
+declare <2 x double> @llvm.experimental.constrained.fdiv.v2f64(<2 x double>, <2 x double>, metadata, metadata)
+
+; Test a v2f64 division.
+define <2 x double> @f5(<2 x double> %dummy, <2 x double> %val1,
+                        <2 x double> %val2) {
+; CHECK-LABEL: f5:
+; CHECK: vfddb %v24, %v26, %v28
+; CHECK: br %r14
+  %ret = call <2 x double> @llvm.experimental.constrained.fdiv.v2f64(
+                        <2 x double> %val1, <2 x double> %val2,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+  ret <2 x double> %ret
+}
+
+; Test an f64 division that uses vector registers.
+define double @f6(<2 x double> %val1, <2 x double> %val2) {
+; CHECK-LABEL: f6:
+; CHECK: wfddb %f0, %v24, %v26
+; CHECK: br %r14
+  %scalar1 = extractelement <2 x double> %val1, i32 0
+  %scalar2 = extractelement <2 x double> %val2, i32 0
+  %ret = call double @llvm.experimental.constrained.fdiv.f64(
+                        double %scalar1, double %scalar2,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+  ret double %ret
+}
Index: test/CodeGen/SystemZ/vec-strict-div-02.ll
===================================================================
--- test/CodeGen/SystemZ/vec-strict-div-02.ll
+++ test/CodeGen/SystemZ/vec-strict-div-02.ll
@@ -0,0 +1,33 @@
+; Test strict vector division on z14.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z14 | FileCheck %s
+
+declare float @llvm.experimental.constrained.fdiv.f32(float, float, metadata, metadata)
+declare <4 x float> @llvm.experimental.constrained.fdiv.v4f32(<4 x float>, <4 x float>, metadata, metadata)
+
+; Test a v4f32 division.
+define <4 x float> @f1(<4 x float> %dummy, <4 x float> %val1,
+                       <4 x float> %val2) {
+; CHECK-LABEL: f1:
+; CHECK: vfdsb %v24, %v26, %v28
+; CHECK: br %r14
+  %ret = call <4 x float> @llvm.experimental.constrained.fdiv.v4f32(
+                        <4 x float> %val1, <4 x float> %val2,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+  ret <4 x float> %ret
+}
+
+; Test an f32 division that uses vector registers.
+define float @f2(<4 x float> %val1, <4 x float> %val2) {
+; CHECK-LABEL: f2:
+; CHECK: wfdsb %f0, %v24, %v26
+; CHECK: br %r14
+  %scalar1 = extractelement <4 x float> %val1, i32 0
+  %scalar2 = extractelement <4 x float> %val2, i32 0
+  %ret = call float @llvm.experimental.constrained.fdiv.f32(
+                        float %scalar1, float %scalar2,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+  ret float %ret
+}
Index: test/CodeGen/SystemZ/vec-strict-max-01.ll
===================================================================
--- test/CodeGen/SystemZ/vec-strict-max-01.ll
+++ test/CodeGen/SystemZ/vec-strict-max-01.ll
@@ -0,0 +1,82 @@
+; Test strict vector maximum on z14.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z14 | FileCheck %s
+
+declare double @llvm.experimental.constrained.maxnum.f64(double, double, metadata, metadata)
+declare <2 x double> @llvm.experimental.constrained.maxnum.v2f64(<2 x double>, <2 x double>, metadata, metadata)
+
+declare float @llvm.experimental.constrained.maxnum.f32(float, float, metadata, metadata)
+declare <4 x float> @llvm.experimental.constrained.maxnum.v4f32(<4 x float>, <4 x float>, metadata, metadata)
+
+declare fp128 @llvm.experimental.constrained.maxnum.f128(fp128, fp128, metadata, metadata)
+
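+; The trailing operand of these instructions is the M6 function code;
+; value 4 selects IEEE-754 maxNum semantics, matching the maxnum intrinsics.
+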
+; Test the f64 maxnum intrinsic.
+define double @f1(double %dummy, double %val1, double %val2) {
+; CHECK-LABEL: f1:
+; CHECK: wfmaxdb %f0, %f2, %f4, 4
+; CHECK: br %r14
+  %ret = call double @llvm.experimental.constrained.maxnum.f64(
+                        double %val1, double %val2,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+  ret double %ret
+}
+
+; Test the v2f64 maxnum intrinsic.
+define <2 x double> @f2(<2 x double> %dummy, <2 x double> %val1,
+                        <2 x double> %val2) {
+; CHECK-LABEL: f2:
+; CHECK: vfmaxdb %v24, %v26, %v28, 4
+; CHECK: br %r14
+  %ret = call <2 x double> @llvm.experimental.constrained.maxnum.v2f64(
+                        <2 x double> %val1, <2 x double> %val2,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+  ret <2 x double> %ret
+}
+
+; Test the f32 maxnum intrinsic.
+define float @f3(float %dummy, float %val1, float %val2) {
+; CHECK-LABEL: f3:
+; CHECK: wfmaxsb %f0, %f2, %f4, 4
+; CHECK: br %r14
+  %ret = call float @llvm.experimental.constrained.maxnum.f32(
+                        float %val1, float %val2,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+  ret float %ret
+}
+
+; Test the v4f32 maxnum intrinsic.
+define <4 x float> @f4(<4 x float> %dummy, <4 x float> %val1,
+                       <4 x float> %val2) {
+; CHECK-LABEL: f4:
+; CHECK: vfmaxsb %v24, %v26, %v28, 4
+; CHECK: br %r14
+  %ret = call <4 x float> @llvm.experimental.constrained.maxnum.v4f32(
+                        <4 x float> %val1, <4 x float> %val2,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+  ret <4 x float> %ret
+}
+
+; Test the f128 maxnum intrinsic.
+define void @f5(fp128 *%ptr1, fp128 *%ptr2, fp128 *%dst) {
+; CHECK-LABEL: f5:
+; CHECK-DAG: vl [[REG1:%v[0-9]+]], 0(%r2)
+; CHECK-DAG: vl [[REG2:%v[0-9]+]], 0(%r3)
+; CHECK: wfmaxxb [[RES:%v[0-9]+]], [[REG1]], [[REG2]], 4
+; CHECK: vst [[RES]], 0(%r4)
+; CHECK: br %r14
+  %val1 = load fp128, fp128* %ptr1
+  %val2 = load fp128, fp128* %ptr2
+  %res = call fp128 @llvm.experimental.constrained.maxnum.f128(
+                        fp128 %val1, fp128 %val2,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+  store fp128 %res, fp128* %dst
+  ret void
+}
Index: test/CodeGen/SystemZ/vec-strict-min-01.ll
===================================================================
--- test/CodeGen/SystemZ/vec-strict-min-01.ll
+++ test/CodeGen/SystemZ/vec-strict-min-01.ll
@@ -0,0 +1,82 @@
+; Test strict vector minimum on z14.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z14 | FileCheck %s
+
+declare double @llvm.experimental.constrained.minnum.f64(double, double, metadata, metadata)
+declare <2 x double> @llvm.experimental.constrained.minnum.v2f64(<2 x double>, <2 x double>, metadata, metadata)
+
+declare float @llvm.experimental.constrained.minnum.f32(float, float, metadata, metadata)
+declare <4 x float> @llvm.experimental.constrained.minnum.v4f32(<4 x float>, <4 x float>, metadata, metadata)
+
+declare fp128 @llvm.experimental.constrained.minnum.f128(fp128, fp128, metadata, metadata)
+
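+; As in vec-strict-max-01.ll, the trailing 4 is the M6 function code,
+; here selecting IEEE-754 minNum semantics.
+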
+; Test the f64 minnum intrinsic.
+define double @f1(double %dummy, double %val1, double %val2) {
+; CHECK-LABEL: f1:
+; CHECK: wfmindb %f0, %f2, %f4, 4
+; CHECK: br %r14
+  %ret = call double @llvm.experimental.constrained.minnum.f64(
+                        double %val1, double %val2,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+  ret double %ret
+}
+
+; Test the v2f64 minnum intrinsic.
+define <2 x double> @f2(<2 x double> %dummy, <2 x double> %val1,
+                        <2 x double> %val2) {
+; CHECK-LABEL: f2:
+; CHECK: vfmindb %v24, %v26, %v28, 4
+; CHECK: br %r14
+  %ret = call <2 x double> @llvm.experimental.constrained.minnum.v2f64(
+                        <2 x double> %val1, <2 x double> %val2,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+  ret <2 x double> %ret
+}
+
+; Test the f32 minnum intrinsic.
+define float @f3(float %dummy, float %val1, float %val2) {
+; CHECK-LABEL: f3:
+; CHECK: wfminsb %f0, %f2, %f4, 4
+; CHECK: br %r14
+  %ret = call float @llvm.experimental.constrained.minnum.f32(
+                        float %val1, float %val2,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+  ret float %ret
+}
+
+; Test the v4f32 minnum intrinsic.
+define <4 x float> @f4(<4 x float> %dummy, <4 x float> %val1,
+                       <4 x float> %val2) {
+; CHECK-LABEL: f4:
+; CHECK: vfminsb %v24, %v26, %v28, 4
+; CHECK: br %r14
+  %ret = call <4 x float> @llvm.experimental.constrained.minnum.v4f32(
+                        <4 x float> %val1, <4 x float> %val2,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+  ret <4 x float> %ret
+}
+
+; Test the f128 minnum intrinsic.
+define void @f5(fp128 *%ptr1, fp128 *%ptr2, fp128 *%dst) {
+; CHECK-LABEL: f5:
+; CHECK-DAG: vl [[REG1:%v[0-9]+]], 0(%r2)
+; CHECK-DAG: vl [[REG2:%v[0-9]+]], 0(%r3)
+; CHECK: wfminxb [[RES:%v[0-9]+]], [[REG1]], [[REG2]], 4
+; CHECK: vst [[RES]], 0(%r4)
+; CHECK: br %r14
+  %val1 = load fp128, fp128* %ptr1
+  %val2 = load fp128, fp128* %ptr2
+  %res = call fp128 @llvm.experimental.constrained.minnum.f128(
+                        fp128 %val1, fp128 %val2,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+  store fp128 %res, fp128* %dst
+  ret void
+}
Index: test/CodeGen/SystemZ/vec-strict-mul-01.ll
===================================================================
--- test/CodeGen/SystemZ/vec-strict-mul-01.ll
+++ test/CodeGen/SystemZ/vec-strict-mul-01.ll
@@ -0,0 +1,33 @@
+; Test strict vector multiplication.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
+
+declare double @llvm.experimental.constrained.fmul.f64(double, double, metadata, metadata)
+declare <2 x double> @llvm.experimental.constrained.fmul.v2f64(<2 x double>, <2 x double>, metadata, metadata)
+
+; Test a v2f64 multiplication.
+define <2 x double> @f5(<2 x double> %dummy, <2 x double> %val1,
+                        <2 x double> %val2) {
+; CHECK-LABEL: f5:
+; CHECK: vfmdb %v24, %v26, %v28
+; CHECK: br %r14
+  %ret = call <2 x double> @llvm.experimental.constrained.fmul.v2f64(
+                        <2 x double> %val1, <2 x double> %val2,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+  ret <2 x double> %ret
+}
+
+; Test an f64 multiplication that uses vector registers.
+define double @f6(<2 x double> %val1, <2 x double> %val2) {
+; CHECK-LABEL: f6:
+; CHECK: wfmdb %f0, %v24, %v26
+; CHECK: br %r14
+  %scalar1 = extractelement <2 x double> %val1, i32 0
+  %scalar2 = extractelement <2 x double> %val2, i32 0
+  %ret = call double @llvm.experimental.constrained.fmul.f64(
+                        double %scalar1, double %scalar2,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+  ret double %ret
+}
Index: test/CodeGen/SystemZ/vec-strict-mul-02.ll
===================================================================
--- test/CodeGen/SystemZ/vec-strict-mul-02.ll
+++ test/CodeGen/SystemZ/vec-strict-mul-02.ll
@@ -0,0 +1,38 @@
+; Test strict vector multiply-and-add.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
+
+declare <2 x double> @llvm.experimental.constrained.fma.v2f64(<2 x double>, <2 x double>, <2 x double>, metadata, metadata)
+
+; Test a v2f64 multiply-and-add.
+define <2 x double> @f4(<2 x double> %dummy, <2 x double> %val1,
+                        <2 x double> %val2, <2 x double> %val3) {
+; CHECK-LABEL: f4:
+; CHECK: vfmadb %v24, %v26, %v28, %v30
+; CHECK: br %r14
+  %ret = call <2 x double> @llvm.experimental.constrained.fma.v2f64(
+                        <2 x double> %val1,
+                        <2 x double> %val2,
+                        <2 x double> %val3,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+  ret <2 x double> %ret
+}
+
+; Test a v2f64 multiply-and-subtract.
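+; The addend is negated by subtracting it from -0.0 (the canonical fneg
+; idiom), which folds into the fused multiply-and-subtract.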
+define <2 x double> @f5(<2 x double> %dummy, <2 x double> %val1,
+                        <2 x double> %val2, <2 x double> %val3) {
+; CHECK-LABEL: f5:
+; CHECK: vfmsdb %v24, %v26, %v28, %v30
+; CHECK: br %r14
+  %negval3 = fsub <2 x double> <double -0.0, double -0.0>, %val3
+  %ret = call <2 x double> @llvm.experimental.constrained.fma.v2f64(
+                        <2 x double> %val1,
+                        <2 x double> %val2,
+                        <2 x double> %negval3,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+  ret <2 x double> %ret
+}
Index: test/CodeGen/SystemZ/vec-strict-mul-03.ll
===================================================================
--- test/CodeGen/SystemZ/vec-strict-mul-03.ll
+++ test/CodeGen/SystemZ/vec-strict-mul-03.ll
@@ -0,0 +1,33 @@
+; Test strict vector multiplication on z14.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z14 | FileCheck %s
+
+declare float @llvm.experimental.constrained.fmul.f32(float, float, metadata, metadata)
+declare <4 x float> @llvm.experimental.constrained.fmul.v4f32(<4 x float>, <4 x float>, metadata, metadata)
+
+; Test a v4f32 multiplication.
+define <4 x float> @f1(<4 x float> %dummy, <4 x float> %val1,
+                       <4 x float> %val2) {
+; CHECK-LABEL: f1:
+; CHECK: vfmsb %v24, %v26, %v28
+; CHECK: br %r14
+  %ret = call <4 x float> @llvm.experimental.constrained.fmul.v4f32(
+                        <4 x float> %val1, <4 x float> %val2,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+  ret <4 x float> %ret
+}
+
+; Test an f32 multiplication that uses vector registers.
+define float @f2(<4 x float> %val1, <4 x float> %val2) {
+; CHECK-LABEL: f2:
+; CHECK: wfmsb %f0, %v24, %v26
+; CHECK: br %r14
+  %scalar1 = extractelement <4 x float> %val1, i32 0
+  %scalar2 = extractelement <4 x float> %val2, i32 0
+  %ret = call float @llvm.experimental.constrained.fmul.f32(
+                        float %scalar1, float %scalar2,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+  ret float %ret
+}
Index: test/CodeGen/SystemZ/vec-strict-mul-04.ll
===================================================================
--- test/CodeGen/SystemZ/vec-strict-mul-04.ll
+++ test/CodeGen/SystemZ/vec-strict-mul-04.ll
@@ -0,0 +1,37 @@
+; Test strict vector multiply-and-add on z14.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z14 | FileCheck %s
+
+declare <4 x float> @llvm.experimental.constrained.fma.v4f32(<4 x float>, <4 x float>, <4 x float>, metadata, metadata)
+
+; Test a v4f32 multiply-and-add.
+define <4 x float> @f1(<4 x float> %dummy, <4 x float> %val1,
+                       <4 x float> %val2, <4 x float> %val3) {
+; CHECK-LABEL: f1:
+; CHECK: vfmasb %v24, %v26, %v28, %v30
+; CHECK: br %r14
+  %ret = call <4 x float> @llvm.experimental.constrained.fma.v4f32(
+                        <4 x float> %val1,
+                        <4 x float> %val2,
+                        <4 x float> %val3,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+  ret <4 x float> %ret
+}
+
+; Test a v4f32 multiply-and-subtract.
+define <4 x float> @f2(<4 x float> %dummy, <4 x float> %val1,
+                       <4 x float> %val2, <4 x float> %val3) {
+; CHECK-LABEL: f2:
+; CHECK: vfmssb %v24, %v26, %v28, %v30
+; CHECK: br %r14
+  %negval3 = fsub <4 x float> <float -0.0, float -0.0,
+                               float -0.0, float -0.0>, %val3
+  %ret = call <4 x float> @llvm.experimental.constrained.fma.v4f32(
+                        <4 x float> %val1,
+                        <4 x float> %val2,
+                        <4 x float> %negval3,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+  ret <4 x float> %ret
+}
Index: test/CodeGen/SystemZ/vec-strict-mul-05.ll
===================================================================
--- test/CodeGen/SystemZ/vec-strict-mul-05.ll
+++ test/CodeGen/SystemZ/vec-strict-mul-05.ll
@@ -0,0 +1,79 @@
+; Test strict vector negative multiply-and-add on z14.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z14 | FileCheck %s
+
+declare <2 x double> @llvm.experimental.constrained.fma.v2f64(<2 x double>, <2 x double>, <2 x double>, metadata, metadata)
+declare <4 x float> @llvm.experimental.constrained.fma.v4f32(<4 x float>, <4 x float>, <4 x float>, metadata, metadata)
+
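+; These tests use the same fsub-from--0.0 negation idiom as
+; vec-strict-mul-02.ll; negating the result (and, for the nms forms, the
+; addend) folds into the vfnm* instructions.
+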
+; Test a v2f64 negative multiply-and-add.
+define <2 x double> @f1(<2 x double> %dummy, <2 x double> %val1,
+                        <2 x double> %val2, <2 x double> %val3) {
+; CHECK-LABEL: f1:
+; CHECK: vfnmadb %v24, %v26, %v28, %v30
+; CHECK: br %r14
+  %ret = call <2 x double> @llvm.experimental.constrained.fma.v2f64(
+                        <2 x double> %val1,
+                        <2 x double> %val2,
+                        <2 x double> %val3,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+  %negret = fsub <2 x double> <double -0.0, double -0.0>, %ret
+  ret <2 x double> %negret
+}
+
+; Test a v2f64 negative multiply-and-subtract.
+define <2 x double> @f2(<2 x double> %dummy, <2 x double> %val1,
+                        <2 x double> %val2, <2 x double> %val3) {
+; CHECK-LABEL: f2:
+; CHECK: vfnmsdb %v24, %v26, %v28, %v30
+; CHECK: br %r14
+  %negval3 = fsub <2 x double> <double -0.0, double -0.0>, %val3
+  %ret = call <2 x double> @llvm.experimental.constrained.fma.v2f64(
+                        <2 x double> %val1,
+                        <2 x double> %val2,
+                        <2 x double> %negval3,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+  %negret = fsub <2 x double> <double -0.0, double -0.0>, %ret
+  ret <2 x double> %negret
+}
+
+; Test a v4f32 negative multiply-and-add.
+define <4 x float> @f3(<4 x float> %dummy, <4 x float> %val1,
+                       <4 x float> %val2, <4 x float> %val3) {
+; CHECK-LABEL: f3:
+; CHECK: vfnmasb %v24, %v26, %v28, %v30
+; CHECK: br %r14
+  %ret = call <4 x float> @llvm.experimental.constrained.fma.v4f32(
+                        <4 x float> %val1,
+                        <4 x float> %val2,
+                        <4 x float> %val3,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+  %negret = fsub <4 x float> <float -0.0, float -0.0,
+                              float -0.0, float -0.0>, %ret
+  ret <4 x float> %negret
+}
+
+; Test a v4f32 negative multiply-and-subtract.
+define <4 x float> @f4(<4 x float> %dummy, <4 x float> %val1,
+                       <4 x float> %val2, <4 x float> %val3) {
+; CHECK-LABEL: f4:
+; CHECK: vfnmssb %v24, %v26, %v28, %v30
+; CHECK: br %r14
+  %negval3 = fsub <4 x float> <float -0.0, float -0.0,
+                               float -0.0, float -0.0>, %val3
+  %ret = call <4 x float> @llvm.experimental.constrained.fma.v4f32(
+                        <4 x float> %val1,
+                        <4 x float> %val2,
+                        <4 x float> %negval3,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+  %negret = fsub <4 x float> <float -0.0, float -0.0,
+                              float -0.0, float -0.0>, %ret
+  ret <4 x float> %negret
+}
Index: test/CodeGen/SystemZ/vec-strict-round-01.ll
===================================================================
--- test/CodeGen/SystemZ/vec-strict-round-01.ll
+++ test/CodeGen/SystemZ/vec-strict-round-01.ll
@@ -0,0 +1,160 @@
+; Test strict v2f64 rounding.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
+
+declare double @llvm.experimental.constrained.rint.f64(double, metadata, metadata)
+declare double @llvm.experimental.constrained.nearbyint.f64(double, metadata, metadata)
+declare double @llvm.experimental.constrained.floor.f64(double, metadata, metadata)
+declare double @llvm.experimental.constrained.ceil.f64(double, metadata, metadata)
+declare double @llvm.experimental.constrained.trunc.f64(double, metadata, metadata)
+declare double @llvm.experimental.constrained.round.f64(double, metadata, metadata)
+declare <2 x double> @llvm.experimental.constrained.rint.v2f64(<2 x double>, metadata, metadata)
+declare <2 x double> @llvm.experimental.constrained.nearbyint.v2f64(<2 x double>, metadata, metadata)
+declare <2 x double> @llvm.experimental.constrained.floor.v2f64(<2 x double>, metadata, metadata)
+declare <2 x double> @llvm.experimental.constrained.ceil.v2f64(<2 x double>, metadata, metadata)
+declare <2 x double> @llvm.experimental.constrained.trunc.v2f64(<2 x double>, metadata, metadata)
+declare <2 x double> @llvm.experimental.constrained.round.v2f64(<2 x double>, metadata, metadata)
+
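+; The last two operands of vfidb/wfidb are M4 and M5.  M4 = 4 suppresses
+; the IEEE inexact exception (rint alone uses M4 = 0, since it may raise
+; inexact), and M5 selects the rounding mode: 0 = current mode, 1 = nearest
+; with ties away from zero, 5 = toward zero, 6 = toward +infinity,
+; 7 = toward -infinity.
+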
+define <2 x double> @f1(<2 x double> %val) {
+; CHECK-LABEL: f1:
+; CHECK: vfidb %v24, %v24, 0, 0
+; CHECK: br %r14
+  %res = call <2 x double> @llvm.experimental.constrained.rint.v2f64(
+                        <2 x double> %val,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+  ret <2 x double> %res
+}
+
+define <2 x double> @f2(<2 x double> %val) {
+; CHECK-LABEL: f2:
+; CHECK: vfidb %v24, %v24, 4, 0
+; CHECK: br %r14
+  %res = call <2 x double> @llvm.experimental.constrained.nearbyint.v2f64(
+                        <2 x double> %val,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+  ret <2 x double> %res
+}
+
+define <2 x double> @f3(<2 x double> %val) {
+; CHECK-LABEL: f3:
+; CHECK: vfidb %v24, %v24, 4, 7
+; CHECK: br %r14
+  %res = call <2 x double> @llvm.experimental.constrained.floor.v2f64(
+                        <2 x double> %val,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+  ret <2 x double> %res
+}
+
+define <2 x double> @f4(<2 x double> %val) {
+; CHECK-LABEL: f4:
+; CHECK: vfidb %v24, %v24, 4, 6
+; CHECK: br %r14
+  %res = call <2 x double> @llvm.experimental.constrained.ceil.v2f64(
+                        <2 x double> %val,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+  ret <2 x double> %res
+}
+
+define <2 x double> @f5(<2 x double> %val) {
+; CHECK-LABEL: f5:
+; CHECK: vfidb %v24, %v24, 4, 5
+; CHECK: br %r14
+  %res = call <2 x double> @llvm.experimental.constrained.trunc.v2f64(
+                        <2 x double> %val,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+  ret <2 x double> %res
+}
+
+define <2 x double> @f6(<2 x double> %val) {
+; CHECK-LABEL: f6:
+; CHECK: vfidb %v24, %v24, 4, 1
+; CHECK: br %r14
+  %res = call <2 x double> @llvm.experimental.constrained.round.v2f64(
+                        <2 x double> %val,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+  ret <2 x double> %res
+}
+
+define double @f7(<2 x double> %val) {
+; CHECK-LABEL: f7:
+; CHECK: wfidb %f0, %v24, 0, 0
+; CHECK: br %r14
+  %scalar = extractelement <2 x double> %val, i32 0
+  %res = call double @llvm.experimental.constrained.rint.f64(
+                        double %scalar,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+  ret double %res
+}
+
+define double @f8(<2 x double> %val) {
+; CHECK-LABEL: f8:
+; CHECK: wfidb %f0, %v24, 4, 0
+; CHECK: br %r14
+  %scalar = extractelement <2 x double> %val, i32 0
+  %res = call double @llvm.experimental.constrained.nearbyint.f64(
+                        double %scalar,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+  ret double %res
+}
+
+define double @f9(<2 x double> %val) {
+; CHECK-LABEL: f9:
+; CHECK: wfidb %f0, %v24, 4, 7
+; CHECK: br %r14
+  %scalar = extractelement <2 x double> %val, i32 0
+  %res = call double @llvm.experimental.constrained.floor.f64(
+                        double %scalar,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+  ret double %res
+}
+
+define double @f10(<2 x double> %val) {
+; CHECK-LABEL: f10:
+; CHECK: wfidb %f0, %v24, 4, 6
+; CHECK: br %r14
+  %scalar = extractelement <2 x double> %val, i32 0
+  %res = call double @llvm.experimental.constrained.ceil.f64(
+                        double %scalar,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+  ret double %res
+}
+
+define double @f11(<2 x double> %val) {
+; CHECK-LABEL: f11:
+; CHECK: wfidb %f0, %v24, 4, 5
+; CHECK: br %r14
+  %scalar = extractelement <2 x double> %val, i32 0
+  %res = call double @llvm.experimental.constrained.trunc.f64(
+                        double %scalar,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+  ret double %res
+}
+
+define double @f12(<2 x double> %val) {
+; CHECK-LABEL: f12:
+; CHECK: wfidb %f0, %v24, 4, 1
+; CHECK: br %r14
+  %scalar = extractelement <2 x double> %val, i32 0
+  %res = call double @llvm.experimental.constrained.round.f64(
+                        double %scalar,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+  ret double %res
+}
Index: test/CodeGen/SystemZ/vec-strict-round-02.ll
===================================================================
--- test/CodeGen/SystemZ/vec-strict-round-02.ll
+++ test/CodeGen/SystemZ/vec-strict-round-02.ll
@@ -0,0 +1,156 @@
+; Test strict v4f32 rounding on z14.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z14 | FileCheck %s
+
+declare float @llvm.experimental.constrained.rint.f32(float, metadata, metadata)
+declare float @llvm.experimental.constrained.nearbyint.f32(float, metadata, metadata)
+declare float @llvm.experimental.constrained.floor.f32(float, metadata, metadata)
+declare float @llvm.experimental.constrained.ceil.f32(float, metadata, metadata)
+declare float @llvm.experimental.constrained.trunc.f32(float, metadata, metadata)
+declare float @llvm.experimental.constrained.round.f32(float, metadata, metadata)
+declare <4 x float> @llvm.experimental.constrained.rint.v4f32(<4 x float>, metadata, metadata)
+declare <4 x float> @llvm.experimental.constrained.nearbyint.v4f32(<4 x float>, metadata, metadata)
+declare <4 x float> @llvm.experimental.constrained.floor.v4f32(<4 x float>, metadata, metadata)
+declare <4 x float> @llvm.experimental.constrained.ceil.v4f32(<4 x float>, metadata, metadata)
+declare <4 x float> @llvm.experimental.constrained.trunc.v4f32(<4 x float>, metadata, metadata)
+declare <4 x float> @llvm.experimental.constrained.round.v4f32(<4 x float>, metadata, metadata)
+
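+; See vec-strict-round-01.ll for the meaning of the M4 and M5 operands.
+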
+define <4 x float> @f1(<4 x float> %val) {
+; CHECK-LABEL: f1:
+; CHECK: vfisb %v24, %v24, 0, 0
+; CHECK: br %r14
+  %res = call <4 x float> @llvm.experimental.constrained.rint.v4f32(
+                        <4 x float> %val,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+  ret <4 x float> %res
+}
+
+define <4 x float> @f2(<4 x float> %val) {
+; CHECK-LABEL: f2:
+; CHECK: vfisb %v24, %v24, 4, 0
+; CHECK: br %r14
+  %res = call <4 x float> @llvm.experimental.constrained.nearbyint.v4f32(
+                        <4 x float> %val,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+  ret <4 x float> %res
+}
+
+define <4 x float> @f3(<4 x float> %val) {
+; CHECK-LABEL: f3:
+; CHECK: vfisb %v24, %v24, 4, 7
+; CHECK: br %r14
+  %res = call <4 x float> @llvm.experimental.constrained.floor.v4f32(
+                        <4 x float> %val,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+  ret <4 x float> %res
+}
+
+define <4 x float> @f4(<4 x float> %val) {
+; CHECK-LABEL: f4:
+; CHECK: vfisb %v24, %v24, 4, 6
+; CHECK: br %r14
+  %res = call <4 x float> @llvm.experimental.constrained.ceil.v4f32(
+                        <4 x float> %val,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+  ret <4 x float> %res
+}
+
+define <4 x float> @f5(<4 x float> %val) {
+; CHECK-LABEL: f5:
+; CHECK: vfisb %v24, %v24, 4, 5
+; CHECK: br %r14
+  %res = call <4 x float> @llvm.experimental.constrained.trunc.v4f32(
+                        <4 x float> %val,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+  ret <4 x float> %res
+}
+
+define <4 x float> @f6(<4 x float> %val) {
+; CHECK-LABEL: f6:
+; CHECK: vfisb %v24, %v24, 4, 1
+; CHECK: br %r14
+  %res = call <4 x float> @llvm.experimental.constrained.round.v4f32(
+                        <4 x float> %val,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+  ret <4 x float> %res
+}
+
+define float @f7(<4 x float> %val) {
+; CHECK-LABEL: f7:
+; CHECK: wfisb %f0, %v24, 0, 0
+; CHECK: br %r14
+  %scalar = extractelement <4 x float> %val, i32 0
+  %res = call float @llvm.experimental.constrained.rint.f32(
+                        float %scalar,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+  ret float %res
+}
+
+define float @f8(<4 x float> %val) {
+; CHECK-LABEL: f8:
+; CHECK: wfisb %f0, %v24, 4, 0
+; CHECK: br %r14
+  %scalar = extractelement <4 x float> %val, i32 0
+  %res = call float @llvm.experimental.constrained.nearbyint.f32(
+                        float %scalar,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+  ret float %res
+}
+
+define float @f9(<4 x float> %val) {
+; CHECK-LABEL: f9:
+; CHECK: wfisb %f0, %v24, 4, 7
+; CHECK: br %r14
+  %scalar = extractelement <4 x float> %val, i32 0
+  %res = call float @llvm.experimental.constrained.floor.f32(
+                        float %scalar,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+  ret float %res
+}
+
+define float @f10(<4 x float> %val) {
+; CHECK-LABEL: f10:
+; CHECK: wfisb %f0, %v24, 4, 6
+; CHECK: br %r14
+  %scalar = extractelement <4 x float> %val, i32 0
+  %res = call float @llvm.experimental.constrained.ceil.f32(
+                        float %scalar,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+  ret float %res
+}
+
+define float @f11(<4 x float> %val) {
+; CHECK-LABEL: f11:
+; CHECK: wfisb %f0, %v24, 4, 5
+; CHECK: br %r14
+  %scalar = extractelement <4 x float> %val, i32 0
+  %res = call float @llvm.experimental.constrained.trunc.f32(
+                        float %scalar,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+  ret float %res
+}
+
+define float @f12(<4 x float> %val) {
+; CHECK-LABEL: f12:
+; CHECK: wfisb %f0, %v24, 4, 1
+; CHECK: br %r14
+  %scalar = extractelement <4 x float> %val, i32 0
+  %res = call float @llvm.experimental.constrained.round.f32(
+                        float %scalar,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+  ret float %res
+}
Index: test/CodeGen/SystemZ/vec-strict-sqrt-01.ll
===================================================================
--- test/CodeGen/SystemZ/vec-strict-sqrt-01.ll
+++ test/CodeGen/SystemZ/vec-strict-sqrt-01.ll
@@ -0,0 +1,29 @@
+; Test strict f64 and v2f64 square root.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
+
+declare double @llvm.experimental.constrained.sqrt.f64(double, metadata, metadata)
+declare <2 x double> @llvm.experimental.constrained.sqrt.v2f64(<2 x double>, metadata, metadata)
+
+define <2 x double> @f1(<2 x double> %val) {
+; CHECK-LABEL: f1:
+; CHECK: vfsqdb %v24, %v24
+; CHECK: br %r14
+  %ret = call <2 x double> @llvm.experimental.constrained.sqrt.v2f64(
+                        <2 x double> %val,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+  ret <2 x double> %ret
+}
+
+define double @f2(<2 x double> %val) {
+; CHECK-LABEL: f2:
+; CHECK: wfsqdb %f0, %v24
+; CHECK: br %r14
+  %scalar = extractelement <2 x double> %val, i32 0
+  %ret = call double @llvm.experimental.constrained.sqrt.f64(
+                        double %scalar,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+  ret double %ret
+}
Index: test/CodeGen/SystemZ/vec-strict-sqrt-02.ll
===================================================================
--- test/CodeGen/SystemZ/vec-strict-sqrt-02.ll
+++ test/CodeGen/SystemZ/vec-strict-sqrt-02.ll
@@ -0,0 +1,29 @@
+; Test strict f32 and v4f32 square root on z14.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z14 | FileCheck %s
+
+declare float @llvm.experimental.constrained.sqrt.f32(float, metadata, metadata)
+declare <4 x float> @llvm.experimental.constrained.sqrt.v4f32(<4 x float>, metadata, metadata)
+
+define <4 x float> @f1(<4 x float> %val) {
+; CHECK-LABEL: f1:
+; CHECK: vfsqsb %v24, %v24
+; CHECK: br %r14
+  %ret = call <4 x float> @llvm.experimental.constrained.sqrt.v4f32(
+                        <4 x float> %val,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+  ret <4 x float> %ret
+}
+
+define float @f2(<4 x float> %val) {
+; CHECK-LABEL: f2:
+; CHECK: wfsqsb %f0, %v24
+; CHECK: br %r14
+  %scalar = extractelement <4 x float> %val, i32 0
+  %ret = call float @llvm.experimental.constrained.sqrt.f32(
+                        float %scalar,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+  ret float %ret
+}
Index: test/CodeGen/SystemZ/vec-strict-sub-01.ll
===================================================================
--- test/CodeGen/SystemZ/vec-strict-sub-01.ll
+++ test/CodeGen/SystemZ/vec-strict-sub-01.ll
@@ -0,0 +1,33 @@
+; Test strict vector subtraction.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
+
+declare double @llvm.experimental.constrained.fsub.f64(double, double, metadata, metadata)
+declare <2 x double> @llvm.experimental.constrained.fsub.v2f64(<2 x double>, <2 x double>, metadata, metadata)
+
+; Test a v2f64 subtraction.
+define <2 x double> @f6(<2 x double> %dummy, <2 x double> %val1,
+                        <2 x double> %val2) {
+; CHECK-LABEL: f6:
+; CHECK: vfsdb %v24, %v26, %v28
+; CHECK: br %r14
+  %ret = call <2 x double> @llvm.experimental.constrained.fsub.v2f64(
+                        <2 x double> %val1, <2 x double> %val2,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+  ret <2 x double> %ret
+}
+
+; Test an f64 subtraction that uses vector registers.
+define double @f7(<2 x double> %val1, <2 x double> %val2) {
+; CHECK-LABEL: f7:
+; CHECK: wfsdb %f0, %v24, %v26
+; CHECK: br %r14
+  %scalar1 = extractelement <2 x double> %val1, i32 0
+  %scalar2 = extractelement <2 x double> %val2, i32 0
+  %ret = call double @llvm.experimental.constrained.fsub.f64(
+                        double %scalar1, double %scalar2,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+  ret double %ret
+}
Index: test/CodeGen/SystemZ/vec-strict-sub-02.ll
===================================================================
--- test/CodeGen/SystemZ/vec-strict-sub-02.ll
+++ test/CodeGen/SystemZ/vec-strict-sub-02.ll
@@ -0,0 +1,33 @@
+; Test strict vector subtraction on z14.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z14 | FileCheck %s
+
+declare float @llvm.experimental.constrained.fsub.f32(float, float, metadata, metadata)
+declare <4 x float> @llvm.experimental.constrained.fsub.v4f32(<4 x float>, <4 x float>, metadata, metadata)
+
+; Test a v4f32 subtraction.
+define <4 x float> @f6(<4 x float> %dummy, <4 x float> %val1,
+                       <4 x float> %val2) {
+; CHECK-LABEL: f6:
+; CHECK: vfssb %v24, %v26, %v28
+; CHECK: br %r14
+  %ret = call <4 x float> @llvm.experimental.constrained.fsub.v4f32(
+                        <4 x float> %val1, <4 x float> %val2,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+  ret <4 x float> %ret
+}
+
+; Test an f32 subtraction that uses vector registers.
+define float @f7(<4 x float> %val1, <4 x float> %val2) {
+; CHECK-LABEL: f7:
+; CHECK: wfssb %f0, %v24, %v26
+; CHECK: br %r14
+  %scalar1 = extractelement <4 x float> %val1, i32 0
+  %scalar2 = extractelement <4 x float> %val2, i32 0
+  %ret = call float @llvm.experimental.constrained.fsub.f32(
+                        float %scalar1, float %scalar2,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+  ret float %ret
+}