Index: docs/LangRef.rst
===================================================================
--- docs/LangRef.rst
+++ docs/LangRef.rst
@@ -15139,6 +15139,72 @@
 operand computed with infinite precision, and then rounded to the target
 precision.
 
+'``llvm.experimental.constrained.fptoui``' Intrinsic
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Syntax:
+"""""""
+
+::
+
+      declare <ty2>
+      @llvm.experimental.constrained.fptoui(<type> <value>,
+                                          metadata <exception behavior>)
+
+Overview:
+"""""""""
+
+The '``llvm.experimental.constrained.fptoui``' intrinsic converts a 
+floating-point ``value`` to its unsigned integer equivalent of type ``ty2``.
+
+Arguments:
+""""""""""
+
+The first argument to the '``llvm.experimental.constrained.fptoui``'
+intrinsic must be :ref:`floating point <t_floating>` or :ref:`vector
+<t_vector>` of floating point values.
+
+The second argument specifies the exception behavior as described above.
+
+Semantics:
+""""""""""
+
+The result produced is an unsigned integer converted from the floating
+point operand. The value is truncated, so it is rounded towards zero.
+
+'``llvm.experimental.constrained.fptosi``' Intrinsic
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Syntax:
+"""""""
+
+::
+
+      declare <ty2>
+      @llvm.experimental.constrained.fptosi(<type> <value>,
+                                          metadata <exception behavior>)
+
+Overview:
+"""""""""
+
+The '``llvm.experimental.constrained.fptosi``' intrinsic converts 
+:ref:`floating-point <t_floating>` ``value`` to type ``ty2``.
+
+Arguments:
+""""""""""
+
+The first argument to the '``llvm.experimental.constrained.fptosi``'
+intrinsic must be :ref:`floating point <t_floating>` or :ref:`vector
+<t_vector>` of floating point values. 
+
+The second argument specifies the exception behavior as described above.
+
+Semantics:
+""""""""""
+
+The result produced is a signed integer converted from the floating
+point operand. The value is truncated, so it is rounded towards zero.
+
 '``llvm.experimental.constrained.fptrunc``' Intrinsic
 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
 
Index: include/llvm/CodeGen/ISDOpcodes.h
===================================================================
--- include/llvm/CodeGen/ISDOpcodes.h
+++ include/llvm/CodeGen/ISDOpcodes.h
@@ -302,6 +302,13 @@
     STRICT_FRINT, STRICT_FNEARBYINT, STRICT_FMAXNUM, STRICT_FMINNUM,
     STRICT_FCEIL, STRICT_FFLOOR, STRICT_FROUND, STRICT_FTRUNC,
 
+    /// STRICT_FP_TO_[US]INT - Convert a floating point value to a signed or
+    /// unsigned integer. These have the same semantics as fptosi and fptoui 
+    /// in IR.
+    /// They are used to limit optimizations while the DAG is being optimized.
+    STRICT_FP_TO_SINT,
+    STRICT_FP_TO_UINT,
+
     /// X = STRICT_FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating 
     /// point type down to the precision of the destination VT.  TRUNC is a 
     /// flag, which is always an integer that is zero or one.  If TRUNC is 0,
Index: include/llvm/CodeGen/SelectionDAGNodes.h
===================================================================
--- include/llvm/CodeGen/SelectionDAGNodes.h
+++ include/llvm/CodeGen/SelectionDAGNodes.h
@@ -704,6 +704,8 @@
       case ISD::STRICT_FFLOOR:
       case ISD::STRICT_FROUND:
       case ISD::STRICT_FTRUNC:
+      case ISD::STRICT_FP_TO_SINT:
+      case ISD::STRICT_FP_TO_UINT:
       case ISD::STRICT_FP_ROUND:
       case ISD::STRICT_FP_EXTEND:
         return true;
Index: include/llvm/CodeGen/TargetLowering.h
===================================================================
--- include/llvm/CodeGen/TargetLowering.h
+++ include/llvm/CodeGen/TargetLowering.h
@@ -904,6 +904,8 @@
       case ISD::STRICT_FFLOOR: EqOpc = ISD::FFLOOR; break;
       case ISD::STRICT_FROUND: EqOpc = ISD::FROUND; break;
       case ISD::STRICT_FTRUNC: EqOpc = ISD::FTRUNC; break;
+      case ISD::STRICT_FP_TO_SINT: EqOpc = ISD::FP_TO_SINT; break;
+      case ISD::STRICT_FP_TO_UINT: EqOpc = ISD::FP_TO_UINT; break;
       case ISD::STRICT_FP_ROUND: EqOpc = ISD::FP_ROUND; break;
       case ISD::STRICT_FP_EXTEND: EqOpc = ISD::FP_EXTEND; break;
     }
@@ -3890,7 +3892,7 @@
   /// \param N Node to expand
   /// \param Result output after conversion
   /// \returns True, if the expansion was successful, false otherwise
-  bool expandFP_TO_UINT(SDNode *N, SDValue &Result, SelectionDAG &DAG) const;
+  bool expandFP_TO_UINT(SDNode *N, SDValue &Result, SDValue &Chain, SelectionDAG &DAG) const;
 
   /// Expand UINT(i64) to double(f64) conversion
   /// \param N Node to expand
Index: include/llvm/IR/IntrinsicInst.h
===================================================================
--- include/llvm/IR/IntrinsicInst.h
+++ include/llvm/IR/IntrinsicInst.h
@@ -238,6 +238,8 @@
       case Intrinsic::experimental_constrained_fdiv:
       case Intrinsic::experimental_constrained_frem:
       case Intrinsic::experimental_constrained_fma:
+      case Intrinsic::experimental_constrained_fptosi:
+      case Intrinsic::experimental_constrained_fptoui:
       case Intrinsic::experimental_constrained_fptrunc:
       case Intrinsic::experimental_constrained_fpext:
       case Intrinsic::experimental_constrained_sqrt:
Index: include/llvm/IR/Intrinsics.td
===================================================================
--- include/llvm/IR/Intrinsics.td
+++ include/llvm/IR/Intrinsics.td
@@ -613,6 +613,14 @@
                                                       llvm_metadata_ty,
                                                       llvm_metadata_ty ]>;
 
+  def int_experimental_constrained_fptosi : Intrinsic<[ llvm_anyint_ty ],
+                                                    [ llvm_anyfloat_ty,
+                                                      llvm_metadata_ty ]>;
+
+  def int_experimental_constrained_fptoui : Intrinsic<[ llvm_anyint_ty ],
+                                                    [ llvm_anyfloat_ty,
+                                                      llvm_metadata_ty ]>;
+
   def int_experimental_constrained_fptrunc : Intrinsic<[ llvm_anyfloat_ty ],
                                                        [ llvm_anyfloat_ty,
                                                          llvm_metadata_ty,
@@ -703,7 +711,8 @@
                                                        llvm_metadata_ty,
                                                        llvm_metadata_ty ]>;
 }
-// FIXME: Add intrinsics for fcmp, fptoui and fptosi.
+// FIXME: Add intrinsic for fcmp.
+// FIXME: Consider maybe adding intrinsics for sitofp, uitofp.
 
 //===------------------------- Expect Intrinsics --------------------------===//
 //
Index: lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
===================================================================
--- lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
+++ lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
@@ -236,6 +236,16 @@
     }
     ReplacedNode(Old);
   }
+
+  void ReplaceNodeWithValue(SDValue Old, SDValue New) {
+    LLVM_DEBUG(dbgs() << " ... replacing: "; Old->dump(&DAG);
+               dbgs() << "     with:      "; New->dump(&DAG));
+
+    DAG.ReplaceAllUsesOfValueWith(Old, New);
+    if (UpdatedNodes)
+      UpdatedNodes->insert(New.getNode());
+    ReplacedNode(Old.getNode());
+  }
 };
 
 } // end anonymous namespace
@@ -1121,6 +1131,8 @@
   case ISD::STRICT_FFLOOR:
   case ISD::STRICT_FROUND:
   case ISD::STRICT_FTRUNC:
+  case ISD::STRICT_FP_TO_SINT:
+  case ISD::STRICT_FP_TO_UINT:
   case ISD::STRICT_FP_ROUND:
   case ISD::STRICT_FP_EXTEND:
     // These pseudo-ops get legalized as if they were their non-strict
@@ -2901,10 +2913,27 @@
     if (TLI.expandFP_TO_SINT(Node, Tmp1, DAG))
       Results.push_back(Tmp1);
     break;
+  case ISD::STRICT_FP_TO_SINT:
+    if (TLI.expandFP_TO_SINT(Node, Tmp1, DAG)) {
+      ReplaceNode(Node, Tmp1.getNode());
+      LLVM_DEBUG(dbgs() << "Successfully expanded STRICT_FP_TO_SINT node\n");
+      return true;
+    }
+    break;
   case ISD::FP_TO_UINT:
-    if (TLI.expandFP_TO_UINT(Node, Tmp1, DAG))
+    if (TLI.expandFP_TO_UINT(Node, Tmp1, Tmp2, DAG))
       Results.push_back(Tmp1);
     break;
+  case ISD::STRICT_FP_TO_UINT:
+    if (TLI.expandFP_TO_UINT(Node, Tmp1, Tmp2, DAG)) {
+      // Relink the chain.
+      DAG.ReplaceAllUsesOfValueWith(SDValue(Node,1), Tmp2);
+      // Replace the now UINT result.
+      ReplaceNodeWithValue(SDValue(Node, 0), Tmp1);
+      LLVM_DEBUG(dbgs() << "Successfully expanded STRICT_FP_TO_UINT node\n");
+      return true;
+    }
+    break;
   case ISD::LROUND:
     Results.push_back(ExpandArgFPLibCall(Node, RTLIB::LROUND_F32,
                                          RTLIB::LROUND_F64, RTLIB::LROUND_F80,
Index: lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
===================================================================
--- lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
+++ lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
@@ -112,6 +112,8 @@
   case ISD::ZERO_EXTEND:
   case ISD::ANY_EXTEND:  Res = PromoteIntRes_INT_EXTEND(N); break;
 
+  case ISD::STRICT_FP_TO_SINT:
+  case ISD::STRICT_FP_TO_UINT:
   case ISD::FP_TO_SINT:
   case ISD::FP_TO_UINT:  Res = PromoteIntRes_FP_TO_XINT(N); break;
 
@@ -494,8 +496,21 @@
       TLI.isOperationLegalOrCustom(ISD::FP_TO_SINT, NVT))
     NewOpc = ISD::FP_TO_SINT;
 
-  SDValue Res = DAG.getNode(NewOpc, dl, NVT, N->getOperand(0));
+  if (N->getOpcode() == ISD::STRICT_FP_TO_UINT &&
+      !TLI.isOperationLegal(ISD::STRICT_FP_TO_UINT, NVT) &&
+      TLI.isOperationLegalOrCustom(ISD::STRICT_FP_TO_SINT, NVT))
+    NewOpc = ISD::STRICT_FP_TO_SINT;
 
+  SDValue Res;
+  if (N->isStrictFPOpcode()) {
+    Res = DAG.getNode(NewOpc, dl, { NVT, MVT::Other }, 
+                      { N->getOperand(0), N->getOperand(1) });
+    // Legalize the chain result - switch anything that used the old chain to
+    // use the new one.
+    ReplaceValueWith(SDValue(N, 1), Res.getValue(1));
+  } else
+    Res = DAG.getNode(NewOpc, dl, NVT, N->getOperand(0));
+
   // Assert that the converted value fits in the original type.  If it doesn't
   // (eg: because the value being converted is too big), then the result of the
   // original operation was undefined anyway, so the assert is still correct.
@@ -503,7 +518,8 @@
   // NOTE: fp-to-uint to fp-to-sint promotion guarantees zero extend. For example:
   //   before legalization: fp-to-uint16, 65534. -> 0xfffe
   //   after legalization: fp-to-sint32, 65534. -> 0x0000fffe
-  return DAG.getNode(N->getOpcode() == ISD::FP_TO_UINT ?
+  return DAG.getNode((N->getOpcode() == ISD::FP_TO_UINT ||
+                      N->getOpcode() == ISD::STRICT_FP_TO_UINT) ?
                      ISD::AssertZext : ISD::AssertSext, dl, NVT, Res,
                      DAG.getValueType(N->getValueType(0).getScalarType()));
 }
Index: lib/CodeGen/SelectionDAG/LegalizeTypes.h
===================================================================
--- lib/CodeGen/SelectionDAG/LegalizeTypes.h
+++ lib/CodeGen/SelectionDAG/LegalizeTypes.h
@@ -715,6 +715,7 @@
   bool ScalarizeVectorOperand(SDNode *N, unsigned OpNo);
   SDValue ScalarizeVecOp_BITCAST(SDNode *N);
   SDValue ScalarizeVecOp_UnaryOp(SDNode *N);
+  SDValue ScalarizeVecOp_UnaryOp_StrictFP(SDNode *N);
   SDValue ScalarizeVecOp_CONCAT_VECTORS(SDNode *N);
   SDValue ScalarizeVecOp_EXTRACT_VECTOR_ELT(SDNode *N);
   SDValue ScalarizeVecOp_VSELECT(SDNode *N);
Index: lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
===================================================================
--- lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
+++ lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
@@ -333,6 +333,8 @@
   case ISD::STRICT_FFLOOR:
   case ISD::STRICT_FROUND:
   case ISD::STRICT_FTRUNC:
+  case ISD::STRICT_FP_TO_SINT:
+  case ISD::STRICT_FP_TO_UINT:
   case ISD::STRICT_FP_ROUND:
   case ISD::STRICT_FP_EXTEND:
     // These pseudo-ops get legalized as if they were their non-strict
@@ -844,6 +846,8 @@
   case ISD::STRICT_FFLOOR:
   case ISD::STRICT_FROUND:
   case ISD::STRICT_FTRUNC:
+  case ISD::STRICT_FP_TO_SINT:
+  case ISD::STRICT_FP_TO_UINT:
     return ExpandStrictFPOp(Op);
   case ISD::VECREDUCE_ADD:
   case ISD::VECREDUCE_MUL:
@@ -1168,9 +1172,13 @@
 
 SDValue VectorLegalizer::ExpandFP_TO_UINT(SDValue Op) {
   // Attempt to expand using TargetLowering.
-  SDValue Result;
-  if (TLI.expandFP_TO_UINT(Op.getNode(), Result, DAG))
+  SDValue Result, Chain;
+  if (TLI.expandFP_TO_UINT(Op.getNode(), Result, Chain, DAG)) {
+    if (Op.getNode()->isStrictFPOpcode())
+      // Relink the chain
+      DAG.ReplaceAllUsesOfValueWith(Op.getValue(1), Chain);
     return Result;
+  }
 
   // Otherwise go ahead and unroll.
   return DAG.UnrollVectorOp(Op.getNode());
Index: lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
===================================================================
--- lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
+++ lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
@@ -171,6 +171,8 @@
   case ISD::STRICT_FFLOOR:
   case ISD::STRICT_FROUND:
   case ISD::STRICT_FTRUNC:
+  case ISD::STRICT_FP_TO_SINT:
+  case ISD::STRICT_FP_TO_UINT:
   case ISD::STRICT_FP_EXTEND:
     R = ScalarizeVecRes_StrictFPOp(N);
     break;
@@ -604,6 +606,10 @@
     case ISD::UINT_TO_FP:
       Res = ScalarizeVecOp_UnaryOp(N);
       break;
+    case ISD::STRICT_FP_TO_SINT:
+    case ISD::STRICT_FP_TO_UINT:
+      Res = ScalarizeVecOp_UnaryOp_StrictFP(N);
+      break;
     case ISD::CONCAT_VECTORS:
       Res = ScalarizeVecOp_CONCAT_VECTORS(N);
       break;
@@ -679,6 +685,23 @@
   return DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(N), N->getValueType(0), Op);
 }
 
+/// If the input is a vector that needs to be scalarized, it must be <1 x ty>.
+/// Do the strict FP operation on the element instead.
+SDValue DAGTypeLegalizer::ScalarizeVecOp_UnaryOp_StrictFP(SDNode *N) {
+  assert(N->getValueType(0).getVectorNumElements() == 1 &&
+         "Unexpected vector type!");
+  SDValue Elt = GetScalarizedVector(N->getOperand(1));
+  SDValue Res = DAG.getNode(N->getOpcode(), SDLoc(N),
+                            { N->getValueType(0).getScalarType(), MVT::Other },
+                            { N->getOperand(0), Elt });
+  // Legalize the chain result - switch anything that used the old chain to
+  // use the new one.
+  ReplaceValueWith(SDValue(N, 1), Res.getValue(1));
+  // Revectorize the result so the types line up with what the uses of this
+  // expression expect.
+  return DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(N), N->getValueType(0), Res);
+}
+
 /// The vectors to concatenate have length one - use a BUILD_VECTOR instead.
 SDValue DAGTypeLegalizer::ScalarizeVecOp_CONCAT_VECTORS(SDNode *N) {
   SmallVector<SDValue, 8> Ops(N->getNumOperands());
@@ -883,7 +906,9 @@
   case ISD::FP_ROUND:
   case ISD::STRICT_FP_ROUND:
   case ISD::FP_TO_SINT:
+  case ISD::STRICT_FP_TO_SINT:
   case ISD::FP_TO_UINT:
+  case ISD::STRICT_FP_TO_UINT:
   case ISD::FRINT:
   case ISD::FROUND:
   case ISD::FSIN:
@@ -1979,6 +2004,8 @@
       break;
     case ISD::FP_TO_SINT:
     case ISD::FP_TO_UINT:
+    case ISD::STRICT_FP_TO_SINT:
+    case ISD::STRICT_FP_TO_UINT:
     case ISD::CTTZ:
     case ISD::CTLZ:
     case ISD::CTPOP:
@@ -2790,6 +2817,8 @@
 
   case ISD::STRICT_FP_EXTEND:
   case ISD::STRICT_FP_ROUND:
+  case ISD::STRICT_FP_TO_SINT:
+  case ISD::STRICT_FP_TO_UINT:
     Res = WidenVecRes_Convert_StrictFP(N);
     break;
 
@@ -4094,7 +4123,9 @@
   case ISD::FP_EXTEND:
   case ISD::STRICT_FP_EXTEND:
   case ISD::FP_TO_SINT:
+  case ISD::STRICT_FP_TO_SINT:
   case ISD::FP_TO_UINT:
+  case ISD::STRICT_FP_TO_UINT:
   case ISD::SINT_TO_FP:
   case ISD::UINT_TO_FP:
   case ISD::TRUNCATE:
Index: lib/CodeGen/SelectionDAG/SelectionDAG.cpp
===================================================================
--- lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -7766,6 +7766,8 @@
   case ISD::STRICT_FTRUNC:     NewOpc = ISD::FTRUNC;     break;
   case ISD::STRICT_FP_ROUND:   NewOpc = ISD::FP_ROUND;   break;
   case ISD::STRICT_FP_EXTEND:  NewOpc = ISD::FP_EXTEND;  break;
+  case ISD::STRICT_FP_TO_SINT: NewOpc = ISD::FP_TO_SINT; break;
+  case ISD::STRICT_FP_TO_UINT: NewOpc = ISD::FP_TO_UINT; break;
   }
 
   assert(Node->getNumValues() == 2 && "Unexpected number of results!");
Index: lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
===================================================================
--- lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -6069,6 +6069,8 @@
   case Intrinsic::experimental_constrained_fdiv:
   case Intrinsic::experimental_constrained_frem:
   case Intrinsic::experimental_constrained_fma:
+  case Intrinsic::experimental_constrained_fptosi:
+  case Intrinsic::experimental_constrained_fptoui:
   case Intrinsic::experimental_constrained_fptrunc:
   case Intrinsic::experimental_constrained_fpext:
   case Intrinsic::experimental_constrained_sqrt:
@@ -6838,6 +6840,12 @@
   case Intrinsic::experimental_constrained_fma:
     Opcode = ISD::STRICT_FMA;
     break;
+  case Intrinsic::experimental_constrained_fptosi:
+    Opcode = ISD::STRICT_FP_TO_SINT;
+    break;
+  case Intrinsic::experimental_constrained_fptoui:
+    Opcode = ISD::STRICT_FP_TO_UINT;
+    break;
   case Intrinsic::experimental_constrained_fptrunc:
     Opcode = ISD::STRICT_FP_ROUND;
     break;
Index: lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
===================================================================
--- lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
+++ lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
@@ -325,7 +325,9 @@
   case ISD::SINT_TO_FP:                 return "sint_to_fp";
   case ISD::UINT_TO_FP:                 return "uint_to_fp";
   case ISD::FP_TO_SINT:                 return "fp_to_sint";
+  case ISD::STRICT_FP_TO_SINT:          return "strict_fp_to_sint";
   case ISD::FP_TO_UINT:                 return "fp_to_uint";
+  case ISD::STRICT_FP_TO_UINT:          return "strict_fp_to_uint";
   case ISD::BITCAST:                    return "bitcast";
   case ISD::ADDRSPACECAST:              return "addrspacecast";
   case ISD::FP16_TO_FP:                 return "fp16_to_fp";
Index: lib/CodeGen/SelectionDAG/TargetLowering.cpp
===================================================================
--- lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -4726,7 +4726,8 @@
 
 bool TargetLowering::expandFP_TO_SINT(SDNode *Node, SDValue &Result,
                                       SelectionDAG &DAG) const {
-  SDValue Src = Node->getOperand(0);
+  unsigned OpNo = Node->isStrictFPOpcode() ? 1 : 0;
+  SDValue Src = Node->getOperand(OpNo);
   EVT SrcVT = Src.getValueType();
   EVT DstVT = Node->getValueType(0);
   SDLoc dl(SDValue(Node, 0));
@@ -4735,6 +4736,13 @@
   if (SrcVT != MVT::f32 || DstVT != MVT::i64)
     return false;
 
+  if (Node->isStrictFPOpcode())
+    // When a NaN is converted to an integer a trap is allowed. We can't
+    // use this expansion here because it would eliminate that trap. Other
+    // traps are also allowed and cannot be eliminated. See 
+    // IEEE 754-2008 sec 5.8.
+    return false;
+
   // Expand f32 -> i64 conversion
   // This algorithm comes from compiler-rt's implementation of fixsfdi:
   // https://github.com/llvm/llvm-project/blob/master/compiler-rt/lib/builtins/fixsfdi.c
@@ -4788,9 +4796,11 @@
 }
 
 bool TargetLowering::expandFP_TO_UINT(SDNode *Node, SDValue &Result,
+                                      SDValue &Chain,
                                       SelectionDAG &DAG) const {
   SDLoc dl(SDValue(Node, 0));
-  SDValue Src = Node->getOperand(0);
+  unsigned OpNo = Node->isStrictFPOpcode() ? 1 : 0;
+  SDValue Src = Node->getOperand(OpNo);
 
   EVT SrcVT = Src.getValueType();
   EVT DstVT = Node->getValueType(0);
@@ -4798,7 +4808,9 @@
       getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), SrcVT);
 
   // Only expand vector types if we have the appropriate vector bit operations.
-  if (DstVT.isVector() && (!isOperationLegalOrCustom(ISD::FP_TO_SINT, DstVT) ||
+  unsigned SIntOpcode = Node->isStrictFPOpcode() ? ISD::STRICT_FP_TO_SINT : 
+                                                   ISD::FP_TO_SINT;
+  if (DstVT.isVector() && (!isOperationLegalOrCustom(SIntOpcode, DstVT) ||
                            !isOperationLegalOrCustomOrPromote(ISD::XOR, SrcVT)))
     return false;
 
@@ -4810,14 +4822,25 @@
   APInt SignMask = APInt::getSignMask(DstVT.getScalarSizeInBits());
   if (APFloat::opOverflow &
       APF.convertFromAPInt(SignMask, false, APFloat::rmNearestTiesToEven)) {
-    Result = DAG.getNode(ISD::FP_TO_SINT, dl, DstVT, Src);
+    if (Node->isStrictFPOpcode()) {
+      Result = DAG.getNode(ISD::STRICT_FP_TO_SINT, dl, { DstVT, MVT::Other }, 
+                           { Node->getOperand(0), Src }); 
+      Chain = Result.getValue(1);
+#if 0
+      // Relink the chain
+      DAG.ReplaceAllUsesOfValueWith(SDValue(Node,1), Result.getValue(1));
+#endif
+    } else
+      Result = DAG.getNode(ISD::FP_TO_SINT, dl, DstVT, Src);
     return true;
   }
 
   SDValue Cst = DAG.getConstantFP(APF, dl, SrcVT);
   SDValue Sel = DAG.getSetCC(dl, SetCCVT, Src, Cst, ISD::SETLT);
 
-  bool Strict = shouldUseStrictFP_TO_INT(SrcVT, DstVT, /*IsSigned*/ false);
+  bool Strict = Node->isStrictFPOpcode() ||
+                shouldUseStrictFP_TO_INT(SrcVT, DstVT, /*IsSigned*/ false);
+
   if (Strict) {
     // Expand based on maximum range of FP_TO_SINT, if the value exceeds the
     // signmask then offset (the result of which should be fully representable).
@@ -4827,12 +4850,27 @@
     // Result = fp_to_sint(Val) ^ Ofs
 
     // TODO: Should any fast-math-flags be set for the FSUB?
-    SDValue Val = DAG.getSelect(dl, SrcVT, Sel, Src,
-                                DAG.getNode(ISD::FSUB, dl, SrcVT, Src, Cst));
+    SDValue SrcBiased;
+    if (Node->isStrictFPOpcode())
+      SrcBiased = DAG.getNode(ISD::STRICT_FSUB, dl, { SrcVT, MVT::Other }, 
+                              { Node->getOperand(0), Src, Cst });
+    else
+      SrcBiased = DAG.getNode(ISD::FSUB, dl, SrcVT, Src, Cst);
+    SDValue Val = DAG.getSelect(dl, SrcVT, Sel, Src, SrcBiased);
     SDValue Ofs = DAG.getSelect(dl, DstVT, Sel, DAG.getConstant(0, dl, DstVT),
                                 DAG.getConstant(SignMask, dl, DstVT));
-    Result = DAG.getNode(ISD::XOR, dl, DstVT,
-                         DAG.getNode(ISD::FP_TO_SINT, dl, DstVT, Val), Ofs);
+    SDValue SInt;
+    if (Node->isStrictFPOpcode()) {
+      SInt = DAG.getNode(ISD::STRICT_FP_TO_SINT, dl, { DstVT, MVT::Other }, 
+                         { SrcBiased.getValue(1), Val });
+      Chain = SInt.getValue(1);
+#if 0
+      // Relink the chain
+      DAG.ReplaceAllUsesOfValueWith(SDValue(Node,1), SInt.getValue(1));
+#endif
+    } else
+      SInt = DAG.getNode(ISD::FP_TO_SINT, dl, DstVT, Val);
+    Result = DAG.getNode(ISD::XOR, dl, DstVT, SInt, Ofs);
   } else {
     // Expand based on maximum range of FP_TO_SINT:
     // True = fp_to_sint(Src)
Index: lib/CodeGen/TargetLoweringBase.cpp
===================================================================
--- lib/CodeGen/TargetLoweringBase.cpp
+++ lib/CodeGen/TargetLoweringBase.cpp
@@ -690,6 +690,8 @@
     setOperationAction(ISD::STRICT_FMINNUM, VT, Expand);
     setOperationAction(ISD::STRICT_FP_ROUND, VT, Expand);
     setOperationAction(ISD::STRICT_FP_EXTEND, VT, Expand);
+    setOperationAction(ISD::STRICT_FP_TO_SINT, VT, Expand);
+    setOperationAction(ISD::STRICT_FP_TO_UINT, VT, Expand);
 
     // For most targets @llvm.get.dynamic.area.offset just returns 0.
     setOperationAction(ISD::GET_DYNAMIC_AREA_OFFSET, VT, Expand);
Index: lib/IR/IntrinsicInst.cpp
===================================================================
--- lib/IR/IntrinsicInst.cpp
+++ lib/IR/IntrinsicInst.cpp
@@ -142,6 +142,8 @@
   switch (getIntrinsicID()) {
     default:
       return false;
+    case Intrinsic::experimental_constrained_fptosi:
+    case Intrinsic::experimental_constrained_fptoui:
     case Intrinsic::experimental_constrained_fptrunc:
     case Intrinsic::experimental_constrained_fpext:
     case Intrinsic::experimental_constrained_sqrt:
Index: lib/IR/Verifier.cpp
===================================================================
--- lib/IR/Verifier.cpp
+++ lib/IR/Verifier.cpp
@@ -4257,6 +4257,8 @@
   case Intrinsic::experimental_constrained_fdiv:
   case Intrinsic::experimental_constrained_frem:
   case Intrinsic::experimental_constrained_fma:
+  case Intrinsic::experimental_constrained_fptosi:
+  case Intrinsic::experimental_constrained_fptoui:
   case Intrinsic::experimental_constrained_fptrunc:
   case Intrinsic::experimental_constrained_fpext:
   case Intrinsic::experimental_constrained_sqrt:
@@ -4748,6 +4750,33 @@
     HasRoundingMD = true;
     break;
 
+  case Intrinsic::experimental_constrained_fptosi:
+  case Intrinsic::experimental_constrained_fptoui: { 
+    Assert((NumOperands == 2),
+           "invalid arguments for constrained FP intrinsic", &FPI);
+    HasExceptionMD = true;
+
+    Value *Operand = FPI.getArgOperand(0);
+    uint64_t NumSrcElem = 0;
+    Assert(Operand->getType()->isFPOrFPVectorTy(),
+           "Intrinsic first argument must be floating point", &FPI);
+    if (auto *OperandT = dyn_cast<VectorType>(Operand->getType())) {
+      NumSrcElem = OperandT->getNumElements();
+    }
+
+    Operand = &FPI;
+    Assert((NumSrcElem > 0) == Operand->getType()->isVectorTy(),
+           "Intrinsic first argument and result disagree on vector use", &FPI);
+    Assert(Operand->getType()->isIntOrIntVectorTy(),
+           "Intrinsic result must be an integer", &FPI);
+    if (auto *OperandT = dyn_cast<VectorType>(Operand->getType())) {
+      Assert(NumSrcElem == OperandT->getNumElements(),
+             "Intrinsic first argument and result vector lengths must be equal",
+             &FPI);
+    }
+  }
+    break;
+
   case Intrinsic::experimental_constrained_fptrunc:
   case Intrinsic::experimental_constrained_fpext: {
     if (FPI.getIntrinsicID() == Intrinsic::experimental_constrained_fptrunc) {
Index: test/CodeGen/PowerPC/fp-intrinsics-fptosi-legal.ll
===================================================================
--- test/CodeGen/PowerPC/fp-intrinsics-fptosi-legal.ll
+++ test/CodeGen/PowerPC/fp-intrinsics-fptosi-legal.ll
@@ -0,0 +1,19 @@
+; RUN: llc -O3 -mtriple=powerpc-unknown-linux-gnu -mcpu=e500 -mattr=spe < %s | FileCheck %s
+
+; PowerPC SPE is a rare in-tree target that has the FP_TO_SINT node marked
+; as Legal.
+
+; Verify that fptosi(42.1) isn't simplified when the rounding mode is
+; unknown.
+; Verify that no gross errors happen.
+; CHECK-LABEL: @f20
+; COMMON: cfdctsiz
+define i32 @f20(double %a) {
+entry:
+  %result = call i32 @llvm.experimental.constrained.fptosi.i32.f64(double 42.1,
+                                               metadata !"fpexcept.strict")
+  ret i32 %result
+}
+
+@llvm.fp.env = thread_local global i8 zeroinitializer, section "llvm.metadata"
+declare i32 @llvm.experimental.constrained.fptosi.i32.f64(double, metadata)
Index: test/CodeGen/X86/fp-intrinsics.ll
===================================================================
--- test/CodeGen/X86/fp-intrinsics.ll
+++ test/CodeGen/X86/fp-intrinsics.ll
@@ -286,6 +286,39 @@
   ret double %rem
 }
 
+; Verify that fptoui(%x) isn't simplified when the rounding mode is
+; unknown. The expansion should have only one conversion instruction.
+; Verify that no gross errors happen.
+; CHECK-LABEL: @f20u
+; NO-FMA: cmpltsd
+; NO-FMA: movapd
+; NO-FMA: andpd
+; NO-FMA: xorl
+; NO-FMA: ucomisd
+; NO-FMA: subsd
+; NO-FMA: andnpd
+; NO-FMA: orpd
+; NO-FMA: cvttsd2si
+; NO-FMA: setae
+; NO-FMA: shll
+; NO-FMA: xorl
+;
+; HAS-FMA: vcmpltsd
+; HAS-FMA: vsubsd
+; HAS-FMA: vblendvpd
+; HAS-FMA: vcvttsd2si
+; HAS-FMA: xorl
+; HAS-FMA: vucomisd
+; HAS-FMA: setae
+; HAS-FMA: shll
+; HAS-FMA: xorl
+define i32 @f20u(double %x) {
+entry:
+  %result = call i32 @llvm.experimental.constrained.fptoui.i32.f64(double %x,
+                                               metadata !"fpexcept.strict")
+  ret i32 %result
+}
+
 ; Verify that round(42.1) isn't simplified when the rounding mode is
 ; unknown.
 ; Verify that no gross errors happen.
@@ -329,6 +362,7 @@
 declare double @llvm.experimental.constrained.nearbyint.f64(double, metadata, metadata)
 declare float @llvm.experimental.constrained.fma.f32(float, float, float, metadata, metadata)
 declare double @llvm.experimental.constrained.fma.f64(double, double, double, metadata, metadata)
+declare i32 @llvm.experimental.constrained.fptosi.i32.f64(double, metadata)
+declare i32 @llvm.experimental.constrained.fptoui.i32.f64(double, metadata)
 declare float @llvm.experimental.constrained.fptrunc.f32.f64(double, metadata, metadata)
 declare double @llvm.experimental.constrained.fpext.f64.f32(float, metadata)
-
Index: test/CodeGen/X86/vector-constrained-fp-intrinsics.ll
===================================================================
--- test/CodeGen/X86/vector-constrained-fp-intrinsics.ll
+++ test/CodeGen/X86/vector-constrained-fp-intrinsics.ll
@@ -3831,6 +3831,836 @@
   ret <4 x double> %min
 }
 
+define <1 x i32> @constrained_vector_fptosi_v1i32_v1f32() {
+; CHECK-LABEL: constrained_vector_fptosi_v1i32_v1f32:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    cvttss2si {{.*}}(%rip), %eax
+; CHECK-NEXT:    retq
+;
+; AVX-LABEL: constrained_vector_fptosi_v1i32_v1f32:
+; AVX:       # %bb.0: # %entry
+; AVX-NEXT:    vcvttss2si {{.*}}(%rip), %eax
+; AVX-NEXT:    retq
+entry:
+  %result = call <1 x i32> @llvm.experimental.constrained.fptosi.v1i32.v1f32(
+                               <1 x float><float 42.0>,
+                               metadata !"fpexcept.strict")
+  ret <1 x i32> %result
+}
+
+define <2 x i32> @constrained_vector_fptosi_v2i32_v2f32() {
+; CHECK-LABEL: constrained_vector_fptosi_v2i32_v2f32:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    cvttss2si {{.*}}(%rip), %rax
+; CHECK-NEXT:    movq %rax, %xmm1
+; CHECK-NEXT:    cvttss2si {{.*}}(%rip), %rax
+; CHECK-NEXT:    movq %rax, %xmm0
+; CHECK-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; CHECK-NEXT:    retq
+;
+; AVX-LABEL: constrained_vector_fptosi_v2i32_v2f32:
+; AVX:       # %bb.0: # %entry
+; AVX-NEXT:    vcvttss2si {{.*}}(%rip), %rax
+; AVX-NEXT:    vmovq %rax, %xmm0
+; AVX-NEXT:    vcvttss2si {{.*}}(%rip), %rax
+; AVX-NEXT:    vmovq %rax, %xmm1
+; AVX-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
+; AVX-NEXT:    retq
+entry:
+  %result = call <2 x i32> @llvm.experimental.constrained.fptosi.v2i32.v2f32(
+                                <2 x float><float 42.0, float 43.0>,
+                                metadata !"fpexcept.strict")
+  ret <2 x i32> %result
+}
+
+define <3 x i32> @constrained_vector_fptosi_v3i32_v3f32() {
+; CHECK-LABEL: constrained_vector_fptosi_v3i32_v3f32:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    cvttss2si {{.*}}(%rip), %eax
+; CHECK-NEXT:    movd %eax, %xmm1
+; CHECK-NEXT:    cvttss2si {{.*}}(%rip), %eax
+; CHECK-NEXT:    movd %eax, %xmm0
+; CHECK-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; CHECK-NEXT:    cvttss2si {{.*}}(%rip), %eax
+; CHECK-NEXT:    movd %eax, %xmm1
+; CHECK-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; CHECK-NEXT:    retq
+;
+; AVX-LABEL: constrained_vector_fptosi_v3i32_v3f32:
+; AVX:       # %bb.0: # %entry
+; AVX-NEXT:    vcvttss2si {{.*}}(%rip), %eax
+; AVX-NEXT:    vmovd %eax, %xmm0
+; AVX-NEXT:    vcvttss2si {{.*}}(%rip), %eax
+; AVX-NEXT:    vpinsrd $1, %eax, %xmm0, %xmm0
+; AVX-NEXT:    vcvttss2si {{.*}}(%rip), %eax
+; AVX-NEXT:    vpinsrd $2, %eax, %xmm0, %xmm0
+; AVX-NEXT:    retq
+entry:
+  %result = call <3 x i32> @llvm.experimental.constrained.fptosi.v3i32.v3f32(
+                                <3 x float><float 42.0, float 43.0,
+                                            float 44.0>,
+                                metadata !"fpexcept.strict")
+  ret <3 x i32> %result
+}
+
+define <4 x i32> @constrained_vector_fptosi_v4i32_v4f32() {
+; CHECK-LABEL: constrained_vector_fptosi_v4i32_v4f32:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    cvttps2dq {{.*}}(%rip), %xmm0
+; CHECK-NEXT:    retq
+;
+; AVX-LABEL: constrained_vector_fptosi_v4i32_v4f32:
+; AVX:       # %bb.0: # %entry
+; AVX-NEXT:    vcvttps2dq {{.*}}(%rip), %xmm0
+; AVX-NEXT:    retq
+entry:
+  %result = call <4 x i32> @llvm.experimental.constrained.fptosi.v4i32.v4f32(
+                                <4 x float><float 42.0, float 43.0,
+                                            float 44.0, float 45.0>,
+                                metadata !"fpexcept.strict")
+  ret <4 x i32> %result
+}
+
+define <1 x i64> @constrained_vector_fptosi_v1i64_v1f32() {
+; CHECK-LABEL: constrained_vector_fptosi_v1i64_v1f32:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    cvttss2si {{.*}}(%rip), %rax
+; CHECK-NEXT:    retq
+;
+; AVX-LABEL: constrained_vector_fptosi_v1i64_v1f32:
+; AVX:       # %bb.0: # %entry
+; AVX-NEXT:    vcvttss2si {{.*}}(%rip), %rax
+; AVX-NEXT:    retq
+entry:
+  %result = call <1 x i64> @llvm.experimental.constrained.fptosi.v1i64.v1f32(
+                               <1 x float><float 42.0>,
+                               metadata !"fpexcept.strict")
+  ret <1 x i64> %result
+}
+
+define <2 x i64> @constrained_vector_fptosi_v2i64_v2f32() {
+; CHECK-LABEL: constrained_vector_fptosi_v2i64_v2f32:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    cvttss2si {{.*}}(%rip), %rax
+; CHECK-NEXT:    movq %rax, %xmm1
+; CHECK-NEXT:    cvttss2si {{.*}}(%rip), %rax
+; CHECK-NEXT:    movq %rax, %xmm0
+; CHECK-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; CHECK-NEXT:    retq
+;
+; AVX-LABEL: constrained_vector_fptosi_v2i64_v2f32:
+; AVX:       # %bb.0: # %entry
+; AVX-NEXT:    vcvttss2si {{.*}}(%rip), %rax
+; AVX-NEXT:    vmovq %rax, %xmm0
+; AVX-NEXT:    vcvttss2si {{.*}}(%rip), %rax
+; AVX-NEXT:    vmovq %rax, %xmm1
+; AVX-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
+; AVX-NEXT:    retq
+entry:
+  %result = call <2 x i64> @llvm.experimental.constrained.fptosi.v2i64.v2f32(
+                                <2 x float><float 42.0, float 43.0>,
+                                metadata !"fpexcept.strict")
+  ret <2 x i64> %result
+}
+
+define <3 x i64> @constrained_vector_fptosi_v3i64_v3f32() {
+; CHECK-LABEL: constrained_vector_fptosi_v3i64_v3f32:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    cvttss2si {{.*}}(%rip), %rax
+; CHECK-NEXT:    cvttss2si {{.*}}(%rip), %rdx
+; CHECK-NEXT:    cvttss2si {{.*}}(%rip), %rcx
+; CHECK-NEXT:    retq
+;
+; AVX-LABEL: constrained_vector_fptosi_v3i64_v3f32:
+; AVX:       # %bb.0: # %entry
+; AVX-NEXT:    vcvttss2si {{.*}}(%rip), %rax
+; AVX-NEXT:    vmovq %rax, %xmm0
+; AVX-NEXT:    vcvttss2si {{.*}}(%rip), %rax
+; AVX-NEXT:    vmovq %rax, %xmm1
+; AVX-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
+; AVX-NEXT:    vcvttss2si {{.*}}(%rip), %rax
+; AVX-NEXT:    vmovq %rax, %xmm1
+; AVX-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX-NEXT:    retq
+entry:
+  %result = call <3 x i64> @llvm.experimental.constrained.fptosi.v3i64.v3f32(
+                                <3 x float><float 42.0, float 43.0,
+                                            float 44.0>,
+                                metadata !"fpexcept.strict")
+  ret <3 x i64> %result
+}
+
+define <4 x i64> @constrained_vector_fptosi_v4i64_v4f32() {
+; CHECK-LABEL: constrained_vector_fptosi_v4i64_v4f32:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    cvttss2si {{.*}}(%rip), %rax
+; CHECK-NEXT:    movq %rax, %xmm1
+; CHECK-NEXT:    cvttss2si {{.*}}(%rip), %rax
+; CHECK-NEXT:    movq %rax, %xmm0
+; CHECK-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; CHECK-NEXT:    cvttss2si {{.*}}(%rip), %rax
+; CHECK-NEXT:    movq %rax, %xmm2
+; CHECK-NEXT:    cvttss2si {{.*}}(%rip), %rax
+; CHECK-NEXT:    movq %rax, %xmm1
+; CHECK-NEXT:    punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0]
+; CHECK-NEXT:    retq
+;
+; AVX-LABEL: constrained_vector_fptosi_v4i64_v4f32:
+; AVX:       # %bb.0: # %entry
+; AVX-NEXT:    vcvttss2si {{.*}}(%rip), %rax
+; AVX-NEXT:    vmovq %rax, %xmm0
+; AVX-NEXT:    vcvttss2si {{.*}}(%rip), %rax
+; AVX-NEXT:    vmovq %rax, %xmm1
+; AVX-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
+; AVX-NEXT:    vcvttss2si {{.*}}(%rip), %rax
+; AVX-NEXT:    vmovq %rax, %xmm1
+; AVX-NEXT:    vcvttss2si {{.*}}(%rip), %rax
+; AVX-NEXT:    vmovq %rax, %xmm2
+; AVX-NEXT:    vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0]
+; AVX-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
+; AVX-NEXT:    retq
+entry:
+  %result = call <4 x i64> @llvm.experimental.constrained.fptosi.v4i64.v4f32(
+                                <4 x float><float 42.0, float 43.0,
+                                            float 44.0, float 45.0>,
+                                metadata !"fpexcept.strict")
+  ret <4 x i64> %result
+}
+
+define <1 x i32> @constrained_vector_fptosi_v1i32_v1f64() {
+; CHECK-LABEL: constrained_vector_fptosi_v1i32_v1f64:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    cvttsd2si {{.*}}(%rip), %eax
+; CHECK-NEXT:    retq
+;
+; AVX-LABEL: constrained_vector_fptosi_v1i32_v1f64:
+; AVX:       # %bb.0: # %entry
+; AVX-NEXT:    vcvttsd2si {{.*}}(%rip), %eax
+; AVX-NEXT:    retq
+entry:
+  %result = call <1 x i32> @llvm.experimental.constrained.fptosi.v1i32.v1f64(
+                               <1 x double><double 42.1>,
+                               metadata !"fpexcept.strict")
+  ret <1 x i32> %result
+}
+
+
+define <2 x i32> @constrained_vector_fptosi_v2i32_v2f64() {
+; CHECK-LABEL: constrained_vector_fptosi_v2i32_v2f64:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    cvttsd2si {{.*}}(%rip), %rax
+; CHECK-NEXT:    movq %rax, %xmm1
+; CHECK-NEXT:    cvttsd2si {{.*}}(%rip), %rax
+; CHECK-NEXT:    movq %rax, %xmm0
+; CHECK-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; CHECK-NEXT:    retq
+;
+; AVX-LABEL: constrained_vector_fptosi_v2i32_v2f64:
+; AVX:       # %bb.0: # %entry
+; AVX-NEXT:    vcvttsd2si {{.*}}(%rip), %rax
+; AVX-NEXT:    vmovq %rax, %xmm0
+; AVX-NEXT:    vcvttsd2si {{.*}}(%rip), %rax
+; AVX-NEXT:    vmovq %rax, %xmm1
+; AVX-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
+; AVX-NEXT:    retq
+entry:
+  %result = call <2 x i32> @llvm.experimental.constrained.fptosi.v2i32.v2f64(
+                                <2 x double><double 42.1, double 42.2>,
+                                metadata !"fpexcept.strict")
+  ret <2 x i32> %result
+}
+
+define <3 x i32> @constrained_vector_fptosi_v3i32_v3f64() {
+; CHECK-LABEL: constrained_vector_fptosi_v3i32_v3f64:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    cvttsd2si {{.*}}(%rip), %eax
+; CHECK-NEXT:    movd %eax, %xmm1
+; CHECK-NEXT:    cvttsd2si {{.*}}(%rip), %eax
+; CHECK-NEXT:    movd %eax, %xmm0
+; CHECK-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; CHECK-NEXT:    cvttsd2si {{.*}}(%rip), %eax
+; CHECK-NEXT:    movd %eax, %xmm1
+; CHECK-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; CHECK-NEXT:    retq
+;
+; AVX-LABEL: constrained_vector_fptosi_v3i32_v3f64:
+; AVX:       # %bb.0: # %entry
+; AVX-NEXT:    vcvttsd2si {{.*}}(%rip), %eax
+; AVX-NEXT:    vmovd %eax, %xmm0
+; AVX-NEXT:    vcvttsd2si {{.*}}(%rip), %eax
+; AVX-NEXT:    vpinsrd $1, %eax, %xmm0, %xmm0
+; AVX-NEXT:    vcvttsd2si {{.*}}(%rip), %eax
+; AVX-NEXT:    vpinsrd $2, %eax, %xmm0, %xmm0
+; AVX-NEXT:    retq
+entry:
+  %result = call <3 x i32> @llvm.experimental.constrained.fptosi.v3i32.v3f64(
+                                <3 x double><double 42.1, double 42.2,
+                                             double 42.3>,
+                                metadata !"fpexcept.strict")
+  ret <3 x i32> %result
+}
+
+define <4 x i32> @constrained_vector_fptosi_v4i32_v4f64() {
+; CHECK-LABEL: constrained_vector_fptosi_v4i32_v4f64:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    cvttsd2si {{.*}}(%rip), %rax
+; CHECK-NEXT:    movq %rax, %xmm0
+; CHECK-NEXT:    cvttsd2si {{.*}}(%rip), %rax
+; CHECK-NEXT:    movq %rax, %xmm1
+; CHECK-NEXT:    punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0]
+; CHECK-NEXT:    cvttsd2si {{.*}}(%rip), %rax
+; CHECK-NEXT:    movq %rax, %xmm2
+; CHECK-NEXT:    cvttsd2si {{.*}}(%rip), %rax
+; CHECK-NEXT:    movq %rax, %xmm0
+; CHECK-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0]
+; CHECK-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,2]
+; CHECK-NEXT:    retq
+;
+; AVX-LABEL: constrained_vector_fptosi_v4i32_v4f64:
+; AVX:       # %bb.0: # %entry
+; AVX-NEXT:    vcvttpd2dqy {{.*}}(%rip), %xmm0
+; AVX-NEXT:    retq
+entry:
+  %result = call <4 x i32> @llvm.experimental.constrained.fptosi.v4i32.v4f64(
+                                <4 x double><double 42.1, double 42.2,
+                                             double 42.3, double 42.4>,
+                                metadata !"fpexcept.strict")
+  ret <4 x i32> %result
+}
+
+define <1 x i64> @constrained_vector_fptosi_v1i64_v1f64() {
+; CHECK-LABEL: constrained_vector_fptosi_v1i64_v1f64:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    cvttsd2si {{.*}}(%rip), %rax
+; CHECK-NEXT:    retq
+;
+; AVX-LABEL: constrained_vector_fptosi_v1i64_v1f64:
+; AVX:       # %bb.0: # %entry
+; AVX-NEXT:    vcvttsd2si {{.*}}(%rip), %rax
+; AVX-NEXT:    retq
+entry:
+  %result = call <1 x i64> @llvm.experimental.constrained.fptosi.v1i64.v1f64(
+                               <1 x double><double 42.1>,
+                               metadata !"fpexcept.strict")
+  ret <1 x i64> %result
+}
+
+define <2 x i64> @constrained_vector_fptosi_v2i64_v2f64() {
+; CHECK-LABEL: constrained_vector_fptosi_v2i64_v2f64:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    cvttsd2si {{.*}}(%rip), %rax
+; CHECK-NEXT:    movq %rax, %xmm1
+; CHECK-NEXT:    cvttsd2si {{.*}}(%rip), %rax
+; CHECK-NEXT:    movq %rax, %xmm0
+; CHECK-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; CHECK-NEXT:    retq
+;
+; AVX-LABEL: constrained_vector_fptosi_v2i64_v2f64:
+; AVX:       # %bb.0: # %entry
+; AVX-NEXT:    vcvttsd2si {{.*}}(%rip), %rax
+; AVX-NEXT:    vmovq %rax, %xmm0
+; AVX-NEXT:    vcvttsd2si {{.*}}(%rip), %rax
+; AVX-NEXT:    vmovq %rax, %xmm1
+; AVX-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
+; AVX-NEXT:    retq
+entry:
+  %result = call <2 x i64> @llvm.experimental.constrained.fptosi.v2i64.v2f64(
+                                <2 x double><double 42.1, double 42.2>,
+                                metadata !"fpexcept.strict")
+  ret <2 x i64> %result
+}
+
+define <3 x i64> @constrained_vector_fptosi_v3i64_v3f64() {
+; CHECK-LABEL: constrained_vector_fptosi_v3i64_v3f64:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    cvttsd2si {{.*}}(%rip), %rax
+; CHECK-NEXT:    cvttsd2si {{.*}}(%rip), %rdx
+; CHECK-NEXT:    cvttsd2si {{.*}}(%rip), %rcx
+; CHECK-NEXT:    retq
+;
+; AVX-LABEL: constrained_vector_fptosi_v3i64_v3f64:
+; AVX:       # %bb.0: # %entry
+; AVX-NEXT:    vcvttsd2si {{.*}}(%rip), %rax
+; AVX-NEXT:    vmovq %rax, %xmm0
+; AVX-NEXT:    vcvttsd2si {{.*}}(%rip), %rax
+; AVX-NEXT:    vmovq %rax, %xmm1
+; AVX-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
+; AVX-NEXT:    vcvttsd2si {{.*}}(%rip), %rax
+; AVX-NEXT:    vmovq %rax, %xmm1
+; AVX-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX-NEXT:    retq
+entry:
+  %result = call <3 x i64> @llvm.experimental.constrained.fptosi.v3i64.v3f64(
+                                <3 x double><double 42.1, double 42.2,
+                                             double 42.3>,
+                                metadata !"fpexcept.strict")
+  ret <3 x i64> %result
+}
+
+define <4 x i64> @constrained_vector_fptosi_v4i64_v4f64() {
+; CHECK-LABEL: constrained_vector_fptosi_v4i64_v4f64:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    cvttsd2si {{.*}}(%rip), %rax
+; CHECK-NEXT:    movq %rax, %xmm1
+; CHECK-NEXT:    cvttsd2si {{.*}}(%rip), %rax
+; CHECK-NEXT:    movq %rax, %xmm0
+; CHECK-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; CHECK-NEXT:    cvttsd2si {{.*}}(%rip), %rax
+; CHECK-NEXT:    movq %rax, %xmm2
+; CHECK-NEXT:    cvttsd2si {{.*}}(%rip), %rax
+; CHECK-NEXT:    movq %rax, %xmm1
+; CHECK-NEXT:    punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0]
+; CHECK-NEXT:    retq
+;
+; AVX-LABEL: constrained_vector_fptosi_v4i64_v4f64:
+; AVX:       # %bb.0: # %entry
+; AVX-NEXT:    vcvttsd2si {{.*}}(%rip), %rax
+; AVX-NEXT:    vmovq %rax, %xmm0
+; AVX-NEXT:    vcvttsd2si {{.*}}(%rip), %rax
+; AVX-NEXT:    vmovq %rax, %xmm1
+; AVX-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
+; AVX-NEXT:    vcvttsd2si {{.*}}(%rip), %rax
+; AVX-NEXT:    vmovq %rax, %xmm1
+; AVX-NEXT:    vcvttsd2si {{.*}}(%rip), %rax
+; AVX-NEXT:    vmovq %rax, %xmm2
+; AVX-NEXT:    vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0]
+; AVX-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
+; AVX-NEXT:    retq
+entry:
+  %result = call <4 x i64> @llvm.experimental.constrained.fptosi.v4i64.v4f64(
+                                <4 x double><double 42.1, double 42.2,
+                                             double 42.3, double 42.4>,
+                                metadata !"fpexcept.strict")
+  ret <4 x i64> %result
+}
+
+define <1 x i32> @constrained_vector_fptoui_v1i32_v1f32() {
+; CHECK-LABEL: constrained_vector_fptoui_v1i32_v1f32:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    cvttss2si {{.*}}(%rip), %eax
+; CHECK-NEXT:    retq
+;
+; AVX-LABEL: constrained_vector_fptoui_v1i32_v1f32:
+; AVX:       # %bb.0: # %entry
+; AVX-NEXT:    vcvttss2si {{.*}}(%rip), %eax
+; AVX-NEXT:    retq
+entry:
+  %result = call <1 x i32> @llvm.experimental.constrained.fptoui.v1i32.v1f32(
+                               <1 x float><float 42.0>,
+                               metadata !"fpexcept.strict")
+  ret <1 x i32> %result
+}
+
+define <2 x i32> @constrained_vector_fptoui_v2i32_v2f32() {
+; CHECK-LABEL: constrained_vector_fptoui_v2i32_v2f32:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    cvttss2si {{.*}}(%rip), %rax
+; CHECK-NEXT:    movq %rax, %xmm1
+; CHECK-NEXT:    cvttss2si {{.*}}(%rip), %rax
+; CHECK-NEXT:    movq %rax, %xmm0
+; CHECK-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; CHECK-NEXT:    retq
+;
+; AVX-LABEL: constrained_vector_fptoui_v2i32_v2f32:
+; AVX:       # %bb.0: # %entry
+; AVX-NEXT:    vcvttss2si {{.*}}(%rip), %rax
+; AVX-NEXT:    vmovq %rax, %xmm0
+; AVX-NEXT:    vcvttss2si {{.*}}(%rip), %rax
+; AVX-NEXT:    vmovq %rax, %xmm1
+; AVX-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
+; AVX-NEXT:    retq
+entry:
+  %result = call <2 x i32> @llvm.experimental.constrained.fptoui.v2i32.v2f32(
+                                <2 x float><float 42.0, float 43.0>,
+                                metadata !"fpexcept.strict")
+  ret <2 x i32> %result
+}
+
+define <3 x i32> @constrained_vector_fptoui_v3i32_v3f32() {
+; CHECK-LABEL: constrained_vector_fptoui_v3i32_v3f32:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    cvttss2si {{.*}}(%rip), %eax
+; CHECK-NEXT:    movd %eax, %xmm1
+; CHECK-NEXT:    cvttss2si {{.*}}(%rip), %eax
+; CHECK-NEXT:    movd %eax, %xmm0
+; CHECK-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; CHECK-NEXT:    cvttss2si {{.*}}(%rip), %eax
+; CHECK-NEXT:    movd %eax, %xmm1
+; CHECK-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; CHECK-NEXT:    retq
+;
+; AVX-LABEL: constrained_vector_fptoui_v3i32_v3f32:
+; AVX:       # %bb.0: # %entry
+; AVX-NEXT:    vcvttss2si {{.*}}(%rip), %eax
+; AVX-NEXT:    vcvttss2si {{.*}}(%rip), %ecx
+; AVX-NEXT:    vmovd %ecx, %xmm0
+; AVX-NEXT:    vpinsrd $1, %eax, %xmm0, %xmm0
+; AVX-NEXT:    vcvttss2si {{.*}}(%rip), %eax
+; AVX-NEXT:    vpinsrd $2, %eax, %xmm0, %xmm0
+; AVX-NEXT:    retq
+entry:
+  %result = call <3 x i32> @llvm.experimental.constrained.fptoui.v3i32.v3f32(
+                                <3 x float><float 42.0, float 43.0,
+                                            float 44.0>,
+                                metadata !"fpexcept.strict")
+  ret <3 x i32> %result
+}
+
+define <4 x i32> @constrained_vector_fptoui_v4i32_v4f32() {
+; CHECK-LABEL: constrained_vector_fptoui_v4i32_v4f32:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    cvttss2si {{.*}}(%rip), %eax
+; CHECK-NEXT:    movd %eax, %xmm0
+; CHECK-NEXT:    cvttss2si {{.*}}(%rip), %eax
+; CHECK-NEXT:    movd %eax, %xmm1
+; CHECK-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
+; CHECK-NEXT:    cvttss2si {{.*}}(%rip), %eax
+; CHECK-NEXT:    movd %eax, %xmm2
+; CHECK-NEXT:    cvttss2si {{.*}}(%rip), %eax
+; CHECK-NEXT:    movd %eax, %xmm0
+; CHECK-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
+; CHECK-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; CHECK-NEXT:    retq
+;
+; AVX-LABEL: constrained_vector_fptoui_v4i32_v4f32:
+; AVX:       # %bb.0: # %entry
+; AVX-NEXT:    vcvttss2si {{.*}}(%rip), %eax
+; AVX-NEXT:    vcvttss2si {{.*}}(%rip), %ecx
+; AVX-NEXT:    vmovd %ecx, %xmm0
+; AVX-NEXT:    vpinsrd $1, %eax, %xmm0, %xmm0
+; AVX-NEXT:    vcvttss2si {{.*}}(%rip), %eax
+; AVX-NEXT:    vpinsrd $2, %eax, %xmm0, %xmm0
+; AVX-NEXT:    vcvttss2si {{.*}}(%rip), %eax
+; AVX-NEXT:    vpinsrd $3, %eax, %xmm0, %xmm0
+; AVX-NEXT:    retq
+entry:
+  %result = call <4 x i32> @llvm.experimental.constrained.fptoui.v4i32.v4f32(
+                                <4 x float><float 42.0, float 43.0,
+                                            float 44.0, float 45.0>,
+                                metadata !"fpexcept.strict")
+  ret <4 x i32> %result
+}
+
+define <1 x i64> @constrained_vector_fptoui_v1i64_v1f32() {
+; CHECK-LABEL: constrained_vector_fptoui_v1i64_v1f32:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    cvttss2si {{.*}}(%rip), %rax
+; CHECK-NEXT:    retq
+;
+; AVX-LABEL: constrained_vector_fptoui_v1i64_v1f32:
+; AVX:       # %bb.0: # %entry
+; AVX-NEXT:    vcvttss2si {{.*}}(%rip), %rax
+; AVX-NEXT:    retq
+entry:
+  %result = call <1 x i64> @llvm.experimental.constrained.fptoui.v1i64.v1f32(
+                               <1 x float><float 42.0>,
+                               metadata !"fpexcept.strict")
+  ret <1 x i64> %result
+}
+
+define <2 x i64> @constrained_vector_fptoui_v2i64_v2f32() {
+; CHECK-LABEL: constrained_vector_fptoui_v2i64_v2f32:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    cvttss2si {{.*}}(%rip), %rax
+; CHECK-NEXT:    movq %rax, %xmm1
+; CHECK-NEXT:    cvttss2si {{.*}}(%rip), %rax
+; CHECK-NEXT:    movq %rax, %xmm0
+; CHECK-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; CHECK-NEXT:    retq
+;
+; AVX-LABEL: constrained_vector_fptoui_v2i64_v2f32:
+; AVX:       # %bb.0: # %entry
+; AVX-NEXT:    vcvttss2si {{.*}}(%rip), %rax
+; AVX-NEXT:    vmovq %rax, %xmm0
+; AVX-NEXT:    vcvttss2si {{.*}}(%rip), %rax
+; AVX-NEXT:    vmovq %rax, %xmm1
+; AVX-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
+; AVX-NEXT:    retq
+entry:
+  %result = call <2 x i64> @llvm.experimental.constrained.fptoui.v2i64.v2f32(
+                                <2 x float><float 42.0, float 43.0>,
+                                metadata !"fpexcept.strict")
+  ret <2 x i64> %result
+}
+
+define <3 x i64> @constrained_vector_fptoui_v3i64_v3f32() {
+; CHECK-LABEL: constrained_vector_fptoui_v3i64_v3f32:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    cvttss2si {{.*}}(%rip), %rax
+; CHECK-NEXT:    cvttss2si {{.*}}(%rip), %rdx
+; CHECK-NEXT:    cvttss2si {{.*}}(%rip), %rcx
+; CHECK-NEXT:    retq
+;
+; AVX-LABEL: constrained_vector_fptoui_v3i64_v3f32:
+; AVX:       # %bb.0: # %entry
+; AVX-NEXT:    vcvttss2si {{.*}}(%rip), %rax
+; AVX-NEXT:    vmovq %rax, %xmm0
+; AVX-NEXT:    vcvttss2si {{.*}}(%rip), %rax
+; AVX-NEXT:    vmovq %rax, %xmm1
+; AVX-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
+; AVX-NEXT:    vcvttss2si {{.*}}(%rip), %rax
+; AVX-NEXT:    vmovq %rax, %xmm1
+; AVX-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX-NEXT:    retq
+entry:
+  %result = call <3 x i64> @llvm.experimental.constrained.fptoui.v3i64.v3f32(
+                                <3 x float><float 42.0, float 43.0,
+                                            float 44.0>,
+                                metadata !"fpexcept.strict")
+  ret <3 x i64> %result
+}
+
+define <4 x i64> @constrained_vector_fptoui_v4i64_v4f32() {
+; CHECK-LABEL: constrained_vector_fptoui_v4i64_v4f32:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    cvttss2si {{.*}}(%rip), %rax
+; CHECK-NEXT:    movq %rax, %xmm1
+; CHECK-NEXT:    cvttss2si {{.*}}(%rip), %rax
+; CHECK-NEXT:    movq %rax, %xmm0
+; CHECK-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; CHECK-NEXT:    cvttss2si {{.*}}(%rip), %rax
+; CHECK-NEXT:    movq %rax, %xmm2
+; CHECK-NEXT:    cvttss2si {{.*}}(%rip), %rax
+; CHECK-NEXT:    movq %rax, %xmm1
+; CHECK-NEXT:    punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0]
+; CHECK-NEXT:    retq
+;
+; AVX-LABEL: constrained_vector_fptoui_v4i64_v4f32:
+; AVX:       # %bb.0: # %entry
+; AVX-NEXT:    vcvttss2si {{.*}}(%rip), %rax
+; AVX-NEXT:    vmovq %rax, %xmm0
+; AVX-NEXT:    vcvttss2si {{.*}}(%rip), %rax
+; AVX-NEXT:    vmovq %rax, %xmm1
+; AVX-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
+; AVX-NEXT:    vcvttss2si {{.*}}(%rip), %rax
+; AVX-NEXT:    vmovq %rax, %xmm1
+; AVX-NEXT:    vcvttss2si {{.*}}(%rip), %rax
+; AVX-NEXT:    vmovq %rax, %xmm2
+; AVX-NEXT:    vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0]
+; AVX-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
+; AVX-NEXT:    retq
+entry:
+  %result = call <4 x i64> @llvm.experimental.constrained.fptoui.v4i64.v4f32(
+                                <4 x float><float 42.0, float 43.0,
+                                            float 44.0, float 45.0>,
+                                metadata !"fpexcept.strict")
+  ret <4 x i64> %result
+}
+
+define <1 x i32> @constrained_vector_fptoui_v1i32_v1f64() {
+; CHECK-LABEL: constrained_vector_fptoui_v1i32_v1f64:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    cvttsd2si {{.*}}(%rip), %eax
+; CHECK-NEXT:    retq
+;
+; AVX-LABEL: constrained_vector_fptoui_v1i32_v1f64:
+; AVX:       # %bb.0: # %entry
+; AVX-NEXT:    vcvttsd2si {{.*}}(%rip), %eax
+; AVX-NEXT:    retq
+entry:
+  %result = call <1 x i32> @llvm.experimental.constrained.fptoui.v1i32.v1f64(
+                               <1 x double><double 42.1>,
+                               metadata !"fpexcept.strict")
+  ret <1 x i32> %result
+}
+
+define <2 x i32> @constrained_vector_fptoui_v2i32_v2f64() {
+; CHECK-LABEL: constrained_vector_fptoui_v2i32_v2f64:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    cvttsd2si {{.*}}(%rip), %rax
+; CHECK-NEXT:    movq %rax, %xmm1
+; CHECK-NEXT:    cvttsd2si {{.*}}(%rip), %rax
+; CHECK-NEXT:    movq %rax, %xmm0
+; CHECK-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; CHECK-NEXT:    retq
+;
+; AVX-LABEL: constrained_vector_fptoui_v2i32_v2f64:
+; AVX:       # %bb.0: # %entry
+; AVX-NEXT:    vcvttsd2si {{.*}}(%rip), %rax
+; AVX-NEXT:    vmovq %rax, %xmm0
+; AVX-NEXT:    vcvttsd2si {{.*}}(%rip), %rax
+; AVX-NEXT:    vmovq %rax, %xmm1
+; AVX-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
+; AVX-NEXT:    retq
+entry:
+  %result = call <2 x i32> @llvm.experimental.constrained.fptoui.v2i32.v2f64(
+                                <2 x double><double 42.1, double 42.2>,
+                                metadata !"fpexcept.strict")
+  ret <2 x i32> %result
+}
+
+define <3 x i32> @constrained_vector_fptoui_v3i32_v3f64() {
+; CHECK-LABEL: constrained_vector_fptoui_v3i32_v3f64:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    cvttsd2si {{.*}}(%rip), %eax
+; CHECK-NEXT:    movd %eax, %xmm1
+; CHECK-NEXT:    cvttsd2si {{.*}}(%rip), %eax
+; CHECK-NEXT:    movd %eax, %xmm0
+; CHECK-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; CHECK-NEXT:    cvttsd2si {{.*}}(%rip), %eax
+; CHECK-NEXT:    movd %eax, %xmm1
+; CHECK-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; CHECK-NEXT:    retq
+;
+; AVX-LABEL: constrained_vector_fptoui_v3i32_v3f64:
+; AVX:       # %bb.0: # %entry
+; AVX-NEXT:    vcvttsd2si {{.*}}(%rip), %eax
+; AVX-NEXT:    vcvttsd2si {{.*}}(%rip), %ecx
+; AVX-NEXT:    vmovd %ecx, %xmm0
+; AVX-NEXT:    vpinsrd $1, %eax, %xmm0, %xmm0
+; AVX-NEXT:    vcvttsd2si {{.*}}(%rip), %eax
+; AVX-NEXT:    vpinsrd $2, %eax, %xmm0, %xmm0
+; AVX-NEXT:    retq
+entry:
+  %result = call <3 x i32> @llvm.experimental.constrained.fptoui.v3i32.v3f64(
+                                <3 x double><double 42.1, double 42.2,
+                                             double 42.3>,
+                                metadata !"fpexcept.strict")
+  ret <3 x i32> %result
+}
+
+define <4 x i32> @constrained_vector_fptoui_v4i32_v4f64() {
+; CHECK-LABEL: constrained_vector_fptoui_v4i32_v4f64:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    cvttsd2si {{.*}}(%rip), %rax
+; CHECK-NEXT:    movq %rax, %xmm0
+; CHECK-NEXT:    cvttsd2si {{.*}}(%rip), %rax
+; CHECK-NEXT:    movq %rax, %xmm1
+; CHECK-NEXT:    punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0]
+; CHECK-NEXT:    cvttsd2si {{.*}}(%rip), %rax
+; CHECK-NEXT:    movq %rax, %xmm2
+; CHECK-NEXT:    cvttsd2si {{.*}}(%rip), %rax
+; CHECK-NEXT:    movq %rax, %xmm0
+; CHECK-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0]
+; CHECK-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,2]
+; CHECK-NEXT:    retq
+;
+; AVX-LABEL: constrained_vector_fptoui_v4i32_v4f64:
+; AVX:       # %bb.0: # %entry
+; AVX-NEXT:    vcvttsd2si {{.*}}(%rip), %eax
+; AVX-NEXT:    vcvttsd2si {{.*}}(%rip), %ecx
+; AVX-NEXT:    vmovd %ecx, %xmm0
+; AVX-NEXT:    vpinsrd $1, %eax, %xmm0, %xmm0
+; AVX-NEXT:    vcvttsd2si {{.*}}(%rip), %eax
+; AVX-NEXT:    vpinsrd $2, %eax, %xmm0, %xmm0
+; AVX-NEXT:    vcvttsd2si {{.*}}(%rip), %eax
+; AVX-NEXT:    vpinsrd $3, %eax, %xmm0, %xmm0
+; AVX-NEXT:    retq
+entry:
+  %result = call <4 x i32> @llvm.experimental.constrained.fptoui.v4i32.v4f64(
+                                <4 x double><double 42.1, double 42.2,
+                                             double 42.3, double 42.4>,
+                                metadata !"fpexcept.strict")
+  ret <4 x i32> %result
+}
+
+define <1 x i64> @constrained_vector_fptoui_v1i64_v1f64() {
+; CHECK-LABEL: constrained_vector_fptoui_v1i64_v1f64:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    cvttsd2si {{.*}}(%rip), %rax
+; CHECK-NEXT:    retq
+;
+; AVX-LABEL: constrained_vector_fptoui_v1i64_v1f64:
+; AVX:       # %bb.0: # %entry
+; AVX-NEXT:    vcvttsd2si {{.*}}(%rip), %rax
+; AVX-NEXT:    retq
+entry:
+  %result = call <1 x i64> @llvm.experimental.constrained.fptoui.v1i64.v1f64(
+                               <1 x double><double 42.1>,
+                               metadata !"fpexcept.strict")
+  ret <1 x i64> %result
+}
+
+define <2 x i64> @constrained_vector_fptoui_v2i64_v2f64() {
+; CHECK-LABEL: constrained_vector_fptoui_v2i64_v2f64:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    cvttsd2si {{.*}}(%rip), %rax
+; CHECK-NEXT:    movq %rax, %xmm1
+; CHECK-NEXT:    cvttsd2si {{.*}}(%rip), %rax
+; CHECK-NEXT:    movq %rax, %xmm0
+; CHECK-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; CHECK-NEXT:    retq
+;
+; AVX-LABEL: constrained_vector_fptoui_v2i64_v2f64:
+; AVX:       # %bb.0: # %entry
+; AVX-NEXT:    vcvttsd2si {{.*}}(%rip), %rax
+; AVX-NEXT:    vmovq %rax, %xmm0
+; AVX-NEXT:    vcvttsd2si {{.*}}(%rip), %rax
+; AVX-NEXT:    vmovq %rax, %xmm1
+; AVX-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
+; AVX-NEXT:    retq
+entry:
+  %result = call <2 x i64> @llvm.experimental.constrained.fptoui.v2i64.v2f64(
+                                <2 x double><double 42.1, double 42.2>,
+                                metadata !"fpexcept.strict")
+  ret <2 x i64> %result
+}
+
+define <3 x i64> @constrained_vector_fptoui_v3i64_v3f64() {
+; CHECK-LABEL: constrained_vector_fptoui_v3i64_v3f64:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    cvttsd2si {{.*}}(%rip), %rax
+; CHECK-NEXT:    cvttsd2si {{.*}}(%rip), %rdx
+; CHECK-NEXT:    cvttsd2si {{.*}}(%rip), %rcx
+; CHECK-NEXT:    retq
+;
+; AVX-LABEL: constrained_vector_fptoui_v3i64_v3f64:
+; AVX:       # %bb.0: # %entry
+; AVX-NEXT:    vcvttsd2si {{.*}}(%rip), %rax
+; AVX-NEXT:    vmovq %rax, %xmm0
+; AVX-NEXT:    vcvttsd2si {{.*}}(%rip), %rax
+; AVX-NEXT:    vmovq %rax, %xmm1
+; AVX-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
+; AVX-NEXT:    vcvttsd2si {{.*}}(%rip), %rax
+; AVX-NEXT:    vmovq %rax, %xmm1
+; AVX-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX-NEXT:    retq
+entry:
+  %result = call <3 x i64> @llvm.experimental.constrained.fptoui.v3i64.v3f64(
+                                <3 x double><double 42.1, double 42.2,
+                                             double 42.3>,
+                                metadata !"fpexcept.strict")
+  ret <3 x i64> %result
+}
+
+define <4 x i64> @constrained_vector_fptoui_v4i64_v4f64() {
+; CHECK-LABEL: constrained_vector_fptoui_v4i64_v4f64:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    cvttsd2si {{.*}}(%rip), %rax
+; CHECK-NEXT:    movq %rax, %xmm1
+; CHECK-NEXT:    cvttsd2si {{.*}}(%rip), %rax
+; CHECK-NEXT:    movq %rax, %xmm0
+; CHECK-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; CHECK-NEXT:    cvttsd2si {{.*}}(%rip), %rax
+; CHECK-NEXT:    movq %rax, %xmm2
+; CHECK-NEXT:    cvttsd2si {{.*}}(%rip), %rax
+; CHECK-NEXT:    movq %rax, %xmm1
+; CHECK-NEXT:    punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0]
+; CHECK-NEXT:    retq
+;
+; AVX-LABEL: constrained_vector_fptoui_v4i64_v4f64:
+; AVX:       # %bb.0: # %entry
+; AVX-NEXT:    vcvttsd2si {{.*}}(%rip), %rax
+; AVX-NEXT:    vmovq %rax, %xmm0
+; AVX-NEXT:    vcvttsd2si {{.*}}(%rip), %rax
+; AVX-NEXT:    vmovq %rax, %xmm1
+; AVX-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
+; AVX-NEXT:    vcvttsd2si {{.*}}(%rip), %rax
+; AVX-NEXT:    vmovq %rax, %xmm1
+; AVX-NEXT:    vcvttsd2si {{.*}}(%rip), %rax
+; AVX-NEXT:    vmovq %rax, %xmm2
+; AVX-NEXT:    vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0]
+; AVX-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
+; AVX-NEXT:    retq
+entry:
+  %result = call <4 x i64> @llvm.experimental.constrained.fptoui.v4i64.v4f64(
+                                <4 x double><double 42.1, double 42.2,
+                                             double 42.3, double 42.4>,
+                                metadata !"fpexcept.strict")
+  ret <4 x i64> %result
+}
+
+
 define <1 x float> @constrained_vector_fptrunc_v1f64() {
 ; CHECK-LABEL: constrained_vector_fptrunc_v1f64:
 ; CHECK:       # %bb.0: # %entry
@@ -4624,6 +5454,14 @@
 declare <2 x double> @llvm.experimental.constrained.nearbyint.v2f64(<2 x double>, metadata, metadata)
 declare <2 x double> @llvm.experimental.constrained.maxnum.v2f64(<2 x double>, <2 x double>, metadata, metadata)
 declare <2 x double> @llvm.experimental.constrained.minnum.v2f64(<2 x double>, <2 x double>, metadata, metadata)
+declare <2 x i32> @llvm.experimental.constrained.fptosi.v2i32.v2f32(<2 x float>, metadata)
+declare <2 x i64> @llvm.experimental.constrained.fptosi.v2i64.v2f32(<2 x float>, metadata)
+declare <2 x i32> @llvm.experimental.constrained.fptosi.v2i32.v2f64(<2 x double>, metadata)
+declare <2 x i64> @llvm.experimental.constrained.fptosi.v2i64.v2f64(<2 x double>, metadata)
+declare <2 x i32> @llvm.experimental.constrained.fptoui.v2i32.v2f32(<2 x float>, metadata)
+declare <2 x i64> @llvm.experimental.constrained.fptoui.v2i64.v2f32(<2 x float>, metadata)
+declare <2 x i32> @llvm.experimental.constrained.fptoui.v2i32.v2f64(<2 x double>, metadata)
+declare <2 x i64> @llvm.experimental.constrained.fptoui.v2i64.v2f64(<2 x double>, metadata)
 declare <2 x float> @llvm.experimental.constrained.fptrunc.v2f32.v2f64(<2 x double>, metadata, metadata)
 declare <2 x double> @llvm.experimental.constrained.fpext.v2f64.v2f32(<2 x float>, metadata)
 declare <2 x double> @llvm.experimental.constrained.ceil.v2f64(<2 x double>, metadata, metadata)
@@ -4651,6 +5489,14 @@
 declare <1 x float> @llvm.experimental.constrained.nearbyint.v1f32(<1 x float>, metadata, metadata)
 declare <1 x float> @llvm.experimental.constrained.maxnum.v1f32(<1 x float>, <1 x float>, metadata, metadata)
 declare <1 x float> @llvm.experimental.constrained.minnum.v1f32(<1 x float>, <1 x float>, metadata, metadata)
+declare <1 x i32> @llvm.experimental.constrained.fptosi.v1i32.v1f32(<1 x float>, metadata)
+declare <1 x i64> @llvm.experimental.constrained.fptosi.v1i64.v1f32(<1 x float>, metadata)
+declare <1 x i32> @llvm.experimental.constrained.fptosi.v1i32.v1f64(<1 x double>, metadata)
+declare <1 x i64> @llvm.experimental.constrained.fptosi.v1i64.v1f64(<1 x double>, metadata)
+declare <1 x i32> @llvm.experimental.constrained.fptoui.v1i32.v1f32(<1 x float>, metadata)
+declare <1 x i64> @llvm.experimental.constrained.fptoui.v1i64.v1f32(<1 x float>, metadata)
+declare <1 x i32> @llvm.experimental.constrained.fptoui.v1i32.v1f64(<1 x double>, metadata)
+declare <1 x i64> @llvm.experimental.constrained.fptoui.v1i64.v1f64(<1 x double>, metadata)
 declare <1 x float> @llvm.experimental.constrained.fptrunc.v1f32.v1f64(<1 x double>, metadata, metadata)
 declare <1 x double> @llvm.experimental.constrained.fpext.v1f64.v1f32(<1 x float>, metadata)
 declare <1 x float> @llvm.experimental.constrained.ceil.v1f32(<1 x float>, metadata, metadata)
@@ -4697,6 +5543,14 @@
 declare <3 x double> @llvm.experimental.constrained.maxnum.v3f64(<3 x double>, <3 x double>, metadata, metadata)
 declare <3 x float> @llvm.experimental.constrained.minnum.v3f32(<3 x float>, <3 x float>, metadata, metadata)
 declare <3 x double> @llvm.experimental.constrained.minnum.v3f64(<3 x double>, <3 x double>, metadata, metadata)
+declare <3 x i32> @llvm.experimental.constrained.fptosi.v3i32.v3f32(<3 x float>, metadata)
+declare <3 x i64> @llvm.experimental.constrained.fptosi.v3i64.v3f32(<3 x float>, metadata)
+declare <3 x i32> @llvm.experimental.constrained.fptosi.v3i32.v3f64(<3 x double>, metadata)
+declare <3 x i64> @llvm.experimental.constrained.fptosi.v3i64.v3f64(<3 x double>, metadata)
+declare <3 x i32> @llvm.experimental.constrained.fptoui.v3i32.v3f32(<3 x float>, metadata)
+declare <3 x i64> @llvm.experimental.constrained.fptoui.v3i64.v3f32(<3 x float>, metadata)
+declare <3 x i32> @llvm.experimental.constrained.fptoui.v3i32.v3f64(<3 x double>, metadata)
+declare <3 x i64> @llvm.experimental.constrained.fptoui.v3i64.v3f64(<3 x double>, metadata)
 declare <3 x float> @llvm.experimental.constrained.fptrunc.v3f32.v3f64(<3 x double>, metadata, metadata)
 declare <3 x double> @llvm.experimental.constrained.fpext.v3f64.v3f32(<3 x float>, metadata)
 declare <3 x float> @llvm.experimental.constrained.ceil.v3f32(<3 x float>, metadata, metadata)
@@ -4728,6 +5582,14 @@
 declare <4 x double> @llvm.experimental.constrained.nearbyint.v4f64(<4 x double>, metadata, metadata)
 declare <4 x double> @llvm.experimental.constrained.maxnum.v4f64(<4 x double>, <4 x double>, metadata, metadata)
 declare <4 x double> @llvm.experimental.constrained.minnum.v4f64(<4 x double>, <4 x double>, metadata, metadata)
+declare <4 x i32> @llvm.experimental.constrained.fptosi.v4i32.v4f32(<4 x float>, metadata)
+declare <4 x i64> @llvm.experimental.constrained.fptosi.v4i64.v4f32(<4 x float>, metadata)
+declare <4 x i32> @llvm.experimental.constrained.fptosi.v4i32.v4f64(<4 x double>, metadata)
+declare <4 x i64> @llvm.experimental.constrained.fptosi.v4i64.v4f64(<4 x double>, metadata)
+declare <4 x i32> @llvm.experimental.constrained.fptoui.v4i32.v4f32(<4 x float>, metadata)
+declare <4 x i64> @llvm.experimental.constrained.fptoui.v4i64.v4f32(<4 x float>, metadata)
+declare <4 x i32> @llvm.experimental.constrained.fptoui.v4i32.v4f64(<4 x double>, metadata)
+declare <4 x i64> @llvm.experimental.constrained.fptoui.v4i64.v4f64(<4 x double>, metadata)
 declare <4 x float> @llvm.experimental.constrained.fptrunc.v4f32.v4f64(<4 x double>, metadata, metadata)
 declare <4 x double> @llvm.experimental.constrained.fpext.v4f64.v4f32(<4 x float>, metadata)
 declare <4 x double> @llvm.experimental.constrained.ceil.v4f64(<4 x double>, metadata, metadata)
Index: test/Feature/fp-intrinsics.ll
===================================================================
--- test/Feature/fp-intrinsics.ll
+++ test/Feature/fp-intrinsics.ll
@@ -242,6 +242,29 @@
   ret double %result
 }
 
+; Verify that fptoui(42.1) isn't simplified when the rounding mode is
+; unknown.
+; CHECK-LABEL: f18
+; CHECK: call zeroext i32 @llvm.experimental.constrained.fptoui
+define zeroext i32 @f18() {
+entry:
+  %result = call zeroext i32 @llvm.experimental.constrained.fptoui.i32.f64(
+                                               double 42.1,
+                                               metadata !"fpexcept.strict")
+  ret i32 %result
+}
+
+; Verify that fptosi(42.1) isn't simplified when the rounding mode is
+; unknown.
+; CHECK-LABEL: f19
+; CHECK: call i32 @llvm.experimental.constrained.fptosi
+define i32 @f19() {
+entry:
+  %result = call i32 @llvm.experimental.constrained.fptosi.i32.f64(double 42.1,
+                                               metadata !"fpexcept.strict")
+  ret i32 %result
+}
+
 ; Verify that fptrunc(42.1) isn't simplified when the rounding mode is
 ; unknown.
 ; CHECK-LABEL: f20
@@ -284,5 +307,7 @@
 declare double @llvm.experimental.constrained.rint.f64(double, metadata, metadata)
 declare double @llvm.experimental.constrained.nearbyint.f64(double, metadata, metadata)
 declare double @llvm.experimental.constrained.fma.f64(double, double, double, metadata, metadata)
+declare i32 @llvm.experimental.constrained.fptosi.i32.f64(double, metadata)
+declare i32 @llvm.experimental.constrained.fptoui.i32.f64(double, metadata)
 declare float @llvm.experimental.constrained.fptrunc.f32.f64(double, metadata, metadata)
 declare double @llvm.experimental.constrained.fpext.f64.f32(float, metadata)