Index: llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
===================================================================
--- llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
+++ llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
@@ -173,8 +173,8 @@
   SDValue ExpandFCOPYSIGN(SDNode *Node) const;
   SDValue ExpandFABS(SDNode *Node) const;
   SDValue ExpandLegalINT_TO_FP(SDNode *Node, SDValue &Chain);
-  SDValue PromoteLegalINT_TO_FP(SDValue LegalOp, EVT DestVT, bool isSigned,
-                                const SDLoc &dl);
+  void PromoteLegalINT_TO_FP(SDNode *N, const SDLoc &dl,
+                             SmallVectorImpl<SDValue> &Results);
   void PromoteLegalFP_TO_INT(SDNode *N, const SDLoc &dl,
                              SmallVectorImpl<SDValue> &Results);
@@ -2492,9 +2492,13 @@
 /// we promote it. At this point, we know that the result and operand types are
 /// legal for the target, and that there is a legal UINT_TO_FP or SINT_TO_FP
 /// operation that takes a larger input.
-SDValue SelectionDAGLegalize::PromoteLegalINT_TO_FP(SDValue LegalOp, EVT DestVT,
-                                                    bool isSigned,
-                                                    const SDLoc &dl) {
+void SelectionDAGLegalize::PromoteLegalINT_TO_FP(
+    SDNode *N, const SDLoc &dl, SmallVectorImpl<SDValue> &Results) {
+  bool IsStrict = N->isStrictFPOpcode();
+  bool IsSigned = N->getOpcode() == ISD::SINT_TO_FP ||
+                  N->getOpcode() == ISD::STRICT_SINT_TO_FP;
+  EVT DestVT = N->getValueType(0);
+  SDValue LegalOp = N->getOperand(IsStrict ? 1 : 0);
   // First step, figure out the appropriate *INT_TO_FP operation to use.
   EVT NewInTy = LegalOp.getValueType();
@@ -2506,26 +2510,34 @@
     assert(NewInTy.isInteger() && "Ran out of possibilities!");
 
     // If the target supports SINT_TO_FP of this type, use it.
-    if (TLI.isOperationLegalOrCustom(ISD::SINT_TO_FP, NewInTy)) {
-      OpToUse = ISD::SINT_TO_FP;
+    OpToUse = IsStrict ? ISD::STRICT_SINT_TO_FP : ISD::SINT_TO_FP;
+    if (TLI.isOperationLegalOrCustom(OpToUse, NewInTy))
       break;
-    }
-    if (isSigned) continue;
+    if (IsSigned) continue;
 
     // If the target supports UINT_TO_FP of this type, use it.
-    if (TLI.isOperationLegalOrCustom(ISD::UINT_TO_FP, NewInTy)) {
-      OpToUse = ISD::UINT_TO_FP;
+    OpToUse = IsStrict ? ISD::STRICT_UINT_TO_FP : ISD::UINT_TO_FP;
+    if (TLI.isOperationLegalOrCustom(OpToUse, NewInTy))
       break;
-    }
 
     // Otherwise, try a larger type.
   }
 
-  // Okay, we found the operation and type to use. Zero extend our input to the
+  // Okay, we found the operation and type to use. Extend our input to the
   // desired type then run the operation on it.
-  return DAG.getNode(OpToUse, dl, DestVT,
-                     DAG.getNode(isSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND,
-                                 dl, NewInTy, LegalOp));
+  SDValue Ext = DAG.getNode(IsSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND, dl,
+                            NewInTy, LegalOp);
+
+  SDValue Operation;
+  if (IsStrict)
+    Operation =
+        DAG.getNode(OpToUse, dl, {DestVT, MVT::Other}, {N->getOperand(0), Ext});
+  else
+    Operation = DAG.getNode(OpToUse, dl, DestVT, Ext);
+
+  Results.push_back(Operation);
+  if (IsStrict)
+    Results.push_back(Operation.getValue(1));
 }
 
 /// This function is responsible for legalizing a
@@ -4221,10 +4233,12 @@
       Node->getOpcode() == ISD::SINT_TO_FP ||
       Node->getOpcode() == ISD::SETCC ||
       Node->getOpcode() == ISD::EXTRACT_VECTOR_ELT ||
-      Node->getOpcode() == ISD::INSERT_VECTOR_ELT) {
+      Node->getOpcode() == ISD::INSERT_VECTOR_ELT)
     OVT = Node->getOperand(0).getSimpleValueType();
-  }
-  if (Node->getOpcode() == ISD::BR_CC)
+  else if (Node->getOpcode() == ISD::STRICT_UINT_TO_FP ||
+           Node->getOpcode() == ISD::STRICT_SINT_TO_FP)
+    OVT = Node->getOperand(1).getSimpleValueType();
+  else if (Node->getOpcode() == ISD::BR_CC)
    OVT = Node->getOperand(2).getSimpleValueType();
   MVT NVT = TLI.getTypeToPromoteTo(Node->getOpcode(), OVT);
   SDLoc dl(Node);
@@ -4278,10 +4292,10 @@
     PromoteLegalFP_TO_INT(Node, dl, Results);
     break;
   case ISD::UINT_TO_FP:
+  case ISD::STRICT_UINT_TO_FP:
   case ISD::SINT_TO_FP:
-    Tmp1 = PromoteLegalINT_TO_FP(Node->getOperand(0), Node->getValueType(0),
-                                 Node->getOpcode() == ISD::SINT_TO_FP, dl);
-    Results.push_back(Tmp1);
+  case ISD::STRICT_SINT_TO_FP:
+    PromoteLegalINT_TO_FP(Node, dl, Results);
     break;
   case ISD::VAARG: {
     SDValue Chain = Node->getOperand(0); // Get the chain.
Index: llvm/lib/Target/X86/X86ISelLowering.cpp
===================================================================
--- llvm/lib/Target/X86/X86ISelLowering.cpp
+++ llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -228,8 +228,10 @@
   if (!Subtarget.useSoftFloat()) {
     // Promote all UINT_TO_FP to larger SINT_TO_FP's, as X86 doesn't have this
    // operation.
-    setOperationAction(ISD::UINT_TO_FP, MVT::i8, Promote);
-    setOperationAction(ISD::UINT_TO_FP, MVT::i16, Promote);
+    setOperationAction(ISD::UINT_TO_FP,        MVT::i8, Promote);
+    setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i8, Promote);
+    setOperationAction(ISD::UINT_TO_FP,        MVT::i16, Promote);
+    setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i16, Promote);
     // We have an algorithm for SSE2, and we turn this into a 64-bit
     // FILD or VCVTUSI2SS/SD for other targets.
     setOperationAction(ISD::UINT_TO_FP, MVT::i32, Custom);
@@ -241,16 +243,18 @@
     // Promote i8 SINT_TO_FP to larger SINT_TO_FP's, as X86 doesn't have
     // this operation.
-    setOperationAction(ISD::SINT_TO_FP, MVT::i8, Promote);
+    setOperationAction(ISD::SINT_TO_FP,        MVT::i8, Promote);
+    setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::i8, Promote);
     // SSE has no i16 to fp conversion, only i32. We promote in the handler
     // to allow f80 to use i16 and f64 to use i16 with sse1 only
-    setOperationAction(ISD::SINT_TO_FP, MVT::i16, Custom);
+    setOperationAction(ISD::SINT_TO_FP,        MVT::i16, Custom);
+    setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::i16, Custom);
     // f32 and f64 cases are Legal with SSE1/SSE2, f80 case is not
-    setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom);
+    setOperationAction(ISD::SINT_TO_FP,        MVT::i32, Custom);
     setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::i32, Custom);
     // In 32-bit mode these are custom lowered. In 64-bit mode F32 and F64
     // are Legal, f80 is custom lowered.
-    setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
+    setOperationAction(ISD::SINT_TO_FP,        MVT::i64, Custom);
     setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::i64, Custom);
 
     // Promote i8 FP_TO_SINT to larger FP_TO_SINTS's, as X86 doesn't have
@@ -18423,7 +18427,8 @@
           Op.getOpcode() == ISD::STRICT_UINT_TO_FP ||
           Op.getOpcode() == ISD::UINT_TO_FP) &&
          "Unexpected opcode!");
-  unsigned OpNo = Op.getNode()->isStrictFPOpcode() ? 1 : 0;
+  bool IsStrict = Op->isStrictFPOpcode();
+  unsigned OpNo = IsStrict ? 1 : 0;
   SDValue Src = Op.getOperand(OpNo);
   MVT SrcVT = Src.getSimpleValueType();
   MVT VT = Op.getSimpleValueType();
@@ -18441,7 +18446,7 @@
     SDLoc dl(Op);
     SDValue InVec = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VecInVT, Src);
-    if (Op.getNode()->isStrictFPOpcode()) {
+    if (IsStrict) {
       SDValue CvtVec = DAG.getNode(Op.getOpcode(), dl, {VecVT, MVT::Other},
                                    {Op.getOperand(0), InVec});
       SDValue Chain = CvtVec.getValue(1);
@@ -18522,7 +18527,8 @@
 
 SDValue X86TargetLowering::LowerSINT_TO_FP(SDValue Op,
                                            SelectionDAG &DAG) const {
-  unsigned OpNo = Op.getNode()->isStrictFPOpcode() ? 1 : 0;
+  bool IsStrict = Op->isStrictFPOpcode();
+  unsigned OpNo = IsStrict ? 1 : 0;
   SDValue Src = Op.getOperand(OpNo);
   MVT SrcVT = Src.getSimpleValueType();
   MVT VT = Op.getSimpleValueType();
@@ -18532,8 +18538,7 @@
     return Extract;
 
   if (SrcVT.isVector()) {
-    if (SrcVT == MVT::v2i32 && VT == MVT::v2f64 &&
-        !Op.getNode()->isStrictFPOpcode()) {
+    if (SrcVT == MVT::v2i32 && VT == MVT::v2f64 && !IsStrict) {
       // FIXME: A strict version of CVTSI2P is needed.
       return DAG.getNode(X86ISD::CVTSI2P, dl, VT,
                          DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v4i32, Src,
@@ -18560,13 +18565,17 @@
   // SSE doesn't have an i16 conversion so we need to promote.
   if (SrcVT == MVT::i16 && (UseSSEReg || VT == MVT::f128)) {
     SDValue Ext = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i32, Src);
+    if (IsStrict)
+      return DAG.getNode(ISD::STRICT_SINT_TO_FP, dl, {VT, MVT::Other},
+                         {Op.getOperand(0), Ext});
+
     return DAG.getNode(ISD::SINT_TO_FP, dl, VT, Ext);
   }
 
   if (VT == MVT::f128)
     return LowerF128Call(Op, DAG, RTLIB::getSINTTOFP(SrcVT, VT));
 
-  SDValue ValueToStore = Op.getOperand(OpNo);
+  SDValue ValueToStore = Src;
   if (SrcVT == MVT::i64 && UseSSEReg && !Subtarget.is64Bit())
     // Bitcasting to f64 here allows us to do a single 64-bit store from
     // an SSE register, avoiding the store forwarding penalty that would come
@@ -18578,10 +18587,15 @@
   auto PtrVT = getPointerTy(MF.getDataLayout());
   int SSFI = MF.getFrameInfo().CreateStackObject(Size, Size, false);
   SDValue StackSlot = DAG.getFrameIndex(SSFI, PtrVT);
-  SDValue Chain = DAG.getStore(
-      DAG.getEntryNode(), dl, ValueToStore, StackSlot,
+  SDValue Chain = IsStrict ? Op->getOperand(0) : DAG.getEntryNode();
+  Chain = DAG.getStore(
+      Chain, dl, ValueToStore, StackSlot,
       MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), SSFI));
-  return BuildFILD(Op, SrcVT, Chain, StackSlot, DAG).first;
+  std::pair<SDValue, SDValue> Tmp = BuildFILD(Op, SrcVT, Chain, StackSlot, DAG);
+  if (IsStrict)
+    return DAG.getMergeValues({Tmp.first, Tmp.second}, dl);
+
+  return Tmp.first;
 }
 
 std::pair<SDValue, SDValue> X86TargetLowering::BuildFILD(SDValue Op, EVT SrcVT, SDValue Chain,
@@ -18669,7 +18683,8 @@
 #endif
   */
 
-  unsigned OpNo = Op.getNode()->isStrictFPOpcode() ? 1 : 0;
+  bool IsStrict = Op->isStrictFPOpcode();
+  unsigned OpNo = IsStrict ? 1 : 0;
   SDLoc dl(Op);
   LLVMContext *Context = DAG.getContext();
@@ -18707,7 +18722,7 @@
   SDValue Sub;
   SDValue Chain;
   // TODO: Are there any fast-math-flags to propagate here?
-  if (Op.getNode()->isStrictFPOpcode()) {
+  if (IsStrict) {
     Sub = DAG.getNode(ISD::STRICT_FSUB, dl, {MVT::v2f64, MVT::Other},
                       {Op.getOperand(0), XR2F, CLod1});
     Chain = Sub.getValue(1);
@@ -18715,26 +18730,25 @@
     Sub = DAG.getNode(ISD::FSUB, dl, MVT::v2f64, XR2F, CLod1);
 
   SDValue Result;
-  if (Subtarget.hasSSE3() && shouldUseHorizontalOp(true, DAG, Subtarget)) {
+  if (!IsStrict && Subtarget.hasSSE3() &&
+      shouldUseHorizontalOp(true, DAG, Subtarget)) {
     // FIXME: Do we need a STRICT version of FHADD?
     Result = DAG.getNode(X86ISD::FHADD, dl, MVT::v2f64, Sub, Sub);
   } else {
     SDValue Shuffle = DAG.getVectorShuffle(MVT::v2f64, dl, Sub, Sub, {1,-1});
-    if (Op.getNode()->isStrictFPOpcode()) {
+    if (IsStrict) {
       Result = DAG.getNode(ISD::STRICT_FADD, dl, {MVT::v2f64, MVT::Other},
                            {Chain, Shuffle, Sub});
       Chain = Result.getValue(1);
     } else
       Result = DAG.getNode(ISD::FADD, dl, MVT::v2f64, Shuffle, Sub);
   }
-  if (Op.getNode()->isStrictFPOpcode()) {
-    Result = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Result,
-                         DAG.getIntPtrConstant(0, dl));
+  Result = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Result,
+                       DAG.getIntPtrConstant(0, dl));
+  if (IsStrict)
     return DAG.getMergeValues({Result, Chain}, dl);
-  }
-  return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Result,
-                     DAG.getIntPtrConstant(0, dl));
+
+  return Result;
 }
 
 /// 32-bit unsigned integer to float expansion.
@@ -18775,14 +18789,14 @@
     SDValue Sub = DAG.getNode(ISD::STRICT_FSUB, dl, {MVT::f64, MVT::Other},
                               {Chain, Or, Bias});
 
-    if (!Op.getValueType().bitsEq(Sub.getValueType())) {
-      // Handle final rounding.
-      std::pair<SDValue, SDValue> ResultPair = DAG.getStrictFPExtendOrRound(
-          Sub, Sub.getValue(1), dl, Op.getSimpleValueType());
+    if (Op.getValueType() == Sub.getValueType())
+      return Sub;
 
-      return DAG.getMergeValues({ResultPair.first, ResultPair.second}, dl);
-    }
-    return Sub;
+    // Handle final rounding.
+    std::pair<SDValue, SDValue> ResultPair = DAG.getStrictFPExtendOrRound(
+        Sub, Sub.getValue(1), dl, Op.getSimpleValueType());
+
+    return DAG.getMergeValues({ResultPair.first, ResultPair.second}, dl);
   }
 
   // Subtract the bias.
@@ -18948,14 +18962,14 @@
 
 SDValue X86TargetLowering::LowerUINT_TO_FP(SDValue Op,
                                            SelectionDAG &DAG) const {
-  unsigned OpNo = Op.getNode()->isStrictFPOpcode() ? 1 : 0;
-  SDValue N0 = Op.getOperand(OpNo);
+  bool IsStrict = Op->isStrictFPOpcode();
+  unsigned OpNo = IsStrict ? 1 : 0;
+  SDValue Src = Op.getOperand(OpNo);
   SDLoc dl(Op);
   auto PtrVT = getPointerTy(DAG.getDataLayout());
-  MVT SrcVT = N0.getSimpleValueType();
+  MVT SrcVT = Src.getSimpleValueType();
   MVT DstVT = Op.getSimpleValueType();
-  SDValue Chain =
-      Op.getNode()->isStrictFPOpcode() ? Op.getOperand(0) : DAG.getEntryNode();
+  SDValue Chain = IsStrict ? Op.getOperand(0) : DAG.getEntryNode();
 
   if (DstVT == MVT::f128)
     return LowerF128Call(Op, DAG, RTLIB::getUINTTOFP(SrcVT, DstVT));
@@ -18975,12 +18989,12 @@
 
   // Promote i32 to i64 and use a signed conversion on 64-bit targets.
   if (SrcVT == MVT::i32 && Subtarget.is64Bit()) {
-    N0 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i64, N0);
-    if (Op.getNode()->isStrictFPOpcode()) {
+    Src = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i64, Src);
+    if (IsStrict)
       return DAG.getNode(ISD::STRICT_SINT_TO_FP, dl, {DstVT, MVT::Other},
-                         {Chain, N0});
-    }
-    return DAG.getNode(ISD::SINT_TO_FP, dl, DstVT, N0);
+                         {Chain, Src});
+
+    return DAG.getNode(ISD::SINT_TO_FP, dl, DstVT, Src);
   }
 
   if (SDValue V = LowerI64IntToFP_AVX512DQ(Op, DAG, Subtarget))
@@ -18997,20 +19011,26 @@
   SDValue StackSlot = DAG.CreateStackTemporary(MVT::i64);
   if (SrcVT == MVT::i32) {
     SDValue OffsetSlot = DAG.getMemBasePlusOffset(StackSlot, 4, dl);
-    SDValue Store1 = DAG.getStore(Chain, dl, Op.getOperand(OpNo), StackSlot,
-                                  MachinePointerInfo());
+    SDValue Store1 =
+        DAG.getStore(Chain, dl, Src, StackSlot, MachinePointerInfo());
     SDValue Store2 = DAG.getStore(Store1, dl, DAG.getConstant(0, dl, MVT::i32),
                                   OffsetSlot, MachinePointerInfo());
-    return BuildFILD(Op, MVT::i64, Store2, StackSlot, DAG).first;
+    std::pair<SDValue, SDValue> Tmp =
+        BuildFILD(Op, MVT::i64, Store2, StackSlot, DAG);
+    if (IsStrict)
+      return DAG.getMergeValues({Tmp.first, Tmp.second}, dl);
+
+    return Tmp.first;
   }
 
   assert(SrcVT == MVT::i64 && "Unexpected type in UINT_TO_FP");
-  SDValue ValueToStore = Op.getOperand(OpNo);
-  if (isScalarFPTypeInSSEReg(Op.getValueType()) && !Subtarget.is64Bit())
+  SDValue ValueToStore = Src;
+  if (isScalarFPTypeInSSEReg(Op.getValueType()) && !Subtarget.is64Bit()) {
     // Bitcasting to f64 here allows us to do a single 64-bit store from
     // an SSE register, avoiding the store forwarding penalty that would come
     // with two 32-bit stores.
     ValueToStore = DAG.getBitcast(MVT::f64, ValueToStore);
+  }
   SDValue Store =
       DAG.getStore(Chain, dl, ValueToStore, StackSlot, MachinePointerInfo());
   // For i64 source, we need to add the appropriate power of 2 if the input
@@ -19055,7 +19075,7 @@
     Chain = Fudge.getValue(1);
   // Extend everything to 80 bits to force it to be done on x87.
   // TODO: Are there any fast-math-flags to propagate here?
-  if (Op.getNode()->isStrictFPOpcode()) {
+  if (IsStrict) {
     SDValue Add = DAG.getNode(ISD::STRICT_FADD, dl, {MVT::f80, MVT::Other},
                               {Chain, Fild, Fudge});
     return DAG.getNode(ISD::STRICT_FP_ROUND, dl, {DstVT, MVT::Other},
@@ -19114,10 +19134,7 @@
   int SSFI = MF.getFrameInfo().CreateStackObject(MemSize, MemSize, false);
   SDValue StackSlot = DAG.getFrameIndex(SSFI, PtrVT);
 
-  if (IsStrict)
-    Chain = Op.getOperand(0);
-  else
-    Chain = DAG.getEntryNode();
+  Chain = IsStrict ? Op.getOperand(0) : DAG.getEntryNode();
 
   SDValue Adjust; // 0x0 or 0x80000000, for result sign bit adjustment.
Index: llvm/test/CodeGen/X86/fp-intrinsics.ll
===================================================================
--- llvm/test/CodeGen/X86/fp-intrinsics.ll
+++ llvm/test/CodeGen/X86/fp-intrinsics.ll
@@ -1942,8 +1942,92 @@
 }
 
 ; Verify that sitofp(%x) isn't simplified when the rounding mode is
-; unknown. The expansion should have only one conversion instruction.
+; unknown.
 ; Verify that no gross errors happen.
+define double @sifdb(i8 %x) #0 {
+; X87-LABEL: sifdb:
+; X87:       # %bb.0: # %entry
+; X87-NEXT:    pushl %eax
+; X87-NEXT:    .cfi_def_cfa_offset 8
+; X87-NEXT:    movsbl {{[0-9]+}}(%esp), %eax
+; X87-NEXT:    movw %ax, {{[0-9]+}}(%esp)
+; X87-NEXT:    filds {{[0-9]+}}(%esp)
+; X87-NEXT:    popl %eax
+; X87-NEXT:    .cfi_def_cfa_offset 4
+; X87-NEXT:    retl
+;
+; X86-SSE-LABEL: sifdb:
+; X86-SSE:       # %bb.0: # %entry
+; X86-SSE-NEXT:    subl $12, %esp
+; X86-SSE-NEXT:    .cfi_def_cfa_offset 16
+; X86-SSE-NEXT:    movsbl {{[0-9]+}}(%esp), %eax
+; X86-SSE-NEXT:    cvtsi2sd %eax, %xmm0
+; X86-SSE-NEXT:    movsd %xmm0, (%esp)
+; X86-SSE-NEXT:    fldl (%esp)
+; X86-SSE-NEXT:    addl $12, %esp
+; X86-SSE-NEXT:    .cfi_def_cfa_offset 4
+; X86-SSE-NEXT:    retl
+;
+; SSE-LABEL: sifdb:
+; SSE:       # %bb.0: # %entry
+; SSE-NEXT:    movsbl %dil, %eax
+; SSE-NEXT:    cvtsi2sd %eax, %xmm0
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: sifdb:
+; AVX:       # %bb.0: # %entry
+; AVX-NEXT:    movsbl %dil, %eax
+; AVX-NEXT:    vcvtsi2sd %eax, %xmm0, %xmm0
+; AVX-NEXT:    retq
+entry:
+  %result = call double @llvm.experimental.constrained.sitofp.f64.i8(i8 %x,
+                                               metadata !"round.dynamic",
+                                               metadata !"fpexcept.strict") #0
+  ret double %result
+}
+
+define double @sifdw(i16 %x) #0 {
+; X87-LABEL: sifdw:
+; X87:       # %bb.0: # %entry
+; X87-NEXT:    pushl %eax
+; X87-NEXT:    .cfi_def_cfa_offset 8
+; X87-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
+; X87-NEXT:    movw %ax, {{[0-9]+}}(%esp)
+; X87-NEXT:    filds {{[0-9]+}}(%esp)
+; X87-NEXT:    popl %eax
+; X87-NEXT:    .cfi_def_cfa_offset 4
+; X87-NEXT:    retl
+;
+; X86-SSE-LABEL: sifdw:
+; X86-SSE:       # %bb.0: # %entry
+; X86-SSE-NEXT:    subl $12, %esp
+; X86-SSE-NEXT:    .cfi_def_cfa_offset 16
+; X86-SSE-NEXT:    movswl {{[0-9]+}}(%esp), %eax
+; X86-SSE-NEXT:    cvtsi2sd %eax, %xmm0
+; X86-SSE-NEXT:    movsd %xmm0, (%esp)
+; X86-SSE-NEXT:    fldl (%esp)
+; X86-SSE-NEXT:    addl $12, %esp
+; X86-SSE-NEXT:    .cfi_def_cfa_offset 4
+; X86-SSE-NEXT:    retl
+;
+; SSE-LABEL: sifdw:
+; SSE:       # %bb.0: # %entry
+; SSE-NEXT:    movswl %di, %eax
+; SSE-NEXT:    cvtsi2sd %eax, %xmm0
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: sifdw:
+; AVX:       # %bb.0: # %entry
+; AVX-NEXT:    movswl %di, %eax
+; AVX-NEXT:    vcvtsi2sd %eax, %xmm0, %xmm0
+; AVX-NEXT:    retq
+entry:
+  %result = call double @llvm.experimental.constrained.sitofp.f64.i16(i16 %x,
+                                               metadata !"round.dynamic",
+                                               metadata !"fpexcept.strict") #0
+  ret double %result
+}
+
 define double @sifdi(i32 %x) #0 {
 ; X87-LABEL: sifdi:
 ; X87:       # %bb.0: # %entry
@@ -1983,6 +2067,90 @@
   ret double %result
 }
 
+define float @siffb(i8 %x) #0 {
+; X87-LABEL: siffb:
+; X87:       # %bb.0: # %entry
+; X87-NEXT:    pushl %eax
+; X87-NEXT:    .cfi_def_cfa_offset 8
+; X87-NEXT:    movsbl {{[0-9]+}}(%esp), %eax
+; X87-NEXT:    movw %ax, {{[0-9]+}}(%esp)
+; X87-NEXT:    filds {{[0-9]+}}(%esp)
+; X87-NEXT:    popl %eax
+; X87-NEXT:    .cfi_def_cfa_offset 4
+; X87-NEXT:    retl
+;
+; X86-SSE-LABEL: siffb:
+; X86-SSE:       # %bb.0: # %entry
+; X86-SSE-NEXT:    pushl %eax
+; X86-SSE-NEXT:    .cfi_def_cfa_offset 8
+; X86-SSE-NEXT:    movsbl {{[0-9]+}}(%esp), %eax
+; X86-SSE-NEXT:    cvtsi2ss %eax, %xmm0
+; X86-SSE-NEXT:    movss %xmm0, (%esp)
+; X86-SSE-NEXT:    flds (%esp)
+; X86-SSE-NEXT:    popl %eax
+; X86-SSE-NEXT:    .cfi_def_cfa_offset 4
+; X86-SSE-NEXT:    retl
+;
+; SSE-LABEL: siffb:
+; SSE:       # %bb.0: # %entry
+; SSE-NEXT:    movsbl %dil, %eax
+; SSE-NEXT:    cvtsi2ss %eax, %xmm0
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: siffb:
+; AVX:       # %bb.0: # %entry
+; AVX-NEXT:    movsbl %dil, %eax
+; AVX-NEXT:    vcvtsi2ss %eax, %xmm0, %xmm0
+; AVX-NEXT:    retq
+entry:
+  %result = call float @llvm.experimental.constrained.sitofp.f32.i8(i8 %x,
+                                              metadata !"round.dynamic",
+                                              metadata !"fpexcept.strict") #0
+  ret float %result
+}
+
+define float @siffw(i16 %x) #0 {
+; X87-LABEL: siffw:
+; X87:       # %bb.0: # %entry
+; X87-NEXT:    pushl %eax
+; X87-NEXT:    .cfi_def_cfa_offset 8
+; X87-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
+; X87-NEXT:    movw %ax, {{[0-9]+}}(%esp)
+; X87-NEXT:    filds {{[0-9]+}}(%esp)
+; X87-NEXT:    popl %eax
+; X87-NEXT:    .cfi_def_cfa_offset 4
+; X87-NEXT:    retl
+;
+; X86-SSE-LABEL: siffw:
+; X86-SSE:       # %bb.0: # %entry
+; X86-SSE-NEXT:    pushl %eax
+; X86-SSE-NEXT:    .cfi_def_cfa_offset 8
+; X86-SSE-NEXT:    movswl {{[0-9]+}}(%esp), %eax
+; X86-SSE-NEXT:    cvtsi2ss %eax, %xmm0
+; X86-SSE-NEXT:    movss %xmm0, (%esp)
+; X86-SSE-NEXT:    flds (%esp)
+; X86-SSE-NEXT:    popl %eax
+; X86-SSE-NEXT:    .cfi_def_cfa_offset 4
+; X86-SSE-NEXT:    retl
+;
+; SSE-LABEL: siffw:
+; SSE:       # %bb.0: # %entry
+; SSE-NEXT:    movswl %di, %eax
+; SSE-NEXT:    cvtsi2ss %eax, %xmm0
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: siffw:
+; AVX:       # %bb.0: # %entry
+; AVX-NEXT:    movswl %di, %eax
+; AVX-NEXT:    vcvtsi2ss %eax, %xmm0, %xmm0
+; AVX-NEXT:    retq
+entry:
+  %result = call float @llvm.experimental.constrained.sitofp.f32.i16(i16 %x,
+                                              metadata !"round.dynamic",
+                                              metadata !"fpexcept.strict") #0
+  ret float %result
+}
+
 define float @siffi(i32 %x) #0 {
 ; X87-LABEL: siffi:
 ; X87:       # %bb.0: # %entry
@@ -2109,8 +2277,92 @@
 }
 
 ; Verify that uitofp(%x) isn't simplified when the rounding mode is
-; unknown. Expansions from i32 should have only one conversion instruction.
+; unknown.
 ; Verify that no gross errors happen.
+define double @uifdb(i8 %x) #0 {
+; X87-LABEL: uifdb:
+; X87:       # %bb.0: # %entry
+; X87-NEXT:    pushl %eax
+; X87-NEXT:    .cfi_def_cfa_offset 8
+; X87-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X87-NEXT:    movw %ax, {{[0-9]+}}(%esp)
+; X87-NEXT:    filds {{[0-9]+}}(%esp)
+; X87-NEXT:    popl %eax
+; X87-NEXT:    .cfi_def_cfa_offset 4
+; X87-NEXT:    retl
+;
+; X86-SSE-LABEL: uifdb:
+; X86-SSE:       # %bb.0: # %entry
+; X86-SSE-NEXT:    subl $12, %esp
+; X86-SSE-NEXT:    .cfi_def_cfa_offset 16
+; X86-SSE-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-SSE-NEXT:    cvtsi2sd %eax, %xmm0
+; X86-SSE-NEXT:    movsd %xmm0, (%esp)
+; X86-SSE-NEXT:    fldl (%esp)
+; X86-SSE-NEXT:    addl $12, %esp
+; X86-SSE-NEXT:    .cfi_def_cfa_offset 4
+; X86-SSE-NEXT:    retl
+;
+; SSE-LABEL: uifdb:
+; SSE:       # %bb.0: # %entry
+; SSE-NEXT:    movzbl %dil, %eax
+; SSE-NEXT:    cvtsi2sd %eax, %xmm0
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: uifdb:
+; AVX:       # %bb.0: # %entry
+; AVX-NEXT:    movzbl %dil, %eax
+; AVX-NEXT:    vcvtsi2sd %eax, %xmm0, %xmm0
+; AVX-NEXT:    retq
+entry:
+  %result = call double @llvm.experimental.constrained.uitofp.f64.i8(i8 %x,
+                                               metadata !"round.dynamic",
+                                               metadata !"fpexcept.strict") #0
+  ret double %result
+}
+
+define double @uifdw(i16 %x) #0 {
+; X87-LABEL: uifdw:
+; X87:       # %bb.0: # %entry
+; X87-NEXT:    pushl %eax
+; X87-NEXT:    .cfi_def_cfa_offset 8
+; X87-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
+; X87-NEXT:    movl %eax, (%esp)
+; X87-NEXT:    fildl (%esp)
+; X87-NEXT:    popl %eax
+; X87-NEXT:    .cfi_def_cfa_offset 4
+; X87-NEXT:    retl
+;
+; X86-SSE-LABEL: uifdw:
+; X86-SSE:       # %bb.0: # %entry
+; X86-SSE-NEXT:    subl $12, %esp
+; X86-SSE-NEXT:    .cfi_def_cfa_offset 16
+; X86-SSE-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
+; X86-SSE-NEXT:    cvtsi2sd %eax, %xmm0
+; X86-SSE-NEXT:    movsd %xmm0, (%esp)
+; X86-SSE-NEXT:    fldl (%esp)
+; X86-SSE-NEXT:    addl $12, %esp
+; X86-SSE-NEXT:    .cfi_def_cfa_offset 4
+; X86-SSE-NEXT:    retl
+;
+; SSE-LABEL: uifdw:
+; SSE:       # %bb.0: # %entry
+; SSE-NEXT:    movzwl %di, %eax
+; SSE-NEXT:    cvtsi2sd %eax, %xmm0
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: uifdw:
+; AVX:       # %bb.0: # %entry
+; AVX-NEXT:    movzwl %di, %eax
+; AVX-NEXT:    vcvtsi2sd %eax, %xmm0, %xmm0
+; AVX-NEXT:    retq
+entry:
+  %result = call double @llvm.experimental.constrained.uitofp.f64.i16(i16 %x,
+                                               metadata !"round.dynamic",
+                                               metadata !"fpexcept.strict") #0
+  ret double %result
+}
+
 define double @uifdi(i32 %x) #0 {
 ; X87-LABEL: uifdi:
 ; X87:       # %bb.0: # %entry
@@ -2227,6 +2479,90 @@
   ret double %result
 }
 
+define float @uiffb(i8 %x) #0 {
+; X87-LABEL: uiffb:
+; X87:       # %bb.0: # %entry
+; X87-NEXT:    pushl %eax
+; X87-NEXT:    .cfi_def_cfa_offset 8
+; X87-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X87-NEXT:    movw %ax, {{[0-9]+}}(%esp)
+; X87-NEXT:    filds {{[0-9]+}}(%esp)
+; X87-NEXT:    popl %eax
+; X87-NEXT:    .cfi_def_cfa_offset 4
+; X87-NEXT:    retl
+;
+; X86-SSE-LABEL: uiffb:
+; X86-SSE:       # %bb.0: # %entry
+; X86-SSE-NEXT:    pushl %eax
+; X86-SSE-NEXT:    .cfi_def_cfa_offset 8
+; X86-SSE-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-SSE-NEXT:    cvtsi2ss %eax, %xmm0
+; X86-SSE-NEXT:    movss %xmm0, (%esp)
+; X86-SSE-NEXT:    flds (%esp)
+; X86-SSE-NEXT:    popl %eax
+; X86-SSE-NEXT:    .cfi_def_cfa_offset 4
+; X86-SSE-NEXT:    retl
+;
+; SSE-LABEL: uiffb:
+; SSE:       # %bb.0: # %entry
+; SSE-NEXT:    movzbl %dil, %eax
+; SSE-NEXT:    cvtsi2ss %eax, %xmm0
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: uiffb:
+; AVX:       # %bb.0: # %entry
+; AVX-NEXT:    movzbl %dil, %eax
+; AVX-NEXT:    vcvtsi2ss %eax, %xmm0, %xmm0
+; AVX-NEXT:    retq
+entry:
+  %result = call float @llvm.experimental.constrained.uitofp.f32.i8(i8 %x,
+                                              metadata !"round.dynamic",
+                                              metadata !"fpexcept.strict") #0
+  ret float %result
+}
+
+define float @uiffw(i16 %x) #0 {
+; X87-LABEL: uiffw:
+; X87:       # %bb.0: # %entry
+; X87-NEXT:    pushl %eax
+; X87-NEXT:    .cfi_def_cfa_offset 8
+; X87-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
+; X87-NEXT:    movl %eax, (%esp)
+; X87-NEXT:    fildl (%esp)
+; X87-NEXT:    popl %eax
+; X87-NEXT:    .cfi_def_cfa_offset 4
+; X87-NEXT:    retl
+;
+; X86-SSE-LABEL: uiffw:
+; X86-SSE:       # %bb.0: # %entry
+; X86-SSE-NEXT:    pushl %eax
+; X86-SSE-NEXT:    .cfi_def_cfa_offset 8
+; X86-SSE-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
+; X86-SSE-NEXT:    cvtsi2ss %eax, %xmm0
+; X86-SSE-NEXT:    movss %xmm0, (%esp)
+; X86-SSE-NEXT:    flds (%esp)
+; X86-SSE-NEXT:    popl %eax
+; X86-SSE-NEXT:    .cfi_def_cfa_offset 4
+; X86-SSE-NEXT:    retl
+;
+; SSE-LABEL: uiffw:
+; SSE:       # %bb.0: # %entry
+; SSE-NEXT:    movzwl %di, %eax
+; SSE-NEXT:    cvtsi2ss %eax, %xmm0
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: uiffw:
+; AVX:       # %bb.0: # %entry
+; AVX-NEXT:    movzwl %di, %eax
+; AVX-NEXT:    vcvtsi2ss %eax, %xmm0, %xmm0
+; AVX-NEXT:    retq
entry:
+  %result = call float @llvm.experimental.constrained.uitofp.f32.i16(i16 %x,
+                                              metadata !"round.dynamic",
+                                              metadata !"fpexcept.strict") #0
+  ret float %result
+}
+
 define float @uiffi(i32 %x) #0 {
 ; X87-LABEL: uiffi:
 ; X87:       # %bb.0: # %entry
@@ -2401,11 +2737,19 @@
 declare i32 @llvm.experimental.constrained.lround.i32.f32(float, metadata)
 declare i64 @llvm.experimental.constrained.llround.i64.f64(double, metadata)
 declare i64 @llvm.experimental.constrained.llround.i64.f32(float, metadata)
+declare double @llvm.experimental.constrained.sitofp.f64.i8(i8, metadata, metadata)
+declare double @llvm.experimental.constrained.sitofp.f64.i16(i16, metadata, metadata)
 declare double @llvm.experimental.constrained.sitofp.f64.i32(i32, metadata, metadata)
-declare float @llvm.experimental.constrained.sitofp.f32.i32(i32, metadata, metadata)
 declare double @llvm.experimental.constrained.sitofp.f64.i64(i64, metadata, metadata)
+declare float @llvm.experimental.constrained.sitofp.f32.i8(i8, metadata, metadata)
+declare float @llvm.experimental.constrained.sitofp.f32.i16(i16, metadata, metadata)
+declare float @llvm.experimental.constrained.sitofp.f32.i32(i32, metadata, metadata)
 declare float @llvm.experimental.constrained.sitofp.f32.i64(i64, metadata, metadata)
+declare double @llvm.experimental.constrained.uitofp.f64.i8(i8, metadata, metadata)
+declare double @llvm.experimental.constrained.uitofp.f64.i16(i16, metadata, metadata)
 declare double @llvm.experimental.constrained.uitofp.f64.i32(i32, metadata, metadata)
 declare double @llvm.experimental.constrained.uitofp.f64.i64(i64, metadata, metadata)
+declare float @llvm.experimental.constrained.uitofp.f32.i8(i8, metadata, metadata)
+declare float @llvm.experimental.constrained.uitofp.f32.i16(i16, metadata, metadata)
 declare float @llvm.experimental.constrained.uitofp.f32.i32(i32, metadata, metadata)
 declare float @llvm.experimental.constrained.uitofp.f32.i64(i64, metadata, metadata)