diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -13462,6 +13462,33 @@
   if (N0.getOpcode() == ISD::FP_EXTEND && VT == N0.getOperand(0).getValueType())
     return N0.getOperand(0);
 
+  // fold (fp_round ({u,s}int_to_fp x)) -> ({u,s}int_to_fp x)
+  // but only when the ({u,s}int_to_fp x) remains precise
+  if (N0.getOpcode() == ISD::SINT_TO_FP || N0.getOpcode() == ISD::UINT_TO_FP) {
+    SDValue IntNode = N0->getOperand(0);
+    EVT IntTy = IntNode.getValueType();
+
+    bool IsIntSigned = N0.getOpcode() == ISD::SINT_TO_FP;
+    unsigned IntSize = (int)IntTy.getScalarSizeInBits() - IsIntSigned;
+    const fltSemantics &IntrSem = DAG.EVTToAPFloatSemantics(N0.getValueType());
+
+    // The intuition behind this check is that the original DAG took an integer,
+    // and converted it to the resulting float, but via a more precise float.
+    // After the transform, the integer is converted directly to the resulting
+    // float, without the intermediate precise float.
+    //
+    // Because the intermediate float is rounded to the resulting float, we know
+    // the resulting float is less precise than the intermediate float.
+    // Therefore, the relative precision of the int to the resulting float does
+    // not matter as long as we can fully represent the int in the intermediate
+    // float value. This avoids double-rounding issues.
+    if (APFloat::semanticsPrecision(IntrSem) >= IntSize &&
+        (!LegalTypes || hasOperation(N0.getOpcode(), IntTy))) {
+      SDLoc DL(N);
+      return DAG.getNode(N0.getOpcode(), DL, VT, IntNode);
+    }
+  }
+
   // fold (fp_round (fp_round x)) -> (fp_round x)
   if (N0.getOpcode() == ISD::FP_ROUND) {
     const bool NIsTrunc = N->getConstantOperandVal(1) == 1;
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoF.td b/llvm/lib/Target/RISCV/RISCVInstrInfoF.td
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoF.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoF.td
@@ -392,12 +392,6 @@
 // [u]int->float. Match GCC and default to using dynamic rounding mode.
 def : Pat<(sint_to_fp GPR:$rs1), (FCVT_S_W $rs1, 0b111)>;
 def : Pat<(uint_to_fp GPR:$rs1), (FCVT_S_WU $rs1, 0b111)>;
-
-// [u]int->double->float
-def : Pat<(fpround (f64 (sint_to_fp GPR:$rs1))),
-          (FCVT_S_W GPR:$rs1, 0b111)>;
-def : Pat<(fpround (f64 (uint_to_fp GPR:$rs1))),
-          (FCVT_S_WU GPR:$rs1, 0b111)>;
 } // Predicates = [HasStdExtF, IsRV32]
 
 let Predicates = [HasStdExtF, IsRV64] in {
diff --git a/llvm/test/CodeGen/RISCV/fp-convert-indirect.ll b/llvm/test/CodeGen/RISCV/fp-convert-indirect.ll
--- a/llvm/test/CodeGen/RISCV/fp-convert-indirect.ll
+++ b/llvm/test/CodeGen/RISCV/fp-convert-indirect.ll
@@ -11,19 +11,15 @@
 ;; These testcases check that we merge sequences of `fcvt.d.wu; fcvt.s.d` into
 ;; `fcvt.s.wu`.
 ;;
-;; TODO: Unfortunately, though this only uses 32-bit FP instructions, we cannot
-;; do this optimisation without the D extension as we need 64-bit FP values to
-;; be legal to get the right operands to match.
+;; These folds are implemented in the DAGCombiner because, without the D
+;; extension, the intermediate `double` would otherwise be legalised away and
+;; the conversions turned into libcalls.
 
 define float @fcvt_s_w_via_d(i32 %a) nounwind {
 ; RV32IF-LABEL: fcvt_s_w_via_d:
 ; RV32IF:       # %bb.0:
-; RV32IF-NEXT:    addi sp, sp, -16
-; RV32IF-NEXT:    sw ra, 12(sp)
-; RV32IF-NEXT:    call __floatsidf
-; RV32IF-NEXT:    call __truncdfsf2
-; RV32IF-NEXT:    lw ra, 12(sp)
-; RV32IF-NEXT:    addi sp, sp, 16
+; RV32IF-NEXT:    fcvt.s.w ft0, a0
+; RV32IF-NEXT:    fmv.x.w a0, ft0
 ; RV32IF-NEXT:    ret
 ;
 ; RV32IFD-LABEL: fcvt_s_w_via_d:
@@ -34,13 +30,8 @@
 ;
 ; RV64IF-LABEL: fcvt_s_w_via_d:
 ; RV64IF:       # %bb.0:
-; RV64IF-NEXT:    addi sp, sp, -16
-; RV64IF-NEXT:    sd ra, 8(sp)
-; RV64IF-NEXT:    sext.w a0, a0
-; RV64IF-NEXT:    call __floatsidf
-; RV64IF-NEXT:    call __truncdfsf2
-; RV64IF-NEXT:    ld ra, 8(sp)
-; RV64IF-NEXT:    addi sp, sp, 16
+; RV64IF-NEXT:    fcvt.s.w ft0, a0
+; RV64IF-NEXT:    fmv.x.w a0, ft0
 ; RV64IF-NEXT:    ret
 ;
 ; RV64IFD-LABEL: fcvt_s_w_via_d:
@@ -56,12 +47,8 @@
 define float @fcvt_s_wu_via_d(i32 %a) nounwind {
 ; RV32IF-LABEL: fcvt_s_wu_via_d:
 ; RV32IF:       # %bb.0:
-; RV32IF-NEXT:    addi sp, sp, -16
-; RV32IF-NEXT:    sw ra, 12(sp)
-; RV32IF-NEXT:    call __floatunsidf
-; RV32IF-NEXT:    call __truncdfsf2
-; RV32IF-NEXT:    lw ra, 12(sp)
-; RV32IF-NEXT:    addi sp, sp, 16
+; RV32IF-NEXT:    fcvt.s.wu ft0, a0
+; RV32IF-NEXT:    fmv.x.w a0, ft0
 ; RV32IF-NEXT:    ret
 ;
 ; RV32IFD-LABEL: fcvt_s_wu_via_d:
@@ -72,14 +59,8 @@
 ;
 ; RV64IF-LABEL: fcvt_s_wu_via_d:
 ; RV64IF:       # %bb.0:
-; RV64IF-NEXT:    addi sp, sp, -16
-; RV64IF-NEXT:    sd ra, 8(sp)
-; RV64IF-NEXT:    slli a0, a0, 32
-; RV64IF-NEXT:    srli a0, a0, 32
-; RV64IF-NEXT:    call __floatunsidf
-; RV64IF-NEXT:    call __truncdfsf2
-; RV64IF-NEXT:    ld ra, 8(sp)
-; RV64IF-NEXT:    addi sp, sp, 16
+; RV64IF-NEXT:    fcvt.s.wu ft0, a0
+; RV64IF-NEXT:    fmv.x.w a0, ft0
 ; RV64IF-NEXT:    ret
 ;
 ; RV64IFD-LABEL: fcvt_s_wu_via_d:
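
Note (illustration, not part of the patch): the `APFloat::semanticsPrecision(IntrSem) >= IntSize` check exists because the fold is only safe when the integer is exactly representable in the intermediate float type. An i32 always survives the trip through `double` exactly (53 bits of precision >= 32), so `(float)(double)x == (float)x` and the combine may skip the intermediate step; a wider integer may not, and the two roundings can then disagree with a single direct rounding. Below is a minimal standalone C++ sketch of that double-rounding hazard, assuming an IEEE-754 target with the default round-to-nearest mode; the value is chosen purely for illustration.

#include <cstdint>
#include <cstdio>

int main() {
  // 2^62 + 2^38 + 1 needs 63 significant bits, so it is exactly representable
  // in neither double (53-bit significand) nor float (24-bit significand).
  int64_t N = (INT64_C(1) << 62) + (INT64_C(1) << 38) + 1;

  float Direct = static_cast<float>(N);                         // rounded once
  float ViaDouble = static_cast<float>(static_cast<double>(N)); // rounded twice

  // The two results differ: rounding to double first drops the trailing +1,
  // leaving an exact halfway case that then ties to even when rounded to float.
  std::printf("direct:     %.10e\nvia double: %.10e\n", Direct, ViaDouble);
  return 0;
}

This is why the combine fires for the i32 -> f64 -> f32 sequences in the tests above but leaves an i64 -> f64 -> f32 sequence alone: 64 (or 63 for the signed case) exceeds the 53-bit precision of the intermediate `double`.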