diff --git a/llvm/include/llvm/CodeGen/TargetLowering.h b/llvm/include/llvm/CodeGen/TargetLowering.h
--- a/llvm/include/llvm/CodeGen/TargetLowering.h
+++ b/llvm/include/llvm/CodeGen/TargetLowering.h
@@ -2585,6 +2585,11 @@
   // same blocks of its users.
   virtual bool shouldConsiderGEPOffsetSplit() const { return false; }
 
+  // Return true if FCOPYSIGN can be lowered with the given FP types.
+  virtual bool canCopySign(EVT ValueTy, EVT SignTy) const {
+    return isOperationExpand(ISD::FCOPYSIGN, ValueTy) || ValueTy == SignTy;
+  }
+
   //===--------------------------------------------------------------------===//
   // Runtime Library hooks
   //
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -12581,17 +12581,14 @@
 
 /// copysign(x, fp_extend(y)) -> copysign(x, y)
 /// copysign(x, fp_round(y)) -> copysign(x, y)
-static inline bool CanCombineFCOPYSIGN_EXTEND_ROUND(SDNode *N) {
+static inline bool CanCombineFCOPYSIGN_EXTEND_ROUND(const TargetLowering &TLI,
+                                                    SDNode *N) {
   SDValue N1 = N->getOperand(1);
   if ((N1.getOpcode() == ISD::FP_EXTEND ||
        N1.getOpcode() == ISD::FP_ROUND)) {
-    // Do not optimize out type conversion of f128 type yet.
-    // For some targets like x86_64, configuration is changed to keep one f128
-    // value in one SSE register, but instruction selection cannot handle
-    // FCOPYSIGN on SSE registers yet.
     EVT N1VT = N1->getValueType(0);
     EVT N1Op0VT = N1->getOperand(0).getValueType();
-    return (N1VT == N1Op0VT || N1Op0VT != MVT::f128);
+    return TLI.canCopySign(N1VT, N1Op0VT);
   }
   return false;
 }
@@ -12637,7 +12634,7 @@
 
   // copysign(x, fp_extend(y)) -> copysign(x, y)
   // copysign(x, fp_round(y)) -> copysign(x, y)
-  if (CanCombineFCOPYSIGN_EXTEND_ROUND(N))
+  if (CanCombineFCOPYSIGN_EXTEND_ROUND(TLI, N))
     return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT, N0, N1.getOperand(0));
 
   return SDValue();
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.h b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.h
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
@@ -749,6 +749,10 @@
   bool shouldNormalizeToSelectSequence(LLVMContext &, EVT) const override;
 
   void finalizeLowering(MachineFunction &MF) const override;
+
+  bool canCopySign(EVT ValueTy, EVT SignTy) const override {
+    return true;
+  }
 };
 
 namespace AArch64 {
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h
@@ -323,6 +323,10 @@
     return MVT::i32;
   }
 
+  bool canCopySign(EVT ValueTy, EVT SignTy) const override {
+    return SignTy != MVT::f128;
+  }
+
   AtomicExpansionKind shouldExpandAtomicRMWInIR(AtomicRMWInst *) const override;
 
   bool SelectFlatOffset(bool IsSigned, SelectionDAG &DAG, SDNode *N,
diff --git a/llvm/lib/Target/ARM/ARMISelLowering.h b/llvm/lib/Target/ARM/ARMISelLowering.h
--- a/llvm/lib/Target/ARM/ARMISelLowering.h
+++ b/llvm/lib/Target/ARM/ARMISelLowering.h
@@ -805,6 +805,10 @@
 
     bool shouldConsiderGEPOffsetSplit() const override { return true; }
 
+    bool canCopySign(EVT ValueTy, EVT SignTy) const override {
+      return SignTy != MVT::f128;
+    }
+
     bool isUnsupportedFloatingType(EVT VT) const;
 
     SDValue getCMOV(const SDLoc &dl, EVT VT, SDValue FalseVal, SDValue TrueVal,
diff --git a/llvm/lib/Target/Hexagon/HexagonISelLowering.h b/llvm/lib/Target/Hexagon/HexagonISelLowering.h
--- a/llvm/lib/Target/Hexagon/HexagonISelLowering.h
+++ b/llvm/lib/Target/Hexagon/HexagonISelLowering.h
@@ -276,6 +276,10 @@
     return TargetLowering::getInlineAsmMemConstraint(ConstraintCode);
   }
 
+  bool canCopySign(EVT ValueTy, EVT SignTy) const override {
+    return SignTy != MVT::f128;
+  }
+
   // Intrinsics
   SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const;
   SDValue LowerINTRINSIC_VOID(SDValue Op, SelectionDAG &DAG) const;
diff --git a/llvm/lib/Target/Mips/MipsISelLowering.h b/llvm/lib/Target/Mips/MipsISelLowering.h
--- a/llvm/lib/Target/Mips/MipsISelLowering.h
+++ b/llvm/lib/Target/Mips/MipsISelLowering.h
@@ -687,6 +687,10 @@
       return true;
     }
 
+    bool canCopySign(EVT ValueTy, EVT SignTy) const override {
+      return SignTy != MVT::f128;
+    }
+
     /// Emit a sign-extension using sll/sra, seb, or seh appropriately.
     MachineBasicBlock *emitSignExtendToI32InReg(MachineInstr &MI,
                                                 MachineBasicBlock *BB,
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.h b/llvm/lib/Target/PowerPC/PPCISelLowering.h
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.h
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.h
@@ -946,6 +946,10 @@
                                              unsigned JTI,
                                              MCContext &Ctx) const override;
 
+    bool canCopySign(EVT ValueTy, EVT SignTy) const override {
+      return SignTy != MVT::f128;
+    }
+
   private:
     struct ReuseLoadInfo {
       SDValue Ptr;
diff --git a/llvm/lib/Target/SystemZ/SystemZISelLowering.h b/llvm/lib/Target/SystemZ/SystemZISelLowering.h
--- a/llvm/lib/Target/SystemZ/SystemZISelLowering.h
+++ b/llvm/lib/Target/SystemZ/SystemZISelLowering.h
@@ -524,6 +524,10 @@
     return true;
   }
 
+  bool canCopySign(EVT ValueTy, EVT SignTy) const override {
+    return SignTy != MVT::f128;
+  }
+
 private:
   const SystemZSubtarget &Subtarget;
 
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyInstrFloat.td b/llvm/lib/Target/WebAssembly/WebAssemblyInstrFloat.td
--- a/llvm/lib/Target/WebAssembly/WebAssemblyInstrFloat.td
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyInstrFloat.td
@@ -66,12 +66,6 @@
 defm TRUNC : UnaryFP<ftrunc, "trunc", 0x8f, 0x9d>;
 defm NEAREST : UnaryFP<fnearbyint, "nearest", 0x90, 0x9e>;
 
-// DAGCombine oddly folds casts into the rhs of copysign. Unfold them.
-def : Pat<(fcopysign F64:$lhs, F32:$rhs),
-          (COPYSIGN_F64 F64:$lhs, (F64_PROMOTE_F32 F32:$rhs))>;
-def : Pat<(fcopysign F32:$lhs, F64:$rhs),
-          (COPYSIGN_F32 F32:$lhs, (F32_DEMOTE_F64 F64:$rhs))>;
-
 // WebAssembly doesn't expose inexact exceptions, so map frint to fnearbyint.
 def : Pat<(frint f32:$src), (NEAREST_F32 f32:$src)>;
 def : Pat<(frint f64:$src), (NEAREST_F64 f64:$src)>;
diff --git a/llvm/lib/Target/X86/X86ISelLowering.h b/llvm/lib/Target/X86/X86ISelLowering.h
--- a/llvm/lib/Target/X86/X86ISelLowering.h
+++ b/llvm/lib/Target/X86/X86ISelLowering.h
@@ -1135,6 +1135,10 @@
     /// math ops).
     bool shouldFormOverflowOp(unsigned Opcode, EVT VT) const override;
 
+    bool canCopySign(EVT ValueTy, EVT SignTy) const override {
+      return SignTy != MVT::f128;
+    }
+
     bool storeOfVectorConstantIsCheap(EVT MemVT, unsigned NumElem,
                                       unsigned AddrSpace) const override {
       // If we can replace more than 2 scalar stores, there will be a reduction
diff --git a/llvm/test/CodeGen/RISCV/copysign-casts.ll b/llvm/test/CodeGen/RISCV/copysign-casts.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/copysign-casts.ll
@@ -0,0 +1,93 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=riscv32 -verify-machineinstrs < %s \
+; RUN:   | FileCheck %s -check-prefix=RV32I-ILP32
+; RUN: llc -mtriple=riscv64 -verify-machineinstrs < %s \
+; RUN:   | FileCheck %s -check-prefix=RV64I-LP64
+; RUN: llc -mtriple=riscv32 -verify-machineinstrs -mattr=+f -mattr=+d \
+; RUN:   -target-abi ilp32d < %s | FileCheck %s -check-prefix=RV32I-ILP32D
+; RUN: llc -mtriple=riscv64 -verify-machineinstrs -mattr=+f -mattr=+d \
+; RUN:   -target-abi lp64d < %s | FileCheck %s -check-prefix=RV64I-LP64D
+
+; Check that DAGCombiner only folds casts into the sign argument of copysign
+; when the resulting FCOPYSIGN would be expanded anyway (i.e. when the target
+; cannot lower mixed-precision magnitude and sign arguments directly).
+
+declare double @llvm.copysign.f64(double, double)
+declare float @llvm.copysign.f32(float, float)
+
+define double @fold_promote(double %a, float %b) nounwind {
+; RV32I-ILP32-LABEL: fold_promote:
+; RV32I-ILP32:       # %bb.0:
+; RV32I-ILP32-NEXT:    lui a3, 524288
+; RV32I-ILP32-NEXT:    and a2, a2, a3
+; RV32I-ILP32-NEXT:    addi a3, a3, -1
+; RV32I-ILP32-NEXT:    and a1, a1, a3
+; RV32I-ILP32-NEXT:    or a1, a1, a2
+; RV32I-ILP32-NEXT:    ret
+;
+; RV64I-LP64-LABEL: fold_promote:
+; RV64I-LP64:       # %bb.0:
+; RV64I-LP64-NEXT:    addi a2, zero, -1
+; RV64I-LP64-NEXT:    slli a2, a2, 63
+; RV64I-LP64-NEXT:    addi a2, a2, -1
+; RV64I-LP64-NEXT:    and a0, a0, a2
+; RV64I-LP64-NEXT:    addi a2, zero, 1
+; RV64I-LP64-NEXT:    slli a2, a2, 31
+; RV64I-LP64-NEXT:    and a1, a1, a2
+; RV64I-LP64-NEXT:    slli a1, a1, 32
+; RV64I-LP64-NEXT:    or a0, a0, a1
+; RV64I-LP64-NEXT:    ret
+;
+; RV32I-ILP32D-LABEL: fold_promote:
+; RV32I-ILP32D:       # %bb.0:
+; RV32I-ILP32D-NEXT:    fcvt.d.s ft0, fa1
+; RV32I-ILP32D-NEXT:    fsgnj.d fa0, fa0, ft0
+; RV32I-ILP32D-NEXT:    ret
+;
+; RV64I-LP64D-LABEL: fold_promote:
+; RV64I-LP64D:       # %bb.0:
+; RV64I-LP64D-NEXT:    fcvt.d.s ft0, fa1
+; RV64I-LP64D-NEXT:    fsgnj.d fa0, fa0, ft0
+; RV64I-LP64D-NEXT:    ret
+  %c = fpext float %b to double
+  %t = call double @llvm.copysign.f64(double %a, double %c)
+  ret double %t
+}
+
+define float @fold_demote(float %a, double %b) nounwind {
+; RV32I-ILP32-LABEL: fold_demote:
+; RV32I-ILP32:       # %bb.0:
+; RV32I-ILP32-NEXT:    lui a1, 524288
+; RV32I-ILP32-NEXT:    and a2, a2, a1
+; RV32I-ILP32-NEXT:    addi a1, a1, -1
+; RV32I-ILP32-NEXT:    and a0, a0, a1
+; RV32I-ILP32-NEXT:    or a0, a0, a2
+; RV32I-ILP32-NEXT:    ret
+;
+; RV64I-LP64-LABEL: fold_demote:
+; RV64I-LP64:       # %bb.0:
+; RV64I-LP64-NEXT:    lui a2, 524288
+; RV64I-LP64-NEXT:    addiw a2, a2, -1
+; RV64I-LP64-NEXT:    and a0, a0, a2
+; RV64I-LP64-NEXT:    addi a2, zero, -1
+; RV64I-LP64-NEXT:    slli a2, a2, 63
+; RV64I-LP64-NEXT:    and a1, a1, a2
+; RV64I-LP64-NEXT:    srli a1, a1, 32
+; RV64I-LP64-NEXT:    or a0, a0, a1
+; RV64I-LP64-NEXT:    ret
+;
+; RV32I-ILP32D-LABEL: fold_demote:
+; RV32I-ILP32D:       # %bb.0:
+; RV32I-ILP32D-NEXT:    fcvt.s.d ft0, fa1
+; RV32I-ILP32D-NEXT:    fsgnj.s fa0, fa0, ft0
+; RV32I-ILP32D-NEXT:    ret
+;
+; RV64I-LP64D-LABEL: fold_demote:
+; RV64I-LP64D:       # %bb.0:
+; RV64I-LP64D-NEXT:    fcvt.s.d ft0, fa1
+; RV64I-LP64D-NEXT:    fsgnj.s fa0, fa0, ft0
+; RV64I-LP64D-NEXT:    ret
+  %c = fptrunc double %b to float
+  %t = call float @llvm.copysign.f32(float %a, float %c)
+  ret float %t
+}
diff --git a/llvm/test/CodeGen/WebAssembly/copysign-casts.ll b/llvm/test/CodeGen/WebAssembly/copysign-casts.ll
--- a/llvm/test/CodeGen/WebAssembly/copysign-casts.ll
+++ b/llvm/test/CodeGen/WebAssembly/copysign-casts.ll
@@ -1,7 +1,6 @@
 ; RUN: llc < %s -asm-verbose=false -wasm-keep-registers | FileCheck %s
 
-; DAGCombiner oddly folds casts into the rhs of copysign. Test that they get
-; unfolded.
+; Check that DAGCombiner does not fold casts into the sign argument of copysign.
 
 target datalayout = "e-m:e-p:32:32-i64:64-n32:64-S128"
 target triple = "wasm32-unknown-unknown"