Index: lib/Transforms/InstCombine/InstCombineCalls.cpp
===================================================================
--- lib/Transforms/InstCombine/InstCombineCalls.cpp
+++ lib/Transforms/InstCombine/InstCombineCalls.cpp
@@ -1671,6 +1671,23 @@
 
     break;
   }
+  case Intrinsic::copysign: {
+    Value *ExtSrc0;
+    Value *ExtSrc1;
+
+    // copysign (fpext x), (fpext y) -> copysign x, y
+    if (match(II->getArgOperand(0), m_FPExt(m_Value(ExtSrc0))) &&
+        match(II->getArgOperand(1), m_FPExt(m_Value(ExtSrc1)))) {
+      Value *F = Intrinsic::getDeclaration(II->getModule(), II->getIntrinsicID(),
+                                           { ExtSrc0->getType() });
+      CallInst *NewCall = Builder->CreateCall(F, { ExtSrc0, ExtSrc1 });
+      NewCall->copyFastMathFlags(II);
+      NewCall->takeName(II);
+      return new FPExtInst(NewCall, II->getType());
+    }
+
+    break;
+  }
   case Intrinsic::ppc_altivec_lvx:
   case Intrinsic::ppc_altivec_lvxl:
     // Turn PPC lvx -> load if the pointer is known aligned.
Index: lib/Transforms/InstCombine/InstCombineCasts.cpp
===================================================================
--- lib/Transforms/InstCombine/InstCombineCasts.cpp
+++ lib/Transforms/InstCombine/InstCombineCasts.cpp
@@ -1416,8 +1416,26 @@
                                      OpBundles, II->getName());
       NewCI->copyFastMathFlags(II);
       return NewCI;
-    }
-    }
+    }
+    case Intrinsic::copysign: {
+      Type *Ty = CI.getType();
+      // Do binary FP operation on smaller type.
+      // (fptrunc (copysign x, y)) -> (copysign (fptrunc x), (fptrunc y))
+      Value *Trunc0 = Builder->CreateFPTrunc(II->getArgOperand(0), Ty);
+      Value *Trunc1 = Builder->CreateFPTrunc(II->getArgOperand(1), Ty);
+
+      Function *Overload = Intrinsic::getDeclaration(
+          CI.getModule(), II->getIntrinsicID(), Ty);
+
+      SmallVector<OperandBundleDef, 1> OpBundles;
+      II->getOperandBundlesAsDefs(OpBundles);
+
+      CallInst *NewCI = CallInst::Create(Overload, { Trunc0, Trunc1 },
+                                         OpBundles, II->getName());
+      NewCI->copyFastMathFlags(II);
+      return NewCI;
+    }
+    }
   }
 
   return nullptr;
Index: lib/Transforms/Utils/SimplifyLibCalls.cpp
===================================================================
--- lib/Transforms/Utils/SimplifyLibCalls.cpp
+++ lib/Transforms/Utils/SimplifyLibCalls.cpp
@@ -962,6 +962,21 @@
   return NewCall;
 }
 
+// Replace a libcall \p CI with a call to intrinsic \p IID
+static Value *replaceBinaryCall(CallInst *CI, IRBuilder<> &B, Intrinsic::ID IID) {
+  // Propagate fast-math flags from the existing call to the new call.
+  IRBuilder<>::FastMathFlagGuard Guard(B);
+  B.setFastMathFlags(CI->getFastMathFlags());
+
+  Module *M = CI->getModule();
+  Value *V0 = CI->getArgOperand(0);
+  Value *V1 = CI->getArgOperand(1);
+  Function *F = Intrinsic::getDeclaration(M, IID, CI->getType());
+  CallInst *NewCall = B.CreateCall(F, { V0, V1 });
+  NewCall->takeName(CI);
+  return NewCall;
+}
+
 /// Shrink double -> float for binary functions like 'fmin/fmax'.
 static Value *optimizeBinaryDoubleFP(CallInst *CI, IRBuilder<> &B) {
   Function *Callee = CI->getCalledFunction();
@@ -2171,9 +2186,7 @@
       return optimizeUnaryDoubleFP(CI, Builder, true);
     return nullptr;
   case LibFunc::copysign:
-    if (hasFloatVersion(FuncName))
-      return optimizeBinaryDoubleFP(CI, Builder);
-    return nullptr;
+    return replaceBinaryCall(CI, Builder, Intrinsic::copysign);
   case LibFunc::fminf:
   case LibFunc::fmin:
   case LibFunc::fminl:
Index: test/Transforms/InstCombine/copysign.ll
===================================================================
--- test/Transforms/InstCombine/copysign.ll
+++ test/Transforms/InstCombine/copysign.ll
@@ -45,5 +45,81 @@
   ret double %x
 }
 
+; CHECK-LABEL: @reduce_precision(
+; CHECK: %copysign = call float @llvm.copysign.f32(float %x, float %y)
+; CHECK-NEXT: ret float %copysign
+define float @reduce_precision(float %x, float %y) {
+  %x.ext = fpext float %x to double
+  %y.ext = fpext float %y to double
+  %copysign = call double @llvm.copysign.f64(double %x.ext, double %y.ext)
+  %trunc = fptrunc double %copysign to float
+  ret float %trunc
+}
+
+; CHECK-LABEL: @reduce_precision_fmf(
+; CHECK: %copysign = call nnan float @llvm.copysign.f32(float %x, float %y)
+; CHECK-NEXT: ret float %copysign
+define float @reduce_precision_fmf(float %x, float %y) {
+  %x.ext = fpext float %x to double
+  %y.ext = fpext float %y to double
+  %copysign = call nnan double @llvm.copysign.f64(double %x.ext, double %y.ext)
+  %trunc = fptrunc double %copysign to float
+  ret float %trunc
+}
+
+; CHECK-LABEL: @reduce_precision_constant(
+; CHECK: %trunc = call float @llvm.copysign.f32(float %x, float 4.000000e+00)
+; CHECK-NEXT: ret float %trunc
+define float @reduce_precision_constant(float %x) {
+  %x.ext = fpext float %x to double
+  %copysign = call double @llvm.copysign.f64(double %x.ext, double 4.0)
+  %trunc = fptrunc double %copysign to float
+  ret float %trunc
+}
+
+; CHECK-LABEL: @reduce_precision_multi_use_src0(
+; CHECK: %x.ext = fpext float %x to double
+; CHECK-NEXT: %copysign = call float @llvm.copysign.f32(float %x, float %y)
+; CHECK-NEXT: store volatile double %x.ext,
+; CHECK-NEXT: ret float %copysign
+define float @reduce_precision_multi_use_src0(float %x, float %y) {
+  %x.ext = fpext float %x to double
+  %y.ext = fpext float %y to double
+  %copysign = call double @llvm.copysign.f64(double %x.ext, double %y.ext)
+  %trunc = fptrunc double %copysign to float
+  store volatile double %x.ext, double* undef
+  ret float %trunc
+}
+
+; CHECK-LABEL: @reduce_precision_multi_use_src1(
+; CHECK: %y.ext = fpext float %y to double
+; CHECK-NEXT: %copysign = call float @llvm.copysign.f32(float %x, float %y)
+; CHECK-NEXT: store volatile double %y.ext
+; CHECK-NEXT: ret float %copysign
+define float @reduce_precision_multi_use_src1(float %x, float %y) {
+  %x.ext = fpext float %x to double
+  %y.ext = fpext float %y to double
+  %copysign = call double @llvm.copysign.f64(double %x.ext, double %y.ext)
+  %trunc = fptrunc double %copysign to float
+  store volatile double %y.ext, double* undef
+  ret float %trunc
+}
+
+; CHECK-LABEL: @reduce_precision_multi_use_src0_src1(
+; CHECK-NEXT: %x.ext = fpext float %x to double
+; CHECK-NEXT: %y.ext = fpext float %y to double
+; CHECK-NEXT: %copysign = call float @llvm.copysign.f32(float %x, float %y)
+; CHECK-NEXT: store volatile double %x.ext,
+; CHECK-NEXT: store volatile double %y.ext,
+; CHECK-NEXT: ret float %copysign
+define float @reduce_precision_multi_use_src0_src1(float %x, float %y) {
+  %x.ext = fpext float %x to double
+  %y.ext = fpext float %y to double
+  %copysign = call double @llvm.copysign.f64(double %x.ext, double %y.ext)
+  %trunc = fptrunc double %copysign to float
+  store volatile double %x.ext, double* undef
+  store volatile double %y.ext, double* undef
+  ret float %trunc
+}
 attributes #0 = { nounwind readnone }
Index: test/Transforms/InstCombine/float-shrink-compare.ll
===================================================================
--- test/Transforms/InstCombine/float-shrink-compare.ll
+++ test/Transforms/InstCombine/float-shrink-compare.ll
@@ -223,16 +223,16 @@
 }
 
 define i32 @test19(float %x, float %y, float %z) nounwind uwtable {
-  %1 = fpext float %x to double
-  %2 = fpext float %y to double
-  %3 = call double @copysign(double %1, double %2) nounwind
-  %4 = fpext float %z to double
-  %5 = fcmp oeq double %3, %4
-  %6 = zext i1 %5 to i32
-  ret i32 %6
+  %x.ext = fpext float %x to double
+  %y.ext = fpext float %y to double
+  %copysign = call double @copysign(double %x.ext, double %y.ext) nounwind
+  %z.ext = fpext float %z to double
+  %cmp = fcmp oeq double %copysign, %z.ext
+  %cmp.ext = zext i1 %cmp to i32
+  ret i32 %cmp.ext
 ; CHECK-LABEL: @test19(
-; CHECK-NEXT: %copysignf = call float @copysignf(float %x, float %y)
-; CHECK-NEXT: fcmp oeq float %copysignf, %z
+; CHECK-NEXT: %copysign = call float @llvm.copysign.f32(float %x, float %y)
+; CHECK-NEXT: fcmp oeq float %copysign, %z
 }
 
 define i32 @test20(float %x, float %y) nounwind uwtable {