Index: lib/Transforms/InstCombine/InstCombineCasts.cpp
===================================================================
--- lib/Transforms/InstCombine/InstCombineCasts.cpp
+++ lib/Transforms/InstCombine/InstCombineCasts.cpp
@@ -1442,10 +1442,19 @@
     case Intrinsic::round:
     case Intrinsic::nearbyint:
     case Intrinsic::trunc: {
+      Value *Src = II->getArgOperand(0);
+      if (!Src->hasOneUse())
+        break;
+
+      if (II->getIntrinsicID() != Intrinsic::fabs) {
+        FPExtInst *FPExtSrc = dyn_cast<FPExtInst>(Src);
+        if (!FPExtSrc || FPExtSrc->getOperand(0)->getType() != CI.getType())
+          break;
+      }
+
       // Do unary FP operation on smaller type.
       // (fptrunc (fabs x)) -> (fabs (fptrunc x))
-      Value *InnerTrunc = Builder->CreateFPTrunc(II->getArgOperand(0),
-                                                 CI.getType());
+      Value *InnerTrunc = Builder->CreateFPTrunc(Src, CI.getType());
       Type *IntrinsicType[] = { CI.getType() };
       Function *Overload = Intrinsic::getDeclaration(
         CI.getModule(), II->getIntrinsicID(), IntrinsicType);
Index: test/Transforms/InstCombine/double-float-shrink-2.ll
===================================================================
--- test/Transforms/InstCombine/double-float-shrink-2.ll
+++ test/Transforms/InstCombine/double-float-shrink-2.ll
@@ -1,32 +1,9 @@
-; RUN: opt < %s -instcombine -S -mtriple "i386-pc-linux" | FileCheck -check-prefix=DO-SIMPLIFY %s
-; RUN: opt < %s -instcombine -S -mtriple "i386-pc-win32" | FileCheck -check-prefix=DONT-SIMPLIFY %s
-; RUN: opt < %s -instcombine -S -mtriple "x86_64-pc-win32" | FileCheck -check-prefix=C89-SIMPLIFY %s
-; RUN: opt < %s -instcombine -S -mtriple "i386-pc-mingw32" | FileCheck -check-prefix=DO-SIMPLIFY %s
-; RUN: opt < %s -instcombine -S -mtriple "x86_64-pc-mingw32" | FileCheck -check-prefix=DO-SIMPLIFY %s
-; RUN: opt < %s -instcombine -S -mtriple "sparc-sun-solaris" | FileCheck -check-prefix=DO-SIMPLIFY %s
-
-; DO-SIMPLIFY: call float @llvm.floor.f32(
-; DO-SIMPLIFY: call float @llvm.ceil.f32(
-; DO-SIMPLIFY: call float @llvm.round.f32(
-; DO-SIMPLIFY: call float @llvm.nearbyint.f32(
-; DO-SIMPLIFY: call float @llvm.trunc.f32(
-; DO-SIMPLIFY: call float @llvm.fabs.f32(
-; DO-SIMPLIFY: call fast float @llvm.fabs.f32(
-
-; C89-SIMPLIFY: call float @llvm.floor.f32(
-; C89-SIMPLIFY: call float @llvm.ceil.f32(
-; C89-SIMPLIFY: call double @round(
-; C89-SIMPLIFY: call double @nearbyint(
-
-; DONT-SIMPLIFY: call float @llvm.floor.f32(
-; DONT-SIMPLIFY: call float @llvm.ceil.f32(
-; DONT-SIMPLIFY: call double @round(
-; DONT-SIMPLIFY: call double @nearbyint(
-; DONT-SIMPLIFY: call double @trunc(
-
-; This is replaced with the intrinsic, which does the right thing on
-; all platforms.
-; DONT-SIMPLIFY: call float @llvm.fabs.f32(
+; RUN: opt < %s -instcombine -S -mtriple "i386-pc-linux" | FileCheck -check-prefix=DO-SIMPLIFY -check-prefix=ALL %s
+; RUN: opt < %s -instcombine -S -mtriple "i386-pc-win32" | FileCheck -check-prefix=DONT-SIMPLIFY -check-prefix=ALL %s
+; RUN: opt < %s -instcombine -S -mtriple "x86_64-pc-win32" | FileCheck -check-prefix=C89-SIMPLIFY -check-prefix=ALL %s
+; RUN: opt < %s -instcombine -S -mtriple "i386-pc-mingw32" | FileCheck -check-prefix=DO-SIMPLIFY -check-prefix=ALL %s
+; RUN: opt < %s -instcombine -S -mtriple "x86_64-pc-mingw32" | FileCheck -check-prefix=DO-SIMPLIFY -check-prefix=ALL %s
+; RUN: opt < %s -instcombine -S -mtriple "sparc-sun-solaris" | FileCheck -check-prefix=DO-SIMPLIFY -check-prefix=ALL %s
 
 declare double @floor(double)
 declare double @ceil(double)
@@ -34,9 +11,19 @@
 declare double @nearbyint(double)
 declare double @trunc(double)
 declare double @fabs(double)
+
+declare double @llvm.floor.f64(double)
+declare double @llvm.ceil.f64(double)
+declare double @llvm.round.f64(double)
+declare double @llvm.nearbyint.f64(double)
+declare double @llvm.trunc.f64(double)
 declare double @llvm.fabs.f64(double)
 
-define float @test_floor(float %C) {
+; ALL-LABEL: @test_shrink_libcall_floor(
+; DO-SIMPLIFY: call float @llvm.floor.f32(
+; C89-SIMPLIFY: call float @llvm.floor.f32(
+; DONT-SIMPLIFY: call float @llvm.floor.f32(
+define float @test_shrink_libcall_floor(float %C) {
   %D = fpext float %C to double
   ; --> floorf
   %E = call double @floor(double %D)
@@ -44,7 +31,11 @@
   ret float %F
 }
 
-define float @test_ceil(float %C) {
+; ALL-LABEL: @test_shrink_libcall_ceil(
+; DO-SIMPLIFY: call float @llvm.ceil.f32(
+; C89-SIMPLIFY: call float @llvm.ceil.f32(
+; DONT-SIMPLIFY: call float @llvm.ceil.f32(
+define float @test_shrink_libcall_ceil(float %C) {
   %D = fpext float %C to double
   ; --> ceilf
   %E = call double @ceil(double %D)
@@ -52,7 +43,11 @@
   ret float %F
 }
 
-define float @test_round(float %C) {
+; ALL-LABEL: @test_shrink_libcall_round(
+; DO-SIMPLIFY: call float @llvm.round.f32(
+; C89-SIMPLIFY: call double @round(
+; DONT-SIMPLIFY: call double @round(
+define float @test_shrink_libcall_round(float %C) {
   %D = fpext float %C to double
   ; --> roundf
   %E = call double @round(double %D)
@@ -60,7 +55,11 @@
   ret float %F
 }
 
-define float @test_nearbyint(float %C) {
+; ALL-LABEL: @test_shrink_libcall_nearbyint(
+; DO-SIMPLIFY: call float @llvm.nearbyint.f32(
+; C89-SIMPLIFY: call double @nearbyint(
+; DONT-SIMPLIFY: call double @nearbyint(
+define float @test_shrink_libcall_nearbyint(float %C) {
   %D = fpext float %C to double
   ; --> nearbyintf
   %E = call double @nearbyint(double %D)
@@ -68,7 +67,10 @@
   ret float %F
 }
 
-define float @test_trunc(float %C) {
+; ALL-LABEL: @test_shrink_libcall_trunc(
+; DO-SIMPLIFY: call float @llvm.trunc.f32(
+; DONT-SIMPLIFY: call double @trunc(
+define float @test_shrink_libcall_trunc(float %C) {
   %D = fpext float %C to double
   ; --> truncf
   %E = call double @trunc(double %D)
@@ -76,7 +78,13 @@
   ret float %F
 }
 
-define float @test_fabs(float %C) {
+; ALL-LABEL: @test_shrink_libcall_fabs(
+; DO-SIMPLIFY: call float @llvm.fabs.f32(
+
+; This is replaced with the intrinsic, which does the right thing on
+; all platforms.
+; DONT-SIMPLIFY: call float @llvm.fabs.f32(
+define float @test_shrink_libcall_fabs(float %C) {
   %D = fpext float %C to double
   ; --> fabsf
   %E = call double @fabs(double %D)
@@ -85,10 +93,371 @@
 }
 
 ; Make sure fast math flags are preserved
-define float @test_fabs_fast(float %C) {
+; ALL-LABEL: @test_shrink_libcall_fabs_fast(
+; DO-SIMPLIFY: call fast float @llvm.fabs.f32(
+define float @test_shrink_libcall_fabs_fast(float %C) {
   %D = fpext float %C to double
   ; --> fabsf
   %E = call fast double @fabs(double %D)
   %F = fptrunc double %E to float
   ret float %F
 }
+
+; ALL-LABEL: @test_shrink_intrin_floor(
+; ALL: call float @llvm.floor.f32(
+define float @test_shrink_intrin_floor(float %C) {
+  %D = fpext float %C to double
+  ; --> floorf
+  %E = call double @llvm.floor.f64(double %D)
+  %F = fptrunc double %E to float
+  ret float %F
+}
+
+; ALL-LABEL: @test_shrink_intrin_ceil(
+; ALL: call float @llvm.ceil.f32(
+define float @test_shrink_intrin_ceil(float %C) {
+  %D = fpext float %C to double
+  ; --> ceilf
+  %E = call double @llvm.ceil.f64(double %D)
+  %F = fptrunc double %E to float
+  ret float %F
+}
+
+; ALL-LABEL: @test_shrink_intrin_round(
+; ALL: call float @llvm.round.f32(
+define float @test_shrink_intrin_round(float %C) {
+  %D = fpext float %C to double
+  ; --> roundf
+  %E = call double @llvm.round.f64(double %D)
+  %F = fptrunc double %E to float
+  ret float %F
+}
+
+; ALL-LABEL: @test_shrink_intrin_nearbyint(
+; ALL: call float @llvm.nearbyint.f32(
+define float @test_shrink_intrin_nearbyint(float %C) {
+  %D = fpext float %C to double
+  ; --> nearbyintf
+  %E = call double @llvm.nearbyint.f64(double %D)
+  %F = fptrunc double %E to float
+  ret float %F
+}
+
+; ALL-LABEL: @test_shrink_intrin_trunc(
+; ALL: call float @llvm.trunc.f32(
+define float @test_shrink_intrin_trunc(float %C) {
+  %D = fpext float %C to double
+  %E = call double @llvm.trunc.f64(double %D)
+  %F = fptrunc double %E to float
+  ret float %F
+}
+
+; ALL-LABEL: @test_shrink_intrin_fabs(
+; ALL: call float @llvm.fabs.f32(
+define float @test_shrink_intrin_fabs(float %C) {
+  %D = fpext float %C to double
+  %E = call double @llvm.fabs.f64(double %D)
+  %F = fptrunc double %E to float
+  ret float %F
+}
+
+; Make sure fast math flags are preserved
+; ALL-LABEL: @test_shrink_intrin_fabs_fast(
+; ALL: call fast float @llvm.fabs.f32(
+define float @test_shrink_intrin_fabs_fast(float %C) {
+  %D = fpext float %C to double
+  %E = call fast double @llvm.fabs.f64(double %D)
+  %F = fptrunc double %E to float
+  ret float %F
+}
+
+; ALL-LABEL: @test_no_shrink_intrin_floor(
+; ALL: call double @llvm.floor.f64(
+define float @test_no_shrink_intrin_floor(double %D) {
+  %E = call double @llvm.floor.f64(double %D)
+  %F = fptrunc double %E to float
+  ret float %F
+}
+
+; ALL-LABEL: @test_no_shrink_intrin_ceil(
+; ALL: call double @llvm.ceil.f64(
+define float @test_no_shrink_intrin_ceil(double %D) {
+  %E = call double @llvm.ceil.f64(double %D)
+  %F = fptrunc double %E to float
+  ret float %F
+}
+
+; ALL-LABEL: @test_no_shrink_intrin_round(
+; ALL: call double @llvm.round.f64(
+define float @test_no_shrink_intrin_round(double %D) {
+  %E = call double @llvm.round.f64(double %D)
+  %F = fptrunc double %E to float
+  ret float %F
+}
+
+; ALL-LABEL: @test_no_shrink_intrin_nearbyint(
+; ALL: call double @llvm.nearbyint.f64(
+define float @test_no_shrink_intrin_nearbyint(double %D) {
+  %E = call double @llvm.nearbyint.f64(double %D)
+  %F = fptrunc double %E to float
+  ret float %F
+}
+
+; ALL-LABEL: @test_no_shrink_intrin_trunc(
+; ALL: call double @llvm.trunc.f64(
+define float @test_no_shrink_intrin_trunc(double %D) {
+  %E = call double @llvm.trunc.f64(double %D)
+  %F = fptrunc double %E to float
+  ret float %F
+}
+
+; ALL-LABEL: @test_shrink_intrin_fabs_double_src(
+; ALL: call float @llvm.fabs.f32(
+define float @test_shrink_intrin_fabs_double_src(double %D) {
+  %E = call double @llvm.fabs.f64(double %D)
+  %F = fptrunc double %E to float
+  ret float %F
+}
+
+; Make sure fast math flags are preserved
+; ALL-LABEL: @test_shrink_intrin_fabs_fast_double_src(
+; ALL: call fast float @llvm.fabs.f32(
+define float @test_shrink_intrin_fabs_fast_double_src(double %D) {
+  %E = call fast double @llvm.fabs.f64(double %D)
+  %F = fptrunc double %E to float
+  ret float %F
+}
+
+; ALL-LABEL: @test_shrink_float_convertible_constant_intrin_floor(
+; ALL: ret float 2.000000e+00
+define float @test_shrink_float_convertible_constant_intrin_floor() {
+  %E = call double @llvm.floor.f64(double 2.1)
+  %F = fptrunc double %E to float
+  ret float %F
+}
+
+; ALL-LABEL: @test_shrink_float_convertible_constant_intrin_ceil(
+; ALL: ret float 3.000000e+00
+define float @test_shrink_float_convertible_constant_intrin_ceil() {
+  %E = call double @llvm.ceil.f64(double 2.1)
+  %F = fptrunc double %E to float
+  ret float %F
+}
+
+; ALL-LABEL: @test_shrink_float_convertible_constant_intrin_round(
+; ALL: ret float 2.000000e+00
+define float @test_shrink_float_convertible_constant_intrin_round() {
+  %E = call double @llvm.round.f64(double 2.1)
+  %F = fptrunc double %E to float
+  ret float %F
+}
+
+; ALL-LABEL: @test_shrink_float_convertible_constant_intrin_nearbyint(
+; ALL: ret float 2.000000e+00
+define float @test_shrink_float_convertible_constant_intrin_nearbyint() {
+  %E = call double @llvm.nearbyint.f64(double 2.1)
+  %F = fptrunc double %E to float
+  ret float %F
+}
+
+; ALL-LABEL: @test_shrink_float_convertible_constant_intrin_trunc(
+; ALL: ret float 2.000000e+00
+define float @test_shrink_float_convertible_constant_intrin_trunc() {
+  %E = call double @llvm.trunc.f64(double 2.1)
+  %F = fptrunc double %E to float
+  ret float %F
+}
+
+; ALL-LABEL: @test_shrink_float_convertible_constant_intrin_fabs(
+; ALL: ret float 0x4000CCCCC0000000
+define float @test_shrink_float_convertible_constant_intrin_fabs() {
+  %E = call double @llvm.fabs.f64(double 2.1)
+  %F = fptrunc double %E to float
+  ret float %F
+}
+
+; Make sure fast math flags are preserved
+; ALL-LABEL: @test_shrink_float_convertible_constant_intrin_fabs_fast(
+; ALL: ret float 0x4000CCCCC0000000
+define float @test_shrink_float_convertible_constant_intrin_fabs_fast() {
+  %E = call fast double @llvm.fabs.f64(double 2.1)
+  %F = fptrunc double %E to float
+  ret float %F
+}
+
+; ALL-LABEL: @test_no_shrink_mismatched_type_intrin_floor(
+; ALL-NEXT: %E = call double @llvm.floor.f64(double %D)
+; ALL-NEXT: %F = fptrunc double %E to half
+; ALL-NEXT: ret half %F
+define half @test_no_shrink_mismatched_type_intrin_floor(double %D) {
+  %E = call double @llvm.floor.f64(double %D)
+  %F = fptrunc double %E to half
+  ret half %F
+}
+
+; ALL-LABEL: @test_no_shrink_mismatched_type_intrin_ceil(
+; ALL-NEXT: %E = call double @llvm.ceil.f64(double %D)
+; ALL-NEXT: %F = fptrunc double %E to half
+; ALL-NEXT: ret half %F
+define half @test_no_shrink_mismatched_type_intrin_ceil(double %D) {
+  %E = call double @llvm.ceil.f64(double %D)
+  %F = fptrunc double %E to half
+  ret half %F
+}
+
+; ALL-LABEL: @test_no_shrink_mismatched_type_intrin_round(
+; ALL-NEXT: %E = call double @llvm.round.f64(double %D)
+; ALL-NEXT: %F = fptrunc double %E to half
+; ALL-NEXT: ret half %F
+define half @test_no_shrink_mismatched_type_intrin_round(double %D) {
+  %E = call double @llvm.round.f64(double %D)
+  %F = fptrunc double %E to half
+  ret half %F
+}
+
+; ALL-LABEL: @test_no_shrink_mismatched_type_intrin_nearbyint(
+; ALL-NEXT: %E = call double @llvm.nearbyint.f64(double %D)
+; ALL-NEXT: %F = fptrunc double %E to half
+; ALL-NEXT: ret half %F
+define half @test_no_shrink_mismatched_type_intrin_nearbyint(double %D) {
+  %E = call double @llvm.nearbyint.f64(double %D)
+  %F = fptrunc double %E to half
+  ret half %F
+}
+
+; ALL-LABEL: @test_no_shrink_mismatched_type_intrin_trunc(
+; ALL-NEXT: %E = call double @llvm.trunc.f64(double %D)
+; ALL-NEXT: %F = fptrunc double %E to half
+; ALL-NEXT: ret half %F
+define half @test_no_shrink_mismatched_type_intrin_trunc(double %D) {
+  %E = call double @llvm.trunc.f64(double %D)
+  %F = fptrunc double %E to half
+  ret half %F
+}
+
+; ALL-LABEL: @test_shrink_mismatched_type_intrin_fabs_double_src(
+; ALL-NEXT: %1 = fptrunc double %D to half
+; ALL-NEXT: %F = call half @llvm.fabs.f16(half %1)
+; ALL-NEXT: ret half %F
+define half @test_shrink_mismatched_type_intrin_fabs_double_src(double %D) {
+  %E = call double @llvm.fabs.f64(double %D)
+  %F = fptrunc double %E to half
+  ret half %F
+}
+
+; Make sure fast math flags are preserved
+; ALL-LABEL: @test_mismatched_type_intrin_fabs_fast_double_src(
+; ALL-NEXT: %1 = fptrunc double %D to half
+; ALL-NEXT: %F = call fast half @llvm.fabs.f16(half %1)
+; ALL-NEXT: ret half %F
+define half @test_mismatched_type_intrin_fabs_fast_double_src(double %D) {
+  %E = call fast double @llvm.fabs.f64(double %D)
+  %F = fptrunc double %E to half
+  ret half %F
+}
+
+; ALL-LABEL: @test_shrink_intrin_floor_fp16_src(
+; ALL-NEXT: %E = call half @llvm.floor.f16(half %C)
+; ALL-NEXT: %1 = fpext half %E to double
+; ALL-NEXT: %F = fptrunc double %1 to float
+define float @test_shrink_intrin_floor_fp16_src(half %C) {
+  %D = fpext half %C to double
+  %E = call double @llvm.floor.f64(double %D)
+  %F = fptrunc double %E to float
+  ret float %F
+}
+
+; ALL-LABEL: @test_shrink_intrin_ceil_fp16_src(
+; ALL-NEXT: %E = call half @llvm.ceil.f16(half %C)
+; ALL-NEXT: %1 = fpext half %E to double
+; ALL-NEXT: %F = fptrunc double %1 to float
+; ALL-NEXT: ret float %F
+define float @test_shrink_intrin_ceil_fp16_src(half %C) {
+  %D = fpext half %C to double
+  %E = call double @llvm.ceil.f64(double %D)
+  %F = fptrunc double %E to float
+  ret float %F
+}
+
+; ALL-LABEL: @test_shrink_intrin_round_fp16_src(
+; ALL-NEXT: %E = call half @llvm.round.f16(half %C)
+; ALL-NEXT: %1 = fpext half %E to double
+; ALL-NEXT: %F = fptrunc double %1 to float
+; ALL-NEXT: ret float %F
+define float @test_shrink_intrin_round_fp16_src(half %C) {
+  %D = fpext half %C to double
+  %E = call double @llvm.round.f64(double %D)
+  %F = fptrunc double %E to float
+  ret float %F
+}
+
+; ALL-LABEL: @test_shrink_intrin_nearbyint_fp16_src(
+; ALL-NEXT: %E = call half @llvm.nearbyint.f16(half %C)
+; ALL-NEXT: %1 = fpext half %E to double
+; ALL-NEXT: %F = fptrunc double %1 to float
+; ALL-NEXT: ret float %F
+define float @test_shrink_intrin_nearbyint_fp16_src(half %C) {
+  %D = fpext half %C to double
+  %E = call double @llvm.nearbyint.f64(double %D)
+  %F = fptrunc double %E to float
+  ret float %F
+}
+
+; ALL-LABEL: @test_shrink_intrin_trunc_fp16_src(
+; ALL-NEXT: %E = call half @llvm.trunc.f16(half %C)
+; ALL-NEXT: %1 = fpext half %E to double
+; ALL-NEXT: %F = fptrunc double %1 to float
+; ALL-NEXT: ret float %F
+define float @test_shrink_intrin_trunc_fp16_src(half %C) {
+  %D = fpext half %C to double
+  %E = call double @llvm.trunc.f64(double %D)
+  %F = fptrunc double %E to float
+  ret float %F
+}
+
+; ALL-LABEL: @test_shrink_intrin_fabs_fp16_src(
+; ALL-NEXT: %E = call half @llvm.fabs.f16(half %C)
+; ALL-NEXT: %1 = fpext half %E to double
+; ALL-NEXT: %F = fptrunc double %1 to float
+; ALL-NEXT: ret float %F
+define float @test_shrink_intrin_fabs_fp16_src(half %C) {
+  %D = fpext half %C to double
+  %E = call double @llvm.fabs.f64(double %D)
+  %F = fptrunc double %E to float
+  ret float %F
+}
+
+; Make sure fast math flags are preserved
+; ALL-LABEL: @test_shrink_intrin_fabs_fast_fp16_src(
+; ALL-NEXT: %E = call fast half @llvm.fabs.f16(half %C)
+; ALL-NEXT: %1 = fpext half %E to double
+; ALL-NEXT: %F = fptrunc double %1 to float
+; ALL-NEXT: ret float %F
+define float @test_shrink_intrin_fabs_fast_fp16_src(half %C) {
+  %D = fpext half %C to double
+  %E = call fast double @llvm.fabs.f64(double %D)
+  %F = fptrunc double %E to float
+  ret float %F
+}
+
+; ALL-LABEL: @test_no_shrink_intrin_floor_multi_use_fpext(
+; ALL: %D = fpext half %C to double
+; ALL: call double @llvm.floor.f64
+define float @test_no_shrink_intrin_floor_multi_use_fpext(half %C) {
+  %D = fpext half %C to double
+  store volatile double %D, double* undef
+  %E = call double @llvm.floor.f64(double %D)
+  %F = fptrunc double %E to float
+  ret float %F
+}
+
+; ALL-LABEL: @test_no_shrink_intrin_fabs_multi_use_fpext(
+; ALL: %D = fpext half %C to double
+; ALL: call double @llvm.fabs.f64
+define float @test_no_shrink_intrin_fabs_multi_use_fpext(half %C) {
+  %D = fpext half %C to double
+  store volatile double %D, double* undef
+  %E = call double @llvm.fabs.f64(double %D)
+  %F = fptrunc double %E to float
+  ret float %F
+}