Index: lib/Analysis/InlineCost.cpp
===================================================================
--- lib/Analysis/InlineCost.cpp
+++ lib/Analysis/InlineCost.cpp
@@ -700,6 +700,23 @@
   // Disable SROA in the face of arbitrary casts we don't whitelist elsewhere.
   disableSROA(I.getOperand(0));
 
+  // If this is a floating-point cast, and the target says this operation
+  // is expensive or the function has the "use-soft-float" attribute, this may
+  // eventually become a library call. Treat the cost as such.
+  switch (I.getOpcode()) {
+  case Instruction::FPTrunc:
+  case Instruction::FPExt:
+  case Instruction::UIToFP:
+  case Instruction::SIToFP:
+  case Instruction::FPToUI:
+  case Instruction::FPToSI:
+    if (TTI.getFPOpCost(I.getType()) == TargetTransformInfo::TCC_Expensive ||
+        (F.getFnAttribute("use-soft-float").getValueAsString() == "true"))
+      Cost += InlineConstants::CallPenalty;
+  default:
+    break;
+  }
+
   return TargetTransformInfo::TCC_Free == TTI.getUserCost(&I);
 }
 
@@ -1078,6 +1095,15 @@
   disableSROA(LHS);
   disableSROA(RHS);
 
+  // If the instruction is floating point, and the target says this operation
+  // is expensive or the function has the "use-soft-float" attribute, this may
+  // eventually become a library call. Treat the cost as such.
+  if (I.getType()->isFloatingPointTy()) {
+    if (TTI.getFPOpCost(I.getType()) == TargetTransformInfo::TCC_Expensive ||
+        (F.getFnAttribute("use-soft-float").getValueAsString() == "true"))
+      Cost += InlineConstants::CallPenalty;
+  }
+
   return false;
 }
 
@@ -1547,17 +1573,6 @@
     if (isa<ExtractElementInst>(I) || I->getType()->isVectorTy())
       ++NumVectorInstructions;
 
-    // If the instruction is floating point, and the target says this operation
-    // is expensive or the function has the "use-soft-float" attribute, this may
-    // eventually become a library call. Treat the cost as such.
-    if (I->getType()->isFloatingPointTy()) {
-      // If the function has the "use-soft-float" attribute, mark it as
-      // expensive.
-      if (TTI.getFPOpCost(I->getType()) == TargetTransformInfo::TCC_Expensive ||
-          (F.getFnAttribute("use-soft-float").getValueAsString() == "true"))
-        Cost += InlineConstants::CallPenalty;
-    }
-
     // If the instruction simplified to a constant, there is no cost to this
     // instruction. Visit the instructions using our InstVisitor to account for
     // all of the per-instruction logic. The visit tree returns true if we
Index: test/Transforms/Inline/inline-fp.ll
===================================================================
--- test/Transforms/Inline/inline-fp.ll
+++ test/Transforms/Inline/inline-fp.ll
@@ -1,100 +1,42 @@
-; RUN: opt -S -inline < %s | FileCheck %s
-; RUN: opt -S -passes='cgscc(inline)' < %s | FileCheck %s
+; RUN: opt -S -inline -pass-remarks=.* -pass-remarks-missed=.* < %s 2>&1 | FileCheck %s
+; RUN: opt -S -passes='cgscc(inline)' -pass-remarks=.* -pass-remarks-missed=.* < %s 2>&1 | FileCheck %s
 ; Make sure that soft float implementations are calculated as being more expensive
 ; to the inliner.
 
-define i32 @test_nofp() #0 {
-; f_nofp() has the "use-soft-float" attribute, so it should never get inlined.
-; CHECK-LABEL: test_nofp
-; CHECK: call float @f_nofp 
-entry:
-  %responseX = alloca i32, align 4
-  %responseY = alloca i32, align 4
-  %responseZ = alloca i32, align 4
-  %valueX = alloca i8, align 1
-  %valueY = alloca i8, align 1
-  %valueZ = alloca i8, align 1
-
-  call void @getX(i32* %responseX, i8* %valueX)
-  call void @getY(i32* %responseY, i8* %valueY)
-  call void @getZ(i32* %responseZ, i8* %valueZ)
-
-  %0 = load i32, i32* %responseX
-  %1 = load i8, i8* %valueX
-  %call = call float @f_nofp(i32 %0, i8 zeroext %1)
-  %2 = load i32, i32* %responseZ
-  %3 = load i8, i8* %valueZ
-  %call2 = call float @f_nofp(i32 %2, i8 zeroext %3)
-  %call3 = call float @fabsf(float %call)
-  %cmp = fcmp ogt float %call3, 0x3FC1EB8520000000
-  br i1 %cmp, label %if.end12, label %if.else
-
-if.else:                                          ; preds = %entry
-  %4 = load i32, i32* %responseY
-  %5 = load i8, i8* %valueY
-  %call1 = call float @f_nofp(i32 %4, i8 zeroext %5)
-  %call4 = call float @fabsf(float %call1)
-  %cmp5 = fcmp ogt float %call4, 0x3FC1EB8520000000
-  br i1 %cmp5, label %if.end12, label %if.else7
+; CHECK-DAG: f_nofp not inlined into test_nofp because too costly to inline (cost=125, threshold=75)
+; CHECK-DAG: f_nofp not inlined into test_nofp because too costly to inline (cost=125, threshold=75)
+; CHECK-DAG: f_nofp_cheap inlined into test_nofp_cheap with cost=-15 (threshold=75)
+; CHECK-DAG: f_nofp_cheap inlined into test_nofp_cheap with cost=-15015 (threshold=75)
+; CHECK-DAG: f_hasfp inlined into test_hasfp with cost=0 (threshold=75)
+; CHECK-DAG: f_hasfp inlined into test_hasfp with cost=-15000 (threshold=75)
 
-if.else7:                                         ; preds = %if.else
-  %call8 = call float @fabsf(float %call2)
-  %cmp9 = fcmp ogt float %call8, 0x3FC1EB8520000000
-  br i1 %cmp9, label %if.then10, label %if.end12
 
-if.then10:                                        ; preds = %if.else7
-  br label %if.end12
+define i32 @test_nofp(i32 %a, i8 %b, i32 %c, i8 %d) #0 {
+; f_nofp() has the "use-soft-float" attribute, so it should never get inlined.
+; CHECK-LABEL: @test_nofp
+; CHECK: call float @f_nofp
+  %call = call float @f_nofp(i32 %a, i8 zeroext %b)
+  %call2 = call float @f_nofp(i32 %c, i8 zeroext %d)
+  ret i32 0
+}
 
-if.end12:                                         ; preds = %if.else, %entry, %if.then10, %if.else7
-  %success.0 = phi i32 [ 0, %if.then10 ], [ 1, %if.else7 ], [ 0, %entry ], [ 0, %if.else ]
-  ret i32 %success.0
+define i32 @test_nofp_cheap(i32 %a, i8 %b, i32 %c, i8 %d) #0 {
+; f_nofp_cheap() has the "use-soft-float" attribute, but doesn't contain any
+; expensive ops, so it should get inlined.
+; CHECK-LABEL: @test_nofp_cheap
+; CHECK: zext i8 %b
+  %call = call float @f_nofp_cheap(i32 %a, i8 zeroext %b)
+  %call2 = call float @f_nofp_cheap(i32 %c, i8 zeroext %d)
+  ret i32 0
 }
 
-define i32 @test_hasfp() #0 {
+define i32 @test_hasfp(i32 %a, i8 %b, i32 %c, i8 %d) #1 {
 ; f_hasfp()  does not have the "use-soft-float" attribute, so it should get inlined.
-; CHECK-LABEL: test_hasfp
-; CHECK-NOT: call float @f_hasfp 
-entry:
-  %responseX = alloca i32, align 4
-  %responseY = alloca i32, align 4
-  %responseZ = alloca i32, align 4
-  %valueX = alloca i8, align 1
-  %valueY = alloca i8, align 1
-  %valueZ = alloca i8, align 1
-
-  call void @getX(i32* %responseX, i8* %valueX)
-  call void @getY(i32* %responseY, i8* %valueY)
-  call void @getZ(i32* %responseZ, i8* %valueZ)
-
-  %0 = load i32, i32* %responseX
-  %1 = load i8, i8* %valueX
-  %call = call float @f_hasfp(i32 %0, i8 zeroext %1)
-  %2 = load i32, i32* %responseZ
-  %3 = load i8, i8* %valueZ
-  %call2 = call float @f_hasfp(i32 %2, i8 zeroext %3)
-  %call3 = call float @fabsf(float %call)
-  %cmp = fcmp ogt float %call3, 0x3FC1EB8520000000
-  br i1 %cmp, label %if.end12, label %if.else
-
-if.else:                                          ; preds = %entry
-  %4 = load i32, i32* %responseY
-  %5 = load i8, i8* %valueY
-  %call1 = call float @f_hasfp(i32 %4, i8 zeroext %5)
-  %call4 = call float @fabsf(float %call1)
-  %cmp5 = fcmp ogt float %call4, 0x3FC1EB8520000000
-  br i1 %cmp5, label %if.end12, label %if.else7
-
-if.else7:                                         ; preds = %if.else
-  %call8 = call float @fabsf(float %call2)
-  %cmp9 = fcmp ogt float %call8, 0x3FC1EB8520000000
-  br i1 %cmp9, label %if.then10, label %if.end12
-
-if.then10:                                        ; preds = %if.else7
-  br label %if.end12
-
-if.end12:                                         ; preds = %if.else, %entry, %if.then10, %if.else7
-  %success.0 = phi i32 [ 0, %if.then10 ], [ 1, %if.else7 ], [ 0, %entry ], [ 0, %if.else ]
-  ret i32 %success.0
+; CHECK-LABEL: @test_hasfp
+; CHECK: zext i8 %b
+  %call = call float @f_hasfp(i32 %a, i8 zeroext %b)
+  %call2 = call float @f_hasfp(i32 %c, i8 zeroext %d)
+  ret i32 0
 }
 
 declare void @getX(i32*, i8*) #0
@@ -129,6 +71,18 @@
   ret float %div
 }
 
+define internal float @f_nofp_cheap(i32 %response, i8 zeroext %value1) #1 {
+entry:
+  %conv = zext i8 %value1 to i32
+  %sub = add nsw i32 %conv, -1
+  %conv1 = bitcast i32 %sub to float
+  %conv2 = bitcast i32 %response to float
+  %0 = tail call float @llvm.pow.f32(float %conv2, float %conv1)
+  %1 = tail call float @llvm.pow.f32(float %0, float %0)
+  %2 = tail call float @llvm.pow.f32(float %1, float %1)
+  ret float %2
+}
+
 declare float @fabsf(float) optsize minsize
 
 declare float @llvm.pow.f32(float, float) optsize minsize