Index: lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
===================================================================
--- lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
+++ lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
@@ -28,18 +28,6 @@
 
 #define DEBUG_TYPE "AMDGPUtti"
 
-static const int FullRateCost = TargetTransformInfo::TCC_Basic;
-static const int HalfRateCost = 2 * TargetTransformInfo::TCC_Basic;
-
-// TODO: The size is usually 8 bytes, but takes 4x as many cycles. Maybe should
-// be 2 or 4.
-static const int QuarterRateCost = 3 * TargetTransformInfo::TCC_Basic;
-
-// TODO: On some parts, normal fp64 operations are half rate, and others
-// quarter.
-static const int FP64RateCost = HalfRateCost;
-
-
 void AMDGPUTTIImpl::getUnrollingPreferences(Loop *L,
                                             TTI::UnrollingPreferences &UP) {
   UP.Threshold = 300; // Twice the default.
@@ -102,31 +90,30 @@
   case Intrinsic::fma: {
     if (VT == MVT::f32 || VT == MVT::f16) {
       if (ST.hasFastFMAF32())
-        return FullRateCost;
+        return TargetTransformInfo::TCC_Basic;
 
-      return FP64RateCost;
+      return 3 * TargetTransformInfo::TCC_Basic;
     }
 
-    return QuarterRateCost;
+    return 3 * TargetTransformInfo::TCC_Basic;
   }
   case Intrinsic::floor: {
     if (VT == MVT::f32 || VT == MVT::f16)
-      return FullRateCost;
+      return TargetTransformInfo::TCC_Basic;
 
     if (ST.getGeneration() >= AMDGPUSubtarget::SEA_ISLANDS)
-      return FP64RateCost;
+      return 2 * TargetTransformInfo::TCC_Basic;
 
-    return getIntrinsicCost(ST, VT, Intrinsic::trunc) +
-      3 * FullRateCost + 3 * HalfRateCost;
+    return getIntrinsicCost(ST, VT, Intrinsic::trunc) + 7;
   }
   case Intrinsic::trunc: {
     if (VT == MVT::f32 || VT == MVT::f16)
-      return FullRateCost;
+      return TargetTransformInfo::TCC_Basic;
 
     if (ST.getGeneration() >= AMDGPUSubtarget::SEA_ISLANDS)
-      return FP64RateCost;
+      return 2 * TargetTransformInfo::TCC_Basic;
 
-    return 16 * FullRateCost + 1 * HalfRateCost;
+    return 15;
   }
   default:
     return -1;
@@ -160,10 +147,10 @@
   case ISD::FSUB:
   case ISD::FMUL:
     if (SLT == MVT::f64)
-      return LT.first * NElts * FP64RateCost;
+      return 2 * LT.first * NElts;
 
     if (SLT == MVT::f32 || SLT == MVT::f16)
-      return LT.first * NElts * FullRateCost;
+      return LT.first * NElts;
     break;
 
   case ISD::FDIV:
@@ -171,9 +158,10 @@
     // FIXME: frem should be handled separately. The fdiv in it is most of it,
     // but the current lowering is also not entirely correct.
     if (SLT == MVT::f64) {
-      int Cost = 4 * FP64RateCost + 7 * QuarterRateCost;
+      int Cost = 24;
+
       if (ST->getGeneration() == AMDGPUSubtarget::SOUTHERN_ISLANDS)
-        Cost += 3 * FullRateCost;
+        Cost += 3;
 
       return LT.first * Cost * NElts;
     }
@@ -181,8 +169,7 @@
     // Assuming no fp32 denormals lowering
     if (SLT == MVT::f32 || SLT == MVT::f16) {
       assert(!ST->hasFP32Denormals() && "will change when supported");
-      int Cost = 7 * FullRateCost + 1 * QuarterRateCost;
-      return LT.first * NElts * Cost;
+      return 6 * LT.first * NElts;
     }
 
     break;
@@ -229,7 +216,7 @@
         return BaseT::getCastInstrCost(Opcode, Dst, Src);
       } else {
         // f32 -> i32 full rate instruction.
-        Cost += FullRateCost;
+        Cost += 1;
       }
     } else {
       assert(SSrcLT == MVT::f64);
@@ -239,12 +226,10 @@
         Cost += ::getIntrinsicCost(*ST, SSrcLT, Intrinsic::trunc);
         Cost += ::getIntrinsicCost(*ST, SSrcLT, Intrinsic::floor);
         Cost += ::getIntrinsicCost(*ST, SSrcLT, Intrinsic::fma);
-        Cost += 4 * FullRateCost;
-        Cost += 1 * QuarterRateCost;
-        Cost += 3 * FP64RateCost;
+        Cost += 6;
       } else {
         // f64 -> i32 half or quarter rate instruction.
-        Cost += FP64RateCost;
+        Cost += 2;
       }
     }
 
@@ -260,15 +245,19 @@
         return BaseT::getCastInstrCost(Opcode, Dst, Src);
       } else {
         // i32 -> f32 full rate instruction.
-        Cost = FullRateCost;
+        Cost = TargetTransformInfo::TCC_Basic;
       }
     } else {
       // i64 to f64 expansion
       if (SSrcLT == MVT::i64) {
-        Cost = 4 * FP64RateCost;
+        // [su]int_to_fp (half or full)
+        // uint_to_fp (half or full)
+        // ldexp (half or full)
+        // fadd (half or full)
+        Cost = 2 + 2 + 2 + 2;
       } else {
         // i32 -> f64 half or quarter rate instruction.
-        Cost = FP64RateCost;
+        Cost = 2;
       }
     }
 
Index: test/Analysis/CostModel/AMDGPU/fdiv.ll
===================================================================
--- test/Analysis/CostModel/AMDGPU/fdiv.ll
+++ test/Analysis/CostModel/AMDGPU/fdiv.ll
@@ -2,7 +2,7 @@
 ; RUN: opt -cost-model -analyze -mtriple=amdgcn-unknown-amdhsa -mcpu=tahiti < %s | FileCheck -check-prefix=COMMON -check-prefix=SI %s
 
 ; CHECK: 'fdiv_f32'
-; COMMON: estimated cost of 10 for {{.*}} fdiv float
+; COMMON: estimated cost of 6 for {{.*}} fdiv float
 define void @fdiv_f32(float addrspace(1)* %out, float addrspace(1)* %vaddr, float %b) #0 {
   %vec = load float, float addrspace(1)* %vaddr
   %add = fdiv float %vec, %b
@@ -11,7 +11,7 @@
 }
 
 ; COMMON: 'fdiv_v2f32'
-; COMMON: estimated cost of 20 for {{.*}} fdiv <2 x float>
+; COMMON: estimated cost of 12 for {{.*}} fdiv <2 x float>
 define void @fdiv_v2f32(<2 x float> addrspace(1)* %out, <2 x float> addrspace(1)* %vaddr, <2 x float> %b) #0 {
   %vec = load <2 x float>, <2 x float> addrspace(1)* %vaddr
   %add = fdiv <2 x float> %vec, %b
@@ -20,7 +20,7 @@
 }
 
 ; COMMON: 'fdiv_v3f32'
-; COMMON: estimated cost of 30 for {{.*}} fdiv <3 x float>
+; COMMON: estimated cost of 18 for {{.*}} fdiv <3 x float>
 define void @fdiv_v3f32(<3 x float> addrspace(1)* %out, <3 x float> addrspace(1)* %vaddr, <3 x float> %b) #0 {
   %vec = load <3 x float>, <3 x float> addrspace(1)* %vaddr
   %add = fdiv <3 x float> %vec, %b
@@ -29,8 +29,8 @@
 }
 
 ; COMMON: 'fdiv_f64'
-; CI: estimated cost of 29 for {{.*}} fdiv double
-; SI: estimated cost of 32 for {{.*}} fdiv double
+; CI: estimated cost of 24 for {{.*}} fdiv double
+; SI: estimated cost of 27 for {{.*}} fdiv double
 define void @fdiv_f64(double addrspace(1)* %out, double addrspace(1)* %vaddr, double %b) #0 {
   %vec = load double, double addrspace(1)* %vaddr
   %add = fdiv double %vec, %b
@@ -39,8 +39,8 @@
 }
 
 ; COMMON: 'fdiv_v2f64'
-; CI: estimated cost of 58 for {{.*}} fdiv <2 x double>
-; SI: estimated cost of 64 for {{.*}} fdiv <2 x double>
+; CI: estimated cost of 48 for {{.*}} fdiv <2 x double>
+; SI: estimated cost of 54 for {{.*}} fdiv <2 x double>
 define void @fdiv_v2f64(<2 x double> addrspace(1)* %out, <2 x double> addrspace(1)* %vaddr, <2 x double> %b) #0 {
   %vec = load <2 x double>, <2 x double> addrspace(1)* %vaddr
   %add = fdiv <2 x double> %vec, %b
@@ -49,8 +49,8 @@
 }
 
 ; COMMON: 'fdiv_v3f64'
-; CI: estimated cost of 87 for {{.*}} fdiv <3 x double>
-; SI: estimated cost of 96 for {{.*}} fdiv <3 x double>
+; CI: estimated cost of 72 for {{.*}} fdiv <3 x double>
+; SI: estimated cost of 81 for {{.*}} fdiv <3 x double>
 define void @fdiv_v3f64(<3 x double> addrspace(1)* %out, <3 x double> addrspace(1)* %vaddr, <3 x double> %b) #0 {
   %vec = load <3 x double>, <3 x double> addrspace(1)* %vaddr
   %add = fdiv <3 x double> %vec, %b
@@ -59,7 +59,7 @@
 }
 
 ; COMMON: 'fdiv_f16'
-; COMMON: estimated cost of 10 for {{.*}} fdiv half
+; COMMON: estimated cost of 6 for {{.*}} fdiv half
 define void @fdiv_f16(half addrspace(1)* %out, half addrspace(1)* %vaddr, half %b) #0 {
   %vec = load half, half addrspace(1)* %vaddr
   %add = fdiv half %vec, %b
@@ -68,7 +68,7 @@
 }
 
 ; COMMON: 'fdiv_v2f16'
-; COMMON: estimated cost of 20 for {{.*}} fdiv <2 x half>
+; COMMON: estimated cost of 12 for {{.*}} fdiv <2 x half>
 define void @fdiv_v2f16(<2 x half> addrspace(1)* %out, <2 x half> addrspace(1)* %vaddr, <2 x half> %b) #0 {
   %vec = load <2 x half>, <2 x half> addrspace(1)* %vaddr
   %add = fdiv <2 x half> %vec, %b
@@ -77,7 +77,7 @@
 }
 
 ; COMMON: 'fdiv_v4f16'
-; COMMON: estimated cost of 40 for {{.*}} fdiv <4 x half>
+; COMMON: estimated cost of 24 for {{.*}} fdiv <4 x half>
 define void @fdiv_v4f16(<4 x half> addrspace(1)* %out, <4 x half> addrspace(1)* %vaddr, <4 x half> %b) #0 {
   %vec = load <4 x half>, <4 x half> addrspace(1)* %vaddr
   %add = fdiv <4 x half> %vec, %b
Index: test/Analysis/CostModel/AMDGPU/fptosi.ll
===================================================================
--- test/Analysis/CostModel/AMDGPU/fptosi.ll
+++ test/Analysis/CostModel/AMDGPU/fptosi.ll
@@ -35,8 +35,8 @@
 }
 
 ; COMMON: 'fptosi_f64_to_i64'
-; SI: estimated cost of 61 for {{.*}} fptosi double %val to i64
-; CI: estimated cost of 20 for {{.*}} fptosi double %val to i64
+; SI: estimated cost of 46 for {{.*}} fptosi double %val to i64
+; CI: estimated cost of 13 for {{.*}} fptosi double %val to i64
 define void @fptosi_f64_to_i64(i64 addrspace(1)* %out, double %val) #0 {
   %cvt = fptosi double %val to i64
   store i64 %cvt, i64 addrspace(1)* %out
@@ -44,8 +44,8 @@
 }
 
 ; COMMON: 'fptosi_v3f64_to_v3i64'
-; SI: estimated cost of 183 for {{.*}} fptosi <3 x double> %val to <3 x i64>
-; CI: estimated cost of 60 for {{.*}} fptosi <3 x double> %val to <3 x i64>
+; SI: estimated cost of 138 for {{.*}} fptosi <3 x double> %val to <3 x i64>
+; CI: estimated cost of 39 for {{.*}} fptosi <3 x double> %val to <3 x i64>
 define void @fptosi_v3f64_to_v3i64(<3 x i64> addrspace(1)* %out, <3 x double> %val) #0 {
   %cvt = fptosi <3 x double> %val to <3 x i64>
   store <3 x i64> %cvt, <3 x i64> addrspace(1)* %out