Index: lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h
===================================================================
--- lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h
+++ lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h
@@ -68,6 +68,8 @@
     TTI::OperandValueProperties Opd1PropInfo = TTI::OP_None,
     TTI::OperandValueProperties Opd2PropInfo = TTI::OP_None);
 
+  int getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src);
+
   unsigned getCFInstrCost(unsigned Opcode);
 
   unsigned getScalarizationOverhead(Type *Ty, bool Insert, bool Extract) {
Index: lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
===================================================================
--- lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
+++ lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
@@ -83,6 +83,43 @@
   return 64;
 }
 
+// TODO: Implement getIntrinsicInstrCost
+//
+// Cost estimate for intrinsic IID on value type VT for subtarget ST. Only
+// fma, floor and trunc are modelled; any other ID yields -1.
+static int getIntrinsicCost(const AMDGPUSubtarget &ST, MVT::SimpleValueType VT,
+                            unsigned IID) {
+  const int Basic = TargetTransformInfo::TCC_Basic;
+  const bool IsF16OrF32 = VT == MVT::f16 || VT == MVT::f32;
+
+  if (IID == Intrinsic::fma) {
+    // Basic cost only for f16/f32 when the subtarget reports fast f32 FMA;
+    // every other combination is charged three times that.
+    if (IsF16OrF32 && ST.hasFastFMAF32())
+      return Basic;
+
+    return 3 * Basic;
+  }
+
+  if (IID != Intrinsic::floor && IID != Intrinsic::trunc)
+    return -1;
+
+  // floor and trunc agree except on generations below SEA_ISLANDS.
+  if (IsF16OrF32)
+    return Basic;
+
+  if (ST.getGeneration() >= AMDGPUSubtarget::SEA_ISLANDS)
+    return 2 * Basic;
+
+  // Below SEA_ISLANDS: trunc is charged 15, and floor is charged the trunc
+  // cost plus 7.
+  const int TruncCost = 15;
+  if (IID == Intrinsic::trunc)
+    return TruncCost;
+
+  return TruncCost + 7;
+}
+
 int AMDGPUTTIImpl::getArithmeticInstrCost(
     unsigned Opcode, Type *Ty, TTI::OperandValueKind Opd1Info,
     TTI::OperandValueKind Opd2Info, TTI::OperandValueProperties Opd1PropInfo,
@@ -144,6 +181,95 @@
                                        Opd1PropInfo, Opd2PropInfo);
 }
 
+// Cost of an fp <-> int conversion. Any other cast opcode, and any type
+// combination not modelled below, is deferred to the generic BaseT cost.
+int AMDGPUTTIImpl::getCastInstrCost(unsigned Opcode,
+                                    Type *Dst, Type *Src) {
+  if (Opcode != Instruction::FPToSI &&
+      Opcode != Instruction::FPToUI &&
+      Opcode != Instruction::SIToFP &&
+      Opcode != Instruction::UIToFP)
+    return BaseT::getCastInstrCost(Opcode, Dst, Src);
+
+  EVT SrcTy = TLI->getValueType(DL, Src);
+  EVT DstTy = TLI->getValueType(DL, Dst);
+
+  if (!SrcTy.isSimple() || !DstTy.isSimple())
+    return BaseT::getCastInstrCost(Opcode, Dst, Src);
+
+  std::pair<int, MVT> SrcLT = TLI->getTypeLegalizationCost(DL, Src);
+  std::pair<int, MVT> DstLT = TLI->getTypeLegalizationCost(DL, Dst);
+
+  // The results below are scaled by SrcLT.first only, which presumes both
+  // sides legalize with the same factor; otherwise use the generic model.
+  if (SrcLT.first != DstLT.first)
+    return BaseT::getCastInstrCost(Opcode, Dst, Src);
+
+  unsigned NElts = SrcLT.second.isVector() ?
+    SrcLT.second.getVectorNumElements() : 1;
+
+  MVT::SimpleValueType SSrcLT = SrcLT.second.getScalarType().SimpleTy;
+  MVT::SimpleValueType SDstLT = DstLT.second.getScalarType().SimpleTy;
+
+  switch (Opcode) {
+  case Instruction::FPToSI:
+  case Instruction::FPToUI: {
+    int Cost = 0;
+    if (SSrcLT == MVT::f32 || SSrcLT == MVT::f16) {
+      // FIXME: The f32 -> i64 expansion is not modelled yet.
+      if (SDstLT == MVT::i64)
+        return BaseT::getCastInstrCost(Opcode, Dst, Src);
+
+      // f32 -> i32 full rate instruction.
+      Cost += 1;
+    } else if (SSrcLT != MVT::f64) {
+      // Only f16, f32 and f64 sources are modelled here.
+      return BaseT::getCastInstrCost(Opcode, Dst, Src);
+    } else if (SDstLT == MVT::i64) {
+      // f64 -> i64 expansion.
+      Cost += ::getIntrinsicCost(*ST, SSrcLT, Intrinsic::trunc);
+      Cost += ::getIntrinsicCost(*ST, SSrcLT, Intrinsic::floor);
+      Cost += ::getIntrinsicCost(*ST, SSrcLT, Intrinsic::fma);
+      Cost += 6;
+    } else {
+      // f64 -> i32 half or quarter rate instruction.
+      Cost += 2;
+    }
+
+    return NElts * SrcLT.first * Cost;
+  }
+  case Instruction::SIToFP:
+  case Instruction::UIToFP: {
+    int Cost = 0;
+    if (SDstLT == MVT::f32 || SDstLT == MVT::f16) {
+      // FIXME: The i64 -> f32 expansion is not modelled yet.
+      if (SSrcLT == MVT::i64)
+        return BaseT::getCastInstrCost(Opcode, Dst, Src);
+
+      // i32 -> f32 full rate instruction.
+      Cost = TargetTransformInfo::TCC_Basic;
+    } else if (SSrcLT == MVT::i64) {
+      // i64 to f64 expansion:
+      // [su]int_to_fp (half or full)
+      // uint_to_fp (half or full)
+      // ldexp (half or full)
+      // fadd (half or full)
+      Cost = 2 + 2 + 2 + 2;
+    } else {
+      // i32 -> f64 half or quarter rate instruction.
+      Cost = 2;
+    }
+
+    return NElts * SrcLT.first * Cost;
+  }
+  default:
+    break;
+  }
+
+  // Not reached given the opcode filter above; kept so every path returns.
+  return BaseT::getCastInstrCost(Opcode, Dst, Src);
+}
+
 unsigned AMDGPUTTIImpl::getCFInstrCost(unsigned Opcode) {
   // XXX - For some reason this isn't called for switch.
   switch (Opcode) {
Index: test/Analysis/CostModel/AMDGPU/fptosi.ll
===================================================================
--- /dev/null
+++ test/Analysis/CostModel/AMDGPU/fptosi.ll
@@ -0,0 +1,76 @@
+; RUN: opt -cost-model -analyze -mtriple=amdgcn-unknown-unknown -mcpu=tahiti < %s | FileCheck -check-prefix=COMMON -check-prefix=SI %s
+; RUN: opt -cost-model -analyze -mtriple=amdgcn-unknown-amdhsa -mcpu=kaveri < %s | FileCheck -check-prefix=COMMON -check-prefix=CI %s
+; RUN: opt -cost-model -analyze -mtriple=amdgcn-unknown-amdhsa -mcpu=fiji < %s | FileCheck -check-prefix=COMMON -check-prefix=CI %s
+
+; Cost-model estimates for fptosi. The tahiti run checks the SI prefix; the
+; kaveri and fiji runs both check the CI prefix.
+
+; COMMON: 'fptosi_f32_to_i32'
+; COMMON: estimated cost of 1 for {{.*}} fptosi float %val to i32
+define void @fptosi_f32_to_i32(i32 addrspace(1)* %out, float %val) #0 {
+  %cvt = fptosi float %val to i32
+  store i32 %cvt, i32 addrspace(1)* %out
+  ret void
+}
+
+; COMMON: 'fptosi_v32f32_to_v32i32'
+; COMMON: estimated cost of 32 for {{.*}} fptosi <32 x float> %val to <32 x i32>
+define void @fptosi_v32f32_to_v32i32(<32 x i32> addrspace(1)* %out, <32 x float> %val) #0 {
+  %cvt = fptosi <32 x float> %val to <32 x i32>
+  store <32 x i32> %cvt, <32 x i32> addrspace(1)* %out
+  ret void
+}
+
+; COMMON: 'fptosi_f64_to_i32'
+; COMMON: estimated cost of 2 for {{.*}} fptosi double %val to i32
+define void @fptosi_f64_to_i32(i32 addrspace(1)* %out, double %val) #0 {
+  %cvt = fptosi double %val to i32
+  store i32 %cvt, i32 addrspace(1)* %out
+  ret void
+}
+
+; FIXME: The f32 -> i64 expansion is not modelled yet; should probably be higher
+; COMMON: 'fptosi_f32_to_i64'
+; COMMON: estimated cost of 1 for {{.*}} fptosi float %val to i64
+define void @fptosi_f32_to_i64(i64 addrspace(1)* %out, float %val) #0 {
+  %cvt = fptosi float %val to i64
+  store i64 %cvt, i64 addrspace(1)* %out
+  ret void
+}
+
+; COMMON: 'fptosi_f64_to_i64'
+; SI: estimated cost of 46 for {{.*}} fptosi double %val to i64
+; CI: estimated cost of 13 for {{.*}} fptosi double %val to i64
+define void @fptosi_f64_to_i64(i64 addrspace(1)* %out, double %val) #0 {
+  %cvt = fptosi double %val to i64
+  store i64 %cvt, i64 addrspace(1)* %out
+  ret void
+}
+
+; COMMON: 'fptosi_v3f64_to_v3i64'
+; SI: estimated cost of 138 for {{.*}} fptosi <3 x double> %val to <3 x i64>
+; CI: estimated cost of 39 for {{.*}} fptosi <3 x double> %val to <3 x i64>
+define void @fptosi_v3f64_to_v3i64(<3 x i64> addrspace(1)* %out, <3 x double> %val) #0 {
+  %cvt = fptosi <3 x double> %val to <3 x i64>
+  store <3 x i64> %cvt, <3 x i64> addrspace(1)* %out
+  ret void
+}
+
+; COMMON: 'fptosi_f16_to_i32'
+; COMMON: estimated cost of 1 for {{.*}} fptosi half %val to i32
+define void @fptosi_f16_to_i32(i32 addrspace(1)* %out, half %val) #0 {
+  %cvt = fptosi half %val to i32
+  store i32 %cvt, i32 addrspace(1)* %out
+  ret void
+}
+
+; FIXME: Should probably be a bit higher
+; COMMON: 'fptosi_f16_to_i64'
+; COMMON: estimated cost of 1 for {{.*}} fptosi half %val to i64
+define void @fptosi_f16_to_i64(i64 addrspace(1)* %out, half %val) #0 {
+  %cvt = fptosi half %val to i64
+  store i64 %cvt, i64 addrspace(1)* %out
+  ret void
+}
+
+attributes #0 = { nounwind }
Index: test/Analysis/CostModel/AMDGPU/sitofp.ll
===================================================================
--- /dev/null
+++ test/Analysis/CostModel/AMDGPU/sitofp.ll
@@ -0,0 +1,54 @@
+; RUN: opt -cost-model -analyze -mtriple=amdgcn-unknown-amdhsa < %s | FileCheck -check-prefix=COMMON %s
+
+; Cost-model estimates for sitofp, checked on a single amdgcn run.
+
+; COMMON: 'sitofp_i32_to_f32'
+; COMMON: estimated cost of 1 for {{.*}} sitofp i32 %val to float
+define void @sitofp_i32_to_f32(float addrspace(1)* %out, i32 %val) #0 {
+  %cvt = sitofp i32 %val to float
+  store float %cvt, float addrspace(1)* %out
+  ret void
+}
+
+; COMMON: 'sitofp_v32i32_to_v32f32'
+; COMMON: estimated cost of 32 for {{.*}} sitofp <32 x i32> %val to <32 x float>
+define void @sitofp_v32i32_to_v32f32(<32 x float> addrspace(1)* %out, <32 x i32> %val) #0 {
+  %cvt = sitofp <32 x i32> %val to <32 x float>
+  store <32 x float> %cvt, <32 x float> addrspace(1)* %out
+  ret void
+}
+
+; FIXME: The i64 -> f32 expansion is not modelled yet; should probably be higher
+; COMMON: 'sitofp_i64_to_f32'
+; COMMON: estimated cost of 1 for {{.*}} sitofp i64 %val to float
+define void @sitofp_i64_to_f32(float addrspace(1)* %out, i64 %val) #0 {
+  %cvt = sitofp i64 %val to float
+  store float %cvt, float addrspace(1)* %out
+  ret void
+}
+
+; COMMON: 'sitofp_i32_to_f64'
+; COMMON: estimated cost of 2 for {{.*}} sitofp i32 %val to double
+define void @sitofp_i32_to_f64(double addrspace(1)* %out, i32 %val) #0 {
+  %cvt = sitofp i32 %val to double
+  store double %cvt, double addrspace(1)* %out
+  ret void
+}
+
+; COMMON: 'sitofp_i64_to_f64'
+; COMMON: estimated cost of 8 for {{.*}} sitofp i64 %val to double
+define void @sitofp_i64_to_f64(double addrspace(1)* %out, i64 %val) #0 {
+  %cvt = sitofp i64 %val to double
+  store double %cvt, double addrspace(1)* %out
+  ret void
+}
+
+; COMMON: 'sitofp_v3i64_to_v3f64'
+; COMMON: estimated cost of 24 for {{.*}} sitofp <3 x i64> %val to <3 x double>
+define void @sitofp_v3i64_to_v3f64(<3 x double> addrspace(1)* %out, <3 x i64> %val) #0 {
+  %cvt = sitofp <3 x i64> %val to <3 x double>
+  store <3 x double> %cvt, <3 x double> addrspace(1)* %out
+  ret void
+}
+
+attributes #0 = { nounwind }