Index: lib/Target/AArch64/AArch64TargetTransformInfo.h
===================================================================
--- lib/Target/AArch64/AArch64TargetTransformInfo.h
+++ lib/Target/AArch64/AArch64TargetTransformInfo.h
@@ -111,6 +111,11 @@
   int getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
                       unsigned AddressSpace);
 
+  int getIntrinsicInstrCost(Intrinsic::ID IID, Type *RetTy,
+                            ArrayRef<Type *> Tys, FastMathFlags FMF);
+  int getIntrinsicInstrCost(Intrinsic::ID IID, Type *RetTy,
+                            ArrayRef<Value *> Args, FastMathFlags FMF);
+
   int getCostOfKeepingLiveOverCall(ArrayRef<Type *> Tys);
 
   void getUnrollingPreferences(Loop *L, TTI::UnrollingPreferences &UP);
Index: lib/Target/AArch64/AArch64TargetTransformInfo.cpp
===================================================================
--- lib/Target/AArch64/AArch64TargetTransformInfo.cpp
+++ lib/Target/AArch64/AArch64TargetTransformInfo.cpp
@@ -493,6 +493,40 @@
   return LT.first;
 }
 
+int AArch64TTIImpl::getIntrinsicInstrCost(Intrinsic::ID IID, Type *RetTy,
+                                          ArrayRef<Type *> Tys, FastMathFlags FMF) {
+  static const CostTblEntry IntrinsicCostTbl[] = {
+    { ISD::FSQRT, MVT::f32, 4 },
+    { ISD::FSQRT, MVT::v2f32, 4 },
+    { ISD::FSQRT, MVT::v4f32, 4 },
+    { ISD::FSQRT, MVT::f64, 5 },
+    { ISD::FSQRT, MVT::v2f64, 5 },
+  };
+
+  unsigned ISD = ISD::DELETED_NODE;
+  switch (IID) {
+  default:
+    break;
+  case Intrinsic::sqrt:
+    ISD = ISD::FSQRT;
+    break;
+  }
+
+  // Legalize the type.
+  std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, RetTy);
+  MVT MTy = LT.second;
+
+  // Attempt to look up the cost in the table.
+  if (const auto *Entry = CostTableLookup(IntrinsicCostTbl, ISD, MTy))
+    return LT.first * Entry->Cost;
+  return BaseT::getIntrinsicInstrCost(IID, RetTy, Tys, FMF);
+}
+
+int AArch64TTIImpl::getIntrinsicInstrCost(Intrinsic::ID IID, Type *RetTy,
+                                          ArrayRef<Value *> Args, FastMathFlags FMF) {
+  return BaseT::getIntrinsicInstrCost(IID, RetTy, Args, FMF);
+}
+
 int AArch64TTIImpl::getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy,
                                                unsigned Factor,
                                                ArrayRef<unsigned> Indices,
Index: test/Analysis/CostModel/AArch64/arith-fp.ll
===================================================================
--- /dev/null
+++ test/Analysis/CostModel/AArch64/arith-fp.ll
@@ -0,0 +1,27 @@
+; RUN: opt < %s -enable-no-nans-fp-math -cost-model -analyze | FileCheck %s
+
+target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128"
+target triple = "aarch64--linux-gnu"
+
+define i32 @fsqrt(i32 %arg) {
+  %F32 = call float @llvm.sqrt.f32(float undef)
+  %V2F32 = call <2 x float> @llvm.sqrt.v2f32(<2 x float> undef)
+  %V4F32 = call <4 x float> @llvm.sqrt.v4f32(<4 x float> undef)
+; CHECK: cost of 4 {{.*}} %F32 = call float @llvm.sqrt.f32
+; CHECK: cost of 4 {{.*}} %V2F32 = call <2 x float> @llvm.sqrt.v2f32
+; CHECK: cost of 4 {{.*}} %V4F32 = call <4 x float> @llvm.sqrt.v4f32
+
+  %F64 = call double @llvm.sqrt.f64(double undef)
+  %V2F64 = call <2 x double> @llvm.sqrt.v2f64(<2 x double> undef)
+; CHECK: cost of 5 {{.*}} %F64 = call double @llvm.sqrt.f64
+; CHECK: cost of 5 {{.*}} %V2F64 = call <2 x double> @llvm.sqrt.v2f64
+
+  ret i32 undef
+}
+
+declare float @llvm.sqrt.f32(float)
+declare <2 x float> @llvm.sqrt.v2f32(<2 x float>)
+declare <4 x float> @llvm.sqrt.v4f32(<4 x float>)
+
+declare double @llvm.sqrt.f64(double)
+declare <2 x double> @llvm.sqrt.v2f64(<2 x double>)
Index: test/Transforms/SLPVectorizer/AArch64/intrinsic-cost-model.ll
===================================================================
--- /dev/null
+++ test/Transforms/SLPVectorizer/AArch64/intrinsic-cost-model.ll
@@ -0,0 +1,45 @@
+; RUN: opt < %s -basicaa -slp-vectorizer -S -mtriple=aarch64-unknown-linux-gnu -mcpu=kryo | FileCheck %s
+
+target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
+target triple = "aarch64--linux-gnu"
+
+; CHECK-LABEL: @test1
+; CHECK: fmul fast <2 x double>
+; CHECK: fdiv fast <2 x double>
+; CHECK: call fast <2 x double> @llvm.sqrt.v2f64(<2 x double>
+; CHECK: call fast <2 x double> @llvm.sqrt.v2f64(<2 x double>
+; CHECK: ret double
+define double @test1(double %t1, double %t2, double %t3, double %z1, double %z2) {
+entry:
+  %cmp = fcmp fast une double %t1, 0.000000e+00
+  %cmp1 = fcmp fast une double %t2, 0.000000e+00
+  %or.cond = and i1 %cmp, %cmp1
+  %cmp3 = fcmp fast une double %t3, 0.000000e+00
+  %or.cond20 = and i1 %or.cond, %cmp3
+  br i1 %or.cond20, label %if.then, label %return
+
+if.then:                                          ; preds = %entry
+  %mul = fmul fast double %t1, 2.000000e+00
+  %mul4 = fmul fast double %mul, %z1
+  %div = fdiv fast double %mul4, %t2
+  %0 = tail call fast double @llvm.sqrt.f64(double %div)
+  %div7 = fdiv fast double %mul4, %t3
+  %1 = tail call fast double @llvm.sqrt.f64(double %div7)
+  %mul9 = fmul fast double %mul, %z2
+  %div10 = fdiv fast double %mul9, %t2
+  %2 = tail call fast double @llvm.sqrt.f64(double %div10)
+  %div13 = fdiv fast double %mul9, %t3
+  %3 = tail call fast double @llvm.sqrt.f64(double %div13)
+  %cmp14 = fcmp fast ogt double %0, %2
+  %cond = select i1 %cmp14, double %0, double %2
+  %cmp15 = fcmp fast ogt double %1, %3
+  %cond19 = select i1 %cmp15, double %1, double %3
+  %add = fadd fast double %cond, %cond19
+  br label %return
+
+return:                                           ; preds = %entry, %if.then
+  %retval.0 = phi double [ %add, %if.then ], [ 0.000000e+00, %entry ]
+  ret double %retval.0
+}
+
+declare double @llvm.sqrt.f64(double)
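
Note on the cost computation: the new hook maps llvm.sqrt onto ISD::FSQRT, legalizes the
return type, and returns the table cost scaled by the legalization count (LT.first), so a
vector type wider than one NEON register pays proportionally more. Below is a minimal
sketch of an additional check, not part of the patch, assuming v4f64 is split into two
legal v2f64 halves on AArch64 (LT.first == 2), which would give a cost of 2 * 5 = 10:

; Illustrative only: assumes v4f64 legalizes by splitting into two v2f64 ops.
; RUN: opt < %s -cost-model -analyze | FileCheck %s
target triple = "aarch64--linux-gnu"

define <4 x double> @fsqrt_v4f64(<4 x double> %a) {
  %r = call <4 x double> @llvm.sqrt.v4f64(<4 x double> %a)
; CHECK: cost of 10 {{.*}} %r = call <4 x double> @llvm.sqrt.v4f64
  ret <4 x double> %r
}

declare <4 x double> @llvm.sqrt.v4f64(<4 x double>)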