Index: lib/Analysis/TargetTransformInfo.cpp
===================================================================
--- lib/Analysis/TargetTransformInfo.cpp
+++ lib/Analysis/TargetTransformInfo.cpp
@@ -1139,12 +1139,57 @@
     if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
       SmallVector<Value *, 4> Args(II->arg_operands());
 
-      FastMathFlags FMF;
-      if (auto *FPMO = dyn_cast<FPMathOperator>(II))
-        FMF = FPMO->getFastMathFlags();
-
-      return getIntrinsicInstrCost(II->getIntrinsicID(), II->getType(),
-                                        Args, FMF);
+      // Helper: cost of a non-pairwise arithmetic reduction performing Opcode.
+      // The vector is the last operand (fadd/fmul take a start value first).
+      auto getArithmeticRdxCost = [&](unsigned Opcode) {
+        assert(!Args.empty() && "Unexpected number of operands");
+        return getArithmeticReductionCost(Opcode, Args.back()->getType(),
+                                          /*IsPairwiseForm=*/false);
+      };
+
+      // A helper for getting the cost of non-pairwise min-max reductions. The
+      // given flag indicates if the reduction is signed.
+      auto getMinMaxRdxCost = [&](bool IsSigned) {
+        assert(Args.size() == 1 && "Unexpected number of operands");
+        return getMinMaxReductionCost(
+            Args[0]->getType(), CmpInst::makeCmpResultType(Args[0]->getType()),
+            /*IsPairwiseForm=*/false, IsSigned);
+      };
+
+      switch (II->getIntrinsicID()) {
+      default: {
+        FastMathFlags FMF;
+        if (auto *FPMO = dyn_cast<FPMathOperator>(II))
+          FMF = FPMO->getFastMathFlags();
+        return getIntrinsicInstrCost(II->getIntrinsicID(), II->getType(), Args,
+                                     FMF);
+      }
+
+      // If we have an experimental vector reduce intrinsic, use the costs
+      // specified by getArithmeticReductionCost and getMinMaxReductionCost.
+      case Intrinsic::experimental_vector_reduce_add:
+        return getArithmeticRdxCost(Instruction::Add);
+      case Intrinsic::experimental_vector_reduce_mul:
+        return getArithmeticRdxCost(Instruction::Mul);
+      case Intrinsic::experimental_vector_reduce_and:
+        return getArithmeticRdxCost(Instruction::And);
+      case Intrinsic::experimental_vector_reduce_or:
+        return getArithmeticRdxCost(Instruction::Or);
+      case Intrinsic::experimental_vector_reduce_xor:
+        return getArithmeticRdxCost(Instruction::Xor);
+      case Intrinsic::experimental_vector_reduce_fadd:
+        return getArithmeticRdxCost(Instruction::FAdd);
+      case Intrinsic::experimental_vector_reduce_fmul:
+        return getArithmeticRdxCost(Instruction::FMul);
+      case Intrinsic::experimental_vector_reduce_smax:
+      case Intrinsic::experimental_vector_reduce_smin:
+      case Intrinsic::experimental_vector_reduce_fmax:
+      case Intrinsic::experimental_vector_reduce_fmin:
+        return getMinMaxRdxCost(true);
+      case Intrinsic::experimental_vector_reduce_umax:
+      case Intrinsic::experimental_vector_reduce_umin:
+        return getMinMaxRdxCost(false);
+      }
     }
     return -1;
   default:
Index: test/Analysis/CostModel/AArch64/vector-reduce.ll
===================================================================
--- /dev/null
+++ test/Analysis/CostModel/AArch64/vector-reduce.ll
@@ -0,0 +1,279 @@
+; RUN: opt < %s -mtriple=aarch64--linux-gnu -cost-model -analyze | FileCheck %s --check-prefix=COST
+; RUN: llc < %s -mtriple=aarch64--linux-gnu | FileCheck %s --check-prefix=CODE
+
+; COST-LABEL: add.i8.v8i8
+; COST:       Found an estimated cost of 27 for instruction: %r = call i8 @llvm.experimental.vector.reduce.add.i8.v8i8(<8 x i8> %v)
+; CODE-LABEL: add.i8.v8i8
+; CODE:       addv b0, v0.8b
+define i8 @add.i8.v8i8(<8 x i8> %v) {
+  %r = call i8 @llvm.experimental.vector.reduce.add.i8.v8i8(<8 x i8> %v)
+  ret i8 %r
+}
+
+; COST-LABEL: add.i8.v16i8
+; COST:       Found an estimated cost of 53 for instruction: %r = call i8 @llvm.experimental.vector.reduce.add.i8.v16i8(<16 x i8> %v)
+; CODE-LABEL: add.i8.v16i8
+; CODE:       addv b0, v0.16b
+define i8 @add.i8.v16i8(<16 x i8> %v) {
+  %r = call i8 @llvm.experimental.vector.reduce.add.i8.v16i8(<16 x i8> %v)
+  ret i8 %r
+}
+
+; COST-LABEL: add.i16.v4i16
+; COST:       Found an estimated cost of 13 for instruction: %r = call i16 @llvm.experimental.vector.reduce.add.i16.v4i16(<4 x i16> %v)
+; CODE-LABEL: add.i16.v4i16
+; CODE:       addv h0, v0.4h
+define i16 @add.i16.v4i16(<4 x i16> %v) {
+  %r = call i16 @llvm.experimental.vector.reduce.add.i16.v4i16(<4 x i16> %v)
+  ret i16 %r
+}
+
+; COST-LABEL: add.i16.v8i16
+; COST:       Found an estimated cost of 27 for instruction: %r = call i16 @llvm.experimental.vector.reduce.add.i16.v8i16(<8 x i16> %v)
+; CODE-LABEL: add.i16.v8i16
+; CODE:       addv h0, v0.8h
+define i16 @add.i16.v8i16(<8 x i16> %v) {
+  %r = call i16 @llvm.experimental.vector.reduce.add.i16.v8i16(<8 x i16> %v)
+  ret i16 %r
+}
+
+; COST-LABEL: add.i32.v4i32
+; COST:       Found an estimated cost of 13 for instruction: %r = call i32 @llvm.experimental.vector.reduce.add.i32.v4i32(<4 x i32> %v)
+; CODE-LABEL: add.i32.v4i32
+; CODE:       addv s0, v0.4s
+define i32 @add.i32.v4i32(<4 x i32> %v) {
+  %r = call i32 @llvm.experimental.vector.reduce.add.i32.v4i32(<4 x i32> %v)
+  ret i32 %r
+}
+
+; COST-LABEL: umin.i8.v8i8
+; COST:       Found an estimated cost of 157 for instruction: %r = call i8 @llvm.experimental.vector.reduce.umin.i8.v8i8(<8 x i8> %v)
+; CODE-LABEL: umin.i8.v8i8
+; CODE:       uminv b0, v0.8b
+define i8 @umin.i8.v8i8(<8 x i8> %v) {
+  %r = call i8 @llvm.experimental.vector.reduce.umin.i8.v8i8(<8 x i8> %v)
+  ret i8 %r
+}
+
+; COST-LABEL: umin.i8.v16i8
+; COST:       Found an estimated cost of 388 for instruction: %r = call i8 @llvm.experimental.vector.reduce.umin.i8.v16i8(<16 x i8> %v)
+; CODE-LABEL: umin.i8.v16i8
+; CODE:       uminv b0, v0.16b
+define i8 @umin.i8.v16i8(<16 x i8> %v) {
+  %r = call i8 @llvm.experimental.vector.reduce.umin.i8.v16i8(<16 x i8> %v)
+  ret i8 %r
+}
+
+; COST-LABEL: umin.i16.v4i16
+; COST:       Found an estimated cost of 58 for instruction: %r = call i16 @llvm.experimental.vector.reduce.umin.i16.v4i16(<4 x i16> %v)
+; CODE-LABEL: umin.i16.v4i16
+; CODE:       uminv h0, v0.4h
+define i16 @umin.i16.v4i16(<4 x i16> %v) {
+  %r = call i16 @llvm.experimental.vector.reduce.umin.i16.v4i16(<4 x i16> %v)
+  ret i16 %r
+}
+
+; COST-LABEL: umin.i16.v8i16
+; COST:       Found an estimated cost of 157 for instruction: %r = call i16 @llvm.experimental.vector.reduce.umin.i16.v8i16(<8 x i16> %v)
+; CODE-LABEL: umin.i16.v8i16
+; CODE:       uminv h0, v0.8h
+define i16 @umin.i16.v8i16(<8 x i16> %v) {
+  %r = call i16 @llvm.experimental.vector.reduce.umin.i16.v8i16(<8 x i16> %v)
+  ret i16 %r
+}
+
+; COST-LABEL: umin.i32.v4i32
+; COST:       Found an estimated cost of 58 for instruction: %r = call i32 @llvm.experimental.vector.reduce.umin.i32.v4i32(<4 x i32> %v)
+; CODE-LABEL: umin.i32.v4i32
+; CODE:       uminv s0, v0.4s
+define i32 @umin.i32.v4i32(<4 x i32> %v) {
+  %r = call i32 @llvm.experimental.vector.reduce.umin.i32.v4i32(<4 x i32> %v)
+  ret i32 %r
+}
+
+; COST-LABEL: umax.i8.v8i8
+; COST:       Found an estimated cost of 157 for instruction: %r = call i8 @llvm.experimental.vector.reduce.umax.i8.v8i8(<8 x i8> %v)
+; CODE-LABEL: umax.i8.v8i8
+; CODE:       umaxv b0, v0.8b
+define i8 @umax.i8.v8i8(<8 x i8> %v) {
+  %r = call i8 @llvm.experimental.vector.reduce.umax.i8.v8i8(<8 x i8> %v)
+  ret i8 %r
+}
+
+; COST-LABEL: umax.i8.v16i8
+; COST:       Found an estimated cost of 388 for instruction: %r = call i8 @llvm.experimental.vector.reduce.umax.i8.v16i8(<16 x i8> %v)
+; CODE-LABEL: umax.i8.v16i8
+; CODE:       umaxv b0, v0.16b
+define i8 @umax.i8.v16i8(<16 x i8> %v) {
+  %r = call i8 @llvm.experimental.vector.reduce.umax.i8.v16i8(<16 x i8> %v)
+  ret i8 %r
+}
+
+; COST-LABEL: umax.i16.v4i16
+; COST:       Found an estimated cost of 58 for instruction: %r = call i16 @llvm.experimental.vector.reduce.umax.i16.v4i16(<4 x i16> %v)
+; CODE-LABEL: umax.i16.v4i16
+; CODE:       umaxv h0, v0.4h
+define i16 @umax.i16.v4i16(<4 x i16> %v) {
+  %r = call i16 @llvm.experimental.vector.reduce.umax.i16.v4i16(<4 x i16> %v)
+  ret i16 %r
+}
+
+; COST-LABEL: umax.i16.v8i16
+; COST:       Found an estimated cost of 157 for instruction: %r = call i16 @llvm.experimental.vector.reduce.umax.i16.v8i16(<8 x i16> %v)
+; CODE-LABEL: umax.i16.v8i16
+; CODE:       umaxv h0, v0.8h
+define i16 @umax.i16.v8i16(<8 x i16> %v) {
+  %r = call i16 @llvm.experimental.vector.reduce.umax.i16.v8i16(<8 x i16> %v)
+  ret i16 %r
+}
+
+; COST-LABEL: umax.i32.v4i32
+; COST:       Found an estimated cost of 58 for instruction: %r = call i32 @llvm.experimental.vector.reduce.umax.i32.v4i32(<4 x i32> %v)
+; CODE-LABEL: umax.i32.v4i32
+; CODE:       umaxv s0, v0.4s
+define i32 @umax.i32.v4i32(<4 x i32> %v) {
+  %r = call i32 @llvm.experimental.vector.reduce.umax.i32.v4i32(<4 x i32> %v)
+  ret i32 %r
+}
+
+; COST-LABEL: smin.i8.v8i8
+; COST:       Found an estimated cost of 157 for instruction: %r = call i8 @llvm.experimental.vector.reduce.smin.i8.v8i8(<8 x i8> %v)
+; CODE-LABEL: smin.i8.v8i8
+; CODE:       sminv b0, v0.8b
+define i8 @smin.i8.v8i8(<8 x i8> %v) {
+  %r = call i8 @llvm.experimental.vector.reduce.smin.i8.v8i8(<8 x i8> %v)
+  ret i8 %r
+}
+
+; COST-LABEL: smin.i8.v16i8
+; COST:       Found an estimated cost of 388 for instruction: %r = call i8 @llvm.experimental.vector.reduce.smin.i8.v16i8(<16 x i8> %v)
+; CODE-LABEL: smin.i8.v16i8
+; CODE:       sminv b0, v0.16b
+define i8 @smin.i8.v16i8(<16 x i8> %v) {
+  %r = call i8 @llvm.experimental.vector.reduce.smin.i8.v16i8(<16 x i8> %v)
+  ret i8 %r
+}
+
+; COST-LABEL: smin.i16.v4i16
+; COST:       Found an estimated cost of 58 for instruction: %r = call i16 @llvm.experimental.vector.reduce.smin.i16.v4i16(<4 x i16> %v)
+; CODE-LABEL: smin.i16.v4i16
+; CODE:       sminv h0, v0.4h
+define i16 @smin.i16.v4i16(<4 x i16> %v) {
+  %r = call i16 @llvm.experimental.vector.reduce.smin.i16.v4i16(<4 x i16> %v)
+  ret i16 %r
+}
+
+; COST-LABEL: smin.i16.v8i16
+; COST:       Found an estimated cost of 157 for instruction: %r = call i16 @llvm.experimental.vector.reduce.smin.i16.v8i16(<8 x i16> %v)
+; CODE-LABEL: smin.i16.v8i16
+; CODE:       sminv h0, v0.8h
+define i16 @smin.i16.v8i16(<8 x i16> %v) {
+  %r = call i16 @llvm.experimental.vector.reduce.smin.i16.v8i16(<8 x i16> %v)
+  ret i16 %r
+}
+
+; COST-LABEL: smin.i32.v4i32
+; COST:       Found an estimated cost of 58 for instruction: %r = call i32 @llvm.experimental.vector.reduce.smin.i32.v4i32(<4 x i32> %v)
+; CODE-LABEL: smin.i32.v4i32
+; CODE:       sminv s0, v0.4s
+define i32 @smin.i32.v4i32(<4 x i32> %v) {
+  %r = call i32 @llvm.experimental.vector.reduce.smin.i32.v4i32(<4 x i32> %v)
+  ret i32 %r
+}
+
+; COST-LABEL: smax.i8.v8i8
+; COST:       Found an estimated cost of 157 for instruction: %r = call i8 @llvm.experimental.vector.reduce.smax.i8.v8i8(<8 x i8> %v)
+; CODE-LABEL: smax.i8.v8i8
+; CODE:       smaxv b0, v0.8b
+define i8 @smax.i8.v8i8(<8 x i8> %v) {
+  %r = call i8 @llvm.experimental.vector.reduce.smax.i8.v8i8(<8 x i8> %v)
+  ret i8 %r
+}
+
+; COST-LABEL: smax.i8.v16i8
+; COST:       Found an estimated cost of 388 for instruction: %r = call i8 @llvm.experimental.vector.reduce.smax.i8.v16i8(<16 x i8> %v)
+; CODE-LABEL: smax.i8.v16i8
+; CODE:       smaxv b0, v0.16b
+define i8 @smax.i8.v16i8(<16 x i8> %v) {
+  %r = call i8 @llvm.experimental.vector.reduce.smax.i8.v16i8(<16 x i8> %v)
+  ret i8 %r
+}
+
+; COST-LABEL: smax.i16.v4i16
+; COST:       Found an estimated cost of 58 for instruction: %r = call i16 @llvm.experimental.vector.reduce.smax.i16.v4i16(<4 x i16> %v)
+; CODE-LABEL: smax.i16.v4i16
+; CODE:       smaxv h0, v0.4h
+define i16 @smax.i16.v4i16(<4 x i16> %v) {
+  %r = call i16 @llvm.experimental.vector.reduce.smax.i16.v4i16(<4 x i16> %v)
+  ret i16 %r
+}
+
+; COST-LABEL: smax.i16.v8i16
+; COST:       Found an estimated cost of 157 for instruction: %r = call i16 @llvm.experimental.vector.reduce.smax.i16.v8i16(<8 x i16> %v)
+; CODE-LABEL: smax.i16.v8i16
+; CODE:       smaxv h0, v0.8h
+define i16 @smax.i16.v8i16(<8 x i16> %v) {
+  %r = call i16 @llvm.experimental.vector.reduce.smax.i16.v8i16(<8 x i16> %v)
+  ret i16 %r
+}
+
+; COST-LABEL: smax.i32.v4i32
+; COST:       Found an estimated cost of 58 for instruction: %r = call i32 @llvm.experimental.vector.reduce.smax.i32.v4i32(<4 x i32> %v)
+; CODE-LABEL: smax.i32.v4i32
+; CODE:       smaxv s0, v0.4s
+define i32 @smax.i32.v4i32(<4 x i32> %v) {
+  %r = call i32 @llvm.experimental.vector.reduce.smax.i32.v4i32(<4 x i32> %v)
+  ret i32 %r
+}
+
+; COST-LABEL: fmin.f32.v4f32
+; COST:       Found an estimated cost of 58 for instruction: %r = call nnan float @llvm.experimental.vector.reduce.fmin.f32.v4f32(<4 x float> %v)
+; CODE-LABEL: fmin.f32.v4f32
+; CODE:       fminnmv s0, v0.4s
+define float @fmin.f32.v4f32(<4 x float> %v) {
+  %r = call nnan float @llvm.experimental.vector.reduce.fmin.f32.v4f32(<4 x float> %v)
+  ret float %r
+}
+
+; COST-LABEL: fmax.f32.v4f32
+; COST:       Found an estimated cost of 58 for instruction: %r = call nnan float @llvm.experimental.vector.reduce.fmax.f32.v4f32(<4 x float> %v)
+; CODE-LABEL: fmax.f32.v4f32
+; CODE:       fmaxnmv s0, v0.4s
+define float @fmax.f32.v4f32(<4 x float> %v) {
+  %r = call nnan float @llvm.experimental.vector.reduce.fmax.f32.v4f32(<4 x float> %v)
+  ret float %r
+}
+
+declare i8 @llvm.experimental.vector.reduce.add.i8.v8i8(<8 x i8>)
+declare i8 @llvm.experimental.vector.reduce.add.i8.v16i8(<16 x i8>)
+declare i16 @llvm.experimental.vector.reduce.add.i16.v4i16(<4 x i16>)
+declare i16 @llvm.experimental.vector.reduce.add.i16.v8i16(<8 x i16>)
+declare i32 @llvm.experimental.vector.reduce.add.i32.v4i32(<4 x i32>)
+
+declare i8 @llvm.experimental.vector.reduce.umin.i8.v8i8(<8 x i8>)
+declare i8 @llvm.experimental.vector.reduce.umin.i8.v16i8(<16 x i8>)
+declare i16 @llvm.experimental.vector.reduce.umin.i16.v4i16(<4 x i16>)
+declare i16 @llvm.experimental.vector.reduce.umin.i16.v8i16(<8 x i16>)
+declare i32 @llvm.experimental.vector.reduce.umin.i32.v4i32(<4 x i32>)
+
+declare i8 @llvm.experimental.vector.reduce.umax.i8.v8i8(<8 x i8>)
+declare i8 @llvm.experimental.vector.reduce.umax.i8.v16i8(<16 x i8>)
+declare i16 @llvm.experimental.vector.reduce.umax.i16.v4i16(<4 x i16>)
+declare i16 @llvm.experimental.vector.reduce.umax.i16.v8i16(<8 x i16>)
+declare i32 @llvm.experimental.vector.reduce.umax.i32.v4i32(<4 x i32>)
+
+declare i8 @llvm.experimental.vector.reduce.smin.i8.v8i8(<8 x i8>)
+declare i8 @llvm.experimental.vector.reduce.smin.i8.v16i8(<16 x i8>)
+declare i16 @llvm.experimental.vector.reduce.smin.i16.v4i16(<4 x i16>)
+declare i16 @llvm.experimental.vector.reduce.smin.i16.v8i16(<8 x i16>)
+declare i32 @llvm.experimental.vector.reduce.smin.i32.v4i32(<4 x i32>)
+
+declare i8 @llvm.experimental.vector.reduce.smax.i8.v8i8(<8 x i8>)
+declare i8 @llvm.experimental.vector.reduce.smax.i8.v16i8(<16 x i8>)
+declare i16 @llvm.experimental.vector.reduce.smax.i16.v4i16(<4 x i16>)
+declare i16 @llvm.experimental.vector.reduce.smax.i16.v8i16(<8 x i16>)
+declare i32 @llvm.experimental.vector.reduce.smax.i32.v4i32(<4 x i32>)
+
+declare float @llvm.experimental.vector.reduce.fmin.f32.v4f32(<4 x float>)
+
+declare float @llvm.experimental.vector.reduce.fmax.f32.v4f32(<4 x float>)