Index: lib/Target/AArch64/AArch64ISelLowering.h
===================================================================
--- lib/Target/AArch64/AArch64ISelLowering.h
+++ lib/Target/AArch64/AArch64ISelLowering.h
@@ -152,7 +152,11 @@
   UMINV,
   SMAXV,
   UMAXV,
-
+  // Vector min/max
+  SMIN,
+  SMAX,
+  UMIN,
+  UMAX,
   // Vector bitwise negation
   NOT,
 
@@ -181,7 +185,6 @@
   /// need to re-interpret the data in SIMD vector registers in big-endian
   /// mode without emitting such REV instructions.
   NVCAST,
-
   SMULL,
   UMULL,
 
Index: lib/Target/AArch64/AArch64ISelLowering.cpp
===================================================================
--- lib/Target/AArch64/AArch64ISelLowering.cpp
+++ lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -8610,21 +8610,77 @@
   return SDValue();
 }
 
-// vselect (v1i1 setcc) ->
-//     vselect (v1iXX setcc)  (XX is the size of the compared operand type)
-// FIXME: Currently the type legalizer can't handle VSELECT having v1i1 as
-// condition. If it can legalize "VSELECT v1i1" correctly, no need to combine
-// such VSELECT.
+/// \brief Generate a vector min/max node:
+///   vselect (setcc lt x, y), x, y -> [s|u]min x, y
+///   vselect (setcc gt x, y), x, y -> [s|u]max x, y
+static SDValue vcombineMinNumMaxNum(SDLoc DL, EVT VT, SDValue &SetCondCode,
+                                    SDValue IfTrue, SDValue IfFalse,
+                                    SelectionDAG &DAG) {
+
+  assert(SetCondCode.getOpcode() == ISD::SETCC &&
+         "No set condition code operand");
+  if (!SetCondCode.hasOneUse())
+    return SDValue();
+  if (!VT.isVector())
+    return SDValue();
+  if (!VT.isInteger())
+    return SDValue();
+
+  ISD::CondCode CC = cast<CondCodeSDNode>(SetCondCode.getOperand(2))->get();
+  SDValue CCLHS = SetCondCode.getOperand(0);
+  SDValue CCRHS = SetCondCode.getOperand(1);
+
+  if (!(CCLHS == IfTrue && CCRHS == IfFalse) &&
+      !(CCLHS == IfFalse && CCRHS == IfTrue))
+    return SDValue();
+
+  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+
+  unsigned Opcode;
+  switch (CC) {
+  case ISD::SETLT:
+  case ISD::SETLE: {
+    Opcode = (CCLHS == IfTrue) ? AArch64ISD::SMIN : AArch64ISD::SMAX;
+    if (TLI.isOperationLegalOrCustom(Opcode, VT))
+      return DAG.getNode(Opcode, DL, VT, CCLHS, CCRHS);
+    return SDValue();
+  }
+  case ISD::SETULT:
+  case ISD::SETULE: {
+    Opcode = (CCLHS == IfTrue) ? AArch64ISD::UMIN : AArch64ISD::UMAX;
+    if (TLI.isOperationLegalOrCustom(Opcode, VT))
+      return DAG.getNode(Opcode, DL, VT, CCLHS, CCRHS);
+    return SDValue();
+  }
+  case ISD::SETGT:
+  case ISD::SETGE: {
+    Opcode = (CCLHS == IfTrue) ? AArch64ISD::SMAX : AArch64ISD::SMIN;
+    if (TLI.isOperationLegalOrCustom(Opcode, VT))
+      return DAG.getNode(Opcode, DL, VT, CCLHS, CCRHS);
+    return SDValue();
+  }
+  case ISD::SETUGT:
+  case ISD::SETUGE: {
+    Opcode = (CCLHS == IfTrue) ? AArch64ISD::UMAX : AArch64ISD::UMIN;
+    if (TLI.isOperationLegalOrCustom(Opcode, VT))
+      return DAG.getNode(Opcode, DL, VT, CCLHS, CCRHS);
+    return SDValue();
+  }
+  default:
+    // Equality and unordered condition codes do not form a min/max.
+    return SDValue();
+  }
+}
+/// \brief Combine vselect and setcc.
 static SDValue performVSelectCombine(SDNode *N, SelectionDAG &DAG) {
   SDValue N0 = N->getOperand(0);
-  EVT CCVT = N0.getValueType();
-  if (N0.getOpcode() != ISD::SETCC || CCVT.getVectorNumElements() != 1 ||
-      CCVT.getVectorElementType() != MVT::i1)
+  if (N0.getOpcode() != ISD::SETCC)
     return SDValue();
 
   EVT ResVT = N->getValueType(0);
   EVT CmpVT = N0.getOperand(0).getValueType();
+
   // Only combine when the result type is of the same size as the compared
   // operands.
   if (ResVT.getSizeInBits() != CmpVT.getSizeInBits())
     return SDValue();
 
@@ -8632,6 +8688,24 @@
 
   SDValue IfTrue = N->getOperand(1);
   SDValue IfFalse = N->getOperand(2);
+
+  // Min/Max
+
+  SDValue MinMax =
+      vcombineMinNumMaxNum(SDLoc(N), ResVT, N0, IfTrue, IfFalse, DAG);
+  if (MinMax.getNode())
+    return MinMax;
+
+  // vselect (v1i1 setcc) ->
+  //     vselect (v1iXX setcc)  (XX is the size of the compared operand type)
+  // FIXME: Currently the type legalizer can't handle VSELECT having v1i1 as
+  // condition. If it can legalize "VSELECT v1i1" correctly, no need to combine
+  // such VSELECT.
+  EVT CCVT = N0.getValueType();
+  if (CCVT.getVectorNumElements() != 1 ||
+      CCVT.getVectorElementType() != MVT::i1)
+    return SDValue();
+
   SDValue SetCC = DAG.getSetCC(SDLoc(N),
                                CmpVT.changeVectorElementTypeToInteger(),
                                N0.getOperand(0), N0.getOperand(1),
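For reference, this is the shape of IR the new combine targets (the function name below is illustrative, not part of the patch): an icmp feeding a select of the same two operands. Once the vectorizer emits this idiom, vcombineMinNumMaxNum should fold the resulting vselect/setcc pair into a single AArch64ISD::SMIN node, provided the setcc has a single use and the result is an integer vector of the same size as the compared operands.

define <4 x i32> @smin_4s(<4 x i32> %a, <4 x i32> %b) {
  ; vselect (setlt a, b), a, b -> smin a, b during DAG combining.
  %cmp = icmp slt <4 x i32> %a, %b
  %sel = select <4 x i1> %cmp, <4 x i32> %a, <4 x i32> %b
  ret <4 x i32> %sel
}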
Index: lib/Target/AArch64/AArch64InstrInfo.td
===================================================================
--- lib/Target/AArch64/AArch64InstrInfo.td
+++ lib/Target/AArch64/AArch64InstrInfo.td
@@ -266,6 +266,10 @@
 def AArch64uminv : SDNode<"AArch64ISD::UMINV", SDT_AArch64UnaryVec>;
 def AArch64smaxv : SDNode<"AArch64ISD::SMAXV", SDT_AArch64UnaryVec>;
 def AArch64umaxv : SDNode<"AArch64ISD::UMAXV", SDT_AArch64UnaryVec>;
+def AArch64smin : SDNode<"AArch64ISD::SMIN", SDT_AArch64binvec>;
+def AArch64umin : SDNode<"AArch64ISD::UMIN", SDT_AArch64binvec>;
+def AArch64smax : SDNode<"AArch64ISD::SMAX", SDT_AArch64binvec>;
+def AArch64umax : SDNode<"AArch64ISD::UMAX", SDT_AArch64binvec>;
 
 //===----------------------------------------------------------------------===//
 
@@ -2827,6 +2831,66 @@
 def : Pat<(AArch64bsl (v2i64 V128:$Rd), V128:$Rn, V128:$Rm),
           (BSLv16i8 V128:$Rd, V128:$Rn, V128:$Rm)>;
 
+// UMAX
+def : Pat<(v8i8 (AArch64umax (v8i8 V64:$Rn), (v8i8 V64:$Rm))),
+          (UMAXv8i8 V64:$Rn, V64:$Rm)>;
+def : Pat<(v4i16 (AArch64umax (v4i16 V64:$Rn), (v4i16 V64:$Rm))),
+          (UMAXv4i16 V64:$Rn, V64:$Rm)>;
+def : Pat<(v2i32 (AArch64umax (v2i32 V64:$Rn), (v2i32 V64:$Rm))),
+          (UMAXv2i32 V64:$Rn, V64:$Rm)>;
+
+def : Pat<(v16i8 (AArch64umax (v16i8 V128:$Rn), (v16i8 V128:$Rm))),
+          (UMAXv16i8 V128:$Rn, V128:$Rm)>;
+def : Pat<(v8i16 (AArch64umax (v8i16 V128:$Rn), (v8i16 V128:$Rm))),
+          (UMAXv8i16 V128:$Rn, V128:$Rm)>;
+def : Pat<(v4i32 (AArch64umax (v4i32 V128:$Rn), (v4i32 V128:$Rm))),
+          (UMAXv4i32 V128:$Rn, V128:$Rm)>;
+
+// UMIN
+def : Pat<(v8i8 (AArch64umin (v8i8 V64:$Rn), (v8i8 V64:$Rm))),
+          (UMINv8i8 V64:$Rn, V64:$Rm)>;
+def : Pat<(v4i16 (AArch64umin (v4i16 V64:$Rn), (v4i16 V64:$Rm))),
+          (UMINv4i16 V64:$Rn, V64:$Rm)>;
+def : Pat<(v2i32 (AArch64umin (v2i32 V64:$Rn), (v2i32 V64:$Rm))),
+          (UMINv2i32 V64:$Rn, V64:$Rm)>;
+
+def : Pat<(v16i8 (AArch64umin (v16i8 V128:$Rn), (v16i8 V128:$Rm))),
+          (UMINv16i8 V128:$Rn, V128:$Rm)>;
+def : Pat<(v8i16 (AArch64umin (v8i16 V128:$Rn), (v8i16 V128:$Rm))),
+          (UMINv8i16 V128:$Rn, V128:$Rm)>;
+def : Pat<(v4i32 (AArch64umin (v4i32 V128:$Rn), (v4i32 V128:$Rm))),
+          (UMINv4i32 V128:$Rn, V128:$Rm)>;
+
+// SMAX
+def : Pat<(v8i8 (AArch64smax (v8i8 V64:$Rn), (v8i8 V64:$Rm))),
+          (SMAXv8i8 V64:$Rn, V64:$Rm)>;
+def : Pat<(v4i16 (AArch64smax (v4i16 V64:$Rn), (v4i16 V64:$Rm))),
+          (SMAXv4i16 V64:$Rn, V64:$Rm)>;
+def : Pat<(v2i32 (AArch64smax (v2i32 V64:$Rn), (v2i32 V64:$Rm))),
+          (SMAXv2i32 V64:$Rn, V64:$Rm)>;
+
+def : Pat<(v16i8 (AArch64smax (v16i8 V128:$Rn), (v16i8 V128:$Rm))),
+          (SMAXv16i8 V128:$Rn, V128:$Rm)>;
+def : Pat<(v8i16 (AArch64smax (v8i16 V128:$Rn), (v8i16 V128:$Rm))),
+          (SMAXv8i16 V128:$Rn, V128:$Rm)>;
+def : Pat<(v4i32 (AArch64smax (v4i32 V128:$Rn), (v4i32 V128:$Rm))),
+          (SMAXv4i32 V128:$Rn, V128:$Rm)>;
+
+// SMIN
+def : Pat<(v8i8 (AArch64smin (v8i8 V64:$Rn), (v8i8 V64:$Rm))),
+          (SMINv8i8 V64:$Rn, V64:$Rm)>;
+def : Pat<(v4i16 (AArch64smin (v4i16 V64:$Rn), (v4i16 V64:$Rm))),
+          (SMINv4i16 V64:$Rn, V64:$Rm)>;
+def : Pat<(v2i32 (AArch64smin (v2i32 V64:$Rn), (v2i32 V64:$Rm))),
+          (SMINv2i32 V64:$Rn, V64:$Rm)>;
+
+def : Pat<(v16i8 (AArch64smin (v16i8 V128:$Rn), (v16i8 V128:$Rm))),
+          (SMINv16i8 V128:$Rn, V128:$Rm)>;
+def : Pat<(v8i16 (AArch64smin (v8i16 V128:$Rn), (v8i16 V128:$Rm))),
+          (SMINv8i16 V128:$Rn, V128:$Rm)>;
+def : Pat<(v4i32 (AArch64smin (v4i32 V128:$Rn), (v4i32 V128:$Rm))),
+          (SMINv4i32 V128:$Rn, V128:$Rm)>;
+
 def : InstAlias<"mov{\t$dst.16b, $src.16b|.16b\t$dst, $src}",
                 (ORRv16i8 V128:$dst, V128:$src, V128:$src), 1>;
 def : InstAlias<"mov{\t$dst.8h, $src.8h|.8h\t$dst, $src}",
                 (ORRv16i8 V128:$dst, V128:$src, V128:$src), 0>;
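The patterns above map each new node onto the existing NEON min/max instruction definitions for both the 64-bit and 128-bit register classes. As a sketch of the expected end-to-end result (expected output, not verified here): the unsigned variant below should now select UMAXv4i32 and emit a single "umax v0.4s, v0.4s, v1.4s", where previously the vselect would typically lower to a compare and bit-select sequence.

define <4 x i32> @umax_4s(<4 x i32> %a, <4 x i32> %b) {
  ; Expected to lower to: umax v0.4s, v0.4s, v1.4s
  %cmp = icmp ugt <4 x i32> %a, %b
  %sel = select <4 x i1> %cmp, <4 x i32> %a, <4 x i32> %b
  ret <4 x i32> %sel
}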
Index: test/CodeGen/AArch64/aarch64-vmin-vmax-opt.ll
===================================================================
--- /dev/null
+++ test/CodeGen/AArch64/aarch64-vmin-vmax-opt.ll
@@ -0,0 +1,66 @@
+; RUN: llc < %s -mtriple=arm64-none-linux-gnu -o - | FileCheck %s
+
+target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
+target triple = "arm64-apple-ios8.0.0"
+
+@b = common global [10000 x i32] zeroinitializer, align 4
+@c = common global [10000 x i32] zeroinitializer, align 4
+@m = common global [10000 x i32] zeroinitializer, align 4
+
+; Function Attrs: nounwind ssp
+define void @foo() {
+; CHECK-LABEL: foo
+; CHECK: smax v[[OUT:[0-9]+]].4s, v[[IN1:[0-9]+]].4s, v[[IN2:[0-9]+]].4s
+; CHECK: smin v[[OUT:[0-9]+]].4s, v[[IN1:[0-9]+]].4s, v[[IN2:[0-9]+]].4s
+; CHECK: umax v[[OUT:[0-9]+]].4s, v[[IN1:[0-9]+]].4s, v[[IN2:[0-9]+]].4s
+; CHECK: umin v[[OUT:[0-9]+]].4s, v[[IN1:[0-9]+]].4s, v[[IN2:[0-9]+]].4s
+entry:
+  %index = add i64 1, 0
+  %broadcast.splatinsert = insertelement <4 x i64> undef, i64 %index, i32 0
+  %broadcast.splat = shufflevector <4 x i64> %broadcast.splatinsert, <4 x i64> undef, <4 x i32> zeroinitializer
+  %induction = add <4 x i64> %broadcast.splat, <i64 0, i64 1, i64 2, i64 3>
+  %0 = add nsw <4 x i64> %induction, <i64 1, i64 1, i64 1, i64 1>
+  %1 = extractelement <4 x i64> %0, i32 0
+  %2 = getelementptr inbounds [10000 x i32], [10000 x i32]* @b, i64 0, i64 %1
+  %3 = getelementptr i32, i32* %2, i32 0
+  %4 = bitcast i32* %3 to <4 x i32>*
+  %wide.load46 = load <4 x i32>, <4 x i32>* %4, align 4
+  %5 = getelementptr inbounds [10000 x i32], [10000 x i32]* @c, i64 0, i64 %1
+  %6 = getelementptr i32, i32* %5, i32 0
+  %7 = bitcast i32* %6 to <4 x i32>*
+  %wide.load47 = load <4 x i32>, <4 x i32>* %7, align 4
+  %8 = add nsw <4 x i32> %wide.load46, %wide.load47
+  %9 = getelementptr i32, i32* %2, i32 0
+  %10 = bitcast i32* %9 to <4 x i32>*
+  %wide.load = load <4 x i32>, <4 x i32>* %10, align 4
+  %11 = add nsw <4 x i32> %wide.load46, %wide.load
+  %12 = icmp sgt <4 x i32> %8, %11
+  %13 = select <4 x i1> %12, <4 x i32> %8, <4 x i32> %11
+  %i0 = extractelement <4 x i64> %induction, i32 0
+  %melti0 = getelementptr inbounds [10000 x i32], [10000 x i32]* @m, i64 0, i64 %i0
+  %14 = getelementptr i32, i32* %melti0, i32 0
+  %15 = bitcast i32* %14 to <4 x i32>*
+  store <4 x i32> %13, <4 x i32>* %15, align 4
+  %16 = icmp slt <4 x i32> %8, %11
+  %17 = select <4 x i1> %16, <4 x i32> %8, <4 x i32> %11
+  %i1 = extractelement <4 x i64> %induction, i32 1
+  %melti1 = getelementptr inbounds [10000 x i32], [10000 x i32]* @m, i64 0, i64 %i1
+  %s1 = getelementptr i32, i32* %melti1, i32 0
+  %s11 = bitcast i32* %s1 to <4 x i32>*
+  store <4 x i32> %17, <4 x i32>* %s11, align 4
+  %18 = icmp ugt <4 x i32> %8, %11
+  %19 = select <4 x i1> %18, <4 x i32> %8, <4 x i32> %11
+  %i2 = extractelement <4 x i64> %induction, i32 2
+  %melti2 = getelementptr inbounds [10000 x i32], [10000 x i32]* @m, i64 0, i64 %i2
+  %s2 = getelementptr i32, i32* %melti2, i32 0
+  %s22 = bitcast i32* %s2 to <4 x i32>*
+  store <4 x i32> %19, <4 x i32>* %s22, align 4
+  %20 = icmp ult <4 x i32> %8, %11
+  %21 = select <4 x i1> %20, <4 x i32> %8, <4 x i32> %11
+  %i3 = extractelement <4 x i64> %induction, i32 3
+  %melti3 = getelementptr inbounds [10000 x i32], [10000 x i32]* @m, i64 0, i64 %i3
+  %s3 = getelementptr i32, i32* %melti3, i32 0
+  %s33 = bitcast i32* %s3 to <4 x i32>*
+  store <4 x i32> %21, <4 x i32>* %s33, align 4
+  ret void
+}
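The test above only exercises the .4s forms through a vectorized-loop skeleton. A more direct check for one of the 64-bit register forms could look like the following sketch (a separate hypothetical test, not part of this patch):

; RUN: llc < %s -mtriple=arm64-none-linux-gnu -o - | FileCheck %s
define <8 x i8> @umin_8b(<8 x i8> %a, <8 x i8> %b) {
; CHECK-LABEL: umin_8b
; CHECK: umin v0.8b, v0.8b, v1.8b
  %cmp = icmp ult <8 x i8> %a, %b
  %sel = select <8 x i1> %cmp, <8 x i8> %a, <8 x i8> %b
  ret <8 x i8> %sel
}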