Index: include/llvm/Analysis/TargetTransformInfo.h
===================================================================
--- include/llvm/Analysis/TargetTransformInfo.h
+++ include/llvm/Analysis/TargetTransformInfo.h
@@ -312,6 +312,11 @@
   bool isLegalMaskedStore(Type *DataType, int Consecutive) const;
   bool isLegalMaskedLoad(Type *DataType, int Consecutive) const;
 
+  /// \brief Return true if the target supports 'sum of absolute differences'
+  /// instruction for the given type. Calling this function with a null
+  /// argument reports whether the target supports SAD in general.
+  bool isLegalSad(Type *DataType) const;
+
   /// \brief Return the cost of the scaling factor used in the addressing
   /// mode represented by AM for this target, for a load/store
   /// of the specified type.
@@ -542,6 +547,7 @@
                                      int64_t Scale) = 0;
   virtual bool isLegalMaskedStore(Type *DataType, int Consecutive) = 0;
   virtual bool isLegalMaskedLoad(Type *DataType, int Consecutive) = 0;
+  virtual bool isLegalSad(Type *DataType) = 0;
   virtual int getScalingFactorCost(Type *Ty, GlobalValue *BaseGV,
                                    int64_t BaseOffset, bool HasBaseReg,
                                    int64_t Scale) = 0;
@@ -658,6 +664,7 @@
   bool isLegalMaskedLoad(Type *DataType, int Consecutive) override {
     return Impl.isLegalMaskedLoad(DataType, Consecutive);
   }
+  bool isLegalSad(Type *DataType) override { return Impl.isLegalSad(DataType); }
   int getScalingFactorCost(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset,
                            bool HasBaseReg, int64_t Scale) override {
     return Impl.getScalingFactorCost(Ty, BaseGV, BaseOffset, HasBaseReg, Scale);
Index: include/llvm/Analysis/TargetTransformInfoImpl.h
===================================================================
--- include/llvm/Analysis/TargetTransformInfoImpl.h
+++ include/llvm/Analysis/TargetTransformInfoImpl.h
@@ -217,6 +217,8 @@
 
   bool isLegalMaskedLoad(Type *DataType, int Consecutive) { return false; }
 
+  bool isLegalSad(Type *DataType) { return false; }
+
   int getScalingFactorCost(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset,
                            bool HasBaseReg, int64_t Scale) {
     // Guess that all legal addressing mode are free.
Index: include/llvm/CodeGen/ISDOpcodes.h
===================================================================
--- include/llvm/CodeGen/ISDOpcodes.h
+++ include/llvm/CodeGen/ISDOpcodes.h
@@ -325,6 +325,12 @@
     /// Byte Swap and Counting operators.
     BSWAP, CTTZ, CTLZ, CTPOP,
 
+    /// SAD - Computes the 'Sum Of Absolute Differences' of the elements of
+    /// its two integer vector operands; the reduced sum is returned as an
+    /// integer scalar.
+    /// This node is generated from the llvm.sad intrinsic.
+    SAD,
+
     /// Bit counting operators with an undefined result for zero inputs.
     CTTZ_ZERO_UNDEF, CTLZ_ZERO_UNDEF,
 
Index: include/llvm/IR/Intrinsics.td
===================================================================
--- include/llvm/IR/Intrinsics.td
+++ include/llvm/IR/Intrinsics.td
@@ -584,6 +584,11 @@
 def int_clear_cache : Intrinsic<[], [llvm_ptr_ty, llvm_ptr_ty],
                                 [], "llvm.clear_cache">;
 
+// Sum of Absolute Differences (SAD) of two integer vectors. Overloaded on the
+// integer result type and on each vector operand type; declared IntrNoMem.
+def int_sad : Intrinsic<[llvm_anyint_ty],
+                        [ llvm_anyvector_ty, llvm_anyvector_ty ], [IntrNoMem]>;
+
 //===-------------------------- Masked Intrinsics -------------------------===//
 //
 def int_masked_store : Intrinsic<[], [llvm_anyvector_ty, LLVMPointerTo<0>,
Index: lib/Analysis/TargetTransformInfo.cpp
===================================================================
--- lib/Analysis/TargetTransformInfo.cpp
+++ lib/Analysis/TargetTransformInfo.cpp
@@ -115,6 +115,10 @@
   return TTIImpl->isLegalMaskedLoad(DataType, Consecutive);
 }
 
+bool TargetTransformInfo::isLegalSad(Type *DataType) const {
+  return TTIImpl->isLegalSad(DataType); // Forwards the query to TTIImpl.
+}
+
 int TargetTransformInfo::getScalingFactorCost(Type *Ty, GlobalValue *BaseGV,
                                               int64_t BaseOffset,
                                               bool HasBaseReg,
Index: lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
===================================================================
--- lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
+++ lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
@@ -140,6 +140,7 @@
 
   SDValue ExpandBSWAP(SDValue Op, SDLoc dl);
   SDValue ExpandBitCount(unsigned Opc, SDValue Op, SDLoc dl);
+  SDValue ExpandSAD(EVT DestVT, SDValue LHS, SDValue RHS, SDLoc dl);
 
   SDValue ExpandExtractFromVectorThroughStack(SDValue Op);
   SDValue ExpandInsertToVectorThroughStack(SDValue Op);
@@ -1287,7 +1288,11 @@
       return;
     }
     break;
-
+  case ISD::SAD:
+    Action = TLI.getOperationAction(Node->getOpcode(),
+                                    Node->getOperand(0).getValueType());
+    SimpleFinishLegalizing = false;
+    break;
   default:
     if (Node->getOpcode() >= ISD::BUILTIN_OP_END) {
       Action = TargetLowering::Legal;
@@ -1382,7 +1387,7 @@
     dbgs() << "\n";
 #endif
     llvm_unreachable("Do not know how to legalize this operator!");
-
+  case ISD::SAD:
   case ISD::CALLSEQ_START:
   case ISD::CALLSEQ_END:
     break;
@@ -2733,6 +2738,27 @@
   }
 }
 
+/// Expand an ISD::SAD with v16i8 operands (the operands are always vectors):
+/// form a v2i64 SAD, then add its two i64 elements truncated to \p DestVT.
+SDValue SelectionDAGLegalize::ExpandSAD(EVT DestVT, SDValue LHS, SDValue RHS,
+                                        SDLoc DL) {
+  switch (LHS.getValueType().getSimpleVT().SimpleTy) {
+  default:
+    llvm_unreachable("Unhandled Expand type in SAD");
+  case MVT::v16i8: {
+    SDValue Sad = DAG.getNode(ISD::SAD, DL, MVT::v2i64, LHS, RHS);
+    EVT VecIdxTy = DAG.getTargetLoweringInfo().getVectorIdxTy();
+    SDValue BottomHalf = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i64, Sad,
+                                     DAG.getConstant(0, VecIdxTy));
+    SDValue TopHalf = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i64, Sad,
+                                  DAG.getConstant(1, VecIdxTy));
+    BottomHalf = DAG.getNode(ISD::TRUNCATE, DL, DestVT, BottomHalf);
+    TopHalf = DAG.getNode(ISD::TRUNCATE, DL, DestVT, TopHalf);
+    return DAG.getNode(ISD::ADD, DL, DestVT, TopHalf, BottomHalf);
+  }
+  }
+}
+
 /// Expand the specified bitcount instruction into operations.
 SDValue SelectionDAGLegalize::ExpandBitCount(unsigned Opc, SDValue Op,
                                              SDLoc dl) {
@@ -2854,6 +2880,10 @@
   case ISD::BSWAP:
     Results.push_back(ExpandBSWAP(Node->getOperand(0), dl));
     break;
+  case ISD::SAD:
+    Results.push_back(ExpandSAD(Node->getValueType(0), Node->getOperand(0),
+                                Node->getOperand(1), dl));
+    break;
   case ISD::FRAMEADDR:
   case ISD::RETURNADDR:
   case ISD::FRAME_TO_ARGS_OFFSET:
Index: lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
===================================================================
--- lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -4376,6 +4376,13 @@
   return DAG.getNode(ISD::FPOW, dl, LHS.getValueType(), LHS, RHS);
 }
 
+/// ExpandSad - Build the DAG node for the llvm.sad intrinsic: one ISD::SAD
+/// node with result type VT and operands LHS and RHS.
+static SDValue ExpandSad(SDLoc DL, EVT VT, SDValue LHS, SDValue RHS,
+                         SelectionDAG &DAG) {
+  // A single generic node is emitted; nothing is expanded at this point.
+  return DAG.getNode(ISD::SAD, DL, VT, LHS, RHS);
+}
 
 /// ExpandPowI - Expand a llvm.powi intrinsic.
 static SDValue ExpandPowI(SDLoc DL, SDValue LHS, SDValue RHS,
@@ -5086,6 +5093,11 @@
                              getValue(I.getArgOperand(0)).getValueType(),
                              getValue(I.getArgOperand(0))));
     return nullptr;
+  case Intrinsic::sad: {
+    setValue(&I, ExpandSad(sdl, TLI.getValueType(I.getType()), getValue(I.getArgOperand(0)),
+                           getValue(I.getArgOperand(1)), DAG));
+    return nullptr;
+  }
   case Intrinsic::cttz: {
     SDValue Arg = getValue(I.getArgOperand(0));
     ConstantInt *CI = cast<ConstantInt>(I.getArgOperand(1));
Index: lib/Target/X86/X86ISelDAGToDAG.cpp
===================================================================
--- lib/Target/X86/X86ISelDAGToDAG.cpp
+++ lib/Target/X86/X86ISelDAGToDAG.cpp
@@ -2084,6 +2084,17 @@
 
   switch (Opcode) {
   default: break;
+  case ISD::SAD: {
+    SDNode *New;
+    SDValue Ops[] = {Node->getOperand(0), Node->getOperand(1)};
+
+    if (Subtarget->hasAVX() || Subtarget->hasSSE1() || Subtarget->hasSSE2())
+      New =
+          CurDAG->getMachineNode(X86::PSADBWrr, dl, Node->getValueType(0), Ops);
+    else
+      New = Node;
+    return New;
+  }
   case ISD::INTRINSIC_W_CHAIN: {
     unsigned IntNo = cast<ConstantSDNode>(Node->getOperand(1))->getZExtValue();
     switch (IntNo) {
Index: lib/Target/X86/X86ISelLowering.cpp
===================================================================
--- lib/Target/X86/X86ISelLowering.cpp
+++ lib/Target/X86/X86ISelLowering.cpp
@@ -911,6 +911,9 @@
     setOperationAction(ISD::SELECT,             MVT::v2f64, Custom);
     setOperationAction(ISD::SELECT,             MVT::v2i64, Custom);
 
+    setOperationAction(ISD::SAD, MVT::v8i8, Custom);
+    setOperationAction(ISD::SAD, MVT::v16i8, Expand);
+
     setOperationAction(ISD::FP_TO_SINT,         MVT::v4i32, Legal);
     setOperationAction(ISD::SINT_TO_FP,         MVT::v4i32, Legal);
 
@@ -16909,6 +16912,33 @@
   return SDValue();
 }
 
+/// Custom-lower a v8i8 ISD::SAD: widen both operands to v16i8 with undef in
+/// the upper lanes, form a v2i64 SAD and return element 0 truncated to i32.
+/// Returns an empty SDValue for any other operand type or without SSE2.
+static SDValue LowerSAD(SDValue Op, const X86Subtarget *Subtarget,
+                        SelectionDAG &DAG) {
+  SDNode *Node = Op.getNode();
+  SDLoc dl(Node);
+  SDValue N0 = Node->getOperand(0);
+  SDValue N1 = Node->getOperand(1);
+
+  // The widened v16i8 form needs SSE2, which AVX implies; SSE1 is not enough.
+  if (N0.getValueType() != MVT::v8i8 || !Subtarget->hasSSE2())
+    return SDValue();
+
+  // CONCAT_VECTORS places its first operand in the low elements, so the real
+  // data must come first for element 0 of the v2i64 result to hold its sum.
+  EVT VecIdxTy = DAG.getTargetLoweringInfo().getVectorIdxTy();
+  SDValue Undef = DAG.getUNDEF(MVT::v8i8);
+  SDValue Op0 = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v16i8, N0, Undef);
+  SDValue Op1 = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v16i8, N1, Undef);
+  SDValue Ops[] = {Op0, Op1};
+  SDValue SAD = DAG.getNode(ISD::SAD, dl, MVT::v2i64, Ops);
+  SDValue BottomHalf = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i64, SAD,
+                                   DAG.getConstant(0, VecIdxTy));
+  return DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, BottomHalf);
+}
+
 static SDValue LowerCTPOP(SDValue Op, const X86Subtarget *Subtarget,
                           SelectionDAG &DAG) {
   SDNode *Node = Op.getNode();
@@ -17247,6 +17277,7 @@
   case ISD::ADD:                return LowerADD(Op, DAG);
   case ISD::SUB:                return LowerSUB(Op, DAG);
   case ISD::FSINCOS:            return LowerFSINCOS(Op, Subtarget, DAG);
+  case ISD::SAD:                return LowerSAD(Op, Subtarget, DAG);
   }
 }
 
Index: lib/Target/X86/X86TargetTransformInfo.h
===================================================================
--- lib/Target/X86/X86TargetTransformInfo.h
+++ lib/Target/X86/X86TargetTransformInfo.h
@@ -103,6 +103,7 @@
                          Type *Ty);
   bool isLegalMaskedLoad(Type *DataType, int Consecutive);
   bool isLegalMaskedStore(Type *DataType, int Consecutive);
+  bool isLegalSad(Type *DataType);
 
   /// @}
 };
Index: lib/Target/X86/X86TargetTransformInfo.cpp
===================================================================
--- lib/Target/X86/X86TargetTransformInfo.cpp
+++ lib/Target/X86/X86TargetTransformInfo.cpp
@@ -1124,3 +1124,36 @@
   return isLegalMaskedLoad(DataType, Consecutive);
 }
 
+/// Return true if the X86 'sum of absolute differences' (SAD) lowering can
+/// handle vectors of type \p DataTy.
+///
+/// Passing a null \p DataTy asks whether the subtarget supports SAD at all.
+/// Otherwise \p DataTy must be an integer vector; only 8 or 16 elements of
+/// i8 are accepted.
+bool X86TTIImpl::isLegalSad(Type *DataTy) {
+  // SAD is selected to the XMM form of PSADBW, which needs SSE2 (AVX implies
+  // SSE2). Test this first so that the typed query below cannot report a
+  // type as legal on a subtarget lacking the instruction.
+  if (!ST->hasSSE2())
+    return false;
+
+  // A null type is the general "is SAD available" query.
+  if (!DataTy)
+    return true;
+
+  assert(DataTy->isVectorTy() && "Must be a vector");
+  assert(DataTy->getScalarType()->isIntegerTy() && "Elem must be an integer");
+
+  // Element type: only i8 is supported.
+  Type *ITy = DataTy->getScalarType();
+  if (ITy != Type::getInt8Ty(DataTy->getContext()))
+    return false;
+
+  // Element count: only the 8- and 16-element vectors are supported.
+  VectorType *Ty = cast<VectorType>(DataTy);
+  unsigned VLen = Ty->getNumElements();
+  if (VLen != 8 && VLen != 16)
+    return false;
+
+  return true;
+}
Index: test/CodeGen/X86/sad_intrinsic.ll
===================================================================
--- /dev/null
+++ test/CodeGen/X86/sad_intrinsic.ll
@@ -0,0 +1,24 @@
+; RUN: llc -mtriple=x86_64-unknown-linux-gnu  -mattr=+avx < %s | FileCheck %s -check-prefix=AVX
+
+; AVX-LABEL: sad_intrinsic_test1
+
+; AVX: psadbw     %xmm1, %xmm0 
+
+; llvm.sad on two <16 x i8> operands with an i32 result.
+define i32 @sad_intrinsic_test1(<16 x i8> %a1, <16 x i8> %a2) {
+  %1 = call i32 @llvm.sad.i32.v16i8.v16i8(<16 x i8> %a1, <16 x i8> %a2)
+  ret i32 %1
+}
+
+
+; AVX-LABEL: sad_intrinsic_test2
+
+; AVX: psadbw     %xmm1, %xmm0 
+; llvm.sad on two <8 x i8> operands with an i32 result.
+define i32 @sad_intrinsic_test2(<8 x i8> %a1, <8 x i8> %a2) {
+  %1 = call i32 @llvm.sad.i32.v8i8.v8i8(<8 x i8> %a1, <8 x i8> %a2)
+  ret i32 %1
+}
+
+declare i32 @llvm.sad.i32.v8i8.v8i8(<8 x i8>, <8 x i8>)
+declare i32 @llvm.sad.i32.v16i8.v16i8(<16 x i8>, <16 x i8>)