Index: include/llvm/Analysis/TargetTransformInfo.h
===================================================================
--- include/llvm/Analysis/TargetTransformInfo.h
+++ include/llvm/Analysis/TargetTransformInfo.h
@@ -312,6 +312,11 @@
   bool isLegalMaskedStore(Type *DataType, int Consecutive) const;
   bool isLegalMaskedLoad(Type *DataType, int Consecutive) const;
 
+  /// \brief Return true if the target supports a 'sum of absolute differences'
+  /// instruction for the given type. Calling this function with a null
+  /// argument reports whether the target supports the SAD instruction at all.
+  bool isLegalSad(Type *DataType) const;
+
   /// \brief Return the cost of the scaling factor used in the addressing
   /// mode represented by AM for this target, for a load/store
   /// of the specified type.
@@ -542,6 +547,7 @@
                                      int64_t Scale) = 0;
   virtual bool isLegalMaskedStore(Type *DataType, int Consecutive) = 0;
   virtual bool isLegalMaskedLoad(Type *DataType, int Consecutive) = 0;
+  virtual bool isLegalSad(Type *DataType) = 0;
   virtual int getScalingFactorCost(Type *Ty, GlobalValue *BaseGV,
                                    int64_t BaseOffset, bool HasBaseReg,
                                    int64_t Scale) = 0;
@@ -658,6 +664,7 @@
   bool isLegalMaskedLoad(Type *DataType, int Consecutive) override {
     return Impl.isLegalMaskedLoad(DataType, Consecutive);
   }
+  bool isLegalSad(Type *DataType) override { return Impl.isLegalSad(DataType); }
   int getScalingFactorCost(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset,
                            bool HasBaseReg, int64_t Scale) override {
     return Impl.getScalingFactorCost(Ty, BaseGV, BaseOffset, HasBaseReg, Scale);
Index: include/llvm/Analysis/TargetTransformInfoImpl.h
===================================================================
--- include/llvm/Analysis/TargetTransformInfoImpl.h
+++ include/llvm/Analysis/TargetTransformInfoImpl.h
@@ -217,6 +217,8 @@
 
   bool isLegalMaskedLoad(Type *DataType, int Consecutive) { return false; }
 
+  bool isLegalSad(Type *DataType) { return false; }
+
   int getScalingFactorCost(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset,
                            bool HasBaseReg, int64_t Scale) {
     // Guess that all legal addressing mode are free.
Index: include/llvm/CodeGen/ISDOpcodes.h
===================================================================
--- include/llvm/CodeGen/ISDOpcodes.h
+++ include/llvm/CodeGen/ISDOpcodes.h
@@ -325,6 +325,12 @@
     /// Byte Swap and Counting operators.
     BSWAP, CTTZ, CTLZ, CTPOP,
 
+    /// SAD - Computes the 'Sum Of Absolute Differences' of the elements of the
+    /// two input integer vectors; the reduced sum is returned as an integer
+    /// scalar.
+    /// This node is generated from the llvm.sad intrinsic.
+    SAD,
+
     /// Bit counting operators with an undefined result for zero inputs.
     CTTZ_ZERO_UNDEF, CTLZ_ZERO_UNDEF,
 
Index: include/llvm/IR/Intrinsics.td
===================================================================
--- include/llvm/IR/Intrinsics.td
+++ include/llvm/IR/Intrinsics.td
@@ -584,6 +584,11 @@
 def int_clear_cache : Intrinsic<[], [llvm_ptr_ty, llvm_ptr_ty],
                                 [], "llvm.clear_cache">;
 
+// Calculate the Sum of Absolute Differences (SAD) of the two input integer
+// vectors.
+def int_sad : Intrinsic<[llvm_anyint_ty],
+                        [llvm_anyvector_ty, llvm_anyvector_ty], [IntrNoMem]>;
+
 //===-------------------------- Masked Intrinsics -------------------------===//
 //
 def int_masked_store : Intrinsic<[], [llvm_anyvector_ty, LLVMPointerTo<0>,
Index: lib/Analysis/TargetTransformInfo.cpp
===================================================================
--- lib/Analysis/TargetTransformInfo.cpp
+++ lib/Analysis/TargetTransformInfo.cpp
@@ -115,6 +115,10 @@
   return TTIImpl->isLegalMaskedLoad(DataType, Consecutive);
 }
 
+bool TargetTransformInfo::isLegalSad(Type *DataType) const {
+  return TTIImpl->isLegalSad(DataType);
+}
+
 int TargetTransformInfo::getScalingFactorCost(Type *Ty, GlobalValue *BaseGV,
                                               int64_t BaseOffset,
                                               bool HasBaseReg,
Index: lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
===================================================================
--- lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
+++ lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
@@ -140,6 +140,7 @@
 
   SDValue ExpandBSWAP(SDValue Op, SDLoc dl);
   SDValue ExpandBitCount(unsigned Opc, SDValue Op, SDLoc dl);
+  SDValue ExpandSAD(EVT DestVT, SDValue LHS, SDValue RHS, SDLoc dl);
 
   SDValue ExpandExtractFromVectorThroughStack(SDValue Op);
   SDValue ExpandInsertToVectorThroughStack(SDValue Op);
@@ -1287,7 +1288,11 @@
       return;
     }
     break;
-
+  case ISD::SAD:
+    Action = TLI.getOperationAction(Node->getOpcode(),
+                                    Node->getOperand(0).getValueType());
+    SimpleFinishLegalizing = false;
+    break;
   default:
     if (Node->getOpcode() >= ISD::BUILTIN_OP_END) {
       Action = TargetLowering::Legal;
@@ -1382,7 +1387,7 @@
     dbgs() << "\n";
 #endif
     llvm_unreachable("Do not know how to legalize this operator!");
-
+  case ISD::SAD:
   case ISD::CALLSEQ_START:
   case ISD::CALLSEQ_END:
     break;
@@ -2733,6 +2738,27 @@
   }
 }
 
+/// Expand a v16i8 SAD into a v2i64 SAD whose two i64 elements are truncated
+/// to DestVT and added. The switch is keyed on the (vector) operand type.
+SDValue SelectionDAGLegalize::ExpandSAD(EVT DestVT, SDValue LHS, SDValue RHS,
+                                        SDLoc DL) {
+  switch (LHS.getValueType().getSimpleVT().SimpleTy) {
+  default:
+    llvm_unreachable("Unhandled Expand type in SAD");
+  case MVT::v16i8: {
+    SDValue Sad = DAG.getNode(ISD::SAD, DL, MVT::v2i64, LHS, RHS);
+    EVT VecIdxTy = DAG.getTargetLoweringInfo().getVectorIdxTy();
+    SDValue BottomHalf = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i64, Sad,
+                                     DAG.getConstant(0, VecIdxTy));
+    SDValue TopHalf = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i64, Sad,
+                                  DAG.getConstant(1, VecIdxTy));
+    BottomHalf = DAG.getNode(ISD::TRUNCATE, DL, DestVT, BottomHalf);
+    TopHalf = DAG.getNode(ISD::TRUNCATE, DL, DestVT, TopHalf);
+    return DAG.getNode(ISD::ADD, DL, DestVT, TopHalf, BottomHalf);
+  }
+  }
+}
+
 /// Expand the specified bitcount instruction into operations.
 SDValue SelectionDAGLegalize::ExpandBitCount(unsigned Opc, SDValue Op,
                                              SDLoc dl) {
@@ -2854,6 +2880,10 @@
   case ISD::BSWAP:
     Results.push_back(ExpandBSWAP(Node->getOperand(0), dl));
     break;
+  case ISD::SAD:
+    Results.push_back(ExpandSAD(Node->getValueType(0), Node->getOperand(0),
+                                Node->getOperand(1), dl));
+    break;
   case ISD::FRAMEADDR:
   case ISD::RETURNADDR:
   case ISD::FRAME_TO_ARGS_OFFSET:
Index: lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
===================================================================
--- lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -4376,6 +4376,13 @@
   return DAG.getNode(ISD::FPOW, dl, LHS.getValueType(), LHS, RHS);
 }
 
+/// ExpandSad - Expand a call to the llvm.sad intrinsic into a single ISD::SAD
+/// node that yields a value of type VT.
+static SDValue ExpandSad(SDLoc DL, EVT VT, SDValue LHS, SDValue RHS,
+                         SelectionDAG &DAG) {
+  SDValue Result = DAG.getNode(ISD::SAD, DL, VT, LHS, RHS);
+  return Result;
+}
 
 /// ExpandPowI - Expand a llvm.powi intrinsic.
 static SDValue ExpandPowI(SDLoc DL, SDValue LHS, SDValue RHS,
@@ -5086,6 +5093,12 @@
                              getValue(I.getArgOperand(0)).getValueType(),
                              getValue(I.getArgOperand(0))));
     return nullptr;
+  case Intrinsic::sad: {
+    setValue(&I, ExpandSad(sdl, TLI.getValueType(I.getType()),
+                           getValue(I.getArgOperand(0)),
+                           getValue(I.getArgOperand(1)), DAG));
+    return nullptr;
+  }
   case Intrinsic::cttz: {
     SDValue Arg = getValue(I.getArgOperand(0));
     ConstantInt *CI = cast<ConstantInt>(I.getArgOperand(1));
Index: lib/Target/X86/X86ISelDAGToDAG.cpp
===================================================================
--- lib/Target/X86/X86ISelDAGToDAG.cpp
+++ lib/Target/X86/X86ISelDAGToDAG.cpp
@@ -2084,6 +2084,15 @@
 
   switch (Opcode) {
   default: break;
+  case ISD::SAD: {
+    // The xmm-register form of PSADBW requires SSE2 (AVX implies SSE2); SSE1
+    // only provides the MMX-register form, which PSADBWrr does not encode.
+    if (!Subtarget->hasSSE2())
+      break;
+    SDValue Ops[] = {Node->getOperand(0), Node->getOperand(1)};
+    return CurDAG->getMachineNode(X86::PSADBWrr, dl, Node->getValueType(0),
+                                  Ops);
+  }
   case ISD::INTRINSIC_W_CHAIN: {
     unsigned IntNo = cast<ConstantSDNode>(Node->getOperand(1))->getZExtValue();
     switch (IntNo) {
Index: lib/Target/X86/X86ISelLowering.cpp
===================================================================
--- lib/Target/X86/X86ISelLowering.cpp
+++ lib/Target/X86/X86ISelLowering.cpp
@@ -911,6 +911,9 @@
     setOperationAction(ISD::SELECT,             MVT::v2f64, Custom);
     setOperationAction(ISD::SELECT,             MVT::v2i64, Custom);
 
+    setOperationAction(ISD::SAD,                MVT::v8i8, Custom);
+    setOperationAction(ISD::SAD,                MVT::v16i8, Expand);
+
     setOperationAction(ISD::FP_TO_SINT,         MVT::v4i32, Legal);
     setOperationAction(ISD::SINT_TO_FP,         MVT::v4i32, Legal);
 
@@ -16909,6 +16912,33 @@
   return SDValue();
 }
 
+/// Custom-lower a SAD of two v8i8 vectors: widen both operands to v16i8, emit
+/// a v2i64 SAD and return its low i64 element converted to Op's result type.
+/// Returns an empty SDValue when the operand type or subtarget is unsupported.
+static SDValue LowerSAD(SDValue Op, const X86Subtarget *Subtarget,
+                        SelectionDAG &DAG) {
+  SDNode *Node = Op.getNode();
+  SDLoc dl(Node);
+  SDValue N0 = Node->getOperand(0);
+  SDValue N1 = Node->getOperand(1);
+
+  // The xmm form of PSADBW needs SSE2; only v8i8 operands are handled here.
+  if (!Subtarget->hasSSE2() || N0.getValueType() != MVT::v8i8)
+    return SDValue();
+
+  // Put the real data in the low half: element 0 of the v2i64 result is the
+  // sum over bytes 0-7, so the undefined upper half never reaches the result.
+  SDValue Undef = DAG.getUNDEF(MVT::v8i8);
+  SDValue Op0 = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v16i8, N0, Undef);
+  SDValue Op1 = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v16i8, N1, Undef);
+  SDValue SAD = DAG.getNode(ISD::SAD, dl, MVT::v2i64, Op0, Op1);
+
+  EVT VecIdxTy = DAG.getTargetLoweringInfo().getVectorIdxTy();
+  SDValue BottomHalf = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i64, SAD,
+                                   DAG.getConstant(0, VecIdxTy));
+  return DAG.getZExtOrTrunc(BottomHalf, dl, Op.getValueType());
+}
+
 static SDValue LowerCTPOP(SDValue Op, const X86Subtarget *Subtarget,
                           SelectionDAG &DAG) {
   SDNode *Node = Op.getNode();
@@ -17247,6 +17277,7 @@
   case ISD::ADD:                return LowerADD(Op, DAG);
   case ISD::SUB:                return LowerSUB(Op, DAG);
   case ISD::FSINCOS:            return LowerFSINCOS(Op, Subtarget, DAG);
+  case ISD::SAD:                return LowerSAD(Op, Subtarget, DAG);
   }
 }
 
Index: lib/Target/X86/X86TargetTransformInfo.h
===================================================================
--- lib/Target/X86/X86TargetTransformInfo.h
+++ lib/Target/X86/X86TargetTransformInfo.h
@@ -103,6 +103,7 @@
                          Type *Ty);
   bool isLegalMaskedLoad(Type *DataType, int Consecutive);
   bool isLegalMaskedStore(Type *DataType, int Consecutive);
+  bool isLegalSad(Type *DataType);
 
   /// @}
 };
Index: lib/Target/X86/X86TargetTransformInfo.cpp
===================================================================
--- lib/Target/X86/X86TargetTransformInfo.cpp
+++ lib/Target/X86/X86TargetTransformInfo.cpp
@@ -1124,3 +1124,25 @@
   return isLegalMaskedLoad(DataType, Consecutive);
 }
 
+/// Return true if this subtarget has a SAD instruction usable for \p DataTy.
+/// A null \p DataTy asks only whether the subtarget has a SAD instruction.
+bool X86TTIImpl::isLegalSad(Type *DataTy) {
+  // The xmm form of PSADBW is an SSE2 instruction; AVX implies SSE2, while
+  // SSE1 alone offers only the MMX-register form.
+  if (!ST->hasSSE2())
+    return false;
+
+  if (!DataTy)
+    return true;
+
+  assert(DataTy->isVectorTy() && "Must be a vector");
+  assert(DataTy->getScalarType()->isIntegerTy() && "Elem must be an integer");
+
+  // Only vectors of i8 elements are supported ...
+  if (!DataTy->getScalarType()->isIntegerTy(8))
+    return false;
+
+  // ... and only with 8 or 16 elements.
+  unsigned VLen = cast<VectorType>(DataTy)->getNumElements();
+  return VLen == 8 || VLen == 16;
+}
Index: test/CodeGen/X86/sad_intrinsic.ll
===================================================================
--- /dev/null
+++ test/CodeGen/X86/sad_intrinsic.ll
@@ -0,0 +1,24 @@
+; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+avx < %s | FileCheck %s -check-prefix=AVX
+
+; AVX-LABEL: sad_intrinsic_test1
+
+; AVX: psadbw %xmm1, %xmm0
+
+
+define i32 @sad_intrinsic_test1(<16 x i8> %a1, <16 x i8> %a2) {
+  %1 = call i32 @llvm.sad.i32.v16i8.v16i8(<16 x i8> %a1, <16 x i8> %a2)
+  ret i32 %1
+}
+
+
+; AVX-LABEL: sad_intrinsic_test2
+
+; AVX: psadbw %xmm1, %xmm0
+
+define i32 @sad_intrinsic_test2(<8 x i8> %a1, <8 x i8> %a2) {
+  %1 = call i32 @llvm.sad.i32.v8i8.v8i8(<8 x i8> %a1, <8 x i8> %a2)
+  ret i32 %1
+}
+
+declare i32 @llvm.sad.i32.v8i8.v8i8(<8 x i8>, <8 x i8>)
+declare i32 @llvm.sad.i32.v16i8.v16i8(<16 x i8>, <16 x i8>)