Index: llvm/trunk/include/llvm/Analysis/TargetTransformInfo.h
===================================================================
--- llvm/trunk/include/llvm/Analysis/TargetTransformInfo.h
+++ llvm/trunk/include/llvm/Analysis/TargetTransformInfo.h
@@ -114,6 +114,40 @@
   /// \name Generic Target Information
   /// @{
 
+  /// \brief The kind of cost model.
+  ///
+  /// There are several different cost models that can be customized by the
+  /// target. The normalization of each cost model may be target specific.
+  enum TargetCostKind {
+    TCK_RecipThroughput, ///< Reciprocal throughput.
+    TCK_Latency,         ///< The latency of instruction.
+    TCK_CodeSize         ///< Instruction code size.
+  };
+
+  /// \brief Query the cost of a specified instruction.
+  ///
+  /// Clients should use this interface to query the cost of an existing
+  /// instruction. The instruction must have a valid parent (basic block).
+  ///
+  /// Note, this method does not cache the cost calculation and it
+  /// can be expensive in some cases.
+  int getInstructionCost(const Instruction *I, enum TargetCostKind kind) const {
+    switch (kind){
+    case TCK_RecipThroughput:
+      return getInstructionThroughput(I);
+
+    case TCK_Latency:
+      return getInstructionLatency(I);
+
+    case TCK_CodeSize:
+      return getUserCost(I);
+
+    default:
+      llvm_unreachable("Unknown instruction cost kind");
+      return 0;
+    }
+  }
+
   /// \brief Underlying constants for 'cost' values in this interface.
   ///
   /// Many APIs in this interface return a cost. This enum defines the
@@ -868,6 +902,14 @@
   /// @}
 
 private:
+  /// \brief Estimate the latency of specified instruction.
+  /// Returns 1 as the default value.
+  int getInstructionLatency(const Instruction *I) const;
+
+  /// \brief Returns the expected throughput cost of the instruction.
+  /// Returns -1 if the cost is unknown.
+  int getInstructionThroughput(const Instruction *I) const;
+
   /// \brief The abstract base class used to type erase specific TTI
   /// implementations.
   class Concept;
@@ -1044,6 +1086,7 @@
   virtual bool useReductionIntrinsic(unsigned Opcode, Type *Ty,
                                      ReductionFlags) const = 0;
   virtual bool shouldExpandReduction(const IntrinsicInst *II) const = 0;
+  virtual int getInstructionLatency(const Instruction *I) = 0;
 };
 
 template <typename T>
@@ -1406,6 +1449,9 @@
   bool shouldExpandReduction(const IntrinsicInst *II) const override {
     return Impl.shouldExpandReduction(II);
   }
+  int getInstructionLatency(const Instruction *I) override {
+    return Impl.getInstructionLatency(I);
+  }
 };
 
 template <typename T>
Index: llvm/trunk/include/llvm/Analysis/TargetTransformInfoImpl.h
===================================================================
--- llvm/trunk/include/llvm/Analysis/TargetTransformInfoImpl.h
+++ llvm/trunk/include/llvm/Analysis/TargetTransformInfoImpl.h
@@ -771,6 +771,25 @@
         Operator::getOpcode(U), U->getType(),
         U->getNumOperands() == 1 ? U->getOperand(0)->getType() : nullptr);
   }
+
+  int getInstructionLatency(const Instruction *I) {
+    if (isa<PHINode>(I))
+      return 0;
+
+    if (isa<CallInst>(I))
+      return 40;
+
+    if (isa<LoadInst>(I))
+      return 4;
+
+    Type *dstTy = I->getType();
+    if (VectorType *VectorTy = dyn_cast<VectorType>(dstTy))
+      dstTy = VectorTy->getElementType();
+    if (dstTy->isFloatingPointTy())
+      return 3;
+
+    return 1;
+  }
 };
 }
 
Index: llvm/trunk/include/llvm/CodeGen/BasicTTIImpl.h
===================================================================
--- llvm/trunk/include/llvm/CodeGen/BasicTTIImpl.h
+++ llvm/trunk/include/llvm/CodeGen/BasicTTIImpl.h
@@ -350,6 +350,13 @@
     UP.BEInsns = 2;
   }
 
+  int getInstructionLatency(const Instruction *I) {
+    if (isa<LoadInst>(I))
+      return getST()->getSchedModel().DefaultLoadLatency;
+
+    return BaseT::getInstructionLatency(I);
+  }
+
   /// @}
 
   /// \name Vector TTI Implementations
Index: llvm/trunk/lib/Analysis/CostModel.cpp
===================================================================
--- llvm/trunk/lib/Analysis/CostModel.cpp
+++ llvm/trunk/lib/Analysis/CostModel.cpp
@@ -20,26 +20,27 @@
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/Analysis/Passes.h"
 #include "llvm/Analysis/TargetTransformInfo.h"
-#include "llvm/Analysis/VectorUtils.h"
 #include "llvm/IR/Function.h"
 #include "llvm/IR/Instructions.h"
-#include "llvm/IR/IntrinsicInst.h"
-#include "llvm/IR/PatternMatch.h"
-#include "llvm/IR/Value.h"
 #include "llvm/Pass.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/raw_ostream.h"
 using namespace llvm;
-using namespace PatternMatch;
+
+static cl::opt<TargetTransformInfo::TargetCostKind> CostKind(
+    "cost-kind", cl::desc("Target cost kind"),
+    cl::init(TargetTransformInfo::TCK_RecipThroughput),
+    cl::values(clEnumValN(TargetTransformInfo::TCK_RecipThroughput,
+                          "throughput", "Reciprocal throughput"),
+               clEnumValN(TargetTransformInfo::TCK_Latency,
+                          "latency", "Instruction latency"),
+               clEnumValN(TargetTransformInfo::TCK_CodeSize,
+                          "code-size", "Code size")));
 
 #define CM_NAME "cost-model"
 #define DEBUG_TYPE CM_NAME
 
-static cl::opt<bool> EnableReduxCost("costmodel-reduxcost", cl::init(false),
-                                     cl::Hidden,
-                                     cl::desc("Recognize reduction patterns."));
-
 namespace {
   class CostModelAnalysis : public FunctionPass {
 
@@ -54,7 +55,9 @@
     /// Returns -1 if the cost is unknown.
     /// Note, this method does not cache the cost calculation and it
     /// can be expensive in some cases.
-    unsigned getInstructionCost(const Instruction *I) const;
+    unsigned getInstructionCost(const Instruction *I) const {
+      return TTI->getInstructionCost(I, TargetTransformInfo::TCK_RecipThroughput);
+    }
 
   private:
     void getAnalysisUsage(AnalysisUsage &AU) const override;
@@ -92,564 +95,13 @@
  return false;
 }
 
-static bool isReverseVectorMask(ArrayRef<int> Mask) {
-  for (unsigned i = 0, MaskSize = Mask.size(); i < MaskSize; ++i)
-    if (Mask[i] >= 0 && Mask[i] != (int)(MaskSize - 1 - i))
-      return false;
-  return true;
-}
-
-static bool isSingleSourceVectorMask(ArrayRef<int> Mask) {
-  bool Vec0 = false;
-  bool Vec1 = false;
-  for (unsigned i = 0, NumVecElts = Mask.size(); i < NumVecElts; ++i) {
-    if (Mask[i] >= 0) {
-      if ((unsigned)Mask[i] >= NumVecElts)
-        Vec1 = true;
-      else
-        Vec0 = true;
-    }
-  }
-  return !(Vec0 && Vec1);
-}
-
-static bool isZeroEltBroadcastVectorMask(ArrayRef<int> Mask) {
-  for (unsigned i = 0; i < Mask.size(); ++i)
-    if (Mask[i] > 0)
-      return false;
-  return true;
-}
-
-static bool isAlternateVectorMask(ArrayRef<int> Mask) {
-  bool isAlternate = true;
-  unsigned MaskSize = Mask.size();
-
-  // Example: shufflevector A, B, <0,5,2,7>
-  for (unsigned i = 0; i < MaskSize && isAlternate; ++i) {
-    if (Mask[i] < 0)
-      continue;
-    isAlternate = Mask[i] == (int)((i & 1) ? MaskSize + i : i);
-  }
-
-  if (isAlternate)
-    return true;
-
-  isAlternate = true;
-  // Example: shufflevector A, B, <4,1,6,3>
-  for (unsigned i = 0; i < MaskSize && isAlternate; ++i) {
-    if (Mask[i] < 0)
-      continue;
-    isAlternate = Mask[i] == (int)((i & 1) ? i : MaskSize + i);
-  }
-
-  return isAlternate;
-}
-
-static TargetTransformInfo::OperandValueKind getOperandInfo(Value *V) {
-  TargetTransformInfo::OperandValueKind OpInfo =
-      TargetTransformInfo::OK_AnyValue;
-
-  // Check for a splat of a constant or for a non uniform vector of constants.
-  if (isa<ConstantVector>(V) || isa<ConstantDataVector>(V)) {
-    OpInfo = TargetTransformInfo::OK_NonUniformConstantValue;
-    if (cast<Constant>(V)->getSplatValue() != nullptr)
-      OpInfo = TargetTransformInfo::OK_UniformConstantValue;
-  }
-
-  // Check for a splat of a uniform value. This is not loop aware, so return
-  // true only for the obviously uniform cases (argument, globalvalue)
-  const Value *Splat = getSplatValue(V);
-  if (Splat && (isa<Argument>(Splat) || isa<GlobalValue>(Splat)))
-    OpInfo = TargetTransformInfo::OK_UniformValue;
-
-  return OpInfo;
-}
-
-static bool matchPairwiseShuffleMask(ShuffleVectorInst *SI, bool IsLeft,
-                                     unsigned Level) {
-  // We don't need a shuffle if we just want to have element 0 in position 0 of
-  // the vector.
-  if (!SI && Level == 0 && IsLeft)
-    return true;
-  else if (!SI)
-    return false;
-
-  SmallVector<int, 32> Mask(SI->getType()->getVectorNumElements(), -1);
-
-  // Build a mask of 0, 2, ... (left) or 1, 3, ... (right) depending on whether
-  // we look at the left or right side.
-  for (unsigned i = 0, e = (1 << Level), val = !IsLeft; i != e; ++i, val += 2)
-    Mask[i] = val;
-
-  SmallVector<int, 16> ActualMask = SI->getShuffleMask();
-  return Mask == ActualMask;
-}
-
-namespace {
-/// Kind of the reduction data.
-enum ReductionKind {
-  RK_None,           /// Not a reduction.
-  RK_Arithmetic,     /// Binary reduction data.
-  RK_MinMax,         /// Min/max reduction data.
-  RK_UnsignedMinMax, /// Unsigned min/max reduction data.
-};
-/// Contains opcode + LHS/RHS parts of the reduction operations.
-struct ReductionData {
-  ReductionData() = delete;
-  ReductionData(ReductionKind Kind, unsigned Opcode, Value *LHS, Value *RHS)
-      : Opcode(Opcode), LHS(LHS), RHS(RHS), Kind(Kind) {
-    assert(Kind != RK_None && "expected binary or min/max reduction only.");
-  }
-  unsigned Opcode = 0;
-  Value *LHS = nullptr;
-  Value *RHS = nullptr;
-  ReductionKind Kind = RK_None;
-  bool hasSameData(ReductionData &RD) const {
-    return Kind == RD.Kind && Opcode == RD.Opcode;
-  }
-};
-} // namespace
-
-static Optional<ReductionData> getReductionData(Instruction *I) {
-  Value *L, *R;
-  if (m_BinOp(m_Value(L), m_Value(R)).match(I))
-    return ReductionData(RK_Arithmetic, I->getOpcode(), L, R);
-  if (auto *SI = dyn_cast<SelectInst>(I)) {
-    if (m_SMin(m_Value(L), m_Value(R)).match(SI) ||
-        m_SMax(m_Value(L), m_Value(R)).match(SI) ||
-        m_OrdFMin(m_Value(L), m_Value(R)).match(SI) ||
-        m_OrdFMax(m_Value(L), m_Value(R)).match(SI) ||
-        m_UnordFMin(m_Value(L), m_Value(R)).match(SI) ||
-        m_UnordFMax(m_Value(L), m_Value(R)).match(SI)) {
-      auto *CI = cast<CmpInst>(SI->getCondition());
-      return ReductionData(RK_MinMax, CI->getOpcode(), L, R);
-    }
-    if (m_UMin(m_Value(L), m_Value(R)).match(SI) ||
-        m_UMax(m_Value(L), m_Value(R)).match(SI)) {
-      auto *CI = cast<CmpInst>(SI->getCondition());
-      return ReductionData(RK_UnsignedMinMax, CI->getOpcode(), L, R);
-    }
-  }
-  return llvm::None;
-}
-
-static ReductionKind matchPairwiseReductionAtLevel(Instruction *I,
-                                                   unsigned Level,
-                                                   unsigned NumLevels) {
-  // Match one level of pairwise operations.
-  // %rdx.shuf.0.0 = shufflevector <4 x float> %rdx, <4 x float> undef,
-  //       <4 x i32> <i32 0, i32 2 , i32 undef, i32 undef>
-  // %rdx.shuf.0.1 = shufflevector <4 x float> %rdx, <4 x float> undef,
-  //       <4 x i32> <i32 1, i32 3, i32 undef, i32 undef>
-  // %bin.rdx.0 = fadd <4 x float> %rdx.shuf.0.0, %rdx.shuf.0.1
-  if (!I)
-    return RK_None;
-
-  assert(I->getType()->isVectorTy() && "Expecting a vector type");
-
-  Optional<ReductionData> RD = getReductionData(I);
-  if (!RD)
-    return RK_None;
-
-  ShuffleVectorInst *LS = dyn_cast<ShuffleVectorInst>(RD->LHS);
-  if (!LS && Level)
-    return RK_None;
-  ShuffleVectorInst *RS = dyn_cast<ShuffleVectorInst>(RD->RHS);
-  if (!RS && Level)
-    return RK_None;
-
-  // On level 0 we can omit one shufflevector instruction.
-  if (!Level && !RS && !LS)
-    return RK_None;
-
-  // Shuffle inputs must match.
-  Value *NextLevelOpL = LS ? LS->getOperand(0) : nullptr;
-  Value *NextLevelOpR = RS ? RS->getOperand(0) : nullptr;
-  Value *NextLevelOp = nullptr;
-  if (NextLevelOpR && NextLevelOpL) {
-    // If we have two shuffles their operands must match.
-    if (NextLevelOpL != NextLevelOpR)
-      return RK_None;
-
-    NextLevelOp = NextLevelOpL;
-  } else if (Level == 0 && (NextLevelOpR || NextLevelOpL)) {
-    // On the first level we can omit the shufflevector <0, undef,...>. So the
-    // input to the other shufflevector <1, undef> must match with one of the
-    // inputs to the current binary operation.
-    // Example:
-    //  %NextLevelOpL = shufflevector %R, <1, undef ...>
-    //  %BinOp        = fadd          %NextLevelOpL, %R
-    if (NextLevelOpL && NextLevelOpL != RD->RHS)
-      return RK_None;
-    else if (NextLevelOpR && NextLevelOpR != RD->LHS)
-      return RK_None;
-
-    NextLevelOp = NextLevelOpL ? RD->RHS : RD->LHS;
-  } else {
-    return RK_None;
-  }
-
-  // Check that the next levels binary operation exists and matches with the
-  // current one.
-  if (Level + 1 != NumLevels) {
-    Optional<ReductionData> NextLevelRD =
-        getReductionData(cast<Instruction>(NextLevelOp));
-    if (!NextLevelRD || !RD->hasSameData(*NextLevelRD))
-      return RK_None;
-  }
-
-  // Shuffle mask for pairwise operation must match.
-  if (matchPairwiseShuffleMask(LS, /*IsLeft=*/true, Level)) {
-    if (!matchPairwiseShuffleMask(RS, /*IsLeft=*/false, Level))
-      return RK_None;
-  } else if (matchPairwiseShuffleMask(RS, /*IsLeft=*/true, Level)) {
-    if (!matchPairwiseShuffleMask(LS, /*IsLeft=*/false, Level))
-      return RK_None;
-  } else {
-    return RK_None;
-  }
-
-  if (++Level == NumLevels)
-    return RD->Kind;
-
-  // Match next level.
-  return matchPairwiseReductionAtLevel(cast<Instruction>(NextLevelOp), Level,
-                                       NumLevels);
-}
-
-static ReductionKind matchPairwiseReduction(const ExtractElementInst *ReduxRoot,
-                                            unsigned &Opcode, Type *&Ty) {
-  if (!EnableReduxCost)
-    return RK_None;
-
-  // Need to extract the first element.
-  ConstantInt *CI = dyn_cast<ConstantInt>(ReduxRoot->getOperand(1));
-  unsigned Idx = ~0u;
-  if (CI)
-    Idx = CI->getZExtValue();
-  if (Idx != 0)
-    return RK_None;
-
-  auto *RdxStart = dyn_cast<Instruction>(ReduxRoot->getOperand(0));
-  if (!RdxStart)
-    return RK_None;
-  Optional<ReductionData> RD = getReductionData(RdxStart);
-  if (!RD)
-    return RK_None;
-
-  Type *VecTy = RdxStart->getType();
-  unsigned NumVecElems = VecTy->getVectorNumElements();
-  if (!isPowerOf2_32(NumVecElems))
-    return RK_None;
-
-  // We look for a sequence of shuffle,shuffle,add triples like the following
-  // that builds a pairwise reduction tree.
-  //
-  //  (X0, X1, X2, X3)
-  //   (X0 + X1, X2 + X3, undef, undef)
-  //    ((X0 + X1) + (X2 + X3), undef, undef, undef)
-  //
-  // %rdx.shuf.0.0 = shufflevector <4 x float> %rdx, <4 x float> undef,
-  //       <4 x i32> <i32 0, i32 2 , i32 undef, i32 undef>
-  // %rdx.shuf.0.1 = shufflevector <4 x float> %rdx, <4 x float> undef,
-  //       <4 x i32> <i32 1, i32 3, i32 undef, i32 undef>
-  // %bin.rdx.0 = fadd <4 x float> %rdx.shuf.0.0, %rdx.shuf.0.1
-  // %rdx.shuf.1.0 = shufflevector <4 x float> %bin.rdx.0, <4 x float> undef,
-  //       <4 x i32> <i32 0, i32 undef, i32 undef, i32 undef>
-  // %rdx.shuf.1.1 = shufflevector <4 x float> %bin.rdx.0, <4 x float> undef,
-  //       <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
-  // %bin.rdx8 = fadd <4 x float> %rdx.shuf.1.0, %rdx.shuf.1.1
-  // %r = extractelement <4 x float> %bin.rdx8, i32 0
-  if (matchPairwiseReductionAtLevel(RdxStart, 0, Log2_32(NumVecElems)) ==
-      RK_None)
-    return RK_None;
-
-  Opcode = RD->Opcode;
-  Ty = VecTy;
-
-  return RD->Kind;
-}
-
-static std::pair<Value *, ShuffleVectorInst *>
-getShuffleAndOtherOprd(Value *L, Value *R) {
-  ShuffleVectorInst *S = nullptr;
-
-  if ((S = dyn_cast<ShuffleVectorInst>(L)))
-    return std::make_pair(R, S);
-
-  S = dyn_cast<ShuffleVectorInst>(R);
-  return std::make_pair(L, S);
-}
-
-static ReductionKind
-matchVectorSplittingReduction(const ExtractElementInst *ReduxRoot,
-                              unsigned &Opcode, Type *&Ty) {
-  if (!EnableReduxCost)
-    return RK_None;
-
-  // Need to extract the first element.
-  ConstantInt *CI = dyn_cast<ConstantInt>(ReduxRoot->getOperand(1));
-  unsigned Idx = ~0u;
-  if (CI)
-    Idx = CI->getZExtValue();
-  if (Idx != 0)
-    return RK_None;
-
-  auto *RdxStart = dyn_cast<Instruction>(ReduxRoot->getOperand(0));
-  if (!RdxStart)
-    return RK_None;
-  Optional<ReductionData> RD = getReductionData(RdxStart);
-  if (!RD)
-    return RK_None;
-
-  Type *VecTy = ReduxRoot->getOperand(0)->getType();
-  unsigned NumVecElems = VecTy->getVectorNumElements();
-  if (!isPowerOf2_32(NumVecElems))
-    return RK_None;
-
-  // We look for a sequence of shuffles and adds like the following matching one
-  // fadd, shuffle vector pair at a time.
-  //
-  // %rdx.shuf = shufflevector <4 x float> %rdx, <4 x float> undef,
-  //                           <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
-  // %bin.rdx = fadd <4 x float> %rdx, %rdx.shuf
-  // %rdx.shuf7 = shufflevector <4 x float> %bin.rdx, <4 x float> undef,
-  //                          <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
-  // %bin.rdx8 = fadd <4 x float> %bin.rdx, %rdx.shuf7
-  // %r = extractelement <4 x float> %bin.rdx8, i32 0
-
-  unsigned MaskStart = 1;
-  Instruction *RdxOp = RdxStart;
-  SmallVector<int, 32> ShuffleMask(NumVecElems, 0);
-  unsigned NumVecElemsRemain = NumVecElems;
-  while (NumVecElemsRemain - 1) {
-    // Check for the right reduction operation.
-    if (!RdxOp)
-      return RK_None;
-    Optional<ReductionData> RDLevel = getReductionData(RdxOp);
-    if (!RDLevel || !RDLevel->hasSameData(*RD))
-      return RK_None;
-
-    Value *NextRdxOp;
-    ShuffleVectorInst *Shuffle;
-    std::tie(NextRdxOp, Shuffle) =
-        getShuffleAndOtherOprd(RDLevel->LHS, RDLevel->RHS);
-
-    // Check the current reduction operation and the shuffle use the same value.
-    if (Shuffle == nullptr)
-      return RK_None;
-    if (Shuffle->getOperand(0) != NextRdxOp)
-      return RK_None;
-
-    // Check that shuffle masks matches.
-    for (unsigned j = 0; j != MaskStart; ++j)
-      ShuffleMask[j] = MaskStart + j;
-    // Fill the rest of the mask with -1 for undef.
-    std::fill(&ShuffleMask[MaskStart], ShuffleMask.end(), -1);
-
-    SmallVector<int, 16> Mask = Shuffle->getShuffleMask();
-    if (ShuffleMask != Mask)
-      return RK_None;
-
-    RdxOp = dyn_cast<Instruction>(NextRdxOp);
-    NumVecElemsRemain /= 2;
-    MaskStart *= 2;
-  }
-
-  Opcode = RD->Opcode;
-  Ty = VecTy;
-  return RD->Kind;
-}
-
-unsigned CostModelAnalysis::getInstructionCost(const Instruction *I) const {
-  if (!TTI)
-    return -1;
-
-  switch (I->getOpcode()) {
-  case Instruction::GetElementPtr:
-    return TTI->getUserCost(I);
-
-  case Instruction::Ret:
-  case Instruction::PHI:
-  case Instruction::Br: {
-    return TTI->getCFInstrCost(I->getOpcode());
-  }
-  case Instruction::Add:
-  case Instruction::FAdd:
-  case Instruction::Sub:
-  case Instruction::FSub:
-  case Instruction::Mul:
-  case Instruction::FMul:
-  case Instruction::UDiv:
-  case Instruction::SDiv:
-  case Instruction::FDiv:
-  case Instruction::URem:
-  case Instruction::SRem:
-  case Instruction::FRem:
-  case Instruction::Shl:
-  case Instruction::LShr:
-  case Instruction::AShr:
-  case Instruction::And:
-  case Instruction::Or:
-  case Instruction::Xor: {
-    TargetTransformInfo::OperandValueKind Op1VK =
-      getOperandInfo(I->getOperand(0));
-    TargetTransformInfo::OperandValueKind Op2VK =
-      getOperandInfo(I->getOperand(1));
-    SmallVector<const Value*, 2> Operands(I->operand_values()); 
-    return TTI->getArithmeticInstrCost(I->getOpcode(), I->getType(), Op1VK,
-                                       Op2VK, TargetTransformInfo::OP_None, 
-                                       TargetTransformInfo::OP_None, 
-                                       Operands);
-  }
-  case Instruction::Select: {
-    const SelectInst *SI = cast<SelectInst>(I);
-    Type *CondTy = SI->getCondition()->getType();
-    return TTI->getCmpSelInstrCost(I->getOpcode(), I->getType(), CondTy, I);
-  }
-  case Instruction::ICmp:
-  case Instruction::FCmp: {
-    Type *ValTy = I->getOperand(0)->getType();
-    return TTI->getCmpSelInstrCost(I->getOpcode(), ValTy, I->getType(), I);
-  }
-  case Instruction::Store: {
-    const StoreInst *SI = cast<StoreInst>(I);
-    Type *ValTy = SI->getValueOperand()->getType();
-    return TTI->getMemoryOpCost(I->getOpcode(), ValTy,
-                                SI->getAlignment(),
-                                SI->getPointerAddressSpace(), I);
-  }
-  case Instruction::Load: {
-    const LoadInst *LI = cast<LoadInst>(I);
-    return TTI->getMemoryOpCost(I->getOpcode(), I->getType(),
-                                LI->getAlignment(),
-                                LI->getPointerAddressSpace(), I);
-  }
-  case Instruction::ZExt:
-  case Instruction::SExt:
-  case Instruction::FPToUI:
-  case Instruction::FPToSI:
-  case Instruction::FPExt:
-  case Instruction::PtrToInt:
-  case Instruction::IntToPtr:
-  case Instruction::SIToFP:
-  case Instruction::UIToFP:
-  case Instruction::Trunc:
-  case Instruction::FPTrunc:
-  case Instruction::BitCast:
-  case Instruction::AddrSpaceCast: {
-    Type *SrcTy = I->getOperand(0)->getType();
-    return TTI->getCastInstrCost(I->getOpcode(), I->getType(), SrcTy, I);
-  }
-  case Instruction::ExtractElement: {
-    const ExtractElementInst * EEI = cast<ExtractElementInst>(I);
-    ConstantInt *CI = dyn_cast<ConstantInt>(I->getOperand(1));
-    unsigned Idx = -1;
-    if (CI)
-      Idx = CI->getZExtValue();
-
-    // Try to match a reduction sequence (series of shufflevector and vector
-    // adds followed by a extractelement).
-    unsigned ReduxOpCode;
-    Type *ReduxType;
-
-    switch (matchVectorSplittingReduction(EEI, ReduxOpCode, ReduxType)) {
-    case RK_Arithmetic:
-      return TTI->getArithmeticReductionCost(ReduxOpCode, ReduxType,
-                                             /*IsPairwiseForm=*/false);
-    case RK_MinMax:
-      return TTI->getMinMaxReductionCost(
-          ReduxType, CmpInst::makeCmpResultType(ReduxType),
-          /*IsPairwiseForm=*/false, /*IsUnsigned=*/false);
-    case RK_UnsignedMinMax:
-      return TTI->getMinMaxReductionCost(
-          ReduxType, CmpInst::makeCmpResultType(ReduxType),
-          /*IsPairwiseForm=*/false, /*IsUnsigned=*/true);
-    case RK_None:
-      break;
-    }
-
-    switch (matchPairwiseReduction(EEI, ReduxOpCode, ReduxType)) {
-    case RK_Arithmetic:
-      return TTI->getArithmeticReductionCost(ReduxOpCode, ReduxType,
-                                             /*IsPairwiseForm=*/true);
-    case RK_MinMax:
-      return TTI->getMinMaxReductionCost(
-          ReduxType, CmpInst::makeCmpResultType(ReduxType),
-          /*IsPairwiseForm=*/true, /*IsUnsigned=*/false);
-    case RK_UnsignedMinMax:
-      return TTI->getMinMaxReductionCost(
-          ReduxType, CmpInst::makeCmpResultType(ReduxType),
-          /*IsPairwiseForm=*/true, /*IsUnsigned=*/true);
-    case RK_None:
-      break;
-    }
-
-    return TTI->getVectorInstrCost(I->getOpcode(),
-                                   EEI->getOperand(0)->getType(), Idx);
-  }
-  case Instruction::InsertElement: {
-    const InsertElementInst * IE = cast<InsertElementInst>(I);
-    ConstantInt *CI = dyn_cast<ConstantInt>(IE->getOperand(2));
-    unsigned Idx = -1;
-    if (CI)
-      Idx = CI->getZExtValue();
-    return TTI->getVectorInstrCost(I->getOpcode(),
-                                   IE->getType(), Idx);
-  }
-  case Instruction::ShuffleVector: {
-    const ShuffleVectorInst *Shuffle = cast<ShuffleVectorInst>(I);
-    Type *VecTypOp0 = Shuffle->getOperand(0)->getType();
-    unsigned NumVecElems = VecTypOp0->getVectorNumElements();
-    SmallVector<int, 16> Mask = Shuffle->getShuffleMask();
-
-    if (NumVecElems == Mask.size()) {
-      if (isReverseVectorMask(Mask))
-        return TTI->getShuffleCost(TargetTransformInfo::SK_Reverse, VecTypOp0,
-                                   0, nullptr);
-      if (isAlternateVectorMask(Mask))
-        return TTI->getShuffleCost(TargetTransformInfo::SK_Alternate,
-                                   VecTypOp0, 0, nullptr);
-
-      if (isZeroEltBroadcastVectorMask(Mask))
-        return TTI->getShuffleCost(TargetTransformInfo::SK_Broadcast,
-                                   VecTypOp0, 0, nullptr);
-
-      if (isSingleSourceVectorMask(Mask))
-        return TTI->getShuffleCost(TargetTransformInfo::SK_PermuteSingleSrc,
-                                   VecTypOp0, 0, nullptr);
-
-      return TTI->getShuffleCost(TargetTransformInfo::SK_PermuteTwoSrc,
-                                 VecTypOp0, 0, nullptr);
-    }
-
-    return -1;
-  }
-  case Instruction::Call:
-    if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
-      SmallVector<Value *, 4> Args(II->arg_operands());
-
-      FastMathFlags FMF;
-      if (auto *FPMO = dyn_cast<FPMathOperator>(II))
-        FMF = FPMO->getFastMathFlags();
-
-      return TTI->getIntrinsicInstrCost(II->getIntrinsicID(), II->getType(),
-                                        Args, FMF);
-    }
-    return -1;
-  default:
-    // We don't have any information on this instruction.
-    return -1;
-  }
-}
-
 void CostModelAnalysis::print(raw_ostream &OS, const Module*) const {
   if (!F)
     return;
 
   for (BasicBlock &B : *F) {
     for (Instruction &Inst : B) {
-      unsigned Cost = getInstructionCost(&Inst);
+      unsigned Cost = TTI->getInstructionCost(&Inst, CostKind);
       if (Cost != (unsigned)-1)
         OS << "Cost Model: Found an estimated cost of " << Cost;
       else
Index: llvm/trunk/lib/Analysis/TargetTransformInfo.cpp
===================================================================
--- llvm/trunk/lib/Analysis/TargetTransformInfo.cpp
+++ llvm/trunk/lib/Analysis/TargetTransformInfo.cpp
@@ -16,11 +16,13 @@
 #include "llvm/IR/IntrinsicInst.h"
 #include "llvm/IR/Module.h"
 #include "llvm/IR/Operator.h"
+#include "llvm/IR/PatternMatch.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/ErrorHandling.h"
 #include <utility>
 
 using namespace llvm;
+using namespace PatternMatch;
 
 #define DEBUG_TYPE "tti"
 
@@ -29,6 +31,10 @@
     cl::desc("Enables the new wide memcpy loop lowering in Transforms/Utils."),
     cl::Hidden);
 
+static cl::opt<bool> EnableReduxCost("costmodel-reduxcost", cl::init(false),
+                                     cl::Hidden,
+                                     cl::desc("Recognize reduction patterns."));
+
 namespace {
 /// \brief No-op implementation of the TTI interface using the utility base
 /// classes.
@@ -583,6 +589,557 @@
   return TTIImpl->shouldExpandReduction(II);
 }
 
+int TargetTransformInfo::getInstructionLatency(const Instruction *I) const {
+  return TTIImpl->getInstructionLatency(I);
+}
+
+static bool isReverseVectorMask(ArrayRef<int> Mask) {
+  for (unsigned i = 0, MaskSize = Mask.size(); i < MaskSize; ++i)
+    if (Mask[i] >= 0 && Mask[i] != (int)(MaskSize - 1 - i))
+      return false;
+  return true;
+}
+
+static bool isSingleSourceVectorMask(ArrayRef<int> Mask) {
+  bool Vec0 = false;
+  bool Vec1 = false;
+  for (unsigned i = 0, NumVecElts = Mask.size(); i < NumVecElts; ++i) {
+    if (Mask[i] >= 0) {
+      if ((unsigned)Mask[i] >= NumVecElts)
+        Vec1 = true;
+      else
+        Vec0 = true;
+    }
+  }
+  return !(Vec0 && Vec1);
+}
+
+static bool isZeroEltBroadcastVectorMask(ArrayRef<int> Mask) {
+  for (unsigned i = 0; i < Mask.size(); ++i)
+    if (Mask[i] > 0)
+      return false;
+  return true;
+}
+
+static bool isAlternateVectorMask(ArrayRef<int> Mask) {
+  bool isAlternate = true;
+  unsigned MaskSize = Mask.size();
+
+  // Example: shufflevector A, B, <0,5,2,7>
+  for (unsigned i = 0; i < MaskSize && isAlternate; ++i) {
+    if (Mask[i] < 0)
+      continue;
+    isAlternate = Mask[i] == (int)((i & 1) ? MaskSize + i : i);
+  }
+
+  if (isAlternate)
+    return true;
+
+  isAlternate = true;
+  // Example: shufflevector A, B, <4,1,6,3>
+  for (unsigned i = 0; i < MaskSize && isAlternate; ++i) {
+    if (Mask[i] < 0)
+      continue;
+    isAlternate = Mask[i] == (int)((i & 1) ? i : MaskSize + i);
+  }
+
+  return isAlternate;
+}
+
+static TargetTransformInfo::OperandValueKind getOperandInfo(Value *V) {
+  TargetTransformInfo::OperandValueKind OpInfo =
+      TargetTransformInfo::OK_AnyValue;
+
+  // Check for a splat of a constant or for a non uniform vector of constants.
+  if (isa<ConstantVector>(V) || isa<ConstantDataVector>(V)) {
+    OpInfo = TargetTransformInfo::OK_NonUniformConstantValue;
+    if (cast<Constant>(V)->getSplatValue() != nullptr)
+      OpInfo = TargetTransformInfo::OK_UniformConstantValue;
+  }
+
+  // Check for a splat of a uniform value. This is not loop aware, so return
+  // true only for the obviously uniform cases (argument, globalvalue)
+  const Value *Splat = getSplatValue(V);
+  if (Splat && (isa<Argument>(Splat) || isa<GlobalValue>(Splat)))
+    OpInfo = TargetTransformInfo::OK_UniformValue;
+
+  return OpInfo;
+}
+
+static bool matchPairwiseShuffleMask(ShuffleVectorInst *SI, bool IsLeft,
+                                     unsigned Level) {
+  // We don't need a shuffle if we just want to have element 0 in position 0 of
+  // the vector.
+  if (!SI && Level == 0 && IsLeft)
+    return true;
+  else if (!SI)
+    return false;
+
+  SmallVector<int, 32> Mask(SI->getType()->getVectorNumElements(), -1);
+
+  // Build a mask of 0, 2, ... (left) or 1, 3, ... (right) depending on whether
+  // we look at the left or right side.
+  for (unsigned i = 0, e = (1 << Level), val = !IsLeft; i != e; ++i, val += 2)
+    Mask[i] = val;
+
+  SmallVector<int, 16> ActualMask = SI->getShuffleMask();
+  return Mask == ActualMask;
+}
+
+namespace {
+/// Kind of the reduction data.
+enum ReductionKind {
+  RK_None,           /// Not a reduction.
+  RK_Arithmetic,     /// Binary reduction data.
+  RK_MinMax,         /// Min/max reduction data.
+  RK_UnsignedMinMax, /// Unsigned min/max reduction data.
+};
+/// Contains opcode + LHS/RHS parts of the reduction operations.
+struct ReductionData {
+  ReductionData() = delete;
+  ReductionData(ReductionKind Kind, unsigned Opcode, Value *LHS, Value *RHS)
+      : Opcode(Opcode), LHS(LHS), RHS(RHS), Kind(Kind) {
+    assert(Kind != RK_None && "expected binary or min/max reduction only.");
+  }
+  unsigned Opcode = 0;
+  Value *LHS = nullptr;
+  Value *RHS = nullptr;
+  ReductionKind Kind = RK_None;
+  bool hasSameData(ReductionData &RD) const {
+    return Kind == RD.Kind && Opcode == RD.Opcode;
+  }
+};
+} // namespace
+
+static Optional<ReductionData> getReductionData(Instruction *I) {
+  Value *L, *R;
+  if (m_BinOp(m_Value(L), m_Value(R)).match(I))
+    return ReductionData(RK_Arithmetic, I->getOpcode(), L, R); 
+  if (auto *SI = dyn_cast<SelectInst>(I)) {
+    if (m_SMin(m_Value(L), m_Value(R)).match(SI) ||
+        m_SMax(m_Value(L), m_Value(R)).match(SI) ||
+        m_OrdFMin(m_Value(L), m_Value(R)).match(SI) ||
+        m_OrdFMax(m_Value(L), m_Value(R)).match(SI) ||
+        m_UnordFMin(m_Value(L), m_Value(R)).match(SI) ||
+        m_UnordFMax(m_Value(L), m_Value(R)).match(SI)) {
+      auto *CI = cast<CmpInst>(SI->getCondition());
+      return ReductionData(RK_MinMax, CI->getOpcode(), L, R); 
+    }   
+    if (m_UMin(m_Value(L), m_Value(R)).match(SI) ||
+        m_UMax(m_Value(L), m_Value(R)).match(SI)) {
+      auto *CI = cast<CmpInst>(SI->getCondition());
+      return ReductionData(RK_UnsignedMinMax, CI->getOpcode(), L, R);
+    }
+  }
+  return llvm::None;
+}
+
+static ReductionKind matchPairwiseReductionAtLevel(Instruction *I,
+                                                   unsigned Level,
+                                                   unsigned NumLevels) {
+  // Match one level of pairwise operations.
+  // %rdx.shuf.0.0 = shufflevector <4 x float> %rdx, <4 x float> undef,
+  //       <4 x i32> <i32 0, i32 2 , i32 undef, i32 undef>
+  // %rdx.shuf.0.1 = shufflevector <4 x float> %rdx, <4 x float> undef,
+  //       <4 x i32> <i32 1, i32 3, i32 undef, i32 undef>
+  // %bin.rdx.0 = fadd <4 x float> %rdx.shuf.0.0, %rdx.shuf.0.1
+  if (!I)
+    return RK_None;
+
+  assert(I->getType()->isVectorTy() && "Expecting a vector type");
+
+  Optional<ReductionData> RD = getReductionData(I);
+  if (!RD)
+    return RK_None;
+
+  ShuffleVectorInst *LS = dyn_cast<ShuffleVectorInst>(RD->LHS);
+  if (!LS && Level)
+    return RK_None;
+  ShuffleVectorInst *RS = dyn_cast<ShuffleVectorInst>(RD->RHS);
+  if (!RS && Level)
+    return RK_None;
+
+  // On level 0 we can omit one shufflevector instruction.
+  if (!Level && !RS && !LS)
+    return RK_None;
+
+  // Shuffle inputs must match.
+  Value *NextLevelOpL = LS ? LS->getOperand(0) : nullptr;
+  Value *NextLevelOpR = RS ? RS->getOperand(0) : nullptr;
+  Value *NextLevelOp = nullptr;
+  if (NextLevelOpR && NextLevelOpL) {
+    // If we have two shuffles their operands must match.
+    if (NextLevelOpL != NextLevelOpR)
+      return RK_None;
+
+    NextLevelOp = NextLevelOpL;
+  } else if (Level == 0 && (NextLevelOpR || NextLevelOpL)) {
+    // On the first level we can omit the shufflevector <0, undef,...>. So the
+    // input to the other shufflevector <1, undef> must match with one of the
+    // inputs to the current binary operation.
+    // Example:
+    //  %NextLevelOpL = shufflevector %R, <1, undef ...>
+    //  %BinOp        = fadd          %NextLevelOpL, %R
+    if (NextLevelOpL && NextLevelOpL != RD->RHS)
+      return RK_None;
+    else if (NextLevelOpR && NextLevelOpR != RD->LHS)
+      return RK_None;
+
+    NextLevelOp = NextLevelOpL ? RD->RHS : RD->LHS;
+  } else
+    return RK_None;
+
+  // Check that the next levels binary operation exists and matches with the
+  // current one.
+  if (Level + 1 != NumLevels) {
+    Optional<ReductionData> NextLevelRD =
+        getReductionData(cast<Instruction>(NextLevelOp));
+    if (!NextLevelRD || !RD->hasSameData(*NextLevelRD))
+      return RK_None;
+  }
+
+  // Shuffle mask for pairwise operation must match.
+  if (matchPairwiseShuffleMask(LS, /*IsLeft=*/true, Level)) {
+    if (!matchPairwiseShuffleMask(RS, /*IsLeft=*/false, Level))
+      return RK_None;
+  } else if (matchPairwiseShuffleMask(RS, /*IsLeft=*/true, Level)) {
+    if (!matchPairwiseShuffleMask(LS, /*IsLeft=*/false, Level))
+      return RK_None;
+  } else {
+    return RK_None;
+  }
+
+  if (++Level == NumLevels)
+    return RD->Kind;
+
+  // Match next level.
+  return matchPairwiseReductionAtLevel(cast<Instruction>(NextLevelOp), Level,
+                                       NumLevels);
+}
+
+static ReductionKind matchPairwiseReduction(const ExtractElementInst *ReduxRoot,
+                                            unsigned &Opcode, Type *&Ty) {
+  if (!EnableReduxCost)
+    return RK_None;
+
+  // Need to extract the first element.
+  ConstantInt *CI = dyn_cast<ConstantInt>(ReduxRoot->getOperand(1));
+  unsigned Idx = ~0u;
+  if (CI)
+    Idx = CI->getZExtValue();
+  if (Idx != 0)
+    return RK_None;
+
+  auto *RdxStart = dyn_cast<Instruction>(ReduxRoot->getOperand(0));
+  if (!RdxStart)
+    return RK_None;
+  Optional<ReductionData> RD = getReductionData(RdxStart);
+  if (!RD)
+    return RK_None;
+
+  Type *VecTy = RdxStart->getType();
+  unsigned NumVecElems = VecTy->getVectorNumElements();
+  if (!isPowerOf2_32(NumVecElems))
+    return RK_None;
+
+  // We look for a sequence of shuffle,shuffle,add triples like the following
+  // that builds a pairwise reduction tree.
+  //  
+  //  (X0, X1, X2, X3)
+  //   (X0 + X1, X2 + X3, undef, undef)
+  //    ((X0 + X1) + (X2 + X3), undef, undef, undef)
+  //  
+  // %rdx.shuf.0.0 = shufflevector <4 x float> %rdx, <4 x float> undef,
+  //       <4 x i32> <i32 0, i32 2 , i32 undef, i32 undef>
+  // %rdx.shuf.0.1 = shufflevector <4 x float> %rdx, <4 x float> undef,
+  //       <4 x i32> <i32 1, i32 3, i32 undef, i32 undef>
+  // %bin.rdx.0 = fadd <4 x float> %rdx.shuf.0.0, %rdx.shuf.0.1
+  // %rdx.shuf.1.0 = shufflevector <4 x float> %bin.rdx.0, <4 x float> undef,
+  //       <4 x i32> <i32 0, i32 undef, i32 undef, i32 undef>
+  // %rdx.shuf.1.1 = shufflevector <4 x float> %bin.rdx.0, <4 x float> undef,
+  //       <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
+  // %bin.rdx8 = fadd <4 x float> %rdx.shuf.1.0, %rdx.shuf.1.1
+  // %r = extractelement <4 x float> %bin.rdx8, i32 0
+  if (matchPairwiseReductionAtLevel(RdxStart, 0, Log2_32(NumVecElems)) ==
+      RK_None)
+    return RK_None;
+
+  Opcode = RD->Opcode;
+  Ty = VecTy;
+
+  return RD->Kind;
+}
+
+static std::pair<Value *, ShuffleVectorInst *>
+getShuffleAndOtherOprd(Value *L, Value *R) {
+  ShuffleVectorInst *S = nullptr;
+
+  if ((S = dyn_cast<ShuffleVectorInst>(L)))
+    return std::make_pair(R, S);
+
+  S = dyn_cast<ShuffleVectorInst>(R);
+  return std::make_pair(L, S);
+}
+
+static ReductionKind
+matchVectorSplittingReduction(const ExtractElementInst *ReduxRoot,
+                              unsigned &Opcode, Type *&Ty) {
+  if (!EnableReduxCost)
+    return RK_None;
+
+  // Need to extract the first element.
+  ConstantInt *CI = dyn_cast<ConstantInt>(ReduxRoot->getOperand(1));
+  unsigned Idx = ~0u;
+  if (CI)
+    Idx = CI->getZExtValue();
+  if (Idx != 0)
+    return RK_None;
+
+  auto *RdxStart = dyn_cast<Instruction>(ReduxRoot->getOperand(0));
+  if (!RdxStart)
+    return RK_None;
+  Optional<ReductionData> RD = getReductionData(RdxStart);
+  if (!RD)
+    return RK_None;
+
+  Type *VecTy = ReduxRoot->getOperand(0)->getType();
+  unsigned NumVecElems = VecTy->getVectorNumElements();
+  if (!isPowerOf2_32(NumVecElems))
+    return RK_None;
+
+  // We look for a sequence of shuffles and adds like the following matching one
+  // fadd, shuffle vector pair at a time.
+  //  
+  // %rdx.shuf = shufflevector <4 x float> %rdx, <4 x float> undef,
+  //                           <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
+  // %bin.rdx = fadd <4 x float> %rdx, %rdx.shuf
+  // %rdx.shuf7 = shufflevector <4 x float> %bin.rdx, <4 x float> undef,
+  //                          <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
+  // %bin.rdx8 = fadd <4 x float> %bin.rdx, %rdx.shuf7
+  // %r = extractelement <4 x float> %bin.rdx8, i32 0
+
+  unsigned MaskStart = 1;
+  Instruction *RdxOp = RdxStart;
+  SmallVector<int, 32> ShuffleMask(NumVecElems, 0); 
+  unsigned NumVecElemsRemain = NumVecElems;
+  while (NumVecElemsRemain - 1) {
+    // Check for the right reduction operation.
+    if (!RdxOp)
+      return RK_None;
+    Optional<ReductionData> RDLevel = getReductionData(RdxOp);
+    if (!RDLevel || !RDLevel->hasSameData(*RD))
+      return RK_None;
+
+    Value *NextRdxOp;
+    ShuffleVectorInst *Shuffle;
+    std::tie(NextRdxOp, Shuffle) =
+        getShuffleAndOtherOprd(RDLevel->LHS, RDLevel->RHS);
+
+    // Check the current reduction operation and the shuffle use the same value.
+    if (Shuffle == nullptr)
+      return RK_None;
+    if (Shuffle->getOperand(0) != NextRdxOp)
+      return RK_None;
+
+    // Check that shuffle masks matches.
+    for (unsigned j = 0; j != MaskStart; ++j)
+      ShuffleMask[j] = MaskStart + j;
+    // Fill the rest of the mask with -1 for undef.
+    std::fill(&ShuffleMask[MaskStart], ShuffleMask.end(), -1);
+
+    SmallVector<int, 16> Mask = Shuffle->getShuffleMask();
+    if (ShuffleMask != Mask)
+      return RK_None;
+
+    RdxOp = dyn_cast<Instruction>(NextRdxOp);
+    NumVecElemsRemain /= 2;
+    MaskStart *= 2;
+  }
+
+  Opcode = RD->Opcode;
+  Ty = VecTy;
+  return RD->Kind;
+}
+
+int TargetTransformInfo::getInstructionThroughput(const Instruction *I) const {
+  switch (I->getOpcode()) {
+  case Instruction::GetElementPtr:
+    return getUserCost(I);
+
+  case Instruction::Ret:
+  case Instruction::PHI:
+  case Instruction::Br: {
+    return getCFInstrCost(I->getOpcode());
+  }
+  case Instruction::Add:
+  case Instruction::FAdd:
+  case Instruction::Sub:
+  case Instruction::FSub:
+  case Instruction::Mul:
+  case Instruction::FMul:
+  case Instruction::UDiv:
+  case Instruction::SDiv:
+  case Instruction::FDiv:
+  case Instruction::URem:
+  case Instruction::SRem:
+  case Instruction::FRem:
+  case Instruction::Shl:
+  case Instruction::LShr:
+  case Instruction::AShr:
+  case Instruction::And:
+  case Instruction::Or:
+  case Instruction::Xor: {
+    TargetTransformInfo::OperandValueKind Op1VK =
+      getOperandInfo(I->getOperand(0));
+    TargetTransformInfo::OperandValueKind Op2VK =
+      getOperandInfo(I->getOperand(1));
+    SmallVector<const Value*, 2> Operands(I->operand_values());
+    return getArithmeticInstrCost(I->getOpcode(), I->getType(), Op1VK,
+                                       Op2VK, TargetTransformInfo::OP_None,
+                                       TargetTransformInfo::OP_None,
+                                       Operands);
+  }
+  case Instruction::Select: {
+    const SelectInst *SI = cast<SelectInst>(I);
+    Type *CondTy = SI->getCondition()->getType();
+    return getCmpSelInstrCost(I->getOpcode(), I->getType(), CondTy, I);
+  }
+  case Instruction::ICmp:
+  case Instruction::FCmp: {
+    Type *ValTy = I->getOperand(0)->getType();
+    return getCmpSelInstrCost(I->getOpcode(), ValTy, I->getType(), I);
+  }
+  case Instruction::Store: {
+    const StoreInst *SI = cast<StoreInst>(I);
+    Type *ValTy = SI->getValueOperand()->getType();
+    return getMemoryOpCost(I->getOpcode(), ValTy,
+                                SI->getAlignment(),
+                                SI->getPointerAddressSpace(), I);
+  }
+  case Instruction::Load: {
+    const LoadInst *LI = cast<LoadInst>(I);
+    return getMemoryOpCost(I->getOpcode(), I->getType(),
+                                LI->getAlignment(),
+                                LI->getPointerAddressSpace(), I);
+  }
+  case Instruction::ZExt:
+  case Instruction::SExt:
+  case Instruction::FPToUI:
+  case Instruction::FPToSI:
+  case Instruction::FPExt:
+  case Instruction::PtrToInt:
+  case Instruction::IntToPtr:
+  case Instruction::SIToFP:
+  case Instruction::UIToFP:
+  case Instruction::Trunc:
+  case Instruction::FPTrunc:
+  case Instruction::BitCast:
+  case Instruction::AddrSpaceCast: {
+    Type *SrcTy = I->getOperand(0)->getType();
+    return getCastInstrCost(I->getOpcode(), I->getType(), SrcTy, I);
+  }
+  case Instruction::ExtractElement: {
+    const ExtractElementInst * EEI = cast<ExtractElementInst>(I);
+    ConstantInt *CI = dyn_cast<ConstantInt>(I->getOperand(1));
+    unsigned Idx = -1;
+    if (CI)
+      Idx = CI->getZExtValue();
+
+    // Try to match a reduction sequence (series of shufflevector and vector
+    // adds followed by a extractelement).
+    unsigned ReduxOpCode;
+    Type *ReduxType;
+
+    switch (matchVectorSplittingReduction(EEI, ReduxOpCode, ReduxType)) {
+    case RK_Arithmetic:
+      return getArithmeticReductionCost(ReduxOpCode, ReduxType,
+                                             /*IsPairwiseForm=*/false);
+    case RK_MinMax:
+      return getMinMaxReductionCost(
+          ReduxType, CmpInst::makeCmpResultType(ReduxType),
+          /*IsPairwiseForm=*/false, /*IsUnsigned=*/false);
+    case RK_UnsignedMinMax:
+      return getMinMaxReductionCost(
+          ReduxType, CmpInst::makeCmpResultType(ReduxType),
+          /*IsPairwiseForm=*/false, /*IsUnsigned=*/true);
+    case RK_None:
+      break;
+    }
+
+    switch (matchPairwiseReduction(EEI, ReduxOpCode, ReduxType)) {
+    case RK_Arithmetic:
+      return getArithmeticReductionCost(ReduxOpCode, ReduxType,
+                                             /*IsPairwiseForm=*/true);
+    case RK_MinMax:
+      return getMinMaxReductionCost(
+          ReduxType, CmpInst::makeCmpResultType(ReduxType),
+          /*IsPairwiseForm=*/true, /*IsUnsigned=*/false);
+    case RK_UnsignedMinMax:
+      return getMinMaxReductionCost(
+          ReduxType, CmpInst::makeCmpResultType(ReduxType),
+          /*IsPairwiseForm=*/true, /*IsUnsigned=*/true);
+    case RK_None:
+      break;
+    }
+
+    return getVectorInstrCost(I->getOpcode(),
+                                   EEI->getOperand(0)->getType(), Idx);
+  }
+  case Instruction::InsertElement: {
+    const InsertElementInst * IE = cast<InsertElementInst>(I);
+    ConstantInt *CI = dyn_cast<ConstantInt>(IE->getOperand(2));
+    unsigned Idx = -1; 
+    if (CI)
+      Idx = CI->getZExtValue();
+    return getVectorInstrCost(I->getOpcode(),
+                                   IE->getType(), Idx);
+  }
+  case Instruction::ShuffleVector: {
+    const ShuffleVectorInst *Shuffle = cast<ShuffleVectorInst>(I);
+    Type *VecTypOp0 = Shuffle->getOperand(0)->getType();
+    unsigned NumVecElems = VecTypOp0->getVectorNumElements();
+    SmallVector<int, 16> Mask = Shuffle->getShuffleMask();
+
+    if (NumVecElems == Mask.size()) {
+      if (isReverseVectorMask(Mask))
+        return getShuffleCost(TargetTransformInfo::SK_Reverse, VecTypOp0,
+                                   0, nullptr);
+      if (isAlternateVectorMask(Mask))
+        return getShuffleCost(TargetTransformInfo::SK_Alternate,
+                                   VecTypOp0, 0, nullptr);
+
+      if (isZeroEltBroadcastVectorMask(Mask))
+        return getShuffleCost(TargetTransformInfo::SK_Broadcast,
+                                   VecTypOp0, 0, nullptr);
+
+      if (isSingleSourceVectorMask(Mask))
+        return getShuffleCost(TargetTransformInfo::SK_PermuteSingleSrc,
+                                   VecTypOp0, 0, nullptr);
+
+      return getShuffleCost(TargetTransformInfo::SK_PermuteTwoSrc,
+                                 VecTypOp0, 0, nullptr);
+    }
+
+    return -1;
+  }
+  case Instruction::Call:
+    if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
+      SmallVector<Value *, 4> Args(II->arg_operands());
+
+      FastMathFlags FMF;
+      if (auto *FPMO = dyn_cast<FPMathOperator>(II))
+        FMF = FPMO->getFastMathFlags();
+
+      return getIntrinsicInstrCost(II->getIntrinsicID(), II->getType(),
+                                        Args, FMF);
+    }
+    return -1;
+  default:
+    // We don't have any information on this instruction.
+    return -1;
+  }
+}
+
 TargetTransformInfo::Concept::~Concept() {}
 
 TargetIRAnalysis::TargetIRAnalysis() : TTICallback(&getDefaultTTI) {}
Index: llvm/trunk/test/Analysis/CostModel/X86/costmodel.ll
===================================================================
--- llvm/trunk/test/Analysis/CostModel/X86/costmodel.ll
+++ llvm/trunk/test/Analysis/CostModel/X86/costmodel.ll
@@ -0,0 +1,19 @@
+; RUN: opt < %s -cost-model -cost-kind=latency -analyze -mtriple=x86_64-unknown-linux-gnu -mcpu=corei7 | FileCheck %s --check-prefix=LATENCY
+; RUN: opt < %s -cost-model -cost-kind=code-size -analyze -mtriple=x86_64-unknown-linux-gnu -mcpu=corei7 | FileCheck %s --check-prefix=CODESIZE
+
+; Tests if the interface TargetTransformInfo::getInstructionCost() works correctly.
+
+define i64 @foo(i64 %arg) {
+
+  ; LATENCY:  cost of 1 {{.*}} %I64 = add
+  ; CODESIZE: cost of 1 {{.*}} %I64 = add
+  %I64 = add i64 undef, undef
+
+  ; LATENCY:  cost of 4 {{.*}} load
+  ; CODESIZE: cost of 1 {{.*}} load
+  load i64, i64* undef, align 4
+
+  ; LATENCY:  cost of 1 {{.*}} ret
+  ; CODESIZE: cost of 1 {{.*}} ret
+  ret i64 undef
+}