Index: llvm/include/llvm/Analysis/TargetTransformInfo.h
===================================================================
--- llvm/include/llvm/Analysis/TargetTransformInfo.h
+++ llvm/include/llvm/Analysis/TargetTransformInfo.h
@@ -38,6 +38,7 @@
 class BlockFrequencyInfo;
 class DominatorTree;
 class BranchInst;
+class ExtractElementInst;
 class Function;
 class GlobalValue;
 class IntrinsicInst;
@@ -48,6 +49,7 @@
 class ProfileSummaryInfo;
 class SCEV;
 class ScalarEvolution;
+class ShuffleVectorInst;
 class StoreInst;
 class SwitchInst;
 class TargetLibraryInfo;
@@ -160,28 +162,6 @@
     TCK_SizeAndLatency   ///< The weighted sum of size and latency.
   };
 
-  /// Query the cost of a specified instruction.
-  ///
-  /// Clients should use this interface to query the cost of an existing
-  /// instruction. The instruction must have a valid parent (basic block).
-  ///
-  /// Note, this method does not cache the cost calculation and it
-  /// can be expensive in some cases.
-  int getInstructionCost(const Instruction *I, enum TargetCostKind kind) const {
-    switch (kind) {
-    case TCK_RecipThroughput:
-      return getInstructionThroughput(I);
-
-    case TCK_Latency:
-      return getInstructionLatency(I);
-
-    case TCK_CodeSize:
-    case TCK_SizeAndLatency:
-      return getUserCost(I, kind);
-    }
-    llvm_unreachable("Unknown instruction cost kind");
-  }
-
   /// Underlying constants for 'cost' values in this interface.
   ///
   /// Many APIs in this interface return a cost. This enum defines the
@@ -362,6 +342,39 @@
     unsigned ScaleCost;
   };
 
+  /// Kind of the reduction data.
+  enum ReductionKind {
+    RK_None,           ///< Not a reduction.
+    RK_Arithmetic,     ///< Binary reduction data.
+    RK_MinMax,         ///< Min/max reduction data.
+    RK_UnsignedMinMax, ///< Unsigned min/max reduction data.
+  };
+
+  /// Opcode, LHS/RHS operands and kind of one matched reduction operation.
+  struct ReductionData {
+    ReductionData() = delete;
+    ReductionData(ReductionKind Kind, unsigned Opcode, Value *LHS, Value *RHS)
+        : Opcode(Opcode), LHS(LHS), RHS(RHS), Kind(Kind) {
+      assert(Kind != RK_None && "expected binary or min/max reduction only.");
+    }
+    unsigned Opcode = 0;          ///< Opcode recorded by the matcher.
+    Value *LHS = nullptr;         ///< Left-hand operand of the operation.
+    Value *RHS = nullptr;         ///< Right-hand operand of the operation.
+    ReductionKind Kind = RK_None; ///< Constructor asserts this is not RK_None.
+    bool hasSameData(const ReductionData &RD) const {
+      return Kind == RD.Kind && Opcode == RD.Opcode; // LHS/RHS are ignored.
+    }
+  };
+
+  static bool matchPairwiseShuffleMask(ShuffleVectorInst *SI,
+                                       bool IsLeft, unsigned Level);
+
+  static ReductionKind matchPairwiseReduction(
+    const ExtractElementInst *ReduxRoot, unsigned &Opcode, VectorType *&Ty);
+
+  static ReductionKind matchVectorSplittingReduction(
+    const ExtractElementInst *ReduxRoot, unsigned &Opcode, VectorType *&Ty);
+
   /// Parameters that control the generic loop unrolling transformation.
   struct UnrollingPreferences {
     /// The cost threshold for the unrolled loop. Should be relative to the
@@ -1163,10 +1176,6 @@
   /// Returns 1 as the default value.
   int getInstructionLatency(const Instruction *I) const;
 
-  /// Returns the expected throughput cost of the instruction.
-  /// Returns -1 if the cost is unknown.
-  int getInstructionThroughput(const Instruction *I) const;
-
   /// The abstract base class used to type erase specific TTI
   /// implementations.
   class Concept;
Index: llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
===================================================================
--- llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
+++ llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
@@ -19,6 +19,8 @@
 #include "llvm/Analysis/VectorUtils.h"
 #include "llvm/IR/DataLayout.h"
 #include "llvm/IR/Function.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
 #include "llvm/IR/GetElementPtrTypeIterator.h"
 #include "llvm/IR/Operator.h"
 #include "llvm/IR/Type.h"
@@ -817,10 +819,27 @@
                                                     CostKind);
   }
 
-  unsigned getUserCost(const User *U, ArrayRef<const Value *> Operands,
+  int getUserCost(const User *U, ArrayRef<const Value *> Operands,
                        TTI::TargetCostKind CostKind) {
+    if (CostKind == TTI::TCK_Latency)
+      return getInstructionLatency(cast<Instruction>(U));
+
     auto *TargetTTI = static_cast<T *>(this);
 
+    if (isa<CallInst>(U) && CostKind == TTI::TCK_RecipThroughput) {
+      if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(U)) {
+        SmallVector<Value *, 4> Args(II->arg_operands());
+
+        FastMathFlags FMF;
+        if (auto *FPMO = dyn_cast<FPMathOperator>(II))
+          FMF = FPMO->getFastMathFlags();
+
+        return getIntrinsicInstrCost(II->getIntrinsicID(), II->getType(), Args,
+                                     FMF, 1, CostKind, II);
+      }
+      return -1;
+    }
+
     // FIXME: Unlikely to be true for anything but CodeSize.
     if (const auto *CB = dyn_cast<CallBase>(U)) {
       const Function *F = CB->getCalledFunction();
@@ -845,10 +864,16 @@
       U->getNumOperands() == 1 ? U->getOperand(0)->getType() : nullptr;
     unsigned Opcode = Operator::getOpcode(U);
     auto *I = dyn_cast<Instruction>(U);
+    int Cost = CostKind == TTI::TCK_RecipThroughput ? -1 : 0;
+
     switch (Opcode) {
     default:
+      Cost = TTI::TCC_Basic;
       break;
+    case Instruction::Br:
+    case Instruction::Ret:
     case Instruction::PHI:
+      return TargetTTI->getCFInstrCost(Opcode, CostKind);
     case Instruction::ExtractValue:
     case Instruction::Freeze:
       return TTI::TCC_Free;
@@ -862,34 +887,205 @@
                                    GEP->getPointerOperand(),
                                    Operands.drop_front());
     }
-    case Instruction::FDiv:
-    case Instruction::FRem:
-    case Instruction::SDiv:
-    case Instruction::SRem:
+    case Instruction::Add:
+    case Instruction::FAdd:
+    case Instruction::Sub:
+    case Instruction::FSub:
+    case Instruction::Mul:
+    case Instruction::FMul:
     case Instruction::UDiv:
+    case Instruction::SDiv:
+    case Instruction::FDiv:
     case Instruction::URem:
-      // FIXME: Unlikely to be true for CodeSize.
-      return TTI::TCC_Expensive;
+    case Instruction::SRem:
+    case Instruction::FRem:
+    case Instruction::Shl:
+    case Instruction::LShr:
+    case Instruction::AShr:
+    case Instruction::And:
+    case Instruction::Or:
+    case Instruction::Xor: {
+      TTI::OperandValueKind Op1VK, Op2VK;
+      TTI::OperandValueProperties Op1VP, Op2VP;
+      Op1VK = TTI::getOperandInfo(U->getOperand(0), Op1VP);
+      Op2VK = TTI::getOperandInfo(U->getOperand(1), Op2VP);
+      SmallVector<const Value *, 2> Operands(U->operand_values());
+      Cost = TargetTTI->getArithmeticInstrCost(Opcode, Ty, CostKind,
+                                               Op1VK, Op2VK,
+                                               Op1VP, Op2VP, Operands, I);
+      break;
+    }
+    case Instruction::FNeg: {
+      TTI::OperandValueKind Op1VK, Op2VK;
+      TTI::OperandValueProperties Op1VP, Op2VP;
+      // Read operands through U: I is null when U is not an Instruction.
+      Op1VK = TTI::getOperandInfo(U->getOperand(0), Op1VP);
+      Op2VK = TTI::OK_AnyValue;
+      Op2VP = TTI::OP_None;
+      SmallVector<const Value *, 2> Operands(U->operand_values());
+      Cost = TargetTTI->getArithmeticInstrCost(Opcode, Ty, CostKind,
+                                               Op1VK, Op2VK,
+                                               Op1VP, Op2VP, Operands, I);
+      break;
+    }
+    case Instruction::Select:
+      Cost = TargetTTI->getCmpSelInstrCost(Opcode, Ty, U->getOperand(0)->getType(),
+                                           CostKind, I);
+      break;
+    case Instruction::ICmp:
+    case Instruction::FCmp:
+      Cost = TargetTTI->getCmpSelInstrCost(Opcode, U->getOperand(0)->getType(),
+                                           Ty, CostKind, I);
+      break;
+    case Instruction::Store: {
+      const StoreInst *SI = cast<StoreInst>(I);
+      Type *ValTy = SI->getValueOperand()->getType();
+      Cost = TargetTTI->getMemoryOpCost(I->getOpcode(), ValTy,
+                             MaybeAlign(SI->getAlignment()),
+                             SI->getPointerAddressSpace(), CostKind, I);
+      break;
+    }
+    case Instruction::Load: {
+      const LoadInst *LI = cast<LoadInst>(I);
+      Cost = TargetTTI->getMemoryOpCost(Opcode, Ty,
+                             MaybeAlign(LI->getAlignment()),
+                             LI->getPointerAddressSpace(), CostKind, I);
+      break;
+    }
+    // All cast opcodes (incl. SIToFP/UIToFP) defer to the target's cast cost.
+    case Instruction::FPToUI:
+    case Instruction::FPToSI:
+    case Instruction::SIToFP:
+    case Instruction::UIToFP:
     case Instruction::IntToPtr:
     case Instruction::PtrToInt:
     case Instruction::Trunc:
+    case Instruction::FPTrunc:
     case Instruction::BitCast:
     case Instruction::FPExt:
     case Instruction::SExt:
     case Instruction::ZExt:
-      if (TargetTTI->getCastInstrCost(Opcode, Ty, OpTy, CostKind, I) ==
-          TTI::TCC_Free)
-        return TTI::TCC_Free;
+    case Instruction::AddrSpaceCast:
+      Cost = TargetTTI->getCastInstrCost(Opcode, Ty, OpTy, CostKind, I);
+      break;
+    case Instruction::ExtractElement: {
+      auto IsVectorSplittingReduction = [&](const ExtractElementInst *EEI) {
+        unsigned ReduxOpCode;
+        VectorType *ReduxType;
+        switch (TTI::matchVectorSplittingReduction(EEI, ReduxOpCode, ReduxType)) {
+        case TTI::RK_Arithmetic:
+          Cost = TargetTTI->getArithmeticReductionCost(ReduxOpCode, ReduxType,
+                                            /*IsPairwiseForm=*/false,
+                                            CostKind);
+          return true;
+        case TTI::RK_MinMax:
+          Cost = TargetTTI->getMinMaxReductionCost(
+              ReduxType, cast<VectorType>(CmpInst::makeCmpResultType(ReduxType)),
+              /*IsPairwiseForm=*/false, /*IsUnsigned=*/false, CostKind);
+          return true;
+        case TTI::RK_UnsignedMinMax:
+          Cost = TargetTTI->getMinMaxReductionCost(
+              ReduxType, cast<VectorType>(CmpInst::makeCmpResultType(ReduxType)),
+              /*IsPairwiseForm=*/false, /*IsUnsigned=*/true, CostKind);
+          return true;
+        case TTI::RK_None:
+          break;
+        }
+        return false;
+      };
+
+      auto IsPairWiseReduction = [&](const ExtractElementInst *EEI) {
+        unsigned ReduxOpCode;
+        VectorType *ReduxType;
+        switch (TTI::matchPairwiseReduction(EEI, ReduxOpCode, ReduxType)) {
+        case TTI::RK_Arithmetic:
+          Cost = TargetTTI->getArithmeticReductionCost(ReduxOpCode, ReduxType,
+                                          /*IsPairwiseForm=*/true, CostKind);
+          return true;
+        case TTI::RK_MinMax:
+          Cost = TargetTTI->getMinMaxReductionCost(
+              ReduxType, cast<VectorType>(CmpInst::makeCmpResultType(ReduxType)),
+              /*IsPairwiseForm=*/true, /*IsUnsigned=*/false, CostKind);
+          return true;
+        case TTI::RK_UnsignedMinMax:
+          Cost = TargetTTI->getMinMaxReductionCost(
+              ReduxType, cast<VectorType>(CmpInst::makeCmpResultType(ReduxType)),
+              /*IsPairwiseForm=*/true, /*IsUnsigned=*/true, CostKind);
+          return true;
+        case TTI::RK_None:
+          break;
+        }
+        return false;
+      };
+
+      const ExtractElementInst *EEI = cast<ExtractElementInst>(I);
+      ConstantInt *CI = dyn_cast<ConstantInt>(I->getOperand(1));
+      unsigned Idx = -1;
+      if (CI)
+        Idx = CI->getZExtValue();
+
+      // Try to match a reduction sequence (series of shufflevector and vector
+      // adds followed by an extractelement).
+      if (IsVectorSplittingReduction(EEI))
+        break;
+      else if (IsPairWiseReduction(EEI))
+        break;
+
+      Cost = TargetTTI->getVectorInstrCost(I->getOpcode(),
+                                           EEI->getOperand(0)->getType(), Idx);
       break;
     }
-    // By default, just classify everything as 'basic'.
-    return TTI::TCC_Basic;
+    case Instruction::InsertElement: {
+      const InsertElementInst *IE = cast<InsertElementInst>(I);
+      ConstantInt *CI = dyn_cast<ConstantInt>(IE->getOperand(2));
+      unsigned Idx = -1;
+      if (CI)
+        Idx = CI->getZExtValue();
+      Cost = TargetTTI->getVectorInstrCost(I->getOpcode(), IE->getType(), Idx);
+      break;
+    }
+    case Instruction::ShuffleVector: {
+      const ShuffleVectorInst *Shuffle = cast<ShuffleVectorInst>(I);
+      auto *Ty = cast<VectorType>(Shuffle->getType());
+      auto *SrcTy = cast<VectorType>(Shuffle->getOperand(0)->getType());
+
+      // TODO: Identify and add costs for insert subvector, etc.
+      int SubIndex;
+      if (Shuffle->isExtractSubvectorMask(SubIndex))
+        return TargetTTI->getShuffleCost(TTI::SK_ExtractSubvector, SrcTy, SubIndex, Ty);
+
+      if (Shuffle->changesLength())
+        return CostKind == TTI::TCK_RecipThroughput ? -1 : 1;
+
+      if (Shuffle->isIdentity())
+        return 0;
+
+      if (Shuffle->isReverse())
+        Cost = TargetTTI->getShuffleCost(TTI::SK_Reverse, Ty, 0, nullptr);
+      else if (Shuffle->isSelect())
+        Cost = TargetTTI->getShuffleCost(TTI::SK_Select, Ty, 0, nullptr);
+      else if (Shuffle->isTranspose())
+        Cost = TargetTTI->getShuffleCost(TTI::SK_Transpose, Ty, 0, nullptr);
+      else if (Shuffle->isZeroEltSplat())
+        Cost = TargetTTI->getShuffleCost(TTI::SK_Broadcast, Ty, 0, nullptr);
+      else if (Shuffle->isSingleSource())
+        Cost = TargetTTI->getShuffleCost(TTI::SK_PermuteSingleSrc, Ty, 0, nullptr);
+      else
+        Cost = TargetTTI->getShuffleCost(TTI::SK_PermuteTwoSrc, Ty, 0, nullptr);
+    }
+    }
+
+    if (CostKind == TTI::TCK_CodeSize || CostKind == TTI::TCK_SizeAndLatency) {
+      // By default, just classify everything as 'basic'.
+      return Cost == 0 ? Cost : TTI::TCC_Basic;
+    }
+    return Cost;
   }
 
   int getInstructionLatency(const Instruction *I) {
     SmallVector<const Value *, 4> Operands(I->value_op_begin(),
                                            I->value_op_end());
-    if (getUserCost(I, Operands, TTI::TCK_Latency) == TTI::TCC_Free)
+    if (getUserCost(I, Operands, TTI::TCK_SizeAndLatency) == TTI::TCC_Free)
       return 0;
 
     if (isa<LoadInst>(I))
Index: llvm/lib/Analysis/CostModel.cpp
===================================================================
--- llvm/lib/Analysis/CostModel.cpp
+++ llvm/lib/Analysis/CostModel.cpp
@@ -54,8 +54,8 @@
     /// Returns -1 if the cost is unknown.
     /// Note, this method does not cache the cost calculation and it
     /// can be expensive in some cases.
-    unsigned getInstructionCost(const Instruction *I) const {
-      return TTI->getInstructionCost(I, TargetTransformInfo::TCK_RecipThroughput);
+    unsigned getUserCost(const Instruction *I) const {
+      return TTI->getUserCost(I, TargetTransformInfo::TCK_RecipThroughput);
     }
 
   private:
@@ -100,7 +100,7 @@
 
   for (BasicBlock &B : *F) {
     for (Instruction &Inst : B) {
-      unsigned Cost = TTI->getInstructionCost(&Inst, CostKind);
+      unsigned Cost = TTI->getUserCost(&Inst, CostKind);
       if (Cost != (unsigned)-1)
         OS << "Cost Model: Found an estimated cost of " << Cost;
       else
Index: llvm/lib/Analysis/TargetTransformInfo.cpp
===================================================================
--- llvm/lib/Analysis/TargetTransformInfo.cpp
+++ llvm/lib/Analysis/TargetTransformInfo.cpp
@@ -164,7 +164,8 @@
                                           const User *U,
                                           TTI::TargetCostKind CostKind) const {
   int Cost = TTIImpl->getIntrinsicCost(IID, RetTy, Arguments, U, CostKind);
-  assert(Cost >= 0 && "TTI should not produce negative costs!");
+  assert((Cost >= 0 || CostKind == TTI::TCK_RecipThroughput) &&
+         "TTI should not produce negative costs!");
   return Cost;
 }
 
@@ -178,7 +179,8 @@
                                      ArrayRef<const Value *> Operands,
                                      enum TargetCostKind CostKind) const {
   int Cost = TTIImpl->getUserCost(U, Operands, CostKind);
-  assert(Cost >= 0 && "TTI should not produce negative costs!");
+  assert((Cost >= 0 || CostKind == TTI::TCK_RecipThroughput) &&
+         "TTI should not produce negative costs!");
   return Cost;
 }
 
@@ -873,8 +875,9 @@
   return TTIImpl->getInstructionLatency(I);
 }
 
-static bool matchPairwiseShuffleMask(ShuffleVectorInst *SI, bool IsLeft,
-                                     unsigned Level) {
+bool
+TargetTransformInfo::matchPairwiseShuffleMask(ShuffleVectorInst *SI,
+                                              bool IsLeft, unsigned Level) {
   // We don't need a shuffle if we just want to have element 0 in position 0 of
   // the vector.
   if (!SI && Level == 0 && IsLeft)
@@ -893,35 +896,10 @@
   return Mask == ActualMask;
 }
 
-namespace {
-/// Kind of the reduction data.
-enum ReductionKind {
-  RK_None,           /// Not a reduction.
-  RK_Arithmetic,     /// Binary reduction data.
-  RK_MinMax,         /// Min/max reduction data.
-  RK_UnsignedMinMax, /// Unsigned min/max reduction data.
-};
-/// Contains opcode + LHS/RHS parts of the reduction operations.
-struct ReductionData {
-  ReductionData() = delete;
-  ReductionData(ReductionKind Kind, unsigned Opcode, Value *LHS, Value *RHS)
-      : Opcode(Opcode), LHS(LHS), RHS(RHS), Kind(Kind) {
-    assert(Kind != RK_None && "expected binary or min/max reduction only.");
-  }
-  unsigned Opcode = 0;
-  Value *LHS = nullptr;
-  Value *RHS = nullptr;
-  ReductionKind Kind = RK_None;
-  bool hasSameData(ReductionData &RD) const {
-    return Kind == RD.Kind && Opcode == RD.Opcode;
-  }
-};
-} // namespace
-
-static Optional<ReductionData> getReductionData(Instruction *I) {
+static Optional<TTI::ReductionData> getReductionData(Instruction *I) {
   Value *L, *R;
   if (m_BinOp(m_Value(L), m_Value(R)).match(I))
-    return ReductionData(RK_Arithmetic, I->getOpcode(), L, R);
+    return TTI::ReductionData(TTI::RK_Arithmetic, I->getOpcode(), L, R);
   if (auto *SI = dyn_cast<SelectInst>(I)) {
     if (m_SMin(m_Value(L), m_Value(R)).match(SI) ||
         m_SMax(m_Value(L), m_Value(R)).match(SI) ||
@@ -930,20 +908,20 @@
         m_UnordFMin(m_Value(L), m_Value(R)).match(SI) ||
         m_UnordFMax(m_Value(L), m_Value(R)).match(SI)) {
       auto *CI = cast<CmpInst>(SI->getCondition());
-      return ReductionData(RK_MinMax, CI->getOpcode(), L, R);
+      return TTI::ReductionData(TTI::RK_MinMax, CI->getOpcode(), L, R);
     }
     if (m_UMin(m_Value(L), m_Value(R)).match(SI) ||
         m_UMax(m_Value(L), m_Value(R)).match(SI)) {
       auto *CI = cast<CmpInst>(SI->getCondition());
-      return ReductionData(RK_UnsignedMinMax, CI->getOpcode(), L, R);
+      return TTI::ReductionData(TTI::RK_UnsignedMinMax, CI->getOpcode(), L, R);
     }
   }
   return llvm::None;
 }
 
-static ReductionKind matchPairwiseReductionAtLevel(Instruction *I,
-                                                   unsigned Level,
-                                                   unsigned NumLevels) {
+static TTI::ReductionKind matchPairwiseReductionAtLevel(Instruction *I,
+                                                        unsigned Level,
+                                                        unsigned NumLevels) {
   // Match one level of pairwise operations.
   // %rdx.shuf.0.0 = shufflevector <4 x float> %rdx, <4 x float> undef,
   //       <4 x i32> <i32 0, i32 2 , i32 undef, i32 undef>
@@ -951,24 +929,24 @@
   //       <4 x i32> <i32 1, i32 3, i32 undef, i32 undef>
   // %bin.rdx.0 = fadd <4 x float> %rdx.shuf.0.0, %rdx.shuf.0.1
   if (!I)
-    return RK_None;
+    return TTI::RK_None;
 
   assert(I->getType()->isVectorTy() && "Expecting a vector type");
 
-  Optional<ReductionData> RD = getReductionData(I);
+  Optional<TTI::ReductionData> RD = getReductionData(I);
   if (!RD)
-    return RK_None;
+    return TTI::RK_None;
 
   ShuffleVectorInst *LS = dyn_cast<ShuffleVectorInst>(RD->LHS);
   if (!LS && Level)
-    return RK_None;
+    return TTI::RK_None;
   ShuffleVectorInst *RS = dyn_cast<ShuffleVectorInst>(RD->RHS);
   if (!RS && Level)
-    return RK_None;
+    return TTI::RK_None;
 
   // On level 0 we can omit one shufflevector instruction.
   if (!Level && !RS && !LS)
-    return RK_None;
+    return TTI::RK_None;
 
   // Shuffle inputs must match.
   Value *NextLevelOpL = LS ? LS->getOperand(0) : nullptr;
@@ -977,7 +955,7 @@
   if (NextLevelOpR && NextLevelOpL) {
     // If we have two shuffles their operands must match.
     if (NextLevelOpL != NextLevelOpR)
-      return RK_None;
+      return TTI::RK_None;
 
     NextLevelOp = NextLevelOpL;
   } else if (Level == 0 && (NextLevelOpR || NextLevelOpL)) {
@@ -988,32 +966,32 @@
     //  %NextLevelOpL = shufflevector %R, <1, undef ...>
     //  %BinOp        = fadd          %NextLevelOpL, %R
     if (NextLevelOpL && NextLevelOpL != RD->RHS)
-      return RK_None;
+      return TTI::RK_None;
     else if (NextLevelOpR && NextLevelOpR != RD->LHS)
-      return RK_None;
+      return TTI::RK_None;
 
     NextLevelOp = NextLevelOpL ? RD->RHS : RD->LHS;
   } else
-    return RK_None;
+    return TTI::RK_None;
 
   // Check that the next levels binary operation exists and matches with the
   // current one.
   if (Level + 1 != NumLevels) {
-    Optional<ReductionData> NextLevelRD =
+    Optional<TTI::ReductionData> NextLevelRD =
         getReductionData(cast<Instruction>(NextLevelOp));
     if (!NextLevelRD || !RD->hasSameData(*NextLevelRD))
-      return RK_None;
+      return TTI::RK_None;
   }
 
   // Shuffle mask for pairwise operation must match.
-  if (matchPairwiseShuffleMask(LS, /*IsLeft=*/true, Level)) {
-    if (!matchPairwiseShuffleMask(RS, /*IsLeft=*/false, Level))
-      return RK_None;
-  } else if (matchPairwiseShuffleMask(RS, /*IsLeft=*/true, Level)) {
-    if (!matchPairwiseShuffleMask(LS, /*IsLeft=*/false, Level))
-      return RK_None;
+  if (TTI::matchPairwiseShuffleMask(LS, /*IsLeft=*/true, Level)) {
+    if (!TTI::matchPairwiseShuffleMask(RS, /*IsLeft=*/false, Level))
+      return TTI::RK_None;
+  } else if (TTI::matchPairwiseShuffleMask(RS, /*IsLeft=*/true, Level)) {
+    if (!TTI::matchPairwiseShuffleMask(LS, /*IsLeft=*/false, Level))
+      return TTI::RK_None;
   } else {
-    return RK_None;
+    return TTI::RK_None;
   }
 
   if (++Level == NumLevels)
@@ -1024,11 +1002,12 @@
                                        NumLevels);
 }
 
-static ReductionKind matchPairwiseReduction(const ExtractElementInst *ReduxRoot,
+TTI::ReductionKind
+TargetTransformInfo::matchPairwiseReduction(const ExtractElementInst *ReduxRoot,
                                             unsigned &Opcode,
                                             VectorType *&Ty) {
   if (!EnableReduxCost)
-    return RK_None;
+    return TTI::RK_None;
 
   // Need to extract the first element.
   ConstantInt *CI = dyn_cast<ConstantInt>(ReduxRoot->getOperand(1));
@@ -1036,19 +1015,19 @@
   if (CI)
     Idx = CI->getZExtValue();
   if (Idx != 0)
-    return RK_None;
+    return TTI::RK_None;
 
   auto *RdxStart = dyn_cast<Instruction>(ReduxRoot->getOperand(0));
   if (!RdxStart)
-    return RK_None;
-  Optional<ReductionData> RD = getReductionData(RdxStart);
+    return TTI::RK_None;
+  Optional<TTI::ReductionData> RD = getReductionData(RdxStart);
   if (!RD)
-    return RK_None;
+    return TTI::RK_None;
 
   auto *VecTy = cast<VectorType>(RdxStart->getType());
   unsigned NumVecElems = VecTy->getNumElements();
   if (!isPowerOf2_32(NumVecElems))
-    return RK_None;
+    return TTI::RK_None;
 
   // We look for a sequence of shuffle,shuffle,add triples like the following
   // that builds a pairwise reduction tree.
@@ -1069,8 +1048,8 @@
   // %bin.rdx8 = fadd <4 x float> %rdx.shuf.1.0, %rdx.shuf.1.1
   // %r = extractelement <4 x float> %bin.rdx8, i32 0
   if (matchPairwiseReductionAtLevel(RdxStart, 0, Log2_32(NumVecElems)) ==
-      RK_None)
-    return RK_None;
+      TTI::RK_None)
+    return TTI::RK_None;
 
   Opcode = RD->Opcode;
   Ty = VecTy;
@@ -1089,11 +1068,11 @@
   return std::make_pair(L, S);
 }
 
-static ReductionKind
-matchVectorSplittingReduction(const ExtractElementInst *ReduxRoot,
-                              unsigned &Opcode, VectorType *&Ty) {
+TTI::ReductionKind
+TargetTransformInfo::matchVectorSplittingReduction(
+    const ExtractElementInst *ReduxRoot, unsigned &Opcode, VectorType *&Ty) {
   if (!EnableReduxCost)
-    return RK_None;
+    return TTI::RK_None;
 
   // Need to extract the first element.
   ConstantInt *CI = dyn_cast<ConstantInt>(ReduxRoot->getOperand(1));
@@ -1101,19 +1080,19 @@
   if (CI)
     Idx = CI->getZExtValue();
   if (Idx != 0)
-    return RK_None;
+    return TTI::RK_None;
 
   auto *RdxStart = dyn_cast<Instruction>(ReduxRoot->getOperand(0));
   if (!RdxStart)
-    return RK_None;
-  Optional<ReductionData> RD = getReductionData(RdxStart);
+    return TTI::RK_None;
+  Optional<TTI::ReductionData> RD = getReductionData(RdxStart);
   if (!RD)
-    return RK_None;
+    return TTI::RK_None;
 
   auto *VecTy = cast<VectorType>(ReduxRoot->getOperand(0)->getType());
   unsigned NumVecElems = VecTy->getNumElements();
   if (!isPowerOf2_32(NumVecElems))
-    return RK_None;
+    return TTI::RK_None;
 
   // We look for a sequence of shuffles and adds like the following matching one
   // fadd, shuffle vector pair at a time.
@@ -1133,10 +1112,10 @@
   while (NumVecElemsRemain - 1) {
     // Check for the right reduction operation.
     if (!RdxOp)
-      return RK_None;
-    Optional<ReductionData> RDLevel = getReductionData(RdxOp);
+      return TTI::RK_None;
+    Optional<TTI::ReductionData> RDLevel = getReductionData(RdxOp);
     if (!RDLevel || !RDLevel->hasSameData(*RD))
-      return RK_None;
+      return TTI::RK_None;
 
     Value *NextRdxOp;
     ShuffleVectorInst *Shuffle;
@@ -1145,9 +1124,9 @@
 
     // Check the current reduction operation and the shuffle use the same value.
     if (Shuffle == nullptr)
-      return RK_None;
+      return TTI::RK_None;
     if (Shuffle->getOperand(0) != NextRdxOp)
-      return RK_None;
+      return TTI::RK_None;
 
     // Check that shuffle masks matches.
     for (unsigned j = 0; j != MaskStart; ++j)
@@ -1157,7 +1136,7 @@
 
     ArrayRef<int> Mask = Shuffle->getShuffleMask();
     if (ShuffleMask != Mask)
-      return RK_None;
+      return TTI::RK_None;
 
     RdxOp = dyn_cast<Instruction>(NextRdxOp);
     NumVecElemsRemain /= 2;
@@ -1169,206 +1148,6 @@
   return RD->Kind;
 }
 
-int TargetTransformInfo::getInstructionThroughput(const Instruction *I) const {
-  TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput;
-
-  switch (I->getOpcode()) {
-  case Instruction::GetElementPtr:
-    return getUserCost(I, CostKind);
-
-  case Instruction::Ret:
-  case Instruction::PHI:
-  case Instruction::Br: {
-    return getCFInstrCost(I->getOpcode(), CostKind);
-  }
-  case Instruction::Add:
-  case Instruction::FAdd:
-  case Instruction::Sub:
-  case Instruction::FSub:
-  case Instruction::Mul:
-  case Instruction::FMul:
-  case Instruction::UDiv:
-  case Instruction::SDiv:
-  case Instruction::FDiv:
-  case Instruction::URem:
-  case Instruction::SRem:
-  case Instruction::FRem:
-  case Instruction::Shl:
-  case Instruction::LShr:
-  case Instruction::AShr:
-  case Instruction::And:
-  case Instruction::Or:
-  case Instruction::Xor: {
-    TargetTransformInfo::OperandValueKind Op1VK, Op2VK;
-    TargetTransformInfo::OperandValueProperties Op1VP, Op2VP;
-    Op1VK = getOperandInfo(I->getOperand(0), Op1VP);
-    Op2VK = getOperandInfo(I->getOperand(1), Op2VP);
-    SmallVector<const Value *, 2> Operands(I->operand_values());
-    return getArithmeticInstrCost(I->getOpcode(), I->getType(), CostKind,
-                                  Op1VK, Op2VK,
-                                  Op1VP, Op2VP, Operands, I);
-  }
-  case Instruction::FNeg: {
-    TargetTransformInfo::OperandValueKind Op1VK, Op2VK;
-    TargetTransformInfo::OperandValueProperties Op1VP, Op2VP;
-    Op1VK = getOperandInfo(I->getOperand(0), Op1VP);
-    Op2VK = OK_AnyValue;
-    Op2VP = OP_None;
-    SmallVector<const Value *, 2> Operands(I->operand_values());
-    return getArithmeticInstrCost(I->getOpcode(), I->getType(), CostKind,
-                                  Op1VK, Op2VK,
-                                  Op1VP, Op2VP, Operands, I);
-  }
-  case Instruction::Select: {
-    const SelectInst *SI = cast<SelectInst>(I);
-    Type *CondTy = SI->getCondition()->getType();
-    return getCmpSelInstrCost(I->getOpcode(), I->getType(), CondTy,
-                              CostKind, I);
-  }
-  case Instruction::ICmp:
-  case Instruction::FCmp: {
-    Type *ValTy = I->getOperand(0)->getType();
-    return getCmpSelInstrCost(I->getOpcode(), ValTy, I->getType(),
-                              CostKind, I);
-  }
-  case Instruction::Store: {
-    const StoreInst *SI = cast<StoreInst>(I);
-    Type *ValTy = SI->getValueOperand()->getType();
-    return getMemoryOpCost(I->getOpcode(), ValTy,
-                           MaybeAlign(SI->getAlignment()),
-                           SI->getPointerAddressSpace(), CostKind, I);
-  }
-  case Instruction::Load: {
-    const LoadInst *LI = cast<LoadInst>(I);
-    return getMemoryOpCost(I->getOpcode(), I->getType(),
-                           MaybeAlign(LI->getAlignment()),
-                           LI->getPointerAddressSpace(), CostKind, I);
-  }
-  case Instruction::ZExt:
-  case Instruction::SExt:
-  case Instruction::FPToUI:
-  case Instruction::FPToSI:
-  case Instruction::FPExt:
-  case Instruction::PtrToInt:
-  case Instruction::IntToPtr:
-  case Instruction::SIToFP:
-  case Instruction::UIToFP:
-  case Instruction::Trunc:
-  case Instruction::FPTrunc:
-  case Instruction::BitCast:
-  case Instruction::AddrSpaceCast: {
-    Type *SrcTy = I->getOperand(0)->getType();
-    return getCastInstrCost(I->getOpcode(), I->getType(), SrcTy, CostKind, I);
-  }
-  case Instruction::ExtractElement: {
-    const ExtractElementInst *EEI = cast<ExtractElementInst>(I);
-    ConstantInt *CI = dyn_cast<ConstantInt>(I->getOperand(1));
-    unsigned Idx = -1;
-    if (CI)
-      Idx = CI->getZExtValue();
-
-    // Try to match a reduction sequence (series of shufflevector and vector
-    // adds followed by a extractelement).
-    unsigned ReduxOpCode;
-    VectorType *ReduxType;
-
-    switch (matchVectorSplittingReduction(EEI, ReduxOpCode, ReduxType)) {
-    case RK_Arithmetic:
-      return getArithmeticReductionCost(ReduxOpCode, ReduxType,
-                                        /*IsPairwiseForm=*/false,
-                                        CostKind);
-    case RK_MinMax:
-      return getMinMaxReductionCost(
-          ReduxType, cast<VectorType>(CmpInst::makeCmpResultType(ReduxType)),
-          /*IsPairwiseForm=*/false, /*IsUnsigned=*/false);
-    case RK_UnsignedMinMax:
-      return getMinMaxReductionCost(
-          ReduxType, cast<VectorType>(CmpInst::makeCmpResultType(ReduxType)),
-          /*IsPairwiseForm=*/false, /*IsUnsigned=*/true);
-    case RK_None:
-      break;
-    }
-
-    switch (matchPairwiseReduction(EEI, ReduxOpCode, ReduxType)) {
-    case RK_Arithmetic:
-      return getArithmeticReductionCost(ReduxOpCode, ReduxType,
-                                        /*IsPairwiseForm=*/true, CostKind);
-    case RK_MinMax:
-      return getMinMaxReductionCost(
-          ReduxType, cast<VectorType>(CmpInst::makeCmpResultType(ReduxType)),
-          /*IsPairwiseForm=*/true, /*IsUnsigned=*/false);
-    case RK_UnsignedMinMax:
-      return getMinMaxReductionCost(
-          ReduxType, cast<VectorType>(CmpInst::makeCmpResultType(ReduxType)),
-          /*IsPairwiseForm=*/true, /*IsUnsigned=*/true);
-    case RK_None:
-      break;
-    }
-
-    return getVectorInstrCost(I->getOpcode(), EEI->getOperand(0)->getType(),
-                              Idx);
-  }
-  case Instruction::InsertElement: {
-    const InsertElementInst *IE = cast<InsertElementInst>(I);
-    ConstantInt *CI = dyn_cast<ConstantInt>(IE->getOperand(2));
-    unsigned Idx = -1;
-    if (CI)
-      Idx = CI->getZExtValue();
-    return getVectorInstrCost(I->getOpcode(), IE->getType(), Idx);
-  }
-  case Instruction::ExtractValue:
-    return 0; // Model all ExtractValue nodes as free.
-  case Instruction::ShuffleVector: {
-    const ShuffleVectorInst *Shuffle = cast<ShuffleVectorInst>(I);
-    auto *Ty = cast<VectorType>(Shuffle->getType());
-    auto *SrcTy = cast<VectorType>(Shuffle->getOperand(0)->getType());
-
-    // TODO: Identify and add costs for insert subvector, etc.
-    int SubIndex;
-    if (Shuffle->isExtractSubvectorMask(SubIndex))
-      return TTIImpl->getShuffleCost(SK_ExtractSubvector, SrcTy, SubIndex, Ty);
-
-    if (Shuffle->changesLength())
-      return -1;
-
-    if (Shuffle->isIdentity())
-      return 0;
-
-    if (Shuffle->isReverse())
-      return TTIImpl->getShuffleCost(SK_Reverse, Ty, 0, nullptr);
-
-    if (Shuffle->isSelect())
-      return TTIImpl->getShuffleCost(SK_Select, Ty, 0, nullptr);
-
-    if (Shuffle->isTranspose())
-      return TTIImpl->getShuffleCost(SK_Transpose, Ty, 0, nullptr);
-
-    if (Shuffle->isZeroEltSplat())
-      return TTIImpl->getShuffleCost(SK_Broadcast, Ty, 0, nullptr);
-
-    if (Shuffle->isSingleSource())
-      return TTIImpl->getShuffleCost(SK_PermuteSingleSrc, Ty, 0, nullptr);
-
-    return TTIImpl->getShuffleCost(SK_PermuteTwoSrc, Ty, 0, nullptr);
-  }
-  case Instruction::Call:
-    if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
-      SmallVector<Value *, 4> Args(II->arg_operands());
-
-      FastMathFlags FMF;
-      if (auto *FPMO = dyn_cast<FPMathOperator>(II))
-        FMF = FPMO->getFastMathFlags();
-
-      return getIntrinsicInstrCost(II->getIntrinsicID(), II->getType(), Args,
-                                   FMF, 1, CostKind, II);
-    }
-    return -1;
-  default:
-    // We don't have any information on this instruction.
-    return -1;
-  }
-}
-
 TargetTransformInfo::Concept::~Concept() {}
 
 TargetIRAnalysis::TargetIRAnalysis() : TTICallback(&getDefaultTTI) {}
Index: llvm/lib/CodeGen/InterleavedLoadCombinePass.cpp
===================================================================
--- llvm/lib/CodeGen/InterleavedLoadCombinePass.cpp
+++ llvm/lib/CodeGen/InterleavedLoadCombinePass.cpp
@@ -1160,7 +1160,7 @@
   for (auto &I : Is) {
     // Compute the old cost
     InstructionCost +=
-        TTI.getInstructionCost(I, TargetTransformInfo::TCK_Latency);
+        TTI.getUserCost(I, TargetTransformInfo::TCK_Latency);
 
     // The final SVIs are allowed not to be dead, all uses will be replaced
     if (SVIs.find(I) != SVIs.end())
Index: llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp
===================================================================
--- llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp
+++ llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp
@@ -262,7 +262,7 @@
       if (isa<StoreInst>(&I)) {
         Type *MemAccessTy = I.getOperand(0)->getType();
         NumStores += getMemoryOpCost(Instruction::Store, MemAccessTy, None, 0,
-                                     TTI::TCK_RecipThroughput);
+                                     TTI::TCK_SizeAndLatency);
       }
     }
 
@@ -998,7 +998,7 @@
     // Comparison between memory and immediate.
     if (UserI->getOpcode() == Instruction::ICmp)
       if (ConstantInt *CI = dyn_cast<ConstantInt>(UserI->getOperand(1)))
-        if (isUInt<16>(CI->getZExtValue()))
+        if (CI->getBitWidth() <= 64 && isUInt<16>(CI->getZExtValue()))
           return true;
     return (LoadOrTruncBits == 32 || LoadOrTruncBits == 64);
     break;
Index: llvm/lib/Transforms/IPO/HotColdSplitting.cpp
===================================================================
--- llvm/lib/Transforms/IPO/HotColdSplitting.cpp
+++ llvm/lib/Transforms/IPO/HotColdSplitting.cpp
@@ -230,7 +230,7 @@
     for (Instruction &I : BB->instructionsWithoutDebug())
       if (&I != BB->getTerminator())
         Benefit +=
-            TTI.getInstructionCost(&I, TargetTransformInfo::TCK_CodeSize);
+            TTI.getUserCost(&I, TargetTransformInfo::TCK_CodeSize);
 
   return Benefit;
 }
Index: llvm/lib/Transforms/Scalar/CallSiteSplitting.cpp
===================================================================
--- llvm/lib/Transforms/Scalar/CallSiteSplitting.cpp
+++ llvm/lib/Transforms/Scalar/CallSiteSplitting.cpp
@@ -211,7 +211,7 @@
   unsigned Cost = 0;
   for (auto &InstBeforeCall :
        llvm::make_range(CallSiteBB->begin(), CB.getIterator())) {
-    Cost += TTI.getInstructionCost(&InstBeforeCall,
-                                   TargetTransformInfo::TCK_CodeSize);
+    Cost += TTI.getUserCost(&InstBeforeCall,
+                            TargetTransformInfo::TCK_CodeSize);
     if (Cost >= DuplicationThreshold)
       return false;
Index: llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
===================================================================
--- llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -3610,17 +3610,17 @@
       if (NeedToShuffleReuses) {
         for (unsigned Idx : E->ReuseShuffleIndices) {
           Instruction *I = cast<Instruction>(VL[Idx]);
-          ReuseShuffleCost -= TTI->getInstructionCost(I, CostKind);
+          ReuseShuffleCost -= TTI->getUserCost(I, CostKind);
         }
         for (Value *V : VL) {
           Instruction *I = cast<Instruction>(V);
-          ReuseShuffleCost += TTI->getInstructionCost(I, CostKind);
+          ReuseShuffleCost += TTI->getUserCost(I, CostKind);
         }
       }
       for (Value *V : VL) {
         Instruction *I = cast<Instruction>(V);
         assert(E->isOpcodeOrAlt(I) && "Unexpected main/alternate opcode");
-        ScalarCost += TTI->getInstructionCost(I, CostKind);
+        ScalarCost += TTI->getUserCost(I, CostKind);
       }
       // VecCost is equal to sum of the cost of creating 2 vectors
       // and the cost of creating shuffle.