Index: docs/LangRef.rst
===================================================================
--- docs/LangRef.rst
+++ docs/LangRef.rst
@@ -9704,6 +9704,37 @@
 format that can be written out by a compiler runtime and consumed via
 the ``llvm-profdata`` tool.
 
+'``llvm.instrprof_increment_step``' Intrinsic
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Syntax:
+"""""""
+
+::
+
+      declare void @llvm.instrprof_increment_step(i8* <name>, i64 <hash>,
+                                                  i32 <num-counters>,
+                                                  i32 <index>, i64 <step>)
+
+Overview:
+"""""""""
+
+The '``llvm.instrprof_increment_step``' intrinsic is an extension to
+the '``llvm.instrprof_increment``' intrinsic with an additional fifth
+argument to specify the step of the increment.
+
+Arguments:
+""""""""""
+The first four arguments are the same as '``llvm.instrprof_increment``'
+instrinsic.
+
+The last argument specifies the value of the increment of the counter variable.
+
+Semantics:
+""""""""""
+See description of '``llvm.instrprof_increment``' instrinsic.
+
+
 '``llvm.instrprof_value_profile``' Intrinsic
 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
 
Index: include/llvm/IR/IntrinsicInst.h
===================================================================
--- include/llvm/IR/IntrinsicInst.h
+++ include/llvm/IR/IntrinsicInst.h
@@ -361,6 +361,17 @@
     }
   };
 
+  class InstrProfIncrementInstStep : public InstrProfIncrementInst {
+  public:
+    static inline bool classof(const IntrinsicInst *I) {
+      return I->getIntrinsicID() == Intrinsic::instrprof_increment_step;
+    }
+    static inline bool classof(const Value *V) {
+      return isa<IntrinsicInst>(V) && classof(cast<IntrinsicInst>(V));
+    }
+    Value *getStep() const { return const_cast<Value *>(getArgOperand(4)); }
+  };
+
   /// This represents the llvm.instrprof_value_profile intrinsic.
   class InstrProfValueProfileInst : public IntrinsicInst {
   public:
Index: include/llvm/IR/Intrinsics.td
===================================================================
--- include/llvm/IR/Intrinsics.td
+++ include/llvm/IR/Intrinsics.td
@@ -346,6 +346,12 @@
                                          llvm_i32_ty, llvm_i32_ty],
                                         []>;
 
+// A counter increment with step for instrumentation based profiling.
+def int_instrprof_increment_step : Intrinsic<[],
+                                        [llvm_ptr_ty, llvm_i64_ty,
+                                         llvm_i32_ty, llvm_i32_ty, llvm_i64_ty],
+                                        []>;
+
 // A call to profile runtime for value profiling of target expressions
 // through instrumentation based profiling.
 def int_instrprof_value_profile : Intrinsic<[],
Index: lib/Transforms/Instrumentation/InstrProfiling.cpp
===================================================================
--- lib/Transforms/Instrumentation/InstrProfiling.cpp
+++ lib/Transforms/Instrumentation/InstrProfiling.cpp
@@ -107,6 +107,13 @@
   return getInstrProfCoverageSectionName(isMachO());
 }
 
+static InstrProfIncrementInst *castToIncrementInst(Instruction *Instr) {
+  InstrProfIncrementInst *Inc = dyn_cast<InstrProfIncrementInstStep>(Instr);
+  if (Inc)
+    return Inc;
+  return dyn_cast<InstrProfIncrementInst>(Instr);
+}
+
 bool InstrProfiling::run(Module &M) {
   bool MadeChange = false;
 
@@ -138,7 +145,8 @@
     for (BasicBlock &BB : F)
       for (auto I = BB.begin(), E = BB.end(); I != E;) {
         auto Instr = I++;
-        if (auto *Inc = dyn_cast<InstrProfIncrementInst>(Instr)) {
+        InstrProfIncrementInst *Inc = castToIncrementInst(&*Instr);
+        if (Inc) {
           lowerIncrement(Inc);
           MadeChange = true;
         } else if (auto *Ind = dyn_cast<InstrProfValueProfileInst>(Instr)) {
@@ -214,6 +222,14 @@
   Ind->eraseFromParent();
 }
 
+static Value *getIncrementStep(InstrProfIncrementInst *Inc,
+                               IRBuilder<> &Builder) {
+  auto *IncWithStep = dyn_cast<InstrProfIncrementInstStep>(Inc);
+  if (IncWithStep)
+    return IncWithStep->getStep();
+  return Builder.getInt64(1);
+}
+
 void InstrProfiling::lowerIncrement(InstrProfIncrementInst *Inc) {
   GlobalVariable *Counters = getOrCreateRegionCounters(Inc);
 
@@ -221,7 +237,7 @@
   uint64_t Index = Inc->getIndex()->getZExtValue();
   Value *Addr = Builder.CreateConstInBoundsGEP2_64(Counters, 0, Index);
   Value *Count = Builder.CreateLoad(Addr, "pgocount");
-  Count = Builder.CreateAdd(Count, Builder.getInt64(1));
+  Count = Builder.CreateAdd(Count, getIncrementStep(Inc, Builder));
   Inc->replaceAllUsesWith(Builder.CreateStore(Count, Addr));
   Inc->eraseFromParent();
 }
Index: lib/Transforms/Instrumentation/PGOInstrumentation.cpp
===================================================================
--- lib/Transforms/Instrumentation/PGOInstrumentation.cpp
+++ lib/Transforms/Instrumentation/PGOInstrumentation.cpp
@@ -86,6 +86,7 @@
 #define DEBUG_TYPE "pgo-instrumentation"
 
 STATISTIC(NumOfPGOInstrument, "Number of edges instrumented.");
+STATISTIC(NumOfPGOSelectInsts, "Number of select instruction instrumented.");
 STATISTIC(NumOfPGOEdge, "Number of edges.");
 STATISTIC(NumOfPGOBB, "Number of basic-blocks.");
 STATISTIC(NumOfPGOSplit, "Number of critical edge splits.");
@@ -133,7 +134,65 @@
 static cl::opt<bool> NoPGOWarnMismatch("no-pgo-warn-mismatch", cl::init(false),
                                        cl::Hidden);
 
+// Command line option to enable/disable select instruction instrumentation.
+static cl::opt<bool> PGOInstrSelect("pgo-instr-select", cl::init(true),
+                                    cl::Hidden);
 namespace {
+
+/// The select instruction visitor plays three roles specified
+/// by the mode. In \c VM_counting mode, it simply counts the number of
+/// select instructions. In \c VM_instrument mode, it inserts code to count
+/// the number times TrueValue of select is taken. In \c VM_annotate mode,
+/// it reads the profile data and annotate the select instruction with metadata.
+enum VisitMode { VM_counting, VM_instrument, VM_annotate };
+class PGOUseFunc;
+
+/// Instruction Visitor class to visit select instructions.
+struct SelectInstVisitor : public InstVisitor<SelectInstVisitor> {
+  Function &F;
+  unsigned NSIs = 0;             // Number of select instructions instrumented.
+  VisitMode Mode = VM_counting;  // Visiting mode.
+  unsigned *CurCtrIdx = nullptr; // Pointer to current counter index.
+  unsigned TotalNumCtrs = 0;     // Total number of counters
+  GlobalVariable *FuncNameVar = nullptr;
+  uint64_t FuncHash = 0;
+  PGOUseFunc *UseFunc = nullptr;
+
+  SelectInstVisitor(Function &Func) : F(Func) {}
+
+  void countSelects(Function &Func) {
+    Mode = VM_counting;
+    visit(Func);
+  }
+  // Visit the IR stream and instrument all select instructions. \p
+  // Ind is a pointer to the counter index variable; \p TotalNC
+  // is the total number of counters; \p FNV is the pointer to the
+  // PGO function name var; \p FHash is the function hash.
+  void instrumentSelects(Function &Func, unsigned *Ind, unsigned TotalNC,
+                         GlobalVariable *FNV, uint64_t FHash) {
+    Mode = VM_instrument;
+    CurCtrIdx = Ind;
+    TotalNumCtrs = TotalNC;
+    FuncHash = FHash;
+    FuncNameVar = FNV;
+    visit(Func);
+  }
+
+  // Visit the IR stream and annotate all select instructions.
+  void annotateSelects(Function &Func, PGOUseFunc *UF, unsigned *Ind) {
+    Mode = VM_annotate;
+    UseFunc = UF;
+    CurCtrIdx = Ind;
+    visit(Func);
+  }
+
+  void instrumentOneSelectInst(SelectInst &SI);
+  void annotateOneSelectInst(SelectInst &SI);
+  // Visit \p SI instruction and perform tasks according to visit mode.
+  void visitSelectInst(SelectInst &SI);
+  unsigned getNumOfSelectInsts() const { return NSIs; }
+};
+
 class PGOInstrumentationGenLegacyPass : public ModulePass {
 public:
   static char ID;
@@ -180,6 +239,7 @@
     AU.addRequired<BlockFrequencyInfoWrapperPass>();
   }
 };
+
 } // end anonymous namespace
 
 char PGOInstrumentationGenLegacyPass::ID = 0;
@@ -254,6 +314,7 @@
   std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers;
 
 public:
+  SelectInstVisitor SIVisitor;
   std::string FuncName;
   GlobalVariable *FuncNameVar;
   // CFG hash value for this function.
@@ -280,8 +341,13 @@
       std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers,
       bool CreateGlobalVar = false, BranchProbabilityInfo *BPI = nullptr,
       BlockFrequencyInfo *BFI = nullptr)
-      : F(Func), ComdatMembers(ComdatMembers), FunctionHash(0),
+      : F(Func), ComdatMembers(ComdatMembers), SIVisitor(Func), FunctionHash(0),
         MST(F, BPI, BFI) {
+
+    // This should be done before CFG hash computation.
+    SIVisitor.countSelects(Func);
+    NumOfPGOSelectInsts += SIVisitor.getNumOfSelectInsts();
+
     FuncName = getPGOFuncName(F);
     computeCFGHash();
     if (ComdatMembers.size())
@@ -308,7 +374,7 @@
       if (!E->InMST && !E->Removed)
         NumCounters++;
     }
-    return NumCounters;
+    return NumCounters + SIVisitor.getNumOfSelectInsts();
   }
 };
 
@@ -328,7 +394,8 @@
     }
   }
   JC.update(Indexes);
-  FunctionHash = (uint64_t)findIndirectCallSites(F).size() << 48 |
+  FunctionHash = (uint64_t)SIVisitor.getNumOfSelectInsts() << 56 |
+                 (uint64_t)findIndirectCallSites(F).size() << 48 |
                  (uint64_t)MST.AllEdges.size() << 32 | JC.getCRC();
 }
 
@@ -473,6 +540,10 @@
          Builder.getInt64(FuncInfo.FunctionHash), Builder.getInt32(NumCounters),
          Builder.getInt32(I++)});
   }
+
+  // Now instrument select instructions:
+  FuncInfo.SIVisitor.instrumentSelects(F, &I, NumCounters, FuncInfo.FuncNameVar,
+                                       FuncInfo.FunctionHash);
   assert(I == NumCounters);
 
   if (DisableValueProfiling)
@@ -594,17 +665,17 @@
   // Return the profile record for this function;
   InstrProfRecord &getProfileRecord() { return ProfileRecord; }
 
+  // Return the auxiliary BB information.
+  UseBBInfo &getBBInfo(const BasicBlock *BB) const {
+    return FuncInfo.getBBInfo(BB);
+  }
+
 private:
   Function &F;
   Module *M;
   // This member stores the shared information with class PGOGenFunc.
   FuncPGOInstrumentation<PGOUseEdge, UseBBInfo> FuncInfo;
 
-  // Return the auxiliary BB information.
-  UseBBInfo &getBBInfo(const BasicBlock *BB) const {
-    return FuncInfo.getBBInfo(BB);
-  }
-
   // The maximum count value in the profile. This is only used in PGO use
   // compilation.
   uint64_t ProgramMaxCount;
@@ -677,6 +748,8 @@
     NewEdge1.InMST = true;
     getBBInfo(InstrBB).setBBInfoCount(CountValue);
   }
+  // Now annotate select instructions
+  FuncInfo.SIVisitor.annotateSelects(F, this, &I);
   assert(I == CountFromProfile.size());
 }
 
@@ -820,7 +893,7 @@
   DEBUG(FuncInfo.dumpInfo("after reading profile."));
 }
 
-static void setProfMetadata(Module *M, TerminatorInst *TI,
+static void setProfMetadata(Module *M, Instruction *TI,
                             ArrayRef<uint64_t> EdgeCounts, uint64_t MaxCount) {
   MDBuilder MDB(M->getContext());
   assert(MaxCount > 0 && "Bad max count");
@@ -869,6 +942,57 @@
   }
 }
 
+void SelectInstVisitor::instrumentOneSelectInst(SelectInst &SI) {
+  Module *M = F.getParent();
+  IRBuilder<> Builder(&SI);
+  Type *Int64Ty = Builder.getInt64Ty();
+  Type *I8PtrTy = Builder.getInt8PtrTy();
+  auto *Step = Builder.CreateZExt(SI.getCondition(), Int64Ty);
+  Builder.CreateCall(
+      Intrinsic::getDeclaration(M, Intrinsic::instrprof_increment_step),
+      {llvm::ConstantExpr::getBitCast(FuncNameVar, I8PtrTy),
+       Builder.getInt64(FuncHash),
+       Builder.getInt32(TotalNumCtrs), Builder.getInt32(*CurCtrIdx), Step});
+  ++(*CurCtrIdx);
+}
+
+void SelectInstVisitor::annotateOneSelectInst(SelectInst &SI) {
+  std::vector<uint64_t> &CountFromProfile = UseFunc->getProfileRecord().Counts;
+  assert(*CurCtrIdx < CountFromProfile.size() &&
+         "Out of bound access of counters");
+  uint64_t SCounts[2];
+  SCounts[0] = CountFromProfile[*CurCtrIdx]; // True count
+  ++(*CurCtrIdx);
+  uint64_t TotalCount = UseFunc->getBBInfo(SI.getParent()).CountValue;
+  // False Count
+  SCounts[1] = (TotalCount > SCounts[0] ? TotalCount - SCounts[0] : 0);
+  uint64_t MaxCount = std::max(SCounts[0], SCounts[1]);
+  setProfMetadata(F.getParent(), &SI, SCounts, MaxCount);
+}
+
+void SelectInstVisitor::visitSelectInst(SelectInst &SI) {
+  if (!PGOInstrSelect)
+    return;
+  // FIXME: do not handle this yet.
+  if (SI.getCondition()->getType()->isVectorTy())
+    return;
+
+  NSIs++;
+  switch (Mode) {
+  case VM_counting:
+    return;
+  case VM_instrument:
+    instrumentOneSelectInst(SI);
+    break;
+  case VM_annotate:
+    annotateOneSelectInst(SI);
+    break;
+  default:
+    assert(false && "Unknown visiting mode");
+    break;
+  }
+}
+
 // Traverse all the indirect callsites and annotate the instructions.
 void PGOUseFunc::annotateIndirectCallSites() {
   if (DisableValueProfiling)
Index: test/Transforms/PGOProfile/Inputs/select1.proftext
===================================================================
--- test/Transforms/PGOProfile/Inputs/select1.proftext
+++ test/Transforms/PGOProfile/Inputs/select1.proftext
@@ -0,0 +1,8 @@
+:ir
+test_br_2
+72057623705475732
+3
+4
+1
+1
+
Index: test/Transforms/PGOProfile/select1.ll
===================================================================
--- test/Transforms/PGOProfile/select1.ll
+++ test/Transforms/PGOProfile/select1.ll
@@ -0,0 +1,34 @@
+; RUN: opt < %s -pgo-instr-gen -pgo-instr-select=true -S | FileCheck %s --check-prefix=GEN
+; RUN: opt < %s -passes=pgo-instr-gen -pgo-instr-select=true -S | FileCheck %s --check-prefix=GEN
+; RUN: opt < %s -pgo-instr-gen -pgo-instr-select=false -S | FileCheck %s --check-prefix=NOSELECT
+; RUN: opt < %s -passes=pgo-instr-gen -pgo-instr-select=false -S | FileCheck %s --check-prefix=NOSELECT
+; RUN: llvm-profdata merge %S/Inputs/select1.proftext -o %t.profdata
+; RUN: opt < %s -pgo-instr-use -pgo-test-profile-file=%t.profdata -pgo-instr-select=true -S | FileCheck %s --check-prefix=USE
+; RUN: opt < %s -passes=pgo-instr-use -pgo-test-profile-file=%t.profdata -pgo-instr-select=true -S | FileCheck %s --check-prefix=USE
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+define i32 @test_br_2(i32 %i) {
+entry:
+  %cmp = icmp sgt i32 %i, 0
+  br i1 %cmp, label %if.then, label %if.else
+
+if.then:
+  %add = add nsw i32 %i, 2
+;GEN: %[[STEP:[0-9]+]] = zext i1 %cmp to i64
+;GEN: call void @llvm.instrprof.increment.step({{.*}} i32 3, i32 2, i64 %[[STEP]])
+;NOSELECT-NOT: call void @llvm.instrprof.increment.step
+  %s = select i1 %cmp, i32 %add, i32 0
+;USE: select i1 %cmp{{.*}}, !prof ![[BW_ENTRY:[0-9]+]]
+;USE: ![[BW_ENTRY]] = !{!"branch_weights", i32 1, i32 3}
+
+  br label %if.end
+
+if.else:
+  %sub = sub nsw i32 %i, 2
+  br label %if.end
+
+if.end:
+  %retv = phi i32 [ %add, %if.then ], [ %sub, %if.else ]
+  ret i32 %retv
+}