Index: include/llvm/Bitcode/LLVMBitCodes.h
===================================================================
--- include/llvm/Bitcode/LLVMBitCodes.h
+++ include/llvm/Bitcode/LLVMBitCodes.h
@@ -175,7 +175,7 @@
     VST_CODE_ENTRY   = 1,   // VST_ENTRY: [valueid, namechar x N]
     VST_CODE_BBENTRY = 2,   // VST_BBENTRY: [bbid, namechar x N]
     VST_CODE_FNENTRY = 3,   // VST_FNENTRY: [valueid, offset, namechar x N]
-    // VST_COMBINED_FNENTRY: [funcsumoffset, funcguid]
+    // VST_COMBINED_FNENTRY: [valueid, funcsumoffset, funcguid]
     VST_CODE_COMBINED_FNENTRY = 4
   };
 
@@ -187,8 +187,19 @@
   // The function summary section uses different codes in the per-module
   // and combined index cases.
   enum FunctionSummarySymtabCodes {
-    FS_CODE_PERMODULE_ENTRY = 1,  // FS_ENTRY: [valueid, linkage, instcount]
-    FS_CODE_COMBINED_ENTRY  = 2,  // FS_ENTRY: [modid, linkage, instcount]
+    // PERMODULE_NOCALLS: [valueid, linkage, instcount]
+    FS_PERMODULE_NOCALLS = 1,
+    // PERMODULE_CALLS: [valueid, linkage, instcount, n x valueid]
+    FS_PERMODULE_CALLS = 2,
+    // PERMODULE_CALLS_PROFILE: [valueid, linkage, instcount,
+    //                           n x (valueid, count)]
+    FS_PERMODULE_CALLS_PROFILE = 3,
+    // COMBINED_NOCALLS: [modid, linkage, instcount]
+    FS_COMBINED_NOCALLS  = 4,
+    // COMBINED_CALLS: [modid, linkage, instcount, n x valueid]
+    FS_COMBINED_CALLS  = 5,
+    // COMBINED_CALLS_PROFILE: [modid, linkage, instcount, n x (valueid, count)]
+    FS_COMBINED_CALLS_PROFILE  = 6,
   };
 
   enum MetadataCodes {
Index: include/llvm/IR/FunctionInfo.h
===================================================================
--- include/llvm/IR/FunctionInfo.h
+++ include/llvm/IR/FunctionInfo.h
@@ -31,6 +31,10 @@
 /// This is a separate class from FunctionInfo to enable lazy reading of this
 /// function summary information from the combined index file during imporing.
 class FunctionSummary {
+public:
+  /// <CalleeGUID, ProfileCount> call edge pair.
+  typedef std::pair<uint64_t, uint64_t> EdgeTy;
+
 private:
   /// \brief Path of module containing function IR, used to locate module when
   /// importing this function.
@@ -59,9 +63,12 @@
   /// during the initial compile step when the function index is first built.
   unsigned InstCount;
 
+  /// List of <CalleeGUID, ProfileCount> call edge pairs from this function.
+  std::vector<EdgeTy> CallGraphEdgeList;
+
 public:
-  /// Construct a summary object from summary data expected for all
-  /// summary records.
+  /// Summary constructors.
+  FunctionSummary() : InstCount(0) {}
   FunctionSummary(unsigned NumInsts) : InstCount(NumInsts) {}
 
   /// Set the path to the module containing this function, for use in
@@ -81,8 +88,21 @@
     return FunctionLinkage;
   }
 
+  /// Set the instruction count for this function.
+  void setInstCount(unsigned NumInsts) { InstCount = NumInsts; }
+
   /// Get the instruction count recorded for this function.
   unsigned instCount() const { return InstCount; }
+
+  /// Record a call graph edge from this function to the function identified
+  /// by \p CalleeGUID, with cumulative profile count (across all calls from
+  /// this function) \p CalleeCount, or 0 if no FDO.
+  void addCallGraphEdge(uint64_t CalleeGUID, uint64_t CalleeCount) {
+    CallGraphEdgeList.push_back(std::make_pair(CalleeGUID, CalleeCount));
+  }
+
+  /// Return the list of <CalleeGUID, ProfileCount> pairs.
+  std::vector<EdgeTy> &edges() { return CallGraphEdgeList; }
 };
 
 /// \brief Class to hold pointer to function summary and information required
@@ -249,6 +269,51 @@
   }
 };
 
+/// Helper class for reading and writing the function summary in bitcode.
+/// Specifically, the call graph edges in the summary bitcode section
+/// reference the callees by ValueId. However, in the function index in
+/// memory we want these referenced by GUID. Rather than bloat the
+/// FunctionSummary class with an additional map, we use this class
+/// to temporarily hold the ValueId representation.
+class FunctionSummaryIOHelper {
+public:
+  /// <CalleeValueId, ProfileCount> call edge pair.
+  typedef std::pair<unsigned, uint64_t> EdgeTy;
+
+private:
+  /// The FunctionSummary object being built during bitcode reading,
+  /// or built during the per-module bitcode write process.
+  std::unique_ptr<FunctionSummary> Summary;
+  /// List of <CalleeValueId, ProfileCount> call edge pairs for the function.
+  std::vector<EdgeTy> CallGraphEdgeValueIdList;
+
+public:
+  FunctionSummaryIOHelper() {}
+
+  /// Save the new function summary.
+  void setFunctionSummary(std::unique_ptr<FunctionSummary> FuncSummary) {
+    Summary = std::move(FuncSummary);
+  }
+  /// Get ownership of the function summary unique_ptr.
+  std::unique_ptr<FunctionSummary> getFunctionSummary() {
+    return std::move(Summary);
+  }
+
+  /// Access the function summary pointer.
+  FunctionSummary *functionSummary() { return Summary.get(); }
+
+  /// Record a call graph edge from this function to the function identified
+  /// by \p CalleeValueId, with cumulative profile count (across all calls from
+  /// this function) \p CalleeCount, or 0 if no FDO.
+  void addCallGraphEdge(unsigned CalleeValueId, uint64_t CalleeCount) {
+    CallGraphEdgeValueIdList.push_back(
+        std::make_pair(CalleeValueId, CalleeCount));
+  }
+
+  /// Return the list of <CalleeValueId, ProfileCount> pairs.
+  std::vector<EdgeTy> &edges() { return CallGraphEdgeValueIdList; }
+};
+
 } // End llvm namespace
 
 #endif
Index: include/llvm/ProfileData/ProfileCommon.h
===================================================================
--- include/llvm/ProfileData/ProfileCommon.h
+++ include/llvm/ProfileData/ProfileCommon.h
@@ -12,6 +12,8 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include "llvm/Support/BlockFrequency.h"
+#include "llvm/Support/BranchProbability.h"
 #include <cstdint>
 #include <functional>
 #include <map>
@@ -87,5 +89,23 @@
   return DetailedSummary;
 }
 
+/// Helper to compute the profile count for a block, based on the
+/// ratio of its frequency to the entry block frequency, multiplied
+/// by the entry block count.
+inline uint64_t getBlockProfileCount(uint64_t BlockFreq, uint64_t EntryFreq,
+                                     uint64_t EntryCount) {
+  auto ScaledCount = BlockFrequency(EntryCount);
+  if (EntryFreq > UINT32_MAX || BlockFreq > UINT32_MAX)
+    // Can't use BranchProbability in general, since it takes 32-bit numbers.
+    ScaledCount = ScaledCount.getFrequency() * (BlockFreq / EntryFreq);
+  else if (BlockFreq < EntryFreq)
+    ScaledCount *= BranchProbability(BlockFreq, EntryFreq);
+  else
+    // Invert the fraction and divide.
+    ScaledCount /= BranchProbability(EntryFreq, BlockFreq);
+
+  return ScaledCount.getFrequency();
+}
+
 } // end namespace llvm
 #endif
Index: lib/Bitcode/Reader/BitcodeReader.cpp
===================================================================
--- lib/Bitcode/Reader/BitcodeReader.cpp
+++ lib/Bitcode/Reader/BitcodeReader.cpp
@@ -450,7 +450,7 @@
   /// offset to the function summary (since in the combined index the
   /// VST records do not hold value IDs but rather hold the function
   /// summary record offset).
-  DenseMap<uint64_t, std::unique_ptr<FunctionSummary>> SummaryMap;
+  std::map<uint64_t, FunctionSummaryIOHelper> SummaryMap;
 
   /// Map populated during module path string table parsing, from the
   /// module ID to a string reference owned by the index's module
@@ -5437,6 +5437,19 @@
 
   SmallVector<uint64_t, 64> Record;
 
+  // Map to save ValueId to GUID association that was recorded in the
+  // ValueSymbolTable. It is used after the VST is parsed to convert
+  // call graph edges read from the function summary from referencing
+  // callees by their ValueId to using the GUID instead, which is how
+  // they are recorded in the function index being built.
+  DenseMap<unsigned, uint64_t> ValueIdToCallGraphGUIDMap;
+
+  // Map to keep track of which helper object was associated with which
+  // function summary, used when we transfer ownership of summary to
+  // the index, because we need this information for later creation
+  // of call graph edges in the index.
+  DenseMap<FunctionSummaryIOHelper *, FunctionSummary *> HelperToFuncSummaryMap;
+
   // Read all the records for this value table.
   SmallString<128> ValueName;
   while (1) {
@@ -5446,8 +5459,34 @@
     case BitstreamEntry::SubBlock: // Handled for us already.
     case BitstreamEntry::Error:
       return error("Malformed block");
-    case BitstreamEntry::EndBlock:
+    case BitstreamEntry::EndBlock: {
+      // We now have saved the ValueId to GUID mapping for all functions in the
+      // VST. For the non-lazy function summary parsing case, we can convert the
+      // call graph edges from using the Callee ValueId to instead using the
+      // GUID and record them in the index. In the lazy summary parsing combined
+      // function case, we can perform this mapping as the summaries are parsed.
+      if (foundFuncSummary() && !IsLazy) {
+        for (auto &SMI : SummaryMap) {
+          // Walk over the call edges in this entry and add them to the
+          // function summary.
+          for (auto &EI : SMI.second.edges()) {
+            auto CGI = ValueIdToCallGraphGUIDMap.find(EI.first);
+            // For the per-module case we should only have edges to external
+            // calls which would have a VST_CODE_ENTRY in the VST, and these
+            // were added to the map, so we should always find an entry.
+            // Similarly, when writing the combined index bitcode we filtered
+            // out any edges to functions that weren't in the index, and all
+            // combined VST entries were added to the map.
+            assert(CGI != ValueIdToCallGraphGUIDMap.end());
+            // Add the edge to the summary now, using the GUID.
+            auto FSI = HelperToFuncSummaryMap.find(&SMI.second);
+            assert(FSI != HelperToFuncSummaryMap.end());
+            FSI->second->addCallGraphEdge(CGI->second, EI.second);
+          }
+        }
+      }
       return std::error_code();
+    }
     case BitstreamEntry::Record:
       // The interesting case.
       break;
@@ -5458,6 +5497,17 @@
     switch (Stream.readRecord(Entry.ID, Record)) {
     default: // Default behavior: ignore (e.g. VST_CODE_BBENTRY records).
       break;
+    case bitc::VST_CODE_ENTRY: { // VST_CODE_ENTRY: [valueid, namechar x N]
+      if (convertToString(Record, 1, ValueName))
+        return error("Invalid record");
+      unsigned ValueID = Record[0];
+      // The per-module summary call graph edges are only recorded for
+      // external calls, which use this record type. Save the mapping
+      // between the ValueId and the GUID.
+      ValueIdToCallGraphGUIDMap[ValueID] = Function::getGUID(ValueName);
+      ValueName.clear();
+      break;
+    }
     case bitc::VST_CODE_FNENTRY: {
       // VST_CODE_FNENTRY: [valueid, offset, namechar x N]
       if (convertToString(Record, 2, ValueName))
@@ -5468,12 +5518,12 @@
       // Gracefully handle bitcode without a function summary section,
       // which will simply not populate the index.
       if (foundFuncSummary()) {
-        DenseMap<uint64_t, std::unique_ptr<FunctionSummary>>::iterator SMI =
-            SummaryMap.find(ValueID);
+        auto SMI = SummaryMap.find(ValueID);
         assert(SMI != SummaryMap.end() && "Summary info not found");
         std::unique_ptr<FunctionInfo> FuncInfo =
             llvm::make_unique<FunctionInfo>(FuncOffset);
-        FuncInfo->setFunctionSummary(std::move(SMI->second));
+        FuncInfo->setFunctionSummary(SMI->second.getFunctionSummary());
+        HelperToFuncSummaryMap[&SMI->second] = FuncInfo->functionSummary();
         assert(!SourceFileName.empty());
         std::string FunctionGlobalId = Function::getGlobalIdentifier(
             ValueName, FuncInfo->functionSummary()->getFunctionLinkage(),
@@ -5485,18 +5535,20 @@
       break;
     }
     case bitc::VST_CODE_COMBINED_FNENTRY: {
-      // VST_CODE_COMBINED_FNENTRY: [offset, funcguid]
-      uint64_t FuncSummaryOffset = Record[0];
-      uint64_t FuncGUID = Record[1];
+      // VST_CODE_COMBINED_FNENTRY: [valueid, offset, funcguid]
+      unsigned ValueID = Record[0];
+      uint64_t FuncSummaryOffset = Record[1];
+      uint64_t FuncGUID = Record[2];
       std::unique_ptr<FunctionInfo> FuncInfo =
           llvm::make_unique<FunctionInfo>(FuncSummaryOffset);
       if (foundFuncSummary() && !IsLazy) {
-        DenseMap<uint64_t, std::unique_ptr<FunctionSummary>>::iterator SMI =
-            SummaryMap.find(FuncSummaryOffset);
+        auto SMI = SummaryMap.find(FuncSummaryOffset);
         assert(SMI != SummaryMap.end() && "Summary info not found");
-        FuncInfo->setFunctionSummary(std::move(SMI->second));
+        FuncInfo->setFunctionSummary(SMI->second.getFunctionSummary());
+        HelperToFuncSummaryMap[&SMI->second] = FuncInfo->functionSummary();
       }
       TheIndex->addFunctionInfo(FuncGUID, std::move(FuncInfo));
+      ValueIdToCallGraphGUIDMap[ValueID] = Function::getGUID(ValueName);
 
       ValueName.clear();
       break;
@@ -5624,11 +5676,17 @@
     // information used for ThinLTO renaming and importing.
     Record.clear();
     uint64_t CurRecordBit = Stream.GetCurrentBitNo();
-    switch (Stream.readRecord(Entry.ID, Record)) {
+    auto BitCode = Stream.readRecord(Entry.ID, Record);
+    switch (BitCode) {
     default: // Default behavior: ignore.
       break;
-    // FS_PERMODULE_ENTRY: [valueid, linkage, instcount]
-    case bitc::FS_CODE_PERMODULE_ENTRY: {
+    // FS_PERMODULE_NOCALLS: [valueid, linkage, instcount]
+    // FS_PERMODULE_CALLS: [valueid, linkage, instcount, n x valueid]
+    // FS_PERMODULE_CALLS_PROFILE: [valueid, linkage, instcount,
+    //                              n x (valueid, count)]
+    case bitc::FS_PERMODULE_NOCALLS:
+    case bitc::FS_PERMODULE_CALLS:
+    case bitc::FS_PERMODULE_CALLS_PROFILE: {
       unsigned ValueID = Record[0];
       uint64_t RawLinkage = Record[1];
       unsigned InstCount = Record[2];
@@ -5642,10 +5700,27 @@
       // ownership.
       FS->setModulePath(
           TheIndex->addModulePath(Buffer->getBufferIdentifier(), 0));
-      SummaryMap[ValueID] = std::move(FS);
+      FunctionSummaryIOHelper &BitcodeSummary = SummaryMap[ValueID];
+      BitcodeSummary.setFunctionSummary(std::move(FS));
+      bool HasProfile = (BitCode == bitc::FS_PERMODULE_CALLS_PROFILE);
+      assert(BitCode != bitc::FS_PERMODULE_NOCALLS || Record.size() < 4);
+      // For now save the call graph edges using the recorded ValueId,
+      // on the summary helper object. After reading the VST this will
+      // be transferred into the function summary in the index, using the
+      // callee GUID instead.
+      for (unsigned I = 3, E = Record.size(); I != E; ++I) {
+        BitcodeSummary.addCallGraphEdge(Record[I],
+                                        HasProfile ? Record[++I] : 0);
+      }
+      break;
     }
-    // FS_COMBINED_ENTRY: [modid, linkage, instcount]
-    case bitc::FS_CODE_COMBINED_ENTRY: {
+    // FS_COMBINED_NOCALLS: [modid, linkage, instcount]
+    // FS_COMBINED_CALLS: [modid, linkage, instcount, n x valueid]
+    // FS_COMBINED_CALLS_PROFILE: [modid, linkage, instcount,
+    //                             n x (valueid, count)]
+    case bitc::FS_COMBINED_NOCALLS:
+    case bitc::FS_COMBINED_CALLS:
+    case bitc::FS_COMBINED_CALLS_PROFILE: {
       uint64_t ModuleId = Record[0];
       uint64_t RawLinkage = Record[1];
       unsigned InstCount = Record[2];
@@ -5653,7 +5728,21 @@
           llvm::make_unique<FunctionSummary>(InstCount);
       FS->setFunctionLinkage(getDecodedLinkage(RawLinkage));
       FS->setModulePath(ModuleIdMap[ModuleId]);
-      SummaryMap[CurRecordBit] = std::move(FS);
+      FunctionSummaryIOHelper &BitcodeSummary = SummaryMap[CurRecordBit];
+      BitcodeSummary.setFunctionSummary(std::move(FS));
+      bool HasProfile = (BitCode == bitc::FS_COMBINED_CALLS_PROFILE);
+      assert(BitCode != bitc::FS_COMBINED_NOCALLS || Record.size() < 4);
+      // For now save the call graph edges using the recorded ValueId,
+      // on the summary helper object. After reading the VST this will
+      // be transferred into the function summary in the index, using the
+      // callee GUID instead.
+      static int CallGraphEdgeStartIndex = 3;
+      for (unsigned I = CallGraphEdgeStartIndex, E = Record.size(); I != E;
+           ++I) {
+        BitcodeSummary.addCallGraphEdge(Record[I],
+                                        HasProfile ? Record[++I] : 0);
+      }
+      break;
     }
     }
   }
@@ -5771,7 +5860,9 @@
   // importing is added so that it can be tested.
   SmallVector<uint64_t, 64> Record;
   switch (Stream.readRecord(Entry.ID, Record)) {
-  case bitc::FS_CODE_COMBINED_ENTRY:
+  case bitc::FS_COMBINED_NOCALLS:
+  case bitc::FS_COMBINED_CALLS:
+  case bitc::FS_COMBINED_CALLS_PROFILE:
   default:
     return error("Invalid record");
   }
Index: lib/Bitcode/Writer/BitcodeWriter.cpp
===================================================================
--- lib/Bitcode/Writer/BitcodeWriter.cpp
+++ lib/Bitcode/Writer/BitcodeWriter.cpp
@@ -11,24 +11,30 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "llvm/Bitcode/ReaderWriter.h"
 #include "ValueEnumerator.h"
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/Triple.h"
+#include "llvm/Analysis/BlockFrequencyInfo.h"
+#include "llvm/Analysis/BlockFrequencyInfoImpl.h"
+#include "llvm/Analysis/BranchProbabilityInfo.h"
+#include "llvm/Analysis/LoopInfo.h"
 #include "llvm/Bitcode/BitstreamWriter.h"
 #include "llvm/Bitcode/LLVMBitCodes.h"
+#include "llvm/Bitcode/ReaderWriter.h"
 #include "llvm/IR/CallSite.h"
 #include "llvm/IR/Constants.h"
 #include "llvm/IR/DebugInfoMetadata.h"
 #include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Dominators.h"
 #include "llvm/IR/InlineAsm.h"
 #include "llvm/IR/Instructions.h"
-#include "llvm/IR/LLVMContext.h"
 #include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/LLVMContext.h"
 #include "llvm/IR/Module.h"
 #include "llvm/IR/Operator.h"
 #include "llvm/IR/UseListOrder.h"
 #include "llvm/IR/ValueSymbolTable.h"
+#include "llvm/ProfileData/ProfileCommon.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/MathExtras.h"
@@ -38,6 +44,71 @@
 #include <map>
 using namespace llvm;
 
+namespace {
+/// Helper class for writing function summary section. Used to hold information
+/// built while writing the function to bitcode, then later accessed when
+/// writing the function summary and the ValueSymbolTable.
+class FunctionWriteInfo {
+private:
+  // Pair holding the bitcode index of the corresponding function block
+  // (written to the VST) and the helper object holding other summary
+  // information.
+  typedef std::pair<uint64_t, std::unique_ptr<FunctionSummaryIOHelper>> ValueTy;
+  std::map<const Function *, ValueTy> FunctionMap;
+  bool EmitFunctionSummary;
+
+public:
+  FunctionWriteInfo(bool EmitFunctionSummary)
+      : EmitFunctionSummary(EmitFunctionSummary) {}
+
+  /// Create an entry in the map for the given function, which is
+  /// written to the bitcode index \p FuncOffset.
+  void recordFunction(const Function &F, uint64_t FuncOffset) {
+    std::unique_ptr<FunctionSummaryIOHelper> BitcodeSummaryInfo;
+    // If writing function summary sections, create the helper object and
+    // start populating the function summary.
+    if (EmitFunctionSummary) {
+      BitcodeSummaryInfo = llvm::make_unique<FunctionSummaryIOHelper>();
+      std::unique_ptr<FunctionSummary> FuncSummary =
+          llvm::make_unique<FunctionSummary>();
+      FuncSummary->setFunctionLinkage(F.getLinkage());
+      BitcodeSummaryInfo->setFunctionSummary(std::move(FuncSummary));
+    }
+    FunctionMap[&F] = std::make_pair(FuncOffset, std::move(BitcodeSummaryInfo));
+  }
+
+  /// Save the instruction count to be written to the summary section for \p F.
+  void setInstCount(const Function &F, unsigned NumInsts) {
+    assert(EmitFunctionSummary);
+    FunctionMap[&F].second->functionSummary()->setInstCount(NumInsts);
+  }
+
+  /// Save the call graph edges to be written to the summary section for \p F.
+  /// The edges are pairs of <CalleeValueId, ProfileCount>, where ProfileCount
+  /// is 0 when there is no PGO.
+  void addCallGraphEdges(const Function &F,
+                         DenseMap<unsigned, uint64_t> &CallGraphEdges) {
+    assert(EmitFunctionSummary);
+    for (auto &EI : CallGraphEdges)
+      FunctionMap[&F].second->addCallGraphEdge(EI.first, EI.second);
+  }
+
+  /// Return the bitcode index where the function block for \p F was written.
+  uint64_t getBitcodeIndex(const Function &F) {
+    auto FMI = FunctionMap.find(&F);
+    assert(FMI != FunctionMap.end());
+    return FMI->second.first;
+  }
+
+  /// Return the summary helper object for writing the summary section for \p F.
+  FunctionSummaryIOHelper &getBitcodeSummary(const Function &F) {
+    auto FMI = FunctionMap.find(&F);
+    assert(FMI != FunctionMap.end());
+    return *FMI->second.second;
+  }
+};
+}
+
 /// These are manifest constants used by the bitcode writer. They do not need to
 /// be kept in sync with the reader, but need to be consistent within this file.
 enum {
@@ -2241,15 +2312,15 @@
 }
 
 /// Emit names for globals/functions etc. The VSTOffsetPlaceholder,
-/// BitcodeStartBit and FunctionIndex are only passed for the module-level
+/// BitcodeStartBit and FunctionInfo are only passed for the module-level
 /// VST, where we are including a function bitcode index and need to
 /// backpatch the VST forward declaration record.
-static void WriteValueSymbolTable(
-    const ValueSymbolTable &VST, const ValueEnumerator &VE,
-    BitstreamWriter &Stream, uint64_t VSTOffsetPlaceholder = 0,
-    uint64_t BitcodeStartBit = 0,
-    DenseMap<const Function *, std::unique_ptr<FunctionInfo>> *FunctionIndex =
-        nullptr) {
+static void WriteValueSymbolTable(const ValueSymbolTable &VST,
+                                  const ValueEnumerator &VE,
+                                  BitstreamWriter &Stream,
+                                  uint64_t VSTOffsetPlaceholder = 0,
+                                  uint64_t BitcodeStartBit = 0,
+                                  FunctionWriteInfo *FunctionInfo = nullptr) {
   if (VST.empty()) {
     // WriteValueSymbolTableForwardDecl should have returned early as
     // well. Ensure this handling remains in sync by asserting that
@@ -2338,14 +2409,13 @@
       // Must be the module-level VST, where we pass in the Index and
       // have a VSTOffsetPlaceholder. The function-level VST should not
       // contain any Function symbols.
-      assert(FunctionIndex);
+      assert(FunctionInfo);
       assert(VSTOffsetPlaceholder > 0);
 
       // Save the word offset of the function (from the start of the
       // actual bitcode written to the stream).
-      assert(FunctionIndex->count(F) == 1);
       uint64_t BitcodeIndex =
-          (*FunctionIndex)[F]->bitcodeIndex() - BitcodeStartBit;
+          FunctionInfo->getBitcodeIndex(*F) - BitcodeStartBit;
       assert((BitcodeIndex & 31) == 0 && "function block not 32-bit aligned");
       NameVals.push_back(BitcodeIndex / 32);
 
@@ -2375,12 +2445,15 @@
 
 /// Emit function names and summary offsets for the combined index
 /// used by ThinLTO.
-static void WriteCombinedValueSymbolTable(const FunctionInfoIndex &Index,
-                                          BitstreamWriter &Stream) {
+static void
+WriteCombinedValueSymbolTable(const FunctionInfoIndex &Index,
+                              BitstreamWriter &Stream,
+                              std::map<uint64_t, unsigned> &GUIDToValueIdMap) {
   Stream.EnterSubblock(bitc::VALUE_SYMTAB_BLOCK_ID, 4);
 
   BitCodeAbbrev *Abbv = new BitCodeAbbrev();
   Abbv->Add(BitCodeAbbrevOp(bitc::VST_CODE_COMBINED_FNENTRY));
+  Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // valueid
   Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // funcsumoffset
   Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // funcguid
   unsigned FnEntryAbbrev = Stream.EmitAbbrev(Abbv);
@@ -2389,13 +2462,15 @@
 
   for (const auto &FII : Index) {
     for (const auto &FI : FII.second) {
-      NameVals.push_back(FI->bitcodeIndex());
+      // VST_CODE_COMBINED_FNENTRY: [valueid, funcsumoffset, funcguid]
+      unsigned AbbrevToUse = FnEntryAbbrev;
 
       uint64_t FuncGUID = FII.first;
+      const auto &VMI = GUIDToValueIdMap.find(FuncGUID);
+      assert(VMI != GUIDToValueIdMap.end());
 
-      // VST_CODE_COMBINED_FNENTRY: [funcsumoffset, funcguid]
-      unsigned AbbrevToUse = FnEntryAbbrev;
-
+      NameVals.push_back(VMI->second);
+      NameVals.push_back(FI->bitcodeIndex());
       NameVals.push_back(FuncGUID);
 
       // Emit the finished record.
@@ -2440,33 +2515,23 @@
   Stream.ExitBlock();
 }
 
-/// \brief Save information for the given function into the function index.
-///
-/// At a minimum this saves the bitcode index of the function record that
-/// was just written. However, if we are emitting function summary information,
-/// for example for ThinLTO, then a \a FunctionSummary object is created
-/// to hold the provided summary information.
-static void SaveFunctionInfo(
-    const Function &F,
-    DenseMap<const Function *, std::unique_ptr<FunctionInfo>> &FunctionIndex,
-    unsigned NumInsts, uint64_t BitcodeIndex, bool EmitFunctionSummary) {
-  std::unique_ptr<FunctionSummary> FuncSummary;
-  if (EmitFunctionSummary) {
-    FuncSummary = llvm::make_unique<FunctionSummary>(NumInsts);
-    FuncSummary->setFunctionLinkage(F.getLinkage());
-  }
-  FunctionIndex[&F] =
-      llvm::make_unique<FunctionInfo>(BitcodeIndex, std::move(FuncSummary));
-}
-
 /// Emit a function body to the module stream.
-static void WriteFunction(
-    const Function &F, ValueEnumerator &VE, BitstreamWriter &Stream,
-    DenseMap<const Function *, std::unique_ptr<FunctionInfo>> &FunctionIndex,
-    bool EmitFunctionSummary) {
+static void WriteFunction(const Function &F, const Module *M,
+                          ValueEnumerator &VE, BitstreamWriter &Stream,
+                          FunctionWriteInfo &FunctionInfo,
+                          bool EmitFunctionSummary) {
   // Save the bitcode index of the start of this function block for recording
   // in the VST.
-  uint64_t BitcodeIndex = Stream.GetCurrentBitNo();
+  FunctionInfo.recordFunction(F, Stream.GetCurrentBitNo());
+
+  bool HasProfileData = F.getEntryCount().hasValue();
+  std::unique_ptr<BlockFrequencyInfo> BFI;
+  if (EmitFunctionSummary && HasProfileData) {
+    Function &Func = const_cast<Function &>(F);
+    LoopInfo LI{DominatorTree(Func)};
+    BranchProbabilityInfo BPI{Func, LI};
+    BFI = llvm::make_unique<BlockFrequencyInfo>(Func, BPI, LI);
+  }
 
   Stream.EnterSubblock(bitc::FUNCTION_BLOCK_ID, 4);
   VE.incorporateFunction(F);
@@ -2494,6 +2559,9 @@
 
   DILocation *LastDL = nullptr;
   unsigned NumInsts = 0;
+  // Map from callee ValueId to profile count. Used to accumulate profile
+  // counts for all static calls to a given callee.
+  DenseMap<unsigned, uint64_t> CallGraphEdges;
 
   // Finally, emit all the instructions, in order.
   for (Function::const_iterator BB = F.begin(), E = F.end(); BB != E; ++BB)
@@ -2507,6 +2575,21 @@
       if (!I->getType()->isVoidTy())
         ++InstID;
 
+      if (EmitFunctionSummary && isa<CallInst>(I)) {
+        auto CalledFunction = cast<CallInst>(I)->getCalledFunction();
+        if (CalledFunction && CalledFunction->hasName() &&
+            !CalledFunction->isIntrinsic() && CalledFunction->isDeclaration()) {
+          uint64_t ScaledCount = 0;
+          if (HasProfileData)
+            ScaledCount = getBlockProfileCount(
+                BFI->getBlockFreq(&(*BB)).getFrequency(), BFI->getEntryFreq(),
+                F.getEntryCount().getValue());
+          unsigned CalleeId = VE.getValueID(
+              M->getValueSymbolTable().lookup(CalledFunction->getName()));
+          CallGraphEdges[CalleeId] += ScaledCount;
+        }
+      }
+
       // If the instruction has metadata, write a metadata attachment later.
       NeedsMetadataAttachment |= I->hasMetadataOtherThanDebugLoc();
 
@@ -2531,6 +2614,11 @@
       LastDL = DL;
     }
 
+  if (EmitFunctionSummary) {
+    FunctionInfo.setInstCount(F, NumInsts);
+    FunctionInfo.addCallGraphEdges(F, CallGraphEdges);
+  }
+
   // Emit names for all the instructions etc.
   WriteValueSymbolTable(F.getValueSymbolTable(), VE, Stream);
 
@@ -2540,9 +2628,6 @@
     WriteUseListBlock(&F, VE, Stream);
   VE.purgeFunction();
   Stream.ExitBlock();
-
-  SaveFunctionInfo(F, FunctionIndex, NumInsts, BitcodeIndex,
-                   EmitFunctionSummary);
 }
 
 // Emit blockinfo, which defines the standard abbreviations etc.
@@ -2776,35 +2861,75 @@
 
 // Helper to emit a single function summary record.
 static void WritePerModuleFunctionSummaryRecord(
-    SmallVector<unsigned, 64> &NameVals, FunctionSummary *FS, unsigned ValueID,
-    unsigned FSAbbrev, BitstreamWriter &Stream) {
+    SmallVector<uint64_t, 64> &NameVals,
+    FunctionSummaryIOHelper &BitcodeSummary, unsigned ValueID,
+    unsigned FSNoCallsAbbrev, unsigned FSCallsAbbrev,
+    unsigned FSCallsProfileAbbrev, BitstreamWriter &Stream, const Function &F) {
+  FunctionSummary *FS = BitcodeSummary.functionSummary();
   assert(FS);
   NameVals.push_back(ValueID);
   NameVals.push_back(getEncodedLinkage(FS->getFunctionLinkage()));
   NameVals.push_back(FS->instCount());
 
+  bool HasProfileData = F.getEntryCount().hasValue();
+  for (auto &ECI : BitcodeSummary.edges()) {
+    NameVals.push_back(ECI.first);
+    if (HasProfileData)
+      NameVals.push_back(ECI.second);
+  }
+
+  unsigned FSAbbrev =
+      BitcodeSummary.edges().empty()
+          ? FSNoCallsAbbrev
+          : (HasProfileData ? FSCallsProfileAbbrev : FSCallsAbbrev);
+  unsigned Code = BitcodeSummary.edges().empty()
+                      ? bitc::FS_PERMODULE_NOCALLS
+                      : (HasProfileData ? bitc::FS_PERMODULE_CALLS_PROFILE
+                                        : bitc::FS_PERMODULE_CALLS);
+
   // Emit the finished record.
-  Stream.EmitRecord(bitc::FS_CODE_PERMODULE_ENTRY, NameVals, FSAbbrev);
+  Stream.EmitRecord(Code, NameVals, FSAbbrev);
   NameVals.clear();
 }
 
 /// Emit the per-module function summary section alongside the rest of
 /// the module's bitcode.
-static void WritePerModuleFunctionSummary(
-    DenseMap<const Function *, std::unique_ptr<FunctionInfo>> &FunctionIndex,
-    const Module *M, const ValueEnumerator &VE, BitstreamWriter &Stream) {
+static void WritePerModuleFunctionSummary(FunctionWriteInfo &FunctionInfo,
+                                          const Module *M,
+                                          const ValueEnumerator &VE,
+                                          BitstreamWriter &Stream) {
   Stream.EnterSubblock(bitc::FUNCTION_SUMMARY_BLOCK_ID, 3);
 
-  // Abbrev for FS_CODE_PERMODULE_ENTRY.
+  // Abbrev for FS_PERMODULE_NOCALLS.
   BitCodeAbbrev *Abbv = new BitCodeAbbrev();
-  Abbv->Add(BitCodeAbbrevOp(bitc::FS_CODE_PERMODULE_ENTRY));
+  Abbv->Add(BitCodeAbbrevOp(bitc::FS_PERMODULE_NOCALLS));
   Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8));   // valueid
   Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 5)); // linkage
   Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8));   // instcount
-  unsigned FSAbbrev = Stream.EmitAbbrev(Abbv);
+  unsigned FSNoCallsAbbrev = Stream.EmitAbbrev(Abbv);
 
-  SmallVector<unsigned, 64> NameVals;
-  // Iterate over the list of functions instead of the FunctionIndex map to
+  // Abbrev for FS_PERMODULE_CALLS.
+  Abbv = new BitCodeAbbrev();
+  Abbv->Add(BitCodeAbbrevOp(bitc::FS_PERMODULE_CALLS));
+  Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8));   // valueid
+  Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 5)); // linkage
+  Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8));   // instcount
+  Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array));    // valueids
+  Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6));
+  unsigned FSCallsAbbrev = Stream.EmitAbbrev(Abbv);
+
+  // Abbrev for FS_PERMODULE_CALLS_PROFILE.
+  Abbv = new BitCodeAbbrev();
+  Abbv->Add(BitCodeAbbrevOp(bitc::FS_PERMODULE_CALLS_PROFILE));
+  Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8));   // valueid
+  Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 5)); // linkage
+  Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8));   // instcount
+  Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array));    // valueid/count pairs
+  Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6));
+  unsigned FSCallsProfileAbbrev = Stream.EmitAbbrev(Abbv);
+
+  SmallVector<uint64_t, 64> NameVals;
+  // Iterate over the list of functions instead of the FunctionInfo map to
   // ensure the ordering is stable.
   for (const Function &F : *M) {
     if (F.isDeclaration())
@@ -2814,12 +2939,10 @@
     if (!F.hasName())
       continue;
 
-    assert(FunctionIndex.count(&F) == 1);
-
     WritePerModuleFunctionSummaryRecord(
-        NameVals, FunctionIndex[&F]->functionSummary(),
-        VE.getValueID(M->getValueSymbolTable().lookup(F.getName())), FSAbbrev,
-        Stream);
+        NameVals, FunctionInfo.getBitcodeSummary(F),
+        VE.getValueID(M->getValueSymbolTable().lookup(F.getName())),
+        FSNoCallsAbbrev, FSCallsAbbrev, FSCallsProfileAbbrev, Stream, F);
   }
 
   for (const GlobalAlias &A : M->aliases()) {
@@ -2829,11 +2952,10 @@
     if (!F || F->isDeclaration())
       continue;
 
-    assert(FunctionIndex.count(F) == 1);
     WritePerModuleFunctionSummaryRecord(
-        NameVals, FunctionIndex[F]->functionSummary(),
-        VE.getValueID(M->getValueSymbolTable().lookup(A.getName())), FSAbbrev,
-        Stream);
+        NameVals, FunctionInfo.getBitcodeSummary(*F),
+        VE.getValueID(M->getValueSymbolTable().lookup(A.getName())),
+        FSNoCallsAbbrev, FSCallsAbbrev, FSCallsProfileAbbrev, Stream, *F);
   }
 
   Stream.ExitBlock();
@@ -2841,19 +2963,41 @@
 
 /// Emit the combined function summary section into the combined index
 /// file.
-static void WriteCombinedFunctionSummary(const FunctionInfoIndex &I,
-                                         BitstreamWriter &Stream) {
+static void
+WriteCombinedFunctionSummary(const FunctionInfoIndex &I,
+                             BitstreamWriter &Stream,
+                             std::map<uint64_t, unsigned> &GUIDToValueIdMap) {
   Stream.EnterSubblock(bitc::FUNCTION_SUMMARY_BLOCK_ID, 3);
 
-  // Abbrev for FS_CODE_COMBINED_ENTRY.
+  // Abbrev for FS_COMBINED_NOCALLS.
   BitCodeAbbrev *Abbv = new BitCodeAbbrev();
-  Abbv->Add(BitCodeAbbrevOp(bitc::FS_CODE_COMBINED_ENTRY));
+  Abbv->Add(BitCodeAbbrevOp(bitc::FS_COMBINED_NOCALLS));
+  Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8));   // modid
+  Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 5)); // linkage
+  Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8));   // instcount
+  unsigned FSNoCallsAbbrev = Stream.EmitAbbrev(Abbv);
+
+  // Abbrev for FS_COMBINED_CALLS.
+  Abbv = new BitCodeAbbrev();
+  Abbv->Add(BitCodeAbbrevOp(bitc::FS_COMBINED_CALLS));
   Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // modid
   Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 5)); // linkage
   Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // instcount
-  unsigned FSAbbrev = Stream.EmitAbbrev(Abbv);
+  Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array));  // valueids
+  Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6));
+  unsigned FSCallsAbbrev = Stream.EmitAbbrev(Abbv);
 
-  SmallVector<unsigned, 64> NameVals;
+  // Abbrev for FS_COMBINED_CALLS_PROFILE.
+  Abbv = new BitCodeAbbrev();
+  Abbv->Add(BitCodeAbbrevOp(bitc::FS_COMBINED_CALLS_PROFILE));
+  Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8));   // modid
+  Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 5)); // linkage
+  Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8));   // instcount
+  Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array));    // valueid/count pairs
+  Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6));
+  unsigned FSCallsProfileAbbrev = Stream.EmitAbbrev(Abbv);
+
+  SmallVector<uint64_t, 64> NameVals;
   for (const auto &FII : I) {
     for (auto &FI : FII.second) {
       FunctionSummary *FS = FI->functionSummary();
@@ -2863,13 +3007,41 @@
       NameVals.push_back(getEncodedLinkage(FS->getFunctionLinkage()));
       NameVals.push_back(FS->instCount());
 
+      bool HasProfileData = false;
+      for (auto &EI : FS->edges()) {
+        HasProfileData |= EI.second != 0;
+        if (HasProfileData)
+          break;
+      }
+
+      bool HasCalls = false;
+      for (auto &EI : FS->edges()) {
+        const auto &VMI = GUIDToValueIdMap.find(EI.first);
+        // If this GUID doesn't have an entry, it doesn't have a function
+        // summary and we don't need to record any calls to it.
+        if (VMI == GUIDToValueIdMap.end())
+          continue;
+        HasCalls = true;
+        NameVals.push_back(VMI->second);
+        if (HasProfileData)
+          NameVals.push_back(EI.second);
+      }
+
       // Record the starting offset of this summary entry for use
       // in the VST entry. Add the current code size since the
       // reader will invoke readRecord after the abbrev id read.
       FI->setBitcodeIndex(Stream.GetCurrentBitNo() + Stream.GetAbbrevIDWidth());
 
+      unsigned FSAbbrev =
+          HasCalls ? (HasProfileData ? FSCallsProfileAbbrev : FSCallsAbbrev)
+                   : FSNoCallsAbbrev;
+      unsigned Code = HasCalls
+                          ? (HasProfileData ? bitc::FS_COMBINED_CALLS_PROFILE
+                                            : bitc::FS_COMBINED_CALLS)
+                          : bitc::FS_COMBINED_NOCALLS;
+
       // Emit the finished record.
-      Stream.EmitRecord(bitc::FS_CODE_COMBINED_ENTRY, NameVals, FSAbbrev);
+      Stream.EmitRecord(Code, NameVals, FSAbbrev);
       NameVals.clear();
     }
   }
@@ -2949,18 +3121,18 @@
   WriteOperandBundleTags(M, Stream);
 
   // Emit function bodies.
-  DenseMap<const Function *, std::unique_ptr<FunctionInfo>> FunctionIndex;
+  FunctionWriteInfo FunctionInfo(EmitFunctionSummary);
   for (Module::const_iterator F = M->begin(), E = M->end(); F != E; ++F)
     if (!F->isDeclaration())
-      WriteFunction(*F, VE, Stream, FunctionIndex, EmitFunctionSummary);
+      WriteFunction(*F, M, VE, Stream, FunctionInfo, EmitFunctionSummary);
 
   // Need to write after the above call to WriteFunction which populates
   // the summary information in the index.
   if (EmitFunctionSummary)
-    WritePerModuleFunctionSummary(FunctionIndex, M, VE, Stream);
+    WritePerModuleFunctionSummary(FunctionInfo, M, VE, Stream);
 
   WriteValueSymbolTable(M->getValueSymbolTable(), VE, Stream,
-                        VSTOffsetPlaceholder, BitcodeStartBit, &FunctionIndex);
+                        VSTOffsetPlaceholder, BitcodeStartBit, &FunctionInfo);
 
   Stream.ExitBlock();
 }
@@ -3105,12 +3277,21 @@
   // Write the module paths in the combined index.
   WriteModStrings(Index, Stream);
 
+  // Assign unique value ids to all functions in the index for use
+  // in writing out the call graph edges. Save the mapping from GUID
+  // to the new global value id to use when writing those edges, which
+  // are currently saved in the index in terms of GUID.
+  std::map<uint64_t, unsigned> GUIDToValueIdMap;
+  unsigned GlobalValueId = 0;
+  for (auto &II : Index)
+    GUIDToValueIdMap[II.first] = ++GlobalValueId;
+
   // Write the function summary combined index records.
-  WriteCombinedFunctionSummary(Index, Stream);
+  WriteCombinedFunctionSummary(Index, Stream, GUIDToValueIdMap);
 
   // Need a special VST writer for the combined index (we don't have a
   // real VST and real values when this is invoked).
-  WriteCombinedValueSymbolTable(Index, Stream);
+  WriteCombinedValueSymbolTable(Index, Stream, GUIDToValueIdMap);
 
   Stream.ExitBlock();
 
Index: lib/Bitcode/Writer/LLVMBuild.txt
===================================================================
--- lib/Bitcode/Writer/LLVMBuild.txt
+++ lib/Bitcode/Writer/LLVMBuild.txt
@@ -19,4 +19,4 @@
 type = Library
 name = BitWriter
 parent = Bitcode
-required_libraries = Core Support
+required_libraries = Analysis Core Support
Index: test/Bitcode/Inputs/thinlto-function-summary-callgraph-pgo.ll
===================================================================
--- /dev/null
+++ test/Bitcode/Inputs/thinlto-function-summary-callgraph-pgo.ll
@@ -0,0 +1,11 @@
+; ModuleID = 'thinlto-function-summary-callgraph2.ll'
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+; Function Attrs: nounwind uwtable
+define void @func() #0 !prof !2 {
+entry:
+    ret void
+}
+
+!2 = !{!"function_entry_count", i64 1}
Index: test/Bitcode/Inputs/thinlto-function-summary-callgraph.ll
===================================================================
--- /dev/null
+++ test/Bitcode/Inputs/thinlto-function-summary-callgraph.ll
@@ -0,0 +1,10 @@
+; ModuleID = 'thinlto-function-summary-callgraph2.ll'
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+; Function Attrs: nounwind uwtable
+define void @func() #0 {
+entry:
+    ret void
+}
+
Index: test/Bitcode/thinlto-function-summary-callgraph-pgo.ll
===================================================================
--- /dev/null
+++ test/Bitcode/thinlto-function-summary-callgraph-pgo.ll
@@ -0,0 +1,43 @@
+; RUN: llvm-as -function-summary %s -o %t.o
+; RUN: llvm-bcanalyzer -dump %t.o | FileCheck %s
+; RUN: llvm-as -function-summary %p/Inputs/thinlto-function-summary-callgraph.ll -o %t2.o
+; RUN: llvm-lto -thinlto -o %t3 %t.o %t2.o
+; RUN: llvm-bcanalyzer -dump %t3.thinlto.bc | FileCheck %s --check-prefix=COMBINED
+
+; CHECK:       <FUNCTION_SUMMARY_BLOCK
+; See if the call to func is registered, using the expected value id and
+; profile count
+; CHECK-NEXT:    <PERMODULE_CALLS_PROFILE {{.*}} op3=1 op4=1/>
+; CHECK-NEXT:  </FUNCTION_SUMMARY_BLOCK>
+; CHECK-NEXT:  <VALUE_SYMTAB
+; CHECK-NEXT:    <FNENTRY {{.*}} record string = 'main'
+; External function func should have entry with value id 1
+; CHECK-NEXT:    <ENTRY {{.*}} op0=1 {{.*}} record string = 'func'
+; CHECK-NEXT:  </VALUE_SYMTAB>
+
+; COMBINED:       <FUNCTION_SUMMARY_BLOCK
+; COMBINED-NEXT:    <COMBINED_NOCALLS
+; See if the call to func is registered, using the expected value id and
+; profile count
+; COMBINED-NEXT:    <COMBINED_CALLS_PROFILE {{.*}} op3=1 op4=1/>
+; COMBINED-NEXT:  </FUNCTION_SUMMARY_BLOCK>
+; COMBINED-NEXT:  <VALUE_SYMTAB
+; Entry for function func should have entry with value id 1
+; COMBINED-NEXT:    <COMBINED_FNENTRY {{.*}} op0=1 {{.*}} op2=7289175272376759421/>
+; COMBINED-NEXT:    <COMBINED_FNENTRY
+; COMBINED-NEXT:  </VALUE_SYMTAB>
+
+; ModuleID = 'thinlto-function-summary-callgraph.ll'
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+; Function Attrs: nounwind uwtable
+define i32 @main() #0 !prof !2 {
+entry:
+    call void (...) @func()
+    ret i32 0
+}
+
+declare void @func(...) #1
+
+!2 = !{!"function_entry_count", i64 1}
Index: test/Bitcode/thinlto-function-summary-callgraph.ll
===================================================================
--- /dev/null
+++ test/Bitcode/thinlto-function-summary-callgraph.ll
@@ -0,0 +1,39 @@
+; RUN: llvm-as -function-summary %s -o %t.o
+; RUN: llvm-bcanalyzer -dump %t.o | FileCheck %s
+; RUN: llvm-as -function-summary %p/Inputs/thinlto-function-summary-callgraph.ll -o %t2.o
+; RUN: llvm-lto -thinlto -o %t3 %t.o %t2.o
+; RUN: llvm-bcanalyzer -dump %t3.thinlto.bc | FileCheck %s --check-prefix=COMBINED
+
+; CHECK:       <FUNCTION_SUMMARY_BLOCK
+; See if the call to func is registered, using the expected value id
+; CHECK-NEXT:    <PERMODULE_CALLS {{.*}} op3=1/>
+; CHECK-NEXT:  </FUNCTION_SUMMARY_BLOCK>
+; CHECK-NEXT:  <VALUE_SYMTAB
+; CHECK-NEXT:    <FNENTRY {{.*}} record string = 'main'
+; External function func should have entry with value id 1
+; CHECK-NEXT:    <ENTRY {{.*}} op0=1 {{.*}} record string = 'func'
+; CHECK-NEXT:  </VALUE_SYMTAB>
+
+; COMBINED:       <FUNCTION_SUMMARY_BLOCK
+; COMBINED-NEXT:    <COMBINED_NOCALLS
+; See if the call to func is registered, using the expected value id
+; COMBINED-NEXT:    <COMBINED_CALLS {{.*}} op3=1/>
+; COMBINED-NEXT:  </FUNCTION_SUMMARY_BLOCK>
+; COMBINED-NEXT:  <VALUE_SYMTAB
+; Entry for function func should have entry with value id 1
+; COMBINED-NEXT:    <COMBINED_FNENTRY {{.*}} op0=1 {{.*}} op2=7289175272376759421/>
+; COMBINED-NEXT:    <COMBINED_FNENTRY
+; COMBINED-NEXT:  </VALUE_SYMTAB>
+
+; ModuleID = 'thinlto-function-summary-callgraph.ll'
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+; Function Attrs: nounwind uwtable
+define i32 @main() #0 {
+entry:
+    call void (...) @func()
+    ret i32 0
+}
+
+declare void @func(...) #1
Index: test/Bitcode/thinlto-function-summary.ll
===================================================================
--- test/Bitcode/thinlto-function-summary.ll
+++ test/Bitcode/thinlto-function-summary.ll
@@ -5,9 +5,9 @@
 ; same in the ValueSumbolTable, to ensure the ordering is stable.
 ; Also check the linkage field on the summary entries.
 ; BC: <FUNCTION_SUMMARY_BLOCK
-; BC-NEXT: <PERMODULE_ENTRY {{.*}} op0=1 op1=0
-; BC-NEXT: <PERMODULE_ENTRY {{.*}} op0=2 op1=0
-; BC-NEXT: <PERMODULE_ENTRY {{.*}} op0=4 op1=3
+; BC-NEXT: <PERMODULE_NOCALLS {{.*}} op0=1 op1=0
+; BC-NEXT: <PERMODULE_NOCALLS {{.*}} op0=2 op1=0
+; BC-NEXT: <PERMODULE_NOCALLS {{.*}} op0=4 op1=3
 ; BC-NEXT: </FUNCTION_SUMMARY_BLOCK
 ; BC-NEXT: <VALUE_SYMTAB
 ; BC-NEXT: <FNENTRY {{.*}} op0=1 {{.*}}> record string = 'foo'
Index: test/Bitcode/thinlto-summary-linkage-types.ll
===================================================================
--- test/Bitcode/thinlto-summary-linkage-types.ll
+++ test/Bitcode/thinlto-summary-linkage-types.ll
@@ -5,57 +5,57 @@
 ; RUN: llvm-bcanalyzer -dump %t2.thinlto.bc | FileCheck %s --check-prefix=COMBINED
 
 define private void @private()
-; CHECK: <PERMODULE_ENTRY {{.*}} op1=9
-; COMBINED-DAG: <COMBINED_ENTRY {{.*}} op1=9
+; CHECK: <PERMODULE_NOCALLS {{.*}} op1=9
+; COMBINED-DAG: <COMBINED_NOCALLS {{.*}} op1=9
 {
   ret void
 }
 
 define internal void @internal()
-; CHECK: <PERMODULE_ENTRY {{.*}} op1=3
-; COMBINED-DAG: <COMBINED_ENTRY {{.*}} op1=3
+; CHECK: <PERMODULE_NOCALLS {{.*}} op1=3
+; COMBINED-DAG: <COMBINED_NOCALLS {{.*}} op1=3
 {
   ret void
 }
 
 define available_externally void @available_externally()
-; CHECK: <PERMODULE_ENTRY {{.*}} op1=12
-; COMBINED-DAG: <COMBINED_ENTRY {{.*}} op1=12
+; CHECK: <PERMODULE_NOCALLS {{.*}} op1=12
+; COMBINED-DAG: <COMBINED_NOCALLS {{.*}} op1=12
 {
   ret void
 }
 
 define linkonce void @linkonce()
-; CHECK: <PERMODULE_ENTRY {{.*}} op1=18
-; COMBINED-DAG: <COMBINED_ENTRY {{.*}} op1=18
+; CHECK: <PERMODULE_NOCALLS {{.*}} op1=18
+; COMBINED-DAG: <COMBINED_NOCALLS {{.*}} op1=18
 {
   ret void
 }
 
 define weak void @weak()
-; CHECK: <PERMODULE_ENTRY {{.*}} op1=16
-; COMBINED-DAG: <COMBINED_ENTRY {{.*}} op1=16
+; CHECK: <PERMODULE_NOCALLS {{.*}} op1=16
+; COMBINED-DAG: <COMBINED_NOCALLS {{.*}} op1=16
 {
   ret void
 }
 
 define linkonce_odr void @linkonce_odr()
-; CHECK: <PERMODULE_ENTRY {{.*}} op1=19
-; COMBINED-DAG: <COMBINED_ENTRY {{.*}} op1=19
+; CHECK: <PERMODULE_NOCALLS {{.*}} op1=19
+; COMBINED-DAG: <COMBINED_NOCALLS {{.*}} op1=19
 {
   ret void
 }
 
 define weak_odr void @weak_odr()
-; CHECK: <PERMODULE_ENTRY {{.*}} op1=17
-; COMBINED-DAG: <COMBINED_ENTRY {{.*}} op1=17
+; CHECK: <PERMODULE_NOCALLS {{.*}} op1=17
+; COMBINED-DAG: <COMBINED_NOCALLS {{.*}} op1=17
 {
   ret void
 }
 
 define external void @external()
-; CHECK: <PERMODULE_ENTRY {{.*}} op1=0
-; COMBINED-DAG: <COMBINED_ENTRY {{.*}} op1=0
+; CHECK: <PERMODULE_NOCALLS {{.*}} op1=0
+; COMBINED-DAG: <COMBINED_NOCALLS {{.*}} op1=0
 {
   ret void
 }
Index: test/tools/gold/X86/thinlto.ll
===================================================================
--- test/tools/gold/X86/thinlto.ll
+++ test/tools/gold/X86/thinlto.ll
@@ -20,14 +20,14 @@
 ; COMBINED-NEXT: <ENTRY {{.*}} record string = '{{.*}}/test/tools/gold/X86/Output/thinlto.ll.tmp{{.*}}.o'
 ; COMBINED-NEXT: </MODULE_STRTAB_BLOCK
 ; COMBINED-NEXT: <FUNCTION_SUMMARY_BLOCK
-; COMBINED-NEXT: <COMBINED_ENTRY
-; COMBINED-NEXT: <COMBINED_ENTRY
+; COMBINED-NEXT: <COMBINED_NOCALLS
+; COMBINED-NEXT: <COMBINED_NOCALLS
 ; COMBINED-NEXT: </FUNCTION_SUMMARY_BLOCK
 ; COMBINED-NEXT: <VALUE_SYMTAB
-; Check that the format is: op0=offset, op1=funcguid, where funcguid is
-; the lower 64 bits of the function name MD5.
-; COMBINED-NEXT: <COMBINED_FNENTRY abbrevid={{[0-9]+}} op0={{[0-9]+}} op1={{-3706093650706652785|-5300342847281564238}}
-; COMBINED-NEXT: <COMBINED_FNENTRY abbrevid={{[0-9]+}} op0={{[0-9]+}} op1={{-3706093650706652785|-5300342847281564238}}
+; Check that the format is: op0=valueid, op1=offset, op2=funcguid,
+; where funcguid is the lower 64 bits of the function name MD5.
+; COMBINED-NEXT: <COMBINED_FNENTRY abbrevid={{[0-9]+}} op0={{1|2}} op1={{[0-9]+}} op2={{-3706093650706652785|-5300342847281564238}}
+; COMBINED-NEXT: <COMBINED_FNENTRY abbrevid={{[0-9]+}} op0={{1|2}} op1={{[0-9]+}} op2={{-3706093650706652785|-5300342847281564238}}
 ; COMBINED-NEXT: </VALUE_SYMTAB
 
 define void @f() {
Index: test/tools/llvm-lto/thinlto.ll
===================================================================
--- test/tools/llvm-lto/thinlto.ll
+++ test/tools/llvm-lto/thinlto.ll
@@ -10,14 +10,14 @@
 ; COMBINED-NEXT: <ENTRY {{.*}} record string = '{{.*}}thinlto.ll.tmp{{.*}}.o'
 ; COMBINED-NEXT: </MODULE_STRTAB_BLOCK
 ; COMBINED-NEXT: <FUNCTION_SUMMARY_BLOCK
-; COMBINED-NEXT: <COMBINED_ENTRY
-; COMBINED-NEXT: <COMBINED_ENTRY
+; COMBINED-NEXT: <COMBINED_NOCALLS
+; COMBINED-NEXT: <COMBINED_NOCALLS
 ; COMBINED-NEXT: </FUNCTION_SUMMARY_BLOCK
 ; COMBINED-NEXT: <VALUE_SYMTAB
-; Check that the format is: op0=offset, op1=funcguid, where funcguid is
-; the lower 64 bits of the function name MD5.
-; COMBINED-NEXT: <COMBINED_FNENTRY abbrevid={{[0-9]+}} op0={{[0-9]+}} op1={{-3706093650706652785|-5300342847281564238}}
-; COMBINED-NEXT: <COMBINED_FNENTRY abbrevid={{[0-9]+}} op0={{[0-9]+}} op1={{-3706093650706652785|-5300342847281564238}}
+; Check that the format is: op0=valueid, op1=offset, op2=funcguid,
+; where funcguid is the lower 64 bits of the function name MD5.
+; COMBINED-NEXT: <COMBINED_FNENTRY abbrevid={{[0-9]+}} op0={{1|2}} op1={{[0-9]+}} op2={{-3706093650706652785|-5300342847281564238}}
+; COMBINED-NEXT: <COMBINED_FNENTRY abbrevid={{[0-9]+}} op0={{1|2}} op1={{[0-9]+}} op2={{-3706093650706652785|-5300342847281564238}}
 ; COMBINED-NEXT: </VALUE_SYMTAB
 
 define void @f() {
Index: tools/llvm-bcanalyzer/llvm-bcanalyzer.cpp
===================================================================
--- tools/llvm-bcanalyzer/llvm-bcanalyzer.cpp
+++ tools/llvm-bcanalyzer/llvm-bcanalyzer.cpp
@@ -292,8 +292,12 @@
     switch (CodeID) {
     default:
       return nullptr;
-      STRINGIFY_CODE(FS_CODE, PERMODULE_ENTRY)
-      STRINGIFY_CODE(FS_CODE, COMBINED_ENTRY)
+      STRINGIFY_CODE(FS, PERMODULE_NOCALLS)
+      STRINGIFY_CODE(FS, PERMODULE_CALLS)
+      STRINGIFY_CODE(FS, PERMODULE_CALLS_PROFILE)
+      STRINGIFY_CODE(FS, COMBINED_NOCALLS)
+      STRINGIFY_CODE(FS, COMBINED_CALLS)
+      STRINGIFY_CODE(FS, COMBINED_CALLS_PROFILE)
     }
   case bitc::METADATA_ATTACHMENT_ID:
     switch(CodeID) {