diff --git a/llvm/include/llvm/ProfileData/SampleProf.h b/llvm/include/llvm/ProfileData/SampleProf.h
--- a/llvm/include/llvm/ProfileData/SampleProf.h
+++ b/llvm/include/llvm/ProfileData/SampleProf.h
@@ -318,6 +318,14 @@
 
 raw_ostream &operator<<(raw_ostream &OS, const LineLocation &Loc);
 
+static inline uint64_t hashFuncName(StringRef F) {
+  // If function name is already MD5 string, do not hash again.
+  uint64_t Hash;
+  if (F.getAsInteger(10, Hash))
+    Hash = MD5Hash(F);
+  return Hash;
+}
+
 /// Representation of a single sample record.
 ///
 /// A sample record is represented by a positive integer value, which
@@ -631,8 +639,12 @@
   }
 
   uint64_t getHashCode() const {
-    return hasContext() ? hash_value(getContextFrames())
-                        : hash_value(getName());
+    if (hasContext())
+      return hash_value(getContextFrames());
+
+    // For non-context function name, use its MD5 as hash value, so that it is
+    // consistent with the profile map's key.
+    return hashFuncName(getName());
   }
 
   /// Set the name of the function and clear the current context.
@@ -710,11 +722,6 @@
   uint32_t Attributes;
 };
 
-static inline hash_code hash_value(const SampleContext &arg) {
-  return arg.hasContext() ? hash_value(arg.getContextFrames())
-                          : hash_value(arg.getName());
-}
-
 class FunctionSamples;
 class SampleProfileReaderItaniumRemapper;
 
@@ -1267,10 +1274,107 @@
 
 raw_ostream &operator<<(raw_ostream &OS, const FunctionSamples &FS);
 
-using SampleProfileMap =
-    std::unordered_map<SampleContext, FunctionSamples, SampleContext::Hash>;
+/// This class provides operator overloads to the map container using MD5 as the
+/// key type, so that existing code can still work in most cases using
+/// SampleContext as key. It checks for MD5 collision when inserting an element.
+/// Note: when populating container, make sure to assign the SampleContext to
+/// the mapped value immediately because the key no longer holds it.
+class SampleProfileMap : public std::unordered_map<uint64_t, FunctionSamples> {
+public:
+  using base_type = std::unordered_map<uint64_t, FunctionSamples>;
 
-using NameFunctionSamples = std::pair<SampleContext, const FunctionSamples *>;
+  // If MD5 collision actually happens, we have to erase the existing entry
+  // rather than returning a reference to an irrelevant entry, and pretend the
+  // old entry never existed. Dropping a sample should not affect compilation
+  // correctness.
+  template <typename... Ts>
+  std::pair<iterator, bool>
+  try_emplace(const key_type &Key, const SampleContext &Ctx, Ts &&...Args) {
+    assert(Key == Ctx.getHashCode());
+    auto Ret = base_type::try_emplace(Key, std::forward<Ts>(Args)...);
+    if (!Ret.second) {
+      if (LLVM_UNLIKELY(Ret.first->second.getContext() != Ctx)) {
+        dbgs() << "MD5 collision detected: "
+               << Ret.first->second.getContext().toString() << " and "
+               << Ctx.toString() << " has same hash value " << Key << "\n";
+        Ret.second = true;
+        Ret.first->second = mapped_type(std::forward<Ts>(Args)...);
+      }
+    }
+    return Ret;
+  }
+
+  template <typename... Ts>
+  std::pair<iterator, bool> try_emplace(const SampleContext &Ctx,
+                                        Ts &&...Args) {
+    uint64_t Hash = Ctx.getHashCode();
+    return try_emplace(Hash, Ctx, std::forward<Ts>(Args)...);
+  }
+
+  template <typename... Ts>
+  std::pair<iterator, bool> emplace(const SampleContext &Ctx, Ts &&...Args) {
+    return try_emplace(Ctx, std::forward<Ts>(Args)...);
+  }
+
+  mapped_type &operator[](const SampleContext &Ctx) {
+    return try_emplace(Ctx, FunctionSamples()).first->second;
+  }
+
+  // Convenience method because this is being used in many places. Set the
+  // FunctionSamples' context if its newly inserted.
+  mapped_type &Create(const SampleContext &Ctx) {
+    auto Ret = try_emplace(Ctx, FunctionSamples());
+    if (Ret.second)
+      Ret.first->second.setContext(Ctx);
+    return Ret.first->second;
+  }
+
+  iterator find(const SampleContext &Ctx) {
+    uint64_t Hash = Ctx.getHashCode();
+    auto It = base_type::find(Hash);
+    if (It != end())
+      if (LLVM_LIKELY(It->second.getContext() == Ctx))
+        return It;
+    return end();
+  }
+
+  const_iterator find(const SampleContext &Ctx) const {
+    uint64_t Hash = Ctx.getHashCode();
+    auto It = base_type::find(Hash);
+    if (It != end())
+      if (LLVM_LIKELY(It->second.getContext() == Ctx))
+        return It;
+    return end();
+  }
+
+  // Overloaded find() to lookup a function by name. This is called by IPO
+  // passes with an actual fucntion name, and it is possible that the profile
+  // reader converted function names in the profile to MD5 strings, so we need
+  // to check if either representation matches.
+  iterator find(StringRef Fname) {
+    uint64_t Hash = hashFuncName(Fname);
+    auto It = base_type::find(Hash);
+    if (It != end()) {
+      StringRef CtxName = It->second.getContext().getName();
+      if (LLVM_LIKELY(CtxName == Fname || CtxName == std::to_string(Hash)))
+        return It;
+    }
+    return end();
+  }
+
+  size_t erase(const SampleContext &Ctx) {
+    auto It = find(Ctx);
+    if (It != end()) {
+      base_type::erase(It);
+      return 1;
+    }
+    return 0;
+  }
+
+  size_t erase(const key_type &Key) { return base_type::erase(Key); }
+};
+
+using NameFunctionSamples = std::pair<uint64_t, const FunctionSamples *>;
 
 void sortFuncProfiles(const SampleProfileMap &ProfileMap,
                       std::vector<NameFunctionSamples> &SortedProfiles);
@@ -1316,8 +1420,6 @@
                                        bool MergeColdContext,
                                        uint32_t ColdContextFrameLength,
                                        bool TrimBaseProfileOnly);
-  // Canonicalize context profile name and attributes.
-  void canonicalizeContextProfiles();
 
 private:
   SampleProfileMap &ProfileMap;
@@ -1363,12 +1465,12 @@
                              SampleProfileMap &OutputProfiles,
                              bool ProfileIsCS = false) {
     if (ProfileIsCS) {
-      for (const auto &I : InputProfiles)
-        OutputProfiles[I.second.getName()].merge(I.second);
-      // Retain the profile name and clear the full context for each function
-      // profile.
-      for (auto &I : OutputProfiles)
-        I.second.setContext(SampleContext(I.first));
+      for (const auto &I : InputProfiles) {
+        // Retain the profile name and clear the full context for each function
+        // profile.
+        FunctionSamples &FS = OutputProfiles.Create(I.second.getName());
+        FS.merge(I.second);
+      }
     } else {
       for (const auto &I : InputProfiles)
         flattenNestedProfile(OutputProfiles, I.second);
diff --git a/llvm/include/llvm/ProfileData/SampleProfReader.h b/llvm/include/llvm/ProfileData/SampleProfReader.h
--- a/llvm/include/llvm/ProfileData/SampleProfReader.h
+++ b/llvm/include/llvm/ProfileData/SampleProfReader.h
@@ -347,7 +347,7 @@
 public:
   SampleProfileReader(std::unique_ptr<MemoryBuffer> B, LLVMContext &C,
                       SampleProfileFormat Format = SPF_None)
-      : Profiles(0), Ctx(C), Buffer(std::move(B)), Format(Format) {}
+      : Profiles(), Ctx(C), Buffer(std::move(B)), Format(Format) {}
 
   virtual ~SampleProfileReader() = default;
 
@@ -383,8 +383,8 @@
   /// The implementaion to read sample profiles from the associated file.
   virtual std::error_code readImpl() = 0;
 
-  /// Print the profile for \p FContext on stream \p OS.
-  void dumpFunctionProfile(SampleContext FContext, raw_ostream &OS = dbgs());
+  /// Print the profile for \p FunctionSamples on stream \p OS.
+  void dumpFunctionProfile(const FunctionSamples &FS, raw_ostream &OS = dbgs());
 
   /// Collect functions with definitions in Module M. For reader which
   /// support loading function profiles on demand, return true when the
@@ -410,23 +410,15 @@
   /// Return the samples collected for function \p F, create empty
   /// FunctionSamples if it doesn't exist.
   FunctionSamples *getOrCreateSamplesFor(const Function &F) {
-    std::string FGUID;
     StringRef CanonName = FunctionSamples::getCanonicalFnName(F);
-    CanonName = getRepInFormat(CanonName, useMD5(), FGUID);
-    auto It = Profiles.find(CanonName);
-    if (It != Profiles.end())
-      return &It->second;
-    if (!FGUID.empty()) {
-      assert(useMD5() && "New name should only be generated for md5 profile");
-      CanonName = *MD5NameBuffer.insert(FGUID).first;
-    }
+    auto it = Profiles.find(CanonName);
+    if (it != Profiles.end())
+      return &it->second;
     return &Profiles[CanonName];
   }
 
   /// Return the samples collected for function \p F.
-  virtual FunctionSamples *getSamplesFor(StringRef Fname) {
-    std::string FGUID;
-    Fname = getRepInFormat(Fname, useMD5(), FGUID);
+  FunctionSamples *getSamplesFor(StringRef Fname) {
     auto It = Profiles.find(Fname);
     if (It != Profiles.end())
       return &It->second;
@@ -654,15 +646,16 @@
   /// Read the whole name table.
   std::error_code readNameTable();
 
-  /// Read a string indirectly via the name table.
-  ErrorOr<StringRef> readStringFromTable();
+  /// Read a string indirectly via the name table. Optionally return the index.
+  ErrorOr<StringRef> readStringFromTable(size_t *Ret = nullptr);
 
-  /// Read a context indirectly via the CSNameTable.
-  ErrorOr<SampleContextFrames> readContextFromTable();
+  /// Read a context indirectly via the CSNameTable. Optionally return the
+  /// index.
+  ErrorOr<SampleContextFrames> readContextFromTable(size_t *Ret = nullptr);
 
   /// Read a context indirectly via the CSNameTable if the profile has context,
-  /// otherwise same as readStringFromTable.
-  ErrorOr<SampleContext> readSampleContextFromTable();
+  /// otherwise same as readStringFromTable, also return its hash value.
+  ErrorOr<std::pair<SampleContext, uint64_t>> readSampleContextFromTable();
 
   /// Points to the current location in the buffer.
   const uint8_t *Data = nullptr;
@@ -682,13 +675,24 @@
   /// the lifetime of MD5StringBuf is not shorter than that of NameTable.
   std::vector<std::string> MD5StringBuf;
 
-  /// The starting address of NameTable containing fixed length MD5.
+  /// The starting address of fixed length MD5 name table section.
   const uint8_t *MD5NameMemStart = nullptr;
 
   /// CSNameTable is used to save full context vectors. It is the backing buffer
   /// for SampleContextFrames.
   std::vector<SampleContextFrameVector> CSNameTable;
 
+  /// Table to cache MD5 values of sample contexts corresponding to
+  /// readSampleContextFromTable(), used to index into Profiles or
+  /// FuncOffsetTable.
+  std::vector<uint64_t> MD5SampleContextTable;
+
+  /// The starting address of the table of MD5 values of sample contexts. For
+  /// fixed length MD5 non-CS profile it is same as MD5NameMemStart because
+  /// hashes of non-CS contexts are already in the profile. Otherwise it points
+  /// to the start of MD5SampleContextTable.
+  const uint64_t *MD5SampleContextStart = nullptr;
+
 private:
   std::error_code readSummaryEntry(std::vector<ProfileSummaryEntry> &Entries);
   virtual std::error_code verifySPMagic(uint64_t Magic) = 0;
@@ -762,10 +766,10 @@
 
   std::unique_ptr<ProfileSymbolList> ProfSymList;
 
-  /// The table mapping from function context to the offset of its
+  /// The table mapping from a function context's MD5 to the offset of its
   /// FunctionSample towards file start.
   /// At most one of FuncOffsetTable and FuncOffsetList is populated.
-  DenseMap<SampleContext, uint64_t> FuncOffsetTable;
+  DenseMap<uint64_t, uint64_t> FuncOffsetTable;
 
   /// The list version of FuncOffsetTable. This is used if every entry is
   /// being accessed.
diff --git a/llvm/lib/ProfileData/ProfileSummaryBuilder.cpp b/llvm/lib/ProfileData/ProfileSummaryBuilder.cpp
--- a/llvm/lib/ProfileData/ProfileSummaryBuilder.cpp
+++ b/llvm/lib/ProfileData/ProfileSummaryBuilder.cpp
@@ -204,9 +204,7 @@
   // profiles before computing profile summary.
   if (UseContextLessSummary || (sampleprof::FunctionSamples::ProfileIsCS &&
                                 !UseContextLessSummary.getNumOccurrences())) {
-    for (const auto &I : Profiles) {
-      ContextLessProfiles[I.second.getName()].merge(I.second);
-    }
+    ProfileConverter::flattenProfile(Profiles, ContextLessProfiles, true);
     ProfilesToUse = &ContextLessProfiles;
   }
 
diff --git a/llvm/lib/ProfileData/SampleProf.cpp b/llvm/lib/ProfileData/SampleProf.cpp
--- a/llvm/lib/ProfileData/SampleProf.cpp
+++ b/llvm/lib/ProfileData/SampleProf.cpp
@@ -202,13 +202,12 @@
     const SampleProfileMap &ProfileMap,
     std::vector<NameFunctionSamples> &SortedProfiles) {
   for (const auto &I : ProfileMap) {
-    assert(I.first == I.second.getContext() && "Inconsistent profile map");
-    SortedProfiles.push_back(std::make_pair(I.second.getContext(), &I.second));
+    SortedProfiles.push_back(std::make_pair(I.first, &I.second));
   }
   llvm::stable_sort(SortedProfiles, [](const NameFunctionSamples &A,
                                        const NameFunctionSamples &B) {
     if (A.second->getTotalSamples() == B.second->getTotalSamples())
-      return A.first < B.first;
+      return A.second->getContext() < B.second->getContext();
     return A.second->getTotalSamples() > B.second->getTotalSamples();
   });
 }
@@ -357,13 +356,13 @@
 
   // Filter the cold profiles from ProfileMap and move them into a tmp
   // container
-  std::vector<std::pair<SampleContext, const FunctionSamples *>> ColdProfiles;
+  std::vector<std::pair<uint64_t, const FunctionSamples *>> ColdProfiles;
   for (const auto &I : ProfileMap) {
-    const SampleContext &Context = I.first;
+    const SampleContext &Context = I.second.getContext();
     const FunctionSamples &FunctionProfile = I.second;
     if (FunctionProfile.getTotalSamples() < ColdCountThreshold &&
         (!TrimBaseProfileOnly || Context.isBaseContext()))
-      ColdProfiles.emplace_back(Context, &I.second);
+      ColdProfiles.emplace_back(I.first, &I.second);
   }
 
   // Remove the cold profile from ProfileMap and merge them into
@@ -374,8 +373,8 @@
       auto MergedContext = I.second->getContext().getContextFrames();
       if (ColdContextFrameLength < MergedContext.size())
         MergedContext = MergedContext.take_back(ColdContextFrameLength);
-      auto Ret = MergedProfileMap.emplace(MergedContext, FunctionSamples());
-      FunctionSamples &MergedProfile = Ret.first->second;
+      // Need to set MergedProfile's context here otherwise it will be lost.
+      FunctionSamples &MergedProfile = MergedProfileMap.Create(MergedContext);
       MergedProfile.merge(*I.second);
     }
     ProfileMap.erase(I.first);
@@ -385,57 +384,17 @@
   for (const auto &I : MergedProfileMap) {
     // Filter the cold merged profile
     if (TrimColdContext && I.second.getTotalSamples() < ColdCountThreshold &&
-        ProfileMap.find(I.first) == ProfileMap.end())
+        ProfileMap.find(I.second.getContext()) == ProfileMap.end())
       continue;
     // Merge the profile if the original profile exists, otherwise just insert
-    // as a new profile
-    auto Ret = ProfileMap.emplace(I.first, FunctionSamples());
-    if (Ret.second) {
-      SampleContext FContext(Ret.first->first, RawContext);
-      FunctionSamples &FProfile = Ret.first->second;
-      FProfile.setContext(FContext);
-    }
+    // as a new profile. If inserted as a new profile from MergedProfileMap, it
+    // already has the right context.
+    auto Ret = ProfileMap.emplace(I.second.getContext(), FunctionSamples());
     FunctionSamples &OrigProfile = Ret.first->second;
     OrigProfile.merge(I.second);
   }
 }
 
-void SampleContextTrimmer::canonicalizeContextProfiles() {
-  std::vector<SampleContext> ProfilesToBeRemoved;
-  SampleProfileMap ProfilesToBeAdded;
-  for (auto &I : ProfileMap) {
-    FunctionSamples &FProfile = I.second;
-    SampleContext &Context = FProfile.getContext();
-    if (I.first == Context)
-      continue;
-
-    // Use the context string from FunctionSamples to update the keys of
-    // ProfileMap. They can get out of sync after context profile promotion
-    // through pre-inliner.
-    // Duplicate the function profile for later insertion to avoid a conflict
-    // caused by a context both to be add and to be removed. This could happen
-    // when a context is promoted to another context which is also promoted to
-    // the third context. For example, given an original context A @ B @ C that
-    // is promoted to B @ C and the original context B @ C which is promoted to
-    // just C, adding B @ C to the profile map while removing same context (but
-    // with different profiles) from the map can cause a conflict if they are
-    // not handled in a right order. This can be solved by just caching the
-    // profiles to be added.
-    auto Ret = ProfilesToBeAdded.emplace(Context, FProfile);
-    (void)Ret;
-    assert(Ret.second && "Context conflict during canonicalization");
-    ProfilesToBeRemoved.push_back(I.first);
-  }
-
-  for (auto &I : ProfilesToBeRemoved) {
-    ProfileMap.erase(I);
-  }
-
-  for (auto &I : ProfilesToBeAdded) {
-    ProfileMap.emplace(I.first, I.second);
-  }
-}
-
 std::error_code ProfileSymbolList::write(raw_ostream &OS) {
   // Sort the symbols before output. If doing compression.
   // It will make the compression much more effective.
@@ -509,6 +468,7 @@
     if (!ChildProfile)
       continue;
     SampleContext OrigChildContext = ChildProfile->getContext();
+    uint64_t OrigChildContextHash = OrigChildContext.getHashCode();
     // Reset the child context to be contextless.
     ChildProfile->getContext().setName(OrigChildContext.getName());
     if (NodeProfile) {
@@ -524,6 +484,7 @@
       NodeProfile->removeTotalSamples(Count);
     }
 
+    uint64_t NewChildProfileHash = 0;
     // Separate child profile to be a standalone profile, if the current parent
     // profile doesn't exist. This is a duplicating operation when the child
     // profile is already incorporated into the parent which is still useful and
@@ -532,15 +493,20 @@
     // profile in the prelink phase for to-be-fully-inlined functions.
     if (!NodeProfile) {
       ProfileMap[ChildProfile->getContext()].merge(*ChildProfile);
+      NewChildProfileHash = ChildProfile->getContext().getHashCode();
     } else if (GenerateMergedBaseProfiles) {
       ProfileMap[ChildProfile->getContext()].merge(*ChildProfile);
+      NewChildProfileHash = ChildProfile->getContext().getHashCode();
       auto &SamplesMap = NodeProfile->functionSamplesAt(ChildNode.CallSiteLoc);
       SamplesMap[ChildProfile->getName().str()].getContext().setAttribute(
           ContextDuplicatedIntoBase);
     }
 
-    // Remove the original child profile.
-    ProfileMap.erase(OrigChildContext);
+    // Remove the original child profile. Check if MD5 of new child profile
+    // collides with old profile, in this case the [] operator already
+    // overwritten it without the need of erase.
+    if (NewChildProfileHash != OrigChildContextHash)
+      ProfileMap.erase(OrigChildContextHash);
   }
 }
 
diff --git a/llvm/lib/ProfileData/SampleProfReader.cpp b/llvm/lib/ProfileData/SampleProfReader.cpp
--- a/llvm/lib/ProfileData/SampleProfReader.cpp
+++ b/llvm/lib/ProfileData/SampleProfReader.cpp
@@ -61,9 +61,9 @@
 ///
 /// \param FContext Name + context of the function to print.
 /// \param OS Stream to emit the output to.
-void SampleProfileReader::dumpFunctionProfile(SampleContext FContext,
+void SampleProfileReader::dumpFunctionProfile(const FunctionSamples &FS,
                                               raw_ostream &OS) {
-  OS << "Function: " << FContext.toString() << ": " << Profiles[FContext];
+  OS << "Function: " << FS.getContext().toString() << ": " << FS;
 }
 
 /// Dump all the function profiles found on stream \p OS.
@@ -71,7 +71,7 @@
   std::vector<NameFunctionSamples> V;
   sortFuncProfiles(Profiles, V);
   for (const auto &I : V)
-    dumpFunctionProfile(I.first, OS);
+    dumpFunctionProfile(*I.second, OS);
 }
 
 static void dumpFunctionProfileJson(const FunctionSamples &S,
@@ -355,9 +355,7 @@
       SampleContext FContext(FName, CSNameTable);
       if (FContext.hasContext())
         ++CSProfileCount;
-      Profiles[FContext] = FunctionSamples();
-      FunctionSamples &FProfile = Profiles[FContext];
-      FProfile.setContext(FContext);
+      FunctionSamples &FProfile = Profiles.Create(FContext);
       MergeResult(Result, FProfile.addTotalSamples(NumSamples));
       MergeResult(Result, FProfile.addHeadSamples(NumHeadSamples));
       InlineStack.clear();
@@ -525,7 +523,7 @@
   return *Idx;
 }
 
-ErrorOr<StringRef> SampleProfileReaderBinary::readStringFromTable() {
+ErrorOr<StringRef> SampleProfileReaderBinary::readStringFromTable(size_t *Ret) {
   auto Idx = readStringIndex(NameTable);
   if (std::error_code EC = Idx.getError())
     return EC;
@@ -543,30 +541,46 @@
        MD5NameMemStart + (*Idx) * sizeof(uint64_t));
     SR = MD5StringBuf.emplace_back(std::to_string(FID));
   }
+  if (Ret)
+    *Ret = *Idx;
   return SR;
 }
 
-ErrorOr<SampleContextFrames> SampleProfileReaderBinary::readContextFromTable() {
+ErrorOr<SampleContextFrames>
+SampleProfileReaderBinary::readContextFromTable(size_t *Ret) {
   auto ContextIdx = readNumber<size_t>();
   if (std::error_code EC = ContextIdx.getError())
     return EC;
   if (*ContextIdx >= CSNameTable.size())
     return sampleprof_error::truncated_name_table;
+  if (Ret)
+    *Ret = *ContextIdx;
   return CSNameTable[*ContextIdx];
 }
 
-ErrorOr<SampleContext> SampleProfileReaderBinary::readSampleContextFromTable() {
+ErrorOr<std::pair<SampleContext, uint64_t>>
+SampleProfileReaderBinary::readSampleContextFromTable() {
+  SampleContext Context;
+  size_t Idx;
   if (ProfileIsCS) {
-    auto FContext(readContextFromTable());
+    auto FContext(readContextFromTable(&Idx));
     if (std::error_code EC = FContext.getError())
       return EC;
-    return SampleContext(*FContext);
+    Context = SampleContext(*FContext);
   } else {
-    auto FName(readStringFromTable());
+    auto FName(readStringFromTable(&Idx));
     if (std::error_code EC = FName.getError())
       return EC;
-    return SampleContext(*FName);
+    Context = SampleContext(*FName);
   }
+  uint64_t Hash = MD5SampleContextStart[Idx];
+  if (Hash == 0) {
+    assert(MD5SampleContextStart == MD5SampleContextTable.data());
+    // Lazy computing of hash value, write back to the table.
+    Hash = Context.getHashCode();
+    MD5SampleContextTable[Idx] = Hash;
+  }
+  return std::make_pair(Context, Hash);
 }
 
 std::error_code
@@ -659,16 +673,18 @@
   if (std::error_code EC = NumHeadSamples.getError())
     return EC;
 
-  ErrorOr<SampleContext> FContext(readSampleContextFromTable());
-  if (std::error_code EC = FContext.getError())
+  auto FContextHash(readSampleContextFromTable());
+  if (std::error_code EC = FContextHash.getError())
     return EC;
 
-  Profiles[*FContext] = FunctionSamples();
-  FunctionSamples &FProfile = Profiles[*FContext];
-  FProfile.setContext(*FContext);
+  auto &[FContext, Hash] = *FContextHash;
+  // Use the cached hash value for insertion instead of recalculating it.
+  auto Res = Profiles.try_emplace(Hash, FContext, FunctionSamples());
+  FunctionSamples &FProfile = Res.first->second;
+  FProfile.setContext(FContext);
   FProfile.addHeadSamples(*NumHeadSamples);
 
-  if (FContext->hasContext())
+  if (FContext.hasContext())
     CSProfileCount++;
 
   if (std::error_code EC = readProfile(FProfile))
@@ -816,18 +832,26 @@
     FuncOffsetTable.reserve(*Size);
 
   for (uint64_t I = 0; I < *Size; ++I) {
-    auto FContext(readSampleContextFromTable());
-    if (std::error_code EC = FContext.getError())
+    auto FContextHash(readSampleContextFromTable());
+    if (std::error_code EC = FContextHash.getError())
       return EC;
 
+    auto &[FContext, Hash] = *FContextHash;
     auto Offset = readNumber<uint64_t>();
     if (std::error_code EC = Offset.getError())
       return EC;
 
     if (UseFuncOffsetList)
-      FuncOffsetList.emplace_back(*FContext, *Offset);
-    else
-      FuncOffsetTable[*FContext] = *Offset;
+      FuncOffsetList.emplace_back(FContext, *Offset);
+    else {
+      auto Ret = FuncOffsetTable.try_emplace(Hash, *Offset);
+      if (LLVM_UNLIKELY(!Ret.second)) {
+        dbgs() << "Duplicated entry or MD5 collision detected\n";
+        // Insert it anyways, to be consistent with the behavior of
+        // SampleProfileMap.
+        Ret.first->second = *Offset;
+      }
+    }
  }
 
  return sampleprof_error::success;
@@ -900,8 +924,8 @@
     } else if (useMD5()) {
       assert(!useFuncOffsetList());
       for (auto Name : FuncsToUse) {
-        auto GUID = std::to_string(MD5Hash(Name));
-        auto iter = FuncOffsetTable.find(StringRef(GUID));
+        auto GUID = MD5Hash(Name);
+        auto iter = FuncOffsetTable.find(GUID);
         if (iter == FuncOffsetTable.end())
           continue;
         const uint8_t *FuncProfileAddr = Start + iter->second;
@@ -922,7 +946,7 @@
     } else {
       assert(!useFuncOffsetList());
       for (auto Name : FuncsToUse) {
-        auto iter = FuncOffsetTable.find(Name);
+        auto iter = FuncOffsetTable.find(MD5Hash(Name));
         if (iter == FuncOffsetTable.end())
           continue;
         const uint8_t *FuncProfileAddr = Start + iter->second;
@@ -1050,17 +1074,30 @@
 
   NameTable.clear();
   NameTable.reserve(*Size);
+  if (!ProfileIsCS) {
+    MD5SampleContextTable.clear();
+    if (UseMD5)
+      MD5SampleContextTable.reserve(*Size);
+    else
+      // If we are using strings, delay MD5 computation since only a portion of
+      // names are used by top level functions. Use 0 to indicate MD5 value is
+      // to be calculated as no known string has a MD5 value of 0.
+      MD5SampleContextTable.resize(*Size);
+  }
   for (size_t I = 0; I < *Size; ++I) {
     auto Name(readString());
     if (std::error_code EC = Name.getError())
       return EC;
     if (UseMD5) {
       uint64_t FID = MD5Hash(*Name);
+      if (!ProfileIsCS)
+        MD5SampleContextTable.emplace_back(FID);
       NameTable.emplace_back(MD5StringBuf.emplace_back(std::to_string(FID)));
     } else
       NameTable.push_back(*Name);
   }
-
+  if (!ProfileIsCS)
+    MD5SampleContextStart = MD5SampleContextTable.data();
   return sampleprof_error::success;
 }
 
@@ -1088,6 +1125,8 @@
     NameTable.clear();
     NameTable.resize(*Size);
     MD5NameMemStart = Data;
+    if (!ProfileIsCS)
+      MD5SampleContextStart = reinterpret_cast<const uint64_t *>(Data);
     Data = Data + (*Size) * sizeof(uint64_t);
     return sampleprof_error::success;
   }
@@ -1101,12 +1140,20 @@
     MD5StringBuf.reserve(MD5StringBuf.size() + *Size);
     NameTable.clear();
     NameTable.reserve(*Size);
+    if (!ProfileIsCS) {
+      MD5SampleContextTable.clear();
+      MD5SampleContextTable.reserve(*Size);
+    }
     for (size_t I = 0; I < *Size; ++I) {
       auto FID = readNumber<uint64_t>();
       if (std::error_code EC = FID.getError())
         return EC;
+      if (!ProfileIsCS)
+        MD5SampleContextTable.emplace_back(*FID);
       NameTable.emplace_back(MD5StringBuf.emplace_back(std::to_string(*FID)));
     }
+    if (!ProfileIsCS)
+      MD5SampleContextStart = MD5SampleContextTable.data();
     return sampleprof_error::success;
   }
 
@@ -1124,6 +1171,14 @@
 
   CSNameTable.clear();
   CSNameTable.reserve(*Size);
+  if (ProfileIsCS) {
+    // Delay MD5 computation of CS context until they are needed. Use 0 to
+    // indicate MD5 value is to be calculated as no known string has a MD5
+    // value of 0.
+    MD5SampleContextTable.clear();
+    MD5SampleContextTable.resize(*Size);
+    MD5SampleContextStart = MD5SampleContextTable.data();
+  }
   for (size_t I = 0; I < *Size; ++I) {
     CSNameTable.emplace_back(SampleContextFrameVector());
     auto ContextSize = readNumber<uint32_t>();
@@ -1187,16 +1242,17 @@
         if (std::error_code EC = Discriminator.getError())
           return EC;
 
-        auto FContext(readSampleContextFromTable());
-        if (std::error_code EC = FContext.getError())
+        auto FContextHash(readSampleContextFromTable());
+        if (std::error_code EC = FContextHash.getError())
           return EC;
 
+        auto &[FContext, Hash] = *FContextHash;
         FunctionSamples *CalleeProfile = nullptr;
         if (FProfile) {
           CalleeProfile = const_cast<FunctionSamples *>(
               &FProfile->functionSamplesAt(LineLocation(
                   *LineOffset,
-                  *Discriminator))[std::string(FContext.get().getName())]);
+                  *Discriminator))[std::string(FContext.getName())]);
         }
         if (std::error_code EC =
                 readFuncMetadata(ProfileHasAttribute, CalleeProfile))
@@ -1211,11 +1267,12 @@
 std::error_code
 SampleProfileReaderExtBinaryBase::readFuncMetadata(bool ProfileHasAttribute) {
   while (Data < End) {
-    auto FContext(readSampleContextFromTable());
-    if (std::error_code EC = FContext.getError())
+    auto FContextHash(readSampleContextFromTable());
+    if (std::error_code EC = FContextHash.getError())
       return EC;
+    auto &[FContext, Hash] = *FContextHash;
     FunctionSamples *FProfile = nullptr;
-    auto It = Profiles.find(*FContext);
+    auto It = Profiles.find(FContext);
     if (It != Profiles.end())
       FProfile = &It->second;
 
diff --git a/llvm/lib/ProfileData/SampleProfWriter.cpp b/llvm/lib/ProfileData/SampleProfWriter.cpp
--- a/llvm/lib/ProfileData/SampleProfWriter.cpp
+++ b/llvm/lib/ProfileData/SampleProfWriter.cpp
@@ -364,7 +364,6 @@
 std::error_code SampleProfileWriterExtBinaryBase::writeNameTableSection(
     const SampleProfileMap &ProfileMap) {
   for (const auto &I : ProfileMap) {
-    assert(I.first == I.second.getContext() && "Inconsistent profile map");
     addContext(I.second.getContext());
     addNames(I.second);
   }
@@ -726,8 +725,7 @@
 
   // Generate the name table for all the functions referenced in the profile.
   for (const auto &I : ProfileMap) {
-    assert(I.first == I.second.getContext() && "Inconsistent profile map");
-    addContext(I.first);
+    addContext(I.second.getContext());
     addNames(I.second);
   }
 
diff --git a/llvm/lib/Transforms/IPO/SampleContextTracker.cpp b/llvm/lib/Transforms/IPO/SampleContextTracker.cpp
--- a/llvm/lib/Transforms/IPO/SampleContextTracker.cpp
+++ b/llvm/lib/Transforms/IPO/SampleContextTracker.cpp
@@ -201,7 +201,7 @@
     : GUIDToFuncNameMap(GUIDToFuncNameMap) {
   for (auto &FuncSample : Profiles) {
     FunctionSamples *FSamples = &FuncSample.second;
-    SampleContext Context = FuncSample.first;
+    SampleContext Context = FuncSample.second.getContext();
     LLVM_DEBUG(dbgs() << "Tracking Context for function: " << Context.toString()
                       << "\n");
     ContextTrieNode *NewNode = getOrCreateContextPath(Context, true);
@@ -638,7 +638,7 @@
     FunctionSamples *FProfile = Node->getFunctionSamples();
     // Profile's context can be empty, use ContextNode's func name.
     if (FProfile)
-      ContextLessProfiles[Node->getFuncName()].merge(*FProfile);
+      ContextLessProfiles.Create(Node->getFuncName()).merge(*FProfile);
   }
 }
 } // namespace llvm
diff --git a/llvm/tools/llvm-profdata/llvm-profdata.cpp b/llvm/tools/llvm-profdata/llvm-profdata.cpp
--- a/llvm/tools/llvm-profdata/llvm-profdata.cpp
+++ b/llvm/tools/llvm-profdata/llvm-profdata.cpp
@@ -593,7 +593,7 @@
 
   auto checkSampleProfileHasFUnique = [&Reader]() {
     for (const auto &PD : Reader->getProfiles()) {
-      auto &FContext = PD.first;
+      auto &FContext = PD.second.getContext();
       if (FContext.toString().find(FunctionSamples::UniqSuffix) !=
           std::string::npos) {
         return true;
@@ -2832,7 +2832,8 @@
           "be printed");
 
     // TODO: parse context string to support filtering by contexts.
-    Reader->dumpFunctionProfile(StringRef(ShowFunction), OS);
+    FunctionSamples *FS = Reader->getSamplesFor(StringRef(ShowFunction));
+    Reader->dumpFunctionProfile(FS ? *FS : FunctionSamples(), OS);
   }
 
   if (ShowProfileSymbolList) {
diff --git a/llvm/tools/llvm-profgen/ProfileGenerator.cpp b/llvm/tools/llvm-profgen/ProfileGenerator.cpp
--- a/llvm/tools/llvm-profgen/ProfileGenerator.cpp
+++ b/llvm/tools/llvm-profgen/ProfileGenerator.cpp
@@ -449,7 +449,7 @@
 bool ProfileGenerator::collectFunctionsFromLLVMProfile(
     std::unordered_set<const BinaryFunction *> &ProfiledFunctions) {
   for (const auto &FS : ProfileMap) {
-    if (auto *Func = Binary->getBinaryFunction(FS.first.getName()))
+    if (auto *Func = Binary->getBinaryFunction(FS.second.getName()))
       ProfiledFunctions.insert(Func);
   }
   return true;
@@ -468,12 +468,7 @@
 FunctionSamples &
 ProfileGenerator::getTopLevelFunctionProfile(StringRef FuncName) {
   SampleContext Context(FuncName);
-  auto Ret = ProfileMap.emplace(Context, FunctionSamples());
-  if (Ret.second) {
-    FunctionSamples &FProfile = Ret.first->second;
-    FProfile.setContext(Context);
-  }
-  return Ret.first->second;
+  return ProfileMap.Create(Context);
 }
 
 void ProfileGenerator::generateProfile() {
@@ -505,14 +500,14 @@
     return;
 
   // Move cold profiles into a tmp container.
-  std::vector<SampleContext> ColdProfiles;
+  std::vector<uint64_t> ColdProfileHashes;
   for (const auto &I : ProfileMap) {
     if (I.second.getTotalSamples() < ColdCntThreshold)
-      ColdProfiles.emplace_back(I.first);
+      ColdProfileHashes.emplace_back(I.first);
   }
 
   // Remove the cold profile from ProfileMap.
-  for (const auto &I : ColdProfiles)
+  for (const auto &I : ColdProfileHashes)
     ProfileMap.erase(I);
 }
 
@@ -1018,9 +1013,7 @@
 
   // Merge function samples of CS profile to calculate profile density.
   sampleprof::SampleProfileMap ContextLessProfiles;
-  for (const auto &I : ProfileMap) {
-    ContextLessProfiles[I.second.getName()].merge(I.second);
-  }
+  ProfileConverter::flattenProfile(ProfileMap, ContextLessProfiles, true);
 
   calculateAndShowDensity(ContextLessProfiles);
   if (GenCSNestedProfile) {
diff --git a/llvm/unittests/tools/llvm-profdata/CMakeLists.txt b/llvm/unittests/tools/llvm-profdata/CMakeLists.txt
--- a/llvm/unittests/tools/llvm-profdata/CMakeLists.txt
+++ b/llvm/unittests/tools/llvm-profdata/CMakeLists.txt
@@ -6,6 +6,7 @@
 
 add_llvm_unittest(LLVMProfdataTests
     OutputSizeLimitTest.cpp
+    MD5CollisionTest.cpp
   )
 
 target_link_libraries(LLVMProfdataTests PRIVATE LLVMTestingSupport)
diff --git a/llvm/unittests/tools/llvm-profdata/MD5CollisionTest.cpp b/llvm/unittests/tools/llvm-profdata/MD5CollisionTest.cpp
new file mode 100644
--- /dev/null
+++ b/llvm/unittests/tools/llvm-profdata/MD5CollisionTest.cpp
@@ -0,0 +1,158 @@
+//===- llvm/unittests/tools/llvm-profdata/MD5CollisionTest.cpp ------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+/// Test whether the MD5-key SampleProfileMap can handle collision correctly.
+/// Probability of collision is rare but not negligible since we only use the
+/// lower 64 bits of the MD5 value. A unit test is required because the function
+/// names are not printable ASCII characters.
+
+#include "llvm/ProfileData/SampleProfReader.h"
+#include "llvm/Support/VirtualFileSystem.h"
+#include "llvm/Testing/Support/Error.h"
+#include "gtest/gtest.h"
+
+/// According to https://en.wikipedia.org/wiki/MD5#Preimage_vulnerability, the
+/// MD5 of the two strings are 79054025255fb1a26e4bc422aef54eb4.
+
+const uint64_t Hash = 0xa2b15f2525400579; // First 8 bytes of the MD5.
+
+// clang-format off
+const uint8_t ProfileData[] = {
+    0x84, 0xe4, 0xd0, 0xb1, 0xf4, 0xc9, 0x94, 0xa8,
+    0x53, 0x67, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x7D, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x03, 0x01, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x80, 0x01, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x05, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x20, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x90, 0x01, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x20, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00,
+
+    /// Name Table
+    0x02,
+    /// String1
+    0xd1, 0x31, 0xdd, 0x02, 0xc5, 0xe6, 0xee, 0xc4,
+    0x69, 0x3d, 0x9a, 0x06, 0x98, 0xaf, 0xf9, 0x5c,
+    0x2f, 0xca, 0xb5, 0x87, 0x12, 0x46, 0x7e, 0xab,
+    0x40, 0x04, 0x58, 0x3e, 0xb8, 0xfb, 0x7f, 0x89,
+    0x55, 0xad, 0x34, 0x06, 0x09, 0xf4, 0xb3, 0x02,
+    0x83, 0xe4, 0x88, 0x83, 0x25, 0x71, 0x41, 0x5a,
+    0x08, 0x51, 0x25, 0xe8, 0xf7, 0xcd, 0xc9, 0x9f,
+    0xd9, 0x1d, 0xbd, 0xf2, 0x80, 0x37, 0x3c, 0x5b,
+    0xd8, 0x82, 0x3e, 0x31, 0x56, 0x34, 0x8f, 0x5b,
+    0xae, 0x6d, 0xac, 0xd4, 0x36, 0xc9, 0x19, 0xc6,
+    0xdd, 0x53, 0xe2, 0xb4, 0x87, 0xda, 0x03, 0xfd,
+    0x02, 0x39, 0x63, 0x06, 0xd2, 0x48, 0xcd, 0xa0,
+    0xe9, 0x9f, 0x33, 0x42, 0x0f, 0x57, 0x7e, 0xe8,
+    0xce, 0x54, 0xb6, 0x70, 0x80, 0xa8, 0x0d, 0x1e,
+    0xc6, 0x98, 0x21, 0xbc, 0xb6, 0xa8, 0x83, 0x93,
+    0x96, 0xf9, 0x65, 0x2b, 0x6f, 0xf7, 0x2a, 0x70, 0x00,
+    /// String2
+    0xd1, 0x31, 0xdd, 0x02, 0xc5, 0xe6, 0xee, 0xc4,
+    0x69, 0x3d, 0x9a, 0x06, 0x98, 0xaf, 0xf9, 0x5c,
+    0x2f, 0xca, 0xb5, 0x07, 0x12, 0x46, 0x7e, 0xab,
+    0x40, 0x04, 0x58, 0x3e, 0xb8, 0xfb, 0x7f, 0x89,
+    0x55, 0xad, 0x34, 0x06, 0x09, 0xf4, 0xb3, 0x02,
+    0x83, 0xe4, 0x88, 0x83, 0x25, 0xf1, 0x41, 0x5a,
+    0x08, 0x51, 0x25, 0xe8, 0xf7, 0xcd, 0xc9, 0x9f,
+    0xd9, 0x1d, 0xbd, 0x72, 0x80, 0x37, 0x3c, 0x5b,
+    0xd8, 0x82, 0x3e, 0x31, 0x56, 0x34, 0x8f, 0x5b,
+    0xae, 0x6d, 0xac, 0xd4, 0x36, 0xc9, 0x19, 0xc6,
+    0xdd, 0x53, 0xe2, 0x34, 0x87, 0xda, 0x03, 0xfd,
+    0x02, 0x39, 0x63, 0x06, 0xd2, 0x48, 0xcd, 0xa0,
+    0xe9, 0x9f, 0x33, 0x42, 0x0f, 0x57, 0x7e, 0xe8,
+    0xce, 0x54, 0xb6, 0x70, 0x80, 0x28, 0x0d, 0x1e,
+    0xc6, 0x98, 0x21, 0xbc, 0xb6, 0xa8, 0x83, 0x93,
+    0x96, 0xf9, 0x65, 0xab, 0x6f, 0xf7, 0x2a, 0x70, 0x00,
+
+    /// FuncOffsetTable
+    0x02, 0x00, 0x00, 0x01, 0x17, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+
+    /// Samples
+    /// String1:10:1
+    ///  1: 5
+    ///  2.3: 6
+    ///  4: String2:100
+    ///   1: 100
+    /// String2:7:3
+    ///  9: 0
+    0x01, 0x00, 0x0a, 0x02, 0x01, 0x00, 0x05, 0x00,
+    0x02, 0x03, 0x06, 0x00, 0x01, 0x04, 0x00, 0x01,
+    0x64, 0x01, 0x01, 0x00, 0x64, 0x00, 0x00,
+
+    0x03, 0x01, 0x07, 0x01, 0x09, 0x00, 0x00, 0x00,
+    0x00};
+// clang-format on
+
+using namespace llvm;
+using namespace llvm::sampleprof;
+
+TEST(MD5CollisionTest, TestCollision) {
+  auto InputBuffer = MemoryBuffer::getMemBuffer(
+      StringRef(reinterpret_cast<const char *>(ProfileData),
+                sizeof(ProfileData)),
+      "", false);
+  LLVMContext Context;
+  auto FileSystem = vfs::getRealFileSystem();
+  auto Result = SampleProfileReader::create(InputBuffer, Context, *FileSystem);
+  ASSERT_TRUE(Result);
+  SampleProfileReader *Reader = Result->get();
+  ASSERT_FALSE(Reader->read());
+
+  std::vector<StringRef> &NameTable = *Reader->getNameTable();
+  EXPECT_EQ(NameTable.size(), 2);
+  StringRef S1 = NameTable[0];
+  StringRef S2 = NameTable[1];
+  EXPECT_NE(S1, S2);
+  EXPECT_EQ(MD5Hash(S1), Hash);
+  EXPECT_EQ(MD5Hash(S2), Hash);
+
+  // S2's MD5 value collides with S1, S1 is expected to be dropped when S2 is
+  // inserted, as if S1 never existed.
+
+  SampleProfileMap &Profiles = Reader->getProfiles();
+  EXPECT_EQ(Profiles.size(), 1);
+  auto &Entry = *Profiles.begin();
+  EXPECT_EQ(Entry.first, Hash);
+
+  FunctionSamples Expected;
+  Expected.setName(S2);
+  Expected.setHeadSamples(3);
+  Expected.setTotalSamples(7);
+  Expected.addBodySamples(9, 0, 0);
+
+  EXPECT_EQ(Entry.second, Expected);
+
+  // Inserting S2 again should fail, returning the existing sample unchanged.
+  auto Ret = Profiles.try_emplace(S2, FunctionSamples());
+  EXPECT_FALSE(Ret.second);
+  EXPECT_EQ(Profiles.size(), 1);
+  EXPECT_EQ(Ret.first->first, Hash);
+  EXPECT_EQ(Ret.first->second, Expected);
+
+  // Inserting S1 should success as if S2 never existed, and S2 is erased.
+  FunctionSamples FS1;
+  FS1.setName(S1);
+  FS1.setHeadSamples(5);
+  FS1.setTotalSamples(10);
+  FS1.addBodySamples(1, 2, 5);
+
+  auto Ret2 = Profiles.try_emplace(S1, FS1);
+  EXPECT_TRUE(Ret2.second);
+  EXPECT_EQ(Profiles.size(), 1);
+  EXPECT_EQ(Ret2.first->first, Hash);
+  EXPECT_EQ(Ret2.first->second, FS1);
+}
diff --git a/llvm/unittests/tools/llvm-profdata/OutputSizeLimitTest.cpp b/llvm/unittests/tools/llvm-profdata/OutputSizeLimitTest.cpp
--- a/llvm/unittests/tools/llvm-profdata/OutputSizeLimitTest.cpp
+++ b/llvm/unittests/tools/llvm-profdata/OutputSizeLimitTest.cpp
@@ -126,7 +126,7 @@
   // For every sample in the new profile, confirm it is in the old profile and
   // unchanged.
   for (auto Sample : NewProfiles) {
-    auto FindResult = OldProfiles.find(Sample.first);
+    auto FindResult = OldProfiles.find(Sample.second.getContext());
     EXPECT_NE(FindResult, OldProfiles.end());
     if (FindResult != OldProfiles.end()) {
       EXPECT_EQ(Sample.second.getHeadSamples(),