diff --git a/llvm/lib/ProfileData/SampleProf.cpp b/llvm/lib/ProfileData/SampleProf.cpp --- a/llvm/lib/ProfileData/SampleProf.cpp +++ b/llvm/lib/ProfileData/SampleProf.cpp @@ -370,10 +370,8 @@ } void SampleContextTrimmer::canonicalizeContextProfiles() { - StringSet<> ProfilesToBeRemoved; - // Note that StringMap order is guaranteed to be top-down order, - // this makes sure we make room for promoted/merged context in the - // map, before we move profiles in the map. + std::vector ProfilesToBeRemoved; + StringMap ProfilesToBeAdded; for (auto &I : ProfileMap) { FunctionSamples &FProfile = I.second; StringRef ContextStr = FProfile.getNameWithContext(); @@ -383,17 +381,27 @@ // Use the context string from FunctionSamples to update the keys of // ProfileMap. They can get out of sync after context profile promotion // through pre-inliner. - auto Ret = ProfileMap.try_emplace(ContextStr, FProfile); - assert(Ret.second && "Conext conflict during canonicalization"); - FProfile = Ret.first->second; - - // Track the context profile to remove - ProfilesToBeRemoved.erase(ContextStr); - ProfilesToBeRemoved.insert(I.first()); + // Duplicate the function profile for later insertion to avoid a conflict + // caused by a context both to be add and to be removed. This could happen + // when a context is promoted to another context which is also promoted to + // the third context. For example, given an original context A @ B @ C that + // is promoted to B @ C and the original context B @ C which is promoted to + // just C, adding B @ C to the profile map while removing same context (but + // with different profiles) from the map can cause a conflict if they are + // not handled in a right order. This can be solved by just caching the + // profiles to be added. + auto Ret = ProfilesToBeAdded.try_emplace(ContextStr, FProfile); + (void)Ret; + assert(Ret.second && "Context conflict during canonicalization"); + ProfilesToBeRemoved.push_back(I.first()); } for (auto &I : ProfilesToBeRemoved) { - ProfileMap.erase(I.first()); + ProfileMap.erase(I); + } + + for (auto &I : ProfilesToBeAdded) { + ProfileMap.try_emplace(I.first(), I.second); } } diff --git a/llvm/tools/llvm-profgen/CSPreInliner.cpp b/llvm/tools/llvm-profgen/CSPreInliner.cpp --- a/llvm/tools/llvm-profgen/CSPreInliner.cpp +++ b/llvm/tools/llvm-profgen/CSPreInliner.cpp @@ -16,11 +16,6 @@ using namespace llvm; using namespace sampleprof; -static cl::opt EnableCSPreInliner( - "csspgo-preinliner", cl::Hidden, cl::init(false), - cl::desc("Run a global pre-inliner to merge context profile based on " - "estimated global top-down inline decisions")); - // The switches specify inline thresholds used in SampleProfileLoader inlining. // TODO: the actual threshold to be tuned here because the size here is based // on machine code not LLVM IR. @@ -179,9 +174,6 @@ } void CSPreInliner::run() { - if (!EnableCSPreInliner) - return; - #ifndef NDEBUG auto printProfileNames = [](StringMap &Profiles, bool IsInput) { diff --git a/llvm/tools/llvm-profgen/ProfileGenerator.cpp b/llvm/tools/llvm-profgen/ProfileGenerator.cpp --- a/llvm/tools/llvm-profgen/ProfileGenerator.cpp +++ b/llvm/tools/llvm-profgen/ProfileGenerator.cpp @@ -48,6 +48,11 @@ cl::desc("Keep the last K frames while merging cold profile. 1 means the " "context-less base profile")); +static cl::opt EnableCSPreInliner( + "csspgo-preinliner", cl::Hidden, cl::init(false), + cl::desc("Run a global pre-inliner to merge context profile based on " + "estimated global top-down inline decisions")); + extern cl::opt ProfileSummaryCutoffCold; using namespace llvm; @@ -401,7 +406,8 @@ // Run global pre-inliner to adjust/merge context profile based on estimated // inline decisions. - CSPreInliner(ProfileMap, HotCountThreshold, ColdCountThreshold).run(); + if (EnableCSPreInliner) + CSPreInliner(ProfileMap, HotCountThreshold, ColdCountThreshold).run(); // Trim and merge cold context profile using cold threshold above; SampleContextTrimmer(ProfileMap)