diff --git a/llvm/lib/ProfileData/SampleProf.cpp b/llvm/lib/ProfileData/SampleProf.cpp --- a/llvm/lib/ProfileData/SampleProf.cpp +++ b/llvm/lib/ProfileData/SampleProf.cpp @@ -371,9 +371,8 @@ void SampleContextTrimmer::canonicalizeContextProfiles() { StringSet<> ProfilesToBeRemoved; - // Note that StringMap order is guaranteed to be top-down order, - // this makes sure we make room for promoted/merged context in the - // map, before we move profiles in the map. + // Use reference type to reduce unnecessary copying. + StringMap ProfilesToBeAdded; for (auto &I : ProfileMap) { FunctionSamples &FProfile = I.second; StringRef ContextStr = FProfile.getNameWithContext(); @@ -383,17 +382,54 @@ // Use the context string from FunctionSamples to update the keys of // ProfileMap. They can get out of sync after context profile promotion // through pre-inliner. - auto Ret = ProfileMap.try_emplace(ContextStr, FProfile); - assert(Ret.second && "Conext conflict during canonicalization"); - FProfile = Ret.first->second; - - // Track the context profile to remove - ProfilesToBeRemoved.erase(ContextStr); + auto Ret = ProfilesToBeAdded.try_emplace(ContextStr, FProfile); + (void)Ret; + assert(Ret.second && "Context conflict during canonicalization"); ProfilesToBeRemoved.insert(I.first()); } + // There could be a conflict where a context being added is also being + // removed. This could happen when a context is promoted to another context + // which is also promoted to the third context. For example, given an original + // context A @ B @ C that is promoted to B @ C and the original context B @ C + // which is promoted to just C, adding B @ C to the profile map while removing + // same context (but with different profile) from the map will cause a + // conflict. We need to handle the conflict in a correct order. The order we + // are taking is + // Insert C (new) into the profile map + // Remove B @ C (old) from the profile + // Insert B @ C (new) from the profile + // Remove A @ B @ C (old) from the profile. + // The order matters because a new entry relies on the profile of the + // corresponding old entry to construct itself, so as to avoid unnecessary + // profile copying. Therefore, removing an old entry before the new entry is + // inserted is problematic. The processing order can be figured out by a + // simple algorithm that does not handle cyclic dependencies. This should be + // fine since we are only targeting context promotion where context strings + // are truncated when promoted. + + // Step 1. Insert new contexts that do not exist so far. + for (auto &I : ProfilesToBeAdded) { + if (!ProfilesToBeRemoved.count(I.first())) + ProfileMap.try_emplace(I.first(), I.second); + } + + // Step 2. Remove contexts that were not inserted in step 1. + for (auto &I : ProfilesToBeRemoved) { + if (ProfilesToBeAdded.count(I.first())) + ProfileMap.erase(I.first()); + } + + // Step 3. Insert contexts that were just removed. + for (auto &I : ProfilesToBeAdded) { + if (ProfilesToBeRemoved.count(I.first())) + ProfileMap.try_emplace(I.first(), I.second); + } + + // Step 4. Remove the rest contexts. for (auto &I : ProfilesToBeRemoved) { - ProfileMap.erase(I.first()); + if (!ProfilesToBeAdded.count(I.first())) + ProfileMap.erase(I.first()); } } diff --git a/llvm/tools/llvm-profgen/CSPreInliner.cpp b/llvm/tools/llvm-profgen/CSPreInliner.cpp --- a/llvm/tools/llvm-profgen/CSPreInliner.cpp +++ b/llvm/tools/llvm-profgen/CSPreInliner.cpp @@ -16,11 +16,6 @@ using namespace llvm; using namespace sampleprof; -static cl::opt EnableCSPreInliner( - "csspgo-preinliner", cl::Hidden, cl::init(false), - cl::desc("Run a global pre-inliner to merge context profile based on " - "estimated global top-down inline decisions")); - // The switches specify inline thresholds used in SampleProfileLoader inlining. // TODO: the actual threshold to be tuned here because the size here is based // on machine code not LLVM IR. @@ -179,9 +174,6 @@ } void CSPreInliner::run() { - if (!EnableCSPreInliner) - return; - #ifndef NDEBUG auto printProfileNames = [](StringMap &Profiles, bool IsInput) { diff --git a/llvm/tools/llvm-profgen/ProfileGenerator.cpp b/llvm/tools/llvm-profgen/ProfileGenerator.cpp --- a/llvm/tools/llvm-profgen/ProfileGenerator.cpp +++ b/llvm/tools/llvm-profgen/ProfileGenerator.cpp @@ -48,6 +48,11 @@ cl::desc("Keep the last K frames while merging cold profile. 1 means the " "context-less base profile")); +static cl::opt EnableCSPreInliner( + "csspgo-preinliner", cl::Hidden, cl::init(false), + cl::desc("Run a global pre-inliner to merge context profile based on " + "estimated global top-down inline decisions")); + extern cl::opt ProfileSummaryCutoffCold; using namespace llvm; @@ -401,7 +406,8 @@ // Run global pre-inliner to adjust/merge context profile based on estimated // inline decisions. - CSPreInliner(ProfileMap, HotCountThreshold, ColdCountThreshold).run(); + if (EnableCSPreInliner) + CSPreInliner(ProfileMap, HotCountThreshold, ColdCountThreshold).run(); // Trim and merge cold context profile using cold threshold above; SampleContextTrimmer(ProfileMap)