diff --git a/llvm/include/llvm/ProfileData/SampleProfWriter.h b/llvm/include/llvm/ProfileData/SampleProfWriter.h
--- a/llvm/include/llvm/ProfileData/SampleProfWriter.h
+++ b/llvm/include/llvm/ProfileData/SampleProfWriter.h
@@ -69,6 +69,10 @@
   virtual void setPartialProfile() {}
   virtual void resetSecLayout(SectionLayout SL) {}
 
+  /// Function profiles sorted by total count. Use this as a cache for profile
+  /// writing with size limit.
+  std::unique_ptr<std::vector<NameFunctionSamples>> SortedFunctionSamples;
+
 protected:
   SampleProfileWriter(std::unique_ptr<raw_ostream> &OS)
       : OutputStream(std::move(OS)) {}
@@ -125,7 +129,6 @@
   std::error_code writeSample(const FunctionSamples &S) override;
 
 protected:
-  virtual MapVector<StringRef, uint32_t> &getNameTable() { return NameTable; }
   virtual std::error_code writeMagicIdent(SampleProfileFormat Format);
   virtual std::error_code writeNameTable();
   std::error_code writeHeader(const SampleProfileMap &ProfileMap) override;
@@ -141,6 +144,8 @@
   void addName(StringRef FName);
   virtual void addContext(const SampleContext &Context);
   void addNames(const FunctionSamples &S);
+  /// Rebuild NameTable from scratch using the profiles in \p ProfileMap.
+  void populateNameTable(const SampleProfileMap &ProfileMap);
 
 private:
   friend ErrorOr<std::unique_ptr<SampleProfileWriter>>
diff --git a/llvm/lib/ProfileData/SampleProfWriter.cpp b/llvm/lib/ProfileData/SampleProfWriter.cpp
--- a/llvm/lib/ProfileData/SampleProfWriter.cpp
+++ b/llvm/lib/ProfileData/SampleProfWriter.cpp
@@ -42,9 +42,11 @@
 
 std::error_code
 SampleProfileWriter::writeFuncProfiles(const SampleProfileMap &ProfileMap) {
-  std::vector<NameFunctionSamples> V;
-  sortFuncProfiles(ProfileMap, V);
-  for (const auto &I : V) {
+  if (!SortedFunctionSamples) {
+    SortedFunctionSamples = std::make_unique<std::vector<NameFunctionSamples>>();
+    sortFuncProfiles(ProfileMap, *SortedFunctionSamples);
+  }
+  for (const auto &I : *SortedFunctionSamples) {
     if (std::error_code EC = writeSample(*I.second))
       return EC;
   }
@@ -256,11 +258,7 @@
 
 std::error_code SampleProfileWriterExtBinaryBase::writeNameTableSection(
     const SampleProfileMap &ProfileMap) {
-  for (const auto &I : ProfileMap) {
-    assert(I.first == I.second.getContext() && "Inconsistent profile map");
-    addContext(I.second.getContext());
-    addNames(I.second);
-  }
+  populateNameTable(ProfileMap);
 
   // If NameTable contains ".__uniq." suffix, set SecFlagUniqSuffix flag
   // so compiler won't strip the suffix during profile matching after
@@ -532,17 +530,15 @@
 }
 
 std::error_code SampleProfileWriterBinary::writeNameIdx(StringRef FName) {
-  auto &NTable = getNameTable();
-  const auto &Ret = NTable.find(FName);
-  if (Ret == NTable.end())
+  const auto &Ret = NameTable.find(FName);
+  if (Ret == NameTable.end())
     return sampleprof_error::truncated_name_table;
   encodeULEB128(Ret->second, *OutputStream);
   return sampleprof_error::success;
 }
 
 void SampleProfileWriterBinary::addName(StringRef FName) {
-  auto &NTable = getNameTable();
-  NTable.insert(std::make_pair(FName, 0));
+  NameTable.insert(std::make_pair(FName, 0));
 }
 
 void SampleProfileWriterBinary::addContext(const SampleContext &Context) {
@@ -566,6 +562,16 @@
   }
 }
 
+void SampleProfileWriterBinary::populateNameTable(
+    const SampleProfileMap &ProfileMap) {
+  NameTable.clear();
+  for (const auto &I : ProfileMap) {
+    assert(I.first == I.second.getContext() && "Inconsistent profile map");
+    addContext(I.second.getContext());
+    addNames(I.second);
+  }
+}
+
 void SampleProfileWriterExtBinaryBase::addContext(
     const SampleContext &Context) {
   if (Context.hasContext()) {
@@ -657,11 +663,7 @@
     return EC;
 
   // Generate the name table for all the functions referenced in the profile.
-  for (const auto &I : ProfileMap) {
-    assert(I.first == I.second.getContext() && "Inconsistent profile map");
-    addContext(I.first);
-    addNames(I.second);
-  }
+  populateNameTable(ProfileMap);
 
   writeNameTable();
   return sampleprof_error::success;
diff --git a/llvm/tools/llvm-profdata/llvm-profdata.cpp b/llvm/tools/llvm-profdata/llvm-profdata.cpp
--- a/llvm/tools/llvm-profdata/llvm-profdata.cpp
+++ b/llvm/tools/llvm-profdata/llvm-profdata.cpp
@@ -953,13 +953,38 @@
   }
 }
 
+/// Estimate how many functions to drop from the profile so that the
+/// rewritten output approaches \p OutputSizeLimit. The result is always at
+/// least 1. \p OriginalFunctionCount is currently unused.
+///
+/// The exact size of the output cannot be easily calculated, so we use a
+/// heuristic to remove as many functions as necessary (but not too many),
+/// while minimizing the number of iterations needed.
+/// Empirically, functions with larger total sample count contain linearly more
+/// sample entries, meaning it takes linearly more space to write them. The
+/// cumulative length is therefore quadratic if all functions are sorted by
+/// total sample count.
+/// TODO: Find better heuristic.
+static size_t CalculateNumFunctionsToRemove(size_t OriginalFunctionCount,
+                                            size_t CurrentFunctionCount,
+                                            size_t OutputSize,
+                                            size_t OutputSizeLimit) {
+  double D = static_cast<double>(OutputSizeLimit) / OutputSize;
+  size_t NewSize = static_cast<size_t>(CurrentFunctionCount * D * D);
+  size_t NumToRemove = CurrentFunctionCount - NewSize;
+  if (NumToRemove < 1)
+    NumToRemove = 1;
+  return NumToRemove;
+}
+
 static void
 mergeSampleProfile(const WeightedFileVector &Inputs, SymbolRemapper *Remapper,
                    StringRef OutputFilename, ProfileFormat OutputFormat,
                    StringRef ProfileSymbolListFile, bool CompressAllSections,
                    bool UseMD5, bool GenPartialProfile, bool GenCSNestedProfile,
                    bool SampleMergeColdContext, bool SampleTrimColdContext,
-                   bool SampleColdContextFrameDepth, FailureMode FailMode) {
+                   bool SampleColdContextFrameDepth,
+                   size_t OutputSizeLimit, FailureMode FailMode) {
   using namespace sampleprof;
   SampleProfileMap ProfileMap;
   SmallVector<std::unique_ptr<sampleprof::SampleProfileReader>, 5> Readers;
@@ -1052,6 +1077,48 @@
                         CompressAllSections, UseMD5, GenPartialProfile);
   if (std::error_code EC = Writer->write(ProfileMap))
     exitWithErrorCode(std::move(EC));
+
+  // If limiting output size, rewrite the entire profile by dropping samples if
+  // the profile size exceeds limit. This iterates multiple times until the
+  // limit is satisfied.
+
+  // TODO: there is no reason to actually write the profile to the file first,
+  // it can be handled in memory. This requires major refactoring since only
+  // raw_fd_ostream supports seek() required by ExtBinary format.
+  size_t OriginalFunctionCount = Writer->SortedFunctionSamples->size();
+  if (OutputSizeLimit != 0) {
+    while (Writer->getOutputStream().tell() > OutputSizeLimit) {
+      std::unique_ptr<std::vector<NameFunctionSamples>> Functions =
+          std::move(Writer->SortedFunctionSamples);
+      size_t NumToRemove = CalculateNumFunctionsToRemove(
+          OriginalFunctionCount, Functions->size(),
+          Writer->getOutputStream().tell(), OutputSizeLimit);
+      if (NumToRemove >= ProfileMap.size())
+        exitWithErrorCode(sampleprof_error::too_large);
+
+      llvm::for_each(
+          llvm::make_range(Functions->begin() + ProfileMap.size() - NumToRemove,
+                           Functions->end()),
+          [&](const NameFunctionSamples &E) { ProfileMap.erase(E.first); });
+      Functions->resize(Functions->size() - NumToRemove);
+
+      // Recreate the writer with already sorted function samples, since there
+      // are too many state variables to restore.
+      WriterOrErr =
+          SampleProfileWriter::create(OutputFilename, FormatMap[OutputFormat]);
+      if (std::error_code EC = WriterOrErr.getError())
+        exitWithErrorCode(EC, OutputFilename);
+      Writer = std::move(WriterOrErr.get());
+      Writer->SortedFunctionSamples = std::move(Functions);
+      auto Buffer = getInputFileBuf(ProfileSymbolListFile);
+      handleExtBinaryWriter(*Writer, OutputFormat, Buffer.get(), WriterList,
+                            CompressAllSections, UseMD5, GenPartialProfile);
+      if (std::error_code EC = Writer->write(ProfileMap))
+        exitWithErrorCode(std::move(EC));
+    }
+    outs() << "Profile originally has " << OriginalFunctionCount << " functions"
+              ", now " << Writer->SortedFunctionSamples->size() << "\n";
+  }
 }
 
 static WeightedFile parseWeightedFile(const StringRef &WeightedFilename) {
@@ -1193,6 +1260,11 @@
       "sample-frame-depth-for-cold-context", cl::init(1),
       cl::desc("Keep the last K frames while merging cold profile. 1 means the "
                "context-less base profile"));
+  cl::opt<uint64_t> OutputSizeLimit(
+      "output-size-limit", cl::init(0), cl::Hidden,
+      cl::desc("Trim cold functions until profile size is below specified "
+               "limit in bytes. This uses a heuristic algorithm and functions "
+               "may be excessively trimmed"));
   cl::opt<bool> GenPartialProfile(
       "gen-partial-profile", cl::init(false), cl::Hidden,
       cl::desc("Generate a partial profile (only meaningful for -extbinary)"));
@@ -1275,7 +1347,8 @@
                        OutputFormat, ProfileSymbolListFile, CompressAllSections,
                        UseMD5, GenPartialProfile, GenCSNestedProfile,
                        SampleMergeColdContext, SampleTrimColdContext,
-                       SampleColdContextFrameDepth, FailureMode);
+                       SampleColdContextFrameDepth, OutputSizeLimit,
+                       FailureMode);
   return 0;
 }
 