diff --git a/llvm/include/llvm/ProfileData/SampleProfWriter.h b/llvm/include/llvm/ProfileData/SampleProfWriter.h
--- a/llvm/include/llvm/ProfileData/SampleProfWriter.h
+++ b/llvm/include/llvm/ProfileData/SampleProfWriter.h
@@ -69,6 +69,16 @@
   virtual void setPartialProfile() {}
   virtual void resetSecLayout(SectionLayout SL) {}
 
+  /// Clear all states associated with a specific profile, but not states set
+  /// from llvm-profdata command line arguments. Ignore transient states (those
+  /// always being set by write() before use). Call this to reuse the profile
+  /// writer on different profile map data.
+  /// Derived class should first call base class reset if overriding it.
+  virtual void reset(std::unique_ptr &OS) {
+    OutputStream = std::move(OS);
+    Summary.reset();
+  }
+
 protected:
   SampleProfileWriter(std::unique_ptr &OS)
       : OutputStream(std::move(OS)) {}
@@ -97,6 +107,11 @@
 public:
   std::error_code writeSample(const FunctionSamples &S) override;
 
+  void reset(std::unique_ptr &OS) override {
+    SampleProfileWriter::reset(OS);
+    Indent = 0;
+  }
+
 protected:
   SampleProfileWriterText(std::unique_ptr &OS)
       : SampleProfileWriter(OS), Indent(0) {}
@@ -124,6 +139,11 @@
 
   std::error_code writeSample(const FunctionSamples &S) override;
 
+  void reset(std::unique_ptr &OS) override {
+    SampleProfileWriter::reset(OS);
+    NameTable.clear();
+  }
+
 protected:
   virtual MapVector &getNameTable() { return NameTable; }
   virtual std::error_code writeMagicIdent(SampleProfileFormat Format);
@@ -225,6 +245,12 @@
     SectionHdrLayout = ExtBinaryHdrLayoutTable[SL];
   }
 
+  void reset(std::unique_ptr &OS) override {
+    SampleProfileWriterBinary::reset(OS);
+    SecHdrTable.clear();
+    CSNameTable.clear();
+  }
+
 protected:
   uint64_t markSectionStart(SecType Type, uint32_t LayoutIdx);
   std::error_code addNewSection(SecType Sec, uint32_t LayoutIdx,
@@ -380,6 +406,11 @@
   std::error_code writeSample(const FunctionSamples &S) override;
   std::error_code write(const SampleProfileMap &ProfileMap) override;
 
+  void reset(std::unique_ptr &OS) override {
+    SampleProfileWriterBinary::reset(OS);
+    FuncOffsetTable.clear();
+  }
+
 protected:
   /// The table mapping from function name to the offset of its FunctionSample
   /// towards profile start.
@@ -392,6 +423,39 @@
   std::error_code writeFuncOffsetTable();
 };
 
+/// When combining multiple profiles, user may want to use different strategies
+/// to reduce function count other than dropping the least sampled first, so
+/// this class can be extended to satisfy such need.
+///
+/// Declared directly in namespace sampleprof rather than in an unnamed
+/// namespace: this is a header, and an unnamed namespace here would give every
+/// including translation unit its own distinct copy of the type.
+class FunctionPruningStrategy {
+  /// Function list filled in by sortFuncProfiles(); Erase() drops entries from
+  /// its tail.
+  std::vector Functions;
+
+public:
+  /// The profile map being pruned; entries are erased from it in place.
+  SampleProfileMap &ProfileMap;
+
+  explicit FunctionPruningStrategy(SampleProfileMap &ProfileMap)
+      : ProfileMap(ProfileMap) {
+    sortFuncProfiles(ProfileMap, Functions);
+  }
+
+  /// Remove the last \p NumToRemove entries of Functions from both Functions
+  /// and ProfileMap. \p NumToRemove must not exceed the number of remaining
+  /// functions.
+  void Erase(size_t NumToRemove) {
+    assert(NumToRemove <= Functions.size());
+    llvm::for_each(
+        llvm::make_range(Functions.begin() + Functions.size() - NumToRemove,
+                         Functions.end()),
+        [&](const NameFunctionSamples &E) { ProfileMap.erase(E.first); });
+    Functions.resize(Functions.size() - NumToRemove);
+  }
+};
+
 } // end namespace sampleprof
 } // end namespace llvm
 
diff --git a/llvm/lib/ProfileData/SampleProfWriter.cpp b/llvm/lib/ProfileData/SampleProfWriter.cpp
--- a/llvm/lib/ProfileData/SampleProfWriter.cpp
+++ b/llvm/lib/ProfileData/SampleProfWriter.cpp
@@ -40,6 +40,32 @@
 using namespace llvm;
 using namespace sampleprof;
 
+namespace llvm {
+namespace support {
+namespace endian {
+namespace {
+
+// Adapter class to llvm::support::endian::Writer for pwrite().
+struct SeekableWriter {
+  raw_pwrite_stream &OS;
+  endianness Endian;
+  SeekableWriter(raw_pwrite_stream &OS, endianness Endian)
+      : OS(OS), Endian(Endian) {}
+  // Encode Val with endian::Writer using Endian, then OS.pwrite() it at Offset.
+  template
+  void pwrite(ValueType Val, uint64_t Offset) {
+    std::string Encoded;
+    raw_string_ostream EncodeStream(Encoded);
+    Writer(EncodeStream, Endian).write(Val);
+    OS.pwrite(Encoded.data(), Encoded.size(), Offset);
+  }
+};
+
+} // namespace
+} // namespace endian
+} // namespace support
+} // namespace llvm
+
 std::error_code
 SampleProfileWriter::writeFuncProfiles(const SampleProfileMap &ProfileMap) {
   std::vector V;
@@ -605,14 +631,10 @@
   auto &OS = *OutputStream;
 
   // Fill the slot remembered by TableOffset with the offset of FuncOffsetTable.
-  auto &OFS = static_cast(OS);
   uint64_t FuncOffsetTableStart = OS.tell();
-  if (OFS.seek(TableOffset) == (uint64_t)-1)
-    return sampleprof_error::ostream_seek_unsupported;
-  support::endian::Writer Writer(*OutputStream, support::little);
-  Writer.write(FuncOffsetTableStart);
-  if (OFS.seek(FuncOffsetTableStart) == (uint64_t)-1)
-    return sampleprof_error::ostream_seek_unsupported;
+  support::endian::SeekableWriter Writer(static_cast(OS),
+                                         support::little);
+  Writer.pwrite(FuncOffsetTableStart, TableOffset);
 
   // Write out the table size.
   encodeULEB128(FuncOffsetTable.size(), OS);
@@ -690,14 +712,6 @@
 }
 
 std::error_code SampleProfileWriterExtBinaryBase::writeSecHdrTable() {
-  auto &OFS = static_cast(*OutputStream);
-  uint64_t Saved = OutputStream->tell();
-
-  // Set OutputStream to the location saved in SecHdrTableOffset.
-  if (OFS.seek(SecHdrTableOffset) == (uint64_t)-1)
-    return sampleprof_error::ostream_seek_unsupported;
-  support::endian::Writer Writer(*OutputStream, support::little);
-
   assert(SecHdrTable.size() == SectionHdrLayout.size() &&
          "SecHdrTable entries doesn't match SectionHdrLayout");
   SmallVector IndexMap(SecHdrTable.size(), -1);
@@ -714,21 +728,23 @@
   // needs to be computed after SecLBRProfile (the order in SecHdrTable),
   // but it needs to be read before SecLBRProfile (the order in
   // SectionHdrLayout). So we use IndexMap above to switch the order.
+  support::endian::SeekableWriter Writer(
+      static_cast(*OutputStream), support::little);
   for (uint32_t LayoutIdx = 0; LayoutIdx < SectionHdrLayout.size();
        LayoutIdx++) {
     assert(IndexMap[LayoutIdx] < SecHdrTable.size() &&
            "Incorrect LayoutIdx in SecHdrTable");
     auto Entry = SecHdrTable[IndexMap[LayoutIdx]];
-    Writer.write(static_cast(Entry.Type));
-    Writer.write(static_cast(Entry.Flags));
-    Writer.write(static_cast(Entry.Offset));
-    Writer.write(static_cast(Entry.Size));
+    // Each entry is four consecutive uint64_t slots (Type, Flags, Offset,
+    // Size) starting at SecHdrTableOffset, indexed by LayoutIdx.
+    const uint64_t Slot = sizeof(uint64_t);
+    const uint64_t EntryStart = SecHdrTableOffset + 4 * LayoutIdx * Slot;
+    Writer.pwrite(static_cast(Entry.Type), EntryStart);
+    Writer.pwrite(static_cast(Entry.Flags), EntryStart + Slot);
+    Writer.pwrite(static_cast(Entry.Offset), EntryStart + 2 * Slot);
+    Writer.pwrite(static_cast(Entry.Size), EntryStart + 3 * Slot);
   }
 
-  // Reset OutputStream.
-  if (OFS.seek(Saved) == (uint64_t)-1)
-    return sampleprof_error::ostream_seek_unsupported;
-
   return sampleprof_error::success;
 }
 
diff --git a/llvm/test/tools/llvm-profdata/output-size-limit.test b/llvm/test/tools/llvm-profdata/output-size-limit.test
new file mode 100644
--- /dev/null
+++ b/llvm/test/tools/llvm-profdata/output-size-limit.test
@@ -0,0 +1,119 @@
+Tests for output-size-limit option. Functions with least sample count are dropped.
+ +1- No effect if output size limit >= original size +RUN: llvm-profdata merge --sample --text --output-size-limit=212 %p/Inputs/sample-profile.proftext | FileCheck %s --check-prefix=TEST_TEXT1 +TEST_TEXT1: main:184019:0 +TEST_TEXT1-NEXT: 4: 534 +TEST_TEXT1-NEXT: 4.2: 534 +TEST_TEXT1-NEXT: 5: 1075 +TEST_TEXT1-NEXT: 5.1: 1075 +TEST_TEXT1-NEXT: 6: 2080 +TEST_TEXT1-NEXT: 7: 534 +TEST_TEXT1-NEXT: 9: 2064 _Z3bari:1471 _Z3fooi:631 +TEST_TEXT1-NEXT: 10: inline1:1000 +TEST_TEXT1-NEXT: 1: 1000 +TEST_TEXT1-NEXT: 10: inline2:2000 +TEST_TEXT1-NEXT: 1: 2000 +TEST_TEXT1-NEXT: _Z3bari:20301:1437 +TEST_TEXT1-NEXT: 1: 1437 +TEST_TEXT1-NEXT: _Z3fooi:7711:610 +TEST_TEXT1-NEXT: 1: 610 + +2- 1 function dropped +RUN: llvm-profdata merge --sample --text --output-size-limit=211 %p/Inputs/sample-profile.proftext | FileCheck %s --check-prefix=TEST_TEXT2 +RUN: llvm-profdata merge --sample --text --output-size-limit=187 %p/Inputs/sample-profile.proftext | FileCheck %s --check-prefix=TEST_TEXT2 +TEST_TEXT2: main:184019:0 +TEST_TEXT2-NEXT: 4: 534 +TEST_TEXT2-NEXT: 4.2: 534 +TEST_TEXT2-NEXT: 5: 1075 +TEST_TEXT2-NEXT: 5.1: 1075 +TEST_TEXT2-NEXT: 6: 2080 +TEST_TEXT2-NEXT: 7: 534 +TEST_TEXT2-NEXT: 9: 2064 _Z3bari:1471 _Z3fooi:631 +TEST_TEXT2-NEXT: 10: inline1:1000 +TEST_TEXT2-NEXT: 1: 1000 +TEST_TEXT2-NEXT: 10: inline2:2000 +TEST_TEXT2-NEXT: 1: 2000 +TEST_TEXT2-NEXT: _Z3bari:20301:1437 +TEST_TEXT2-NEXT: 1: 1437 + +3- 2 functions dropped +RUN: llvm-profdata merge --sample --text --output-size-limit=170 %p/Inputs/sample-profile.proftext | FileCheck %s --check-prefix=TEST_TEXT3 +TEST_TEXT3: main:184019:0 +TEST_TEXT3-NEXT: 4: 534 +TEST_TEXT3-NEXT: 4.2: 534 +TEST_TEXT3-NEXT: 5: 1075 +TEST_TEXT3-NEXT: 5.1: 1075 +TEST_TEXT3-NEXT: 6: 2080 +TEST_TEXT3-NEXT: 7: 534 +TEST_TEXT3-NEXT: 9: 2064 _Z3bari:1471 _Z3fooi:631 +TEST_TEXT3-NEXT: 10: inline1:1000 +TEST_TEXT3-NEXT: 1: 1000 +TEST_TEXT3-NEXT: 10: inline2:2000 +TEST_TEXT3-NEXT: 1: 2000 + +4- All functions dropped, should report an error +RUN: not 
llvm-profdata merge --sample --text --output-size-limit=158 %p/Inputs/sample-profile.proftext 2>&1 | FileCheck %s --check-prefix=INVALID1 +INVALID1: error: Too much profile data + +5- ExtBinary form, no function dropped. Check output size and file content converted back to text +RUN: llvm-profdata merge --sample --extbinary --output-size-limit=489 %p/Inputs/sample-profile.proftext -o %t.output +RUN: test $(stat -c %%s %t.output) -le 489 +RUN: llvm-profdata merge --sample --text %t.output | FileCheck %s --check-prefix=TEST_EXTBINARY1 +TEST_EXTBINARY1: main:184019:0 +TEST_EXTBINARY1-NEXT: 4: 534 +TEST_EXTBINARY1-NEXT: 4.2: 534 +TEST_EXTBINARY1-NEXT: 5: 1075 +TEST_EXTBINARY1-NEXT: 5.1: 1075 +TEST_EXTBINARY1-NEXT: 6: 2080 +TEST_EXTBINARY1-NEXT: 7: 534 +TEST_EXTBINARY1-NEXT: 9: 2064 _Z3bari:1471 _Z3fooi:631 +TEST_EXTBINARY1-NEXT: 10: inline1:1000 +TEST_EXTBINARY1-NEXT: 1: 1000 +TEST_EXTBINARY1-NEXT: 10: inline2:2000 +TEST_EXTBINARY1-NEXT: 1: 2000 +TEST_EXTBINARY1-NEXT: _Z3bari:20301:1437 +TEST_EXTBINARY1-NEXT: 1: 1437 +TEST_EXTBINARY1-NEXT: _Z3fooi:7711:610 +TEST_EXTBINARY1-NEXT: 1: 610 + +6- ExtBinary form, 1 function dropped +RUN: llvm-profdata merge --sample --extbinary --output-size-limit=488 %p/Inputs/sample-profile.proftext -o %t.output +RUN: test $(stat -c %%s %t.output) -le 488 +RUN: llvm-profdata merge --sample --text %t.output | FileCheck %s --check-prefix=TEST_EXTBINARY2 +TEST_EXTBINARY2: main:184019:0 +TEST_EXTBINARY2-NEXT: 4: 534 +TEST_EXTBINARY2-NEXT: 4.2: 534 +TEST_EXTBINARY2-NEXT: 5: 1075 +TEST_EXTBINARY2-NEXT: 5.1: 1075 +TEST_EXTBINARY2-NEXT: 6: 2080 +TEST_EXTBINARY2-NEXT: 7: 534 +TEST_EXTBINARY2-NEXT: 9: 2064 _Z3bari:1471 _Z3fooi:631 +TEST_EXTBINARY2-NEXT: 10: inline1:1000 +TEST_EXTBINARY2-NEXT: 1: 1000 +TEST_EXTBINARY2-NEXT: 10: inline2:2000 +TEST_EXTBINARY2-NEXT: 1: 2000 +TEST_EXTBINARY2-NEXT: _Z3bari:20301:1437 +TEST_EXTBINARY2-NEXT: 1: 1437 + +7- ExtBinary form, 2 functions dropped +RUN: llvm-profdata merge --sample --extbinary 
--output-size-limit=474 %p/Inputs/sample-profile.proftext -o %t.output +RUN: test $(stat -c %%s %t.output) -le 474 +RUN: llvm-profdata merge --sample --text %t.output | FileCheck %s --check-prefix=TEST_EXTBINARY3 +TEST_EXTBINARY3: main:184019:0 +TEST_EXTBINARY3-NEXT: 4: 534 +TEST_EXTBINARY3-NEXT: 4.2: 534 +TEST_EXTBINARY3-NEXT: 5: 1075 +TEST_EXTBINARY3-NEXT: 5.1: 1075 +TEST_EXTBINARY3-NEXT: 6: 2080 +TEST_EXTBINARY3-NEXT: 7: 534 +TEST_EXTBINARY3-NEXT: 9: 2064 _Z3bari:1471 _Z3fooi:631 +TEST_EXTBINARY3-NEXT: 10: inline1:1000 +TEST_EXTBINARY3-NEXT: 1: 1000 +TEST_EXTBINARY3-NEXT: 10: inline2:2000 +TEST_EXTBINARY3-NEXT: 1: 2000 + +8- ExtBinary form, all functions dropped +RUN: not llvm-profdata merge --sample --extbinary --output-size-limit=400 %p/Inputs/sample-profile.proftext 2>&1 | FileCheck %s --check-prefix=INVALID2 +INVALID2: error: Too much profile data + diff --git a/llvm/tools/llvm-profdata/llvm-profdata.cpp b/llvm/tools/llvm-profdata/llvm-profdata.cpp --- a/llvm/tools/llvm-profdata/llvm-profdata.cpp +++ b/llvm/tools/llvm-profdata/llvm-profdata.cpp @@ -25,6 +25,7 @@ #include "llvm/ProfileData/SampleProfReader.h" #include "llvm/ProfileData/SampleProfWriter.h" #include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" #include "llvm/Support/Discriminator.h" #include "llvm/Support/Errc.h" #include "llvm/Support/FileSystem.h" @@ -43,6 +44,8 @@ #include #include +#define DEBUG_TYPE "llvm-profdata" + using namespace llvm; // We use this string to indicate that there are @@ -953,6 +956,55 @@ } } +/// The exact size of the output cannot be easily calculated, we use a +/// heuristic to remove as many functions as necessary (but not too many), while +/// minimizing the number of iterations needed. +/// Empirically, functions with larger total sample count contain linearly more +/// sample entries, meaning it takes linearly more space to write them. The +/// cumulative length is therefore quadratic if all functions are sorted by +/// total sample count. 
+/// TODO: Find better heuristic.
+static size_t CalculateNumFunctionsToRemove(size_t OriginalFunctionCount,
+                                            size_t CurrentFunctionCount,
+                                            size_t OutputSize,
+                                            size_t OutputSizeLimit) {
+  const double Ratio = static_cast<double>(OutputSizeLimit) / OutputSize;
+  const auto Kept = static_cast<size_t>(CurrentFunctionCount * Ratio * Ratio);
+  // Remove at least one function so that the caller always makes progress.
+  return Kept < CurrentFunctionCount ? CurrentFunctionCount - Kept : 1;
+}
+
+/// Rewrite ProfileMap through Writer into StringBuffer, erasing functions via
+/// Strategy until it fits OutputSizeLimit; errors go to exitWithErrorCode().
+static void RewriteProfileSizeLimit(SampleProfileWriter &Writer,
+                                    FunctionPruningStrategy &Strategy,
+                                    SmallVector &StringBuffer,
+                                    size_t OutputSizeLimit) {
+  SampleProfileMap &ProfileMap = Strategy.ProfileMap;
+  size_t OriginalFunctionCount = ProfileMap.size();
+  size_t IterationCount = 0;
+
+  while (StringBuffer.size() > OutputSizeLimit) {
+    size_t NumToRemove =
+        CalculateNumFunctionsToRemove(OriginalFunctionCount, ProfileMap.size(),
+                                      StringBuffer.size(), OutputSizeLimit);
+    if (NumToRemove >= ProfileMap.size())
+      exitWithErrorCode(sampleprof_error::too_large);
+
+    Strategy.Erase(NumToRemove);
+    StringBuffer.clear();
+    std::unique_ptr NewStream(
+        new raw_svector_ostream(StringBuffer));
+    Writer.reset(NewStream);
+    if (std::error_code EC = Writer.write(ProfileMap))
+      exitWithErrorCode(std::move(EC));
+    IterationCount++;
+  }
+  LLVM_DEBUG(dbgs() << "Profile originally has " << OriginalFunctionCount
+                    << " functions, reduced to " << ProfileMap.size() << " in "
+                    << IterationCount << " iterations\n");
+}
+
 static void mergeSampleProfile(const WeightedFileVector &Inputs,
                                SymbolRemapper *Remapper,
                                StringRef OutputFilename, ProfileFormat OutputFormat,
@@ -960,7 +1012,7 @@
                    bool UseMD5, bool GenPartialProfile, bool GenCSNestedProfile,
                    bool SampleMergeColdContext, bool SampleTrimColdContext,
                    bool SampleColdContextFrameDepth, FailureMode FailMode,
-                   bool DropProfileSymbolList) {
+                   bool DropProfileSymbolList, size_t OutputSizeLimit) {
   using namespace sampleprof;
   SampleProfileMap ProfileMap;
   SmallVector, 5> Readers;
@@ -1042,8 +1094,18 @@
     ProfileIsCS = FunctionSamples::ProfileIsCS = false;
   }
 
+  // If limiting the output size, write to a string buffer first, and drop
+  // functions if the output size exceeds limit. This iterates multiple times
+  // until the limit is satisfied.
+  SmallVector StringBuffer;
+  std::unique_ptr BufferStream(
+      new raw_svector_ostream(StringBuffer));
+
   auto WriterOrErr =
-      SampleProfileWriter::create(OutputFilename, FormatMap[OutputFormat]);
+      (OutputSizeLimit != 0)
+          ? SampleProfileWriter::create(BufferStream, FormatMap[OutputFormat])
+          : SampleProfileWriter::create(OutputFilename,
+                                        FormatMap[OutputFormat]);
   if (std::error_code EC = WriterOrErr.getError())
     exitWithErrorCode(EC, OutputFilename);
 
@@ -1053,8 +1115,24 @@
   auto Buffer = getInputFileBuf(ProfileSymbolListFile);
   handleExtBinaryWriter(*Writer, OutputFormat, Buffer.get(), WriterList,
                         CompressAllSections, UseMD5, GenPartialProfile);
+
   if (std::error_code EC = Writer->write(ProfileMap))
     exitWithErrorCode(std::move(EC));
+
+  if (OutputSizeLimit != 0) {
+    FunctionPruningStrategy Strategy(ProfileMap);
+    RewriteProfileSizeLimit(*Writer, Strategy, StringBuffer, OutputSizeLimit);
+    // Write back to the file after dropping enough functions.
+    std::error_code EC;
+    raw_fd_ostream File(OutputFilename, EC,
+                        OutputFormat == PF_Text ? sys::fs::OF_TextWithCRLF
+                                                : sys::fs::OF_None);
+    if (EC)
+      exitWithErrorCode(std::move(EC), OutputFilename);
+    File.write(StringBuffer.data(), StringBuffer.size());
+    if (EC = File.error(); EC)
+      exitWithErrorCode(std::move(EC), OutputFilename);
+  }
 }
 
 static WeightedFile parseWeightedFile(const StringRef &WeightedFilename) {
@@ -1196,6 +1274,11 @@
       "sample-frame-depth-for-cold-context", cl::init(1),
       cl::desc("Keep the last K frames while merging cold profile. 1 means the "
                "context-less base profile"));
+  cl::opt OutputSizeLimit(
+      "output-size-limit", cl::init(0), cl::Hidden,
+      cl::desc("Trim cold functions until profile size is below specified "
+               "limit in bytes. This uses a heuristic and functions may be "
+               "excessively trimmed"));
   cl::opt GenPartialProfile(
       "gen-partial-profile", cl::init(false), cl::Hidden,
       cl::desc("Generate a partial profile (only meaningful for -extbinary)"));
@@ -1282,7 +1365,8 @@
       WeightedInputs, Remapper.get(), OutputFilename, OutputFormat,
       ProfileSymbolListFile, CompressAllSections, UseMD5, GenPartialProfile,
       GenCSNestedProfile, SampleMergeColdContext, SampleTrimColdContext,
-      SampleColdContextFrameDepth, FailureMode, DropProfileSymbolList);
+      SampleColdContextFrameDepth, FailureMode, DropProfileSymbolList,
+      OutputSizeLimit);
 
   return 0;
 }