diff --git a/llvm/include/llvm/ProfileData/SampleProfWriter.h b/llvm/include/llvm/ProfileData/SampleProfWriter.h --- a/llvm/include/llvm/ProfileData/SampleProfWriter.h +++ b/llvm/include/llvm/ProfileData/SampleProfWriter.h @@ -35,6 +35,54 @@ NumOfLayout, }; +/// When writing a profile with size limit, user may want to use a different +/// strategy to reduce function count other than dropping functions with fewest +/// samples first. In this case a class implementing the same interfaces should +/// be provided to SampleProfileWriter::writeWithSizeLimit(). +class FunctionPruningStrategy { +protected: + SampleProfileMap &ProfileMap; + size_t OutputSizeLimit; + +public: + /// \p ProfileMap A reference to the original profile map. + /// \p OutputSizeLimit Size limit of the output profile. This is necessary + /// to estimate how many functions to remove. + FunctionPruningStrategy(SampleProfileMap &ProfileMap, size_t OutputSizeLimit) + : ProfileMap(ProfileMap), OutputSizeLimit(OutputSizeLimit) {} + + virtual ~FunctionPruningStrategy() = default; + + /// SampleProfileWriter::writeWithSizeLimit() calls this after every write + /// iteration if the output size still exceeds the limit. This function + /// should erase some functions from the profile map so that the writer tries + /// to write the profile again with fewer functions. At least 1 entry from the + /// profile map must be erased. + /// + /// \p CurrentOutputSize Size in bytes of the output the writer just produced. + virtual void Erase(size_t CurrentOutputSize) = 0; +}; + +class DefaultFunctionPruningStrategy : public FunctionPruningStrategy { + std::vector SortedFunctions; + +public: + DefaultFunctionPruningStrategy(SampleProfileMap &ProfileMap, + size_t OutputSizeLimit); + + /// In this default implementation, functions with fewest samples are dropped + /// first. 
Since the exact size of the output cannot be easily calculated due + /// to compression, we use a heuristic to remove as many functions as + /// necessary but not too many, aiming to minimize the number of write + /// iterations. + /// Empirically, functions with larger total sample count contain linearly + /// more sample entries, meaning it takes linearly more space to write them. + /// The cumulative length is therefore quadratic if all functions are sorted + /// by total sample count. + /// TODO: Find better heuristic. + void Erase(size_t CurrentOutputSize) override; +}; + /// Sample-based profile writer. Base class. class SampleProfileWriter { public: @@ -50,6 +98,17 @@ /// \returns status code of the file update operation. virtual std::error_code write(const SampleProfileMap &ProfileMap); + /// Write sample profiles up to given size limit, using the pruning strategy + /// to drop some functions if necessary. + /// A limit of 0 means no limit; \p ProfileMap loses the functions that get dropped. + /// \returns status code of the file update operation. + template + std::error_code writeWithSizeLimit(SampleProfileMap &ProfileMap, + size_t OutputSizeLimit) { + FunctionPruningStrategy Strategy(ProfileMap, OutputSizeLimit); + return writeWithSizeLimitInternal(ProfileMap, OutputSizeLimit, &Strategy); + } + raw_ostream &getOutputStream() { return *OutputStream; } /// Profile writer factory. @@ -79,6 +138,10 @@ // Write function profiles to the profile file. virtual std::error_code writeFuncProfiles(const SampleProfileMap &ProfileMap); + std::error_code writeWithSizeLimitInternal(SampleProfileMap &ProfileMap, + size_t OutputSizeLimit, + FunctionPruningStrategy *Strategy); + /// Output stream where to emit the profile to. 
std::unique_ptr OutputStream; diff --git a/llvm/lib/ProfileData/SampleProfWriter.cpp b/llvm/lib/ProfileData/SampleProfWriter.cpp --- a/llvm/lib/ProfileData/SampleProfWriter.cpp +++ b/llvm/lib/ProfileData/SampleProfWriter.cpp @@ -30,6 +30,7 @@ #include "llvm/Support/MD5.h" #include "llvm/Support/raw_ostream.h" #include +#include #include #include #include @@ -37,9 +38,107 @@ #include #include +#define DEBUG_TYPE "llvm-profdata" + using namespace llvm; using namespace sampleprof; +namespace llvm { +namespace support { +namespace endian { +namespace { + +// Adapter class to llvm::support::endian::Writer for pwrite(). +struct SeekableWriter { + raw_pwrite_stream &OS; + endianness Endian; + SeekableWriter(raw_pwrite_stream &OS, endianness Endian) + : OS(OS), Endian(Endian) {} + + template + void pwrite(ValueType Val, size_t Offset) { + std::string StringBuf; + raw_string_ostream SStream(StringBuf); + Writer(SStream, Endian).write(Val); + OS.pwrite(StringBuf.data(), StringBuf.size(), Offset); + } +}; + +} // namespace +} // namespace endian +} // namespace support +} // namespace llvm + +DefaultFunctionPruningStrategy::DefaultFunctionPruningStrategy( + SampleProfileMap &ProfileMap, size_t OutputSizeLimit) + : FunctionPruningStrategy(ProfileMap, OutputSizeLimit) { + sortFuncProfiles(ProfileMap, SortedFunctions); +} + +void DefaultFunctionPruningStrategy::Erase(size_t CurrentOutputSize) { + double D = (double)OutputSizeLimit / CurrentOutputSize; + size_t NewSize = (size_t)round(ProfileMap.size() * D * D); + size_t NumToRemove = ProfileMap.size() - NewSize; + if (NumToRemove < 1) + NumToRemove = 1; + + assert(NumToRemove <= SortedFunctions.size()); + llvm::for_each( + llvm::make_range(SortedFunctions.begin() + SortedFunctions.size() - + NumToRemove, + SortedFunctions.end()), + [&](const NameFunctionSamples &E) { ProfileMap.erase(E.first); }); + SortedFunctions.resize(SortedFunctions.size() - NumToRemove); +} + +// Write ProfileMap to an in-memory buffer, letting Strategy erase functions +// and rewriting until the buffer fits in OutputSizeLimit bytes, then copy the +// buffer to the real output stream. A limit of 0 writes directly. Fails with +// too_large if the profile map becomes empty before the output fits. +std::error_code SampleProfileWriter::writeWithSizeLimitInternal( 
+ SampleProfileMap &ProfileMap, size_t OutputSizeLimit, + FunctionPruningStrategy *Strategy) { + if (OutputSizeLimit == 0) + return write(ProfileMap); + + size_t OriginalFunctionCount = ProfileMap.size(); + + SmallVector StringBuffer; + std::unique_ptr BufferStream( + new raw_svector_ostream(StringBuffer)); + OutputStream.swap(BufferStream); + + // Every exit path must hand the real stream back to the writer; otherwise + // OutputStream is left referring to the function-local StringBuffer. + auto Restore = [&](std::error_code EC) { + OutputStream.swap(BufferStream); + return EC; + }; + + if (std::error_code EC = write(ProfileMap)) + return Restore(EC); + size_t IterationCount = 0; + while (StringBuffer.size() > OutputSizeLimit) { + Strategy->Erase(StringBuffer.size()); + + if (ProfileMap.size() == 0) + return Restore(sampleprof_error::too_large); + + StringBuffer.clear(); + OutputStream.reset(new raw_svector_ostream(StringBuffer)); + if (std::error_code EC = write(ProfileMap)) + return Restore(EC); + IterationCount++; + } + + OutputStream.swap(BufferStream); + OutputStream->write(StringBuffer.data(), StringBuffer.size()); + LLVM_DEBUG(dbgs() << "Profile originally has " << OriginalFunctionCount + << " functions, reduced to " << ProfileMap.size() << " in " + << IterationCount << " iterations\n"); + return sampleprof_error::success; +} + std::error_code SampleProfileWriter::writeFuncProfiles(const SampleProfileMap &ProfileMap) { std::vector V; @@ -116,6 +215,12 @@ std::error_code SampleProfileWriterExtBinaryBase::write(const SampleProfileMap &ProfileMap) { + // When calling write on a different profile map, existing states should be + // cleared. + NameTable.clear(); + CSNameTable.clear(); + SecHdrTable.clear(); + if (std::error_code EC = writeHeader(ProfileMap)) return EC; @@ -605,14 +710,10 @@ auto &OS = *OutputStream; // Fill the slot remembered by TableOffset with the offset of FuncOffsetTable. 
- auto &OFS = static_cast(OS); uint64_t FuncOffsetTableStart = OS.tell(); - if (OFS.seek(TableOffset) == (uint64_t)-1) - return sampleprof_error::ostream_seek_unsupported; - support::endian::Writer Writer(*OutputStream, support::little); - Writer.write(FuncOffsetTableStart); - if (OFS.seek(FuncOffsetTableStart) == (uint64_t)-1) - return sampleprof_error::ostream_seek_unsupported; + support::endian::SeekableWriter Writer(static_cast(OS), + support::little); + Writer.pwrite(FuncOffsetTableStart, TableOffset); // Write out the table size. encodeULEB128(FuncOffsetTable.size(), OS); @@ -650,6 +751,10 @@ std::error_code SampleProfileWriterBinary::writeHeader(const SampleProfileMap &ProfileMap) { + // When calling write on a different profile map, existing names should be + // cleared. + NameTable.clear(); + writeMagicIdent(Format); computeSummary(ProfileMap); @@ -690,14 +795,6 @@ } std::error_code SampleProfileWriterExtBinaryBase::writeSecHdrTable() { - auto &OFS = static_cast(*OutputStream); - uint64_t Saved = OutputStream->tell(); - - // Set OutputStream to the location saved in SecHdrTableOffset. - if (OFS.seek(SecHdrTableOffset) == (uint64_t)-1) - return sampleprof_error::ostream_seek_unsupported; - support::endian::Writer Writer(*OutputStream, support::little); - assert(SecHdrTable.size() == SectionHdrLayout.size() && "SecHdrTable entries doesn't match SectionHdrLayout"); SmallVector IndexMap(SecHdrTable.size(), -1); @@ -714,21 +811,23 @@ // needs to be computed after SecLBRProfile (the order in SecHdrTable), // but it needs to be read before SecLBRProfile (the order in // SectionHdrLayout). So we use IndexMap above to switch the order. 
+ support::endian::SeekableWriter Writer( + static_cast(*OutputStream), support::little); for (uint32_t LayoutIdx = 0; LayoutIdx < SectionHdrLayout.size(); LayoutIdx++) { assert(IndexMap[LayoutIdx] < SecHdrTable.size() && "Incorrect LayoutIdx in SecHdrTable"); auto Entry = SecHdrTable[IndexMap[LayoutIdx]]; - Writer.write(static_cast(Entry.Type)); - Writer.write(static_cast(Entry.Flags)); - Writer.write(static_cast(Entry.Offset)); - Writer.write(static_cast(Entry.Size)); + Writer.pwrite(static_cast(Entry.Type), + SecHdrTableOffset + 4 * LayoutIdx * sizeof(uint64_t)); + Writer.pwrite(static_cast(Entry.Flags), + SecHdrTableOffset + (4 * LayoutIdx + 1) * sizeof(uint64_t)); + Writer.pwrite(static_cast(Entry.Offset), + SecHdrTableOffset + (4 * LayoutIdx + 2) * sizeof(uint64_t)); + Writer.pwrite(static_cast(Entry.Size), + SecHdrTableOffset + (4 * LayoutIdx + 3) * sizeof(uint64_t)); } - // Reset OutputStream. - if (OFS.seek(Saved) == (uint64_t)-1) - return sampleprof_error::ostream_seek_unsupported; - return sampleprof_error::success; } diff --git a/llvm/test/tools/llvm-profdata/output-size-limit.test b/llvm/test/tools/llvm-profdata/output-size-limit.test new file mode 100644 --- /dev/null +++ b/llvm/test/tools/llvm-profdata/output-size-limit.test @@ -0,0 +1,119 @@ +Tests for output-size-limit option. Functions with least sample count are dropped. 
+ +1- No effect if output size limit >= original size +RUN: llvm-profdata merge --sample --text --output-size-limit=212 %p/Inputs/sample-profile.proftext | FileCheck %s --check-prefix=TEST_TEXT1 +TEST_TEXT1: main:184019:0 +TEST_TEXT1-NEXT: 4: 534 +TEST_TEXT1-NEXT: 4.2: 534 +TEST_TEXT1-NEXT: 5: 1075 +TEST_TEXT1-NEXT: 5.1: 1075 +TEST_TEXT1-NEXT: 6: 2080 +TEST_TEXT1-NEXT: 7: 534 +TEST_TEXT1-NEXT: 9: 2064 _Z3bari:1471 _Z3fooi:631 +TEST_TEXT1-NEXT: 10: inline1:1000 +TEST_TEXT1-NEXT: 1: 1000 +TEST_TEXT1-NEXT: 10: inline2:2000 +TEST_TEXT1-NEXT: 1: 2000 +TEST_TEXT1-NEXT: _Z3bari:20301:1437 +TEST_TEXT1-NEXT: 1: 1437 +TEST_TEXT1-NEXT: _Z3fooi:7711:610 +TEST_TEXT1-NEXT: 1: 610 + +2- 1 function dropped +RUN: llvm-profdata merge --sample --text --output-size-limit=211 %p/Inputs/sample-profile.proftext | FileCheck %s --check-prefix=TEST_TEXT2 +RUN: llvm-profdata merge --sample --text --output-size-limit=187 %p/Inputs/sample-profile.proftext | FileCheck %s --check-prefix=TEST_TEXT2 +TEST_TEXT2: main:184019:0 +TEST_TEXT2-NEXT: 4: 534 +TEST_TEXT2-NEXT: 4.2: 534 +TEST_TEXT2-NEXT: 5: 1075 +TEST_TEXT2-NEXT: 5.1: 1075 +TEST_TEXT2-NEXT: 6: 2080 +TEST_TEXT2-NEXT: 7: 534 +TEST_TEXT2-NEXT: 9: 2064 _Z3bari:1471 _Z3fooi:631 +TEST_TEXT2-NEXT: 10: inline1:1000 +TEST_TEXT2-NEXT: 1: 1000 +TEST_TEXT2-NEXT: 10: inline2:2000 +TEST_TEXT2-NEXT: 1: 2000 +TEST_TEXT2-NEXT: _Z3bari:20301:1437 +TEST_TEXT2-NEXT: 1: 1437 + +3- 2 functions dropped +RUN: llvm-profdata merge --sample --text --output-size-limit=170 %p/Inputs/sample-profile.proftext | FileCheck %s --check-prefix=TEST_TEXT3 +TEST_TEXT3: main:184019:0 +TEST_TEXT3-NEXT: 4: 534 +TEST_TEXT3-NEXT: 4.2: 534 +TEST_TEXT3-NEXT: 5: 1075 +TEST_TEXT3-NEXT: 5.1: 1075 +TEST_TEXT3-NEXT: 6: 2080 +TEST_TEXT3-NEXT: 7: 534 +TEST_TEXT3-NEXT: 9: 2064 _Z3bari:1471 _Z3fooi:631 +TEST_TEXT3-NEXT: 10: inline1:1000 +TEST_TEXT3-NEXT: 1: 1000 +TEST_TEXT3-NEXT: 10: inline2:2000 +TEST_TEXT3-NEXT: 1: 2000 + +4- All functions dropped, should report an error +RUN: not 
llvm-profdata merge --sample --text --output-size-limit=158 %p/Inputs/sample-profile.proftext 2>&1 | FileCheck %s --check-prefix=INVALID1 +INVALID1: error: Too much profile data + +5- ExtBinary form, no function dropped. Check output size and file content converted back to text +RUN: llvm-profdata merge --sample --extbinary --output-size-limit=489 %p/Inputs/sample-profile.proftext -o %t.output +RUN: test $(stat -c %%s %t.output) -le 489 +RUN: llvm-profdata merge --sample --text %t.output | FileCheck %s --check-prefix=TEST_EXTBINARY1 +TEST_EXTBINARY1: main:184019:0 +TEST_EXTBINARY1-NEXT: 4: 534 +TEST_EXTBINARY1-NEXT: 4.2: 534 +TEST_EXTBINARY1-NEXT: 5: 1075 +TEST_EXTBINARY1-NEXT: 5.1: 1075 +TEST_EXTBINARY1-NEXT: 6: 2080 +TEST_EXTBINARY1-NEXT: 7: 534 +TEST_EXTBINARY1-NEXT: 9: 2064 _Z3bari:1471 _Z3fooi:631 +TEST_EXTBINARY1-NEXT: 10: inline1:1000 +TEST_EXTBINARY1-NEXT: 1: 1000 +TEST_EXTBINARY1-NEXT: 10: inline2:2000 +TEST_EXTBINARY1-NEXT: 1: 2000 +TEST_EXTBINARY1-NEXT: _Z3bari:20301:1437 +TEST_EXTBINARY1-NEXT: 1: 1437 +TEST_EXTBINARY1-NEXT: _Z3fooi:7711:610 +TEST_EXTBINARY1-NEXT: 1: 610 + +6- ExtBinary form, 1 function dropped +RUN: llvm-profdata merge --sample --extbinary --output-size-limit=488 %p/Inputs/sample-profile.proftext -o %t.output +RUN: test $(stat -c %%s %t.output) -le 488 +RUN: llvm-profdata merge --sample --text %t.output | FileCheck %s --check-prefix=TEST_EXTBINARY2 +TEST_EXTBINARY2: main:184019:0 +TEST_EXTBINARY2-NEXT: 4: 534 +TEST_EXTBINARY2-NEXT: 4.2: 534 +TEST_EXTBINARY2-NEXT: 5: 1075 +TEST_EXTBINARY2-NEXT: 5.1: 1075 +TEST_EXTBINARY2-NEXT: 6: 2080 +TEST_EXTBINARY2-NEXT: 7: 534 +TEST_EXTBINARY2-NEXT: 9: 2064 _Z3bari:1471 _Z3fooi:631 +TEST_EXTBINARY2-NEXT: 10: inline1:1000 +TEST_EXTBINARY2-NEXT: 1: 1000 +TEST_EXTBINARY2-NEXT: 10: inline2:2000 +TEST_EXTBINARY2-NEXT: 1: 2000 +TEST_EXTBINARY2-NEXT: _Z3bari:20301:1437 +TEST_EXTBINARY2-NEXT: 1: 1437 + +7- ExtBinary form, 2 functions dropped +RUN: llvm-profdata merge --sample --extbinary 
--output-size-limit=474 %p/Inputs/sample-profile.proftext -o %t.output +RUN: test $(stat -c %%s %t.output) -le 474 +RUN: llvm-profdata merge --sample --text %t.output | FileCheck %s --check-prefix=TEST_EXTBINARY3 +TEST_EXTBINARY3: main:184019:0 +TEST_EXTBINARY3-NEXT: 4: 534 +TEST_EXTBINARY3-NEXT: 4.2: 534 +TEST_EXTBINARY3-NEXT: 5: 1075 +TEST_EXTBINARY3-NEXT: 5.1: 1075 +TEST_EXTBINARY3-NEXT: 6: 2080 +TEST_EXTBINARY3-NEXT: 7: 534 +TEST_EXTBINARY3-NEXT: 9: 2064 _Z3bari:1471 _Z3fooi:631 +TEST_EXTBINARY3-NEXT: 10: inline1:1000 +TEST_EXTBINARY3-NEXT: 1: 1000 +TEST_EXTBINARY3-NEXT: 10: inline2:2000 +TEST_EXTBINARY3-NEXT: 1: 2000 + +8- ExtBinary form, all functions dropped +RUN: not llvm-profdata merge --sample --extbinary --output-size-limit=400 %p/Inputs/sample-profile.proftext 2>&1 | FileCheck %s --check-prefix=INVALID2 +INVALID2: error: Too much profile data + diff --git a/llvm/tools/llvm-profdata/llvm-profdata.cpp b/llvm/tools/llvm-profdata/llvm-profdata.cpp --- a/llvm/tools/llvm-profdata/llvm-profdata.cpp +++ b/llvm/tools/llvm-profdata/llvm-profdata.cpp @@ -960,7 +960,7 @@ bool UseMD5, bool GenPartialProfile, bool GenCSNestedProfile, bool SampleMergeColdContext, bool SampleTrimColdContext, bool SampleColdContextFrameDepth, FailureMode FailMode, - bool DropProfileSymbolList) { + bool DropProfileSymbolList, size_t OutputSizeLimit) { using namespace sampleprof; SampleProfileMap ProfileMap; SmallVector, 5> Readers; 
@@ -1053,7 +1053,12 @@ auto Buffer = getInputFileBuf(ProfileSymbolListFile); handleExtBinaryWriter(*Writer, OutputFormat, Buffer.get(), WriterList, CompressAllSections, UseMD5, GenPartialProfile); - if (std::error_code EC = Writer->write(ProfileMap)) + + // If limiting the output size, the writer writes to a string buffer first + // and drops functions if the output size exceeds the limit, iterating until + // the limit is satisfied. + if (std::error_code EC = + Writer->writeWithSizeLimit(ProfileMap, OutputSizeLimit)) exitWithErrorCode(std::move(EC)); } @@ -1196,6 +1201,11 @@ "sample-frame-depth-for-cold-context", cl::init(1), cl::desc("Keep the last K frames while merging cold profile. 1 means the " "context-less base profile")); + cl::opt OutputSizeLimit( + "output-size-limit", cl::init(0), cl::Hidden, + cl::desc("Trim cold functions until profile size is below specified " + "limit in bytes. This uses a heuristic and functions may be " + "excessively trimmed")); cl::opt GenPartialProfile( "gen-partial-profile", cl::init(false), cl::Hidden, cl::desc("Generate a partial profile (only meaningful for -extbinary)")); @@ -1282,7 +1292,8 @@ WeightedInputs, Remapper.get(), OutputFilename, OutputFormat, ProfileSymbolListFile, CompressAllSections, UseMD5, GenPartialProfile, GenCSNestedProfile, SampleMergeColdContext, SampleTrimColdContext, - SampleColdContextFrameDepth, FailureMode, DropProfileSymbolList); + SampleColdContextFrameDepth, FailureMode, DropProfileSymbolList, + OutputSizeLimit); return 0; }