Index: include/llvm/ProfileData/InstrProfWriter.h
===================================================================
--- include/llvm/ProfileData/InstrProfWriter.h
+++ include/llvm/ProfileData/InstrProfWriter.h
@@ -47,6 +47,8 @@
   /// for this function and the hash and number of counts match, each counter is
   /// summed. Optionally scale counts by \p Weight.
   Error addRecord(InstrProfRecord &&I, uint64_t Weight = 1);
+  /// Merge existing function counts from the given writer.
+  Error mergeRecordsFromWriter(InstrProfWriter &&IPW);
   /// Write the profile to \c OS
   void write(raw_fd_ostream &OS);
   /// Write the profile in text format to \c OS
Index: lib/ProfileData/InstrProfWriter.cpp
===================================================================
--- lib/ProfileData/InstrProfWriter.cpp
+++ lib/ProfileData/InstrProfWriter.cpp
@@ -182,6 +182,14 @@
   return Dest.takeError();
 }
 
+Error InstrProfWriter::mergeRecordsFromWriter(InstrProfWriter &&IPW) {
+  for (auto &I : IPW.FunctionData)
+    for (auto &Func : I.getValue())
+      if (Error E = addRecord(std::move(Func.second), 1))
+        return E;
+  return Error::success();
+}
+
 bool InstrProfWriter::shouldEncodeData(const ProfilingData &PD) {
   if (!Sparse)
     return true;
Index: test/tools/llvm-profdata/multiple-inputs.test
===================================================================
--- test/tools/llvm-profdata/multiple-inputs.test
+++ test/tools/llvm-profdata/multiple-inputs.test
@@ -51,3 +51,17 @@
 DISJOINT: Total functions: 2
 DISJOINT: Maximum function count: 1
 DISJOINT: Maximum internal block count: 3
+
+RUN: llvm-profdata merge %p/Inputs/foo3-1.proftext %p/Inputs/foo3-1.proftext \
+RUN:                     %p/Inputs/foo3-1.proftext %p/Inputs/foo3-1.proftext \
+RUN:                     %p/Inputs/foo3-1.proftext %p/Inputs/foo3-1.proftext \
+RUN:                     %p/Inputs/foo3-1.proftext %p/Inputs/foo3-1.proftext \
+RUN:                     %p/Inputs/foo3-1.proftext %p/Inputs/foo3-1.proftext -o %t
+RUN: llvm-profdata show %t -all-functions -counts | FileCheck %s --check-prefix=FOO10
+FOO10: foo:
+FOO10: Counters: 3
+FOO10: Function count: 10
+FOO10: Block counts: [20, 30]
+FOO10: Total functions: 1
+FOO10: Maximum function count: 10
+FOO10: Maximum internal block count: 30
Index: tools/llvm-profdata/llvm-profdata.cpp
===================================================================
--- tools/llvm-profdata/llvm-profdata.cpp
+++ tools/llvm-profdata/llvm-profdata.cpp
@@ -29,6 +29,7 @@
 #include "llvm/Support/Path.h"
 #include "llvm/Support/PrettyStackTrace.h"
 #include "llvm/Support/Signals.h"
+#include "llvm/Support/ThreadPool.h"
 #include "llvm/Support/raw_ostream.h"
 #include <algorithm>
 
@@ -131,30 +132,93 @@
   if (EC)
     exitWithErrorCode(EC, OutputFilename);
 
-  InstrProfWriter Writer(OutputSparse);
+  // Let n = Inputs.size(), t = NumThreads. The number of serial merge
+  // operations is given by (t + n/t), which is minimized by t = sqrt(n).
+  unsigned NumUsefulThreads =
+      std::max(1U, std::min(std::thread::hardware_concurrency(),
+                            unsigned(std::sqrt(Inputs.size()))));
+
+  // Each merge thread has a context object which stores a mutex, a writer, and
+  // an Error object (for deferred error reporting).
+  struct WriterContext {
+    std::mutex Lock;
+    InstrProfWriter Writer;
+    Error Err;
+    StringRef ErrWhence;
+
+    WriterContext(bool IsSparse)
+        : Lock(), Writer(IsSparse), Err(Error::success()), ErrWhence("") {}
+  };
+
+  // Initialize the writer contexts.
+  SmallVector<std::unique_ptr<WriterContext>, 4> Contexts;
+  for (unsigned I = 0; I < NumUsefulThreads; ++I)
+    Contexts.emplace_back(llvm::make_unique<WriterContext>(OutputSparse));
+
+  std::mutex ErrorLock;
   SmallSet<instrprof_error, 4> WriterErrorCodes;
-  for (const auto &Input : Inputs) {
+
+  auto mergeInput = [&ErrorLock, &WriterErrorCodes](const WeightedFile &Input,
+                                                    WriterContext *WC) {
+    std::unique_lock<std::mutex> CtxLock{WC->Lock};
+
+    // If there's a pending hard error, don't do more work.
+    if (WC->Err)
+      return;
+
+    WC->ErrWhence = Input.Filename;
+
     auto ReaderOrErr = InstrProfReader::create(Input.Filename);
-    if (Error E = ReaderOrErr.takeError())
-      exitWithError(std::move(E), Input.Filename);
+    if ((WC->Err = ReaderOrErr.takeError()))
+      return;
 
     auto Reader = std::move(ReaderOrErr.get());
     bool IsIRProfile = Reader->isIRLevelProfile();
-    if (Writer.setIsIRLevelProfile(IsIRProfile))
-      exitWithError("Merge IR generated profile with Clang generated profile.");
+    if (WC->Writer.setIsIRLevelProfile(IsIRProfile)) {
+      WC->Err = make_error<StringError>(
+          "Merge IR generated profile with Clang generated profile.",
+          std::error_code());
+      return;
+    }
 
     for (auto &I : *Reader) {
-      if (Error E = Writer.addRecord(std::move(I), Input.Weight)) {
+      if (Error E = WC->Writer.addRecord(std::move(I), Input.Weight)) {
         // Only show hint the first time an error occurs.
         instrprof_error IPE = InstrProfError::take(std::move(E));
+        std::unique_lock<std::mutex> ErrorGuard{ErrorLock};
         bool firstTime = WriterErrorCodes.insert(IPE).second;
         handleMergeWriterError(make_error<InstrProfError>(IPE), Input.Filename,
                                I.Name, firstTime);
       }
     }
     if (Reader->hasError())
-      exitWithError(Reader->getError(), Input.Filename);
+      WC->Err = Reader->getError();
+  };
+
+  if (NumUsefulThreads == 1) {
+    for (const auto &Input : Inputs)
+      mergeInput(Input, Contexts[0].get());
+  } else {
+    ThreadPool Pool(NumUsefulThreads);
+    unsigned Ctx = 0;
+    for (const auto &Input : Inputs) {
+      Pool.async(mergeInput, Input, Contexts[Ctx].get());
+      Ctx = (Ctx + 1) % NumUsefulThreads;
+    }
+    Pool.wait();
   }
+
+  // Handle deferred hard errors encountered during merging.
+  for (std::unique_ptr<WriterContext> &WC : Contexts)
+    if (WC->Err)
+      exitWithError(std::move(WC->Err), WC->ErrWhence);
+
+  // Merge the writers together.
+  InstrProfWriter &Writer = Contexts[0]->Writer;
+  for (unsigned I = 1; I < NumUsefulThreads; ++I)
+    if (Error E = Writer.mergeRecordsFromWriter(std::move(Contexts[I]->Writer)))
+      exitWithError(std::move(E));
+
   if (OutputFormat == PF_Text)
     Writer.writeText(Output);
   else
Index: unittests/ProfileData/InstrProfTest.cpp
===================================================================
--- unittests/ProfileData/InstrProfTest.cpp
+++ unittests/ProfileData/InstrProfTest.cpp
@@ -204,6 +204,31 @@
   delete PSFromMD;
 }
 
+TEST_F(InstrProfTest, test_writer_merge) {
+  InstrProfRecord Record1("func1", 0x1234, {42});
+  NoError(Writer.addRecord(std::move(Record1)));
+
+  InstrProfWriter Writer2;
+  InstrProfRecord Record2("func2", 0x1234, {0, 0});
+  NoError(Writer2.addRecord(std::move(Record2)));
+
+  NoError(Writer.mergeRecordsFromWriter(std::move(Writer2)));
+
+  auto Profile = Writer.writeBuffer();
+  readProfile(std::move(Profile));
+
+  Expected<InstrProfRecord> R = Reader->getInstrProfRecord("func1", 0x1234);
+  ASSERT_TRUE(NoError(R.takeError()));
+  ASSERT_EQ(1U, R->Counts.size());
+  ASSERT_EQ(42U, R->Counts[0]);
+
+  R = Reader->getInstrProfRecord("func2", 0x1234);
+  ASSERT_TRUE(NoError(R.takeError()));
+  ASSERT_EQ(2U, R->Counts.size());
+  ASSERT_EQ(0U, R->Counts[0]);
+  ASSERT_EQ(0U, R->Counts[1]);
+}
+
 static const char callee1[] = "callee1";
 static const char callee2[] = "callee2";
 static const char callee3[] = "callee3";