Index: include/llvm/ProfileData/InstrProfReader.h =================================================================== --- include/llvm/ProfileData/InstrProfReader.h +++ include/llvm/ProfileData/InstrProfReader.h @@ -348,6 +348,9 @@ using OnDiskHashTableImplV3 = OnDiskIterableChainedHashTable; +template +class InstrProfReaderItaniumRemapper; + template class InstrProfReaderIndex : public InstrProfReaderIndexBase { private: @@ -355,6 +358,8 @@ typename HashTableImpl::data_iterator RecordIterator; uint64_t FormatVersion; + friend class InstrProfReaderItaniumRemapper; + public: InstrProfReaderIndex(const unsigned char *Buckets, const unsigned char *const Payload, @@ -386,13 +391,26 @@ } }; +/// Name matcher supporting fuzzy matching of symbol names to names in profiles. +class InstrProfReaderRemapper { +public: + virtual ~InstrProfReaderRemapper() {} + virtual Error populateRemappings() { return Error::success(); } + virtual Error getRecords(StringRef FuncName, + ArrayRef &Data) = 0; +}; + /// Reader for the indexed binary instrprof format. class IndexedInstrProfReader : public InstrProfReader { private: /// The profile data file contents. std::unique_ptr DataBuffer; + /// The profile remapping file contents. + std::unique_ptr RemappingBuffer; /// The index into the profile data. std::unique_ptr Index; + /// The profile remapping file contents. + std::unique_ptr Remapper; /// Profile summary data. std::unique_ptr Summary; // Index to the current record in the record array. @@ -404,8 +422,11 @@ const unsigned char *Cur); public: - IndexedInstrProfReader(std::unique_ptr DataBuffer) - : DataBuffer(std::move(DataBuffer)), RecordIndex(0) {} + IndexedInstrProfReader( + std::unique_ptr DataBuffer, + std::unique_ptr RemappingBuffer = nullptr) + : DataBuffer(std::move(DataBuffer)), + RemappingBuffer(std::move(RemappingBuffer)), RecordIndex(0) {} IndexedInstrProfReader(const IndexedInstrProfReader &) = delete; IndexedInstrProfReader &operator=(const IndexedInstrProfReader &) = delete; @@ -434,10 +455,11 @@ /// Factory method to create an indexed reader. static Expected> - create(const Twine &Path); + create(const Twine &Path, const Twine &RemappingPath = ""); static Expected> - create(std::unique_ptr Buffer); + create(std::unique_ptr Buffer, + std::unique_ptr RemappingBuffer = nullptr); // Used for testing purpose only. void setValueProfDataEndianness(support::endianness Endianness) { Index: lib/ProfileData/InstrProfReader.cpp =================================================================== --- lib/ProfileData/InstrProfReader.cpp +++ lib/ProfileData/InstrProfReader.cpp @@ -14,6 +14,7 @@ #include "llvm/ProfileData/InstrProfReader.h" #include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/DenseMap.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/StringRef.h" #include "llvm/IR/ProfileSummary.h" @@ -23,6 +24,7 @@ #include "llvm/Support/Error.h" #include "llvm/Support/ErrorOr.h" #include "llvm/Support/MemoryBuffer.h" +#include "llvm/Support/SymbolRemappingReader.h" #include "llvm/Support/SwapByteOrder.h" #include #include @@ -88,16 +90,29 @@ } Expected> -IndexedInstrProfReader::create(const Twine &Path) { +IndexedInstrProfReader::create(const Twine &Path, const Twine &RemappingPath) { // Set up the buffer to read. auto BufferOrError = setupMemoryBuffer(Path); if (Error E = BufferOrError.takeError()) return std::move(E); - return IndexedInstrProfReader::create(std::move(BufferOrError.get())); + + // Set up the remapping buffer if requested. + std::unique_ptr RemappingBuffer; + std::string RemappingPathStr = RemappingPath.str(); + if (!RemappingPathStr.empty()) { + auto RemappingBufferOrError = setupMemoryBuffer(RemappingPathStr); + if (Error E = RemappingBufferOrError.takeError()) + return std::move(E); + RemappingBuffer = std::move(RemappingBufferOrError.get()); + } + + return IndexedInstrProfReader::create(std::move(BufferOrError.get()), + std::move(RemappingBuffer)); } Expected> -IndexedInstrProfReader::create(std::unique_ptr Buffer) { +IndexedInstrProfReader::create(std::unique_ptr Buffer, + std::unique_ptr RemappingBuffer) { // Sanity check the buffer. if (uint64_t(Buffer->getBufferSize()) > std::numeric_limits::max()) return make_error(instrprof_error::too_large); @@ -105,7 +120,8 @@ // Create the reader. if (!IndexedInstrProfReader::hasFormat(*Buffer)) return make_error(instrprof_error::bad_magic); - auto Result = llvm::make_unique(std::move(Buffer)); + auto Result = llvm::make_unique( + std::move(Buffer), std::move(RemappingBuffer)); // Initialize the reader and return the result. if (Error E = initializeReader(*Result)) @@ -587,6 +603,124 @@ RecordIterator = HashTable->data_begin(); } +namespace { +/// A remapper that does not apply any remappings. +class InstrProfReaderNullRemapper : public InstrProfReaderRemapper { + InstrProfReaderIndexBase &Underlying; + +public: + InstrProfReaderNullRemapper(InstrProfReaderIndexBase &Underlying) + : Underlying(Underlying) {} + + Error getRecords(StringRef FuncName, + ArrayRef &Data) override { + return Underlying.getRecords(FuncName, Data); + } +}; +} + +/// A remapper that applies remappings based on a symbol remapping file. +template +class llvm::InstrProfReaderItaniumRemapper + : public InstrProfReaderRemapper { +public: + InstrProfReaderItaniumRemapper( + std::unique_ptr RemapBuffer, + InstrProfReaderIndex &Underlying) + : RemapBuffer(std::move(RemapBuffer)), Underlying(Underlying) { + } + + /// Extract the original function name from a PGO function name. + static StringRef extractName(StringRef Name) { + // We can have multiple :-separated pieces; there can be pieces both + // before and after the mangled name. Find the first part that starts + // with '_Z'; we'll assume that's the mangled name we want. + std::pair Parts = {StringRef(), Name}; + while (true) { + Parts = Parts.second.split(':'); + if (Parts.first.startswith("_Z")) + return Parts.first; + if (Parts.second.empty()) + return Name; + } + } + + /// Given a mangled name extracted from a PGO function name, and a new + /// form for that mangled name, reconstitute the name. + static void reconstituteName(StringRef OrigName, StringRef ExtractedName, + StringRef Replacement, + SmallVectorImpl &Out) { + Out.reserve(OrigName.size() + Replacement.size() - ExtractedName.size()); + Out.insert(Out.end(), OrigName.begin(), ExtractedName.begin()); + Out.insert(Out.end(), Replacement.begin(), Replacement.end()); + Out.insert(Out.end(), ExtractedName.end(), OrigName.end()); + } + + Error populateRemappings() override { + if (Error E = Remappings.read(*RemapBuffer)) + return E; + for (StringRef Name : Underlying.HashTable->keys()) { + StringRef RealName = extractName(Name); + if (auto Key = Remappings.insert(RealName)) { + // FIXME: We could theoretically map the same equivalence class to + // multiple names in the profile data. If that happens, we should + // return NamedInstrProfRecords from all of them. + MappedNames.insert({Key, RealName}); + } + } + return Error::success(); + } + + Error getRecords(StringRef FuncName, + ArrayRef &Data) override { + StringRef RealName = extractName(FuncName); + if (auto Key = Remappings.lookup(RealName)) { + StringRef Remapped = MappedNames.lookup(Key); + if (!Remapped.empty()) { + if (RealName.begin() == FuncName.begin() && + RealName.end() == FuncName.end()) + FuncName = Remapped; + else { + // Try rebuilding the name from the given remapping. + SmallString<256> Reconstituted; + reconstituteName(FuncName, RealName, Remapped, Reconstituted); + Error E = Underlying.getRecords(Reconstituted, Data); + if (!E) + return E; + + // If we failed because the name doesn't exist, fall back to asking + // about the original name. + if (Error Unhandled = handleErrors( + std::move(E), [](std::unique_ptr Err) { + return Err->get() == instrprof_error::unknown_function + ? Error::success() + : Error(std::move(Err)); + })) + return Unhandled; + } + } + } + return Underlying.getRecords(FuncName, Data); + } + +private: + /// The memory buffer containing the remapping configuration. Remappings + /// holds pointers into this buffer. + std::unique_ptr RemapBuffer; + + /// The mangling remapper. + SymbolRemappingReader Remappings; + + /// Mapping from mangled name keys to the name used for the key in the + /// profile data. + /// FIXME: Can we store a location within the on-disk hash table instead of + /// redoing lookup? + DenseMap MappedNames; + + /// The real profile data reader. + InstrProfReaderIndex &Underlying; +}; + bool IndexedInstrProfReader::hasFormat(const MemoryBuffer &DataBuffer) { using namespace support; @@ -683,10 +817,22 @@ uint64_t HashOffset = endian::byte_swap(Header->HashOffset); // The rest of the file is an on disk hash table. - InstrProfReaderIndexBase *IndexPtr = nullptr; - IndexPtr = new InstrProfReaderIndex( - Start + HashOffset, Cur, Start, HashType, FormatVersion); - Index.reset(IndexPtr); + auto IndexPtr = + llvm::make_unique>( + Start + HashOffset, Cur, Start, HashType, FormatVersion); + + // Load the remapping table now if requested. + if (RemappingBuffer) { + Remapper = llvm::make_unique< + InstrProfReaderItaniumRemapper>( + std::move(RemappingBuffer), *IndexPtr); + if (Error E = Remapper->populateRemappings()) + return E; + } else { + Remapper = llvm::make_unique(*IndexPtr); + } + Index = std::move(IndexPtr); + return success(); } @@ -707,7 +853,7 @@ IndexedInstrProfReader::getInstrProfRecord(StringRef FuncName, uint64_t FuncHash) { ArrayRef Data; - Error Err = Index->getRecords(FuncName, Data); + Error Err = Remapper->getRecords(FuncName, Data); if (Err) return std::move(Err); // Found it. Look for counters with the right hash. Index: unittests/ProfileData/InstrProfTest.cpp =================================================================== --- unittests/ProfileData/InstrProfTest.cpp +++ unittests/ProfileData/InstrProfTest.cpp @@ -42,8 +42,10 @@ void SetUp() { Writer.setOutputSparse(false); } - void readProfile(std::unique_ptr Profile) { - auto ReaderOrErr = IndexedInstrProfReader::create(std::move(Profile)); + void readProfile(std::unique_ptr Profile, + std::unique_ptr Remapping = nullptr) { + auto ReaderOrErr = IndexedInstrProfReader::create(std::move(Profile), + std::move(Remapping)); EXPECT_THAT_ERROR(ReaderOrErr.takeError(), Succeeded()); Reader = std::move(ReaderOrErr.get()); } @@ -990,6 +992,44 @@ } } +TEST_P(MaybeSparseInstrProfTest, remapping_test) { + Writer.addRecord({"_Z3fooi", 0x1234, {1, 2, 3, 4}}, Err); + Writer.addRecord({"file:_Z3barf", 0x567, {5, 6, 7}}, Err); + auto Profile = Writer.writeBuffer(); + readProfile(std::move(Profile), llvm::MemoryBuffer::getMemBuffer(R"( + type i l + name 3bar 4quux + )")); + + std::vector Counts; + for (StringRef FooName : {"_Z3fooi", "_Z3fool"}) { + EXPECT_THAT_ERROR(Reader->getFunctionCounts(FooName, 0x1234, Counts), + Succeeded()); + ASSERT_EQ(4u, Counts.size()); + EXPECT_EQ(1u, Counts[0]); + EXPECT_EQ(2u, Counts[1]); + EXPECT_EQ(3u, Counts[2]); + EXPECT_EQ(4u, Counts[3]); + } + + for (StringRef BarName : {"file:_Z3barf", "file:_Z4quuxf"}) { + EXPECT_THAT_ERROR(Reader->getFunctionCounts(BarName, 0x567, Counts), + Succeeded()); + ASSERT_EQ(3u, Counts.size()); + EXPECT_EQ(5u, Counts[0]); + EXPECT_EQ(6u, Counts[1]); + EXPECT_EQ(7u, Counts[2]); + } + + for (StringRef BadName : {"_Z3foof", "_Z4quuxi", "_Z3barl", "", "_ZZZ", + "_Z3barf", "otherfile:_Z4quuxf"}) { + EXPECT_THAT_ERROR(Reader->getFunctionCounts(BadName, 0x1234, Counts), + Failed()); + EXPECT_THAT_ERROR(Reader->getFunctionCounts(BadName, 0x567, Counts), + Failed()); + } +} + TEST_F(SparseInstrProfTest, preserve_no_records) { Writer.addRecord({"foo", 0x1234, {0}}, Err); Writer.addRecord({"bar", 0x4321, {0, 0}}, Err);