diff --git a/llvm/include/llvm/ProfileData/SampleProfReader.h b/llvm/include/llvm/ProfileData/SampleProfReader.h
--- a/llvm/include/llvm/ProfileData/SampleProfReader.h
+++ b/llvm/include/llvm/ProfileData/SampleProfReader.h
@@ -510,6 +510,8 @@
   void setModule(const Module *Mod) { M = Mod; }
 
 protected:
+  BumpPtrAllocator Allocator;
+
   /// Map every function to its associated profile.
   ///
   /// The profile of every function executed at runtime is collected
@@ -655,6 +657,13 @@
   /// Read the whole name table with ULEB128 encoded MD5 values.
   std::error_code readMD5NameTable();
 
+  /// Set MD5StringBuf size to be allocated for the whole MD5 name table.
+  /// uint64_t::max() == 18446744073709551615 has 20 digits, plus a NUL byte.
+  void SetMD5StringBufSize(size_t N) { MD5StringBufSize = 21 * N; }
+
+  /// Write \p FID in decimal into MD5StringBuf and return a view of the text.
+  StringRef allocateMD5String(uint64_t FID);
+
   /// Read a string indirectly via the name table.
   ErrorOr<StringRef> readStringFromTable();
 
@@ -709,10 +718,17 @@
   /// The uint64_t data has to be converted to a string and then the string
   /// will be used to initialize StringRef in NameTable.
   /// Note NameTable contains StringRef so it needs another buffer to own
-  /// the string data. MD5StringBuf serves as the string buffer that is
-  /// referenced by NameTable (vector of StringRef). We make sure
-  /// the lifetime of MD5StringBuf is not shorter than that of NameTable.
-  std::vector<std::string> MD5StringBuf;
+  /// the string data. MD5StringBuf points into a slab allocated from Allocator
+  /// that serves as the char buffer referenced by NameTable. It is null until
+  /// the first string is allocated, then advances to the next writable position
+  /// after each string; a new slab is allocated once it passes the limit.
+  char *MD5StringBuf = nullptr;
+
+  /// End of the current MD5 string buffer.
+  const char *MD5StringBufLimit = nullptr;
+
+  /// Size of MD5 string buffer per allocation.
+  size_t MD5StringBufSize = 0;
 
   /// The starting address of NameTable containing fixed length MD5.
   const uint8_t *MD5NameMemStart = nullptr;
@@ -774,9 +790,6 @@
                                     const uint64_t SecSize,
                                     const uint8_t *&DecompressBuf,
                                     uint64_t &DecompressBufSize);
-
-  BumpPtrAllocator Allocator;
-
 protected:
   std::vector<SecHdrTableEntry> SecHdrTable;
   std::error_code readSecHdrTableEntry(uint64_t Idx);
diff --git a/llvm/lib/ProfileData/SampleProfReader.cpp b/llvm/lib/ProfileData/SampleProfReader.cpp
--- a/llvm/lib/ProfileData/SampleProfReader.cpp
+++ b/llvm/lib/ProfileData/SampleProfReader.cpp
@@ -525,6 +525,20 @@
   return *Idx;
 }
 
+StringRef SampleProfileReaderBinary::allocateMD5String(uint64_t FID) {
+  // uint64_t::max() == 18446744073709551615 has 20 digits, so a string plus
+  // its NUL never needs more than 21 bytes; the limit keeps that much free.
+  if (!MD5StringBuf || MD5StringBuf > MD5StringBufLimit) {
+    size_t AllocSize = std::max<size_t>(MD5StringBufSize, 4096);
+    MD5StringBuf = Allocator.Allocate<char>(AllocSize);
+    MD5StringBufLimit = MD5StringBuf + AllocSize - 21;
+  }
+  char *Start = MD5StringBuf;
+  size_t N = snprintf(Start, 21, "%llu", static_cast<unsigned long long>(FID));
+  MD5StringBuf += N + 1;
+  return StringRef(Start, N);
+}
+
 ErrorOr<StringRef> SampleProfileReaderBinary::readStringFromTable() {
   auto Idx = readStringIndex(NameTable);
   if (std::error_code EC = Idx.getError())
@@ -538,7 +552,7 @@
     using namespace support;
     uint64_t FID = endian::read<uint64_t, little, unaligned>(
         MD5NameMemStart + (*Idx) * sizeof(uint64_t));
-    SR = MD5StringBuf.emplace_back(std::to_string(FID));
+    SR = allocateMD5String(FID);
   }
   return SR;
 }
@@ -1037,8 +1051,7 @@
   // because optimization passes can only handle either type.
   bool UseMD5 = useMD5();
   if (UseMD5)
-    MD5StringBuf.reserve(MD5StringBuf.size() + *Size);
-
+    SetMD5StringBufSize(*Size);
   NameTable.clear();
   NameTable.reserve(*Size);
   for (size_t I = 0; I < *Size; ++I) {
@@ -1047,7 +1060,7 @@
       return EC;
     if (UseMD5) {
       uint64_t MD5 = MD5Hash(*Name);
-      NameTable.emplace_back(MD5StringBuf.emplace_back(std::to_string(MD5)));
+      NameTable.emplace_back(allocateMD5String(MD5));
     } else
       NameTable.push_back(*Name);
   }
@@ -1060,14 +1073,14 @@
   if (std::error_code EC = Size.getError())
     return EC;
 
-  MD5StringBuf.reserve(MD5StringBuf.size() + *Size);
+  SetMD5StringBufSize(*Size);
   NameTable.clear();
   NameTable.reserve(*Size);
   for (size_t I = 0; I < *Size; ++I) {
     auto FID = readNumber<uint64_t>();
     if (std::error_code EC = FID.getError())
       return EC;
-    NameTable.emplace_back(MD5StringBuf.emplace_back(std::to_string(*FID)));
+    NameTable.emplace_back(allocateMD5String(*FID));
   }
   return sampleprof_error::success;
 }
@@ -1096,10 +1109,24 @@
     // index has been read before by checking whether the element in the
     // NameTable is empty, meanwhile readStringIndex can do the boundary
     // check using the size of NameTable.
+    SetMD5StringBufSize(*Size);
     NameTable.clear();
-    NameTable.resize(*Size);
-    MD5NameMemStart = Data;
-    Data = Data + (*Size) * sizeof(uint64_t);
+    // In tools mode (no module set) every profile is read, so every name is
+    // needed; materialize them all now.
+    if (!M) {
+      // NOTE(review): no check that *Size entries fit in the input - confirm.
+      NameTable.reserve(*Size);
+      for (size_t I = 0; I < *Size; ++I) {
+        using namespace support;
+        uint64_t FID = endian::read<uint64_t, little, unaligned>(Data);
+        NameTable.emplace_back(allocateMD5String(FID));
+        Data += sizeof(uint64_t);
+      }
+    } else {
+      NameTable.resize(*Size);
+      MD5NameMemStart = Data;
+      Data = Data + (*Size) * sizeof(uint64_t);
+    }
     return sampleprof_error::success;
   }
 