Index: docs/CoverageMappingFormat.rst =================================================================== --- docs/CoverageMappingFormat.rst +++ docs/CoverageMappingFormat.rst @@ -260,20 +260,31 @@ i32 20, ; The length of the string that contains the encoded coverage mapping data i32 0, ; Coverage mapping format version }, - [2 x { i8*, i32, i32, i64 }] [ ; Function records - { i8*, i32, i32, i64 } { i8* getelementptr inbounds ([3 x i8]* @__profn_foo, i32 0, i32 0), ; Function's name - i32 3, ; Function's name length + [2 x { i64, i32, i64 }] [ ; Function records + { i64, i32, i64 } { + i64 0x5cf8c24cdb18bdac, ; Function's name MD5 i32 9, ; Function's encoded coverage mapping data string length i64 0 ; Function's structural hash }, - { i8*, i32, i32, i64 } { i8* getelementptr inbounds ([3 x i8]* @__profn_bar, i32 0, i32 0), ; Function's name - i32 3, ; Function's name length + { i64, i32, i64 } { + i64 0xe413754a191db537, ; Function's name MD5 i32 9, ; Function's encoded coverage mapping data string length i64 0 ; Function's structural hash }], [40 x i8] c"..." ; Encoded data (dissected later) }, section "__llvm_covmap", align 8 +The function record layout has evolved since version 1. In version 1, the function record for *foo* is defined as follows: + +.. code-block:: llvm + + { i8*, i32, i32, i64 } { i8* getelementptr inbounds ([3 x i8]* @__profn_foo, i32 0, i32 0), ; Function's name + i32 3, ; Function's name length + i32 9, ; Function's encoded coverage mapping data string length + i64 0 ; Function's structural hash + } + + Coverage Mapping Header: ------------------------ @@ -296,11 +307,10 @@ .. code-block:: llvm - { i8*, i32, i32, i64 } + { i64, i32, i64 } -It contains the pointer to the function's name, function's name length, -the length of the encoded mapping data for that function, and function's -hash value. +It contains function name's MD5, the length of the encoded mapping data for that function, and function's +structural hash value. 
Encoded data: ------------- Index: include/llvm/ProfileData/CoverageMapping.h =================================================================== --- include/llvm/ProfileData/CoverageMapping.h +++ include/llvm/ProfileData/CoverageMapping.h @@ -488,9 +488,11 @@ // [ArrayEnd] // [Encoded Region Mapping Data] LLVM_PACKED_START -template struct CovMapFunctionRecord { +template struct CovMapFunctionRecordV1 { +#define COVMAP_V1 #define COVMAP_FUNC_RECORD(Type, LLVMType, Name, Init) Type Name; #include "llvm/ProfileData/InstrProfData.inc" +#undef COVMAP_V1 // Return the structural hash associated with the function. template uint64_t getFuncHash() const { @@ -516,6 +518,33 @@ return std::error_code(); } }; + +template struct CovMapFunctionRecord { +#define COVMAP_FUNC_RECORD(Type, LLVMType, Name, Init) Type Name; +#include "llvm/ProfileData/InstrProfData.inc" + + // Return the structural hash associated with the function. + template uint64_t getFuncHash() const { + return support::endian::byte_swap(FuncHash); + } + // Return the coverage map data size for the function. + template uint32_t getDataSize() const { + return support::endian::byte_swap(DataSize); + } + // Return function lookup key. The value is considered opaque. + template uint64_t getFuncNameRef() const { + return support::endian::byte_swap(NameRef); + } + // Return the PGO name of the function, looked up by its 64-bit name key. + template + std::error_code getFuncName(InstrProfSymtab &ProfileNames, + StringRef &FuncName) const { + uint64_t NameRef = getFuncNameRef(); + FuncName = ProfileNames.getFuncName(NameRef); + return std::error_code(); + } +}; + // Per module coverage mapping data header, i.e. CoverageMapFileHeader // documented above. struct CovMapHeader { @@ -539,12 +568,21 @@ enum CovMapVersion { Version1 = 0, - // The current version is Version1 + // Function's name reference from CovMapFuncRecord is changed from raw + // name string pointer to MD5 to support name section compression. Name + // section is also compressed.
+ Version2 = 1, + // The current version is Version2 CurrentVersion = INSTR_PROF_COVMAP_VERSION }; template struct CovMapTraits { typedef CovMapFunctionRecord CovMapFuncRecordType; + typedef uint64_t NameRefType; +}; + +template struct CovMapTraits { + typedef CovMapFunctionRecordV1 CovMapFuncRecordType; typedef IntPtrT NameRefType; }; Index: include/llvm/ProfileData/InstrProf.h =================================================================== --- include/llvm/ProfileData/InstrProf.h +++ include/llvm/ProfileData/InstrProf.h @@ -83,6 +83,12 @@ /// associated with a COMDAT function. inline StringRef getInstrProfComdatPrefix() { return "__profv_"; } +/// Return the name of the variable holding the strings (possibly compressed) +/// of all function's PGO names. +inline StringRef getInstrProfNamesVarName() { + return "__llvm_prf_nm"; +} + /// Return the name of a covarage mapping variable (internal linkage) /// for each instrumented source module. Such variables are allocated /// in the __llvm_covmap section. @@ -707,6 +713,11 @@ namespace RawInstrProf { +// Version 1: First version +// Version 2: Added value profile data section. Per-function control data +// struct has more fields to describe value profile information. +// Version 3: Compressed name section support. Function PGO name reference +// from control data struct is changed from raw pointer to Name's MD5 value. 
const uint64_t Version = INSTR_PROF_RAW_VERSION; template inline uint64_t getMagic(); Index: include/llvm/ProfileData/InstrProfData.inc =================================================================== --- include/llvm/ProfileData/InstrProfData.inc +++ include/llvm/ProfileData/InstrProfData.inc @@ -64,17 +64,12 @@ #else #define INSTR_PROF_DATA_DEFINED #endif - -INSTR_PROF_DATA(const uint32_t, llvm::Type::getInt32Ty(Ctx), NameSize, \ - ConstantInt::get(llvm::Type::getInt32Ty(Ctx), \ - NamePtr->getType()->getPointerElementType()->getArrayNumElements())) -INSTR_PROF_DATA(const uint32_t, llvm::Type::getInt32Ty(Ctx), NumCounters, \ - ConstantInt::get(llvm::Type::getInt32Ty(Ctx), NumCounters)) +INSTR_PROF_DATA(const uint64_t, llvm::Type::getInt64Ty(Ctx), NameRef, \ + ConstantInt::get(llvm::Type::getInt64Ty(Ctx), \ + IndexedInstrProf::ComputeHash(getPGOFuncNameVarInitializer(Inc->getName())))) INSTR_PROF_DATA(const uint64_t, llvm::Type::getInt64Ty(Ctx), FuncHash, \ ConstantInt::get(llvm::Type::getInt64Ty(Ctx), \ Inc->getHash()->getZExtValue())) -INSTR_PROF_DATA(const IntPtrT, llvm::Type::getInt8PtrTy(Ctx), NamePtr, \ - ConstantExpr::getBitCast(NamePtr, llvm::Type::getInt8PtrTy(Ctx))) INSTR_PROF_DATA(const IntPtrT, llvm::Type::getInt64PtrTy(Ctx), CounterPtr, \ ConstantExpr::getBitCast(CounterPtr, \ llvm::Type::getInt64PtrTy(Ctx))) @@ -82,6 +77,8 @@ FunctionAddr) INSTR_PROF_DATA(IntPtrT, llvm::Type::getInt8PtrTy(Ctx), Values, \ ConstantPointerNull::get(Int8PtrTy)) +INSTR_PROF_DATA(const uint32_t, llvm::Type::getInt32Ty(Ctx), NumCounters, \ + ConstantInt::get(llvm::Type::getInt32Ty(Ctx), NumCounters)) INSTR_PROF_DATA(const uint16_t, Int16ArrayTy, NumValueSites[IPVK_Last+1], \ ConstantArray::get(Int16ArrayTy, Int16ArrayVals)) #undef INSTR_PROF_DATA @@ -153,12 +150,18 @@ #else #define INSTR_PROF_DATA_DEFINED #endif +#ifdef COVMAP_V1 COVMAP_FUNC_RECORD(const IntPtrT, llvm::Type::getInt8PtrTy(Ctx), \ NamePtr, llvm::ConstantExpr::getBitCast(NamePtr, \ 
llvm::Type::getInt8PtrTy(Ctx))) COVMAP_FUNC_RECORD(const uint32_t, llvm::Type::getInt32Ty(Ctx), NameSize, \ - llvm::ConstantInt::get(llvm::Type::getInt32Ty(Ctx),\ + llvm::ConstantInt::get(llvm::Type::getInt32Ty(Ctx), \ NameValue.size())) +#else +COVMAP_FUNC_RECORD(const int64_t, llvm::Type::getInt64Ty(Ctx), NameRef, \ + llvm::ConstantInt::get(llvm::Type::getInt64Ty(Ctx), \ + llvm::IndexedInstrProf::ComputeHash(NameValue))) +#endif COVMAP_FUNC_RECORD(const uint32_t, llvm::Type::getInt32Ty(Ctx), DataSize, \ llvm::ConstantInt::get(llvm::Type::getInt32Ty(Ctx),\ CoverageMapping.size())) @@ -692,10 +695,12 @@ (uint64_t)'p' << 40 | (uint64_t)'r' << 32 | (uint64_t)'o' << 24 | \ (uint64_t)'f' << 16 | (uint64_t)'R' << 8 | (uint64_t)129 -/* Raw profile format version. */ -#define INSTR_PROF_RAW_VERSION 2 +/* Raw profile format version (start from 1). */ +#define INSTR_PROF_RAW_VERSION 3 +/* Indexed profile format version (start from 1). */ #define INSTR_PROF_INDEX_VERSION 3 -#define INSTR_PROF_COVMAP_VERSION 0 +/* Coverage mapping format version (start from 0). */ +#define INSTR_PROF_COVMAP_VERSION 1 /* Profile version is always of type uint64_t. Reserve the upper 8 bits in the * version for other variants of profile. 
We set the lowest bit of the upper 8 Index: include/llvm/ProfileData/InstrProfReader.h =================================================================== --- include/llvm/ProfileData/InstrProfReader.h +++ include/llvm/ProfileData/InstrProfReader.h @@ -160,6 +160,7 @@ const RawInstrProf::ProfileData *DataEnd; const uint64_t *CountersStart; const char *NamesStart; + uint64_t NamesSize; const uint8_t *ValueDataStart; const char *ProfileEnd; uint32_t ValueKindLast; @@ -216,9 +217,8 @@ ptrdiff_t Offset = (swap(CounterPtr) - CountersDelta) / sizeof(uint64_t); return CountersStart + Offset; } - const char *getName(IntPtrT NamePtr) const { - ptrdiff_t Offset = (swap(NamePtr) - NamesDelta) / sizeof(char); - return NamesStart + Offset; + StringRef getName(uint64_t NameRef) const { + return Symtab->getFuncName(swap(NameRef)); } }; Index: lib/ProfileData/CoverageMappingReader.cpp =================================================================== --- lib/ProfileData/CoverageMappingReader.cpp +++ lib/ProfileData/CoverageMappingReader.cpp @@ -424,6 +424,11 @@ case CovMapVersion::Version1: return llvm::make_unique>(P, R, F); + case CovMapVersion::Version2: + // Decompress the name data. + P.create(P.getNameData()); + return llvm::make_unique>(P, R, F); } llvm_unreachable("Unsupported version"); } Index: lib/ProfileData/InstrProf.cpp =================================================================== --- lib/ProfileData/InstrProf.cpp +++ lib/ProfileData/InstrProf.cpp @@ -208,7 +208,7 @@ std::string(CompressedNameStrings.data(), CompressedNameStrings.size())); } -StringRef getPGOFuncNameInitializer(GlobalVariable *NameVar) { +StringRef getPGOFuncNameVarInitializer(GlobalVariable *NameVar) { auto *Arr = cast(NameVar->getInitializer()); StringRef NameStr = Arr->isCString() ? 
Arr->getAsCString() : Arr->getAsString(); @@ -219,7 +219,7 @@ std::string &Result, bool doCompression) { std::vector NameStrs; for (auto *NameVar : NameVars) { - NameStrs.push_back(getPGOFuncNameInitializer(NameVar)); + NameStrs.push_back(getPGOFuncNameVarInitializer(NameVar)); } return collectPGOFuncNameStrings( NameStrs, zlib::isAvailable() && doCompression, Result); Index: lib/ProfileData/InstrProfReader.cpp =================================================================== --- lib/ProfileData/InstrProfReader.cpp +++ lib/ProfileData/InstrProfReader.cpp @@ -280,13 +280,12 @@ template void RawInstrProfReader::createSymtab(InstrProfSymtab &Symtab) { + Symtab.create(StringRef(NamesStart, NamesSize)); for (const RawInstrProf::ProfileData *I = Data; I != DataEnd; ++I) { - StringRef FunctionName(getName(I->NamePtr), swap(I->NameSize)); - Symtab.addFuncName(FunctionName); const IntPtrT FPtr = swap(I->FunctionPointer); if (!FPtr) continue; - Symtab.mapAddress(FPtr, IndexedInstrProf::ComputeHash(FunctionName)); + Symtab.mapAddress(FPtr, swap(I->NameRef)); } Symtab.finalizeSymtab(); } @@ -301,7 +300,7 @@ NamesDelta = swap(Header.NamesDelta); auto DataSize = swap(Header.DataSize); auto CountersSize = swap(Header.CountersSize); - auto NamesSize = swap(Header.NamesSize); + NamesSize = swap(Header.NamesSize); auto ValueDataSize = swap(Header.ValueDataSize); ValueKindLast = swap(Header.ValueKindLast); @@ -334,11 +333,7 @@ template std::error_code RawInstrProfReader::readName(InstrProfRecord &Record) { - Record.Name = StringRef(getName(Data->NamePtr), swap(Data->NameSize)); - if (Record.Name.data() < NamesStart || - Record.Name.data() + Record.Name.size() > - reinterpret_cast(ValueDataStart)) - return error(instrprof_error::malformed); + Record.Name = getName(Data->NameRef); return success(); } Index: lib/Transforms/Instrumentation/InstrProfiling.cpp =================================================================== --- lib/Transforms/Instrumentation/InstrProfiling.cpp +++
lib/Transforms/Instrumentation/InstrProfiling.cpp @@ -27,6 +27,10 @@ namespace { +cl::opt DoNameCompression( + "enable-name-compression", cl::desc("Enable name string compression"), + cl::Hidden, cl::init(true)); + class InstrProfiling : public ModulePass { public: static char ID; @@ -59,6 +63,7 @@ } PerFunctionProfileData; DenseMap ProfileDataMap; std::vector UsedVars; + std::vector ReferencedNames; bool isMachO() const { return Triple(M->getTargetTriple()).isOSBinFormatMachO(); @@ -102,6 +107,9 @@ /// referring to them will also be created. GlobalVariable *getOrCreateRegionCounters(InstrProfIncrementInst *Inc); + /// Emit the section with compressed function names. + void emitNameData(); + /// Emit runtime registration functions for each profile data variable. void emitRegistration(); @@ -174,6 +182,7 @@ if (!MadeChange) return false; + emitNameData(); emitRegistration(); emitRuntimeHook(); emitUses(); @@ -252,9 +261,8 @@ assert(isa(V) && "Missing reference to function name"); GlobalVariable *Name = cast(V); - // Move the name variable to the right section. - Name->setSection(getNameSection()); - Name->setAlignment(1); + Name->setLinkage(GlobalValue::PrivateLinkage); + ReferencedNames.push_back(Name); } } @@ -281,7 +289,7 @@ // a section is associated to must precede the associating section. For this // reason, we must choose the name var's name as the name of the comdat. StringRef ComdatPrefix = (Triple(M.getTargetTriple()).isOSBinFormatCOFF() - ? getInstrProfNameVarPrefix() + ? 
getInstrProfDataVarPrefix() : getInstrProfComdatPrefix()); return M.getOrInsertComdat(StringRef(getVarName(Inc, ComdatPrefix))); } @@ -305,9 +313,6 @@ Comdat *ProfileVarsComdat = nullptr; if (Fn->hasComdat()) ProfileVarsComdat = getOrCreateProfileComdat(*M, Inc); - NamePtr->setSection(getNameSection()); - NamePtr->setAlignment(1); - NamePtr->setComdat(ProfileVarsComdat); uint64_t NumCounters = Inc->getNumCounters()->getZExtValue(); LLVMContext &Ctx = M->getContext(); @@ -359,10 +364,32 @@ // Mark the data variable as used so that it isn't stripped out. UsedVars.push_back(Data); + // Reset the name variable's linkage to private. + NamePtr->setLinkage(GlobalValue::PrivateLinkage); + // Record the name variable so that emitNameData() can collect it. + ReferencedNames.push_back(NamePtr); return CounterPtr; } +void InstrProfiling::emitNameData() { + if (ReferencedNames.empty()) + return; + + std::string CompressedNameStr; + collectPGOFuncNameStrings(ReferencedNames, CompressedNameStr, DoNameCompression); + + auto &Ctx = M->getContext(); + auto *NamesVal = llvm::ConstantDataArray::getString( + Ctx, StringRef(CompressedNameStr), false); + GlobalVariable *NamesVar = new llvm::GlobalVariable(*M, NamesVal->getType(), true, + llvm::GlobalValue::PrivateLinkage, + NamesVal, getInstrProfNamesVarName()); + NamesVar->setSection(getNameSection()); + UsedVars.push_back(NamesVar); +} + + void InstrProfiling::emitRegistration() { // Don't do this for Darwin. compiler-rt uses linker magic. if (Triple(M->getTargetTriple()).isOSDarwin())