diff --git a/llvm/include/llvm/ProfileData/SampleProf.h b/llvm/include/llvm/ProfileData/SampleProf.h --- a/llvm/include/llvm/ProfileData/SampleProf.h +++ b/llvm/include/llvm/ProfileData/SampleProf.h @@ -208,6 +208,13 @@ SecFlagHasAttribute = (1 << 1) }; +enum class SecFuncOffsetFlags : uint32_t { + SecFlagInvalid = 0, + // Store function offsets in an order of contexts. The order ensures that + // callee contexts of a given context laid out next to it. + SecFlagOrdered = (1 << 0), +}; + // Verify section specific flag is used for the correct section. template static inline void verifySecFlag(SecType Type, SecFlagType Flag) { @@ -228,6 +235,8 @@ IsFlagLegal = std::is_same(); break; default: + case SecFuncOffsetTable: + IsFlagLegal = std::is_same(); break; } if (!IsFlagLegal) diff --git a/llvm/include/llvm/ProfileData/SampleProfReader.h b/llvm/include/llvm/ProfileData/SampleProfReader.h --- a/llvm/include/llvm/ProfileData/SampleProfReader.h +++ b/llvm/include/llvm/ProfileData/SampleProfReader.h @@ -720,6 +720,11 @@ /// The table mapping from function context to the offset of its /// FunctionSample towards file start. DenseMap FuncOffsetTable; + + /// Function offset mapping ordered by contexts. + std::unique_ptr>> + OrderedFuncOffsets; + /// The set containing the functions to use when compiling a module. DenseSet FuncsToUse; @@ -746,6 +751,8 @@ /// SecFlagFlat flag. bool SkipFlatProf = false; + bool FuncOffsetsOrdered = false; + public: SampleProfileReaderExtBinaryBase(std::unique_ptr B, LLVMContext &C, SampleProfileFormat Format) diff --git a/llvm/lib/ProfileData/SampleProfReader.cpp b/llvm/lib/ProfileData/SampleProfReader.cpp --- a/llvm/lib/ProfileData/SampleProfReader.cpp +++ b/llvm/lib/ProfileData/SampleProfReader.cpp @@ -675,6 +675,7 @@ return EC; break; case SecFuncOffsetTable: + FuncOffsetsOrdered = hasSecFlag(Entry, SecFuncOffsetFlags::SecFlagOrdered); if (std::error_code EC = readFuncOffsetTable()) return EC; break; @@ -720,17 +721,27 @@ return EC; FuncOffsetTable.reserve(*Size); + + if (FuncOffsetsOrdered) { + OrderedFuncOffsets = + std::make_unique>>(); + OrderedFuncOffsets->reserve(*Size); + } + for (uint32_t I = 0; I < *Size; ++I) { - auto FName(readSampleContextFromTable()); - if (std::error_code EC = FName.getError()) + auto FContext(readSampleContextFromTable()); + if (std::error_code EC = FContext.getError()) return EC; auto Offset = readNumber(); if (std::error_code EC = Offset.getError()) return EC; - FuncOffsetTable[*FName] = *Offset; + FuncOffsetTable[*FContext] = *Offset; + if (FuncOffsetsOrdered) + OrderedFuncOffsets->emplace_back(*FContext, *Offset); } + return sampleprof_error::success; } @@ -760,42 +771,43 @@ } if (ProfileIsCS) { - // Compute the ordered set of names, so we can - // get all context profiles under a subtree by - // iterating through the ordered names. - std::set OrderedContexts; - for (auto Name : FuncOffsetTable) { - OrderedContexts.insert(Name.first); - } - DenseSet FuncGuidsToUse; if (useMD5()) { for (auto Name : FuncsToUse) FuncGuidsToUse.insert(Function::getGUID(Name)); } - // For each function in current module, load all - // context profiles for the function. - for (auto NameOffset : FuncOffsetTable) { - SampleContext FContext = NameOffset.first; - auto FuncName = FContext.getName(); - if ((useMD5() && !FuncGuidsToUse.count(std::stoull(FuncName.data()))) || - (!useMD5() && !FuncsToUse.count(FuncName) && - (!Remapper || !Remapper->exist(FuncName)))) - continue; - - // For each context profile we need, try to load - // all context profile in the subtree. This can - // help profile guided importing for ThinLTO. - auto It = OrderedContexts.find(FContext); - while (It != OrderedContexts.end() && FContext.IsPrefixOf(*It)) { - const uint8_t *FuncProfileAddr = Start + FuncOffsetTable[*It]; + // For each function in current module, load all context profiles for + // the function as well as their callee contexts which can help profile + // guided importing for ThinLTO. This can be achieved by walking + // through an ordered context container, where contexts are laid out + // as if they were walked in preorder of a context trie. While + // traversing the trie, a link to the highest common ancestor node is + // kept so that all of its decendants will be loaded. + assert(OrderedFuncOffsets.get() && + "func offset table should always be sorted in CS profile"); + const SampleContext *CommonContext = nullptr; + for (auto &NameOffset : *OrderedFuncOffsets) { + auto &FContext = NameOffset.first; + auto FName = FContext.getName(); + // For function in the current module, keep its farthest ancestor + // context. This can be used to load itself and its child and + // sibling contexts. + if ((useMD5() && FuncGuidsToUse.count(std::stoull(FName.data()))) || + (!useMD5() && (FuncsToUse.count(FName) || + (Remapper && Remapper->exist(FName))))) { + if (!CommonContext || !CommonContext->IsPrefixOf(FContext)) + CommonContext = &FContext; + } + + if (CommonContext == &FContext || + (CommonContext && CommonContext->IsPrefixOf(FContext))) { + // Load profile for the current context which originated from + // the common ancestor. + const uint8_t *FuncProfileAddr = Start + NameOffset.second; assert(FuncProfileAddr < End && "out of LBRProfile section"); if (std::error_code EC = readFuncProfile(FuncProfileAddr)) return EC; - // Remove loaded context profile so we won't - // load it repeatedly. - It = OrderedContexts.erase(It); } } } else { @@ -1212,6 +1224,10 @@ if (hasSecFlag(Entry, SecProfSummaryFlags::SecFlagFSDiscriminator)) Flags.append("fs-discriminator,"); break; + case SecFuncOffsetTable: + if (hasSecFlag(Entry, SecFuncOffsetFlags::SecFlagOrdered)) + Flags.append("ordered,"); + break; default: break; } diff --git a/llvm/lib/ProfileData/SampleProfWriter.cpp b/llvm/lib/ProfileData/SampleProfWriter.cpp --- a/llvm/lib/ProfileData/SampleProfWriter.cpp +++ b/llvm/lib/ProfileData/SampleProfWriter.cpp @@ -165,11 +165,31 @@ encodeULEB128(FuncOffsetTable.size(), OS); // Write out FuncOffsetTable. - for (auto Entry : FuncOffsetTable) { - if (std::error_code EC = writeContextIdx(Entry.first)) + auto WriteItem = [&](const SampleContext &Context, uint64_t Offset) { + if (std::error_code EC = writeContextIdx(Context)) return EC; - encodeULEB128(Entry.second, OS); + encodeULEB128(Offset, OS); + return (std::error_code)sampleprof_error::success; + }; + + if (FunctionSamples::ProfileIsCS) { + // Sort the contexts before writing them out. This is to help fast load all + // context profiles for a function as well as their callee contexts which + // can help profile-guided importing for ThinLTO. + std::map OrderedFuncOffsetTable( + FuncOffsetTable.begin(), FuncOffsetTable.end()); + for (const auto &Entry : OrderedFuncOffsetTable) { + if (std::error_code EC = WriteItem(Entry.first, Entry.second)) + return EC; + } + addSectionFlag(SecFuncOffsetTable, SecFuncOffsetFlags::SecFlagOrdered); + } else { + for (const auto &Entry : FuncOffsetTable) { + if (std::error_code EC = WriteItem(Entry.first, Entry.second)) + return EC; + } } + FuncOffsetTable.clear(); return sampleprof_error::success; } diff --git a/llvm/test/Transforms/SampleProfile/csspgo-import-list.ll b/llvm/test/Transforms/SampleProfile/csspgo-import-list.ll --- a/llvm/test/Transforms/SampleProfile/csspgo-import-list.ll +++ b/llvm/test/Transforms/SampleProfile/csspgo-import-list.ll @@ -2,8 +2,11 @@ ; RUN: opt < %s -passes='thinlto-pre-link' -pgo-kind=pgo-sample-use-pipeline -sample-profile-file=%S/Inputs/csspgo-import-list.prof -S | FileCheck %s ; RUN: llvm-profdata merge --sample --extbinary %S/Inputs/csspgo-import-list.prof -o %t.prof ; RUN: opt < %s -passes='thinlto-pre-link' -pgo-kind=pgo-sample-use-pipeline -sample-profile-file=%t.prof -S | FileCheck %s +; RUN: llvm-profdata show --sample -show-sec-info-only %t.prof | FileCheck %s --check-prefix=CHECK-ORDERED ; RUN: llvm-profdata merge --sample --extbinary --use-md5 %S/Inputs/csspgo-import-list.prof -o %t.md5 ; RUN: opt < %s -passes='thinlto-pre-link' -pgo-kind=pgo-sample-use-pipeline -sample-profile-file=%t.md5 -S | FileCheck %s +; RUN: llvm-profdata show --sample -show-sec-info-only %t.md5 | FileCheck %s --check-prefix=CHECK-ORDERED + declare i32 @_Z5funcBi(i32 %x) declare i32 @_Z5funcAi(i32 %x) @@ -32,6 +35,7 @@ ; CHECK: distinct !DISubprogram(name: "main" ; CHECK: !{!"function_entry_count", i64 3, i64 446061515086924981, i64 3815895320998406042, i64 7102633082150537521, i64 -2862076748587597320} +; CHECK-ORDERED: FuncOffsetTableSection {{.*}} {ordered} attributes #0 = { nofree noinline norecurse nounwind uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="none" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" "use-sample-profile" }