diff --git a/llvm/include/llvm/IR/PseudoProbe.h b/llvm/include/llvm/IR/PseudoProbe.h --- a/llvm/include/llvm/IR/PseudoProbe.h +++ b/llvm/include/llvm/IR/PseudoProbe.h @@ -80,6 +80,9 @@ uint32_t Id; uint32_t Type; uint32_t Attr; + // Distribution factor that estimates the portion of the real execution count. + // A saturated distribution factor stands for 1.0 or 100%. A pseudo probe has + // a factor whose value ranges from 0.0 to 1.0. float Factor; bool isDangling() const { diff --git a/llvm/include/llvm/Transforms/IPO/SampleProfileProbe.h b/llvm/include/llvm/Transforms/IPO/SampleProfileProbe.h --- a/llvm/include/llvm/Transforms/IPO/SampleProfileProbe.h +++ b/llvm/include/llvm/Transforms/IPO/SampleProfileProbe.h @@ -33,7 +33,10 @@ using namespace sampleprof; using BlockIdMap = std::unordered_map; using InstructionIdMap = std::unordered_map; -using ProbeFactorMap = std::unordered_map; +// Map from tuples of Probe id and inline stack hash code to distribution +// factors. +using ProbeFactorMap = std::unordered_map, float, + pair_hash>; using FuncProbeFactorMap = StringMap; enum class PseudoProbeReservedId { Invalid = 0, Last = Invalid }; @@ -135,6 +138,18 @@ PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM); }; +// Pseudo probe distribution factor updater. +// Sample profile annotation can happen in both LTO prelink and postlink. The +// postlink-time re-annotation can degrade profile quality because of prelink +// code duplication transformations, such as loop unrolling, jump threading, +// indirect call promotion, etc. As such, samples corresponding to a source +// location may be aggregated multiple times in postlink. With a concept of +// distribution factor for pseudo probes, samples can be distributed among +// duplicated probes reasonably based on the assumption that optimizations +// duplicating code well-maintain the branch frequency information (BFI).
This +// pass updates distribution factors for each pseudo probe at the end of the +// prelink pipeline, to reflect an estimated portion of the real execution +// count. class PseudoProbeUpdatePass : public PassInfoMixin { void runOnFunction(Function &F, FunctionAnalysisManager &FAM); diff --git a/llvm/lib/Transforms/IPO/SampleProfileProbe.cpp b/llvm/lib/Transforms/IPO/SampleProfileProbe.cpp --- a/llvm/lib/Transforms/IPO/SampleProfileProbe.cpp +++ b/llvm/lib/Transforms/IPO/SampleProfileProbe.cpp @@ -50,6 +50,27 @@ UpdatePseudoProbe("update-pseudo-probe", cl::init(true), cl::Hidden, cl::desc("Update pseudo probe distribution factor")); +static uint64_t getCallStackHash(const DILocation *DIL) { + uint64_t Hash = 0; + const DILocation *InlinedAt = DIL ? DIL->getInlinedAt() : nullptr; + while (InlinedAt) { + Hash ^= MD5Hash(std::to_string(InlinedAt->getLine())); + Hash ^= MD5Hash(std::to_string(InlinedAt->getColumn())); + const DISubprogram *SP = InlinedAt->getScope()->getSubprogram(); + // Use linkage name for C++ if possible. + auto Name = SP->getLinkageName(); + if (Name.empty()) + Name = SP->getName(); + Hash ^= MD5Hash(Name); + InlinedAt = InlinedAt->getInlinedAt(); + } + return Hash; +} + +static uint64_t computeCallStackHash(const Instruction &Inst) { + return getCallStackHash(Inst.getDebugLoc()); +} + bool PseudoProbeVerifier::shouldVerifyFunction(const Function *F) { // Skip function declaration. 
if (F->isDeclaration()) @@ -117,8 +138,10 @@ void PseudoProbeVerifier::collectProbeFactors(const BasicBlock *Block, ProbeFactorMap &ProbeFactors) { for (const auto &I : *Block) { - if (Optional Probe = extractProbe(I)) - ProbeFactors[Probe->Id] += Probe->Factor; + if (Optional Probe = extractProbe(I)) { + uint64_t Hash = computeCallStackHash(I); + ProbeFactors[{Probe->Id, Hash}] += Probe->Factor; + } } } @@ -136,7 +159,7 @@ dbgs() << "Function " << F->getName() << ":\n"; BannerPrinted = true; } - dbgs() << "Probe " << I.first << "\tprevious factor " + dbgs() << "Probe " << I.first.first << "\tprevious factor " << format("%0.2f", PrevProbeFactor) << "\tcurrent factor " << format("%0.2f", CurProbeFactor) << "\n"; } @@ -407,8 +430,10 @@ // execution count of the probe. The original samples of the probe will // be distributed among the rest probes if there are any, this is // less-than-deal but at least we don't lose any samples. - if (!Probe->isDangling()) - ProbeFactors[Probe->Id] += BBProfileCount(&Block); + if (!Probe->isDangling()) { + uint64_t Hash = computeCallStackHash(I); + ProbeFactors[{Probe->Id, Hash}] += BBProfileCount(&Block); + } } } } @@ -420,7 +445,8 @@ // Ignore danling probes since they are logically deleted and should do // not consume any profile samples in the subsequent profile annotation. 
if (!Probe->isDangling()) { - float Sum = ProbeFactors[Probe->Id]; + uint64_t Hash = computeCallStackHash(I); + float Sum = ProbeFactors[{Probe->Id, Hash}]; if (Sum != 0) setProbeDistributionFactor(I, BBProfileCount(&Block) / Sum); } diff --git a/llvm/test/Transforms/SampleProfile/pseudo-probe-update-2.ll b/llvm/test/Transforms/SampleProfile/pseudo-probe-update-2.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Transforms/SampleProfile/pseudo-probe-update-2.ll @@ -0,0 +1,80 @@ +; RUN: opt < %s -passes='pseudo-probe-update' -S | FileCheck %s + +declare i32 @f1() + +declare i32 @f2() + +declare void @f3() + +define i32 @foo(i1 %cond, i1 %cond2) !dbg !4 !prof !10 { + call void @llvm.pseudoprobe(i64 6699318081062747564, i64 1, i32 0, i64 -1) + br i1 %cond, label %T1, label %Merge, !prof !11 + +T1: ; preds = %0 + %v1 = call i32 @f1(), !prof !12 + %cond3 = icmp eq i32 %v1, 412 + call void @llvm.pseudoprobe(i64 6699318081062747564, i64 2, i32 0, i64 -1) +;; The distribution factor -8513881372706734080 stands for 53.85%, which is from 7/(6+7). +; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 4, i32 0, i64 -8513881372706734080) + call void @llvm.pseudoprobe(i64 6699318081062747564, i64 4, i32 0, i64 -1), !dbg !13 +;; Probe 7 has two copies. Since they don't share the same inline context, they are not +;; considered to share samples, so their distribution factors are not fixed up. +; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID]], i64 7, i32 0, i64 -1) + call void @llvm.pseudoprobe(i64 6699318081062747564, i64 7, i32 0, i64 -1), !dbg !13 +;; Similar to Probe 7, one copy of Probe 8 doesn't have inline context.
+; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID]], i64 8, i32 0, i64 -1) + call void @llvm.pseudoprobe(i64 6699318081062747564, i64 8, i32 0, i64 -1), !dbg !13 + br i1 %cond3, label %T2, label %F2, !prof !11 + +Merge: ; preds = %0 + %v2 = call i32 @f2(), !prof !12 + call void @llvm.pseudoprobe(i64 6699318081062747564, i64 3, i32 0, i64 -1) +;; The distribution factor 8513881922462547968 stands for 46.15%, which is from 6/(6+7). +; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID]], i64 4, i32 0, i64 8513881922462547968) + call void @llvm.pseudoprobe(i64 6699318081062747564, i64 4, i32 0, i64 8513881922462547968), !dbg !13 +; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID]], i64 7, i32 0, i64 -1) + call void @llvm.pseudoprobe(i64 6699318081062747564, i64 7, i32 0, i64 -1), !dbg !18 +; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID]], i64 8, i32 0, i64 -1) + call void @llvm.pseudoprobe(i64 6699318081062747564, i64 8, i32 0, i64 -1), !dbg !19 + br i1 %cond2, label %T2, label %F2, !prof !11 + +T2: ; preds = %Merge, %T1 + %B1 = phi i32 [ %v1, %T1 ], [ %v2, %Merge ] + call void @f3(), !prof !12 + call void @llvm.pseudoprobe(i64 6699318081062747564, i64 5, i32 0, i64 -1) + ret i32 %B1 + +F2: ; preds = %Merge, %T1 + %B2 = phi i32 [ %v1, %T1 ], [ %v2, %Merge ] + call void @llvm.pseudoprobe(i64 6699318081062747564, i64 6, i32 0, i64 -1) + ret i32 %B2 +} + +; Function Attrs: inaccessiblememonly nounwind willreturn +declare void @llvm.pseudoprobe(i64, i64, i32, i64) #0 + +attributes #0 = { inaccessiblememonly nounwind willreturn } + +!llvm.module.flags = !{!0, !1} +!llvm.pseudo_probe_desc = !{!2, !3} + +!0 = !{i32 7, !"Dwarf Version", i32 4} +!1 = !{i32 2, !"Debug Info Version", i32 3} +!2 = !{i64 6699318081062747564, i64 281479271677951, !"foo", null} +!3 = !{i64 6468398850841090686, i64 138828622701, !"zen", null} +!4 = distinct !DISubprogram(name: "foo", scope: !5, file: !5, line: 9, type: !6, scopeLine: 9, spFlags: DISPFlagDefinition, unit: !9) +!5 = !DIFile(filename:
"test.cpp", directory: "test") +!6 = !DISubroutineType(types: !7) +!7 = !{!8, !8} +!8 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed) +!9 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus_14, file: !5, isOptimized: false, runtimeVersion: 0, emissionKind: NoDebug) +!10 = !{!"function_entry_count", i64 14} +!11 = !{!"branch_weights", i32 8, i32 7} +!12 = !{!"branch_weights", i32 7} +!13 = !DILocation(line: 39, column: 9, scope: !14, inlinedAt: !16) +!14 = distinct !DILexicalBlock(scope: !15, file: !5, line: 39, column: 7) +!15 = distinct !DISubprogram(name: "zen", scope: !5, file: !5, line: 37, type: !6, scopeLine: 38, spFlags: DISPFlagDefinition, unit: !9) +!16 = distinct !DILocation(line: 10, column: 11, scope: !17) +!17 = !DILexicalBlockFile(scope: !4, file: !5, discriminator: 186646551) +!18 = !DILocation(line: 53, column: 3, scope: !15, inlinedAt: !19) +!19 = !DILocation(line: 12, column: 3, scope: !4) \ No newline at end of file