diff --git a/clang/test/CodeGen/pseudo-probe-emit.c b/clang/test/CodeGen/pseudo-probe-emit.c --- a/clang/test/CodeGen/pseudo-probe-emit.c +++ b/clang/test/CodeGen/pseudo-probe-emit.c @@ -6,12 +6,12 @@ void go(); void foo(int x) { - // CHECK: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 1, i32 0) + // CHECK: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 1, i32 0, i64 -1) if (x == 0) - // CHECK: call void @llvm.pseudoprobe(i64 [[#GUID]], i64 2, i32 0) + // CHECK: call void @llvm.pseudoprobe(i64 [[#GUID]], i64 2, i32 0, i64 -1) bar(); else - // CHECK: call void @llvm.pseudoprobe(i64 [[#GUID]], i64 3, i32 0) + // CHECK: call void @llvm.pseudoprobe(i64 [[#GUID]], i64 3, i32 0, i64 -1) go(); - // CHECK: call void @llvm.pseudoprobe(i64 [[#GUID]], i64 4, i32 0) + // CHECK: call void @llvm.pseudoprobe(i64 [[#GUID]], i64 4, i32 0, i64 -1) } diff --git a/llvm/include/llvm/IR/IntrinsicInst.h b/llvm/include/llvm/IR/IntrinsicInst.h --- a/llvm/include/llvm/IR/IntrinsicInst.h +++ b/llvm/include/llvm/IR/IntrinsicInst.h @@ -981,12 +981,16 @@ return cast(const_cast(getArgOperand(0))); } + ConstantInt *getIndex() const { + return cast(const_cast(getArgOperand(1))); + } + ConstantInt *getAttributes() const { return cast(const_cast(getArgOperand(2))); } - ConstantInt *getIndex() const { - return cast(const_cast(getArgOperand(1))); + ConstantInt *getFactor() const { + return cast(const_cast(getArgOperand(3))); } }; diff --git a/llvm/include/llvm/IR/Intrinsics.td b/llvm/include/llvm/IR/Intrinsics.td --- a/llvm/include/llvm/IR/Intrinsics.td +++ b/llvm/include/llvm/IR/Intrinsics.td @@ -1299,7 +1299,7 @@ // Like the sideeffect intrinsic defined above, this intrinsic is treated by the // optimizer as having opaque side effects so that it won't be get rid of or moved // out of the block it probes. 
-def int_pseudoprobe : Intrinsic<[], [llvm_i64_ty, llvm_i64_ty, llvm_i32_ty], +def int_pseudoprobe : Intrinsic<[], [llvm_i64_ty, llvm_i64_ty, llvm_i32_ty, llvm_i64_ty], [IntrInaccessibleMemOnly, IntrWillReturn]>; // Intrinsics to support half precision floating point format diff --git a/llvm/include/llvm/IR/PseudoProbe.h b/llvm/include/llvm/IR/PseudoProbe.h --- a/llvm/include/llvm/IR/PseudoProbe.h +++ b/llvm/include/llvm/IR/PseudoProbe.h @@ -16,28 +16,39 @@ #include "llvm/ADT/Optional.h" #include #include +#include namespace llvm { class Instruction; +class BasicBlock; constexpr const char *PseudoProbeDescMetadataName = "llvm.pseudo_probe_desc"; enum class PseudoProbeType { Block = 0, IndirectCall, DirectCall }; +// The saturated distribution factor representing 100% for block probes. +constexpr static uint64_t PseudoProbeFullDistributionFactor = + std::numeric_limits::max(); + struct PseudoProbeDwarfDiscriminator { +public: // The following APIs encodes/decodes per-probe information to/from a // 32-bit integer which is organized as: // [2:0] - 0x7, this is reserved for regular discriminator, // see DWARF discriminator encoding rule // [18:3] - probe id - // [25:19] - reserved + // [25:19] - probe distribution factor // [28:26] - probe type, see PseudoProbeType // [31:29] - reserved for probe attributes - static uint32_t packProbeData(uint32_t Index, uint32_t Type) { + static uint32_t packProbeData(uint32_t Index, uint32_t Type, uint32_t Flags, + uint32_t Factor) { assert(Index <= 0xFFFF && "Probe index too big to encode, exceeding 2^16"); assert(Type <= 0x7 && "Probe type too big to encode, exceeding 7"); - return (Index << 3) | (Type << 26) | 0x7; + assert(Flags <= 0x7); + assert(Factor <= 100 && + "Probe distribution factor too big to encode, exceeding 100"); + return (Index << 3) | (Factor << 19) | (Type << 26) | 0x7; } static uint32_t extractProbeIndex(uint32_t Value) { @@ -51,16 +62,26 @@ static uint32_t extractProbeAttributes(uint32_t Value) { return (Value >> 
29) & 0x7; } + + static uint32_t extractProbeFactor(uint32_t Value) { + return (Value >> 19) & 0x7F; + } + + // The saturated distribution factor representing 100% for callsites. + constexpr static uint8_t FullDistributionFactor = 100; }; struct PseudoProbe { uint32_t Id; uint32_t Type; uint32_t Attr; + float Factor; }; Optional extractProbe(const Instruction &Inst); +void setProbeDistributionFactor(Instruction &Inst, float Factor); + } // end namespace llvm #endif // LLVM_IR_PSEUDOPROBE_H diff --git a/llvm/include/llvm/Passes/StandardInstrumentations.h b/llvm/include/llvm/Passes/StandardInstrumentations.h --- a/llvm/include/llvm/Passes/StandardInstrumentations.h +++ b/llvm/include/llvm/Passes/StandardInstrumentations.h @@ -22,6 +22,7 @@ #include "llvm/IR/PassTimingInfo.h" #include "llvm/IR/ValueHandle.h" #include "llvm/Support/CommandLine.h" +#include "llvm/Transforms/IPO/SampleProfileProbe.h" #include #include @@ -273,6 +274,7 @@ OptBisectInstrumentation OptBisect; PreservedCFGCheckerInstrumentation PreservedCFGChecker; IRChangedPrinter PrintChangedIR; + PseudoProbeVerifier PseudoProbeVerification; VerifyInstrumentation Verify; bool VerifyEach; diff --git a/llvm/include/llvm/ProfileData/SampleProf.h b/llvm/include/llvm/ProfileData/SampleProf.h --- a/llvm/include/llvm/ProfileData/SampleProf.h +++ b/llvm/include/llvm/ProfileData/SampleProf.h @@ -347,6 +347,16 @@ return SortedTargets; } + /// Prorate call targets by a distribution factor. + static const CallTargetMap adjustCallTargets(const CallTargetMap &Targets, + float DistributionFactor) { + CallTargetMap AdjustedTargets; + for (const auto &I : Targets) { + AdjustedTargets[I.first()] = I.second * DistributionFactor; + } + return AdjustedTargets; + } + /// Merge the samples in \p Other into this record. /// Optionally scale sample counts by \p Weight. 
sampleprof_error merge(const SampleRecord &Other, uint64_t Weight = 1) { diff --git a/llvm/include/llvm/Transforms/IPO/SampleProfileProbe.h b/llvm/include/llvm/Transforms/IPO/SampleProfileProbe.h --- a/llvm/include/llvm/Transforms/IPO/SampleProfileProbe.h +++ b/llvm/include/llvm/Transforms/IPO/SampleProfileProbe.h @@ -16,6 +16,10 @@ #define LLVM_TRANSFORMS_IPO_SAMPLEPROFILEPROBE_H #include "llvm/ADT/DenseMap.h" +#include "llvm/Analysis/CallGraphSCCPass.h" +#include "llvm/Analysis/LazyCallGraph.h" +#include "llvm/Analysis/LoopInfo.h" +#include "llvm/IR/PassInstrumentation.h" #include "llvm/IR/PassManager.h" #include "llvm/IR/PseudoProbe.h" #include "llvm/ProfileData/SampleProf.h" @@ -29,6 +33,8 @@ using namespace sampleprof; using BlockIdMap = std::unordered_map; using InstructionIdMap = std::unordered_map; +using ProbeFactorMap = std::unordered_map; +using FuncProbeFactorMap = StringMap; enum class PseudoProbeReservedId { Invalid = 0, Last = Invalid }; @@ -43,6 +49,33 @@ uint64_t getFunctionHash() const { return FunctionHash; } }; +// A pseudo probe verifier that can be run after each IR pass to detect the +// violation of updating probe factors. In principle, the sum of distribution +// factor for a probe should be identical before and after a pass. For a +// function pass, the factor sum for a probe would be typically 100%. +class PseudoProbeVerifier { +public: + void registerCallbacks(PassInstrumentationCallbacks &PIC); + + // Implementation of pass instrumentation callbacks for new pass manager. + void runAfterPass(StringRef PassID, Any IR); + +private: + // Allow a little bias due to the rounding to integral factors. + constexpr static float DistributionFactorVariance = 0.02; + // Distribution factors from last pass. 
+ FuncProbeFactorMap FunctionProbeFactors; + + void collectProbeFactors(const BasicBlock *BB, ProbeFactorMap &ProbeFactors); + void runAfterPass(const Module *M); + void runAfterPass(const LazyCallGraph::SCC *C); + void runAfterPass(const Function *F); + void runAfterPass(const Loop *L); + bool shouldVerifyFunction(const Function *F); + void verifyProbeFactors(const Function *F, + const ProbeFactorMap &ProbeFactors); +}; + // This class serves sample counts correlation for SampleProfileLoader by // analyzing pseudo probes and their function descriptors injected by // SampleProfileProber. @@ -102,5 +135,13 @@ PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM); }; +class PseudoProbeUpdatePass : public PassInfoMixin { + void runOnFunction(Function &F, FunctionAnalysisManager &FAM); + +public: + PseudoProbeUpdatePass() {} + PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM); +}; + } // end namespace llvm #endif // LLVM_TRANSFORMS_IPO_SAMPLEPROFILEPROBE_H diff --git a/llvm/lib/IR/PseudoProbe.cpp b/llvm/lib/IR/PseudoProbe.cpp --- a/llvm/lib/IR/PseudoProbe.cpp +++ b/llvm/lib/IR/PseudoProbe.cpp @@ -35,6 +35,9 @@ PseudoProbeDwarfDiscriminator::extractProbeType(Discriminator); Probe.Attr = PseudoProbeDwarfDiscriminator::extractProbeAttributes(Discriminator); + Probe.Factor = + PseudoProbeDwarfDiscriminator::extractProbeFactor(Discriminator) / + (float)PseudoProbeDwarfDiscriminator::FullDistributionFactor; return Probe; } } @@ -47,6 +50,8 @@ Probe.Id = II->getIndex()->getZExtValue(); Probe.Type = (uint32_t)PseudoProbeType::Block; Probe.Attr = II->getAttributes()->getZExtValue(); + Probe.Factor = II->getFactor()->getZExtValue() / + (float)PseudoProbeFullDistributionFactor; return Probe; } @@ -55,4 +60,40 @@ return None; } + +void setProbeDistributionFactor(Instruction &Inst, float Factor) { + assert(Factor >= 0 && Factor <= 1 && + "Distribution factor must be in [0, 1.0]"); + if (auto *II = dyn_cast(&Inst)) { + IRBuilder<> Builder(&Inst); + uint64_t IntFactor 
= PseudoProbeFullDistributionFactor; + if (Factor < 1) + IntFactor *= Factor; + auto OrigFactor = II->getFactor()->getZExtValue(); + if (IntFactor != OrigFactor) + II->replaceUsesOfWith(II->getFactor(), Builder.getInt64(IntFactor)); + } else if (isa(&Inst) && !isa(&Inst)) { + if (const DebugLoc &DLoc = Inst.getDebugLoc()) { + const DILocation *DIL = DLoc; + auto Discriminator = DIL->getDiscriminator(); + if (DILocation::isPseudoProbeDiscriminator(Discriminator)) { + auto Index = + PseudoProbeDwarfDiscriminator::extractProbeIndex(Discriminator); + auto Type = + PseudoProbeDwarfDiscriminator::extractProbeType(Discriminator); + auto Attr = PseudoProbeDwarfDiscriminator::extractProbeAttributes( + Discriminator); + // Round small factors to 0 to avoid over-counting. + uint32_t IntFactor = + PseudoProbeDwarfDiscriminator::FullDistributionFactor; + if (Factor < 1) + IntFactor *= Factor; + uint32_t V = PseudoProbeDwarfDiscriminator::packProbeData( + Index, Type, Attr, IntFactor); + DIL = DIL->cloneWithDiscriminator(V); + Inst.setDebugLoc(DIL); + } + } + } +} } // namespace llvm diff --git a/llvm/lib/Passes/PassBuilder.cpp b/llvm/lib/Passes/PassBuilder.cpp --- a/llvm/lib/Passes/PassBuilder.cpp +++ b/llvm/lib/Passes/PassBuilder.cpp @@ -1428,6 +1428,9 @@ // Now add the optimization pipeline. MPM.addPass(buildModuleOptimizationPipeline(Level, LTOPreLink)); + if (PGOOpt && PGOOpt->PseudoProbeForProfiling) + MPM.addPass(PseudoProbeUpdatePass()); + // Emit annotation remarks. addAnnotationRemarksPass(MPM); @@ -1482,6 +1485,9 @@ if (PTO.Coroutines) MPM.addPass(createModuleToFunctionPassAdaptor(CoroCleanupPass())); + if (PGOOpt && PGOOpt->PseudoProbeForProfiling) + MPM.addPass(PseudoProbeUpdatePass()); + // Emit annotation remarks. 
addAnnotationRemarksPass(MPM); diff --git a/llvm/lib/Passes/PassRegistry.def b/llvm/lib/Passes/PassRegistry.def --- a/llvm/lib/Passes/PassRegistry.def +++ b/llvm/lib/Passes/PassRegistry.def @@ -119,6 +119,7 @@ MODULE_PASS("sancov-module", ModuleSanitizerCoveragePass()) MODULE_PASS("memprof-module", ModuleMemProfilerPass()) MODULE_PASS("poison-checking", PoisonCheckingPass()) +MODULE_PASS("pseudo-probe-update", PseudoProbeUpdatePass()) #undef MODULE_PASS #ifndef CGSCC_ANALYSIS diff --git a/llvm/lib/Passes/StandardInstrumentations.cpp b/llvm/lib/Passes/StandardInstrumentations.cpp --- a/llvm/lib/Passes/StandardInstrumentations.cpp +++ b/llvm/lib/Passes/StandardInstrumentations.cpp @@ -882,6 +882,7 @@ OptBisect.registerCallbacks(PIC); PreservedCFGChecker.registerCallbacks(PIC); PrintChangedIR.registerCallbacks(PIC); + PseudoProbeVerification.registerCallbacks(PIC); if (VerifyEach) Verify.registerCallbacks(PIC); } diff --git a/llvm/lib/Transforms/IPO/SampleProfile.cpp b/llvm/lib/Transforms/IPO/SampleProfile.cpp --- a/llvm/lib/Transforms/IPO/SampleProfile.cpp +++ b/llvm/lib/Transforms/IPO/SampleProfile.cpp @@ -108,6 +108,8 @@ STATISTIC(NumMismatchedProfile, "Number of functions with CFG mismatched profile"); STATISTIC(NumMatchedProfile, "Number of functions with CFG matched profile"); +STATISTIC(NumDuplicatedInlinesite, + "Number of inlined callsites with a partial distribution factor"); STATISTIC(NumCSInlinedHitMinLimit, "Number of functions with FDO inline stopped due to min size limit"); @@ -358,7 +360,14 @@ struct InlineCandidate { CallBase *CallInstr; const FunctionSamples *CalleeSamples; + // Prorated callsite count, which will be used to guide inlining. For example, + // if a callsite is duplicated in LTO prelink, then in LTO postlink the two + // copies will get their own distribution factors and their prorated counts + // will be used to decide if they should be inlined independently. 
uint64_t CallsiteCount; + // Call site distribution factor to prorate the profile samples for a + // duplicated callsite. Default value is 1.0. + float CallsiteDistribution; }; // Inline candidate comparer using call site weight @@ -418,8 +427,8 @@ const FunctionSamples *findFunctionSamples(const Instruction &I) const; // Attempt to promote indirect call and also inline the promoted call bool tryPromoteAndInlineCandidate( - Function &F, InlineCandidate &Candidate, uint64_t &Sum, - DenseSet &PromotedInsns, + Function &F, InlineCandidate &Candidate, uint64_t SumOrigin, + uint64_t &Sum, DenseSet &PromotedInsns, SmallVector *InlinedCallSites = nullptr); bool inlineHotFunctions(Function &F, DenseSet &InlinedGUIDs); @@ -886,7 +895,7 @@ const ErrorOr &R = FS->findSamplesAt(Probe->Id, 0); if (R) { - uint64_t Samples = R.get(); + uint64_t Samples = R.get() * Probe->Factor; bool FirstMark = CoverageTracker.markSamplesUsed(FS, Probe->Id, 0, Samples); if (FirstMark) { ORE->emit([&]() { @@ -894,13 +903,17 @@ Remark << "Applied " << ore::NV("NumSamples", Samples); Remark << " samples from profile (ProbeId="; Remark << ore::NV("ProbeId", Probe->Id); + Remark << ", Factor="; + Remark << ore::NV("Factor", Probe->Factor); + Remark << ", OriginalSamples="; + Remark << ore::NV("OriginalSamples", R.get()); Remark << ")"; return Remark; }); } - LLVM_DEBUG(dbgs() << " " << Probe->Id << ":" << Inst - << " - weight: " << R.get() << ")\n"); + << " - weight: " << R.get() << " - factor: " + << format("%0.2f", Probe->Factor) << ")\n"); return Samples; } return R; @@ -1085,7 +1098,7 @@ /// \param InlinedCallSite Output vector for new call sites exposed after /// inlining. 
bool SampleProfileLoader::tryPromoteAndInlineCandidate( - Function &F, InlineCandidate &Candidate, uint64_t &Sum, + Function &F, InlineCandidate &Candidate, uint64_t SumOrigin, uint64_t &Sum, DenseSet &PromotedInsns, SmallVector *InlinedCallSite) { const char *Reason = "Callee function not available"; @@ -1106,10 +1119,28 @@ Candidate.CallsiteCount, Sum, false, ORE); if (DI) { Sum -= Candidate.CallsiteCount; + // Prorate the indirect callsite distribution. + // Do not update the promoted direct callsite distribution at this + // point since the original distribution combined with the callee + // profile will be used to prorate callsites from the callee if + // inlined. Once not inlined, the direct callsite distribution should + // be prorated so that it will reflect the real callsite counts. + setProbeDistributionFactor(*Candidate.CallInstr, + Candidate.CallsiteDistribution * Sum / + SumOrigin); PromotedInsns.insert(Candidate.CallInstr); Candidate.CallInstr = DI; - if (isa(DI) || isa(DI)) - return tryInlineCandidate(Candidate, InlinedCallSite); + if (isa(DI) || isa(DI)) { + bool Inlined = tryInlineCandidate(Candidate, InlinedCallSite); + if (!Inlined) { + // Prorate the direct callsite distribution so that it reflects real + // callsite counts. + setProbeDistributionFactor(*DI, Candidate.CallsiteDistribution * + Candidate.CallsiteCount / + SumOrigin); + } + return Inlined; + } } } else { LLVM_DEBUG(dbgs() << "\nFailed to promote indirect call to " @@ -1216,11 +1247,11 @@ } for (CallBase *I : CIS) { Function *CalledFunction = I->getCalledFunction(); - InlineCandidate Candidate = {I, - LocalNotInlinedCallSites.count(I) - ? LocalNotInlinedCallSites[I] - : nullptr, - 0 /* dummy count */}; + InlineCandidate Candidate = { + I, + LocalNotInlinedCallSites.count(I) ? LocalNotInlinedCallSites[I] + : nullptr, + 0 /* dummy count */, 1.0 /* dummy distribution factor */}; // Do not inline recursive calls. 
if (CalledFunction == &F) continue; @@ -1229,6 +1260,7 @@ continue; uint64_t Sum; for (const auto *FS : findIndirectCallFunctionSamples(*I, Sum)) { + uint64_t SumOrigin = Sum; if (LTOPhase == ThinOrFullLTOPhase::ThinLTOPreLink) { FS->findInlinedFunctions(InlinedGUIDs, F.getParent(), PSI->getOrCompHotCountThreshold()); @@ -1237,8 +1269,9 @@ if (!callsiteIsHot(FS, PSI)) continue; - Candidate = {I, FS, FS->getEntrySamples()}; - if (tryPromoteAndInlineCandidate(F, Candidate, Sum, PromotedInsns)) { + Candidate = {I, FS, FS->getEntrySamples(), 1.0}; + if (tryPromoteAndInlineCandidate(F, Candidate, SumOrigin, Sum, + PromotedInsns)) { LocalNotInlinedCallSites.erase(I); LocalChanged = true; } @@ -1343,6 +1376,23 @@ if (ProfileIsCS) ContextTracker->markContextSamplesInlined(Candidate.CalleeSamples); ++NumCSInlined; + + // Prorate inlined probes for a duplicated inlining callsite which probably + // has a distribution less than 100%. Samples for an inlinee should be + // distributed among the copies of the original callsite based on each + // callsite's distribution factor for counts accuracy. Note that an inlined + // probe may come with its own distribution factor if it has been duplicated + // in the inlinee body. The two factors are multiplied to reflect the + // aggregation of duplication. 
+ if (Candidate.CallsiteDistribution < 1) { + for (auto &I : IFI.InlinedCallSites) { + if (Optional Probe = extractProbe(*I)) + setProbeDistributionFactor(*I, Probe->Factor * + Candidate.CallsiteDistribution); + } + NumDuplicatedInlinesite++; + } + return true; } return false; @@ -1360,14 +1410,19 @@ if (!CalleeSamples) return false; + float Factor = 1.0; + if (Optional Probe = extractProbe(*CB)) + Factor = Probe->Factor; + uint64_t CallsiteCount = 0; ErrorOr Weight = getBlockWeight(CB->getParent()); if (Weight) CallsiteCount = Weight.get(); if (CalleeSamples) - CallsiteCount = std::max(CallsiteCount, CalleeSamples->getEntrySamples()); + CallsiteCount = std::max( + CallsiteCount, uint64_t(CalleeSamples->getEntrySamples() * Factor)); - *NewCandidate = {CB, CalleeSamples, CallsiteCount}; + *NewCandidate = {CB, CalleeSamples, CallsiteCount, Factor}; return true; } @@ -1479,6 +1534,7 @@ uint64_t Sum; auto CalleeSamples = findIndirectCallFunctionSamples(*I, Sum); uint64_t SumOrigin = Sum; + Sum *= Candidate.CallsiteDistribution; for (const auto *FS : CalleeSamples) { // TODO: Consider disable pre-lTO ICP for MonoLTO as well if (LTOPhase == ThinOrFullLTOPhase::ThinLTOPreLink) { @@ -1486,7 +1542,8 @@ PSI->getOrCompHotCountThreshold()); continue; } - uint64_t EntryCountDistributed = FS->getEntrySamples(); + uint64_t EntryCountDistributed = + FS->getEntrySamples() * Candidate.CallsiteDistribution; // In addition to regular inline cost check, we also need to make sure // ICP isn't introducing excessive speculative checks even if individual // target looks beneficial to promote and inline. That means we should @@ -1505,9 +1562,10 @@ SmallVector InlinedCallSites; // Attach function profile for promoted indirect callee, and update // call site count for the promoted inline candidate too. 
- Candidate = {I, FS, EntryCountDistributed}; - if (tryPromoteAndInlineCandidate(F, Candidate, Sum, PromotedInsns, - &InlinedCallSites)) { + Candidate = {I, FS, EntryCountDistributed, + Candidate.CallsiteDistribution}; + if (tryPromoteAndInlineCandidate(F, Candidate, SumOrigin, Sum, + PromotedInsns, &InlinedCallSites)) { for (auto *CB : InlinedCallSites) { if (getInlineCandidate(&NewCandidate, CB)) CQueue.emplace(NewCandidate); @@ -1965,6 +2023,14 @@ auto T = FS->findCallTargetMapAt(CallSite); if (!T || T.get().empty()) continue; + // Prorate the callsite counts to reflect what is already done to the + // callsite, such as ICP or callsite cloning. + if (FunctionSamples::ProfileIsProbeBased) { + if (Optional Probe = extractProbe(I)) { + if (Probe->Factor < 1) + T = SampleRecord::adjustCallTargets(T.get(), Probe->Factor); + } + } SmallVector SortedCallTargets = GetSortedValueDataFromCallTargets(T.get()); uint64_t Sum; diff --git a/llvm/lib/Transforms/IPO/SampleProfileProbe.cpp b/llvm/lib/Transforms/IPO/SampleProfileProbe.cpp --- a/llvm/lib/Transforms/IPO/SampleProfileProbe.cpp +++ b/llvm/lib/Transforms/IPO/SampleProfileProbe.cpp @@ -12,6 +12,7 @@ #include "llvm/Transforms/IPO/SampleProfileProbe.h" #include "llvm/ADT/Statistic.h" +#include "llvm/Analysis/BlockFrequencyInfo.h" #include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/IR/BasicBlock.h" #include "llvm/IR/CFG.h" @@ -25,8 +26,10 @@ #include "llvm/IR/MDBuilder.h" #include "llvm/ProfileData/SampleProf.h" #include "llvm/Support/CRC.h" +#include "llvm/Support/CommandLine.h" #include "llvm/Transforms/Instrumentation.h" #include "llvm/Transforms/Utils/ModuleUtils.h" +#include #include using namespace llvm; @@ -35,6 +38,115 @@ STATISTIC(ArtificialDbgLine, "Number of probes that have an artificial debug line"); +static cl::opt + VerifyPseudoProbe("verify-pseudo-probe", cl::init(false), cl::Hidden, + cl::desc("Do pseudo probe verification")); + +static cl::list VerifyPseudoProbeFuncList( + 
"verify-pseudo-probe-funcs", cl::Hidden, + cl::desc("The option to specify the name of the functions to verify.")); + +static cl::opt + UpdatePseudoProbe("update-pseudo-probe", cl::init(true), cl::Hidden, + cl::desc("Update pseudo probe distribution factor")); + +bool PseudoProbeVerifier::shouldVerifyFunction(const Function *F) { + // Skip function declaration. + if (F->isDeclaration()) + return false; + // Skip function that will not be emitted into object file. The prevailing + // definition will be verified instead. + if (F->hasAvailableExternallyLinkage()) + return false; + // Do a name matching. + static std::unordered_set VerifyFuncNames( + VerifyPseudoProbeFuncList.begin(), VerifyPseudoProbeFuncList.end()); + return VerifyFuncNames.empty() || VerifyFuncNames.count(F->getName().str()); +} + +void PseudoProbeVerifier::registerCallbacks(PassInstrumentationCallbacks &PIC) { + if (VerifyPseudoProbe) { + PIC.registerAfterPassCallback( + [this](StringRef P, Any IR, const PreservedAnalyses &) { + this->runAfterPass(P, IR); + }); + } +} + +// Callback to run after each transformation for the new pass manager. 
+void PseudoProbeVerifier::runAfterPass(StringRef PassID, Any IR) { + std::string Banner = + "\n*** Pseudo Probe Verification After " + PassID.str() + " ***\n"; + dbgs() << Banner; + if (any_isa(IR)) + runAfterPass(any_cast(IR)); + else if (any_isa(IR)) + runAfterPass(any_cast(IR)); + else if (any_isa(IR)) + runAfterPass(any_cast(IR)); + else if (any_isa(IR)) + runAfterPass(any_cast(IR)); + else + llvm_unreachable("Unknown IR unit"); +} + +void PseudoProbeVerifier::runAfterPass(const Module *M) { + for (const Function &F : *M) + runAfterPass(&F); +} + +void PseudoProbeVerifier::runAfterPass(const LazyCallGraph::SCC *C) { + for (const LazyCallGraph::Node &N : *C) + runAfterPass(&N.getFunction()); +} + +void PseudoProbeVerifier::runAfterPass(const Function *F) { + if (!shouldVerifyFunction(F)) + return; + ProbeFactorMap ProbeFactors; + for (const auto &BB : *F) + collectProbeFactors(&BB, ProbeFactors); + verifyProbeFactors(F, ProbeFactors); +} + +void PseudoProbeVerifier::runAfterPass(const Loop *L) { + const Function *F = L->getHeader()->getParent(); + runAfterPass(F); +} + +void PseudoProbeVerifier::collectProbeFactors(const BasicBlock *Block, + ProbeFactorMap &ProbeFactors) { + for (const auto &I : *Block) { + if (Optional Probe = extractProbe(I)) + ProbeFactors[Probe->Id] += Probe->Factor; + } +} + +void PseudoProbeVerifier::verifyProbeFactors( + const Function *F, const ProbeFactorMap &ProbeFactors) { + bool BannerPrinted = false; + auto &PrevProbeFactors = FunctionProbeFactors[F->getName()]; + for (const auto &I : ProbeFactors) { + float CurProbeFactor = I.second; + if (PrevProbeFactors.count(I.first)) { + float PrevProbeFactor = PrevProbeFactors[I.first]; + if (std::abs(CurProbeFactor - PrevProbeFactor) > + DistributionFactorVariance) { + if (!BannerPrinted) { + dbgs() << "Function " << F->getName() << ":\n"; + BannerPrinted = true; + } + dbgs() << "Probe " << I.first << "\tprevious factor " + << format("%0.2f", PrevProbeFactor) << "\tcurrent factor " + << 
format("%0.2f", CurProbeFactor) << "\n"; + } + } + + // Update + PrevProbeFactors[I.first] = I.second; + } +} + PseudoProbeManager::PseudoProbeManager(const Module &M) { if (NamedMDNode *FuncInfo = M.getNamedMetadata(PseudoProbeDescMetadataName)) { for (const auto *Operand : FuncInfo->operands()) { @@ -201,7 +313,8 @@ Function *ProbeFn = llvm::Intrinsic::getDeclaration(M, Intrinsic::pseudoprobe); Value *Args[] = {Builder.getInt64(Guid), Builder.getInt64(Index), - Builder.getInt32(0)}; + Builder.getInt32(0), + Builder.getInt64(PseudoProbeFullDistributionFactor)}; auto *Probe = Builder.CreateCall(ProbeFn, Args); AssignDebugLoc(Probe); } @@ -219,7 +332,8 @@ // Levarge the 32-bit discriminator field of debug data to store the ID and // type of a callsite probe. This gets rid of the dependency on plumbing a // customized metadata through the codegen pipeline. - uint32_t V = PseudoProbeDwarfDiscriminator::packProbeData(Index, Type); + uint32_t V = PseudoProbeDwarfDiscriminator::packProbeData( + Index, Type, 0, PseudoProbeDwarfDiscriminator::FullDistributionFactor); if (auto DIL = Call->getDebugLoc()) { DIL = DIL->cloneWithDiscriminator(V); Call->setDebugLoc(DIL); @@ -274,3 +388,47 @@ return PreservedAnalyses::none(); } + +void PseudoProbeUpdatePass::runOnFunction(Function &F, + FunctionAnalysisManager &FAM) { + BlockFrequencyInfo &BFI = FAM.getResult(F); + auto BBProfileCount = [&BFI](BasicBlock *BB) { + return BFI.getBlockProfileCount(BB) + ? BFI.getBlockProfileCount(BB).getValue() + : 0; + }; + + // Collect the sum of execution weight for each probe. + ProbeFactorMap ProbeFactors; + for (auto &Block : F) { + for (auto &I : Block) { + if (Optional Probe = extractProbe(I)) + ProbeFactors[Probe->Id] += BBProfileCount(&Block); + } + } + + // Fix up over-counted probes. 
+ for (auto &Block : F) { + for (auto &I : Block) { + if (Optional Probe = extractProbe(I)) { + float Sum = ProbeFactors[Probe->Id]; + if (Sum != 0) + setProbeDistributionFactor(I, BBProfileCount(&Block) / Sum); + } + } + } +} + +PreservedAnalyses PseudoProbeUpdatePass::run(Module &M, + ModuleAnalysisManager &AM) { + if (UpdatePseudoProbe) { + for (auto &F : M) { + if (F.isDeclaration()) + continue; + FunctionAnalysisManager &FAM = + AM.getResult(M).getManager(); + runOnFunction(F, FAM); + } + } + return PreservedAnalyses::none(); +} diff --git a/llvm/test/Transforms/SampleProfile/Inputs/pseudo-probe-update.prof b/llvm/test/Transforms/SampleProfile/Inputs/pseudo-probe-update.prof new file mode 100644 --- /dev/null +++ b/llvm/test/Transforms/SampleProfile/Inputs/pseudo-probe-update.prof @@ -0,0 +1,8 @@ +foo:3200:13 + 1: 13 + 2: 7 + 3: 6 + 4: 13 + 5: 7 + 6: 6 + !CFGChecksum: 844530426352218 diff --git a/llvm/test/Transforms/SampleProfile/pseudo-probe-emit-inline.ll b/llvm/test/Transforms/SampleProfile/pseudo-probe-emit-inline.ll --- a/llvm/test/Transforms/SampleProfile/pseudo-probe-emit-inline.ll +++ b/llvm/test/Transforms/SampleProfile/pseudo-probe-emit-inline.ll @@ -11,14 +11,14 @@ ; RUN: llvm-objdump --section-headers %t4 | FileCheck %s --check-prefix=CHECK-OBJ define dso_local void @foo2() !dbg !7 { -; CHECK-IL: call void @llvm.pseudoprobe(i64 [[#GUID1:]], i64 1, i32 0), !dbg ![[#]] +; CHECK-IL: call void @llvm.pseudoprobe(i64 [[#GUID1:]], i64 1, i32 0, i64 -1), !dbg ![[#]] ; CHECK-ASM: .pseudoprobe [[#GUID1:]] 1 0 0 ret void, !dbg !10 } define dso_local void @foo() #0 !dbg !11 { -; CHECK-IL: call void @llvm.pseudoprobe(i64 [[#GUID2:]], i64 1, i32 0), !dbg ![[#]] -; CHECK-IL: call void @llvm.pseudoprobe(i64 [[#GUID1]], i64 1, i32 0), !dbg ![[#DL1:]] +; CHECK-IL: call void @llvm.pseudoprobe(i64 [[#GUID2:]], i64 1, i32 0, i64 -1), !dbg ![[#]] +; CHECK-IL: call void @llvm.pseudoprobe(i64 [[#GUID1]], i64 1, i32 0, i64 -1), !dbg ![[#DL1:]] ; CHECK-ASM: .pseudoprobe 
[[#GUID2:]] 1 0 0 ; CHECK-ASM: .pseudoprobe [[#GUID1]] 1 0 0 @ [[#GUID2]]:2 call void @foo2(), !dbg !12 @@ -26,9 +26,9 @@ } define dso_local i32 @entry() !dbg !14 { -; CHECK-IL: call void @llvm.pseudoprobe(i64 [[#GUID3:]], i64 1, i32 0), !dbg ![[#]] -; CHECK-IL: call void @llvm.pseudoprobe(i64 [[#GUID2]], i64 1, i32 0), !dbg ![[#DL2:]] -; CHECK-IL: call void @llvm.pseudoprobe(i64 [[#GUID1]], i64 1, i32 0), !dbg ![[#DL3:]] +; CHECK-IL: call void @llvm.pseudoprobe(i64 [[#GUID3:]], i64 1, i32 0, i64 -1), !dbg ![[#]] +; CHECK-IL: call void @llvm.pseudoprobe(i64 [[#GUID2]], i64 1, i32 0, i64 -1), !dbg ![[#DL2:]] +; CHECK-IL: call void @llvm.pseudoprobe(i64 [[#GUID1]], i64 1, i32 0, i64 -1), !dbg ![[#DL3:]] ; CHECK-ASM: .pseudoprobe [[#GUID3:]] 1 0 0 ; CHECK-ASM: .pseudoprobe [[#GUID2]] 1 0 0 @ [[#GUID3]]:2 ; CHECK-ASM: .pseudoprobe [[#GUID1]] 1 0 0 @ [[#GUID3]]:2 @ [[#GUID2]]:2 @@ -41,13 +41,13 @@ ; CHECK-IL: ![[#SCOPE2:]] = distinct !DISubprogram(name: "foo" ; CHECK-IL: ![[#DL1]] = !DILocation(line: 3, column: 1, scope: ![[#SCOPE1]], inlinedAt: ![[#INL1:]]) ; CHECK-IL: ![[#INL1]] = distinct !DILocation(line: 7, column: 3, scope: ![[#BL1:]]) -;; A discriminator of 134217751 which is 0x8000017 in hexdecimal, stands for a direct call probe -;; with an index of 2. -; CHECK-IL: ![[#BL1]] = !DILexicalBlockFile(scope: ![[#SCOPE2]], file: !1, discriminator: 134217751) +;; A discriminator of 186646551 which is 0xb200017 in hexadecimal, stands for a direct call probe +;; with an index of 2 and a scale of 100%. 
+; CHECK-IL: ![[#BL1]] = !DILexicalBlockFile(scope: ![[#SCOPE2]], file: !1, discriminator: 186646551) ; CHECK-IL: ![[#SCOPE3:]] = distinct !DISubprogram(name: "entry" ; CHECK-IL: ![[#DL2]] = !DILocation(line: 7, column: 3, scope: ![[#SCOPE2]], inlinedAt: ![[#INL2:]]) ; CHECK-IL: ![[#INL2]] = distinct !DILocation(line: 11, column: 3, scope: ![[#BL2:]]) -; CHECK-IL: ![[#BL2]] = !DILexicalBlockFile(scope: ![[#SCOPE3]], file: !1, discriminator: 134217751) +; CHECK-IL: ![[#BL2]] = !DILexicalBlockFile(scope: ![[#SCOPE3]], file: !1, discriminator: 186646551) ; CHECK-IL: ![[#DL3]] = !DILocation(line: 3, column: 1, scope: ![[#SCOPE1]], inlinedAt: ![[#INL3:]]) ; CHECK-IL: ![[#INL3]] = distinct !DILocation(line: 7, column: 3, scope: ![[#BL1]], inlinedAt: ![[#INL2]]) diff --git a/llvm/test/Transforms/SampleProfile/pseudo-probe-emit.ll b/llvm/test/Transforms/SampleProfile/pseudo-probe-emit.ll --- a/llvm/test/Transforms/SampleProfile/pseudo-probe-emit.ll +++ b/llvm/test/Transforms/SampleProfile/pseudo-probe-emit.ll @@ -11,32 +11,36 @@ ;; Check the generation of pseudoprobe intrinsic call. 
+@a = dso_local global i32 0, align 4 + define void @foo(i32 %x) !dbg !3 { bb0: %cmp = icmp eq i32 %x, 0 -; CHECK-IL: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 1, i32 0), !dbg ![[#FAKELINE:]] +; CHECK-IL: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 1, i32 0, i64 -1), !dbg ![[#FAKELINE:]] ; CHECK-MIR: PSEUDO_PROBE [[#GUID:]], 1, 0, 0 ; CHECK-ASM: .pseudoprobe [[#GUID:]] 1 0 0 br i1 %cmp, label %bb1, label %bb2 bb1: -; CHECK-IL: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 2, i32 0), !dbg ![[#FAKELINE]] +; CHECK-IL: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 2, i32 0, i64 -1), !dbg ![[#FAKELINE]] ; CHECK-MIR: PSEUDO_PROBE [[#GUID]], 3, 0, 0 ; CHECK-MIR: PSEUDO_PROBE [[#GUID]], 4, 0, 0 ; CHECK-ASM: .pseudoprobe [[#GUID]] 3 0 0 ; CHECK-ASM: .pseudoprobe [[#GUID]] 4 0 0 + store i32 6, i32* @a, align 4 br label %bb3 bb2: -; CHECK-IL: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 3, i32 0), !dbg ![[#FAKELINE]] +; CHECK-IL: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 3, i32 0, i64 -1), !dbg ![[#FAKELINE]] ; CHECK-MIR: PSEUDO_PROBE [[#GUID]], 2, 0, 0 ; CHECK-MIR: PSEUDO_PROBE [[#GUID]], 4, 0, 0 ; CHECK-ASM: .pseudoprobe [[#GUID]] 2 0 0 ; CHECK-ASM: .pseudoprobe [[#GUID]] 4 0 0 + store i32 8, i32* @a, align 4 br label %bb3 bb3: -; CHECK-IL: call void @llvm.pseudoprobe(i64 [[#GUID]], i64 4, i32 0), !dbg ![[#REALLINE:]] +; CHECK-IL: call void @llvm.pseudoprobe(i64 [[#GUID]], i64 4, i32 0, i64 -1), !dbg ![[#REALLINE:]] ret void, !dbg !12 } @@ -44,7 +48,7 @@ define internal void @foo2(void (i32)* %f) !dbg !4 { entry: -; CHECK-IL: call void @llvm.pseudoprobe(i64 [[#GUID2:]], i64 1, i32 0) +; CHECK-IL: call void @llvm.pseudoprobe(i64 [[#GUID2:]], i64 1, i32 0, i64 -1) ; CHECK-MIR: PSEUDO_PROBE [[#GUID2:]], 1, 0, 0 ; CHECK-ASM: .pseudoprobe [[#GUID2:]] 1 0 0 ; Check pseudo_probe metadata attached to the indirect call instruction. 
@@ -64,13 +68,13 @@ ; CHECK-IL: ![[#FAKELINE]] = !DILocation(line: 0, scope: ![[#FOO]]) ; CHECK-IL: ![[#REALLINE]] = !DILocation(line: 2, scope: ![[#FOO]]) ; CHECK-IL: ![[#PROBE0]] = !DILocation(line: 2, column: 20, scope: ![[#SCOPE0:]]) -;; A discriminator of 67108887 which is 0x4000017 in hexdecimal, stands for a direct call probe +;; A discriminator of 119537687 which is 0x7200017 in hexadecimal, stands for an indirect call probe ;; with an index of 2. -; CHECK-IL: ![[#SCOPE0]] = !DILexicalBlockFile(scope: ![[#]], file: ![[#]], discriminator: 67108887) +; CHECK-IL: ![[#SCOPE0]] = !DILexicalBlockFile(scope: ![[#]], file: ![[#]], discriminator: 119537687) ; CHECK-IL: ![[#PROBE1]] = !DILocation(line: 0, scope: ![[#SCOPE1:]]) -;; A discriminator of 134217759 which is 0x800001f in hexdecimal, stands for a direct call probe +;; A discriminator of 186646559 which is 0xb20001f in hexadecimal, stands for a direct call probe ;; with an index of 3. -; CHECK-IL: ![[#SCOPE1]] = !DILexicalBlockFile(scope: ![[#]], file: ![[#]], discriminator: 134217759) +; CHECK-IL: ![[#SCOPE1]] = !DILexicalBlockFile(scope: ![[#]], file: ![[#]], discriminator: 186646559) ; Check the generation of .pseudo_probe_desc section ; CHECK-ASM: .section .pseudo_probe_desc,"G",@progbits,.pseudo_probe_desc_foo,comdat diff --git a/llvm/test/Transforms/SampleProfile/pseudo-probe-inline.ll b/llvm/test/Transforms/SampleProfile/pseudo-probe-inline.ll --- a/llvm/test/Transforms/SampleProfile/pseudo-probe-inline.ll +++ b/llvm/test/Transforms/SampleProfile/pseudo-probe-inline.ll @@ -12,18 +12,18 @@ define dso_local i32 @foo(i32 %x) #0 !dbg !12 { entry: -; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID1:]], i64 1, i32 0) +; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID1:]], i64 1, i32 0, i64 -1) %add = add nsw i32 %x, 100000, !dbg !19 ;; Check zen is fully inlined so there's no call to zen anymore. ;; Check code from the inlining of zen is properly annotated here. 
-; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID2:]], i64 1, i32 0) +; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID2:]], i64 1, i32 0, i64 -1) ; CHECK: br i1 %cmp.i, label %while.cond.i, label %while.cond2.i, !dbg ![[#]], !prof ![[PD1:[0-9]+]] -; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID2]], i64 2, i32 0) +; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID2]], i64 2, i32 0, i64 -1) ; CHECK: br i1 %cmp1.i, label %while.body.i, label %zen.exit, !dbg ![[#]], !prof ![[PD2:[0-9]+]] -; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID2]], i64 3, i32 0) -; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID2]], i64 4, i32 0) -; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID2]], i64 5, i32 0) -; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID2]], i64 6, i32 0) +; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID2]], i64 3, i32 0, i64 -1) +; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID2]], i64 4, i32 0, i64 -1) +; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID2]], i64 5, i32 0, i64 -1) +; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID2]], i64 6, i32 0, i64 -1) ; CHECK-NOT: call i32 @zen %call = call i32 @zen(i32 %add), !dbg !20 ret i32 %call, !dbg !21 @@ -32,36 +32,36 @@ ; CHECK: define dso_local i32 @zen define dso_local i32 @zen(i32 %x) #0 !dbg !22 { entry: -; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID2]], i64 1, i32 0) +; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID2]], i64 1, i32 0, i64 -1) %cmp = icmp sgt i32 %x, 0, !dbg !26 br i1 %cmp, label %while.cond, label %while.cond2, !dbg !28 while.cond: -; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID2]], i64 2, i32 0) +; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID2]], i64 2, i32 0, i64 -1) %x.addr.0 = phi i32 [ %x, %entry ], [ %sub, %while.body ] %cmp1 = icmp sgt i32 %x.addr.0, 0, !dbg !29 br i1 %cmp1, label %while.body, label %if.end, !dbg !31 while.body: -; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID2]], i64 3, i32 0) +; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID2]], i64 3, i32 0, i64 -1) %0 = load volatile 
i32, i32* @factor, align 4, !dbg !32 %sub = sub nsw i32 %x.addr.0, %0, !dbg !39 br label %while.cond, !dbg !31 while.cond2: -; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID2]], i64 4, i32 0) +; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID2]], i64 4, i32 0, i64 -1) %x.addr.1 = phi i32 [ %x, %entry ], [ %add, %while.body4 ] %cmp3 = icmp slt i32 %x.addr.1, 0, !dbg !42 br i1 %cmp3, label %while.body4, label %if.end, !dbg !44 while.body4: -; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID2]], i64 5, i32 0) +; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID2]], i64 5, i32 0, i64 -1) %1 = load volatile i32, i32* @factor, align 4, !dbg !45 %add = add nsw i32 %x.addr.1, %1, !dbg !48 br label %while.cond2, !dbg !44 if.end: -; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID2]], i64 6, i32 0) +; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID2]], i64 6, i32 0, i64 -1) %x.addr.2 = phi i32 [ %x.addr.0, %while.cond ], [ %x.addr.1, %while.cond2 ] ret i32 %x.addr.2, !dbg !51 } @@ -109,6 +109,10 @@ ;YAML-NEXT: - NumSamples: '23' ;YAML-NEXT: - String: ' samples from profile (ProbeId=' ;YAML-NEXT: - ProbeId: '1' +;YAML-NEXT: - String: ', Factor=' +;YAML-NEXT: - Factor: '1.000000e+00' +;YAML-NEXT: - String: ', OriginalSamples=' +;YAML-NEXT: - OriginalSamples: '23' ;YAML-NEXT: - String: ')' ;YAML-NEXT: ... ;YAML: --- !Analysis @@ -121,6 +125,10 @@ ;YAML-NEXT: - NumSamples: '23' ;YAML-NEXT: - String: ' samples from profile (ProbeId=' ;YAML-NEXT: - ProbeId: '1' +;YAML-NEXT: - String: ', Factor=' +;YAML-NEXT: - Factor: '1.000000e+00' +;YAML-NEXT: - String: ', OriginalSamples=' +;YAML-NEXT: - OriginalSamples: '23' ;YAML-NEXT: - String: ')' ;YAML-NEXT: ... 
;YAML: --- !Analysis @@ -133,6 +141,10 @@ ;YAML-NEXT: - NumSamples: '382920' ;YAML-NEXT: - String: ' samples from profile (ProbeId=' ;YAML-NEXT: - ProbeId: '2' +;YAML-NEXT: - String: ', Factor=' +;YAML-NEXT: - Factor: '1.000000e+00' +;YAML-NEXT: - String: ', OriginalSamples=' +;YAML-NEXT: - OriginalSamples: '382920' ;YAML-NEXT: - String: ')' ;YAML-NEXT: ... diff --git a/llvm/test/Transforms/SampleProfile/pseudo-probe-profile.ll b/llvm/test/Transforms/SampleProfile/pseudo-probe-profile.ll --- a/llvm/test/Transforms/SampleProfile/pseudo-probe-profile.ll +++ b/llvm/test/Transforms/SampleProfile/pseudo-probe-profile.ll @@ -8,26 +8,26 @@ store i32 %x, i32* %x.addr, align 4 %0 = load i32, i32* %x.addr, align 4 %cmp = icmp eq i32 %0, 0 - ; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 1, i32 0) + ; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 1, i32 0, i64 -1) br i1 %cmp, label %if.then, label %if.else ; CHECK: br i1 %cmp, label %if.then, label %if.else, !prof ![[PD1:[0-9]+]] if.then: ; CHECK: call {{.*}}, !dbg ![[#PROBE1:]], !prof ![[PROF1:[0-9]+]] call void %f(i32 1) - ; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 2, i32 0) + ; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 2, i32 0, i64 -1) store i32 1, i32* %retval, align 4 br label %return if.else: ; CHECK: call {{.*}}, !dbg ![[#PROBE2:]], !prof ![[PROF2:[0-9]+]] call void %f(i32 2) - ; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 3, i32 0) + ; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 3, i32 0, i64 -1) store i32 2, i32* %retval, align 4 br label %return return: - ; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 4, i32 0) + ; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 4, i32 0, i64 -1) %1 = load i32, i32* %retval, align 4 ret i32 %1 } @@ -36,14 +36,14 @@ ; CHECK: ![[PD1]] = !{!"branch_weights", i32 8, i32 7} ; CHECK: ![[#PROBE1]] = !DILocation(line: 0, scope: ![[#SCOPE1:]]) -;; A discriminator of 119537711 which is 0x400002f in 
hexdecimal, stands for an indirect call probe +;; A discriminator of 119537711 which is 0x720002f in hexadecimal, stands for an indirect call probe ;; with an index of 5. -; CHECK: ![[#SCOPE1]] = !DILexicalBlockFile(scope: ![[#]], file: ![[#]], discriminator: 67108911) +; CHECK: ![[#SCOPE1]] = !DILexicalBlockFile(scope: ![[#]], file: ![[#]], discriminator: 119537711) ; CHECK: ![[PROF1]] = !{!"VP", i32 0, i64 7, i64 9191153033785521275, i64 5, i64 -1069303473483922844, i64 2} -; CHECK: ![[#PROBE2]] = !DILocation(line: 0, scope: ![[#SCOPE2:]]) -;; A discriminator of 119537719 which is 0x4000037 in hexdecimal, stands for an indirect call probe +;; A discriminator of 119537719 which is 0x7200037 in hexadecimal, stands for an indirect call probe ;; with an index of 6. -; CHECK: ![[#SCOPE2]] = !DILexicalBlockFile(scope: ![[#]], file: ![[#]], discriminator: 67108919) +; CHECK: ![[#PROBE2]] = !DILocation(line: 0, scope: ![[#SCOPE2:]]) +; CHECK: ![[#SCOPE2]] = !DILexicalBlockFile(scope: ![[#]], file: ![[#]], discriminator: 119537719) ; CHECK: ![[PROF2]] = !{!"VP", i32 0, i64 6, i64 -1069303473483922844, i64 4, i64 9191153033785521275, i64 2} !llvm.module.flags = !{!9, !10} @@ -69,6 +69,10 @@ ;YAML-NEXT: - NumSamples: '13' ;YAML-NEXT: - String: ' samples from profile (ProbeId=' ;YAML-NEXT: - ProbeId: '1' +;YAML-NEXT: - String: ', Factor=' +;YAML-NEXT: - Factor: '1.000000e+00' +;YAML-NEXT: - String: ', OriginalSamples=' +;YAML-NEXT: - OriginalSamples: '13' ;YAML-NEXT: - String: ')' ;YAML: --- !Analysis ;YAML-NEXT: Pass: sample-profile @@ -80,6 +84,10 @@ ;YAML-NEXT: - NumSamples: '7' ;YAML-NEXT: - String: ' samples from profile (ProbeId=' ;YAML-NEXT: - ProbeId: '5' +;YAML-NEXT: - String: ', Factor=' +;YAML-NEXT: - Factor: '1.000000e+00' +;YAML-NEXT: - String: ', OriginalSamples=' +;YAML-NEXT: - OriginalSamples: '7' ;YAML-NEXT: - String: ')' ;YAML: --- !Analysis ;YAML-NEXT: Pass: sample-profile @@ -91,6 +99,10 @@ ;YAML-NEXT: - NumSamples: '7' ;YAML-NEXT: - String: ' samples from 
profile (ProbeId=' ;YAML-NEXT: - ProbeId: '2' +;YAML-NEXT: - String: ', Factor=' +;YAML-NEXT: - Factor: '1.000000e+00' +;YAML-NEXT: - String: ', OriginalSamples=' +;YAML-NEXT: - OriginalSamples: '7' ;YAML-NEXT: - String: ')' ;YAML: --- !Analysis ;YAML-NEXT: Pass: sample-profile @@ -102,6 +114,10 @@ ;YAML-NEXT: - NumSamples: '6' ;YAML-NEXT: - String: ' samples from profile (ProbeId=' ;YAML-NEXT: - ProbeId: '6' +;YAML-NEXT: - String: ', Factor=' +;YAML-NEXT: - Factor: '1.000000e+00' +;YAML-NEXT: - String: ', OriginalSamples=' +;YAML-NEXT: - OriginalSamples: '6' ;YAML-NEXT: - String: ')' ;YAML: --- !Analysis ;YAML-NEXT: Pass: sample-profile @@ -113,6 +129,10 @@ ;YAML-NEXT: - NumSamples: '6' ;YAML-NEXT: - String: ' samples from profile (ProbeId=' ;YAML-NEXT: - ProbeId: '3' +;YAML-NEXT: - String: ', Factor=' +;YAML-NEXT: - Factor: '1.000000e+00' +;YAML-NEXT: - String: ', OriginalSamples=' +;YAML-NEXT: - OriginalSamples: '6' ;YAML-NEXT: - String: ')' ;YAML: --- !Analysis ;YAML-NEXT: Pass: sample-profile @@ -124,4 +144,8 @@ ;YAML-NEXT: - NumSamples: '13' ;YAML-NEXT: - String: ' samples from profile (ProbeId=' ;YAML-NEXT: - ProbeId: '4' +;YAML-NEXT: - String: ', Factor=' +;YAML-NEXT: - Factor: '1.000000e+00' +;YAML-NEXT: - String: ', OriginalSamples=' +;YAML-NEXT: - OriginalSamples: '13' ;YAML-NEXT: - String: ')' diff --git a/llvm/test/Transforms/SampleProfile/pseudo-probe-update.ll b/llvm/test/Transforms/SampleProfile/pseudo-probe-update.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Transforms/SampleProfile/pseudo-probe-update.ll @@ -0,0 +1,45 @@ +; RUN: opt < %s -passes='pseudo-probe,sample-profile,jump-threading,pseudo-probe-update' -sample-profile-file=%S/Inputs/pseudo-probe-update.prof -S | FileCheck %s + +declare i32 @f1() +declare i32 @f2() +declare void @f3() + + +;; This tests that the branch in 'merge' can be cloned up into T1. 
+define i32 @foo(i1 %cond, i1 %cond2) #0 { +; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 1, i32 0, i64 -1) + br i1 %cond, label %T1, label %F1 +T1: +; CHECK: %v1 = call i32 @f1(), !prof ![[#PROF1:]] + %v1 = call i32 @f1() +; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 2, i32 0, i64 -1) +;; The distribution factor -8513881372706734080 stands for 53.85%, which is from 7/(6+7). +; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 4, i32 0, i64 -8513881372706734080) + %cond3 = icmp eq i32 %v1, 412 + br label %Merge +F1: +; CHECK: %v2 = call i32 @f2(), !prof ![[#PROF2:]] + %v2 = call i32 @f2() +; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 3, i32 0, i64 -1) +;; The distribution factor 8513881922462547968 stands for 46.15%, which is from 6/(6+7). +; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 4, i32 0, i64 8513881922462547968) + br label %Merge +Merge: + + %A = phi i1 [%cond3, %T1], [%cond2, %F1] + %B = phi i32 [%v1, %T1], [%v2, %F1] + br i1 %A, label %T2, label %F2 +T2: +; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 5, i32 0, i64 -1) + call void @f3() + ret i32 %B +F2: +; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 6, i32 0, i64 -1) + ret i32 %B +} + +; CHECK: ![[#PROF1]] = !{!"branch_weights", i32 7} +; CHECK: ![[#PROF2]] = !{!"branch_weights", i32 6} + +attributes #0 = {"use-sample-profile"} + diff --git a/llvm/test/Transforms/SampleProfile/pseudo-probe-verify.ll b/llvm/test/Transforms/SampleProfile/pseudo-probe-verify.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Transforms/SampleProfile/pseudo-probe-verify.ll @@ -0,0 +1,77 @@ +; REQUIRES: x86_64-linux +; RUN: opt < %s -passes='pseudo-probe,loop-unroll-full' -verify-pseudo-probe -S -o %t 2>&1 | FileCheck %s --check-prefix=VERIFY +; RUN: FileCheck %s < %t + +; VERIFY: *** Pseudo Probe Verification After LoopFullUnrollPass *** +; VERIFY: Function foo: +; VERIFY-DAG: Probe 6 previous factor 1.00 current factor 5.00 +; VERIFY-DAG: Probe 4 previous 
factor 1.00 current factor 5.00 + +declare void @foo2() nounwind + +define void @foo(i32 %x) { +bb: +; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 1, i32 0, i64 -1) + %tmp = alloca [5 x i32*], align 16 + br label %bb7.preheader + +bb3.loopexit: + %spec.select.lcssa = phi i32 [ %spec.select, %bb10 ] + %tmp5.not = icmp eq i32 %spec.select.lcssa, 0 + br i1 %tmp5.not, label %bb24, label %bb7.preheader + +bb7.preheader: +; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 3, i32 0, i64 -1) + %tmp1.06 = phi i32 [ 5, %bb ], [ %spec.select.lcssa, %bb3.loopexit ] + br label %bb10 + +bb10: +; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 4, i32 0, i64 -1) +; CHECK: call void @foo2(), !dbg ![[#PROBE6:]] +; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 4, i32 0, i64 -1) +; CHECK: call void @foo2(), !dbg ![[#PROBE6:]] +; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 4, i32 0, i64 -1) +; CHECK: call void @foo2(), !dbg ![[#PROBE6:]] +; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 4, i32 0, i64 -1) +; CHECK: call void @foo2(), !dbg ![[#PROBE6:]] +; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 4, i32 0, i64 -1) +; CHECK: call void @foo2(), !dbg ![[#PROBE6:]] +; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 2, i32 0, i64 -1) + %indvars.iv = phi i64 [ 0, %bb7.preheader ], [ %indvars.iv.next, %bb10 ] + %tmp1.14 = phi i32 [ %tmp1.06, %bb7.preheader ], [ %spec.select, %bb10 ] + %tmp13 = getelementptr inbounds [5 x i32*], [5 x i32*]* %tmp, i64 0, i64 %indvars.iv + %tmp14 = load i32*, i32** %tmp13, align 8 + %tmp15.not = icmp ne i32* %tmp14, null + %tmp18 = sext i1 %tmp15.not to i32 + %spec.select = add nsw i32 %tmp1.14, %tmp18 + call void @foo2(), !dbg !12 + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + %exitcond.not = icmp eq i64 %indvars.iv.next, 5 + br i1 %exitcond.not, label %bb3.loopexit, label %bb10, !llvm.loop !13 + +bb24: +; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 5, i32 0, i64 -1) + ret 
void +} + +;; A discriminator of 186646583 which is 0xb200037 in hexadecimal, stands for a direct call probe +;; with an index of 6 and a distribution factor of 100%. +; CHECK: ![[#PROBE6]] = !DILocation(line: 2, column: 20, scope: ![[#SCOPE:]]) +; CHECK: ![[#SCOPE]] = !DILexicalBlockFile(scope: ![[#]], file: ![[#]], discriminator: 186646583) + +!llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!9, !10} + +!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 3.9.0", isOptimized: false, runtimeVersion: 0, emissionKind: 1, enums: !2) +!1 = !DIFile(filename: "test.c", directory: "") +!2 = !{} +!4 = distinct !DISubprogram(name: "foo", scope: !1, file: !1, line: 2, type: !5, isLocal: false, isDefinition: true, scopeLine: 2, isOptimized: false, unit: !0, retainedNodes: !2) +!5 = !DISubroutineType(types: !6) +!6 = !{!7} +!7 = !DIBasicType(name: "int", size: 32, align: 32, encoding: DW_ATE_signed) +!9 = !{i32 2, !"Dwarf Version", i32 4} +!10 = !{i32 2, !"Debug Info Version", i32 3} +!11 = !{!"clang version 3.9.0"} +!12 = !DILocation(line: 2, column: 20, scope: !4) +!13 = distinct !{!13, !14} +!14 = !{!"llvm.loop.unroll.full"}