diff --git a/clang/lib/CodeGen/BackendUtil.cpp b/clang/lib/CodeGen/BackendUtil.cpp --- a/clang/lib/CodeGen/BackendUtil.cpp +++ b/clang/lib/CodeGen/BackendUtil.cpp @@ -1261,6 +1261,8 @@ "", PGOOptions::NoAction, PGOOptions::CSIRInstr, CodeGenOpts.DebugInfoForProfiling); } + if (TM) + TM->setPGOOption(PGOOpt); PipelineTuningOptions PTO; PTO.LoopUnrolling = CodeGenOpts.UnrollLoops; diff --git a/llvm/include/llvm/CodeGen/MIRSampleProfile.h b/llvm/include/llvm/CodeGen/MIRSampleProfile.h new file mode 100644 --- /dev/null +++ b/llvm/include/llvm/CodeGen/MIRSampleProfile.h @@ -0,0 +1,81 @@ +//===----- MIRSampleProfile.h: SampleFDO Support in MIR ---*- c++ -*-------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file contains the supporting functions for machine level Sample FDO +// loader. This is used in Flow Sensitive SampleFDO. 
+// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CODEGEN_MIRSAMPLEPROFILE_H +#define LLVM_CODEGEN_MIRSAMPLEPROFILE_H + +#include "llvm/Analysis/ProfileSummaryInfo.h" +#include "llvm/CodeGen/MachineBasicBlock.h" +#include "llvm/CodeGen/MachineBlockFrequencyInfo.h" +#include "llvm/CodeGen/MachineBranchProbabilityInfo.h" +#include "llvm/CodeGen/MachineDominators.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstr.h" +#include "llvm/CodeGen/MachineLoopInfo.h" +#include "llvm/CodeGen/MachineOptimizationRemarkEmitter.h" +#include "llvm/CodeGen/MachinePostDominators.h" +#include "llvm/CodeGen/Passes.h" +#include "llvm/IR/DebugInfoMetadata.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/Module.h" +#include "llvm/InitializePasses.h" +#include "llvm/ProfileData/InstrProf.h" +#include "llvm/ProfileData/SampleProf.h" +#include "llvm/ProfileData/SampleProfReader.h" + +#include + +namespace llvm { + +using namespace sampleprof; + +class MIRProfileLoader; +class MIRProfileLoaderPass : public MachineFunctionPass { + /// Machine function handed back by getMachineFunction(); null until set. + MachineFunction *MF = nullptr; + std::string ProfileFileName; + FSDiscriminatorPass P; + unsigned LowBit; + unsigned HighBit; + +public: + static char ID; + /// FS bits will only use the '1' bits in the Mask. + MIRProfileLoaderPass(std::string FileName = "", + std::string RemappingFileName = "", + FSDiscriminatorPass P = FSDiscriminatorPass::Pass1) + : MachineFunctionPass(ID), ProfileFileName(FileName), P(P), + MIRSampleLoader( + std::make_unique(FileName, RemappingFileName)) { + LowBit = getFSPassBitBegin(P); + HighBit = getFSPassBitEnd(P); + assert(LowBit < HighBit && "HighBit needs to be greater than Lowbit"); + } + + /// getMachineFunction - Return the last machine function computed. 
+ const MachineFunction *getMachineFunction() const { return MF; } + +private: + void init(MachineFunction &MF); + bool runOnMachineFunction(MachineFunction &) override; + bool doInitialization(Module &M) override; + void getAnalysisUsage(AnalysisUsage &AU) const override; + + std::unique_ptr MIRSampleLoader; + /// Hold the information of the basic block frequency. Null until + /// runOnMachineFunction() fetches the analysis. + MachineBlockFrequencyInfo *MBFI = nullptr; +}; + +} // namespace llvm + +#endif // LLVM_CODEGEN_MIRSAMPLEPROFILE_H diff --git a/llvm/include/llvm/CodeGen/MachineDominators.h b/llvm/include/llvm/CodeGen/MachineDominators.h --- a/llvm/include/llvm/CodeGen/MachineDominators.h +++ b/llvm/include/llvm/CodeGen/MachineDominators.h @@ -112,6 +112,12 @@ return DT->dominates(A, B); } + void getDescendants(MachineBasicBlock *A, + SmallVectorImpl &Result) { + applySplitCriticalEdges(); + DT->getDescendants(A, Result); + } + bool dominates(const MachineBasicBlock *A, const MachineBasicBlock *B) const { applySplitCriticalEdges(); return DT->dominates(A, B); diff --git a/llvm/include/llvm/CodeGen/MachineOptimizationRemarkEmitter.h b/llvm/include/llvm/CodeGen/MachineOptimizationRemarkEmitter.h --- a/llvm/include/llvm/CodeGen/MachineOptimizationRemarkEmitter.h +++ b/llvm/include/llvm/CodeGen/MachineOptimizationRemarkEmitter.h @@ -118,6 +118,12 @@ : DiagnosticInfoMIROptimization(DK_MachineOptimizationRemarkAnalysis, PassName, RemarkName, Loc, MBB) {} + MachineOptimizationRemarkAnalysis(const char *PassName, StringRef RemarkName, + const MachineInstr *MI) + : DiagnosticInfoMIROptimization(DK_MachineOptimizationRemarkAnalysis, + PassName, RemarkName, MI->getDebugLoc(), + MI->getParent()) {} + static bool classof(const DiagnosticInfo *DI) { return DI->getKind() == DK_MachineOptimizationRemarkAnalysis; } diff --git a/llvm/include/llvm/CodeGen/Passes.h b/llvm/include/llvm/CodeGen/Passes.h --- a/llvm/include/llvm/CodeGen/Passes.h +++ b/llvm/include/llvm/CodeGen/Passes.h @@ 
-171,6 +171,9 @@ /// This pass adds flow sensitive 
discriminators. extern char &MIRAddFSDiscriminatorsID; + /// This pass reads flow sensitive profile. + extern char &MIRProfileLoaderPassID; + /// FastRegisterAllocation Pass - This pass register allocates as fast as /// possible. It is best suited for debug code where live ranges are short. /// @@ -513,6 +516,11 @@ FunctionPass * createMIRAddFSDiscriminatorsPass(sampleprof::FSDiscriminatorPass P); + /// Read Flow Sensitive Profile. + FunctionPass *createMIRProfileLoaderPass(std::string File, + std::string RemappingFile, + sampleprof::FSDiscriminatorPass P); + /// Creates MIR Debugify pass. \see MachineDebugify.cpp ModulePass *createDebugifyMachineModulePass(); diff --git a/llvm/include/llvm/IR/DebugInfoMetadata.h b/llvm/include/llvm/IR/DebugInfoMetadata.h --- a/llvm/include/llvm/IR/DebugInfoMetadata.h +++ b/llvm/include/llvm/IR/DebugInfoMetadata.h @@ -2212,7 +2212,8 @@ return getCopyIdentifierFromDiscriminator(getDiscriminator()); } -Optional DILocation::cloneWithBaseDiscriminator(unsigned D) const { +Optional +DILocation::cloneWithBaseDiscriminator(unsigned D) const { unsigned BD, DF, CI; if (EnableFSDiscriminator) { @@ -2230,7 +2231,8 @@ return None; } -Optional DILocation::cloneByMultiplyingDuplicationFactor(unsigned DF) const { +Optional +DILocation::cloneByMultiplyingDuplicationFactor(unsigned DF) const { assert(!EnableFSDiscriminator && "FSDiscriminator should not call this."); DF *= getDuplicationFactor(); diff --git a/llvm/include/llvm/InitializePasses.h b/llvm/include/llvm/InitializePasses.h --- a/llvm/include/llvm/InitializePasses.h +++ b/llvm/include/llvm/InitializePasses.h @@ -64,6 +64,7 @@ void initializeAAResultsWrapperPassPass(PassRegistry&); void initializeADCELegacyPassPass(PassRegistry&); void initializeAddDiscriminatorsLegacyPassPass(PassRegistry&); +void initializeAddFSDiscriminatorsPass(PassRegistry &); void initializeModuleAddressSanitizerLegacyPassPass(PassRegistry &); void initializeASanGlobalsMetadataWrapperPassPass(PassRegistry &); void 
initializeAddressSanitizerLegacyPassPass(PassRegistry &); @@ -183,6 +184,7 @@ void initializeGlobalsAAWrapperPassPass(PassRegistry&); void initializeGuardWideningLegacyPassPass(PassRegistry&); void initializeHardwareLoopsPass(PassRegistry&); +void initializeMIRProfileLoaderPassPass(PassRegistry &); void initializeMemProfilerLegacyPassPass(PassRegistry &); void initializeHotColdSplittingLegacyPassPass(PassRegistry&); void initializeHWAddressSanitizerLegacyPassPass(PassRegistry &); diff --git a/llvm/include/llvm/Passes/PassBuilder.h b/llvm/include/llvm/Passes/PassBuilder.h --- a/llvm/include/llvm/Passes/PassBuilder.h +++ b/llvm/include/llvm/Passes/PassBuilder.h @@ -20,6 +20,7 @@ #include "llvm/IR/PassManager.h" #include "llvm/Passes/OptimizationLevel.h" #include "llvm/Support/Error.h" +#include "llvm/Support/PGOOptions.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Transforms/IPO/Inliner.h" #include "llvm/Transforms/Instrumentation.h" @@ -32,49 +33,6 @@ class TargetMachine; class ModuleSummaryIndex; -/// A struct capturing PGO tunables. -struct PGOOptions { - enum PGOAction { NoAction, IRInstr, IRUse, SampleUse }; - enum CSPGOAction { NoCSAction, CSIRInstr, CSIRUse }; - PGOOptions(std::string ProfileFile = "", std::string CSProfileGenFile = "", - std::string ProfileRemappingFile = "", PGOAction Action = NoAction, - CSPGOAction CSAction = NoCSAction, - bool DebugInfoForProfiling = false, - bool PseudoProbeForProfiling = false) - : ProfileFile(ProfileFile), CSProfileGenFile(CSProfileGenFile), - ProfileRemappingFile(ProfileRemappingFile), Action(Action), - CSAction(CSAction), DebugInfoForProfiling(DebugInfoForProfiling || - (Action == SampleUse && - !PseudoProbeForProfiling)), - PseudoProbeForProfiling(PseudoProbeForProfiling) { - // Note, we do allow ProfileFile.empty() for Action=IRUse LTO can - // callback with IRUse action without ProfileFile. - - // If there is a CSAction, PGOAction cannot be IRInstr or SampleUse. 
- assert(this->CSAction == NoCSAction || - (this->Action != IRInstr && this->Action != SampleUse)); - - // For CSIRInstr, CSProfileGenFile also needs to be nonempty. - assert(this->CSAction != CSIRInstr || !this->CSProfileGenFile.empty()); - - // If CSAction is CSIRUse, PGOAction needs to be IRUse as they share - // a profile. - assert(this->CSAction != CSIRUse || this->Action == IRUse); - - // If neither Action nor CSAction, DebugInfoForProfiling or - // PseudoProbeForProfiling needs to be true. - assert(this->Action != NoAction || this->CSAction != NoCSAction || - this->DebugInfoForProfiling || this->PseudoProbeForProfiling); - } - std::string ProfileFile; - std::string CSProfileGenFile; - std::string ProfileRemappingFile; - PGOAction Action; - CSPGOAction CSAction; - bool DebugInfoForProfiling; - bool PseudoProbeForProfiling; -}; - /// Tunable parameters for passes in the default pipelines. class PipelineTuningOptions { public: diff --git a/llvm/include/llvm/Support/PGOOptions.h b/llvm/include/llvm/Support/PGOOptions.h new file mode 100644 --- /dev/null +++ b/llvm/include/llvm/Support/PGOOptions.h @@ -0,0 +1,65 @@ +//===------ PGOOptions.h -- PGO option tunables ----------------*- C++ -*--===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// \file +/// +/// Define option tunables for PGO. +/// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_SUPPORT_PGOOPTIONS_H +#define LLVM_SUPPORT_PGOOPTIONS_H + +#include "llvm/Support/Error.h" + +namespace llvm { + +/// A struct capturing PGO tunables. 
+struct PGOOptions { + enum PGOAction { NoAction, IRInstr, IRUse, SampleUse }; + enum CSPGOAction { NoCSAction, CSIRInstr, CSIRUse }; + PGOOptions(std::string ProfileFile = "", std::string CSProfileGenFile = "", + std::string ProfileRemappingFile = "", PGOAction Action = NoAction, + CSPGOAction CSAction = NoCSAction, + bool DebugInfoForProfiling = false, + bool PseudoProbeForProfiling = false) + : ProfileFile(ProfileFile), CSProfileGenFile(CSProfileGenFile), + ProfileRemappingFile(ProfileRemappingFile), Action(Action), + CSAction(CSAction), DebugInfoForProfiling(DebugInfoForProfiling || + (Action == SampleUse && + !PseudoProbeForProfiling)), + PseudoProbeForProfiling(PseudoProbeForProfiling) { + // Note, we do allow ProfileFile.empty() for Action=IRUse LTO can + // callback with IRUse action without ProfileFile. + + // If there is a CSAction, PGOAction cannot be IRInstr or SampleUse. + assert(this->CSAction == NoCSAction || + (this->Action != IRInstr && this->Action != SampleUse)); + + // For CSIRInstr, CSProfileGenFile also needs to be nonempty. + assert(this->CSAction != CSIRInstr || !this->CSProfileGenFile.empty()); + + // If CSAction is CSIRUse, PGOAction needs to be IRUse as they share + // a profile. + assert(this->CSAction != CSIRUse || this->Action == IRUse); + + // If neither Action nor CSAction, DebugInfoForProfiling or + // PseudoProbeForProfiling needs to be true. 
+ assert(this->Action != NoAction || this->CSAction != NoCSAction || + this->DebugInfoForProfiling || this->PseudoProbeForProfiling); + } + std::string ProfileFile; + std::string CSProfileGenFile; + std::string ProfileRemappingFile; + PGOAction Action; + CSPGOAction CSAction; + bool DebugInfoForProfiling; + bool PseudoProbeForProfiling; +}; +} // namespace llvm + +#endif diff --git a/llvm/include/llvm/Target/TargetMachine.h b/llvm/include/llvm/Target/TargetMachine.h --- a/llvm/include/llvm/Target/TargetMachine.h +++ b/llvm/include/llvm/Target/TargetMachine.h @@ -13,6 +13,7 @@ #ifndef LLVM_TARGET_TARGETMACHINE_H #define LLVM_TARGET_TARGETMACHINE_H +#include "llvm/ADT/Optional.h" #include "llvm/ADT/StringRef.h" #include "llvm/ADT/Triple.h" #include "llvm/IR/DataLayout.h" @@ -20,6 +21,7 @@ #include "llvm/Pass.h" #include "llvm/Support/CodeGen.h" #include "llvm/Support/Error.h" +#include "llvm/Support/PGOOptions.h" #include "llvm/Target/CGPassBuilderOption.h" #include "llvm/Target/TargetOptions.h" #include @@ -110,6 +112,9 @@ unsigned RequireStructuredCFG : 1; unsigned O0WantsFastISel : 1; + // PGO related tunables. + Optional PGOOption = None; + public: const TargetOptions DefaultOptions; mutable TargetOptions Options; @@ -303,6 +308,9 @@ return false; } + void setPGOOption(Optional PGOOpt) { PGOOption = PGOOpt; } + const Optional &getPGOOption() const { return PGOOption; } + /// If the specified generic pointer could be assumed as a pointer to a /// specific address space, return that address space. 
/// diff --git a/llvm/include/llvm/Transforms/Utils/SampleProfileLoaderBaseImpl.h b/llvm/include/llvm/Transforms/Utils/SampleProfileLoaderBaseImpl.h --- a/llvm/include/llvm/Transforms/Utils/SampleProfileLoaderBaseImpl.h +++ b/llvm/include/llvm/Transforms/Utils/SampleProfileLoaderBaseImpl.h @@ -56,15 +56,20 @@ using FunctionT = Function; using BlockFrequencyInfoT = BlockFrequencyInfo; using LoopT = Loop; - using LoopInfoT = LoopInfo; + using LoopInfoPtrT = std::unique_ptr; + using DominatorTreePtrT = std::unique_ptr; + using PostDominatorTreeT = PostDominatorTree; + using PostDominatorTreePtrT = std::unique_ptr; using OptRemarkEmitterT = OptimizationRemarkEmitter; using OptRemarkAnalysisT = OptimizationRemarkAnalysis; - using DominatorTreeT = DominatorTree; - using PostDominatorTreeT = PostDominatorTree; + using PredRangeT = pred_range; + using SuccRangeT = succ_range; static Function &getFunction(Function &F) { return F; } static const BasicBlock *getEntryBB(const Function *F) { return &F->getEntryBlock(); } + static pred_range getPredecessors(BasicBlock *BB) { return predecessors(BB); } + static succ_range getSuccessors(BasicBlock *BB) { return successors(BB); } }; } // end namespace afdo_detail @@ -76,7 +81,8 @@ template class SampleProfileLoaderBaseImpl { public: - SampleProfileLoaderBaseImpl(std::string Name) : Filename(Name) {} + SampleProfileLoaderBaseImpl(std::string Name, std::string RemapName) + : Filename(Name), RemappingFilename(RemapName) {} void dump() { Reader->dump(); } using InstructionT = typename afdo_detail::IRTraits::InstructionT; @@ -85,14 +91,19 @@ typename afdo_detail::IRTraits::BlockFrequencyInfoT; using FunctionT = typename afdo_detail::IRTraits::FunctionT; using LoopT = typename afdo_detail::IRTraits::LoopT; - using LoopInfoT = typename afdo_detail::IRTraits::LoopInfoT; + using LoopInfoPtrT = typename afdo_detail::IRTraits::LoopInfoPtrT; + using DominatorTreePtrT = + typename afdo_detail::IRTraits::DominatorTreePtrT; + using 
PostDominatorTreePtrT = + typename afdo_detail::IRTraits::PostDominatorTreePtrT; + using PostDominatorTreeT = + typename afdo_detail::IRTraits::PostDominatorTreeT; using OptRemarkEmitterT = typename afdo_detail::IRTraits::OptRemarkEmitterT; using OptRemarkAnalysisT = typename afdo_detail::IRTraits::OptRemarkAnalysisT; - using DominatorTreeT = typename afdo_detail::IRTraits::DominatorTreeT; - using PostDominatorTreeT = - typename afdo_detail::IRTraits::PostDominatorTreeT; + using PredRangeT = typename afdo_detail::IRTraits::PredRangeT; + using SuccRangeT = typename afdo_detail::IRTraits::SuccRangeT; using BlockWeightMap = DenseMap; using EquivalenceClassMap = @@ -112,6 +123,12 @@ const BasicBlockT *getEntryBB(const FunctionT *F) { return afdo_detail::IRTraits::getEntryBB(F); } + PredRangeT getPredecessors(BasicBlockT *BB) { + return afdo_detail::IRTraits::getPredecessors(BB); + } + SuccRangeT getSuccessors(BasicBlockT *BB) { + return afdo_detail::IRTraits::getSuccessors(BB); + } unsigned getFunctionLoc(FunctionT &Func); virtual ErrorOr getInstWeight(const InstructionT &Inst); @@ -129,12 +146,11 @@ void findEquivalencesFor(BasicBlockT *BB1, ArrayRef Descendants, PostDominatorTreeT *DomTree); - void propagateWeights(FunctionT &F); uint64_t visitEdge(Edge E, unsigned *NumUnknownEdges, Edge *UnknownEdge); void buildEdges(FunctionT &F); bool propagateThroughEdges(FunctionT &F, bool UpdateBlockCount); - void clearFunctionData(); + void clearFunctionData(bool ResetDT = true); void computeDominanceAndLoopInfo(FunctionT &F); bool computeAndPropagateWeights(FunctionT &F, @@ -168,9 +184,9 @@ EquivalenceClassMap EquivalenceClass; /// Dominance, post-dominance and loop information. - std::unique_ptr DT; - std::unique_ptr PDT; - std::unique_ptr LI; + DominatorTreePtrT DT; + PostDominatorTreePtrT PDT; + LoopInfoPtrT LI; /// Predecessors for each basic block in the CFG. BlockEdgeMap Predecessors; @@ -190,6 +206,9 @@ /// Name of the profile file to load. 
std::string Filename; + /// Name of the profile remapping file to load. + std::string RemappingFilename; + /// Profile Summary Info computed from sample profile. ProfileSummaryInfo *PSI = nullptr; @@ -199,15 +218,17 @@ /// Clear all the per-function data used to load samples and propagate weights. template -void SampleProfileLoaderBaseImpl::clearFunctionData() { +void SampleProfileLoaderBaseImpl::clearFunctionData(bool ResetDT) { BlockWeights.clear(); EdgeWeights.clear(); VisitedBlocks.clear(); VisitedEdges.clear(); EquivalenceClass.clear(); - DT = nullptr; - PDT = nullptr; - LI = nullptr; + if (ResetDT) { + DT = nullptr; + PDT = nullptr; + LI = nullptr; + } Predecessors.clear(); Successors.clear(); CoverageTracker.clear(); @@ -475,7 +496,7 @@ // class by making BB2's equivalence class be BB1. DominatedBBs.clear(); DT->getDescendants(BB1, DominatedBBs); - findEquivalencesFor(BB1, DominatedBBs, PDT.get()); + findEquivalencesFor(BB1, DominatedBBs, &*PDT); LLVM_DEBUG(printBlockEquivalence(dbgs(), BB1)); } @@ -692,7 +713,7 @@ SmallPtrSet Visited; if (!Predecessors[B1].empty()) llvm_unreachable("Found a stale predecessors list in a basic block."); - for (BasicBlockT *B2 : predecessors(B1)) + for (auto *B2 : getPredecessors(B1)) if (Visited.insert(B2).second) Predecessors[B1].push_back(B2); @@ -700,7 +721,7 @@ Visited.clear(); if (!Successors[B1].empty()) llvm_unreachable("Found a stale successors list in a basic block."); - for (BasicBlockT *B2 : successors(B1)) + for (auto *B2 : getSuccessors(B1)) if (Visited.insert(B2).second) Successors[B1].push_back(B2); } @@ -911,12 +932,12 @@ template void SampleProfileLoaderBaseImpl::computeDominanceAndLoopInfo( FunctionT &F) { - DT.reset(new DominatorTreeT); + DT.reset(new DominatorTree); DT->recalculate(F); PDT.reset(new PostDominatorTree(F)); - LI.reset(new LoopInfoT); + LI.reset(new LoopInfo); LI->analyze(*DT); } diff --git a/llvm/lib/CodeGen/CMakeLists.txt b/llvm/lib/CodeGen/CMakeLists.txt --- 
a/llvm/lib/CodeGen/CMakeLists.txt +++ b/llvm/lib/CodeGen/CMakeLists.txt @@ -108,6 +108,7 @@ MachineTraceMetrics.cpp MachineVerifier.cpp MIRFSDiscriminator.cpp + MIRSampleProfile.cpp MIRYamlMapping.cpp ModuloSchedule.cpp MultiHazardRecognizer.cpp diff --git a/llvm/lib/CodeGen/MIRSampleProfile.cpp b/llvm/lib/CodeGen/MIRSampleProfile.cpp new file mode 100644 --- /dev/null +++ b/llvm/lib/CodeGen/MIRSampleProfile.cpp @@ -0,0 +1,335 @@ +//===-------- MIRSampleProfile.cpp: MIRSampleFDO (For FSAFDO) -------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file provides the implementation of the MIRSampleProfile loader, mainly +// for flow sensitive SampleFDO. +// +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/MIRSampleProfile.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/DenseSet.h" +#include "llvm/Analysis/BlockFrequencyInfoImpl.h" +#include "llvm/IR/Function.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Transforms/Utils/SampleProfileLoaderBaseImpl.h" +#include "llvm/Transforms/Utils/SampleProfileLoaderBaseUtil.h" + +using namespace llvm; +using namespace sampleprof; +using namespace llvm::sampleprofutil; +using ProfileCount = Function::ProfileCount; + +#define DEBUG_TYPE "fs-profile-loader" + +static cl::opt ShowFSBranchProb( + "show-fs-branchprob", cl::Hidden, cl::init(false), + cl::desc("Print setting flow sensitive branch probabilities")); +static cl::opt FSProfileDebugProbDiffThreshold( + "fs-profile-debug-prob-diff-threshold", cl::init(10), + cl::desc("Only show debug message if the branch probility is greater than " + "this value (in percentage).")); 
+ +static cl::opt FSProfileDebugBWThreshold( + "fs-profile-debug-bw-threshold", cl::init(10000), + cl::desc("Only show debug message if the source branch weight is greater " + " than this value.")); + +static cl::opt ViewBFIBefore("fs-viewbfi-before", cl::Hidden, + cl::init(false), + cl::desc("View BFI before MIR loader")); +static cl::opt ViewBFIAfter("fs-viewbfi-after", cl::Hidden, + cl::init(false), + cl::desc("View BFI after MIR loader")); + +char MIRProfileLoaderPass::ID = 0; + +INITIALIZE_PASS_BEGIN(MIRProfileLoaderPass, DEBUG_TYPE, + "Load MIR Sample Profile", + /* cfg = */ false, /* is_analysis = */ false) +INITIALIZE_PASS_DEPENDENCY(MachineBlockFrequencyInfo) +INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree) +INITIALIZE_PASS_DEPENDENCY(MachinePostDominatorTree) +INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo) +INITIALIZE_PASS_DEPENDENCY(MachineOptimizationRemarkEmitterPass) +INITIALIZE_PASS_END(MIRProfileLoaderPass, DEBUG_TYPE, "Load MIR Sample Profile", + /* cfg = */ false, /* is_analysis = */ false) + +char &llvm::MIRProfileLoaderPassID = MIRProfileLoaderPass::ID; + +FunctionPass *llvm::createMIRProfileLoaderPass(std::string File, + std::string RemappingFile, + FSDiscriminatorPass P) { + return new MIRProfileLoaderPass(File, RemappingFile, P); +} + +namespace llvm { + +// Internal option used to control BFI display only after MBP pass. 
+// Defined in CodeGen/MachineBlockFrequencyInfo.cpp: +// -view-block-layout-with-bfi={none | fraction | integer | count} +extern cl::opt ViewBlockLayoutWithBFI; + +// Command line option to specify the name of the function for CFG dump +// Defined in Analysis/BlockFrequencyInfo.cpp: -view-bfi-func-name= +extern cl::opt ViewBlockFreqFuncName; + +namespace afdo_detail { +template <> struct IRTraits { + using InstructionT = MachineInstr; + using BasicBlockT = MachineBasicBlock; + using FunctionT = MachineFunction; + using BlockFrequencyInfoT = MachineBlockFrequencyInfo; + using LoopT = MachineLoop; + using LoopInfoPtrT = MachineLoopInfo *; + using DominatorTreePtrT = MachineDominatorTree *; + using PostDominatorTreePtrT = MachinePostDominatorTree *; + using PostDominatorTreeT = MachinePostDominatorTree; + using OptRemarkEmitterT = MachineOptimizationRemarkEmitter; + using OptRemarkAnalysisT = MachineOptimizationRemarkAnalysis; + using PredRangeT = iterator_range::iterator>; + using SuccRangeT = iterator_range::iterator>; + static Function &getFunction(MachineFunction &F) { return F.getFunction(); } + static const MachineBasicBlock *getEntryBB(const MachineFunction *F) { + return GraphTraits::getEntryNode(F); + } + static PredRangeT getPredecessors(MachineBasicBlock *BB) { + return BB->predecessors(); + } + static SuccRangeT getSuccessors(MachineBasicBlock *BB) { + return BB->successors(); + } +}; +} // namespace afdo_detail + +class MIRProfileLoader final + : public SampleProfileLoaderBaseImpl { +public: + void setInitVals(MachineDominatorTree *MDT, MachinePostDominatorTree *MPDT, + MachineLoopInfo *MLI, MachineBlockFrequencyInfo *MBFI, + MachineOptimizationRemarkEmitter *MORE) { + DT = MDT; + PDT = MPDT; + LI = MLI; + BFI = MBFI; + ORE = MORE; + } + void setFSPass(FSDiscriminatorPass Pass) { + P = Pass; + LowBit = getFSPassBitBegin(P); + HighBit = getFSPassBitEnd(P); + assert(LowBit < HighBit && "HighBit needs to be greater than Lowbit"); + } + + 
MIRProfileLoader(StringRef Name, StringRef RemapName) + : SampleProfileLoaderBaseImpl(std::string(Name), std::string(RemapName)) { + } + + void setBranchProbs(MachineFunction &F); + bool runOnFunction(MachineFunction &F); + bool doInitialization(Module &M); + bool isValid() const { return ProfileIsValid; } + +protected: + friend class SampleCoverageTracker; + + /// Hold the information of the basic block frequency. + MachineBlockFrequencyInfo *BFI; + + /// PassNum is the sequence number this pass is called, start from 1. + FSDiscriminatorPass P; + + // LowBit in the FS discriminator used by this instance. Note the number is + // 0-based. Base discriminator uses bit 0 to bit 11. + unsigned LowBit; + // HighBit in the FS discriminator used by this instance. Note the number + // is 0-based. + unsigned HighBit; + + bool ProfileIsValid = true; +}; + +template <> +void SampleProfileLoaderBaseImpl< + MachineBasicBlock>::computeDominanceAndLoopInfo(MachineFunction &F) {} + +void MIRProfileLoader::setBranchProbs(MachineFunction &F) { + LLVM_DEBUG(dbgs() << "\nPropagation complete. Setting branch probs\n"); + for (auto &BI : F) { + MachineBasicBlock *BB = &BI; + if (BB->succ_size() < 2) + continue; + const MachineBasicBlock *EC = EquivalenceClass[BB]; + uint64_t BBWeight = BlockWeights[EC]; + uint64_t SumEdgeWeight = 0; + for (MachineBasicBlock::succ_iterator SI = BB->succ_begin(), + SE = BB->succ_end(); + SI != SE; ++SI) { + MachineBasicBlock *Succ = *SI; + Edge E = std::make_pair(BB, Succ); + SumEdgeWeight += EdgeWeights[E]; + } + + if (BBWeight != SumEdgeWeight) { + LLVM_DEBUG(dbgs() << "BBweight is not equal to SumEdgeWeight: BBWWeight=" + << BBWeight << " SumEdgeWeight= " << SumEdgeWeight + << "\n"); + BBWeight = SumEdgeWeight; + } + if (BBWeight == 0) { + LLVM_DEBUG(dbgs() << "SKIPPED. 
All branch weights are zero.\n"); + continue; + } + +#ifndef NDEBUG + uint64_t BBWeightOrig = BBWeight; +#endif + uint32_t MaxWeight = std::numeric_limits::max(); + // Factor is 64-bit: BBWeight / MaxWeight + 1 can exceed 32 bits, and a + // truncated value could be 0, which would divide by zero below. + uint64_t Factor = 1; + if (BBWeight > MaxWeight) { + Factor = BBWeight / MaxWeight + 1; + BBWeight /= Factor; + LLVM_DEBUG(dbgs() << "Scaling weights by " << Factor << "\n"); + } + + for (MachineBasicBlock::succ_iterator SI = BB->succ_begin(), + SE = BB->succ_end(); + SI != SE; ++SI) { + MachineBasicBlock *Succ = *SI; + Edge E = std::make_pair(BB, Succ); + uint64_t EdgeWeight = EdgeWeights[E]; + EdgeWeight /= Factor; + + // BBWeight equals the sum of the edge weights and both were divided by + // the same Factor, so no single edge may outweigh the block. + assert(BBWeight >= EdgeWeight && + "BBweight is smaller than EdgeWeight -- should not happen.\n"); + + BranchProbability OldProb = BFI->getMBPI()->getEdgeProbability(BB, SI); + BranchProbability NewProb(EdgeWeight, BBWeight); + if (OldProb == NewProb) + continue; + BB->setSuccProbability(SI, NewProb); +#ifndef NDEBUG + if (!ShowFSBranchProb) + continue; + bool Show = false; + BranchProbability Diff; + if (OldProb > NewProb) + Diff = OldProb - NewProb; + else + Diff = NewProb - OldProb; + Show = (Diff >= BranchProbability(FSProfileDebugProbDiffThreshold, 100)); + Show &= (BBWeightOrig >= FSProfileDebugBWThreshold); + + auto DIL = BB->findBranchDebugLoc(); + auto SuccDIL = Succ->findBranchDebugLoc(); + if (Show) { + dbgs() << "Set branch fs prob: MBB (" << BB->getNumber() << " -> " + << Succ->getNumber() << "): "; + if (DIL) + dbgs() << DIL->getFilename() << ":" << DIL->getLine() << ":" + << DIL->getColumn(); + if (SuccDIL) + dbgs() << "-->" << SuccDIL->getFilename() << ":" << SuccDIL->getLine() + << ":" << SuccDIL->getColumn(); + dbgs() << " W=" << BBWeightOrig << " " << OldProb << " --> " << NewProb + << "\n"; + } 
+#endif + } + } +} + +bool MIRProfileLoader::doInitialization(Module &M) { + auto &Ctx = M.getContext(); + + auto ReaderOrErr = sampleprof::SampleProfileReader::create(Filename, Ctx, P, + RemappingFilename); + if (std::error_code EC = ReaderOrErr.getError()) { + std::string Msg 
= "Could not open profile: " + EC.message(); + Ctx.diagnose(DiagnosticInfoSampleProfile(Filename, Msg)); + // No reader was created: mark the profile invalid so that + // runOnMachineFunction() bails out on isValid() instead of reaching + // runOnFunction(), which dereferences Reader. + ProfileIsValid = false; + return false; + } + + Reader = std::move(ReaderOrErr.get()); + Reader->setModule(&M); + ProfileIsValid = (Reader->read() == sampleprof_error::success); + Reader->getSummary(); + + return true; +} + +bool MIRProfileLoader::runOnFunction(MachineFunction &MF) { + Function &Func = MF.getFunction(); + clearFunctionData(false); + Samples = Reader->getSamplesFor(Func); + if (!Samples || Samples->empty()) + return false; + + if (getFunctionLoc(MF) == 0) + return false; + + DenseSet InlinedGUIDs; + bool Changed = computeAndPropagateWeights(MF, InlinedGUIDs); + + // Set the new BPI, BFI. + setBranchProbs(MF); + + return Changed; +} + +} // namespace llvm + +bool MIRProfileLoaderPass::runOnMachineFunction(MachineFunction &MF) { + if (!MIRSampleLoader->isValid()) + return false; + + LLVM_DEBUG(dbgs() << "MIRProfileLoader pass working on Func: " + << MF.getFunction().getName() << "\n"); + MBFI = &getAnalysis(); + MIRSampleLoader->setInitVals( + &getAnalysis(), + &getAnalysis(), &getAnalysis(), + MBFI, &getAnalysis().getORE()); + + MF.RenumberBlocks(); + if (ViewBFIBefore && ViewBlockLayoutWithBFI != GVDT_None && + (ViewBlockFreqFuncName.empty() || + MF.getFunction().getName().equals(ViewBlockFreqFuncName))) { + MBFI->view("MIR_Prof_loader_b." + MF.getName(), false); + } + + bool Changed = MIRSampleLoader->runOnFunction(MF); + + if (ViewBFIAfter && ViewBlockLayoutWithBFI != GVDT_None && + (ViewBlockFreqFuncName.empty() || + MF.getFunction().getName().equals(ViewBlockFreqFuncName))) { + MBFI->view("MIR_prof_loader_a." 
+ MF.getName(), false); + } + + return Changed; +} + +bool MIRProfileLoaderPass::doInitialization(Module &M) { + LLVM_DEBUG(dbgs() << "MIRProfileLoader pass working on Module " << M.getName() + << "\n"); + + MIRSampleLoader->setFSPass(P); + return MIRSampleLoader->doInitialization(M); +} + +void MIRProfileLoaderPass::getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesAll(); + AU.addRequired(); + AU.addRequired(); + AU.addRequired(); + AU.addRequiredTransitive(); + AU.addRequired(); + MachineFunctionPass::getAnalysisUsage(AU); +} diff --git a/llvm/lib/CodeGen/TargetPassConfig.cpp b/llvm/lib/CodeGen/TargetPassConfig.cpp --- a/llvm/lib/CodeGen/TargetPassConfig.cpp +++ b/llvm/lib/CodeGen/TargetPassConfig.cpp @@ -172,6 +172,24 @@ FSNoFinalDiscrim("fs-no-final-discrim", cl::init(false), cl::Hidden, cl::desc("Do not insert FS-AFDO discriminators before " "emit.")); +// Disable MIRProfileLoader before RegAlloc. This is for debugging and +// tuning purpose. +static cl::opt DisableRAFSProfileLoader( + "disable-ra-fsprofile-loader", cl::init(true), cl::Hidden, + cl::desc("Disable MIRProfileLoader before RegAlloc")); +// Disable MIRProfileLoader before BlockPlacement. This is for debugging +// and tuning purpose. +static cl::opt DisableLayoutFSProfileLoader( + "disable-layout-fsprofile-loader", cl::init(true), cl::Hidden, + cl::desc("Disable MIRProfileLoader before BlockPlacement")); +// Specify FSProfile file name. +static cl::opt + FSProfileFile("fs-profile-file", cl::init(""), cl::value_desc("filename"), + cl::desc("Flow Sensitive profile file name."), cl::Hidden); +// Specify Remapping file for FSProfile. +static cl::opt FSRemappingFile( + "fs-remapping-file", cl::init(""), cl::value_desc("filename"), + cl::desc("Flow Sensitive profile remapping file name."), cl::Hidden); // Temporary option to allow experimenting with MachineScheduler as a post-RA // scheduler. 
Targets can "properly" enable this with @@ -308,6 +326,28 @@ return TargetID; } +// Find the FSProfile file name. The internal option takes the precedence +// before getting from TargetMachine. +static const std::string getFSProfileFile(const TargetMachine *TM) { + if (!FSProfileFile.empty()) + return FSProfileFile.getValue(); + const Optional &PGOOpt = TM->getPGOOption(); + if (PGOOpt == None || PGOOpt->Action != PGOOptions::SampleUse) + return std::string(); + return PGOOpt->ProfileFile; +} + +// Find the Profile remapping file name. The internal option takes the +// precedence before getting from TargetMachine. +static const std::string getFSRemappingFile(const TargetMachine *TM) { + if (!FSRemappingFile.empty()) + return FSRemappingFile.getValue(); + const Optional &PGOOpt = TM->getPGOOption(); + if (PGOOpt == None || PGOOpt->Action != PGOOptions::SampleUse) + return std::string(); + return PGOOpt->ProfileRemappingFile; +} + //===---------------------------------------------------------------------===// /// TargetPassConfig //===---------------------------------------------------------------------===// @@ -1115,9 +1155,15 @@ // Add a FSDiscriminator pass right before RA, so that we could get // more precise SampleFDO profile for RA. - if (EnableFSDiscriminator) + if (EnableFSDiscriminator) { addPass(createMIRAddFSDiscriminatorsPass( sampleprof::FSDiscriminatorPass::Pass1)); + const std::string ProfileFile = getFSProfileFile(TM); + if (!ProfileFile.empty() && !DisableRAFSProfileLoader) + addPass( + createMIRProfileLoaderPass(ProfileFile, getFSRemappingFile(TM), + sampleprof::FSDiscriminatorPass::Pass1)); + } // Run register allocation and passes that are tightly coupled with it, // including phi elimination and scheduling. @@ -1471,9 +1517,15 @@ /// Add standard basic block placement passes. 
void TargetPassConfig::addBlockPlacement() { - if (EnableFSDiscriminator) + if (EnableFSDiscriminator) { addPass(createMIRAddFSDiscriminatorsPass( sampleprof::FSDiscriminatorPass::Pass2)); + const std::string ProfileFile = getFSProfileFile(TM); + if (!ProfileFile.empty() && !DisableLayoutFSProfileLoader) + addPass( + createMIRProfileLoaderPass(ProfileFile, getFSRemappingFile(TM), + sampleprof::FSDiscriminatorPass::Pass2)); + } if (addPass(&MachineBlockPlacementID)) { // Run a separate pass to collect block placement statistics. if (EnableBlockPlacementStats) diff --git a/llvm/lib/LTO/LTOBackend.cpp b/llvm/lib/LTO/LTOBackend.cpp --- a/llvm/lib/LTO/LTOBackend.cpp +++ b/llvm/lib/LTO/LTOBackend.cpp @@ -230,6 +230,8 @@ PGOOpt = PGOOptions("", "", "", PGOOptions::NoAction, PGOOptions::NoCSAction, true); } + if (TM) + TM->setPGOOption(PGOOpt); LoopAnalysisManager LAM; FunctionAnalysisManager FAM; diff --git a/llvm/lib/Transforms/IPO/SampleProfile.cpp b/llvm/lib/Transforms/IPO/SampleProfile.cpp --- a/llvm/lib/Transforms/IPO/SampleProfile.cpp +++ b/llvm/lib/Transforms/IPO/SampleProfile.cpp @@ -358,10 +358,10 @@ std::function GetAssumptionCache, std::function GetTargetTransformInfo, std::function GetTLI) - : SampleProfileLoaderBaseImpl(std::string(Name)), + : SampleProfileLoaderBaseImpl(std::string(Name), std::string(RemapName)), GetAC(std::move(GetAssumptionCache)), GetTTI(std::move(GetTargetTransformInfo)), GetTLI(std::move(GetTLI)), - RemappingFilename(std::string(RemapName)), LTOPhase(LTOPhase) {} + LTOPhase(LTOPhase) {} bool doInitialization(Module &M, FunctionAnalysisManager *FAM = nullptr); bool runOnModule(Module &M, ModuleAnalysisManager *AM, @@ -417,9 +417,6 @@ /// Profile tracker for different context. std::unique_ptr ContextTracker; - /// Name of the profile remapping file to load. 
- std::string RemappingFilename; - /// Flag indicating whether input profile is context-sensitive bool ProfileIsCS = false; diff --git a/llvm/test/CodeGen/X86/Inputs/fsloader.afdo b/llvm/test/CodeGen/X86/Inputs/fsloader.afdo new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/X86/Inputs/fsloader.afdo @@ -0,0 +1,35 @@ +work:42380966:1346190 + 1: 1246499 + 5: 1246499 +foo:28798256:4267 + 0: 4267 + 2.1: 255999 + 4: 264627 bar:250018 + 4.512: 269485 bar:278102 + 4.4608: 280297 bar:280933 + 4.12288: 278916 bar:267752 + 5: 264627 + 5.4096: 269485 + 5.8192: 260670 + 5.8704: 278916 + 6: 11541 + 6.3584: 278916 work:284547 + 6.4096: 260670 work:249428 + 6.8704: 11541 + 7: 272442 + 7.512: 283590 + 7.4608: 234082 + 7.9728: 279149 + 8: 11541 + 8.11776: 283590 work:305061 + 8.12288: 279149 work:281368 + 8.13824: 234082 work:225786 + 10: 4050 +bar:9504180:1076805 + 2: 1056020 + 3: 1056020 +main:20360:0 + 0: 0 + 2.1: 4045 + 3: 4156 foo:4267 + 5: 0 diff --git a/llvm/test/CodeGen/X86/fsafdo_test2.ll b/llvm/test/CodeGen/X86/fsafdo_test2.ll --- a/llvm/test/CodeGen/X86/fsafdo_test2.ll +++ b/llvm/test/CodeGen/X86/fsafdo_test2.ll @@ -1,4 +1,7 @@ ; RUN: llc -enable-fs-discriminator < %s | FileCheck %s +; RUN: llvm-profdata merge --sample -profile-isfs -o %t.afdo %S/Inputs/fsloader.afdo +; RUN: llc -enable-fs-discriminator -fs-profile-file=%t.afdo -show-fs-branchprob -disable-ra-fsprofile-loader=false -disable-layout-fsprofile-loader=false < %s 2>&1 | FileCheck %s --check-prefix=LOADER +; ;; ;; C source code for the test (compiler at -O3): ;; // A test case for loop unroll. @@ -50,6 +53,25 @@ ; CHECK: .byte 1 ; CHECK: .size __llvm_fs_discriminator__, 1 +;; Check that new branch probs are generated. 
+; LOADER: Set branch fs prob: MBB (1 -> 3): unroll.c:22:11-->unroll.c:24:11 W=283590 0x40000000 / 0x80000000 = 50.00% --> 0x7aca7894 / 0x80000000 = 95.93% +; LOADER: Set branch fs prob: MBB (1 -> 2): unroll.c:22:11 W=283590 0x40000000 / 0x80000000 = 50.00% --> 0x0535876c / 0x80000000 = 4.07% +; LOADER: Set branch fs prob: MBB (3 -> 5): unroll.c:24:11-->unroll.c:22:11 W=283590 0x30000000 / 0x80000000 = 37.50% --> 0x7aca7894 / 0x80000000 = 95.93% +; LOADER: Set branch fs prob: MBB (3 -> 4): unroll.c:24:11 W=283590 0x50000000 / 0x80000000 = 62.50% --> 0x0535876c / 0x80000000 = 4.07% +; LOADER: Set branch fs prob: MBB (5 -> 8): unroll.c:22:11-->unroll.c:24:11 W=283590 0x40000000 / 0x80000000 = 50.00% --> 0x021c112e / 0x80000000 = 1.65% +; LOADER: Set branch fs prob: MBB (5 -> 7): unroll.c:22:11 W=283590 0x40000000 / 0x80000000 = 50.00% --> 0x7de3eed2 / 0x80000000 = 98.35% +; LOADER: Set branch fs prob: MBB (8 -> 10): unroll.c:24:11-->unroll.c:22:11 W=283590 0x30000000 / 0x80000000 = 37.50% --> 0x00000000 / 0x80000000 = 0.00% +; LOADER: Set branch fs prob: MBB (8 -> 9): unroll.c:24:11 W=283590 0x50000000 / 0x80000000 = 62.50% --> 0x80000000 / 0x80000000 = 100.00% +; LOADER: Set branch fs prob: MBB (10 -> 12): unroll.c:22:11-->unroll.c:24:11 W=283590 0x40000000 / 0x80000000 = 50.00% --> 0x7aca7894 / 0x80000000 = 95.93% +; LOADER: Set branch fs prob: MBB (10 -> 11): unroll.c:22:11 W=283590 0x40000000 / 0x80000000 = 50.00% --> 0x0535876c / 0x80000000 = 4.07% +; LOADER: Set branch fs prob: MBB (12 -> 14): unroll.c:24:11-->unroll.c:22:11 W=283590 0x30000000 / 0x80000000 = 37.50% --> 0x02012507 / 0x80000000 = 1.57% +; LOADER: Set branch fs prob: MBB (12 -> 13): unroll.c:24:11 W=283590 0x50000000 / 0x80000000 = 62.50% --> 0x7dfedaf9 / 0x80000000 = 98.43% +; LOADER: Set branch fs prob: MBB (14 -> 16): unroll.c:22:11-->unroll.c:24:11 W=283590 0x40000000 / 0x80000000 = 50.00% --> 0x0a5856e1 / 0x80000000 = 8.08% +; LOADER: Set branch fs prob: MBB (14 -> 15): unroll.c:22:11 
W=283590 0x40000000 / 0x80000000 = 50.00% --> 0x75a7a91f / 0x80000000 = 91.92% +; LOADER: Set branch fs prob: MBB (16 -> 18): unroll.c:24:11-->unroll.c:19:3 W=283590 0x30000000 / 0x80000000 = 37.50% --> 0x16588166 / 0x80000000 = 17.46% +; LOADER: Set branch fs prob: MBB (16 -> 17): unroll.c:24:11 W=283590 0x50000000 / 0x80000000 = 62.50% --> 0x69a77e9a / 0x80000000 = 82.54% + + target triple = "x86_64-unknown-linux-gnu" @sum = dso_local local_unnamed_addr global i32 0, align 4 diff --git a/llvm/tools/opt/NewPMDriver.cpp b/llvm/tools/opt/NewPMDriver.cpp --- a/llvm/tools/opt/NewPMDriver.cpp +++ b/llvm/tools/opt/NewPMDriver.cpp @@ -284,6 +284,9 @@ P->CSAction = PGOOptions::CSIRUse; } } + if (TM) + TM->setPGOOption(P); + LoopAnalysisManager LAM; FunctionAnalysisManager FAM; CGSCCAnalysisManager CGAM;