Index: llvm/include/llvm/CodeGen/FlowSensitiveSampleProfile.h =================================================================== --- /dev/null +++ llvm/include/llvm/CodeGen/FlowSensitiveSampleProfile.h @@ -0,0 +1,103 @@ +//===----- FlowSensitiveSampleProfile.h: FS SampleFDO Support ---*- c++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file contains the supporting functions for Flow Sensitive Sample FDO. +// The AddFSDiscriminators pass adds flow sensitive DWARF discriminators to the +// instructions, so that different instruction clones will have their own +// sample value. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CODEGEN_FLOWSENSITIVESAMPLEPROFILE_H +#define LLVM_CODEGEN_FLOWSENSITIVESAMPLEPROFILE_H + +#include "llvm/Analysis/ProfileSummaryInfo.h" +#include "llvm/CodeGen/MachineBasicBlock.h" +#include "llvm/CodeGen/MachineBlockFrequencyInfo.h" +#include "llvm/CodeGen/MachineBranchProbabilityInfo.h" +#include "llvm/CodeGen/MachineDominators.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstr.h" +#include "llvm/CodeGen/MachineLoopInfo.h" +#include "llvm/CodeGen/MachineOptimizationRemarkEmitter.h" +#include "llvm/CodeGen/MachinePostDominators.h" +#include "llvm/CodeGen/Passes.h" +#include "llvm/IR/DebugInfoMetadata.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/Module.h" +#include "llvm/InitializePasses.h" +#include "llvm/ProfileData/InstrProf.h" +#include "llvm/ProfileData/SampleProf.h" +#include "llvm/ProfileData/SampleProfReader.h" + +#include + +namespace llvm { + +class AddFSDiscriminators : public MachineFunctionPass { + MachineFunction *MF; + unsigned 
LowBit; + unsigned HighBit; + +public: + static char ID; + /// FS bits will only use the '1' bits in the Mask. + AddFSDiscriminators(unsigned LowBit = 0, unsigned HighBit = 0) + : MachineFunctionPass(ID), LowBit(LowBit), HighBit(HighBit) { + assert(LowBit < HighBit && "HighBit needs to be greater than Lowbit"); + } + + /// getNumFSBBs() - Return the number of BBs that have FS samaples. + unsigned getNumFSBBs(); + + /// getNumFSSamples() - Return the number of samples that are flow sensitive. + uint64_t getNumFSSamples(); + + /// getMachineFunction - Return the last machine function computed. + const MachineFunction *getMachineFunction() const { return MF; } + +private: + bool runOnMachineFunction(MachineFunction &) override; +}; + +class FSProfileLoader; +class FSProfileLoaderPass : public MachineFunctionPass { + MachineFunction *MF; + std::string ProfileFileName; + unsigned LowBit; + unsigned HighBit; + +public: + static char ID; + /// FS bits will only use the '1' bits in the Mask. + FSProfileLoaderPass(std::string Filename = "", unsigned LowBit = 0, + unsigned HighBit = 0) + : MachineFunctionPass(ID), ProfileFileName(Filename), LowBit(LowBit), + HighBit(HighBit), + FSSampleLoader(std::make_unique(Filename)) { + assert(LowBit < HighBit && "HighBit needs to be greater than Lowbit"); + } + + /// getMachineFunction - Return the last machine function computed. + const MachineFunction *getMachineFunction() const { return MF; } + +private: + void init(MachineFunction &MF); + bool runOnMachineFunction(MachineFunction &) override; + bool doInitialization(Module &M) override; + void getAnalysisUsage(AnalysisUsage &AU) const override; + + std::unique_ptr FSSampleLoader; + /// Hold the information of the basic block frequency. 
+ MachineBlockFrequencyInfo *MBFI; +}; + +} // end namespace llvm + +#endif // LLVM_CODEGEN_FLOWSENSITIVESAMPLEPROFILE_H Index: llvm/include/llvm/CodeGen/MachineDominators.h =================================================================== --- llvm/include/llvm/CodeGen/MachineDominators.h +++ llvm/include/llvm/CodeGen/MachineDominators.h @@ -112,6 +112,12 @@ return DT->dominates(A, B); } + void getDescendants(MachineBasicBlock *A, + SmallVectorImpl &Result) { + applySplitCriticalEdges(); + DT->getDescendants(A, Result); + } + bool dominates(const MachineBasicBlock *A, const MachineBasicBlock *B) const { applySplitCriticalEdges(); return DT->dominates(A, B); Index: llvm/include/llvm/CodeGen/MachineOptimizationRemarkEmitter.h =================================================================== --- llvm/include/llvm/CodeGen/MachineOptimizationRemarkEmitter.h +++ llvm/include/llvm/CodeGen/MachineOptimizationRemarkEmitter.h @@ -118,6 +118,12 @@ : DiagnosticInfoMIROptimization(DK_MachineOptimizationRemarkAnalysis, PassName, RemarkName, Loc, MBB) {} + MachineOptimizationRemarkAnalysis(const char *PassName, StringRef RemarkName, + const MachineInstr *MI) + : DiagnosticInfoMIROptimization(DK_MachineOptimizationRemarkAnalysis, + PassName, RemarkName, MI->getDebugLoc(), + MI->getParent()) {} + static bool classof(const DiagnosticInfo *DI) { return DI->getKind() == DK_MachineOptimizationRemarkAnalysis; } Index: llvm/include/llvm/CodeGen/Passes.h =================================================================== --- llvm/include/llvm/CodeGen/Passes.h +++ llvm/include/llvm/CodeGen/Passes.h @@ -164,6 +164,12 @@ /// This pass perform post-ra machine sink for COPY instructions. extern char &PostRAMachineSinkingID; + /// This pass adds flow sensitive discriminators. + extern char &AddFSDiscriminatorsID; + + /// This pass reads flow sensitive profile. + extern char &FSProfileLoaderPassID; + /// FastRegisterAllocation Pass - This pass register allocates as fast as /// possible. 
It is best suited for debug code where live ranges are short. /// @@ -481,6 +487,13 @@ /// Create IR Type Promotion pass. \see TypePromotion.cpp FunctionPass *createTypePromotionPass(); + /// Add Flow Sensitive Discriminators. + FunctionPass *createAddFSDiscriminatorsPass(unsigned LowBit, + unsigned HighBit); + + /// Read Flow Sensitive Profile. + FunctionPass *createFSProfileLoaderPass(std::string File, unsigned LowBit, + unsigned HighBit); /// Creates MIR Debugify pass. \see MachineDebugify.cpp ModulePass *createDebugifyMachineModulePass(); Index: llvm/include/llvm/IR/DebugInfoMetadata.h =================================================================== --- llvm/include/llvm/IR/DebugInfoMetadata.h +++ llvm/include/llvm/IR/DebugInfoMetadata.h @@ -26,6 +26,8 @@ #include "llvm/IR/Constants.h" #include "llvm/IR/Metadata.h" #include "llvm/Support/Casting.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/FSAFDODiscriminator.h" #include #include #include @@ -58,6 +60,8 @@ } \ DEFINE_MDNODE_GET_DISTINCT_TEMPORARY(CLASS, FORMAL, ARGS) +extern llvm::cl::opt EnableFSDiscriminator; + namespace llvm { class DITypeRefArray { @@ -1758,8 +1762,19 @@ static const DILocation *getMergedLocations(ArrayRef Locs); + /// Return Discriminator that cleaning bit B and above (0 based, inclusive). + /// (0x1FF, 7) = 0xFF. + static unsigned getMaskedDiscriminator(unsigned D, unsigned B) { + if (B == 0) + return D; + return (D & getN1Bits(B)); + } + static unsigned getBaseDiscriminatorBits() { return BASE_DIS_BIT_END; } + /// Returns the base discriminator for a given encoded discriminator \p D. static unsigned getBaseDiscriminatorFromDiscriminator(unsigned D) { + if (EnableFSDiscriminator) + return getMaskedDiscriminator(D, getBaseDiscriminatorBits()); return getUnsignedFromPrefixEncoding(D); } @@ -1781,6 +1796,8 @@ /// Returns the duplication factor for a given encoded discriminator \p D, or /// 1 if no value or 0 is encoded. 
static unsigned getDuplicationFactorFromDiscriminator(unsigned D) { + if (EnableFSDiscriminator) + return 1; D = getNextComponentInDiscriminator(D); unsigned Ret = getUnsignedFromPrefixEncoding(D); if (Ret == 0) @@ -2220,9 +2237,14 @@ Optional DILocation::cloneWithBaseDiscriminator(unsigned D) const { unsigned BD, DF, CI; - decodeDiscriminator(getDiscriminator(), BD, DF, CI); + if (EnableFSDiscriminator) + BD = getBaseDiscriminator(); + else + decodeDiscriminator(getDiscriminator(), BD, DF, CI); if (D == BD) return this; + if (EnableFSDiscriminator) + return cloneWithDiscriminator(D); if (Optional Encoded = encodeDiscriminator(D, DF, CI)) return cloneWithDiscriminator(*Encoded); return None; Index: llvm/include/llvm/InitializePasses.h =================================================================== --- llvm/include/llvm/InitializePasses.h +++ llvm/include/llvm/InitializePasses.h @@ -61,9 +61,10 @@ void initializeTarget(PassRegistry&); void initializeAAEvalLegacyPassPass(PassRegistry&); -void initializeAAResultsWrapperPassPass(PassRegistry&); -void initializeADCELegacyPassPass(PassRegistry&); -void initializeAddDiscriminatorsLegacyPassPass(PassRegistry&); +void initializeAAResultsWrapperPassPass(PassRegistry &); +void initializeADCELegacyPassPass(PassRegistry &); +void initializeAddDiscriminatorsLegacyPassPass(PassRegistry &); +void initializeAddFSDiscriminatorsPass(PassRegistry &); void initializeModuleAddressSanitizerLegacyPassPass(PassRegistry &); void initializeASanGlobalsMetadataWrapperPassPass(PassRegistry &); void initializeAddressSanitizerLegacyPassPass(PassRegistry &); @@ -154,11 +155,12 @@ void initializeExpandMemCmpPassPass(PassRegistry&); void initializeExpandPostRAPass(PassRegistry&); void initializeExpandReductionsPass(PassRegistry&); -void initializeMakeGuardsExplicitLegacyPassPass(PassRegistry&); -void initializeExternalAAWrapperPassPass(PassRegistry&); -void initializeFEntryInserterPass(PassRegistry&); -void 
initializeFinalizeISelPass(PassRegistry&); -void initializeFinalizeMachineBundlesPass(PassRegistry&); +void initializeMakeGuardsExplicitLegacyPassPass(PassRegistry &); +void initializeExternalAAWrapperPassPass(PassRegistry &); +void initializeFEntryInserterPass(PassRegistry &); +void initializeFSProfileLoaderPassPass(PassRegistry &); +void initializeFinalizeISelPass(PassRegistry &); +void initializeFinalizeMachineBundlesPass(PassRegistry &); void initializeFixIrreduciblePass(PassRegistry &); void initializeFixupStatepointCallerSavedPass(PassRegistry&); void initializeFlattenCFGPassPass(PassRegistry&); Index: llvm/include/llvm/LTO/Config.h =================================================================== --- llvm/include/llvm/LTO/Config.h +++ llvm/include/llvm/LTO/Config.h @@ -171,6 +171,9 @@ bool ShouldDiscardValueNames = true; DiagnosticHandlerFunction DiagHandler; + /// Add FS AFDO discriminator. + bool AddFSDiscriminator = false; + /// If this field is set, LTO will write input file paths and symbol /// resolutions here in llvm-lto2 command line flag format. 
This can be /// used for testing and for running the LTO pipeline outside of the linker Index: llvm/include/llvm/ProfileData/SampleProfReader.h =================================================================== --- llvm/include/llvm/ProfileData/SampleProfReader.h +++ llvm/include/llvm/ProfileData/SampleProfReader.h @@ -234,6 +234,7 @@ #include "llvm/ProfileData/SampleProf.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorOr.h" +#include "llvm/Support/FSAFDODiscriminator.h" #include "llvm/Support/MemoryBuffer.h" #include "llvm/Support/SymbolRemappingReader.h" #include @@ -342,6 +343,13 @@ SampleProfileFormat Format = SPF_None) : Profiles(0), Ctx(C), Buffer(std::move(B)), Format(Format) {} + void setDiscriminatorMaskedBitFrom(uint32_t B) { MaskedBitFrom = B; } + + inline uint32_t getDiscriminatorMask() const { + assert((MaskedBitFrom != 0) && "MaskedBitFrom is not set properly"); + return getN1Bits(MaskedBitFrom); + } + virtual ~SampleProfileReader() = default; /// Read and validate the file header. @@ -509,6 +517,10 @@ /// is used by compiler. If SampleProfileReader is used by other /// tools which are not compiler, M is usually nullptr. const Module *M = nullptr; + + /// The samples in this class masked the discriminator bit ending + /// with bit MaskedBitFrom (0 based). Default should be for the base. + unsigned MaskedBitFrom = 31; }; class SampleProfileReaderText : public SampleProfileReader { Index: llvm/include/llvm/Support/FSAFDODiscriminator.h =================================================================== --- /dev/null +++ llvm/include/llvm/Support/FSAFDODiscriminator.h @@ -0,0 +1,100 @@ +//===- llvm/Support/FSAFDODiscriminator.h -----------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the bits to be used by various FSAFDO passes.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_SUPPORT_FSAFDODISCRIMINATOR_H
+#define LLVM_SUPPORT_FSAFDODISCRIMINATOR_H
+
+// Inclusive, 0-based bit ranges of the 32-bit discriminator value. Each
+// BEG/END pair below delimits one bucket (see getFSBucket).
+#define BASE_DIS_BIT_BEG 0
+#define BASE_DIS_BIT_END 7
+
+#define PASS_1_DIS_BIT_BEG 8
+#define PASS_1_DIS_BIT_END 13
+
+#define PASS_2_DIS_BIT_BEG 14
+#define PASS_2_DIS_BIT_END 19
+
+#define PASS_3_DIS_BIT_BEG 20
+#define PASS_3_DIS_BIT_END 25
+
+#define PASS_LAST_DIS_BIT_BEG 26
+#define PASS_LAST_DIS_BIT_END 31
+
+// Return a mask with bits 0 .. N (inclusive) set to 1. N >= 31 yields all
+// ones; a negative N yields 0.
+inline unsigned getN1Bits(int N) {
+  if (N >= 31)
+    return 0xFFFFFFFF;
+  if (N < 0)
+    return 0;
+  // Shift an unsigned one: with a signed literal, N == 30 would compute
+  // `1 << 31` and then overflow int on the subtraction.
+  return (1U << (N + 1)) - 1;
+}
+
+// Given a discriminator value, return the index of the highest bucket that
+// has a bit set in it: 5 for the PASS_LAST range, 4 for PASS_3, 3 for PASS_2,
+// 2 for PASS_1 and 1 for the base range. Returns 0 when the value is 0.
+inline unsigned getFSBucket(unsigned int DiscriminatorVal) {
+  unsigned int N = DiscriminatorVal;
+  if (N == 0)
+    return 0;
+  // getN1Bits(BEG - 1) ^ getN1Bits(END) is the mask of bits BEG .. END.
+  if (N &
+      (getN1Bits(PASS_LAST_DIS_BIT_BEG - 1) ^ getN1Bits(PASS_LAST_DIS_BIT_END)))
+    return 5;
+  if (N & (getN1Bits(PASS_3_DIS_BIT_BEG - 1) ^ getN1Bits(PASS_3_DIS_BIT_END)))
+    return 4;
+  if (N & (getN1Bits(PASS_2_DIS_BIT_BEG - 1) ^ getN1Bits(PASS_2_DIS_BIT_END)))
+    return 3;
+  if (N & (getN1Bits(PASS_1_DIS_BIT_BEG - 1) ^ getN1Bits(PASS_1_DIS_BIT_END)))
+    return 2;
+  return 1;
+}
+
+// Return the field stored in bits LowBit .. HighBit (inclusive) of N, shifted
+// down so that bit LowBit becomes bit 0.
+inline unsigned getFSBucketVal(int LowBit, int HighBit, unsigned N) {
+  // Shift first, then mask with the field width. Masking N with the
+  // width-sized mask before shifting selects the wrong bits whenever
+  // LowBit > 0.
+  return (N >> LowBit) & getN1Bits(HighBit - LowBit);
+}
+
+// Return the field of N that belongs to bucket B (1 = base range, 2 .. 4 =
+// PASS_1 .. PASS_3, 5 = PASS_LAST). Any other B is unreachable.
+inline unsigned getFSBucketVal(int B, unsigned N) {
+  switch (B) {
+  case 1:
+    return getFSBucketVal(BASE_DIS_BIT_BEG, BASE_DIS_BIT_END, N);
+  case 2:
+    return getFSBucketVal(PASS_1_DIS_BIT_BEG, PASS_1_DIS_BIT_END, N);
+  case 3:
+    return getFSBucketVal(PASS_2_DIS_BIT_BEG, PASS_2_DIS_BIT_END, N);
+  case 4:
+    return getFSBucketVal(PASS_3_DIS_BIT_BEG, PASS_3_DIS_BIT_END, N);
+  case 5:
+    return
getFSBucketVal(PASS_LAST_DIS_BIT_BEG, PASS_LAST_DIS_BIT_END, N); + default: + llvm_unreachable("Wrong FSBucket Number"); + } +} + +inline void setFSBucketVal(int LowBit, int HighBit, unsigned Val, unsigned &N) { + unsigned int V = Val & getN1Bits(HighBit - LowBit); + V = V << LowBit; + N |= V; +} + +inline void setFSBucketVal(int B, unsigned Val, unsigned &N) { + switch (B) { + case 1: + return setFSBucketVal(BASE_DIS_BIT_BEG, BASE_DIS_BIT_END, Val, N); + case 2: + return setFSBucketVal(PASS_1_DIS_BIT_BEG, PASS_1_DIS_BIT_END, Val, N); + case 3: + return setFSBucketVal(PASS_2_DIS_BIT_BEG, PASS_2_DIS_BIT_END, Val, N); + case 4: + return setFSBucketVal(PASS_3_DIS_BIT_BEG, PASS_3_DIS_BIT_END, Val, N); + case 5: + return setFSBucketVal(PASS_LAST_DIS_BIT_BEG, PASS_LAST_DIS_BIT_END, Val, N); + default: + llvm_unreachable("Wrong FSBucket Number"); + } +} + +#endif /* LLVM_SUPPORT_FSAFDODISCRIMINATOR_H */ Index: llvm/include/llvm/Transforms/Utils/SampleProfileLoaderBaseImpl.h =================================================================== --- llvm/include/llvm/Transforms/Utils/SampleProfileLoaderBaseImpl.h +++ llvm/include/llvm/Transforms/Utils/SampleProfileLoaderBaseImpl.h @@ -56,11 +56,12 @@ using FunctionT = Function; using BlockFrequencyInfoT = BlockFrequencyInfo; using LoopT = Loop; - using LoopInfoT = LoopInfo; + using LoopInfoPtrT = std::unique_ptr; + using DominatorTreePtrT = std::unique_ptr; + using PostDominatorTreeT = PostDominatorTree; + using PostDominatorTreePtrT = std::unique_ptr; using OptRemarkEmitterT = OptimizationRemarkEmitter; using OptRemarkAnalysisT = OptimizationRemarkAnalysis; - using DominatorTreeT = DominatorTree; - using PostDominatorTreeT = PostDominatorTree; static Function &getFunction(Function &F) { return F; } static const BasicBlock *getEntryBB(const Function *F) { return &F->getEntryBlock(); @@ -85,14 +86,17 @@ typename afdo_detail::IRTraits::BlockFrequencyInfoT; using FunctionT = typename afdo_detail::IRTraits::FunctionT; using LoopT 
= typename afdo_detail::IRTraits::LoopT; - using LoopInfoT = typename afdo_detail::IRTraits::LoopInfoT; + using LoopInfoPtrT = typename afdo_detail::IRTraits::LoopInfoPtrT; + using DominatorTreePtrT = + typename afdo_detail::IRTraits::DominatorTreePtrT; + using PostDominatorTreePtrT = + typename afdo_detail::IRTraits::PostDominatorTreePtrT; + using PostDominatorTreeT = + typename afdo_detail::IRTraits::PostDominatorTreeT; using OptRemarkEmitterT = typename afdo_detail::IRTraits::OptRemarkEmitterT; using OptRemarkAnalysisT = typename afdo_detail::IRTraits::OptRemarkAnalysisT; - using DominatorTreeT = typename afdo_detail::IRTraits::DominatorTreeT; - using PostDominatorTreeT = - typename afdo_detail::IRTraits::PostDominatorTreeT; using BlockWeightMap = DenseMap; using EquivalenceClassMap = @@ -129,7 +133,6 @@ void findEquivalencesFor(BasicBlockT *BB1, ArrayRef Descendants, PostDominatorTreeT *DomTree); - void propagateWeights(FunctionT &F); uint64_t visitEdge(Edge E, unsigned *NumUnknownEdges, Edge *UnknownEdge); void buildEdges(FunctionT &F); @@ -168,9 +171,9 @@ EquivalenceClassMap EquivalenceClass; /// Dominance, post-dominance and loop information. - std::unique_ptr DT; - std::unique_ptr PDT; - std::unique_ptr LI; + DominatorTreePtrT DT; + PostDominatorTreePtrT PDT; + LoopInfoPtrT LI; /// Predecessors for each basic block in the CFG. BlockEdgeMap Predecessors; @@ -279,7 +282,11 @@ const DILocation *DIL = DLoc; uint32_t LineOffset = FunctionSamples::getOffset(DIL); - uint32_t Discriminator = DIL->getBaseDiscriminator(); + uint32_t Discriminator; + if (EnableFSDiscriminator) + Discriminator = DIL->getDiscriminator(); + else + Discriminator = DIL->getBaseDiscriminator(); ErrorOr R = FS->findSamplesAt(LineOffset, Discriminator); if (R) { bool FirstMark = @@ -298,11 +305,9 @@ return Remark; }); } - LLVM_DEBUG(dbgs() << " " << DLoc.getLine() << "." - << DIL->getBaseDiscriminator() << ":" << Inst - << " (line offset: " << LineOffset << "." 
- << DIL->getBaseDiscriminator() << " - weight: " << R.get() - << ")\n"); + LLVM_DEBUG(dbgs() << " " << DLoc.getLine() << "." << Discriminator << ":" + << Inst << " (line offset: " << LineOffset << "." + << Discriminator << " - weight: " << R.get() << ")\n"); } return R; } @@ -472,7 +477,7 @@ // class by making BB2's equivalence class be BB1. DominatedBBs.clear(); DT->getDescendants(BB1, DominatedBBs); - findEquivalencesFor(BB1, DominatedBBs, PDT.get()); + findEquivalencesFor(BB1, DominatedBBs, &*PDT); LLVM_DEBUG(printBlockEquivalence(dbgs(), BB1)); } @@ -908,12 +913,12 @@ template void SampleProfileLoaderBaseImpl::computeDominanceAndLoopInfo( FunctionT &F) { - DT.reset(new DominatorTreeT); + DT.reset(new DominatorTree); DT->recalculate(F); PDT.reset(new PostDominatorTree(F)); - LI.reset(new LoopInfoT); + LI.reset(new LoopInfo); LI->analyze(*DT); } Index: llvm/lib/CodeGen/CMakeLists.txt =================================================================== --- llvm/lib/CodeGen/CMakeLists.txt +++ llvm/lib/CodeGen/CMakeLists.txt @@ -33,6 +33,7 @@ FEntryInserter.cpp FinalizeISel.cpp FixupStatepointCallerSaved.cpp + FlowSensitiveSampleProfile.cpp FuncletLayout.cpp GCMetadata.cpp GCMetadataPrinter.cpp Index: llvm/lib/CodeGen/FlowSensitiveSampleProfile.cpp =================================================================== --- /dev/null +++ llvm/lib/CodeGen/FlowSensitiveSampleProfile.cpp @@ -0,0 +1,454 @@ +//===-------- FlowSensitiveSampleProfile.cpp: Flow Sensitive SampleFDO-----===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file provides the implementation of the flow sensitive SampleFDO. 
+// +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/FlowSensitiveSampleProfile.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/DenseSet.h" +#include "llvm/Analysis/BlockFrequencyInfoImpl.h" +#include "llvm/IR/Function.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Transforms/Utils/SampleProfileLoaderBaseImpl.h" +#include "llvm/Transforms/Utils/SampleProfileLoaderBaseUtil.h" + +using namespace llvm; +using namespace sampleprof; +using namespace llvm::sampleprofutil; +using ProfileCount = Function::ProfileCount; + +#define DEBUG_TYPE "fs" +#define DEBUG_TYPE_DISCRIMINATOR DEBUG_TYPE "-discriminators" +#define DEBUG_TYPE_LOADER DEBUG_TYPE "-profile-loader" + +extern cl::opt EnableFSDiscriminator; +static cl::opt + DisableFSProfileLoader("disable-fs-profile-loader", cl::Hidden, + cl::init(false), + cl::desc("Disable flow senstive profile loading")); +static cl::opt EnableFSBranchProb( + "enable-fs-branchprob", cl::Hidden, cl::init(true), + cl::desc("Enable seting flow senstive branch probabilities")); + +static cl::opt FSProfileDebugProbDiffThreshold( + "fs-profile-debug-prob-diff-threshold", cl::init(10), + cl::desc("Only show debug message if the branch probility is greater than " + "this value (in percentage).")); + +static cl::opt FSProfileDebugBWThreshold( + "fs-profile-debug-bw-threshold", cl::init(10000), + cl::desc("Only show debug message if the source branch weight is greater " + " than this value.")); + +static cl::opt ViewBFIBefore("fs-viewbfi-before", cl::Hidden, + cl::init(false), + cl::desc("View BFI before FS loader")); +static cl::opt ViewBFIAfter("fs-viewbfi-after", cl::Hidden, + cl::init(false), + cl::desc("View BFI after FS loader")); + +// Internal option used to control BFI display only after MBP pass. 
+// Defined in CodeGen/MachineBlockFrequencyInfo.cpp: +// -view-block-layout-with-bfi={none | fraction | integer | count} +extern cl::opt ViewBlockLayoutWithBFI; + +// Command line option to specify the name of the function for CFG dump +// Defined in Analysis/BlockFrequencyInfo.cpp: -view-bfi-func-name= +extern cl::opt ViewBlockFreqFuncName; + +char AddFSDiscriminators::ID = 0; +char FSProfileLoaderPass::ID = 0; + +INITIALIZE_PASS(AddFSDiscriminators, DEBUG_TYPE_DISCRIMINATOR, + "Add Flow Sensitive Discriminators", + /* cfg = */ false, /* is_analysis = */ false) + +INITIALIZE_PASS_BEGIN(FSProfileLoaderPass, DEBUG_TYPE_LOADER, + "Load Flow Sensitive Profile", + /* cfg = */ false, /* is_analysis = */ false) +INITIALIZE_PASS_DEPENDENCY(MachineBlockFrequencyInfo) +INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree) +INITIALIZE_PASS_DEPENDENCY(MachinePostDominatorTree) +INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo) +INITIALIZE_PASS_DEPENDENCY(MachineOptimizationRemarkEmitterPass) +INITIALIZE_PASS_END(FSProfileLoaderPass, DEBUG_TYPE_LOADER, + "Load Flow Sensitive Profile", + /* cfg = */ false, /* is_analysis = */ false) + +char &llvm::AddFSDiscriminatorsID = AddFSDiscriminators::ID; +char &llvm::FSProfileLoaderPassID = FSProfileLoaderPass::ID; + +FunctionPass *llvm::createAddFSDiscriminatorsPass(unsigned LowBit, + unsigned HighBit) { + return new AddFSDiscriminators(LowBit, HighBit); +} +FunctionPass *llvm::createFSProfileLoaderPass(std::string File, unsigned LowBit, + unsigned HighBit) { + return new FSProfileLoaderPass(File, LowBit, HighBit); +} + +static uint64_t getCallStackHash(const MachineBasicBlock &BB, + const MachineInstr &MI, + const DILocation *DIL) { + uint64_t Ret = MD5Hash(std::to_string(DIL->getLine())); + Ret ^= MD5Hash(BB.getName()); + Ret ^= MD5Hash(DIL->getScope()->getSubprogram()->getLinkageName()); + for (DIL = DIL->getInlinedAt(); DIL; DIL = DIL->getInlinedAt()) { + Ret ^= MD5Hash(std::to_string(DIL->getLine())); + Ret ^= 
MD5Hash(DIL->getScope()->getSubprogram()->getLinkageName()); + } + return Ret; +} + +namespace llvm { + +namespace afdo_detail { +template <> struct IRTraits { + using InstructionT = MachineInstr; + using BasicBlockT = MachineBasicBlock; + using FunctionT = MachineFunction; + using BlockFrequencyInfoT = MachineBlockFrequencyInfo; + using LoopT = MachineLoop; + using LoopInfoPtrT = MachineLoopInfo *; + using DominatorTreePtrT = MachineDominatorTree *; + using PostDominatorTreePtrT = MachinePostDominatorTree *; + using PostDominatorTreeT = MachinePostDominatorTree; + using OptRemarkEmitterT = MachineOptimizationRemarkEmitter; + using OptRemarkAnalysisT = MachineOptimizationRemarkAnalysis; + static Function &getFunction(MachineFunction &F) { return F.getFunction(); } + static const MachineBasicBlock *getEntryBB(const MachineFunction *F) { + return GraphTraits::getEntryNode(F); + } +}; +} // namespace afdo_detail + +class FSProfileLoader final + : public SampleProfileLoaderBaseImpl { +public: + void setInitVals(MachineDominatorTree *MDT_, MachinePostDominatorTree *MPDT_, + MachineLoopInfo *MLI_, MachineBlockFrequencyInfo *MBFI_, + MachineOptimizationRemarkEmitter *ORE_) { + DT = MDT_; + PDT = MPDT_; + LI = MLI_; + MBFI = MBFI_; + ORE = ORE_; + } + void setMaskBitVals(unsigned LowBit_, unsigned HighBit_) { + LowBit = LowBit_; + HighBit = HighBit_; + } + + FSProfileLoader(StringRef Name) + : SampleProfileLoaderBaseImpl(std::string(Name)) {} + + void setBranchProbs(MachineFunction &F); + bool runOnFunction(MachineFunction &F); + bool doInitialization(Module &M); + bool isValid() const { return ProfileIsValid; } + +protected: + friend class SampleCoverageTracker; + + /// Hold the information of the basic block frequency. + MachineBlockFrequencyInfo *MBFI; + + // LowBit in the FS discriminator used by this instance. Note the number is + // 0-based. Base discrimnator use bit 0 to bit 11. + unsigned LowBit; + // HighwBit in the FS discriminator used by this instance. 
Note the number
+  // is 0-based.
+  unsigned HighBit;
+
+  bool ProfileIsValid = true;
+};
+
+/// Intentionally empty for machine IR: DT, PDT and LI are plain pointers that
+/// are supplied through FSProfileLoader::setInitVals().
+template <>
+void SampleProfileLoaderBaseImpl<
+    MachineBasicBlock>::computeDominanceAndLoopInfo(MachineFunction &F) {}
+
+/// Build in/out edge lists for each basic block in the CFG.
+///
+/// We are interested in unique edges. If a block B1 has multiple
+/// edges to another block B2, we only add a single B1->B2 edge.
+template <>
+void SampleProfileLoaderBaseImpl<MachineBasicBlock>::buildEdges(FunctionT &F) {
+  for (auto &BI : F) {
+    BasicBlockT *B1 = &BI;
+
+    // Add predecessors for B1.
+    SmallPtrSet<BasicBlockT *, 16> Visited;
+    if (!Predecessors[B1].empty())
+      llvm_unreachable("Found a stale predecessors list in a basic block.");
+    for (auto *B2 : B1->predecessors()) {
+      if (Visited.insert(B2).second)
+        Predecessors[B1].push_back(B2);
+    }
+
+    // Add successors for B1.
+    Visited.clear();
+    if (!Successors[B1].empty())
+      llvm_unreachable("Found a stale successors list in a basic block.");
+    for (auto *B2 : B1->successors()) {
+      if (Visited.insert(B2).second)
+        Successors[B1].push_back(B2);
+    }
+  }
+}
+
+/// Overwrite the successor probabilities of every block in \p F that has at
+/// least two successors with probabilities derived from EdgeWeights. Blocks
+/// whose outgoing edge weights sum to zero are left untouched.
+void FSProfileLoader::setBranchProbs(MachineFunction &F) {
+  LLVM_DEBUG(dbgs() << "\nPropagation complete. Setting branch probs\n");
+  for (auto &BI : F) {
+    MachineBasicBlock *BB = &BI;
+    if (BB->succ_size() < 2)
+      continue;
+    const MachineBasicBlock *EC = EquivalenceClass[BB];
+    uint64_t BBWeight = BlockWeights[EC];
+    uint64_t SumEdgeWeight = 0;
+    for (MachineBasicBlock *Succ : BB->successors()) {
+      Edge E = std::make_pair(BB, Succ);
+      SumEdgeWeight += EdgeWeights[E];
+    }
+
+    // The edge weights are the source of truth for the probabilities, so use
+    // their sum as the denominator when it disagrees with the block weight.
+    if (BBWeight != SumEdgeWeight) {
+      LLVM_DEBUG(dbgs() << "BBweight is not equal to SumEdgeWeight: BBWWeight="
+                        << BBWeight << " SumEdgeWeight= " << SumEdgeWeight
+                        << "\n");
+      BBWeight = SumEdgeWeight;
+    }
+    if (BBWeight == 0) {
+      LLVM_DEBUG(dbgs() << "SKIPPED. All branch weights are zero.\n");
+      continue;
+    }
+
+    uint64_t BBWeight_Orig = BBWeight;
+    const uint32_t MaxWeight = std::numeric_limits<uint32_t>::max();
+    // Keep the scale factor 64-bit: BBWeight / MaxWeight + 1 does not fit in
+    // 32 bits for very large weights, and a truncated factor would leave the
+    // scaled weights above MaxWeight.
+    uint64_t Factor = 1;
+    if (BBWeight > MaxWeight) {
+      Factor = BBWeight / MaxWeight + 1;
+      BBWeight /= Factor;
+      LLVM_DEBUG(dbgs() << "Scaling weights by " << Factor << "\n");
+    }
+
+    for (MachineBasicBlock::succ_iterator SI = BB->succ_begin(),
+                                          SE = BB->succ_end();
+         SI != SE; ++SI) {
+      MachineBasicBlock *Succ = *SI;
+      Edge E = std::make_pair(BB, Succ);
+      uint64_t EdgeWeight = EdgeWeights[E];
+      EdgeWeight /= Factor;
+
+      assert(BBWeight >= EdgeWeight &&
+             "BBWeight is smaller than EdgeWeight -- should not happen.\n");
+
+      BranchProbability OldProb = MBFI->getMBPI()->getEdgeProbability(BB, SI);
+      BranchProbability NewProb(EdgeWeight, BBWeight);
+      if (OldProb == NewProb)
+        continue;
+      BB->setSuccProbability(SI, NewProb);
+
+      // Report large probability changes on heavy branches. This is
+      // diagnostic output only, so it is emitted through LLVM_DEBUG rather
+      // than written to dbgs() unconditionally.
+      LLVM_DEBUG({
+        BranchProbability Diff =
+            OldProb > NewProb ? OldProb - NewProb : NewProb - OldProb;
+        bool Show =
+            Diff >= BranchProbability(FSProfileDebugProbDiffThreshold, 100) &&
+            BBWeight_Orig >= FSProfileDebugBWThreshold;
+        if (Show) {
+          auto DIL = BB->findBranchDebugLoc();
+          auto SuccDIL = Succ->findBranchDebugLoc();
+          dbgs() << "Set branch fs prob: MBB (" << BB->getNumber() << " -> "
+                 << Succ->getNumber() << "): ";
+          if (DIL)
+            dbgs() << DIL->getFilename() << ":" << DIL->getLine() << ":"
+                   << DIL->getColumn();
+          if (SuccDIL)
+            dbgs() << "-->" << SuccDIL->getFilename() << ":"
+                   << SuccDIL->getLine() << ":" << SuccDIL->getColumn();
+          dbgs() << " W=" << BBWeight_Orig << " " << OldProb << " --> "
+                 << NewProb << "\n";
+        }
+      });
+    }
+  }
+}
+
+/// Create the sample profile reader for \p M from Filename.
+bool FSProfileLoader::doInitialization(Module &M) {
+  auto &Ctx = M.getContext();
+
+  auto ReaderOrErr = sampleprof::SampleProfileReader::create(Filename, Ctx, "");
+  if (std::error_code EC = ReaderOrErr.getError()) {
+    std::string Msg = "Could not open profile: " + EC.message();
+    Ctx.diagnose(DiagnosticInfoSampleProfile(Filename,
Msg));
+    // No reader was created. Mark the profile invalid so that callers that
+    // check isValid() never reach runOnFunction() with a null Reader.
+    ProfileIsValid = false;
+    return false;
+  }
+
+  Reader = std::move(ReaderOrErr.get());
+  Reader->setModule(&M);
+  Reader->setDiscriminatorMaskedBitFrom(HighBit);
+  ProfileIsValid = (Reader->read() == sampleprof_error::success);
+  Reader->getSummary();
+
+  return true;
+}
+
+/// Look up the samples for \p F's IR function, propagate the weights and,
+/// when EnableFSBranchProb is set, update the branch probabilities.
+/// Returns false early if there are no samples for the function or if
+/// getFunctionLoc() returns 0; otherwise returns the result of
+/// computeAndPropagateWeights().
+bool FSProfileLoader::runOnFunction(FunctionT &F) {
+  Function &Func = F.getFunction();
+  Samples = Reader->getSamplesFor(Func);
+  if (!Samples || Samples->empty())
+    return false;
+
+  if (getFunctionLoc(F) == 0)
+    return false;
+
+  DenseSet<GlobalValue::GUID> InlinedGUIDs;
+  bool Changed = computeAndPropagateWeights(F, InlinedGUIDs);
+
+  // Set the new BPI, BFI.
+  if (EnableFSBranchProb)
+    setBranchProbs(F);
+
+  return Changed;
+}
+
+} // namespace llvm
+
+bool AddFSDiscriminators::runOnMachineFunction(MachineFunction &mf) {
+  if (!EnableFSDiscriminator)
+    return false;
+
+  bool Changed = false;
+  // (file name, line number) of an instruction's debug location.
+  using Location = std::pair<StringRef, unsigned>;
+  // A location plus the discriminator bits assigned before this pass.
+  using LocationDiscriminator = std::pair<Location, unsigned>;
+  using BBSet = DenseSet<const MachineBasicBlock *>;
+  using LocationDiscriminatorBBMap = DenseMap<LocationDiscriminator, BBSet>;
+  using LocationDiscriminatorCurrPassMap =
+      DenseMap<LocationDiscriminator, unsigned>;
+
+  MF = &mf;
+  LocationDiscriminatorBBMap LDBM;
+  LocationDiscriminatorCurrPassMap LDCM;
+
+  // Mask of discriminators before this pass (bits 0 .. LowBit - 1).
+  unsigned BitMaskBefore = (1U << LowBit) - 1;
+  // Mask of discriminators including this pass (bits 0 .. HighBit). HighBit
+  // can be 31 (PASS_LAST_DIS_BIT_END), for which a shift by HighBit + 1 would
+  // be undefined, so that case is handled explicitly.
+  unsigned BitMaskNow = HighBit >= 31 ? ~0U : (1U << (HighBit + 1)) - 1;
+  // Mask of discriminators for bits specific to this pass.
+ unsigned BitMaskThisPass = BitMaskNow ^ BitMaskBefore; + unsigned NumNewD = 0; + + LLVM_DEBUG(dbgs() << "AddFSDiscriminators working on Func: " + << MF->getFunction().getName() << "\n"); + for (MachineBasicBlock &BB : *MF) { + for (MachineInstr &I : BB) { + const DILocation *DIL = I.getDebugLoc().get(); + if (!DIL) + continue; + unsigned LineNo = DIL->getLine(); + if (LineNo == 0) + continue; + Location L = std::make_pair(DIL->getFilename(), LineNo); + unsigned Discriminator = DIL->getDiscriminator(); + Discriminator &= BitMaskBefore; + LocationDiscriminator LD = std::make_pair(L, Discriminator); + auto &BBMap = LDBM[LD]; + auto R = BBMap.insert(&BB); + if (BBMap.size() == 1) + continue; + unsigned DiscriminatorCurrPass; + + DiscriminatorCurrPass = R.second ? ++LDCM[LD] : LDCM[LD]; + DiscriminatorCurrPass = DiscriminatorCurrPass << LowBit; + DiscriminatorCurrPass += getCallStackHash(BB, I, DIL); + DiscriminatorCurrPass &= BitMaskThisPass; + unsigned NewD = Discriminator | DiscriminatorCurrPass; + auto NewDIL = DIL->cloneWithDiscriminator(NewD); + if (!NewDIL) { + LLVM_DEBUG(dbgs() << "Could not encode discriminator: " + << DIL->getFilename() << ":" << DIL->getLine() << ":" + << DIL->getColumn() << ":" << Discriminator << " " + << I << "\n"); + } else { + I.setDebugLoc(NewDIL); + NumNewD++; + LLVM_DEBUG(dbgs() << DIL->getFilename() << ":" << DIL->getLine() << ":" + << DIL->getColumn() << ": from " << Discriminator + << " -> " << NewD << " DC is " << NumNewD << "\n"); + } + Changed = true; + } + } + + if (Changed) { + LLVM_DEBUG(dbgs() << "Num of LDBB: " << LDBM.size() + << " Num of New D: " << NumNewD << "\n"); + } + + return Changed; +} + +bool FSProfileLoaderPass::runOnMachineFunction(MachineFunction &mf) { + if (DisableFSProfileLoader) + return false; + if (!FSSampleLoader->isValid()) + return false; + + MF = &mf; + LLVM_DEBUG(dbgs() << "FSProfileLoader pass working on Func: " + << MF->getFunction().getName() << "\n"); + MBFI = &getAnalysis(); + 
FSSampleLoader->setInitVals( + &getAnalysis(), + &getAnalysis(), &getAnalysis(), + MBFI, &getAnalysis().getORE()); + + MF->RenumberBlocks(); + if (ViewBFIBefore && ViewBlockLayoutWithBFI != GVDT_None && + (ViewBlockFreqFuncName.empty() || + MF->getFunction().getName().equals(ViewBlockFreqFuncName))) { + MBFI->view("FSP_b." + MF->getName(), false); + } + + bool Changed = FSSampleLoader->runOnFunction(mf); + + if (ViewBFIAfter && ViewBlockLayoutWithBFI != GVDT_None && + (ViewBlockFreqFuncName.empty() || + MF->getFunction().getName().equals(ViewBlockFreqFuncName))) { + MBFI->view("FSP_a." + MF->getName(), false); + } + + return Changed; +} + +bool FSProfileLoaderPass::doInitialization(Module &M) { + if (DisableFSProfileLoader) + return false; + LLVM_DEBUG(dbgs() << "FSProfileLoader pass working on Module " << M.getName() + << "\n"); + + FSSampleLoader->setMaskBitVals(LowBit, HighBit); + return FSSampleLoader->doInitialization(M); +} + +void FSProfileLoaderPass::getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesAll(); + AU.addRequired(); + AU.addRequired(); + AU.addRequired(); + AU.addRequiredTransitive(); + AU.addRequired(); + MachineFunctionPass::getAnalysisUsage(AU); +} Index: llvm/lib/CodeGen/TargetPassConfig.cpp =================================================================== --- llvm/lib/CodeGen/TargetPassConfig.cpp +++ llvm/lib/CodeGen/TargetPassConfig.cpp @@ -40,6 +40,7 @@ #include "llvm/Support/Compiler.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/FSAFDODiscriminator.h" #include "llvm/Support/SaveAndRestore.h" #include "llvm/Support/Threading.h" #include "llvm/Target/CGPassBuilderOption.h" @@ -165,6 +166,17 @@ clEnumValN(GlobalISelAbortMode::DisableWithDiag, "2", "Disable the abort but emit a diagnostic on failure"))); +extern cl::opt FSProfileFile; +extern cl::opt EnableFSDiscriminator; + +static cl::opt DisableFSP2("disable-fsafdo-p2", cl::init(true), + cl::Hidden, + cl::desc("Disable FS 
Sampleloader Pass2")); + +static cl::opt + FSNoFinalDiscrim("fs-no-final_discrim", cl::Hidden, + cl::desc("No final discrimnator in flow sensitive AFDO.")); + // Temporary option to allow experimenting with MachineScheduler as a post-RA // scheduler. Targets can "properly" enable this with // substitutePass(&PostRASchedulerID, &PostMachineSchedulerID). @@ -1162,6 +1174,9 @@ addPass(&XRayInstrumentationID); addPass(&PatchableFunctionID); + if (EnableFSDiscriminator && !FSNoFinalDiscrim) + addPass(createAddFSDiscriminatorsPass(PASS_LAST_DIS_BIT_BEG, + PASS_LAST_DIS_BIT_END)); addPreEmitPass(); if (TM->Options.EnableIPRA) @@ -1435,13 +1450,36 @@ return true; } +static std::string getFSProfileFile() { + if (FSProfileFile.empty() || !EnableFSDiscriminator) + return std::string(); + dbgs() << "FSProfile is " << FSProfileFile.getValue() << "\n"; + return FSProfileFile.getValue(); +} + /// Add standard basic block placement passes. void TargetPassConfig::addBlockPlacement() { + if (EnableFSDiscriminator) + addPass( + createAddFSDiscriminatorsPass(PASS_1_DIS_BIT_BEG, PASS_1_DIS_BIT_END)); + std::string FSFile = getFSProfileFile(); + if (!FSFile.empty()) + addPass(createFSProfileLoaderPass(FSFile, PASS_1_DIS_BIT_BEG, + PASS_1_DIS_BIT_END)); if (addPass(&MachineBlockPlacementID)) { // Run a separate pass to collect block placement statistics. 
if (EnableBlockPlacementStats) addPass(&MachineBlockPlacementStatsID); } + if (EnableFSDiscriminator && !DisableFSP2) { + addPass( + createAddFSDiscriminatorsPass(PASS_2_DIS_BIT_BEG, PASS_2_DIS_BIT_END)); + std::string FSFile = getFSProfileFile(); + if (!FSFile.empty()) + addPass(createFSProfileLoaderPass(FSFile, PASS_2_DIS_BIT_BEG, + PASS_2_DIS_BIT_END)); + addPass(&BranchFolderPassID); + } } //===---------------------------------------------------------------------===// Index: llvm/lib/LTO/LTOBackend.cpp =================================================================== --- llvm/lib/LTO/LTOBackend.cpp +++ llvm/lib/LTO/LTOBackend.cpp @@ -214,10 +214,15 @@ PGOOptions::SampleUse, PGOOptions::NoCSAction, true); else if (Conf.RunCSIRInstr) { PGOOpt = PGOOptions("", Conf.CSIRProfile, Conf.ProfileRemapping, - PGOOptions::IRUse, PGOOptions::CSIRInstr); + PGOOptions::IRUse, PGOOptions::CSIRInstr, + Conf.AddFSDiscriminator); } else if (!Conf.CSIRProfile.empty()) { PGOOpt = PGOOptions(Conf.CSIRProfile, "", Conf.ProfileRemapping, - PGOOptions::IRUse, PGOOptions::CSIRUse); + PGOOptions::IRUse, PGOOptions::CSIRUse, + Conf.AddFSDiscriminator); + } else if (Conf.AddFSDiscriminator) { + PGOOpt = PGOOptions("", "", "", PGOOptions::NoAction, + PGOOptions::NoCSAction, true); } PassInstrumentationCallbacks PIC; Index: llvm/lib/Passes/PassBuilder.cpp =================================================================== --- llvm/lib/Passes/PassBuilder.cpp +++ llvm/lib/Passes/PassBuilder.cpp @@ -307,6 +307,7 @@ extern cl::opt FlattenedProfileUsed; +extern cl::opt FSProfileFile; extern cl::opt AttributorRun; extern cl::opt EnableKnowledgeRetention; @@ -1073,6 +1074,7 @@ // FIXME: revisit how SampleProfileLoad/Inliner/ICP is structured. 
if (LoadSampleProfile) EarlyFPM.addPass(InstCombinePass()); + MPM.addPass(createModuleToFunctionPassAdaptor(std::move(EarlyFPM))); if (LoadSampleProfile) { @@ -1086,13 +1088,17 @@ // Do not invoke ICP in the LTOPrelink phase as it makes it hard // for the profile annotation to be accurate in the LTO backend. if (Phase != ThinOrFullLTOPhase::ThinLTOPreLink && - Phase != ThinOrFullLTOPhase::FullLTOPreLink) + Phase != ThinOrFullLTOPhase::FullLTOPreLink) { // We perform early indirect call promotion here, before globalopt. // This is important for the ThinLTO backend phase because otherwise // imported available_externally functions look unreferenced and are // removed. MPM.addPass( PGOIndirectCallPromotion(true /* IsInLTO */, true /* SamplePGO */)); + + // Set FSProfileFile so that CodeGen can read the profile. + FSProfileFile.setValue(PGOOpt->ProfileFile); + } } if (AttributorRun & AttributorRunOption::MODULE) @@ -1609,6 +1615,9 @@ // Cache ProfileSummaryAnalysis once to avoid the potential need to insert // RequireAnalysisPass for PSI before subsequent non-module passes. MPM.addPass(RequireAnalysisPass()); + + // Set FSProfileFile so that CodeGen can read the profile. + FSProfileFile.setValue(PGOOpt->ProfileFile); } // Remove unused virtual tables to improve the quality of code generated by Index: llvm/lib/ProfileData/SampleProf.cpp =================================================================== --- llvm/lib/ProfileData/SampleProf.cpp +++ llvm/lib/ProfileData/SampleProf.cpp @@ -34,6 +34,13 @@ "profile-symbol-list-cutoff", cl::Hidden, cl::init(-1), cl::ZeroOrMore, cl::desc("Cutoff value about how many symbols in profile symbol list " "will be used. 
This is very useful for performance debugging")); +cl::opt + FSProfileFile("fs-profile-file", cl::init(""), cl::value_desc("filename"), + cl::desc("Flow Sensitive profile file name."), cl::Hidden); +cl::opt EnableFSDiscriminator( + "enable-fs-discriminator", cl::Hidden, cl::init(false), + //"enable-fs-discriminator", cl::Hidden, cl::init(true), + cl::desc("Enable adding flow senstive discriminators")); namespace llvm { namespace sampleprof { @@ -207,8 +214,11 @@ } unsigned FunctionSamples::getOffset(const DILocation *DIL) { - return (DIL->getLine() - DIL->getScope()->getSubprogram()->getLine()) & - 0xffff; + unsigned Offset = + DIL->getLine() - DIL->getScope()->getSubprogram()->getLine(); + if (EnableFSDiscriminator) + return Offset; + return Offset & 0xffff; } LineLocation FunctionSamples::getCallSiteIdentifier(const DILocation *DIL) { @@ -232,9 +242,15 @@ const DILocation *PrevDIL = DIL; for (DIL = DIL->getInlinedAt(); DIL; DIL = DIL->getInlinedAt()) { - S.push_back(std::make_pair( - LineLocation(getOffset(DIL), DIL->getBaseDiscriminator()), - PrevDIL->getScope()->getSubprogram()->getLinkageName())); + unsigned Discriminator; + if (EnableFSDiscriminator) + Discriminator = DIL->getDiscriminator(); + else + Discriminator = DIL->getBaseDiscriminator(); + + S.push_back( + std::make_pair(LineLocation(getOffset(DIL), Discriminator), + PrevDIL->getScope()->getSubprogram()->getLinkageName())); PrevDIL = DIL; } if (S.size() == 0) Index: llvm/lib/ProfileData/SampleProfReader.cpp =================================================================== --- llvm/lib/ProfileData/SampleProfReader.cpp +++ llvm/lib/ProfileData/SampleProfReader.cpp @@ -26,6 +26,7 @@ #include "llvm/IR/ProfileSummary.h" #include "llvm/ProfileData/ProfileCommon.h" #include "llvm/ProfileData/SampleProf.h" +#include "llvm/Support/CommandLine.h" #include "llvm/Support/Compression.h" #include "llvm/Support/ErrorOr.h" #include "llvm/Support/LEB128.h" @@ -45,6 +46,9 @@ using namespace llvm; using namespace 
sampleprof; +#define DEBUG_TYPE "samplepgo-reader" +extern cl::opt EnableFSDiscriminator; + /// Dump the function profile for \p FName. /// /// \param FName Name of the function to print. @@ -82,7 +86,9 @@ } /// Returns true if line offset \p L is legal (only has 16 bits). -static bool isOffsetLegal(unsigned L) { return (L & 0xffff) == L; } +static bool isOffsetLegal(unsigned L) { + return EnableFSDiscriminator || (L & 0xffff) == L; +} /// Parse \p Input that contains metadata. /// Possible metadata: @@ -229,6 +235,15 @@ // top-level function profile. bool SeenMetadata = false; +#ifndef NDEBUG + uint64_t FSBucketSamples[6]; + uint32_t FSBucketRecords[6]; + for (int i = 0; i < 6; i++) { + FSBucketSamples[i] = 0; + FSBucketRecords[i] = 0; + } +#endif + for (; !LineIt.is_at_eof(); ++LineIt) { if ((*LineIt)[(*LineIt).find_first_not_of(' ')] == '#') continue; @@ -284,6 +299,18 @@ "Found non-metadata after metadata: " + *LineIt); return sampleprof_error::malformed; } + + // Here we handle FS discriminators: + uint32_t MaskedDiscriminator = Discriminator; + MaskedDiscriminator &= getDiscriminatorMask(); +#ifndef NDEBUG + int Bucket = getFSBucket(Discriminator); + FSBucketRecords[Bucket] += 1; + FSBucketSamples[Bucket] += NumSamples; +#endif + + Discriminator = MaskedDiscriminator; + while (InlineStack.size() > Depth) { InlineStack.pop_back(); } @@ -333,6 +360,17 @@ if (Result == sampleprof_error::success) computeSummary(); +#ifndef NDEBUG + LLVM_DEBUG(dbgs() << "Text reader is done. 
Statistics:\n"); + for (int i = 0; i < 6; i++) { + if (FSBucketRecords[i] == 0) + continue; + LLVM_DEBUG(dbgs() << "Bucket " << i << ": " + << "records=" << FSBucketRecords[i] + << " samples=" << FSBucketSamples[i] << "\n"); + } +#endif + return Result; } @@ -489,6 +527,11 @@ if (std::error_code EC = NumCalls.getError()) return EC; + // Here we handle FS discriminators: + uint32_t DiscriminatorVal = *Discriminator; + uint32_t MaskedDiscriminator = DiscriminatorVal & getDiscriminatorMask(); + DiscriminatorVal = MaskedDiscriminator; + for (uint32_t J = 0; J < *NumCalls; ++J) { auto CalledFunction(readStringFromTable()); if (std::error_code EC = CalledFunction.getError()) @@ -498,11 +541,11 @@ if (std::error_code EC = CalledFunctionSamples.getError()) return EC; - FProfile.addCalledTargetSamples(*LineOffset, *Discriminator, + FProfile.addCalledTargetSamples(*LineOffset, DiscriminatorVal, *CalledFunction, *CalledFunctionSamples); } - FProfile.addBodySamples(*LineOffset, *Discriminator, *NumSamples); + FProfile.addBodySamples(*LineOffset, DiscriminatorVal, *NumSamples); } // Read all the samples for inlined function calls. 
@@ -523,8 +566,13 @@ if (std::error_code EC = FName.getError()) return EC; + // Here we handle FS discriminators: + uint32_t DiscriminatorVal = *Discriminator; + uint32_t MaskedDiscriminator = DiscriminatorVal & getDiscriminatorMask(); + DiscriminatorVal = MaskedDiscriminator; + FunctionSamples &CalleeProfile = FProfile.functionSamplesAt( - LineLocation(*LineOffset, *Discriminator))[std::string(*FName)]; + LineLocation(*LineOffset, DiscriminatorVal))[std::string(*FName)]; CalleeProfile.setName(*FName); if (std::error_code EC = readProfile(CalleeProfile)) return EC; Index: llvm/lib/Target/X86/X86InsertPrefetch.cpp =================================================================== --- llvm/lib/Target/X86/X86InsertPrefetch.cpp +++ llvm/lib/Target/X86/X86InsertPrefetch.cpp @@ -167,6 +167,7 @@ return false; } Reader = std::move(ReaderOrErr.get()); + Reader->setDiscriminatorMaskedBitFrom(DILocation::getBaseDiscriminatorBits()); Reader->read(); return true; } Index: llvm/lib/Transforms/IPO/SampleProfile.cpp =================================================================== --- llvm/lib/Transforms/IPO/SampleProfile.cpp +++ llvm/lib/Transforms/IPO/SampleProfile.cpp @@ -1742,6 +1742,7 @@ // set module before reading the profile so reader may be able to only // read the function profiles which are used by the current module. 
Reader->setModule(&M); + Reader->setDiscriminatorMaskedBitFrom(DILocation::getBaseDiscriminatorBits()); if (std::error_code EC = Reader->read()) { std::string Msg = "profile reading failed: " + EC.message(); Ctx.diagnose(DiagnosticInfoSampleProfile(Filename, Msg)); Index: llvm/lib/Transforms/Utils/LoopUnroll.cpp =================================================================== --- llvm/lib/Transforms/Utils/LoopUnroll.cpp +++ llvm/lib/Transforms/Utils/LoopUnroll.cpp @@ -576,7 +576,7 @@ for (Loop *SubLoop : *L) LoopsToSimplify.insert(SubLoop); - if (Header->getParent()->isDebugInfoForProfiling()) + if (Header->getParent()->isDebugInfoForProfiling() && !EnableFSDiscriminator) for (BasicBlock *BB : L->getBlocks()) for (Instruction &I : *BB) if (!isa(&I)) Index: llvm/lib/Transforms/Utils/LoopUnrollAndJam.cpp =================================================================== --- llvm/lib/Transforms/Utils/LoopUnrollAndJam.cpp +++ llvm/lib/Transforms/Utils/LoopUnrollAndJam.cpp @@ -346,7 +346,7 @@ LoopBlocksDFS::RPOIterator BlockBegin = DFS.beginRPO(); LoopBlocksDFS::RPOIterator BlockEnd = DFS.endRPO(); - if (Header->getParent()->isDebugInfoForProfiling()) + if (Header->getParent()->isDebugInfoForProfiling() && !EnableFSDiscriminator) for (BasicBlock *BB : L->getBlocks()) for (Instruction &I : *BB) if (!isa(&I)) Index: llvm/lib/Transforms/Vectorize/LoopVectorize.cpp =================================================================== --- llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -1033,7 +1033,7 @@ if (const Instruction *Inst = dyn_cast_or_null(Ptr)) { const DILocation *DIL = Inst->getDebugLoc(); if (DIL && Inst->getFunction()->isDebugInfoForProfiling() && - !isa(Inst)) { + !isa(Inst) && !EnableFSDiscriminator) { assert(!VF.isScalable() && "scalable vectors not yet supported."); auto NewDIL = DIL->cloneByMultiplyingDuplicationFactor(UF * VF.getKnownMinValue()); @@ -1043,8 +1043,7 @@ LLVM_DEBUG(dbgs() << "Failed 
to create new discriminator: " << DIL->getFilename() << " Line: " << DIL->getLine()); - } - else + } else B.SetCurrentDebugLocation(DIL); } else B.SetCurrentDebugLocation(DebugLoc()); Index: llvm/tools/llvm-profdata/llvm-profdata.cpp =================================================================== --- llvm/tools/llvm-profdata/llvm-profdata.cpp +++ llvm/tools/llvm-profdata/llvm-profdata.cpp @@ -21,6 +21,7 @@ #include "llvm/ProfileData/SampleProfWriter.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Errc.h" +#include "llvm/Support/FSAFDODiscriminator.h" #include "llvm/Support/FileSystem.h" #include "llvm/Support/Format.h" #include "llvm/Support/FormattedStream.h" @@ -450,6 +451,8 @@ const uint64_t ColdPercentileIdx = 15; const uint64_t HotPercentileIdx = 11; +static uint32_t MaskHighBitFrom = 31; + /// Adjust the instr profile in \p WC based on the sample profile in /// \p Reader. static void @@ -546,6 +549,7 @@ if (std::error_code EC = ReaderOrErr.getError()) exitWithErrorCode(EC, SampleFilename); auto Reader = std::move(ReaderOrErr.get()); + Reader->setDiscriminatorMaskedBitFrom(MaskHighBitFrom); if (std::error_code EC = Reader->read()) exitWithErrorCode(EC, SampleFilename); @@ -572,13 +576,16 @@ Result.setName(Remapper(Samples.getName())); Result.addTotalSamples(Samples.getTotalSamples()); Result.addHeadSamples(Samples.getHeadSamples()); + + uint32_t DiscriminatorMask = getN1Bits(MaskHighBitFrom); for (const auto &BodySample : Samples.getBodySamples()) { - Result.addBodySamples(BodySample.first.LineOffset, - BodySample.first.Discriminator, + uint32_t MaskedDiscriminator = + BodySample.first.Discriminator & DiscriminatorMask; + Result.addBodySamples(BodySample.first.LineOffset, MaskedDiscriminator, BodySample.second.getSamples()); for (const auto &Target : BodySample.second.getCallTargets()) { Result.addCalledTargetSamples(BodySample.first.LineOffset, - BodySample.first.Discriminator, + MaskedDiscriminator, Remapper(Target.first()), 
Target.second); } } @@ -686,6 +693,7 @@ // merged profile map. Readers.push_back(std::move(ReaderOrErr.get())); const auto Reader = Readers.back().get(); + Reader->setDiscriminatorMaskedBitFrom(MaskHighBitFrom); if (std::error_code EC = Reader->read()) { warnOrExitGivenError(FailMode, EC, Input.Filename); Readers.pop_back(); @@ -879,18 +887,24 @@ "sample profile, if the ratio of the number of zero counters " "divided by the the total number of counters is above the " "threshold, the profile of the function will be regarded as " - "being harmful for performance and will be dropped. ")); + "being harmful for performance and will be dropped.")); cl::opt SupplMinSizeThreshold( "suppl-min-size-threshold", cl::init(10), cl::Hidden, cl::desc("If the size of a function is smaller than the threshold, " "assume it can be inlined by PGO early inliner and it won't " - "be adjusted based on sample profile. ")); + "be adjusted based on sample profile.")); cl::opt InstrProfColdThreshold( "instr-prof-cold-threshold", cl::init(0), cl::Hidden, cl::desc("User specified cold threshold for instr profile which will " - "override the cold threshold got from profile summary. 
")); + "override the cold threshold got from profile summary.")); + cl::opt MaskHighBitFromVal( + "mask-highbit-from", cl::init(31), cl::Hidden, + cl::desc("Zero out the discriminatior bit from this value (0 based) " + "for exmaple, value 11 will only use base discriminator; " + "17 will use base and second round; 23 will first 3 rounds.")); cl::ParseCommandLineOptions(argc, argv, "LLVM profile data merger\n"); + MaskHighBitFrom = MaskHighBitFromVal.getValue(); WeightedFileVector WeightedInputs; for (StringRef Filename : InputFilenames) @@ -1559,6 +1573,7 @@ using namespace sampleprof; StringMap BaseFuncProf; + const auto &BaseProfiles = BaseReader->getProfiles(); for (const auto &BaseFunc : BaseProfiles) { BaseFuncProf.try_emplace(BaseFunc.second.getNameWithContext(), @@ -1841,6 +1856,9 @@ BaseReader = std::move(BaseReaderOrErr.get()); TestReader = std::move(TestReaderOrErr.get()); + BaseReader->setDiscriminatorMaskedBitFrom(MaskHighBitFrom); + TestReader->setDiscriminatorMaskedBitFrom(MaskHighBitFrom); + if (std::error_code EC = BaseReader->read()) exitWithErrorCode(EC, BaseFilename); if (std::error_code EC = TestReader->read()) @@ -2352,6 +2370,8 @@ auto Reader = std::move(ReaderOrErr.get()); + Reader->setDiscriminatorMaskedBitFrom(MaskHighBitFrom); + if (ShowSectionInfoOnly) { showSectionInfo(Reader.get(), OS); return 0; @@ -2443,8 +2463,14 @@ cl::desc("Show the information of each section in the sample profile. 
" "The flag is only usable when the sample profile is in " "extbinary format")); + cl::opt MaskHighBitFrom1( + "mask-highbit-from", cl::init(31), cl::Hidden, + cl::desc("Zero out the discriminatior bit from this value (0 based) " + "for exmaple, value 11 will only use base discriminator; " + "17 will use base and second round; 23 will first 3 rounds.")); cl::ParseCommandLineOptions(argc, argv, "LLVM profile data summary\n"); + MaskHighBitFrom = MaskHighBitFrom1.getValue(); if (OutputFilename.empty()) OutputFilename = "-"; Index: llvm/unittests/ProfileData/SampleProfTest.cpp =================================================================== --- llvm/unittests/ProfileData/SampleProfTest.cpp +++ llvm/unittests/ProfileData/SampleProfTest.cpp @@ -9,6 +9,7 @@ #include "llvm/ProfileData/SampleProf.h" #include "llvm/ADT/StringMap.h" #include "llvm/ADT/StringRef.h" +#include "llvm/IR/DebugInfoMetadata.h" #include "llvm/IR/LLVMContext.h" #include "llvm/IR/Metadata.h" #include "llvm/IR/Module.h" @@ -61,6 +62,8 @@ ASSERT_TRUE(NoError(ReaderOrErr.getError())); Reader = std::move(ReaderOrErr.get()); Reader->setModule(&M); + Reader->setDiscriminatorMaskedBitFrom( + DILocation::getBaseDiscriminatorBits()); } TempFile createRemapFile() {