Index: llvm/trunk/include/llvm/InitializePasses.h =================================================================== --- llvm/trunk/include/llvm/InitializePasses.h +++ llvm/trunk/include/llvm/InitializePasses.h @@ -299,6 +299,7 @@ void initializePGOIndirectCallPromotionLegacyPassPass(PassRegistry&); void initializePGOInstrumentationGenLegacyPassPass(PassRegistry&); void initializePGOInstrumentationUseLegacyPassPass(PassRegistry&); +void initializePGOInstrumentationGenCreateVarLegacyPassPass(PassRegistry&); void initializePGOMemOPSizeOptLegacyPassPass(PassRegistry&); void initializePHIEliminationPass(PassRegistry&); void initializePartialInlinerLegacyPassPass(PassRegistry&); Index: llvm/trunk/include/llvm/LTO/Config.h =================================================================== --- llvm/trunk/include/llvm/LTO/Config.h +++ llvm/trunk/include/llvm/LTO/Config.h @@ -55,6 +55,9 @@ /// Disable entirely the optimizer, including importing for ThinLTO bool CodeGenOnly = false; + /// Run PGO context sensitive IR instrumentation. + bool RunCSIRInstr = false; + /// If this field is set, the set of passes run in the middle-end optimizer /// will be the one specified by the string. Only works with the new pass /// manager as the old one doesn't have this ability. @@ -73,6 +76,9 @@ /// with this triple. std::string DefaultTriple; + /// Context Sensitive PGO profile path. + std::string CSIRProfile; + /// Sample PGO profile path. std::string SampleProfile; Index: llvm/trunk/include/llvm/LinkAllPasses.h =================================================================== --- llvm/trunk/include/llvm/LinkAllPasses.h +++ llvm/trunk/include/llvm/LinkAllPasses.h @@ -102,6 +102,7 @@ (void) llvm::createGCOVProfilerPass(); (void) llvm::createPGOInstrumentationGenLegacyPass(); (void) llvm::createPGOInstrumentationUseLegacyPass(); + (void) llvm::createPGOInstrumentationGenCreateVarLegacyPass(); (void) llvm::createPGOIndirectCallPromotionLegacyPass(); (void) llvm::createPGOMemOPSizeOptLegacyPass(); (void) llvm::createInstrProfilingLegacyPass(); Index: llvm/trunk/include/llvm/ProfileData/InstrProf.h =================================================================== --- llvm/trunk/include/llvm/ProfileData/InstrProf.h +++ llvm/trunk/include/llvm/ProfileData/InstrProf.h @@ -767,10 +767,20 @@ StringRef Name; uint64_t Hash; + // We reserve this bit as the flag for context sensitive profile record. + static const int CS_FLAG_IN_FUNC_HASH = 60; + NamedInstrProfRecord() = default; NamedInstrProfRecord(StringRef Name, uint64_t Hash, std::vector Counts) : InstrProfRecord(std::move(Counts)), Name(Name), Hash(Hash) {} + + static bool hasCSFlagInHash(uint64_t FuncHash) { + return ((FuncHash >> CS_FLAG_IN_FUNC_HASH) & 1); + } + static void setCSFlagInHash(uint64_t &FuncHash) { + FuncHash |= ((uint64_t)1 << CS_FLAG_IN_FUNC_HASH); + } }; uint32_t InstrProfRecord::getNumValueKinds() const { @@ -1004,6 +1014,8 @@ // from control data struct is changed from raw pointer to Name's MD5 value. // Version 4: ValueDataBegin and ValueDataSizes fields are removed from the // raw header. +// Version 5: Bit 60 of FuncHash is reserved for the flag for the context +// sensitive records. const uint64_t Version = INSTR_PROF_RAW_VERSION; template inline uint64_t getMagic(); @@ -1040,6 +1052,10 @@ void getMemOPSizeRangeFromOption(StringRef Str, int64_t &RangeStart, int64_t &RangeLast); +// Create a COMDAT variable INSTR_PROF_RAW_VERSION_VAR to make the runtime +// aware this is an ir_level profile so it can set the version flag. +void createIRLevelProfileFlagVar(Module &M, bool IsCS); + // Create the variable for the profile file name. void createProfileFileNameVar(Module &M, StringRef InstrProfileOutput); Index: llvm/trunk/include/llvm/ProfileData/InstrProfData.inc =================================================================== --- llvm/trunk/include/llvm/ProfileData/InstrProfData.inc +++ llvm/trunk/include/llvm/ProfileData/InstrProfData.inc @@ -635,10 +635,12 @@ * version for other variants of profile. We set the lowest bit of the upper 8 * bits (i.e. bit 56) to 1 to indicate if this is an IR-level instrumentaiton * generated profile, and 0 if this is a Clang FE generated profile. + * 1 in bit 57 indicates there are context-sensitive records in the profile. */ #define VARIANT_MASKS_ALL 0xff00000000000000ULL #define GET_VERSION(V) ((V) & ~VARIANT_MASKS_ALL) #define VARIANT_MASK_IR_PROF (0x1ULL << 56) +#define VARIANT_MASK_CSIR_PROF (0x1ULL << 57) #define INSTR_PROF_RAW_VERSION_VAR __llvm_profile_raw_version #define INSTR_PROF_PROFILE_RUNTIME_VAR __llvm_profile_runtime Index: llvm/trunk/include/llvm/Transforms/Instrumentation.h =================================================================== --- llvm/trunk/include/llvm/Transforms/Instrumentation.h +++ llvm/trunk/include/llvm/Transforms/Instrumentation.h @@ -87,10 +87,14 @@ ModulePass *createGCOVProfilerPass(const GCOVOptions &Options = GCOVOptions::getDefault()); -// PGO Instrumention -ModulePass *createPGOInstrumentationGenLegacyPass(); +// PGO Instrumention. Parameter IsCS indicates if this is the context senstive +// instrumentation. +ModulePass *createPGOInstrumentationGenLegacyPass(bool IsCS = false); ModulePass * -createPGOInstrumentationUseLegacyPass(StringRef Filename = StringRef("")); +createPGOInstrumentationUseLegacyPass(StringRef Filename = StringRef(""), + bool IsCS = false); +ModulePass *createPGOInstrumentationGenCreateVarLegacyPass( + StringRef CSInstrName = StringRef("")); ModulePass *createPGOIndirectCallPromotionLegacyPass(bool InLTO = false, bool SamplePGO = false); FunctionPass *createPGOMemOPSizeOptLegacyPass(); @@ -132,15 +136,19 @@ // Use atomic profile counter increments. bool Atomic = false; + // Use BFI to guide register promotion + bool UseBFIInPromotion = false; + // Name of the profile file to use as output std::string InstrProfileOutput; InstrProfOptions() = default; }; -/// Insert frontend instrumentation based profiling. +/// Insert frontend instrumentation based profiling. Parameter IsCS indicates if +// this is the context senstive instrumentation. ModulePass *createInstrProfilingLegacyPass( - const InstrProfOptions &Options = InstrProfOptions()); + const InstrProfOptions &Options = InstrProfOptions(), bool IsCS = false); FunctionPass *createHWAddressSanitizerPass(bool CompileKernel = false, bool Recover = false); Index: llvm/trunk/include/llvm/Transforms/Instrumentation/InstrProfiling.h =================================================================== --- llvm/trunk/include/llvm/Transforms/Instrumentation/InstrProfiling.h +++ llvm/trunk/include/llvm/Transforms/Instrumentation/InstrProfiling.h @@ -35,7 +35,8 @@ class InstrProfiling : public PassInfoMixin { public: InstrProfiling() = default; - InstrProfiling(const InstrProfOptions &Options) : Options(Options) {} + InstrProfiling(const InstrProfOptions &Options, bool IsCS) + : Options(Options), IsCS(IsCS) {} PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM); bool run(Module &M, const TargetLibraryInfo &TLI); @@ -60,6 +61,9 @@ GlobalVariable *NamesVar; size_t NamesSize; + // Is this lowering for the context-sensitive instrumentation. + bool IsCS; + // vector of counter load/store pairs to be register promoted. std::vector PromotionCandidates; Index: llvm/trunk/include/llvm/Transforms/Instrumentation/PGOInstrumentation.h =================================================================== --- llvm/trunk/include/llvm/Transforms/Instrumentation/PGOInstrumentation.h +++ llvm/trunk/include/llvm/Transforms/Instrumentation/PGOInstrumentation.h @@ -17,6 +17,7 @@ #include "llvm/ADT/ArrayRef.h" #include "llvm/IR/PassManager.h" +#include "llvm/ProfileData/InstrProf.h" #include #include @@ -27,22 +28,50 @@ class Module; /// The instrumentation (profile-instr-gen) pass for IR based PGO. +// We use this pass to create COMDAT profile variables for context +// sensitive PGO (CSPGO). The reason to have a pass for this is CSPGO +// can be run after LTO/ThinLTO linking. Lld linker needs to see +// all the COMDAT variables before linking. So we have this pass +// always run before linking for CSPGO. +class PGOInstrumentationGenCreateVar + : public PassInfoMixin { +public: + PGOInstrumentationGenCreateVar(std::string CSInstrName = "") + : CSInstrName(CSInstrName) {} + PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM) { + createProfileFileNameVar(M, CSInstrName); + createIRLevelProfileFlagVar(M, /* IsCS */ true); + return PreservedAnalyses::all(); + } + +private: + std::string CSInstrName; +}; + +/// The instrumentation (profile-instr-gen) pass for IR based PGO. class PGOInstrumentationGen : public PassInfoMixin { public: + PGOInstrumentationGen(bool IsCS = false) : IsCS(IsCS) {} PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM); + +private: + // If this is a context sensitive instrumentation. + bool IsCS; }; /// The profile annotation (profile-instr-use) pass for IR based PGO. class PGOInstrumentationUse : public PassInfoMixin { public: PGOInstrumentationUse(std::string Filename = "", - std::string RemappingFilename = ""); + std::string RemappingFilename = "", bool IsCS = false); PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM); private: std::string ProfileFileName; std::string ProfileRemappingFileName; + // If this is a context sensitive instrumentation. + bool IsCS; }; /// The indirect function call promotion pass. Index: llvm/trunk/lib/Passes/PassBuilder.cpp =================================================================== --- llvm/trunk/lib/Passes/PassBuilder.cpp +++ llvm/trunk/lib/Passes/PassBuilder.cpp @@ -569,7 +569,8 @@ if (!ProfileGenFile.empty()) Options.InstrProfileOutput = ProfileGenFile; Options.DoCounterPromotion = true; - MPM.addPass(InstrProfiling(Options)); + Options.UseBFIInPromotion = false; + MPM.addPass(InstrProfiling(Options, false)); } if (!ProfileUseFile.empty()) Index: llvm/trunk/lib/ProfileData/InstrProf.cpp =================================================================== --- llvm/trunk/lib/ProfileData/InstrProf.cpp +++ llvm/trunk/lib/ProfileData/InstrProf.cpp @@ -1011,6 +1011,25 @@ assert(RangeLast >= RangeStart); } +// Create a COMDAT variable INSTR_PROF_RAW_VERSION_VAR to make the runtime +// aware this is an ir_level profile so it can set the version flag. +void createIRLevelProfileFlagVar(Module &M, bool IsCS) { + const StringRef VarName(INSTR_PROF_QUOTE(INSTR_PROF_RAW_VERSION_VAR)); + Type *IntTy64 = Type::getInt64Ty(M.getContext()); + uint64_t ProfileVersion = (INSTR_PROF_RAW_VERSION | VARIANT_MASK_IR_PROF); + if (IsCS) + ProfileVersion |= VARIANT_MASK_CSIR_PROF; + auto IRLevelVersionVariable = new GlobalVariable( + M, IntTy64, true, GlobalValue::WeakAnyLinkage, + Constant::getIntegerValue(IntTy64, APInt(64, ProfileVersion)), VarName); + IRLevelVersionVariable->setVisibility(GlobalValue::DefaultVisibility); + Triple TT(M.getTargetTriple()); + if (TT.supportsCOMDAT()) { + IRLevelVersionVariable->setLinkage(GlobalValue::ExternalLinkage); + IRLevelVersionVariable->setComdat(M.getOrInsertComdat(VarName)); + } +} + // Create the variable for the profile file name. void createProfileFileNameVar(Module &M, StringRef InstrProfileOutput) { if (InstrProfileOutput.empty()) Index: llvm/trunk/lib/Transforms/Instrumentation/InstrProfiling.cpp =================================================================== --- llvm/trunk/lib/Transforms/Instrumentation/InstrProfiling.cpp +++ llvm/trunk/lib/Transforms/Instrumentation/InstrProfiling.cpp @@ -18,6 +18,8 @@ #include "llvm/ADT/StringRef.h" #include "llvm/ADT/Triple.h" #include "llvm/ADT/Twine.h" +#include "llvm/Analysis/BlockFrequencyInfo.h" +#include "llvm/Analysis/BranchProbabilityInfo.h" #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/IR/Attributes.h" @@ -147,8 +149,8 @@ static char ID; InstrProfilingLegacyPass() : ModulePass(ID) {} - InstrProfilingLegacyPass(const InstrProfOptions &Options) - : ModulePass(ID), InstrProf(Options) {} + InstrProfilingLegacyPass(const InstrProfOptions &Options, bool IsCS) + : ModulePass(ID), InstrProf(Options, IsCS) {} StringRef getPassName() const override { return "Frontend instrumentation-based coverage lowering"; @@ -232,9 +234,9 @@ public: PGOCounterPromoter( DenseMap> &LoopToCands, - Loop &CurLoop, LoopInfo &LI) + Loop &CurLoop, LoopInfo &LI, BlockFrequencyInfo *BFI) : LoopToCandidates(LoopToCands), ExitBlocks(), InsertPts(), L(CurLoop), - LI(LI) { + LI(LI), BFI(BFI) { SmallVector LoopExitBlocks; SmallPtrSet BlockSet; @@ -263,6 +265,20 @@ SSAUpdater SSA(&NewPHIs); Value *InitVal = ConstantInt::get(Cand.first->getType(), 0); + // If BFI is set, we will use it to guide the promotions. + if (BFI) { + auto *BB = Cand.first->getParent(); + auto InstrCount = BFI->getBlockProfileCount(BB); + if (!InstrCount) + continue; + auto PreheaderCount = BFI->getBlockProfileCount(L.getLoopPreheader()); + // If the average loop trip count is not greater than 1.5, we skip + // promotion. + if (PreheaderCount && + (PreheaderCount.getValue() * 3) >= (InstrCount.getValue() * 2)) + continue; + } + PGOCounterPromoterHelper Promoter(Cand.first, Cand.second, SSA, InitVal, L.getLoopPreheader(), ExitBlocks, InsertPts, LoopToCandidates, LI); @@ -312,6 +328,11 @@ SmallVector ExitingBlocks; LP->getExitingBlocks(ExitingBlocks); + + // If BFI is set, we do more aggressive promotions based on BFI. + if (BFI) + return (unsigned)-1; + // Not considierered speculative. if (ExitingBlocks.size() == 1) return MaxNumOfPromotionsPerLoop; @@ -343,6 +364,7 @@ SmallVector InsertPts; Loop &L; LoopInfo &LI; + BlockFrequencyInfo *BFI; }; } // end anonymous namespace @@ -365,8 +387,9 @@ "Frontend instrumentation-based coverage lowering.", false, false) ModulePass * -llvm::createInstrProfilingLegacyPass(const InstrProfOptions &Options) { - return new InstrProfilingLegacyPass(Options); +llvm::createInstrProfilingLegacyPass(const InstrProfOptions &Options, + bool IsCS) { + return new InstrProfilingLegacyPass(Options, IsCS); } static InstrProfIncrementInst *castToIncrementInst(Instruction *Instr) { @@ -415,6 +438,13 @@ LoopInfo LI(DT); DenseMap> LoopPromotionCandidates; + std::unique_ptr BFI; + if (Options.UseBFIInPromotion) { + std::unique_ptr BPI; + BPI.reset(new BranchProbabilityInfo(*F, LI, TLI)); + BFI.reset(new BlockFrequencyInfo(*F, *BPI, LI)); + } + for (const auto &LoadStore : PromotionCandidates) { auto *CounterLoad = LoadStore.first; auto *CounterStore = LoadStore.second; @@ -430,7 +460,7 @@ // Do a post-order traversal of the loops so that counter updates can be // iteratively hoisted outside the loop nest. for (auto *Loop : llvm::reverse(Loops)) { - PGOCounterPromoter Promoter(LoopPromotionCandidates, *Loop, LI); + PGOCounterPromoter Promoter(LoopPromotionCandidates, *Loop, LI, BFI.get()); Promoter.run(&TotalCountersPromoted); } } @@ -681,7 +711,6 @@ // Don't do this for Darwin. compiler-rt uses linker magic. if (TT.isOSDarwin()) return false; - // Use linker script magic to get data/cnts/name start/end. if (TT.isOSLinux() || TT.isOSFreeBSD() || TT.isOSNetBSD() || TT.isOSFuchsia() || TT.isPS4CPU() || TT.isOSWindows()) @@ -985,8 +1014,12 @@ } void InstrProfiling::emitInitialization() { - // Create variable for profile name. - createProfileFileNameVar(*M, Options.InstrProfileOutput); + // Create ProfileFileName variable. Don't don't this for the + // context-sensitive instrumentation lowering: This lowering is after + // LTO/ThinLTO linking. Pass PGOInstrumentationGenCreateVar should + // have already create the variable before LTO/ThinLTO linking. + if (!IsCS) + createProfileFileNameVar(*M, Options.InstrProfileOutput); Function *RegisterF = M->getFunction(getInstrProfRegFuncsName()); if (!RegisterF) return; Index: llvm/trunk/lib/Transforms/Instrumentation/PGOInstrumentation.cpp =================================================================== --- llvm/trunk/lib/Transforms/Instrumentation/PGOInstrumentation.cpp +++ llvm/trunk/lib/Transforms/Instrumentation/PGOInstrumentation.cpp @@ -65,6 +65,7 @@ #include "llvm/Analysis/IndirectCallVisitor.h" #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/OptimizationRemarkEmitter.h" +#include "llvm/Analysis/ProfileSummaryInfo.h" #include "llvm/IR/Attributes.h" #include "llvm/IR/BasicBlock.h" #include "llvm/IR/CFG.h" @@ -132,6 +133,19 @@ STATISTIC(NumOfPGOMismatch, "Number of functions having mismatch profile."); STATISTIC(NumOfPGOMissing, "Number of functions without profile."); STATISTIC(NumOfPGOICall, "Number of indirect call value instrumentations."); +STATISTIC(NumOfCSPGOInstrument, "Number of edges instrumented in CSPGO."); +STATISTIC(NumOfCSPGOSelectInsts, + "Number of select instruction instrumented in CSPGO."); +STATISTIC(NumOfCSPGOMemIntrinsics, + "Number of mem intrinsics instrumented in CSPGO."); +STATISTIC(NumOfCSPGOEdge, "Number of edges in CSPGO."); +STATISTIC(NumOfCSPGOBB, "Number of basic-blocks in CSPGO."); +STATISTIC(NumOfCSPGOSplit, "Number of critical edge splits in CSPGO."); +STATISTIC(NumOfCSPGOFunc, + "Number of functions having valid profile counts in CSPGO."); +STATISTIC(NumOfCSPGOMismatch, + "Number of functions having mismatch profile in CSPGO."); +STATISTIC(NumOfCSPGOMissing, "Number of functions without profile in CSPGO."); // Command line option to specify the file to read profile from. This is // mainly used for testing. @@ -383,7 +397,8 @@ public: static char ID; - PGOInstrumentationGenLegacyPass() : ModulePass(ID) { + PGOInstrumentationGenLegacyPass(bool IsCS = false) + : ModulePass(ID), IsCS(IsCS) { initializePGOInstrumentationGenLegacyPassPass( *PassRegistry::getPassRegistry()); } @@ -391,6 +406,8 @@ StringRef getPassName() const override { return "PGOInstrumentationGenPass"; } private: + // Is this is context-sensitive instrumentation. + bool IsCS; bool runOnModule(Module &M) override; void getAnalysisUsage(AnalysisUsage &AU) const override { @@ -403,8 +420,8 @@ static char ID; // Provide the profile filename as the parameter. - PGOInstrumentationUseLegacyPass(std::string Filename = "") - : ModulePass(ID), ProfileFileName(std::move(Filename)) { + PGOInstrumentationUseLegacyPass(std::string Filename = "", bool IsCS = false) + : ModulePass(ID), ProfileFileName(std::move(Filename)), IsCS(IsCS) { if (!PGOTestProfileFile.empty()) ProfileFileName = PGOTestProfileFile; initializePGOInstrumentationUseLegacyPassPass( @@ -415,14 +432,38 @@ private: std::string ProfileFileName; + // Is this is context-sensitive instrumentation use. + bool IsCS; bool runOnModule(Module &M) override; void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.addRequired(); AU.addRequired(); } }; +class PGOInstrumentationGenCreateVarLegacyPass : public ModulePass { +public: + static char ID; + StringRef getPassName() const override { + return "PGOInstrumentationGenCreateVarPass"; + } + PGOInstrumentationGenCreateVarLegacyPass(std::string CSInstrName = "") + : ModulePass(ID), InstrProfileOutput(CSInstrName) { + initializePGOInstrumentationGenCreateVarLegacyPassPass( + *PassRegistry::getPassRegistry()); + } + +private: + bool runOnModule(Module &M) override { + createProfileFileNameVar(M, InstrProfileOutput); + createIRLevelProfileFlagVar(M, true); + return false; + } + std::string InstrProfileOutput; +}; + } // end anonymous namespace char PGOInstrumentationGenLegacyPass::ID = 0; @@ -434,8 +475,8 @@ INITIALIZE_PASS_END(PGOInstrumentationGenLegacyPass, "pgo-instr-gen", "PGO instrumentation.", false, false) -ModulePass *llvm::createPGOInstrumentationGenLegacyPass() { - return new PGOInstrumentationGenLegacyPass(); +ModulePass *llvm::createPGOInstrumentationGenLegacyPass(bool IsCS) { + return new PGOInstrumentationGenLegacyPass(IsCS); } char PGOInstrumentationUseLegacyPass::ID = 0; @@ -444,11 +485,25 @@ "Read PGO instrumentation profile.", false, false) INITIALIZE_PASS_DEPENDENCY(BlockFrequencyInfoWrapperPass) INITIALIZE_PASS_DEPENDENCY(BranchProbabilityInfoWrapperPass) +INITIALIZE_PASS_DEPENDENCY(ProfileSummaryInfoWrapperPass) INITIALIZE_PASS_END(PGOInstrumentationUseLegacyPass, "pgo-instr-use", "Read PGO instrumentation profile.", false, false) -ModulePass *llvm::createPGOInstrumentationUseLegacyPass(StringRef Filename) { - return new PGOInstrumentationUseLegacyPass(Filename.str()); +ModulePass *llvm::createPGOInstrumentationUseLegacyPass(StringRef Filename, + bool IsCS) { + return new PGOInstrumentationUseLegacyPass(Filename.str(), IsCS); +} + +char PGOInstrumentationGenCreateVarLegacyPass::ID = 0; + +INITIALIZE_PASS(PGOInstrumentationGenCreateVarLegacyPass, + "pgo-instr-gen-create-var", + "Create PGO instrumentation version variable for CSPGO.", false, + false) + +ModulePass * +llvm::createPGOInstrumentationGenCreateVarLegacyPass(StringRef CSInstrName) { + return new PGOInstrumentationGenCreateVarLegacyPass(CSInstrName); } namespace { @@ -496,6 +551,9 @@ private: Function &F; + // Is this is context-sensitive instrumentation. + bool IsCS; + // A map that stores the Comdat group in function F. std::unordered_multimap &ComdatMembers; @@ -535,15 +593,23 @@ Function &Func, std::unordered_multimap &ComdatMembers, bool CreateGlobalVar = false, BranchProbabilityInfo *BPI = nullptr, - BlockFrequencyInfo *BFI = nullptr) - : F(Func), ComdatMembers(ComdatMembers), ValueSites(IPVK_Last + 1), - SIVisitor(Func), MIVisitor(Func), MST(F, BPI, BFI) { + BlockFrequencyInfo *BFI = nullptr, bool IsCS = false) + : F(Func), IsCS(IsCS), ComdatMembers(ComdatMembers), + ValueSites(IPVK_Last + 1), SIVisitor(Func), MIVisitor(Func), + MST(F, BPI, BFI) { // This should be done before CFG hash computation. SIVisitor.countSelects(Func); MIVisitor.countMemIntrinsics(Func); - NumOfPGOSelectInsts += SIVisitor.getNumOfSelectInsts(); - NumOfPGOMemIntrinsics += MIVisitor.getNumOfMemIntrinsics(); - ValueSites[IPVK_IndirectCallTarget] = findIndirectCalls(Func); + if (!IsCS) { + NumOfPGOSelectInsts += SIVisitor.getNumOfSelectInsts(); + NumOfPGOMemIntrinsics += MIVisitor.getNumOfMemIntrinsics(); + NumOfPGOBB += MST.BBInfos.size(); + ValueSites[IPVK_IndirectCallTarget] = findIndirectCalls(Func); + } else { + NumOfCSPGOSelectInsts += SIVisitor.getNumOfSelectInsts(); + NumOfCSPGOMemIntrinsics += MIVisitor.getNumOfMemIntrinsics(); + NumOfCSPGOBB += MST.BBInfos.size(); + } ValueSites[IPVK_MemOPSize] = MIVisitor.findMemIntrinsics(Func); FuncName = getPGOFuncName(F); @@ -552,13 +618,12 @@ renameComdatFunction(); LLVM_DEBUG(dumpInfo("after CFGMST")); - NumOfPGOBB += MST.BBInfos.size(); for (auto &E : MST.AllEdges) { if (E->Removed) continue; - NumOfPGOEdge++; + IsCS ? NumOfCSPGOEdge++ : NumOfPGOEdge++; if (!E->InMST) - NumOfPGOInstrument++; + IsCS ? NumOfCSPGOInstrument++ : NumOfPGOInstrument++; } if (CreateGlobalVar) @@ -597,9 +662,17 @@ } } JC.update(Indexes); + + // Hash format for context sensitive profile. Reserve 4 bits for other + // information. FunctionHash = (uint64_t)SIVisitor.getNumOfSelectInsts() << 56 | (uint64_t)ValueSites[IPVK_IndirectCallTarget].size() << 48 | + //(uint64_t)ValueSites[IPVK_MemOPSize].size() << 40 | (uint64_t)MST.AllEdges.size() << 32 | JC.getCRC(); + // Reserve bit 60-63 for other information purpose. + FunctionHash &= 0x0FFFFFFFFFFFFFFF; + if (IsCS) + NamedInstrProfRecord::setCSFlagInHash(FunctionHash); LLVM_DEBUG(dbgs() << "Function Hash Computation for " << F.getName() << ":\n" << " CRC = " << JC.getCRC() << ", Selects = " << SIVisitor.getNumOfSelectInsts() @@ -705,7 +778,7 @@ // For a critical edge, we have to split. Instrument the newly // created BB. - NumOfPGOSplit++; + IsCS ? NumOfCSPGOSplit++ : NumOfPGOSplit++; LLVM_DEBUG(dbgs() << "Split critical edge: " << getBBInfo(SrcBB).Index << " --> " << getBBInfo(DestBB).Index << "\n"); unsigned SuccNum = GetSuccessorNumber(SrcBB, DestBB); @@ -720,12 +793,14 @@ // Critical edges will be split. static void instrumentOneFunc( Function &F, Module *M, BranchProbabilityInfo *BPI, BlockFrequencyInfo *BFI, - std::unordered_multimap &ComdatMembers) { + std::unordered_multimap &ComdatMembers, + bool IsCS) { // Split indirectbr critical edges here before computing the MST rather than // later in getInstrBB() to avoid invalidating it. SplitIndirectBrCriticalEdges(F, BPI, BFI); + FuncPGOInstrumentation FuncInfo(F, ComdatMembers, true, BPI, - BFI); + BFI, IsCS); unsigned NumCounters = FuncInfo.getNumCounters(); uint32_t I = 0; @@ -852,10 +927,10 @@ PGOUseFunc(Function &Func, Module *Modu, std::unordered_multimap &ComdatMembers, BranchProbabilityInfo *BPI = nullptr, - BlockFrequencyInfo *BFIin = nullptr) + BlockFrequencyInfo *BFIin = nullptr, bool IsCS = false) : F(Func), M(Modu), BFI(BFIin), - FuncInfo(Func, ComdatMembers, false, BPI, BFIin), - FreqAttr(FFA_Normal) {} + FuncInfo(Func, ComdatMembers, false, BPI, BFIin, IsCS), + FreqAttr(FFA_Normal), IsCS(IsCS) {} // Read counts for the instrumented BB from profile. bool readCounters(IndexedInstrProfReader *PGOReader, bool &AllZeros); @@ -928,6 +1003,9 @@ // Function hotness info derived from profile. FuncFreqAttr FreqAttr; + // Is to use the context sensitive profile. + bool IsCS; + // Find the Instrumented BB and set the value. void setInstrumentedCounts(const std::vector &CountFromProfile); @@ -1021,23 +1099,31 @@ handleAllErrors(std::move(E), [&](const InstrProfError &IPE) { auto Err = IPE.get(); bool SkipWarning = false; + LLVM_DEBUG(dbgs() << "Error in reading profile for Func " + << FuncInfo.FuncName << ": "); if (Err == instrprof_error::unknown_function) { - NumOfPGOMissing++; + IsCS ? NumOfCSPGOMissing++ : NumOfPGOMissing++; SkipWarning = !PGOWarnMissing; + LLVM_DEBUG(dbgs() << "unknown function"); } else if (Err == instrprof_error::hash_mismatch || Err == instrprof_error::malformed) { - NumOfPGOMismatch++; + IsCS ? NumOfCSPGOMismatch++ : NumOfPGOMismatch++; SkipWarning = NoPGOWarnMismatch || (NoPGOWarnMismatchComdat && (F.hasComdat() || F.getLinkage() == GlobalValue::AvailableExternallyLinkage)); + LLVM_DEBUG(dbgs() << "hash mismatch (skip=" << SkipWarning << ")"); } + LLVM_DEBUG(dbgs() << " IsCS=" << IsCS << "\n"); if (SkipWarning) return; - std::string Msg = IPE.message() + std::string(" ") + F.getName().str(); + std::string Msg = IPE.message() + std::string(" ") + F.getName().str() + + std::string(" Hash = ") + + std::to_string(FuncInfo.FunctionHash); + Ctx.diagnose( DiagnosticInfoPGOProfile(M->getName().data(), Msg, DS_Warning)); }); @@ -1046,7 +1132,7 @@ ProfileRecord = std::move(Result.get()); std::vector &CountFromProfile = ProfileRecord.Counts; - NumOfPGOFunc++; + IsCS ? NumOfCSPGOFunc++ : NumOfPGOFunc++; LLVM_DEBUG(dbgs() << CountFromProfile.size() << " counts\n"); uint64_t ValueSum = 0; for (unsigned I = 0, S = CountFromProfile.size(); I < S; I++) { @@ -1061,7 +1147,11 @@ getBBInfo(nullptr).UnknownCountInEdge = 2; setInstrumentedCounts(CountFromProfile); +#if 0 + ProgramMaxCount = PGOReader->getMaximumFunctionCount(IsCS); +#else ProgramMaxCount = PGOReader->getMaximumFunctionCount(); +#endif return true; } @@ -1166,7 +1256,8 @@ // Assign the scaled count values to the BB with multiple out edges. void PGOUseFunc::setBranchWeights() { // Generate MD_prof metadata for every branch instruction. - LLVM_DEBUG(dbgs() << "\nSetting branch weights.\n"); + LLVM_DEBUG(dbgs() << "\nSetting branch weights for func " << F.getName() + << " IsCS=" << IsCS << "\n"); for (auto &BB : F) { Instruction *TI = BB.getTerminator(); if (TI->getNumSuccessors() < 2) @@ -1174,6 +1265,7 @@ if (!(isa(TI) || isa(TI) || isa(TI))) continue; + if (getBBInfo(&BB).CountValue == 0) continue; @@ -1351,24 +1443,6 @@ } } -// Create a COMDAT variable INSTR_PROF_RAW_VERSION_VAR to make the runtime -// aware this is an ir_level profile so it can set the version flag. -static void createIRLevelProfileFlagVariable(Module &M) { - Type *IntTy64 = Type::getInt64Ty(M.getContext()); - uint64_t ProfileVersion = (INSTR_PROF_RAW_VERSION | VARIANT_MASK_IR_PROF); - auto IRLevelVersionVariable = new GlobalVariable( - M, IntTy64, true, GlobalVariable::ExternalLinkage, - Constant::getIntegerValue(IntTy64, APInt(64, ProfileVersion)), - INSTR_PROF_QUOTE(INSTR_PROF_RAW_VERSION_VAR)); - IRLevelVersionVariable->setVisibility(GlobalValue::DefaultVisibility); - Triple TT(M.getTargetTriple()); - if (!TT.supportsCOMDAT()) - IRLevelVersionVariable->setLinkage(GlobalValue::WeakAnyLinkage); - else - IRLevelVersionVariable->setComdat(M.getOrInsertComdat( - StringRef(INSTR_PROF_QUOTE(INSTR_PROF_RAW_VERSION_VAR)))); -} - // Collect the set of members for each Comdat in module M and store // in ComdatMembers. static void collectComdatMembers( @@ -1389,8 +1463,11 @@ static bool InstrumentAllFunctions( Module &M, function_ref LookupBPI, - function_ref LookupBFI) { - createIRLevelProfileFlagVariable(M); + function_ref LookupBFI, bool IsCS) { + // For the context-sensitve instrumentation, we should have a separated pass + // (before LTO/ThinLTO linking) to create these variables. + if (!IsCS) + createIRLevelProfileFlagVar(M, /* IsCS */ false); std::unordered_multimap ComdatMembers; collectComdatMembers(M, ComdatMembers); @@ -1399,7 +1476,7 @@ continue; auto *BPI = LookupBPI(F); auto *BFI = LookupBFI(F); - instrumentOneFunc(F, &M, BPI, BFI, ComdatMembers); + instrumentOneFunc(F, &M, BPI, BFI, ComdatMembers, IsCS); } return true; } @@ -1414,7 +1491,7 @@ auto LookupBFI = [this](Function &F) { return &this->getAnalysis(F).getBFI(); }; - return InstrumentAllFunctions(M, LookupBPI, LookupBFI); + return InstrumentAllFunctions(M, LookupBPI, LookupBFI, IsCS); } PreservedAnalyses PGOInstrumentationGen::run(Module &M, @@ -1428,7 +1505,7 @@ return &FAM.getResult(F); }; - if (!InstrumentAllFunctions(M, LookupBPI, LookupBFI)) + if (!InstrumentAllFunctions(M, LookupBPI, LookupBFI, IsCS)) return PreservedAnalyses::all(); return PreservedAnalyses::none(); @@ -1437,7 +1514,7 @@ static bool annotateAllFunctions( Module &M, StringRef ProfileFileName, StringRef ProfileRemappingFileName, function_ref LookupBPI, - function_ref LookupBFI) { + function_ref LookupBFI, bool IsCS) { LLVM_DEBUG(dbgs() << "Read in profile counters: "); auto &Ctx = M.getContext(); // Read the counter array from file. @@ -1458,6 +1535,11 @@ StringRef("Cannot get PGOReader"))); return false; } +#if 0 + if (!PGOReader->hasCSIRLevelProfile() && IsCS) + return false; +#endif + // TODO: might need to change the warning once the clang option is finalized. if (!PGOReader->isIRLevelProfile()) { Ctx.diagnose(DiagnosticInfoPGOProfile( @@ -1477,7 +1559,7 @@ // Split indirectbr critical edges here before computing the MST rather than // later in getInstrBB() to avoid invalidating it. SplitIndirectBrCriticalEdges(F, BPI, BFI); - PGOUseFunc Func(F, &M, ComdatMembers, BPI, BFI); + PGOUseFunc Func(F, &M, ComdatMembers, BPI, BFI, IsCS); bool AllZeros = false; if (!Func.readCounters(PGOReader.get(), AllZeros)) continue; @@ -1525,7 +1607,14 @@ } } } +#if 0 + M.setProfileSummary(PGOReader->getSummary(IsCS).getMD(M.getContext()), + IsCS ? ProfileSummary::PSK_CSInstr + : ProfileSummary::PSK_Instr); +#else M.setProfileSummary(PGOReader->getSummary().getMD(M.getContext())); +#endif + // Set function hotness attribute from the profile. // We have to apply these attributes at the end because their presence // can affect the BranchProbabilityInfo of any callers, resulting in an @@ -1544,9 +1633,10 @@ } PGOInstrumentationUse::PGOInstrumentationUse(std::string Filename, - std::string RemappingFilename) + std::string RemappingFilename, + bool IsCS) : ProfileFileName(std::move(Filename)), - ProfileRemappingFileName(std::move(RemappingFilename)) { + ProfileRemappingFileName(std::move(RemappingFilename)), IsCS(IsCS) { if (!PGOTestProfileFile.empty()) ProfileFileName = PGOTestProfileFile; if (!PGOTestProfileRemappingFile.empty()) @@ -1566,7 +1656,7 @@ }; if (!annotateAllFunctions(M, ProfileFileName, ProfileRemappingFileName, - LookupBPI, LookupBFI)) + LookupBPI, LookupBFI, IsCS)) return PreservedAnalyses::all(); return PreservedAnalyses::none(); @@ -1583,7 +1673,8 @@ return &this->getAnalysis(F).getBFI(); }; - return annotateAllFunctions(M, ProfileFileName, "", LookupBPI, LookupBFI); + return annotateAllFunctions(M, ProfileFileName, "", LookupBPI, LookupBFI, + IsCS); } static std::string getSimpleNodeName(const BasicBlock *Node) { Index: llvm/trunk/tools/gold/gold-plugin.cpp =================================================================== --- llvm/trunk/tools/gold/gold-plugin.cpp +++ llvm/trunk/tools/gold/gold-plugin.cpp @@ -209,6 +209,10 @@ static std::string OptRemarksFilename; static bool OptRemarksWithHotness = false; + // Context sensitive PGO options. + static std::string cs_profile_path; + static bool cs_pgo_gen = false; + static void process_plugin_option(const char *opt_) { if (opt_ == nullptr) @@ -268,7 +272,11 @@ } else if (opt == "disable-verify") { DisableVerify = true; } else if (opt.startswith("sample-profile=")) { - sample_profile= opt.substr(strlen("sample-profile=")); + sample_profile = opt.substr(strlen("sample-profile=")); + } else if (opt == "cs-profile-generate") { + cs_pgo_gen = true; + } else if (opt.startswith("cs-profile-path=")) { + cs_profile_path = opt.substr(strlen("cs-profile-path=")); } else if (opt == "new-pass-manager") { new_pass_manager = true; } else if (opt == "debug-pass-manager") { @@ -892,6 +900,10 @@ if (!options::sample_profile.empty()) Conf.SampleProfile = options::sample_profile; + if (!options::cs_profile_path.empty()) + Conf.CSIRProfile = options::cs_profile_path; + Conf.RunCSIRInstr = options::cs_pgo_gen; + Conf.DwoDir = options::dwo_dir; // Set up optimization remarks handling. Index: llvm/trunk/tools/llvm-lto2/llvm-lto2.cpp =================================================================== --- llvm/trunk/tools/llvm-lto2/llvm-lto2.cpp +++ llvm/trunk/tools/llvm-lto2/llvm-lto2.cpp @@ -104,6 +104,15 @@ SamplePGOFile("lto-sample-profile-file", cl::desc("Specify a SamplePGO profile file")); +static cl::opt + CSPGOFile("lto-cspgo-profile-file", + cl::desc("Specify a context sensitive PGO profile file")); + +static cl::opt + RunCSIRInstr("lto-cspgo-gen", + cl::desc("Run PGO context sensitive IR instrumentation"), + cl::init(false), cl::Hidden); + static cl::opt UseNewPM("use-new-pm", cl::desc("Run LTO passes using the new pass manager"), @@ -214,6 +223,8 @@ Conf.RemarksWithHotness = OptRemarksWithHotness; Conf.SampleProfile = SamplePGOFile; + Conf.CSIRProfile = CSPGOFile; + Conf.RunCSIRInstr = RunCSIRInstr; // Run a custom pipeline, if asked for. Conf.OptPipeline = OptPipeline;