Index: include/llvm/Analysis/ProfileSummaryInfo.h =================================================================== --- include/llvm/Analysis/ProfileSummaryInfo.h +++ include/llvm/Analysis/ProfileSummaryInfo.h @@ -74,6 +74,12 @@ Summary->getKind() == ProfileSummary::PSK_Instr; } + /// Returns true if module \c M has context sensitive instrumentation profile. + bool hasCSInstrumentationProfile() { + return hasProfileSummary() && + Summary->getKind() == ProfileSummary::PSK_CSInstr; + } + /// Handle the invalidation of this information. /// /// When used as a result of \c ProfileSummaryAnalysis this method will be Index: include/llvm/IR/Module.h =================================================================== --- include/llvm/IR/Module.h +++ include/llvm/IR/Module.h @@ -29,6 +29,7 @@ #include "llvm/IR/GlobalIFunc.h" #include "llvm/IR/GlobalVariable.h" #include "llvm/IR/Metadata.h" +#include "llvm/IR/ProfileSummary.h" #include "llvm/IR/SymbolTableListTraits.h" #include "llvm/Support/CBindingWrapping.h" #include "llvm/Support/CodeGen.h" @@ -856,10 +857,10 @@ /// @{ /// Attach profile summary metadata to this module. - void setProfileSummary(Metadata *M); + void setProfileSummary(Metadata *M, ProfileSummary::Kind Kind); /// Returns profile summary metadata - Metadata *getProfileSummary(); + Metadata *getProfileSummary(ProfileSummary::Kind Kind); /// @} /// Returns true if PLT should be avoided for RTLib calls. 
Index: include/llvm/IR/ProfileSummary.h =================================================================== --- include/llvm/IR/ProfileSummary.h +++ include/llvm/IR/ProfileSummary.h @@ -43,11 +43,10 @@ class ProfileSummary { public: - enum Kind { PSK_Instr, PSK_Sample }; + enum Kind { PSK_Instr, PSK_CSInstr, PSK_Sample }; private: const Kind PSK; - static const char *KindStr[2]; SummaryEntryVector DetailedSummary; uint64_t TotalCount, MaxCount, MaxInternalCount, MaxFunctionCount; uint32_t NumCounts, NumFunctions; Index: include/llvm/LTO/Config.h =================================================================== --- include/llvm/LTO/Config.h +++ include/llvm/LTO/Config.h @@ -56,6 +56,9 @@ /// Disable entirely the optimizer, including importing for ThinLTO bool CodeGenOnly = false; + /// Run PGO context sensitive IR instrumentation. + bool RunCSIRInstr = false; + /// If this field is set, the set of passes run in the middle-end optimizer /// will be the one specified by the string. Only works with the new pass /// manager as the old one doesn't have this ability. @@ -74,6 +77,9 @@ /// with this triple. std::string DefaultTriple; + /// Context Sensitive PGO profile path. + std::string CSIRProfile; + /// Sample PGO profile path. std::string SampleProfile; Index: include/llvm/Passes/PassBuilder.h =================================================================== --- include/llvm/Passes/PassBuilder.h +++ include/llvm/Passes/PassBuilder.h @@ -32,25 +32,24 @@ /// A struct capturing PGO tunables. 
struct PGOOptions { + enum PGOAction { NoAction, IRInstr, CSIRInstr, CSIRUse }; PGOOptions(std::string ProfileGenFile = "", std::string ProfileUseFile = "", std::string SampleProfileFile = "", - std::string ProfileRemappingFile = "", - bool RunProfileGen = false, bool SamplePGOSupport = false) + std::string ProfileRemappingFile = "", PGOAction Action = NoAction, + bool SamplePGOSupport = false) : ProfileGenFile(ProfileGenFile), ProfileUseFile(ProfileUseFile), SampleProfileFile(SampleProfileFile), - ProfileRemappingFile(ProfileRemappingFile), - RunProfileGen(RunProfileGen), + ProfileRemappingFile(ProfileRemappingFile), Action(Action), SamplePGOSupport(SamplePGOSupport || !SampleProfileFile.empty()) { - assert((RunProfileGen || - !SampleProfileFile.empty() || - !ProfileUseFile.empty() || - SamplePGOSupport) && "Illegal PGOOptions."); + assert((Action != NoAction || !SampleProfileFile.empty() || + !ProfileUseFile.empty() || SamplePGOSupport) && + "Illegal PGOOptions."); } std::string ProfileGenFile; std::string ProfileUseFile; std::string SampleProfileFile; std::string ProfileRemappingFile; - bool RunProfileGen; + PGOAction Action; bool SamplePGOSupport; }; @@ -606,9 +605,8 @@ bool VerifyEachPass, bool DebugLogging); void addPGOInstrPasses(ModulePassManager &MPM, bool DebugLogging, - OptimizationLevel Level, bool RunProfileGen, - std::string ProfileGenFile, - std::string ProfileUseFile, + OptimizationLevel Level, bool RunProfileGen, bool IsCS, + std::string ProfileGenFile, std::string ProfileUseFile, std::string ProfileRemappingFile); void invokePeepholeEPCallbacks(FunctionPassManager &, OptimizationLevel); Index: include/llvm/ProfileData/InstrProf.h =================================================================== --- include/llvm/ProfileData/InstrProf.h +++ include/llvm/ProfileData/InstrProf.h @@ -767,11 +767,19 @@ struct NamedInstrProfRecord : InstrProfRecord { StringRef Name; uint64_t Hash; + static const int CS_FLAG_IN_FUNC_HASH = 60; NamedInstrProfRecord() = 
default; NamedInstrProfRecord(StringRef Name, uint64_t Hash, std::vector Counts) : InstrProfRecord(std::move(Counts)), Name(Name), Hash(Hash) {} + + static bool hasCSFlagInHash(uint64_t FuncHash) { + return ((FuncHash >> CS_FLAG_IN_FUNC_HASH) & 1); + } + static void setCSFlagInHash(uint64_t &FuncHash) { + FuncHash |= ((uint64_t)1 << CS_FLAG_IN_FUNC_HASH); + } }; uint32_t InstrProfRecord::getNumValueKinds() const { @@ -1005,6 +1013,7 @@ // from control data struct is changed from raw pointer to Name's MD5 value. // Version 4: ValueDataBegin and ValueDataSizes fields are removed from the // raw header. +// Version 5: Bits 60-63 of FuncHash are reserved for storing other information. const uint64_t Version = INSTR_PROF_RAW_VERSION; template inline uint64_t getMagic(); Index: include/llvm/ProfileData/InstrProfData.inc =================================================================== --- include/llvm/ProfileData/InstrProfData.inc +++ include/llvm/ProfileData/InstrProfData.inc @@ -636,10 +636,12 @@ * version for other variants of profile. We set the lowest bit of the upper 8 * bits (i.e. bit 56) to 1 to indicate if this is an IR-level instrumentaiton * generated profile, and 0 if this is a Clang FE generated profile. + * 1 in bit 57 indicates there are context-sensitive records in the profile. */ #define VARIANT_MASKS_ALL 0xff00000000000000ULL #define GET_VERSION(V) ((V) & ~VARIANT_MASKS_ALL) #define VARIANT_MASK_IR_PROF (0x1ULL << 56) +#define VARIANT_MASK_CSIR_PROF (0x1ULL << 57) #define INSTR_PROF_RAW_VERSION_VAR __llvm_profile_raw_version #define INSTR_PROF_PROFILE_RUNTIME_VAR __llvm_profile_runtime Index: include/llvm/ProfileData/InstrProfReader.h =================================================================== --- include/llvm/ProfileData/InstrProfReader.h +++ include/llvm/ProfileData/InstrProfReader.h @@ -78,6 +78,8 @@ virtual bool isIRLevelProfile() const = 0; + virtual bool hasCSIRLevelProfile() const = 0; + /// Return the PGO symtab. 
There are three different readers: /// Raw, Text, and Indexed profile readers. The first two types /// of readers are used only by llvm-profdata tool, while the indexed @@ -143,6 +145,7 @@ /// Iterator over the profile data. line_iterator Line; bool IsIRLevelProfile = false; + bool HasCSIRLevelProfile = false; Error readValueProfileData(InstrProfRecord &Record); @@ -157,6 +160,8 @@ bool isIRLevelProfile() const override { return IsIRLevelProfile; } + bool hasCSIRLevelProfile() const override { return HasCSIRLevelProfile; } + /// Read the header. Error readHeader() override; @@ -213,6 +218,10 @@ return (Version & VARIANT_MASK_IR_PROF) != 0; } + bool hasCSIRLevelProfile() const override { + return (Version & VARIANT_MASK_CSIR_PROF) != 0; + } + InstrProfSymtab &getSymtab() override { assert(Symtab.get()); return *Symtab.get(); @@ -342,6 +351,7 @@ virtual void setValueProfDataEndianness(support::endianness Endianness) = 0; virtual uint64_t getVersion() const = 0; virtual bool isIRLevelProfile() const = 0; + virtual bool hasCSIRLevelProfile() const = 0; virtual Error populateSymtab(InstrProfSymtab &) = 0; }; @@ -386,6 +396,10 @@ return (FormatVersion & VARIANT_MASK_IR_PROF) != 0; } + bool hasCSIRLevelProfile() const override { + return (FormatVersion & VARIANT_MASK_CSIR_PROF) != 0; + } + Error populateSymtab(InstrProfSymtab &Symtab) override { return Symtab.create(HashTable->keys()); } @@ -413,13 +427,15 @@ std::unique_ptr Remapper; /// Profile summary data. std::unique_ptr Summary; + /// Context sensitive profile summary data. + std::unique_ptr CS_Summary; // Index to the current record in the record array. unsigned RecordIndex; // Read the profile summary. Return a pointer pointing to one byte past the // end of the summary data if it exists or the input \c Cur. 
const unsigned char *readSummary(IndexedInstrProf::ProfVersion Version, - const unsigned char *Cur); + const unsigned char *Cur, bool IsCS); public: IndexedInstrProfReader( @@ -433,6 +449,9 @@ /// Return the profile version. uint64_t getVersion() const { return Index->getVersion(); } bool isIRLevelProfile() const override { return Index->isIRLevelProfile(); } + bool hasCSIRLevelProfile() const override { + return Index->hasCSIRLevelProfile(); + } /// Return true if the given buffer is in an indexed instrprof format. static bool hasFormat(const MemoryBuffer &DataBuffer); @@ -451,7 +470,15 @@ std::vector &Counts); /// Return the maximum of all known function counts. - uint64_t getMaximumFunctionCount() { return Summary->getMaxFunctionCount(); } + uint64_t getMaximumFunctionCount(bool IsCS) { + if (IsCS) { + assert(CS_Summary && "No context sensitive profile summary"); + return CS_Summary->getMaxFunctionCount(); + } else { + assert(Summary && "No profile summary"); + return Summary->getMaxFunctionCount(); + } + } /// Factory method to create an indexed reader. static Expected> @@ -470,7 +497,15 @@ // to be used by llvm-profdata (for dumping). Avoid using this when // the client is the compiler. 
InstrProfSymtab &getSymtab() override; - ProfileSummary &getSummary() { return *(Summary.get()); } + ProfileSummary &getSummary(bool IsCS) { + if (IsCS) { + assert(CS_Summary && "No context sensitive summary"); + return *(CS_Summary.get()); + } else { + assert(Summary && "No profile summary"); + return *(Summary.get()); + } + } }; } // end namespace llvm Index: include/llvm/ProfileData/InstrProfWriter.h =================================================================== --- include/llvm/ProfileData/InstrProfWriter.h +++ include/llvm/ProfileData/InstrProfWriter.h @@ -34,7 +34,7 @@ class InstrProfWriter { public: using ProfilingData = SmallDenseMap; - enum ProfKind { PF_Unknown = 0, PF_FE, PF_IRLevel }; + enum ProfKind { PF_Unknown = 0, PF_FE, PF_IRLevel, PF_IRLevelWithCS }; private: bool Sparse; @@ -75,15 +75,24 @@ std::unique_ptr writeBuffer(); /// Set the ProfileKind. Report error if mixing FE and IR level profiles. - Error setIsIRLevelProfile(bool IsIRLevel) { + Error setIsIRLevelProfile(bool IsIRLevel, bool WithCS) { if (ProfileKind == PF_Unknown) { - ProfileKind = IsIRLevel ? PF_IRLevel: PF_FE; + if (IsIRLevel) + ProfileKind = WithCS ? PF_IRLevelWithCS : PF_IRLevel; + else + ProfileKind = PF_FE; return Error::success(); } - return (IsIRLevel == (ProfileKind == PF_IRLevel)) - ? Error::success() - : make_error( - instrprof_error::unsupported_version); + + if (((ProfileKind != PF_FE) && !IsIRLevel) || + ((ProfileKind == PF_FE) && IsIRLevel)) + return make_error(instrprof_error::unsupported_version); + + // Promote to PF_IRLevelWithCS if WithCS is true; + if (ProfileKind == PF_IRLevel && WithCS) + ProfileKind = PF_IRLevelWithCS; + + return Error::success(); } // Internal interface for testing purpose only. 
Index: include/llvm/Transforms/IPO/PassManagerBuilder.h =================================================================== --- include/llvm/Transforms/IPO/PassManagerBuilder.h +++ include/llvm/Transforms/IPO/PassManagerBuilder.h @@ -160,6 +160,10 @@ /// Enable profile instrumentation pass. bool EnablePGOInstrGen; + /// Enable profile context sensitive instrumentation pass. + bool EnablePGOCSInstrGen; + /// Enable profile context sensitive profile use pass. + bool EnablePGOCSInstrUse; /// Profile data file name that the instrumentation will be written to. std::string PGOInstrGen; /// Path of the profile data file. @@ -186,7 +190,7 @@ void addInitialAliasAnalysisPasses(legacy::PassManagerBase &PM) const; void addLTOOptimizationPasses(legacy::PassManagerBase &PM); void addLateLTOOptimizationPasses(legacy::PassManagerBase &PM); - void addPGOInstrPasses(legacy::PassManagerBase &MPM); + void addPGOInstrPasses(legacy::PassManagerBase &MPM, bool IsCS); void addFunctionSimplificationPasses(legacy::PassManagerBase &MPM); void addInstructionCombiningPass(legacy::PassManagerBase &MPM) const; Index: include/llvm/Transforms/Instrumentation.h =================================================================== --- include/llvm/Transforms/Instrumentation.h +++ include/llvm/Transforms/Instrumentation.h @@ -89,9 +89,10 @@ GCOVOptions::getDefault()); // PGO Instrumention -ModulePass *createPGOInstrumentationGenLegacyPass(); +ModulePass *createPGOInstrumentationGenLegacyPass(bool IsCS = false); ModulePass * -createPGOInstrumentationUseLegacyPass(StringRef Filename = StringRef("")); +createPGOInstrumentationUseLegacyPass(StringRef Filename = StringRef(""), + bool IsCS = false); ModulePass *createPGOIndirectCallPromotionLegacyPass(bool InLTO = false, bool SamplePGO = false); FunctionPass *createPGOMemOPSizeOptLegacyPass(); @@ -133,6 +134,9 @@ // Use atomic profile counter increments. 
bool Atomic = false; + // Use BFI to guide register promotion + bool UseBFIInPromotion = false; + // Name of the profile file to use as output std::string InstrProfileOutput; Index: include/llvm/Transforms/Instrumentation/PGOInstrumentation.h =================================================================== --- include/llvm/Transforms/Instrumentation/PGOInstrumentation.h +++ include/llvm/Transforms/Instrumentation/PGOInstrumentation.h @@ -30,20 +30,27 @@ /// The instrumentation (profile-instr-gen) pass for IR based PGO. class PGOInstrumentationGen : public PassInfoMixin { public: + PGOInstrumentationGen(bool IsCS = false) : IsCS(IsCS) {} PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM); + +private: + // If this is a context sensitive instrumentation. + bool IsCS; }; /// The profile annotation (profile-instr-use) pass for IR based PGO. class PGOInstrumentationUse : public PassInfoMixin { public: PGOInstrumentationUse(std::string Filename = "", - std::string RemappingFilename = ""); + std::string RemappingFilename = "", bool IsCS = false); PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM); private: std::string ProfileFileName; std::string ProfileRemappingFileName; + // If this is a context sensitive instrumentation. + bool IsCS; }; /// The indirect function call promotion pass. Index: lib/Analysis/ProfileSummaryInfo.cpp =================================================================== --- lib/Analysis/ProfileSummaryInfo.cpp +++ lib/Analysis/ProfileSummaryInfo.cpp @@ -85,7 +85,14 @@ bool ProfileSummaryInfo::computeSummary() { if (Summary) return true; - auto *SummaryMD = M.getProfileSummary(); + // First try to get context sensitive ProfileSummary. + auto *SummaryMD = M.getProfileSummary(ProfileSummary::PSK_CSInstr); + if (SummaryMD) { + Summary.reset(ProfileSummary::getFromMD(SummaryMD)); + return true; + } + // This will actually return PSK_Instr or PSK_Sample summary. 
+ SummaryMD = M.getProfileSummary(ProfileSummary::PSK_Instr); if (!SummaryMD) return false; Summary.reset(ProfileSummary::getFromMD(SummaryMD)); Index: lib/IR/Module.cpp =================================================================== --- lib/IR/Module.cpp +++ lib/IR/Module.cpp @@ -526,12 +526,18 @@ addModuleFlag(ModFlagBehavior::Error, "Code Model", CL); } -void Module::setProfileSummary(Metadata *M) { - addModuleFlag(ModFlagBehavior::Error, "ProfileSummary", M); +void Module::setProfileSummary(Metadata *M, ProfileSummary::Kind Kind) { + if (Kind == ProfileSummary::PSK_CSInstr) + addModuleFlag(ModFlagBehavior::Error, "CSProfileSummary", M); + else + addModuleFlag(ModFlagBehavior::Error, "ProfileSummary", M); } -Metadata *Module::getProfileSummary() { - return getModuleFlag("ProfileSummary"); +Metadata *Module::getProfileSummary(ProfileSummary::Kind Kind) { + if (Kind == ProfileSummary::PSK_CSInstr) + return getModuleFlag("CSProfileSummary"); + else + return getModuleFlag("ProfileSummary"); } void Module::setOwnedMemoryBuffer(std::unique_ptr MB) { Index: lib/IR/ProfileSummary.cpp =================================================================== --- lib/IR/ProfileSummary.cpp +++ lib/IR/ProfileSummary.cpp @@ -22,8 +22,6 @@ using namespace llvm; -const char *ProfileSummary::KindStr[2] = {"InstrProf", "SampleProfile"}; - // Return an MDTuple with two elements. The first element is a string Key and // the second is a uint64_t Value. static Metadata *getKeyValMD(LLVMContext &Context, const char *Key, @@ -69,6 +67,7 @@ // "SampleProfile"). The rest of the elements of the outer MDTuple are specific // to the kind of profile summary as returned by getFormatSpecificMD. 
Metadata *ProfileSummary::getMD(LLVMContext &Context) { + const char *KindStr[3] = {"InstrProf", "CSInstrProf", "SampleProfile"}; Metadata *Components[] = { getKeyValMD(Context, "ProfileFormat", KindStr[PSK]), getKeyValMD(Context, "TotalCount", getTotalCount()), @@ -154,6 +153,9 @@ else if (isKeyValuePair(dyn_cast_or_null(FormatMD), "ProfileFormat", "InstrProf")) SummaryKind = PSK_Instr; + else if (isKeyValuePair(dyn_cast_or_null(FormatMD), "ProfileFormat", + "CSInstrProf")) + SummaryKind = PSK_CSInstr; else return nullptr; Index: lib/LTO/LTOBackend.cpp =================================================================== --- lib/LTO/LTOBackend.cpp +++ lib/LTO/LTOBackend.cpp @@ -156,7 +156,14 @@ Optional PGOOpt; if (!Conf.SampleProfile.empty()) PGOOpt = PGOOptions("", "", Conf.SampleProfile, Conf.ProfileRemapping, - false, true); + PGOOptions::NoAction, true); + else if (Conf.RunCSIRInstr) { + PGOOpt = PGOOptions(Conf.CSIRProfile, "", "", Conf.ProfileRemapping, + PGOOptions::CSIRInstr); + } else if (!Conf.CSIRProfile.empty()) { + PGOOpt = PGOOptions("", Conf.CSIRProfile, "", Conf.ProfileRemapping, + PGOOptions::CSIRUse); + } PassBuilder PB(TM, PGOOpt); AAManager AA; Index: lib/Passes/PassBuilder.cpp =================================================================== --- lib/Passes/PassBuilder.cpp +++ lib/Passes/PassBuilder.cpp @@ -506,7 +506,7 @@ void PassBuilder::addPGOInstrPasses(ModulePassManager &MPM, bool DebugLogging, PassBuilder::OptimizationLevel Level, - bool RunProfileGen, + bool RunProfileGen, bool IsCS, std::string ProfileGenFile, std::string ProfileUseFile, std::string ProfileRemappingFile) { @@ -514,7 +514,7 @@ // threshold results in smaller executables, but there may be cases where // the size grows, so let's be conservative here and skip this simplification // at -Os/Oz. - if (!isOptimizingForSize(Level)) { + if (!isOptimizingForSize(Level) && !IsCS) { InlineParams IP; // In the old pass manager, this is a cl::opt. Should still this be one? 
@@ -547,7 +547,7 @@ MPM.addPass(GlobalDCEPass()); if (RunProfileGen) { - MPM.addPass(PGOInstrumentationGen()); + MPM.addPass(PGOInstrumentationGen(IsCS)); FunctionPassManager FPM; FPM.addPass( @@ -559,11 +559,11 @@ if (!ProfileGenFile.empty()) Options.InstrProfileOutput = ProfileGenFile; Options.DoCounterPromotion = true; + Options.UseBFIInPromotion = IsCS; MPM.addPass(InstrProfiling(Options)); - } - - if (!ProfileUseFile.empty()) - MPM.addPass(PGOInstrumentationUse(ProfileUseFile, ProfileRemappingFile)); + } else if (!ProfileUseFile.empty()) + MPM.addPass( + PGOInstrumentationUse(ProfileUseFile, ProfileRemappingFile, IsCS)); } static InlineParams @@ -658,7 +658,8 @@ // Add all the requested passes for instrumentation PGO, if requested. if (PGOOpt && Phase != ThinLTOPhase::PostLink && (!PGOOpt->ProfileGenFile.empty() || !PGOOpt->ProfileUseFile.empty())) { - addPGOInstrPasses(MPM, DebugLogging, Level, PGOOpt->RunProfileGen, + addPGOInstrPasses(MPM, DebugLogging, Level, + PGOOpt->Action == PGOOptions::IRInstr, false, PGOOpt->ProfileGenFile, PGOOpt->ProfileUseFile, PGOOpt->ProfileRemappingFile); MPM.addPass(PGOIndirectCallPromotion(false, false)); @@ -760,6 +761,15 @@ // FIXME: Is this really an optimization rather than a canonicalization? MPM.addPass(ReversePostOrderFunctionAttrsPass()); + // Do a post inline PGO instrumentation and use pass. This is a context + // sensitive PGO pass. + if (PGOOpt && (PGOOpt->Action == PGOOptions::CSIRInstr || + PGOOpt->Action == PGOOptions::CSIRUse)) + addPGOInstrPasses(MPM, DebugLogging, Level, + PGOOpt->Action == PGOOptions::CSIRInstr, true, + PGOOpt->ProfileGenFile, PGOOpt->ProfileUseFile, + PGOOpt->ProfileRemappingFile); + // Re-require GloblasAA here prior to function passes. This is particularly // useful as the above will have inlined, DCE'ed, and function-attr // propagated everything. 
We should at this point have a reasonably minimal Index: lib/ProfileData/InstrProfReader.cpp =================================================================== --- lib/ProfileData/InstrProfReader.cpp +++ lib/ProfileData/InstrProfReader.cpp @@ -163,7 +163,10 @@ IsIRInstr = true; else if (Str.equals_lower("fe")) IsIRInstr = false; - else + else if (Str.equals_lower("csir")) { + IsIRInstr = true; + HasCSIRLevelProfile = true; + } else return error(instrprof_error::bad_header); ++Line; @@ -734,7 +737,7 @@ const unsigned char * IndexedInstrProfReader::readSummary(IndexedInstrProf::ProfVersion Version, - const unsigned char *Cur) { + const unsigned char *Cur, bool IsCS) { using namespace IndexedInstrProf; using namespace support; @@ -761,10 +764,13 @@ DetailedSummary.emplace_back((uint32_t)Ent.Cutoff, Ent.MinBlockCount, Ent.NumBlocks); } + std::unique_ptr &Summary = + IsCS ? this->CS_Summary : this->Summary; + // initialize InstrProfSummary using the SummaryData from disk. - this->Summary = llvm::make_unique( - ProfileSummary::PSK_Instr, DetailedSummary, - SummaryData->get(Summary::TotalBlockCount), + Summary = llvm::make_unique( + IsCS ? ProfileSummary::PSK_CSInstr : ProfileSummary::PSK_Instr, + DetailedSummary, SummaryData->get(Summary::TotalBlockCount), SummaryData->get(Summary::MaxBlockCount), SummaryData->get(Summary::MaxInternalBlockCount), SummaryData->get(Summary::MaxFunctionCount), @@ -806,7 +812,9 @@ IndexedInstrProf::ProfVersion::CurrentVersion) return error(instrprof_error::unsupported_version); - Cur = readSummary((IndexedInstrProf::ProfVersion)FormatVersion, Cur); + Cur = readSummary((IndexedInstrProf::ProfVersion)FormatVersion, Cur, false); + if (Header->Version & VARIANT_MASK_CSIR_PROF) + Cur = readSummary((IndexedInstrProf::ProfVersion)FormatVersion, Cur, true); // Read the hash type and start offset. 
IndexedInstrProf::HashT HashType = static_cast( Index: lib/ProfileData/InstrProfWriter.cpp =================================================================== --- lib/ProfileData/InstrProfWriter.cpp +++ lib/ProfileData/InstrProfWriter.cpp @@ -102,6 +102,7 @@ support::endianness ValueProfDataEndianness = support::little; InstrProfSummaryBuilder *SummaryBuilder; + InstrProfSummaryBuilder *CSSummaryBuilder; InstrProfRecordWriterTrait() = default; @@ -143,7 +144,10 @@ endian::Writer LE(Out, little); for (const auto &ProfileData : *V) { const InstrProfRecord &ProfRecord = ProfileData.second; - SummaryBuilder->addRecord(ProfRecord); + if (NamedInstrProfRecord::hasCSFlagInHash(ProfileData.first)) + CSSummaryBuilder->addRecord(ProfRecord); + else + SummaryBuilder->addRecord(ProfRecord); LE.write(ProfileData.first); // Function hash LE.write(ProfRecord.Counts.size()); @@ -254,6 +258,8 @@ InstrProfSummaryBuilder ISB(ProfileSummaryBuilder::DefaultCutoffs); InfoObj->SummaryBuilder = &ISB; + InstrProfSummaryBuilder CSISB(ProfileSummaryBuilder::DefaultCutoffs); + InfoObj->CSSummaryBuilder = &CSISB; // Populate the hash table generator. for (const auto &I : FunctionData) @@ -265,6 +271,10 @@ Header.Version = IndexedInstrProf::ProfVersion::CurrentVersion; if (ProfileKind == PF_IRLevel) Header.Version |= VARIANT_MASK_IR_PROF; + if (ProfileKind == PF_IRLevelWithCS) { + Header.Version |= VARIANT_MASK_IR_PROF; + Header.Version |= VARIANT_MASK_CSIR_PROF; + } Header.Unused = 0; Header.HashType = static_cast(IndexedInstrProf::HashType); Header.HashOffset = 0; @@ -288,6 +298,14 @@ uint64_t SummaryOffset = OS.tell(); for (unsigned I = 0; I < SummarySize / sizeof(uint64_t); I++) OS.write(0); + uint64_t CSSummaryOffset = 0; + uint64_t CSSummarySize = 0; + if (ProfileKind == PF_IRLevelWithCS) { + CSSummaryOffset = OS.tell(); + CSSummarySize = SummarySize / sizeof(uint64_t); + for (unsigned I = 0; I < CSSummarySize; I++) + OS.write(0); + } // Write the hash table. 
uint64_t HashTableStart = Generator.Emit(OS.OS, *InfoObj); @@ -301,13 +319,25 @@ setSummary(TheSummary.get(), *PS); InfoObj->SummaryBuilder = nullptr; + // For Context Sensitive summary. + std::unique_ptr TheCSSummary = nullptr; + if (ProfileKind == PF_IRLevelWithCS) { + TheCSSummary = IndexedInstrProf::allocSummary(SummarySize); + std::unique_ptr CSPS = CSISB.getSummary(); + setSummary(TheCSSummary.get(), *CSPS); + } + InfoObj->CSSummaryBuilder = nullptr; + // Now do the final patch: PatchItem PatchItems[] = { // Patch the Header.HashOffset field. {HashTableStartFieldOffset, &HashTableStart, 1}, // Patch the summary data. {SummaryOffset, reinterpret_cast(TheSummary.get()), - (int)(SummarySize / sizeof(uint64_t))}}; + (int)(SummarySize / sizeof(uint64_t))}, + {CSSummaryOffset, reinterpret_cast(TheCSSummary.get()), + (int)CSSummarySize}}; + OS.patch(PatchItems, sizeof(PatchItems) / sizeof(*PatchItems)); } @@ -376,6 +406,8 @@ Error InstrProfWriter::writeText(raw_fd_ostream &OS) { if (ProfileKind == PF_IRLevel) OS << "# IR level Instrumentation Flag\n:ir\n"; + else if (ProfileKind == PF_IRLevelWithCS) + OS << "# CSIR level Instrumentation Flag\n:csir\n"; InstrProfSymtab Symtab; for (const auto &I : FunctionData) if (shouldEncodeData(I.getValue())) Index: lib/Transforms/IPO/PassManagerBuilder.cpp =================================================================== --- lib/Transforms/IPO/PassManagerBuilder.cpp +++ lib/Transforms/IPO/PassManagerBuilder.cpp @@ -176,6 +176,8 @@ MergeFunctions = false; PrepareForLTO = false; EnablePGOInstrGen = RunPGOInstrGen; + EnablePGOCSInstrGen = false; + EnablePGOCSInstrUse = false; PGOInstrGen = PGOOutputFile; PGOInstrUse = RunPGOInstrUse; PrepareForThinLTO = EnablePrepareForThinLTO; @@ -272,13 +274,18 @@ } // Do PGO instrumentation generation or use pass as the option specified. 
-void PassManagerBuilder::addPGOInstrPasses(legacy::PassManagerBase &MPM) { - if (!EnablePGOInstrGen && PGOInstrUse.empty() && PGOSampleUse.empty()) +void PassManagerBuilder::addPGOInstrPasses(legacy::PassManagerBase &MPM, + bool IsCS = false) { + if (IsCS) { + if (!EnablePGOCSInstrGen && !EnablePGOCSInstrUse) + return; + } else if (!EnablePGOInstrGen && PGOInstrUse.empty() && PGOSampleUse.empty()) return; + // Perform the preinline and cleanup passes for O1 and above. // And avoid doing them if optimizing for size. if (OptLevel > 0 && SizeLevel == 0 && !DisablePreInliner && - PGOSampleUse.empty()) { + PGOSampleUse.empty() && !IsCS) { // Create preinline pass. We construct an InlineParams object and specify // the threshold here to avoid the command line options of the regular // inliner to influence pre-inlining. The only fields of InlineParams we @@ -296,22 +303,23 @@ MPM.add(createInstructionCombiningPass()); // Combine silly seq's addExtensionsToPM(EP_Peephole, MPM); } - if (EnablePGOInstrGen) { - MPM.add(createPGOInstrumentationGenLegacyPass()); + if ((EnablePGOInstrGen && !IsCS) || (EnablePGOCSInstrGen && IsCS)) { + MPM.add(createPGOInstrumentationGenLegacyPass(IsCS)); // Add the profile lowering pass. InstrProfOptions Options; if (!PGOInstrGen.empty()) Options.InstrProfileOutput = PGOInstrGen; Options.DoCounterPromotion = true; + Options.UseBFIInPromotion = IsCS; MPM.add(createLoopRotatePass()); MPM.add(createInstrProfilingLegacyPass(Options)); } if (!PGOInstrUse.empty()) - MPM.add(createPGOInstrumentationUseLegacyPass(PGOInstrUse)); + MPM.add(createPGOInstrumentationUseLegacyPass(PGOInstrUse, IsCS)); // Indirect call promotion that promotes intra-module targets only. // For ThinLTO this is done earlier due to interactions with globalopt // for imported functions. We don't run this at -O0. 
- if (OptLevel > 0) + if (OptLevel > 0 && !IsCS) MPM.add( createPGOIndirectCallPromotionLegacyPass(false, !PGOSampleUse.empty())); } @@ -553,6 +561,9 @@ if (RunPartialInlining) MPM.add(createPartialInliningPass()); + // CSFDO instrumentation and use pass. + addPGOInstrPasses(MPM, true); + if (OptLevel > 1 && !PrepareForLTO && !PrepareForThinLTO) // Remove avail extern fns and globals definitions if we aren't // compiling an object file for later LTO. For LTO we want to preserve @@ -834,6 +845,9 @@ PM.add(createPruneEHPass()); // Remove dead EH info. + // CSFDO instrumentation and use pass. + addPGOInstrPasses(PM, true); + // Optimize globals again if we ran the inliner. if (RunInliner) PM.add(createGlobalOptimizerPass()); Index: lib/Transforms/IPO/SampleProfile.cpp =================================================================== --- lib/Transforms/IPO/SampleProfile.cpp +++ lib/Transforms/IPO/SampleProfile.cpp @@ -1559,8 +1559,9 @@ return false; PSI = _PSI; - if (M.getProfileSummary() == nullptr) - M.setProfileSummary(Reader->getSummary().getMD(M.getContext())); + if (M.getProfileSummary(ProfileSummary::PSK_Sample) == nullptr) + M.setProfileSummary(Reader->getSummary().getMD(M.getContext()), + ProfileSummary::PSK_Sample); // Compute the total number of samples collected in this profile. 
for (const auto &I : Reader->getProfiles()) Index: lib/Transforms/Instrumentation/InstrProfiling.cpp =================================================================== --- lib/Transforms/Instrumentation/InstrProfiling.cpp +++ lib/Transforms/Instrumentation/InstrProfiling.cpp @@ -19,6 +19,8 @@ #include "llvm/ADT/StringRef.h" #include "llvm/ADT/Triple.h" #include "llvm/ADT/Twine.h" +#include "llvm/Analysis/BlockFrequencyInfo.h" +#include "llvm/Analysis/BranchProbabilityInfo.h" #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/IR/Attributes.h" @@ -232,9 +234,9 @@ public: PGOCounterPromoter( DenseMap> &LoopToCands, - Loop &CurLoop, LoopInfo &LI) + Loop &CurLoop, LoopInfo &LI, BlockFrequencyInfo *BFI) : LoopToCandidates(LoopToCands), ExitBlocks(), InsertPts(), L(CurLoop), - LI(LI) { + LI(LI), BFI(BFI) { SmallVector LoopExitBlocks; SmallPtrSet BlockSet; @@ -263,6 +265,18 @@ SSAUpdater SSA(&NewPHIs); Value *InitVal = ConstantInt::get(Cand.first->getType(), 0); + // If BFI is set, we will use it to guide the promotions. + if (BFI) { + auto *BB = Cand.first->getParent(); + auto InstrCount = BFI->getBlockProfileCount(BB); + if (!InstrCount) + continue; + auto PreheaderCount = BFI->getBlockProfileCount(L.getLoopPreheader()); + if (PreheaderCount && + (PreheaderCount.getValue() * 3) >= (InstrCount.getValue() * 2)) + continue; + } + PGOCounterPromoterHelper Promoter(Cand.first, Cand.second, SSA, InitVal, L.getLoopPreheader(), ExitBlocks, InsertPts, LoopToCandidates, LI); @@ -312,6 +326,11 @@ SmallVector ExitingBlocks; LP->getExitingBlocks(ExitingBlocks); + + // If BFI is set, we do more aggressive promotions based on BFI. + if (BFI) + return (unsigned)-1; + // Not considierered speculative. 
if (ExitingBlocks.size() == 1) return MaxNumOfPromotionsPerLoop; @@ -343,6 +362,7 @@ SmallVector InsertPts; Loop &L; LoopInfo &LI; + BlockFrequencyInfo *BFI; }; } // end anonymous namespace @@ -415,6 +435,13 @@ LoopInfo LI(DT); DenseMap> LoopPromotionCandidates; + std::unique_ptr BFI; + if (Options.UseBFIInPromotion) { + std::unique_ptr BPI; + BPI.reset(new BranchProbabilityInfo(*F, LI, TLI)); + BFI.reset(new BlockFrequencyInfo(*F, *BPI, LI)); + } + for (const auto &LoadStore : PromotionCandidates) { auto *CounterLoad = LoadStore.first; auto *CounterStore = LoadStore.second; @@ -430,7 +457,7 @@ // Do a post-order traversal of the loops so that counter updates can be // iteratively hoisted outside the loop nest. for (auto *Loop : llvm::reverse(Loops)) { - PGOCounterPromoter Promoter(LoopPromotionCandidates, *Loop, LI); + PGOCounterPromoter Promoter(LoopPromotionCandidates, *Loop, LI, BFI.get()); Promoter.run(&TotalCountersPromoted); } } Index: lib/Transforms/Instrumentation/PGOInstrumentation.cpp =================================================================== --- lib/Transforms/Instrumentation/PGOInstrumentation.cpp +++ lib/Transforms/Instrumentation/PGOInstrumentation.cpp @@ -66,6 +66,7 @@ #include "llvm/Analysis/IndirectCallSiteVisitor.h" #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/OptimizationRemarkEmitter.h" +#include "llvm/Analysis/ProfileSummaryInfo.h" #include "llvm/IR/Attributes.h" #include "llvm/IR/BasicBlock.h" #include "llvm/IR/CFG.h" @@ -384,7 +385,8 @@ public: static char ID; - PGOInstrumentationGenLegacyPass() : ModulePass(ID) { + PGOInstrumentationGenLegacyPass(bool IsCS = false) + : ModulePass(ID), IsCS(IsCS) { initializePGOInstrumentationGenLegacyPassPass( *PassRegistry::getPassRegistry()); } @@ -392,6 +394,7 @@ StringRef getPassName() const override { return "PGOInstrumentationGenPass"; } private: + bool IsCS; bool runOnModule(Module &M) override; void getAnalysisUsage(AnalysisUsage &AU) const override { @@ -404,8 +407,8 @@ 
static char ID; // Provide the profile filename as the parameter. - PGOInstrumentationUseLegacyPass(std::string Filename = "") - : ModulePass(ID), ProfileFileName(std::move(Filename)) { + PGOInstrumentationUseLegacyPass(std::string Filename = "", bool IsCS = false) + : ModulePass(ID), ProfileFileName(std::move(Filename)), IsCS(IsCS) { if (!PGOTestProfileFile.empty()) ProfileFileName = PGOTestProfileFile; initializePGOInstrumentationUseLegacyPassPass( @@ -416,10 +419,12 @@ private: std::string ProfileFileName; + bool IsCS; bool runOnModule(Module &M) override; void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.addRequired(); AU.addRequired(); } }; @@ -435,8 +440,8 @@ INITIALIZE_PASS_END(PGOInstrumentationGenLegacyPass, "pgo-instr-gen", "PGO instrumentation.", false, false) -ModulePass *llvm::createPGOInstrumentationGenLegacyPass() { - return new PGOInstrumentationGenLegacyPass(); +ModulePass *llvm::createPGOInstrumentationGenLegacyPass(bool IsCS) { + return new PGOInstrumentationGenLegacyPass(IsCS); } char PGOInstrumentationUseLegacyPass::ID = 0; @@ -445,11 +450,13 @@ "Read PGO instrumentation profile.", false, false) INITIALIZE_PASS_DEPENDENCY(BlockFrequencyInfoWrapperPass) INITIALIZE_PASS_DEPENDENCY(BranchProbabilityInfoWrapperPass) +INITIALIZE_PASS_DEPENDENCY(ProfileSummaryInfoWrapperPass) INITIALIZE_PASS_END(PGOInstrumentationUseLegacyPass, "pgo-instr-use", "Read PGO instrumentation profile.", false, false) -ModulePass *llvm::createPGOInstrumentationUseLegacyPass(StringRef Filename) { - return new PGOInstrumentationUseLegacyPass(Filename.str()); +ModulePass *llvm::createPGOInstrumentationUseLegacyPass(StringRef Filename, + bool IsCS) { + return new PGOInstrumentationUseLegacyPass(Filename.str(), IsCS); } namespace { @@ -496,6 +503,7 @@ template class FuncPGOInstrumentation { private: Function &F; + bool IsCS; // A map that stores the Comdat group in function F. 
std::unordered_multimap &ComdatMembers; @@ -536,15 +544,17 @@ Function &Func, std::unordered_multimap &ComdatMembers, bool CreateGlobalVar = false, BranchProbabilityInfo *BPI = nullptr, - BlockFrequencyInfo *BFI = nullptr) - : F(Func), ComdatMembers(ComdatMembers), ValueSites(IPVK_Last + 1), - SIVisitor(Func), MIVisitor(Func), MST(F, BPI, BFI) { + BlockFrequencyInfo *BFI = nullptr, bool IsCS = false) + : F(Func), IsCS(IsCS), ComdatMembers(ComdatMembers), + ValueSites(IPVK_Last + 1), SIVisitor(Func), MIVisitor(Func), + MST(F, BPI, BFI) { // This should be done before CFG hash computation. SIVisitor.countSelects(Func); MIVisitor.countMemIntrinsics(Func); NumOfPGOSelectInsts += SIVisitor.getNumOfSelectInsts(); NumOfPGOMemIntrinsics += MIVisitor.getNumOfMemIntrinsics(); - ValueSites[IPVK_IndirectCallTarget] = findIndirectCallSites(Func); + if (!IsCS) + ValueSites[IPVK_IndirectCallTarget] = findIndirectCallSites(Func); ValueSites[IPVK_MemOPSize] = MIVisitor.findMemIntrinsics(Func); FuncName = getPGOFuncName(F); @@ -598,14 +608,21 @@ } } JC.update(Indexes); + + // Hash format for context sensitive profile. Reserve 4 bits for other + // information. FunctionHash = (uint64_t)SIVisitor.getNumOfSelectInsts() << 56 | (uint64_t)ValueSites[IPVK_IndirectCallTarget].size() << 48 | (uint64_t)MST.AllEdges.size() << 32 | JC.getCRC(); + // Reserve bits 60-63 for other information. + FunctionHash &= 0x0FFFFFFFFFFFFFFF; + if (IsCS) + NamedInstrProfRecord::setCSFlagInHash(FunctionHash); LLVM_DEBUG(dbgs() << "Function Hash Computation for " << F.getName() << ":\n" << " CRC = " << JC.getCRC() - << ", Selects = " << SIVisitor.getNumOfSelectInsts() << ", Edges = " << MST.AllEdges.size() << ", ICSites = " << ValueSites[IPVK_IndirectCallTarget].size() + << ", Selects = " << SIVisitor.getNumOfSelectInsts() << ", Hash = " << FunctionHash << "\n";); } @@ -721,12 +738,14 @@ // Critical edges will be split.
static void instrumentOneFunc( Function &F, Module *M, BranchProbabilityInfo *BPI, BlockFrequencyInfo *BFI, - std::unordered_multimap &ComdatMembers) { + std::unordered_multimap &ComdatMembers, + bool IsCS) { // Split indirectbr critical edges here before computing the MST rather than // later in getInstrBB() to avoid invalidating it. SplitIndirectBrCriticalEdges(F, BPI, BFI); + FuncPGOInstrumentation FuncInfo(F, ComdatMembers, true, BPI, - BFI); + BFI, IsCS); unsigned NumCounters = FuncInfo.getNumCounters(); uint32_t I = 0; @@ -853,10 +872,10 @@ PGOUseFunc(Function &Func, Module *Modu, std::unordered_multimap &ComdatMembers, BranchProbabilityInfo *BPI = nullptr, - BlockFrequencyInfo *BFIin = nullptr) + BlockFrequencyInfo *BFIin = nullptr, bool IsCS = false) : F(Func), M(Modu), BFI(BFIin), - FuncInfo(Func, ComdatMembers, false, BPI, BFIin), - FreqAttr(FFA_Normal) {} + FuncInfo(Func, ComdatMembers, false, BPI, BFIin, IsCS), + FreqAttr(FFA_Normal), IsCS(IsCS) {} // Read counts for the instrumented BB from profile. bool readCounters(IndexedInstrProfReader *PGOReader, bool &AllZeros); @@ -929,6 +948,9 @@ // Function hotness info derived from profile. FuncFreqAttr FreqAttr; + // Whether to use the context sensitive profile. + bool IsCS; + // Find the Instrumented BB and set the value. void setInstrumentedCounts(const std::vector &CountFromProfile); @@ -1062,7 +1084,7 @@ getBBInfo(nullptr).UnknownCountInEdge = 2; setInstrumentedCounts(CountFromProfile); - ProgramMaxCount = PGOReader->getMaximumFunctionCount(); + ProgramMaxCount = PGOReader->getMaximumFunctionCount(IsCS); return true; } @@ -1175,6 +1197,7 @@ if (!(isa(TI) || isa(TI) || isa(TI))) continue; + if (getBBInfo(&BB).CountValue == 0) continue; @@ -1354,9 +1377,11 @@ // Create a COMDAT variable INSTR_PROF_RAW_VERSION_VAR to make the runtime // aware this is an ir_level profile so it can set the version flag.
-static void createIRLevelProfileFlagVariable(Module &M) { +static void createIRLevelProfileFlagVariable(Module &M, bool IsCS) { Type *IntTy64 = Type::getInt64Ty(M.getContext()); uint64_t ProfileVersion = (INSTR_PROF_RAW_VERSION | VARIANT_MASK_IR_PROF); + if (IsCS) + ProfileVersion |= VARIANT_MASK_CSIR_PROF; auto IRLevelVersionVariable = new GlobalVariable( M, IntTy64, true, GlobalVariable::ExternalLinkage, Constant::getIntegerValue(IntTy64, APInt(64, ProfileVersion)), @@ -1390,8 +1415,8 @@ static bool InstrumentAllFunctions( Module &M, function_ref LookupBPI, - function_ref LookupBFI) { - createIRLevelProfileFlagVariable(M); + function_ref LookupBFI, bool IsCS) { + createIRLevelProfileFlagVariable(M, IsCS); std::unordered_multimap ComdatMembers; collectComdatMembers(M, ComdatMembers); @@ -1400,7 +1425,7 @@ continue; auto *BPI = LookupBPI(F); auto *BFI = LookupBFI(F); - instrumentOneFunc(F, &M, BPI, BFI, ComdatMembers); + instrumentOneFunc(F, &M, BPI, BFI, ComdatMembers, IsCS); } return true; } @@ -1415,7 +1440,7 @@ auto LookupBFI = [this](Function &F) { return &this->getAnalysis(F).getBFI(); }; - return InstrumentAllFunctions(M, LookupBPI, LookupBFI); + return InstrumentAllFunctions(M, LookupBPI, LookupBFI, IsCS); } PreservedAnalyses PGOInstrumentationGen::run(Module &M, @@ -1429,7 +1454,7 @@ return &FAM.getResult(F); }; - if (!InstrumentAllFunctions(M, LookupBPI, LookupBFI)) + if (!InstrumentAllFunctions(M, LookupBPI, LookupBFI, IsCS)) return PreservedAnalyses::all(); return PreservedAnalyses::none(); @@ -1438,7 +1463,8 @@ static bool annotateAllFunctions( Module &M, StringRef ProfileFileName, StringRef ProfileRemappingFileName, function_ref LookupBPI, - function_ref LookupBFI) { + function_ref LookupBFI, + ProfileSummaryInfo *PSI, bool IsCS) { LLVM_DEBUG(dbgs() << "Read in profile counters: "); auto &Ctx = M.getContext(); // Read the counter array from file. 
@@ -1459,6 +1485,9 @@ StringRef("Cannot get PGOReader"))); return false; } + if (!PGOReader->hasCSIRLevelProfile() && IsCS) + return false; + // TODO: might need to change the warning once the clang option is finalized. if (!PGOReader->isIRLevelProfile()) { Ctx.diagnose(DiagnosticInfoPGOProfile( @@ -1478,7 +1507,7 @@ // Split indirectbr critical edges here before computing the MST rather than // later in getInstrBB() to avoid invalidating it. SplitIndirectBrCriticalEdges(F, BPI, BFI); - PGOUseFunc Func(F, &M, ComdatMembers, BPI, BFI); + PGOUseFunc Func(F, &M, ComdatMembers, BPI, BFI, IsCS); bool AllZeros = false; if (!Func.readCounters(PGOReader.get(), AllZeros)) continue; @@ -1526,7 +1555,10 @@ } } } - M.setProfileSummary(PGOReader->getSummary().getMD(M.getContext())); + M.setProfileSummary(PGOReader->getSummary(IsCS).getMD(M.getContext()), + IsCS ? ProfileSummary::PSK_CSInstr + : ProfileSummary::PSK_Instr); + // Set function hotness attribute from the profile. // We have to apply these attributes at the end because their presence // can affect the BranchProbabilityInfo of any callers, resulting in an @@ -1545,9 +1577,10 @@ } PGOInstrumentationUse::PGOInstrumentationUse(std::string Filename, - std::string RemappingFilename) + std::string RemappingFilename, + bool IsCS) : ProfileFileName(std::move(Filename)), - ProfileRemappingFileName(std::move(RemappingFilename)) { + ProfileRemappingFileName(std::move(RemappingFilename)), IsCS(IsCS) { if (!PGOTestProfileFile.empty()) ProfileFileName = PGOTestProfileFile; if (!PGOTestProfileRemappingFile.empty()) @@ -1566,8 +1599,12 @@ return &FAM.getResult(F); }; + ProfileSummaryInfo *PSI = nullptr; + if (IsCS) + PSI = &AM.getResult(M); + if (!annotateAllFunctions(M, ProfileFileName, ProfileRemappingFileName, - LookupBPI, LookupBFI)) + LookupBPI, LookupBFI, PSI, IsCS)) return PreservedAnalyses::all(); return PreservedAnalyses::none(); @@ -1584,7 +1621,12 @@ return &this->getAnalysis(F).getBFI(); }; - return 
annotateAllFunctions(M, ProfileFileName, "", LookupBPI, LookupBFI); + ProfileSummaryInfo *PSI = nullptr; + if (IsCS) + PSI = &this->getAnalysis().getPSI(); + + return annotateAllFunctions(M, ProfileFileName, "", LookupBPI, LookupBFI, PSI, + IsCS); } static std::string getSimpleNodeName(const BasicBlock *Node) { Index: tools/gold/gold-plugin.cpp =================================================================== --- tools/gold/gold-plugin.cpp +++ tools/gold/gold-plugin.cpp @@ -209,6 +209,10 @@ static std::string OptRemarksFilename; static bool OptRemarksWithHotness = false; + // Context sensitive PGO options. + static std::string cs_profile_path; + static bool cs_pgo_gen = false; + static void process_plugin_option(const char *opt_) { if (opt_ == nullptr) @@ -266,7 +270,11 @@ } else if (opt == "disable-verify") { DisableVerify = true; } else if (opt.startswith("sample-profile=")) { - sample_profile= opt.substr(strlen("sample-profile=")); + sample_profile = opt.substr(strlen("sample-profile=")); + } else if (opt == "cs-profile-generate") { + cs_pgo_gen = true; + } else if (opt.startswith("cs-profile-path=")) { + cs_profile_path = opt.substr(strlen("cs-profile-path=")); } else if (opt == "new-pass-manager") { new_pass_manager = true; } else if (opt == "debug-pass-manager") { @@ -885,6 +893,10 @@ if (!options::sample_profile.empty()) Conf.SampleProfile = options::sample_profile; + if (!options::cs_profile_path.empty()) + Conf.CSIRProfile = options::cs_profile_path; + Conf.RunCSIRInstr = options::cs_pgo_gen; + Conf.DwoDir = options::dwo_dir; // Set up optimization remarks handling. 
Index: tools/llvm-profdata/llvm-profdata.cpp =================================================================== --- tools/llvm-profdata/llvm-profdata.cpp +++ tools/llvm-profdata/llvm-profdata.cpp @@ -226,7 +226,10 @@ auto Reader = std::move(ReaderOrErr.get()); bool IsIRProfile = Reader->isIRLevelProfile(); - if (WC->Writer.setIsIRLevelProfile(IsIRProfile)) { + bool HasCSIRProfile = Reader->hasCSIRLevelProfile(); + // outs() << "Loadinput for " << Input.Filename << " CS=" << HasCSIRProfile << + // " IR=" << IsIRProfile << "\n"; + if (WC->Writer.setIsIRLevelProfile(IsIRProfile, HasCSIRProfile)) { WC->Err = make_error( "Merge IR generated profile with Clang generated profile.", std::error_code()); @@ -252,6 +255,7 @@ FuncName, firstTime); }); } + if (Reader->hasError()) { if (Error E = Reader->getError()) { instrprof_error IPE = InstrProfError::take(std::move(E)); @@ -662,7 +666,7 @@ uint32_t TopN, bool ShowIndirectCallTargets, bool ShowMemOPSizes, bool ShowDetailedSummary, std::vector DetailedSummaryCutoffs, - bool ShowAllFunctions, + bool ShowAllFunctions, bool ShowCS, const std::string &ShowFunction, bool TextFormat, raw_fd_ostream &OS) { auto ReaderOrErr = InstrProfReader::create(Filename); @@ -695,6 +699,13 @@ OS << ":ir\n"; for (const auto &Func : *Reader) { + if (Reader->isIRLevelProfile() && Reader->hasCSIRLevelProfile()) { + bool FuncIsCS = NamedInstrProfRecord::hasCSFlagInHash(Func.Hash); + if (FuncIsCS && !ShowCS) + continue; + if (!FuncIsCS && ShowCS) + continue; + } bool Show = ShowAllFunctions || (!ShowFunction.empty() && Func.Name.find(ShowFunction) != Func.Name.npos); @@ -868,6 +879,8 @@ cl::value_desc("800000,901000,999999")); cl::opt ShowAllFunctions("all-functions", cl::init(false), cl::desc("Details for every function")); + cl::opt ShowCS("showcs", cl::init(false), + cl::desc("Show context sensitive counts")); cl::opt ShowFunction("function", cl::desc("Details for matching functions")); @@ -899,10 +912,10 @@ std::vector 
Cutoffs(DetailedSummaryCutoffs.begin(), DetailedSummaryCutoffs.end()); if (ProfileKind == instr) - return showInstrProfile(Filename, ShowCounts, TopNFunctions, - ShowIndirectCallTargets, ShowMemOPSizes, - ShowDetailedSummary, DetailedSummaryCutoffs, - ShowAllFunctions, ShowFunction, TextFormat, OS); + return showInstrProfile( + Filename, ShowCounts, TopNFunctions, ShowIndirectCallTargets, + ShowMemOPSizes, ShowDetailedSummary, DetailedSummaryCutoffs, + ShowAllFunctions, ShowCS, ShowFunction, TextFormat, OS); else return showSampleProfile(Filename, ShowCounts, ShowAllFunctions, ShowFunction, OS); Index: tools/opt/NewPMDriver.cpp =================================================================== --- tools/opt/NewPMDriver.cpp +++ tools/opt/NewPMDriver.cpp @@ -228,20 +228,21 @@ bool EnableDebugify) { bool VerifyEachPass = VK == VK_VerifyEachPass; + // TODO: handle CSFDO options. Optional P; switch (PGOKindFlag) { case InstrGen: - P = PGOOptions(ProfileFile, "", "", "", true); + P = PGOOptions(ProfileFile, "", "", "", PGOOptions::IRInstr); break; case InstrUse: - P = PGOOptions("", ProfileFile, "", ProfileRemappingFile, false); + P = PGOOptions("", ProfileFile, "", ProfileRemappingFile); break; case SampleUse: - P = PGOOptions("", "", ProfileFile, ProfileRemappingFile, false); + P = PGOOptions("", "", ProfileFile, ProfileRemappingFile); break; case NoPGO: if (DebugInfoForProfiling) - P = PGOOptions("", "", "", "", false, true); + P = PGOOptions("", "", "", "", PGOOptions::NoAction, true); else P = None; } Index: unittests/ProfileData/InstrProfTest.cpp =================================================================== --- unittests/ProfileData/InstrProfTest.cpp +++ unittests/ProfileData/InstrProfTest.cpp @@ -176,7 +176,7 @@ ASSERT_EQ(288230376151711744U, NinetyFivePerc->MinCount); ASSERT_EQ(72057594037927936U, NinetyNinePerc->MinCount); }; - ProfileSummary &PS = Reader->getSummary(); + ProfileSummary &PS = Reader->getSummary(false); VerifySummary(PS); // Test that 
conversion of summary to and from Metadata works. @@ -190,8 +190,8 @@ // Test that summary can be attached to and read back from module. Module M("my_module", Context); - M.setProfileSummary(MD); - MD = M.getProfileSummary(); + M.setProfileSummary(MD, ProfileSummary::PSK_Instr); + MD = M.getProfileSummary(ProfileSummary::PSK_Instr); ASSERT_TRUE(MD); PSFromMD = ProfileSummary::getFromMD(MD); ASSERT_TRUE(PSFromMD); @@ -802,7 +802,7 @@ auto Profile = Writer.writeBuffer(); readProfile(std::move(Profile)); - ASSERT_EQ(1ULL << 63, Reader->getMaximumFunctionCount()); + ASSERT_EQ(1ULL << 63, Reader->getMaximumFunctionCount(false)); } TEST_P(MaybeSparseInstrProfTest, get_weighted_function_counts) { Index: unittests/ProfileData/SampleProfTest.cpp =================================================================== --- unittests/ProfileData/SampleProfTest.cpp +++ unittests/ProfileData/SampleProfTest.cpp @@ -186,8 +186,8 @@ delete PS; // Test that summary can be attached to and read back from module. - M.setProfileSummary(MD); - MD = M.getProfileSummary(); + M.setProfileSummary(MD, ProfileSummary::PSK_Sample); + MD = M.getProfileSummary(ProfileSummary::PSK_Sample); ASSERT_TRUE(MD); PS = ProfileSummary::getFromMD(MD); ASSERT_TRUE(PS);