Index: CMakeLists.txt =================================================================== --- CMakeLists.txt +++ CMakeLists.txt @@ -568,6 +568,12 @@ endif() file(TO_NATIVE_PATH "${LLVM_PROFILE_DATA_DIR}/%${LLVM_PROFILE_MERGE_POOL_SIZE}m.profraw" LLVM_PROFILE_FILE_PATTERN) endif() + if(NOT LLVM_CSPROFILE_FILE_PATTERN) + if(NOT LLVM_CSPROFILE_DATA_DIR) + file(TO_NATIVE_PATH "${LLVM_BINARY_DIR}/csprofiles" LLVM_CSPROFILE_DATA_DIR) + endif() + file(TO_NATIVE_PATH "${LLVM_CSPROFILE_DATA_DIR}/%${LLVM_PROFILE_MERGE_POOL_SIZE}m.profraw" LLVM_CSPROFILE_FILE_PATTERN) + endif() endif() if (LLVM_BUILD_STATIC) Index: cmake/modules/HandleLLVMOptions.cmake =================================================================== --- cmake/modules/HandleLLVMOptions.cmake +++ cmake/modules/HandleLLVMOptions.cmake @@ -798,6 +798,12 @@ CMAKE_C_FLAGS CMAKE_EXE_LINKER_FLAGS CMAKE_SHARED_LINKER_FLAGS) + elseif(uppercase_LLVM_BUILD_INSTRUMENTED STREQUAL "CSIR") + append("-fcs-profile-generate='${LLVM_CSPROFILE_DATA_DIR}'" + CMAKE_CXX_FLAGS + CMAKE_C_FLAGS + CMAKE_EXE_LINKER_FLAGS + CMAKE_SHARED_LINKER_FLAGS) else() append("-fprofile-instr-generate='${LLVM_PROFILE_FILE_PATTERN}'" CMAKE_CXX_FLAGS @@ -807,6 +813,14 @@ endif() endif() +# Need to pass -fprofile-instr-use to linker for context-sensitive PGO +# compilation. 
+if(LLVM_PROFDATA_FILE AND EXISTS ${LLVM_PROFDATA_FILE}) + append("-fprofile-instr-use='${LLVM_PROFDATA_FILE}'" + CMAKE_EXE_LINKER_FLAGS + CMAKE_SHARED_LINKER_FLAGS) +endif() + option(LLVM_BUILD_INSTRUMENTED_COVERAGE "Build LLVM and tools with Code Coverage instrumentation" Off) mark_as_advanced(LLVM_BUILD_INSTRUMENTED_COVERAGE) append_if(LLVM_BUILD_INSTRUMENTED_COVERAGE "-fprofile-instr-generate='${LLVM_PROFILE_FILE_PATTERN}' -fcoverage-mapping" Index: include/llvm/Analysis/ProfileSummaryInfo.h =================================================================== --- include/llvm/Analysis/ProfileSummaryInfo.h +++ include/llvm/Analysis/ProfileSummaryInfo.h @@ -73,6 +73,12 @@ Summary->getKind() == ProfileSummary::PSK_Instr; } + /// Returns true if module \c M has context sensitive instrumentation profile. + bool hasCSInstrumentationProfile() { + return hasProfileSummary() && + Summary->getKind() == ProfileSummary::PSK_CSInstr; + } + /// Handle the invalidation of this information. /// /// When used as a result of \c ProfileSummaryAnalysis this method will be Index: include/llvm/IR/Module.h =================================================================== --- include/llvm/IR/Module.h +++ include/llvm/IR/Module.h @@ -28,6 +28,7 @@ #include "llvm/IR/GlobalIFunc.h" #include "llvm/IR/GlobalVariable.h" #include "llvm/IR/Metadata.h" +#include "llvm/IR/ProfileSummary.h" #include "llvm/IR/SymbolTableListTraits.h" #include "llvm/Support/CBindingWrapping.h" #include "llvm/Support/CodeGen.h" @@ -865,10 +866,10 @@ /// @{ /// Attach profile summary metadata to this module. - void setProfileSummary(Metadata *M); + void setProfileSummary(Metadata *M, ProfileSummary::Kind Kind); /// Returns profile summary metadata - Metadata *getProfileSummary(); + Metadata *getProfileSummary(ProfileSummary::Kind Kind); /// @} /// Returns true if PLT should be avoided for RTLib calls. 
Index: include/llvm/IR/ProfileSummary.h =================================================================== --- include/llvm/IR/ProfileSummary.h +++ include/llvm/IR/ProfileSummary.h @@ -42,11 +42,10 @@ class ProfileSummary { public: - enum Kind { PSK_Instr, PSK_Sample }; + enum Kind { PSK_Instr, PSK_CSInstr, PSK_Sample }; private: const Kind PSK; - static const char *KindStr[2]; SummaryEntryVector DetailedSummary; uint64_t TotalCount, MaxCount, MaxInternalCount, MaxFunctionCount; uint32_t NumCounts, NumFunctions; Index: include/llvm/InitializePasses.h =================================================================== --- include/llvm/InitializePasses.h +++ include/llvm/InitializePasses.h @@ -297,6 +297,7 @@ void initializePGOIndirectCallPromotionLegacyPassPass(PassRegistry&); void initializePGOInstrumentationGenLegacyPassPass(PassRegistry&); void initializePGOInstrumentationUseLegacyPassPass(PassRegistry&); +void initializePGOInstrumentationGenCreateVarLegacyPassPass(PassRegistry&); void initializePGOMemOPSizeOptLegacyPassPass(PassRegistry&); void initializePHIEliminationPass(PassRegistry&); void initializePartialInlinerLegacyPassPass(PassRegistry&); Index: include/llvm/LTO/Config.h =================================================================== --- include/llvm/LTO/Config.h +++ include/llvm/LTO/Config.h @@ -55,6 +55,9 @@ /// Disable entirely the optimizer, including importing for ThinLTO bool CodeGenOnly = false; + /// Run PGO context sensitive IR instrumentation. + bool RunCSIRInstr = false; + /// If this field is set, the set of passes run in the middle-end optimizer /// will be the one specified by the string. Only works with the new pass /// manager as the old one doesn't have this ability. @@ -73,6 +76,9 @@ /// with this triple. std::string DefaultTriple; + /// Context Sensitive PGO profile path. + std::string CSIRProfile; + /// Sample PGO profile path. 
std::string SampleProfile; Index: include/llvm/LinkAllPasses.h =================================================================== --- include/llvm/LinkAllPasses.h +++ include/llvm/LinkAllPasses.h @@ -102,6 +102,7 @@ (void) llvm::createGCOVProfilerPass(); (void) llvm::createPGOInstrumentationGenLegacyPass(); (void) llvm::createPGOInstrumentationUseLegacyPass(); + (void) llvm::createPGOInstrumentationGenCreateVarLegacyPass(); (void) llvm::createPGOIndirectCallPromotionLegacyPass(); (void) llvm::createPGOMemOPSizeOptLegacyPass(); (void) llvm::createInstrProfilingLegacyPass(); Index: include/llvm/Passes/PassBuilder.h =================================================================== --- include/llvm/Passes/PassBuilder.h +++ include/llvm/Passes/PassBuilder.h @@ -31,25 +31,38 @@ /// A struct capturing PGO tunables. struct PGOOptions { - PGOOptions(std::string ProfileGenFile = "", std::string ProfileUseFile = "", - std::string SampleProfileFile = "", - std::string ProfileRemappingFile = "", - bool RunProfileGen = false, bool SamplePGOSupport = false) - : ProfileGenFile(ProfileGenFile), ProfileUseFile(ProfileUseFile), - SampleProfileFile(SampleProfileFile), - ProfileRemappingFile(ProfileRemappingFile), - RunProfileGen(RunProfileGen), - SamplePGOSupport(SamplePGOSupport || !SampleProfileFile.empty()) { - assert((RunProfileGen || - !SampleProfileFile.empty() || - !ProfileUseFile.empty() || - SamplePGOSupport) && "Illegal PGOOptions."); - } - std::string ProfileGenFile; - std::string ProfileUseFile; - std::string SampleProfileFile; + enum PGOAction { NoAction, IRInstr, IRUse, SampleUse }; + enum CSPGOAction { NoCSAction, CSIRInstr, CSIRUse }; + PGOOptions(std::string ProfileFile = "", std::string CSProfileGenFile = "", + std::string ProfileRemappingFile = "", PGOAction Action = NoAction, + CSPGOAction CSAction = NoCSAction, bool SamplePGOSupport = false) + : ProfileFile(ProfileFile), CSProfileGenFile(CSProfileGenFile), + ProfileRemappingFile(ProfileRemappingFile),
Action(Action), + CSAction(CSAction), + SamplePGOSupport(SamplePGOSupport || Action == SampleUse) { + // Note, we do allow ProfileFile.empty() for Action=IRUse because LTO can + // call back with IRUse action without ProfileFile. + + // If there is a CSAction, PGOAction cannot be IRInstr or SampleUse. + assert(this->CSAction == NoCSAction || + (this->Action != IRInstr && this->Action != SampleUse)); + + // For CSIRInstr, CSProfileGenFile also needs to be nonempty. + assert(this->CSAction != CSIRInstr || !this->CSProfileGenFile.empty()); + + // If CSAction is CSIRUse, PGOAction needs to be IRUse as they share + // a profile. + assert(this->CSAction != CSIRUse || this->Action == IRUse); + + // If neither Action nor CSAction, SamplePGOSupport needs to be true. + assert(this->Action != NoAction || this->CSAction != NoCSAction || + this->SamplePGOSupport); + } + std::string ProfileFile; + std::string CSProfileGenFile; std::string ProfileRemappingFile; - bool RunProfileGen; + PGOAction Action; + CSPGOAction CSAction; bool SamplePGOSupport; }; @@ -274,7 +287,8 @@ /// require some transformations for semantic reasons, they should explicitly /// build them. ModulePassManager buildModuleOptimizationPipeline(OptimizationLevel Level, - bool DebugLogging = false); + bool DebugLogging = false, + bool LTOPreLink = false); /// Build a per-module default optimization pipeline. /// @@ -288,7 +302,8 @@ /// require some transformations for semantic reasons, they should explicitly /// build them. ModulePassManager buildPerModuleDefaultPipeline(OptimizationLevel Level, - bool DebugLogging = false); + bool DebugLogging = false, + bool LTOPreLink = false); /// Build a pre-link, ThinLTO-targeting default optimization pipeline to /// a pass manager.
@@ -605,9 +620,8 @@ bool VerifyEachPass, bool DebugLogging); void addPGOInstrPasses(ModulePassManager &MPM, bool DebugLogging, - OptimizationLevel Level, bool RunProfileGen, - std::string ProfileGenFile, - std::string ProfileUseFile, + OptimizationLevel Level, bool RunProfileGen, bool IsCS, + std::string ProfileFile, std::string ProfileRemappingFile); void invokePeepholeEPCallbacks(FunctionPassManager &, OptimizationLevel); Index: include/llvm/ProfileData/InstrProf.h =================================================================== --- include/llvm/ProfileData/InstrProf.h +++ include/llvm/ProfileData/InstrProf.h @@ -767,10 +767,20 @@ StringRef Name; uint64_t Hash; + // We reserve this bit as the flag for context sensitive profile record. + static const int CS_FLAG_IN_FUNC_HASH = 60; + NamedInstrProfRecord() = default; NamedInstrProfRecord(StringRef Name, uint64_t Hash, std::vector Counts) : InstrProfRecord(std::move(Counts)), Name(Name), Hash(Hash) {} + + static bool hasCSFlagInHash(uint64_t FuncHash) { + return ((FuncHash >> CS_FLAG_IN_FUNC_HASH) & 1); + } + static void setCSFlagInHash(uint64_t &FuncHash) { + FuncHash |= ((uint64_t)1 << CS_FLAG_IN_FUNC_HASH); + } }; uint32_t InstrProfRecord::getNumValueKinds() const { @@ -1004,6 +1014,8 @@ // from control data struct is changed from raw pointer to Name's MD5 value. // Version 4: ValueDataBegin and ValueDataSizes fields are removed from the // raw header. +// Version 5: Bit 60 of FuncHash is reserved for the flag for the context +// sensitive records. const uint64_t Version = INSTR_PROF_RAW_VERSION; template inline uint64_t getMagic(); @@ -1040,6 +1052,13 @@ void getMemOPSizeRangeFromOption(StringRef Str, int64_t &RangeStart, int64_t &RangeLast); +// Create a COMDAT variable INSTR_PROF_RAW_VERSION_VAR to make the runtime +// aware this is an ir_level profile so it can set the version flag. +void createIRLevelProfileFlagVar(Module &M, bool IsCS); + +// Create variable for profile name. 
+void createProfileNameVar(Module &M, StringRef InstrProfileOutput); + } // end namespace llvm #endif // LLVM_PROFILEDATA_INSTRPROF_H Index: include/llvm/ProfileData/InstrProfData.inc =================================================================== --- include/llvm/ProfileData/InstrProfData.inc +++ include/llvm/ProfileData/InstrProfData.inc @@ -635,10 +635,12 @@ * version for other variants of profile. We set the lowest bit of the upper 8 * bits (i.e. bit 56) to 1 to indicate if this is an IR-level instrumentaiton * generated profile, and 0 if this is a Clang FE generated profile. + * 1 in bit 57 indicates there are context-sensitive records in the profile. */ #define VARIANT_MASKS_ALL 0xff00000000000000ULL #define GET_VERSION(V) ((V) & ~VARIANT_MASKS_ALL) #define VARIANT_MASK_IR_PROF (0x1ULL << 56) +#define VARIANT_MASK_CSIR_PROF (0x1ULL << 57) #define INSTR_PROF_RAW_VERSION_VAR __llvm_profile_raw_version #define INSTR_PROF_PROFILE_RUNTIME_VAR __llvm_profile_runtime Index: include/llvm/ProfileData/InstrProfReader.h =================================================================== --- include/llvm/ProfileData/InstrProfReader.h +++ include/llvm/ProfileData/InstrProfReader.h @@ -77,6 +77,8 @@ virtual bool isIRLevelProfile() const = 0; + virtual bool hasCSIRLevelProfile() const = 0; + /// Return the PGO symtab. There are three different readers: /// Raw, Text, and Indexed profile readers. The first two types /// of readers are used only by llvm-profdata tool, while the indexed @@ -142,6 +144,7 @@ /// Iterator over the profile data. line_iterator Line; bool IsIRLevelProfile = false; + bool HasCSIRLevelProfile = false; Error readValueProfileData(InstrProfRecord &Record); @@ -156,6 +159,8 @@ bool isIRLevelProfile() const override { return IsIRLevelProfile; } + bool hasCSIRLevelProfile() const override { return HasCSIRLevelProfile; } + /// Read the header. 
Error readHeader() override; @@ -212,6 +217,10 @@ return (Version & VARIANT_MASK_IR_PROF) != 0; } + bool hasCSIRLevelProfile() const override { + return (Version & VARIANT_MASK_CSIR_PROF) != 0; + } + InstrProfSymtab &getSymtab() override { assert(Symtab.get()); return *Symtab.get(); @@ -341,6 +350,7 @@ virtual void setValueProfDataEndianness(support::endianness Endianness) = 0; virtual uint64_t getVersion() const = 0; virtual bool isIRLevelProfile() const = 0; + virtual bool hasCSIRLevelProfile() const = 0; virtual Error populateSymtab(InstrProfSymtab &) = 0; }; @@ -385,6 +395,10 @@ return (FormatVersion & VARIANT_MASK_IR_PROF) != 0; } + bool hasCSIRLevelProfile() const override { + return (FormatVersion & VARIANT_MASK_CSIR_PROF) != 0; + } + Error populateSymtab(InstrProfSymtab &Symtab) override { return Symtab.create(HashTable->keys()); } @@ -412,13 +426,16 @@ std::unique_ptr Remapper; /// Profile summary data. std::unique_ptr Summary; + /// Context sensitive profile summary data. + std::unique_ptr CS_Summary; // Index to the current record in the record array. unsigned RecordIndex; // Read the profile summary. Return a pointer pointing to one byte past the // end of the summary data if it exists or the input \c Cur. + // \c UseCS indicates whether to use context-sensitive profile summary. const unsigned char *readSummary(IndexedInstrProf::ProfVersion Version, - const unsigned char *Cur); + const unsigned char *Cur, bool UseCS); public: IndexedInstrProfReader( @@ -432,6 +449,9 @@ /// Return the profile version. uint64_t getVersion() const { return Index->getVersion(); } bool isIRLevelProfile() const override { return Index->isIRLevelProfile(); } + bool hasCSIRLevelProfile() const override { + return Index->hasCSIRLevelProfile(); + } /// Return true if the given buffer is in an indexed instrprof format. static bool hasFormat(const MemoryBuffer &DataBuffer); @@ -450,7 +470,15 @@ std::vector &Counts); /// Return the maximum of all known function counts. 
- uint64_t getMaximumFunctionCount() { return Summary->getMaxFunctionCount(); } + uint64_t getMaximumFunctionCount(bool IsCS) { + if (IsCS) { + assert(CS_Summary && "No context sensitive profile summary"); + return CS_Summary->getMaxFunctionCount(); + } else { + assert(Summary && "No profile summary"); + return Summary->getMaxFunctionCount(); + } + } /// Factory method to create an indexed reader. static Expected> @@ -469,7 +497,15 @@ // to be used by llvm-profdata (for dumping). Avoid using this when // the client is the compiler. InstrProfSymtab &getSymtab() override; - ProfileSummary &getSummary() { return *(Summary.get()); } + ProfileSummary &getSummary(bool IsCS) { + if (IsCS) { + assert(CS_Summary && "No context sensitive summary"); + return *(CS_Summary.get()); + } else { + assert(Summary && "No profile summary"); + return *(Summary.get()); + } + } }; } // end namespace llvm Index: include/llvm/ProfileData/InstrProfWriter.h =================================================================== --- include/llvm/ProfileData/InstrProfWriter.h +++ include/llvm/ProfileData/InstrProfWriter.h @@ -33,7 +33,7 @@ class InstrProfWriter { public: using ProfilingData = SmallDenseMap; - enum ProfKind { PF_Unknown = 0, PF_FE, PF_IRLevel }; + enum ProfKind { PF_Unknown = 0, PF_FE, PF_IRLevel, PF_IRLevelWithCS }; private: bool Sparse; @@ -74,15 +74,24 @@ std::unique_ptr writeBuffer(); /// Set the ProfileKind. Report error if mixing FE and IR level profiles. - Error setIsIRLevelProfile(bool IsIRLevel) { + Error setIsIRLevelProfile(bool IsIRLevel, bool WithCS) { if (ProfileKind == PF_Unknown) { - ProfileKind = IsIRLevel ? PF_IRLevel: PF_FE; + if (IsIRLevel) + ProfileKind = WithCS ? PF_IRLevelWithCS : PF_IRLevel; + else + ProfileKind = PF_FE; return Error::success(); } - return (IsIRLevel == (ProfileKind == PF_IRLevel)) - ? 
Error::success() - : make_error( - instrprof_error::unsupported_version); + + if (((ProfileKind != PF_FE) && !IsIRLevel) || + ((ProfileKind == PF_FE) && IsIRLevel)) + return make_error(instrprof_error::unsupported_version); + + // Promote to PF_IRLevelWithCS if WithCS is true; + if (ProfileKind == PF_IRLevel && WithCS) + ProfileKind = PF_IRLevelWithCS; + + return Error::success(); } // Internal interface for testing purpose only. Index: include/llvm/Transforms/IPO/PassManagerBuilder.h =================================================================== --- include/llvm/Transforms/IPO/PassManagerBuilder.h +++ include/llvm/Transforms/IPO/PassManagerBuilder.h @@ -159,6 +159,10 @@ /// Enable profile instrumentation pass. bool EnablePGOInstrGen; + /// Enable profile context sensitive instrumentation pass. + bool EnablePGOCSInstrGen; + /// Enable profile context sensitive profile use pass. + bool EnablePGOCSInstrUse; /// Profile data file name that the instrumentation will be written to. std::string PGOInstrGen; /// Path of the profile data file. 
@@ -185,7 +189,7 @@ void addInitialAliasAnalysisPasses(legacy::PassManagerBase &PM) const; void addLTOOptimizationPasses(legacy::PassManagerBase &PM); void addLateLTOOptimizationPasses(legacy::PassManagerBase &PM); - void addPGOInstrPasses(legacy::PassManagerBase &MPM); + void addPGOInstrPasses(legacy::PassManagerBase &MPM, bool IsCS); void addFunctionSimplificationPasses(legacy::PassManagerBase &MPM); void addInstructionCombiningPass(legacy::PassManagerBase &MPM) const; Index: include/llvm/Transforms/Instrumentation.h =================================================================== --- include/llvm/Transforms/Instrumentation.h +++ include/llvm/Transforms/Instrumentation.h @@ -88,9 +88,12 @@ GCOVOptions::getDefault()); // PGO Instrumention -ModulePass *createPGOInstrumentationGenLegacyPass(); +ModulePass *createPGOInstrumentationGenLegacyPass(bool IsCS = false); ModulePass * -createPGOInstrumentationUseLegacyPass(StringRef Filename = StringRef("")); +createPGOInstrumentationUseLegacyPass(StringRef Filename = StringRef(""), + bool IsCS = false); +ModulePass *createPGOInstrumentationGenCreateVarLegacyPass( + StringRef CSInstrName = StringRef("")); ModulePass *createPGOIndirectCallPromotionLegacyPass(bool InLTO = false, bool SamplePGO = false); FunctionPass *createPGOMemOPSizeOptLegacyPass(); @@ -132,6 +135,9 @@ // Use atomic profile counter increments. bool Atomic = false; + // Use BFI to guide register promotion + bool UseBFIInPromotion = false; + // Name of the profile file to use as output std::string InstrProfileOutput; @@ -140,7 +146,7 @@ /// Insert frontend instrumentation based profiling. 
ModulePass *createInstrProfilingLegacyPass( - const InstrProfOptions &Options = InstrProfOptions()); + const InstrProfOptions &Options = InstrProfOptions(), bool IsCS = false); // Insert AddressSanitizer (address sanity checking) instrumentation FunctionPass *createAddressSanitizerFunctionPass(bool CompileKernel = false, Index: include/llvm/Transforms/Instrumentation/InstrProfiling.h =================================================================== --- include/llvm/Transforms/Instrumentation/InstrProfiling.h +++ include/llvm/Transforms/Instrumentation/InstrProfiling.h @@ -35,7 +35,8 @@ class InstrProfiling : public PassInfoMixin { public: InstrProfiling() = default; - InstrProfiling(const InstrProfOptions &Options) : Options(Options) {} + InstrProfiling(const InstrProfOptions &Options, bool IsCS = false) + : Options(Options), IsCS(IsCS) {} PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM); bool run(Module &M, const TargetLibraryInfo &TLI); @@ -60,6 +61,9 @@ GlobalVariable *NamesVar; size_t NamesSize; + // Is this lowering for the context-sensitive instrumentation? Defaults to false so that a default-constructed pass never holds an indeterminate value. + bool IsCS = false; + // vector of counter load/store pairs to be register promoted. std::vector PromotionCandidates; Index: include/llvm/Transforms/Instrumentation/PGOInstrumentation.h =================================================================== --- include/llvm/Transforms/Instrumentation/PGOInstrumentation.h +++ include/llvm/Transforms/Instrumentation/PGOInstrumentation.h @@ -17,6 +17,7 @@ #include "llvm/ADT/ArrayRef.h" #include "llvm/IR/PassManager.h" +#include "llvm/ProfileData/InstrProf.h" #include #include @@ -27,22 +28,45 @@ class Module; /// The instrumentation (profile-instr-gen) pass for IR based PGO.
+class PGOInstrumentationGenCreateVar + : public PassInfoMixin { +public: + PGOInstrumentationGenCreateVar(std::string CSInstrName = "") + : CSInstrName(CSInstrName) {} + PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM) { + createProfileNameVar(M, CSInstrName); + createIRLevelProfileFlagVar(M, /* IsCS */ true); + return PreservedAnalyses::all(); + } + +private: + std::string CSInstrName; +}; + +/// The instrumentation (profile-instr-gen) pass for IR based PGO. class PGOInstrumentationGen : public PassInfoMixin { public: + PGOInstrumentationGen(bool IsCS = false) : IsCS(IsCS) {} PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM); + +private: + // If this is a context sensitive instrumentation. + bool IsCS; }; /// The profile annotation (profile-instr-use) pass for IR based PGO. class PGOInstrumentationUse : public PassInfoMixin { public: PGOInstrumentationUse(std::string Filename = "", - std::string RemappingFilename = ""); + std::string RemappingFilename = "", bool IsCS = false); PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM); private: std::string ProfileFileName; std::string ProfileRemappingFileName; + // If this is a context sensitive instrumentation. + bool IsCS; }; /// The indirect function call promotion pass. Index: lib/Analysis/ProfileSummaryInfo.cpp =================================================================== --- lib/Analysis/ProfileSummaryInfo.cpp +++ lib/Analysis/ProfileSummaryInfo.cpp @@ -79,7 +79,14 @@ bool ProfileSummaryInfo::computeSummary() { if (Summary) return true; - auto *SummaryMD = M.getProfileSummary(); + // First try to get context sensitive ProfileSummary. + auto *SummaryMD = M.getProfileSummary(ProfileSummary::PSK_CSInstr); + if (SummaryMD) { + Summary.reset(ProfileSummary::getFromMD(SummaryMD)); + return true; + } + // This will actually return PSK_Instr or PSK_Sample summary. 
+ SummaryMD = M.getProfileSummary(ProfileSummary::PSK_Instr); if (!SummaryMD) return false; Summary.reset(ProfileSummary::getFromMD(SummaryMD)); Index: lib/IR/Module.cpp =================================================================== --- lib/IR/Module.cpp +++ lib/IR/Module.cpp @@ -532,12 +532,18 @@ addModuleFlag(ModFlagBehavior::Error, "Code Model", CL); } -void Module::setProfileSummary(Metadata *M) { - addModuleFlag(ModFlagBehavior::Error, "ProfileSummary", M); +void Module::setProfileSummary(Metadata *M, ProfileSummary::Kind Kind) { + if (Kind == ProfileSummary::PSK_CSInstr) + addModuleFlag(ModFlagBehavior::Error, "CSProfileSummary", M); + else + addModuleFlag(ModFlagBehavior::Error, "ProfileSummary", M); } -Metadata *Module::getProfileSummary() { - return getModuleFlag("ProfileSummary"); +Metadata *Module::getProfileSummary(ProfileSummary::Kind Kind) { + if (Kind == ProfileSummary::PSK_CSInstr) + return getModuleFlag("CSProfileSummary"); + else + return getModuleFlag("ProfileSummary"); } void Module::setOwnedMemoryBuffer(std::unique_ptr MB) { Index: lib/IR/ProfileSummary.cpp =================================================================== --- lib/IR/ProfileSummary.cpp +++ lib/IR/ProfileSummary.cpp @@ -21,8 +21,6 @@ using namespace llvm; -const char *ProfileSummary::KindStr[2] = {"InstrProf", "SampleProfile"}; - // Return an MDTuple with two elements. The first element is a string Key and // the second is a uint64_t Value. static Metadata *getKeyValMD(LLVMContext &Context, const char *Key, @@ -68,6 +66,7 @@ // "SampleProfile"). The rest of the elements of the outer MDTuple are specific // to the kind of profile summary as returned by getFormatSpecificMD. 
Metadata *ProfileSummary::getMD(LLVMContext &Context) { + const char *KindStr[3] = {"InstrProf", "CSInstrProf", "SampleProfile"}; Metadata *Components[] = { getKeyValMD(Context, "ProfileFormat", KindStr[PSK]), getKeyValMD(Context, "TotalCount", getTotalCount()), @@ -153,6 +152,9 @@ else if (isKeyValuePair(dyn_cast_or_null(FormatMD), "ProfileFormat", "InstrProf")) SummaryKind = PSK_Instr; + else if (isKeyValuePair(dyn_cast_or_null(FormatMD), "ProfileFormat", + "CSInstrProf")) + SummaryKind = PSK_CSInstr; else return nullptr; Index: lib/LTO/LTOBackend.cpp =================================================================== --- lib/LTO/LTOBackend.cpp +++ lib/LTO/LTOBackend.cpp @@ -154,8 +154,15 @@ const ModuleSummaryIndex *ImportSummary) { Optional PGOOpt; if (!Conf.SampleProfile.empty()) - PGOOpt = PGOOptions("", "", Conf.SampleProfile, Conf.ProfileRemapping, - false, true); + PGOOpt = PGOOptions(Conf.SampleProfile, "", Conf.ProfileRemapping, + PGOOptions::SampleUse, PGOOptions::NoCSAction, true); + else if (Conf.RunCSIRInstr) { + PGOOpt = PGOOptions("", Conf.CSIRProfile, Conf.ProfileRemapping, + PGOOptions::IRUse, PGOOptions::CSIRInstr); + } else if (!Conf.CSIRProfile.empty()) { + PGOOpt = PGOOptions(Conf.CSIRProfile, "", Conf.ProfileRemapping, + PGOOptions::IRUse, PGOOptions::CSIRUse); + } PassBuilder PB(TM, PGOOpt); AAManager AA; @@ -273,6 +280,11 @@ PMB.SLPVectorize = true; PMB.OptLevel = Conf.OptLevel; PMB.PGOSampleUse = Conf.SampleProfile; + PMB.EnablePGOCSInstrGen = Conf.RunCSIRInstr; + if (!Conf.RunCSIRInstr && !Conf.CSIRProfile.empty()) { + PMB.EnablePGOCSInstrUse = true; + PMB.PGOInstrUse = Conf.CSIRProfile; + } if (IsThinLTO) PMB.populateThinLTOPassManager(passes); else Index: lib/Passes/PassBuilder.cpp =================================================================== --- lib/Passes/PassBuilder.cpp +++ lib/Passes/PassBuilder.cpp @@ -402,7 +402,7 @@ // For PGO use pipeline, try to optimize memory intrinsics such as memcpy // using the size value profile. 
Don't perform this when optimizing for size. - if (PGOOpt && !PGOOpt->ProfileUseFile.empty() && + if (PGOOpt && PGOOpt->Action == PGOOptions::IRUse && !isOptimizingForSize(Level)) FPM.addPass(PGOMemOPSizeOpt()); @@ -445,8 +445,8 @@ // Do not enable unrolling in PreLinkThinLTO phase during sample PGO // because it changes IR to makes profile annotation in back compile // inaccurate. - if (Phase != ThinLTOPhase::PreLink || - !PGOOpt || PGOOpt->SampleProfileFile.empty()) + if (Phase != ThinLTOPhase::PreLink || !PGOOpt || + PGOOpt->Action != PGOOptions::SampleUse) LPM2.addPass(LoopFullUnrollPass(Level)); for (auto &C : LoopOptimizerEndEPCallbacks) @@ -506,7 +506,8 @@ invokePeepholeEPCallbacks(FPM, Level); if (EnableCHR && Level == O3 && PGOOpt && - (!PGOOpt->ProfileUseFile.empty() || !PGOOpt->SampleProfileFile.empty())) + (PGOOpt->Action == PGOOptions::IRUse || + PGOOpt->Action == PGOOptions::SampleUse)) FPM.addPass(ControlHeightReductionPass()); return FPM; @@ -514,15 +515,14 @@ void PassBuilder::addPGOInstrPasses(ModulePassManager &MPM, bool DebugLogging, PassBuilder::OptimizationLevel Level, - bool RunProfileGen, - std::string ProfileGenFile, - std::string ProfileUseFile, + bool RunProfileGen, bool IsCS, + std::string ProfileFile, std::string ProfileRemappingFile) { // Generally running simplification passes and the inliner with an high // threshold results in smaller executables, but there may be cases where // the size grows, so let's be conservative here and skip this simplification // at -Os/Oz. - if (!isOptimizingForSize(Level)) { + if (!isOptimizingForSize(Level) && !IsCS) { InlineParams IP; // In the old pass manager, this is a cl::opt. Should still this be one? @@ -555,7 +555,7 @@ MPM.addPass(GlobalDCEPass()); if (RunProfileGen) { - MPM.addPass(PGOInstrumentationGen()); + MPM.addPass(PGOInstrumentationGen(IsCS)); FunctionPassManager FPM; FPM.addPass( @@ -564,14 +564,13 @@ // Add the profile lowering pass. 
InstrProfOptions Options; - if (!ProfileGenFile.empty()) - Options.InstrProfileOutput = ProfileGenFile; + if (!ProfileFile.empty()) + Options.InstrProfileOutput = ProfileFile; Options.DoCounterPromotion = true; - MPM.addPass(InstrProfiling(Options)); - } - - if (!ProfileUseFile.empty()) - MPM.addPass(PGOInstrumentationUse(ProfileUseFile, ProfileRemappingFile)); + Options.UseBFIInPromotion = IsCS; + MPM.addPass(InstrProfiling(Options, IsCS)); + } else if (!ProfileFile.empty()) + MPM.addPass(PGOInstrumentationUse(ProfileFile, ProfileRemappingFile, IsCS)); } static InlineParams @@ -608,19 +607,19 @@ // More details about SamplePGO design can be found in: // https://research.google.com/pubs/pub45290.html // FIXME: revisit how SampleProfileLoad/Inliner/ICP is structured. - if (PGOOpt && !PGOOpt->SampleProfileFile.empty() && + if (PGOOpt && PGOOpt->Action == PGOOptions::SampleUse && Phase == ThinLTOPhase::PostLink) EarlyFPM.addPass(InstCombinePass()); MPM.addPass(createModuleToFunctionPassAdaptor(std::move(EarlyFPM))); - if (PGOOpt && !PGOOpt->SampleProfileFile.empty()) { + if (PGOOpt && PGOOpt->Action == PGOOptions::SampleUse) { // Annotate sample profile right after early FPM to ensure freshness of // the debug info. // In ThinLTO mode, when flattened profile is used, all the available // profile information will be annotated in PreLink phase so there is // no need to load the profile again in PostLink. if (!(FlattenedProfileUsed && Phase == ThinLTOPhase::PostLink)) - MPM.addPass(SampleProfileLoaderPass(PGOOpt->SampleProfileFile, + MPM.addPass(SampleProfileLoaderPass(PGOOpt->ProfileFile, PGOOpt->ProfileRemappingFile, Phase == ThinLTOPhase::PreLink)); // Do not invoke ICP in the ThinLTOPrelink phase as it makes it hard @@ -669,12 +668,17 @@ // Add all the requested passes for instrumentation PGO, if requested. 
if (PGOOpt && Phase != ThinLTOPhase::PostLink && - (!PGOOpt->ProfileGenFile.empty() || !PGOOpt->ProfileUseFile.empty())) { - addPGOInstrPasses(MPM, DebugLogging, Level, PGOOpt->RunProfileGen, - PGOOpt->ProfileGenFile, PGOOpt->ProfileUseFile, + (PGOOpt->Action == PGOOptions::IRInstr || + PGOOpt->Action == PGOOptions::IRUse)) { + addPGOInstrPasses(MPM, DebugLogging, Level, + /* RunProfileGen */ PGOOpt->Action == PGOOptions::IRInstr, + /* IsCS */ false, PGOOpt->ProfileFile, PGOOpt->ProfileRemappingFile); MPM.addPass(PGOIndirectCallPromotion(false, false)); } + if (PGOOpt && Phase != ThinLTOPhase::PostLink && + PGOOpt->CSAction == PGOOptions::CSIRInstr) + MPM.addPass(PGOInstrumentationGenCreateVar(PGOOpt->CSProfileGenFile)); // Synthesize function entry counts for non-PGO compilation. if (EnableSyntheticCounts && !PGOOpt) @@ -705,8 +709,8 @@ // For PreLinkThinLTO pass, we disable hot-caller heuristic for sample PGO // because it makes profile annotation in the backend inaccurate. InlineParams IP = getInlineParamsFromOptLevel(Level); - if (Phase == ThinLTOPhase::PreLink && - PGOOpt && !PGOOpt->SampleProfileFile.empty()) + if (Phase == ThinLTOPhase::PreLink && PGOOpt && + PGOOpt->Action == PGOOptions::SampleUse) IP.HotCallSiteThreshold = 0; MainCGPipeline.addPass(InlinerPass(IP)); @@ -743,9 +747,8 @@ return MPM; } -ModulePassManager -PassBuilder::buildModuleOptimizationPipeline(OptimizationLevel Level, - bool DebugLogging) { +ModulePassManager PassBuilder::buildModuleOptimizationPipeline( + OptimizationLevel Level, bool DebugLogging, bool LTOPreLink) { ModulePassManager MPM(DebugLogging); // Optimize globals now that the module is fully simplified. @@ -772,6 +775,21 @@ // FIXME: Is this really an optimization rather than a canonicalization? MPM.addPass(ReversePostOrderFunctionAttrsPass()); + // Do a post inline PGO instrumentation and use pass. This is a context + // sensitive PGO pass. 
We don't want to do this in LTOPreLink phase as + // cross-module inline has not been done yet. The context sensitive + // instrumentation is after all the inlines are done. + if (!LTOPreLink && PGOOpt) { + if (PGOOpt->CSAction == PGOOptions::CSIRInstr) + addPGOInstrPasses(MPM, DebugLogging, Level, /* RunProfileGen */ true, + /* IsCS */ true, PGOOpt->CSProfileGenFile, + PGOOpt->ProfileRemappingFile); + else if (PGOOpt->CSAction == PGOOptions::CSIRUse) + addPGOInstrPasses(MPM, DebugLogging, Level, /* RunProfileGen */ false, + /* IsCS */ true, PGOOpt->ProfileFile, + PGOOpt->ProfileRemappingFile); + } + // Re-require GloblasAA here prior to function passes. This is particularly // useful as the above will have inlined, DCE'ed, and function-attr // propagated everything. We should at this point have a reasonably minimal @@ -897,7 +915,7 @@ ModulePassManager PassBuilder::buildPerModuleDefaultPipeline(OptimizationLevel Level, - bool DebugLogging) { + bool DebugLogging, bool LTOPreLink) { assert(Level != O0 && "Must request optimizations for the default pipeline!"); ModulePassManager MPM(DebugLogging); @@ -917,7 +935,7 @@ DebugLogging)); // Now add the optimization pipeline. - MPM.addPass(buildModuleOptimizationPipeline(Level, DebugLogging)); + MPM.addPass(buildModuleOptimizationPipeline(Level, DebugLogging, LTOPreLink)); return MPM; } @@ -991,7 +1009,7 @@ // look unreferenced and are removed. // FIXME: move this into buildModuleSimplificationPipeline to merge the logic // with SamplePGO. - if (!PGOOpt || PGOOpt->SampleProfileFile.empty()) + if (!PGOOpt || PGOOpt->Action != PGOOptions::SampleUse) MPM.addPass(PGOIndirectCallPromotion(true /* InLTO */, false /* SamplePGO */)); @@ -1010,7 +1028,8 @@ bool DebugLogging) { assert(Level != O0 && "Must request optimizations for the default pipeline!"); // FIXME: We should use a customized pre-link pipeline! 
- return buildPerModuleDefaultPipeline(Level, DebugLogging); + return buildPerModuleDefaultPipeline(Level, DebugLogging, + /* LTOPreLink */ true); } ModulePassManager @@ -1019,9 +1038,9 @@ assert(Level != O0 && "Must request optimizations for the default pipeline!"); ModulePassManager MPM(DebugLogging); - if (PGOOpt && !PGOOpt->SampleProfileFile.empty()) { + if (PGOOpt && PGOOpt->Action == PGOOptions::SampleUse) { // Load sample profile before running the LTO optimization pipeline. - MPM.addPass(SampleProfileLoaderPass(PGOOpt->SampleProfileFile, + MPM.addPass(SampleProfileLoaderPass(PGOOpt->ProfileFile, PGOOpt->ProfileRemappingFile, false /* ThinLTOPhase::PreLink */)); } @@ -1047,7 +1066,7 @@ // This two-step promotion is to save the compile time. For LTO, it should // produce the same result as if we only do promotion here. MPM.addPass(PGOIndirectCallPromotion( - true /* InLTO */, PGOOpt && !PGOOpt->SampleProfileFile.empty())); + true /* InLTO */, PGOOpt && PGOOpt->Action == PGOOptions::SampleUse)); // Propagate constants at call sites into the functions they call. This // opens opportunities for globalopt (and inlining) by substituting function // pointers passed as arguments to direct uses of functions. @@ -1129,6 +1148,19 @@ FPM.addPass(JumpThreadingPass()); + // Do a post inline PGO instrumentation and use pass. This is a context + // sensitive PGO pass. 
+ if (PGOOpt) { + if (PGOOpt->CSAction == PGOOptions::CSIRInstr) + addPGOInstrPasses(MPM, DebugLogging, Level, /* RunProfileGen */ true, + /* IsCS */ true, PGOOpt->CSProfileGenFile, + PGOOpt->ProfileRemappingFile); + else if (PGOOpt->CSAction == PGOOptions::CSIRUse) + addPGOInstrPasses(MPM, DebugLogging, Level, /* RunProfileGen */ false, + /* IsCS */ true, PGOOpt->ProfileFile, + PGOOpt->ProfileRemappingFile); + } + // Break up allocas FPM.addPass(SROA()); Index: lib/ProfileData/InstrProf.cpp =================================================================== --- lib/ProfileData/InstrProf.cpp +++ lib/ProfileData/InstrProf.cpp @@ -1011,4 +1011,40 @@ assert(RangeLast >= RangeStart); } +// Create variable for profile name. +void createProfileNameVar(Module &M, StringRef InstrProfileOutput) { + if (InstrProfileOutput.empty()) + return; + Constant *ProfileNameConst = + ConstantDataArray::getString(M.getContext(), InstrProfileOutput, true); + GlobalVariable *ProfileNameVar = new GlobalVariable( + M, ProfileNameConst->getType(), true, GlobalValue::WeakAnyLinkage, + ProfileNameConst, INSTR_PROF_QUOTE(INSTR_PROF_PROFILE_NAME_VAR)); + Triple TT(M.getTargetTriple()); + if (TT.supportsCOMDAT()) { + ProfileNameVar->setLinkage(GlobalValue::ExternalLinkage); + ProfileNameVar->setComdat(M.getOrInsertComdat( + StringRef(INSTR_PROF_QUOTE(INSTR_PROF_PROFILE_NAME_VAR)))); + } +} + +// Create a COMDAT variable INSTR_PROF_RAW_VERSION_VAR to make the runtime +// aware this is an ir_level profile so it can set the version flag. 
+void createIRLevelProfileFlagVar(Module &M, bool IsCS) { + const StringRef VarName(INSTR_PROF_QUOTE(INSTR_PROF_RAW_VERSION_VAR)); + Type *IntTy64 = Type::getInt64Ty(M.getContext()); + uint64_t ProfileVersion = (INSTR_PROF_RAW_VERSION | VARIANT_MASK_IR_PROF); + if (IsCS) + ProfileVersion |= VARIANT_MASK_CSIR_PROF; + auto IRLevelVersionVariable = new GlobalVariable( + M, IntTy64, true, GlobalValue::WeakAnyLinkage, + Constant::getIntegerValue(IntTy64, APInt(64, ProfileVersion)), VarName); + IRLevelVersionVariable->setVisibility(GlobalValue::DefaultVisibility); + Triple TT(M.getTargetTriple()); + if (TT.supportsCOMDAT()) { + IRLevelVersionVariable->setLinkage(GlobalValue::ExternalLinkage); + IRLevelVersionVariable->setComdat(M.getOrInsertComdat(VarName)); + } +} + } // end namespace llvm Index: lib/ProfileData/InstrProfReader.cpp =================================================================== --- lib/ProfileData/InstrProfReader.cpp +++ lib/ProfileData/InstrProfReader.cpp @@ -162,7 +162,10 @@ IsIRInstr = true; else if (Str.equals_lower("fe")) IsIRInstr = false; - else + else if (Str.equals_lower("csir")) { + IsIRInstr = true; + HasCSIRLevelProfile = true; + } else return error(instrprof_error::bad_header); ++Line; @@ -733,7 +736,7 @@ const unsigned char * IndexedInstrProfReader::readSummary(IndexedInstrProf::ProfVersion Version, - const unsigned char *Cur) { + const unsigned char *Cur, bool UseCS) { using namespace IndexedInstrProf; using namespace support; @@ -760,10 +763,13 @@ DetailedSummary.emplace_back((uint32_t)Ent.Cutoff, Ent.MinBlockCount, Ent.NumBlocks); } + std::unique_ptr &Summary = + UseCS ? this->CS_Summary : this->Summary; + // initialize InstrProfSummary using the SummaryData from disk. - this->Summary = llvm::make_unique( - ProfileSummary::PSK_Instr, DetailedSummary, - SummaryData->get(Summary::TotalBlockCount), + Summary = llvm::make_unique( + UseCS ? 
ProfileSummary::PSK_CSInstr : ProfileSummary::PSK_Instr, + DetailedSummary, SummaryData->get(Summary::TotalBlockCount), SummaryData->get(Summary::MaxBlockCount), SummaryData->get(Summary::MaxInternalBlockCount), SummaryData->get(Summary::MaxFunctionCount), @@ -805,7 +811,11 @@ IndexedInstrProf::ProfVersion::CurrentVersion) return error(instrprof_error::unsupported_version); - Cur = readSummary((IndexedInstrProf::ProfVersion)FormatVersion, Cur); + Cur = readSummary((IndexedInstrProf::ProfVersion)FormatVersion, Cur, + /* UseCS */ false); + if (Header->Version & VARIANT_MASK_CSIR_PROF) + Cur = readSummary((IndexedInstrProf::ProfVersion)FormatVersion, Cur, + /* UseCS */ true); // Read the hash type and start offset. IndexedInstrProf::HashT HashType = static_cast( Index: lib/ProfileData/InstrProfWriter.cpp =================================================================== --- lib/ProfileData/InstrProfWriter.cpp +++ lib/ProfileData/InstrProfWriter.cpp @@ -101,6 +101,7 @@ support::endianness ValueProfDataEndianness = support::little; InstrProfSummaryBuilder *SummaryBuilder; + InstrProfSummaryBuilder *CSSummaryBuilder; InstrProfRecordWriterTrait() = default; @@ -142,7 +143,10 @@ endian::Writer LE(Out, little); for (const auto &ProfileData : *V) { const InstrProfRecord &ProfRecord = ProfileData.second; - SummaryBuilder->addRecord(ProfRecord); + if (NamedInstrProfRecord::hasCSFlagInHash(ProfileData.first)) + CSSummaryBuilder->addRecord(ProfRecord); + else + SummaryBuilder->addRecord(ProfRecord); LE.write(ProfileData.first); // Function hash LE.write(ProfRecord.Counts.size()); @@ -253,6 +257,8 @@ InstrProfSummaryBuilder ISB(ProfileSummaryBuilder::DefaultCutoffs); InfoObj->SummaryBuilder = &ISB; + InstrProfSummaryBuilder CSISB(ProfileSummaryBuilder::DefaultCutoffs); + InfoObj->CSSummaryBuilder = &CSISB; // Populate the hash table generator. 
for (const auto &I : FunctionData) @@ -264,6 +270,10 @@ Header.Version = IndexedInstrProf::ProfVersion::CurrentVersion; if (ProfileKind == PF_IRLevel) Header.Version |= VARIANT_MASK_IR_PROF; + if (ProfileKind == PF_IRLevelWithCS) { + Header.Version |= VARIANT_MASK_IR_PROF; + Header.Version |= VARIANT_MASK_CSIR_PROF; + } Header.Unused = 0; Header.HashType = static_cast(IndexedInstrProf::HashType); Header.HashOffset = 0; @@ -287,6 +297,14 @@ uint64_t SummaryOffset = OS.tell(); for (unsigned I = 0; I < SummarySize / sizeof(uint64_t); I++) OS.write(0); + uint64_t CSSummaryOffset = 0; + uint64_t CSSummarySize = 0; + if (ProfileKind == PF_IRLevelWithCS) { + CSSummaryOffset = OS.tell(); + CSSummarySize = SummarySize / sizeof(uint64_t); + for (unsigned I = 0; I < CSSummarySize; I++) + OS.write(0); + } // Write the hash table. uint64_t HashTableStart = Generator.Emit(OS.OS, *InfoObj); @@ -300,13 +318,25 @@ setSummary(TheSummary.get(), *PS); InfoObj->SummaryBuilder = nullptr; + // For Context Sensitive summary. + std::unique_ptr TheCSSummary = nullptr; + if (ProfileKind == PF_IRLevelWithCS) { + TheCSSummary = IndexedInstrProf::allocSummary(SummarySize); + std::unique_ptr CSPS = CSISB.getSummary(); + setSummary(TheCSSummary.get(), *CSPS); + } + InfoObj->CSSummaryBuilder = nullptr; + // Now do the final patch: PatchItem PatchItems[] = { // Patch the Header.HashOffset field. {HashTableStartFieldOffset, &HashTableStart, 1}, // Patch the summary data. 
{SummaryOffset, reinterpret_cast(TheSummary.get()), - (int)(SummarySize / sizeof(uint64_t))}}; + (int)(SummarySize / sizeof(uint64_t))}, + {CSSummaryOffset, reinterpret_cast(TheCSSummary.get()), + (int)CSSummarySize}}; + OS.patch(PatchItems, sizeof(PatchItems) / sizeof(*PatchItems)); } @@ -375,6 +405,8 @@ Error InstrProfWriter::writeText(raw_fd_ostream &OS) { if (ProfileKind == PF_IRLevel) OS << "# IR level Instrumentation Flag\n:ir\n"; + else if (ProfileKind == PF_IRLevelWithCS) + OS << "# CSIR level Instrumentation Flag\n:csir\n"; InstrProfSymtab Symtab; for (const auto &I : FunctionData) if (shouldEncodeData(I.getValue())) Index: lib/Transforms/IPO/PassManagerBuilder.cpp =================================================================== --- lib/Transforms/IPO/PassManagerBuilder.cpp +++ lib/Transforms/IPO/PassManagerBuilder.cpp @@ -170,6 +170,8 @@ MergeFunctions = false; PrepareForLTO = false; EnablePGOInstrGen = false; + EnablePGOCSInstrGen = false; + EnablePGOCSInstrUse = false; PGOInstrGen = ""; PGOInstrUse = ""; PGOSampleUse = ""; @@ -267,13 +269,18 @@ } // Do PGO instrumentation generation or use pass as the option specified. -void PassManagerBuilder::addPGOInstrPasses(legacy::PassManagerBase &MPM) { - if (!EnablePGOInstrGen && PGOInstrUse.empty() && PGOSampleUse.empty()) +void PassManagerBuilder::addPGOInstrPasses(legacy::PassManagerBase &MPM, + bool IsCS = false) { + if (IsCS) { + if (!EnablePGOCSInstrGen && !EnablePGOCSInstrUse) + return; + } else if (!EnablePGOInstrGen && PGOInstrUse.empty() && PGOSampleUse.empty()) return; + // Perform the preinline and cleanup passes for O1 and above. // And avoid doing them if optimizing for size. if (OptLevel > 0 && SizeLevel == 0 && !DisablePreInliner && - PGOSampleUse.empty()) { + PGOSampleUse.empty() && !IsCS) { // Create preinline pass. We construct an InlineParams object and specify // the threshold here to avoid the command line options of the regular // inliner to influence pre-inlining. 
The only fields of InlineParams we @@ -291,22 +298,23 @@ MPM.add(createInstructionCombiningPass()); // Combine silly seq's addExtensionsToPM(EP_Peephole, MPM); } - if (EnablePGOInstrGen) { - MPM.add(createPGOInstrumentationGenLegacyPass()); + if ((EnablePGOInstrGen && !IsCS) || (EnablePGOCSInstrGen && IsCS)) { + MPM.add(createPGOInstrumentationGenLegacyPass(IsCS)); // Add the profile lowering pass. InstrProfOptions Options; if (!PGOInstrGen.empty()) Options.InstrProfileOutput = PGOInstrGen; Options.DoCounterPromotion = true; + Options.UseBFIInPromotion = IsCS; MPM.add(createLoopRotatePass()); - MPM.add(createInstrProfilingLegacyPass(Options)); + MPM.add(createInstrProfilingLegacyPass(Options, IsCS)); } if (!PGOInstrUse.empty()) - MPM.add(createPGOInstrumentationUseLegacyPass(PGOInstrUse)); + MPM.add(createPGOInstrumentationUseLegacyPass(PGOInstrUse, IsCS)); // Indirect call promotion that promotes intra-module targets only. // For ThinLTO this is done earlier due to interactions with globalopt // for imported functions. We don't run this at -O0. - if (OptLevel > 0) + if (OptLevel > 0 && !IsCS) MPM.add( createPGOIndirectCallPromotionLegacyPass(false, !PGOSampleUse.empty())); } @@ -525,6 +533,11 @@ if (!PerformThinLTO && !PrepareForThinLTOUsingPGOSampleProfile) addPGOInstrPasses(MPM); + // Create profile COMDAT variables. Lld linker wants to see all variables + // before the LTO/ThinLTO link since it needs to resolve symbols/comdats. + if (!PerformThinLTO && EnablePGOCSInstrGen) + MPM.add(createPGOInstrumentationGenCreateVarLegacyPass(PGOInstrGen)); + // We add a module alias analysis pass here. In part due to bugs in the // analysis infrastructure this "works" in that the analysis stays alive // for the entire SCC pass run below. @@ -554,6 +567,12 @@ if (RunPartialInlining) MPM.add(createPartialInliningPass()); + // CSFDO instrumentation and use pass. 
Don't invoke this for Prepare pass + // for LTO and ThinLTO -- The actual pass will be called after all inlines + // are performed. + if (!(PrepareForLTO || PrepareForThinLTO)) + addPGOInstrPasses(MPM, /* IsCS */ true); + if (OptLevel > 1 && !PrepareForLTO && !PrepareForThinLTO) // Remove avail extern fns and globals definitions if we aren't // compiling an object file for later LTO. For LTO we want to preserve @@ -841,6 +860,9 @@ PM.add(createPruneEHPass()); // Remove dead EH info. + // CSFDO instrumentation and use pass. + addPGOInstrPasses(PM, /* IsCS */ true); + // Optimize globals again if we ran the inliner. if (RunInliner) PM.add(createGlobalOptimizerPass()); Index: lib/Transforms/IPO/SampleProfile.cpp =================================================================== --- lib/Transforms/IPO/SampleProfile.cpp +++ lib/Transforms/IPO/SampleProfile.cpp @@ -1567,8 +1567,9 @@ return false; PSI = _PSI; - if (M.getProfileSummary() == nullptr) - M.setProfileSummary(Reader->getSummary().getMD(M.getContext())); + if (M.getProfileSummary(ProfileSummary::PSK_Sample) == nullptr) + M.setProfileSummary(Reader->getSummary().getMD(M.getContext()), + ProfileSummary::PSK_Sample); // Compute the total number of samples collected in this profile. 
for (const auto &I : Reader->getProfiles()) Index: lib/Transforms/Instrumentation/InstrProfiling.cpp =================================================================== --- lib/Transforms/Instrumentation/InstrProfiling.cpp +++ lib/Transforms/Instrumentation/InstrProfiling.cpp @@ -18,6 +18,8 @@ #include "llvm/ADT/StringRef.h" #include "llvm/ADT/Triple.h" #include "llvm/ADT/Twine.h" +#include "llvm/Analysis/BlockFrequencyInfo.h" +#include "llvm/Analysis/BranchProbabilityInfo.h" #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/IR/Attributes.h" @@ -147,8 +149,8 @@ static char ID; InstrProfilingLegacyPass() : ModulePass(ID) {} - InstrProfilingLegacyPass(const InstrProfOptions &Options) - : ModulePass(ID), InstrProf(Options) {} + InstrProfilingLegacyPass(const InstrProfOptions &Options, bool IsCS) + : ModulePass(ID), InstrProf(Options, IsCS) {} StringRef getPassName() const override { return "Frontend instrumentation-based coverage lowering"; @@ -231,9 +233,9 @@ public: PGOCounterPromoter( DenseMap> &LoopToCands, - Loop &CurLoop, LoopInfo &LI) + Loop &CurLoop, LoopInfo &LI, BlockFrequencyInfo *BFI) : LoopToCandidates(LoopToCands), ExitBlocks(), InsertPts(), L(CurLoop), - LI(LI) { + LI(LI), BFI(BFI) { SmallVector LoopExitBlocks; SmallPtrSet BlockSet; @@ -262,6 +264,20 @@ SSAUpdater SSA(&NewPHIs); Value *InitVal = ConstantInt::get(Cand.first->getType(), 0); + // If BFI is set, we will use it to guide the promotions. + if (BFI) { + auto *BB = Cand.first->getParent(); + auto InstrCount = BFI->getBlockProfileCount(BB); + if (!InstrCount) + continue; + auto PreheaderCount = BFI->getBlockProfileCount(L.getLoopPreheader()); + // If the average loop trip count is not greater than 1.5, we skip + // promotion. 
+ if (PreheaderCount && + (PreheaderCount.getValue() * 3) >= (InstrCount.getValue() * 2)) + continue; + } + PGOCounterPromoterHelper Promoter(Cand.first, Cand.second, SSA, InitVal, L.getLoopPreheader(), ExitBlocks, InsertPts, LoopToCandidates, LI); @@ -311,6 +327,11 @@ SmallVector ExitingBlocks; LP->getExitingBlocks(ExitingBlocks); + + // If BFI is set, we do more aggressive promotions based on BFI. + if (BFI) + return (unsigned)-1; + // Not considierered speculative. if (ExitingBlocks.size() == 1) return MaxNumOfPromotionsPerLoop; @@ -342,6 +363,7 @@ SmallVector InsertPts; Loop &L; LoopInfo &LI; + BlockFrequencyInfo *BFI; }; } // end anonymous namespace @@ -364,8 +386,9 @@ "Frontend instrumentation-based coverage lowering.", false, false) ModulePass * -llvm::createInstrProfilingLegacyPass(const InstrProfOptions &Options) { - return new InstrProfilingLegacyPass(Options); +llvm::createInstrProfilingLegacyPass(const InstrProfOptions &Options, + bool IsCS) { + return new InstrProfilingLegacyPass(Options, IsCS); } static InstrProfIncrementInst *castToIncrementInst(Instruction *Instr) { @@ -414,6 +437,13 @@ LoopInfo LI(DT); DenseMap> LoopPromotionCandidates; + std::unique_ptr BFI; + if (Options.UseBFIInPromotion) { + std::unique_ptr BPI; + BPI.reset(new BranchProbabilityInfo(*F, LI, TLI)); + BFI.reset(new BlockFrequencyInfo(*F, *BPI, LI)); + } + for (const auto &LoadStore : PromotionCandidates) { auto *CounterLoad = LoadStore.first; auto *CounterStore = LoadStore.second; @@ -429,7 +459,7 @@ // Do a post-order traversal of the loops so that counter updates can be // iteratively hoisted outside the loop nest. for (auto *Loop : llvm::reverse(Loops)) { - PGOCounterPromoter Promoter(LoopPromotionCandidates, *Loop, LI); + PGOCounterPromoter Promoter(LoopPromotionCandidates, *Loop, LI, BFI.get()); Promoter.run(&TotalCountersPromoted); } } @@ -696,7 +726,6 @@ // Don't do this for Darwin. compiler-rt uses linker magic. 
if (Triple(M.getTargetTriple()).isOSDarwin()) return false; - // Use linker script magic to get data/cnts/name start/end. if (Triple(M.getTargetTriple()).isOSLinux() || Triple(M.getTargetTriple()).isOSFreeBSD() || @@ -967,20 +996,13 @@ } void InstrProfiling::emitInitialization() { - StringRef InstrProfileOutput = Options.InstrProfileOutput; - - if (!InstrProfileOutput.empty()) { - // Create variable for profile name. - Constant *ProfileNameConst = - ConstantDataArray::getString(M->getContext(), InstrProfileOutput, true); - GlobalVariable *ProfileNameVar = new GlobalVariable( - *M, ProfileNameConst->getType(), true, GlobalValue::WeakAnyLinkage, - ProfileNameConst, INSTR_PROF_QUOTE(INSTR_PROF_PROFILE_NAME_VAR)); - if (TT.supportsCOMDAT()) { - ProfileNameVar->setLinkage(GlobalValue::ExternalLinkage); - ProfileNameVar->setComdat(M->getOrInsertComdat( - StringRef(INSTR_PROF_QUOTE(INSTR_PROF_PROFILE_NAME_VAR)))); - } + // Don't create ProfileName variable for context-sensitive instrumentation + // lowering: This lowering is after LTO/ThinLTO linking. Pass + // PGOInstrumentationGenCreateVar should have already created the + // variable before LTO/ThinLTO linking. 
+ if (!IsCS) { + StringRef InstrProfileOutput = Options.InstrProfileOutput; + createProfileNameVar(*M, InstrProfileOutput); } Constant *RegisterF = M->getFunction(getInstrProfRegFuncsName()); Index: lib/Transforms/Instrumentation/PGOInstrumentation.cpp =================================================================== --- lib/Transforms/Instrumentation/PGOInstrumentation.cpp +++ lib/Transforms/Instrumentation/PGOInstrumentation.cpp @@ -65,6 +65,7 @@ #include "llvm/Analysis/IndirectCallVisitor.h" #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/OptimizationRemarkEmitter.h" +#include "llvm/Analysis/ProfileSummaryInfo.h" #include "llvm/IR/Attributes.h" #include "llvm/IR/BasicBlock.h" #include "llvm/IR/CFG.h" @@ -132,6 +133,19 @@ STATISTIC(NumOfPGOMismatch, "Number of functions having mismatch profile."); STATISTIC(NumOfPGOMissing, "Number of functions without profile."); STATISTIC(NumOfPGOICall, "Number of indirect call value instrumentations."); +STATISTIC(NumOfCSPGOInstrument, "Number of edges instrumented in CSPGO."); +STATISTIC(NumOfCSPGOSelectInsts, + "Number of select instruction instrumented in CSPGO."); +STATISTIC(NumOfCSPGOMemIntrinsics, + "Number of mem intrinsics instrumented in CSPGO."); +STATISTIC(NumOfCSPGOEdge, "Number of edges in CSPGO."); +STATISTIC(NumOfCSPGOBB, "Number of basic-blocks in CSPGO."); +STATISTIC(NumOfCSPGOSplit, "Number of critical edge splits in CSPGO."); +STATISTIC(NumOfCSPGOFunc, + "Number of functions having valid profile counts in CSPGO."); +STATISTIC(NumOfCSPGOMismatch, + "Number of functions having mismatch profile in CSPGO."); +STATISTIC(NumOfCSPGOMissing, "Number of functions without profile in CSPGO."); // Command line option to specify the file to read profile from. This is // mainly used for testing. 
@@ -383,7 +397,8 @@ public: static char ID; - PGOInstrumentationGenLegacyPass() : ModulePass(ID) { + PGOInstrumentationGenLegacyPass(bool IsCS = false) + : ModulePass(ID), IsCS(IsCS) { initializePGOInstrumentationGenLegacyPassPass( *PassRegistry::getPassRegistry()); } @@ -391,6 +406,8 @@ StringRef getPassName() const override { return "PGOInstrumentationGenPass"; } private: + // Whether this is context-sensitive instrumentation. + bool IsCS; bool runOnModule(Module &M) override; void getAnalysisUsage(AnalysisUsage &AU) const override { @@ -403,8 +420,8 @@ static char ID; // Provide the profile filename as the parameter. - PGOInstrumentationUseLegacyPass(std::string Filename = "") - : ModulePass(ID), ProfileFileName(std::move(Filename)) { + PGOInstrumentationUseLegacyPass(std::string Filename = "", bool IsCS = false) + : ModulePass(ID), ProfileFileName(std::move(Filename)), IsCS(IsCS) { if (!PGOTestProfileFile.empty()) ProfileFileName = PGOTestProfileFile; initializePGOInstrumentationUseLegacyPassPass( @@ -415,14 +432,38 @@ private: std::string ProfileFileName; + // Whether this is context-sensitive instrumentation use. 
+ bool IsCS; bool runOnModule(Module &M) override; void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.addRequired(); AU.addRequired(); } }; +class PGOInstrumentationGenCreateVarLegacyPass : public ModulePass { +public: + static char ID; + StringRef getPassName() const override { + return "PGOInstrumentationGenCreateVarPass"; + } + PGOInstrumentationGenCreateVarLegacyPass(std::string CSInstrName = "") + : ModulePass(ID), InstrProfileOutput(CSInstrName) { + initializePGOInstrumentationGenCreateVarLegacyPassPass( + *PassRegistry::getPassRegistry()); + } + +private: + bool runOnModule(Module &M) override { + createProfileNameVar(M, InstrProfileOutput); + createIRLevelProfileFlagVar(M, true); + return false; + } + std::string InstrProfileOutput; +}; + } // end anonymous namespace char PGOInstrumentationGenLegacyPass::ID = 0; @@ -434,8 +475,8 @@ INITIALIZE_PASS_END(PGOInstrumentationGenLegacyPass, "pgo-instr-gen", "PGO instrumentation.", false, false) -ModulePass *llvm::createPGOInstrumentationGenLegacyPass() { - return new PGOInstrumentationGenLegacyPass(); +ModulePass *llvm::createPGOInstrumentationGenLegacyPass(bool IsCS) { + return new PGOInstrumentationGenLegacyPass(IsCS); } char PGOInstrumentationUseLegacyPass::ID = 0; @@ -444,12 +485,28 @@ "Read PGO instrumentation profile.", false, false) INITIALIZE_PASS_DEPENDENCY(BlockFrequencyInfoWrapperPass) INITIALIZE_PASS_DEPENDENCY(BranchProbabilityInfoWrapperPass) +INITIALIZE_PASS_DEPENDENCY(ProfileSummaryInfoWrapperPass) INITIALIZE_PASS_END(PGOInstrumentationUseLegacyPass, "pgo-instr-use", "Read PGO instrumentation profile.", false, false) -ModulePass *llvm::createPGOInstrumentationUseLegacyPass(StringRef Filename) { - return new PGOInstrumentationUseLegacyPass(Filename.str()); +ModulePass *llvm::createPGOInstrumentationUseLegacyPass(StringRef Filename, + bool IsCS) { + return new PGOInstrumentationUseLegacyPass(Filename.str(), IsCS); +} + +#if 1 +char PGOInstrumentationGenCreateVarLegacyPass::ID = 0; + 
+INITIALIZE_PASS(PGOInstrumentationGenCreateVarLegacyPass, + "pgo-instr-gen-create-var", + "Create PGO instrumentation version variable for CSPGO.", false, + false) + +ModulePass * +llvm::createPGOInstrumentationGenCreateVarLegacyPass(StringRef CSInstrName) { + return new PGOInstrumentationGenCreateVarLegacyPass(CSInstrName); } +#endif namespace { @@ -496,6 +553,9 @@ private: Function &F; + // Whether this is context-sensitive instrumentation. + bool IsCS; + // A map that stores the Comdat group in function F. std::unordered_multimap &ComdatMembers; @@ -535,15 +595,23 @@ Function &Func, std::unordered_multimap &ComdatMembers, bool CreateGlobalVar = false, BranchProbabilityInfo *BPI = nullptr, - BlockFrequencyInfo *BFI = nullptr) - : F(Func), ComdatMembers(ComdatMembers), ValueSites(IPVK_Last + 1), - SIVisitor(Func), MIVisitor(Func), MST(F, BPI, BFI) { + BlockFrequencyInfo *BFI = nullptr, bool IsCS = false) + : F(Func), IsCS(IsCS), ComdatMembers(ComdatMembers), + ValueSites(IPVK_Last + 1), SIVisitor(Func), MIVisitor(Func), + MST(F, BPI, BFI) { // This should be done before CFG hash computation. 
SIVisitor.countSelects(Func); MIVisitor.countMemIntrinsics(Func); - NumOfPGOSelectInsts += SIVisitor.getNumOfSelectInsts(); - NumOfPGOMemIntrinsics += MIVisitor.getNumOfMemIntrinsics(); - ValueSites[IPVK_IndirectCallTarget] = findIndirectCalls(Func); + if (!IsCS) { + NumOfPGOSelectInsts += SIVisitor.getNumOfSelectInsts(); + NumOfPGOMemIntrinsics += MIVisitor.getNumOfMemIntrinsics(); + NumOfPGOBB += MST.BBInfos.size(); + ValueSites[IPVK_IndirectCallTarget] = findIndirectCalls(Func); + } else { + NumOfCSPGOSelectInsts += SIVisitor.getNumOfSelectInsts(); + NumOfCSPGOMemIntrinsics += MIVisitor.getNumOfMemIntrinsics(); + NumOfCSPGOBB += MST.BBInfos.size(); + } ValueSites[IPVK_MemOPSize] = MIVisitor.findMemIntrinsics(Func); FuncName = getPGOFuncName(F); @@ -552,13 +620,12 @@ renameComdatFunction(); LLVM_DEBUG(dumpInfo("after CFGMST")); - NumOfPGOBB += MST.BBInfos.size(); for (auto &E : MST.AllEdges) { if (E->Removed) continue; - NumOfPGOEdge++; + IsCS ? NumOfCSPGOEdge++ : NumOfPGOEdge++; if (!E->InMST) - NumOfPGOInstrument++; + IsCS ? NumOfCSPGOInstrument++ : NumOfPGOInstrument++; } if (CreateGlobalVar) @@ -597,9 +664,16 @@ } } JC.update(Indexes); + + // Hash format for context sensitive profile. Reserve 4 bits for other + // information. FunctionHash = (uint64_t)SIVisitor.getNumOfSelectInsts() << 56 | (uint64_t)ValueSites[IPVK_IndirectCallTarget].size() << 48 | (uint64_t)MST.AllEdges.size() << 32 | JC.getCRC(); + // Reserve bit 60-63 for other information purpose. + FunctionHash &= 0x0FFFFFFFFFFFFFFF; + if (IsCS) + NamedInstrProfRecord::setCSFlagInHash(FunctionHash); LLVM_DEBUG(dbgs() << "Function Hash Computation for " << F.getName() << ":\n" << " CRC = " << JC.getCRC() << ", Selects = " << SIVisitor.getNumOfSelectInsts() @@ -705,7 +779,7 @@ // For a critical edge, we have to split. Instrument the newly // created BB. - NumOfPGOSplit++; + IsCS ? 
NumOfCSPGOSplit++ : NumOfPGOSplit++; LLVM_DEBUG(dbgs() << "Split critical edge: " << getBBInfo(SrcBB).Index << " --> " << getBBInfo(DestBB).Index << "\n"); unsigned SuccNum = GetSuccessorNumber(SrcBB, DestBB); @@ -720,12 +794,14 @@ // Critical edges will be split. static void instrumentOneFunc( Function &F, Module *M, BranchProbabilityInfo *BPI, BlockFrequencyInfo *BFI, - std::unordered_multimap &ComdatMembers) { + std::unordered_multimap &ComdatMembers, + bool IsCS) { // Split indirectbr critical edges here before computing the MST rather than // later in getInstrBB() to avoid invalidating it. SplitIndirectBrCriticalEdges(F, BPI, BFI); + FuncPGOInstrumentation FuncInfo(F, ComdatMembers, true, BPI, - BFI); + BFI, IsCS); unsigned NumCounters = FuncInfo.getNumCounters(); uint32_t I = 0; @@ -852,10 +928,10 @@ PGOUseFunc(Function &Func, Module *Modu, std::unordered_multimap &ComdatMembers, BranchProbabilityInfo *BPI = nullptr, - BlockFrequencyInfo *BFIin = nullptr) + BlockFrequencyInfo *BFIin = nullptr, bool IsCS = false) : F(Func), M(Modu), BFI(BFIin), - FuncInfo(Func, ComdatMembers, false, BPI, BFIin), - FreqAttr(FFA_Normal) {} + FuncInfo(Func, ComdatMembers, false, BPI, BFIin, IsCS), + FreqAttr(FFA_Normal), IsCS(IsCS) {} // Read counts for the instrumented BB from profile. bool readCounters(IndexedInstrProfReader *PGOReader, bool &AllZeros); @@ -928,6 +1004,9 @@ // Function hotness info derived from profile. FuncFreqAttr FreqAttr; + // Is to use the context sensitive profile. + bool IsCS; + // Find the Instrumented BB and set the value. void setInstrumentedCounts(const std::vector &CountFromProfile); @@ -1021,23 +1100,31 @@ handleAllErrors(std::move(E), [&](const InstrProfError &IPE) { auto Err = IPE.get(); bool SkipWarning = false; + LLVM_DEBUG(dbgs() << "Error in reading profile for Func " + << FuncInfo.FuncName << ": "); if (Err == instrprof_error::unknown_function) { - NumOfPGOMissing++; + IsCS ? 
NumOfCSPGOMissing++ : NumOfPGOMissing++; SkipWarning = !PGOWarnMissing; + LLVM_DEBUG(dbgs() << "unknown function"); } else if (Err == instrprof_error::hash_mismatch || Err == instrprof_error::malformed) { - NumOfPGOMismatch++; + IsCS ? NumOfCSPGOMismatch++ : NumOfPGOMismatch++; SkipWarning = NoPGOWarnMismatch || (NoPGOWarnMismatchComdat && (F.hasComdat() || F.getLinkage() == GlobalValue::AvailableExternallyLinkage)); + LLVM_DEBUG(dbgs() << "hash mismatch (skip=" << SkipWarning << ")"); } + LLVM_DEBUG(dbgs() << " IsCS=" << IsCS << "\n"); if (SkipWarning) return; - std::string Msg = IPE.message() + std::string(" ") + F.getName().str(); + std::string Msg = IPE.message() + std::string(" ") + F.getName().str() + + std::string(" Hash = ") + + std::to_string(FuncInfo.FunctionHash); + Ctx.diagnose( DiagnosticInfoPGOProfile(M->getName().data(), Msg, DS_Warning)); }); @@ -1046,7 +1133,7 @@ ProfileRecord = std::move(Result.get()); std::vector &CountFromProfile = ProfileRecord.Counts; - NumOfPGOFunc++; + IsCS ? NumOfCSPGOFunc++ : NumOfPGOFunc++; LLVM_DEBUG(dbgs() << CountFromProfile.size() << " counts\n"); uint64_t ValueSum = 0; for (unsigned I = 0, S = CountFromProfile.size(); I < S; I++) { @@ -1061,7 +1148,7 @@ getBBInfo(nullptr).UnknownCountInEdge = 2; setInstrumentedCounts(CountFromProfile); - ProgramMaxCount = PGOReader->getMaximumFunctionCount(); + ProgramMaxCount = PGOReader->getMaximumFunctionCount(IsCS); return true; } @@ -1166,7 +1253,8 @@ // Assign the scaled count values to the BB with multiple out edges. void PGOUseFunc::setBranchWeights() { // Generate MD_prof metadata for every branch instruction. 
- LLVM_DEBUG(dbgs() << "\nSetting branch weights.\n"); + LLVM_DEBUG(dbgs() << "\nSetting branch weights for func " << F.getName() + << " IsCS=" << IsCS << "\n"); for (auto &BB : F) { Instruction *TI = BB.getTerminator(); if (TI->getNumSuccessors() < 2) @@ -1174,6 +1262,7 @@ if (!(isa(TI) || isa(TI) || isa(TI))) continue; + if (getBBInfo(&BB).CountValue == 0) continue; @@ -1351,24 +1440,6 @@ } } -// Create a COMDAT variable INSTR_PROF_RAW_VERSION_VAR to make the runtime -// aware this is an ir_level profile so it can set the version flag. -static void createIRLevelProfileFlagVariable(Module &M) { - Type *IntTy64 = Type::getInt64Ty(M.getContext()); - uint64_t ProfileVersion = (INSTR_PROF_RAW_VERSION | VARIANT_MASK_IR_PROF); - auto IRLevelVersionVariable = new GlobalVariable( - M, IntTy64, true, GlobalVariable::ExternalLinkage, - Constant::getIntegerValue(IntTy64, APInt(64, ProfileVersion)), - INSTR_PROF_QUOTE(INSTR_PROF_RAW_VERSION_VAR)); - IRLevelVersionVariable->setVisibility(GlobalValue::DefaultVisibility); - Triple TT(M.getTargetTriple()); - if (!TT.supportsCOMDAT()) - IRLevelVersionVariable->setLinkage(GlobalValue::WeakAnyLinkage); - else - IRLevelVersionVariable->setComdat(M.getOrInsertComdat( - StringRef(INSTR_PROF_QUOTE(INSTR_PROF_RAW_VERSION_VAR)))); -} - // Collect the set of members for each Comdat in module M and store // in ComdatMembers. static void collectComdatMembers( @@ -1389,8 +1460,11 @@ static bool InstrumentAllFunctions( Module &M, function_ref LookupBPI, function_ref LookupBFI) { - createIRLevelProfileFlagVariable(M); + function_ref LookupBFI, bool IsCS) { + // For the context-sensitive instrumentation, we should have a separate pass + // (before LTO/ThinLTO linking) to create these variables. 
+ if (!IsCS) + createIRLevelProfileFlagVar(M, /* IsCS */ false); std::unordered_multimap ComdatMembers; collectComdatMembers(M, ComdatMembers); @@ -1399,7 +1473,7 @@ continue; auto *BPI = LookupBPI(F); auto *BFI = LookupBFI(F); - instrumentOneFunc(F, &M, BPI, BFI, ComdatMembers); + instrumentOneFunc(F, &M, BPI, BFI, ComdatMembers, IsCS); } return true; } @@ -1414,7 +1488,7 @@ auto LookupBFI = [this](Function &F) { return &this->getAnalysis(F).getBFI(); }; - return InstrumentAllFunctions(M, LookupBPI, LookupBFI); + return InstrumentAllFunctions(M, LookupBPI, LookupBFI, IsCS); } PreservedAnalyses PGOInstrumentationGen::run(Module &M, @@ -1428,7 +1502,7 @@ return &FAM.getResult(F); }; - if (!InstrumentAllFunctions(M, LookupBPI, LookupBFI)) + if (!InstrumentAllFunctions(M, LookupBPI, LookupBFI, IsCS)) return PreservedAnalyses::all(); return PreservedAnalyses::none(); @@ -1437,7 +1511,7 @@ static bool annotateAllFunctions( Module &M, StringRef ProfileFileName, StringRef ProfileRemappingFileName, function_ref LookupBPI, - function_ref LookupBFI) { + function_ref LookupBFI, bool IsCS) { LLVM_DEBUG(dbgs() << "Read in profile counters: "); auto &Ctx = M.getContext(); // Read the counter array from file. @@ -1458,6 +1532,9 @@ StringRef("Cannot get PGOReader"))); return false; } + if (!PGOReader->hasCSIRLevelProfile() && IsCS) + return false; + // TODO: might need to change the warning once the clang option is finalized. if (!PGOReader->isIRLevelProfile()) { Ctx.diagnose(DiagnosticInfoPGOProfile( @@ -1477,7 +1554,7 @@ // Split indirectbr critical edges here before computing the MST rather than // later in getInstrBB() to avoid invalidating it. 
SplitIndirectBrCriticalEdges(F, BPI, BFI); - PGOUseFunc Func(F, &M, ComdatMembers, BPI, BFI); + PGOUseFunc Func(F, &M, ComdatMembers, BPI, BFI, IsCS); bool AllZeros = false; if (!Func.readCounters(PGOReader.get(), AllZeros)) continue; @@ -1525,7 +1602,10 @@ } } } - M.setProfileSummary(PGOReader->getSummary().getMD(M.getContext())); + M.setProfileSummary(PGOReader->getSummary(IsCS).getMD(M.getContext()), + IsCS ? ProfileSummary::PSK_CSInstr + : ProfileSummary::PSK_Instr); + // Set function hotness attribute from the profile. // We have to apply these attributes at the end because their presence // can affect the BranchProbabilityInfo of any callers, resulting in an @@ -1544,9 +1624,10 @@ } PGOInstrumentationUse::PGOInstrumentationUse(std::string Filename, - std::string RemappingFilename) + std::string RemappingFilename, + bool IsCS) : ProfileFileName(std::move(Filename)), - ProfileRemappingFileName(std::move(RemappingFilename)) { + ProfileRemappingFileName(std::move(RemappingFilename)), IsCS(IsCS) { if (!PGOTestProfileFile.empty()) ProfileFileName = PGOTestProfileFile; if (!PGOTestProfileRemappingFile.empty()) @@ -1566,7 +1647,7 @@ }; if (!annotateAllFunctions(M, ProfileFileName, ProfileRemappingFileName, - LookupBPI, LookupBFI)) + LookupBPI, LookupBFI, IsCS)) return PreservedAnalyses::all(); return PreservedAnalyses::none(); @@ -1583,7 +1664,8 @@ return &this->getAnalysis(F).getBFI(); }; - return annotateAllFunctions(M, ProfileFileName, "", LookupBPI, LookupBFI); + return annotateAllFunctions(M, ProfileFileName, "", LookupBPI, LookupBFI, + IsCS); } static std::string getSimpleNodeName(const BasicBlock *Node) { Index: tools/gold/gold-plugin.cpp =================================================================== --- tools/gold/gold-plugin.cpp +++ tools/gold/gold-plugin.cpp @@ -209,6 +209,10 @@ static std::string OptRemarksFilename; static bool OptRemarksWithHotness = false; + // Context sensitive PGO options. 
+ static std::string cs_profile_path; + static bool cs_pgo_gen = false; + static void process_plugin_option(const char *opt_) { if (opt_ == nullptr) @@ -268,7 +272,11 @@ } else if (opt == "disable-verify") { DisableVerify = true; } else if (opt.startswith("sample-profile=")) { - sample_profile= opt.substr(strlen("sample-profile=")); + sample_profile = opt.substr(strlen("sample-profile=")); + } else if (opt == "cs-profile-generate") { + cs_pgo_gen = true; + } else if (opt.startswith("cs-profile-path=")) { + cs_profile_path = opt.substr(strlen("cs-profile-path=")); } else if (opt == "new-pass-manager") { new_pass_manager = true; } else if (opt == "debug-pass-manager") { @@ -892,6 +900,10 @@ if (!options::sample_profile.empty()) Conf.SampleProfile = options::sample_profile; + if (!options::cs_profile_path.empty()) + Conf.CSIRProfile = options::cs_profile_path; + Conf.RunCSIRInstr = options::cs_pgo_gen; + Conf.DwoDir = options::dwo_dir; // Set up optimization remarks handling. Index: tools/llvm-profdata/llvm-profdata.cpp =================================================================== --- tools/llvm-profdata/llvm-profdata.cpp +++ tools/llvm-profdata/llvm-profdata.cpp @@ -225,7 +225,8 @@ auto Reader = std::move(ReaderOrErr.get()); bool IsIRProfile = Reader->isIRLevelProfile(); - if (WC->Writer.setIsIRLevelProfile(IsIRProfile)) { + bool HasCSIRProfile = Reader->hasCSIRLevelProfile(); + if (WC->Writer.setIsIRLevelProfile(IsIRProfile, HasCSIRProfile)) { WC->Err = make_error( "Merge IR generated profile with Clang generated profile.", std::error_code()); @@ -669,9 +670,10 @@ uint32_t TopN, bool ShowIndirectCallTargets, bool ShowMemOPSizes, bool ShowDetailedSummary, std::vector DetailedSummaryCutoffs, - bool ShowAllFunctions, uint64_t ValueCutoff, - bool OnlyListBelow, const std::string &ShowFunction, - bool TextFormat, raw_fd_ostream &OS) { + bool ShowAllFunctions, bool ShowCS, + uint64_t ValueCutoff, bool OnlyListBelow, + const std::string &ShowFunction, bool 
TextFormat, + raw_fd_ostream &OS) { auto ReaderOrErr = InstrProfReader::create(Filename); std::vector Cutoffs = std::move(DetailedSummaryCutoffs); if (ShowDetailedSummary && Cutoffs.empty()) { @@ -708,6 +710,11 @@ OS << ":ir\n"; for (const auto &Func : *Reader) { + if (Reader->isIRLevelProfile() && Reader->hasCSIRLevelProfile()) { + bool FuncIsCS = NamedInstrProfRecord::hasCSFlagInHash(Func.Hash); + if (FuncIsCS != ShowCS) + continue; + } bool Show = ShowAllFunctions || (!ShowFunction.empty() && Func.Name.find(ShowFunction) != Func.Name.npos); @@ -899,6 +906,8 @@ cl::value_desc("800000,901000,999999")); cl::opt ShowAllFunctions("all-functions", cl::init(false), cl::desc("Details for every function")); + cl::opt ShowCS("showcs", cl::init(false), + cl::desc("Show context sensitive counts")); cl::opt ShowFunction("function", cl::desc("Details for matching functions")); @@ -940,8 +949,8 @@ return showInstrProfile(Filename, ShowCounts, TopNFunctions, ShowIndirectCallTargets, ShowMemOPSizes, ShowDetailedSummary, DetailedSummaryCutoffs, - ShowAllFunctions, ValueCutoff, OnlyListBelow, - ShowFunction, TextFormat, OS); + ShowAllFunctions, ShowCS, ValueCutoff, + OnlyListBelow, ShowFunction, TextFormat, OS); else return showSampleProfile(Filename, ShowCounts, ShowAllFunctions, ShowFunction, OS); Index: tools/opt/NewPMDriver.h =================================================================== --- tools/opt/NewPMDriver.h +++ tools/opt/NewPMDriver.h @@ -45,6 +45,7 @@ InstrUse, SampleUse }; +enum CSPGOKind { NoCSPGO, CSInstrGen, CSInstrUse }; } /// Driver function to run the new pass manager over a module. 
Index: tools/opt/NewPMDriver.cpp =================================================================== --- tools/opt/NewPMDriver.cpp +++ tools/opt/NewPMDriver.cpp @@ -102,6 +102,9 @@ extern cl::opt PGOKindFlag; extern cl::opt ProfileFile; +extern cl::opt CSPGOKindFlag; +extern cl::opt CSProfileGenFile; + static cl::opt ProfileRemappingFile("profile-remapping-file", cl::desc("Path to the profile remapping file."), @@ -219,20 +222,43 @@ Optional P; switch (PGOKindFlag) { case InstrGen: - P = PGOOptions(ProfileFile, "", "", "", true); + P = PGOOptions(ProfileFile, "", "", PGOOptions::IRInstr); break; case InstrUse: - P = PGOOptions("", ProfileFile, "", ProfileRemappingFile, false); + P = PGOOptions(ProfileFile, "", ProfileRemappingFile, PGOOptions::IRUse); break; case SampleUse: - P = PGOOptions("", "", ProfileFile, ProfileRemappingFile, false); + P = PGOOptions(ProfileFile, "", ProfileRemappingFile, + PGOOptions::SampleUse); break; case NoPGO: if (DebugInfoForProfiling) - P = PGOOptions("", "", "", "", false, true); + P = PGOOptions("", "", "", PGOOptions::NoAction, PGOOptions::NoCSAction, + true); else P = None; - } + } + if (CSPGOKindFlag != NoCSPGO) { + if (P && (P->Action == PGOOptions::IRInstr || + P->Action == PGOOptions::SampleUse)) + errs() << "CSPGOKind cannot be used with IRInstr or SampleUse"; + if (CSPGOKindFlag == CSInstrGen) { + if (CSProfileGenFile.empty()) + errs() << "CSInstrGen needs to specify CSProfileGenFile"; + if (P) { + P->CSAction = PGOOptions::CSIRInstr; + P->CSProfileGenFile = CSProfileGenFile; + } else + P = PGOOptions("", CSProfileGenFile, ProfileRemappingFile, + PGOOptions::NoAction, PGOOptions::CSIRInstr); + } else /* CSPGOKindFlag == CSInstrUse */ { + // P may be empty here (the NoPGO case can leave it as None); only dereference it when set. + if (P) + P->CSAction = PGOOptions::CSIRUse; + else + errs() << "CSInstrUse needs to be together with InstrUse"; + } + } PassInstrumentationCallbacks PIC; StandardInstrumentations SI; SI.registerCallbacks(PIC); Index: tools/opt/opt.cpp
=================================================================== --- tools/opt/opt.cpp +++ tools/opt/opt.cpp @@ -287,6 +287,22 @@ cl::opt ProfileFile("profile-file", cl::desc("Path to the profile."), cl::Hidden); +cl::opt CSPGOKindFlag( + "cspgo-kind", cl::init(NoCSPGO), cl::Hidden, + cl::desc("The kind of context sensitive profile guided optimization"), + cl::values( + clEnumValN(NoCSPGO, "nocspgo", "Do not use CSPGO."), + clEnumValN( + CSInstrGen, "new-pm-cspgo-instr-gen-pipeline", + "Instrument (context sensitive) the IR to generate profile."), + clEnumValN( + CSInstrUse, "new-pm-cspgo-instr-use-pipeline", + "Use instrumented (context sensitive) profile to guide PGO."))); +cl::opt CSProfileGenFile( + "cs-profilegen-file", + cl::desc("Path to the instrumented context sensitive profile."), + cl::Hidden); + class OptCustomPassManager : public legacy::PassManager { DebugifyStatsMap DIStatsMap; Index: unittests/ProfileData/InstrProfTest.cpp =================================================================== --- unittests/ProfileData/InstrProfTest.cpp +++ unittests/ProfileData/InstrProfTest.cpp @@ -175,7 +175,7 @@ ASSERT_EQ(288230376151711744U, NinetyFivePerc->MinCount); ASSERT_EQ(72057594037927936U, NinetyNinePerc->MinCount); }; - ProfileSummary &PS = Reader->getSummary(); + ProfileSummary &PS = Reader->getSummary(/* IsCS */ false); VerifySummary(PS); // Test that conversion of summary to and from Metadata works. @@ -189,8 +189,8 @@ // Test that summary can be attached to and read back from module. 
Module M("my_module", Context); - M.setProfileSummary(MD); - MD = M.getProfileSummary(); + M.setProfileSummary(MD, ProfileSummary::PSK_Instr); + MD = M.getProfileSummary(ProfileSummary::PSK_Instr); ASSERT_TRUE(MD); PSFromMD = ProfileSummary::getFromMD(MD); ASSERT_TRUE(PSFromMD); @@ -801,7 +801,7 @@ auto Profile = Writer.writeBuffer(); readProfile(std::move(Profile)); - ASSERT_EQ(1ULL << 63, Reader->getMaximumFunctionCount()); + ASSERT_EQ(1ULL << 63, Reader->getMaximumFunctionCount(/* IsCS */ false)); } TEST_P(MaybeSparseInstrProfTest, get_weighted_function_counts) { Index: unittests/ProfileData/SampleProfTest.cpp =================================================================== --- unittests/ProfileData/SampleProfTest.cpp +++ unittests/ProfileData/SampleProfTest.cpp @@ -191,8 +191,8 @@ delete PS; // Test that summary can be attached to and read back from module. - M.setProfileSummary(MD); - MD = M.getProfileSummary(); + M.setProfileSummary(MD, ProfileSummary::PSK_Sample); + MD = M.getProfileSummary(ProfileSummary::PSK_Sample); ASSERT_TRUE(MD); PS = ProfileSummary::getFromMD(MD); ASSERT_TRUE(PS);