Index: CMakeLists.txt =================================================================== --- CMakeLists.txt +++ CMakeLists.txt @@ -636,6 +636,12 @@ endif() file(TO_NATIVE_PATH "${LLVM_PROFILE_DATA_DIR}/%${LLVM_PROFILE_MERGE_POOL_SIZE}m.profraw" LLVM_PROFILE_FILE_PATTERN) endif() + if(NOT LLVM_CSPROFILE_FILE_PATTERN) + if(NOT LLVM_CSPROFILE_DATA_DIR) + file(TO_NATIVE_PATH "${LLVM_BINARY_DIR}/csprofiles" LLVM_CSPROFILE_DATA_DIR) + endif() + file(TO_NATIVE_PATH "${LLVM_CSPROFILE_DATA_DIR}/%${LLVM_PROFILE_MERGE_POOL_SIZE}m.profraw" LLVM_CSPROFILE_FILE_PATTERN) + endif() endif() if (LLVM_BUILD_STATIC) Index: cmake/modules/HandleLLVMOptions.cmake =================================================================== --- cmake/modules/HandleLLVMOptions.cmake +++ cmake/modules/HandleLLVMOptions.cmake @@ -798,6 +798,12 @@ CMAKE_C_FLAGS CMAKE_EXE_LINKER_FLAGS CMAKE_SHARED_LINKER_FLAGS) + elseif(uppercase_LLVM_BUILD_INSTRUMENTED STREQUAL "CSIR") + append("-fcs-profile-generate='${LLVM_CSPROFILE_DATA_DIR}'" + CMAKE_CXX_FLAGS + CMAKE_C_FLAGS + CMAKE_EXE_LINKER_FLAGS + CMAKE_SHARED_LINKER_FLAGS) else() append("-fprofile-instr-generate='${LLVM_PROFILE_FILE_PATTERN}'" CMAKE_CXX_FLAGS @@ -807,6 +813,14 @@ endif() endif() +# Need to pass -fprofile-instr-use to linker for context-sensitive PGO +# compilation. 
+if(LLVM_PROFDATA_FILE AND EXISTS ${LLVM_PROFDATA_FILE}) + append("-fprofile-instr-use='${LLVM_PROFDATA_FILE}'" + CMAKE_EXE_LINKER_FLAGS + CMAKE_SHARED_LINKER_FLAGS) +endif() + option(LLVM_BUILD_INSTRUMENTED_COVERAGE "Build LLVM and tools with Code Coverage instrumentation" Off) mark_as_advanced(LLVM_BUILD_INSTRUMENTED_COVERAGE) append_if(LLVM_BUILD_INSTRUMENTED_COVERAGE "-fprofile-instr-generate='${LLVM_PROFILE_FILE_PATTERN}' -fcoverage-mapping" Index: docs/CommandGuide/llvm-profdata.rst =================================================================== --- docs/CommandGuide/llvm-profdata.rst +++ docs/CommandGuide/llvm-profdata.rst @@ -226,6 +226,11 @@ Only output names of functions whose max count value are below the cutoff value. +.. option:: -showcs + + Only show context sensitive profile counts. The default is to filter all + context sensitive profile counts. + EXIT STATUS ----------- Index: include/llvm/Analysis/ProfileSummaryInfo.h =================================================================== --- include/llvm/Analysis/ProfileSummaryInfo.h +++ include/llvm/Analysis/ProfileSummaryInfo.h @@ -73,6 +73,12 @@ Summary->getKind() == ProfileSummary::PSK_Instr; } + /// Returns true if module \c M has context sensitive instrumentation profile. + bool hasCSInstrumentationProfile() { + return hasProfileSummary() && + Summary->getKind() == ProfileSummary::PSK_CSInstr; + } + /// Handle the invalidation of this information. 
/// /// When used as a result of \c ProfileSummaryAnalysis this method will be Index: include/llvm/IR/Module.h =================================================================== --- include/llvm/IR/Module.h +++ include/llvm/IR/Module.h @@ -28,6 +28,7 @@ #include "llvm/IR/GlobalIFunc.h" #include "llvm/IR/GlobalVariable.h" #include "llvm/IR/Metadata.h" +#include "llvm/IR/ProfileSummary.h" #include "llvm/IR/SymbolTableListTraits.h" #include "llvm/Support/CBindingWrapping.h" #include "llvm/Support/CodeGen.h" @@ -868,10 +869,11 @@ /// @{ /// Attach profile summary metadata to this module. - void setProfileSummary(Metadata *M); + void setProfileSummary(Metadata *M, ProfileSummary::Kind Kind); - /// Returns profile summary metadata - Metadata *getProfileSummary(); + /// Returns profile summary metadata. When IsCS is true, use the context + /// sensitive profile summary. + Metadata *getProfileSummary(bool IsCS); /// @} /// Returns true if PLT should be avoided for RTLib calls. Index: include/llvm/IR/ProfileSummary.h =================================================================== --- include/llvm/IR/ProfileSummary.h +++ include/llvm/IR/ProfileSummary.h @@ -42,11 +42,10 @@ class ProfileSummary { public: - enum Kind { PSK_Instr, PSK_Sample }; + enum Kind { PSK_Instr, PSK_CSInstr, PSK_Sample }; private: const Kind PSK; - static const char *KindStr[2]; SummaryEntryVector DetailedSummary; uint64_t TotalCount, MaxCount, MaxInternalCount, MaxFunctionCount; uint32_t NumCounts, NumFunctions; Index: include/llvm/InitializePasses.h =================================================================== --- include/llvm/InitializePasses.h +++ include/llvm/InitializePasses.h @@ -298,6 +298,7 @@ void initializePGOIndirectCallPromotionLegacyPassPass(PassRegistry&); void initializePGOInstrumentationGenLegacyPassPass(PassRegistry&); void initializePGOInstrumentationUseLegacyPassPass(PassRegistry&); +void initializePGOInstrumentationGenCreateVarLegacyPassPass(PassRegistry&); void 
initializePGOMemOPSizeOptLegacyPassPass(PassRegistry&); void initializePHIEliminationPass(PassRegistry&); void initializePartialInlinerLegacyPassPass(PassRegistry&); Index: include/llvm/LTO/Config.h =================================================================== --- include/llvm/LTO/Config.h +++ include/llvm/LTO/Config.h @@ -55,6 +55,9 @@ /// Disable entirely the optimizer, including importing for ThinLTO bool CodeGenOnly = false; + /// Run PGO context sensitive IR instrumentation. + bool RunCSIRInstr = false; + /// If this field is set, the set of passes run in the middle-end optimizer /// will be the one specified by the string. Only works with the new pass /// manager as the old one doesn't have this ability. @@ -73,6 +76,9 @@ /// with this triple. std::string DefaultTriple; + /// Context Sensitive PGO profile path. + std::string CSIRProfile; + /// Sample PGO profile path. std::string SampleProfile; Index: include/llvm/LinkAllPasses.h =================================================================== --- include/llvm/LinkAllPasses.h +++ include/llvm/LinkAllPasses.h @@ -102,6 +102,7 @@ (void) llvm::createGCOVProfilerPass(); (void) llvm::createPGOInstrumentationGenLegacyPass(); (void) llvm::createPGOInstrumentationUseLegacyPass(); + (void) llvm::createPGOInstrumentationGenCreateVarLegacyPass(); (void) llvm::createPGOIndirectCallPromotionLegacyPass(); (void) llvm::createPGOMemOPSizeOptLegacyPass(); (void) llvm::createInstrProfilingLegacyPass(); Index: include/llvm/Passes/PassBuilder.h =================================================================== --- include/llvm/Passes/PassBuilder.h +++ include/llvm/Passes/PassBuilder.h @@ -31,25 +31,38 @@ /// A struct capturing PGO tunables. 
struct PGOOptions { - PGOOptions(std::string ProfileGenFile = "", std::string ProfileUseFile = "", - std::string SampleProfileFile = "", - std::string ProfileRemappingFile = "", - bool RunProfileGen = false, bool SamplePGOSupport = false) - : ProfileGenFile(ProfileGenFile), ProfileUseFile(ProfileUseFile), - SampleProfileFile(SampleProfileFile), - ProfileRemappingFile(ProfileRemappingFile), - RunProfileGen(RunProfileGen), - SamplePGOSupport(SamplePGOSupport || !SampleProfileFile.empty()) { - assert((RunProfileGen || - !SampleProfileFile.empty() || - !ProfileUseFile.empty() || - SamplePGOSupport) && "Illegal PGOOptions."); - } - std::string ProfileGenFile; - std::string ProfileUseFile; - std::string SampleProfileFile; + enum PGOAction { NoAction, IRInstr, IRUse, SampleUse }; + enum CSPGOAction { NoCSAction, CSIRInstr, CSIRUse }; + PGOOptions(std::string ProfileFile = "", std::string CSProfileGenFile = "", + std::string ProfileRemappingFile = "", PGOAction Action = NoAction, + CSPGOAction CSAction = NoCSAction, bool SamplePGOSupport = false) + : ProfileFile(ProfileFile), CSProfileGenFile(CSProfileGenFile), + ProfileRemappingFile(ProfileRemappingFile), Action(Action), + CSAction(CSAction), + SamplePGOSupport(SamplePGOSupport || Action == SampleUse) { + // Note, we do allow ProfileFile.empty() for Action=IRUse, since LTO can + // call back with IRUse action without ProfileFile. + + // If there is a CSAction, PGOAction cannot be IRInstr or SampleUse. + assert(this->CSAction == NoCSAction || + (this->Action != IRInstr && this->Action != SampleUse)); + + // For CSIRInstr, CSProfileGenFile also needs to be nonempty. + assert(this->CSAction != CSIRInstr || !this->CSProfileGenFile.empty()); + + // If CSAction is CSIRUse, PGOAction needs to be IRUse as they share + // a profile. + assert(this->CSAction != CSIRUse || this->Action == IRUse); + + // If neither Action nor CSAction, SamplePGOSupport needs to be true. 
+ assert(this->Action != NoAction || this->CSAction != NoCSAction || + this->SamplePGOSupport); + } + std::string ProfileFile; + std::string CSProfileGenFile; std::string ProfileRemappingFile; - bool RunProfileGen; + PGOAction Action; + CSPGOAction CSAction; bool SamplePGOSupport; }; @@ -274,7 +287,8 @@ /// require some transformations for semantic reasons, they should explicitly /// build them. ModulePassManager buildModuleOptimizationPipeline(OptimizationLevel Level, - bool DebugLogging = false); + bool DebugLogging = false, + bool LTOPreLink = false); /// Build a per-module default optimization pipeline. /// @@ -288,7 +302,8 @@ /// require some transformations for semantic reasons, they should explicitly /// build them. ModulePassManager buildPerModuleDefaultPipeline(OptimizationLevel Level, - bool DebugLogging = false); + bool DebugLogging = false, + bool LTOPreLink = false); /// Build a pre-link, ThinLTO-targeting default optimization pipeline to /// a pass manager. @@ -605,9 +620,8 @@ bool VerifyEachPass, bool DebugLogging); void addPGOInstrPasses(ModulePassManager &MPM, bool DebugLogging, - OptimizationLevel Level, bool RunProfileGen, - std::string ProfileGenFile, - std::string ProfileUseFile, + OptimizationLevel Level, bool RunProfileGen, bool IsCS, + std::string ProfileFile, std::string ProfileRemappingFile); void invokePeepholeEPCallbacks(FunctionPassManager &, OptimizationLevel); Index: include/llvm/ProfileData/InstrProf.h =================================================================== --- include/llvm/ProfileData/InstrProf.h +++ include/llvm/ProfileData/InstrProf.h @@ -767,10 +767,20 @@ StringRef Name; uint64_t Hash; + // We reserve this bit as the flag for context sensitive profile record. 
+ static const int CS_FLAG_IN_FUNC_HASH = 60; + NamedInstrProfRecord() = default; NamedInstrProfRecord(StringRef Name, uint64_t Hash, std::vector Counts) : InstrProfRecord(std::move(Counts)), Name(Name), Hash(Hash) {} + + static bool hasCSFlagInHash(uint64_t FuncHash) { + return ((FuncHash >> CS_FLAG_IN_FUNC_HASH) & 1); + } + static void setCSFlagInHash(uint64_t &FuncHash) { + FuncHash |= ((uint64_t)1 << CS_FLAG_IN_FUNC_HASH); + } }; uint32_t InstrProfRecord::getNumValueKinds() const { @@ -1004,6 +1014,8 @@ // from control data struct is changed from raw pointer to Name's MD5 value. // Version 4: ValueDataBegin and ValueDataSizes fields are removed from the // raw header. +// Version 5: Bit 60 of FuncHash is reserved for the flag for the context +// sensitive records. const uint64_t Version = INSTR_PROF_RAW_VERSION; template inline uint64_t getMagic(); @@ -1040,6 +1052,10 @@ void getMemOPSizeRangeFromOption(StringRef Str, int64_t &RangeStart, int64_t &RangeLast); +// Create a COMDAT variable INSTR_PROF_RAW_VERSION_VAR to make the runtime +// aware this is an ir_level profile so it can set the version flag. +void createIRLevelProfileFlagVar(Module &M, bool IsCS); + // Create the variable for the profile file name. void createProfileFileNameVar(Module &M, StringRef InstrProfileOutput); Index: include/llvm/ProfileData/InstrProfData.inc =================================================================== --- include/llvm/ProfileData/InstrProfData.inc +++ include/llvm/ProfileData/InstrProfData.inc @@ -635,10 +635,12 @@ * version for other variants of profile. We set the lowest bit of the upper 8 * bits (i.e. bit 56) to 1 to indicate if this is an IR-level instrumentaiton * generated profile, and 0 if this is a Clang FE generated profile. + * 1 in bit 57 indicates there are context-sensitive records in the profile. 
*/ #define VARIANT_MASKS_ALL 0xff00000000000000ULL #define GET_VERSION(V) ((V) & ~VARIANT_MASKS_ALL) #define VARIANT_MASK_IR_PROF (0x1ULL << 56) +#define VARIANT_MASK_CSIR_PROF (0x1ULL << 57) #define INSTR_PROF_RAW_VERSION_VAR __llvm_profile_raw_version #define INSTR_PROF_PROFILE_RUNTIME_VAR __llvm_profile_runtime Index: include/llvm/ProfileData/InstrProfReader.h =================================================================== --- include/llvm/ProfileData/InstrProfReader.h +++ include/llvm/ProfileData/InstrProfReader.h @@ -77,6 +77,8 @@ virtual bool isIRLevelProfile() const = 0; + virtual bool hasCSIRLevelProfile() const = 0; + /// Return the PGO symtab. There are three different readers: /// Raw, Text, and Indexed profile readers. The first two types /// of readers are used only by llvm-profdata tool, while the indexed @@ -142,6 +144,7 @@ /// Iterator over the profile data. line_iterator Line; bool IsIRLevelProfile = false; + bool HasCSIRLevelProfile = false; Error readValueProfileData(InstrProfRecord &Record); @@ -156,6 +159,8 @@ bool isIRLevelProfile() const override { return IsIRLevelProfile; } + bool hasCSIRLevelProfile() const override { return HasCSIRLevelProfile; } + /// Read the header. 
Error readHeader() override; @@ -212,6 +217,10 @@ return (Version & VARIANT_MASK_IR_PROF) != 0; } + bool hasCSIRLevelProfile() const override { + return (Version & VARIANT_MASK_CSIR_PROF) != 0; + } + InstrProfSymtab &getSymtab() override { assert(Symtab.get()); return *Symtab.get(); @@ -341,6 +350,7 @@ virtual void setValueProfDataEndianness(support::endianness Endianness) = 0; virtual uint64_t getVersion() const = 0; virtual bool isIRLevelProfile() const = 0; + virtual bool hasCSIRLevelProfile() const = 0; virtual Error populateSymtab(InstrProfSymtab &) = 0; }; @@ -385,6 +395,10 @@ return (FormatVersion & VARIANT_MASK_IR_PROF) != 0; } + bool hasCSIRLevelProfile() const override { + return (FormatVersion & VARIANT_MASK_CSIR_PROF) != 0; + } + Error populateSymtab(InstrProfSymtab &Symtab) override { return Symtab.create(HashTable->keys()); } @@ -412,13 +426,16 @@ std::unique_ptr Remapper; /// Profile summary data. std::unique_ptr Summary; + /// Context sensitive profile summary data. + std::unique_ptr CS_Summary; // Index to the current record in the record array. unsigned RecordIndex; // Read the profile summary. Return a pointer pointing to one byte past the // end of the summary data if it exists or the input \c Cur. + // \c UseCS indicates whether to use the context-sensitive profile summary. const unsigned char *readSummary(IndexedInstrProf::ProfVersion Version, - const unsigned char *Cur); + const unsigned char *Cur, bool UseCS); public: IndexedInstrProfReader( @@ -432,6 +449,9 @@ /// Return the profile version. uint64_t getVersion() const { return Index->getVersion(); } bool isIRLevelProfile() const override { return Index->isIRLevelProfile(); } + bool hasCSIRLevelProfile() const override { + return Index->hasCSIRLevelProfile(); + } /// Return true if the given buffer is in an indexed instrprof format. static bool hasFormat(const MemoryBuffer &DataBuffer); @@ -450,7 +470,16 @@ std::vector &Counts); /// Return the maximum of all known function counts. 
- uint64_t getMaximumFunctionCount() { return Summary->getMaxFunctionCount(); } + /// \c UseCS indicates whether to use the context-sensitive count. + uint64_t getMaximumFunctionCount(bool UseCS) { + if (UseCS) { + assert(CS_Summary && "No context sensitive profile summary"); + return CS_Summary->getMaxFunctionCount(); + } else { + assert(Summary && "No profile summary"); + return Summary->getMaxFunctionCount(); + } + } /// Factory method to create an indexed reader. static Expected> @@ -469,7 +498,18 @@ // to be used by llvm-profdata (for dumping). Avoid using this when // the client is the compiler. InstrProfSymtab &getSymtab() override; - ProfileSummary &getSummary() { return *(Summary.get()); } + + /// Return the profile summary. + /// \c UseCS indicates whether to use the context-sensitive summary. + ProfileSummary &getSummary(bool UseCS) { + if (UseCS) { + assert(CS_Summary && "No context sensitive summary"); + return *(CS_Summary.get()); + } else { + assert(Summary && "No profile summary"); + return *(Summary.get()); + } + } }; } // end namespace llvm Index: include/llvm/ProfileData/InstrProfWriter.h =================================================================== --- include/llvm/ProfileData/InstrProfWriter.h +++ include/llvm/ProfileData/InstrProfWriter.h @@ -33,7 +33,8 @@ class InstrProfWriter { public: using ProfilingData = SmallDenseMap; - enum ProfKind { PF_Unknown = 0, PF_FE, PF_IRLevel }; + // PF_IRLevelWithCS is the profile from context sensitive IR instrumentation. + enum ProfKind { PF_Unknown = 0, PF_FE, PF_IRLevel, PF_IRLevelWithCS }; private: bool Sparse; @@ -74,15 +75,26 @@ std::unique_ptr writeBuffer(); /// Set the ProfileKind. Report error if mixing FE and IR level profiles. - Error setIsIRLevelProfile(bool IsIRLevel) { + /// \c WithCS indicates if this is for context sensitive instrumentation. + Error setIsIRLevelProfile(bool IsIRLevel, bool WithCS) { if (ProfileKind == PF_Unknown) { - ProfileKind = IsIRLevel ? 
PF_IRLevel: PF_FE; + if (IsIRLevel) + ProfileKind = WithCS ? PF_IRLevelWithCS : PF_IRLevel; + else + ProfileKind = PF_FE; return Error::success(); } - return (IsIRLevel == (ProfileKind == PF_IRLevel)) - ? Error::success() - : make_error( - instrprof_error::unsupported_version); + + if (((ProfileKind != PF_FE) && !IsIRLevel) || + ((ProfileKind == PF_FE) && IsIRLevel)) + return make_error(instrprof_error::unsupported_version); + + // When merging a context-sensitive profile (WithCS == true) with an IRLevel + // profile, set the kind to PF_IRLevelWithCS. + if (ProfileKind == PF_IRLevel && WithCS) + ProfileKind = PF_IRLevelWithCS; + + return Error::success(); } // Internal interface for testing purpose only. Index: include/llvm/Transforms/IPO/PassManagerBuilder.h =================================================================== --- include/llvm/Transforms/IPO/PassManagerBuilder.h +++ include/llvm/Transforms/IPO/PassManagerBuilder.h @@ -159,6 +159,10 @@ /// Enable profile instrumentation pass. bool EnablePGOInstrGen; + /// Enable profile context sensitive instrumentation pass. + bool EnablePGOCSInstrGen; + /// Enable profile context sensitive profile use pass. + bool EnablePGOCSInstrUse; /// Profile data file name that the instrumentation will be written to. std::string PGOInstrGen; /// Path of the profile data file. 
@@ -185,7 +189,7 @@ void addInitialAliasAnalysisPasses(legacy::PassManagerBase &PM) const; void addLTOOptimizationPasses(legacy::PassManagerBase &PM); void addLateLTOOptimizationPasses(legacy::PassManagerBase &PM); - void addPGOInstrPasses(legacy::PassManagerBase &MPM); + void addPGOInstrPasses(legacy::PassManagerBase &MPM, bool IsCS); void addFunctionSimplificationPasses(legacy::PassManagerBase &MPM); void addInstructionCombiningPass(legacy::PassManagerBase &MPM) const; Index: include/llvm/Transforms/Instrumentation.h =================================================================== --- include/llvm/Transforms/Instrumentation.h +++ include/llvm/Transforms/Instrumentation.h @@ -87,10 +87,14 @@ ModulePass *createGCOVProfilerPass(const GCOVOptions &Options = GCOVOptions::getDefault()); -// PGO Instrumention -ModulePass *createPGOInstrumentationGenLegacyPass(); +// PGO Instrumentation. Parameter IsCS indicates if this is the context +// sensitive instrumentation. +ModulePass *createPGOInstrumentationGenLegacyPass(bool IsCS = false); ModulePass * -createPGOInstrumentationUseLegacyPass(StringRef Filename = StringRef("")); +createPGOInstrumentationUseLegacyPass(StringRef Filename = StringRef(""), + bool IsCS = false); +ModulePass *createPGOInstrumentationGenCreateVarLegacyPass( + StringRef CSInstrName = StringRef("")); ModulePass *createPGOIndirectCallPromotionLegacyPass(bool InLTO = false, bool SamplePGO = false); FunctionPass *createPGOMemOPSizeOptLegacyPass(); @@ -132,15 +136,19 @@ // Use atomic profile counter increments. bool Atomic = false; + // Use BFI to guide register promotion + bool UseBFIInPromotion = false; + // Name of the profile file to use as output std::string InstrProfileOutput; InstrProfOptions() = default; }; -/// Insert frontend instrumentation based profiling. +/// Insert frontend instrumentation based profiling. Parameter IsCS indicates if +/// this is the context sensitive instrumentation. 
ModulePass *createInstrProfilingLegacyPass( - const InstrProfOptions &Options = InstrProfOptions()); + const InstrProfOptions &Options = InstrProfOptions(), bool IsCS = false); // Insert AddressSanitizer (address sanity checking) instrumentation FunctionPass *createAddressSanitizerFunctionPass(bool CompileKernel = false, Index: include/llvm/Transforms/Instrumentation/InstrProfiling.h =================================================================== --- include/llvm/Transforms/Instrumentation/InstrProfiling.h +++ include/llvm/Transforms/Instrumentation/InstrProfiling.h @@ -35,7 +35,8 @@ class InstrProfiling : public PassInfoMixin { public: InstrProfiling() = default; - InstrProfiling(const InstrProfOptions &Options) : Options(Options) {} + InstrProfiling(const InstrProfOptions &Options, bool IsCS) + : Options(Options), IsCS(IsCS) {} PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM); bool run(Module &M, const TargetLibraryInfo &TLI); @@ -60,6 +61,9 @@ GlobalVariable *NamesVar; size_t NamesSize; + // Is this lowering for the context-sensitive instrumentation. + bool IsCS; + // vector of counter load/store pairs to be register promoted. std::vector PromotionCandidates; Index: include/llvm/Transforms/Instrumentation/PGOInstrumentation.h =================================================================== --- include/llvm/Transforms/Instrumentation/PGOInstrumentation.h +++ include/llvm/Transforms/Instrumentation/PGOInstrumentation.h @@ -17,6 +17,7 @@ #include "llvm/ADT/ArrayRef.h" #include "llvm/IR/PassManager.h" +#include "llvm/ProfileData/InstrProf.h" #include #include @@ -27,22 +28,50 @@ class Module; /// The instrumentation (profile-instr-gen) pass for IR based PGO. +// We use this pass to create COMDAT profile variables for context +// sensitive PGO (CSPGO). The reason to have a pass for this is CSPGO +// can be run after LTO/ThinLTO linking. Lld linker needs to see +// all the COMDAT variables before linking. 
So we have this pass +// always run before linking for CSPGO. +class PGOInstrumentationGenCreateVar + : public PassInfoMixin { +public: + PGOInstrumentationGenCreateVar(std::string CSInstrName = "") + : CSInstrName(CSInstrName) {} + PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM) { + createProfileFileNameVar(M, CSInstrName); + createIRLevelProfileFlagVar(M, /* IsCS */ true); + return PreservedAnalyses::all(); + } + +private: + std::string CSInstrName; +}; + +/// The instrumentation (profile-instr-gen) pass for IR based PGO. class PGOInstrumentationGen : public PassInfoMixin { public: + PGOInstrumentationGen(bool IsCS = false) : IsCS(IsCS) {} PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM); + +private: + // If this is a context sensitive instrumentation. + bool IsCS; }; /// The profile annotation (profile-instr-use) pass for IR based PGO. class PGOInstrumentationUse : public PassInfoMixin { public: PGOInstrumentationUse(std::string Filename = "", - std::string RemappingFilename = ""); + std::string RemappingFilename = "", bool IsCS = false); PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM); private: std::string ProfileFileName; std::string ProfileRemappingFileName; + // If this is a context sensitive instrumentation. + bool IsCS; }; /// The indirect function call promotion pass. Index: lib/Analysis/ProfileSummaryInfo.cpp =================================================================== --- lib/Analysis/ProfileSummaryInfo.cpp +++ lib/Analysis/ProfileSummaryInfo.cpp @@ -79,7 +79,14 @@ bool ProfileSummaryInfo::computeSummary() { if (Summary) return true; - auto *SummaryMD = M.getProfileSummary(); + // First try to get context sensitive ProfileSummary. + auto *SummaryMD = M.getProfileSummary(/* IsCS */ true); + if (SummaryMD) { + Summary.reset(ProfileSummary::getFromMD(SummaryMD)); + return true; + } + // This will actually return PSK_Instr or PSK_Sample summary. 
+ SummaryMD = M.getProfileSummary(/* IsCS */ false); if (!SummaryMD) return false; Summary.reset(ProfileSummary::getFromMD(SummaryMD)); Index: lib/IR/Module.cpp =================================================================== --- lib/IR/Module.cpp +++ lib/IR/Module.cpp @@ -531,12 +531,16 @@ addModuleFlag(ModFlagBehavior::Error, "Code Model", CL); } -void Module::setProfileSummary(Metadata *M) { - addModuleFlag(ModFlagBehavior::Error, "ProfileSummary", M); +void Module::setProfileSummary(Metadata *M, ProfileSummary::Kind Kind) { + if (Kind == ProfileSummary::PSK_CSInstr) + addModuleFlag(ModFlagBehavior::Error, "CSProfileSummary", M); + else + addModuleFlag(ModFlagBehavior::Error, "ProfileSummary", M); } -Metadata *Module::getProfileSummary() { - return getModuleFlag("ProfileSummary"); +Metadata *Module::getProfileSummary(bool IsCS) { + return (IsCS ? getModuleFlag("CSProfileSummary") + : getModuleFlag("ProfileSummary")); } void Module::setOwnedMemoryBuffer(std::unique_ptr MB) { Index: lib/IR/ProfileSummary.cpp =================================================================== --- lib/IR/ProfileSummary.cpp +++ lib/IR/ProfileSummary.cpp @@ -21,8 +21,6 @@ using namespace llvm; -const char *ProfileSummary::KindStr[2] = {"InstrProf", "SampleProfile"}; - // Return an MDTuple with two elements. The first element is a string Key and // the second is a uint64_t Value. static Metadata *getKeyValMD(LLVMContext &Context, const char *Key, @@ -68,6 +66,7 @@ // "SampleProfile"). The rest of the elements of the outer MDTuple are specific // to the kind of profile summary as returned by getFormatSpecificMD. 
Metadata *ProfileSummary::getMD(LLVMContext &Context) { + const char *KindStr[3] = {"InstrProf", "CSInstrProf", "SampleProfile"}; Metadata *Components[] = { getKeyValMD(Context, "ProfileFormat", KindStr[PSK]), getKeyValMD(Context, "TotalCount", getTotalCount()), @@ -153,6 +152,9 @@ else if (isKeyValuePair(dyn_cast_or_null(FormatMD), "ProfileFormat", "InstrProf")) SummaryKind = PSK_Instr; + else if (isKeyValuePair(dyn_cast_or_null(FormatMD), "ProfileFormat", + "CSInstrProf")) + SummaryKind = PSK_CSInstr; else return nullptr; Index: lib/LTO/LTOBackend.cpp =================================================================== --- lib/LTO/LTOBackend.cpp +++ lib/LTO/LTOBackend.cpp @@ -154,8 +154,15 @@ const ModuleSummaryIndex *ImportSummary) { Optional PGOOpt; if (!Conf.SampleProfile.empty()) - PGOOpt = PGOOptions("", "", Conf.SampleProfile, Conf.ProfileRemapping, - false, true); + PGOOpt = PGOOptions(Conf.SampleProfile, "", Conf.ProfileRemapping, + PGOOptions::SampleUse, PGOOptions::NoCSAction, true); + else if (Conf.RunCSIRInstr) { + PGOOpt = PGOOptions("", Conf.CSIRProfile, Conf.ProfileRemapping, + PGOOptions::IRUse, PGOOptions::CSIRInstr); + } else if (!Conf.CSIRProfile.empty()) { + PGOOpt = PGOOptions(Conf.CSIRProfile, "", Conf.ProfileRemapping, + PGOOptions::IRUse, PGOOptions::CSIRUse); + } PassBuilder PB(TM, PGOOpt); AAManager AA; @@ -273,6 +280,11 @@ PMB.SLPVectorize = true; PMB.OptLevel = Conf.OptLevel; PMB.PGOSampleUse = Conf.SampleProfile; + PMB.EnablePGOCSInstrGen = Conf.RunCSIRInstr; + if (!Conf.RunCSIRInstr && !Conf.CSIRProfile.empty()) { + PMB.EnablePGOCSInstrUse = true; + PMB.PGOInstrUse = Conf.CSIRProfile; + } if (IsThinLTO) PMB.populateThinLTOPassManager(passes); else Index: lib/Passes/PassBuilder.cpp =================================================================== --- lib/Passes/PassBuilder.cpp +++ lib/Passes/PassBuilder.cpp @@ -403,7 +403,7 @@ // For PGO use pipeline, try to optimize memory intrinsics such as memcpy // using the size value profile. 
Don't perform this when optimizing for size. - if (PGOOpt && !PGOOpt->ProfileUseFile.empty() && + if (PGOOpt && PGOOpt->Action == PGOOptions::IRUse && !isOptimizingForSize(Level)) FPM.addPass(PGOMemOPSizeOpt()); @@ -446,8 +446,8 @@ // Do not enable unrolling in PreLinkThinLTO phase during sample PGO // because it changes IR to makes profile annotation in back compile // inaccurate. - if (Phase != ThinLTOPhase::PreLink || - !PGOOpt || PGOOpt->SampleProfileFile.empty()) + if (Phase != ThinLTOPhase::PreLink || !PGOOpt || + PGOOpt->Action != PGOOptions::SampleUse) LPM2.addPass(LoopFullUnrollPass(Level)); for (auto &C : LoopOptimizerEndEPCallbacks) @@ -507,7 +507,8 @@ invokePeepholeEPCallbacks(FPM, Level); if (EnableCHR && Level == O3 && PGOOpt && - (!PGOOpt->ProfileUseFile.empty() || !PGOOpt->SampleProfileFile.empty())) + (PGOOpt->Action == PGOOptions::IRUse || + PGOOpt->Action == PGOOptions::SampleUse)) FPM.addPass(ControlHeightReductionPass()); return FPM; @@ -515,15 +516,15 @@ void PassBuilder::addPGOInstrPasses(ModulePassManager &MPM, bool DebugLogging, PassBuilder::OptimizationLevel Level, - bool RunProfileGen, - std::string ProfileGenFile, - std::string ProfileUseFile, + bool RunProfileGen, bool IsCS, + std::string ProfileFile, std::string ProfileRemappingFile) { // Generally running simplification passes and the inliner with an high // threshold results in smaller executables, but there may be cases where // the size grows, so let's be conservative here and skip this simplification - // at -Os/Oz. - if (!isOptimizingForSize(Level)) { + // at -Os/Oz. We will not do this inline for context sensitive PGO (when + // IsCS is true). + if (!isOptimizingForSize(Level) && !IsCS) { InlineParams IP; // In the old pass manager, this is a cl::opt. Should still this be one? 
@@ -556,7 +557,7 @@ MPM.addPass(GlobalDCEPass()); if (RunProfileGen) { - MPM.addPass(PGOInstrumentationGen()); + MPM.addPass(PGOInstrumentationGen(IsCS)); FunctionPassManager FPM; FPM.addPass( @@ -565,14 +566,13 @@ // Add the profile lowering pass. InstrProfOptions Options; - if (!ProfileGenFile.empty()) - Options.InstrProfileOutput = ProfileGenFile; + if (!ProfileFile.empty()) + Options.InstrProfileOutput = ProfileFile; Options.DoCounterPromotion = true; - MPM.addPass(InstrProfiling(Options)); - } - - if (!ProfileUseFile.empty()) - MPM.addPass(PGOInstrumentationUse(ProfileUseFile, ProfileRemappingFile)); + Options.UseBFIInPromotion = IsCS; + MPM.addPass(InstrProfiling(Options, IsCS)); + } else if (!ProfileFile.empty()) + MPM.addPass(PGOInstrumentationUse(ProfileFile, ProfileRemappingFile, IsCS)); } static InlineParams @@ -589,7 +589,7 @@ bool DebugLogging) { ModulePassManager MPM(DebugLogging); - bool HasSampleProfile = PGOOpt && !PGOOpt->SampleProfileFile.empty(); + bool HasSampleProfile = PGOOpt && (PGOOpt->Action == PGOOptions::SampleUse); // In ThinLTO mode, when flattened profile is used, all the available // profile information will be annotated in PreLink phase so there is @@ -642,7 +642,7 @@ if (LoadSampleProfile) { // Annotate sample profile right after early FPM to ensure freshness of // the debug info. - MPM.addPass(SampleProfileLoaderPass(PGOOpt->SampleProfileFile, + MPM.addPass(SampleProfileLoaderPass(PGOOpt->ProfileFile, PGOOpt->ProfileRemappingFile, Phase == ThinLTOPhase::PreLink)); // Do not invoke ICP in the ThinLTOPrelink phase as it makes it hard @@ -691,12 +691,17 @@ // Add all the requested passes for instrumentation PGO, if requested. 
if (PGOOpt && Phase != ThinLTOPhase::PostLink && - (!PGOOpt->ProfileGenFile.empty() || !PGOOpt->ProfileUseFile.empty())) { - addPGOInstrPasses(MPM, DebugLogging, Level, PGOOpt->RunProfileGen, - PGOOpt->ProfileGenFile, PGOOpt->ProfileUseFile, + (PGOOpt->Action == PGOOptions::IRInstr || + PGOOpt->Action == PGOOptions::IRUse)) { + addPGOInstrPasses(MPM, DebugLogging, Level, + /* RunProfileGen */ PGOOpt->Action == PGOOptions::IRInstr, + /* IsCS */ false, PGOOpt->ProfileFile, PGOOpt->ProfileRemappingFile); MPM.addPass(PGOIndirectCallPromotion(false, false)); } + if (PGOOpt && Phase != ThinLTOPhase::PostLink && + PGOOpt->CSAction == PGOOptions::CSIRInstr) + MPM.addPass(PGOInstrumentationGenCreateVar(PGOOpt->CSProfileGenFile)); // Synthesize function entry counts for non-PGO compilation. if (EnableSyntheticCounts && !PGOOpt) @@ -735,8 +740,8 @@ // For PreLinkThinLTO pass, we disable hot-caller heuristic for sample PGO // because it makes profile annotation in the backend inaccurate. InlineParams IP = getInlineParamsFromOptLevel(Level); - if (Phase == ThinLTOPhase::PreLink && - PGOOpt && !PGOOpt->SampleProfileFile.empty()) + if (Phase == ThinLTOPhase::PreLink && PGOOpt && + PGOOpt->Action == PGOOptions::SampleUse) IP.HotCallSiteThreshold = 0; MainCGPipeline.addPass(InlinerPass(IP)); @@ -768,9 +773,8 @@ return MPM; } -ModulePassManager -PassBuilder::buildModuleOptimizationPipeline(OptimizationLevel Level, - bool DebugLogging) { +ModulePassManager PassBuilder::buildModuleOptimizationPipeline( + OptimizationLevel Level, bool DebugLogging, bool LTOPreLink) { ModulePassManager MPM(DebugLogging); // Optimize globals now that the module is fully simplified. @@ -797,6 +801,21 @@ // FIXME: Is this really an optimization rather than a canonicalization? MPM.addPass(ReversePostOrderFunctionAttrsPass()); + // Do a post inline PGO instrumentation and use pass. This is a context + // sensitive PGO pass. 
We don't want to do this in LTOPreLink phase as + // cross-module inline has not been done yet. The context sensitive + // instrumentation is after all the inlines are done. + if (!LTOPreLink && PGOOpt) { + if (PGOOpt->CSAction == PGOOptions::CSIRInstr) + addPGOInstrPasses(MPM, DebugLogging, Level, /* RunProfileGen */ true, + /* IsCS */ true, PGOOpt->CSProfileGenFile, + PGOOpt->ProfileRemappingFile); + else if (PGOOpt->CSAction == PGOOptions::CSIRUse) + addPGOInstrPasses(MPM, DebugLogging, Level, /* RunProfileGen */ false, + /* IsCS */ true, PGOOpt->ProfileFile, + PGOOpt->ProfileRemappingFile); + } + // Re-require GloblasAA here prior to function passes. This is particularly // useful as the above will have inlined, DCE'ed, and function-attr // propagated everything. We should at this point have a reasonably minimal @@ -922,7 +941,7 @@ ModulePassManager PassBuilder::buildPerModuleDefaultPipeline(OptimizationLevel Level, - bool DebugLogging) { + bool DebugLogging, bool LTOPreLink) { assert(Level != O0 && "Must request optimizations for the default pipeline!"); ModulePassManager MPM(DebugLogging); @@ -942,7 +961,7 @@ DebugLogging)); // Now add the optimization pipeline. - MPM.addPass(buildModuleOptimizationPipeline(Level, DebugLogging)); + MPM.addPass(buildModuleOptimizationPipeline(Level, DebugLogging, LTOPreLink)); return MPM; } @@ -1026,7 +1045,8 @@ bool DebugLogging) { assert(Level != O0 && "Must request optimizations for the default pipeline!"); // FIXME: We should use a customized pre-link pipeline! 
- return buildPerModuleDefaultPipeline(Level, DebugLogging); + return buildPerModuleDefaultPipeline(Level, DebugLogging, + /* LTOPreLink */ true); } ModulePassManager @@ -1035,9 +1055,9 @@ assert(Level != O0 && "Must request optimizations for the default pipeline!"); ModulePassManager MPM(DebugLogging); - if (PGOOpt && !PGOOpt->SampleProfileFile.empty()) { + if (PGOOpt && PGOOpt->Action == PGOOptions::SampleUse) { // Load sample profile before running the LTO optimization pipeline. - MPM.addPass(SampleProfileLoaderPass(PGOOpt->SampleProfileFile, + MPM.addPass(SampleProfileLoaderPass(PGOOpt->ProfileFile, PGOOpt->ProfileRemappingFile, false /* ThinLTOPhase::PreLink */)); } @@ -1063,7 +1083,7 @@ // This two-step promotion is to save the compile time. For LTO, it should // produce the same result as if we only do promotion here. MPM.addPass(PGOIndirectCallPromotion( - true /* InLTO */, PGOOpt && !PGOOpt->SampleProfileFile.empty())); + true /* InLTO */, PGOOpt && PGOOpt->Action == PGOOptions::SampleUse)); // Propagate constants at call sites into the functions they call. This // opens opportunities for globalopt (and inlining) by substituting function // pointers passed as arguments to direct uses of functions. @@ -1145,6 +1165,19 @@ FPM.addPass(JumpThreadingPass()); + // Do a post inline PGO instrumentation and use pass. This is a context + // sensitive PGO pass. 
+ if (PGOOpt) { + if (PGOOpt->CSAction == PGOOptions::CSIRInstr) + addPGOInstrPasses(MPM, DebugLogging, Level, /* RunProfileGen */ true, + /* IsCS */ true, PGOOpt->CSProfileGenFile, + PGOOpt->ProfileRemappingFile); + else if (PGOOpt->CSAction == PGOOptions::CSIRUse) + addPGOInstrPasses(MPM, DebugLogging, Level, /* RunProfileGen */ false, + /* IsCS */ true, PGOOpt->ProfileFile, + PGOOpt->ProfileRemappingFile); + } + // Break up allocas FPM.addPass(SROA()); Index: lib/ProfileData/InstrProf.cpp =================================================================== --- lib/ProfileData/InstrProf.cpp +++ lib/ProfileData/InstrProf.cpp @@ -1011,6 +1011,25 @@ assert(RangeLast >= RangeStart); } +// Create a COMDAT variable INSTR_PROF_RAW_VERSION_VAR to make the runtime +// aware this is an ir_level profile so it can set the version flag. +void createIRLevelProfileFlagVar(Module &M, bool IsCS) { + const StringRef VarName(INSTR_PROF_QUOTE(INSTR_PROF_RAW_VERSION_VAR)); + Type *IntTy64 = Type::getInt64Ty(M.getContext()); + uint64_t ProfileVersion = (INSTR_PROF_RAW_VERSION | VARIANT_MASK_IR_PROF); + if (IsCS) + ProfileVersion |= VARIANT_MASK_CSIR_PROF; + auto IRLevelVersionVariable = new GlobalVariable( + M, IntTy64, true, GlobalValue::WeakAnyLinkage, + Constant::getIntegerValue(IntTy64, APInt(64, ProfileVersion)), VarName); + IRLevelVersionVariable->setVisibility(GlobalValue::DefaultVisibility); + Triple TT(M.getTargetTriple()); + if (TT.supportsCOMDAT()) { + IRLevelVersionVariable->setLinkage(GlobalValue::ExternalLinkage); + IRLevelVersionVariable->setComdat(M.getOrInsertComdat(VarName)); + } +} + // Create the variable for the profile file name. 
void createProfileFileNameVar(Module &M, StringRef InstrProfileOutput) { if (InstrProfileOutput.empty()) Index: lib/ProfileData/InstrProfReader.cpp =================================================================== --- lib/ProfileData/InstrProfReader.cpp +++ lib/ProfileData/InstrProfReader.cpp @@ -162,7 +162,10 @@ IsIRInstr = true; else if (Str.equals_lower("fe")) IsIRInstr = false; - else + else if (Str.equals_lower("csir")) { + IsIRInstr = true; + HasCSIRLevelProfile = true; + } else return error(instrprof_error::bad_header); ++Line; @@ -733,7 +736,7 @@ const unsigned char * IndexedInstrProfReader::readSummary(IndexedInstrProf::ProfVersion Version, - const unsigned char *Cur) { + const unsigned char *Cur, bool UseCS) { using namespace IndexedInstrProf; using namespace support; @@ -760,10 +763,13 @@ DetailedSummary.emplace_back((uint32_t)Ent.Cutoff, Ent.MinBlockCount, Ent.NumBlocks); } + std::unique_ptr &Summary = + UseCS ? this->CS_Summary : this->Summary; + // initialize InstrProfSummary using the SummaryData from disk. - this->Summary = llvm::make_unique( - ProfileSummary::PSK_Instr, DetailedSummary, - SummaryData->get(Summary::TotalBlockCount), + Summary = llvm::make_unique( + UseCS ? ProfileSummary::PSK_CSInstr : ProfileSummary::PSK_Instr, + DetailedSummary, SummaryData->get(Summary::TotalBlockCount), SummaryData->get(Summary::MaxBlockCount), SummaryData->get(Summary::MaxInternalBlockCount), SummaryData->get(Summary::MaxFunctionCount), @@ -805,7 +811,11 @@ IndexedInstrProf::ProfVersion::CurrentVersion) return error(instrprof_error::unsupported_version); - Cur = readSummary((IndexedInstrProf::ProfVersion)FormatVersion, Cur); + Cur = readSummary((IndexedInstrProf::ProfVersion)FormatVersion, Cur, + /* UseCS */ false); + if (Header->Version & VARIANT_MASK_CSIR_PROF) + Cur = readSummary((IndexedInstrProf::ProfVersion)FormatVersion, Cur, + /* UseCS */ true); // Read the hash type and start offset. 
IndexedInstrProf::HashT HashType = static_cast( Index: lib/ProfileData/InstrProfWriter.cpp =================================================================== --- lib/ProfileData/InstrProfWriter.cpp +++ lib/ProfileData/InstrProfWriter.cpp @@ -101,6 +101,7 @@ support::endianness ValueProfDataEndianness = support::little; InstrProfSummaryBuilder *SummaryBuilder; + InstrProfSummaryBuilder *CSSummaryBuilder; InstrProfRecordWriterTrait() = default; @@ -142,7 +143,10 @@ endian::Writer LE(Out, little); for (const auto &ProfileData : *V) { const InstrProfRecord &ProfRecord = ProfileData.second; - SummaryBuilder->addRecord(ProfRecord); + if (NamedInstrProfRecord::hasCSFlagInHash(ProfileData.first)) + CSSummaryBuilder->addRecord(ProfRecord); + else + SummaryBuilder->addRecord(ProfRecord); LE.write(ProfileData.first); // Function hash LE.write(ProfRecord.Counts.size()); @@ -253,6 +257,8 @@ InstrProfSummaryBuilder ISB(ProfileSummaryBuilder::DefaultCutoffs); InfoObj->SummaryBuilder = &ISB; + InstrProfSummaryBuilder CSISB(ProfileSummaryBuilder::DefaultCutoffs); + InfoObj->CSSummaryBuilder = &CSISB; // Populate the hash table generator. for (const auto &I : FunctionData) @@ -264,6 +270,10 @@ Header.Version = IndexedInstrProf::ProfVersion::CurrentVersion; if (ProfileKind == PF_IRLevel) Header.Version |= VARIANT_MASK_IR_PROF; + if (ProfileKind == PF_IRLevelWithCS) { + Header.Version |= VARIANT_MASK_IR_PROF; + Header.Version |= VARIANT_MASK_CSIR_PROF; + } Header.Unused = 0; Header.HashType = static_cast(IndexedInstrProf::HashType); Header.HashOffset = 0; @@ -287,6 +297,14 @@ uint64_t SummaryOffset = OS.tell(); for (unsigned I = 0; I < SummarySize / sizeof(uint64_t); I++) OS.write(0); + uint64_t CSSummaryOffset = 0; + uint64_t CSSummarySize = 0; + if (ProfileKind == PF_IRLevelWithCS) { + CSSummaryOffset = OS.tell(); + CSSummarySize = SummarySize / sizeof(uint64_t); + for (unsigned I = 0; I < CSSummarySize; I++) + OS.write(0); + } // Write the hash table. 
uint64_t HashTableStart = Generator.Emit(OS.OS, *InfoObj); @@ -300,13 +318,25 @@ setSummary(TheSummary.get(), *PS); InfoObj->SummaryBuilder = nullptr; + // For Context Sensitive summary. + std::unique_ptr TheCSSummary = nullptr; + if (ProfileKind == PF_IRLevelWithCS) { + TheCSSummary = IndexedInstrProf::allocSummary(SummarySize); + std::unique_ptr CSPS = CSISB.getSummary(); + setSummary(TheCSSummary.get(), *CSPS); + } + InfoObj->CSSummaryBuilder = nullptr; + // Now do the final patch: PatchItem PatchItems[] = { // Patch the Header.HashOffset field. {HashTableStartFieldOffset, &HashTableStart, 1}, // Patch the summary data. {SummaryOffset, reinterpret_cast(TheSummary.get()), - (int)(SummarySize / sizeof(uint64_t))}}; + (int)(SummarySize / sizeof(uint64_t))}, + {CSSummaryOffset, reinterpret_cast(TheCSSummary.get()), + (int)CSSummarySize}}; + OS.patch(PatchItems, sizeof(PatchItems) / sizeof(*PatchItems)); } @@ -375,6 +405,8 @@ Error InstrProfWriter::writeText(raw_fd_ostream &OS) { if (ProfileKind == PF_IRLevel) OS << "# IR level Instrumentation Flag\n:ir\n"; + else if (ProfileKind == PF_IRLevelWithCS) + OS << "# CSIR level Instrumentation Flag\n:csir\n"; InstrProfSymtab Symtab; for (const auto &I : FunctionData) if (shouldEncodeData(I.getValue())) Index: lib/Transforms/IPO/HotColdSplitting.cpp =================================================================== --- lib/Transforms/IPO/HotColdSplitting.cpp +++ lib/Transforms/IPO/HotColdSplitting.cpp @@ -657,7 +657,7 @@ bool HotColdSplitting::run(Module &M) { bool Changed = false; - bool HasProfileSummary = M.getProfileSummary(); + bool HasProfileSummary = (M.getProfileSummary(/* IsCS */ false) != nullptr); for (auto It = M.begin(), End = M.end(); It != End; ++It) { Function &F = *It; Index: lib/Transforms/IPO/PassManagerBuilder.cpp =================================================================== --- lib/Transforms/IPO/PassManagerBuilder.cpp +++ lib/Transforms/IPO/PassManagerBuilder.cpp @@ -170,6 +170,8 @@ 
MergeFunctions = false; PrepareForLTO = false; EnablePGOInstrGen = false; + EnablePGOCSInstrGen = false; + EnablePGOCSInstrUse = false; PGOInstrGen = ""; PGOInstrUse = ""; PGOSampleUse = ""; @@ -267,13 +269,19 @@ } // Do PGO instrumentation generation or use pass as the option specified. -void PassManagerBuilder::addPGOInstrPasses(legacy::PassManagerBase &MPM) { - if (!EnablePGOInstrGen && PGOInstrUse.empty() && PGOSampleUse.empty()) +void PassManagerBuilder::addPGOInstrPasses(legacy::PassManagerBase &MPM, + bool IsCS = false) { + if (IsCS) { + if (!EnablePGOCSInstrGen && !EnablePGOCSInstrUse) + return; + } else if (!EnablePGOInstrGen && PGOInstrUse.empty() && PGOSampleUse.empty()) return; + // Perform the preinline and cleanup passes for O1 and above. // And avoid doing them if optimizing for size. + // We will not do this inline for context sensitive PGO (when IsCS is true). if (OptLevel > 0 && SizeLevel == 0 && !DisablePreInliner && - PGOSampleUse.empty()) { + PGOSampleUse.empty() && !IsCS) { // Create preinline pass. We construct an InlineParams object and specify // the threshold here to avoid the command line options of the regular // inliner to influence pre-inlining. The only fields of InlineParams we @@ -291,22 +299,23 @@ MPM.add(createInstructionCombiningPass()); // Combine silly seq's addExtensionsToPM(EP_Peephole, MPM); } - if (EnablePGOInstrGen) { - MPM.add(createPGOInstrumentationGenLegacyPass()); + if ((EnablePGOInstrGen && !IsCS) || (EnablePGOCSInstrGen && IsCS)) { + MPM.add(createPGOInstrumentationGenLegacyPass(IsCS)); // Add the profile lowering pass. 
InstrProfOptions Options; if (!PGOInstrGen.empty()) Options.InstrProfileOutput = PGOInstrGen; Options.DoCounterPromotion = true; + Options.UseBFIInPromotion = IsCS; MPM.add(createLoopRotatePass()); - MPM.add(createInstrProfilingLegacyPass(Options)); + MPM.add(createInstrProfilingLegacyPass(Options, IsCS)); } if (!PGOInstrUse.empty()) - MPM.add(createPGOInstrumentationUseLegacyPass(PGOInstrUse)); + MPM.add(createPGOInstrumentationUseLegacyPass(PGOInstrUse, IsCS)); // Indirect call promotion that promotes intra-module targets only. // For ThinLTO this is done earlier due to interactions with globalopt // for imported functions. We don't run this at -O0. - if (OptLevel > 0) + if (OptLevel > 0 && !IsCS) MPM.add( createPGOIndirectCallPromotionLegacyPass(false, !PGOSampleUse.empty())); } @@ -414,7 +423,7 @@ addExtensionsToPM(EP_Peephole, MPM); if (EnableCHR && OptLevel >= 3 && - (!PGOInstrUse.empty() || !PGOSampleUse.empty())) + (!PGOInstrUse.empty() || !PGOSampleUse.empty() || EnablePGOCSInstrGen)) MPM.add(createControlHeightReductionLegacyPass()); } @@ -529,6 +538,11 @@ if (DefaultOrPreLinkPipeline && !PrepareForThinLTOUsingPGOSampleProfile) addPGOInstrPasses(MPM); + // Create profile COMDAT variables. Lld linker wants to see all variables + // before the LTO/ThinLTO link since it needs to resolve symbols/comdats. + if (!PerformThinLTO && EnablePGOCSInstrGen) + MPM.add(createPGOInstrumentationGenCreateVarLegacyPass(PGOInstrGen)); + // Split out cold code before inlining. See comment in the new PM // (\ref buildModuleSimplificationPipeline). if (EnableHotColdSplit && DefaultOrPreLinkPipeline) @@ -575,6 +589,14 @@ // and saves running remaining passes on the eliminated functions. MPM.add(createEliminateAvailableExternallyPass()); + // CSFDO instrumentation and use pass. Don't invoke this for Prepare pass + // for LTO and ThinLTO -- The actual pass will be called after all inlines + // are performed. 
+ // Need to do this after COMDAT variables have been eliminated, + // (i.e. after EliminateAvailableExternallyPass). + if (!(PrepareForLTO || PrepareForThinLTO)) + addPGOInstrPasses(MPM, /* IsCS */ true); + MPM.add(createReversePostOrderFunctionAttrsPass()); // The inliner performs some kind of dead code elimination as it goes, @@ -847,6 +869,9 @@ PM.add(createPruneEHPass()); // Remove dead EH info. + // CSFDO instrumentation and use pass. + addPGOInstrPasses(PM, /* IsCS */ true); + // Optimize globals again if we ran the inliner. if (RunInliner) PM.add(createGlobalOptimizerPass()); Index: lib/Transforms/IPO/SampleProfile.cpp =================================================================== --- lib/Transforms/IPO/SampleProfile.cpp +++ lib/Transforms/IPO/SampleProfile.cpp @@ -1595,8 +1595,9 @@ return false; PSI = _PSI; - if (M.getProfileSummary() == nullptr) - M.setProfileSummary(Reader->getSummary().getMD(M.getContext())); + if (M.getProfileSummary(/* IsCS */ false) == nullptr) + M.setProfileSummary(Reader->getSummary().getMD(M.getContext()), + ProfileSummary::PSK_Sample); // Compute the total number of samples collected in this profile. 
for (const auto &I : Reader->getProfiles()) Index: lib/Transforms/Instrumentation/InstrProfiling.cpp =================================================================== --- lib/Transforms/Instrumentation/InstrProfiling.cpp +++ lib/Transforms/Instrumentation/InstrProfiling.cpp @@ -18,6 +18,8 @@ #include "llvm/ADT/StringRef.h" #include "llvm/ADT/Triple.h" #include "llvm/ADT/Twine.h" +#include "llvm/Analysis/BlockFrequencyInfo.h" +#include "llvm/Analysis/BranchProbabilityInfo.h" #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/IR/Attributes.h" @@ -147,8 +149,8 @@ static char ID; InstrProfilingLegacyPass() : ModulePass(ID) {} - InstrProfilingLegacyPass(const InstrProfOptions &Options) - : ModulePass(ID), InstrProf(Options) {} + InstrProfilingLegacyPass(const InstrProfOptions &Options, bool IsCS) + : ModulePass(ID), InstrProf(Options, IsCS) {} StringRef getPassName() const override { return "Frontend instrumentation-based coverage lowering"; @@ -232,9 +234,9 @@ public: PGOCounterPromoter( DenseMap> &LoopToCands, - Loop &CurLoop, LoopInfo &LI) + Loop &CurLoop, LoopInfo &LI, BlockFrequencyInfo *BFI) : LoopToCandidates(LoopToCands), ExitBlocks(), InsertPts(), L(CurLoop), - LI(LI) { + LI(LI), BFI(BFI) { SmallVector LoopExitBlocks; SmallPtrSet BlockSet; @@ -263,6 +265,20 @@ SSAUpdater SSA(&NewPHIs); Value *InitVal = ConstantInt::get(Cand.first->getType(), 0); + // If BFI is set, we will use it to guide the promotions. + if (BFI) { + auto *BB = Cand.first->getParent(); + auto InstrCount = BFI->getBlockProfileCount(BB); + if (!InstrCount) + continue; + auto PreheaderCount = BFI->getBlockProfileCount(L.getLoopPreheader()); + // If the average loop trip count is not greater than 1.5, we skip + // promotion. 
+ if (PreheaderCount && + (PreheaderCount.getValue() * 3) >= (InstrCount.getValue() * 2)) + continue; + } + PGOCounterPromoterHelper Promoter(Cand.first, Cand.second, SSA, InitVal, L.getLoopPreheader(), ExitBlocks, InsertPts, LoopToCandidates, LI); @@ -312,6 +328,11 @@ SmallVector ExitingBlocks; LP->getExitingBlocks(ExitingBlocks); + + // If BFI is set, we do more aggressive promotions based on BFI. + if (BFI) + return (unsigned)-1; + // Not considierered speculative. if (ExitingBlocks.size() == 1) return MaxNumOfPromotionsPerLoop; @@ -343,6 +364,7 @@ SmallVector InsertPts; Loop &L; LoopInfo &LI; + BlockFrequencyInfo *BFI; }; } // end anonymous namespace @@ -365,8 +387,9 @@ "Frontend instrumentation-based coverage lowering.", false, false) ModulePass * -llvm::createInstrProfilingLegacyPass(const InstrProfOptions &Options) { - return new InstrProfilingLegacyPass(Options); +llvm::createInstrProfilingLegacyPass(const InstrProfOptions &Options, + bool IsCS) { + return new InstrProfilingLegacyPass(Options, IsCS); } static InstrProfIncrementInst *castToIncrementInst(Instruction *Instr) { @@ -415,6 +438,13 @@ LoopInfo LI(DT); DenseMap> LoopPromotionCandidates; + std::unique_ptr BFI; + if (Options.UseBFIInPromotion) { + std::unique_ptr BPI; + BPI.reset(new BranchProbabilityInfo(*F, LI, TLI)); + BFI.reset(new BlockFrequencyInfo(*F, *BPI, LI)); + } + for (const auto &LoadStore : PromotionCandidates) { auto *CounterLoad = LoadStore.first; auto *CounterStore = LoadStore.second; @@ -430,7 +460,7 @@ // Do a post-order traversal of the loops so that counter updates can be // iteratively hoisted outside the loop nest. for (auto *Loop : llvm::reverse(Loops)) { - PGOCounterPromoter Promoter(LoopPromotionCandidates, *Loop, LI); + PGOCounterPromoter Promoter(LoopPromotionCandidates, *Loop, LI, BFI.get()); Promoter.run(&TotalCountersPromoted); } } @@ -697,7 +727,6 @@ // Don't do this for Darwin. compiler-rt uses linker magic. 
if (TT.isOSDarwin()) return false; - // Use linker script magic to get data/cnts/name start/end. if (TT.isOSLinux() || TT.isOSFreeBSD() || TT.isOSNetBSD() || TT.isOSFuchsia() || TT.isPS4CPU()) @@ -965,8 +994,12 @@ } void InstrProfiling::emitInitialization() { - // Create variable for profile name. - createProfileFileNameVar(*M, Options.InstrProfileOutput); + // Create ProfileFileName variable. Don't do this for the + // context-sensitive instrumentation lowering: This lowering is after + // LTO/ThinLTO linking. Pass PGOInstrumentationGenCreateVar should + // have already created the variable before LTO/ThinLTO linking. + if (!IsCS) + createProfileFileNameVar(*M, Options.InstrProfileOutput); Function *RegisterF = M->getFunction(getInstrProfRegFuncsName()); if (!RegisterF) return; Index: lib/Transforms/Instrumentation/PGOInstrumentation.cpp =================================================================== --- lib/Transforms/Instrumentation/PGOInstrumentation.cpp +++ lib/Transforms/Instrumentation/PGOInstrumentation.cpp @@ -65,6 +65,7 @@ #include "llvm/Analysis/IndirectCallVisitor.h" #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/OptimizationRemarkEmitter.h" +#include "llvm/Analysis/ProfileSummaryInfo.h" #include "llvm/IR/Attributes.h" #include "llvm/IR/BasicBlock.h" #include "llvm/IR/CFG.h" @@ -132,6 +133,19 @@ STATISTIC(NumOfPGOMismatch, "Number of functions having mismatch profile."); STATISTIC(NumOfPGOMissing, "Number of functions without profile."); STATISTIC(NumOfPGOICall, "Number of indirect call value instrumentations."); +STATISTIC(NumOfCSPGOInstrument, "Number of edges instrumented in CSPGO."); +STATISTIC(NumOfCSPGOSelectInsts, + "Number of select instruction instrumented in CSPGO."); +STATISTIC(NumOfCSPGOMemIntrinsics, + "Number of mem intrinsics instrumented in CSPGO."); +STATISTIC(NumOfCSPGOEdge, "Number of edges in CSPGO."); +STATISTIC(NumOfCSPGOBB, "Number of basic-blocks in CSPGO."); +STATISTIC(NumOfCSPGOSplit, "Number of critical edge 
splits in CSPGO."); +STATISTIC(NumOfCSPGOFunc, + "Number of functions having valid profile counts in CSPGO."); +STATISTIC(NumOfCSPGOMismatch, + "Number of functions having mismatch profile in CSPGO."); +STATISTIC(NumOfCSPGOMissing, "Number of functions without profile in CSPGO."); // Command line option to specify the file to read profile from. This is // mainly used for testing. @@ -383,7 +397,8 @@ public: static char ID; - PGOInstrumentationGenLegacyPass() : ModulePass(ID) { + PGOInstrumentationGenLegacyPass(bool IsCS = false) + : ModulePass(ID), IsCS(IsCS) { initializePGOInstrumentationGenLegacyPassPass( *PassRegistry::getPassRegistry()); } @@ -391,6 +406,8 @@ StringRef getPassName() const override { return "PGOInstrumentationGenPass"; } private: + // Is this is context-sensitive instrumentation. + bool IsCS; bool runOnModule(Module &M) override; void getAnalysisUsage(AnalysisUsage &AU) const override { @@ -403,8 +420,8 @@ static char ID; // Provide the profile filename as the parameter. - PGOInstrumentationUseLegacyPass(std::string Filename = "") - : ModulePass(ID), ProfileFileName(std::move(Filename)) { + PGOInstrumentationUseLegacyPass(std::string Filename = "", bool IsCS = false) + : ModulePass(ID), ProfileFileName(std::move(Filename)), IsCS(IsCS) { if (!PGOTestProfileFile.empty()) ProfileFileName = PGOTestProfileFile; initializePGOInstrumentationUseLegacyPassPass( @@ -415,14 +432,38 @@ private: std::string ProfileFileName; + // Is this is context-sensitive instrumentation use. 
+ bool IsCS; bool runOnModule(Module &M) override; void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.addRequired(); AU.addRequired(); } }; +class PGOInstrumentationGenCreateVarLegacyPass : public ModulePass { +public: + static char ID; + StringRef getPassName() const override { + return "PGOInstrumentationGenCreateVarPass"; + } + PGOInstrumentationGenCreateVarLegacyPass(std::string CSInstrName = "") + : ModulePass(ID), InstrProfileOutput(CSInstrName) { + initializePGOInstrumentationGenCreateVarLegacyPassPass( + *PassRegistry::getPassRegistry()); + } + +private: + bool runOnModule(Module &M) override { + createProfileFileNameVar(M, InstrProfileOutput); + createIRLevelProfileFlagVar(M, true); + return false; + } + std::string InstrProfileOutput; +}; + } // end anonymous namespace char PGOInstrumentationGenLegacyPass::ID = 0; @@ -434,8 +475,8 @@ INITIALIZE_PASS_END(PGOInstrumentationGenLegacyPass, "pgo-instr-gen", "PGO instrumentation.", false, false) -ModulePass *llvm::createPGOInstrumentationGenLegacyPass() { - return new PGOInstrumentationGenLegacyPass(); +ModulePass *llvm::createPGOInstrumentationGenLegacyPass(bool IsCS) { + return new PGOInstrumentationGenLegacyPass(IsCS); } char PGOInstrumentationUseLegacyPass::ID = 0; @@ -444,11 +485,25 @@ "Read PGO instrumentation profile.", false, false) INITIALIZE_PASS_DEPENDENCY(BlockFrequencyInfoWrapperPass) INITIALIZE_PASS_DEPENDENCY(BranchProbabilityInfoWrapperPass) +INITIALIZE_PASS_DEPENDENCY(ProfileSummaryInfoWrapperPass) INITIALIZE_PASS_END(PGOInstrumentationUseLegacyPass, "pgo-instr-use", "Read PGO instrumentation profile.", false, false) -ModulePass *llvm::createPGOInstrumentationUseLegacyPass(StringRef Filename) { - return new PGOInstrumentationUseLegacyPass(Filename.str()); +ModulePass *llvm::createPGOInstrumentationUseLegacyPass(StringRef Filename, + bool IsCS) { + return new PGOInstrumentationUseLegacyPass(Filename.str(), IsCS); +} + +char PGOInstrumentationGenCreateVarLegacyPass::ID = 0; + 
+INITIALIZE_PASS(PGOInstrumentationGenCreateVarLegacyPass, + "pgo-instr-gen-create-var", + "Create PGO instrumentation version variable for CSPGO.", false, + false) + +ModulePass * +llvm::createPGOInstrumentationGenCreateVarLegacyPass(StringRef CSInstrName) { + return new PGOInstrumentationGenCreateVarLegacyPass(CSInstrName); } namespace { @@ -496,6 +551,9 @@ private: Function &F; + // Is this is context-sensitive instrumentation. + bool IsCS; + // A map that stores the Comdat group in function F. std::unordered_multimap &ComdatMembers; @@ -535,15 +593,23 @@ Function &Func, std::unordered_multimap &ComdatMembers, bool CreateGlobalVar = false, BranchProbabilityInfo *BPI = nullptr, - BlockFrequencyInfo *BFI = nullptr) - : F(Func), ComdatMembers(ComdatMembers), ValueSites(IPVK_Last + 1), - SIVisitor(Func), MIVisitor(Func), MST(F, BPI, BFI) { + BlockFrequencyInfo *BFI = nullptr, bool IsCS = false) + : F(Func), IsCS(IsCS), ComdatMembers(ComdatMembers), + ValueSites(IPVK_Last + 1), SIVisitor(Func), MIVisitor(Func), + MST(F, BPI, BFI) { // This should be done before CFG hash computation. 
SIVisitor.countSelects(Func); MIVisitor.countMemIntrinsics(Func); - NumOfPGOSelectInsts += SIVisitor.getNumOfSelectInsts(); - NumOfPGOMemIntrinsics += MIVisitor.getNumOfMemIntrinsics(); - ValueSites[IPVK_IndirectCallTarget] = findIndirectCalls(Func); + if (!IsCS) { + NumOfPGOSelectInsts += SIVisitor.getNumOfSelectInsts(); + NumOfPGOMemIntrinsics += MIVisitor.getNumOfMemIntrinsics(); + NumOfPGOBB += MST.BBInfos.size(); + ValueSites[IPVK_IndirectCallTarget] = findIndirectCalls(Func); + } else { + NumOfCSPGOSelectInsts += SIVisitor.getNumOfSelectInsts(); + NumOfCSPGOMemIntrinsics += MIVisitor.getNumOfMemIntrinsics(); + NumOfCSPGOBB += MST.BBInfos.size(); + } ValueSites[IPVK_MemOPSize] = MIVisitor.findMemIntrinsics(Func); FuncName = getPGOFuncName(F); @@ -552,13 +618,12 @@ renameComdatFunction(); LLVM_DEBUG(dumpInfo("after CFGMST")); - NumOfPGOBB += MST.BBInfos.size(); for (auto &E : MST.AllEdges) { if (E->Removed) continue; - NumOfPGOEdge++; + IsCS ? NumOfCSPGOEdge++ : NumOfPGOEdge++; if (!E->InMST) - NumOfPGOInstrument++; + IsCS ? NumOfCSPGOInstrument++ : NumOfPGOInstrument++; } if (CreateGlobalVar) @@ -597,9 +662,17 @@ } } JC.update(Indexes); + + // Hash format for context sensitive profile. Reserve 4 bits for other + // information. FunctionHash = (uint64_t)SIVisitor.getNumOfSelectInsts() << 56 | (uint64_t)ValueSites[IPVK_IndirectCallTarget].size() << 48 | + //(uint64_t)ValueSites[IPVK_MemOPSize].size() << 40 | (uint64_t)MST.AllEdges.size() << 32 | JC.getCRC(); + // Reserve bit 60-63 for other information purpose. + FunctionHash &= 0x0FFFFFFFFFFFFFFF; + if (IsCS) + NamedInstrProfRecord::setCSFlagInHash(FunctionHash); LLVM_DEBUG(dbgs() << "Function Hash Computation for " << F.getName() << ":\n" << " CRC = " << JC.getCRC() << ", Selects = " << SIVisitor.getNumOfSelectInsts() @@ -705,7 +778,7 @@ // For a critical edge, we have to split. Instrument the newly // created BB. - NumOfPGOSplit++; + IsCS ? 
NumOfCSPGOSplit++ : NumOfPGOSplit++; LLVM_DEBUG(dbgs() << "Split critical edge: " << getBBInfo(SrcBB).Index << " --> " << getBBInfo(DestBB).Index << "\n"); unsigned SuccNum = GetSuccessorNumber(SrcBB, DestBB); @@ -720,12 +793,14 @@ // Critical edges will be split. static void instrumentOneFunc( Function &F, Module *M, BranchProbabilityInfo *BPI, BlockFrequencyInfo *BFI, - std::unordered_multimap &ComdatMembers) { + std::unordered_multimap &ComdatMembers, + bool IsCS) { // Split indirectbr critical edges here before computing the MST rather than // later in getInstrBB() to avoid invalidating it. SplitIndirectBrCriticalEdges(F, BPI, BFI); + FuncPGOInstrumentation FuncInfo(F, ComdatMembers, true, BPI, - BFI); + BFI, IsCS); unsigned NumCounters = FuncInfo.getNumCounters(); uint32_t I = 0; @@ -852,10 +927,10 @@ PGOUseFunc(Function &Func, Module *Modu, std::unordered_multimap &ComdatMembers, BranchProbabilityInfo *BPI = nullptr, - BlockFrequencyInfo *BFIin = nullptr) + BlockFrequencyInfo *BFIin = nullptr, bool IsCS = false) : F(Func), M(Modu), BFI(BFIin), - FuncInfo(Func, ComdatMembers, false, BPI, BFIin), - FreqAttr(FFA_Normal) {} + FuncInfo(Func, ComdatMembers, false, BPI, BFIin, IsCS), + FreqAttr(FFA_Normal), IsCS(IsCS) {} // Read counts for the instrumented BB from profile. bool readCounters(IndexedInstrProfReader *PGOReader, bool &AllZeros); @@ -928,6 +1003,9 @@ // Function hotness info derived from profile. FuncFreqAttr FreqAttr; + // Is to use the context sensitive profile. + bool IsCS; + // Find the Instrumented BB and set the value. void setInstrumentedCounts(const std::vector &CountFromProfile); @@ -1021,23 +1099,31 @@ handleAllErrors(std::move(E), [&](const InstrProfError &IPE) { auto Err = IPE.get(); bool SkipWarning = false; + LLVM_DEBUG(dbgs() << "Error in reading profile for Func " + << FuncInfo.FuncName << ": "); if (Err == instrprof_error::unknown_function) { - NumOfPGOMissing++; + IsCS ? 
NumOfCSPGOMissing++ : NumOfPGOMissing++; SkipWarning = !PGOWarnMissing; + LLVM_DEBUG(dbgs() << "unknown function"); } else if (Err == instrprof_error::hash_mismatch || Err == instrprof_error::malformed) { - NumOfPGOMismatch++; + IsCS ? NumOfCSPGOMismatch++ : NumOfPGOMismatch++; SkipWarning = NoPGOWarnMismatch || (NoPGOWarnMismatchComdat && (F.hasComdat() || F.getLinkage() == GlobalValue::AvailableExternallyLinkage)); + LLVM_DEBUG(dbgs() << "hash mismatch (skip=" << SkipWarning << ")"); } + LLVM_DEBUG(dbgs() << " IsCS=" << IsCS << "\n"); if (SkipWarning) return; - std::string Msg = IPE.message() + std::string(" ") + F.getName().str(); + std::string Msg = IPE.message() + std::string(" ") + F.getName().str() + + std::string(" Hash = ") + + std::to_string(FuncInfo.FunctionHash); + Ctx.diagnose( DiagnosticInfoPGOProfile(M->getName().data(), Msg, DS_Warning)); }); @@ -1046,7 +1132,7 @@ ProfileRecord = std::move(Result.get()); std::vector &CountFromProfile = ProfileRecord.Counts; - NumOfPGOFunc++; + IsCS ? NumOfCSPGOFunc++ : NumOfPGOFunc++; LLVM_DEBUG(dbgs() << CountFromProfile.size() << " counts\n"); uint64_t ValueSum = 0; for (unsigned I = 0, S = CountFromProfile.size(); I < S; I++) { @@ -1061,7 +1147,7 @@ getBBInfo(nullptr).UnknownCountInEdge = 2; setInstrumentedCounts(CountFromProfile); - ProgramMaxCount = PGOReader->getMaximumFunctionCount(); + ProgramMaxCount = PGOReader->getMaximumFunctionCount(IsCS); return true; } @@ -1166,7 +1252,8 @@ // Assign the scaled count values to the BB with multiple out edges. void PGOUseFunc::setBranchWeights() { // Generate MD_prof metadata for every branch instruction. 
- LLVM_DEBUG(dbgs() << "\nSetting branch weights.\n"); + LLVM_DEBUG(dbgs() << "\nSetting branch weights for func " << F.getName() + << " IsCS=" << IsCS << "\n"); for (auto &BB : F) { Instruction *TI = BB.getTerminator(); if (TI->getNumSuccessors() < 2) @@ -1174,6 +1261,7 @@ if (!(isa(TI) || isa(TI) || isa(TI))) continue; + if (getBBInfo(&BB).CountValue == 0) continue; @@ -1351,24 +1439,6 @@ } } -// Create a COMDAT variable INSTR_PROF_RAW_VERSION_VAR to make the runtime -// aware this is an ir_level profile so it can set the version flag. -static void createIRLevelProfileFlagVariable(Module &M) { - Type *IntTy64 = Type::getInt64Ty(M.getContext()); - uint64_t ProfileVersion = (INSTR_PROF_RAW_VERSION | VARIANT_MASK_IR_PROF); - auto IRLevelVersionVariable = new GlobalVariable( - M, IntTy64, true, GlobalVariable::ExternalLinkage, - Constant::getIntegerValue(IntTy64, APInt(64, ProfileVersion)), - INSTR_PROF_QUOTE(INSTR_PROF_RAW_VERSION_VAR)); - IRLevelVersionVariable->setVisibility(GlobalValue::DefaultVisibility); - Triple TT(M.getTargetTriple()); - if (!TT.supportsCOMDAT()) - IRLevelVersionVariable->setLinkage(GlobalValue::WeakAnyLinkage); - else - IRLevelVersionVariable->setComdat(M.getOrInsertComdat( - StringRef(INSTR_PROF_QUOTE(INSTR_PROF_RAW_VERSION_VAR)))); -} - // Collect the set of members for each Comdat in module M and store // in ComdatMembers. static void collectComdatMembers( @@ -1389,8 +1459,11 @@ static bool InstrumentAllFunctions( Module &M, function_ref LookupBPI, - function_ref LookupBFI) { - createIRLevelProfileFlagVariable(M); + function_ref LookupBFI, bool IsCS) { + // For the context-sensitive instrumentation, we should have a separate pass + // (before LTO/ThinLTO linking) to create these variables.
+ if (!IsCS) + createIRLevelProfileFlagVar(M, /* IsCS */ false); std::unordered_multimap ComdatMembers; collectComdatMembers(M, ComdatMembers); @@ -1399,7 +1472,7 @@ continue; auto *BPI = LookupBPI(F); auto *BFI = LookupBFI(F); - instrumentOneFunc(F, &M, BPI, BFI, ComdatMembers); + instrumentOneFunc(F, &M, BPI, BFI, ComdatMembers, IsCS); } return true; } @@ -1414,7 +1487,7 @@ auto LookupBFI = [this](Function &F) { return &this->getAnalysis(F).getBFI(); }; - return InstrumentAllFunctions(M, LookupBPI, LookupBFI); + return InstrumentAllFunctions(M, LookupBPI, LookupBFI, IsCS); } PreservedAnalyses PGOInstrumentationGen::run(Module &M, @@ -1428,7 +1501,7 @@ return &FAM.getResult(F); }; - if (!InstrumentAllFunctions(M, LookupBPI, LookupBFI)) + if (!InstrumentAllFunctions(M, LookupBPI, LookupBFI, IsCS)) return PreservedAnalyses::all(); return PreservedAnalyses::none(); @@ -1437,7 +1510,7 @@ static bool annotateAllFunctions( Module &M, StringRef ProfileFileName, StringRef ProfileRemappingFileName, function_ref LookupBPI, - function_ref LookupBFI) { + function_ref LookupBFI, bool IsCS) { LLVM_DEBUG(dbgs() << "Read in profile counters: "); auto &Ctx = M.getContext(); // Read the counter array from file. @@ -1458,6 +1531,9 @@ StringRef("Cannot get PGOReader"))); return false; } + if (!PGOReader->hasCSIRLevelProfile() && IsCS) + return false; + // TODO: might need to change the warning once the clang option is finalized. if (!PGOReader->isIRLevelProfile()) { Ctx.diagnose(DiagnosticInfoPGOProfile( @@ -1477,7 +1553,7 @@ // Split indirectbr critical edges here before computing the MST rather than // later in getInstrBB() to avoid invalidating it. 
SplitIndirectBrCriticalEdges(F, BPI, BFI); - PGOUseFunc Func(F, &M, ComdatMembers, BPI, BFI); + PGOUseFunc Func(F, &M, ComdatMembers, BPI, BFI, IsCS); bool AllZeros = false; if (!Func.readCounters(PGOReader.get(), AllZeros)) continue; @@ -1525,7 +1601,10 @@ } } } - M.setProfileSummary(PGOReader->getSummary().getMD(M.getContext())); + M.setProfileSummary(PGOReader->getSummary(IsCS).getMD(M.getContext()), + IsCS ? ProfileSummary::PSK_CSInstr + : ProfileSummary::PSK_Instr); + // Set function hotness attribute from the profile. // We have to apply these attributes at the end because their presence // can affect the BranchProbabilityInfo of any callers, resulting in an @@ -1544,9 +1623,10 @@ } PGOInstrumentationUse::PGOInstrumentationUse(std::string Filename, - std::string RemappingFilename) + std::string RemappingFilename, + bool IsCS) : ProfileFileName(std::move(Filename)), - ProfileRemappingFileName(std::move(RemappingFilename)) { + ProfileRemappingFileName(std::move(RemappingFilename)), IsCS(IsCS) { if (!PGOTestProfileFile.empty()) ProfileFileName = PGOTestProfileFile; if (!PGOTestProfileRemappingFile.empty()) @@ -1566,7 +1646,7 @@ }; if (!annotateAllFunctions(M, ProfileFileName, ProfileRemappingFileName, - LookupBPI, LookupBFI)) + LookupBPI, LookupBFI, IsCS)) return PreservedAnalyses::all(); return PreservedAnalyses::none(); @@ -1583,7 +1663,8 @@ return &this->getAnalysis(F).getBFI(); }; - return annotateAllFunctions(M, ProfileFileName, "", LookupBPI, LookupBFI); + return annotateAllFunctions(M, ProfileFileName, "", LookupBPI, LookupBFI, + IsCS); } static std::string getSimpleNodeName(const BasicBlock *Node) { Index: test/Other/Inputs/cspgo-cs.proftext =================================================================== --- test/Other/Inputs/cspgo-cs.proftext +++ test/Other/Inputs/cspgo-cs.proftext @@ -0,0 +1 @@ +:csir Index: test/Other/Inputs/cspgo-noncs.proftext =================================================================== --- 
test/Other/Inputs/cspgo-noncs.proftext +++ test/Other/Inputs/cspgo-noncs.proftext @@ -0,0 +1 @@ +:ir Index: test/Other/cspgo-O2-pipeline.ll =================================================================== --- test/Other/cspgo-O2-pipeline.ll +++ test/Other/cspgo-O2-pipeline.ll @@ -0,0 +1,13 @@ +; Test CSGen pass in CSPGO. +; RUN: llvm-profdata merge %S/Inputs/cspgo-noncs.proftext -o %t-noncs.profdata +; RUN: llvm-profdata merge %S/Inputs/cspgo-cs.proftext -o %t-cs.profdata +; RUN: opt -O2 -debug-pass=Structure -pgo-kind=pgo-instr-use-pipeline -profile-file='%t-noncs.profdata' -cspgo-kind=cspgo-instr-gen-pipeline -cs-profilegen-file=alloc %s 2>&1 |FileCheck %s --check-prefixes=CSGENDEFAULT +; CSGENDEFAULT: PGOInstrumentationUse +; CSGENDEFAULT: PGOInstrumentationGenCreateVar +; CSGENDEFAULT: PGOInstrumentationGen + +; Test CSUse pass in CSPGO. +; RUN: opt -O2 -debug-pass=Structure -pgo-kind=pgo-instr-use-pipeline -profile-file='%t-cs.profdata' -cspgo-kind=cspgo-instr-use-pipeline %s 2>&1 |FileCheck %s --check-prefixes=CSUSEDEFAULT +; CSUSEDEFAULT: PGOInstrumentationUse +; CSUSEDEFAULT-NOT: PGOInstrumentationGenCreateVar +; CSUSEDEFAULT: PGOInstrumentationUse Index: test/Other/new-pm-cspgo.ll =================================================================== --- test/Other/new-pm-cspgo.ll +++ test/Other/new-pm-cspgo.ll @@ -0,0 +1,32 @@ +; Test CSGen pass in CSPGO. 
+; RUN: llvm-profdata merge %S/Inputs/cspgo-noncs.proftext -o %t-noncs.profdata +; RUN: llvm-profdata merge %S/Inputs/cspgo-cs.proftext -o %t-cs.profdata +; RUN: opt -debug-pass-manager -passes='default' -pgo-kind=pgo-instr-use-pipeline -profile-file='%t-noncs.profdata' -cspgo-kind=cspgo-instr-gen-pipeline -cs-profilegen-file=alloc %s 2>&1 |FileCheck %s --check-prefixes=CSGENDEFAULT +; RUN: opt -debug-pass-manager -passes='thinlto-pre-link' -pgo-kind=pgo-instr-use-pipeline -profile-file='%t-noncs.profdata' -cspgo-kind=cspgo-instr-gen-pipeline -cs-profilegen-file=alloc %s 2>&1 |FileCheck %s --check-prefixes=CSGENPRELINK +; RUN: opt -debug-pass-manager -passes='thinlto' -pgo-kind=pgo-instr-use-pipeline -profile-file='%t-noncs.profdata' -cspgo-kind=cspgo-instr-gen-pipeline -cs-profilegen-file=alloc %s 2>&1 |FileCheck %s --check-prefixes=CSGENLTO +; RUN: opt -debug-pass-manager -passes='lto-pre-link' -pgo-kind=pgo-instr-use-pipeline -profile-file='%t-noncs.profdata' -cspgo-kind=cspgo-instr-gen-pipeline -cs-profilegen-file=alloc %s 2>&1 |FileCheck %s --check-prefixes=CSGENPRELINK +; RUN: opt -debug-pass-manager -passes='lto' -pgo-kind=pgo-instr-use-pipeline -profile-file='%t-noncs.profdata' -cspgo-kind=cspgo-instr-gen-pipeline -cs-profilegen-file=alloc %s 2>&1 |FileCheck %s --check-prefixes=CSGENLTO +; CSGENDEFAULT: Running pass: PGOInstrumentationUse +; CSGENDEFAULT: Running pass: PGOInstrumentationGenCreateVar +; CSGENDEFAULT: Running pass: PGOInstrumentationGen +; CSGENPRELINK: Running pass: PGOInstrumentationUse +; CSGENPRELINK: Running pass: PGOInstrumentationGenCreateVar +; CSGENPRELINK-NOT: Running pass: PGOInstrumentationGen +; CSGENLTO-NOT: Running pass: PGOInstrumentationUse +; CSGENLTO-NOT: Running pass: PGOInstrumentationGenCreateVar +; CSGENLTO: Running pass: PGOInstrumentationGen + +; Test CSUse pass in CSPGO. 
+; RUN: opt -debug-pass-manager -passes='default' -pgo-kind=pgo-instr-use-pipeline -profile-file='%t-cs.profdata' -cspgo-kind=cspgo-instr-use-pipeline %s 2>&1 |FileCheck %s --check-prefixes=CSUSEDEFAULT +; RUN: opt -debug-pass-manager -passes='thinlto-pre-link' -pgo-kind=pgo-instr-use-pipeline -profile-file='%t-cs.profdata' -cspgo-kind=cspgo-instr-use-pipeline %s 2>&1 |FileCheck %s --check-prefixes=CSUSEPRELINK +; RUN: opt -debug-pass-manager -passes='thinlto' -pgo-kind=pgo-instr-use-pipeline -profile-file='%t-cs.profdata' -cspgo-kind=cspgo-instr-use-pipeline %s 2>&1 |FileCheck %s --check-prefixes=CSUSELTO +; RUN: opt -debug-pass-manager -passes='lto-pre-link' -pgo-kind=pgo-instr-use-pipeline -profile-file='%t-noncs.profdata' -cspgo-kind=cspgo-instr-use-pipeline %s 2>&1 |FileCheck %s --check-prefixes=CSUSEPRELINK +; RUN: opt -debug-pass-manager -passes='lto' -pgo-kind=pgo-instr-use-pipeline -profile-file='%t-cs.profdata' -cspgo-kind=cspgo-instr-use-pipeline %s 2>&1 |FileCheck %s --check-prefixes=CSUSELTO +; CSUSEDEFAULT: Running pass: PGOInstrumentationUse +; CSUSEDEFAULT-NOT: Running pass: PGOInstrumentationGenCreateVar +; CSUSEDEFAULT: Running pass: PGOInstrumentationUse +; CSUSEPRELINK: Running pass: PGOInstrumentationUse +; CSUSEPRELINK-NOT: Running pass: PGOInstrumentationGenCreateVar +; CSUSEPRELINK-NOT: Running pass: PGOInstrumentationUse +; CSUSELTO: Running pass: PGOInstrumentationUse +; CSUSELTO-NOT: Running pass: PGOInstrumentationUse Index: test/Transforms/PGOProfile/Inputs/cspgo.proftext =================================================================== --- test/Transforms/PGOProfile/Inputs/cspgo.proftext +++ test/Transforms/PGOProfile/Inputs/cspgo.proftext @@ -0,0 +1,151 @@ +# CSIR level Instrumentation Flag +:csir +bar_m +# Func Hash: +29667547796 +# Num Counters: +2 +# Counter Values: +99949 +51 + +bar_m +# Func Hash: +1224979111529676799 +# Num Counters: +2 +# Counter Values: +100000 +99949 + +csfdo_plain.c:cond +# Func Hash: +1152921517491748863 
+# Num Counters: +1 +# Counter Values: +200000 + +csfdo_plain.c:cond +# Func Hash: +12884901887 +# Num Counters: +1 +# Counter Values: +200000 + +bar_m2 +# Func Hash: +1152921534274394772 +# Num Counters: +2 +# Counter Values: +99938 +62 + +bar_m2 +# Func Hash: +29667547796 +# Num Counters: +2 +# Counter Values: +99938 +62 + +foo +# Func Hash: +1152921640672869708 +# Num Counters: +10 +# Counter Values: +100000 +100000 +0 +66666 +66666 +0 +100000 +66667 +100000 +1 + +foo +# Func Hash: +29212902728 +# Num Counters: +2 +# Counter Values: +100000 +1 + +bar +# Func Hash: +1152921569533132113 +# Num Counters: +5 +# Counter Values: +0 +0 +0 +0 +0 + +bar +# Func Hash: +56228292833 +# Num Counters: +4 +# Counter Values: +800000 +399999 +100000 +100000 + +main +# Func Hash: +1152921517491748863 +# Num Counters: +1 +# Counter Values: +1 + +main +# Func Hash: +12884901887 +# Num Counters: +1 +# Counter Values: +1 + +csfdo_plain.c:barbar +# Func Hash: +1152921517491748863 +# Num Counters: +1 +# Counter Values: +100000 + +csfdo_plain.c:barbar +# Func Hash: +12884901887 +# Num Counters: +1 +# Counter Values: +100000 + +goo +# Func Hash: +1152921517491748863 +# Num Counters: +1 +# Counter Values: +100000 + +goo +# Func Hash: +12884901887 +# Num Counters: +1 +# Counter Values: +100000 + Index: test/Transforms/PGOProfile/Inputs/thinlto_cs.proftext =================================================================== --- test/Transforms/PGOProfile/Inputs/thinlto_cs.proftext +++ test/Transforms/PGOProfile/Inputs/thinlto_cs.proftext @@ -0,0 +1,72 @@ +# CSIR level Instrumentation Flag +:csir +cond.llvm.11253644763537639171 +# Func Hash: +1152921517491748863 +# Num Counters: +1 +# Counter Values: +200000 + +foo +# Func Hash: +29212902728 +# Num Counters: +2 +# Counter Values: +100000 +1 + +bar +# Func Hash: +1152921534274394772 +# Num Counters: +2 +# Counter Values: +0 +0 + +bar +# Func Hash: +29667547796 +# Num Counters: +2 +# Counter Values: +100000 +100000 + +main +# Func Hash: 
+1152921517491748863 +# Num Counters: +1 +# Counter Values: +1 + +main +# Func Hash: +12884901887 +# Num Counters: +1 +# Counter Values: +1 + +cspgo.c:foo +# Func Hash: +1152921563228422740 +# Num Counters: +4 +# Counter Values: +100000 +100000 +0 +1 + +cspgo_bar.c:cond +# Func Hash: +12884901887 +# Num Counters: +1 +# Counter Values: +200000 + Index: test/Transforms/PGOProfile/Inputs/thinlto_cspgo_bar_gen.ll =================================================================== --- test/Transforms/PGOProfile/Inputs/thinlto_cspgo_bar_gen.ll +++ test/Transforms/PGOProfile/Inputs/thinlto_cspgo_bar_gen.ll @@ -0,0 +1,84 @@ +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +$__llvm_profile_filename = comdat any + +$__llvm_profile_raw_version = comdat any + +@odd = common dso_local local_unnamed_addr global i32 0, align 4 +@even = common dso_local local_unnamed_addr global i32 0, align 4 +@__llvm_profile_filename = local_unnamed_addr constant [25 x i8] c"pass2/default_%m.profraw\00", comdat +@__llvm_profile_raw_version = local_unnamed_addr constant i64 216172782113783812, comdat + +; Function Attrs: inlinehint norecurse nounwind uwtable +define dso_local void @bar(i32 %n) local_unnamed_addr #0 !prof !29 { +entry: + %call = tail call fastcc i32 @cond(i32 %n) + %tobool = icmp eq i32 %call, 0 + br i1 %tobool, label %if.else, label %if.then, !prof !30 + +if.then: ; preds = %entry + %0 = load i32, i32* @odd, align 4, !tbaa !31 + %inc = add i32 %0, 1 + store i32 %inc, i32* @odd, align 4, !tbaa !31 + br label %if.end + +if.else: ; preds = %entry + %1 = load i32, i32* @even, align 4, !tbaa !31 + %inc1 = add i32 %1, 1 + store i32 %inc1, i32* @even, align 4, !tbaa !31 + br label %if.end + +if.end: ; preds = %if.else, %if.then + ret void +} + +; Function Attrs: inlinehint noinline norecurse nounwind readnone uwtable +define internal fastcc i32 @cond(i32 %i) unnamed_addr #1 !prof !29 !PGOFuncName !35 { +entry: + %rem = srem i32 
%i, 2 + ret i32 %rem +} + +attributes #0 = { inlinehint norecurse nounwind uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #1 = { inlinehint noinline norecurse nounwind readnone uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } + +!llvm.module.flags = !{!0, !1} +!llvm.ident = !{!28} + +!0 = !{i32 1, !"wchar_size", i32 4} +!1 = !{i32 1, !"ProfileSummary", !2} +!2 = !{!3, !4, !5, !6, !7, !8, !9, !10} +!3 = !{!"ProfileFormat", !"InstrProf"} +!4 = !{!"TotalCount", i64 500002} +!5 = !{!"MaxCount", i64 200000} +!6 = !{!"MaxInternalCount", i64 100000} +!7 = !{!"MaxFunctionCount", i64 200000} +!8 = !{!"NumCounts", i64 6} +!9 = !{!"NumFunctions", i64 4} +!10 = !{!"DetailedSummary", !11} +!11 = !{!12, !13, !14, !15, !16, !17, !18, !19, !20, !21, !22, !23, !24, !25, !26, !27} +!12 = !{i32 10000, i64 200000, i32 1} +!13 = !{i32 100000, i64 200000, i32 1} +!14 = !{i32 200000, i64 200000, i32 1} +!15 = !{i32 300000, i64 200000, i32 1} +!16 = !{i32 400000, i64 200000, i32 1} +!17 = !{i32 500000, i64 100000, i32 4} +!18 = !{i32 600000, i64 100000, i32 4} +!19 = !{i32 700000, i64 100000, i32 4} +!20 = !{i32 800000, i64 100000, i32 4} +!21 = !{i32 
900000, i64 100000, i32 4} +!22 = !{i32 950000, i64 100000, i32 4} +!23 = !{i32 990000, i64 100000, i32 4} +!24 = !{i32 999000, i64 100000, i32 4} +!25 = !{i32 999900, i64 100000, i32 4} +!26 = !{i32 999990, i64 100000, i32 4} +!27 = !{i32 999999, i64 1, i32 6} +!28 = !{!"clang version 9.0.0 (trunk 353246)"} +!29 = !{!"function_entry_count", i64 200000} +!30 = !{!"branch_weights", i32 100000, i32 100000} +!31 = !{!32, !32, i64 0} +!32 = !{!"int", !33, i64 0} +!33 = !{!"omnipotent char", !34, i64 0} +!34 = !{!"Simple C/C++ TBAA"} +!35 = !{!"cspgo_bar.c:cond"} Index: test/Transforms/PGOProfile/Inputs/thinlto_cspgo_bar_use.ll =================================================================== --- test/Transforms/PGOProfile/Inputs/thinlto_cspgo_bar_use.ll +++ test/Transforms/PGOProfile/Inputs/thinlto_cspgo_bar_use.ll @@ -0,0 +1,78 @@ +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +@odd = common dso_local local_unnamed_addr global i32 0, align 4 +@even = common dso_local local_unnamed_addr global i32 0, align 4 + +; Function Attrs: inlinehint norecurse nounwind uwtable +define dso_local void @bar(i32 %n) local_unnamed_addr #0 !prof !29 { +entry: + %call = tail call fastcc i32 @cond(i32 %n) + %tobool = icmp eq i32 %call, 0 + br i1 %tobool, label %if.else, label %if.then, !prof !30 + +if.then: ; preds = %entry + %0 = load i32, i32* @odd, align 4, !tbaa !31 + %inc = add i32 %0, 1 + store i32 %inc, i32* @odd, align 4, !tbaa !31 + br label %if.end + +if.else: ; preds = %entry + %1 = load i32, i32* @even, align 4, !tbaa !31 + %inc1 = add i32 %1, 1 + store i32 %inc1, i32* @even, align 4, !tbaa !31 + br label %if.end + +if.end: ; preds = %if.else, %if.then + ret void +} + +; Function Attrs: inlinehint noinline norecurse nounwind readnone uwtable +define internal fastcc i32 @cond(i32 %i) unnamed_addr #1 !prof !29 !PGOFuncName !35 { +entry: + %rem = srem i32 %i, 2 + ret i32 %rem +} + +attributes #0 = { inlinehint 
norecurse nounwind uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #1 = { inlinehint noinline norecurse nounwind readnone uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } + +!llvm.module.flags = !{!0, !1} +!llvm.ident = !{!28} + +!0 = !{i32 1, !"wchar_size", i32 4} +!1 = !{i32 1, !"ProfileSummary", !2} +!2 = !{!3, !4, !5, !6, !7, !8, !9, !10} +!3 = !{!"ProfileFormat", !"InstrProf"} +!4 = !{!"TotalCount", i64 500002} +!5 = !{!"MaxCount", i64 200000} +!6 = !{!"MaxInternalCount", i64 100000} +!7 = !{!"MaxFunctionCount", i64 200000} +!8 = !{!"NumCounts", i64 6} +!9 = !{!"NumFunctions", i64 4} +!10 = !{!"DetailedSummary", !11} +!11 = !{!12, !13, !14, !15, !16, !17, !18, !19, !20, !21, !22, !23, !24, !25, !26, !27} +!12 = !{i32 10000, i64 200000, i32 1} +!13 = !{i32 100000, i64 200000, i32 1} +!14 = !{i32 200000, i64 200000, i32 1} +!15 = !{i32 300000, i64 200000, i32 1} +!16 = !{i32 400000, i64 200000, i32 1} +!17 = !{i32 500000, i64 100000, i32 4} +!18 = !{i32 600000, i64 100000, i32 4} +!19 = !{i32 700000, i64 100000, i32 4} +!20 = !{i32 800000, i64 100000, i32 4} +!21 = !{i32 900000, i64 100000, i32 4} +!22 = !{i32 950000, i64 
100000, i32 4} +!23 = !{i32 990000, i64 100000, i32 4} +!24 = !{i32 999000, i64 100000, i32 4} +!25 = !{i32 999900, i64 100000, i32 4} +!26 = !{i32 999990, i64 100000, i32 4} +!27 = !{i32 999999, i64 1, i32 6} +!28 = !{!"clang version 9.0.0 (trunk 353246)"} +!29 = !{!"function_entry_count", i64 200000} +!30 = !{!"branch_weights", i32 100000, i32 100000} +!31 = !{!32, !32, i64 0} +!32 = !{!"int", !33, i64 0} +!33 = !{!"omnipotent char", !34, i64 0} +!34 = !{!"Simple C/C++ TBAA"} +!35 = !{!"cspgo_bar.c:cond"} Index: test/Transforms/PGOProfile/cspgo_profile_summary.ll =================================================================== --- test/Transforms/PGOProfile/cspgo_profile_summary.ll +++ test/Transforms/PGOProfile/cspgo_profile_summary.ll @@ -0,0 +1,211 @@ +; Test the profile summary for context sensitive PGO (CSPGO) + +; RUN: llvm-profdata merge %S/Inputs/cspgo.proftext -o %t.profdata +; RUN: opt < %s -O2 -disable-preinline -pgo-kind=pgo-instr-use-pipeline -profile-file=%t.profdata -S | FileCheck %s --check-prefix=PGOSUMMARY +; RUN: opt < %s -O2 -disable-preinline -pgo-kind=pgo-instr-use-pipeline -profile-file=%t.profdata -S -cspgo-kind=cspgo-instr-use-pipeline| FileCheck %s --check-prefix=CSPGOSUMMARY + +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +@odd = common dso_local global i32 0, align 4 +@even = common dso_local global i32 0, align 4 +@not_six = common dso_local global i32 0, align 4 + +define dso_local i32 @goo(i32 %n) { +entry: + %i = alloca i32, align 4 + %i.0..sroa_cast = bitcast i32* %i to i8* + store volatile i32 %n, i32* %i, align 4 + %i.0. = load volatile i32, i32* %i, align 4 + ret i32 %i.0. 
+} + +define dso_local void @bar(i32 %n) { +entry: + %call = call fastcc i32 @cond(i32 %n) + %tobool = icmp eq i32 %call, 0 + br i1 %tobool, label %if.else, label %if.then + +if.then: + %0 = load i32, i32* @odd, align 4 + %inc = add i32 %0, 1 + store i32 %inc, i32* @odd, align 4 + br label %if.end + +if.else: + %1 = load i32, i32* @even, align 4 + %inc1 = add i32 %1, 1 + store i32 %inc1, i32* @even, align 4 + br label %if.end + +if.end: + br label %for.cond + +for.cond: + %i.0 = phi i32 [ 0, %if.end ], [ %inc6, %for.inc ] + %cmp = icmp ult i32 %i.0, 4 + br i1 %cmp, label %for.body, label %for.end + +for.body: + %mul = mul nsw i32 %i.0, %n + %rem = srem i32 %mul, 6 + %tobool2 = icmp eq i32 %rem, 0 + br i1 %tobool2, label %for.inc, label %if.then3 + +if.then3: + %2 = load i32, i32* @not_six, align 4 + %inc4 = add i32 %2, 1 + store i32 %inc4, i32* @not_six, align 4 + br label %for.inc + +for.inc: + %inc6 = add nuw nsw i32 %i.0, 1 + br label %for.cond + +for.end: + ret void +} + +define internal fastcc i32 @cond(i32 %i) { +entry: + %rem = srem i32 %i, 2 + ret i32 %rem +} + +define dso_local void @foo() { +entry: + br label %for.cond + +for.cond: + %i.0 = phi i32 [ 0, %entry ], [ %add4, %for.body ] + %cmp = icmp slt i32 %i.0, 200000 + br i1 %cmp, label %for.body, label %for.end + +for.body: + %call = call i32 @goo(i32 %i.0) + call void @bar(i32 %call) + %add = add nsw i32 %call, 1 + call void @bar(i32 %add) + %call1 = call i32 @bar_m(i32 %call) #4 + %call3 = call i32 @bar_m2(i32 %add) #4 + call fastcc void @barbar() + %add4 = add nsw i32 %call, 2 + br label %for.cond + +for.end: + ret void +} + +declare dso_local i32 @bar_m(i32) +declare dso_local i32 @bar_m2(i32) + +define internal fastcc void @barbar() { +entry: + %0 = load i32, i32* @odd, align 4 + %inc = add i32 %0, 1 + store i32 %inc, i32* @odd, align 4 + ret void +} + +define dso_local i32 @main() { +entry: + call void @foo() + ret i32 0 +} + +; PGOSUMMARY: !0 = !{i32 1, !"ProfileSummary", !1} +; PGOSUMMARY: !1 = 
!{!2, !3, !4, !5, !6, !7, !8, !9} +; PGOSUMMARY: !2 = !{!"ProfileFormat", !"InstrProf"} +; PGOSUMMARY: !3 = !{!"TotalCount", i64 2100001} +; PGOSUMMARY: !4 = !{!"MaxCount", i64 800000} +; PGOSUMMARY: !5 = !{!"MaxInternalCount", i64 399999} +; PGOSUMMARY: !6 = !{!"MaxFunctionCount", i64 800000} +; PGOSUMMARY: !7 = !{!"NumCounts", i64 14} +; PGOSUMMARY: !8 = !{!"NumFunctions", i64 8} +; PGOSUMMARY: !9 = !{!"DetailedSummary", !10} +; PGOSUMMARY: !10 = !{!11, !12, !13, !14, !15, !16, !17, !18, !19, !20, !21, !22, !23, !24, !25, !26} +; PGOSUMMARY: !11 = !{i32 10000, i64 800000, i32 1} +; PGOSUMMARY: !12 = !{i32 100000, i64 800000, i32 1} +; PGOSUMMARY: !13 = !{i32 200000, i64 800000, i32 1} +; PGOSUMMARY: !14 = !{i32 300000, i64 800000, i32 1} +; PGOSUMMARY: !15 = !{i32 400000, i64 399999, i32 2} +; PGOSUMMARY: !16 = !{i32 500000, i64 399999, i32 2} +; PGOSUMMARY: !17 = !{i32 600000, i64 200000, i32 3} +; PGOSUMMARY: !18 = !{i32 700000, i64 100000, i32 8} +; PGOSUMMARY: !19 = !{i32 800000, i64 100000, i32 8} +; PGOSUMMARY: !20 = !{i32 900000, i64 100000, i32 8} +; PGOSUMMARY: !21 = !{i32 950000, i64 99949, i32 9} +; PGOSUMMARY: !22 = !{i32 990000, i64 99938, i32 10} +; PGOSUMMARY: !23 = !{i32 999000, i64 99938, i32 10} +; PGOSUMMARY: !24 = !{i32 999900, i64 99938, i32 10} +; PGOSUMMARY: !25 = !{i32 999990, i64 51, i32 12} +; PGOSUMMARY: !26 = !{i32 999999, i64 51, i32 12} +; PGOSUMMARY: !27 = !{!"function_entry_count", i64 100000} +; PGOSUMMARY: !28 = !{!"function_entry_count", i64 200000} +; PGOSUMMARY: !29 = !{!"branch_weights", i32 100000, i32 100000} +; PGOSUMMARY: !30 = !{!"branch_weights", i32 400001, i32 399999} +; PGOSUMMARY: !31 = !{!"function_entry_count", i64 1} +; PGOSUMMARY: !32 = !{!"branch_weights", i32 100000, i32 1} +; CSPGOSUMMARY: !0 = !{i32 1, !"ProfileSummary", !1} +; CSPGOSUMMARY: !1 = !{!2, !3, !4, !5, !6, !7, !8, !9} +; CSPGOSUMMARY: !2 = !{!"ProfileFormat", !"InstrProf"} +; CSPGOSUMMARY: !3 = !{!"TotalCount", i64 2100001} +; CSPGOSUMMARY: !4 = 
!{!"MaxCount", i64 800000} +; CSPGOSUMMARY: !5 = !{!"MaxInternalCount", i64 399999} +; CSPGOSUMMARY: !6 = !{!"MaxFunctionCount", i64 800000} +; CSPGOSUMMARY: !7 = !{!"NumCounts", i64 14} +; CSPGOSUMMARY: !8 = !{!"NumFunctions", i64 8} +; CSPGOSUMMARY: !9 = !{!"DetailedSummary", !10} +; CSPGOSUMMARY: !10 = !{!11, !12, !13, !14, !15, !16, !17, !18, !19, !20, !21, !22, !23, !24, !25, !26} +; CSPGOSUMMARY: !11 = !{i32 10000, i64 800000, i32 1} +; CSPGOSUMMARY: !12 = !{i32 100000, i64 800000, i32 1} +; CSPGOSUMMARY: !13 = !{i32 200000, i64 800000, i32 1} +; CSPGOSUMMARY: !14 = !{i32 300000, i64 800000, i32 1} +; CSPGOSUMMARY: !15 = !{i32 400000, i64 399999, i32 2} +; CSPGOSUMMARY: !16 = !{i32 500000, i64 399999, i32 2} +; CSPGOSUMMARY: !17 = !{i32 600000, i64 200000, i32 3} +; CSPGOSUMMARY: !18 = !{i32 700000, i64 100000, i32 8} +; CSPGOSUMMARY: !19 = !{i32 800000, i64 100000, i32 8} +; CSPGOSUMMARY: !20 = !{i32 900000, i64 100000, i32 8} +; CSPGOSUMMARY: !21 = !{i32 950000, i64 99949, i32 9} +; CSPGOSUMMARY: !22 = !{i32 990000, i64 99938, i32 10} +; CSPGOSUMMARY: !23 = !{i32 999000, i64 99938, i32 10} +; CSPGOSUMMARY: !24 = !{i32 999900, i64 99938, i32 10} +; CSPGOSUMMARY: !25 = !{i32 999990, i64 51, i32 12} +; CSPGOSUMMARY: !26 = !{i32 999999, i64 51, i32 12} +; CSPGOSUMMARY: !27 = !{i32 1, !"CSProfileSummary", !28} +; CSPGOSUMMARY: !28 = !{!29, !30, !31, !32, !33, !34, !8, !35} +; CSPGOSUMMARY: !29 = !{!"ProfileFormat", !"CSInstrProf"} +; CSPGOSUMMARY: !30 = !{!"TotalCount", i64 1299950} +; CSPGOSUMMARY: !31 = !{!"MaxCount", i64 200000} +; CSPGOSUMMARY: !32 = !{!"MaxInternalCount", i64 100000} +; CSPGOSUMMARY: !33 = !{!"MaxFunctionCount", i64 200000} +; CSPGOSUMMARY: !34 = !{!"NumCounts", i64 23} +; CSPGOSUMMARY: !35 = !{!"DetailedSummary", !36} +; CSPGOSUMMARY: !36 = !{!37, !38, !39, !40, !41, !42, !43, !44, !45, !46, !47, !48, !49, !50, !51, !52} +; CSPGOSUMMARY: !37 = !{i32 10000, i64 200000, i32 1} +; CSPGOSUMMARY: !38 = !{i32 100000, i64 200000, i32 1} +; 
CSPGOSUMMARY: !39 = !{i32 200000, i64 100000, i32 8} +; CSPGOSUMMARY: !40 = !{i32 300000, i64 100000, i32 8} +; CSPGOSUMMARY: !41 = !{i32 400000, i64 100000, i32 8} +; CSPGOSUMMARY: !42 = !{i32 500000, i64 100000, i32 8} +; CSPGOSUMMARY: !43 = !{i32 600000, i64 100000, i32 8} +; CSPGOSUMMARY: !44 = !{i32 700000, i64 99949, i32 9} +; CSPGOSUMMARY: !45 = !{i32 800000, i64 99938, i32 10} +; CSPGOSUMMARY: !46 = !{i32 900000, i64 66666, i32 13} +; CSPGOSUMMARY: !47 = !{i32 950000, i64 66666, i32 13} +; CSPGOSUMMARY: !48 = !{i32 990000, i64 66666, i32 13} +; CSPGOSUMMARY: !49 = !{i32 999000, i64 66666, i32 13} +; CSPGOSUMMARY: !50 = !{i32 999900, i64 66666, i32 13} +; CSPGOSUMMARY: !51 = !{i32 999990, i64 62, i32 14} +; CSPGOSUMMARY: !52 = !{i32 999999, i64 62, i32 14} +; CSPGOSUMMARY: !53 = !{!"function_entry_count", i64 100000} +; CSPGOSUMMARY: !54 = !{!"function_entry_count", i64 0} +; CSPGOSUMMARY: !55 = !{!"branch_weights", i32 100000, i32 100000} +; CSPGOSUMMARY: !56 = !{!"branch_weights", i32 400001, i32 399999} +; CSPGOSUMMARY: !57 = !{!"function_entry_count", i64 1} +; CSPGOSUMMARY: !58 = !{!"branch_weights", i32 100000, i32 0} +; CSPGOSUMMARY: !59 = !{!"branch_weights", i32 33334, i32 66666} +; CSPGOSUMMARY: !60 = !{!"branch_weights", i32 0, i32 100000} +; CSPGOSUMMARY: !61 = !{!"branch_weights", i32 33333, i32 66667} +; CSPGOSUMMARY: !62 = !{!"branch_weights", i32 99999, i32 1} Index: test/Transforms/PGOProfile/thinlto_cspgo_gen.ll =================================================================== --- test/Transforms/PGOProfile/thinlto_cspgo_gen.ll +++ test/Transforms/PGOProfile/thinlto_cspgo_gen.ll @@ -0,0 +1,95 @@ +; RUN: opt -module-summary %s -o %t1.bc +; RUN: opt -module-summary %S/Inputs/thinlto_cspgo_bar_gen.ll -o %t2.bc +; RUN: llvm-lto2 run -lto-cspgo-profile-file=alloc -lto-cspgo-gen -save-temps -o %t %t1.bc %t2.bc \ +; RUN: -r=%t1.bc,foo,pl \ +; RUN: -r=%t1.bc,bar,l \ +; RUN: -r=%t1.bc,main,plx \ +; RUN: -r=%t1.bc,__llvm_profile_filename,plx \ +; 
RUN: -r=%t1.bc,__llvm_profile_raw_version,plx \ +; RUN: -r=%t2.bc,bar,pl \ +; RUN: -r=%t2.bc,odd,pl \ +; RUN: -r=%t2.bc,even,pl \ +; RUN: -r=%t2.bc,__llvm_profile_filename,x \ +; RUN: -r=%t2.bc,__llvm_profile_raw_version,x +; RUN: llvm-dis %t.1.4.opt.bc -o - | FileCheck %s --check-prefix=CSGEN + +; CSGEN: @__profc_ +; CSGEN: @__profd_ + +source_filename = "cspgo.c" +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +$__llvm_profile_filename = comdat any + +$__llvm_profile_raw_version = comdat any + +@__llvm_profile_filename = local_unnamed_addr constant [25 x i8] c"pass2/default_%m.profraw\00", comdat +@__llvm_profile_raw_version = local_unnamed_addr constant i64 216172782113783812, comdat + +; Function Attrs: noinline nounwind uwtable +define dso_local void @foo() local_unnamed_addr #0 !prof !29 { +entry: + br label %for.body + +for.body: ; preds = %entry, %for.body + %i.06 = phi i32 [ 0, %entry ], [ %add1, %for.body ] + tail call void @bar(i32 %i.06) #3 + %add = or i32 %i.06, 1 + tail call void @bar(i32 %add) #3 + %add1 = add nuw nsw i32 %i.06, 2 + %cmp = icmp ult i32 %add1, 200000 + br i1 %cmp, label %for.body, label %for.end, !prof !30 + +for.end: ; preds = %for.body + ret void +} + +declare dso_local void @bar(i32) local_unnamed_addr #1 + +; Function Attrs: cold nounwind uwtable +define dso_local i32 @main() local_unnamed_addr #2 !prof !29 { +entry: + tail call void @foo() + ret i32 0 +} + +attributes #0 = { noinline nounwind uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #1 = { 
"correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #2 = { cold nounwind uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #3 = { nounwind } + +!llvm.module.flags = !{!0, !1} +!llvm.ident = !{!28} + +!0 = !{i32 1, !"wchar_size", i32 4} +!1 = !{i32 1, !"ProfileSummary", !2} +!2 = !{!3, !4, !5, !6, !7, !8, !9, !10} +!3 = !{!"ProfileFormat", !"InstrProf"} +!4 = !{!"TotalCount", i64 500002} +!5 = !{!"MaxCount", i64 200000} +!6 = !{!"MaxInternalCount", i64 100000} +!7 = !{!"MaxFunctionCount", i64 200000} +!8 = !{!"NumCounts", i64 6} +!9 = !{!"NumFunctions", i64 4} +!10 = !{!"DetailedSummary", !11} +!11 = !{!12, !13, !14, !15, !16, !17, !18, !19, !20, !21, !22, !23, !24, !25, !26, !27} +!12 = !{i32 10000, i64 200000, i32 1} +!13 = !{i32 100000, i64 200000, i32 1} +!14 = !{i32 200000, i64 200000, i32 1} +!15 = !{i32 300000, i64 200000, i32 1} +!16 = !{i32 400000, i64 200000, i32 1} +!17 = !{i32 500000, i64 100000, i32 4} +!18 = !{i32 600000, i64 100000, i32 4} +!19 = !{i32 700000, i64 100000, i32 4} +!20 = !{i32 800000, i64 100000, i32 4} +!21 = !{i32 900000, i64 100000, i32 4} +!22 = !{i32 950000, i64 100000, i32 4} +!23 = !{i32 990000, i64 100000, i32 4} +!24 = !{i32 999000, i64 100000, 
i32 4} +!25 = !{i32 999900, i64 100000, i32 4} +!26 = !{i32 999990, i64 100000, i32 4} +!27 = !{i32 999999, i64 1, i32 6} +!28 = !{!"clang version 9.0.0 (trunk 353246)"} +!29 = !{!"function_entry_count", i64 1} +!30 = !{!"branch_weights", i32 100000, i32 1} Index: test/Transforms/PGOProfile/thinlto_cspgo_use.ll =================================================================== --- test/Transforms/PGOProfile/thinlto_cspgo_use.ll +++ test/Transforms/PGOProfile/thinlto_cspgo_use.ll @@ -0,0 +1,87 @@ +; RUN: opt -module-summary %s -o %t1.bc +; RUN: opt -module-summary %S/Inputs/thinlto_cspgo_bar_use.ll -o %t2.bc +; RUN: llvm-profdata merge %S/Inputs/thinlto_cs.proftext -o %t3.profdata +; RUN: llvm-lto2 run -lto-cspgo-profile-file=%t3.profdata -save-temps -o %t %t1.bc %t2.bc \ +; RUN: -r=%t1.bc,foo,pl \ +; RUN: -r=%t1.bc,bar,l \ +; RUN: -r=%t1.bc,main,plx \ +; RUN: -r=%t2.bc,bar,pl \ +; RUN: -r=%t2.bc,odd,pl \ +; RUN: -r=%t2.bc,even,pl +; RUN: llvm-dis %t.1.4.opt.bc -o - | FileCheck %s --check-prefix=CSUSE + +; CSUSE: {{![0-9]+}} = !{i32 1, !"ProfileSummary", {{![0-9]+}}} +; CSUSE: {{![0-9]+}} = !{i32 1, !"CSProfileSummary", {{![0-9]+}}} +; CSUSE-DAG: {{![0-9]+}} = !{!"branch_weights", i32 100000, i32 0} +; CSUSE-DAG: {{![0-9]+}} = !{!"branch_weights", i32 0, i32 100000} + +source_filename = "cspgo.c" +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +; Function Attrs: noinline nounwind uwtable +define dso_local void @foo() local_unnamed_addr #0 !prof !29 { +entry: + br label %for.body + +for.body: ; preds = %entry, %for.body + %i.06 = phi i32 [ 0, %entry ], [ %add1, %for.body ] + tail call void @bar(i32 %i.06) #3 + %add = or i32 %i.06, 1 + tail call void @bar(i32 %add) #3 + %add1 = add nuw nsw i32 %i.06, 2 + %cmp = icmp ult i32 %add1, 200000 + br i1 %cmp, label %for.body, label %for.end, !prof !30 + +for.end: ; preds = %for.body + ret void +} + +declare dso_local void @bar(i32) local_unnamed_addr #1 + +; Function 
Attrs: cold nounwind uwtable +define dso_local i32 @main() local_unnamed_addr #2 !prof !29 { +entry: + tail call void @foo() + ret i32 0 +} + +attributes #0 = { noinline nounwind uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #1 = { "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #2 = { cold nounwind uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #3 = { nounwind } + +!llvm.module.flags = !{!0, !1} +!llvm.ident = !{!28} + +!0 = !{i32 1, !"wchar_size", i32 4} +!1 = !{i32 1, !"ProfileSummary", !2} +!2 = !{!3, !4, !5, !6, !7, !8, !9, !10} +!3 = !{!"ProfileFormat", !"InstrProf"} +!4 = !{!"TotalCount", i64 500002} +!5 = !{!"MaxCount", i64 200000} +!6 = !{!"MaxInternalCount", i64 100000} +!7 = !{!"MaxFunctionCount", i64 200000} +!8 = !{!"NumCounts", i64 6} +!9 
= !{!"NumFunctions", i64 4} +!10 = !{!"DetailedSummary", !11} +!11 = !{!12, !13, !14, !15, !16, !17, !18, !19, !20, !21, !22, !23, !24, !25, !26, !27} +!12 = !{i32 10000, i64 200000, i32 1} +!13 = !{i32 100000, i64 200000, i32 1} +!14 = !{i32 200000, i64 200000, i32 1} +!15 = !{i32 300000, i64 200000, i32 1} +!16 = !{i32 400000, i64 200000, i32 1} +!17 = !{i32 500000, i64 100000, i32 4} +!18 = !{i32 600000, i64 100000, i32 4} +!19 = !{i32 700000, i64 100000, i32 4} +!20 = !{i32 800000, i64 100000, i32 4} +!21 = !{i32 900000, i64 100000, i32 4} +!22 = !{i32 950000, i64 100000, i32 4} +!23 = !{i32 990000, i64 100000, i32 4} +!24 = !{i32 999000, i64 100000, i32 4} +!25 = !{i32 999900, i64 100000, i32 4} +!26 = !{i32 999990, i64 100000, i32 4} +!27 = !{i32 999999, i64 1, i32 6} +!28 = !{!"clang version 9.0.0 (trunk 353246)"} +!29 = !{!"function_entry_count", i64 1} +!30 = !{!"branch_weights", i32 100000, i32 1} Index: test/tools/gold/X86/Inputs/cspgo.proftext =================================================================== --- test/tools/gold/X86/Inputs/cspgo.proftext +++ test/tools/gold/X86/Inputs/cspgo.proftext @@ -0,0 +1,39 @@ +# CSIR level Instrumentation Flag +:csir +csfdo_bar.c:cond +# Func Hash: +1152921517491748863 +# Num Counters: +1 +# Counter Values: +200000 + +main +# Func Hash: +1152921517491748863 +# Num Counters: +1 +# Counter Values: +1 + +bar +# Func Hash: +1152921569533132113 +# Num Counters: +5 +# Counter Values: +100000 +100000 +166666 +133333 +100000 + +csfdo.c:foo +# Func Hash: +1152921527029665692 +# Num Counters: +2 +# Counter Values: +99999 +1 + Index: test/tools/gold/X86/Inputs/thinlto_cspgo_bar.ll =================================================================== --- test/tools/gold/X86/Inputs/thinlto_cspgo_bar.ll +++ test/tools/gold/X86/Inputs/thinlto_cspgo_bar.ll @@ -0,0 +1,117 @@ +; ModuleID = 'csfdo_bar.c' +source_filename = "csfdo_bar.c" +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target triple = 
"x86_64-unknown-linux-gnu" + +@odd = common dso_local local_unnamed_addr global i32 0, align 4 +@even = common dso_local local_unnamed_addr global i32 0, align 4 +@not_six = common dso_local local_unnamed_addr global i32 0, align 4 + +; Function Attrs: inlinehint norecurse nounwind uwtable +define dso_local void @bar(i32 %n) local_unnamed_addr #0 !prof !29 { +entry: + %call = tail call fastcc i32 @cond(i32 %n) + %tobool = icmp eq i32 %call, 0 + br i1 %tobool, label %if.else, label %if.then, !prof !30 + +if.then: ; preds = %entry + %0 = load i32, i32* @odd, align 4, !tbaa !31 + %inc = add i32 %0, 1 + store i32 %inc, i32* @odd, align 4, !tbaa !31 + br label %for.inc + +if.else: ; preds = %entry + %1 = load i32, i32* @even, align 4, !tbaa !31 + %inc1 = add i32 %1, 1 + store i32 %inc1, i32* @even, align 4, !tbaa !31 + br label %for.inc + +for.inc: ; preds = %if.then, %if.else + %rem.1 = srem i32 %n, 6 + %tobool2.1 = icmp eq i32 %rem.1, 0 + br i1 %tobool2.1, label %for.inc.1, label %if.then3.1, !prof !35 + +if.then3.1: ; preds = %for.inc + %2 = load i32, i32* @not_six, align 4, !tbaa !31 + %inc4.1 = add i32 %2, 1 + store i32 %inc4.1, i32* @not_six, align 4, !tbaa !31 + br label %for.inc.1 + +for.inc.1: ; preds = %if.then3.1, %for.inc + %mul.2 = shl nsw i32 %n, 1 + %rem.2 = srem i32 %mul.2, 6 + %tobool2.2 = icmp eq i32 %rem.2, 0 + br i1 %tobool2.2, label %for.inc.2, label %if.then3.2, !prof !35 + +if.then3.2: ; preds = %for.inc.1 + %3 = load i32, i32* @not_six, align 4, !tbaa !31 + %inc4.2 = add i32 %3, 1 + store i32 %inc4.2, i32* @not_six, align 4, !tbaa !31 + br label %for.inc.2 + +for.inc.2: ; preds = %if.then3.2, %for.inc.1 + %mul.3 = mul nsw i32 %n, 3 + %rem.3 = srem i32 %mul.3, 6 + %tobool2.3 = icmp eq i32 %rem.3, 0 + br i1 %tobool2.3, label %for.inc.3, label %if.then3.3, !prof !35 + +if.then3.3: ; preds = %for.inc.2 + %4 = load i32, i32* @not_six, align 4, !tbaa !31 + %inc4.3 = add i32 %4, 1 + store i32 %inc4.3, i32* @not_six, align 4, !tbaa !31 + br label 
%for.inc.3 + +for.inc.3: ; preds = %if.then3.3, %for.inc.2 + ret void +} + +; Function Attrs: inlinehint noinline norecurse nounwind readnone uwtable +define internal fastcc i32 @cond(i32 %i) unnamed_addr #1 !prof !29 !PGOFuncName !36 { +entry: + %rem = srem i32 %i, 2 + ret i32 %rem +} + +attributes #0 = { inlinehint norecurse nounwind uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #1 = { inlinehint noinline norecurse nounwind readnone uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } + +!llvm.module.flags = !{!0, !1} +!llvm.ident = !{!28} + +!0 = !{i32 1, !"wchar_size", i32 4} +!1 = !{i32 1, !"ProfileSummary", !2} +!2 = !{!3, !4, !5, !6, !7, !8, !9, !10} +!3 = !{!"ProfileFormat", !"InstrProf"} +!4 = !{!"TotalCount", i64 1700001} +!5 = !{!"MaxCount", i64 800000} +!6 = !{!"MaxInternalCount", i64 399999} +!7 = !{!"MaxFunctionCount", i64 800000} +!8 = !{!"NumCounts", i64 8} +!9 = !{!"NumFunctions", i64 4} +!10 = !{!"DetailedSummary", !11} +!11 = !{!12, !13, !14, !15, !16, !17, !18, !19, !20, !21, !22, !23, !24, !25, !26, !27} +!12 = !{i32 10000, i64 800000, i32 1} +!13 = !{i32 100000, i64 800000, i32 1} +!14 = !{i32 200000, i64 
800000, i32 1} +!15 = !{i32 300000, i64 800000, i32 1} +!16 = !{i32 400000, i64 800000, i32 1} +!17 = !{i32 500000, i64 399999, i32 2} +!18 = !{i32 600000, i64 399999, i32 2} +!19 = !{i32 700000, i64 399999, i32 2} +!20 = !{i32 800000, i64 200000, i32 3} +!21 = !{i32 900000, i64 100000, i32 6} +!22 = !{i32 950000, i64 100000, i32 6} +!23 = !{i32 990000, i64 100000, i32 6} +!24 = !{i32 999000, i64 100000, i32 6} +!25 = !{i32 999900, i64 100000, i32 6} +!26 = !{i32 999990, i64 100000, i32 6} +!27 = !{i32 999999, i64 100000, i32 6} +!28 = !{!"clang version 9.0.0 (trunk 353246)"} +!29 = !{!"function_entry_count", i64 200000} +!30 = !{!"branch_weights", i32 100000, i32 100000} +!31 = !{!32, !32, i64 0} +!32 = !{!"int", !33, i64 0} +!33 = !{!"omnipotent char", !34, i64 0} +!34 = !{!"Simple C/C++ TBAA"} +!35 = !{!"branch_weights", i32 400001, i32 399999} +!36 = !{!"csfdo_bar.c:cond"} Index: test/tools/gold/X86/thinlto_cspgo.ll =================================================================== --- test/tools/gold/X86/thinlto_cspgo.ll +++ test/tools/gold/X86/thinlto_cspgo.ll @@ -0,0 +1,85 @@ +; Generate summary sections +; RUN: opt -module-summary %s -o %t1.o +; RUN: opt -module-summary %p/Inputs/thinlto_cspgo_bar.ll -o %t2.o +; RUN: llvm-profdata merge -o %t.profdata %p/Inputs/cspgo.proftext + +; RUN: rm -f %t1.o.4.opt.bc +; RUN: %gold -plugin %llvmshlibdir/LLVMgold%shlibext \ +; RUN: -m elf_x86_64 \ +; RUN: --plugin-opt=thinlto \ +; RUN: --plugin-opt=save-temps \ +; RUN: --plugin-opt=cs-profile-path=%t.profdata \ +; RUN: --plugin-opt=jobs=1 \ +; RUN: %t1.o %t2.o -o %t3 +; RUN: opt -S %t1.o.4.opt.bc | FileCheck %s + +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +; CHECK: CSProfileSummary + +define dso_local void @foo() local_unnamed_addr #0 !prof !29 { +entry: + br label %for.body + +for.body: ; preds = %entry, %for.body + %i.06 = phi i32 [ 0, %entry ], [ %add1, %for.body ] + tail call void @bar(i32 %i.06) #3 + 
%add = or i32 %i.06, 1 + tail call void @bar(i32 %add) #3 + %add1 = add nuw nsw i32 %i.06, 2 + %cmp = icmp ult i32 %add1, 200000 + br i1 %cmp, label %for.body, label %for.end, !prof !30 + +for.end: ; preds = %for.body + ret void +} + +declare dso_local void @bar(i32) local_unnamed_addr #1 + +; Function Attrs: cold nounwind uwtable +define dso_local i32 @main() local_unnamed_addr #2 !prof !29 { +entry: + tail call void @foo() + ret i32 0 +} + +attributes #0 = { noinline nounwind uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #1 = { "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #2 = { cold nounwind uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #3 = { nounwind } + +!llvm.module.flags = !{!0, !1} +!llvm.ident = !{!28} + +!0 = !{i32 1, !"wchar_size", i32 
4} +!1 = !{i32 1, !"ProfileSummary", !2} +!2 = !{!3, !4, !5, !6, !7, !8, !9, !10} +!3 = !{!"ProfileFormat", !"InstrProf"} +!4 = !{!"TotalCount", i64 1700001} +!5 = !{!"MaxCount", i64 800000} +!6 = !{!"MaxInternalCount", i64 399999} +!7 = !{!"MaxFunctionCount", i64 800000} +!8 = !{!"NumCounts", i64 8} +!9 = !{!"NumFunctions", i64 4} +!10 = !{!"DetailedSummary", !11} +!11 = !{!12, !13, !14, !15, !16, !17, !18, !19, !20, !21, !22, !23, !24, !25, !26, !27} +!12 = !{i32 10000, i64 800000, i32 1} +!13 = !{i32 100000, i64 800000, i32 1} +!14 = !{i32 200000, i64 800000, i32 1} +!15 = !{i32 300000, i64 800000, i32 1} +!16 = !{i32 400000, i64 800000, i32 1} +!17 = !{i32 500000, i64 399999, i32 2} +!18 = !{i32 600000, i64 399999, i32 2} +!19 = !{i32 700000, i64 399999, i32 2} +!20 = !{i32 800000, i64 200000, i32 3} +!21 = !{i32 900000, i64 100000, i32 6} +!22 = !{i32 950000, i64 100000, i32 6} +!23 = !{i32 990000, i64 100000, i32 6} +!24 = !{i32 999000, i64 100000, i32 6} +!25 = !{i32 999900, i64 100000, i32 6} +!26 = !{i32 999990, i64 100000, i32 6} +!27 = !{i32 999999, i64 100000, i32 6} +!28 = !{!"clang version 9.0.0 (trunk 353246)"} +!29 = !{!"function_entry_count", i64 1} +!30 = !{!"branch_weights", i32 100000, i32 1} Index: test/tools/llvm-profdata/Inputs/CSIR_profile.proftext =================================================================== --- test/tools/llvm-profdata/Inputs/CSIR_profile.proftext +++ test/tools/llvm-profdata/Inputs/CSIR_profile.proftext @@ -0,0 +1,20 @@ +# CSIR level Instrumentation Flag +:csir +bar +# Func Hash: +1152921534274394772 +# Num Counters: +2 +# Counter Values: +99938 +62 + +bar +# Func Hash: +29667547796 +# Num Counters: +2 +# Counter Values: +99938 +62 + Index: test/tools/llvm-profdata/Inputs/cs.proftext =================================================================== --- test/tools/llvm-profdata/Inputs/cs.proftext +++ test/tools/llvm-profdata/Inputs/cs.proftext @@ -0,0 +1,10 @@ +# CSIR level Instrumentation Flag +:csir +bar +# Func 
Hash: +1152921534274394772 +# Num Counters: +2 +# Counter Values: +99938 +62 Index: test/tools/llvm-profdata/Inputs/noncs.proftext =================================================================== --- test/tools/llvm-profdata/Inputs/noncs.proftext +++ test/tools/llvm-profdata/Inputs/noncs.proftext @@ -0,0 +1,11 @@ +# IR level Instrumentation Flag +:ir +bar +# Func Hash: +29667547796 +# Num Counters: +2 +# Counter Values: +99938 +62 + Index: test/tools/llvm-profdata/csprof-dump.test =================================================================== --- test/tools/llvm-profdata/csprof-dump.test +++ test/tools/llvm-profdata/csprof-dump.test @@ -0,0 +1,32 @@ + +Basic test for option -showcs: +RUN: llvm-profdata show %p/Inputs/cs.proftext | FileCheck %s -check-prefix=ZEROSUMMARY +RUN: llvm-profdata show %p/Inputs/noncs.proftext | FileCheck %s -check-prefix=SUMMARY +RUN: llvm-profdata show -showcs %p/Inputs/cs.proftext | FileCheck %s -check-prefix=SUMMARY +RUN: llvm-profdata show -showcs %p/Inputs/noncs.proftext | FileCheck %s -check-prefix=ZEROSUMMARY +ZEROSUMMARY: Instrumentation level: IR +ZEROSUMMARY: Total functions: 0 +ZEROSUMMARY: Maximum function count: 0 +ZEROSUMMARY: Maximum internal block count: 0 +SUMMARY: Instrumentation level: IR +SUMMARY: Total functions: 1 +SUMMARY: Maximum function count: 99938 +SUMMARY: Maximum internal block count: 62 + +Basic tests for context sensitive profile dump functions: +RUN: llvm-profdata merge -o %t-combined.profdata %p/Inputs/cs.proftext %p/Inputs/noncs.proftext + +RUN: llvm-profdata show --all-functions -counts -showcs %p/Inputs/cs.proftext > %t-text.csdump +RUN: llvm-profdata show --all-functions -counts -showcs %t-combined.profdata > %t-index.csdump +RUN: diff %t-text.csdump %t-index.csdump + +RUN: llvm-profdata show --all-functions -counts %p/Inputs/noncs.proftext > %t-text.noncsdump +RUN: llvm-profdata show --all-functions -counts %t-combined.profdata > %t-index.noncsdump +RUN: diff %t-text.noncsdump 
%t-index.noncsdump + +Roundtrip test: + +RUN: llvm-profdata merge -o %t.0.profdata %S/Inputs/CSIR_profile.proftext +RUN: llvm-profdata merge -text -o %t.0.proftext %t.0.profdata +RUN: diff %t.0.proftext %S/Inputs/CSIR_profile.proftext + Index: tools/gold/gold-plugin.cpp =================================================================== --- tools/gold/gold-plugin.cpp +++ tools/gold/gold-plugin.cpp @@ -209,6 +209,10 @@ static std::string OptRemarksFilename; static bool OptRemarksWithHotness = false; + // Context sensitive PGO options. + static std::string cs_profile_path; + static bool cs_pgo_gen = false; + static void process_plugin_option(const char *opt_) { if (opt_ == nullptr) @@ -268,7 +272,11 @@ } else if (opt == "disable-verify") { DisableVerify = true; } else if (opt.startswith("sample-profile=")) { - sample_profile= opt.substr(strlen("sample-profile=")); + sample_profile = opt.substr(strlen("sample-profile=")); + } else if (opt == "cs-profile-generate") { + cs_pgo_gen = true; + } else if (opt.startswith("cs-profile-path=")) { + cs_profile_path = opt.substr(strlen("cs-profile-path=")); } else if (opt == "new-pass-manager") { new_pass_manager = true; } else if (opt == "debug-pass-manager") { @@ -892,6 +900,10 @@ if (!options::sample_profile.empty()) Conf.SampleProfile = options::sample_profile; + if (!options::cs_profile_path.empty()) + Conf.CSIRProfile = options::cs_profile_path; + Conf.RunCSIRInstr = options::cs_pgo_gen; + Conf.DwoDir = options::dwo_dir; // Set up optimization remarks handling. 
Index: tools/llvm-lto2/llvm-lto2.cpp =================================================================== --- tools/llvm-lto2/llvm-lto2.cpp +++ tools/llvm-lto2/llvm-lto2.cpp @@ -104,6 +104,15 @@ SamplePGOFile("lto-sample-profile-file", cl::desc("Specify a SamplePGO profile file")); +static cl::opt + CSPGOFile("lto-cspgo-profile-file", + cl::desc("Specify a context sensitive PGO profile file")); + +static cl::opt + RunCSIRInstr("lto-cspgo-gen", + cl::desc("Run PGO context sensitive IR instrumentation"), + cl::init(false), cl::Hidden); + static cl::opt UseNewPM("use-new-pm", cl::desc("Run LTO passes using the new pass manager"), @@ -214,6 +223,8 @@ Conf.RemarksWithHotness = OptRemarksWithHotness; Conf.SampleProfile = SamplePGOFile; + Conf.CSIRProfile = CSPGOFile; + Conf.RunCSIRInstr = RunCSIRInstr; // Run a custom pipeline, if asked for. Conf.OptPipeline = OptPipeline; Index: tools/llvm-profdata/llvm-profdata.cpp =================================================================== --- tools/llvm-profdata/llvm-profdata.cpp +++ tools/llvm-profdata/llvm-profdata.cpp @@ -225,7 +225,8 @@ auto Reader = std::move(ReaderOrErr.get()); bool IsIRProfile = Reader->isIRLevelProfile(); - if (WC->Writer.setIsIRLevelProfile(IsIRProfile)) { + bool HasCSIRProfile = Reader->hasCSIRLevelProfile(); + if (WC->Writer.setIsIRLevelProfile(IsIRProfile, HasCSIRProfile)) { WC->Err = make_error( "Merge IR generated profile with Clang generated profile.", std::error_code()); @@ -669,9 +670,10 @@ uint32_t TopN, bool ShowIndirectCallTargets, bool ShowMemOPSizes, bool ShowDetailedSummary, std::vector DetailedSummaryCutoffs, - bool ShowAllFunctions, uint64_t ValueCutoff, - bool OnlyListBelow, const std::string &ShowFunction, - bool TextFormat, raw_fd_ostream &OS) { + bool ShowAllFunctions, bool ShowCS, + uint64_t ValueCutoff, bool OnlyListBelow, + const std::string &ShowFunction, bool TextFormat, + raw_fd_ostream &OS) { auto ReaderOrErr = InstrProfReader::create(Filename); std::vector Cutoffs = 
std::move(DetailedSummaryCutoffs); if (ShowDetailedSummary && Cutoffs.empty()) { @@ -708,6 +710,11 @@ OS << ":ir\n"; for (const auto &Func : *Reader) { + if (Reader->isIRLevelProfile()) { + bool FuncIsCS = NamedInstrProfRecord::hasCSFlagInHash(Func.Hash); + if (FuncIsCS != ShowCS) + continue; + } bool Show = ShowAllFunctions || (!ShowFunction.empty() && Func.Name.find(ShowFunction) != Func.Name.npos); @@ -899,6 +906,8 @@ cl::value_desc("800000,901000,999999")); cl::opt ShowAllFunctions("all-functions", cl::init(false), cl::desc("Details for every function")); + cl::opt ShowCS("showcs", cl::init(false), + cl::desc("Show context sensitive counts")); cl::opt ShowFunction("function", cl::desc("Details for matching functions")); @@ -940,8 +949,8 @@ return showInstrProfile(Filename, ShowCounts, TopNFunctions, ShowIndirectCallTargets, ShowMemOPSizes, ShowDetailedSummary, DetailedSummaryCutoffs, - ShowAllFunctions, ValueCutoff, OnlyListBelow, - ShowFunction, TextFormat, OS); + ShowAllFunctions, ShowCS, ValueCutoff, + OnlyListBelow, ShowFunction, TextFormat, OS); else return showSampleProfile(Filename, ShowCounts, ShowAllFunctions, ShowFunction, OS); Index: tools/opt/NewPMDriver.h =================================================================== --- tools/opt/NewPMDriver.h +++ tools/opt/NewPMDriver.h @@ -45,6 +45,7 @@ InstrUse, SampleUse }; +enum CSPGOKind { NoCSPGO, CSInstrGen, CSInstrUse }; } /// Driver function to run the new pass manager over a module. 
Index: tools/opt/NewPMDriver.cpp =================================================================== --- tools/opt/NewPMDriver.cpp +++ tools/opt/NewPMDriver.cpp @@ -102,6 +102,9 @@ extern cl::opt PGOKindFlag; extern cl::opt ProfileFile; +extern cl::opt CSPGOKindFlag; +extern cl::opt CSProfileGenFile; + static cl::opt ProfileRemappingFile("profile-remapping-file", cl::desc("Path to the profile remapping file."), @@ -219,20 +222,44 @@ Optional P; switch (PGOKindFlag) { case InstrGen: - P = PGOOptions(ProfileFile, "", "", "", true); + P = PGOOptions(ProfileFile, "", "", PGOOptions::IRInstr); break; case InstrUse: - P = PGOOptions("", ProfileFile, "", ProfileRemappingFile, false); + P = PGOOptions(ProfileFile, "", ProfileRemappingFile, PGOOptions::IRUse); break; case SampleUse: - P = PGOOptions("", "", ProfileFile, ProfileRemappingFile, false); + P = PGOOptions(ProfileFile, "", ProfileRemappingFile, + PGOOptions::SampleUse); break; case NoPGO: if (DebugInfoForProfiling) - P = PGOOptions("", "", "", "", false, true); + P = PGOOptions("", "", "", PGOOptions::NoAction, PGOOptions::NoCSAction, + true); else P = None; - } + } + if (CSPGOKindFlag != NoCSPGO) { + if (P && (P->Action == PGOOptions::IRInstr || + P->Action == PGOOptions::SampleUse)) + errs() << "CSPGOKind cannot be used with IRInstr or SampleUse"; + if (CSPGOKindFlag == CSInstrGen) { + if (CSProfileGenFile.empty()) + errs() << "CSInstrGen needs to specify CSProfileGenFile"; + if (P) { + P->CSAction = PGOOptions::CSIRInstr; + P->CSProfileGenFile = CSProfileGenFile; + } else + P = PGOOptions("", CSProfileGenFile, ProfileRemappingFile, + PGOOptions::NoAction, PGOOptions::CSIRInstr); + } else /* CSPGOKindFlag == CSInstrUse */ { + // P can still be empty here (the NoPGO case may leave it as None), and + // dereferencing an empty Optional is undefined, so guard the assignment. + if (P) + P->CSAction = PGOOptions::CSIRUse; + else + errs() << "CSInstrUse needs to be together with InstrUse"; + } + } PassInstrumentationCallbacks PIC; StandardInstrumentations SI; SI.registerCallbacks(PIC); Index: tools/opt/opt.cpp
=================================================================== --- tools/opt/opt.cpp +++ tools/opt/opt.cpp @@ -287,6 +287,22 @@ cl::opt ProfileFile("profile-file", cl::desc("Path to the profile."), cl::Hidden); +cl::opt CSPGOKindFlag( + "cspgo-kind", cl::init(NoCSPGO), cl::Hidden, + cl::desc("The kind of context sensitive profile guided optimization"), + cl::values( + clEnumValN(NoCSPGO, "nocspgo", "Do not use CSPGO."), + clEnumValN( + CSInstrGen, "cspgo-instr-gen-pipeline", + "Instrument (context sensitive) the IR to generate profile."), + clEnumValN( + CSInstrUse, "cspgo-instr-use-pipeline", + "Use instrumented (context sensitive) profile to guide PGO."))); +cl::opt CSProfileGenFile( + "cs-profilegen-file", + cl::desc("Path to the instrumented context sensitive profile."), + cl::Hidden); + class OptCustomPassManager : public legacy::PassManager { DebugifyStatsMap DIStatsMap; @@ -396,6 +412,18 @@ break; } + // Translate the -cspgo-kind choice into the matching builder flag. + switch (CSPGOKindFlag) { + case CSInstrGen: + Builder.EnablePGOCSInstrGen = true; + break; + case CSInstrUse: + Builder.EnablePGOCSInstrUse = true; + break; + default: + break; + } + Builder.populateFunctionPassManager(FPM); Builder.populateModulePassManager(MPM); } Index: unittests/ProfileData/InstrProfTest.cpp =================================================================== --- unittests/ProfileData/InstrProfTest.cpp +++ unittests/ProfileData/InstrProfTest.cpp @@ -175,7 +175,7 @@ ASSERT_EQ(288230376151711744U, NinetyFivePerc->MinCount); ASSERT_EQ(72057594037927936U, NinetyNinePerc->MinCount); }; - ProfileSummary &PS = Reader->getSummary(); + ProfileSummary &PS = Reader->getSummary(/* IsCS */ false); VerifySummary(PS); // Test that conversion of summary to and from Metadata works. @@ -189,8 +189,8 @@ // Test that summary can be attached to and read back from module.
Module M("my_module", Context); - M.setProfileSummary(MD); - MD = M.getProfileSummary(); + M.setProfileSummary(MD, ProfileSummary::PSK_Instr); + MD = M.getProfileSummary(/* IsCS */ false); ASSERT_TRUE(MD); PSFromMD = ProfileSummary::getFromMD(MD); ASSERT_TRUE(PSFromMD); @@ -801,7 +801,7 @@ auto Profile = Writer.writeBuffer(); readProfile(std::move(Profile)); - ASSERT_EQ(1ULL << 63, Reader->getMaximumFunctionCount()); + ASSERT_EQ(1ULL << 63, Reader->getMaximumFunctionCount(/* IsCS */ false)); } TEST_P(MaybeSparseInstrProfTest, get_weighted_function_counts) { Index: unittests/ProfileData/SampleProfTest.cpp =================================================================== --- unittests/ProfileData/SampleProfTest.cpp +++ unittests/ProfileData/SampleProfTest.cpp @@ -191,8 +191,8 @@ delete PS; // Test that summary can be attached to and read back from module. - M.setProfileSummary(MD); - MD = M.getProfileSummary(); + M.setProfileSummary(MD, ProfileSummary::PSK_Sample); + MD = M.getProfileSummary(/* IsCS */ false); ASSERT_TRUE(MD); PS = ProfileSummary::getFromMD(MD); ASSERT_TRUE(PS);