Index: llvm/include/llvm/Analysis/FuncSpecCost.h =================================================================== --- llvm/include/llvm/Analysis/FuncSpecCost.h +++ llvm/include/llvm/Analysis/FuncSpecCost.h @@ -17,6 +17,7 @@ #include "llvm/ADT/MapVector.h" #include "llvm/ADT/STLExtras.h" #include "llvm/IR/Argument.h" +#include "llvm/IR/InstrTypes.h" #include "llvm/IR/PassManager.h" #include "llvm/IR/Value.h" #include "llvm/Pass.h" @@ -27,6 +28,7 @@ class AssumptionCache; class TargetTransformInfo; class TargetLibraryInfo; +class ArgUsage; /// Cost/Bonus information for specialize a function with /// each argument. @@ -48,6 +50,25 @@ function_ref GetAC, function_ref GetTLI) const; + /// Given an ArgUsage, estimate whether we should import the corresponding + /// function. This should **only** be called when importing. + /// + /// If an argument refers to a function, we assume that function would be + /// inlined if its instruction count is below a specific threshold. + /// + /// TODO: Add profiling information. + bool shouldImport(const ArgUsage &) const; + + /// Return the entries of SpecBonusBaseMap as a contiguous array. + /// An empty map yields an empty ArrayRef; calling front() on it would be + /// undefined behavior. + ArrayRef> getSpecBonusBaseMap() const { + if (SpecBonusBaseMap.empty()) + return {}; + return makeArrayRef(&SpecBonusBaseMap.front(), + SpecBonusBaseMap.size()); + } + FuncSpecCostInfo() {} FuncSpecCostInfo(FuncSpecCostInfo &&Other) : FuncSpecCostInfo(std::move(Other.Cost), @@ -90,6 +111,51 @@ StringRef getPassName() const override; }; +/// Represents how arguments are used at a callsite, for the module summary. +/// We should keep it as small as possible. +/// +/// For now we mainly care whether an argument is a function. If so, there is +/// a chance to turn an indirect call into a direct call via the function +/// specialization pass. +/// +/// It should be easy to add value information about the constantness or value +/// range. +class ArgUsage { +private: + static unsigned ConstantMarker; + /// Map from ArgNo to the lines of codes if the corresponding argument refers + /// to a function. 
If the corresponding argument is a constant other than a + /// function, the value is set to ConstantMarker (0xffffffff). + /// + /// For example, the value of LinesOfArgs for the following call: + /// ``` + /// foo(var, 1, bar); // bar is a function; var is a variable + /// ``` + /// should be `[<1, 0xffffffff>, <2, lines of bar>]`. + SmallVector, 4> LinesOfArgs; + + friend class FuncSpecCostInfo; + friend class CalleeInfo; + +public: + ArgUsage() {} + ArgUsage(const CallBase &); + ArgUsage(SmallVectorImpl> &&Uses) + : LinesOfArgs(std::move(Uses)) {} + // noexcept moves let std::vector reallocation move instead of copy. + ArgUsage(const ArgUsage &AU) : LinesOfArgs(AU.LinesOfArgs) {} + ArgUsage(ArgUsage &&AU) noexcept : LinesOfArgs(std::move(AU.LinesOfArgs)) {} + ArgUsage &operator=(ArgUsage &&AU) noexcept { + LinesOfArgs = std::move(AU.LinesOfArgs); + return *this; + } + ArgUsage &operator=(const ArgUsage &AU) { + LinesOfArgs = AU.LinesOfArgs; + return *this; + } + + static bool isConstant(unsigned value) { return value == ConstantMarker; } +}; } // namespace llvm #endif Index: llvm/include/llvm/Analysis/ModuleSummaryAnalysis.h =================================================================== --- llvm/include/llvm/Analysis/ModuleSummaryAnalysis.h +++ llvm/include/llvm/Analysis/ModuleSummaryAnalysis.h @@ -36,6 +36,7 @@ ModuleSummaryIndex buildModuleSummaryIndex( const Module &M, std::function GetBFICallback, + std::function GetFSCICallback, ProfileSummaryInfo *PSI, std::function GetSSICallback = [](const Function &F) -> const StackSafetyInfo * { return nullptr; }); Index: llvm/include/llvm/IR/ModuleSummaryIndex.h =================================================================== --- llvm/include/llvm/IR/ModuleSummaryIndex.h +++ llvm/include/llvm/IR/ModuleSummaryIndex.h @@ -23,6 +23,7 @@ #include "llvm/ADT/StringMap.h" #include "llvm/ADT/StringRef.h" #include "llvm/ADT/TinyPtrVector.h" +#include "llvm/Analysis/FuncSpecCost.h" #include "llvm/IR/ConstantRange.h" #include "llvm/IR/GlobalValue.h" #include "llvm/IR/Module.h" @@ -70,13 +71,26 @@ /// 
The value stored in RelBlockFreq has to be interpreted as the digits of /// a scaled number with a scale of \p -ScaleShift. uint32_t RelBlockFreq : 29; + /// The infomation about how arguments are used at the callsite. + /// NOTE: This may enlarge the size of CalleeInfo. + ArgUsage Usages; static constexpr int32_t ScaleShift = 8; static constexpr uint64_t MaxRelBlockFreq = (1 << 29) - 1; CalleeInfo() - : Hotness(static_cast(HotnessType::Unknown)), RelBlockFreq(0) {} - explicit CalleeInfo(HotnessType Hotness, uint64_t RelBF) - : Hotness(static_cast(Hotness)), RelBlockFreq(RelBF) {} + : Hotness(static_cast(HotnessType::Unknown)), RelBlockFreq(0), + Usages() {} + explicit CalleeInfo(HotnessType Hotness, uint64_t RelBF, ArgUsage &&Usages) + : Hotness(static_cast(Hotness)), RelBlockFreq(RelBF), + Usages(std::move(Usages)) {} + + void updateUsages(ArgUsage &&OtherUsages) { Usages = std::move(OtherUsages); } + + size_t getUsagesSize() const { return Usages.LinesOfArgs.size(); } + + ArrayRef> getUsages() const { + return makeArrayRef(Usages.LinesOfArgs.begin(), Usages.LinesOfArgs.end()); + } void updateHotness(const HotnessType OtherHotness) { Hotness = std::max(Hotness, static_cast(OtherHotness)); @@ -625,7 +639,8 @@ std::vector(), std::vector(), std::vector(), - std::vector()); + std::vector(), + FuncSpecCostInfo()); } /// A dummy node to reference external functions that aren't in the index @@ -653,6 +668,8 @@ using ParamAccessesTy = std::vector; std::unique_ptr ParamAccesses; + FuncSpecCostInfo SpecCostInfo; + public: FunctionSummary(GVFlags Flags, unsigned NumInsts, FFlags FunFlags, uint64_t EntryCount, std::vector Refs, @@ -662,10 +679,12 @@ std::vector TypeCheckedLoadVCalls, std::vector TypeTestAssumeConstVCalls, std::vector TypeCheckedLoadConstVCalls, - std::vector Params) + std::vector Params, + FuncSpecCostInfo SpecCostInfo) : GlobalValueSummary(FunctionKind, Flags, std::move(Refs)), InstCount(NumInsts), FunFlags(FunFlags), EntryCount(EntryCount), - 
CallGraphEdgeList(std::move(CGEdges)) { + CallGraphEdgeList(std::move(CGEdges)), + SpecCostInfo(std::move(SpecCostInfo)) { if (!TypeTests.empty() || !TypeTestAssumeVCalls.empty() || !TypeCheckedLoadVCalls.empty() || !TypeTestAssumeConstVCalls.empty() || !TypeCheckedLoadConstVCalls.empty()) @@ -773,6 +792,21 @@ const TypeIdInfo *getTypeIdInfo() const { return TIdInfo.get(); }; + unsigned getSpecializeCost() const { + InstructionCost Cost = SpecCostInfo.getCost(); + if (Cost.isValid()) + return *Cost.getValue(); + return std::numeric_limits::max(); + } + + ArrayRef> getSpecBonusBase() const { + return SpecCostInfo.getSpecBonusBaseMap(); + } + + bool shouldImport(const CalleeInfo &CI) const { + return SpecCostInfo.shouldImport(CI.Usages); + } + friend struct GraphTraits; }; @@ -1121,7 +1155,9 @@ // in the way some record are interpreted, like flags for instance. // Note that incrementing this may require changes in both BitcodeReader.cpp // and BitcodeWriter.cpp. - static constexpr uint64_t BitcodeSummaryVersion = 9; + // ChangeLog: 9->10. Add ArgUsage in CalleeInfo and SpecCostInfo in + // Function Summary. 
+ static constexpr uint64_t BitcodeSummaryVersion = 10; // Regular LTO module name for ASM writer static constexpr const char *getRegularLTOModuleName() { Index: llvm/include/llvm/IR/ModuleSummaryIndexYAML.h =================================================================== --- llvm/include/llvm/IR/ModuleSummaryIndexYAML.h +++ llvm/include/llvm/IR/ModuleSummaryIndexYAML.h @@ -234,7 +234,7 @@ std::move(FSum.TypeCheckedLoadVCalls), std::move(FSum.TypeTestAssumeConstVCalls), std::move(FSum.TypeCheckedLoadConstVCalls), - ArrayRef{})); + ArrayRef{}, FuncSpecCostInfo())); } } static void output(IO &io, GlobalValueSummaryMapTy &V) { Index: llvm/lib/Analysis/FuncSpecCost.cpp =================================================================== --- llvm/lib/Analysis/FuncSpecCost.cpp +++ llvm/lib/Analysis/FuncSpecCost.cpp @@ -28,6 +28,20 @@ #define DEBUG_TYPE "func-spec-cost" +/// Limit on instruction count of imported functions. +/// The function whose number of instruction below this argument +/// is considered to be inlined during function importing. 
+static cl::opt PotentialInlingLimit( + "funcspec-potential-inlininglimit", cl::init(100), cl::Hidden, + cl::value_desc("N"), + cl::desc("Only functions with less than N instructions are considered to " + "be inlined during analysis of func spec.")); + +static cl::opt + BonusFactorFromInlining("func-spec-bonus-from-inlining-factor", cl::Hidden, + cl::desc("Factor for bonus from potential inlining."), + cl::init(10)); + static cl::opt AvgLoopIterationCount("func-specialization-avg-iters-cost", cl::Hidden, cl::desc("Average loop iteration count cost"), @@ -220,4 +234,43 @@ StringRef FunctionSpecializationWrapperPass::getPassName() const { return "Function Specialization Cost Analysis"; +} + +unsigned ArgUsage::ConstantMarker = ~(unsigned)0; + +/// Record an entry for every argument that is a function (its instruction + /// count) or another constant (ConstantMarker). Non-constant arguments get + /// no entry, matching the documented layout of LinesOfArgs. +ArgUsage::ArgUsage(const CallBase &CB) { + for (auto &U : CB.args()) { + unsigned ArgNo = CB.getArgOperandNo(&U); + if (Function *F = getFunction(U.get())) { + LinesOfArgs.push_back({ArgNo, F->getInstructionCount()}); + continue; + } + // Variables carry no information; only genuine constants get the marker. + if (isa<Constant>(U.get())) + LinesOfArgs.push_back({ArgNo, ConstantMarker}); + } +} + +bool FuncSpecCostInfo::shouldImport(const ArgUsage &AU) const { + if (!Cost.isValid()) + return false; + unsigned SpecCost = *Cost.getValue(); + /// FIXME: It would consider the first argument who fits + /// the condition. It should be fixed after Function Specialization + /// pass fix this. + for (auto &IndexValuePair : AU.LinesOfArgs) { + unsigned Index = IndexValuePair.first; + unsigned Value = IndexValuePair.second; + unsigned BonusBase = getBonusBase(Index); + // Should we consider attributes like `noinline` and `always_inline` here? 
+ if (!ArgUsage::isConstant(Value) && Value < PotentialInlingLimit) + BonusBase += BonusFactorFromInlining * (PotentialInlingLimit - Value); + + if (BonusBase > SpecCost) + return true; + } + return false; } \ No newline at end of file Index: llvm/lib/Analysis/ModuleSummaryAnalysis.cpp =================================================================== --- llvm/lib/Analysis/ModuleSummaryAnalysis.cpp +++ llvm/lib/Analysis/ModuleSummaryAnalysis.cpp @@ -239,7 +239,8 @@ BlockFrequencyInfo *BFI, ProfileSummaryInfo *PSI, DominatorTree &DT, bool HasLocalsInUsedOrAsm, DenseSet &CantBePromoted, bool IsThinLTO, - std::function GetSSICallback) { + std::function GetSSICallback, + std::function GetFSCICallback) { // Summary not currently supported for anonymous functions, they should // have been named. assert(F.hasName()); @@ -356,6 +357,8 @@ uint64_t EntryFreq = BFI->getEntryFreq(); ValueInfo.updateRelBlockFreq(BBFreq, EntryFreq); } + + ValueInfo.updateUsages(ArgUsage(*CB)); } else { // Skip inline assembly calls. if (CI && CI->isInlineAsm()) @@ -469,12 +472,16 @@ std::vector ParamAccesses; if (auto *SSI = GetSSICallback(F)) ParamAccesses = SSI->getParamAccesses(Index); + FuncSpecCostInfo *FSCI = nullptr; + if (GetFSCICallback) + FSCI = GetFSCICallback(F); auto FuncSummary = std::make_unique( Flags, NumInsts, FunFlags, /*EntryCount=*/0, std::move(Refs), CallGraphEdges.takeVector(), TypeTests.takeVector(), TypeTestAssumeVCalls.takeVector(), TypeCheckedLoadVCalls.takeVector(), TypeTestAssumeConstVCalls.takeVector(), - TypeCheckedLoadConstVCalls.takeVector(), std::move(ParamAccesses)); + TypeCheckedLoadConstVCalls.takeVector(), std::move(ParamAccesses), + FSCI ? 
std::move(*FSCI) : FuncSpecCostInfo()); if (NonRenamableLocal) CantBePromoted.insert(F.getGUID()); Index.addGlobalValueSummary(F, std::move(FuncSummary)); @@ -643,6 +650,7 @@ ModuleSummaryIndex llvm::buildModuleSummaryIndex( const Module &M, std::function GetBFICallback, + std::function GetFSCICallback, ProfileSummaryInfo *PSI, std::function GetSSICallback) { assert(PSI); @@ -719,7 +727,8 @@ ArrayRef{}, ArrayRef{}, ArrayRef{}, - ArrayRef{}); + ArrayRef{}, + FuncSpecCostInfo()); Index.addGlobalValueSummary(*GV, std::move(Summary)); } else { std::unique_ptr Summary = @@ -759,7 +768,7 @@ computeFunctionSummary(Index, M, F, BFI, PSI, DT, !LocalsUsed.empty() || HasLocalInlineAsmSymbol, - CantBePromoted, IsThinLTO, GetSSICallback); + CantBePromoted, IsThinLTO, GetSSICallback, GetFSCICallback); } // Compute summaries for all variables defined in module, and save in the @@ -848,6 +857,10 @@ return &FAM.getResult( *const_cast(&F)); }, + [&FAM](const Function &F) { + return &FAM.getResult( + *const_cast(&F)); + }, &PSI, [&FAM, NeedSSI](const Function &F) -> const StackSafetyInfo * { return NeedSSI ? &FAM.getResult( @@ -863,6 +876,7 @@ INITIALIZE_PASS_DEPENDENCY(BlockFrequencyInfoWrapperPass) INITIALIZE_PASS_DEPENDENCY(ProfileSummaryInfoWrapperPass) INITIALIZE_PASS_DEPENDENCY(StackSafetyInfoWrapperPass) +INITIALIZE_PASS_DEPENDENCY(FunctionSpecializationWrapperPass) INITIALIZE_PASS_END(ModuleSummaryIndexWrapperPass, "module-summary-analysis", "Module Summary Analysis", false, true) @@ -885,6 +899,11 @@ *const_cast(&F)) .getBFI()); }, + [&](const Function &F) { + return &(this->getAnalysis( + *const_cast(&F)) + .getFuncSpecCost()); + }, PSI, [&](const Function &F) -> const StackSafetyInfo * { return NeedSSI ? 
&getAnalysis( @@ -905,6 +924,7 @@ AU.addRequired(); AU.addRequired(); AU.addRequired(); + AU.addRequired(); } char ImmutableModuleSummaryIndexWrapperPass::ID = 0; Index: llvm/lib/AsmParser/LLParser.cpp =================================================================== --- llvm/lib/AsmParser/LLParser.cpp +++ llvm/lib/AsmParser/LLParser.cpp @@ -8703,7 +8703,9 @@ std::move(TypeIdInfo.TypeCheckedLoadVCalls), std::move(TypeIdInfo.TypeTestAssumeConstVCalls), std::move(TypeIdInfo.TypeCheckedLoadConstVCalls), - std::move(ParamAccesses)); + std::move(ParamAccesses), + /// FIXME: Implement actually parser for FuncSpecCostInfo. + FuncSpecCostInfo()); FS->setModulePath(ModulePath); @@ -8940,7 +8942,10 @@ // can only do so once the std::vector is finalized. if (VI.getRef() == FwdVIRef) IdToIndexMap[GVId].push_back(std::make_pair(Calls.size(), Loc)); - Calls.push_back(FunctionSummary::EdgeTy{VI, CalleeInfo(Hotness, RelBF)}); + + /// FIXME: Implement parser for new added infomation. + Calls.push_back( + FunctionSummary::EdgeTy{VI, CalleeInfo(Hotness, RelBF, ArgUsage())}); if (parseToken(lltok::rparen, "expected ')' in call")) return true; Index: llvm/lib/Bitcode/Reader/BitcodeReader.cpp =================================================================== --- llvm/lib/Bitcode/Reader/BitcodeReader.cpp +++ llvm/lib/Bitcode/Reader/BitcodeReader.cpp @@ -791,10 +791,14 @@ uint64_t Offset, DenseMap &ValueIdToLinkageMap); std::vector makeRefList(ArrayRef Record); + FuncSpecCostInfo makeSpecCostInfo(ArrayRef Record, + unsigned SpecCost); std::vector makeCallList(ArrayRef Record, + uint64_t Version, bool IsOldProfileFormat, bool HasProfile, bool HasRelBF); + ArgUsage makeArgUsages(ArrayRef Record, unsigned &I); Error parseEntireSummary(unsigned ID); Error parseModuleStringTable(); void parseTypeIdCompatibleVtableSummaryRecord(ArrayRef Record); @@ -5917,8 +5921,33 @@ return Ret; } +FuncSpecCostInfo +ModuleSummaryIndexBitcodeReader::makeSpecCostInfo(ArrayRef Record, + unsigned SpecCost) { + 
MapVector BonusBaseMap; + auto size = Record.size(); + assert(size % 2 == 0 && "The size of base bonus should be even!"); + for (unsigned i = 0; i < size; i += 2) + BonusBaseMap.insert({Record[i], Record[i + 1]}); + return FuncSpecCostInfo(SpecCost, std::move(BonusBaseMap)); +} + +ArgUsage +ModuleSummaryIndexBitcodeReader::makeArgUsages(ArrayRef Record, + unsigned &I) { + SmallVector, 4> Uses; + size_t Size = Record[I++]; + assert(Size % 2 == 0); + assert(I + Size <= Record.size()); + Size += I; + for (; I != Size; I += 2) + Uses.push_back({Record[I], Record[I + 1]}); + return ArgUsage(std::move(Uses)); +} + std::vector ModuleSummaryIndexBitcodeReader::makeCallList(ArrayRef Record, + uint64_t Version, bool IsOldProfileFormat, bool HasProfile, bool HasRelBF) { std::vector Ret; @@ -5927,6 +5956,11 @@ CalleeInfo::HotnessType Hotness = CalleeInfo::HotnessType::Unknown; uint64_t RelBF = 0; ValueInfo Callee = getValueInfoFromValueId(Record[I]).first; + ArgUsage Uses; + if (Version >= 10) { + Uses = makeArgUsages(Record, ++I); + I -= 1; // Match original order. 
+ } if (IsOldProfileFormat) { I += 1; // Skip old callsitecount field if (HasProfile) @@ -5935,7 +5969,8 @@ Hotness = static_cast(Record[++I]); else if (HasRelBF) RelBF = Record[++I]; - Ret.push_back(FunctionSummary::EdgeTy{Callee, CalleeInfo(Hotness, RelBF)}); + Ret.push_back(FunctionSummary::EdgeTy{ + Callee, CalleeInfo(Hotness, RelBF, std::move(Uses))}); } return Ret; } @@ -6164,6 +6199,8 @@ unsigned NumRefs = Record[3]; unsigned NumRORefs = 0, NumWORefs = 0; int RefListStartIndex = 4; + unsigned SpecCost = ~(unsigned)0; + unsigned NumBaseBonus = 0; if (Version >= 4) { RawFunFlags = Record[3]; NumRefs = Record[4]; @@ -6174,6 +6211,11 @@ if (Version >= 7) { NumWORefs = Record[6]; RefListStartIndex = 7; + if (Version >= 10) { + SpecCost = Record[7]; + NumBaseBonus = Record[8]; + RefListStartIndex = 9; + } } } } @@ -6184,16 +6226,20 @@ // string table section in the per-module index, we create a single // module path string table entry with an empty (0) ID to take // ownership. - int CallGraphEdgeStartIndex = RefListStartIndex + NumRefs; - assert(Record.size() >= RefListStartIndex + NumRefs && + unsigned FuncSpecStartIndex = RefListStartIndex + NumRefs; + unsigned CallGraphEdgeStartIndex = FuncSpecStartIndex + NumBaseBonus; + assert(Record.size() >= CallGraphEdgeStartIndex && "Record size inconsistent with number of references"); std::vector Refs = makeRefList( ArrayRef(Record).slice(RefListStartIndex, NumRefs)); + FuncSpecCostInfo FSCI = makeSpecCostInfo( + ArrayRef(Record).slice(FuncSpecStartIndex, NumBaseBonus), + SpecCost); bool HasProfile = (BitCode == bitc::FS_PERMODULE_PROFILE); bool HasRelBF = (BitCode == bitc::FS_PERMODULE_RELBF); std::vector Calls = makeCallList( ArrayRef(Record).slice(CallGraphEdgeStartIndex), - IsOldProfileFormat, HasProfile, HasRelBF); + Version, IsOldProfileFormat, HasProfile, HasRelBF); setSpecialRefs(Refs, NumRORefs, NumWORefs); auto FS = std::make_unique( Flags, InstCount, getDecodedFFlags(RawFunFlags), /*EntryCount=*/0, @@ -6202,7 
+6248,7 @@ std::move(PendingTypeCheckedLoadVCalls), std::move(PendingTypeTestAssumeConstVCalls), std::move(PendingTypeCheckedLoadConstVCalls), - std::move(PendingParamAccesses)); + std::move(PendingParamAccesses), std::move(FSCI)); auto VIAndOriginalGUID = getValueInfoFromValueId(ValueID); FS->setModulePath(getThisModule()->first()); FS->setOriginalName(VIAndOriginalGUID.second); @@ -6303,6 +6349,8 @@ unsigned NumRefs = Record[4]; unsigned NumRORefs = 0, NumWORefs = 0; int RefListStartIndex = 5; + unsigned SpecCost = ~(unsigned)0; + unsigned NumBaseBonus = 0; if (Version >= 4) { RawFunFlags = Record[4]; @@ -6319,6 +6367,13 @@ RefListStartIndex = 9; NumWORefs = Record[8]; NumRORefsOffset = 2; + + if (Version >= 10) { + SpecCost = Record[9]; + NumBaseBonus = Record[10]; + RefListStartIndex = 11; + NumRORefsOffset = 4; + } } } NumRORefs = Record[RefListStartIndex - NumRORefsOffset]; @@ -6327,15 +6382,20 @@ } auto Flags = getDecodedGVSummaryFlags(RawFlags, Version); - int CallGraphEdgeStartIndex = RefListStartIndex + NumRefs; - assert(Record.size() >= RefListStartIndex + NumRefs && + + uint64_t FuncSpecStartIndex = RefListStartIndex + NumRefs; + uint64_t CallGraphEdgeStartIndex = FuncSpecStartIndex + NumBaseBonus; + assert(Record.size() >= CallGraphEdgeStartIndex && "Record size inconsistent with number of references"); std::vector Refs = makeRefList( ArrayRef(Record).slice(RefListStartIndex, NumRefs)); + FuncSpecCostInfo FSCI = makeSpecCostInfo( + ArrayRef(Record).slice(FuncSpecStartIndex, NumBaseBonus), + SpecCost); bool HasProfile = (BitCode == bitc::FS_COMBINED_PROFILE); std::vector Edges = makeCallList( ArrayRef(Record).slice(CallGraphEdgeStartIndex), - IsOldProfileFormat, HasProfile, false); + Version, IsOldProfileFormat, HasProfile, false); ValueInfo VI = getValueInfoFromValueId(ValueID).first; setSpecialRefs(Refs, NumRORefs, NumWORefs); auto FS = std::make_unique( @@ -6345,7 +6405,7 @@ std::move(PendingTypeCheckedLoadVCalls), 
std::move(PendingTypeTestAssumeConstVCalls), std::move(PendingTypeCheckedLoadConstVCalls), - std::move(PendingParamAccesses)); + std::move(PendingParamAccesses), std::move(FSCI)); LastSeenSummary = FS.get(); LastSeenGUID = VI.getGUID(); FS->setModulePath(ModuleIdMap[ModuleId]); Index: llvm/lib/Bitcode/Writer/BitcodeWriter.cpp =================================================================== --- llvm/lib/Bitcode/Writer/BitcodeWriter.cpp +++ llvm/lib/Bitcode/Writer/BitcodeWriter.cpp @@ -3823,14 +3823,28 @@ NameVals.push_back(FS->refs().size()); NameVals.push_back(SpecialRefCnts.first); // rorefcnt NameVals.push_back(SpecialRefCnts.second); // worefcnt + NameVals.push_back(FS->getSpecializeCost()); + auto BonusBases = FS->getSpecBonusBase(); + NameVals.push_back(BonusBases.size() * 2); for (auto &RI : FS->refs()) NameVals.push_back(VE.getValueID(RI.getValue())); + for (auto &I : BonusBases) { + NameVals.push_back(I.first); + NameVals.push_back(I.second); + } + bool HasProfileData = F.hasProfileData() || ForceSummaryEdgesCold != FunctionSummary::FSHT_None; for (auto &ECI : FS->calls()) { NameVals.push_back(getValueId(ECI.first)); + NameVals.push_back(ECI.second.getUsagesSize() * 2); + auto Uses = ECI.second.getUsages(); + for (auto &IndexValuePair : Uses) { + NameVals.push_back(IndexValuePair.first); + NameVals.push_back(IndexValuePair.second); + } if (HasProfileData) NameVals.push_back(static_cast(ECI.second.Hotness)); else if (WriteRelBFToSummary) @@ -4236,6 +4250,9 @@ NameVals.push_back(0); // numrefs NameVals.push_back(0); // rorefcnt NameVals.push_back(0); // worefcnt + NameVals.push_back(FS->getSpecializeCost()); + auto BonusBases = FS->getSpecBonusBase(); + NameVals.push_back(BonusBases.size() * 2); unsigned Count = 0, RORefCnt = 0, WORefCnt = 0; for (auto &RI : FS->refs()) { @@ -4253,6 +4270,11 @@ NameVals[7] = RORefCnt; NameVals[8] = WORefCnt; + for (auto &I : BonusBases) { + NameVals.push_back(I.first); + NameVals.push_back(I.second); + } + bool 
HasProfileData = false; for (auto &EI : FS->calls()) { HasProfileData |= @@ -4268,6 +4290,12 @@ if (!CallValueId) continue; NameVals.push_back(*CallValueId); + NameVals.push_back(EI.second.getUsagesSize() * 2); + auto Uses = EI.second.getUsages(); + for (auto &IndexValuePair : Uses) { + NameVals.push_back(IndexValuePair.first); + NameVals.push_back(IndexValuePair.second); + } if (HasProfileData) NameVals.push_back(static_cast(EI.second.Hotness)); } Index: llvm/lib/IR/AsmWriter.cpp =================================================================== --- llvm/lib/IR/AsmWriter.cpp +++ llvm/lib/IR/AsmWriter.cpp @@ -3211,6 +3211,9 @@ Out << ", alwaysInline: " << FFlags.AlwaysInline; Out << ")"; } + + /// FIXME: Add writer for new infomation added for function specialization. + if (!FS->calls().empty()) { Out << ", calls: ("; FieldSeparator IFS; Index: llvm/lib/LTO/ThinLTOCodeGenerator.cpp =================================================================== --- llvm/lib/LTO/ThinLTOCodeGenerator.cpp +++ llvm/lib/LTO/ThinLTOCodeGenerator.cpp @@ -541,7 +541,7 @@ { raw_svector_ostream OS(OutputBuffer); ProfileSummaryInfo PSI(TheModule); - auto Index = buildModuleSummaryIndex(TheModule, nullptr, &PSI); + auto Index = buildModuleSummaryIndex(TheModule, nullptr, nullptr, &PSI); WriteBitcodeToFile(TheModule, OS, true, &Index); } return std::make_unique(std::move(OutputBuffer)); Index: llvm/lib/Transforms/IPO/FunctionImport.cpp =================================================================== --- llvm/lib/Transforms/IPO/FunctionImport.cpp +++ llvm/lib/Transforms/IPO/FunctionImport.cpp @@ -143,6 +143,12 @@ ImportAllIndex("import-all-index", cl::desc("Import all external functions in index.")); +static cl::opt + UseFuncSpecCostInfo("import-use-func-spec-info", + cl::desc("Considering function specialization cost " + "infomation when importing functions.\n"), + cl::Hidden, cl::init(true)); + // Load lazily a module from \p FileName in \p Context. 
static std::unique_ptr loadFile(const std::string &FileName, LLVMContext &Context) { @@ -171,12 +177,11 @@ /// number of source modules parsed/linked. /// - One that has PGO data attached. /// - [insert you fancy metric here] -static const GlobalValueSummary * -selectCallee(const ModuleSummaryIndex &Index, - ArrayRef> CalleeSummaryList, - unsigned Threshold, StringRef CallerModulePath, - FunctionImporter::ImportFailureReason &Reason, - GlobalValue::GUID GUID) { +static const GlobalValueSummary *selectCallee( + const ModuleSummaryIndex &Index, + ArrayRef> CalleeSummaryList, + const CalleeInfo &CI, unsigned Threshold, StringRef CallerModulePath, + FunctionImporter::ImportFailureReason &Reason, GlobalValue::GUID GUID) { Reason = FunctionImporter::ImportFailureReason::None; auto It = llvm::find_if( CalleeSummaryList, @@ -230,22 +235,32 @@ return false; } - if ((Summary->instCount() > Threshold) && - !Summary->fflags().AlwaysInline && !ForceImportAll) { - Reason = FunctionImporter::ImportFailureReason::TooLarge; - return false; - } + Reason = [&](FunctionSummary *Summary) { + if ((Summary->instCount() > Threshold) && + !Summary->fflags().AlwaysInline && !ForceImportAll) + return FunctionImporter::ImportFailureReason::TooLarge; - // Skip if it isn't legal to import (e.g. may reference unpromotable - // locals). - if (Summary->notEligibleToImport()) { - Reason = FunctionImporter::ImportFailureReason::NotEligible; + // Skip if it isn't legal to import (e.g. may reference unpromotable + // locals). + if (Summary->notEligibleToImport()) + return FunctionImporter::ImportFailureReason::NotEligible; + + // Don't bother importing if we can't inline it anyway. 
+ if (Summary->fflags().NoInline && !ForceImportAll) + return FunctionImporter::ImportFailureReason::NoInline; + + return FunctionImporter::ImportFailureReason::None; + }(Summary); + + if (Reason == FunctionImporter::ImportFailureReason::None) + return true; + // Specialization bonus may outweigh size, but never import illegally. + if (!UseFuncSpecCostInfo || Summary->notEligibleToImport()) return false; - } - // Don't bother importing if we can't inline it anyway. - if (Summary->fflags().NoInline && !ForceImportAll) { - Reason = FunctionImporter::ImportFailureReason::NoInline; + if (!Summary->shouldImport(CI)) { + /// FIXME: add new failure type. + Reason = FunctionImporter::ImportFailureReason::NotEligible; return false; } @@ -469,8 +484,9 @@ } FunctionImporter::ImportFailureReason Reason; - CalleeSummary = selectCallee(Index, VI.getSummaryList(), NewThreshold, - Summary.modulePath(), Reason, VI.getGUID()); + CalleeSummary = + selectCallee(Index, VI.getSummaryList(), Edge.second, NewThreshold, + Summary.modulePath(), Reason, VI.getGUID()); if (!CalleeSummary) { // Update with new larger threshold if this was a retry (otherwise // we would have already inserted with NewThreshold above). Also @@ -511,6 +527,9 @@ CalleeSummary = CalleeSummary->getBaseObject(); ResolvedCalleeSummary = cast(CalleeSummary); - assert((ResolvedCalleeSummary->fflags().AlwaysInline || ForceImportAll || - (ResolvedCalleeSummary->instCount() <= NewThreshold)) && - "selectCallee() didn't honor the threshold"); + // A callee chosen for its specialization bonus may exceed the threshold. + assert((ResolvedCalleeSummary->fflags().AlwaysInline || ForceImportAll || + (ResolvedCalleeSummary->instCount() <= NewThreshold) || + (UseFuncSpecCostInfo && + ResolvedCalleeSummary->shouldImport(Edge.second))) && + "selectCallee() didn't honor the threshold"); Index: llvm/lib/Transforms/IPO/ThinLTOBitcodeWriter.cpp =================================================================== --- llvm/lib/Transforms/IPO/ThinLTOBitcodeWriter.cpp +++ llvm/lib/Transforms/IPO/ThinLTOBitcodeWriter.cpp @@ -231,7 +231,8 @@ // regular LTO module with an index for summary-based dead stripping. 
ProfileSummaryInfo PSI(M); M.addModuleFlag(Module::Error, "ThinLTO", uint32_t(0)); - ModuleSummaryIndex Index = buildModuleSummaryIndex(M, nullptr, &PSI); + ModuleSummaryIndex Index = + buildModuleSummaryIndex(M, nullptr, nullptr, &PSI); WriteBitcodeToFile(M, OS, /*ShouldPreserveUseListOrder=*/false, &Index); if (ThinLinkOS) @@ -418,13 +419,13 @@ // FIXME: Try to re-use BSI and PFI from the original module here. ProfileSummaryInfo PSI(M); - ModuleSummaryIndex Index = buildModuleSummaryIndex(M, nullptr, &PSI); + ModuleSummaryIndex Index = buildModuleSummaryIndex(M, nullptr, nullptr, &PSI); // Mark the merged module as requiring full LTO. We still want an index for // it though, so that it can participate in summary-based dead stripping. MergedM->addModuleFlag(Module::Error, "ThinLTO", uint32_t(0)); ModuleSummaryIndex MergedMIndex = - buildModuleSummaryIndex(*MergedM, nullptr, &PSI); + buildModuleSummaryIndex(*MergedM, nullptr, nullptr, &PSI); SmallVector Buffer; @@ -497,7 +498,7 @@ // buildModuleSummaryIndex when Module(s) are ready. ProfileSummaryInfo PSI(M); NewIndex = std::make_unique( - buildModuleSummaryIndex(M, nullptr, &PSI)); + buildModuleSummaryIndex(M, nullptr, nullptr, &PSI)); Index = NewIndex.get(); } } Index: llvm/lib/Transforms/IPO/WholeProgramDevirt.cpp =================================================================== --- llvm/lib/Transforms/IPO/WholeProgramDevirt.cpp +++ llvm/lib/Transforms/IPO/WholeProgramDevirt.cpp @@ -1121,7 +1121,7 @@ // to better ensure we have the opportunity to inline them. 
bool IsExported = false; auto &S = Callee.getSummaryList()[0]; - CalleeInfo CI(CalleeInfo::HotnessType::Hot, /* RelBF = */ 0); + CalleeInfo CI(CalleeInfo::HotnessType::Hot, /* RelBF = */ 0, ArgUsage()); auto AddCalls = [&](CallSiteInfo &CSInfo) { for (auto *FS : CSInfo.SummaryTypeCheckedLoadUsers) { FS->addCall({Callee, CI}); Index: llvm/test/Bitcode/summary_version.ll =================================================================== --- llvm/test/Bitcode/summary_version.ll +++ llvm/test/Bitcode/summary_version.ll @@ -2,7 +2,7 @@ ; RUN: opt -module-summary %s -o - | llvm-bcanalyzer -dump | FileCheck %s ; CHECK: +; CHECK: Index: llvm/test/Bitcode/thinlto-alias.ll =================================================================== --- llvm/test/Bitcode/thinlto-alias.ll +++ llvm/test/Bitcode/thinlto-alias.ll @@ -21,7 +21,7 @@ ; CHECK-NEXT: +; CHECK-NEXT: ; CHECK-NEXT: ; CHECK-NEXT: @@ -35,7 +35,7 @@ ; COMBINED-NEXT: ; COMBINED-NEXT: -; COMBINED-NEXT: +; COMBINED-NEXT: ; COMBINED-NEXT: Index: llvm/test/Bitcode/thinlto-alias2.ll =================================================================== --- llvm/test/Bitcode/thinlto-alias2.ll +++ llvm/test/Bitcode/thinlto-alias2.ll @@ -5,7 +5,7 @@ ; CHECK: +; CHECK-NEXT: ; CHECK-NEXT: ; CHECK-NEXT: Index: llvm/test/Bitcode/thinlto-function-summary-callgraph-cast.ll =================================================================== --- llvm/test/Bitcode/thinlto-function-summary-callgraph-cast.ll +++ llvm/test/Bitcode/thinlto-function-summary-callgraph-cast.ll @@ -7,9 +7,9 @@ ; CHECK-NEXT: +; CHECK-NEXT: +; CHECK-NEXT: ; CHECK-NEXT: Index: llvm/test/Bitcode/thinlto-function-summary-callgraph-pgo.ll =================================================================== --- llvm/test/Bitcode/thinlto-function-summary-callgraph-pgo.ll +++ llvm/test/Bitcode/thinlto-function-summary-callgraph-pgo.ll @@ -18,7 +18,7 @@ ; CHECK-NEXT: +; CHECK-NEXT: ; CHECK-NEXT: ; CHECK-NEXT: ; CHECK: +; COMBINED-NEXT: ; COMBINED-NEXT: ; COMBINED-NEXT: 
Index: llvm/test/Bitcode/thinlto-function-summary-callgraph-profile-summary.ll =================================================================== --- llvm/test/Bitcode/thinlto-function-summary-callgraph-profile-summary.ll +++ llvm/test/Bitcode/thinlto-function-summary-callgraph-profile-summary.ll @@ -49,7 +49,7 @@ ; CHECK-NEXT: ; op4=hot1 op6=cold op8=hot2 op10=hot4 op12=none1 op14=hot3 op16=none2 op18=none3 op20=123 -; CHECK-NEXT: +; CHECK-NEXT: ; CHECK-NEXT: ; CHECK-NEXT: @@ -73,7 +73,7 @@ ; COMBINED-NEXT: +; COMBINED-NEXT: ; COMBINED-NEXT: ; COMBINED-NEXT: Index: llvm/test/Bitcode/thinlto-function-summary-callgraph-relbf.ll =================================================================== --- llvm/test/Bitcode/thinlto-function-summary-callgraph-relbf.ll +++ llvm/test/Bitcode/thinlto-function-summary-callgraph-relbf.ll @@ -14,7 +14,7 @@ ; CHECK-NEXT: ; CHECK-NEXT: ; CHECK: ; op4=none1 op6=hot1 op8=cold1 op10=none2 op12=hot2 op14=cold2 op16=none3 op18=hot3 op20=cold3 op22=123 -; CHECK-NEXT: +; CHECK-NEXT: ; CHECK-NEXT: ; CHECK-NEXT: @@ -60,7 +60,7 @@ ; COMBINED-NEXT: +; COMBINED-NEXT: ; COMBINED-NEXT: ; COMBINED-NEXT: Index: llvm/test/Bitcode/thinlto-function-summary-callgraph.ll =================================================================== --- llvm/test/Bitcode/thinlto-function-summary-callgraph.ll +++ llvm/test/Bitcode/thinlto-function-summary-callgraph.ll @@ -35,7 +35,7 @@ ; COMBINED-NEXT: +; COMBINED-NEXT: ; COMBINED-NEXT: ; COMBINED-NEXT: Index: llvm/test/Bitcode/thinlto-function-summary-refgraph.ll =================================================================== --- llvm/test/Bitcode/thinlto-function-summary-refgraph.ll +++ llvm/test/Bitcode/thinlto-function-summary-refgraph.ll @@ -41,24 +41,24 @@ ; CHECK: +; CHECK-DAG: ; Function W contains a call to func3 as well as a reference to globalvar: ; op0=W op4=globalvar op5=func3 -; CHECK-DAG: +; CHECK-DAG: +; CHECK-DAG: +; CHECK-DAG: ; Function Z contains call to func2, and ensures we don't 
incorrectly add ; a reference to it when reached while analyzing subsequent use of its return ; value: ; op0=Z op4=func2 -; CHECK-DAG: +; CHECK-DAG: ; Variable bar initialization contains address reference to func: ; op0=bar op2=func ; CHECK-DAG: