Index: include/llvm/IR/ModuleSummaryIndex.h =================================================================== --- include/llvm/IR/ModuleSummaryIndex.h +++ include/llvm/IR/ModuleSummaryIndex.h @@ -17,6 +17,7 @@ #define LLVM_IR_MODULESUMMARYINDEX_H #include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/BitVector.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallString.h" @@ -278,11 +279,14 @@ /// within the same linkage unit. unsigned DSOLocal : 1; + /// Indicates that global isn't modified by any live function + unsigned Constant : 1; + /// Convenience Constructors explicit GVFlags(GlobalValue::LinkageTypes Linkage, bool NotEligibleToImport, bool Live, bool IsLocal) : Linkage(Linkage), NotEligibleToImport(NotEligibleToImport), - Live(Live), DSOLocal(IsLocal) {} + Live(Live), DSOLocal(IsLocal), Constant(false) {} }; private: @@ -363,6 +367,10 @@ bool isDSOLocal() const { return Flags.DSOLocal; } + void setConstant(bool Constant) { Flags.Constant = Constant; } + + bool isConstant() const { return Flags.Constant; } + /// Flag that this global value cannot be imported. void setNotEligibleToImport() { Flags.NotEligibleToImport = true; } @@ -517,6 +525,10 @@ /// List of call edge pairs from this function. std::vector CallGraphEdgeList; + // Number of "immutable" ref edges in RefEdgeList. Such refs come from + // non-volatile load instructions. 
+ unsigned ImmutableRefCnt; + std::unique_ptr TIdInfo; public: @@ -529,7 +541,8 @@ std::vector TypeCheckedLoadConstVCalls) : GlobalValueSummary(FunctionKind, Flags, std::move(Refs)), InstCount(NumInsts), FunFlags(FunFlags), - CallGraphEdgeList(std::move(CGEdges)) { + CallGraphEdgeList(std::move(CGEdges)), + ImmutableRefCnt(0) { if (!TypeTests.empty() || !TypeTestAssumeVCalls.empty() || !TypeCheckedLoadVCalls.empty() || !TypeTestAssumeConstVCalls.empty() || !TypeCheckedLoadConstVCalls.empty()) @@ -540,6 +553,19 @@ std::move(TypeCheckedLoadConstVCalls)}); } + // Sets the number of immutable refs in RefEdgeList + void setImmutableRefCount(unsigned Cnt) { + assert(Cnt <= refs().size()); + ImmutableRefCnt = Cnt; + } + + // Gets the number of immutable refs in RefEdgeList + unsigned immutableRefCount() const { return ImmutableRefCnt; } + + // Checks if given ref edge is immutable. The VI parameter + // must point to some edge in RefEdgeList + bool isImmutableRef(const ValueInfo *VI) const; + /// Check if this is a function summary. static bool classof(const GlobalValueSummary *GVS) { return GVS->getSummaryKind() == FunctionKind; @@ -813,6 +839,10 @@ .first; } + void propagateConstantsForFunction(FunctionSummary *FS); + void collectMutableGVSummaries(GlobalValueSummary *Root, + DenseSet &MutableGVSet); + public: // See HaveGVs variable comment. ModuleSummaryIndex(bool HaveGVs) : HaveGVs(HaveGVs), Saver(Alloc) {} @@ -1131,6 +1161,9 @@ /// Print out strongly connected components for debugging. 
void dumpSCCs(raw_ostream &OS); + + /// Analyze index and detect unmodified globals + void propagateConstants(const DenseSet &PreservedSymbols); }; /// GraphTraits definition to build SCC for the index Index: include/llvm/Transforms/IPO/FunctionImport.h =================================================================== --- include/llvm/Transforms/IPO/FunctionImport.h +++ include/llvm/Transforms/IPO/FunctionImport.h @@ -172,6 +172,13 @@ const DenseSet &GUIDPreservedSymbols, function_ref isPrevailing); +/// Compute dead symbols and run constant propagation in combined index +/// after that. +void computeDeadSymbolsWithConstProp( + ModuleSummaryIndex &Index, + const DenseSet &GUIDPreservedSymbols, + function_ref isPrevailing); + /// Converts value \p GV to declaration, or replaces with a declaration if /// it is an alias. Returns true if converted, false if replaced. bool convertToDeclaration(GlobalValue &GV); Index: include/llvm/Transforms/Utils/FunctionImportUtils.h =================================================================== --- include/llvm/Transforms/Utils/FunctionImportUtils.h +++ include/llvm/Transforms/Utils/FunctionImportUtils.h @@ -114,6 +114,8 @@ Module &M, const ModuleSummaryIndex &Index, SetVector *GlobalsToImport = nullptr); +// Checks if GV can be imported to another module +bool canImportGV(GlobalValueSummary *GVS); } // End llvm namespace #endif Index: lib/Analysis/ModuleSummaryAnalysis.cpp =================================================================== --- lib/Analysis/ModuleSummaryAnalysis.cpp +++ lib/Analysis/ModuleSummaryAnalysis.cpp @@ -220,6 +220,13 @@ } } +static bool isNonVolatileLoad(const Instruction *I) { + if (const auto *LI = dyn_cast(I)) + return !LI->isVolatile(); + + return false; +} + static void computeFunctionSummary( ModuleSummaryIndex &Index, const Module &M, const Function &F, BlockFrequencyInfo *BFI, ProfileSummaryInfo *PSI, DominatorTree &DT, @@ -244,6 +251,7 @@ // Add personality function, prefix data and prologue 
data to function's ref // list. findRefEdges(Index, &F, RefEdges, Visited); + std::vector NonVolatileLoads; bool HasInlineAsmMaybeReferencingInternal = false; for (const BasicBlock &BB : F) @@ -251,6 +259,13 @@ if (isa(I)) continue; ++NumInsts; + if (isNonVolatileLoad(&I)) { + // Postpone processing of non-volatile load instructions + // See comments below + Visited.insert(&I); + NonVolatileLoads.push_back(&I); + continue; + } findRefEdges(Index, &I, RefEdges, Visited); auto CS = ImmutableCallSite(&I); if (!CS) @@ -340,6 +355,18 @@ } } + // By now we processed all instructions in a function, except + // non-volatile loads. All new refs we add in a loop below + // are obviously constant. All constant refs are grouped in the + // end of RefEdges vector, so we can use a single integer value + // to identify them. + unsigned MutableRefCnt = RefEdges.size(); + for (const Instruction *I : NonVolatileLoads) { + Visited.erase(I); + findRefEdges(Index, I, RefEdges, Visited); + } + unsigned ImmutableRefCnt = RefEdges.size() - MutableRefCnt; + // Explicit add hot edges to enforce importing for designated GUIDs for // sample PGO, to enable the same inlines as the profiled optimized binary. 
for (auto &I : F.getImportGUIDs()) @@ -372,6 +399,7 @@ TypeCheckedLoadConstVCalls.takeVector()); if (NonRenamableLocal) CantBePromoted.insert(F.getGUID()); + FuncSummary->setImmutableRefCount(ImmutableRefCnt); Index.addGlobalValueSummary(F, std::move(FuncSummary)); } Index: lib/Bitcode/Reader/BitcodeReader.cpp =================================================================== --- lib/Bitcode/Reader/BitcodeReader.cpp +++ lib/Bitcode/Reader/BitcodeReader.cpp @@ -5186,9 +5186,9 @@ } const uint64_t Version = Record[0]; const bool IsOldProfileFormat = Version == 1; - if (Version < 1 || Version > 4) + if (Version < 1 || Version > 5) return error("Invalid summary version " + Twine(Version) + - ", 1, 2, 3 or 4 expected"); + ", 1, 2, 3, 4 or 5 expected"); Record.clear(); // Keep around the last seen summary to be used when we see an optional @@ -5267,11 +5267,16 @@ unsigned InstCount = Record[2]; uint64_t RawFunFlags = 0; unsigned NumRefs = Record[3]; + unsigned NumImmutableRefs = 0; int RefListStartIndex = 4; if (Version >= 4) { RawFunFlags = Record[3]; NumRefs = Record[4]; RefListStartIndex = 5; + if (Version >= 5) { + NumImmutableRefs = Record[5]; + RefListStartIndex = 6; + } } auto Flags = getDecodedGVSummaryFlags(RawFlags, Version); @@ -5305,6 +5310,7 @@ auto VIAndOriginalGUID = getValueInfoFromValueId(ValueID); FS->setModulePath(getThisModule()->first()); FS->setOriginalName(VIAndOriginalGUID.second); + FS->setImmutableRefCount(NumImmutableRefs); TheIndex.addGlobalValueSummary(VIAndOriginalGUID.first, std::move(FS)); break; } @@ -5364,12 +5370,17 @@ unsigned InstCount = Record[3]; uint64_t RawFunFlags = 0; unsigned NumRefs = Record[4]; + unsigned NumImmutableRefs = 0; int RefListStartIndex = 5; if (Version >= 4) { RawFunFlags = Record[4]; NumRefs = Record[5]; RefListStartIndex = 6; + if (Version >= 5) { + NumImmutableRefs = Record[6]; + RefListStartIndex = 7; + } } auto Flags = getDecodedGVSummaryFlags(RawFlags, Version); @@ -5398,6 +5409,7 @@ LastSeenSummary = 
FS.get(); LastSeenGUID = VI.getGUID(); FS->setModulePath(ModuleIdMap[ModuleId]); + FS->setImmutableRefCount(NumImmutableRefs); TheIndex.addGlobalValueSummary(VI, std::move(FS)); break; } Index: lib/Bitcode/Writer/BitcodeWriter.cpp =================================================================== --- lib/Bitcode/Writer/BitcodeWriter.cpp +++ lib/Bitcode/Writer/BitcodeWriter.cpp @@ -3488,6 +3488,7 @@ NameVals.push_back(FS->instCount()); NameVals.push_back(getEncodedFFlags(FS->fflags())); NameVals.push_back(FS->refs().size()); + NameVals.push_back(FS->immutableRefCount()); for (auto &RI : FS->refs()) NameVals.push_back(VE.getValueID(RI.getValue())); @@ -3545,7 +3546,7 @@ // Current version for the summary. // This is bumped whenever we introduce changes in the way some record are // interpreted, like flags for instance. -static const uint64_t INDEX_VERSION = 4; +static const uint64_t INDEX_VERSION = 5; /// Emit the per-module summary section alongside the rest of /// the module's bitcode. 
@@ -3580,6 +3581,7 @@ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // instcount Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 4)); // fflags Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 4)); // numrefs + Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 4)); // immutablerefcnt // numrefs x valueid, n x (valueid, hotness) Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array)); Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); @@ -3596,6 +3598,7 @@ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // instcount Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 4)); // fflags Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 4)); // numrefs + Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 4)); // immutablerefcnt // numrefs x valueid, n x (valueid [, rel_block_freq]) Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array)); Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); @@ -3690,6 +3693,7 @@ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // instcount Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 4)); // fflags Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 4)); // numrefs + Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 4)); // immutablerefcnt // numrefs x valueid, n x (valueid) Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array)); Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); @@ -3704,6 +3708,7 @@ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // instcount Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 4)); // fflags Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 4)); // numrefs + Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 4)); // immutablerefcnt // numrefs x valueid, n x (valueid, hotness) Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array)); Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); @@ -3801,17 +3806,21 @@ NameVals.push_back(FS->instCount()); NameVals.push_back(getEncodedFFlags(FS->fflags())); // Fill in below - NameVals.push_back(0); + NameVals.push_back(0); // numrefs + NameVals.push_back(0); // immutablerefcnt - 
unsigned Count = 0; + unsigned Count = 0, ImmutableRefCnt = 0; for (auto &RI : FS->refs()) { auto RefValueId = getValueId(RI.getGUID()); if (!RefValueId) continue; NameVals.push_back(*RefValueId); + if (FS->isImmutableRef(&RI)) + ImmutableRefCnt++; Count++; } NameVals[5] = Count; + NameVals[6] = ImmutableRefCnt; bool HasProfileData = false; for (auto &EI : FS->calls()) { Index: lib/IR/ModuleSummaryIndex.cpp =================================================================== --- lib/IR/ModuleSummaryIndex.cpp +++ lib/IR/ModuleSummaryIndex.cpp @@ -30,6 +30,14 @@ }); } +bool FunctionSummary::isImmutableRef(const ValueInfo *VI) const { + ArrayRef Refs = refs(); + + assert(VI >= &Refs.front() && VI <= &Refs.back()); + unsigned RefNo = VI - &Refs.front(); + return RefNo >= Refs.size() - ImmutableRefCnt; +} + // Collect for the given module the list of function it defines // (GUID -> Summary). void ModuleSummaryIndex::collectDefinedFunctionsForModule( @@ -84,6 +92,76 @@ return false; } +void ModuleSummaryIndex::propagateConstantsForFunction(FunctionSummary *FS) { + for (auto &VI : FS->refs()) + for (auto &Summary : VI.getSummaryList()) + if (!FS->isImmutableRef(&VI)) + Summary->getBaseObject()->setConstant(false); +} + +void ModuleSummaryIndex::collectMutableGVSummaries( + GlobalValueSummary *RootGVS, DenseSet &MutableGVSet) { + if (!RootGVS->isLive() || RootGVS->isConstant()) + return; + + SmallVector Worklist; + if (MutableGVSet.insert(RootGVS).second) + Worklist.push_back(RootGVS); + + while (!Worklist.empty()) { + auto *GVS = Worklist.pop_back_val(); + for (auto &VI : GVS->refs()) + for (auto &Summary : VI.getSummaryList()) { + GlobalValueSummary *S = Summary->getBaseObject(); + if (S->getSummaryKind() == GlobalValueSummary::GlobalVarKind && + MutableGVSet.insert(S).second) + Worklist.push_back(S); + } + } +} + +void ModuleSummaryIndex::propagateConstants( + const DenseSet &GUIDPreservedSymbols) { + auto ForEachLiveSummary = + [&](llvm::function_ref F) { + for (auto 
&VI : *this) + for (auto &Summary : VI.second.SummaryList) + if (Summary->isLive()) + F(VI.first, Summary.get()); + }; + // Step 1: Find immutable global value summaries, which we can later + // internalize. Preserved symbols are visible externally, so we don't + // consider them constant + ForEachLiveSummary([&](GlobalValue::GUID Id, GlobalValueSummary *S) { + S->setConstant(S->getSummaryKind() == GlobalValueSummary::GlobalVarKind && + !GUIDPreservedSymbols.count(Id)); + }); + + // Step 2: for each function summary check its refs. If ref is not constant + // then referenced summary list is not constant either. + ForEachLiveSummary([this](GlobalValue::GUID, GlobalValueSummary *S) { + if (auto *FS = dyn_cast(S)) + propagateConstantsForFunction(FS); + }); + + // Step 3: for each global variable check if it is still constant. If it isn't + // then all its refs are not constant either. + DenseSet MutableGVSet; + ForEachLiveSummary([&](GlobalValue::GUID, GlobalValueSummary *S) { + if (S->getSummaryKind() == GlobalValueSummary::GlobalVarKind) + collectMutableGVSummaries(S, MutableGVSet); + }); + + for (auto *Summary : MutableGVSet) + Summary->setConstant(false); + + // Step 4: aliases inherit constant attribute from aliasee. 
+ ForEachLiveSummary([](GlobalValue::GUID, GlobalValueSummary *S) { + if (auto *AS = dyn_cast(S)) + AS->setConstant(AS->getAliasee().isConstant()); + }); +} + // TODO: write a graphviz dumper for SCCs (see ModuleSummaryIndex::exportToDot) // then delete this function and update its tests LLVM_DUMP_METHOD @@ -108,6 +186,7 @@ struct Attributes { void add(const Twine &Name, const Twine &Value, const Twine &Comment = Twine()); + void addComment(const Twine &Comment); std::string getAsString() const; std::vector Attrs; @@ -129,6 +208,10 @@ A += Value.str(); A += "\""; Attrs.push_back(A); + addComment(Comment); +} + +void Attributes::addComment(const Twine &Comment) { if (!Comment.isTriviallyEmpty()) { if (Comments.empty()) Comments = " // "; @@ -227,6 +310,14 @@ << "\"]; // defined externally\n"; } +static bool isReadOnlyRef(GlobalValueSummary *GVS, const ValueInfo &Ref) { + auto *FS = dyn_cast(GVS); + if (!FS) + return false; + + return FS->isImmutableRef(&Ref); +} + void ModuleSummaryIndex::exportToDot(raw_ostream& OS) const { std::vector CrossModuleEdges; DenseMap> NodeMap; @@ -243,12 +334,15 @@ auto DrawEdge = [&](const char *Pfx, int SrcMod, GlobalValue::GUID SrcId, int DstMod, GlobalValue::GUID DstId, int TypeOrHotness) { - // 0 corresponds to alias edge, 1 to ref edge, 2 to call with unknown - // hotness, ... - TypeOrHotness += 2; + // 0 - alias + // 1 - reference + // 2 - constant reference + // Other value: (hotness - 3). 
+ TypeOrHotness += 3; static const char *EdgeAttrs[] = { " [style=dotted]; // alias", " [style=dashed]; // ref", + " [style=dashed,color=forestgreen]; // const-ref", " // call (hotness : Unknown)", " [color=blue]; // call (hotness : Cold)", " // call (hotness : None)", @@ -291,6 +385,8 @@ A.add("shape", "box"); } else { A.add("shape", "Mrecord", "variable"); + if (Flags.Live && SummaryIt.second->isConstant()) + A.addComment("immutable"); } auto VI = getValueInfo(SummaryIt.first); @@ -308,13 +404,13 @@ for (auto &SummaryIt : GVSMap) { auto *GVS = SummaryIt.second; for (auto &R : GVS->refs()) - Draw(SummaryIt.first, R.getGUID(), -1); + Draw(SummaryIt.first, R.getGUID(), isReadOnlyRef(GVS, R) ? -1 : -2); if (auto *AS = dyn_cast_or_null(SummaryIt.second)) { auto AliaseeOrigId = AS->getAliasee().getOriginalName(); auto AliaseeId = getGUIDFromOriginalID(AliaseeOrigId); - Draw(SummaryIt.first, AliaseeId ? AliaseeId : AliaseeOrigId, -2); + Draw(SummaryIt.first, AliaseeId ? AliaseeId : AliaseeOrigId, -3); continue; } Index: lib/LTO/LTO.cpp =================================================================== --- lib/LTO/LTO.cpp +++ lib/LTO/LTO.cpp @@ -178,6 +178,7 @@ auto AddUsedThings = [&](GlobalValueSummary *GS) { if (!GS) return; AddUnsigned(GS->isLive()); + AddUnsigned(GS->isConstant()); for (const ValueInfo &VI : GS->refs()) { AddUnsigned(VI.isDSOLocal()); AddUsedCfiGlobal(VI.getGUID()); @@ -194,9 +195,10 @@ for (auto &TT : FS->type_checked_load_const_vcalls()) UsedTypeIds.insert(TT.VFunc.GUID); for (auto &ET : FS->calls()) { - AddUnsigned(ET.first.isDSOLocal()); + AddUnsigned(ET.first.isDSOLocal()); AddUsedCfiGlobal(ET.first.getGUID()); } + AddUnsigned(FS->immutableRefCount()); } }; @@ -797,7 +799,8 @@ return PrevailingType::Unknown; return It->second; }; - computeDeadSymbols(ThinLTO.CombinedIndex, GUIDPreservedSymbols, isPrevailing); + computeDeadSymbolsWithConstProp(ThinLTO.CombinedIndex, GUIDPreservedSymbols, + isPrevailing); // Setup output file to emit statistics. 
std::unique_ptr StatsFile = nullptr; Index: lib/LTO/ThinLTOCodeGenerator.cpp =================================================================== --- lib/LTO/ThinLTOCodeGenerator.cpp +++ lib/LTO/ThinLTOCodeGenerator.cpp @@ -646,7 +646,7 @@ auto isPrevailing = [&](GlobalValue::GUID G) { return PrevailingType::Unknown; }; - computeDeadSymbols(Index, GUIDPreservedSymbols, isPrevailing); + computeDeadSymbolsWithConstProp(Index, GUIDPreservedSymbols, isPrevailing); } /** Index: lib/Linker/IRMover.cpp =================================================================== --- lib/Linker/IRMover.cpp +++ lib/Linker/IRMover.cpp @@ -1062,11 +1062,6 @@ ValueMap.MD()[CU->getRawEnumTypes()].reset(nullptr); ValueMap.MD()[CU->getRawMacros()].reset(nullptr); ValueMap.MD()[CU->getRawRetainedTypes()].reset(nullptr); - // We import global variables only temporarily in order for instcombine - // and globalopt to perform constant folding and static constructor - // evaluation. After that elim-avail-extern will covert imported globals - // back to declarations, so we don't need debug info for them. 
- ValueMap.MD()[CU->getRawGlobalVariables()].reset(nullptr); // Imported entities only need to be mapped in if they have local // scope, as those might correspond to an imported entity inside a Index: lib/Transforms/IPO/FunctionImport.cpp =================================================================== --- lib/Transforms/IPO/FunctionImport.cpp +++ lib/Transforms/IPO/FunctionImport.cpp @@ -278,9 +278,7 @@ for (auto &RefSummary : VI.getSummaryList()) if (RefSummary->getSummaryKind() == GlobalValueSummary::GlobalVarKind && - !RefSummary->notEligibleToImport() && - !GlobalValue::isInterposableLinkage(RefSummary->linkage()) && - RefSummary->refs().empty()) { + canImportGV(RefSummary.get())) { ImportList[RefSummary->modulePath()].insert(VI.getGUID()); if (ExportLists) (*ExportLists)[RefSummary->modulePath()].insert(VI.getGUID()); @@ -792,6 +790,15 @@ NumLiveSymbols += LiveSymbols; } +// Compute dead symbols and propagate constants in combined index. +void llvm::computeDeadSymbolsWithConstProp( + ModuleSummaryIndex &Index, + const DenseSet &GUIDPreservedSymbols, + function_ref isPrevailing) { + computeDeadSymbols(Index, GUIDPreservedSymbols, isPrevailing); + Index.propagateConstants(GUIDPreservedSymbols); +} + /// Compute the set of summaries needed for a ThinLTO backend compilation of /// \p ModulePath. void llvm::gatherImportedSummariesForModule( @@ -986,6 +993,18 @@ return NewFn; } +// Internalize values that we marked with specific attribute +// in processGlobalForThinLTO. +static void internalizeImmutableGVs(Module &M) { + for (auto &GV : M.globals()) + if (auto *GVar = dyn_cast(&GV)) + if (GVar->hasAttribute("thinlto-immutable")) { + assert(!GVar->isDeclaration()); + GVar->setLinkage(GlobalValue::InternalLinkage); + GVar->setVisibility(GlobalValue::DefaultVisibility); + } +} + // Automatically import functions in Module \p DestModule based on the summaries // index. 
Expected FunctionImporter::importFunctions( @@ -1109,6 +1128,8 @@ NumImportedModules++; } + internalizeImmutableGVs(DestModule); + NumImportedFunctions += (ImportedCount - ImportedGVCount); NumImportedGlobalVars += ImportedGVCount; Index: lib/Transforms/Utils/FunctionImportUtils.cpp =================================================================== --- lib/Transforms/Utils/FunctionImportUtils.cpp +++ lib/Transforms/Utils/FunctionImportUtils.cpp @@ -16,6 +16,39 @@ #include "llvm/IR/InstIterator.h" using namespace llvm; +static bool canBeInternalized(GlobalVariable *GVar) { + // Perform a quick check if we can internalize a global variable + // This is a simplified version of what we already have in 'internalize' + // pass. + // Values with local linkage are ok. + if (GVar->hasLocalLinkage()) + return true; + + // We can't internalize a declaration + if (GVar->isDeclaration()) + return false; + + // Ignore comdat members for now (otherwise we have to analyze all members) + if (GVar->hasComdat()) + return false; + + // Ignore everything with appending linkage (llvm.ctors, llvm.dtors, etc.), + // available_externally linkage and values exported from dll. + return !(GVar->hasAppendingLinkage() || + GVar->hasAvailableExternallyLinkage() || + GVar->hasDLLExportStorageClass()); +} + +static GlobalValueSummary *getGVSummary(const ModuleSummaryIndex &Index, + const GlobalVariable *GVar) { + auto VI = Index.getValueInfo(GVar->getGUID()); + if (!VI) + return nullptr; + + const auto &SL = VI.getSummaryList(); + return SL.empty() ? nullptr : SL[0].get(); +} + /// Checks if we should import SGV as a definition, otherwise import as a /// declaration. bool FunctionImportGlobalProcessing::doImportAsDefinition( @@ -213,6 +246,15 @@ } } + // Mark read-only values which can be internalized with specific + // attribute. We'll internalize them after import is finished + // See internalizeImmutableGVs. 
+ if (auto *GVar = dyn_cast(&GV)) + if (canBeInternalized(GVar)) + if (auto *GVS = getGVSummary(ImportIndex, GVar)) + if (GVS->isConstant() && canImportGV(GVS)) + GVar->addAttribute("thinlto-immutable"); + bool DoPromote = false; if (GV.hasLocalLinkage() && ((DoPromote = shouldPromoteLocalToGlobal(&GV)) || isPerformingImport())) { @@ -230,7 +272,7 @@ // Remove functions imported as available externally defs from comdats, // as this is a declaration for the linker, and will be dropped eventually. // It is illegal for comdats to contain declarations. - auto *GO = dyn_cast_or_null(&GV); + auto *GO = dyn_cast(&GV); if (GO && GO->isDeclarationForLinker() && GO->hasComdat()) { // The IRMover should not have placed any imported declarations in // a comdat, so the only declaration that should be in a comdat @@ -260,3 +302,8 @@ FunctionImportGlobalProcessing ThinLTOProcessing(M, Index, GlobalsToImport); return ThinLTOProcessing.run(); } + +bool llvm::canImportGV(GlobalValueSummary *GVS) { + return !GlobalValue::isInterposableLinkage(GVS->linkage()) && + !GVS->notEligibleToImport() && GVS->refs().empty(); +} Index: test/Bitcode/summary_version.ll =================================================================== --- test/Bitcode/summary_version.ll +++ test/Bitcode/summary_version.ll @@ -2,7 +2,7 @@ ; RUN: opt -module-summary %s -o - | llvm-bcanalyzer -dump | FileCheck %s ; CHECK: +; CHECK: Index: test/Bitcode/thinlto-alias.ll =================================================================== --- test/Bitcode/thinlto-alias.ll +++ test/Bitcode/thinlto-alias.ll @@ -20,7 +20,7 @@ ; CHECK-NEXT: +; CHECK-NEXT: ; CHECK-NEXT: ; CHECK: ; COMBINED-NEXT: -; COMBINED-NEXT: +; COMBINED-NEXT: ; COMBINED-NEXT: ; CHECK-NEXT: ; CHECK: ; CHECK-NEXT: ; CHECK: &1 | FileCheck %s -; CHECK: D external +; CHECK: no symbols target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" target triple = "x86_64-unknown-linux-gnu" Index: test/ThinLTO/X86/Inputs/globals-import.ll 
=================================================================== --- test/ThinLTO/X86/Inputs/globals-import.ll +++ test/ThinLTO/X86/Inputs/globals-import.ll @@ -1,9 +1,11 @@ target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" target triple = "x86_64-pc-linux-gnu" -@baz = internal constant i32 10, align 4 +@baz = internal global i32 10, align 4 define linkonce_odr i32 @foo() { + ; Prevent thinlto from internalizing read-only variable @baz + store i32 20, i32* @baz, align 4 %1 = load i32, i32* @baz, align 4 ret i32 %1 } Index: test/ThinLTO/X86/Inputs/index-const-prop.ll =================================================================== --- test/ThinLTO/X86/Inputs/index-const-prop.ll +++ test/ThinLTO/X86/Inputs/index-const-prop.ll @@ -0,0 +1,64 @@ +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-pc-linux-gnu" + +@gBar = local_unnamed_addr global i32 2, align 4, !dbg !0 +@gFoo = internal unnamed_addr global i32 1, align 4, !dbg !6 + +; Function Attrs: norecurse nounwind readonly +define i32 @foo() local_unnamed_addr #0 !dbg !14 { + %1 = load i32, i32* @gFoo, align 4, !dbg !17 + ret i32 %1, !dbg !18 +} + +; Function Attrs: norecurse nounwind readonly +define i32 @bar() local_unnamed_addr #0 !dbg !19 { + %1 = load i32, i32* @gBar, align 4, !dbg !20 + ret i32 %1, !dbg !21 +} + +define void @baz() local_unnamed_addr !dbg !22 { + %1 = tail call i32 @rand(), !dbg !25 + store i32 %1, i32* @gFoo, align 4, !dbg !26 + %2 = tail call i32 @rand(), !dbg !27 + store i32 %2, i32* @gBar, align 4, !dbg !28 + ret void, !dbg !29 +} + +declare i32 @rand() local_unnamed_addr + +attributes #0 = { norecurse nounwind readonly } + +!llvm.dbg.cu = !{!2} +!llvm.module.flags = !{!9, !10, !11, !12} +!llvm.ident = !{!13} + +!0 = !DIGlobalVariableExpression(var: !1, expr: !DIExpression()) +!1 = distinct !DIGlobalVariable(name: "gBar", scope: !2, file: !3, line: 4, type: !8, isLocal: false, isDefinition: true) +!2 = distinct !DICompileUnit(language: 
DW_LANG_C99, file: !3, producer: "clang version 7.0.0 (trunk 332246)", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, enums: !4, globals: !5) +!3 = !DIFile(filename: "foo.c", directory: "/data/work/lto/roref/test") +!4 = !{} +!5 = !{!0, !6} +!6 = !DIGlobalVariableExpression(var: !7, expr: !DIExpression()) +!7 = distinct !DIGlobalVariable(name: "gFoo", scope: !2, file: !3, line: 3, type: !8, isLocal: true, isDefinition: true) +!8 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed) +!9 = !{i32 2, !"Dwarf Version", i32 4} +!10 = !{i32 2, !"Debug Info Version", i32 3} +!11 = !{i32 1, !"wchar_size", i32 4} +!12 = !{i32 7, !"PIC Level", i32 2} +!13 = !{!"clang version 7.0.0 (trunk 332246)"} +!14 = distinct !DISubprogram(name: "foo", scope: !3, file: !3, line: 6, type: !15, isLocal: false, isDefinition: true, scopeLine: 6, isOptimized: true, unit: !2, retainedNodes: !4) +!15 = !DISubroutineType(types: !16) +!16 = !{!8} +!17 = !DILocation(line: 7, column: 10, scope: !14) +!18 = !DILocation(line: 7, column: 3, scope: !14) +!19 = distinct !DISubprogram(name: "bar", scope: !3, file: !3, line: 10, type: !15, isLocal: false, isDefinition: true, scopeLine: 10, isOptimized: true, unit: !2, retainedNodes: !4) +!20 = !DILocation(line: 11, column: 10, scope: !19) +!21 = !DILocation(line: 11, column: 3, scope: !19) +!22 = distinct !DISubprogram(name: "baz", scope: !3, file: !3, line: 14, type: !23, isLocal: false, isDefinition: true, scopeLine: 14, isOptimized: true, unit: !2, retainedNodes: !4) +!23 = !DISubroutineType(types: !24) +!24 = !{null} +!25 = !DILocation(line: 15, column: 10, scope: !22) +!26 = !DILocation(line: 15, column: 8, scope: !22) +!27 = !DILocation(line: 16, column: 10, scope: !22) +!28 = !DILocation(line: 16, column: 8, scope: !22) +!29 = !DILocation(line: 17, column: 1, scope: !22) Index: test/ThinLTO/X86/dot-dumper.ll =================================================================== --- test/ThinLTO/X86/dot-dumper.ll +++ 
test/ThinLTO/X86/dot-dumper.ll @@ -20,7 +20,7 @@ ; STRUCTURE-DAG: subgraph cluster_1 ; STRUCTURE: // Cross-module edges: ; STRUCTURE-DAG: M0_{{[0-9]+}} -> M1_{{[0-9]+}} // call -; STRUCTURE-DAG: M0_{{[0-9]+}} -> M1_{{[0-9]+}} [{{.*}}]; // ref +; STRUCTURE-DAG: M0_{{[0-9]+}} -> M1_{{[0-9]+}} [{{.*}}]; // const-ref ; STRUCTURE-NEXT: } ; CLUSTER0: // Module: {{.*}}1.bc @@ -33,13 +33,13 @@ ; CLUSTER1: // Module: {{.*}}2.bc ; CLUSTER1-NEXT: subgraph cluster_1 { -; CLUSTER1-DAG: M1_[[A:[0-9]+]] [{{.*}}A|extern{{.*}}]; // variable +; CLUSTER1-DAG: M1_[[A:[0-9]+]] [{{.*}}A|extern{{.*}}]; // variable, immutable ; CLUSTER1-DAG: M1_[[FOO:[0-9]+]] [{{.*}}foo|extern{{.*}}]; // function, not eligible to import -; CLUSTER1-DAG: M1_[[B:[0-9]+]] [{{.*}}B|extern{{.*}}]; // variable +; CLUSTER1-DAG: M1_[[B:[0-9]+]] [{{.*}}B|extern{{.*}}]; // variable, immutable ; CLUSTER1-DAG: M1_[[BAR:[0-9]+]] [{{.*}}bar|extern{{.*}}]; // function, dead ; CLUSTER1-NEXT: // Edges: -; CLUSTER1-DAG: M1_[[FOO]] -> M1_[[B]] [{{.*}}]; // ref -; CLUSTER1-DAG: M1_[[FOO]] -> M1_[[A]] [{{.*}}]; // ref +; CLUSTER1-DAG: M1_[[FOO]] -> M1_[[B]] [{{.*}}]; // const-ref +; CLUSTER1-DAG: M1_[[FOO]] -> M1_[[A]] [{{.*}}]; // const-ref ; CLUSTER1-DAG: } target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" Index: test/ThinLTO/X86/globals-import-const-fold.ll =================================================================== --- test/ThinLTO/X86/globals-import-const-fold.ll +++ test/ThinLTO/X86/globals-import-const-fold.ll @@ -2,12 +2,12 @@ ; RUN: opt -module-summary %p/Inputs/globals-import-cf-baz.ll -o %t2.bc ; RUN: llvm-lto -thinlto-action=thinlink %t1.bc %t2.bc -o %t3.index.bc -; RUN: llvm-lto -thinlto-action=import %t1.bc %t2.bc -thinlto-index=%t3.index.bc +; RUN: llvm-lto -thinlto-action=import -exported-symbol=main %t1.bc -thinlto-index=%t3.index.bc ; RUN: llvm-dis %t1.bc.thinlto.imported.bc -o - | FileCheck --check-prefix=IMPORT %s ; RUN: llvm-lto -thinlto-action=optimize %t1.bc.thinlto.imported.bc -o 
%t1.bc.thinlto.opt.bc ; RUN: llvm-dis %t1.bc.thinlto.opt.bc -o - | FileCheck --check-prefix=OPTIMIZE %s -; IMPORT: @baz = available_externally local_unnamed_addr constant i32 10 +; IMPORT: @baz = internal local_unnamed_addr constant i32 10 ; OPTIMIZE: define i32 @main() ; OPTIMIZE-NEXT: ret i32 10 Index: test/ThinLTO/X86/globals-import.ll =================================================================== --- test/ThinLTO/X86/globals-import.ll +++ test/ThinLTO/X86/globals-import.ll @@ -8,20 +8,20 @@ ; RUN: opt -module-summary %p/Inputs/globals-import.ll -o %t2b.bc ; RUN: llvm-lto -thinlto-action=thinlink %t1.bc %t2.bc %t2b.bc -o %t3.index.bc -; RUN: llvm-lto -thinlto-action=import %t1.bc -thinlto-index=%t3.index.bc +; RUN: llvm-lto -thinlto-action=import %t1.bc -exported-symbol=main -thinlto-index=%t3.index.bc ; RUN: llvm-dis %t1.bc.thinlto.imported.bc -o - | FileCheck --check-prefix=IMPORT %s -; RUN: llvm-lto -thinlto-action=promote %t2.bc -thinlto-index=%t3.index.bc -; RUN: llvm-lto -thinlto-action=promote %t2b.bc -thinlto-index=%t3.index.bc +; RUN: llvm-lto -thinlto-action=promote %t2.bc -exported-symbol=main -thinlto-index=%t3.index.bc +; RUN: llvm-lto -thinlto-action=promote %t2b.bc -exported-symbol=main -thinlto-index=%t3.index.bc ; RUN: llvm-dis %t2.bc.thinlto.promoted.bc -o - | FileCheck --check-prefix=PROMOTE1 %s ; RUN: llvm-dis %t2b.bc.thinlto.promoted.bc -o - | FileCheck --check-prefix=PROMOTE2 %s -; IMPORT: @baz.llvm.0 = available_externally hidden constant i32 10, align 4 +; IMPORT: @baz.llvm.0 = available_externally hidden global i32 10, align 4 -; PROMOTE1: @baz.llvm.0 = hidden constant i32 10, align 4 +; PROMOTE1: @baz.llvm.0 = hidden global i32 10, align 4 ; PROMOTE1: define weak_odr i32 @foo() { ; Second copy of IR object should not have any symbols imported/promoted. 
-; PROMOTE2: @baz = internal constant i32 10, align 4 +; PROMOTE2: @baz = internal global i32 10, align 4 ; PROMOTE2: define available_externally i32 @foo() { target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" Index: test/ThinLTO/X86/index-const-prop.ll =================================================================== --- test/ThinLTO/X86/index-const-prop.ll +++ test/ThinLTO/X86/index-const-prop.ll @@ -0,0 +1,38 @@ +; Check constant propagation in thinlto combined summary. This allows us to do 2 things: +; 1. Internalize global definition which is not used externally if all accesses to it are read-only +; 2. Make a local copy of internal definition if all accesses to it are read-only. This allows constant +; folding it during optimization phase. + +; RUN: opt -module-summary %s -o %t1.bc +; RUN: opt -module-summary %p/Inputs/index-const-prop.ll -o %t2.bc +; RUN: llvm-lto -thinlto-action=thinlink -o %t3.index.bc %t1.bc %t2.bc +; RUN: llvm-lto -thinlto-action=import -exported-symbol=main %t1.bc -thinlto-index=%t3.index.bc -o %t1.imported.bc +; RUN: llvm-dis %t1.imported.bc -o - | FileCheck %s --check-prefix=IMPORT +; RUN: llvm-lto -thinlto-action=optimize %t1.imported.bc -o - | llvm-dis - -o - | FileCheck %s --check-prefix=OPTIMIZE + +; Check that we don't internalize gBar when it is exported +; RUN: llvm-lto -thinlto-action=import -exported-symbol main -exported-symbol gBar %t1.bc -thinlto-index=%t3.index.bc -o %t1.imported2.bc +; RUN: llvm-dis %t1.imported2.bc -o - | FileCheck %s --check-prefix=IMPORT2 + +; IMPORT: @gFoo.llvm.0 = internal unnamed_addr global i32 1, align 4, !dbg !0 +; IMPORT-NEXT: @gBar = internal local_unnamed_addr global i32 2, align 4, !dbg !5 +; IMPORT: !DICompileUnit({{.*}}, globals: !{{[0-9]+}}) + +; OPTIMIZE: define i32 @main +; OPTIMIZE-NEXT: ret i32 3 + +; IMPORT2: @gBar = available_externally local_unnamed_addr global i32 2, align 4, !dbg !5 + +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target triple =
"x86_64-pc-linux-gnu" + +define i32 @main() local_unnamed_addr { + %call = tail call i32 bitcast (i32 (...)* @foo to i32 ()*)() + %call1 = tail call i32 bitcast (i32 (...)* @bar to i32 ()*)() + %add = add nsw i32 %call1, %call + ret i32 %add +} + +declare i32 @foo(...) local_unnamed_addr + +declare i32 @bar(...) local_unnamed_addr Index: test/ThinLTO/X86/index-const-prop2.ll =================================================================== --- test/ThinLTO/X86/index-const-prop2.ll +++ test/ThinLTO/X86/index-const-prop2.ll @@ -0,0 +1,54 @@ +; Check constant propagation in thinlto combined summary. This allows us to do 2 things: +; 1. Internalize global definition which is not used externally if all accesses to it are read-only +; 2. Make a local copy of internal definition if all accesses to it are read-only. This allows constant +; folding it during optimization phase. +; RUN: opt -module-summary %s -o %t1.bc +; RUN: opt -module-summary %p/Inputs/index-const-prop.ll -o %t2.bc +; RUN: llvm-lto2 run %t1.bc %t2.bc -save-temps \ +; RUN: -r=%t2.bc,foo,pl \ +; RUN: -r=%t2.bc,bar,pl \ +; RUN: -r=%t2.bc,baz,pl \ +; RUN: -r=%t2.bc,rand, \ +; RUN: -r=%t2.bc,gBar,pl \ +; RUN: -r=%t1.bc,main,plx \ +; RUN: -r=%t1.bc,foo, \ +; RUN: -r=%t1.bc,bar, \ +; RUN: -o %t3 +; RUN: llvm-dis %t3.1.3.import.bc -o - | FileCheck %s --check-prefix=IMPORT +; RUN: llvm-dis %t3.1.5.precodegen.bc -o - | FileCheck %s --check-prefix=CODEGEN + +; Now check that we won't internalize global (gBar) if it's externally referenced +; RUN: llvm-lto2 run %t1.bc %t2.bc -save-temps \ +; RUN: -r=%t2.bc,foo,pl \ +; RUN: -r=%t2.bc,bar,pl \ +; RUN: -r=%t2.bc,baz,pl \ +; RUN: -r=%t2.bc,rand, \ +; RUN: -r=%t2.bc,gBar,plx \ +; RUN: -r=%t1.bc,main,plx \ +; RUN: -r=%t1.bc,foo, \ +; RUN: -r=%t1.bc,bar, \ +; RUN: -o %t3 +; RUN: llvm-dis %t3.1.3.import.bc -o - | FileCheck %s --check-prefix=IMPORT2 + +; IMPORT: @gFoo.llvm.0 = internal unnamed_addr global i32 1, align 4 +; IMPORT-NEXT: @gBar = internal local_unnamed_addr
global i32 2, align 4 +; IMPORT: !DICompileUnit({{.*}}, globals: !{{[0-9]+}}) + +; CODEGEN: i32 @main() +; CODEGEN-NEXT: ret i32 3 + +; IMPORT2: @gBar = available_externally dso_local local_unnamed_addr global i32 2, align 4 + +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-pc-linux-gnu" + +define i32 @main() local_unnamed_addr { + %call = tail call i32 bitcast (i32 (...)* @foo to i32 ()*)() + %call1 = tail call i32 bitcast (i32 (...)* @bar to i32 ()*)() + %add = add nsw i32 %call1, %call + ret i32 %add +} + +declare i32 @foo(...) local_unnamed_addr + +declare i32 @bar(...) local_unnamed_addr