Index: include/llvm/Bitcode/LLVMBitCodes.h
===================================================================
--- include/llvm/Bitcode/LLVMBitCodes.h
+++ include/llvm/Bitcode/LLVMBitCodes.h
@@ -268,6 +268,8 @@
   // n x (typeid, kind, name, numrba,
   //      numrba x (numarg, numarg x arg, kind, info, byte, bit))]
   FS_TYPE_ID = 21,
+  // List of function constant references
+  FS_CONST_REF_LIST = 22,
 };

 enum MetadataCodes {
Index: include/llvm/IR/ModuleSummaryIndex.h
===================================================================
--- include/llvm/IR/ModuleSummaryIndex.h
+++ include/llvm/IR/ModuleSummaryIndex.h
@@ -17,6 +17,7 @@
 #define LLVM_IR_MODULESUMMARYINDEX_H

 #include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/BitVector.h"
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/SmallString.h"
@@ -261,11 +262,14 @@
     /// within the same linkage unit.
     unsigned DSOLocal : 1;

+    /// Indicates that the global isn't modified by any live function.
+    unsigned Constant : 1;
+
     /// Convenience Constructors
     explicit GVFlags(GlobalValue::LinkageTypes Linkage,
                      bool NotEligibleToImport, bool Live, bool IsLocal)
         : Linkage(Linkage), NotEligibleToImport(NotEligibleToImport),
-          Live(Live), DSOLocal(IsLocal) {}
+          Live(Live), DSOLocal(IsLocal), Constant(false) {}
   };

 private:
@@ -346,6 +350,10 @@
   bool isDSOLocal() const { return Flags.DSOLocal; }

+  void setConstant(bool Constant) { Flags.Constant = Constant; }
+
+  bool isConstant() const { return Flags.Constant; }
+
   /// Flag that this global value cannot be imported.
   void setNotEligibleToImport() { Flags.NotEligibleToImport = true; }
@@ -500,6 +508,10 @@
   /// List of call edge pairs from this function.
   std::vector<EdgeTy> CallGraphEdgeList;

+  /// Bit vector, where each bit corresponds to a single outgoing reference.
+  /// A value of 1 means read-only access to the referenced value.
+  BitVector RefAccessBits;
+
   std::unique_ptr<TypeIdInfo> TIdInfo;

 public:
@@ -523,6 +535,14 @@
         std::move(TypeCheckedLoadConstVCalls)});
   }

+  const BitVector &getRefAccessBits() const { return RefAccessBits; }
+  void setRefAccessBits(BitVector &&BV) {
+    // We accept either an empty bit vector (meaning all refs are read-write)
+    // or one whose size equals the number of references.
+    assert(BV.empty() || BV.size() == refs().size());
+    RefAccessBits = std::move(BV);
+  }
+
   /// Check if this is a function summary.
   static bool classof(const GlobalValueSummary *GVS) {
     return GVS->getSummaryKind() == FunctionKind;
   }
@@ -791,6 +811,10 @@
             .first;
   }

+  void propagateConstantsForFunction(FunctionSummary *FS);
+  void collectMutableGVSummaries(GlobalValueSummary *Root,
+                                 DenseSet<GlobalValueSummary *> &MutableGVSet);
+
 public:
   // See HaveGVs variable comment.
   ModuleSummaryIndex(bool HaveGVs) : HaveGVs(HaveGVs), Saver(Alloc) {}
@@ -1103,6 +1127,9 @@
   /// Print out strongly connected components for debugging.
   void dumpSCCs(raw_ostream &OS);
+
+  /// Analyze the index and detect unmodified globals.
+  void propagateConstants();
 };

 /// GraphTraits definition to build SCC for the index
Index: include/llvm/LTO/LTO.h
===================================================================
--- include/llvm/LTO/LTO.h
+++ include/llvm/LTO/LTO.h
@@ -58,7 +58,7 @@
 /// must apply the changes to the Module via thinLTOInternalizeModule.
 void thinLTOInternalizeAndPromoteInIndex(
     ModuleSummaryIndex &Index,
-    function_ref<bool(StringRef, GlobalValue::GUID)> isExported);
+    function_ref<bool(GlobalValueSummary *, GlobalValue::GUID)> isExported);

 namespace lto {
Index: include/llvm/Linker/IRMover.h
===================================================================
--- include/llvm/Linker/IRMover.h
+++ include/llvm/Linker/IRMover.h
@@ -73,9 +73,11 @@
   /// if the GlobalValue needs to be added to the \p ValuesToLink and linked.
   /// - \p IsPerformingImport is true when this IR link is to perform ThinLTO
   ///   function importing from Src.
-  Error move(std::unique_ptr<Module> Src, ArrayRef<GlobalValue *> ValuesToLink,
-             std::function<void(GlobalValue &GV, ValueAdder Add)> AddLazyFor,
-             bool IsPerformingImport);
+  Error
+  move(std::unique_ptr<Module> Src, ArrayRef<GlobalValue *> ValuesToLink,
+       std::function<void(GlobalValue &GV, ValueAdder Add)> AddLazyFor,
+       std::function<void(GlobalValue *, GlobalValue *)> MaterializerHook,
+       bool IsPerformingImport);

   Module &getModule() { return Composite; }

 private:
Index: lib/Analysis/ModuleSummaryAnalysis.cpp
===================================================================
--- lib/Analysis/ModuleSummaryAnalysis.cpp
+++ lib/Analysis/ModuleSummaryAnalysis.cpp
@@ -14,6 +14,7 @@
 #include "llvm/Analysis/ModuleSummaryAnalysis.h"
 #include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/BitVector.h"
 #include "llvm/ADT/DenseSet.h"
 #include "llvm/ADT/MapVector.h"
 #include "llvm/ADT/STLExtras.h"
@@ -208,6 +209,38 @@
   }
 }

+static bool isNonVolatileLoad(const Instruction *I) {
+  if (const auto *LI = dyn_cast<LoadInst>(I))
+    return !LI->isVolatile();
+
+  return false;
+}
+
+static void processInstructionRefs(SetVector<ValueInfo> &FuncRefs,
+                                   const Instruction *I,
+                                   SetVector<ValueInfo> &InstRefs,
+                                   DenseMap<ValueInfo, bool> &RefAccessMap) {
+  for (auto &VI : InstRefs) {
+    bool NonVolatileLoad = isNonVolatileLoad(I);
+    auto P = RefAccessMap.insert({VI, NonVolatileLoad});
+    if (!P.second && !NonVolatileLoad)
+      (*P.first).second = false;
+
+    FuncRefs.insert(VI);
+  }
+}
+
+static BitVector computeRefAccessBits(SetVector<ValueInfo> &Refs,
+                                      DenseMap<ValueInfo, bool> &RefAccessMap) {
+  BitVector Res;
+  Res.resize(Refs.size());
+  int I = 0;
+
+  for (auto &VI : Refs)
+    Res[I++] = RefAccessMap[VI];
+  return Res;
+}
+
 static void computeFunctionSummary(ModuleSummaryIndex &Index, const Module &M,
                                    const Function &F, BlockFrequencyInfo *BFI,
@@ -233,6 +266,7 @@
   // Add personality function, prefix data and prologue data to function's ref
   // list.
   findRefEdges(Index, &F, RefEdges, Visited);
+  DenseMap<ValueInfo, bool> RefAccessMap;

   bool HasInlineAsmMaybeReferencingInternal = false;
   for (const BasicBlock &BB : F)
@@ -240,7 +274,11 @@
       if (isa<DbgInfoIntrinsic>(I))
         continue;
       ++NumInsts;
-      findRefEdges(Index, &I, RefEdges, Visited);
+
+      SetVector<ValueInfo> InstRefEdges;
+      findRefEdges(Index, &I, InstRefEdges, Visited);
+      processInstructionRefs(RefEdges, &I, InstRefEdges, RefAccessMap);
+
       auto CS = ImmutableCallSite(&I);
       if (!CS)
         continue;
@@ -347,6 +385,7 @@
       F.getAttributes().hasFnAttribute(Attribute::NoInline);
   GlobalValueSummary::GVFlags Flags(F.getLinkage(), NotEligibleForImport,
                                     /* Live = */ false, F.isDSOLocal());
+  BitVector RefAccessBits = computeRefAccessBits(RefEdges, RefAccessMap);
   FunctionSummary::FFlags FunFlags{
       F.hasFnAttribute(Attribute::ReadNone),
       F.hasFnAttribute(Attribute::ReadOnly),
@@ -361,6 +400,7 @@
       TypeCheckedLoadConstVCalls.takeVector());
   if (NonRenamableLocal)
     CantBePromoted.insert(F.getGUID());
+  FuncSummary->setRefAccessBits(std::move(RefAccessBits));
   Index.addGlobalValueSummary(F, std::move(FuncSummary));
 }
Index: lib/Bitcode/Reader/BitcodeReader.cpp
===================================================================
--- lib/Bitcode/Reader/BitcodeReader.cpp
+++ lib/Bitcode/Reader/BitcodeReader.cpp
@@ -5160,6 +5160,16 @@
       parseWholeProgramDevirtResolution(Record, Strtab, Slot, TypeId);
 }

+static BitVector computeRefAccessBits(const std::vector<uint64_t> &RORefIDs,
+                                      size_t NumRefs) {
+  BitVector Res;
+  Res.resize(NumRefs);
+
+  for (auto RefID : RORefIDs)
+    Res.set(RefID);
+  return Res;
+}
+
 // Eagerly parse the entire summary block. This populates the GlobalValueSummary
 // objects in the index.
 Error ModuleSummaryIndexBitcodeReader::parseEntireSummary(unsigned ID) {
@@ -5196,6 +5206,9 @@
   std::vector<FunctionSummary::ConstVCall> PendingTypeTestAssumeConstVCalls,
       PendingTypeCheckedLoadConstVCalls;

+  // Contains the indices of read-only (RO) references.
+  std::vector<uint64_t> RORefIDs;
+
   while (true) {
     BitstreamEntry Entry = Stream.advanceSkippingSubblocks();
@@ -5281,6 +5294,7 @@
       std::vector<FunctionSummary::EdgeTy> Calls = makeCallList(
          ArrayRef<uint64_t>(Record).slice(CallGraphEdgeStartIndex),
          IsOldProfileFormat, HasProfile, HasRelBF);
+      BitVector RefAccessBits = computeRefAccessBits(RORefIDs, Refs.size());
       auto FS = llvm::make_unique<FunctionSummary>(
           Flags, InstCount, getDecodedFFlags(RawFunFlags), std::move(Refs),
           std::move(Calls), std::move(PendingTypeTests),
@@ -5293,9 +5307,11 @@
       PendingTypeCheckedLoadVCalls.clear();
       PendingTypeTestAssumeConstVCalls.clear();
       PendingTypeCheckedLoadConstVCalls.clear();
+      RORefIDs.clear();
       auto VIAndOriginalGUID = getValueInfoFromValueId(ValueID);
       FS->setModulePath(getThisModule()->first());
       FS->setOriginalName(VIAndOriginalGUID.second);
+      FS->setRefAccessBits(std::move(RefAccessBits));
       TheIndex.addGlobalValueSummary(VIAndOriginalGUID.first, std::move(FS));
       break;
     }
@@ -5374,6 +5390,7 @@
          ArrayRef<uint64_t>(Record).slice(CallGraphEdgeStartIndex),
          IsOldProfileFormat, HasProfile, false);
       ValueInfo VI = getValueInfoFromValueId(ValueID).first;
+      BitVector RefAccessBits = computeRefAccessBits(RORefIDs, Refs.size());
       auto FS = llvm::make_unique<FunctionSummary>(
           Flags, InstCount, getDecodedFFlags(RawFunFlags), std::move(Refs),
           std::move(Edges), std::move(PendingTypeTests),
@@ -5386,9 +5403,11 @@
       PendingTypeCheckedLoadVCalls.clear();
       PendingTypeTestAssumeConstVCalls.clear();
       PendingTypeCheckedLoadConstVCalls.clear();
+      RORefIDs.clear();
       LastSeenSummary = FS.get();
       LastSeenGUID = VI.getGUID();
       FS->setModulePath(ModuleIdMap[ModuleId]);
+      FS->setRefAccessBits(std::move(RefAccessBits));
       TheIndex.addGlobalValueSummary(VI, std::move(FS));
       break;
     }
@@ -5445,6 +5464,11 @@
       LastSeenGUID = 0;
       break;
     }
+    case bitc::FS_CONST_REF_LIST:
+      assert(RORefIDs.empty());
+      RORefIDs.insert(RORefIDs.end(), Record.begin(), Record.end());
+      break;
+
     case bitc::FS_TYPE_TESTS:
       assert(PendingTypeTests.empty());
       PendingTypeTests.insert(PendingTypeTests.end(), Record.begin(),
Index: lib/Bitcode/Writer/BitcodeWriter.cpp
===================================================================
--- lib/Bitcode/Writer/BitcodeWriter.cpp
+++ lib/Bitcode/Writer/BitcodeWriter.cpp
@@ -3445,6 +3445,26 @@
                        W.second);
 }

+static void
+writeFunctionRefAccess(BitstreamWriter &Stream, GlobalValueSummary *Summary,
+                       llvm::function_ref<bool(const ValueInfo &)> Filter) {
+  SmallVector<uint64_t, 64> Record;
+  auto *FS = cast<FunctionSummary>(Summary);
+  const BitVector &BV = FS->getRefAccessBits();
+
+  ArrayRef<ValueInfo> Refs = FS->refs();
+  assert(BV.empty() || BV.size() == Refs.size());
+
+  // Unabbreviated records are VBR-6 encoded. To be more space efficient
+  // we emit only the indices of read-only refs.
+  for (size_t I = 0; I < BV.size(); ++I)
+    if (BV[I] && Filter(Refs[I]))
+      Record.push_back(I);
+
+  if (!Record.empty())
+    Stream.EmitRecord(bitc::FS_CONST_REF_LIST, Record);
+}
+
 // Helper to emit a single function summary record.
 void ModuleBitcodeWriterBase::writePerModuleFunctionSummaryRecord(
     SmallVector<uint64_t, 64> &NameVals, GlobalValueSummary *Summary,
@@ -3455,6 +3475,7 @@
   FunctionSummary *FS = cast<FunctionSummary>(Summary);
   std::set<GlobalValue::GUID> ReferencedTypeIds;
   writeFunctionTypeMetadataRecords(Stream, FS, ReferencedTypeIds);
+  writeFunctionRefAccess(Stream, FS, [](const ValueInfo &Ref) { return true; });

   NameVals.push_back(getEncodedGVSummaryFlags(FS->flags()));
   NameVals.push_back(FS->instCount());
@@ -3765,6 +3786,9 @@
     auto *FS = cast<FunctionSummary>(S);
     writeFunctionTypeMetadataRecords(Stream, FS, ReferencedTypeIds);
+    writeFunctionRefAccess(Stream, FS, [this](const ValueInfo &Ref) {
+      return !!getValueId(Ref.getGUID());
+    });

     NameVals.push_back(*ValueId);
     NameVals.push_back(Index.getModuleId(FS->modulePath()));
@@ -3809,7 +3833,7 @@
         continue;
       // The mapping from OriginalId to GUID may return a GUID
       // that corresponds to a static variable. Filter it out here.
-      // This can happen when 
+      // This can happen when
      // 1) There is a call to a library function which does not have
      //    a CallValidId;
      // 2) There is a static variable with the OriginalGUID identical
Index: lib/IR/ModuleSummaryIndex.cpp
===================================================================
--- lib/IR/ModuleSummaryIndex.cpp
+++ lib/IR/ModuleSummaryIndex.cpp
@@ -84,6 +84,76 @@
   return false;
 }

+void ModuleSummaryIndex::propagateConstantsForFunction(FunctionSummary *FS) {
+  int RefNo = 0;
+  if (!FS->isLive())
+    return;
+
+  const BitVector &BV = FS->getRefAccessBits();
+  for (auto &VI : FS->refs()) {
+    for (auto &Summary : VI.getSummaryList())
+      if (!BV[RefNo])
+        Summary->setConstant(false);
+    ++RefNo;
+  }
+}
+
+void ModuleSummaryIndex::collectMutableGVSummaries(
+    GlobalValueSummary *RootGVS, DenseSet<GlobalValueSummary *> &MutableGVSet) {
+  if (!RootGVS->isLive() || RootGVS->isConstant())
+    return;
+
+  SmallVector<GlobalValueSummary *, 8> Worklist;
+  if (MutableGVSet.insert(RootGVS).second)
+    Worklist.push_back(RootGVS);
+
+  while (!Worklist.empty()) {
+    auto *GVS = Worklist.pop_back_val();
+    for (auto &VI : GVS->refs())
+      for (auto &Summary : VI.getSummaryList())
+        if (Summary->getSummaryKind() == GlobalValueSummary::GlobalVarKind &&
+            MutableGVSet.insert(Summary.get()).second)
+          Worklist.push_back(Summary.get());
+  }
+}
+
+void ModuleSummaryIndex::propagateConstants() {
+  auto ForEachSummary =
+      [this](llvm::function_ref<void(GlobalValueSummary *)> F) {
+        for (auto &GlobalList : *this)
+          for (auto &Summary : GlobalList.second.SummaryList)
+            F(Summary.get());
+      };
+
+  // Step 1: mark all summaries corresponding to global variables constant.
+  ForEachSummary([](GlobalValueSummary *S) {
+    S->setConstant(S->getSummaryKind() == GlobalValueSummary::GlobalVarKind);
+  });
+
+  // Step 2: for each function summary check its refs. If a ref is not
+  // read-only, then the referenced summary list is not constant either.
+  ForEachSummary([this](GlobalValueSummary *S) {
+    if (auto *FS = dyn_cast<FunctionSummary>(S))
+      propagateConstantsForFunction(FS);
+  });
+
+  // Step 3: for each global variable check if it is still constant. If it
+  // isn't, then all of its refs are not constant either.
+  DenseSet<GlobalValueSummary *> MutableGVSet;
+  ForEachSummary([&](GlobalValueSummary *S) {
+    if (S->getSummaryKind() == GlobalValueSummary::GlobalVarKind)
+      collectMutableGVSummaries(S, MutableGVSet);
+  });
+
+  for (auto *Summary : MutableGVSet)
+    Summary->setConstant(false);
+
+  // Step 4: aliases inherit the constant attribute from their aliasee.
+  ForEachSummary([this](GlobalValueSummary *S) {
+    if (auto *AS = dyn_cast<AliasSummary>(S))
+      AS->setConstant(AS->getAliasee().isConstant());
+  });
+}
+
 // TODO: write a graphviz dumper for SCCs (see ModuleSummaryIndex::exportToDot)
 // then delete this function and update its tests
 LLVM_DUMP_METHOD
@@ -108,6 +178,7 @@
 struct Attributes {
   void add(const Twine &Name, const Twine &Value,
            const Twine &Comment = Twine());
+  void addComment(const Twine &Comment);
   std::string getAsString() const;

   std::vector<std::string> Attrs;
@@ -129,6 +200,10 @@
   A += Value.str();
   A += "\"";
   Attrs.push_back(A);
+  addComment(Comment);
+}
+
+void Attributes::addComment(const Twine &Comment) {
   if (!Comment.isTriviallyEmpty()) {
     if (Comments.empty())
       Comments = " // ";
@@ -227,6 +302,19 @@
          << "\"]; // defined externally\n";
 }

+static bool isReadOnlyRef(GlobalValueSummary *GVS, const ValueInfo &Ref) {
+  auto *FS = dyn_cast<FunctionSummary>(GVS);
+  if (!FS)
+    return false;
+
+  const BitVector &BV = FS->getRefAccessBits();
+  if (BV.empty())
+    return false;
+
+  ArrayRef<ValueInfo> FuncRefs = FS->refs();
+  return BV[&Ref - &FuncRefs[0]];
+}
+
 void ModuleSummaryIndex::exportToDot(raw_ostream& OS) const {
   std::vector<Edge> CrossModuleEdges;
   DenseMap<GlobalValue::GUID, std::vector<uint64_t>> NodeMap;
@@ -245,10 +333,11 @@
                   int DstMod, GlobalValue::GUID DstId, int TypeOrHotness) {
     // 0 corresponds to alias edge, 1 to ref edge, 2 to call with unknown
     // hotness, ...
-    TypeOrHotness += 2;
+    TypeOrHotness += 3;
     static const char *EdgeAttrs[] = {
         " [style=dotted]; // alias",
         " [style=dashed]; // ref",
+        " [style=dashed,color=forestgreen]; // const-ref",
         " // call (hotness : Unknown)",
         " [color=blue]; // call (hotness : Cold)",
         " // call (hotness : None)",
@@ -291,6 +380,8 @@
         A.add("shape", "box");
       } else {
         A.add("shape", "Mrecord", "variable");
+        if (Flags.Live && SummaryIt.second->isConstant())
+          A.addComment("immutable");
       }

       auto VI = getValueInfo(SummaryIt.first);
@@ -308,13 +399,13 @@
     for (auto &SummaryIt : GVSMap) {
       auto *GVS = SummaryIt.second;
       for (auto &R : GVS->refs())
-        Draw(SummaryIt.first, R.getGUID(), -1);
+        Draw(SummaryIt.first, R.getGUID(), isReadOnlyRef(GVS, R) ? -1 : -2);

       if (auto *AS = dyn_cast_or_null<AliasSummary>(SummaryIt.second)) {
         auto AliaseeOrigId = AS->getAliasee().getOriginalName();
         auto AliaseeId = getGUIDFromOriginalID(AliaseeOrigId);
-        Draw(SummaryIt.first, AliaseeId ? AliaseeId : AliaseeOrigId, -2);
+        Draw(SummaryIt.first, AliaseeId ? AliaseeId : AliaseeOrigId, -3);
         continue;
       }
Index: lib/LTO/LTO.cpp
===================================================================
--- lib/LTO/LTO.cpp
+++ lib/LTO/LTO.cpp
@@ -340,9 +340,10 @@
 static void thinLTOInternalizeAndPromoteGUID(
     GlobalValueSummaryList &GVSummaryList, GlobalValue::GUID GUID,
-    function_ref<bool(StringRef, GlobalValue::GUID)> isExported) {
+    function_ref<bool(GlobalValueSummary *, GlobalValue::GUID)> isExported) {
   for (auto &S : GVSummaryList) {
-    if (isExported(S->modulePath(), GUID)) {
+    if (isExported(S.get(), GUID)) {
+      S->setConstant(false);
       if (GlobalValue::isLocalLinkage(S->linkage()))
         S->setLinkage(GlobalValue::ExternalLinkage);
     } else if (!GlobalValue::isLocalLinkage(S->linkage()))
@@ -354,7 +355,7 @@
 // as external and non-exported values as internal.
 void llvm::thinLTOInternalizeAndPromoteInIndex(
     ModuleSummaryIndex &Index,
-    function_ref<bool(StringRef, GlobalValue::GUID)> isExported) {
+    function_ref<bool(GlobalValueSummary *, GlobalValue::GUID)> isExported) {
   for (auto &I : Index)
     thinLTOInternalizeAndPromoteGUID(I.second.SummaryList, I.first, isExported);
 }
@@ -715,6 +716,7 @@
   return RegularLTO.Mover->move(std::move(Mod.M), Keep,
                                 [](GlobalValue &, IRMover::ValueAdder) {},
+                                [](GlobalValue *, GlobalValue *) {},
                                 /* IsPerformingImport */ false);
 }
@@ -800,6 +802,9 @@
   };
   computeDeadSymbols(ThinLTO.CombinedIndex, GUIDPreservedSymbols, isPrevailing);

+  // Run constant propagation, since we already know which symbols are dead.
+  ThinLTO.CombinedIndex.propagateConstants();
+
   // Setup output file to emit statistics.
   std::unique_ptr<ToolOutputFile> StatsFile = nullptr;
   if (!Conf.StatsFile.empty()) {
@@ -1172,7 +1177,7 @@
   // at -O0 because summary-based DCE is implemented using internalization, and
   // we must apply DCE consistently with the full LTO module in order to avoid
   // undefined references during the final link.
-  std::set<GlobalValue::GUID> ExportedGUIDs;
+  std::map<GlobalValue::GUID, bool> ExportedGUIDs;
   for (auto &Res : GlobalResolutions) {
     // If the symbol does not have external references or it is not prevailing,
     // then not need to mark it as exported from a ThinLTO partition.
@@ -1183,21 +1188,32 @@
         GlobalValue::dropLLVMManglingEscape(Res.second.IRName));
     // Mark exported unless index-based analysis determined it to be dead.
     if (ThinLTO.CombinedIndex.isGUIDLive(GUID))
-      ExportedGUIDs.insert(GUID);
+      ExportedGUIDs.insert(
+          {GUID, Res.second.Partition == GlobalResolution::External});
   }

   // Any functions referenced by the jump table in the regular LTO object must
   // be exported.
   for (auto &Def : ThinLTO.CombinedIndex.cfiFunctionDefs())
     ExportedGUIDs.insert(
-        GlobalValue::getGUID(GlobalValue::dropLLVMManglingEscape(Def)));
+        {GlobalValue::getGUID(GlobalValue::dropLLVMManglingEscape(Def)), true});
+
+  auto isExported = [&](GlobalValueSummary *GVS, GlobalValue::GUID GUID) {
+    auto It = ExportedGUIDs.find(GUID);
+    const auto &ExportList = ExportLists.find(GVS->modulePath());
+    if ((ExportList != ExportLists.end() && ExportList->second.count(GUID)) ||
+        It != ExportedGUIDs.end())
+      // We can make a local copy if all of these conditions apply:
+      // - the GV's partition is not external;
+      // - the GV is constant;
+      // - the GV has no refs (because we don't currently support importing
+      //   a GV which has refs - see computeImportForReferencedGlobals).
+      return It->second || !GVS->isConstant() || !GVS->refs().empty();
-  auto isExported = [&](StringRef ModuleIdentifier, GlobalValue::GUID GUID) {
-    const auto &ExportList = ExportLists.find(ModuleIdentifier);
-    return (ExportList != ExportLists.end() &&
-            ExportList->second.count(GUID)) ||
-           ExportedGUIDs.count(GUID);
+
+    return false;
   };
+
   thinLTOInternalizeAndPromoteInIndex(ThinLTO.CombinedIndex, isExported);

   auto isPrevailing = [&](GlobalValue::GUID GUID,
Index: lib/LTO/ThinLTOCodeGenerator.cpp
===================================================================
--- lib/LTO/ThinLTOCodeGenerator.cpp
+++ lib/LTO/ThinLTOCodeGenerator.cpp
@@ -422,7 +422,7 @@
     int TempFD;
     llvm::sys::path::remove_filename(CachePath);
     sys::path::append(TempFilename, CachePath, "Thin-%%%%%%.tmp.o");
-    std::error_code EC = 
+    std::error_code EC =
         sys::fs::createUniqueFile(TempFilename, TempFD, TempFilename);
     if (EC) {
       errs() << "Error: " << EC.message() << "\n";
@@ -432,7 +432,7 @@
       raw_fd_ostream OS(TempFD, /* ShouldClose */ true);
       OS << OutputBuffer.getBuffer();
     }
-    // Rename temp file to final destination; rename is atomic 
+    // Rename temp file to final destination; rename is atomic
     EC = sys::fs::rename(TempFilename, EntryPath);
     if (EC)
       sys::fs::remove(TempFilename);
@@ -626,13 +626,13 @@
     const StringMap<FunctionImporter::ExportSetTy> &ExportLists,
     const DenseSet<GlobalValue::GUID> &GUIDPreservedSymbols,
     ModuleSummaryIndex &Index) {
-  auto isExported = [&](StringRef ModuleIdentifier, GlobalValue::GUID GUID) {
-    const auto &ExportList = ExportLists.find(ModuleIdentifier);
-    return (ExportList != ExportLists.end() &&
-            ExportList->second.count(GUID)) ||
-           GUIDPreservedSymbols.count(GUID);
+  auto isExported = [&](GlobalValueSummary *GVS, GlobalValue::GUID GUID) {
+    const auto &ExportList = ExportLists.find(GVS->modulePath());
+    const bool DoPromote = ExportList != ExportLists.end() &&
+                           ExportList->second.count(GUID) &&
+                           (!GVS->isConstant() || !GVS->refs().empty());
+    return DoPromote || GUIDPreservedSymbols.count(GUID);
   };
-
   thinLTOInternalizeAndPromoteInIndex(Index, isExported);
 }
@@ -646,6 +649,9 @@
     return PrevailingType::Unknown;
   };
   computeDeadSymbols(Index, GUIDPreservedSymbols, isPrevailing);
+
+  // Find immutable globals
+  Index.propagateConstants();
 }
@@ -1048,10 +1051,10 @@
       if (SavedObjectsDirectoryPath.empty()) {
         // We need to generated a memory buffer for the linker.
         if (!CacheEntryPath.empty()) {
-          // When cache is enabled, reload from the cache if possible. 
-          // Releasing the buffer from the heap and reloading it from the 
-          // cache file with mmap helps us to lower memory pressure. 
-          // The freed memory can be used for the next input file. 
+          // When cache is enabled, reload from the cache if possible.
+          // Releasing the buffer from the heap and reloading it from the
+          // cache file with mmap helps us to lower memory pressure.
+          // The freed memory can be used for the next input file.
           // The final binary link will read from the VFS cache (hopefully!)
           // or from disk (if the memory pressure was too high).
           auto ReloadedBufferOrErr = CacheEntry.tryLoadingBuffer();
Index: lib/Linker/IRMover.cpp
===================================================================
--- lib/Linker/IRMover.cpp
+++ lib/Linker/IRMover.cpp
@@ -388,6 +388,8 @@
   /// See IRMover::move().
   std::function<void(GlobalValue &, IRMover::ValueAdder)> AddLazyFor;

+  std::function<void(GlobalValue *, GlobalValue *)> MaterializerHook;
+
   TypeMapTy TypeMap;
   GlobalValueMaterializer GValMaterializer;
   LocalValueMaterializer LValMaterializer;
@@ -507,10 +509,12 @@
            IRMover::IdentifiedStructTypeSet &Set, std::unique_ptr<Module> SrcM,
            ArrayRef<GlobalValue *> ValuesToLink,
            std::function<void(GlobalValue &, IRMover::ValueAdder)> AddLazyFor,
+           std::function<void(GlobalValue *, GlobalValue *)> MaterializerHook,
            bool IsPerformingImport)
       : DstM(DstM), SrcM(std::move(SrcM)), AddLazyFor(std::move(AddLazyFor)),
-        TypeMap(Set), GValMaterializer(*this), LValMaterializer(*this),
-        SharedMDs(SharedMDs), IsPerformingImport(IsPerformingImport),
+        MaterializerHook(std::move(MaterializerHook)), TypeMap(Set),
+        GValMaterializer(*this), LValMaterializer(*this), SharedMDs(SharedMDs),
+        IsPerformingImport(IsPerformingImport),
         Mapper(ValueMap, RF_MoveDistinctMDs | RF_IgnoreMissingLocals, &TypeMap,
                &GValMaterializer),
         AliasMCID(Mapper.registerAlternateMappingContext(AliasValueMap,
@@ -574,6 +578,9 @@
   if (!New)
     return *NewProto;

+  if (MaterializerHook)
+    MaterializerHook(New, SGV);
+
   // If we already created the body, just return.
   if (auto *F = dyn_cast<Function>(New)) {
     if (!F->isDeclaration())
@@ -1470,10 +1477,11 @@
 Error IRMover::move(
     std::unique_ptr<Module> Src, ArrayRef<GlobalValue *> ValuesToLink,
     std::function<void(GlobalValue &GV, ValueAdder Add)> AddLazyFor,
+    std::function<void(GlobalValue *, GlobalValue *)> MaterializerHook,
     bool IsPerformingImport) {
   IRLinker TheIRLinker(Composite, SharedMDs, IdentifiedStructTypes,
                        std::move(Src), ValuesToLink, std::move(AddLazyFor),
-                       IsPerformingImport);
+                       std::move(MaterializerHook), IsPerformingImport);
   Error E = TheIRLinker.run();
   Composite.dropTriviallyDeadConstantArrays();
   return E;
Index: lib/Linker/LinkModules.cpp
===================================================================
--- lib/Linker/LinkModules.cpp
+++ lib/Linker/LinkModules.cpp
@@ -554,6 +554,7 @@
           [this](GlobalValue &GV, IRMover::ValueAdder Add) {
             addLazyFor(GV, Add);
           },
+          [](GlobalValue *, GlobalValue *) {},
           /* IsPerformingImport */ false)) {
     handleAllErrors(std::move(E), [&](ErrorInfoBase &EIB) {
       DstM.getContext().diagnose(LinkDiagnosticInfo(DS_Error, EIB.message()));
Index: lib/Transforms/IPO/FunctionImport.cpp
===================================================================
--- lib/Transforms/IPO/FunctionImport.cpp
+++ lib/Transforms/IPO/FunctionImport.cpp
@@ -895,6 +895,13 @@
   return NewFn;
 }

+// Checks if a global value is immutable according to the combined summary.
+static bool isImmutableGlobal(const ModuleSummaryIndex &Index,
+                              const GlobalValue *GV) {
+  auto *GVS = Index.getGlobalValueSummary(GV->getGUID(), false);
+  return GVS->isConstant();
+}
+
 // Automatically import functions in Module \p DestModule based on the summaries
 // index.
 Expected<bool> FunctionImporter::importFunctions(
@@ -904,12 +911,14 @@
   unsigned ImportedCount = 0, ImportedGVCount = 0;

   IRMover Mover(DestModule);
+  DenseSet<GlobalValue *> ImmutableGVsMoved;
   // Do the actual import of functions now, one Module at a time
   std::set<StringRef> ModuleNameOrderedList;
   for (auto &FunctionsToImportPerModule : ImportList) {
     ModuleNameOrderedList.insert(FunctionsToImportPerModule.first());
   }
   for (auto &Name : ModuleNameOrderedList) {
+    DenseSet<GlobalValue *> ImmutableGVs;
     // Get the module for the import
     const auto &FunctionsToImportPerModule = ImportList.find(Name);
     assert(FunctionsToImportPerModule != ImportList.end());
@@ -962,6 +971,11 @@
       if (Error Err = GV.materialize())
         return std::move(Err);
       ImportedGVCount += GlobalsToImport.insert(&GV);
+
+      // Immutable globals are not promoted, so we need to make a local copy.
+      // We do this by internalizing the imported definitions later.
+      if (isImmutableGlobal(Index, &GV))
+        ImmutableGVs.insert(&GV);
     }
   }
   for (GlobalAlias &GA : SrcModule->aliases()) {
@@ -1011,6 +1025,10 @@
     if (Mover.move(std::move(SrcModule), GlobalsToImport.getArrayRef(),
                    [](GlobalValue &, IRMover::ValueAdder) {},
+                   [&](GlobalValue *DGV, GlobalValue *SGV) {
+                     if (ImmutableGVs.count(SGV))
+                       ImmutableGVsMoved.insert(DGV);
+                   },
                    /*IsPerformingImport=*/true))
       report_fatal_error("Function Import: link error");
@@ -1018,6 +1036,10 @@
     NumImportedModules++;
   }

+  // Internalize immutable globals.
+  for (auto *MGV : ImmutableGVsMoved)
+    MGV->setLinkage(GlobalValue::InternalLinkage);
+
   NumImportedFunctions += (ImportedCount - ImportedGVCount);
   NumImportedGlobalVars += ImportedGVCount;
Index: test/Bitcode/thinlto-function-summary-callgraph-relbf.ll
===================================================================
--- test/Bitcode/thinlto-function-summary-callgraph-relbf.ll
+++ test/Bitcode/thinlto-function-summary-callgraph-relbf.ll
@@ -13,6 +13,7 @@
 ; CHECK:
 ; CHECK-NEXT:
 ; CHECK:
 ; CHECK-NEXT:
 ; CHECK: M1_{{[0-9]+}} // call
-; STRUCTURE-DAG: M0_{{[0-9]+}} -> M1_{{[0-9]+}} [{{.*}}]; // ref
+; STRUCTURE-DAG: M0_{{[0-9]+}} -> M1_{{[0-9]+}} [{{.*}}]; // const-ref
 ; STRUCTURE-NEXT: }
 ; CLUSTER0: // Module: {{.*}}1.bc
@@ -38,8 +38,8 @@
 ; CLUSTER1-DAG: M1_[[B:[0-9]+]] [{{.*}}B|extern{{.*}}]; // variable
 ; CLUSTER1-DAG: M1_[[BAR:[0-9]+]] [{{.*}}bar|extern{{.*}}]; // function, dead
 ; CLUSTER1-NEXT: // Edges:
-; CLUSTER1-DAG: M1_[[FOO]] -> M1_[[B]] [{{.*}}]; // ref
-; CLUSTER1-DAG: M1_[[FOO]] -> M1_[[A]] [{{.*}}]; // ref
+; CLUSTER1-DAG: M1_[[FOO]] -> M1_[[B]] [{{.*}}]; // const-ref
+; CLUSTER1-DAG: M1_[[FOO]] -> M1_[[A]] [{{.*}}]; // const-ref
 ; CLUSTER1-DAG: }

 target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
Index: test/ThinLTO/X86/export.ll
===================================================================
--- test/ThinLTO/X86/export.ll
+++ test/ThinLTO/X86/export.ll
@@ -5,7 +5,7 @@
 ; Ensure statics are promoted/renamed correctly from this file.
 ; RUN: llvm-lto -thinlto-action=promote %t1.bc -thinlto-index=%t3.bc -o - | llvm-dis -o - | FileCheck %s
-; CHECK-DAG: @staticvar.llvm.0 = hidden global
+; CHECK-DAG: @staticvar = internal global
 ; CHECK-DAG: define hidden void @staticfunc.llvm.0

 target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
Index: test/ThinLTO/X86/funcimport.ll
===================================================================
--- test/ThinLTO/X86/funcimport.ll
+++ test/ThinLTO/X86/funcimport.ll
@@ -9,10 +9,10 @@
 ; Ensure statics are promoted/renamed correctly from this file (all but
 ; constant variable need promotion).
 ; RUN: llvm-lto -thinlto-action=promote %t.bc -thinlto-index=%t3.bc -o - | llvm-dis -o - | FileCheck %s --check-prefix=EXPORTSTATIC
-; EXPORTSTATIC-DAG: @staticvar.llvm.0 = hidden global
+; EXPORTSTATIC-DAG: @staticvar = internal global
 ; Eventually @staticconstvar can be exported as a copy and not promoted
-; EXPORTSTATIC-DAG: @staticconstvar.llvm.0 = hidden unnamed_addr constant
-; EXPORTSTATIC-DAG: @P.llvm.0 = hidden global void ()* null
+; EXPORTSTATIC-DAG: @staticconstvar = internal unnamed_addr constant
+; EXPORTSTATIC-DAG: @P = internal global void ()* null
 ; EXPORTSTATIC-DAG: define hidden i32 @staticfunc.llvm.0
 ; EXPORTSTATIC-DAG: define hidden void @staticfunc2.llvm.0
Index: test/ThinLTO/X86/globals-import-const-fold.ll
===================================================================
--- test/ThinLTO/X86/globals-import-const-fold.ll
+++ test/ThinLTO/X86/globals-import-const-fold.ll
@@ -7,7 +7,7 @@
 ; RUN: llvm-lto -thinlto-action=optimize %t1.bc.thinlto.imported.bc -o %t1.bc.thinlto.opt.bc
 ; RUN: llvm-dis %t1.bc.thinlto.opt.bc -o - | FileCheck --check-prefix=OPTIMIZE %s

-; IMPORT: @baz = available_externally local_unnamed_addr constant i32 10
+; IMPORT: @baz = internal local_unnamed_addr constant i32 10

 ; OPTIMIZE: define i32 @main()
 ; OPTIMIZE-NEXT: ret i32 10
Index: test/ThinLTO/X86/globals-import.ll
===================================================================
--- test/ThinLTO/X86/globals-import.ll
+++ test/ThinLTO/X86/globals-import.ll
@@ -8,20 +8,20 @@
 ; RUN: opt -module-summary %p/Inputs/globals-import.ll -o %t2b.bc
 ; RUN: llvm-lto -thinlto-action=thinlink %t1.bc %t2.bc %t2b.bc -o %t3.index.bc

-; RUN: llvm-lto -thinlto-action=import %t1.bc -thinlto-index=%t3.index.bc
+; RUN: llvm-lto -thinlto-action=import %t1.bc -exported-symbol=main -thinlto-index=%t3.index.bc
 ; RUN: llvm-dis %t1.bc.thinlto.imported.bc -o - | FileCheck --check-prefix=IMPORT %s

-; RUN: llvm-lto -thinlto-action=promote %t2.bc -thinlto-index=%t3.index.bc
-; RUN: llvm-lto -thinlto-action=promote %t2b.bc -thinlto-index=%t3.index.bc
+; RUN: llvm-lto -thinlto-action=promote %t2.bc -exported-symbol=main -thinlto-index=%t3.index.bc
+; RUN: llvm-lto -thinlto-action=promote %t2b.bc -exported-symbol=main -thinlto-index=%t3.index.bc
 ; RUN: llvm-dis %t2.bc.thinlto.promoted.bc -o - | FileCheck --check-prefix=PROMOTE1 %s
 ; RUN: llvm-dis %t2b.bc.thinlto.promoted.bc -o - | FileCheck --check-prefix=PROMOTE2 %s

-; IMPORT: @baz.llvm.0 = available_externally hidden constant i32 10, align 4
+; IMPORT: @baz.llvm.0 = available_externally hidden global i32 10, align 4

-; PROMOTE1: @baz.llvm.0 = hidden constant i32 10, align 4
+; PROMOTE1: @baz.llvm.0 = hidden global i32 10, align 4
 ; PROMOTE1: define weak_odr i32 @foo() {

 ; Second copy of IR object should not have any symbols imported/promoted.
-; PROMOTE2: @baz = internal constant i32 10, align 4
+; PROMOTE2: @baz = internal global i32 10, align 4
 ; PROMOTE2: define available_externally i32 @foo() {

 target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
Index: test/ThinLTO/X86/index-const-prop.ll
===================================================================
--- test/ThinLTO/X86/index-const-prop.ll
+++ test/ThinLTO/X86/index-const-prop.ll
@@ -0,0 +1,27 @@
+; Check constant propagation in the thinlto combined summary. This allows us to do 2 things:
+; 1. Internalize a global definition which is not used externally if all accesses to it are read-only.
+; 2. Make a local copy of an internal definition if all accesses to it are read-only. This allows constant
+;    folding it during the optimization phase.
+
+; RUN: opt -module-summary %s -o %t1.bc
+; RUN: opt -module-summary %p/Inputs/index-const-prop.ll -o %t2.bc
+; RUN: llvm-lto -thinlto-action=thinlink -o %t3.index.bc %t1.bc %t2.bc
+; RUN: llvm-lto -thinlto-action=import %t1.bc -thinlto-index=%t3.index.bc -o %t1.imported.bc
+; RUN: llvm-lto -thinlto-action=optimize %t1.imported.bc -o - | llvm-dis - -o - | FileCheck %s
+
+; CHECK: define i32 @main
+; CHECK-NEXT: ret i32 3
+
+target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
+target triple = "aarch64-pc-linux-gnu"
+
+define i32 @main() local_unnamed_addr {
+  %call = tail call i32 bitcast (i32 (...)* @foo to i32 ()*)()
+  %call1 = tail call i32 bitcast (i32 (...)* @bar to i32 ()*)()
+  %add = add nsw i32 %call1, %call
+  ret i32 %add
+}
+
+declare i32 @foo(...) local_unnamed_addr
+
+declare i32 @bar(...) local_unnamed_addr
Index: test/ThinLTO/X86/index-const-prop2.ll
===================================================================
--- test/ThinLTO/X86/index-const-prop2.ll
+++ test/ThinLTO/X86/index-const-prop2.ll
@@ -0,0 +1,34 @@
+; Check constant propagation in the thinlto combined summary. This allows us to do 2 things:
+; 1. Internalize a global definition which is not used externally if all accesses to it are read-only.
+; 2. Make a local copy of an internal definition if all accesses to it are read-only. This allows constant
+;    folding it during the optimization phase.
+; RUN: opt -module-summary %s -o %t1.bc
+; RUN: opt -module-summary %p/Inputs/index-const-prop.ll -o %t2.bc
+; RUN: llvm-lto2 run %t1.bc %t2.bc -save-temps \
+; RUN:   -r=%t2.bc,foo,pl \
+; RUN:   -r=%t2.bc,bar,pl \
+; RUN:   -r=%t2.bc,baz,pl \
+; RUN:   -r=%t2.bc,rand, \
+; RUN:   -r=%t2.bc,gBar,pl \
+; RUN:   -r=%t1.bc,main,plx \
+; RUN:   -r=%t1.bc,foo, \
+; RUN:   -r=%t1.bc,bar, \
+; RUN:   -o %t3
+; RUN: llvm-dis %t3.1.5.precodegen.bc -o - | FileCheck %s
+
+; CHECK: i32 @main()
+; CHECK-NEXT: ret i32 3
+
+target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
+target triple = "aarch64-pc-linux-gnu"
+
+define i32 @main() local_unnamed_addr {
+  %call = tail call i32 bitcast (i32 (...)* @foo to i32 ()*)()
+  %call1 = tail call i32 bitcast (i32 (...)* @bar to i32 ()*)()
+  %add = add nsw i32 %call1, %call
+  ret i32 %add
+}
+
+declare i32 @foo(...) local_unnamed_addr
+
+declare i32 @bar(...) local_unnamed_addr
Index: tools/llvm-bcanalyzer/llvm-bcanalyzer.cpp
===================================================================
--- tools/llvm-bcanalyzer/llvm-bcanalyzer.cpp
+++ tools/llvm-bcanalyzer/llvm-bcanalyzer.cpp
@@ -327,6 +327,7 @@
       STRINGIFY_CODE(FS, CFI_FUNCTION_DEFS)
       STRINGIFY_CODE(FS, CFI_FUNCTION_DECLS)
       STRINGIFY_CODE(FS, TYPE_ID)
+      STRINGIFY_CODE(FS, CONST_REF_LIST)
     }
   case bitc::METADATA_ATTACHMENT_ID:
     switch(CodeID) {
@@ -557,7 +558,7 @@
     BitstreamEntry Entry =
         Stream.advance(BitstreamCursor::AF_DontAutoprocessAbbrevs);
-    
+
     switch (Entry.Kind) {
     case BitstreamEntry::Error:
       return ReportError("malformed bitcode file");
@@ -573,7 +574,7 @@
       }
       return false;
     }
-    
+
     case BitstreamEntry::SubBlock: {
       uint64_t SubBlockBitStart = Stream.GetCurrentBitNo();
       if (ParseBlock(Stream, BlockInfo, Entry.ID, IndentLevel + 1,
@@ -581,7 +582,7 @@
         return true;
       ++BlockStats.NumSubBlocks;
      uint64_t SubBlockBitEnd = Stream.GetCurrentBitNo();
-    
+
       // Don't include subblock sizes in the size of this block.
       BlockBitStart += SubBlockBitEnd-SubBlockBitStart;
       continue;
@@ -596,7 +597,7 @@
       ++BlockStats.NumAbbrevs;
       continue;
     }
-    
+
     Record.clear();
     ++BlockStats.NumRecords;
@@ -727,7 +728,7 @@
         if (BlobIsPrintable)
           outs() << "'" << Blob << "'";
         else
-          outs() << "unprintable, " << Blob.size() << " bytes.";
+          outs() << "unprintable, " << Blob.size() << " bytes.";
       }
     }
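For reference, a minimal sketch of how a consumer could inspect the read-only information this patch records. It is not part of the change; the helper name dumpRefAccess is purely illustrative, and it only uses the new APIs introduced above (getRefAccessBits, isConstant) together with existing ModuleSummaryIndex APIs (refs, getSummaryList, name).

// Illustrative only (assumes the declarations added to ModuleSummaryIndex.h
// by this patch): report which references of a function summary were
// classified as read-only, and whether the referenced summaries ended up
// marked constant after ModuleSummaryIndex::propagateConstants() has run.
#include "llvm/ADT/BitVector.h"
#include "llvm/IR/ModuleSummaryIndex.h"
#include "llvm/Support/raw_ostream.h"

using namespace llvm;

static void dumpRefAccess(const FunctionSummary &FS) {
  const BitVector &BV = FS.getRefAccessBits();
  ArrayRef<ValueInfo> Refs = FS.refs();
  // An empty bit vector means no access information was recorded, i.e. all
  // references are treated as read-write.
  if (BV.empty()) {
    errs() << FS.modulePath() << ": no ref access info\n";
    return;
  }
  for (size_t I = 0, E = Refs.size(); I != E; ++I) {
    errs() << Refs[I].name() << (BV[I] ? " : read-only" : " : read-write");
    for (auto &GVS : Refs[I].getSummaryList())
      errs() << (GVS->isConstant() ? " [constant]" : " [mutable]");
    errs() << "\n";
  }
}

Under the same assumptions, a backend would call this after computeDeadSymbols() and propagateConstants(), mirroring the order used in LTO.cpp and ThinLTOCodeGenerator.cpp above, since both the liveness and the constant bits are only meaningful at that point.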