Index: include/llvm/Bitcode/LLVMBitCodes.h =================================================================== --- include/llvm/Bitcode/LLVMBitCodes.h +++ include/llvm/Bitcode/LLVMBitCodes.h @@ -268,6 +268,8 @@ // n x (typeid, kind, name, numrba, // numrba x (numarg, numarg x arg, kind, info, byte, bit))] FS_TYPE_ID = 21, + // List of function constant references + FS_CONST_REF_LIST = 22, }; enum MetadataCodes { Index: include/llvm/IR/ModuleSummaryIndex.h =================================================================== --- include/llvm/IR/ModuleSummaryIndex.h +++ include/llvm/IR/ModuleSummaryIndex.h @@ -17,6 +17,7 @@ #define LLVM_IR_MODULESUMMARYINDEX_H #include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/BitVector.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallString.h" @@ -261,11 +262,14 @@ /// within the same linkage unit. unsigned DSOLocal : 1; + /// Indicates that global isn't modified by any live function + unsigned Constant : 1; + /// Convenience Constructors explicit GVFlags(GlobalValue::LinkageTypes Linkage, bool NotEligibleToImport, bool Live, bool IsLocal) : Linkage(Linkage), NotEligibleToImport(NotEligibleToImport), - Live(Live), DSOLocal(IsLocal) {} + Live(Live), DSOLocal(IsLocal), Constant(false) {} }; private: @@ -346,6 +350,10 @@ bool isDSOLocal() const { return Flags.DSOLocal; } + void setConstant(bool Constant) { Flags.Constant = Constant; } + + bool isConstant() const { return Flags.Constant; } + /// Flag that this global value cannot be imported. void setNotEligibleToImport() { Flags.NotEligibleToImport = true; } @@ -500,6 +508,10 @@ /// List of call edge pairs from this function. std::vector CallGraphEdgeList; + /// Bit vector, where each bit corresponds to a single outgoing reference. + /// Value of 1 means read-only access to referenced value. 
+ BitVector RefAccessBits; + std::unique_ptr TIdInfo; public: @@ -523,6 +535,14 @@ std::move(TypeCheckedLoadConstVCalls)}); } + const BitVector &getRefAccessBits() const { return RefAccessBits; } + void setRefAccessBits(BitVector &&BV) { + // We accept either empty bit vector (means all refs are RW) or + // one of size equal to number of references. + assert(BV.empty() || BV.size() == refs().size()); + RefAccessBits = std::move(BV); + } + /// Check if this is a function summary. static bool classof(const GlobalValueSummary *GVS) { return GVS->getSummaryKind() == FunctionKind; @@ -791,6 +811,10 @@ .first; } + void propagateConstantsForFunction(FunctionSummary *FS); + void collectMutableGVSummaries(GlobalValueSummary *Root, + DenseSet &MutableGVSet); + public: // See HaveGVs variable comment. ModuleSummaryIndex(bool HaveGVs) : HaveGVs(HaveGVs), Saver(Alloc) {} @@ -1103,6 +1127,9 @@ /// Print out strongly connected components for debugging. void dumpSCCs(raw_ostream &OS); + + /// Analyze index and detect unmodified globals + void propagateConstants(const DenseSet &PreservedSymbols); }; /// GraphTraits definition to build SCC for the index Index: include/llvm/Linker/IRMover.h =================================================================== --- include/llvm/Linker/IRMover.h +++ include/llvm/Linker/IRMover.h @@ -73,9 +73,11 @@ /// if the GlobalValue needs to be added to the \p ValuesToLink and linked. /// - \p IsPerformingImport is true when this IR link is to perform ThinLTO /// function importing from Src. 
- Error move(std::unique_ptr Src, ArrayRef ValuesToLink, - std::function AddLazyFor, - bool IsPerformingImport); + Error + move(std::unique_ptr Src, ArrayRef ValuesToLink, + std::function AddLazyFor, + std::function MaterializerHook, + bool IsPerformingImport); Module &getModule() { return Composite; } private: Index: include/llvm/Transforms/IPO/FunctionImport.h =================================================================== --- include/llvm/Transforms/IPO/FunctionImport.h +++ include/llvm/Transforms/IPO/FunctionImport.h @@ -128,6 +128,13 @@ const DenseSet &GUIDPreservedSymbols, function_ref isPrevailing); +/// Compute dead symbols and run constant propagation in combined index +/// after that. +void computeDeadSymbolsWithConstProp( + ModuleSummaryIndex &Index, + const DenseSet &GUIDPreservedSymbols, + function_ref isPrevailing); + /// Converts value \p GV to declaration, or replaces with a declaration if /// it is an alias. Returns true if converted, false if replaced. bool convertToDeclaration(GlobalValue &GV); Index: lib/Analysis/ModuleSummaryAnalysis.cpp =================================================================== --- lib/Analysis/ModuleSummaryAnalysis.cpp +++ lib/Analysis/ModuleSummaryAnalysis.cpp @@ -14,6 +14,7 @@ #include "llvm/Analysis/ModuleSummaryAnalysis.h" #include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/BitVector.h" #include "llvm/ADT/DenseSet.h" #include "llvm/ADT/MapVector.h" #include "llvm/ADT/STLExtras.h" @@ -208,6 +209,38 @@ } } +static bool isNonVolatileLoad(const Instruction *I) { + if (const auto *LI = dyn_cast(I)) + return !LI->isVolatile(); + + return false; +} + +static void processInstructionRefs(SetVector &FuncRefs, + const Instruction *I, + SetVector &InstRefs, + DenseMap &RefAccessMap) { + for (auto &VI : InstRefs) { + bool NonVolatileLoad = isNonVolatileLoad(I); + auto P = RefAccessMap.insert({VI, NonVolatileLoad}); + if (!P.second && !NonVolatileLoad) + (*P.first).second = false; + + FuncRefs.insert(VI); + } +} + +static 
BitVector computeRefAccessBits(SetVector &Refs, + DenseMap &RefAccessMap) { + BitVector Res; + Res.resize(Refs.size()); + int I = 0; + + for (auto &VI : Refs) + Res[I++] = RefAccessMap[VI]; + return Res; +} + static void computeFunctionSummary(ModuleSummaryIndex &Index, const Module &M, const Function &F, BlockFrequencyInfo *BFI, @@ -233,6 +266,7 @@ // Add personality function, prefix data and prologue data to function's ref // list. findRefEdges(Index, &F, RefEdges, Visited); + DenseMap RefAccessMap; bool HasInlineAsmMaybeReferencingInternal = false; for (const BasicBlock &BB : F) @@ -240,7 +274,11 @@ if (isa(I)) continue; ++NumInsts; - findRefEdges(Index, &I, RefEdges, Visited); + + SetVector InstRefEdges; + findRefEdges(Index, &I, InstRefEdges, Visited); + processInstructionRefs(RefEdges, &I, InstRefEdges, RefAccessMap); + auto CS = ImmutableCallSite(&I); if (!CS) continue; @@ -347,6 +385,7 @@ F.getAttributes().hasFnAttribute(Attribute::NoInline); GlobalValueSummary::GVFlags Flags(F.getLinkage(), NotEligibleForImport, /* Live = */ false, F.isDSOLocal()); + BitVector RefAccessBits = computeRefAccessBits(RefEdges, RefAccessMap); FunctionSummary::FFlags FunFlags{ F.hasFnAttribute(Attribute::ReadNone), F.hasFnAttribute(Attribute::ReadOnly), @@ -361,6 +400,7 @@ TypeCheckedLoadConstVCalls.takeVector()); if (NonRenamableLocal) CantBePromoted.insert(F.getGUID()); + FuncSummary->setRefAccessBits(std::move(RefAccessBits)); Index.addGlobalValueSummary(F, std::move(FuncSummary)); } Index: lib/Bitcode/Reader/BitcodeReader.cpp =================================================================== --- lib/Bitcode/Reader/BitcodeReader.cpp +++ lib/Bitcode/Reader/BitcodeReader.cpp @@ -5160,6 +5160,16 @@ parseWholeProgramDevirtResolution(Record, Strtab, Slot, TypeId); } +static BitVector computeRefAccessBits(const std::vector &RORefIDs, + size_t NumRefs) { + BitVector Res; + Res.resize(NumRefs); + + for (auto RefID : RORefIDs) + Res.set(RefID); + return Res; +} + // Eagerly parse the 
entire summary block. This populates the GlobalValueSummary // objects in the index. Error ModuleSummaryIndexBitcodeReader::parseEntireSummary(unsigned ID) { @@ -5196,6 +5206,9 @@ std::vector PendingTypeTestAssumeConstVCalls, PendingTypeCheckedLoadConstVCalls; + // Contains numbers of RO references. + std::vector RORefIDs; + while (true) { BitstreamEntry Entry = Stream.advanceSkippingSubblocks(); @@ -5281,6 +5294,7 @@ std::vector Calls = makeCallList( ArrayRef(Record).slice(CallGraphEdgeStartIndex), IsOldProfileFormat, HasProfile, HasRelBF); + BitVector RefAccessBits = computeRefAccessBits(RORefIDs, Refs.size()); auto FS = llvm::make_unique( Flags, InstCount, getDecodedFFlags(RawFunFlags), std::move(Refs), std::move(Calls), std::move(PendingTypeTests), @@ -5293,9 +5307,11 @@ PendingTypeCheckedLoadVCalls.clear(); PendingTypeTestAssumeConstVCalls.clear(); PendingTypeCheckedLoadConstVCalls.clear(); + RORefIDs.clear(); auto VIAndOriginalGUID = getValueInfoFromValueId(ValueID); FS->setModulePath(getThisModule()->first()); FS->setOriginalName(VIAndOriginalGUID.second); + FS->setRefAccessBits(std::move(RefAccessBits)); TheIndex.addGlobalValueSummary(VIAndOriginalGUID.first, std::move(FS)); break; } @@ -5374,6 +5390,7 @@ ArrayRef(Record).slice(CallGraphEdgeStartIndex), IsOldProfileFormat, HasProfile, false); ValueInfo VI = getValueInfoFromValueId(ValueID).first; + BitVector RefAccessBits = computeRefAccessBits(RORefIDs, Refs.size()); auto FS = llvm::make_unique( Flags, InstCount, getDecodedFFlags(RawFunFlags), std::move(Refs), std::move(Edges), std::move(PendingTypeTests), @@ -5386,9 +5403,11 @@ PendingTypeCheckedLoadVCalls.clear(); PendingTypeTestAssumeConstVCalls.clear(); PendingTypeCheckedLoadConstVCalls.clear(); + RORefIDs.clear(); LastSeenSummary = FS.get(); LastSeenGUID = VI.getGUID(); FS->setModulePath(ModuleIdMap[ModuleId]); + FS->setRefAccessBits(std::move(RefAccessBits)); TheIndex.addGlobalValueSummary(VI, std::move(FS)); break; } @@ -5445,6 +5464,11 @@ 
LastSeenGUID = 0; break; } + case bitc::FS_CONST_REF_LIST: + assert(RORefIDs.empty()); + RORefIDs.insert(RORefIDs.end(), Record.begin(), Record.end()); + break; + case bitc::FS_TYPE_TESTS: assert(PendingTypeTests.empty()); PendingTypeTests.insert(PendingTypeTests.end(), Record.begin(), Index: lib/Bitcode/Writer/BitcodeWriter.cpp =================================================================== --- lib/Bitcode/Writer/BitcodeWriter.cpp +++ lib/Bitcode/Writer/BitcodeWriter.cpp @@ -3445,6 +3445,26 @@ W.second); } +static void +writeFunctionRefAccess(BitstreamWriter &Stream, GlobalValueSummary *Summary, + llvm::function_ref Filter) { + SmallVector Record; + auto *FS = cast(Summary); + const BitVector &BV = FS->getRefAccessBits(); + + ArrayRef Refs = FS->refs(); + assert(BV.empty() || BV.size() == Refs.size()); + + // Unabbreviated records are 6 bits VBR encoded. To be more space efficient + // we emit numbers of read-only refs. + for (size_t I = 0; I < BV.size(); ++I) + if (BV[I] && Filter(Refs[I])) + Record.push_back(I); + + if (!Record.empty()) + Stream.EmitRecord(bitc::FS_CONST_REF_LIST, Record); +} + // Helper to emit a single function summary record. 
void ModuleBitcodeWriterBase::writePerModuleFunctionSummaryRecord( SmallVector &NameVals, GlobalValueSummary *Summary, @@ -3455,6 +3475,7 @@ FunctionSummary *FS = cast(Summary); std::set ReferencedTypeIds; writeFunctionTypeMetadataRecords(Stream, FS, ReferencedTypeIds); + writeFunctionRefAccess(Stream, FS, [](const ValueInfo &Ref) { return true; }); NameVals.push_back(getEncodedGVSummaryFlags(FS->flags())); NameVals.push_back(FS->instCount()); @@ -3765,6 +3786,9 @@ auto *FS = cast(S); writeFunctionTypeMetadataRecords(Stream, FS, ReferencedTypeIds); + writeFunctionRefAccess(Stream, FS, [this](const ValueInfo &Ref) { + return !!getValueId(Ref.getGUID()); + }); NameVals.push_back(*ValueId); NameVals.push_back(Index.getModuleId(FS->modulePath())); @@ -3809,7 +3833,7 @@ continue; // The mapping from OriginalId to GUID may return a GUID // that corresponds to a static variable. Filter it out here. - // This can happen when + // This can happen when // 1) There is a call to a library function which does not have // a CallValidId; // 2) There is a static variable with the OriginalGUID identical Index: lib/IR/ModuleSummaryIndex.cpp =================================================================== --- lib/IR/ModuleSummaryIndex.cpp +++ lib/IR/ModuleSummaryIndex.cpp @@ -84,6 +84,81 @@ return false; } +void ModuleSummaryIndex::propagateConstantsForFunction(FunctionSummary *FS) { + int RefNo = 0; + if (!FS->isLive()) + return; + + const BitVector &BV = FS->getRefAccessBits(); + for (auto &VI : FS->refs()) { + for (auto &Summary : VI.getSummaryList()) + if (!BV[RefNo]) + Summary->setConstant(false); + ++RefNo; + } +} + +void ModuleSummaryIndex::collectMutableGVSummaries( + GlobalValueSummary *RootGVS, DenseSet &MutableGVSet) { + if (!RootGVS->isLive() || RootGVS->isConstant()) + return; + + SmallVector Worklist; + if (MutableGVSet.insert(RootGVS).second) + Worklist.push_back(RootGVS); + + while (!Worklist.empty()) { + auto *GVS = Worklist.pop_back_val(); + for (auto &VI : 
GVS->refs()) + for (auto &Summary : VI.getSummaryList()) + if (Summary->getSummaryKind() == GlobalValueSummary::GlobalVarKind && + MutableGVSet.insert(Summary.get()).second) + Worklist.push_back(Summary.get()); + } +} + +void ModuleSummaryIndex::propagateConstants( + const DenseSet &GUIDPreservedSymbols) { + auto ForEachSummary = + [this]( + llvm::function_ref F) { + for (auto &VI : *this) + for (auto &Summary : VI.second.SummaryList) + F(VI.first, Summary.get()); + }; + // Step 1: mark all summaries corresponding to global variables constant. + // We don't want to mark exported symbols constant. We also don't currently + // support import of global variables which have refs. + ForEachSummary([&](GlobalValue::GUID Id, GlobalValueSummary *S) { + S->setConstant(S->getSummaryKind() == GlobalValueSummary::GlobalVarKind && + S->refs().empty() && !GUIDPreservedSymbols.count(Id)); + }); + + // Step 2: for each function summary check its refs. If ref is not constant + // then referenced summary list is not constant either. + ForEachSummary([this](GlobalValue::GUID, GlobalValueSummary *S) { + if (auto *FS = dyn_cast(S)) + propagateConstantsForFunction(FS); + }); + + // Step 3: for each global variable check if it is still constant. If it isn't + // then all its refs are not constant either. + DenseSet MutableGVSet; + ForEachSummary([&](GlobalValue::GUID, GlobalValueSummary *S) { + if (S->getSummaryKind() == GlobalValueSummary::GlobalVarKind) + collectMutableGVSummaries(S, MutableGVSet); + }); + + for (auto *Summary : MutableGVSet) + Summary->setConstant(false); + + // Step 4: aliases inherit constant attribute from aliasee. 
+ ForEachSummary([this](GlobalValue::GUID, GlobalValueSummary *S) { + if (auto *AS = dyn_cast(S)) + AS->setConstant(AS->getAliasee().isConstant()); + }); +} + // TODO: write a graphviz dumper for SCCs (see ModuleSummaryIndex::exportToDot) // then delete this function and update its tests LLVM_DUMP_METHOD @@ -108,6 +183,7 @@ struct Attributes { void add(const Twine &Name, const Twine &Value, const Twine &Comment = Twine()); + void addComment(const Twine &Comment); std::string getAsString() const; std::vector Attrs; @@ -129,6 +205,10 @@ A += Value.str(); A += "\""; Attrs.push_back(A); + addComment(Comment); +} + +void Attributes::addComment(const Twine &Comment) { if (!Comment.isTriviallyEmpty()) { if (Comments.empty()) Comments = " // "; @@ -227,6 +307,19 @@ << "\"]; // defined externally\n"; } +static bool isReadOnlyRef(GlobalValueSummary *GVS, const ValueInfo &Ref) { + auto *FS = dyn_cast(GVS); + if (!FS) + return false; + + const BitVector &BV = FS->getRefAccessBits(); + if (BV.empty()) + return false; + + ArrayRef FuncRefs = FS->refs(); + return BV[&Ref - &FuncRefs[0]]; +} + void ModuleSummaryIndex::exportToDot(raw_ostream& OS) const { std::vector CrossModuleEdges; DenseMap> NodeMap; @@ -243,12 +336,15 @@ auto DrawEdge = [&](const char *Pfx, int SrcMod, GlobalValue::GUID SrcId, int DstMod, GlobalValue::GUID DstId, int TypeOrHotness) { - // 0 corresponds to alias edge, 1 to ref edge, 2 to call with unknown - // hotness, ... - TypeOrHotness += 2; + // 0 - alias + // 1 - reference + // 2 - constant reference + // Other value: (hotness - 3). 
+ TypeOrHotness += 3; static const char *EdgeAttrs[] = { " [style=dotted]; // alias", " [style=dashed]; // ref", + " [style=dashed,color=forestgreen]; // const-ref", " // call (hotness : Unknown)", " [color=blue]; // call (hotness : Cold)", " // call (hotness : None)", @@ -291,6 +387,8 @@ A.add("shape", "box"); } else { A.add("shape", "Mrecord", "variable"); + if (Flags.Live && SummaryIt.second->isConstant()) + A.addComment("immutable"); } auto VI = getValueInfo(SummaryIt.first); @@ -308,13 +406,13 @@ for (auto &SummaryIt : GVSMap) { auto *GVS = SummaryIt.second; for (auto &R : GVS->refs()) - Draw(SummaryIt.first, R.getGUID(), -1); + Draw(SummaryIt.first, R.getGUID(), isReadOnlyRef(GVS, R) ? -1 : -2); if (auto *AS = dyn_cast_or_null(SummaryIt.second)) { auto AliaseeOrigId = AS->getAliasee().getOriginalName(); auto AliaseeId = getGUIDFromOriginalID(AliaseeOrigId); - Draw(SummaryIt.first, AliaseeId ? AliaseeId : AliaseeOrigId, -2); + Draw(SummaryIt.first, AliaseeId ? AliaseeId : AliaseeOrigId, -3); continue; } Index: lib/LTO/LTO.cpp =================================================================== --- lib/LTO/LTO.cpp +++ lib/LTO/LTO.cpp @@ -342,7 +342,7 @@ GlobalValueSummaryList &GVSummaryList, GlobalValue::GUID GUID, function_ref isExported) { for (auto &S : GVSummaryList) { - if (isExported(S->modulePath(), GUID)) { + if (!S->isConstant() && isExported(S->modulePath(), GUID)) { if (GlobalValue::isLocalLinkage(S->linkage())) S->setLinkage(GlobalValue::ExternalLinkage); } else if (!GlobalValue::isLocalLinkage(S->linkage())) @@ -715,6 +715,7 @@ return RegularLTO.Mover->move(std::move(Mod.M), Keep, [](GlobalValue &, IRMover::ValueAdder) {}, + [](GlobalValue *, GlobalValue *) {}, /* IsPerformingImport */ false); } @@ -798,7 +799,8 @@ return PrevailingType::Unknown; return It->second; }; - computeDeadSymbols(ThinLTO.CombinedIndex, GUIDPreservedSymbols, isPrevailing); + computeDeadSymbolsWithConstProp(ThinLTO.CombinedIndex, GUIDPreservedSymbols, + isPrevailing); // 
Setup output file to emit statistics. std::unique_ptr StatsFile = nullptr; Index: lib/LTO/ThinLTOCodeGenerator.cpp =================================================================== --- lib/LTO/ThinLTOCodeGenerator.cpp +++ lib/LTO/ThinLTOCodeGenerator.cpp @@ -645,7 +645,7 @@ auto isPrevailing = [&](GlobalValue::GUID G) { return PrevailingType::Unknown; }; - computeDeadSymbols(Index, GUIDPreservedSymbols, isPrevailing); + computeDeadSymbolsWithConstProp(Index, GUIDPreservedSymbols, isPrevailing); } /** Index: lib/Linker/IRMover.cpp =================================================================== --- lib/Linker/IRMover.cpp +++ lib/Linker/IRMover.cpp @@ -388,6 +388,8 @@ /// See IRMover::move(). std::function AddLazyFor; + std::function MaterializerHook; + TypeMapTy TypeMap; GlobalValueMaterializer GValMaterializer; LocalValueMaterializer LValMaterializer; @@ -507,10 +509,12 @@ IRMover::IdentifiedStructTypeSet &Set, std::unique_ptr SrcM, ArrayRef ValuesToLink, std::function AddLazyFor, + std::function MaterializerHook, bool IsPerformingImport) : DstM(DstM), SrcM(std::move(SrcM)), AddLazyFor(std::move(AddLazyFor)), - TypeMap(Set), GValMaterializer(*this), LValMaterializer(*this), - SharedMDs(SharedMDs), IsPerformingImport(IsPerformingImport), + MaterializerHook(std::move(MaterializerHook)), TypeMap(Set), + GValMaterializer(*this), LValMaterializer(*this), SharedMDs(SharedMDs), + IsPerformingImport(IsPerformingImport), Mapper(ValueMap, RF_MoveDistinctMDs | RF_IgnoreMissingLocals, &TypeMap, &GValMaterializer), AliasMCID(Mapper.registerAlternateMappingContext(AliasValueMap, @@ -574,6 +578,9 @@ if (!New) return *NewProto; + if (MaterializerHook) + MaterializerHook(New, SGV); + // If we already created the body, just return. 
if (auto *F = dyn_cast(New)) { if (!F->isDeclaration()) @@ -1060,11 +1067,6 @@ ValueMap.MD()[CU->getRawEnumTypes()].reset(nullptr); ValueMap.MD()[CU->getRawMacros()].reset(nullptr); ValueMap.MD()[CU->getRawRetainedTypes()].reset(nullptr); - // We import global variables only temporarily in order for instcombine - // and globalopt to perform constant folding and static constructor - // evaluation. After that elim-avail-extern will covert imported globals - // back to declarations, so we don't need debug info for them. - ValueMap.MD()[CU->getRawGlobalVariables()].reset(nullptr); // Imported entities only need to be mapped in if they have local // scope, as those might correspond to an imported entity inside a @@ -1470,10 +1472,11 @@ Error IRMover::move( std::unique_ptr Src, ArrayRef ValuesToLink, std::function AddLazyFor, + std::function MaterializerHook, bool IsPerformingImport) { IRLinker TheIRLinker(Composite, SharedMDs, IdentifiedStructTypes, std::move(Src), ValuesToLink, std::move(AddLazyFor), - IsPerformingImport); + std::move(MaterializerHook), IsPerformingImport); Error E = TheIRLinker.run(); Composite.dropTriviallyDeadConstantArrays(); return E; Index: lib/Linker/LinkModules.cpp =================================================================== --- lib/Linker/LinkModules.cpp +++ lib/Linker/LinkModules.cpp @@ -554,6 +554,7 @@ [this](GlobalValue &GV, IRMover::ValueAdder Add) { addLazyFor(GV, Add); }, + [](GlobalValue *, GlobalValue *) {}, /* IsPerformingImport */ false)) { handleAllErrors(std::move(E), [&](ErrorInfoBase &EIB) { DstM.getContext().diagnose(LinkDiagnosticInfo(DS_Error, EIB.message())); Index: lib/Transforms/IPO/FunctionImport.cpp =================================================================== --- lib/Transforms/IPO/FunctionImport.cpp +++ lib/Transforms/IPO/FunctionImport.cpp @@ -701,6 +701,15 @@ NumLiveSymbols += LiveSymbols; } +// Compute dead symbols and propagate constants in combined index. 
+void llvm::computeDeadSymbolsWithConstProp( + ModuleSummaryIndex &Index, + const DenseSet &GUIDPreservedSymbols, + function_ref isPrevailing) { + computeDeadSymbols(Index, GUIDPreservedSymbols, isPrevailing); + Index.propagateConstants(GUIDPreservedSymbols); +} + /// Compute the set of summaries needed for a ThinLTO backend compilation of /// \p ModulePath. void llvm::gatherImportedSummariesForModule( @@ -895,6 +904,13 @@ return NewFn; } +// Checks if global value is immutable according to combined summary. +static bool isImmutableGlobal(const ModuleSummaryIndex &Index, + const GlobalValue *GV) { + auto *GVS = Index.getGlobalValueSummary(GV->getGUID(), false); + return GVS->isConstant(); +} + // Automatically import functions in Module \p DestModule based on the summaries // index. Expected FunctionImporter::importFunctions( @@ -904,12 +920,14 @@ unsigned ImportedCount = 0, ImportedGVCount = 0; IRMover Mover(DestModule); + DenseSet ImmutableGVsMoved; // Do the actual import of functions now, one Module at a time std::set ModuleNameOrderedList; for (auto &FunctionsToImportPerModule : ImportList) { ModuleNameOrderedList.insert(FunctionsToImportPerModule.first()); } for (auto &Name : ModuleNameOrderedList) { + DenseSet ImmutableGVs; // Get the module for the import const auto &FunctionsToImportPerModule = ImportList.find(Name); assert(FunctionsToImportPerModule != ImportList.end()); @@ -962,6 +980,11 @@ if (Error Err = GV.materialize()) return std::move(Err); ImportedGVCount += GlobalsToImport.insert(&GV); + + // Immutable globals are not promoted, so we need to make a local copy. + // We are doing this by internalizing imported definitions later. 
+ if (isImmutableGlobal(Index, &GV)) + ImmutableGVs.insert(&GV); } } for (GlobalAlias &GA : SrcModule->aliases()) { @@ -1011,6 +1034,10 @@ if (Mover.move(std::move(SrcModule), GlobalsToImport.getArrayRef(), [](GlobalValue &, IRMover::ValueAdder) {}, + [&](GlobalValue *DGV, GlobalValue *SGV) { + if (ImmutableGVs.count(SGV)) + ImmutableGVsMoved.insert(DGV); + }, /*IsPerformingImport=*/true)) report_fatal_error("Function Import: link error"); @@ -1018,6 +1045,10 @@ NumImportedModules++; } + // Internalize immutable globals. + for (auto *MGV : ImmutableGVsMoved) + MGV->setLinkage(GlobalValue::InternalLinkage); + NumImportedFunctions += (ImportedCount - ImportedGVCount); NumImportedGlobalVars += ImportedGVCount; Index: test/Bitcode/thinlto-function-summary-callgraph-relbf.ll =================================================================== --- test/Bitcode/thinlto-function-summary-callgraph-relbf.ll +++ test/Bitcode/thinlto-function-summary-callgraph-relbf.ll @@ -13,6 +13,7 @@ ; CHECK: ; CHECK-NEXT: ; CHECK: ; CHECK-NEXT: ; CHECK: M1_{{[0-9]+}} // call -; STRUCTURE-DAG: M0_{{[0-9]+}} -> M1_{{[0-9]+}} [{{.*}}]; // ref +; STRUCTURE-DAG: M0_{{[0-9]+}} -> M1_{{[0-9]+}} [{{.*}}]; // const-ref ; STRUCTURE-NEXT: } ; CLUSTER0: // Module: {{.*}}1.bc @@ -33,13 +33,13 @@ ; CLUSTER1: // Module: {{.*}}2.bc ; CLUSTER1-NEXT: subgraph cluster_1 { -; CLUSTER1-DAG: M1_[[A:[0-9]+]] [{{.*}}A|extern{{.*}}]; // variable +; CLUSTER1-DAG: M1_[[A:[0-9]+]] [{{.*}}A|extern{{.*}}]; // variable, immutable ; CLUSTER1-DAG: M1_[[FOO:[0-9]+]] [{{.*}}foo|extern{{.*}}]; // function, not eligible to import -; CLUSTER1-DAG: M1_[[B:[0-9]+]] [{{.*}}B|extern{{.*}}]; // variable +; CLUSTER1-DAG: M1_[[B:[0-9]+]] [{{.*}}B|extern{{.*}}]; // variable, immutable ; CLUSTER1-DAG: M1_[[BAR:[0-9]+]] [{{.*}}bar|extern{{.*}}]; // function, dead ; CLUSTER1-NEXT: // Edges: -; CLUSTER1-DAG: M1_[[FOO]] -> M1_[[B]] [{{.*}}]; // ref -; CLUSTER1-DAG: M1_[[FOO]] -> M1_[[A]] [{{.*}}]; // ref +; CLUSTER1-DAG: M1_[[FOO]] -> 
M1_[[B]] [{{.*}}]; // const-ref +; CLUSTER1-DAG: M1_[[FOO]] -> M1_[[A]] [{{.*}}]; // const-ref ; CLUSTER1-DAG: } target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" Index: test/ThinLTO/X86/export.ll =================================================================== --- test/ThinLTO/X86/export.ll +++ test/ThinLTO/X86/export.ll @@ -5,7 +5,7 @@ ; Ensure statics are promoted/renamed correctly from this file. ; RUN: llvm-lto -thinlto-action=promote %t1.bc -thinlto-index=%t3.bc -o - | llvm-dis -o - | FileCheck %s -; CHECK-DAG: @staticvar.llvm.0 = hidden global +; CHECK-DAG: @staticvar = internal global ; CHECK-DAG: define hidden void @staticfunc.llvm.0 target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128" Index: test/ThinLTO/X86/funcimport.ll =================================================================== --- test/ThinLTO/X86/funcimport.ll +++ test/ThinLTO/X86/funcimport.ll @@ -9,10 +9,10 @@ ; Ensure statics are promoted/renamed correctly from this file (all but ; constant variable need promotion). 
; RUN: llvm-lto -thinlto-action=promote %t.bc -thinlto-index=%t3.bc -o - | llvm-dis -o - | FileCheck %s --check-prefix=EXPORTSTATIC -; EXPORTSTATIC-DAG: @staticvar.llvm.0 = hidden global +; EXPORTSTATIC-DAG: @staticvar = internal global ; Eventually @staticconstvar can be exported as a copy and not promoted -; EXPORTSTATIC-DAG: @staticconstvar.llvm.0 = hidden unnamed_addr constant -; EXPORTSTATIC-DAG: @P.llvm.0 = hidden global void ()* null +; EXPORTSTATIC-DAG: @staticconstvar = internal unnamed_addr constant +; EXPORTSTATIC-DAG: @P = internal global void ()* null ; EXPORTSTATIC-DAG: define hidden i32 @staticfunc.llvm.0 ; EXPORTSTATIC-DAG: define hidden void @staticfunc2.llvm.0 Index: test/ThinLTO/X86/globals-import-const-fold.ll =================================================================== --- test/ThinLTO/X86/globals-import-const-fold.ll +++ test/ThinLTO/X86/globals-import-const-fold.ll @@ -7,7 +7,7 @@ ; RUN: llvm-lto -thinlto-action=optimize %t1.bc.thinlto.imported.bc -o %t1.bc.thinlto.opt.bc ; RUN: llvm-dis %t1.bc.thinlto.opt.bc -o - | FileCheck --check-prefix=OPTIMIZE %s -; IMPORT: @baz = available_externally local_unnamed_addr constant i32 10 +; IMPORT: @baz = internal local_unnamed_addr constant i32 10 ; OPTIMIZE: define i32 @main() ; OPTIMIZE-NEXT: ret i32 10 Index: test/ThinLTO/X86/globals-import.ll =================================================================== --- test/ThinLTO/X86/globals-import.ll +++ test/ThinLTO/X86/globals-import.ll @@ -8,20 +8,20 @@ ; RUN: opt -module-summary %p/Inputs/globals-import.ll -o %t2b.bc ; RUN: llvm-lto -thinlto-action=thinlink %t1.bc %t2.bc %t2b.bc -o %t3.index.bc -; RUN: llvm-lto -thinlto-action=import %t1.bc -thinlto-index=%t3.index.bc +; RUN: llvm-lto -thinlto-action=import %t1.bc -exported-symbol=main -thinlto-index=%t3.index.bc ; RUN: llvm-dis %t1.bc.thinlto.imported.bc -o - | FileCheck --check-prefix=IMPORT %s -; RUN: llvm-lto -thinlto-action=promote %t2.bc -thinlto-index=%t3.index.bc -; RUN: llvm-lto 
-thinlto-action=promote %t2b.bc -thinlto-index=%t3.index.bc +; RUN: llvm-lto -thinlto-action=promote %t2.bc -exported-symbol=main -thinlto-index=%t3.index.bc +; RUN: llvm-lto -thinlto-action=promote %t2b.bc -exported-symbol=main -thinlto-index=%t3.index.bc ; RUN: llvm-dis %t2.bc.thinlto.promoted.bc -o - | FileCheck --check-prefix=PROMOTE1 %s ; RUN: llvm-dis %t2b.bc.thinlto.promoted.bc -o - | FileCheck --check-prefix=PROMOTE2 %s -; IMPORT: @baz.llvm.0 = available_externally hidden constant i32 10, align 4 +; IMPORT: @baz.llvm.0 = available_externally hidden global i32 10, align 4 -; PROMOTE1: @baz.llvm.0 = hidden constant i32 10, align 4 +; PROMOTE1: @baz.llvm.0 = hidden global i32 10, align 4 ; PROMOTE1: define weak_odr i32 @foo() { ; Second copy of IR object should not have any symbols imported/promoted. -; PROMOTE2: @baz = internal constant i32 10, align 4 +; PROMOTE2: @baz = internal global i32 10, align 4 ; PROMOTE2: define available_externally i32 @foo() { target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" Index: test/ThinLTO/X86/index-const-prop.ll =================================================================== --- test/ThinLTO/X86/index-const-prop.ll +++ test/ThinLTO/X86/index-const-prop.ll @@ -0,0 +1,38 @@ +; Check constant propagation in thinlto combined summary. This allows us to do 2 things: +; 1. Internalize global definition which is not used externally if all accesses to it are read-only +; 2. Make a local copy of internal definition if all accesses to it are readonly. This allows constant +; folding it during optimization phase. 
+ +; RUN: opt -module-summary %s -o %t1.bc +; RUN: opt -module-summary %p/Inputs/index-const-prop.ll -o %t2.bc +; RUN: llvm-lto -thinlto-action=thinlink -o %t3.index.bc %t1.bc %t2.bc +; RUN: llvm-lto -thinlto-action=import %t1.bc -thinlto-index=%t3.index.bc -o %t1.imported.bc +; RUN: llvm-dis %t1.imported.bc -o - | FileCheck %s --check-prefix=IMPORT +; RUN: llvm-lto -thinlto-action=optimize %t1.imported.bc -o - | llvm-dis - -o - | FileCheck %s --check-prefix=OPTIMIZE + +; Check that we don't internalize gBar when it is exported +; RUN: llvm-lto -thinlto-action=import -exported-symbol main -exported-symbol gBar %t1.bc -thinlto-index=%t3.index.bc -o %t1.imported2.bc +; RUN: llvm-dis %t1.imported2.bc -o - | FileCheck %s --check-prefix=IMPORT2 + +; IMPORT: @gFoo.llvm.0 = internal unnamed_addr global i32 1, align 4, !dbg !0 +; IMPORT-NEXT: @gBar = internal local_unnamed_addr global i32 2, align 4, !dbg !5 +; IMPORT: !DICompileUnit({{.*}}, globals: !{{[0-9]+}}) + +; OPTIMIZE: define i32 @main +; OPTIMIZE-NEXT: ret i32 3 + +; IMPORT2: @gBar = available_externally local_unnamed_addr global i32 2, align 4, !dbg !5 + +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-pc-linux-gnu" + +define i32 @main() local_unnamed_addr { + %call = tail call i32 bitcast (i32 (...)* @foo to i32 ()*)() + %call1 = tail call i32 bitcast (i32 (...)* @bar to i32 ()*)() + %add = add nsw i32 %call1, %call + ret i32 %add +} + +declare i32 @foo(...) local_unnamed_addr + +declare i32 @bar(...) local_unnamed_addr Index: test/ThinLTO/X86/index-const-prop2.ll =================================================================== --- test/ThinLTO/X86/index-const-prop2.ll +++ test/ThinLTO/X86/index-const-prop2.ll @@ -0,0 +1,54 @@ +; Check constant propagation in thinlto combined summary. This allows us to do 2 things: +; 1. Internalize global definition which is not used externally if all accesses to it are read-only +; 2. 
Make a local copy of internal definition if all accesses to it are readonly. This allows constant +; folding it during optimization phase. +; RUN: opt -module-summary %s -o %t1.bc +; RUN: opt -module-summary %p/Inputs/index-const-prop.ll -o %t2.bc +; RUN: llvm-lto2 run %t1.bc %t2.bc -save-temps \ +; RUN: -r=%t2.bc,foo,pl \ +; RUN: -r=%t2.bc,bar,pl \ +; RUN: -r=%t2.bc,baz,pl \ +; RUN: -r=%t2.bc,rand, \ +; RUN: -r=%t2.bc,gBar,pl \ +; RUN: -r=%t1.bc,main,plx \ +; RUN: -r=%t1.bc,foo, \ +; RUN: -r=%t1.bc,bar, \ +; RUN: -o %t3 +; RUN: llvm-dis %t3.1.3.import.bc -o - | FileCheck %s --check-prefix=IMPORT +; RUN: llvm-dis %t3.1.5.precodegen.bc -o - | FileCheck %s --check-prefix=CODEGEN + +; Now check that we won't internalize global (gBar) if it's externally referenced +; RUN: llvm-lto2 run %t1.bc %t2.bc -save-temps \ +; RUN: -r=%t2.bc,foo,pl \ +; RUN: -r=%t2.bc,bar,pl \ +; RUN: -r=%t2.bc,baz,pl \ +; RUN: -r=%t2.bc,rand, \ +; RUN: -r=%t2.bc,gBar,plx \ +; RUN: -r=%t1.bc,main,plx \ +; RUN: -r=%t1.bc,foo, \ +; RUN: -r=%t1.bc,bar, \ +; RUN: -o %t3 +; RUN: llvm-dis %t3.1.3.import.bc -o - | FileCheck %s --check-prefix=IMPORT2 + +; IMPORT: @gFoo.llvm.0 = internal unnamed_addr global i32 1, align 4 +; IMPORT-NEXT: @gBar = internal local_unnamed_addr global i32 2, align 4 +; IMPORT: !DICompileUnit({{.*}}, globals: !{{[0-9]+}}) + +; CODEGEN: i32 @main() +; CODEGEN-NEXT: ret i32 3 + +; IMPORT2: @gBar = available_externally dso_local local_unnamed_addr global i32 2, align 4 + +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-pc-linux-gnu" + +define i32 @main() local_unnamed_addr { + %call = tail call i32 bitcast (i32 (...)* @foo to i32 ()*)() + %call1 = tail call i32 bitcast (i32 (...)* @bar to i32 ()*)() + %add = add nsw i32 %call1, %call + ret i32 %add +} + +declare i32 @foo(...) local_unnamed_addr + +declare i32 @bar(...) 
local_unnamed_addr Index: tools/llvm-bcanalyzer/llvm-bcanalyzer.cpp =================================================================== --- tools/llvm-bcanalyzer/llvm-bcanalyzer.cpp +++ tools/llvm-bcanalyzer/llvm-bcanalyzer.cpp @@ -327,6 +327,7 @@ STRINGIFY_CODE(FS, CFI_FUNCTION_DEFS) STRINGIFY_CODE(FS, CFI_FUNCTION_DECLS) STRINGIFY_CODE(FS, TYPE_ID) + STRINGIFY_CODE(FS, CONST_REF_LIST) } case bitc::METADATA_ATTACHMENT_ID: switch(CodeID) { @@ -557,7 +558,7 @@ BitstreamEntry Entry = Stream.advance(BitstreamCursor::AF_DontAutoprocessAbbrevs); - + switch (Entry.Kind) { case BitstreamEntry::Error: return ReportError("malformed bitcode file"); @@ -573,7 +574,7 @@ } return false; } - + case BitstreamEntry::SubBlock: { uint64_t SubBlockBitStart = Stream.GetCurrentBitNo(); if (ParseBlock(Stream, BlockInfo, Entry.ID, IndentLevel + 1, @@ -581,7 +582,7 @@ return true; ++BlockStats.NumSubBlocks; uint64_t SubBlockBitEnd = Stream.GetCurrentBitNo(); - + // Don't include subblock sizes in the size of this block. BlockBitStart += SubBlockBitEnd-SubBlockBitStart; continue; @@ -596,7 +597,7 @@ ++BlockStats.NumAbbrevs; continue; } - + Record.clear(); ++BlockStats.NumRecords; @@ -727,7 +728,7 @@ if (BlobIsPrintable) outs() << "'" << Blob << "'"; else - outs() << "unprintable, " << Blob.size() << " bytes."; + outs() << "unprintable, " << Blob.size() << " bytes."; } }