Index: include/llvm/IR/ModuleSummaryIndex.h
===================================================================
--- include/llvm/IR/ModuleSummaryIndex.h
+++ include/llvm/IR/ModuleSummaryIndex.h
@@ -163,13 +163,13 @@
 /// Struct that holds a reference to a particular GUID in a global value
 /// summary.
 struct ValueInfo {
-  PointerIntPair<const GlobalValueSummaryMapTy::value_type *, 1, bool>
-      RefAndFlag;
+  PointerIntPair<const GlobalValueSummaryMapTy::value_type *, 2, int>
+      RefAndFlags;
 
   ValueInfo() = default;
   ValueInfo(bool HaveGVs, const GlobalValueSummaryMapTy::value_type *R) {
-    RefAndFlag.setPointer(R);
-    RefAndFlag.setInt(HaveGVs);
+    RefAndFlags.setPointer(R);
+    RefAndFlags.setInt(HaveGVs);
   }
 
   operator bool() const { return getRef(); }
@@ -189,10 +189,12 @@
                       : getRef()->second.U.Name;
   }
 
-  bool haveGVs() const { return RefAndFlag.getInt(); }
+  bool haveGVs() const { return RefAndFlags.getInt() & 0x1; }
+  bool isReadOnly() const { return RefAndFlags.getInt() & 0x2; }
+  void setReadOnly() { RefAndFlags.setInt(RefAndFlags.getInt() | 0x2); }
 
   const GlobalValueSummaryMapTy::value_type *getRef() const {
-    return RefAndFlag.getPointer();
+    return RefAndFlags.getPointer();
   }
 
   bool isDSOLocal() const;
@@ -539,6 +541,8 @@
                          std::move(TypeTestAssumeConstVCalls),
                          std::move(TypeCheckedLoadConstVCalls)});
   }
+  // Gets the number of immutable refs in RefEdgeList
+  unsigned immutableRefCount() const;
 
   /// Check if this is a function summary.
   static bool classof(const GlobalValueSummary *GVS) {
@@ -648,19 +652,30 @@
 /// Global variable summary information to aid decisions and
 /// implementation of importing.
 ///
-/// Currently this doesn't add anything to the base \p GlobalValueSummary,
-/// but is a placeholder as additional info may be added to the summary
-/// for variables.
+/// Global variable summary has an extra flag, telling whether it is
+/// modified during the program run or not. This affects ThinLTO
+/// internalization.
 class GlobalVarSummary : public GlobalValueSummary {
-
 public:
-  GlobalVarSummary(GVFlags Flags, std::vector<ValueInfo> Refs)
-      : GlobalValueSummary(GlobalVarKind, Flags, std::move(Refs)) {}
+  struct GVarFlags {
+    GVarFlags(bool ReadOnly = false) : ReadOnly(ReadOnly) {}
+
+    unsigned ReadOnly : 1;
+  } VarFlags;
+
+  GlobalVarSummary(GVFlags Flags, GVarFlags VarFlags,
+                   std::vector<ValueInfo> Refs)
+      : GlobalValueSummary(GlobalVarKind, Flags, std::move(Refs)),
+        VarFlags(VarFlags) {}
 
   /// Check if this is a global variable summary.
   static bool classof(const GlobalValueSummary *GVS) {
     return GVS->getSummaryKind() == GlobalVarKind;
   }
+
+  GVarFlags varflags() const { return VarFlags; }
+  void setReadOnly(bool RO) { VarFlags.ReadOnly = RO; }
+  bool isReadOnly() const { return VarFlags.ReadOnly; }
 };
 
 struct TypeTestResolution {
@@ -813,6 +828,8 @@
                .first;
   }
 
+  void propagateConstantsForFunction(FunctionSummary *FS);
+
 public:
   // See HaveGVs variable comment.
   ModuleSummaryIndex(bool HaveGVs) : HaveGVs(HaveGVs), Saver(Alloc) {}
@@ -1131,6 +1148,9 @@
 
   /// Print out strongly connected components for debugging.
   void dumpSCCs(raw_ostream &OS);
+
+  /// Analyze index and detect unmodified globals
+  void propagateConstants(const DenseSet<GlobalValue::GUID> &PreservedSymbols);
 };
 
 /// GraphTraits definition to build SCC for the index
Index: include/llvm/Transforms/IPO/FunctionImport.h
===================================================================
--- include/llvm/Transforms/IPO/FunctionImport.h
+++ include/llvm/Transforms/IPO/FunctionImport.h
@@ -172,6 +172,14 @@
     const DenseSet<GlobalValue::GUID> &GUIDPreservedSymbols,
     function_ref<PrevailingType(GlobalValue::GUID)> isPrevailing);
 
+/// Compute dead symbols and run constant propagation in combined index
+/// after that.
+void computeDeadSymbolsWithConstProp(
+    ModuleSummaryIndex &Index,
+    const DenseSet<GlobalValue::GUID> &GUIDPreservedSymbols,
+    function_ref<PrevailingType(GlobalValue::GUID)> isPrevailing,
+    bool ImportEnabled);
+
 /// Converts value \p GV to declaration, or replaces with a declaration if
 /// it is an alias. Returns true if converted, false if replaced.
 bool convertToDeclaration(GlobalValue &GV);
Index: include/llvm/Transforms/Utils/FunctionImportUtils.h
===================================================================
--- include/llvm/Transforms/Utils/FunctionImportUtils.h
+++ include/llvm/Transforms/Utils/FunctionImportUtils.h
@@ -113,7 +113,6 @@
 bool renameModuleForThinLTO(
     Module &M, const ModuleSummaryIndex &Index,
     SetVector<GlobalValue *> *GlobalsToImport = nullptr);
-
 } // End llvm namespace
 
 #endif
Index: lib/Analysis/ModuleSummaryAnalysis.cpp
===================================================================
--- lib/Analysis/ModuleSummaryAnalysis.cpp
+++ lib/Analysis/ModuleSummaryAnalysis.cpp
@@ -220,10 +220,19 @@
   }
 }
 
-static void computeFunctionSummary(
-    ModuleSummaryIndex &Index, const Module &M, const Function &F,
-    BlockFrequencyInfo *BFI, ProfileSummaryInfo *PSI, DominatorTree &DT,
-    bool HasLocalsInUsedOrAsm, DenseSet<GlobalValue::GUID> &CantBePromoted) {
+static bool isNonVolatileLoad(const Instruction *I) {
+  if (const auto *LI = dyn_cast<LoadInst>(I))
+    return !LI->isVolatile();
+
+  return false;
+}
+
+static void computeFunctionSummary(ModuleSummaryIndex &Index, const Module &M,
+                                   const Function &F, BlockFrequencyInfo *BFI,
+                                   ProfileSummaryInfo *PSI, DominatorTree &DT,
+                                   bool HasLocalsInUsedOrAsm,
+                                   DenseSet<GlobalValue::GUID> &CantBePromoted,
+                                   bool IsThinLTO) {
   // Summary not currently supported for anonymous functions, they should
   // have been named.
   assert(F.hasName());
@@ -244,6 +253,7 @@
   // Add personality function, prefix data and prologue data to function's ref
   // list.
   findRefEdges(Index, &F, RefEdges, Visited);
+  std::vector<const Instruction *> NonVolatileLoads;
 
   bool HasInlineAsmMaybeReferencingInternal = false;
   for (const BasicBlock &BB : F)
@@ -251,6 +261,13 @@
       if (isa<DbgInfoIntrinsic>(I))
        continue;
       ++NumInsts;
+      if (isNonVolatileLoad(&I)) {
+        // Postpone processing of non-volatile load instructions;
+        // see comments below.
+        Visited.insert(&I);
+        NonVolatileLoads.push_back(&I);
+        continue;
+      }
       findRefEdges(Index, &I, RefEdges, Visited);
       auto CS = ImmutableCallSite(&I);
       if (!CS)
@@ -340,6 +357,21 @@
     }
   }
 
+  // By now we have processed all instructions in the function, except for
+  // non-volatile loads. All new refs we add in the loop below
+  // are constant. All constant refs are grouped at the
+  // end of the RefEdges vector, so we can use a single integer value
+  // to identify them.
+  unsigned RefCnt = RefEdges.size();
+  for (const Instruction *I : NonVolatileLoads) {
+    Visited.erase(I);
+    findRefEdges(Index, I, RefEdges, Visited);
+  }
+  std::vector<ValueInfo> Refs = RefEdges.takeVector();
+  if (IsThinLTO)
+    for (; RefCnt < Refs.size(); ++RefCnt)
+      Refs[RefCnt].setReadOnly();
+
   // Explicit add hot edges to enforce importing for designated GUIDs for
   // sample PGO, to enable the same inlines as the profiled optimized binary.
   for (auto &I : F.getImportGUIDs())
@@ -365,9 +397,9 @@
       F.returnDoesNotAlias(),
   };
   auto FuncSummary = llvm::make_unique<FunctionSummary>(
-      Flags, NumInsts, FunFlags, RefEdges.takeVector(),
-      CallGraphEdges.takeVector(), TypeTests.takeVector(),
-      TypeTestAssumeVCalls.takeVector(), TypeCheckedLoadVCalls.takeVector(),
+      Flags, NumInsts, FunFlags, std::move(Refs), CallGraphEdges.takeVector(),
+      TypeTests.takeVector(), TypeTestAssumeVCalls.takeVector(),
+      TypeCheckedLoadVCalls.takeVector(),
       TypeTestAssumeConstVCalls.takeVector(),
       TypeCheckedLoadConstVCalls.takeVector());
   if (NonRenamableLocal)
@@ -384,8 +416,15 @@
   bool NonRenamableLocal = isNonRenamableLocal(V);
   GlobalValueSummary::GVFlags Flags(V.getLinkage(), NonRenamableLocal,
                                     /* Live = */ false, V.isDSOLocal());
-  auto GVarSummary =
-      llvm::make_unique<GlobalVarSummary>(Flags, RefEdges.takeVector());
+
+  // TODO: Currently we don't support importing GV with references,
+  // so non-empty RefEdges is equivalent to notEligibleToImport.
+  GlobalVarSummary::GVarFlags VarFlags(
+      !V.hasComdat() && !V.hasAppendingLinkage() && !V.isInterposable() &&
+      !V.hasAvailableExternallyLinkage() && !V.hasDLLExportStorageClass() &&
+      RefEdges.empty());
+  auto GVarSummary = llvm::make_unique<GlobalVarSummary>(Flags, VarFlags,
+                                                         RefEdges.takeVector());
   if (NonRenamableLocal)
     CantBePromoted.insert(V.getGUID());
   if (HasBlockAddress)
@@ -488,13 +527,19 @@
           Index.addGlobalValueSummary(*GV, std::move(Summary));
         } else {
           std::unique_ptr<GlobalVarSummary> Summary =
-              llvm::make_unique<GlobalVarSummary>(GVFlags,
-                                                  ArrayRef<ValueInfo>{});
+              llvm::make_unique<GlobalVarSummary>(
+                  GVFlags, GlobalVarSummary::GVarFlags(),
+                  ArrayRef<ValueInfo>{});
           Index.addGlobalValueSummary(*GV, std::move(Summary));
         }
       });
   }
 
+  bool IsThinLTO = true;
+  if (auto *MD =
+          mdconst::extract_or_null<ConstantInt>(M.getModuleFlag("ThinLTO")))
+    IsThinLTO = MD->getZExtValue();
+
   // Compute summaries for all functions defined in module, and save in the
   // index.
   for (auto &F : M) {
@@ -515,7 +560,7 @@
 
     computeFunctionSummary(Index, M, F, BFI, PSI, DT,
                            !LocalsUsed.empty() || HasLocalInlineAsmSymbol,
-                           CantBePromoted);
+                           CantBePromoted, IsThinLTO);
   }
 
   // Compute summaries for all variables defined in module, and save in the
@@ -546,11 +591,6 @@
   setLiveRoot(Index, "llvm.global_dtors");
   setLiveRoot(Index, "llvm.global.annotations");
 
-  bool IsThinLTO = true;
-  if (auto *MD =
-          mdconst::extract_or_null<ConstantInt>(M.getModuleFlag("ThinLTO")))
-    IsThinLTO = MD->getZExtValue();
-
   for (auto &GlobalList : Index) {
     // Ignore entries for references that are undefined in the current module.
     if (GlobalList.second.SummaryList.empty())
Index: lib/AsmParser/LLParser.cpp
===================================================================
--- lib/AsmParser/LLParser.cpp
+++ lib/AsmParser/LLParser.cpp
@@ -7642,7 +7642,8 @@
   if (ParseToken(lltok::rparen, "expected ')' here"))
     return true;
 
-  auto GS = llvm::make_unique<GlobalVarSummary>(GVFlags, std::move(Refs));
+  auto GS = llvm::make_unique<GlobalVarSummary>(
+      GVFlags, GlobalVarSummary::GVarFlags(), std::move(Refs));
 
   GS->setModulePath(ModulePath);
Index: lib/Bitcode/Reader/BitcodeReader.cpp
===================================================================
--- lib/Bitcode/Reader/BitcodeReader.cpp
+++ lib/Bitcode/Reader/BitcodeReader.cpp
@@ -897,6 +897,11 @@
   return GlobalValueSummary::GVFlags(Linkage, NotEligibleToImport, Live, Local);
 }
 
+// Decode the flags for GlobalVariable in the summary
+static GlobalVarSummary::GVarFlags getDecodedGVarFlags(uint64_t RawFlags) {
+  return GlobalVarSummary::GVarFlags((RawFlags & 0x1) ?
true : false); +} + static GlobalValue::VisibilityTypes getDecodedVisibility(unsigned Val) { switch (Val) { default: // Map unknown visibilities to default. @@ -5169,6 +5174,12 @@ parseWholeProgramDevirtResolution(Record, Strtab, Slot, TypeId); } +static void setImmutableRefs(std::vector &Refs, unsigned Count) { + // Read-only refs are in the end of the refs list. + for (unsigned RefNo = Refs.size() - Count; RefNo < Refs.size(); ++RefNo) + Refs[RefNo].setReadOnly(); +} + // Eagerly parse the entire summary block. This populates the GlobalValueSummary // objects in the index. Error ModuleSummaryIndexBitcodeReader::parseEntireSummary(unsigned ID) { @@ -5186,9 +5197,9 @@ } const uint64_t Version = Record[0]; const bool IsOldProfileFormat = Version == 1; - if (Version < 1 || Version > 4) + if (Version < 1 || Version > 5) return error("Invalid summary version " + Twine(Version) + - ", 1, 2, 3 or 4 expected"); + ", 1, 2, 3, 4 or 5 expected"); Record.clear(); // Keep around the last seen summary to be used when we see an optional @@ -5267,11 +5278,16 @@ unsigned InstCount = Record[2]; uint64_t RawFunFlags = 0; unsigned NumRefs = Record[3]; + unsigned NumImmutableRefs = 0; int RefListStartIndex = 4; if (Version >= 4) { RawFunFlags = Record[3]; NumRefs = Record[4]; RefListStartIndex = 5; + if (Version >= 5) { + NumImmutableRefs = Record[5]; + RefListStartIndex = 6; + } } auto Flags = getDecodedGVSummaryFlags(RawFlags, Version); @@ -5290,6 +5306,7 @@ std::vector Calls = makeCallList( ArrayRef(Record).slice(CallGraphEdgeStartIndex), IsOldProfileFormat, HasProfile, HasRelBF); + setImmutableRefs(Refs, NumImmutableRefs); auto FS = llvm::make_unique( Flags, InstCount, getDecodedFFlags(RawFunFlags), std::move(Refs), std::move(Calls), std::move(PendingTypeTests), @@ -5338,14 +5355,21 @@ TheIndex.addGlobalValueSummary(GUID.first, std::move(AS)); break; } - // FS_PERMODULE_GLOBALVAR_INIT_REFS: [valueid, flags, n x valueid] + // FS_PERMODULE_GLOBALVAR_INIT_REFS: [valueid, flags, varflags, n x valueid] case bitc::FS_PERMODULE_GLOBALVAR_INIT_REFS: { unsigned ValueID = Record[0]; uint64_t RawFlags = Record[1]; + unsigned RefArrayStart = 2; + GlobalVarSummary::GVarFlags GVF; auto Flags = getDecodedGVSummaryFlags(RawFlags, Version); + if (Version >= 5) { + GVF = getDecodedGVarFlags(Record[2]); + RefArrayStart = 3; + } std::vector Refs = - makeRefList(ArrayRef(Record).slice(2)); - auto FS = llvm::make_unique(Flags, std::move(Refs)); + makeRefList(ArrayRef(Record).slice(RefArrayStart)); + auto FS = + llvm::make_unique(Flags, GVF, std::move(Refs)); FS->setModulePath(getThisModule()->first()); auto GUID = getValueInfoFromValueId(ValueID); FS->setOriginalName(GUID.second); @@ -5364,12 +5388,17 @@ unsigned InstCount = Record[3]; uint64_t RawFunFlags = 0; unsigned NumRefs = Record[4]; + unsigned NumImmutableRefs = 0; int RefListStartIndex = 5; if (Version >= 4) { RawFunFlags = Record[4]; NumRefs = Record[5]; RefListStartIndex = 6; + if (Version >= 5) { + NumImmutableRefs = Record[6]; + RefListStartIndex = 7; + } } auto Flags = getDecodedGVSummaryFlags(RawFlags, Version); @@ -5383,6 +5412,7 @@ ArrayRef(Record).slice(CallGraphEdgeStartIndex), IsOldProfileFormat, HasProfile, false); ValueInfo VI = getValueInfoFromValueId(ValueID).first; + setImmutableRefs(Refs, NumImmutableRefs); auto FS = llvm::make_unique( Flags, InstCount, getDecodedFFlags(RawFunFlags), std::move(Refs), std::move(Edges), std::move(PendingTypeTests), @@ -5431,10 +5461,17 @@ unsigned ValueID = Record[0]; uint64_t ModuleId = Record[1]; uint64_t RawFlags = 
Record[2]; + unsigned RefArrayStart = 3; + GlobalVarSummary::GVarFlags GVF; auto Flags = getDecodedGVSummaryFlags(RawFlags, Version); + if (Version >= 5) { + GVF = getDecodedGVarFlags(Record[3]); + RefArrayStart = 4; + } std::vector Refs = - makeRefList(ArrayRef(Record).slice(3)); - auto FS = llvm::make_unique(Flags, std::move(Refs)); + makeRefList(ArrayRef(Record).slice(RefArrayStart)); + auto FS = + llvm::make_unique(Flags, GVF, std::move(Refs)); LastSeenSummary = FS.get(); FS->setModulePath(ModuleIdMap[ModuleId]); ValueInfo VI = getValueInfoFromValueId(ValueID).first; Index: lib/Bitcode/Writer/BitcodeWriter.cpp =================================================================== --- lib/Bitcode/Writer/BitcodeWriter.cpp +++ lib/Bitcode/Writer/BitcodeWriter.cpp @@ -990,6 +990,11 @@ return RawFlags; } +static uint64_t getEncodedGVarFlags(GlobalVarSummary::GVarFlags Flags) { + uint64_t RawFlags = Flags.ReadOnly; + return RawFlags; +} + static unsigned getEncodedVisibility(const GlobalValue &GV) { switch (GV.getVisibility()) { case GlobalValue::DefaultVisibility: return 0; @@ -3488,6 +3493,7 @@ NameVals.push_back(FS->instCount()); NameVals.push_back(getEncodedFFlags(FS->fflags())); NameVals.push_back(FS->refs().size()); + NameVals.push_back(FS->immutableRefCount()); for (auto &RI : FS->refs()) NameVals.push_back(VE.getValueID(RI.getValue())); @@ -3529,6 +3535,7 @@ NameVals.push_back(VE.getValueID(&V)); GlobalVarSummary *VS = cast(Summary); NameVals.push_back(getEncodedGVSummaryFlags(VS->flags())); + NameVals.push_back(getEncodedGVarFlags(VS->varflags())); unsigned SizeBeforeRefs = NameVals.size(); for (auto &RI : VS->refs()) @@ -3545,7 +3552,7 @@ // Current version for the summary. // This is bumped whenever we introduce changes in the way some record are // interpreted, like flags for instance. -static const uint64_t INDEX_VERSION = 4; +static const uint64_t INDEX_VERSION = 5; /// Emit the per-module summary section alongside the rest of /// the module's bitcode. 
@@ -3580,6 +3587,7 @@ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // instcount Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 4)); // fflags Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 4)); // numrefs + Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 4)); // immutablerefcnt // numrefs x valueid, n x (valueid, hotness) Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array)); Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); @@ -3596,6 +3604,7 @@ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // instcount Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 4)); // fflags Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 4)); // numrefs + Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 4)); // immutablerefcnt // numrefs x valueid, n x (valueid [, rel_block_freq]) Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array)); Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); @@ -3690,6 +3699,7 @@ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // instcount Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 4)); // fflags Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 4)); // numrefs + Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 4)); // immutablerefcnt // numrefs x valueid, n x (valueid) Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array)); Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); @@ -3704,6 +3714,7 @@ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // instcount Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 4)); // fflags Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 4)); // numrefs + Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 4)); // immutablerefcnt // numrefs x valueid, n x (valueid, hotness) Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array)); Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); @@ -3776,6 +3787,7 @@ NameVals.push_back(*ValueId); NameVals.push_back(Index.getModuleId(VS->modulePath())); NameVals.push_back(getEncodedGVSummaryFlags(VS->flags())); + NameVals.push_back(getEncodedGVarFlags(VS->varflags())); for (auto &RI : VS->refs()) { auto RefValueId = getValueId(RI.getGUID()); if (!RefValueId) @@ -3801,17 +3813,21 @@ NameVals.push_back(FS->instCount()); NameVals.push_back(getEncodedFFlags(FS->fflags())); // Fill in below - NameVals.push_back(0); + NameVals.push_back(0); // numrefs + NameVals.push_back(0); // immutablerefcnt - unsigned Count = 0; + unsigned Count = 0, ImmutableRefCnt = 0; for (auto &RI : FS->refs()) { auto RefValueId = getValueId(RI.getGUID()); if (!RefValueId) continue; NameVals.push_back(*RefValueId); + if (RI.isReadOnly()) + ImmutableRefCnt++; Count++; } NameVals[5] = Count; + NameVals[6] = ImmutableRefCnt; bool HasProfileData = false; for (auto &EI : FS->calls()) { Index: lib/IR/ModuleSummaryIndex.cpp =================================================================== --- lib/IR/ModuleSummaryIndex.cpp +++ lib/IR/ModuleSummaryIndex.cpp @@ -30,6 +30,17 @@ }); } +// Gets the number of immutable refs in RefEdgeList +unsigned FunctionSummary::immutableRefCount() const { + // Here we take advantage of having all read-only references + // located in the end of the RefEdgeList. + auto Refs = refs(); + unsigned ImmutableRefCnt = 0; + for (int I = Refs.size() - 1; I >= 0; --I) + ImmutableRefCnt += Refs[I].isReadOnly(); + return ImmutableRefCnt; +} + // Collect for the given module the list of function it defines // (GUID -> Summary). 
 void ModuleSummaryIndex::collectDefinedFunctionsForModule(
@@ -84,6 +95,60 @@
   return false;
 }
 
+static void clearReadOnlyFlag(GlobalValueSummary *S) {
+  if (auto *AS = dyn_cast<AliasSummary>(S))
+    S = &AS->getAliasee();
+  if (auto *GVS = dyn_cast<GlobalVarSummary>(S))
+    GVS->setReadOnly(false);
+}
+
+static bool hasReadOnlyFlag(const GlobalValueSummary *S) {
+  if (auto *GVS = dyn_cast<GlobalVarSummary>(S))
+    return GVS->isReadOnly();
+  return false;
+}
+
+void ModuleSummaryIndex::propagateConstantsForFunction(FunctionSummary *FS) {
+  for (auto &VI : FS->refs())
+    for (auto &Summary : VI.getSummaryList())
+      if (!VI.isReadOnly())
+        clearReadOnlyFlag(Summary.get());
+}
+
+void ModuleSummaryIndex::propagateConstants(
+    const DenseSet<GlobalValue::GUID> &GUIDPreservedSymbols) {
+  auto ForEachLiveSummary =
+      [&](llvm::function_ref<void(GlobalValue::GUID, GlobalValueSummary *)> F) {
+        for (auto &VI : *this)
+          for (auto &Summary : VI.second.SummaryList)
+            if (Summary->isLive())
+              F(VI.first, Summary.get());
+      };
+
+  // Step 1: Find immutable global value summaries, which we can later
+  // internalize. Preserved symbols are visible externally, so we don't
+  // consider them constant.
+  ForEachLiveSummary([&](GlobalValue::GUID Id, GlobalValueSummary *S) {
+    if (GUIDPreservedSymbols.count(Id))
+      clearReadOnlyFlag(S);
+  });
+
+  // Step 2: for each function summary check its refs. If a ref is not constant
+  // then the referenced summary list is not constant either.
+  ForEachLiveSummary([this](GlobalValue::GUID, GlobalValueSummary *S) {
+    if (auto *FS = dyn_cast<FunctionSummary>(S))
+      propagateConstantsForFunction(FS);
+  });
+
+  // Step 3: All non-instruction refs are not constant.
+  ForEachLiveSummary([&](GlobalValue::GUID, GlobalValueSummary *S) {
+    if (GlobalVarSummary *GVS = dyn_cast<GlobalVarSummary>(S))
+      for (auto &VI : GVS->refs())
+        for (auto &Ref : VI.getSummaryList())
+          clearReadOnlyFlag(Ref.get());
+  });
+}
+
 // TODO: write a graphviz dumper for SCCs (see ModuleSummaryIndex::exportToDot)
 // then delete this function and update its tests
 LLVM_DUMP_METHOD
@@ -108,6 +173,7 @@
 struct Attributes {
   void add(const Twine &Name, const Twine &Value,
            const Twine &Comment = Twine());
+  void addComment(const Twine &Comment);
   std::string getAsString() const;
 
   std::vector<std::string> Attrs;
@@ -129,6 +195,10 @@
   A += Value.str();
   A += "\"";
   Attrs.push_back(A);
+  addComment(Comment);
+}
+
+void Attributes::addComment(const Twine &Comment) {
   if (!Comment.isTriviallyEmpty()) {
     if (Comments.empty())
       Comments = " // ";
@@ -243,12 +313,15 @@
   auto DrawEdge = [&](const char *Pfx, int SrcMod, GlobalValue::GUID SrcId,
                       int DstMod, GlobalValue::GUID DstId, int TypeOrHotness) {
-    // 0 corresponds to alias edge, 1 to ref edge, 2 to call with unknown
-    // hotness, ...
-    TypeOrHotness += 2;
+    // 0 - alias
+    // 1 - reference
+    // 2 - constant reference
+    // Other value: (hotness - 3).
+    TypeOrHotness += 3;
     static const char *EdgeAttrs[] = {
         " [style=dotted]; // alias",
         " [style=dashed]; // ref",
+        " [style=dashed,color=forestgreen]; // const-ref",
        " // call (hotness : Unknown)",
        " [color=blue]; // call (hotness : Cold)",
        " // call (hotness : None)",
@@ -291,6 +364,8 @@
       A.add("shape", "box");
     } else {
       A.add("shape", "Mrecord", "variable");
+      if (Flags.Live && hasReadOnlyFlag(SummaryIt.second))
+        A.addComment("immutable");
     }
 
     auto VI = getValueInfo(SummaryIt.first);
@@ -308,13 +383,13 @@
   for (auto &SummaryIt : GVSMap) {
     auto *GVS = SummaryIt.second;
     for (auto &R : GVS->refs())
-      Draw(SummaryIt.first, R.getGUID(), -1);
+      Draw(SummaryIt.first, R.getGUID(), R.isReadOnly() ?
-1 : -2); if (auto *AS = dyn_cast_or_null(SummaryIt.second)) { auto AliaseeOrigId = AS->getAliasee().getOriginalName(); auto AliaseeId = getGUIDFromOriginalID(AliaseeOrigId); - Draw(SummaryIt.first, AliaseeId ? AliaseeId : AliaseeOrigId, -2); + Draw(SummaryIt.first, AliaseeId ? AliaseeId : AliaseeOrigId, -3); continue; } Index: lib/LTO/LTO.cpp =================================================================== --- lib/LTO/LTO.cpp +++ lib/LTO/LTO.cpp @@ -148,8 +148,12 @@ Hasher.update(ArrayRef((uint8_t *)&ModHash[0], sizeof(ModHash))); AddUint64(Entry.second.size()); - for (auto &Fn : Entry.second) + for (auto &Fn : Entry.second) { AddUint64(Fn); + if (auto *GVS = dyn_cast( + Index.getGlobalValueSummary(Fn, false))) + AddUnsigned(GVS->isReadOnly()); + } } // Include the hash for the resolved ODR. @@ -182,6 +186,8 @@ AddUnsigned(VI.isDSOLocal()); AddUsedCfiGlobal(VI.getGUID()); } + if (auto *GVS = dyn_cast(GS)) + AddUnsigned(GVS->isReadOnly()); if (auto *FS = dyn_cast(GS)) { for (auto &TT : FS->type_tests()) UsedTypeIds.insert(TT); @@ -797,7 +803,8 @@ return PrevailingType::Unknown; return It->second; }; - computeDeadSymbols(ThinLTO.CombinedIndex, GUIDPreservedSymbols, isPrevailing); + computeDeadSymbolsWithConstProp(ThinLTO.CombinedIndex, GUIDPreservedSymbols, + isPrevailing, Conf.OptLevel > 0); // Setup output file to emit statistics. std::unique_ptr StatsFile = nullptr; Index: lib/LTO/ThinLTOCodeGenerator.cpp =================================================================== --- lib/LTO/ThinLTOCodeGenerator.cpp +++ lib/LTO/ThinLTOCodeGenerator.cpp @@ -646,7 +646,8 @@ auto isPrevailing = [&](GlobalValue::GUID G) { return PrevailingType::Unknown; }; - computeDeadSymbols(Index, GUIDPreservedSymbols, isPrevailing); + computeDeadSymbolsWithConstProp(Index, GUIDPreservedSymbols, isPrevailing, + true); } /** Index: lib/Linker/IRMover.cpp =================================================================== --- lib/Linker/IRMover.cpp +++ lib/Linker/IRMover.cpp @@ -1062,11 +1062,6 @@ ValueMap.MD()[CU->getRawEnumTypes()].reset(nullptr); ValueMap.MD()[CU->getRawMacros()].reset(nullptr); ValueMap.MD()[CU->getRawRetainedTypes()].reset(nullptr); - // We import global variables only temporarily in order for instcombine - // and globalopt to perform constant folding and static constructor - // evaluation. After that elim-avail-extern will covert imported globals - // back to declarations, so we don't need debug info for them. - ValueMap.MD()[CU->getRawGlobalVariables()].reset(nullptr); // Imported entities only need to be mapped in if they have local // scope, as those might correspond to an imported entity inside a Index: lib/Transforms/IPO/FunctionImport.cpp =================================================================== --- lib/Transforms/IPO/FunctionImport.cpp +++ lib/Transforms/IPO/FunctionImport.cpp @@ -814,6 +814,25 @@ NumLiveSymbols += LiveSymbols; } +// Compute dead symbols and propagate constants in combined index. +void llvm::computeDeadSymbolsWithConstProp( + ModuleSummaryIndex &Index, + const DenseSet &GUIDPreservedSymbols, + function_ref isPrevailing, + bool ImportEnabled) { + computeDeadSymbols(Index, GUIDPreservedSymbols, isPrevailing); + if (ImportEnabled) { + Index.propagateConstants(GUIDPreservedSymbols); + } else { + // If import is disabled we should drop read-only attribute + // from all summaries to prevent internalization. 
+    for (auto &P : Index)
+      for (auto &S : P.second.SummaryList)
+        if (auto *GVS = dyn_cast<GlobalVarSummary>(S.get()))
+          GVS->setReadOnly(false);
+  }
+}
+
 /// Compute the set of summaries needed for a ThinLTO backend compilation of
 /// \p ModulePath.
 void llvm::gatherImportedSummariesForModule(
@@ -1008,6 +1027,18 @@
   return NewFn;
 }
 
+// Internalize values that we marked with a specific attribute
+// in processGlobalForThinLTO.
+static void internalizeImmutableGVs(Module &M) {
+  for (auto &GV : M.globals())
+    if (auto *GVar = dyn_cast<GlobalVariable>(&GV))
+      if (GVar->hasAttribute("thinlto-internalize")) {
+        assert(!GVar->isDeclaration());
+        GVar->setLinkage(GlobalValue::InternalLinkage);
+        GVar->setVisibility(GlobalValue::DefaultVisibility);
+      }
+}
+
 // Automatically import functions in Module \p DestModule based on the summaries
 // index.
 Expected<bool> FunctionImporter::importFunctions(
@@ -1131,6 +1162,8 @@
     NumImportedModules++;
   }
 
+  internalizeImmutableGVs(DestModule);
+
   NumImportedFunctions += (ImportedCount - ImportedGVCount);
   NumImportedGlobalVars += ImportedGVCount;
 
Index: lib/Transforms/Utils/FunctionImportUtils.cpp
===================================================================
--- lib/Transforms/Utils/FunctionImportUtils.cpp
+++ lib/Transforms/Utils/FunctionImportUtils.cpp
@@ -16,6 +16,16 @@
 #include "llvm/IR/InstIterator.h"
 using namespace llvm;
 
+static GlobalVarSummary *getGVarSummary(const ModuleSummaryIndex &Index,
+                                        const GlobalValue *GV) {
+  auto VI = Index.getValueInfo(GV->getGUID());
+  if (!VI)
+    return nullptr;
+
+  const auto &SL = VI.getSummaryList();
+  return SL.empty() ? nullptr : dyn_cast<GlobalVarSummary>(SL[0].get());
+}
+
 /// Checks if we should import SGV as a definition, otherwise import as a
 /// declaration.
 bool FunctionImportGlobalProcessing::doImportAsDefinition(
@@ -213,6 +223,14 @@
     }
   }
 
+  // Mark read-only values which can be imported with a specific
+  // attribute. We'll internalize them after import is finished;
+  // see internalizeImmutableGVs.
+  if (!GV.isDeclaration())
+    if (auto *GVS = getGVarSummary(ImportIndex, &GV))
+      if (GVS->isLive() && GVS->isReadOnly() && !GVS->notEligibleToImport())
+        cast<GlobalVariable>(&GV)->addAttribute("thinlto-internalize");
+
   bool DoPromote = false;
   if (GV.hasLocalLinkage() &&
       ((DoPromote = shouldPromoteLocalToGlobal(&GV)) || isPerformingImport())) {
@@ -230,7 +248,7 @@
   // Remove functions imported as available externally defs from comdats,
   // as this is a declaration for the linker, and will be dropped eventually.
   // It is illegal for comdats to contain declarations.
- auto *GO = dyn_cast_or_null(&GV); + auto *GO = dyn_cast(&GV); if (GO && GO->isDeclarationForLinker() && GO->hasComdat()) { // The IRMover should not have placed any imported declarations in // a comdat, so the only declaration that should be in a comdat Index: test/Bitcode/summary_version.ll =================================================================== --- test/Bitcode/summary_version.ll +++ test/Bitcode/summary_version.ll @@ -2,7 +2,7 @@ ; RUN: opt -module-summary %s -o - | llvm-bcanalyzer -dump | FileCheck %s ; CHECK: +; CHECK: Index: test/Bitcode/thinlto-alias.ll =================================================================== --- test/Bitcode/thinlto-alias.ll +++ test/Bitcode/thinlto-alias.ll @@ -20,7 +20,7 @@ ; CHECK-NEXT: +; CHECK-NEXT: ; CHECK-NEXT: ; CHECK: ; COMBINED-NEXT: -; COMBINED-NEXT: +; COMBINED-NEXT: ; COMBINED-NEXT: +; CHECK-NEXT: ; CHECK-NEXT: ; CHECK-NEXT: Index: test/Bitcode/thinlto-function-summary-callgraph-cast.ll =================================================================== --- test/Bitcode/thinlto-function-summary-callgraph-cast.ll +++ test/Bitcode/thinlto-function-summary-callgraph-cast.ll @@ -6,9 +6,9 @@ ; CHECK: +; CHECK-NEXT: ; "another_caller" has only references but no calls. -; CHECK-NEXT: +; CHECK-NEXT: ; CHECK-NEXT: ; CHECK-NEXT: Index: test/Bitcode/thinlto-function-summary-callgraph-pgo.ll =================================================================== --- test/Bitcode/thinlto-function-summary-callgraph-pgo.ll +++ test/Bitcode/thinlto-function-summary-callgraph-pgo.ll @@ -17,7 +17,7 @@ ; CHECK: +; CHECK-NEXT: ; CHECK-NEXT: ; CHECK: +; COMBINED-NEXT: ; COMBINED-NEXT: ; ModuleID = 'thinlto-function-summary-callgraph.ll' Index: test/Bitcode/thinlto-function-summary-callgraph-profile-summary.ll =================================================================== --- test/Bitcode/thinlto-function-summary-callgraph-profile-summary.ll +++ test/Bitcode/thinlto-function-summary-callgraph-profile-summary.ll @@ -48,7 +48,7 @@ ; CHECK-NEXT: ; op4=hot1 op6=cold op8=hot2 op10=hot4 op12=none1 op14=hot3 op16=none2 op18=none3 op20=123 -; CHECK-NEXT: +; CHECK-NEXT: ; CHECK-NEXT: ; CHECK: +; COMBINED-NEXT: ; COMBINED_NEXT: Index: test/Bitcode/thinlto-function-summary-callgraph-relbf.ll =================================================================== --- test/Bitcode/thinlto-function-summary-callgraph-relbf.ll +++ test/Bitcode/thinlto-function-summary-callgraph-relbf.ll @@ -13,7 +13,7 @@ ; CHECK: ; CHECK: ; op4=none1 op6=hot1 op8=cold1 op10=none2 op12=hot2 op14=cold2 op16=none3 op18=hot3 op20=cold3 op22=123 -; CHECK-NEXT: +; CHECK-NEXT: ; CHECK-NEXT: ; CHECK: +; COMBINED-NEXT: ; COMBINED_NEXT: Index: test/Bitcode/thinlto-function-summary-callgraph.ll =================================================================== --- test/Bitcode/thinlto-function-summary-callgraph.ll +++ test/Bitcode/thinlto-function-summary-callgraph.ll @@ -17,7 +17,7 @@ ; CHECK-NEXT: ; CHECK: +; COMBINED-NEXT: ; COMBINED-NEXT: ; ModuleID = 'thinlto-function-summary-callgraph.ll' Index: test/Bitcode/thinlto-function-summary-refgraph.ll =================================================================== --- test/Bitcode/thinlto-function-summary-refgraph.ll +++ test/Bitcode/thinlto-function-summary-refgraph.ll @@ -41,27 +41,27 @@ ; CHECK: +; CHECK-DAG: ; Function W contains a call to func3 as well as a reference to globalvar: ; op0=W op4=globalvar op5=func3 -; CHECK-DAG: +; CHECK-DAG: ; Function X contains call to foo, as well as address reference to foo ; which is in the same 
instruction as the call: ; op0=X op4=foo op5=foo -; CHECK-DAG: +; CHECK-DAG: ; Function Y contains call to func2, and ensures we don't incorrectly add ; a reference to it when reached while earlier analyzing the phi using its ; return value: ; op0=Y op4=func2 -; CHECK-DAG: +; CHECK-DAG: ; Function Z contains call to func2, and ensures we don't incorrectly add ; a reference to it when reached while analyzing subsequent use of its return ; value: ; op0=Z op4=func2 -; CHECK-DAG: +; CHECK-DAG: ; Variable bar initialization contains address reference to func: ; op0=bar op2=func -; CHECK-DAG: +; CHECK-DAG: ; CHECK: ; CHECK: M1_{{[0-9]+}} // call -; STRUCTURE-DAG: M0_{{[0-9]+}} -> M1_{{[0-9]+}} [{{.*}}]; // ref +; STRUCTURE-DAG: M0_{{[0-9]+}} -> M1_{{[0-9]+}} [{{.*}}]; // const-ref ; STRUCTURE-NEXT: } ; CLUSTER0: // Module: {{.*}}1.bc @@ -33,13 +33,13 @@ ; CLUSTER1: // Module: {{.*}}2.bc ; CLUSTER1-NEXT: subgraph cluster_1 { -; CLUSTER1-DAG: M1_[[A:[0-9]+]] [{{.*}}A|extern{{.*}}]; // variable +; CLUSTER1-DAG: M1_[[A:[0-9]+]] [{{.*}}A|extern{{.*}}]; // variable, immutable ; CLUSTER1-DAG: M1_[[FOO:[0-9]+]] [{{.*}}foo|extern{{.*}}]; // function, not eligible to import -; CLUSTER1-DAG: M1_[[B:[0-9]+]] [{{.*}}B|extern{{.*}}]; // variable +; CLUSTER1-DAG: M1_[[B:[0-9]+]] [{{.*}}B|extern{{.*}}]; // variable, immutable ; CLUSTER1-DAG: M1_[[BAR:[0-9]+]] [{{.*}}bar|extern{{.*}}]; // function, dead ; CLUSTER1-NEXT: // Edges: -; CLUSTER1-DAG: M1_[[FOO]] -> M1_[[B]] [{{.*}}]; // ref -; CLUSTER1-DAG: M1_[[FOO]] -> M1_[[A]] [{{.*}}]; // ref +; CLUSTER1-DAG: M1_[[FOO]] -> M1_[[B]] [{{.*}}]; // const-ref +; CLUSTER1-DAG: M1_[[FOO]] -> M1_[[A]] [{{.*}}]; // const-ref ; CLUSTER1-DAG: } target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" Index: test/ThinLTO/X86/globals-import-const-fold.ll =================================================================== --- test/ThinLTO/X86/globals-import-const-fold.ll +++ test/ThinLTO/X86/globals-import-const-fold.ll @@ -2,12 +2,12 @@ ; RUN: opt -module-summary %p/Inputs/globals-import-cf-baz.ll -o %t2.bc ; RUN: llvm-lto -thinlto-action=thinlink %t1.bc %t2.bc -o %t3.index.bc -; RUN: llvm-lto -thinlto-action=import %t1.bc %t2.bc -thinlto-index=%t3.index.bc +; RUN: llvm-lto -thinlto-action=import -exported-symbol=main %t1.bc -thinlto-index=%t3.index.bc ; RUN: llvm-dis %t1.bc.thinlto.imported.bc -o - | FileCheck --check-prefix=IMPORT %s ; RUN: llvm-lto -thinlto-action=optimize %t1.bc.thinlto.imported.bc -o %t1.bc.thinlto.opt.bc ; RUN: llvm-dis %t1.bc.thinlto.opt.bc -o - | FileCheck --check-prefix=OPTIMIZE %s -; IMPORT: @baz = available_externally local_unnamed_addr constant i32 10 +; IMPORT: @baz = internal local_unnamed_addr constant i32 10 ; OPTIMIZE: define i32 @main() ; OPTIMIZE-NEXT: ret i32 10 Index: test/ThinLTO/X86/index-const-prop-O0.ll =================================================================== --- test/ThinLTO/X86/index-const-prop-O0.ll +++ test/ThinLTO/X86/index-const-prop-O0.ll @@ -0,0 +1,18 @@ +; RUN: opt -module-summary %s -o %t1.bc +; RUN: opt -module-summary %p/Inputs/index-const-prop-define-g.ll -o %t2.bc +; RUN: llvm-lto2 run -O0 -save-temps %t2.bc -r=%t2.bc,g,pl %t1.bc -r=%t1.bc,main,plx -r=%t1.bc,g, -o %t3 +; RUN: llvm-dis %t3.1.3.import.bc -o - | FileCheck %s + +; With -O0 import is disabled so we must not internalize +; read-only globals +; CHECK: @g = dso_local global i32 42 + +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +@g = external global i32 + +define i32 @main() { + %v = load 
i32, i32* @g + ret i32 %v +} Index: test/ThinLTO/X86/index-const-prop-alias.ll =================================================================== --- test/ThinLTO/X86/index-const-prop-alias.ll +++ test/ThinLTO/X86/index-const-prop-alias.ll @@ -0,0 +1,42 @@ +; RUN: opt -module-summary %s -o %t1.bc +; RUN: opt -module-summary %p/Inputs/index-const-prop-alias.ll -o %t2.bc +; RUN: llvm-lto2 run %t1.bc -r=%t1.bc,main,plx -r=%t1.bc,ret_ptr,pl -r=%t1.bc,g.alias,l -r=%t1.bc,g,l \ +; RUN: %t2.bc -r=%t2.bc,g,pl -r=%t2.bc,g.alias,pl -save-temps -o %t3 +; RUN: llvm-dis %t3.1.3.import.bc -o - | FileCheck %s --check-prefix=IMPORT +; RUN: llvm-dis %t3.1.5.precodegen.bc -o - | FileCheck %s --check-prefix=CODEGEN + +; When ret_ptr is preserved we return pointer to alias, so we can't internalize aliasee +; RUN: llvm-lto2 run %t1.bc -r=%t1.bc,main,plx -r=%t1.bc,ret_ptr,plx -r=%t1.bc,g.alias,l -r=%t1.bc,g,l \ +; RUN: %t2.bc -r=%t2.bc,g,pl -r=%t2.bc,g.alias,pl -save-temps -o %t4 +; RUN: llvm-dis %t4.1.3.import.bc -o - | FileCheck %s --check-prefix=PRESERVED + +; When g.alias is preserved we can't internalize aliasee either +; RUN: llvm-lto2 run %t1.bc -r=%t1.bc,main,plx -r=%t1.bc,ret_ptr,pl -r=%t1.bc,g.alias,l -r=%t1.bc,g,l \ +; RUN: %t2.bc -r=%t2.bc,g,pl -r=%t2.bc,g.alias,plx -save-temps -o %t5 +; RUN: llvm-dis %t5.1.3.import.bc -o - | FileCheck %s --check-prefix=PRESERVED + +; We currently don't support importing aliases +; IMPORT: @g.alias = external dso_local global i32 +; IMPORT-NEXT: @g = internal global i32 42, align 4 #0 +; IMPORT: attributes #0 = { "thinlto-internalize" } + +; CODEGEN: define dso_local i32 @main +; CODEGEN-NEXT: ret i32 42 + +; PRESERVED: @g.alias = external dso_local global i32 +; PRESERVED-NEXT: @g = available_externally dso_local global i32 42, align 4 + +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +@g.alias = external global i32 +@g = external global i32 + +define i32 @main() { + %v = load i32, i32* @g + ret i32 %v +} + +define i32* @ret_ptr() { + ret i32* @g.alias +} Index: test/ThinLTO/X86/index-const-prop-comdat.ll =================================================================== --- test/ThinLTO/X86/index-const-prop-comdat.ll +++ test/ThinLTO/X86/index-const-prop-comdat.ll @@ -0,0 +1,17 @@ +; RUN: opt -module-summary %s -o %t1.bc +; RUN: opt -module-summary %p/Inputs/index-const-prop-comdat.ll -o %t2.bc +; RUN: llvm-lto2 run -save-temps %t2.bc -r=%t2.bc,g,pl %t1.bc -r=%t1.bc,main,plx -r=%t1.bc,g, -o %t3 +; RUN: llvm-dis %t3.2.3.import.bc -o - | FileCheck %s + +; Comdats are not internalized even if they are read only. 
+; CHECK: @g = available_externally dso_local global i32 42 + +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +@g = external global i32 + +define i32 @main() { + %v = load i32, i32* @g + ret i32 %v +} Index: test/ThinLTO/X86/index-const-prop-dead.ll =================================================================== --- test/ThinLTO/X86/index-const-prop-dead.ll +++ test/ThinLTO/X86/index-const-prop-dead.ll @@ -0,0 +1,17 @@ +; RUN: opt -module-summary %s -o %t1.bc +; RUN: opt -module-summary %p/Inputs/index-const-prop-define-g.ll -o %t2.bc +; RUN: llvm-lto2 run -save-temps %t2.bc -r=%t2.bc,g,pl %t1.bc -r=%t1.bc,main,pl -r=%t1.bc,g, -o %t3 +; RUN: llvm-dis %t3.2.3.import.bc -o - | FileCheck %s + +; Dead symbols are not internalized even if they're readonly +; CHECK: @g = available_externally dso_local global i32 42 + +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +@g = external global i32 + +define i32 @main() { + %v = load i32, i32* @g + ret i32 %v +} Index: test/ThinLTO/X86/index-const-prop-full-lto.ll =================================================================== --- test/ThinLTO/X86/index-const-prop-full-lto.ll +++ test/ThinLTO/X86/index-const-prop-full-lto.ll @@ -0,0 +1,24 @@ +; RUN: opt -module-summary %s -o %t1.bc +; RUN: opt -module-summary %p/Inputs/index-const-prop-define-g.ll -o %t2.bc +; RUN: opt -module-summary %p/Inputs/index-const-prop-full-lto.ll -o %t3.bc +; RUN: llvm-lto2 run -save-temps %t2.bc -r=%t2.bc,g,pl \ +; RUN: %t1.bc -r=%t1.bc,foo,l -r=%t1.bc,main,plx -r=%t1.bc,g, \ +; RUN: %t3.bc -r=%t3.bc,foo,pl -r=%t3.bc,g, -o %t4 +; RUN: llvm-dis %t4.2.3.import.bc -o - | FileCheck %s + +; All references from functions in full LTO module are not constant. 
+; We cannot internalize @g +; CHECK: @g = available_externally dso_local global i32 42 + +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +declare i32 @foo() +@g = external global i32 + +define i32 @main() { + %v = call i32 @foo() + %v2 = load i32, i32* @g + %v3 = add i32 %v, %v2 + ret i32 %v3 +} Index: test/ThinLTO/X86/index-const-prop-gvref.ll =================================================================== --- test/ThinLTO/X86/index-const-prop-gvref.ll +++ test/ThinLTO/X86/index-const-prop-gvref.ll @@ -0,0 +1,22 @@ +; RUN: opt -module-summary %s -o %t1.bc +; RUN: opt -module-summary %p/Inputs/index-const-prop-gvref.ll -o %t2.bc +; RUN: llvm-lto2 run -save-temps %t2.bc -r=%t2.bc,b,pl -r=%t2.bc,a,pl \ +; RUN: %t1.bc -r=%t1.bc,main,plx -r=%t1.bc,a, -r=%t1.bc,b, -o %t3 +; RUN: llvm-dis %t3.2.3.import.bc -o - | FileCheck %s + +; We can't internalize globals referenced by other live globals +; CHECK: @b = external dso_local global i32* +; CHECK-NEXT: @a = available_externally dso_local global i32 42, align 4 + +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +@a = external global i32 +@b = external global i32* + +define i32 @main() { + %p = load i32*, i32** @b, align 8 + store i32 33, i32* %p, align 4 + %v = load i32, i32* @a, align 4 + ret i32 %v +} Index: test/ThinLTO/X86/index-const-prop-ldst.ll =================================================================== --- test/ThinLTO/X86/index-const-prop-ldst.ll +++ test/ThinLTO/X86/index-const-prop-ldst.ll @@ -0,0 +1,21 @@ +; RUN: opt -module-summary %s -o %t1.bc +; RUN: opt -module-summary %p/Inputs/index-const-prop-define-g.ll -o %t2.bc +; RUN: llvm-lto2 run -save-temps %t2.bc -r=%t2.bc,g,pl %t1.bc -r=%t1.bc,main,plx -r=%t1.bc,g, -o %t3 +; RUN: llvm-dis %t3.2.3.import.bc -o - | FileCheck %s + +; The 'store' instruction in @main should prevent internalization +; even when there is 'load' instruction before it. 
+; CHECK: @g = available_externally dso_local global i32 42 + +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +@g = external global i32 + +define i32 @main() { + %v = load i32, i32* @g + %q = add i32 %v, 1 + store i32 %q, i32* @g + + ret i32 %v +} Index: test/ThinLTO/X86/index-const-prop-linkage.ll =================================================================== --- test/ThinLTO/X86/index-const-prop-linkage.ll +++ test/ThinLTO/X86/index-const-prop-linkage.ll @@ -0,0 +1,27 @@ +; RUN: opt -module-summary %s -o %t1.bc +; RUN: opt -module-summary %p/Inputs/index-const-prop-linkage.ll -o %t2.bc +; RUN: llvm-lto2 run -save-temps %t2.bc -r=%t2.bc,foo,pl -r=%t2.bc,g1,pl -r=%t2.bc,g2,pl -r=%t2.bc,g3, \ +; RUN: %t1.bc -r=%t1.bc,foo, -r=%t1.bc,main,plx -r=%t1.bc,g2, -o %t3 +; RUN: llvm-dis %t3.2.3.import.bc -o - | FileCheck %s + +; Check that we never internalize anything with: +; - appending linkage +; - common linkage +; - available_externally linkage +; - reference from @llvm.used +; CHECK: @llvm.used = appending global [1 x i32*] [i32* @g2] +; CHECK-NEXT: @g1 = external dso_local global i32, align 4 +; CHECK-NEXT: @g2 = available_externally dso_local global i32 42, align 4 +; CHECK-NEXT: @g3 = available_externally global i32 42, align 4 + +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +declare i32 @foo() +@g2 = external global i32 +@llvm.used = appending global [1 x i32*] [i32* @g2] + +define i32 @main() { + %v = call i32 @foo() + ret i32 %v +} Index: test/ThinLTO/X86/index-const-prop.ll =================================================================== --- test/ThinLTO/X86/index-const-prop.ll +++ test/ThinLTO/X86/index-const-prop.ll @@ -0,0 +1,40 @@ +; Check constant propagation in thinlto combined summary. This allows us to do 2 things: +; 1. Internalize global definition which is not used externally if all accesses to it are read-only +; 2. Make a local copy of internal definition if all accesses to it are readonly. This allows constant +; folding it during optimziation phase. 
+ +; RUN: opt -module-summary %s -o %t1.bc +; RUN: opt -module-summary %p/Inputs/index-const-prop.ll -o %t2.bc +; RUN: llvm-lto -thinlto-action=thinlink -o %t3.index.bc %t1.bc %t2.bc +; RUN: llvm-lto -thinlto-action=import -exported-symbol=main %t1.bc -thinlto-index=%t3.index.bc -o %t1.imported.bc +; RUN: llvm-dis %t1.imported.bc -o - | FileCheck %s --check-prefix=IMPORT +; RUN: llvm-lto -thinlto-action=optimize %t1.imported.bc -o - | llvm-dis - -o - | FileCheck %s --check-prefix=OPTIMIZE + +; Check that we don't internalize gBar when it is exported +; RUN: llvm-lto -thinlto-action=import -exported-symbol main -exported-symbol gBar %t1.bc -thinlto-index=%t3.index.bc -o %t1.imported2.bc +; RUN: llvm-dis %t1.imported2.bc -o - | FileCheck %s --check-prefix=IMPORT2 + +; IMPORT: @gFoo.llvm.0 = internal unnamed_addr global i32 1, align 4, !dbg !0 +; IMPORT-NEXT: @gBar = internal local_unnamed_addr global i32 2, align 4, !dbg !5 +; IMPORT: !DICompileUnit({{.*}}, globals: !{{[0-9]+}}) + +; OPTIMIZE: define i32 @main +; OPTIMIZE-NEXT: ret i32 3 + +; IMPORT2: @gBar = available_externally local_unnamed_addr global i32 2, align 4, !dbg !5 + +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-pc-linux-gnu" + +@gBar = external global i32 + +define i32 @main() local_unnamed_addr { + %call = tail call i32 bitcast (i32 (...)* @foo to i32 ()*)() + %call1 = tail call i32 bitcast (i32 (...)* @bar to i32 ()*)() + %add = add nsw i32 %call1, %call + ret i32 %add +} + +declare i32 @foo(...) local_unnamed_addr + +declare i32 @bar(...) local_unnamed_addr Index: test/ThinLTO/X86/index-const-prop2.ll =================================================================== --- test/ThinLTO/X86/index-const-prop2.ll +++ test/ThinLTO/X86/index-const-prop2.ll @@ -0,0 +1,59 @@ +; Check constant propagation in thinlto combined summary. This allows us to do 2 things: +; 1. Internalize global definition which is not used externally if all accesses to it are read-only +; 2. Make a local copy of internal definition if all accesses to it are readonly. This allows constant +; folding it during optimziation phase. 
+; RUN: opt -module-summary %s -o %t1.bc +; RUN: opt -module-summary %p/Inputs/index-const-prop.ll -o %t2.bc +; RUN: llvm-lto2 run %t1.bc %t2.bc -save-temps \ +; RUN: -r=%t2.bc,foo,pl \ +; RUN: -r=%t2.bc,bar,pl \ +; RUN: -r=%t2.bc,baz,pl \ +; RUN: -r=%t2.bc,rand, \ +; RUN: -r=%t2.bc,gBar,pl \ +; RUN: -r=%t1.bc,main,plx \ +; RUN: -r=%t1.bc,foo, \ +; RUN: -r=%t1.bc,bar, \ +; RUN: -r=%t1.bc,gBar, \ +; RUN: -o %t3 +; RUN: llvm-dis %t3.1.3.import.bc -o - | FileCheck %s --check-prefix=IMPORT +; RUN: llvm-dis %t3.1.5.precodegen.bc -o - | FileCheck %s --check-prefix=CODEGEN + +; Now check that we won't internalize global (gBar) if it's externally referenced +; RUN: llvm-lto2 run %t1.bc %t2.bc -save-temps \ +; RUN: -r=%t2.bc,foo,pl \ +; RUN: -r=%t2.bc,bar,pl \ +; RUN: -r=%t2.bc,baz,pl \ +; RUN: -r=%t2.bc,rand, \ +; RUN: -r=%t2.bc,gBar,plx \ +; RUN: -r=%t1.bc,main,plx \ +; RUN: -r=%t1.bc,foo, \ +; RUN: -r=%t1.bc,bar, \ +; RUN: -r=%t1.bc,gBar, \ +; RUN: -o %t3 +; RUN: llvm-dis %t3.1.3.import.bc -o - | FileCheck %s --check-prefix=IMPORT2 + +; IMPORT: @gFoo.llvm.0 = internal unnamed_addr global i32 1, align 4 +; IMPORT-NEXT: @gBar = internal local_unnamed_addr global i32 2, align 4 +; IMPORT: !DICompileUnit({{.*}}, globals: !{{[0-9]+}}) + +; CODEGEN: i32 @main() +; CODEGEN-NEXT: ret i32 3 + +; IMPORT2: @gBar = available_externally dso_local local_unnamed_addr global i32 2, align 4 + +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-pc-linux-gnu" + +; We should be able to link external definition of gBar to its declaration +@gBar = external global i32 + +define i32 @main() local_unnamed_addr { + %call = tail call i32 bitcast (i32 (...)* @foo to i32 ()*)() + %call1 = tail call i32 bitcast (i32 (...)* @bar to i32 ()*)() + %add = add nsw i32 %call1, %call + ret i32 %add +} + +declare i32 @foo(...) local_unnamed_addr + +declare i32 @bar(...) local_unnamed_addr
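
Below is a small usage sketch, not part of the patch, showing how a ThinLTO client could drive the new index-level constant propagation and then inspect which live global variable summaries remain flagged read-only. It relies only on APIs touched above (computeDeadSymbolsWithConstProp, GlobalVarSummary::isReadOnly/isLive, PrevailingType::Unknown); the helper names runIndexConstProp and dumpReadOnlyGlobals are hypothetical.

// Usage sketch (hypothetical helpers): run dead-symbol computation plus
// read-only propagation on a combined index, then list the surviving
// read-only globals. Only APIs introduced or used by this patch are called.
#include "llvm/ADT/DenseSet.h"
#include "llvm/IR/ModuleSummaryIndex.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/IPO/FunctionImport.h"

using namespace llvm;

// Hypothetical helper: print the GUID of every live, read-only variable summary.
static void dumpReadOnlyGlobals(const ModuleSummaryIndex &Index) {
  for (const auto &P : Index)
    for (const auto &S : P.second.SummaryList)
      if (const auto *GVS = dyn_cast<GlobalVarSummary>(S.get()))
        if (GVS->isLive() && GVS->isReadOnly())
          errs() << "read-only global summary, GUID " << P.first << "\n";
}

// Hypothetical helper: mirror what the LTO driver does after symbol resolution.
static void runIndexConstProp(ModuleSummaryIndex &Index,
                              const DenseSet<GlobalValue::GUID> &Preserved,
                              bool ImportEnabled) {
  auto isPrevailing = [](GlobalValue::GUID) { return PrevailingType::Unknown; };
  // With ImportEnabled == false the read-only flags are dropped again,
  // so no global will later be internalized by the import step.
  computeDeadSymbolsWithConstProp(Index, Preserved, isPrevailing, ImportEnabled);
  dumpReadOnlyGlobals(Index);
}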