Index: include/llvm/IR/ModuleSummaryIndex.h =================================================================== --- include/llvm/IR/ModuleSummaryIndex.h +++ include/llvm/IR/ModuleSummaryIndex.h @@ -941,6 +941,11 @@ /// considered live. bool WithGlobalValueDeadStripping = false; + /// Indicates that summary-based attribute propagation has run and + /// GVarFlags::MaybeReadonly / GVarFlags::MaybeWriteonly are really + /// read/write only. + bool WithAttributePropagation = false; + /// Indicates that summary-based synthetic entry count propagation has run bool HasSyntheticEntryCounts = false; @@ -1065,6 +1070,18 @@ WithGlobalValueDeadStripping = true; } + bool withAttributePropagation() const { return WithAttributePropagation; } + void setWithAttributePropagation(bool Flag) { + WithAttributePropagation = Flag; + } + + bool isReadOnly(const GlobalVarSummary *GVS) const { + return WithAttributePropagation && GVS->maybeReadOnly(); + } + bool isWriteOnly(const GlobalVarSummary *GVS) const { + return WithAttributePropagation && GVS->maybeWriteOnly(); + } + bool hasSyntheticEntryCounts() const { return HasSyntheticEntryCounts; } void setHasSyntheticEntryCounts() { HasSyntheticEntryCounts = true; } @@ -1356,6 +1373,9 @@ /// Analyze index and detect unmodified globals void propagateAttributes(const DenseSet &PreservedSymbols); + + /// Checks if we can import global variable from another module. + bool canImportGlobalVar(GlobalValueSummary *S) const; }; /// GraphTraits definition to build SCC for the index @@ -1427,15 +1447,6 @@ return ValueInfo(I->haveGVs(), &P); } }; - -static inline bool canImportGlobalVar(GlobalValueSummary *S) { - assert(isa(S->getBaseObject())); - - // We don't import GV with references, because it can result - // in promotion of local variables in the source module. - return !GlobalValue::isInterposableLinkage(S->linkage()) && - !S->notEligibleToImport() && S->refs().empty(); -} } // end namespace llvm #endif // LLVM_IR_MODULESUMMARYINDEX_H Index: lib/Bitcode/Reader/BitcodeReader.cpp =================================================================== --- lib/Bitcode/Reader/BitcodeReader.cpp +++ lib/Bitcode/Reader/BitcodeReader.cpp @@ -5768,7 +5768,7 @@ } const uint64_t Version = Record[0]; const bool IsOldProfileFormat = Version == 1; - if (Version < 1 || Version > 7) + if (Version < 1 || Version > 8) return error("Invalid summary version " + Twine(Version) + ". Version should be in the range [1-7]."); Record.clear(); @@ -5821,7 +5821,7 @@ case bitc::FS_FLAGS: { // [flags] uint64_t Flags = Record[0]; // Scan flags. - assert(Flags <= 0x1f && "Unexpected bits in flag"); + assert(Flags <= 0x3f && "Unexpected bits in flag"); // 1 bit: WithGlobalValueDeadStripping flag. // Set on combined index only. @@ -5844,6 +5844,13 @@ // Set on combined index only. if (Flags & 0x10) TheIndex.setPartiallySplitLTOUnits(); + // 1 bit: WithAttributePropagation flag. + // Set on combined index only. + // Since version 5 running dead symbol computation + // also means we run attribute propagation. + TheIndex.setWithAttributePropagation( + (Flags & 0x20) || + (Version >= 5 && TheIndex.withGlobalValueDeadStripping())); break; } case bitc::FS_VALUE_GUID: { // [valueid, refguid] @@ -6559,7 +6566,7 @@ case bitc::FS_FLAGS: { // [flags] uint64_t Flags = Record[0]; // Scan flags. - assert(Flags <= 0x1f && "Unexpected bits in flag"); + assert(Flags <= 0x3f && "Unexpected bits in flag"); return Flags & 0x8; } Index: lib/Bitcode/Writer/BitcodeWriter.cpp =================================================================== --- lib/Bitcode/Writer/BitcodeWriter.cpp +++ lib/Bitcode/Writer/BitcodeWriter.cpp @@ -3723,7 +3723,7 @@ // Current version for the summary. // This is bumped whenever we introduce changes in the way some record are // interpreted, like flags for instance. -static const uint64_t INDEX_VERSION = 7; +static const uint64_t INDEX_VERSION = 8; /// Emit the per-module summary section alongside the rest of /// the module's bitcode. @@ -3899,6 +3899,8 @@ Flags |= 0x8; if (Index.partiallySplitLTOUnits()) Flags |= 0x10; + if (Index.withAttributePropagation()) + Flags |= 0x20; Stream.EmitRecord(bitc::FS_FLAGS, ArrayRef{Flags}); for (const auto &GVI : valueIds()) { Index: lib/IR/ModuleSummaryIndex.cpp =================================================================== --- lib/IR/ModuleSummaryIndex.cpp +++ lib/IR/ModuleSummaryIndex.cpp @@ -193,6 +193,21 @@ } } +bool ModuleSummaryIndex::canImportGlobalVar(GlobalValueSummary *S) const { + auto HasRefsPreventingImport = [this](const GlobalVarSummary *GVS) { + // During attribute propagation (WithAttributePropagation is false) + // we're not aware if GVS is read or write-only. We speculatively + // permit import. + return WithAttributePropagation && !isReadOnly(GVS) && GVS->refs().size(); + }; + auto *GVS = cast(S->getBaseObject()); + + // We don't import GV with references, because it can result + // in promotion of local variables in the source module. + return !GlobalValue::isInterposableLinkage(S->linkage()) && + !S->notEligibleToImport() && !HasRefsPreventingImport(GVS); +} + // TODO: write a graphviz dumper for SCCs (see ModuleSummaryIndex::exportToDot) // then delete this function and update its tests LLVM_DUMP_METHOD Index: lib/Transforms/IPO/FunctionImport.cpp =================================================================== --- lib/Transforms/IPO/FunctionImport.cpp +++ lib/Transforms/IPO/FunctionImport.cpp @@ -280,7 +280,8 @@ } static void computeImportForReferencedGlobals( - const FunctionSummary &Summary, const GVSummaryMapTy &DefinedGVSummaries, + const FunctionSummary &Summary, const ModuleSummaryIndex &Index, + const GVSummaryMapTy &DefinedGVSummaries, FunctionImporter::ImportMapTy &ImportList, StringMap *ExportLists) { for (auto &VI : Summary.refs()) { @@ -303,16 +304,24 @@ RefSummary->modulePath() != Summary.modulePath(); }; + auto MarkExported = [&](const ValueInfo &VI, const GlobalValueSummary *S) { + if (ExportLists) + (*ExportLists)[S->modulePath()].insert(VI.getGUID()); + }; + for (auto &RefSummary : VI.getSummaryList()) if (isa(RefSummary.get()) && - canImportGlobalVar(RefSummary.get()) && + Index.canImportGlobalVar(RefSummary.get()) && !LocalNotInModule(RefSummary.get())) { auto ILI = ImportList[RefSummary->modulePath()].insert(VI.getGUID()); // Only update stat if we haven't already imported this variable. if (ILI.second) NumImportedGlobalVarsThinLink++; - if (ExportLists) - (*ExportLists)[RefSummary->modulePath()].insert(VI.getGUID()); + MarkExported(VI, RefSummary.get()); + // Promote referenced functions and variables + for (const auto &VI : RefSummary->refs()) + for (const auto &RefFn : VI.getSummaryList()) + MarkExported(VI, RefFn.get()); break; } } @@ -351,8 +360,8 @@ FunctionImporter::ImportMapTy &ImportList, StringMap *ExportLists, FunctionImporter::ImportThresholdsTy &ImportThresholds) { - computeImportForReferencedGlobals(Summary, DefinedGVSummaries, ImportList, - ExportLists); + computeImportForReferencedGlobals(Summary, Index, DefinedGVSummaries, + ImportList, ExportLists); static int ImportCount = 0; for (auto &Edge : Summary.calls()) { ValueInfo VI = Edge.first; @@ -852,6 +861,11 @@ function_ref isPrevailing, bool ImportEnabled) { computeDeadSymbols(Index, GUIDPreservedSymbols, isPrevailing); + // This function can be run twice: once with empty list of preserved + // symbols and second time when some preserved symbols are given. + // We still need to decide if we have to import global variable correctly + // on such occasion. See ModuleSummaryIndex::canImportGlobalVar. + Index.setWithAttributePropagation(false); if (ImportEnabled) { Index.propagateAttributes(GUIDPreservedSymbols); } else { @@ -864,6 +878,7 @@ GVS->setWriteOnly(false); } } + Index.setWithAttributePropagation(true); } /// Compute the set of summaries needed for a ThinLTO backend compilation of Index: lib/Transforms/Utils/FunctionImportUtils.cpp =================================================================== --- lib/Transforms/Utils/FunctionImportUtils.cpp +++ lib/Transforms/Utils/FunctionImportUtils.cpp @@ -235,14 +235,12 @@ // ThinLTO import. We'll internalize read-only variables later, after // import is finished. See internalizeGVsAfterImport. // - // If global value dead stripping is not enabled in summary then - // propagateConstants hasn't been run. We can't internalize GV - // in such case. - if (!GV.isDeclaration() && VI && ImportIndex.withGlobalValueDeadStripping()) { + // We can't internalize GV if propagateAttributes hasn't been run + if (!GV.isDeclaration() && VI && ImportIndex.withAttributePropagation()) { const auto &SL = VI.getSummaryList(); auto *GVS = SL.empty() ? nullptr : dyn_cast(SL[0].get()); // At this stage "maybe" is "definitely" - if (GVS && (GVS->maybeReadOnly() || GVS->maybeWriteOnly())) + if (GVS && (ImportIndex.isReadOnly(GVS) || ImportIndex.isWriteOnly(GVS))) cast(&GV)->addAttribute("thinlto-internalize"); } Index: test/Bitcode/summary_version.ll =================================================================== --- test/Bitcode/summary_version.ll +++ test/Bitcode/summary_version.ll @@ -2,7 +2,7 @@ ; RUN: opt -module-summary %s -o - | llvm-bcanalyzer -dump | FileCheck %s ; CHECK: +; CHECK: Index: test/Bitcode/thinlto-deadstrip-flag.ll =================================================================== --- test/Bitcode/thinlto-deadstrip-flag.ll +++ test/Bitcode/thinlto-deadstrip-flag.ll @@ -5,14 +5,14 @@ ; RUN: llvm-lto2 run %t.o -o %t.out -thinlto-distributed-indexes \ ; RUN: -r %t.o,glob,plx ; RUN: llvm-bcanalyzer -dump %t.o.thinlto.bc | FileCheck %s --check-prefix=WITHDEAD -; WITHDEAD: +; WITHDEAD: ; Ensure dead stripping performed flag is not set on distributed index ; when option used to disable dead stripping computation. ; RUN: llvm-lto2 run %t.o -o %t.out -thinlto-distributed-indexes \ ; RUN: -r %t.o,glob,plx -compute-dead=false ; RUN: llvm-bcanalyzer -dump %t.o.thinlto.bc | FileCheck %s --check-prefix=NODEAD -; NODEAD: +; NODEAD: target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" target triple = "x86_64-unknown-linux-gnu" Index: test/Bitcode/thinlto-synthetic-count-flag.ll =================================================================== --- test/Bitcode/thinlto-synthetic-count-flag.ll +++ test/Bitcode/thinlto-synthetic-count-flag.ll @@ -5,7 +5,7 @@ ; RUN: llvm-lto2 run %t.o -o %t.out -thinlto-distributed-indexes \ ; RUN: -r %t.o,glob,plx -compute-dead=false ; RUN: llvm-bcanalyzer -dump %t.o.thinlto.bc | FileCheck %s --check-prefix=NOSYNTHETIC -; NOSYNTHETIC: +; NOSYNTHETIC: ; Ensure synthetic entry count flag is set on distributed index ; when option used to enable synthetic count propagation @@ -13,7 +13,7 @@ ; RUN: -r %t.o,glob,plx -thinlto-synthesize-entry-counts \ ; RUN: -compute-dead=false ; RUN: llvm-bcanalyzer -dump %t.o.thinlto.bc | FileCheck %s --check-prefix=HASSYNTHETIC -; HASSYNTHETIC: +; HASSYNTHETIC: target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" target triple = "x86_64-unknown-linux-gnu" Index: test/ThinLTO/X86/Inputs/indirect-call.ll =================================================================== --- test/ThinLTO/X86/Inputs/indirect-call.ll +++ test/ThinLTO/X86/Inputs/indirect-call.ll @@ -0,0 +1,12 @@ +source_filename = "foo.cpp" +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +%struct.S = type { i32 ()*, i32* } + +@_ZL8localVar = internal global i32 11, align 4 +@Obj = dso_local local_unnamed_addr global %struct.S { i32 ()* @_ZL3barv, i32* @_ZL8localVar }, align 8 + +define internal i32 @_ZL3barv() { + ret i32 42 +} Index: test/ThinLTO/X86/globals-import.ll =================================================================== --- test/ThinLTO/X86/globals-import.ll +++ test/ThinLTO/X86/globals-import.ll @@ -15,7 +15,7 @@ ; RUN: llvm-dis %t2.bc.thinlto.promoted.bc -o - | FileCheck --check-prefix=PROMOTE1 %s ; RUN: llvm-dis %t2b.bc.thinlto.promoted.bc -o - | FileCheck --check-prefix=PROMOTE2 %s -; IMPORT: @baz.llvm.0 = available_externally hidden constant i32 10, align 4 +; IMPORT: @baz.llvm.0 = internal constant i32 10, align 4 ; PROMOTE1: @baz.llvm.0 = hidden constant i32 10, align 4 ; PROMOTE1: define weak_odr i32 @foo() { Index: test/ThinLTO/X86/indirect-call.ll =================================================================== --- test/ThinLTO/X86/indirect-call.ll +++ test/ThinLTO/X86/indirect-call.ll @@ -0,0 +1,29 @@ +; Checks that we replace indirect call to a function referenced +; in readonly structure initializer with a direct call. +; RUN: opt -thinlto-bc %s -o %t1 +; RUN: opt -thinlto-bc %p/Inputs/indirect-call.ll -o %t2 +; RUN: llvm-lto2 run -save-temps %t1 %t2 -o %t-out \ +; RUN: -r=%t2,Obj,pl \ +; RUN: -r=%t1,main,plx \ +; RUN: -r=%t1,Obj, +; RUN: llvm-dis %t-out.1.5.precodegen.bc -o - | FileCheck %s --check-prefix CODEGEN +; RUN: llvm-dis %t-out.2.1.promote.bc -o - | FileCheck %s --check-prefix PROMOTE + +; PROMOTE: @_ZL8localVar.llvm.{{[0-9]+}} = hidden global +; PROMOTE: define hidden i32 @_ZL3barv.llvm.{{[0-9]+}}() + +source_filename = "main.cpp" +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +%struct.S = type { i32 ()*, i32* } + +@Obj = external dso_local local_unnamed_addr global %struct.S, align 8 + +define dso_local i32 @main() local_unnamed_addr { + %1 = load i32 ()*, i32 ()** getelementptr inbounds (%struct.S, %struct.S* @Obj, i64 0, i32 0), align 8 +; CODEGEN: %[[VAR:.*]] = tail call i32 @_ZL3barv.llvm.{{[0-9]+}}() + %2 = tail call i32 %1() +; CODEGEN-NEXT ret i32 %[[VAR]] + ret i32 %2 +} Index: test/ThinLTO/X86/local_name_conflict.ll =================================================================== --- test/ThinLTO/X86/local_name_conflict.ll +++ test/ThinLTO/X86/local_name_conflict.ll @@ -12,7 +12,7 @@ ; that module (%t3.bc) to be imported. Check that the imported reference's ; promoted name matches the imported copy. ; RUN: llvm-lto -thinlto-action=import %t.bc -thinlto-index=%t4.bc -o - | llvm-dis -o - | FileCheck %s --check-prefix=IMPORT -; IMPORT: @baz.llvm.[[HASH:[0-9]+]] = available_externally hidden constant i32 10, align 4 +; IMPORT: @baz.llvm.[[HASH:[0-9]+]] = internal constant i32 10, align 4 ; IMPORT: call i32 @foo.llvm.[[HASH]] ; IMPORT: define available_externally hidden i32 @foo.llvm.[[HASH]]() Index: test/tools/gold/X86/v1.12/thinlto_emit_linked_objects.ll =================================================================== --- test/tools/gold/X86/v1.12/thinlto_emit_linked_objects.ll +++ test/tools/gold/X86/v1.12/thinlto_emit_linked_objects.ll @@ -35,7 +35,7 @@ ; should not be set. ; RUN: llvm-bcanalyzer --dump %t1.o.thinlto.bc | FileCheck %s -check-prefixes=CHECK-BC1 ; CHECK-BC1: +; CHECK-BC1: ; CHECK-BC1: