diff --git a/llvm/include/llvm/Bitcode/BitcodeReader.h b/llvm/include/llvm/Bitcode/BitcodeReader.h --- a/llvm/include/llvm/Bitcode/BitcodeReader.h +++ b/llvm/include/llvm/Bitcode/BitcodeReader.h @@ -94,6 +94,7 @@ bool IsThinLTO; bool HasSummary; bool EnableSplitLTOUnit; + bool UnifiedLTO; }; /// Represents a module in a bitcode file. diff --git a/llvm/include/llvm/IR/ModuleSummaryIndex.h b/llvm/include/llvm/IR/ModuleSummaryIndex.h --- a/llvm/include/llvm/IR/ModuleSummaryIndex.h +++ b/llvm/include/llvm/IR/ModuleSummaryIndex.h @@ -1327,6 +1327,9 @@ // True if the index was created for a module compiled with -fsplit-lto-unit. bool EnableSplitLTOUnit; + // True if the index was created for a module compiled with -funified-lto + bool UnifiedLTO; + // True if some of the modules were compiled with -fsplit-lto-unit and // some were not. Set when the combined index is created during the thin link. bool PartiallySplitLTOUnits = false; @@ -1372,9 +1375,10 @@ public: // See HaveGVs variable comment. - ModuleSummaryIndex(bool HaveGVs, bool EnableSplitLTOUnit = false) - : HaveGVs(HaveGVs), EnableSplitLTOUnit(EnableSplitLTOUnit), Saver(Alloc), - BlockCount(0) {} + ModuleSummaryIndex(bool HaveGVs, bool EnableSplitLTOUnit = false, + bool UnifiedLTO = false) + : HaveGVs(HaveGVs), EnableSplitLTOUnit(EnableSplitLTOUnit), + UnifiedLTO(UnifiedLTO), Saver(Alloc), BlockCount(0) {} // Current version for the module summary in bitcode files. 
// The BitcodeSummaryVersion should be bumped whenever we introduce changes @@ -1532,6 +1536,9 @@ bool enableSplitLTOUnit() const { return EnableSplitLTOUnit; } void setEnableSplitLTOUnit() { EnableSplitLTOUnit = true; } + bool hasUnifiedLTO() const { return UnifiedLTO; } + void setUnifiedLTO() { UnifiedLTO = true; } + bool partiallySplitLTOUnits() const { return PartiallySplitLTOUnits; } void setPartiallySplitLTOUnits() { PartiallySplitLTOUnits = true; } diff --git a/llvm/include/llvm/LTO/LTO.h b/llvm/include/llvm/LTO/LTO.h --- a/llvm/include/llvm/LTO/LTO.h +++ b/llvm/include/llvm/LTO/LTO.h @@ -255,13 +255,26 @@ friend InputFile; public: + /// Unified LTO modes + enum LTOKind { + /// Any LTO mode without Unified LTO. The default mode. + LTOK_Default, + + /// Regular LTO, with Unified LTO enabled. + LTOK_UnifiedRegular, + + /// ThinLTO, with Unified LTO enabled. + LTOK_UnifiedThin, + }; + /// Create an LTO object. A default constructed LTO object has a reasonable /// production configuration, but you can customize it by passing arguments to /// this constructor. /// FIXME: We do currently require the DiagHandler field to be set in Conf. /// Until that is fixed, a Config argument is required. LTO(Config Conf, ThinBackend Backend = nullptr, - unsigned ParallelCodeGenParallelismLevel = 1); + unsigned ParallelCodeGenParallelismLevel = 1, + LTOKind LTOMode = LTOK_Default); ~LTO(); /// Add an input file to the LTO link, using the provided symbol resolutions. @@ -421,6 +434,9 @@ mutable bool CalledGetMaxTasks = false; + // LTO mode when using Unified LTO. + LTOKind LTOMode; + // Use Optional to distinguish false from not yet initialized. std::optional EnableSplitLTOUnit; diff --git a/llvm/include/llvm/Passes/PassBuilder.h b/llvm/include/llvm/Passes/PassBuilder.h --- a/llvm/include/llvm/Passes/PassBuilder.h +++ b/llvm/include/llvm/Passes/PassBuilder.h @@ -74,6 +74,9 @@ /// that of the flag: `-enable-npm-call-graph-profile`. 
bool CallGraphProfile; + // Add LTO pipeline tuning option to enable the unified LTO pipeline. + bool UnifiedLTO; + /// Tuning option to enable/disable function merging. Its default value is /// false. bool MergeFunctions; diff --git a/llvm/lib/Analysis/ModuleSummaryAnalysis.cpp b/llvm/lib/Analysis/ModuleSummaryAnalysis.cpp --- a/llvm/lib/Analysis/ModuleSummaryAnalysis.cpp +++ b/llvm/lib/Analysis/ModuleSummaryAnalysis.cpp @@ -786,10 +786,14 @@ std::function GetSSICallback) { assert(PSI); bool EnableSplitLTOUnit = false; + bool UnifiedLTO = false; if (auto *MD = mdconst::extract_or_null( M.getModuleFlag("EnableSplitLTOUnit"))) EnableSplitLTOUnit = MD->getZExtValue(); - ModuleSummaryIndex Index(/*HaveGVs=*/true, EnableSplitLTOUnit); + if (auto *MD = + mdconst::extract_or_null(M.getModuleFlag("UnifiedLTO"))) + UnifiedLTO = MD->getZExtValue(); + ModuleSummaryIndex Index(/*HaveGVs=*/true, EnableSplitLTOUnit, UnifiedLTO); // Identify the local values in the llvm.used and llvm.compiler.used sets, // which should not be exported as they would then require renaming and diff --git a/llvm/lib/Bitcode/Reader/BitcodeReader.cpp b/llvm/lib/Bitcode/Reader/BitcodeReader.cpp --- a/llvm/lib/Bitcode/Reader/BitcodeReader.cpp +++ b/llvm/lib/Bitcode/Reader/BitcodeReader.cpp @@ -8033,14 +8033,17 @@ return std::move(Index); } -static Expected getEnableSplitLTOUnitFlag(BitstreamCursor &Stream, - unsigned ID) { +static Expected> +getEnableSplitLTOUnitAndUnifiedFlag(BitstreamCursor &Stream, + unsigned ID, + BitcodeLTOInfo <OInfo) { if (Error Err = Stream.EnterSubBlock(ID)) return std::move(Err); SmallVector Record; while (true) { BitstreamEntry Entry; + std::pair Result = {false,false}; if (Error E = Stream.advanceSkippingSubblocks().moveInto(Entry)) return std::move(E); @@ -8048,10 +8051,10 @@ case BitstreamEntry::SubBlock: // Handled for us already. 
case BitstreamEntry::Error: return error("Malformed block"); - case BitstreamEntry::EndBlock: - // If no flags record found, conservatively return true to mimic - // behavior before this flag was added. - return true; + case BitstreamEntry::EndBlock: { + // If no flags record found, set both flags to false. + return Result; + } case BitstreamEntry::Record: // The interesting case. break; @@ -8068,9 +8071,13 @@ case bitc::FS_FLAGS: { // [flags] uint64_t Flags = Record[0]; // Scan flags. - assert(Flags <= 0x1ff && "Unexpected bits in flag"); + assert(Flags <= 0x3ff && "Unexpected bits in flag"); + + bool EnableSplitLTOUnit = Flags & 0x8; + bool UnifiedLTO = Flags & 0x200; + Result = {EnableSplitLTOUnit, UnifiedLTO}; - return Flags & 0x8; + return Result; } } } @@ -8096,25 +8103,31 @@ return error("Malformed block"); case BitstreamEntry::EndBlock: return BitcodeLTOInfo{/*IsThinLTO=*/false, /*HasSummary=*/false, - /*EnableSplitLTOUnit=*/false}; + /*EnableSplitLTOUnit=*/false, /*UnifiedLTO=*/false}; case BitstreamEntry::SubBlock: if (Entry.ID == bitc::GLOBALVAL_SUMMARY_BLOCK_ID) { - Expected EnableSplitLTOUnit = - getEnableSplitLTOUnitFlag(Stream, Entry.ID); - if (!EnableSplitLTOUnit) - return EnableSplitLTOUnit.takeError(); - return BitcodeLTOInfo{/*IsThinLTO=*/true, /*HasSummary=*/true, - *EnableSplitLTOUnit}; + BitcodeLTOInfo LTOInfo; + Expected> Flags = + getEnableSplitLTOUnitAndUnifiedFlag(Stream, Entry.ID, LTOInfo); + if (!Flags) + return Flags.takeError(); + std::tie(LTOInfo.EnableSplitLTOUnit, LTOInfo.UnifiedLTO) = Flags.get(); + LTOInfo.IsThinLTO = true; + LTOInfo.HasSummary = true; + return LTOInfo; } if (Entry.ID == bitc::FULL_LTO_GLOBALVAL_SUMMARY_BLOCK_ID) { - Expected EnableSplitLTOUnit = - getEnableSplitLTOUnitFlag(Stream, Entry.ID); - if (!EnableSplitLTOUnit) - return EnableSplitLTOUnit.takeError(); - return BitcodeLTOInfo{/*IsThinLTO=*/false, /*HasSummary=*/true, - *EnableSplitLTOUnit}; + BitcodeLTOInfo LTOInfo; + Expected> Flags = + 
getEnableSplitLTOUnitAndUnifiedFlag(Stream, Entry.ID, LTOInfo); + if (!Flags) + return Flags.takeError(); + std::tie(LTOInfo.EnableSplitLTOUnit, LTOInfo.UnifiedLTO) = Flags.get(); + LTOInfo.IsThinLTO = false; + LTOInfo.HasSummary = true; + return LTOInfo; } // Ignore other sub-blocks. diff --git a/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp b/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp --- a/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp +++ b/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp @@ -4086,6 +4086,9 @@ // Bits 1-3 are set only in the combined index, skip them. if (Index->enableSplitLTOUnit()) Flags |= 0x8; + if (Index->hasUnifiedLTO()) + Flags |= 0x200; + Stream.EmitRecord(bitc::FS_FLAGS, ArrayRef{Flags}); if (Index->begin() == Index->end()) { @@ -4112,7 +4115,7 @@ auto Abbv = std::make_shared(); Abbv->Add(BitCodeAbbrevOp(bitc::FS_PERMODULE_PROFILE)); Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // valueid - Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // flags + Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // flags Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // instcount Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 4)); // fflags Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 4)); // numrefs diff --git a/llvm/lib/IR/ModuleSummaryIndex.cpp b/llvm/lib/IR/ModuleSummaryIndex.cpp --- a/llvm/lib/IR/ModuleSummaryIndex.cpp +++ b/llvm/lib/IR/ModuleSummaryIndex.cpp @@ -109,11 +109,13 @@ Flags |= 0x80; if (withSupportsHotColdNew()) Flags |= 0x100; + if (hasUnifiedLTO()) + Flags |= 0x200; return Flags; } void ModuleSummaryIndex::setFlags(uint64_t Flags) { - assert(Flags <= 0x1ff && "Unexpected bits in flag"); + assert(Flags <= 0x3ff && "Unexpected bits in flag"); // 1 bit: WithGlobalValueDeadStripping flag. // Set on combined index only. if (Flags & 0x1) @@ -151,6 +153,10 @@ // Set on combined index only. if (Flags & 0x100) setWithSupportsHotColdNew(); + // 1 bit: WithUnifiedLTO flag. + // Set on per module indexes. 
+ if (Flags & 0x200) + setUnifiedLTO(); } // Collect for the given module the list of function it defines diff --git a/llvm/lib/LTO/LTO.cpp b/llvm/lib/LTO/LTO.cpp --- a/llvm/lib/LTO/LTO.cpp +++ b/llvm/lib/LTO/LTO.cpp @@ -603,10 +603,10 @@ } LTO::LTO(Config Conf, ThinBackend Backend, - unsigned ParallelCodeGenParallelismLevel) + unsigned ParallelCodeGenParallelismLevel, LTOKind LTOMode) : Conf(std::move(Conf)), RegularLTO(ParallelCodeGenParallelismLevel, this->Conf), - ThinLTO(std::move(Backend)) {} + ThinLTO(std::move(Backend)), LTOMode(LTOMode) {} // Requires a destructor for MapVector. LTO::~LTO() = default; @@ -747,12 +747,25 @@ EnableSplitLTOUnit = LTOInfo->EnableSplitLTOUnit; BitcodeModule BM = Input.Mods[ModI]; + + if ((LTOMode == LTOK_UnifiedRegular || LTOMode == LTOK_UnifiedThin) && + !LTOInfo->UnifiedLTO) + return make_error( + "unified LTO compilation must use " + "compatible bitcode modules (use -funified-lto)", + inconvertibleErrorCode()); + + if (LTOInfo->UnifiedLTO && LTOMode == LTOK_Default) + LTOMode = LTOK_UnifiedThin; + + bool IsThinLTO = LTOInfo->IsThinLTO && (LTOMode != LTOK_UnifiedRegular); + auto ModSyms = Input.module_symbols(ModI); addModuleToGlobalRes(ModSyms, {ResI, ResE}, - LTOInfo->IsThinLTO ? ThinLTO.ModuleMap.size() + 1 : 0, + IsThinLTO ? ThinLTO.ModuleMap.size() + 1 : 0, LTOInfo->HasSummary); - if (LTOInfo->IsThinLTO) + if (IsThinLTO) return addThinLTO(BM, ModSyms, ResI, ResE); RegularLTO.EmptyCombinedModule = false; @@ -820,6 +833,15 @@ if (Error Err = M.materializeMetadata()) return std::move(Err); + + // If cfi.functions is present and we are in regular LTO mode, LowerTypeTests + // will rename local functions in the merged module as ".1". + // This causes linking errors, since other parts of the module expect the + // original function name. 
+ if (LTOMode == LTOK_UnifiedRegular) + if (NamedMDNode *CfiFunctionsMD = M.getNamedMetadata("cfi.functions")) + M.eraseNamedMetadata(CfiFunctionsMD); + UpgradeDebugInfo(M); ModuleSymbolTable SymTab; @@ -1214,6 +1236,7 @@ RegularLTO.CombinedModule->getContext(), Conf.RemarksFilename, Conf.RemarksPasses, Conf.RemarksFormat, Conf.RemarksWithHotness, Conf.RemarksHotnessThreshold); + LLVM_DEBUG(dbgs() << "Running regular LTO\n"); if (!DiagFileOrErr) return DiagFileOrErr.takeError(); DiagnosticOutputFile = std::move(*DiagFileOrErr); @@ -1277,18 +1300,33 @@ if (!Conf.CodeGenOnly) { for (const auto &R : GlobalResolutions) { if (!R.second.isPrevailingIRSymbol()) continue; if (R.second.Partition != 0 && R.second.Partition != GlobalResolution::External) continue; GlobalValue *GV = RegularLTO.CombinedModule->getNamedValue(R.second.IRName); // Ignore symbols defined in other partitions. // Also skip declarations, which are not allowed to have internal linkage. if (!GV || GV->hasLocalLinkage() || GV->isDeclaration()) continue; + + // Symbols that are marked DLLImport or DLLExport should not be + // internalized, as they are either externally visible or referencing + // external symbols. Symbols that have AvailableExternally or Appending + // linkage might be used by future passes and should be kept as is. + // These linkages are seen in Unified regular LTO, because the process + // of creating split LTO units introduces symbols with that linkage into + // one of the created modules. Normally, only the ThinLTO backend would + // compile this module, but Unified Regular LTO processes both + // modules created by the splitting process as regular LTO modules. 
+ if ((LTOMode == LTOKind::LTOK_UnifiedRegular) && + ((GV->getDLLStorageClass() != GlobalValue::DefaultStorageClass) || + GV->hasAvailableExternallyLinkage() || GV->hasAppendingLinkage())) + continue; + GV->setUnnamedAddr(R.second.UnnamedAddr ? GlobalValue::UnnamedAddr::Global : GlobalValue::UnnamedAddr::None); if (EnableLTOInternalization && R.second.Partition == 0) @@ -1606,6 +1644,7 @@ Error LTO::runThinLTO(AddStreamFn AddStream, FileCache Cache, const DenseSet &GUIDPreservedSymbols) { + LLVM_DEBUG(dbgs() << "Running ThinLTO\n"); ThinLTO.CombinedIndex.releaseTemporaryMemory(); timeTraceProfilerBegin("ThinLink", StringRef("")); auto TimeTraceScopeExit = llvm::make_scope_exit([]() { diff --git a/llvm/lib/LTO/LTOBackend.cpp b/llvm/lib/LTO/LTOBackend.cpp --- a/llvm/lib/LTO/LTOBackend.cpp +++ b/llvm/lib/LTO/LTOBackend.cpp @@ -504,6 +504,7 @@ std::unique_ptr TM = createTargetMachine(C, *TOrErr, Mod); + LLVM_DEBUG(dbgs() << "Running regular LTO\n"); if (!C.CodeGenOnly) { if (!opt(C, TM.get(), 0, Mod, /*IsThinLTO=*/false, /*ExportSummary=*/&CombinedIndex, /*ImportSummary=*/nullptr, @@ -566,6 +567,7 @@ // the module, if applicable. Mod.setPartialSampleProfileRatio(CombinedIndex); + LLVM_DEBUG(dbgs() << "Running ThinLTO\n"); if (Conf.CodeGenOnly) { codegen(Conf, TM.get(), AddStream, Task, Mod, CombinedIndex); return finalizeOptimizationRemarks(std::move(DiagnosticOutputFile)); diff --git a/llvm/lib/Passes/PassBuilder.cpp b/llvm/lib/Passes/PassBuilder.cpp --- a/llvm/lib/Passes/PassBuilder.cpp +++ b/llvm/lib/Passes/PassBuilder.cpp @@ -1379,7 +1379,13 @@ } else if (Matches[1] == "thinlto") { MPM.addPass(buildThinLTODefaultPipeline(L, nullptr)); } else if (Matches[1] == "lto-pre-link") { - MPM.addPass(buildLTOPreLinkDefaultPipeline(L)); + if (PTO.UnifiedLTO) + // When UnifiedLTO is enabled, use the ThinLTO pre-link pipeline. This + // avoids compile-time performance regressions and keeps the pre-link + // LTO pipeline "unified" for both LTO modes. 
+ MPM.addPass(buildThinLTOPreLinkDefaultPipeline(L)); + else + MPM.addPass(buildLTOPreLinkDefaultPipeline(L)); } else { assert(Matches[1] == "lto" && "Not one of the matched options!"); MPM.addPass(buildLTODefaultPipeline(L, nullptr)); diff --git a/llvm/lib/Passes/PassBuilderPipelines.cpp b/llvm/lib/Passes/PassBuilderPipelines.cpp --- a/llvm/lib/Passes/PassBuilderPipelines.cpp +++ b/llvm/lib/Passes/PassBuilderPipelines.cpp @@ -283,6 +283,7 @@ LicmMssaOptCap = SetLicmMssaOptCap; LicmMssaNoAccForPromotionCap = SetLicmMssaNoAccForPromotionCap; CallGraphProfile = true; + UnifiedLTO = false; MergeFunctions = EnableMergeFunctions; InlinerThreshold = -1; EagerlyInvalidateAnalyses = EnableEagerlyInvalidateAnalyses; diff --git a/llvm/lib/Transforms/IPO/ThinLTOBitcodeWriter.cpp b/llvm/lib/Transforms/IPO/ThinLTOBitcodeWriter.cpp --- a/llvm/lib/Transforms/IPO/ThinLTOBitcodeWriter.cpp +++ b/llvm/lib/Transforms/IPO/ThinLTOBitcodeWriter.cpp @@ -259,26 +259,37 @@ appendToUsed(DestM, NewUsed); } +bool enableUnifiedLTO(Module &M) { + bool UnifiedLTO = false; + if (auto *MD = + mdconst::extract_or_null(M.getModuleFlag("UnifiedLTO"))) + UnifiedLTO = MD->getZExtValue(); + return UnifiedLTO; +} + // If it's possible to split M into regular and thin LTO parts, do so and write // a multi-module bitcode file with the two parts to OS. Otherwise, write only a // regular LTO bitcode file to OS. void splitAndWriteThinLTOBitcode( raw_ostream &OS, raw_ostream *ThinLinkOS, function_ref AARGetter, Module &M) { + bool UnifiedLTO = enableUnifiedLTO(M); std::string ModuleId = getUniqueModuleId(&M); if (ModuleId.empty()) { + assert(!UnifiedLTO); // We couldn't generate a module ID for this module, write it out as a // regular LTO module with an index for summary-based dead stripping. 
ProfileSummaryInfo PSI(M); M.addModuleFlag(Module::Error, "ThinLTO", uint32_t(0)); ModuleSummaryIndex Index = buildModuleSummaryIndex(M, nullptr, &PSI); - WriteBitcodeToFile(M, OS, /*ShouldPreserveUseListOrder=*/false, &Index); + WriteBitcodeToFile(M, OS, /*ShouldPreserveUseListOrder=*/false, &Index, + /*UnifiedLTO=*/false); if (ThinLinkOS) // We don't have a ThinLTO part, but still write the module to the // ThinLinkOS if requested so that the expected output file is produced. WriteBitcodeToFile(M, *ThinLinkOS, /*ShouldPreserveUseListOrder=*/false, - &Index); + &Index, /*UnifiedLTO=*/false); return; } diff --git a/llvm/test/LTO/Resolution/X86/local-def-dllimport.ll b/llvm/test/LTO/Resolution/X86/local-def-dllimport.ll --- a/llvm/test/LTO/Resolution/X86/local-def-dllimport.ll +++ b/llvm/test/LTO/Resolution/X86/local-def-dllimport.ll @@ -11,6 +11,16 @@ ; RUN: -r %t0.bc,main,x \ ; RUN: -save-temps -o %t1 %t0.bc ; RUN: llvm-dis %t1.1.3.import.bc -o - | FileCheck %s + +; RUN: opt --unified-lto -thinlto-split-lto-unit -thinlto-bc -o %t0.bc %s +; RUN: llvm-lto2 run -r %t0.bc,__imp_f,l \ +; RUN: -r %t0.bc,g,p \ +; RUN: -r %t0.bc,g,l \ +; RUN: -r %t0.bc,e,l \ +; RUN: -r %t0.bc,main,x \ +; RUN: --unified-lto=thin \ +; RUN: -save-temps -o %t1 %t0.bc +; RUN: llvm-dis %t1.1.3.import.bc -o - | FileCheck %s source_filename = "test.cpp" target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" target triple = "x86_64-unknown-linux-gnu" diff --git a/llvm/test/LTO/Resolution/X86/unified-lto-check.ll b/llvm/test/LTO/Resolution/X86/unified-lto-check.ll new file mode 100644 --- /dev/null +++ b/llvm/test/LTO/Resolution/X86/unified-lto-check.ll @@ -0,0 +1,56 @@ +; REQUIRES: asserts +; Test to ensure that the Unified LTO flag is set properly in the summary, and +; that we emit the correct error when linking bitcode files with different +; values of this flag. 
+ +; Linking bitcode both without UnifiedLTO set should work +; RUN: opt -thinlto-bc -thinlto-split-lto-unit -o %t1 %s +; RUN: llvm-bcanalyzer -dump %t1 | FileCheck %s --check-prefix=NOUNIFIEDLTO +; RUN: llvm-dis -o - %t1 | FileCheck %s --check-prefix=NOUNIFIEDLTOFLAG +; RUN: opt -thinlto-bc -thinlto-split-lto-unit -o %t2 %s +; RUN: llvm-bcanalyzer -dump %t2 | FileCheck %s --check-prefix=NOUNIFIEDLTO +; RUN: llvm-dis -o - %t2 | FileCheck %s --check-prefix=NOUNIFIEDLTOFLAG +; RUN: llvm-lto2 run -o %t3 %t1 %t2 +; RUN: not llvm-lto2 run --unified-lto=thin -o %t3 %t1 %t2 2>&1 | \ +; RUN: FileCheck --allow-empty %s --check-prefix UNIFIEDERR + +; Linking bitcode with different values of UnifiedLTO should fail +; RUN: opt -thinlto-bc -thinlto-split-lto-unit -o %t1 %s +; RUN: llvm-bcanalyzer -dump %t1 | FileCheck %s --check-prefix=NOUNIFIEDLTO +; RUN: llvm-dis -o - %t1 | FileCheck %s --check-prefix=NOUNIFIEDLTOFLAG +; RUN: opt -unified-lto -thinlto-bc -thinlto-split-lto-unit -o %t2 %s +; RUN: llvm-bcanalyzer -dump %t2 | FileCheck %s --check-prefix=UNIFIEDLTO +; RUN: llvm-dis -o - %t2 | FileCheck %s --check-prefix=UNIFIEDLTOFLAG +; RUN: not llvm-lto2 run --unified-lto=thin -o %t3 %t1 %t2 2>&1 | \ +; RUN: FileCheck --allow-empty %s --check-prefix UNIFIEDERR + +; Linking bitcode with identical Unified LTO flags should succeed +; RUN: opt -unified-lto -thinlto-bc -thinlto-split-lto-unit -o %t1 %s +; RUN: llvm-bcanalyzer -dump %t1 | FileCheck %s --check-prefix=UNIFIEDLTO +; RUN: llvm-dis -o - %t1 | FileCheck %s --check-prefix=UNIFIEDLTOFLAG +; RUN: opt -unified-lto -thinlto-bc -thinlto-split-lto-unit -o %t2 %s +; RUN: llvm-bcanalyzer -dump %t2 | FileCheck %s --check-prefix=UNIFIEDLTO +; RUN: llvm-dis -o - %t2 | FileCheck %s --check-prefix=UNIFIEDLTOFLAG +; RUN: llvm-lto2 run --unified-lto=full --debug-only=lto -o %t3 %t1 %t2 2>&1 | \ +; RUN: FileCheck --allow-empty %s --check-prefix NOUNIFIEDERR --check-prefix FULL +; RUN: llvm-lto2 run --unified-lto=thin --debug-only=lto -o 
%t3 %t1 %t2 2>&1 | \ +; RUN: FileCheck --allow-empty %s --check-prefix NOUNIFIEDERR --check-prefix THIN +; RUN: llvm-lto2 run --debug-only=lto -o %t3 %t1 %t2 2>&1 | \ +; RUN: FileCheck --allow-empty %s --check-prefix THIN + +; UNIFIEDERR: unified LTO compilation must use compatible bitcode modules +; NOUNIFIEDERR-NOT: unified LTO compilation must use compatible bitcode modules + +; The flag should be set when UnifiedLTO is enabled +; UNIFIEDLTO: +; NOUNIFIEDLTO: + +; Check that the corresponding module flag is set when expected. +; UNIFIEDLTOFLAG: !{i32 1, !"UnifiedLTO", i32 1} +; NOUNIFIEDLTOFLAG-NOT: !{i32 1, !"UnifiedLTO", i32 1} + +; FULL: Running regular LTO +; THIN: Running ThinLTO + +target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" diff --git a/llvm/test/LTO/X86/cfi-func-remove.ll b/llvm/test/LTO/X86/cfi-func-remove.ll new file mode 100644 --- /dev/null +++ b/llvm/test/LTO/X86/cfi-func-remove.ll @@ -0,0 +1,10 @@ +; RUN: opt -thinlto-bc -thinlto-split-lto-unit -unified-lto <%s -o %t0 +; RUN: llvm-lto2 run -o %t1 --unified-lto=full --save-temps %t0 +; RUN: llvm-dis <%t1.0.0.preopt.bc 2>&1 | FileCheck %s --implicit-check-not warning: +target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-pc-linux-gnu" + +!cfi.functions = !{!2} +; CHECK-NOT: cfi.functions + +!2 = !{!"main", i8 0} diff --git a/llvm/test/LTO/X86/unified-cfi.ll b/llvm/test/LTO/X86/unified-cfi.ll new file mode 100644 --- /dev/null +++ b/llvm/test/LTO/X86/unified-cfi.ll @@ -0,0 +1,86 @@ +; Test for the expected CFI codegen in a module with CFI metadata. 
+; RUN: opt -unified-lto -thinlto-bc -o %t0.o %s +; RUN: llvm-lto --exported-symbol=main -filetype=asm -o - %t0.o | FileCheck %s + +; CHECK-LABEL: main + +; CHECK: jbe +; CHECK-NEXT: ud2 +; CHECK-NEXT: ud2 + +; ModuleID = 'llvm/test/LTO/X86/unified-cfi.ll' +source_filename = "cfi.cpp" +target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-scei-ps4" + +@func = hidden global [3 x i32 ()*] [i32 ()* @_Z1av, i32 ()* @_Z1bv, i32 ()* @_Z1cv], align 16 +@.src = private unnamed_addr constant [8 x i8] c"cfi.cpp\00", align 1 +@anon.9260195284c792ab5c6ef4d97bfcf95d.0 = private unnamed_addr constant { i16, i16, [9 x i8] } { i16 -1, i16 0, [9 x i8] c"'int ()'\00" } + +; Function Attrs: noinline nounwind optnone sspstrong uwtable +define hidden i32 @_Z1av() #0 !type !3 !type !4 { +entry: + ret i32 1 +} + +; Function Attrs: noinline nounwind optnone sspstrong uwtable +define hidden i32 @_Z1bv() #0 !type !3 !type !4 { +entry: + ret i32 2 +} + +; Function Attrs: noinline nounwind optnone sspstrong uwtable +define hidden i32 @_Z1cv() #0 !type !3 !type !4 { +entry: + ret i32 3 +} + +; Function Attrs: noinline norecurse nounwind optnone sspstrong uwtable +define hidden i32 @main(i32 %argc, i8** %argv) #1 !type !5 !type !6 { +entry: + %retval = alloca i32, align 4 + %argc.addr = alloca i32, align 4 + %argv.addr = alloca i8**, align 8 + store i32 0, i32* %retval, align 4 + store i32 %argc, i32* %argc.addr, align 4 + store i8** %argv, i8*** %argv.addr, align 8 + %0 = load i32, i32* %argc.addr, align 4 + %idxprom = sext i32 %0 to i64 + %arrayidx = getelementptr inbounds [3 x i32 ()*], [3 x i32 ()*]* @func, i64 0, i64 %idxprom + %1 = load i32 ()*, i32 ()** %arrayidx, align 8 + %2 = bitcast i32 ()* %1 to i8*, !nosanitize !7 + %3 = call i1 @llvm.type.test(i8* %2, metadata !"_ZTSFivE"), !nosanitize !7 + br i1 %3, label %cont, label %trap, !nosanitize !7 + +trap: ; preds = %entry + call void @llvm.trap() #4, !nosanitize !7 + 
unreachable, !nosanitize !7 + +cont: ; preds = %entry + %call = call i32 %1() + ret i32 %call +} + +; Function Attrs: nofree nosync nounwind readnone speculatable willreturn +declare i1 @llvm.type.test(i8*, metadata) #2 + +; Function Attrs: cold noreturn nounwind +declare void @llvm.trap() #3 + +attributes #0 = { noinline nounwind optnone sspstrong uwtable } +attributes #1 = { noinline norecurse nounwind optnone sspstrong uwtable } +attributes #2 = { nofree nosync nounwind readnone speculatable willreturn } +attributes #3 = { cold noreturn nounwind } +attributes #4 = { noreturn nounwind } + +!llvm.module.flags = !{!0, !1} +!llvm.ident = !{!2} + +!0 = !{i32 1, !"wchar_size", i32 2} +!1 = !{i32 7, !"PIC Level", i32 2} +!2 = !{!"clang version 7.0.0 (PS4 clang version 99.99.0.1562 432a534f checking)"} +!3 = !{i64 0, !"_ZTSFivE"} +!4 = !{i64 0, !"_ZTSFivE.generalized"} +!5 = !{i64 0, !"_ZTSFiiPPcE"} +!6 = !{i64 0, !"_ZTSFiiPvE.generalized"} +!7 = !{} diff --git a/llvm/test/LTO/X86/unified-internalize.ll b/llvm/test/LTO/X86/unified-internalize.ll new file mode 100644 --- /dev/null +++ b/llvm/test/LTO/X86/unified-internalize.ll @@ -0,0 +1,50 @@ +; RUN: opt <%s -unified-lto -thinlto-split-lto-unit -thinlto-bc -o %t.bc + +; Test internalization during unified LTO. This makes sure internalization does +; happen in runRegularLTO(). 
+; RUN: llvm-lto2 run %t.bc -o %t.o -save-temps --unified-lto=full \ +; RUN: -r=%t.bc,salad,pxl \ +; RUN: -r=%t.bc,balsamic,pl \ +; RUN: -r=%t.bc,thousandisland,pl \ +; RUN: -r=%t.bc,main,pxl \ +; RUN: -r %t.bc,ranch,px \ +; RUN: -r %t.bc,egg, \ +; RUN: -r %t.bc,bar,px +; RUN: llvm-dis < %t.o.0.2.internalize.bc | FileCheck %s + +; CHECK: @llvm.used = appending global {{.*}} @bar +; CHECK: define dso_local dllexport void @thousandisland +; CHECK: define dso_local void @salad +; CHECK: define internal void @balsamic +; CHECK: define dso_local void @main +; CHECK: define available_externally void @egg() + +target triple = "x86_64-scei-ps4" +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" + +define void @salad() { + call void @balsamic() + ret void +} +define void @balsamic() { + ret void +} +define dllexport void @thousandisland() { + ret void +} + +define void @main() { + ret void +} + +define void ()* @ranch() { + ret void ()* @egg +} + +define available_externally void @egg() { + ret void +} + +%"foo.1" = type { i8, i8 } +declare dso_local i32 @bar(%"foo.1"* nocapture readnone %this) local_unnamed_addr +@llvm.used = appending global [2 x i8*] [i8* bitcast (i32 (%"foo.1"*)* @bar to i8*), i8* bitcast (void ()* @thousandisland to i8*)], section "llvm.metadata" diff --git a/llvm/test/Other/new-pm-thinlto-prelink-defaults.ll b/llvm/test/Other/new-pm-thinlto-prelink-defaults.ll --- a/llvm/test/Other/new-pm-thinlto-prelink-defaults.ll +++ b/llvm/test/Other/new-pm-thinlto-prelink-defaults.ll @@ -12,26 +12,50 @@ ; RUN: -passes='thinlto-pre-link' -S %s 2>&1 \ ; RUN: | FileCheck %s --check-prefixes=CHECK-O,CHECK-O1,CHECK-O-NODIS ; RUN: opt -disable-verify -verify-analysis-invalidation=0 -eagerly-invalidate-analyses=0 -debug-pass-manager \ +; RUN: -unified-lto -passes='lto-pre-link' -S %s 2>&1 \ +; RUN: | FileCheck %s --check-prefixes=CHECK-O,CHECK-O1,CHECK-O-NODIS +; RUN: opt -disable-verify -verify-analysis-invalidation=0 -eagerly-invalidate-analyses=0 
-debug-pass-manager \ ; RUN: -passes='thinlto-pre-link' -S %s 2>&1 \ ; RUN: | FileCheck %s --check-prefixes=CHECK-O,CHECK-O2,CHECK-O23SZ,CHECK-O-NODIS ; RUN: opt -disable-verify -verify-analysis-invalidation=0 -eagerly-invalidate-analyses=0 -debug-pass-manager \ +; RUN: -unified-lto -passes='lto-pre-link' -S %s 2>&1 \ +; RUN: | FileCheck %s --check-prefixes=CHECK-O,CHECK-O2,CHECK-O23SZ,CHECK-O-NODIS +; RUN: opt -disable-verify -verify-analysis-invalidation=0 -eagerly-invalidate-analyses=0 -debug-pass-manager \ ; RUN: -passes='thinlto-pre-link' -S -passes-ep-pipeline-start='no-op-module' %s 2>&1 \ ; RUN: | FileCheck %s --check-prefixes=CHECK-O,CHECK-O3,CHECK-O23SZ,CHECK-O-NODIS,CHECK-EP-PIPELINE-START ; RUN: opt -disable-verify -verify-analysis-invalidation=0 -eagerly-invalidate-analyses=0 -debug-pass-manager \ +; RUN: -unified-lto -passes='lto-pre-link' -S -passes-ep-pipeline-start='no-op-module' %s 2>&1 \ +; RUN: | FileCheck %s --check-prefixes=CHECK-O,CHECK-O3,CHECK-O23SZ,CHECK-O-NODIS,CHECK-EP-PIPELINE-START +; RUN: opt -disable-verify -verify-analysis-invalidation=0 -eagerly-invalidate-analyses=0 -debug-pass-manager \ ; RUN: -passes='thinlto-pre-link' -S -passes-ep-optimizer-early='no-op-module' %s 2>&1 \ ; RUN: | FileCheck %s --check-prefixes=CHECK-O,CHECK-O3,CHECK-O23SZ,CHECK-O-NODIS,CHECK-EP-OPT-EARLY ; RUN: opt -disable-verify -verify-analysis-invalidation=0 -eagerly-invalidate-analyses=0 -debug-pass-manager \ +; RUN: -unified-lto -passes='lto-pre-link' -S -passes-ep-optimizer-early='no-op-module' %s 2>&1 \ +; RUN: | FileCheck %s --check-prefixes=CHECK-O,CHECK-O3,CHECK-O23SZ,CHECK-O-NODIS,CHECK-EP-OPT-EARLY +; RUN: opt -disable-verify -verify-analysis-invalidation=0 -eagerly-invalidate-analyses=0 -debug-pass-manager \ ; RUN: -passes='thinlto-pre-link' -S -passes-ep-optimizer-last='no-op-module' %s 2>&1 \ ; RUN: | FileCheck %s --check-prefixes=CHECK-O,CHECK-O3,CHECK-O23SZ,CHECK-O-NODIS,CHECK-EP-OPT-LAST ; RUN: opt -disable-verify 
-verify-analysis-invalidation=0 -eagerly-invalidate-analyses=0 -debug-pass-manager \ +; RUN: -unified-lto -passes='lto-pre-link' -S -passes-ep-optimizer-last='no-op-module' %s 2>&1 \ +; RUN: | FileCheck %s --check-prefixes=CHECK-O,CHECK-O3,CHECK-O23SZ,CHECK-O-NODIS,CHECK-EP-OPT-LAST +; RUN: opt -disable-verify -verify-analysis-invalidation=0 -eagerly-invalidate-analyses=0 -debug-pass-manager \ ; RUN: -passes='thinlto-pre-link' -S %s 2>&1 \ ; RUN: | FileCheck %s --check-prefixes=CHECK-O,CHECK-O23SZ,CHECK-O-NODIS ; RUN: opt -disable-verify -verify-analysis-invalidation=0 -eagerly-invalidate-analyses=0 -debug-pass-manager \ +; RUN: -unified-lto -passes='lto-pre-link' -S %s 2>&1 \ +; RUN: | FileCheck %s --check-prefixes=CHECK-O,CHECK-O23SZ,CHECK-O-NODIS +; RUN: opt -disable-verify -verify-analysis-invalidation=0 -eagerly-invalidate-analyses=0 -debug-pass-manager \ ; RUN: -passes='thinlto-pre-link' -S %s 2>&1 \ ; RUN: | FileCheck %s --check-prefixes=CHECK-O,CHECK-O23SZ,CHECK-O-NODIS +; RUN: opt -disable-verify -verify-analysis-invalidation=0 -eagerly-invalidate-analyses=0 -debug-pass-manager \ +; RUN: -unified-lto -passes='lto-pre-link' -S %s 2>&1 \ +; RUN: | FileCheck %s --check-prefixes=CHECK-O,CHECK-O23SZ,CHECK-O-NODIS ; RUN: opt -disable-verify -verify-analysis-invalidation=0 -eagerly-invalidate-analyses=0 -debug-pass-manager -debug-info-for-profiling \ ; RUN: -passes='thinlto-pre-link' -S %s 2>&1 \ ; RUN: | FileCheck %s --check-prefixes=CHECK-DIS,CHECK-O,CHECK-O2,CHECK-O23SZ +; RUN: opt -disable-verify -verify-analysis-invalidation=0 -eagerly-invalidate-analyses=0 -debug-pass-manager -debug-info-for-profiling \ +; RUN: -unified-lto -passes='lto-pre-link' -S %s 2>&1 \ +; RUN: | FileCheck %s --check-prefixes=CHECK-DIS,CHECK-O,CHECK-O2,CHECK-O23SZ ; ; Suppress FileCheck --allow-unused-prefixes=false diagnostics. 
diff --git a/llvm/test/Transforms/ThinLTOBitcodeWriter/split-unified.ll b/llvm/test/Transforms/ThinLTOBitcodeWriter/split-unified.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Transforms/ThinLTOBitcodeWriter/split-unified.ll @@ -0,0 +1,24 @@ +; Generate bitcode files with split LTO modules +; RUN: opt -unified-lto -thinlto-bc -thinlto-split-lto-unit -o %t0.bc %s +; RUN: llvm-dis -o %t3.ll %t0.bc +; RUN: FileCheck <%t3.ll.0 --check-prefix=M0 %s +; RUN: FileCheck <%t3.ll.1 --check-prefix=M1 %s +; RUN: llvm-bcanalyzer -dump %t0.bc | FileCheck --check-prefix=BCA0 %s + +; ERROR: llvm-modextract: error: module index out of range; bitcode file contains 1 module(s) + +; BCA0: UnifiedLTOMode("unified-lto", cl::Optional, + cl::desc("Set LTO mode"), + cl::value_desc("mode")); + static cl::opt EnableFreestanding( "lto-freestanding", cl::desc("Enable Freestanding (disable builtins / TLI) during LTO"), @@ -348,7 +352,20 @@ HasErrors = true; }; - LTO Lto(std::move(Conf), std::move(Backend)); + LTO::LTOKind LTOMode = LTO::LTOK_Default; + + if (UnifiedLTOMode == "full") { + LTOMode = LTO::LTOK_UnifiedRegular; + } else if (UnifiedLTOMode == "thin") { + LTOMode = LTO::LTOK_UnifiedThin; + } else if (UnifiedLTOMode == "default") { + LTOMode = LTO::LTOK_Default; + } else if (!UnifiedLTOMode.empty()) { + llvm::errs() << "invalid LTO mode\n"; + return 1; + } + + LTO Lto(std::move(Conf), std::move(Backend), 1, LTOMode); for (std::string F : InputFilenames) { std::unique_ptr MB = check(MemoryBuffer::getFile(F), F); diff --git a/llvm/tools/opt/NewPMDriver.h b/llvm/tools/opt/NewPMDriver.h --- a/llvm/tools/opt/NewPMDriver.h +++ b/llvm/tools/opt/NewPMDriver.h @@ -67,13 +67,13 @@ bool runPassPipeline(StringRef Arg0, Module &M, TargetMachine *TM, TargetLibraryInfoImpl *TLII, ToolOutputFile *Out, ToolOutputFile *ThinLinkOut, ToolOutputFile *OptRemarkFile, - StringRef PassPipeline, - ArrayRef PassPlugins, opt_tool::OutputKind OK, - opt_tool::VerifierKind VK, + StringRef PassPipeline, 
ArrayRef PassPlugins, + opt_tool::OutputKind OK, opt_tool::VerifierKind VK, bool ShouldPreserveAssemblyUseListOrder, bool ShouldPreserveBitcodeUseListOrder, bool EmitSummaryIndex, bool EmitModuleHash, - bool EnableDebugify, bool VerifyDIPreserve); + bool EnableDebugify, bool VerifyDIPreserve, + bool UnifiedLTO = false); } // namespace llvm #endif diff --git a/llvm/tools/opt/NewPMDriver.cpp b/llvm/tools/opt/NewPMDriver.cpp --- a/llvm/tools/opt/NewPMDriver.cpp +++ b/llvm/tools/opt/NewPMDriver.cpp @@ -321,17 +321,15 @@ llvm::PassPluginLibraryInfo get##Ext##PluginInfo(); #include "llvm/Support/Extension.def" -bool llvm::runPassPipeline(StringRef Arg0, Module &M, TargetMachine *TM, - TargetLibraryInfoImpl *TLII, ToolOutputFile *Out, - ToolOutputFile *ThinLTOLinkOut, - ToolOutputFile *OptRemarkFile, - StringRef PassPipeline, - ArrayRef PassPlugins, - OutputKind OK, VerifierKind VK, - bool ShouldPreserveAssemblyUseListOrder, - bool ShouldPreserveBitcodeUseListOrder, - bool EmitSummaryIndex, bool EmitModuleHash, - bool EnableDebugify, bool VerifyDIPreserve) { +bool llvm::runPassPipeline( + StringRef Arg0, Module &M, TargetMachine *TM, TargetLibraryInfoImpl *TLII, + ToolOutputFile *Out, ToolOutputFile *ThinLTOLinkOut, + ToolOutputFile *OptRemarkFile, StringRef PassPipeline, + ArrayRef PassPlugins, OutputKind OK, VerifierKind VK, + bool ShouldPreserveAssemblyUseListOrder, + bool ShouldPreserveBitcodeUseListOrder, bool EmitSummaryIndex, + bool EmitModuleHash, bool EnableDebugify, bool VerifyDIPreserve, + bool UnifiedLTO) { bool VerifyEachPass = VK == VK_VerifyEachPass; auto FS = vfs::getRealFileSystem(); @@ -416,6 +414,7 @@ // to false above so we shouldn't necessarily need to check whether or not the // option has been enabled. 
PTO.LoopUnrolling = !DisableLoopUnrolling; + PTO.UnifiedLTO = UnifiedLTO; PassBuilder PB(TM, PTO, P, &PIC); registerEPCallbacks(PB); diff --git a/llvm/tools/opt/opt.cpp b/llvm/tools/opt/opt.cpp --- a/llvm/tools/opt/opt.cpp +++ b/llvm/tools/opt/opt.cpp @@ -116,6 +116,12 @@ SplitLTOUnit("thinlto-split-lto-unit", cl::desc("Enable splitting of a ThinLTO LTOUnit")); +static cl::opt + UnifiedLTO("unified-lto", + cl::desc("Use unified LTO pipelines. Ignored unless -thinlto-bc " + "is also specified."), + cl::Hidden, cl::init(false)); + static cl::opt ThinLinkBitcodeFile( "thin-link-bitcode-file", cl::value_desc("filename"), cl::desc( @@ -629,8 +635,11 @@ if (CheckBitcodeOutputToConsole(Out->os())) NoOutput = true; - if (OutputThinLTOBC) + if (OutputThinLTOBC) { M->addModuleFlag(Module::Error, "EnableSplitLTOUnit", SplitLTOUnit); + if (UnifiedLTO) + M->addModuleFlag(Module::Error, "UnifiedLTO", 1); + } // Add an appropriate TargetLibraryInfo pass for the module's triple. TargetLibraryInfoImpl TLII(ModuleTriple); @@ -702,7 +711,7 @@ PluginList, OK, VK, PreserveAssemblyUseListOrder, PreserveBitcodeUseListOrder, EmitSummaryIndex, EmitModuleHash, EnableDebugify, - VerifyDebugInfoPreserve) + VerifyDebugInfoPreserve, UnifiedLTO) ? 0 : 1; }