Index: llvm/include/llvm/Bitcode/BitcodeReader.h
===================================================================
--- llvm/include/llvm/Bitcode/BitcodeReader.h
+++ llvm/include/llvm/Bitcode/BitcodeReader.h
@@ -55,6 +55,7 @@
     bool IsThinLTO;
     bool HasSummary;
     bool EnableSplitLTOUnit;
+    bool UnifiedLTO;
   };
 
   /// Represents a module in a bitcode file.
Index: llvm/include/llvm/IR/ModuleSummaryIndex.h
===================================================================
--- llvm/include/llvm/IR/ModuleSummaryIndex.h
+++ llvm/include/llvm/IR/ModuleSummaryIndex.h
@@ -1141,6 +1141,9 @@
   // True if the index was created for a module compiled with -fsplit-lto-unit.
   bool EnableSplitLTOUnit;
 
+  // True if the index was created for a module compiled with -funified-lto.
+  bool UnifiedLTO;
+
   // True if some of the modules were compiled with -fsplit-lto-unit and
   // some were not. Set when the combined index is created during the thin link.
   bool PartiallySplitLTOUnits = false;
@@ -1171,9 +1174,10 @@
 
 public:
   // See HaveGVs variable comment.
-  ModuleSummaryIndex(bool HaveGVs, bool EnableSplitLTOUnit = false)
-      : HaveGVs(HaveGVs), EnableSplitLTOUnit(EnableSplitLTOUnit), Saver(Alloc),
-        BlockCount(0) {}
+  ModuleSummaryIndex(bool HaveGVs, bool EnableSplitLTOUnit = false,
+                     bool UnifiedLTO = false)
+      : HaveGVs(HaveGVs), EnableSplitLTOUnit(EnableSplitLTOUnit),
+        UnifiedLTO(UnifiedLTO), Saver(Alloc), BlockCount(0) {}
 
   // Current version for the module summary in bitcode files.
   // The BitcodeSummaryVersion should be bumped whenever we introduce changes
@@ -1300,6 +1304,9 @@
   bool enableSplitLTOUnit() const { return EnableSplitLTOUnit; }
   void setEnableSplitLTOUnit() { EnableSplitLTOUnit = true; }
 
+  bool hasUnifiedLTO() const { return UnifiedLTO; }
+  void setUnifiedLTO() { UnifiedLTO = true; }
+
   bool partiallySplitLTOUnits() const { return PartiallySplitLTOUnits; }
   void setPartiallySplitLTOUnits() { PartiallySplitLTOUnits = true; }
 
Index: llvm/include/llvm/LTO/Config.h
===================================================================
--- llvm/include/llvm/LTO/Config.h
+++ llvm/include/llvm/LTO/Config.h
@@ -171,6 +171,9 @@
   /// Time trace granularity.
   unsigned TimeTraceGranularity = 500;
 
+  /// True when the link uses a unified LTO mode (see LTO::LTOKind).
+  bool UnifiedLTO = false;
+
   bool ShouldDiscardValueNames = true;
   DiagnosticHandlerFunction DiagHandler;
 
Index: llvm/include/llvm/LTO/LTO.h
===================================================================
--- llvm/include/llvm/LTO/LTO.h
+++ llvm/include/llvm/LTO/LTO.h
@@ -239,13 +239,25 @@
   friend InputFile;
 
 public:
+  /// The kind of link to perform. Both unified kinds reject input modules
+  /// whose summary does not carry the UnifiedLTO flag.
+  enum LTOKind {
+    /// Link each module as regular or thin LTO, as recorded in its summary.
+    LTOK_Default,
+    /// Link every module as regular LTO, even if it has a ThinLTO summary.
+    LTOK_UnifiedRegular,
+    /// Unified LTO inputs; modules with a ThinLTO summary are linked as thin.
+    LTOK_UnifiedThin,
+  };
+
   /// Create an LTO object. A default constructed LTO object has a reasonable
   /// production configuration, but you can customize it by passing arguments to
   /// this constructor.
   /// FIXME: We do currently require the DiagHandler field to be set in Conf.
   /// Until that is fixed, a Config argument is required.
   LTO(Config Conf, ThinBackend Backend = nullptr,
-      unsigned ParallelCodeGenParallelismLevel = 1);
+      unsigned ParallelCodeGenParallelismLevel = 1,
+      LTOKind LTOMode = LTOK_Default);
   ~LTO();
 
   /// Add an input file to the LTO link, using the provided symbol resolutions.
@@ -405,6 +417,9 @@
 
   mutable bool CalledGetMaxTasks = false;
 
+  // The kind of link requested when this object was constructed.
+  LTOKind LTOMode;
+
   // Use Optional to distinguish false from not yet initialized.
   Optional<bool> EnableSplitLTOUnit;
 
Index: llvm/include/llvm/Passes/PassBuilder.h
===================================================================
--- llvm/include/llvm/Passes/PassBuilder.h
+++ llvm/include/llvm/Passes/PassBuilder.h
@@ -71,6 +71,9 @@
   /// that of the flag: `-enable-npm-call-graph-profile`.
   bool CallGraphProfile;
 
+  /// Tuning option to enable the unified LTO pipeline. Defaults to false.
+  bool UnifiedLTO;
+
   /// Tuning option to enable/disable function merging. Its default value is
   /// false.
   bool MergeFunctions;
Index: llvm/include/llvm/Transforms/IPO.h
===================================================================
--- llvm/include/llvm/Transforms/IPO.h
+++ llvm/include/llvm/Transforms/IPO.h
@@ -291,7 +291,8 @@
 
 /// Write ThinLTO-ready bitcode to Str.
 ModulePass *createWriteThinLTOBitcodePass(raw_ostream &Str,
-                                          raw_ostream *ThinLinkOS = nullptr);
+                                          raw_ostream *ThinLinkOS = nullptr,
+                                          bool UnifiedLTO = false);
 
 } // End llvm namespace
 
Index: llvm/include/llvm/Transforms/IPO/PassManagerBuilder.h
===================================================================
--- llvm/include/llvm/Transforms/IPO/PassManagerBuilder.h
+++ llvm/include/llvm/Transforms/IPO/PassManagerBuilder.h
@@ -167,6 +167,7 @@
   bool PrepareForLTO;
   bool PrepareForThinLTO;
   bool PerformThinLTO;
+  bool UnifiedLTO;
   bool DivergentTarget;
   unsigned LicmMssaOptCap;
   unsigned LicmMssaNoAccForPromotionCap;
Index: llvm/include/llvm/Transforms/IPO/ThinLTOBitcodeWriter.h
===================================================================
--- llvm/include/llvm/Transforms/IPO/ThinLTOBitcodeWriter.h
+++ llvm/include/llvm/Transforms/IPO/ThinLTOBitcodeWriter.h
@@ -30,7 +30,8 @@
 public:
   // Writes bitcode to OS. Also write thin link file to ThinLinkOS, if
   // it's not nullptr.
-  ThinLTOBitcodeWriterPass(raw_ostream &OS, raw_ostream *ThinLinkOS)
+  ThinLTOBitcodeWriterPass(raw_ostream &OS, raw_ostream *ThinLinkOS,
+                           bool UnifiedLTO = false)
       : OS(OS), ThinLinkOS(ThinLinkOS) {}
 
   PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM);
Index: llvm/lib/Analysis/ModuleSummaryAnalysis.cpp
===================================================================
--- llvm/lib/Analysis/ModuleSummaryAnalysis.cpp
+++ llvm/lib/Analysis/ModuleSummaryAnalysis.cpp
@@ -679,10 +679,14 @@
     std::function<const StackSafetyInfo *(const Function &F)> GetSSICallback) {
   assert(PSI);
   bool EnableSplitLTOUnit = false;
+  bool UnifiedLTO = false;
   if (auto *MD = mdconst::extract_or_null<ConstantInt>(
           M.getModuleFlag("EnableSplitLTOUnit")))
     EnableSplitLTOUnit = MD->getZExtValue();
-  ModuleSummaryIndex Index(/*HaveGVs=*/true, EnableSplitLTOUnit);
+  if (auto *MD = mdconst::extract_or_null<ConstantInt>(
+          M.getModuleFlag("UnifiedLTO")))
+    UnifiedLTO = MD->getZExtValue();
+  ModuleSummaryIndex Index(/*HaveGVs=*/true, EnableSplitLTOUnit, UnifiedLTO);
 
   // Identify the local values in the llvm.used and llvm.compiler.used sets,
   // which should not be exported as they would then require renaming and
Index: llvm/lib/Bitcode/Reader/BitcodeReader.cpp
===================================================================
--- llvm/lib/Bitcode/Reader/BitcodeReader.cpp
+++ llvm/lib/Bitcode/Reader/BitcodeReader.cpp
@@ -7365,7 +7365,61 @@
   return std::move(Index);
 }
 
-static Expected<bool> getEnableSplitLTOUnitFlag(BitstreamCursor &Stream,
+/// Scan the summary block \p ID for its FS_FLAGS record and store the
+/// EnableSplitLTOUnit and UnifiedLTO bits in \p LTOInfo. The other fields of
+/// \p LTOInfo are left for the caller to fill in.
+static Error getEnableSplitLTOUnitAndUnifiedFlag(BitstreamCursor &Stream,
+                                                 unsigned ID, BitcodeLTOInfo &LTOInfo) {
+  if (Error Err = Stream.EnterSubBlock(ID))
+    return std::move(Err);
+  SmallVector<uint64_t, 64> Record;
+
+  while (true) {
+    BitstreamEntry Entry;
+    if (Error E = Stream.advanceSkippingSubblocks().moveInto(Entry))
+      return std::move(E);
+
+    switch (Entry.Kind) {
+    case BitstreamEntry::SubBlock: // Handled for us already.
+    case BitstreamEntry::Error:
+      return error("Malformed block");
+    case BitstreamEntry::EndBlock: {
+      // If no flags record found, conservatively report EnableSplitLTOUnit as
+      // true to mimic behavior before this flag was added. UnifiedLTO is only
+      // ever set by an explicit flags record.
+      LTOInfo.EnableSplitLTOUnit = true;
+      LTOInfo.UnifiedLTO = false;
+      return Error::success();
+    }
+    case BitstreamEntry::Record:
+      // The interesting case.
+      break;
+    }
+
+    // Look for the FS_FLAGS record.
+    Record.clear();
+    Expected<unsigned> MaybeBitCode = Stream.readRecord(Entry.ID, Record);
+    if (!MaybeBitCode)
+      return MaybeBitCode.takeError();
+    switch (MaybeBitCode.get()) {
+    default: // Default behavior: ignore.
+      break;
+    case bitc::FS_FLAGS: { // [flags]
+      uint64_t Flags = Record[0];
+      // Scan flags.
+      assert(Flags <= 0xff && "Unexpected bits in flag");
+
+      LTOInfo.EnableSplitLTOUnit = Flags & 0x8;
+      LTOInfo.UnifiedLTO = Flags & 0x80;
+
+      return Error::success();
+    }
+    }
+  }
+  llvm_unreachable("Exit infinite loop");
+}
+
+static Expected<bool> getUnifiedLTOFlag(BitstreamCursor &Stream,
                                                 unsigned ID) {
   if (Error Err = Stream.EnterSubBlock(ID))
     return std::move(Err);
@@ -7402,7 +7456,7 @@
       // Scan flags.
-      assert(Flags <= 0x7f && "Unexpected bits in flag");
+      assert(Flags <= 0xff && "Unexpected bits in flag");
 
-      return Flags & 0x8;
+      return Flags & 0x80;
     }
     }
   }
@@ -7428,25 +7482,25 @@
       return error("Malformed block");
     case BitstreamEntry::EndBlock:
       return BitcodeLTOInfo{/*IsThinLTO=*/false, /*HasSummary=*/false,
-                            /*EnableSplitLTOUnit=*/false};
+                            /*EnableSplitLTOUnit=*/false, /*UnifiedLTO=*/false};
 
     case BitstreamEntry::SubBlock:
       if (Entry.ID == bitc::GLOBALVAL_SUMMARY_BLOCK_ID) {
-        Expected<bool> EnableSplitLTOUnit =
-            getEnableSplitLTOUnitFlag(Stream, Entry.ID);
-        if (!EnableSplitLTOUnit)
-          return EnableSplitLTOUnit.takeError();
-        return BitcodeLTOInfo{/*IsThinLTO=*/true, /*HasSummary=*/true,
-                              *EnableSplitLTOUnit};
+        BitcodeLTOInfo LTOInfo;
+        if (Error E = getEnableSplitLTOUnitAndUnifiedFlag(Stream, Entry.ID, LTOInfo))
+          return std::move(E);
+        LTOInfo.IsThinLTO = true;
+        LTOInfo.HasSummary = true;
+        return LTOInfo;
       }
 
       if (Entry.ID == bitc::FULL_LTO_GLOBALVAL_SUMMARY_BLOCK_ID) {
-        Expected<bool> EnableSplitLTOUnit =
-            getEnableSplitLTOUnitFlag(Stream, Entry.ID);
-        if (!EnableSplitLTOUnit)
-          return EnableSplitLTOUnit.takeError();
-        return BitcodeLTOInfo{/*IsThinLTO=*/false, /*HasSummary=*/true,
-                              *EnableSplitLTOUnit};
+        BitcodeLTOInfo LTOInfo;
+        if (Error E = getEnableSplitLTOUnitAndUnifiedFlag(Stream, Entry.ID, LTOInfo))
+          return std::move(E);
+        LTOInfo.IsThinLTO = false;
+        LTOInfo.HasSummary = true;
+        return LTOInfo;
       }
 
       // Ignore other sub-blocks.
Index: llvm/lib/Bitcode/Writer/BitcodeWriter.cpp
===================================================================
--- llvm/lib/Bitcode/Writer/BitcodeWriter.cpp
+++ llvm/lib/Bitcode/Writer/BitcodeWriter.cpp
@@ -3965,6 +3965,9 @@
   // Bits 1-3 are set only in the combined index, skip them.
if (Index->enableSplitLTOUnit())
     Flags |= 0x8;
+  if (Index->hasUnifiedLTO())
+    Flags |= 0x80;
+
   Stream.EmitRecord(bitc::FS_FLAGS, ArrayRef<uint64_t>{Flags});
 
   if (Index->begin() == Index->end()) {
Index: llvm/lib/IR/ModuleSummaryIndex.cpp
===================================================================
--- llvm/lib/IR/ModuleSummaryIndex.cpp
+++ llvm/lib/IR/ModuleSummaryIndex.cpp
@@ -105,11 +105,13 @@
     Flags |= 0x20;
   if (withDSOLocalPropagation())
     Flags |= 0x40;
+  if (hasUnifiedLTO())
+    Flags |= 0x80;
   return Flags;
 }
 
 void ModuleSummaryIndex::setFlags(uint64_t Flags) {
-  assert(Flags <= 0x7f && "Unexpected bits in flag");
+  assert(Flags <= 0xff && "Unexpected bits in flag");
   // 1 bit: WithGlobalValueDeadStripping flag.
   // Set on combined index only.
   if (Flags & 0x1)
@@ -139,6 +141,10 @@
   // Set on combined index only.
   if (Flags & 0x40)
     setWithDSOLocalPropagation();
+  // 1 bit: WithUnifiedLTO flag.
+  // Set on per module indexes.
+  if (Flags & 0x80)
+    setUnifiedLTO();
 }
 
 // Collect for the given module the list of function it defines
Index: llvm/lib/LTO/LTO.cpp
===================================================================
--- llvm/lib/LTO/LTO.cpp
+++ llvm/lib/LTO/LTO.cpp
@@ -521,10 +521,12 @@
 }
 
 LTO::LTO(Config Conf, ThinBackend Backend,
-         unsigned ParallelCodeGenParallelismLevel)
+         unsigned ParallelCodeGenParallelismLevel,
+         LTOKind LTOMode)
     : Conf(std::move(Conf)),
       RegularLTO(ParallelCodeGenParallelismLevel, this->Conf),
-      ThinLTO(std::move(Backend)) {}
+      ThinLTO(std::move(Backend)),
+      LTOMode(LTOMode) {}
 
 // Requires a destructor for MapVector.
LTO::~LTO() = default;
@@ -649,12 +651,22 @@
       EnableSplitLTOUnit = LTOInfo->EnableSplitLTOUnit;
 
   BitcodeModule BM = Input.Mods[ModI];
+
+  if ((LTOMode == LTOK_UnifiedRegular || LTOMode == LTOK_UnifiedThin) &&
+      !LTOInfo->UnifiedLTO)
+    return make_error<StringError>(
+        "unified LTO compilation must use "
+        "compatible bitcode modules (use -funified-lto)",
+        inconvertibleErrorCode());
+
+  bool IsThinLTO = LTOInfo->IsThinLTO && (LTOMode != LTOK_UnifiedRegular);
+
   auto ModSyms = Input.module_symbols(ModI);
   addModuleToGlobalRes(ModSyms, {ResI, ResE},
-                       LTOInfo->IsThinLTO ? ThinLTO.ModuleMap.size() + 1 : 0,
+                       IsThinLTO ? ThinLTO.ModuleMap.size() + 1 : 0,
                        LTOInfo->HasSummary);
 
-  if (LTOInfo->IsThinLTO)
+  if (IsThinLTO)
     return addThinLTO(BM, ModSyms, ResI, ResE);
 
   RegularLTO.EmptyCombinedModule = false;
@@ -722,6 +734,11 @@
 
   if (Error Err = M.materializeMetadata())
     return std::move(Err);
+
+  if (LTOMode == LTOK_UnifiedRegular)
+    if (NamedMDNode *CfiFunctionsMD = M.getNamedMetadata("cfi.functions"))
+      M.eraseNamedMetadata(CfiFunctionsMD);
+
   UpgradeDebugInfo(M);
 
   ModuleSymbolTable SymTab;
@@ -1109,18 +1126,26 @@
 
   if (!Conf.CodeGenOnly) {
     for (const auto &R : GlobalResolutions) {
       if (!R.second.isPrevailingIRSymbol())
         continue;
       if (R.second.Partition != 0 &&
           R.second.Partition != GlobalResolution::External)
         continue;
 
       GlobalValue *GV =
           RegularLTO.CombinedModule->getNamedValue(R.second.IRName);
       // Ignore symbols defined in other partitions.
       // Also skip declarations, which are not allowed to have internal linkage.
       if (!GV || GV->hasLocalLinkage() || GV->isDeclaration())
         continue;
+      // In unified regular LTO, leave globals with a DLL storage class and
+      // available_externally or appending definitions untouched.
+      if ((LTOMode == LTOKind::LTOK_UnifiedRegular) &&
+          ((GV->getDLLStorageClass() != GlobalValue::DefaultStorageClass) ||
+           GV->hasAvailableExternallyLinkage() ||
+           GV->hasAppendingLinkage()))
+        continue;
+
       GV->setUnnamedAddr(R.second.UnnamedAddr ? GlobalValue::UnnamedAddr::Global
                                               : GlobalValue::UnnamedAddr::None);
       if (EnableLTOInternalization && R.second.Partition == 0)
@@ -1516,6 +1541,10 @@
   thinLTOInternalizeAndPromoteInIndex(ThinLTO.CombinedIndex, isExported,
                                       isPrevailing);
 
+  Conf.UnifiedLTO = (LTOMode != LTOK_Default);
+  if (Conf.UnifiedLTO)
+    Conf.PTO.CallGraphProfile = false;
+
   auto recordNewLinkage = [&](StringRef ModuleIdentifier,
                               GlobalValue::GUID GUID,
                               GlobalValue::LinkageTypes NewLinkage) {
Index: llvm/lib/Passes/PassBuilder.cpp
===================================================================
--- llvm/lib/Passes/PassBuilder.cpp
+++ llvm/lib/Passes/PassBuilder.cpp
@@ -1140,7 +1140,10 @@
     } else if (Matches[1] == "thinlto-pre-link") {
       MPM.addPass(buildThinLTOPreLinkDefaultPipeline(L));
     } else if (Matches[1] == "thinlto") {
-      MPM.addPass(buildThinLTODefaultPipeline(L, nullptr));
+      if (!PTO.UnifiedLTO)
+        MPM.addPass(buildThinLTODefaultPipeline(L, nullptr));
+      else
+        MPM.addPass(buildLTOPreLinkDefaultPipeline(L));
     } else if (Matches[1] == "lto-pre-link") {
       MPM.addPass(buildLTOPreLinkDefaultPipeline(L));
     } else {
Index: llvm/lib/Passes/PassBuilderPipelines.cpp
===================================================================
--- llvm/lib/Passes/PassBuilderPipelines.cpp
+++ llvm/lib/Passes/PassBuilderPipelines.cpp
@@ -192,6 +192,7 @@
   LicmMssaOptCap = SetLicmMssaOptCap;
   LicmMssaNoAccForPromotionCap = SetLicmMssaNoAccForPromotionCap;
   CallGraphProfile = true;
+  UnifiedLTO = false;
   MergeFunctions = EnableMergeFunctions;
   EagerlyInvalidateAnalyses = EnableEagerlyInvalidateAnalyses;
 }
Index: llvm/lib/Transforms/IPO/PassManagerBuilder.cpp
===================================================================
--- llvm/lib/Transforms/IPO/PassManagerBuilder.cpp
+++ llvm/lib/Transforms/IPO/PassManagerBuilder.cpp
@@ -209,6 +209,7 @@
     PGOSampleUse = "";
     PrepareForThinLTO = EnablePrepareForThinLTO;
     PerformThinLTO = EnablePerformThinLTO;
+    UnifiedLTO = false;
     DivergentTarget = false;
     CallGraphProfile = true;
 }
@@ -703,7
+704,7 @@
   else if (GlobalExtensionsNotEmpty() || !Extensions.empty())
     MPM.add(createBarrierNoopPass());
 
-  if (PerformThinLTO) {
+  if (PerformThinLTO && !UnifiedLTO) {
     MPM.add(createLowerTypeTestsPass(nullptr, nullptr, true));
     // Drop available_externally and unreferenced globals. This is necessary
     // with ThinLTO in order to avoid leaving undefined references to dead
@@ -740,7 +741,7 @@
   // inter-module indirect calls. For that we perform indirect call promotion
   // earlier in the pass pipeline, here before globalopt. Otherwise imported
   // available_externally functions look unreferenced and are removed.
-  if (PerformThinLTO) {
+  if (PerformThinLTO && !UnifiedLTO) {
     MPM.add(createPGOIndirectCallPromotionLegacyPass(/*InLTO = */ true,
                                                      !PGOSampleUse.empty()));
     MPM.add(createLowerTypeTestsPass(nullptr, nullptr, true));
@@ -751,7 +752,7 @@
   // in backend more difficult.
   bool PrepareForThinLTOUsingPGOSampleProfile =
       PrepareForThinLTO && !PGOSampleUse.empty();
-  if (PrepareForThinLTOUsingPGOSampleProfile)
+  if (PrepareForThinLTOUsingPGOSampleProfile && !UnifiedLTO)
     DisableUnrollLoops = true;
 
   // Infer attributes about declarations if possible.
@@ -875,7 +876,7 @@
   // If we are planning to perform ThinLTO later, let's not bloat the code with
   // unrolling/vectorization/... now. We'll first run the inliner + CGSCC passes
   // during ThinLTO and perform the rest of the optimizations afterward.
-  if (PrepareForThinLTO) {
+  if (PrepareForThinLTO && !UnifiedLTO) {
     // Ensure we perform any last passes, but do so before renaming anonymous
     // globals in case the passes add any.
     addExtensionsToPM(EP_OptimizerLast, MPM);
@@ -885,7 +886,7 @@
     return;
   }
 
-  if (PerformThinLTO)
+  if (PerformThinLTO && !UnifiedLTO)
     // Optimize globals now when performing ThinLTO, this enables more
     // optimizations later.
     MPM.add(createGlobalOptimizerPass());
@@ -989,7 +990,9 @@
 
   addExtensionsToPM(EP_OptimizerLast, MPM);
 
-  if (PrepareForLTO) {
+  // Anonymous globals need a name to ensure that CFI works in both Thin and
+  // Full LTO.
+  if (PrepareForLTO || (PrepareForThinLTO && UnifiedLTO)) {
     MPM.add(createCanonicalizeAliasesPass());
     // Rename anon globals to be able to handle them in the summary
     MPM.add(createNameAnonGlobalPass());
@@ -1192,6 +1195,7 @@
 void PassManagerBuilder::populateThinLTOPassManager(
     legacy::PassManagerBase &PM) {
   PerformThinLTO = true;
+  UnifiedLTO = false;
   if (LibraryInfo)
     PM.add(new TargetLibraryInfoWrapperPass(*LibraryInfo));
 
Index: llvm/lib/Transforms/IPO/ThinLTOBitcodeWriter.cpp
===================================================================
--- llvm/lib/Transforms/IPO/ThinLTOBitcodeWriter.cpp
+++ llvm/lib/Transforms/IPO/ThinLTOBitcodeWriter.cpp
@@ -238,26 +238,39 @@
   appendToUsed(DestM, NewUsed);
 }
 
+// Returns true if M was compiled for unified LTO, i.e. its "UnifiedLTO" module
+// flag is present and nonzero.
+bool enableUnifiedLTO(Module &M) {
+  bool UnifiedLTO = false;
+  if (auto *MD = mdconst::extract_or_null<ConstantInt>(
+          M.getModuleFlag("UnifiedLTO")))
+    UnifiedLTO = MD->getZExtValue();
+  return UnifiedLTO;
+}
+
 // If it's possible to split M into regular and thin LTO parts, do so and write
 // a multi-module bitcode file with the two parts to OS. Otherwise, write only a
 // regular LTO bitcode file to OS.
 void splitAndWriteThinLTOBitcode(
     raw_ostream &OS, raw_ostream *ThinLinkOS,
     function_ref<AAResults &(Function &)> AARGetter, Module &M) {
+  bool UnifiedLTO = enableUnifiedLTO(M);
   std::string ModuleId = getUniqueModuleId(&M);
   if (ModuleId.empty()) {
+    assert(!UnifiedLTO && "UnifiedLTO module without a unique module ID");
     // We couldn't generate a module ID for this module, write it out as a
     // regular LTO module with an index for summary-based dead stripping.
     ProfileSummaryInfo PSI(M);
     M.addModuleFlag(Module::Error, "ThinLTO", uint32_t(0));
     ModuleSummaryIndex Index = buildModuleSummaryIndex(M, nullptr, &PSI);
-    WriteBitcodeToFile(M, OS, /*ShouldPreserveUseListOrder=*/false, &Index);
+    WriteBitcodeToFile(M, OS, /*ShouldPreserveUseListOrder=*/false, &Index,
+                       /*GenerateHash=*/UnifiedLTO);
 
     if (ThinLinkOS)
       // We don't have a ThinLTO part, but still write the module to the
       // ThinLinkOS if requested so that the expected output file is produced.
       WriteBitcodeToFile(M, *ThinLinkOS, /*ShouldPreserveUseListOrder=*/false,
-                         &Index);
+                         &Index, /*GenerateHash=*/UnifiedLTO);
     return;
   }
 
@@ -582,7 +595,8 @@
                 "Write ThinLTO Bitcode", false, true)
 
 ModulePass *llvm::createWriteThinLTOBitcodePass(raw_ostream &Str,
-                                                raw_ostream *ThinLinkOS) {
+                                                raw_ostream *ThinLinkOS,
+                                                bool UnifiedLTO) {
   return new WriteThinLTOBitcode(Str, ThinLinkOS);
 }
 
Index: llvm/test/LTO/Resolution/X86/local-def-dllimport.ll
===================================================================
--- llvm/test/LTO/Resolution/X86/local-def-dllimport.ll
+++ llvm/test/LTO/Resolution/X86/local-def-dllimport.ll
@@ -1,10 +1,12 @@
-; RUN: opt -thinlto-bc -thinlto-split-lto-unit -o %t0.bc %s
+; RUN: opt --unified-lto -thinlto-split-lto-unit -thinlto-bc -o %t0.bc %s
+
 ; RUN: llvm-lto2 run -r %t0.bc,__imp_f,l \
-; RUN:               -r %t0.bc,g,p \
 ; RUN:               -r %t0.bc,g,l \
+; RUN:               -r %t0.bc,g,p \
 ; RUN:               -r %t0.bc,e,l \
 ; RUN:               -r %t0.bc,main,x \
-; RUN:               -save-temps -o %t1 %t0.bc
+; RUN:               -save-temps -o %t1 %t0.bc \
+; RUN:               --lto=thin
 ; RUN: llvm-dis %t1.1.3.import.bc -o - | FileCheck %s
 source_filename = "test.cpp"
 target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
Index:
llvm/test/LTO/Resolution/X86/unified-lto-check.ll
===================================================================
--- /dev/null
+++ llvm/test/LTO/Resolution/X86/unified-lto-check.ll
@@ -0,0 +1,46 @@
+; Test to ensure that the Unified LTO flag is set properly in the summary, and
+; that linking bitcode files with different values of this flag is diagnosed
+; when a unified LTO mode is requested.
+
+; Linking bitcode both without UnifiedLTO set should work
+; RUN: opt -thinlto-bc -thinlto-split-lto-unit -o %t1 %s
+; RUN: llvm-bcanalyzer -dump %t1 | FileCheck %s --check-prefix=NOUNIFIEDLTO
+; RUN: llvm-dis -o - %t1 | FileCheck %s --check-prefix=NOUNIFIEDLTOFLAG
+; RUN: opt -thinlto-bc -thinlto-split-lto-unit -o %t2 %s
+; RUN: llvm-bcanalyzer -dump %t2 | FileCheck %s --check-prefix=NOUNIFIEDLTO
+; RUN: llvm-dis -o - %t2 | FileCheck %s --check-prefix=NOUNIFIEDLTOFLAG
+; RUN: llvm-lto2 run -o %t3 %t1 %t2
+
+; Linking bitcode with different values of UnifiedLTO should fail
+; RUN: opt -thinlto-bc -thinlto-split-lto-unit -o %t1 %s
+; RUN: llvm-bcanalyzer -dump %t1 | FileCheck %s --check-prefix=NOUNIFIEDLTO
+; RUN: llvm-dis -o - %t1 | FileCheck %s --check-prefix=NOUNIFIEDLTOFLAG
+; RUN: opt -unified-lto -thinlto-bc -thinlto-split-lto-unit -o %t2 %s
+; RUN: llvm-bcanalyzer -dump %t2 | FileCheck %s --check-prefix=UNIFIEDLTO
+; RUN: llvm-dis -o - %t2 | FileCheck %s --check-prefix=UNIFIEDLTOFLAG
+; RUN: not llvm-lto2 run --lto=thin -o %t3 %t1 %t2 2>&1 | \
+; RUN: FileCheck %s --check-prefix UNIFIEDERR
+
+; Linking bitcode with identical Unified LTO flags should succeed
+; RUN: opt -unified-lto -thinlto-bc -thinlto-split-lto-unit -o %t1 %s
+; RUN: llvm-bcanalyzer -dump %t1 | FileCheck %s --check-prefix=UNIFIEDLTO
+; RUN: llvm-dis -o - %t1 | FileCheck %s --check-prefix=UNIFIEDLTOFLAG
+; RUN: opt -unified-lto -thinlto-bc -thinlto-split-lto-unit -o %t2 %s
+; RUN: llvm-bcanalyzer -dump %t2 | FileCheck %s --check-prefix=UNIFIEDLTO
+; RUN: llvm-dis -o - %t2 | FileCheck %s --check-prefix=UNIFIEDLTOFLAG
+; RUN: llvm-lto2 run --lto=thin -o %t3 %t1 %t2 2>&1 | \
+; RUN: FileCheck --allow-empty %s --check-prefix NOUNIFIEDERR
+
+; UNIFIEDERR: unified LTO compilation must use compatible bitcode modules
+; NOUNIFIEDERR-NOT: unified LTO compilation must use compatible bitcode modules
+
+; The flag should be set when UnifiedLTO is enabled
+; UNIFIEDLTO: <FLAGS op0=136/>
+; NOUNIFIEDLTO: <FLAGS op0=8/>
+
+; Check that the corresponding module flag is set when expected.
+; UNIFIEDLTOFLAG: !{i32 1, !"UnifiedLTO", i32 1}
+; NOUNIFIEDLTOFLAG-NOT: !{i32 1, !"UnifiedLTO", i32 1}
+
+target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
Index: llvm/test/LTO/X86/cfi-func-remove.ll
===================================================================
--- /dev/null
+++ llvm/test/LTO/X86/cfi-func-remove.ll
@@ -0,0 +1,10 @@
+; RUN: opt -thinlto-bc -thinlto-split-lto-unit -unified-lto <%s -o %t0
+; RUN: llvm-lto2 run -o %t1 --lto=full --save-temps %t0
+; RUN: llvm-dis <%t1.0.0.preopt.bc 2>&1 | FileCheck %s --implicit-check-not warning:
+target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-pc-linux-gnu"
+
+!cfi.functions = !{!2}
+; CHECK-NOT: cfi.functions
+
+!2 = !{!"main", i8 0}
Index: llvm/test/LTO/X86/unified-cfi.ll
===================================================================
--- /dev/null
+++ llvm/test/LTO/X86/unified-cfi.ll
@@ -0,0 +1,98 @@
+; Test for the expected CFI codegen in a module with CFI metadata.
+; RUN: opt -unified-lto -thinlto-bc -o %t0.o %s +; RUN: llvm-lto --exported-symbol=main -filetype=asm -o - %t0.o | FileCheck %s + +; CHECK-LABEL: main + +; CHECK: jbe +; CHECK-NEXT: ud2 +; CHECK-NEXT: ud2 + +; ModuleID = 'llvm/test/LTO/X86/unified-cfi.ll' +source_filename = "cfi.cpp" +target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-scei-ps4" + +@func = hidden global [3 x i32 ()*] [i32 ()* @_Z1av, i32 ()* @_Z1bv, i32 ()* @_Z1cv], align 16 +@.src = private unnamed_addr constant [8 x i8] c"cfi.cpp\00", align 1 +@anon.9260195284c792ab5c6ef4d97bfcf95d.0 = private unnamed_addr constant { i16, i16, [9 x i8] } { i16 -1, i16 0, [9 x i8] c"'int ()'\00" } + +; Function Attrs: noinline nounwind optnone sspstrong uwtable +define hidden i32 @_Z1av() #0 !type !3 !type !4 { +entry: + ret i32 1 +} + +; Function Attrs: noinline nounwind optnone sspstrong uwtable +define hidden i32 @_Z1bv() #0 !type !3 !type !4 { +entry: + ret i32 2 +} + +; Function Attrs: noinline nounwind optnone sspstrong uwtable +define hidden i32 @_Z1cv() #0 !type !3 !type !4 { +entry: + ret i32 3 +} + +; Function Attrs: noinline norecurse nounwind optnone sspstrong uwtable +define hidden i32 @main(i32 %argc, i8** %argv) #1 !type !5 !type !6 { +entry: + %retval = alloca i32, align 4 + %argc.addr = alloca i32, align 4 + %argv.addr = alloca i8**, align 8 + store i32 0, i32* %retval, align 4 + store i32 %argc, i32* %argc.addr, align 4 + store i8** %argv, i8*** %argv.addr, align 8 + %0 = load i32, i32* %argc.addr, align 4 + %idxprom = sext i32 %0 to i64 + %arrayidx = getelementptr inbounds [3 x i32 ()*], [3 x i32 ()*]* @func, i64 0, i64 %idxprom + %1 = load i32 ()*, i32 ()** %arrayidx, align 8 + %2 = bitcast i32 ()* %1 to i8*, !nosanitize !7 + %3 = call i1 @llvm.type.test(i8* %2, metadata !"_ZTSFivE"), !nosanitize !7 + br i1 %3, label %cont, label %trap, !nosanitize !7 + +trap: ; preds = %entry + call void @llvm.trap() #4, !nosanitize !7 + 
unreachable, !nosanitize !7 + +cont: ; preds = %entry + %call = call i32 %1() + ret i32 %call +} + +; Function Attrs: nofree nosync nounwind readnone speculatable willreturn +declare i1 @llvm.type.test(i8*, metadata) #2 + +; Function Attrs: cold noreturn nounwind +declare void @llvm.trap() #3 + +attributes #0 = { noinline nounwind optnone sspstrong uwtable } +attributes #1 = { noinline norecurse nounwind optnone sspstrong uwtable } +attributes #2 = { nofree nosync nounwind readnone speculatable willreturn } +attributes #3 = { cold noreturn nounwind } +attributes #4 = { noreturn nounwind } + +!llvm.module.flags = !{!0, !1} +!llvm.ident = !{!2} + +!0 = !{i32 1, !"wchar_size", i32 2} +!1 = !{i32 7, !"PIC Level", i32 2} +!2 = !{!"clang version 7.0.0 (PS4 clang version 99.99.0.1562 432a534f checking)"} +!3 = !{i64 0, !"_ZTSFivE"} +!4 = !{i64 0, !"_ZTSFivE.generalized"} +!5 = !{i64 0, !"_ZTSFiiPPcE"} +!6 = !{i64 0, !"_ZTSFiiPvE.generalized"} +!7 = !{} + +^0 = module: (path: "llvm/test/LTO/X86/unified-cfi.ll", hash: (0, 0, 0, 0, 0)) +^1 = gv: (name: "llvm.type.test") ; guid = 608142985856744218 +^2 = gv: (name: "_Z1cv", summaries: (function: (module: ^0, flags: (linkage: external, visibility: default, notEligibleToImport: 1, live: 0, dsoLocal: 1, canAutoHide: 0), insts: 1))) ; guid = 1031113446561889624 +^3 = gv: (name: "_Z1bv", summaries: (function: (module: ^0, flags: (linkage: external, visibility: default, notEligibleToImport: 1, live: 0, dsoLocal: 1, canAutoHide: 0), insts: 1))) ; guid = 2000451273547961259 +^4 = gv: (name: "_Z1av", summaries: (function: (module: ^0, flags: (linkage: external, visibility: default, notEligibleToImport: 1, live: 0, dsoLocal: 1, canAutoHide: 0), insts: 1))) ; guid = 3456846378323757990 +^5 = gv: (name: ".src", summaries: (variable: (module: ^0, flags: (linkage: private, visibility: default, notEligibleToImport: 0, live: 0, dsoLocal: 1, canAutoHide: 0), varFlags: (readonly: 0, writeonly: 0, constant: 0)))) ; guid = 5614330533059031665 
+^6 = gv: (name: "llvm.trap") ; guid = 6116349651215144041 +^7 = gv: (name: "func", summaries: (variable: (module: ^0, flags: (linkage: external, visibility: default, notEligibleToImport: 0, live: 0, dsoLocal: 1, canAutoHide: 0), varFlags: (readonly: 0, writeonly: 0, constant: 0), refs: (^4, ^3, ^2)))) ; guid = 7289175272376759421 +^8 = gv: (name: "anon.9260195284c792ab5c6ef4d97bfcf95d.0", summaries: (variable: (module: ^0, flags: (linkage: private, visibility: default, notEligibleToImport: 0, live: 0, dsoLocal: 1, canAutoHide: 0), varFlags: (readonly: 0, writeonly: 0, constant: 0)))) ; guid = 10197562899942851386 +^9 = gv: (name: "main", summaries: (function: (module: ^0, flags: (linkage: external, visibility: default, notEligibleToImport: 1, live: 0, dsoLocal: 1, canAutoHide: 0), insts: 17, funcFlags: (readNone: 0, readOnly: 0, noRecurse: 1, returnDoesNotAlias: 0, noInline: 0, alwaysInline: 0, noUnwind: 0, mayThrow: 0, hasUnknownCall: 0), typeIdInfo: (typeTests: (194679795792225349)), refs: (^7)))) ; guid = 15822663052811949562 +^10 = blockcount: 0 Index: llvm/test/LTO/X86/unified-internalize.ll =================================================================== --- /dev/null +++ llvm/test/LTO/X86/unified-internalize.ll @@ -0,0 +1,50 @@ +; RUN: opt <%s -unified-lto -thinlto-split-lto-unit -thinlto-bc -o %t.bc + +; Test internalization during unified LTO. This makes sure internalization does +; happen in runRegularLTO(). 
+; RUN: llvm-lto2 run %t.bc -o %t.o -save-temps --lto=full \ +; RUN: -r=%t.bc,salad,pxl \ +; RUN: -r=%t.bc,balsamic,pl \ +; RUN: -r=%t.bc,thousandisland,pl \ +; RUN: -r=%t.bc,main,pxl \ +; RUN: -r %t.bc,ranch,px \ +; RUN: -r %t.bc,egg, \ +; RUN: -r %t.bc,bar,px +; RUN: llvm-dis < %t.o.0.2.internalize.bc | FileCheck %s + +; CHECK: @llvm.used = appending global {{.*}} @bar +; CHECK: define dso_local dllexport void @thousandisland +; CHECK: define dso_local void @salad +; CHECK: define internal void @balsamic +; CHECK: define dso_local void @main +; CHECK: define available_externally void @egg() + +target triple = "x86_64-scei-ps4" +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" + +define void @salad() { + call void @balsamic() + ret void +} +define void @balsamic() { + ret void +} +define dllexport void @thousandisland() { + ret void +} + +define void @main() { + ret void +} + +define void ()* @ranch() { + ret void ()* @egg +} + +define available_externally void @egg() { + ret void +} + +%"foo.1" = type { i8, i8 } +declare dso_local i32 @bar(%"foo.1"* nocapture readnone %this) local_unnamed_addr +@llvm.used = appending global [2 x i8*] [i8* bitcast (i32 (%"foo.1"*)* @bar to i8*), i8* bitcast (void ()* @thousandisland to i8*)], section "llvm.metadata" Index: llvm/test/LTO/X86/whole-program-no-crash.ll =================================================================== --- /dev/null +++ llvm/test/LTO/X86/whole-program-no-crash.ll @@ -0,0 +1,103 @@ +; Run the ThinLTO and LTO backends on a module with +; devirtualization metadata. In previous versions of the compiler, +; this crashed.
+; RUN: opt -unified-lto -thinlto-bc <%s -o %t0.o +; RUN: llvm-lto --thinlto-action=run %t0.o -thinlto-save-objects=%t +; RUN: llvm-lto %t0.o + +; ModuleID = 'llvm/test/LTO/X86/whole-program-no-crash.ll +source_filename = "main.cpp" +target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-scei-ps4" + +%struct.Square = type { %struct.Shape, double } +%struct.Shape = type { i32 (...)** } + +@.str = private unnamed_addr constant [21 x i8] c"Area of a circle: %e\00", align 1 +@.str.1 = private unnamed_addr constant [21 x i8] c"Area of a square: %e\00", align 1 +@.str.2 = private unnamed_addr constant [30 x i8] c"Area of a circle, squared: %f\00", align 1 +@.str.3 = private unnamed_addr constant [30 x i8] c"Area of a square, squared: %f\00", align 1 + +; Function Attrs: norecurse nounwind uwtable +define hidden i32 @main(i32 %argc, i8** nocapture readnone %argv) local_unnamed_addr #0 { +entry: + %call = tail call i8* @_Znwm(i64 16) #6 + %0 = bitcast i8* %call to %struct.Square* + tail call void @_ZN6SquareC1Ed(%struct.Square* nonnull %0, double 1.000000e+00) #3 + %1 = bitcast i8* %call to %struct.Shape* + %call1 = tail call i8* @_Znwm(i64 16) #6 + %2 = bitcast i8* %call1 to %struct.Square* + tail call void @_ZN6SquareC1Ed(%struct.Square* nonnull %2, double 1.000000e+00) #3 + %3 = bitcast i8* %call1 to %struct.Shape* + %4 = bitcast i8* %call to double (%struct.Shape*)*** + %vtable = load double (%struct.Shape*)**, double (%struct.Shape*)*** %4, align 8, !tbaa !3 + %5 = bitcast double (%struct.Shape*)** %vtable to i8* + %6 = tail call i1 @llvm.type.test(i8* %5, metadata !"_ZTS5Shape") + tail call void @llvm.assume(i1 %6) + %7 = load double (%struct.Shape*)*, double (%struct.Shape*)** %vtable, align 8 + %call2 = tail call double %7(%struct.Shape* nonnull %1) #3 + %call3 = tail call i32 (i8*, ...) 
@printf(i8* getelementptr inbounds ([21 x i8], [21 x i8]* @.str, i64 0, i64 0), double %call2) + %8 = bitcast i8* %call1 to double (%struct.Shape*)*** + %vtable4 = load double (%struct.Shape*)**, double (%struct.Shape*)*** %8, align 8, !tbaa !3 + %9 = bitcast double (%struct.Shape*)** %vtable4 to i8* + %10 = tail call i1 @llvm.type.test(i8* %9, metadata !"_ZTS5Shape") + tail call void @llvm.assume(i1 %10) + %11 = load double (%struct.Shape*)*, double (%struct.Shape*)** %vtable4, align 8 + %call6 = tail call double %11(%struct.Shape* nonnull %3) #3 + %call7 = tail call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([21 x i8], [21 x i8]* @.str.1, i64 0, i64 0), double %call6) + %call8 = tail call double @_Z14circle_squaredP5Shape(%struct.Shape* nonnull %1) #3 + %call9 = tail call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([30 x i8], [30 x i8]* @.str.2, i64 0, i64 0), double %call8) + %call10 = tail call double @_Z14square_squaredP5Shape(%struct.Shape* nonnull %3) #3 + %call11 = tail call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([30 x i8], [30 x i8]* @.str.3, i64 0, i64 0), double %call10) + ret i32 0 +} + +; Function Attrs: nobuiltin +declare noalias nonnull i8* @_Znwm(i64) local_unnamed_addr #1 + +declare void @_ZN6SquareC1Ed(%struct.Square*, double) unnamed_addr + +; Function Attrs: nounwind +declare i32 @printf(i8* nocapture readonly, ...) 
local_unnamed_addr #3 + +; Function Attrs: nofree nosync nounwind readnone speculatable willreturn +declare i1 @llvm.type.test(i8*, metadata) #4 + +; Function Attrs: inaccessiblememonly nofree nosync nounwind willreturn +declare void @llvm.assume(i1 noundef) #5 + +declare double @_Z14circle_squaredP5Shape(%struct.Shape*) local_unnamed_addr + +declare double @_Z14square_squaredP5Shape(%struct.Shape*) local_unnamed_addr + +attributes #0 = { norecurse nounwind uwtable } +attributes #1 = { nobuiltin } +attributes #3 = { nounwind } +attributes #4 = { nofree nosync nounwind readnone speculatable willreturn } +attributes #5 = { inaccessiblememonly nofree nosync nounwind willreturn } +attributes #6 = { builtin nounwind } + +!llvm.module.flags = !{!0, !1} +!llvm.ident = !{!2} + +!0 = !{i32 1, !"wchar_size", i32 2} +!1 = !{i32 7, !"PIC Level", i32 2} +!2 = !{!"clang version 7.0.0 (PS4 clang version 99.99.0.1564 e05e1b5f checking)"} +!3 = !{!4, !4, i64 0} +!4 = !{!"vtable pointer", !5, i64 0} +!5 = !{!"Simple C++ TBAA"} + +^0 = module: (path: "llvm/test/LTO/X86/whole-program-no-crash.ll", hash: (160140095, 1084170952, 2125434145, 3248440305, 919813895)) +^1 = gv: (name: "llvm.type.test") ; guid = 608142985856744218 +^2 = gv: (name: ".str", summaries: (variable: (module: ^0, flags: (linkage: private, visibility: default, notEligibleToImport: 0, live: 0, dsoLocal: 1, canAutoHide: 0), varFlags: (readonly: 0, writeonly: 0, constant: 0)))) ; guid = 3057614271122621510 +^3 = gv: (name: ".str.1", summaries: (variable: (module: ^0, flags: (linkage: private, visibility: default, notEligibleToImport: 0, live: 0, dsoLocal: 1, canAutoHide: 0), varFlags: (readonly: 0, writeonly: 0, constant: 0)))) ; guid = 5124566073124437459 +^4 = gv: (name: "_Z14circle_squaredP5Shape") ; guid = 6033955522051173057 +^5 = gv: (name: "llvm.assume") ; guid = 6385187066495850096 +^6 = gv: (name: "printf") ; guid = 7383291119112528047 +^7 = gv: (name: ".str.3", summaries: (variable: (module: ^0, flags: 
(linkage: private, visibility: default, notEligibleToImport: 0, live: 0, dsoLocal: 1, canAutoHide: 0), varFlags: (readonly: 0, writeonly: 0, constant: 0)))) ; guid = 8135577886398900316 +^8 = gv: (name: "_Z14square_squaredP5Shape") ; guid = 8213923296236276854 +^9 = gv: (name: "_ZN6SquareC1Ed") ; guid = 10727975616611545044 +^10 = gv: (name: "main", summaries: (function: (module: ^0, flags: (linkage: external, visibility: default, notEligibleToImport: 0, live: 0, dsoLocal: 1, canAutoHide: 0), insts: 29, funcFlags: (readNone: 0, readOnly: 0, noRecurse: 1, returnDoesNotAlias: 0, noInline: 0, alwaysInline: 0, noUnwind: 0, mayThrow: 0, hasUnknownCall: 0), calls: ((callee: ^11), (callee: ^9), (callee: ^6), (callee: ^4), (callee: ^8)), typeIdInfo: (typeTestAssumeConstVCalls: ((vFuncId: (guid: 14923871475266172186, offset: 0)))), refs: (^2, ^3, ^12, ^7)))) ; guid = 15822663052811949562 +^11 = gv: (name: "_Znwm") ; guid = 16793709562209971782 +^12 = gv: (name: ".str.2", summaries: (variable: (module: ^0, flags: (linkage: private, visibility: default, notEligibleToImport: 0, live: 0, dsoLocal: 1, canAutoHide: 0), varFlags: (readonly: 0, writeonly: 0, constant: 0)))) ; guid = 17414738078732285526 +^13 = blockcount: 0 Index: llvm/test/ThinLTO/X86/dup-cgprofile-flag.ll =================================================================== --- /dev/null +++ llvm/test/ThinLTO/X86/dup-cgprofile-flag.ll @@ -0,0 +1,74 @@ +; RUN: opt <%s -unified-lto -thinlto-bc -thinlto-split-lto-unit -o %t0 +; RUN: llvm-lto2 run %t0 --lto=full -o %t1 \ +; RUN: -r=%t0,freq,px \ +; RUN: -r=%t0,a,px \ +; RUN: -r=%t0,b,px \ +; RUN: -r=%t0,func1,px \ +; RUN: -r=%t0,func2,px \ +; RUN: -r=%t0,func3,px \ +; RUN: -r=%t0,func4,px \ +; RUN: -r=%t0,foo,px +; RUN: llvm-lto2 run %t0 --lto=thin -o %t1 \ +; RUN: -r=%t0,freq,px \ +; RUN: -r=%t0,a,px \ +; RUN: -r=%t0,b,px \ +; RUN: -r=%t0,func1,px \ +; RUN: -r=%t0,func2,px \ +; RUN: -r=%t0,func3,px \ +; RUN: -r=%t0,func4,px \ +; RUN: -r=%t0,foo,px +; RUN: not --crash 
llvm-lto2 run %t0 -o %t1 \ +; RUN: -r=%t0,freq,px \ +; RUN: -r=%t0,a,px \ +; RUN: -r=%t0,b,px \ +; RUN: -r=%t0,func1,px \ +; RUN: -r=%t0,func2,px \ +; RUN: -r=%t0,func3,px \ +; RUN: -r=%t0,func4,px \ +; RUN: -r=%t0,foo,px 2>&1 | FileCheck %s + +; CHECK: module flag identifiers must be unique + +target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-pc-linux-gnu" + +@foo = common global i32 ()* null, align 8 + +declare void @b() + +define void @a() !prof !1 { + call void @b() + ret void +} + +declare i32 @func1() +declare i32 @func2() +declare i32 @func3() +declare i32 @func4() + +define void @freq(i1 %cond) !prof !1 { + %tmp = load i32 ()*, i32 ()** @foo, align 8 + call i32 %tmp(), !prof !3 + br i1 %cond, label %A, label %B, !prof !2 +A: + call void @a(); + ret void +B: + call void @b(); + ret void +} + +!1 = !{!"function_entry_count", i64 32} +!2 = !{!"branch_weights", i32 5, i32 10} +!3 = !{!"VP", i32 0, i64 1600, i64 7651369219802541373, i64 1030, i64 -4377547752858689819, i64 410, i64 -6929281286627296573, i64 150, i64 -2545542355363006406, i64 10} + +!llvm.module.flags = !{!4} +!4 = !{i32 5, !"CG Profile", !5} +!5 = !{!6,!7,!8,!6,!10,!11,!12} +!6 = !{void ()* @a, void ()* @b, i64 32} +!7 = !{void (i1)* @freq, i32 ()* @func4, i64 1030} +!8 = !{void (i1)* @freq, i32 ()* @func2, i64 410} +!9 = !{void (i1)* @freq, i32 ()* @func3, i64 150} +!10 = !{void (i1)* @freq, i32 ()* @func1, i64 10} +!11 = !{void (i1)* @freq, void ()* @a, i64 11} +!12 = !{void (i1)* @freq, void ()* @b, i64 21} Index: llvm/test/Transforms/ThinLTOBitcodeWriter/split-unified.ll =================================================================== --- /dev/null +++ llvm/test/Transforms/ThinLTOBitcodeWriter/split-unified.ll @@ -0,0 +1,25 @@ +; Generate bitcode files with summary, as well as minimized bitcode without +; the debug metadata for the thin link. 
+; RUN: opt -unified-lto -thinlto-bc -thin-link-bitcode-file=%t2 -o %t %s + +; RUN: llvm-modextract -b -n 0 -o %t0.bc %t +; RUN: not llvm-modextract -b -n 1 -o - %t 2>&1 | FileCheck --check-prefix=ERROR %s +; RUN: llvm-dis -o - %t0.bc | FileCheck --check-prefix=M0 %s +; RUN: llvm-bcanalyzer -dump %t0.bc | FileCheck --check-prefix=BCA0 %s + +; ERROR: llvm-modextract: error: module index out of range; bitcode file contains 1 module(s) + +; BCA0: UnifiedLTOMode("lto", cl::Optional, + cl::desc("Set LTO mode"), + cl::value_desc("mode")); + static cl::opt EnableFreestanding( "lto-freestanding", cl::desc("Enable Freestanding (disable builtins / TLI) during LTO"), @@ -319,7 +323,22 @@ HasErrors = true; }; - LTO Lto(std::move(Conf), std::move(Backend)); + LTO::LTOKind LTOMode = LTO::LTOK_Default; + + if (UnifiedLTOMode == "full") { + LTOMode = LTO::LTOK_UnifiedRegular; + } else if (UnifiedLTOMode == "thin") { + LTOMode = LTO::LTOK_UnifiedThin; + } else if (!UnifiedLTOMode.empty()) { + llvm::errs() << "invalid LTO mode\n"; + return 1; + } + + + Conf.UnifiedLTO = (LTOMode != LTO::LTOK_Default); + Conf.PTO.CallGraphProfile = !Conf.UnifiedLTO; + + LTO Lto(std::move(Conf), std::move(Backend), 1, LTOMode); for (std::string F : InputFilenames) { std::unique_ptr MB = check(MemoryBuffer::getFile(F), F); Index: llvm/tools/opt/NewPMDriver.h =================================================================== --- llvm/tools/opt/NewPMDriver.h +++ llvm/tools/opt/NewPMDriver.h @@ -75,7 +75,7 @@ bool ShouldPreserveAssemblyUseListOrder, bool ShouldPreserveBitcodeUseListOrder, bool EmitSummaryIndex, bool EmitModuleHash, - bool EnableDebugify); + bool EnableDebugify, bool UnifiedLTO = false); } // namespace llvm #endif Index: llvm/tools/opt/NewPMDriver.cpp =================================================================== --- llvm/tools/opt/NewPMDriver.cpp +++ llvm/tools/opt/NewPMDriver.cpp @@ -280,7 +280,7 @@ bool ShouldPreserveAssemblyUseListOrder, bool ShouldPreserveBitcodeUseListOrder, 
bool EmitSummaryIndex, bool EmitModuleHash, - bool EnableDebugify) { + bool EnableDebugify, bool UnifiedLTO) { bool VerifyEachPass = VK == VK_VerifyEachPass; Optional P; @@ -345,6 +345,7 @@ // to false above so we shouldn't necessarily need to check whether or not the // option has been enabled. PTO.LoopUnrolling = !DisableLoopUnrolling; + PTO.UnifiedLTO = UnifiedLTO; PassBuilder PB(TM, PTO, P, &PIC); registerEPCallbacks(PB); @@ -474,7 +475,8 @@ break; case OK_OutputThinLTOBitcode: MPM.addPass(ThinLTOBitcodeWriterPass( - Out->os(), ThinLTOLinkOut ? &ThinLTOLinkOut->os() : nullptr)); + Out->os(), ThinLTOLinkOut ? &ThinLTOLinkOut->os() : nullptr, + /*UseDistinctLTOPipelines=*/ !UnifiedLTO)); break; } Index: llvm/tools/opt/opt.cpp =================================================================== --- llvm/tools/opt/opt.cpp +++ llvm/tools/opt/opt.cpp @@ -117,6 +117,11 @@ SplitLTOUnit("thinlto-split-lto-unit", cl::desc("Enable splitting of a ThinLTO LTOUnit")); +static cl::opt<bool> + UnifiedLTO("unified-lto", + cl::desc("Use unified LTO pipelines"), + cl::Hidden, cl::init(false)); + static cl::opt ThinLinkBitcodeFile( "thin-link-bitcode-file", cl::value_desc("filename"), cl::desc( @@ -750,8 +755,11 @@ if (CheckBitcodeOutputToConsole(Out->os())) NoOutput = true; - if (OutputThinLTOBC) + if (OutputThinLTOBC) { M->addModuleFlag(Module::Error, "EnableSplitLTOUnit", SplitLTOUnit); + if (UnifiedLTO) + M->addModuleFlag(Module::Error, "UnifiedLTO", 1); + } // Add an appropriate TargetLibraryInfo pass for the module's triple. TargetLibraryInfoImpl TLII(ModuleTriple); @@ -831,7 +839,7 @@ ThinLinkOut.get(), RemarksFile.get(), Pipeline, Passes, PluginList, OK, VK, PreserveAssemblyUseListOrder, PreserveBitcodeUseListOrder, EmitSummaryIndex, - EmitModuleHash, EnableDebugify) + EmitModuleHash, EnableDebugify, UnifiedLTO) ? 
0 : 1; } @@ -1018,7 +1026,8 @@ Passes.add(createPrintModulePass(*OS, "", PreserveAssemblyUseListOrder)); } else if (OutputThinLTOBC) Passes.add(createWriteThinLTOBitcodePass( - *OS, ThinLinkOut ? &ThinLinkOut->os() : nullptr)); + *OS, ThinLinkOut ? &ThinLinkOut->os() : nullptr, + /*UnifiedLTO=*/ UnifiedLTO)); else Passes.add(createBitcodeWriterPass(*OS, PreserveBitcodeUseListOrder, EmitSummaryIndex, EmitModuleHash));