Index: clang/include/clang/Basic/CodeGenOptions.def =================================================================== --- clang/include/clang/Basic/CodeGenOptions.def +++ clang/include/clang/Basic/CodeGenOptions.def @@ -165,6 +165,7 @@ /// CFI and traditional whole program /// devirtualization that require whole /// program IR support. +CODEGENOPT(UnifiedLTO, 1, 0) ///< Use the unified LTO pipeline. CODEGENOPT(IncrementalLinkerCompatible, 1, 0) ///< Emit an object file which can ///< be used with an incremental ///< linker. Index: clang/include/clang/Driver/Options.td =================================================================== --- clang/include/clang/Driver/Options.td +++ clang/include/clang/Driver/Options.td @@ -2244,6 +2244,11 @@ Alias, AliasArgs<["full"]>, HelpText<"Enable LTO in 'full' mode">; def flto : Flag<["-"], "flto">, Flags<[CoreOption, CC1Option, FC1Option, FlangOption]>, Group, Alias, AliasArgs<["full"]>, HelpText<"Enable LTO in 'full' mode">; +defm unified_lto : BoolFOption<"unified-lto", + CodeGenOpts<"UnifiedLTO">, DefaultFalse, + PosFlag, + NegFlag, + BothFlags<[CC1Option], "">>; def fno_lto : Flag<["-"], "fno-lto">, Flags<[CoreOption, CC1Option]>, Group, HelpText<"Disable LTO mode (default)">; def foffload_lto_EQ : Joined<["-"], "foffload-lto=">, Flags<[CoreOption]>, Group, Index: clang/lib/CodeGen/BackendUtil.cpp =================================================================== --- clang/lib/CodeGen/BackendUtil.cpp +++ clang/lib/CodeGen/BackendUtil.cpp @@ -823,6 +823,7 @@ // Only enable CGProfilePass when using integrated assembler, since // non-integrated assemblers don't recognize .cgprofile section. 
PTO.CallGraphProfile = !CodeGenOpts.DisableIntegratedAS; + PTO.UnifiedLTO = CodeGenOpts.UnifiedLTO; LoopAnalysisManager LAM; FunctionAnalysisManager FAM; @@ -995,7 +996,10 @@ if (IsThinLTO) { MPM = PB.buildThinLTOPreLinkDefaultPipeline(Level); } else if (IsLTO) { - MPM = PB.buildLTOPreLinkDefaultPipeline(Level); + if (CodeGenOpts.UnifiedLTO) + MPM = PB.buildThinLTOPreLinkDefaultPipeline(Level); + else + MPM = PB.buildLTOPreLinkDefaultPipeline(Level); } else { MPM = PB.buildPerModuleDefaultPipeline(Level); } @@ -1023,8 +1027,11 @@ if (!ThinLinkOS) return; } - MPM.addPass(ThinLTOBitcodeWriterPass(*OS, ThinLinkOS ? &ThinLinkOS->os() - : nullptr)); + if (CodeGenOpts.UnifiedLTO) + TheModule->addModuleFlag(Module::Error, "UnifiedLTO", uint32_t(1)); + MPM.addPass(ThinLTOBitcodeWriterPass( + *OS, ThinLinkOS ? &ThinLinkOS->os() : nullptr, + CodeGenOpts.UnifiedLTO)); } else { MPM.addPass(PrintModulePass(*OS, "", CodeGenOpts.EmitLLVMUseLists, /*EmitLTOSummary=*/true)); @@ -1035,11 +1042,14 @@ // targets bool EmitLTOSummary = shouldEmitRegularLTOSummary(); if (EmitLTOSummary) { - if (!TheModule->getModuleFlag("ThinLTO")) + if (!TheModule->getModuleFlag("ThinLTO") && !CodeGenOpts.UnifiedLTO) TheModule->addModuleFlag(Module::Error, "ThinLTO", uint32_t(0)); + if (CodeGenOpts.UnifiedLTO) + TheModule->addModuleFlag(Module::Error, "UnifiedLTO", uint32_t(1)); if (!TheModule->getModuleFlag("EnableSplitLTOUnit")) - TheModule->addModuleFlag(Module::Error, "EnableSplitLTOUnit", - uint32_t(1)); + TheModule->addModuleFlag( + Module::Error, "EnableSplitLTOUnit", + !CodeGenOpts.UnifiedLTO ? uint32_t(1) : !CodeGenOpts.UnifiedLTO); } if (Action == Backend_EmitBC) MPM.addPass(BitcodeWriterPass(*OS, CodeGenOpts.EmitLLVMUseLists, Index: clang/lib/Driver/ToolChains/Clang.cpp =================================================================== --- clang/lib/Driver/ToolChains/Clang.cpp +++ clang/lib/Driver/ToolChains/Clang.cpp @@ -4721,6 +4721,11 @@ // Select the appropriate action. 
RewriteKind rewriteKind = RK_None; + bool UnifiedLTO = Args.hasFlag(options::OPT_funified_lto, + options::OPT_fno_unified_lto, false); + if (UnifiedLTO) + CmdArgs.push_back("-funified-lto"); + // If CollectArgsForIntegratedAssembler() isn't called below, claim the args // it claims when not running an assembler. Otherwise, clang would emit // "argument unused" warnings for assembler flags when e.g. adding "-E" to @@ -4865,7 +4870,9 @@ assert(LTOMode == LTOK_Full || LTOMode == LTOK_Thin); CmdArgs.push_back(Args.MakeArgString( Twine("-flto=") + (LTOMode == LTOK_Thin ? "thin" : "full"))); - CmdArgs.push_back("-flto-unit"); + if ((RawTriple.getOS() != llvm::Triple::PS4) || + (D.getLTOMode() == LTOK_Full) || !UnifiedLTO) + CmdArgs.push_back("-flto-unit"); } } } @@ -7259,17 +7266,21 @@ } if (WholeProgramVTables) { - // Propagate -fwhole-program-vtables if this is an LTO compile. - if (IsUsingLTO) - CmdArgs.push_back("-fwhole-program-vtables"); + bool IsPS4 = getToolChain().getTriple().isPS4(); + // Check if we passed LTO options but they were suppressed because this is a // device offloading action, or we passed device offload LTO options which // were suppressed because this is not the device offload action. // Otherwise, issue an error. - else if (!D.isUsingLTO(!IsDeviceOffloadAction)) + if ((!IsUsingLTO && !D.isUsingLTO(!IsDeviceOffloadAction)) || + (IsPS4 && !UnifiedLTO && (D.getLTOMode() != LTOK_Full))) D.Diag(diag::err_drv_argument_only_allowed_with) << "-fwhole-program-vtables" - << "-flto"; + << ((IsPS4 && !UnifiedLTO) ? "-flto=full" : "-flto"); + + // Propagate -fwhole-program-vtables if this is an LTO compile. 
+ if (IsUsingLTO) + CmdArgs.push_back("-fwhole-program-vtables"); } bool DefaultsSplitLTOUnit = Index: clang/lib/Driver/ToolChains/PS4CPU.cpp =================================================================== --- clang/lib/Driver/ToolChains/PS4CPU.cpp +++ clang/lib/Driver/ToolChains/PS4CPU.cpp @@ -204,6 +204,13 @@ if (!Args.hasArg(options::OPT_nostdlib, options::OPT_nodefaultlibs)) TC.addSanitizerArgs(Args, CmdArgs, "-l", ""); + if (D.isUsingLTO() && Args.hasArg(options::OPT_funified_lto)) { + if (D.getLTOMode() == LTOK_Thin) + CmdArgs.push_back("--lto=thin"); + else if (D.getLTOMode() == LTOK_Full) + CmdArgs.push_back("--lto=full"); + } + Args.AddAllArgs(CmdArgs, options::OPT_L); Args.AddAllArgs(CmdArgs, options::OPT_T_Group); Args.AddAllArgs(CmdArgs, options::OPT_e); Index: clang/test/CodeGen/asan-unified-lto.ll =================================================================== --- /dev/null +++ clang/test/CodeGen/asan-unified-lto.ll @@ -0,0 +1,18 @@ +; Verify that in the cases of explicit distinct LTO pipelines, +; explicit unified LTO pipelines, and the default LTO pipeline, +; there is no crash and the anonymous global is named +; as expected. 
+ +; RUN: %clang_cc1 -emit-llvm-bc -O1 -flto -fsanitize=address -o - -x ir < %s | llvm-dis -o - | FileCheck %s +; RUN: %clang_cc1 -emit-llvm-bc -O1 -flto -funified-lto -fsanitize=address -o - -x ir < %s | llvm-dis -o - | FileCheck %s +; CHECK: @anon.3ee0898e5200a57350fed5485ae5d237 + +target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-pc-linux-gnu" + +@.str = private unnamed_addr constant [5 x i8] c"none\00", align 1 + +define i8* @f() { + %ptr = getelementptr inbounds [5 x i8], [5 x i8]* @.str, i32 0, i32 0 + ret i8* %ptr +} Index: clang/test/CodeGen/emit-summary-index.c =================================================================== --- clang/test/CodeGen/emit-summary-index.c +++ clang/test/CodeGen/emit-summary-index.c @@ -14,4 +14,9 @@ // RUN: %clang_cc1 -flto -triple x86_64-pc-linux-gnu -emit-llvm-bc -disable-llvm-passes < %s -o %t.bc // RUN: %clang_cc1 -flto -triple x86_64-pc-linux-gnu -emit-llvm-bc -x ir < %t.bc | llvm-bcanalyzer -dump | FileCheck --check-prefix=LTOINDEX %s +// RUN: %clang_cc1 -flto=thin -funified-lto -emit-llvm-bc < %s | llvm-bcanalyzer -dump | FileCheck --check-prefix=UNITHIN %s +// RUN: %clang_cc1 -flto -funified-lto -emit-llvm-bc < %s | llvm-bcanalyzer -dump | FileCheck --check-prefix=UNITHIN %s + +// UNITHIN: &1 | FileCheck --check-prefix=UNIT %s // RUN: %clang -target x86_64-apple-darwin13.3.0 -### %s -flto=full 2>&1 | FileCheck --check-prefix=UNIT %s // RUN: %clang -target x86_64-apple-darwin13.3.0 -### %s -flto=thin 2>&1 | FileCheck --check-prefix=UNIT %s -// RUN: %clang -target x86_64-scei-ps4 -### %s -flto=full 2>&1 | FileCheck --check-prefix=UNIT %s -// RUN: %clang -target x86_64-scei-ps4 -### %s -flto=thin 2>&1 | FileCheck --check-prefix=UNIT %s +// RUN: %clang -target x86_64-scei-ps4 -### %s -flto=full -funified-lto 2>&1 | FileCheck --check-prefix=UNIT %s +// RUN: %clang -target x86_64-scei-ps4 -### %s -flto=thin -funified-lto 2>&1 | FileCheck 
--check-prefix=NOUNIT %s +// RUN: %clang -target x86_64-unknown-linux -### %s -flto=full -funified-lto 2>&1 | FileCheck --check-prefix=UNIT %s +// RUN: %clang -target x86_64-unknown-linux -### %s -flto=thin -funified-lto 2>&1 | FileCheck --check-prefix=UNIT %s // UNIT: "-flto-unit" +// NOUNIT-NOT: "-flto-unit" Index: clang/test/Driver/unified-whole-program-vtables.c =================================================================== --- /dev/null +++ clang/test/Driver/unified-whole-program-vtables.c @@ -0,0 +1,3 @@ +// RUN: %clang -target x86_64-pc-linux-gnu -fwhole-program-vtables -funified-lto -### %s 2>&1 | FileCheck %s +// RUN: %clang -target x86_64-pc-linux-gnu -fwhole-program-vtables -fno-unified-lto -### %s 2>&1 | FileCheck %s +// CHECK: invalid argument '-fwhole-program-vtables' only allowed with '-flto' Index: lld/ELF/Config.h =================================================================== --- lld/ELF/Config.h +++ lld/ELF/Config.h @@ -17,6 +17,7 @@ #include "llvm/ADT/StringRef.h" #include "llvm/ADT/StringSet.h" #include "llvm/BinaryFormat/ELF.h" +#include "llvm/LTO/LTO.h" #include "llvm/Option/ArgList.h" #include "llvm/Support/CachePruning.h" #include "llvm/Support/CodeGen.h" @@ -405,6 +406,8 @@ // this means to map the primary and thread stacks as PROT_MTE. Note: This is // not supported on Android 11 & 12. bool androidMemtagStack; + // When using a unified pre-link LTO pipeline, specify the backend LTO mode. 
+ llvm::lto::LTO::LTOKind forceLTOMode = llvm::lto::LTO::LTOK_Default; unsigned threadCount; Index: lld/ELF/Driver.cpp =================================================================== --- lld/ELF/Driver.cpp +++ lld/ELF/Driver.cpp @@ -1454,6 +1454,21 @@ config->mllvmOpts.emplace_back(arg->getValue()); } + config->threadCount = parallel::strategy.compute_thread_count(); + + config->forceLTOMode = llvm::lto::LTO::LTOK_Default; + if (auto *arg = args.getLastArg(OPT_lto_type_eq)) { + StringRef s = arg->getValue(); + if (s == "thin") + config->forceLTOMode = llvm::lto::LTO::LTOK_UnifiedThin; + else if (s == "full") + config->forceLTOMode = llvm::lto::LTO::LTOK_UnifiedRegular; + else if (s == "default") + config->forceLTOMode = llvm::lto::LTO::LTOK_Default; + else + error("unknown LTO mode: " + s); + } + // --threads= takes a positive integer and provides the default value for // --thinlto-jobs=. If unspecified, cap the number of threads since // overhead outweighs optimization for used parallel algorithms for the Index: lld/ELF/LTO.cpp =================================================================== --- lld/ELF/LTO.cpp +++ lld/ELF/LTO.cpp @@ -206,8 +206,8 @@ config->thinLTOEmitImportsFiles); } - ltoObj = std::make_unique(createConfig(), backend, - config->ltoPartitions); + ltoObj = std::make_unique( + createConfig(), backend, config->ltoPartitions, config->forceLTOMode); // Initialize usedStartStop. if (ctx.bitcodeFiles.empty()) Index: lld/ELF/Options.td =================================================================== --- lld/ELF/Options.td +++ lld/ELF/Options.td @@ -560,6 +560,9 @@ def: Flag<["-"], "V">, Alias, HelpText<"Alias for -v">; // LTO-related options. + +def lto_type_eq: J<"lto=">, HelpText<"Set LTO backend">, + MetaVarName<"[full,thin]">; def lto_aa_pipeline: JJ<"lto-aa-pipeline=">, HelpText<"AA pipeline to run during LTO. Used in conjunction with -lto-newpm-passes">; def lto_debug_pass_manager: FF<"lto-debug-pass-manager">,