Index: clang/include/clang/Basic/CodeGenOptions.def =================================================================== --- clang/include/clang/Basic/CodeGenOptions.def +++ clang/include/clang/Basic/CodeGenOptions.def @@ -166,6 +166,7 @@ /// CFI and traditional whole program /// devirtualization that require whole /// program IR support. +CODEGENOPT(UnifiedLTO, 1, 0) ///< Use the unified LTO pipeline. CODEGENOPT(IncrementalLinkerCompatible, 1, 0) ///< Emit an object file which can ///< be used with an incremental ///< linker. Index: clang/include/clang/Driver/Options.td =================================================================== --- clang/include/clang/Driver/Options.td +++ clang/include/clang/Driver/Options.td @@ -2245,6 +2245,11 @@ Alias, AliasArgs<["full"]>, HelpText<"Enable LTO in 'full' mode">; def flto : Flag<["-"], "flto">, Flags<[CoreOption, CC1Option, FC1Option, FlangOption]>, Group, Alias, AliasArgs<["full"]>, HelpText<"Enable LTO in 'full' mode">; +defm unified_lto : BoolFOption<"unified-lto", + CodeGenOpts<"UnifiedLTO">, DefaultFalse, + PosFlag, + NegFlag, + BothFlags<[CC1Option], "">>; def fno_lto : Flag<["-"], "fno-lto">, Flags<[CoreOption, CC1Option]>, Group, HelpText<"Disable LTO mode (default)">; def foffload_lto_EQ : Joined<["-"], "foffload-lto=">, Flags<[CoreOption]>, Group, Index: clang/lib/CodeGen/BackendUtil.cpp =================================================================== --- clang/lib/CodeGen/BackendUtil.cpp +++ clang/lib/CodeGen/BackendUtil.cpp @@ -822,6 +822,7 @@ // Only enable CGProfilePass when using integrated assembler, since // non-integrated assemblers don't recognize .cgprofile section. 
PTO.CallGraphProfile = !CodeGenOpts.DisableIntegratedAS; + PTO.UnifiedLTO = CodeGenOpts.UnifiedLTO; LoopAnalysisManager LAM; FunctionAnalysisManager FAM; @@ -992,7 +993,10 @@ }); if (IsThinLTO) { - MPM = PB.buildThinLTOPreLinkDefaultPipeline(Level); + if (CodeGenOpts.UnifiedLTO) + MPM = PB.buildLTOPreLinkDefaultPipeline(Level); + else + MPM = PB.buildThinLTOPreLinkDefaultPipeline(Level); } else if (IsLTO) { MPM = PB.buildLTOPreLinkDefaultPipeline(Level); } else { @@ -1022,8 +1026,11 @@ if (!ThinLinkOS) return; } + if (CodeGenOpts.UnifiedLTO) + TheModule->addModuleFlag(Module::Error, "UnifiedLTO", uint32_t(1)); MPM.addPass(ThinLTOBitcodeWriterPass(*OS, ThinLinkOS ? &ThinLinkOS->os() - : nullptr)); + : nullptr, + CodeGenOpts.UnifiedLTO)); } else { MPM.addPass(PrintModulePass(*OS, "", CodeGenOpts.EmitLLVMUseLists, /*EmitLTOSummary=*/true)); @@ -1034,11 +1041,14 @@ // targets bool EmitLTOSummary = shouldEmitRegularLTOSummary(); if (EmitLTOSummary) { - if (!TheModule->getModuleFlag("ThinLTO")) + if (!TheModule->getModuleFlag("ThinLTO") && !CodeGenOpts.UnifiedLTO) TheModule->addModuleFlag(Module::Error, "ThinLTO", uint32_t(0)); + if (CodeGenOpts.UnifiedLTO) + TheModule->addModuleFlag(Module::Error, "UnifiedLTO", uint32_t(1)); if (!TheModule->getModuleFlag("EnableSplitLTOUnit")) TheModule->addModuleFlag(Module::Error, "EnableSplitLTOUnit", - uint32_t(1)); + !CodeGenOpts.UnifiedLTO ? + uint32_t(1) : !CodeGenOpts.UnifiedLTO); } if (Action == Backend_EmitBC) MPM.addPass(BitcodeWriterPass(*OS, CodeGenOpts.EmitLLVMUseLists, Index: clang/lib/Driver/ToolChains/Clang.cpp =================================================================== --- clang/lib/Driver/ToolChains/Clang.cpp +++ clang/lib/Driver/ToolChains/Clang.cpp @@ -4683,6 +4683,12 @@ // Select the appropriate action. 
RewriteKind rewriteKind = RK_None; + bool UnifiedLTO = + Args.hasFlag(options::OPT_funified_lto, + options::OPT_fno_unified_lto, false); + if (UnifiedLTO) + CmdArgs.push_back("-funified-lto"); + // If CollectArgsForIntegratedAssembler() isn't called below, claim the args // it claims when not running an assembler. Otherwise, clang would emit // "argument unused" warnings for assembler flags when e.g. adding "-E" to @@ -4827,7 +4833,9 @@ assert(LTOMode == LTOK_Full || LTOMode == LTOK_Thin); CmdArgs.push_back(Args.MakeArgString( Twine("-flto=") + (LTOMode == LTOK_Thin ? "thin" : "full"))); - CmdArgs.push_back("-flto-unit"); + if ((RawTriple.getOS() != llvm::Triple::PS4) || + (D.getLTOMode() == LTOK_Full) || !UnifiedLTO) + CmdArgs.push_back("-flto-unit"); } } } @@ -7216,17 +7224,21 @@ } if (WholeProgramVTables) { - // Propagate -fwhole-program-vtables if this is an LTO compile. - if (IsUsingLTO) - CmdArgs.push_back("-fwhole-program-vtables"); + bool IsPS4 = getToolChain().getTriple().isPS4(); + // Check if we passed LTO options but they were suppressed because this is a // device offloading action, or we passed device offload LTO options which // were suppressed because this is not the device offload action. // Otherwise, issue an error. - else if (!D.isUsingLTO(!IsDeviceOffloadAction)) + if ((!IsUsingLTO && !D.isUsingLTO(!IsDeviceOffloadAction)) || + (IsPS4 && !UnifiedLTO && (D.getLTOMode() != LTOK_Full))) D.Diag(diag::err_drv_argument_only_allowed_with) << "-fwhole-program-vtables" - << "-flto"; + << ((IsPS4 && !UnifiedLTO) ? "-flto=full" : "-flto"); + + // Propagate -fwhole-program-vtables if this is an LTO compile. 
+ if (IsUsingLTO) + CmdArgs.push_back("-fwhole-program-vtables"); } bool DefaultsSplitLTOUnit = Index: clang/lib/Driver/ToolChains/PS4CPU.cpp =================================================================== --- clang/lib/Driver/ToolChains/PS4CPU.cpp +++ clang/lib/Driver/ToolChains/PS4CPU.cpp @@ -204,6 +204,13 @@ if (!Args.hasArg(options::OPT_nostdlib, options::OPT_nodefaultlibs)) TC.addSanitizerArgs(Args, CmdArgs, "-l", ""); + if (D.isUsingLTO() && Args.hasArg(options::OPT_funified_lto)) { + if (D.getLTOMode() == LTOK_Thin) + CmdArgs.push_back("--lto=thin"); + else if (D.getLTOMode() == LTOK_Full) + CmdArgs.push_back("--lto=full"); + } + Args.AddAllArgs(CmdArgs, options::OPT_L); Args.AddAllArgs(CmdArgs, options::OPT_T_Group); Args.AddAllArgs(CmdArgs, options::OPT_e); Index: clang/lib/Frontend/CompilerInvocation.cpp =================================================================== --- clang/lib/Frontend/CompilerInvocation.cpp +++ clang/lib/Frontend/CompilerInvocation.cpp @@ -1776,6 +1776,12 @@ else if (S != "full") Diags.Report(diag::err_drv_invalid_value) << A->getAsString(Args) << S; } + + if (Opts.UnifiedLTO && Opts.PrepareForLTO) { + Opts.PrepareForThinLTO = true; + Opts.PrepareForLTO = false; + } + if (Arg *A = Args.getLastArg(OPT_fthinlto_index_EQ)) { if (IK.getLanguage() != Language::LLVM_IR) Diags.Report(diag::err_drv_argument_only_allowed_with) Index: clang/test/CodeGen/asan-unified-lto.ll =================================================================== --- /dev/null +++ clang/test/CodeGen/asan-unified-lto.ll @@ -0,0 +1,18 @@ +; Verify that in the cases of explicit distinct LTO pipelines, +; explicit unified LTO pipelines, and the default LTO pipeline, +; there is no crash and the anonymous global is named +; as expected. 
+ +; RUN: %clang_cc1 -emit-llvm-bc -O1 -flto -fsanitize=address -o - -x ir < %s | llvm-dis -o - | FileCheck %s +; RUN: %clang_cc1 -emit-llvm-bc -O1 -flto -funified-lto -fsanitize=address -o - -x ir < %s | llvm-dis -o - | FileCheck %s +; CHECK: @anon.3ee0898e5200a57350fed5485ae5d237 + +target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-pc-linux-gnu" + +@.str = private unnamed_addr constant [5 x i8] c"none\00", align 1 + +define i8* @f() { + %ptr = getelementptr inbounds [5 x i8], [5 x i8]* @.str, i32 0, i32 0 + ret i8* %ptr +} Index: clang/test/CodeGen/emit-summary-index.c =================================================================== --- clang/test/CodeGen/emit-summary-index.c +++ clang/test/CodeGen/emit-summary-index.c @@ -14,4 +14,9 @@ // RUN: %clang_cc1 -flto -triple x86_64-pc-linux-gnu -emit-llvm-bc -disable-llvm-passes < %s -o %t.bc // RUN: %clang_cc1 -flto -triple x86_64-pc-linux-gnu -emit-llvm-bc -x ir < %t.bc | llvm-bcanalyzer -dump | FileCheck --check-prefix=LTOINDEX %s +// RUN: %clang_cc1 -flto=thin -funified-lto -emit-llvm-bc < %s | llvm-bcanalyzer -dump | FileCheck --check-prefix=UNITHIN %s +// RUN: %clang_cc1 -flto -funified-lto -emit-llvm-bc < %s | llvm-bcanalyzer -dump | FileCheck --check-prefix=UNITHIN %s + +// UNITHIN: &1 | FileCheck --check-prefix=UNIT %s // RUN: %clang -target x86_64-apple-darwin13.3.0 -### %s -flto=full 2>&1 | FileCheck --check-prefix=UNIT %s // RUN: %clang -target x86_64-apple-darwin13.3.0 -### %s -flto=thin 2>&1 | FileCheck --check-prefix=UNIT %s -// RUN: %clang -target x86_64-scei-ps4 -### %s -flto=full 2>&1 | FileCheck --check-prefix=UNIT %s -// RUN: %clang -target x86_64-scei-ps4 -### %s -flto=thin 2>&1 | FileCheck --check-prefix=UNIT %s +// RUN: %clang -target x86_64-scei-ps4 -### %s -flto=full -funified-lto 2>&1 | FileCheck --check-prefix=UNIT %s +// RUN: %clang -target x86_64-scei-ps4 -### %s -flto=thin -funified-lto 2>&1 | FileCheck 
--check-prefix=NOUNIT %s +// RUN: %clang -target x86_64-unknown-linux -### %s -flto=full -funified-lto 2>&1 | FileCheck --check-prefix=UNIT %s +// RUN: %clang -target x86_64-unknown-linux -### %s -flto=thin -funified-lto 2>&1 | FileCheck --check-prefix=UNIT %s // UNIT: "-flto-unit" +// NOUNIT-NOT: "-flto-unit" Index: clang/test/Driver/unified-whole-program-vtables.c =================================================================== --- /dev/null +++ clang/test/Driver/unified-whole-program-vtables.c @@ -0,0 +1,3 @@ +// RUN: %clang -target x86_64-pc-linux-gnu -fwhole-program-vtables -funified-lto -### %s 2>&1 | FileCheck %s +// RUN: %clang -target x86_64-pc-linux-gnu -fwhole-program-vtables -fno-unified-lto -### %s 2>&1 | FileCheck %s +// CHECK: invalid argument '-fwhole-program-vtables' only allowed with '-flto' Index: lld/ELF/Config.h =================================================================== --- lld/ELF/Config.h +++ lld/ELF/Config.h @@ -18,6 +18,7 @@ #include "llvm/ADT/StringSet.h" #include "llvm/BinaryFormat/ELF.h" #include "llvm/Option/ArgList.h" +#include "llvm/LTO/LTO.h" #include "llvm/Support/CachePruning.h" #include "llvm/Support/CodeGen.h" #include "llvm/Support/Compiler.h" @@ -405,6 +406,8 @@ // this means to map the primary and thread stacks as PROT_MTE. Note: This is // not supported on Android 11 & 12. bool androidMemtagStack; + // When using a unified pre-link LTO pipeline, specify the backend LTO mode. 
+ llvm::lto::LTO::LTOKind forceLTOMode = llvm::lto::LTO::LTOK_Default; unsigned threadCount; }; Index: lld/ELF/Driver.cpp =================================================================== --- lld/ELF/Driver.cpp +++ lld/ELF/Driver.cpp @@ -1422,6 +1422,19 @@ config->threadCount = parallel::strategy.compute_thread_count(); + config->forceLTOMode = llvm::lto::LTO::LTOK_Default; + if (auto *arg = args.getLastArg(OPT_lto_type_eq)) { + StringRef s = arg->getValue(); + if (s == "thin") + config->forceLTOMode = llvm::lto::LTO::LTOK_UnifiedThin; + else if (s == "full") + config->forceLTOMode = llvm::lto::LTO::LTOK_UnifiedRegular; + else if (s == "default") + config->forceLTOMode = llvm::lto::LTO::LTOK_Default; + else + error("unknown LTO mode: " + s); + } + // --threads= takes a positive integer and provides the default value for // --thinlto-jobs=. If unspecified, cap the number of threads since // overhead outweighs optimization for used parallel algorithms for the Index: lld/ELF/LTO.cpp =================================================================== --- lld/ELF/LTO.cpp +++ lld/ELF/LTO.cpp @@ -208,7 +208,8 @@ } ltoObj = std::make_unique(createConfig(), backend, - config->ltoPartitions); + config->ltoPartitions, + config->forceLTOMode); // Initialize usedStartStop. if (ctx.bitcodeFiles.empty()) Index: lld/ELF/Options.td =================================================================== --- lld/ELF/Options.td +++ lld/ELF/Options.td @@ -552,6 +552,9 @@ def: Flag<["-"], "V">, Alias, HelpText<"Alias for -v">; // LTO-related options. + +def lto_type_eq: J<"lto=">, HelpText<"Set LTO backend">, + MetaVarName<"[full,thin]">; def lto_aa_pipeline: JJ<"lto-aa-pipeline=">, HelpText<"AA pipeline to run during LTO. 
Used in conjunction with -lto-newpm-passes">; def lto_debug_pass_manager: FF<"lto-debug-pass-manager">, Index: llvm/test/LTO/Resolution/X86/unified-lto-check.ll =================================================================== --- /dev/null +++ llvm/test/LTO/Resolution/X86/unified-lto-check.ll @@ -0,0 +1,46 @@ +; Test to ensure that the Unified LTO flag is set properly in the summary, and +; that we correctly silently handle linking bitcode files with different values +; of this flag. + +; Linking bitcode both without UnifiedLTO set should work +; RUN: opt -thinlto-bc -thinlto-split-lto-unit -o %t1 %s +; RUN: llvm-bcanalyzer -dump %t1 | FileCheck %s --check-prefix=NOUNIFIEDLTO +; RUN: llvm-dis -o - %t1 | FileCheck %s --check-prefix=NOUNIFIEDLTOFLAG +; RUN: opt -thinlto-bc -thinlto-split-lto-unit -o %t2 %s +; RUN: llvm-bcanalyzer -dump %t2 | FileCheck %s --check-prefix=NOUNIFIEDLTO +; RUN: llvm-dis -o - %t2 | FileCheck %s --check-prefix=NOUNIFIEDLTOFLAG +; RUN: llvm-lto2 run -o %t3 %t1 %t2 + +; Linking bitcode with different values of UnifiedLTO should fail +; RUN: opt -thinlto-bc -thinlto-split-lto-unit -o %t1 %s +; RUN: llvm-bcanalyzer -dump %t1 | FileCheck %s --check-prefix=NOUNIFIEDLTO +; RUN: llvm-dis -o - %t1 | FileCheck %s --check-prefix=NOUNIFIEDLTOFLAG +; RUN: opt -unified-lto -thinlto-bc -thinlto-split-lto-unit -o %t2 %s +; RUN: llvm-bcanalyzer -dump %t2 | FileCheck %s --check-prefix=UNIFIEDLTO +; RUN: llvm-dis -o - %t2 | FileCheck %s --check-prefix=UNIFIEDLTOFLAG +; RUN: not llvm-lto2 run --lto=thin -o %t3 %t1 %t2 2>&1 | \ +; RUN: FileCheck --allow-empty %s --check-prefix UNIFIEDERR + +; Linking bitcode with identical Unified LTO flags should succeed +; RUN: opt -unified-lto -thinlto-bc -thinlto-split-lto-unit -o %t1 %s +; RUN: llvm-bcanalyzer -dump %t1 | FileCheck %s --check-prefix=UNIFIEDLTO +; RUN: llvm-dis -o - %t1 | FileCheck %s --check-prefix=UNIFIEDLTOFLAG +; RUN: opt -unified-lto -thinlto-bc -thinlto-split-lto-unit -o %t2 %s +; RUN: 
llvm-bcanalyzer -dump %t2 | FileCheck %s --check-prefix=UNIFIEDLTO +; RUN: llvm-dis -o - %t2 | FileCheck %s --check-prefix=UNIFIEDLTOFLAG +; RUN: llvm-lto2 run --lto=thin -o %t3 %t1 %t2 | \ +; RUN: FileCheck --allow-empty %s --check-prefix NOUNIFIEDERR + +; UNIFIEDERR: unified LTO compilation must use compatible bitcode modules +; NOUNIFIEDERR-NOT: unified LTO compilation must use compatible bitcode modules + +; The flag should be set when UnifiedLTO is enabled +; UNIFIEDLTO: +; NOUNIFIEDLTO: + +; Check that the corresponding module flag is set when expected. +; UNIFIEDLTOFLAG: !{i32 1, !"UnifiedLTO", i32 1} +; NOUNIFIEDLTOFLAG-NOT: !{i32 1, !"UnifiedLTO", i32 1} + +target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" Index: llvm/test/LTO/X86/cfi-func-remove.ll =================================================================== --- /dev/null +++ llvm/test/LTO/X86/cfi-func-remove.ll @@ -0,0 +1,10 @@ +; RUN: opt -thinlto-bc -thinlto-split-lto-unit -unified-lto <%s -o %t0 +; RUN: llvm-lto2 run -o %t1 --lto=full --save-temps %t0 +; RUN: llvm-dis <%t1.0.0.preopt.bc 2>&1 | FileCheck %s --implicit-check-not warning: +target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-pc-linux-gnu" + +!cfi.functions = !{!2} +; CHECK-NOT: cfi.functions + +!2 = !{!"main", i8 0} Index: llvm/test/LTO/X86/unified-cfi.ll =================================================================== --- /dev/null +++ llvm/test/LTO/X86/unified-cfi.ll @@ -0,0 +1,98 @@ +; Test for the expected CFI codegen in a module with CFI metadata. 
+; RUN: opt -unified-lto -thinlto-bc -o %t0.o %s +; RUN: llvm-lto --exported-symbol=main -filetype=asm -o - %t0.o | FileCheck %s + +; CHECK-LABEL: main + +; CHECK: jbe +; CHECK-NEXT: ud2 +; CHECK-NEXT: ud2 + +; ModuleID = 'llvm/test/LTO/X86/unified-cfi.ll' +source_filename = "cfi.cpp" +target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-scei-ps4" + +@func = hidden global [3 x i32 ()*] [i32 ()* @_Z1av, i32 ()* @_Z1bv, i32 ()* @_Z1cv], align 16 +@.src = private unnamed_addr constant [8 x i8] c"cfi.cpp\00", align 1 +@anon.9260195284c792ab5c6ef4d97bfcf95d.0 = private unnamed_addr constant { i16, i16, [9 x i8] } { i16 -1, i16 0, [9 x i8] c"'int ()'\00" } + +; Function Attrs: noinline nounwind optnone sspstrong uwtable +define hidden i32 @_Z1av() #0 !type !3 !type !4 { +entry: + ret i32 1 +} + +; Function Attrs: noinline nounwind optnone sspstrong uwtable +define hidden i32 @_Z1bv() #0 !type !3 !type !4 { +entry: + ret i32 2 +} + +; Function Attrs: noinline nounwind optnone sspstrong uwtable +define hidden i32 @_Z1cv() #0 !type !3 !type !4 { +entry: + ret i32 3 +} + +; Function Attrs: noinline norecurse nounwind optnone sspstrong uwtable +define hidden i32 @main(i32 %argc, i8** %argv) #1 !type !5 !type !6 { +entry: + %retval = alloca i32, align 4 + %argc.addr = alloca i32, align 4 + %argv.addr = alloca i8**, align 8 + store i32 0, i32* %retval, align 4 + store i32 %argc, i32* %argc.addr, align 4 + store i8** %argv, i8*** %argv.addr, align 8 + %0 = load i32, i32* %argc.addr, align 4 + %idxprom = sext i32 %0 to i64 + %arrayidx = getelementptr inbounds [3 x i32 ()*], [3 x i32 ()*]* @func, i64 0, i64 %idxprom + %1 = load i32 ()*, i32 ()** %arrayidx, align 8 + %2 = bitcast i32 ()* %1 to i8*, !nosanitize !7 + %3 = call i1 @llvm.type.test(i8* %2, metadata !"_ZTSFivE"), !nosanitize !7 + br i1 %3, label %cont, label %trap, !nosanitize !7 + +trap: ; preds = %entry + call void @llvm.trap() #4, !nosanitize !7 + 
unreachable, !nosanitize !7 + +cont: ; preds = %entry + %call = call i32 %1() + ret i32 %call +} + +; Function Attrs: nofree nosync nounwind readnone speculatable willreturn +declare i1 @llvm.type.test(i8*, metadata) #2 + +; Function Attrs: cold noreturn nounwind +declare void @llvm.trap() #3 + +attributes #0 = { noinline nounwind optnone sspstrong uwtable } +attributes #1 = { noinline norecurse nounwind optnone sspstrong uwtable } +attributes #2 = { nofree nosync nounwind readnone speculatable willreturn } +attributes #3 = { cold noreturn nounwind } +attributes #4 = { noreturn nounwind } + +!llvm.module.flags = !{!0, !1} +!llvm.ident = !{!2} + +!0 = !{i32 1, !"wchar_size", i32 2} +!1 = !{i32 7, !"PIC Level", i32 2} +!2 = !{!"clang version 7.0.0 (PS4 clang version 99.99.0.1562 432a534f checking)"} +!3 = !{i64 0, !"_ZTSFivE"} +!4 = !{i64 0, !"_ZTSFivE.generalized"} +!5 = !{i64 0, !"_ZTSFiiPPcE"} +!6 = !{i64 0, !"_ZTSFiiPvE.generalized"} +!7 = !{} + +^0 = module: (path: "llvm/test/LTO/X86/unified-cfi.ll", hash: (0, 0, 0, 0, 0)) +^1 = gv: (name: "llvm.type.test") ; guid = 608142985856744218 +^2 = gv: (name: "_Z1cv", summaries: (function: (module: ^0, flags: (linkage: external, visibility: default, notEligibleToImport: 1, live: 0, dsoLocal: 1, canAutoHide: 0), insts: 1))) ; guid = 1031113446561889624 +^3 = gv: (name: "_Z1bv", summaries: (function: (module: ^0, flags: (linkage: external, visibility: default, notEligibleToImport: 1, live: 0, dsoLocal: 1, canAutoHide: 0), insts: 1))) ; guid = 2000451273547961259 +^4 = gv: (name: "_Z1av", summaries: (function: (module: ^0, flags: (linkage: external, visibility: default, notEligibleToImport: 1, live: 0, dsoLocal: 1, canAutoHide: 0), insts: 1))) ; guid = 3456846378323757990 +^5 = gv: (name: ".src", summaries: (variable: (module: ^0, flags: (linkage: private, visibility: default, notEligibleToImport: 0, live: 0, dsoLocal: 1, canAutoHide: 0), varFlags: (readonly: 0, writeonly: 0, constant: 0)))) ; guid = 5614330533059031665 
+^6 = gv: (name: "llvm.trap") ; guid = 6116349651215144041 +^7 = gv: (name: "func", summaries: (variable: (module: ^0, flags: (linkage: external, visibility: default, notEligibleToImport: 0, live: 0, dsoLocal: 1, canAutoHide: 0), varFlags: (readonly: 0, writeonly: 0, constant: 0), refs: (^4, ^3, ^2)))) ; guid = 7289175272376759421 +^8 = gv: (name: "anon.9260195284c792ab5c6ef4d97bfcf95d.0", summaries: (variable: (module: ^0, flags: (linkage: private, visibility: default, notEligibleToImport: 0, live: 0, dsoLocal: 1, canAutoHide: 0), varFlags: (readonly: 0, writeonly: 0, constant: 0)))) ; guid = 10197562899942851386 +^9 = gv: (name: "main", summaries: (function: (module: ^0, flags: (linkage: external, visibility: default, notEligibleToImport: 1, live: 0, dsoLocal: 1, canAutoHide: 0), insts: 17, funcFlags: (readNone: 0, readOnly: 0, noRecurse: 1, returnDoesNotAlias: 0, noInline: 0, alwaysInline: 0, noUnwind: 0, mayThrow: 0, hasUnknownCall: 0), typeIdInfo: (typeTests: (194679795792225349)), refs: (^7)))) ; guid = 15822663052811949562 +^10 = blockcount: 0 Index: llvm/test/LTO/X86/unified-internalize.ll =================================================================== --- /dev/null +++ llvm/test/LTO/X86/unified-internalize.ll @@ -0,0 +1,50 @@ +; RUN: opt <%s -unified-lto -thinlto-split-lto-unit -thinlto-bc -o %t.bc + +; Test internalization during unified LTO. This makes sure internalization does +; happen in runRegularLTO(). 
+; RUN: llvm-lto2 run %t.bc -o %t.o -save-temps --lto=full \ +; RUN: -r=%t.bc,salad,pxl \ +; RUN: -r=%t.bc,balsamic,pl \ +; RUN: -r=%t.bc,thousandisland,pl \ +; RUN: -r=%t.bc,main,pxl \ +; RUN: -r %t.bc,ranch,px \ +; RUN: -r %t.bc,egg, \ +; RUN: -r %t.bc,bar,px +; RUN: llvm-dis < %t.o.0.2.internalize.bc | FileCheck %s + +; CHECK: @llvm.used = appending global {{.*}} @bar +; CHECK: define dso_local dllexport void @thousandisland +; CHECK: define dso_local void @salad +; CHECK: define internal void @balsamic +; CHECK: define dso_local void @main +; CHECK: define available_externally void @egg() + +target triple = "x86_64-scei-ps4" +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" + +define void @salad() { + call void @balsamic() + ret void +} +define void @balsamic() { + ret void +} +define dllexport void @thousandisland() { + ret void +} + +define void @main() { + ret void +} + +define void ()* @ranch() { + ret void ()* @egg +} + +define available_externally void @egg() { + ret void +} + +%"foo.1" = type { i8, i8 } +declare dso_local i32 @bar(%"foo.1"* nocapture readnone %this) local_unnamed_addr +@llvm.used = appending global [2 x i8*] [i8* bitcast (i32 (%"foo.1"*)* @bar to i8*), i8* bitcast (void ()* @thousandisland to i8*)], section "llvm.metadata" Index: llvm/test/LTO/X86/whole-program-no-crash.ll =================================================================== --- /dev/null +++ llvm/test/LTO/X86/whole-program-no-crash.ll @@ -0,0 +1,103 @@ +; Run the ThinLTO and LTO backends on a module with +; devirtualization metadata. In previous versions of the compiler, +; this crashed. 
+; RUN: opt -unified-lto -thinlto-bc <%s -o %t0.o +; RUN: llvm-lto --thinlto-action=run %t0.o -thinlto-save-objects=%t +; RUN: llvm-lto %t0.o + +; ModuleID = 'llvm/test/LTO/X86/whole-program-no-crash.ll +source_filename = "main.cpp" +target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-scei-ps4" + +%struct.Square = type { %struct.Shape, double } +%struct.Shape = type { i32 (...)** } + +@.str = private unnamed_addr constant [21 x i8] c"Area of a circle: %e\00", align 1 +@.str.1 = private unnamed_addr constant [21 x i8] c"Area of a square: %e\00", align 1 +@.str.2 = private unnamed_addr constant [30 x i8] c"Area of a circle, squared: %f\00", align 1 +@.str.3 = private unnamed_addr constant [30 x i8] c"Area of a square, squared: %f\00", align 1 + +; Function Attrs: norecurse nounwind uwtable +define hidden i32 @main(i32 %argc, i8** nocapture readnone %argv) local_unnamed_addr #0 { +entry: + %call = tail call i8* @_Znwm(i64 16) #6 + %0 = bitcast i8* %call to %struct.Square* + tail call void @_ZN6SquareC1Ed(%struct.Square* nonnull %0, double 1.000000e+00) #3 + %1 = bitcast i8* %call to %struct.Shape* + %call1 = tail call i8* @_Znwm(i64 16) #6 + %2 = bitcast i8* %call1 to %struct.Square* + tail call void @_ZN6SquareC1Ed(%struct.Square* nonnull %2, double 1.000000e+00) #3 + %3 = bitcast i8* %call1 to %struct.Shape* + %4 = bitcast i8* %call to double (%struct.Shape*)*** + %vtable = load double (%struct.Shape*)**, double (%struct.Shape*)*** %4, align 8, !tbaa !3 + %5 = bitcast double (%struct.Shape*)** %vtable to i8* + %6 = tail call i1 @llvm.type.test(i8* %5, metadata !"_ZTS5Shape") + tail call void @llvm.assume(i1 %6) + %7 = load double (%struct.Shape*)*, double (%struct.Shape*)** %vtable, align 8 + %call2 = tail call double %7(%struct.Shape* nonnull %1) #3 + %call3 = tail call i32 (i8*, ...) 
@printf(i8* getelementptr inbounds ([21 x i8], [21 x i8]* @.str, i64 0, i64 0), double %call2) + %8 = bitcast i8* %call1 to double (%struct.Shape*)*** + %vtable4 = load double (%struct.Shape*)**, double (%struct.Shape*)*** %8, align 8, !tbaa !3 + %9 = bitcast double (%struct.Shape*)** %vtable4 to i8* + %10 = tail call i1 @llvm.type.test(i8* %9, metadata !"_ZTS5Shape") + tail call void @llvm.assume(i1 %10) + %11 = load double (%struct.Shape*)*, double (%struct.Shape*)** %vtable4, align 8 + %call6 = tail call double %11(%struct.Shape* nonnull %3) #3 + %call7 = tail call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([21 x i8], [21 x i8]* @.str.1, i64 0, i64 0), double %call6) + %call8 = tail call double @_Z14circle_squaredP5Shape(%struct.Shape* nonnull %1) #3 + %call9 = tail call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([30 x i8], [30 x i8]* @.str.2, i64 0, i64 0), double %call8) + %call10 = tail call double @_Z14square_squaredP5Shape(%struct.Shape* nonnull %3) #3 + %call11 = tail call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([30 x i8], [30 x i8]* @.str.3, i64 0, i64 0), double %call10) + ret i32 0 +} + +; Function Attrs: nobuiltin +declare noalias nonnull i8* @_Znwm(i64) local_unnamed_addr #1 + +declare void @_ZN6SquareC1Ed(%struct.Square*, double) unnamed_addr + +; Function Attrs: nounwind +declare i32 @printf(i8* nocapture readonly, ...) 
local_unnamed_addr #3 + +; Function Attrs: nofree nosync nounwind readnone speculatable willreturn +declare i1 @llvm.type.test(i8*, metadata) #4 + +; Function Attrs: inaccessiblememonly nofree nosync nounwind willreturn +declare void @llvm.assume(i1 noundef) #5 + +declare double @_Z14circle_squaredP5Shape(%struct.Shape*) local_unnamed_addr + +declare double @_Z14square_squaredP5Shape(%struct.Shape*) local_unnamed_addr + +attributes #0 = { norecurse nounwind uwtable } +attributes #1 = { nobuiltin } +attributes #3 = { nounwind } +attributes #4 = { nofree nosync nounwind readnone speculatable willreturn } +attributes #5 = { inaccessiblememonly nofree nosync nounwind willreturn } +attributes #6 = { builtin nounwind } + +!llvm.module.flags = !{!0, !1} +!llvm.ident = !{!2} + +!0 = !{i32 1, !"wchar_size", i32 2} +!1 = !{i32 7, !"PIC Level", i32 2} +!2 = !{!"clang version 7.0.0 (PS4 clang version 99.99.0.1564 e05e1b5f checking)"} +!3 = !{!4, !4, i64 0} +!4 = !{!"vtable pointer", !5, i64 0} +!5 = !{!"Simple C++ TBAA"} + +^0 = module: (path: "llvm/test/LTO/X86/whole-program-no-crash.ll", hash: (160140095, 1084170952, 2125434145, 3248440305, 919813895)) +^1 = gv: (name: "llvm.type.test") ; guid = 608142985856744218 +^2 = gv: (name: ".str", summaries: (variable: (module: ^0, flags: (linkage: private, visibility: default, notEligibleToImport: 0, live: 0, dsoLocal: 1, canAutoHide: 0), varFlags: (readonly: 0, writeonly: 0, constant: 0)))) ; guid = 3057614271122621510 +^3 = gv: (name: ".str.1", summaries: (variable: (module: ^0, flags: (linkage: private, visibility: default, notEligibleToImport: 0, live: 0, dsoLocal: 1, canAutoHide: 0), varFlags: (readonly: 0, writeonly: 0, constant: 0)))) ; guid = 5124566073124437459 +^4 = gv: (name: "_Z14circle_squaredP5Shape") ; guid = 6033955522051173057 +^5 = gv: (name: "llvm.assume") ; guid = 6385187066495850096 +^6 = gv: (name: "printf") ; guid = 7383291119112528047 +^7 = gv: (name: ".str.3", summaries: (variable: (module: ^0, flags: 
(linkage: private, visibility: default, notEligibleToImport: 0, live: 0, dsoLocal: 1, canAutoHide: 0), varFlags: (readonly: 0, writeonly: 0, constant: 0)))) ; guid = 8135577886398900316 +^8 = gv: (name: "_Z14square_squaredP5Shape") ; guid = 8213923296236276854 +^9 = gv: (name: "_ZN6SquareC1Ed") ; guid = 10727975616611545044 +^10 = gv: (name: "main", summaries: (function: (module: ^0, flags: (linkage: external, visibility: default, notEligibleToImport: 0, live: 0, dsoLocal: 1, canAutoHide: 0), insts: 29, funcFlags: (readNone: 0, readOnly: 0, noRecurse: 1, returnDoesNotAlias: 0, noInline: 0, alwaysInline: 0, noUnwind: 0, mayThrow: 0, hasUnknownCall: 0), calls: ((callee: ^11), (callee: ^9), (callee: ^6), (callee: ^4), (callee: ^8)), typeIdInfo: (typeTestAssumeConstVCalls: ((vFuncId: (guid: 14923871475266172186, offset: 0)))), refs: (^2, ^3, ^12, ^7)))) ; guid = 15822663052811949562 +^11 = gv: (name: "_Znwm") ; guid = 16793709562209971782 +^12 = gv: (name: ".str.2", summaries: (variable: (module: ^0, flags: (linkage: private, visibility: default, notEligibleToImport: 0, live: 0, dsoLocal: 1, canAutoHide: 0), varFlags: (readonly: 0, writeonly: 0, constant: 0)))) ; guid = 17414738078732285526 +^13 = blockcount: 0 Index: llvm/test/ThinLTO/X86/dup-cgprofile-flag.ll =================================================================== --- /dev/null +++ llvm/test/ThinLTO/X86/dup-cgprofile-flag.ll @@ -0,0 +1,74 @@ +; RUN: opt <%s -unified-lto -thinlto-bc -thinlto-split-lto-unit -o %t0 +; RUN: llvm-lto2 run %t0 --lto=full -o %t1 \ +; RUN: -r=%t0,freq,px \ +; RUN: -r=%t0,a,px \ +; RUN: -r=%t0,b,px \ +; RUN: -r=%t0,func1,px \ +; RUN: -r=%t0,func2,px \ +; RUN: -r=%t0,func3,px \ +; RUN: -r=%t0,func4,px \ +; RUN: -r=%t0,foo,px +; RUN: llvm-lto2 run %t0 --lto=thin -o %t1 \ +; RUN: -r=%t0,freq,px \ +; RUN: -r=%t0,a,px \ +; RUN: -r=%t0,b,px \ +; RUN: -r=%t0,func1,px \ +; RUN: -r=%t0,func2,px \ +; RUN: -r=%t0,func3,px \ +; RUN: -r=%t0,func4,px \ +; RUN: -r=%t0,foo,px +; RUN: not --crash 
llvm-lto2 run %t0 -o %t1 \ +; RUN: -r=%t0,freq,px \ +; RUN: -r=%t0,a,px \ +; RUN: -r=%t0,b,px \ +; RUN: -r=%t0,func1,px \ +; RUN: -r=%t0,func2,px \ +; RUN: -r=%t0,func3,px \ +; RUN: -r=%t0,func4,px \ +; RUN: -r=%t0,foo,px 2>&1 | FileCheck %s + +; CHECK: module flag identifiers must be unique + +target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-pc-linux-gnu" + +@foo = common global i32 ()* null, align 8 + +declare void @b() + +define void @a() !prof !1 { + call void @b() + ret void +} + +declare i32 @func1() +declare i32 @func2() +declare i32 @func3() +declare i32 @func4() + +define void @freq(i1 %cond) !prof !1 { + %tmp = load i32 ()*, i32 ()** @foo, align 8 + call i32 %tmp(), !prof !3 + br i1 %cond, label %A, label %B, !prof !2 +A: + call void @a(); + ret void +B: + call void @b(); + ret void +} + +!1 = !{!"function_entry_count", i64 32} +!2 = !{!"branch_weights", i32 5, i32 10} +!3 = !{!"VP", i32 0, i64 1600, i64 7651369219802541373, i64 1030, i64 -4377547752858689819, i64 410, i64 -6929281286627296573, i64 150, i64 -2545542355363006406, i64 10} + +!llvm.module.flags = !{!4} +!4 = !{i32 5, !"CG Profile", !5} +!5 = !{!6,!7,!8,!6,!10,!11,!12} +!6 = !{void ()* @a, void ()* @b, i64 32} +!7 = !{void (i1)* @freq, i32 ()* @func4, i64 1030} +!8 = !{void (i1)* @freq, i32 ()* @func2, i64 410} +!9 = !{void (i1)* @freq, i32 ()* @func3, i64 150} +!10 = !{void (i1)* @freq, i32 ()* @func1, i64 10} +!11 = !{void (i1)* @freq, void ()* @a, i64 11} +!12 = !{void (i1)* @freq, void ()* @b, i64 21} Index: llvm/test/Transforms/ThinLTOBitcodeWriter/split-unified.ll =================================================================== --- /dev/null +++ llvm/test/Transforms/ThinLTOBitcodeWriter/split-unified.ll @@ -0,0 +1,25 @@ +; Generate bitcode files with summary, as well as minimized bitcode without +; the debug metadata for the thin link. 
+; RUN: opt -unified-lto -thinlto-bc -thin-link-bitcode-file=%t2 -o %t %s + +; RUN: llvm-modextract -b -n 0 -o %t0.bc %t +; RUN: not llvm-modextract -b -n 1 -o - %t 2>&1 | FileCheck --check-prefix=ERROR %s +; RUN: llvm-dis -o - %t0.bc | FileCheck --check-prefix=M0 %s +; RUN: llvm-bcanalyzer -dump %t0.bc | FileCheck --check-prefix=BCA0 %s + +; ERROR: llvm-modextract: error: module index out of range; bitcode file contains 1 module(s) + +; BCA0: