diff --git a/lld/COFF/Config.h b/lld/COFF/Config.h --- a/lld/COFF/Config.h +++ b/lld/COFF/Config.h @@ -162,6 +162,8 @@ // Used for /opt:lldlto=N unsigned ltoo = 2; + // Used for /opt:lldltocgo=N + std::optional<unsigned> ltocgo; // Used for /opt:lldltojobs=N std::string thinLTOJobs; diff --git a/lld/COFF/Driver.cpp b/lld/COFF/Driver.cpp --- a/lld/COFF/Driver.cpp +++ b/lld/COFF/Driver.cpp @@ -1771,6 +1771,11 @@ StringRef optLevel = s.substr(7); if (optLevel.getAsInteger(10, config->ltoo) || config->ltoo > 3) error("/opt:lldlto: invalid optimization level: " + optLevel); + } else if (s.startswith("lldltocgo=")) { + StringRef optLevel = s.substr(10); + config->ltocgo.emplace(); + if (optLevel.getAsInteger(10, *config->ltocgo) || *config->ltocgo > 3) + error("/opt:lldltocgo: invalid optimization level: " + optLevel); } else if (s.startswith("lldltojobs=")) { StringRef jobs = s.substr(11); if (!get_threadpool_strategy(jobs)) diff --git a/lld/COFF/LTO.cpp b/lld/COFF/LTO.cpp --- a/lld/COFF/LTO.cpp +++ b/lld/COFF/LTO.cpp @@ -88,7 +88,8 @@ c.OptLevel = ctx.config.ltoo; c.CPU = getCPUStr(); c.MAttrs = getMAttrs(); - c.CGOptLevel = args::getCGOptLevel(ctx.config.ltoo); + c.CGOptLevel = *CodeGenOpt::getLevel( + ctx.config.ltocgo.value_or(args::getCGOptLevel(ctx.config.ltoo))); c.AlwaysEmitRegularLTOObj = !ctx.config.ltoObjPath.empty(); c.DebugPassManager = ctx.config.ltoDebugPassManager; c.CSIRProfile = std::string(ctx.config.ltoCSProfileFile); diff --git a/lld/Common/Args.cpp b/lld/Common/Args.cpp --- a/lld/Common/Args.cpp +++ b/lld/Common/Args.cpp @@ -19,11 +19,8 @@ // TODO(sbc): Remove this once CGOptLevel can be set completely based on bitcode // function metadata. 
-CodeGenOpt::Level lld::args::getCGOptLevel(int optLevelLTO) { - if (optLevelLTO == 3) - return CodeGenOpt::Aggressive; - assert(optLevelLTO < 3); - return CodeGenOpt::Default; +int lld::args::getCGOptLevel(int optLevelLTO) { + return std::clamp(optLevelLTO, 2, 3); } static int64_t getInteger(opt::InputArgList &args, unsigned key, diff --git a/lld/ELF/Config.h b/lld/ELF/Config.h --- a/lld/ELF/Config.h +++ b/lld/ELF/Config.h @@ -318,6 +318,7 @@ uint64_t zStackSize; unsigned ltoPartitions; unsigned ltoo; + llvm::CodeGenOpt::Level ltocgo; unsigned optimize; StringRef thinLTOJobs; unsigned timeTraceGranularity; diff --git a/lld/ELF/Driver.cpp b/lld/ELF/Driver.cpp --- a/lld/ELF/Driver.cpp +++ b/lld/ELF/Driver.cpp @@ -1139,6 +1139,8 @@ args.hasFlag(OPT_lto_whole_program_visibility, OPT_no_lto_whole_program_visibility, false); config->ltoo = args::getInteger(args, OPT_lto_O, 2); + config->ltocgo = *CodeGenOpt::getLevel( + args::getInteger(args, OPT_lto_CGO, args::getCGOptLevel(config->ltoo))); config->ltoObjPath = args.getLastArgValue(OPT_lto_obj_path_eq); config->ltoPartitions = args::getInteger(args, OPT_lto_partitions, 1); config->ltoSampleProfile = args.getLastArgValue(OPT_lto_sample_profile); diff --git a/lld/ELF/LTO.cpp b/lld/ELF/LTO.cpp --- a/lld/ELF/LTO.cpp +++ b/lld/ELF/LTO.cpp @@ -127,7 +127,7 @@ c.OptLevel = config->ltoo; c.CPU = getCPUStr(); c.MAttrs = getMAttrs(); - c.CGOptLevel = args::getCGOptLevel(config->ltoo); + c.CGOptLevel = config->ltocgo; c.PTO.LoopVectorization = c.OptLevel > 1; c.PTO.SLPVectorization = c.OptLevel > 1; diff --git a/lld/ELF/Options.td b/lld/ELF/Options.td --- a/lld/ELF/Options.td +++ b/lld/ELF/Options.td @@ -560,6 +560,8 @@ HelpText<"Passes to run during LTO">; def lto_O: JJ<"lto-O">, MetaVarName<"<opt-level>">, HelpText<"Optimization level for LTO">; +def lto_CGO: JJ<"lto-CGO">, MetaVarName<"<cgopt-level>">, + HelpText<"Codegen optimization level for LTO">; def lto_partitions: JJ<"lto-partitions=">, HelpText<"Number of LTO codegen partitions">; def 
lto_cs_profile_generate: FF<"lto-cs-profile-generate">, diff --git a/lld/MachO/Config.h b/lld/MachO/Config.h --- a/lld/MachO/Config.h +++ b/lld/MachO/Config.h @@ -18,6 +18,7 @@ #include "llvm/ADT/StringSet.h" #include "llvm/BinaryFormat/MachO.h" #include "llvm/Support/CachePruning.h" +#include "llvm/Support/CodeGen.h" #include "llvm/Support/GlobPattern.h" #include "llvm/Support/VersionTuple.h" #include "llvm/TextAPI/Architecture.h" @@ -165,6 +166,7 @@ llvm::StringRef thinLTOJobs; llvm::StringRef umbrella; uint32_t ltoo = 2; + llvm::CodeGenOpt::Level ltocgo; llvm::CachePruningPolicy thinLTOCachePolicy; llvm::StringRef thinLTOCacheDir; llvm::StringRef thinLTOIndexOnlyArg; diff --git a/lld/MachO/Driver.cpp b/lld/MachO/Driver.cpp --- a/lld/MachO/Driver.cpp +++ b/lld/MachO/Driver.cpp @@ -1561,6 +1561,8 @@ config->ltoo = args::getInteger(args, OPT_lto_O, 2); if (config->ltoo > 3) error("--lto-O: invalid optimization level: " + Twine(config->ltoo)); + config->ltocgo = *CodeGenOpt::getLevel( + args::getInteger(args, OPT_lto_CGO, args::getCGOptLevel(config->ltoo))); config->thinLTOCacheDir = args.getLastArgValue(OPT_cache_path_lto); config->thinLTOCachePolicy = getLTOCachePolicy(args); config->thinLTOEmitImportsFiles = args.hasArg(OPT_thinlto_emit_imports_files); diff --git a/lld/MachO/LTO.cpp b/lld/MachO/LTO.cpp --- a/lld/MachO/LTO.cpp +++ b/lld/MachO/LTO.cpp @@ -70,7 +70,7 @@ c.TimeTraceEnabled = config->timeTraceEnabled; c.TimeTraceGranularity = config->timeTraceGranularity; c.OptLevel = config->ltoo; - c.CGOptLevel = args::getCGOptLevel(config->ltoo); + c.CGOptLevel = config->ltocgo; if (config->saveTemps) checkError(c.addSaveTemps(config->outputFile.str() + ".", /*UseInputModulePath=*/true)); diff --git a/lld/MachO/Options.td b/lld/MachO/Options.td --- a/lld/MachO/Options.td +++ b/lld/MachO/Options.td @@ -87,6 +87,10 @@ HelpText<"Set optimization level for LTO (default: 2)">, MetaVarName<"<opt-level>">, Group<grp_lld>; +def lto_CGO: Joined<["--"], "lto-CGO">, + HelpText<"Set codegen 
optimization level for LTO (default: 2)">, + MetaVarName<"<cgopt-level>">, + Group<grp_lld>; def thinlto_cache_policy_eq: Joined<["--"], "thinlto-cache-policy=">, HelpText<"Pruning policy for the ThinLTO cache">, Group<grp_lld>; diff --git a/lld/include/lld/Common/Args.h b/lld/include/lld/Common/Args.h --- a/lld/include/lld/Common/Args.h +++ b/lld/include/lld/Common/Args.h @@ -23,7 +23,7 @@ namespace lld { namespace args { -llvm::CodeGenOpt::Level getCGOptLevel(int optLevelLTO); +int getCGOptLevel(int optLevelLTO); int64_t getInteger(llvm::opt::InputArgList &args, unsigned key, int64_t Default); diff --git a/lld/test/COFF/lto-cgo.ll b/lld/test/COFF/lto-cgo.ll new file mode 100644 --- /dev/null +++ b/lld/test/COFF/lto-cgo.ll @@ -0,0 +1,23 @@ +; REQUIRES: x86 + +; RUN: llvm-as %s -o %t.obj +; RUN: lld-link -opt:lldlto=0 %t.obj -dll -noentry -out:%t.dll -mllvm:-debug-pass=Structure 2>&1 | FileCheck --check-prefix=OPT %s +; RUN: lld-link -opt:lldltocgo=0 %t.obj -dll -noentry -out:%t.dll -mllvm:-debug-pass=Structure 2>&1 | FileCheck --check-prefix=NOOPT %s +; RUN: lld-link -opt:lldlto=3 %t.obj -dll -noentry -out:%t.dll -mllvm:-debug-pass=Structure 2>&1 | FileCheck --check-prefix=OPT %s +; RUN: lld-link -opt:lldltocgo=3 %t.obj -dll -noentry -out:%t.dll -mllvm:-debug-pass=Structure 2>&1 | FileCheck --check-prefix=OPT %s +; RUN: lld-link -opt:lldlto=0 -opt:lldltocgo=0 %t.obj -dll -noentry -out:%t.dll -mllvm:-debug-pass=Structure 2>&1 | FileCheck --check-prefix=NOOPT %s +; RUN: lld-link -opt:lldlto=3 -opt:lldltocgo=0 %t.obj -dll -noentry -out:%t.dll -mllvm:-debug-pass=Structure 2>&1 | FileCheck --check-prefix=NOOPT %s +; RUN: lld-link -opt:lldlto=0 -opt:lldltocgo=3 %t.obj -dll -noentry -out:%t.dll -mllvm:-debug-pass=Structure 2>&1 | FileCheck --check-prefix=OPT %s +; RUN: lld-link -opt:lldlto=3 -opt:lldltocgo=3 %t.obj -dll -noentry -out:%t.dll -mllvm:-debug-pass=Structure 2>&1 | FileCheck --check-prefix=OPT %s +; RUN: lld-link -opt:lldlto=0 -opt:lldltocgo=0 -opt:lldltocgo=2 %t.obj -dll -noentry 
-out:%t.dll -mllvm:-debug-pass=Structure 2>&1 | FileCheck --check-prefix=OPT %s + +; NOOPT: Fast Register Allocator +; OPT: Greedy Register Allocator + +target datalayout = "e-m:w-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-pc-windows-msvc" + +define void @_start() { +entry: + ret void +} diff --git a/lld/test/ELF/lto/cgo.ll b/lld/test/ELF/lto/cgo.ll new file mode 100644 --- /dev/null +++ b/lld/test/ELF/lto/cgo.ll @@ -0,0 +1,23 @@ +; REQUIRES: x86 + +; RUN: llvm-as %s -o %t.o +; RUN: ld.lld --lto-O0 %t.o -o %t -mllvm -debug-pass=Structure 2>&1 | FileCheck --check-prefix=OPT %s +; RUN: ld.lld --lto-CGO0 %t.o -o %t -mllvm -debug-pass=Structure 2>&1 | FileCheck --check-prefix=NOOPT %s +; RUN: ld.lld --lto-O3 %t.o -o %t -mllvm -debug-pass=Structure 2>&1 | FileCheck --check-prefix=OPT %s +; RUN: ld.lld --lto-CGO3 %t.o -o %t -mllvm -debug-pass=Structure 2>&1 | FileCheck --check-prefix=OPT %s +; RUN: ld.lld --lto-O0 --lto-CGO0 %t.o -o %t -mllvm -debug-pass=Structure 2>&1 | FileCheck --check-prefix=NOOPT %s +; RUN: ld.lld --lto-O3 --lto-CGO0 %t.o -o %t -mllvm -debug-pass=Structure 2>&1 | FileCheck --check-prefix=NOOPT %s +; RUN: ld.lld --lto-O0 --lto-CGO3 %t.o -o %t -mllvm -debug-pass=Structure 2>&1 | FileCheck --check-prefix=OPT %s +; RUN: ld.lld --lto-O3 --lto-CGO3 %t.o -o %t -mllvm -debug-pass=Structure 2>&1 | FileCheck --check-prefix=OPT %s +; RUN: ld.lld --lto-O0 --lto-CGO0 --lto-CGO2 %t.o -o %t -mllvm -debug-pass=Structure 2>&1 | FileCheck --check-prefix=OPT %s + +; NOOPT: Fast Register Allocator +; OPT: Greedy Register Allocator + +target triple = "x86_64-unknown-linux-gnu" +target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" + +define void @_start() { +entry: + ret void +} diff --git a/lld/test/MachO/lto-cgo.ll b/lld/test/MachO/lto-cgo.ll new file mode 100644 --- /dev/null +++ b/lld/test/MachO/lto-cgo.ll @@ -0,0 +1,23 @@ +; REQUIRES: x86 + +; RUN: llvm-as %s -o %t.o +; RUN: %lld 
-dylib --lto-O0 %t.o -o %t -mllvm -debug-pass=Structure 2>&1 | FileCheck --check-prefix=OPT %s +; RUN: %lld -dylib --lto-CGO0 %t.o -o %t -mllvm -debug-pass=Structure 2>&1 | FileCheck --check-prefix=NOOPT %s +; RUN: %lld -dylib --lto-O3 %t.o -o %t -mllvm -debug-pass=Structure 2>&1 | FileCheck --check-prefix=OPT %s +; RUN: %lld -dylib --lto-CGO3 %t.o -o %t -mllvm -debug-pass=Structure 2>&1 | FileCheck --check-prefix=OPT %s +; RUN: %lld -dylib --lto-O0 --lto-CGO0 %t.o -o %t -mllvm -debug-pass=Structure 2>&1 | FileCheck --check-prefix=NOOPT %s +; RUN: %lld -dylib --lto-O3 --lto-CGO0 %t.o -o %t -mllvm -debug-pass=Structure 2>&1 | FileCheck --check-prefix=NOOPT %s +; RUN: %lld -dylib --lto-O0 --lto-CGO3 %t.o -o %t -mllvm -debug-pass=Structure 2>&1 | FileCheck --check-prefix=OPT %s +; RUN: %lld -dylib --lto-O3 --lto-CGO3 %t.o -o %t -mllvm -debug-pass=Structure 2>&1 | FileCheck --check-prefix=OPT %s +; RUN: %lld -dylib --lto-O0 --lto-CGO0 --lto-CGO2 %t.o -o %t -mllvm -debug-pass=Structure 2>&1 | FileCheck --check-prefix=OPT %s + +; NOOPT: Fast Register Allocator +; OPT: Greedy Register Allocator + +target triple = "x86_64-apple-darwin" +target datalayout = "e-m:o-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" + +define void @_start() { +entry: + ret void +} diff --git a/lld/test/wasm/lto/cgo.ll b/lld/test/wasm/lto/cgo.ll new file mode 100644 --- /dev/null +++ b/lld/test/wasm/lto/cgo.ll @@ -0,0 +1,23 @@ +; REQUIRES: x86 + +; RUN: llvm-as %s -o %t.o +; RUN: wasm-ld --lto-O0 %t.o -o %t -mllvm -debug-pass=Structure 2>&1 | FileCheck --check-prefix=OPT %s +; RUN: wasm-ld --lto-CGO0 %t.o -o %t -mllvm -debug-pass=Structure 2>&1 | FileCheck --check-prefix=NOOPT %s +; RUN: wasm-ld --lto-O3 %t.o -o %t -mllvm -debug-pass=Structure 2>&1 | FileCheck --check-prefix=OPT %s +; RUN: wasm-ld --lto-CGO3 %t.o -o %t -mllvm -debug-pass=Structure 2>&1 | FileCheck --check-prefix=OPT %s +; RUN: wasm-ld --lto-O0 --lto-CGO0 %t.o -o %t -mllvm -debug-pass=Structure 2>&1 | FileCheck 
--check-prefix=NOOPT %s +; RUN: wasm-ld --lto-O3 --lto-CGO0 %t.o -o %t -mllvm -debug-pass=Structure 2>&1 | FileCheck --check-prefix=NOOPT %s +; RUN: wasm-ld --lto-O0 --lto-CGO3 %t.o -o %t -mllvm -debug-pass=Structure 2>&1 | FileCheck --check-prefix=OPT %s +; RUN: wasm-ld --lto-O3 --lto-CGO3 %t.o -o %t -mllvm -debug-pass=Structure 2>&1 | FileCheck --check-prefix=OPT %s +; RUN: wasm-ld --lto-O0 --lto-CGO0 --lto-CGO2 %t.o -o %t -mllvm -debug-pass=Structure 2>&1 | FileCheck --check-prefix=OPT %s + +; NOOPT-NOT: WebAssembly Optimize Returned +; OPT: WebAssembly Optimize Returned + +target datalayout = "e-m:e-p:32:32-p10:8:8-p20:8:8-i64:64-n32:64-S128" +target triple = "wasm32-unknown-unknown-wasm" + +define void @_start() { +entry: + ret void +} diff --git a/lld/wasm/Config.h b/lld/wasm/Config.h --- a/lld/wasm/Config.h +++ b/lld/wasm/Config.h @@ -14,6 +14,7 @@ #include "llvm/ADT/StringSet.h" #include "llvm/BinaryFormat/Wasm.h" #include "llvm/Support/CachePruning.h" +#include "llvm/Support/CodeGen.h" #include <optional> namespace lld { @@ -63,6 +64,7 @@ uint64_t zStackSize; unsigned ltoPartitions; unsigned ltoo; + llvm::CodeGenOpt::Level ltocgo; unsigned optimize; llvm::StringRef thinLTOJobs; bool ltoDebugPassManager; diff --git a/lld/wasm/Driver.cpp b/lld/wasm/Driver.cpp --- a/lld/wasm/Driver.cpp +++ b/lld/wasm/Driver.cpp @@ -424,6 +424,8 @@ config->importTable = args.hasArg(OPT_import_table); config->importUndefined = args.hasArg(OPT_import_undefined); config->ltoo = args::getInteger(args, OPT_lto_O, 2); + config->ltocgo = *CodeGenOpt::getLevel( + args::getInteger(args, OPT_lto_CGO, args::getCGOptLevel(config->ltoo))); config->ltoPartitions = args::getInteger(args, OPT_lto_partitions, 1); config->ltoDebugPassManager = args.hasArg(OPT_lto_debug_pass_manager); config->mapFile = args.getLastArgValue(OPT_Map); diff --git a/lld/wasm/LTO.cpp b/lld/wasm/LTO.cpp --- a/lld/wasm/LTO.cpp +++ b/lld/wasm/LTO.cpp @@ -51,7 +51,7 @@ c.DiagHandler = diagnosticHandler; c.OptLevel = config->ltoo; 
c.MAttrs = getMAttrs(); - c.CGOptLevel = args::getCGOptLevel(config->ltoo); + c.CGOptLevel = config->ltocgo; c.DebugPassManager = config->ltoDebugPassManager; if (config->relocatable) diff --git a/lld/wasm/Options.td b/lld/wasm/Options.td --- a/lld/wasm/Options.td +++ b/lld/wasm/Options.td @@ -256,6 +256,8 @@ // LTO-related options. def lto_O: JJ<"lto-O">, MetaVarName<"<opt-level>">, HelpText<"Optimization level for LTO">; +def lto_CGO: JJ<"lto-CGO">, MetaVarName<"<cgopt-level>">, + HelpText<"Codegen optimization level for LTO">; def lto_partitions: JJ<"lto-partitions=">, HelpText<"Number of LTO codegen partitions">; def disable_verify: F<"disable-verify">;