diff --git a/lld/COFF/Config.h b/lld/COFF/Config.h --- a/lld/COFF/Config.h +++ b/lld/COFF/Config.h @@ -162,6 +162,8 @@ // Used for /opt:lldlto=N unsigned ltoo = 2; + // Used for /opt:lldltocgo=N + std::optional ltocgo; // Used for /opt:lldltojobs=N std::string thinLTOJobs; diff --git a/lld/COFF/Driver.cpp b/lld/COFF/Driver.cpp --- a/lld/COFF/Driver.cpp +++ b/lld/COFF/Driver.cpp @@ -1771,6 +1771,11 @@ StringRef optLevel = s.substr(7); if (optLevel.getAsInteger(10, config->ltoo) || config->ltoo > 3) error("/opt:lldlto: invalid optimization level: " + optLevel); + } else if (s.startswith("lldltocgo=")) { + StringRef optLevel = s.substr(10); + config->ltocgo.emplace(); + if (optLevel.getAsInteger(10, *config->ltocgo) || *config->ltocgo > 3) + error("/opt:lldltocgo: invalid optimization level: " + optLevel); } else if (s.startswith("lldltojobs=")) { StringRef jobs = s.substr(11); if (!get_threadpool_strategy(jobs)) diff --git a/lld/COFF/LTO.cpp b/lld/COFF/LTO.cpp --- a/lld/COFF/LTO.cpp +++ b/lld/COFF/LTO.cpp @@ -88,7 +88,8 @@ c.OptLevel = ctx.config.ltoo; c.CPU = getCPUStr(); c.MAttrs = getMAttrs(); - c.CGOptLevel = args::getCGOptLevel(ctx.config.ltoo); + c.CGOptLevel = + *CodeGenOpt::getLevel(ctx.config.ltocgo.value_or(ctx.config.ltoo)); c.AlwaysEmitRegularLTOObj = !ctx.config.ltoObjPath.empty(); c.DebugPassManager = ctx.config.ltoDebugPassManager; c.CSIRProfile = std::string(ctx.config.ltoCSProfileFile); diff --git a/lld/Common/Args.cpp b/lld/Common/Args.cpp --- a/lld/Common/Args.cpp +++ b/lld/Common/Args.cpp @@ -17,15 +17,6 @@ using namespace llvm; using namespace lld; -// TODO(sbc): Remove this once CGOptLevel can be set completely based on bitcode -// function metadata. 
-CodeGenOpt::Level lld::args::getCGOptLevel(int optLevelLTO) { - if (optLevelLTO == 3) - return CodeGenOpt::Aggressive; - assert(optLevelLTO < 3); - return CodeGenOpt::Default; -} - static int64_t getInteger(opt::InputArgList &args, unsigned key, int64_t Default, unsigned base) { auto *a = args.getLastArg(key); diff --git a/lld/ELF/Config.h b/lld/ELF/Config.h --- a/lld/ELF/Config.h +++ b/lld/ELF/Config.h @@ -318,6 +318,7 @@ uint64_t zStackSize; unsigned ltoPartitions; unsigned ltoo; + llvm::CodeGenOpt::Level ltocgo; unsigned optimize; StringRef thinLTOJobs; unsigned timeTraceGranularity; diff --git a/lld/ELF/Driver.cpp b/lld/ELF/Driver.cpp --- a/lld/ELF/Driver.cpp +++ b/lld/ELF/Driver.cpp @@ -1139,6 +1139,12 @@ args.hasFlag(OPT_lto_whole_program_visibility, OPT_no_lto_whole_program_visibility, false); config->ltoo = args::getInteger(args, OPT_lto_O, 2); + // Diagnose an out-of-range level instead of dereferencing an empty optional. + unsigned ltoCgo = args::getInteger(args, OPT_lto_CGO, config->ltoo); + if (auto level = CodeGenOpt::getLevel(ltoCgo)) + config->ltocgo = *level; + else + error("invalid codegen optimization level for LTO: " + Twine(ltoCgo)); config->ltoObjPath = args.getLastArgValue(OPT_lto_obj_path_eq); config->ltoPartitions = args::getInteger(args, OPT_lto_partitions, 1); config->ltoSampleProfile = args.getLastArgValue(OPT_lto_sample_profile); diff --git a/lld/ELF/LTO.cpp b/lld/ELF/LTO.cpp --- a/lld/ELF/LTO.cpp +++ b/lld/ELF/LTO.cpp @@ -127,7 +127,7 @@ c.OptLevel = config->ltoo; c.CPU = getCPUStr(); c.MAttrs = getMAttrs(); - c.CGOptLevel = args::getCGOptLevel(config->ltoo); + c.CGOptLevel = config->ltocgo; c.PTO.LoopVectorization = c.OptLevel > 1; c.PTO.SLPVectorization = c.OptLevel > 1; diff --git a/lld/ELF/Options.td b/lld/ELF/Options.td --- a/lld/ELF/Options.td +++ b/lld/ELF/Options.td @@ -560,6 +560,8 @@ HelpText<"Passes to run during LTO">; def lto_O: JJ<"lto-O">, MetaVarName<"">, HelpText<"Optimization level for LTO">; +def lto_CGO: JJ<"lto-CGO">, MetaVarName<"">, + HelpText<"Codegen optimization level for LTO">; def lto_partitions: JJ<"lto-partitions=">, HelpText<"Number of LTO codegen partitions">; def lto_cs_profile_generate: 
FF<"lto-cs-profile-generate">, diff --git a/lld/MachO/Arch/ARM.cpp b/lld/MachO/Arch/ARM.cpp --- a/lld/MachO/Arch/ARM.cpp +++ b/lld/MachO/Arch/ARM.cpp @@ -29,7 +29,7 @@ int64_t getEmbeddedAddend(MemoryBufferRef, uint64_t offset, const relocation_info) const override; - void relocateOne(uint8_t *loc, const Reloc &, uint64_t va, + void relocateOne(uint8_t *loc, const macho::Reloc &, uint64_t va, uint64_t pc) const override; void writeStub(uint8_t *buf, const Symbol &, uint64_t) const override; @@ -45,7 +45,7 @@ void relaxGotLoad(uint8_t *loc, uint8_t type) const override; uint64_t getPageSize() const override { return 4 * 1024; } - void handleDtraceReloc(const Symbol *sym, const Reloc &r, + void handleDtraceReloc(const Symbol *sym, const macho::Reloc &r, uint8_t *loc) const override; }; } // namespace @@ -101,7 +101,7 @@ using Cond = Bitfield::Element; using Imm24 = Bitfield::Element; -void ARM::relocateOne(uint8_t *loc, const Reloc &r, uint64_t value, +void ARM::relocateOne(uint8_t *loc, const macho::Reloc &r, uint64_t value, uint64_t pc) const { switch (r.type) { case ARM_RELOC_BR24: { @@ -180,7 +180,7 @@ return &t; } -void ARM::handleDtraceReloc(const Symbol *sym, const Reloc &r, +void ARM::handleDtraceReloc(const Symbol *sym, const macho::Reloc &r, uint8_t *loc) const { if (config->outputType == MH_OBJECT) return; diff --git a/lld/MachO/Config.h b/lld/MachO/Config.h --- a/lld/MachO/Config.h +++ b/lld/MachO/Config.h @@ -18,6 +18,7 @@ #include "llvm/ADT/StringSet.h" #include "llvm/BinaryFormat/MachO.h" #include "llvm/Support/CachePruning.h" +#include "llvm/Support/CodeGen.h" #include "llvm/Support/GlobPattern.h" #include "llvm/Support/VersionTuple.h" #include "llvm/TextAPI/Architecture.h" @@ -165,6 +166,7 @@ llvm::StringRef thinLTOJobs; llvm::StringRef umbrella; uint32_t ltoo = 2; + llvm::CodeGenOpt::Level ltocgo; llvm::CachePruningPolicy thinLTOCachePolicy; llvm::StringRef thinLTOCacheDir; llvm::StringRef thinLTOIndexOnlyArg; diff --git a/lld/MachO/Driver.cpp 
b/lld/MachO/Driver.cpp --- a/lld/MachO/Driver.cpp +++ b/lld/MachO/Driver.cpp @@ -1561,6 +1561,12 @@ config->ltoo = args::getInteger(args, OPT_lto_O, 2); if (config->ltoo > 3) error("--lto-O: invalid optimization level: " + Twine(config->ltoo)); + // Diagnose an out-of-range level instead of dereferencing an empty optional. + unsigned ltoCgo = args::getInteger(args, OPT_lto_CGO, config->ltoo); + if (auto level = CodeGenOpt::getLevel(ltoCgo)) + config->ltocgo = *level; + else + error("--lto-CGO: invalid codegen optimization level: " + Twine(ltoCgo)); config->thinLTOCacheDir = args.getLastArgValue(OPT_cache_path_lto); config->thinLTOCachePolicy = getLTOCachePolicy(args); config->thinLTOEmitImportsFiles = args.hasArg(OPT_thinlto_emit_imports_files); diff --git a/lld/MachO/EhFrame.cpp b/lld/MachO/EhFrame.cpp --- a/lld/MachO/EhFrame.cpp +++ b/lld/MachO/EhFrame.cpp @@ -109,16 +109,16 @@ static void createSubtraction(PointerUnion a, PointerUnion b, uint64_t off, uint8_t length, - SmallVectorImpl *newRelocs) { + SmallVectorImpl *newRelocs) { auto subtrahend = a; auto minuend = b; if (Invert) std::swap(subtrahend, minuend); assert(subtrahend.is()); - Reloc subtrahendReloc(target->subtractorRelocType, /*pcrel=*/false, length, - off, /*addend=*/0, subtrahend); - Reloc minuendReloc(target->unsignedRelocType, /*pcrel=*/false, length, off, - (Invert ? 
1 : -1) * off, minuend); newRelocs->push_back(subtrahendReloc); newRelocs->push_back(minuendReloc); } diff --git a/lld/MachO/ICF.cpp b/lld/MachO/ICF.cpp --- a/lld/MachO/ICF.cpp +++ b/lld/MachO/ICF.cpp @@ -105,7 +105,7 @@ return false; if (ia->relocs.size() != ib->relocs.size()) return false; - auto f = [](const Reloc &ra, const Reloc &rb) { + auto f = [](const macho::Reloc &ra, const macho::Reloc &rb) { if (ra.type != rb.type) return false; if (ra.pcrel != rb.pcrel) @@ -178,7 +178,7 @@ if (verboseDiagnostics) ++equalsVariableCount; assert(ia->relocs.size() == ib->relocs.size()); - auto f = [this](const Reloc &ra, const Reloc &rb) { + auto f = [this](const macho::Reloc &ra, const macho::Reloc &rb) { // We already filtered out mismatching values/addends in equalsConstant. if (ra.referent == rb.referent) return true; @@ -286,7 +286,7 @@ for (icfPass = 0; icfPass < 2; ++icfPass) { parallelForEach(icfInputs, [&](ConcatInputSection *isec) { uint32_t hash = isec->icfEqClass[icfPass % 2]; - for (const Reloc &r : isec->relocs) { + for (const macho::Reloc &r : isec->relocs) { if (auto *sym = r.referent.dyn_cast()) { if (auto *defined = dyn_cast(sym)) { if (defined->isec) { @@ -386,7 +386,7 @@ const InputSection *isec = addrSigSection->subsections[0].isec; - for (const Reloc &r : isec->relocs) { + for (const macho::Reloc &r : isec->relocs) { if (auto *sym = r.referent.dyn_cast()) markSymAsAddrSig(sym); else @@ -442,7 +442,7 @@ // We have to do this copying serially as the BumpPtrAllocator is not // thread-safe. FIXME: Make a thread-safe allocator. MutableArrayRef copy = isec->data.copy(bAlloc()); - for (const Reloc &r : isec->relocs) + for (const macho::Reloc &r : isec->relocs) target->relocateOne(copy.data() + r.offset, r, /*va=*/0, /*relocVA=*/0); isec->data = copy; diff --git a/lld/MachO/InputSection.cpp b/lld/MachO/InputSection.cpp --- a/lld/MachO/InputSection.cpp +++ b/lld/MachO/InputSection.cpp @@ -31,8 +31,8 @@ // Verify ConcatInputSection's size on 64-bit builds. 
The size of std::vector // can differ based on STL debug levels (e.g. iterator debugging on MSVC's STL), // so account for that. -static_assert(sizeof(void *) != 8 || - sizeof(ConcatInputSection) == sizeof(std::vector) + 88, +static_assert(sizeof(void *) != 8 || sizeof(ConcatInputSection) == + sizeof(std::vector) + 88, "Try to minimize ConcatInputSection's size, we create many " "instances of it"); @@ -166,7 +166,7 @@ memcpy(buf, data.data(), data.size()); for (size_t i = 0; i < relocs.size(); i++) { - const Reloc &r = relocs[i]; + const macho::Reloc &r = relocs[i]; uint8_t *loc = buf + r.offset; uint64_t referentVA = 0; @@ -174,7 +174,7 @@ target->hasAttr(r.type, RelocAttrBits::UNSIGNED); if (target->hasAttr(r.type, RelocAttrBits::SUBTRAHEND)) { const Symbol *fromSym = r.referent.get(); - const Reloc &minuend = relocs[++i]; + const macho::Reloc &minuend = relocs[++i]; uint64_t minuendVA; if (const Symbol *toSym = minuend.referent.dyn_cast()) minuendVA = toSym->getVA() + minuend.addend; diff --git a/lld/MachO/LTO.cpp b/lld/MachO/LTO.cpp --- a/lld/MachO/LTO.cpp +++ b/lld/MachO/LTO.cpp @@ -70,7 +70,7 @@ c.TimeTraceEnabled = config->timeTraceEnabled; c.TimeTraceGranularity = config->timeTraceGranularity; c.OptLevel = config->ltoo; - c.CGOptLevel = args::getCGOptLevel(config->ltoo); + c.CGOptLevel = config->ltocgo; if (config->saveTemps) checkError(c.addSaveTemps(config->outputFile.str() + ".", /*UseInputModulePath=*/true)); diff --git a/lld/MachO/Options.td b/lld/MachO/Options.td --- a/lld/MachO/Options.td +++ b/lld/MachO/Options.td @@ -85,6 +85,10 @@ Group; def lto_O: Joined<["--"], "lto-O">, HelpText<"Set optimization level for LTO (default: 2)">, + MetaVarName<"">, + Group; +def lto_CGO: Joined<["--"], "lto-CGO">, + HelpText<"Set codegen optimization level for LTO (default: the --lto-O level)">, MetaVarName<"">, Group; def thinlto_cache_policy_eq: Joined<["--"], "thinlto-cache-policy=">, diff --git a/lld/MachO/Relocations.cpp b/lld/MachO/Relocations.cpp --- 
a/lld/MachO/Relocations.cpp +++ b/lld/MachO/Relocations.cpp @@ -18,11 +18,12 @@ using namespace lld; using namespace lld::macho; -static_assert(sizeof(void *) != 8 || sizeof(Reloc) == 24, +static_assert(sizeof(void *) != 8 || sizeof(macho::Reloc) == 24, "Try to minimize Reloc's size; we create many instances"); bool macho::validateSymbolRelocation(const Symbol *sym, - const InputSection *isec, const Reloc &r) { + const InputSection *isec, + const macho::Reloc &r) { const RelocAttrs &relocAttrs = target->getRelocAttrs(r.type); bool valid = true; auto message = [&](const Twine &diagnostic) { @@ -82,7 +83,7 @@ return nullptr; } -void macho::reportRangeError(void *loc, const Reloc &r, const Twine &v, +void macho::reportRangeError(void *loc, const macho::Reloc &r, const Twine &v, uint8_t bits, int64_t min, uint64_t max) { std::string hint; uint64_t off = reinterpret_cast(loc) - in.bufferStart; diff --git a/lld/MachO/UnwindInfoSection.cpp b/lld/MachO/UnwindInfoSection.cpp --- a/lld/MachO/UnwindInfoSection.cpp +++ b/lld/MachO/UnwindInfoSection.cpp @@ -249,7 +249,7 @@ // that are referenced from many places, at least some of them likely // live, it wouldn't reduce number of got entries. 
for (size_t i = 0; i < isec->relocs.size(); ++i) { - Reloc &r = isec->relocs[i]; + macho::Reloc &r = isec->relocs[i]; assert(target->hasAttr(r.type, RelocAttrBits::UNSIGNED)); // Since compact unwind sections aren't part of the inputSections vector, // they don't get canonicalized by scanRelocations(), so we have to do the @@ -375,7 +375,7 @@ cu.functionLength = support::endian::read32le(buf + cuOffsets.functionLength); cu.encoding = support::endian::read32le(buf + cuOffsets.encoding); - for (const Reloc &r : d->unwindEntry->relocs) { + for (const macho::Reloc &r : d->unwindEntry->relocs) { if (r.offset == cuOffsets.personality) { cu.personality = r.referent.get(); } else if (r.offset == cuOffsets.lsda) { diff --git a/lld/include/lld/Common/Args.h b/lld/include/lld/Common/Args.h --- a/lld/include/lld/Common/Args.h +++ b/lld/include/lld/Common/Args.h @@ -23,8 +23,6 @@ namespace lld { namespace args { -llvm::CodeGenOpt::Level getCGOptLevel(int optLevelLTO); - int64_t getInteger(llvm::opt::InputArgList &args, unsigned key, int64_t Default); diff --git a/lld/test/COFF/lto-cgo.ll b/lld/test/COFF/lto-cgo.ll new file mode 100644 --- /dev/null +++ b/lld/test/COFF/lto-cgo.ll @@ -0,0 +1,22 @@ +; REQUIRES: x86 + +; RUN: llvm-as %s -o %t.obj +; RUN: lld-link -opt:lldlto=0 %t.obj -dll -noentry -out:%t.dll -mllvm:-debug-pass=Structure 2>&1 | FileCheck --check-prefix=NOOPT %s +; RUN: lld-link -opt:lldltocgo=0 %t.obj -dll -noentry -out:%t.dll -mllvm:-debug-pass=Structure 2>&1 | FileCheck --check-prefix=NOOPT %s +; RUN: lld-link -opt:lldlto=3 %t.obj -dll -noentry -out:%t.dll -mllvm:-debug-pass=Structure 2>&1 | FileCheck --check-prefix=OPT %s +; RUN: lld-link -opt:lldltocgo=3 %t.obj -dll -noentry -out:%t.dll -mllvm:-debug-pass=Structure 2>&1 | FileCheck --check-prefix=OPT %s +; RUN: lld-link -opt:lldlto=0 -opt:lldltocgo=0 %t.obj -dll -noentry -out:%t.dll -mllvm:-debug-pass=Structure 2>&1 | FileCheck --check-prefix=NOOPT %s +; RUN: lld-link -opt:lldlto=3 -opt:lldltocgo=0 %t.obj -dll 
-noentry -out:%t.dll -mllvm:-debug-pass=Structure 2>&1 | FileCheck --check-prefix=NOOPT %s +; RUN: lld-link -opt:lldlto=0 -opt:lldltocgo=3 %t.obj -dll -noentry -out:%t.dll -mllvm:-debug-pass=Structure 2>&1 | FileCheck --check-prefix=OPT %s +; RUN: lld-link -opt:lldlto=3 -opt:lldltocgo=3 %t.obj -dll -noentry -out:%t.dll -mllvm:-debug-pass=Structure 2>&1 | FileCheck --check-prefix=OPT %s + +; NOOPT: Fast Register Allocator +; OPT: Greedy Register Allocator + +target datalayout = "e-m:w-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-pc-windows-msvc" + +define void @_start() { +entry: + ret void +} diff --git a/lld/test/ELF/lto/cgo.ll b/lld/test/ELF/lto/cgo.ll new file mode 100644 --- /dev/null +++ b/lld/test/ELF/lto/cgo.ll @@ -0,0 +1,22 @@ +; REQUIRES: x86 + +; RUN: llvm-as %s -o %t.o +; RUN: ld.lld --lto-O0 %t.o -o %t -mllvm -debug-pass=Structure 2>&1 | FileCheck --check-prefix=NOOPT %s +; RUN: ld.lld --lto-CGO0 %t.o -o %t -mllvm -debug-pass=Structure 2>&1 | FileCheck --check-prefix=NOOPT %s +; RUN: ld.lld --lto-O3 %t.o -o %t -mllvm -debug-pass=Structure 2>&1 | FileCheck --check-prefix=OPT %s +; RUN: ld.lld --lto-CGO3 %t.o -o %t -mllvm -debug-pass=Structure 2>&1 | FileCheck --check-prefix=OPT %s +; RUN: ld.lld --lto-O0 --lto-CGO0 %t.o -o %t -mllvm -debug-pass=Structure 2>&1 | FileCheck --check-prefix=NOOPT %s +; RUN: ld.lld --lto-O3 --lto-CGO0 %t.o -o %t -mllvm -debug-pass=Structure 2>&1 | FileCheck --check-prefix=NOOPT %s +; RUN: ld.lld --lto-O0 --lto-CGO3 %t.o -o %t -mllvm -debug-pass=Structure 2>&1 | FileCheck --check-prefix=OPT %s +; RUN: ld.lld --lto-O3 --lto-CGO3 %t.o -o %t -mllvm -debug-pass=Structure 2>&1 | FileCheck --check-prefix=OPT %s + +; NOOPT: Fast Register Allocator +; OPT: Greedy Register Allocator + +target triple = "x86_64-unknown-linux-gnu" +target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" + +define void @_start() { +entry: + ret void +} diff --git 
a/lld/test/MachO/lto-cgo.ll b/lld/test/MachO/lto-cgo.ll new file mode 100644 --- /dev/null +++ b/lld/test/MachO/lto-cgo.ll @@ -0,0 +1,22 @@ +; REQUIRES: x86 + +; RUN: llvm-as %s -o %t.o +; RUN: %lld -dylib --lto-O0 %t.o -o %t -mllvm -debug-pass=Structure 2>&1 | FileCheck --check-prefix=NOOPT %s +; RUN: %lld -dylib --lto-CGO0 %t.o -o %t -mllvm -debug-pass=Structure 2>&1 | FileCheck --check-prefix=NOOPT %s +; RUN: %lld -dylib --lto-O3 %t.o -o %t -mllvm -debug-pass=Structure 2>&1 | FileCheck --check-prefix=OPT %s +; RUN: %lld -dylib --lto-CGO3 %t.o -o %t -mllvm -debug-pass=Structure 2>&1 | FileCheck --check-prefix=OPT %s +; RUN: %lld -dylib --lto-O0 --lto-CGO0 %t.o -o %t -mllvm -debug-pass=Structure 2>&1 | FileCheck --check-prefix=NOOPT %s +; RUN: %lld -dylib --lto-O3 --lto-CGO0 %t.o -o %t -mllvm -debug-pass=Structure 2>&1 | FileCheck --check-prefix=NOOPT %s +; RUN: %lld -dylib --lto-O0 --lto-CGO3 %t.o -o %t -mllvm -debug-pass=Structure 2>&1 | FileCheck --check-prefix=OPT %s +; RUN: %lld -dylib --lto-O3 --lto-CGO3 %t.o -o %t -mllvm -debug-pass=Structure 2>&1 | FileCheck --check-prefix=OPT %s + +; NOOPT: Fast Register Allocator +; OPT: Greedy Register Allocator + +target triple = "x86_64-apple-darwin" +target datalayout = "e-m:o-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" + +define void @_start() { +entry: + ret void +} diff --git a/lld/test/wasm/lto/cgo.ll b/lld/test/wasm/lto/cgo.ll new file mode 100644 --- /dev/null +++ b/lld/test/wasm/lto/cgo.ll @@ -0,0 +1,22 @@ +; REQUIRES: x86 + +; RUN: llvm-as %s -o %t.o +; RUN: wasm-ld --lto-O0 %t.o -o %t -mllvm -debug-pass=Structure 2>&1 | FileCheck --check-prefix=NOOPT %s +; RUN: wasm-ld --lto-CGO0 %t.o -o %t -mllvm -debug-pass=Structure 2>&1 | FileCheck --check-prefix=NOOPT %s +; RUN: wasm-ld --lto-O3 %t.o -o %t -mllvm -debug-pass=Structure 2>&1 | FileCheck --check-prefix=OPT %s +; RUN: wasm-ld --lto-CGO3 %t.o -o %t -mllvm -debug-pass=Structure 2>&1 | FileCheck --check-prefix=OPT %s +; RUN: wasm-ld 
--lto-O0 --lto-CGO0 %t.o -o %t -mllvm -debug-pass=Structure 2>&1 | FileCheck --check-prefix=NOOPT %s +; RUN: wasm-ld --lto-O3 --lto-CGO0 %t.o -o %t -mllvm -debug-pass=Structure 2>&1 | FileCheck --check-prefix=NOOPT %s +; RUN: wasm-ld --lto-O0 --lto-CGO3 %t.o -o %t -mllvm -debug-pass=Structure 2>&1 | FileCheck --check-prefix=OPT %s +; RUN: wasm-ld --lto-O3 --lto-CGO3 %t.o -o %t -mllvm -debug-pass=Structure 2>&1 | FileCheck --check-prefix=OPT %s + +; NOOPT-NOT: WebAssembly Optimize Returned +; OPT: WebAssembly Optimize Returned + +target datalayout = "e-m:e-p:32:32-p10:8:8-p20:8:8-i64:64-n32:64-S128" +target triple = "wasm32-unknown-unknown-wasm" + +define void @_start() { +entry: + ret void +} diff --git a/lld/wasm/Config.h b/lld/wasm/Config.h --- a/lld/wasm/Config.h +++ b/lld/wasm/Config.h @@ -14,6 +14,7 @@ #include "llvm/ADT/StringSet.h" #include "llvm/BinaryFormat/Wasm.h" #include "llvm/Support/CachePruning.h" +#include "llvm/Support/CodeGen.h" #include namespace lld { @@ -63,6 +64,7 @@ uint64_t zStackSize; unsigned ltoPartitions; unsigned ltoo; + llvm::CodeGenOpt::Level ltocgo; unsigned optimize; llvm::StringRef thinLTOJobs; bool ltoDebugPassManager; diff --git a/lld/wasm/Driver.cpp b/lld/wasm/Driver.cpp --- a/lld/wasm/Driver.cpp +++ b/lld/wasm/Driver.cpp @@ -424,6 +424,12 @@ config->importTable = args.hasArg(OPT_import_table); config->importUndefined = args.hasArg(OPT_import_undefined); config->ltoo = args::getInteger(args, OPT_lto_O, 2); + // Diagnose an out-of-range level instead of dereferencing an empty optional. + unsigned ltoCgo = args::getInteger(args, OPT_lto_CGO, config->ltoo); + if (auto level = CodeGenOpt::getLevel(ltoCgo)) + config->ltocgo = *level; + else + error("invalid codegen optimization level for LTO: " + Twine(ltoCgo)); config->ltoPartitions = args::getInteger(args, OPT_lto_partitions, 1); config->ltoDebugPassManager = args.hasArg(OPT_lto_debug_pass_manager); config->mapFile = args.getLastArgValue(OPT_Map); diff --git a/lld/wasm/LTO.cpp b/lld/wasm/LTO.cpp --- a/lld/wasm/LTO.cpp +++ b/lld/wasm/LTO.cpp @@ -51,7 +51,7 @@ c.DiagHandler = diagnosticHandler; c.OptLevel = config->ltoo; c.MAttrs = getMAttrs(); - c.CGOptLevel = 
args::getCGOptLevel(config->ltoo); + c.CGOptLevel = config->ltocgo; c.DebugPassManager = config->ltoDebugPassManager; if (config->relocatable) diff --git a/lld/wasm/Options.td b/lld/wasm/Options.td --- a/lld/wasm/Options.td +++ b/lld/wasm/Options.td @@ -256,6 +256,8 @@ // LTO-related options. def lto_O: JJ<"lto-O">, MetaVarName<"">, HelpText<"Optimization level for LTO">; +def lto_CGO: JJ<"lto-CGO">, MetaVarName<"">, + HelpText<"Codegen optimization level for LTO">; def lto_partitions: JJ<"lto-partitions=">, HelpText<"Number of LTO codegen partitions">; def disable_verify: F<"disable-verify">;