diff --git a/lld/ELF/Config.h b/lld/ELF/Config.h --- a/lld/ELF/Config.h +++ b/lld/ELF/Config.h @@ -192,6 +192,7 @@ bool trace; bool thinLTOEmitImportsFiles; bool thinLTOIndexOnly; + bool timeTraceEnabled; bool tocOptimize; bool undefinedVersion; bool useAndroidRelrTags = false; @@ -243,6 +244,7 @@ unsigned ltoo; unsigned optimize; unsigned thinLTOJobs; + unsigned timeTraceGranularity; int32_t splitStackAdjustSize; // The following config options do not directly correspond to any diff --git a/lld/ELF/Driver.cpp b/lld/ELF/Driver.cpp --- a/lld/ELF/Driver.cpp +++ b/lld/ELF/Driver.cpp @@ -56,6 +56,7 @@ #include "llvm/Support/Path.h" #include "llvm/Support/TarWriter.h" #include "llvm/Support/TargetSelect.h" +#include "llvm/Support/TimeProfiler.h" #include "llvm/Support/raw_ostream.h" #include #include @@ -487,37 +488,60 @@ if (args.hasArg(OPT_version)) return; - initLLVM(); - createFiles(args); - if (errorCount()) - return; + // Initialize time trace profiler. + if (config->timeTraceEnabled) + timeTraceProfilerInitialize(config->timeTraceGranularity, config->progName); - inferMachineType(); - setConfigs(args); - checkOptions(); - if (errorCount()) - return; + { + llvm::TimeTraceScope timeScope("ExecuteLinker"); - // The Target instance handles target-specific stuff, such as applying - // relocations or writing a PLT section. It also contains target-dependent - // values such as a default image base address. - target = getTarget(); + initLLVM(); + createFiles(args); + if (errorCount()) + return; - switch (config->ekind) { - case ELF32LEKind: - link(args); - return; - case ELF32BEKind: - link(args); - return; - case ELF64LEKind: - link(args); - return; - case ELF64BEKind: - link(args); - return; - default: - llvm_unreachable("unknown Config->EKind"); + inferMachineType(); + setConfigs(args); + checkOptions(); + if (errorCount()) + return; + + // The Target instance handles target-specific stuff, such as applying + // relocations or writing a PLT section. It also contains target-dependent + // values such as a default image base address. + target = getTarget(); + + switch (config->ekind) { + case ELF32LEKind: + link(args); + break; + case ELF32BEKind: + link(args); + break; + case ELF64LEKind: + link(args); + break; + case ELF64BEKind: + link(args); + break; + default: + llvm_unreachable("unknown Config->EKind"); + } + } + + if (config->timeTraceEnabled) { + // Write the result of the time trace profiler. + std::string path = args.getLastArgValue(OPT_time_trace_file_eq).str(); + if (path.empty()) + path = (config->outputFile + ".time-trace").str(); + std::error_code ec; + raw_fd_ostream os(path, ec, sys::fs::OF_Text); + if (ec) { + error("cannot open " + path + ": " + ec.message()); + return; + } + timeTraceProfilerWrite(os); + timeTraceProfilerCleanup(); } } @@ -958,6 +982,9 @@ getOldNewOptions(args, OPT_thinlto_object_suffix_replace_eq); config->thinLTOPrefixReplace = getOldNewOptions(args, OPT_thinlto_prefix_replace_eq); + config->timeTraceEnabled = args.hasArg(OPT_time_trace); + config->timeTraceGranularity = + args::getInteger(args, OPT_time_trace_granularity, 500); config->trace = args.hasArg(OPT_trace); config->undefined = args::getStrings(args, OPT_undefined); config->undefinedVersion = @@ -1604,6 +1631,7 @@ // Because all bitcode files that the program consists of are passed to // the compiler at once, it can do a whole-program optimization. template void LinkerDriver::compileBitcodeFiles() { + llvm::TimeTraceScope timeScope("LTO"); // Compile bitcode files and replace bitcode symbols. lto.reset(new BitcodeCompiler); for (BitcodeFile *file : bitcodeFiles) @@ -1733,6 +1761,7 @@ // Do actual linking. Note that when this function is called, // all linker scripts have already been parsed. template void LinkerDriver::link(opt::InputArgList &args) { + llvm::TimeTraceScope timeScope("Link", StringRef("LinkerDriver::Link")); // If a -hash-style option was not given, set to a default value, // which varies depending on the target. if (!args.hasArg(OPT_hash_style)) { @@ -1772,8 +1801,11 @@ // symbols that we need to the symbol table. This process might // add files to the link, via autolinking, these files are always // appended to the Files vector. - for (size_t i = 0; i < files.size(); ++i) - parseFile(files[i]); + { + llvm::TimeTraceScope timeScope("Parse input files"); + for (size_t i = 0; i < files.size(); ++i) + parseFile(files[i]); + } // Now that we have every file, we can decide if we will need a // dynamic symbol table. diff --git a/lld/ELF/ICF.cpp b/lld/ELF/ICF.cpp --- a/lld/ELF/ICF.cpp +++ b/lld/ELF/ICF.cpp @@ -84,6 +84,7 @@ #include "llvm/ADT/StringExtras.h" #include "llvm/BinaryFormat/ELF.h" #include "llvm/Object/ELF.h" +#include "llvm/Support/TimeProfiler.h" #include "llvm/Support/xxhash.h" #include #include @@ -525,7 +526,10 @@ } // ICF entry point function. -template void doIcf() { ICF().run(); } +template void doIcf() { + llvm::TimeTraceScope timeScope("ICF"); + ICF().run(); +} template void doIcf(); template void doIcf(); diff --git a/lld/ELF/LTO.cpp b/lld/ELF/LTO.cpp --- a/lld/ELF/LTO.cpp +++ b/lld/ELF/LTO.cpp @@ -113,6 +113,9 @@ c.HasWholeProgramVisibility = config->ltoWholeProgramVisibility; + c.TimeTraceEnabled = config->timeTraceEnabled; + c.TimeTraceGranularity = config->timeTraceGranularity; + c.CSIRProfile = std::string(config->ltoCSProfileFile); c.RunCSIRInstr = config->ltoCSProfileGenerate; diff --git a/lld/ELF/MarkLive.cpp b/lld/ELF/MarkLive.cpp --- a/lld/ELF/MarkLive.cpp +++ b/lld/ELF/MarkLive.cpp @@ -31,6 +31,7 @@ #include "lld/Common/Strings.h" #include "llvm/ADT/STLExtras.h" #include "llvm/Object/ELF.h" +#include "llvm/Support/TimeProfiler.h" #include #include @@ -323,6 +324,7 @@ // input sections. This function make some or all of them on // so that they are emitted to the output file. template void markLive() { + llvm::TimeTraceScope timeScope("markLive"); // If -gc-sections is not given, no sections are removed. if (!config->gcSections) { for (InputSectionBase *sec : inputSections) diff --git a/lld/ELF/Options.td b/lld/ELF/Options.td --- a/lld/ELF/Options.td +++ b/lld/ELF/Options.td @@ -354,6 +354,12 @@ "Run the linker multi-threaded (default)", "Do not run the linker multi-threaded">; +def time_trace: F<"time-trace">, HelpText<"Record time trace">; +def time_trace_file_eq: J<"time-trace-file=">, HelpText<"Specify time trace output file">; + +defm time_trace_granularity: Eq<"time-trace-granularity", + "Minimum time granularity (in microseconds) traced by time profiler">; + defm toc_optimize : B<"toc-optimize", "(PowerPC64) Enable TOC related optimizations (default)", "(PowerPC64) Disable TOC related optimizations">; diff --git a/lld/ELF/SyntheticSections.cpp b/lld/ELF/SyntheticSections.cpp --- a/lld/ELF/SyntheticSections.cpp +++ b/lld/ELF/SyntheticSections.cpp @@ -36,6 +36,7 @@ #include "llvm/Support/Endian.h" #include "llvm/Support/LEB128.h" #include "llvm/Support/MD5.h" +#include "llvm/Support/TimeProfiler.h" #include #include @@ -3237,6 +3238,7 @@ } template void splitSections() { + llvm::TimeTraceScope timeScope("Split sections"); // splitIntoPieces needs to be called on each MergeInputSection // before calling finalizeContents(). parallelForEach(inputSections, [](InputSectionBase *sec) { diff --git a/lld/ELF/Writer.cpp b/lld/ELF/Writer.cpp --- a/lld/ELF/Writer.cpp +++ b/lld/ELF/Writer.cpp @@ -27,6 +27,7 @@ #include "llvm/ADT/StringSwitch.h" #include "llvm/Support/RandomNumberGenerator.h" #include "llvm/Support/SHA1.h" +#include "llvm/Support/TimeProfiler.h" #include "llvm/Support/xxhash.h" #include @@ -139,7 +140,10 @@ !config->dynamicLinker.empty() && script->needsInterpSection(); } -template void writeResult() { Writer().run(); } +template void writeResult() { + llvm::TimeTraceScope timeScope("Write output file"); + Writer().run(); +} static void removeEmptyPTLoad(std::vector &phdrs) { llvm::erase_if(phdrs, [&](const PhdrEntry *p) { diff --git a/lld/test/ELF/lto/thinlto-time-trace.ll b/lld/test/ELF/lto/thinlto-time-trace.ll new file mode 100644 --- /dev/null +++ b/lld/test/ELF/lto/thinlto-time-trace.ll @@ -0,0 +1,43 @@ +; REQUIRES: x86 + +; Test ThinLTO with time trace +; RUN: opt -module-summary %s -o %t1.o +; RUN: opt -module-summary %p/Inputs/thinlto.ll -o %t2.o + +; Test single-threaded +; RUN: ld.lld --thinlto-jobs=1 -time-trace -time-trace-granularity=0 -shared %t1.o %t2.o -o %t3.so +; RUN: cat %t3.so.time-trace \ +; RUN: | %python -c 'import json, sys; json.dump(json.loads(sys.stdin.read()), sys.stdout, sort_keys=True, indent=2)' \ +; RUN: | FileCheck %s + +; Test multi-threaded +; RUN: ld.lld -time-trace -time-trace-granularity=0 -shared %t1.o %t2.o -o %t4.so +; RUN: cat %t4.so.time-trace \ +; RUN: | %python -c 'import json, sys; json.dump(json.loads(sys.stdin.read()), sys.stdout, sort_keys=True, indent=2)' \ +; RUN: | FileCheck %s + +; CHECK: "traceEvents": [ +; Check fields for an event are present +; CHECK: "args": +; CHECK-NEXT: "detail": +; CHECK: "dur": +; CHECK-NEXT: "name": +; CHECK-NEXT: "ph": +; CHECK-NEXT: "pid": +; CHECK-NEXT: "tid": +; CHECK-NEXT: "ts": + +; Check that an optimisation event is present +; CHECK: "name": "OptModule" + +target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +declare void @g(...) + +define void @f() { +entry: + call void (...) @g() + ret void +} + diff --git a/lld/test/ELF/time-trace.s b/lld/test/ELF/time-trace.s new file mode 100644 --- /dev/null +++ b/lld/test/ELF/time-trace.s @@ -0,0 +1,40 @@ +# REQUIRES: x86 +# RUN: llvm-mc -filetype=obj -triple=x86_64 %s -o %t.o + +# Test implicit trace file name +# RUN: ld.lld -time-trace -time-trace-granularity=0 -o %t1.elf %t.o +# RUN: cat %t1.elf.time-trace \ +# RUN: | %python -c 'import json, sys; json.dump(json.loads(sys.stdin.read()), sys.stdout, sort_keys=True, indent=2)' \ +# RUN: | FileCheck %s + +# Test specified trace file name +# RUN: ld.lld -time-trace -time-trace-file=%t2.json -time-trace-granularity=0 -o %t2.elf %t.o +# RUN: cat %t2.json \ +# RUN: | %python -c 'import json, sys; json.dump(json.loads(sys.stdin.read()), sys.stdout, sort_keys=True, indent=2)' \ +# RUN: | FileCheck %s + +# Test trace requested to stdout +# RUN: ld.lld -time-trace -time-trace-file=- -time-trace-granularity=0 -o %t3.elf %t.o \ +# RUN: | %python -c 'import json, sys; json.dump(json.loads(sys.stdin.read()), sys.stdout, sort_keys=True, indent=2)' \ +# RUN: | FileCheck %s + +# CHECK: "traceEvents": [ + +# Check one event has correct fields +# CHECK: "dur": +# CHECK-NEXT: "name": +# CHECK-NEXT: "ph": +# CHECK-NEXT: "pid": +# CHECK-NEXT: "tid": +# CHECK-NEXT: "ts": + +# Check there is an ExecuteLinker event +# CHECK: "name": "ExecuteLinker" + +# Check process_name entry field +# CHECK: "name": "ld.lld{{(.exe)?}}" +# CHECK: "name": "process_name" + +.globl _start +_start: + ret diff --git a/llvm/include/llvm/LTO/Config.h b/llvm/include/llvm/LTO/Config.h --- a/llvm/include/llvm/LTO/Config.h +++ b/llvm/include/llvm/LTO/Config.h @@ -124,6 +124,12 @@ /// Statistics output file path. std::string StatsFile; + /// Time trace enabled. + bool TimeTraceEnabled = false; + + /// Time trace granularity. + unsigned TimeTraceGranularity = 500; + bool ShouldDiscardValueNames = true; DiagnosticHandlerFunction DiagHandler; diff --git a/llvm/lib/LTO/LTO.cpp b/llvm/lib/LTO/LTO.cpp --- a/llvm/lib/LTO/LTO.cpp +++ b/llvm/lib/LTO/LTO.cpp @@ -40,6 +40,7 @@ #include "llvm/Support/TargetRegistry.h" #include "llvm/Support/ThreadPool.h" #include "llvm/Support/Threading.h" +#include "llvm/Support/TimeProfiler.h" #include "llvm/Support/VCSRevision.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetMachine.h" @@ -1160,6 +1161,9 @@ &ResolvedODR, const GVSummaryMapTy &DefinedGlobals, MapVector &ModuleMap) { + if (LLVM_ENABLE_THREADS && Conf.TimeTraceEnabled) + timeTraceProfilerInitialize(Conf.TimeTraceGranularity, + "thin backend"); Error E = runThinLTOBackendThread( AddStream, Cache, Task, BM, CombinedIndex, ImportList, ExportList, ResolvedODR, DefinedGlobals, ModuleMap); @@ -1170,6 +1174,8 @@ else Err = std::move(E); } + if (LLVM_ENABLE_THREADS && Conf.TimeTraceEnabled) + timeTraceProfilerFinishThread(); }, BM, std::ref(CombinedIndex), std::ref(ImportList), std::ref(ExportList), std::ref(ResolvedODR), std::ref(DefinedGlobals), std::ref(ModuleMap));