Index: lld/ELF/Config.h =================================================================== --- lld/ELF/Config.h +++ lld/ELF/Config.h @@ -191,6 +191,7 @@ bool trace; bool thinLTOEmitImportsFiles; bool thinLTOIndexOnly; + bool timeTraceEnabled; bool tocOptimize; bool undefinedVersion; bool useAndroidRelrTags = false; @@ -240,6 +241,7 @@ unsigned ltoo; unsigned optimize; unsigned thinLTOJobs; + unsigned timeTraceGranularity; int32_t splitStackAdjustSize; // The following config options do not directly correspond to any Index: lld/ELF/Driver.cpp =================================================================== --- lld/ELF/Driver.cpp +++ lld/ELF/Driver.cpp @@ -56,6 +56,7 @@ #include "llvm/Support/Path.h" #include "llvm/Support/TarWriter.h" #include "llvm/Support/TargetSelect.h" +#include "llvm/Support/TimeProfiler.h" #include "llvm/Support/raw_ostream.h" #include #include @@ -490,6 +491,10 @@ if (args.hasArg(OPT_version)) return; + // Initialize time trace profiler. + if (config->timeTraceEnabled) + timeTraceProfilerInitialize(config->timeTraceGranularity, config->progName); + initLLVM(); createFiles(args); if (errorCount()) @@ -509,19 +514,34 @@ switch (config->ekind) { case ELF32LEKind: link(args); - return; + break; case ELF32BEKind: link(args); - return; + break; case ELF64LEKind: link(args); - return; + break; case ELF64BEKind: link(args); - return; + break; default: llvm_unreachable("unknown Config->EKind"); } + + if (config->timeTraceEnabled) { + // Write the result of the time trace profiler. + std::string path = args.getLastArgValue(OPT_time_trace_file_eq); + if (path.empty()) + path = (config->outputFile + ".time-trace").str(); + std::error_code ec; + raw_fd_ostream os(path, ec, sys::fs::OF_Text); + if (ec) { + error("cannot open " + path + ": " + ec.message()); + return; + } + timeTraceProfilerWrite(os); + timeTraceProfilerCleanup(); + } } static std::string getRpath(opt::InputArgList &args) { @@ -953,6 +973,9 @@ getOldNewOptions(args, OPT_thinlto_object_suffix_replace_eq); config->thinLTOPrefixReplace = getOldNewOptions(args, OPT_thinlto_prefix_replace_eq); + config->timeTraceEnabled = args.hasArg(OPT_time_trace); + config->timeTraceGranularity = + args::getInteger(args, OPT_time_trace_granularity, 500); config->trace = args.hasArg(OPT_trace); config->undefined = args::getStrings(args, OPT_undefined); config->undefinedVersion = @@ -1597,6 +1620,7 @@ // Because all bitcode files that the program consists of are passed to // the compiler at once, it can do a whole-program optimization. template void LinkerDriver::compileBitcodeFiles() { + llvm::TimeTraceScope timeScope("LTO"); // Compile bitcode files and replace bitcode symbols. lto.reset(new BitcodeCompiler); for (BitcodeFile *file : bitcodeFiles) @@ -1723,6 +1747,8 @@ // Do actual linking. Note that when this function is called, // all linker scripts have already been parsed. template void LinkerDriver::link(opt::InputArgList &args) { + llvm::TimeTraceScope timeScope("ExecuteLinker"); + // If a -hash-style option was not given, set to a default value, // which varies depending on the target. if (!args.hasArg(OPT_hash_style)) { @@ -1762,8 +1788,11 @@ // symbols that we need to the symbol table. This process might // add files to the link, via autolinking, these files are always // appended to the Files vector. - for (size_t i = 0; i < files.size(); ++i) - parseFile(files[i]); + { + llvm::TimeTraceScope timeScope("Parse input files"); + for (size_t i = 0; i < files.size(); ++i) + parseFile(files[i]); + } // Now that we have every file, we can decide if we will need a // dynamic symbol table. @@ -1981,11 +2010,15 @@ // merging MergeInputSections into a single MergeSyntheticSection. From this // point onwards InputSectionDescription::sections should be used instead of // sectionBases. - for (BaseCommand *base : script->sectionCommands) - if (auto *sec = dyn_cast(base)) - sec->finalizeInputSections(); - llvm::erase_if(inputSections, - [](InputSectionBase *s) { return isa(s); }); + { + llvm::TimeTraceScope timeScope("Merge input sections"); + for (BaseCommand *base : script->sectionCommands) + if (auto *sec = dyn_cast(base)) + sec->finalizeInputSections(); + llvm::erase_if(inputSections, [](InputSectionBase *s) { + return isa(s); + }); + } // Two input sections with different output sections should not be folded. // ICF runs after processSectionCommands() so that we know the output sections. Index: lld/ELF/ICF.cpp =================================================================== --- lld/ELF/ICF.cpp +++ lld/ELF/ICF.cpp @@ -84,6 +84,7 @@ #include "llvm/ADT/StringExtras.h" #include "llvm/BinaryFormat/ELF.h" #include "llvm/Object/ELF.h" +#include "llvm/Support/TimeProfiler.h" #include "llvm/Support/xxhash.h" #include #include @@ -525,7 +526,10 @@ } // ICF entry point function. -template void doIcf() { ICF().run(); } +template void doIcf() { + llvm::TimeTraceScope timeScope("ICF"); + ICF().run(); +} template void doIcf(); template void doIcf(); Index: lld/ELF/LTO.cpp =================================================================== --- lld/ELF/LTO.cpp +++ lld/ELF/LTO.cpp @@ -108,6 +108,9 @@ c.DebugPassManager = config->ltoDebugPassManager; c.DwoDir = config->dwoDir; + c.TimeTraceEnabled = config->timeTraceEnabled; + c.TimeTraceGranularity = config->timeTraceGranularity; + c.CSIRProfile = config->ltoCSProfileFile; c.RunCSIRInstr = config->ltoCSProfileGenerate; Index: lld/ELF/MarkLive.cpp =================================================================== --- lld/ELF/MarkLive.cpp +++ lld/ELF/MarkLive.cpp @@ -31,6 +31,7 @@ #include "lld/Common/Strings.h" #include "llvm/ADT/STLExtras.h" #include "llvm/Object/ELF.h" +#include "llvm/Support/TimeProfiler.h" #include #include @@ -323,6 +324,7 @@ // input sections. This function make some or all of them on // so that they are emitted to the output file. template void markLive() { + llvm::TimeTraceScope timeScope("GC"); // If -gc-sections is not given, no sections are removed. if (!config->gcSections) { for (InputSectionBase *sec : inputSections) Index: lld/ELF/Options.td =================================================================== --- lld/ELF/Options.td +++ lld/ELF/Options.td @@ -358,6 +358,12 @@ "Run the linker multi-threaded (default)", "Do not run the linker multi-threaded">; +def time_trace: F<"time-trace">, HelpText<"Record time trace">; +def time_trace_file_eq: J<"time-trace-file=">, HelpText<"Specify time trace output file">; + +defm time_trace_granularity: Eq<"time-trace-granularity", + "Minimum time granularity (in microseconds) traced by time profiler">; + defm toc_optimize : B<"toc-optimize", "(PowerPC64) Enable TOC related optimizations (default)", "(PowerPC64) Disable TOC related optimizations">; Index: lld/ELF/SyntheticSections.cpp =================================================================== --- lld/ELF/SyntheticSections.cpp +++ lld/ELF/SyntheticSections.cpp @@ -36,6 +36,7 @@ #include "llvm/Support/Endian.h" #include "llvm/Support/LEB128.h" #include "llvm/Support/MD5.h" +#include "llvm/Support/TimeProfiler.h" #include #include @@ -3132,6 +3133,7 @@ } template void splitSections() { + llvm::TimeTraceScope timeScope("Split sections"); // splitIntoPieces needs to be called on each MergeInputSection // before calling finalizeContents(). parallelForEach(inputSections, [](InputSectionBase *sec) { Index: lld/ELF/Writer.cpp =================================================================== --- lld/ELF/Writer.cpp +++ lld/ELF/Writer.cpp @@ -27,6 +27,7 @@ #include "llvm/ADT/StringSwitch.h" #include "llvm/Support/RandomNumberGenerator.h" #include "llvm/Support/SHA1.h" +#include "llvm/Support/TimeProfiler.h" #include "llvm/Support/xxhash.h" #include @@ -139,7 +140,10 @@ script->needsInterpSection(); } -template void writeResult() { Writer().run(); } +template void writeResult() { + llvm::TimeTraceScope timeScope("Write output file"); + Writer().run(); +} static void removeEmptyPTLoad(std::vector &phdrs) { llvm::erase_if(phdrs, [&](const PhdrEntry *p) { Index: lld/test/ELF/lto/thinlto-time-trace.ll =================================================================== --- /dev/null +++ lld/test/ELF/lto/thinlto-time-trace.ll @@ -0,0 +1,43 @@ +; REQUIRES: x86 + +; Test ThinLTO with time trace +; RUN: opt -module-summary %s -o %t1.o +; RUN: opt -module-summary %p/Inputs/thinlto.ll -o %t2.o + +; Test single-threaded +; RUN: ld.lld --thinlto-jobs=1 -time-trace -time-trace-granularity=0 -shared %t1.o %t2.o -o %t3.so +; RUN: cat %t3.so.time-trace \ +; RUN: | %python -c 'import json, sys; json.dump(json.loads(sys.stdin.read()), sys.stdout, sort_keys=True, indent=2)' \ +; RUN: | FileCheck %s + +; Test multi-threaded +; RUN: ld.lld -time-trace -time-trace-granularity=0 -shared %t1.o %t2.o -o %t4.so +; RUN: cat %t4.so.time-trace \ +; RUN: | %python -c 'import json, sys; json.dump(json.loads(sys.stdin.read()), sys.stdout, sort_keys=True, indent=2)' \ +; RUN: | FileCheck %s + +; CHECK: "traceEvents": [ +; Check fields for an event are present +; CHECK: "args": +; CHECK-NEXT: "detail": +; CHECK: "dur": +; CHECK-NEXT: "name": +; CHECK-NEXT: "ph": +; CHECK-NEXT: "pid": +; CHECK-NEXT: "tid": +; CHECK-NEXT: "ts": + +; Check that an optimisation event is present +; CHECK: "name": "OptModule" + +target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +declare void @g(...) + +define void @f() { +entry: + call void (...) @g() + ret void +} + Index: lld/test/ELF/time-trace.s =================================================================== --- /dev/null +++ lld/test/ELF/time-trace.s @@ -0,0 +1,40 @@ +# REQUIRES: x86 +# RUN: llvm-mc -filetype=obj -triple=x86_64 %s -o %t.o + +# Test implicit trace file name +# RUN: ld.lld -time-trace -time-trace-granularity=0 -o %t1.elf %t.o +# RUN: cat %t1.elf.time-trace \ +# RUN: | %python -c 'import json, sys; json.dump(json.loads(sys.stdin.read()), sys.stdout, sort_keys=True, indent=2)' \ +# RUN: | FileCheck %s + +# Test specified trace file name +# RUN: ld.lld -time-trace -time-trace-file=%t2.json -time-trace-granularity=0 -o %t2.elf %t.o +# RUN: cat %t2.json \ +# RUN: | %python -c 'import json, sys; json.dump(json.loads(sys.stdin.read()), sys.stdout, sort_keys=True, indent=2)' \ +# RUN: | FileCheck %s + +# Test trace requested to stdout +# RUN: ld.lld -time-trace -time-trace-file=- -time-trace-granularity=0 -o %t3.elf %t.o \ +# RUN: | %python -c 'import json, sys; json.dump(json.loads(sys.stdin.read()), sys.stdout, sort_keys=True, indent=2)' \ +# RUN: | FileCheck %s + +# CHECK: "traceEvents": [ + +# Check one event has correct fields +# CHECK: "dur": +# CHECK-NEXT: "name": +# CHECK-NEXT: "ph": +# CHECK-NEXT: "pid": +# CHECK-NEXT: "tid": +# CHECK-NEXT: "ts": + +# Check there is an ExecuteLinker event +# CHECK: "name": "ExecuteLinker" + +# Check process_name entry field +# CHECK: "name": "ld.lld{{(.exe)?}}" +# CHECK: "name": "process_name" + +.globl _start +_start: + ret Index: llvm/include/llvm/LTO/Config.h =================================================================== --- llvm/include/llvm/LTO/Config.h +++ llvm/include/llvm/LTO/Config.h @@ -117,6 +117,12 @@ /// Statistics output file path. std::string StatsFile; + /// Time trace enabled. + bool TimeTraceEnabled = false; + + /// Time trace granularity. + unsigned TimeTraceGranularity = 500; + bool ShouldDiscardValueNames = true; DiagnosticHandlerFunction DiagHandler; Index: llvm/lib/LTO/LTO.cpp =================================================================== --- llvm/lib/LTO/LTO.cpp +++ llvm/lib/LTO/LTO.cpp @@ -39,6 +39,7 @@ #include "llvm/Support/TargetRegistry.h" #include "llvm/Support/ThreadPool.h" #include "llvm/Support/Threading.h" +#include "llvm/Support/TimeProfiler.h" #include "llvm/Support/VCSRevision.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetMachine.h" @@ -1133,6 +1134,9 @@ &ResolvedODR, const GVSummaryMapTy &DefinedGlobals, MapVector &ModuleMap) { + if (LLVM_ENABLE_THREADS && Conf.TimeTraceEnabled) + timeTraceProfilerInitialize(Conf.TimeTraceGranularity, + "thin backend"); Error E = runThinLTOBackendThread( AddStream, Cache, Task, BM, CombinedIndex, ImportList, ExportList, ResolvedODR, DefinedGlobals, ModuleMap); @@ -1143,6 +1147,8 @@ else Err = std::move(E); } + if (LLVM_ENABLE_THREADS && Conf.TimeTraceEnabled) + timeTraceProfilerFinishThread(); }, BM, std::ref(CombinedIndex), std::ref(ImportList), std::ref(ExportList), std::ref(ResolvedODR), std::ref(DefinedGlobals), std::ref(ModuleMap));