Index: clang/tools/driver/cc1_main.cpp =================================================================== --- clang/tools/driver/cc1_main.cpp +++ clang/tools/driver/cc1_main.cpp @@ -218,7 +218,7 @@ if (Clang->getFrontendOpts().TimeTrace) { llvm::timeTraceProfilerInitialize( - Clang->getFrontendOpts().TimeTraceGranularity); + Clang->getFrontendOpts().TimeTraceGranularity, Argv0); } // --print-supported-cpus takes priority over the actual compilation. if (Clang->getFrontendOpts().PrintSupportedCPUs) Index: lld/ELF/Driver.cpp =================================================================== --- lld/ELF/Driver.cpp +++ lld/ELF/Driver.cpp @@ -56,6 +56,7 @@ #include "llvm/Support/Path.h" #include "llvm/Support/TarWriter.h" #include "llvm/Support/TargetSelect.h" +#include "llvm/Support/TimeProfiler.h" #include "llvm/Support/raw_ostream.h" #include #include @@ -483,6 +484,12 @@ if (errorCount()) return; + // Initialize time trace. + if (args.hasArg(OPT_time_trace)) + llvm::timeTraceProfilerInitialize( + args::getInteger(args, OPT_time_trace_granularity, 500u), + config->progName); + // The Target instance handles target-specific stuff, such as applying // relocations or writing a PLT section. It also contains target-dependent // values such as a default image base address. @@ -491,19 +498,33 @@ switch (config->ekind) { case ELF32LEKind: link(args); - return; + break; case ELF32BEKind: link(args); - return; + break; case ELF64LEKind: link(args); - return; + break; case ELF64BEKind: link(args); - return; + break; default: llvm_unreachable("unknown Config->EKind"); } + + if (llvm::timeTraceProfilerEnabled()) { + SmallString<128> path(config->outputFile); + llvm::sys::path::replace_extension(path, "json"); + std::error_code errorCode; + raw_fd_ostream profilerOS(path, errorCode, sys::fs::F_Text); + if (errorCode) { + error("cannot open " + path + ": " + errorCode.message()); + return; + } + llvm::timeTraceProfilerWrite(profilerOS); + llvm::timeTraceProfilerCleanup(); + } + return; } static std::string getRpath(opt::InputArgList &args) { @@ -1570,6 +1591,7 @@ // Because all bitcode files that the program consists of are passed to // the compiler at once, it can do a whole-program optimization. template void LinkerDriver::compileBitcodeFiles() { + llvm::TimeTraceScope timeScope("LTO", StringRef("")); // Compile bitcode files and replace bitcode symbols. lto.reset(new BitcodeCompiler); for (BitcodeFile *file : bitcodeFiles) @@ -1696,6 +1718,8 @@ // Do actual linking. Note that when this function is called, // all linker scripts have already been parsed. template void LinkerDriver::link(opt::InputArgList &args) { + llvm::TimeTraceScope timeScope("ExecuteLinker", StringRef("")); + // If a -hash-style option was not given, set to a default value, // which varies depending on the target. if (!args.hasArg(OPT_hash_style)) { @@ -1735,8 +1759,11 @@ // symbols that we need to the symbol table. This process might // add files to the link, via autolinking, these files are always // appended to the Files vector. - for (size_t i = 0; i < files.size(); ++i) - parseFile(files[i]); + { + llvm::TimeTraceScope timeScope("Parse input files", StringRef("")); + for (size_t i = 0; i < files.size(); ++i) + parseFile(files[i]); + } // Now that we have every file, we can decide if we will need a // dynamic symbol table. Index: lld/ELF/ICF.cpp =================================================================== --- lld/ELF/ICF.cpp +++ lld/ELF/ICF.cpp @@ -84,6 +84,7 @@ #include "llvm/ADT/StringExtras.h" #include "llvm/BinaryFormat/ELF.h" #include "llvm/Object/ELF.h" +#include "llvm/Support/TimeProfiler.h" #include "llvm/Support/xxhash.h" #include #include @@ -445,6 +446,7 @@ // The main function of ICF. template void ICF::run() { + llvm::TimeTraceScope timeScope("ICF", StringRef("")); // Collect sections to merge. for (InputSectionBase *sec : inputSections) { auto *s = cast(sec); Index: lld/ELF/MarkLive.cpp =================================================================== --- lld/ELF/MarkLive.cpp +++ lld/ELF/MarkLive.cpp @@ -31,6 +31,7 @@ #include "lld/Common/Strings.h" #include "llvm/ADT/STLExtras.h" #include "llvm/Object/ELF.h" +#include "llvm/Support/TimeProfiler.h" #include #include @@ -318,6 +319,7 @@ // input sections. This function make some or all of them on // so that they are emitted to the output file. template void markLive() { + llvm::TimeTraceScope timeScope("GC", StringRef("")); // If -gc-sections is not given, no sections are removed. if (!config->gcSections) { for (InputSectionBase *sec : inputSections) Index: lld/ELF/Options.td =================================================================== --- lld/ELF/Options.td +++ lld/ELF/Options.td @@ -358,6 +358,11 @@ "Run the linker multi-threaded (default)", "Do not run the linker multi-threaded">; +def time_trace: F<"time-trace">, HelpText<"Record time trace">; + +defm time_trace_granularity: Eq<"time-trace-granularity", + "Minimum time granularity (in microseconds) traced by time profiler">; + defm toc_optimize : B<"toc-optimize", "(PowerPC64) Enable TOC related optimizations (default)", "(PowerPC64) Disable TOC related optimizations">; Index: lld/ELF/Writer.cpp =================================================================== --- lld/ELF/Writer.cpp +++ lld/ELF/Writer.cpp @@ -27,6 +27,7 @@ #include "llvm/ADT/StringSwitch.h" #include "llvm/Support/RandomNumberGenerator.h" #include "llvm/Support/SHA1.h" +#include "llvm/Support/TimeProfiler.h" #include "llvm/Support/xxhash.h" #include @@ -139,7 +140,10 @@ script->needsInterpSection(); } -template void writeResult() { Writer().run(); } +template void writeResult() { + llvm::TimeTraceScope timeScope("Write output file", StringRef("")); + Writer().run(); +} static void removeEmptyPTLoad(std::vector &phdrs) { llvm::erase_if(phdrs, [&](const PhdrEntry *p) { Index: llvm/include/llvm/Support/TimeProfiler.h =================================================================== --- llvm/include/llvm/Support/TimeProfiler.h +++ llvm/include/llvm/Support/TimeProfiler.h @@ -19,7 +19,8 @@ /// Initialize the time trace profiler. /// This sets up the global \p TimeTraceProfilerInstance /// variable to be the profiler instance. -void timeTraceProfilerInitialize(unsigned TimeTraceGranularity); +void timeTraceProfilerInitialize(unsigned TimeTraceGranularity, + StringRef ProgName); /// Cleanup the time trace profiler, if it was initialized. void timeTraceProfilerCleanup(); Index: llvm/lib/Support/TimeProfiler.cpp =================================================================== --- llvm/lib/Support/TimeProfiler.cpp +++ llvm/lib/Support/TimeProfiler.cpp @@ -13,8 +13,9 @@ #include "llvm/Support/TimeProfiler.h" #include "llvm/ADT/StringMap.h" #include "llvm/Support/CommandLine.h" -#include "llvm/Support/FileSystem.h" #include "llvm/Support/JSON.h" +#include "llvm/Support/Path.h" +#include "llvm/Support/Threading.h" #include #include #include @@ -37,10 +38,11 @@ TimePointType End; std::string Name; std::string Detail; + uint64_t Tid; Entry(TimePointType &&S, TimePointType &&E, std::string &&N, std::string &&Dt) : Start(std::move(S)), End(std::move(E)), Name(std::move(N)), - Detail(std::move(Dt)){}; + Detail(std::move(Dt)), Tid(llvm::get_threadid()){}; // Calculate timings for FlameGraph. Cast time points to microsecond precision // rather than casting duration. This avoid truncation issues causing inner @@ -59,26 +61,47 @@ }; struct TimeTraceProfiler { - TimeTraceProfiler() { + TimeTraceProfiler(StringRef ProgName) { StartTime = steady_clock::now(); + this->ProgName = ProgName; } void begin(std::string Name, llvm::function_ref Detail) { + std::lock_guard Lock(Mu); Stack.emplace_back(steady_clock::now(), TimePointType(), std::move(Name), Detail()); } void end() { + std::lock_guard Lock(Mu); + uint64_t Tid = llvm::get_threadid(); + assert(!Stack.empty() && "Must call begin() first"); - auto &E = Stack.back(); + + // Find the latest entry from this thread on the stack. + auto LatestEvent = + std::find_if(Stack.rbegin(), Stack.rend(), + [&](const Entry &Val) { return Val.Tid == Tid; }); + // Should always be able to find a stack entry on this thread. + assert(LatestEvent != Stack.rend() && + "end() without begin() found on thread"); + + // Get reference from iterator. + auto &E = *LatestEvent; + E.End = steady_clock::now(); // Check that end times monotonically increase. - assert((Entries.empty() || - (E.getFlameGraphStartUs(StartTime) + E.getFlameGraphDurUs() >= - Entries.back().getFlameGraphStartUs(StartTime) + - Entries.back().getFlameGraphDurUs())) && - "TimeProfiler scope ended earlier than previous scope"); + // auto LastEntryOnThreadIter = std::find_if(Entries.rbegin(), + // Entries.rend(), [&](const Entry &Val) { + // return Val.Tid == Tid; + // }); + // auto &LastEntryOnThread = *LastEntryOnThreadIter; + // assert((Entries.empty() || + // (E.getFlameGraphStartUs(StartTime) + E.getFlameGraphDurUs() >= + // LastEntryOnThread.getFlameGraphStartUs(StartTime) + + // LastEntryOnThread.getFlameGraphDurUs())) && + // "TimeProfiler scope ended earlier than previous scope"); // Calculate duration at full precision for overall counts. DurationType Duration = E.End - E.Start; @@ -92,18 +115,21 @@ // templates from within, we only want to add the topmost one. "topmost" // happens to be the ones that don't have any currently open entries above // itself. - if (std::find_if(++Stack.rbegin(), Stack.rend(), [&](const Entry &Val) { - return Val.Name == E.Name; - }) == Stack.rend()) { + if (std::find_if(std::next(LatestEvent), Stack.rend(), + [&](const Entry &Val) { + return (Val.Name == E.Name) && (Val.Tid == Tid); + }) == Stack.rend()) { auto &CountAndTotal = CountAndTotalPerName[E.Name]; CountAndTotal.first++; CountAndTotal.second += Duration; } - Stack.pop_back(); + // Remove the last entry in the stack that matches this Tid. + Stack.erase(std::next(LatestEvent).base()); } void Write(raw_pwrite_stream &OS) { + std::lock_guard Lock(Mu); assert(Stack.empty() && "All profiler sections should be ended when calling Write"); json::OStream J(OS); @@ -118,7 +144,7 @@ J.object([&]{ J.attribute("pid", 1); - J.attribute("tid", 0); + J.attribute("tid", int64_t(E.Tid)); J.attribute("ph", "X"); J.attribute("ts", StartUs); J.attribute("dur", DurUs); @@ -129,7 +155,12 @@ // Emit totals by section name as additional "thread" events, sorted from // longest one. - int Tid = 1; + // Find highest used thread id. + uint64_t MaxTid = 0; + for (const auto &E : Entries) { + MaxTid = std::max(MaxTid, E.Tid); + } + uint64_t Tid = MaxTid + 1; std::vector SortedTotals; SortedTotals.reserve(CountAndTotalPerName.size()); for (const auto &E : CountAndTotalPerName) @@ -146,7 +177,7 @@ J.object([&]{ J.attribute("pid", 1); - J.attribute("tid", Tid); + J.attribute("tid", int64_t(Tid)); J.attribute("ph", "X"); J.attribute("ts", 0); J.attribute("dur", DurUs); @@ -168,7 +199,7 @@ J.attribute("ts", 0); J.attribute("ph", "M"); J.attribute("name", "process_name"); - J.attributeObject("args", [&] { J.attribute("name", "clang"); }); + J.attributeObject("args", [&] { J.attribute("name", ProgName); }); }); J.arrayEnd(); @@ -176,19 +207,23 @@ J.objectEnd(); } - SmallVector Stack; - SmallVector Entries; - StringMap CountAndTotalPerName; + std::mutex Mu; + SmallVector Stack /*GUARDED_BY(Mu)*/; + SmallVector Entries /*GUARDED_BY(Mu)*/; + StringMap CountAndTotalPerName /*GUARDED_BY(Mu)*/; TimePointType StartTime; + StringRef ProgName; // Minimum time granularity (in microseconds) unsigned TimeTraceGranularity; }; -void timeTraceProfilerInitialize(unsigned TimeTraceGranularity) { +void timeTraceProfilerInitialize(unsigned TimeTraceGranularity, + StringRef ProgName) { assert(TimeTraceProfilerInstance == nullptr && "Profiler should not be initialized"); - TimeTraceProfilerInstance = new TimeTraceProfiler(); + TimeTraceProfilerInstance = + new TimeTraceProfiler(llvm::sys::path::filename(ProgName)); TimeTraceProfilerInstance->TimeTraceGranularity = TimeTraceGranularity; }