diff --git a/lldb/include/lldb/Target/TraceCursor.h b/lldb/include/lldb/Target/TraceCursor.h --- a/lldb/include/lldb/Target/TraceCursor.h +++ b/lldb/include/lldb/Target/TraceCursor.h @@ -215,7 +215,7 @@ /// of this cursor. ExecutionContextRef &GetExecutionContextRef(); - /// Instruction, event or error information + /// Trace item information (instructions, errors and events) /// \{ /// \return @@ -255,27 +255,35 @@ /// The load address of the instruction the cursor is pointing at. virtual lldb::addr_t GetLoadAddress() const = 0; - /// Get the hardware counter of a given type associated with the current - /// instruction. Each architecture might support different counters. It might - /// happen that only some instructions of an entire trace have a given counter - /// associated with them. - /// - /// \param[in] counter_type - /// The counter type. - /// \return - /// The value of the counter or \b llvm::None if not available. - virtual llvm::Optional - GetCounter(lldb::TraceCounter counter_type) const = 0; - /// Get the CPU associated with the current trace item. /// /// This call might not be O(1), so it's suggested to invoke this method - /// whenever a cpu change event is fired. + /// whenever an eTraceEventCPUChanged event is fired. /// /// \return /// The requested CPU id, or \a llvm::None if this information is /// not available for the current item. virtual llvm::Optional GetCPU() const = 0; + + /// Get the last hardware clock value that was emitted before the current + /// trace item. + /// + /// This call might not be O(1), so it's suggested to invoke this method + /// whenever an eTraceEventHWClockTick event is fired. + /// + /// \return + /// The requested HW clock value, or \a llvm::None if this information is + /// not available for the current item. + virtual llvm::Optional GetHWClock() const = 0; + + /// Get the approximate wall clock time in nanoseconds at which the current + /// trace item was executed. 
Each trace plug-in has a different definition for + /// what time 0 means. + /// + /// \return + /// The approximate wall clock time for the trace item, or \a llvm::None + /// if not available. + virtual llvm::Optional GetWallClockTime() const = 0; /// \} protected: diff --git a/lldb/include/lldb/Target/TraceDumper.h b/lldb/include/lldb/Target/TraceDumper.h --- a/lldb/include/lldb/Target/TraceDumper.h +++ b/lldb/include/lldb/Target/TraceDumper.h @@ -29,9 +29,9 @@ bool json = false; /// When dumping in JSON format, pretty print the output. bool pretty_print_json = false; - /// For each instruction, print the corresponding timestamp counter if + /// For each trace item, print the corresponding timestamp in nanoseconds if /// available. - bool show_tsc = false; + bool show_timestamps = false; /// Dump the events that happened between instructions. bool show_events = false; /// For each instruction, print the instruction kind. @@ -61,7 +61,8 @@ struct TraceItem { lldb::user_id_t id; lldb::addr_t load_address; - llvm::Optional tsc; + llvm::Optional timestamp; + llvm::Optional hw_clock; llvm::Optional error; llvm::Optional event; llvm::Optional symbol_info; diff --git a/lldb/include/lldb/Utility/TraceIntelPTGDBRemotePackets.h b/lldb/include/lldb/Utility/TraceIntelPTGDBRemotePackets.h --- a/lldb/include/lldb/Utility/TraceIntelPTGDBRemotePackets.h +++ b/lldb/include/lldb/Utility/TraceIntelPTGDBRemotePackets.h @@ -91,8 +91,8 @@ /// nanoseconds) is defined by the kernel at boot time and has no particularly /// useful meaning. On the other hand, this value is constant for an entire /// trace session. - // See 'time_zero' section of - // https://man7.org/linux/man-pages/man2/perf_event_open.2.html + /// See 'time_zero' section of + /// https://man7.org/linux/man-pages/man2/perf_event_open.2.html /// /// \param[in] tsc /// The TSC value to be converted. 
diff --git a/lldb/include/lldb/lldb-enumerations.h b/lldb/include/lldb/lldb-enumerations.h --- a/lldb/include/lldb/lldb-enumerations.h +++ b/lldb/include/lldb/lldb-enumerations.h @@ -1157,12 +1157,6 @@ eSaveCoreStackOnly = 3, }; -// Type of counter values associated with instructions in a trace. -enum TraceCounter { - // Timestamp counter, like the one offered by Intel CPUs (TSC). - eTraceCounterTSC = 0, -}; - /// Events that might happen during a trace session. enum TraceEvent { /// Tracing was disabled for some time due to a software trigger @@ -1172,6 +1166,8 @@ /// Event due to CPU change for a thread. This event is also fired when /// suddenly it's not possible to identify the cpu of a given thread. eTraceEventCPUChanged, + /// Event due to a CPU HW clock tick + eTraceEventHWClockTick, }; // Enum used to identify which kind of item a \a TraceCursor is pointing at diff --git a/lldb/source/Commands/CommandObjectThread.cpp b/lldb/source/Commands/CommandObjectThread.cpp --- a/lldb/source/Commands/CommandObjectThread.cpp +++ b/lldb/source/Commands/CommandObjectThread.cpp @@ -2179,7 +2179,7 @@ break; } case 't': { - m_dumper_options.show_tsc = true; + m_dumper_options.show_timestamps = true; break; } case 'e': { diff --git a/lldb/source/Commands/Options.td b/lldb/source/Commands/Options.td --- a/lldb/source/Commands/Options.td +++ b/lldb/source/Commands/Options.td @@ -1146,15 +1146,17 @@ Desc<"Dump in simple JSON format.">; def thread_trace_dump_instructions_pretty_print: Option<"pretty-json", "J">, Group<1>, - Desc<"Dump in JSON format but pretty printing the output for easier readability.">; + Desc<"Dump in JSON format but pretty printing the output for easier " + "readability.">; def thread_trace_dump_instructions_show_kind : Option<"kind", "k">, Group<1>, Desc<"Show instruction control flow kind. Refer to the enum " "`InstructionControlFlowKind` for a list of control flow kind. 
" "As an important note, far jumps, far calls and far returns often indicate " "calls to and from kernel.">; - def thread_trace_dump_instructions_show_tsc : Option<"tsc", "t">, Group<1>, - Desc<"For each instruction, print the corresponding timestamp counter if " - "available.">; + def thread_trace_dump_instructions_show_timestamps: Option<"time", "t">, + Group<1>, + Desc<"For each trace item, print the corresponding wall clock timestamp " + "if available.">; def thread_trace_dump_instructions_show_events : Option<"events", "e">, Group<1>, Desc<"Dump the events that happened during the execution of the target.">; diff --git a/lldb/source/Plugins/Trace/intel-pt/DecodedThread.h b/lldb/source/Plugins/Trace/intel-pt/DecodedThread.h --- a/lldb/source/Plugins/Trace/intel-pt/DecodedThread.h +++ b/lldb/source/Plugins/Trace/intel-pt/DecodedThread.h @@ -71,45 +71,7 @@ /// stopped at. See \a Trace::GetCursorPosition for more information. class DecodedThread : public std::enable_shared_from_this { public: - /// \class TscRange - /// Class that represents the trace range associated with a given TSC. - /// It provides efficient iteration to the previous or next TSC range in the - /// decoded trace. - /// - /// TSC timestamps are emitted by the decoder infrequently, which means - /// that each TSC covers a range of instruction indices, which can be used to - /// speed up TSC lookups. - class TscRange { - public: - /// Check if this TSC range includes the given instruction index. - bool InRange(size_t insn_index) const; - - /// Get the next range chronologically. - llvm::Optional Next() const; - - /// Get the previous range chronologically. - llvm::Optional Prev() const; - - /// Get the TSC value. - size_t GetTsc() const; - /// Get the smallest instruction index that has this TSC. - size_t GetStartInstructionIndex() const; - /// Get the largest instruction index that has this TSC. 
- size_t GetEndInstructionIndex() const; - - private: - friend class DecodedThread; - - TscRange(std::map::const_iterator it, - const DecodedThread &decoded_thread); - - /// The iterator pointing to the beginning of the range. - std::map::const_iterator m_it; - /// The largest instruction index that has this TSC. - size_t m_end_index; - - const DecodedThread *m_decoded_thread; - }; + using TSC = uint64_t; // Struct holding counts for libipts errors; struct LibiptErrorsStats { @@ -120,6 +82,34 @@ void RecordError(int libipt_error_code); }; + struct TSCRange { + TSC tsc; + uint64_t items_count; + uint64_t first_item_index; + + /// \return + /// \b true if and only if the given \p item_index is covered by this + /// range. + bool InRange(uint64_t item_index) const; + }; + + struct NanosecondsRange { + uint64_t nanos; + TSC tsc; + NanosecondsRange *next_range; + uint64_t items_count; + uint64_t first_item_index; + + double + GetInterpolatedTime(uint64_t item_index, uint64_t begin_of_time_nanos, + const LinuxPerfZeroTscConversion &tsc_conversion) const; + + /// \return + /// \b true if and only if the given \p item_index is covered by this + /// range. + bool InRange(uint64_t item_index) const; + }; + // Struct holding counts for events; struct EventsStats { /// A count for each individual event kind. We use an unordered map instead @@ -130,39 +120,21 @@ void RecordEvent(lldb::TraceEvent event); }; - DecodedThread(lldb::ThreadSP thread_sp); - - /// Utility constructor that initializes the trace with a provided error. - DecodedThread(lldb::ThreadSP thread_sp, llvm::Error &&err); + DecodedThread( + lldb::ThreadSP thread_sp, + const llvm::Optional &tsc_conversion); /// Get the total number of instruction, errors and events from the decoded /// trace. - int64_t GetItemsCount() const; - - /// Construct the TSC range that covers the given instruction index. - /// This operation is O(logn) and should be used sparingly. 
- /// If the trace was collected with TSC support, all the instructions of - /// the trace will have associated TSCs. This means that this method will - /// only return \b llvm::None if there are no TSCs whatsoever in the trace. - /// - /// \param[in] insn_index - /// The instruction index in question. - /// - /// \param[in] hint_range - /// An optional range that might include the given index or might be a - /// neighbor of it. It might help speed it traversals of the trace with - /// short jumps. - llvm::Optional CalculateTscRange( - size_t insn_index, - const llvm::Optional &hint_range) const; + uint64_t GetItemsCount() const; /// \return /// The error associated with a given trace item. - const char *GetErrorByIndex(size_t item_index) const; + const char *GetErrorByIndex(uint64_t item_index) const; /// \return /// The trace item kind given an item index. - lldb::TraceItemKind GetItemKindByIndex(size_t item_index) const; + lldb::TraceItemKind GetItemKindByIndex(uint64_t item_index) const; /// \return /// The underlying event type for the given trace item index. @@ -177,12 +149,31 @@ /// The requested cpu id, or \a llvm::None if not available. llvm::Optional GetCPUByIndex(uint64_t item_index) const; + /// Get a maximal range of trace items that include the given \p item_index + /// that have the same TSC value. + /// + /// \param[in] item_index + /// The trace item index to compare with. + /// /// \return - /// The load address of the instruction at the given index. - lldb::addr_t GetInstructionLoadAddress(size_t item_index) const; + /// The requested TSC range, or \a llvm::None if not available. + llvm::Optional + GetTSCRangeByIndex(uint64_t item_index) const; + + /// Get a maximal range of trace items that include the given \p item_index + /// that have the same nanoseconds timestamp without interpolation. + /// + /// \param[in] item_index + /// The trace item index to compare with. 
+ /// + /// \return + /// The requested nanoseconds range, or \a llvm::None if not available. + llvm::Optional + GetNanosecondsRangeByIndex(uint64_t item_index); - /// Get a new cursor for the decoded thread. - lldb::TraceCursorUP CreateNewCursor(); + /// \return + /// The load address of the instruction at the given index. + lldb::addr_t GetInstructionLoadAddress(uint64_t item_index) const; /// Return an object with statistics of the TSC decoding errors that happened. /// A TSC error is not a fatal error and doesn't create gaps in the trace. @@ -214,7 +205,7 @@ /// Notify this object that a new tsc has been seen. /// If this a new TSC, an event will be created. - void NotifyTsc(uint64_t tsc); + void NotifyTsc(TSC tsc); /// Notify this object that a CPU has been seen. /// If this a new CPU, an event will be created. @@ -262,15 +253,22 @@ /// it in TraceItemStorage to avoid padding. std::vector m_item_kinds; - /// This map contains the TSCs of the decoded instructions. It maps - /// `instruction index -> TSC`, where `instruction index` is the first index - /// at which the mapped TSC appears. We use this representation because TSCs - /// are sporadic and we can think of them as ranges. If TSCs are present in - /// the trace, all instructions will have an associated TSC, including the - /// first one. Otherwise, this map will be empty. - std::map m_timestamps; + /// This map contains the TSCs of the decoded trace items. It maps + /// `item index -> TSC`, where `item index` is the first index + /// at which the mapped TSC first appears. We use this representation because + /// TSCs are sporadic and we can think of them as ranges. + std::map m_tscs; /// This is the chronologically last TSC that has been added. - llvm::Optional m_last_tsc = llvm::None; + llvm::Optional::iterator> m_last_tsc = + llvm::None; + /// This map contains the non-interpolated nanoseconds timestamps of the + /// decoded trace items. 
It maps `item index -> nanoseconds`, where `item + /// index` is the first index at which the mapped nanoseconds first appears. + /// We use this representation because timestamps are sporadic and we think of + /// them as ranges. + std::map m_nanoseconds; + llvm::Optional::iterator> + m_last_nanoseconds = llvm::None; // The cpu information is stored as a map. It maps `instruction index -> CPU` // A CPU is associated with the next instructions that follow until the next @@ -279,6 +277,9 @@ /// This is the chronologically last CPU ID. llvm::Optional m_last_cpu = llvm::None; + /// TSC -> nanos conversion utility. + llvm::Optional m_tsc_conversion; + /// Statistics of all tracing events. EventsStats m_events_stats; /// Statistics of libipt errors when decoding TSCs. diff --git a/lldb/source/Plugins/Trace/intel-pt/DecodedThread.cpp b/lldb/source/Plugins/Trace/intel-pt/DecodedThread.cpp --- a/lldb/source/Plugins/Trace/intel-pt/DecodedThread.cpp +++ b/lldb/source/Plugins/Trace/intel-pt/DecodedThread.cpp @@ -44,11 +44,65 @@ OS << formatv(": {0:x+16}", m_address); } -int64_t DecodedThread::GetItemsCount() const { - return static_cast(m_item_kinds.size()); +bool DecodedThread::TSCRange::InRange(uint64_t item_index) const { + return item_index >= first_item_index && + item_index < first_item_index + items_count; +} + +bool DecodedThread::NanosecondsRange::InRange(uint64_t item_index) const { + return item_index >= first_item_index && + item_index < first_item_index + items_count; +} + +double DecodedThread::NanosecondsRange::GetInterpolatedTime( + uint64_t item_index, uint64_t begin_of_time_nanos, + const LinuxPerfZeroTscConversion &tsc_conversion) const { + uint64_t items_since_last_tsc = item_index - first_item_index; + + auto interpolate = [&](uint64_t next_range_start_ns) { + if (next_range_start_ns == nanos) { + // If the resolution of the conversion formula is bad enough to consider + // these two timestamps as equal, then we just increase the next one by 1 + // for 
correction + next_range_start_ns++; + } + long double item_duration = + static_cast<long double>(items_count) / (next_range_start_ns - nanos); + return (nanos - begin_of_time_nanos) + items_since_last_tsc * item_duration; + }; + + if (!next_range) { + // If this is the last TSC range, we have to extrapolate. In this case, + // we assume that each instruction took one TSC, which is what an + // instruction would take if no parallelism is achieved and the frequency + // multiplier is 1. + return interpolate(tsc_conversion.ToNanos(tsc + items_count)); + } + if (items_count < (next_range->tsc - tsc)) { + // If the number of items in this range is less than the total TSC duration + // of this range, i.e. each instruction taking longer than 1 TSC, then we + // can assume that something else happened between these TSCs (e.g. a + // context switch, change to kernel, decoding errors, etc). In this case, we + // also assume that each instruction took 1 TSC. A proper way to improve + // this would be to analyze the next events in the trace looking for context + // switches or trace disablement events, but for now, as we only want an + // approximation, we keep it simple. We are also guaranteed that the time in + // nanos of the next range is different from the current one, just because of + // the definition of a NanosecondsRange. + return interpolate( + std::min(tsc_conversion.ToNanos(tsc + items_count), next_range->nanos)); + } + + // In this case, each item took less than 1 TSC, so some parallelism was + // achieved, which is an indication that we didn't suffer any kind of + // interruption. 
+ return interpolate(next_range->nanos); } -lldb::addr_t DecodedThread::GetInstructionLoadAddress(size_t item_index) const { +uint64_t DecodedThread::GetItemsCount() const { return m_item_kinds.size(); } + +lldb::addr_t +DecodedThread::GetInstructionLoadAddress(uint64_t item_index) const { return m_item_data[item_index].load_address; } @@ -58,33 +112,69 @@ DecodedThread::CreateNewTraceItem(lldb::TraceItemKind kind) { m_item_kinds.push_back(kind); m_item_data.emplace_back(); + if (m_last_tsc) + (*m_last_tsc)->second.items_count++; + if (m_last_nanoseconds) + (*m_last_nanoseconds)->second.items_count++; return m_item_data.back(); } -void DecodedThread::NotifyTsc(uint64_t tsc) { - if (!m_last_tsc || *m_last_tsc != tsc) { - m_timestamps.emplace(m_item_kinds.size(), tsc); - m_last_tsc = tsc; +void DecodedThread::NotifyTsc(TSC tsc) { + if (m_last_tsc && (*m_last_tsc)->second.tsc == tsc) + return; + + m_last_tsc = + m_tscs.emplace(GetItemsCount(), TSCRange{tsc, 0, GetItemsCount()}).first; + + if (m_tsc_conversion) { + uint64_t nanos = m_tsc_conversion->ToNanos(tsc); + if (!m_last_nanoseconds || (*m_last_nanoseconds)->second.nanos != nanos) { + m_last_nanoseconds = + m_nanoseconds + .emplace(GetItemsCount(), NanosecondsRange{nanos, tsc, nullptr, 0, + GetItemsCount()}) + .first; + if (*m_last_nanoseconds != m_nanoseconds.begin()) { + auto prev_range = prev(*m_last_nanoseconds); + prev_range->second.next_range = &(*m_last_nanoseconds)->second; + } + } } + AppendEvent(lldb::eTraceEventHWClockTick); } void DecodedThread::NotifyCPU(lldb::cpu_id_t cpu_id) { if (!m_last_cpu || *m_last_cpu != cpu_id) { - m_cpus.emplace(m_item_kinds.size(), cpu_id); + m_cpus.emplace(GetItemsCount(), cpu_id); m_last_cpu = cpu_id; AppendEvent(lldb::eTraceEventCPUChanged); } } Optional -DecodedThread::GetCPUByIndex(uint64_t insn_index) const { - // Could possibly optimize the search - auto it = m_cpus.upper_bound(insn_index); +DecodedThread::GetCPUByIndex(uint64_t item_index) const { + auto it = 
m_cpus.upper_bound(item_index); if (it == m_cpus.begin()) return None; return prev(it)->second; } +Optional +DecodedThread::GetTSCRangeByIndex(uint64_t item_index) const { + auto next_it = m_tscs.upper_bound(item_index); + if (next_it == m_tscs.begin()) + return None; + return prev(next_it)->second; +} + +Optional +DecodedThread::GetNanosecondsRangeByIndex(uint64_t item_index) { + auto next_it = m_nanoseconds.upper_bound(item_index); + if (next_it == m_nanoseconds.begin()) + return None; + return prev(next_it)->second; +} + void DecodedThread::AppendEvent(lldb::TraceEvent event) { CreateNewTraceItem(lldb::eTraceItemKindEvent).event = event; m_events_stats.RecordEvent(event); @@ -134,90 +224,24 @@ total_count++; } -Optional DecodedThread::CalculateTscRange( - size_t insn_index, - const Optional &hint_range) const { - // We first try to check the given hint range in case we are traversing the - // trace in short jumps. If that fails, then we do the more expensive - // arbitrary lookup. - if (hint_range) { - Optional candidate_range; - if (insn_index < hint_range->GetStartInstructionIndex()) - candidate_range = hint_range->Prev(); - else if (insn_index > hint_range->GetEndInstructionIndex()) - candidate_range = hint_range->Next(); - else - candidate_range = hint_range; - - if (candidate_range && candidate_range->InRange(insn_index)) - return candidate_range; - } - // Now we do a more expensive lookup - auto it = m_timestamps.upper_bound(insn_index); - if (it == m_timestamps.begin()) - return None; - - return TscRange(--it, *this); -} - -lldb::TraceItemKind DecodedThread::GetItemKindByIndex(size_t item_index) const { +lldb::TraceItemKind +DecodedThread::GetItemKindByIndex(uint64_t item_index) const { return static_cast(m_item_kinds[item_index]); } -const char *DecodedThread::GetErrorByIndex(size_t item_index) const { +const char *DecodedThread::GetErrorByIndex(uint64_t item_index) const { return m_item_data[item_index].error; } -DecodedThread::DecodedThread(ThreadSP 
thread_sp) : m_thread_sp(thread_sp) {} - -lldb::TraceCursorUP DecodedThread::CreateNewCursor() { - return std::make_unique(m_thread_sp, shared_from_this()); -} +DecodedThread::DecodedThread( + ThreadSP thread_sp, + const llvm::Optional &tsc_conversion) + : m_thread_sp(thread_sp), m_tsc_conversion(tsc_conversion) {} size_t DecodedThread::CalculateApproximateMemoryUsage() const { return sizeof(TraceItemStorage) * m_item_data.size() + sizeof(uint8_t) * m_item_kinds.size() + - (sizeof(size_t) + sizeof(uint64_t)) * m_timestamps.size() + - (sizeof(size_t) + sizeof(lldb::cpu_id_t)) * m_cpus.size(); -} - -DecodedThread::TscRange::TscRange(std::map::const_iterator it, - const DecodedThread &decoded_thread) - : m_it(it), m_decoded_thread(&decoded_thread) { - auto next_it = m_it; - ++next_it; - m_end_index = (next_it == m_decoded_thread->m_timestamps.end()) - ? std::numeric_limits::max() - : next_it->first - 1; -} - -size_t DecodedThread::TscRange::GetTsc() const { return m_it->second; } - -size_t DecodedThread::TscRange::GetStartInstructionIndex() const { - return m_it->first; -} - -size_t DecodedThread::TscRange::GetEndInstructionIndex() const { - return m_end_index; -} - -bool DecodedThread::TscRange::InRange(size_t insn_index) const { - return GetStartInstructionIndex() <= insn_index && - insn_index <= GetEndInstructionIndex(); -} - -Optional DecodedThread::TscRange::Next() const { - auto next_it = m_it; - ++next_it; - if (next_it == m_decoded_thread->m_timestamps.end()) - return None; - return TscRange(next_it, *m_decoded_thread); -} - -Optional DecodedThread::TscRange::Prev() const { - if (m_it == m_decoded_thread->m_timestamps.begin()) - return None; - auto prev_it = m_it; - --prev_it; - return TscRange(prev_it, *m_decoded_thread); + (sizeof(uint64_t) + sizeof(TSC)) * m_tscs.size() + + (sizeof(uint64_t) + sizeof(uint64_t)) * m_nanoseconds.size() + + (sizeof(uint64_t) + sizeof(lldb::cpu_id_t)) * m_cpus.size(); } diff --git 
a/lldb/source/Plugins/Trace/intel-pt/LibiptDecoder.h b/lldb/source/Plugins/Trace/intel-pt/LibiptDecoder.h --- a/lldb/source/Plugins/Trace/intel-pt/LibiptDecoder.h +++ b/lldb/source/Plugins/Trace/intel-pt/LibiptDecoder.h @@ -94,6 +94,16 @@ SplitTraceInContinuousExecutions(TraceIntelPT &trace_intel_pt, llvm::ArrayRef buffer); +/// Find the lowest TSC in the given trace. +/// +/// \return +/// The lowest TSC value in this trace if available, \a llvm::None if the +/// trace is empty or the trace contains no timing information, or an \a +/// llvm::Error if it was not possible to set up the decoder. +llvm::Expected> +FindLowestTSCInTrace(TraceIntelPT &trace_intel_pt, + llvm::ArrayRef buffer); + } // namespace trace_intel_pt } // namespace lldb_private diff --git a/lldb/source/Plugins/Trace/intel-pt/LibiptDecoder.cpp b/lldb/source/Plugins/Trace/intel-pt/LibiptDecoder.cpp --- a/lldb/source/Plugins/Trace/intel-pt/LibiptDecoder.cpp +++ b/lldb/source/Plugins/Trace/intel-pt/LibiptDecoder.cpp @@ -285,6 +285,7 @@ for (size_t i = 0; i < executions.size(); i++) { const IntelPTThreadContinousExecution &execution = executions[i]; + decoded_thread.NotifyTsc(execution.thread_execution.GetLowestKnownTSC()); decoded_thread.NotifyCPU(execution.thread_execution.cpu_id); auto variant = execution.thread_execution.variant; @@ -380,3 +381,22 @@ } return executions; } + +Expected> +lldb_private::trace_intel_pt::FindLowestTSCInTrace(TraceIntelPT &trace_intel_pt, + ArrayRef buffer) { + Expected decoder_up = + CreateInstructionDecoder(trace_intel_pt, buffer); + if (!decoder_up) + return decoder_up.takeError(); + + pt_insn_decoder *decoder = decoder_up.get().get(); + int status = pte_ok; + if (IsLibiptError(status = pt_insn_sync_forward(decoder))) + return None; + + uint64_t tsc; + if (IsLibiptError(pt_insn_time(decoder, &tsc, nullptr, nullptr))) + return None; + return tsc; +} diff --git a/lldb/source/Plugins/Trace/intel-pt/ThreadDecoder.h b/lldb/source/Plugins/Trace/intel-pt/ThreadDecoder.h --- 
a/lldb/source/Plugins/Trace/intel-pt/ThreadDecoder.h +++ b/lldb/source/Plugins/Trace/intel-pt/ThreadDecoder.h @@ -36,6 +36,12 @@ /// A \a DecodedThread instance. llvm::Expected Decode(); + /// \return + /// The lowest TSC value in this trace if available, \a llvm::None if the + /// trace is empty or the trace contains no timing information, or an \a + /// llvm::Error if it was not possible to set up the decoder. + llvm::Expected> FindLowestTSC(); + ThreadDecoder(const ThreadDecoder &other) = delete; ThreadDecoder &operator=(const ThreadDecoder &other) = delete; diff --git a/lldb/source/Plugins/Trace/intel-pt/ThreadDecoder.cpp b/lldb/source/Plugins/Trace/intel-pt/ThreadDecoder.cpp --- a/lldb/source/Plugins/Trace/intel-pt/ThreadDecoder.cpp +++ b/lldb/source/Plugins/Trace/intel-pt/ThreadDecoder.cpp @@ -23,6 +23,21 @@ ThreadDecoder::ThreadDecoder(const ThreadSP &thread_sp, TraceIntelPT &trace) : m_thread_sp(thread_sp), m_trace(trace) {} +Expected> ThreadDecoder::FindLowestTSC() { + Optional lowest_tsc; + Error err = m_trace.OnThreadBufferRead( + m_thread_sp->GetID(), [&](llvm::ArrayRef data) -> llvm::Error { + Expected> tsc = FindLowestTSCInTrace(m_trace, data); + if (!tsc) + return tsc.takeError(); + lowest_tsc = *tsc; + return Error::success(); + }); + if (err) + return std::move(err); + return lowest_tsc; +} + Expected ThreadDecoder::Decode() { if (!m_decoded_thread.hasValue()) { if (Expected decoded_thread = DoDecode()) { @@ -38,8 +53,8 @@ return m_trace.GetThreadTimer(m_thread_sp->GetID()) .TimeTask( "Decoding instructions", [&]() -> Expected { - DecodedThreadSP decoded_thread_sp = - std::make_shared(m_thread_sp); + DecodedThreadSP decoded_thread_sp = std::make_shared( + m_thread_sp, m_trace.GetPerfZeroTscConversion()); Error err = m_trace.OnThreadBufferRead( m_thread_sp->GetID(), [&](llvm::ArrayRef data) { diff --git a/lldb/source/Plugins/Trace/intel-pt/TraceCursorIntelPT.h b/lldb/source/Plugins/Trace/intel-pt/TraceCursorIntelPT.h --- 
a/lldb/source/Plugins/Trace/intel-pt/TraceCursorIntelPT.h +++ b/lldb/source/Plugins/Trace/intel-pt/TraceCursorIntelPT.h @@ -16,8 +16,10 @@ class TraceCursorIntelPT : public TraceCursor { public: - TraceCursorIntelPT(lldb::ThreadSP thread_sp, - DecodedThreadSP decoded_thread_sp); + TraceCursorIntelPT( + lldb::ThreadSP thread_sp, DecodedThreadSP decoded_thread_sp, + const llvm::Optional<LinuxPerfZeroTscConversion> &tsc_conversion, + llvm::Optional<uint64_t> beginning_of_time_nanos); bool Seek(int64_t offset, SeekType origin) override; @@ -29,13 +31,12 @@ lldb::addr_t GetLoadAddress() const override; - llvm::Optional<uint64_t> - GetCounter(lldb::TraceCounter counter_type) const override; - lldb::TraceEvent GetEventType() const override; llvm::Optional<lldb::cpu_id_t> GetCPU() const override; + llvm::Optional<uint64_t> GetHWClock() const override; + lldb::TraceItemKind GetItemKind() const override; bool GoToId(lldb::user_id_t id) override; @@ -44,16 +45,43 @@ bool HasId(lldb::user_id_t id) const override; + llvm::Optional<double> GetWallClockTime() const override; + private: - /// Calculate the tsc range for the current position if needed. - void CalculateTscRange(); + /// Clear the current TSC and nanoseconds ranges if they are no longer + /// valid after moving the cursor. + void ClearTimingRangesIfInvalid(); + + /// Get or calculate the TSC range that includes the current trace item. + const llvm::Optional<DecodedThread::TSCRange> &GetTSCRange() const; + + /// Get or calculate the nanoseconds range including the current trace item. + const llvm::Optional<DecodedThread::NanosecondsRange> & + GetNanosecondsRange() const; /// Storage of the actual instructions DecodedThreadSP m_decoded_thread_sp; /// Internal instruction index currently pointing at. int64_t m_pos; - /// Tsc range covering the current instruction. - llvm::Optional<DecodedThread::TscRange> m_tsc_range; + + /// Timing information and cached values. + /// \{ + + /// TSC -> nanos conversion utility. \a None if not available at all. + llvm::Optional<LinuxPerfZeroTscConversion> m_tsc_conversion; + /// Lowest nanoseconds timestamp seen in any thread trace, \a None if not + /// available at all. 
+ llvm::Optional m_beginning_of_time_nanos; + /// Range of trace items with the same TSC that includes the current trace + /// item, \a None if not calculated or not available. + llvm::Optional mutable m_tsc_range; + bool mutable m_tsc_range_calculated = false; + /// Range of trace items with the same non-interpolated timestamps in + /// nanoseconds that includes the current trace item, \a None if not + /// calculated or not available. + llvm::Optional mutable m_nanoseconds_range; + bool mutable m_nanoseconds_range_calculated = false; + /// \} }; } // namespace trace_intel_pt diff --git a/lldb/source/Plugins/Trace/intel-pt/TraceCursorIntelPT.cpp b/lldb/source/Plugins/Trace/intel-pt/TraceCursorIntelPT.cpp --- a/lldb/source/Plugins/Trace/intel-pt/TraceCursorIntelPT.cpp +++ b/lldb/source/Plugins/Trace/intel-pt/TraceCursorIntelPT.cpp @@ -17,28 +17,49 @@ using namespace lldb_private::trace_intel_pt; using namespace llvm; -TraceCursorIntelPT::TraceCursorIntelPT(ThreadSP thread_sp, - DecodedThreadSP decoded_thread_sp) - : TraceCursor(thread_sp), m_decoded_thread_sp(decoded_thread_sp) { +TraceCursorIntelPT::TraceCursorIntelPT( + ThreadSP thread_sp, DecodedThreadSP decoded_thread_sp, + const Optional &tsc_conversion, + Optional beginning_of_time_nanos) + : TraceCursor(thread_sp), m_decoded_thread_sp(decoded_thread_sp), + m_tsc_conversion(tsc_conversion), + m_beginning_of_time_nanos(beginning_of_time_nanos) { Seek(0, SeekType::End); } -void TraceCursorIntelPT::CalculateTscRange() { - // If we failed, then we look for the exact range - if (!m_tsc_range || !m_tsc_range->InRange(m_pos)) - m_tsc_range = m_decoded_thread_sp->CalculateTscRange( - m_pos, /*hit_range=*/m_tsc_range); -} - void TraceCursorIntelPT::Next() { m_pos += IsForwards() ? 1 : -1; + ClearTimingRangesIfInvalid(); +} - // We try to go to a neighbor tsc range that might contain the current pos - if (m_tsc_range && !m_tsc_range->InRange(m_pos)) - m_tsc_range = IsForwards() ? 
m_tsc_range->Next() : m_tsc_range->Prev(); +void TraceCursorIntelPT::ClearTimingRangesIfInvalid() { + if (m_tsc_range && !m_tsc_range->InRange(m_pos)) { + m_tsc_range = None; + m_tsc_range_calculated = false; + } + if (m_nanoseconds_range && !m_nanoseconds_range->InRange(m_pos)) { + m_nanoseconds_range = None; + m_nanoseconds_range_calculated = false; + } +} - // If we failed, this call will fix it - CalculateTscRange(); +const Optional & +TraceCursorIntelPT::GetTSCRange() const { + if (!m_tsc_range_calculated) { + m_tsc_range_calculated = true; + m_tsc_range = m_decoded_thread_sp->GetTSCRangeByIndex(m_pos); + } + return m_tsc_range; +} + +const Optional & +TraceCursorIntelPT::GetNanosecondsRange() const { + if (!m_nanoseconds_range_calculated) { + m_nanoseconds_range_calculated = true; + m_nanoseconds_range = + m_decoded_thread_sp->GetNanosecondsRangeByIndex(m_pos); + } + return m_nanoseconds_range; } bool TraceCursorIntelPT::Seek(int64_t offset, SeekType origin) { @@ -52,13 +73,15 @@ case TraceCursor::SeekType::Current: m_pos += offset; } - CalculateTscRange(); + + ClearTimingRangesIfInvalid(); return HasValue(); } bool TraceCursorIntelPT::HasValue() const { - return m_pos >= 0 && m_pos < m_decoded_thread_sp->GetItemsCount(); + return m_pos >= 0 && + static_cast(m_pos) < m_decoded_thread_sp->GetItemsCount(); } lldb::TraceItemKind TraceCursorIntelPT::GetItemKind() const { @@ -73,15 +96,18 @@ return m_decoded_thread_sp->GetInstructionLoadAddress(m_pos); } -Optional -TraceCursorIntelPT::GetCounter(lldb::TraceCounter counter_type) const { - switch (counter_type) { - case lldb::eTraceCounterTSC: - if (m_tsc_range) - return m_tsc_range->GetTsc(); - else - return llvm::None; - } +Optional TraceCursorIntelPT::GetHWClock() const { + if (const Optional &range = GetTSCRange()) + return range->tsc; + return None; +} + +Optional TraceCursorIntelPT::GetWallClockTime() const { + if (const Optional &range = + GetNanosecondsRange()) + return range->GetInterpolatedTime(m_pos, 
+                                      *m_beginning_of_time_nanos,
+                                      *m_tsc_conversion);
+  return None;
 }

 Optional<lldb::cpu_id_t> TraceCursorIntelPT::GetCPU() const {
@@ -96,13 +122,11 @@
   if (!HasId(id))
     return false;
   m_pos = id;
-  m_tsc_range = m_decoded_thread_sp->CalculateTscRange(m_pos, m_tsc_range);
-
   return true;
 }

 bool TraceCursorIntelPT::HasId(lldb::user_id_t id) const {
-  return static_cast<int64_t>(id) < m_decoded_thread_sp->GetItemsCount();
+  return id < m_decoded_thread_sp->GetItemsCount();
 }

 user_id_t TraceCursorIntelPT::GetId() const { return m_pos; }
diff --git a/lldb/source/Plugins/Trace/intel-pt/TraceIntelPT.h b/lldb/source/Plugins/Trace/intel-pt/TraceIntelPT.h
--- a/lldb/source/Plugins/Trace/intel-pt/TraceIntelPT.h
+++ b/lldb/source/Plugins/Trace/intel-pt/TraceIntelPT.h
@@ -220,6 +220,13 @@
   ///     returned if the decoder couldn't be properly set up.
   llvm::Expected<DecodedThreadSP> Decode(Thread &thread);

+  /// \return
+  ///     The lowest timestamp in nanoseconds in all traces if available, \a
+  ///     llvm::None if all the traces were empty or no trace contained
+  ///     timing information, or an \a llvm::Error if it was not possible to
+  ///     set up the decoder for some trace.
+  llvm::Expected<llvm::Optional<uint64_t>> FindBeginningOfTimeNanos();
+
   // Dump out trace info in JSON format
   void DumpTraceInfoAsJson(Thread &thread, Stream &s, bool verbose);

@@ -236,6 +243,8 @@
     /// It is provided by either a trace bundle or a live process to convert TSC
    /// counters to and from nanos. It might not be available on all hosts.
    llvm::Optional<LinuxPerfZeroTscConversion> tsc_conversion;
+    llvm::Optional<uint64_t> beginning_of_time_nanos;
+    bool beginning_of_time_nanos_calculated = false;
   } m_storage;

   /// It is provided by either a trace bundle or a live process' "cpuInfo"
diff --git a/lldb/source/Plugins/Trace/intel-pt/TraceIntelPT.cpp b/lldb/source/Plugins/Trace/intel-pt/TraceIntelPT.cpp
--- a/lldb/source/Plugins/Trace/intel-pt/TraceIntelPT.cpp
+++ b/lldb/source/Plugins/Trace/intel-pt/TraceIntelPT.cpp
@@ -8,6 +8,8 @@

 #include "TraceIntelPT.h"

+#include "TraceCursorIntelPT.h"
+
 #include "../common/ThreadPostMortemTrace.h"
 #include "CommandObjectTraceStartIntelPT.h"
 #include "DecodedThread.h"
@@ -138,11 +140,53 @@
   return it->second->Decode();
 }

+Expected<Optional<uint64_t>> TraceIntelPT::FindBeginningOfTimeNanos() {
+  Storage &storage = GetUpdatedStorage();
+  if (storage.beginning_of_time_nanos_calculated)
+    return storage.beginning_of_time_nanos;
+  storage.beginning_of_time_nanos_calculated = true;
+
+  if (!storage.tsc_conversion)
+    return None;
+
+  Optional<uint64_t> lowest_tsc;
+
+  if (storage.multicpu_decoder) {
+    if (Expected<Optional<uint64_t>> tsc =
+            storage.multicpu_decoder->FindLowestTSC()) {
+      lowest_tsc = *tsc;
+    } else {
+      return tsc.takeError();
+    }
+  }
+
+  for (auto &decoder : storage.thread_decoders) {
+    Expected<Optional<uint64_t>> tsc = decoder.second->FindLowestTSC();
+    if (!tsc)
+      return tsc.takeError();
+
+    if (*tsc && (!lowest_tsc || *lowest_tsc > **tsc))
+      lowest_tsc = **tsc;
+  }
+
+  if (lowest_tsc) {
+    storage.beginning_of_time_nanos =
+        storage.tsc_conversion->ToNanos(*lowest_tsc);
+  }
+  return storage.beginning_of_time_nanos;
+}
+
 llvm::Expected<lldb::TraceCursorUP>
 TraceIntelPT::CreateNewCursor(Thread &thread) {
-  if (Expected<DecodedThreadSP> decoded_thread = Decode(thread))
-    return decoded_thread.get()->CreateNewCursor();
-  else
+  if (Expected<DecodedThreadSP> decoded_thread = Decode(thread)) {
+    if (Expected<Optional<uint64_t>> beginning_of_time =
+            FindBeginningOfTimeNanos())
+      return std::make_unique<TraceCursorIntelPT>(
+          thread.shared_from_this(), *decoded_thread, m_storage.tsc_conversion,
+          *beginning_of_time);
+    else
+      return
+          beginning_of_time.takeError();
+  } else
     return decoded_thread.takeError();
 }
diff --git a/lldb/source/Plugins/Trace/intel-pt/TraceIntelPTMultiCpuDecoder.h b/lldb/source/Plugins/Trace/intel-pt/TraceIntelPTMultiCpuDecoder.h
--- a/lldb/source/Plugins/Trace/intel-pt/TraceIntelPTMultiCpuDecoder.h
+++ b/lldb/source/Plugins/Trace/intel-pt/TraceIntelPTMultiCpuDecoder.h
@@ -66,6 +66,12 @@
   ///     The total number of PSB blocks in all cores.
   size_t GetTotalPSBBlocksCount() const;

+  /// \return
+  ///     The lowest TSC value in this trace if available, \a llvm::None if the
+  ///     trace is empty or the trace contains no timing information, or an \a
+  ///     llvm::Error if it was not possible to set up the decoder.
+  llvm::Expected<llvm::Optional<uint64_t>> FindLowestTSC();
+
 private:
   /// Traverse the context switch traces and the basic intel pt continuous
   /// subtraces and produce a list of continuous executions for each process and
diff --git a/lldb/source/Plugins/Trace/intel-pt/TraceIntelPTMultiCpuDecoder.cpp b/lldb/source/Plugins/Trace/intel-pt/TraceIntelPTMultiCpuDecoder.cpp
--- a/lldb/source/Plugins/Trace/intel-pt/TraceIntelPTMultiCpuDecoder.cpp
+++ b/lldb/source/Plugins/Trace/intel-pt/TraceIntelPTMultiCpuDecoder.cpp
@@ -35,6 +35,28 @@
   return m_tids.count(tid);
 }

+Expected<Optional<uint64_t>> TraceIntelPTMultiCpuDecoder::FindLowestTSC() {
+  Optional<uint64_t> lowest_tsc;
+  TraceIntelPTSP trace_sp = GetTrace();
+
+  Error err = GetTrace()->OnAllCpusBinaryDataRead(
+      IntelPTDataKinds::kIptTrace,
+      [&](const DenseMap<lldb::cpu_id_t, llvm::ArrayRef<uint8_t>> &buffers)
+          -> Error {
+        for (auto &cpu_id_to_buffer : buffers) {
+          Expected<Optional<uint64_t>> tsc =
+              FindLowestTSCInTrace(*trace_sp, cpu_id_to_buffer.second);
+          if (!tsc)
+            return tsc.takeError();
+          if (*tsc && (!lowest_tsc || *lowest_tsc > **tsc))
+            lowest_tsc = **tsc;
+        }
+        return Error::success();
+      });
+  if (err)
+    return std::move(err);
+  return lowest_tsc;
+}
+
 Expected<DecodedThreadSP> TraceIntelPTMultiCpuDecoder::Decode(Thread &thread) {
   if (Error err = CorrelateContextSwitchesAndIntelPtTraces())
     return std::move(err);
@@ -48,8 +70,8 @@
   if (it !=
      m_decoded_threads.end())
     return it->second;

-  DecodedThreadSP decoded_thread_sp =
-      std::make_shared<DecodedThread>(thread.shared_from_this());
+  DecodedThreadSP decoded_thread_sp = std::make_shared<DecodedThread>(
+      thread.shared_from_this(), trace_sp->GetPerfZeroTscConversion());

   Error err = trace_sp->OnAllCpusBinaryDataRead(
       IntelPTDataKinds::kIptTrace,
diff --git a/lldb/source/Target/TraceCursor.cpp b/lldb/source/Target/TraceCursor.cpp
--- a/lldb/source/Target/TraceCursor.cpp
+++ b/lldb/source/Target/TraceCursor.cpp
@@ -50,5 +50,7 @@
     return "software disabled tracing";
   case lldb::eTraceEventCPUChanged:
     return "CPU core changed";
+  case lldb::eTraceEventHWClockTick:
+    return "HW clock tick";
   }
 }
diff --git a/lldb/source/Target/TraceDumper.cpp b/lldb/source/Target/TraceDumper.cpp
--- a/lldb/source/Target/TraceDumper.cpp
+++ b/lldb/source/Target/TraceDumper.cpp
@@ -128,16 +128,26 @@

     m_s.Format("  {0}: ", item.id);

-    if (m_options.show_tsc) {
-      m_s.Format("[tsc={0}] ",
-                 item.tsc ? std::to_string(*item.tsc) : "unavailable");
+    if (m_options.show_timestamps) {
+      m_s.Format("[{0}] ", item.timestamp
+                               ? (formatv("{0:2}", *item.timestamp) + " ns")
+                               : "unavailable");
     }

     if (item.event) {
       m_s << "(event) " << TraceCursor::EventKindToString(*item.event);
-      if (*item.event == eTraceEventCPUChanged) {
+      switch (*item.event) {
+      case eTraceEventCPUChanged:
         m_s.Format(" [new CPU={0}]",
                    item.cpu_id ? std::to_string(*item.cpu_id) : "unavailable");
+        break;
+      case eTraceEventHWClockTick:
+        m_s.Format(" [{0}]", item.hw_clock ?
+                                 std::to_string(*item.hw_clock)
+                                 : "unavailable");
+        break;
+      case eTraceEventDisabledHW:
+      case eTraceEventDisabledSW:
+        break;
       }
     } else if (item.error) {
       m_s << "(error) " << *item.error;
@@ -181,7 +191,8 @@
   | {
       "loadAddress": string decimal,
       "id": decimal,
-      "tsc"?: string decimal,
+      "hwClock"?: string decimal,
+      "timestamp_ns"?: string decimal,
       "module"?: string,
       "symbol"?: string,
       "line"?: decimal,
@@ -202,8 +213,17 @@

   void DumpEvent(const TraceDumper::TraceItem &item) {
     m_j.attribute("event", TraceCursor::EventKindToString(*item.event));
-    if (item.event == eTraceEventCPUChanged)
+    switch (*item.event) {
+    case eTraceEventCPUChanged:
       m_j.attribute("cpuId", item.cpu_id);
+      break;
+    case eTraceEventHWClockTick:
+      m_j.attribute("hwClock", item.hw_clock);
+      break;
+    case eTraceEventDisabledHW:
+    case eTraceEventDisabledSW:
+      break;
+    }
   }

   void DumpInstruction(const TraceDumper::TraceItem &item) {
@@ -234,10 +254,11 @@
   void TraceItem(const TraceDumper::TraceItem &item) override {
     m_j.object([&] {
       m_j.attribute("id", item.id);
-      if (m_options.show_tsc)
-        m_j.attribute(
-            "tsc",
-            item.tsc ? Optional<std::string>(std::to_string(*item.tsc)) : None);
+      if (m_options.show_timestamps)
+        m_j.attribute("timestamp_ns", item.timestamp
+                                          ?
Optional( + std::to_string(*item.timestamp)) + : None); if (item.event) { DumpEvent(item); @@ -289,8 +310,8 @@ TraceItem item; item.id = m_cursor_up->GetId(); - if (m_options.show_tsc) - item.tsc = m_cursor_up->GetCounter(lldb::eTraceCounterTSC); + if (m_options.show_timestamps) + item.timestamp = m_cursor_up->GetWallClockTime(); return item; } @@ -366,8 +387,17 @@ if (!m_options.show_events) continue; item.event = m_cursor_up->GetEventType(); - if (*item.event == eTraceEventCPUChanged) + switch (*item.event) { + case eTraceEventCPUChanged: item.cpu_id = m_cursor_up->GetCPU(); + break; + case eTraceEventHWClockTick: + item.hw_clock = m_cursor_up->GetHWClock(); + break; + case eTraceEventDisabledHW: + case eTraceEventDisabledSW: + break; + } } else if (m_cursor_up->IsError()) { item.error = m_cursor_up->GetError(); } else {