diff --git a/lldb/include/lldb/Target/TraceCursor.h b/lldb/include/lldb/Target/TraceCursor.h --- a/lldb/include/lldb/Target/TraceCursor.h +++ b/lldb/include/lldb/Target/TraceCursor.h @@ -215,7 +215,7 @@ /// of this cursor. ExecutionContextRef &GetExecutionContextRef(); - /// Instruction, event or error information + /// Trace item information (instructions, errors and events) /// \{ /// \return @@ -255,27 +255,35 @@ /// The load address of the instruction the cursor is pointing at. virtual lldb::addr_t GetLoadAddress() const = 0; - /// Get the hardware counter of a given type associated with the current - /// instruction. Each architecture might support different counters. It might - /// happen that only some instructions of an entire trace have a given counter - /// associated with them. - /// - /// \param[in] counter_type - /// The counter type. - /// \return - /// The value of the counter or \b llvm::None if not available. - virtual llvm::Optional - GetCounter(lldb::TraceCounter counter_type) const = 0; - /// Get the CPU associated with the current trace item. /// /// This call might not be O(1), so it's suggested to invoke this method - /// whenever a cpu change event is fired. + /// whenever an eTraceEventCPUChanged event is fired. /// /// \return /// The requested CPU id, or \a llvm::None if this information is /// not available for the current item. virtual llvm::Optional GetCPU() const = 0; + + /// Get the last hardware clock value that was emitted before the current + /// trace item. + /// + /// This call might not be O(1), so it's suggested to invoke this method + /// whenever an eTraceEventHWClockTick event is fired. + /// + /// \return + /// The requested HW clock value, or \a llvm::None if this information is + /// not available for the current item. + virtual llvm::Optional GetHWClock() const = 0; + + /// Get the approximate wall clock time in nanoseconds at which the current + /// trace item was executed. Each trace plug-in has a different definition for + /// what time 0 means. + /// + /// \return + /// The approximate wall clock time for the trace item, or \a llvm::None + /// if not available. + virtual llvm::Optional GetWallClockTime() const = 0; /// \} protected: diff --git a/lldb/include/lldb/Target/TraceDumper.h b/lldb/include/lldb/Target/TraceDumper.h --- a/lldb/include/lldb/Target/TraceDumper.h +++ b/lldb/include/lldb/Target/TraceDumper.h @@ -29,9 +29,9 @@ bool json = false; /// When dumping in JSON format, pretty print the output. bool pretty_print_json = false; - /// For each instruction, print the corresponding timestamp counter if + /// For each trace item, print the corresponding timestamp in nanoseconds if /// available. - bool show_tsc = false; + bool show_timestamps = false; /// Dump the events that happened between instructions. bool show_events = false; /// For each instruction, print the instruction kind. @@ -61,7 +61,8 @@ struct TraceItem { lldb::user_id_t id; lldb::addr_t load_address; - llvm::Optional tsc; + llvm::Optional timestamp; + llvm::Optional hw_clock; llvm::Optional error; llvm::Optional event; llvm::Optional symbol_info; diff --git a/lldb/include/lldb/Utility/TraceIntelPTGDBRemotePackets.h b/lldb/include/lldb/Utility/TraceIntelPTGDBRemotePackets.h --- a/lldb/include/lldb/Utility/TraceIntelPTGDBRemotePackets.h +++ b/lldb/include/lldb/Utility/TraceIntelPTGDBRemotePackets.h @@ -91,8 +91,8 @@ /// nanoseconds) is defined by the kernel at boot time and has no particularly /// useful meaning. 
On the other hand, this value is constant for an entire /// trace session. - // See 'time_zero' section of - // https://man7.org/linux/man-pages/man2/perf_event_open.2.html + /// See 'time_zero' section of + /// https://man7.org/linux/man-pages/man2/perf_event_open.2.html /// /// \param[in] tsc /// The TSC value to be converted. @@ -101,6 +101,10 @@ /// Nanosecond wall time. uint64_t ToNanos(uint64_t tsc) const; + /// Similar to \a ToNanos but capable of returning a floating point number, + /// which prevents similar TSCs from being mapped to the same value. + long double ToNanosFloat(long double lowest_nanos, uint64_t tsc) const; + uint64_t ToTSC(uint64_t nanos) const; uint32_t time_mult; diff --git a/lldb/include/lldb/lldb-enumerations.h b/lldb/include/lldb/lldb-enumerations.h --- a/lldb/include/lldb/lldb-enumerations.h +++ b/lldb/include/lldb/lldb-enumerations.h @@ -1157,12 +1157,6 @@ eSaveCoreStackOnly = 3, }; -// Type of counter values associated with instructions in a trace. -enum TraceCounter { - // Timestamp counter, like the one offered by Intel CPUs (TSC). - eTraceCounterTSC = 0, -}; - /// Events that might happen during a trace session. enum TraceEvent { /// Tracing was disabled for some time due to a software trigger @@ -1172,6 +1166,8 @@ /// Event due to CPU change for a thread. This event is also fired when /// suddenly it's not possible to identify the cpu of a given thread. eTraceEventCPUChanged, + /// Event due to a CPU HW clock tick. + eTraceEventHWClockTick, }; // Enum used to identify which kind of item a \a TraceCursor is pointing at diff --git a/lldb/source/Commands/CommandObjectThread.cpp b/lldb/source/Commands/CommandObjectThread.cpp --- a/lldb/source/Commands/CommandObjectThread.cpp +++ b/lldb/source/Commands/CommandObjectThread.cpp @@ -2179,7 +2179,7 @@ break; } case 't': { - m_dumper_options.show_tsc = true; + m_dumper_options.show_timestamps = true; break; } case 'e': { diff --git a/lldb/source/Commands/Options.td b/lldb/source/Commands/Options.td --- a/lldb/source/Commands/Options.td +++ b/lldb/source/Commands/Options.td @@ -1146,15 +1146,17 @@ Desc<"Dump in simple JSON format.">; def thread_trace_dump_instructions_pretty_print: Option<"pretty-json", "J">, Group<1>, - Desc<"Dump in JSON format but pretty printing the output for easier readability.">; + Desc<"Dump in JSON format but pretty printing the output for easier " + "readability.">; def thread_trace_dump_instructions_show_kind : Option<"kind", "k">, Group<1>, Desc<"Show instruction control flow kind. Refer to the enum " "`InstructionControlFlowKind` for a list of control flow kind. " "As an important note, far jumps, far calls and far returns often indicate " "calls to and from kernel.">; - def thread_trace_dump_instructions_show_tsc : Option<"tsc", "t">, Group<1>, - Desc<"For each instruction, print the corresponding timestamp counter if " - "available.">; + def thread_trace_dump_instructions_show_timestamps: Option<"time", "t">, + Group<1>, + Desc<"For each trace item, print the corresponding wall clock timestamp " + "if available.">; def thread_trace_dump_instructions_show_events : Option<"events", "e">, Group<1>, Desc<"Dump the events that happened during the execution of the target.">; diff --git a/lldb/source/Plugins/Trace/intel-pt/DecodedThread.h b/lldb/source/Plugins/Trace/intel-pt/DecodedThread.h --- a/lldb/source/Plugins/Trace/intel-pt/DecodedThread.h +++ b/lldb/source/Plugins/Trace/intel-pt/DecodedThread.h @@ -71,45 +71,7 @@ /// stopped at. See \a Trace::GetCursorPosition for more information.
class DecodedThread : public std::enable_shared_from_this { public: - /// \class TscRange - /// Class that represents the trace range associated with a given TSC. - /// It provides efficient iteration to the previous or next TSC range in the - /// decoded trace. - /// - /// TSC timestamps are emitted by the decoder infrequently, which means - /// that each TSC covers a range of instruction indices, which can be used to - /// speed up TSC lookups. - class TscRange { - public: - /// Check if this TSC range includes the given instruction index. - bool InRange(size_t insn_index) const; - - /// Get the next range chronologically. - llvm::Optional Next() const; - - /// Get the previous range chronologically. - llvm::Optional Prev() const; - - /// Get the TSC value. - size_t GetTsc() const; - /// Get the smallest instruction index that has this TSC. - size_t GetStartInstructionIndex() const; - /// Get the largest instruction index that has this TSC. - size_t GetEndInstructionIndex() const; - - private: - friend class DecodedThread; - - TscRange(std::map::const_iterator it, - const DecodedThread &decoded_thread); - - /// The iterator pointing to the beginning of the range. - std::map::const_iterator m_it; - /// The largest instruction index that has this TSC. - size_t m_end_index; - - const DecodedThread *m_decoded_thread; - }; + using TSC = uint64_t; // Struct holding counts for libipts errors; struct LibiptErrorsStats { @@ -120,6 +82,20 @@ void RecordError(int libipt_error_code); }; + struct TSCRange { + TSC tsc; + /// The next TSC value in the trace. + llvm::Optional next_tsc; + /// How many trace items share the current TSC value. + uint64_t items_count; + uint64_t first_item_index; + + /// \return + /// \b true if and only if the given \p item_index is covered by this + /// range. + bool InRange(uint64_t item_index); + }; + // Struct holding counts for events; struct EventsStats { /// A count for each individual event kind. We use an unordered map instead @@ -139,30 +115,13 @@ /// trace. int64_t GetItemsCount() const; - /// Construct the TSC range that covers the given instruction index. - /// This operation is O(logn) and should be used sparingly. - /// If the trace was collected with TSC support, all the instructions of - /// the trace will have associated TSCs. This means that this method will - /// only return \b llvm::None if there are no TSCs whatsoever in the trace. - /// - /// \param[in] insn_index - /// The instruction index in question. - /// - /// \param[in] hint_range - /// An optional range that might include the given index or might be a - /// neighbor of it. It might help speed it traversals of the trace with - /// short jumps. - llvm::Optional CalculateTscRange( - size_t insn_index, - const llvm::Optional &hint_range) const; - /// \return /// The error associated with a given trace item. - const char *GetErrorByIndex(size_t item_index) const; + const char *GetErrorByIndex(uint64_t item_index) const; /// \return /// The trace item kind given an item index. - lldb::TraceItemKind GetItemKindByIndex(size_t item_index) const; + lldb::TraceItemKind GetItemKindByIndex(uint64_t item_index) const; /// \return /// The underlying event type for the given trace item index. @@ -177,12 +136,20 @@ /// The requested cpu id, or \a llvm::None if not available. llvm::Optional GetCPUByIndex(uint64_t item_index) const; + /// Get the information of the most recent TSC before or at the given trace + /// item index. + /// + /// \param[in] item_index + /// The trace item index to compare with. 
+ /// /// \return - /// The load address of the instruction at the given index. - lldb::addr_t GetInstructionLoadAddress(size_t item_index) const; + /// The requested TSC info, or \a llvm::None if not available. + llvm::Optional + GetTSCRangeByIndex(uint64_t item_index) const; - /// Get a new cursor for the decoded thread. - lldb::TraceCursorUP CreateNewCursor(); + /// \return + /// The load address of the instruction at the given index. + lldb::addr_t GetInstructionLoadAddress(uint64_t item_index) const; /// Return an object with statistics of the TSC decoding errors that happened. /// A TSC error is not a fatal error and doesn't create gaps in the trace. @@ -214,7 +181,7 @@ /// Notify this object that a new tsc has been seen. /// If this a new TSC, an event will be created. - void NotifyTsc(uint64_t tsc); + void NotifyTsc(TSC tsc); /// Notify this object that a CPU has been seen. /// If this a new CPU, an event will be created. @@ -263,14 +230,12 @@ std::vector m_item_kinds; /// This map contains the TSCs of the decoded instructions. It maps - /// `instruction index -> TSC`, where `instruction index` is the first index + /// `item index -> TSC`, where `item index` is the first index /// at which the mapped TSC appears. We use this representation because TSCs - /// are sporadic and we can think of them as ranges. If TSCs are present in - /// the trace, all instructions will have an associated TSC, including the - /// first one. Otherwise, this map will be empty. - std::map m_timestamps; + /// are sporadic and we can think of them as ranges. + std::map m_tscs; /// This is the chronologically last TSC that has been added. - llvm::Optional m_last_tsc = llvm::None; + llvm::Optional m_last_tsc = llvm::None; // The cpu information is stored as a map. It maps `instruction index -> CPU` // A CPU is associated with the next instructions that follow until the next diff --git a/lldb/source/Plugins/Trace/intel-pt/DecodedThread.cpp b/lldb/source/Plugins/Trace/intel-pt/DecodedThread.cpp --- a/lldb/source/Plugins/Trace/intel-pt/DecodedThread.cpp +++ b/lldb/source/Plugins/Trace/intel-pt/DecodedThread.cpp @@ -44,11 +44,17 @@ OS << formatv(": {0:x+16}", m_address); } +bool DecodedThread::TSCRange::InRange(uint64_t item_index) { + return item_index >= first_item_index && + item_index < first_item_index + items_count; +} + int64_t DecodedThread::GetItemsCount() const { return static_cast(m_item_kinds.size()); } -lldb::addr_t DecodedThread::GetInstructionLoadAddress(size_t item_index) const { +lldb::addr_t +DecodedThread::GetInstructionLoadAddress(uint64_t item_index) const { return m_item_data[item_index].load_address; } @@ -63,28 +69,42 @@ void DecodedThread::NotifyTsc(uint64_t tsc) { if (!m_last_tsc || *m_last_tsc != tsc) { - m_timestamps.emplace(m_item_kinds.size(), tsc); + m_tscs.emplace(GetItemsCount(), tsc); m_last_tsc = tsc; + AppendEvent(lldb::eTraceEventHWClockTick); } } void DecodedThread::NotifyCPU(lldb::cpu_id_t cpu_id) { if (!m_last_cpu || *m_last_cpu != cpu_id) { - m_cpus.emplace(m_item_kinds.size(), cpu_id); + m_cpus.emplace(GetItemsCount(), cpu_id); m_last_cpu = cpu_id; AppendEvent(lldb::eTraceEventCPUChanged); } } Optional -DecodedThread::GetCPUByIndex(uint64_t insn_index) const { - // Could possibly optimize the search - auto it = m_cpus.upper_bound(insn_index); +DecodedThread::GetCPUByIndex(uint64_t item_index) const { + auto it = m_cpus.upper_bound(item_index); if (it == m_cpus.begin()) return None; return prev(it)->second; } +Optional +DecodedThread::GetTSCRangeByIndex(uint64_t 
item_index) const { + auto next_it = m_tscs.upper_bound(item_index); + if (next_it == m_tscs.begin()) + return None; + auto it = prev(next_it); + if (next_it == m_tscs.end()) + return DecodedThread::TSCRange{it->second, None, + GetItemsCount() - it->first, it->first}; + else + return DecodedThread::TSCRange{it->second, next_it->second, + next_it->first - it->first, it->first}; +} + void DecodedThread::AppendEvent(lldb::TraceEvent event) { CreateNewTraceItem(lldb::eTraceItemKindEvent).event = event; m_events_stats.RecordEvent(event); @@ -134,90 +154,20 @@ total_count++; } -Optional DecodedThread::CalculateTscRange( - size_t insn_index, - const Optional &hint_range) const { - // We first try to check the given hint range in case we are traversing the - // trace in short jumps. If that fails, then we do the more expensive - // arbitrary lookup. - if (hint_range) { - Optional candidate_range; - if (insn_index < hint_range->GetStartInstructionIndex()) - candidate_range = hint_range->Prev(); - else if (insn_index > hint_range->GetEndInstructionIndex()) - candidate_range = hint_range->Next(); - else - candidate_range = hint_range; - - if (candidate_range && candidate_range->InRange(insn_index)) - return candidate_range; - } - // Now we do a more expensive lookup - auto it = m_timestamps.upper_bound(insn_index); - if (it == m_timestamps.begin()) - return None; - - return TscRange(--it, *this); -} - -lldb::TraceItemKind DecodedThread::GetItemKindByIndex(size_t item_index) const { +lldb::TraceItemKind +DecodedThread::GetItemKindByIndex(uint64_t item_index) const { return static_cast(m_item_kinds[item_index]); } -const char *DecodedThread::GetErrorByIndex(size_t item_index) const { +const char *DecodedThread::GetErrorByIndex(uint64_t item_index) const { return m_item_data[item_index].error; } DecodedThread::DecodedThread(ThreadSP thread_sp) : m_thread_sp(thread_sp) {} -lldb::TraceCursorUP DecodedThread::CreateNewCursor() { - return std::make_unique(m_thread_sp, shared_from_this()); -} - size_t DecodedThread::CalculateApproximateMemoryUsage() const { return sizeof(TraceItemStorage) * m_item_data.size() + sizeof(uint8_t) * m_item_kinds.size() + - (sizeof(size_t) + sizeof(uint64_t)) * m_timestamps.size() + + (sizeof(size_t) + sizeof(uint64_t)) * m_tscs.size() + (sizeof(size_t) + sizeof(lldb::cpu_id_t)) * m_cpus.size(); } - -DecodedThread::TscRange::TscRange(std::map::const_iterator it, - const DecodedThread &decoded_thread) - : m_it(it), m_decoded_thread(&decoded_thread) { - auto next_it = m_it; - ++next_it; - m_end_index = (next_it == m_decoded_thread->m_timestamps.end()) - ? 
std::numeric_limits::max() - : next_it->first - 1; -} - -size_t DecodedThread::TscRange::GetTsc() const { return m_it->second; } - -size_t DecodedThread::TscRange::GetStartInstructionIndex() const { - return m_it->first; -} - -size_t DecodedThread::TscRange::GetEndInstructionIndex() const { - return m_end_index; -} - -bool DecodedThread::TscRange::InRange(size_t insn_index) const { - return GetStartInstructionIndex() <= insn_index && - insn_index <= GetEndInstructionIndex(); -} - -Optional DecodedThread::TscRange::Next() const { - auto next_it = m_it; - ++next_it; - if (next_it == m_decoded_thread->m_timestamps.end()) - return None; - return TscRange(next_it, *m_decoded_thread); -} - -Optional DecodedThread::TscRange::Prev() const { - if (m_it == m_decoded_thread->m_timestamps.begin()) - return None; - auto prev_it = m_it; - --prev_it; - return TscRange(prev_it, *m_decoded_thread); -} diff --git a/lldb/source/Plugins/Trace/intel-pt/LibiptDecoder.h b/lldb/source/Plugins/Trace/intel-pt/LibiptDecoder.h --- a/lldb/source/Plugins/Trace/intel-pt/LibiptDecoder.h +++ b/lldb/source/Plugins/Trace/intel-pt/LibiptDecoder.h @@ -94,6 +94,16 @@ SplitTraceInContinuousExecutions(TraceIntelPT &trace_intel_pt, llvm::ArrayRef buffer); +/// Find the lowest TSC in the given trace. +/// +/// \return +/// The lowest TSC value in this trace if available, \a llvm::None if the +/// trace is empty or the trace contains no timing information, or an \a +/// llvm::Error if it was not possible to set up the decoder. +llvm::Expected> +FindLowestTSCInTrace(TraceIntelPT &trace_intel_pt, + llvm::ArrayRef buffer); + } // namespace trace_intel_pt } // namespace lldb_private diff --git a/lldb/source/Plugins/Trace/intel-pt/LibiptDecoder.cpp b/lldb/source/Plugins/Trace/intel-pt/LibiptDecoder.cpp --- a/lldb/source/Plugins/Trace/intel-pt/LibiptDecoder.cpp +++ b/lldb/source/Plugins/Trace/intel-pt/LibiptDecoder.cpp @@ -285,6 +285,7 @@ for (size_t i = 0; i < executions.size(); i++) { const IntelPTThreadContinousExecution &execution = executions[i]; + decoded_thread.NotifyTsc(execution.thread_execution.GetLowestKnownTSC()); decoded_thread.NotifyCPU(execution.thread_execution.cpu_id); auto variant = execution.thread_execution.variant; @@ -380,3 +381,22 @@ } return executions; } + +Expected> +lldb_private::trace_intel_pt::FindLowestTSCInTrace(TraceIntelPT &trace_intel_pt, + ArrayRef buffer) { + Expected decoder_up = + CreateInstructionDecoder(trace_intel_pt, buffer); + if (!decoder_up) + return decoder_up.takeError(); + + pt_insn_decoder *decoder = decoder_up.get().get(); + int status = pte_ok; + if (IsLibiptError(status = pt_insn_sync_forward(decoder))) + return None; + + uint64_t tsc; + if (IsLibiptError(pt_insn_time(decoder, &tsc, nullptr, nullptr))) + return None; + return tsc; +} diff --git a/lldb/source/Plugins/Trace/intel-pt/ThreadDecoder.h b/lldb/source/Plugins/Trace/intel-pt/ThreadDecoder.h --- a/lldb/source/Plugins/Trace/intel-pt/ThreadDecoder.h +++ b/lldb/source/Plugins/Trace/intel-pt/ThreadDecoder.h @@ -36,6 +36,12 @@ /// A \a DecodedThread instance. llvm::Expected Decode(); + /// \return + /// The lowest TSC value in this trace if available, \a llvm::None if the + /// trace is empty or the trace contains no timing information, or an \a + /// llvm::Error if it was not possible to set up the decoder. 
+ llvm::Expected> FindLowestTSC(); + ThreadDecoder(const ThreadDecoder &other) = delete; ThreadDecoder &operator=(const ThreadDecoder &other) = delete; diff --git a/lldb/source/Plugins/Trace/intel-pt/ThreadDecoder.cpp b/lldb/source/Plugins/Trace/intel-pt/ThreadDecoder.cpp --- a/lldb/source/Plugins/Trace/intel-pt/ThreadDecoder.cpp +++ b/lldb/source/Plugins/Trace/intel-pt/ThreadDecoder.cpp @@ -23,6 +23,21 @@ ThreadDecoder::ThreadDecoder(const ThreadSP &thread_sp, TraceIntelPT &trace) : m_thread_sp(thread_sp), m_trace(trace) {} +Expected> ThreadDecoder::FindLowestTSC() { + Optional lowest_tsc; + Error err = m_trace.OnThreadBufferRead( + m_thread_sp->GetID(), [&](llvm::ArrayRef data) -> llvm::Error { + Expected> tsc = FindLowestTSCInTrace(m_trace, data); + if (!tsc) + return tsc.takeError(); + lowest_tsc = *tsc; + return Error::success(); + }); + if (err) + return std::move(err); + return lowest_tsc; +} + Expected ThreadDecoder::Decode() { if (!m_decoded_thread.hasValue()) { if (Expected decoded_thread = DoDecode()) { diff --git a/lldb/source/Plugins/Trace/intel-pt/TraceCursorIntelPT.h b/lldb/source/Plugins/Trace/intel-pt/TraceCursorIntelPT.h --- a/lldb/source/Plugins/Trace/intel-pt/TraceCursorIntelPT.h +++ b/lldb/source/Plugins/Trace/intel-pt/TraceCursorIntelPT.h @@ -16,8 +16,10 @@ class TraceCursorIntelPT : public TraceCursor { public: - TraceCursorIntelPT(lldb::ThreadSP thread_sp, - DecodedThreadSP decoded_thread_sp); + TraceCursorIntelPT( + lldb::ThreadSP thread_sp, DecodedThreadSP decoded_thread_sp, + const llvm::Optional &tsc_conversion, + llvm::Optional lowest_tsc); bool Seek(int64_t offset, SeekType origin) override; @@ -29,13 +31,12 @@ lldb::addr_t GetLoadAddress() const override; - llvm::Optional - GetCounter(lldb::TraceCounter counter_type) const override; - lldb::TraceEvent GetEventType() const override; llvm::Optional GetCPU() const override; + llvm::Optional GetHWClock() const override; + lldb::TraceItemKind GetItemKind() const override; bool GoToId(lldb::user_id_t id) override; @@ -44,16 +45,25 @@ bool HasId(lldb::user_id_t id) const override; + llvm::Optional GetWallClockTime() const override; + private: - /// Calculate the tsc range for the current position if needed. - void CalculateTscRange(); + /// Clear the current TSC range if it's no longer valid after moving. + void ClearTSCRangeIfInvalid(); + + llvm::Optional &GetTSCRange() const; /// Storage of the actual instructions DecodedThreadSP m_decoded_thread_sp; /// Internal instruction index currently pointing at. int64_t m_pos; - /// Tsc range covering the current instruction.
- llvm::Optional m_tsc_range; + /// Timing information and cached values + /// \{ + llvm::Optional m_tsc_conversion; + llvm::Optional mutable m_tsc_range; + llvm::Optional m_lowest_tsc; + llvm::Optional mutable m_lowest_nanos; + /// \} }; } // namespace trace_intel_pt diff --git a/lldb/source/Plugins/Trace/intel-pt/TraceCursorIntelPT.cpp b/lldb/source/Plugins/Trace/intel-pt/TraceCursorIntelPT.cpp --- a/lldb/source/Plugins/Trace/intel-pt/TraceCursorIntelPT.cpp +++ b/lldb/source/Plugins/Trace/intel-pt/TraceCursorIntelPT.cpp @@ -17,28 +17,30 @@ using namespace lldb_private::trace_intel_pt; using namespace llvm; -TraceCursorIntelPT::TraceCursorIntelPT(ThreadSP thread_sp, - DecodedThreadSP decoded_thread_sp) - : TraceCursor(thread_sp), m_decoded_thread_sp(decoded_thread_sp) { +TraceCursorIntelPT::TraceCursorIntelPT( + ThreadSP thread_sp, DecodedThreadSP decoded_thread_sp, + const Optional &tsc_conversion, + Optional lowest_tsc) + : TraceCursor(thread_sp), m_decoded_thread_sp(decoded_thread_sp), + m_tsc_conversion(tsc_conversion), m_lowest_tsc(lowest_tsc) { Seek(0, SeekType::End); } -void TraceCursorIntelPT::CalculateTscRange() { - // If we failed, then we look for the exact range - if (!m_tsc_range || !m_tsc_range->InRange(m_pos)) - m_tsc_range = m_decoded_thread_sp->CalculateTscRange( - m_pos, /*hit_range=*/m_tsc_range); -} - void TraceCursorIntelPT::Next() { m_pos += IsForwards() ? 1 : -1; + ClearTSCRangeIfInvalid(); +} - // We try to go to a neighbor tsc range that might contain the current pos +void TraceCursorIntelPT::ClearTSCRangeIfInvalid() { if (m_tsc_range && !m_tsc_range->InRange(m_pos)) - m_tsc_range = IsForwards() ? m_tsc_range->Next() : m_tsc_range->Prev(); + m_tsc_range = None; +} - // If we failed, this call will fix it - CalculateTscRange(); +Optional &TraceCursorIntelPT::GetTSCRange() const { + if (!m_tsc_range) + m_tsc_range = m_decoded_thread_sp->GetTSCRangeByIndex(m_pos); + + return m_tsc_range; } bool TraceCursorIntelPT::Seek(int64_t offset, SeekType origin) { @@ -52,7 +54,8 @@ case TraceCursor::SeekType::Current: m_pos += offset; } - CalculateTscRange(); + + ClearTSCRangeIfInvalid(); return HasValue(); } @@ -73,15 +76,55 @@ return m_decoded_thread_sp->GetInstructionLoadAddress(m_pos); } -Optional -TraceCursorIntelPT::GetCounter(lldb::TraceCounter counter_type) const { - switch (counter_type) { - case lldb::eTraceCounterTSC: - if (m_tsc_range) - return m_tsc_range->GetTsc(); - else - return llvm::None; +Optional TraceCursorIntelPT::GetHWClock() const { + if (const Optional &range = GetTSCRange()) + return range->tsc; + return None; +} + +Optional TraceCursorIntelPT::GetWallClockTime() const { + if (!m_tsc_conversion || !m_lowest_tsc) + return None; + if (!m_lowest_nanos) + m_lowest_nanos = + static_cast(m_tsc_conversion->ToNanos(*m_lowest_tsc)); + long double lowest_nanos = *m_lowest_nanos; + + Optional &range = GetTSCRange(); + if (!range) + return None; + + uint64_t items_since_last_tsc = m_pos - range->first_item_index; + + if (!range->next_tsc) { + // This is the last TSC range, so we have to extrapolate. In this case, we + // assume that each instruction took one TSC, which is what an instruction + // would take if no parallelism is achieved and the frequency multiplier + // is 1. + return m_tsc_conversion->ToNanosFloat(lowest_nanos, + range->tsc + items_since_last_tsc); + } + + if (range->items_count < (*range->next_tsc - range->tsc)) { + // If the number of items in this range is less than the total TSC duration + // of this range, i.e.
each instruction taking longer than 1 TSC, then we can + // assume that something else happened between these TSCs (e.g. a context + // switch, change to kernel, decoding errors, etc). In this case, we can + // again assume that each instruction took one TSC, which is what an + // instruction would take if no parallelism is achieved and the frequency + // multiplier is 1. + return m_tsc_conversion->ToNanosFloat(lowest_nanos, + range->tsc + items_since_last_tsc); } + + // In this case, each item took less than 1 TSC, so some parallelism was + // achieved, and it all seems correct. + uint64_t range_start_ns = m_tsc_conversion->ToNanos(range->tsc); + uint64_t next_range_start_ns = m_tsc_conversion->ToNanos(*range->next_tsc); + long double item_duration = + (next_range_start_ns - range_start_ns) / + static_cast<long double>(range->items_count); + + return (range_start_ns - lowest_nanos) + items_since_last_tsc * item_duration; } Optional TraceCursorIntelPT::GetCPU() const { @@ -96,8 +139,6 @@ if (!HasId(id)) return false; m_pos = id; - m_tsc_range = m_decoded_thread_sp->CalculateTscRange(m_pos, m_tsc_range); - return true; } diff --git a/lldb/source/Plugins/Trace/intel-pt/TraceIntelPT.h b/lldb/source/Plugins/Trace/intel-pt/TraceIntelPT.h --- a/lldb/source/Plugins/Trace/intel-pt/TraceIntelPT.h +++ b/lldb/source/Plugins/Trace/intel-pt/TraceIntelPT.h @@ -220,6 +220,13 @@ /// returned if the decoder couldn't be properly set up. llvm::Expected Decode(Thread &thread); + /// \return + /// The lowest TSC value in all traces if available, \a llvm::None if all + /// the traces were empty or no trace contained timing information, or + /// an \a llvm::Error if it was not possible to set up the decoder for + /// some trace. + llvm::Expected> FindLowestTSC(); + // Dump out trace info in JSON format void DumpTraceInfoAsJson(Thread &thread, Stream &s, bool verbose); @@ -236,6 +243,10 @@ /// It is provided by either a trace bundle or a live process to convert TSC /// counters to and from nanos. It might not be available on all hosts.
llvm::Optional tsc_conversion; + /// None -> not calculated + /// Has value -> None -> not available + /// Has value -> uint64_t -> actual tsc + llvm::Optional> lowest_tsc; } m_storage; /// It is provided by either a trace bundle or a live process' "cpuInfo" diff --git a/lldb/source/Plugins/Trace/intel-pt/TraceIntelPT.cpp b/lldb/source/Plugins/Trace/intel-pt/TraceIntelPT.cpp --- a/lldb/source/Plugins/Trace/intel-pt/TraceIntelPT.cpp +++ b/lldb/source/Plugins/Trace/intel-pt/TraceIntelPT.cpp @@ -8,6 +8,8 @@ #include "TraceIntelPT.h" +#include "TraceCursorIntelPT.h" + #include "../common/ThreadPostMortemTrace.h" #include "CommandObjectTraceStartIntelPT.h" #include "DecodedThread.h" @@ -138,11 +140,44 @@ return it->second->Decode(); } +Expected> TraceIntelPT::FindLowestTSC() { + Storage &storage = GetUpdatedStorage(); + if (storage.lowest_tsc) + return *storage.lowest_tsc; + + if (storage.multicpu_decoder) { + if (Expected> tsc = + storage.multicpu_decoder->FindLowestTSC()) { + storage.lowest_tsc = *tsc; + return *storage.lowest_tsc; + } else { + return tsc.takeError(); + } + } + + Optional lowest_tsc; + for (auto &decoder : storage.thread_decoders) { + Expected> tsc = decoder.second->FindLowestTSC(); + if (!tsc) + return tsc.takeError(); + + if (*tsc && (!lowest_tsc || *lowest_tsc > **tsc)) + lowest_tsc = **tsc; + } + storage.lowest_tsc = lowest_tsc; + return *storage.lowest_tsc; +} + llvm::Expected TraceIntelPT::CreateNewCursor(Thread &thread) { - if (Expected decoded_thread = Decode(thread)) - return decoded_thread.get()->CreateNewCursor(); - else + if (Expected decoded_thread = Decode(thread)) { + if (Expected> lowest_tsc = FindLowestTSC()) + return std::make_unique( + thread.shared_from_this(), *decoded_thread, m_storage.tsc_conversion, + *lowest_tsc); + else + return lowest_tsc.takeError(); + } else return decoded_thread.takeError(); } diff --git a/lldb/source/Plugins/Trace/intel-pt/TraceIntelPTMultiCpuDecoder.h b/lldb/source/Plugins/Trace/intel-pt/TraceIntelPTMultiCpuDecoder.h --- a/lldb/source/Plugins/Trace/intel-pt/TraceIntelPTMultiCpuDecoder.h +++ b/lldb/source/Plugins/Trace/intel-pt/TraceIntelPTMultiCpuDecoder.h @@ -66,6 +66,12 @@ /// The total number of PSB blocks in all cores. size_t GetTotalPSBBlocksCount() const; + /// \return + /// The lowest TSC value in this trace if available, \a llvm::None if the + /// trace is empty or the trace contains no timing information, or an \a + /// llvm::Error if it was not possible to set up the decoder. 
+ llvm::Expected> FindLowestTSC(); + private: /// Traverse the context switch traces and the basic intel pt continuous /// subtraces and produce a list of continuous executions for each process and diff --git a/lldb/source/Plugins/Trace/intel-pt/TraceIntelPTMultiCpuDecoder.cpp b/lldb/source/Plugins/Trace/intel-pt/TraceIntelPTMultiCpuDecoder.cpp --- a/lldb/source/Plugins/Trace/intel-pt/TraceIntelPTMultiCpuDecoder.cpp +++ b/lldb/source/Plugins/Trace/intel-pt/TraceIntelPTMultiCpuDecoder.cpp @@ -35,6 +35,28 @@ return m_tids.count(tid); } +Expected> TraceIntelPTMultiCpuDecoder::FindLowestTSC() { + Optional lowest_tsc; + TraceIntelPTSP trace_sp = GetTrace(); + + Error err = GetTrace()->OnAllCpusBinaryDataRead( + IntelPTDataKinds::kIptTrace, + [&](const DenseMap> &buffers) -> Error { + for (auto &cpu_id_to_buffer : buffers) { + Expected> tsc = + FindLowestTSCInTrace(*trace_sp, cpu_id_to_buffer.second); + if (!tsc) + return tsc.takeError(); + if (*tsc && (!lowest_tsc || *lowest_tsc > **tsc)) + lowest_tsc = **tsc; + } + return Error::success(); + }); + if (err) + return std::move(err); + return lowest_tsc; +} + Expected TraceIntelPTMultiCpuDecoder::Decode(Thread &thread) { if (Error err = CorrelateContextSwitchesAndIntelPtTraces()) return std::move(err); diff --git a/lldb/source/Target/TraceCursor.cpp b/lldb/source/Target/TraceCursor.cpp --- a/lldb/source/Target/TraceCursor.cpp +++ b/lldb/source/Target/TraceCursor.cpp @@ -50,5 +50,7 @@ return "software disabled tracing"; case lldb::eTraceEventCPUChanged: return "CPU core changed"; + case lldb::eTraceEventHWClockTick: + return "HW clock tick"; } } diff --git a/lldb/source/Target/TraceDumper.cpp b/lldb/source/Target/TraceDumper.cpp --- a/lldb/source/Target/TraceDumper.cpp +++ b/lldb/source/Target/TraceDumper.cpp @@ -128,16 +128,26 @@ m_s.Format(" {0}: ", item.id); - if (m_options.show_tsc) { - m_s.Format("[tsc={0}] ", - item.tsc ? std::to_string(*item.tsc) : "unavailable"); + if (m_options.show_timestamps) { + m_s.Format("[{0}] ", item.timestamp + ? (formatv("{0:2}", *item.timestamp) + " ns") + : "unavailable"); } if (item.event) { m_s << "(event) " << TraceCursor::EventKindToString(*item.event); - if (*item.event == eTraceEventCPUChanged) { + switch (*item.event) { + case eTraceEventCPUChanged: m_s.Format(" [new CPU={0}]", item.cpu_id ? std::to_string(*item.cpu_id) : "unavailable"); + break; + case eTraceEventHWClockTick: + m_s.Format(" [{0}]", item.hw_clock ? std::to_string(*item.hw_clock) + : "unavailable"); + break; + case eTraceEventDisabledHW: + case eTraceEventDisabledSW: + break; } } else if (item.error) { m_s << "(error) " << *item.error; @@ -181,7 +191,8 @@ | { "loadAddress": string decimal, "id": decimal, - "tsc"?: string decimal, + "hwClock"?: string decimal, + "timestamp_ns"?: string decimal, "module"?: string, "symbol"?: string, "line"?: decimal, @@ -202,8 +213,17 @@ void DumpEvent(const TraceDumper::TraceItem &item) { m_j.attribute("event", TraceCursor::EventKindToString(*item.event)); - if (item.event == eTraceEventCPUChanged) + switch (*item.event) { + case eTraceEventCPUChanged: m_j.attribute("cpuId", item.cpu_id); + break; + case eTraceEventHWClockTick: + m_j.attribute("hwClock", item.hw_clock); + break; + case eTraceEventDisabledHW: + case eTraceEventDisabledSW: + break; + } } void DumpInstruction(const TraceDumper::TraceItem &item) { @@ -234,10 +254,11 @@ void TraceItem(const TraceDumper::TraceItem &item) override { m_j.object([&] { m_j.attribute("id", item.id); - if (m_options.show_tsc) - m_j.attribute( - "tsc", - item.tsc ? 
Optional(std::to_string(*item.tsc)) : None); + if (m_options.show_timestamps) + m_j.attribute("timestamp_ns", item.timestamp + ? Optional( + std::to_string(*item.timestamp)) + : None); if (item.event) { DumpEvent(item); @@ -289,8 +310,8 @@ TraceItem item; item.id = m_cursor_up->GetId(); - if (m_options.show_tsc) - item.tsc = m_cursor_up->GetCounter(lldb::eTraceCounterTSC); + if (m_options.show_timestamps) + item.timestamp = m_cursor_up->GetWallClockTime(); return item; } @@ -366,8 +387,17 @@ if (!m_options.show_events) continue; item.event = m_cursor_up->GetEventType(); - if (*item.event == eTraceEventCPUChanged) + switch (*item.event) { + case eTraceEventCPUChanged: item.cpu_id = m_cursor_up->GetCPU(); + break; + case eTraceEventHWClockTick: + item.hw_clock = m_cursor_up->GetHWClock(); + break; + case eTraceEventDisabledHW: + case eTraceEventDisabledSW: + break; + } } else if (m_cursor_up->IsError()) { item.error = m_cursor_up->GetError(); } else { diff --git a/lldb/source/Utility/TraceIntelPTGDBRemotePackets.cpp b/lldb/source/Utility/TraceIntelPTGDBRemotePackets.cpp --- a/lldb/source/Utility/TraceIntelPTGDBRemotePackets.cpp +++ b/lldb/source/Utility/TraceIntelPTGDBRemotePackets.cpp @@ -79,6 +79,15 @@ return time_zero.value + quot * time_mult + ((rem * time_mult) >> time_shift); } +long double LinuxPerfZeroTscConversion::ToNanosFloat(long double lowest_nanos, + uint64_t tsc) const { + uint64_t quot = tsc >> time_shift; + uint64_t rem_flag = (((uint64_t)1 << time_shift) - 1); + uint64_t rem = tsc & rem_flag; + return (time_zero.value + quot * time_mult - lowest_nanos) + + (static_cast(rem * time_mult) / pow(2, time_shift)); +} + uint64_t LinuxPerfZeroTscConversion::ToTSC(uint64_t nanos) const { uint64_t time = nanos - time_zero.value; uint64_t quot = time / time_mult;
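
Note (not part of the patch): the snippet below is a minimal standalone sketch of the timing arithmetic used above, for anyone who wants to play with the numbers. It reproduces the perf_event_open-style TSC-to-nanoseconds formula that LinuxPerfZeroTscConversion::ToNanos implements, and the per-item interpolation that TraceCursorIntelPT::GetWallClockTime performs between two consecutive TSC ranges. The TscConversion and TscRange structs, the ApproximateItemTime helper, and the sample numbers are simplified, hypothetical stand-ins, not LLDB code.

// Illustrative sketch only -- not LLDB code.
#include <cstdint>
#include <cstdio>

struct TscConversion {
  uint32_t time_mult;
  uint16_t time_shift;
  uint64_t time_zero;

  // Same arithmetic as the 'time_zero' section of perf_event_open(2):
  // nanos = time_zero + (tsc >> shift) * mult + (((tsc & mask) * mult) >> shift)
  uint64_t ToNanos(uint64_t tsc) const {
    uint64_t quot = tsc >> time_shift;
    uint64_t rem = tsc & (((uint64_t)1 << time_shift) - 1);
    return time_zero + quot * time_mult + ((rem * time_mult) >> time_shift);
  }
};

// One TSC value covering `items_count` consecutive trace items starting at
// `first_item_index`, followed by the TSC of the next range (simplified
// stand-in for DecodedThread::TSCRange).
struct TscRange {
  uint64_t tsc;
  uint64_t next_tsc;
  uint64_t first_item_index;
  uint64_t items_count;
};

// Approximate the wall clock time (in ns, relative to the lowest TSC of the
// trace) of the item at `item_index`, spreading the items of the range evenly
// between the range's TSC and the next one.
long double ApproximateItemTime(const TscConversion &conv,
                                const TscRange &range, uint64_t lowest_tsc,
                                uint64_t item_index) {
  long double lowest_ns = conv.ToNanos(lowest_tsc);
  long double range_start_ns = conv.ToNanos(range.tsc);
  long double next_range_start_ns = conv.ToNanos(range.next_tsc);
  long double duration_per_item =
      (next_range_start_ns - range_start_ns) / range.items_count;
  uint64_t items_since_last_tsc = item_index - range.first_item_index;
  return (range_start_ns - lowest_ns) +
         items_since_last_tsc * duration_per_item;
}

int main() {
  // Made-up numbers, chosen only to make the arithmetic easy to follow.
  TscConversion conv{/*time_mult=*/1000, /*time_shift=*/10, /*time_zero=*/0};
  TscRange range{/*tsc=*/2048, /*next_tsc=*/4096, /*first_item_index=*/100,
                 /*items_count=*/8};
  uint64_t lowest_tsc = 1024;

  // Items 100..107 land evenly between the 2000 ns and 4000 ns marks,
  // offset by the 1000 ns corresponding to the lowest TSC.
  for (uint64_t idx = 100; idx < 108; idx++)
    printf("item %llu -> %.2Lf ns\n", (unsigned long long)idx,
           ApproximateItemTime(conv, range, lowest_tsc, idx));
  return 0;
}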