diff --git a/lldb/include/lldb/Target/Trace.h b/lldb/include/lldb/Target/Trace.h --- a/lldb/include/lldb/Target/Trace.h +++ b/lldb/include/lldb/Target/Trace.h @@ -433,15 +433,15 @@ GetLiveProcessBinaryData(llvm::StringRef kind); /// Get the size of the data returned by \a GetLiveThreadBinaryData - llvm::Optional GetLiveThreadBinaryDataSize(lldb::tid_t tid, - llvm::StringRef kind); + llvm::Optional GetLiveThreadBinaryDataSize(lldb::tid_t tid, + llvm::StringRef kind); /// Get the size of the data returned by \a GetLiveCoreBinaryData - llvm::Optional GetLiveCoreBinaryDataSize(lldb::core_id_t core_id, - llvm::StringRef kind); + llvm::Optional GetLiveCoreBinaryDataSize(lldb::core_id_t core_id, + llvm::StringRef kind); /// Get the size of the data returned by \a GetLiveProcessBinaryData - llvm::Optional GetLiveProcessBinaryDataSize(llvm::StringRef kind); + llvm::Optional GetLiveProcessBinaryDataSize(llvm::StringRef kind); /// Constructor for post mortem processes Trace(llvm::ArrayRef postmortem_processes, @@ -513,14 +513,14 @@ /// \{ /// tid -> data kind -> size - llvm::DenseMap> + llvm::DenseMap> m_live_thread_data; /// core id -> data kind -> size - llvm::DenseMap> + llvm::DenseMap> m_live_core_data; /// data kind -> size - std::unordered_map m_live_process_data; + std::unordered_map m_live_process_data; /// \} /// The list of cores being traced. Might be \b None depending on the plug-in. diff --git a/lldb/include/lldb/Utility/TraceGDBRemotePackets.h b/lldb/include/lldb/Utility/TraceGDBRemotePackets.h --- a/lldb/include/lldb/Utility/TraceGDBRemotePackets.h +++ b/lldb/include/lldb/Utility/TraceGDBRemotePackets.h @@ -154,7 +154,7 @@ /// Optional tid if the data is related to a thread. llvm::Optional tid; /// Optional core id if the data is related to a cpu core. - llvm::Optional core_id; + llvm::Optional core_id; /// Offset in bytes from where to start reading the data. uint64_t offset; /// Number of bytes to read. diff --git a/lldb/include/lldb/Utility/TraceIntelPTGDBRemotePackets.h b/lldb/include/lldb/Utility/TraceIntelPTGDBRemotePackets.h --- a/lldb/include/lldb/Utility/TraceIntelPTGDBRemotePackets.h +++ b/lldb/include/lldb/Utility/TraceIntelPTGDBRemotePackets.h @@ -80,7 +80,9 @@ /// /// \return /// Nanosecond wall time. - std::chrono::nanoseconds ToNanos(uint64_t tsc); + std::chrono::nanoseconds ToNanos(uint64_t tsc) const; + + uint64_t ToTSC(std::chrono::nanoseconds nanos) const; uint32_t time_mult; uint16_t time_shift; diff --git a/lldb/source/Plugins/Process/Linux/Perf.cpp b/lldb/source/Plugins/Process/Linux/Perf.cpp --- a/lldb/source/Plugins/Process/Linux/Perf.cpp +++ b/lldb/source/Plugins/Process/Linux/Perf.cpp @@ -231,7 +231,7 @@ "buffer but only {1} are available", size, output.size())); - return data; + return output; } Expected> @@ -282,7 +282,7 @@ "buffer but only {1} are available", size, output.size())); - return data; + return output; } Error PerfEvent::DisableWithIoctl() { @@ -313,7 +313,7 @@ size_t PerfEvent::GetEffectiveDataBufferSize() const { perf_event_mmap_page &mmap_metadata = GetMetadataPage(); - if (mmap_metadata.data_head <= mmap_metadata.data_size) + if (mmap_metadata.data_head < mmap_metadata.data_size) return mmap_metadata.data_head; else return mmap_metadata.data_size; // The buffer has wrapped. diff --git a/lldb/source/Plugins/Process/gdb-remote/GDBRemoteCommunicationClient.cpp b/lldb/source/Plugins/Process/gdb-remote/GDBRemoteCommunicationClient.cpp --- a/lldb/source/Plugins/Process/gdb-remote/GDBRemoteCommunicationClient.cpp +++ b/lldb/source/Plugins/Process/gdb-remote/GDBRemoteCommunicationClient.cpp @@ -2697,8 +2697,8 @@ packet.Printf("%" PRIx64, tid); StringExtractorGDBRemote response; - if (SendPacketAndWaitForResponse(packet.GetString(), response) - == PacketResult::Success) { + if (SendPacketAndWaitForResponse(packet.GetString(), response) == + PacketResult::Success) { if (response.IsOKResponse()) return {{pid, tid}}; @@ -3701,9 +3701,6 @@ GDBRemoteCommunication::PacketResult::Success) { if (response.IsErrorResponse()) return response.GetStatus().ToError(); - if (response.IsUnsupportedResponse()) - return llvm::createStringError(llvm::inconvertibleErrorCode(), - "jLLDBTraceGetBinaryData is unsupported"); std::string data; response.GetEscapedBinaryData(data); return std::vector(data.begin(), data.end()); diff --git a/lldb/source/Plugins/Trace/intel-pt/CMakeLists.txt b/lldb/source/Plugins/Trace/intel-pt/CMakeLists.txt --- a/lldb/source/Plugins/Trace/intel-pt/CMakeLists.txt +++ b/lldb/source/Plugins/Trace/intel-pt/CMakeLists.txt @@ -22,6 +22,7 @@ TraceCursorIntelPT.cpp TraceIntelPT.cpp TraceIntelPTJSONStructs.cpp + TraceIntelPTMultiCoreDecoder.cpp TraceIntelPTSessionFileParser.cpp TraceIntelPTSessionSaver.cpp diff --git a/lldb/source/Plugins/Trace/intel-pt/CommandObjectTraceStartIntelPT.h b/lldb/source/Plugins/Trace/intel-pt/CommandObjectTraceStartIntelPT.h --- a/lldb/source/Plugins/Trace/intel-pt/CommandObjectTraceStartIntelPT.h +++ b/lldb/source/Plugins/Trace/intel-pt/CommandObjectTraceStartIntelPT.h @@ -31,9 +31,9 @@ llvm::ArrayRef GetDefinitions() override; - size_t m_trace_buffer_size; + uint64_t m_trace_buffer_size; bool m_enable_tsc; - llvm::Optional m_psb_period; + llvm::Optional m_psb_period; }; CommandObjectThreadTraceStartIntelPT(TraceIntelPT &trace, @@ -74,10 +74,10 @@ llvm::ArrayRef GetDefinitions() override; - size_t m_trace_buffer_size; - size_t m_process_buffer_size_limit; + uint64_t m_trace_buffer_size; + uint64_t m_process_buffer_size_limit; bool m_enable_tsc; - llvm::Optional m_psb_period; + llvm::Optional m_psb_period; bool m_per_core_tracing; }; diff --git a/lldb/source/Plugins/Trace/intel-pt/TaskTimer.h b/lldb/source/Plugins/Trace/intel-pt/TaskTimer.h --- a/lldb/source/Plugins/Trace/intel-pt/TaskTimer.h +++ b/lldb/source/Plugins/Trace/intel-pt/TaskTimer.h @@ -22,8 +22,8 @@ namespace trace_intel_pt { /// Class used to track the duration of long running tasks related to a single -/// thread for reporting. -class ThreadTaskTimer { +/// scope for reporting. +class ScopedTaskTimer { public: /// Execute the given \p task and record its duration. /// @@ -63,10 +63,15 @@ public: /// \return /// The timer object for the given thread. - ThreadTaskTimer &ForThread(lldb::tid_t tid); + ScopedTaskTimer &ForThread(lldb::tid_t tid); + + /// \return + /// The timer object for global tasks. + ScopedTaskTimer &ForGlobal(); private: - llvm::DenseMap m_thread_timers; + llvm::DenseMap m_thread_timers; + ScopedTaskTimer m_global_timer; }; } // namespace trace_intel_pt diff --git a/lldb/source/Plugins/Trace/intel-pt/TaskTimer.cpp b/lldb/source/Plugins/Trace/intel-pt/TaskTimer.cpp --- a/lldb/source/Plugins/Trace/intel-pt/TaskTimer.cpp +++ b/lldb/source/Plugins/Trace/intel-pt/TaskTimer.cpp @@ -5,7 +5,7 @@ using namespace lldb_private::trace_intel_pt; using namespace llvm; -void ThreadTaskTimer::ForEachTimedTask( +void ScopedTaskTimer::ForEachTimedTask( std::function callback) { @@ -14,9 +14,11 @@ } } -ThreadTaskTimer &TaskTimer::ForThread(lldb::tid_t tid) { +ScopedTaskTimer &TaskTimer::ForThread(lldb::tid_t tid) { auto it = m_thread_timers.find(tid); if (it == m_thread_timers.end()) - it = m_thread_timers.try_emplace(tid, ThreadTaskTimer{}).first; + it = m_thread_timers.try_emplace(tid, ScopedTaskTimer{}).first; return it->second; } + +ScopedTaskTimer &TaskTimer::ForGlobal() { return m_global_timer; } diff --git a/lldb/source/Plugins/Trace/intel-pt/TraceIntelPT.h b/lldb/source/Plugins/Trace/intel-pt/TraceIntelPT.h --- a/lldb/source/Plugins/Trace/intel-pt/TraceIntelPT.h +++ b/lldb/source/Plugins/Trace/intel-pt/TraceIntelPT.h @@ -11,7 +11,9 @@ #include "TaskTimer.h" #include "ThreadDecoder.h" +#include "TraceIntelPTMultiCoreDecoder.h" #include "TraceIntelPTSessionFileParser.h" + #include "lldb/Utility/FileSpec.h" #include "lldb/lldb-types.h" #include "llvm/Support/raw_ostream.h" @@ -73,7 +75,7 @@ void DumpTraceInfo(Thread &thread, Stream &s, bool verbose) override; - llvm::Expected GetRawTraceSize(Thread &thread); + llvm::Expected> GetRawTraceSize(Thread &thread); llvm::Error DoRefreshLiveProcessState(TraceGetStateResponse state, llvm::StringRef json_response) override; @@ -106,8 +108,9 @@ /// \return /// \a llvm::Error::success if the operation was successful, or /// \a llvm::Error otherwise. - llvm::Error Start(size_t trace_buffer_size, size_t total_buffer_size_limit, - bool enable_tsc, llvm::Optional psb_period, + llvm::Error Start(uint64_t trace_buffer_size, + uint64_t total_buffer_size_limit, bool enable_tsc, + llvm::Optional psb_period, bool m_per_core_tracing); /// \copydoc Trace::Start @@ -134,8 +137,9 @@ /// \return /// \a llvm::Error::success if the operation was successful, or /// \a llvm::Error otherwise. - llvm::Error Start(llvm::ArrayRef tids, size_t trace_buffer_size, - bool enable_tsc, llvm::Optional psb_period); + llvm::Error Start(llvm::ArrayRef tids, + uint64_t trace_buffer_size, bool enable_tsc, + llvm::Optional psb_period); /// \copydoc Trace::Start llvm::Error Start(llvm::ArrayRef tids, @@ -195,6 +199,8 @@ /// It is provided by either a session file or a live process' "cpuInfo" /// binary data. llvm::Optional m_cpu_info; + llvm::Optional m_multicore_decoder; + /// These decoders are used for the non-per-core case std::map> m_thread_decoders; /// Helper variable used to track long running operations for telemetry. TaskTimer m_task_timer; diff --git a/lldb/source/Plugins/Trace/intel-pt/TraceIntelPT.cpp b/lldb/source/Plugins/Trace/intel-pt/TraceIntelPT.cpp --- a/lldb/source/Plugins/Trace/intel-pt/TraceIntelPT.cpp +++ b/lldb/source/Plugins/Trace/intel-pt/TraceIntelPT.cpp @@ -89,13 +89,24 @@ } } if (session.cores) { + std::vector cores; + for (const JSONCore &core : *session.cores) { SetPostMortemCoreDataFile(core.core_id, IntelPTDataKinds::kTraceBuffer, FileSpec(core.trace_buffer)); SetPostMortemCoreDataFile(core.core_id, IntelPTDataKinds::kPerfContextSwitchTrace, FileSpec(core.context_switch_trace)); + cores.push_back(core.core_id); } + + std::vector tids; + for (const JSONProcess &process : session.processes) + for (const JSONThread &thread : process.threads) + tids.push_back(thread.tid); + + m_multicore_decoder.emplace(*this, cores, tids, + *session.tsc_perf_zero_conversion); } } @@ -105,6 +116,9 @@ thread.shared_from_this(), createStringError(inconvertibleErrorCode(), error)); + if (m_multicore_decoder) + return m_multicore_decoder->Decode(thread); + auto it = m_thread_decoders.find(thread.GetID()); if (it == m_thread_decoders.end()) return std::make_shared( @@ -126,60 +140,98 @@ } s << "\n"; - Expected raw_size = GetRawTraceSize(thread); - if (!raw_size) { - s.Format(" {0}\n", toString(raw_size.takeError())); + Expected> raw_size_or_error = GetRawTraceSize(thread); + if (!raw_size_or_error) { + s.Format(" {0}\n", toString(raw_size_or_error.takeError())); return; } + Optional raw_size = *raw_size_or_error; DecodedThreadSP decoded_trace_sp = Decode(thread); - size_t insn_len = decoded_trace_sp->GetInstructionsCount(); - size_t mem_used = decoded_trace_sp->CalculateApproximateMemoryUsage(); - - s.Format(" Total number of instructions: {0}\n", insn_len); - - s << "\n Memory usage:\n"; - s.Format(" Raw trace size: {0} KiB\n", *raw_size / 1024); - s.Format( - " Total approximate memory usage (excluding raw trace): {0:2} KiB\n", - (double)mem_used / 1024); - if (insn_len != 0) - s.Format(" Average memory usage per instruction (excluding raw trace): " - "{0:2} bytes\n", - (double)mem_used / insn_len); - - s << "\n Timing:\n"; - GetTimer().ForThread(tid).ForEachTimedTask( - [&](const std::string &name, std::chrono::milliseconds duration) { - s.Format(" {0}: {1:2}s\n", name, duration.count() / 1000.0); - }); - - const DecodedThread::EventsStats &events_stats = - decoded_trace_sp->GetEventsStats(); - s << "\n Events:\n"; - s.Format(" Number of instructions with events: {0}\n", - events_stats.total_instructions_with_events); - s.Format(" Number of individual events: {0}\n", events_stats.total_count); - for (const auto &event_to_count : events_stats.events_counts) { - s.Format(" {0}: {1}\n", - trace_event_utils::EventToDisplayString(event_to_count.first), - event_to_count.second); + + /// Instruction stats + { + uint64_t insn_len = decoded_trace_sp->GetInstructionsCount(); + uint64_t mem_used = decoded_trace_sp->CalculateApproximateMemoryUsage(); + + s.Format(" Total number of instructions: {0}\n", insn_len); + + s << "\n Memory usage:\n"; + if (raw_size) + s.Format(" Raw trace size: {0} KiB\n", *raw_size / 1024); + + s.Format( + " Total approximate memory usage (excluding raw trace): {0:2} KiB\n", + (double)mem_used / 1024); + if (insn_len != 0) + s.Format( + " Average memory usage per instruction (excluding raw trace): " + "{0:2} bytes\n", + (double)mem_used / insn_len); + } + + // Timing + { + s << "\n Timing for this thread:\n"; + auto print_duration = [&](const std::string &name, + std::chrono::milliseconds duration) { + s.Format(" {0}: {1:2}s\n", name, duration.count() / 1000.0); + }; + GetTimer().ForThread(tid).ForEachTimedTask(print_duration); + + s << "\n Timing for global tasks:\n"; + GetTimer().ForGlobal().ForEachTimedTask(print_duration); + } + + // Instruction events stats + { + const DecodedThread::EventsStats &events_stats = + decoded_trace_sp->GetEventsStats(); + s << "\n Events:\n"; + s.Format(" Number of instructions with events: {0}\n", + events_stats.total_instructions_with_events); + s.Format(" Number of individual events: {0}\n", + events_stats.total_count); + for (const auto &event_to_count : events_stats.events_counts) { + s.Format(" {0}: {1}\n", + trace_event_utils::EventToDisplayString(event_to_count.first), + event_to_count.second); + } } - s << "\n Errors:\n"; - const DecodedThread::LibiptErrorsStats &tsc_errors_stats = - decoded_trace_sp->GetTscErrorsStats(); - s.Format(" Number of TSC decoding errors: {0}\n", - tsc_errors_stats.total_count); - for (const auto &error_message_to_count : - tsc_errors_stats.libipt_errors_counts) { - s.Format(" {0}: {1}\n", error_message_to_count.first, - error_message_to_count.second); + // Multicode decoding stats + if (m_multicore_decoder) { + s << "\n Multi-core decoding:\n"; + s.Format(" Total number of continuous executions found: {0}\n", + m_multicore_decoder->GetTotalContinuousExecutionsCount()); + s.Format(" Number of continuous executions for this thread: {0}\n", + m_multicore_decoder->GetNumContinuousExecutionsForThread(tid)); + } + + // Errors + { + s << "\n Errors:\n"; + const DecodedThread::LibiptErrorsStats &tsc_errors_stats = + decoded_trace_sp->GetTscErrorsStats(); + s.Format(" Number of TSC decoding errors: {0}\n", + tsc_errors_stats.total_count); + for (const auto &error_message_to_count : + tsc_errors_stats.libipt_errors_counts) { + s.Format(" {0}: {1}\n", error_message_to_count.first, + error_message_to_count.second); + } } } -llvm::Expected TraceIntelPT::GetRawTraceSize(Thread &thread) { - size_t size; +llvm::Expected> +TraceIntelPT::GetRawTraceSize(Thread &thread) { + if (m_multicore_decoder) + return None; // TODO: calculate the amount of intel pt raw trace associated + // with the given thread. + if (GetLiveProcess()) + return GetLiveThreadBinaryDataSize(thread.GetID(), + IntelPTDataKinds::kTraceBuffer); + uint64_t size; auto callback = [&](llvm::ArrayRef data) { size = data.size(); return Error::success(); @@ -262,13 +314,8 @@ Error TraceIntelPT::DoRefreshLiveProcessState(TraceGetStateResponse state, StringRef json_response) { m_thread_decoders.clear(); - - for (const TraceThreadState &thread_state : state.traced_threads) { - ThreadSP thread_sp = - GetLiveProcess()->GetThreadList().FindThreadByID(thread_state.tid); - m_thread_decoders.emplace( - thread_state.tid, std::make_unique(thread_sp, *this)); - } + m_tsc_conversion.reset(); + m_multicore_decoder.reset(); Expected intelpt_state = json::parse(json_response, @@ -276,6 +323,29 @@ if (!intelpt_state) return intelpt_state.takeError(); + if (!intelpt_state->cores) { + for (const TraceThreadState &thread_state : state.traced_threads) { + ThreadSP thread_sp = + GetLiveProcess()->GetThreadList().FindThreadByID(thread_state.tid); + m_thread_decoders.emplace( + thread_state.tid, std::make_unique(thread_sp, *this)); + } + } else { + std::vector cores; + for (const TraceCoreState &core : *intelpt_state->cores) + cores.push_back(core.core_id); + + std::vector tids; + for (const TraceThreadState &thread : intelpt_state->traced_threads) + tids.push_back(thread.tid); + + if (!intelpt_state->tsc_perf_zero_conversion) + return createStringError(inconvertibleErrorCode(), + "Missing perf time_zero conversion values"); + m_multicore_decoder.emplace(*this, cores, tids, + *intelpt_state->tsc_perf_zero_conversion); + } + m_tsc_conversion = intelpt_state->tsc_perf_zero_conversion; if (m_tsc_conversion) { Log *log = GetLog(LLDBLog::Target); @@ -286,6 +356,8 @@ bool TraceIntelPT::IsTraced(lldb::tid_t tid) { RefreshLiveProcessState(); + if (m_multicore_decoder) + return m_multicore_decoder->TracesThread(tid); return m_thread_decoders.count(tid); } @@ -322,25 +394,25 @@ return message->c_str(); } -Error TraceIntelPT::Start(size_t trace_buffer_size, - size_t total_buffer_size_limit, bool enable_tsc, - Optional psb_period, bool per_core_tracing) { +Error TraceIntelPT::Start(uint64_t trace_buffer_size, + uint64_t total_buffer_size_limit, bool enable_tsc, + Optional psb_period, + bool per_core_tracing) { TraceIntelPTStartRequest request; request.trace_buffer_size = trace_buffer_size; request.process_buffer_size_limit = total_buffer_size_limit; request.enable_tsc = enable_tsc; - request.psb_period = - psb_period.map([](size_t val) { return static_cast(val); }); + request.psb_period = psb_period; request.type = GetPluginName().str(); request.per_core_tracing = per_core_tracing; return Trace::Start(toJSON(request)); } Error TraceIntelPT::Start(StructuredData::ObjectSP configuration) { - size_t trace_buffer_size = kDefaultTraceBufferSize; - size_t process_buffer_size_limit = kDefaultProcessBufferSizeLimit; + uint64_t trace_buffer_size = kDefaultTraceBufferSize; + uint64_t process_buffer_size_limit = kDefaultProcessBufferSizeLimit; bool enable_tsc = kDefaultEnableTscValue; - Optional psb_period = kDefaultPsbPeriod; + Optional psb_period = kDefaultPsbPeriod; bool per_core_tracing = kDefaultPerCoreTracing; if (configuration) { @@ -362,13 +434,12 @@ } llvm::Error TraceIntelPT::Start(llvm::ArrayRef tids, - size_t trace_buffer_size, bool enable_tsc, - Optional psb_period) { + uint64_t trace_buffer_size, bool enable_tsc, + Optional psb_period) { TraceIntelPTStartRequest request; request.trace_buffer_size = trace_buffer_size; request.enable_tsc = enable_tsc; - request.psb_period = - psb_period.map([](size_t val) { return static_cast(val); }); + request.psb_period = psb_period; request.type = GetPluginName().str(); request.tids.emplace(); for (lldb::tid_t tid : tids) diff --git a/lldb/source/Plugins/Trace/intel-pt/TraceIntelPTJSONStructs.cpp b/lldb/source/Plugins/Trace/intel-pt/TraceIntelPTJSONStructs.cpp --- a/lldb/source/Plugins/Trace/intel-pt/TraceIntelPTJSONStructs.cpp +++ b/lldb/source/Plugins/Trace/intel-pt/TraceIntelPTJSONStructs.cpp @@ -132,6 +132,11 @@ !o.map("type", session.type) || !o.map("cores", session.cores) || !o.map("tscPerfZeroConversion", session.tsc_perf_zero_conversion)) return false; + if (session.cores && !session.tsc_perf_zero_conversion) { + path.report( + "\"tscPerfZeroConversion\" is required when \"cores\" is provided"); + return false; + } // We have to do this because the compiler fails at doing it automatically // because pt_cpu is not in a namespace if (!fromJSON(*value.getAsObject()->get("cpuInfo"), session.cpu_info, diff --git a/lldb/source/Plugins/Trace/intel-pt/TraceIntelPTMultiCoreDecoder.h b/lldb/source/Plugins/Trace/intel-pt/TraceIntelPTMultiCoreDecoder.h new file mode 100644 --- /dev/null +++ b/lldb/source/Plugins/Trace/intel-pt/TraceIntelPTMultiCoreDecoder.h @@ -0,0 +1,161 @@ +//===-- TraceIntelPTMultiCoreDecoder.h --------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLDB_SOURCE_PLUGINS_TRACE_INTEL_PT_TRACEINTELPTMULTICOREDECODER_H +#define LLDB_SOURCE_PLUGINS_TRACE_INTEL_PT_TRACEINTELPTMULTICOREDECODER_H + +#include "ThreadDecoder.h" + +namespace lldb_private { +namespace trace_intel_pt { + +/// This class indicates the time interval in which a thread was running +/// continuously on a cpu core. +/// +/// In most cases both endpoints of the intervals can be accurately recovered +/// from a context switch trace, but in some cases one of these endpoints might +/// be guessed or not known at all, due to contention problems in the trace or +/// because tracing was interrupted. +/// +/// Note: we use the terms CPU and cores interchangeably. +struct ThreadContinuousExecution { + enum class Variant { + /// Both endpoints are known + Complete, + /// The end is known and we have a guess for the start + HintedStart, + /// The start is known and we have a guess for the end + HintedEnd, + /// We only know the start. This might be the last entry of a core trace. + OnlyStart, + /// We only know the end. This might be the first entry or a core trace. + OnlyEnd, + } variant; + + union { + struct { + uint64_t start; + uint64_t end; + } complete; + struct { + uint64_t start; + } only_start; + struct { + uint64_t end; + } only_end; + /// The following 'hinted' structures are useful when there are contention + /// problems in the trace + struct { + uint64_t hinted_start; + uint64_t end; + } hinted_start; + struct { + uint64_t start; + uint64_t hinted_end; + } hinted_end; + } tscs; + + lldb::core_id_t core_id; + lldb::tid_t tid; + + /// \return + /// A tsc that we are certain of, either the start or the end. + uint64_t GetErrorFreeTSC() const; + + /// Constructors for the different variants of this object + /// + /// \{ + static ThreadContinuousExecution + CreateCompleteExecution(lldb::core_id_t core_id, lldb::tid_t tid, + uint64_t start, uint64_t end); + + static ThreadContinuousExecution + CreateHintedStartExecution(lldb::core_id_t core_id, lldb::tid_t tid, + uint64_t hinted_start, uint64_t end); + + static ThreadContinuousExecution + CreateHintedEndExecution(lldb::core_id_t core_id, lldb::tid_t tid, + uint64_t start, uint64_t hinted_end); + + static ThreadContinuousExecution + CreateOnlyEndExecution(lldb::core_id_t core_id, lldb::tid_t tid, + uint64_t end); + + static ThreadContinuousExecution + CreateOnlyStartExecution(lldb::core_id_t core_id, lldb::tid_t tid, + uint64_t start); + /// \} + + /// Comparator by TSCs + bool operator<(const ThreadContinuousExecution &o) const; + +private: + ThreadContinuousExecution(lldb::core_id_t core_id, lldb::tid_t tid) + : core_id(core_id), tid(tid) {} +}; + +/// Class used to decode a multi-core Intel PT trace. It assumes that each +/// thread could have potentially been executed on different cores. It uses a +/// context switch trace per CPU with timestamps to identify which thread owns +/// each Intel PT decoded instruction and in which order. It also assumes that +/// the Intel PT data and context switches might have gaps in their traces due +/// to contention or race conditions. +class TraceIntelPTMultiCoreDecoder { +public: + /// \param[in] core_ids + /// The list of cores where the traced programs were running on. + /// + /// \param[in] tid + /// The full list of tids that were traced. + /// + /// \param[in] tsc_conversion + /// The conversion values for converting between nanoseconds and TSCs. + TraceIntelPTMultiCoreDecoder( + TraceIntelPT &trace, llvm::ArrayRef core_ids, + llvm::ArrayRef tids, + const LinuxPerfZeroTscConversion &tsc_conversion); + + /// \return + /// A \a DecodedThread for the \p thread by decoding its instructions on all + /// CPUs, sorted by TSCs. + DecodedThreadSP Decode(Thread &thread); + + /// \return + /// \b true if the given \p tid is managed by this decoder, regardless of + /// whether there's tracing data associated to it or not. + bool TracesThread(lldb::tid_t tid) const; + + /// \return + /// The number of continuous executions found for the given \p tid. + size_t GetNumContinuousExecutionsForThread(lldb::tid_t tid) const; + + /// \return + /// The total number of continuous executions found across CPUs. + size_t GetTotalContinuousExecutionsCount() const; + +private: + /// Traverse the context switch traces and recover the continuous executions + /// by thread. + llvm::Error DecodeContextSwitchTraces(); + + TraceIntelPT &m_trace; + std::set m_cores; + std::set m_tids; + llvm::Optional< + llvm::DenseMap>> + m_continuous_executions_per_thread; + LinuxPerfZeroTscConversion m_tsc_conversion; + /// This variable will be non-None if a severe error happened during the setup + /// of the decoder. + llvm::Optional m_setup_error; +}; + +} // namespace trace_intel_pt +} // namespace lldb_private + +#endif // LLDB_SOURCE_PLUGINS_TRACE_INTEL_PT_TRACEINTELPTMULTICOREDECODER_H diff --git a/lldb/source/Plugins/Trace/intel-pt/TraceIntelPTMultiCoreDecoder.cpp b/lldb/source/Plugins/Trace/intel-pt/TraceIntelPTMultiCoreDecoder.cpp new file mode 100644 --- /dev/null +++ b/lldb/source/Plugins/Trace/intel-pt/TraceIntelPTMultiCoreDecoder.cpp @@ -0,0 +1,312 @@ +//===-- TraceIntelPTMultiCoreDecoder.cpp ----------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "TraceIntelPTMultiCoreDecoder.h" + +#include "TraceIntelPT.h" + +#include "llvm/Support/Error.h" + +#include + +using namespace lldb; +using namespace lldb_private; +using namespace lldb_private::trace_intel_pt; +using namespace llvm; + +struct PerfContextSwitchRecord { + struct perf_event_header header; + uint32_t next_prev_pid; + uint32_t next_prev_tid; + uint32_t pid, tid; + uint64_t time_in_nanos; + + bool IsOut() const { return header.misc & PERF_RECORD_MISC_SWITCH_OUT; } +}; + +struct ContextSwitchRecord { + uint64_t tsc; + bool is_out; + /// A pid of 0 indicates an execution in the kernel + lldb::pid_t pid; + lldb::tid_t tid; + + bool IsOut() const { return is_out; } + + bool IsIn() const { return !is_out; } +}; + +uint64_t ThreadContinuousExecution::GetErrorFreeTSC() const { + switch (variant) { + case Variant::Complete: + return tscs.complete.start; // end would also work + case Variant::HintedStart: + return tscs.hinted_start.end; + case Variant::HintedEnd: + return tscs.hinted_end.start; + case Variant::OnlyEnd: + return tscs.only_end.end; + case Variant::OnlyStart: + return tscs.only_start.start; + } +} + +ThreadContinuousExecution ThreadContinuousExecution::CreateCompleteExecution( + lldb::core_id_t core_id, lldb::tid_t tid, uint64_t start, uint64_t end) { + ThreadContinuousExecution o(core_id, tid); + o.variant = Variant::Complete; + o.tscs.complete.start = start; + o.tscs.complete.end = end; + return o; +} + +ThreadContinuousExecution ThreadContinuousExecution::CreateHintedStartExecution( + lldb::core_id_t core_id, lldb::tid_t tid, uint64_t hinted_start, + uint64_t end) { + ThreadContinuousExecution o(core_id, tid); + o.variant = Variant::HintedStart; + o.tscs.hinted_start.hinted_start = hinted_start; + o.tscs.hinted_start.end = end; + return o; +} + +ThreadContinuousExecution ThreadContinuousExecution::CreateHintedEndExecution( + lldb::core_id_t core_id, lldb::tid_t tid, uint64_t start, + uint64_t hinted_end) { + ThreadContinuousExecution o(core_id, tid); + o.variant = Variant::HintedEnd; + o.tscs.hinted_end.start = start; + o.tscs.hinted_end.hinted_end = hinted_end; + return o; +} + +ThreadContinuousExecution ThreadContinuousExecution::CreateOnlyEndExecution( + lldb::core_id_t core_id, lldb::tid_t tid, uint64_t end) { + ThreadContinuousExecution o(core_id, tid); + o.variant = Variant::OnlyEnd; + o.tscs.only_end.end = end; + return o; +} + +ThreadContinuousExecution ThreadContinuousExecution::CreateOnlyStartExecution( + lldb::core_id_t core_id, lldb::tid_t tid, uint64_t start) { + ThreadContinuousExecution o(core_id, tid); + o.variant = Variant::OnlyStart; + o.tscs.only_start.start = start; + return o; +} + +bool ThreadContinuousExecution::operator<( + const ThreadContinuousExecution &o) const { + // We can compare by GetErrorFreeTSC because context switches across CPUs can + // be sorted by any of its TSC. + return GetErrorFreeTSC() < o.GetErrorFreeTSC(); +} + +/// Tries to recover a continuous execution by analyzing two consecutive context +/// switch records. +static Error +HandleContextSwitch(core_id_t core_id, + const LinuxPerfZeroTscConversion &tsc_conversion, + const ContextSwitchRecord &record, + const Optional &prev_record, + std::function + on_new_thread_execution) { + if (!prev_record) { + if (record.IsOut()) + on_new_thread_execution(ThreadContinuousExecution::CreateOnlyEndExecution( + core_id, record.tid, record.tsc)); + // The 'in' case will be handled later when we try to look for its end + return Error::success(); + } + + const ContextSwitchRecord &prev = *prev_record; + if (prev.tsc > record.tsc) + return createStringError( + inconvertibleErrorCode(), + formatv("A context switch record out doesn't happen after the previous " + "record. Previous TSC= {0}, current TSC = {1}.", + prev.tsc, record.tsc)); + + if (record.IsIn() && prev.IsIn()) { + // We found two consecutive ins, which means that we didn't capture + // the end of the previous execution. + on_new_thread_execution(ThreadContinuousExecution::CreateHintedEndExecution( + core_id, prev.tid, prev.tsc, record.tsc - 1)); + } else if (record.IsOut() && prev.IsOut()) { + // We found two consecutive outs, that means that we didn't capture + // the beginning of the current execution. + on_new_thread_execution( + ThreadContinuousExecution::CreateHintedStartExecution( + core_id, record.tid, prev.tsc + 1, record.tsc)); + } else if (record.IsOut() && prev.IsIn()) { + if (record.pid == prev.pid && record.tid == prev.tid) { + /// A complete execution + on_new_thread_execution( + ThreadContinuousExecution::CreateCompleteExecution( + core_id, record.tid, prev.tsc, record.tsc)); + } else { + // An out after the in of a different thread. The first one doesn't + // have an end, and the second one doesn't have a start. + on_new_thread_execution( + ThreadContinuousExecution::CreateHintedEndExecution( + core_id, prev.tid, prev.tsc, record.tsc - 1)); + on_new_thread_execution( + ThreadContinuousExecution::CreateHintedStartExecution( + core_id, record.tid, prev.tsc + 1, record.tsc)); + } + } + return Error::success(); +} + +/// Decodes a context switch trace gotten with perf_event_open. +/// +/// \param[in] data +/// The context switch trace in binary format. +/// +/// \param[i] core_id +/// The core_id where the trace were gotten from. +/// +/// \param[in] tsc_conversion +/// The conversion values used to confert nanoseconds to TSC. +/// +/// \param[in] on_new_thread_execution +/// Callback to be invoked whenever a continuous execution is recovered from +/// the trace. +static Error DecodePerfContextSwitchTrace( + ArrayRef data, core_id_t core_id, + const LinuxPerfZeroTscConversion &tsc_conversion, + std::function + on_new_thread_execution) { + auto CreateError = [&](size_t offset, auto error) -> Error { + return createStringError(inconvertibleErrorCode(), + formatv("Malformed perf context switch trace for " + "cpu {0} at offset {1}. {2}", + core_id, offset, error)); + }; + + Optional prev_record; + for (size_t offset = 0; offset < data.size();) { + const PerfContextSwitchRecord &perf_record = + *reinterpret_cast(data.data() + + offset); + // A record of 1000 uint64_t's or more should mean that the data is wrong + if (perf_record.header.size == 0 || + perf_record.header.size > sizeof(uint64_t) * 1000) + return CreateError(offset, formatv("A record of {0} bytes was found.", + perf_record.header.size)); + + // We add + 100 to this record because some systems might have custom + // records. In any case, we are looking only for abnormal data. + if (perf_record.header.type >= PERF_RECORD_MAX + 100) + return CreateError(offset, formatv("Invalid record type {0} was found.", + perf_record.header.type)); + + if (perf_record.header.type == PERF_RECORD_SWITCH_CPU_WIDE) { + ContextSwitchRecord record{tsc_conversion.ToTSC(std::chrono::nanoseconds( + perf_record.time_in_nanos)), + perf_record.IsOut(), + static_cast(perf_record.pid), + static_cast(perf_record.tid)}; + + if (Error err = HandleContextSwitch(core_id, tsc_conversion, record, + prev_record, on_new_thread_execution)) + return CreateError(offset, toString(std::move(err))); + + prev_record = record; + } + offset += perf_record.header.size; + } + + // We might have an incomplete last record + if (prev_record && prev_record->IsIn()) + on_new_thread_execution(ThreadContinuousExecution::CreateOnlyStartExecution( + core_id, prev_record->tid, prev_record->tsc)); + + return Error::success(); +} + +TraceIntelPTMultiCoreDecoder::TraceIntelPTMultiCoreDecoder( + TraceIntelPT &trace, ArrayRef core_ids, ArrayRef tids, + const LinuxPerfZeroTscConversion &tsc_conversion) + : m_trace(trace), m_cores(core_ids.begin(), core_ids.end()), + m_tids(tids.begin(), tids.end()), m_tsc_conversion(tsc_conversion) {} + +bool TraceIntelPTMultiCoreDecoder::TracesThread(lldb::tid_t tid) const { + return m_tids.count(tid); +} + +DecodedThreadSP TraceIntelPTMultiCoreDecoder::Decode(Thread &thread) { + if (Error err = DecodeContextSwitchTraces()) + return std::make_shared(thread.shared_from_this(), + std::move(err)); + + return std::make_shared( + thread.shared_from_this(), + createStringError(inconvertibleErrorCode(), "unimplemented")); +} + +Error TraceIntelPTMultiCoreDecoder::DecodeContextSwitchTraces() { + if (m_setup_error) + return createStringError(inconvertibleErrorCode(), m_setup_error->c_str()); + + if (m_continuous_executions_per_thread) + return Error::success(); + + m_continuous_executions_per_thread.emplace(); + + auto do_decode = [&]() -> Error { + // We'll decode all context switch traces, identify continuous executions + // and group them by thread. + for (core_id_t core_id : m_cores) { + Error err = m_trace.OnCoreBinaryDataRead( + core_id, IntelPTDataKinds::kPerfContextSwitchTrace, + [&](ArrayRef data) -> Error { + return DecodePerfContextSwitchTrace( + data, core_id, m_tsc_conversion, + [&](const ThreadContinuousExecution &execution) { + (*m_continuous_executions_per_thread)[execution.tid] + .push_back(execution); + }); + }); + if (err) { + m_setup_error = toString(std::move(err)); + return createStringError(inconvertibleErrorCode(), + m_setup_error->c_str()); + } + } + // We now sort the executions of each to have them ready for instruction + // decoding + for (auto &tid_executions : *m_continuous_executions_per_thread) + std::sort(tid_executions.second.begin(), tid_executions.second.end()); + + return Error::success(); + }; + + return m_trace.GetTimer().ForGlobal().TimeTask( + "Context switch trace decoding", do_decode); +} + +size_t TraceIntelPTMultiCoreDecoder::GetNumContinuousExecutionsForThread( + lldb::tid_t tid) const { + if (!m_continuous_executions_per_thread) + return 0; + auto it = m_continuous_executions_per_thread->find(tid); + if (it == m_continuous_executions_per_thread->end()) + return 0; + return it->second.size(); +} + +size_t TraceIntelPTMultiCoreDecoder::GetTotalContinuousExecutionsCount() const { + if (!m_continuous_executions_per_thread) + return 0; + size_t count = 0; + for (const auto &kv : *m_continuous_executions_per_thread) + count += kv.second.size(); + return count; +} diff --git a/lldb/source/Plugins/Trace/intel-pt/TraceIntelPTSessionFileParser.cpp b/lldb/source/Plugins/Trace/intel-pt/TraceIntelPTSessionFileParser.cpp --- a/lldb/source/Plugins/Trace/intel-pt/TraceIntelPTSessionFileParser.cpp +++ b/lldb/source/Plugins/Trace/intel-pt/TraceIntelPTSessionFileParser.cpp @@ -141,6 +141,7 @@ else return HandleError(parsed_process.takeError()); } + return parsed_processes; } @@ -191,6 +192,8 @@ // Path to the raw Intel PT buffer for this core. "contextSwitchTrace": string, // Path to the raw perf_event_open context switch trace file for this core. + // The perf_event must have been configured with PERF_SAMPLE_TID and + // PERF_SAMPLE_TIME, as well as sample_id_all = 1. } ], "tscPerfZeroConversion"?: { diff --git a/lldb/source/Plugins/Trace/intel-pt/TraceIntelPTSessionSaver.cpp b/lldb/source/Plugins/Trace/intel-pt/TraceIntelPTSessionSaver.cpp --- a/lldb/source/Plugins/Trace/intel-pt/TraceIntelPTSessionSaver.cpp +++ b/lldb/source/Plugins/Trace/intel-pt/TraceIntelPTSessionSaver.cpp @@ -34,8 +34,9 @@ ArrayRef data) { std::basic_fstream out_fs = std::fstream( output_file.GetPath().c_str(), std::ios::out | std::ios::binary); - out_fs.write(reinterpret_cast(&data[0]), - data.size() * sizeof(uint8_t)); + if (!data.empty()) + out_fs.write(reinterpret_cast(&data[0]), data.size()); + out_fs.close(); if (!out_fs) return createStringError(inconvertibleErrorCode(), diff --git a/lldb/source/Target/Trace.cpp b/lldb/source/Target/Trace.cpp --- a/lldb/source/Target/Trace.cpp +++ b/lldb/source/Target/Trace.cpp @@ -114,31 +114,31 @@ return m_live_process->TraceGetState(GetPluginName()); } -Optional Trace::GetLiveThreadBinaryDataSize(lldb::tid_t tid, - llvm::StringRef kind) { +Optional Trace::GetLiveThreadBinaryDataSize(lldb::tid_t tid, + llvm::StringRef kind) { auto it = m_live_thread_data.find(tid); if (it == m_live_thread_data.end()) return None; - std::unordered_map &single_thread_data = it->second; + std::unordered_map &single_thread_data = it->second; auto single_thread_data_it = single_thread_data.find(kind.str()); if (single_thread_data_it == single_thread_data.end()) return None; return single_thread_data_it->second; } -Optional Trace::GetLiveCoreBinaryDataSize(lldb::core_id_t core_id, - llvm::StringRef kind) { +Optional Trace::GetLiveCoreBinaryDataSize(lldb::core_id_t core_id, + llvm::StringRef kind) { auto it = m_live_core_data.find(core_id); if (it == m_live_core_data.end()) return None; - std::unordered_map &single_core_data = it->second; + std::unordered_map &single_core_data = it->second; auto single_thread_data_it = single_core_data.find(kind.str()); if (single_thread_data_it == single_core_data.end()) return None; return single_thread_data_it->second; } -Optional Trace::GetLiveProcessBinaryDataSize(llvm::StringRef kind) { +Optional Trace::GetLiveProcessBinaryDataSize(llvm::StringRef kind) { auto data_it = m_live_process_data.find(kind.str()); if (data_it == m_live_process_data.end()) return None; @@ -150,7 +150,7 @@ if (!m_live_process) return createStringError(inconvertibleErrorCode(), "Tracing requires a live process."); - llvm::Optional size = GetLiveThreadBinaryDataSize(tid, kind); + llvm::Optional size = GetLiveThreadBinaryDataSize(tid, kind); if (!size) return createStringError( inconvertibleErrorCode(), @@ -167,7 +167,7 @@ if (!m_live_process) return createStringError(inconvertibleErrorCode(), "Tracing requires a live process."); - llvm::Optional size = GetLiveCoreBinaryDataSize(core_id, kind); + llvm::Optional size = GetLiveCoreBinaryDataSize(core_id, kind); if (!size) return createStringError( inconvertibleErrorCode(), @@ -185,7 +185,7 @@ if (!m_live_process) return createStringError(inconvertibleErrorCode(), "Tracing requires a live process."); - llvm::Optional size = GetLiveProcessBinaryDataSize(kind); + llvm::Optional size = GetLiveProcessBinaryDataSize(kind); if (!size) return createStringError( inconvertibleErrorCode(), diff --git a/lldb/source/Utility/TraceGDBRemotePackets.cpp b/lldb/source/Utility/TraceGDBRemotePackets.cpp --- a/lldb/source/Utility/TraceGDBRemotePackets.cpp +++ b/lldb/source/Utility/TraceGDBRemotePackets.cpp @@ -120,7 +120,7 @@ bool fromJSON(const json::Value &value, TraceCoreState &packet, json::Path path) { ObjectMapper o(value, path); - int64_t core_id; + uint64_t core_id; if (!o || !o.map("coreId", core_id) || !o.map("binaryData", packet.binary_data)) return false; @@ -148,9 +148,15 @@ bool fromJSON(const json::Value &value, TraceGetBinaryDataRequest &packet, Path path) { ObjectMapper o(value, path); - return o && o.map("type", packet.type) && o.map("kind", packet.kind) && - o.map("tid", packet.tid) && o.map("offset", packet.offset) && - o.map("size", packet.size) && o.map("coreId", packet.core_id); + Optional core_id; + if (!o || !o.map("type", packet.type) || !o.map("kind", packet.kind) || + !o.map("tid", packet.tid) || !o.map("offset", packet.offset) || + !o.map("size", packet.size) || !o.map("coreId", core_id)) + return false; + + if (core_id) + packet.core_id = static_cast(*core_id); + return true; } /// \} diff --git a/lldb/source/Utility/TraceIntelPTGDBRemotePackets.cpp b/lldb/source/Utility/TraceIntelPTGDBRemotePackets.cpp --- a/lldb/source/Utility/TraceIntelPTGDBRemotePackets.cpp +++ b/lldb/source/Utility/TraceIntelPTGDBRemotePackets.cpp @@ -50,7 +50,8 @@ return base; } -std::chrono::nanoseconds LinuxPerfZeroTscConversion::ToNanos(uint64_t tsc) { +std::chrono::nanoseconds +LinuxPerfZeroTscConversion::ToNanos(uint64_t tsc) const { uint64_t quot = tsc >> time_shift; uint64_t rem_flag = (((uint64_t)1 << time_shift) - 1); uint64_t rem = tsc & rem_flag; @@ -58,6 +59,14 @@ ((rem * time_mult) >> time_shift)}; } +uint64_t +LinuxPerfZeroTscConversion::ToTSC(std::chrono::nanoseconds nanos) const { + uint64_t time = nanos.count() - time_zero; + uint64_t quot = time / time_mult; + uint64_t rem = time % time_mult; + return (quot << time_shift) + (rem << time_shift) / time_mult; +} + json::Value toJSON(const LinuxPerfZeroTscConversion &packet) { return json::Value(json::Object{ {"timeMult", packet.time_mult}, diff --git a/lldb/test/API/commands/trace/TestTraceDumpInfo.py b/lldb/test/API/commands/trace/TestTraceDumpInfo.py --- a/lldb/test/API/commands/trace/TestTraceDumpInfo.py +++ b/lldb/test/API/commands/trace/TestTraceDumpInfo.py @@ -45,8 +45,8 @@ Total approximate memory usage (excluding raw trace): 1.27 KiB Average memory usage per instruction (excluding raw trace): 61.76 bytes - Timing: - Decoding instructions: ''', '''s + Timing for this thread: + Decoding instructions: ''', ''' Events: Number of instructions with events: 1 diff --git a/lldb/test/API/commands/trace/TestTraceLoad.py b/lldb/test/API/commands/trace/TestTraceLoad.py --- a/lldb/test/API/commands/trace/TestTraceLoad.py +++ b/lldb/test/API/commands/trace/TestTraceLoad.py @@ -43,8 +43,8 @@ Total approximate memory usage (excluding raw trace): 1.27 KiB Average memory usage per instruction (excluding raw trace): 61.76 bytes - Timing: - Decoding instructions: ''', '''s + Timing for this thread: + Decoding instructions: ''', ''' Events: Number of instructions with events: 1 diff --git a/lldb/test/API/commands/trace/multiple-threads/TestTraceStartStopMultipleThreads.py b/lldb/test/API/commands/trace/multiple-threads/TestTraceStartStopMultipleThreads.py --- a/lldb/test/API/commands/trace/multiple-threads/TestTraceStartStopMultipleThreads.py +++ b/lldb/test/API/commands/trace/multiple-threads/TestTraceStartStopMultipleThreads.py @@ -233,5 +233,6 @@ # We must have captured the context switch of when the target resumed self.assertTrue(found_non_empty_context_switch) + self.expect("thread trace dump instructions", substrs=['unimplemented']) self.traceStopProcess()