diff --git a/lldb/include/lldb/Target/Trace.h b/lldb/include/lldb/Target/Trace.h --- a/lldb/include/lldb/Target/Trace.h +++ b/lldb/include/lldb/Target/Trace.h @@ -240,6 +240,9 @@ using OnBinaryDataReadCallback = std::function data)>; + using OnCoresBinaryDataReadCallback = std::function> + &core_to_data)>; /// Fetch binary data associated with a thread, either live or postmortem, and /// pass it to the given callback. The reason of having a callback is to free @@ -292,6 +295,12 @@ llvm::StringRef kind, OnBinaryDataReadCallback callback); + /// Similar to \a OnCoreBinaryDataRead but this is able to fetch the same data + /// from multiple cores at once. + llvm::Error OnCoresBinaryDataRead(const std::set core_ids, + llvm::StringRef kind, + OnCoresBinaryDataReadCallback callback); + /// \return /// All the currently traced processes. std::vector GetAllProcesses(); @@ -518,7 +527,12 @@ /// core id -> data kind -> size llvm::DenseMap> + m_live_core_data_sizes; + /// core id -> data kind -> bytes + llvm::DenseMap>> m_live_core_data; + /// data kind -> size std::unordered_map m_live_process_data; /// \} diff --git a/lldb/include/lldb/Utility/TraceIntelPTGDBRemotePackets.h b/lldb/include/lldb/Utility/TraceIntelPTGDBRemotePackets.h --- a/lldb/include/lldb/Utility/TraceIntelPTGDBRemotePackets.h +++ b/lldb/include/lldb/Utility/TraceIntelPTGDBRemotePackets.h @@ -80,9 +80,9 @@ /// /// \return /// Nanosecond wall time. - std::chrono::nanoseconds ToNanos(uint64_t tsc) const; + uint64_t ToNanos(uint64_t tsc) const; - uint64_t ToTSC(std::chrono::nanoseconds nanos) const; + uint64_t ToTSC(uint64_t nanos) const; uint32_t time_mult; uint16_t time_shift; diff --git a/lldb/source/Plugins/Trace/intel-pt/CMakeLists.txt b/lldb/source/Plugins/Trace/intel-pt/CMakeLists.txt --- a/lldb/source/Plugins/Trace/intel-pt/CMakeLists.txt +++ b/lldb/source/Plugins/Trace/intel-pt/CMakeLists.txt @@ -18,6 +18,7 @@ DecodedThread.cpp TaskTimer.cpp LibiptDecoder.cpp + PerfContextSwitchDecoder.cpp ThreadDecoder.cpp TraceCursorIntelPT.cpp TraceIntelPT.cpp diff --git a/lldb/source/Plugins/Trace/intel-pt/DecodedThread.h b/lldb/source/Plugins/Trace/intel-pt/DecodedThread.h --- a/lldb/source/Plugins/Trace/intel-pt/DecodedThread.h +++ b/lldb/source/Plugins/Trace/intel-pt/DecodedThread.h @@ -243,10 +243,10 @@ lldb::ThreadSP GetThread(); -private: /// Append a decoding error given an llvm::Error. void AppendError(llvm::Error &&error); +private: /// Notify this class that the last added instruction or error has /// an associated TSC. void RecordTscForLastInstruction(uint64_t tsc); diff --git a/lldb/source/Plugins/Trace/intel-pt/LibiptDecoder.h b/lldb/source/Plugins/Trace/intel-pt/LibiptDecoder.h --- a/lldb/source/Plugins/Trace/intel-pt/LibiptDecoder.h +++ b/lldb/source/Plugins/Trace/intel-pt/LibiptDecoder.h @@ -9,20 +9,50 @@ #ifndef LLDB_SOURCE_PLUGINS_TRACE_LIBIPT_DECODER_H #define LLDB_SOURCE_PLUGINS_TRACE_LIBIPT_DECODER_H -#include "intel-pt.h" - #include "DecodedThread.h" +#include "PerfContextSwitchDecoder.h" #include "forward-declarations.h" +#include "intel-pt.h" + namespace lldb_private { namespace trace_intel_pt { +struct IntelPTThreadSubtrace { + uint64_t tsc; + uint64_t psb_offset; +}; + +/// This struct represents a continuous execution of a thread in a core, +/// delimited by a context switch in and out, and a list of Intel PT subtraces +/// that belong to this execution. +struct IntelPTThreadContinousExecution { + ThreadContinuousExecution thread_execution; + std::vector intelpt_subtraces; + + IntelPTThreadContinousExecution( + const ThreadContinuousExecution &thread_execution) + : thread_execution(thread_execution) {} + + /// Comparator by time + bool operator<(const IntelPTThreadContinousExecution &o) const; +}; + /// Decode a raw Intel PT trace given in \p buffer and append the decoded /// instructions and errors in \p decoded_thread. It uses the low level libipt /// library underneath. void DecodeTrace(DecodedThread &decoded_thread, TraceIntelPT &trace_intel_pt, llvm::ArrayRef buffer); +void DecodeTrace( + DecodedThread &decoded_thread, TraceIntelPT &trace_intel_pt, + const llvm::DenseMap> &buffers, + const std::vector &executions); + +llvm::Expected> +SplitTraceInContinuousExecutions(TraceIntelPT &trace_intel_pt, + llvm::ArrayRef buffer); + } // namespace trace_intel_pt } // namespace lldb_private diff --git a/lldb/source/Plugins/Trace/intel-pt/LibiptDecoder.cpp b/lldb/source/Plugins/Trace/intel-pt/LibiptDecoder.cpp --- a/lldb/source/Plugins/Trace/intel-pt/LibiptDecoder.cpp +++ b/lldb/source/Plugins/Trace/intel-pt/LibiptDecoder.cpp @@ -50,18 +50,33 @@ /// Decode all the instructions until the end of the trace. /// The decoding flow is based on - /// https://github.com/intel/libipt/blob/master/doc/howto_libipt.md#the-instruction-flow-decode-loop - /// but with some relaxation to allow for gaps in the trace. + /// https://github.com/intel/libipt/blob/master/doc/howto_libipt.md#the-instruction-flow-decode-loop. void DecodeUntilEndOfTrace() { - int status = pte_ok; - while (!IsLibiptError(status = FindNextSynchronizationPoint())) { - // We have synchronized, so we can start decoding instructions and - // events. - // Multiple loops indicate gaps in the trace. + // Multiple loops indicate gaps in the trace, which are found by the inner + // call to DecodeInstructionsAndEvents. + while (true) { + int status = pt_insn_sync_forward(&m_decoder); + + if (IsLibiptError(status)) { + m_decoded_thread.Append(DecodedInstruction(status)); + break; + } + DecodeInstructionsAndEvents(status); } } + /// Decode all the instructions that belong to the same PSB packet given its + /// offset. + void DecodePSB(uint64_t psb_offset) { + int status = pt_insn_sync_set(&m_decoder, psb_offset); + if (IsLibiptError(status)) { + m_decoded_thread.Append(DecodedInstruction(status)); + return; + } + DecodeInstructionsAndEvents(status, /*stop_on_psb_change=*/true); + } + private: /// Invoke the low level function \a pt_insn_next and store the decoded /// instruction in the given \a DecodedInstruction. @@ -75,13 +90,37 @@ return pt_insn_next(&m_decoder, &insn.pt_insn, sizeof(insn.pt_insn)); } - /// Decode all the instructions and events until an error is found or the end - /// of the trace is reached. + /// Decode all the instructions and events until an error is found, the end + /// of the trace is reached, or optionally a new PSB is reached. /// /// \param[in] status /// The status that was result of synchronizing to the most recent PSB. - void DecodeInstructionsAndEvents(int status) { - while (DecodedInstruction insn = ProcessPTEvents(status)) { + /// + /// \param[in] stop_on_psb_change + /// If \b true, decoding + /// An optional offset to a given PSB. Decoding stops if a different PSB is + /// reached. + void DecodeInstructionsAndEvents(int status, + bool stop_on_psb_change = false) { + uint64_t psb_offset; + pt_insn_get_sync_offset(&m_decoder, + &psb_offset); // this can't fail because we got here + + while (true) { + DecodedInstruction insn = ProcessPTEvents(status); + if (!insn) { + m_decoded_thread.Append(insn); + break; + } + + if (stop_on_psb_change) { + uint64_t cur_psb_offset; + pt_insn_get_sync_offset( + &m_decoder, &cur_psb_offset); // this can't fail because we got here + if (cur_psb_offset != psb_offset) + break; + } + // The status returned by DecodeNextInstruction will need to be processed // by ProcessPTEvents in the next loop if it is not an error. if (IsLibiptError(status = DecodeNextInstruction(insn))) { @@ -112,31 +151,6 @@ // of trace stream (eos) is reached or pt_insn_sync_forward() passes. int status = pt_insn_sync_forward(&m_decoder); - if (!IsEndOfStream(status) && IsLibiptError(status)) { - uint64_t decoder_offset = 0; - int errcode_off = pt_insn_get_offset(&m_decoder, &decoder_offset); - if (!IsLibiptError(errcode_off)) { // we could get the offset - while (true) { - status = pt_insn_sync_forward(&m_decoder); - if (!IsLibiptError(status) || IsEndOfStream(status)) - break; - - uint64_t new_decoder_offset = 0; - errcode_off = pt_insn_get_offset(&m_decoder, &new_decoder_offset); - if (IsLibiptError(errcode_off)) - break; // We can't further synchronize. - else if (new_decoder_offset <= decoder_offset) { - // We tried resyncing the decoder and it didn't make any progress - // because the offset didn't change. We will not make any further - // progress. Hence, we stop in this situation. - break; - } - // We'll try again starting from a new offset. - decoder_offset = new_decoder_offset; - } - } - } - // We make this call to record any synchronization errors. if (IsLibiptError(status)) m_decoded_thread.Append(DecodedInstruction(status)); @@ -148,8 +162,6 @@ /// instruction e.g. timing events like ptev_tick, or paging events like /// ptev_paging. /// - /// If an error is found, it will be appended to the trace. - /// /// \param[in] status /// The status gotten from the previous instruction decoding or PSB /// synchronization. @@ -199,8 +211,6 @@ RefreshTscInfo(); if (m_tsc_info) insn.tsc = m_tsc_info.tsc; - if (!insn) - m_decoded_thread.Append(insn); return insn; } @@ -264,8 +274,7 @@ std::unique_ptr; static Expected -CreateInstructionDecoder(DecodedThread &decoded_thread, - TraceIntelPT &trace_intel_pt, +CreateInstructionDecoder(TraceIntelPT &trace_intel_pt, ArrayRef buffer) { Expected cpu_info = trace_intel_pt.GetCPUInfo(); if (!cpu_info) @@ -287,25 +296,151 @@ pt_insn_decoder *decoder_ptr = pt_insn_alloc_decoder(&config); if (!decoder_ptr) return make_error(-pte_nomem); - PtInsnDecoderUP decoder_up(decoder_ptr, DecoderDeleter); - pt_image *image = pt_insn_get_image(decoder_ptr); - Process *process = decoded_thread.GetThread()->GetProcess().get(); + return PtInsnDecoderUP(decoder_ptr, DecoderDeleter); +} + +static Error SetupMemoryImage(PtInsnDecoderUP &decoder_up, Process &process) { + pt_image *image = pt_insn_get_image(decoder_up.get()); + int status = pte_ok; if (IsLibiptError( - status = pt_image_set_callback(image, ReadProcessMemory, process))) + status = pt_image_set_callback(image, ReadProcessMemory, &process))) return make_error(status); - return decoder_up; + return Error::success(); } void lldb_private::trace_intel_pt::DecodeTrace(DecodedThread &decoded_thread, TraceIntelPT &trace_intel_pt, ArrayRef buffer) { Expected decoder_up = - CreateInstructionDecoder(decoded_thread, trace_intel_pt, buffer); + CreateInstructionDecoder(trace_intel_pt, buffer); if (!decoder_up) return decoded_thread.SetAsFailed(decoder_up.takeError()); + if (Error err = SetupMemoryImage(*decoder_up, + *decoded_thread.GetThread()->GetProcess())) + return decoded_thread.SetAsFailed(std::move(err)); + LibiptDecoder libipt_decoder(*decoder_up.get(), decoded_thread); libipt_decoder.DecodeUntilEndOfTrace(); } + +void lldb_private::trace_intel_pt::DecodeTrace( + DecodedThread &decoded_thread, TraceIntelPT &trace_intel_pt, + const DenseMap> &buffers, + const std::vector &executions) { + DenseMap decoders; + for (auto &core_id_buffer : buffers) { + Expected decoder_up = + CreateInstructionDecoder(trace_intel_pt, core_id_buffer.second); + if (!decoder_up) + return decoded_thread.SetAsFailed(decoder_up.takeError()); + + if (Error err = SetupMemoryImage(*decoder_up, + *decoded_thread.GetThread()->GetProcess())) + return decoded_thread.SetAsFailed(std::move(err)); + + decoders.try_emplace(core_id_buffer.first, + LibiptDecoder(*decoder_up->release(), decoded_thread)); + } + + bool has_seen_psbs = false; + for (size_t i = 0; i < executions.size(); i++) { + const IntelPTThreadContinousExecution &execution = executions[i]; + + auto variant = execution.thread_execution.variant; + // If we haven't seen a PSB yet, then it's fine not to show errors + if (has_seen_psbs) { + if (execution.intelpt_subtraces.empty()) { + decoded_thread.AppendError(createStringError( + inconvertibleErrorCode(), + formatv("Unable to find intel pt data for thread execution with " + "tsc = {0} on core id = {1}", + execution.thread_execution.GetLowestKnownTSC(), + execution.thread_execution.core_id))); + } + + // If the first execution is incomplete because it doesn't have a previous + // context switch in its cpu, all good. + if (variant == ThreadContinuousExecution::Variant::OnlyEnd || + variant == ThreadContinuousExecution::Variant::HintedStart) { + decoded_thread.AppendError(createStringError( + inconvertibleErrorCode(), + formatv("Thread execution starting at tsc = {0} on core id = {1} " + "doesn't have a matching context switch in.", + execution.thread_execution.GetLowestKnownTSC(), + execution.thread_execution.core_id))); + } + } + + LibiptDecoder &decoder = + decoders.find(execution.thread_execution.core_id)->second; + for (const IntelPTThreadSubtrace &intel_pt_execution : + execution.intelpt_subtraces) { + has_seen_psbs = true; + decoder.DecodePSB(intel_pt_execution.psb_offset); + } + + // If we haven't seen a PSB yet, then it's fine not to show errors + if (has_seen_psbs) { + // If the last execution is incomplete because it doesn't have a following + // context switch in its cpu, all good. + if ((variant == ThreadContinuousExecution::Variant::OnlyStart && + i + 1 != executions.size()) || + variant == ThreadContinuousExecution::Variant::HintedEnd) { + decoded_thread.AppendError(createStringError( + inconvertibleErrorCode(), + formatv("Thread execution starting at tsc = {0} on core id = {1} " + "doesn't have a matching context switch out", + execution.thread_execution.GetLowestKnownTSC(), + execution.thread_execution.core_id))); + } + } + } +} + +bool IntelPTThreadContinousExecution::operator<( + const IntelPTThreadContinousExecution &o) const { + // As the context switch might be incomplete, we look first for the first real + // PSB packet, which is a valid TSC. Otherwise, We query the thread execution + // itself for some tsc. + auto get_tsc = [](const IntelPTThreadContinousExecution &exec) { + return exec.intelpt_subtraces.empty() + ? exec.thread_execution.GetLowestKnownTSC() + : exec.intelpt_subtraces.front().tsc; + }; + + return get_tsc(*this) < get_tsc(o); +} + +Expected> +lldb_private::trace_intel_pt::SplitTraceInContinuousExecutions( + TraceIntelPT &trace_intel_pt, llvm::ArrayRef buffer) { + Expected decoder_up = + CreateInstructionDecoder(trace_intel_pt, buffer); + if (!decoder_up) + return decoder_up.takeError(); + + pt_insn_decoder *decoder = decoder_up.get().get(); + + std::vector executions; + + int status = pte_ok; + while (!IsLibiptError(status = pt_insn_sync_forward(decoder))) { + uint64_t tsc; + if (IsLibiptError(pt_insn_time(decoder, &tsc, nullptr, nullptr))) + return createStringError(inconvertibleErrorCode(), + "intel pt trace doesn't have TSC timestamps"); + + uint64_t psb_offset; + pt_insn_get_sync_offset(decoder, + &psb_offset); // this can't fail because we got here + + executions.push_back({ + tsc, + psb_offset, + }); + } + return executions; +} diff --git a/lldb/source/Plugins/Trace/intel-pt/PerfContextSwitchDecoder.h b/lldb/source/Plugins/Trace/intel-pt/PerfContextSwitchDecoder.h new file mode 100644 --- /dev/null +++ b/lldb/source/Plugins/Trace/intel-pt/PerfContextSwitchDecoder.h @@ -0,0 +1,146 @@ +//===-- PerfContextSwitchDecoder.h --======----------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLDB_SOURCE_PLUGINS_TRACE_INTEL_PT_PERFCONTEXTSWITCHDECODER_H +#define LLDB_SOURCE_PLUGINS_TRACE_INTEL_PT_PERFCONTEXTSWITCHDECODER_H + +#include "lldb/Utility/TraceIntelPTGDBRemotePackets.h" +#include "lldb/lldb-types.h" + +#include "llvm/Support/Error.h" + +#include + +namespace lldb_private { +namespace trace_intel_pt { + +/// This class indicates the time interval in which a thread was running +/// continuously on a cpu core. +/// +/// Note: we use the terms CPU and cores interchangeably. +struct ThreadContinuousExecution { + + /// In most cases both the start and end of a continuous execution can be + /// accurately recovered from the context switch trace, but in some cases one + /// of these endpoints might be guessed or not known at all, due to contention + /// problems in the trace or because tracing was interrupted, e.g. with ioctl + /// calls, which causes gaps in the trace. Because of that, we identify which + /// situation we fall into with the following variants. + enum class Variant { + /// Both endpoints are known. + Complete, + /// The end is known and we have a lower bound for the start, i.e. the + /// previous execution in the same core happens strictly before the hinted + /// start. + HintedStart, + /// The start is known and we have an upper bound for the end, i.e. the next + /// execution in the same core happens strictly after the hinted end. + HintedEnd, + /// We only know the start. This might be the last entry of a core trace. + OnlyStart, + /// We only know the end. This might be the first entry or a core trace. + OnlyEnd, + } variant; + + /// \return + /// The lowest tsc that we are sure of, i.e. not hinted. + uint64_t GetLowestKnownTSC() const; + + /// \return + /// The known or hinted start tsc, or 0 if the variant is \a OnlyEnd. + uint64_t GetStartTSC() const; + + /// \return + /// The known or hinted end tsc, or max \a uint64_t if the variant is \a + /// OnlyStart. + uint64_t GetEndTSC() const; + + /// Constructors for the different variants of this object + /// + /// \{ + static ThreadContinuousExecution + CreateCompleteExecution(lldb::core_id_t core_id, lldb::tid_t tid, + lldb::pid_t pid, uint64_t start, uint64_t end); + + static ThreadContinuousExecution + CreateHintedStartExecution(lldb::core_id_t core_id, lldb::tid_t tid, + lldb::pid_t pid, uint64_t hinted_start, + uint64_t end); + + static ThreadContinuousExecution + CreateHintedEndExecution(lldb::core_id_t core_id, lldb::tid_t tid, + lldb::pid_t pid, uint64_t start, + uint64_t hinted_end); + + static ThreadContinuousExecution + CreateOnlyEndExecution(lldb::core_id_t core_id, lldb::tid_t tid, + lldb::pid_t pid, uint64_t end); + + static ThreadContinuousExecution + CreateOnlyStartExecution(lldb::core_id_t core_id, lldb::tid_t tid, + lldb::pid_t pid, uint64_t start); + /// \} + + union { + struct { + uint64_t start; + uint64_t end; + } complete; + struct { + uint64_t start; + } only_start; + struct { + uint64_t end; + } only_end; + /// The following 'hinted' structures are useful when there are contention + /// problems in the trace + struct { + uint64_t hinted_start; + uint64_t end; + } hinted_start; + struct { + uint64_t start; + uint64_t hinted_end; + } hinted_end; + } tscs; + + lldb::core_id_t core_id; + lldb::tid_t tid; + lldb::pid_t pid; + +private: + /// We keep this constructor private to force the usage of the static named + /// constructors. + ThreadContinuousExecution(lldb::core_id_t core_id, lldb::tid_t tid, + lldb::pid_t pid) + : core_id(core_id), tid(tid), pid(pid) {} +}; + +/// Decodes a context switch trace gotten with perf_event_open. +/// +/// \param[in] data +/// The context switch trace in binary format. +/// +/// \param[i] core_id +/// The core_id where the trace were gotten from. +/// +/// \param[in] tsc_conversion +/// The conversion values used to confert nanoseconds to TSC. +/// +/// \return +/// A list of continuous executions recovered from the raw trace sorted by +/// time, or an \a llvm::Error if the data is malformed. +llvm::Expected> +DecodePerfContextSwitchTrace(llvm::ArrayRef data, + lldb::core_id_t core_id, + const LinuxPerfZeroTscConversion &tsc_conversion); + +} // namespace trace_intel_pt +} // namespace lldb_private + +#endif // LLDB_SOURCE_PLUGINS_TRACE_INTEL_PT_PERFCONTEXTSWITCHDECODER_H diff --git a/lldb/source/Plugins/Trace/intel-pt/PerfContextSwitchDecoder.cpp b/lldb/source/Plugins/Trace/intel-pt/PerfContextSwitchDecoder.cpp new file mode 100644 --- /dev/null +++ b/lldb/source/Plugins/Trace/intel-pt/PerfContextSwitchDecoder.cpp @@ -0,0 +1,281 @@ +//===-- PerfContextSwitchDecoder.cpp --======------------------------------===// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "PerfContextSwitchDecoder.h" + +using namespace lldb; +using namespace lldb_private; +using namespace lldb_private::trace_intel_pt; +using namespace llvm; + +/// Copied from to avoid depending on perf_event.h on +/// non-linux platforms. +/// \{ +struct perf_event_header { + uint32_t type; + uint16_t misc; + uint16_t size; +}; + +#define PERF_RECORD_MISC_SWITCH_OUT (1 << 13) +#define PERF_RECORD_MAX 19 +#define PERF_RECORD_SWITCH_CPU_WIDE 15 +/// \} + +/// Record found in the perf_event context switch traces. It might contain +/// additional fields in memory, but header.size should have the actual size +/// of the record. +struct PerfContextSwitchRecord { + struct perf_event_header header; + uint32_t next_prev_pid; + uint32_t next_prev_tid; + uint32_t pid, tid; + uint64_t time_in_nanos; + + bool IsOut() const { return header.misc & PERF_RECORD_MISC_SWITCH_OUT; } + + bool IsContextSwitchRecord() const { + return header.type == PERF_RECORD_SWITCH_CPU_WIDE; + } + + /// \return + /// An \a llvm::Error if the record looks obviously wrong, or \a + /// llvm::Error::success() otherwise. + Error SanityCheck() const { + // A record of too many uint64_t's or more should mean that the data is + // wrong + if (header.size == 0 || header.size > sizeof(uint64_t) * 1000) + return createStringError( + inconvertibleErrorCode(), + formatv("A record of {0} bytes was found.", header.size)); + + // We add some numbers to PERF_RECORD_MAX because some systems might have + // custom records. In any case, we are looking only for abnormal data. + if (header.type >= PERF_RECORD_MAX + 100) + return createStringError( + inconvertibleErrorCode(), + formatv("Invalid record type {0} was found.", header.type)); + return Error::success(); + } +}; + +/// Record produced after parsing the raw context switch trace produce by +/// perf_event. A major difference between this struct and +/// PerfContextSwitchRecord is that this one uses tsc instead of nanos. +struct ContextSwitchRecord { + uint64_t tsc; + /// Whether the switch is in or out + bool is_out; + /// pid = 0 and tid = 0 indicate the swapper or idle process, which normally + /// runs after a context switch out of a normal user thread. + lldb::pid_t pid; + lldb::tid_t tid; + + bool IsOut() const { return is_out; } + + bool IsIn() const { return !is_out; } +}; + +uint64_t ThreadContinuousExecution::GetLowestKnownTSC() const { + switch (variant) { + case Variant::Complete: + return tscs.complete.start; + case Variant::OnlyStart: + return tscs.only_start.start; + case Variant::OnlyEnd: + return tscs.only_end.end; + case Variant::HintedEnd: + return tscs.hinted_end.start; + case Variant::HintedStart: + return tscs.hinted_start.end; + } +} + +uint64_t ThreadContinuousExecution::GetStartTSC() const { + switch (variant) { + case Variant::Complete: + return tscs.complete.start; + case Variant::OnlyStart: + return tscs.only_start.start; + case Variant::OnlyEnd: + return 0; + case Variant::HintedEnd: + return tscs.hinted_end.start; + case Variant::HintedStart: + return tscs.hinted_start.hinted_start; + } +} + +uint64_t ThreadContinuousExecution::GetEndTSC() const { + switch (variant) { + case Variant::Complete: + return tscs.complete.end; + case Variant::OnlyStart: + return std::numeric_limits::max(); + case Variant::OnlyEnd: + return tscs.only_end.end; + case Variant::HintedEnd: + return tscs.hinted_end.hinted_end; + case Variant::HintedStart: + return tscs.hinted_start.end; + } +} + +ThreadContinuousExecution ThreadContinuousExecution::CreateCompleteExecution( + lldb::core_id_t core_id, lldb::tid_t tid, lldb::pid_t pid, uint64_t start, + uint64_t end) { + ThreadContinuousExecution o(core_id, tid, pid); + o.variant = Variant::Complete; + o.tscs.complete.start = start; + o.tscs.complete.end = end; + return o; +} + +ThreadContinuousExecution ThreadContinuousExecution::CreateHintedStartExecution( + lldb::core_id_t core_id, lldb::tid_t tid, lldb::pid_t pid, + uint64_t hinted_start, uint64_t end) { + ThreadContinuousExecution o(core_id, tid, pid); + o.variant = Variant::HintedStart; + o.tscs.hinted_start.hinted_start = hinted_start; + o.tscs.hinted_start.end = end; + return o; +} + +ThreadContinuousExecution ThreadContinuousExecution::CreateHintedEndExecution( + lldb::core_id_t core_id, lldb::tid_t tid, lldb::pid_t pid, uint64_t start, + uint64_t hinted_end) { + ThreadContinuousExecution o(core_id, tid, pid); + o.variant = Variant::HintedEnd; + o.tscs.hinted_end.start = start; + o.tscs.hinted_end.hinted_end = hinted_end; + return o; +} + +ThreadContinuousExecution ThreadContinuousExecution::CreateOnlyEndExecution( + lldb::core_id_t core_id, lldb::tid_t tid, lldb::pid_t pid, uint64_t end) { + ThreadContinuousExecution o(core_id, tid, pid); + o.variant = Variant::OnlyEnd; + o.tscs.only_end.end = end; + return o; +} + +ThreadContinuousExecution ThreadContinuousExecution::CreateOnlyStartExecution( + lldb::core_id_t core_id, lldb::tid_t tid, lldb::pid_t pid, uint64_t start) { + ThreadContinuousExecution o(core_id, tid, pid); + o.variant = Variant::OnlyStart; + o.tscs.only_start.start = start; + return o; +} + +static Error RecoverExecutionsFromConsecutiveRecords( + core_id_t core_id, const LinuxPerfZeroTscConversion &tsc_conversion, + const ContextSwitchRecord ¤t_record, + const Optional &prev_record, + std::function + on_new_execution) { + if (!prev_record) { + if (current_record.IsOut()) { + on_new_execution(ThreadContinuousExecution::CreateOnlyEndExecution( + core_id, current_record.tid, current_record.pid, current_record.tsc)); + } + // The 'in' case will be handled later when we try to look for its end + return Error::success(); + } + + const ContextSwitchRecord &prev = *prev_record; + if (prev.tsc >= current_record.tsc) + return createStringError( + inconvertibleErrorCode(), + formatv("A context switch record doesn't happen after the previous " + "record. Previous TSC= {0}, current TSC = {1}.", + prev.tsc, current_record.tsc)); + + if (current_record.IsIn() && prev.IsIn()) { + // We found two consecutive ins, which means that we didn't capture + // the end of the previous execution. + on_new_execution(ThreadContinuousExecution::CreateHintedEndExecution( + core_id, prev.tid, prev.pid, prev.tsc, current_record.tsc - 1)); + } else if (current_record.IsOut() && prev.IsOut()) { + // We found two consecutive outs, that means that we didn't capture + // the beginning of the current execution. + on_new_execution(ThreadContinuousExecution::CreateHintedStartExecution( + core_id, current_record.tid, current_record.pid, prev.tsc + 1, + current_record.tsc)); + } else if (current_record.IsOut() && prev.IsIn()) { + if (current_record.pid == prev.pid && current_record.tid == prev.tid) { + /// A complete execution + on_new_execution(ThreadContinuousExecution::CreateCompleteExecution( + core_id, current_record.tid, current_record.pid, prev.tsc, + current_record.tsc)); + } else { + // An out after the in of a different thread. The first one doesn't + // have an end, and the second one doesn't have a start. + on_new_execution(ThreadContinuousExecution::CreateHintedEndExecution( + core_id, prev.tid, prev.pid, prev.tsc, current_record.tsc - 1)); + on_new_execution(ThreadContinuousExecution::CreateHintedStartExecution( + core_id, current_record.tid, current_record.pid, prev.tsc + 1, + current_record.tsc)); + } + } + return Error::success(); +} + +#include + +Expected> +lldb_private::trace_intel_pt::DecodePerfContextSwitchTrace( + ArrayRef data, core_id_t core_id, + const LinuxPerfZeroTscConversion &tsc_conversion) { + + std::vector executions; + + // This offset is used to create the error message in case of failures. + size_t offset = 0; + + auto do_decode = [&]() -> Error { + Optional prev_record; + while (offset < data.size()) { + const PerfContextSwitchRecord &perf_record = + *reinterpret_cast(data.data() + + offset); + if (Error err = perf_record.SanityCheck()) + return err; + + if (perf_record.IsContextSwitchRecord()) { + ContextSwitchRecord record{ + tsc_conversion.ToTSC(perf_record.time_in_nanos), + perf_record.IsOut(), static_cast(perf_record.pid), + static_cast(perf_record.tid)}; + + if (Error err = RecoverExecutionsFromConsecutiveRecords( + core_id, tsc_conversion, record, prev_record, + [&](const ThreadContinuousExecution &execution) { + executions.push_back(execution); + })) + return err; + + prev_record = record; + } + offset += perf_record.header.size; + } + + // We might have an incomplete last record + if (prev_record && prev_record->IsIn()) + executions.push_back(ThreadContinuousExecution::CreateOnlyStartExecution( + core_id, prev_record->tid, prev_record->pid, prev_record->tsc)); + return Error::success(); + }; + + if (Error err = do_decode()) + return createStringError(inconvertibleErrorCode(), + formatv("Malformed perf context switch trace for " + "cpu {0} at offset {1}. {2}", + core_id, offset, + toString(std::move(err)))); + + return executions; +} diff --git a/lldb/source/Plugins/Trace/intel-pt/TraceIntelPTJSONStructs.cpp b/lldb/source/Plugins/Trace/intel-pt/TraceIntelPTJSONStructs.cpp --- a/lldb/source/Plugins/Trace/intel-pt/TraceIntelPTJSONStructs.cpp +++ b/lldb/source/Plugins/Trace/intel-pt/TraceIntelPTJSONStructs.cpp @@ -49,8 +49,10 @@ } json::Value toJSON(const JSONThread &thread) { - return json::Object{{"tid", thread.tid}, - {"traceBuffer", thread.trace_buffer}}; + json::Object obj{{"tid", thread.tid}}; + if (thread.trace_buffer) + obj["traceBuffer"] = *thread.trace_buffer; + return obj; } bool fromJSON(const json::Value &value, JSONThread &thread, Path path) { diff --git a/lldb/source/Plugins/Trace/intel-pt/TraceIntelPTMultiCoreDecoder.h b/lldb/source/Plugins/Trace/intel-pt/TraceIntelPTMultiCoreDecoder.h --- a/lldb/source/Plugins/Trace/intel-pt/TraceIntelPTMultiCoreDecoder.h +++ b/lldb/source/Plugins/Trace/intel-pt/TraceIntelPTMultiCoreDecoder.h @@ -9,108 +9,27 @@ #ifndef LLDB_SOURCE_PLUGINS_TRACE_INTEL_PT_TRACEINTELPTMULTICOREDECODER_H #define LLDB_SOURCE_PLUGINS_TRACE_INTEL_PT_TRACEINTELPTMULTICOREDECODER_H +#include "LibiptDecoder.h" +#include "PerfContextSwitchDecoder.h" #include "ThreadDecoder.h" namespace lldb_private { namespace trace_intel_pt { -/// This class indicates the time interval in which a thread was running -/// continuously on a cpu core. -/// -/// In most cases both endpoints of the intervals can be accurately recovered -/// from a context switch trace, but in some cases one of these endpoints might -/// be guessed or not known at all, due to contention problems in the trace or -/// because tracing was interrupted. -/// -/// Note: we use the terms CPU and cores interchangeably. -struct ThreadContinuousExecution { - enum class Variant { - /// Both endpoints are known - Complete, - /// The end is known and we have a guess for the start - HintedStart, - /// The start is known and we have a guess for the end - HintedEnd, - /// We only know the start. This might be the last entry of a core trace. - OnlyStart, - /// We only know the end. This might be the first entry or a core trace. - OnlyEnd, - } variant; - - union { - struct { - uint64_t start; - uint64_t end; - } complete; - struct { - uint64_t start; - } only_start; - struct { - uint64_t end; - } only_end; - /// The following 'hinted' structures are useful when there are contention - /// problems in the trace - struct { - uint64_t hinted_start; - uint64_t end; - } hinted_start; - struct { - uint64_t start; - uint64_t hinted_end; - } hinted_end; - } tscs; - - lldb::core_id_t core_id; - lldb::tid_t tid; - - /// \return - /// A tsc that we are certain of, either the start or the end. - uint64_t GetErrorFreeTSC() const; - - /// Constructors for the different variants of this object - /// - /// \{ - static ThreadContinuousExecution - CreateCompleteExecution(lldb::core_id_t core_id, lldb::tid_t tid, - uint64_t start, uint64_t end); - - static ThreadContinuousExecution - CreateHintedStartExecution(lldb::core_id_t core_id, lldb::tid_t tid, - uint64_t hinted_start, uint64_t end); - - static ThreadContinuousExecution - CreateHintedEndExecution(lldb::core_id_t core_id, lldb::tid_t tid, - uint64_t start, uint64_t hinted_end); - - static ThreadContinuousExecution - CreateOnlyEndExecution(lldb::core_id_t core_id, lldb::tid_t tid, - uint64_t end); - - static ThreadContinuousExecution - CreateOnlyStartExecution(lldb::core_id_t core_id, lldb::tid_t tid, - uint64_t start); - /// \} - - /// Comparator by TSCs - bool operator<(const ThreadContinuousExecution &o) const; - -private: - ThreadContinuousExecution(lldb::core_id_t core_id, lldb::tid_t tid) - : core_id(core_id), tid(tid) {} -}; - /// Class used to decode a multi-core Intel PT trace. It assumes that each /// thread could have potentially been executed on different cores. It uses a /// context switch trace per CPU with timestamps to identify which thread owns /// each Intel PT decoded instruction and in which order. It also assumes that /// the Intel PT data and context switches might have gaps in their traces due -/// to contention or race conditions. +/// to contention or race conditions. Finally, it assumes that a tid is not +/// repeated twice for two different threads because of the shortness of the +/// intel pt trace. class TraceIntelPTMultiCoreDecoder { public: /// \param[in] core_ids /// The list of cores where the traced programs were running on. /// - /// \param[in] tid + /// \param[in] tids /// The full list of tids that were traced. /// /// \param[in] tsc_conversion @@ -143,16 +62,24 @@ /// by thread. llvm::Error DecodeContextSwitchTraces(); + /// Produce a mapping from thread ids to the list of continuos executions with + /// their associated intel pt subtraces. + llvm::Expected< + llvm::DenseMap>> + CorrelateContextSwitchesAndIntelPtTraces(); + TraceIntelPT &m_trace; std::set m_cores; std::set m_tids; llvm::Optional< - llvm::DenseMap>> + llvm::DenseMap>> m_continuous_executions_per_thread; + llvm::DenseMap m_decoded_threads; LinuxPerfZeroTscConversion m_tsc_conversion; /// This variable will be non-None if a severe error happened during the setup /// of the decoder. llvm::Optional m_setup_error; + uint64_t m_unattributed_intelpt_subtraces; }; } // namespace trace_intel_pt diff --git a/lldb/source/Plugins/Trace/intel-pt/TraceIntelPTMultiCoreDecoder.cpp b/lldb/source/Plugins/Trace/intel-pt/TraceIntelPTMultiCoreDecoder.cpp --- a/lldb/source/Plugins/Trace/intel-pt/TraceIntelPTMultiCoreDecoder.cpp +++ b/lldb/source/Plugins/Trace/intel-pt/TraceIntelPTMultiCoreDecoder.cpp @@ -12,225 +12,11 @@ #include "llvm/Support/Error.h" -#include - using namespace lldb; using namespace lldb_private; using namespace lldb_private::trace_intel_pt; using namespace llvm; -struct PerfContextSwitchRecord { - struct perf_event_header header; - uint32_t next_prev_pid; - uint32_t next_prev_tid; - uint32_t pid, tid; - uint64_t time_in_nanos; - - bool IsOut() const { return header.misc & PERF_RECORD_MISC_SWITCH_OUT; } -}; - -struct ContextSwitchRecord { - uint64_t tsc; - bool is_out; - /// A pid of 0 indicates an execution in the kernel - lldb::pid_t pid; - lldb::tid_t tid; - - bool IsOut() const { return is_out; } - - bool IsIn() const { return !is_out; } -}; - -uint64_t ThreadContinuousExecution::GetErrorFreeTSC() const { - switch (variant) { - case Variant::Complete: - return tscs.complete.start; // end would also work - case Variant::HintedStart: - return tscs.hinted_start.end; - case Variant::HintedEnd: - return tscs.hinted_end.start; - case Variant::OnlyEnd: - return tscs.only_end.end; - case Variant::OnlyStart: - return tscs.only_start.start; - } -} - -ThreadContinuousExecution ThreadContinuousExecution::CreateCompleteExecution( - lldb::core_id_t core_id, lldb::tid_t tid, uint64_t start, uint64_t end) { - ThreadContinuousExecution o(core_id, tid); - o.variant = Variant::Complete; - o.tscs.complete.start = start; - o.tscs.complete.end = end; - return o; -} - -ThreadContinuousExecution ThreadContinuousExecution::CreateHintedStartExecution( - lldb::core_id_t core_id, lldb::tid_t tid, uint64_t hinted_start, - uint64_t end) { - ThreadContinuousExecution o(core_id, tid); - o.variant = Variant::HintedStart; - o.tscs.hinted_start.hinted_start = hinted_start; - o.tscs.hinted_start.end = end; - return o; -} - -ThreadContinuousExecution ThreadContinuousExecution::CreateHintedEndExecution( - lldb::core_id_t core_id, lldb::tid_t tid, uint64_t start, - uint64_t hinted_end) { - ThreadContinuousExecution o(core_id, tid); - o.variant = Variant::HintedEnd; - o.tscs.hinted_end.start = start; - o.tscs.hinted_end.hinted_end = hinted_end; - return o; -} - -ThreadContinuousExecution ThreadContinuousExecution::CreateOnlyEndExecution( - lldb::core_id_t core_id, lldb::tid_t tid, uint64_t end) { - ThreadContinuousExecution o(core_id, tid); - o.variant = Variant::OnlyEnd; - o.tscs.only_end.end = end; - return o; -} - -ThreadContinuousExecution ThreadContinuousExecution::CreateOnlyStartExecution( - lldb::core_id_t core_id, lldb::tid_t tid, uint64_t start) { - ThreadContinuousExecution o(core_id, tid); - o.variant = Variant::OnlyStart; - o.tscs.only_start.start = start; - return o; -} - -bool ThreadContinuousExecution::operator<( - const ThreadContinuousExecution &o) const { - // We can compare by GetErrorFreeTSC because context switches across CPUs can - // be sorted by any of its TSC. - return GetErrorFreeTSC() < o.GetErrorFreeTSC(); -} - -/// Tries to recover a continuous execution by analyzing two consecutive context -/// switch records. -static Error -HandleContextSwitch(core_id_t core_id, - const LinuxPerfZeroTscConversion &tsc_conversion, - const ContextSwitchRecord &record, - const Optional &prev_record, - std::function - on_new_thread_execution) { - if (!prev_record) { - if (record.IsOut()) - on_new_thread_execution(ThreadContinuousExecution::CreateOnlyEndExecution( - core_id, record.tid, record.tsc)); - // The 'in' case will be handled later when we try to look for its end - return Error::success(); - } - - const ContextSwitchRecord &prev = *prev_record; - if (prev.tsc > record.tsc) - return createStringError( - inconvertibleErrorCode(), - formatv("A context switch record out doesn't happen after the previous " - "record. Previous TSC= {0}, current TSC = {1}.", - prev.tsc, record.tsc)); - - if (record.IsIn() && prev.IsIn()) { - // We found two consecutive ins, which means that we didn't capture - // the end of the previous execution. - on_new_thread_execution(ThreadContinuousExecution::CreateHintedEndExecution( - core_id, prev.tid, prev.tsc, record.tsc - 1)); - } else if (record.IsOut() && prev.IsOut()) { - // We found two consecutive outs, that means that we didn't capture - // the beginning of the current execution. - on_new_thread_execution( - ThreadContinuousExecution::CreateHintedStartExecution( - core_id, record.tid, prev.tsc + 1, record.tsc)); - } else if (record.IsOut() && prev.IsIn()) { - if (record.pid == prev.pid && record.tid == prev.tid) { - /// A complete execution - on_new_thread_execution( - ThreadContinuousExecution::CreateCompleteExecution( - core_id, record.tid, prev.tsc, record.tsc)); - } else { - // An out after the in of a different thread. The first one doesn't - // have an end, and the second one doesn't have a start. - on_new_thread_execution( - ThreadContinuousExecution::CreateHintedEndExecution( - core_id, prev.tid, prev.tsc, record.tsc - 1)); - on_new_thread_execution( - ThreadContinuousExecution::CreateHintedStartExecution( - core_id, record.tid, prev.tsc + 1, record.tsc)); - } - } - return Error::success(); -} - -/// Decodes a context switch trace gotten with perf_event_open. -/// -/// \param[in] data -/// The context switch trace in binary format. -/// -/// \param[i] core_id -/// The core_id where the trace were gotten from. -/// -/// \param[in] tsc_conversion -/// The conversion values used to confert nanoseconds to TSC. -/// -/// \param[in] on_new_thread_execution -/// Callback to be invoked whenever a continuous execution is recovered from -/// the trace. -static Error DecodePerfContextSwitchTrace( - ArrayRef data, core_id_t core_id, - const LinuxPerfZeroTscConversion &tsc_conversion, - std::function - on_new_thread_execution) { - auto CreateError = [&](size_t offset, auto error) -> Error { - return createStringError(inconvertibleErrorCode(), - formatv("Malformed perf context switch trace for " - "cpu {0} at offset {1}. {2}", - core_id, offset, error)); - }; - - Optional prev_record; - for (size_t offset = 0; offset < data.size();) { - const PerfContextSwitchRecord &perf_record = - *reinterpret_cast(data.data() + - offset); - // A record of 1000 uint64_t's or more should mean that the data is wrong - if (perf_record.header.size == 0 || - perf_record.header.size > sizeof(uint64_t) * 1000) - return CreateError(offset, formatv("A record of {0} bytes was found.", - perf_record.header.size)); - - // We add + 100 to this record because some systems might have custom - // records. In any case, we are looking only for abnormal data. - if (perf_record.header.type >= PERF_RECORD_MAX + 100) - return CreateError(offset, formatv("Invalid record type {0} was found.", - perf_record.header.type)); - - if (perf_record.header.type == PERF_RECORD_SWITCH_CPU_WIDE) { - ContextSwitchRecord record{tsc_conversion.ToTSC(std::chrono::nanoseconds( - perf_record.time_in_nanos)), - perf_record.IsOut(), - static_cast(perf_record.pid), - static_cast(perf_record.tid)}; - - if (Error err = HandleContextSwitch(core_id, tsc_conversion, record, - prev_record, on_new_thread_execution)) - return CreateError(offset, toString(std::move(err))); - - prev_record = record; - } - offset += perf_record.header.size; - } - - // We might have an incomplete last record - if (prev_record && prev_record->IsIn()) - on_new_thread_execution(ThreadContinuousExecution::CreateOnlyStartExecution( - core_id, prev_record->tid, prev_record->tsc)); - - return Error::success(); -} - TraceIntelPTMultiCoreDecoder::TraceIntelPTMultiCoreDecoder( TraceIntelPT &trace, ArrayRef core_ids, ArrayRef tids, const LinuxPerfZeroTscConversion &tsc_conversion) @@ -245,10 +31,91 @@ if (Error err = DecodeContextSwitchTraces()) return std::make_shared(thread.shared_from_this(), std::move(err)); + auto it = m_decoded_threads.find(thread.GetID()); + if (it != m_decoded_threads.end()) + return it->second; + + DecodedThreadSP decoded_thread_sp = + std::make_shared(thread.shared_from_this()); + + Error err = m_trace.OnCoresBinaryDataRead( + m_cores, IntelPTDataKinds::kTraceBuffer, + [&](const DenseMap> buffers) -> Error { + auto it = m_continuous_executions_per_thread->find(thread.GetID()); + if (it != m_continuous_executions_per_thread->end()) + DecodeTrace(*decoded_thread_sp, m_trace, buffers, it->second); + + return Error::success(); + }); + if (err) + decoded_thread_sp->SetAsFailed(std::move(err)); + + m_decoded_threads.try_emplace(thread.GetID(), decoded_thread_sp); + return decoded_thread_sp; +} - return std::make_shared( - thread.shared_from_this(), - createStringError(inconvertibleErrorCode(), "unimplemented")); +llvm::Expected< + llvm::DenseMap>> +TraceIntelPTMultiCoreDecoder::CorrelateContextSwitchesAndIntelPtTraces() { + llvm::DenseMap> + continuous_executions_per_thread; + + for (core_id_t core_id : m_cores) { + std::vector intel_pt_executions; + + Error err = m_trace.OnCoreBinaryDataRead( + core_id, IntelPTDataKinds::kTraceBuffer, + [&](ArrayRef data) -> Error { + Expected> split_trace = + SplitTraceInContinuousExecutions(m_trace, data); + if (!split_trace) + return split_trace.takeError(); + + intel_pt_executions = std::move(*split_trace); + return Error::success(); + }); + if (err) + return std::move(err); + + // We'll be iterating through the thread continuous executions and the intel + // pt subtraces sorted by time. + auto it = intel_pt_executions.begin(); + auto on_new_thread_execution = + [&](ThreadContinuousExecution thread_execution) { + IntelPTThreadContinousExecution execution(thread_execution); + + for (; it != intel_pt_executions.end() && + it->tsc < thread_execution.GetEndTSC(); + it++) { + if (it->tsc > thread_execution.GetStartTSC()) { + execution.intelpt_subtraces.push_back(*it); + } else { + m_unattributed_intelpt_subtraces++; + } + } + continuous_executions_per_thread[thread_execution.tid].push_back( + execution); + }; + err = m_trace.OnCoreBinaryDataRead( + core_id, IntelPTDataKinds::kPerfContextSwitchTrace, + [&](ArrayRef data) -> Error { + Expected> executions = + DecodePerfContextSwitchTrace(data, core_id, m_tsc_conversion); + if (!executions) + return executions.takeError(); + for (const ThreadContinuousExecution &exec : *executions) + on_new_thread_execution(exec); + return Error::success(); + }); + if (err) + return std::move(err); + } + // We now sort the executions of each thread to have them ready for + // instruction decoding + for (auto &tid_executions : continuous_executions_per_thread) + std::sort(tid_executions.second.begin(), tid_executions.second.end()); + + return continuous_executions_per_thread; } Error TraceIntelPTMultiCoreDecoder::DecodeContextSwitchTraces() { @@ -258,38 +125,20 @@ if (m_continuous_executions_per_thread) return Error::success(); - m_continuous_executions_per_thread.emplace(); - - auto do_decode = [&]() -> Error { - // We'll decode all context switch traces, identify continuous executions - // and group them by thread. - for (core_id_t core_id : m_cores) { - Error err = m_trace.OnCoreBinaryDataRead( - core_id, IntelPTDataKinds::kPerfContextSwitchTrace, - [&](ArrayRef data) -> Error { - return DecodePerfContextSwitchTrace( - data, core_id, m_tsc_conversion, - [&](const ThreadContinuousExecution &execution) { - (*m_continuous_executions_per_thread)[execution.tid] - .push_back(execution); - }); - }); - if (err) { - m_setup_error = toString(std::move(err)); - return createStringError(inconvertibleErrorCode(), - m_setup_error->c_str()); - } - } - // We now sort the executions of each to have them ready for instruction - // decoding - for (auto &tid_executions : *m_continuous_executions_per_thread) - std::sort(tid_executions.second.begin(), tid_executions.second.end()); - - return Error::success(); - }; - - return m_trace.GetTimer().ForGlobal().TimeTask( - "Context switch trace decoding", do_decode); + Error err = m_trace.GetTimer().ForGlobal().TimeTask( + "Context switch and Intel PT traces correlation", [&]() -> Error { + if (auto correlation = CorrelateContextSwitchesAndIntelPtTraces()) { + m_continuous_executions_per_thread.emplace(std::move(*correlation)); + return Error::success(); + } else { + return correlation.takeError(); + } + }); + if (err) { + m_setup_error = toString(std::move(err)); + return createStringError(inconvertibleErrorCode(), m_setup_error->c_str()); + } + return Error::success(); } size_t TraceIntelPTMultiCoreDecoder::GetNumContinuousExecutionsForThread( diff --git a/lldb/source/Target/Trace.cpp b/lldb/source/Target/Trace.cpp --- a/lldb/source/Target/Trace.cpp +++ b/lldb/source/Target/Trace.cpp @@ -128,8 +128,8 @@ Optional Trace::GetLiveCoreBinaryDataSize(lldb::core_id_t core_id, llvm::StringRef kind) { - auto it = m_live_core_data.find(core_id); - if (it == m_live_core_data.end()) + auto it = m_live_core_data_sizes.find(core_id); + if (it == m_live_core_data_sizes.end()) return None; std::unordered_map &single_core_data = it->second; auto single_thread_data_it = single_core_data.find(kind.str()); @@ -211,6 +211,8 @@ m_stop_id = new_stop_id; m_live_thread_data.clear(); m_live_refresh_error.reset(); + m_live_core_data_sizes.clear(); + m_live_core_data.clear(); m_cores.reset(); auto HandleError = [&](Error &&err) -> const char * { @@ -246,7 +248,7 @@ for (const TraceCoreState &core_state : *live_process_state->cores) { m_cores->push_back(core_state.core_id); for (const TraceBinaryData &item : core_state.binary_data) - m_live_core_data[core_state.core_id][item.kind] = item.size; + m_live_core_data_sizes[core_state.core_id][item.kind] = item.size; } LLDB_LOG(log, "== Found {0} cpu cores being traced", live_process_state->cores->size()); @@ -353,10 +355,18 @@ llvm::Error Trace::OnLiveCoreBinaryDataRead(lldb::core_id_t core_id, llvm::StringRef kind, OnBinaryDataReadCallback callback) { + auto core_data_entries = m_live_core_data.find(core_id); + if (core_data_entries != m_live_core_data.end()) { + auto core_data = core_data_entries->second.find(kind.str()); + if (core_data != core_data_entries->second.end()) + return callback(core_data->second); + } + Expected> data = GetLiveCoreBinaryData(core_id, kind); if (!data) return data.takeError(); - return callback(*data); + auto it = m_live_core_data[core_id].insert({kind.str(), std::move(*data)}); + return callback(it.first->second); } llvm::Error Trace::OnDataFileRead(FileSpec file, @@ -401,6 +411,28 @@ return OnPostMortemThreadBinaryDataRead(tid, kind, callback); } +llvm::Error +Trace::OnCoresBinaryDataRead(const std::set core_ids, + llvm::StringRef kind, + OnCoresBinaryDataReadCallback callback) { + DenseMap> buffers; + + std::function::iterator)> process_core = + [&](std::set::iterator core_id) -> Error { + if (core_id == core_ids.end()) + return callback(buffers); + + return OnCoreBinaryDataRead(*core_id, kind, + [&](ArrayRef data) -> Error { + buffers.try_emplace(*core_id, data); + auto next_id = core_id; + next_id++; + return process_core(next_id); + }); + }; + return process_core(core_ids.begin()); +} + llvm::Error Trace::OnCoreBinaryDataRead(lldb::core_id_t core_id, llvm::StringRef kind, OnBinaryDataReadCallback callback) { diff --git a/lldb/source/Utility/TraceIntelPTGDBRemotePackets.cpp b/lldb/source/Utility/TraceIntelPTGDBRemotePackets.cpp --- a/lldb/source/Utility/TraceIntelPTGDBRemotePackets.cpp +++ b/lldb/source/Utility/TraceIntelPTGDBRemotePackets.cpp @@ -50,18 +50,15 @@ return base; } -std::chrono::nanoseconds -LinuxPerfZeroTscConversion::ToNanos(uint64_t tsc) const { +uint64_t LinuxPerfZeroTscConversion::ToNanos(uint64_t tsc) const { uint64_t quot = tsc >> time_shift; uint64_t rem_flag = (((uint64_t)1 << time_shift) - 1); uint64_t rem = tsc & rem_flag; - return std::chrono::nanoseconds{time_zero + quot * time_mult + - ((rem * time_mult) >> time_shift)}; + return time_zero + quot * time_mult + ((rem * time_mult) >> time_shift); } -uint64_t -LinuxPerfZeroTscConversion::ToTSC(std::chrono::nanoseconds nanos) const { - uint64_t time = nanos.count() - time_zero; +uint64_t LinuxPerfZeroTscConversion::ToTSC(uint64_t nanos) const { + uint64_t time = nanos - time_zero; uint64_t quot = time / time_mult; uint64_t rem = time % time_mult; return (quot << time_shift) + (rem << time_shift) / time_mult; diff --git a/lldb/test/API/commands/trace/TestTraceLoad.py b/lldb/test/API/commands/trace/TestTraceLoad.py --- a/lldb/test/API/commands/trace/TestTraceLoad.py +++ b/lldb/test/API/commands/trace/TestTraceLoad.py @@ -9,6 +9,18 @@ mydir = TestBase.compute_mydir(__file__) NO_DEBUG_INFO_TESTCASE = True + def testLoadMultiCoreTrace(self): + src_dir = self.getSourceDir() + trace_definition_file = os.path.join(src_dir, "intelpt-multi-core-trace", "trace.json") + self.expect("trace load -v " + trace_definition_file, substrs=["intel-pt"]) + self.expect("thread trace dump instructions 2 -t", + substrs=["19521: [tsc=0x008fb5211c143fd8] error: expected tracing enabled event", + "m.out`foo() + 65 at multi_thread.cpp:12:21", + "19520: [tsc=0x008fb5211bfbc69e] 0x0000000000400ba7 jg 0x400bb3"]) + self.expect("thread trace dump instructions 3 -t", + substrs=["67910: [tsc=0x008fb5211bfdf270] 0x0000000000400bd7 addl $0x1, -0x4(%rbp)", + "m.out`bar() + 26 at multi_thread.cpp:20:6"]) + def testLoadTrace(self): src_dir = self.getSourceDir() trace_definition_file = os.path.join(src_dir, "intelpt-trace", "trace.json") diff --git a/lldb/test/API/commands/trace/intelpt-multi-core-trace/cores/45.intelpt_trace b/lldb/test/API/commands/trace/intelpt-multi-core-trace/cores/45.intelpt_trace new file mode 100644 index 0000000000000000000000000000000000000000..0000000000000000000000000000000000000000 GIT binary patch literal 0 Hc$@ +#include +#include +using namespace std; + +bool done = false; +void foo() { + int x = 0; + for (int i = 0; i < 10000; i++) + x++; + sleep(1); + for (int i = 0; i < 10000; i++) + x++; + done = true; +} + +void bar() { + int y = 0; + while (!done) { + y++; + } + printf("bar %d\n", y); +} + +int main() { + std::thread first(foo); + std::thread second(bar); + first.join(); + second.join(); + + printf("complete\n"); + return 0; + +} diff --git a/lldb/test/API/commands/trace/intelpt-multi-core-trace/trace.json b/lldb/test/API/commands/trace/intelpt-multi-core-trace/trace.json new file mode 100644 --- /dev/null +++ b/lldb/test/API/commands/trace/intelpt-multi-core-trace/trace.json @@ -0,0 +1,51 @@ +{ + "cores": [ + { + "contextSwitchTrace": "/tmp/trace8/cores/45.perf_context_switch_trace", + "coreId": 45, + "traceBuffer": "/tmp/trace8/cores/45.intelpt_trace" + }, + { + "contextSwitchTrace": "/tmp/trace8/cores/51.perf_context_switch_trace", + "coreId": 51, + "traceBuffer": "/tmp/trace8/cores/51.intelpt_trace" + } + ], + "cpuInfo": { + "family": 6, + "model": 85, + "stepping": 4, + "vendor": "GenuineIntel" + }, + "processes": [ + { + "modules": [ + { + "file": "modules/m.out", + "systemPath": "/tmp/m.out", + "loadAddress": 4194304, + "uuid": "AEFB0D59-233F-80FF-6D3C-4DED498534CF-11017B3B" + } + ], + "pid": 3497234, + "threads": [ + { + "tid": 3497234 + }, + { + "tid": 3497496 + }, + { + "tid": 3497497 + } + ], + "triple": "x86_64-unknown-linux-gnu" + } + ], + "tscPerfZeroConversion": { + "timeMult": 1076264588, + "timeShift": 31, + "timeZero": 18433473881008870804 + }, + "type": "intel-pt" +} diff --git a/lldb/test/API/commands/trace/multiple-threads/TestTraceStartStopMultipleThreads.py b/lldb/test/API/commands/trace/multiple-threads/TestTraceStartStopMultipleThreads.py --- a/lldb/test/API/commands/trace/multiple-threads/TestTraceStartStopMultipleThreads.py +++ b/lldb/test/API/commands/trace/multiple-threads/TestTraceStartStopMultipleThreads.py @@ -233,6 +233,6 @@ # We must have captured the context switch of when the target resumed self.assertTrue(found_non_empty_context_switch) - self.expect("thread trace dump instructions", substrs=['unimplemented']) + self.expect("thread trace dump instructions") self.traceStopProcess() diff --git a/lldb/unittests/Process/Linux/PerfTests.cpp b/lldb/unittests/Process/Linux/PerfTests.cpp --- a/lldb/unittests/Process/Linux/PerfTests.cpp +++ b/lldb/unittests/Process/Linux/PerfTests.cpp @@ -76,14 +76,14 @@ if (!tsc_after_sleep) GTEST_SKIP() << toString(tsc_after_sleep.takeError()); - std::chrono::nanoseconds converted_tsc_diff = + uint64_t converted_tsc_diff = params->ToNanos(*tsc_after_sleep) - params->ToNanos(*tsc_before_sleep); std::chrono::microseconds acceptable_overhead(500); - ASSERT_GE(converted_tsc_diff.count(), SLEEP_NANOS.count()); - ASSERT_LT(converted_tsc_diff.count(), - (SLEEP_NANOS + acceptable_overhead).count()); + ASSERT_GE(converted_tsc_diff, static_cast(SLEEP_NANOS.count())); + ASSERT_LT(converted_tsc_diff, + static_cast((SLEEP_NANOS + acceptable_overhead).count())); } #endif // __x86_64__