diff --git a/lldb/source/Plugins/Trace/intel-pt/LibiptDecoder.h b/lldb/source/Plugins/Trace/intel-pt/LibiptDecoder.h --- a/lldb/source/Plugins/Trace/intel-pt/LibiptDecoder.h +++ b/lldb/source/Plugins/Trace/intel-pt/LibiptDecoder.h @@ -18,9 +18,13 @@ namespace lldb_private { namespace trace_intel_pt { +/// This struct represents a point in the intel pt trace that the decoder can start decoding from without errors. struct IntelPTThreadSubtrace { - uint64_t tsc; + /// The memory offset of a PSB packet that is a synchronization point for the decoder. A decoder normally looks first + /// for a PSB packet and then it starts decoding. uint64_t psb_offset; + /// The timestamp associated with the PSB packet above. + uint64_t tsc; }; /// This struct represents a continuous execution of a thread in a core, @@ -38,17 +42,43 @@ bool operator<(const IntelPTThreadContinousExecution &o) const; }; -/// Decode a raw Intel PT trace given in \p buffer and append the decoded +/// Decode a raw Intel PT trace for a single thread given in \p buffer and append the decoded /// instructions and errors in \p decoded_thread. It uses the low level libipt /// library underneath. -void DecodeTrace(DecodedThread &decoded_thread, TraceIntelPT &trace_intel_pt, +void DecodeSingleTraceForThread(DecodedThread &decoded_thread, TraceIntelPT &trace_intel_pt, llvm::ArrayRef buffer); -void DecodeTrace( +/// Decode a raw Intel PT trace for a single thread that was collected on a per cpu core basis. +/// +/// \param[in,out] decoded_thread +/// All decoded instructions, errors and events will be appended to this object. +/// +/// \param[in] trace_intel_pt +/// The main Trace object that contains all the information related to the trace session. +/// +/// \param[in] buffers +/// A map from cpu core id to raw intel pt buffers. +/// +/// \param[in] executions +/// A list of chunks of timed executions of the same given thread.
It is used to identify if +/// some executions have missing intel pt data and also to determine in which core a certain +/// part of the execution occurred. +void DecodeSystemWideTraceForThread( DecodedThread &decoded_thread, TraceIntelPT &trace_intel_pt, const llvm::DenseMap> &buffers, const std::vector &executions); +/// Given an intel pt trace, split it in chunks delimited by PSB packets. Each of these chunks +/// is guaranteed to have been executed continuously. +/// +/// \param[in] trace_intel_pt +/// The main Trace object that contains all the information related to the trace session. +/// +/// \param[in] buffer +/// The intel pt buffer that belongs to a single thread or to a single cpu core. +/// +/// \return +/// A list of continuous executions sorted by time, or an \a llvm::Error in case of failures. llvm::Expected> SplitTraceInContinuousExecutions(TraceIntelPT &trace_intel_pt, llvm::ArrayRef buffer); diff --git a/lldb/source/Plugins/Trace/intel-pt/LibiptDecoder.cpp b/lldb/source/Plugins/Trace/intel-pt/LibiptDecoder.cpp --- a/lldb/source/Plugins/Trace/intel-pt/LibiptDecoder.cpp +++ b/lldb/source/Plugins/Trace/intel-pt/LibiptDecoder.cpp @@ -97,9 +97,7 @@ /// The status that was result of synchronizing to the most recent PSB. /// /// \param[in] stop_on_psb_change - /// If \b true, decoding - /// An optional offset to a given PSB. Decoding stops if a different PSB is - /// reached. + /// If \b true, decoding stops if a different PSB is reached.
void DecodeInstructionsAndEvents(int status, bool stop_on_psb_change = false) { uint64_t psb_offset; @@ -310,7 +308,7 @@ return Error::success(); } -void lldb_private::trace_intel_pt::DecodeTrace(DecodedThread &decoded_thread, +void lldb_private::trace_intel_pt::DecodeSingleTraceForThread(DecodedThread &decoded_thread, TraceIntelPT &trace_intel_pt, ArrayRef buffer) { Expected decoder_up = @@ -326,7 +324,7 @@ libipt_decoder.DecodeUntilEndOfTrace(); } -void lldb_private::trace_intel_pt::DecodeTrace( +void lldb_private::trace_intel_pt::DecodeSystemWideTraceForThread( DecodedThread &decoded_thread, TraceIntelPT &trace_intel_pt, const DenseMap> &buffers, const std::vector &executions) { @@ -438,8 +436,8 @@ &psb_offset); // this can't fail because we got here executions.push_back({ - tsc, psb_offset, + tsc, }); } return executions; diff --git a/lldb/source/Plugins/Trace/intel-pt/PerfContextSwitchDecoder.h b/lldb/source/Plugins/Trace/intel-pt/PerfContextSwitchDecoder.h --- a/lldb/source/Plugins/Trace/intel-pt/PerfContextSwitchDecoder.h +++ b/lldb/source/Plugins/Trace/intel-pt/PerfContextSwitchDecoder.h @@ -121,7 +121,7 @@ : core_id(core_id), tid(tid), pid(pid) {} }; -/// Decodes a context switch trace gotten with perf_event_open. +/// Decodes a context switch trace collected with perf_event_open. /// /// \param[in] data /// The context switch trace in binary format. diff --git a/lldb/source/Plugins/Trace/intel-pt/PerfContextSwitchDecoder.cpp b/lldb/source/Plugins/Trace/intel-pt/PerfContextSwitchDecoder.cpp --- a/lldb/source/Plugins/Trace/intel-pt/PerfContextSwitchDecoder.cpp +++ b/lldb/source/Plugins/Trace/intel-pt/PerfContextSwitchDecoder.cpp @@ -15,52 +15,59 @@ /// Copied from to avoid depending on perf_event.h on /// non-linux platforms. 
/// \{ -struct perf_event_header { - uint32_t type; - uint16_t misc; - uint16_t size; -}; - #define PERF_RECORD_MISC_SWITCH_OUT (1 << 13) #define PERF_RECORD_MAX 19 #define PERF_RECORD_SWITCH_CPU_WIDE 15 -/// \} - -/// Record found in the perf_event context switch traces. It might contain -/// additional fields in memory, but header.size should have the actual size -/// of the record. -struct PerfContextSwitchRecord { - struct perf_event_header header; - uint32_t next_prev_pid; - uint32_t next_prev_tid; - uint32_t pid, tid; - uint64_t time_in_nanos; - bool IsOut() const { return header.misc & PERF_RECORD_MISC_SWITCH_OUT; } - - bool IsContextSwitchRecord() const { - return header.type == PERF_RECORD_SWITCH_CPU_WIDE; - } +struct perf_event_header { + uint32_t type; + uint16_t misc; + uint16_t size; /// \return /// An \a llvm::Error if the record looks obviously wrong, or \a /// llvm::Error::success() otherwise. Error SanityCheck() const { + // The following checks are based on visual inspection of the records and + // enums in + // https://elixir.bootlin.com/linux/v4.8/source/include/uapi/linux/perf_event.h + // See PERF_RECORD_MAX, PERF_RECORD_SWITCH and the data similar records + // hold. + // A record of too many uint64_t's or more should mean that the data is // wrong - if (header.size == 0 || header.size > sizeof(uint64_t) * 1000) + const uint64_t max_valid_size_bytes = 8000; + if (size == 0 || size > max_valid_size_bytes) return createStringError( inconvertibleErrorCode(), - formatv("A record of {0} bytes was found.", header.size)); + formatv("A record of {0} bytes was found.", size)); // We add some numbers to PERF_RECORD_MAX because some systems might have // custom records. In any case, we are looking only for abnormal data. 
- if (header.type >= PERF_RECORD_MAX + 100) + if (type >= PERF_RECORD_MAX + 100) return createStringError( inconvertibleErrorCode(), - formatv("Invalid record type {0} was found.", header.type)); + formatv("Invalid record type {0} was found.", type)); return Error::success(); } + + bool IsContextSwitchRecord() const { + return type == PERF_RECORD_SWITCH_CPU_WIDE; + } +}; +/// \} + +/// Record found in the perf_event context switch traces. It might contain +/// additional fields in memory, but header.size should have the actual size +/// of the record. +struct PerfContextSwitchRecord { + struct perf_event_header header; + uint32_t next_prev_pid; + uint32_t next_prev_tid; + uint32_t pid, tid; + uint64_t time_in_nanos; + + bool IsOut() const { return header.misc & PERF_RECORD_MISC_SWITCH_OUT; } }; /// Record produced after parsing the raw context switch trace produce by @@ -224,8 +231,6 @@ return Error::success(); } -#include - Expected> lldb_private::trace_intel_pt::DecodePerfContextSwitchTrace( ArrayRef data, core_id_t core_id, @@ -239,17 +244,20 @@ auto do_decode = [&]() -> Error { Optional prev_record; while (offset < data.size()) { - const PerfContextSwitchRecord &perf_record = - *reinterpret_cast(data.data() + - offset); + const perf_event_header &perf_record = + *reinterpret_cast(data.data() + offset); if (Error err = perf_record.SanityCheck()) return err; if (perf_record.IsContextSwitchRecord()) { + const PerfContextSwitchRecord &context_switch_record = + *reinterpret_cast(data.data() + + offset); ContextSwitchRecord record{ - tsc_conversion.ToTSC(perf_record.time_in_nanos), - perf_record.IsOut(), static_cast(perf_record.pid), - static_cast(perf_record.tid)}; + tsc_conversion.ToTSC(context_switch_record.time_in_nanos), + context_switch_record.IsOut(), + static_cast(context_switch_record.pid), + static_cast(context_switch_record.tid)}; if (Error err = RecoverExecutionsFromConsecutiveRecords( core_id, tsc_conversion, record, prev_record, @@ -260,7 +268,7 @@ 
prev_record = record; } - offset += perf_record.header.size; + offset += perf_record.size; } // We might have an incomplete last record diff --git a/lldb/source/Plugins/Trace/intel-pt/ThreadDecoder.cpp b/lldb/source/Plugins/Trace/intel-pt/ThreadDecoder.cpp --- a/lldb/source/Plugins/Trace/intel-pt/ThreadDecoder.cpp +++ b/lldb/source/Plugins/Trace/intel-pt/ThreadDecoder.cpp @@ -38,7 +38,7 @@ Error err = m_trace.OnThreadBufferRead( m_thread_sp->GetID(), [&](llvm::ArrayRef data) { - DecodeTrace(*decoded_thread_sp, m_trace, data); + DecodeSingleTraceForThread(*decoded_thread_sp, m_trace, data); return Error::success(); }); diff --git a/lldb/source/Plugins/Trace/intel-pt/TraceIntelPTMultiCoreDecoder.h b/lldb/source/Plugins/Trace/intel-pt/TraceIntelPTMultiCoreDecoder.h --- a/lldb/source/Plugins/Trace/intel-pt/TraceIntelPTMultiCoreDecoder.h +++ b/lldb/source/Plugins/Trace/intel-pt/TraceIntelPTMultiCoreDecoder.h @@ -52,15 +52,19 @@ size_t GetTotalContinuousExecutionsCount() const; private: - /// Traverse the context switch traces and recover the continuous executions - /// by thread. - llvm::Error DecodeContextSwitchTraces(); + /// Traverse the context switch traces and the basic intel pt continuous subtraces + /// and produce a list of continuous executions for each process and thread. + /// + /// See \a DoCorrelateContextSwitchesAndIntelPtTraces. + /// + /// Any errors are stored in \a m_setup_error. + llvm::Error CorrelateContextSwitchesAndIntelPtTraces(); /// Produce a mapping from thread ids to the list of continuos executions with /// their associated intel pt subtraces. 
llvm::Expected< llvm::DenseMap>> - CorrelateContextSwitchesAndIntelPtTraces(); + DoCorrelateContextSwitchesAndIntelPtTraces(); TraceIntelPT *m_trace; std::set m_tids; diff --git a/lldb/source/Plugins/Trace/intel-pt/TraceIntelPTMultiCoreDecoder.cpp b/lldb/source/Plugins/Trace/intel-pt/TraceIntelPTMultiCoreDecoder.cpp --- a/lldb/source/Plugins/Trace/intel-pt/TraceIntelPTMultiCoreDecoder.cpp +++ b/lldb/source/Plugins/Trace/intel-pt/TraceIntelPTMultiCoreDecoder.cpp @@ -31,7 +31,7 @@ } DecodedThreadSP TraceIntelPTMultiCoreDecoder::Decode(Thread &thread) { - if (Error err = DecodeContextSwitchTraces()) + if (Error err = CorrelateContextSwitchesAndIntelPtTraces()) return std::make_shared(thread.shared_from_this(), std::move(err)); auto it = m_decoded_threads.find(thread.GetID()); @@ -43,10 +43,10 @@ Error err = m_trace->OnAllCoresBinaryDataRead( IntelPTDataKinds::kTraceBuffer, - [&](const DenseMap> buffers) -> Error { + [&](const DenseMap>& buffers) -> Error { auto it = m_continuous_executions_per_thread->find(thread.GetID()); if (it != m_continuous_executions_per_thread->end()) - DecodeTrace(*decoded_thread_sp, *m_trace, buffers, it->second); + DecodeSystemWideTraceForThread(*decoded_thread_sp, *m_trace, buffers, it->second); return Error::success(); }); @@ -57,10 +57,29 @@ return decoded_thread_sp; } -llvm::Expected< - llvm::DenseMap>> -TraceIntelPTMultiCoreDecoder::CorrelateContextSwitchesAndIntelPtTraces() { - llvm::DenseMap> +static Expected> +GetIntelPTSubtracesForCore(TraceIntelPT &trace, core_id_t core_id) { + std::vector intel_pt_subtraces; + Error err = trace.OnCoreBinaryDataRead( + core_id, IntelPTDataKinds::kTraceBuffer, + [&](ArrayRef data) -> Error { + Expected> split_trace = + SplitTraceInContinuousExecutions(trace, data); + if (!split_trace) + return split_trace.takeError(); + + intel_pt_subtraces = std::move(*split_trace); + return Error::success(); + }); + if (err) + return std::move(err); + return intel_pt_subtraces; +} + +Expected< + DenseMap>> 
+TraceIntelPTMultiCoreDecoder::DoCorrelateContextSwitchesAndIntelPtTraces() { + DenseMap> continuous_executions_per_thread; Optional conv_opt = @@ -73,30 +92,19 @@ LinuxPerfZeroTscConversion tsc_conversion = *conv_opt; for (core_id_t core_id : m_trace->GetTracedCores()) { - std::vector intel_pt_executions; - - Error err = m_trace->OnCoreBinaryDataRead( - core_id, IntelPTDataKinds::kTraceBuffer, - [&](ArrayRef data) -> Error { - Expected> split_trace = - SplitTraceInContinuousExecutions(*m_trace, data); - if (!split_trace) - return split_trace.takeError(); - - intel_pt_executions = std::move(*split_trace); - return Error::success(); - }); - if (err) - return std::move(err); + Expected> intel_pt_subtraces = + GetIntelPTSubtracesForCore(*m_trace, core_id); + if (!intel_pt_subtraces) + return intel_pt_subtraces.takeError(); // We'll be iterating through the thread continuous executions and the intel // pt subtraces sorted by time. - auto it = intel_pt_executions.begin(); + auto it = intel_pt_subtraces->begin(); auto on_new_thread_execution = - [&](ThreadContinuousExecution thread_execution) { + [&](const ThreadContinuousExecution& thread_execution) { IntelPTThreadContinousExecution execution(thread_execution); - for (; it != intel_pt_executions.end() && + for (; it != intel_pt_subtraces->end() && it->tsc < thread_execution.GetEndTSC(); it++) { if (it->tsc > thread_execution.GetStartTSC()) { @@ -108,7 +116,7 @@ continuous_executions_per_thread[thread_execution.tid].push_back( execution); }; - err = m_trace->OnCoreBinaryDataRead( + Error err = m_trace->OnCoreBinaryDataRead( core_id, IntelPTDataKinds::kPerfContextSwitchTrace, [&](ArrayRef data) -> Error { Expected> executions = @@ -130,7 +138,7 @@ return continuous_executions_per_thread; } -Error TraceIntelPTMultiCoreDecoder::DecodeContextSwitchTraces() { +Error TraceIntelPTMultiCoreDecoder::CorrelateContextSwitchesAndIntelPtTraces() { if (m_setup_error) return createStringError(inconvertibleErrorCode(), 
m_setup_error->c_str()); @@ -139,7 +147,7 @@ Error err = m_trace->GetTimer().ForGlobal().TimeTask( "Context switch and Intel PT traces correlation", [&]() -> Error { - if (auto correlation = CorrelateContextSwitchesAndIntelPtTraces()) { + if (auto correlation = DoCorrelateContextSwitchesAndIntelPtTraces()) { m_continuous_executions_per_thread.emplace(std::move(*correlation)); return Error::success(); } else { diff --git a/lldb/source/Plugins/Trace/intel-pt/TraceIntelPTSessionFileParser.cpp b/lldb/source/Plugins/Trace/intel-pt/TraceIntelPTSessionFileParser.cpp --- a/lldb/source/Plugins/Trace/intel-pt/TraceIntelPTSessionFileParser.cpp +++ b/lldb/source/Plugins/Trace/intel-pt/TraceIntelPTSessionFileParser.cpp @@ -171,7 +171,8 @@ // A list of known threads for the given process. When context switch // data is provided, LLDB will automatically create threads for the // this process whenever it finds new threads when traversing the - // context switches. + // context switches, so passing values to this list in this case is + // optional. { "tid": integer, "traceBuffer"?: string @@ -213,10 +214,6 @@ "timeShift": integer, "timeZero": integer, } - "dontCreateThreadsFromContextSwitches"?: boolean, - // If this is true, then the automatic creation of threads from context switch - // data is disabled, and thus only the threads provided in the "processes.threads" - // section will be created. } Notes: