diff --git a/lldb/source/Plugins/Trace/intel-pt/DecodedThread.h b/lldb/source/Plugins/Trace/intel-pt/DecodedThread.h --- a/lldb/source/Plugins/Trace/intel-pt/DecodedThread.h +++ b/lldb/source/Plugins/Trace/intel-pt/DecodedThread.h @@ -109,6 +109,31 @@ void RecordError(int libipt_error_code); }; + /// \struct InstructionPointer + /// Structure that stores a 6 byte prefix a list of 2 byte suffixes for each + /// instruction that is appended to the trace, in order. Concatenating the + /// prefix and suffix for any instruction ID (which is the concatenation of + /// two indices) will give the instruction load address. + /// + /// Generally most consecutive instructions will be close. Unless you are + /// doing a function call between different shared libraries, all your + /// instructions should be close in memory. This means that the addresses of + /// most consecutive instructions will have the same prefix. For we'll divide + /// the instruction addresses by the first 48 bits (6 bytes). + /// Each new instruction will now occupy 2 bytes. And each time you see a + /// change in the first 6 bytes, you'll pay the cost of storing that prefix, + /// which takes 8 bytes. + struct InstructionPointer { + uint64_t insn_prefix; + std::vector suffixes; + + InstructionPointer(uint64_t prefix, uint16_t suffix) + : insn_prefix(prefix), suffixes(std::vector({suffix})){}; + + size_t AppendInstructionSuffix(uint16_t suffix); + uint64_t GetFullInstructionAddress(size_t suffix_index) const; + }; + DecodedThread(lldb::ThreadSP thread_sp); /// Utility constructor that initializes the trace with a provided error. @@ -132,10 +157,23 @@ /// message with the \a GetErrorByInstructionIndex() method. size_t GetInstructionsCount() const; + /// For each \a InstructionPointer block, calculate the net number of + /// instructions upto that block. See also, \a m_ipblock_sizes member. + void CalcIPBlockSizes(); + + /// Convert instruction ID to index + size_t ToIndex(lldb::user_id_t id) const; + /// Convert instruction index to ID + lldb::user_id_t ToID(size_t index) const; + /// Calculate next instruction ID + lldb::user_id_t NextID(lldb::user_id_t id) const; + /// Calculate previous instruction ID + lldb::user_id_t PrevID(lldb::user_id_t id) const; + /// \return /// The load address of the instruction at the given index, or \a /// LLDB_INVALID_ADDRESS if it is an error. - lldb::addr_t GetInstructionLoadAddress(size_t insn_index) const; + lldb::addr_t GetInstructionLoadAddress(size_t insn_id) const; /// Get the \a lldb::TraceInstructionControlFlowType categories of the /// instruction. @@ -143,7 +181,7 @@ /// \return /// The control flow categories, or \b 0 if the instruction is an error. lldb::TraceInstructionControlFlowType - GetInstructionControlFlowType(size_t insn_index) const; + GetInstructionControlFlowType(lldb::user_id_t insn_id) const; /// Construct the TSC range that covers the given instruction index. /// This operation is O(logn) and should be used sparingly. @@ -158,7 +196,7 @@ /// An optional range that might include the given index or might be a /// neighbor of it. It might help speed it traversals of the trace with /// short jumps. - llvm::Optional CalculateTscRange( + llvm::Optional CalculateTscRangeByIndex( size_t insn_index, const llvm::Optional &hint_range) const; @@ -214,9 +252,11 @@ /// When adding new members to this class, make sure /// to update \a CalculateApproximateMemoryUsage() accordingly. lldb::ThreadSP m_thread_sp; - /// The low level storage of all instruction addresses. Each instruction has - /// an index in this vector and it will be used in other parts of the code. - std::vector m_instruction_ips; + /// The low level storage of all instruction addresses. This stores + /// instructions in blocks such that instructions with the same high 48 bits + /// are stored together. Each instruction has an ID in this vector and it + /// will be used in other parts of the code. + std::vector m_instruction_blocks; /// The size in bytes of each instruction. std::vector m_instruction_sizes; /// The libipt instruction class for each instruction. @@ -231,8 +271,11 @@ std::map m_instruction_timestamps; /// This is the chronologically last TSC that has been added. llvm::Optional m_last_tsc = llvm::None; - // This variables stores the messages of all the error instructions in the - // trace. It maps `instruction index -> error message`. + /// For each \a InstructionPointer block, this contains the net number of + /// instructions upto that block + std::vector m_ipblock_sizes; + /// This variables stores the messages of all the error instructions in the + /// trace. It maps `instruction index -> error message`. llvm::DenseMap m_errors; /// The size in bytes of the raw buffer before decoding. It might be None if /// the decoding failed. diff --git a/lldb/source/Plugins/Trace/intel-pt/DecodedThread.cpp b/lldb/source/Plugins/Trace/intel-pt/DecodedThread.cpp --- a/lldb/source/Plugins/Trace/intel-pt/DecodedThread.cpp +++ b/lldb/source/Plugins/Trace/intel-pt/DecodedThread.cpp @@ -10,6 +10,7 @@ #include #include +#include #include "TraceCursorIntelPT.h" #include "lldb/Utility/StreamString.h" @@ -21,7 +22,7 @@ char IntelPTError::ID; -IntelPTError::IntelPTError(int libipt_error_code, lldb::addr_t address) +IntelPTError::IntelPTError(int libipt_error_code, addr_t address) : m_libipt_error_code(libipt_error_code), m_address(address) { assert(libipt_error_code < 0); } @@ -35,27 +36,124 @@ OS << "error: " << libipt_error_message; } +DecodedThread::DecodedThread(ThreadSP thread_sp) : m_thread_sp(thread_sp) {} + +DecodedThread::DecodedThread(ThreadSP thread_sp, Error &&error) + : m_thread_sp(thread_sp) { + AppendError(std::move(error)); +} + +void DecodedThread::SetRawTraceSize(size_t size) { m_raw_trace_size = size; } + +TraceCursorUP DecodedThread::GetCursor() { + // We insert a fake error signaling an empty trace if needed becasue the + // TraceCursor requires non-empty traces. + if (m_instruction_blocks.empty()) + AppendError(createStringError(inconvertibleErrorCode(), "empty trace")); + return std::make_unique(m_thread_sp, shared_from_this()); +} + Optional DecodedThread::GetRawTraceSize() const { return m_raw_trace_size; } size_t DecodedThread::GetInstructionsCount() const { - return m_instruction_ips.size(); + size_t count = 0; + for (const InstructionPointer &ip : m_instruction_blocks) + count += ip.suffixes.size(); + return count; +} + +uint64_t DecodedThread::InstructionPointer::GetFullInstructionAddress( + size_t suffix_index) const { + uint16_t suffix = suffixes[suffix_index]; + uint64_t address = (insn_prefix << 16) | (uint64_t)suffix; + return address; +} + +void DecodedThread::CalcIPBlockSizes() { + size_t acc_insn_count = 0; + for (const InstructionPointer &ip : m_instruction_blocks) { + acc_insn_count += ip.suffixes.size(); + m_ipblock_sizes.push_back(acc_insn_count); + } } -lldb::addr_t DecodedThread::GetInstructionLoadAddress(size_t insn_index) const { - return m_instruction_ips[insn_index]; +size_t DecodedThread::ToIndex(user_id_t id) const { + return m_ipblock_sizes[id >> 32] + (UINT32_MAX & id); +} + +user_id_t DecodedThread::ToID(size_t index) const { + if (m_ipblock_sizes.size() == 0) { + // Simple slow search if block sizes were not set up + size_t prefix_index = 0; + for (const InstructionPointer &ip : m_instruction_blocks) { + if (index >= 0 && index < ip.suffixes.size()) + return (prefix_index << 32 | UINT32_MAX & index); + + index -= ip.suffixes.size(); + prefix_index++; + } + + return LLDB_INVALID_ADDRESS; + } + + // Binary fast search if block sizes are available + size_t lower = 0, upper = m_ipblock_sizes.size() - 1; + while (upper != lower && upper - lower != 1) { + size_t mid = (upper + lower) / 2; + if (index > m_ipblock_sizes[mid]) + upper = mid; + else + lower = mid; + } + + if (lower > 0) + index -= m_ipblock_sizes[lower]; + return (upper << 32 | UINT32_MAX & index); +} + +user_id_t DecodedThread::NextID(user_id_t id) const { + size_t prefix_idx = id >> 32; + size_t suffix_idx = UINT32_MAX & id; + if (m_instruction_blocks[prefix_idx].suffixes.size() - 1 > suffix_idx && + suffix_idx < UINT32_MAX) + return id + 1; + else if (m_instruction_blocks.size() - 1 > prefix_idx && + prefix_idx < UINT32_MAX) + return (prefix_idx + 1) << 32; + else + return id; // Sorry cant help you, already at MAX,MAX +} + +user_id_t DecodedThread::PrevID(user_id_t id) const { + size_t prefix_idx = id >> 32; + size_t suffix_idx = UINT32_MAX & id; + if (suffix_idx < 0) + return id - 1; + else if (prefix_idx < 0) + return (prefix_idx - 1) << 32 | + (UINT32_MAX & + (m_instruction_blocks[prefix_idx - 1].suffixes.size() - 1)); + else + return id; // Sorry cant help you, already at 0,0 +} + +addr_t DecodedThread::GetInstructionLoadAddress(user_id_t insn_id) const { + return m_instruction_blocks[insn_id >> 32].GetFullInstructionAddress( + UINT32_MAX & insn_id); } TraceInstructionControlFlowType -DecodedThread::GetInstructionControlFlowType(size_t insn_index) const { - if (IsInstructionAnError(insn_index)) +DecodedThread::GetInstructionControlFlowType(user_id_t insn_id) const { + if (IsInstructionAnError(insn_id)) return (TraceInstructionControlFlowType)0; TraceInstructionControlFlowType mask = eTraceInstructionControlFlowTypeInstruction; - lldb::addr_t load_address = m_instruction_ips[insn_index]; + addr_t load_address = GetInstructionLoadAddress(insn_id); + size_t insn_index = ToIndex(insn_id); uint8_t insn_byte_size = m_instruction_sizes[insn_index]; pt_insn_class iclass = m_instruction_classes[insn_index]; @@ -64,8 +162,9 @@ case ptic_jump: case ptic_far_jump: mask |= eTraceInstructionControlFlowTypeBranch; - if (insn_index + 1 < m_instruction_ips.size() && - load_address + insn_byte_size != m_instruction_ips[insn_index + 1]) + if (insn_index + 1 < m_instruction_blocks.size() && + load_address + insn_byte_size != + GetInstructionLoadAddress(NextID(insn_id))) mask |= eTraceInstructionControlFlowTypeTakenBranch; break; case ptic_return: @@ -85,20 +184,24 @@ ThreadSP DecodedThread::GetThread() { return m_thread_sp; } -void DecodedThread::RecordTscForLastInstruction(uint64_t tsc) { - if (!m_last_tsc || *m_last_tsc != tsc) { - // In case the first instructions are errors or did not have a TSC, we'll - // get a first valid TSC not in position 0. We can safely force these error - // instructions to use the first valid TSC, so that all the trace has TSCs. - size_t start_index = - m_instruction_timestamps.empty() ? 0 : m_instruction_ips.size() - 1; - m_instruction_timestamps.emplace(start_index, tsc); - m_last_tsc = tsc; - } +size_t +DecodedThread::InstructionPointer::AppendInstructionSuffix(uint16_t suffix) { + suffixes.push_back(suffix); + return suffixes.size() - 1; } void DecodedThread::AppendInstruction(const pt_insn &insn) { - m_instruction_ips.emplace_back(insn.ip); + uint16_t suffix = UINT16_MAX & insn.ip; + uint64_t prefix = insn.ip >> 16; + + auto last_ip = m_instruction_blocks.end(); + last_ip--; + if (!m_instruction_blocks.empty() && last_ip->insn_prefix == prefix && + last_ip->suffixes.size() < UINT32_MAX) + last_ip->AppendInstructionSuffix(suffix); + else + m_instruction_blocks.emplace_back(prefix, suffix); + m_instruction_sizes.emplace_back(insn.size); m_instruction_classes.emplace_back(insn.iclass); } @@ -109,10 +212,10 @@ } void DecodedThread::AppendError(llvm::Error &&error) { - m_errors.try_emplace(m_instruction_ips.size(), toString(std::move(error))); - m_instruction_ips.emplace_back(LLDB_INVALID_ADDRESS); - m_instruction_sizes.emplace_back(0); - m_instruction_classes.emplace_back(pt_insn_class::ptic_error); + m_errors.try_emplace(m_instruction_blocks.size(), toString(std::move(error))); + AppendInstruction(pt_insn{.ip = LLDB_INVALID_ADDRESS, + .iclass = pt_insn_class::ptic_error, + .size = 0}); } void DecodedThread::AppendError(llvm::Error &&error, uint64_t tsc) { @@ -125,6 +228,18 @@ total_count++; } +bool DecodedThread::IsInstructionAnError(user_id_t insn_id) const { + return GetInstructionLoadAddress(insn_id) == LLDB_INVALID_ADDRESS; +} + +const char *DecodedThread::GetErrorByInstructionIndex(size_t insn_idx) { + auto it = m_errors.find(insn_idx); + if (it == m_errors.end()) + return nullptr; + + return it->second.c_str(); +} + void DecodedThread::RecordTscError(int libipt_error_code) { m_tsc_errors.RecordError(libipt_error_code); } @@ -133,7 +248,7 @@ return m_tsc_errors; } -Optional DecodedThread::CalculateTscRange( +Optional DecodedThread::CalculateTscRangeByIndex( size_t insn_index, const Optional &hint_range) const { // We first try to check the given hint range in case we are traversing the @@ -159,41 +274,16 @@ return TscRange(--it, *this); } -bool DecodedThread::IsInstructionAnError(size_t insn_idx) const { - return m_instruction_ips[insn_idx] == LLDB_INVALID_ADDRESS; -} - -const char *DecodedThread::GetErrorByInstructionIndex(size_t insn_idx) { - auto it = m_errors.find(insn_idx); - if (it == m_errors.end()) - return nullptr; - - return it->second.c_str(); -} - -DecodedThread::DecodedThread(ThreadSP thread_sp) : m_thread_sp(thread_sp) {} - -DecodedThread::DecodedThread(ThreadSP thread_sp, Error &&error) - : m_thread_sp(thread_sp) { - AppendError(std::move(error)); -} - -void DecodedThread::SetRawTraceSize(size_t size) { m_raw_trace_size = size; } - -lldb::TraceCursorUP DecodedThread::GetCursor() { - // We insert a fake error signaling an empty trace if needed becasue the - // TraceCursor requires non-empty traces. - if (m_instruction_ips.empty()) - AppendError(createStringError(inconvertibleErrorCode(), "empty trace")); - return std::make_unique(m_thread_sp, shared_from_this()); -} - -size_t DecodedThread::CalculateApproximateMemoryUsage() const { - return sizeof(pt_insn::ip) * m_instruction_ips.size() + - sizeof(pt_insn::size) * m_instruction_sizes.size() + - sizeof(pt_insn::iclass) * m_instruction_classes.size() + - (sizeof(size_t) + sizeof(uint64_t)) * m_instruction_timestamps.size() + - m_errors.getMemorySize(); +void DecodedThread::RecordTscForLastInstruction(uint64_t tsc) { + if (!m_last_tsc || *m_last_tsc != tsc) { + // In case the first instructions are errors or did not have a TSC, we'll + // get a first valid TSC not in position 0. We can safely force these error + // instructions to use the first valid TSC, so that all the trace has TSCs. + size_t start_index = + m_instruction_timestamps.empty() ? 0 : m_instruction_blocks.size() - 1; + m_instruction_timestamps.emplace(start_index, tsc); + m_last_tsc = tsc; + } } DecodedThread::TscRange::TscRange(std::map::const_iterator it, @@ -236,3 +326,11 @@ --prev_it; return TscRange(prev_it, *m_decoded_thread); } + +size_t DecodedThread::CalculateApproximateMemoryUsage() const { + return sizeof(InstructionPointer) * m_instruction_blocks.size() + + sizeof(pt_insn::size) * m_instruction_sizes.size() + + sizeof(pt_insn::iclass) * m_instruction_classes.size() + + (sizeof(size_t) + sizeof(uint64_t)) * m_instruction_timestamps.size() + + m_errors.getMemorySize(); +} diff --git a/lldb/source/Plugins/Trace/intel-pt/IntelPTDecoder.cpp b/lldb/source/Plugins/Trace/intel-pt/IntelPTDecoder.cpp --- a/lldb/source/Plugins/Trace/intel-pt/IntelPTDecoder.cpp +++ b/lldb/source/Plugins/Trace/intel-pt/IntelPTDecoder.cpp @@ -197,6 +197,8 @@ AppendInstruction(decoded_thread, insn, tsc_info); } } + + decoded_thread.CalcIPBlockSizes(); } /// Callback used by libipt for reading the process memory. diff --git a/lldb/source/Plugins/Trace/intel-pt/TraceCursorIntelPT.h b/lldb/source/Plugins/Trace/intel-pt/TraceCursorIntelPT.h --- a/lldb/source/Plugins/Trace/intel-pt/TraceCursorIntelPT.h +++ b/lldb/source/Plugins/Trace/intel-pt/TraceCursorIntelPT.h @@ -46,6 +46,8 @@ DecodedThreadSP m_decoded_thread_sp; /// Internal instruction index currently pointing at. size_t m_pos; + /// Internal instruction ID currently pointing at. + lldb::user_id_t m_id; /// Tsc range covering the current instruction. llvm::Optional m_tsc_range; }; diff --git a/lldb/source/Plugins/Trace/intel-pt/TraceCursorIntelPT.cpp b/lldb/source/Plugins/Trace/intel-pt/TraceCursorIntelPT.cpp --- a/lldb/source/Plugins/Trace/intel-pt/TraceCursorIntelPT.cpp +++ b/lldb/source/Plugins/Trace/intel-pt/TraceCursorIntelPT.cpp @@ -23,8 +23,9 @@ assert(m_decoded_thread_sp->GetInstructionsCount() > 0 && "a trace should have at least one instruction or error"); m_pos = m_decoded_thread_sp->GetInstructionsCount() - 1; + m_id = m_decoded_thread_sp->ToID(m_pos); m_tsc_range = - m_decoded_thread_sp->CalculateTscRange(m_pos, /*hint_range*/ None); + m_decoded_thread_sp->CalculateTscRangeByIndex(m_pos, /*hint_range*/ None); } size_t TraceCursorIntelPT::GetInternalInstructionSize() { @@ -42,6 +43,7 @@ while (canMoveOne()) { m_pos += IsForwards() ? 1 : -1; + m_id = m_decoded_thread_sp->ToID(m_pos); if (m_tsc_range && !m_tsc_range->InRange(m_pos)) m_tsc_range = IsForwards() ? m_tsc_range->Next() : m_tsc_range->Prev(); @@ -81,12 +83,14 @@ }; int64_t dist = FindDistanceAndSetPos(); - m_tsc_range = m_decoded_thread_sp->CalculateTscRange(m_pos, m_tsc_range); + m_id = m_decoded_thread_sp->ToID(m_pos); + m_tsc_range = + m_decoded_thread_sp->CalculateTscRangeByIndex(m_pos, m_tsc_range); return dist; } bool TraceCursorIntelPT::IsError() { - return m_decoded_thread_sp->IsInstructionAnError(m_pos); + return m_decoded_thread_sp->IsInstructionAnError(m_id); } const char *TraceCursorIntelPT::GetError() { @@ -94,7 +98,7 @@ } lldb::addr_t TraceCursorIntelPT::GetLoadAddress() { - return m_decoded_thread_sp->GetInstructionLoadAddress(m_pos); + return m_decoded_thread_sp->GetInstructionLoadAddress(m_id); } Optional @@ -110,16 +114,19 @@ TraceInstructionControlFlowType TraceCursorIntelPT::GetInstructionControlFlowType() { - return m_decoded_thread_sp->GetInstructionControlFlowType(m_pos); + return m_decoded_thread_sp->GetInstructionControlFlowType(m_id); } bool TraceCursorIntelPT::GoToId(user_id_t id) { - if (m_decoded_thread_sp->GetInstructionsCount() <= id) + size_t idx = m_decoded_thread_sp->ToIndex(id); + if (m_decoded_thread_sp->GetInstructionsCount() <= idx) return false; - m_pos = id; - m_tsc_range = m_decoded_thread_sp->CalculateTscRange(m_pos, m_tsc_range); + m_pos = idx; + m_id = id; + m_tsc_range = + m_decoded_thread_sp->CalculateTscRangeByIndex(m_pos, m_tsc_range); return true; } -user_id_t TraceCursorIntelPT::GetId() const { return m_pos; } +user_id_t TraceCursorIntelPT::GetId() const { return m_id; }