diff --git a/lldb/include/lldb/Core/Disassembler.h b/lldb/include/lldb/Core/Disassembler.h --- a/lldb/include/lldb/Core/Disassembler.h +++ b/lldb/include/lldb/Core/Disassembler.h @@ -79,6 +79,12 @@ return m_comment.c_str(); } + /// \return + /// The control flow kind of this instruction, or + /// eInstructionControlFlowKindUnknown if the instruction + /// can't be classified. + lldb::InstructionControlFlowKind GetControlFlowKind(const ArchSpec &arch); + virtual void CalculateMnemonicOperandsAndComment(const ExecutionContext *exe_ctx) = 0; @@ -105,6 +111,9 @@ /// \param[in] show_bytes /// Whether the bytes of the assembly instruction should be printed. /// + /// \param[in] show_control_flow_kind + /// Whether the control flow kind of the instruction should be printed. + /// /// \param[in] max_opcode_byte_size /// The size (in bytes) of the largest instruction in the list that /// we are printing (for text justification/alignment purposes) @@ -140,7 +149,8 @@ /// so this method can properly align the instruction opcodes. /// May be 0 to indicate no indentation/alignment of the opcodes. 
virtual void Dump(Stream *s, uint32_t max_opcode_byte_size, bool show_address,
-                    bool show_bytes, const ExecutionContext *exe_ctx,
+                    bool show_bytes, bool show_control_flow_kind,
+                    const ExecutionContext *exe_ctx,
                     const SymbolContext *sym_ctx,
                     const SymbolContext *prev_sym_ctx,
                     const FormatEntity::Entry *disassembly_addr_format,
@@ -320,7 +330,7 @@
   void Append(lldb::InstructionSP &inst_sp);
 
   void Dump(Stream *s, bool show_address, bool show_bytes,
-            const ExecutionContext *exe_ctx);
+            bool show_control_flow_kind, const ExecutionContext *exe_ctx);
 
 private:
   typedef std::vector collection;
@@ -375,7 +385,8 @@
     eOptionMarkPCSourceLine = (1u << 2), // Mark the source line that contains
                                          // the current PC (mixed mode only)
     eOptionMarkPCAddress =
-        (1u << 3) // Mark the disassembly line the contains the PC
+        (1u << 3), // Mark the disassembly line the contains the PC
+    eOptionShowControlFlowKind = (1u << 4),
   };
 
   enum HexImmediateStyle {
diff --git a/lldb/include/lldb/Target/TraceDumper.h b/lldb/include/lldb/Target/TraceDumper.h
--- a/lldb/include/lldb/Target/TraceDumper.h
+++ b/lldb/include/lldb/Target/TraceDumper.h
@@ -34,6 +34,8 @@
   bool show_tsc = false;
   /// Dump the events that happened between instructions.
   bool show_events = false;
+  /// For each instruction, print the instruction kind.
+  bool show_control_flow_kind = false;
   /// Optional custom id to start traversing from.
   llvm::Optional id = llvm::None;
   /// Optional number of instructions to skip from the starting position
diff --git a/lldb/include/lldb/lldb-enumerations.h b/lldb/include/lldb/lldb-enumerations.h
--- a/lldb/include/lldb/lldb-enumerations.h
+++ b/lldb/include/lldb/lldb-enumerations.h
@@ -970,20 +970,30 @@
 /// control flow of a trace.
 ///
-/// A single instruction can match one or more of these categories.
+/// Each instruction is classified as exactly one of these kinds.
-FLAGS_ENUM(TraceInstructionControlFlowType){
-    /// Any instruction.
-    eTraceInstructionControlFlowTypeInstruction = (1u << 1),
-    /// A conditional or unconditional branch/jump.
-    eTraceInstructionControlFlowTypeBranch = (1u << 2),
-    /// A conditional or unconditional branch/jump that changed
-    /// the control flow of the program.
-    eTraceInstructionControlFlowTypeTakenBranch = (1u << 3),
-    /// A call to a function.
-    eTraceInstructionControlFlowTypeCall = (1u << 4),
-    /// A return from a function.
-    eTraceInstructionControlFlowTypeReturn = (1u << 5)};
-
-LLDB_MARK_AS_BITMASK_ENUM(TraceInstructionControlFlowType)
+enum InstructionControlFlowKind {
+  /// The instruction could not be classified.
+  eInstructionControlFlowKindUnknown = 0,
+  /// The instruction is something not listed below, i.e. it's a sequential
+  /// instruction that doesn't affect the control flow of the program.
+  eInstructionControlFlowKindOther,
+  /// The instruction is a near (function) call.
+  eInstructionControlFlowKindCall,
+  /// The instruction is a near (function) return.
+  eInstructionControlFlowKindReturn,
+  /// The instruction is a near unconditional jump.
+  eInstructionControlFlowKindJump,
+  /// The instruction is a near conditional jump.
+  eInstructionControlFlowKindCondJump,
+  /// The instruction is a call-like far transfer.
+  /// E.g. SYSCALL, SYSENTER, or FAR CALL.
+  eInstructionControlFlowKindFarCall,
+  /// The instruction is a return-like far transfer.
+  /// E.g. SYSRET, SYSEXIT, IRET, or FAR RET.
+  eInstructionControlFlowKindFarReturn,
+  /// The instruction is a jump-like far transfer.
+  /// E.g. FAR JMP.
+  eInstructionControlFlowKindFarJump
+};
 
 /// Watchpoint Kind.
 ///
diff --git a/lldb/source/API/SBInstruction.cpp b/lldb/source/API/SBInstruction.cpp
--- a/lldb/source/API/SBInstruction.cpp
+++ b/lldb/source/API/SBInstruction.cpp
@@ -241,7 +241,8 @@
     // didn't have a stream already created, one will get created...
FormatEntity::Entry format; FormatEntity::Parse("${addr}: ", format); - inst_sp->Dump(&s.ref(), 0, true, false, nullptr, &sc, nullptr, &format, 0); + inst_sp->Dump(&s.ref(), 0, true, false, /*show_control_flow_kind=*/false, + nullptr, &sc, nullptr, &format, 0); return true; } return false; @@ -275,8 +276,8 @@ StreamFile out_stream(out_sp); FormatEntity::Entry format; FormatEntity::Parse("${addr}: ", format); - inst_sp->Dump(&out_stream, 0, true, false, nullptr, &sc, nullptr, &format, - 0); + inst_sp->Dump(&out_stream, 0, true, false, /*show_control_flow_kind=*/false, + nullptr, &sc, nullptr, &format, 0); } } diff --git a/lldb/source/API/SBInstructionList.cpp b/lldb/source/API/SBInstructionList.cpp --- a/lldb/source/API/SBInstructionList.cpp +++ b/lldb/source/API/SBInstructionList.cpp @@ -165,8 +165,9 @@ addr, eSymbolContextEverything, sc); } - inst->Dump(&sref, max_opcode_byte_size, true, false, nullptr, &sc, - &prev_sc, &format, 0); + inst->Dump(&sref, max_opcode_byte_size, true, false, + /*show_control_flow_kind=*/false, nullptr, &sc, &prev_sc, + &format, 0); sref.EOL(); } return true; diff --git a/lldb/source/Commands/CommandObjectDisassemble.h b/lldb/source/Commands/CommandObjectDisassemble.h --- a/lldb/source/Commands/CommandObjectDisassemble.h +++ b/lldb/source/Commands/CommandObjectDisassemble.h @@ -46,6 +46,7 @@ bool show_mixed; // Show mixed source/assembly bool show_bytes; + bool show_control_flow_kind; uint32_t num_lines_context = 0; uint32_t num_instructions = 0; bool raw; diff --git a/lldb/source/Commands/CommandObjectDisassemble.cpp b/lldb/source/Commands/CommandObjectDisassemble.cpp --- a/lldb/source/Commands/CommandObjectDisassemble.cpp +++ b/lldb/source/Commands/CommandObjectDisassemble.cpp @@ -65,6 +65,10 @@ show_bytes = true; break; + case 'k': + show_control_flow_kind = true; + break; + case 's': { start_addr = OptionArgParser::ToAddress(execution_context, option_arg, LLDB_INVALID_ADDRESS, &error); @@ -154,6 +158,7 @@ ExecutionContext 
*execution_context) {
   show_mixed = false;
   show_bytes = false;
+  show_control_flow_kind = false;
   num_lines_context = 0;
   num_instructions = 0;
   func_name.clear();
@@ -493,6 +498,9 @@
   if (m_options.show_bytes)
     options |= Disassembler::eOptionShowBytes;
 
+  if (m_options.show_control_flow_kind)
+    options |= Disassembler::eOptionShowControlFlowKind;
+
   if (m_options.raw)
     options |= Disassembler::eOptionRawOuput;
 
diff --git a/lldb/source/Commands/CommandObjectThread.cpp b/lldb/source/Commands/CommandObjectThread.cpp
--- a/lldb/source/Commands/CommandObjectThread.cpp
+++ b/lldb/source/Commands/CommandObjectThread.cpp
@@ -2174,6 +2174,10 @@
       m_dumper_options.forwards = true;
       break;
     }
+    case 'k': {
+      m_dumper_options.show_control_flow_kind = true;
+      break;
+    }
     case 't': {
       m_dumper_options.show_tsc = true;
       break;
diff --git a/lldb/source/Commands/Options.td b/lldb/source/Commands/Options.td
--- a/lldb/source/Commands/Options.td
+++ b/lldb/source/Commands/Options.td
@@ -300,6 +300,11 @@
 let Command = "disassemble" in {
   def disassemble_options_bytes : Option<"bytes", "b">,
     Desc<"Show opcode bytes when disassembling.">;
+  def disassemble_options_kind : Option<"kind", "k">,
+    Desc<"Show instruction control flow kind. Refer to the enum "
+    "`InstructionControlFlowKind` for a list of control flow kind. "
+    "As an important note, far jumps, far calls and far returns often indicate "
+    "calls to and from kernel.">;
   def disassemble_options_context : Option<"context", "C">, Arg<"NumLines">,
     Desc<"Number of context lines of source to show.">;
   def disassemble_options_mixed : Option<"mixed", "m">,
@@ -1150,6 +1155,11 @@
   def thread_trace_dump_instructions_pretty_print: Option<"pretty-json", "J">,
     Group<1>,
     Desc<"Dump in JSON format but pretty printing the output for easier readability.">;
+  def thread_trace_dump_instructions_show_kind : Option<"kind", "k">, Group<1>,
+    Desc<"Show instruction control flow kind. Refer to the enum "
+    "`InstructionControlFlowKind` for a list of control flow kind. "
+    "As an important note, far jumps, far calls and far returns often indicate "
+    "calls to and from kernel.">;
   def thread_trace_dump_instructions_show_tsc : Option<"tsc", "t">, Group<1>,
     Desc<"For each instruction, print the corresponding timestamp counter if "
     "available.">;
diff --git a/lldb/source/Core/Disassembler.cpp b/lldb/source/Core/Disassembler.cpp
--- a/lldb/source/Core/Disassembler.cpp
+++ b/lldb/source/Core/Disassembler.cpp
@@ -527,8 +527,11 @@
       }
 
       const bool show_bytes = (options & eOptionShowBytes) != 0;
-      inst->Dump(&strm, max_opcode_byte_size, true, show_bytes, &exe_ctx, &sc,
-                 &prev_sc, nullptr, address_text_size);
+      const bool show_control_flow_kind =
+          (options & eOptionShowControlFlowKind) != 0;
+      inst->Dump(&strm, max_opcode_byte_size, true, show_bytes,
+                 show_control_flow_kind, &exe_ctx, &sc, &prev_sc, nullptr,
+                 address_text_size);
       strm.EOL();
     } else {
       break;
@@ -568,6 +571,356 @@
 
 Instruction::~Instruction() = default;
 
+namespace x86 {
+
+/// These are the three values deciding instruction control flow kind.
+/// InstructionLengthDecode function decodes an instruction and get this struct.
+///
+/// primary_opcode
+///    Primary opcode of the instruction.
+///    For one-byte opcode instruction, it's the first byte after prefix.
+///    For two- and three-byte opcodes, it's the second byte.
+///
+/// opcode_len
+///    The length of opcode in bytes. Valid opcode lengths are 1, 2, or 3.
+///
+/// modrm
+///    ModR/M byte of the instruction.
+///    Bits[7:6] indicate MOD. Bits[5:3] specify a register and R/M bits[2:0]
+///    may contain a register or specify an addressing mode, depending on MOD.
+struct InstructionOpcodeAndModrm {
+  uint8_t primary_opcode;
+  uint8_t opcode_len;
+  uint8_t modrm;
+};
+
+/// Determine the InstructionControlFlowKind based on opcode and modrm bytes.
+/// Refer to http://ref.x86asm.net/coder.html for the full list of opcode and
+/// instruction set.
+///
+/// \param[in] opcode_and_modrm
+///    Contains primary_opcode byte, its length, and ModR/M byte.
+///    Refer to the struct InstructionOpcodeAndModrm for details.
+///
+/// \return
+///   The control flow kind of the instruction or
+///   eInstructionControlFlowKindOther if the instruction doesn't affect
+///   the control flow of the program.
+lldb::InstructionControlFlowKind
+MapOpcodeIntoControlFlowKind(InstructionOpcodeAndModrm opcode_and_modrm) {
+  uint8_t opcode = opcode_and_modrm.primary_opcode;
+  uint8_t opcode_len = opcode_and_modrm.opcode_len;
+  uint8_t modrm = opcode_and_modrm.modrm;
+
+  if (opcode_len > 2)
+    return lldb::eInstructionControlFlowKindOther;
+
+  if (opcode >= 0x70 && opcode <= 0x7F) {
+    if (opcode_len == 1)
+      return lldb::eInstructionControlFlowKindCondJump;
+    else
+      return lldb::eInstructionControlFlowKindOther;
+  }
+
+  if (opcode >= 0x80 && opcode <= 0x8F) {
+    if (opcode_len == 2)
+      return lldb::eInstructionControlFlowKindCondJump;
+    else
+      return lldb::eInstructionControlFlowKindOther;
+  }
+
+  switch (opcode) {
+  case 0x9A:
+    if (opcode_len == 1)
+      return lldb::eInstructionControlFlowKindFarCall;
+    break;
+  case 0xFF:
+    if (opcode_len == 1) {
+      uint8_t modrm_reg = (modrm >> 3) & 7;
+      if (modrm_reg == 2)
+        return lldb::eInstructionControlFlowKindCall;
+      else if (modrm_reg == 3)
+        return lldb::eInstructionControlFlowKindFarCall;
+      else if (modrm_reg == 4)
+        return lldb::eInstructionControlFlowKindJump;
+      else if (modrm_reg == 5)
+        return lldb::eInstructionControlFlowKindFarJump;
+    }
+    break;
+  case 0xE8:
+    if (opcode_len == 1)
+      return lldb::eInstructionControlFlowKindCall;
+    break;
+  case 0xCD:
+  case 0xCC:
+  case 0xCE:
+  case 0xF1:
+    if (opcode_len == 1)
+      return lldb::eInstructionControlFlowKindFarCall;
+    break;
+  case 0xCF:
+    if (opcode_len == 1)
+      return lldb::eInstructionControlFlowKindFarReturn;
+    break;
+  case 0xE9:
+  case 0xEB:
+    if (opcode_len == 1)
+      return lldb::eInstructionControlFlowKindJump;
+    break;
+  case 0xEA:
+    if (opcode_len == 1)
+      return lldb::eInstructionControlFlowKindFarJump;
+    break;
+  case 0xE3:
+  case 0xE0:
+  case 0xE1:
+  case 0xE2:
+    if (opcode_len == 1)
+      return lldb::eInstructionControlFlowKindCondJump;
+    break;
+  case 0xC3:
+  case 0xC2:
+    if (opcode_len == 1)
+      return lldb::eInstructionControlFlowKindReturn;
+    break;
+  case 0xCB:
+  case 0xCA:
+    if (opcode_len == 1)
+      return lldb::eInstructionControlFlowKindFarReturn;
+    break;
+  case 0x05:
+  case 0x34:
+    if (opcode_len == 2)
+      return lldb::eInstructionControlFlowKindFarCall;
+    break;
+  case 0x35:
+  case 0x07:
+    if (opcode_len == 2)
+      return lldb::eInstructionControlFlowKindFarReturn;
+    break;
+  case 0x01:
+    if (opcode_len == 2) {
+      switch (modrm) {
+      case 0xc1:
+        return lldb::eInstructionControlFlowKindFarCall;
+      case 0xc2:
+      case 0xc3:
+        return lldb::eInstructionControlFlowKindFarReturn;
+      default:
+        break;
+      }
+    }
+    break;
+  default:
+    break;
+  }
+
+  return lldb::eInstructionControlFlowKindOther;
+}
+
+/// Decode an instruction into opcode, modrm and opcode_len.
+/// Refer to http://ref.x86asm.net/coder.html for the instruction bytes layout.
+/// Opcodes in x86 are generally the first byte of instruction, though two-byte
+/// instructions and prefixes exist. ModR/M is the byte following the opcode
+/// and adds additional information for how the instruction is executed.
+///
+/// \param[in] inst_bytes
+///    Raw bytes of the instruction
+///
+/// \param[in] bytes_len
+///    The length of the inst_bytes array.
+///
+/// \param[in] is_exec_mode_64b
+///    If true, the execution mode is 64 bit.
+///
+/// \return
+///    Returns decoded instruction as struct InstructionOpcodeAndModrm, holding
+///    primary_opcode, opcode_len and modrm byte. Refer to the struct definition
+///    for more details.
+///    Returns None if the opcode bytes are missing or truncated. modrm is 0
+///    when no byte follows the opcode within bytes_len.
+llvm::Optional<InstructionOpcodeAndModrm>
+InstructionLengthDecode(const uint8_t *inst_bytes, int bytes_len,
+                        bool is_exec_mode_64b) {
+  int op_idx = 0;
+  bool prefix_done = false;
+  InstructionOpcodeAndModrm ret = {0, 0, 0};
+
+  // Every read past the current byte goes through these helpers so that a
+  // truncated or malformed instruction can never index beyond bytes_len.
+  auto has_byte = [&](int idx) { return idx >= 0 && idx < bytes_len; };
+  auto byte_or_zero = [&](int idx) -> uint8_t {
+    return has_byte(idx) ? inst_bytes[idx] : 0;
+  };
+
+  // In most cases, the primary_opcode is the first byte of the instruction
+  // but some instructions have a prefix to be skipped for these calculations.
+  // The following mapping is inspired from libipt's instruction decoding logic
+  // in `src/pt_ild.c`
+  while (!prefix_done) {
+    if (op_idx >= bytes_len)
+      return llvm::None;
+
+    ret.primary_opcode = inst_bytes[op_idx];
+    switch (ret.primary_opcode) {
+    // prefix_ignore
+    case 0x26:
+    case 0x2e:
+    case 0x36:
+    case 0x3e:
+    case 0x64:
+    case 0x65:
+    // prefix_osz, prefix_asz
+    case 0x66:
+    case 0x67:
+    // prefix_lock, prefix_f2, prefix_f3
+    case 0xf0:
+    case 0xf2:
+    case 0xf3:
+      op_idx++;
+      break;
+
+    // prefix_rex
+    case 0x40:
+    case 0x41:
+    case 0x42:
+    case 0x43:
+    case 0x44:
+    case 0x45:
+    case 0x46:
+    case 0x47:
+    case 0x48:
+    case 0x49:
+    case 0x4a:
+    case 0x4b:
+    case 0x4c:
+    case 0x4d:
+    case 0x4e:
+    case 0x4f:
+      if (is_exec_mode_64b)
+        op_idx++;
+      else
+        prefix_done = true;
+      break;
+
+    // prefix_vex_c4, c5
+    case 0xc5:
+      if (!has_byte(op_idx + 1))
+        return llvm::None;
+      if (!is_exec_mode_64b && (inst_bytes[op_idx + 1] & 0xc0) != 0xc0) {
+        prefix_done = true;
+        break;
+      }
+      if (!has_byte(op_idx + 2))
+        return llvm::None;
+
+      ret.opcode_len = 2;
+      ret.primary_opcode = inst_bytes[op_idx + 2];
+      ret.modrm = byte_or_zero(op_idx + 3);
+      return ret;
+
+    case 0xc4:
+      if (!has_byte(op_idx + 1))
+        return llvm::None;
+      if (!is_exec_mode_64b && (inst_bytes[op_idx + 1] & 0xc0) != 0xc0) {
+        prefix_done = true;
+        break;
+      }
+      if (!has_byte(op_idx + 3))
+        return llvm::None;
+      ret.opcode_len = inst_bytes[op_idx + 1] & 0x1f;
+      ret.primary_opcode = inst_bytes[op_idx + 3];
+      ret.modrm = byte_or_zero(op_idx + 4);
+      return ret;
+
+    // prefix_evex
+    case 0x62:
+      if (!has_byte(op_idx + 1))
+        return llvm::None;
+      if (!is_exec_mode_64b && (inst_bytes[op_idx + 1] & 0xc0) != 0xc0) {
+        prefix_done = true;
+        break;
+      }
+      if (!has_byte(op_idx + 4))
+        return llvm::None;
+      ret.opcode_len = inst_bytes[op_idx + 1] & 0x03;
+      ret.primary_opcode = inst_bytes[op_idx + 4];
+      ret.modrm = byte_or_zero(op_idx + 5);
+      return ret;
+
+    default:
+      prefix_done = true;
+      break;
+    }
+  } // prefix done
+
+  ret.primary_opcode = inst_bytes[op_idx];
+  ret.modrm = byte_or_zero(op_idx + 1);
+  ret.opcode_len = 1;
+
+  // If the first opcode is 0F, it's two- or three- byte opcodes.
+  if (ret.primary_opcode == 0x0F) {
+    if (!has_byte(++op_idx))
+      return llvm::None;
+    ret.primary_opcode = inst_bytes[op_idx]; // get the next byte
+
+    if (ret.primary_opcode == 0x38 || ret.primary_opcode == 0x3A) {
+      // Three-byte opcode maps 0F 38 and 0F 3A.
+      if (!has_byte(++op_idx))
+        return llvm::None;
+      ret.opcode_len = 3;
+      ret.primary_opcode = inst_bytes[op_idx]; // get the next byte
+      ret.modrm = byte_or_zero(op_idx + 1);
+    } else if ((ret.primary_opcode & 0xf8) == 0x38) {
+      if (!has_byte(++op_idx))
+        return llvm::None;
+      ret.opcode_len = 0;
+      ret.primary_opcode = inst_bytes[op_idx];
+      ret.modrm = byte_or_zero(op_idx + 1);
+    } else if (ret.primary_opcode == 0x0F) {
+      ret.opcode_len = 3;
+      // opcode is 0x0F, no needs to update
+      ret.modrm = byte_or_zero(op_idx + 1);
+    } else {
+      ret.opcode_len = 2;
+      ret.modrm = byte_or_zero(op_idx + 1);
+    }
+  }
+
+  return ret;
+}
+
+lldb::InstructionControlFlowKind GetControlFlowKind(bool is_exec_mode_64b,
+                                                    Opcode m_opcode) {
+  llvm::Optional<InstructionOpcodeAndModrm> ret = llvm::None;
+
+  if (m_opcode.GetOpcodeBytes() == nullptr || m_opcode.GetByteSize() <= 0) {
+    // x86_64 and i386 instructions are categorized as Opcode::Type::eTypeBytes
+    return lldb::eInstructionControlFlowKindUnknown;
+  }
+
+  // Opcode bytes will be decoded into primary_opcode, modrm and opcode length.
+  // These are the three values deciding instruction control flow kind.
+  ret = InstructionLengthDecode((const uint8_t *)m_opcode.GetOpcodeBytes(),
+                                m_opcode.GetByteSize(), is_exec_mode_64b);
+  if (!ret)
+    return lldb::eInstructionControlFlowKindUnknown;
+  else
+    return MapOpcodeIntoControlFlowKind(ret.value());
+}
+
+} // namespace x86
+
+lldb::InstructionControlFlowKind
+Instruction::GetControlFlowKind(const ArchSpec &arch) {
+  if (arch.GetTriple().getArch() == llvm::Triple::x86)
+    return x86::GetControlFlowKind(/*is_exec_mode_64b=*/false, m_opcode);
+  else if (arch.GetTriple().getArch() == llvm::Triple::x86_64)
+    return x86::GetControlFlowKind(/*is_exec_mode_64b=*/true, m_opcode);
+  else
+    return eInstructionControlFlowKindUnknown; // not implemented
+}
+
 AddressClass Instruction::GetAddressClass() {
   if (m_address_class == AddressClass::eInvalid)
     m_address_class = m_address.GetAddressClass();
@@ -576,6 +929,7 @@
 
 void Instruction::Dump(lldb_private::Stream *s, uint32_t max_opcode_byte_size,
                        bool show_address, bool show_bytes,
+                       bool show_control_flow_kind,
                        const ExecutionContext *exe_ctx,
                        const SymbolContext *sym_ctx,
                        const SymbolContext *prev_sym_ctx,
@@ -613,6 +967,43 @@
     }
   }
 
+  if (show_control_flow_kind) {
+    // Some callers pass a null exe_ctx, and a context may have no target;
+    // without an architecture the kind is reported as "unknown".
+    lldb::InstructionControlFlowKind kind = eInstructionControlFlowKindUnknown;
+    if (exe_ctx && exe_ctx->GetTargetPtr())
+      kind = GetControlFlowKind(exe_ctx->GetTargetRef().GetArchitecture());
+    switch (kind) {
+    case eInstructionControlFlowKindUnknown:
+      ss.Printf("%-12s", "unknown");
+      break;
+    case eInstructionControlFlowKindOther:
+      ss.Printf("%-12s", "other");
+      break;
+    case eInstructionControlFlowKindCall:
+      ss.Printf("%-12s", "call");
+      break;
+    case eInstructionControlFlowKindReturn:
+      ss.Printf("%-12s", "return");
+      break;
+    case eInstructionControlFlowKindJump:
+      ss.Printf("%-12s", "jump");
+      break;
+    case eInstructionControlFlowKindCondJump:
+      ss.Printf("%-12s", "cond jump");
+      break;
+    case eInstructionControlFlowKindFarCall:
+      ss.Printf("%-12s", "far call");
+      break;
+    case eInstructionControlFlowKindFarReturn:
+      ss.Printf("%-12s", "far return");
+      break;
+    case eInstructionControlFlowKindFarJump:
+      ss.Printf("%-12s", "far jump");
+      break;
+    }
+  }
+ const size_t opcode_pos = ss.GetSizeOfLastLine(); // The default opcode size of 7 characters is plenty for most architectures @@ -957,6 +1321,7 @@ } void InstructionList::Dump(Stream *s, bool show_address, bool show_bytes, + bool show_control_flow_kind, const ExecutionContext *exe_ctx) { const uint32_t max_opcode_byte_size = GetMaxOpcocdeByteSize(); collection::const_iterator pos, begin, end; @@ -975,8 +1340,9 @@ pos != end; ++pos) { if (pos != begin) s->EOL(); - (*pos)->Dump(s, max_opcode_byte_size, show_address, show_bytes, exe_ctx, - nullptr, nullptr, disassembly_format, 0); + (*pos)->Dump(s, max_opcode_byte_size, show_address, show_bytes, + show_control_flow_kind, exe_ctx, nullptr, nullptr, + disassembly_format, 0); } } @@ -994,7 +1360,7 @@ size_t num_instructions = m_instructions.size(); uint32_t next_branch = UINT32_MAX; - + if (found_calls) *found_calls = false; for (size_t i = start; i < num_instructions; i++) { diff --git a/lldb/source/Core/DumpDataExtractor.cpp b/lldb/source/Core/DumpDataExtractor.cpp --- a/lldb/source/Core/DumpDataExtractor.cpp +++ b/lldb/source/Core/DumpDataExtractor.cpp @@ -170,10 +170,11 @@ offset += bytes_consumed; const bool show_address = base_addr != LLDB_INVALID_ADDRESS; const bool show_bytes = true; + const bool show_control_flow_kind = true; ExecutionContext exe_ctx; exe_scope->CalculateExecutionContext(exe_ctx); - disassembler_sp->GetInstructionList().Dump(s, show_address, show_bytes, - &exe_ctx); + disassembler_sp->GetInstructionList().Dump( + s, show_address, show_bytes, show_control_flow_kind, &exe_ctx); } } } else diff --git a/lldb/source/Expression/IRExecutionUnit.cpp b/lldb/source/Expression/IRExecutionUnit.cpp --- a/lldb/source/Expression/IRExecutionUnit.cpp +++ b/lldb/source/Expression/IRExecutionUnit.cpp @@ -200,7 +200,9 @@ UINT32_MAX, false, false); InstructionList &instruction_list = disassembler_sp->GetInstructionList(); - instruction_list.Dump(&stream, true, true, &exe_ctx); + instruction_list.Dump(&stream, 
true, true, /*show_control_flow_kind=*/true,
+                          &exe_ctx);
+
   return ret;
 }
 
diff --git a/lldb/source/Plugins/TraceExporter/common/TraceHTR.cpp b/lldb/source/Plugins/TraceExporter/common/TraceHTR.cpp
--- a/lldb/source/Plugins/TraceExporter/common/TraceHTR.cpp
+++ b/lldb/source/Plugins/TraceExporter/common/TraceHTR.cpp
@@ -160,15 +160,15 @@
       cursor.Next();
     } else {
       lldb::addr_t current_instruction_load_address = cursor.GetLoadAddress();
-      lldb::TraceInstructionControlFlowType current_instruction_type =
-          cursor.GetInstructionControlFlowType();
+      lldb::InstructionControlFlowKind current_instruction_type =
+          cursor.GetInstructionControlFlowKind();
 
       m_instruction_layer_up->AppendInstruction(
           current_instruction_load_address);
       cursor.Next();
       bool more_data_in_trace = cursor.HasValue();
-      if (current_instruction_type &
-          lldb::eTraceInstructionControlFlowTypeCall) {
+      if (current_instruction_type ==
+          lldb::eInstructionControlFlowKindCall) {
         if (more_data_in_trace && !cursor.IsError()) {
           m_instruction_layer_up->AddCallInstructionMetadata(
               current_instruction_load_address,
diff --git a/lldb/source/Plugins/UnwindAssembly/InstEmulation/UnwindAssemblyInstEmulation.cpp b/lldb/source/Plugins/UnwindAssembly/InstEmulation/UnwindAssemblyInstEmulation.cpp
--- a/lldb/source/Plugins/UnwindAssembly/InstEmulation/UnwindAssemblyInstEmulation.cpp
+++ b/lldb/source/Plugins/UnwindAssembly/InstEmulation/UnwindAssemblyInstEmulation.cpp
@@ -83,6 +83,7 @@
   const uint32_t addr_byte_size = m_arch.GetAddressByteSize();
   const bool show_address = true;
   const bool show_bytes = true;
+  const bool show_control_flow_kind = false; // no exe_ctx for Dump
   m_inst_emulator_up->GetRegisterInfo(unwind_plan.GetRegisterKind(),
                                       unwind_plan.GetInitialCFARegister(),
                                       m_cfa_reg_info);
@@ -244,7 +245,8 @@
             lldb_private::FormatEntity::Entry format;
             FormatEntity::Parse("${frame.pc}: ", format);
             inst->Dump(&strm, inst_list.GetMaxOpcocdeByteSize(), show_address,
-                       show_bytes, nullptr, nullptr, nullptr, &format, 0);
+                       show_bytes, show_control_flow_kind, nullptr, nullptr,
+                       nullptr, &format, 0);
             log->PutString(strm.GetString());
           }
 
diff --git a/lldb/source/Symbol/Function.cpp b/lldb/source/Symbol/Function.cpp
--- a/lldb/source/Symbol/Function.cpp
+++ b/lldb/source/Symbol/Function.cpp
@@ -439,8 +439,9 @@
   if (disassembler_sp) {
     const bool show_address = true;
     const bool show_bytes = false;
-    disassembler_sp->GetInstructionList().Dump(&strm, show_address, show_bytes,
-                                               &exe_ctx);
+    const bool show_control_flow_kind = false;
+    disassembler_sp->GetInstructionList().Dump(
+        &strm, show_address, show_bytes, show_control_flow_kind, &exe_ctx);
     return true;
   }
   return false;
diff --git a/lldb/source/Symbol/Symbol.cpp b/lldb/source/Symbol/Symbol.cpp
--- a/lldb/source/Symbol/Symbol.cpp
+++ b/lldb/source/Symbol/Symbol.cpp
@@ -558,8 +558,9 @@
   if (disassembler_sp) {
     const bool show_address = true;
     const bool show_bytes = false;
-    disassembler_sp->GetInstructionList().Dump(&strm, show_address, show_bytes,
-                                               &exe_ctx);
+    const bool show_control_flow_kind = false;
+    disassembler_sp->GetInstructionList().Dump(
+        &strm, show_address, show_bytes, show_control_flow_kind, &exe_ctx);
     return true;
   }
   return false;
diff --git a/lldb/source/Target/ThreadPlanTracer.cpp b/lldb/source/Target/ThreadPlanTracer.cpp
--- a/lldb/source/Target/ThreadPlanTracer.cpp
+++ b/lldb/source/Target/ThreadPlanTracer.cpp
@@ -170,13 +170,14 @@
       if (instruction_list.GetSize()) {
         const bool show_bytes = true;
         const bool show_address = true;
+        const bool show_control_flow_kind = false; // no exe_ctx for Dump
         Instruction *instruction =
             instruction_list.GetInstructionAtIndex(0).get();
         const FormatEntity::Entry *disassemble_format =
             m_process.GetTarget().GetDebugger().GetDisassemblyFormat();
         instruction->Dump(stream, max_opcode_byte_size, show_address,
-                          show_bytes, nullptr, nullptr, nullptr,
-                          disassemble_format, 0);
+                          show_bytes, show_control_flow_kind, nullptr, nullptr,
+                          nullptr, disassemble_format, 0);
       }
     }
   }
diff --git a/lldb/source/Target/TraceDumper.cpp b/lldb/source/Target/TraceDumper.cpp
---
a/lldb/source/Target/TraceDumper.cpp +++ b/lldb/source/Target/TraceDumper.cpp @@ -147,14 +147,14 @@ m_s.Format("{0:x+16}", item.load_address); if (item.symbol_info) { m_s << " "; - item.symbol_info->instruction->Dump(&m_s, /*max_opcode_byte_size=*/0, - /*show_address=*/false, - /*show_bytes=*/false, - &item.symbol_info->exe_ctx, - &item.symbol_info->sc, - /*prev_sym_ctx=*/nullptr, - /*disassembly_addr_format=*/nullptr, - /*max_address_text_size=*/0); + item.symbol_info->instruction->Dump( + &m_s, /*max_opcode_byte_size=*/0, + /*show_address=*/false, + /*show_bytes=*/false, m_options.show_control_flow_kind, + &item.symbol_info->exe_ctx, &item.symbol_info->sc, + /*prev_sym_ctx=*/nullptr, + /*disassembly_addr_format=*/nullptr, + /*max_address_text_size=*/0); } } diff --git a/lldb/unittests/Disassembler/ARM/CMakeLists.txt b/lldb/unittests/Disassembler/ARM/CMakeLists.txt new file mode 100644 --- /dev/null +++ b/lldb/unittests/Disassembler/ARM/CMakeLists.txt @@ -0,0 +1,12 @@ +add_lldb_unittest(DisassemblerTests + TestArm64Disassembly.cpp + TestArmv7Disassembly.cpp + LINK_LIBS + lldbCore + lldbSymbol + lldbTarget + lldbPluginDisassemblerLLVMC + lldbPluginProcessUtility + LINK_COMPONENTS + Support + ${LLVM_TARGETS_TO_BUILD}) diff --git a/lldb/unittests/Disassembler/TestArm64Disassembly.cpp b/lldb/unittests/Disassembler/ARM/TestArm64Disassembly.cpp rename from lldb/unittests/Disassembler/TestArm64Disassembly.cpp rename to lldb/unittests/Disassembler/ARM/TestArm64Disassembly.cpp diff --git a/lldb/unittests/Disassembler/TestArmv7Disassembly.cpp b/lldb/unittests/Disassembler/ARM/TestArmv7Disassembly.cpp rename from lldb/unittests/Disassembler/TestArmv7Disassembly.cpp rename to lldb/unittests/Disassembler/ARM/TestArmv7Disassembly.cpp diff --git a/lldb/unittests/Disassembler/CMakeLists.txt b/lldb/unittests/Disassembler/CMakeLists.txt --- a/lldb/unittests/Disassembler/CMakeLists.txt +++ b/lldb/unittests/Disassembler/CMakeLists.txt @@ -1,14 +1,7 @@ if("ARM" IN_LIST 
LLVM_TARGETS_TO_BUILD) - add_lldb_unittest(DisassemblerTests - TestArm64Disassembly.cpp - TestArmv7Disassembly.cpp - LINK_LIBS - lldbCore - lldbSymbol - lldbTarget - lldbPluginDisassemblerLLVMC - lldbPluginProcessUtility - LINK_COMPONENTS - Support - ${LLVM_TARGETS_TO_BUILD}) + add_subdirectory(ARM) +endif() + +if("X86" IN_LIST LLVM_TARGETS_TO_BUILD) + add_subdirectory(x86) endif() diff --git a/lldb/unittests/Disassembler/x86/CMakeLists.txt b/lldb/unittests/Disassembler/x86/CMakeLists.txt new file mode 100644 --- /dev/null +++ b/lldb/unittests/Disassembler/x86/CMakeLists.txt @@ -0,0 +1,12 @@ +add_lldb_unittest(GetControlFlowKindx86Tests + TestGetControlFlowKindx86.cpp + LINK_LIBS + lldbCore + lldbSymbol + lldbTarget + lldbPluginDisassemblerLLVMC + lldbPluginProcessUtility + LINK_COMPONENTS + Support + ${LLVM_TARGETS_TO_BUILD} + ) diff --git a/lldb/unittests/Disassembler/x86/TestGetControlFlowKindx86.cpp b/lldb/unittests/Disassembler/x86/TestGetControlFlowKindx86.cpp new file mode 100644 --- /dev/null +++ b/lldb/unittests/Disassembler/x86/TestGetControlFlowKindx86.cpp @@ -0,0 +1,144 @@ +//===-- TextX86GetControlFlowKind.cpp ------------------------------------------===// + +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "llvm/Support/TargetSelect.h" +#include "gtest/gtest.h" + +#include "lldb/Core/Address.h" +#include "lldb/Core/Disassembler.h" +#include "lldb/Target/ExecutionContext.h" +#include "lldb/Utility/ArchSpec.h" + +#include "Plugins/Disassembler/LLVMC/DisassemblerLLVMC.h" + +using namespace lldb; +using namespace lldb_private; + +class TestGetControlFlowKindx86 : public testing::Test { +public: + static void SetUpTestCase(); + static void TearDownTestCase(); + +protected: +}; + +void TestGetControlFlowKindx86::SetUpTestCase() { + llvm::InitializeAllTargets(); + llvm::InitializeAllAsmPrinters(); + llvm::InitializeAllTargetMCs(); + llvm::InitializeAllDisassemblers(); + DisassemblerLLVMC::Initialize(); +} + +void TestGetControlFlowKindx86::TearDownTestCase() { + DisassemblerLLVMC::Terminate(); +} + +TEST_F(TestGetControlFlowKindx86, TestX86_64Instruction) { + ArchSpec arch("x86_64-*-linux"); + + const unsigned num_of_instructions = 29; + uint8_t data[] = { + 0x55, // other -- pushq %rbp + 0x48, 0x89, 0xe5, // other -- movq %rsp, %rbp + + 0xe8, 0xfc, 0xfe, 0xff, 0xff, // call -- callq 0x4004c0 + 0x41, 0xff, 0x14, 0xdc, // call -- callq *(%r12,%rbx,8) + 0xff, 0x50, 0x18, // call -- callq *0x18(%rax) + 0xe8, 0x48, 0x0d, 0x00, 0x00, // call -- callq 0x94fe0 + + 0xc3, // return -- retq + + 0xeb, 0xd3, // jump -- jmp 0x92dab + 0xe9, 0x22, 0xff, 0xff, 0xff, // jump -- jmp 0x933ae + 0xff, 0xe0, // jump -- jmpq *%rax + 0xf2, 0xff, 0x25, 0x75, 0xe7, 0x39, 0x00, // jump -- repne jmpq *0x39e775 + + 0x73, 0xc2, // cond jump -- jae 0x9515c + 0x74, 0x1f, // cond jump -- je 0x400626 + 0x75, 0xea, // cond jump -- jne 0x400610 + 0x76, 0x10, // cond jump -- jbe 0x94d10 + 0x77, 0x58, // cond jump -- ja 0x1208c8 + 0x7e, 0x67, // cond jump -- jle 0x92180 + 0x78, 0x0b, // cond jump -- js 0x92dc3 + 0x0f, 0x82, 0x17, 0x01, 0x00, 0x00, // cond 
jump -- jb 0x9c7b0 + 0x0f, 0x83, 0xa7, 0x00, 0x00, 0x00, // cond jump -- jae 0x895c8 + 0x0f, 0x84, 0x8c, 0x00, 0x00, 0x00, // cond jump -- je 0x941f0 + 0x0f, 0x85, 0x51, 0xff, 0xff, 0xff, // cond jump -- jne 0x8952c + 0x0f, 0x86, 0xa3, 0x02, 0x00, 0x00, // cond jump -- jbe 0x9ae10 + 0x0f, 0x87, 0xff, 0x00, 0x00, 0x00, // cond jump -- ja 0x9ab60 + 0x0f, 0x8e, 0x7e, 0x00, 0x00, 0x00, // cond jump -- jle 0x92dd8 + 0x0f, 0x86, 0xdf, 0x00, 0x00, 0x00, // cond jump -- jbe 0x921b0 + + 0x0f, 0x05, // far call -- syscall + + 0x0f, 0x07, // far return -- sysret + 0xcf, // far return -- interrupt ret + }; + + InstructionControlFlowKind result[] = { + eInstructionControlFlowKindOther, + eInstructionControlFlowKindOther, + + eInstructionControlFlowKindCall, + eInstructionControlFlowKindCall, + eInstructionControlFlowKindCall, + eInstructionControlFlowKindCall, + + eInstructionControlFlowKindReturn, + + eInstructionControlFlowKindJump, + eInstructionControlFlowKindJump, + eInstructionControlFlowKindJump, + eInstructionControlFlowKindJump, + + eInstructionControlFlowKindCondJump, + eInstructionControlFlowKindCondJump, + eInstructionControlFlowKindCondJump, + eInstructionControlFlowKindCondJump, + eInstructionControlFlowKindCondJump, + eInstructionControlFlowKindCondJump, + eInstructionControlFlowKindCondJump, + eInstructionControlFlowKindCondJump, + eInstructionControlFlowKindCondJump, + eInstructionControlFlowKindCondJump, + eInstructionControlFlowKindCondJump, + eInstructionControlFlowKindCondJump, + eInstructionControlFlowKindCondJump, + eInstructionControlFlowKindCondJump, + eInstructionControlFlowKindCondJump, + + eInstructionControlFlowKindFarCall, + + eInstructionControlFlowKindFarReturn, + eInstructionControlFlowKindFarReturn, + }; + + DisassemblerSP disass_sp; + Address start_addr(0x100); + disass_sp = + Disassembler::DisassembleBytes(arch, nullptr, nullptr, start_addr, &data, + sizeof (data), num_of_instructions, false); + + // If we failed to get a disassembler, we can 
assume it is because
+  // the llvm we linked against was not built with the X86 target,
+  // and we should skip these tests without marking anything as failing.
+
+  if (disass_sp) {
+    const InstructionList inst_list(disass_sp->GetInstructionList());
+    EXPECT_EQ(num_of_instructions, inst_list.GetSize());
+
+    for (size_t i = 0; i < num_of_instructions; ++i) {
+      InstructionSP inst_sp;
+      inst_sp = inst_list.GetInstructionAtIndex(i);
+      InstructionControlFlowKind kind = inst_sp->GetControlFlowKind(arch);
+      EXPECT_EQ(kind, result[i]);
+    }
+  }
+}