Index: lldb/include/lldb/Core/Disassembler.h
===================================================================
--- lldb/include/lldb/Core/Disassembler.h
+++ lldb/include/lldb/Core/Disassembler.h
@@ -83,7 +83,10 @@
   ///    The control flow kind of this instruction, or
   ///    eInstructionControlFlowKindUnknown if the instruction
   ///    can't be classified.
-  lldb::InstructionControlFlowKind GetControlFlowKind(const ArchSpec &arch);
+  virtual lldb::InstructionControlFlowKind
+  GetControlFlowKind(const ExecutionContext *exe_ctx) {
+    return lldb::eInstructionControlFlowKindUnknown;
+  }
 
   virtual void
   CalculateMnemonicOperandsAndComment(const ExecutionContext *exe_ctx) = 0;
Index: lldb/source/Core/Disassembler.cpp
===================================================================
--- lldb/source/Core/Disassembler.cpp
+++ lldb/source/Core/Disassembler.cpp
@@ -571,334 +571,6 @@
 
 Instruction::~Instruction() = default;
 
-namespace x86 {
-
-/// These are the three values deciding instruction control flow kind.
-/// InstructionLengthDecode function decodes an instruction and get this struct.
-///
-/// primary_opcode
-///    Primary opcode of the instruction.
-///    For one-byte opcode instruction, it's the first byte after prefix.
-///    For two- and three-byte opcodes, it's the second byte.
-///
-/// opcode_len
-///    The length of opcode in bytes. Valid opcode lengths are 1, 2, or 3.
-///
-/// modrm
-///    ModR/M byte of the instruction.
-///    Bits[7:6] indicate MOD. Bits[5:3] specify a register and R/M bits[2:0]
-///    may contain a register or specify an addressing mode, depending on MOD.
-struct InstructionOpcodeAndModrm {
-  uint8_t primary_opcode;
-  uint8_t opcode_len;
-  uint8_t modrm;
-};
-
-/// Determine the InstructionControlFlowKind based on opcode and modrm bytes.
-/// Refer to http://ref.x86asm.net/coder.html for the full list of opcode and
-/// instruction set.
-///
-/// \param[in] opcode_and_modrm
-///    Contains primary_opcode byte, its length, and ModR/M byte.
-///    Refer to the struct InstructionOpcodeAndModrm for details.
-///
-/// \return
-///   The control flow kind of the instruction or
-///   eInstructionControlFlowKindOther if the instruction doesn't affect
-///   the control flow of the program.
-lldb::InstructionControlFlowKind
-MapOpcodeIntoControlFlowKind(InstructionOpcodeAndModrm opcode_and_modrm) {
-  uint8_t opcode = opcode_and_modrm.primary_opcode;
-  uint8_t opcode_len = opcode_and_modrm.opcode_len;
-  uint8_t modrm = opcode_and_modrm.modrm;
-
-  if (opcode_len > 2)
-    return lldb::eInstructionControlFlowKindOther;
-
-  if (opcode >= 0x70 && opcode <= 0x7F) {
-    if (opcode_len == 1)
-      return lldb::eInstructionControlFlowKindCondJump;
-    else
-      return lldb::eInstructionControlFlowKindOther;
-  }
-
-  if (opcode >= 0x80 && opcode <= 0x8F) {
-    if (opcode_len == 2)
-      return lldb::eInstructionControlFlowKindCondJump;
-    else
-      return lldb::eInstructionControlFlowKindOther;
-  }
-
-  switch (opcode) {
-  case 0x9A:
-    if (opcode_len == 1)
-      return lldb::eInstructionControlFlowKindFarCall;
-    break;
-  case 0xFF:
-    if (opcode_len == 1) {
-      uint8_t modrm_reg = (modrm >> 3) & 7;
-      if (modrm_reg == 2)
-        return lldb::eInstructionControlFlowKindCall;
-      else if (modrm_reg == 3)
-        return lldb::eInstructionControlFlowKindFarCall;
-      else if (modrm_reg == 4)
-        return lldb::eInstructionControlFlowKindJump;
-      else if (modrm_reg == 5)
-        return lldb::eInstructionControlFlowKindFarJump;
-    }
-    break;
-  case 0xE8:
-    if (opcode_len == 1)
-      return lldb::eInstructionControlFlowKindCall;
-    break;
-  case 0xCD:
-  case 0xCC:
-  case 0xCE:
-  case 0xF1:
-    if (opcode_len == 1)
-      return lldb::eInstructionControlFlowKindFarCall;
-    break;
-  case 0xCF:
-    if (opcode_len == 1)
-      return lldb::eInstructionControlFlowKindFarReturn;
-    break;
-  case 0xE9:
-  case 0xEB:
-    if (opcode_len == 1)
-      return lldb::eInstructionControlFlowKindJump;
-    break;
-  case 0xEA:
-    if (opcode_len == 1)
-      return lldb::eInstructionControlFlowKindFarJump;
-    break;
-  case 0xE3:
-  case 0xE0:
-  case 0xE1:
-  case 0xE2:
-    if (opcode_len == 1)
-      return lldb::eInstructionControlFlowKindCondJump;
-    break;
-  case 0xC3:
-  case 0xC2:
-    if (opcode_len == 1)
-      return lldb::eInstructionControlFlowKindReturn;
-    break;
-  case 0xCB:
-  case 0xCA:
-    if (opcode_len == 1)
-      return lldb::eInstructionControlFlowKindFarReturn;
-    break;
-  case 0x05:
-  case 0x34:
-    if (opcode_len == 2)
-      return lldb::eInstructionControlFlowKindFarCall;
-    break;
-  case 0x35:
-  case 0x07:
-    if (opcode_len == 2)
-      return lldb::eInstructionControlFlowKindFarReturn;
-    break;
-  case 0x01:
-    if (opcode_len == 2) {
-      switch (modrm) {
-      case 0xc1:
-        return lldb::eInstructionControlFlowKindFarCall;
-      case 0xc2:
-      case 0xc3:
-        return lldb::eInstructionControlFlowKindFarReturn;
-      default:
-        break;
-      }
-    }
-    break;
-  default:
-    break;
-  }
-
-  return lldb::eInstructionControlFlowKindOther;
-}
-
-/// Decode an instruction into opcode, modrm and opcode_len.
-/// Refer to http://ref.x86asm.net/coder.html for the instruction bytes layout.
-/// Opcodes in x86 are generally the first byte of instruction, though two-byte
-/// instructions and prefixes exist. ModR/M is the byte following the opcode
-/// and adds additional information for how the instruction is executed.
-///
-/// \param[in] inst_bytes
-///    Raw bytes of the instruction
-///
-///
-/// \param[in] bytes_len
-///    The length of the inst_bytes array.
-///
-/// \param[in] is_exec_mode_64b
-///    If true, the execution mode is 64 bit.
-///
-/// \return
-///    Returns decoded instruction as struct InstructionOpcodeAndModrm, holding
-///    primary_opcode, opcode_len and modrm byte. Refer to the struct definition
-///    for more details.
-///    Otherwise if the given instruction is invalid, returns None.
-llvm::Optional<InstructionOpcodeAndModrm>
-InstructionLengthDecode(const uint8_t *inst_bytes, int bytes_len,
-                        bool is_exec_mode_64b) {
-  int op_idx = 0;
-  bool prefix_done = false;
-  InstructionOpcodeAndModrm ret = {0, 0, 0};
-
-  // In most cases, the primary_opcode is the first byte of the instruction
-  // but some instructions have a prefix to be skipped for these calculations.
-  // The following mapping is inspired from libipt's instruction decoding logic
-  // in `src/pt_ild.c`
-  while (!prefix_done) {
-    if (op_idx >= bytes_len)
-      return llvm::None;
-
-    ret.primary_opcode = inst_bytes[op_idx];
-    switch (ret.primary_opcode) {
-    // prefix_ignore
-    case 0x26:
-    case 0x2e:
-    case 0x36:
-    case 0x3e:
-    case 0x64:
-    case 0x65:
-    // prefix_osz, prefix_asz
-    case 0x66:
-    case 0x67:
-    // prefix_lock, prefix_f2, prefix_f3
-    case 0xf0:
-    case 0xf2:
-    case 0xf3:
-      op_idx++;
-      break;
-
-    // prefix_rex
-    case 0x40:
-    case 0x41:
-    case 0x42:
-    case 0x43:
-    case 0x44:
-    case 0x45:
-    case 0x46:
-    case 0x47:
-    case 0x48:
-    case 0x49:
-    case 0x4a:
-    case 0x4b:
-    case 0x4c:
-    case 0x4d:
-    case 0x4e:
-    case 0x4f:
-      if (is_exec_mode_64b)
-        op_idx++;
-      else
-        prefix_done = true;
-      break;
-
-    // prefix_vex_c4, c5
-    case 0xc5:
-      if (!is_exec_mode_64b && (inst_bytes[op_idx + 1] & 0xc0) != 0xc0) {
-        prefix_done = true;
-        break;
-      }
-
-      ret.opcode_len = 2;
-      ret.primary_opcode = inst_bytes[op_idx + 2];
-      ret.modrm = inst_bytes[op_idx + 3];
-      return ret;
-
-    case 0xc4:
-      if (!is_exec_mode_64b && (inst_bytes[op_idx + 1] & 0xc0) != 0xc0) {
-        prefix_done = true;
-        break;
-      }
-      ret.opcode_len = inst_bytes[op_idx + 1] & 0x1f;
-      ret.primary_opcode = inst_bytes[op_idx + 3];
-      ret.modrm = inst_bytes[op_idx + 4];
-      return ret;
-
-    // prefix_evex
-    case 0x62:
-      if (!is_exec_mode_64b && (inst_bytes[op_idx + 1] & 0xc0) != 0xc0) {
-        prefix_done = true;
-        break;
-      }
-      ret.opcode_len = inst_bytes[op_idx + 1] & 0x03;
-      ret.primary_opcode = inst_bytes[op_idx + 4];
-      ret.modrm = inst_bytes[op_idx + 5];
-      return ret;
-
-    default:
-      prefix_done = true;
-      break;
-    }
-  } // prefix done
-
-  ret.primary_opcode = inst_bytes[op_idx];
-  ret.modrm = inst_bytes[op_idx + 1];
-  ret.opcode_len = 1;
-
-  // If the first opcode is 0F, it's two- or three- byte opcodes.
-  if (ret.primary_opcode == 0x0F) {
-    ret.primary_opcode = inst_bytes[++op_idx]; // get the next byte
-
-    if (ret.primary_opcode == 0x38) {
-      ret.opcode_len = 3;
-      ret.primary_opcode = inst_bytes[++op_idx]; // get the next byte
-      ret.modrm = inst_bytes[op_idx + 1];
-    } else if (ret.primary_opcode == 0x3A) {
-      ret.opcode_len = 3;
-      ret.primary_opcode = inst_bytes[++op_idx];
-      ret.modrm = inst_bytes[op_idx + 1];
-    } else if ((ret.primary_opcode & 0xf8) == 0x38) {
-      ret.opcode_len = 0;
-      ret.primary_opcode = inst_bytes[++op_idx];
-      ret.modrm = inst_bytes[op_idx + 1];
-    } else if (ret.primary_opcode == 0x0F) {
-      ret.opcode_len = 3;
-      // opcode is 0x0F, no needs to update
-      ret.modrm = inst_bytes[op_idx + 1];
-    } else {
-      ret.opcode_len = 2;
-      ret.modrm = inst_bytes[op_idx + 1];
-    }
-  }
-
-  return ret;
-}
-
-lldb::InstructionControlFlowKind GetControlFlowKind(bool is_exec_mode_64b,
-                                                    Opcode m_opcode) {
-  llvm::Optional<InstructionOpcodeAndModrm> ret = llvm::None;
-
-  if (m_opcode.GetOpcodeBytes() == nullptr || m_opcode.GetByteSize() <= 0) {
-    // x86_64 and i386 instructions are categorized as Opcode::Type::eTypeBytes
-    return lldb::eInstructionControlFlowKindUnknown;
-  }
-
-  // Opcode bytes will be decoded into primary_opcode, modrm and opcode length.
-  // These are the three values deciding instruction control flow kind.
-  ret = InstructionLengthDecode((const uint8_t *)m_opcode.GetOpcodeBytes(),
-                                m_opcode.GetByteSize(), is_exec_mode_64b);
-  if (!ret)
-    return lldb::eInstructionControlFlowKindUnknown;
-  else
-    return MapOpcodeIntoControlFlowKind(ret.value());
-}
-
-} // namespace x86
-
-lldb::InstructionControlFlowKind
-Instruction::GetControlFlowKind(const ArchSpec &arch) {
-  if (arch.GetTriple().getArch() == llvm::Triple::x86)
-    return x86::GetControlFlowKind(/*is_exec_mode_64b=*/false, m_opcode);
-  else if (arch.GetTriple().getArch() == llvm::Triple::x86_64)
-    return x86::GetControlFlowKind(/*is_exec_mode_64b=*/true, m_opcode);
-  else
-    return eInstructionControlFlowKindUnknown; // not implemented
-}
-
 AddressClass Instruction::GetAddressClass() {
   if (m_address_class == AddressClass::eInvalid)
     m_address_class = m_address.GetAddressClass();
@@ -946,7 +618,7 @@
   }
 
   if (show_control_flow_kind) {
-    switch (GetControlFlowKind(exe_ctx->GetTargetRef().GetArchitecture())) {
+    switch (GetControlFlowKind(exe_ctx)) {
     case eInstructionControlFlowKindUnknown:
       ss.Printf("%-12s", "unknown");
       break;
Index: lldb/source/Plugins/Disassembler/LLVMC/DisassemblerLLVMC.cpp
===================================================================
--- lldb/source/Plugins/Disassembler/LLVMC/DisassemblerLLVMC.cpp
+++ lldb/source/Plugins/Disassembler/LLVMC/DisassemblerLLVMC.cpp
@@ -85,6 +85,352 @@
   std::unique_ptr<llvm::MCInstPrinter> m_instr_printer_up;
 };
 
+namespace x86 {
+
+/// These are the three values deciding instruction control flow kind.
+/// InstructionLengthDecode function decodes an instruction and get this struct.
+///
+/// primary_opcode
+///    Primary opcode of the instruction.
+///    For one-byte opcode instruction, it's the first byte after prefix.
+///    For two- and three-byte opcodes, it's the second byte.
+///
+/// opcode_len
+///    The length of opcode in bytes. Valid opcode lengths are 1, 2, or 3.
+///
+/// modrm
+///    ModR/M byte of the instruction.
+///    Bits[7:6] indicate MOD. Bits[5:3] specify a register and R/M bits[2:0]
+///    may contain a register or specify an addressing mode, depending on MOD.
+struct InstructionOpcodeAndModrm {
+  uint8_t primary_opcode;
+  uint8_t opcode_len;
+  uint8_t modrm;
+};
+
+/// Determine the InstructionControlFlowKind based on opcode and modrm bytes.
+/// Refer to http://ref.x86asm.net/coder.html for the full list of opcode and
+/// instruction set.
+///
+/// \param[in] opcode_and_modrm
+///    Contains primary_opcode byte, its length, and ModR/M byte.
+///    Refer to the struct InstructionOpcodeAndModrm for details.
+///
+/// \return
+///   The control flow kind of the instruction or
+///   eInstructionControlFlowKindOther if the instruction doesn't affect
+///   the control flow of the program.
+lldb::InstructionControlFlowKind
+MapOpcodeIntoControlFlowKind(InstructionOpcodeAndModrm opcode_and_modrm) {
+  uint8_t opcode = opcode_and_modrm.primary_opcode;
+  uint8_t opcode_len = opcode_and_modrm.opcode_len;
+  uint8_t modrm = opcode_and_modrm.modrm;
+
+  if (opcode_len > 2)
+    return lldb::eInstructionControlFlowKindOther;
+
+  if (opcode >= 0x70 && opcode <= 0x7F) {
+    if (opcode_len == 1)
+      return lldb::eInstructionControlFlowKindCondJump;
+    else
+      return lldb::eInstructionControlFlowKindOther;
+  }
+
+  if (opcode >= 0x80 && opcode <= 0x8F) {
+    if (opcode_len == 2)
+      return lldb::eInstructionControlFlowKindCondJump;
+    else
+      return lldb::eInstructionControlFlowKindOther;
+  }
+
+  switch (opcode) {
+  case 0x9A:
+    if (opcode_len == 1)
+      return lldb::eInstructionControlFlowKindFarCall;
+    break;
+  case 0xFF:
+    if (opcode_len == 1) {
+      uint8_t modrm_reg = (modrm >> 3) & 7;
+      if (modrm_reg == 2)
+        return lldb::eInstructionControlFlowKindCall;
+      else if (modrm_reg == 3)
+        return lldb::eInstructionControlFlowKindFarCall;
+      else if (modrm_reg == 4)
+        return lldb::eInstructionControlFlowKindJump;
+      else if (modrm_reg == 5)
+        return lldb::eInstructionControlFlowKindFarJump;
+    }
+    break;
+  case 0xE8:
+    if (opcode_len == 1)
+      return lldb::eInstructionControlFlowKindCall;
+    break;
+  case 0xCD:
+  case 0xCC:
+  case 0xCE:
+  case 0xF1:
+    if (opcode_len == 1)
+      return lldb::eInstructionControlFlowKindFarCall;
+    break;
+  case 0xCF:
+    if (opcode_len == 1)
+      return lldb::eInstructionControlFlowKindFarReturn;
+    break;
+  case 0xE9:
+  case 0xEB:
+    if (opcode_len == 1)
+      return lldb::eInstructionControlFlowKindJump;
+    break;
+  case 0xEA:
+    if (opcode_len == 1)
+      return lldb::eInstructionControlFlowKindFarJump;
+    break;
+  case 0xE3:
+  case 0xE0:
+  case 0xE1:
+  case 0xE2:
+    if (opcode_len == 1)
+      return lldb::eInstructionControlFlowKindCondJump;
+    break;
+  case 0xC3:
+  case 0xC2:
+    if (opcode_len == 1)
+      return lldb::eInstructionControlFlowKindReturn;
+    break;
+  case 0xCB:
+  case 0xCA:
+    if (opcode_len == 1)
+      return lldb::eInstructionControlFlowKindFarReturn;
+    break;
+  case 0x05:
+  case 0x34:
+    if (opcode_len == 2)
+      return lldb::eInstructionControlFlowKindFarCall;
+    break;
+  case 0x35:
+  case 0x07:
+    if (opcode_len == 2)
+      return lldb::eInstructionControlFlowKindFarReturn;
+    break;
+  case 0x01:
+    if (opcode_len == 2) {
+      switch (modrm) {
+      case 0xc1:
+        return lldb::eInstructionControlFlowKindFarCall;
+      case 0xc2:
+      case 0xc3:
+        return lldb::eInstructionControlFlowKindFarReturn;
+      default:
+        break;
+      }
+    }
+    break;
+  default:
+    break;
+  }
+
+  return lldb::eInstructionControlFlowKindOther;
+}
+
+/// Decode an instruction into opcode, modrm and opcode_len.
+/// Refer to http://ref.x86asm.net/coder.html for the instruction bytes layout.
+/// Opcodes in x86 are generally the first byte of instruction, though two-byte
+/// instructions and prefixes exist. ModR/M is the byte following the opcode
+/// and adds additional information for how the instruction is executed.
+///
+/// \param[in] inst_bytes
+///    Raw bytes of the instruction
+///
+///
+/// \param[in] bytes_len
+///    The length of the inst_bytes array.
+///
+/// \param[in] is_exec_mode_64b
+///    If true, the execution mode is 64 bit.
+///
+/// \return
+///    Returns decoded instruction as struct InstructionOpcodeAndModrm, holding
+///    primary_opcode, opcode_len and modrm byte. Refer to the struct definition
+///    for more details.
+///    Otherwise if the given instruction is invalid, returns None.
+llvm::Optional<InstructionOpcodeAndModrm>
+InstructionLengthDecode(const uint8_t *inst_bytes, int bytes_len,
+                        bool is_exec_mode_64b) {
+  int op_idx = 0;
+  bool prefix_done = false;
+  InstructionOpcodeAndModrm ret = {0, 0, 0};
+
+  // Never read past the end of the instruction: a missing byte the decoder
+  // depends on makes the decode fail, whereas a missing ModR/M byte (many
+  // instructions, e.g. `ret`, have none) is reported as 0.
+  auto has_byte = [&](int idx) { return idx < bytes_len; };
+  auto modrm_at = [&](int idx) -> uint8_t {
+    return has_byte(idx) ? inst_bytes[idx] : 0;
+  };
+
+  // In most cases, the primary_opcode is the first byte of the instruction
+  // but some instructions have a prefix to be skipped for these calculations.
+  // The following mapping is inspired from libipt's instruction decoding logic
+  // in `src/pt_ild.c`
+  while (!prefix_done) {
+    if (op_idx >= bytes_len)
+      return llvm::None;
+
+    ret.primary_opcode = inst_bytes[op_idx];
+    switch (ret.primary_opcode) {
+    // prefix_ignore
+    case 0x26:
+    case 0x2e:
+    case 0x36:
+    case 0x3e:
+    case 0x64:
+    case 0x65:
+    // prefix_osz, prefix_asz
+    case 0x66:
+    case 0x67:
+    // prefix_lock, prefix_f2, prefix_f3
+    case 0xf0:
+    case 0xf2:
+    case 0xf3:
+      op_idx++;
+      break;
+
+    // prefix_rex
+    case 0x40:
+    case 0x41:
+    case 0x42:
+    case 0x43:
+    case 0x44:
+    case 0x45:
+    case 0x46:
+    case 0x47:
+    case 0x48:
+    case 0x49:
+    case 0x4a:
+    case 0x4b:
+    case 0x4c:
+    case 0x4d:
+    case 0x4e:
+    case 0x4f:
+      if (is_exec_mode_64b)
+        op_idx++;
+      else
+        prefix_done = true;
+      break;
+
+    // prefix_vex_c4, c5
+    case 0xc5:
+      if (!has_byte(op_idx + 1))
+        return llvm::None;
+      if (!is_exec_mode_64b && (inst_bytes[op_idx + 1] & 0xc0) != 0xc0) {
+        prefix_done = true;
+        break;
+      }
+
+      if (!has_byte(op_idx + 2))
+        return llvm::None;
+      ret.opcode_len = 2;
+      ret.primary_opcode = inst_bytes[op_idx + 2];
+      ret.modrm = modrm_at(op_idx + 3);
+      return ret;
+
+    case 0xc4:
+      if (!has_byte(op_idx + 1))
+        return llvm::None;
+      if (!is_exec_mode_64b && (inst_bytes[op_idx + 1] & 0xc0) != 0xc0) {
+        prefix_done = true;
+        break;
+      }
+      if (!has_byte(op_idx + 3))
+        return llvm::None;
+      ret.opcode_len = inst_bytes[op_idx + 1] & 0x1f;
+      ret.primary_opcode = inst_bytes[op_idx + 3];
+      ret.modrm = modrm_at(op_idx + 4);
+      return ret;
+
+    // prefix_evex
+    case 0x62:
+      if (!has_byte(op_idx + 1))
+        return llvm::None;
+      if (!is_exec_mode_64b && (inst_bytes[op_idx + 1] & 0xc0) != 0xc0) {
+        prefix_done = true;
+        break;
+      }
+      if (!has_byte(op_idx + 4))
+        return llvm::None;
+      ret.opcode_len = inst_bytes[op_idx + 1] & 0x03;
+      ret.primary_opcode = inst_bytes[op_idx + 4];
+      ret.modrm = modrm_at(op_idx + 5);
+      return ret;
+
+    default:
+      prefix_done = true;
+      break;
+    }
+  } // prefix done
+
+  ret.primary_opcode = inst_bytes[op_idx];
+  ret.modrm = modrm_at(op_idx + 1);
+  ret.opcode_len = 1;
+
+  // If the first opcode is 0F, it's two- or three- byte opcodes.
+  if (ret.primary_opcode == 0x0F) {
+    if (!has_byte(op_idx + 1))
+      return llvm::None;
+    ret.primary_opcode = inst_bytes[++op_idx]; // get the next byte
+
+    if (ret.primary_opcode == 0x38) {
+      ret.opcode_len = 3;
+      if (!has_byte(op_idx + 1))
+        return llvm::None;
+      ret.primary_opcode = inst_bytes[++op_idx]; // get the next byte
+      ret.modrm = modrm_at(op_idx + 1);
+    } else if (ret.primary_opcode == 0x3A) {
+      ret.opcode_len = 3;
+      if (!has_byte(op_idx + 1))
+        return llvm::None;
+      ret.primary_opcode = inst_bytes[++op_idx];
+      ret.modrm = modrm_at(op_idx + 1);
+    } else if ((ret.primary_opcode & 0xf8) == 0x38) {
+      ret.opcode_len = 0;
+      if (!has_byte(op_idx + 1))
+        return llvm::None;
+      ret.primary_opcode = inst_bytes[++op_idx];
+      ret.modrm = modrm_at(op_idx + 1);
+    } else if (ret.primary_opcode == 0x0F) {
+      ret.opcode_len = 3;
+      // opcode is 0x0F, no needs to update
+      ret.modrm = modrm_at(op_idx + 1);
+    } else {
+      ret.opcode_len = 2;
+      ret.modrm = modrm_at(op_idx + 1);
+    }
+  }
+
+  return ret;
+}
+
+lldb::InstructionControlFlowKind GetControlFlowKind(bool is_exec_mode_64b,
+                                                    Opcode m_opcode) {
+  llvm::Optional<InstructionOpcodeAndModrm> ret = llvm::None;
+
+  if (m_opcode.GetOpcodeBytes() == nullptr || m_opcode.GetByteSize() <= 0) {
+    // x86_64 and i386 instructions are categorized as Opcode::Type::eTypeBytes
+    return lldb::eInstructionControlFlowKindUnknown;
+  }
+
+  // Opcode bytes will be decoded into primary_opcode, modrm and opcode length.
+  // These are the three values deciding instruction control flow kind.
+  ret = InstructionLengthDecode((const uint8_t *)m_opcode.GetOpcodeBytes(),
+                                m_opcode.GetByteSize(), is_exec_mode_64b);
+  if (!ret)
+    return lldb::eInstructionControlFlowKindUnknown;
+  else
+    return MapOpcodeIntoControlFlowKind(ret.value());
+}
+
+} // namespace x86
+
 class InstructionLLVMC : public lldb_private::Instruction {
 public:
   InstructionLLVMC(DisassemblerLLVMC &disasm,
@@ -223,6 +569,23 @@
     }
   }
 
+  /// Classify the control flow of this instruction from its raw opcode bytes.
+  /// Only x86 and x86_64 are decoded; every other architecture, or an
+  /// invalid DisassemblerScope, yields
+  /// eInstructionControlFlowKindUnknown.
+  lldb::InstructionControlFlowKind
+  GetControlFlowKind(const lldb_private::ExecutionContext *exe_ctx) override {
+    DisassemblerScope disasm(*this, exe_ctx);
+    if (disasm) {
+      if (disasm->GetArchitecture().GetMachine() == llvm::Triple::x86)
+        return x86::GetControlFlowKind(/*is_exec_mode_64b=*/false, m_opcode);
+      else if (disasm->GetArchitecture().GetMachine() == llvm::Triple::x86_64)
+        return x86::GetControlFlowKind(/*is_exec_mode_64b=*/true, m_opcode);
+    }
+
+    return eInstructionControlFlowKindUnknown;
+  }
+
   void CalculateMnemonicOperandsAndComment(
       const lldb_private::ExecutionContext *exe_ctx) override {
     DataExtractor data;
Index: lldb/unittests/Disassembler/x86/TestGetControlFlowKindx86.cpp
===================================================================
--- lldb/unittests/Disassembler/x86/TestGetControlFlowKindx86.cpp
+++ lldb/unittests/Disassembler/x86/TestGetControlFlowKindx86.cpp
@@ -137,7 +137,8 @@
     for (size_t i = 0; i < num_of_instructions; ++i) {
       InstructionSP inst_sp;
       inst_sp = inst_list.GetInstructionAtIndex(i);
-      InstructionControlFlowKind kind = inst_sp->GetControlFlowKind(arch);
+      ExecutionContext exe_ctx(nullptr, nullptr, nullptr);
+      InstructionControlFlowKind kind = inst_sp->GetControlFlowKind(&exe_ctx);
       EXPECT_EQ(kind, result[i]);
     }
   }