diff --git a/llvm/include/llvm-c/DisassemblerTypes.h b/llvm/include/llvm-c/DisassemblerTypes.h --- a/llvm/include/llvm-c/DisassemblerTypes.h +++ b/llvm/include/llvm-c/DisassemblerTypes.h @@ -38,15 +38,15 @@ * one operand with symbolic information. To determine the symbolic operand * information for each operand, the bytes for the specific operand in the * instruction are specified by the Offset parameter and its byte widith is the - * size parameter. For instructions sets with fixed widths and one symbolic - * operand per instruction, the Offset parameter will be zero and Size parameter - * will be the instruction width. The information is returned in TagBuf and is - * Triple specific with its specific information defined by the value of - * TagType for that Triple. If symbolic information is returned the function - * returns 1, otherwise it returns 0. + * OpSize parameter. For instructions sets with fixed widths and one symbolic + * operand per instruction, the Offset parameter will be zero and InstSize + * parameter will be the instruction width. The information is returned in + * TagBuf and is Triple specific with its specific information defined by the + * value of TagType for that Triple. If symbolic information is returned the + * function * returns 1, otherwise it returns 0. */ -typedef int (*LLVMOpInfoCallback)(void *DisInfo, uint64_t PC, - uint64_t Offset, uint64_t Size, +typedef int (*LLVMOpInfoCallback)(void *DisInfo, uint64_t PC, uint64_t Offset, + uint64_t OpSize, uint64_t InstSize, int TagType, void *TagBuf); /** diff --git a/llvm/include/llvm/MC/MCDisassembler/MCDisassembler.h b/llvm/include/llvm/MC/MCDisassembler/MCDisassembler.h --- a/llvm/include/llvm/MC/MCDisassembler/MCDisassembler.h +++ b/llvm/include/llvm/MC/MCDisassembler/MCDisassembler.h @@ -181,10 +181,9 @@ public: // Helpers around MCSymbolizer - bool tryAddingSymbolicOperand(MCInst &Inst, - int64_t Value, - uint64_t Address, bool IsBranch, - uint64_t Offset, uint64_t InstSize) const; + bool tryAddingSymbolicOperand(MCInst &Inst, int64_t Value, uint64_t Address, + bool IsBranch, uint64_t Offset, uint64_t OpSize, + uint64_t InstSize) const; void tryAddingPcLoadReferenceComment(int64_t Value, uint64_t Address) const; diff --git a/llvm/include/llvm/MC/MCDisassembler/MCExternalSymbolizer.h b/llvm/include/llvm/MC/MCDisassembler/MCExternalSymbolizer.h --- a/llvm/include/llvm/MC/MCDisassembler/MCExternalSymbolizer.h +++ b/llvm/include/llvm/MC/MCDisassembler/MCExternalSymbolizer.h @@ -46,7 +46,8 @@ bool tryAddingSymbolicOperand(MCInst &MI, raw_ostream &CommentStream, int64_t Value, uint64_t Address, bool IsBranch, - uint64_t Offset, uint64_t InstSize) override; + uint64_t Offset, uint64_t OpSize, + uint64_t InstSize) override; void tryAddingPcLoadReferenceComment(raw_ostream &CommentStream, int64_t Value, uint64_t Address) override; diff --git a/llvm/include/llvm/MC/MCDisassembler/MCSymbolizer.h b/llvm/include/llvm/MC/MCDisassembler/MCSymbolizer.h --- a/llvm/include/llvm/MC/MCDisassembler/MCSymbolizer.h +++ b/llvm/include/llvm/MC/MCDisassembler/MCSymbolizer.h @@ -63,12 +63,13 @@ /// \param Address - Load address of the instruction. /// \param IsBranch - Is the instruction a branch? /// \param Offset - Byte offset of the operand inside the inst. + /// \param OpSize - Size of the operand in bytes. /// \param InstSize - Size of the instruction in bytes. /// \return Whether a symbolic operand was added. virtual bool tryAddingSymbolicOperand(MCInst &Inst, raw_ostream &cStream, int64_t Value, uint64_t Address, bool IsBranch, uint64_t Offset, - uint64_t InstSize) = 0; + uint64_t OpSize, uint64_t InstSize) = 0; /// Try to add a comment on the PC-relative load. /// For instance, in Mach-O, this is used to add annotations to instructions diff --git a/llvm/lib/MC/MCDisassembler/MCDisassembler.cpp b/llvm/lib/MC/MCDisassembler/MCDisassembler.cpp --- a/llvm/lib/MC/MCDisassembler/MCDisassembler.cpp +++ b/llvm/lib/MC/MCDisassembler/MCDisassembler.cpp @@ -22,11 +22,12 @@ bool MCDisassembler::tryAddingSymbolicOperand(MCInst &Inst, int64_t Value, uint64_t Address, bool IsBranch, - uint64_t Offset, + uint64_t Offset, uint64_t OpSize, uint64_t InstSize) const { if (Symbolizer) - return Symbolizer->tryAddingSymbolicOperand( - Inst, *CommentStream, Value, Address, IsBranch, Offset, InstSize); + return Symbolizer->tryAddingSymbolicOperand(Inst, *CommentStream, Value, + Address, IsBranch, Offset, + OpSize, InstSize); return false; } diff --git a/llvm/lib/MC/MCDisassembler/MCExternalSymbolizer.cpp b/llvm/lib/MC/MCDisassembler/MCExternalSymbolizer.cpp --- a/llvm/lib/MC/MCDisassembler/MCExternalSymbolizer.cpp +++ b/llvm/lib/MC/MCDisassembler/MCExternalSymbolizer.cpp @@ -31,19 +31,15 @@ // is found an MCExpr is created with that, else an MCExpr with Value is // created. This function returns true if it adds an operand to the MCInst and // false otherwise. -bool MCExternalSymbolizer::tryAddingSymbolicOperand(MCInst &MI, - raw_ostream &cStream, - int64_t Value, - uint64_t Address, - bool IsBranch, - uint64_t Offset, - uint64_t InstSize) { +bool MCExternalSymbolizer::tryAddingSymbolicOperand( + MCInst &MI, raw_ostream &cStream, int64_t Value, uint64_t Address, + bool IsBranch, uint64_t Offset, uint64_t OpSize, uint64_t InstSize) { struct LLVMOpInfo1 SymbolicOp; std::memset(&SymbolicOp, '\0', sizeof(struct LLVMOpInfo1)); SymbolicOp.Value = Value; if (!GetOpInfo || - !GetOpInfo(DisInfo, Address, Offset, InstSize, 1, &SymbolicOp)) { + !GetOpInfo(DisInfo, Address, Offset, OpSize, InstSize, 1, &SymbolicOp)) { // Clear SymbolicOp.Value from above and also all other fields. std::memset(&SymbolicOp, '\0', sizeof(struct LLVMOpInfo1)); @@ -53,10 +49,10 @@ // that always makes sense to guess. But in the case of an immediate it is // a bit more questionable if it is an address of a symbol or some other // reference. So if the immediate Value comes from a width of 1 byte, - // InstSize, we will not guess it is an address of a symbol. Because in + // OpSize, we will not guess it is an address of a symbol. Because in // object files assembled starting at address 0 this usually leads to // incorrect symbolication. - if (!SymbolLookUp || (InstSize == 1 && !IsBranch)) + if (!SymbolLookUp || (OpSize == 1 && !IsBranch)) return false; uint64_t ReferenceType; diff --git a/llvm/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp b/llvm/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp --- a/llvm/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp +++ b/llvm/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp @@ -749,7 +749,7 @@ ImmVal |= ~((1LL << 19) - 1); if (!Decoder->tryAddingSymbolicOperand( - Inst, ImmVal * 4, Addr, Inst.getOpcode() != AArch64::LDRXl, 0, 4)) + Inst, ImmVal * 4, Addr, Inst.getOpcode() != AArch64::LDRXl, 0, 0, 4)) Inst.addOperand(MCOperand::createImm(ImmVal)); return Success; } @@ -1030,7 +1030,7 @@ } DecodeGPR64spRegisterClass(Inst, Rn, Addr, Decoder); - if (!Decoder->tryAddingSymbolicOperand(Inst, offset, Addr, Fail, 0, 4)) + if (!Decoder->tryAddingSymbolicOperand(Inst, offset, Addr, Fail, 0, 0, 4)) Inst.addOperand(MCOperand::createImm(offset)); return Success; } @@ -1640,7 +1640,7 @@ imm |= ~((1LL << 21) - 1); DecodeGPR64RegisterClass(Inst, Rd, Addr, Decoder); - if (!Decoder->tryAddingSymbolicOperand(Inst, imm, Addr, Fail, 0, 4)) + if (!Decoder->tryAddingSymbolicOperand(Inst, imm, Addr, Fail, 0, 0, 4)) Inst.addOperand(MCOperand::createImm(imm)); return Success; @@ -1675,7 +1675,7 @@ DecodeGPR32spRegisterClass(Inst, Rn, Addr, Decoder); } - if (!Decoder->tryAddingSymbolicOperand(Inst, Imm, Addr, Fail, 0, 4)) + if (!Decoder->tryAddingSymbolicOperand(Inst, Imm, Addr, Fail, 0, 0, 4)) Inst.addOperand(MCOperand::createImm(ImmVal)); Inst.addOperand(MCOperand::createImm(12 * ShifterVal)); return Success; @@ -1690,7 +1690,7 @@ if (imm & (1 << (26 - 1))) imm |= ~((1LL << 26) - 1); - if (!Decoder->tryAddingSymbolicOperand(Inst, imm * 4, Addr, true, 0, 4)) + if (!Decoder->tryAddingSymbolicOperand(Inst, imm * 4, Addr, true, 0, 0, 4)) Inst.addOperand(MCOperand::createImm(imm)); return Success; @@ -1742,7 +1742,7 @@ else DecodeGPR64RegisterClass(Inst, Rt, Addr, Decoder); Inst.addOperand(MCOperand::createImm(bit)); - if (!Decoder->tryAddingSymbolicOperand(Inst, dst * 4, Addr, true, 0, 4)) + if (!Decoder->tryAddingSymbolicOperand(Inst, dst * 4, Addr, true, 0, 0, 4)) Inst.addOperand(MCOperand::createImm(dst)); return Success; diff --git a/llvm/lib/Target/AArch64/Disassembler/AArch64ExternalSymbolizer.h b/llvm/lib/Target/AArch64/Disassembler/AArch64ExternalSymbolizer.h --- a/llvm/lib/Target/AArch64/Disassembler/AArch64ExternalSymbolizer.h +++ b/llvm/lib/Target/AArch64/Disassembler/AArch64ExternalSymbolizer.h @@ -29,7 +29,8 @@ bool tryAddingSymbolicOperand(MCInst &MI, raw_ostream &CommentStream, int64_t Value, uint64_t Address, bool IsBranch, - uint64_t Offset, uint64_t InstSize) override; + uint64_t Offset, uint64_t OpSize, + uint64_t InstSize) override; }; } // namespace llvm diff --git a/llvm/lib/Target/AArch64/Disassembler/AArch64ExternalSymbolizer.cpp b/llvm/lib/Target/AArch64/Disassembler/AArch64ExternalSymbolizer.cpp --- a/llvm/lib/Target/AArch64/Disassembler/AArch64ExternalSymbolizer.cpp +++ b/llvm/lib/Target/AArch64/Disassembler/AArch64ExternalSymbolizer.cpp @@ -60,7 +60,7 @@ /// an operand to the MCInst and Fail otherwise. bool AArch64ExternalSymbolizer::tryAddingSymbolicOperand( MCInst &MI, raw_ostream &CommentStream, int64_t Value, uint64_t Address, - bool IsBranch, uint64_t Offset, uint64_t InstSize) { + bool IsBranch, uint64_t Offset, uint64_t OpSize, uint64_t InstSize) { if (!SymbolLookUp) return false; // FIXME: This method shares a lot of code with @@ -73,8 +73,8 @@ SymbolicOp.Value = Value; uint64_t ReferenceType; const char *ReferenceName; - if (!GetOpInfo || - !GetOpInfo(DisInfo, Address, 0 /* Offset */, InstSize, 1, &SymbolicOp)) { + if (!GetOpInfo || !GetOpInfo(DisInfo, Address, 0 /* Offset */, OpSize, + InstSize, 1, &SymbolicOp)) { if (IsBranch) { ReferenceType = LLVMDisassembler_ReferenceType_In_Branch; const char *Name = SymbolLookUp(DisInfo, Address + Value, &ReferenceType, diff --git a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h --- a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h +++ b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h @@ -215,8 +215,8 @@ : MCSymbolizer(Ctx, std::move(RelInfo)), DisInfo(disInfo) {} bool tryAddingSymbolicOperand(MCInst &Inst, raw_ostream &cStream, - int64_t Value, uint64_t Address, - bool IsBranch, uint64_t Offset, + int64_t Value, uint64_t Address, bool IsBranch, + uint64_t Offset, uint64_t OpSize, uint64_t InstSize) override; void tryAddingPcLoadReferenceComment(raw_ostream &cStream, diff --git a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp --- a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp +++ b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp @@ -82,7 +82,7 @@ APInt SignedOffset(18, Imm * 4, true); int64_t Offset = (SignedOffset.sext(64) + 4 + Addr).getSExtValue(); - if (DAsm->tryAddingSymbolicOperand(Inst, Offset, Addr, true, 2, 2)) + if (DAsm->tryAddingSymbolicOperand(Inst, Offset, Addr, true, 2, 2, 0)) return MCDisassembler::Success; return addOperand(Inst, MCOperand::createImm(Imm)); } @@ -1890,10 +1890,10 @@ //===----------------------------------------------------------------------===// // Try to find symbol name for specified label -bool AMDGPUSymbolizer::tryAddingSymbolicOperand(MCInst &Inst, - raw_ostream &/*cStream*/, int64_t Value, - uint64_t /*Address*/, bool IsBranch, - uint64_t /*Offset*/, uint64_t /*InstSize*/) { +bool AMDGPUSymbolizer::tryAddingSymbolicOperand( + MCInst &Inst, raw_ostream & /*cStream*/, int64_t Value, + uint64_t /*Address*/, bool IsBranch, uint64_t /*Offset*/, + uint64_t /*OpSize*/, uint64_t /*InstSize*/) { if (!IsBranch) { return false; diff --git a/llvm/lib/Target/ARC/Disassembler/ARCDisassembler.cpp b/llvm/lib/Target/ARC/Disassembler/ARCDisassembler.cpp --- a/llvm/lib/Target/ARC/Disassembler/ARCDisassembler.cpp +++ b/llvm/lib/Target/ARC/Disassembler/ARCDisassembler.cpp @@ -181,7 +181,7 @@ const MCDisassembler *Decoder) { static const uint64_t AtLeast = 2; return (nullptr != Decoder && Decoder->tryAddingSymbolicOperand( - Inst, Value, Address, true, 0, AtLeast)); + Inst, Value, Address, true, 0, AtLeast, 0)); } static void DecodeSymbolicOperandOff(MCInst &Inst, uint64_t Address, diff --git a/llvm/lib/Target/ARM/Disassembler/ARMDisassembler.cpp b/llvm/lib/Target/ARM/Disassembler/ARMDisassembler.cpp --- a/llvm/lib/Target/ARM/Disassembler/ARMDisassembler.cpp +++ b/llvm/lib/Target/ARM/Disassembler/ARMDisassembler.cpp @@ -835,7 +835,8 @@ const MCDisassembler *Decoder) { // FIXME: Does it make sense for value to be negative? return Decoder->tryAddingSymbolicOperand(MI, (uint32_t)Value, Address, - isBranch, /* Offset */ 0, InstSize); + isBranch, /* Offset */ 0, + /* OpSize */ 0, InstSize); } /// tryAddingPcLoadReferenceComment - trys to add a comment as to what is being diff --git a/llvm/lib/Target/Hexagon/Disassembler/HexagonDisassembler.cpp b/llvm/lib/Target/Hexagon/Disassembler/HexagonDisassembler.cpp --- a/llvm/lib/Target/Hexagon/Disassembler/HexagonDisassembler.cpp +++ b/llvm/lib/Target/Hexagon/Disassembler/HexagonDisassembler.cpp @@ -768,7 +768,8 @@ Bits = 15; uint64_t FullValue = fullValue(Disassembler, MI, SignExtend64(tmp, Bits)); uint32_t Extended = FullValue + Address; - if (!Disassembler.tryAddingSymbolicOperand(MI, Extended, Address, true, 0, 4)) + if (!Disassembler.tryAddingSymbolicOperand(MI, Extended, Address, true, 0, 0, + 4)) HexagonMCInstrInfo::addConstant(MI, Extended, Disassembler.getContext()); return MCDisassembler::Success; } diff --git a/llvm/lib/Target/Lanai/Disassembler/LanaiDisassembler.cpp b/llvm/lib/Target/Lanai/Disassembler/LanaiDisassembler.cpp --- a/llvm/lib/Target/Lanai/Disassembler/LanaiDisassembler.cpp +++ b/llvm/lib/Target/Lanai/Disassembler/LanaiDisassembler.cpp @@ -215,7 +215,7 @@ uint64_t Width, MCInst &MI, const MCDisassembler *Decoder) { return Decoder->tryAddingSymbolicOperand(MI, Value, Address, IsBranch, Offset, - Width); + Width, /* InstSize */ 0); } static DecodeStatus decodeBranch(MCInst &MI, unsigned Insn, uint64_t Address, diff --git a/llvm/lib/Target/Sparc/Disassembler/SparcDisassembler.cpp b/llvm/lib/Target/Sparc/Disassembler/SparcDisassembler.cpp --- a/llvm/lib/Target/Sparc/Disassembler/SparcDisassembler.cpp +++ b/llvm/lib/Target/Sparc/Disassembler/SparcDisassembler.cpp @@ -509,7 +509,7 @@ uint64_t Width, MCInst &MI, const MCDisassembler *Decoder) { return Decoder->tryAddingSymbolicOperand(MI, Value, Address, isBranch, Offset, - Width); + Width, /* InstSize */ 0); } static DecodeStatus DecodeCall(MCInst &MI, unsigned insn, uint64_t Address, diff --git a/llvm/lib/Target/SystemZ/Disassembler/SystemZDisassembler.cpp b/llvm/lib/Target/SystemZ/Disassembler/SystemZDisassembler.cpp --- a/llvm/lib/Target/SystemZ/Disassembler/SystemZDisassembler.cpp +++ b/llvm/lib/Target/SystemZ/Disassembler/SystemZDisassembler.cpp @@ -75,7 +75,7 @@ uint64_t Width, MCInst &MI, const MCDisassembler *Decoder) { return Decoder->tryAddingSymbolicOperand(MI, Value, Address, isBranch, Offset, - Width); + Width, /* InstSize */ 0); } static DecodeStatus decodeRegisterClass(MCInst &Inst, uint64_t RegNo, diff --git a/llvm/lib/Target/X86/Disassembler/X86Disassembler.cpp b/llvm/lib/Target/X86/Disassembler/X86Disassembler.cpp --- a/llvm/lib/Target/X86/Disassembler/X86Disassembler.cpp +++ b/llvm/lib/Target/X86/Disassembler/X86Disassembler.cpp @@ -1874,8 +1874,7 @@ uint64_t pcrel = 0; if (type == TYPE_REL) { isBranch = true; - pcrel = insn.startLocation + - insn.immediateOffset + insn.immediateSize; + pcrel = insn.startLocation + insn.length; switch (operand.encoding) { default: break; @@ -1950,9 +1949,9 @@ break; } - if (!Dis->tryAddingSymbolicOperand(mcInst, immediate + pcrel, - insn.startLocation, isBranch, - insn.immediateOffset, insn.immediateSize)) + if (!Dis->tryAddingSymbolicOperand( + mcInst, immediate + pcrel, insn.startLocation, isBranch, + insn.immediateOffset, insn.immediateSize, insn.length)) mcInst.addOperand(MCOperand::createImm(immediate)); if (type == TYPE_MOFFS) { @@ -2089,8 +2088,7 @@ return true; } if (insn.mode == MODE_64BIT){ - pcrel = insn.startLocation + - insn.displacementOffset + insn.displacementSize; + pcrel = insn.startLocation + insn.length; Dis->tryAddingPcLoadReferenceComment(insn.displacement + pcrel, insn.startLocation + insn.displacementOffset); @@ -2153,9 +2151,13 @@ mcInst.addOperand(baseReg); mcInst.addOperand(scaleAmount); mcInst.addOperand(indexReg); + + const uint8_t dispSize = + (insn.eaDisplacement == EA_DISP_NONE) ? 0 : insn.displacementSize; + if (!Dis->tryAddingSymbolicOperand( mcInst, insn.displacement + pcrel, insn.startLocation, false, - insn.displacementOffset, insn.displacementSize)) + insn.displacementOffset, dispSize, insn.length)) mcInst.addOperand(displacement); mcInst.addOperand(segmentReg); return false; diff --git a/llvm/tools/llvm-objdump/MachODump.cpp b/llvm/tools/llvm-objdump/MachODump.cpp --- a/llvm/tools/llvm-objdump/MachODump.cpp +++ b/llvm/tools/llvm-objdump/MachODump.cpp @@ -2608,7 +2608,8 @@ // value of TagType is currently 1 (for the LLVMOpInfo1 struct). If symbolic // information is returned then this function returns 1 else it returns 0. static int SymbolizerGetOpInfo(void *DisInfo, uint64_t Pc, uint64_t Offset, - uint64_t Size, int TagType, void *TagBuf) { + uint64_t OpSize, uint64_t InstSize, int TagType, + void *TagBuf) { struct DisassembleInfo *info = (struct DisassembleInfo *)DisInfo; struct LLVMOpInfo1 *op_info = (struct LLVMOpInfo1 *)TagBuf; uint64_t value = op_info->Value; @@ -2625,7 +2626,7 @@ unsigned int Arch = info->O->getArch(); if (Arch == Triple::x86) { - if (Size != 1 && Size != 2 && Size != 4 && Size != 0) + if (OpSize != 1 && OpSize != 2 && OpSize != 4 && OpSize != 0) return 0; if (info->O->getHeader().filetype != MachO::MH_OBJECT) { // TODO: @@ -2705,7 +2706,7 @@ return 0; } if (Arch == Triple::x86_64) { - if (Size != 1 && Size != 2 && Size != 4 && Size != 0) + if (OpSize != 1 && OpSize != 2 && OpSize != 4 && OpSize != 0) return 0; // For non MH_OBJECT types, like MH_KEXT_BUNDLE, Search the external // relocation entries of a linked image (if any) for an entry that matches @@ -2737,7 +2738,7 @@ // adds the Pc. But for x86_64 external relocation entries the Value // is the offset from the external symbol. if (info->O->getAnyRelocationPCRel(RE)) - op_info->Value -= Pc + Offset + Size; + op_info->Value -= Pc + InstSize; const char *name = unwrapOrError(Symbol.getName(), info->O->getFileName()).data(); op_info->AddSymbol.Present = 1; @@ -2775,7 +2776,7 @@ // adds the Pc. But for x86_64 external relocation entries the Value // is the offset from the external symbol. if (info->O->getAnyRelocationPCRel(RE)) - op_info->Value -= Pc + Offset + Size; + op_info->Value -= Pc + InstSize; const char *name = unwrapOrError(Symbol.getName(), info->O->getFileName()).data(); unsigned Type = info->O->getAnyRelocationType(RE); @@ -2803,7 +2804,7 @@ return 0; } if (Arch == Triple::arm) { - if (Offset != 0 || (Size != 4 && Size != 2)) + if (Offset != 0 || (InstSize != 4 && InstSize != 2)) return 0; if (info->O->getHeader().filetype != MachO::MH_OBJECT) { // TODO: @@ -2940,7 +2941,7 @@ return 1; } if (Arch == Triple::aarch64) { - if (Offset != 0 || Size != 4) + if (Offset != 0 || InstSize != 4) return 0; if (info->O->getHeader().filetype != MachO::MH_OBJECT) { // TODO: diff --git a/llvm/unittests/MC/X86/CMakeLists.txt b/llvm/unittests/MC/X86/CMakeLists.txt new file mode 100644 --- /dev/null +++ b/llvm/unittests/MC/X86/CMakeLists.txt @@ -0,0 +1,12 @@ +set(LLVM_LINK_COMPONENTS + MC + MCDisassembler + Target + X86Desc + X86Disassembler + X86Info + ) + +add_llvm_unittest(X86MCTests + X86MCDisassemblerTest.cpp + ) diff --git a/llvm/unittests/MC/X86/X86MCDisassemblerTest.cpp b/llvm/unittests/MC/X86/X86MCDisassemblerTest.cpp new file mode 100644 --- /dev/null +++ b/llvm/unittests/MC/X86/X86MCDisassemblerTest.cpp @@ -0,0 +1,146 @@ +//===- X86MCDisassemblerTest.cpp - Tests for X86 MCDisassembler -----------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "llvm/MC/MCAsmInfo.h" +#include "llvm/MC/MCContext.h" +#include "llvm/MC/MCDisassembler/MCDisassembler.h" +#include "llvm/MC/MCDisassembler/MCSymbolizer.h" +#include "llvm/MC/MCInst.h" +#include "llvm/MC/MCRegisterInfo.h" +#include "llvm/MC/MCSubtargetInfo.h" +#include "llvm/MC/MCTargetOptions.h" +#include "llvm/MC/TargetRegistry.h" +#include "llvm/Support/TargetSelect.h" +#include "gtest/gtest.h" + +using namespace llvm; + +namespace { + +struct Context { + const char *TripleName = "x86_64-unknown-elf"; + std::unique_ptr MRI; + std::unique_ptr MAI; + std::unique_ptr Ctx; + std::unique_ptr STI; + std::unique_ptr DisAsm; + + Context() { + LLVMInitializeX86TargetInfo(); + LLVMInitializeX86TargetMC(); + LLVMInitializeX86Disassembler(); + + // If we didn't build x86, do not run the test. + std::string Error; + const Target *TheTarget = TargetRegistry::lookupTarget(TripleName, Error); + if (!TheTarget) + return; + + MRI.reset(TheTarget->createMCRegInfo(TripleName)); + MAI.reset(TheTarget->createMCAsmInfo(*MRI, TripleName, MCTargetOptions())); + STI.reset(TheTarget->createMCSubtargetInfo(TripleName, "", "")); + Ctx = std::make_unique(Triple(TripleName), MAI.get(), MRI.get(), + STI.get()); + + DisAsm.reset(TheTarget->createMCDisassembler(*STI, *Ctx)); + } + + operator bool() { return Ctx.get(); } + operator MCContext &() { return *Ctx; }; +}; + +Context &getContext() { + static Context Ctxt; + return Ctxt; +} + +class X86MCSymbolizerTest : public MCSymbolizer { +public: + X86MCSymbolizerTest(MCContext &MC) : MCSymbolizer(MC, nullptr) {} + ~X86MCSymbolizerTest() {} + + struct OpInfo { + int64_t Value = 0; + uint64_t Offset = 0; + uint64_t Size; + }; + std::vector Operands; + uint64_t InstructionSize = 0; + + void reset() { + Operands.clear(); + InstructionSize = 0; + } + + bool tryAddingSymbolicOperand(MCInst &Inst, raw_ostream &CStream, + int64_t Value, uint64_t Address, bool IsBranch, + uint64_t Offset, uint64_t OpSize, + uint64_t InstSize) override { + Operands.push_back({Value, Offset, OpSize}); + InstructionSize = InstSize; + return false; + } + + void tryAddingPcLoadReferenceComment(raw_ostream &cStream, int64_t Value, + uint64_t Address) override {} +}; + +} // namespace + +TEST(X86Disassembler, X86MCSymbolizerTest) { + X86MCSymbolizerTest *TestSymbolizer = new X86MCSymbolizerTest(getContext()); + getContext().DisAsm->setSymbolizer( + std::unique_ptr(TestSymbolizer)); + + MCDisassembler::DecodeStatus Status; + MCInst Inst; + uint64_t InstSize; + + auto checkBytes = [&](ArrayRef Bytes) { + TestSymbolizer->reset(); + Status = + getContext().DisAsm->getInstruction(Inst, InstSize, Bytes, 0, nulls()); + ASSERT_TRUE(Status == MCDisassembler::Success); + EXPECT_EQ(TestSymbolizer->InstructionSize, InstSize); + }; + + auto checkOperand = [&](size_t OpNo, int64_t Value, uint64_t Offset, + uint64_t Size) { + ASSERT_TRUE(TestSymbolizer->Operands.size() > OpNo); + EXPECT_EQ(TestSymbolizer->Operands[OpNo].Value, Value); + EXPECT_EQ(TestSymbolizer->Operands[OpNo].Offset, Offset); + EXPECT_EQ(TestSymbolizer->Operands[OpNo].Size, Size); + }; + + // movq $0x80000, 0x80000 + checkBytes( + {0x48, 0xc7, 0x04, 0x25, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x08, 0x00}); + checkOperand(0, 0x80000, 4, 4); + checkOperand(1, 0x80000, 8, 4); + + // movq $0x2a, 0x123(%rax,%r14,8) + checkBytes( + {0x4a, 0xc7, 0x84, 0xf0, 0x23, 0x01, 0x00, 0x00, 0x2a, 0x00, 0x00, 0x00}); + checkOperand(0, 291, 4, 4); + checkOperand(1, 42, 8, 4); + + // movq $0xffffffffffffefe8, -0x1(%rip) + // Test that the value of the rip-relative operand is set correctly. + // The instruction address is 0 and the size is 12 bytes. + checkBytes( + {0x48, 0xc7, 0x05, 0xff, 0xff, 0xff, 0xff, 0xe8, 0xef, 0xff, 0xff}); + checkOperand(0, /*next instr address*/ 11 - /*disp*/ 1, 3, 4); + checkOperand(1, 0xffffffffffffefe8, 7, 4); + + // movq $0xfffffffffffffef5, (%r12) + // Test that the displacement operand has a size of 0, since it is not + // explicitly specified in the instruction. + checkBytes({0x49, 0xc7, 0x04, 0x24, 0xf5, 0xfe, 0xff, 0xff}); + checkOperand(0, 0, 4, 0); + checkOperand(1, 0xfffffffffffffef5, 4, 4); +}