Index: lib/Target/X86/Disassembler/X86DisassemblerDecoder.cpp =================================================================== --- lib/Target/X86/Disassembler/X86DisassemblerDecoder.cpp +++ lib/Target/X86/Disassembler/X86DisassemblerDecoder.cpp @@ -13,10 +13,10 @@ // //===----------------------------------------------------------------------===// -#include /* for va_*() */ -#include /* for vsnprintf() */ -#include /* for exit() */ -#include /* for memset() */ +#include /* for va_*() */ +#include /* for vsnprintf() */ +#include /* for exit() */ +#include /* for memset() */ #include "X86DisassemblerDecoder.h" @@ -48,12 +48,16 @@ #include "X86GenDisassemblerTables.inc" #ifndef NDEBUG -#define debug(s) do { Debug(__FILE__, __LINE__, s); } while (0) +#define debug(s) \ + do { \ + Debug(__FILE__, __LINE__, s); \ + } while (0) #else -#define debug(s) do { } while (0) +#define debug(s) \ + do { \ + } while (0) #endif - /* * contextForAttrs - Client for the instruction context table. Takes a set of * attributes and returns the appropriate decode context. @@ -77,10 +81,9 @@ * ModR/M extensions and escapes. * @return - true if the ModR/M byte is required, false otherwise. */ -static int modRMRequired(OpcodeType type, - InstructionContext insnContext, +static int modRMRequired(OpcodeType type, InstructionContext insnContext, uint16_t opcode) { - const struct ContextDecision* decision = nullptr; + const struct ContextDecision *decision = nullptr; switch (type) { case ONEBYTE: @@ -106,8 +109,9 @@ break; } - return decision->opcodeDecisions[insnContext].modRMDecisions[opcode]. - modrm_type != MODRM_ONEENTRY; + return decision->opcodeDecisions[insnContext] + .modRMDecisions[opcode] + .modrm_type != MODRM_ONEENTRY; } /* @@ -120,11 +124,9 @@ * @param modRM - The ModR/M byte if required, or any value if not. * @return - The UID of the instruction, or 0 on failure. 
*/ -static InstrUID decode(OpcodeType type, - InstructionContext insnContext, - uint8_t opcode, - uint8_t modRM) { - const struct ModRMDecision* dec = nullptr; +static InstrUID decode(OpcodeType type, InstructionContext insnContext, + uint8_t opcode, uint8_t modRM) { + const struct ModRMDecision *dec = nullptr; switch (type) { case ONEBYTE: @@ -158,18 +160,18 @@ return modRMTable[dec->instructionIDs]; case MODRM_SPLITRM: if (modFromModRM(modRM) == 0x3) - return modRMTable[dec->instructionIDs+1]; + return modRMTable[dec->instructionIDs + 1]; return modRMTable[dec->instructionIDs]; case MODRM_SPLITREG: if (modFromModRM(modRM) == 0x3) - return modRMTable[dec->instructionIDs+((modRM & 0x38) >> 3)+8]; - return modRMTable[dec->instructionIDs+((modRM & 0x38) >> 3)]; + return modRMTable[dec->instructionIDs + ((modRM & 0x38) >> 3) + 8]; + return modRMTable[dec->instructionIDs + ((modRM & 0x38) >> 3)]; case MODRM_SPLITMISC: if (modFromModRM(modRM) == 0x3) - return modRMTable[dec->instructionIDs+(modRM & 0x3f)+8]; - return modRMTable[dec->instructionIDs+((modRM & 0x38) >> 3)]; + return modRMTable[dec->instructionIDs + (modRM & 0x3f) + 8]; + return modRMTable[dec->instructionIDs + ((modRM & 0x38) >> 3)]; case MODRM_FULL: - return modRMTable[dec->instructionIDs+modRM]; + return modRMTable[dec->instructionIDs + modRM]; } } @@ -195,7 +197,7 @@ * with the data read. * @return - 0 if the read was successful; nonzero otherwise. */ -static int consumeByte(struct InternalInstruction* insn, uint8_t* byte) { +static int consumeByte(struct InternalInstruction *insn, uint8_t *byte) { int ret = insn->reader(insn->readerArg, byte, insn->readerCursor); if (!ret) @@ -211,30 +213,29 @@ * @param byte - See consumeByte(). * @return - See consumeByte(). 
*/ -static int lookAtByte(struct InternalInstruction* insn, uint8_t* byte) { +static int lookAtByte(struct InternalInstruction *insn, uint8_t *byte) { return insn->reader(insn->readerArg, byte, insn->readerCursor); } -static void unconsumeByte(struct InternalInstruction* insn) { +static void unconsumeByte(struct InternalInstruction *insn) { insn->readerCursor--; } -#define CONSUME_FUNC(name, type) \ - static int name(struct InternalInstruction* insn, type* ptr) { \ - type combined = 0; \ - unsigned offset; \ - for (offset = 0; offset < sizeof(type); ++offset) { \ - uint8_t byte; \ - int ret = insn->reader(insn->readerArg, \ - &byte, \ - insn->readerCursor + offset); \ - if (ret) \ - return ret; \ - combined = combined | ((uint64_t)byte << (offset * 8)); \ - } \ - *ptr = combined; \ - insn->readerCursor += sizeof(type); \ - return 0; \ +#define CONSUME_FUNC(name, type) \ + static int name(struct InternalInstruction *insn, type *ptr) { \ + type combined = 0; \ + unsigned offset; \ + for (offset = 0; offset < sizeof(type); ++offset) { \ + uint8_t byte; \ + int ret = \ + insn->reader(insn->readerArg, &byte, insn->readerCursor + offset); \ + if (ret) \ + return ret; \ + combined = combined | ((uint64_t)byte << (offset * 8)); \ + } \ + *ptr = combined; \ + insn->readerCursor += sizeof(type); \ + return 0; \ } /* @@ -262,8 +263,7 @@ * @param format - See printf(). * @param ... - See printf(). */ -static void dbgprintf(struct InternalInstruction* insn, - const char* format, +static void dbgprintf(struct InternalInstruction *insn, const char *format, ...) { char buffer[256]; va_list ap; @@ -289,10 +289,8 @@ * @param location - The location where the prefix is located (in the address * space of the instruction's reader). 
*/ -static void setPrefixPresent(struct InternalInstruction* insn, - uint8_t prefix, - uint64_t location) -{ +static void setPrefixPresent(struct InternalInstruction *insn, uint8_t prefix, + uint64_t location) { insn->prefixPresent[prefix] = 1; insn->prefixLocations[prefix] = location; } @@ -306,12 +304,10 @@ * @param location - The location to query. * @return - Whether the prefix is at that location. */ -static bool isPrefixAtLocation(struct InternalInstruction* insn, - uint8_t prefix, - uint64_t location) -{ +static bool isPrefixAtLocation(struct InternalInstruction *insn, uint8_t prefix, + uint64_t location) { return insn->prefixPresent[prefix] == 1 && - insn->prefixLocations[prefix] == location; + insn->prefixLocations[prefix] == location; } /* @@ -323,7 +319,7 @@ * @return - 0 if the instruction could be read until the end of the prefix * bytes, and no prefixes conflicted; nonzero otherwise. */ -static int readPrefixes(struct InternalInstruction* insn) { +static int readPrefixes(struct InternalInstruction *insn) { bool isPrefix = true; bool prefixGroups[4] = { false }; uint64_t prefixLocation; @@ -338,7 +334,8 @@ while (isPrefix) { prefixLocation = insn->readerCursor; - /* If we fail reading prefixes, just stop here and let the opcode reader deal with it */ + /* If we fail reading prefixes, just stop here and let the opcode reader + * deal with it */ if (consumeByte(insn, &byte)) break; @@ -349,10 +346,8 @@ if (insn->readerCursor - 1 == insn->startLocation && byte == 0xf0) break; - if (insn->readerCursor - 1 == insn->startLocation - && (byte == 0xf2 || byte == 0xf3) - && !lookAtByte(insn, &nextByte)) - { + if (insn->readerCursor - 1 == insn->startLocation && + (byte == 0xf2 || byte == 0xf3) && !lookAtByte(insn, &nextByte)) { /* * If the byte is 0xf2 or 0xf3, and any of the following conditions are * met: @@ -362,7 +357,7 @@ */ if ((byte == 0xf2 || byte == 0xf3) && ((nextByte == 0xf0) | - ((nextByte & 0xfe) == 0x86 || (nextByte & 0xf8) == 0x90))) + ((nextByte 
& 0xfe) == 0x86 || (nextByte & 0xf8) == 0x90))) insn->xAcquireRelease = true; /* * Also if the byte is 0xf3, and the following condition is met: @@ -370,9 +365,8 @@ * "mov mem, imm" (opcode 0xc6/0xc7) instructions. * then it should be disassembled as an xrelease not rep. */ - if (byte == 0xf3 && - (nextByte == 0x88 || nextByte == 0x89 || - nextByte == 0xc6 || nextByte == 0xc7)) + if (byte == 0xf3 && (nextByte == 0x88 || nextByte == 0x89 || + nextByte == 0xc6 || nextByte == 0xc7)) insn->xAcquireRelease = true; if (insn->mode == MODE_64BIT && (nextByte & 0xf0) == 0x40) { if (consumeByte(insn, &nextByte)) @@ -386,20 +380,20 @@ } switch (byte) { - case 0xf0: /* LOCK */ - case 0xf2: /* REPNE/REPNZ */ - case 0xf3: /* REP or REPE/REPZ */ + case 0xf0: /* LOCK */ + case 0xf2: /* REPNE/REPNZ */ + case 0xf3: /* REP or REPE/REPZ */ if (prefixGroups[0]) dbgprintf(insn, "Redundant Group 1 prefix"); prefixGroups[0] = true; setPrefixPresent(insn, byte, prefixLocation); break; - case 0x2e: /* CS segment override -OR- Branch not taken */ - case 0x36: /* SS segment override -OR- Branch taken */ - case 0x3e: /* DS segment override */ - case 0x26: /* ES segment override */ - case 0x64: /* FS segment override */ - case 0x65: /* GS segment override */ + case 0x2e: /* CS segment override -OR- Branch not taken */ + case 0x36: /* SS segment override -OR- Branch taken */ + case 0x3e: /* DS segment override */ + case 0x26: /* ES segment override */ + case 0x64: /* FS segment override */ + case 0x65: /* GS segment override */ switch (byte) { case 0x2e: insn->segmentOverride = SEG_OVERRIDE_CS; @@ -428,21 +422,21 @@ prefixGroups[1] = true; setPrefixPresent(insn, byte, prefixLocation); break; - case 0x66: /* Operand-size override */ + case 0x66: /* Operand-size override */ if (prefixGroups[2]) dbgprintf(insn, "Redundant Group 3 prefix"); prefixGroups[2] = true; hasOpSize = true; setPrefixPresent(insn, byte, prefixLocation); break; - case 0x67: /* Address-size override */ + case 0x67: /* 
Address-size override */ if (prefixGroups[3]) dbgprintf(insn, "Redundant Group 4 prefix"); prefixGroups[3] = true; hasAdSize = true; setPrefixPresent(insn, byte, prefixLocation); break; - default: /* Not a prefix byte */ + default: /* Not a prefix byte */ isPrefix = false; break; } @@ -467,7 +461,7 @@ } if ((insn->mode == MODE_64BIT || (byte1 & 0xc0) == 0xc0) && - ((~byte1 & 0xc) == 0xc) && ((byte2 & 0x4) == 0x4)) { + ((~byte1 & 0xc) == 0xc) && ((byte2 & 0x4) == 0x4)) { insn->vectorExtensionType = TYPE_EVEX; } else { unconsumeByte(insn); /* unconsume byte1 */ @@ -489,16 +483,16 @@ /* We simulate the REX prefix for simplicity's sake */ if (insn->mode == MODE_64BIT) { - insn->rexPrefix = 0x40 - | (wFromEVEX3of4(insn->vectorExtensionPrefix[2]) << 3) - | (rFromEVEX2of4(insn->vectorExtensionPrefix[1]) << 2) - | (xFromEVEX2of4(insn->vectorExtensionPrefix[1]) << 1) - | (bFromEVEX2of4(insn->vectorExtensionPrefix[1]) << 0); + insn->rexPrefix = 0x40 | + (wFromEVEX3of4(insn->vectorExtensionPrefix[2]) << 3) | + (rFromEVEX2of4(insn->vectorExtensionPrefix[1]) << 2) | + (xFromEVEX2of4(insn->vectorExtensionPrefix[1]) << 1) | + (bFromEVEX2of4(insn->vectorExtensionPrefix[1]) << 0); } dbgprintf(insn, "Found EVEX prefix 0x%hhx 0x%hhx 0x%hhx 0x%hhx", - insn->vectorExtensionPrefix[0], insn->vectorExtensionPrefix[1], - insn->vectorExtensionPrefix[2], insn->vectorExtensionPrefix[3]); + insn->vectorExtensionPrefix[0], insn->vectorExtensionPrefix[1], + insn->vectorExtensionPrefix[2], insn->vectorExtensionPrefix[3]); } } else if (byte == 0xc4) { uint8_t byte1; @@ -524,11 +518,11 @@ /* We simulate the REX prefix for simplicity's sake */ if (insn->mode == MODE_64BIT) { - insn->rexPrefix = 0x40 - | (wFromVEX3of3(insn->vectorExtensionPrefix[2]) << 3) - | (rFromVEX2of3(insn->vectorExtensionPrefix[1]) << 2) - | (xFromVEX2of3(insn->vectorExtensionPrefix[1]) << 1) - | (bFromVEX2of3(insn->vectorExtensionPrefix[1]) << 0); + insn->rexPrefix = 0x40 | + (wFromVEX3of3(insn->vectorExtensionPrefix[2]) << 3) 
| + (rFromVEX2of3(insn->vectorExtensionPrefix[1]) << 2) | + (xFromVEX2of3(insn->vectorExtensionPrefix[1]) << 1) | + (bFromVEX2of3(insn->vectorExtensionPrefix[1]) << 0); } dbgprintf(insn, "Found VEX prefix 0x%hhx 0x%hhx 0x%hhx", @@ -554,8 +548,8 @@ consumeByte(insn, &insn->vectorExtensionPrefix[1]); if (insn->mode == MODE_64BIT) { - insn->rexPrefix = 0x40 - | (rFromVEX2of2(insn->vectorExtensionPrefix[1]) << 2); + insn->rexPrefix = + 0x40 | (rFromVEX2of2(insn->vectorExtensionPrefix[1]) << 2); } switch (ppFromVEX2of2(insn->vectorExtensionPrefix[1])) { @@ -567,8 +561,7 @@ } dbgprintf(insn, "Found VEX prefix 0x%hhx 0x%hhx", - insn->vectorExtensionPrefix[0], - insn->vectorExtensionPrefix[1]); + insn->vectorExtensionPrefix[0], insn->vectorExtensionPrefix[1]); } } else if (byte == 0x8f) { uint8_t byte1; @@ -594,11 +587,11 @@ /* We simulate the REX prefix for simplicity's sake */ if (insn->mode == MODE_64BIT) { - insn->rexPrefix = 0x40 - | (wFromXOP3of3(insn->vectorExtensionPrefix[2]) << 3) - | (rFromXOP2of3(insn->vectorExtensionPrefix[1]) << 2) - | (xFromXOP2of3(insn->vectorExtensionPrefix[1]) << 1) - | (bFromXOP2of3(insn->vectorExtensionPrefix[1]) << 0); + insn->rexPrefix = 0x40 | + (wFromXOP3of3(insn->vectorExtensionPrefix[2]) << 3) | + (rFromXOP2of3(insn->vectorExtensionPrefix[1]) << 2) | + (xFromXOP2of3(insn->vectorExtensionPrefix[1]) << 1) | + (bFromXOP2of3(insn->vectorExtensionPrefix[1]) << 0); } switch (ppFromXOP3of3(insn->vectorExtensionPrefix[2])) { @@ -638,31 +631,31 @@ } if (insn->mode == MODE_16BIT) { - insn->registerSize = (hasOpSize ? 4 : 2); - insn->addressSize = (hasAdSize ? 4 : 2); - insn->displacementSize = (hasAdSize ? 4 : 2); - insn->immediateSize = (hasOpSize ? 4 : 2); + insn->registerSize = (hasOpSize ? 4 : 2); + insn->addressSize = (hasAdSize ? 4 : 2); + insn->displacementSize = (hasAdSize ? 4 : 2); + insn->immediateSize = (hasOpSize ? 4 : 2); } else if (insn->mode == MODE_32BIT) { - insn->registerSize = (hasOpSize ? 
2 : 4); - insn->addressSize = (hasAdSize ? 2 : 4); - insn->displacementSize = (hasAdSize ? 2 : 4); - insn->immediateSize = (hasOpSize ? 2 : 4); + insn->registerSize = (hasOpSize ? 2 : 4); + insn->addressSize = (hasAdSize ? 2 : 4); + insn->displacementSize = (hasAdSize ? 2 : 4); + insn->immediateSize = (hasOpSize ? 2 : 4); } else if (insn->mode == MODE_64BIT) { if (insn->rexPrefix && wFromREX(insn->rexPrefix)) { - insn->registerSize = 8; - insn->addressSize = (hasAdSize ? 4 : 8); - insn->displacementSize = 4; - insn->immediateSize = 4; + insn->registerSize = 8; + insn->addressSize = (hasAdSize ? 4 : 8); + insn->displacementSize = 4; + insn->immediateSize = 4; } else if (insn->rexPrefix) { - insn->registerSize = (hasOpSize ? 2 : 4); - insn->addressSize = (hasAdSize ? 4 : 8); - insn->displacementSize = (hasOpSize ? 2 : 4); - insn->immediateSize = (hasOpSize ? 2 : 4); + insn->registerSize = (hasOpSize ? 2 : 4); + insn->addressSize = (hasAdSize ? 4 : 8); + insn->displacementSize = (hasOpSize ? 2 : 4); + insn->immediateSize = (hasOpSize ? 2 : 4); } else { - insn->registerSize = (hasOpSize ? 2 : 4); - insn->addressSize = (hasAdSize ? 4 : 8); - insn->displacementSize = (hasOpSize ? 2 : 4); - insn->immediateSize = (hasOpSize ? 2 : 4); + insn->registerSize = (hasOpSize ? 2 : 4); + insn->addressSize = (hasAdSize ? 4 : 8); + insn->displacementSize = (hasOpSize ? 2 : 4); + insn->immediateSize = (hasOpSize ? 2 : 4); } } @@ -676,7 +669,7 @@ * @param insn - The instruction whose opcode is to be read. * @return - 0 if the opcode could be read successfully; nonzero otherwise. 
*/ -static int readOpcode(struct InternalInstruction* insn) { +static int readOpcode(struct InternalInstruction *insn) { /* Determine the length of the primary opcode */ uint8_t current; @@ -778,7 +771,7 @@ return 0; } -static int readModRM(struct InternalInstruction* insn); +static int readModRM(struct InternalInstruction *insn); /* * getIDWithAttrMask - Determines the ID of an instruction, consuming @@ -792,30 +785,25 @@ * @return - 0 if the ModR/M could be read when needed or was not * needed; nonzero otherwise. */ -static int getIDWithAttrMask(uint16_t* instructionID, - struct InternalInstruction* insn, +static int getIDWithAttrMask(uint16_t *instructionID, + struct InternalInstruction *insn, uint16_t attrMask) { bool hasModRMExtension; InstructionContext instructionClass = contextForAttrs(attrMask); - hasModRMExtension = modRMRequired(insn->opcodeType, - instructionClass, - insn->opcode); + hasModRMExtension = + modRMRequired(insn->opcodeType, instructionClass, insn->opcode); if (hasModRMExtension) { if (readModRM(insn)) return -1; - *instructionID = decode(insn->opcodeType, - instructionClass, - insn->opcode, - insn->modRM); + *instructionID = + decode(insn->opcodeType, instructionClass, insn->opcode, insn->modRM); } else { - *instructionID = decode(insn->opcodeType, - instructionClass, - insn->opcode, - 0); + *instructionID = + decode(insn->opcodeType, instructionClass, insn->opcode, 0); } return 0; @@ -828,7 +816,7 @@ * @param orig - The instruction that is not 16-bit * @param equiv - The instruction that is 16-bit */ -static bool is16BitEquivalent(const char* orig, const char* equiv) { +static bool is16BitEquivalent(const char *orig, const char *equiv) { off_t i; for (i = 0;; i++) { @@ -853,13 +841,13 @@ * * @param name - The instruction that is not 16-bit */ -static bool is64Bit(const char* name) { +static bool is64Bit(const char *name) { off_t i; for (i = 0;; ++i) { if (name[i] == '\0') return false; - if (name[i] == '6' && name[i+1] == '4') + if 
(name[i] == '6' && name[i + 1] == '4') return true; } } @@ -873,7 +861,7 @@ * @return - 0 if the ModR/M could be read when needed or was not needed; * nonzero otherwise. */ -static int getID(struct InternalInstruction* insn, const void *miiArg) { +static int getID(struct InternalInstruction *insn, const void *miiArg) { uint16_t attrMask; uint16_t instructionID; @@ -959,7 +947,8 @@ return -1; } } else { - if (insn->mode != MODE_16BIT && isPrefixAtLocation(insn, 0x66, insn->necessaryPrefixLocation)) + if (insn->mode != MODE_16BIT && + isPrefixAtLocation(insn, 0x66, insn->necessaryPrefixLocation)) attrMask |= ATTR_OPSIZE; else if (isPrefixAtLocation(insn, 0x67, insn->necessaryPrefixLocation)) attrMask |= ATTR_ADSIZE; @@ -980,6 +969,47 @@ insn->opcode == 0xE3) attrMask ^= ATTR_ADSIZE; + /* + * In 64-bit mode all f64 superscripted opcodes ignore the operand-size prefix + * (0x66), so CALL/JMP/Jcc instructions must ignore it and consume 4 bytes. + */ + + if (insn->mode == MODE_64BIT && + isPrefixAtLocation(insn, 0x66, insn->necessaryPrefixLocation)) { + switch (insn->opcode) { + case 0xE8: + case 0xE9: + if (insn->opcodeType == + ONEBYTE) { // breaks psubsb and other mmx instructions otherwise + attrMask ^= ATTR_OPSIZE; + insn->immediateSize = 4; + insn->displacementSize = 4; + } + break; + case 0x82: + case 0x83: + case 0x84: + case 0x85: + case 0x86: + case 0x87: + case 0x88: + case 0x89: + case 0x8A: + case 0x8B: + case 0x8C: + case 0x8D: + case 0x8E: + case 0x8F: + if (insn->opcodeType == + TWOBYTE) { // breaks lea and three byte ops otherwise + attrMask ^= ATTR_OPSIZE; + insn->immediateSize = 4; + insn->displacementSize = 4; // otherwise not sign extended + } + break; + } + } + if (getIDWithAttrMask(&instructionID, insn, attrMask)) return -1; @@ -999,8 +1029,8 @@ wFromXOP3of3(insn->vectorExtensionPrefix[2]))) { uint16_t instructionIDWithREXW; - if (getIDWithAttrMask(&instructionIDWithREXW, - insn, attrMask | ATTR_REXW)) { + if (getIDWithAttrMask(&instructionIDWithREXW, insn, +
attrMask | ATTR_REXW)) { insn->instructionID = instructionID; insn->spec = specifierForUID(instructionID); return 0; @@ -1058,8 +1088,7 @@ spec = specifierForUID(instructionID); - if (getIDWithAttrMask(&instructionIDWithOpsize, - insn, + if (getIDWithAttrMask(&instructionIDWithOpsize, insn, attrMask | ATTR_OPSIZE)) { /* * ModRM required with OpSize but not present; give up and return version @@ -1101,9 +1130,7 @@ /* Borrow opcode from one of the other XCHGar opcodes */ insn->opcode = 0x91; - if (getIDWithAttrMask(&instructionIDWithNewOpcode, - insn, - attrMask)) { + if (getIDWithAttrMask(&instructionIDWithNewOpcode, insn, attrMask)) { insn->opcode = 0x90; insn->instructionID = instructionID; @@ -1135,7 +1162,7 @@ * @param insn - The instruction whose SIB byte is to be read. * @return - 0 if the SIB byte was successfully read; nonzero otherwise. */ -static int readSIB(struct InternalInstruction* insn) { +static int readSIB(struct InternalInstruction *insn) { SIBIndex sibIndexBase = SIB_INDEX_NONE; SIBBase sibBaseBase = SIB_BASE_NONE; uint8_t index, base; @@ -1174,8 +1201,7 @@ break; default: insn->sibIndex = (SIBIndex)(sibIndexBase + index); - if (insn->sibIndex == SIB_INDEX_sib || - insn->sibIndex == SIB_INDEX_sib64) + if (insn->sibIndex == SIB_INDEX_sib || insn->sibIndex == SIB_INDEX_sib64) insn->sibIndex = SIB_INDEX_NONE; break; } @@ -1233,7 +1259,7 @@ * @return - 0 if the displacement byte was successfully read; nonzero * otherwise. */ -static int readDisplacement(struct InternalInstruction* insn) { +static int readDisplacement(struct InternalInstruction *insn) { int8_t d8; int16_t d16; int32_t d32; @@ -1278,7 +1304,7 @@ * @param insn - The instruction whose addressing information is to be read. * @return - 0 if the information was successfully read; nonzero otherwise. 
*/ -static int readModRM(struct InternalInstruction* insn) { +static int readModRM(struct InternalInstruction *insn) { uint8_t mod, rm, reg; dbgprintf(insn, "readModRM()"); @@ -1290,9 +1316,9 @@ return -1; insn->consumedModRM = true; - mod = modFromModRM(insn->modRM); - rm = rmFromModRM(insn->modRM); - reg = regFromModRM(insn->modRM); + mod = modFromModRM(insn->modRM); + rm = rmFromModRM(insn->modRM); + reg = regFromModRM(insn->modRM); /* * This goes by insn->registerSize to pick the correct register, which messes @@ -1315,10 +1341,10 @@ } reg |= rFromREX(insn->rexPrefix) << 3; - rm |= bFromREX(insn->rexPrefix) << 3; + rm |= bFromREX(insn->rexPrefix) << 3; if (insn->vectorExtensionType == TYPE_EVEX) { reg |= r2FromEVEX2of4(insn->vectorExtensionPrefix[1]) << 4; - rm |= xFromEVEX2of4(insn->vectorExtensionPrefix[1]) << 4; + rm |= xFromEVEX2of4(insn->vectorExtensionPrefix[1]) << 4; } insn->reg = (Reg)(insn->regBase + reg); @@ -1369,9 +1395,8 @@ switch (rm) { case 0x14: case 0x4: - case 0xc: /* in case REXW.b is set */ - insn->eaBase = (insn->addressSize == 4 ? - EA_BASE_sib : EA_BASE_sib64); + case 0xc: /* in case REXW.b is set */ + insn->eaBase = (insn->addressSize == 4 ? EA_BASE_sib : EA_BASE_sib64); if (readSIB(insn) || readDisplacement(insn)) return -1; break; @@ -1388,13 +1413,13 @@ break; case 0x1: insn->displacementSize = 1; - /* FALLTHROUGH */ + /* FALLTHROUGH */ case 0x2: insn->eaDisplacement = (mod == 0x1 ? 
EA_DISP_8 : EA_DISP_32); switch (rm) { case 0x14: case 0x4: - case 0xc: /* in case REXW.b is set */ + case 0xc: /* in case REXW.b is set */ insn->eaBase = EA_BASE_sib; if (readSIB(insn) || readDisplacement(insn)) return -1; @@ -1417,58 +1442,55 @@ return 0; } -#define GENERIC_FIXUP_FUNC(name, base, prefix) \ - static uint8_t name(struct InternalInstruction *insn, \ - OperandType type, \ - uint8_t index, \ - uint8_t *valid) { \ - *valid = 1; \ - switch (type) { \ - default: \ - debug("Unhandled register type"); \ - *valid = 0; \ - return 0; \ - case TYPE_Rv: \ - return base + index; \ - case TYPE_R8: \ - if (insn->rexPrefix && \ - index >= 4 && index <= 7) { \ - return prefix##_SPL + (index - 4); \ - } else { \ - return prefix##_AL + index; \ - } \ - case TYPE_R16: \ - return prefix##_AX + index; \ - case TYPE_R32: \ - return prefix##_EAX + index; \ - case TYPE_R64: \ - return prefix##_RAX + index; \ - case TYPE_XMM512: \ - return prefix##_ZMM0 + index; \ - case TYPE_XMM256: \ - return prefix##_YMM0 + index; \ - case TYPE_XMM128: \ - case TYPE_XMM64: \ - case TYPE_XMM32: \ - case TYPE_XMM: \ - return prefix##_XMM0 + index; \ - case TYPE_VK1: \ - case TYPE_VK8: \ - case TYPE_VK16: \ - if (index > 7) \ - *valid = 0; \ - return prefix##_K0 + index; \ - case TYPE_MM64: \ - return prefix##_MM0 + (index & 0x7); \ - case TYPE_SEGMENTREG: \ - if (index > 5) \ - *valid = 0; \ - return prefix##_ES + index; \ - case TYPE_DEBUGREG: \ - return prefix##_DR0 + index; \ - case TYPE_CONTROLREG: \ - return prefix##_CR0 + index; \ - } \ +#define GENERIC_FIXUP_FUNC(name, base, prefix) \ + static uint8_t name(struct InternalInstruction *insn, OperandType type, \ + uint8_t index, uint8_t *valid) { \ + *valid = 1; \ + switch (type) { \ + default: \ + debug("Unhandled register type"); \ + *valid = 0; \ + return 0; \ + case TYPE_Rv: \ + return base + index; \ + case TYPE_R8: \ + if (insn->rexPrefix && index >= 4 && index <= 7) { \ + return prefix##_SPL + (index - 4); \ + } else { \ + return 
prefix##_AL + index; \ + } \ + case TYPE_R16: \ + return prefix##_AX + index; \ + case TYPE_R32: \ + return prefix##_EAX + index; \ + case TYPE_R64: \ + return prefix##_RAX + index; \ + case TYPE_XMM512: \ + return prefix##_ZMM0 + index; \ + case TYPE_XMM256: \ + return prefix##_YMM0 + index; \ + case TYPE_XMM128: \ + case TYPE_XMM64: \ + case TYPE_XMM32: \ + case TYPE_XMM: \ + return prefix##_XMM0 + index; \ + case TYPE_VK1: \ + case TYPE_VK8: \ + case TYPE_VK16: \ + if (index > 7) \ + *valid = 0; \ + return prefix##_K0 + index; \ + case TYPE_MM64: \ + return prefix##_MM0 + (index & 0x7); \ + case TYPE_SEGMENTREG: \ + if (index > 5) \ + *valid = 0; \ + return prefix##_ES + index; \ + case TYPE_DEBUGREG: \ + return prefix##_DR0 + index; \ + case TYPE_CONTROLREG: \ + return prefix##_CR0 + index; \ + } \ } /* @@ -1484,8 +1506,8 @@ * field is valid for the register class; 0 if not. * @return - The proper value. */ -GENERIC_FIXUP_FUNC(fixupRegValue, insn->regBase, MODRM_REG) -GENERIC_FIXUP_FUNC(fixupRMValue, insn->eaRegBase, EA_REG) +GENERIC_FIXUP_FUNC(fixupRegValue, insn->regBase, MODRM_REG) +GENERIC_FIXUP_FUNC(fixupRMValue, insn->eaRegBase, EA_REG) /* * fixupReg - Consults an operand specifier to determine which of the @@ -1507,27 +1529,21 @@ debug("Expected a REG or R/M encoding in fixupReg"); return -1; case ENCODING_VVVV: - insn->vvvv = (Reg)fixupRegValue(insn, - (OperandType)op->type, - insn->vvvv, - &valid); + insn->vvvv = + (Reg)fixupRegValue(insn, (OperandType)op->type, insn->vvvv, &valid); if (!valid) return -1; break; case ENCODING_REG: - insn->reg = (Reg)fixupRegValue(insn, - (OperandType)op->type, - insn->reg - insn->regBase, - &valid); + insn->reg = (Reg)fixupRegValue(insn, (OperandType)op->type, + insn->reg - insn->regBase, &valid); if (!valid) return -1; break; CASE_ENCODING_RM: if (insn->eaBase >= insn->eaRegBase) { - insn->eaBase = (EABase)fixupRMValue(insn, - (OperandType)op->type, - insn->eaBase - insn->eaRegBase, - &valid); + insn->eaBase = 
(EABase)fixupRMValue( + insn, (OperandType)op->type, insn->eaBase - insn->eaRegBase, &valid); if (!valid) return -1; } @@ -1548,7 +1564,7 @@ * RAX. * @return - 0 on success; nonzero otherwise. */ -static int readOpcodeRegister(struct InternalInstruction* insn, uint8_t size) { +static int readOpcodeRegister(struct InternalInstruction *insn, uint8_t size) { dbgprintf(insn, "readOpcodeRegister()"); if (size == 0) @@ -1556,30 +1572,28 @@ switch (size) { case 1: - insn->opcodeRegister = (Reg)(MODRM_REG_AL + ((bFromREX(insn->rexPrefix) << 3) - | (insn->opcode & 7))); - if (insn->rexPrefix && - insn->opcodeRegister >= MODRM_REG_AL + 0x4 && + insn->opcodeRegister = (Reg)( + MODRM_REG_AL + ((bFromREX(insn->rexPrefix) << 3) | (insn->opcode & 7))); + if (insn->rexPrefix && insn->opcodeRegister >= MODRM_REG_AL + 0x4 && insn->opcodeRegister < MODRM_REG_AL + 0x8) { - insn->opcodeRegister = (Reg)(MODRM_REG_SPL - + (insn->opcodeRegister - MODRM_REG_AL - 4)); + insn->opcodeRegister = + (Reg)(MODRM_REG_SPL + (insn->opcodeRegister - MODRM_REG_AL - 4)); } break; case 2: - insn->opcodeRegister = (Reg)(MODRM_REG_AX - + ((bFromREX(insn->rexPrefix) << 3) - | (insn->opcode & 7))); + insn->opcodeRegister = (Reg)( + MODRM_REG_AX + ((bFromREX(insn->rexPrefix) << 3) | (insn->opcode & 7))); break; case 4: - insn->opcodeRegister = (Reg)(MODRM_REG_EAX - + ((bFromREX(insn->rexPrefix) << 3) - | (insn->opcode & 7))); + insn->opcodeRegister = + (Reg)(MODRM_REG_EAX + + ((bFromREX(insn->rexPrefix) << 3) | (insn->opcode & 7))); break; case 8: - insn->opcodeRegister = (Reg)(MODRM_REG_RAX - + ((bFromREX(insn->rexPrefix) << 3) - | (insn->opcode & 7))); + insn->opcodeRegister = + (Reg)(MODRM_REG_RAX + + ((bFromREX(insn->rexPrefix) << 3) | (insn->opcode & 7))); break; } @@ -1595,7 +1609,7 @@ * @return - 0 if the immediate was successfully consumed; nonzero * otherwise. 
*/ -static int readImmediate(struct InternalInstruction* insn, uint8_t size) { +static int readImmediate(struct InternalInstruction *insn, uint8_t size) { uint8_t imm8; uint16_t imm16; uint32_t imm32; @@ -1649,7 +1663,7 @@ * @return - 0 if the vvvv was successfully consumed; nonzero * otherwise. */ -static int readVVVV(struct InternalInstruction* insn) { +static int readVVVV(struct InternalInstruction *insn) { dbgprintf(insn, "readVVVV()"); int vvvv; @@ -1679,7 +1693,7 @@ * @param insn - The instruction whose opcode field is to be read. * @return - 0 on success; nonzero otherwise. */ -static int readMaskRegister(struct InternalInstruction* insn) { +static int readMaskRegister(struct InternalInstruction *insn) { dbgprintf(insn, "readMaskRegister()"); if (insn->vectorExtensionType != TYPE_EVEX) @@ -1697,7 +1711,7 @@ * @param insn - The instruction whose operands are to be read and interpreted. * @return - 0 if all operands could be read; nonzero otherwise. */ -static int readOperands(struct InternalInstruction* insn) { +static int readOperands(struct InternalInstruction *insn) { int hasVVVV, needVVVV; int sawRegImm = 0; @@ -1737,14 +1751,13 @@ /* Saw a register immediate so don't read again and instead split the previous immediate. FIXME: This is a hack. 
*/ insn->immediates[insn->numImmediatesConsumed] = - insn->immediates[insn->numImmediatesConsumed - 1] & 0xf; + insn->immediates[insn->numImmediatesConsumed - 1] & 0xf; ++insn->numImmediatesConsumed; break; } if (readImmediate(insn, 1)) return -1; - if (Op.type == TYPE_XMM128 || - Op.type == TYPE_XMM256) + if (Op.type == TYPE_XMM128 || Op.type == TYPE_XMM256) sawRegImm = 1; break; case ENCODING_IW: @@ -1809,7 +1822,8 @@ } /* If we didn't find ENCODING_VVVV operand, but non-zero vvvv present, fail */ - if (needVVVV) return -1; + if (needVVVV) + return -1; return 0; } @@ -1849,19 +1863,16 @@ insn->mode = mode; insn->numImmediatesConsumed = 0; - if (readPrefixes(insn) || - readOpcode(insn) || - getID(insn, miiArg) || - insn->instructionID == 0 || - readOperands(insn)) + if (readPrefixes(insn) || readOpcode(insn) || getID(insn, miiArg) || + insn->instructionID == 0 || readOperands(insn)) return -1; insn->operands = x86OperandSets[insn->spec->operands]; insn->length = insn->readerCursor - insn->startLocation; - dbgprintf(insn, "Read from 0x%llx to 0x%llx: length %zu", - startLoc, insn->readerCursor, insn->length); + dbgprintf(insn, "Read from 0x%llx to 0x%llx: length %zu", startLoc, + insn->readerCursor, insn->length); if (insn->length > 15) dbgprintf(insn, "Instruction exceeds 15-byte limit"); Index: test/MC/Disassembler/X86/x86-64.txt =================================================================== --- test/MC/Disassembler/X86/x86-64.txt +++ test/MC/Disassembler/X86/x86-64.txt @@ -301,3 +301,114 @@ # CHECK: movq %rax, 1515870810 0x67, 0x48 0xa3 0x5a 0x5a 0x5a 0x5a + +# CHECK: callq -32769 +0x66 0xe8 0xff 0x7f 0xff 0xff + +# CHECK: callq -32769 +0x66 0x66 0x48 0xe8 0xff 0x7f 0xff 0xff + +# CHECK: jmp -32769 +0xe9 0xff 0x7f 0xff 0xff + +# CHECK: jmp -32769 +0x66 0xe9 0xff 0x7f 0xff 0xff + +# CHECK: jmp -32769 +0x66 0x66 0x48 0xe9 0xff 0x7f 0xff 0xff + +# CHECK: jb -32769 +0x0f 0x82 0xff 0x7f 0xff 0xff + +# CHECK: jb -32769 +0x66 0x0f 0x82 0xff 0x7f 0xff 0xff + +# 
CHECK: jae -32769 +0x0f 0x83 0xff 0x7f 0xff 0xff + +# CHECK: jae -32769 +0x66 0x0f 0x83 0xff 0x7f 0xff 0xff + +# CHECK: je -32769 +0x0f 0x84 0xff 0x7f 0xff 0xff + +# CHECK: je -32769 +0x66 0x0f 0x84 0xff 0x7f 0xff 0xff + +# CHECK: jne -32769 +0x0f 0x85 0xff 0x7f 0xff 0xff + +# CHECK: jne -32769 +0x66 0x0f 0x85 0xff 0x7f 0xff 0xff + +# CHECK: jbe -32769 +0x0f 0x86 0xff 0x7f 0xff 0xff + +# CHECK: jbe -32769 +0x66 0x0f 0x86 0xff 0x7f 0xff 0xff + +# CHECK: ja -32769 +0x0f 0x87 0xff 0x7f 0xff 0xff + +# CHECK: ja -32769 +0x66 0x0f 0x87 0xff 0x7f 0xff 0xff + +# CHECK: js -32769 +0x0f 0x88 0xff 0x7f 0xff 0xff + +# CHECK: js -32769 +0x66 0x0f 0x88 0xff 0x7f 0xff 0xff + +# CHECK: jns -32769 +0x0f 0x89 0xff 0x7f 0xff 0xff + +# CHECK: jns -32769 +0x66 0x0f 0x89 0xff 0x7f 0xff 0xff + +# CHECK: jp -32769 +0x0f 0x8a 0xff 0x7f 0xff 0xff + +# CHECK: jp -32769 +0x66 0x0f 0x8a 0xff 0x7f 0xff 0xff + +# CHECK: jnp -32769 +0x0f 0x8b 0xff 0x7f 0xff 0xff + +# CHECK: jnp -32769 +0x66 0x0f 0x8b 0xff 0x7f 0xff 0xff + +# CHECK: jl -32769 +0x0f 0x8c 0xff 0x7f 0xff 0xff + +# CHECK: jl -32769 +0x66 0x0f 0x8c 0xff 0x7f 0xff 0xff + +# CHECK: jge -32769 +0x0f 0x8d 0xff 0x7f 0xff 0xff + +# CHECK: jge -32769 +0x66 0x0f 0x8d 0xff 0x7f 0xff 0xff + +# CHECK: jle -32769 +0x0f 0x8e 0xff 0x7f 0xff 0xff + +# CHECK: jle -32769 +0x66 0x0f 0x8e 0xff 0x7f 0xff 0xff + +# CHECK: jg -32769 +0x0f 0x8f 0xff 0x7f 0xff 0xff + +# CHECK: jg -32769 +0x66 0x0f 0x8f 0xff 0x7f 0xff 0xff + +# CHECK: lcallw *-32769(%rip) +0x66 0xff 0x1d 0xff 0x7f 0xff 0xff + +# CHECK: ljmpw *-32769(%rip) +0x66 0xff 0x2d 0xff 0x7f 0xff 0xff + +# CHECK: psubsb (%rdx), %mm3 +0x0f 0xe8 0x1a + +# CHECK: psubsb (%rdx), %xmm3 +0x66 0x0f 0xe8 0x1a