Index: lib/Target/X86/Disassembler/X86DisassemblerDecoder.h =================================================================== --- lib/Target/X86/Disassembler/X86DisassemblerDecoder.h +++ lib/Target/X86/Disassembler/X86DisassemblerDecoder.h @@ -546,10 +546,13 @@ // Prefix state + // TODO: we're able to get rid of the following 2 arrays // 1 if the prefix byte corresponding to the entry is present; 0 if not uint8_t prefixPresent[0x100]; // contains the location (for use with the reader) of the prefix byte uint64_t prefixLocations[0x100]; + // The possible mandatory prefix + uint8_t mandatory_prefix; // The value of the vector extension prefix(EVEX/VEX/XOP), if present uint8_t vectorExtensionPrefix[4]; // The type of the vector extension prefix Index: lib/Target/X86/Disassembler/X86DisassemblerDecoder.cpp =================================================================== --- lib/Target/X86/Disassembler/X86DisassemblerDecoder.cpp +++ lib/Target/X86/Disassembler/X86DisassemblerDecoder.cpp @@ -277,6 +277,12 @@ insn->dlog(insn->dlogArg, buffer); } +static bool isREX(struct InternalInstruction *insn, uint8_t prefix) { + if (insn->mode == MODE_64BIT) + return prefix >= 0x40 && prefix <= 0x4f; + return false; +} + /* * setPrefixPresent - Marks that a particular prefix is present at a particular * location. @@ -290,6 +296,38 @@ uint8_t prefix, uint64_t location) { + uint8_t nextByte; + switch (prefix) { + case 0xf2: + case 0xf3: + if (lookAtByte(insn, &nextByte)) + break; + // TODO: + // 1. There could be several 0x66 + // 2. if (nextByte == 0x66) and nextNextByte != 0x0f then + // it's not a mandatory prefix + // 3. 
if (nextByte >= 0x40 && nextByte <= 0x4f) it's REX and we need + // 0x0f exactly after it to be a mandatory prefix + if (isREX(insn, nextByte) || nextByte == 0x0f || nextByte == 0x66) { + // The last of 0xf2/0xf3 is the mandatory prefix + insn->mandatory_prefix = prefix; + insn->necessaryPrefixLocation = location; + break; + } + break; + case 0x66: + if (lookAtByte(insn, &nextByte)) + break; + // 0x66 can't overwrite an existing mandatory prefix and should be ignored + if (!insn->mandatory_prefix && + (nextByte == 0x0f || isREX(insn, nextByte))) { + insn->mandatory_prefix = prefix; + insn->necessaryPrefixLocation = location; + } + break; + } + if (!insn->mandatory_prefix) + insn->necessaryPrefixLocation = location; insn->prefixPresent[prefix] = 1; insn->prefixLocations[prefix] = location; } @@ -343,13 +381,10 @@ * If the byte is a LOCK/REP/REPNE prefix and not a part of the opcode, then * break and let it be disassembled as a normal "instruction". */ - if (insn->readerCursor - 1 == insn->startLocation && byte == 0xf0) + if (insn->readerCursor - 1 == insn->startLocation && byte == 0xf0) // LOCK break; - if (insn->readerCursor - 1 == insn->startLocation - && (byte == 0xf2 || byte == 0xf3) - && !lookAtByte(insn, &nextByte)) - { + if ((byte == 0xf2 || byte == 0xf3) && !lookAtByte(insn, &nextByte)) { /* * If the byte is 0xf2 or 0xf3, and any of the following conditions are * met: @@ -357,9 +392,8 @@ * - it is followed by an xchg instruction * then it should be disassembled as a xacquire/xrelease not repne/rep. 
*/ - if ((byte == 0xf2 || byte == 0xf3) && - ((nextByte == 0xf0) || - ((nextByte & 0xfe) == 0x86 || (nextByte & 0xf8) == 0x90))) + if (((nextByte == 0xf0) || + ((nextByte & 0xfe) == 0x86 || (nextByte & 0xf8) == 0x90))) insn->xAcquireRelease = true; /* * Also if the byte is 0xf3, and the following condition is met: @@ -378,7 +412,13 @@ return -1; unconsumeByte(insn); } - if (nextByte != 0x0f && nextByte != 0x90) + // If the current byte can't be a mandatory prefix then it's a simple + // repeat prefix and should be disassembled as a separate instruction + if (nextByte != 0x0f && + // We can have f2 f3 f2 66 f2 0f (in any order) as a valid set + // of prefixes with the last one as a mandatory prefix (SSE/SSE2...) + nextByte != 0xf2 && nextByte != 0xf3 && nextByte != 0x66 && + nextByte != 0x90 && !isREX(insn, nextByte)) break; } @@ -426,11 +466,13 @@ setPrefixPresent(insn, byte, prefixLocation); break; case 0x66: /* Operand-size override */ - if (prefixGroups[2]) - dbgprintf(insn, "Redundant Group 3 prefix"); - prefixGroups[2] = true; - hasOpSize = true; setPrefixPresent(insn, byte, prefixLocation); + if (!insn->mandatory_prefix) { + if (prefixGroups[2]) + dbgprintf(insn, "Redundant Group 3 prefix"); + prefixGroups[2] = true; + } + hasOpSize = true; break; case 0x67: /* Address-size override */ if (prefixGroups[3]) @@ -624,10 +666,8 @@ insn->necessaryPrefixLocation = insn->readerCursor - 2; dbgprintf(insn, "Found REX prefix 0x%hhx", byte); - } else { + } else unconsumeByte(insn); - insn->necessaryPrefixLocation = insn->readerCursor - 1; - } } else { unconsumeByte(insn); insn->necessaryPrefixLocation = insn->readerCursor - 1; @@ -950,8 +990,19 @@ } else { return -1; } + } else if (insn->mode == MODE_64BIT && !insn->mandatory_prefix) { + // If we don't have mandatory prefix we should use "standard" prefixes here + if (insn->prefixPresent[0x66]) + attrMask |= ATTR_OPSIZE; + if (insn->prefixPresent[0x67]) + attrMask |= ATTR_ADSIZE; + if (insn->prefixPresent[0xf2]) + attrMask |= 
ATTR_XD; + if (insn->prefixPresent[0xf3]) + attrMask |= ATTR_XS; } else { - if (insn->mode != MODE_16BIT && isPrefixAtLocation(insn, 0x66, insn->necessaryPrefixLocation)) + if (insn->mode != MODE_16BIT && + isPrefixAtLocation(insn, 0x66, insn->necessaryPrefixLocation)) attrMask |= ATTR_OPSIZE; else if (isPrefixAtLocation(insn, 0x67, insn->necessaryPrefixLocation)) attrMask |= ATTR_ADSIZE; Index: test/MC/Disassembler/X86/prefixes.txt =================================================================== --- test/MC/Disassembler/X86/prefixes.txt +++ test/MC/Disassembler/X86/prefixes.txt @@ -1,5 +1,60 @@ # RUN: llvm-mc --disassemble %s -triple=x86_64 | FileCheck %s +# CHECK: rep +# CHECK-NEXT: insb %dx, %es:(%rdi) +0xf3 0x6c #rep ins +# CHECK: rep +# CHECK-NEXT: insl %dx, %es:(%rdi) +0xf3 0x6d #rep ins +# CHECK: rep +# CHECK-NEXT: movsb (%rsi), %es:(%rdi) +0xf3 0xa4 #rep movs +# CHECK: rep +# CHECK-NEXT: movsl (%rsi), %es:(%rdi) +0xf3 0xa5 #rep movs +# CHECK: rep +# CHECK-NEXT: outsb (%rsi), %dx +0xf3 0x6e #rep outs +# CHECK: rep +# CHECK-NEXT: outsl (%rsi), %dx +0xf3 0x6f #rep outs +# CHECK: rep +# CHECK-NEXT: lodsb (%rsi), %al +0xf3 0xac #rep lods +# CHECK: rep +# CHECK-NEXT: lodsl (%rsi), %eax +0xf3 0xad #rep lods +# CHECK: rep +# CHECK-NEXT: stosb %al, %es:(%rdi) +0xf3 0xaa #rep stos +# CHECK: rep +# CHECK-NEXT: stosl %eax, %es:(%rdi) +0xf3 0xab #rep stos +# CHECK: rep +# CHECK-NEXT: cmpsb %es:(%rdi), (%rsi) +0xf3 0xa6 #rep cmps +# CHECK: rep +# CHECK-NEXT: cmpsl %es:(%rdi), (%rsi) +0xf3 0xa7 #repe cmps +# CHECK: rep +# CHECK-NEXT: scasb %es:(%rdi), %al +0xf3 0xae #repe scas +# CHECK: rep +# CHECK-NEXT: scasl %es:(%rdi), %eax +0xf3 0xaf #repe scas +# CHECK: repne +# CHECK-NEXT: cmpsb %es:(%rdi), (%rsi) +0xf2 0xa6 #repne cmps +# CHECK: repne +# CHECK-NEXT: cmpsl %es:(%rdi), (%rsi) +0xf2 0xa7 #repne cmps +# CHECK: repne +# CHECK-NEXT: scasb %es:(%rdi), %al +0xf2 0xae #repne scas +# CHECK: repne +# CHECK-NEXT: scasl %es:(%rdi), %eax +0xf2 0xaf #repne scas + # CHECK: 
lock # CHECK-NEXT: orl $16, %fs:776 0xf0 0x64 0x83 0x0c 0x25 0x08 0x03 0x00 0x00 0x10 @@ -50,7 +105,6 @@ # Test that multiple redundant prefixes work (redundant, but valid x86). # CHECK: rep -# CHECK-NEXT: rep # CHECK-NEXT: stosq 0xf3 0xf3 0x48 0xab