Index: lib/Target/X86/AsmParser/X86AsmParser.cpp
===================================================================
--- lib/Target/X86/AsmParser/X86AsmParser.cpp
+++ lib/Target/X86/AsmParser/X86AsmParser.cpp
@@ -925,7 +925,8 @@
     // Requires<In64BitMode> so "eiz" usage in 64-bit instructions can be also
     // checked.
     // FIXME: Check AH, CH, DH, BH cannot be used in an instruction requiring a
-    // REX prefix.
+    // REX prefix. This is now mostly an issue of user-friendliness,
+    // as the instruction emitter won't encode REX + an H register.
     if (RegNo == X86::RIZ ||
         X86MCRegisterClasses[X86::GR64RegClassID].contains(RegNo) ||
         X86II::isX86_64NonExtLowByteReg(RegNo) ||
Index: lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp
===================================================================
--- lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp
+++ lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp
@@ -23,6 +23,7 @@
 #include "llvm/MC/MCSubtargetInfo.h"
 #include "llvm/MC/MCSymbol.h"
 #include "llvm/Support/raw_ostream.h"
+#include "X86InstrInfo.h"
 
 using namespace llvm;
 
@@ -72,7 +73,14 @@
     return false;
   }
 
-  unsigned GetX86RegNum(const MCOperand &MO) const {
+  unsigned GetX86RegNum(const MCOperand &MO, bool HasREX = false) const {
+    unsigned Reg = MO.getReg();
+    // With a REX prefix, register encodings 4-7 select SPL/BPL/SIL/DIL
+    // instead of AH/CH/DH/BH, so a high-byte register is only unencodable
+    // when the instruction actually carries a REX prefix.
+    if (HasREX && X86::GR8_ABCD_HRegClass.contains(Reg))
+      report_fatal_error(
+          "Cannot encode high byte register in REX-prefixed instruction");
     return Ctx.getRegisterInfo()->getEncodingValue(MO.getReg()) & 0x7;
   }
 
@@ -132,8 +140,9 @@
   }
 
   void EmitRegModRMByte(const MCOperand &ModRMReg, unsigned RegOpcodeFld,
-                        unsigned &CurByte, raw_ostream &OS) const {
-    EmitByte(ModRMByte(3, RegOpcodeFld, GetX86RegNum(ModRMReg)), CurByte, OS);
+                        unsigned &CurByte, raw_ostream &OS, bool HasREX) const {
+    EmitByte(ModRMByte(3, RegOpcodeFld, GetX86RegNum(ModRMReg, HasREX)),
+             CurByte, OS);
   }
 
   void EmitSIBByte(unsigned SS, unsigned Index, unsigned Base,
@@ -160,10 +169,9 @@
   void EmitSegmentOverridePrefix(unsigned &CurByte, unsigned SegOperand,
                                  const MCInst &MI, raw_ostream &OS) const;
 
-  void EmitOpcodePrefix(uint64_t TSFlags, unsigned &CurByte, int MemOperand,
+  bool EmitOpcodePrefix(uint64_t TSFlags, unsigned &CurByte, int MemOperand,
                         const MCInst &MI, const MCInstrDesc &Desc,
-                        const MCSubtargetInfo &STI,
-                        raw_ostream &OS) const;
+                        const MCSubtargetInfo &STI, raw_ostream &OS) const;
 };
 
 } // end anonymous namespace
@@ -1095,18 +1103,21 @@
 }
 
 /// EmitOpcodePrefix - Emit all instruction prefixes prior to the opcode.
+/// Return true if a REX prefix was emitted, or false if not.
 ///
 /// MemOperand is the operand # of the start of a memory operand if present.  If
 /// Not present, it is -1.
-void X86MCCodeEmitter::EmitOpcodePrefix(uint64_t TSFlags, unsigned &CurByte,
+bool X86MCCodeEmitter::EmitOpcodePrefix(uint64_t TSFlags, unsigned &CurByte,
                                         int MemOperand, const MCInst &MI,
                                         const MCInstrDesc &Desc,
                                         const MCSubtargetInfo &STI,
                                         raw_ostream &OS) const {
 
+  bool EmittedREX = false;
+
   // Emit the operand size opcode prefix as needed.
-  if ((TSFlags & X86II::OpSizeMask) == (is16BitMode(STI) ? X86II::OpSize32
-                                                         : X86II::OpSize16))
+  if ((TSFlags & X86II::OpSizeMask) ==
+      (is16BitMode(STI) ? X86II::OpSize32 : X86II::OpSize16))
     EmitByte(0x66, CurByte, OS);
 
   // Emit the LOCK opcode prefix.
@@ -1128,8 +1139,10 @@
   // Handle REX prefix.
   // FIXME: Can this come before F2 etc to simplify emission?
   if (is64BitMode(STI)) {
-    if (unsigned REX = DetermineREXPrefix(MI, TSFlags, Desc))
+    if (unsigned REX = DetermineREXPrefix(MI, TSFlags, Desc)) {
       EmitByte(0x40 | REX, CurByte, OS);
+      EmittedREX = true;
+    }
   }
 
   // 0x0F escape code must be emitted just before the opcode.
@@ -1149,12 +1162,12 @@
     EmitByte(0x3A, CurByte, OS);
     break;
   }
+  return EmittedREX;
 }
 
-void X86MCCodeEmitter::
-encodeInstruction(const MCInst &MI, raw_ostream &OS,
-                  SmallVectorImpl<MCFixup> &Fixups,
-                  const MCSubtargetInfo &STI) const {
+void X86MCCodeEmitter::encodeInstruction(const MCInst &MI, raw_ostream &OS,
+                                         SmallVectorImpl<MCFixup> &Fixups,
+                                         const MCSubtargetInfo &STI) const {
   unsigned Opcode = MI.getOpcode();
   const MCInstrDesc &Desc = MCII.get(Opcode);
   uint64_t TSFlags = Desc.TSFlags;
@@ -1182,9 +1195,13 @@
   bool HasEVEX_K = TSFlags & X86II::EVEX_K;
   bool HasEVEX_RC = TSFlags & X86II::EVEX_RC;
 
+  // Set to true once EmitOpcodePrefix reports that it emitted a REX prefix.
+  bool HasREX = false;
+
   // Determine where the memory operand starts, if present.
   int MemoryOperand = X86II::getMemoryOperandNo(TSFlags, Opcode);
-  if (MemoryOperand != -1) MemoryOperand += CurOp;
+  if (MemoryOperand != -1)
+    MemoryOperand += CurOp;
 
   // Emit segment override opcode prefix as needed.
   if (MemoryOperand >= 0)
@@ -1220,7 +1237,8 @@
     EmitByte(0x67, CurByte, OS);
 
   if (Encoding == 0)
-    EmitOpcodePrefix(TSFlags, CurByte, MemoryOperand, MI, Desc, STI, OS);
+    HasREX =
+        EmitOpcodePrefix(TSFlags, CurByte, MemoryOperand, MI, Desc, STI, OS);
   else
     EmitVEXOpcodePrefix(TSFlags, CurByte, MemoryOperand, MI, Desc, OS);
 
@@ -1319,7 +1337,8 @@
       ++SrcRegNum;
 
     EmitRegModRMByte(MI.getOperand(CurOp),
-                     GetX86RegNum(MI.getOperand(SrcRegNum)), CurByte, OS);
+                     GetX86RegNum(MI.getOperand(SrcRegNum), HasREX), CurByte,
+                     OS, HasREX);
     CurOp = SrcRegNum + 1;
     break;
 
@@ -1353,7 +1372,8 @@
       ++SrcRegNum;
 
     EmitRegModRMByte(MI.getOperand(SrcRegNum),
-                     GetX86RegNum(MI.getOperand(CurOp)), CurByte, OS);
+                     GetX86RegNum(MI.getOperand(CurOp), HasREX), CurByte, OS,
+                     HasREX);
 
     // 2 operands skipped with HasMemOp4, compensate accordingly
     CurOp = HasMemOp4 ? SrcRegNum : SrcRegNum + 1;
@@ -1402,8 +1422,8 @@
     EmitByte(BaseOpcode, CurByte, OS);
     uint64_t Form = TSFlags & X86II::FormMask;
     EmitRegModRMByte(MI.getOperand(CurOp++),
-                     (Form == X86II::MRMXr) ? 0 : Form-X86II::MRM0r,
-                     CurByte, OS);
+                     (Form == X86II::MRMXr) ? 0 : Form - X86II::MRM0r, CurByte,
+                     OS, HasREX);
     break;
   }
 
Index: test/MC/X86/encoder-fail.s
===================================================================
--- /dev/null
+++ test/MC/X86/encoder-fail.s
@@ -0,0 +1,4 @@
+// RUN: not llvm-mc -triple x86_64-unknown-unknown --show-encoding %s 2>&1 | FileCheck %s
+
+// CHECK: LLVM ERROR: Cannot encode high byte register in REX-prefixed instruction
+movzx %dh, %rsi