Index: lib/Target/X86/AsmParser/X86AsmParser.cpp
===================================================================
--- lib/Target/X86/AsmParser/X86AsmParser.cpp
+++ lib/Target/X86/AsmParser/X86AsmParser.cpp
@@ -2331,6 +2331,30 @@
     static_cast<X86Operand &>(*Operands[0]).setTokenValue(Repl);
   }
 
+  // Moving a 32 or 16 bit register into a segment register has the same
+  // behavior. Rewrite such instructions to always take the shorter form.
+  if ((Name == "mov" || Name == "movw" || Name == "movl") &&
+      (Operands.size() == 3)) {
+    X86Operand &Op1 = static_cast<X86Operand &>(*Operands[1]);
+    X86Operand &Op2 = static_cast<X86Operand &>(*Operands[2]);
+    SMLoc Loc = Op1.getEndLoc();
+    if (Op1.isReg() && Op2.isReg() &&
+        X86MCRegisterClasses[X86::SEGMENT_REGRegClassID].contains(
+            Op2.getReg()) &&
+        (X86MCRegisterClasses[X86::GR16RegClassID].contains(Op1.getReg()) ||
+         X86MCRegisterClasses[X86::GR32RegClassID].contains(Op1.getReg()))) {
+      // Change the instruction name to match the rewritten operand size.
+      if (Name != "mov" && Name[3] == (is16BitMode() ? 'l' : 'w')) {
+        Name = is16BitMode() ? "movw" : "movl";
+        Operands[0] = X86Operand::CreateToken(Name, NameLoc);
+      }
+      // Select the equivalent 16-bit or 32-bit general-purpose register.
+      unsigned Reg =
+          getX86SubSuperRegisterOrZero(Op1.getReg(), is16BitMode() ? 16 : 32);
+      Operands[1] = X86Operand::CreateReg(Reg, Loc, Loc);
+    }
+  }
+
   // This is a terrible hack to handle "out[s]?[bwl]? %al, (%dx)" ->
   // "outb %al, %dx".  Out doesn't take a memory form, but this is a widely
   // documented form in various unofficial manuals, so a lot of code uses it.
Index: test/MC/X86/x86-16.s
===================================================================
--- test/MC/X86/x86-16.s
+++ test/MC/X86/x86-16.s
@@ -256,10 +256,22 @@
 // CHECK: encoding: [0x67,0x8c,0x08]
 	movw %cs, (%eax)
 
-// CHECK: movl %eax, %cs
-// CHECK: encoding: [0x66,0x8e,0xc8]
+// CHECK: movw %ax, %cs
+// CHECK: encoding: [0x8e,0xc8]
 	movl %eax, %cs
 
+// CHECK: movw %ax, %cs
+// CHECK: encoding: [0x8e,0xc8]
+	mov %eax, %cs
+
+// CHECK: movw %ax, %cs
+// CHECK: encoding: [0x8e,0xc8]
+	movw %ax, %cs
+
+// CHECK: movw %ax, %cs
+// CHECK: encoding: [0x8e,0xc8]
+	mov %ax, %cs
+
 // CHECK: movl (%eax), %cs
 // CHECK: encoding: [0x67,0x66,0x8e,0x08]
 	movl (%eax), %cs