Index: llvm/trunk/lib/Target/X86/AsmParser/X86AsmParser.cpp
===================================================================
--- llvm/trunk/lib/Target/X86/AsmParser/X86AsmParser.cpp
+++ llvm/trunk/lib/Target/X86/AsmParser/X86AsmParser.cpp
@@ -683,9 +683,14 @@
 
   std::unique_ptr<X86Operand> DefaultMemSIOperand(SMLoc Loc);
   std::unique_ptr<X86Operand> DefaultMemDIOperand(SMLoc Loc);
-  void AddDefaultSrcDestOperands(
-      OperandVector& Operands, std::unique_ptr<llvm::MCParsedAsmOperand> &&Src,
-      std::unique_ptr<llvm::MCParsedAsmOperand> &&Dst);
+  bool IsSIReg(unsigned Reg);
+  unsigned GetSIDIForRegClass(unsigned RegClassID, unsigned Reg, bool IsSIReg);
+  void
+  AddDefaultSrcDestOperands(OperandVector &Operands,
+                            std::unique_ptr<llvm::MCParsedAsmOperand> &&Src,
+                            std::unique_ptr<llvm::MCParsedAsmOperand> &&Dst);
+  bool VerifyAndAdjustOperands(OperandVector &OrigOperands,
+                               OperandVector &FinalOperands);
   std::unique_ptr<X86Operand> ParseOperand();
   std::unique_ptr<X86Operand> ParseATTOperand();
   std::unique_ptr<X86Operand> ParseIntelOperand();
@@ -747,11 +752,6 @@
 
   bool OmitRegisterFromClobberLists(unsigned RegNo) override;
 
-  /// doSrcDstMatch - Returns true if operands are matching in their
-  /// word size (%si and %di, %esi and %edi, etc.). Order depends on
-  /// the parsing mode (Intel vs. AT&T).
-  bool doSrcDstMatch(X86Operand &Op1, X86Operand &Op2);
-
   /// Parses AVX512 specific operand primitives: masked registers ({%k<NUM>}, {z})
   /// and memory broadcasting ({1to<NUM>}) primitives, updating Operands vector if required.
   /// \return \c true if no parsing errors occurred, \c false otherwise.
@@ -867,27 +867,6 @@
   return false;
 }
 
-bool X86AsmParser::doSrcDstMatch(X86Operand &Op1, X86Operand &Op2)
-{
-  // Return true and let a normal complaint about bogus operands happen.
-  if (!Op1.isMem() || !Op2.isMem())
-    return true;
-
-  // Actually these might be the other way round if Intel syntax is
-  // being used. It doesn't matter.
-  unsigned diReg = Op1.Mem.BaseReg;
-  unsigned siReg = Op2.Mem.BaseReg;
-
-  if (X86MCRegisterClasses[X86::GR16RegClassID].contains(siReg))
-    return X86MCRegisterClasses[X86::GR16RegClassID].contains(diReg);
-  if (X86MCRegisterClasses[X86::GR32RegClassID].contains(siReg))
-    return X86MCRegisterClasses[X86::GR32RegClassID].contains(diReg);
-  if (X86MCRegisterClasses[X86::GR64RegClassID].contains(siReg))
-    return X86MCRegisterClasses[X86::GR64RegClassID].contains(diReg);
-  // Again, return true and let another error happen.
-  return true;
-}
-
 bool X86AsmParser::ParseRegister(unsigned &RegNo,
                                  SMLoc &StartLoc, SMLoc &EndLoc) {
   MCAsmParser &Parser = getParser();
@@ -1025,6 +1004,46 @@
                                Loc, Loc, 0);
 }
 
+/// Classifies a string-instruction index register: returns true for
+/// (R|E)SI and false for (R|E)DI. Any other register is a caller bug; it
+/// asserts in builds with assertions enabled and returns false otherwise.
+bool X86AsmParser::IsSIReg(unsigned Reg) {
+  switch (Reg) {
+  default:
+    // assert("...") tests a non-null literal and can never fire.
+    assert(false && "Only (R|E)SI and (R|E)DI are expected!");
+    return false;
+  case X86::RSI:
+  case X86::ESI:
+  case X86::SI:
+    return true;
+  case X86::RDI:
+  case X86::EDI:
+  case X86::DI:
+    return false;
+  }
+}
+
+/// Returns the SI-family (if \p IsSIReg) or DI-family index register whose
+/// width matches \p RegClassID (GR64, GR32 or GR16). Any other class is a
+/// caller bug; it asserts in builds with assertions enabled and returns
+/// \p Reg unchanged otherwise.
+unsigned X86AsmParser::GetSIDIForRegClass(unsigned RegClassID, unsigned Reg,
+                                          bool IsSIReg) {
+  switch (RegClassID) {
+  default:
+    // assert("...") tests a non-null literal and can never fire.
+    assert(false && "Unexpected register class");
+    return Reg;
+  case X86::GR64RegClassID:
+    return IsSIReg ? X86::RSI : X86::RDI;
+  case X86::GR32RegClassID:
+    return IsSIReg ? X86::ESI : X86::EDI;
+  case X86::GR16RegClassID:
+    return IsSIReg ? X86::SI : X86::DI;
+  }
+}
+
 void X86AsmParser::AddDefaultSrcDestOperands(
     OperandVector& Operands, std::unique_ptr<llvm::MCParsedAsmOperand> &&Src,
     std::unique_ptr<llvm::MCParsedAsmOperand> &&Dst) {
@@ -1038,6 +1057,101 @@
   }
 }
 
+/// Reconciles the operands written for a string instruction with the default
+/// operands that the instruction implies.
+///
+/// \p OrigOperands holds the instruction name followed by any explicit
+/// operands; \p FinalOperands holds the defaults. For each memory operand the
+/// explicit size and segment are kept, and the base register becomes the
+/// SI/DI register of the explicit base register's class. If every operand is
+/// accepted, the explicit operands are replaced by the adjusted defaults.
+///
+/// \return the result of Error() when the index registers mismatch, and
+/// \c false otherwise. When the operands do not fit the defaults,
+/// \p OrigOperands is left untouched so that normal matching can complain.
+bool X86AsmParser::VerifyAndAdjustOperands(OperandVector &OrigOperands,
+                                           OperandVector &FinalOperands) {
+
+  if (OrigOperands.size() > 1) {
+    // Check if sizes match, OrigOperands also contains the instruction name
+    assert(OrigOperands.size() == FinalOperands.size() + 1 &&
+           "Operand size mismatch");
+
+    // Queue warnings and emit them only after every operand was accepted, so
+    // legal non-string forms such as "movsd (%rax), %xmm0" stay silent.
+    SmallVector<std::pair<SMLoc, std::string>, 2> Warnings;
+
+    // Verify types match
+    int RegClassID = -1;
+    for (unsigned int i = 0; i < FinalOperands.size(); ++i) {
+      X86Operand &OrigOp = static_cast<X86Operand &>(*OrigOperands[i + 1]);
+      X86Operand &FinalOp = static_cast<X86Operand &>(*FinalOperands[i]);
+
+      if (FinalOp.isReg() &&
+          (!OrigOp.isReg() || FinalOp.getReg() != OrigOp.getReg()))
+        // Return false and let a normal complaint about bogus operands happen
+        return false;
+
+      if (FinalOp.isMem()) {
+
+        if (!OrigOp.isMem())
+          // Return false and let a normal complaint about bogus operands happen
+          return false;
+
+        unsigned OrigReg = OrigOp.Mem.BaseReg;
+        unsigned FinalReg = FinalOp.Mem.BaseReg;
+
+        // If we've already encountered a register class, make sure all register
+        // bases are of the same register class
+        if (RegClassID != -1 &&
+            !X86MCRegisterClasses[RegClassID].contains(OrigReg)) {
+          return Error(OrigOp.getStartLoc(),
+                       "mismatching source and destination index registers");
+        }
+
+        if (X86MCRegisterClasses[X86::GR64RegClassID].contains(OrigReg))
+          RegClassID = X86::GR64RegClassID;
+        else if (X86MCRegisterClasses[X86::GR32RegClassID].contains(OrigReg))
+          RegClassID = X86::GR32RegClassID;
+        else if (X86MCRegisterClasses[X86::GR16RegClassID].contains(OrigReg))
+          RegClassID = X86::GR16RegClassID;
+        else
+          // The base register is in none of GR64/GR32/GR16.
+          // Return false and let a normal complaint about bogus operands happen
+          return false;
+
+        bool IsSI = IsSIReg(FinalReg);
+        FinalReg = GetSIDIForRegClass(RegClassID, FinalReg, IsSI);
+
+        if (FinalReg != OrigReg) {
+          std::string RegName = IsSI ? "ES:(R|E)SI" : "ES:(R|E)DI";
+          Warnings.push_back(std::make_pair(
+              OrigOp.getStartLoc(),
+              "memory operand is only for determining the size, " + RegName +
+                  " will be used for the location"));
+        }
+
+        FinalOp.Mem.Size = OrigOp.Mem.Size;
+        FinalOp.Mem.SegReg = OrigOp.Mem.SegReg;
+        FinalOp.Mem.BaseReg = FinalReg;
+      }
+    }
+
+    // All operands were accepted; report the queued warnings.
+    for (auto &WarningMsg : Warnings)
+      Warning(WarningMsg.first, WarningMsg.second);
+
+    // Remove old operands
+    for (unsigned int i = 0; i < FinalOperands.size(); ++i)
+      OrigOperands.pop_back();
+  }
+  // OrigOperands.append(FinalOperands.begin(), FinalOperands.end());
+  for (unsigned int i = 0; i < FinalOperands.size(); ++i)
+    OrigOperands.push_back(std::move(FinalOperands[i]));
+
+  return false;
+}
+
 std::unique_ptr<X86Operand> X86AsmParser::ParseOperand() {
   if (isParsingIntelSyntax())
     return ParseIntelOperand();
@@ -2274,84 +2388,92 @@
     }
   }
 
+  SmallVector<std::unique_ptr<MCParsedAsmOperand>, 2> TmpOperands;
+  bool HadVerifyError = false;
+
   // Append default arguments to "ins[bwld]"
-  if (Name.startswith("ins") && Operands.size() == 1 &&
-      (Name == "insb" || Name == "insw" || Name == "insl" || Name == "insd")) {
-    AddDefaultSrcDestOperands(Operands,
+  if (Name.startswith("ins") &&
+      (Operands.size() == 1 || Operands.size() == 3) &&
+      (Name == "insb" || Name == "insw" || Name == "insl" || Name == "insd" ||
+       Name == "ins")) {
+
+    AddDefaultSrcDestOperands(TmpOperands,
                               X86Operand::CreateReg(X86::DX, NameLoc, NameLoc),
                               DefaultMemDIOperand(NameLoc));
+    HadVerifyError = VerifyAndAdjustOperands(Operands, TmpOperands);
   }
 
   // Append default arguments to "outs[bwld]"
-  if (Name.startswith("outs") && Operands.size() == 1 &&
+  if (Name.startswith("outs") &&
+      (Operands.size() == 1 || Operands.size() == 3) &&
       (Name == "outsb" || Name == "outsw" || Name == "outsl" ||
-       Name == "outsd" )) {
-    AddDefaultSrcDestOperands(Operands,
-                              DefaultMemSIOperand(NameLoc),
+       Name == "outsd" || Name == "outs")) {
+    AddDefaultSrcDestOperands(TmpOperands, DefaultMemSIOperand(NameLoc),
                               X86Operand::CreateReg(X86::DX, NameLoc, NameLoc));
+    HadVerifyError = VerifyAndAdjustOperands(Operands, TmpOperands);
   }
 
   // Transform "lods[bwlq]" into "lods[bwlq] ($SIREG)" for appropriate
   // values of $SIREG according to the mode. It would be nice if this
   // could be achieved with InstAlias in the tables.
-  if (Name.startswith("lods") && Operands.size() == 1 &&
+  if (Name.startswith("lods") &&
+      (Operands.size() == 1 || Operands.size() == 2) &&
       (Name == "lods" || Name == "lodsb" || Name == "lodsw" ||
-       Name == "lodsl" || Name == "lodsd" || Name == "lodsq"))
-    Operands.push_back(DefaultMemSIOperand(NameLoc));
+       Name == "lodsl" || Name == "lodsd" || Name == "lodsq")) {
+    TmpOperands.push_back(DefaultMemSIOperand(NameLoc));
+    HadVerifyError = VerifyAndAdjustOperands(Operands, TmpOperands);
+  }
 
   // Transform "stos[bwlq]" into "stos[bwlq] ($DIREG)" for appropriate
   // values of $DIREG according to the mode. It would be nice if this
   // could be achieved with InstAlias in the tables.
-  if (Name.startswith("stos") && Operands.size() == 1 &&
+  if (Name.startswith("stos") &&
+      (Operands.size() == 1 || Operands.size() == 2) &&
       (Name == "stos" || Name == "stosb" || Name == "stosw" ||
-       Name == "stosl" || Name == "stosd" || Name == "stosq"))
-    Operands.push_back(DefaultMemDIOperand(NameLoc));
+       Name == "stosl" || Name == "stosd" || Name == "stosq")) {
+    TmpOperands.push_back(DefaultMemDIOperand(NameLoc));
+    HadVerifyError = VerifyAndAdjustOperands(Operands, TmpOperands);
+  }
 
   // Transform "scas[bwlq]" into "scas[bwlq] ($DIREG)" for appropriate
   // values of $DIREG according to the mode. It would be nice if this
   // could be achieved with InstAlias in the tables.
-  if (Name.startswith("scas") && Operands.size() == 1 &&
+  if (Name.startswith("scas") &&
+      (Operands.size() == 1 || Operands.size() == 2) &&
       (Name == "scas" || Name == "scasb" || Name == "scasw" ||
-       Name == "scasl" || Name == "scasd" || Name == "scasq"))
-    Operands.push_back(DefaultMemDIOperand(NameLoc));
+       Name == "scasl" || Name == "scasd" || Name == "scasq")) {
+    TmpOperands.push_back(DefaultMemDIOperand(NameLoc));
+    HadVerifyError = VerifyAndAdjustOperands(Operands, TmpOperands);
+  }
 
   // Add default SI and DI operands to "cmps[bwlq]".
   if (Name.startswith("cmps") &&
+      (Operands.size() == 1 || Operands.size() == 3) &&
       (Name == "cmps" || Name == "cmpsb" || Name == "cmpsw" ||
        Name == "cmpsl" || Name == "cmpsd" || Name == "cmpsq")) {
-    if (Operands.size() == 1) {
-      AddDefaultSrcDestOperands(Operands,
-                                DefaultMemDIOperand(NameLoc),
-                                DefaultMemSIOperand(NameLoc));
-    } else if (Operands.size() == 3) {
-      X86Operand &Op = (X86Operand &)*Operands[1];
-      X86Operand &Op2 = (X86Operand &)*Operands[2];
-      if (!doSrcDstMatch(Op, Op2))
-        return Error(Op.getStartLoc(),
-                     "mismatching source and destination index registers");
-    }
+    AddDefaultSrcDestOperands(TmpOperands, DefaultMemDIOperand(NameLoc),
+                              DefaultMemSIOperand(NameLoc));
+    HadVerifyError = VerifyAndAdjustOperands(Operands, TmpOperands);
   }
 
   // Add default SI and DI operands to "movs[bwlq]".
-  if ((Name.startswith("movs") &&
-       (Name == "movs" || Name == "movsb" || Name == "movsw" ||
-        Name == "movsl" || Name == "movsd" || Name == "movsq")) ||
-      (Name.startswith("smov") &&
-       (Name == "smov" || Name == "smovb" || Name == "smovw" ||
-        Name == "smovl" || Name == "smovd" || Name == "smovq"))) {
-    if (Operands.size() == 1) {
-      if (Name == "movsd")
-        Operands.back() = X86Operand::CreateToken("movsl", NameLoc);
-      AddDefaultSrcDestOperands(Operands,
-                                DefaultMemSIOperand(NameLoc),
-                                DefaultMemDIOperand(NameLoc));
-    } else if (Operands.size() == 3) {
-      X86Operand &Op = (X86Operand &)*Operands[1];
-      X86Operand &Op2 = (X86Operand &)*Operands[2];
-      if (!doSrcDstMatch(Op, Op2))
-        return Error(Op.getStartLoc(),
-                     "mismatching source and destination index registers");
-    }
+  if (((Name.startswith("movs") &&
+        (Name == "movs" || Name == "movsb" || Name == "movsw" ||
+         Name == "movsl" || Name == "movsd" || Name == "movsq")) ||
+       (Name.startswith("smov") &&
+        (Name == "smov" || Name == "smovb" || Name == "smovw" ||
+         Name == "smovl" || Name == "smovd" || Name == "smovq"))) &&
+      (Operands.size() == 1 || Operands.size() == 3)) {
+    if (Name == "movsd" && Operands.size() == 1)
+      Operands.back() = X86Operand::CreateToken("movsl", NameLoc);
+    AddDefaultSrcDestOperands(TmpOperands, DefaultMemSIOperand(NameLoc),
+                              DefaultMemDIOperand(NameLoc));
+    HadVerifyError = VerifyAndAdjustOperands(Operands, TmpOperands);
+  }
+
+  // Check if we encountered an error for one of the string instructions
+  if (HadVerifyError) {
+    return HadVerifyError;
   }
 
   // FIXME: Hack to handle recognize s{hr,ar,hl} $1, <op>.
Canonicalize to
Index: llvm/trunk/lib/Target/X86/X86InstrInfo.td
===================================================================
--- llvm/trunk/lib/Target/X86/X86InstrInfo.td
+++ llvm/trunk/lib/Target/X86/X86InstrInfo.td
@@ -2782,6 +2782,11 @@
 def : InstAlias<"lods\t{$src, %ax|ax, $src}", (LODSW srcidx16:$src), 0>;
 def : InstAlias<"lods\t{$src, %eax|eax, $src}", (LODSL srcidx32:$src), 0>;
 def : InstAlias<"lods\t{$src, %rax|rax, $src}", (LODSQ srcidx64:$src), 0>, Requires<[In64BitMode]>;
+def : InstAlias<"lods\t$src", (LODSB srcidx8:$src), 0>;
+def : InstAlias<"lods\t$src", (LODSW srcidx16:$src), 0>;
+def : InstAlias<"lods\t$src", (LODSL srcidx32:$src), 0>;
+def : InstAlias<"lods\t$src", (LODSQ srcidx64:$src), 0>, Requires<[In64BitMode]>;
+
 
 // stos aliases. Accept the source being omitted because it's implicit in
 // the mnemonic, or the mnemonic suffix being omitted because it's implicit
@@ -2794,6 +2799,11 @@
 def : InstAlias<"stos\t{%ax, $dst|$dst, ax}", (STOSW dstidx16:$dst), 0>;
 def : InstAlias<"stos\t{%eax, $dst|$dst, eax}", (STOSL dstidx32:$dst), 0>;
 def : InstAlias<"stos\t{%rax, $dst|$dst, rax}", (STOSQ dstidx64:$dst), 0>, Requires<[In64BitMode]>;
+def : InstAlias<"stos\t$dst", (STOSB dstidx8:$dst), 0>;
+def : InstAlias<"stos\t$dst", (STOSW dstidx16:$dst), 0>;
+def : InstAlias<"stos\t$dst", (STOSL dstidx32:$dst), 0>;
+def : InstAlias<"stos\t$dst", (STOSQ dstidx64:$dst), 0>, Requires<[In64BitMode]>;
+
 
 // scas aliases. Accept the destination being omitted because it's implicit
 // in the mnemonic, or the mnemonic suffix being omitted because it's implicit
@@ -2806,6 +2816,24 @@
 def : InstAlias<"scas\t{$dst, %ax|ax, $dst}", (SCASW dstidx16:$dst), 0>;
 def : InstAlias<"scas\t{$dst, %eax|eax, $dst}", (SCASL dstidx32:$dst), 0>;
 def : InstAlias<"scas\t{$dst, %rax|rax, $dst}", (SCASQ dstidx64:$dst), 0>, Requires<[In64BitMode]>;
+def : InstAlias<"scas\t$dst", (SCASB dstidx8:$dst), 0>;
+def : InstAlias<"scas\t$dst", (SCASW dstidx16:$dst), 0>;
+def : InstAlias<"scas\t$dst", (SCASL dstidx32:$dst), 0>;
+def : InstAlias<"scas\t$dst", (SCASQ dstidx64:$dst), 0>, Requires<[In64BitMode]>;
+
+// cmps aliases. Accept the mnemonic suffix being omitted because it's
+// implicit in the size of the memory operands.
+def : InstAlias<"cmps\t{$dst, $src|$src, $dst}", (CMPSB dstidx8:$dst, srcidx8:$src), 0>;
+def : InstAlias<"cmps\t{$dst, $src|$src, $dst}", (CMPSW dstidx16:$dst, srcidx16:$src), 0>;
+def : InstAlias<"cmps\t{$dst, $src|$src, $dst}", (CMPSL dstidx32:$dst, srcidx32:$src), 0>;
+def : InstAlias<"cmps\t{$dst, $src|$src, $dst}", (CMPSQ dstidx64:$dst, srcidx64:$src), 0>, Requires<[In64BitMode]>;
+
+// movs aliases. Accept the mnemonic suffix being omitted because it's
+// implicit in the size of the memory operands.
+def : InstAlias<"movs\t{$src, $dst|$dst, $src}", (MOVSB dstidx8:$dst, srcidx8:$src), 0>;
+def : InstAlias<"movs\t{$src, $dst|$dst, $src}", (MOVSW dstidx16:$dst, srcidx16:$src), 0>;
+def : InstAlias<"movs\t{$src, $dst|$dst, $src}", (MOVSL dstidx32:$dst, srcidx32:$src), 0>;
+def : InstAlias<"movs\t{$src, $dst|$dst, $src}", (MOVSQ dstidx64:$dst, srcidx64:$src), 0>, Requires<[In64BitMode]>;
 
 // div and idiv aliases for explicit A register.
 def : InstAlias<"div{b}\t{$src, %al|al, $src}", (DIV8r GR8 :$src)>;
@@ -2918,6 +2946,18 @@
 def : InstAlias<"imul{q}\t{$imm, $r|$r, $imm}", (IMUL64rri32 GR64:$r, GR64:$r, i64i32imm:$imm), 0>;
 def : InstAlias<"imul{q}\t{$imm, $r|$r, $imm}", (IMUL64rri8 GR64:$r, GR64:$r, i64i8imm:$imm), 0>;
 
+// ins aliases. Accept the mnemonic suffix being omitted because it's implicit
+// in the destination.
+def : InstAlias<"ins\t{%dx, $dst|$dst, dx}", (INSB dstidx8:$dst), 0>;
+def : InstAlias<"ins\t{%dx, $dst|$dst, dx}", (INSW dstidx16:$dst), 0>;
+def : InstAlias<"ins\t{%dx, $dst|$dst, dx}", (INSL dstidx32:$dst), 0>;
+
+// outs aliases. Accept the mnemonic suffix being omitted because it's implicit
+// in the source.
+def : InstAlias<"outs\t{$src, %dx|dx, $src}", (OUTSB srcidx8:$src), 0>;
+def : InstAlias<"outs\t{$src, %dx|dx, $src}", (OUTSW srcidx16:$src), 0>;
+def : InstAlias<"outs\t{$src, %dx|dx, $src}", (OUTSL srcidx32:$src), 0>;
+
 // inb %dx -> inb %al, %dx
 def : InstAlias<"inb\t{%dx|dx}", (IN8rr), 0>;
 def : InstAlias<"inw\t{%dx|dx}", (IN16rr), 0>;
Index: llvm/trunk/test/MC/X86/index-operations.s
===================================================================
--- llvm/trunk/test/MC/X86/index-operations.s
+++ llvm/trunk/test/MC/X86/index-operations.s
@@ -144,3 +144,19 @@
 // 64: insw %dx, %es:(%edi) # encoding: [0x66,0x67,0x6d]
 // 32: insw %dx, %es:(%edi) # encoding: [0x66,0x6d]
 // 16: insw %dx, %es:(%edi) # encoding: [0x67,0x6d]
+
+insw %dx, (%bx)
+// ERR64: invalid 16-bit base register
+// 32: insw %dx, %es:(%di) # encoding: [0x66,0x67,0x6d]
+// 16: insw %dx, %es:(%di) # encoding: [0x6d]
+
+insw %dx, (%ebx)
+// 64: insw %dx, %es:(%edi) # encoding: [0x66,0x67,0x6d]
+// 32: insw %dx, %es:(%edi) # encoding: [0x66,0x6d]
+// 16: insw %dx, %es:(%edi) # encoding: [0x67,0x6d]
+
+insw %dx, (%rbx)
+// 64: insw %dx, %es:(%rdi) # encoding: [0x66,0x6d]
+// ERR32: 64-bit
+// ERR16: 64-bit
+
Index: llvm/trunk/test/MC/X86/intel-syntax.s
===================================================================
--- llvm/trunk/test/MC/X86/intel-syntax.s
+++ llvm/trunk/test/MC/X86/intel-syntax.s
@@ -751,3 +751,27 @@
 
 sidt fword ptr [eax]
 // CHECK: sidtq (%eax)
+
+ins byte ptr [eax], dx
+// CHECK: insb %dx, %es:(%edi)
+// CHECK-STDERR: memory operand is only for determining the size, ES:(R|E)DI will be used for the location
+outs dx, word ptr [eax]
+// CHECK: outsw (%esi), %dx
+// CHECK-STDERR: memory operand is only for determining the size, ES:(R|E)SI will be used for the location
+lods dword ptr [eax]
+// CHECK: lodsl (%esi), %eax
+// CHECK-STDERR: memory operand is only for determining the size, ES:(R|E)SI will be used for the location
+stos qword ptr [eax]
+// CHECK: stosq %rax, %es:(%edi)
+// CHECK-STDERR: memory operand is only for determining the size, ES:(R|E)DI will be used for the location
+scas byte ptr [eax]
+// CHECK: scasb %es:(%edi), %al
+// CHECK-STDERR: memory operand is only for determining the size, ES:(R|E)DI will be used for the location
+cmps word ptr [eax], word ptr [ebx]
+// CHECK: cmpsw %es:(%edi), (%esi)
+// CHECK-STDERR: memory operand is only for determining the size, ES:(R|E)SI will be used for the location
+// CHECK-STDERR: memory operand is only for determining the size, ES:(R|E)DI will be used for the location
+movs dword ptr [eax], dword ptr [ebx]
+// CHECK: movsl (%esi), %es:(%edi)
+// CHECK-STDERR: memory operand is only for determining the size, ES:(R|E)DI will be used for the location
+// CHECK-STDERR: memory operand is only for determining the size, ES:(R|E)SI will be used for the location