diff --git a/llvm/include/llvm/MC/MCParser/MCAsmParser.h b/llvm/include/llvm/MC/MCParser/MCAsmParser.h --- a/llvm/include/llvm/MC/MCParser/MCAsmParser.h +++ b/llvm/include/llvm/MC/MCParser/MCAsmParser.h @@ -90,6 +90,14 @@ IdKind Kind; }; +struct AsmFieldInfo { + StringRef Type; + unsigned Offset = 0; + unsigned Size = 0; + unsigned ElementSize = 0; + unsigned Length = 0; +}; + /// Generic Sema callback for assembly parser. class MCAsmParserSemaCallback { public: @@ -170,15 +178,16 @@ virtual bool isParsingMasm() const { return false; } - virtual bool lookUpField(StringRef Name, StringRef &Type, - unsigned &Offset) const { + virtual bool lookUpField(StringRef Name, AsmFieldInfo &Info) const { return true; } - virtual bool lookUpField(StringRef Base, StringRef Member, StringRef &Type, - unsigned &Offset) const { + virtual bool lookUpField(StringRef Base, StringRef Member, + AsmFieldInfo &Info) const { return true; } + virtual bool lookUpType(StringRef Name, unsigned &Size) const { return true; } + /// Parse MS-style inline assembly. 
virtual bool parseMSInlineAsm( void *AsmLoc, std::string &AsmString, unsigned &NumOutputs, diff --git a/llvm/lib/MC/MCParser/MasmParser.cpp b/llvm/lib/MC/MCParser/MasmParser.cpp --- a/llvm/lib/MC/MCParser/MasmParser.cpp +++ b/llvm/lib/MC/MCParser/MasmParser.cpp @@ -20,6 +20,7 @@ #include "llvm/ADT/StringExtras.h" #include "llvm/ADT/StringMap.h" #include "llvm/ADT/StringRef.h" +#include "llvm/ADT/StringSwitch.h" #include "llvm/ADT/Twine.h" #include "llvm/BinaryFormat/Dwarf.h" #include "llvm/DebugInfo/CodeView/SymbolRecord.h" @@ -333,7 +334,7 @@ FieldInfo &StructInfo::addField(StringRef FieldName, FieldType FT, size_t FieldSize) { if (!FieldName.empty()) - FieldsByName[FieldName] = Fields.size(); + FieldsByName[FieldName.lower()] = Fields.size(); Fields.emplace_back(FT); FieldInfo &Field = Fields.back(); if (IsUnion) { @@ -491,10 +492,11 @@ bool isParsingMasm() const override { return true; } - bool lookUpField(StringRef Name, StringRef &Type, - unsigned &Offset) const override; - bool lookUpField(StringRef Base, StringRef Member, StringRef &Type, - unsigned &Offset) const override; + bool lookUpField(StringRef Name, AsmFieldInfo &Info) const override; + bool lookUpField(StringRef Base, StringRef Member, + AsmFieldInfo &Info) const override; + + bool lookUpType(StringRef Name, unsigned &Size) const override; bool parseMSInlineAsm(void *AsmLoc, std::string &AsmString, unsigned &NumOutputs, unsigned &NumInputs, @@ -565,7 +567,7 @@ static void DiagHandler(const SMDiagnostic &Diag, void *Context); bool lookUpField(const StructInfo &Structure, StringRef Member, - StringRef &Type, unsigned &Offset) const; + AsmFieldInfo &Info) const; /// Should we emit DWARF describing this assembler source? (Returns false if /// the source has .file directives, which means we don't want to generate @@ -1411,24 +1413,23 @@ } // Find the field offset if used. 
- StringRef Type; - unsigned Offset = 0; + AsmFieldInfo Info; Split = SymbolName.split('.'); if (!Split.second.empty()) { SymbolName = Split.first; if (Structs.count(SymbolName.lower()) && - !lookUpField(SymbolName, Split.second, Type, Offset)) { + !lookUpField(SymbolName, Split.second, Info)) { // This is actually a reference to a field offset. - Res = MCConstantExpr::create(Offset, getContext()); + Res = MCConstantExpr::create(Info.Offset, getContext()); return false; } auto TypeIt = KnownType.find(SymbolName); if (TypeIt == KnownType.end() || - lookUpField(*TypeIt->second, Split.second, Type, Offset)) { + lookUpField(*TypeIt->second, Split.second, Info)) { std::pair BaseMember = Split.second.split('.'); StringRef Base = BaseMember.first, Member = BaseMember.second; - lookUpField(Base, Member, Type, Offset); + lookUpField(Base, Member, Info); } } @@ -1454,10 +1455,10 @@ // Otherwise create a symbol ref. const MCExpr *SymRef = MCSymbolRefExpr::create(Sym, Variant, getContext(), FirstTokenLoc); - if (Offset) { - Res = MCBinaryExpr::create(MCBinaryExpr::Add, SymRef, - MCConstantExpr::create(Offset, getContext()), - getContext()); + if (Info.Offset) { + Res = MCBinaryExpr::create( + MCBinaryExpr::Add, SymRef, + MCConstantExpr::create(Info.Offset, getContext()), getContext()); } else { Res = SymRef; } @@ -6550,37 +6551,39 @@ llvm_unreachable("Unstable rewrite sort."); } -bool MasmParser::lookUpField(StringRef Name, StringRef &Type, - unsigned &Offset) const { +bool MasmParser::lookUpField(StringRef Name, AsmFieldInfo &Info) const { const std::pair BaseMember = Name.split('.'); const StringRef Base = BaseMember.first, Member = BaseMember.second; - return lookUpField(Base, Member, Type, Offset); + return lookUpField(Base, Member, Info); } -bool MasmParser::lookUpField(StringRef Base, StringRef Member, StringRef &Type, - unsigned &Offset) const { +bool MasmParser::lookUpField(StringRef Base, StringRef Member, + AsmFieldInfo &Info) const { if (Base.empty()) return true; - 
- unsigned BaseOffset = 0; - if (Base.contains('.') && !lookUpField(Base, Type, BaseOffset)) - Base = Type; + AsmFieldInfo BaseInfo; + if (Base.contains('.') && !lookUpField(Base, BaseInfo)) + Base = BaseInfo.Type; auto TypeIt = KnownType.find(Base); if (TypeIt != KnownType.end()) - return lookUpField(*TypeIt->second, Member, Type, Offset); + return lookUpField(*TypeIt->second, Member, Info); auto StructIt = Structs.find(Base.lower()); if (StructIt != Structs.end()) - return lookUpField(StructIt->second, Member, Type, Offset); + return lookUpField(StructIt->second, Member, Info); return true; } bool MasmParser::lookUpField(const StructInfo &Structure, StringRef Member, - StringRef &Type, unsigned &Offset) const { + AsmFieldInfo &Info) const { if (Member.empty()) { - Type = Structure.Name; + Info.Type = Structure.Name; + Info.Size = Structure.Size << 3; + Info.ElementSize = Structure.Size << 3; + Info.Length = 1; return false; } @@ -6589,7 +6592,7 @@ auto StructIt = Structs.find(FieldName.lower()); if (StructIt != Structs.end()) - return lookUpField(StructIt->second, FieldMember, Type, Offset); + return lookUpField(StructIt->second, FieldMember, Info); auto FieldIt = Structure.FieldsByName.find(FieldName.lower()); if (FieldIt == Structure.FieldsByName.end()) @@ -6597,9 +6600,12 @@ const FieldInfo &Field = Structure.Fields[FieldIt->second]; if (FieldMember.empty()) { - Offset += Field.Offset; + Info.Offset += Field.Offset; + Info.Size = Field.SizeOf << 3; + Info.ElementSize = Field.Type << 3; + Info.Length = Field.LengthOf; if (Field.Contents.FT == FT_STRUCT) - Type = Field.Contents.StructInfo.Structure.Name; + Info.Type = Field.Contents.StructInfo.Structure.Name; return false; } @@ -6607,14 +6613,35 @@ return true; const StructFieldInfo &StructInfo = Field.Contents.StructInfo; - bool Result = lookUpField(StructInfo.Structure, FieldMember, Type, Offset); - if (Result) + if (lookUpField(StructInfo.Structure, FieldMember, Info)) return true; - Offset += Field.Offset; + Info.Offset 
+= Field.Offset; return false; } +bool MasmParser::lookUpType(StringRef Name, unsigned &Size) const { + Size = StringSwitch<unsigned>(Name) + .CasesLower("byte", "db", "sbyte", 8) + .CasesLower("word", "dw", "sword", 16) + .CasesLower("dword", "dd", "sdword", 32) + .CasesLower("fword", "df", 48) + .CasesLower("qword", "dq", "sqword", 64) + .CaseLower("real4", 32) + .CaseLower("real8", 64) + .Default(0); + if (Size) { + return false; + } + + auto TypeIt = Structs.find(Name.lower()); + if (TypeIt != Structs.end()) { + Size = TypeIt->second.Size << 3; + return false; + } + return true; +} + bool MasmParser::parseMSInlineAsm( void *AsmLoc, std::string &AsmString, unsigned &NumOutputs, unsigned &NumInputs, SmallVectorImpl> &OpDecls, diff --git a/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp b/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp --- a/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp +++ b/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp @@ -32,6 +32,7 @@ #include "llvm/MC/MCSubtargetInfo.h" #include "llvm/MC/MCSymbol.h" #include "llvm/Support/CommandLine.h" +#include "llvm/Support/Compiler.h" #include "llvm/Support/SourceMgr.h" #include "llvm/Support/TargetRegistry.h" #include "llvm/Support/raw_ostream.h" @@ -150,6 +151,13 @@ IOK_TYPE, }; + enum MasmOperatorKind { + MOK_INVALID = 0, + MOK_LENGTHOF, + MOK_SIZEOF, + MOK_TYPE, + }; + class InfixCalculator { typedef std::pair< InfixCalculatorTok, int64_t > ICToken; SmallVector InfixOperatorStack; @@ -368,6 +376,9 @@ bool OffsetOperator; SMLoc OffsetOperatorLoc; StringRef CurType; + unsigned CurSize = 0; + unsigned CurElementSize = 0; + unsigned CurLength = 1; bool setSymRef(const MCExpr *Val, StringRef ID, StringRef &ErrMsg) { if (Sym) { @@ -396,6 +407,9 @@ const MCExpr *getSym() { return Sym; } StringRef getSymName() { return SymName; } StringRef getType() { return CurType; } + unsigned getSize() { return CurSize; } + unsigned getElementSize() { return CurElementSize; } + unsigned getLength() { return CurLength; } int64_t 
getImm() { return Imm + IC.execute(); } bool isValidEndState() { return State == IES_RBRAC || State == IES_INTEGER; @@ -752,6 +766,7 @@ case IES_RPAREN: State = IES_PLUS; IC.pushOperator(IC_PLUS); + CurLength = 1; break; case IES_INIT: case IES_CAST: @@ -835,8 +850,8 @@ } } bool onOffset(const MCExpr *Val, SMLoc OffsetLoc, StringRef ID, - const InlineAsmIdentifierInfo &IDInfo, bool ParsingMSInlineAsm, - StringRef &ErrMsg) { + const InlineAsmIdentifierInfo &IDInfo, + bool ParsingMSInlineAsm, StringRef &ErrMsg) { PrevState = State; switch (State) { default: @@ -860,19 +875,25 @@ } return false; } - void onCast(StringRef Type) { + void onCast(StringRef Type, unsigned Size) { PrevState = State; switch (State) { default: State = IES_ERROR; break; case IES_LPAREN: - setType(Type); + setTypeInfo(Type, Size, Size, 1); State = IES_CAST; break; } } - void setType(StringRef Type) { CurType = Type; } + void setTypeInfo(StringRef Type, unsigned Size, unsigned ElementSize, + unsigned Length) { + CurType = Type; + CurSize = Size; + CurElementSize = ElementSize; + CurLength = Length; + } }; bool Error(SMLoc L, const Twine &Msg, SMRange Range = None, @@ -909,6 +930,8 @@ bool ParseIntelDotOperator(IntelExprStateMachine &SM, SMLoc &End); unsigned IdentifyIntelInlineAsmOperator(StringRef Name); unsigned ParseIntelInlineAsmOperator(unsigned OpKind); + unsigned IdentifyMasmOperator(StringRef Name); + bool ParseMasmOperator(unsigned OpKind, int64_t &Val); bool ParseRoundingModeOp(SMLoc Start, OperandVector &Operands); bool ParseIntelNamedOperator(StringRef Name, IntelExprStateMachine &SM, bool &ParseError, SMLoc &End); @@ -1653,6 +1676,13 @@ if (ParseIntelDotOperator(SM, End)) return true; break; + case AsmToken::Dollar: + if (!Parser.isParsingMasm()) { + if ((Done = SM.isValidEndState())) + break; + return Error(Tok.getLoc(), "unknown token in expression"); + } + LLVM_FALLTHROUGH; case AsmToken::At: case AsmToken::String: case AsmToken::Identifier: { @@ -1664,7 +1694,10 @@ const 
AsmToken &NextTok = getLexer().peekTok(); if (NextTok.is(AsmToken::Identifier) && NextTok.getIdentifier().equals_lower("ptr")) { - SM.onCast(Identifier); + unsigned Size; + if (Parser.lookUpType(Identifier, Size)) + return Error(Tok.getLoc(), "unknown type"); + SM.onCast(Identifier, Size); // Eat type and PTR. consumeToken(); End = consumeToken(); @@ -1689,16 +1722,15 @@ if (SM.onRegister(Reg, ErrMsg)) return Error(IdentLoc, ErrMsg); - StringRef Type; - unsigned Offset = 0; + AsmFieldInfo Info; SMLoc FieldStartLoc = SMLoc::getFromPointer(Field.data()); - if (Parser.lookUpField(Field, Type, Offset)) + if (Parser.lookUpField(Field, Info)) return Error(FieldStartLoc, "unknown offset"); else if (SM.onPlus(ErrMsg)) return Error(getTok().getLoc(), ErrMsg); - else if (SM.onInteger(Offset, ErrMsg)) + else if (SM.onInteger(Info.Offset, ErrMsg)) return Error(IdentLoc, ErrMsg); - SM.setType(Type); + SM.setTypeInfo(Info.Type, Info.Size, Info.ElementSize, Info.Length); End = consumeToken(); break; @@ -1744,11 +1776,26 @@ return Error(IdentLoc, ErrMsg); break; } + if (Parser.isParsingMasm()) { + if (unsigned OpKind = IdentifyMasmOperator(Identifier)) { + int64_t Val; + if (ParseMasmOperator(OpKind, Val)) + return true; + if (SM.onInteger(Val, ErrMsg)) + return Error(IdentLoc, ErrMsg); + break; + } + } if (getParser().parsePrimaryExpr(Val, End)) { return Error(Tok.getLoc(), "Unexpected identifier!"); } else if (SM.onIdentifierExpr(Val, Identifier, Info, false, ErrMsg)) { return Error(IdentLoc, ErrMsg); } + AsmFieldInfo FieldInfo; + if (!getParser().lookUpField(Identifier, FieldInfo)) { + SM.setTypeInfo(FieldInfo.Type, FieldInfo.Size, FieldInfo.ElementSize, + FieldInfo.Length); + } break; } case AsmToken::Integer: { @@ -1957,8 +2004,7 @@ bool X86AsmParser::ParseIntelDotOperator(IntelExprStateMachine &SM, SMLoc &End) { const AsmToken &Tok = getTok(); - StringRef Type; - unsigned Offset = 0; + AsmFieldInfo Info; // Drop the optional '.'. 
StringRef DotDispStr = Tok.getString(); @@ -1969,27 +2015,28 @@ if (Tok.is(AsmToken::Real)) { APInt DotDisp; DotDispStr.getAsInteger(10, DotDisp); - Offset = DotDisp.getZExtValue(); + Info.Offset = DotDisp.getZExtValue(); } else if ((isParsingMSInlineAsm() || getParser().isParsingMasm()) && Tok.is(AsmToken::Identifier)) { const std::pair BaseMember = DotDispStr.split('.'); const StringRef Base = BaseMember.first, Member = BaseMember.second; - if (getParser().lookUpField(SM.getType(), DotDispStr, Type, Offset) && - getParser().lookUpField(SM.getSymName(), DotDispStr, Type, Offset) && - getParser().lookUpField(DotDispStr, Type, Offset) && + if (getParser().lookUpField(SM.getType(), DotDispStr, Info) && + getParser().lookUpField(SM.getSymName(), DotDispStr, Info) && + getParser().lookUpField(DotDispStr, Info) && (!SemaCallback || - SemaCallback->LookupInlineAsmField(Base, Member, Offset))) + SemaCallback->LookupInlineAsmField(Base, Member, Info.Offset))) return Error(Tok.getLoc(), "Unable to lookup field reference!"); - } else + } else { return Error(Tok.getLoc(), "Unexpected token type!"); + } // Eat the DotExpression and update End End = SMLoc::getFromPointer(DotDispStr.data()); const char *DotExprEndLoc = DotDispStr.data() + DotDispStr.size(); while (Tok.getLoc().getPointer() < DotExprEndLoc) Lex(); - SM.addImm(Offset); - SM.setType(Type); + SM.addImm(Info.Offset); + SM.setTypeInfo(Info.Type, Info.Size, Info.ElementSize, Info.Length); return false; } @@ -2059,6 +2106,66 @@ return CVal; } +// Query a candidate string for being an Intel assembly operator +// Report back its kind, or IOK_INVALID if does not evaluated as a known one +unsigned X86AsmParser::IdentifyMasmOperator(StringRef Name) { + return StringSwitch(Name.lower()) + .Case("type", MOK_TYPE) + .Cases("size", "sizeof", MOK_SIZEOF) + .Cases("length", "lengthof", MOK_LENGTHOF) + .Default(MOK_INVALID); +} + +/// Parse the 'LENGTHOF', 'SIZEOF', and 'TYPE' operators. 
The LENGTHOF operator +/// returns the number of elements in an array. It returns the value 1 for +/// non-array variables. The SIZEOF operator returns the size of a type or +/// variable in bytes. A variable's size is the product of its LENGTH and TYPE. +/// The TYPE operator returns the size of a variable. If the variable is an +/// array, TYPE returns the size of a single element. +bool X86AsmParser::ParseMasmOperator(unsigned OpKind, int64_t &Val) { + MCAsmParser &Parser = getParser(); + Parser.Lex(); // Eat operator. + + bool InParens = parseOptionalToken(AsmToken::LParen); + + Val = 0; + if (OpKind == MOK_SIZEOF) { + // Check for SIZEOF(<type>) + const AsmToken &NextTok = Parser.getTok(); + unsigned Size = 0; + if (NextTok.is(AsmToken::Identifier) && + !Parser.lookUpType(NextTok.getIdentifier(), Size)) { + Val = Size >> 3; + Parser.Lex(); // Eat type. + } + } + + if (!Val) { + IntelExprStateMachine SM; + SMLoc End; + if (ParseIntelExpression(SM, End)) + return true; + + switch (OpKind) { + default: + llvm_unreachable("Unexpected operand kind!"); + case MOK_SIZEOF: + Val = SM.getSize() >> 3; + break; + case MOK_LENGTHOF: + Val = SM.getLength(); + break; + case MOK_TYPE: + Val = SM.getElementSize() >> 3; + break; + } + } + + if (InParens) + return parseToken(AsmToken::RParen); + return false; +} + bool X86AsmParser::ParseIntelMemoryOperandSize(unsigned &Size) { Size = StringSwitch(getTok().getString()) .Cases("BYTE", "byte", 8) @@ -2161,6 +2268,8 @@ unsigned BaseReg = SM.getBaseReg(); unsigned IndexReg = SM.getIndexReg(); unsigned Scale = SM.getScale(); + if (!PtrInOperand) + Size = SM.getElementSize(); if (Scale == 0 && BaseReg != X86::ESP && BaseReg != X86::RSP && (IndexReg == X86::ESP || IndexReg == X86::RSP)) diff --git a/llvm/test/tools/llvm-ml/size_inference.test b/llvm/test/tools/llvm-ml/size_inference.test new file mode 100644 --- /dev/null +++ b/llvm/test/tools/llvm-ml/size_inference.test @@ -0,0 +1,27 @@ +; RUN: not llvm-ml -filetype=asm %s 2>&1 | FileCheck %s 
--dump-input=always + +.data + +FOO STRUCT + dword_field DWORD 3 + byte_field BYTE 4 DUP (1) +FOO ENDS + +var FOO <> + +.code + +t1 PROC + +mov eax, var.byte_field +; CHECK: error: invalid operand for instruction + +mov eax, [var].byte_field +; CHECK: error: invalid operand for instruction + +mov eax, [var.byte_field] +; CHECK: error: invalid operand for instruction + +t1 ENDP + +END diff --git a/llvm/test/tools/llvm-ml/sizeof.test b/llvm/test/tools/llvm-ml/sizeof.test new file mode 100644 --- /dev/null +++ b/llvm/test/tools/llvm-ml/sizeof.test @@ -0,0 +1,19 @@ +# RUN: llvm-ml -filetype=asm %s | FileCheck %s + +.code + +FOO STRUCT + x BYTE 5 DUP (?) +FOO ENDS + +t1: +mov eax, sizeof(dword) +; CHECK: t1: +; CHECK-NEXT: mov eax, 4 + +t2: +mov eax, sizeof(FOO) +; CHECK: t2: +; CHECK-NEXT: mov eax, 5 + +END