diff --git a/llvm/include/llvm/MC/MCParser/MCAsmParser.h b/llvm/include/llvm/MC/MCParser/MCAsmParser.h --- a/llvm/include/llvm/MC/MCParser/MCAsmParser.h +++ b/llvm/include/llvm/MC/MCParser/MCAsmParser.h @@ -170,6 +170,11 @@ virtual bool isParsingMasm() const { return false; } + virtual bool LookUpFieldOffset(StringRef Base, StringRef Member, + unsigned &Offset) { + return true; + } + /// Parse MS-style inline assembly. virtual bool parseMSInlineAsm( void *AsmLoc, std::string &AsmString, unsigned &NumOutputs, diff --git a/llvm/include/llvm/MC/MCParser/MCTargetAsmParser.h b/llvm/include/llvm/MC/MCParser/MCTargetAsmParser.h --- a/llvm/include/llvm/MC/MCParser/MCTargetAsmParser.h +++ b/llvm/include/llvm/MC/MCParser/MCTargetAsmParser.h @@ -334,7 +334,7 @@ /// SemaCallback - The Sema callback implementation. Must be set when parsing /// ms-style inline assembly. - MCAsmParserSemaCallback *SemaCallback; + MCAsmParserSemaCallback *SemaCallback = nullptr; /// Set of options which affects instrumentation of inline assembly. MCTargetOptions MCOptions; diff --git a/llvm/lib/MC/MCParser/MasmParser.cpp b/llvm/lib/MC/MCParser/MasmParser.cpp --- a/llvm/lib/MC/MCParser/MasmParser.cpp +++ b/llvm/lib/MC/MCParser/MasmParser.cpp @@ -113,6 +113,238 @@ : AsmRewrites(rewrites) {} }; +enum FieldType { + FT_INTEGRAL, // Initializer: integer expression (stored as an MCExpr) + FT_REAL, // Initializer: real number (stored as an APInt) + FT_STRUCT // Initializer: struct initializer (stored recursively) +}; + +struct FieldInfo; +struct StructInfo { + StringRef Name; + bool IsUnion = false; + size_t Alignment = 0; + size_t Size = 0; + std::vector Fields; + StringMap FieldsByName; + + FieldInfo &addField(StringRef FieldName, FieldType FT); + + StructInfo() = default; + + StructInfo(StringRef StructName, bool Union, unsigned AlignmentValue) + : Name(StructName), IsUnion(Union), Alignment(AlignmentValue) {} +}; + +// FIXME: This should probably use a class hierarchy, raw pointers between the +// objects, and dynamic type resolution instead of a union. On the other hand, +// ownership then becomes much more complicated; the obvious thing would be to +// use BumpPtrAllocator, but the lack of a destructor makes that messy. + +struct StructInitializer; +struct IntFieldInfo { + SmallVector Values; + + IntFieldInfo() = default; + IntFieldInfo(const SmallVector &V) { Values = V; } + IntFieldInfo(SmallVector &&V) { Values = V; } +}; +struct RealFieldInfo { + SmallVector AsIntValues; + + RealFieldInfo() = default; + RealFieldInfo(const SmallVector &V) { AsIntValues = V; } + RealFieldInfo(SmallVector &&V) { AsIntValues = V; } +}; +struct StructFieldInfo { + std::vector Initializers; + StructInfo Structure; + + StructFieldInfo() = default; + StructFieldInfo(const std::vector &V, StructInfo S) { + Initializers = V; + Structure = S; + } + StructFieldInfo(std::vector &&V, StructInfo S) { + Initializers = V; + Structure = S; + } +}; + +class FieldInitializer { +public: + FieldType FT; + union { + IntFieldInfo IntInfo; + RealFieldInfo RealInfo; + StructFieldInfo StructInfo; + }; + + ~FieldInitializer() { + switch (FT) { + case FT_INTEGRAL: + IntInfo.~IntFieldInfo(); + break; + case FT_REAL: + RealInfo.~RealFieldInfo(); + break; + case FT_STRUCT: + StructInfo.~StructFieldInfo(); + break; + } + } + + FieldInitializer(FieldType FT) : FT(FT) { + switch (FT) { + case FT_INTEGRAL: + new (&IntInfo) IntFieldInfo(); + break; + case FT_REAL: + new (&RealInfo) RealFieldInfo(); + break; + case FT_STRUCT: + new (&StructInfo) StructFieldInfo(); + break; + } + } + + FieldInitializer(SmallVector &&Values) : FT(FT_INTEGRAL) { + new (&IntInfo) IntFieldInfo(Values); + } + + FieldInitializer(SmallVector &&AsIntValues) : FT(FT_REAL) { + new (&RealInfo) RealFieldInfo(AsIntValues); + } + + FieldInitializer(std::vector &&Initializers, + struct StructInfo Structure) + : FT(FT_STRUCT) { + new (&StructInfo) StructFieldInfo(Initializers, Structure); + } + + FieldInitializer(const FieldInitializer &Initializer) : FT(Initializer.FT) { + switch (FT) { + case FT_INTEGRAL: + new (&IntInfo) IntFieldInfo(Initializer.IntInfo); + break; + case FT_REAL: + new (&RealInfo) RealFieldInfo(Initializer.RealInfo); + break; + case FT_STRUCT: + new (&StructInfo) StructFieldInfo(Initializer.StructInfo); + break; + } + } + + FieldInitializer(FieldInitializer &&Initializer) : FT(Initializer.FT) { + switch (FT) { + case FT_INTEGRAL: + new (&IntInfo) IntFieldInfo(Initializer.IntInfo); + break; + case FT_REAL: + new (&RealInfo) RealFieldInfo(Initializer.RealInfo); + break; + case FT_STRUCT: + new (&StructInfo) StructFieldInfo(Initializer.StructInfo); + break; + } + } + + FieldInitializer &operator=(const FieldInitializer &Initializer) { + if (FT != Initializer.FT) { + switch (FT) { + case FT_INTEGRAL: + IntInfo.~IntFieldInfo(); + break; + case FT_REAL: + RealInfo.~RealFieldInfo(); + break; + case FT_STRUCT: + StructInfo.~StructFieldInfo(); + break; + } + } + FT = Initializer.FT; + switch (FT) { + case FT_INTEGRAL: + IntInfo = Initializer.IntInfo; + break; + case FT_REAL: + RealInfo = Initializer.RealInfo; + break; + case FT_STRUCT: + StructInfo = Initializer.StructInfo; + break; + } + return *this; + } + + FieldInitializer &operator=(FieldInitializer &&Initializer) { + if (FT != Initializer.FT) { + switch (FT) { + case FT_INTEGRAL: + IntInfo.~IntFieldInfo(); + break; + case FT_REAL: + RealInfo.~RealFieldInfo(); + break; + case FT_STRUCT: + StructInfo.~StructFieldInfo(); + break; + } + } + FT = Initializer.FT; + switch (FT) { + case FT_INTEGRAL: + IntInfo = Initializer.IntInfo; + break; + case FT_REAL: + RealInfo = Initializer.RealInfo; + break; + case FT_STRUCT: + StructInfo = Initializer.StructInfo; + break; + } + return *this; + } +}; + +struct StructInitializer { + std::vector FieldInitializers; +}; + +struct FieldInfo { + // Offset of the field within the containing STRUCT + size_t Offset = 0; + + // Total size of the field (= LengthOf * Type) + size_t SizeOf = 0; + + // Number of elements in the field (1 if scalar, >1 if an array) + size_t LengthOf = 0; + + // Size of a single entry in this field, in bytes ("type" in MASM standards) + size_t Type = 0; + + FieldInitializer Contents; + + FieldInfo(FieldType FT) : Contents(FT) {} +}; + +FieldInfo &StructInfo::addField(StringRef FieldName, FieldType FT) { + if (!FieldName.empty()) + FieldsByName[FieldName] = Fields.size(); + Fields.emplace_back(FT); + FieldInfo &Field = Fields.back(); + if (IsUnion) { + Field.Offset = 0; + } else { + Size = llvm::alignTo(Size, Alignment); + Field.Offset = Size; + } + return Field; +} + /// The concrete assembly parser instance. // Note that this is a full MCAsmParser, not an MCAsmParserExtension! // It's a peer of AsmParser, not of COFFAsmParser, WasmAsmParser, etc. @@ -149,6 +381,15 @@ }; StringMap Variables; + /// Stack of active struct definitions. + SmallVector StructInProgress; + + /// Maps struct tags to struct definitions. + StringMap Structs; + + /// Maps data location names to user-defined types. + StringMap KnownType; + /// Stack of active macro instantiations. std::vector ActiveMacros; @@ -190,6 +431,9 @@ // Is alt macro mode enabled. bool AltMacroMode = false; + // Current <...> expression depth + unsigned AngleBracketDepth = 0U; + public: MasmParser(SourceMgr &SM, MCContext &Ctx, MCStreamer &Out, const MCAsmInfo &MAI, unsigned CB); @@ -247,6 +491,9 @@ bool isParsingMasm() const override { return true; } + bool LookUpFieldOffset(StringRef Base, StringRef Member, + unsigned &Offset) override; + bool parseMSInlineAsm(void *AsmLoc, std::string &AsmString, unsigned &NumOutputs, unsigned &NumInputs, SmallVectorImpl> &OpDecls, @@ -315,6 +562,9 @@ } static void DiagHandler(const SMDiagnostic &Diag, void *Context); + bool LookUpFieldOffset(const StructInfo &Structure, StringRef Member, + unsigned &Offset); + /// Should we emit DWARF describing this assembler source? (Returns false if /// the source has .file directives, which means we don't want to generate /// info describing the assembler source itself.) @@ -464,6 +714,9 @@ DK_ERRE, DK_ERRNZ, DK_ECHO, + DK_STRUCT, + DK_UNION, + DK_ENDS, DK_END }; @@ -490,20 +743,83 @@ bool parseDirectiveAscii(StringRef IDVal, bool ZeroTerminated); // "byte", "word", ... - bool parseScalarInstList(unsigned Size, - SmallVectorImpl &Values); + bool emitIntValue(const MCExpr *Value, unsigned Size); + bool parseScalarInitializer(unsigned Size, + SmallVectorImpl &Values, + unsigned StringPadLength = 0); + bool parseScalarInstList( + unsigned Size, SmallVectorImpl &Values, + const AsmToken::TokenKind EndToken = AsmToken::EndOfStatement); + bool emitIntegralValues(unsigned Size); + bool addIntegralField(StringRef Name, unsigned Size); bool parseDirectiveValue(StringRef IDVal, unsigned Size); bool parseDirectiveNamedValue(StringRef IDVal, unsigned Size, StringRef Name, SMLoc NameLoc); // "real4", "real8" + bool emitRealValues(const fltSemantics &Semantics); + bool addRealField(StringRef Name, const fltSemantics &Semantics); bool parseDirectiveRealValue(StringRef IDVal, const fltSemantics &Semantics); - bool parseRealInstList(const fltSemantics &Semantics, - SmallVectorImpl &Values); + bool parseRealInstList( + const fltSemantics &Semantics, SmallVectorImpl &Values, + const AsmToken::TokenKind EndToken = AsmToken::EndOfStatement); bool parseDirectiveNamedRealValue(StringRef IDVal, const fltSemantics &Semantics, StringRef Name, SMLoc NameLoc); + bool parseOptionalAngleBracketOpen(); + bool parseAngleBracketClose(); + + bool parseFieldInitializer(const FieldInfo &Field, + FieldInitializer &Initializer); + bool parseFieldInitializer(const FieldInfo &Field, + const IntFieldInfo &Contents, + FieldInitializer &Initializer); + bool parseFieldInitializer(const FieldInfo &Field, + const RealFieldInfo &Contents, + FieldInitializer &Initializer); + bool parseFieldInitializer(const FieldInfo &Field, + const StructFieldInfo &Contents, + FieldInitializer &Initializer); + + bool parseStructInitializer(const StructInfo &Structure, + StructInitializer &Initializer); + bool parseStructInstList( + const StructInfo &Structure, std::vector &Initializers, + const AsmToken::TokenKind EndToken = AsmToken::EndOfStatement); + + bool emitFieldValue(const FieldInfo &Field); + bool emitFieldValue(const FieldInfo &Field, const IntFieldInfo &Contents); + bool emitFieldValue(const FieldInfo &Field, const RealFieldInfo &Contents); + bool emitFieldValue(const FieldInfo &Field, const StructFieldInfo &Contents); + + bool emitStructValue(const StructInfo &Structure); + + bool emitFieldInitializer(const FieldInfo &Field, + const FieldInitializer &Initializer); + bool emitFieldInitializer(const FieldInfo &Field, + const IntFieldInfo &Contents, + const IntFieldInfo &Initializer); + bool emitFieldInitializer(const FieldInfo &Field, + const RealFieldInfo &Contents, + const RealFieldInfo &Initializer); + bool emitFieldInitializer(const FieldInfo &Field, + const StructFieldInfo &Contents, + const StructFieldInfo &Initializer); + + bool emitStructInitializer(const StructInfo &Structure, + const StructInitializer &Initializer); + + // user-defined types (structs, unions) + bool emitStructValue(const StructInfo &Structure, + const StructInitializer &Initializer, + size_t InitialOffset = 0, size_t InitialField = 0); + bool parseDirectiveStructValue(const StructInfo &Structure, + StringRef Directive, SMLoc DirLoc); + bool parseDirectiveNamedStructValue(const StructInfo &Structure, + StringRef Directive, SMLoc DirLoc, + StringRef Name); + // "=", "equ", "textequ" bool parseDirectiveEquate(StringRef IDVal, StringRef Name, DirectiveKind DirKind); @@ -562,6 +878,12 @@ // alternate macro mode directives bool parseDirectiveAltmacro(StringRef Directive); + bool parseDirectiveStruct(StringRef Directive, DirectiveKind DirKind, + StringRef Name, SMLoc NameLoc); + bool parseDirectiveNestedStruct(StringRef Directive, DirectiveKind DirKind); + bool parseDirectiveEnds(StringRef Name, SMLoc NameLoc); + bool parseDirectiveNestedEnds(); + /// Parse a directive like ".globl" which /// accepts a single symbol (which should be a label or an external). bool parseDirectiveSymbolAttribute(MCSymbolAttr Attr); @@ -1060,7 +1382,7 @@ MCSymbolRefExpr::VariantKind Variant = MCSymbolRefExpr::VK_None; - // Lookup the symbol variant if used. + // Look up the symbol variant if used. if (!Split.second.empty()) { Variant = MCSymbolRefExpr::getVariantKindForName(Split.second); if (Variant != MCSymbolRefExpr::VK_Invalid) { @@ -1073,6 +1395,27 @@ } } + // Find the field offset if used. + unsigned Offset = 0; + Split = SymbolName.split('.'); + if (!Split.second.empty()) { + SymbolName = Split.first; + if (Structs.count(SymbolName.lower()) && + !LookUpFieldOffset(SymbolName, Split.second, Offset)) { + // This is actually a reference to a field offset. + Res = MCConstantExpr::create(Offset, getContext()); + return false; + } + + auto TypeIt = KnownType.find(SymbolName); + if (TypeIt == KnownType.end() || + LookUpFieldOffset(*TypeIt->second, Split.second, Offset)) { + std::pair BaseMember = Split.second.split('.'); + StringRef Base = BaseMember.first, Member = BaseMember.second; + LookUpFieldOffset(Base, Member, Offset); + } + } + MCSymbol *Sym = getContext().getInlineAsmLabel(SymbolName); if (!Sym) Sym = getContext().getOrCreateSymbol(SymbolName); @@ -1093,7 +1436,15 @@ } // Otherwise create a symbol ref. - Res = MCSymbolRefExpr::create(Sym, Variant, getContext(), FirstTokenLoc); + const MCExpr *SymRef = + MCSymbolRefExpr::create(Sym, Variant, getContext(), FirstTokenLoc); + if (Offset) { + Res = MCBinaryExpr::create(MCBinaryExpr::Add, SymRef, + MCConstantExpr::create(Offset, getContext()), + getContext()); + } else { + Res = SymRef; + } return false; } case AsmToken::BigNum: @@ -1107,7 +1458,7 @@ // Look for 'b' or 'f' following an Integer as a directional label if (Lexer.getKind() == AsmToken::Identifier) { StringRef IDVal = getTok().getString(); - // Lookup the symbol variant if used. + // Look up the symbol variant if used. std::pair Split = IDVal.split('@'); MCSymbolRefExpr::VariantKind Variant = MCSymbolRefExpr::VK_None; if (Split.first.size() != IDVal.size()) { @@ -1324,7 +1675,8 @@ static unsigned getGNUBinOpPrecedence(AsmToken::TokenKind K, MCBinaryExpr::Opcode &Kind, - bool ShouldUseLogicalShr) { + bool ShouldUseLogicalShr, + bool EndExpressionAtGreater) { switch (K) { default: return 0; // not a binop. @@ -1352,6 +1704,8 @@ Kind = MCBinaryExpr::LTE; return 3; case AsmToken::Greater: + if (EndExpressionAtGreater) + return 0; Kind = MCBinaryExpr::GT; return 3; case AsmToken::GreaterEqual: @@ -1393,6 +1747,8 @@ Kind = MCBinaryExpr::Shl; return 6; case AsmToken::GreaterGreater: + if (EndExpressionAtGreater) + return 0; Kind = ShouldUseLogicalShr ? MCBinaryExpr::LShr : MCBinaryExpr::AShr; return 6; } @@ -1401,7 +1757,8 @@ unsigned MasmParser::getBinOpPrecedence(AsmToken::TokenKind K, MCBinaryExpr::Opcode &Kind) { bool ShouldUseLogicalShr = MAI.shouldUseLogicalShr(); - return getGNUBinOpPrecedence(K, Kind, ShouldUseLogicalShr); + return getGNUBinOpPrecedence(K, Kind, ShouldUseLogicalShr, + AngleBracketDepth > 0); } /// Parse all binary operators with precedence >= 'Precedence'. @@ -1678,6 +2035,13 @@ getTargetParser().flushPendingInstructions(getStreamer()); + // Special-case handling of structure-end directives at higher priority, + // since ENDS is overloaded as a segment-end directive. + if (IDVal.equals_lower("ends") && StructInProgress.size() > 1 && + getTok().is(AsmToken::EndOfStatement)) { + return parseDirectiveNestedEnds(); + } + // First, check the extension directive map to see if any extension has // registered itself to parse this directive. std::pair Handler = @@ -1735,6 +2099,11 @@ return parseDirectiveRealValue(IDVal, APFloat::IEEEsingle()); case DK_REAL8: return parseDirectiveRealValue(IDVal, APFloat::IEEEdouble()); + case DK_STRUCT: + case DK_UNION: + return parseDirectiveNestedStruct(IDVal, DirKind); + case DK_ENDS: + return parseDirectiveNestedEnds(); case DK_ALIGN: return parseDirectiveAlign(); case DK_ORG: @@ -1878,6 +2247,12 @@ return Error(IDLoc, "unknown directive"); } + // We also check if this is allocating memory with user-defined type. + auto IDIt = Structs.find(IDVal.lower()); + if (IDIt != Structs.end()) + return parseDirectiveStructValue(/*Structure=*/IDIt->getValue(), IDVal, + IDLoc); + // Non-conditional Microsoft directives sometimes follow their first argument. const AsmToken nextTok = getTok(); const StringRef nextVal = nextTok.getString(); @@ -1894,6 +2269,14 @@ getTargetParser().flushPendingInstructions(getStreamer()); + // Special-case handling of structure-end directives at higher priority, since + // ENDS is overloaded as a segment-end directive. + if (nextVal.equals_lower("ends") && StructInProgress.size() == 1 && + IDVal.equals_lower(StructInProgress.back().Name)) { + Lex(); + return parseDirectiveEnds(IDVal, IDLoc); + } + // First, check the extension directive map to see if any extension has // registered itself to parse this directive. std::pair Handler = @@ -1904,7 +2287,7 @@ return (*Handler.second)(Handler.first, nextVal, nextLoc); } - // Finally, if no one else is interested in this directive, it must be + // If no one else is interested in this directive, it must be // generic and familiar to this class. DirKindIt = DirectiveKindMap.find(nextVal.lower()); DirKind = (DirKindIt == DirectiveKindMap.end()) @@ -1945,6 +2328,21 @@ Lex(); return parseDirectiveNamedRealValue(nextVal, APFloat::IEEEdouble(), IDVal, IDLoc); + case DK_STRUCT: + case DK_UNION: + Lex(); + return parseDirectiveStruct(nextVal, DirKind, IDVal, IDLoc); + case DK_ENDS: + Lex(); + return parseDirectiveEnds(IDVal, IDLoc); + } + + // Finally, we check if this is allocating a variable with user-defined type. + auto NextIt = Structs.find(nextVal.lower()); + if (NextIt != Structs.end()) { + Lex(); + return parseDirectiveNamedStructValue(/*Structure=*/NextIt->getValue(), + nextVal, nextLoc, IDVal); } // __asm _emit or __asm __emit @@ -2781,89 +3179,141 @@ return false; } -bool MasmParser::parseScalarInstList(unsigned Size, - SmallVectorImpl &Values) { - do { - if (getTok().is(AsmToken::String)) { - StringRef Value = getTok().getStringContents(); - if (Size == 1) { - // Treat each character as an initializer. - for (const char CharVal : Value) - Values.push_back(MCConstantExpr::create(CharVal, getContext())); - } else { - // Treat the string as an initial value in big-endian representation. - if (Value.size() > Size) - return Error(getTok().getLoc(), "out of range literal value"); - - uint64_t IntValue = 0; - for (const unsigned char CharVal : Value.bytes()) - IntValue = (IntValue << 8) | CharVal; - Values.push_back(MCConstantExpr::create(IntValue, getContext())); - } - Lex(); +bool MasmParser::emitIntValue(const MCExpr *Value, unsigned Size) { + // Special case constant expressions to match code + // generator. + if (const MCConstantExpr *MCE = dyn_cast(Value)) { + assert(Size <= 8 && "Invalid size"); + int64_t IntValue = MCE->getValue(); + if (!isUIntN(8 * Size, IntValue) && !isIntN(8 * Size, IntValue)) + return Error(MCE->getLoc(), "out of range literal value"); + getStreamer().emitIntValue(IntValue, Size); + } else { + const MCSymbolRefExpr *MSE = dyn_cast(Value); + if (MSE && MSE->getSymbol().getName() == "?") { + // ? initializer; treat as 0. + getStreamer().emitIntValue(0, Size); } else { - const MCExpr *Value; - if (checkForValidSection() || parseExpression(Value)) + getStreamer().emitValue(Value, Size, Value->getLoc()); + } + } + return false; +} + +bool MasmParser::parseScalarInitializer(unsigned Size, + SmallVectorImpl &Values, + unsigned StringPadLength) { + if (getTok().is(AsmToken::String)) { + StringRef Value = getTok().getStringContents(); + if (Size == 1) { + // Treat each character as an initializer. + for (const char CharVal : Value) + Values.push_back(MCConstantExpr::create(CharVal, getContext())); + + // Pad the string with spaces to the specified length + for (size_t i = Value.size(); i < StringPadLength; ++i) + Values.push_back(MCConstantExpr::create(' ', getContext())); + } else { + // Treat the string as an initial value in big-endian representation. + if (Value.size() > Size) + return Error(getTok().getLoc(), "out of range literal value"); + + uint64_t IntValue = 0; + for (const unsigned char CharVal : Value.bytes()) + IntValue = (IntValue << 8) | CharVal; + Values.push_back(MCConstantExpr::create(IntValue, getContext())); + } + Lex(); + } else { + const MCExpr *Value; + if (checkForValidSection() || parseExpression(Value)) + return true; + if (getTok().is(AsmToken::Identifier) && + getTok().getString().equals_lower("dup")) { + Lex(); // eat 'dup' + const MCConstantExpr *MCE = dyn_cast(Value); + if (!MCE) + return Error(Value->getLoc(), + "cannot repeat value a non-constant number of times"); + const int64_t Repetitions = MCE->getValue(); + if (Repetitions < 0) + return Error(Value->getLoc(), + "cannot repeat value a negative number of times"); + + SmallVector DuplicatedValues; + if (parseToken(AsmToken::LParen, + "parentheses required for 'dup' contents") || + parseScalarInstList(Size, DuplicatedValues) || + parseToken(AsmToken::RParen, "unmatched parentheses")) return true; - if (getTok().is(AsmToken::Identifier) && - getTok().getString().equals_lower("dup")) { - Lex(); // eat 'dup' - const MCConstantExpr *MCE = dyn_cast(Value); - if (!MCE) - return Error(Value->getLoc(), - "cannot repeat value a non-constant number of times"); - const int64_t Repetitions = MCE->getValue(); - if (Repetitions < 0) - return Error(Value->getLoc(), - "cannot repeat value a negative number of times"); - - SmallVector DuplicatedValues; - if (parseToken(AsmToken::LParen, - "parentheses required for 'dup' contents") || - parseScalarInstList(Size, DuplicatedValues) || - parseToken(AsmToken::RParen, "unmatched parentheses")) - return true; - for (int i = 0; i < Repetitions; ++i) - Values.append(DuplicatedValues.begin(), DuplicatedValues.end()); - } else { - Values.push_back(Value); - } + for (int i = 0; i < Repetitions; ++i) + Values.append(DuplicatedValues.begin(), DuplicatedValues.end()); + } else { + Values.push_back(Value); } + } + return false; +} - // Continue if we see a comma. (Also, allow line continuation.) - } while (parseOptionalToken(AsmToken::Comma) && - (getTok().isNot(AsmToken::EndOfStatement) || - !parseToken(AsmToken::EndOfStatement))); +bool MasmParser::parseScalarInstList(unsigned Size, + SmallVectorImpl &Values, + const AsmToken::TokenKind EndToken) { + while (getTok().isNot(EndToken) && + (EndToken != AsmToken::Greater || + getTok().isNot(AsmToken::GreaterGreater))) { + parseScalarInitializer(Size, Values); + + // If we see a comma, continue, and allow line continuation. + if (!parseOptionalToken(AsmToken::Comma)) + break; + parseOptionalToken(AsmToken::EndOfStatement); + } + return false; +} +bool MasmParser::emitIntegralValues(unsigned Size) { + SmallVector Values; + if (checkForValidSection() || parseScalarInstList(Size, Values)) + return true; + + for (auto Value : Values) { + emitIntValue(Value, Size); + } + return false; +} + +// Add a field to the current structure +bool MasmParser::addIntegralField(StringRef Name, unsigned Size) { + StructInfo &Struct = StructInProgress.back(); + FieldInfo &Field = Struct.addField(Name, FT_INTEGRAL); + IntFieldInfo &IntInfo = Field.Contents.IntInfo; + + Field.Type = Size; + + if (parseScalarInstList(Size, IntInfo.Values)) + return true; + + Field.SizeOf = Field.Type * IntInfo.Values.size(); + Field.LengthOf = IntInfo.Values.size(); + if (Struct.IsUnion) + Struct.Size = std::max(Struct.Size, Field.SizeOf); + else + Struct.Size += Field.SizeOf; return false; } /// parseDirectiveValue /// ::= (byte | word | ... ) [ expression (, expression)* ] bool MasmParser::parseDirectiveValue(StringRef IDVal, unsigned Size) { - SmallVector Values; - if (parseScalarInstList(Size, Values)) + if (StructInProgress.empty()) { + // Initialize data value + if (emitIntegralValues(Size)) + return addErrorSuffix(" in '" + Twine(IDVal) + "' directive"); + } else if (addIntegralField("", Size)) { return addErrorSuffix(" in '" + Twine(IDVal) + "' directive"); - - for (const MCExpr *Value : Values) { - // Special case constant expressions to match code generator. - if (const MCConstantExpr *MCE = dyn_cast(Value)) { - assert(Size <= 8 && "Invalid size"); - int64_t IntValue = MCE->getValue(); - if (!isUIntN(8 * Size, IntValue) && !isIntN(8 * Size, IntValue)) - return Error(MCE->getLoc(), "out of range literal value"); - getStreamer().emitIntValue(IntValue, Size); - } else { - const MCSymbolRefExpr *MSE = dyn_cast(Value); - if (MSE && MSE->getSymbol().getName() == "?") { - // ? initializer; treat as 0. - getStreamer().emitIntValue(0, Size); - } else { - getStreamer().emitValue(Value, Size, Value->getLoc()); - } - } } + return false; } @@ -2871,9 +3321,17 @@ /// ::= name (byte | word | ... ) [ expression (, expression)* ] bool MasmParser::parseDirectiveNamedValue(StringRef IDVal, unsigned Size, StringRef Name, SMLoc NameLoc) { - MCSymbol *Sym = getContext().getOrCreateSymbol(Name); - getStreamer().emitLabel(Sym); - return parseDirectiveValue(IDVal, Size); + if (StructInProgress.empty()) { + // Initialize named data value + MCSymbol *Sym = getContext().getOrCreateSymbol(Name); + getStreamer().emitLabel(Sym); + if (emitIntegralValues(Size)) + return addErrorSuffix(" in '" + Twine(IDVal) + "' directive"); + } else if (addIntegralField(Name, Size)) { + return addErrorSuffix(" in '" + Twine(IDVal) + "' directive"); + } + + return false; } static bool parseHexOcta(MasmParser &Asm, uint64_t &hi, uint64_t &lo) { @@ -2919,6 +3377,8 @@ Value = APFloat::getInf(Semantics); else if (!IDVal.compare_lower("nan")) Value = APFloat::getNaN(Semantics, false, ~0); + else if (!IDVal.compare_lower("?")) + Value = APFloat::getZero(Semantics); else return TokError("invalid floating point literal"); } else if (errorToBool( @@ -2937,8 +3397,11 @@ } bool MasmParser::parseRealInstList(const fltSemantics &Semantics, - SmallVectorImpl &ValuesAsInt) { - do { + SmallVectorImpl &ValuesAsInt, + const AsmToken::TokenKind EndToken) { + while (getTok().isNot(EndToken) || + (EndToken == AsmToken::Greater && + getTok().isNot(AsmToken::GreaterGreater))) { const AsmToken NextTok = Lexer.peekTok(); if (NextTok.is(AsmToken::Identifier) && NextTok.getString().equals_lower("dup")) { @@ -2969,11 +3432,48 @@ return true; ValuesAsInt.push_back(AsInt); } + // Continue if we see a comma. (Also, allow line continuation.) - } while (parseOptionalToken(AsmToken::Comma) && - (getTok().isNot(AsmToken::EndOfStatement) || - !parseToken(AsmToken::EndOfStatement))); + if (!parseOptionalToken(AsmToken::Comma)) + break; + parseOptionalToken(AsmToken::EndOfStatement); + } + + return false; +} + +// Initialize real data values +bool MasmParser::emitRealValues(const fltSemantics &Semantics) { + SmallVector ValuesAsInt; + if (parseRealInstList(Semantics, ValuesAsInt)) + return true; + + for (const APInt &AsInt : ValuesAsInt) { + getStreamer().emitIntValue(AsInt.getLimitedValue(), + AsInt.getBitWidth() / 8); + } + return false; +} + +// Add a real field to the current struct +bool MasmParser::addRealField(StringRef Name, const fltSemantics &Semantics) { + StructInfo &Struct = StructInProgress.back(); + FieldInfo &Field = Struct.addField(Name, FT_REAL); + RealFieldInfo &RealInfo = Field.Contents.RealInfo; + + Field.SizeOf = 0; + + if (checkForValidSection() || + parseRealInstList(Semantics, RealInfo.AsIntValues)) + return true; + Field.Type = RealInfo.AsIntValues.back().getBitWidth() / 8; + Field.LengthOf = RealInfo.AsIntValues.size(); + Field.SizeOf = Field.Type * Field.LengthOf; + if (Struct.IsUnion) + Struct.Size = std::max(Struct.Size, Field.SizeOf); + else + Struct.Size += Field.SizeOf; return false; } @@ -2984,13 +3484,12 @@ if (checkForValidSection()) return true; - SmallVector ValuesAsInt; - if (parseRealInstList(Semantics, ValuesAsInt)) + if (StructInProgress.empty()) { + // Initialize data value + if (emitRealValues(Semantics)) + return addErrorSuffix(" in '" + Twine(IDVal) + "' directive"); + } else if (addRealField("", Semantics)) { return addErrorSuffix(" in '" + Twine(IDVal) + "' directive"); - - for (const APInt &AsInt : ValuesAsInt) { - getStreamer().emitIntValue(AsInt.getLimitedValue(), - AsInt.getBitWidth() / 8); } return false; } @@ -3000,9 +3499,621 @@ bool MasmParser::parseDirectiveNamedRealValue(StringRef IDVal, const fltSemantics &Semantics, StringRef Name, SMLoc NameLoc) { - MCSymbol *Sym = getContext().getOrCreateSymbol(Name); - getStreamer().emitLabel(Sym); - return parseDirectiveRealValue(IDVal, Semantics); + if (checkForValidSection()) + return true; + + if (StructInProgress.empty()) { + // Initialize named data value + MCSymbol *Sym = getContext().getOrCreateSymbol(Name); + getStreamer().emitLabel(Sym); + if (emitRealValues(Semantics)) + return addErrorSuffix(" in '" + Twine(IDVal) + "' directive"); + } else if (addRealField(Name, Semantics)) { + return addErrorSuffix(" in '" + Twine(IDVal) + "' directive"); + } + return false; +} + +bool MasmParser::parseOptionalAngleBracketOpen() { + const AsmToken Tok = getTok(); + if (parseOptionalToken(AsmToken::LessLess)) { + AngleBracketDepth++; + Lexer.UnLex(AsmToken(AsmToken::Less, Tok.getString().substr(1))); + return true; + } else if (parseOptionalToken(AsmToken::LessGreater)) { + AngleBracketDepth++; + Lexer.UnLex(AsmToken(AsmToken::Greater, Tok.getString().substr(1))); + return true; + } else if (parseOptionalToken(AsmToken::Less)) { + AngleBracketDepth++; + return true; + } + + return false; +} + +bool MasmParser::parseAngleBracketClose() { + const AsmToken Tok = getTok(); + if (parseOptionalToken(AsmToken::GreaterGreater)) { + Lexer.UnLex(AsmToken(AsmToken::Greater, Tok.getString().substr(1))); + } else if (parseToken(AsmToken::Greater)) { + return true; + } + AngleBracketDepth--; + return false; +} + +bool MasmParser::parseFieldInitializer(const FieldInfo &Field, + const IntFieldInfo &Contents, + FieldInitializer &Initializer) { + SMLoc Loc = getTok().getLoc(); + + SmallVector Values; + if (parseOptionalToken(AsmToken::LCurly)) { + if (Field.LengthOf == 1 && Field.Type > 1) + return Error(Loc, "Cannot initialize scalar field with array value"); + if (parseScalarInstList(Field.Type, Values, AsmToken::RCurly) || + parseToken(AsmToken::RCurly)) + return true; + } else if (parseOptionalAngleBracketOpen()) { + if (Field.LengthOf == 1 && Field.Type > 1) + return Error(Loc, "Cannot initialize scalar field with array value"); + if (parseScalarInstList(Field.Type, Values, AsmToken::Greater) || + parseAngleBracketClose()) + return true; + } else if (Field.LengthOf > 1 && Field.Type > 1) { + return Error(Loc, "Cannot initialize array field with scalar value"); + } else if (parseScalarInitializer(Field.Type, Values, + /*StringPadLength=*/Field.LengthOf)) { + return true; + } + + if (Values.size() > Field.LengthOf) + return Error(Loc, "Initializer too long for field"); + // Default-initialize all remaining values + Values.append(Contents.Values.begin() + Values.size(), Contents.Values.end()); + + Initializer = FieldInitializer(std::move(Values)); + return false; +} + +bool MasmParser::parseFieldInitializer(const FieldInfo &Field, + const RealFieldInfo &Contents, + FieldInitializer &Initializer) { + const fltSemantics &Semantics = + (Field.Type == 4) ? APFloat::IEEEsingle() : APFloat::IEEEdouble(); + + SMLoc Loc = getTok().getLoc(); + + SmallVector AsIntValues; + if (parseOptionalToken(AsmToken::LCurly)) { + if (Field.LengthOf == 1) + return Error(Loc, "Cannot initialize scalar field with array value"); + if (parseRealInstList(Semantics, AsIntValues, AsmToken::RCurly) || + parseToken(AsmToken::RCurly)) + return true; + } else if (parseOptionalAngleBracketOpen()) { + if (Field.LengthOf == 1) + return Error(Loc, "Cannot initialize scalar field with array value"); + if (parseRealInstList(Semantics, AsIntValues, AsmToken::Greater) || + parseAngleBracketClose()) + return true; + } else if (Field.LengthOf > 1) { + return Error(Loc, "Cannot initialize array field with scalar value"); + } else { + AsIntValues.emplace_back(); + if (parseRealValue(Semantics, AsIntValues.back())) + return true; + } + // Default-initialize all remaining values + AsIntValues.append(Contents.AsIntValues.begin() + AsIntValues.size(), + Contents.AsIntValues.end()); + + Initializer = FieldInitializer(std::move(AsIntValues)); + return false; +} + +bool MasmParser::parseFieldInitializer(const FieldInfo &Field, + const StructFieldInfo &Contents, + FieldInitializer &Initializer) { + SMLoc Loc = getTok().getLoc(); + + std::vector Initializers; + if (Field.LengthOf > 1) { + if (parseOptionalToken(AsmToken::LCurly)) { + if (parseStructInstList(Contents.Structure, Initializers, + AsmToken::RCurly) || + parseToken(AsmToken::RCurly)) + return true; + } else if (parseOptionalAngleBracketOpen()) { + if (parseStructInstList(Contents.Structure, Initializers, + AsmToken::Greater) || + parseAngleBracketClose()) + return true; + } else { + return Error(Loc, "Cannot initialize array field with scalar value"); + } + } else { + Initializers.emplace_back(); + if (parseStructInitializer(Contents.Structure, Initializers.back())) + return true; + } + + // Default-initialize all remaining values + Initializers.insert(Initializers.end(), + Contents.Initializers.begin() + Initializers.size(), + Contents.Initializers.end()); + + Initializer = FieldInitializer(std::move(Initializers), Contents.Structure); + return false; +} + +bool MasmParser::parseFieldInitializer(const FieldInfo &Field, + FieldInitializer &Initializer) { + switch (Field.Contents.FT) { + case FT_INTEGRAL: + return parseFieldInitializer(Field, Field.Contents.IntInfo, Initializer); + case FT_REAL: + return parseFieldInitializer(Field, Field.Contents.RealInfo, Initializer); + case FT_STRUCT: + return parseFieldInitializer(Field, Field.Contents.StructInfo, Initializer); + } +} + +bool MasmParser::parseStructInitializer(const StructInfo &Structure, + StructInitializer &Initializer) { + const AsmToken FirstToken = getTok(); + + Optional EndToken; + if (parseOptionalToken(AsmToken::LCurly)) { + EndToken = AsmToken::RCurly; + } else if (parseOptionalToken(AsmToken::Less)) { + EndToken = AsmToken::Greater; + AngleBracketDepth++; + } else if (parseOptionalToken(AsmToken::LessLess)) { + // Split << into two tokens, and parse the first. + EndToken = AsmToken::Greater; + AngleBracketDepth++; + Lexer.UnLex(AsmToken(AsmToken::Less, FirstToken.getString().substr(1))); + } else { + if (FirstToken.is(AsmToken::Identifier) && FirstToken.getString() == "?") { + // ? initializer; treat as empty + parseToken(AsmToken::Identifier); + } else { + // Empty angle-bracket initializer + if (parseToken(AsmToken::LessGreater)) + return true; + } + } + + auto &FieldInitializers = Initializer.FieldInitializers; + size_t FieldIndex = 0; + if (EndToken.hasValue()) { + // Initialize all fields with given initializers + while (getTok().isNot(EndToken.getValue())) { + const FieldInfo &Field = Structure.Fields[FieldIndex++]; + if (parseOptionalToken(AsmToken::Comma)) { + FieldInitializers.push_back(Field.Contents); + continue; + } else { + FieldInitializers.emplace_back(Field.Contents.FT); + if (parseFieldInitializer(Field, FieldInitializers.back())) + return true; + } + + if (!parseOptionalToken(AsmToken::Comma)) + break; + parseOptionalToken(AsmToken::EndOfStatement); + } + } + // Default-initialize all remaining fields + for (auto It = Structure.Fields.begin() + FieldIndex; + It != Structure.Fields.end(); ++It) { + const FieldInfo &Field = *It; + FieldInitializers.push_back(Field.Contents); + } + + if (EndToken.hasValue()) { + if (EndToken.getValue() == AsmToken::Greater && + getTok().is(AsmToken::GreaterGreater)) { + // Split >> into two tokens, and parse the first + AsmToken GGTok = getTok(); + if (parseToken(AsmToken::GreaterGreater)) + return true; + Lexer.UnLex(AsmToken(AsmToken::Greater, GGTok.getString().substr(1))); + } else if (parseToken(EndToken.getValue())) { + return true; + } + + if (EndToken.getValue() == AsmToken::Greater) + AngleBracketDepth--; + } + + return false; +} + +bool MasmParser::parseStructInstList( + const StructInfo &Structure, std::vector &Initializers, + const AsmToken::TokenKind EndToken) { + while (getTok().isNot(EndToken) || + (EndToken == AsmToken::Greater && + getTok().isNot(AsmToken::GreaterGreater))) { + const AsmToken NextTok = Lexer.peekTok(); + if (NextTok.is(AsmToken::Identifier) && + NextTok.getString().equals_lower("dup")) { + const MCExpr *Value; + if (parseExpression(Value) || parseToken(AsmToken::Identifier)) + return true; + const MCConstantExpr *MCE = dyn_cast(Value); + if (!MCE) + return Error(Value->getLoc(), + "cannot repeat value a non-constant number of times"); + const int64_t Repetitions = MCE->getValue(); + if (Repetitions < 0) + return Error(Value->getLoc(), + "cannot repeat value a negative number of times"); + + std::vector DuplicatedValues; + if (parseToken(AsmToken::LParen, + "parentheses required for 'dup' contents") || + parseStructInstList(Structure, DuplicatedValues) || + parseToken(AsmToken::RParen, "unmatched parentheses")) + return true; + + for (int i = 0; i < Repetitions; ++i) + Initializers.insert(Initializers.end(), DuplicatedValues.begin(), + DuplicatedValues.end()); + } else { + Initializers.emplace_back(); + if (parseStructInitializer(Structure, Initializers.back())) + return true; + } + + // Continue if we see a comma. (Also, allow line continuation.) + if (!parseOptionalToken(AsmToken::Comma)) + break; + parseOptionalToken(AsmToken::EndOfStatement); + } + + return false; +} + +bool MasmParser::emitFieldValue(const FieldInfo &Field, + const IntFieldInfo &Contents) { + // Default-initialize all values + for (auto Value : Contents.Values) { + if (emitIntValue(Value, Field.Type)) + return true; + } + return false; +} + +bool MasmParser::emitFieldValue(const FieldInfo &Field, + const RealFieldInfo &Contents) { + for (APInt AsInt : Contents.AsIntValues) { + getStreamer().emitIntValue(AsInt.getLimitedValue(), + AsInt.getBitWidth() / 8); + } + return false; +} + +bool MasmParser::emitFieldValue(const FieldInfo &Field, + const StructFieldInfo &Contents) { + for (const auto &Initializer : Contents.Initializers) { + size_t Index = 0, Offset = 0; + for (const auto &SubField : Contents.Structure.Fields) { + getStreamer().emitZeros(SubField.Offset - Offset); + Offset = SubField.Offset + SubField.SizeOf; + emitFieldInitializer(SubField, Initializer.FieldInitializers[Index++]); + } + } + return false; +} + +bool MasmParser::emitFieldValue(const FieldInfo &Field) { + switch (Field.Contents.FT) { + case FT_INTEGRAL: + return emitFieldValue(Field, Field.Contents.IntInfo); + case FT_REAL: + return emitFieldValue(Field, Field.Contents.RealInfo); + case FT_STRUCT: + return emitFieldValue(Field, Field.Contents.StructInfo); + } +} + +bool MasmParser::emitStructValue(const StructInfo &Structure) { + size_t Offset = 0; + for (const auto &Field : Structure.Fields) { + getStreamer().emitZeros(Field.Offset - Offset); + Offset = Field.Offset + Field.SizeOf; + if (emitFieldValue(Field)) + return true; + } + // Add final padding + if (Offset != Structure.Size) + getStreamer().emitZeros(Structure.Size - Offset); + return false; +} + +bool MasmParser::emitFieldInitializer(const FieldInfo &Field, + const IntFieldInfo &Contents, + const IntFieldInfo &Initializer) { + for (const auto &Value : Initializer.Values) { + if (emitIntValue(Value, Field.Type)) + return true; + } + // Default-initialize all remaining values + for (auto it = Contents.Values.begin() + Initializer.Values.size(); + it != Contents.Values.end(); ++it) { + const auto &Value = *it; + if (emitIntValue(Value, Field.Type)) + return true; + } + return false; +} + +bool MasmParser::emitFieldInitializer(const FieldInfo &Field, + const RealFieldInfo &Contents, + const RealFieldInfo &Initializer) { + for (const auto &AsInt : Initializer.AsIntValues) { + getStreamer().emitIntValue(AsInt.getLimitedValue(), + AsInt.getBitWidth() / 8); + } + // Default-initialize all remaining values + for (auto It = Contents.AsIntValues.begin() + Initializer.AsIntValues.size(); + It != Contents.AsIntValues.end(); ++It) { + const auto &AsInt = *It; + getStreamer().emitIntValue(AsInt.getLimitedValue(), + AsInt.getBitWidth() / 8); + } + return false; +} + +bool MasmParser::emitFieldInitializer(const FieldInfo &Field, + const StructFieldInfo &Contents, + const StructFieldInfo &Initializer) { + for (const auto &Init : Initializer.Initializers) { + emitStructInitializer(Contents.Structure, Init); + } + // Default-initialize all remaining values + for (auto It = + Contents.Initializers.begin() + Initializer.Initializers.size(); + It != Contents.Initializers.end(); ++It) { + const auto &Init = *It; + emitStructInitializer(Contents.Structure, Init); + } + return false; +} + +bool MasmParser::emitFieldInitializer(const FieldInfo &Field, + const FieldInitializer &Initializer) { + switch (Field.Contents.FT) { + case FT_INTEGRAL: + return emitFieldInitializer(Field, Field.Contents.IntInfo, + Initializer.IntInfo); + case FT_REAL: + return emitFieldInitializer(Field, Field.Contents.RealInfo, + Initializer.RealInfo); + case FT_STRUCT: + return emitFieldInitializer(Field, Field.Contents.StructInfo, + Initializer.StructInfo); + } +} + +bool MasmParser::emitStructInitializer(const StructInfo &Structure, + const StructInitializer &Initializer) { + size_t Index = 0, Offset = 0; + for (const auto &Init : Initializer.FieldInitializers) { + const auto &Field = Structure.Fields[Index++]; + getStreamer().emitZeros(Field.Offset - Offset); + Offset = Field.Offset + Field.SizeOf; + if (emitFieldInitializer(Field, Init)) + return true; + } + // Default-initialize all remaining fields + for (auto It = + Structure.Fields.begin() + Initializer.FieldInitializers.size(); + It != Structure.Fields.end(); ++It) { + const auto &Field = *It; + getStreamer().emitZeros(Field.Offset - Offset); + Offset = Field.Offset + Field.SizeOf; + if (emitFieldValue(Field)) + return true; + } + // Add final padding + if (Offset != Structure.Size) + getStreamer().emitZeros(Structure.Size - Offset); + return false; +} + +/// parseDirectiveStructValue +/// ::= struct-id ( | {struct-initializer}) +/// [, ( | {struct-initializer})]* +bool MasmParser::parseDirectiveStructValue(const StructInfo &Structure, + StringRef Directive, SMLoc DirLoc) { + if (StructInProgress.empty()) { + // Initialize named data value + std::vector Initializers; + if (parseStructInstList(Structure, Initializers)) + return true; + + for (const auto &Initializer : Initializers) { + if (emitStructInitializer(Structure, Initializer)) + return true; + } + } else { + // Declare a field in the current struct + StructInfo &OwningStruct = StructInProgress.back(); + FieldInfo &Field = OwningStruct.addField("", FT_STRUCT); + StructFieldInfo &StructInfo = Field.Contents.StructInfo; + + StructInfo.Structure = Structure; + Field.Type = Structure.Size; + + if (parseStructInstList(Structure, StructInfo.Initializers)) + return addErrorSuffix(" in '" + Twine(Directive) + "' directive"); + + Field.LengthOf = StructInfo.Initializers.size(); + Field.SizeOf = Field.Type * Field.LengthOf; + if (OwningStruct.IsUnion) + OwningStruct.Size = std::max(OwningStruct.Size, Field.SizeOf); + else + OwningStruct.Size += Field.SizeOf; + } + + return false; +} + +/// parseDirectiveNamedValue +/// ::= name (byte | word | ... ) [ expression (, expression)* ] +bool MasmParser::parseDirectiveNamedStructValue(const StructInfo &Structure, + StringRef Directive, + SMLoc DirLoc, StringRef Name) { + if (StructInProgress.empty()) { + // Initialize named data value + MCSymbol *Sym = getContext().getOrCreateSymbol(Name); + getStreamer().emitLabel(Sym); + KnownType[Name] = &Structure; + return parseDirectiveStructValue(Structure, Directive, DirLoc); + } else { + // Declare a field in the current struct + StructInfo &OwningStruct = StructInProgress.back(); + FieldInfo &Field = OwningStruct.addField(Name, FT_STRUCT); + StructFieldInfo &StructInfo = Field.Contents.StructInfo; + + StructInfo.Structure = Structure; + Field.Type = Structure.Size; + + if (parseStructInstList(Structure, StructInfo.Initializers)) + return addErrorSuffix(" in '" + Twine(Directive) + "' directive"); + + Field.LengthOf = StructInfo.Initializers.size(); + Field.SizeOf = Field.Type * Field.LengthOf; + if (OwningStruct.IsUnion) + OwningStruct.Size = std::max(OwningStruct.Size, Field.SizeOf); + else + OwningStruct.Size += Field.SizeOf; + + return false; + } +} + +/// parseDirectiveStruct +/// ::= (STRUC | STRUCT | UNION) [fieldAlign] [, NONUNIQUE] +/// (dataDir | generalDir | offsetDir | nestedStruct)+ +/// ENDS +////// dataDir = data declaration +////// offsetDir = EVEN, ORG, ALIGN +bool MasmParser::parseDirectiveStruct(StringRef Directive, + DirectiveKind DirKind, StringRef Name, + SMLoc NameLoc) { + // We ignore NONUNIQUE; we do not support OPTION M510 or OPTION OLDSTRUCTS + // anyway, so all field accesses must be qualified. + AsmToken NextTok = getTok(); + int64_t AlignmentValue = 1; + if (NextTok.isNot(AsmToken::Comma) && + NextTok.isNot(AsmToken::EndOfStatement) && + parseAbsoluteExpression(AlignmentValue)) { + return addErrorSuffix(" in alignment value for '" + Twine(Directive) + + "' directive"); + } + if (!isPowerOf2_64(AlignmentValue)) + return Error(NextTok.getLoc(), "unsupported alignment value"); + + StringRef Qualifier; + if (parseOptionalToken(AsmToken::Comma) && parseIdentifier(Qualifier)) + return addErrorSuffix(" in '" + Twine(Directive) + "' directive"); + if (!Qualifier.empty() && !Qualifier.equals_lower("nonunique")) + return Error(NameLoc, "Unrecognized qualifier for '" + Twine(Directive) + + "' directive; expected none or NONUNIQUE"); + + if (parseToken(AsmToken::EndOfStatement)) + return addErrorSuffix(" in '" + Twine(Directive) + "' directive"); + + StructInProgress.emplace_back(Name, DirKind == DK_UNION, AlignmentValue); + return false; +} + +/// parseDirectiveNestedStruct +/// ::= (STRUC | STRUCT | UNION) [name] +/// (dataDir | generalDir | offsetDir | nestedStruct)+ +/// ENDS +bool MasmParser::parseDirectiveNestedStruct(StringRef Directive, + DirectiveKind DirKind) { + if (StructInProgress.empty()) + return TokError("missing name in top-level '" + Twine(Directive) + + "' directive"); + + StringRef Name; + if (getTok().is(AsmToken::Identifier)) { + Name = getTok().getIdentifier(); + parseToken(AsmToken::Identifier); + } + if (parseToken(AsmToken::EndOfStatement)) + return addErrorSuffix(" in '" + Twine(Directive) + "' directive"); + + StructInProgress.emplace_back(Name, DirKind == DK_UNION, + StructInProgress.back().Alignment); + return false; +} + +bool MasmParser::parseDirectiveEnds(StringRef Name, SMLoc NameLoc) { + if (StructInProgress.empty()) + return Error(NameLoc, "ENDS directive without matching STRUC/STRUCT/UNION"); + if (StructInProgress.size() > 1) + return Error(NameLoc, "unexpected name in nested ENDS directive"); + if (StructInProgress.back().Name.compare_lower(Name)) + return Error(NameLoc, "mismatched name in ENDS directive"); + StructInfo Structure = StructInProgress.pop_back_val(); + if (Structure.Size % Structure.Alignment != 0) { + // Pad to make the structure's size divisible by its alignment + Structure.Size += + Structure.Alignment - (Structure.Size % Structure.Alignment); + } + Structs[Name.lower()] = Structure; + + if (parseToken(AsmToken::EndOfStatement)) + return addErrorSuffix(" in ENDS directive"); + + return false; +} + +bool MasmParser::parseDirectiveNestedEnds() { + if (StructInProgress.empty()) + return TokError("ENDS directive without matching STRUC/STRUCT/UNION"); + if (StructInProgress.size() == 1) + return TokError("missing name in top-level ENDS directive"); + + if (parseToken(AsmToken::EndOfStatement)) + return addErrorSuffix(" in nested ENDS directive"); + + StructInfo Structure = StructInProgress.pop_back_val(); + if (Structure.Size % Structure.Alignment != 0) { + // Pad to make the structure's size divisible by its alignment + Structure.Size += + Structure.Alignment - (Structure.Size % Structure.Alignment); + } + StructInfo &ParentStruct = StructInProgress.back(); + + FieldInfo &Field = ParentStruct.addField(Structure.Name, FT_STRUCT); + StructFieldInfo &StructInfo = Field.Contents.StructInfo; + Field.Type = Structure.Size; + Field.LengthOf = 1; + Field.SizeOf = Structure.Size; + + if (ParentStruct.IsUnion) + ParentStruct.Size = std::max(ParentStruct.Size, Field.SizeOf); + else + ParentStruct.Size += Field.SizeOf; + + StructInfo.Structure = Structure; + StructInfo.Initializers.emplace_back(); + auto &FieldInitializers = StructInfo.Initializers.back().FieldInitializers; + for (const auto &SubField : Structure.Fields) { + FieldInitializers.push_back(SubField.Contents); + } + + return false; } /// parseDirectiveOrg @@ -5130,6 +6241,10 @@ DirectiveKindMap["dq"] = DK_DQ; DirectiveKindMap["dw"] = DK_DW; DirectiveKindMap["echo"] = DK_ECHO; + DirectiveKindMap["struc"] = DK_STRUCT; + DirectiveKindMap["struct"] = DK_STRUCT; + DirectiveKindMap["union"] = DK_UNION; + DirectiveKindMap["ends"] = DK_ENDS; } MCAsmMacro *MasmParser::parseMacroLikeBody(SMLoc DirectiveLoc) { @@ -5389,6 +6504,49 @@ llvm_unreachable("Unstable rewrite sort."); } +bool MasmParser::LookUpFieldOffset(StringRef Base, StringRef Member, + unsigned &Offset) { + if (Base.empty()) + return true; + + auto TypeIt = KnownType.find(Base); + if (TypeIt != KnownType.end()) + return LookUpFieldOffset(*TypeIt->second, Member, Offset); + + auto StructIt = Structs.find(Base.lower()); + if (StructIt != Structs.end()) + return LookUpFieldOffset(StructIt->second, Member, Offset); + + return true; +} + +bool MasmParser::LookUpFieldOffset(const StructInfo &Structure, + StringRef Member, unsigned &Offset) { + std::pair Split = Member.split('.'); + const StringRef FieldName = Split.first, FieldMember = Split.second; + + auto FieldIt = Structure.FieldsByName.find(FieldName.lower()); + if (FieldIt == Structure.FieldsByName.end()) + return true; + + const FieldInfo &Field = Structure.Fields[FieldIt->second]; + if (FieldMember.empty()) { + Offset = Field.Offset; + return false; + } + + if (Field.Contents.FT != FT_STRUCT) + return true; + const StructFieldInfo &StructInfo = Field.Contents.StructInfo; + + bool Result = LookUpFieldOffset(StructInfo.Structure, FieldMember, Offset); + if (Result) + return true; + + Offset += Field.Offset; + return false; +} + bool MasmParser::parseMSInlineAsm( void *AsmLoc, std::string &AsmString, unsigned &NumOutputs, unsigned &NumInputs, SmallVectorImpl> &OpDecls, diff --git a/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp b/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp --- a/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp +++ b/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp @@ -864,6 +864,8 @@ return nullptr; } + bool MatchRegisterByName(unsigned &RegNo, StringRef RegName, SMLoc StartLoc, + SMLoc EndLoc); bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc, bool RestoreOnFailure); @@ -1145,6 +1147,108 @@ return checkScale(Scale, ErrMsg); } +bool X86AsmParser::MatchRegisterByName(unsigned &RegNo, StringRef RegName, + SMLoc StartLoc, SMLoc EndLoc) { + // If we encounter a %, ignore it. This code handles registers with and + // without the prefix, unprefixed registers can occur in cfi directives. + RegName.consume_front("%"); + + RegNo = MatchRegisterName(RegName); + + // If the match failed, try the register name as lowercase. + if (RegNo == 0) + RegNo = MatchRegisterName(RegName.lower()); + + // The "flags" and "mxcsr" registers cannot be referenced directly. + // Treat it as an identifier instead. + if (isParsingMSInlineAsm() && isParsingIntelSyntax() && + (RegNo == X86::EFLAGS || RegNo == X86::MXCSR)) + RegNo = 0; + + if (!is64BitMode()) { + // FIXME: This should be done using Requires and + // Requires so "eiz" usage in 64-bit instructions can be also + // checked. + // FIXME: Check AH, CH, DH, BH cannot be used in an instruction requiring a + // REX prefix. + if (RegNo == X86::RIZ || RegNo == X86::RIP || + X86MCRegisterClasses[X86::GR64RegClassID].contains(RegNo) || + X86II::isX86_64NonExtLowByteReg(RegNo) || + X86II::isX86_64ExtendedReg(RegNo)) { + return Error(StartLoc, + "register %" + RegName + " is only available in 64-bit mode", + SMRange(StartLoc, EndLoc)); + } + } + + // If this is "db[0-15]", match it as an alias + // for dr[0-15]. + if (RegNo == 0 && RegName.startswith("db")) { + if (RegName.size() == 3) { + switch (RegName[2]) { + case '0': + RegNo = X86::DR0; + break; + case '1': + RegNo = X86::DR1; + break; + case '2': + RegNo = X86::DR2; + break; + case '3': + RegNo = X86::DR3; + break; + case '4': + RegNo = X86::DR4; + break; + case '5': + RegNo = X86::DR5; + break; + case '6': + RegNo = X86::DR6; + break; + case '7': + RegNo = X86::DR7; + break; + case '8': + RegNo = X86::DR8; + break; + case '9': + RegNo = X86::DR9; + break; + } + } else if (RegName.size() == 4 && RegName[2] == '1') { + switch (RegName[3]) { + case '0': + RegNo = X86::DR10; + break; + case '1': + RegNo = X86::DR11; + break; + case '2': + RegNo = X86::DR12; + break; + case '3': + RegNo = X86::DR13; + break; + case '4': + RegNo = X86::DR14; + break; + case '5': + RegNo = X86::DR15; + break; + } + } + } + + if (RegNo == 0) { + if (isParsingIntelSyntax()) + return true; + return Error(StartLoc, "invalid register name", SMRange(StartLoc, EndLoc)); + } + return false; +} + bool X86AsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc, bool RestoreOnFailure) { MCAsmParser &Parser = getParser(); @@ -1180,38 +1284,7 @@ SMRange(StartLoc, EndLoc)); } - RegNo = MatchRegisterName(Tok.getString()); - - // If the match failed, try the register name as lowercase. - if (RegNo == 0) - RegNo = MatchRegisterName(Tok.getString().lower()); - - // The "flags" and "mxcsr" registers cannot be referenced directly. - // Treat it as an identifier instead. - if (isParsingMSInlineAsm() && isParsingIntelSyntax() && - (RegNo == X86::EFLAGS || RegNo == X86::MXCSR)) - RegNo = 0; - - if (!is64BitMode()) { - // FIXME: This should be done using Requires and - // Requires so "eiz" usage in 64-bit instructions can be also - // checked. - // FIXME: Check AH, CH, DH, BH cannot be used in an instruction requiring a - // REX prefix. - if (RegNo == X86::RIZ || RegNo == X86::RIP || - X86MCRegisterClasses[X86::GR64RegClassID].contains(RegNo) || - X86II::isX86_64NonExtLowByteReg(RegNo) || - X86II::isX86_64ExtendedReg(RegNo)) { - StringRef RegName = Tok.getString(); - OnFailure(); - if (!RestoreOnFailure) { - Parser.Lex(); // Eat register name. - } - return Error(StartLoc, - "register %" + RegName + " is only available in 64-bit mode", - SMRange(StartLoc, EndLoc)); - } - } + MatchRegisterByName(RegNo, Tok.getString(), StartLoc, EndLoc); // Parse "%st" as "%st(0)" and "%st(1)", which is multiple tokens. if (RegNo == X86::ST0) { @@ -1259,40 +1332,6 @@ EndLoc = Parser.getTok().getEndLoc(); - // If this is "db[0-15]", match it as an alias - // for dr[0-15]. - if (RegNo == 0 && Tok.getString().startswith("db")) { - if (Tok.getString().size() == 3) { - switch (Tok.getString()[2]) { - case '0': RegNo = X86::DR0; break; - case '1': RegNo = X86::DR1; break; - case '2': RegNo = X86::DR2; break; - case '3': RegNo = X86::DR3; break; - case '4': RegNo = X86::DR4; break; - case '5': RegNo = X86::DR5; break; - case '6': RegNo = X86::DR6; break; - case '7': RegNo = X86::DR7; break; - case '8': RegNo = X86::DR8; break; - case '9': RegNo = X86::DR9; break; - } - } else if (Tok.getString().size() == 4 && Tok.getString()[2] == '1') { - switch (Tok.getString()[3]) { - case '0': RegNo = X86::DR10; break; - case '1': RegNo = X86::DR11; break; - case '2': RegNo = X86::DR12; break; - case '3': RegNo = X86::DR13; break; - case '4': RegNo = X86::DR14; break; - case '5': RegNo = X86::DR15; break; - } - } - - if (RegNo != 0) { - EndLoc = Parser.getTok().getEndLoc(); - Parser.Lex(); // Eat it. - return false; - } - } - if (RegNo == 0) { OnFailure(); if (isParsingIntelSyntax()) return true; @@ -1590,12 +1629,41 @@ SMLoc IdentLoc = Tok.getLoc(); StringRef Identifier = Tok.getString(); UpdateLocLex = false; - // Register + // Register, or (MASM only) . unsigned Reg; - if (Tok.is(AsmToken::Identifier) && !ParseRegister(Reg, IdentLoc, End)) { - if (SM.onRegister(Reg, ErrMsg)) - return Error(Tok.getLoc(), ErrMsg); - break; + if (Tok.is(AsmToken::Identifier)) { + if (!ParseRegister(Reg, IdentLoc, End, /*RestoreOnFailure=*/true)) { + if (SM.onRegister(Reg, ErrMsg)) + return Error(IdentLoc, ErrMsg); + break; + } + if (Parser.isParsingMasm()) { + const std::pair RegField = + Tok.getString().split('.'); + const StringRef RegName = RegField.first, Field = RegField.second; + SMLoc RegEndLoc = + SMLoc::getFromPointer(RegName.data() + RegName.size()); + if (!Field.empty() && + !MatchRegisterByName(Reg, RegName, IdentLoc, RegEndLoc)) { + if (SM.onRegister(Reg, ErrMsg)) + return Error(IdentLoc, ErrMsg); + + SMLoc FieldStartLoc = SMLoc::getFromPointer(Field.data()); + const std::pair BaseMember = Field.split('.'); + const StringRef Base = BaseMember.first, Member = BaseMember.second; + + unsigned Offset; + if (Parser.LookUpFieldOffset(Base, Member, Offset)) + return Error(FieldStartLoc, "unknown offset"); + else if (SM.onPlus(ErrMsg)) + return Error(getTok().getLoc(), ErrMsg); + else if (SM.onInteger(Offset, ErrMsg)) + return Error(IdentLoc, ErrMsg); + + End = consumeToken(); + break; + } + } } // Operator synonymous ("not", "or" etc.) bool ParseError = false; @@ -1607,37 +1675,39 @@ // Symbol reference, when parsing assembly content InlineAsmIdentifierInfo Info; const MCExpr *Val; - if (!isParsingMSInlineAsm()) { - if (getParser().parsePrimaryExpr(Val, End)) { - return Error(Tok.getLoc(), "Unexpected identifier!"); - } else if (SM.onIdentifierExpr(Val, Identifier, Info, false, ErrMsg)) { - return Error(IdentLoc, ErrMsg); - } else + if (isParsingMSInlineAsm() || Parser.isParsingMasm()) { + // MS Dot Operator expression + if (Identifier.count('.') && PrevTK == AsmToken::RBrac) { + if (ParseIntelDotOperator(SM, End)) + return true; break; + } } - // MS InlineAsm operators (TYPE/LENGTH/SIZE) - if (unsigned OpKind = IdentifyIntelInlineAsmOperator(Identifier)) { - if (int64_t Val = ParseIntelInlineAsmOperator(OpKind)) { - if (SM.onInteger(Val, ErrMsg)) - return Error(IdentLoc, ErrMsg); - } else - return true; - break; - } - // MS Dot Operator expression - if (Identifier.count('.') && PrevTK == AsmToken::RBrac) { - if (ParseIntelDotOperator(SM, End)) + if (isParsingMSInlineAsm()) { + // MS InlineAsm operators (TYPE/LENGTH/SIZE) + if (unsigned OpKind = IdentifyIntelInlineAsmOperator(Identifier)) { + if (int64_t Val = ParseIntelInlineAsmOperator(OpKind)) { + if (SM.onInteger(Val, ErrMsg)) + return Error(IdentLoc, ErrMsg); + } else + return true; + break; + } + // MS InlineAsm identifier + // Call parseIdentifier() to combine @ with the identifier behind it. + if (TK == AsmToken::At && Parser.parseIdentifier(Identifier)) + return Error(IdentLoc, "expected identifier"); + if (ParseIntelInlineAsmIdentifier(Val, Identifier, Info, false, End)) return true; + else if (SM.onIdentifierExpr(Val, Identifier, Info, true, ErrMsg)) + return Error(IdentLoc, ErrMsg); break; } - // MS InlineAsm identifier - // Call parseIdentifier() to combine @ with the identifier behind it. - if (TK == AsmToken::At && Parser.parseIdentifier(Identifier)) - return Error(IdentLoc, "expected identifier"); - if (ParseIntelInlineAsmIdentifier(Val, Identifier, Info, false, End)) - return true; - else if (SM.onIdentifierExpr(Val, Identifier, Info, true, ErrMsg)) + if (getParser().parsePrimaryExpr(Val, End)) { + return Error(Tok.getLoc(), "Unexpected identifier!"); + } else if (SM.onIdentifierExpr(Val, Identifier, Info, false, ErrMsg)) { return Error(IdentLoc, ErrMsg); + } break; } case AsmToken::Integer: { @@ -1856,10 +1926,14 @@ APInt DotDisp; DotDispStr.getAsInteger(10, DotDisp); Offset = DotDisp.getZExtValue(); - } else if (isParsingMSInlineAsm() && Tok.is(AsmToken::Identifier)) { - std::pair BaseMember = DotDispStr.split('.'); - if (SemaCallback->LookupInlineAsmField(BaseMember.first, BaseMember.second, - Offset)) + } else if ((isParsingMSInlineAsm() || getParser().isParsingMasm()) && + Tok.is(AsmToken::Identifier)) { + const std::pair BaseMember = DotDispStr.split('.'); + const StringRef Base = BaseMember.first, Member = BaseMember.second; + if (getParser().LookUpFieldOffset(SM.getSymName(), DotDispStr, Offset) && + getParser().LookUpFieldOffset(Base, Member, Offset) && + (!SemaCallback || + SemaCallback->LookupInlineAsmField(Base, Member, Offset))) return Error(Tok.getLoc(), "Unable to lookup field reference!"); } else return Error(Tok.getLoc(), "Unexpected token type!"); diff --git a/llvm/test/tools/llvm-ml/struct.test b/llvm/test/tools/llvm-ml/struct.test new file mode 100644 --- /dev/null +++ b/llvm/test/tools/llvm-ml/struct.test @@ -0,0 +1,104 @@ +# RUN: llvm-ml -filetype=asm %s | FileCheck %s + +.data +BAZ STRUCT + a BYTE 1 + b BYTE 2 +BAZ ENDS + +FOOBAR struct 2 + c BYTE 3 DUP (4) + d DWORD 5 + e BAZ <> + STRUCT f + g BYTE 6 + h BYTE 7 + ends + h BYTE "abcde" +foobar ENDS + +t1 foobar <> + +; CHECK: t1: +; +; BYTE 3 DUP (4), plus alignment padding +; CHECK-NEXT: .byte 4 +; CHECK-NEXT: .byte 4 +; CHECK-NEXT: .byte 4 +; CHECK-NEXT: .zero 1 +; +; DWORD 5 +; CHECK-NEXT: .long 5 +; +; BAZ <> +; CHECK-NEXT: .byte 1 +; CHECK-NEXT: .byte 2 +; +; , with internal alignment padding +; CHECK-NEXT: .byte 6 +; CHECK-NEXT: .zero 1 +; CHECK-NEXT: .byte 7 +; CHECK-NEXT: .zero 1 +; +; BYTE "abcde", plus alignment padding +; CHECK-NEXT: .byte 97 +; CHECK-NEXT: .byte 98 +; CHECK-NEXT: .byte 99 +; CHECK-NEXT: .byte 100 +; CHECK-NEXT: .byte 101 +; CHECK-NEXT: .zero 1 + +t2 FOOBAR <"gh",,<10,11>,<12>,"ijk"> + +; CHECK: t2: +; +; BYTE "gh", padded with " ", plus alignment padding +; CHECK-NEXT: .byte 103 +; CHECK-NEXT: .byte 104 +; CHECK-NEXT: .byte 32 +; CHECK-NEXT: .zero 1 +; +; DWORD 5 (default-initialized when omitted) +; CHECK-NEXT: .long 5 +; +; BAZ <10, 11> +; CHECK-NEXT: .byte 10 +; CHECK-NEXT: .byte 11 +; +; , with internal alignment padding +; CHECK-NEXT: .byte 12 +; CHECK-NEXT: .zero 1 +; CHECK-NEXT: .byte 7 +; CHECK-NEXT: .zero 1 +; +; BYTE "ijk", padded with " ", plus alignment padding +; CHECK-NEXT: .byte 105 +; CHECK-NEXT: .byte 106 +; CHECK-NEXT: .byte 107 +; CHECK-NEXT: .byte 32 +; CHECK-NEXT: .byte 32 +; CHECK-NEXT: .zero 1 + +.code + +t3: +mov eax, t2.f.h +mov eax, [t2].f.h +mov eax, [t2.f.h] +mov eax, t2.FOOBAR.f.h + +; CHECK: t3: +; CHECK-NEXT: mov eax, dword ptr [rip + t2+12] +; CHECK-NEXT: mov eax, dword ptr [rip + t2+12] +; CHECK-NEXT: mov eax, dword ptr [rip + t2+12] +; CHECK-NEXT: mov eax, dword ptr [rip + t2+12] + +t4: +mov eax, j.FOOBAR.f.h +mov eax, j.baz.b + +; CHECK: t4: +; CHECK-NEXT: mov eax, dword ptr [rip + j+12] +; CHECK-NEXT: mov eax, dword ptr [rip + j+1] + +END