// Review summary for the patch below.
//
// NOTE: the unified-diff text that follows has had its line breaks collapsed
// (each physical line holds many diff lines). The payload is left untouched;
// it must be re-flowed before any patch tool can apply it.
//
// What the patch does, per file:
//  * MCTargetAsmParser.h: removes the virtual hook starIsStartOfStatement()
//    and adds isStartOfInstruction(const AsmToken &), whose default
//    implementation returns false.
//  * AsmParser.cpp: the start-of-statement code no longer converts Integer,
//    '.', '{', '}' and '*' tokens into pseudo-identifiers up front. It calls
//    parseIdentifier() first, records success in ParsedID, and consults
//    DirectiveKindMap only when an identifier was parsed. The added code then
//    handles, in the order it appears in the hunks:
//      - '.' followed by ':'  -> error "invalid use of pseudo-symbol '.' as a
//        label";
//      - '.' followed by '='  -> parseAssignment(".", ...) when the target's
//        equalIsAsmAssignment() returns true;
//      - an Integer token     -> must be non-negative and followed by ':',
//        otherwise "unexpected token at start of statement";
//      - labels               -> identifier ':' accepted by isLabel(), or a
//        local label recorded in LocalLabelVal;
//      - identifier '='       -> parseAssignment(IDVal, ...);
//      - any other non-identifier token -> handed to
//        parseAndMatchAndEmitTargetInstruction() if the target's
//        isStartOfInstruction() accepts it, else reported as
//        "unexpected token at start of statement".
//  * BPFAsmParser.cpp: overrides isStartOfInstruction() to accept '*'
//    (replacing the starIsStartOfStatement() override).
//  * HexagonAsmParser.cpp: overrides isStartOfInstruction() to accept '{' and
//    '}', and adds a 'private:' access specifier after it.
//  * X86AsmParser.cpp: overrides isStartOfInstruction() to accept '{'.
//
// NOTE(review): the new label condition ends in '|| LocalLabelVal != -1';
//   this presumably relies on LocalLabelVal being initialised to -1 outside
//   the shown hunks -- confirm.
// NOTE(review): the removed error paths were guarded by
//   '!TheCondState.Ignore'; the added ones are unconditional. Presumably the
//   skipped-block early return (the 'return false;' context line) now runs
//   before them -- confirm against the full function.
// NOTE(review): the added 'private:' in HexagonAsmParser.cpp changes the
//   access of the members that follow it (Warning, Error,
//   ParseDirectiveFalign, ...) unless they were already private -- verify no
//   external user depends on them.
// NOTE(review): the removed line 'StringMap::const_iterator DirKindIt' has no
//   template argument; this looks like an extraction artifact in this copy of
//   the patch -- compare with the upstream file before re-flowing.
// NOTE(review): 'statment' in the BPF context comment is a typo that appears
//   on an unchanged context line; fix it in a separate change so this hunk's
//   context still matches.
diff --git a/llvm/include/llvm/MC/MCParser/MCTargetAsmParser.h b/llvm/include/llvm/MC/MCParser/MCTargetAsmParser.h --- a/llvm/include/llvm/MC/MCParser/MCTargetAsmParser.h +++ b/llvm/include/llvm/MC/MCParser/MCTargetAsmParser.h @@ -470,8 +470,9 @@ virtual bool equalIsAsmAssignment() { return true; }; // Return whether this start of statement identifier is a label virtual bool isLabel(AsmToken &Token) { return true; }; - // Return whether this parser accept star as start of statement - virtual bool starIsStartOfStatement() { return false; }; + /// Returns whether this non-identifier token is valid at the start of an + /// instruction. Identifier tokens are always considered valid. + virtual bool isStartOfInstruction(const AsmToken &Token) { return false; } virtual const MCExpr *applyModifierToExpr(const MCExpr *E, MCSymbolRefExpr::VariantKind, diff --git a/llvm/lib/MC/MCParser/AsmParser.cpp b/llvm/lib/MC/MCParser/AsmParser.cpp --- a/llvm/lib/MC/MCParser/AsmParser.cpp +++ b/llvm/lib/MC/MCParser/AsmParser.cpp @@ -1803,59 +1803,20 @@ return parseCppHashLineFilenameComment(IDLoc, !isInsideMacroInstantiation()); - // Allow an integer followed by a ':' as a directional local label. - if (Lexer.is(AsmToken::Integer)) { - LocalLabelVal = getTok().getIntVal(); - if (LocalLabelVal < 0) { - if (!TheCondState.Ignore) { - Lex(); // always eat a token - return Error(IDLoc, "unexpected token at start of statement"); - } - IDVal = ""; - } else { - IDVal = getTok().getString(); - Lex(); // Consume the integer token to be used as an identifier token. - if (Lexer.getKind() != AsmToken::Colon) { - if (!TheCondState.Ignore) { - Lex(); // always eat a token - return Error(IDLoc, "unexpected token at start of statement"); - } - } - } - } else if (Lexer.is(AsmToken::Dot)) { - // Treat '.' as a valid identifier in this context. - Lex(); - IDVal = "."; - } else if (Lexer.is(AsmToken::LCurly)) { - // Treat '{' as a valid identifier in this context. 
- Lex(); - IDVal = "{"; + // The identifier can be an empty string (""), hence the flag. + bool ParsedID = !parseIdentifier(IDVal); - } else if (Lexer.is(AsmToken::RCurly)) { - // Treat '}' as a valid identifier in this context. - Lex(); - IDVal = "}"; - } else if (Lexer.is(AsmToken::Star) && - getTargetParser().starIsStartOfStatement()) { - // Accept '*' as a valid start of statement. - Lex(); - IDVal = "*"; - } else if (parseIdentifier(IDVal)) { - if (!TheCondState.Ignore) { - Lex(); // always eat a token - return Error(IDLoc, "unexpected token at start of statement"); - } - IDVal = ""; + // Check if this is a known assembler directive. + DirectiveKind DirKind = DK_NO_DIRECTIVE; + if (ParsedID) { + auto DirKindIt = DirectiveKindMap.find(IDVal.lower()); + if (DirKindIt != DirectiveKindMap.end()) + DirKind = DirKindIt->getValue(); } // Handle conditional assembly here before checking for skipping. We // have to do this so that .endif isn't skipped in a ".if 0" block for // example. - StringMap::const_iterator DirKindIt = - DirectiveKindMap.find(IDVal.lower()); - DirectiveKind DirKind = (DirKindIt == DirectiveKindMap.end()) - ? DK_NO_DIRECTIVE - : DirKindIt->getValue(); switch (DirKind) { default: break; @@ -1899,12 +1860,45 @@ return false; } + // Check for the use of the '.' pseudo-symbol. + if (!ParsedID && Lexer.is(AsmToken::Dot)) { + // Diagnose attempt to use '.' as a label. + if (Lexer.peekTok().is(AsmToken::Colon)) { + Lex(); // Eat the '.'. + Lex(); // Eat the ':'. + return Error(IDLoc, "invalid use of pseudo-symbol '.' as a label"); + } + // '.' followed by '=' is a valid start of an assignment statement. + if (Lexer.peekTok().is(AsmToken::Equal) && + getTargetParser().equalIsAsmAssignment()) { + Lex(); // Eat the '.'. + Lex(); // Eat the '='. + return parseAssignment(".", AssignmentKind::Equal); + } + } + + // Allow an integer followed by a ':' as a directional local label. 
+ if (!ParsedID && Lexer.is(AsmToken::Integer)) { + LocalLabelVal = getTok().getIntVal(); + if (LocalLabelVal < 0) { + Lex(); // always eat a token + return Error(IDLoc, "unexpected token at start of statement"); + } + Lex(); // Consume the integer token. + if (Lexer.isNot(AsmToken::Colon)) { + Lex(); // always eat a token + return Error(IDLoc, "unexpected token at start of statement"); + } + } + // FIXME: Recurse on local labels? // Check for a label. // ::= identifier ':' // ::= number ':' - if (Lexer.is(AsmToken::Colon) && getTargetParser().isLabel(ID)) { + if ((ParsedID && Lexer.is(AsmToken::Colon) && + getTargetParser().isLabel(ID)) || + LocalLabelVal != -1) { if (checkForValidSection()) return true; @@ -1971,11 +1965,24 @@ // Check for an assignment statement. // ::= identifier '=' - if (Lexer.is(AsmToken::Equal) && getTargetParser().equalIsAsmAssignment()) { - Lex(); + if (ParsedID && Lexer.is(AsmToken::Equal) && + getTargetParser().equalIsAsmAssignment()) { + Lex(); // Eat the '='. return parseAssignment(IDVal, AssignmentKind::Equal); } + // Other generic assembler statements require an identifier at the start, but + // targets can allow other tokens at the beginning of instructions. + if (!ParsedID) { + if (getTargetParser().isStartOfInstruction(ID)) { + IDVal = getTok().getString(); + Lex(); + return parseAndMatchAndEmitTargetInstruction(Info, IDVal, ID, IDLoc); + } + Lex(); // always eat a token + return Error(IDLoc, "unexpected token at start of statement"); + } + // If macros are enabled, check to see if this is a macro instantiation. if (areMacrosEnabled()) if (const MCAsmMacro *M = getContext().lookupMacro(IDVal)) { diff --git a/llvm/lib/Target/BPF/AsmParser/BPFAsmParser.cpp b/llvm/lib/Target/BPF/AsmParser/BPFAsmParser.cpp --- a/llvm/lib/Target/BPF/AsmParser/BPFAsmParser.cpp +++ b/llvm/lib/Target/BPF/AsmParser/BPFAsmParser.cpp @@ -52,9 +52,11 @@ // "=" is used as assignment operator for assembly statment, so can't be used // for symbol assignment. 
bool equalIsAsmAssignment() override { return false; } - // "*" is used for dereferencing memory that it will be the start of - // statement. - bool starIsStartOfStatement() override { return true; } + + bool isStartOfInstruction(const AsmToken &Token) override { + // "*" is used for dereferencing memory as in '*(u8 *)(r0 + 0) = r7'. + return Token.is(AsmToken::Star); + } #define GET_ASSEMBLER_HEADER #include "BPFGenAsmMatcher.inc" diff --git a/llvm/lib/Target/Hexagon/AsmParser/HexagonAsmParser.cpp b/llvm/lib/Target/Hexagon/AsmParser/HexagonAsmParser.cpp --- a/llvm/lib/Target/Hexagon/AsmParser/HexagonAsmParser.cpp +++ b/llvm/lib/Target/Hexagon/AsmParser/HexagonAsmParser.cpp @@ -110,7 +110,12 @@ bool equalIsAsmAssignment() override { return false; } bool isLabel(AsmToken &Token) override; + bool isStartOfInstruction(const AsmToken &Token) override { + // Curly braces denote the start and the end of a packet. + return Token.is(AsmToken::LCurly) || Token.is(AsmToken::RCurly); + } +private: void Warning(SMLoc L, const Twine &Msg) { Parser.Warning(L, Msg); } bool Error(SMLoc L, const Twine &Msg) { return Parser.Error(L, Msg); } bool ParseDirectiveFalign(unsigned Size, SMLoc L); diff --git a/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp b/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp --- a/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp +++ b/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp @@ -1272,6 +1272,11 @@ setAvailableFeatures(ComputeAvailableFeatures(getSTI().getFeatureBits())); } + bool isStartOfInstruction(const AsmToken &Token) override { + // '{' can be used in prefixes, e.g. '{evex} vcvttss2si (%rdx), %r15'. + return Token.is(AsmToken::LCurly); + } + bool parseRegister(MCRegister &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) override; OperandMatchResultTy tryParseRegister(MCRegister &RegNo, SMLoc &StartLoc,