diff --git a/llvm/include/llvm/MC/MCAsmInfo.h b/llvm/include/llvm/MC/MCAsmInfo.h --- a/llvm/include/llvm/MC/MCAsmInfo.h +++ b/llvm/include/llvm/MC/MCAsmInfo.h @@ -181,9 +181,26 @@ /// Defaults to false. bool AllowAtInName = false; - /// This is true if the assembler allows $ @ ? characters at the start of - /// symbol names. Defaults to false. - bool AllowSymbolAtNameStart = false; + /// This is true if the assembler allows the "?" character at the start of + /// a string to be lexed as an AsmToken::Identifier. + /// If the CommentString is also set to "?", setting this option will have + /// no effect, and the string will be lexed as a comment. + /// Defaults to false. + bool AllowQuestionAtStartOfIdentifier = false; + + /// This is true if the assembler allows the "$" character at the start of + /// a string to be lexed as an AsmToken::Identifier. + /// If the CommentString is also set to "$", setting this option will have + /// no effect, and the string will be lexed as a comment. + /// Defaults to false. + bool AllowDollarAtStartOfIdentifier = false; + + /// This is true if the assembler allows the "@" character at the start of + /// a string to be lexed as an AsmToken::Identifier. + /// If the CommentString is also set to "@", setting this option will have + /// no effect, and the string will be lexed as a comment. + /// Defaults to false. + bool AllowAtAtStartOfIdentifier = false; /// If this is true, symbol names with invalid characters will be printed in /// quotes. 
@@ -600,7 +617,15 @@ const char *getCode64Directive() const { return Code64Directive; } unsigned getAssemblerDialect() const { return AssemblerDialect; } bool doesAllowAtInName() const { return AllowAtInName; } - bool doesAllowSymbolAtNameStart() const { return AllowSymbolAtNameStart; } + bool doesAllowQuestionAtStartOfIdentifier() const { + return AllowQuestionAtStartOfIdentifier; + } + bool doesAllowAtAtStartOfIdentifier() const { + return AllowAtAtStartOfIdentifier; + } + bool doesAllowDollarAtStartOfIdentifier() const { + return AllowDollarAtStartOfIdentifier; + } bool supportsNameQuoting() const { return SupportsQuotedNames; } bool doesSupportDataRegionDirectives() const { diff --git a/llvm/lib/MC/MCParser/AsmLexer.cpp b/llvm/lib/MC/MCParser/AsmLexer.cpp --- a/llvm/lib/MC/MCParser/AsmLexer.cpp +++ b/llvm/lib/MC/MCParser/AsmLexer.cpp @@ -144,7 +144,7 @@ return AsmToken(AsmToken::Real, StringRef(TokStart, CurPtr - TokStart)); } -/// LexIdentifier: [a-zA-Z_.][a-zA-Z0-9_$.@#?]* +/// LexIdentifier: [a-zA-Z_$.@?][a-zA-Z0-9_$.@#?]* static bool isIdentifierChar(char C, bool AllowAt, bool AllowHash) { return isAlnum(C) || C == '_' || C == '$' || C == '.' || C == '?' || (AllowAt && C == '@') || (AllowHash && C == '#'); @@ -769,17 +769,10 @@ IsAtStartOfStatement = false; switch (CurChar) { default: - if (MAI.doesAllowSymbolAtNameStart()) { - // Handle Microsoft-style identifier: [a-zA-Z_$.@?][a-zA-Z0-9_$.@#?]* - if (!isDigit(CurChar) && - isIdentifierChar(CurChar, MAI.doesAllowAtInName(), - AllowHashInIdentifier)) - return LexIdentifier(); - } else { - // Handle identifier: [a-zA-Z_.][a-zA-Z0-9_$.@]* - if (isalpha(CurChar) || CurChar == '_' || CurChar == '.') - return LexIdentifier(); - } + // Handle identifier: [a-zA-Z_.?][a-zA-Z0-9_$.@#?]* + if (isalpha(CurChar) || CurChar == '_' || CurChar == '.' || + (MAI.doesAllowQuestionAtStartOfIdentifier() && CurChar == '?')) + return LexIdentifier(); // Unknown character, emit an error. 
return ReturnError(TokStart, "invalid character in input"); @@ -823,13 +816,18 @@ case '}': return AsmToken(AsmToken::RCurly, StringRef(TokStart, 1)); case '*': return AsmToken(AsmToken::Star, StringRef(TokStart, 1)); case ',': return AsmToken(AsmToken::Comma, StringRef(TokStart, 1)); - case '$': - if (LexMotorolaIntegers && isHexDigit(*CurPtr)) { + case '$': { + if (LexMotorolaIntegers && isHexDigit(*CurPtr)) return LexDigit(); - } - + if (MAI.doesAllowDollarAtStartOfIdentifier()) + return LexIdentifier(); return AsmToken(AsmToken::Dollar, StringRef(TokStart, 1)); - case '@': return AsmToken(AsmToken::At, StringRef(TokStart, 1)); + } + case '@': { + if (MAI.doesAllowAtAtStartOfIdentifier()) + return LexIdentifier(); + return AsmToken(AsmToken::At, StringRef(TokStart, 1)); + } case '\\': return AsmToken(AsmToken::BackSlash, StringRef(TokStart, 1)); case '=': if (*CurPtr == '=') { diff --git a/llvm/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.cpp b/llvm/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.cpp --- a/llvm/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.cpp +++ b/llvm/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.cpp @@ -144,7 +144,9 @@ DollarIsPC = true; SeparatorString = "\n"; CommentString = ";"; - AllowSymbolAtNameStart = true; + AllowQuestionAtStartOfIdentifier = true; + AllowDollarAtStartOfIdentifier = true; + AllowAtAtStartOfIdentifier = true; } void X86MCAsmInfoGNUCOFF::anchor() { } diff --git a/llvm/unittests/MC/SystemZ/SystemZAsmLexerTest.cpp b/llvm/unittests/MC/SystemZ/SystemZAsmLexerTest.cpp --- a/llvm/unittests/MC/SystemZ/SystemZAsmLexerTest.cpp +++ b/llvm/unittests/MC/SystemZ/SystemZAsmLexerTest.cpp @@ -35,6 +35,15 @@ void setAllowAdditionalComments(bool Value) { AllowAdditionalComments = Value; } + void setAllowQuestionAtStartOfIdentifier(bool Value) { + AllowQuestionAtStartOfIdentifier = Value; + } + void setAllowAtAtStartOfIdentifier(bool Value) { + AllowAtAtStartOfIdentifier = Value; + } + void setAllowDollarAtStartOfIdentifier(bool Value) { + 
AllowDollarAtStartOfIdentifier = Value; + } }; // Setup a testing class that the GTest framework can call. @@ -454,4 +463,96 @@ ExpectedTokens.push_back(AsmToken::Eof); lexAndCheckTokens(AsmStr, ExpectedTokens); } + +TEST_F(SystemZAsmLexerTest, CheckDefaultQuestionAtStartOfIdentifier) { + StringRef AsmStr = "?lh1?23"; + + // Setup. + setupCallToAsmParser(AsmStr); + + // Lex initially to get the string. + Parser->getLexer().Lex(); + + SmallVector ExpectedTokens( + {AsmToken::Error, AsmToken::Identifier, AsmToken::EndOfStatement, + AsmToken::Eof}); + lexAndCheckTokens(AsmStr, ExpectedTokens); +} + +TEST_F(SystemZAsmLexerTest, CheckAcceptQuestionAtStartOfIdentifier) { + StringRef AsmStr = "?????lh1?23"; + + // Setup. + MUPMAI->setAllowQuestionAtStartOfIdentifier(true); + setupCallToAsmParser(AsmStr); + + // Lex initially to get the string. + Parser->getLexer().Lex(); + + SmallVector ExpectedTokens( + {AsmToken::Identifier, AsmToken::EndOfStatement, AsmToken::Eof}); + lexAndCheckTokens(AsmStr, ExpectedTokens); +} + +TEST_F(SystemZAsmLexerTest, CheckDefaultAtAtStartOfIdentifier) { + StringRef AsmStr = "@@lh1?23"; + + // Setup. + MUPMAI->setAllowQuestionAtStartOfIdentifier(true); + setupCallToAsmParser(AsmStr); + + // Lex initially to get the string. + Parser->getLexer().Lex(); + + SmallVector ExpectedTokens( + {AsmToken::At, AsmToken::At, AsmToken::Identifier, + AsmToken::EndOfStatement, AsmToken::Eof}); + lexAndCheckTokens(AsmStr, ExpectedTokens); +} + +TEST_F(SystemZAsmLexerTest, CheckAcceptAtAtStartOfIdentifier) { + StringRef AsmStr = "@@lh1?23"; + + // Setup. + MUPMAI->setAllowAtAtStartOfIdentifier(true); + setupCallToAsmParser(AsmStr); + + // Lex initially to get the string. + Parser->getLexer().Lex(); + + SmallVector ExpectedTokens( + {AsmToken::Identifier, AsmToken::EndOfStatement, AsmToken::Eof}); + lexAndCheckTokens(AsmStr, ExpectedTokens); +} + +TEST_F(SystemZAsmLexerTest, CheckAcceptAtAtStartOfIdentifier2) { + StringRef AsmStr = "@@lj1?23"; + + // Setup. 
+ MUPMAI->setCommentString("@"); + MUPMAI->setAllowAtAtStartOfIdentifier(true); + setupCallToAsmParser(AsmStr); + + // Lex initially to get the string. + Parser->getLexer().Lex(); + + // "@@lj1?23" -> still lexed as a comment as that takes precedence. + SmallVector ExpectedTokens( + {AsmToken::EndOfStatement, AsmToken::Eof}); + lexAndCheckTokens(AsmStr, ExpectedTokens); +} + +TEST_F(SystemZAsmLexerTest, CheckDefaultHashAtStartOfIdentifier) { + StringRef AsmStr = "##lj1?23"; + + // Setup. + setupCallToAsmParser(AsmStr); + + // Lex initially to get the string. + Parser->getLexer().Lex(); + + SmallVector ExpectedTokens( + {AsmToken::EndOfStatement, AsmToken::Eof}); + lexAndCheckTokens(AsmStr, ExpectedTokens); +} } // end anonymous namespace