Index: llvm/include/llvm/MC/MCAsmInfo.h
===================================================================
--- llvm/include/llvm/MC/MCAsmInfo.h
+++ llvm/include/llvm/MC/MCAsmInfo.h
@@ -181,9 +181,26 @@
   /// Defaults to false.
   bool AllowAtInName = false;
 
-  /// This is true if the assembler allows $ @ ? characters at the start of
-  /// symbol names. Defaults to false.
-  bool AllowSymbolAtNameStart = false;
+  /// This is true if the assembler allows the "?" character at the start of
+  /// a string to be lexed as an AsmToken::Identifier.
+  /// If the CommentString is also set to "?", setting this option will have
+  /// no effect, and the string will be lexed as a comment.
+  /// Defaults to false.
+  bool AllowQuestionAtStartOfIdentifier = false;
+
+  /// This is true if the assembler allows the "$" character at the start of
+  /// a string to be lexed as an AsmToken::Identifier.
+  /// If the CommentString is also set to "$", setting this option will have
+  /// no effect, and the string will be lexed as a comment.
+  /// Defaults to false.
+  bool AllowDollarAtStartOfIdentifier = false;
+
+  /// This is true if the assembler allows the "@" character at the start of
+  /// a string to be lexed as an AsmToken::Identifier.
+  /// If the CommentString is also set to "@", setting this option will have
+  /// no effect, and the string will be lexed as a comment.
+  /// Defaults to false.
+  bool AllowAtAtStartOfIdentifier = false;
 
   /// If this is true, symbol names with invalid characters will be printed in
   /// quotes.
@@ -597,7 +614,15 @@
   const char *getCode64Directive() const { return Code64Directive; }
   unsigned getAssemblerDialect() const { return AssemblerDialect; }
   bool doesAllowAtInName() const { return AllowAtInName; }
-  bool doesAllowSymbolAtNameStart() const { return AllowSymbolAtNameStart; }
+  bool doesAllowQuestionAtStartOfIdentifier() const {
+    return AllowQuestionAtStartOfIdentifier;
+  }
+  bool doesAllowAtAtStartOfIdentifier() const {
+    return AllowAtAtStartOfIdentifier;
+  }
+  bool doesAllowDollarAtStartOfIdentifier() const {
+    return AllowDollarAtStartOfIdentifier;
+  }
   bool supportsNameQuoting() const { return SupportsQuotedNames; }
 
   bool doesSupportDataRegionDirectives() const {
Index: llvm/lib/MC/MCParser/AsmLexer.cpp
===================================================================
--- llvm/lib/MC/MCParser/AsmLexer.cpp
+++ llvm/lib/MC/MCParser/AsmLexer.cpp
@@ -143,7 +143,7 @@
   return AsmToken(AsmToken::Real, StringRef(TokStart, CurPtr - TokStart));
 }
 
-/// LexIdentifier: [a-zA-Z_.][a-zA-Z0-9_$.@#?]*
+/// LexIdentifier: [a-zA-Z_$.@?][a-zA-Z0-9_$.@#?]*
 static bool isIdentifierChar(char C, bool AllowAt, bool AllowHash) {
   return isAlnum(C) || C == '_' || C == '$' || C == '.' || C == '?' ||
          (AllowAt && C == '@') || (AllowHash && C == '#');
@@ -740,17 +740,10 @@
   IsAtStartOfStatement = false;
   switch (CurChar) {
   default:
-    if (MAI.doesAllowSymbolAtNameStart()) {
-      // Handle Microsoft-style identifier: [a-zA-Z_$.@?][a-zA-Z0-9_$.@#?]*
-      if (!isDigit(CurChar) &&
-          isIdentifierChar(CurChar, MAI.doesAllowAtInName(),
-                           AllowHashInIdentifier))
-        return LexIdentifier();
-    } else {
-      // Handle identifier: [a-zA-Z_.][a-zA-Z0-9_$.@]*
-      if (isalpha(CurChar) || CurChar == '_' || CurChar == '.')
-        return LexIdentifier();
-    }
+    // Handle identifier: [a-zA-Z_.?][a-zA-Z0-9_$.@#?]*
+    if (isalpha(CurChar) || CurChar == '_' || CurChar == '.' ||
+        (MAI.doesAllowQuestionAtStartOfIdentifier() && CurChar == '?'))
+      return LexIdentifier();
 
     // Unknown character, emit an error.
     return ReturnError(TokStart, "invalid character in input");
@@ -794,8 +787,16 @@
   case '}': return AsmToken(AsmToken::RCurly, StringRef(TokStart, 1));
   case '*': return AsmToken(AsmToken::Star, StringRef(TokStart, 1));
   case ',': return AsmToken(AsmToken::Comma, StringRef(TokStart, 1));
-  case '$': return AsmToken(AsmToken::Dollar, StringRef(TokStart, 1));
-  case '@': return AsmToken(AsmToken::At, StringRef(TokStart, 1));
+  case '$': {
+    if (MAI.doesAllowDollarAtStartOfIdentifier())
+      return LexIdentifier();
+    return AsmToken(AsmToken::Dollar, StringRef(TokStart, 1));
+  }
+  case '@': {
+    if (MAI.doesAllowAtAtStartOfIdentifier())
+      return LexIdentifier();
+    return AsmToken(AsmToken::At, StringRef(TokStart, 1));
+  }
   case '\\': return AsmToken(AsmToken::BackSlash, StringRef(TokStart, 1));
   case '=':
     if (*CurPtr == '=') {
Index: llvm/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.cpp
===================================================================
--- llvm/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.cpp
+++ llvm/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.cpp
@@ -144,7 +144,9 @@
   DollarIsPC = true;
   SeparatorString = "\n";
   CommentString = ";";
-  AllowSymbolAtNameStart = true;
+  AllowQuestionAtStartOfIdentifier = true;
+  AllowDollarAtStartOfIdentifier = true;
+  AllowAtAtStartOfIdentifier = true;
 }
 
 void X86MCAsmInfoGNUCOFF::anchor() { }
Index: llvm/unittests/MC/SystemZ/SystemZAsmLexerTest.cpp
===================================================================
--- llvm/unittests/MC/SystemZ/SystemZAsmLexerTest.cpp
+++ llvm/unittests/MC/SystemZ/SystemZAsmLexerTest.cpp
@@ -35,6 +35,15 @@
   void setAllowAdditionalComments(bool Value) {
     AllowAdditionalComments = Value;
   }
+  void setAllowQuestionAtStartOfIdentifier(bool Value) {
+    AllowQuestionAtStartOfIdentifier = Value;
+  }
+  void setAllowAtAtStartOfIdentifier(bool Value) {
+    AllowAtAtStartOfIdentifier = Value;
+  }
+  void setAllowDollarAtStartOfIdentifier(bool Value) {
+    AllowDollarAtStartOfIdentifier = Value;
+  }
 };
 
 // Setup a testing class that the GTest framework can call.
@@ -454,4 +463,96 @@
   ExpectedTokens.push_back(AsmToken::Eof);
   lexAndCheckTokens(AsmStr, ExpectedTokens);
 }
+
+TEST_F(SystemZAsmLexerTest, CheckDefaultQuestionAtStartOfIdentifier) {
+  StringRef AsmStr = "?lh1?23";
+
+  // Setup.
+  setupCallToAsmParser(AsmStr);
+
+  // Lex initially to get the string.
+  Parser->getLexer().Lex();
+
+  SmallVector<AsmToken::TokenKind> ExpectedTokens(
+      {AsmToken::Error, AsmToken::Identifier, AsmToken::EndOfStatement,
+       AsmToken::Eof});
+  lexAndCheckTokens(AsmStr, ExpectedTokens);
+}
+
+TEST_F(SystemZAsmLexerTest, CheckAcceptQuestionAtStartOfIdentifier) {
+  StringRef AsmStr = "?????lh1?23";
+
+  // Setup.
+  MUPMAI->setAllowQuestionAtStartOfIdentifier(true);
+  setupCallToAsmParser(AsmStr);
+
+  // Lex initially to get the string.
+  Parser->getLexer().Lex();
+
+  SmallVector<AsmToken::TokenKind> ExpectedTokens(
+      {AsmToken::Identifier, AsmToken::EndOfStatement, AsmToken::Eof});
+  lexAndCheckTokens(AsmStr, ExpectedTokens);
+}
+
+TEST_F(SystemZAsmLexerTest, CheckDefaultAtAtStartOfIdentifier) {
+  StringRef AsmStr = "@@lh1?23";
+
+  // Setup.
+  MUPMAI->setAllowQuestionAtStartOfIdentifier(true);
+  setupCallToAsmParser(AsmStr);
+
+  // Lex initially to get the string.
+  Parser->getLexer().Lex();
+
+  SmallVector<AsmToken::TokenKind> ExpectedTokens(
+      {AsmToken::At, AsmToken::At, AsmToken::Identifier,
+       AsmToken::EndOfStatement, AsmToken::Eof});
+  lexAndCheckTokens(AsmStr, ExpectedTokens);
+}
+
+TEST_F(SystemZAsmLexerTest, CheckAcceptAtAtStartOfIdentifier) {
+  StringRef AsmStr = "@@lh1?23";
+
+  // Setup.
+  MUPMAI->setAllowAtAtStartOfIdentifier(true);
+  setupCallToAsmParser(AsmStr);
+
+  // Lex initially to get the string.
+  Parser->getLexer().Lex();
+
+  SmallVector<AsmToken::TokenKind> ExpectedTokens(
+      {AsmToken::Identifier, AsmToken::EndOfStatement, AsmToken::Eof});
+  lexAndCheckTokens(AsmStr, ExpectedTokens);
+}
+
+TEST_F(SystemZAsmLexerTest, CheckAcceptAtAtStartOfIdentifier2) {
+  StringRef AsmStr = "@@lj1?23";
+
+  // Setup.
+  MUPMAI->setCommentString("@");
+  MUPMAI->setAllowAtAtStartOfIdentifier(true);
+  setupCallToAsmParser(AsmStr);
+
+  // Lex initially to get the string.
+  Parser->getLexer().Lex();
+
+  // "@@lj1?23" -> still lexed as a comment as that takes precedence.
+  SmallVector<AsmToken::TokenKind> ExpectedTokens(
+      {AsmToken::EndOfStatement, AsmToken::Eof});
+  lexAndCheckTokens(AsmStr, ExpectedTokens);
+}
+
+TEST_F(SystemZAsmLexerTest, CheckDefaultHashAtStartOfIdentifier) {
+  StringRef AsmStr = "##lj1?23";
+
+  // Setup.
+  setupCallToAsmParser(AsmStr);
+
+  // Lex initially to get the string.
+  Parser->getLexer().Lex();
+
+  SmallVector<AsmToken::TokenKind> ExpectedTokens(
+      {AsmToken::EndOfStatement, AsmToken::Eof});
+  lexAndCheckTokens(AsmStr, ExpectedTokens);
+}
 } // end anonymous namespace