Index: llvm/include/llvm/MC/MCParser/MCAsmLexer.h =================================================================== --- llvm/include/llvm/MC/MCParser/MCAsmLexer.h +++ llvm/include/llvm/MC/MCParser/MCAsmLexer.h @@ -48,6 +48,7 @@ const char *TokStart = nullptr; bool SkipSpace = true; bool AllowAtInIdentifier; + bool AllowHashInIdentifier = false; bool IsAtStartOfStatement = true; bool LexMasmHexFloats = false; bool LexMasmIntegers = false; @@ -147,6 +148,8 @@ bool getAllowAtInIdentifier() { return AllowAtInIdentifier; } void setAllowAtInIdentifier(bool v) { AllowAtInIdentifier = v; } + void setAllowHashInIdentifier(bool V) { AllowHashInIdentifier = V; } + void setCommentConsumer(AsmCommentConsumer *CommentConsumer) { this->CommentConsumer = CommentConsumer; } Index: llvm/lib/MC/MCParser/AsmLexer.cpp =================================================================== --- llvm/lib/MC/MCParser/AsmLexer.cpp +++ llvm/lib/MC/MCParser/AsmLexer.cpp @@ -143,10 +143,10 @@ return AsmToken(AsmToken::Real, StringRef(TokStart, CurPtr - TokStart)); } -/// LexIdentifier: [a-zA-Z_.][a-zA-Z0-9_$.@?]* -static bool IsIdentifierChar(char c, bool AllowAt) { - return isAlnum(c) || c == '_' || c == '$' || c == '.' || - (c == '@' && AllowAt) || c == '?'; +/// LexIdentifier: [a-zA-Z_.][a-zA-Z0-9_$.@#?]* +static bool isIdentifierChar(char C, bool AllowAt, bool AllowHash) { + return isAlnum(C) || C == '_' || C == '$' || C == '.' || C == '?' || + (AllowAt && C == '@') || (AllowHash && C == '#'); } AsmToken AsmLexer::LexIdentifier() { @@ -156,12 +156,13 @@ while (isDigit(*CurPtr)) ++CurPtr; - if (!IsIdentifierChar(*CurPtr, AllowAtInIdentifier) || + if (!isIdentifierChar(*CurPtr, AllowAtInIdentifier, + AllowHashInIdentifier) || *CurPtr == 'e' || *CurPtr == 'E') return LexFloatLiteral(); } - while (IsIdentifierChar(*CurPtr, AllowAtInIdentifier)) + while (isIdentifierChar(*CurPtr, AllowAtInIdentifier, AllowHashInIdentifier)) ++CurPtr; // Handle . as a special case. 
@@ -726,9 +727,10 @@ switch (CurChar) { default: if (MAI.doesAllowSymbolAtNameStart()) { - // Handle Microsoft-style identifier: [a-zA-Z_$.@?][a-zA-Z0-9_$.@?]* + // Handle Microsoft-style identifier: [a-zA-Z_$.@?][a-zA-Z0-9_$.@#?]* if (!isDigit(CurChar) && - IsIdentifierChar(CurChar, MAI.doesAllowAtInName())) + isIdentifierChar(CurChar, MAI.doesAllowAtInName(), + AllowHashInIdentifier)) return LexIdentifier(); } else { // Handle identifier: [a-zA-Z_.][a-zA-Z0-9_$.@]* Index: llvm/unittests/MC/SystemZ/SystemZAsmLexerTest.cpp =================================================================== --- llvm/unittests/MC/SystemZ/SystemZAsmLexerTest.cpp +++ llvm/unittests/MC/SystemZ/SystemZAsmLexerTest.cpp @@ -94,8 +94,6 @@ Str.reset(TheTarget->createNullStreamer(*Ctx)); Parser.reset(createMCAsmParser(SrcMgr, *Ctx, *Str, *MUPMAI)); - // Lex initially to get the string. - Parser->getLexer().Lex(); } void lexAndCheckTokens(StringRef AsmStr, @@ -116,6 +114,9 @@ // Setup. setupCallToAsmParser(AsmStr); + // Lex initially to get the string. + Parser->getLexer().Lex(); + SmallVector ExpectedTokens( {AsmToken::Identifier, AsmToken::EndOfStatement}); lexAndCheckTokens(AsmStr /* "jne #-4" */, ExpectedTokens); @@ -129,6 +130,9 @@ MUPMAI->setRestrictCommentStringToStartOfStatement(true); setupCallToAsmParser(AsmStr); + // Lex initially to get the string. + Parser->getLexer().Lex(); + // When we are restricting the comment string to only the start of the // statement, The sequence of tokens we are expecting are: Identifier - "jne" // Hash - '#' @@ -148,8 +152,65 @@ MUPMAI->setCommentString("*"); setupCallToAsmParser(AsmStr); + // Lex initially to get the string. + Parser->getLexer().Lex(); + SmallVector ExpectedTokens( {AsmToken::EndOfStatement, AsmToken::Eof}); lexAndCheckTokens(AsmStr /* "* lhi 1,10" */, ExpectedTokens); } + +TEST_F(SystemZAsmLexerTest, CheckHashDefault) { + StringRef AsmStr = "lh#123"; + + // Setup. 
+ setupCallToAsmParser(AsmStr); + + // Lex initially to get the string. + Parser->getLexer().Lex(); + + // "lh" -> Identifier + // "#123" -> EndOfStatement (Lexed as a comment since CommentString is "#") + SmallVector ExpectedTokens( + {AsmToken::Identifier, AsmToken::EndOfStatement, AsmToken::Eof}); + lexAndCheckTokens(AsmStr, ExpectedTokens); +} + +// Test if "#" is accepted as part of an Identifier +TEST_F(SystemZAsmLexerTest, CheckAllowHashInIdentifier) { + StringRef AsmStr = "lh#123"; + + // Setup. + setupCallToAsmParser(AsmStr); + Parser->getLexer().setAllowHashInIdentifier(true); + + // Lex initially to get the string. + Parser->getLexer().Lex(); + + // "lh#123" -> Identifier + SmallVector ExpectedTokens( + {AsmToken::Identifier, AsmToken::EndOfStatement, AsmToken::Eof}); + lexAndCheckTokens(AsmStr, ExpectedTokens); +} + +TEST_F(SystemZAsmLexerTest, CheckAllowHashInIdentifier2) { + StringRef AsmStr = "lh#12*3"; + + // Setup. + MUPMAI->setCommentString("*"); + MUPMAI->setRestrictCommentStringToStartOfStatement(true); + setupCallToAsmParser(AsmStr); + Parser->getLexer().setAllowHashInIdentifier(true); + + // Lex initially to get the string. + Parser->getLexer().Lex(); + + // "lh#12" -> Identifier + // "*" -> Star + // "3" -> Integer + SmallVector ExpectedTokens( + {AsmToken::Identifier, AsmToken::Star, AsmToken::Integer, + AsmToken::EndOfStatement, AsmToken::Eof}); + lexAndCheckTokens(AsmStr, ExpectedTokens); +} } // end anonymous namespace