// Review notes for this patch (unified diff; the original line breaks were lost in extraction).
// Purpose: replaces MCAsmLexer's single AllowAtInIdentifier flag with two BitVector character sets,
//   IdPrefixCharSet and IdBodyCharSet (each resized to 256 bits), and adds setIdentifierCharSet()
//   to set or reset individual characters in either set.
// - MCAsmLexer.h: getAllowAtInIdentifier()/setAllowAtInIdentifier() are kept and now query/update '@' in the body set.
// - AsmLexer.cpp: the constructor adds '@' to the body set only when the target comment string does not start with "@".
//   The '.'+digit float-literal disambiguation and the lone-'.' Dot token move out of LexIdentifier() into LexToken();
//   LexIdentifier() now only consumes body characters and returns an Identifier token.
// - MCAsmLexer.cpp: defaults are prefix = [A-Za-z_.] and body = prefix + [0-9$?].
// - AMDGPUAsmParser.cpp: adds '&' as an identifier prefix character; test/MC/AMDGPU/hsa.s switches to '&'-prefixed kernel symbols.
// NOTE(review): the new predicates mix IsAllowed... and isAllowed... capitalization -- confirm the intended naming.
// NOTE(review): LexToken() passes the int CurChar to IsAllowedIDPrefixChar(char) ahead of the switch -- confirm that
//   sentinel values (e.g. end-of-file) cannot map onto a byte that is enabled in the prefix set.
// NOTE(review): '&' is added to the prefix set only, so IsAllowedIDBodyChar('&') stays false -- presumably intentional; verify.
Index: include/llvm/MC/MCParser/MCAsmLexer.h =================================================================== --- include/llvm/MC/MCParser/MCAsmLexer.h +++ include/llvm/MC/MCParser/MCAsmLexer.h @@ -12,6 +12,7 @@ #include "llvm/ADT/APInt.h" #include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/BitVector.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringRef.h" #include "llvm/Support/Compiler.h" @@ -143,8 +144,9 @@ protected: // Can only create subclasses. const char *TokStart; bool SkipSpace; - bool AllowAtInIdentifier; bool IsAtStartOfStatement; + BitVector IdPrefixCharSet; + BitVector IdBodyCharSet; MCAsmLexer(); @@ -232,8 +234,28 @@ /// Set whether spaces should be ignored by the lexer void setSkipSpace(bool val) { SkipSpace = val; } - bool getAllowAtInIdentifier() { return AllowAtInIdentifier; } - void setAllowAtInIdentifier(bool v) { AllowAtInIdentifier = v; } + bool getAllowAtInIdentifier() { return IsAllowedIDBodyChar('@'); } + void setAllowAtInIdentifier(bool v) { setIdentifierCharSet(v, "", "@"); } + + /// Allow (Value == true) or disallow (Value == false) each character of PfxCharSet as an identifier-start character and each character of BodyCharSet as an identifier-body character. + void setIdentifierCharSet(bool Value, + StringRef PfxCharSet, + StringRef BodyCharSet); + + /// Test whether the specified character can start an identifier. + bool IsAllowedIDPrefixChar(char C) const { + return IdPrefixCharSet.test((unsigned char)C); + } + + /// Test whether the specified character can follow the identifier start character. + bool IsAllowedIDBodyChar(char C) const { + return IdBodyCharSet.test((unsigned char)C); + } + + /// Test whether the specified character may appear in an identifier, either as its start character or as a body character. + bool isAllowedIDChar(char C) const { + return IsAllowedIDBodyChar(C) || IsAllowedIDPrefixChar(C); + } }; } // End llvm namespace Index: lib/MC/MCParser/AsmLexer.cpp =================================================================== --- lib/MC/MCParser/AsmLexer.cpp +++ lib/MC/MCParser/AsmLexer.cpp @@ -34,7 +34,9 @@ : MAI(MAI), CurPtr(nullptr), IsAtStartOfLine(true), 
IsAtStartOfStatement(true), IsParsingMSInlineAsm(false), IsPeeking(false) { - AllowAtInIdentifier = !StringRef(MAI.getCommentString()).startswith("@"); + + if (!StringRef(MAI.getCommentString()).startswith("@")) + setIdentifierCharSet(true, "", "@"); } AsmLexer::~AsmLexer() { @@ -137,30 +139,10 @@ return AsmToken(AsmToken::Real, StringRef(TokStart, CurPtr - TokStart)); } -/// LexIdentifier: [a-zA-Z_.][a-zA-Z0-9_$.@?]* -static bool IsIdentifierChar(char c, bool AllowAt) { - return isalnum(c) || c == '_' || c == '$' || c == '.' || - (c == '@' && AllowAt) || c == '?'; -} AsmToken AsmLexer::LexIdentifier() { - // Check for floating point literals. - if (CurPtr[-1] == '.' && isdigit(*CurPtr)) { - // Disambiguate a .1243foo identifier from a floating literal. - while (isdigit(*CurPtr)) - ++CurPtr; - if (*CurPtr == 'e' || *CurPtr == 'E' || - !IsIdentifierChar(*CurPtr, AllowAtInIdentifier)) - return LexFloatLiteral(); - } - - while (IsIdentifierChar(*CurPtr, AllowAtInIdentifier)) + while (IsAllowedIDBodyChar(*CurPtr)) ++CurPtr; - - // Handle . as a special case. - if (CurPtr == TokStart+1 && TokStart[0] == '.') - return AsmToken(AsmToken::Dot, StringRef(TokStart, 1)); - return AsmToken(AsmToken::Identifier, StringRef(TokStart, CurPtr - TokStart)); } @@ -531,7 +513,7 @@ AsmToken AsmLexer::LexToken() { TokStart = CurPtr; // This always consumes at least one character. - int CurChar = getNextChar(); + const int CurChar = getNextChar(); if (!IsPeeking && CurChar == '#' && IsAtStartOfStatement) { // If this starts with a '#', this may be a cpp @@ -572,11 +554,30 @@ IsAtStartOfLine = false; bool OldIsAtStartOfStatement = IsAtStartOfStatement; IsAtStartOfStatement = false; + + if (CurChar == '.' && isdigit(*CurPtr)) { + if (!IsAllowedIDPrefixChar('.')) + return LexFloatLiteral(); + + const auto SavePos = CurPtr; + // Disambiguate a .1243foo identifier from a floating literal. 
+ do { ++CurPtr; } + while (isdigit(*CurPtr)); + if (*CurPtr == 'e' || *CurPtr == 'E' || !IsAllowedIDBodyChar(*CurPtr)) + return LexFloatLiteral(); + CurPtr = SavePos; + } + + const bool IsIDPrefix = IsAllowedIDPrefixChar(CurChar); + if (IsIDPrefix && IsAllowedIDBodyChar(*CurPtr)) { + ++CurPtr; + return LexIdentifier(); + } + switch (CurChar) { default: - // Handle identifier: [a-zA-Z_.][a-zA-Z0-9_$.@]* - if (isalpha(CurChar) || CurChar == '_' || CurChar == '.') - return LexIdentifier(); + if (IsIDPrefix) + return AsmToken(AsmToken::Identifier, StringRef(TokStart, 1)); // Unknown character, emit an error. return ReturnError(TokStart, "invalid character in input"); @@ -599,6 +600,7 @@ IsAtStartOfLine = true; IsAtStartOfStatement = true; return AsmToken(AsmToken::EndOfStatement, StringRef(TokStart, 1)); + case '.': return AsmToken(AsmToken::Dot, StringRef(TokStart, 1)); case ':': return AsmToken(AsmToken::Colon, StringRef(TokStart, 1)); case '+': return AsmToken(AsmToken::Plus, StringRef(TokStart, 1)); case '-': return AsmToken(AsmToken::Minus, StringRef(TokStart, 1)); Index: lib/MC/MCParser/MCAsmLexer.cpp =================================================================== --- lib/MC/MCParser/MCAsmLexer.cpp +++ lib/MC/MCParser/MCAsmLexer.cpp @@ -15,11 +15,39 @@ MCAsmLexer::MCAsmLexer() : TokStart(nullptr), SkipSpace(true), IsAtStartOfStatement(true) { CurTok.emplace_back(AsmToken::Space, StringRef()); + // Prefix char = [A-Za-z_.] + IdPrefixCharSet.resize(256); + IdPrefixCharSet.set((unsigned char)'a', (unsigned char)'z' + 1); + IdPrefixCharSet.set((unsigned char)'A', (unsigned char)'Z' + 1); + IdPrefixCharSet.set((unsigned char)'.'); + IdPrefixCharSet.set((unsigned char)'_'); + + // Body char = prefix + [0-9$?] 
+ IdBodyCharSet = IdPrefixCharSet; + IdBodyCharSet.set((unsigned char)'0', (unsigned char)'9' + 1); + IdBodyCharSet.set((unsigned char)'$'); + IdBodyCharSet.set((unsigned char)'?'); } MCAsmLexer::~MCAsmLexer() { } +void MCAsmLexer::setIdentifierCharSet(bool Value, + StringRef PfxCharSet, + StringRef BodyCharSet) { + if (Value) { + for (auto C : PfxCharSet) + IdPrefixCharSet.set((unsigned char)C); + for (auto C : BodyCharSet) + IdBodyCharSet.set((unsigned char)C); + } else { + for (auto C : PfxCharSet) + IdPrefixCharSet.reset((unsigned char)C); + for (auto C : BodyCharSet) + IdBodyCharSet.reset((unsigned char)C); + } +} + SMLoc MCAsmLexer::getLoc() const { return SMLoc::getFromPointer(TokStart); } Index: lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp =================================================================== --- lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp +++ lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp @@ -620,6 +620,8 @@ copySTI().ToggleFeature("SOUTHERN_ISLANDS"); } + getLexer().setIdentifierCharSet(true, "&", ""); + setAvailableFeatures(ComputeAvailableFeatures(getSTI().getFeatureBits())); { Index: test/MC/AMDGPU/hsa.s =================================================================== --- test/MC/AMDGPU/hsa.s +++ test/MC/AMDGPU/hsa.s @@ -16,12 +16,12 @@ // ELF: 0040: 50550000 // ELF: Symbol { -// ELF: Name: amd_kernel_code_t_minimal +// ELF: Name: &amd_kernel_code_t_minimal // ELF: Type: AMDGPU_HSA_KERNEL (0xA) // ELF: Section: .text // ELF: } // ELF: Symbol { -// ELF: Name: amd_kernel_code_t_test_all +// ELF: Name: &amd_kernel_code_t_test_all // ELF: Type: AMDGPU_HSA_KERNEL (0xA) // ELF: Section: .text // ELF: } @@ -35,11 +35,11 @@ .hsa_code_object_isa 7,0,0,"AMD","AMDGPU" // ASM: .hsa_code_object_isa 7,0,0,"AMD","AMDGPU" -.amdgpu_hsa_kernel amd_kernel_code_t_test_all -.amdgpu_hsa_kernel amd_kernel_code_t_minimal +.amdgpu_hsa_kernel &amd_kernel_code_t_test_all +.amdgpu_hsa_kernel &amd_kernel_code_t_minimal -amd_kernel_code_t_test_all: 
+&amd_kernel_code_t_test_all: ; Test all amd_kernel_code_t members with non-default values. .amd_kernel_code_t kernel_code_version_major = 100 @@ -106,7 +106,7 @@ runtime_loader_kernel_symbol = 1 .end_amd_kernel_code_t -// ASM-LABEL: {{^}}amd_kernel_code_t_test_all: +// ASM-LABEL: {{^\"\&}}amd_kernel_code_t_test_all{{\"}}: // ASM: .amd_kernel_code_t // ASM: amd_code_version_major = 100 // ASM: amd_code_version_minor = 100 @@ -172,7 +172,7 @@ // ASM: runtime_loader_kernel_symbol = 1 // ASM: .end_amd_kernel_code_t -amd_kernel_code_t_minimal: +&amd_kernel_code_t_minimal: .amd_kernel_code_t enable_sgpr_kernarg_segment_ptr = 1 is_ptr64 = 1 @@ -190,7 +190,7 @@ workitem_vgpr_count = 16 .end_amd_kernel_code_t -// ASM-LABEL: {{^}}amd_kernel_code_t_minimal: +// ASM-LABEL: {{^\"&}}amd_kernel_code_t_minimal{{\"}}: // ASM: .amd_kernel_code_t // ASM: amd_code_version_major = 1 // ASM: amd_code_version_minor = 0