diff --git a/mlir/lib/Parser/AffineParser.cpp b/mlir/lib/Parser/AffineParser.cpp --- a/mlir/lib/Parser/AffineParser.cpp +++ b/mlir/lib/Parser/AffineParser.cpp @@ -269,13 +269,13 @@ /// /// affine-expr ::= bare-id AffineExpr AffineParser::parseBareIdExpr() { - if (getToken().isNot(Token::bare_identifier)) + if (!getToken().isBareIdentifier()) return emitWrongTokenError("expected bare identifier"), nullptr; StringRef sRef = getTokenSpelling(); for (auto entry : dimsAndSymbols) { if (entry.first == sRef) { - consumeToken(Token::bare_identifier); + consumeToken(); return entry.second; } } @@ -458,7 +458,7 @@ /// expressions of the affine map. Update our state to store the /// dimensional/symbolic identifier. ParseResult AffineParser::parseIdentifierDefinition(AffineExpr idExpr) { - if (getToken().isNot(Token::bare_identifier)) + if (!getToken().isBareIdentifier()) return emitWrongTokenError("expected bare identifier"); auto name = getTokenSpelling(); @@ -466,7 +466,7 @@ if (entry.first == name) return emitError("redefinition of identifier '" + name + "'"); } - consumeToken(Token::bare_identifier); + consumeToken(); dimsAndSymbols.push_back({name, idExpr}); return success(); diff --git a/mlir/lib/Parser/Token.h b/mlir/lib/Parser/Token.h --- a/mlir/lib/Parser/Token.h +++ b/mlir/lib/Parser/Token.h @@ -58,6 +58,9 @@ /// Return true if this is one of the keyword token kinds (e.g. kw_if). bool isKeyword() const; + /// Returns true if this token can be represented as a bare identifier. + bool isBareIdentifier() const; + // Helpers to decode specific sorts of tokens. /// For an integer token, return its value as an unsigned. If it doesn't fit, diff --git a/mlir/lib/Parser/Token.cpp b/mlir/lib/Parser/Token.cpp --- a/mlir/lib/Parser/Token.cpp +++ b/mlir/lib/Parser/Token.cpp @@ -187,3 +187,18 @@ #include "TokenKinds.def" } } + +/// Returns true if the given string can be represented as a bare identifier. 
+/// A bare identifier begins with an alphabetic character or '_' and continues
+/// with alphanumeric characters, '_', '$' or '.'; an empty spelling is rejected.
+bool Token::isBareIdentifier() const {
+  StringRef spelling = getSpelling();
+  if (spelling.empty())
+    return false;
+
+  // The <cctype> classifiers require an argument representable as unsigned
+  // char (or EOF). A plain `char` holding a byte of a UTF-8 multibyte sequence
+  // may be negative, which is undefined behavior and asserts in MSVC's
+  // implementation, so every character, including the first, is converted to
+  // unsigned char before it is classified.
+  const unsigned char first = spelling[0];
+  if (!isalpha(first) && first != '_')
+    return false;
+
+  return llvm::all_of(spelling.drop_front(), [](unsigned char c) {
+    return isalnum(c) || c == '_' || c == '$' || c == '.';
+  });
+}