Index: llvm/trunk/test/tools/llvm-rc/Inputs/tokens.rc
===================================================================
--- llvm/trunk/test/tools/llvm-rc/Inputs/tokens.rc
+++ llvm/trunk/test/tools/llvm-rc/Inputs/tokens.rc
@@ -3,6 +3,14 @@
 
 "RC string test.",L"Another RC string test.'&{",42,100
 
+Block Comment Ident /*block /* // comment */ ifier
+Line Comment // Identifier /*
+
+/* Multi line
+ block
+ comment */
+
+Multiple /* comments */ on /* a */ single // line
 
 
 ":))"
Index: llvm/trunk/test/tools/llvm-rc/tokenizer.test
===================================================================
--- llvm/trunk/test/tools/llvm-rc/tokenizer.test
+++ llvm/trunk/test/tools/llvm-rc/tokenizer.test
@@ -34,4 +34,13 @@
 ; CHECK-NEXT: Int: 42; int value = 42
 ; CHECK-NEXT: Comma: ,
 ; CHECK-NEXT: Int: 100; int value = 100
+; CHECK-NEXT: Identifier: Block
+; CHECK-NEXT: Identifier: Comment
+; CHECK-NEXT: Identifier: Ident
+; CHECK-NEXT: Identifier: ifier
+; CHECK-NEXT: Identifier: Line
+; CHECK-NEXT: Identifier: Comment
+; CHECK-NEXT: Identifier: Multiple
+; CHECK-NEXT: Identifier: on
+; CHECK-NEXT: Identifier: single
 ; CHECK-NEXT: String: ":))"
Index: llvm/trunk/tools/llvm-rc/ResourceScriptToken.cpp
===================================================================
--- llvm/trunk/tools/llvm-rc/ResourceScriptToken.cpp
+++ llvm/trunk/tools/llvm-rc/ResourceScriptToken.cpp
@@ -121,6 +121,17 @@
 
   bool canStartString() const;
 
+  // Check if the tokenizer is positioned at the start of a single-line
+  // comment (i.e. the upcoming characters are '//').
+  bool canStartLineComment() const;
+
+  // Check if the tokenizer is positioned at the start of a block comment
+  // (i.e. the upcoming characters are '/*'; the comment runs until '*/').
+  bool canStartBlockComment() const;
+
+  // Throw away the rest of the current line and the line break(s) after it.
+  void skipCurrentLine();
+
   bool streamEof() const;
 
   // Classify the token that is about to be read from the current position.
@@ -134,6 +145,17 @@
   size_t DataLength, Pos;
 };
 
+void Tokenizer::skipCurrentLine() {
+  // Move to the first line-break character at or after Pos, then past the
+  // whole run of '\r' / '\n' characters that starts there.
+  Pos = Data.find_first_of("\r\n", Pos);
+  Pos = Data.find_first_not_of("\r\n", Pos);
+
+  // Nothing but line breaks (or nothing at all) was left: stop at the end.
+  if (Pos == StringRef::npos)
+    Pos = DataLength;
+}
+
 Expected<std::vector<RCToken>> Tokenizer::run() {
   Pos = 0;
   std::vector<RCToken> Result;
@@ -154,6 +176,10 @@
     if (Error TokenError = consumeToken(TokenKind))
       return std::move(TokenError);
 
+    // Comments are just deleted, don't bother saving them.
+    if (TokenKind == Kind::LineComment || TokenKind == Kind::StartComment)
+      continue;
+
     RCToken Token(TokenKind, Data.take_front(Pos).drop_front(TokenStart));
     if (TokenKind == Kind::Identifier) {
       processIdentifier(Token);
@@ -195,6 +221,21 @@
     advance();
     return Error::success();
 
+  case Kind::LineComment:
+    advance(2);
+    skipCurrentLine();
+    return Error::success();
+
+  case Kind::StartComment: {
+    // Pos still addresses the opening "/*"; search for "*/" only after it.
+    auto EndPos = Data.find("*/", Pos + 2);
+    if (EndPos == StringRef::npos)
+      return getStringError(
+          "Unclosed multi-line comment beginning at position " + Twine(Pos));
+    // Consume the whole comment, closing "*/" included.
+    advance(EndPos + 2 - Pos);
+    return Error::success();
+  }
   case Kind::Identifier:
     while (!streamEof() && canContinueIdentifier())
       advance();
@@ -259,6 +300,16 @@
   return std::isdigit(Data[Pos]);
 }
 
+bool Tokenizer::canStartBlockComment() const {
+  assert(!streamEof());
+  return Data.drop_front(Pos).startswith("/*");
+}
+
+bool Tokenizer::canStartLineComment() const {
+  assert(!streamEof());
+  return Data.drop_front(Pos).startswith("//");
+}
+
 bool Tokenizer::canContinueInt() const {
   assert(!streamEof());
   return std::isalnum(Data[Pos]);
@@ -271,6 +322,11 @@
 bool Tokenizer::streamEof() const { return Pos == DataLength; }
 
 Kind Tokenizer::classifyCurrentToken() const {
+  if (canStartBlockComment())
+    return Kind::StartComment;
+  if (canStartLineComment())
+    return Kind::LineComment;
+
   if (canStartInt())
     return Kind::Int;
   if (canStartString())
Index: llvm/trunk/tools/llvm-rc/ResourceScriptTokenList.h
===================================================================
--- llvm/trunk/tools/llvm-rc/ResourceScriptTokenList.h
+++ llvm/trunk/tools/llvm-rc/ResourceScriptTokenList.h
@@ -18,6 +18,8 @@
 TOKEN(Int) // Integer (decimal, octal or hexadecimal).
 TOKEN(String) // String value.
 TOKEN(Identifier) // Script identifier (resource name or type).
+TOKEN(LineComment) // Beginning of single-line comment.
+TOKEN(StartComment) // Beginning of multi-line comment.
 
 // Short tokens. They usually consist of exactly one character.
 // The definitions are of the form SHORT_TOKEN(TokenName, TokenChar).