diff --git a/clang/include/clang/Lex/Lexer.h b/clang/include/clang/Lex/Lexer.h
--- a/clang/include/clang/Lex/Lexer.h
+++ b/clang/include/clang/Lex/Lexer.h
@@ -811,6 +811,11 @@
   /// \return \c true if a UTF-8 sequence mapping to an acceptable identifier
   /// character was lexed, \c false otherwise.
   bool tryConsumeIdentifierUTF8Char(unsigned &CurOffset);
+
+  /// Attach the spelling \p Str to \p Tok. When GrowBuffer is set the
+  /// spelling is first copied through Preprocessor::CreateString, so the
+  /// token does not keep a pointer into this lexer's buffer.
+  void SetTokString(Token &Tok, StringRef Str);
 };
 
 } // namespace clang
diff --git a/clang/lib/Lex/Lexer.cpp b/clang/lib/Lex/Lexer.cpp
--- a/clang/lib/Lex/Lexer.cpp
+++ b/clang/lib/Lex/Lexer.cpp
@@ -1881,8 +1881,9 @@
   }
 
   const char *IdStart = BufferStart + BufferOffset;
+  unsigned TokLen = CurOffset - BufferOffset;
   FormTokenWithChars(Result, CurOffset, tok::raw_identifier);
-  Result.setRawIdentifierData(IdStart);
+  SetTokString(Result, StringRef(IdStart, TokLen));
 
   // If we are in raw mode, return this identifier raw. There is no need to
   // look up identifier information or attempt to macro expand it.
@@ -2002,8 +2003,9 @@
 
   // Update the location of token as well as BufferPtr.
   const char *TokStart = BufferStart + BufferOffset;
+  unsigned TokLen = CurOffset - BufferOffset;
   FormTokenWithChars(Result, CurOffset, tok::numeric_constant);
-  Result.setLiteralData(TokStart);
+  SetTokString(Result, StringRef(TokStart, TokLen));
   return true;
 }
 
@@ -2158,8 +2160,10 @@
 
   // Update the location of the token as well as the BufferPtr instance var.
   const char *TokStart = BufferStart + BufferOffset;
+  unsigned TokLen = CurOffset - BufferOffset;
+
   FormTokenWithChars(Result, CurOffset, Kind);
-  Result.setLiteralData(TokStart);
+  SetTokString(Result, StringRef(TokStart, TokLen));
   return true;
 }
 
@@ -2241,8 +2245,9 @@
 
   // Update the location of token as well as BufferPtr.
   const char *TokStart = &BufferStart[BufferOffset];
+  unsigned TokLen = CurOffset - BufferOffset;
   FormTokenWithChars(Result, CurOffset, Kind);
-  Result.setLiteralData(TokStart);
+  SetTokString(Result, StringRef(TokStart, TokLen));
   return true;
 }
 
@@ -2286,8 +2291,9 @@
 
   // Update the location of token as well as BufferPtr.
   const char *TokStart = &BufferStart[BufferOffset];
+  unsigned TokLen = CurOffset - BufferOffset;
   FormTokenWithChars(Result, CurOffset, tok::header_name);
-  Result.setLiteralData(TokStart);
+  SetTokString(Result, StringRef(TokStart, TokLen));
   return true;
 }
 
@@ -2382,8 +2388,9 @@
 
   // Update the location of token as well as BufferPtr.
   const char *TokStart = BufferStart + BufferOffset;
+  unsigned TokLen = CurOffset - BufferOffset;
   FormTokenWithChars(Result, CurOffset, Kind);
-  Result.setLiteralData(TokStart);
+  SetTokString(Result, StringRef(TokStart, TokLen));
   return true;
 }
 
@@ -3303,6 +3310,23 @@
   return nullptr;
 }
 
+/// Point \p Tok at its spelling \p Str, copying the spelling through the
+/// preprocessor first when GrowBuffer is set.
+void Lexer::SetTokString(Token &Tok, StringRef Str) {
+  if (GrowBuffer) {
+    assert(PP && "SetTokString needs a preprocessor when GrowBuffer is set");
+    // CreateString records the pointer to its copy in Tok; do not overwrite
+    // it with Str.data(), which still refers to the lexer's own buffer.
+    PP->CreateString(Str, Tok);
+    return;
+  }
+
+  if (Tok.is(tok::raw_identifier))
+    Tok.setRawIdentifierData(Str.data());
+  else
+    Tok.setLiteralData(Str.data());
+}
+
 bool Lexer::lexEditorPlaceholder(Token &Result, unsigned CurOffset) {
   assert(BufferStart[CurOffset - 1] == '<' && BufferStart[CurOffset] == '#' &&
          "Not a placeholder!");
@@ -3314,11 +3338,12 @@
   if (!End)
     return false;
   const char *Start = BufferStart + CurOffset - 1;
+  unsigned TokLen = End - Start;
   if (!LangOpts.AllowEditorPlaceholders)
     Diag(CurOffset - 1, diag::err_placeholder_in_source);
   Result.startToken();
   FormTokenWithChars(Result, End - BufferStart, tok::raw_identifier);
-  Result.setRawIdentifierData(Start);
+  SetTokString(Result, StringRef(Start, TokLen));
   PP->LookUpIdentifierInfo(Result);
   Result.setFlag(Token::IsEditorPlaceholder);
   BufferOffset = End - BufferStart;
@@ -4538,7 +4563,7 @@
     return true;
   }
   if (Result.isLiteral()) {
-    Result.setLiteralData(TokPtr);
+    SetTokString(Result, StringRef(TokPtr, Result.getLength()));
     return true;
   }
   if (Result.is(tok::colon) &&