Index: include/clang/Lex/Lexer.h =================================================================== --- include/clang/Lex/Lexer.h +++ include/clang/Lex/Lexer.h @@ -463,6 +463,9 @@ /// \brief Returns true if the given character could appear in an identifier. static bool isIdentifierBodyChar(char c, const LangOptions &LangOpts); + /// \brief Checks whether new line pointed by Str is preceded by escape sequence. + static bool isNewLineEscaped(const char *BufferStart, const char *Str); + /// getCharAndSizeNoWarn - Like the getCharAndSize method, but does not ever /// emit a warning. static inline char getCharAndSizeNoWarn(const char *Ptr, unsigned &Size, Index: lib/Lex/Lexer.cpp =================================================================== --- lib/Lex/Lexer.cpp +++ lib/Lex/Lexer.cpp @@ -463,19 +463,18 @@ const char *BufStart = Buffer.data(); if (Offset >= Buffer.size()) return nullptr; - const char *StrData = BufStart + Offset; - if (StrData[0] == '\n' || StrData[0] == '\r') - return StrData; + const char *LexStart = BufStart + Offset; + for (; LexStart != BufStart; --LexStart) { + if (!isVerticalWhitespace(LexStart[0])) + continue; - const char *LexStart = StrData; - while (LexStart != BufStart) { - if (LexStart[0] == '\n' || LexStart[0] == '\r') { - ++LexStart; - break; - } + if (Lexer::isNewLineEscaped(BufStart, LexStart)) + continue; - --LexStart; + // LexStart should point at first character of logical line. + ++LexStart; + break; } return LexStart; } @@ -487,7 +486,7 @@ std::pair LocInfo = SM.getDecomposedLoc(Loc); if (LocInfo.first.isInvalid()) return Loc; - + bool Invalid = false; StringRef Buffer = SM.getBufferData(LocInfo.first, &Invalid); if (Invalid) @@ -499,31 +498,31 @@ const char *LexStart = findBeginningOfLine(Buffer, LocInfo.second); if (!LexStart || LexStart == StrData) return Loc; - + // Create a lexer starting at the beginning of this token. SourceLocation LexerStartLoc = Loc.getLocWithOffset(-LocInfo.second); Lexer TheLexer(LexerStartLoc, LangOpts, Buffer.data(), LexStart, Buffer.end()); TheLexer.SetCommentRetentionState(true); - + // Lex tokens until we find the token that contains the source location. Token TheTok; do { TheLexer.LexFromRawLexer(TheTok); - + if (TheLexer.getBufferLocation() > StrData) { // Lexing this token has taken the lexer past the source location we're // looking for. If the current token encompasses our source location, // return the beginning of that token. if (TheLexer.getBufferLocation() - TheTok.getLength() <= StrData) return TheTok.getLocation(); - + // We ended up skipping over the source location entirely, which means // that it points into whitespace. We're done here. break; } } while (TheTok.getKind() != tok::eof); - + // We've passed our source location; just return the original source location. return Loc; } @@ -531,20 +530,20 @@ SourceLocation Lexer::GetBeginningOfToken(SourceLocation Loc, const SourceManager &SM, const LangOptions &LangOpts) { - if (Loc.isFileID()) - return getBeginningOfFileToken(Loc, SM, LangOpts); - - if (!SM.isMacroArgExpansion(Loc)) - return Loc; + if (Loc.isFileID()) + return getBeginningOfFileToken(Loc, SM, LangOpts); - SourceLocation FileLoc = SM.getSpellingLoc(Loc); - SourceLocation BeginFileLoc = getBeginningOfFileToken(FileLoc, SM, LangOpts); - std::pair FileLocInfo = SM.getDecomposedLoc(FileLoc); - std::pair BeginFileLocInfo - = SM.getDecomposedLoc(BeginFileLoc); - assert(FileLocInfo.first == BeginFileLocInfo.first && - FileLocInfo.second >= BeginFileLocInfo.second); - return Loc.getLocWithOffset(BeginFileLocInfo.second - FileLocInfo.second); + if (!SM.isMacroArgExpansion(Loc)) + return Loc; + + SourceLocation FileLoc = SM.getSpellingLoc(Loc); + SourceLocation BeginFileLoc = getBeginningOfFileToken(FileLoc, SM, LangOpts); + std::pair FileLocInfo = SM.getDecomposedLoc(FileLoc); + std::pair BeginFileLocInfo + = SM.getDecomposedLoc(BeginFileLoc); + assert(FileLocInfo.first == BeginFileLocInfo.first && + FileLocInfo.second >= BeginFileLocInfo.second); + return Loc.getLocWithOffset(BeginFileLocInfo.second - FileLocInfo.second); } namespace { @@ -1032,6 +1031,26 @@ return isIdentifierBody(c, LangOpts.DollarIdents); } +bool Lexer::isNewLineEscaped(const char *BufferStart, const char *Str) { + assert(isVerticalWhitespace(Str[0])); + if (Str - 1 < BufferStart) + return false; + + if ((Str[0] == '\n' && Str[-1] == '\r') || + (Str[0] == '\r' && Str[-1] == '\n')) { + if (Str - 2 < BufferStart) + return false; + --Str; + } + --Str; + + // Rewind to first non-space character: + while (Str > BufferStart && isHorizontalWhitespace(*Str)) + --Str; + + return *Str == '\\'; +} + StringRef Lexer::getIndentationForLine(SourceLocation Loc, const SourceManager &SM) { if (Loc.isInvalid() || Loc.isMacroID()) Index: unittests/Lex/LexerTest.cpp =================================================================== --- unittests/Lex/LexerTest.cpp +++ unittests/Lex/LexerTest.cpp @@ -420,4 +420,57 @@ #endif } +TEST_F(LexerTest, IsNewLineEscapedValid) { + auto hasNewLineEscaped = [] (const char *S) { + return Lexer::isNewLineEscaped(S, S + strlen(S) - 1); + }; + + EXPECT_TRUE(hasNewLineEscaped("\\\r")); + EXPECT_TRUE(hasNewLineEscaped("\\\n")); + EXPECT_TRUE(hasNewLineEscaped("\\\r\n")); + EXPECT_TRUE(hasNewLineEscaped("\\\n\r")); + EXPECT_TRUE(hasNewLineEscaped("\\ \t\v\f\r")); + EXPECT_TRUE(hasNewLineEscaped("\\ \t\v\f\r\n")); + + EXPECT_FALSE(hasNewLineEscaped("\\\r\r")); + EXPECT_FALSE(hasNewLineEscaped("\\\r\r\n")); + EXPECT_FALSE(hasNewLineEscaped("\\\n\n")); + EXPECT_FALSE(hasNewLineEscaped("\r")); + EXPECT_FALSE(hasNewLineEscaped("\n")); + EXPECT_FALSE(hasNewLineEscaped("\r\n")); + EXPECT_FALSE(hasNewLineEscaped("\n\r")); + EXPECT_FALSE(hasNewLineEscaped("\r\r")); + EXPECT_FALSE(hasNewLineEscaped("\n\n")); +} + +TEST_F(LexerTest, GetBeginningOfTokenWithEscapedNewLine) { + // Each line should have the same length for + // further offset calculation to be more straightforward. + const unsigned IdentifierLength = 8; + std::string TextToLex = "rabarbar\n" + "foo\\\nbar\n" + "foo\\\rbar\n" + "fo\\\r\nbar\n" + "foo\\\n\rba\n"; + std::vector ExpectedTokens{5, tok::identifier}; + std::vector LexedTokens = CheckLex(TextToLex, ExpectedTokens); + + for (const Token &Tok : LexedTokens) { + std::pair OriginalLocation = + SourceMgr.getDecomposedLoc(Tok.getLocation()); + for (unsigned Offset = 0; Offset < IdentifierLength; ++Offset) { + SourceLocation LookupLocation = + Tok.getLocation().getLocWithOffset(Offset); + + std::pair FoundLocation = + SourceMgr.getDecomposedExpansionLoc( + Lexer::GetBeginningOfToken(LookupLocation, SourceMgr, LangOpts)); + + // Check that location returned by the GetBeginningOfToken + // is the same as original token location reported by Lexer. + EXPECT_EQ(FoundLocation.second, OriginalLocation.second); + } + } +} + } // anonymous namespace