Index: lib/Lex/Lexer.cpp
===================================================================
--- lib/Lex/Lexer.cpp
+++ lib/Lex/Lexer.cpp
@@ -452,6 +452,33 @@
   return false;
 }
 
+/// \brief Check if the new line pointed to by Str is escaped, i.e. preceded
+/// by a backslash that is optionally followed by horizontal whitespace.
+///
+/// Str must point at a vertical whitespace character inside the buffer. Only
+/// the line break at Str is examined: a two-character "\r\n" or "\n\r" pair
+/// counts as one break, and earlier line breaks are never skipped, so a blank
+/// line that follows an escaped new line is not itself reported as escaped.
+static bool isNewLineEscaped(const char *BufferStart, const char *Str) {
+  if (Str == BufferStart)
+    return false;
+
+  // Step over the first half of a two-character line break.
+  if ((Str[0] == '\n' && Str[-1] == '\r') ||
+      (Str[0] == '\r' && Str[-1] == '\n')) {
+    if (Str - 1 == BufferStart)
+      return false;
+    --Str;
+  }
+  --Str;
+
+  // Rewind over trailing horizontal whitespace before the line break.
+  while (Str > BufferStart && isHorizontalWhitespace(*Str))
+    --Str;
+
+  return *Str == '\\';
+}
+
 static SourceLocation getBeginningOfFileToken(SourceLocation Loc,
                                               const SourceManager &SM,
                                               const LangOptions &LangOpts) {
@@ -467,22 +494,23 @@
 
   // Back up from the current location until we hit the beginning of a line
   // (or the buffer). We'll relex from that point.
-  const char *BufStart = Buffer.data();
   if (LocInfo.second >= Buffer.size())
     return Loc;
 
-  const char *StrData = BufStart+LocInfo.second;
-  if (StrData[0] == '\n' || StrData[0] == '\r')
-    return Loc;
+  const char *BufStart = Buffer.data();
+  const char *StrData = BufStart + LocInfo.second;
 
   const char *LexStart = StrData;
-  while (LexStart != BufStart) {
-    if (LexStart[0] == '\n' || LexStart[0] == '\r') {
-      ++LexStart;
-      break;
-    }
+  for (; LexStart != BufStart; --LexStart) {
+    if (!isVerticalWhitespace(LexStart[0]))
+      continue;
 
-    --LexStart;
+    if (isNewLineEscaped(BufStart, LexStart))
+      continue;
+
+    // LexStart should point at first character of logical line.
+    ++LexStart;
+    break;
   }
 
   // Create a lexer starting at the beginning of this token.
Index: unittests/Lex/LexerTest.cpp
===================================================================
--- unittests/Lex/LexerTest.cpp
+++ unittests/Lex/LexerTest.cpp
@@ -380,4 +380,32 @@
   EXPECT_EQ(SourceMgr.getFileIDSize(SourceMgr.getFileID(helper1ArgLoc)), 8U);
 }
 
+TEST_F(LexerTest, GetBeginningOfTokenWithEscapedNewLine) {
+  // Each line should have the same length for
+  // further offset calculation to be more straightforward.
+  const unsigned IdentifierLength = 8;
+  std::string TextToLex = "rabarbar\n"
+                          "foo\\\nbar\n"
+                          "foo\\\rbar\n"
+                          "fo\\\r\nbar\n"
+                          "foo\\\n\rba\n";
+  std::vector<tok::TokenKind> ExpectedTokens(5, tok::identifier);
+
+  auto LexedTokens = CheckLex(TextToLex, ExpectedTokens);
+
+  for (const auto &Tok : LexedTokens) {
+    auto OriginalLocation = SourceMgr.getDecomposedLoc(Tok.getLocation());
+    for (unsigned Offset = 0; Offset < IdentifierLength; ++Offset) {
+      auto LookupLocation = Tok.getLocation().getLocWithOffset(Offset);
+
+      auto FoundLocation = SourceMgr.getDecomposedExpansionLoc(
+          Lexer::GetBeginningOfToken(LookupLocation, SourceMgr, LangOpts));
+
+      // Check that location returned by the GetBeginningOfToken
+      // is the same as original token location reported by Lexer.
+      EXPECT_EQ(FoundLocation.second, OriginalLocation.second);
+    }
+  }
+}
+
 } // anonymous namespace