diff --git a/clang/lib/Frontend/Rewrite/InclusionRewriter.cpp b/clang/lib/Frontend/Rewrite/InclusionRewriter.cpp
--- a/clang/lib/Frontend/Rewrite/InclusionRewriter.cpp
+++ b/clang/lib/Frontend/Rewrite/InclusionRewriter.cpp
@@ -251,28 +251,12 @@
   return false;
 }
 
-/// Detect the likely line ending style of \p FromFile by examining the first
-/// newline found within it.
-static StringRef DetectEOL(const MemoryBufferRef &FromFile) {
-  // Detect what line endings the file uses, so that added content does not mix
-  // the style. We need to check for "\r\n" first because "\n\r" will match
-  // "\r\n\r\n".
-  const char *Pos = strchr(FromFile.getBufferStart(), '\n');
-  if (!Pos)
-    return "\n";
-  if (Pos - 1 >= FromFile.getBufferStart() && Pos[-1] == '\r')
-    return "\r\n";
-  if (Pos + 1 < FromFile.getBufferEnd() && Pos[1] == '\r')
-    return "\n\r";
-  return "\n";
-}
-
 void InclusionRewriter::detectMainFileEOL() {
   Optional<MemoryBufferRef> FromFile = *SM.getBufferOrNone(SM.getMainFileID());
   assert(FromFile);
   if (!FromFile)
     return; // Should never happen, but whatever.
-  MainEOL = DetectEOL(*FromFile);
+  MainEOL = FromFile->getBuffer().detectEOL();
 }
 
 /// Writes out bytes from \p FromFile, starting at \p NextToWrite and ending at
@@ -378,7 +362,7 @@
   Lexer RawLex(FileId, FromFile, PP.getSourceManager(), PP.getLangOpts());
   RawLex.SetCommentRetentionState(false);
 
-  StringRef LocalEOL = DetectEOL(FromFile);
+  StringRef LocalEOL = FromFile.getBuffer().detectEOL();
 
   // Per the GNU docs: "1" indicates entering a new file.
   if (FileId == SM.getMainFileID() || FileId == PP.getPredefinesFileID())
diff --git a/llvm/include/llvm/ADT/StringRef.h b/llvm/include/llvm/ADT/StringRef.h
--- a/llvm/include/llvm/ADT/StringRef.h
+++ b/llvm/include/llvm/ADT/StringRef.h
@@ -877,6 +877,32 @@
     return ltrim(Chars).rtrim(Chars);
   }
 
+  /// Detect the line ending style of the string.
+  ///
+  /// The style is inferred from the first carriage return in the string and
+  /// the characters immediately around it; later line endings are ignored.
+  /// If the string contains no carriage return, '\n' is returned for unix
+  /// line endings.
+  ///
+  /// \return - The line ending character sequence: "\n", "\r\n", "\n\r" or
+  /// "\r".
+  LLVM_NODISCARD
+  StringRef detectEOL() const {
+    size_t Pos = find('\r');
+    if (Pos == npos) {
+      // If there is no carriage return, assume unix
+      return "\n";
+    }
+    // Check for "\n\r" before "\r\n". Pos is the first '\r', so a '\n' right
+    // before it cannot be the tail of an earlier "\r\n"; checking the next
+    // character first would misdetect consecutive "\n\r" endings
+    // ("\n\r\n\r", i.e. a blank line) as "\r\n".
+    if (Pos > 0 && Data[Pos - 1] == '\n')
+      return "\n\r"; // You monster!
+    if (Pos + 1 < Length && Data[Pos + 1] == '\n')
+      return "\r\n"; // Windows
+    return "\r"; // Classic Mac
+  }
   /// @}
 };
 
diff --git a/llvm/unittests/ADT/StringRefTest.cpp b/llvm/unittests/ADT/StringRefTest.cpp
--- a/llvm/unittests/ADT/StringRefTest.cpp
+++ b/llvm/unittests/ADT/StringRefTest.cpp
@@ -1109,6 +1109,40 @@
   EXPECT_EQ(R"("foo")", ::testing::PrintToString(StringRef("foo")));
 }
 
+TEST(StringRefTest, LFLineEnding) {
+  constexpr StringRef Cases[] = {"\nDoggo\nPupper", "Floofer\n", "Woofer"};
+  EXPECT_EQ(StringRef("\n"), Cases[0].detectEOL());
+  EXPECT_EQ(StringRef("\n"), Cases[1].detectEOL());
+  EXPECT_EQ(StringRef("\n"), Cases[2].detectEOL());
+}
+
+TEST(StringRefTest, CRLineEnding) {
+  constexpr StringRef Cases[] = {"\rDoggo\rPupper", "Floofer\r", "Woo\rfer\n"};
+  EXPECT_EQ(StringRef("\r"), Cases[0].detectEOL());
+  EXPECT_EQ(StringRef("\r"), Cases[1].detectEOL());
+  EXPECT_EQ(StringRef("\r"), Cases[2].detectEOL());
+}
+
+TEST(StringRefTest, CRLFLineEnding) {
+  constexpr StringRef Cases[] = {"\r\nDoggo\r\nPupper", "Floofer\r\n",
+                                 "Woofer\r\nSubWoofer\n",
+                                 "Borker\r\n\r\nYapper"};
+  EXPECT_EQ(StringRef("\r\n"), Cases[0].detectEOL());
+  EXPECT_EQ(StringRef("\r\n"), Cases[1].detectEOL());
+  EXPECT_EQ(StringRef("\r\n"), Cases[2].detectEOL());
+  EXPECT_EQ(StringRef("\r\n"), Cases[3].detectEOL());
+}
+
+TEST(StringRefTest, LFCRLineEnding) {
+  constexpr StringRef Cases[] = {"\n\rDoggo\n\rPupper", "Floofer\n\r",
+                                 "Woofer\n\rSubWoofer\n",
+                                 "Borker\n\r\n\rYapper"};
+  EXPECT_EQ(StringRef("\n\r"), Cases[0].detectEOL());
+  EXPECT_EQ(StringRef("\n\r"), Cases[1].detectEOL());
+  EXPECT_EQ(StringRef("\n\r"), Cases[2].detectEOL());
+  EXPECT_EQ(StringRef("\n\r"), Cases[3].detectEOL());
+}
+
 static_assert(std::is_trivially_copyable<StringRef>::value,
               "trivially copyable");
 