diff --git a/clang-tools-extra/clangd/FormattedString.cpp b/clang-tools-extra/clangd/FormattedString.cpp --- a/clang-tools-extra/clangd/FormattedString.cpp +++ b/clang-tools-extra/clangd/FormattedString.cpp @@ -26,23 +26,122 @@ namespace markup { namespace { -/// Escape a markdown text block. Ensures the punctuation will not introduce -/// any of the markdown constructs. -std::string renderText(llvm::StringRef Input) { - // Escaping ASCII punctuation ensures we can't start a markdown construct. - constexpr llvm::StringLiteral Punctuation = - R"txt(!"#$%&'()*+,-./:;<=>?@[\]^_`{|}~)txt"; +bool isCharLineBreak(llvm::StringRef Str, size_t LineBreakIndex) { + return Str[LineBreakIndex] == '\n'; +} + +bool isCharParagraphLineBreak(llvm::StringRef Str, size_t LineBreakIndex) { + if (!isCharLineBreak(Str, LineBreakIndex) || + LineBreakIndex + 1 >= Str.size()) { + return false; + } + auto NextNonSpaceCharIndex = Str.find_first_not_of(' ', LineBreakIndex + 1); + + return NextNonSpaceCharIndex != llvm::StringRef::npos && + Str[NextNonSpaceCharIndex] == '\n'; +} + +bool isCharMardkownLineBreak(llvm::StringRef Str, size_t LineBreakIndex) { + if (!isCharLineBreak(Str, LineBreakIndex) || LineBreakIndex <= 2) { + return false; + } + + if (Str[LineBreakIndex - 1] == '\\') { + return true; + } + + if ((Str[LineBreakIndex - 1] == ' ' && Str[LineBreakIndex - 2] == ' ')) { + return true; + } + + return false; +} + +bool isCharPunctuationLineBreak(llvm::StringRef Str, size_t LineBreakIndex) { + if (!isCharLineBreak(Str, LineBreakIndex) || LineBreakIndex <= 1) { + return false; + } + + constexpr llvm::StringLiteral Punctuation = R"txt(.:,;!?)txt"; + + return Punctuation.find(Str[LineBreakIndex - 1]) != llvm::StringRef::npos; +} + +bool isCharFollowedByHardLineBreakIndicator(llvm::StringRef Str, + size_t LineBreakIndex) { + if (!isCharLineBreak(Str, LineBreakIndex)) { + return false; + } + + // '-'/'*' md list, '@'/'\' documentation command, '>' md blockquote, + // '#' headings, '`' code blocks + constexpr llvm::StringLiteral LinbreakIdenticators = R"txt(-*@\>#`)txt"; + + auto NextNonSpaceCharIndex = Str.find_first_not_of(' ', LineBreakIndex + 1); + + if (NextNonSpaceCharIndex == llvm::StringRef::npos) { + + return false; + } + + auto FollowedBySingleCharIndicator = + LinbreakIdenticators.find(Str[NextNonSpaceCharIndex]) != + llvm::StringRef::npos; + + auto FollowedByNumberedListIndicator = + llvm::isDigit(Str[NextNonSpaceCharIndex]) && + NextNonSpaceCharIndex + 1 < Str.size() && + (Str[NextNonSpaceCharIndex + 1] == '.' || + Str[NextNonSpaceCharIndex + 1] == ')'); + + return FollowedBySingleCharIndicator || FollowedByNumberedListIndicator; +} + +// Tries to estimate which linebreak is meant to be soft and which is meant to +// be hard. Soft linebreaks are removed. Hard linebreaks are retained or +// converted to markdown linebreaks if \p Markdown is set. +std::string convertLineBreaks(llvm::StringRef Input, bool Markdown = false) { + Input = Input.trim(); + + constexpr auto WhiteSpaceChars = "\t\n\v\f\r "; std::string R; - for (size_t From = 0; From < Input.size();) { - size_t Next = Input.find_first_of(Punctuation, From); - R += Input.substr(From, Next - From); - if (Next == llvm::StringRef::npos) - break; - R += "\\"; - R += Input[Next]; + R.reserve(Input.size()); - From = Next + 1; + std::string HardLinebreak = "\n"; + if (Markdown) { + HardLinebreak = " \n"; + } + + for (size_t CharIndex = 0; CharIndex < Input.size();) { + if (isCharLineBreak(Input, CharIndex)) { + // Trim spaces infront of linebreak + R.erase(R.find_last_not_of(WhiteSpaceChars) + 1); + if (isCharParagraphLineBreak(Input, CharIndex)) { + R += "\n\n"; + } else if (Markdown && isCharMardkownLineBreak(Input, CharIndex)) { + if (Input[CharIndex - 1] == '\\') { + R += '\n'; + } else { + R += HardLinebreak; + } + } else if (isCharPunctuationLineBreak(Input, CharIndex)) { + R += HardLinebreak; + } else if (isCharFollowedByHardLineBreakIndicator(Input, CharIndex)) { + R += HardLinebreak; + } else { + // Omit line break -> replace with space + R += ' '; + } + CharIndex++; + // After a linebreak always remove spaces to avoid 4 space markdown code + // blocks, also skip all additional linebreaks since they have no effect + CharIndex = Input.find_first_not_of(WhiteSpaceChars, CharIndex); + } else { + // copy the rest of the string + R += Input[CharIndex]; + CharIndex++; + } } return R; } @@ -94,26 +193,6 @@ return std::string(/*Repeat=*/std::max(3u, MaxBackticks + 1), '`'); } -// Trims the input and concatenates whitespace blocks into a single ` `. -std::string canonicalizeSpaces(std::string Input) { - // Goes over the string and preserves only a single ` ` for any whitespace - // chunks, the rest is moved to the end of the string and dropped in the end. - auto WritePtr = Input.begin(); - llvm::SmallVector Words; - llvm::SplitString(Input, Words); - if (Words.empty()) - return ""; - // Go over each word and add it to the string. - for (llvm::StringRef Word : Words) { - if (WritePtr > Input.begin()) - *WritePtr++ = ' '; // Separate from previous block. - llvm::for_each(Word, [&WritePtr](const char C) { *WritePtr++ = C; }); - } - // Get rid of extra spaces. - Input.resize(WritePtr - Input.begin()); - return Input; -} - std::string renderBlocks(llvm::ArrayRef> Children, void (Block::*RenderFunc)(llvm::raw_ostream &) const) { std::string R; @@ -236,7 +315,7 @@ OS << Sep; switch (C.Kind) { case Chunk::PlainText: - OS << renderText(C.Contents); + OS << convertLineBreaks(C.Contents, true); break; case Chunk::InlineCode: OS << renderInlineBlock(C.Contents); @@ -253,7 +332,7 @@ void Paragraph::renderPlainText(llvm::raw_ostream &OS) const { llvm::StringRef Sep = ""; for (auto &C : Chunks) { - OS << Sep << C.Contents; + OS << Sep << convertLineBreaks(C.Contents); Sep = " "; } OS << '\n'; @@ -278,7 +357,7 @@ } Paragraph &Paragraph::appendText(std::string Text) { - Text = canonicalizeSpaces(std::move(Text)); + Text = llvm::StringRef(Text).trim().str(); if (Text.empty()) return *this; Chunks.emplace_back(); @@ -289,7 +368,7 @@ } Paragraph &Paragraph::appendCode(std::string Code) { - Code = canonicalizeSpaces(std::move(Code)); + Code = llvm::StringRef(Code).trim().str(); if (Code.empty()) return *this; Chunks.emplace_back(); diff --git a/clang-tools-extra/clangd/unittests/FormattedStringTests.cpp b/clang-tools-extra/clangd/unittests/FormattedStringTests.cpp --- a/clang-tools-extra/clangd/unittests/FormattedStringTests.cpp +++ b/clang-tools-extra/clangd/unittests/FormattedStringTests.cpp @@ -20,19 +20,6 @@ TEST(Render, Escaping) { // Check some ASCII punctuation Paragraph P; - P.appendText("*!`"); - EXPECT_EQ(P.asMarkdown(), "\\*\\!\\`"); - - // Check all ASCII punctuation. - P = Paragraph(); - std::string Punctuation = R"txt(!"#$%&'()*+,-./:;<=>?@[\]^_`{|}~)txt"; - // Same text, with each character escaped. - std::string EscapedPunctuation; - EscapedPunctuation.reserve(2 * Punctuation.size()); - for (char C : Punctuation) - EscapedPunctuation += std::string("\\") + C; - P.appendText(Punctuation); - EXPECT_EQ(P.asMarkdown(), EscapedPunctuation); // In code blocks we don't need to escape ASCII punctuation. P = Paragraph(); @@ -98,7 +85,7 @@ } TEST(Paragraph, ExtraSpaces) { - // Make sure spaces inside chunks are dropped. + // Make sure spaces after linebreaks are dropped. Paragraph P; P.appendText("foo\n \t baz"); P.appendCode(" bar\n"); @@ -106,15 +93,92 @@ EXPECT_EQ(P.asPlainText(), "foo baz bar"); } -TEST(Paragraph, NewLines) { +TEST(Paragraph, LineBreakTrim) { // New lines before and after chunks are dropped. Paragraph P; P.appendText(" \n foo\nbar\n "); P.appendCode(" \n foo\nbar \n "); - EXPECT_EQ(P.asMarkdown(), "foo bar `foo bar`"); + EXPECT_EQ(P.asMarkdown(), "foo bar `foo\nbar`"); EXPECT_EQ(P.asPlainText(), "foo bar foo bar"); } +TEST(Paragraph, LineBreakConversionParagraphs) { + // Paragraphs are eindicated by double linebreaks + // additional linebreaks and spaces should be dropped + constexpr auto Expected = "foo\n\nbar"; + + Paragraph P1; + P1.appendText("foo\n\n\n\nbar"); + EXPECT_EQ(P1.asMarkdown(), Expected); + EXPECT_EQ(P1.asPlainText(), Expected); + + Paragraph P2; + P2.appendText("foo\n\n\n\tbar"); + EXPECT_EQ(P2.asMarkdown(), Expected); + EXPECT_EQ(P2.asPlainText(), Expected); + + Paragraph P3; + P3.appendText("foo\n\n\n bar"); + EXPECT_EQ(P3.asMarkdown(), Expected); + EXPECT_EQ(P3.asPlainText(), Expected); +} + +TEST(Paragraph, LineBreakConversionPunctuation) { + // Keep linebreaks after punctuation + constexpr llvm::StringLiteral Punctuation = R"txt(.:,;!?)txt"; + + for (auto Char : Punctuation) { + const auto TestString = std::string("foo") + Char + "\nbar"; + const auto MarkdownExpected = std::string("foo") + Char + " \nbar"; + + Paragraph P; + P.appendText(TestString); + EXPECT_EQ(P.asMarkdown(), MarkdownExpected); + EXPECT_EQ(P.asPlainText(), TestString); + } +} + +TEST(Paragraph, LineBreakConversionIndicator) { + // Keep linebreaks before interesting characters + constexpr llvm::StringLiteral LinbreakIdenticators = R"txt(-*@\>#`)txt"; + + for (auto Char : LinbreakIdenticators) { + const auto TestString = std::string("foo\n") + Char + "bar"; + const auto MarkdownExpected = std::string("foo \n") + Char + "bar"; + + Paragraph P; + P.appendText(TestString); + EXPECT_EQ(P.asMarkdown(), MarkdownExpected); + EXPECT_EQ(P.asPlainText(), TestString); + } +} + +TEST(Paragraph, LineBreakConversionMarkdownLineBreaks) { + // Keep Markdown linebreaks + Paragraph P1; + P1.appendText("foo \nbar"); + EXPECT_EQ(P1.asMarkdown(), "foo \nbar"); + EXPECT_EQ(P1.asPlainText(), "foo bar"); + + Paragraph P2; + P2.appendText("foo \nbar"); + EXPECT_EQ(P2.asMarkdown(), "foo \nbar"); + EXPECT_EQ(P2.asPlainText(), "foo bar"); + + Paragraph P3; + P3.appendText("foo\\\nbar"); + EXPECT_EQ(P3.asMarkdown(), "foo\\\nbar"); + EXPECT_EQ(P3.asPlainText(), "foo\\ bar"); +} + +TEST(Paragraph, LineBreakConversionRemoveSoftLineBreaks) { + // Remove soft linebreaks with no interesting context + Paragraph P1; + P1.appendText("foo\nbar"); + EXPECT_EQ(P1.asMarkdown(), "foo bar"); + EXPECT_EQ(P1.asPlainText(), "foo bar"); +} + TEST(Document, Separators) { Document D; D.addParagraph().appendText("foo"); diff --git a/clang-tools-extra/clangd/unittests/HoverTests.cpp b/clang-tools-extra/clangd/unittests/HoverTests.cpp --- a/clang-tools-extra/clangd/unittests/HoverTests.cpp +++ b/clang-tools-extra/clangd/unittests/HoverTests.cpp @@ -1905,7 +1905,7 @@ llvm::StringRef ExpectedMarkdown = R"md(### variable `foo` --- -Value \= `val` +Value = `val` --- ```cpp