diff --git a/clang-tools-extra/clangd/CMakeLists.txt b/clang-tools-extra/clangd/CMakeLists.txt
--- a/clang-tools-extra/clangd/CMakeLists.txt
+++ b/clang-tools-extra/clangd/CMakeLists.txt
@@ -50,6 +50,7 @@
   FileDistance.cpp
   FS.cpp
   FSProvider.cpp
+  FormattedString.cpp
   FuzzyMatch.cpp
   GlobalCompilationDatabase.cpp
   Headers.cpp
diff --git a/clang-tools-extra/clangd/FormattedString.h b/clang-tools-extra/clangd/FormattedString.h
new file mode 100644
--- /dev/null
+++ b/clang-tools-extra/clangd/FormattedString.h
@@ -0,0 +1,59 @@
+//===--- FormattedString.h ----------------------------------*- C++-*------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// A simple intermediate representation of formatted text that could be
+// converted to plaintext or markdown.
+//
+//===----------------------------------------------------------------------===//
+#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANGD_FORMATTEDSTRING_H
+#define LLVM_CLANG_TOOLS_EXTRA_CLANGD_FORMATTEDSTRING_H
+
+#include <string>
+#include <vector>
+
+namespace clang {
+namespace clangd {
+
+/// A structured string representation that could be converted to markdown or
+/// plaintext upon request.
+class FormattedString {
+public:
+  /// Append plain text to the end of the string.
+  void appendText(std::string Text);
+  /// Append a block of C++ code. This translates to a ``` block in markdown.
+  /// In a plain text representation, the code block will be surrounded by
+  /// newlines.
+  void appendCodeBlock(std::string Code, std::string Language = "cpp");
+  /// Append an inline block of C++ code. This translates to the ` block in
+  /// markdown.
+  void appendInlineCode(std::string Code);
+
+  /// Render the chunks as markdown, escaping punctuation in plain text.
+  std::string renderAsMarkdown() const;
+  /// Render the chunks as plain text with trailing whitespace removed.
+  std::string renderAsPlainText() const;
+
+private:
+  enum class ChunkKind {
+    PlainText,       /// A plain text paragraph.
+    CodeBlock,       /// A block of code.
+    InlineCodeBlock, /// An inline block of code.
+  };
+  struct Chunk {
+    ChunkKind Kind = ChunkKind::PlainText;
+    std::string Contents;
+    /// Language for code block chunks. Ignored for other chunks.
+    std::string Language;
+  };
+  std::vector<Chunk> Chunks;
+};
+
+} // namespace clangd
+} // namespace clang
+
+#endif
diff --git a/clang-tools-extra/clangd/FormattedString.cpp b/clang-tools-extra/clangd/FormattedString.cpp
new file mode 100644
--- /dev/null
+++ b/clang-tools-extra/clangd/FormattedString.cpp
@@ -0,0 +1,173 @@
+//===--- FormattedString.cpp --------------------------------*- C++-*------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+#include "FormattedString.h"
+#include "clang/Basic/CharInfo.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/Support/ErrorHandling.h"
+#include <cstddef>
+#include <string>
+
+namespace clang {
+namespace clangd {
+
+namespace {
+/// Escape a markdown text block. Ensures the punctuation will not introduce
+/// any of the markdown constructs.
+static std::string renderText(llvm::StringRef Input) {
+  // Escaping ASCII punctuation ensures we can't start a markdown construct.
+  constexpr llvm::StringLiteral Punctuation =
+      R"txt(!"#$%&'()*+,-./:;<=>?@[\]^_`{|}~)txt";
+
+  std::string R;
+  for (size_t From = 0; From < Input.size();) {
+    size_t Next = Input.find_first_of(Punctuation, From);
+    R += Input.substr(From, Next - From);
+    if (Next == llvm::StringRef::npos)
+      break;
+    R += "\\";
+    R += Input[Next];
+
+    From = Next + 1;
+  }
+  return R;
+}
+
+/// Renders \p Input as an inline block of code in markdown. The returned value
+/// is surrounded by backticks and the inner contents are properly escaped.
+static std::string renderInlineBlock(llvm::StringRef Input) {
+  std::string R;
+  // Double all backticks to make sure we don't close the inline block early.
+  for (size_t From = 0; From < Input.size();) {
+    size_t Next = Input.find("`", From);
+    R += Input.substr(From, Next - From);
+    if (Next == llvm::StringRef::npos)
+      break;
+    R += "``"; // double the found backtick.
+
+    From = Next + 1;
+  }
+  // If the result starts or ends with a backtick, add spaces on both sides. The
+  // spaces are ignored by markdown renderers.
+  if (llvm::StringRef(R).startswith("`") || llvm::StringRef(R).endswith("`"))
+    return "` " + std::move(R) + " `";
+  // Markdown render should ignore first and last space if both are there. We
+  // add an extra pair of spaces in that case to make sure we render what the
+  // user intended.
+  if (llvm::StringRef(R).startswith(" ") && llvm::StringRef(R).endswith(" "))
+    return "` " + std::move(R) + " `";
+  return "`" + std::move(R) + "`";
+}
+/// Render \p Input as markdown code block with a specified \p Language. The
+/// result is surrounded by >= 3 backticks. Although markdown also allows to use
+/// '~' for code blocks, they are never used.
+static std::string renderCodeBlock(llvm::StringRef Input,
+                                   llvm::StringRef Language) {
+  // Count the maximum number of consecutive backticks in \p Input. We need to
+  // start and end the code block with more.
+  unsigned MaxBackticks = 0;
+  unsigned Backticks = 0;
+  for (char C : Input) {
+    if (C == '`') {
+      ++Backticks;
+      continue;
+    }
+    MaxBackticks = std::max(MaxBackticks, Backticks);
+    Backticks = 0;
+  }
+  MaxBackticks = std::max(Backticks, MaxBackticks);
+  // Use the corresponding number of backticks to start and end a code block.
+  std::string BlockMarker(/*Repeat=*/std::max(3u, MaxBackticks + 1), '`');
+  return BlockMarker + Language.str() + "\n" + Input.str() + "\n" + BlockMarker;
+}
+
+} // namespace
+
+void FormattedString::appendText(std::string Text) {
+  // We merge consecutive blocks of text to simplify the overall structure.
+  if (Chunks.empty() || Chunks.back().Kind != ChunkKind::PlainText) {
+    Chunk C;
+    C.Kind = ChunkKind::PlainText;
+    Chunks.push_back(std::move(C));
+  }
+  // FIXME: ensure there is a whitespace between the chunks.
+  Chunks.back().Contents += Text;
+}
+
+void FormattedString::appendCodeBlock(std::string Code, std::string Language) {
+  Chunk C;
+  C.Kind = ChunkKind::CodeBlock;
+  C.Contents = std::move(Code);
+  C.Language = std::move(Language);
+  Chunks.push_back(std::move(C));
+}
+
+void FormattedString::appendInlineCode(std::string Code) {
+  Chunk C;
+  C.Kind = ChunkKind::InlineCodeBlock;
+  C.Contents = std::move(Code);
+  Chunks.push_back(std::move(C));
+}
+
+std::string FormattedString::renderAsMarkdown() const {
+  std::string R;
+  for (const auto &C : Chunks) {
+    switch (C.Kind) {
+    case ChunkKind::PlainText:
+      R += renderText(C.Contents);
+      continue;
+    case ChunkKind::InlineCodeBlock:
+      // Make sure we don't glue two backticks together.
+      if (llvm::StringRef(R).endswith("`"))
+        R += " ";
+      R += renderInlineBlock(C.Contents);
+      continue;
+    case ChunkKind::CodeBlock:
+      if (!R.empty() && !llvm::StringRef(R).endswith("\n"))
+        R += "\n";
+      R += renderCodeBlock(C.Contents, C.Language);
+      R += "\n";
+      continue;
+    }
+    llvm_unreachable("unhandled ChunkKind");
+  }
+  return R;
+}
+
+std::string FormattedString::renderAsPlainText() const {
+  std::string R;
+  auto EnsureWhitespace = [&]() {
+    if (R.empty() || isWhitespace(R.back()))
+      return;
+    R += " ";
+  };
+  for (const auto &C : Chunks) {
+    switch (C.Kind) {
+    case ChunkKind::PlainText:
+      EnsureWhitespace();
+      R += C.Contents;
+      continue;
+    case ChunkKind::InlineCodeBlock:
+      EnsureWhitespace();
+      R += C.Contents;
+      continue;
+    case ChunkKind::CodeBlock:
+      if (!R.empty())
+        R += "\n\n";
+      R += C.Contents;
+      if (!llvm::StringRef(C.Contents).endswith("\n"))
+        R += "\n";
+      continue;
+    }
+    llvm_unreachable("unhandled ChunkKind");
+  }
+  while (!R.empty() && isWhitespace(R.back()))
+    R.pop_back();
+  return R;
+}
+} // namespace clangd
+} // namespace clang
diff --git a/clang-tools-extra/clangd/unittests/CMakeLists.txt b/clang-tools-extra/clangd/unittests/CMakeLists.txt
--- a/clang-tools-extra/clangd/unittests/CMakeLists.txt
+++ b/clang-tools-extra/clangd/unittests/CMakeLists.txt
@@ -38,6 +38,7 @@
   FileDistanceTests.cpp
   FileIndexTests.cpp
   FindSymbolsTests.cpp
+  FormattedStringTests.cpp
   FSTests.cpp
   FunctionTests.cpp
   FuzzyMatchTests.cpp
diff --git a/clang-tools-extra/clangd/unittests/FormattedStringTests.cpp b/clang-tools-extra/clangd/unittests/FormattedStringTests.cpp
new file mode 100644
--- /dev/null
+++ b/clang-tools-extra/clangd/unittests/FormattedStringTests.cpp
@@ -0,0 +1,156 @@
+//===-- FormattedStringTests.cpp ------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+#include "FormattedString.h"
+#include "clang/Basic/LLVM.h"
+#include "llvm/ADT/StringRef.h"
+
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+
+namespace clang {
+namespace clangd {
+namespace {
+
+TEST(FormattedString, Basic) {
+  FormattedString S;
+  EXPECT_EQ(S.renderAsPlainText(), "");
+  EXPECT_EQ(S.renderAsMarkdown(), "");
+
+  S.appendText("foobar");
+  EXPECT_EQ(S.renderAsPlainText(), "foobar");
+  EXPECT_EQ(S.renderAsMarkdown(), "foobar");
+
+  S = FormattedString();
+  S.appendInlineCode("foobar");
+  EXPECT_EQ(S.renderAsPlainText(), "foobar");
+  EXPECT_EQ(S.renderAsMarkdown(), "`foobar`");
+
+  S = FormattedString();
+  S.appendCodeBlock("foobar");
+  EXPECT_EQ(S.renderAsPlainText(), "foobar");
+  EXPECT_EQ(S.renderAsMarkdown(), "```cpp\n"
+                                  "foobar\n"
+                                  "```\n");
+}
+
+TEST(FormattedString, CodeBlocks) {
+  FormattedString S;
+  S.appendCodeBlock("foobar");
+  S.appendCodeBlock("bazqux", "javascript");
+
+  EXPECT_EQ(S.renderAsPlainText(), "foobar\n\n\nbazqux");
+  std::string ExpectedMarkdown = R"md(```cpp
+foobar
+```
+```javascript
+bazqux
+```
+)md";
+  EXPECT_EQ(S.renderAsMarkdown(), ExpectedMarkdown);
+
+  S = FormattedString();
+  S.appendInlineCode("foobar");
+  S.appendInlineCode("bazqux");
+  EXPECT_EQ(S.renderAsPlainText(), "foobar bazqux");
+  EXPECT_EQ(S.renderAsMarkdown(), "`foobar` `bazqux`");
+
+  S = FormattedString();
+  S.appendText("foo");
+  S.appendInlineCode("bar");
+  S.appendText("baz");
+
+  EXPECT_EQ(S.renderAsPlainText(), "foo bar baz");
+  EXPECT_EQ(S.renderAsMarkdown(), "foo`bar`baz");
+}
+
+TEST(FormattedString, Escaping) {
+  // Check some ASCII punctuation
+  FormattedString S;
+  S.appendText("*!`");
+  EXPECT_EQ(S.renderAsMarkdown(), "\\*\\!\\`");
+
+  // Check all ASCII punctuation.
+  S = FormattedString();
+  std::string Punctuation = R"txt(!"#$%&'()*+,-./:;<=>?@[\]^_`{|}~)txt";
+  // Same text, with each character escaped.
+  std::string EscapedPunctuation;
+  EscapedPunctuation.reserve(2 * Punctuation.size());
+  for (char C : Punctuation)
+    EscapedPunctuation += std::string("\\") + C;
+  S.appendText(Punctuation);
+  EXPECT_EQ(S.renderAsMarkdown(), EscapedPunctuation);
+
+  // In code blocks we don't need to escape ASCII punctuation.
+  S = FormattedString();
+  S.appendInlineCode("* foo !+ bar * baz");
+  EXPECT_EQ(S.renderAsMarkdown(), "`* foo !+ bar * baz`");
+  S = FormattedString();
+  S.appendCodeBlock("#define FOO\n* foo !+ bar * baz");
+  EXPECT_EQ(S.renderAsMarkdown(), "```cpp\n"
+                                  "#define FOO\n* foo !+ bar * baz\n"
+                                  "```\n");
+
+  // But we have to escape the backticks.
+  S = FormattedString();
+  S.appendInlineCode("foo`bar`baz");
+  EXPECT_EQ(S.renderAsMarkdown(), "`foo``bar``baz`");
+
+  S = FormattedString();
+  S.appendCodeBlock("foo`bar`baz");
+  EXPECT_EQ(S.renderAsMarkdown(), "```cpp\n"
+                                  "foo`bar`baz\n"
+                                  "```\n");
+
+  // Inline code blocks starting or ending with backticks should add spaces.
+  S = FormattedString();
+  S.appendInlineCode("`foo");
+  EXPECT_EQ(S.renderAsMarkdown(), "` ``foo `");
+  S = FormattedString();
+  S.appendInlineCode("foo`");
+  EXPECT_EQ(S.renderAsMarkdown(), "` foo`` `");
+  S = FormattedString();
+  S.appendInlineCode("`foo`");
+  EXPECT_EQ(S.renderAsMarkdown(), "` ``foo`` `");
+
+  // Should also add extra spaces if the block starts and ends with spaces.
+  S = FormattedString();
+  S.appendInlineCode(" foo ");
+  EXPECT_EQ(S.renderAsMarkdown(), "`  foo  `");
+  S = FormattedString();
+  S.appendInlineCode("foo ");
+  EXPECT_EQ(S.renderAsMarkdown(), "`foo `");
+  S = FormattedString();
+  S.appendInlineCode(" foo");
+  EXPECT_EQ(S.renderAsMarkdown(), "` foo`");
+
+  // Code blocks might need more than 3 backticks.
+  S = FormattedString();
+  S.appendCodeBlock("foobarbaz `\nqux");
+  EXPECT_EQ(S.renderAsMarkdown(), "```cpp\n"
+                                  "foobarbaz `\nqux\n"
+                                  "```\n");
+  S = FormattedString();
+  S.appendCodeBlock("foobarbaz ``\nqux");
+  EXPECT_EQ(S.renderAsMarkdown(), "```cpp\n"
+                                  "foobarbaz ``\nqux\n"
+                                  "```\n");
+  S = FormattedString();
+  S.appendCodeBlock("foobarbaz ```\nqux");
+  EXPECT_EQ(S.renderAsMarkdown(), "````cpp\n"
+                                  "foobarbaz ```\nqux\n"
+                                  "````\n");
+  S = FormattedString();
+  S.appendCodeBlock("foobarbaz ` `` ``` ```` `\nqux");
+  EXPECT_EQ(S.renderAsMarkdown(), "`````cpp\n"
+                                  "foobarbaz ` `` ``` ```` `\nqux\n"
+                                  "`````\n");
+}
+
+} // namespace
+} // namespace clangd
+} // namespace clang