diff --git a/clang-tools-extra/clangd/CodeCompletionStrings.cpp b/clang-tools-extra/clangd/CodeCompletionStrings.cpp --- a/clang-tools-extra/clangd/CodeCompletionStrings.cpp +++ b/clang-tools-extra/clangd/CodeCompletionStrings.cpp @@ -12,6 +12,7 @@ #include "clang/AST/RawCommentList.h" #include "clang/Basic/SourceManager.h" #include "clang/Sema/CodeCompleteConsumer.h" +#include "llvm/Support/JSON.h" #include #include @@ -86,7 +87,12 @@ assert(!Ctx.getSourceManager().isLoadedSourceLocation(RC->getBeginLoc())); std::string Doc = RC->getFormattedText(Ctx.getSourceManager(), Ctx.getDiagnostics()); - return looksLikeDocComment(Doc) ? Doc : ""; + if (!looksLikeDocComment(Doc)) + return ""; + // Clang requires source to be UTF-8, but doesn't enforce this in comments. + if (!llvm::json::isUTF8(Doc)) + Doc = llvm::json::fixUTF8(Doc); + return Doc; } void getSignature(const CodeCompletionString &CCS, std::string *Signature, diff --git a/clang-tools-extra/clangd/unittests/CodeCompletionStringsTests.cpp b/clang-tools-extra/clangd/unittests/CodeCompletionStringsTests.cpp --- a/clang-tools-extra/clangd/unittests/CodeCompletionStringsTests.cpp +++ b/clang-tools-extra/clangd/unittests/CodeCompletionStringsTests.cpp @@ -7,6 +7,7 @@ //===----------------------------------------------------------------------===// #include "CodeCompletionStrings.h" +#include "TestTU.h" #include "clang/Sema/CodeCompleteConsumer.h" #include "gmock/gmock.h" #include "gtest/gtest.h" @@ -56,6 +57,14 @@ "Annotation: Ano\n\nIs this brief?"); } +TEST_F(CompletionStringTest, GetDeclCommentBadUTF8) { + // \xff is not a valid UTF-8 byte here, should be replaced by encoded U+FFFD. + auto TU = TestTU::withCode("/*x\xffy*/ struct X;"); + auto AST = TU.build(); + EXPECT_EQ("x\xef\xbf\xbdy", + getDeclComment(AST.getASTContext(), findDecl(AST, "X"))); +} + TEST_F(CompletionStringTest, MultipleAnnotations) { Builder.AddAnnotation("Ano1"); Builder.AddAnnotation("Ano2");