Index: clangd/index/SymbolCollector.cpp =================================================================== --- clangd/index/SymbolCollector.cpp +++ clangd/index/SymbolCollector.cpp @@ -89,6 +89,46 @@ return llvm::None; } +// All proto generated headers should start with this line. +static const char *PROTO_HEADER_COMMENT = + "// Generated by the protocol buffer compiler. DO NOT EDIT!"; + +// Checks whether the decl is a private symbol in a header generated by +// protobuf compiler. +// To identify whether a proto header is actually generated by proto compiler, +// we check whether it starts with PROTO_HEADER_COMMENT. +// FIXME: make filtering extensible when there are more use cases for symbol +// filters. +bool isPrivateProtoDecl(const NamedDecl &ND) { + const auto &SM = ND.getASTContext().getSourceManager(); + auto Loc = findNameLoc(&ND); + auto FileName = SM.getFilename(Loc); + if (!FileName.endswith(".proto.h") && !FileName.endswith(".pb.h")) + return false; + auto FID = SM.getFileID(Loc); + // Double check that this is an actual protobuf header. + if (!SM.getBufferData(FID).startswith(PROTO_HEADER_COMMENT)) + return false; + + // ND without identifier can be operators. + if (ND.getIdentifier() == nullptr) + return false; + auto Name = ND.getIdentifier()->getName(); + if (!Name.contains('_')) + return false; + // Nested proto entities (e.g. Message::Nested) have top-level decls + // that shouldn't be used (Message_Nested). Ignore them completely. + // The nested entities are dangling type aliases, we may want to reconsider + // including them in the future. + // For enum constants, SOME_ENUM_CONSTANT is not private and should be + // indexed. Outer_INNER is private. This heuristic relies on naming style, it + // will include OUTER_INNER and exclude some_enum_constant. + // FIXME: the heuristic relies on naming style (i.e. no underscore in + // user-defined names) and can be improved. + return (ND.getKind() != Decl::EnumConstant) || + std::any_of(Name.begin(), Name.end(), islower); +} + bool shouldFilterDecl(const NamedDecl *ND, ASTContext *ASTCtx, const SymbolCollector::Options &Opts) { using namespace clang::ast_matchers; @@ -129,6 +169,9 @@ .empty()) return true; + // Avoid indexing internal symbols in protobuf generated headers. + if (isPrivateProtoDecl(*ND)) + return true; return false; } Index: unittests/clangd/SymbolCollectorTests.cpp =================================================================== --- unittests/clangd/SymbolCollectorTests.cpp +++ unittests/clangd/SymbolCollectorTests.cpp @@ -697,6 +697,41 @@ AllOf(QName("pörk"), DeclRange(Header.range())))); } +TEST_F(SymbolCollectorTest, FilterPrivateProtoSymbols) { + TestHeaderName = testPath("x.proto.h"); + const std::string Header = + R"(// Generated by the protocol buffer compiler. DO NOT EDIT! + namespace nx { + class Top_Level {}; + class TopLevel {}; + enum Kind { + KIND_OK, + Kind_Not_Ok, + }; + bool operator<(const TopLevel &, const TopLevel &); + })"; + runSymbolCollector(Header, /*Main=*/""); + EXPECT_THAT(Symbols, + UnorderedElementsAre(QName("nx"), QName("nx::TopLevel"), + QName("nx::Kind"), QName("nx::KIND_OK"), + QName("nx::operator<"))); +} + +TEST_F(SymbolCollectorTest, DoubleCheckProtoHeaderComment) { + TestHeaderName = testPath("x.proto.h"); + const std::string Header = R"( + namespace nx { + class Top_Level {}; + enum Kind { + Kind_Fine + }; + } + )"; + runSymbolCollector(Header, /*Main=*/""); + EXPECT_THAT(Symbols, + UnorderedElementsAre(QName("nx"), QName("nx::Top_Level"), + QName("nx::Kind"), QName("nx::Kind_Fine"))); +} } // namespace } // namespace clangd