diff --git a/clang-tools-extra/clangd/CMakeLists.txt b/clang-tools-extra/clangd/CMakeLists.txt --- a/clang-tools-extra/clangd/CMakeLists.txt +++ b/clang-tools-extra/clangd/CMakeLists.txt @@ -82,6 +82,7 @@ Hover.cpp IncludeFixer.cpp InlayHints.cpp + IWYU.cpp JSONTransport.cpp PathMapping.cpp Protocol.cpp diff --git a/clang-tools-extra/clangd/Headers.h b/clang-tools-extra/clangd/Headers.h --- a/clang-tools-extra/clangd/Headers.h +++ b/clang-tools-extra/clangd/Headers.h @@ -58,6 +58,7 @@ unsigned HashOffset = 0; // Byte offset from start of file to #. int HashLine = 0; // Line number containing the directive, 0-indexed. SrcMgr::CharacteristicKind FileKind = SrcMgr::C_User; + bool Used = true; // Was something from this used in the main file? }; llvm::raw_ostream &operator<<(llvm::raw_ostream &, const Inclusion &); bool operator==(const Inclusion &LHS, const Inclusion &RHS); @@ -129,6 +130,28 @@ llvm::StringRef IncludedName, llvm::StringRef IncludedRealName); + // Captures the filename corresponding to system includes like . + // This is used in for IWYU of the standard library as we want to check + // against where the spec *says* the symbols are. + void recordSystemInclude(llvm::StringRef Spelled, + llvm::StringRef IncludedName); + + // Classifying the main-file includes as "used" or "unused" is subtle + // (consider transitive includes), so we inject the algorithm. + + // An AbstractIncludeGraph maps including files to included files. + using AbstractIncludeGraph = llvm::DenseMap>; + // A UsedFunc decides whether each file included by EntryPoint is "used", + // based on the set of files that contain some referenced symbol. + using UsedFunc = llvm::DenseMap( + const AbstractIncludeGraph &, llvm::DenseSet Referenced, + unsigned EntryPoint); + // Mark each MainFileIncludes with the decision yielded by the UsedFunc. + void markUsed(llvm::StringRef EntryPoint, + llvm::ArrayRef ReferencedFiles, + llvm::DenseSet ReferencedStdlib, + llvm::function_ref); + private: // Identifying files in a way that persists from preamble build to subsequent // builds is surprisingly hard. FileID is unavailable in InclusionDirective(), @@ -140,6 +163,8 @@ llvm::StringMap NameToIndex; // Values are file indexes. // Maps a file's index to that of the files it includes. llvm::DenseMap> IncludeChildren; + // Index of standard library files, e.g. "vector". + llvm::DenseMap StandardIncludes; }; /// Returns a PPCallback that visits all inclusions in the main file. diff --git a/clang-tools-extra/clangd/Headers.cpp b/clang-tools-extra/clangd/Headers.cpp --- a/clang-tools-extra/clangd/Headers.cpp +++ b/clang-tools-extra/clangd/Headers.cpp @@ -70,6 +70,8 @@ Out->recordInclude(IncludingFileEntry->getName(), File->getName(), File->tryGetRealPathName()); } + if (IsAngled && File) + Out->recordSystemInclude(FileName, File->getName()); } void FileChanged(SourceLocation Loc, FileChangeReason Reason, @@ -102,6 +104,18 @@ IncludeStructure *Out; }; +// Map of unquoted to quoted +const llvm::StringMap &allStandardIncludes() { + static llvm::StringMap Result = { +#define SYMBOL(Name, Namespace, Header) \ + {llvm::StringRef(#Header).ltrim('<').rtrim('>'), #Header}, +#include "CSymbolMap.inc" +#include "StdSymbolMap.inc" +#undef SYMBOL + }; + return Result; +} + } // namespace bool isLiteralInclude(llvm::StringRef Include) { @@ -164,6 +178,17 @@ IncludeChildren[Parent].push_back(Child); } +void IncludeStructure::recordSystemInclude(llvm::StringRef Spelled, + llvm::StringRef IncludedName) { + const auto &AllStandardIncludes = allStandardIncludes(); + auto It = AllStandardIncludes.find(Spelled); + if (It == AllStandardIncludes.end()) + return; + auto New = StandardIncludes.try_emplace(It->getValue()); + if (New.second) + New.first->second = fileIndex(IncludedName); +} + unsigned IncludeStructure::fileIndex(llvm::StringRef Name) { auto R = NameToIndex.try_emplace(Name, RealPathNames.size()); if (R.second) @@ -204,6 +229,56 @@ return Result; } +void IncludeStructure::markUsed(llvm::StringRef EntryPoint, + llvm::ArrayRef ReferencedFiles, + llvm::DenseSet ReferencedStdlib, + llvm::function_ref Algorithm) { + auto Root = NameToIndex.find(EntryPoint); + if (Root == NameToIndex.end()) { + elog("IWYU: entrypoint {0} not found in include graph", EntryPoint); + return; + } + + llvm::DenseSet Referenced; + Referenced.reserve(ReferencedFiles.size()); + for (llvm::StringRef RefName : ReferencedFiles) { + dlog("{0} is REFERENCED", RefName); + auto It = NameToIndex.find(RefName); + if (It != NameToIndex.end()) + Referenced.insert(It->second); + } + for (llvm::StringRef RefStdlib : ReferencedStdlib) { + auto It = StandardIncludes.find(RefStdlib); + if (It == StandardIncludes.end()) { + dlog("<{0}> is REFERENCED but missing!"); + continue; + } + dlog("<{0}> is REFERENCED ({1})", RefStdlib, RealPathNames[It->second]); + Referenced.insert(It->second); + } + + auto Decisions = + Algorithm(IncludeChildren, std::move(Referenced), Root->second); + auto RootChildren = IncludeChildren.find(Root->second); + assert(RootChildren != IncludeChildren.end()); + llvm::DenseMap IncludeIndex; + for (const auto Index : RootChildren->second) { + if (!RealPathNames[Index].empty()) + IncludeIndex[RealPathNames[Index]] = Index; + } + for (auto &MFI : MainFileIncludes) { + // FIXME: skip includes that are not self-contained. + auto It = IncludeIndex.find(MFI.Resolved); + if (It != IncludeIndex.end()) { + auto DIt = Decisions.find(It->second); + if (DIt != Decisions.end()) { + MFI.Used = DIt->second; + dlog("{0} is {1}", MFI.Written, MFI.Used ? "USED" : "UNUSED"); + } + } + } +} + void IncludeInserter::addExisting(const Inclusion &Inc) { IncludedHeaders.insert(Inc.Written); if (!Inc.Resolved.empty()) diff --git a/clang-tools-extra/clangd/IWYU.h b/clang-tools-extra/clangd/IWYU.h new file mode 100644 --- /dev/null +++ b/clang-tools-extra/clangd/IWYU.h @@ -0,0 +1,43 @@ +//===--- IWYU.h - Include-what-you-use analysis -------------------*-C++-*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANGD_IWYU_H +#define LLVM_CLANG_TOOLS_EXTRA_CLANGD_IWYU_H + +#include "Headers.h" +#include "ParsedAST.h" +#include "clang/Basic/SourceLocation.h" +#include "llvm/ADT/DenseSet.h" + +namespace clang { +namespace clangd { + +struct ReferencedLocations { + llvm::DenseSet AllLocations; + llvm::DenseSet StandardLibraryHeaders; +}; +ReferencedLocations findReferencedLocations(ParsedAST &AST); + +llvm::DenseSet +findReferencedFiles(const llvm::DenseSet &, + const SourceManager &); + +inline llvm::DenseMap +directlyReferencedFiles(const IncludeStructure::AbstractIncludeGraph &Graph, + llvm::DenseSet Referenced, + unsigned EntryPoint) { + llvm::DenseMap Result; + for (unsigned Inclusion : Graph.lookup(EntryPoint)) + Result.try_emplace(Inclusion, Referenced.contains(Inclusion)); + return Result; +} + +} // namespace clangd +} // namespace clang + +#endif diff --git a/clang-tools-extra/clangd/IWYU.cpp b/clang-tools-extra/clangd/IWYU.cpp new file mode 100644 --- /dev/null +++ b/clang-tools-extra/clangd/IWYU.cpp @@ -0,0 +1,233 @@ +//===--- IWYU.cpp ------------------------------------------------*- C++-*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "IWYU.h" +#include "support/Logger.h" +#include "clang/AST/RecursiveASTVisitor.h" +#include "clang/Basic/SourceLocation.h" + +namespace clang { +namespace clangd { +namespace { + +// If DC is a std namespace, returns e.g. "std::chrono". +// If DC is top-level, returns "". +// Else returns None. +// Buf is used for storage if the namespace needs to be printed ('std' doesn't). +llvm::Optional getStdNamespace(const DeclContext *DC, + std::string &Buf) { + const NamespaceDecl *FirstNamespace; + unsigned NamespaceCount = 0; + for (; !DC->isTranslationUnit(); DC = DC->getParent()) { + if (!DC->isTransparentContext()) { + if (DC->isNamespace()) { + if (!NamespaceCount) + FirstNamespace = cast(DC); + ++NamespaceCount; + } else { + return llvm::None; + } + } + } + if (!NamespaceCount) + return llvm::StringRef(); + if (NamespaceCount == 1) + return FirstNamespace->getName(); + Buf = printNamespaceScope(*FirstNamespace); + return llvm::StringRef(Buf).rtrim(":"); +} + +llvm::Optional getStdlibHeader(const NamedDecl &ND) { + log("looking up header for {0}", ND.getNameAsString()); + const static auto &Lookup = [] { + // Lookup table is keyed first on the unqualified name, so we can bail out + // quickly without building strings if there's no match. + llvm::DenseMap>> + Result; +#define SYMBOL(Name, Scope, Header) Result[#Name].emplace_back(#Scope, #Header); +#include "CSymbolMap.inc" +#include "StdSymbolMap.inc" +#undef SYMBOL + for (auto &Unqual : Result) + for (auto &Qual : Unqual.second) { + // Swap "None" -> "", and "std::" -> "std", for easier lookup. + if (Qual.first == "None") + Qual.first = ""; + else + Qual.first.consume_back("::"); + } + return Result; + }(); + + // Bail out early in easy cases. + if (!ND.getDeclName().isIdentifier()) + return llvm::None; + auto It = Lookup.find(ND.getName()); + if (It == Lookup.end()) { + if (ND.getName() == "move" && ND.isInStdNamespace()) + + log("No match"); + return llvm::None; + } + + // OK, we have an unqualified name match. Does the namespace match? + std::string Buf; + auto NS = getStdNamespace(ND.getDeclContext(), Buf); + if (!NS) + return llvm::None; + log("with namespace {0}", *NS); + for (const auto &Pair : It->second) + if (*NS == Pair.first) { + log("found {0}", Pair.second); + return Pair.second; + } + return llvm::None; +} + +class ReferencedLocationCrawler + : public RecursiveASTVisitor { + using Base = RecursiveASTVisitor; + llvm::DenseSet Visited; + bool shouldAdd(const void *P) { return P && Visited.insert(P).second; } + + void add(const Decl *D) { + if (!D || !shouldAdd(D->getCanonicalDecl())) + return; + for (const Decl *Redecl : D->redecls()) + Result.AllLocations.insert(Redecl->getLocation()); + if (const auto *ND = llvm::dyn_cast(D)) { + if (auto StdlibHeader = getStdlibHeader(*ND)) + Result.StandardLibraryHeaders.insert(*StdlibHeader); + else if (ND->getDeclName().isIdentifier() && ND->getName() == "move" && + ND->isInStdNamespace()) { + // Ugly special case: there are two `std::move`s. + const FunctionDecl *FD = ND->getAsFunction(); + Result.StandardLibraryHeaders.insert( + FD && FD->getMinRequiredArguments() > 1 ? "" + : ""); + } + } + } + +public: + bool VisitDeclRefExpr(DeclRefExpr *DRE) { + add(DRE->getDecl()); + add(DRE->getFoundDecl()); + return true; + } + + bool VisitMemberExpr(MemberExpr *ME) { + add(ME->getMemberDecl()); + add(ME->getFoundDecl().getDecl()); + return true; + } + + bool VisitTagType(TagType *TT) { + add(TT->getDecl()); + return true; + } + + bool VisitCXXConstructExpr(CXXConstructExpr *CCE) { + add(CCE->getConstructor()); + return true; + } + + bool VisitTemplateSpecializationType(TemplateSpecializationType *TST) { + if (shouldAdd(TST)) { + add(TST->getTemplateName().getAsTemplateDecl()); // primary template + add(TST->getAsCXXRecordDecl()); // specialization + } + return true; + } + + bool VisitTypedefType(TypedefType *TT) { + add(TT->getDecl()); + return true; + } + +#if 0 + // Consider types of any subexpression used, even if the type is not named. + // This is helpful in getFoo().bar(), where Foo must be complete. + bool VisitExpr(Expr *E) { + TraverseType(E->getType()); + return true; + } +#endif + + bool TraverseType(QualType T) { + if (shouldAdd(T.getTypePtrOrNull())) { // don't care about quals + Base::TraverseType(T); + } + return true; + } + + ReferencedLocationCrawler(ReferencedLocations &Result) : Result(Result){}; + ReferencedLocations &Result; +}; + +// Given a set of referenced FileIDs, determines all the potentially-referenced +// files and macros by traversing expansion/spelling locations of macro IDs. +// This is used to map the referenced SourceLocations onto real files. +struct ReferencedFiles { + ReferencedFiles(const SourceManager &SM) : SM(SM) {} + llvm::DenseSet Files; + llvm::DenseSet Macros; + const SourceManager &SM; + + void add(SourceLocation Loc) { add(SM.getFileID(Loc), Loc); } + + void add(FileID FID, SourceLocation Loc) { + if (FID.isInvalid()) + return; + assert(SM.isInFileID(Loc, FID)); + if (Loc.isFileID()) { + Files.insert(FID); + return; + } + // Don't process the same macro FID twice. + if (!Macros.insert(FID).second) + return; + const auto &Exp = SM.getSLocEntry(FID).getExpansion(); + add(Exp.getSpellingLoc()); + add(Exp.getExpansionLocStart()); + add(Exp.getExpansionLocEnd()); + } +}; + +} // namespace + +ReferencedLocations findReferencedLocations(ParsedAST &AST) { + ReferencedLocations Result; + ReferencedLocationCrawler C(Result); + C.TraverseAST(AST.getASTContext()); + // XXX macros + return Result; +} + +llvm::DenseSet +findReferencedFiles(const llvm::DenseSet &Locs, + const SourceManager &SM) { + std::vector Sorted{Locs.begin(), Locs.end()}; + llvm::sort(Sorted); // This groups by FileID! + ReferencedFiles Result(SM); + for (auto It = Sorted.begin(); It < Sorted.end();) { + FileID FID = SM.getFileID(*It); + Result.add(FID, *It); + // Cheaply skip over all the other locations from the same FileID. + // This avoids lots of redundant Loc->File lookups for the same file. + do + ++It; + while (It != Sorted.end() && SM.isInFileID(*It, FID)); + } + return std::move(Result.Files); +} + +} // namespace clangd +} // namespace clang diff --git a/clang-tools-extra/clangd/ParsedAST.h b/clang-tools-extra/clangd/ParsedAST.h --- a/clang-tools-extra/clangd/ParsedAST.h +++ b/clang-tools-extra/clangd/ParsedAST.h @@ -83,6 +83,8 @@ return getASTContext().getLangOpts(); } + void computeUsedIncludes(); + /// This function returns top-level decls present in the main file of the AST. /// The result does not include the decls that come from the preamble. /// (These should be const, but RecursiveASTVisitor requires Decl*). diff --git a/clang-tools-extra/clangd/ParsedAST.cpp b/clang-tools-extra/clangd/ParsedAST.cpp --- a/clang-tools-extra/clangd/ParsedAST.cpp +++ b/clang-tools-extra/clangd/ParsedAST.cpp @@ -17,6 +17,7 @@ #include "FeatureModule.h" #include "Headers.h" #include "HeuristicResolver.h" +#include "IWYU.h" #include "IncludeFixer.h" #include "Preamble.h" #include "SourceCode.h" @@ -482,10 +483,36 @@ Diags->insert(Diags->end(), D.begin(), D.end()); } } - return ParsedAST(Inputs.Version, std::move(Preamble), std::move(Clang), + ParsedAST Result(Inputs.Version, std::move(Preamble), std::move(Clang), std::move(Action), std::move(Tokens), std::move(Macros), std::move(ParsedDecls), std::move(Diags), std::move(Includes), std::move(CanonIncludes)); + // FIXME: Warn about missing includes. + if (Result.Diags) { + Result.computeUsedIncludes(); + for (const auto &Include : Result.getIncludeStructure().MainFileIncludes) { + if (!Include.Used) { + Diag D; + D.Message = "Unused header"; + D.Name = "clangd-unused-include"; + D.Range.start.line = Include.HashLine; + D.Range.end.line = Include.HashLine + 1; + D.Fixes.emplace_back(); + D.Fixes.back().Message = "Remove include"; + D.Fixes.back().Edits.emplace_back(); + D.Fixes.back().Edits.back().range = D.Range; + D.InsideMainFile = true; + D.File = + Result.getSourceManager() + .getFileEntryForID(Result.getSourceManager().getMainFileID()) + ->getName() + .str(); + D.Severity = DiagnosticsEngine::Warning; + Result.Diags->push_back(std::move(D)); + } + } + } + return Result; } ParsedAST::ParsedAST(ParsedAST &&Other) = default; @@ -588,5 +615,27 @@ return llvm::None; return llvm::StringRef(Preamble->Version); } + +void ParsedAST::computeUsedIncludes() { + const auto &SM = getSourceManager(); + + auto Refs = findReferencedLocations(*this); + auto ReferencedFileIDs = + findReferencedFiles(Refs.AllLocations, getSourceManager()); + std::vector ReferencedFilenames; + ReferencedFilenames.reserve(ReferencedFileIDs.size()); + for (FileID FID : ReferencedFileIDs) { + const FileEntry *FE = SM.getFileEntryForID(FID); + if (!FE) { + elog("Missing FE for {0}", SM.getComposedLoc(FID, 0).printToString(SM)); + continue; + } + ReferencedFilenames.push_back(SM.getFileEntryForID(FID)->getName()); + } + Includes.markUsed(SM.getFileEntryForID(SM.getMainFileID())->getName(), + ReferencedFilenames, Refs.StandardLibraryHeaders, + directlyReferencedFiles); +} + } // namespace clangd } // namespace clang diff --git a/clang-tools-extra/clangd/unittests/CMakeLists.txt b/clang-tools-extra/clangd/unittests/CMakeLists.txt --- a/clang-tools-extra/clangd/unittests/CMakeLists.txt +++ b/clang-tools-extra/clangd/unittests/CMakeLists.txt @@ -61,6 +61,7 @@ IndexActionTests.cpp IndexTests.cpp InlayHintTests.cpp + IWYUTests.cpp JSONTransportTests.cpp LoggerTests.cpp LSPBinderTests.cpp diff --git a/clang-tools-extra/clangd/unittests/IWYUTests.cpp b/clang-tools-extra/clangd/unittests/IWYUTests.cpp new file mode 100644 --- /dev/null +++ b/clang-tools-extra/clangd/unittests/IWYUTests.cpp @@ -0,0 +1,117 @@ +//===-- IWYUTests.cpp ---------------------------------*- C++ -*-----------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "Annotations.h" +#include "IWYU.h" +#include "TestTU.h" +#include "index/FileIndex.h" +#include "index/Index.h" +#include "index/MemIndex.h" +#include "index/Merge.h" +#include "index/Symbol.h" +#include "clang/Index/IndexSymbol.h" +#include "gmock/gmock.h" +#include "gtest/gtest.h" + +namespace clang { +namespace clangd { +namespace { +using testing::ElementsAreArray; + +TEST(ReferencedLocations, All) { + struct TestCase { + const char *HeaderCode; + const char *MainCode; + }; + TestCase Cases[] = {{ + "int ^x();", + "int y = x();", + }, + { + "class ^X;", + "X *y;", + }, + { + "template class ^X;", + "X *y;", + }, + { + "using ^Integer = int;", + "Integer x;", + }, + { + "struct X{int ^a;}; X ^foo();", + "int y = foo().a;", + }, + { + "class ^X{}; X ^foo();", + "auto bar() { return foo(); }", + }, + { + "class ^X; class ^X{}; class ^X;", + "X *y;", + }, + { + // FIXME: should report the using decl too. + // This information is not preserved in the AST. + "namespace ns { class ^X; }; using ns::X;", + "X *y;", + }}; + for (const TestCase &T : Cases) { + TestTU TU; + TU.Code = T.MainCode; + Annotations Header(T.HeaderCode); + TU.HeaderCode = Header.code().str(); + auto AST = TU.build(); + + std::vector Points; + for (SourceLocation Loc : findReferencedLocations(AST).AllLocations) { + if (AST.getSourceManager().getBufferName(Loc).endswith( + TU.HeaderFilename)) { + Points.push_back(offsetToPosition( + TU.HeaderCode, AST.getSourceManager().getFileOffset(Loc))); + } + } + llvm::sort(Points); + + EXPECT_EQ(Points, Header.points()) << T.HeaderCode << "\n---\n" + << T.MainCode; + } +} + +TEST(ReferencedLocations, Stdlib) { + struct TestCase { + const char *HeaderCode; + const char *MainCode; + std::vector StdlibHeaders; + }; + TestCase Cases[] = { + { + "template class vector{};", + "std::vector *x;", + {""}, + }, + { + "namespace foo { class vector; }", + "std::foo::vector *x;", + {}, + }}; + for (const TestCase &T : Cases) { + TestTU TU; + TU.Code = T.MainCode; + TU.HeaderCode = std::string("namespace std {\n") + T.HeaderCode + "\n}"; + auto AST = TU.build(); + + auto StdlibHeaders = findReferencedLocations(AST).StandardLibraryHeaders; + EXPECT_THAT(StdlibHeaders, ElementsAreArray(T.StdlibHeaders)); + } +} + +} // namespace +} // namespace clangd +} // namespace clang