diff --git a/clang-tools-extra/include-cleaner/include/clang-include-cleaner/Record.h b/clang-tools-extra/include-cleaner/include/clang-include-cleaner/Record.h --- a/clang-tools-extra/include-cleaner/include/clang-include-cleaner/Record.h +++ b/clang-tools-extra/include-cleaner/include/clang-include-cleaner/Record.h @@ -34,6 +34,7 @@ class CompilerInstance; class Decl; class FileEntry; +class HeaderSearch; class Preprocessor; class PPCallbacks; class FileManager; @@ -148,6 +149,9 @@ include_cleaner::Includes Includes; }; +/// Extracts the search path, as needed by Includes. +std::vector getHeaderSearchPath(const HeaderSearch &HS); + } // namespace include_cleaner } // namespace clang diff --git a/clang-tools-extra/include-cleaner/include/clang-include-cleaner/Types.h b/clang-tools-extra/include-cleaner/include/clang-include-cleaner/Types.h --- a/clang-tools-extra/include-cleaner/include/clang-include-cleaner/Types.h +++ b/clang-tools-extra/include-cleaner/include/clang-include-cleaner/Types.h @@ -30,7 +30,9 @@ #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringMap.h" #include "llvm/ADT/StringRef.h" +#include "llvm/ADT/StringSet.h" #include +#include #include #include #include @@ -167,6 +169,11 @@ /// Supports efficiently hit-testing Headers against Includes. class Includes { public: + /// Registers a directory on the include path (-I etc) from HeaderSearch. + /// This allows reasoning about equivalence of e.g. "path/a/b.h" and "a/b.h". + /// This must be called before calling add() in order to take effect. + void addSearchDirectory(llvm::StringRef); + void add(const Include &); /// All #includes seen, in the order they appear. @@ -183,9 +190,12 @@ const Include *atLine(unsigned OneBasedIndex) const; private: + llvm::StringSet<> SearchPath; + std::vector All; // Lookup structures for match(), values are index into All. llvm::StringMap> BySpelling; + llvm::StringMap> BySpellingAlternate; llvm::DenseMap> ByFile; llvm::DenseMap ByLine; }; diff --git a/clang-tools-extra/include-cleaner/lib/Record.cpp b/clang-tools-extra/include-cleaner/lib/Record.cpp --- a/clang-tools-extra/include-cleaner/lib/Record.cpp +++ b/clang-tools-extra/include-cleaner/lib/Record.cpp @@ -14,13 +14,16 @@ #include "clang/Basic/SourceManager.h" #include "clang/Basic/Specifiers.h" #include "clang/Frontend/CompilerInstance.h" +#include "clang/Lex/DirectoryLookup.h" #include "clang/Lex/MacroInfo.h" #include "clang/Lex/PPCallbacks.h" #include "clang/Lex/Preprocessor.h" #include "clang/Tooling/Inclusions/HeaderAnalysis.h" #include "clang/Tooling/Inclusions/StandardLibrary.h" +#include "llvm/ADT/StringRef.h" #include #include +#include namespace clang::include_cleaner { namespace { @@ -28,7 +31,11 @@ class PPRecorder : public PPCallbacks { public: PPRecorder(RecordedPP &Recorded, const Preprocessor &PP) - : Recorded(Recorded), PP(PP), SM(PP.getSourceManager()) {} + : Recorded(Recorded), PP(PP), SM(PP.getSourceManager()) { + for (const auto &Dir : PP.getHeaderSearchInfo().search_dir_range()) + if (Dir.getLookupType() == DirectoryLookup::LT_NormalDir) + Recorded.Includes.addSearchDirectory(Dir.getDirRef()->getName()); + } void FileChanged(SourceLocation Loc, FileChangeReason Reason, SrcMgr::CharacteristicKind FileType, diff --git a/clang-tools-extra/include-cleaner/lib/Types.cpp b/clang-tools-extra/include-cleaner/lib/Types.cpp --- a/clang-tools-extra/include-cleaner/lib/Types.cpp +++ b/clang-tools-extra/include-cleaner/lib/Types.cpp @@ -10,8 +10,15 @@ #include "TypesInternal.h" #include "clang/AST/Decl.h" #include "clang/Basic/FileEntry.h" +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallString.h" +#include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringExtras.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/Support/Path.h" #include "llvm/Support/raw_ostream.h" +#include namespace clang::include_cleaner { @@ -94,16 +101,51 @@ .str(); } +static llvm::SmallString<128> normalizePath(llvm::StringRef Path) { + namespace path = llvm::sys::path; + + llvm::SmallString<128> P = Path; + path::remove_dots(P, /*remove_dot_dot=*/true); + path::native(P, path::Style::posix); + while (!P.empty() && path::is_separator(P.back())) + P.pop_back(); + return P; +} + +void Includes::addSearchDirectory(llvm::StringRef Path) { + SearchPath.try_emplace(normalizePath(Path)); +} + void Includes::add(const Include &I) { + namespace path = llvm::sys::path; + unsigned Index = All.size(); All.push_back(I); auto BySpellingIt = BySpelling.try_emplace(I.Spelled).first; All.back().Spelled = BySpellingIt->first(); // Now we own the backing string. BySpellingIt->second.push_back(Index); - if (I.Resolved) - ByFile[I.Resolved].push_back(Index); ByLine[I.Line] = Index; + if (I.Resolved) { + ByFile[I.Resolved].push_back(Index); + + // While verbatim headers ideally should match #include spelling exactly, + // we want to be tolerant of different spellings of the same file. + // + // If the search path includes "/a/b" and "/a/b/c/d", + // verbatim "e/f" should match (spelled=c/d/e/f, resolved=/a/b/c/d/e/f). + // We assume entry's (normalized) name will match the search dirs. + auto Path = normalizePath(I.Resolved->getName()); + for (llvm::StringRef Parent = path::parent_path(Path); !Parent.empty(); + Parent = path::parent_path(Parent)) { + if (SearchPath.contains(Parent)) { + llvm::StringRef Rel = llvm::StringRef(Path).drop_front(Parent.size()); + while (!Rel.empty() && path::is_separator(Rel.front())) + Rel = Rel.drop_front(); + BySpellingAlternate[Rel].push_back(Index); + } + } + } } const Include *Includes::atLine(unsigned OneBasedIndex) const { @@ -122,11 +164,16 @@ for (unsigned I : BySpelling.lookup(H.standard().name().trim("<>"))) Result.push_back(&All[I]); break; - case Header::Verbatim: - for (unsigned I : BySpelling.lookup(H.verbatim().trim("\"<>"))) + case Header::Verbatim: { + llvm::StringRef Spelling = H.verbatim().trim("\"<>"); + for (unsigned I : BySpelling.lookup(Spelling)) Result.push_back(&All[I]); + for (unsigned I : BySpellingAlternate.lookup(Spelling)) + if (!llvm::is_contained(Result, &All[I])) + Result.push_back(&All[I]); break; } + } return Result; } diff --git a/clang-tools-extra/include-cleaner/unittests/TypesTest.cpp b/clang-tools-extra/include-cleaner/unittests/TypesTest.cpp --- a/clang-tools-extra/include-cleaner/unittests/TypesTest.cpp +++ b/clang-tools-extra/include-cleaner/unittests/TypesTest.cpp @@ -17,6 +17,8 @@ namespace clang::include_cleaner { namespace { using testing::ElementsAre; +using testing::IsEmpty; +using testing::UnorderedElementsAre; // Matches an Include* on the specified line; MATCHER_P(line, N, "") { return arg->Line == (unsigned)N; } @@ -43,5 +45,37 @@ ElementsAre(line(4), line(5))); } +TEST(RecordedIncludesTest, MatchVerbatim) { + auto FS = llvm::makeIntrusiveRefCnt(); + FileManager FM(FileSystemOptions{}); + + Includes Inc; + + // By default, a verbatim header only matches includes with the same spelling. + const FileEntry *Foo = + FM.getVirtualFile("repo/lib/include/rel/foo.h", /*Size=*/0, time_t{}); + Inc.add(Include{"lib/include/rel/foo.h", Foo, SourceLocation(), 1}); + Inc.add(Include{"rel/foo.h", Foo, SourceLocation(), 2}); + EXPECT_THAT(Inc.match(Header("")), ElementsAre(line(2))); + + // A verbatim header can match another spelling if the search path + // suggests it's equivalent. + const FileEntry *Bar = + FM.getVirtualFile("repo/lib/include/rel/bar.h", /*Size=*/0, time_t{}); + Inc.addSearchDirectory("repo/"); + Inc.addSearchDirectory("repo/lib/include"); + Inc.add(Include{"lib/include/rel/bar.h", Bar, SourceLocation(), 3}); + Inc.add(Include{"rel/bar.h", Bar, SourceLocation(), 4}); + EXPECT_THAT(Inc.match(Header("")), + UnorderedElementsAre(line(3), line(4))); + + // We don't apply this logic to system headers, though. + const FileEntry *Vector = + FM.getVirtualFile("repo/lib/include/vector", /*Size=*/0, time_t{}); + Inc.add(Include{"lib/include/vector", Vector, SourceLocation(), 5}); + EXPECT_THAT(Inc.match(Header(*tooling::stdlib::Header::named(""))), + IsEmpty()); +} + } // namespace } // namespace clang::include_cleaner