Index: include/clang/Tooling/CompilationDatabase.h =================================================================== --- include/clang/Tooling/CompilationDatabase.h +++ include/clang/Tooling/CompilationDatabase.h @@ -31,8 +31,12 @@ #include "clang/Basic/LLVM.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/OwningPtr.h" +#include "llvm/ADT/StringMap.h" #include "llvm/ADT/StringRef.h" #include "llvm/ADT/Twine.h" +#include "llvm/Support/FileSystem.h" +#include "llvm/Support/PathV2.h" + #include #include @@ -186,6 +190,66 @@ std::vector CompileCommands; }; +struct PathComparator { + virtual bool equivalent(const Twine &FileA, const Twine &FileB) const { + return FileA.str() == FileB.str() || + llvm::sys::fs::equivalent(FileA, FileB); + } +}; + +/// \brief A trie to efficiently match against the entries of the compilation +/// database in order of matching suffix length. +/// +/// When a clang tool is supposed to operate on a specific file, we have to +/// find the corresponding file in the compilation database. Although entries +/// in the compilation database are keyed by filename, a simple string match +/// is insufficient because of symlinks. Commonly, a project hierarchy looks +/// like this: +/// //.cc (used as input for the tool) +/// ///.cc (stored in DB) +/// +/// Furthermore, there might be symlinks inside the source folder or inside the +/// database, so that the same source file is translated with different build +/// options. +/// +/// For a given input file, the \c FileNameMatchTrie finds its entries in order +/// of matching suffix length. For each suffix length, there might be one or +/// more entries in the database. For each of those entries, it calls +/// \c llvm::sys::fs::equivalent() (injected as \c PathComparator). There might +/// be zero or more entries with the same matching suffix length that are +/// equivalent to the input file. Three cases are distinguished: +/// 0 equivalent files: Continue with the next suffix length. +/// 1 equivalent file: Best match found, return it. +/// >1 equivalent files: Match is ambiguous, return error. +class FileNameMatchTrie { +public: + /// \brief Inserts 'NewPath' into this trie. + void insert(StringRef NewPath, unsigned ConsumedLength = 0); + + /// \brief Finds the corresponding file in this trie. + /// + /// Returns file name stored in this trie that is equivalent to 'FileName' + /// according to 'Comparator', if it can be uniquely identified. If there + /// are no matches an empty StringRef is return. If there are ambigious + /// matches, an empty StringRef is return and a corresponding message written + /// to 'Error'. + StringRef findEquivalent(const PathComparator &Comparator, + StringRef FileName, + std::string *Error, + unsigned ConsumedLength = 0) const; + + /// \brief Gets all paths in this FileNameMatchTrie. + void getAll(std::vector &Results) const; + +private: + // The stored absolute path in this node. Only valid for leaf nodes, i.e. + // nodes where Children.empty(). + std::string Path; + + // The children of this node stored in a map based on the next path segment. + llvm::StringMap Children; +}; + } // end namespace tooling } // end namespace clang Index: include/clang/Tooling/JSONCompilationDatabase.h =================================================================== --- include/clang/Tooling/JSONCompilationDatabase.h +++ include/clang/Tooling/JSONCompilationDatabase.h @@ -92,6 +92,8 @@ // Maps file paths to the compile command lines for that file. llvm::StringMap< std::vector > IndexByFile; + + FileNameMatchTrie MatchTrie; llvm::OwningPtr Database; llvm::SourceMgr SM; Index: lib/Tooling/CompilationDatabase.cpp =================================================================== --- lib/Tooling/CompilationDatabase.cpp +++ lib/Tooling/CompilationDatabase.cpp @@ -132,5 +132,83 @@ extern volatile int JSONAnchorSource; static int JSONAnchorDest = JSONAnchorSource; +void FileNameMatchTrie::insert(StringRef NewPath, unsigned ConsumedLength) { + if (llvm::sys::path::is_relative(NewPath)) + return; + if (Path.empty()) { + // This is an empty leaf. Store NewPath and return. + Path = NewPath; + return; + } + if (Children.empty()) { + // This is a leaf, ignore duplicate entry if 'Path' equals 'NewPath'. + if (NewPath == Path) + return; + // Make this a node and create a child-leaf with 'Path'. + StringRef Element(llvm::sys::path::filename( + StringRef(Path).drop_back(ConsumedLength))); + Children[Element].Path = Path; + } + StringRef Element(llvm::sys::path::filename( + StringRef(NewPath).drop_back(ConsumedLength))); + Children[Element].insert(NewPath, ConsumedLength + Element.size() + 1); +} + +StringRef FileNameMatchTrie::findEquivalent(const PathComparator &Comparator, + StringRef FileName, + std::string *Error, + unsigned ConsumedLength) const { + if (llvm::sys::path::is_relative(FileName)) { + *Error = "Cannot resolve relative paths"; + return StringRef(); + } + if (Children.empty()) { + if (Comparator.equivalent(StringRef(Path), FileName)) + return StringRef(Path); + return StringRef(); + } + StringRef Element(llvm::sys::path::filename(FileName.drop_back( + ConsumedLength))); + llvm::StringMap::const_iterator MatchingChild = + Children.find(Element); + if (MatchingChild != Children.end()) { + StringRef Result = MatchingChild->getValue().findEquivalent( + Comparator, FileName, Error, ConsumedLength + Element.size() + 1); + if (!Result.empty() || !Error->empty()) + return Result; + } + // FIXME: This will re-examine 'MatchingChild' which does not have any + // matching children. Thus, the result is still correct, but performance + // can be improved. + std::vector AllChildren; + getAll(AllChildren); + StringRef Result; + for (unsigned i = 0; i < AllChildren.size(); i++) { + if (Comparator.equivalent(AllChildren[i], FileName)) { + if (Result.empty()) { + Result = AllChildren[i]; + } else { + *Error = "Path is ambiguous"; + return StringRef(); + } + } + } + return Result; +} + +void FileNameMatchTrie::getAll(std::vector &Results) const { + if (Path.empty()) + return; + if (Children.empty()) { + Results.push_back(StringRef(Path)); + return; + } + for (llvm::StringMap::const_iterator + It = Children.begin(), E = Children.end(); + It != E; ++It) { + It->getValue().getAll(Results); + } +} + } // end namespace tooling } // end namespace clang Index: lib/Tooling/JSONCompilationDatabase.cpp =================================================================== --- lib/Tooling/JSONCompilationDatabase.cpp +++ lib/Tooling/JSONCompilationDatabase.cpp @@ -164,8 +164,19 @@ JSONCompilationDatabase::getCompileCommands(StringRef FilePath) const { llvm::SmallString<128> NativeFilePath; llvm::sys::path::native(FilePath, NativeFilePath); + std::vector PossibleMatches; + PathComparator Comparator; + std::string Error; + StringRef Match = MatchTrie.findEquivalent(Comparator, NativeFilePath.str(), + &Error); + if (Match.empty()) { + if (Error.empty()) + Error = "No match found."; + llvm::outs() << Error << "\n"; + return std::vector(); + } llvm::StringMap< std::vector >::const_iterator - CommandsRefI = IndexByFile.find(NativeFilePath); + CommandsRefI = IndexByFile.find(Match); if (CommandsRefI == IndexByFile.end()) return std::vector(); const std::vector &CommandsRef = CommandsRefI->getValue(); @@ -271,10 +282,19 @@ return false; } llvm::SmallString<8> FileStorage; + StringRef FileName = File->getValue(FileStorage); llvm::SmallString<128> NativeFilePath; - llvm::sys::path::native(File->getValue(FileStorage), NativeFilePath); + if (llvm::sys::path::is_relative(FileName)) { + llvm::SmallString<8> DirectoryStorage; + llvm::SmallString<128> AbsolutePath(Directory->getValue(DirectoryStorage)); + llvm::sys::path::append(AbsolutePath, FileName); + llvm::sys::path::native(AbsolutePath.str(), NativeFilePath); + } else { + llvm::sys::path::native(FileName, NativeFilePath); + } IndexByFile[NativeFilePath].push_back( - CompileCommandRef(Directory, Command)); + CompileCommandRef(Directory, Command)); + MatchTrie.insert(NativeFilePath.str()); } return true; } Index: unittests/Tooling/CompilationDatabaseTest.cpp =================================================================== --- unittests/Tooling/CompilationDatabaseTest.cpp +++ unittests/Tooling/CompilationDatabaseTest.cpp @@ -55,13 +55,13 @@ getAllFiles("[]", ErrorMessage)) << ErrorMessage; std::vector expected_files; - expected_files.push_back("file1"); - expected_files.push_back("file2"); + expected_files.push_back("/dir/file1"); + expected_files.push_back("/dir/file2"); EXPECT_EQ(expected_files, getAllFiles( - "[{\"directory\":\"dir\"," + "[{\"directory\":\"/dir\"," "\"command\":\"command\"," "\"file\":\"file1\"}," - " {\"directory\":\"dir\"," + " {\"directory\":\"/dir\"," "\"command\":\"command\"," "\"file\":\"file2\"}]", ErrorMessage)) << ErrorMessage; @@ -81,6 +81,82 @@ return Commands[0]; } +struct FakeComparator : public PathComparator { + virtual bool equivalent(const Twine &FileA, const Twine &FileB) const { + return StringRef(FileA.str()).equals_lower(FileB.str()); + } +}; + +class FileNameMatchTrieTest : public ::testing::Test { +protected: + StringRef find(StringRef Path) { + return Trie.findEquivalent(Comparator, Path, &Error); + } + + FileNameMatchTrie Trie; + FakeComparator Comparator; + std::string Error; +}; + +TEST_F(FileNameMatchTrieTest, InsertingRelativePath) { + Trie.insert("/path/file.cc"); + Trie.insert("file.cc"); + EXPECT_EQ("/path/file.cc", find("/path/file.cc")); + EXPECT_EQ("", Error); +} + +TEST_F(FileNameMatchTrieTest, MatchingRelativePath) { + EXPECT_EQ("", find("file.cc")); + EXPECT_EQ("Cannot resolve relative paths", Error); +} + +TEST_F(FileNameMatchTrieTest, ReturnsBestResults) { + Trie.insert("/d/c/b.cc"); + Trie.insert("/d/b/b.cc"); + EXPECT_EQ("/d/b/b.cc", find("/d/b/b.cc")); + EXPECT_EQ("", Error); +} + +TEST_F(FileNameMatchTrieTest, HandlesSymlinks) { + Trie.insert("/AA/file.cc"); + EXPECT_EQ("/AA/file.cc", find("/aa/file.cc")); + EXPECT_EQ("", Error); +} + +TEST_F(FileNameMatchTrieTest, ReportsSymlinkAmbiguity) { + Trie.insert("/Aa/file.cc"); + Trie.insert("/aA/file.cc"); + EXPECT_TRUE(find("/aa/file.cc").empty()); + EXPECT_EQ("Path is ambiguous", Error); +} + +TEST_F(FileNameMatchTrieTest, LongerMatchingSuffixPreferred) { + Trie.insert("/src/Aa/file.cc"); + Trie.insert("/src/aA/file.cc"); + Trie.insert("/SRC/aa/file.cc"); + EXPECT_EQ("/SRC/aa/file.cc", find("/src/aa/file.cc")); + EXPECT_EQ("", Error); +} + +TEST_F(FileNameMatchTrieTest, EmptyTrie) { + EXPECT_TRUE(find("/some/path").empty()); + EXPECT_EQ("", Error); +} + +TEST_F(FileNameMatchTrieTest, NoResult) { + Trie.insert("/somepath/otherfile.cc"); + Trie.insert("/otherpath/somefile.cc"); + EXPECT_EQ("", find("/somepath/somefile.cc")); + EXPECT_EQ("", Error); +} + +TEST_F(FileNameMatchTrieTest, RootElementDifferent) { + Trie.insert("/path/file.cc"); + Trie.insert("/otherpath/file.cc"); + EXPECT_EQ("/path/file.cc", find("/path/file.cc")); + EXPECT_EQ("", Error); +} + TEST(findCompileArgsInJsonDatabase, FindsNothingIfEmpty) { std::string ErrorMessage; CompileCommand NotFound = findCompileArgsInJsonDatabase( @@ -148,7 +224,7 @@ } TEST(findCompileArgsInJsonDatabase, FindsEntry) { - StringRef Directory("directory"); + StringRef Directory("/directory"); StringRef FileName("file"); StringRef Command("command"); std::string JsonDatabase = "["; @@ -162,19 +238,19 @@ JsonDatabase += "]"; std::string ErrorMessage; CompileCommand FoundCommand = findCompileArgsInJsonDatabase( - "file4", JsonDatabase, ErrorMessage); - EXPECT_EQ("directory4", FoundCommand.Directory) << ErrorMessage; + "/directory4/file4", JsonDatabase, ErrorMessage); + EXPECT_EQ("/directory4", FoundCommand.Directory) << ErrorMessage; ASSERT_EQ(1u, FoundCommand.CommandLine.size()) << ErrorMessage; EXPECT_EQ("command4", FoundCommand.CommandLine[0]) << ErrorMessage; } static std::vector unescapeJsonCommandLine(StringRef Command) { std::string JsonDatabase = - ("[{\"directory\":\"\", \"file\":\"test\", \"command\": \"" + + ("[{\"directory\":\"/root\", \"file\":\"test\", \"command\": \"" + Command + "\"}]").str(); std::string ErrorMessage; CompileCommand FoundCommand = findCompileArgsInJsonDatabase( - "test", JsonDatabase, ErrorMessage); + "/root/test", JsonDatabase, ErrorMessage); EXPECT_TRUE(ErrorMessage.empty()) << ErrorMessage; return FoundCommand.CommandLine; }