Index: include/clang/Tooling/CompilationDatabase.h =================================================================== --- include/clang/Tooling/CompilationDatabase.h +++ include/clang/Tooling/CompilationDatabase.h @@ -31,8 +31,12 @@ #include "clang/Basic/LLVM.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/OwningPtr.h" +#include "llvm/ADT/StringMap.h" #include "llvm/ADT/StringRef.h" #include "llvm/ADT/Twine.h" +#include "llvm/Support/FileSystem.h" +#include "llvm/Support/PathV2.h" + #include #include @@ -186,6 +190,105 @@ std::vector CompileCommands; }; +struct PathComparator { + virtual bool equivalent(const Twine &FileA, const Twine &FileB) const { + return FileA.str() == FileB.str() || + llvm::sys::fs::equivalent(FileA, FileB); + } +}; + + +/// \brief A trie to efficiently match against the entries of the compilation +/// database in order of matching suffix length. +/// +/// When a clang tool is supposed to operate on a specific file, we have to +/// find the corresponding file in the compilation database. Although entries +/// in the compilation database are keyed by filename, a simple string match +/// is insufficient because of symlinks. Commonly, a project hierarchy looks +/// like this: +/// //src//.cc (used as input for the tool) +/// //build///.cc (stored in DB) +/// +/// Furthermore, there might be symlinks inside the source folder or inside the +/// database, so that the same source file is translated with different build +/// options. +/// +/// For a given input file, the \c FileNameMatchTrie finds its entries in order +/// of matching suffix length. For each suffix length, there might be one or +/// more entries in the database. For each of those entries, it calls +/// \c llvm::sys::fs::equivalent() (injected as \c PathComparator). There might +/// be zero or more entries with the same matching suffix length that are +/// equivalent to the input file. Three cases are distinguished: +/// 0 equivalent files: Continue with the next suffix length. +/// 1 equivalent file: Best match found, return it. +/// >1 equivalent files: Match is ambiguous, return error. +/// +/// Each node has storage for up to one path and a map mapping a path segment to +/// child nodes. The trie starts with an empty root node. An insert of a path +/// 'p'starts at the root node and does the following: +/// - If the node is empty, insert 'p' into its storage and abort. +/// - If the node has a path 'p2' but no children, take the last path segment +/// 's' of 'p2', put a new child into the map at 's' an insert the rest of +/// 'p2' there. +/// - Insert a new child for the last segment of 'p' and insert the rest of 'p' +/// there. +/// +/// To find the best matching node for a given path 'p', the \c findEquivalent() +/// function is called recursively for each path segment (back to fron) of 'p' +/// until a node 'n' is reached that does not .. +/// - .. have children. In this case it is checked +/// whether the stored path is equivalent to 'p'. If yes, the best match is +/// found. Otherwise continue with the parent node as if this node did not +/// exist. +/// - .. a child matching the next path segment. In this case, all children of +/// 'n' are an equally good match for 'p'. All children are of 'n' are found +/// recursively and their equivalence to 'p' is determined. If none are +/// equivalent, continue with the parent node as if 'n' didn't exist. If one +/// is equivalent, the best match is found. Otherwise, report and ambigiuity +/// error. +/// +/// An insert operation is linear in the number of a path's segments. The +/// \c findEquivalent() operation can will in worst case (when the Trie does not +/// contain the input path) compare each store path against the input path. +class FileNameMatchTrie { +public: + /// \brief Inserts 'NewPath' into this trie. + void insert(StringRef NewPath, unsigned ConsumedLength = 0); + + /// \brief Finds the corresponding file in this trie. + /// + /// Returns file name stored in this trie that is equivalent to 'FileName' + /// according to 'Comparator', if it can be uniquely identified. If there + /// are no matches an empty StringRef is return. If there are ambigious + /// matches, an empty StringRef is return and a corresponding message written + /// to 'Error'. + StringRef findEquivalent(StringRef FileName, + llvm::raw_ostream &Error) const; + +private: + friend class FileNameMatchTrieTest; + + /// Internal version of \c findEquivalent(). If multiple paths fit 'FileName' + /// equally well, \c IsAmbiguous is set to \c true. \c ConsumedLength denotes + /// the number of \c Filename's trailing characters already consumed during + /// recursion. + StringRef findEquivalent(const PathComparator &Comparator, + StringRef FileName, + bool &IsAmbiguous, + unsigned ConsumedLength) const; + + /// \brief Gets all paths in this FileNameMatchTrie. + void getAll(std::vector &Results, + llvm::StringMap::const_iterator Except) const; + + // The stored absolute path in this node. Only valid for leaf nodes, i.e. + // nodes where Children.empty(). + std::string Path; + + // The children of this node stored in a map based on the next path segment. + llvm::StringMap Children; +}; + } // end namespace tooling } // end namespace clang Index: include/clang/Tooling/JSONCompilationDatabase.h =================================================================== --- include/clang/Tooling/JSONCompilationDatabase.h +++ include/clang/Tooling/JSONCompilationDatabase.h @@ -92,6 +92,8 @@ // Maps file paths to the compile command lines for that file. llvm::StringMap< std::vector > IndexByFile; + + FileNameMatchTrie MatchTrie; llvm::OwningPtr Database; llvm::SourceMgr SM; Index: lib/Tooling/CompilationDatabase.cpp =================================================================== --- lib/Tooling/CompilationDatabase.cpp +++ lib/Tooling/CompilationDatabase.cpp @@ -132,5 +132,94 @@ extern volatile int JSONAnchorSource; static int JSONAnchorDest = JSONAnchorSource; +void FileNameMatchTrie::insert(StringRef NewPath, unsigned ConsumedLength) { + if (llvm::sys::path::is_relative(NewPath)) + return; + if (Path.empty()) { + // This is an empty leaf. Store NewPath and return. + Path = NewPath; + return; + } + if (Children.empty()) { + // This is a leaf, ignore duplicate entry if 'Path' equals 'NewPath'. + if (NewPath == Path) + return; + // Make this a node and create a child-leaf with 'Path'. + StringRef Element(llvm::sys::path::filename( + StringRef(Path).drop_back(ConsumedLength))); + Children[Element].Path = Path; + } + StringRef Element(llvm::sys::path::filename( + StringRef(NewPath).drop_back(ConsumedLength))); + Children[Element].insert(NewPath, ConsumedLength + Element.size() + 1); +} + +StringRef FileNameMatchTrie::findEquivalent(StringRef FileName, + llvm::raw_ostream &Error) const { + if (llvm::sys::path::is_relative(FileName)) { + Error << "Cannot resolve relative paths"; + return StringRef(); + } + bool IsAmbiguous = false; + PathComparator PathComp; + StringRef Result = findEquivalent(PathComp, FileName, IsAmbiguous, 0); + if (IsAmbiguous) + Error << "Path is ambiguous"; + return Result; +} + +StringRef FileNameMatchTrie::findEquivalent(const PathComparator &Comparator, + StringRef FileName, + bool &IsAmbiguous, + unsigned ConsumedLength) const { + if (Children.empty()) { + if (Comparator.equivalent(StringRef(Path), FileName)) + return StringRef(Path); + return StringRef(); + } + StringRef Element(llvm::sys::path::filename(FileName.drop_back( + ConsumedLength))); + llvm::StringMap::const_iterator MatchingChild = + Children.find(Element); + if (MatchingChild != Children.end()) { + StringRef Result = MatchingChild->getValue().findEquivalent( + Comparator, FileName, IsAmbiguous, ConsumedLength + Element.size() + 1); + if (!Result.empty() || IsAmbiguous) + return Result; + } + std::vector AllChildren; + getAll(AllChildren, MatchingChild); + StringRef Result; + for (unsigned i = 0; i < AllChildren.size(); i++) { + if (Comparator.equivalent(AllChildren[i], FileName)) { + if (Result.empty()) { + Result = AllChildren[i]; + } else { + IsAmbiguous = true; + return StringRef(); + } + } + } + return Result; +} + +void FileNameMatchTrie::getAll( + std::vector &Results, + llvm::StringMap::const_iterator Except) const { + if (Path.empty()) + return; + if (Children.empty()) { + Results.push_back(StringRef(Path)); + return; + } + for (llvm::StringMap::const_iterator + It = Children.begin(), E = Children.end(); + It != E; ++It) { + if (It == Except) + continue; + It->getValue().getAll(Results, Children.end()); + } +} + } // end namespace tooling } // end namespace clang Index: lib/Tooling/JSONCompilationDatabase.cpp =================================================================== --- lib/Tooling/JSONCompilationDatabase.cpp +++ lib/Tooling/JSONCompilationDatabase.cpp @@ -164,8 +164,18 @@ JSONCompilationDatabase::getCompileCommands(StringRef FilePath) const { llvm::SmallString<128> NativeFilePath; llvm::sys::path::native(FilePath, NativeFilePath); + std::vector PossibleMatches; + std::string Error; + llvm::raw_string_ostream ES(Error); + StringRef Match = MatchTrie.findEquivalent(NativeFilePath.str(), ES); + if (Match.empty()) { + if (Error.empty()) + Error = "No match found."; + llvm::outs() << Error << "\n"; + return std::vector(); + } llvm::StringMap< std::vector >::const_iterator - CommandsRefI = IndexByFile.find(NativeFilePath); + CommandsRefI = IndexByFile.find(Match); if (CommandsRefI == IndexByFile.end()) return std::vector(); const std::vector &CommandsRef = CommandsRefI->getValue(); @@ -271,10 +281,20 @@ return false; } llvm::SmallString<8> FileStorage; + StringRef FileName = File->getValue(FileStorage); llvm::SmallString<128> NativeFilePath; - llvm::sys::path::native(File->getValue(FileStorage), NativeFilePath); + if (llvm::sys::path::is_relative(FileName)) { + llvm::SmallString<8> DirectoryStorage; + llvm::SmallString<128> AbsolutePath( + Directory->getValue(DirectoryStorage)); + llvm::sys::path::append(AbsolutePath, FileName); + llvm::sys::path::native(AbsolutePath.str(), NativeFilePath); + } else { + llvm::sys::path::native(FileName, NativeFilePath); + } IndexByFile[NativeFilePath].push_back( - CompileCommandRef(Directory, Command)); + CompileCommandRef(Directory, Command)); + MatchTrie.insert(NativeFilePath.str()); } return true; } Index: unittests/Tooling/CompilationDatabaseTest.cpp =================================================================== --- unittests/Tooling/CompilationDatabaseTest.cpp +++ unittests/Tooling/CompilationDatabaseTest.cpp @@ -55,13 +55,13 @@ getAllFiles("[]", ErrorMessage)) << ErrorMessage; std::vector expected_files; - expected_files.push_back("file1"); - expected_files.push_back("file2"); + expected_files.push_back("/dir/file1"); + expected_files.push_back("/dir/file2"); EXPECT_EQ(expected_files, getAllFiles( - "[{\"directory\":\"dir\"," + "[{\"directory\":\"/dir\"," "\"command\":\"command\"," "\"file\":\"file1\"}," - " {\"directory\":\"dir\"," + " {\"directory\":\"/dir\"," "\"command\":\"command\"," "\"file\":\"file2\"}]", ErrorMessage)) << ErrorMessage; @@ -81,6 +81,83 @@ return Commands[0]; } +struct FakeComparator : public PathComparator { + virtual bool equivalent(const Twine &FileA, const Twine &FileB) const { + return StringRef(FileA.str()).equals_lower(FileB.str()); + } +}; + +class FileNameMatchTrieTest : public ::testing::Test { +protected: + StringRef find(StringRef Path) { + IsAmbiguous = false; + return Trie.findEquivalent(Comparator, Path, IsAmbiguous, 0); + } + + FileNameMatchTrie Trie; + FakeComparator Comparator; + bool IsAmbiguous; +}; + +TEST_F(FileNameMatchTrieTest, InsertingRelativePath) { + Trie.insert("/path/file.cc"); + Trie.insert("file.cc"); + EXPECT_EQ("/path/file.cc", find("/path/file.cc")); + EXPECT_FALSE(IsAmbiguous); +} + +TEST_F(FileNameMatchTrieTest, MatchingRelativePath) { + EXPECT_EQ("", find("file.cc")); + EXPECT_FALSE(IsAmbiguous); +} + +TEST_F(FileNameMatchTrieTest, ReturnsBestResults) { + Trie.insert("/d/c/b.cc"); + Trie.insert("/d/b/b.cc"); + EXPECT_EQ("/d/b/b.cc", find("/d/b/b.cc")); + EXPECT_FALSE(IsAmbiguous); +} + +TEST_F(FileNameMatchTrieTest, HandlesSymlinks) { + Trie.insert("/AA/file.cc"); + EXPECT_EQ("/AA/file.cc", find("/aa/file.cc")); + EXPECT_FALSE(IsAmbiguous); +} + +TEST_F(FileNameMatchTrieTest, ReportsSymlinkAmbiguity) { + Trie.insert("/Aa/file.cc"); + Trie.insert("/aA/file.cc"); + EXPECT_TRUE(find("/aa/file.cc").empty()); + EXPECT_TRUE(IsAmbiguous); +} + +TEST_F(FileNameMatchTrieTest, LongerMatchingSuffixPreferred) { + Trie.insert("/src/Aa/file.cc"); + Trie.insert("/src/aA/file.cc"); + Trie.insert("/SRC/aa/file.cc"); + EXPECT_EQ("/SRC/aa/file.cc", find("/src/aa/file.cc")); + EXPECT_FALSE(IsAmbiguous); +} + +TEST_F(FileNameMatchTrieTest, EmptyTrie) { + EXPECT_TRUE(find("/some/path").empty()); + EXPECT_FALSE(IsAmbiguous); +} + +TEST_F(FileNameMatchTrieTest, NoResult) { + Trie.insert("/somepath/otherfile.cc"); + Trie.insert("/otherpath/somefile.cc"); + EXPECT_EQ("", find("/somepath/somefile.cc")); + EXPECT_FALSE(IsAmbiguous); +} + +TEST_F(FileNameMatchTrieTest, RootElementDifferent) { + Trie.insert("/path/file.cc"); + Trie.insert("/otherpath/file.cc"); + EXPECT_EQ("/path/file.cc", find("/path/file.cc")); + EXPECT_FALSE(IsAmbiguous); +} + TEST(findCompileArgsInJsonDatabase, FindsNothingIfEmpty) { std::string ErrorMessage; CompileCommand NotFound = findCompileArgsInJsonDatabase( @@ -148,7 +225,7 @@ } TEST(findCompileArgsInJsonDatabase, FindsEntry) { - StringRef Directory("directory"); + StringRef Directory("/directory"); StringRef FileName("file"); StringRef Command("command"); std::string JsonDatabase = "["; @@ -162,19 +239,19 @@ JsonDatabase += "]"; std::string ErrorMessage; CompileCommand FoundCommand = findCompileArgsInJsonDatabase( - "file4", JsonDatabase, ErrorMessage); - EXPECT_EQ("directory4", FoundCommand.Directory) << ErrorMessage; + "/directory4/file4", JsonDatabase, ErrorMessage); + EXPECT_EQ("/directory4", FoundCommand.Directory) << ErrorMessage; ASSERT_EQ(1u, FoundCommand.CommandLine.size()) << ErrorMessage; EXPECT_EQ("command4", FoundCommand.CommandLine[0]) << ErrorMessage; } static std::vector unescapeJsonCommandLine(StringRef Command) { std::string JsonDatabase = - ("[{\"directory\":\"\", \"file\":\"test\", \"command\": \"" + + ("[{\"directory\":\"/root\", \"file\":\"test\", \"command\": \"" + Command + "\"}]").str(); std::string ErrorMessage; CompileCommand FoundCommand = findCompileArgsInJsonDatabase( - "test", JsonDatabase, ErrorMessage); + "/root/test", JsonDatabase, ErrorMessage); EXPECT_TRUE(ErrorMessage.empty()) << ErrorMessage; return FoundCommand.CommandLine; }