Index: include/clang/Tooling/CompilationDatabase.h =================================================================== --- include/clang/Tooling/CompilationDatabase.h +++ include/clang/Tooling/CompilationDatabase.h @@ -33,6 +33,7 @@ #include "llvm/ADT/OwningPtr.h" #include "llvm/ADT/StringRef.h" #include "llvm/ADT/Twine.h" + #include #include Index: include/clang/Tooling/FileMatchTrie.h =================================================================== --- /dev/null +++ include/clang/Tooling/FileMatchTrie.h @@ -0,0 +1,91 @@ +//===--- FileMatchTrie.h - --------------------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements a match trie to find the matching file in a compilation +// database based on a given path in the presence of symlinks. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CLANG_TOOLING_FILE_MATCH_TRIE_H +#define LLVM_CLANG_TOOLING_FILE_MATCH_TRIE_H + +#include "clang/Basic/LLVM.h" +#include "llvm/ADT/OwningPtr.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/ADT/Twine.h" + +#include +#include + +namespace clang { +namespace tooling { + +struct PathComparator { + virtual ~PathComparator() {} + virtual bool equivalent(const Twine &FileA, const Twine &FileB) const = 0; +}; +class FileMatchTrieNode; + +/// \brief A trie to efficiently match against the entries of the compilation +/// database in order of matching suffix length. +/// +/// When a clang tool is supposed to operate on a specific file, we have to +/// find the corresponding file in the compilation database. Although entries +/// in the compilation database are keyed by filename, a simple string match +/// is insufficient because of symlinks. Commonly, a project hierarchy looks +/// like this: +/// //src//.cc (used as input for the tool) +/// //build///.cc (stored in DB) +/// +/// Furthermore, there might be symlinks inside the source folder or inside the +/// database, so that the same source file is translated with different build +/// options. +/// +/// For a given input file, the \c FileMatchTrie finds its entries in order +/// of matching suffix length. For each suffix length, there might be one or +/// more entries in the database. For each of those entries, it calls +/// \c llvm::sys::fs::equivalent() (injected as \c PathComparator). There might +/// be zero or more entries with the same matching suffix length that are +/// equivalent to the input file. Three cases are distinguished: +/// 0 equivalent files: Continue with the next suffix length. +/// 1 equivalent file: Best match found, return it. +/// >1 equivalent files: Match is ambiguous, return error. +class FileMatchTrie { +public: + FileMatchTrie(); + + /// \brief Construct a new \c FileMatchTrie with the given \c PathComparator. + /// + /// The \c FileMatchTrie takes ownership of 'Comparator'. Used for testing. + FileMatchTrie(PathComparator* Comparator); + + ~FileMatchTrie(); + + /// \brief Insert a new absolute path. Relative paths are ignored. + void insert(StringRef NewPath); + + /// \brief Finds the corresponding file in this trie. + /// + /// Returns file name stored in this trie that is equivalent to 'FileName' + /// according to 'Comparator', if it can be uniquely identified. If there + /// are no matches an empty \c StringRef is returned. If there are ambigious + /// matches, an empty \c StringRef is returned and a corresponding message + /// written to 'Error'. + StringRef findEquivalent(StringRef FileName, + llvm::raw_ostream &Error) const; +private: + FileMatchTrieNode *Root; + OwningPtr Comparator; +}; + + +} // end namespace tooling +} // end namespace clang + +#endif // LLVM_CLANG_TOOLING_FILE_MATCH_TRIE_H Index: include/clang/Tooling/JSONCompilationDatabase.h =================================================================== --- include/clang/Tooling/JSONCompilationDatabase.h +++ include/clang/Tooling/JSONCompilationDatabase.h @@ -17,6 +17,7 @@ #include "clang/Basic/LLVM.h" #include "clang/Tooling/CompilationDatabase.h" +#include "clang/Tooling/FileMatchTrie.h" #include "llvm/ADT/OwningPtr.h" #include "llvm/ADT/StringMap.h" #include "llvm/ADT/StringRef.h" @@ -93,6 +94,8 @@ // Maps file paths to the compile command lines for that file. llvm::StringMap< std::vector > IndexByFile; + FileMatchTrie MatchTrie; + llvm::OwningPtr Database; llvm::SourceMgr SM; llvm::yaml::Stream YAMLStream; Index: lib/Tooling/CMakeLists.txt =================================================================== --- lib/Tooling/CMakeLists.txt +++ lib/Tooling/CMakeLists.txt @@ -4,6 +4,7 @@ ArgumentsAdjusters.cpp CommonOptionsParser.cpp CompilationDatabase.cpp + FileMatchTrie.cpp JSONCompilationDatabase.cpp Refactoring.cpp RefactoringCallbacks.cpp Index: lib/Tooling/FileMatchTrie.cpp =================================================================== --- /dev/null +++ lib/Tooling/FileMatchTrie.cpp @@ -0,0 +1,188 @@ +//===--- FileMatchTrie.cpp - ----------------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains the implementation of a FileMatchTrie. +// +//===----------------------------------------------------------------------===// + +#include +#include "clang/Tooling/FileMatchTrie.h" +#include "llvm/ADT/StringMap.h" +#include "llvm/Support/FileSystem.h" +#include "llvm/Support/PathV2.h" + +namespace clang { +namespace tooling { + +/// \brief Default \c PathComparator using \c llvm::sys::fs::equivalent(). +struct DefaultPathComparator : public PathComparator { + virtual ~DefaultPathComparator() {} + virtual bool equivalent(const Twine &FileA, const Twine &FileB) const { + return FileA.str() == FileB.str() || + llvm::sys::fs::equivalent(FileA, FileB); + } +}; + +/// \brief A node of the \c FileMatchTrie. +/// +/// Each node has storage for up to one path and a map mapping a path segment to +/// child nodes. The trie starts with an empty root node. +class FileMatchTrieNode { +public: + /// \brief Inserts 'NewPath' into this trie. \c ConsumedLength denotes + /// the number of \c NewPath's trailing characters already consumed during + /// recursion. + /// + /// An insert of a path + /// 'p'starts at the root node and does the following: + /// - If the node is empty, insert 'p' into its storage and abort. + /// - If the node has a path 'p2' but no children, take the last path segment + /// 's' of 'p2', put a new child into the map at 's' an insert the rest of + /// 'p2' there. + /// - Insert a new child for the last segment of 'p' and insert the rest of + /// 'p' there. + /// + /// An insert operation is linear in the number of a path's segments. + void insert(StringRef NewPath, unsigned ConsumedLength = 0) { + // We cannot put relative paths into the FileMatchTrie as then a path can be + // a postfix of another path, violating a core assumption of the trie. + if (llvm::sys::path::is_relative(NewPath)) + return; + if (Path.empty()) { + // This is an empty leaf. Store NewPath and return. + Path = NewPath; + return; + } + if (Children.empty()) { + // This is a leaf, ignore duplicate entry if 'Path' equals 'NewPath'. + if (NewPath == Path) + return; + // Make this a node and create a child-leaf with 'Path'. + StringRef Element(llvm::sys::path::filename( + StringRef(Path).drop_back(ConsumedLength))); + Children[Element].Path = Path; + } + StringRef Element(llvm::sys::path::filename( + StringRef(NewPath).drop_back(ConsumedLength))); + Children[Element].insert(NewPath, ConsumedLength + Element.size() + 1); + } + + /// \brief Tries to find the node under this \c FileMatchTrieNode that best + /// matches 'FileName'. + /// + /// If multiple paths fit 'FileName' equally well, \c IsAmbiguous is set to + /// \c true and an empty string is returned. If no path fits 'FileName', an + /// empty string is returned. \c ConsumedLength denotes the number of + /// \c Filename's trailing characters already consumed during recursion. + /// + /// To find the best matching node for a given path 'p', the + /// \c findEquivalent() function is called recursively for each path segment + /// (back to fron) of 'p' until a node 'n' is reached that does not .. + /// - .. have children. In this case it is checked + /// whether the stored path is equivalent to 'p'. If yes, the best match is + /// found. Otherwise continue with the parent node as if this node did not + /// exist. + /// - .. a child matching the next path segment. In this case, all children of + /// 'n' are an equally good match for 'p'. All children are of 'n' are found + /// recursively and their equivalence to 'p' is determined. If none are + /// equivalent, continue with the parent node as if 'n' didn't exist. If one + /// is equivalent, the best match is found. Otherwise, report and ambigiuity + /// error. + StringRef findEquivalent(const PathComparator& Comparator, + StringRef FileName, + bool &IsAmbiguous, + unsigned ConsumedLength = 0) const { + if (Children.empty()) { + if (Comparator.equivalent(StringRef(Path), FileName)) + return StringRef(Path); + return StringRef(); + } + StringRef Element(llvm::sys::path::filename(FileName.drop_back( + ConsumedLength))); + llvm::StringMap::const_iterator MatchingChild = + Children.find(Element); + if (MatchingChild != Children.end()) { + StringRef Result = MatchingChild->getValue().findEquivalent( + Comparator, FileName, IsAmbiguous, + ConsumedLength + Element.size() + 1); + if (!Result.empty() || IsAmbiguous) + return Result; + } + std::vector AllChildren; + getAll(AllChildren, MatchingChild); + StringRef Result; + for (unsigned i = 0; i < AllChildren.size(); i++) { + if (Comparator.equivalent(AllChildren[i], FileName)) { + if (Result.empty()) { + Result = AllChildren[i]; + } else { + IsAmbiguous = true; + return StringRef(); + } + } + } + return Result; + } + +private: + /// \brief Gets all paths under this FileMatchTrieNode. + void getAll(std::vector &Results, + llvm::StringMap::const_iterator Except) const { + if (Path.empty()) + return; + if (Children.empty()) { + Results.push_back(StringRef(Path)); + return; + } + for (llvm::StringMap::const_iterator + It = Children.begin(), E = Children.end(); + It != E; ++It) { + if (It == Except) + continue; + It->getValue().getAll(Results, Children.end()); + } + } + + // The stored absolute path in this node. Only valid for leaf nodes, i.e. + // nodes where Children.empty(). + std::string Path; + + // The children of this node stored in a map based on the next path segment. + llvm::StringMap Children; +}; + +FileMatchTrie::FileMatchTrie() + : Root(new FileMatchTrieNode), Comparator(new DefaultPathComparator()) {} + +FileMatchTrie::FileMatchTrie(PathComparator *Comparator) + : Root(new FileMatchTrieNode), Comparator(Comparator) {} + +FileMatchTrie::~FileMatchTrie() { + delete Root; +} + +void FileMatchTrie::insert(StringRef NewPath) { + Root->insert(NewPath); +} + +StringRef FileMatchTrie::findEquivalent(StringRef FileName, + llvm::raw_ostream &Error) const { + if (llvm::sys::path::is_relative(FileName)) { + Error << "Cannot resolve relative paths"; + return StringRef(); + } + bool IsAmbiguous = false; + StringRef Result = Root->findEquivalent(*Comparator, FileName, IsAmbiguous); + if (IsAmbiguous) + Error << "Path is ambiguous"; + return Result; +} + +} // end namespace tooling +} // end namespace clang Index: lib/Tooling/JSONCompilationDatabase.cpp =================================================================== --- lib/Tooling/JSONCompilationDatabase.cpp +++ lib/Tooling/JSONCompilationDatabase.cpp @@ -164,8 +164,18 @@ JSONCompilationDatabase::getCompileCommands(StringRef FilePath) const { llvm::SmallString<128> NativeFilePath; llvm::sys::path::native(FilePath, NativeFilePath); + std::vector PossibleMatches; + std::string Error; + llvm::raw_string_ostream ES(Error); + StringRef Match = MatchTrie.findEquivalent(NativeFilePath.str(), ES); + if (Match.empty()) { + if (Error.empty()) + Error = "No match found."; + llvm::outs() << Error << "\n"; + return std::vector(); + } llvm::StringMap< std::vector >::const_iterator - CommandsRefI = IndexByFile.find(NativeFilePath); + CommandsRefI = IndexByFile.find(Match); if (CommandsRefI == IndexByFile.end()) return std::vector(); const std::vector &CommandsRef = CommandsRefI->getValue(); @@ -271,10 +281,20 @@ return false; } llvm::SmallString<8> FileStorage; + StringRef FileName = File->getValue(FileStorage); llvm::SmallString<128> NativeFilePath; - llvm::sys::path::native(File->getValue(FileStorage), NativeFilePath); + if (llvm::sys::path::is_relative(FileName)) { + llvm::SmallString<8> DirectoryStorage; + llvm::SmallString<128> AbsolutePath( + Directory->getValue(DirectoryStorage)); + llvm::sys::path::append(AbsolutePath, FileName); + llvm::sys::path::native(AbsolutePath.str(), NativeFilePath); + } else { + llvm::sys::path::native(FileName, NativeFilePath); + } IndexByFile[NativeFilePath].push_back( - CompileCommandRef(Directory, Command)); + CompileCommandRef(Directory, Command)); + MatchTrie.insert(NativeFilePath.str()); } return true; } Index: unittests/Tooling/CompilationDatabaseTest.cpp =================================================================== --- unittests/Tooling/CompilationDatabaseTest.cpp +++ unittests/Tooling/CompilationDatabaseTest.cpp @@ -11,6 +11,7 @@ #include "clang/AST/DeclCXX.h" #include "clang/AST/DeclGroup.h" #include "clang/Frontend/FrontendAction.h" +#include "clang/Tooling/FileMatchTrie.h" #include "clang/Tooling/JSONCompilationDatabase.h" #include "clang/Tooling/Tooling.h" #include "gtest/gtest.h" @@ -55,13 +56,13 @@ getAllFiles("[]", ErrorMessage)) << ErrorMessage; std::vector expected_files; - expected_files.push_back("file1"); - expected_files.push_back("file2"); + expected_files.push_back("/dir/file1"); + expected_files.push_back("/dir/file2"); EXPECT_EQ(expected_files, getAllFiles( - "[{\"directory\":\"dir\"," + "[{\"directory\":\"/dir\"," "\"command\":\"command\"," "\"file\":\"file1\"}," - " {\"directory\":\"dir\"," + " {\"directory\":\"/dir\"," "\"command\":\"command\"," "\"file\":\"file2\"}]", ErrorMessage)) << ErrorMessage; @@ -81,6 +82,82 @@ return Commands[0]; } +struct FakeComparator : public PathComparator { + virtual ~FakeComparator() {} + virtual bool equivalent(const Twine &FileA, const Twine &FileB) const { + return StringRef(FileA.str()).equals_lower(FileB.str()); + } +}; + +class FileMatchTrieTest : public ::testing::Test { +protected: + FileMatchTrieTest() : Trie(new FakeComparator()) {} + + StringRef find(StringRef Path) { + llvm::raw_string_ostream ES(Error); + return Trie.findEquivalent(Path, ES); + } + + FileMatchTrie Trie; + std::string Error; +}; + +TEST_F(FileMatchTrieTest, InsertingRelativePath) { + Trie.insert("/path/file.cc"); + Trie.insert("file.cc"); + EXPECT_EQ("/path/file.cc", find("/path/file.cc")); +} + +TEST_F(FileMatchTrieTest, MatchingRelativePath) { + EXPECT_EQ("", find("file.cc")); +} + +TEST_F(FileMatchTrieTest, ReturnsBestResults) { + Trie.insert("/d/c/b.cc"); + Trie.insert("/d/b/b.cc"); + EXPECT_EQ("/d/b/b.cc", find("/d/b/b.cc")); +} + +TEST_F(FileMatchTrieTest, HandlesSymlinks) { + Trie.insert("/AA/file.cc"); + EXPECT_EQ("/AA/file.cc", find("/aa/file.cc")); +} + +TEST_F(FileMatchTrieTest, ReportsSymlinkAmbiguity) { + Trie.insert("/Aa/file.cc"); + Trie.insert("/aA/file.cc"); + EXPECT_TRUE(find("/aa/file.cc").empty()); + EXPECT_EQ("Path is ambiguous", Error); +} + +TEST_F(FileMatchTrieTest, LongerMatchingSuffixPreferred) { + Trie.insert("/src/Aa/file.cc"); + Trie.insert("/src/aA/file.cc"); + Trie.insert("/SRC/aa/file.cc"); + EXPECT_EQ("/SRC/aa/file.cc", find("/src/aa/file.cc")); +} + +TEST_F(FileMatchTrieTest, EmptyTrie) { + EXPECT_TRUE(find("/some/path").empty()); +} + +TEST_F(FileMatchTrieTest, NoResult) { + Trie.insert("/somepath/otherfile.cc"); + Trie.insert("/otherpath/somefile.cc"); + EXPECT_EQ("", find("/somepath/somefile.cc")); +} + +TEST_F(FileMatchTrieTest, RootElementDifferent) { + Trie.insert("/path/file.cc"); + Trie.insert("/otherpath/file.cc"); + EXPECT_EQ("/path/file.cc", find("/path/file.cc")); +} + +TEST_F(FileMatchTrieTest, CannotResolveRelativePath) { + EXPECT_EQ("", find("relative-path.cc")); + EXPECT_EQ("Cannot resolve relative paths", Error); +} + TEST(findCompileArgsInJsonDatabase, FindsNothingIfEmpty) { std::string ErrorMessage; CompileCommand NotFound = findCompileArgsInJsonDatabase( @@ -148,7 +225,7 @@ } TEST(findCompileArgsInJsonDatabase, FindsEntry) { - StringRef Directory("directory"); + StringRef Directory("/directory"); StringRef FileName("file"); StringRef Command("command"); std::string JsonDatabase = "["; @@ -162,19 +239,19 @@ JsonDatabase += "]"; std::string ErrorMessage; CompileCommand FoundCommand = findCompileArgsInJsonDatabase( - "file4", JsonDatabase, ErrorMessage); - EXPECT_EQ("directory4", FoundCommand.Directory) << ErrorMessage; + "/directory4/file4", JsonDatabase, ErrorMessage); + EXPECT_EQ("/directory4", FoundCommand.Directory) << ErrorMessage; ASSERT_EQ(1u, FoundCommand.CommandLine.size()) << ErrorMessage; EXPECT_EQ("command4", FoundCommand.CommandLine[0]) << ErrorMessage; } static std::vector unescapeJsonCommandLine(StringRef Command) { std::string JsonDatabase = - ("[{\"directory\":\"\", \"file\":\"test\", \"command\": \"" + + ("[{\"directory\":\"/root\", \"file\":\"test\", \"command\": \"" + Command + "\"}]").str(); std::string ErrorMessage; CompileCommand FoundCommand = findCompileArgsInJsonDatabase( - "test", JsonDatabase, ErrorMessage); + "/root/test", JsonDatabase, ErrorMessage); EXPECT_TRUE(ErrorMessage.empty()) << ErrorMessage; return FoundCommand.CommandLine; }