Index: lib/Tooling/InterpolatingCompilationDatabase.cpp =================================================================== --- lib/Tooling/InterpolatingCompilationDatabase.cpp +++ lib/Tooling/InterpolatingCompilationDatabase.cpp @@ -217,46 +217,47 @@ } }; -// CommandIndex does the real work: given a filename, it produces the best -// matching TransferableCommand by matching filenames. Basic strategy: +// Given a filename, FileIndex picks the best matching file from the underlying +// DB. This is the proxy file whose CompileCommand will be reused. The +// heuristics incorporate file name, extension, and directory structure. +// Strategy: // - Build indexes of each of the substrings we want to look up by. // These indexes are just sorted lists of the substrings. -// - Forward requests to the inner CDB. If it fails, we must pick a proxy. // - Each criterion corresponds to a range lookup into the index, so we only // need O(log N) string comparisons to determine scores. -// - We then break ties among the candidates with the highest score. -class CommandIndex { +// +// Apart from path proximity signals, also takes file extensions into account +// when scoring the candidates. +class FileIndex { public: - CommandIndex(std::vector AllCommands) - : Commands(std::move(AllCommands)), Strings(Arena) { + FileIndex(std::vector Files) : Storage(Files) { // Sort commands by filename for determinism (index is a tiebreaker later). - llvm::sort( - Commands.begin(), Commands.end(), - [](const TransferableCommand &Left, const TransferableCommand &Right) { - return Left.Cmd.Filename < Right.Cmd.Filename; - }); - for (size_t I = 0; I < Commands.size(); ++I) { - StringRef Path = - Strings.save(StringRef(Commands[I].Cmd.Filename).lower()); - Paths.push_back({Path, I}); + llvm::sort(Storage.begin(), Storage.end()); + Paths.reserve(Storage.size()); + Types.reserve(Storage.size()); + for (size_t I = 0; I < Storage.size(); ++I) { + StringRef Path = Storage[I]; + + Paths.emplace_back(Path, I); + Types.push_back(foldType(guessType(Path))); Stems.emplace_back(sys::path::stem(Path), I); auto Dir = ++sys::path::rbegin(Path), DirEnd = sys::path::rend(Path); for (int J = 0; J < DirectorySegmentsIndexed && Dir != DirEnd; ++J, ++Dir) if (Dir->size() > ShortDirectorySegment) // not trivial ones Components.emplace_back(*Dir, I); } - llvm::sort(Paths.begin(), Paths.end()); + // Paths are already sorted at this point. llvm::sort(Stems.begin(), Stems.end()); llvm::sort(Components.begin(), Components.end()); } - bool empty() const { return Commands.empty(); } + bool empty() const { return Paths.empty(); } - // Returns the command that best fits OriginalFilename. - // Candidates with PreferLanguage will be chosen over others (unless it's - // TY_INVALID, or all candidates are bad). - const TransferableCommand &chooseProxy(StringRef OriginalFilename, - types::ID PreferLanguage) const { + // Returns the path for the file that best fits OriginalFilename. + // Candidates with extensions matching PreferLanguage will be chosen over + // others (unless it's TY_INVALID, or all candidates are bad). + StringRef chooseProxy(StringRef OriginalFilename, + types::ID PreferLanguage) const { assert(!empty() && "need at least one candidate!"); std::string Filename = OriginalFilename.lower(); auto Candidates = scoreCandidates(Filename); @@ -266,13 +267,13 @@ DEBUG_WITH_TYPE("interpolate", llvm::dbgs() << "interpolate: chose " - << Commands[Best.first].Cmd.Filename << " as proxy for " + << Paths[Best.first].first << " as proxy for " << OriginalFilename << " preferring " << (PreferLanguage == types::TY_INVALID ? "none" : types::getTypeName(PreferLanguage)) << " score=" << Best.second << "\n"); - return Commands[Best.first]; + return Paths[Best.first].first; } private: @@ -338,7 +339,7 @@ ScoredCandidate S; S.Index = Candidate.first; S.Preferred = PreferredLanguage == types::TY_INVALID || - PreferredLanguage == Commands[S.Index].Type; + PreferredLanguage == Types[S.Index]; S.Points = Candidate.second; if (!S.Preferred && Best.Preferred) continue; @@ -371,7 +372,7 @@ // If Prefix is true, it's instead the range starting with Key. template ArrayRef - indexLookup(StringRef Key, const std::vector &Idx) const { + indexLookup(StringRef Key, ArrayRef Idx) const { // Use pointers as iteratiors to ease conversion of result to ArrayRef. auto Range = std::equal_range(Idx.data(), Idx.data() + Idx.size(), Key, Less()); @@ -380,7 +381,7 @@ // Performs a point lookup into a nonempty index, returning a longest match. SubstringAndIndex - longestMatch(StringRef Key, const std::vector &Idx) const { + longestMatch(StringRef Key, ArrayRef Idx) const { assert(!Idx.empty()); // Longest substring match will be adjacent to a direct lookup. auto It = @@ -395,9 +396,10 @@ return Prefix > PrevPrefix ? *It : *--It; } - std::vector Commands; // Indexes point into this. - BumpPtrAllocator Arena; - StringSaver Strings; + std::vector Storage; // Indexes point into this. + // Lang types obtained by guessing on the corresponding Path. I-th element is + // a type for the I-th path. + std::vector Types; // Indexes of candidates by certain substrings. // String is lowercase and sorted, index points into OriginalPaths. std::vector Paths; // Full path. @@ -406,11 +408,12 @@ }; // The actual CompilationDatabase wrapper delegates to its inner database. -// If no match, looks up a command in CommandIndex and transfers it to the file. +// If no match, looks up a proxy file in FileIndex and transfers its +// command to the requested file. class InterpolatingCompilationDatabase : public CompilationDatabase { public: InterpolatingCompilationDatabase(std::unique_ptr Inner) - : Inner(std::move(Inner)), Index(allCommands()) {} + : Inner(std::move(Inner)), Index(this->Inner->getAllFiles()) {} std::vector getCompileCommands(StringRef Filename) const override { @@ -421,7 +424,11 @@ auto Lang = guessType(Filename, &TypeCertain); if (!TypeCertain) Lang = types::TY_INVALID; - return {Index.chooseProxy(Filename, foldType(Lang)).transferTo(Filename)}; + auto ProxyCommands = + Inner->getCompileCommands(Index.chooseProxy(Filename, foldType(Lang))); + if (ProxyCommands.empty()) + return {}; + return {TransferableCommand(ProxyCommands[0]).transferTo(Filename)}; } std::vector getAllFiles() const override { @@ -433,18 +440,8 @@ } private: - std::vector allCommands() { - std::vector Result; - for (auto Command : Inner->getAllCompileCommands()) { - Result.emplace_back(std::move(Command)); - if (Result.back().Type == types::TY_INVALID) - Result.pop_back(); - } - return Result; - } - std::unique_ptr Inner; - CommandIndex Index; + FileIndex Index; }; } // namespace