Index: clangd/FileDistance.h =================================================================== --- clangd/FileDistance.h +++ clangd/FileDistance.h @@ -61,8 +61,11 @@ struct SourceParams { // Base cost for paths starting at this source. unsigned Cost = 0; - // Limits the number of upwards traversals allowed from this source. - unsigned MaxUpTraversals = std::numeric_limits::max(); + // Limits the number of upwards/downwards traversals allowed from this source. + static constexpr unsigned MaxTraversals = + std::numeric_limits::max(); + unsigned MaxUpTraversals = MaxTraversals; + unsigned MaxDownTraversals = MaxTraversals; }; // Supports lookups to find the minimum distance to a file from any source. @@ -80,7 +83,11 @@ private: // Costs computed so far. Always contains sources and their ancestors. // We store hash codes only. Collisions are rare and consequences aren't dire. - llvm::DenseMap Cache; + struct CacheNode { + unsigned Cost = Unreachable; + unsigned MaxDownTraversals = std::numeric_limits::max(); + }; + llvm::DenseMap Cache; FileDistanceOptions Opts; }; Index: clangd/FileDistance.cpp =================================================================== --- clangd/FileDistance.cpp +++ clangd/FileDistance.cpp @@ -63,8 +63,8 @@ // Keep track of down edges, in case we can use them to improve on this. for (const auto &S : Sources) { auto Canonical = canonicalize(S.getKey()); - dlog("Source {0} = {1}, MaxUp = {2}", Canonical, S.second.Cost, - S.second.MaxUpTraversals); + dlog("Source {0} = {1}, MaxUp = {2}, MaxDown = {3}", Canonical, + S.second.Cost, S.second.MaxUpTraversals, S.second.MaxDownTraversals); // Walk up to ancestors of this source, assigning cost. StringRef Rest = Canonical; llvm::hash_code Hash = hash_value(Rest); @@ -79,11 +79,16 @@ if (Cache.find(Hash) != Cache.end()) break; } else { - unsigned Cost = S.getValue().Cost + I * Opts.UpCost; - auto R = Cache.try_emplace(Hash, Cost); + CacheNode Node; + Node.Cost = S.getValue().Cost + I * Opts.UpCost; + if (SourceParams::MaxTraversals - S.getValue().MaxDownTraversals >= I) + Node.MaxDownTraversals = S.getValue().MaxDownTraversals + I; + auto R = Cache.try_emplace(Hash, Node); if (!R.second) { - if (Cost < R.first->second) { - R.first->second = Cost; + // Note that we might lose coverage of some paths that are otherwise + // reachable with a higher cost but large down traversal limit. + if (Node.Cost < R.first->second.Cost) { + R.first->second = std::move(Node); } else { // If we're not the best way to get to this path, stop assigning. break; @@ -99,13 +104,17 @@ for (auto Child : DownEdges.lookup(hash_value(llvm::StringRef("")))) Next.push(Child); while (!Next.empty()) { - auto ParentCost = Cache.lookup(Next.front()); - for (auto Child : DownEdges.lookup(Next.front())) { - auto &ChildCost = - Cache.try_emplace(Child, Unreachable).first->getSecond(); - if (ParentCost + Opts.DownCost < ChildCost) - ChildCost = ParentCost + Opts.DownCost; - Next.push(Child); + const auto &Parent = Cache[Next.front()]; + if (Parent.MaxDownTraversals > 0) { + for (auto ChildHash : DownEdges.lookup(Next.front())) { + auto &Child = + Cache.try_emplace(ChildHash, CacheNode()).first->getSecond(); + if (Parent.Cost + Opts.DownCost < Child.Cost) { + Child.Cost = Parent.Cost + Opts.DownCost; + Child.MaxDownTraversals = Parent.MaxDownTraversals - 1; + } + Next.push(ChildHash); + } } Next.pop(); } @@ -113,7 +122,7 @@ unsigned FileDistance::distance(StringRef Path) { auto Canonical = canonicalize(Path); - unsigned Cost = Unreachable; + CacheNode KnownNode; SmallVector Ancestors; // Walk up ancestors until we find a path we know the distance for. for (StringRef Rest = Canonical; !Rest.empty(); @@ -121,20 +130,26 @@ auto Hash = hash_value(Rest); auto It = Cache.find(Hash); if (It != Cache.end()) { - Cost = It->second; + KnownNode = It->second; break; } Ancestors.push_back(Hash); } // Now we know the costs for (known node, queried node]. // Fill these in, walking down the directory tree. - for (hash_code Hash : reverse(Ancestors)) { - if (Cost != Unreachable) - Cost += Opts.DownCost; - Cache.try_emplace(Hash, Cost); + for (auto B = Ancestors.rbegin(), I = B, E = Ancestors.rend(); I != E; ++I) { + CacheNode Node; + unsigned DistanceToKnown = I - B + 1; + if (DistanceToKnown <= KnownNode.MaxDownTraversals) { + if (KnownNode.Cost != Unreachable) + Node.Cost = KnownNode.Cost + DistanceToKnown * Opts.DownCost; + if (KnownNode.MaxDownTraversals < SourceParams::MaxTraversals) + Node.MaxDownTraversals = KnownNode.MaxDownTraversals - (I - B) - 1; + } + Cache.try_emplace(*I, Node); } - dlog("distance({0} = {1})", Path, Cost); - return Cost; + dlog("distance({0} = {1})", Path, Cache[hash_value(Canonical)].Cost); + return Cache[hash_value(Canonical)].Cost; } unsigned URIDistance::distance(llvm::StringRef URI) { Index: unittests/clangd/FileDistanceTests.cpp =================================================================== --- unittests/clangd/FileDistanceTests.cpp +++ unittests/clangd/FileDistanceTests.cpp @@ -95,6 +95,37 @@ EXPECT_EQ(D.distance("/a/b/z"), 2u); } +TEST(FileDistance, LimitDownTraversals) { + FileDistanceOptions Opts; + Opts.UpCost = 1; + Opts.DownCost = 1; + SourceParams CheapButLimited, CostLots; + CheapButLimited.MaxDownTraversals = 1; + CostLots.Cost = 100; + + FileDistance D({{"/", CheapButLimited}, {"/a/b", CostLots}}, Opts); + EXPECT_EQ(D.distance("/a"), 1u); + EXPECT_EQ(D.distance("/a/b"), 100u); + EXPECT_EQ(D.distance("/a/b/c"), 101u); +} + +TEST(FileDistance, LimitDownTraversalsCorner) { + FileDistanceOptions Opts; + Opts.UpCost = 1; + Opts.DownCost = 1; + SourceParams CheapButLimited, CostLots; + CheapButLimited.MaxDownTraversals = 1; + CostLots.Cost = 100; + + FileDistance D({{"/", CheapButLimited}, {"/a", CostLots}}, Opts); + EXPECT_EQ(D.distance("/x"), 1u); + EXPECT_EQ(D.distance("/a"), 1u); + // These are actually reacahble from "/a" with high cost. But we don't support + // such case. + EXPECT_EQ(D.distance("/a/b"), FileDistance::Unreachable); + EXPECT_EQ(D.distance("/a/b/c"), FileDistance::Unreachable); +} + } // namespace } // namespace clangd } // namespace clang