diff --git a/clang-tools-extra/clangd/index/dex/Dex.h b/clang-tools-extra/clangd/index/dex/Dex.h
--- a/clang-tools-extra/clangd/index/dex/Dex.h
+++ b/clang-tools-extra/clangd/index/dex/Dex.h
@@ -132,7 +132,10 @@
 /// Should be used within the index build process.
 ///
 /// This function is exposed for testing only.
-std::vector<std::string> generateProximityURIs(llvm::StringRef URIPath);
+///
+/// The returned StringRefs are substrings of the argument: they are only valid
+/// for as long as the buffer passed in stays alive and unmodified.
+llvm::SmallVector<llvm::StringRef> generateProximityURIs(const char *);
 
 } // namespace dex
 } // namespace clangd
diff --git a/clang-tools-extra/clangd/index/dex/Dex.cpp b/clang-tools-extra/clangd/index/dex/Dex.cpp
--- a/clang-tools-extra/clangd/index/dex/Dex.cpp
+++ b/clang-tools-extra/clangd/index/dex/Dex.cpp
@@ -163,8 +163,8 @@
   llvm::StringMap<SourceParams> Sources;
   for (const auto &Path : ProximityPaths) {
     Sources[Path] = SourceParams();
-    auto PathURI = URI::create(Path);
-    const auto PathProximityURIs = generateProximityURIs(PathURI.toString());
+    auto PathURI = URI::create(Path).toString();
+    const auto PathProximityURIs = generateProximityURIs(PathURI.c_str());
     for (const auto &ProximityURI : PathProximityURIs)
       ParentURIs.insert(ProximityURI);
   }
@@ -354,28 +354,69 @@
 }
 
-std::vector<std::string> generateProximityURIs(llvm::StringRef URIPath) {
-  std::vector<std::string> Result;
-  auto ParsedURI = URI::parse(URIPath);
-  assert(ParsedURI &&
-         "Non-empty argument of generateProximityURIs() should be a valid "
-         "URI.");
-  llvm::StringRef Body = ParsedURI->body();
-  // FIXME(kbobyrev): Currently, this is a heuristic which defines the maximum
-  // size of resulting vector. Some projects might want to have higher limit if
-  // the file hierarchy is deeper. For the generic case, it would be useful to
-  // calculate Limit in the index build stage by calculating the maximum depth
-  // of the project source tree at runtime.
-  size_t Limit = 5;
-  // Insert original URI before the loop: this would save a redundant iteration
-  // with a URI parse.
-  Result.emplace_back(ParsedURI->toString());
-  while (!Body.empty() && --Limit > 0) {
-    // FIXME(kbobyrev): Parsing and encoding path to URIs is not necessary and
-    // could be optimized.
-    Body = llvm::sys::path::parent_path(Body, llvm::sys::path::Style::posix);
-    if (!Body.empty())
-      Result.emplace_back(
-          URI(ParsedURI->scheme(), ParsedURI->authority(), Body).toString());
+// Returns a pointer to the start of the path component of a URI, skipping the
+// scheme and the optional "//authority".
+//
+// Given   foo://bar/one/two
+// Returns          ^
+//
+// If S contains no ':' (no scheme), or nothing follows the authority, the
+// result points at the terminating NUL, i.e. the path is empty.
+static const char *findPathInURI(const char *S) {
+  // Skip over scheme.
+  for (;;) {
+    if (!*S)
+      return S;
+    if (*S++ == ':')
+      break;
   }
+  // Skip over authority. S[1] is only read when S[0] == '/', so it is at worst
+  // the terminating NUL.
+  if (S[0] == '/' && S[1] == '/') {
+    S += 2;
+    while (*S && *S != '/')
+      ++S;
+  }
+  return S;
+}
+
+// FIXME(kbobyrev): Currently, this is a heuristic which defines the maximum
+// size of resulting vector. Some projects might want to have higher limit if
+// the file hierarchy is deeper. For the generic case, it would be useful to
+// calculate Limit in the index build stage by calculating the maximum depth
+// of the project source tree at runtime.
+constexpr unsigned ProximityURILimit = 5;
+
+llvm::SmallVector<llvm::StringRef>
+generateProximityURIs(const char *URI) {
+  // This function is hot when indexing, so don't parse/reserialize URIPath,
+  // just emit substrings of it instead.
+  //
+  //   foo://bar/one/two
+  //   ^URI     ^Path   ^URIEnd
+  const char *Path = findPathInURI(URI);
+  const char *URIEnd = Path;
+  while (*URIEnd)
+    ++URIEnd;
+  // The original URI is a proximity path.
+  llvm::SmallVector<llvm::StringRef> Result = {
+      llvm::StringRef(URI, URIEnd - URI)};
+  unsigned Limit = ProximityURILimit - 1;
+  // Scan the path right to left, stopping short of its first character:
+  // foo://bar is not a proximity path. The distance is tested *before*
+  // stepping so that an empty path (e.g. "foo://bar") is left alone;
+  // decrementing first would step below Path and read out of bounds.
+  for (const char *End = URIEnd; End - Path > 1;) {
+    --End;
+    if (*End != '/')
+      continue;
+    //   foo://bar/one/two
+    //                ^End
+    Result.push_back(llvm::StringRef(URI, End - URI));
+    if (--Limit == 0)
+      return Result;
+  }
+  // The root foo://bar/ is a proximity path, unless it is the original URI
+  // itself, which has already been emitted above.
+  if (*Path == '/' && URIEnd - Path > 1)
+    Result.push_back(llvm::StringRef(URI, Path + 1 - URI));
   return Result;
 }