Index: clangd/CMakeLists.txt =================================================================== --- clangd/CMakeLists.txt +++ clangd/CMakeLists.txt @@ -21,6 +21,7 @@ ProtocolHandlers.cpp SourceCode.cpp Trace.cpp + URI.cpp XRefs.cpp index/FileIndex.cpp index/Index.cpp Index: clangd/URI.h =================================================================== --- /dev/null +++ clangd/URI.h @@ -0,0 +1,102 @@ +//===--- URI.h - File URIs with schemes --------------------------*- C++-*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANGD_PATHURI_H +#define LLVM_CLANG_TOOLS_EXTRA_CLANGD_PATHURI_H + +#include "llvm/ADT/StringRef.h" +#include "llvm/Support/Error.h" +#include "llvm/Support/Registry.h" + +namespace clang { +namespace clangd { + +/// A URI describes the location of a source file. +/// In the simplest case, this is a "file" URI that directly encodes the +/// absolute path to a file. More abstract cases are possible: a shared index +/// service might expose repo:// URIs that are relative to the source control +/// root. +class FileURI { +public: + /// \brief Returns decoded scheme. + llvm::StringRef scheme() const { return Scheme; } + /// \brief Returns decoded authority. + llvm::StringRef authority() const { return Authority; } + /// \brief Returns decoded body. + llvm::StringRef body() const { return Body; } + + /// \brief Creates a URI string for a file in the given scheme. \p Scheme must + /// must be registered. + static llvm::Expected create(llvm::StringRef AbsolutePath, + llvm::StringRef Scheme = "file"); + + /// \brief Parse a URI string ":///" where authority + /// is optional when "//" is not present. 
+ static llvm::Expected parse(llvm::StringRef Uri); + + /// \brief Resolves the absolute path of \p U with the first matching scheme + /// registered. + static llvm::Expected resolve(const FileURI &U, + llvm::StringRef CurrentFile = ""); + + friend bool operator==(const FileURI &LHS, const FileURI &RHS) { + return std::tie(LHS.Scheme, LHS.Authority, LHS.Body) == + std::tie(RHS.Scheme, RHS.Authority, RHS.Body); + } + +private: + FileURI() = default; + + std::string Scheme; + std::string Authority; + std::string Body; +}; + +/// \brief URIScheme is an extension point for teaching clangd to recognize a +/// custom URI scheme. This is expected to be implemented and exposed via the +/// URISchemeRegistry. Users are not expected to use URIScheme directly. +/// +/// Different codebases/projects can have different file schemes, and clangd +/// interprets a file path according to the scheme. For example, a file path +/// provided by a remote symbol index can follow a certain scheme (e.g. relative +/// to a project root directory), and clangd needs to combine the scheme path +/// with execution environment (e.g. working/build directory) in order to get a +/// file path in the file system. +class URIScheme { +public: + virtual ~URIScheme() = default; + + /// \brief Returns the absolute path of the file corresponding to the URI body + /// in the file system. \p CurrentFile is the file from which the request is + /// issued. This is needed because the same URI in different workspace may + /// correspond to different files. + virtual llvm::Expected + getAbsolutePath(llvm::StringRef Body, llvm::StringRef CurrentFile) const = 0; + + virtual llvm::Expected + uriFromAbsolutePath(llvm::StringRef AbsolutePath) const = 0; +}; + +/// \brief Encodes a string according to percent-encoding. +/// - Unrerved characters are not escaped. +/// - Reserved characters always escaped with exceptions like '/'. +/// - All other characters are escaped. 
+std::string percentEncode(llvm::StringRef Content); + +/// \brief Decodes a string according to percent-encoding. +llvm::Expected percentDecode(llvm::StringRef Content); + +/// By default, a "file" scheme is supported where URI paths are always absolute +/// in the file system. +typedef llvm::Registry URISchemeRegistry; + +} // namespace clangd +} // namespace clang + +#endif // LLVM_CLANG_TOOLS_EXTRA_CLANGD_PATHURI_H Index: clangd/URI.cpp =================================================================== --- /dev/null +++ clangd/URI.cpp @@ -0,0 +1,191 @@ +//===---- URI.h - File URIs with schemes -------------------------*- C++-*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "URI.h" +#include "llvm/ADT/Twine.h" +#include "llvm/Support/Error.h" +#include "llvm/Support/Path.h" +#include +#include + +LLVM_INSTANTIATE_REGISTRY(clang::clangd::URISchemeRegistry) + +namespace clang { +namespace clangd { +namespace { + +inline llvm::Error make_string_error(const llvm::Twine &Message) { + return llvm::make_error(Message, + llvm::inconvertibleErrorCode()); +} + +/// \brief This manages file paths in the file system. All paths in the scheme +/// are absolute (with leading '/'). +class FileSystemScheme : public URIScheme { +public: + static const char *Scheme; + + llvm::Expected + getAbsolutePath(llvm::StringRef Body, + llvm::StringRef /*CurrentFile*/) const override { + if (!Body.startswith("/")) + return make_string_error( + "File scheme: expect body to be an absolute path starting with '/'"); + // For Windows paths e.g. 
/X: + if (Body.size() > 2 && Body[0] == '/' && Body[2] == ':') + Body.consume_front("/"); + return llvm::sys::path::convert_to_slash(Body); + } + + llvm::Expected + uriFromAbsolutePath(llvm::StringRef AbsolutePath) const override { + using namespace llvm::sys; + + if (!AbsolutePath.startswith("/")) + return make_string_error( + "File scheme: An AbsolutePath must start with '/'."); + std::string Body; + // For Windows paths e.g. X: + if (AbsolutePath.size() > 1 && AbsolutePath[1] == ':') + Body = "/"; + Body += path::convert_to_slash(AbsolutePath, path::Style::posix); + return (llvm::Twine(Scheme) + ":" + percentEncode(Body)).str(); + } +}; + +const char *FileSystemScheme::Scheme = "file"; + +static URISchemeRegistry::Add + X(FileSystemScheme::Scheme, + "URI scheme for absolute paths in the file system."); + +llvm::Expected> +findSchemeByName(llvm::StringRef Scheme) { + for (auto I = URISchemeRegistry::begin(), E = URISchemeRegistry::end(); + I != E; ++I) { + if (I->getName() != Scheme) + continue; + return I->instantiate(); + } + return make_string_error("Can't find scheme: " + Scheme); +} + +constexpr static char Unreserved[] = { + 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', + 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', 'a', 'b', + 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', + 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', '1', '2', '3', '4', + '5', '6', '7', '8', '9', '0', '-', '_', '.', '~', +}; + +inline std::string percentize(char C) { + std::ostringstream OS; + OS << "%" << std::setfill('0') << std::setw(2) << std::hex + << static_cast(C); + return OS.str(); +} + +std::vector createEncodeMap() { + std::vector Result(128, ""); + for (char C : Unreserved) + Result[C] = C; + for (int i = 0; i < 128; i++) + if (Result[i].empty()) + Result[i] = percentize(static_cast(i)); + // Avoid escaping '/' to save URI readability. 
+ Result['/'] = '/'; + return Result; +} + +} // namespace + +std::string percentEncode(llvm::StringRef Content) { + static const std::vector EncodeMap = createEncodeMap(); + std::string Result; + for (char C : Content) + Result += EncodeMap[C]; + return Result; +} + +llvm::Expected percentDecode(llvm::StringRef Content) { + std::string Result; + for (auto I = Content.begin(), E = Content.end(); I != E; ++I) { + if (*I != '%') { + Result += *I; + continue; + } + if (I + 1 == E || I + 2 == E) + return make_string_error("Expect two characters after '%' sign: Content"); + char Buf[3]; + Buf[0] = *(++I); + Buf[1] = *(++I); + Buf[2] = '\0'; + Result += static_cast(strtoul(Buf, nullptr, 16)); + } + return Result; +} + +llvm::Expected FileURI::parse(llvm::StringRef Uri) { + FileURI U; + llvm::StringRef OrigUri = Uri; + + auto Pos = Uri.find(':'); + if (Pos == llvm::StringRef::npos) + return make_string_error("Expect ':' in a URI: " + OrigUri); + auto Decoded = percentDecode(Uri.substr(0, Pos)); + if (!Decoded) + return Decoded.takeError(); + U.Scheme = *Decoded; + + Uri = Uri.substr(Pos + 1); + if (Uri.consume_front("//")) { + Pos = Uri.find('/'); + if (Pos == llvm::StringRef::npos) + return make_string_error("Expect '/' after a URI authority: " + OrigUri); + + Decoded = percentDecode(Uri.substr(0, Pos)); + if (!Decoded) + return Decoded.takeError(); + if (Decoded->empty()) + return make_string_error( + "'//' is present after scheme while authority is not provided: " + + OrigUri); + U.Authority = *Decoded; + Uri = Uri.substr(Pos + 1); + } + Decoded = percentDecode(Uri); + if (!Decoded) + return Decoded.takeError(); + U.Body = *Decoded; + + if (U.Scheme.empty() || U.Body.empty()) + return make_string_error("Scheme and body must be provided in URI: " + + OrigUri); + + return U; +} + +llvm::Expected FileURI::create(llvm::StringRef AbsolutePath, + llvm::StringRef Scheme) { + auto S = findSchemeByName(Scheme); + if (!S) + return S.takeError(); + return 
S->get()->uriFromAbsolutePath(AbsolutePath); +} + +llvm::Expected FileURI::resolve(const FileURI &Uri, + llvm::StringRef CurrentFile) { + auto S = findSchemeByName(Uri.Scheme); + if (!S) + return S.takeError(); + return S->get()->getAbsolutePath(Uri.Body, CurrentFile); +} + +} // namespace clangd +} // namespace clang Index: unittests/clangd/CMakeLists.txt =================================================================== --- unittests/clangd/CMakeLists.txt +++ unittests/clangd/CMakeLists.txt @@ -18,6 +18,7 @@ FuzzyMatchTests.cpp IndexTests.cpp JSONExprTests.cpp + URITests.cpp TestFS.cpp TraceTests.cpp SourceCodeTests.cpp Index: unittests/clangd/URITests.cpp =================================================================== --- /dev/null +++ unittests/clangd/URITests.cpp @@ -0,0 +1,181 @@ +//===-- URITests.cpp ---------------------------------*- C++ -*-----------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+//
+//===----------------------------------------------------------------------===//
+
+#include "URI.h"
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+
+namespace clang {
+namespace clangd {
+namespace {
+
+using ::testing::AllOf;
+
+MATCHER_P(Scheme, S, "") { return arg.scheme() == S; }
+MATCHER_P(Authority, A, "") { return arg.authority() == A; }
+MATCHER_P(Body, B, "") { return arg.body() == B; }
+
+std::string decodeOrDie(llvm::StringRef S) {
+  auto D = percentDecode(S);
+  if (!D)
+    llvm_unreachable(llvm::toString(D.takeError()).c_str());
+  return *D;
+}
+
+TEST(PercentEncodingTest, Encode) {
+  EXPECT_EQ(percentEncode("a/b/c"), "a/b/c");
+  EXPECT_EQ(percentEncode("a!b;c~"), "a%21b%3bc~");
+}
+
+TEST(PercentEncodingTest, Decode) {
+  EXPECT_EQ(decodeOrDie("a/b/c"), "a/b/c");
+  EXPECT_EQ(decodeOrDie("a%21b%3ac~"), "a!b:c~");
+}
+
+// Assume all files in the "test" scheme live under a "test-root/" directory,
+// and the URI body is the path relative to that root directory.
+// So the URI of "/some-dir/test-root/x/y/z" is "test:x/y/z".
+class TestScheme : public URIScheme { +public: + static const char *Scheme; + + static const char *TestRoot; + + llvm::Expected + getAbsolutePath(llvm::StringRef Body, + llvm::StringRef CurrentFile) const override { + auto Pos = CurrentFile.find(TestRoot); + assert(Pos != llvm::StringRef::npos); + return (CurrentFile.substr(0, Pos + llvm::StringRef(TestRoot).size()) + + Body) + .str(); + } + + llvm::Expected + uriFromAbsolutePath(llvm::StringRef AbsolutePath) const override { + auto Pos = AbsolutePath.find(TestRoot); + assert(Pos != llvm::StringRef::npos); + return (llvm::Twine(Scheme) + ":" + + AbsolutePath.substr(Pos + Pos + llvm::StringRef(TestRoot).size())) + .str(); + } +}; + +const char *TestScheme::Scheme = "test"; +const char *TestScheme::TestRoot = "/test-root/"; + +static URISchemeRegistry::Add X(TestScheme::Scheme, "Test schema"); + +std::string createOrDie(llvm::StringRef AbsolutePath, + llvm::StringRef Scheme = "file") { + auto Uri = FileURI::create(AbsolutePath, Scheme); + if (!Uri) + llvm_unreachable(llvm::toString(Uri.takeError()).c_str()); + return *Uri; +} + +FileURI parseOrDie(llvm::StringRef Uri) { + auto U = FileURI::parse(Uri); + if (!U) + llvm_unreachable(llvm::toString(U.takeError()).c_str()); + llvm::errs() << "URI: " << U->scheme() << ", " << U->authority() << ", " + << U->body() << "\n"; + return *U; +} + +std::string resolveOrDie(const FileURI &U, llvm::StringRef CurrentFile = "") { + auto Path = FileURI::resolve(U, CurrentFile); + if (!Path) + llvm_unreachable(llvm::toString(Path.takeError()).c_str()); + return *Path; +} + +TEST(URITest, Create) { + EXPECT_THAT(createOrDie("/x/y/z"), "file:/x/y/z"); + EXPECT_THAT(createOrDie("/(x)/y/\\ z"), "file:/%28x%29/y/%5c%20z"); +} + +TEST(URITest, FailedCreate) { + auto Uri = FileURI::create("/x/y/z", "no"); + EXPECT_FALSE(static_cast(Uri)); + llvm::consumeError(Uri.takeError()); + + // Path has to be absolute. 
+ Uri = FileURI::create("x/y/z"); + EXPECT_FALSE(static_cast(Uri)); + llvm::consumeError(Uri.takeError()); +} + +TEST(URITest, Parse) { + EXPECT_THAT(parseOrDie("file://auth//x/y/z"), + AllOf(Scheme("file"), Authority("auth"), Body("/x/y/z"))); + + EXPECT_THAT(parseOrDie("file://au%3dth//%28x%29/y/%5c%20z"), + AllOf(Scheme("file"), Authority("au=th"), Body("/(x)/y/\\ z"))); + + EXPECT_THAT(parseOrDie("file:/%28x%29/y/%5c%20z"), + AllOf(Scheme("file"), Authority(""), Body("/(x)/y/\\ z"))); +} + +TEST(URITest, ParseFailed) { + auto FailedParse = [](llvm::StringRef U) { + auto URI = FileURI::parse("file//x/y/z"); + if (!URI) { + llvm::consumeError(URI.takeError()); + return true; + } + return false; + }; + + // Expect ':' in URI. + EXPECT_TRUE(FailedParse("file//x/y/z")); + // Expect two bytes after %. + EXPECT_TRUE(FailedParse("file://x/y/z%2")); + // Empty. + EXPECT_TRUE(FailedParse("")); + EXPECT_TRUE(FailedParse(":/a/b/c")); + EXPECT_TRUE(FailedParse("s:")); + // Incomplete. + EXPECT_TRUE(FailedParse("x:")); + EXPECT_TRUE(FailedParse("x://a")); + // Empty authority. + EXPECT_TRUE(FailedParse("file:////x/y/z")); +} + +TEST(URITest, Resolve) { + EXPECT_EQ(resolveOrDie(parseOrDie("file:/a/b/c")), "/a/b/c"); + EXPECT_EQ(resolveOrDie(parseOrDie("file://auth//a/b/c")), "/a/b/c"); + EXPECT_EQ(resolveOrDie(parseOrDie("test:a/b/c"), "/dir/test-root/x/y/z"), + "/dir/test-root/a/b/c"); + + EXPECT_THAT(resolveOrDie(parseOrDie("file://au%3dth//%28x%29/y/%5c%20z")), + "/(x)/y/\\ z"); +} + +TEST(URITest, ResolveFailed) { + auto FailedResolve = [](llvm::StringRef Uri) { + auto Path = FileURI::resolve(parseOrDie(Uri)); + if (!Path) { + llvm::consumeError(Path.takeError()); + return true; + } + return false; + }; + + // Invalid scheme. + EXPECT_TRUE(FailedResolve("no:/a/b/c")); + // File path needs to be absolute. + EXPECT_TRUE(FailedResolve("file:a/b/c")); + // File path needs to be absolute. 
+ EXPECT_TRUE(FailedResolve("file://c/x/y/z")); +} + +} // namespace +} // namespace clangd +} // namespace clang