Index: clang-tools-extra/trunk/clangd/CMakeLists.txt
===================================================================
--- clang-tools-extra/trunk/clangd/CMakeLists.txt
+++ clang-tools-extra/trunk/clangd/CMakeLists.txt
@@ -21,6 +21,7 @@
   ProtocolHandlers.cpp
   SourceCode.cpp
   Trace.cpp
+  URI.cpp
   XRefs.cpp
   index/FileIndex.cpp
   index/Index.cpp
Index: clang-tools-extra/trunk/clangd/URI.h
===================================================================
--- clang-tools-extra/trunk/clangd/URI.h
+++ clang-tools-extra/trunk/clangd/URI.h
@@ -0,0 +1,104 @@
+//===--- URI.h - File URIs with schemes --------------------------*- C++-*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANGD_PATHURI_H
+#define LLVM_CLANG_TOOLS_EXTRA_CLANGD_PATHURI_H
+
+#include "llvm/ADT/StringRef.h"
+#include "llvm/Support/Error.h"
+#include "llvm/Support/Registry.h"
+#include <string>
+#include <tuple>
+
+namespace clang {
+namespace clangd {
+
+/// A URI describes the location of a source file.
+/// In the simplest case, this is a "file" URI that directly encodes the
+/// absolute path to a file. More abstract cases are possible: a shared index
+/// service might expose repo:// URIs that are relative to the source control
+/// root.
+///
+/// Clangd handles URIs of the form <scheme>:[//<authority>]<body>. It doesn't
+/// further split the authority or body into constituent parts (e.g. query
+/// strings are included in the body).
+class FileURI {
+public:
+  /// Returns decoded scheme e.g. "https"
+  llvm::StringRef scheme() const { return Scheme; }
+  /// Returns decoded authority e.g. "reviews.llvm.org"
+  llvm::StringRef authority() const { return Authority; }
+  /// Returns decoded body e.g. "/D41946"
+  llvm::StringRef body() const { return Body; }
+
+  /// Returns a string URI with all components percent-encoded.
+  std::string toString() const;
+
+  /// Create a FileURI from unescaped scheme+authority+body.
+  static llvm::Expected<FileURI> create(llvm::StringRef Scheme,
+                                        llvm::StringRef Authority,
+                                        llvm::StringRef Body);
+
+  /// Creates a FileURI for a file in the given scheme. \p Scheme must be
+  /// registered. The URI is percent-encoded.
+  static llvm::Expected<FileURI> create(llvm::StringRef AbsolutePath,
+                                        llvm::StringRef Scheme = "file");
+
+  /// Parse a URI string "<scheme>:[//<authority>/<path>]". Percent-encoded
+  /// characters in the URI will be decoded.
+  static llvm::Expected<FileURI> parse(llvm::StringRef Uri);
+
+  /// Resolves the absolute path of \p U. If there is no matching scheme, or the
+  /// URI is invalid in the scheme, this returns an error.
+  ///
+  /// \p HintPath A related path, such as the current file or working directory,
+  /// which can help disambiguate when the same file exists in many workspaces.
+  static llvm::Expected<std::string> resolve(const FileURI &U,
+                                             llvm::StringRef HintPath = "");
+
+  friend bool operator==(const FileURI &LHS, const FileURI &RHS) {
+    return std::tie(LHS.Scheme, LHS.Authority, LHS.Body) ==
+           std::tie(RHS.Scheme, RHS.Authority, RHS.Body);
+  }
+
+private:
+  FileURI() = default;
+
+  std::string Scheme;
+  std::string Authority;
+  std::string Body;
+};
+
+/// URIScheme is an extension point for teaching clangd to recognize a custom
+/// URI scheme. This is expected to be implemented and exposed via the
+/// URISchemeRegistry.
+class URIScheme {
+public:
+  virtual ~URIScheme() = default;
+
+  /// Returns the absolute path of the file corresponding to the URI
+  /// authority+body in the file system. See FileURI::resolve for semantics of
+  /// \p HintPath.
+  virtual llvm::Expected<std::string>
+  getAbsolutePath(llvm::StringRef Authority, llvm::StringRef Body,
+                  llvm::StringRef HintPath) const = 0;
+
+  /// Creates the URI in this scheme that refers to \p AbsolutePath.
+  virtual llvm::Expected<FileURI>
+  uriFromAbsolutePath(llvm::StringRef AbsolutePath) const = 0;
+};
+
+/// By default, a "file" scheme is supported where URI paths are always absolute
+/// in the file system.
+using URISchemeRegistry = llvm::Registry<URIScheme>;
+
+} // namespace clangd
+} // namespace clang
+
+#endif // LLVM_CLANG_TOOLS_EXTRA_CLANGD_PATHURI_H
Index: clang-tools-extra/trunk/clangd/URI.cpp
===================================================================
--- clang-tools-extra/trunk/clangd/URI.cpp
+++ clang-tools-extra/trunk/clangd/URI.cpp
@@ -0,0 +1,204 @@
+//===---- URI.cpp - File URIs with schemes -----------------------*- C++-*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "URI.h"
+#include "llvm/ADT/Twine.h"
+#include "llvm/Support/Error.h"
+#include "llvm/Support/Format.h"
+#include "llvm/Support/Path.h"
+#include <memory>
+#include <string>
+
+LLVM_INSTANTIATE_REGISTRY(clang::clangd::URISchemeRegistry)
+
+namespace clang {
+namespace clangd {
+namespace {
+
+/// Wraps \p Message in a StringError that has no convertible error code.
+inline llvm::Error make_string_error(const llvm::Twine &Message) {
+  return llvm::make_error<llvm::StringError>(Message,
+                                             llvm::inconvertibleErrorCode());
+}
+
+/// \brief This manages file paths in the file system. All paths in the scheme
+/// are absolute (with leading '/').
+class FileSystemScheme : public URIScheme {
+public:
+  static const char *Scheme;
+
+  llvm::Expected<std::string>
+  getAbsolutePath(llvm::StringRef /*Authority*/, llvm::StringRef Body,
+                  llvm::StringRef /*HintPath*/) const override {
+    if (!Body.startswith("/"))
+      return make_string_error("File scheme: expect body to be an absolute "
+                               "path starting with '/': " +
+                               Body);
+    // For Windows paths e.g. /X:
+    if (Body.size() > 2 && Body[0] == '/' && Body[2] == ':')
+      Body.consume_front("/");
+    llvm::SmallVector<char, 16> Path(Body.begin(), Body.end());
+    llvm::sys::path::native(Path);
+    return std::string(Path.begin(), Path.end());
+  }
+
+  llvm::Expected<FileURI>
+  uriFromAbsolutePath(llvm::StringRef AbsolutePath) const override {
+    using namespace llvm::sys;
+
+    std::string Body;
+    // For Windows paths e.g. X:
+    if (AbsolutePath.size() > 1 && AbsolutePath[1] == ':')
+      Body = "/";
+    Body += path::convert_to_slash(AbsolutePath);
+    return FileURI::create(Scheme, /*Authority=*/"", Body);
+  }
+};
+
+const char *FileSystemScheme::Scheme = "file";
+
+static URISchemeRegistry::Add<FileSystemScheme>
+    X(FileSystemScheme::Scheme,
+      "URI scheme for absolute paths in the file system.");
+
+/// Returns a new instance of the registered scheme named \p Scheme, or an
+/// error if no scheme with that name is registered.
+llvm::Expected<std::unique_ptr<URIScheme>>
+findSchemeByName(llvm::StringRef Scheme) {
+  for (auto I = URISchemeRegistry::begin(), E = URISchemeRegistry::end();
+       I != E; ++I) {
+    if (I->getName() != Scheme)
+      continue;
+    return I->instantiate();
+  }
+  return make_string_error("Can't find scheme: " + Scheme);
+}
+
+/// Returns true if \p C must be percent-encoded when a URI is serialized.
+bool shouldEscape(unsigned char C) {
+  // Unreserved alphanumerics (RFC 3986, section 2.3): letters and digits.
+  if ((C >= 'a' && C <= 'z') || (C >= 'A' && C <= 'Z') ||
+      (C >= '0' && C <= '9'))
+    return false;
+  switch (C) {
+  case '-':
+  case '_':
+  case '.':
+  case '~':
+  case '/': // '/' is only reserved when parsing.
+    return false;
+  }
+  return true;
+}
+
+/// Encodes a string according to percent-encoding.
+/// - Unreserved characters are not escaped.
+/// - Reserved characters always escaped with exceptions like '/'.
+/// - All other characters are escaped.
+std::string percentEncode(llvm::StringRef Content) {
+  std::string Result;
+  llvm::raw_string_ostream OS(Result);
+  for (unsigned char C : Content)
+    if (shouldEscape(C))
+      OS << '%' << llvm::format_hex_no_prefix(C, 2);
+    else
+      OS << C;
+
+  OS.flush();
+  return Result;
+}
+
+/// Decodes a string according to percent-encoding.
+std::string percentDecode(llvm::StringRef Content) {
+  std::string Result;
+  for (auto I = Content.begin(), E = Content.end(); I != E; ++I) {
+    // Only "%XX" with two hex digits is an escape sequence; a '%' without
+    // them is copied through literally. Comparing the remaining length (E - I)
+    // avoids forming a pointer beyond one-past-the-end of the buffer.
+    if (*I == '%' && E - I > 2 && llvm::isHexDigit(*(I + 1)) &&
+        llvm::isHexDigit(*(I + 2))) {
+      Result.push_back(llvm::hexFromNibbles(*(I + 1), *(I + 2)));
+      I += 2;
+      continue;
+    }
+    Result.push_back(*I);
+  }
+  return Result;
+}
+
+} // namespace
+
+llvm::Expected<FileURI> FileURI::create(llvm::StringRef Scheme,
+                                        llvm::StringRef Authority,
+                                        llvm::StringRef Body) {
+  if (Scheme.empty())
+    return make_string_error("Scheme must be specified in a URI.");
+  if (!Authority.empty() && !Body.startswith("/"))
+    return make_string_error(
+        "URI body must start with '/' when authority is present.");
+  FileURI U;
+  U.Scheme = Scheme;
+  U.Authority = Authority;
+  U.Body = Body;
+  return U;
+}
+
+std::string FileURI::toString() const {
+  std::string Result;
+  llvm::raw_string_ostream OS(Result);
+  OS << percentEncode(Scheme) << ":";
+  if (Authority.empty() && Body.empty())
+    return OS.str();
+  // Emit "//<authority>" when there is an authority, or when the body is an
+  // absolute path (e.g. "file:///x"); otherwise the body follows ':' directly.
+  if (!Authority.empty() || llvm::StringRef(Body).startswith("/"))
+    OS << "//" << percentEncode(Authority);
+  OS << percentEncode(Body);
+  OS.flush();
+  return Result;
+}
+
+llvm::Expected<FileURI> FileURI::parse(llvm::StringRef OrigUri) {
+  FileURI U;
+  llvm::StringRef Uri = OrigUri;
+
+  auto Pos = Uri.find(':');
+  if (Pos == 0 || Pos == llvm::StringRef::npos)
+    return make_string_error("Scheme must be provided in URI: " + OrigUri);
+  U.Scheme = percentDecode(Uri.substr(0, Pos));
+  Uri = Uri.substr(Pos + 1);
+  if (Uri.consume_front("//")) {
+    Pos = Uri.find('/');
+    U.Authority = percentDecode(Uri.substr(0, Pos));
+    Uri = Uri.substr(Pos);
+  }
+  U.Body = percentDecode(Uri);
+  return U;
+}
+
+llvm::Expected<FileURI> FileURI::create(llvm::StringRef AbsolutePath,
+                                        llvm::StringRef Scheme) {
+  if (!llvm::sys::path::is_absolute(AbsolutePath))
+    return make_string_error("Not a valid absolute path: " + AbsolutePath);
+  auto S = findSchemeByName(Scheme);
+  if (!S)
+    return S.takeError();
+  return S->get()->uriFromAbsolutePath(AbsolutePath);
+}
+
+llvm::Expected<std::string> FileURI::resolve(const FileURI &Uri,
+                                             llvm::StringRef HintPath) {
+  auto S = findSchemeByName(Uri.Scheme);
+  if (!S)
+    return S.takeError();
+  return S->get()->getAbsolutePath(Uri.Authority, Uri.Body, HintPath);
+}
+
+} // namespace clangd
+} // namespace clang
Index: clang-tools-extra/trunk/unittests/clangd/CMakeLists.txt
===================================================================
--- clang-tools-extra/trunk/unittests/clangd/CMakeLists.txt
+++ clang-tools-extra/trunk/unittests/clangd/CMakeLists.txt
@@ -18,6 +18,7 @@
   FuzzyMatchTests.cpp
   IndexTests.cpp
   JSONExprTests.cpp
+  URITests.cpp
   TestFS.cpp
   TraceTests.cpp
   SourceCodeTests.cpp
Index: clang-tools-extra/trunk/unittests/clangd/URITests.cpp
===================================================================
--- clang-tools-extra/trunk/unittests/clangd/URITests.cpp
+++ clang-tools-extra/trunk/unittests/clangd/URITests.cpp
@@ -0,0 +1,220 @@
+//===-- URITests.cpp 
---------------------------------*- C++ -*-----------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "TestFS.h"
+#include "URI.h"
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+
+namespace clang {
+namespace clangd {
+namespace {
+
+using ::testing::AllOf;
+
+MATCHER_P(Scheme, S, "") { return arg.scheme() == S; }
+MATCHER_P(Authority, A, "") { return arg.authority() == A; }
+MATCHER_P(Body, B, "") { return arg.body() == B; }
+
+// Assume all files in the scheme have a "test-root/" root directory, and the
+// URI body is the path relative to that root directory.
+// So the URI of "/some-dir/test-root/x/y/z" is "test:x/y/z".
+class TestScheme : public URIScheme {
+public:
+  static const char *Scheme;
+
+  static const char *TestRoot;
+
+  llvm::Expected<std::string>
+  getAbsolutePath(llvm::StringRef /*Authority*/, llvm::StringRef Body,
+                  llvm::StringRef HintPath) const override {
+    auto Pos = HintPath.find(TestRoot);
+    assert(Pos != llvm::StringRef::npos);
+    return (HintPath.substr(0, Pos + llvm::StringRef(TestRoot).size()) + Body)
+        .str();
+  }
+
+  llvm::Expected<FileURI>
+  uriFromAbsolutePath(llvm::StringRef AbsolutePath) const override {
+    auto Pos = AbsolutePath.find(TestRoot);
+    assert(Pos != llvm::StringRef::npos);
+    return FileURI::create(
+        Scheme, /*Authority=*/"",
+        AbsolutePath.substr(Pos + llvm::StringRef(TestRoot).size()));
+  }
+};
+
+const char *TestScheme::Scheme = "test";
+const char *TestScheme::TestRoot = "/test-root/";
+
+static URISchemeRegistry::Add<TestScheme> X(TestScheme::Scheme, "Test schema");
+
+std::string createOrDie(llvm::StringRef AbsolutePath,
+                        llvm::StringRef Scheme = "file") {
+  auto Uri = FileURI::create(AbsolutePath, Scheme);
+  if (!Uri)
+    llvm_unreachable(llvm::toString(Uri.takeError()).c_str());
+  return Uri->toString();
+}
+
+std::string createOrDie(llvm::StringRef Scheme, llvm::StringRef Authority,
+                        llvm::StringRef Body) {
+  auto Uri = FileURI::create(Scheme, Authority, Body);
+  if (!Uri)
+    llvm_unreachable(llvm::toString(Uri.takeError()).c_str());
+  return Uri->toString();
+}
+
+FileURI parseOrDie(llvm::StringRef Uri) {
+  auto U = FileURI::parse(Uri);
+  if (!U)
+    llvm_unreachable(llvm::toString(U.takeError()).c_str());
+  return *U;
+}
+
+TEST(PercentEncodingTest, Encode) {
+  EXPECT_EQ(createOrDie("x", /*Authority=*/"", "a/b/c"), "x:a/b/c");
+  EXPECT_EQ(createOrDie("x", /*Authority=*/"", "a!b;c~"), "x:a%21b%3bc~");
+}
+
+TEST(PercentEncodingTest, Decode) {
+  EXPECT_EQ(parseOrDie("x:a/b/c").body(), "a/b/c");
+
+  EXPECT_EQ(parseOrDie("%3a://%3a/%3").scheme(), ":");
+  EXPECT_EQ(parseOrDie("%3a://%3a/%3").authority(), ":");
+  EXPECT_EQ(parseOrDie("%3a://%3a/%3").body(), "/%3");
+
+  EXPECT_EQ(parseOrDie("x:a%21b%3ac~").body(), "a!b:c~");
+}
+
+std::string resolveOrDie(const FileURI &U, llvm::StringRef HintPath = "") {
+  auto Path = FileURI::resolve(U, HintPath);
+  if (!Path)
+    llvm_unreachable(llvm::toString(Path.takeError()).c_str());
+  return *Path;
+}
+
+TEST(URITest, Create) {
+#ifdef LLVM_ON_WIN32
+  EXPECT_THAT(createOrDie("c:\\x\\y\\z"), "file:///c:/x/y/z");
+#else
+  EXPECT_THAT(createOrDie("/x/y/z"), "file:///x/y/z");
+  EXPECT_THAT(createOrDie("/(x)/y/\\ z"), "file:///%28x%29/y/%5c%20z");
+#endif
+}
+
+TEST(URITest, FailedCreate) {
+  auto Fail = [](llvm::Expected<FileURI> U) {
+    if (!U) {
+      llvm::consumeError(U.takeError());
+      return true;
+    }
+    return false;
+  };
+  // Create from scheme+authority+body:
+  //
+  // Scheme must be provided.
+  EXPECT_TRUE(Fail(FileURI::create("", "auth", "/a")));
+  // Body must start with '/' if authority is present.
+  EXPECT_TRUE(Fail(FileURI::create("scheme", "auth", "x/y/z")));
+
+  // Create from scheme registry:
+  // Scheme must be registered.
+  EXPECT_TRUE(Fail(FileURI::create("/x/y/z", "no")));
+  // Path has to be absolute.
+  EXPECT_TRUE(Fail(FileURI::create("x/y/z")));
+}
+
+TEST(URITest, Parse) {
+  EXPECT_THAT(parseOrDie("file://auth/x/y/z"),
+              AllOf(Scheme("file"), Authority("auth"), Body("/x/y/z")));
+
+  EXPECT_THAT(parseOrDie("file://au%3dth/%28x%29/y/%5c%20z"),
+              AllOf(Scheme("file"), Authority("au=th"), Body("/(x)/y/\\ z")));
+
+  EXPECT_THAT(parseOrDie("file:///%28x%29/y/%5c%20z"),
+              AllOf(Scheme("file"), Authority(""), Body("/(x)/y/\\ z")));
+  EXPECT_THAT(parseOrDie("file:///x/y/z"),
+              AllOf(Scheme("file"), Authority(""), Body("/x/y/z")));
+  EXPECT_THAT(parseOrDie("file:"),
+              AllOf(Scheme("file"), Authority(""), Body("")));
+  EXPECT_THAT(parseOrDie("file:///x/y/z%2"),
+              AllOf(Scheme("file"), Authority(""), Body("/x/y/z%2")));
+  EXPECT_THAT(parseOrDie("http://llvm.org"),
+              AllOf(Scheme("http"), Authority("llvm.org"), Body("")));
+  EXPECT_THAT(parseOrDie("http://llvm.org/"),
+              AllOf(Scheme("http"), Authority("llvm.org"), Body("/")));
+  EXPECT_THAT(parseOrDie("http://llvm.org/D"),
+              AllOf(Scheme("http"), Authority("llvm.org"), Body("/D")));
+  EXPECT_THAT(parseOrDie("http:/"),
+              AllOf(Scheme("http"), Authority(""), Body("/")));
+  EXPECT_THAT(parseOrDie("urn:isbn:0451450523"),
+              AllOf(Scheme("urn"), Authority(""), Body("isbn:0451450523")));
+  EXPECT_THAT(
+      parseOrDie("file:///c:/windows/system32/"),
+      AllOf(Scheme("file"), Authority(""), Body("/c:/windows/system32/")));
+}
+
+TEST(URITest, ParseFailed) {
+  auto FailedParse = [](llvm::StringRef U) {
+    auto URI = FileURI::parse(U);
+    if (!URI) {
+      llvm::consumeError(URI.takeError());
+      return true;
+    }
+    return false;
+  };
+
+  // Expect ':' in URI.
+  EXPECT_TRUE(FailedParse("file//x/y/z"));
+  // Empty URI or empty scheme.
+  EXPECT_TRUE(FailedParse(""));
+  EXPECT_TRUE(FailedParse(":/a/b/c"));
+}
+
+TEST(URITest, Resolve) {
+#ifdef LLVM_ON_WIN32
+  EXPECT_THAT(resolveOrDie(parseOrDie("file:///c:/x/y/z")), "c:\\x\\y\\z");
+#else
+  EXPECT_EQ(resolveOrDie(parseOrDie("file:/a/b/c")), "/a/b/c");
+  EXPECT_EQ(resolveOrDie(parseOrDie("file://auth/a/b/c")), "/a/b/c");
+  EXPECT_EQ(resolveOrDie(parseOrDie("test:a/b/c"), "/dir/test-root/x/y/z"),
+            "/dir/test-root/a/b/c");
+  EXPECT_THAT(resolveOrDie(parseOrDie("file://au%3dth/%28x%29/y/%20z")),
+              "/(x)/y/ z");
+  EXPECT_THAT(resolveOrDie(parseOrDie("file:///c:/x/y/z")), "c:/x/y/z");
+#endif
+}
+
+TEST(URITest, Platform) {
+  auto Path = getVirtualTestFilePath("x");
+  auto U = FileURI::create(Path, "file");
+  EXPECT_TRUE(static_cast<bool>(U));
+  EXPECT_THAT(resolveOrDie(*U), Path.str());
+}
+
+TEST(URITest, ResolveFailed) {
+  auto FailedResolve = [](llvm::StringRef Uri) {
+    auto Path = FileURI::resolve(parseOrDie(Uri));
+    if (!Path) {
+      llvm::consumeError(Path.takeError());
+      return true;
+    }
+    return false;
+  };
+
+  // Invalid scheme.
+  EXPECT_TRUE(FailedResolve("no:/a/b/c"));
+  // File path needs to be absolute.
+  EXPECT_TRUE(FailedResolve("file:a/b/c"));
+}
+
+} // namespace
+} // namespace clangd
+} // namespace clang