Index: llvm/lib/Support/VirtualFileSystem.cpp
===================================================================
--- llvm/lib/Support/VirtualFileSystem.cpp
+++ llvm/lib/Support/VirtualFileSystem.cpp
@@ -992,20 +992,125 @@
 
 namespace {
 
-/// Removes leading "./" as well as path components like ".." and ".".
+/// Returns the components of a possibly hybrid Path.  In VFS, paths can be
+/// hybrids of Windows and Posix styles, so we cannot rely on the sys::path
+/// methods which make assumptions.  Each separator stays attached to the
+/// component it terminates, and a colon is only treated as a drive delimiter
+/// when it is the second character of the path.
+///
+/// Examples:
+///   "C:/foo\\bar"
+///     -> { "C:", "/", "foo\\", "bar" }
+///   "C:./.././//foo/bar/../bar"
+///     -> {"C:", "./", "../", "./", "/", "/", "foo/", "bar/", "../", "bar" }
+static llvm::SmallVector<StringRef> splitHybridPath(llvm::StringRef Path) {
+  llvm::SmallVector<StringRef> components;
+  auto head = Path.begin();
+  auto it = head;
+  auto end = Path.end();
+  while (it != end) {
+    if (*it == '/' || *it == '\\') {
+      ++it;
+      components.emplace_back(head, std::distance(head, it));
+      head = it;
+      continue;
+    }
+
+    if (*it == ':' && std::distance(Path.begin(), it) == 1) {
+      ++it;
+      components.emplace_back(head, std::distance(head, it));
+      head = it;
+      continue;
+    }
+
+    ++it;
+  }
+  if (head != end) {
+    components.emplace_back(head, std::distance(head, it));
+  }
+  return components;
+}
+
+/// Removes traversal components (".." and ".") as well as leading "./" and
+/// redundant slashes.  This accepts (and preserves) both slash directions.
+/// For use only on VFS paths, which may be in a hybrid of Windows and Posix
+/// styles.  A leading ".." that cannot be resolved is kept, and a bare root
+/// ("/" or "C:/") is returned unchanged.
 static llvm::SmallString<256> canonicalize(llvm::StringRef Path) {
-  // First detect the path style in use by checking the first separator.
-  llvm::sys::path::Style style = llvm::sys::path::Style::native;
-  const size_t n = Path.find_first_of("/\\");
-  if (n != static_cast<size_t>(-1))
-    style = (Path[n] == '/') ? llvm::sys::path::Style::posix
-                             : llvm::sys::path::Style::windows;
-
-  // Now remove the dots.  Explicitly specifying the path style prevents the
-  // direction of the slashes from changing.
-  llvm::SmallString<256> result =
-      llvm::sys::path::remove_leading_dotslash(Path, style);
-  llvm::sys::path::remove_dots(result, /*remove_dot_dot=*/true, style);
+  llvm::SmallString<256> result;
+  auto components = splitHybridPath(Path);
+
+  if (components.empty()) return result;
+
+  // Walk through the components, dropping the traversals.
+  auto source = components.begin();
+  auto target = components.begin();
+  const auto end = components.end();
+
+  if (source->size() == 2 && (*source)[1] == ':') {
+    // Starts with a drive letter, so keep it.
+    *target++ = *source++;
+  }
+
+  if (source != end && (*source == "/" || *source == "\\")) {
+    // This is an absolute path, so keep the root.
+    *target++ = *source++;
+  }
+
+  // A double dot cannot ascend above this point.
+  const auto limit = target;
+
+  while (source != end) {
+    if (*source == "/" || *source == "\\") {
+      // Redundant slashes should be dropped.
+      ++source;
+      continue;
+    }
+    if (*source == "./" || *source == ".\\" || *source == ".") {
+      // A dot goes nowhere, so just drop it.
+      ++source;
+      continue;
+    }
+    if (*source == "../" || *source == "..\\" || *source == "..") {
+      // Back up if we can.
+      if (target == limit) {
+        // We cannot ascend, so keep the dot-dot.
+        *target++ = *source++;
+        continue;
+      }
+
+      auto prev = target - 1;
+      if (*prev == "../" || *prev == "..\\") {
+        // We cannot ascend in this case, either.
+        *target++ = *source++;
+        continue;
+      }
+
+      // Ascend!
+      target = prev;
+      ++source;
+      continue;
+    }
+    // Everything else is a plain old path component.
+    *target++ = *source++;
+  }
+  // If nothing survived past the limit, only the drive and/or root remain.
+  const bool keptTail = target != limit;
+  components.erase(target, end);
+
+  // The canonical path is just the concatenation of the remaining components.
+  for (StringRef C : components) {
+    result.append(C.begin(), C.end());
+  }
+
+  // ... except that a trailing slash is redundant.  The root separator itself
+  // must survive, though: stripping it would turn "/" into "" and "C:/" into
+  // the drive-relative "C:".
+  if (keptTail && !result.empty() &&
+      (result.back() == '/' || result.back() == '\\')) {
+    result.pop_back();
+  }
+
   return result;
 }
 