Index: lib/Basic/VirtualFileSystem.cpp =================================================================== --- lib/Basic/VirtualFileSystem.cpp +++ lib/Basic/VirtualFileSystem.cpp @@ -111,6 +111,20 @@ return Status && Status->exists(); } +#ifndef NDEBUG +static bool isTraversalComponent(StringRef Component) { + return Component.equals("..") || Component.equals("."); +} + +static bool pathHasTraversal(StringRef Path) { + using namespace llvm::sys; + for (StringRef Comp : llvm::make_range(path::begin(Path), path::end(Path))) + if (isTraversalComponent(Comp)) + return true; + return false; +} +#endif + //===-----------------------------------------------------------------------===/ // RealFileSystem implementation //===-----------------------------------------------------------------------===/ @@ -807,6 +821,9 @@ /// /// and inherit their attributes from the external contents. /// +/// Virtual file paths and external files are expected to be canonicalized +/// without "..", "." and "./" in their paths. +/// /// In both cases, the 'name' field may contain multiple path components (e.g. /// /path/to/file). However, any directory that contains more than one child /// must be uniquely represented by a directory entry. @@ -1004,7 +1021,13 @@ if (Key == "name") { if (!parseScalarString(I->getValue(), Value, Buffer)) return nullptr; - Name = Value; + + SmallString<256> Path(Value); + // Guarantee that old YAML files containing paths with ".." and "." are + // properly canonicalized before read into the VFS. + Path = sys::path::remove_leading_dotslash(Path); + sys::path::remove_dots(Path, /*remove_dot_dot=*/true); + Name = Path.str(); } else if (Key == "type") { if (!parseScalarString(I->getValue(), Value, Buffer)) return nullptr; @@ -1048,7 +1071,12 @@ HasContents = true; if (!parseScalarString(I->getValue(), Value, Buffer)) return nullptr; - ExternalContentsPath = Value; + SmallString<256> Path(Value); + // Guarantee that old YAML files containing paths with ".." and "." are + // properly canonicalized before read into the VFS. + Path = sys::path::remove_leading_dotslash(Path); + sys::path::remove_dots(Path, /*remove_dot_dot=*/true); + ExternalContentsPath = Path.str(); } else if (Key == "use-external-name") { bool Val; if (!parseScalarBool(I->getValue(), Val)) @@ -1238,6 +1266,12 @@ if (std::error_code EC = makeAbsolute(Path)) return EC; + // Canonicalize path by removing ".", "..", "./", etc components. This is + // a VFS request, do bot bother about symlinks in the path components + // but canonicalize in order to perform the correct entry search. + Path = sys::path::remove_leading_dotslash(Path); + sys::path::remove_dots(Path, /*remove_dot_dot=*/true); + if (Path.empty()) return make_error_code(llvm::errc::invalid_argument); @@ -1254,10 +1288,10 @@ ErrorOr RedirectingFileSystem::lookupPath(sys::path::const_iterator Start, sys::path::const_iterator End, Entry *From) { - if (Start->equals(".")) - ++Start; + assert(!isTraversalComponent(*Start) && + !isTraversalComponent(From->getName()) && + "Paths should not contain traversal components"); - // FIXME: handle .. if (CaseSensitive ? !Start->equals(From->getName()) : !Start->equals_lower(From->getName())) // failure to match @@ -1376,16 +1410,6 @@ return UniqueID(std::numeric_limits::max(), ID); } -#ifndef NDEBUG -static bool pathHasTraversal(StringRef Path) { - using namespace llvm::sys; - for (StringRef Comp : llvm::make_range(path::begin(Path), path::end(Path))) - if (Comp == "." || Comp == "..") - return true; - return false; -} -#endif - void YAMLVFSWriter::addFileMapping(StringRef VirtualPath, StringRef RealPath) { assert(sys::path::is_absolute(VirtualPath) && "virtual path not absolute"); assert(sys::path::is_absolute(RealPath) && "real path not absolute"); Index: lib/Frontend/ModuleDependencyCollector.cpp =================================================================== --- lib/Frontend/ModuleDependencyCollector.cpp +++ lib/Frontend/ModuleDependencyCollector.cpp @@ -13,8 +13,9 @@ #include "clang/Frontend/Utils.h" #include "clang/Serialization/ASTReader.h" -#include "llvm/ADT/StringSet.h" +#include "llvm/ADT/StringMap.h" #include "llvm/ADT/iterator_range.h" +#include "llvm/Config/config.h" #include "llvm/Support/FileSystem.h" #include "llvm/Support/Path.h" #include "llvm/Support/raw_ostream.h" @@ -25,7 +26,9 @@ /// Private implementation for ModuleDependencyCollector class ModuleDependencyListener : public ASTReaderListener { ModuleDependencyCollector &Collector; + llvm::StringMap SymLinkMap; + bool getRealPath(StringRef SrcPath, SmallVectorImpl &Result); std::error_code copyToRoot(StringRef Src); public: ModuleDependencyListener(ModuleDependencyCollector &Collector) @@ -57,6 +60,48 @@ VFSWriter.write(OS); } +// TODO: move this to Support/Path.h? +static bool real_path(StringRef SrcPath, SmallVectorImpl &RealPath) { +#ifdef HAVE_REALPATH + char CanonicalPath[256]; + + // TODO: emit a warning in case this fails...? + if (!realpath(SrcPath.str().c_str(), CanonicalPath)) + return false; + + SmallString<256> RPath(CanonicalPath); + RealPath.swap(RPath); + return true; +#else + // FIXME: Add support for systems without realpath. + return false; +#endif +} + +bool ModuleDependencyListener::getRealPath(StringRef SrcPath, + SmallVectorImpl &Result) { + using namespace llvm::sys; + SmallString<256> RealPath; + StringRef FileName = path::filename(SrcPath); + std::string Dir = path::parent_path(SrcPath).str(); + auto DirWithSymLink = SymLinkMap.find(Dir); + + // Use real_path to fix any symbolic link component present in a path. + // Computing the real path is expensive, cache the search through the + // parent path directory. + if (DirWithSymLink == SymLinkMap.end()) { + if (!real_path(Dir, RealPath)) + return false; + SymLinkMap[Dir] = RealPath.str(); + } else { + RealPath = DirWithSymLink->second; + } + + path::append(RealPath, FileName); + Result.swap(RealPath); + return true; +} + std::error_code ModuleDependencyListener::copyToRoot(StringRef Src) { using namespace llvm::sys; @@ -65,22 +110,42 @@ fs::make_absolute(AbsoluteSrc); // Canonicalize to a native path to avoid mixed separator styles. path::native(AbsoluteSrc); - // TODO: We probably need to handle .. as well as . in order to have valid - // input to the YAMLVFSWriter. - path::remove_dots(AbsoluteSrc); + // Remove redundant leading "./" pieces and consecutive separators. + AbsoluteSrc = path::remove_leading_dotslash(AbsoluteSrc); + + // Canonicalize path by removing "..", "." components. + SmallString<256> CanonicalPath = AbsoluteSrc; + path::remove_dots(CanonicalPath, /*remove_dot_dot=*/true); + + // If a ".." component is present after a symlink component, remove_dots may + // lead to the wrong real destination path. Let the source be canonicalized + // like that but make sure the destination uses the real path. + bool HasDotDotInPath = + std::count(path::begin(AbsoluteSrc), path::end(AbsoluteSrc), "..") > 0; + SmallString<256> RealPath; + bool HasSymLinkComponent = HasDotDotInPath && + getRealPath(AbsoluteSrc, RealPath) && + !StringRef(CanonicalPath).equals(RealPath); // Build the destination path. SmallString<256> Dest = Collector.getDest(); - path::append(Dest, path::relative_path(AbsoluteSrc)); + path::append(Dest, path::relative_path(HasSymLinkComponent ? RealPath + : CanonicalPath)); // Copy the file into place. if (std::error_code EC = fs::create_directories(path::parent_path(Dest), /*IgnoreExisting=*/true)) return EC; - if (std::error_code EC = fs::copy_file(AbsoluteSrc, Dest)) + if (std::error_code EC = + fs::copy_file(HasSymLinkComponent ? RealPath : CanonicalPath, Dest)) return EC; - // Use the absolute path under the root for the file mapping. - Collector.addFileMapping(AbsoluteSrc, Dest); + + // Use the canonical path under the root for the file mapping. Also create + // an additional entry for the real path. + Collector.addFileMapping(CanonicalPath, Dest); + if (HasSymLinkComponent) + Collector.addFileMapping(RealPath, Dest); + return std::error_code(); } Index: test/Modules/crash-vfs-path-symlink-component.m =================================================================== --- /dev/null +++ test/Modules/crash-vfs-path-symlink-component.m @@ -0,0 +1,72 @@ +// REQUIRES: crash-recovery, shell + +// FIXME: This XFAIL is cargo-culted from crash-report.c. Do we need it? +// XFAIL: mingw32 + +// Test that clang is capable of collecting the right header files in the +// crash reproducer if there's a symbolic link component in the path. + +// RUN: rm -rf %t +// RUN: mkdir -p %t/i %t/m %t %t/sysroot +// RUN: cp -a %S/Inputs/System/usr %t/i/ +// RUN: ln -s include/tcl-private %t/i/usr/x + +// RUN: not env FORCE_CLANG_DIAGNOSTICS_CRASH= TMPDIR=%t TEMP=%t TMP=%t \ +// RUN: %clang -fsyntax-only %s -I %/t/i -isysroot %/t/sysroot/ \ +// RUN: -fmodules -fmodules-cache-path=%t/m/ 2>&1 | FileCheck %s + +// RUN: FileCheck --check-prefix=CHECKSRC %s -input-file %t/crash-vfs-*.m +// RUN: FileCheck --check-prefix=CHECKSH %s -input-file %t/crash-vfs-*.sh +// RUN: FileCheck --check-prefix=CHECKYAML %s -input-file \ +// RUN: %t/crash-vfs-*.cache/vfs/vfs.yaml +// RUN: find %t/crash-vfs-*.cache/vfs | \ +// RUN: grep "usr/include/stdio.h" | count 1 + +#include "usr/x/../stdio.h" + +// CHECK: Preprocessed source(s) and associated run script(s) are located at: +// CHECK-NEXT: note: diagnostic msg: {{.*}}.m +// CHECK-NEXT: note: diagnostic msg: {{.*}}.cache + +// CHECKSRC: @import cstd.stdio; + +// CHECKSH: # Crash reproducer +// CHECKSH-NEXT: # Driver args: "-fsyntax-only" +// CHECKSH-NEXT: # Original command: {{.*$}} +// CHECKSH-NEXT: "-cc1" +// CHECKSH: "-isysroot" "{{[^"]*}}/sysroot/" +// CHECKSH-NOT: "-fmodules-cache-path=" +// CHECKSH: "crash-vfs-{{[^ ]*}}.m" +// CHECKSH: "-ivfsoverlay" "crash-vfs-{{[^ ]*}}.cache/vfs/vfs.yaml" + +// CHECKYAML: 'type': 'directory' +// CHECKYAML: 'name': "{{[^ ]*}}/i/usr/include", +// CHECKYAML-NEXT: 'contents': [ +// CHECKYAML-NEXT: { +// CHECKYAML-NEXT: 'type': 'file', +// CHECKYAML-NEXT: 'name': "module.map", +// CHECKYAML-NEXT: 'external-contents': "{{[^ ]*}}.cache/vfs/{{[^ ]*}}/i/usr/include/module.map" +// CHECKYAML-NEXT: }, + +// CHECKYAML: 'type': 'directory' +// CHECKYAML: 'name': "{{[^ ]*}}/i/usr", +// CHECKYAML-NEXT: 'contents': [ +// CHECKYAML-NEXT: { +// CHECKYAML-NEXT: 'type': 'file', +// CHECKYAML-NEXT: 'name': "module.map", +// CHECKYAML-NEXT: 'external-contents': "{{[^ ]*}}.cache/vfs/{{[^ ]*}}/i/usr/include/module.map" +// CHECKYAML-NEXT: }, + +// Test that by using the previous generated YAML file clang is able to find the +// right files inside the overlay and map the virtual request for a path that +// previously contained a symlink to work. To make sure of this, wipe out the +// %/t/i directory containing the symlink component. + +// RUN: rm -rf %/t/i +// RUN: unset FORCE_CLANG_DIAGNOSTICS_CRASH +// RUN: %clang -E %s -I %/t/i -isysroot %/t/sysroot/ \ +// RUN: -ivfsoverlay %t/crash-vfs-*.cache/vfs/vfs.yaml -fmodules \ +// RUN: -fmodules-cache-path=%t/m/ 2>&1 \ +// RUN: | FileCheck %s --check-prefix=CHECKOVERLAY + +// CHECKOVERLAY: @import cstd.stdio; /* clang -E: implicit import for "{{[^ ]*}}.cache/vfs/{{[^ ]*}}/i/usr/include/stdio.h" Index: test/Modules/crash-vfs-path-traversal.m =================================================================== --- /dev/null +++ test/Modules/crash-vfs-path-traversal.m @@ -0,0 +1,58 @@ +// REQUIRES: crash-recovery, shell + +// FIXME: This XFAIL is cargo-culted from crash-report.c. Do we need it? +// XFAIL: mingw32 + +// RUN: rm -rf %t +// RUN: mkdir -p %t/i %t/m %t + +// RUN: not env FORCE_CLANG_DIAGNOSTICS_CRASH= TMPDIR=%t TEMP=%t TMP=%t \ +// RUN: %clang -fsyntax-only %s -I %S/Inputs/System -isysroot %/t/i/ \ +// RUN: -fmodules -fmodules-cache-path=%t/m/ 2>&1 | FileCheck %s + +// RUN: FileCheck --check-prefix=CHECKSRC %s -input-file %t/crash-vfs-*.m +// RUN: FileCheck --check-prefix=CHECKSH %s -input-file %t/crash-vfs-*.sh +// RUN: FileCheck --check-prefix=CHECKYAML %s -input-file \ +// RUN: %t/crash-vfs-*.cache/vfs/vfs.yaml +// RUN: find %t/crash-vfs-*.cache/vfs | \ +// RUN: grep "Inputs/System/usr/include/stdio.h" | count 1 + +#include "usr/././//////include/../include/./././../include/stdio.h" + +// CHECK: Preprocessed source(s) and associated run script(s) are located at: +// CHECK-NEXT: note: diagnostic msg: {{.*}}.m +// CHECK-NEXT: note: diagnostic msg: {{.*}}.cache + +// CHECKSRC: @import cstd.stdio; + +// CHECKSH: # Crash reproducer +// CHECKSH-NEXT: # Driver args: "-fsyntax-only" +// CHECKSH-NEXT: # Original command: {{.*$}} +// CHECKSH-NEXT: "-cc1" +// CHECKSH: "-isysroot" "{{[^"]*}}/i/" +// CHECKSH-NOT: "-fmodules-cache-path=" +// CHECKSH: "crash-vfs-{{[^ ]*}}.m" +// CHECKSH: "-ivfsoverlay" "crash-vfs-{{[^ ]*}}.cache/vfs/vfs.yaml" + +// CHECKYAML: 'type': 'directory' +// CHECKYAML: 'name': "{{[^ ]*}}/Inputs/System/usr/include", +// CHECKYAML-NEXT: 'contents': [ +// CHECKYAML-NEXT: { +// CHECKYAML-NEXT: 'type': 'file', +// CHECKYAML-NEXT: 'name': "module.map", +// CHECKYAML-NEXT: 'external-contents': "{{[^ ]*}}/Inputs/System/usr/include/module.map" +// CHECKYAML-NEXT: }, + +// Replace the paths in the YAML files with relative ".." traversals +// and fed into clang to test whether we're correctly representing them +// in the VFS overlay. + +// RUN: sed -e "s@usr/include@usr/include/../include@g" \ +// RUN: %t/crash-vfs-*.cache/vfs/vfs.yaml > %t/vfs.yaml +// RUN: unset FORCE_CLANG_DIAGNOSTICS_CRASH +// RUN: %clang -E %s -I %S/Inputs/System -isysroot %/t/i/ \ +// RUN: -ivfsoverlay %t/vfs.yaml -fmodules \ +// RUN: -fmodules-cache-path=%t/m/ 2>&1 \ +// RUN: | FileCheck %s --check-prefix=CHECKOVERLAY + +// CHECKOVERLAY: @import cstd.stdio; /* clang -E: implicit import for "{{[^ ]*}}.cache/vfs/{{[^ ]*}}/usr/include/stdio.h"