Index: llvm/include/llvm/Support/Path.h
===================================================================
--- llvm/include/llvm/Support/Path.h
+++ llvm/include/llvm/Support/Path.h
@@ -468,10 +468,6 @@
 bool remove_dots(SmallVectorImpl<char> &path, bool remove_dot_dot = false,
                  Style style = Style::native);
 
-#if defined(_WIN32)
-std::error_code widenPath(const Twine &Path8, SmallVectorImpl<wchar_t> &Path16);
-#endif
-
 } // end namespace path
 } // end namespace sys
 } // end namespace llvm
Index: llvm/include/llvm/Support/Windows/WindowsSupport.h
===================================================================
--- llvm/include/llvm/Support/Windows/WindowsSupport.h
+++ llvm/include/llvm/Support/Windows/WindowsSupport.h
@@ -236,6 +236,17 @@
 // UTF-8 regardless of the current code page setting.
 std::error_code GetCommandLineArguments(SmallVectorImpl<const char *> &Args,
                                         BumpPtrAllocator &Alloc);
+
+/// Convert UTF-8 path to a suitable UTF-16 path for use with the Win32 Unicode
+/// File API.
+///
+/// If the UTF-16 length of the path (plus the current directory length for a
+/// relative path) reaches \p MaxPathLen, the result is an absolute, normalized
+/// path prefixed by '\\?\'; a path that already has this prefix is only
+/// converted. \p MaxPathLen must not exceed MAX_PATH.
+std::error_code widenPath(const Twine &Path8, SmallVectorImpl<wchar_t> &Path16,
+                          size_t MaxPathLen = MAX_PATH);
+
 } // end namespace windows
 } // end namespace sys
 } // end namespace llvm.
Index: llvm/lib/Support/Windows/Path.inc
===================================================================
--- llvm/lib/Support/Windows/Path.inc
+++ llvm/lib/Support/Windows/Path.inc
@@ -47,7 +47,7 @@
 using llvm::sys::windows::UTF8ToUTF16;
 using llvm::sys::windows::CurCPToUTF16;
 using llvm::sys::windows::UTF16ToUTF8;
-using llvm::sys::path::widenPath;
+using llvm::sys::windows::widenPath;
 
 static bool is_separator(const wchar_t value) {
   switch (value) {
@@ -61,64 +61,70 @@
 
 namespace llvm {
 namespace sys {
-namespace path {
+namespace windows {
 
-// Convert a UTF-8 path to UTF-16. Also, if the absolute equivalent of the
-// path is longer than CreateDirectory can tolerate, make it absolute and
-// prefixed by '\\?\'.
-std::error_code widenPath(const Twine &Path8,
-                          SmallVectorImpl<wchar_t> &Path16) {
-  const size_t MaxDirLen = MAX_PATH - 12; // Must leave room for 8.3 filename.
+// Convert a UTF-8 path to UTF-16. Also, if the absolute equivalent of the path
+// is longer than the limit that the Win32 Unicode File API can tolerate, make
+// it an absolute normalized path prefixed by '\\?\'.
+std::error_code widenPath(const Twine &Path8, SmallVectorImpl<wchar_t> &Path16,
+                          size_t MaxPathLen) {
+  assert(MaxPathLen <= MAX_PATH);
 
-  // Several operations would convert Path8 to SmallString; more efficient to
-  // do it once up front.
-  SmallString<128> Path8Str;
+  // Several operations would convert Path8 to SmallString; more efficient to do
+  // it once up front.
+  SmallString<MAX_PATH> Path8Str;
   Path8.toVector(Path8Str);
 
-  // If we made this path absolute, how much longer would it get?
+  std::error_code EC = UTF8ToUTF16(Path8Str, Path16);
+  if (EC)
+    return EC;
+
+  const bool IsAbsolute = llvm::sys::path::is_absolute(Twine(Path8Str));
   size_t CurPathLen;
-  if (llvm::sys::path::is_absolute(Twine(Path8Str)))
+  if (IsAbsolute)
     CurPathLen = 0; // No contribution from current_path needed.
   else {
-    CurPathLen = ::GetCurrentDirectoryW(0, NULL);
+    CurPathLen = ::GetCurrentDirectoryW(
+        0, NULL); // Returns the size including the null terminator.
     if (CurPathLen == 0)
       return mapWindowsError(::GetLastError());
   }
 
-  // Would the absolute path be longer than our limit?
-  if ((Path8Str.size() + CurPathLen) >= MaxDirLen &&
-      !Path8Str.startswith("\\\\?\\")) {
-    SmallString<2*MAX_PATH> FullPath("\\\\?\\");
-    if (CurPathLen) {
-      SmallString<80> CurPath;
-      if (std::error_code EC = llvm::sys::fs::current_path(CurPath))
-        return EC;
-      FullPath.append(CurPath);
-    }
-    // Traverse the requested path, canonicalizing . and .. (because the \\?\
-    // prefix is documented to treat them as real components). Ignore
-    // separators, which can be returned from the iterator if the path has a
-    // drive name. We don't need to call native() on the result since append()
-    // always attaches preferred_separator.
-    for (llvm::sys::path::const_iterator I = llvm::sys::path::begin(Path8Str),
-                                         E = llvm::sys::path::end(Path8Str);
-                                         I != E; ++I) {
-      if (I->size() == 1 && is_separator((*I)[0]))
-        continue;
-      if (I->size() == 1 && *I == ".")
-        continue;
-      if (I->size() == 2 && *I == "..")
-        llvm::sys::path::remove_filename(FullPath);
-      else
-        llvm::sys::path::append(FullPath, *I);
-    }
-    return UTF8ToUTF16(FullPath, Path16);
+  const char *const LongPathPrefix = "\\\\?\\";
+
+  if ((Path16.size() + CurPathLen) < MaxPathLen ||
+      Path8Str.startswith(LongPathPrefix))
+    return std::error_code();
+
+  if (!IsAbsolute) {
+    if ((EC = llvm::sys::fs::make_absolute(Path8Str)))
+      return EC;
   }
 
-  // Just use the caller's original path.
-  return UTF8ToUTF16(Path8Str, Path16);
+  // Remove '.' and '..' because long paths treat these as real path components.
+  llvm::sys::path::remove_dots(Path8Str, true);
+
+  const StringRef RootName = llvm::sys::path::root_name(Path8Str);
+  assert(!RootName.empty() &&
+         "Root name cannot be empty for an absolute path!");
+
+  // llvm::sys::path::remove_dots, used above, can leave a '/' after the root
+  // name and long paths must use '\' as the separator.
+  const size_t RootNameSize = RootName.size();
+  if (RootNameSize < Path8Str.size() && Path8Str[RootNameSize] == '/')
+    Path8Str[RootNameSize] = '\\';
+
+  SmallString<2 * MAX_PATH> FullPath(LongPathPrefix);
+  if (RootName[1] != ':') { // Check if UNC.
+    FullPath.append("UNC\\");
+    FullPath.append(Path8Str.begin() + 2, Path8Str.end());
+  } else
+    FullPath.append(Path8Str);
+
+  return UTF8ToUTF16(FullPath, Path16);
 }
-} // end namespace path
+
+} // end namespace windows
 
 namespace fs {
 
@@ -227,7 +233,9 @@
                                  perms Perms) {
   SmallVector<wchar_t, 128> path_utf16;
 
-  if (std::error_code ec = widenPath(path, path_utf16))
+  // CreateDirectoryW has a lower maximum path length as it must leave room for
+  // an 8.3 filename.
+  if (std::error_code ec = widenPath(path, path_utf16, MAX_PATH - 12))
     return ec;
 
   if (!::CreateDirectoryW(path_utf16.begin(), NULL)) {
Index: llvm/lib/Support/Windows/Program.inc
===================================================================
--- llvm/lib/Support/Windows/Program.inc
+++ llvm/lib/Support/Windows/Program.inc
@@ -151,7 +151,7 @@
     if (windows::UTF8ToUTF16(fname, fnameUnicode))
       return INVALID_HANDLE_VALUE;
   } else {
-    if (path::widenPath(fname, fnameUnicode))
+    if (sys::windows::widenPath(fname, fnameUnicode))
       return INVALID_HANDLE_VALUE;
   }
   h = CreateFileW(fnameUnicode.data(), fd ? GENERIC_WRITE : GENERIC_READ,
@@ -263,7 +263,7 @@
   fflush(stderr);
 
   SmallVector<wchar_t, MAX_PATH> ProgramUtf16;
-  if (std::error_code ec = path::widenPath(Program, ProgramUtf16)) {
+  if (std::error_code ec = sys::windows::widenPath(Program, ProgramUtf16)) {
     SetLastError(ec.value());
     MakeErrMsg(ErrMsg,
                std::string("Unable to convert application name to UTF-16"));
Index: llvm/unittests/Support/Path.cpp
===================================================================
--- llvm/unittests/Support/Path.cpp
+++ llvm/unittests/Support/Path.cpp
@@ -28,6 +28,7 @@
 #ifdef _WIN32
 #include "llvm/ADT/ArrayRef.h"
 #include "llvm/Support/Chrono.h"
+#include "llvm/Support/Windows/WindowsSupport.h"
 #include <windows.h>
 #include <winerror.h>
 #endif
@@ -1875,4 +1876,74 @@
 #endif
 }
 
+#ifdef _WIN32
+TEST_F(FileSystemTest, widenPath) {
+  const std::wstring LongPathPrefix(L"\\\\?\\");
+
+  // Test that the length limit is checked against the UTF-16 length and not the
+  // UTF-8 length.
+  std::string Input("C:\\foldername\\");
+  const std::string Pi("\xcf\x80"); // UTF-8 lower case pi.
+  // Add Pi up to the MAX_PATH limit.
+  const size_t NumChars = MAX_PATH - Input.size() - 1;
+  for (size_t i = 0; i < NumChars; ++i)
+    Input += Pi;
+  // Check that UTF-8 length already exceeds MAX_PATH.
+  EXPECT_TRUE(Input.size() > MAX_PATH);
+  SmallVector<wchar_t, MAX_PATH + 16> Result;
+  ASSERT_NO_ERROR(windows::widenPath(Input, Result));
+  // Result should not start with the long path prefix.
+  EXPECT_TRUE(std::wmemcmp(Result.data(), LongPathPrefix.c_str(),
+                           LongPathPrefix.size()) != 0);
+  EXPECT_EQ(Result.size(), static_cast<size_t>(MAX_PATH) - 1);
+
+  // Add another Pi to exceed the MAX_PATH limit.
+  Input += Pi;
+  // Construct the expected result.
+  SmallVector<wchar_t, MAX_PATH + 16> Expected;
+  ASSERT_NO_ERROR(windows::UTF8ToUTF16(Input, Expected));
+  Expected.insert(Expected.begin(), LongPathPrefix.begin(),
+                  LongPathPrefix.end());
+
+  ASSERT_NO_ERROR(windows::widenPath(Input, Result));
+  EXPECT_EQ(Result, Expected);
+
+  // Test that UNC paths are handled correctly.
+  const std::string ShareName("\\\\sharename\\");
+  const std::string FileName("\\filename");
+  // Initialize directory name so that the input is within the MAX_PATH limit.
+  const char DirChar = 'x';
+  std::string DirName(MAX_PATH - ShareName.size() - FileName.size() - 1,
+                      DirChar);
+
+  Input = ShareName + DirName + FileName;
+  ASSERT_NO_ERROR(windows::widenPath(Input, Result));
+  // Result should not start with the long path prefix.
+  EXPECT_TRUE(std::wmemcmp(Result.data(), LongPathPrefix.c_str(),
+                           LongPathPrefix.size()) != 0);
+  EXPECT_EQ(Result.size(), static_cast<size_t>(MAX_PATH) - 1);
+
+  // Extend the directory name so the input exceeds the MAX_PATH limit.
+  DirName += DirChar;
+  Input = ShareName + DirName + FileName;
+  // Construct the expected result.
+  ASSERT_NO_ERROR(windows::UTF8ToUTF16(StringRef(Input).substr(2), Expected));
+  const std::wstring UNCPrefix(LongPathPrefix + L"UNC\\");
+  Expected.insert(Expected.begin(), UNCPrefix.begin(), UNCPrefix.end());
+
+  ASSERT_NO_ERROR(windows::widenPath(Input, Result));
+  EXPECT_EQ(Result, Expected);
+
+  // Check that Unix separators are handled correctly.
+  std::replace(Input.begin(), Input.end(), '\\', '/');
+  ASSERT_NO_ERROR(windows::widenPath(Input, Result));
+  EXPECT_EQ(Result, Expected);
+
+  // Check the removal of "dots".
+  Input = ShareName + DirName + "\\.\\foo\\.\\.." + FileName;
+  ASSERT_NO_ERROR(windows::widenPath(Input, Result));
+  EXPECT_EQ(Result, Expected);
+}
+#endif
+
 } // anonymous namespace