diff --git a/llvm/docs/ReleaseNotes.rst b/llvm/docs/ReleaseNotes.rst --- a/llvm/docs/ReleaseNotes.rst +++ b/llvm/docs/ReleaseNotes.rst @@ -117,6 +117,12 @@ Changes to the Windows Target ----------------------------- +* The LLVM filesystem class ``UniqueID`` and function ``equivalent()`` + no longer determine that distinct different path names for the same + hard linked file actually are equal. This is an intentional tradeoff in a + bug fix, where the bug used to cause distinct files to be considered + equivalent on some file systems. + Changes to the X86 Backend -------------------------- diff --git a/llvm/include/llvm/Support/FileSystem.h b/llvm/include/llvm/Support/FileSystem.h --- a/llvm/include/llvm/Support/FileSystem.h +++ b/llvm/include/llvm/Support/FileSystem.h @@ -233,8 +233,7 @@ #elif defined (_WIN32) uint32_t NumLinks = 0; uint32_t VolumeSerialNumber = 0; - uint32_t FileIndexHigh = 0; - uint32_t FileIndexLow = 0; + uint64_t PathHash = 0; #endif public: @@ -255,13 +254,12 @@ uint32_t LastAccessTimeHigh, uint32_t LastAccessTimeLow, uint32_t LastWriteTimeHigh, uint32_t LastWriteTimeLow, uint32_t VolumeSerialNumber, uint32_t FileSizeHigh, - uint32_t FileSizeLow, uint32_t FileIndexHigh, - uint32_t FileIndexLow) + uint32_t FileSizeLow, uint64_t PathHash) : basic_file_status(Type, Perms, LastAccessTimeHigh, LastAccessTimeLow, LastWriteTimeHigh, LastWriteTimeLow, FileSizeHigh, FileSizeLow), NumLinks(LinkCount), VolumeSerialNumber(VolumeSerialNumber), - FileIndexHigh(FileIndexHigh), FileIndexLow(FileIndexLow) {} + PathHash(PathHash) {} #endif UniqueID getUniqueID() const; diff --git a/llvm/lib/Support/Windows/Path.inc b/llvm/lib/Support/Windows/Path.inc --- a/llvm/lib/Support/Windows/Path.inc +++ b/llvm/lib/Support/Windows/Path.inc @@ -158,12 +158,7 @@ } UniqueID file_status::getUniqueID() const { - // The file is uniquely identified by the volume serial number along - // with the 64-bit file identifier. - uint64_t FileID = (static_cast(FileIndexHigh) << 32ULL) | - static_cast(FileIndexLow); - - return UniqueID(VolumeSerialNumber, FileID); + return UniqueID(VolumeSerialNumber, PathHash); } ErrorOr disk_space(const Twine &Path) { @@ -362,16 +357,17 @@ } static std::error_code realPathFromHandle(HANDLE H, - SmallVectorImpl &Buffer) { + SmallVectorImpl &Buffer, + DWORD flags = VOLUME_NAME_DOS) { Buffer.resize_for_overwrite(Buffer.capacity()); DWORD CountChars = ::GetFinalPathNameByHandleW( - H, Buffer.begin(), Buffer.capacity(), FILE_NAME_NORMALIZED); + H, Buffer.begin(), Buffer.capacity(), FILE_NAME_NORMALIZED | flags); if (CountChars && CountChars >= Buffer.capacity()) { // The buffer wasn't big enough, try again. In this case the return value // *does* indicate the size of the null terminator. Buffer.resize_for_overwrite(CountChars); CountChars = ::GetFinalPathNameByHandleW(H, Buffer.begin(), Buffer.size(), - FILE_NAME_NORMALIZED); + FILE_NAME_NORMALIZED | flags); } Buffer.truncate(CountChars); if (CountChars == 0) @@ -647,12 +643,7 @@ bool equivalent(file_status A, file_status B) { assert(status_known(A) && status_known(B)); - return A.FileIndexHigh == B.FileIndexHigh && - A.FileIndexLow == B.FileIndexLow && A.FileSizeHigh == B.FileSizeHigh && - A.FileSizeLow == B.FileSizeLow && - A.LastWriteTimeHigh == B.LastWriteTimeHigh && - A.LastWriteTimeLow == B.LastWriteTimeLow && - A.VolumeSerialNumber == B.VolumeSerialNumber; + return A.getUniqueID() == B.getUniqueID(); } std::error_code equivalent(const Twine &A, const Twine &B, bool &result) { @@ -698,6 +689,7 @@ } static std::error_code getStatus(HANDLE FileHandle, file_status &Result) { + SmallVector ntPath; if (FileHandle == INVALID_HANDLE_VALUE) goto handle_status_error; @@ -725,13 +717,37 @@ if (!::GetFileInformationByHandle(FileHandle, &Info)) goto handle_status_error; + // File indices aren't necessarily stable after closing the file handle; + // instead hash a canonicalized path. + // + // For getting a canonical path to the file, call GetFinalPathNameByHandleW + // with VOLUME_NAME_NT. We don't really care exactly what the path looks + // like here, as long as it is canonical (e.g. doesn't differentiate between + // whether a file was referred to with upper/lower case names originally). + // The default format with VOLUME_NAME_DOS doesn't work with all file system + // drivers, such as ImDisk. (See + // https://github.com/rust-lang/rust/pull/86447.) + uint64_t PathHash; + if (std::error_code EC = + realPathFromHandle(FileHandle, ntPath, VOLUME_NAME_NT)) { + // If realPathFromHandle failed, fall back on the fields + // nFileIndex{High,Low} instead. They're not necessarily stable on all file + // systems as they're only documented as being unique/stable as long as the + // file handle is open - but they're a decent fallback if we couldn't get + // the canonical path. + PathHash = (static_cast(Info.nFileIndexHigh) << 32ULL) | + static_cast(Info.nFileIndexLow); + } else { + PathHash = hash_combine_range(ntPath.begin(), ntPath.end()); + } + Result = file_status( file_type_from_attrs(Info.dwFileAttributes), perms_from_attrs(Info.dwFileAttributes), Info.nNumberOfLinks, Info.ftLastAccessTime.dwHighDateTime, Info.ftLastAccessTime.dwLowDateTime, Info.ftLastWriteTime.dwHighDateTime, Info.ftLastWriteTime.dwLowDateTime, Info.dwVolumeSerialNumber, Info.nFileSizeHigh, Info.nFileSizeLow, - Info.nFileIndexHigh, Info.nFileIndexLow); + PathHash); return std::error_code(); handle_status_error: diff --git a/llvm/unittests/Support/Path.cpp b/llvm/unittests/Support/Path.cpp --- a/llvm/unittests/Support/Path.cpp +++ b/llvm/unittests/Support/Path.cpp @@ -708,12 +708,16 @@ ASSERT_NO_ERROR(fs::remove(Twine(TempPath2))); +#ifndef _WIN32 // Two paths representing the same file on disk should still provide the // same unique id. We can test this by making a hard link. + // FIXME: Our implementation of getUniqueID on Windows doesn't consider hard + // links to be the same file. ASSERT_NO_ERROR(fs::create_link(Twine(TempPath), Twine(TempPath2))); fs::UniqueID D2; ASSERT_NO_ERROR(fs::getUniqueID(Twine(TempPath2), D2)); ASSERT_EQ(D2, F1); +#endif ::close(FileDescriptor); @@ -909,12 +913,16 @@ // Create a hard link to Temp1. ASSERT_NO_ERROR(fs::create_link(Twine(TempPath), Twine(TempPath2))); +#ifndef _WIN32 + // FIXME: Our implementation of equivalent() on Windows doesn't consider hard + // links to be the same file. bool equal; ASSERT_NO_ERROR(fs::equivalent(Twine(TempPath), Twine(TempPath2), equal)); EXPECT_TRUE(equal); ASSERT_NO_ERROR(fs::status(Twine(TempPath), A)); ASSERT_NO_ERROR(fs::status(Twine(TempPath2), B)); EXPECT_TRUE(fs::equivalent(A, B)); +#endif // Remove Temp1. ::close(FileDescriptor);