diff --git a/llvm/docs/ReleaseNotes.rst b/llvm/docs/ReleaseNotes.rst --- a/llvm/docs/ReleaseNotes.rst +++ b/llvm/docs/ReleaseNotes.rst @@ -176,6 +176,14 @@ This roughly makes hidden visibility work like it does for other object file formats. +* When using multi-threaded LLVM tools (such as LLD) on a Windows host with a + large number of processors or CPU sockets, the LLVM ThreadPool previously + spread its threads across all processors itself. + Starting with Windows Server 2022 and Windows 11, the behavior has changed: + the OS now spreads threads across all processors automatically. This also + fixes an affinity mask issue. + (`D138747 <https://reviews.llvm.org/D138747>`_) + Changes to the X86 Backend -------------------------- diff --git a/llvm/include/llvm/Support/Windows/WindowsSupport.h b/llvm/include/llvm/Support/Windows/WindowsSupport.h --- a/llvm/include/llvm/Support/Windows/WindowsSupport.h +++ b/llvm/include/llvm/Support/Windows/WindowsSupport.h @@ -59,6 +59,9 @@ /// yet have VersionHelpers.h, so we have our own helper. bool RunningWindows8OrGreater(); +/// Determines if the program is running on Windows 11 or Windows Server 2022. +bool RunningWindows11OrGreater(); + /// Returns the Windows version as Major.Minor.0.BuildNumber. Uses /// RtlGetVersion or GetVersionEx under the hood depending on what is available. 
/// GetVersionEx is deprecated, but this API exposes the build number which can diff --git a/llvm/lib/Support/Windows/Process.inc b/llvm/lib/Support/Windows/Process.inc --- a/llvm/lib/Support/Windows/Process.inc +++ b/llvm/lib/Support/Windows/Process.inc @@ -477,20 +477,30 @@ typedef NTSTATUS(WINAPI *RtlGetVersionPtr)(PRTL_OSVERSIONINFOW); #define STATUS_SUCCESS ((NTSTATUS)0x00000000L) -llvm::VersionTuple llvm::GetWindowsOSVersion() { - HMODULE hMod = ::GetModuleHandleW(L"ntdll.dll"); - if (hMod) { +static RTL_OSVERSIONINFOEXW GetWindowsVer() { + auto getVer = []() -> RTL_OSVERSIONINFOEXW { + HMODULE hMod = ::GetModuleHandleW(L"ntdll.dll"); + assert(hMod); + auto getVer = (RtlGetVersionPtr)::GetProcAddress(hMod, "RtlGetVersion"); - if (getVer) { - RTL_OSVERSIONINFOEXW info{}; - info.dwOSVersionInfoSize = sizeof(info); - if (getVer((PRTL_OSVERSIONINFOW)&info) == STATUS_SUCCESS) { - return llvm::VersionTuple(info.dwMajorVersion, info.dwMinorVersion, 0, - info.dwBuildNumber); - } - } - } - return llvm::VersionTuple(0, 0, 0, 0); + assert(getVer); + + RTL_OSVERSIONINFOEXW info{}; + info.dwOSVersionInfoSize = sizeof(info); + NTSTATUS r = getVer((PRTL_OSVERSIONINFOW)&info); + (void)r; + assert(r == STATUS_SUCCESS); + + return info; + }; + static RTL_OSVERSIONINFOEXW info = getVer(); + return info; +} + +llvm::VersionTuple llvm::GetWindowsOSVersion() { + RTL_OSVERSIONINFOEXW info = GetWindowsVer(); + return llvm::VersionTuple(info.dwMajorVersion, info.dwMinorVersion, 0, + info.dwBuildNumber); } bool llvm::RunningWindows8OrGreater() { @@ -498,6 +508,19 @@ return GetWindowsOSVersion() >= llvm::VersionTuple(6, 2, 0, 0); } +bool llvm::RunningWindows11OrGreater() { + RTL_OSVERSIONINFOEXW info = GetWindowsVer(); + auto ver = llvm::VersionTuple(info.dwMajorVersion, info.dwMinorVersion, 0, + info.dwBuildNumber); + + // Windows Server 2022 + if (info.wProductType == VER_NT_SERVER) + return ver >= llvm::VersionTuple(10, 0, 0, 20348); + + // Windows 11 + return ver >= 
llvm::VersionTuple(10, 0, 0, 22000); +} + [[noreturn]] void Process::ExitNoCleanup(int RetCode) { TerminateProcess(GetCurrentProcess(), RetCode); llvm_unreachable("TerminateProcess doesn't return"); diff --git a/llvm/lib/Support/Windows/Threading.inc b/llvm/lib/Support/Windows/Threading.inc --- a/llvm/lib/Support/Windows/Threading.inc +++ b/llvm/lib/Support/Windows/Threading.inc @@ -159,6 +159,22 @@ return true; } +static std::optional> getActiveGroups() { + USHORT Count = 0; + if (::GetProcessGroupAffinity(GetCurrentProcess(), &Count, nullptr)) + return std::nullopt; + + if (GetLastError() != ERROR_INSUFFICIENT_BUFFER) + return std::nullopt; + + std::vector Groups; + Groups.resize(Count); + if (!::GetProcessGroupAffinity(GetCurrentProcess(), &Count, Groups.data())) + return std::nullopt; + + return Groups; +} + static ArrayRef getProcessorGroups() { auto computeGroups = []() { SmallVector Groups; @@ -193,22 +209,28 @@ if (!IterateProcInfo(RelationProcessorCore, HandleProc)) return std::vector(); + auto ActiveGroups = getActiveGroups(); + if (!ActiveGroups) + return std::vector(); + // If there's an affinity mask set, assume the user wants to constrain the // current process to only a single CPU group. On Windows, it is not // possible for affinity masks to cross CPU group boundaries. DWORD_PTR ProcessAffinityMask = 0, SystemAffinityMask = 0; if (::GetProcessAffinityMask(GetCurrentProcess(), &ProcessAffinityMask, - &SystemAffinityMask) && - ProcessAffinityMask != SystemAffinityMask) { - // We don't expect more that 4 CPU groups on Windows (256 processors). 
- USHORT GroupCount = 4; - USHORT GroupArray[4]{}; - if (::GetProcessGroupAffinity(GetCurrentProcess(), &GroupCount, - GroupArray)) { - assert(GroupCount == 1 && - "On startup, a program is expected to be assigned only to " - "one processor group!"); - unsigned CurrentGroupID = GroupArray[0]; + &SystemAffinityMask)) { + + if (ProcessAffinityMask != SystemAffinityMask) { + if (llvm::RunningWindows11OrGreater() && ActiveGroups->size() > 1) { + // The process affinity mask is spurious, due to an OS bug, ignore it. + return std::vector(Groups.begin(), Groups.end()); + } + + assert(ActiveGroups->size() == 1 && + "When an affinity mask is set, the process is expected to be " + "assigned to a single processor group!"); + + unsigned CurrentGroupID = (*ActiveGroups)[0]; ProcessorGroup NewG{Groups[CurrentGroupID]}; NewG.Affinity = ProcessAffinityMask; NewG.UsableThreads = countPopulation(ProcessAffinityMask); @@ -216,7 +238,6 @@ Groups.push_back(NewG); } } - return std::vector(Groups.begin(), Groups.end()); }; static auto Groups = computeGroups(); @@ -273,6 +294,12 @@ // Assign the current thread to a more appropriate CPU socket or CPU group void llvm::ThreadPoolStrategy::apply_thread_strategy( unsigned ThreadPoolNum) const { + + // After Windows 11 and Windows Server 2022, let the OS do the scheduling, + // since a process automatically gains access to all processor groups. 
+ if (llvm::RunningWindows11OrGreater()) + return; + std::optional Socket = compute_cpu_socket(ThreadPoolNum); if (!Socket) return; diff --git a/llvm/unittests/Support/ThreadPool.cpp b/llvm/unittests/Support/ThreadPool.cpp --- a/llvm/unittests/Support/ThreadPool.cpp +++ b/llvm/unittests/Support/ThreadPool.cpp @@ -18,6 +18,10 @@ #include "llvm/Support/TargetSelect.h" #include "llvm/Support/Threading.h" +#ifdef _WIN32 +#include "llvm/Support/Windows/WindowsSupport.h" +#endif + #include #include @@ -378,12 +382,22 @@ TEST_F(ThreadPoolTest, AllThreads_UseAllRessources) { CHECK_UNSUPPORTED(); + // After Windows 11, the OS is free to deploy the threads on any CPU socket. + // We cannot reliably ensure that all thread affinity masks are covered, + // therefore this test should not run. + if (llvm::RunningWindows11OrGreater()) + return; std::vector ThreadsUsed = RunOnAllSockets({}); ASSERT_EQ(llvm::get_cpus(), ThreadsUsed.size()); } TEST_F(ThreadPoolTest, AllThreads_OneThreadPerCore) { CHECK_UNSUPPORTED(); + // After Windows 11, the OS is free to deploy the threads on any CPU socket. + // We cannot reliably ensure that all thread affinity masks are covered, + // therefore this test should not run. + if (llvm::RunningWindows11OrGreater()) + return; std::vector ThreadsUsed = RunOnAllSockets(llvm::heavyweight_hardware_concurrency()); ASSERT_EQ(llvm::get_cpus(), ThreadsUsed.size());