diff --git a/llvm/lib/Target/AMDGPU/AMDGPULowerModuleLDSPass.cpp b/llvm/lib/Target/AMDGPU/AMDGPULowerModuleLDSPass.cpp --- a/llvm/lib/Target/AMDGPU/AMDGPULowerModuleLDSPass.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPULowerModuleLDSPass.cpp @@ -28,6 +28,7 @@ #include "AMDGPU.h" #include "Utils/AMDGPUBaseInfo.h" +#include "Utils/AMDGPULDSUtils.h" #include "llvm/ADT/STLExtras.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DerivedTypes.h" @@ -53,87 +54,13 @@ return AMDGPU::isModuleEntryFunctionCC(Func->getCallingConv()); } - static Align getAlign(DataLayout const &DL, const GlobalVariable *GV) { - return DL.getValueOrABITypeAlignment(GV->getPointerAlignment(DL), - GV->getValueType()); - } - - static bool - userRequiresLowering(const SmallPtrSetImpl &UsedList, - User *InitialUser) { - // Any LDS variable can be lowered by moving into the created struct - // Each variable so lowered is allocated in every kernel, so variables - // whose users are all known to be safe to lower without the transform - // are left unchanged. - SmallPtrSet Visited; - SmallVector Stack; - Stack.push_back(InitialUser); - - while (!Stack.empty()) { - User *V = Stack.pop_back_val(); - Visited.insert(V); - - if (auto *G = dyn_cast(V->stripPointerCasts())) { - if (UsedList.contains(G)) { - continue; - } - } - - if (auto *I = dyn_cast(V)) { - if (isKernelCC(I->getFunction())) { - continue; - } - } - - if (auto *E = dyn_cast(V)) { - for (Value::user_iterator EU = E->user_begin(); EU != E->user_end(); - ++EU) { - if (Visited.insert(*EU).second) { - Stack.push_back(*EU); - } - } - continue; - } - - // Unknown user, conservatively lower the variable - return true; - } - - return false; - } - static std::vector findVariablesToLower(Module &M, const SmallPtrSetImpl &UsedList) { std::vector LocalVars; for (auto &GV : M.globals()) { - if (GV.getType()->getPointerAddressSpace() != AMDGPUAS::LOCAL_ADDRESS) { - continue; - } - if (!GV.hasInitializer()) { - // addrspace(3) without initializer implies cuda/hip extern __shared__ - // the semantics for such a variable appears to be that all extern - // __shared__ variables alias one another, in which case this transform - // is not required - continue; - } - if (!isa(GV.getInitializer())) { - // Initializers are unimplemented for local address space. - // Leave such variables in place for consistent error reporting. - continue; - } - if (GV.isConstant()) { - // A constant undef variable can't be written to, and any load is - // undef, so it should be eliminated by the optimizer. It could be - // dropped by the back end if not. This pass skips over it. - continue; - } - if (std::none_of(GV.user_begin(), GV.user_end(), [&](User *U) { - return userRequiresLowering(UsedList, U); - })) { - continue; - } - LocalVars.push_back(&GV); + if (isLDSLowereringRequired(&GV, UsedList)) + LocalVars.push_back(&GV); } return LocalVars; } @@ -217,20 +144,6 @@ ""); } - static SmallPtrSet getUsedList(Module &M) { - SmallPtrSet UsedList; - - SmallVector TmpVec; - collectUsedGlobalVariables(M, TmpVec, true); - UsedList.insert(TmpVec.begin(), TmpVec.end()); - - TmpVec.clear(); - collectUsedGlobalVariables(M, TmpVec, false); - UsedList.insert(TmpVec.begin(), TmpVec.end()); - - return UsedList; - } - public: static char ID; diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPULDSUtils.h b/llvm/lib/Target/AMDGPU/Utils/AMDGPULDSUtils.h new file mode 100644 --- /dev/null +++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPULDSUtils.h @@ -0,0 +1,32 @@ +//===- AMDGPULDSUtils.h - LDS related helper functions -*- C++ -*----------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// AMDGPU LDS related helper utility functions. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_TARGET_AMDGPU_UTILS_AMDGPULDSUTILS_H +#define LLVM_LIB_TARGET_AMDGPU_UTILS_AMDGPULDSUTILS_H + +#include "AMDGPU.h" + +namespace llvm { + +// Check if `GV` is an LDS global, and lowerering is required for it. +bool isLDSLowereringRequired(GlobalVariable *GV, + const SmallPtrSetImpl &UsedList); + +// Get the required alignment for the LDS global variable `GV`. +Align getAlign(const DataLayout &DL, const GlobalVariable *GV); + +// Collect "llvm.used" and "llvm.compiler.used" global vars. +SmallPtrSet getUsedList(Module &M); + +} // end namespace llvm + +#endif // LLVM_LIB_TARGET_AMDGPU_UTILS_AMDGPULDSUTILS_H diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPULDSUtils.cpp b/llvm/lib/Target/AMDGPU/Utils/AMDGPULDSUtils.cpp new file mode 100644 --- /dev/null +++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPULDSUtils.cpp @@ -0,0 +1,158 @@ +//===- AMDGPULDSUtils.cpp -------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// AMDGPU LDS related helper utility functions. +// +//===----------------------------------------------------------------------===// + +#include "AMDGPULDSUtils.h" +#include "Utils/AMDGPUBaseInfo.h" +#include "llvm/IR/Constants.h" + +using namespace llvm; + +namespace llvm { + +// Check if we can skip the lowering for current LDS global `GV`. +static bool skipLDSLowering(GlobalVariable *GV, + const SmallPtrSetImpl &UsedList) { + bool UsedAsInitializer = false; + bool UsedAsNonLLVMUsedInitializer = false; + bool UsedWithinKernelFunction = false; + bool UsedWithinNonKernelFunction = false; + SmallPtrSet VisitedUsers; + + // There are no users for `GV`, skip lowering for `GV`. + SmallVector UserStack(GV->users()); + if (UserStack.empty()) + return true; + + while (!UserStack.empty()) { + auto *U = UserStack.pop_back_val(); + + // `U` is already visited? continue to next one. + if (!VisitedUsers.insert(U).second) + continue; + + if (isa(U)) { + // `U` is a global variable, and `GV` is used as its initializer. + UsedAsInitializer = true; + if (!UsedList.contains(GV)) { + // Used as initializer of normal globals apart from "llvm.used" or + // "llvm.compiler.used". + UsedAsNonLLVMUsedInitializer = true; + } + continue; + } + + if (isa(U)) { + // `U` is `Constant`. Push-back users of `U`, and continue further + // exploring the stack. + append_range(UserStack, U->users()); + continue; + } + + // `U` should be an instruction belonging to some function. + auto *I = dyn_cast(U); + assert(I && "Instruction expected."); + auto *F = I->getFunction(); + if (AMDGPU::isModuleEntryFunctionCC(F->getCallingConv())) { + UsedWithinKernelFunction = true; + } else { + UsedWithinNonKernelFunction = true; + } + } + + if (UsedWithinNonKernelFunction) { + // `GV` is used within non-kernel function, it requires lowering. + return false; + } + + if (UsedAsInitializer) { + // `GV` is used as an initializer of some other global variable defintion. + if (UsedAsNonLLVMUsedInitializer) { + // `GV` is used as global variable initializer of normal globals, + // it requires lowering. + return false; + } else { + // `GV` is used as global variable initializer of "llvm.used" or + // "llvm.compiler.used", it does not require lowering. + return true; + } + } + + if (UsedWithinKernelFunction) { + // `GV` is *only* used within kernel, it does not require lowering. + return true; + } + + // Ideally control should not reach here. If it is, then, we need to take a + // re-look at the above logic. + assert(false && "Internal error."); + return true; +} + +// Check if `GV` is an LDS global, and lowerering is required for it. +bool isLDSLowereringRequired(GlobalVariable *GV, + const SmallPtrSetImpl &UsedList) { + if (GV->getType()->getPointerAddressSpace() != AMDGPUAS::LOCAL_ADDRESS) { + // Ignore addrspace other than 3. + return false; + } + + if (!GV->hasInitializer()) { + // addrspace(3) without initializer implies cuda/hip extern __shared__ the + // semantics for such a variable appears to be that all extern // __shared__ + // variables alias one another, in which case this transform is not required + return false; + } + + if (!isa(GV->getInitializer())) { + // Initializers are unimplemented for local address space. Leave such + // variables in place for consistent error reporting. + return false; + } + + if (GV->isConstant()) { + // A constant undef variable can't be written to, and any load is undef, so + // it should be eliminated by the optimizer. It could be dropped by the back + // end if not. This pass skips over it. + return false; + } + + if (skipLDSLowering(GV, UsedList)) { + // We can safely ignore the users of GV, hence lowering of GV is not + // required. + return false; + } + + return true; +} + +// Get the required alignment for the LDS global variable `GV`. +Align getAlign(const DataLayout &DL, const GlobalVariable *GV) { + return DL.getValueOrABITypeAlignment(GV->getPointerAlignment(DL), + GV->getValueType()); +} + +// Collect "llvm.used" and "llvm.compiler.used" global vars. +SmallPtrSet getUsedList(Module &M) { + SmallPtrSet UsedList; + + SmallVector TmpVec; + collectUsedGlobalVariables(M, TmpVec, true); + UsedList.insert(TmpVec.begin(), TmpVec.end()); + + TmpVec.clear(); + collectUsedGlobalVariables(M, TmpVec, false); + UsedList.insert(TmpVec.begin(), TmpVec.end()); + + return UsedList; +} + +} // end namespace llvm diff --git a/llvm/lib/Target/AMDGPU/Utils/CMakeLists.txt b/llvm/lib/Target/AMDGPU/Utils/CMakeLists.txt --- a/llvm/lib/Target/AMDGPU/Utils/CMakeLists.txt +++ b/llvm/lib/Target/AMDGPU/Utils/CMakeLists.txt @@ -1,8 +1,9 @@ add_llvm_component_library(LLVMAMDGPUUtils - AMDGPUBaseInfo.cpp - AMDKernelCodeTUtils.cpp AMDGPUAsmUtils.cpp + AMDGPUBaseInfo.cpp + AMDGPULDSUtils.cpp AMDGPUPALMetadata.cpp + AMDKernelCodeTUtils.cpp LINK_COMPONENTS Core