diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPULDSUtils.h b/llvm/lib/Target/AMDGPU/Utils/AMDGPULDSUtils.h --- a/llvm/lib/Target/AMDGPU/Utils/AMDGPULDSUtils.h +++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPULDSUtils.h @@ -19,16 +19,25 @@ namespace AMDGPU { +// Check if the function `Func` is an entry point function. bool isKernelCC(Function *Func); +// Get the required alignment for the LDS global variable `GV`. Align getAlign(DataLayout const &DL, const GlobalVariable *GV); -bool userRequiresLowering(const SmallPtrSetImpl &UsedList, - User *InitialUser); +// Check if we can skip the lowering for current LDS global `GV`. +bool skipLDSLowering(GlobalVariable *GV, + const SmallPtrSetImpl &UsedList); +// Check if `GV` is an LDS global, and lowerering is required for it. +bool isLDSLowereringRequired(GlobalVariable *GV, + const SmallPtrSetImpl &UsedList); + +// Collect all those LDS globals which requires lowering. std::vector findVariablesToLower(Module &M, const SmallPtrSetImpl &UsedList); +// Collect "llvm.used" and "llvm.compiler.used" global vars. SmallPtrSet getUsedList(Module &M); } // end namespace AMDGPU diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPULDSUtils.cpp b/llvm/lib/Target/AMDGPU/Utils/AMDGPULDSUtils.cpp --- a/llvm/lib/Target/AMDGPU/Utils/AMDGPULDSUtils.cpp +++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPULDSUtils.cpp @@ -20,94 +20,154 @@ namespace AMDGPU { +// Check if the function `Func` is an entry point function. bool isKernelCC(Function *Func) { return AMDGPU::isModuleEntryFunctionCC(Func->getCallingConv()); } +// Get the required alignment for the LDS global variable `GV`. Align getAlign(DataLayout const &DL, const GlobalVariable *GV) { return DL.getValueOrABITypeAlignment(GV->getPointerAlignment(DL), GV->getValueType()); } -bool userRequiresLowering(const SmallPtrSetImpl &UsedList, - User *InitialUser) { - // Any LDS variable can be lowered by moving into the created struct - // Each variable so lowered is allocated in every kernel, so variables - // whose users are all known to be safe to lower without the transform - // are left unchanged. - SmallPtrSet Visited; - SmallVector Stack; - Stack.push_back(InitialUser); - - while (!Stack.empty()) { - User *V = Stack.pop_back_val(); - Visited.insert(V); - - if (auto *G = dyn_cast(V->stripPointerCasts())) { - if (UsedList.contains(G)) { - continue; - } - } +// Check if we can skip the lowering for current LDS global `GV` based on its +// usage. +// +// If `GV` is used within non-kernel functions, then, it certainly requires +// lowering. Else, if it is used as global initializer, then, lowering requires +// if it is used as an initilizer of normal globals (*NON* llvm.used and +// llvm.compiler.used globals). All other uses of `GV` can be ignored. +bool skipLDSLowering(GlobalVariable *GV, + const SmallPtrSetImpl &UsedList) { + bool UsedAsGlobalInitializer = false; + bool UsedAsNonLLVMUsedInitializer = false; + bool UsedWithinKernelFunction = false; + bool UsedWithinNonKernelFunction = false; + SmallPtrSet VisitedUsers; + SmallVector UserStack(GV->users()); + + // There are no users for `GV`? skip lowering for `GV`. + if (UserStack.empty()) + return true; - if (auto *I = dyn_cast(V)) { - if (isKernelCC(I->getFunction())) { - continue; + // We should explore all the users of `GV` since it could possibly be used in + // multiple places/scopes, and accordingly need to take lowering decision. + while (!UserStack.empty()) { + auto *U = UserStack.pop_back_val(); + + // `U` is already visited? continue to next one. + if (!VisitedUsers.insert(U).second) + continue; + + // `U` is a global variable, and `GV` is used as its initializer. Note that + // we should first check for `GlobalVariable` before checking for `Constant` + // below since `GlobalVariable` is also a `Constant`. + if (isa(U)) { + UsedAsGlobalInitializer = true; + if (!UsedList.contains(GV)) { + // Used as initializer of normal globals apart from "llvm.used" or + // "llvm.compiler.used". + UsedAsNonLLVMUsedInitializer = true; } + continue; } - if (auto *E = dyn_cast(V)) { - for (Value::user_iterator EU = E->user_begin(); EU != E->user_end(); - ++EU) { - if (Visited.insert(*EU).second) { - Stack.push_back(*EU); - } - } + // `U` is `Constant`. Push-back users of `U`, and continue further exploring + // the stack until we find the actual user of `GV`. + if (isa(U)) { + append_range(UserStack, U->users()); continue; } - // Unknown user, conservatively lower the variable + // `U` should be an instruction belonging to some function. + auto *I = dyn_cast(U); + assert(I && "Instruction expected."); + if (AMDGPU::isKernelCC(I->getFunction())) { + UsedWithinKernelFunction = true; + } else { + UsedWithinNonKernelFunction = true; + } + } + + // `GV` is used within non-kernel function, it requires lowering. + if (UsedWithinNonKernelFunction) { + return false; + } + + // `GV` is used as an initializer of some other global variable definition. + if (UsedAsGlobalInitializer) { + if (UsedAsNonLLVMUsedInitializer) { + // `GV` is used as global variable initializer of normal globals, it + // requires lowering. + return false; + } else { + // `GV` is used as global variable initializer of "llvm.used" or + // "llvm.compiler.used", it does not require lowering. + return true; + } + } + + // `GV` is *only* used within kernel, it does not require lowering. + if (UsedWithinKernelFunction) { return true; } - return false; + // Ideally control should not reach here. + assert(false && "Internal error."); + return true; +} + +// Check if `GV` is an LDS global, and lowerering is required for it. +bool isLDSLowereringRequired(GlobalVariable *GV, + const SmallPtrSetImpl &UsedList) { + if (GV->getType()->getPointerAddressSpace() != AMDGPUAS::LOCAL_ADDRESS) { + // Ignore addrspace other than 3. + return false; + } + + if (!GV->hasInitializer()) { + // addrspace(3) without initializer implies cuda/hip extern __shared__ the + // semantics for such a variable appears to be that all extern // __shared__ + // variables alias one another, in which case this transform is not required + return false; + } + + if (!isa(GV->getInitializer())) { + // Initializers are unimplemented for local address space. Leave such + // variables in place for consistent error reporting. + return false; + } + + if (GV->isConstant()) { + // A constant undef variable can't be written to, and any load is undef, so + // it should be eliminated by the optimizer. It could be dropped by the back + // end if not. This pass skips over it. + return false; + } + + if (skipLDSLowering(GV, UsedList)) { + // We can safely ignore the users of GV, hence lowering of GV is not + // required. + return false; + } + + return true; } +// Collect all those LDS globals which requires lowering. std::vector findVariablesToLower(Module &M, const SmallPtrSetImpl &UsedList) { std::vector LocalVars; for (auto &GV : M.globals()) { - if (GV.getType()->getPointerAddressSpace() != AMDGPUAS::LOCAL_ADDRESS) { - continue; - } - if (!GV.hasInitializer()) { - // addrspace(3) without initializer implies cuda/hip extern __shared__ - // the semantics for such a variable appears to be that all extern - // __shared__ variables alias one another, in which case this transform - // is not required - continue; - } - if (!isa(GV.getInitializer())) { - // Initializers are unimplemented for local address space. - // Leave such variables in place for consistent error reporting. - continue; - } - if (GV.isConstant()) { - // A constant undef variable can't be written to, and any load is - // undef, so it should be eliminated by the optimizer. It could be - // dropped by the back end if not. This pass skips over it. - continue; - } - if (std::none_of(GV.user_begin(), GV.user_end(), [&](User *U) { - return userRequiresLowering(UsedList, U); - })) { - continue; - } - LocalVars.push_back(&GV); + if (isLDSLowereringRequired(&GV, UsedList)) + LocalVars.push_back(&GV); } return LocalVars; } +// Collect "llvm.used" and "llvm.compiler.used" global vars. SmallPtrSet getUsedList(Module &M) { SmallPtrSet UsedList;