diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPULDSUtils.h b/llvm/lib/Target/AMDGPU/Utils/AMDGPULDSUtils.h --- a/llvm/lib/Target/AMDGPU/Utils/AMDGPULDSUtils.h +++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPULDSUtils.h @@ -23,12 +23,16 @@ Align getAlign(DataLayout const &DL, const GlobalVariable *GV); -bool userRequiresLowering(const SmallPtrSetImpl &UsedList, - User *InitialUser); +bool skipLDSLowering(GlobalVariable *GV, + const SmallPtrSetImpl &UsedList); + +bool isLDSLowereringRequired(GlobalVariable *GV, + const SmallPtrSetImpl &UsedList); std::vector findVariablesToLower(Module &M, const SmallPtrSetImpl &UsedList); +// Collect "llvm.used" and "llvm.compiler.used" global vars. SmallPtrSet getUsedList(Module &M); } // end namespace AMDGPU diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPULDSUtils.cpp b/llvm/lib/Target/AMDGPU/Utils/AMDGPULDSUtils.cpp --- a/llvm/lib/Target/AMDGPU/Utils/AMDGPULDSUtils.cpp +++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPULDSUtils.cpp @@ -29,15 +29,17 @@ GV->getValueType()); } -bool userRequiresLowering(const SmallPtrSetImpl &UsedList, - User *InitialUser) { - // Any LDS variable can be lowered by moving into the created struct - // Each variable so lowered is allocated in every kernel, so variables - // whose users are all known to be safe to lower without the transform - // are left unchanged. +// Check if we can skip the lowering for current LDS global `GV` based on its +// usage. +// +// If `GV` is used within non-kernel functions, then, it certainly requires +// lowering. Else, if it is used as global initializer, then, lowering requires +// if it is used as an initilizer of normal globals (*NON* llvm.used and +// llvm.compiler.used globals). All other uses of `GV` can be ignored. +bool skipLDSLowering(GlobalVariable *GV, + const SmallPtrSetImpl &UsedList) { SmallPtrSet Visited; - SmallVector Stack; - Stack.push_back(InitialUser); + SmallVector Stack(GV->users()); while (!Stack.empty()) { User *V = Stack.pop_back_val(); @@ -65,49 +67,60 @@ continue; } - // Unknown user, conservatively lower the variable - return true; + // We *cannot* skip the lowering of LDS global `GV`. + return false; + } + + return true; +} + +bool isLDSLowereringRequired(GlobalVariable *GV, + const SmallPtrSetImpl &UsedList) { + if (GV->getType()->getPointerAddressSpace() != AMDGPUAS::LOCAL_ADDRESS) { + return false; + } + + if (!GV->hasInitializer()) { + // addrspace(3) without initializer implies cuda/hip extern __shared__ the + // semantics for such a variable appears to be that all extern // __shared__ + // variables alias one another, in which case this transform is not required + return false; + } + + if (!isa(GV->getInitializer())) { + // Initializers are unimplemented for local address space. Leave such + // variables in place for consistent error reporting. + return false; + } + + if (GV->isConstant()) { + // A constant undef variable can't be written to, and any load is undef, so + // it should be eliminated by the optimizer. It could be dropped by the back + // end if not. This pass skips over it. + return false; } - return false; + if (skipLDSLowering(GV, UsedList)) { + // We can safely ignore users of GV, hence lowering of `GV` is not required. + return false; + } + + return true; } +// Collect all those LDS globals which requires lowering. std::vector findVariablesToLower(Module &M, const SmallPtrSetImpl &UsedList) { std::vector LocalVars; for (auto &GV : M.globals()) { - if (GV.getType()->getPointerAddressSpace() != AMDGPUAS::LOCAL_ADDRESS) { - continue; - } - if (!GV.hasInitializer()) { - // addrspace(3) without initializer implies cuda/hip extern __shared__ - // the semantics for such a variable appears to be that all extern - // __shared__ variables alias one another, in which case this transform - // is not required - continue; - } - if (!isa(GV.getInitializer())) { - // Initializers are unimplemented for local address space. - // Leave such variables in place for consistent error reporting. - continue; - } - if (GV.isConstant()) { - // A constant undef variable can't be written to, and any load is - // undef, so it should be eliminated by the optimizer. It could be - // dropped by the back end if not. This pass skips over it. - continue; - } - if (std::none_of(GV.user_begin(), GV.user_end(), [&](User *U) { - return userRequiresLowering(UsedList, U); - })) { - continue; - } - LocalVars.push_back(&GV); + if (isLDSLowereringRequired(&GV, UsedList)) + LocalVars.push_back(&GV); } return LocalVars; } +// Collect "llvm.used" and "llvm.compiler.used" global vars. SmallPtrSet getUsedList(Module &M) { SmallPtrSet UsedList;