diff --git a/llvm/lib/Target/AMDGPU/AMDGPULowerModuleLDSPass.cpp b/llvm/lib/Target/AMDGPU/AMDGPULowerModuleLDSPass.cpp
--- a/llvm/lib/Target/AMDGPU/AMDGPULowerModuleLDSPass.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPULowerModuleLDSPass.cpp
@@ -28,6 +28,7 @@
 #include "AMDGPU.h"
 #include "Utils/AMDGPUBaseInfo.h"
+#include "Utils/AMDGPULDSUtils.h"
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/IR/Constants.h"
 #include "llvm/IR/DerivedTypes.h"
@@ -49,95 +50,6 @@
 class AMDGPULowerModuleLDS : public ModulePass {
-  static bool isKernelCC(Function *Func) {
-    return AMDGPU::isModuleEntryFunctionCC(Func->getCallingConv());
-  }
-
-  static Align getAlign(DataLayout const &DL, const GlobalVariable *GV) {
-    return DL.getValueOrABITypeAlignment(GV->getPointerAlignment(DL),
-                                         GV->getValueType());
-  }
-
-  static bool
-  userRequiresLowering(const SmallPtrSetImpl<GlobalValue *> &UsedList,
-                       User *InitialUser) {
-    // Any LDS variable can be lowered by moving into the created struct
-    // Each variable so lowered is allocated in every kernel, so variables
-    // whose users are all known to be safe to lower without the transform
-    // are left unchanged.
-    SmallPtrSet<User *, 8> Visited;
-    SmallVector<User *, 16> Stack;
-    Stack.push_back(InitialUser);
-
-    while (!Stack.empty()) {
-      User *V = Stack.pop_back_val();
-      Visited.insert(V);
-
-      if (auto *G = dyn_cast<GlobalValue>(V->stripPointerCasts())) {
-        if (UsedList.contains(G)) {
-          continue;
-        }
-      }
-
-      if (auto *I = dyn_cast<Instruction>(V)) {
-        if (isKernelCC(I->getFunction())) {
-          continue;
-        }
-      }
-
-      if (auto *E = dyn_cast<ConstantExpr>(V)) {
-        for (Value::user_iterator EU = E->user_begin(); EU != E->user_end();
-             ++EU) {
-          if (Visited.insert(*EU).second) {
-            Stack.push_back(*EU);
-          }
-        }
-        continue;
-      }
-
-      // Unknown user, conservatively lower the variable
-      return true;
-    }
-
-    return false;
-  }
-
-  static std::vector<GlobalVariable *>
-  findVariablesToLower(Module &M,
-                       const SmallPtrSetImpl<GlobalValue *> &UsedList) {
-    std::vector<GlobalVariable *> LocalVars;
-    for (auto &GV : M.globals()) {
-      if (GV.getType()->getPointerAddressSpace() != AMDGPUAS::LOCAL_ADDRESS) {
-        continue;
-      }
-      if (!GV.hasInitializer()) {
-        // addrspace(3) without initializer implies cuda/hip extern __shared__
-        // the semantics for such a variable appears to be that all extern
-        // __shared__ variables alias one another, in which case this transform
-        // is not required
-        continue;
-      }
-      if (!isa<UndefValue>(GV.getInitializer())) {
-        // Initializers are unimplemented for local address space.
-        // Leave such variables in place for consistent error reporting.
-        continue;
-      }
-      if (GV.isConstant()) {
-        // A constant undef variable can't be written to, and any load is
-        // undef, so it should be eliminated by the optimizer. It could be
-        // dropped by the back end if not. This pass skips over it.
- continue; - } - if (std::none_of(GV.user_begin(), GV.user_end(), [&](User *U) { - return userRequiresLowering(UsedList, U); - })) { - continue; - } - LocalVars.push_back(&GV); - } - return LocalVars; - } - static void removeFromUsedList(Module &M, StringRef Name, SmallPtrSetImpl &ToRemove) { GlobalVariable *GV = M.getGlobalVariable(Name); @@ -217,20 +129,6 @@ ""); } - static SmallPtrSet getUsedList(Module &M) { - SmallPtrSet UsedList; - - SmallVector TmpVec; - collectUsedGlobalVariables(M, TmpVec, true); - UsedList.insert(TmpVec.begin(), TmpVec.end()); - - TmpVec.clear(); - collectUsedGlobalVariables(M, TmpVec, false); - UsedList.insert(TmpVec.begin(), TmpVec.end()); - - return UsedList; - } - public: static char ID; @@ -241,11 +139,11 @@ bool runOnModule(Module &M) override { LLVMContext &Ctx = M.getContext(); const DataLayout &DL = M.getDataLayout(); - SmallPtrSet UsedList = getUsedList(M); + SmallPtrSet UsedList = AMDGPU::getUsedList(M); // Find variables to move into new struct instance std::vector FoundLocalVars = - findVariablesToLower(M, UsedList); + AMDGPU::findVariablesToLower(M, UsedList); if (FoundLocalVars.empty()) { // No variables to rewrite, no changes made. 
@@ -257,8 +155,8 @@
     llvm::stable_sort(
         FoundLocalVars,
         [&](const GlobalVariable *LHS, const GlobalVariable *RHS) -> bool {
-          Align ALHS = getAlign(DL, LHS);
-          Align ARHS = getAlign(DL, RHS);
+          Align ALHS = AMDGPU::getAlign(DL, LHS);
+          Align ARHS = AMDGPU::getAlign(DL, RHS);
           if (ALHS != ARHS) {
             return ALHS > ARHS;
           }
@@ -280,7 +178,7 @@
     uint64_t CurrentOffset = 0;
     for (size_t I = 0; I < FoundLocalVars.size(); I++) {
       GlobalVariable *FGV = FoundLocalVars[I];
-      Align DataAlign = getAlign(DL, FGV);
+      Align DataAlign = AMDGPU::getAlign(DL, FGV);
       uint64_t DataAlignV = DataAlign.value();
       if (uint64_t Rem = CurrentOffset % DataAlignV) {
@@ -312,7 +210,8 @@
     StructType *LDSTy = StructType::create(
         Ctx, LocalVarTypes, llvm::StringRef("llvm.amdgcn.module.lds.t"));
 
-    Align MaxAlign = getAlign(DL, LocalVars[0]); // was sorted on alignment
+    Align MaxAlign =
+        AMDGPU::getAlign(DL, LocalVars[0]); // was sorted on alignment
 
     Constant *InstanceAddress = Constant::getIntegerValue(
         PointerType::get(LDSTy, AMDGPUAS::LOCAL_ADDRESS), APInt(32, 0));
@@ -350,7 +249,7 @@
     SmallPtrSet<Function *, 32> Kernels;
     for (auto &I : M.functions()) {
       Function *Func = &I;
-      if (isKernelCC(Func) && !Kernels.contains(Func)) {
+      if (AMDGPU::isKernelCC(Func) && !Kernels.contains(Func)) {
         markUsedByKernel(Builder, Func, SGV);
         Kernels.insert(Func);
       }
diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPULDSUtils.h b/llvm/lib/Target/AMDGPU/Utils/AMDGPULDSUtils.h
new file mode 100644
--- /dev/null
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPULDSUtils.h
@@ -0,0 +1,38 @@
+//===- AMDGPULDSUtils.h - LDS related helper functions -*- C++ -*----------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// AMDGPU LDS related helper utility functions.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_AMDGPU_UTILS_AMDGPULDSUTILS_H
+#define LLVM_LIB_TARGET_AMDGPU_UTILS_AMDGPULDSUTILS_H
+
+#include "AMDGPU.h"
+
+namespace llvm {
+
+namespace AMDGPU {
+
+bool isKernelCC(Function *Func);
+
+Align getAlign(DataLayout const &DL, const GlobalVariable *GV);
+
+bool userRequiresLowering(const SmallPtrSetImpl<GlobalValue *> &UsedList,
+                          User *InitialUser);
+
+std::vector<GlobalVariable *>
+findVariablesToLower(Module &M, const SmallPtrSetImpl<GlobalValue *> &UsedList);
+
+SmallPtrSet<GlobalValue *, 32> getUsedList(Module &M);
+
+} // end namespace AMDGPU
+
+} // end namespace llvm
+
+#endif // LLVM_LIB_TARGET_AMDGPU_UTILS_AMDGPULDSUTILS_H
diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPULDSUtils.cpp b/llvm/lib/Target/AMDGPU/Utils/AMDGPULDSUtils.cpp
new file mode 100644
--- /dev/null
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPULDSUtils.cpp
@@ -0,0 +1,127 @@
+//===- AMDGPULDSUtils.cpp -------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// AMDGPU LDS related helper utility functions.
+//
+//===----------------------------------------------------------------------===//
+
+#include "AMDGPULDSUtils.h"
+#include "Utils/AMDGPUBaseInfo.h"
+#include "llvm/IR/Constants.h"
+
+using namespace llvm;
+
+namespace llvm {
+
+namespace AMDGPU {
+
+bool isKernelCC(Function *Func) {
+  return AMDGPU::isModuleEntryFunctionCC(Func->getCallingConv());
+}
+
+Align getAlign(DataLayout const &DL, const GlobalVariable *GV) {
+  return DL.getValueOrABITypeAlignment(GV->getPointerAlignment(DL),
+                                       GV->getValueType());
+}
+
+bool userRequiresLowering(const SmallPtrSetImpl<GlobalValue *> &UsedList,
+                          User *InitialUser) {
+  // Any LDS variable can be lowered by moving into the created struct
+  // Each variable so lowered is allocated in every kernel, so variables
+  // whose users are all known to be safe to lower without the transform
+  // are left unchanged.
+  SmallPtrSet<User *, 8> Visited;
+  SmallVector<User *, 16> Stack;
+  Stack.push_back(InitialUser);
+
+  while (!Stack.empty()) {
+    User *V = Stack.pop_back_val();
+    Visited.insert(V);
+
+    if (auto *G = dyn_cast<GlobalValue>(V->stripPointerCasts())) {
+      if (UsedList.contains(G)) {
+        continue;
+      }
+    }
+
+    if (auto *I = dyn_cast<Instruction>(V)) {
+      if (isKernelCC(I->getFunction())) {
+        continue;
+      }
+    }
+
+    if (auto *E = dyn_cast<ConstantExpr>(V)) {
+      for (Value::user_iterator EU = E->user_begin(); EU != E->user_end();
+           ++EU) {
+        if (Visited.insert(*EU).second) {
+          Stack.push_back(*EU);
+        }
+      }
+      continue;
+    }
+
+    // Unknown user, conservatively lower the variable
+    return true;
+  }
+
+  return false;
+}
+
+std::vector<GlobalVariable *>
+findVariablesToLower(Module &M,
+                     const SmallPtrSetImpl<GlobalValue *> &UsedList) {
+  std::vector<GlobalVariable *> LocalVars;
+  for (auto &GV : M.globals()) {
+    if (GV.getType()->getPointerAddressSpace() != AMDGPUAS::LOCAL_ADDRESS) {
+      continue;
+    }
+    if (!GV.hasInitializer()) {
+      // addrspace(3) without initializer implies cuda/hip extern __shared__
+      // the semantics for such a variable appears to be that all extern
+      // __shared__ variables alias one another, in which case this transform
+      // is not required
continue; + } + if (!isa(GV.getInitializer())) { + // Initializers are unimplemented for local address space. + // Leave such variables in place for consistent error reporting. + continue; + } + if (GV.isConstant()) { + // A constant undef variable can't be written to, and any load is + // undef, so it should be eliminated by the optimizer. It could be + // dropped by the back end if not. This pass skips over it. + continue; + } + if (std::none_of(GV.user_begin(), GV.user_end(), [&](User *U) { + return userRequiresLowering(UsedList, U); + })) { + continue; + } + LocalVars.push_back(&GV); + } + return LocalVars; +} + +SmallPtrSet getUsedList(Module &M) { + SmallPtrSet UsedList; + + SmallVector TmpVec; + collectUsedGlobalVariables(M, TmpVec, true); + UsedList.insert(TmpVec.begin(), TmpVec.end()); + + TmpVec.clear(); + collectUsedGlobalVariables(M, TmpVec, false); + UsedList.insert(TmpVec.begin(), TmpVec.end()); + + return UsedList; +} + +} // end namespace AMDGPU + +} // end namespace llvm diff --git a/llvm/lib/Target/AMDGPU/Utils/CMakeLists.txt b/llvm/lib/Target/AMDGPU/Utils/CMakeLists.txt --- a/llvm/lib/Target/AMDGPU/Utils/CMakeLists.txt +++ b/llvm/lib/Target/AMDGPU/Utils/CMakeLists.txt @@ -1,8 +1,9 @@ add_llvm_component_library(LLVMAMDGPUUtils - AMDGPUBaseInfo.cpp - AMDKernelCodeTUtils.cpp AMDGPUAsmUtils.cpp + AMDGPUBaseInfo.cpp + AMDGPULDSUtils.cpp AMDGPUPALMetadata.cpp + AMDKernelCodeTUtils.cpp LINK_COMPONENTS Core