diff --git a/llvm/lib/Target/AMDGPU/AMDGPULowerModuleLDSPass.cpp b/llvm/lib/Target/AMDGPU/AMDGPULowerModuleLDSPass.cpp --- a/llvm/lib/Target/AMDGPU/AMDGPULowerModuleLDSPass.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPULowerModuleLDSPass.cpp @@ -28,7 +28,7 @@ #include "AMDGPU.h" #include "Utils/AMDGPUBaseInfo.h" -#include "Utils/AMDGPULDSUtils.h" +#include "Utils/AMDGPUMemoryUtils.h" #include "llvm/ADT/STLExtras.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DerivedTypes.h" diff --git a/llvm/lib/Target/AMDGPU/AMDGPUReplaceLDSUseWithPointer.cpp b/llvm/lib/Target/AMDGPU/AMDGPUReplaceLDSUseWithPointer.cpp --- a/llvm/lib/Target/AMDGPU/AMDGPUReplaceLDSUseWithPointer.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUReplaceLDSUseWithPointer.cpp @@ -83,7 +83,7 @@ #include "AMDGPU.h" #include "GCNSubtarget.h" #include "Utils/AMDGPUBaseInfo.h" -#include "Utils/AMDGPULDSUtils.h" +#include "Utils/AMDGPUMemoryUtils.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SetOperations.h" diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h --- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h +++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h @@ -741,6 +741,8 @@ LLVM_READNONE bool isModuleEntryFunctionCC(CallingConv::ID CC); +bool isKernelCC(const Function *Func); + // FIXME: Remove this when calling conventions cleaned up LLVM_READNONE inline bool isKernel(CallingConv::ID CC) { diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp --- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp +++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp @@ -1443,6 +1443,10 @@ } } +bool isKernelCC(const Function *Func) { + return AMDGPU::isModuleEntryFunctionCC(Func->getCallingConv()); +} + bool hasXNACK(const MCSubtargetInfo &STI) { return STI.getFeatureBits()[AMDGPU::FeatureXNACK]; } diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPULDSUtils.h b/llvm/lib/Target/AMDGPU/Utils/AMDGPULDSUtils.h deleted file mode 100644 --- a/llvm/lib/Target/AMDGPU/Utils/AMDGPULDSUtils.h +++ /dev/null @@ -1,38 +0,0 @@ -//===- AMDGPULDSUtils.h - LDS related helper functions -*- C++ -*----------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// AMDGPU LDS related helper utility functions. -// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_LIB_TARGET_AMDGPU_UTILS_AMDGPULDSUTILS_H -#define LLVM_LIB_TARGET_AMDGPU_UTILS_AMDGPULDSUTILS_H - -#include "llvm/ADT/DenseMap.h" -#include "llvm/IR/Constants.h" - -namespace llvm { - -class ConstantExpr; - -namespace AMDGPU { - -bool isKernelCC(const Function *Func); - -Align getAlign(DataLayout const &DL, const GlobalVariable *GV); - -std::vector findVariablesToLower(Module &M, - const Function *F = nullptr); - -/// Replace all uses of constant \p C with instructions in \p F. -void replaceConstantUsesInFunction(ConstantExpr *C, const Function *F); -} // end namespace AMDGPU - -} // end namespace llvm - -#endif // LLVM_LIB_TARGET_AMDGPU_UTILS_AMDGPULDSUTILS_H diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPULDSUtils.cpp b/llvm/lib/Target/AMDGPU/Utils/AMDGPULDSUtils.cpp deleted file mode 100644 --- a/llvm/lib/Target/AMDGPU/Utils/AMDGPULDSUtils.cpp +++ /dev/null @@ -1,144 +0,0 @@ -//===- AMDGPULDSUtils.cpp -------------------------------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// AMDGPU LDS related helper utility functions. -// -//===----------------------------------------------------------------------===// - -#include "AMDGPULDSUtils.h" -#include "AMDGPU.h" -#include "Utils/AMDGPUBaseInfo.h" -#include "llvm/ADT/DepthFirstIterator.h" -#include "llvm/ADT/SetVector.h" -#include "llvm/IR/Constants.h" -#include "llvm/IR/ReplaceConstant.h" - -using namespace llvm; - -namespace llvm { - -namespace AMDGPU { - -bool isKernelCC(const Function *Func) { - return AMDGPU::isModuleEntryFunctionCC(Func->getCallingConv()); -} - -Align getAlign(DataLayout const &DL, const GlobalVariable *GV) { - return DL.getValueOrABITypeAlignment(GV->getPointerAlignment(DL), - GV->getValueType()); -} - -static void collectFunctionUses(User *U, const Function *F, - SetVector &InstUsers) { - SmallVector Stack{U}; - - while (!Stack.empty()) { - U = Stack.pop_back_val(); - - if (auto *I = dyn_cast(U)) { - if (I->getFunction() == F) - InstUsers.insert(I); - continue; - } - - if (!isa(U)) - continue; - - append_range(Stack, U->users()); - } -} - -void replaceConstantUsesInFunction(ConstantExpr *C, const Function *F) { - SetVector InstUsers; - - collectFunctionUses(C, F, InstUsers); - for (Instruction *I : InstUsers) { - convertConstantExprsToInstructions(I, C); - } -} - -static bool shouldLowerLDSToStruct(const GlobalVariable &GV, - const Function *F) { - // We are not interested in kernel LDS lowering for module LDS itself. - if (F && GV.getName() == "llvm.amdgcn.module.lds") - return false; - - bool Ret = false; - SmallPtrSet Visited; - SmallVector Stack(GV.users()); - - assert(!F || isKernelCC(F)); - - while (!Stack.empty()) { - const User *V = Stack.pop_back_val(); - Visited.insert(V); - - if (isa(V)) { - // This use of the LDS variable is the initializer of a global variable. - // This is ill formed. The address of an LDS variable is kernel dependent - // and unknown until runtime. It can't be written to a global variable. - continue; - } - - if (auto *I = dyn_cast(V)) { - const Function *UF = I->getFunction(); - if (UF == F) { - // Used from this kernel, we want to put it into the structure. - Ret = true; - } else if (!F) { - // For module LDS lowering, lowering is required if the user instruction - // is from non-kernel function. - Ret |= !isKernelCC(UF); - } - continue; - } - - // User V should be a constant, recursively visit users of V. - assert(isa(V) && "Expected a constant."); - append_range(Stack, V->users()); - } - - return Ret; -} - -std::vector findVariablesToLower(Module &M, - const Function *F) { - std::vector LocalVars; - for (auto &GV : M.globals()) { - if (GV.getType()->getPointerAddressSpace() != AMDGPUAS::LOCAL_ADDRESS) { - continue; - } - if (!GV.hasInitializer()) { - // addrspace(3) without initializer implies cuda/hip extern __shared__ - // the semantics for such a variable appears to be that all extern - // __shared__ variables alias one another, in which case this transform - // is not required - continue; - } - if (!isa(GV.getInitializer())) { - // Initializers are unimplemented for LDS address space. - // Leave such variables in place for consistent error reporting. - continue; - } - if (GV.isConstant()) { - // A constant undef variable can't be written to, and any load is - // undef, so it should be eliminated by the optimizer. It could be - // dropped by the back end if not. This pass skips over it. - continue; - } - if (!shouldLowerLDSToStruct(GV, F)) { - continue; - } - LocalVars.push_back(&GV); - } - return LocalVars; -} - -} // end namespace AMDGPU - -} // end namespace llvm diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUMemoryUtils.h b/llvm/lib/Target/AMDGPU/Utils/AMDGPUMemoryUtils.h --- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUMemoryUtils.h +++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUMemoryUtils.h @@ -9,16 +9,32 @@ #ifndef LLVM_LIB_TARGET_AMDGPU_UTILS_AMDGPUMEMORYUTILS_H #define LLVM_LIB_TARGET_AMDGPU_UTILS_AMDGPUMEMORYUTILS_H +#include + namespace llvm { +struct Align; class AAResults; +class ConstantExpr; +class DataLayout; +class Function; +class GlobalVariable; class LoadInst; class MemoryDef; class MemorySSA; +class Module; class Value; namespace AMDGPU { +Align getAlign(DataLayout const &DL, const GlobalVariable *GV); + +std::vector findVariablesToLower(Module &M, + const Function *F = nullptr); + +/// Replace all uses of constant \p C with instructions in \p F. +void replaceConstantUsesInFunction(ConstantExpr *C, const Function *F); + /// Given a \p Def clobbering a load from \p Ptr accroding to the MSSA check /// if this is actually a memory update or an artifical clobber to facilitate /// ordering constraints. diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUMemoryUtils.cpp b/llvm/lib/Target/AMDGPU/Utils/AMDGPUMemoryUtils.cpp --- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUMemoryUtils.cpp +++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUMemoryUtils.cpp @@ -8,12 +8,16 @@ #include "AMDGPUMemoryUtils.h" #include "AMDGPU.h" +#include "AMDGPUBaseInfo.h" +#include "llvm/ADT/SetVector.h" #include "llvm/ADT/SmallSet.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/MemorySSA.h" +#include "llvm/IR/DataLayout.h" #include "llvm/IR/Instructions.h" -#include "llvm/IR/IntrinsicsAMDGPU.h" #include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/IntrinsicsAMDGPU.h" +#include "llvm/IR/ReplaceConstant.h" #define DEBUG_TYPE "amdgpu-memory-utils" @@ -23,6 +27,117 @@ namespace AMDGPU { +Align getAlign(DataLayout const &DL, const GlobalVariable *GV) { + return DL.getValueOrABITypeAlignment(GV->getPointerAlignment(DL), + GV->getValueType()); +} + +static void collectFunctionUses(User *U, const Function *F, + SetVector &InstUsers) { + SmallVector Stack{U}; + + while (!Stack.empty()) { + U = Stack.pop_back_val(); + + if (auto *I = dyn_cast(U)) { + if (I->getFunction() == F) + InstUsers.insert(I); + continue; + } + + if (!isa(U)) + continue; + + append_range(Stack, U->users()); + } +} + +void replaceConstantUsesInFunction(ConstantExpr *C, const Function *F) { + SetVector InstUsers; + + collectFunctionUses(C, F, InstUsers); + for (Instruction *I : InstUsers) { + convertConstantExprsToInstructions(I, C); + } +} + +static bool shouldLowerLDSToStruct(const GlobalVariable &GV, + const Function *F) { + // We are not interested in kernel LDS lowering for module LDS itself. + if (F && GV.getName() == "llvm.amdgcn.module.lds") + return false; + + bool Ret = false; + SmallPtrSet Visited; + SmallVector Stack(GV.users()); + + assert(!F || isKernelCC(F)); + + while (!Stack.empty()) { + const User *V = Stack.pop_back_val(); + Visited.insert(V); + + if (isa(V)) { + // This use of the LDS variable is the initializer of a global variable. + // This is ill formed. The address of an LDS variable is kernel dependent + // and unknown until runtime. It can't be written to a global variable. + continue; + } + + if (auto *I = dyn_cast(V)) { + const Function *UF = I->getFunction(); + if (UF == F) { + // Used from this kernel, we want to put it into the structure. + Ret = true; + } else if (!F) { + // For module LDS lowering, lowering is required if the user instruction + // is from non-kernel function. + Ret |= !isKernelCC(UF); + } + continue; + } + + // User V should be a constant, recursively visit users of V. + assert(isa(V) && "Expected a constant."); + append_range(Stack, V->users()); + } + + return Ret; +} + +std::vector findVariablesToLower(Module &M, + const Function *F) { + std::vector LocalVars; + for (auto &GV : M.globals()) { + if (GV.getType()->getPointerAddressSpace() != AMDGPUAS::LOCAL_ADDRESS) { + continue; + } + if (!GV.hasInitializer()) { + // addrspace(3) without initializer implies cuda/hip extern __shared__ + // the semantics for such a variable appears to be that all extern + // __shared__ variables alias one another, in which case this transform + // is not required + continue; + } + if (!isa(GV.getInitializer())) { + // Initializers are unimplemented for LDS address space. + // Leave such variables in place for consistent error reporting. + continue; + } + if (GV.isConstant()) { + // A constant undef variable can't be written to, and any load is + // undef, so it should be eliminated by the optimizer. It could be + // dropped by the back end if not. This pass skips over it. + continue; + } + if (!shouldLowerLDSToStruct(GV, F)) { + continue; + } + LocalVars.push_back(&GV); + } + return LocalVars; +} + bool isReallyAClobber(const Value *Ptr, MemoryDef *Def, AAResults *AA) { Instruction *DefInst = Def->getMemoryInst(); diff --git a/llvm/lib/Target/AMDGPU/Utils/CMakeLists.txt b/llvm/lib/Target/AMDGPU/Utils/CMakeLists.txt --- a/llvm/lib/Target/AMDGPU/Utils/CMakeLists.txt +++ b/llvm/lib/Target/AMDGPU/Utils/CMakeLists.txt @@ -1,7 +1,6 @@ add_llvm_component_library(LLVMAMDGPUUtils AMDGPUAsmUtils.cpp AMDGPUBaseInfo.cpp - AMDGPULDSUtils.cpp AMDGPUMemoryUtils.cpp AMDGPUPALMetadata.cpp AMDKernelCodeTUtils.cpp