diff --git a/llvm/lib/Target/AMDGPU/AMDGPULowerKernelAttributes.cpp b/llvm/lib/Target/AMDGPU/AMDGPULowerKernelAttributes.cpp
--- a/llvm/lib/Target/AMDGPU/AMDGPULowerKernelAttributes.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPULowerKernelAttributes.cpp
@@ -76,8 +76,36 @@
 } // end anonymous namespace
 
+static Function *getBasePtrIntrinsic(Module &M, bool IsV5OrAbove) {
+  auto IntrinsicId = IsV5OrAbove ? Intrinsic::amdgcn_implicitarg_ptr
+                                 : Intrinsic::amdgcn_dispatch_ptr;
+  StringRef Name = Intrinsic::getName(IntrinsicId);
+  return M.getFunction(Name);
+}
+
+static LoadInst *getUniqueLoadUser(Value *V) {
+  if (LoadInst *L = dyn_cast<LoadInst>(V)) {
+    return L->isSimple() ? L : nullptr;
+  }
+
+  if (!V->getType()->isPointerTy()) {
+    return nullptr;
+  }
+
+  LoadInst *UniqueLoad = nullptr;
+  for (User *U : V->users()) {
+    if (LoadInst *L = getUniqueLoadUser(U)) {
+      if (UniqueLoad) {
+        return nullptr;
+      }
+      UniqueLoad = L;
+    }
+  }
+  return UniqueLoad;
+}
+
 static bool processUse(CallInst *CI, bool IsV5OrAbove) {
-  Function *F = CI->getParent()->getParent();
+  Function *F = CI->getFunction();
 
   auto MD = F->getMetadata("reqd_work_group_size");
   const bool HasReqdWorkGroupSize = MD && MD->getNumOperands() == 3;
 
@@ -98,33 +126,23 @@
   // We expect to see several GEP users, casted to the appropriate type and
   // loaded.
   for (User *U : CI->users()) {
-    if (!U->hasOneUse())
+    LoadInst *Load = getUniqueLoadUser(U);
+    if (!Load) {
       continue;
-
-    int64_t Offset = 0;
-    auto *Load = dyn_cast<LoadInst>(U); // Load from ImplicitArgPtr/DispatchPtr?
-    auto *BCI = dyn_cast<BitCastInst>(U);
-    if (!Load && !BCI) {
-      if (GetPointerBaseWithConstantOffset(U, Offset, DL) != CI)
-        continue;
-      Load = dyn_cast<LoadInst>(*U->user_begin()); // Load from GEP?
-      BCI = dyn_cast<BitCastInst>(*U->user_begin());
     }
 
-    if (BCI) {
-      if (!BCI->hasOneUse())
-        continue;
-      Load = dyn_cast<LoadInst>(*BCI->user_begin()); // Load from BCI?
-    }
-
-    if (!Load || !Load->isSimple())
+    APInt Offset(64, 0U);
+    if (Load != U &&
+        U->stripAndAccumulateConstantOffsets(DL, Offset, true) != CI) {
       continue;
+    }
 
     unsigned LoadSize = DL.getTypeStoreSize(Load->getType());
 
     // TODO: Handle merged loads.
+    const int64_t OffsetValue = Offset.getSExtValue();
     if (IsV5OrAbove) { // Base is ImplicitArgPtr.
-      switch (Offset) {
+      switch (OffsetValue) {
       case HIDDEN_BLOCK_COUNT_X:
         if (LoadSize == 4)
           BlockCounts[0] = Load;
@@ -165,7 +183,7 @@
         break;
       }
     } else { // Base is DispatchPtr.
-      switch (Offset) {
+      switch (OffsetValue) {
       case WORKGROUP_SIZE_X:
         if (LoadSize == 2)
           GroupSizes[0] = Load;
@@ -315,17 +333,8 @@
 // TargetPassConfig for subtarget.
 bool AMDGPULowerKernelAttributes::runOnModule(Module &M) {
   bool MadeChange = false;
-  Function *BasePtr = nullptr;
   bool IsV5OrAbove = AMDGPU::getAmdhsaCodeObjectVersion() >= 5;
-  if (IsV5OrAbove) {
-    StringRef ImplicitArgPtrName =
-        Intrinsic::getName(Intrinsic::amdgcn_implicitarg_ptr);
-    BasePtr = M.getFunction(ImplicitArgPtrName);
-  } else { // Pre-V5.
-    StringRef DispatchPtrName =
-        Intrinsic::getName(Intrinsic::amdgcn_dispatch_ptr);
-    BasePtr = M.getFunction(DispatchPtrName);
-  }
+  Function *BasePtr = getBasePtrIntrinsic(M, IsV5OrAbove);
 
   if (!BasePtr) // ImplicitArgPtr/DispatchPtr not used.
     return false;
@@ -356,17 +365,8 @@
 PreservedAnalyses
 AMDGPULowerKernelAttributesPass::run(Function &F,
                                      FunctionAnalysisManager &AM) {
-  Function *BasePtr = nullptr;
   bool IsV5OrAbove = AMDGPU::getAmdhsaCodeObjectVersion() >= 5;
-  if (IsV5OrAbove) {
-    StringRef ImplicitArgPtrName =
-        Intrinsic::getName(Intrinsic::amdgcn_implicitarg_ptr);
-    BasePtr = F.getParent()->getFunction(ImplicitArgPtrName);
-  } else { // Pre-V5.
-    StringRef DispatchPtrName =
-        Intrinsic::getName(Intrinsic::amdgcn_dispatch_ptr);
-    BasePtr = F.getParent()->getFunction(DispatchPtrName);
-  }
+  Function *BasePtr = getBasePtrIntrinsic(*F.getParent(), IsV5OrAbove);
 
   if (!BasePtr) // ImplicitArgPtr/DispatchPtr not used.
     return PreservedAnalyses::all();
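
Note for reviewers: the replacement for GetPointerBaseWithConstantOffset above relies on Value::stripAndAccumulateConstantOffsets, which walks constant GEPs (and offset-preserving casts) from a pointer back to its underlying base while accumulating the byte offset. Below is a minimal standalone sketch of that contract, assuming a 64-bit pointer index width (which holds for the address spaces these intrinsics return); the helper name getConstantOffsetFromBase is invented for illustration and is not part of the patch.

  #include "llvm/ADT/APInt.h"
  #include "llvm/IR/DataLayout.h"
  #include "llvm/IR/Value.h"
  #include <cstdint>
  #include <optional>

  using namespace llvm;

  // Returns the byte offset of Ptr relative to Base if Ptr is Base plus a
  // compile-time-constant offset, std::nullopt otherwise.
  static std::optional<int64_t>
  getConstantOffsetFromBase(Value *Ptr, Value *Base, const DataLayout &DL) {
    // The APInt width must match the index width of Ptr's address space;
    // 64 bits is assumed here.
    APInt Offset(64, 0);
    // stripAndAccumulateConstantOffsets returns the stripped base pointer;
    // anything other than Base means Ptr is not a constant offset from it.
    if (Ptr->stripAndAccumulateConstantOffsets(
            DL, Offset, /*AllowNonInbounds=*/true) != Base)
      return std::nullopt;
    return Offset.getSExtValue();
  }

In processUse, the base is the amdgcn.implicitarg.ptr (or amdgcn.dispatch.ptr) call, and the accumulated offset is what the switch matches against the hidden-argument layout.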