diff --git a/llvm/lib/Target/AMDGPU/AMDGPULowerKernelAttributes.cpp b/llvm/lib/Target/AMDGPU/AMDGPULowerKernelAttributes.cpp
--- a/llvm/lib/Target/AMDGPU/AMDGPULowerKernelAttributes.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPULowerKernelAttributes.cpp
@@ -81,10 +81,42 @@
   return M.getFunction(Name);
 }
 
-} // end anonymous namespace
+/// Walk forward from \p V through GEPs, bitcasts and address space casts and
+/// return the single simple load the chain ends in.
+///
+/// Returns \p V itself when it is a simple load, and nullptr when \p V is a
+/// non-simple load, when no simple load is reachable, or when more than one
+/// distinct simple load is reachable. Only instructions that
+/// Value::stripAndAccumulateConstantOffsets can look back through are
+/// followed, and \p Depth bounds the walk so that a cyclic use chain cannot
+/// make the recursion run forever.
+LoadInst *getUniqueSimpleLoadUser(Value *V, unsigned Depth = 0) {
+  constexpr unsigned MaxDepth = 8;
+
+  if (auto *L = dyn_cast<LoadInst>(V))
+    return L->isSimple() ? L : nullptr;
+
+  if (Depth >= MaxDepth || !V->getType()->isPointerTy())
+    return nullptr;
+
+  if (!isa<GetElementPtrInst>(V) && !isa<BitCastInst>(V) &&
+      !isa<AddrSpaceCastInst>(V))
+    return nullptr;
+
+  LoadInst *UniqueLoad = nullptr;
+  for (User *U : V->users()) {
+    LoadInst *L = getUniqueSimpleLoadUser(U, Depth + 1);
+    if (!L)
+      continue;
+    if (UniqueLoad && UniqueLoad != L)
+      return nullptr;
+    UniqueLoad = L;
+  }
+  return UniqueLoad;
+}
 
 static bool processUse(CallInst *CI, bool IsV5OrAbove) {
-  Function *F = CI->getParent()->getParent();
+  Function *F = CI->getFunction();
 
   auto MD = F->getMetadata("reqd_work_group_size");
   const bool HasReqdWorkGroupSize = MD && MD->getNumOperands() == 3;
@@ -105,33 +137,24 @@
   // We expect to see several GEP users, casted to the appropriate type and
   // loaded.
   for (User *U : CI->users()) {
-    if (!U->hasOneUse())
+    LoadInst *Load = getUniqueSimpleLoadUser(U);
+    if (!Load)
       continue;
 
-    int64_t Offset = 0;
-    auto *Load = dyn_cast<LoadInst>(U); // Load from ImplicitArgPtr/DispatchPtr?
-    auto *BCI = dyn_cast<BitCastInst>(U);
-    if (!Load && !BCI) {
-      if (GetPointerBaseWithConstantOffset(U, Offset, DL) != CI)
-        continue;
-      Load = dyn_cast<LoadInst>(*U->user_begin()); // Load from GEP?
-      BCI = dyn_cast<BitCastInst>(*U->user_begin());
-    }
-
-    if (BCI) {
-      if (!BCI->hasOneUse())
-        continue;
-      Load = dyn_cast<LoadInst>(*BCI->user_begin()); // Load from BCI?
-    }
-
-    if (!Load || !Load->isSimple())
+    // Measure the offset from the pointer the load dereferences rather than
+    // from U: the load may sit behind further GEPs of U, whose offsets count.
+    Value *Ptr = Load->getPointerOperand();
+    APInt Offset(DL.getIndexTypeSizeInBits(Ptr->getType()), 0);
+    if (Ptr->stripAndAccumulateConstantOffsets(
+            DL, Offset, /*AllowNonInbounds=*/true) != CI)
       continue;
 
     unsigned LoadSize = DL.getTypeStoreSize(Load->getType());
 
     // TODO: Handle merged loads.
+    const int64_t OffsetValue = Offset.getSExtValue();
     if (IsV5OrAbove) { // Base is ImplicitArgPtr.
-      switch (Offset) {
+      switch (OffsetValue) {
       case HIDDEN_BLOCK_COUNT_X:
         if (LoadSize == 4)
           BlockCounts[0] = Load;
@@ -172,7 +195,7 @@
         break;
       }
     } else { // Base is DispatchPtr.
-      switch (Offset) {
+      switch (OffsetValue) {
       case WORKGROUP_SIZE_X:
         if (LoadSize == 2)
           GroupSizes[0] = Load;
@@ -317,6 +340,7 @@
   return MadeChange;
 }
 
+} // end anonymous namespace
 
 // TODO: Move makeLIDRangeMetadata usage into here. Seem to not get
 // TargetPassConfig for subtarget.