diff --git a/llvm/include/llvm/Transforms/IPO/Attributor.h b/llvm/include/llvm/Transforms/IPO/Attributor.h
--- a/llvm/include/llvm/Transforms/IPO/Attributor.h
+++ b/llvm/include/llvm/Transforms/IPO/Attributor.h
@@ -5068,7 +5068,8 @@
     }
 
     /// Constant used to represent unknown offset or sizes.
-    static constexpr int64_t Unknown = 1 << 31;
+    static constexpr int64_t Unassigned = -1;
+    static constexpr int64_t Unknown = -2;
   };
 
   /// Call \p CB on all accesses that might interfere with \p OAS and return
diff --git a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
--- a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
+++ b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
@@ -1226,7 +1226,7 @@
 
 /// Helper struct, will support ranges eventually.
 struct OffsetInfo {
-  int64_t Offset = OffsetAndSize::Unknown;
+  int64_t Offset = OffsetAndSize::Unassigned;
 
   bool operator==(const OffsetInfo &OI) const { return Offset == OI.Offset; }
 };
@@ -1243,6 +1243,7 @@
 
     auto HandlePassthroughUser = [&](Value *Usr, OffsetInfo PtrOI,
                                      bool &Follow) {
+      assert(PtrOI.Offset != OffsetAndSize::Unassigned);
       OffsetInfo &UsrOI = OffsetInfoMap[Usr];
       UsrOI = PtrOI;
       Follow = true;
@@ -1283,11 +1284,15 @@
       APInt GEPOffset(DL.getIndexTypeSizeInBits(GEP->getType()), 0);
       if (PtrOI.Offset == OffsetAndSize::Unknown ||
           !GEP->accumulateConstantOffset(DL, GEPOffset)) {
+        LLVM_DEBUG(dbgs() << "[AAPointerInfo] GEP offset not constant "
+                          << *GEP << "\n");
         UsrOI.Offset = OffsetAndSize::Unknown;
         Follow = true;
         return true;
       }
 
+      LLVM_DEBUG(dbgs() << "[AAPointerInfo] GEP offset is constant " << *GEP
+                        << "\n");
       UsrOI.Offset = PtrOI.Offset + GEPOffset.getZExtValue();
       Follow = true;
       return true;
@@ -1306,15 +1311,22 @@
       bool IsFirstPHIUser = !OffsetInfoMap.count(Usr);
       OffsetInfo &UsrOI = OffsetInfoMap[Usr];
       OffsetInfo &PtrOI = OffsetInfoMap[CurPtr];
-      // Check if the PHI is invariant (so far).
-      if (UsrOI == PtrOI)
-        return true;
 
       // Check if the PHI operand has already an unknown offset as we can't
       // improve on that anymore.
       if (PtrOI.Offset == OffsetAndSize::Unknown) {
+        LLVM_DEBUG(dbgs() << "[AAPointerInfo] PHI operand offset unknown "
+                          << *CurPtr << " in " << *Usr << "\n");
+        Follow = UsrOI.Offset == OffsetAndSize::Unassigned ||
+                 UsrOI.Offset != OffsetAndSize::Unknown;
         UsrOI = PtrOI;
-        Follow = true;
+        return true;
+      }
+
+      // Check if the PHI is invariant (so far).
+      if (UsrOI == PtrOI) {
+        assert(PtrOI.Offset != OffsetAndSize::Unassigned);
+        LLVM_DEBUG(dbgs() << "[AAPointerInfo] PHI is invariant (so far)");
         return true;
       }
 
diff --git a/llvm/test/CodeGen/AMDGPU/attributor.ll b/llvm/test/CodeGen/AMDGPU/attributor.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/attributor.ll
@@ -0,0 +1,42 @@
+; RUN: llc -o - < %s | FileCheck %s
+
+target triple = "amdgcn-amd-amdhsa"
+
+; The call to the intrinsic implicitarg_ptr reaches a load through a phi. The
+; offset of the phi cannot be determined, and hence the attributor assumes
+; that hostcall is in use.
+
+; CHECK: .value_kind: hidden_hostcall_buffer
+; CHECK: .value_kind: hidden_multigrid_sync_arg
+
+define amdgpu_kernel void @the_kernel(i32 addrspace(1)* %a, i64 %index1, i64 %index2, i1 %cond) {
+entry:
+  %tmp7 = tail call i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr()
+  br i1 %cond, label %old, label %new
+
+old:                                              ; preds = %entry
+  %tmp4 = getelementptr i8, i8 addrspace(4)* %tmp7, i64 %index1
+  br label %join
+
+new:                                              ; preds = %entry
+  %tmp12 = getelementptr inbounds i8, i8 addrspace(4)* %tmp7, i64 %index2
+  br label %join
+
+join:                                             ; preds = %new, %old
+  %.in.in.in = phi i8 addrspace(4)* [ %tmp12, %new ], [ %tmp4, %old ]
+  %.in.in = bitcast i8 addrspace(4)* %.in.in.in to i16 addrspace(4)*
+
+  ;;; THIS USE is where the offset into implicitarg_ptr is unknown
+  %.in = load i16, i16 addrspace(4)* %.in.in, align 2
+
+  %idx.ext = sext i16 %.in to i64
+  %add.ptr3 = getelementptr inbounds i32, i32 addrspace(1)* %a, i64 %idx.ext
+  %tmp16 = atomicrmw add i32 addrspace(1)* %add.ptr3, i32 15 syncscope("agent-one-as") monotonic, align 4
+  ret void
+}
+
+declare i32 @llvm.amdgcn.workitem.id.x()
+
+declare align 4 i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr()
+
+declare i32 @llvm.amdgcn.workgroup.id.x()
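
Note (not part of the patch): the change replaces the single Unknown
sentinel with a small three-state lattice. An offset starts out Unassigned,
may be fixed to a concrete byte offset once the pointer is visited, and
degrades to Unknown when it can no longer be determined (a non-constant GEP
offset, or disagreeing PHI operands). The standalone C++ sketch below models
the PHI merge; all names are illustrative, and the branch for two distinct
concrete offsets is an assumption, since that part of the function lies
outside the hunks shown above.

#include <cassert>
#include <cstdint>
#include <iostream>

struct OffsetInfo {
  static constexpr int64_t Unassigned = -1; // pointer not visited yet
  static constexpr int64_t Unknown = -2;    // offset cannot be determined
  int64_t Offset = Unassigned;
};

// Merge one incoming PHI operand (PtrOI) into the PHI's own state (UsrOI).
// Mirrors the patched control flow: an Unknown operand forces the PHI to
// Unknown, and users are revisited (Follow) only on an actual state change.
void mergePHIOperand(OffsetInfo &UsrOI, const OffsetInfo &PtrOI,
                     bool &Follow) {
  if (PtrOI.Offset == OffsetInfo::Unknown) {
    Follow = UsrOI.Offset == OffsetInfo::Unassigned ||
             UsrOI.Offset != OffsetInfo::Unknown;
    UsrOI = PtrOI;
    return;
  }
  if (UsrOI.Offset == PtrOI.Offset) {
    // The PHI is invariant so far; a visited operand is never Unassigned.
    assert(PtrOI.Offset != OffsetInfo::Unassigned);
    Follow = false;
    return;
  }
  if (UsrOI.Offset == OffsetInfo::Unassigned) {
    UsrOI = PtrOI; // first operand seen: adopt its concrete offset
    Follow = true;
    return;
  }
  UsrOI.Offset = OffsetInfo::Unknown; // conflicting concrete offsets
  Follow = true;
}

int main() {
  // As in the test: both GEPs feeding the PHI have non-constant offsets,
  // so the PHI degrades to Unknown and the attributor must conservatively
  // assume the load may read the hostcall buffer pointer.
  OffsetInfo Phi, Gep1, Gep2;
  Gep1.Offset = OffsetInfo::Unknown;
  Gep2.Offset = OffsetInfo::Unknown;
  bool Follow = false;
  mergePHIOperand(Phi, Gep1, Follow); // Unassigned -> Unknown, Follow = true
  mergePHIOperand(Phi, Gep2, Follow); // already Unknown, Follow = false
  std::cout << "PHI offset: " << Phi.Offset << " Follow: " << Follow << "\n";
}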