diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAliasAnalysis.cpp b/llvm/lib/Target/AMDGPU/AMDGPUAliasAnalysis.cpp
--- a/llvm/lib/Target/AMDGPU/AMDGPUAliasAnalysis.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUAliasAnalysis.cpp
@@ -85,6 +85,44 @@
   if (Result == NoAlias)
     return Result;
 
+  // In general, FLAT (generic) pointers could be aliased to LOCAL or PRIVATE
+  // pointers. However, as LOCAL or PRIVATE pointers point to local objects, in
+  // certain cases, it's still viable to check whether a FLAT pointer won't
+  // alias to a LOCAL or PRIVATE pointer.
+  MemoryLocation A = LocA;
+  MemoryLocation B = LocB;
+  // Canonicalize the location order to simplify the following alias check.
+  if (asA != AMDGPUAS::FLAT_ADDRESS) {
+    std::swap(asA, asB);
+    std::swap(A, B);
+  }
+  if (asA == AMDGPUAS::FLAT_ADDRESS &&
+      (asB == AMDGPUAS::LOCAL_ADDRESS || asB == AMDGPUAS::PRIVATE_ADDRESS)) {
+    const auto *ObjA =
+        getUnderlyingObject(A.Ptr->stripPointerCastsAndInvariantGroups());
+    if (const LoadInst *LI = dyn_cast<LoadInst>(ObjA)) {
+      // If a generic pointer is loaded from the constant address space, it
+      // could only be a GLOBAL or CONSTANT one as that address space is solely
+      // prepared on the host side, where only GLOBAL or CONSTANT variables are
+      // visible. Note that this even holds for regular functions.
+      if (LI->getPointerAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS)
+        return NoAlias;
+    } else if (const Argument *Arg = dyn_cast<Argument>(ObjA)) {
+      const Function *F = Arg->getParent();
+      switch (F->getCallingConv()) {
+      case CallingConv::AMDGPU_KERNEL:
+        // In the kernel function, kernel arguments won't alias to (local)
+        // variables in shared or private address space.
+        return NoAlias;
+      default:
+        // TODO: In the regular function, if that local variable in the
+        // location B is not captured, that argument pointer won't alias to it
+        // as well.
+        break;
+      }
+    }
+  }
+
   // Forward the query to the next alias analysis.
   return AAResultBase::alias(LocA, LocB, AAQI);
 }
diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-alias-analysis.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-alias-analysis.ll
--- a/llvm/test/CodeGen/AMDGPU/amdgpu-alias-analysis.ll
+++ b/llvm/test/CodeGen/AMDGPU/amdgpu-alias-analysis.ll
@@ -1,5 +1,5 @@
-; RUN: opt -mtriple=amdgcn-- -aa-eval -amdgpu-aa -amdgpu-aa-wrapper -disable-basic-aa -print-all-alias-modref-info -disable-output < %s 2>&1 | FileCheck %s
-; RUN: opt -mtriple=r600-- -aa-eval -amdgpu-aa -amdgpu-aa-wrapper -disable-basic-aa -print-all-alias-modref-info -disable-output < %s 2>&1 | FileCheck %s
+; RUN: opt -mtriple=amdgcn-- -data-layout=A5 -aa-eval -amdgpu-aa -amdgpu-aa-wrapper -disable-basic-aa -print-all-alias-modref-info -disable-output < %s 2>&1 | FileCheck %s
+; RUN: opt -mtriple=r600-- -data-layout=A5 -aa-eval -amdgpu-aa -amdgpu-aa-wrapper -disable-basic-aa -print-all-alias-modref-info -disable-output < %s 2>&1 | FileCheck %s
 
 ; CHECK: NoAlias: i8 addrspace(1)* %p1, i8 addrspace(5)* %p
 
@@ -125,3 +125,61 @@
 define void @test_7_7(i8 addrspace(7)* %p, i8 addrspace(7)* %p1) {
   ret void
 }
+
+@cst = internal addrspace(4) global i8* undef, align 4
+
+; CHECK-LABEL: Function: test_8_0
+; CHECK: NoAlias: i8 addrspace(3)* %p, i8* %p1
+; CHECK: NoAlias: i8 addrspace(3)* %p, i8* addrspace(4)* @cst
+; CHECK: MayAlias: i8* %p1, i8* addrspace(4)* @cst
+define void @test_8_0(i8 addrspace(3)* %p) {
+  %p1 = load i8*, i8* addrspace(4)* @cst
+  ret void
+}
+
+; CHECK-LABEL: Function: test_8_1
+; CHECK: NoAlias: i8 addrspace(5)* %p, i8* %p1
+; CHECK: NoAlias: i8 addrspace(5)* %p, i8* addrspace(4)* @cst
+; CHECK: MayAlias: i8* %p1, i8* addrspace(4)* @cst
+define void @test_8_1(i8 addrspace(5)* %p) {
+  %p1 = load i8*, i8* addrspace(4)* @cst
+  ret void
+}
+
+; CHECK-LABEL: Function: test_8_2
+; CHECK: NoAlias: i8 addrspace(5)* %p1, i8* %p
+define amdgpu_kernel void @test_8_2(i8* %p) {
+  %p1 = alloca i8, align 1, addrspace(5)
+  ret void
+}
+
+; CHECK-LABEL: Function: test_8_3
+; CHECK: MayAlias: i8 addrspace(5)* %p1, i8* %p
+; TODO: So far, %p1 may still alias to %p. As it's not captured at all, it
+; should be NoAlias.
+define void @test_8_3(i8* %p) {
+  %p1 = alloca i8, align 1, addrspace(5)
+  ret void
+}
+
+@shm = internal addrspace(3) global i8 undef, align 4
+
+; CHECK-LABEL: Function: test_8_4
+; CHECK: NoAlias: i8 addrspace(3)* %p1, i8* %p
+; CHECK: NoAlias: i8 addrspace(3)* @shm, i8* %p
+; CHECK: MayAlias: i8 addrspace(3)* %p1, i8 addrspace(3)* @shm
+define amdgpu_kernel void @test_8_4(i8* %p) {
+  %p1 = getelementptr i8, i8 addrspace(3)* @shm, i32 0
+  ret void
+}
+
+; CHECK-LABEL: Function: test_8_5
+; CHECK: MayAlias: i8 addrspace(3)* %p1, i8* %p
+; CHECK: MayAlias: i8 addrspace(3)* @shm, i8* %p
+; CHECK: MayAlias: i8 addrspace(3)* %p1, i8 addrspace(3)* @shm
+; TODO: So far, %p1 may still alias to %p. As it's not captured at all, it
+; should be NoAlias.
+define void @test_8_5(i8* %p) {
+  %p1 = getelementptr i8, i8 addrspace(3)* @shm, i32 0
+  ret void
+}