diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
@@ -29,6 +29,7 @@
 #include "SIMachineFunctionInfo.h"
 #include "SIMachineScheduler.h"
 #include "TargetInfo/AMDGPUTargetInfo.h"
+#include "llvm/Analysis/ValueTracking.h"
 #include "llvm/CodeGen/GlobalISel/IRTranslator.h"
 #include "llvm/CodeGen/GlobalISel/InstructionSelect.h"
 #include "llvm/CodeGen/GlobalISel/Legalizer.h"
@@ -537,13 +538,23 @@
          V->getType()->getPointerAddressSpace() == AMDGPUAS::FLAT_ADDRESS);
 
   const auto *Ptr = LD->getPointerOperand();
-  if (Ptr->getType()->getPointerAddressSpace() != AMDGPUAS::CONSTANT_ADDRESS)
-    return AMDGPUAS::UNKNOWN_ADDRESS_SPACE;
-  // For a generic pointer loaded from the constant memory, it could be assumed
-  // as a global pointer since the constant memory is only populated on the
-  // host side. As implied by the offload programming model, only global
-  // pointers could be referenced on the host side.
-  return AMDGPUAS::GLOBAL_ADDRESS;
+
+  // For a generic pointer loaded from the constant memory, it could be
+  // assumed as a global pointer since the constant memory is only populated
+  // on the host side. As implied by the offload programming model, only
+  // global pointers could be referenced on the host side.
+  if (Ptr->getType()->getPointerAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS)
+    return AMDGPUAS::GLOBAL_ADDRESS;
+
+  // For a generic pointer loaded from the readonly kernel function pointer
+  // arguments, it could be assumed as a global pointer since that memory is
+  // also only prepared on the host side.
+  const Argument *Arg = dyn_cast_or_null<Argument>(getUnderlyingObject(Ptr));
+  if (Arg && Arg->onlyReadsMemory() && Arg->hasNoAliasAttr() &&
+      Arg->getParent()->getCallingConv() == CallingConv::AMDGPU_KERNEL)
+    return AMDGPUAS::GLOBAL_ADDRESS;
+
+  return AMDGPUAS::UNKNOWN_ADDRESS_SPACE;
 }
 
 TargetTransformInfo
diff --git a/llvm/test/Transforms/InferAddressSpaces/AMDGPU/assumed-addrspace.ll b/llvm/test/Transforms/InferAddressSpaces/AMDGPU/assumed-addrspace.ll
--- a/llvm/test/Transforms/InferAddressSpaces/AMDGPU/assumed-addrspace.ll
+++ b/llvm/test/Transforms/InferAddressSpaces/AMDGPU/assumed-addrspace.ll
@@ -29,3 +29,33 @@
   store float %v1, float* %p1
   ret void
 }
+
+%struct.arg = type { float* }
+
+; CHECK-LABEL: @generic_ptr_from_readonly_arg0
+; CHECK: addrspacecast i32* %p0 to i32 addrspace(1)*
+; CHECK: load i32, i32 addrspace(1)*
+; CHECK: store i32 %v0, i32 addrspace(1)*
+; CHECK: ret
+define amdgpu_kernel void @generic_ptr_from_readonly_arg0(%struct.arg addrspace(1)* noalias readonly %in, float addrspace(1)* nocapture %out) {
+  %f0 = bitcast %struct.arg addrspace(1)* %in to i32* addrspace(1)*
+  %p0 = load i32*, i32* addrspace(1)* %f0, align 8
+  %v0 = load i32, i32* %p0, align 4
+  %q0 = bitcast float addrspace(1)* %out to i32 addrspace(1)*
+  store i32 %v0, i32 addrspace(1)* %q0, align 4
+  ret void
+}
+
+; CHECK-LABEL: @generic_ptr_from_readonly_arg1
+; CHECK-NOT: addrspacecast i32* %p0 to i32 addrspace(1)*
+; CHECK-NOT: load i32, i32 addrspace(1)*
+; CHECK: store i32 %v0, i32 addrspace(1)*
+; CHECK: ret
+define amdgpu_kernel void @generic_ptr_from_readonly_arg1(%struct.arg addrspace(1)* readonly %in, float addrspace(1)* nocapture %out) {
+  %f0 = bitcast %struct.arg addrspace(1)* %in to i32* addrspace(1)*
+  %p0 = load i32*, i32* addrspace(1)* %f0, align 8
+  %v0 = load i32, i32* %p0, align 4
+  %q0 = bitcast float addrspace(1)* %out to i32 addrspace(1)*
+  store i32 %v0, i32 addrspace(1)* %q0, align 4
+  ret void
+}