Index: lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
===================================================================
--- lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
+++ lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
@@ -545,14 +545,16 @@
   if (const Argument *A = dyn_cast<Argument>(V))
     return !isArgPassedInSGPR(A);
 
-  // Loads from the private address space are divergent, because threads
-  // can execute the load instruction with the same inputs and get different
-  // results.
+  // Loads from the private and flat address spaces are divergent, because
+  // threads can execute the load instruction with the same inputs and get
+  // different results.
   //
   // All other loads are not divergent, because if threads issue loads with the
   // same arguments, they will always get the same result.
   if (const LoadInst *Load = dyn_cast<LoadInst>(V))
-    return Load->getPointerAddressSpace() == ST->getAMDGPUAS().PRIVATE_ADDRESS;
+    return Load->getPointerAddressSpace() ==
+               ST->getAMDGPUAS().PRIVATE_ADDRESS ||
+           Load->getPointerAddressSpace() == ST->getAMDGPUAS().FLAT_ADDRESS;
 
   // Atomics are divergent because they are executed sequentially: when an
   // atomic operation refers to the same address in each thread, then each
Index: test/Analysis/DivergenceAnalysis/AMDGPU/loads.ll
===================================================================
--- /dev/null
+++ test/Analysis/DivergenceAnalysis/AMDGPU/loads.ll
@@ -0,0 +1,15 @@
+; RUN: opt -mtriple=amdgcn-- -analyze -divergence %s | FileCheck %s
+
+; Test that we consider loads from flat and private addrspaces to be divergent.
+
+; CHECK: DIVERGENT: %val = load i32, i32* %flat, align 4
+define amdgpu_kernel void @flat_load(i32* %flat) {
+  %val = load i32, i32* %flat, align 4
+  ret void
+}
+
+; CHECK: DIVERGENT: %val = load i32, i32 addrspace(5)* %priv, align 4
+define amdgpu_kernel void @private_load(i32 addrspace(5)* %priv) {
+  %val = load i32, i32 addrspace(5)* %priv, align 4
+  ret void
+}
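For contrast with the two DIVERGENT cases exercised above, the following is a minimal sketch (not part of the patch; the @global_load name and kernel are made up for illustration) of the kind of load the updated comment's "all other loads are not divergent" clause still covers: a load through a global (addrspace(1)) kernel argument, which the analysis should continue to report as uniform.

; Illustrative only, not part of this patch: a global-memory load through a
; kernel argument (passed in an SGPR, hence uniform). DivergenceAnalysis
; should print this load without the DIVERGENT prefix, since every lane
; that executes it reads the same address and gets the same value.
define amdgpu_kernel void @global_load(i32 addrspace(1)* %glob) {
  %val = load i32, i32 addrspace(1)* %glob, align 4
  ret void
}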