Index: lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
===================================================================
--- lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
+++ lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
@@ -545,14 +545,15 @@
   if (const Argument *A = dyn_cast<Argument>(V))
     return !isArgPassedInSGPR(A);
 
-  // Loads from the private address space are divergent, because threads
-  // can execute the load instruction with the same inputs and get different
-  // results.
+  // Loads from the private and flat address spaces are divergent, because
+  // threads can execute the load instruction with the same inputs and get
+  // different results (a flat pointer may point into private memory).
   //
   // All other loads are not divergent, because if threads issue loads with the
   // same arguments, they will always get the same result.
   if (const LoadInst *Load = dyn_cast<LoadInst>(V))
-    return Load->getPointerAddressSpace() == ST->getAMDGPUAS().PRIVATE_ADDRESS;
+    return Load->getPointerAddressSpace() == ST->getAMDGPUAS().PRIVATE_ADDRESS
+      || Load->getPointerAddressSpace() == ST->getAMDGPUAS().FLAT_ADDRESS;
 
   // Atomics are divergent because they are executed sequentially: when an
   // atomic operation refers to the same address in each thread, then each
Index: test/CodeGen/AMDGPU/divergent-flat.ll
===================================================================
--- /dev/null
+++ test/CodeGen/AMDGPU/divergent-flat.ll
@@ -0,0 +1,35 @@
+; RUN: llc -march=amdgcn -mcpu=gfx900 -o - %s | FileCheck %s --check-prefix=ASM
+; RUN: llc -march=amdgcn -mcpu=gfx900 -stop-after=structurizecfg -o - %s | FileCheck %s --check-prefix=STRUCTURIZECFG
+
+; Test that we do not consider loads from flat addrspace to be uniform.
+;
+; The flat pointer below is an addrspacecast of an addrspace(5) alloca (%priv),
+; and each work item stores its own workitem id through it before loading it
+; back. The loaded value can therefore differ between work items, so the
+; branch on it must not be treated as uniform.
+
+define amdgpu_kernel void @spam(float* %a) #0 {
+  %priv = alloca i32, align 4, addrspace(5)
+  %flat = addrspacecast i32 addrspace(5)* %priv to i32*
+  %idx = call i32 @llvm.amdgcn.workitem.id.x()
+
+  store i32 %idx, i32* %flat, align 4
+  %b = load i32, i32* %flat, align 4
+
+  %cmp = icmp slt i32 %b, 1
+; ASM: s_mov_b64 exec, s[{{[0-9]+}}:{{[0-9]+}}]
+; ASM-NOT: s_cbranch_vccnz
+; STRUCTURIZECFG-NOT: structurizecfg.uniform
+  br i1 %cmp, label %body, label %end
+
+body:
+  store float 1.000000e+00, float* %a, align 4
+  br label %end
+
+end:
+  ret void
+}
+
+declare i32 @llvm.amdgcn.workitem.id.x()
+
+attributes #0 = { noinline optnone }