Index: lib/Target/AMDGPU/AMDGPUAnnotateUniformValues.cpp
===================================================================
--- lib/Target/AMDGPU/AMDGPUAnnotateUniformValues.cpp
+++ lib/Target/AMDGPU/AMDGPUAnnotateUniformValues.cpp
@@ -130,6 +130,34 @@
 void AMDGPUAnnotateUniformValues::visitLoadInst(LoadInst &I) {
   Value *Ptr = I.getPointerOperand();
+
+  // This is true if loads have amdgpu.uniform on them (as opposed to GEP
+  // which might be dropped by InstCombine if the offset is 0).
+  if (I.getMetadata("amdgpu.uniform")) {
+    if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Ptr)) {
+      // If GEP is uniform, we don't need to do anything.
+      if (GEP->getMetadata("amdgpu.uniform"))
+        return;
+
+      // Set Uniform on the GEP if it only has 1 use.
+      if (GEP->hasOneUse()) {
+        setUniformMetadata(GEP);
+        return;
+      }
+    }
+
+    // There is no GEP or the GEP has multiple uses. We'll have to create
+    // a new one.
+    Value *Idx = Constant::getIntegerValue(
+        Type::getInt32Ty(Ptr->getContext()), APInt(64, 0));
+    Instruction *GEP = GetElementPtrInst::Create(
+        Ptr->getType()->getPointerElementType(), Ptr,
+        ArrayRef<Value*>(Idx), Twine(""), &I);
+    I.replaceUsesOfWith(Ptr, GEP);
+    setUniformMetadata(GEP);
+    return;
+  }
+
   if (!DA->isUniform(Ptr))
     return;
   auto isGlobalLoad = [&](LoadInst &Load)->bool {
Index: test/CodeGen/AMDGPU/amdgpu.uniform.ll
===================================================================
--- /dev/null
+++ test/CodeGen/AMDGPU/amdgpu.uniform.ll
@@ -0,0 +1,66 @@
+; RUN: llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SICI -check-prefix=SI %s
+; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SICI %s
+; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VIGFX9 %s
+; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VIGFX9 %s
+
+; GCN-LABEL: {{^}}uniform_load:
+; GCN: v_mov_b32_e32 v1, 0
+; GCN-NEXT: v_readfirstlane_b32 s0, v0
+; GCN-NEXT: v_readfirstlane_b32 s1, v1
+; SI-NEXT: s_nop
+; GCN-NEXT: s_load_dword s{{[0-9]}}, s[0:1], 0x0
+define amdgpu_vs i32 @uniform_load(i32 %p) #0 {
+  %p64 = zext i32 %p to i64
+  %ptr = inttoptr i64 %p64 to i32 addrspace(2)*
+  %r = load i32, i32 addrspace(2)* %ptr, !amdgpu.uniform !0
+  ret i32 %r
+}
+
+; GCN-LABEL: {{^}}gep_uniform_load:
+; GCN: v_mov_b32_e32 v1, 0
+; GCN-NEXT: v_readfirstlane_b32 s0, v0
+; GCN-NEXT: v_readfirstlane_b32 s1, v1
+; SI-NEXT: s_nop
+; SICI-NEXT: s_load_dword s{{[0-9]}}, s[0:1], 0x1
+; VIGFX9-NEXT: s_load_dword s{{[0-9]}}, s[0:1], 0x4
+define amdgpu_vs i32 @gep_uniform_load(i32 %p) #0 {
+  %p64 = zext i32 %p to i64
+  %ptr = inttoptr i64 %p64 to i32 addrspace(2)*
+  %gep = getelementptr i32, i32 addrspace(2)* %ptr, i32 1
+  %r = load i32, i32 addrspace(2)* %gep, !amdgpu.uniform !0
+  ret i32 %r
+}
+
+; GCN-LABEL: {{^}}uniform_gep_load:
+; GCN: v_mov_b32_e32 v1, 0
+; GCN-NEXT: v_readfirstlane_b32 s0, v0
+; GCN-NEXT: v_readfirstlane_b32 s1, v1
+; SI-NEXT: s_nop
+; SICI-NEXT: s_load_dword s{{[0-9]}}, s[0:1], 0x1
+; VIGFX9-NEXT: s_load_dword s{{[0-9]}}, s[0:1], 0x4
+define amdgpu_vs i32 @uniform_gep_load(i32 %p) #0 {
+  %p64 = zext i32 %p to i64
+  %ptr = inttoptr i64 %p64 to i32 addrspace(2)*
+  %gep = getelementptr i32, i32 addrspace(2)* %ptr, i32 1, !amdgpu.uniform !0
+  %r = load i32, i32 addrspace(2)* %gep
+  ret i32 %r
+}
+
+; GCN-LABEL: {{^}}uniform_gep_uniform_load:
+; GCN: v_mov_b32_e32 v1, 0
+; GCN-NEXT: v_readfirstlane_b32 s0, v0
+; GCN-NEXT: v_readfirstlane_b32 s1, v1
+; SI-NEXT: s_nop
+; SICI-NEXT: s_load_dword s{{[0-9]}}, s[0:1], 0x1
+; VIGFX9-NEXT: s_load_dword s{{[0-9]}}, s[0:1], 0x4
+define amdgpu_vs i32 @uniform_gep_uniform_load(i32 %p) #0 {
+  %p64 = zext i32 %p to i64
+  %ptr = inttoptr i64 %p64 to i32 addrspace(2)*
+  %gep = getelementptr i32, i32 addrspace(2)* %ptr, i32 1, !amdgpu.uniform !0
+  %r = load i32, i32 addrspace(2)* %gep, !amdgpu.uniform !0
+  ret i32 %r
+}
+
+attributes #0 = { nounwind }
+
+!0 = !{}