# Patch summary (explanatory preamble; everything from the first "diff --git"
# header onward is the unmodified patch text).
#
# llvm/lib/Target/AMDGPU/AMDGPUPerfHintAnalysis.cpp
#   * Declares a new const member, isConstantAddr(const Value *), next to the
#     existing isGlobalAddr / isLocalAddr declarations.
#   * In the load-handling branch, the condition that logs "is IA" and returns
#     true is widened from isGlobalAddr(M) to
#     isGlobalAddr(M) || isLocalAddr(M) || isConstantAddr(M), where M is the
#     load's pointer operand.
#   * Defines AMDGPUPerfHint::isConstantAddr: when the dyn_cast of V->getType()
#     succeeds, it returns whether the address space equals
#     AMDGPUAS::CONSTANT_ADDRESS or AMDGPUAS::CONSTANT_ADDRESS_32BIT; otherwise
#     it returns false.
#
# llvm/test/CodeGen/AMDGPU/perfhint.ll
#   * test_indirect_through_phi: the expected "WaveLimiterHint" value changes
#     from 0 to 1, and a FIXME records that 0 was the intended value.
#
# llvm/test/CodeGen/AMDGPU/schedule-regpressure-limit2.ll
#   * SI-MAXOCC NumSgprs / NumVgprs patterns change from {{[1-4]?[0-9]$}} to
#     {{[0-4][0-9]$}}.
#   * NOTE(review): the old pattern made the leading digit optional; the new one
#     requires two digits, so a single-digit count no longer matches - confirm
#     this tightening is intended.
#
# NOTE(review): in this copy the patch's line breaks are collapsed and both
# dyn_cast(...) calls appear without template arguments - presumably lost in
# extraction. Verify against the original patch before attempting to apply it.
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUPerfHintAnalysis.cpp b/llvm/lib/Target/AMDGPU/AMDGPUPerfHintAnalysis.cpp --- a/llvm/lib/Target/AMDGPU/AMDGPUPerfHintAnalysis.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUPerfHintAnalysis.cpp @@ -116,6 +116,7 @@ bool isGlobalAddr(const Value *V) const; bool isLocalAddr(const Value *V) const; + bool isConstantAddr(const Value *V) const; bool isGlobalLoadUsedInBB(const Instruction &) const; }; @@ -153,7 +154,7 @@ if (auto LD = dyn_cast(V)) { auto M = LD->getPointerOperand(); - if (isGlobalAddr(M)) { + if (isGlobalAddr(M) || isLocalAddr(M) || isConstantAddr(M)) { LLVM_DEBUG(dbgs() << " is IA\n"); return true; } @@ -348,6 +349,15 @@ return false; } +bool AMDGPUPerfHint::isConstantAddr(const Value *V) const { + if (auto PT = dyn_cast(V->getType())) { + unsigned As = PT->getAddressSpace(); + return As == AMDGPUAS::CONSTANT_ADDRESS || + As == AMDGPUAS::CONSTANT_ADDRESS_32BIT; + } + return false; +} + bool AMDGPUPerfHint::isLargeStride(const Instruction *Inst) { LLVM_DEBUG(dbgs() << "[isLargeStride] " << *Inst << '\n'); diff --git a/llvm/test/CodeGen/AMDGPU/perfhint.ll b/llvm/test/CodeGen/AMDGPU/perfhint.ll --- a/llvm/test/CodeGen/AMDGPU/perfhint.ll +++ b/llvm/test/CodeGen/AMDGPU/perfhint.ll @@ -144,9 +144,10 @@ ret void } +; FIXME: This test was intended to be WaveLimiterHint : 0 ; GCN-LABEL: {{^}}test_indirect_through_phi: ; GCN: MemoryBound: 0 -; GCN: WaveLimiterHint : 0 +; GCN: WaveLimiterHint : 1 define amdgpu_kernel void @test_indirect_through_phi(float addrspace(1)* %arg) { bb: %load = load float, float addrspace(1)* %arg, align 8 diff --git a/llvm/test/CodeGen/AMDGPU/schedule-regpressure-limit2.ll b/llvm/test/CodeGen/AMDGPU/schedule-regpressure-limit2.ll --- a/llvm/test/CodeGen/AMDGPU/schedule-regpressure-limit2.ll +++ b/llvm/test/CodeGen/AMDGPU/schedule-regpressure-limit2.ll @@ -6,8 +6,8 @@ ; SI-MINREG: NumSgprs: {{[1-9]$}} ; SI-MINREG: NumVgprs: {{[1-9]$}} -; SI-MAXOCC: NumSgprs: {{[1-4]?[0-9]$}} -; SI-MAXOCC: NumVgprs: 
{{[1-4]?[0-9]$}} +; SI-MAXOCC: NumSgprs: {{[0-4][0-9]$}} +; SI-MAXOCC: NumVgprs: {{[0-4][0-9]$}} ; stores may alias loads ; VI: NumSgprs: {{[0-9]$}}