AMDGPU perf-hint analysis: skip bitcast instructions in the per-instruction
cost loop, add a test that checks the printed instruction costs, and update
the expected hints in perfhint.ll.

diff --git a/llvm/lib/Target/AMDGPU/AMDGPUPerfHintAnalysis.cpp b/llvm/lib/Target/AMDGPU/AMDGPUPerfHintAnalysis.cpp
--- a/llvm/lib/Target/AMDGPU/AMDGPUPerfHintAnalysis.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUPerfHintAnalysis.cpp
@@ -224,6 +224,9 @@
     LastAccess = MemAccessInfo();
     unsigned UsedGlobalLoadsInBB = 0;
     for (auto &I : B) {
+      // Skip bitcasts so they do not take part in the cost calculation below.
+      if (isa<BitCastInst>(I))
+        continue;
       if (const Type *Ty = getMemoryInstrPtrAndType(&I).second) {
         unsigned Size = divideCeil(Ty->getPrimitiveSizeInBits(), 32);
         // TODO: Check if the global load and its user are close to each other
diff --git a/llvm/test/CodeGen/AMDGPU/perfhint-instr-cost.ll b/llvm/test/CodeGen/AMDGPU/perfhint-instr-cost.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/perfhint-instr-cost.ll
@@ -0,0 +1,14 @@
+; REQUIRES: asserts
+; RUN: llc -march=amdgcn -debug-only=amdgpu-perf-hint < %s 2>&1 | FileCheck %s
+; Checks the instruction costs printed in the amdgpu-perf-hint debug output.
+define i32 @perfHintInstrCost(ptr addrspace(4) %p1, ptr addrspace(4) %p2, ptr addrspace(4) %p3) #0 {
+; CHECK-LABEL: perfHintInstrCost
+; CHECK: MemInst cost: 3
+; CHECK: TotalInst cost: 6
+  %x = load volatile i32, ptr addrspace(4) %p1, align 4
+  %y = load volatile i32, ptr addrspace(4) %p2, align 4
+  %z = load volatile i32, ptr addrspace(4) %p3, align 4
+  %sum = add i32 %x, %y
+  %sum2 = add i32 %sum, %z
+  ret i32 %sum2
+}
diff --git a/llvm/test/CodeGen/AMDGPU/perfhint.ll b/llvm/test/CodeGen/AMDGPU/perfhint.ll
--- a/llvm/test/CodeGen/AMDGPU/perfhint.ll
+++ b/llvm/test/CodeGen/AMDGPU/perfhint.ll
@@ -1,4 +1,5 @@
 ; RUN: llc -march=amdgcn < %s | FileCheck -check-prefix=GCN %s
+; RUN: llc -march=amdgcn -opaque-pointers=1 < %s | FileCheck -check-prefix=GCN %s
 
 ; GCN-LABEL: {{^}}test_membound:
 ; GCN: MemoryBound: 1
@@ -21,6 +22,7 @@
 
 ; GCN-LABEL: {{^}}test_membound_1:
 ; GCN: MemoryBound: 1
+; GCN: WaveLimiterHint : 1
 define amdgpu_kernel void @test_membound_1(<2 x double> addrspace(1)* nocapture readonly %ptr.0,
                                            <2 x double> addrspace(1)* nocapture %ptr.1,
                                            <2 x double> %arg.0, i32 %arg.1, <4 x double> %arg.2) {
@@ -112,7 +114,7 @@
 }
 
 ; GCN-LABEL: {{^}}test_indirect:
-; GCN: MemoryBound: 0
+; GCN: MemoryBound: 1
 ; GCN: WaveLimiterHint : 1
 define amdgpu_kernel void @test_indirect(i32 addrspace(1)* nocapture %arg) {
 bb: