diff --git a/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp b/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp --- a/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp +++ b/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp @@ -150,6 +150,8 @@ VMEM_NOSAMPLER, // MIMG instructions with a sampler. VMEM_SAMPLER, + // BVH instructions + VMEM_BVH }; VmemType getVmemType(const MachineInstr &Inst) { @@ -157,9 +159,10 @@ if (!SIInstrInfo::isMIMG(Inst)) return VMEM_NOSAMPLER; const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Inst.getOpcode()); - return AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode)->Sampler - ? VMEM_SAMPLER - : VMEM_NOSAMPLER; + const AMDGPU::MIMGBaseOpcodeInfo *BaseInfo = + AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode); + return BaseInfo->BVH ? VMEM_BVH + : BaseInfo->Sampler ? VMEM_SAMPLER : VMEM_NOSAMPLER; } void addWait(AMDGPU::Waitcnt &Wait, InstCounterType T, unsigned Count) { diff --git a/llvm/test/CodeGen/AMDGPU/waitcnt-bvh.mir b/llvm/test/CodeGen/AMDGPU/waitcnt-bvh.mir --- a/llvm/test/CodeGen/AMDGPU/waitcnt-bvh.mir +++ b/llvm/test/CodeGen/AMDGPU/waitcnt-bvh.mir @@ -16,6 +16,7 @@ bb.0: ; GCN-LABEL: name: waitcnt-check-vs-vmem ; GCN: $vgpr0_vgpr1_vgpr2_vgpr3 = IMAGE_BVH_INTERSECT_RAY_sa_gfx10 killed $vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19 + ; GCN-NEXT: S_WAITCNT 16240 ; GCN-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFEN $vgpr16, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, implicit $exec $vgpr0_vgpr1_vgpr2_vgpr3 = IMAGE_BVH_INTERSECT_RAY_sa_gfx10 killed $vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19, renamable $sgpr0_sgpr1_sgpr2_sgpr3, implicit $exec :: (dereferenceable load (s128) from custom "ImageResource") $vgpr0 = BUFFER_LOAD_DWORD_OFFEN $vgpr16, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, implicit $exec @@ -37,6 +38,7 @@ bb.0: ; GCN-LABEL: name: waitcnt-check-vs-vmem-reverse ; GCN: $vgpr0 = BUFFER_LOAD_DWORD_OFFEN $vgpr20, 
$sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, implicit $exec + ; GCN-NEXT: S_WAITCNT 16240 ; GCN-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = IMAGE_BVH_INTERSECT_RAY_sa_gfx10 killed $vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19 $vgpr0 = BUFFER_LOAD_DWORD_OFFEN $vgpr20, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, implicit $exec $vgpr0_vgpr1_vgpr2_vgpr3 = IMAGE_BVH_INTERSECT_RAY_sa_gfx10 killed $vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19, killed renamable $sgpr0_sgpr1_sgpr2_sgpr3, implicit $exec :: (dereferenceable load (s128) from custom "ImageResource")