Index: llvm/trunk/lib/Target/AMDGPU/SIInsertWaits.cpp
===================================================================
--- llvm/trunk/lib/Target/AMDGPU/SIInsertWaits.cpp
+++ llvm/trunk/lib/Target/AMDGPU/SIInsertWaits.cpp
@@ -231,15 +231,17 @@
     return false;
 
   // Check if this operand is the value being stored.
-  // Special case for DS instructions, since the address
+  // Special case for DS/FLAT instructions, since the address
   // operand comes before the value operand and it may have
   // multiple data operands.
 
-  if (TII->isDS(MI)) {
+  if (TII->isDS(MI) || TII->isFLAT(MI)) {
     MachineOperand *Data = TII->getNamedOperand(MI, AMDGPU::OpName::data);
     if (Data && Op.isIdenticalTo(*Data))
       return true;
+  }
 
+  if (TII->isDS(MI)) {
     MachineOperand *Data0 = TII->getNamedOperand(MI, AMDGPU::OpName::data0);
     if (Data0 && Op.isIdenticalTo(*Data0))
       return true;
Index: llvm/trunk/test/CodeGen/AMDGPU/waitcnt-flat.ll
===================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/waitcnt-flat.ll
+++ llvm/trunk/test/CodeGen/AMDGPU/waitcnt-flat.ll
@@ -0,0 +1,16 @@
+; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=kaveri | FileCheck --check-prefix=GCN %s
+; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=fiji | FileCheck --check-prefix=GCN %s
+
+; If flat_store_dword and flat_load_dword use different registers for the data
+; operand, this test is not broken. It just means it is no longer testing
+; for the original bug.
+
+; GCN: {{^}}test:
+; GCN: flat_store_dword v[{{[0-9]+:[0-9]+}}], [[DATA:v[0-9]+]]
+; GCN: s_waitcnt vmcnt(0) lgkmcnt(0)
+; GCN: flat_load_dword [[DATA]], v[{{[0-9]+:[0-9]+}}]
+define void @test(i32 addrspace(1)* %out, i32 %in) {
+  store volatile i32 0, i32 addrspace(1)* %out
+  %val = load volatile i32, i32 addrspace(1)* %out
+  ret void
+}