// Patch summary (text before the first "diff --git" header is not part of any hunk).
//
// Purpose: replace the hard-coded opcode list that SIInsertWaitcnts uses to exclude
// buffer invalidate instructions from VMEM event tracking with a per-opcode table flag.
//
// - BUFInstructions.td: adds a 1-bit field `IsBufferInv` (default 0) next to
//   `has_sccb`/`sccb_value`, sets it to 1 in a definition that also forces
//   mayLoad/mayStore/offen/idxen/addr64 to 0, and appends "IsBufferInv" to the
//   `Fields` list of MUBUFInfoTable (FilterClass "MUBUF_Pseudo", keyed by "Opcode").
//   NOTE(review): the names of the two enclosing TableGen classes are outside the
//   hunks shown here -- confirm which records end up with IsBufferInv = 1.
// - Utils/AMDGPUBaseInfo.{h,cpp}: adds `bool IsBufferInv` to the MUBUF info struct and
//   a new accessor `getMUBUFIsBufferInv(unsigned Opc)`. The accessor looks the opcode up
//   with getMUBUFOpcodeHelper and returns false when no table entry is found.
// - SIInsertWaitcnts.cpp: includes Utils/AMDGPUBaseInfo.h and replaces the five explicit
//   opcode comparisons (BUFFER_WBINVL1, BUFFER_WBINVL1_SC, BUFFER_WBINVL1_VOL,
//   BUFFER_GL0_INV, BUFFER_GL1_INV) in the isVMEM branch with
//   `!llvm::AMDGPU::getMUBUFIsBufferInv(Inst.getOpcode())`. The comment above the
//   forced-waitcnt opcode check is rewritten as a FIXME; that check itself is unchanged
//   in the visible context.
//   NOTE(review): the table query applies to every opcode whose record sets
//   IsBufferInv, which may be a wider set than the five opcodes listed before (the new
//   test uses BUFFER_INVL2) -- confirm against the .td definitions.
// - test/CodeGen/AMDGPU/waitcnt-preexisting.mir: adds a `...` document terminator after
//   `KILL $vgpr0`, and a new test `test_waitcnt_preexisting_buffer_inv`. Its GFX9 checks
//   expect the pre-existing `S_WAITCNT 3952` to be kept, no S_WAITCNT between
//   BUFFER_INVL2 / BUFFER_WBINVL1_VOL and the following FLAT_LOAD_DWORD, and a single
//   `S_WAITCNT 112` before the FLAT_STORE_DWORD.
//
diff --git a/llvm/lib/Target/AMDGPU/BUFInstructions.td b/llvm/lib/Target/AMDGPU/BUFInstructions.td --- a/llvm/lib/Target/AMDGPU/BUFInstructions.td +++ b/llvm/lib/Target/AMDGPU/BUFInstructions.td @@ -332,6 +332,7 @@ bits<4> elements = 0; bits<1> has_sccb = 1; bits<1> sccb_value = 0; + bits<1> IsBufferInv = 0; } class MUBUF_Real : @@ -374,7 +375,8 @@ let mayLoad = 0; let mayStore = 0; - // Set everything to 0. + let IsBufferInv = 1; + // Set everything else to 0. let offen = 0; let idxen = 0; let addr64 = 0; @@ -2606,7 +2608,10 @@ def MUBUFInfoTable : GenericTable { let FilterClass = "MUBUF_Pseudo"; let CppTypeName = "MUBUFInfo"; - let Fields = ["Opcode", "BaseOpcode", "elements", "has_vaddr", "has_srsrc", "has_soffset"]; + let Fields = [ + "Opcode", "BaseOpcode", "elements", "has_vaddr", "has_srsrc", "has_soffset", + "IsBufferInv" + ]; let PrimaryKey = ["Opcode"]; let PrimaryKeyName = "getMUBUFOpcodeHelper"; diff --git a/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp b/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp --- a/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp +++ b/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp @@ -27,6 +27,7 @@ #include "GCNSubtarget.h" #include "MCTargetDesc/AMDGPUMCTargetDesc.h" #include "SIMachineFunctionInfo.h" +#include "Utils/AMDGPUBaseInfo.h" #include "llvm/ADT/MapVector.h" #include "llvm/ADT/PostOrderIterator.h" #include "llvm/CodeGen/MachinePostDominators.h" @@ -945,8 +946,10 @@ AMDGPU::Waitcnt Wait; bool Modified = false; - // See if this instruction has a forced S_WAITCNT VM. - // TODO: Handle other cases of NeedsWaitcntVmBefore() + // FIXME: This should have already been handled by the memory legalizer. + // Removing this currently doesn't affect any lit tests, but we need to + // verify that nothing was relying on this. The number of buffer invalidates + // being handled here should not be expanded. 
if (MI.getOpcode() == AMDGPU::BUFFER_WBINVL1 || MI.getOpcode() == AMDGPU::BUFFER_WBINVL1_SC || MI.getOpcode() == AMDGPU::BUFFER_WBINVL1_VOL || @@ -1317,12 +1320,7 @@ if (FlatASCount > 1) ScoreBrackets->setPendingFlat(); } else if (SIInstrInfo::isVMEM(Inst) && - // TODO: get a better carve out. - Inst.getOpcode() != AMDGPU::BUFFER_WBINVL1 && - Inst.getOpcode() != AMDGPU::BUFFER_WBINVL1_SC && - Inst.getOpcode() != AMDGPU::BUFFER_WBINVL1_VOL && - Inst.getOpcode() != AMDGPU::BUFFER_GL0_INV && - Inst.getOpcode() != AMDGPU::BUFFER_GL1_INV) { + !llvm::AMDGPU::getMUBUFIsBufferInv(Inst.getOpcode())) { if (!ST->hasVscnt()) ScoreBrackets->updateByEvent(TII, TRI, MRI, VMEM_ACCESS, Inst); else if ((Inst.mayLoad() && !SIInstrInfo::isAtomicNoRet(Inst)) || diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h --- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h +++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h @@ -394,6 +394,9 @@ LLVM_READONLY bool getMUBUFHasSoffset(unsigned Opc); +LLVM_READONLY +bool getMUBUFIsBufferInv(unsigned Opc); + LLVM_READONLY bool getSMEMIsBuffer(unsigned Opc); diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp --- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp +++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp @@ -162,6 +162,7 @@ bool has_vaddr; bool has_srsrc; bool has_soffset; + bool IsBufferInv; }; struct MTBUFInfo { @@ -257,6 +258,11 @@ return Info ? Info->has_soffset : false; } +bool getMUBUFIsBufferInv(unsigned Opc) { + const MUBUFInfo *Info = getMUBUFOpcodeHelper(Opc); + return Info ? Info->IsBufferInv : false; +} + bool getSMEMIsBuffer(unsigned Opc) { const SMInfo *Info = getSMEMOpcodeHelper(Opc); return Info ? 
Info->IsBuffer : false; diff --git a/llvm/test/CodeGen/AMDGPU/waitcnt-preexisting.mir b/llvm/test/CodeGen/AMDGPU/waitcnt-preexisting.mir --- a/llvm/test/CodeGen/AMDGPU/waitcnt-preexisting.mir +++ b/llvm/test/CodeGen/AMDGPU/waitcnt-preexisting.mir @@ -192,6 +192,7 @@ $vgpr0 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr S_WAITCNT 3952 KILL $vgpr0 +... # Combine preexisting waitcnt with wait added to the start of a non-entry function. @@ -206,3 +207,28 @@ S_WAITCNT 0 S_ENDPGM 0 ... + +# Verify that extra waitcnt are not added after buffer invalidate instructions. + +--- +name: test_waitcnt_preexisting_buffer_inv +body: | + bb.0: + ; GFX9-LABEL: name: test_waitcnt_preexisting_buffer_inv + ; GFX9: S_WAITCNT 0 + ; GFX9: $vgpr0_vgpr1 = GLOBAL_LOAD_DWORDX2 $vgpr0_vgpr1, 0, 0, implicit $exec + ; GFX9: S_WAITCNT 3952 + ; GFX9: BUFFER_INVL2 implicit $exec + ; GFX9: BUFFER_WBINVL1_VOL implicit $exec + ; GFX9: $vgpr0 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr + ; GFX9: S_WAITCNT 112 + ; GFX9: FLAT_STORE_DWORD $vgpr0_vgpr1, $vgpr0, 0, 0, implicit $exec, implicit $flat_scr + ; GFX9: S_ENDPGM 0 + $vgpr0_vgpr1 = GLOBAL_LOAD_DWORDX2 $vgpr0_vgpr1, 0, 0, implicit $exec + S_WAITCNT 3952 + BUFFER_INVL2 implicit $exec + BUFFER_WBINVL1_VOL implicit $exec + $vgpr0 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr + FLAT_STORE_DWORD $vgpr0_vgpr1, $vgpr0, 0, 0, implicit $exec, implicit $flat_scr + S_ENDPGM 0 +...