diff --git a/llvm/lib/Target/AMDGPU/DSInstructions.td b/llvm/lib/Target/AMDGPU/DSInstructions.td
--- a/llvm/lib/Target/AMDGPU/DSInstructions.td
+++ b/llvm/lib/Target/AMDGPU/DSInstructions.td
@@ -12,6 +12,7 @@
   let LGKM_CNT = 1;
   let DS = 1;
+  let GWS = 0;
   let Size = 8;
   let UseNamedOperandTable = 1;
@@ -61,6 +62,7 @@
   let UseNamedOperandTable = 1;

   // copy relevant pseudo op flags
+  let GWS = ps.GWS;
   let SubtargetPredicate = ps.SubtargetPredicate;
   let OtherPredicates = ps.OtherPredicates;
   let SchedRW = ps.SchedRW;
@@ -376,6 +378,7 @@
 class DS_GWS <string opName, dag ins, string asmOps>
 : DS_Pseudo<opName, (outs), ins, asmOps> {
+  let GWS = 1;

   let has_vdst = 0;
   let has_addr = 0;
diff --git a/llvm/lib/Target/AMDGPU/MCA/AMDGPUCustomBehaviour.h b/llvm/lib/Target/AMDGPU/MCA/AMDGPUCustomBehaviour.h
--- a/llvm/lib/Target/AMDGPU/MCA/AMDGPUCustomBehaviour.h
+++ b/llvm/lib/Target/AMDGPU/MCA/AMDGPUCustomBehaviour.h
@@ -68,6 +68,8 @@
   bool hasModifiersSet(const std::unique_ptr<Instruction> &Inst,
                        unsigned OpName) const;
   /// Helper function used in generateWaitCntInfo()
+  bool isGWS(uint16_t Opcode) const;
+  /// Helper function used in generateWaitCntInfo()
   bool isAlwaysGDS(uint16_t Opcode) const;
   /// Helper function used in generateWaitCntInfo()
   bool isVMEM(const MCInstrDesc &MCID);
diff --git a/llvm/lib/Target/AMDGPU/MCA/AMDGPUCustomBehaviour.cpp b/llvm/lib/Target/AMDGPU/MCA/AMDGPUCustomBehaviour.cpp
--- a/llvm/lib/Target/AMDGPU/MCA/AMDGPUCustomBehaviour.cpp
+++ b/llvm/lib/Target/AMDGPU/MCA/AMDGPUCustomBehaviour.cpp
@@ -317,13 +317,15 @@
   return true;
 }

+// taken from SIInstrInfo::isGWS()
+bool AMDGPUCustomBehaviour::isGWS(uint16_t Opcode) const {
+  const MCInstrDesc &MCID = MCII.get(Opcode);
+  return MCID.TSFlags & SIInstrFlags::GWS;
+}
+
 // taken from SIInstrInfo::isAlwaysGDS()
 bool AMDGPUCustomBehaviour::isAlwaysGDS(uint16_t Opcode) const {
-  return Opcode == AMDGPU::DS_ORDERED_COUNT || Opcode == AMDGPU::DS_GWS_INIT ||
-         Opcode == AMDGPU::DS_GWS_SEMA_V || Opcode == AMDGPU::DS_GWS_SEMA_BR ||
-         Opcode == AMDGPU::DS_GWS_SEMA_P ||
-         Opcode == AMDGPU::DS_GWS_SEMA_RELEASE_ALL ||
-         Opcode == AMDGPU::DS_GWS_BARRIER;
+  return Opcode == AMDGPU::DS_ORDERED_COUNT || isGWS(Opcode);
 }

 } // namespace mca
diff --git a/llvm/lib/Target/AMDGPU/SIDefines.h b/llvm/lib/Target/AMDGPU/SIDefines.h
--- a/llvm/lib/Target/AMDGPU/SIDefines.h
+++ b/llvm/lib/Target/AMDGPU/SIDefines.h
@@ -161,6 +161,9 @@
   // Is never uniform.
   IsNeverUniform = UINT64_C(1) << 61,
+
+  // ds_gws_* instructions.
+  GWS = UINT64_C(1) << 62,
 };

 // v_cmp_class_* etc. use a 10-bit mask for what operation is checked.
diff --git a/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp b/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
--- a/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
@@ -588,12 +588,7 @@
                        AMDGPU::OpName::data1),
                    CurrScore);
       }
-    } else if (SIInstrInfo::isAtomicRet(Inst) &&
-               Inst.getOpcode() != AMDGPU::DS_GWS_INIT &&
-               Inst.getOpcode() != AMDGPU::DS_GWS_SEMA_V &&
-               Inst.getOpcode() != AMDGPU::DS_GWS_SEMA_BR &&
-               Inst.getOpcode() != AMDGPU::DS_GWS_SEMA_P &&
-               Inst.getOpcode() != AMDGPU::DS_GWS_BARRIER &&
+    } else if (SIInstrInfo::isAtomicRet(Inst) && !SIInstrInfo::isGWS(Inst) &&
                Inst.getOpcode() != AMDGPU::DS_APPEND &&
                Inst.getOpcode() != AMDGPU::DS_CONSUME &&
                Inst.getOpcode() != AMDGPU::DS_ORDERED_COUNT) {
diff --git a/llvm/lib/Target/AMDGPU/SIInstrFormats.td b/llvm/lib/Target/AMDGPU/SIInstrFormats.td
--- a/llvm/lib/Target/AMDGPU/SIInstrFormats.td
+++ b/llvm/lib/Target/AMDGPU/SIInstrFormats.td
@@ -156,6 +156,9 @@
   // This bit indicates that the instruction is never-uniform/divergent
   field bit IsNeverUniform = 0;

+  // ds_gws_* instructions.
+  field bit GWS = 0;
+
   // These need to be kept in sync with the enum in SIInstrFlags.
   let TSFlags{0} = SALU;
   let TSFlags{1} = VALU;
@@ -239,6 +242,8 @@
   let TSFlags{61} = IsNeverUniform;

+  let TSFlags{62} = GWS;
+
   let SchedRW = [Write32Bit];
   let AsmVariantName = AMDGPUAsmVariants.Default;
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.h b/llvm/lib/Target/AMDGPU/SIInstrInfo.h
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.h
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.h
@@ -531,6 +531,14 @@
     return get(Opcode).TSFlags & SIInstrFlags::DS;
   }

+  static bool isGWS(const MachineInstr &MI) {
+    return MI.getDesc().TSFlags & SIInstrFlags::GWS;
+  }
+
+  bool isGWS(uint16_t Opcode) const {
+    return get(Opcode).TSFlags & SIInstrFlags::GWS;
+  }
+
   bool isAlwaysGDS(uint16_t Opcode) const;

   static bool isMIMG(const MachineInstr &MI) {
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -3732,13 +3732,7 @@
 }

 bool SIInstrInfo::isAlwaysGDS(uint16_t Opcode) const {
-  return Opcode == AMDGPU::DS_ORDERED_COUNT ||
-         Opcode == AMDGPU::DS_GWS_INIT ||
-         Opcode == AMDGPU::DS_GWS_SEMA_V ||
-         Opcode == AMDGPU::DS_GWS_SEMA_BR ||
-         Opcode == AMDGPU::DS_GWS_SEMA_P ||
-         Opcode == AMDGPU::DS_GWS_SEMA_RELEASE_ALL ||
-         Opcode == AMDGPU::DS_GWS_BARRIER;
+  return Opcode == AMDGPU::DS_ORDERED_COUNT || isGWS(Opcode);
 }

 bool SIInstrInfo::modifiesModeRegister(const MachineInstr &MI) {
diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
--- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
@@ -1140,6 +1140,7 @@
 bool hasA16(const MCSubtargetInfo &STI);
 bool hasG16(const MCSubtargetInfo &STI);
 bool hasPackedD16(const MCSubtargetInfo &STI);
+bool hasGDS(const MCSubtargetInfo &STI);
 unsigned getNSAMaxSize(const MCSubtargetInfo &STI);

 bool isSI(const MCSubtargetInfo &STI);
diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
--- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
@@ -2000,6 +2000,10 @@
          !isSI(STI);
 }

+bool hasGDS(const MCSubtargetInfo &STI) {
+  return STI.hasFeature(AMDGPU::FeatureGDS);
+}
+
 unsigned getNSAMaxSize(const MCSubtargetInfo &STI) {
   auto Version = getIsaVersion(STI.getCPU());
   if (Version.Major == 10)
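
Reviewer sketch, not part of the patch: the change replaces per-opcode ds_gws_* checks with a single TSFlags bit test. The self-contained snippet below illustrates that mechanism under assumptions; FakeInstrDesc, the IsOrderedCount parameter, and the sample values are invented for the example, while the bit position (62) and the `TSFlags & GWS` test mirror SIDefines.h and the new isGWS()/isAlwaysGDS() helpers above.

// Illustration only: mocks the TSFlags mechanism this patch relies on.
#include <cassert>
#include <cstdint>

// Mirrors SIInstrFlags::GWS = UINT64_C(1) << 62 from SIDefines.h.
constexpr uint64_t GWS = UINT64_C(1) << 62;

struct FakeInstrDesc {
  uint64_t TSFlags; // stand-in for MCInstrDesc::TSFlags
};

// Same shape as SIInstrInfo::isGWS() / AMDGPUCustomBehaviour::isGWS().
static bool isGWS(const FakeInstrDesc &Desc) { return Desc.TSFlags & GWS; }

// Same shape as the rewritten isAlwaysGDS(): one special opcode plus the flag.
static bool isAlwaysGDS(const FakeInstrDesc &Desc, bool IsOrderedCount) {
  return IsOrderedCount || isGWS(Desc);
}

int main() {
  FakeInstrDesc GwsBarrier{GWS}; // e.g. ds_gws_barrier would set the bit
  FakeInstrDesc PlainDs{0};      // e.g. ds_read_b32 would not
  assert(isGWS(GwsBarrier) && !isGWS(PlainDs));
  assert(isAlwaysGDS(GwsBarrier, /*IsOrderedCount=*/false));
  assert(!isAlwaysGDS(PlainDs, /*IsOrderedCount=*/false));
  return 0;
}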