Index: llvm/lib/Target/AMDGPU/BUFInstructions.td =================================================================== --- llvm/lib/Target/AMDGPU/BUFInstructions.td +++ llvm/lib/Target/AMDGPU/BUFInstructions.td @@ -696,6 +696,7 @@ let PseudoInstr = opName # "_" # getAddrName.ret; let glc_value = 0; let dlc_value = 0; + let IsAtomicNoRtn = 1; let AsmMatchConverter = "cvtMubufAtomic"; } @@ -714,6 +715,7 @@ let PseudoInstr = opName # "_rtn_" # getAddrName.ret; let glc_value = 1; let dlc_value = 0; + let IsAtomicRtn = 1; let Constraints = "$vdata = $vdata_in"; let DisableEncoding = "$vdata_in"; let AsmMatchConverter = "cvtMubufAtomicReturn"; Index: llvm/lib/Target/AMDGPU/DSInstructions.td =================================================================== --- llvm/lib/Target/AMDGPU/DSInstructions.td +++ llvm/lib/Target/AMDGPU/DSInstructions.td @@ -102,6 +102,7 @@ let has_data1 = 0; let has_vdst = 0; + let IsAtomicNoRtn = 1; } multiclass DS_1A1D_NORET_mc { @@ -121,6 +122,7 @@ " $addr, $data0, $data1$offset$gds"> { let has_vdst = 0; + let IsAtomicNoRtn = 1; } multiclass DS_1A2D_NORET_mc { @@ -161,6 +163,7 @@ let hasPostISelHook = 1; let has_data1 = 0; + let IsAtomicRtn = 1; } multiclass DS_1A1D_RET_mc { let hasPostISelHook = 1; + let IsAtomicRtn = 1; } multiclass DS_1A2D_RET_mc { // 64-bit atomics - def "" : MIMGBaseOpcode { - let Atomic = 1; - let AtomicX2 = isCmpSwap; - } + let IsAtomicRtn = 1 in { + def "" : MIMGBaseOpcode { + let Atomic = 1; + let AtomicX2 = isCmpSwap; + } - let BaseOpcode = !cast(NAME) in { - // _V* variants have different dst size, but the size is encoded implicitly, - // using dmask and tfe. Only 32-bit variant is registered with disassembler. - // Other variants are reconstructed by disassembler using dmask and tfe. - let VDataDwords = !if(isCmpSwap, 2, 1) in - defm _V1 : MIMG_Atomic_Addr_Helper_m ; - let VDataDwords = !if(isCmpSwap, 4, 2) in - defm _V2 : MIMG_Atomic_Addr_Helper_m ; - } + let BaseOpcode = !cast(NAME) in { + // _V* variants have different dst size, but the size is encoded implicitly, + // using dmask and tfe. Only 32-bit variant is registered with disassembler. + // Other variants are reconstructed by disassembler using dmask and tfe. + let VDataDwords = !if(isCmpSwap, 2, 1) in + defm _V1 : MIMG_Atomic_Addr_Helper_m ; + let VDataDwords = !if(isCmpSwap, 4, 2) in + defm _V2 : MIMG_Atomic_Addr_Helper_m ; + } + } // End IsAtomicRtn = 1 } class MIMG_Sampler_Helper isMIMG(Inst)) { if (Inst.mayStore()) { setExpScore(&Inst, TII, TRI, MRI, 0, CurrScore); - } else if (AMDGPU::getAtomicNoRetOp(Inst.getOpcode()) != -1) { + } else if (SIInstrInfo::isAtomicRtn(Inst)) { setExpScore( &Inst, TII, TRI, MRI, AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::data), @@ -582,7 +582,7 @@ } else if (TII->isMUBUF(Inst)) { if (Inst.mayStore()) { setExpScore(&Inst, TII, TRI, MRI, 0, CurrScore); - } else if (AMDGPU::getAtomicNoRetOp(Inst.getOpcode()) != -1) { + } else if (SIInstrInfo::isAtomicRtn(Inst)) { setExpScore( &Inst, TII, TRI, MRI, AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::data), @@ -1246,8 +1246,7 @@ ++FlatASCount; if (!ST->hasVscnt()) ScoreBrackets->updateByEvent(TII, TRI, MRI, VMEM_ACCESS, Inst); - else if (Inst.mayLoad() && - AMDGPU::getAtomicRetOp(Inst.getOpcode()) == -1) + else if (Inst.mayLoad() && !SIInstrInfo::isAtomicNoRtn(Inst)) ScoreBrackets->updateByEvent(TII, TRI, MRI, VMEM_READ_ACCESS, Inst); else ScoreBrackets->updateByEvent(TII, TRI, MRI, VMEM_WRITE_ACCESS, Inst); @@ -1275,8 +1274,7 @@ Inst.getOpcode() != AMDGPU::BUFFER_GL1_INV) { if (!ST->hasVscnt()) ScoreBrackets->updateByEvent(TII, TRI, MRI, VMEM_ACCESS, Inst); - else if ((Inst.mayLoad() && - AMDGPU::getAtomicRetOp(Inst.getOpcode()) == -1) || + else if ((Inst.mayLoad() && !SIInstrInfo::isAtomicNoRtn(Inst)) || /* IMAGE_GET_RESINFO / IMAGE_GET_LOD */ (TII->isMIMG(Inst) && !Inst.mayLoad() && !Inst.mayStore())) ScoreBrackets->updateByEvent(TII, TRI, MRI, VMEM_READ_ACCESS, Inst); @@ -1284,7 +1282,7 @@ ScoreBrackets->updateByEvent(TII, TRI, MRI, VMEM_WRITE_ACCESS, Inst); if (ST->vmemWriteNeedsExpWaitcnt() && - (Inst.mayStore() || AMDGPU::getAtomicNoRetOp(Inst.getOpcode()) != -1)) { + (Inst.mayStore() || SIInstrInfo::isAtomicRtn(Inst))) { ScoreBrackets->updateByEvent(TII, TRI, MRI, VMW_GPR_LOCK, Inst); } } else if (TII->isSMRD(Inst)) { Index: llvm/lib/Target/AMDGPU/SIInstrFormats.td =================================================================== --- llvm/lib/Target/AMDGPU/SIInstrFormats.td +++ llvm/lib/Target/AMDGPU/SIInstrFormats.td @@ -135,6 +135,12 @@ // Must be 0 for non-FLAT instructions. field bit IsFlatScratch = 0; + // Atomic without a return. + field bit IsAtomicNoRtn = 0; + + // Atomic with return. + field bit IsAtomicRtn = 0; + // These need to be kept in sync with the enum in SIInstrFlags. let TSFlags{0} = SALU; let TSFlags{1} = VALU; @@ -205,6 +211,10 @@ let TSFlags{56} = IsFlatScratch; + let TSFlags{57} = IsAtomicNoRtn; + + let TSFlags{58} = IsAtomicRtn; + let SchedRW = [Write32Bit]; let AsmVariantName = AMDGPUAsmVariants.Default; Index: llvm/lib/Target/AMDGPU/SIInstrInfo.h =================================================================== --- llvm/lib/Target/AMDGPU/SIInstrInfo.h +++ llvm/lib/Target/AMDGPU/SIInstrInfo.h @@ -538,6 +538,32 @@ return get(Opcode).TSFlags & SIInstrFlags::EXP; } + static bool isAtomicNoRtn(const MachineInstr &MI) { + return MI.getDesc().TSFlags & SIInstrFlags::IsAtomicNoRtn; + } + + bool isAtomicNoRtn(uint16_t Opcode) const { + return get(Opcode).TSFlags & SIInstrFlags::IsAtomicNoRtn; + } + + static bool isAtomicRtn(const MachineInstr &MI) { + return MI.getDesc().TSFlags & SIInstrFlags::IsAtomicRtn; + } + + bool isAtomicRtn(uint16_t Opcode) const { + return get(Opcode).TSFlags & SIInstrFlags::IsAtomicRtn; + } + + static bool isAtomic(const MachineInstr &MI) { + return MI.getDesc().TSFlags & (SIInstrFlags::IsAtomicRtn | + SIInstrFlags::IsAtomicNoRtn); + } + + bool isAtomic(uint16_t Opcode) const { + return get(Opcode).TSFlags & (SIInstrFlags::IsAtomicRtn | + SIInstrFlags::IsAtomicNoRtn); + } + static bool isWQM(const MachineInstr &MI) { return MI.getDesc().TSFlags & SIInstrFlags::WQM; } Index: llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp =================================================================== --- llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp +++ llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp @@ -455,7 +455,7 @@ /// Return true iff instruction \p MI is a atomic instruction that /// returns a result. bool isAtomicRet(const MachineInstr &MI) const { - return AMDGPU::getAtomicNoRetOp(MI.getOpcode()) != -1; + return SIInstrInfo::isAtomicRtn(MI); } /// Removes all processed atomic pseudo instructions from the current Index: llvm/test/CodeGen/AMDGPU/memory-legalizer-local-agent.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/memory-legalizer-local-agent.ll +++ llvm/test/CodeGen/AMDGPU/memory-legalizer-local-agent.ll @@ -595,8 +595,7 @@ ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s0 ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s1 ; GFX10-WGP-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1 -; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) -; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: s_endpgm ; @@ -722,8 +721,7 @@ ; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1 -; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) -; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: s_endpgm ; @@ -790,8 +788,7 @@ ; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1 -; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) -; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: s_endpgm ; @@ -856,8 +853,7 @@ ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s0 ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s1 ; GFX10-WGP-NEXT: ds_wrxchg_rtn_b32 v1, v0, v1 -; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) -; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: ds_write_b32 v0, v1 ; GFX10-WGP-NEXT: s_endpgm @@ -928,8 +924,7 @@ ; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: ds_wrxchg_rtn_b32 v1, v0, v1 -; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) -; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: ds_write_b32 v0, v1 ; GFX10-WGP-NEXT: s_endpgm @@ -1002,8 +997,7 @@ ; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: ds_wrxchg_rtn_b32 v1, v0, v1 -; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) -; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: ds_write_b32 v0, v1 ; GFX10-WGP-NEXT: s_endpgm Index: llvm/test/CodeGen/AMDGPU/memory-legalizer-local-system.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/memory-legalizer-local-system.ll +++ llvm/test/CodeGen/AMDGPU/memory-legalizer-local-system.ll @@ -595,8 +595,7 @@ ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s0 ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s1 ; GFX10-WGP-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1 -; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) -; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: s_endpgm ; @@ -722,8 +721,7 @@ ; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1 -; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) -; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: s_endpgm ; @@ -790,8 +788,7 @@ ; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1 -; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) -; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: s_endpgm ; @@ -856,8 +853,7 @@ ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s0 ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s1 ; GFX10-WGP-NEXT: ds_wrxchg_rtn_b32 v1, v0, v1 -; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) -; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: ds_write_b32 v0, v1 ; GFX10-WGP-NEXT: s_endpgm @@ -928,8 +924,7 @@ ; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: ds_wrxchg_rtn_b32 v1, v0, v1 -; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) -; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: ds_write_b32 v0, v1 ; GFX10-WGP-NEXT: s_endpgm @@ -1002,8 +997,7 @@ ; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: ds_wrxchg_rtn_b32 v1, v0, v1 -; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) -; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: ds_write_b32 v0, v1 ; GFX10-WGP-NEXT: s_endpgm Index: llvm/test/CodeGen/AMDGPU/memory-legalizer-local-workgroup.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/memory-legalizer-local-workgroup.ll +++ llvm/test/CodeGen/AMDGPU/memory-legalizer-local-workgroup.ll @@ -595,8 +595,7 @@ ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s0 ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s1 ; GFX10-WGP-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1 -; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) -; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: s_endpgm ; @@ -722,8 +721,7 @@ ; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1 -; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) -; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: s_endpgm ; @@ -790,8 +788,7 @@ ; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: ds_wrxchg_rtn_b32 v0, v0, v1 -; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) -; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: s_endpgm ; @@ -856,8 +853,7 @@ ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, s0 ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s1 ; GFX10-WGP-NEXT: ds_wrxchg_rtn_b32 v1, v0, v1 -; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) -; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: ds_write_b32 v0, v1 ; GFX10-WGP-NEXT: s_endpgm @@ -928,8 +924,7 @@ ; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: ds_wrxchg_rtn_b32 v1, v0, v1 -; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) -; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: ds_write_b32 v0, v1 ; GFX10-WGP-NEXT: s_endpgm @@ -1002,8 +997,7 @@ ; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-WGP-NEXT: ds_wrxchg_rtn_b32 v1, v0, v1 -; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) -; GFX10-WGP-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX10-WGP-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX10-WGP-NEXT: buffer_gl0_inv ; GFX10-WGP-NEXT: ds_write_b32 v0, v1 ; GFX10-WGP-NEXT: s_endpgm