This is an archive of the discontinued LLVM Phabricator instance.

[AMDGPU] Skip generating cache invalidating instructions on AMDPAL
ClosedPublic

Authored by piotr on Apr 24 2020, 1:33 AM.

Download Raw Diff

Details

Reviewers

nhaehnle
t-tye

Commits

rG7631af3af279: [AMDGPU] Skip generating cache invalidating instructions on AMDPAL

Summary

The frontend guarantees that coherent accesses have
the corresponding cache policy bits set (glc, dlc).
Therefore, there is no need for extra instructions
that invalidate the cache.

Diff Detail

Repository: rG LLVM Github Monorepo

Event Timeline

piotr created this revision. Apr 24 2020, 1:33 AM

Herald added a project: Restricted Project. · View Herald Transcript · Apr 24 2020, 1:33 AM

Herald added subscribers: llvm-commits, kerbowa, hiraditya and 9 others. · View Herald Transcript

piotr added a reviewer: nhaehnle. Apr 24 2020, 1:37 AM

Harbormaster failed remote builds in B54556: Diff 259831! Apr 24 2020, 2:40 AM

@t-tye for awareness.

LGTM -- this is a reasonable step to take for us given that the Vulkan memory model has an explicit distinction between Private and NonPrivate memory accesses which we cannot yet model explicitly in LLVM.

This revision is now accepted and ready to land. Apr 24 2020, 4:12 AM

Closed by commit rG7631af3af279: [AMDGPU] Skip generating cache invalidating instructions on AMDPAL (authored by piotr). · Explain Why · Apr 24 2020, 5:22 AM

This revision was automatically updated to reflect the committed changes.

@nhaehnle my thinking was that the Vulkan memory model could be semantically represented in the LLVM IR by adding new memory orderings to make the distinction between the effective two sets of memory that the Vulkan memory model introduces. There need to be memory orderings that relate only to the subset that Vulkan requires to be coherent. However, I have not followed the Vulkan memory model specification recently, so I am not sure if it is still the same as when I was looking at it. What are your thoughts?

piotr mentioned this in D84448: [AMDGPU] Make generating cache invalidating instructions optional. Jul 23 2020, 10:28 AM

piotr mentioned this in rG590dd73c6ebd: [AMDGPU] Make generating cache invalidating instructions optional. Jul 27 2020, 12:53 AM

Revision Contents

Path

Size

llvm/

lib/

Target/

AMDGPU/

SIMemoryLegalizer.cpp

13 lines

test/

CodeGen/

AMDGPU/

memory-legalizer-amdpal.ll

80 lines

Diff 259868

llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp

Show First 20 Lines • Show All 248 Lines • ▼ Show 20 Lines
class SICacheControl {		class SICacheControl {
protected:		protected:

/// Instruction info.		/// Instruction info.
const SIInstrInfo *TII = nullptr;		const SIInstrInfo *TII = nullptr;

IsaVersion IV;		IsaVersion IV;

		/// Whether to insert cache invalidation instructions.
		bool InsertCacheInv;

SICacheControl(const GCNSubtarget &ST);		SICacheControl(const GCNSubtarget &ST);

public:		public:

/// Create a cache control for the subtarget \p ST.		/// Create a cache control for the subtarget \p ST.
static std::unique_ptr<SICacheControl> create(const GCNSubtarget &ST);		static std::unique_ptr<SICacheControl> create(const GCNSubtarget &ST);

/// Update \p MI memory load instruction to bypass any caches up to		/// Update \p MI memory load instruction to bypass any caches up to
▲ Show 20 Lines • Show All 380 Lines • ▼ Show 20 Lines	if (MI->getNumMemOperands() == 0)
return SIMemOpInfo();		return SIMemOpInfo();

return constructFromMIWithMMO(MI);		return constructFromMIWithMMO(MI);
}		}

SICacheControl::SICacheControl(const GCNSubtarget &ST) {		SICacheControl::SICacheControl(const GCNSubtarget &ST) {
TII = ST.getInstrInfo();		TII = ST.getInstrInfo();
IV = getIsaVersion(ST.getCPU());		IV = getIsaVersion(ST.getCPU());
		InsertCacheInv = !ST.isAmdPalOS();
}		}

/* static */		/* static */
std::unique_ptr<SICacheControl> SICacheControl::create(const GCNSubtarget &ST) {		std::unique_ptr<SICacheControl> SICacheControl::create(const GCNSubtarget &ST) {
GCNSubtarget::Generation Generation = ST.getGeneration();		GCNSubtarget::Generation Generation = ST.getGeneration();
if (Generation <= AMDGPUSubtarget::SOUTHERN_ISLANDS)		if (Generation <= AMDGPUSubtarget::SOUTHERN_ISLANDS)
return std::make_unique<SIGfx6CacheControl>(ST);		return std::make_unique<SIGfx6CacheControl>(ST);
if (Generation < AMDGPUSubtarget::GFX10)		if (Generation < AMDGPUSubtarget::GFX10)
▲ Show 20 Lines • Show All 48 Lines • ▼ Show 20 Lines	bool SIGfx6CacheControl::enableNonTemporal(

return Changed;		return Changed;
}		}

bool SIGfx6CacheControl::insertCacheInvalidate(MachineBasicBlock::iterator &MI,		bool SIGfx6CacheControl::insertCacheInvalidate(MachineBasicBlock::iterator &MI,
SIAtomicScope Scope,		SIAtomicScope Scope,
SIAtomicAddrSpace AddrSpace,		SIAtomicAddrSpace AddrSpace,
Position Pos) const {		Position Pos) const {
		if (!InsertCacheInv)
		return false;

bool Changed = false;		bool Changed = false;

MachineBasicBlock &MBB = *MI->getParent();		MachineBasicBlock &MBB = *MI->getParent();
DebugLoc DL = MI->getDebugLoc();		DebugLoc DL = MI->getDebugLoc();

if (Pos == Position::AFTER)		if (Pos == Position::AFTER)
++MI;		++MI;

▲ Show 20 Lines • Show All 122 Lines • ▼ Show 20 Lines	bool SIGfx6CacheControl::insertWait(MachineBasicBlock::iterator &MI,

return Changed;		return Changed;
}		}

bool SIGfx7CacheControl::insertCacheInvalidate(MachineBasicBlock::iterator &MI,		bool SIGfx7CacheControl::insertCacheInvalidate(MachineBasicBlock::iterator &MI,
SIAtomicScope Scope,		SIAtomicScope Scope,
SIAtomicAddrSpace AddrSpace,		SIAtomicAddrSpace AddrSpace,
Position Pos) const {		Position Pos) const {
		if (!InsertCacheInv)
		return false;

bool Changed = false;		bool Changed = false;

MachineBasicBlock &MBB = *MI->getParent();		MachineBasicBlock &MBB = *MI->getParent();
DebugLoc DL = MI->getDebugLoc();		DebugLoc DL = MI->getDebugLoc();

const GCNSubtarget &STM = MBB.getParent()->getSubtarget<GCNSubtarget>();		const GCNSubtarget &STM = MBB.getParent()->getSubtarget<GCNSubtarget>();

const unsigned Flush = STM.isAmdPalOS() \|\| STM.isMesa3DOS()		const unsigned Flush = STM.isAmdPalOS() \|\| STM.isMesa3DOS()
▲ Show 20 Lines • Show All 86 Lines • ▼ Show 20 Lines	bool SIGfx10CacheControl::enableNonTemporal(

return Changed;		return Changed;
}		}

bool SIGfx10CacheControl::insertCacheInvalidate(MachineBasicBlock::iterator &MI,		bool SIGfx10CacheControl::insertCacheInvalidate(MachineBasicBlock::iterator &MI,
SIAtomicScope Scope,		SIAtomicScope Scope,
SIAtomicAddrSpace AddrSpace,		SIAtomicAddrSpace AddrSpace,
Position Pos) const {		Position Pos) const {
		if (!InsertCacheInv)
		return false;

bool Changed = false;		bool Changed = false;

MachineBasicBlock &MBB = *MI->getParent();		MachineBasicBlock &MBB = *MI->getParent();
DebugLoc DL = MI->getDebugLoc();		DebugLoc DL = MI->getDebugLoc();

if (Pos == Position::AFTER)		if (Pos == Position::AFTER)
++MI;		++MI;

▲ Show 20 Lines • Show All 368 Lines • Show Last 20 Lines

llvm/test/CodeGen/AMDGPU/memory-legalizer-amdpal.ll

; RUN: llc -mtriple=amdgcn--amdpal -mcpu=gfx600 -verify-machineinstrs < %s \| FileCheck -check-prefixes=FUNC,GCN %s		; RUN: llc -mtriple=amdgcn--amdpal -mcpu=gfx600 -verify-machineinstrs < %s \| FileCheck -check-prefixes=FUNC,GCN,GCN9 %s
; RUN: llc -mtriple=amdgcn--amdpal -mcpu=gfx700 -verify-machineinstrs < %s \| FileCheck -check-prefixes=FUNC,GCN %s		; RUN: llc -mtriple=amdgcn--amdpal -mcpu=gfx700 -verify-machineinstrs < %s \| FileCheck -check-prefixes=FUNC,GCN,GCN9 %s
; RUN: llc -mtriple=amdgcn--amdpal -mcpu=gfx800 -verify-machineinstrs < %s \| FileCheck -check-prefixes=FUNC,GCN %s		; RUN: llc -mtriple=amdgcn--amdpal -mcpu=gfx803 -verify-machineinstrs < %s \| FileCheck -check-prefixes=FUNC,GCN,GCN9 %s
; RUN: llc -mtriple=amdgcn--amdpal -mcpu=gfx900 -verify-machineinstrs < %s \| FileCheck -check-prefixes=FUNC,GCN %s		; RUN: llc -mtriple=amdgcn--amdpal -mcpu=gfx900 -verify-machineinstrs < %s \| FileCheck -check-prefixes=FUNC,GCN,GCN9 %s
		; RUN: llc -mtriple=amdgcn--amdpal -mcpu=gfx1010 -verify-machineinstrs < %s \| FileCheck -check-prefixes=FUNC,GCN,GCN10 %s

; FUNC-LABEL: {{^}}system_acquire:		; FUNC-LABEL: {{^}}system_acquire:
; GCN: %bb.0		; GCN: %bb.0
; GCN-NOT: ATOMIC_FENCE		; GCN-NOT: ATOMIC_FENCE
; GCN: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}		; GCN: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GCN-NEXT: buffer_wbinvl1{{$}}		; GCN10: s_waitcnt_vscnt null, 0x0
		; GCN-NOT: buffer_wbinvl1{{$}}
; GCN: s_endpgm		; GCN: s_endpgm
define amdgpu_kernel void @system_acquire() {		define amdgpu_kernel void @system_acquire() {
entry:		entry:
fence acquire		fence acquire
ret void		ret void
}		}

; FUNC-LABEL: {{^}}system_release:		; FUNC-LABEL: {{^}}system_release:
; GCN: %bb.0		; GCN: %bb.0
; GCN-NOT: ATOMIC_FENCE		; GCN-NOT: ATOMIC_FENCE
; GCN: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}		; GCN: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
		; GCN10: s_waitcnt_vscnt null, 0x0
; GCN: s_endpgm		; GCN: s_endpgm
define amdgpu_kernel void @system_release() {		define amdgpu_kernel void @system_release() {
entry:		entry:
fence release		fence release
ret void		ret void
}		}

; FUNC-LABEL: {{^}}system_acq_rel:		; FUNC-LABEL: {{^}}system_acq_rel:
; GCN: %bb.0		; GCN: %bb.0
; GCN-NOT: ATOMIC_FENCE		; GCN-NOT: ATOMIC_FENCE
; GCN: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}		; GCN: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GCN: buffer_wbinvl1{{$}}		; GCN10: s_waitcnt_vscnt null, 0x0
		; GCN-NOT: buffer_wbinvl1{{$}}
; GCN: s_endpgm		; GCN: s_endpgm
define amdgpu_kernel void @system_acq_rel() {		define amdgpu_kernel void @system_acq_rel() {
entry:		entry:
fence acq_rel		fence acq_rel
ret void		ret void
}		}

; FUNC-LABEL: {{^}}system_seq_cst:		; FUNC-LABEL: {{^}}system_seq_cst:
; GCN: %bb.0		; GCN: %bb.0
; GCN-NOT: ATOMIC_FENCE		; GCN-NOT: ATOMIC_FENCE
; GCN: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}		; GCN: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GCN: buffer_wbinvl1{{$}}		; GCN10: s_waitcnt_vscnt null, 0x0
		; GCN-NOT: buffer_wbinvl1{{$}}
; GCN: s_endpgm		; GCN: s_endpgm
define amdgpu_kernel void @system_seq_cst() {		define amdgpu_kernel void @system_seq_cst() {
entry:		entry:
fence seq_cst		fence seq_cst
ret void		ret void
}		}

; FUNC-LABEL: {{^}}system_one_as_acquire:		; FUNC-LABEL: {{^}}system_one_as_acquire:
; GCN: %bb.0		; GCN: %bb.0
; GCN-NOT: ATOMIC_FENCE		; GCN-NOT: ATOMIC_FENCE
; GCN: s_waitcnt vmcnt(0){{$}}		; GCN: s_waitcnt vmcnt(0){{$}}
; GCN-NEXT: buffer_wbinvl1{{$}}		; GCN10: s_waitcnt_vscnt null, 0x0
		; GCN-NOT: buffer_wbinvl1{{$}}
; GCN: s_endpgm		; GCN: s_endpgm
define amdgpu_kernel void @system_one_as_acquire() {		define amdgpu_kernel void @system_one_as_acquire() {
entry:		entry:
fence syncscope("one-as") acquire		fence syncscope("one-as") acquire
ret void		ret void
}		}

; FUNC-LABEL: {{^}}system_one_as_release:		; FUNC-LABEL: {{^}}system_one_as_release:
; GCN: %bb.0		; GCN: %bb.0
; GCN-NOT: ATOMIC_FENCE		; GCN-NOT: ATOMIC_FENCE
; GCN: s_waitcnt vmcnt(0){{$}}		; GCN: s_waitcnt vmcnt(0){{$}}
		; GCN10: s_waitcnt_vscnt null, 0x0
; GCN: s_endpgm		; GCN: s_endpgm
define amdgpu_kernel void @system_one_as_release() {		define amdgpu_kernel void @system_one_as_release() {
entry:		entry:
fence syncscope("one-as") release		fence syncscope("one-as") release
ret void		ret void
}		}

; FUNC-LABEL: {{^}}system_one_as_acq_rel:		; FUNC-LABEL: {{^}}system_one_as_acq_rel:
; GCN: %bb.0		; GCN: %bb.0
; GCN-NOT: ATOMIC_FENCE		; GCN-NOT: ATOMIC_FENCE
; GCN: s_waitcnt vmcnt(0){{$}}		; GCN: s_waitcnt vmcnt(0){{$}}
; GCN: buffer_wbinvl1{{$}}		; GCN10: s_waitcnt_vscnt null, 0x0
		; GCN-NOT: buffer_wbinvl1{{$}}
; GCN: s_endpgm		; GCN: s_endpgm
define amdgpu_kernel void @system_one_as_acq_rel() {		define amdgpu_kernel void @system_one_as_acq_rel() {
entry:		entry:
fence syncscope("one-as") acq_rel		fence syncscope("one-as") acq_rel
ret void		ret void
}		}

; FUNC-LABEL: {{^}}system_one_as_seq_cst:		; FUNC-LABEL: {{^}}system_one_as_seq_cst:
; GCN: %bb.0		; GCN: %bb.0
; GCN-NOT: ATOMIC_FENCE		; GCN-NOT: ATOMIC_FENCE
; GCN: s_waitcnt vmcnt(0){{$}}		; GCN: s_waitcnt vmcnt(0){{$}}
; GCN: buffer_wbinvl1{{$}}		; GCN10: s_waitcnt_vscnt null, 0x0
		; GCN-NOT: buffer_wbinvl1{{$}}
; GCN: s_endpgm		; GCN: s_endpgm
define amdgpu_kernel void @system_one_as_seq_cst() {		define amdgpu_kernel void @system_one_as_seq_cst() {
entry:		entry:
fence syncscope("one-as") seq_cst		fence syncscope("one-as") seq_cst
ret void		ret void
}		}

; FUNC-LABEL: {{^}}singlethread_acquire:		; FUNC-LABEL: {{^}}singlethread_acquire:
▲ Show 20 Lines • Show All 75 Lines • ▼ Show 20 Lines	entry:
fence syncscope("singlethread-one-as") seq_cst		fence syncscope("singlethread-one-as") seq_cst
ret void		ret void
}		}

; FUNC-LABEL: {{^}}agent_acquire:		; FUNC-LABEL: {{^}}agent_acquire:
; GCN: %bb.0		; GCN: %bb.0
; GCN-NOT: ATOMIC_FENCE		; GCN-NOT: ATOMIC_FENCE
; GCN: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}		; GCN: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GCN-NEXT: buffer_wbinvl1{{$}}		; GCN10: s_waitcnt_vscnt null, 0x0
		; GCN-NOT: buffer_wbinvl1{{$}}
; GCN: s_endpgm		; GCN: s_endpgm
define amdgpu_kernel void @agent_acquire() {		define amdgpu_kernel void @agent_acquire() {
entry:		entry:
fence syncscope("agent") acquire		fence syncscope("agent") acquire
ret void		ret void
}		}

; FUNC-LABEL: {{^}}agent_release:		; FUNC-LABEL: {{^}}agent_release:
; GCN: %bb.0		; GCN: %bb.0
; GCN-NOT: ATOMIC_FENCE		; GCN-NOT: ATOMIC_FENCE
; GCN: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}		; GCN: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
		; GCN10: s_waitcnt_vscnt null, 0x0
; GCN: s_endpgm		; GCN: s_endpgm
define amdgpu_kernel void @agent_release() {		define amdgpu_kernel void @agent_release() {
entry:		entry:
fence syncscope("agent") release		fence syncscope("agent") release
ret void		ret void
}		}

; FUNC-LABEL: {{^}}agent_acq_rel:		; FUNC-LABEL: {{^}}agent_acq_rel:
; GCN: %bb.0		; GCN: %bb.0
; GCN-NOT: ATOMIC_FENCE		; GCN-NOT: ATOMIC_FENCE
; GCN: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}		; GCN: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GCN: buffer_wbinvl1{{$}}		; GCN10: s_waitcnt_vscnt null, 0x0
		; GCN-NOT: buffer_wbinvl1{{$}}
; GCN: s_endpgm		; GCN: s_endpgm
define amdgpu_kernel void @agent_acq_rel() {		define amdgpu_kernel void @agent_acq_rel() {
entry:		entry:
fence syncscope("agent") acq_rel		fence syncscope("agent") acq_rel
ret void		ret void
}		}

; FUNC-LABEL: {{^}}agent_seq_cst:		; FUNC-LABEL: {{^}}agent_seq_cst:
; GCN: %bb.0		; GCN: %bb.0
; GCN-NOT: ATOMIC_FENCE		; GCN-NOT: ATOMIC_FENCE
; GCN: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}		; GCN: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GCN: buffer_wbinvl1{{$}}		; GCN10: s_waitcnt_vscnt null, 0x0
		; GCN-NOT: buffer_wbinvl1{{$}}
; GCN: s_endpgm		; GCN: s_endpgm
define amdgpu_kernel void @agent_seq_cst() {		define amdgpu_kernel void @agent_seq_cst() {
entry:		entry:
fence syncscope("agent") seq_cst		fence syncscope("agent") seq_cst
ret void		ret void
}		}

; FUNC-LABEL: {{^}}agent_one_as_acquire:		; FUNC-LABEL: {{^}}agent_one_as_acquire:
; GCN: %bb.0		; GCN: %bb.0
; GCN-NOT: ATOMIC_FENCE		; GCN-NOT: ATOMIC_FENCE
; GCN: s_waitcnt vmcnt(0){{$}}		; GCN: s_waitcnt vmcnt(0){{$}}
; GCN-NEXT: buffer_wbinvl1{{$}}		; GCN10: s_waitcnt_vscnt null, 0x0
		; GCN-NOT: buffer_wbinvl1{{$}}
; GCN: s_endpgm		; GCN: s_endpgm
define amdgpu_kernel void @agent_one_as_acquire() {		define amdgpu_kernel void @agent_one_as_acquire() {
entry:		entry:
fence syncscope("agent-one-as") acquire		fence syncscope("agent-one-as") acquire
ret void		ret void
}		}

; FUNC-LABEL: {{^}}agent_one_as_release:		; FUNC-LABEL: {{^}}agent_one_as_release:
; GCN: %bb.0		; GCN: %bb.0
; GCN-NOT: ATOMIC_FENCE		; GCN-NOT: ATOMIC_FENCE
; GCN: s_waitcnt vmcnt(0){{$}}		; GCN: s_waitcnt vmcnt(0){{$}}
		; GCN10: s_waitcnt_vscnt null, 0x0
; GCN: s_endpgm		; GCN: s_endpgm
define amdgpu_kernel void @agent_one_as_release() {		define amdgpu_kernel void @agent_one_as_release() {
entry:		entry:
fence syncscope("agent-one-as") release		fence syncscope("agent-one-as") release
ret void		ret void
}		}

; FUNC-LABEL: {{^}}agent_one_as_acq_rel:		; FUNC-LABEL: {{^}}agent_one_as_acq_rel:
; GCN: %bb.0		; GCN: %bb.0
; GCN-NOT: ATOMIC_FENCE		; GCN-NOT: ATOMIC_FENCE
; GCN: s_waitcnt vmcnt(0){{$}}		; GCN: s_waitcnt vmcnt(0){{$}}
; GCN: buffer_wbinvl1{{$}}		; GCN10: s_waitcnt_vscnt null, 0x0
		; GCN-NOT: buffer_wbinvl1{{$}}
; GCN: s_endpgm		; GCN: s_endpgm
define amdgpu_kernel void @agent_one_as_acq_rel() {		define amdgpu_kernel void @agent_one_as_acq_rel() {
entry:		entry:
fence syncscope("agent-one-as") acq_rel		fence syncscope("agent-one-as") acq_rel
ret void		ret void
}		}

; FUNC-LABEL: {{^}}agent_one_as_seq_cst:		; FUNC-LABEL: {{^}}agent_one_as_seq_cst:
; GCN: %bb.0		; GCN: %bb.0
; GCN-NOT: ATOMIC_FENCE		; GCN-NOT: ATOMIC_FENCE
; GCN: s_waitcnt vmcnt(0){{$}}		; GCN: s_waitcnt vmcnt(0){{$}}
; GCN: buffer_wbinvl1{{$}}		; GCN10: s_waitcnt_vscnt null, 0x0
		; GCN-NOT: buffer_wbinvl1{{$}}
; GCN: s_endpgm		; GCN: s_endpgm
define amdgpu_kernel void @agent_one_as_seq_cst() {		define amdgpu_kernel void @agent_one_as_seq_cst() {
entry:		entry:
fence syncscope("agent-one-as") seq_cst		fence syncscope("agent-one-as") seq_cst
ret void		ret void
}		}

; FUNC-LABEL: {{^}}workgroup_acquire:		; FUNC-LABEL: {{^}}workgroup_acquire:
; GCN: %bb.0		; GCN: %bb.0
; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}		; GCN9-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
		; GCN10: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
		; GCN10: s_waitcnt_vscnt null, 0x0
; GCN-NOT: ATOMIC_FENCE		; GCN-NOT: ATOMIC_FENCE
; GCN: s_endpgm		; GCN: s_endpgm
define amdgpu_kernel void @workgroup_acquire() {		define amdgpu_kernel void @workgroup_acquire() {
entry:		entry:
fence syncscope("workgroup") acquire		fence syncscope("workgroup") acquire
ret void		ret void
}		}

; FUNC-LABEL: {{^}}workgroup_release:		; FUNC-LABEL: {{^}}workgroup_release:
; GCN: %bb.0		; GCN: %bb.0
; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}		; GCN9-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
		; GCN10: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
		; GCN10: s_waitcnt_vscnt null, 0x0
; GCN-NOT: ATOMIC_FENCE		; GCN-NOT: ATOMIC_FENCE
; GCN: s_endpgm		; GCN: s_endpgm
define amdgpu_kernel void @workgroup_release() {		define amdgpu_kernel void @workgroup_release() {
entry:		entry:
fence syncscope("workgroup") release		fence syncscope("workgroup") release
ret void		ret void
}		}

; FUNC-LABEL: {{^}}workgroup_acq_rel:		; FUNC-LABEL: {{^}}workgroup_acq_rel:
; GCN: %bb.0		; GCN: %bb.0
; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}		; GCN9-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
		; GCN10: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
		; GCN10: s_waitcnt_vscnt null, 0x0
; GCN-NOT: ATOMIC_FENCE		; GCN-NOT: ATOMIC_FENCE
; GCN: s_endpgm		; GCN: s_endpgm
define amdgpu_kernel void @workgroup_acq_rel() {		define amdgpu_kernel void @workgroup_acq_rel() {
entry:		entry:
fence syncscope("workgroup") acq_rel		fence syncscope("workgroup") acq_rel
ret void		ret void
}		}

; FUNC-LABEL: {{^}}workgroup_seq_cst:		; FUNC-LABEL: {{^}}workgroup_seq_cst:
; GCN: %bb.0		; GCN: %bb.0
; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}		; GCN-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
		; GCN9-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
		; GCN10: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
		; GCN10: s_waitcnt_vscnt null, 0x0
; GCN-NOT: ATOMIC_FENCE		; GCN-NOT: ATOMIC_FENCE
; GCN: s_endpgm		; GCN: s_endpgm
define amdgpu_kernel void @workgroup_seq_cst() {		define amdgpu_kernel void @workgroup_seq_cst() {
entry:		entry:
fence syncscope("workgroup") seq_cst		fence syncscope("workgroup") seq_cst
ret void		ret void
}		}

; FUNC-LABEL: {{^}}workgroup_one_as_acquire:		; FUNC-LABEL: {{^}}workgroup_one_as_acquire:
; GCN: %bb.0		; GCN: %bb.0
; GCN-NOT: s_waitcnt vmcnt(0){{$}}		; GCN9-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
		; GCN10: s_waitcnt vmcnt(0)
		; GCN10: s_waitcnt_vscnt null, 0x0
; GCN-NOT: ATOMIC_FENCE		; GCN-NOT: ATOMIC_FENCE
; GCN: s_endpgm		; GCN: s_endpgm
define amdgpu_kernel void @workgroup_one_as_acquire() {		define amdgpu_kernel void @workgroup_one_as_acquire() {
entry:		entry:
fence syncscope("workgroup-one-as") acquire		fence syncscope("workgroup-one-as") acquire
ret void		ret void
}		}

; FUNC-LABEL: {{^}}workgroup_one_as_release:		; FUNC-LABEL: {{^}}workgroup_one_as_release:
; GCN: %bb.0		; GCN: %bb.0
; GCN-NOT: s_waitcnt vmcnt(0){{$}}		; GCN9-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
		; GCN10: s_waitcnt vmcnt(0)
		; GCN10: s_waitcnt_vscnt null, 0x0
; GCN-NOT: ATOMIC_FENCE		; GCN-NOT: ATOMIC_FENCE
; GCN: s_endpgm		; GCN: s_endpgm
define amdgpu_kernel void @workgroup_one_as_release() {		define amdgpu_kernel void @workgroup_one_as_release() {
entry:		entry:
fence syncscope("workgroup-one-as") release		fence syncscope("workgroup-one-as") release
ret void		ret void
}		}

; FUNC-LABEL: {{^}}workgroup_one_as_acq_rel:		; FUNC-LABEL: {{^}}workgroup_one_as_acq_rel:
; GCN: %bb.0		; GCN: %bb.0
; GCN-NOT: s_waitcnt vmcnt(0){{$}}		; GCN9-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
		; GCN10: s_waitcnt vmcnt(0)
		; GCN10: s_waitcnt_vscnt null, 0x0
; GCN-NOT: ATOMIC_FENCE		; GCN-NOT: ATOMIC_FENCE
; GCN: s_endpgm		; GCN: s_endpgm
define amdgpu_kernel void @workgroup_one_as_acq_rel() {		define amdgpu_kernel void @workgroup_one_as_acq_rel() {
entry:		entry:
fence syncscope("workgroup-one-as") acq_rel		fence syncscope("workgroup-one-as") acq_rel
ret void		ret void
}		}

; FUNC-LABEL: {{^}}workgroup_one_as_seq_cst:		; FUNC-LABEL: {{^}}workgroup_one_as_seq_cst:
; GCN: %bb.0		; GCN: %bb.0
; GCN-NOT: s_waitcnt vmcnt(0){{$}}		; GCN9-NOT: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
		; GCN10: s_waitcnt vmcnt(0)
		; GCN10: s_waitcnt_vscnt null, 0x0
; GCN-NOT: ATOMIC_FENCE		; GCN-NOT: ATOMIC_FENCE
; GCN: s_endpgm		; GCN: s_endpgm
define amdgpu_kernel void @workgroup_one_as_seq_cst() {		define amdgpu_kernel void @workgroup_one_as_seq_cst() {
entry:		entry:
fence syncscope("workgroup-one-as") seq_cst		fence syncscope("workgroup-one-as") seq_cst
ret void		ret void
}		}

▲ Show 20 Lines • Show All 79 Lines • Show Last 20 Lines