This is an archive of the discontinued LLVM Phabricator instance.

[AMDGPU] Account for VData size increase from tfe bit for image instructions
AbandonedPublic

Authored by mbrkusanin on Jul 29 2022, 5:30 AM.

Download Raw Diff

Details

Reviewers

foad
arsenm

Summary

The TFE bit effectively increases the size of VData by 32 bits, so we need _V* variants in
TableGen to account for that.

Note that, for cmpswap, this change also renames _V1_V*_ to _V2_V*_ and _V2_V*_
to _V4_V*_ (for VReg_64 and VReg_128 respectively).

Diff Detail

Unit Tests: Failed

	Time	Test
	60,090 ms	x64 debian > AddressSanitizer-x86_64-linux.TestCases::scariness_score_test.cpp

Event Timeline

mbrkusanin created this revision.Jul 29 2022, 5:30 AM

Herald added a project: Restricted Project. · View Herald TranscriptJul 29 2022, 5:30 AM

Herald added subscribers: kosarev, kerbowa, hiraditya and 6 others. · View Herald Transcript

mbrkusanin requested review of this revision.Jul 29 2022, 5:30 AM

Herald added a subscriber: wdng. · View Herald TranscriptJul 29 2022, 5:30 AM

mbrkusanin added a child revision: D130764: [AMDGPU] Enable image_gather4h instruction for gfx10 and gfx11.Jul 29 2022, 5:31 AM

Does TFE actually work for atomics? Do we not have any IR tests for TFE?

llvm/test/MC/Disassembler/AMDGPU/mimg_vi.txt
194	Why did this get an additional output register if it's not using TFE?

mbrkusanin added inline comments.Jul 29 2022, 5:46 AM

llvm/test/MC/Disassembler/AMDGPU/mimg_vi.txt
194	It is one of the examples with a wrong encoding for dmask. Since dmask has 3 bits enabled, it will choose vgpr96. The only valid masks are 0x1 and 0x3. It does not seem to me that it's worth updating the disassembler for this.

arsenm accepted this revision.Jul 29 2022, 5:55 AM

This revision is now accepted and ready to land.Jul 29 2022, 5:55 AM

I could not find a clear answer in the docs. But looking at the .ll tests, it does not seem like it is used. So tfe is probably not supported for atomics or gather4.

mbrkusanin removed a child revision: D130764: [AMDGPU] Enable image_gather4h instruction for gfx10 and gfx11.Jul 29 2022, 6:13 AM

In D130763#3687302, @mbrkusanin wrote:

I could not find a clear answer in the docs. But looking at the .ll tests, it does not seem like it is used. So tfe is probably not supported for atomics or gather4.

If I were to guess, it's something that works by default but isn't actually tested.

Harbormaster completed remote builds in B178259: Diff 448596.Jul 29 2022, 6:26 AM

Revision Contents

Path

Size

llvm/

lib/

Target/

AMDGPU/

MIMGInstructions.td

22 lines

test/

CodeGen/

AMDGPU/

GlobalISel/

llvm.amdgcn.image.atomic.dim.mir

36 lines

release-vgprs.mir

4 lines

MC/

AMDGPU/

gfx10_asm_mimg.s

18 lines

gfx11_asm_mimg.s

18 lines

Disassembler/

AMDGPU/

gfx10_mimg.txt

18 lines

gfx11_mimg.txt

18 lines

mimg_vi.txt

4 lines

Diff 448596

llvm/lib/Target/AMDGPU/MIMGInstructions.td

Show First 20 Lines • Show All 854 Lines • ▼ Show 20 Lines	multiclass MIMG_Atomic <mimgopc op, string asm, bit isCmpSwap = 0, bit isFP = 0> { // 64-bit atomics
let IsAtomicRet = 1 in {		let IsAtomicRet = 1 in {
def "" : MIMGBaseOpcode {		def "" : MIMGBaseOpcode {
let Atomic = 1;		let Atomic = 1;
let AtomicX2 = isCmpSwap;		let AtomicX2 = isCmpSwap;
}		}

let BaseOpcode = !cast<MIMGBaseOpcode>(NAME) in {		let BaseOpcode = !cast<MIMGBaseOpcode>(NAME) in {
// _V* variants have different dst size, but the size is encoded implicitly,		// _V* variants have different dst size, but the size is encoded implicitly,
// using dmask and tfe. Only 32-bit variant is registered with disassembler.		// using dmask and tfe. Only smallest variant is registered with disassembler.
// Other variants are reconstructed by disassembler using dmask and tfe.		// Other variants are reconstructed by disassembler using dmask and tfe.
let VDataDwords = !if(isCmpSwap, 2, 1) in		if !not(isCmpSwap) then {
defm _V1 : MIMG_Atomic_Addr_Helper_m <op, asm, !if(isCmpSwap, VReg_64, VGPR_32), 1, isFP>;		let VDataDwords = 1 in
let VDataDwords = !if(isCmpSwap, 4, 2) in		defm _V1 : MIMG_Atomic_Addr_Helper_m <op, asm, VGPR_32, 1, isFP>;
defm _V2 : MIMG_Atomic_Addr_Helper_m <op, asm, !if(isCmpSwap, VReg_128, VReg_64), 0, isFP>;		}
		let VDataDwords = 2 in
		defm _V2 : MIMG_Atomic_Addr_Helper_m <op, asm, VReg_64, isCmpSwap, isFP>;
		let VDataDwords = 3 in
		defm _V3 : MIMG_Atomic_Addr_Helper_m <op, asm, VReg_96, 0, isFP>;
		if isCmpSwap then {
		let VDataDwords = 4 in
		defm _V4 : MIMG_Atomic_Addr_Helper_m <op, asm, VReg_128, 0, isFP>;
		let VDataDwords = 5 in
		defm _V5 : MIMG_Atomic_Addr_Helper_m <op, asm, VReg_160, 0, isFP>;
		}
}		}
} // End IsAtomicRet = 1		} // End IsAtomicRet = 1
}		}

class MIMG_Sampler_Helper <mimgopc op, string asm, RegisterClass dst_rc,		class MIMG_Sampler_Helper <mimgopc op, string asm, RegisterClass dst_rc,
RegisterClass src_rc, string dns="">		RegisterClass src_rc, string dns="">
: MIMG_gfx6789 <op.GFX10M, (outs dst_rc:$vdata), dns> {		: MIMG_gfx6789 <op.GFX10M, (outs dst_rc:$vdata), dns> {
let InOperandList = !con((ins src_rc:$vaddr, SReg_256:$srsrc, SReg_128:$ssamp,		let InOperandList = !con((ins src_rc:$vaddr, SReg_256:$srsrc, SReg_128:$ssamp,
▲ Show 20 Lines • Show All 231 Lines • ▼ Show 20 Lines	def "" : MIMG_Sampler_BaseOpcode<sample> {
let HasD16 = 1;		let HasD16 = 1;
let Gather4 = 1;		let Gather4 = 1;
}		}

let BaseOpcode = !cast<MIMGBaseOpcode>(NAME), WQM = wqm,		let BaseOpcode = !cast<MIMGBaseOpcode>(NAME), WQM = wqm,
Gather4 = 1 in {		Gather4 = 1 in {
let VDataDwords = 2 in		let VDataDwords = 2 in
defm _V2 : MIMG_Sampler_Src_Helper<op, asm, sample, VReg_64>; /* for packed D16 only */		defm _V2 : MIMG_Sampler_Src_Helper<op, asm, sample, VReg_64>; /* for packed D16 only */
		let VDataDwords = 3 in
		defm _V3 : MIMG_Sampler_Src_Helper<op, asm, sample, VReg_96>; /* for packed D16 + tfe only */
let VDataDwords = 4 in		let VDataDwords = 4 in
defm _V4 : MIMG_Sampler_Src_Helper<op, asm, sample, VReg_128, 1>;		defm _V4 : MIMG_Sampler_Src_Helper<op, asm, sample, VReg_128, 1>;
let VDataDwords = 5 in		let VDataDwords = 5 in
defm _V5 : MIMG_Sampler_Src_Helper<op, asm, sample, VReg_160>;		defm _V5 : MIMG_Sampler_Src_Helper<op, asm, sample, VReg_160>;
}		}
}		}

multiclass MIMG_Gather_WQM <mimgopc op, AMDGPUSampleVariant sample>		multiclass MIMG_Gather_WQM <mimgopc op, AMDGPUSampleVariant sample>
▲ Show 20 Lines • Show All 412 Lines • Show Last 20 Lines

llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.image.atomic.dim.mir

Show All 13 Lines	bb.0:
liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, $vgpr0_vgpr1, $vgpr2		liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, $vgpr0_vgpr1, $vgpr2

; GFX6-LABEL: name: atomic_cmpswap_i32_1d		; GFX6-LABEL: name: atomic_cmpswap_i32_1d
; GFX6: liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, $vgpr0_vgpr1, $vgpr2		; GFX6: liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, $vgpr0_vgpr1, $vgpr2
; GFX6-NEXT: {{ $}}		; GFX6-NEXT: {{ $}}
; GFX6-NEXT: [[COPY:%[0-9]+]]:sgpr_256 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7		; GFX6-NEXT: [[COPY:%[0-9]+]]:sgpr_256 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7
; GFX6-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1		; GFX6-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX6-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2		; GFX6-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
; GFX6-NEXT: [[IMAGE_ATOMIC_CMPSWAP_V1_V1_si:%[0-9]+]]:vreg_64 = IMAGE_ATOMIC_CMPSWAP_V1_V1_si [[COPY1]], [[COPY2]], [[COPY]], 3, 1, 1, 0, 0, 0, 0, implicit $exec :: (volatile dereferenceable load store (s32) on custom "ImageResource")		; GFX6-NEXT: [[IMAGE_ATOMIC_CMPSWAP_V2_V1_si:%[0-9]+]]:vreg_64 = IMAGE_ATOMIC_CMPSWAP_V2_V1_si [[COPY1]], [[COPY2]], [[COPY]], 3, 1, 1, 0, 0, 0, 0, implicit $exec :: (volatile dereferenceable load store (s32) on custom "ImageResource")
; GFX6-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY killed [[IMAGE_ATOMIC_CMPSWAP_V1_V1_si]].sub0		; GFX6-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY killed [[IMAGE_ATOMIC_CMPSWAP_V2_V1_si]].sub0
; GFX6-NEXT: $vgpr0 = COPY [[COPY3]]		; GFX6-NEXT: $vgpr0 = COPY [[COPY3]]
; GFX6-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0		; GFX6-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0
; GFX8-LABEL: name: atomic_cmpswap_i32_1d		; GFX8-LABEL: name: atomic_cmpswap_i32_1d
; GFX8: liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, $vgpr0_vgpr1, $vgpr2		; GFX8: liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, $vgpr0_vgpr1, $vgpr2
; GFX8-NEXT: {{ $}}		; GFX8-NEXT: {{ $}}
; GFX8-NEXT: [[COPY:%[0-9]+]]:sgpr_256 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7		; GFX8-NEXT: [[COPY:%[0-9]+]]:sgpr_256 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7
; GFX8-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1		; GFX8-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2		; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
; GFX8-NEXT: [[IMAGE_ATOMIC_CMPSWAP_V1_V1_vi:%[0-9]+]]:vreg_64 = IMAGE_ATOMIC_CMPSWAP_V1_V1_vi [[COPY1]], [[COPY2]], [[COPY]], 3, 1, 1, 0, 0, 0, 0, implicit $exec :: (volatile dereferenceable load store (s32) on custom "ImageResource")		; GFX8-NEXT: [[IMAGE_ATOMIC_CMPSWAP_V2_V1_vi:%[0-9]+]]:vreg_64 = IMAGE_ATOMIC_CMPSWAP_V2_V1_vi [[COPY1]], [[COPY2]], [[COPY]], 3, 1, 1, 0, 0, 0, 0, implicit $exec :: (volatile dereferenceable load store (s32) on custom "ImageResource")
; GFX8-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY killed [[IMAGE_ATOMIC_CMPSWAP_V1_V1_vi]].sub0		; GFX8-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY killed [[IMAGE_ATOMIC_CMPSWAP_V2_V1_vi]].sub0
; GFX8-NEXT: $vgpr0 = COPY [[COPY3]]		; GFX8-NEXT: $vgpr0 = COPY [[COPY3]]
; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0		; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0
; GFX10-LABEL: name: atomic_cmpswap_i32_1d		; GFX10-LABEL: name: atomic_cmpswap_i32_1d
; GFX10: liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, $vgpr0_vgpr1, $vgpr2		; GFX10: liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, $vgpr0_vgpr1, $vgpr2
; GFX10-NEXT: {{ $}}		; GFX10-NEXT: {{ $}}
; GFX10-NEXT: [[COPY:%[0-9]+]]:sgpr_256 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7		; GFX10-NEXT: [[COPY:%[0-9]+]]:sgpr_256 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7
; GFX10-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1		; GFX10-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX10-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2		; GFX10-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
; GFX10-NEXT: [[IMAGE_ATOMIC_CMPSWAP_V1_V1_gfx10_:%[0-9]+]]:vreg_64 = IMAGE_ATOMIC_CMPSWAP_V1_V1_gfx10 [[COPY1]], [[COPY2]], [[COPY]], 3, 0, 1, 1, 0, 0, 0, 0, implicit $exec :: (volatile dereferenceable load store (s32) on custom "ImageResource")		; GFX10-NEXT: [[IMAGE_ATOMIC_CMPSWAP_V2_V1_gfx10_:%[0-9]+]]:vreg_64 = IMAGE_ATOMIC_CMPSWAP_V2_V1_gfx10 [[COPY1]], [[COPY2]], [[COPY]], 3, 0, 1, 1, 0, 0, 0, 0, implicit $exec :: (volatile dereferenceable load store (s32) on custom "ImageResource")
; GFX10-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY killed [[IMAGE_ATOMIC_CMPSWAP_V1_V1_gfx10_]].sub0		; GFX10-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY killed [[IMAGE_ATOMIC_CMPSWAP_V2_V1_gfx10_]].sub0
; GFX10-NEXT: $vgpr0 = COPY [[COPY3]]		; GFX10-NEXT: $vgpr0 = COPY [[COPY3]]
; GFX10-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0		; GFX10-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0
%0:sgpr(<8 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7		%0:sgpr(<8 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7
%1:vgpr(<2 x s32>) = COPY $vgpr0_vgpr1		%1:vgpr(<2 x s32>) = COPY $vgpr0_vgpr1
%2:vgpr(s32) = COPY $vgpr2		%2:vgpr(s32) = COPY $vgpr2
%3:vgpr(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.cmpswap.1d), %1(<2 x s32>), $noreg, %2(s32), %0(<8 x s32>), 0, 0, 0 :: (volatile dereferenceable load store (s32) on custom "ImageResource")		%3:vgpr(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.cmpswap.1d), %1(<2 x s32>), $noreg, %2(s32), %0(<8 x s32>), 0, 0, 0 :: (volatile dereferenceable load store (s32) on custom "ImageResource")
$vgpr0 = COPY %3(s32)		$vgpr0 = COPY %3(s32)
SI_RETURN_TO_EPILOG implicit $vgpr0		SI_RETURN_TO_EPILOG implicit $vgpr0
Show All 10 Lines	bb.0:
liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, $vgpr0_vgpr1, $vgpr2		liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, $vgpr0_vgpr1, $vgpr2

; GFX6-LABEL: name: atomic_cmpswap_i32_1d_no_return		; GFX6-LABEL: name: atomic_cmpswap_i32_1d_no_return
; GFX6: liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, $vgpr0_vgpr1, $vgpr2		; GFX6: liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, $vgpr0_vgpr1, $vgpr2
; GFX6-NEXT: {{ $}}		; GFX6-NEXT: {{ $}}
; GFX6-NEXT: [[COPY:%[0-9]+]]:sgpr_256 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7		; GFX6-NEXT: [[COPY:%[0-9]+]]:sgpr_256 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7
; GFX6-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1		; GFX6-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX6-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2		; GFX6-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
; GFX6-NEXT: [[IMAGE_ATOMIC_CMPSWAP_V1_V1_si:%[0-9]+]]:vreg_64 = IMAGE_ATOMIC_CMPSWAP_V1_V1_si [[COPY1]], [[COPY2]], [[COPY]], 3, 1, 1, 0, 0, 0, 0, implicit $exec :: (volatile dereferenceable load store (s32) on custom "ImageResource")		; GFX6-NEXT: [[IMAGE_ATOMIC_CMPSWAP_V2_V1_si:%[0-9]+]]:vreg_64 = IMAGE_ATOMIC_CMPSWAP_V2_V1_si [[COPY1]], [[COPY2]], [[COPY]], 3, 1, 1, 0, 0, 0, 0, implicit $exec :: (volatile dereferenceable load store (s32) on custom "ImageResource")
; GFX6-NEXT: S_ENDPGM 0		; GFX6-NEXT: S_ENDPGM 0
; GFX8-LABEL: name: atomic_cmpswap_i32_1d_no_return		; GFX8-LABEL: name: atomic_cmpswap_i32_1d_no_return
; GFX8: liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, $vgpr0_vgpr1, $vgpr2		; GFX8: liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, $vgpr0_vgpr1, $vgpr2
; GFX8-NEXT: {{ $}}		; GFX8-NEXT: {{ $}}
; GFX8-NEXT: [[COPY:%[0-9]+]]:sgpr_256 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7		; GFX8-NEXT: [[COPY:%[0-9]+]]:sgpr_256 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7
; GFX8-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1		; GFX8-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2		; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
; GFX8-NEXT: [[IMAGE_ATOMIC_CMPSWAP_V1_V1_vi:%[0-9]+]]:vreg_64 = IMAGE_ATOMIC_CMPSWAP_V1_V1_vi [[COPY1]], [[COPY2]], [[COPY]], 3, 1, 1, 0, 0, 0, 0, implicit $exec :: (volatile dereferenceable load store (s32) on custom "ImageResource")		; GFX8-NEXT: [[IMAGE_ATOMIC_CMPSWAP_V2_V1_vi:%[0-9]+]]:vreg_64 = IMAGE_ATOMIC_CMPSWAP_V2_V1_vi [[COPY1]], [[COPY2]], [[COPY]], 3, 1, 1, 0, 0, 0, 0, implicit $exec :: (volatile dereferenceable load store (s32) on custom "ImageResource")
; GFX8-NEXT: S_ENDPGM 0		; GFX8-NEXT: S_ENDPGM 0
; GFX10-LABEL: name: atomic_cmpswap_i32_1d_no_return		; GFX10-LABEL: name: atomic_cmpswap_i32_1d_no_return
; GFX10: liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, $vgpr0_vgpr1, $vgpr2		; GFX10: liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, $vgpr0_vgpr1, $vgpr2
; GFX10-NEXT: {{ $}}		; GFX10-NEXT: {{ $}}
; GFX10-NEXT: [[COPY:%[0-9]+]]:sgpr_256 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7		; GFX10-NEXT: [[COPY:%[0-9]+]]:sgpr_256 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7
; GFX10-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1		; GFX10-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX10-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2		; GFX10-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
; GFX10-NEXT: [[IMAGE_ATOMIC_CMPSWAP_V1_V1_gfx10_:%[0-9]+]]:vreg_64 = IMAGE_ATOMIC_CMPSWAP_V1_V1_gfx10 [[COPY1]], [[COPY2]], [[COPY]], 3, 0, 1, 1, 0, 0, 0, 0, implicit $exec :: (volatile dereferenceable load store (s32) on custom "ImageResource")		; GFX10-NEXT: [[IMAGE_ATOMIC_CMPSWAP_V2_V1_gfx10_:%[0-9]+]]:vreg_64 = IMAGE_ATOMIC_CMPSWAP_V2_V1_gfx10 [[COPY1]], [[COPY2]], [[COPY]], 3, 0, 1, 1, 0, 0, 0, 0, implicit $exec :: (volatile dereferenceable load store (s32) on custom "ImageResource")
; GFX10-NEXT: S_ENDPGM 0		; GFX10-NEXT: S_ENDPGM 0
%0:sgpr(<8 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7		%0:sgpr(<8 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7
%1:vgpr(<2 x s32>) = COPY $vgpr0_vgpr1		%1:vgpr(<2 x s32>) = COPY $vgpr0_vgpr1
%2:vgpr(s32) = COPY $vgpr2		%2:vgpr(s32) = COPY $vgpr2
%3:vgpr(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.cmpswap.1d), %1(<2 x s32>), $noreg, %2(s32), %0(<8 x s32>), 0, 0, 0 :: (volatile dereferenceable load store (s32) on custom "ImageResource")		%3:vgpr(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.cmpswap.1d), %1(<2 x s32>), $noreg, %2(s32), %0(<8 x s32>), 0, 0, 0 :: (volatile dereferenceable load store (s32) on custom "ImageResource")
S_ENDPGM 0		S_ENDPGM 0
...		...

---		---
name: atomic_cmpswap_i64_1d		name: atomic_cmpswap_i64_1d
legalized: true		legalized: true
regBankSelected: true		regBankSelected: true
tracksRegLiveness: true		tracksRegLiveness: true

body: \|		body: \|
bb.0:		bb.0:
liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4		liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4

; GFX6-LABEL: name: atomic_cmpswap_i64_1d		; GFX6-LABEL: name: atomic_cmpswap_i64_1d
; GFX6: liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4		; GFX6: liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4
; GFX6-NEXT: {{ $}}		; GFX6-NEXT: {{ $}}
; GFX6-NEXT: [[COPY:%[0-9]+]]:sgpr_256 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7		; GFX6-NEXT: [[COPY:%[0-9]+]]:sgpr_256 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7
; GFX6-NEXT: [[COPY1:%[0-9]+]]:vreg_128 = COPY $vgpr0_vgpr1_vgpr2_vgpr3		; GFX6-NEXT: [[COPY1:%[0-9]+]]:vreg_128 = COPY $vgpr0_vgpr1_vgpr2_vgpr3
; GFX6-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr4		; GFX6-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr4
; GFX6-NEXT: [[IMAGE_ATOMIC_CMPSWAP_V2_V1_si:%[0-9]+]]:vreg_128 = IMAGE_ATOMIC_CMPSWAP_V2_V1_si [[COPY1]], [[COPY2]], [[COPY]], 15, 1, 1, 0, 0, 0, 0, implicit $exec :: (volatile dereferenceable load store (s64) on custom "ImageResource")		; GFX6-NEXT: [[IMAGE_ATOMIC_CMPSWAP_V4_V1_si:%[0-9]+]]:vreg_128 = IMAGE_ATOMIC_CMPSWAP_V4_V1_si [[COPY1]], [[COPY2]], [[COPY]], 15, 1, 1, 0, 0, 0, 0, implicit $exec :: (volatile dereferenceable load store (s64) on custom "ImageResource")
; GFX6-NEXT: [[COPY3:%[0-9]+]]:vreg_64 = COPY killed [[IMAGE_ATOMIC_CMPSWAP_V2_V1_si]].sub0_sub1		; GFX6-NEXT: [[COPY3:%[0-9]+]]:vreg_64 = COPY killed [[IMAGE_ATOMIC_CMPSWAP_V4_V1_si]].sub0_sub1
; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[COPY3]]		; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[COPY3]]
; GFX6-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0_vgpr1		; GFX6-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0_vgpr1
; GFX8-LABEL: name: atomic_cmpswap_i64_1d		; GFX8-LABEL: name: atomic_cmpswap_i64_1d
; GFX8: liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4		; GFX8: liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4
; GFX8-NEXT: {{ $}}		; GFX8-NEXT: {{ $}}
; GFX8-NEXT: [[COPY:%[0-9]+]]:sgpr_256 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7		; GFX8-NEXT: [[COPY:%[0-9]+]]:sgpr_256 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7
; GFX8-NEXT: [[COPY1:%[0-9]+]]:vreg_128 = COPY $vgpr0_vgpr1_vgpr2_vgpr3		; GFX8-NEXT: [[COPY1:%[0-9]+]]:vreg_128 = COPY $vgpr0_vgpr1_vgpr2_vgpr3
; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr4		; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr4
; GFX8-NEXT: [[IMAGE_ATOMIC_CMPSWAP_V2_V1_vi:%[0-9]+]]:vreg_128 = IMAGE_ATOMIC_CMPSWAP_V2_V1_vi [[COPY1]], [[COPY2]], [[COPY]], 15, 1, 1, 0, 0, 0, 0, implicit $exec :: (volatile dereferenceable load store (s64) on custom "ImageResource")		; GFX8-NEXT: [[IMAGE_ATOMIC_CMPSWAP_V4_V1_vi:%[0-9]+]]:vreg_128 = IMAGE_ATOMIC_CMPSWAP_V4_V1_vi [[COPY1]], [[COPY2]], [[COPY]], 15, 1, 1, 0, 0, 0, 0, implicit $exec :: (volatile dereferenceable load store (s64) on custom "ImageResource")
; GFX8-NEXT: [[COPY3:%[0-9]+]]:vreg_64 = COPY killed [[IMAGE_ATOMIC_CMPSWAP_V2_V1_vi]].sub0_sub1		; GFX8-NEXT: [[COPY3:%[0-9]+]]:vreg_64 = COPY killed [[IMAGE_ATOMIC_CMPSWAP_V4_V1_vi]].sub0_sub1
; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[COPY3]]		; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[COPY3]]
; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0_vgpr1		; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0_vgpr1
; GFX10-LABEL: name: atomic_cmpswap_i64_1d		; GFX10-LABEL: name: atomic_cmpswap_i64_1d
; GFX10: liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4		; GFX10: liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4
; GFX10-NEXT: {{ $}}		; GFX10-NEXT: {{ $}}
; GFX10-NEXT: [[COPY:%[0-9]+]]:sgpr_256 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7		; GFX10-NEXT: [[COPY:%[0-9]+]]:sgpr_256 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7
; GFX10-NEXT: [[COPY1:%[0-9]+]]:vreg_128 = COPY $vgpr0_vgpr1_vgpr2_vgpr3		; GFX10-NEXT: [[COPY1:%[0-9]+]]:vreg_128 = COPY $vgpr0_vgpr1_vgpr2_vgpr3
; GFX10-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr4		; GFX10-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr4
; GFX10-NEXT: [[IMAGE_ATOMIC_CMPSWAP_V2_V1_gfx10_:%[0-9]+]]:vreg_128 = IMAGE_ATOMIC_CMPSWAP_V2_V1_gfx10 [[COPY1]], [[COPY2]], [[COPY]], 15, 0, 1, 1, 0, 0, 0, 0, implicit $exec :: (volatile dereferenceable load store (s64) on custom "ImageResource")		; GFX10-NEXT: [[IMAGE_ATOMIC_CMPSWAP_V4_V1_gfx10_:%[0-9]+]]:vreg_128 = IMAGE_ATOMIC_CMPSWAP_V4_V1_gfx10 [[COPY1]], [[COPY2]], [[COPY]], 15, 0, 1, 1, 0, 0, 0, 0, implicit $exec :: (volatile dereferenceable load store (s64) on custom "ImageResource")
; GFX10-NEXT: [[COPY3:%[0-9]+]]:vreg_64 = COPY killed [[IMAGE_ATOMIC_CMPSWAP_V2_V1_gfx10_]].sub0_sub1		; GFX10-NEXT: [[COPY3:%[0-9]+]]:vreg_64 = COPY killed [[IMAGE_ATOMIC_CMPSWAP_V4_V1_gfx10_]].sub0_sub1
; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[COPY3]]		; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[COPY3]]
; GFX10-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0_vgpr1		; GFX10-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0_vgpr1
%0:sgpr(<8 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7		%0:sgpr(<8 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7
%1:vgpr(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3		%1:vgpr(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
%2:vgpr(s32) = COPY $vgpr4		%2:vgpr(s32) = COPY $vgpr4
%3:vgpr(s64) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.cmpswap.1d), %1(<2 x s64>), $noreg, %2(s32), %0(<8 x s32>), 0, 0, 0 :: (volatile dereferenceable load store (s64) on custom "ImageResource")		%3:vgpr(s64) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.cmpswap.1d), %1(<2 x s64>), $noreg, %2(s32), %0(<8 x s32>), 0, 0, 0 :: (volatile dereferenceable load store (s64) on custom "ImageResource")
$vgpr0_vgpr1 = COPY %3(s64)		$vgpr0_vgpr1 = COPY %3(s64)
SI_RETURN_TO_EPILOG implicit $vgpr0_vgpr1		SI_RETURN_TO_EPILOG implicit $vgpr0_vgpr1
Show All 10 Lines	bb.0:
liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4		liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4

; GFX6-LABEL: name: atomic_cmpswap_i64_1d_no_return		; GFX6-LABEL: name: atomic_cmpswap_i64_1d_no_return
; GFX6: liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4		; GFX6: liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4
; GFX6-NEXT: {{ $}}		; GFX6-NEXT: {{ $}}
; GFX6-NEXT: [[COPY:%[0-9]+]]:sgpr_256 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7		; GFX6-NEXT: [[COPY:%[0-9]+]]:sgpr_256 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7
; GFX6-NEXT: [[COPY1:%[0-9]+]]:vreg_128 = COPY $vgpr0_vgpr1_vgpr2_vgpr3		; GFX6-NEXT: [[COPY1:%[0-9]+]]:vreg_128 = COPY $vgpr0_vgpr1_vgpr2_vgpr3
; GFX6-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr4		; GFX6-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr4
; GFX6-NEXT: [[IMAGE_ATOMIC_CMPSWAP_V2_V1_si:%[0-9]+]]:vreg_128 = IMAGE_ATOMIC_CMPSWAP_V2_V1_si [[COPY1]], [[COPY2]], [[COPY]], 15, 1, 1, 0, 0, 0, 0, implicit $exec :: (volatile dereferenceable load store (s64) on custom "ImageResource")		; GFX6-NEXT: [[IMAGE_ATOMIC_CMPSWAP_V4_V1_si:%[0-9]+]]:vreg_128 = IMAGE_ATOMIC_CMPSWAP_V4_V1_si [[COPY1]], [[COPY2]], [[COPY]], 15, 1, 1, 0, 0, 0, 0, implicit $exec :: (volatile dereferenceable load store (s64) on custom "ImageResource")
; GFX6-NEXT: S_ENDPGM 0		; GFX6-NEXT: S_ENDPGM 0
; GFX8-LABEL: name: atomic_cmpswap_i64_1d_no_return		; GFX8-LABEL: name: atomic_cmpswap_i64_1d_no_return
; GFX8: liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4		; GFX8: liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4
; GFX8-NEXT: {{ $}}		; GFX8-NEXT: {{ $}}
; GFX8-NEXT: [[COPY:%[0-9]+]]:sgpr_256 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7		; GFX8-NEXT: [[COPY:%[0-9]+]]:sgpr_256 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7
; GFX8-NEXT: [[COPY1:%[0-9]+]]:vreg_128 = COPY $vgpr0_vgpr1_vgpr2_vgpr3		; GFX8-NEXT: [[COPY1:%[0-9]+]]:vreg_128 = COPY $vgpr0_vgpr1_vgpr2_vgpr3
; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr4		; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr4
; GFX8-NEXT: [[IMAGE_ATOMIC_CMPSWAP_V2_V1_vi:%[0-9]+]]:vreg_128 = IMAGE_ATOMIC_CMPSWAP_V2_V1_vi [[COPY1]], [[COPY2]], [[COPY]], 15, 1, 1, 0, 0, 0, 0, implicit $exec :: (volatile dereferenceable load store (s64) on custom "ImageResource")		; GFX8-NEXT: [[IMAGE_ATOMIC_CMPSWAP_V4_V1_vi:%[0-9]+]]:vreg_128 = IMAGE_ATOMIC_CMPSWAP_V4_V1_vi [[COPY1]], [[COPY2]], [[COPY]], 15, 1, 1, 0, 0, 0, 0, implicit $exec :: (volatile dereferenceable load store (s64) on custom "ImageResource")
; GFX8-NEXT: S_ENDPGM 0		; GFX8-NEXT: S_ENDPGM 0
; GFX10-LABEL: name: atomic_cmpswap_i64_1d_no_return		; GFX10-LABEL: name: atomic_cmpswap_i64_1d_no_return
; GFX10: liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4		; GFX10: liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4
; GFX10-NEXT: {{ $}}		; GFX10-NEXT: {{ $}}
; GFX10-NEXT: [[COPY:%[0-9]+]]:sgpr_256 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7		; GFX10-NEXT: [[COPY:%[0-9]+]]:sgpr_256 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7
; GFX10-NEXT: [[COPY1:%[0-9]+]]:vreg_128 = COPY $vgpr0_vgpr1_vgpr2_vgpr3		; GFX10-NEXT: [[COPY1:%[0-9]+]]:vreg_128 = COPY $vgpr0_vgpr1_vgpr2_vgpr3
; GFX10-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr4		; GFX10-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr4
; GFX10-NEXT: [[IMAGE_ATOMIC_CMPSWAP_V2_V1_gfx10_:%[0-9]+]]:vreg_128 = IMAGE_ATOMIC_CMPSWAP_V2_V1_gfx10 [[COPY1]], [[COPY2]], [[COPY]], 15, 0, 1, 1, 0, 0, 0, 0, implicit $exec :: (volatile dereferenceable load store (s64) on custom "ImageResource")		; GFX10-NEXT: [[IMAGE_ATOMIC_CMPSWAP_V4_V1_gfx10_:%[0-9]+]]:vreg_128 = IMAGE_ATOMIC_CMPSWAP_V4_V1_gfx10 [[COPY1]], [[COPY2]], [[COPY]], 15, 0, 1, 1, 0, 0, 0, 0, implicit $exec :: (volatile dereferenceable load store (s64) on custom "ImageResource")
; GFX10-NEXT: S_ENDPGM 0		; GFX10-NEXT: S_ENDPGM 0
%0:sgpr(<8 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7		%0:sgpr(<8 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7
%1:vgpr(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3		%1:vgpr(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
%2:vgpr(s32) = COPY $vgpr4		%2:vgpr(s32) = COPY $vgpr4
%3:vgpr(s64) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.cmpswap.1d), %1(<2 x s64>), $noreg, %2(s32), %0(<8 x s32>), 0, 0, 0 :: (volatile dereferenceable load store (s64) on custom "ImageResource")		%3:vgpr(s64) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.cmpswap.1d), %1(<2 x s64>), $noreg, %2(s32), %0(<8 x s32>), 0, 0, 0 :: (volatile dereferenceable load store (s64) on custom "ImageResource")
S_ENDPGM 0		S_ENDPGM 0
...		...

llvm/test/CodeGen/AMDGPU/release-vgprs.mir

Show First 20 Lines • Show All 397 Lines • ▼ Show 20 Lines	bb.0:
S_ENDPGM 0		S_ENDPGM 0
...		...

---		---
name: image_atomic		name: image_atomic
body: \|		body: \|
bb.0:		bb.0:
; CHECK-LABEL: name: image_atomic		; CHECK-LABEL: name: image_atomic
; CHECK: renamable $vgpr0_vgpr1_vgpr2_vgpr3 = IMAGE_ATOMIC_CMPSWAP_V2_V1_gfx11 killed renamable $vgpr0_vgpr1_vgpr2_vgpr3, killed renamable $vgpr4, killed renamable $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, 15, 0, 1, 1, 0, 0, 0, 0, implicit $exec :: (volatile dereferenceable load store (s64) on custom "ImageResource")		; CHECK: renamable $vgpr0_vgpr1_vgpr2_vgpr3 = IMAGE_ATOMIC_CMPSWAP_V4_V1_gfx11 killed renamable $vgpr0_vgpr1_vgpr2_vgpr3, killed renamable $vgpr4, killed renamable $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, 15, 0, 1, 1, 0, 0, 0, 0, implicit $exec :: (volatile dereferenceable load store (s64) on custom "ImageResource")
; CHECK-NEXT: S_SENDMSG 3, implicit $exec, implicit $m0		; CHECK-NEXT: S_SENDMSG 3, implicit $exec, implicit $m0
; CHECK-NEXT: S_ENDPGM 0		; CHECK-NEXT: S_ENDPGM 0
renamable $vgpr0_vgpr1_vgpr2_vgpr3 = IMAGE_ATOMIC_CMPSWAP_V2_V1_gfx11 killed renamable $vgpr0_vgpr1_vgpr2_vgpr3, killed renamable $vgpr4, killed renamable $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, 15, 0, 1, 1, 0, 0, 0, 0, implicit $exec :: (volatile dereferenceable load store (s64) on custom "ImageResource")		renamable $vgpr0_vgpr1_vgpr2_vgpr3 = IMAGE_ATOMIC_CMPSWAP_V4_V1_gfx11 killed renamable $vgpr0_vgpr1_vgpr2_vgpr3, killed renamable $vgpr4, killed renamable $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, 15, 0, 1, 1, 0, 0, 0, 0, implicit $exec :: (volatile dereferenceable load store (s64) on custom "ImageResource")
S_ENDPGM 0		S_ENDPGM 0
...		...

llvm/test/MC/AMDGPU/gfx10_asm_mimg.s

	Show First 20 Lines • Show All 94 Lines • ▼ Show 20 Lines
	; GFX10: image_store_mip v[0:3], v[253:255], s[96:103] dmask:0xf dim:SQ_RSRC_IMG_2D ; encoding: [0x08,0x0f,0x24,0xf0,0xfd,0x00,0x18,0x00]			; GFX10: image_store_mip v[0:3], v[253:255], s[96:103] dmask:0xf dim:SQ_RSRC_IMG_2D ; encoding: [0x08,0x0f,0x24,0xf0,0xfd,0x00,0x18,0x00]

	image_get_resinfo v[4:7], v32, s[96:103] dmask:0xf dim:SQ_RSRC_IMG_3D			image_get_resinfo v[4:7], v32, s[96:103] dmask:0xf dim:SQ_RSRC_IMG_3D
	; GFX10: image_get_resinfo v[4:7], v32, s[96:103] dmask:0xf dim:SQ_RSRC_IMG_3D ; encoding: [0x10,0x0f,0x38,0xf0,0x20,0x04,0x18,0x00]			; GFX10: image_get_resinfo v[4:7], v32, s[96:103] dmask:0xf dim:SQ_RSRC_IMG_3D ; encoding: [0x10,0x0f,0x38,0xf0,0x20,0x04,0x18,0x00]

	image_atomic_swap v4, v[32:34], s[96:103] dmask:0x1 dim:SQ_RSRC_IMG_3D glc			image_atomic_swap v4, v[32:34], s[96:103] dmask:0x1 dim:SQ_RSRC_IMG_3D glc
	; GFX10: image_atomic_swap v4, v[32:34], s[96:103] dmask:0x1 dim:SQ_RSRC_IMG_3D glc ; encoding: [0x10,0x21,0x3c,0xf0,0x20,0x04,0x18,0x00]			; GFX10: image_atomic_swap v4, v[32:34], s[96:103] dmask:0x1 dim:SQ_RSRC_IMG_3D glc ; encoding: [0x10,0x21,0x3c,0xf0,0x20,0x04,0x18,0x00]

				image_atomic_swap v[0:2], [v4, v5], s[4:11] dmask:0x3 dim:SQ_RSRC_IMG_2D tfe
				; GFX10: image_atomic_swap v[0:2], [v4, v5], s[4:11] dmask:0x3 dim:SQ_RSRC_IMG_2D tfe ; encoding: [0x0a,0x03,0x3d,0xf0,0x04,0x00,0x01,0x00,0x05,0x00,0x00,0x00]

	image_atomic_cmpswap v[4:5], [v32, v1, v2], s[96:103] dmask:0x3 dim:SQ_RSRC_IMG_3D glc			image_atomic_cmpswap v[4:5], [v32, v1, v2], s[96:103] dmask:0x3 dim:SQ_RSRC_IMG_3D glc
	; GFX10: image_atomic_cmpswap v[4:5], [v32, v1, v2], s[96:103] dmask:0x3 dim:SQ_RSRC_IMG_3D glc ; encoding: [0x12,0x23,0x40,0xf0,0x20,0x04,0x18,0x00,0x01,0x02,0x00,0x00]			; GFX10: image_atomic_cmpswap v[4:5], [v32, v1, v2], s[96:103] dmask:0x3 dim:SQ_RSRC_IMG_3D glc ; encoding: [0x12,0x23,0x40,0xf0,0x20,0x04,0x18,0x00,0x01,0x02,0x00,0x00]

				image_atomic_cmpswap v[0:2], [v3, v4], s[4:11] dmask:0x3 dim:SQ_RSRC_IMG_2D tfe
				; GFX10: image_atomic_cmpswap v[0:2], [v3, v4], s[4:11] dmask:0x3 dim:SQ_RSRC_IMG_2D tfe ; encoding: [0x0a,0x03,0x41,0xf0,0x03,0x00,0x01,0x00,0x04,0x00,0x00,0x00]

				image_atomic_cmpswap v[0:4], v[5:6], s[4:11] dmask:0xf dim:SQ_RSRC_IMG_2D tfe
				; GFX10: image_atomic_cmpswap v[0:4], v[5:6], s[4:11] dmask:0xf dim:SQ_RSRC_IMG_2D tfe ; encoding: [0x08,0x0f,0x41,0xf0,0x05,0x00,0x01,0x00]

	image_atomic_add v[4:5], [v32, v1, v2], s[96:103] dmask:0x3 dim:SQ_RSRC_IMG_CUBE glc			image_atomic_add v[4:5], [v32, v1, v2], s[96:103] dmask:0x3 dim:SQ_RSRC_IMG_CUBE glc
	; GFX10: image_atomic_add v[4:5], [v32, v1, v2], s[96:103] dmask:0x3 dim:SQ_RSRC_IMG_CUBE glc ; encoding: [0x1a,0x23,0x44,0xf0,0x20,0x04,0x18,0x00,0x01,0x02,0x00,0x00]			; GFX10: image_atomic_add v[4:5], [v32, v1, v2], s[96:103] dmask:0x3 dim:SQ_RSRC_IMG_CUBE glc ; encoding: [0x1a,0x23,0x44,0xf0,0x20,0x04,0x18,0x00,0x01,0x02,0x00,0x00]

	image_atomic_sub v4, [v32, v1], s[96:103] dmask:0x1 dim:SQ_RSRC_IMG_1D_ARRAY glc			image_atomic_sub v4, [v32, v1], s[96:103] dmask:0x1 dim:SQ_RSRC_IMG_1D_ARRAY glc
	; GFX10: image_atomic_sub v4, [v32, v1], s[96:103] dmask:0x1 dim:SQ_RSRC_IMG_1D_ARRAY glc ; encoding: [0x22,0x21,0x48,0xf0,0x20,0x04,0x18,0x00,0x01,0x00,0x00,0x00]			; GFX10: image_atomic_sub v4, [v32, v1], s[96:103] dmask:0x1 dim:SQ_RSRC_IMG_1D_ARRAY glc ; encoding: [0x22,0x21,0x48,0xf0,0x20,0x04,0x18,0x00,0x01,0x00,0x00,0x00]

	image_atomic_smin v4, [v32, v1, v2], s[96:103] dmask:0x1 dim:SQ_RSRC_IMG_2D_ARRAY glc			image_atomic_smin v4, [v32, v1, v2], s[96:103] dmask:0x1 dim:SQ_RSRC_IMG_2D_ARRAY glc
	; GFX10: image_atomic_smin v4, [v32, v1, v2], s[96:103] dmask:0x1 dim:SQ_RSRC_IMG_2D_ARRAY glc ; encoding: [0x2a,0x21,0x50,0xf0,0x20,0x04,0x18,0x00,0x01,0x02,0x00,0x00]			; GFX10: image_atomic_smin v4, [v32, v1, v2], s[96:103] dmask:0x1 dim:SQ_RSRC_IMG_2D_ARRAY glc ; encoding: [0x2a,0x21,0x50,0xf0,0x20,0x04,0x18,0x00,0x01,0x02,0x00,0x00]
	▲ Show 20 Lines • Show All 281 Lines • ▼ Show 20 Lines
	; GFX10: image_sample_c_b_cl_o v[64:66], [v32, v16, v0, v2, v1, v4, v5], s[4:11], s[100:103] dmask:0x7 dim:SQ_RSRC_IMG_3D ; encoding: [0x14,0x07,0xf8,0xf0,0x20,0x40,0x21,0x03,0x10,0x00,0x02,0x01,0x04,0x05,0x00,0x00]			; GFX10: image_sample_c_b_cl_o v[64:66], [v32, v16, v0, v2, v1, v4, v5], s[4:11], s[100:103] dmask:0x7 dim:SQ_RSRC_IMG_3D ; encoding: [0x14,0x07,0xf8,0xf0,0x20,0x40,0x21,0x03,0x10,0x00,0x02,0x01,0x04,0x05,0x00,0x00]

	image_sample_c_lz_o v[64:66], [v32, v0, v16], s[4:11], s[100:103] dmask:0x7 dim:SQ_RSRC_IMG_1D			image_sample_c_lz_o v[64:66], [v32, v0, v16], s[4:11], s[100:103] dmask:0x7 dim:SQ_RSRC_IMG_1D
	; GFX10: image_sample_c_lz_o v[64:66], [v32, v0, v16], s[4:11], s[100:103] dmask:0x7 dim:SQ_RSRC_IMG_1D ; encoding: [0x02,0x07,0xfc,0xf0,0x20,0x40,0x21,0x03,0x00,0x10,0x00,0x00]			; GFX10: image_sample_c_lz_o v[64:66], [v32, v0, v16], s[4:11], s[100:103] dmask:0x7 dim:SQ_RSRC_IMG_1D ; encoding: [0x02,0x07,0xfc,0xf0,0x20,0x40,0x21,0x03,0x00,0x10,0x00,0x00]

	image_gather4 v[64:67], v32, s[4:11], s[100:103] dmask:0x1 dim:SQ_RSRC_IMG_1D			image_gather4 v[64:67], v32, s[4:11], s[100:103] dmask:0x1 dim:SQ_RSRC_IMG_1D
	; GFX10: image_gather4 v[64:67], v32, s[4:11], s[100:103] dmask:0x1 dim:SQ_RSRC_IMG_1D ; encoding: [0x00,0x01,0x00,0xf1,0x20,0x40,0x21,0x03]			; GFX10: image_gather4 v[64:67], v32, s[4:11], s[100:103] dmask:0x1 dim:SQ_RSRC_IMG_1D ; encoding: [0x00,0x01,0x00,0xf1,0x20,0x40,0x21,0x03]

				image_gather4 v[64:68], v32, s[4:11], s[100:103] dmask:0x1 dim:SQ_RSRC_IMG_1D tfe
				; GFX10: image_gather4 v[64:68], v32, s[4:11], s[100:103] dmask:0x1 dim:SQ_RSRC_IMG_1D tfe ; encoding: [0x00,0x01,0x01,0xf1,0x20,0x40,0x21,0x03]

				image_gather4 v[64:65], v32, s[4:11], s[100:103] dmask:0x1 dim:SQ_RSRC_IMG_1D d16
				; GFX10: image_gather4 v[64:65], v32, s[4:11], s[100:103] dmask:0x1 dim:SQ_RSRC_IMG_1D d16 ; encoding: [0x00,0x01,0x00,0xf1,0x20,0x40,0x21,0x83]

				image_gather4 v[64:66], v32, s[4:11], s[100:103] dmask:0x1 dim:SQ_RSRC_IMG_1D tfe d16
				; GFX10: image_gather4 v[64:66], v32, s[4:11], s[100:103] dmask:0x1 dim:SQ_RSRC_IMG_1D tfe d16 ; encoding: [0x00,0x01,0x01,0xf1,0x20,0x40,0x21,0x83]

	image_gather4_cl v[64:67], v[32:35], s[4:11], s[100:103] dmask:0x2 dim:SQ_RSRC_IMG_CUBE			image_gather4_cl v[64:67], v[32:35], s[4:11], s[100:103] dmask:0x2 dim:SQ_RSRC_IMG_CUBE
	; GFX10: image_gather4_cl v[64:67], v[32:35], s[4:11], s[100:103] dmask:0x2 dim:SQ_RSRC_IMG_CUBE ; encoding: [0x18,0x02,0x04,0xf1,0x20,0x40,0x21,0x03]			; GFX10: image_gather4_cl v[64:67], v[32:35], s[4:11], s[100:103] dmask:0x2 dim:SQ_RSRC_IMG_CUBE ; encoding: [0x18,0x02,0x04,0xf1,0x20,0x40,0x21,0x03]

	image_gather4_l v[64:67], [v32, v0, v4], s[4:11], s[100:103] dmask:0x4 dim:SQ_RSRC_IMG_1D_ARRAY			image_gather4_l v[64:67], [v32, v0, v4], s[4:11], s[100:103] dmask:0x4 dim:SQ_RSRC_IMG_1D_ARRAY
	; GFX10: image_gather4_l v[64:67], [v32, v0, v4], s[4:11], s[100:103] dmask:0x4 dim:SQ_RSRC_IMG_1D_ARRAY ; encoding: [0x22,0x04,0x10,0xf1,0x20,0x40,0x21,0x03,0x00,0x04,0x00,0x00]			; GFX10: image_gather4_l v[64:67], [v32, v0, v4], s[4:11], s[100:103] dmask:0x4 dim:SQ_RSRC_IMG_1D_ARRAY ; encoding: [0x22,0x04,0x10,0xf1,0x20,0x40,0x21,0x03,0x00,0x04,0x00,0x00]

	image_gather4_b v[64:67], [v32, v0, v4], s[4:11], s[100:103] dmask:0x8 dim:SQ_RSRC_IMG_2D			image_gather4_b v[64:67], [v32, v0, v4], s[4:11], s[100:103] dmask:0x8 dim:SQ_RSRC_IMG_2D
	; GFX10: image_gather4_b v[64:67], [v32, v0, v4], s[4:11], s[100:103] dmask:0x8 dim:SQ_RSRC_IMG_2D ; encoding: [0x0a,0x08,0x14,0xf1,0x20,0x40,0x21,0x03,0x00,0x04,0x00,0x00]			; GFX10: image_gather4_b v[64:67], [v32, v0, v4], s[4:11], s[100:103] dmask:0x8 dim:SQ_RSRC_IMG_2D ; encoding: [0x0a,0x08,0x14,0xf1,0x20,0x40,0x21,0x03,0x00,0x04,0x00,0x00]
	▲ Show 20 Lines • Show All 243 Lines • Show Last 20 Lines

llvm/test/MC/AMDGPU/gfx11_asm_mimg.s

	Show First 20 Lines • Show All 94 Lines • ▼ Show 20 Lines
	; GFX11: image_store_mip v[0:3], v[253:255], s[96:103] dmask:0xf dim:SQ_RSRC_IMG_2D ; encoding: [0x04,0x0f,0x1c,0xf0,0xfd,0x00,0x18,0x00]			; GFX11: image_store_mip v[0:3], v[253:255], s[96:103] dmask:0xf dim:SQ_RSRC_IMG_2D ; encoding: [0x04,0x0f,0x1c,0xf0,0xfd,0x00,0x18,0x00]

	image_get_resinfo v[4:7], v32, s[96:103] dmask:0xf dim:SQ_RSRC_IMG_3D			image_get_resinfo v[4:7], v32, s[96:103] dmask:0xf dim:SQ_RSRC_IMG_3D
	; GFX11: image_get_resinfo v[4:7], v32, s[96:103] dmask:0xf dim:SQ_RSRC_IMG_3D ; encoding: [0x08,0x0f,0x5c,0xf0,0x20,0x04,0x18,0x00]			; GFX11: image_get_resinfo v[4:7], v32, s[96:103] dmask:0xf dim:SQ_RSRC_IMG_3D ; encoding: [0x08,0x0f,0x5c,0xf0,0x20,0x04,0x18,0x00]

	image_atomic_swap v4, v[32:34], s[96:103] dmask:0x1 dim:SQ_RSRC_IMG_3D glc			image_atomic_swap v4, v[32:34], s[96:103] dmask:0x1 dim:SQ_RSRC_IMG_3D glc
	; GFX11: image_atomic_swap v4, v[32:34], s[96:103] dmask:0x1 dim:SQ_RSRC_IMG_3D glc ; encoding: [0x08,0x41,0x28,0xf0,0x20,0x04,0x18,0x00]			; GFX11: image_atomic_swap v4, v[32:34], s[96:103] dmask:0x1 dim:SQ_RSRC_IMG_3D glc ; encoding: [0x08,0x41,0x28,0xf0,0x20,0x04,0x18,0x00]

				image_atomic_swap v[0:2], [v4, v5], s[4:11] dmask:0x3 dim:SQ_RSRC_IMG_2D tfe
				; GFX11: image_atomic_swap v[0:2], [v4, v5], s[4:11] dmask:0x3 dim:SQ_RSRC_IMG_2D tfe ; encoding: [0x05,0x03,0x28,0xf0,0x04,0x00,0x21,0x00,0x05,0x00,0x00,0x00]

	image_atomic_cmpswap v[4:5], [v32, v1, v2], s[96:103] dmask:0x3 dim:SQ_RSRC_IMG_3D glc			image_atomic_cmpswap v[4:5], [v32, v1, v2], s[96:103] dmask:0x3 dim:SQ_RSRC_IMG_3D glc
	; GFX11: image_atomic_cmpswap v[4:5], [v32, v1, v2], s[96:103] dmask:0x3 dim:SQ_RSRC_IMG_3D glc ; encoding: [0x09,0x43,0x2c,0xf0,0x20,0x04,0x18,0x00,0x01,0x02,0x00,0x00]			; GFX11: image_atomic_cmpswap v[4:5], [v32, v1, v2], s[96:103] dmask:0x3 dim:SQ_RSRC_IMG_3D glc ; encoding: [0x09,0x43,0x2c,0xf0,0x20,0x04,0x18,0x00,0x01,0x02,0x00,0x00]

				image_atomic_cmpswap v[0:2], [v3, v4], s[4:11] dmask:0x3 dim:SQ_RSRC_IMG_2D tfe
				; GFX11: image_atomic_cmpswap v[0:2], [v3, v4], s[4:11] dmask:0x3 dim:SQ_RSRC_IMG_2D tfe ; encoding: [0x05,0x03,0x2c,0xf0,0x03,0x00,0x21,0x00,0x04,0x00,0x00,0x00]

				image_atomic_cmpswap v[0:4], v[5:6], s[4:11] dmask:0xf dim:SQ_RSRC_IMG_2D tfe
				; GFX11: image_atomic_cmpswap v[0:4], v[5:6], s[4:11] dmask:0xf dim:SQ_RSRC_IMG_2D tfe ; encoding: [0x04,0x0f,0x2c,0xf0,0x05,0x00,0x21,0x00]

	image_atomic_add v[4:5], [v32, v1, v2], s[96:103] dmask:0x3 dim:SQ_RSRC_IMG_CUBE glc			image_atomic_add v[4:5], [v32, v1, v2], s[96:103] dmask:0x3 dim:SQ_RSRC_IMG_CUBE glc
	; GFX11: image_atomic_add v[4:5], [v32, v1, v2], s[96:103] dmask:0x3 dim:SQ_RSRC_IMG_CUBE glc ; encoding: [0x0d,0x43,0x30,0xf0,0x20,0x04,0x18,0x00,0x01,0x02,0x00,0x00]			; GFX11: image_atomic_add v[4:5], [v32, v1, v2], s[96:103] dmask:0x3 dim:SQ_RSRC_IMG_CUBE glc ; encoding: [0x0d,0x43,0x30,0xf0,0x20,0x04,0x18,0x00,0x01,0x02,0x00,0x00]

	image_atomic_sub v4, [v32, v1], s[96:103] dmask:0x1 dim:SQ_RSRC_IMG_1D_ARRAY glc			image_atomic_sub v4, [v32, v1], s[96:103] dmask:0x1 dim:SQ_RSRC_IMG_1D_ARRAY glc
	; GFX11: image_atomic_sub v4, [v32, v1], s[96:103] dmask:0x1 dim:SQ_RSRC_IMG_1D_ARRAY glc ; encoding: [0x11,0x41,0x34,0xf0,0x20,0x04,0x18,0x00,0x01,0x00,0x00,0x00]			; GFX11: image_atomic_sub v4, [v32, v1], s[96:103] dmask:0x1 dim:SQ_RSRC_IMG_1D_ARRAY glc ; encoding: [0x11,0x41,0x34,0xf0,0x20,0x04,0x18,0x00,0x01,0x00,0x00,0x00]

	image_atomic_smin v4, [v32, v1, v2], s[96:103] dmask:0x1 dim:SQ_RSRC_IMG_2D_ARRAY glc			image_atomic_smin v4, [v32, v1, v2], s[96:103] dmask:0x1 dim:SQ_RSRC_IMG_2D_ARRAY glc
	; GFX11: image_atomic_smin v4, [v32, v1, v2], s[96:103] dmask:0x1 dim:SQ_RSRC_IMG_2D_ARRAY glc ; encoding: [0x15,0x41,0x38,0xf0,0x20,0x04,0x18,0x00,0x01,0x02,0x00,0x00]			; GFX11: image_atomic_smin v4, [v32, v1, v2], s[96:103] dmask:0x1 dim:SQ_RSRC_IMG_2D_ARRAY glc ; encoding: [0x15,0x41,0x38,0xf0,0x20,0x04,0x18,0x00,0x01,0x02,0x00,0x00]
	▲ Show 20 Lines • Show All 101 Lines • ▼ Show 20 Lines
	; GFX11: image_sample_lz_o v[64:66], [v32, v16, v0, v2], s[4:11], s[100:103] dmask:0x7 dim:SQ_RSRC_IMG_3D ; encoding: [0x09,0x07,0xa4,0xf0,0x20,0x40,0x01,0x64,0x10,0x00,0x02,0x00]			; GFX11: image_sample_lz_o v[64:66], [v32, v16, v0, v2], s[4:11], s[100:103] dmask:0x7 dim:SQ_RSRC_IMG_3D ; encoding: [0x09,0x07,0xa4,0xf0,0x20,0x40,0x01,0x64,0x10,0x00,0x02,0x00]

	image_sample_c_lz_o v[64:66], [v32, v0, v16], s[4:11], s[100:103] dmask:0x7 dim:SQ_RSRC_IMG_1D			image_sample_c_lz_o v[64:66], [v32, v0, v16], s[4:11], s[100:103] dmask:0x7 dim:SQ_RSRC_IMG_1D
	; GFX11: image_sample_c_lz_o v[64:66], [v32, v0, v16], s[4:11], s[100:103] dmask:0x7 dim:SQ_RSRC_IMG_1D ; encoding: [0x01,0x07,0xb8,0xf0,0x20,0x40,0x01,0x64,0x00,0x10,0x00,0x00]			; GFX11: image_sample_c_lz_o v[64:66], [v32, v0, v16], s[4:11], s[100:103] dmask:0x7 dim:SQ_RSRC_IMG_1D ; encoding: [0x01,0x07,0xb8,0xf0,0x20,0x40,0x01,0x64,0x00,0x10,0x00,0x00]

	image_gather4 v[64:67], v32, s[4:11], s[100:103] dmask:0x1 dim:SQ_RSRC_IMG_1D			image_gather4 v[64:67], v32, s[4:11], s[100:103] dmask:0x1 dim:SQ_RSRC_IMG_1D
	; GFX11: image_gather4 v[64:67], v32, s[4:11], s[100:103] dmask:0x1 dim:SQ_RSRC_IMG_1D ; encoding: [0x00,0x01,0xbc,0xf0,0x20,0x40,0x01,0x64]			; GFX11: image_gather4 v[64:67], v32, s[4:11], s[100:103] dmask:0x1 dim:SQ_RSRC_IMG_1D ; encoding: [0x00,0x01,0xbc,0xf0,0x20,0x40,0x01,0x64]

				image_gather4 v[64:68], v32, s[4:11], s[100:103] dmask:0x1 dim:SQ_RSRC_IMG_1D tfe
				; GFX11: image_gather4 v[64:68], v32, s[4:11], s[100:103] dmask:0x1 dim:SQ_RSRC_IMG_1D tfe ; encoding: [0x00,0x01,0xbc,0xf0,0x20,0x40,0x21,0x64]

				image_gather4 v[64:65], v32, s[4:11], s[100:103] dmask:0x1 dim:SQ_RSRC_IMG_1D d16
				; GFX11: image_gather4 v[64:65], v32, s[4:11], s[100:103] dmask:0x1 dim:SQ_RSRC_IMG_1D d16 ; encoding: [0x00,0x01,0xbe,0xf0,0x20,0x40,0x01,0x64]

				image_gather4 v[64:66], v32, s[4:11], s[100:103] dmask:0x1 dim:SQ_RSRC_IMG_1D tfe d16
				; GFX11: image_gather4 v[64:66], v32, s[4:11], s[100:103] dmask:0x1 dim:SQ_RSRC_IMG_1D tfe d16 ; encoding: [0x00,0x01,0xbe,0xf0,0x20,0x40,0x21,0x64]

	image_gather4_cl v[64:67], v[32:35], s[4:11], s[100:103] dmask:0x2 dim:SQ_RSRC_IMG_CUBE			image_gather4_cl v[64:67], v[32:35], s[4:11], s[100:103] dmask:0x2 dim:SQ_RSRC_IMG_CUBE
	; GFX11: image_gather4_cl v[64:67], v[32:35], s[4:11], s[100:103] dmask:0x2 dim:SQ_RSRC_IMG_CUBE ; encoding: [0x0c,0x02,0x80,0xf1,0x20,0x40,0x01,0x64]			; GFX11: image_gather4_cl v[64:67], v[32:35], s[4:11], s[100:103] dmask:0x2 dim:SQ_RSRC_IMG_CUBE ; encoding: [0x0c,0x02,0x80,0xf1,0x20,0x40,0x01,0x64]

	image_gather4_l v[64:67], [v32, v0, v4], s[4:11], s[100:103] dmask:0x4 dim:SQ_RSRC_IMG_1D_ARRAY			image_gather4_l v[64:67], [v32, v0, v4], s[4:11], s[100:103] dmask:0x4 dim:SQ_RSRC_IMG_1D_ARRAY
	; GFX11: image_gather4_l v[64:67], [v32, v0, v4], s[4:11], s[100:103] dmask:0x4 dim:SQ_RSRC_IMG_1D_ARRAY ; encoding: [0x11,0x04,0xc0,0xf0,0x20,0x40,0x01,0x64,0x00,0x04,0x00,0x00]			; GFX11: image_gather4_l v[64:67], [v32, v0, v4], s[4:11], s[100:103] dmask:0x4 dim:SQ_RSRC_IMG_1D_ARRAY ; encoding: [0x11,0x04,0xc0,0xf0,0x20,0x40,0x01,0x64,0x00,0x04,0x00,0x00]

	image_gather4_b v[64:67], [v32, v0, v4], s[4:11], s[100:103] dmask:0x8 dim:SQ_RSRC_IMG_2D			image_gather4_b v[64:67], [v32, v0, v4], s[4:11], s[100:103] dmask:0x8 dim:SQ_RSRC_IMG_2D
	; GFX11: image_gather4_b v[64:67], [v32, v0, v4], s[4:11], s[100:103] dmask:0x8 dim:SQ_RSRC_IMG_2D ; encoding: [0x05,0x08,0xc4,0xf0,0x20,0x40,0x01,0x64,0x00,0x04,0x00,0x00]			; GFX11: image_gather4_b v[64:67], [v32, v0, v4], s[4:11], s[100:103] dmask:0x8 dim:SQ_RSRC_IMG_2D ; encoding: [0x05,0x08,0xc4,0xf0,0x20,0x40,0x01,0x64,0x00,0x04,0x00,0x00]
	▲ Show 20 Lines • Show All 78 Lines • Show Last 20 Lines

llvm/test/MC/Disassembler/AMDGPU/gfx10_mimg.txt

	Show First 20 Lines • Show All 164 Lines • ▼ Show 20 Lines
	0x08,0x0f,0x21,0xf0,0x08,0x10,0x18,0x80			0x08,0x0f,0x21,0xf0,0x08,0x10,0x18,0x80

	# GFX10: image_get_resinfo v[16:19], v8, s[96:103] dmask:0xf dim:SQ_RSRC_IMG_2D_ARRAY unorm ; encoding: [0x28,0x1f,0x38,0xf0,0x08,0x10,0x18,0x00]			# GFX10: image_get_resinfo v[16:19], v8, s[96:103] dmask:0xf dim:SQ_RSRC_IMG_2D_ARRAY unorm ; encoding: [0x28,0x1f,0x38,0xf0,0x08,0x10,0x18,0x00]
	0x28,0x1f,0x38,0xf0,0x08,0x10,0x18,0x00			0x28,0x1f,0x38,0xf0,0x08,0x10,0x18,0x00

	# GFX10: image_atomic_swap v16, v8, s[96:103] dmask:0x1 dim:SQ_RSRC_IMG_1D unorm ; encoding: [0x00,0x11,0x3c,0xf0,0x08,0x10,0x18,0x00]			# GFX10: image_atomic_swap v16, v8, s[96:103] dmask:0x1 dim:SQ_RSRC_IMG_1D unorm ; encoding: [0x00,0x11,0x3c,0xf0,0x08,0x10,0x18,0x00]
	0x00,0x11,0x3c,0xf0,0x08,0x10,0x18,0x00			0x00,0x11,0x3c,0xf0,0x08,0x10,0x18,0x00

				# GFX10: image_atomic_swap v[0:2], [v4, v5], s[4:11] dmask:0x3 dim:SQ_RSRC_IMG_2D tfe ; encoding: [0x0a,0x03,0x3d,0xf0,0x04,0x00,0x01,0x00,0x05,0x00,0x00,0x00]
				0x0a,0x03,0x3d,0xf0,0x04,0x00,0x01,0x00,0x05,0x00,0x00,0x00

	# GFX10: image_sample_c_b v[16:19], v[8:12], s[20:27], s[100:103] dmask:0xf dim:SQ_RSRC_IMG_CUBE ; encoding: [0x18,0x0f,0xb4,0xf0,0x08,0x10,0x25,0x03]			# GFX10: image_sample_c_b v[16:19], v[8:12], s[20:27], s[100:103] dmask:0xf dim:SQ_RSRC_IMG_CUBE ; encoding: [0x18,0x0f,0xb4,0xf0,0x08,0x10,0x25,0x03]
	0x18,0x0f,0xb4,0xf0,0x08,0x10,0x25,0x03			0x18,0x0f,0xb4,0xf0,0x08,0x10,0x25,0x03

	# GFX10: image_sample_c_b_cl v[16:19], v[250:255], s[20:27], s[100:103] dmask:0xf dim:SQ_RSRC_IMG_CUBE ; encoding: [0x18,0x0f,0xb8,0xf0,0xfa,0x10,0x25,0x03]			# GFX10: image_sample_c_b_cl v[16:19], v[250:255], s[20:27], s[100:103] dmask:0xf dim:SQ_RSRC_IMG_CUBE ; encoding: [0x18,0x0f,0xb8,0xf0,0xfa,0x10,0x25,0x03]
	0x18,0x0f,0xb8,0xf0,0xfa,0x10,0x25,0x03			0x18,0x0f,0xb8,0xf0,0xfa,0x10,0x25,0x03

	# GFX10: image_sample_c_lz v[16:19], v[253:255], s[20:27], s[100:103] dmask:0xf dim:SQ_RSRC_IMG_2D ; encoding: [0x08,0x0f,0xbc,0xf0,0xfd,0x10,0x25,0x03]			# GFX10: image_sample_c_lz v[16:19], v[253:255], s[20:27], s[100:103] dmask:0xf dim:SQ_RSRC_IMG_2D ; encoding: [0x08,0x0f,0xbc,0xf0,0xfd,0x10,0x25,0x03]
	0x08,0x0f,0xbc,0xf0,0xfd,0x10,0x25,0x03			0x08,0x0f,0xbc,0xf0,0xfd,0x10,0x25,0x03
	▲ Show 20 Lines • Show All 128 Lines • ▼ Show 20 Lines
	# MIMG, NSA address			# MIMG, NSA address
	#===------------------------------------------------------------------------===#			#===------------------------------------------------------------------------===#

	# NOTE: Contents of unused NSA bytes are NOT preserved.			# NOTE: Contents of unused NSA bytes are NOT preserved.

	# GFX10: image_atomic_cmpswap v[16:17], [v8, v9], s[96:103] dmask:0x3 dim:SQ_RSRC_IMG_2D unorm glc ; encoding: [0x0a,0x33,0x40,0xf0,0x08,0x10,0x18,0x00,0x09,0x00,0x00,0x00]			# GFX10: image_atomic_cmpswap v[16:17], [v8, v9], s[96:103] dmask:0x3 dim:SQ_RSRC_IMG_2D unorm glc ; encoding: [0x0a,0x33,0x40,0xf0,0x08,0x10,0x18,0x00,0x09,0x00,0x00,0x00]
	0x0a,0x33,0x40,0xf0,0x08,0x10,0x18,0x00,0x09,0x0a,0x0b,0x0c			0x0a,0x33,0x40,0xf0,0x08,0x10,0x18,0x00,0x09,0x0a,0x0b,0x0c

				# GFX10: image_atomic_cmpswap v[0:2], [v3, v4], s[4:11] dmask:0x3 dim:SQ_RSRC_IMG_2D tfe ; encoding: [0x0a,0x03,0x41,0xf0,0x03,0x00,0x01,0x00,0x04,0x00,0x00,0x00]
				0x0a,0x03,0x41,0xf0,0x03,0x00,0x01,0x00,0x04,0x00,0x00,0x00

				# GFX10: image_atomic_cmpswap v[0:4], v[5:6], s[4:11] dmask:0xf dim:SQ_RSRC_IMG_2D tfe ; encoding: [0x08,0x0f,0x41,0xf0,0x05,0x00,0x01,0x00]
				0x08,0x0f,0x41,0xf0,0x05,0x00,0x01,0x00

	# GFX10: image_atomic_add v16, [v8, v9, v10], s[96:103] dmask:0x1 dim:SQ_RSRC_IMG_3D unorm glc ; encoding: [0x12,0x31,0x44,0xf0,0x08,0x10,0x18,0x00,0x09,0x0a,0x00,0x00]			# GFX10: image_atomic_add v16, [v8, v9, v10], s[96:103] dmask:0x1 dim:SQ_RSRC_IMG_3D unorm glc ; encoding: [0x12,0x31,0x44,0xf0,0x08,0x10,0x18,0x00,0x09,0x0a,0x00,0x00]
	0x12,0x31,0x44,0xf0,0x08,0x10,0x18,0x00,0x09,0x0a,0x0b,0x0c			0x12,0x31,0x44,0xf0,0x08,0x10,0x18,0x00,0x09,0x0a,0x0b,0x0c

	# GFX10: image_atomic_sub v16, [v8, v9, v10], s[96:103] dmask:0x1 dim:SQ_RSRC_IMG_CUBE unorm glc ; encoding: [0x1a,0x31,0x48,0xf0,0x08,0x10,0x18,0x00,0x09,0x0a,0x00,0x00]			# GFX10: image_atomic_sub v16, [v8, v9, v10], s[96:103] dmask:0x1 dim:SQ_RSRC_IMG_CUBE unorm glc ; encoding: [0x1a,0x31,0x48,0xf0,0x08,0x10,0x18,0x00,0x09,0x0a,0x00,0x00]
	0x1a,0x31,0x48,0xf0,0x08,0x10,0x18,0x00,0x09,0x0a,0x0b,0x0c			0x1a,0x31,0x48,0xf0,0x08,0x10,0x18,0x00,0x09,0x0a,0x0b,0x0c

	# GFX10: image_atomic_smin v16, [v8, v9], s[96:103] dmask:0x1 dim:SQ_RSRC_IMG_1D_ARRAY unorm glc ; encoding: [0x22,0x31,0x50,0xf0,0x08,0x10,0x18,0x00,0x09,0x00,0x00,0x00]			# GFX10: image_atomic_smin v16, [v8, v9], s[96:103] dmask:0x1 dim:SQ_RSRC_IMG_1D_ARRAY unorm glc ; encoding: [0x22,0x31,0x50,0xf0,0x08,0x10,0x18,0x00,0x09,0x00,0x00,0x00]
	0x22,0x31,0x50,0xf0,0x08,0x10,0x18,0x00,0x09,0x0a,0x0b,0x0c			0x22,0x31,0x50,0xf0,0x08,0x10,0x18,0x00,0x09,0x0a,0x0b,0x0c
	▲ Show 20 Lines • Show All 113 Lines • ▼ Show 20 Lines
	0x02,0x0f,0xf8,0xf0,0x08,0x10,0x25,0x03,0x09,0x0a,0x0b,0x0c			0x02,0x0f,0xf8,0xf0,0x08,0x10,0x25,0x03,0x09,0x0a,0x0b,0x0c

	# GFX10: image_sample_c_lz_o v[16:19], [v8, v9, v10, v11], s[20:27], s[100:103] dmask:0xf dim:SQ_RSRC_IMG_2D ; encoding: [0x0a,0x0f,0xfc,0xf0,0x08,0x10,0x25,0x03,0x09,0x0a,0x0b,0x00]			# GFX10: image_sample_c_lz_o v[16:19], [v8, v9, v10, v11], s[20:27], s[100:103] dmask:0xf dim:SQ_RSRC_IMG_2D ; encoding: [0x0a,0x0f,0xfc,0xf0,0x08,0x10,0x25,0x03,0x09,0x0a,0x0b,0x00]
	0x0a,0x0f,0xfc,0xf0,0x08,0x10,0x25,0x03,0x09,0x0a,0x0b,0x0c			0x0a,0x0f,0xfc,0xf0,0x08,0x10,0x25,0x03,0x09,0x0a,0x0b,0x0c

	# GFX10: image_gather4 v[16:19], [v8, v9, v10], s[20:27], s[100:103] dmask:0xf dim:SQ_RSRC_IMG_3D ; encoding: [0x12,0x0f,0x00,0xf1,0x08,0x10,0x25,0x03,0x09,0x0a,0x00,0x00]			# GFX10: image_gather4 v[16:19], [v8, v9, v10], s[20:27], s[100:103] dmask:0xf dim:SQ_RSRC_IMG_3D ; encoding: [0x12,0x0f,0x00,0xf1,0x08,0x10,0x25,0x03,0x09,0x0a,0x00,0x00]
	0x12,0x0f,0x00,0xf1,0x08,0x10,0x25,0x03,0x09,0x0a,0x0b,0x0c			0x12,0x0f,0x00,0xf1,0x08,0x10,0x25,0x03,0x09,0x0a,0x0b,0x0c

				# GFX10: image_gather4 v[64:68], v32, s[4:11], s[100:103] dmask:0x1 dim:SQ_RSRC_IMG_1D tfe ; encoding: [0x00,0x01,0x01,0xf1,0x20,0x40,0x21,0x03]
				0x00,0x01,0x01,0xf1,0x20,0x40,0x21,0x03

				# GFX10: image_gather4 v[64:65], v32, s[4:11], s[100:103] dmask:0x1 dim:SQ_RSRC_IMG_1D d16 ; encoding: [0x00,0x01,0x00,0xf1,0x20,0x40,0x21,0x83]
				0x00,0x01,0x00,0xf1,0x20,0x40,0x21,0x83

				# GFX10: image_gather4 v[64:66], v32, s[4:11], s[100:103] dmask:0x1 dim:SQ_RSRC_IMG_1D tfe d16 ; encoding: [0x00,0x01,0x01,0xf1,0x20,0x40,0x21,0x83]
				0x00,0x01,0x01,0xf1,0x20,0x40,0x21,0x83

	# GFX10: image_gather4_cl v[16:19], [v8, v9, v10, v11], s[20:27], s[100:103] dmask:0xf dim:SQ_RSRC_IMG_CUBE ; encoding: [0x1a,0x0f,0x04,0xf1,0x08,0x10,0x25,0x03,0x09,0x0a,0x0b,0x00]			# GFX10: image_gather4_cl v[16:19], [v8, v9, v10, v11], s[20:27], s[100:103] dmask:0xf dim:SQ_RSRC_IMG_CUBE ; encoding: [0x1a,0x0f,0x04,0xf1,0x08,0x10,0x25,0x03,0x09,0x0a,0x0b,0x00]
	0x1a,0x0f,0x04,0xf1,0x08,0x10,0x25,0x03,0x09,0x0a,0x0b,0x0c			0x1a,0x0f,0x04,0xf1,0x08,0x10,0x25,0x03,0x09,0x0a,0x0b,0x0c

	# GFX10: image_gather4_l v[16:19], [v8, v9, v10], s[20:27], s[100:103] dmask:0xf dim:SQ_RSRC_IMG_1D_ARRAY ; encoding: [0x22,0x0f,0x10,0xf1,0x08,0x10,0x25,0x03,0x09,0x0a,0x00,0x00]			# GFX10: image_gather4_l v[16:19], [v8, v9, v10], s[20:27], s[100:103] dmask:0xf dim:SQ_RSRC_IMG_1D_ARRAY ; encoding: [0x22,0x0f,0x10,0xf1,0x08,0x10,0x25,0x03,0x09,0x0a,0x00,0x00]
	0x22,0x0f,0x10,0xf1,0x08,0x10,0x25,0x03,0x09,0x0a,0x0b,0x0c			0x22,0x0f,0x10,0xf1,0x08,0x10,0x25,0x03,0x09,0x0a,0x0b,0x0c

	# GFX10: image_gather4_b v[16:19], [v8, v9, v10, v11], s[20:27], s[100:103] dmask:0xf dim:SQ_RSRC_IMG_2D_ARRAY ; encoding: [0x2a,0x0f,0x14,0xf1,0x08,0x10,0x25,0x03,0x09,0x0a,0x0b,0x00]			# GFX10: image_gather4_b v[16:19], [v8, v9, v10, v11], s[20:27], s[100:103] dmask:0xf dim:SQ_RSRC_IMG_2D_ARRAY ; encoding: [0x2a,0x0f,0x14,0xf1,0x08,0x10,0x25,0x03,0x09,0x0a,0x0b,0x00]
	0x2a,0x0f,0x14,0xf1,0x08,0x10,0x25,0x03,0x09,0x0a,0x0b,0x0c			0x2a,0x0f,0x14,0xf1,0x08,0x10,0x25,0x03,0x09,0x0a,0x0b,0x0c
	▲ Show 20 Lines • Show All 87 Lines • Show Last 20 Lines

llvm/test/MC/Disassembler/AMDGPU/gfx11_mimg.txt

	Show First 20 Lines • Show All 93 Lines • ▼ Show 20 Lines
	0x04,0x0f,0x1c,0xf0,0xfd,0x00,0x18,0x00			0x04,0x0f,0x1c,0xf0,0xfd,0x00,0x18,0x00

	# GFX11: image_get_resinfo v[4:7], v32, s[96:103] dmask:0xf dim:SQ_RSRC_IMG_3D ; encoding: [0x08,0x0f,0x5c,0xf0,0x20,0x04,0x18,0x00]			# GFX11: image_get_resinfo v[4:7], v32, s[96:103] dmask:0xf dim:SQ_RSRC_IMG_3D ; encoding: [0x08,0x0f,0x5c,0xf0,0x20,0x04,0x18,0x00]
	0x08,0x0f,0x5c,0xf0,0x20,0x04,0x18,0x00			0x08,0x0f,0x5c,0xf0,0x20,0x04,0x18,0x00

	# GFX11: image_atomic_swap v4, v[32:34], s[96:103] dmask:0x1 dim:SQ_RSRC_IMG_3D glc ; encoding: [0x08,0x41,0x28,0xf0,0x20,0x04,0x18,0x00]			# GFX11: image_atomic_swap v4, v[32:34], s[96:103] dmask:0x1 dim:SQ_RSRC_IMG_3D glc ; encoding: [0x08,0x41,0x28,0xf0,0x20,0x04,0x18,0x00]
	0x08,0x41,0x28,0xf0,0x20,0x04,0x18,0x00			0x08,0x41,0x28,0xf0,0x20,0x04,0x18,0x00

				# GFX11: image_atomic_swap v[0:2], [v4, v5], s[4:11] dmask:0x3 dim:SQ_RSRC_IMG_2D tfe ; encoding: [0x05,0x03,0x28,0xf0,0x04,0x00,0x21,0x00,0x05,0x00,0x00,0x00]
				0x05,0x03,0x28,0xf0,0x04,0x00,0x21,0x00,0x05,0x00,0x00,0x00

	# GFX11: image_atomic_cmpswap v[4:5], [v32, v1, v2], s[96:103] dmask:0x3 dim:SQ_RSRC_IMG_3D glc ; encoding: [0x09,0x43,0x2c,0xf0,0x20,0x04,0x18,0x00,0x01,0x02,0x00,0x00]			# GFX11: image_atomic_cmpswap v[4:5], [v32, v1, v2], s[96:103] dmask:0x3 dim:SQ_RSRC_IMG_3D glc ; encoding: [0x09,0x43,0x2c,0xf0,0x20,0x04,0x18,0x00,0x01,0x02,0x00,0x00]
	0x09,0x43,0x2c,0xf0,0x20,0x04,0x18,0x00,0x01,0x02,0x00,0x00			0x09,0x43,0x2c,0xf0,0x20,0x04,0x18,0x00,0x01,0x02,0x00,0x00

				# GFX11: image_atomic_cmpswap v[0:2], [v3, v4], s[4:11] dmask:0x3 dim:SQ_RSRC_IMG_2D tfe ; encoding: [0x05,0x03,0x2c,0xf0,0x03,0x00,0x21,0x00,0x04,0x00,0x00,0x00]
				0x05,0x03,0x2c,0xf0,0x03,0x00,0x21,0x00,0x04,0x00,0x00,0x00

				# GFX11: image_atomic_cmpswap v[0:4], v[5:6], s[4:11] dmask:0xf dim:SQ_RSRC_IMG_2D tfe ; encoding: [0x04,0x0f,0x2c,0xf0,0x05,0x00,0x21,0x00]
				0x04,0x0f,0x2c,0xf0,0x05,0x00,0x21,0x00

	# GFX11: image_atomic_add v[4:5], [v32, v1, v2], s[96:103] dmask:0x3 dim:SQ_RSRC_IMG_CUBE glc ; encoding: [0x0d,0x43,0x30,0xf0,0x20,0x04,0x18,0x00,0x01,0x02,0x00,0x00]			# GFX11: image_atomic_add v[4:5], [v32, v1, v2], s[96:103] dmask:0x3 dim:SQ_RSRC_IMG_CUBE glc ; encoding: [0x0d,0x43,0x30,0xf0,0x20,0x04,0x18,0x00,0x01,0x02,0x00,0x00]
	0x0d,0x43,0x30,0xf0,0x20,0x04,0x18,0x00,0x01,0x02,0x00,0x00			0x0d,0x43,0x30,0xf0,0x20,0x04,0x18,0x00,0x01,0x02,0x00,0x00

	# GFX11: image_atomic_sub v4, [v32, v1], s[96:103] dmask:0x1 dim:SQ_RSRC_IMG_1D_ARRAY glc ; encoding: [0x11,0x41,0x34,0xf0,0x20,0x04,0x18,0x00,0x01,0x00,0x00,0x00]			# GFX11: image_atomic_sub v4, [v32, v1], s[96:103] dmask:0x1 dim:SQ_RSRC_IMG_1D_ARRAY glc ; encoding: [0x11,0x41,0x34,0xf0,0x20,0x04,0x18,0x00,0x01,0x00,0x00,0x00]
	0x11,0x41,0x34,0xf0,0x20,0x04,0x18,0x00,0x01,0x00,0x00,0x00			0x11,0x41,0x34,0xf0,0x20,0x04,0x18,0x00,0x01,0x00,0x00,0x00

	# GFX11: image_atomic_smin v4, [v32, v1, v2], s[96:103] dmask:0x1 dim:SQ_RSRC_IMG_2D_ARRAY glc ; encoding: [0x15,0x41,0x38,0xf0,0x20,0x04,0x18,0x00,0x01,0x02,0x00,0x00]			# GFX11: image_atomic_smin v4, [v32, v1, v2], s[96:103] dmask:0x1 dim:SQ_RSRC_IMG_2D_ARRAY glc ; encoding: [0x15,0x41,0x38,0xf0,0x20,0x04,0x18,0x00,0x01,0x02,0x00,0x00]
	0x15,0x41,0x38,0xf0,0x20,0x04,0x18,0x00,0x01,0x02,0x00,0x00			0x15,0x41,0x38,0xf0,0x20,0x04,0x18,0x00,0x01,0x02,0x00,0x00
	▲ Show 20 Lines • Show All 101 Lines • ▼ Show 20 Lines
	0x09,0x07,0xa4,0xf0,0x20,0x40,0x01,0x64,0x10,0x00,0x02,0x00			0x09,0x07,0xa4,0xf0,0x20,0x40,0x01,0x64,0x10,0x00,0x02,0x00

	# GFX11: image_sample_c_lz_o v[64:66], [v32, v0, v16], s[4:11], s[100:103] dmask:0x7 dim:SQ_RSRC_IMG_1D ; encoding: [0x01,0x07,0xb8,0xf0,0x20,0x40,0x01,0x64,0x00,0x10,0x00,0x00]			# GFX11: image_sample_c_lz_o v[64:66], [v32, v0, v16], s[4:11], s[100:103] dmask:0x7 dim:SQ_RSRC_IMG_1D ; encoding: [0x01,0x07,0xb8,0xf0,0x20,0x40,0x01,0x64,0x00,0x10,0x00,0x00]
	0x01,0x07,0xb8,0xf0,0x20,0x40,0x01,0x64,0x00,0x10,0x00,0x00			0x01,0x07,0xb8,0xf0,0x20,0x40,0x01,0x64,0x00,0x10,0x00,0x00

	# GFX11: image_gather4 v[64:67], v32, s[4:11], s[100:103] dmask:0x1 dim:SQ_RSRC_IMG_1D ; encoding: [0x00,0x01,0xbc,0xf0,0x20,0x40,0x01,0x64]			# GFX11: image_gather4 v[64:67], v32, s[4:11], s[100:103] dmask:0x1 dim:SQ_RSRC_IMG_1D ; encoding: [0x00,0x01,0xbc,0xf0,0x20,0x40,0x01,0x64]
	0x00,0x01,0xbc,0xf0,0x20,0x40,0x01,0x64			0x00,0x01,0xbc,0xf0,0x20,0x40,0x01,0x64

				# GFX11: image_gather4 v[64:68], v32, s[4:11], s[100:103] dmask:0x1 dim:SQ_RSRC_IMG_1D tfe ; encoding: [0x00,0x01,0xbc,0xf0,0x20,0x40,0x21,0x64]
				0x00,0x01,0xbc,0xf0,0x20,0x40,0x21,0x64

				# GFX11: image_gather4 v[64:65], v32, s[4:11], s[100:103] dmask:0x1 dim:SQ_RSRC_IMG_1D d16 ; encoding: [0x00,0x01,0xbe,0xf0,0x20,0x40,0x01,0x64]
				0x00,0x01,0xbe,0xf0,0x20,0x40,0x01,0x64

				# GFX11: image_gather4 v[64:66], v32, s[4:11], s[100:103] dmask:0x1 dim:SQ_RSRC_IMG_1D tfe d16 ; encoding: [0x00,0x01,0xbe,0xf0,0x20,0x40,0x21,0x64]
				0x00,0x01,0xbe,0xf0,0x20,0x40,0x21,0x64

	# GFX11: image_gather4_cl v[64:67], v[32:35], s[4:11], s[100:103] dmask:0x2 dim:SQ_RSRC_IMG_CUBE ; encoding: [0x0c,0x02,0x80,0xf1,0x20,0x40,0x01,0x64]			# GFX11: image_gather4_cl v[64:67], v[32:35], s[4:11], s[100:103] dmask:0x2 dim:SQ_RSRC_IMG_CUBE ; encoding: [0x0c,0x02,0x80,0xf1,0x20,0x40,0x01,0x64]
	0x0c,0x02,0x80,0xf1,0x20,0x40,0x01,0x64			0x0c,0x02,0x80,0xf1,0x20,0x40,0x01,0x64

	# GFX11: image_gather4_l v[64:67], [v32, v0, v4], s[4:11], s[100:103] dmask:0x4 dim:SQ_RSRC_IMG_1D_ARRAY ; encoding: [0x11,0x04,0xc0,0xf0,0x20,0x40,0x01,0x64,0x00,0x04,0x00,0x00]			# GFX11: image_gather4_l v[64:67], [v32, v0, v4], s[4:11], s[100:103] dmask:0x4 dim:SQ_RSRC_IMG_1D_ARRAY ; encoding: [0x11,0x04,0xc0,0xf0,0x20,0x40,0x01,0x64,0x00,0x04,0x00,0x00]
	0x11,0x04,0xc0,0xf0,0x20,0x40,0x01,0x64,0x00,0x04,0x00,0x00			0x11,0x04,0xc0,0xf0,0x20,0x40,0x01,0x64,0x00,0x04,0x00,0x00

	# GFX11: image_gather4_b v[64:67], [v32, v0, v4], s[4:11], s[100:103] dmask:0x8 dim:SQ_RSRC_IMG_2D ; encoding: [0x05,0x08,0xc4,0xf0,0x20,0x40,0x01,0x64,0x00,0x04,0x00,0x00]			# GFX11: image_gather4_b v[64:67], [v32, v0, v4], s[4:11], s[100:103] dmask:0x8 dim:SQ_RSRC_IMG_2D ; encoding: [0x05,0x08,0xc4,0xf0,0x20,0x40,0x01,0x64,0x00,0x04,0x00,0x00]
	0x05,0x08,0xc4,0xf0,0x20,0x40,0x01,0x64,0x00,0x04,0x00,0x00			0x05,0x08,0xc4,0xf0,0x20,0x40,0x01,0x64,0x00,0x04,0x00,0x00
	▲ Show 20 Lines • Show All 75 Lines • Show Last 20 Lines

llvm/test/MC/Disassembler/AMDGPU/mimg_vi.txt

	Show First 20 Lines • Show All 185 Lines • ▼ Show 20 Lines
	#===------------------------------------------------------------------------===#			#===------------------------------------------------------------------------===#
	# Invalid image atomics (incorrect dmask value).			# Invalid image atomics (incorrect dmask value).
	# Disassembler may produce a partially incorrect instruction but should not fail.			# Disassembler may produce a partially incorrect instruction but should not fail.
	#===------------------------------------------------------------------------===#			#===------------------------------------------------------------------------===#

	# VI: image_atomic_add v5, v1, s[8:15] dmask:0x2 unorm ; encoding: [0x00,0x12,0x48,0xf0,0x01,0x05,0x02,0x00]			# VI: image_atomic_add v5, v1, s[8:15] dmask:0x2 unorm ; encoding: [0x00,0x12,0x48,0xf0,0x01,0x05,0x02,0x00]
	0x00,0x12,0x48,0xf0,0x01,0x05,0x02,0x00			0x00,0x12,0x48,0xf0,0x01,0x05,0x02,0x00

	# VI: image_atomic_add v5, v1, s[8:15] dmask:0x7 unorm ; encoding: [0x00,0x17,0x48,0xf0,0x01,0x05,0x02,0x00]			# VI: image_atomic_add v[5:7], v1, s[8:15] dmask:0x7 unorm ; encoding: [0x00,0x17,0x48,0xf0,0x01,0x05,0x02,0x00]
				arsenm · Unsubmitted · Not Done · Reply · Inline Actions: Why did this get an additional output register if it's not using TFE? arsenm: Why did this get an additional output register if it's not using TFE?
				mbrkusanin (Author) · Unsubmitted · Not Done · Reply · Inline Actions: It is one of the examples with a wrong encoding for dmask. Since dmask has 3 bits enabled, it will choose vgpr96. The only valid masks are 0x1 and 0x3. It does not seem to me that it is worth updating the disassembler for this. mbrkusanin: It is one of the examples with a wrong encoding for dmask. Since dmask has 3 bits enabled, it will…
	0x00,0x17,0x48,0xf0,0x01,0x05,0x02,0x00			0x00,0x17,0x48,0xf0,0x01,0x05,0x02,0x00

	# VI: image_atomic_add v5, v1, s[8:15] dmask:0xf unorm ; encoding: [0x00,0x1f,0x48,0xf0,0x01,0x05,0x02,0x00]			# VI: image_atomic_add v5, v1, s[8:15] dmask:0xf unorm ; encoding: [0x00,0x1f,0x48,0xf0,0x01,0x05,0x02,0x00]
	0x00,0x1f,0x48,0xf0,0x01,0x05,0x02,0x00			0x00,0x1f,0x48,0xf0,0x01,0x05,0x02,0x00

	# VI: image_atomic_cmpswap v[5:6], v1, s[8:15] unorm ; encoding: [0x00,0x10,0x44,0xf0,0x01,0x05,0x02,0x00]			# VI: image_atomic_cmpswap v[5:6], v1, s[8:15] unorm ; encoding: [0x00,0x10,0x44,0xf0,0x01,0x05,0x02,0x00]
	0x00,0x10,0x44,0xf0,0x01,0x05,0x02,0x00			0x00,0x10,0x44,0xf0,0x01,0x05,0x02,0x00

	# VI: image_atomic_cmpswap v[5:6], v1, s[8:15] dmask:0x1 unorm ; encoding: [0x00,0x11,0x44,0xf0,0x01,0x05,0x02,0x00]			# VI: image_atomic_cmpswap v[5:6], v1, s[8:15] dmask:0x1 unorm ; encoding: [0x00,0x11,0x44,0xf0,0x01,0x05,0x02,0x00]
	0x00,0x11,0x44,0xf0,0x01,0x05,0x02,0x00			0x00,0x11,0x44,0xf0,0x01,0x05,0x02,0x00

	# VI: image_atomic_cmpswap v[5:6], v1, s[8:15] dmask:0xe unorm ; encoding: [0x00,0x1e,0x44,0xf0,0x01,0x05,0x02,0x00]			# VI: image_atomic_cmpswap v[5:7], v1, s[8:15] dmask:0xe unorm ; encoding: [0x00,0x1e,0x44,0xf0,0x01,0x05,0x02,0x00]
	0x00,0x1e,0x44,0xf0,0x01,0x05,0x02,0x00			0x00,0x1e,0x44,0xf0,0x01,0x05,0x02,0x00

	#===------------------------------------------------------------------------===#			#===------------------------------------------------------------------------===#
	# Image gather			# Image gather
	#===------------------------------------------------------------------------===#			#===------------------------------------------------------------------------===#

	# VI: image_gather4 v[252:255], v1, s[8:15], s[12:15] dmask:0x1 ; encoding: [0x00,0x01,0x00,0xf1,0x01,0xfc,0x62,0x00]			# VI: image_gather4 v[252:255], v1, s[8:15], s[12:15] dmask:0x1 ; encoding: [0x00,0x01,0x00,0xf1,0x01,0xfc,0x62,0x00]
	0x00,0x01,0x00,0xf1,0x01,0xfc,0x62,0x00			0x00,0x01,0x00,0xf1,0x01,0xfc,0x62,0x00
	Show All 24 Lines

This is an archive of the discontinued LLVM Phabricator instance.

[AMDGPU] Account for VData size increase from tfe bit for image instructions (Abandoned, Public)

Details

Diff Detail

Unit TestsFailed

Event Timeline

Revision Contents

Diff 448596

llvm/lib/Target/AMDGPU/MIMGInstructions.td

llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.image.atomic.dim.mir

llvm/test/CodeGen/AMDGPU/release-vgprs.mir

llvm/test/MC/AMDGPU/gfx10_asm_mimg.s

llvm/test/MC/AMDGPU/gfx11_asm_mimg.s

llvm/test/MC/Disassembler/AMDGPU/gfx10_mimg.txt

llvm/test/MC/Disassembler/AMDGPU/gfx11_mimg.txt

llvm/test/MC/Disassembler/AMDGPU/mimg_vi.txt

[AMDGPU] Account for VData size increase from tfe bit for image instructions
AbandonedPublic