Differential D138949 (Diff 478865): llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.image.load.1d.ll
define amdgpu_ps void @load_1d_vgpr_vaddr__sgpr_srsrc(<8 x i32> inreg %rsrc, i32 %s) {
; FAST-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5
; FAST-NEXT: [[COPY4:%[0-9]+]]:sgpr(s32) = COPY $sgpr6
; FAST-NEXT: [[COPY5:%[0-9]+]]:sgpr(s32) = COPY $sgpr7
; FAST-NEXT: [[COPY6:%[0-9]+]]:sgpr(s32) = COPY $sgpr8
; FAST-NEXT: [[COPY7:%[0-9]+]]:sgpr(s32) = COPY $sgpr9
; FAST-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
; FAST-NEXT: [[COPY8:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
; FAST-NEXT: [[DEF:%[0-9]+]]:sgpr(p1) = G_IMPLICIT_DEF
-; FAST-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.1d), 15, [[COPY8]](s32), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load (<4 x s32>) from custom "ImageResource")
+; FAST-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.1d), 15, [[COPY8]](s32), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load (<4 x s32>), addrspace 7)
; FAST-NEXT: [[COPY9:%[0-9]+]]:vgpr(p1) = COPY [[DEF]](p1)
; FAST-NEXT: G_STORE [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>), [[COPY9]](p1) :: (store (<4 x s32>) into `ptr addrspace(1) undef`, addrspace 1)
; FAST-NEXT: S_ENDPGM 0
; GREEDY-LABEL: name: load_1d_vgpr_vaddr__sgpr_srsrc
; GREEDY: bb.1 (%ir-block.0):
; GREEDY-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0
; GREEDY-NEXT: {{ $}}
; GREEDY-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2
; GREEDY-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3
; GREEDY-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4
; GREEDY-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5
; GREEDY-NEXT: [[COPY4:%[0-9]+]]:sgpr(s32) = COPY $sgpr6
; GREEDY-NEXT: [[COPY5:%[0-9]+]]:sgpr(s32) = COPY $sgpr7
; GREEDY-NEXT: [[COPY6:%[0-9]+]]:sgpr(s32) = COPY $sgpr8
; GREEDY-NEXT: [[COPY7:%[0-9]+]]:sgpr(s32) = COPY $sgpr9
; GREEDY-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
; GREEDY-NEXT: [[COPY8:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
; GREEDY-NEXT: [[DEF:%[0-9]+]]:sgpr(p1) = G_IMPLICIT_DEF
-; GREEDY-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.1d), 15, [[COPY8]](s32), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load (<4 x s32>) from custom "ImageResource")
+; GREEDY-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.1d), 15, [[COPY8]](s32), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load (<4 x s32>), addrspace 7)
; GREEDY-NEXT: [[COPY9:%[0-9]+]]:vgpr(p1) = COPY [[DEF]](p1)
; GREEDY-NEXT: G_STORE [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>), [[COPY9]](p1) :: (store (<4 x s32>) into `ptr addrspace(1) undef`, addrspace 1)
; GREEDY-NEXT: S_ENDPGM 0
%v = call <4 x float> @llvm.amdgcn.image.load.1d.v4f32.i32(i32 15, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
store <4 x float> %v, ptr addrspace(1) undef
ret void
}
define amdgpu_ps void @load_1d_sgpr_vaddr__sgpr_srsrc(<8 x i32> inreg %rsrc, i32 inreg %s) {
; FAST-NEXT: [[COPY4:%[0-9]+]]:sgpr(s32) = COPY $sgpr6
; FAST-NEXT: [[COPY5:%[0-9]+]]:sgpr(s32) = COPY $sgpr7
; FAST-NEXT: [[COPY6:%[0-9]+]]:sgpr(s32) = COPY $sgpr8
; FAST-NEXT: [[COPY7:%[0-9]+]]:sgpr(s32) = COPY $sgpr9
; FAST-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
; FAST-NEXT: [[COPY8:%[0-9]+]]:sgpr(s32) = COPY $sgpr10
; FAST-NEXT: [[DEF:%[0-9]+]]:sgpr(p1) = G_IMPLICIT_DEF
; FAST-NEXT: [[COPY9:%[0-9]+]]:vgpr(s32) = COPY [[COPY8]](s32)
-; FAST-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.1d), 15, [[COPY9]](s32), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load (<4 x s32>) from custom "ImageResource")
+; FAST-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.1d), 15, [[COPY9]](s32), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load (<4 x s32>), addrspace 7)
; FAST-NEXT: [[COPY10:%[0-9]+]]:vgpr(p1) = COPY [[DEF]](p1)
; FAST-NEXT: G_STORE [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>), [[COPY10]](p1) :: (store (<4 x s32>) into `ptr addrspace(1) undef`, addrspace 1)
; FAST-NEXT: S_ENDPGM 0
; GREEDY-LABEL: name: load_1d_sgpr_vaddr__sgpr_srsrc
; GREEDY: bb.1 (%ir-block.0):
; GREEDY-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10
; GREEDY-NEXT: {{ $}}
; GREEDY-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2
; GREEDY-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3
; GREEDY-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4
; GREEDY-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5
; GREEDY-NEXT: [[COPY4:%[0-9]+]]:sgpr(s32) = COPY $sgpr6
; GREEDY-NEXT: [[COPY5:%[0-9]+]]:sgpr(s32) = COPY $sgpr7
; GREEDY-NEXT: [[COPY6:%[0-9]+]]:sgpr(s32) = COPY $sgpr8
; GREEDY-NEXT: [[COPY7:%[0-9]+]]:sgpr(s32) = COPY $sgpr9
; GREEDY-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
; GREEDY-NEXT: [[COPY8:%[0-9]+]]:sgpr(s32) = COPY $sgpr10
; GREEDY-NEXT: [[DEF:%[0-9]+]]:sgpr(p1) = G_IMPLICIT_DEF
; GREEDY-NEXT: [[COPY9:%[0-9]+]]:vgpr(s32) = COPY [[COPY8]](s32)
-; GREEDY-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.1d), 15, [[COPY9]](s32), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load (<4 x s32>) from custom "ImageResource")
+; GREEDY-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.1d), 15, [[COPY9]](s32), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load (<4 x s32>), addrspace 7)
; GREEDY-NEXT: [[COPY10:%[0-9]+]]:vgpr(p1) = COPY [[DEF]](p1)
; GREEDY-NEXT: G_STORE [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>), [[COPY10]](p1) :: (store (<4 x s32>) into `ptr addrspace(1) undef`, addrspace 1)
; GREEDY-NEXT: S_ENDPGM 0
%v = call <4 x float> @llvm.amdgcn.image.load.1d.v4f32.i32(i32 15, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
store <4 x float> %v, ptr addrspace(1) undef
ret void
}
define amdgpu_ps void @load_1d_vgpr_vaddr__vgpr_srsrc(<8 x i32> %rsrc, i32 %s) {
; FAST-NEXT: [[ICMP3:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV15]](s64), [[UV11]]
; FAST-NEXT: [[AND2:%[0-9]+]]:vcc(s1) = G_AND [[AND1]], [[ICMP3]]
; FAST-NEXT: [[INT:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.ballot), [[AND2]](s1)
; FAST-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INT]](s64), implicit-def $exec, implicit-def $scc, implicit $exec
; FAST-NEXT: {{ $}}
; FAST-NEXT: bb.3:
; FAST-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000)
; FAST-NEXT: {{ $}}
-; FAST-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.1d), 15, [[COPY8]](s32), [[BUILD_VECTOR1]](<8 x s32>), 0, 0, 0 :: (dereferenceable load (<4 x s32>) from custom "ImageResource")
+; FAST-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.1d), 15, [[COPY8]](s32), [[BUILD_VECTOR1]](<8 x s32>), 0, 0, 0 :: (dereferenceable load (<4 x s32>), addrspace 7)
; FAST-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
; FAST-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec
; FAST-NEXT: {{ $}}
; FAST-NEXT: bb.4:
; FAST-NEXT: successors: %bb.5(0x80000000)
; FAST-NEXT: {{ $}}
; FAST-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_]]
; FAST-NEXT: {{ $}}
; GREEDY-NEXT: [[ICMP3:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV15]](s64), [[UV11]]
; GREEDY-NEXT: [[AND2:%[0-9]+]]:vcc(s1) = G_AND [[AND1]], [[ICMP3]]
; GREEDY-NEXT: [[INT:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.ballot), [[AND2]](s1)
; GREEDY-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INT]](s64), implicit-def $exec, implicit-def $scc, implicit $exec
; GREEDY-NEXT: {{ $}}
; GREEDY-NEXT: bb.3:
; GREEDY-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000)
; GREEDY-NEXT: {{ $}}
-; GREEDY-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.1d), 15, [[COPY8]](s32), [[BUILD_VECTOR1]](<8 x s32>), 0, 0, 0 :: (dereferenceable load (<4 x s32>) from custom "ImageResource")
+; GREEDY-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.1d), 15, [[COPY8]](s32), [[BUILD_VECTOR1]](<8 x s32>), 0, 0, 0 :: (dereferenceable load (<4 x s32>), addrspace 7)
; GREEDY-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
; GREEDY-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec
; GREEDY-NEXT: {{ $}}
; GREEDY-NEXT: bb.4:
; GREEDY-NEXT: successors: %bb.5(0x80000000)
; GREEDY-NEXT: {{ $}}
; GREEDY-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_]]
; GREEDY-NEXT: {{ $}}
define amdgpu_ps void @load_1d_sgpr_vaddr__vgpr_srsrc(<8 x i32> %rsrc, i32 inreg %s) {
; FAST-NEXT: [[ICMP3:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV15]](s64), [[UV11]]
; FAST-NEXT: [[AND2:%[0-9]+]]:vcc(s1) = G_AND [[AND1]], [[ICMP3]]
; FAST-NEXT: [[INT:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.ballot), [[AND2]](s1)
; FAST-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INT]](s64), implicit-def $exec, implicit-def $scc, implicit $exec
; FAST-NEXT: {{ $}}
; FAST-NEXT: bb.3:
; FAST-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000)
; FAST-NEXT: {{ $}}
-; FAST-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.1d), 15, [[COPY9]](s32), [[BUILD_VECTOR1]](<8 x s32>), 0, 0, 0 :: (dereferenceable load (<4 x s32>) from custom "ImageResource")
+; FAST-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.1d), 15, [[COPY9]](s32), [[BUILD_VECTOR1]](<8 x s32>), 0, 0, 0 :: (dereferenceable load (<4 x s32>), addrspace 7)
; FAST-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
; FAST-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec
; FAST-NEXT: {{ $}}
; FAST-NEXT: bb.4:
; FAST-NEXT: successors: %bb.5(0x80000000)
; FAST-NEXT: {{ $}}
; FAST-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_]]
; FAST-NEXT: {{ $}}
; GREEDY-NEXT: [[ICMP3:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV15]](s64), [[UV11]]
; GREEDY-NEXT: [[AND2:%[0-9]+]]:vcc(s1) = G_AND [[AND1]], [[ICMP3]]
; GREEDY-NEXT: [[INT:%[0-9]+]]:sreg_64_xexec(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.ballot), [[AND2]](s1)
; GREEDY-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[INT]](s64), implicit-def $exec, implicit-def $scc, implicit $exec
; GREEDY-NEXT: {{ $}}
; GREEDY-NEXT: bb.3:
; GREEDY-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000)
; GREEDY-NEXT: {{ $}}
-; GREEDY-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.1d), 15, [[COPY9]](s32), [[BUILD_VECTOR1]](<8 x s32>), 0, 0, 0 :: (dereferenceable load (<4 x s32>) from custom "ImageResource")
+; GREEDY-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.1d), 15, [[COPY9]](s32), [[BUILD_VECTOR1]](<8 x s32>), 0, 0, 0 :: (dereferenceable load (<4 x s32>), addrspace 7)
; GREEDY-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
; GREEDY-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec
; GREEDY-NEXT: {{ $}}
; GREEDY-NEXT: bb.4:
; GREEDY-NEXT: successors: %bb.5(0x80000000)
; GREEDY-NEXT: {{ $}}
; GREEDY-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_]]
; GREEDY-NEXT: {{ $}}