Skip to content

Commit 1045928

Browse files
committedJun 21, 2018
AMDGPU: Convert test cases to the dimension-aware intrinsics
Summary: Also explicitly port over some tests in llvm.amdgcn.image.* that were missing. Some tests are removed because they no longer apply (i.e. explicitly testing building an address vector via insertelement). This is in preparation for the eventual removal of the old-style intrinsics. Some additional notes: - constant-address-space-32bit.ll: change some GCN-NEXT to GCN because the instruction schedule was subtly altered - insert_vector_elt.ll: the old test didn't actually test anything, because %tmp1 was not used; remove the load, because it doesn't work (Because of the amdgpu_ps calling convention? In any case, it's orthogonal to what the test claims to be testing.) Change-Id: Idfa99b6512ad139e755e82b8b89548ab08f0afcf Reviewers: arsenm, rampitec Subscribers: MatzeB, qcolombet, kzhuravl, wdng, yaxunl, dstuttard, tpr, t-tye, javed.absar, llvm-commits Differential Revision: https://reviews.llvm.org/D48018 llvm-svn: 335229
1 parent 7a9c03f commit 1045928

25 files changed

+811
-400
lines changed
 

‎llvm/test/Analysis/DivergenceAnalysis/AMDGPU/llvm.amdgcn.image.atomic.ll

+39-39
Original file line numberDiff line numberDiff line change
@@ -1,105 +1,105 @@
11
;RUN: opt -mtriple=amdgcn-mesa-mesa3d -analyze -divergence %s | FileCheck %s
22

3-
;CHECK: DIVERGENT: %orig = call i32 @llvm.amdgcn.image.atomic.swap.i32(
3+
;CHECK: DIVERGENT: %orig = call i32 @llvm.amdgcn.image.atomic.swap.1d.i32.i32(
44
define float @image_atomic_swap(<8 x i32> inreg %rsrc, i32 inreg %addr, i32 inreg %data) #0 {
55
main_body:
6-
%orig = call i32 @llvm.amdgcn.image.atomic.swap.i32(i32 %data, i32 %addr, <8 x i32> %rsrc, i1 0, i1 0, i1 0)
6+
%orig = call i32 @llvm.amdgcn.image.atomic.swap.1d.i32.i32(i32 %data, i32 %addr, <8 x i32> %rsrc, i32 0, i32 0)
77
%r = bitcast i32 %orig to float
88
ret float %r
99
}
1010

11-
;CHECK: DIVERGENT: %orig = call i32 @llvm.amdgcn.image.atomic.add.i32(
11+
;CHECK: DIVERGENT: %orig = call i32 @llvm.amdgcn.image.atomic.add.1d.i32.i32(
1212
define float @image_atomic_add(<8 x i32> inreg %rsrc, i32 inreg %addr, i32 inreg %data) #0 {
1313
main_body:
14-
%orig = call i32 @llvm.amdgcn.image.atomic.add.i32(i32 %data, i32 %addr, <8 x i32> %rsrc, i1 0, i1 0, i1 0)
14+
%orig = call i32 @llvm.amdgcn.image.atomic.add.1d.i32.i32(i32 %data, i32 %addr, <8 x i32> %rsrc, i32 0, i32 0)
1515
%r = bitcast i32 %orig to float
1616
ret float %r
1717
}
1818

19-
;CHECK: DIVERGENT: %orig = call i32 @llvm.amdgcn.image.atomic.sub.i32(
19+
;CHECK: DIVERGENT: %orig = call i32 @llvm.amdgcn.image.atomic.sub.1d.i32.i32(
2020
define float @image_atomic_sub(<8 x i32> inreg %rsrc, i32 inreg %addr, i32 inreg %data) #0 {
2121
main_body:
22-
%orig = call i32 @llvm.amdgcn.image.atomic.sub.i32(i32 %data, i32 %addr, <8 x i32> %rsrc, i1 0, i1 0, i1 0)
22+
%orig = call i32 @llvm.amdgcn.image.atomic.sub.1d.i32.i32(i32 %data, i32 %addr, <8 x i32> %rsrc, i32 0, i32 0)
2323
%r = bitcast i32 %orig to float
2424
ret float %r
2525
}
2626

27-
;CHECK: DIVERGENT: %orig = call i32 @llvm.amdgcn.image.atomic.smin.i32(
27+
;CHECK: DIVERGENT: %orig = call i32 @llvm.amdgcn.image.atomic.smin.1d.i32.i32(
2828
define float @image_atomic_smin(<8 x i32> inreg %rsrc, i32 inreg %addr, i32 inreg %data) #0 {
2929
main_body:
30-
%orig = call i32 @llvm.amdgcn.image.atomic.smin.i32(i32 %data, i32 %addr, <8 x i32> %rsrc, i1 0, i1 0, i1 0)
30+
%orig = call i32 @llvm.amdgcn.image.atomic.smin.1d.i32.i32(i32 %data, i32 %addr, <8 x i32> %rsrc, i32 0, i32 0)
3131
%r = bitcast i32 %orig to float
3232
ret float %r
3333
}
3434

35-
;CHECK: DIVERGENT: %orig = call i32 @llvm.amdgcn.image.atomic.umin.i32(
35+
;CHECK: DIVERGENT: %orig = call i32 @llvm.amdgcn.image.atomic.umin.1d.i32.i32(
3636
define float @image_atomic_umin(<8 x i32> inreg %rsrc, i32 inreg %addr, i32 inreg %data) #0 {
3737
main_body:
38-
%orig = call i32 @llvm.amdgcn.image.atomic.umin.i32(i32 %data, i32 %addr, <8 x i32> %rsrc, i1 0, i1 0, i1 0)
38+
%orig = call i32 @llvm.amdgcn.image.atomic.umin.1d.i32.i32(i32 %data, i32 %addr, <8 x i32> %rsrc, i32 0, i32 0)
3939
%r = bitcast i32 %orig to float
4040
ret float %r
4141
}
4242

43-
;CHECK: DIVERGENT: %orig = call i32 @llvm.amdgcn.image.atomic.smax.i32(
43+
;CHECK: DIVERGENT: %orig = call i32 @llvm.amdgcn.image.atomic.smax.1d.i32.i32(
4444
define float @image_atomic_smax(<8 x i32> inreg %rsrc, i32 inreg %addr, i32 inreg %data) #0 {
4545
main_body:
46-
%orig = call i32 @llvm.amdgcn.image.atomic.smax.i32(i32 %data, i32 %addr, <8 x i32> %rsrc, i1 0, i1 0, i1 0)
46+
%orig = call i32 @llvm.amdgcn.image.atomic.smax.1d.i32.i32(i32 %data, i32 %addr, <8 x i32> %rsrc, i32 0, i32 0)
4747
%r = bitcast i32 %orig to float
4848
ret float %r
4949
}
5050

51-
;CHECK: DIVERGENT: %orig = call i32 @llvm.amdgcn.image.atomic.umax.i32(
51+
;CHECK: DIVERGENT: %orig = call i32 @llvm.amdgcn.image.atomic.umax.1d.i32.i32(
5252
define float @image_atomic_umax(<8 x i32> inreg %rsrc, i32 inreg %addr, i32 inreg %data) #0 {
5353
main_body:
54-
%orig = call i32 @llvm.amdgcn.image.atomic.umax.i32(i32 %data, i32 %addr, <8 x i32> %rsrc, i1 0, i1 0, i1 0)
54+
%orig = call i32 @llvm.amdgcn.image.atomic.umax.1d.i32.i32(i32 %data, i32 %addr, <8 x i32> %rsrc, i32 0, i32 0)
5555
%r = bitcast i32 %orig to float
5656
ret float %r
5757
}
5858

59-
;CHECK: DIVERGENT: %orig = call i32 @llvm.amdgcn.image.atomic.and.i32(
59+
;CHECK: DIVERGENT: %orig = call i32 @llvm.amdgcn.image.atomic.and.1d.i32.i32(
6060
define float @image_atomic_and(<8 x i32> inreg %rsrc, i32 inreg %addr, i32 inreg %data) #0 {
6161
main_body:
62-
%orig = call i32 @llvm.amdgcn.image.atomic.and.i32(i32 %data, i32 %addr, <8 x i32> %rsrc, i1 0, i1 0, i1 0)
62+
%orig = call i32 @llvm.amdgcn.image.atomic.and.1d.i32.i32(i32 %data, i32 %addr, <8 x i32> %rsrc, i32 0, i32 0)
6363
%r = bitcast i32 %orig to float
6464
ret float %r
6565
}
6666

67-
;CHECK: DIVERGENT: %orig = call i32 @llvm.amdgcn.image.atomic.or.i32(
67+
;CHECK: DIVERGENT: %orig = call i32 @llvm.amdgcn.image.atomic.or.1d.i32.i32(
6868
define float @image_atomic_or(<8 x i32> inreg %rsrc, i32 inreg %addr, i32 inreg %data) #0 {
6969
main_body:
70-
%orig = call i32 @llvm.amdgcn.image.atomic.or.i32(i32 %data, i32 %addr, <8 x i32> %rsrc, i1 0, i1 0, i1 0)
70+
%orig = call i32 @llvm.amdgcn.image.atomic.or.1d.i32.i32(i32 %data, i32 %addr, <8 x i32> %rsrc, i32 0, i32 0)
7171
%r = bitcast i32 %orig to float
7272
ret float %r
7373
}
7474

75-
;CHECK: DIVERGENT: %orig = call i32 @llvm.amdgcn.image.atomic.xor.i32(
75+
;CHECK: DIVERGENT: %orig = call i32 @llvm.amdgcn.image.atomic.xor.1d.i32.i32(
7676
define float @image_atomic_xor(<8 x i32> inreg %rsrc, i32 inreg %addr, i32 inreg %data) #0 {
7777
main_body:
78-
%orig = call i32 @llvm.amdgcn.image.atomic.xor.i32(i32 %data, i32 %addr, <8 x i32> %rsrc, i1 0, i1 0, i1 0)
78+
%orig = call i32 @llvm.amdgcn.image.atomic.xor.1d.i32.i32(i32 %data, i32 %addr, <8 x i32> %rsrc, i32 0, i32 0)
7979
%r = bitcast i32 %orig to float
8080
ret float %r
8181
}
8282

83-
;CHECK: DIVERGENT: %orig = call i32 @llvm.amdgcn.image.atomic.inc.i32(
83+
;CHECK: DIVERGENT: %orig = call i32 @llvm.amdgcn.image.atomic.inc.1d.i32.i32(
8484
define float @image_atomic_inc(<8 x i32> inreg %rsrc, i32 inreg %addr, i32 inreg %data) #0 {
8585
main_body:
86-
%orig = call i32 @llvm.amdgcn.image.atomic.inc.i32(i32 %data, i32 %addr, <8 x i32> %rsrc, i1 0, i1 0, i1 0)
86+
%orig = call i32 @llvm.amdgcn.image.atomic.inc.1d.i32.i32(i32 %data, i32 %addr, <8 x i32> %rsrc, i32 0, i32 0)
8787
%r = bitcast i32 %orig to float
8888
ret float %r
8989
}
9090

91-
;CHECK: DIVERGENT: %orig = call i32 @llvm.amdgcn.image.atomic.dec.i32(
91+
;CHECK: DIVERGENT: %orig = call i32 @llvm.amdgcn.image.atomic.dec.1d.i32.i32(
9292
define float @image_atomic_dec(<8 x i32> inreg %rsrc, i32 inreg %addr, i32 inreg %data) #0 {
9393
main_body:
94-
%orig = call i32 @llvm.amdgcn.image.atomic.dec.i32(i32 %data, i32 %addr, <8 x i32> %rsrc, i1 0, i1 0, i1 0)
94+
%orig = call i32 @llvm.amdgcn.image.atomic.dec.1d.i32.i32(i32 %data, i32 %addr, <8 x i32> %rsrc, i32 0, i32 0)
9595
%r = bitcast i32 %orig to float
9696
ret float %r
9797
}
9898

99-
;CHECK: DIVERGENT: %orig = call i32 @llvm.amdgcn.image.atomic.cmpswap.i32(
99+
;CHECK: DIVERGENT: %orig = call i32 @llvm.amdgcn.image.atomic.cmpswap.1d.i32.i32(
100100
define float @image_atomic_cmpswap(<8 x i32> inreg %rsrc, i32 inreg %addr, i32 inreg %data, i32 inreg %cmp) #0 {
101101
main_body:
102-
%orig = call i32 @llvm.amdgcn.image.atomic.cmpswap.i32(i32 %data, i32 %cmp, i32 %addr, <8 x i32> %rsrc, i1 0, i1 0, i1 0)
102+
%orig = call i32 @llvm.amdgcn.image.atomic.cmpswap.1d.i32.i32(i32 %data, i32 %cmp, i32 %addr, <8 x i32> %rsrc, i32 0, i32 0)
103103
%r = bitcast i32 %orig to float
104104
ret float %r
105105
}
@@ -112,19 +112,19 @@ main_body:
112112
ret float %r
113113
}
114114

115-
declare i32 @llvm.amdgcn.image.atomic.swap.i32(i32, i32, <8 x i32>, i1, i1, i1) #0
116-
declare i32 @llvm.amdgcn.image.atomic.add.i32(i32, i32, <8 x i32>, i1, i1, i1) #0
117-
declare i32 @llvm.amdgcn.image.atomic.sub.i32(i32, i32, <8 x i32>, i1, i1, i1) #0
118-
declare i32 @llvm.amdgcn.image.atomic.smin.i32(i32, i32, <8 x i32>, i1, i1, i1) #0
119-
declare i32 @llvm.amdgcn.image.atomic.umin.i32(i32, i32, <8 x i32>, i1, i1, i1) #0
120-
declare i32 @llvm.amdgcn.image.atomic.smax.i32(i32, i32, <8 x i32>, i1, i1, i1) #0
121-
declare i32 @llvm.amdgcn.image.atomic.umax.i32(i32, i32, <8 x i32>, i1, i1, i1) #0
122-
declare i32 @llvm.amdgcn.image.atomic.and.i32(i32, i32, <8 x i32>, i1, i1, i1) #0
123-
declare i32 @llvm.amdgcn.image.atomic.or.i32(i32, i32, <8 x i32>, i1, i1, i1) #0
124-
declare i32 @llvm.amdgcn.image.atomic.xor.i32(i32, i32, <8 x i32>, i1, i1, i1) #0
125-
declare i32 @llvm.amdgcn.image.atomic.inc.i32(i32, i32, <8 x i32>, i1, i1, i1) #0
126-
declare i32 @llvm.amdgcn.image.atomic.dec.i32(i32, i32, <8 x i32>, i1, i1, i1) #0
127-
declare i32 @llvm.amdgcn.image.atomic.cmpswap.i32(i32, i32, i32, <8 x i32>,i1, i1, i1) #0
115+
declare i32 @llvm.amdgcn.image.atomic.swap.1d.i32.i32(i32, i32, <8 x i32>, i32, i32) #0
116+
declare i32 @llvm.amdgcn.image.atomic.add.1d.i32.i32(i32, i32, <8 x i32>, i32, i32) #0
117+
declare i32 @llvm.amdgcn.image.atomic.sub.1d.i32.i32(i32, i32, <8 x i32>, i32, i32) #0
118+
declare i32 @llvm.amdgcn.image.atomic.smin.1d.i32.i32(i32, i32, <8 x i32>, i32, i32) #0
119+
declare i32 @llvm.amdgcn.image.atomic.umin.1d.i32.i32(i32, i32, <8 x i32>, i32, i32) #0
120+
declare i32 @llvm.amdgcn.image.atomic.smax.1d.i32.i32(i32, i32, <8 x i32>, i32, i32) #0
121+
declare i32 @llvm.amdgcn.image.atomic.umax.1d.i32.i32(i32, i32, <8 x i32>, i32, i32) #0
122+
declare i32 @llvm.amdgcn.image.atomic.and.1d.i32.i32(i32, i32, <8 x i32>, i32, i32) #0
123+
declare i32 @llvm.amdgcn.image.atomic.or.1d.i32.i32(i32, i32, <8 x i32>, i32, i32) #0
124+
declare i32 @llvm.amdgcn.image.atomic.xor.1d.i32.i32(i32, i32, <8 x i32>, i32, i32) #0
125+
declare i32 @llvm.amdgcn.image.atomic.inc.1d.i32.i32(i32, i32, <8 x i32>, i32, i32) #0
126+
declare i32 @llvm.amdgcn.image.atomic.dec.1d.i32.i32(i32, i32, <8 x i32>, i32, i32) #0
127+
declare i32 @llvm.amdgcn.image.atomic.cmpswap.1d.i32.i32(i32, i32, i32, <8 x i32>, i32, i32) #0
128128

129129
declare i32 @llvm.amdgcn.image.atomic.add.2d.i32.i32(i32, i32, i32, <8 x i32>, i32, i32) #0
130130

Original file line numberDiff line numberDiff line change
@@ -1,13 +1,13 @@
11
; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
22

33
; GCN-LABEL: {{^}}adjust_writemask_crash_0_nochain:
4-
; GCN: image_get_lod v0, v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}} dmask:0x2
4+
; GCN: image_get_lod v0, v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}} dmask:0x2
55
; GCN-NOT: v1
66
; GCN-NOT: v0
77
; GCN: buffer_store_dword v0
88
define amdgpu_ps void @adjust_writemask_crash_0_nochain() #0 {
99
main_body:
10-
%tmp = call <2 x float> @llvm.amdgcn.image.getlod.v2f32.v2f32.v8i32(<2 x float> undef, <8 x i32> undef, <4 x i32> undef, i32 3, i1 false, i1 false, i1 false, i1 false, i1 false)
10+
%tmp = call <2 x float> @llvm.amdgcn.image.getlod.1d.v2f32.f32(i32 3, float undef, <8 x i32> undef, <4 x i32> undef, i1 0, i32 0, i32 0)
1111
%tmp1 = bitcast <2 x float> %tmp to <2 x i32>
1212
%tmp2 = shufflevector <2 x i32> %tmp1, <2 x i32> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
1313
%tmp3 = bitcast <4 x i32> %tmp2 to <4 x float>
@@ -17,13 +17,13 @@ main_body:
1717
}
1818

1919
; GCN-LABEL: {{^}}adjust_writemask_crash_1_nochain:
20-
; GCN: image_get_lod v0, v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}} dmask:0x1
20+
; GCN: image_get_lod v0, v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}} dmask:0x1
2121
; GCN-NOT: v1
2222
; GCN-NOT: v0
2323
; GCN: buffer_store_dword v0
2424
define amdgpu_ps void @adjust_writemask_crash_1_nochain() #0 {
2525
main_body:
26-
%tmp = call <2 x float> @llvm.amdgcn.image.getlod.v2f32.v2f32.v8i32(<2 x float> undef, <8 x i32> undef, <4 x i32> undef, i32 3, i1 false, i1 false, i1 false, i1 false, i1 false)
26+
%tmp = call <2 x float> @llvm.amdgcn.image.getlod.1d.v2f32.f32(i32 3, float undef, <8 x i32> undef, <4 x i32> undef, i1 0, i32 0, i32 0)
2727
%tmp1 = bitcast <2 x float> %tmp to <2 x i32>
2828
%tmp2 = shufflevector <2 x i32> %tmp1, <2 x i32> undef, <4 x i32> <i32 1, i32 0, i32 undef, i32 undef>
2929
%tmp3 = bitcast <4 x i32> %tmp2 to <4 x float>
@@ -33,13 +33,13 @@ main_body:
3333
}
3434

3535
; GCN-LABEL: {{^}}adjust_writemask_crash_0_chain:
36-
; GCN: image_sample v0, v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}} dmask:0x2
36+
; GCN: image_sample v0, v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}} dmask:0x2
3737
; GCN-NOT: v1
3838
; GCN-NOT: v0
3939
; GCN: buffer_store_dword v0
4040
define amdgpu_ps void @adjust_writemask_crash_0_chain() #0 {
4141
main_body:
42-
%tmp = call <2 x float> @llvm.amdgcn.image.sample.v2f32.v2f32.v8i32(<2 x float> undef, <8 x i32> undef, <4 x i32> undef, i32 3, i1 false, i1 false, i1 false, i1 false, i1 false)
42+
%tmp = call <2 x float> @llvm.amdgcn.image.sample.1d.v2f32.f32(i32 3, float undef, <8 x i32> undef, <4 x i32> undef, i1 0, i32 0, i32 0)
4343
%tmp1 = bitcast <2 x float> %tmp to <2 x i32>
4444
%tmp2 = shufflevector <2 x i32> %tmp1, <2 x i32> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
4545
%tmp3 = bitcast <4 x i32> %tmp2 to <4 x float>
@@ -49,13 +49,13 @@ main_body:
4949
}
5050

5151
; GCN-LABEL: {{^}}adjust_writemask_crash_1_chain:
52-
; GCN: image_sample v0, v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}} dmask:0x1
52+
; GCN: image_sample v0, v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}} dmask:0x1
5353
; GCN-NOT: v1
5454
; GCN-NOT: v0
5555
; GCN: buffer_store_dword v0
5656
define amdgpu_ps void @adjust_writemask_crash_1_chain() #0 {
5757
main_body:
58-
%tmp = call <2 x float> @llvm.amdgcn.image.sample.v2f32.v2f32.v8i32(<2 x float> undef, <8 x i32> undef, <4 x i32> undef, i32 3, i1 false, i1 false, i1 false, i1 false, i1 false)
58+
%tmp = call <2 x float> @llvm.amdgcn.image.sample.1d.v2f32.f32(i32 3, float undef, <8 x i32> undef, <4 x i32> undef, i1 0, i32 0, i32 0)
5959
%tmp1 = bitcast <2 x float> %tmp to <2 x i32>
6060
%tmp2 = shufflevector <2 x i32> %tmp1, <2 x i32> undef, <4 x i32> <i32 1, i32 0, i32 undef, i32 undef>
6161
%tmp3 = bitcast <4 x i32> %tmp2 to <4 x float>
@@ -66,7 +66,7 @@ main_body:
6666

6767
define amdgpu_ps void @adjust_writemask_crash_0_v4() #0 {
6868
main_body:
69-
%tmp = call <4 x float> @llvm.amdgcn.image.getlod.v4f32.v2f32.v8i32(<2 x float> undef, <8 x i32> undef, <4 x i32> undef, i32 5, i1 false, i1 false, i1 false, i1 false, i1 false)
69+
%tmp = call <4 x float> @llvm.amdgcn.image.getlod.1d.v4f32.f32(i32 5, float undef, <8 x i32> undef, <4 x i32> undef, i1 0, i32 0, i32 0)
7070
%tmp1 = bitcast <4 x float> %tmp to <4 x i32>
7171
%tmp2 = shufflevector <4 x i32> %tmp1, <4 x i32> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
7272
%tmp3 = bitcast <4 x i32> %tmp2 to <4 x float>
@@ -76,9 +76,9 @@ main_body:
7676
}
7777

7878

79-
declare <2 x float> @llvm.amdgcn.image.sample.v2f32.v2f32.v8i32(<2 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #1
80-
declare <2 x float> @llvm.amdgcn.image.getlod.v2f32.v2f32.v8i32(<2 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #1
81-
declare <4 x float> @llvm.amdgcn.image.getlod.v4f32.v2f32.v8i32(<2 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #1
79+
declare <2 x float> @llvm.amdgcn.image.sample.1d.v2f32.f32(i32, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
80+
declare <2 x float> @llvm.amdgcn.image.getlod.1d.v2f32.f32(i32, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
81+
declare <4 x float> @llvm.amdgcn.image.getlod.1d.v4f32.f32(i32, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
8282

8383
attributes #0 = { nounwind }
8484
attributes #1 = { nounwind readonly }

‎llvm/test/CodeGen/AMDGPU/commute-shifts.ll

+2-2
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77
define amdgpu_ps float @main(float %arg0, float %arg1) #0 {
88
bb:
99
%tmp = fptosi float %arg0 to i32
10-
%tmp1 = call <4 x float> @llvm.amdgcn.image.load.v4f32.v4i32.v8i32(<4 x i32> undef, <8 x i32> undef, i32 15, i1 false, i1 false, i1 false, i1 false)
10+
%tmp1 = call <4 x float> @llvm.amdgcn.image.load.1d.v4f32.i32(i32 15, i32 undef, <8 x i32> undef, i32 0, i32 0)
1111
%tmp2.f = extractelement <4 x float> %tmp1, i32 0
1212
%tmp2 = bitcast float %tmp2.f to i32
1313
%tmp3 = and i32 %tmp, 7
@@ -21,7 +21,7 @@ bb:
2121
}
2222

2323
declare <2 x half> @llvm.amdgcn.cvt.pkrtz(float, float) #1
24-
declare <4 x float> @llvm.amdgcn.image.load.v4f32.v4i32.v8i32(<4 x i32>, <8 x i32>, i32, i1, i1, i1, i1) #2
24+
declare <4 x float> @llvm.amdgcn.image.load.1d.v4f32.i32(i32, i32, <8 x i32>, i32, i32) #2
2525

2626
attributes #0 = { nounwind }
2727
attributes #1 = { nounwind readnone }

‎llvm/test/CodeGen/AMDGPU/constant-address-space-32bit.ll

+5-5
Original file line numberDiff line numberDiff line change
@@ -204,7 +204,7 @@ define amdgpu_vs i32 @load_i32_hifffffff0(i32 addrspace(6)* inreg %p) #4 {
204204
; GCN: v_readfirstlane_b32
205205
; GCN-NEXT: v_readfirstlane_b32
206206
; SI: s_nop
207-
; GCN-NEXT: s_load_dwordx8
207+
; GCN: s_load_dwordx8
208208
; GCN-NEXT: s_load_dwordx4
209209
; GCN: image_sample
210210
define amdgpu_ps <{ i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> @load_sampler([0 x <4 x i32>] addrspace(6)* inreg noalias dereferenceable(18446744073709551615), [0 x <8 x i32>] addrspace(6)* inreg noalias dereferenceable(18446744073709551615), [0 x <4 x i32>] addrspace(6)* inreg noalias dereferenceable(18446744073709551615), [0 x <8 x i32>] addrspace(6)* inreg noalias dereferenceable(18446744073709551615), float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, i32, i32, float, i32) #5 {
@@ -219,7 +219,7 @@ main_body:
219219
%29 = bitcast [0 x <8 x i32>] addrspace(6)* %1 to [0 x <4 x i32>] addrspace(6)*
220220
%30 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(6)* %29, i32 0, i32 %28, !amdgpu.uniform !0
221221
%31 = load <4 x i32>, <4 x i32> addrspace(6)* %30, align 16, !invariant.load !0
222-
%32 = call nsz <4 x float> @llvm.amdgcn.image.sample.v4f32.v2f32.v8i32(<2 x float> zeroinitializer, <8 x i32> %26, <4 x i32> %31, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false) #8
222+
%32 = call nsz <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float 0.0, <8 x i32> %26, <4 x i32> %31, i1 0, i32 0, i32 0) #8
223223
%33 = extractelement <4 x float> %32, i32 0
224224
%34 = extractelement <4 x float> %32, i32 1
225225
%35 = extractelement <4 x float> %32, i32 2
@@ -238,7 +238,7 @@ main_body:
238238
; GCN: v_readfirstlane_b32
239239
; GCN-NEXT: v_readfirstlane_b32
240240
; SI: s_nop
241-
; GCN-NEXT: s_load_dwordx8
241+
; GCN: s_load_dwordx8
242242
; GCN-NEXT: s_load_dwordx4
243243
; GCN: image_sample
244244
define amdgpu_ps <{ i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> @load_sampler_nouniform([0 x <4 x i32>] addrspace(6)* inreg noalias dereferenceable(18446744073709551615), [0 x <8 x i32>] addrspace(6)* inreg noalias dereferenceable(18446744073709551615), [0 x <4 x i32>] addrspace(6)* inreg noalias dereferenceable(18446744073709551615), [0 x <8 x i32>] addrspace(6)* inreg noalias dereferenceable(18446744073709551615), float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, i32, i32, float, i32) #5 {
@@ -253,7 +253,7 @@ main_body:
253253
%29 = bitcast [0 x <8 x i32>] addrspace(6)* %1 to [0 x <4 x i32>] addrspace(6)*
254254
%30 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(6)* %29, i32 0, i32 %28
255255
%31 = load <4 x i32>, <4 x i32> addrspace(6)* %30, align 16, !invariant.load !0
256-
%32 = call nsz <4 x float> @llvm.amdgcn.image.sample.v4f32.v2f32.v8i32(<2 x float> zeroinitializer, <8 x i32> %26, <4 x i32> %31, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false) #8
256+
%32 = call nsz <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float 0.0, <8 x i32> %26, <4 x i32> %31, i1 0, i32 0, i32 0) #8
257257
%33 = extractelement <4 x float> %32, i32 0
258258
%34 = extractelement <4 x float> %32, i32 1
259259
%35 = extractelement <4 x float> %32, i32 2
@@ -272,7 +272,7 @@ main_body:
272272
declare float @llvm.amdgcn.interp.mov(i32, i32, i32, i32) #6
273273

274274
; Function Attrs: nounwind readonly
275-
declare <4 x float> @llvm.amdgcn.image.sample.v4f32.v2f32.v8i32(<2 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #7
275+
declare <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32, float, <8 x i32>, <4 x i32>, i1, i32, i32) #7
276276

277277

278278
!0 = !{}

‎llvm/test/CodeGen/AMDGPU/else.ll

+2-2
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,7 @@ if:
4444

4545
else:
4646
%c = fmul float %v, 3.0
47-
%tex = call <4 x float> @llvm.amdgcn.image.sample.v4f32.f32.v8i32(float %c, <8 x i32> undef, <4 x i32> undef, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)
47+
%tex = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %c, <8 x i32> undef, <4 x i32> undef, i1 0, i32 0, i32 0)
4848
%v.else = extractelement <4 x float> %tex, i32 0
4949
br label %end
5050

@@ -55,7 +55,7 @@ end:
5555
}
5656

5757
declare void @llvm.amdgcn.buffer.store.f32(float, <4 x i32>, i32, i32, i1, i1) #1
58-
declare <4 x float> @llvm.amdgcn.image.sample.v4f32.f32.v8i32(float, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #2
58+
declare <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32, float, <8 x i32>, <4 x i32>, i1, i32, i32) #2
5959

6060
attributes #0 = { nounwind }
6161
attributes #1 = { nounwind writeonly }

‎llvm/test/CodeGen/AMDGPU/image-schedule.ll

+8-7
Original file line numberDiff line numberDiff line change
@@ -25,28 +25,29 @@ define dllexport amdgpu_cs void @_amdgpu_cs_main(i32 inreg %arg, i32 inreg %arg1
2525
%tmp10 = getelementptr [4294967295 x i8], [4294967295 x i8] addrspace(4)* %tmp8, i64 0, i64 32
2626
%tmp11 = bitcast i8 addrspace(4)* %tmp10 to <8 x i32> addrspace(4)*, !amdgpu.uniform !0
2727
%tmp12 = load <8 x i32>, <8 x i32> addrspace(4)* %tmp11, align 16
28-
%tmp13 = shufflevector <3 x i32> %tmp9, <3 x i32> undef, <2 x i32> <i32 0, i32 1>
29-
%tmp14 = call <4 x float> @llvm.amdgcn.image.load.v4f32.v2i32.v8i32(<2 x i32> %tmp13, <8 x i32> %tmp12, i32 15, i1 false, i1 false, i1 false, i1 false) #0
28+
%tmp13.0 = extractelement <3 x i32> %tmp9, i32 0
29+
%tmp13.1 = extractelement <3 x i32> %tmp9, i32 1
30+
%tmp14 = call <4 x float> @llvm.amdgcn.image.load.2d.v4f32.i32(i32 15, i32 %tmp13.0, i32 %tmp13.1, <8 x i32> %tmp12, i32 0, i32 0) #0
3031
%tmp15 = inttoptr i64 %tmp7 to <8 x i32> addrspace(4)*
3132
%tmp16 = load <8 x i32>, <8 x i32> addrspace(4)* %tmp15, align 16
32-
call void @llvm.amdgcn.image.store.v4f32.v2i32.v8i32(<4 x float> %tmp14, <2 x i32> %tmp13, <8 x i32> %tmp16, i32 15, i1 false, i1 false, i1 false, i1 false) #0
33+
call void @llvm.amdgcn.image.store.2d.v4f32.i32(<4 x float> %tmp14, i32 15, i32 %tmp13.0, i32 %tmp13.1, <8 x i32> %tmp16, i32 0, i32 0) #0
3334
%tmp17 = load <8 x i32>, <8 x i32> addrspace(4)* %tmp15, align 16
34-
%tmp18 = call <4 x float> @llvm.amdgcn.image.load.v4f32.v2i32.v8i32(<2 x i32> %tmp13, <8 x i32> %tmp17, i32 15, i1 false, i1 false, i1 false, i1 false) #0
35+
%tmp18 = call <4 x float> @llvm.amdgcn.image.load.2d.v4f32.i32(i32 15, i32 %tmp13.0, i32 %tmp13.1, <8 x i32> %tmp17, i32 0, i32 0) #0
3536
%tmp19 = getelementptr [4294967295 x i8], [4294967295 x i8] addrspace(4)* %tmp8, i64 0, i64 64
3637
%tmp20 = bitcast i8 addrspace(4)* %tmp19 to <8 x i32> addrspace(4)*, !amdgpu.uniform !0
3738
%tmp21 = load <8 x i32>, <8 x i32> addrspace(4)* %tmp20, align 16
38-
call void @llvm.amdgcn.image.store.v4f32.v2i32.v8i32(<4 x float> %tmp18, <2 x i32> %tmp13, <8 x i32> %tmp21, i32 15, i1 false, i1 false, i1 false, i1 false) #0
39+
call void @llvm.amdgcn.image.store.2d.v4f32.i32(<4 x float> %tmp18, i32 15, i32 %tmp13.0, i32 %tmp13.1, <8 x i32> %tmp21, i32 0, i32 0) #0
3940
ret void
4041
}
4142

4243
; Function Attrs: nounwind readnone speculatable
4344
declare i64 @llvm.amdgcn.s.getpc() #1
4445

4546
; Function Attrs: nounwind readonly
46-
declare <4 x float> @llvm.amdgcn.image.load.v4f32.v2i32.v8i32(<2 x i32>, <8 x i32>, i32, i1, i1, i1, i1) #2
47+
declare <4 x float> @llvm.amdgcn.image.load.2d.v4f32.i32(i32, i32, i32, <8 x i32>, i32, i32) #2
4748

4849
; Function Attrs: nounwind writeonly
49-
declare void @llvm.amdgcn.image.store.v4f32.v2i32.v8i32(<4 x float>, <2 x i32>, <8 x i32>, i32, i1, i1, i1, i1) #3
50+
declare void @llvm.amdgcn.image.store.2d.v4f32.i32(<4 x float>, i32, i32, i32, <8 x i32>, i32, i32) #3
5051

5152
attributes #0 = { nounwind }
5253
attributes #1 = { nounwind readnone speculatable }

‎llvm/test/CodeGen/AMDGPU/insert_vector_elt.ll

+4-5
Original file line numberDiff line numberDiff line change
@@ -75,10 +75,9 @@ define amdgpu_kernel void @insertelement_v3f32_3(<3 x float> addrspace(1)* %out,
7575

7676
; GCN-LABEL: {{^}}insertelement_to_sgpr:
7777
; GCN-NOT: v_readfirstlane
78-
define amdgpu_ps <4 x float> @insertelement_to_sgpr() nounwind {
79-
%tmp = load <4 x i32>, <4 x i32> addrspace(2)* undef
80-
%tmp1 = insertelement <4 x i32> %tmp, i32 0, i32 0
81-
%tmp2 = call <4 x float> @llvm.amdgcn.image.gather4.lz.v4f32.v2f32.v8i32(<2 x float> undef, <8 x i32> undef, <4 x i32> undef, i32 1, i1 false, i1 false, i1 false, i1 false, i1 true)
78+
define amdgpu_ps <4 x float> @insertelement_to_sgpr(<4 x i32> inreg %samp) nounwind {
79+
%tmp1 = insertelement <4 x i32> %samp, i32 0, i32 0
80+
%tmp2 = call <4 x float> @llvm.amdgcn.image.gather4.lz.2d.v4f32.f32(i32 1, float undef, float undef, <8 x i32> undef, <4 x i32> %tmp1, i1 0, i32 0, i32 0)
8281
ret <4 x float> %tmp2
8382
}
8483

@@ -474,7 +473,7 @@ define amdgpu_kernel void @dynamic_insertelement_v8f64(<8 x double> addrspace(1)
474473
ret void
475474
}
476475

477-
declare <4 x float> @llvm.amdgcn.image.gather4.lz.v4f32.v2f32.v8i32(<2 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #1
476+
declare <4 x float> @llvm.amdgcn.image.gather4.lz.2d.v4f32.f32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
478477

479478
attributes #0 = { nounwind }
480479
attributes #1 = { nounwind readnone }

‎llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.atomic.dim.ll

+1
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
; RUN: llc -march=amdgcn -mcpu=verde -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
12
; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
23

34
; GCN-LABEL: {{^}}atomic_swap_1d:

‎llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.dim.ll

+44-3
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
1-
; RUN: llc -march=amdgcn -mcpu=verde -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
2-
; RUN: llc -march=amdgcn -mcpu=fiji -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
3-
; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
1+
; RUN: llc -march=amdgcn -mcpu=verde -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,SI %s
2+
; RUN: llc -march=amdgcn -mcpu=fiji -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,VI %s
3+
; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN %s
44

55
; GCN-LABEL: {{^}}load_1d:
66
; GCN: image_load v[0:3], v0, s[0:7] dmask:0xf unorm{{$}}
@@ -370,6 +370,46 @@ main_body:
370370
ret void
371371
}
372372

373+
; GCN-LABEL: {{^}}getresinfo_dmask0:
374+
; GCN-NOT: image
375+
; GCN: ; return to shader part epilog
376+
define amdgpu_ps <4 x float> @getresinfo_dmask0(<8 x i32> inreg %rsrc, <4 x float> %vdata, i32 %mip) #0 {
377+
main_body:
378+
%r = call <4 x float> @llvm.amdgcn.image.getresinfo.1d.v4f32.i32(i32 0, i32 %mip, <8 x i32> %rsrc, i32 0, i32 0)
379+
ret <4 x float> %r
380+
}
381+
382+
; Ideally, the register allocator would avoid the wait here
383+
;
384+
; GCN-LABEL: {{^}}image_store_wait:
385+
; GCN: image_store v[0:3], v4, s[0:7] dmask:0xf unorm
386+
; SI: s_waitcnt expcnt(0)
387+
; GCN: image_load v[0:3], v4, s[8:15] dmask:0xf unorm
388+
; GCN: s_waitcnt vmcnt(0)
389+
; GCN: image_store v[0:3], v4, s[16:23] dmask:0xf unorm
390+
define amdgpu_ps void @image_store_wait(<8 x i32> inreg %arg, <8 x i32> inreg %arg1, <8 x i32> inreg %arg2, <4 x float> %arg3, i32 %arg4) #0 {
391+
main_body:
392+
call void @llvm.amdgcn.image.store.1d.v4f32.i32(<4 x float> %arg3, i32 15, i32 %arg4, <8 x i32> %arg, i32 0, i32 0)
393+
%data = call <4 x float> @llvm.amdgcn.image.load.1d.v4f32.i32(i32 15, i32 %arg4, <8 x i32> %arg1, i32 0, i32 0)
394+
call void @llvm.amdgcn.image.store.1d.v4f32.i32(<4 x float> %data, i32 15, i32 %arg4, <8 x i32> %arg2, i32 0, i32 0)
395+
ret void
396+
}
397+
398+
; SI won't merge ds memory operations, because of the signed offset bug, so
399+
; we only have check lines for VI.
400+
; VI-LABEL: image_load_mmo
401+
; VI: v_mov_b32_e32 [[ZERO:v[0-9]+]], 0
402+
; VI: ds_write2_b32 v{{[0-9]+}}, [[ZERO]], [[ZERO]] offset1:4
403+
define amdgpu_ps float @image_load_mmo(<8 x i32> inreg %rsrc, float addrspace(3)* %lds, <2 x i32> %c) #0 {
404+
store float 0.000000e+00, float addrspace(3)* %lds
405+
%c0 = extractelement <2 x i32> %c, i32 0
406+
%c1 = extractelement <2 x i32> %c, i32 1
407+
%tex = call float @llvm.amdgcn.image.load.2d.f32.i32(i32 15, i32 %c0, i32 %c1, <8 x i32> %rsrc, i32 0, i32 0)
408+
%tmp2 = getelementptr float, float addrspace(3)* %lds, i32 4
409+
store float 0.000000e+00, float addrspace(3)* %tmp2
410+
ret float %tex
411+
}
412+
373413
declare <4 x float> @llvm.amdgcn.image.load.1d.v4f32.i32(i32, i32, <8 x i32>, i32, i32) #1
374414
declare <4 x float> @llvm.amdgcn.image.load.2d.v4f32.i32(i32, i32, i32, <8 x i32>, i32, i32) #1
375415
declare <4 x float> @llvm.amdgcn.image.load.3d.v4f32.i32(i32, i32, i32, i32, <8 x i32>, i32, i32) #1
@@ -412,6 +452,7 @@ declare <4 x float> @llvm.amdgcn.image.getresinfo.2dmsaa.v4f32.i32(i32, i32, <8
412452
declare <4 x float> @llvm.amdgcn.image.getresinfo.2darraymsaa.v4f32.i32(i32, i32, <8 x i32>, i32, i32) #2
413453

414454
declare float @llvm.amdgcn.image.load.1d.f32.i32(i32, i32, <8 x i32>, i32, i32) #1
455+
declare float @llvm.amdgcn.image.load.2d.f32.i32(i32, i32, i32, <8 x i32>, i32, i32) #1
415456
declare <2 x float> @llvm.amdgcn.image.load.1d.v2f32.i32(i32, i32, <8 x i32>, i32, i32) #1
416457
declare void @llvm.amdgcn.image.store.1d.f32.i32(float, i32, i32, <8 x i32>, i32, i32) #0
417458
declare void @llvm.amdgcn.image.store.1d.v2f32.i32(<2 x float>, i32, i32, <8 x i32>, i32, i32) #0

‎llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.gather4.dim.ll

+1
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
; RUN: llc -march=amdgcn -mcpu=verde -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN %s
12
; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN %s
23

34
; GCN-LABEL: {{^}}gather4_2d:
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,118 @@
1+
; RUN: llc -march=amdgcn -mcpu=verde -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN %s
2+
; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN %s
3+
4+
; GCN-LABEL: {{^}}gather4_o_2d:
5+
; GCN: image_gather4_o v[0:3], v[0:3], s[0:7], s[8:11] dmask:0x1{{$}}
6+
define amdgpu_ps <4 x float> @gather4_o_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %s, float %t) {
7+
main_body:
8+
%v = call <4 x float> @llvm.amdgcn.image.gather4.o.2d.v4f32.f32(i32 1, i32 %offset, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
9+
ret <4 x float> %v
10+
}
11+
12+
; GCN-LABEL: {{^}}gather4_c_o_2d:
13+
; GCN: image_gather4_c_o v[0:3], v[0:3], s[0:7], s[8:11] dmask:0x1{{$}}
14+
define amdgpu_ps <4 x float> @gather4_c_o_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, float %s, float %t) {
15+
main_body:
16+
%v = call <4 x float> @llvm.amdgcn.image.gather4.c.o.2d.v4f32.f32(i32 1, i32 %offset, float %zcompare, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
17+
ret <4 x float> %v
18+
}
19+
20+
; GCN-LABEL: {{^}}gather4_cl_o_2d:
21+
; GCN: image_gather4_cl_o v[0:3], v[0:3], s[0:7], s[8:11] dmask:0x1{{$}}
22+
define amdgpu_ps <4 x float> @gather4_cl_o_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %s, float %t, float %clamp) {
23+
main_body:
24+
%v = call <4 x float> @llvm.amdgcn.image.gather4.cl.o.2d.v4f32.f32(i32 1, i32 %offset, float %s, float %t, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
25+
ret <4 x float> %v
26+
}
27+
28+
; GCN-LABEL: {{^}}gather4_c_cl_o_2d:
29+
; GCN: image_gather4_c_cl_o v[0:3], v[0:7], s[0:7], s[8:11] dmask:0x1{{$}}
30+
define amdgpu_ps <4 x float> @gather4_c_cl_o_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, float %s, float %t, float %clamp) {
31+
main_body:
32+
%v = call <4 x float> @llvm.amdgcn.image.gather4.c.cl.o.2d.v4f32.f32(i32 1, i32 %offset, float %zcompare, float %s, float %t, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
33+
ret <4 x float> %v
34+
}
35+
36+
; GCN-LABEL: {{^}}gather4_b_o_2d:
37+
; GCN: image_gather4_b_o v[0:3], v[0:3], s[0:7], s[8:11] dmask:0x1{{$}}
38+
define amdgpu_ps <4 x float> @gather4_b_o_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %bias, float %s, float %t) {
39+
main_body:
40+
%v = call <4 x float> @llvm.amdgcn.image.gather4.b.o.2d.v4f32.f32.f32(i32 1, i32 %offset, float %bias, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
41+
ret <4 x float> %v
42+
}
43+
44+
; GCN-LABEL: {{^}}gather4_c_b_o_2d:
45+
; GCN: image_gather4_c_b_o v[0:3], v[0:7], s[0:7], s[8:11] dmask:0x1{{$}}
46+
define amdgpu_ps <4 x float> @gather4_c_b_o_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %bias, float %zcompare, float %s, float %t) {
47+
main_body:
48+
%v = call <4 x float> @llvm.amdgcn.image.gather4.c.b.o.2d.v4f32.f32.f32(i32 1, i32 %offset, float %bias, float %zcompare, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
49+
ret <4 x float> %v
50+
}
51+
52+
; GCN-LABEL: {{^}}gather4_b_cl_o_2d:
53+
; GCN: image_gather4_b_cl_o v[0:3], v[0:7], s[0:7], s[8:11] dmask:0x1{{$}}
54+
define amdgpu_ps <4 x float> @gather4_b_cl_o_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %bias, float %s, float %t, float %clamp) {
55+
main_body:
56+
%v = call <4 x float> @llvm.amdgcn.image.gather4.b.cl.o.2d.v4f32.f32.f32(i32 1, i32 %offset, float %bias, float %s, float %t, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
57+
ret <4 x float> %v
58+
}
59+
60+
; GCN-LABEL: {{^}}gather4_c_b_cl_o_2d:
61+
; GCN: image_gather4_c_b_cl_o v[0:3], v[0:7], s[0:7], s[8:11] dmask:0x1{{$}}
62+
define amdgpu_ps <4 x float> @gather4_c_b_cl_o_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %bias, float %zcompare, float %s, float %t, float %clamp) {
63+
main_body:
64+
%v = call <4 x float> @llvm.amdgcn.image.gather4.c.b.cl.o.2d.v4f32.f32.f32(i32 1, i32 %offset, float %bias, float %zcompare, float %s, float %t, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
65+
ret <4 x float> %v
66+
}
67+
68+
; GCN-LABEL: {{^}}gather4_l_o_2d:
69+
; GCN: image_gather4_l_o v[0:3], v[0:3], s[0:7], s[8:11] dmask:0x1{{$}}
70+
define amdgpu_ps <4 x float> @gather4_l_o_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %s, float %t, float %lod) {
71+
main_body:
72+
%v = call <4 x float> @llvm.amdgcn.image.gather4.l.o.2d.v4f32.f32(i32 1, i32 %offset, float %s, float %t, float %lod, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
73+
ret <4 x float> %v
74+
}
75+
76+
; GCN-LABEL: {{^}}gather4_c_l_o_2d:
77+
; GCN: image_gather4_c_l_o v[0:3], v[0:7], s[0:7], s[8:11] dmask:0x1{{$}}
78+
define amdgpu_ps <4 x float> @gather4_c_l_o_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, float %s, float %t, float %lod) {
79+
main_body:
80+
%v = call <4 x float> @llvm.amdgcn.image.gather4.c.l.o.2d.v4f32.f32(i32 1, i32 %offset, float %zcompare, float %s, float %t, float %lod, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
81+
ret <4 x float> %v
82+
}
83+
84+
; GCN-LABEL: {{^}}gather4_lz_o_2d:
85+
; GCN: image_gather4_lz_o v[0:3], v[0:3], s[0:7], s[8:11] dmask:0x1{{$}}
86+
define amdgpu_ps <4 x float> @gather4_lz_o_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %s, float %t) {
87+
main_body:
88+
%v = call <4 x float> @llvm.amdgcn.image.gather4.lz.o.2d.v4f32.f32(i32 1, i32 %offset, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
89+
ret <4 x float> %v
90+
}
91+
92+
; GCN-LABEL: {{^}}gather4_c_lz_o_2d:
93+
; GCN: image_gather4_c_lz_o v[0:3], v[0:3], s[0:7], s[8:11] dmask:0x1{{$}}
94+
define amdgpu_ps <4 x float> @gather4_c_lz_o_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, float %s, float %t) {
95+
main_body:
96+
%v = call <4 x float> @llvm.amdgcn.image.gather4.c.lz.o.2d.v4f32.f32(i32 1, i32 %offset, float %zcompare, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
97+
ret <4 x float> %v
98+
}
99+
100+
declare <4 x float> @llvm.amdgcn.image.gather4.o.2d.v4f32.f32(i32, i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
101+
declare <4 x float> @llvm.amdgcn.image.gather4.c.o.2d.v4f32.f32(i32, i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
102+
declare <4 x float> @llvm.amdgcn.image.gather4.cl.o.2d.v4f32.f32(i32, i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
103+
declare <4 x float> @llvm.amdgcn.image.gather4.c.cl.o.2d.v4f32.f32(i32, i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
104+
105+
declare <4 x float> @llvm.amdgcn.image.gather4.b.o.2d.v4f32.f32.f32(i32, i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
106+
declare <4 x float> @llvm.amdgcn.image.gather4.c.b.o.2d.v4f32.f32.f32(i32, i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
107+
declare <4 x float> @llvm.amdgcn.image.gather4.b.cl.o.2d.v4f32.f32.f32(i32, i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
108+
declare <4 x float> @llvm.amdgcn.image.gather4.c.b.cl.o.2d.v4f32.f32.f32(i32, i32, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
109+
110+
declare <4 x float> @llvm.amdgcn.image.gather4.l.o.2d.v4f32.f32(i32, i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
111+
declare <4 x float> @llvm.amdgcn.image.gather4.c.l.o.2d.v4f32.f32(i32, i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
112+
113+
declare <4 x float> @llvm.amdgcn.image.gather4.lz.o.2d.v4f32.f32(i32, i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
114+
declare <4 x float> @llvm.amdgcn.image.gather4.c.lz.o.2d.v4f32.f32(i32, i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
115+
116+
attributes #0 = { nounwind }
117+
attributes #1 = { nounwind readonly }
118+
attributes #2 = { nounwind readnone }

‎llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.dim.ll

+90
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
; RUN: llc -march=amdgcn -mcpu=verde -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN %s
12
; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN %s
23

34
; GCN-LABEL: {{^}}sample_1d:
@@ -400,6 +401,95 @@ main_body:
400401
ret <4 x float> %v
401402
}
402403

404+
; GCN-LABEL: {{^}}adjust_writemask_sample_0:
405+
; GCN: image_sample v0, v0, s[0:7], s[8:11] dmask:0x1{{$}}
406+
define amdgpu_ps float @adjust_writemask_sample_0(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) {
407+
main_body:
408+
%r = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
409+
%elt0 = extractelement <4 x float> %r, i32 0
410+
ret float %elt0
411+
}
412+
413+
; GCN-LABEL: {{^}}adjust_writemask_sample_01:
414+
; GCN: image_sample v[0:1], v0, s[0:7], s[8:11] dmask:0x3{{$}}
415+
define amdgpu_ps <2 x float> @adjust_writemask_sample_01(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) {
416+
main_body:
417+
%r = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
418+
%out = shufflevector <4 x float> %r, <4 x float> undef, <2 x i32> <i32 0, i32 1>
419+
ret <2 x float> %out
420+
}
421+
422+
; GCN-LABEL: {{^}}adjust_writemask_sample_012:
423+
; GCN: image_sample v[0:2], v0, s[0:7], s[8:11] dmask:0x7{{$}}
424+
define amdgpu_ps <3 x float> @adjust_writemask_sample_012(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) {
425+
main_body:
426+
%r = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
427+
%out = shufflevector <4 x float> %r, <4 x float> undef, <3 x i32> <i32 0, i32 1, i32 2>
428+
ret <3 x float> %out
429+
}
430+
431+
; GCN-LABEL: {{^}}adjust_writemask_sample_12:
432+
; GCN: image_sample v[0:1], v0, s[0:7], s[8:11] dmask:0x6{{$}}
433+
define amdgpu_ps <2 x float> @adjust_writemask_sample_12(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) {
434+
main_body:
435+
%r = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
436+
%out = shufflevector <4 x float> %r, <4 x float> undef, <2 x i32> <i32 1, i32 2>
437+
ret <2 x float> %out
438+
}
439+
440+
; GCN-LABEL: {{^}}adjust_writemask_sample_03:
441+
; GCN: image_sample v[0:1], v0, s[0:7], s[8:11] dmask:0x9{{$}}
442+
define amdgpu_ps <2 x float> @adjust_writemask_sample_03(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) {
443+
main_body:
444+
%r = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
445+
%out = shufflevector <4 x float> %r, <4 x float> undef, <2 x i32> <i32 0, i32 3>
446+
ret <2 x float> %out
447+
}
448+
449+
; GCN-LABEL: {{^}}adjust_writemask_sample_13:
450+
; GCN: image_sample v[0:1], v0, s[0:7], s[8:11] dmask:0xa{{$}}
451+
define amdgpu_ps <2 x float> @adjust_writemask_sample_13(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) {
452+
main_body:
453+
%r = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
454+
%out = shufflevector <4 x float> %r, <4 x float> undef, <2 x i32> <i32 1, i32 3>
455+
ret <2 x float> %out
456+
}
457+
458+
; GCN-LABEL: {{^}}adjust_writemask_sample_123:
459+
; GCN: image_sample v[0:2], v0, s[0:7], s[8:11] dmask:0xe{{$}}
460+
define amdgpu_ps <3 x float> @adjust_writemask_sample_123(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) {
461+
main_body:
462+
%r = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
463+
%out = shufflevector <4 x float> %r, <4 x float> undef, <3 x i32> <i32 1, i32 2, i32 3>
464+
ret <3 x float> %out
465+
}
466+
467+
; GCN-LABEL: {{^}}adjust_writemask_sample_none_enabled:
468+
; GCN-NOT: image
469+
define amdgpu_ps <4 x float> @adjust_writemask_sample_none_enabled(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) {
470+
main_body:
471+
%r = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 0, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
472+
ret <4 x float> %r
473+
}
474+
475+
; GCN-LABEL: {{^}}adjust_writemask_sample_123_to_12:
476+
; GCN: image_sample v[0:1], v0, s[0:7], s[8:11] dmask:0x6{{$}}
477+
define amdgpu_ps <2 x float> @adjust_writemask_sample_123_to_12(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) {
478+
main_body:
479+
%r = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 14, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
480+
%out = shufflevector <4 x float> %r, <4 x float> undef, <2 x i32> <i32 0, i32 1>
481+
ret <2 x float> %out
482+
}
483+
484+
; GCN-LABEL: {{^}}adjust_writemask_sample_013_to_13:
485+
; GCN: image_sample v[0:1], v0, s[0:7], s[8:11] dmask:0xa{{$}}
486+
define amdgpu_ps <2 x float> @adjust_writemask_sample_013_to_13(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) {
487+
main_body:
488+
%r = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 11, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
489+
%out = shufflevector <4 x float> %r, <4 x float> undef, <2 x i32> <i32 1, i32 2>
490+
ret <2 x float> %out
491+
}
492+
403493
declare <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
404494
declare <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
405495
declare <4 x float> @llvm.amdgcn.image.sample.3d.v4f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1

‎llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.o.dim.ll

+371
Large diffs are not rendered by default.

‎llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ps.live.ll

+3-3
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@ define amdgpu_ps float @test2() #0 {
2626
%live = call i1 @llvm.amdgcn.ps.live()
2727
%live.32 = zext i1 %live to i32
2828
%live.32.bc = bitcast i32 %live.32 to float
29-
%t = call <4 x float> @llvm.amdgcn.image.sample.v4f32.f32.v8i32(float %live.32.bc, <8 x i32> undef, <4 x i32> undef, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)
29+
%t = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %live.32.bc, <8 x i32> undef, <4 x i32> undef, i1 0, i32 0, i32 0)
3030
%r = extractelement <4 x float> %t, i32 0
3131
ret float %r
3232
}
@@ -49,13 +49,13 @@ dead:
4949
end:
5050
%tc = phi i32 [ %in, %entry ], [ %tc.dead, %dead ]
5151
%tc.bc = bitcast i32 %tc to float
52-
%t = call <4 x float> @llvm.amdgcn.image.sample.v4f32.f32.v8i32(float %tc.bc, <8 x i32> undef, <4 x i32> undef, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false) #0
52+
%t = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %tc.bc, <8 x i32> undef, <4 x i32> undef, i1 0, i32 0, i32 0) #0
5353
%r = extractelement <4 x float> %t, i32 0
5454
ret float %r
5555
}
5656

5757
declare i1 @llvm.amdgcn.ps.live() #1
58-
declare <4 x float> @llvm.amdgcn.image.sample.v4f32.f32.v8i32(float, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #2
58+
declare <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32, float, <8 x i32>, <4 x i32>, i1, i32, i32) #2
5959

6060
attributes #0 = { nounwind }
6161
attributes #1 = { nounwind readnone }

‎llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.waitcnt.ll

+6-6
Original file line numberDiff line numberDiff line change
@@ -8,9 +8,9 @@
88
; CHECK-NEXT: image_store
99
; CHECK-NEXT: s_endpgm
1010
define amdgpu_ps void @test1(<8 x i32> inreg %rsrc, <4 x float> %d0, <4 x float> %d1, i32 %c0, i32 %c1) {
11-
call void @llvm.amdgcn.image.store.v4f32.i32.v8i32(<4 x float> %d0, i32 %c0, <8 x i32> %rsrc, i32 15, i1 0, i1 0, i1 1, i1 0)
11+
call void @llvm.amdgcn.image.store.1d.v4f32.i32(<4 x float> %d0, i32 15, i32 %c0, <8 x i32> %rsrc, i32 0, i32 0)
1212
call void @llvm.amdgcn.s.waitcnt(i32 3840) ; 0xf00
13-
call void @llvm.amdgcn.image.store.v4f32.i32.v8i32(<4 x float> %d1, i32 %c1, <8 x i32> %rsrc, i32 15, i1 0, i1 0, i1 1, i1 0)
13+
call void @llvm.amdgcn.image.store.1d.v4f32.i32(<4 x float> %d1, i32 15, i32 %c1, <8 x i32> %rsrc, i32 0, i32 0)
1414
ret void
1515
}
1616

@@ -24,17 +24,17 @@ define amdgpu_ps void @test1(<8 x i32> inreg %rsrc, <4 x float> %d0, <4 x float>
2424
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0){{$}}
2525
; CHECK-NEXT: image_store
2626
define amdgpu_ps void @test2(<8 x i32> inreg %rsrc, i32 %c) {
27-
%t = call <4 x float> @llvm.amdgcn.image.load.v4f32.i32.v8i32(i32 %c, <8 x i32> %rsrc, i32 15, i1 0, i1 0, i1 0, i1 0)
27+
%t = call <4 x float> @llvm.amdgcn.image.load.1d.v4f32.i32(i32 15, i32 %c, <8 x i32> %rsrc, i32 0, i32 0)
2828
call void @llvm.amdgcn.s.waitcnt(i32 3840) ; 0xf00
2929
%c.1 = mul i32 %c, 2
30-
call void @llvm.amdgcn.image.store.v4f32.i32.v8i32(<4 x float> %t, i32 %c.1, <8 x i32> %rsrc, i32 15, i1 0, i1 0, i1 0, i1 0)
30+
call void @llvm.amdgcn.image.store.1d.v4f32.i32(<4 x float> %t, i32 15, i32 %c.1, <8 x i32> %rsrc, i32 0, i32 0)
3131
ret void
3232
}
3333

3434
declare void @llvm.amdgcn.s.waitcnt(i32) #0
3535

36-
declare <4 x float> @llvm.amdgcn.image.load.v4f32.i32.v8i32(i32, <8 x i32>, i32, i1, i1, i1, i1) #1
37-
declare void @llvm.amdgcn.image.store.v4f32.i32.v8i32(<4 x float>, i32, <8 x i32>, i32, i1, i1, i1, i1) #0
36+
declare <4 x float> @llvm.amdgcn.image.load.1d.v4f32.i32(i32, i32, <8 x i32>, i32, i32) #1
37+
declare void @llvm.amdgcn.image.store.1d.v4f32.i32(<4 x float>, i32, i32, <8 x i32>, i32, i32) #0
3838

3939
attributes #0 = { nounwind }
4040
attributes #1 = { nounwind readonly }

‎llvm/test/CodeGen/AMDGPU/print-mir-custom-pseudo.ll

+2-2
Original file line numberDiff line numberDiff line change
@@ -10,8 +10,8 @@ target triple = "amdgcn--amdpal"
1010

1111
define dllexport amdgpu_ps <2 x float> @_amdgpu_ps_main(i32 inreg, i32 inreg, i32 inreg, i32 inreg, <2 x float>, <2 x float>, <2 x float>, <3 x float>, <2 x float>, <2 x float>, <2 x float>, float, float, float, float, float, i32, i32, i32, i32) local_unnamed_addr {
1212
.entry:
13-
%res = call <2 x float> @llvm.amdgcn.image.sample.l.v2f32.v4f32.v8i32(<4 x float> undef, <8 x i32> undef, <4 x i32> undef, i32 3, i1 false, i1 false, i1 false, i1 false, i1 false)
13+
%res = call <2 x float> @llvm.amdgcn.image.sample.1d.v2f32.f32(i32 3, float undef, <8 x i32> undef, <4 x i32> undef, i1 0, i32 0, i32 0)
1414
ret <2 x float> %res
1515
}
1616

17-
declare <2 x float> @llvm.amdgcn.image.sample.l.v2f32.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1)
17+
declare <2 x float> @llvm.amdgcn.image.sample.1d.v2f32.f32(i32, float, <8 x i32>, <4 x i32>, i1, i32, i32)

‎llvm/test/CodeGen/AMDGPU/sgpr-copy.ll

+10-21
Original file line numberDiff line numberDiff line change
@@ -81,13 +81,8 @@ main_body:
8181
%j.f.i4 = bitcast i32 %j.i2 to float
8282
%p1.i5 = call float @llvm.amdgcn.interp.p1(float %i.f.i3, i32 2, i32 1, i32 %arg3) #1
8383
%p2.i6 = call float @llvm.amdgcn.interp.p2(float %p1.i5, float %j.f.i4, i32 2, i32 1, i32 %arg3) #1
84-
%tmp45 = bitcast float %p2.i to i32
85-
%tmp46 = bitcast float %p2.i24 to i32
86-
%tmp47 = insertelement <2 x i32> undef, i32 %tmp45, i32 0
87-
%tmp48 = insertelement <2 x i32> %tmp47, i32 %tmp46, i32 1
8884
%tmp39.bc = bitcast <4 x i32> %tmp39 to <4 x i32>
89-
%a.bc.i = bitcast <2 x i32> %tmp48 to <2 x float>
90-
%tmp1 = call <4 x float> @llvm.amdgcn.image.sample.v4f32.v2f32.v8i32(<2 x float> %a.bc.i, <8 x i32> %tmp37, <4 x i32> %tmp39.bc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)
85+
%tmp1 = call <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f32(i32 15, float %p2.i, float %p2.i24, <8 x i32> %tmp37, <4 x i32> %tmp39.bc, i1 0, i32 0, i32 0)
9186
%tmp50 = extractelement <4 x float> %tmp1, i32 2
9287
%tmp51 = call float @llvm.fabs.f32(float %tmp50)
9388
%tmp52 = fmul float %p2.i18, %p2.i18
@@ -240,14 +235,14 @@ entry:
240235
br i1 %tmp27, label %if, label %else
241236

242237
if: ; preds = %entry
243-
%tmp1 = call <4 x float> @llvm.amdgcn.image.sample.v4f32.v2f32.v8i32(<2 x float> <float 0x36D6000000000000, float 0x36DA000000000000>, <8 x i32> %tmp24, <4 x i32> %tmp26.bc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)
238+
%tmp1 = call <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f32(i32 15, float 0x36D6000000000000, float 0x36DA000000000000, <8 x i32> %tmp24, <4 x i32> %tmp26.bc, i1 0, i32 0, i32 0)
244239
%val.if.0 = extractelement <4 x float> %tmp1, i32 0
245240
%val.if.1 = extractelement <4 x float> %tmp1, i32 1
246241
%val.if.2 = extractelement <4 x float> %tmp1, i32 2
247242
br label %endif
248243

249244
else: ; preds = %entry
250-
%tmp2 = call <4 x float> @llvm.amdgcn.image.sample.v4f32.v2f32.v8i32(<2 x float> <float 0x36C4000000000000, float 0x36CC000000000000>, <8 x i32> %tmp24, <4 x i32> %tmp26.bc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)
245+
%tmp2 = call <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f32(i32 15, float 0x36C4000000000000, float 0x36CC000000000000, <8 x i32> %tmp24, <4 x i32> %tmp26.bc, i1 0, i32 0, i32 0)
251246
%val.else.0 = extractelement <4 x float> %tmp2, i32 0
252247
%val.else.1 = extractelement <4 x float> %tmp2, i32 1
253248
%val.else.2 = extractelement <4 x float> %tmp2, i32 2
@@ -352,24 +347,18 @@ bb:
352347
br i1 %tmp36, label %bb38, label %bb80
353348

354349
bb38: ; preds = %bb
355-
%tmp52 = bitcast float %p2.i to i32
356-
%tmp53 = bitcast float %p2.i6 to i32
357-
%tmp54 = insertelement <2 x i32> undef, i32 %tmp52, i32 0
358-
%tmp55 = insertelement <2 x i32> %tmp54, i32 %tmp53, i32 1
359350
%tmp56 = bitcast <8 x i32> %tmp26 to <8 x i32>
360-
%a.bc.i = bitcast <2 x i32> %tmp55 to <2 x float>
361-
%tmp2 = call <4 x float> @llvm.amdgcn.image.sample.v4f32.v2f32.v8i32(<2 x float> %a.bc.i, <8 x i32> %tmp56, <4 x i32> %tmp28, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)
351+
%tmp2 = call <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f32(i32 15, float %p2.i, float %p2.i6, <8 x i32> %tmp56, <4 x i32> %tmp28, i1 0, i32 0, i32 0)
362352
br label %bb71
363353

364354
bb80: ; preds = %bb
365355
%tmp81 = bitcast float %p2.i to i32
366356
%tmp82 = bitcast float %p2.i6 to i32
367357
%tmp82.2 = add i32 %tmp82, 1
368-
%tmp83 = insertelement <2 x i32> undef, i32 %tmp81, i32 0
369-
%tmp84 = insertelement <2 x i32> %tmp83, i32 %tmp82.2, i32 1
358+
%tmp83 = bitcast i32 %tmp81 to float
359+
%tmp84 = bitcast i32 %tmp82.2 to float
370360
%tmp85 = bitcast <8 x i32> %tmp26 to <8 x i32>
371-
%a.bc.i1 = bitcast <2 x i32> %tmp84 to <2 x float>
372-
%tmp3 = call <4 x float> @llvm.amdgcn.image.sample.v4f32.v2f32.v8i32(<2 x float> %a.bc.i1, <8 x i32> %tmp85, <4 x i32> %tmp28, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)
361+
%tmp3 = call <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f32(i32 15, float %tmp83, float %tmp84, <8 x i32> %tmp85, <4 x i32> %tmp28, i1 0, i32 0, i32 0)
373362
br label %bb71
374363

375364
bb71: ; preds = %bb80, %bb38
@@ -387,7 +376,7 @@ bb:
387376
%tid = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #0
388377
%tmp7 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(4)* %arg, i32 0, i32 %tid
389378
%tmp8 = load <8 x i32>, <8 x i32> addrspace(4)* %tmp7, align 32, !tbaa !0
390-
%tmp = call <4 x float> @llvm.amdgcn.image.sample.v4f32.v2f32.v8i32(<2 x float> <float 7.500000e-01, float 2.500000e-01>, <8 x i32> %tmp8, <4 x i32> undef, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)
379+
%tmp = call <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f32(i32 15, float 7.500000e-01, float 2.500000e-01, <8 x i32> %tmp8, <4 x i32> undef, i1 0, i32 0, i32 0)
391380
%tmp10 = extractelement <4 x float> %tmp, i32 0
392381
%tmp12 = call <2 x half> @llvm.amdgcn.cvt.pkrtz(float undef, float %tmp10)
393382
call void @llvm.amdgcn.exp.compr.v2f16(i32 0, i32 15, <2 x half> %tmp12, <2 x half> undef, i1 true, i1 true) #0
@@ -402,7 +391,7 @@ bb:
402391
%tid = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #0
403392
%tmp7 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(4)* %arg, i32 0, i32 %tid
404393
%tmp8 = load <4 x i32>, <4 x i32> addrspace(4)* %tmp7, align 16, !tbaa !0
405-
%tmp = call <4 x float> @llvm.amdgcn.image.sample.v4f32.v2f32.v8i32(<2 x float> <float 7.500000e-01, float 2.500000e-01>, <8 x i32> undef, <4 x i32> %tmp8, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)
394+
%tmp = call <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f32(i32 15, float 7.500000e-01, float 2.500000e-01, <8 x i32> undef, <4 x i32> %tmp8, i1 0, i32 0, i32 0)
406395
%tmp10 = extractelement <4 x float> %tmp, i32 0
407396
%tmp12 = call <2 x half> @llvm.amdgcn.cvt.pkrtz(float %tmp10, float undef)
408397
call void @llvm.amdgcn.exp.compr.v2f16(i32 0, i32 15, <2 x half> %tmp12, <2 x half> undef, i1 true, i1 true) #0
@@ -419,7 +408,7 @@ declare float @llvm.amdgcn.interp.p2(float, float, i32, i32, i32) #1
419408
declare void @llvm.amdgcn.exp.f32(i32, i32, float, float, float, float, i1, i1) #0
420409
declare void @llvm.amdgcn.exp.compr.v2f16(i32, i32, <2 x half>, <2 x half>, i1, i1) #0
421410
declare <2 x half> @llvm.amdgcn.cvt.pkrtz(float, float) #1
422-
declare <4 x float> @llvm.amdgcn.image.sample.v4f32.v2f32.v8i32(<2 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #2
411+
declare <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #2
423412
declare float @llvm.SI.load.const.v4i32(<4 x i32>, i32) #1
424413

425414
attributes #0 = { nounwind }

‎llvm/test/CodeGen/AMDGPU/si-scheduler.ll

+2-7
Original file line numberDiff line numberDiff line change
@@ -34,14 +34,9 @@ main_body:
3434
%j.f.i4 = bitcast i32 %j.i2 to float
3535
%p1.i5 = call float @llvm.amdgcn.interp.p1(float %i.f.i3, i32 1, i32 0, i32 %arg5) #1
3636
%p2.i6 = call float @llvm.amdgcn.interp.p2(float %p1.i5, float %j.f.i4, i32 1, i32 0, i32 %arg5) #1
37-
%tmp27 = bitcast float %p2.i to i32
38-
%tmp28 = bitcast float %p2.i6 to i32
39-
%tmp29 = insertelement <2 x i32> undef, i32 %tmp27, i32 0
40-
%tmp30 = insertelement <2 x i32> %tmp29, i32 %tmp28, i32 1
4137
%tmp22.bc = bitcast <32 x i8> %tmp22 to <8 x i32>
4238
%tmp24.bc = bitcast <16 x i8> %tmp24 to <4 x i32>
43-
%tmp30.bc = bitcast <2 x i32> %tmp30 to <2 x float>
44-
%tmp31 = call <4 x float> @llvm.amdgcn.image.sample.v4f32.v2f32.v8i32(<2 x float> %tmp30.bc, <8 x i32> %tmp22.bc, <4 x i32> %tmp24.bc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)
39+
%tmp31 = call <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f32(i32 15, float %p2.i, float %p2.i6, <8 x i32> %tmp22.bc, <4 x i32> %tmp24.bc, i1 0, i32 0, i32 0)
4540

4641
%tmp32 = extractelement <4 x float> %tmp31, i32 0
4742
%tmp33 = extractelement <4 x float> %tmp31, i32 1
@@ -57,7 +52,7 @@ declare float @llvm.amdgcn.interp.p1(float, i32, i32, i32) #1
5752
declare float @llvm.amdgcn.interp.p2(float, float, i32, i32, i32) #1
5853
declare void @llvm.amdgcn.exp.compr.v2f16(i32, i32, <2 x half>, <2 x half>, i1, i1) #0
5954
declare <2 x half> @llvm.amdgcn.cvt.pkrtz(float, float) #1
60-
declare <4 x float> @llvm.amdgcn.image.sample.v4f32.v2f32.v8i32(<2 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #2
55+
declare <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #2
6156

6257
attributes #0 = { nounwind }
6358
attributes #1 = { nounwind readnone }

‎llvm/test/CodeGen/AMDGPU/si-sgpr-spill.ll

+24-210
Large diffs are not rendered by default.

‎llvm/test/CodeGen/AMDGPU/skip-if-dead.ll

+3-3
Original file line numberDiff line numberDiff line change
@@ -355,7 +355,7 @@ bb7: ; preds = %bb4
355355

356356
; CHECK: [[END]]:
357357
; CHECK: s_endpgm
358-
define amdgpu_ps void @if_after_kill_block(float %arg, float %arg1, <4 x float> %arg2) #0 {
358+
define amdgpu_ps void @if_after_kill_block(float %arg, float %arg1, float %arg2, float %arg3) #0 {
359359
bb:
360360
%tmp = fcmp ult float %arg1, 0.000000e+00
361361
br i1 %tmp, label %bb3, label %bb4
@@ -365,7 +365,7 @@ bb3: ; preds = %bb
365365
br label %bb4
366366

367367
bb4: ; preds = %bb3, %bb
368-
%tmp5 = call <4 x float> @llvm.amdgcn.image.sample.c.v4f32.v4f32.v8i32(<4 x float> %arg2, <8 x i32> undef, <4 x i32> undef, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)
368+
%tmp5 = call <4 x float> @llvm.amdgcn.image.sample.c.1d.v4f32.f32(i32 16, float %arg2, float %arg3, <8 x i32> undef, <4 x i32> undef, i1 0, i32 0, i32 0)
369369
%tmp6 = extractelement <4 x float> %tmp5, i32 0
370370
%tmp7 = fcmp une float %tmp6, 0.000000e+00
371371
br i1 %tmp7, label %bb8, label %bb9
@@ -378,7 +378,7 @@ bb9: ; preds = %bb4
378378
ret void
379379
}
380380

381-
declare <4 x float> @llvm.amdgcn.image.sample.c.v4f32.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #1
381+
declare <4 x float> @llvm.amdgcn.image.sample.c.1d.v4f32.f32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
382382
declare void @llvm.AMDGPU.kill(float) #0
383383

384384
attributes #0 = { nounwind }

‎llvm/test/CodeGen/AMDGPU/split-smrd.ll

+2-2
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@ bb3: ; preds = %bb
2121
%tmp6 = sext i32 %tmp5 to i64
2222
%tmp7 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(4)* %arg, i64 0, i64 %tmp6
2323
%tmp8 = load <8 x i32>, <8 x i32> addrspace(4)* %tmp7, align 32, !tbaa !0
24-
%tmp9 = call <4 x float> @llvm.amdgcn.image.sample.v4f32.v2f32.v8i32(<2 x float> <float bitcast (i32 1061158912 to float), float bitcast (i32 1048576000 to float)>, <8 x i32> %tmp8, <4 x i32> undef, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)
24+
%tmp9 = call <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f32(i32 15, float bitcast (i32 1061158912 to float), float bitcast (i32 1048576000 to float), <8 x i32> %tmp8, <4 x i32> undef, i1 0, i32 0, i32 0)
2525
%tmp10 = extractelement <4 x float> %tmp9, i32 0
2626
%tmp12 = call <2 x half> @llvm.amdgcn.cvt.pkrtz(float %tmp10, float undef)
2727
call void @llvm.amdgcn.exp.compr.v2f16(i32 0, i32 15, <2 x half> %tmp12, <2 x half> undef, i1 true, i1 true) #0
@@ -30,7 +30,7 @@ bb3: ; preds = %bb
3030

3131
declare <2 x half> @llvm.amdgcn.cvt.pkrtz(float, float) #1
3232
declare void @llvm.amdgcn.exp.compr.v2f16(i32, i32, <2 x half>, <2 x half>, i1, i1) #0
33-
declare <4 x float> @llvm.amdgcn.image.sample.v4f32.v2f32.v8i32(<2 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #2
33+
declare <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #2
3434
declare float @llvm.SI.load.const.v4i32(<4 x i32>, i32) #1
3535

3636
attributes #0 = { nounwind }

‎llvm/test/CodeGen/AMDGPU/subreg-coalescer-crash.ll

+2-2
Original file line numberDiff line numberDiff line change
@@ -65,7 +65,7 @@ bb7: ; preds = %bb6
6565
br label %bb4
6666

6767
bb9: ; preds = %bb2
68-
%tmp10 = call <4 x float> @llvm.amdgcn.image.sample.v4f32.v2f32.v8i32(<2 x float> undef, <8 x i32> undef, <4 x i32> undef, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)
68+
%tmp10 = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float undef, <8 x i32> undef, <4 x i32> undef, i1 0, i32 0, i32 0)
6969
%tmp11 = extractelement <4 x float> %tmp10, i32 1
7070
%tmp12 = extractelement <4 x float> %tmp10, i32 3
7171
br label %bb14
@@ -97,7 +97,7 @@ bb27: ; preds = %bb24
9797

9898

9999
declare void @llvm.amdgcn.exp.f32(i32, i32, float, float, float, float, i1, i1) #0
100-
declare <4 x float> @llvm.amdgcn.image.sample.v4f32.v2f32.v8i32(<2 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #1
100+
declare <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
101101

102102
attributes #0 = { nounwind }
103103
attributes #1 = { nounwind readonly }

‎llvm/test/CodeGen/AMDGPU/undefined-subreg-liverange.ll

+3-4
Original file line numberDiff line numberDiff line change
@@ -34,9 +34,8 @@ bb:
3434
%tmp = load volatile i32, i32 addrspace(1)* undef, align 4
3535
%tmp1 = load volatile i32, i32 addrspace(1)* undef, align 4
3636
%tmp2 = insertelement <4 x i32> undef, i32 %tmp1, i32 0
37-
%tmp3 = insertelement <4 x i32> %tmp2, i32 %tmp1, i32 1
38-
%tmp3.cast = bitcast <4 x i32> %tmp3 to <4 x float>
39-
%tmp4 = call <4 x float> @llvm.amdgcn.image.sample.v4f32.v4f32.v8i32(<4 x float> %tmp3.cast, <8 x i32> undef, <4 x i32> undef, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)
37+
%tmp3 = bitcast i32 %tmp1 to float
38+
%tmp4 = call <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f32(i32 15, float %tmp3, float %tmp3, <8 x i32> undef, <4 x i32> undef, i1 0, i32 0, i32 0)
4039
%tmp5 = extractelement <4 x float> %tmp4, i32 0
4140
%tmp6 = fmul float %tmp5, undef
4241
%tmp7 = fadd float %tmp6, %tmp6
@@ -84,7 +83,7 @@ define amdgpu_kernel void @partially_undef_copy() #0 {
8483
ret void
8584
}
8685

87-
declare <4 x float> @llvm.amdgcn.image.sample.v4f32.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #1
86+
declare <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
8887

8988
attributes #0 = { nounwind }
9089
attributes #1 = { nounwind readonly }

‎llvm/test/CodeGen/AMDGPU/unigine-liveness-crash.ll

+8-22
Original file line numberDiff line numberDiff line change
@@ -17,25 +17,20 @@ main_body:
1717
%j.f.i = bitcast i32 %j.i to float
1818
%p1.i = call float @llvm.amdgcn.interp.p1(float %i.f.i, i32 3, i32 4, i32 %arg6) #2
1919
%p2.i = call float @llvm.amdgcn.interp.p2(float %p1.i, float %j.f.i, i32 3, i32 4, i32 %arg6) #2
20-
%tmp23 = call <4 x float> @llvm.amdgcn.image.sample.v4f32.v2f32.v8i32(<2 x float> undef, <8 x i32> undef, <4 x i32> undef, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)
20+
%tmp23 = call <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f32(i32 15, float undef, float undef, <8 x i32> undef, <4 x i32> undef, i1 0, i32 0, i32 0)
2121

2222
%tmp24 = extractelement <4 x float> %tmp23, i32 3
2323
%tmp25 = fmul float %tmp24, %tmp24
2424
%tmp26 = fmul float %p2.i, %p2.i
2525
%tmp27 = fadd float %tmp26, %tmp26
26-
%tmp28 = bitcast float %tmp27 to i32
27-
%tmp29 = insertelement <4 x i32> undef, i32 %tmp28, i32 0
28-
%tmp30 = insertelement <4 x i32> %tmp29, i32 0, i32 1
29-
%tmp31 = insertelement <4 x i32> %tmp30, i32 undef, i32 2
30-
%tmp31.cast = bitcast <4 x i32> %tmp31 to <4 x float>
31-
%tmp32 = call <4 x float> @llvm.amdgcn.image.sample.c.v4f32.v4f32.v8i32(<4 x float> %tmp31.cast, <8 x i32> undef, <4 x i32> undef, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)
26+
%tmp32 = call <4 x float> @llvm.amdgcn.image.sample.c.2d.v4f32.f32(i32 15, float %tmp27, float 0.0, float undef, <8 x i32> undef, <4 x i32> undef, i1 0, i32 0, i32 0)
3227
%tmp33 = extractelement <4 x float> %tmp32, i32 0
3328
%tmp34 = fadd float %tmp33, %tmp33
3429
%tmp35 = fadd float %tmp34, %tmp34
3530
%tmp36 = fadd float %tmp35, %tmp35
3631
%tmp37 = fadd float %tmp36, %tmp36
3732
%tmp38 = fadd float %tmp37, %tmp37
38-
%tmp39 = call <4 x float> @llvm.amdgcn.image.sample.v4f32.v4f32.v8i32(<4 x float> undef, <8 x i32> undef, <4 x i32> undef, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)
33+
%tmp39 = call <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f32(i32 15, float undef, float undef, <8 x i32> undef, <4 x i32> undef, i1 0, i32 0, i32 0)
3934
%tmp40 = extractelement <4 x float> %tmp39, i32 0
4035
%tmp41 = extractelement <4 x float> %tmp39, i32 1
4136
%tmp42 = extractelement <4 x float> %tmp39, i32 2
@@ -53,17 +48,12 @@ main_body:
5348
%tmp54 = insertelement <4 x i32> %tmp53, i32 %tmp51, i32 1
5449
%tmp55 = insertelement <4 x i32> %tmp54, i32 %tmp52, i32 2
5550
%tmp55.cast = bitcast <4 x i32> %tmp55 to <4 x float>
56-
%tmp56 = call <4 x float> @llvm.amdgcn.image.sample.c.v4f32.v4f32.v8i32(<4 x float> %tmp55.cast, <8 x i32> undef, <4 x i32> undef, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)
51+
%tmp56 = call <4 x float> @llvm.amdgcn.image.sample.c.2d.v4f32.f32(i32 15, float %tmp27, float %tmp48, float %tmp49, <8 x i32> undef, <4 x i32> undef, i1 0, i32 0, i32 0)
5752
%tmp57 = extractelement <4 x float> %tmp56, i32 0
5853
%tmp58 = fadd float %tmp38, %tmp57
5954
%tmp59 = fadd float %tmp46, %tmp46
6055
%tmp60 = fadd float %tmp47, %tmp47
61-
%tmp61 = bitcast float %tmp59 to i32
62-
%tmp62 = bitcast float %tmp60 to i32
63-
%tmp63 = insertelement <4 x i32> undef, i32 %tmp61, i32 1
64-
%tmp64 = insertelement <4 x i32> %tmp63, i32 %tmp62, i32 2
65-
%tmp64.cast = bitcast <4 x i32> %tmp64 to <4 x float>
66-
%tmp65 = call <4 x float> @llvm.amdgcn.image.sample.c.v4f32.v4f32.v8i32(<4 x float> %tmp64.cast, <8 x i32> undef, <4 x i32> undef, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)
56+
%tmp65 = call <4 x float> @llvm.amdgcn.image.sample.c.2d.v4f32.f32(i32 15, float undef, float %tmp59, float %tmp60, <8 x i32> undef, <4 x i32> undef, i1 0, i32 0, i32 0)
6757
%tmp66 = extractelement <4 x float> %tmp65, i32 0
6858
%tmp67 = fadd float %tmp58, %tmp66
6959
%tmp68 = fmul float %tmp67, 1.250000e-01
@@ -101,10 +91,7 @@ IF29: ; preds = %LOOP
10191
br label %ENDIF25
10292

10393
ENDIF28: ; preds = %LOOP
104-
%tmp85 = insertelement <4 x i32> %tmp72, i32 undef, i32 1
105-
%tmp86 = insertelement <4 x i32> %tmp85, i32 undef, i32 2
106-
%tmp86.cast = bitcast <4 x i32> %tmp86 to <4 x float>
107-
%tmp87 = call <4 x float> @llvm.amdgcn.image.sample.c.v4f32.v4f32.v8i32(<4 x float> %tmp86.cast, <8 x i32> undef, <4 x i32> undef, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)
94+
%tmp87 = call <4 x float> @llvm.amdgcn.image.sample.c.2d.v4f32.f32(i32 15, float %tmp27, float undef, float undef, <8 x i32> undef, <4 x i32> undef, i1 0, i32 0, i32 0)
10895
%tmp88 = extractelement <4 x float> %tmp87, i32 0
10996
%tmp89 = fadd float %tmp88, %tmp88
11097
br label %LOOP
@@ -114,9 +101,8 @@ declare float @llvm.minnum.f32(float, float) #1
114101
declare float @llvm.maxnum.f32(float, float) #1
115102
declare float @llvm.amdgcn.interp.p1(float, i32, i32, i32) #1
116103
declare float @llvm.amdgcn.interp.p2(float, float, i32, i32, i32) #1
117-
declare <4 x float> @llvm.amdgcn.image.sample.v4f32.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #2
118-
declare <4 x float> @llvm.amdgcn.image.sample.v4f32.v2f32.v8i32(<2 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #2
119-
declare <4 x float> @llvm.amdgcn.image.sample.c.v4f32.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #2
104+
declare <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #2
105+
declare <4 x float> @llvm.amdgcn.image.sample.c.2d.v4f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #2
120106

121107
attributes #0 = { nounwind "InitialPSInputAddr"="36983" "target-cpu"="tonga" }
122108
attributes #1 = { nounwind readnone }

‎llvm/test/CodeGen/AMDGPU/wqm.ll

+49-43
Large diffs are not rendered by default.

0 commit comments

Comments
 (0)
Please sign in to comment.