Skip to content

Commit b896c4e

Browse files
committed Jun 11, 2018
[NFC][AMDGPU] Add tests for all the various IR patterns equivalent to extracting low bits.
Summary: The idiom recognition seems rather poor. Only the `@bzhi32_d0` produces `v_bfe_u32`. But they all should. This needs to be fixed before D47980 can be re-landed. Reviewers: mareko, bogner, rampitec, arsenm, tstellar, nhaehnle Reviewed By: nhaehnle Subscribers: kzhuravl, wdng, nhaehnle, yaxunl, dstuttard, tpr, t-tye, llvm-commits Tags: #amdgpu Differential Revision: https://reviews.llvm.org/D48005 llvm-svn: 334398
1 parent dee4930 commit b896c4e

File tree

1 file changed

+263
-0
lines changed

1 file changed

+263
-0
lines changed
 
Lines changed: 263 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,263 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2+
; RUN: llc -march=amdgcn -mtriple=amdgcn-- -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s
3+
; RUN: llc -march=amdgcn -mtriple=amdgcn-- -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s
4+
5+
; Loosely based on test/CodeGen/{X86,AArch64}/extract-lowbits.ll,
6+
; but with all 64-bit tests and all tests involving loads dropped.
7+
8+
; Patterns:
9+
; a) x & ((1 << nbits) - 1)
10+
; b) x & ~(-1 << nbits)
11+
; c) x & (-1 >> (32 - nbits))
11+
; d) (x << (32 - nbits)) >> (32 - nbits)
13+
; are equivalent.
14+
15+
; ---------------------------------------------------------------------------- ;
16+
; Pattern a. 32-bit
17+
; ---------------------------------------------------------------------------- ;
18+
19+
; Pattern a, base case: builds %mask = (1 << %numlowbits) - 1 and returns
; %mask & %val, i.e. the low %numlowbits bits of %val.
; The recorded output for both llc invocations is a v_bfm_b32 that forms the
; mask followed by a separate v_and_b32; it is not a single v_bfe_u32.
; NOTE(review): the add carries nsw, so for %numlowbits == 31 the mask
; computation (INT_MIN + -1) looks like signed overflow - confirm intended.
define i32 @bzhi32_a0(i32 %val, i32 %numlowbits) nounwind {
20+
; SI-LABEL: bzhi32_a0:
21+
; SI: ; %bb.0:
22+
; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
23+
; SI-NEXT: v_bfm_b32_e64 v1, v1, 0
24+
; SI-NEXT: v_and_b32_e32 v0, v1, v0
25+
; SI-NEXT: s_setpc_b64 s[30:31]
26+
;
27+
; VI-LABEL: bzhi32_a0:
28+
; VI: ; %bb.0:
29+
; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
30+
; VI-NEXT: v_bfm_b32 v1, v1, 0
31+
; VI-NEXT: v_and_b32_e32 v0, v1, v0
32+
; VI-NEXT: s_setpc_b64 s[30:31]
33+
%onebit = shl i32 1, %numlowbits
34+
%mask = add nsw i32 %onebit, -1
35+
%masked = and i32 %mask, %val
36+
ret i32 %masked
37+
}
38+
39+
; Pattern a with a narrow bit count: %numlowbits arrives as an i8 marked
; zeroext and is widened with zext before the shl. The mask is then
; (1 << %conv) - 1 and is and'ed with %val.
; The recorded output matches the i32 variant of pattern a: v_bfm_b32
; followed by v_and_b32, with no extra instruction for the zext.
define i32 @bzhi32_a1_indexzext(i32 %val, i8 zeroext %numlowbits) nounwind {
40+
; SI-LABEL: bzhi32_a1_indexzext:
41+
; SI: ; %bb.0:
42+
; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
43+
; SI-NEXT: v_bfm_b32_e64 v1, v1, 0
44+
; SI-NEXT: v_and_b32_e32 v0, v1, v0
45+
; SI-NEXT: s_setpc_b64 s[30:31]
46+
;
47+
; VI-LABEL: bzhi32_a1_indexzext:
48+
; VI: ; %bb.0:
49+
; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
50+
; VI-NEXT: v_bfm_b32 v1, v1, 0
51+
; VI-NEXT: v_and_b32_e32 v0, v1, v0
52+
; VI-NEXT: s_setpc_b64 s[30:31]
53+
%conv = zext i8 %numlowbits to i32
54+
%onebit = shl i32 1, %conv
55+
%mask = add nsw i32 %onebit, -1
56+
%masked = and i32 %mask, %val
57+
ret i32 %masked
58+
}
59+
60+
; Pattern a with the operands of the final 'and' swapped (%val first, %mask
; second); the mask is still (1 << %numlowbits) - 1.
; The recorded output is again v_bfm_b32 followed by v_and_b32; only the
; source operand order of the v_and_b32 differs from the base case.
define i32 @bzhi32_a4_commutative(i32 %val, i32 %numlowbits) nounwind {
61+
; SI-LABEL: bzhi32_a4_commutative:
62+
; SI: ; %bb.0:
63+
; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
64+
; SI-NEXT: v_bfm_b32_e64 v1, v1, 0
65+
; SI-NEXT: v_and_b32_e32 v0, v0, v1
66+
; SI-NEXT: s_setpc_b64 s[30:31]
67+
;
68+
; VI-LABEL: bzhi32_a4_commutative:
69+
; VI: ; %bb.0:
70+
; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
71+
; VI-NEXT: v_bfm_b32 v1, v1, 0
72+
; VI-NEXT: v_and_b32_e32 v0, v0, v1
73+
; VI-NEXT: s_setpc_b64 s[30:31]
74+
%onebit = shl i32 1, %numlowbits
75+
%mask = add nsw i32 %onebit, -1
76+
%masked = and i32 %val, %mask ; swapped order
77+
ret i32 %masked
78+
}
79+
80+
; ---------------------------------------------------------------------------- ;
81+
; Pattern b. 32-bit
82+
; ---------------------------------------------------------------------------- ;
83+
84+
; Pattern b, base case: builds %notmask = -1 << %numlowbits, inverts it with
; xor -1 to get %mask, and returns %mask & %val.
; The recorded output for both llc invocations is three instructions: a left
; shift of -1, v_not_b32, and v_and_b32. No bitfield instruction is used.
define i32 @bzhi32_b0(i32 %val, i32 %numlowbits) nounwind {
85+
; SI-LABEL: bzhi32_b0:
86+
; SI: ; %bb.0:
87+
; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
88+
; SI-NEXT: v_lshl_b32_e32 v1, -1, v1
89+
; SI-NEXT: v_not_b32_e32 v1, v1
90+
; SI-NEXT: v_and_b32_e32 v0, v1, v0
91+
; SI-NEXT: s_setpc_b64 s[30:31]
92+
;
93+
; VI-LABEL: bzhi32_b0:
94+
; VI: ; %bb.0:
95+
; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
96+
; VI-NEXT: v_lshlrev_b32_e64 v1, v1, -1
97+
; VI-NEXT: v_not_b32_e32 v1, v1
98+
; VI-NEXT: v_and_b32_e32 v0, v1, v0
99+
; VI-NEXT: s_setpc_b64 s[30:31]
100+
%notmask = shl i32 -1, %numlowbits
101+
%mask = xor i32 %notmask, -1
102+
%masked = and i32 %mask, %val
103+
ret i32 %masked
104+
}
105+
106+
; Pattern b with a narrow bit count: %numlowbits arrives as an i8 marked
; zeroext and is widened with zext before being used as the shift amount.
; The mask is ~(-1 << %conv) and is and'ed with %val.
; The recorded output matches the i32 variant of pattern b: shift, v_not_b32,
; v_and_b32, with no extra instruction for the zext.
define i32 @bzhi32_b1_indexzext(i32 %val, i8 zeroext %numlowbits) nounwind {
107+
; SI-LABEL: bzhi32_b1_indexzext:
108+
; SI: ; %bb.0:
109+
; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
110+
; SI-NEXT: v_lshl_b32_e32 v1, -1, v1
111+
; SI-NEXT: v_not_b32_e32 v1, v1
112+
; SI-NEXT: v_and_b32_e32 v0, v1, v0
113+
; SI-NEXT: s_setpc_b64 s[30:31]
114+
;
115+
; VI-LABEL: bzhi32_b1_indexzext:
116+
; VI: ; %bb.0:
117+
; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
118+
; VI-NEXT: v_lshlrev_b32_e64 v1, v1, -1
119+
; VI-NEXT: v_not_b32_e32 v1, v1
120+
; VI-NEXT: v_and_b32_e32 v0, v1, v0
121+
; VI-NEXT: s_setpc_b64 s[30:31]
122+
%conv = zext i8 %numlowbits to i32
123+
%notmask = shl i32 -1, %conv
124+
%mask = xor i32 %notmask, -1
125+
%masked = and i32 %mask, %val
126+
ret i32 %masked
127+
}
128+
129+
; Pattern b with the operands of the final 'and' swapped (%val first, %mask
; second); the mask is still ~(-1 << %numlowbits).
; The recorded output is again shift, v_not_b32, v_and_b32; only the source
; operand order of the v_and_b32 differs from the base case.
define i32 @bzhi32_b4_commutative(i32 %val, i32 %numlowbits) nounwind {
130+
; SI-LABEL: bzhi32_b4_commutative:
131+
; SI: ; %bb.0:
132+
; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
133+
; SI-NEXT: v_lshl_b32_e32 v1, -1, v1
134+
; SI-NEXT: v_not_b32_e32 v1, v1
135+
; SI-NEXT: v_and_b32_e32 v0, v0, v1
136+
; SI-NEXT: s_setpc_b64 s[30:31]
137+
;
138+
; VI-LABEL: bzhi32_b4_commutative:
139+
; VI: ; %bb.0:
140+
; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
141+
; VI-NEXT: v_lshlrev_b32_e64 v1, v1, -1
142+
; VI-NEXT: v_not_b32_e32 v1, v1
143+
; VI-NEXT: v_and_b32_e32 v0, v0, v1
144+
; VI-NEXT: s_setpc_b64 s[30:31]
145+
%notmask = shl i32 -1, %numlowbits
146+
%mask = xor i32 %notmask, -1
147+
%masked = and i32 %val, %mask ; swapped order
148+
ret i32 %masked
149+
}
150+
151+
; ---------------------------------------------------------------------------- ;
152+
; Pattern c. 32-bit
153+
; ---------------------------------------------------------------------------- ;
154+
155+
; Pattern c, base case: computes %numhighbits = 32 - %numlowbits, builds
; %mask by logically shifting -1 right by %numhighbits, and returns
; %mask & %val.
; The recorded output for both llc invocations is three instructions: a
; subtract from 32, a right shift of -1, and v_and_b32.
define i32 @bzhi32_c0(i32 %val, i32 %numlowbits) nounwind {
156+
; SI-LABEL: bzhi32_c0:
157+
; SI: ; %bb.0:
158+
; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
159+
; SI-NEXT: v_sub_i32_e32 v1, vcc, 32, v1
160+
; SI-NEXT: v_lshr_b32_e32 v1, -1, v1
161+
; SI-NEXT: v_and_b32_e32 v0, v1, v0
162+
; SI-NEXT: s_setpc_b64 s[30:31]
163+
;
164+
; VI-LABEL: bzhi32_c0:
165+
; VI: ; %bb.0:
166+
; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
167+
; VI-NEXT: v_sub_u32_e32 v1, vcc, 32, v1
168+
; VI-NEXT: v_lshrrev_b32_e64 v1, v1, -1
169+
; VI-NEXT: v_and_b32_e32 v0, v1, v0
170+
; VI-NEXT: s_setpc_b64 s[30:31]
171+
%numhighbits = sub i32 32, %numlowbits
172+
%mask = lshr i32 -1, %numhighbits
173+
%masked = and i32 %mask, %val
174+
ret i32 %masked
175+
}
176+
177+
; Pattern c with a narrow bit count: %numlowbits is a plain i8 (no zeroext
; attribute here, unlike the a1/b1 variants). The subtraction 32 - %numlowbits
; is performed in i8 and the result is zero-extended to form the shift amount.
; The two llc invocations record different sequences: the default-target
; checks show a 32-bit subtract followed by an 0xff mask of the shift amount,
; while the tonga checks show a 16-bit subtract and an SDWA right shift that
; reads BYTE_0 of the amount, with -1 first materialised into v2.
define i32 @bzhi32_c1_indexzext(i32 %val, i8 %numlowbits) nounwind {
178+
; SI-LABEL: bzhi32_c1_indexzext:
179+
; SI: ; %bb.0:
180+
; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
181+
; SI-NEXT: v_sub_i32_e32 v1, vcc, 32, v1
182+
; SI-NEXT: v_and_b32_e32 v1, 0xff, v1
183+
; SI-NEXT: v_lshr_b32_e32 v1, -1, v1
184+
; SI-NEXT: v_and_b32_e32 v0, v1, v0
185+
; SI-NEXT: s_setpc_b64 s[30:31]
186+
;
187+
; VI-LABEL: bzhi32_c1_indexzext:
188+
; VI: ; %bb.0:
189+
; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
190+
; VI-NEXT: v_sub_u16_e32 v1, 32, v1
191+
; VI-NEXT: v_mov_b32_e32 v2, -1
192+
; VI-NEXT: v_lshrrev_b32_sdwa v1, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
193+
; VI-NEXT: v_and_b32_e32 v0, v1, v0
194+
; VI-NEXT: s_setpc_b64 s[30:31]
195+
%numhighbits = sub i8 32, %numlowbits
196+
%sh_prom = zext i8 %numhighbits to i32
197+
%mask = lshr i32 -1, %sh_prom
198+
%masked = and i32 %mask, %val
199+
ret i32 %masked
200+
}
201+
202+
; Pattern c with the operands of the final 'and' swapped (%val first, %mask
; second); the mask is still -1 logically shifted right by 32 - %numlowbits.
; The recorded output is again subtract, right shift, v_and_b32; only the
; source operand order of the v_and_b32 differs from the base case.
define i32 @bzhi32_c4_commutative(i32 %val, i32 %numlowbits) nounwind {
203+
; SI-LABEL: bzhi32_c4_commutative:
204+
; SI: ; %bb.0:
205+
; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
206+
; SI-NEXT: v_sub_i32_e32 v1, vcc, 32, v1
207+
; SI-NEXT: v_lshr_b32_e32 v1, -1, v1
208+
; SI-NEXT: v_and_b32_e32 v0, v0, v1
209+
; SI-NEXT: s_setpc_b64 s[30:31]
210+
;
211+
; VI-LABEL: bzhi32_c4_commutative:
212+
; VI: ; %bb.0:
213+
; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
214+
; VI-NEXT: v_sub_u32_e32 v1, vcc, 32, v1
215+
; VI-NEXT: v_lshrrev_b32_e64 v1, v1, -1
216+
; VI-NEXT: v_and_b32_e32 v0, v0, v1
217+
; VI-NEXT: s_setpc_b64 s[30:31]
218+
%numhighbits = sub i32 32, %numlowbits
219+
%mask = lshr i32 -1, %numhighbits
220+
%masked = and i32 %val, %mask ; swapped order
221+
ret i32 %masked
222+
}
223+
224+
; ---------------------------------------------------------------------------- ;
225+
; Pattern d. 32-bit
226+
; ---------------------------------------------------------------------------- ;
227+
228+
; Pattern d, base case: computes %numhighbits = 32 - %numlowbits, shifts %val
; left by that amount to discard the high bits, then logically shifts right
; by the same amount to bring the remaining low bits back into place.
; This is the only function in this file whose recorded output is a single
; v_bfe_u32 (offset 0, width in v1); the expected code is identical for both
; llc invocations, so one shared check prefix is used.
define i32 @bzhi32_d0(i32 %val, i32 %numlowbits) nounwind {
229+
; GCN-LABEL: bzhi32_d0:
230+
; GCN: ; %bb.0:
231+
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
232+
; GCN-NEXT: v_bfe_u32 v0, v0, 0, v1
233+
; GCN-NEXT: s_setpc_b64 s[30:31]
234+
%numhighbits = sub i32 32, %numlowbits
235+
%highbitscleared = shl i32 %val, %numhighbits
236+
%masked = lshr i32 %highbitscleared, %numhighbits
237+
ret i32 %masked
238+
}
239+
240+
; Pattern d with a narrow bit count: %numlowbits is a plain i8 (no zeroext
; attribute). The subtraction 32 - %numlowbits is performed in i8, the result
; is zero-extended, and that amount is used for both the left shift and the
; logical right shift of %val.
; Unlike the i32 variant of pattern d, the recorded output is not a single
; v_bfe_u32: both llc invocations show a subtract, an 0xff mask of the shift
; amount, a left shift and a right shift. The default-target checks use a
; 32-bit subtract and the tonga checks use a 16-bit subtract.
define i32 @bzhi32_d1_indexzext(i32 %val, i8 %numlowbits) nounwind {
241+
; SI-LABEL: bzhi32_d1_indexzext:
242+
; SI: ; %bb.0:
243+
; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
244+
; SI-NEXT: v_sub_i32_e32 v1, vcc, 32, v1
245+
; SI-NEXT: v_and_b32_e32 v1, 0xff, v1
246+
; SI-NEXT: v_lshl_b32_e32 v0, v0, v1
247+
; SI-NEXT: v_lshr_b32_e32 v0, v0, v1
248+
; SI-NEXT: s_setpc_b64 s[30:31]
249+
;
250+
; VI-LABEL: bzhi32_d1_indexzext:
251+
; VI: ; %bb.0:
252+
; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
253+
; VI-NEXT: v_sub_u16_e32 v1, 32, v1
254+
; VI-NEXT: v_and_b32_e32 v1, 0xff, v1
255+
; VI-NEXT: v_lshlrev_b32_e32 v0, v1, v0
256+
; VI-NEXT: v_lshrrev_b32_e32 v0, v1, v0
257+
; VI-NEXT: s_setpc_b64 s[30:31]
258+
%numhighbits = sub i8 32, %numlowbits
259+
%sh_prom = zext i8 %numhighbits to i32
260+
%highbitscleared = shl i32 %val, %sh_prom
261+
%masked = lshr i32 %highbitscleared, %sh_prom
262+
ret i32 %masked
263+
}

0 commit comments

Comments
 (0)
Please sign in to comment.