Skip to content

Commit bdf3c5a

Browse files
committed Feb 6, 2017
Add DAGCombiner load combine tests with non-zero offset
This is separated from https://reviews.llvm.org/D29394 review. llvm-svn: 294185
1 parent b070ce8 commit bdf3c5a

File tree

5 files changed

+805
-3
lines changed

5 files changed

+805
-3
lines changed
 

‎llvm/test/CodeGen/AArch64/load-combine-big-endian.ll

Lines changed: 140 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -191,3 +191,143 @@ define i64 @load_i64_by_i8(i64* %arg) {
191191
%tmp37 = or i64 %tmp33, %tmp36
192192
ret i64 %tmp37
193193
}
194+
195+
; i8* p; // p[1] is 4 byte aligned
196+
; (i32) p[1] | ((i32) p[2] << 8) | ((i32) p[3] << 16) | ((i32) p[4] << 24)
197+
define i32 @load_i32_by_i8_nonzero_offset(i32* %arg) {
; Assembles an i32 from four single-byte loads at byte offsets 1..4 from %arg:
; offset 1 supplies bits 0-7, offset 2 bits 8-15, offset 3 bits 16-23 and
; offset 4 bits 24-31.
; The expectations below pin four separate ldrb loads merged with bfi, i.e.
; the byte loads are expected to stay separate for this pattern.
198+
; CHECK-LABEL: load_i32_by_i8_nonzero_offset:
199+
; CHECK: ldrb w8, [x0, #1]
200+
; CHECK-NEXT: ldrb w9, [x0, #2]
201+
; CHECK-NEXT: ldrb w10, [x0, #3]
202+
; CHECK-NEXT: ldrb w11, [x0, #4]
203+
; CHECK-NEXT: bfi w8, w9, #8, #8
204+
; CHECK-NEXT: bfi w8, w10, #16, #8
205+
; CHECK-NEXT: bfi w8, w11, #24, #8
206+
; CHECK-NEXT: mov w0, w8
207+
; CHECK-NEXT: ret
; Input IR: each byte is zero-extended to i32, shifted into position and ORed
; into the running result.
208+
%tmp = bitcast i32* %arg to i8*
209+
%tmp1 = getelementptr inbounds i8, i8* %tmp, i32 1
210+
%tmp2 = load i8, i8* %tmp1, align 4
; Only the load above (offset 1) is marked 4-byte aligned; the other three
; loads use align 1.
211+
%tmp3 = zext i8 %tmp2 to i32
212+
%tmp4 = getelementptr inbounds i8, i8* %tmp, i32 2
213+
%tmp5 = load i8, i8* %tmp4, align 1
214+
%tmp6 = zext i8 %tmp5 to i32
215+
%tmp7 = shl nuw nsw i32 %tmp6, 8
216+
%tmp8 = or i32 %tmp7, %tmp3
217+
%tmp9 = getelementptr inbounds i8, i8* %tmp, i32 3
218+
%tmp10 = load i8, i8* %tmp9, align 1
219+
%tmp11 = zext i8 %tmp10 to i32
220+
%tmp12 = shl nuw nsw i32 %tmp11, 16
221+
%tmp13 = or i32 %tmp8, %tmp12
222+
%tmp14 = getelementptr inbounds i8, i8* %tmp, i32 4
223+
%tmp15 = load i8, i8* %tmp14, align 1
224+
%tmp16 = zext i8 %tmp15 to i32
225+
%tmp17 = shl nuw nsw i32 %tmp16, 24
226+
%tmp18 = or i32 %tmp13, %tmp17
227+
ret i32 %tmp18
228+
}
229+
230+
; i8* p; // p[-4] is 4 byte aligned
231+
; (i32) p[-4] | ((i32) p[-3] << 8) | ((i32) p[-2] << 16) | ((i32) p[-1] << 24)
232+
define i32 @load_i32_by_i8_neg_offset(i32* %arg) {
; Assembles an i32 from four single-byte loads at byte offsets -4..-1 from
; %arg: offset -4 supplies bits 0-7, offset -3 bits 8-15, offset -2 bits 16-23
; and offset -1 bits 24-31.
; The expectations below pin four separate ldurb loads merged with bfi, i.e.
; the byte loads are expected to stay separate for this pattern.
233+
; CHECK-LABEL: load_i32_by_i8_neg_offset:
234+
; CHECK: ldurb w8, [x0, #-4]
235+
; CHECK-NEXT: ldurb w9, [x0, #-3]
236+
; CHECK-NEXT: ldurb w10, [x0, #-2]
237+
; CHECK-NEXT: ldurb w11, [x0, #-1]
238+
; CHECK-NEXT: bfi w8, w9, #8, #8
239+
; CHECK-NEXT: bfi w8, w10, #16, #8
240+
; CHECK-NEXT: bfi w8, w11, #24, #8
241+
; CHECK-NEXT: mov w0, w8
242+
; CHECK-NEXT: ret
; Input IR: each byte is zero-extended to i32, shifted into position and ORed
; into the running result.
243+
%tmp = bitcast i32* %arg to i8*
244+
%tmp1 = getelementptr inbounds i8, i8* %tmp, i32 -4
245+
%tmp2 = load i8, i8* %tmp1, align 4
; Only the load above (offset -4) is marked 4-byte aligned; the other three
; loads use align 1.
246+
%tmp3 = zext i8 %tmp2 to i32
247+
%tmp4 = getelementptr inbounds i8, i8* %tmp, i32 -3
248+
%tmp5 = load i8, i8* %tmp4, align 1
249+
%tmp6 = zext i8 %tmp5 to i32
250+
%tmp7 = shl nuw nsw i32 %tmp6, 8
251+
%tmp8 = or i32 %tmp7, %tmp3
252+
%tmp9 = getelementptr inbounds i8, i8* %tmp, i32 -2
253+
%tmp10 = load i8, i8* %tmp9, align 1
254+
%tmp11 = zext i8 %tmp10 to i32
255+
%tmp12 = shl nuw nsw i32 %tmp11, 16
256+
%tmp13 = or i32 %tmp8, %tmp12
257+
%tmp14 = getelementptr inbounds i8, i8* %tmp, i32 -1
258+
%tmp15 = load i8, i8* %tmp14, align 1
259+
%tmp16 = zext i8 %tmp15 to i32
260+
%tmp17 = shl nuw nsw i32 %tmp16, 24
261+
%tmp18 = or i32 %tmp13, %tmp17
262+
ret i32 %tmp18
263+
}
264+
265+
; i8* p; // p[1] is 4 byte aligned
266+
; (i32) p[4] | ((i32) p[3] << 8) | ((i32) p[2] << 16) | ((i32) p[1] << 24)
267+
define i32 @load_i32_by_i8_nonzero_offset_bswap(i32* %arg) {
; Byte-reversed variant: assembles an i32 from four single-byte loads at byte
; offsets 1..4 from %arg, with offset 4 supplying bits 0-7, offset 3 bits 8-15,
; offset 2 bits 16-23 and offset 1 bits 24-31.
; The expectations below pin four separate ldrb loads merged with bfi, i.e.
; the byte loads are expected to stay separate for this pattern.
268+
; CHECK-LABEL: load_i32_by_i8_nonzero_offset_bswap:
269+
; CHECK: ldrb w8, [x0, #4]
270+
; CHECK-NEXT: ldrb w9, [x0, #3]
271+
; CHECK-NEXT: ldrb w10, [x0, #2]
272+
; CHECK-NEXT: ldrb w11, [x0, #1]
273+
; CHECK-NEXT: bfi w8, w9, #8, #8
274+
; CHECK-NEXT: bfi w8, w10, #16, #8
275+
; CHECK-NEXT: bfi w8, w11, #24, #8
276+
; CHECK-NEXT: mov w0, w8
277+
; CHECK-NEXT: ret
; Input IR: bytes are read from the highest offset down; each is zero-extended
; to i32, shifted into position and ORed into the running result.
278+
%tmp = bitcast i32* %arg to i8*
279+
%tmp1 = getelementptr inbounds i8, i8* %tmp, i32 4
280+
%tmp2 = load i8, i8* %tmp1, align 1
281+
%tmp3 = zext i8 %tmp2 to i32
282+
%tmp4 = getelementptr inbounds i8, i8* %tmp, i32 3
283+
%tmp5 = load i8, i8* %tmp4, align 1
284+
%tmp6 = zext i8 %tmp5 to i32
285+
%tmp7 = shl nuw nsw i32 %tmp6, 8
286+
%tmp8 = or i32 %tmp7, %tmp3
287+
%tmp9 = getelementptr inbounds i8, i8* %tmp, i32 2
288+
%tmp10 = load i8, i8* %tmp9, align 1
289+
%tmp11 = zext i8 %tmp10 to i32
290+
%tmp12 = shl nuw nsw i32 %tmp11, 16
291+
%tmp13 = or i32 %tmp8, %tmp12
292+
%tmp14 = getelementptr inbounds i8, i8* %tmp, i32 1
293+
%tmp15 = load i8, i8* %tmp14, align 4
; Only the load above (offset 1, the lowest address) is marked 4-byte aligned;
; the other three loads use align 1.
294+
%tmp16 = zext i8 %tmp15 to i32
295+
%tmp17 = shl nuw nsw i32 %tmp16, 24
296+
%tmp18 = or i32 %tmp13, %tmp17
297+
ret i32 %tmp18
298+
}
299+
300+
; i8* p; // p[-4] is 4 byte aligned
301+
; (i32) p[-1] | ((i32) p[-2] << 8) | ((i32) p[-3] << 16) | ((i32) p[-4] << 24)
302+
define i32 @load_i32_by_i8_neg_offset_bswap(i32* %arg) {
; Byte-reversed variant: assembles an i32 from four single-byte loads at byte
; offsets -4..-1 from %arg, with offset -1 supplying bits 0-7, offset -2 bits
; 8-15, offset -3 bits 16-23 and offset -4 bits 24-31.
; The expectations below pin four separate ldurb loads merged with bfi, i.e.
; the byte loads are expected to stay separate for this pattern.
303+
; CHECK-LABEL: load_i32_by_i8_neg_offset_bswap:
304+
; CHECK: ldurb w8, [x0, #-1]
305+
; CHECK-NEXT: ldurb w9, [x0, #-2]
306+
; CHECK-NEXT: ldurb w10, [x0, #-3]
307+
; CHECK-NEXT: ldurb w11, [x0, #-4]
308+
; CHECK-NEXT: bfi w8, w9, #8, #8
309+
; CHECK-NEXT: bfi w8, w10, #16, #8
310+
; CHECK-NEXT: bfi w8, w11, #24, #8
311+
; CHECK-NEXT: mov w0, w8
312+
; CHECK-NEXT: ret
; Input IR: bytes are read from the highest offset down; each is zero-extended
; to i32, shifted into position and ORed into the running result.
313+
%tmp = bitcast i32* %arg to i8*
314+
%tmp1 = getelementptr inbounds i8, i8* %tmp, i32 -1
315+
%tmp2 = load i8, i8* %tmp1, align 1
316+
%tmp3 = zext i8 %tmp2 to i32
317+
%tmp4 = getelementptr inbounds i8, i8* %tmp, i32 -2
318+
%tmp5 = load i8, i8* %tmp4, align 1
319+
%tmp6 = zext i8 %tmp5 to i32
320+
%tmp7 = shl nuw nsw i32 %tmp6, 8
321+
%tmp8 = or i32 %tmp7, %tmp3
322+
%tmp9 = getelementptr inbounds i8, i8* %tmp, i32 -3
323+
%tmp10 = load i8, i8* %tmp9, align 1
324+
%tmp11 = zext i8 %tmp10 to i32
325+
%tmp12 = shl nuw nsw i32 %tmp11, 16
326+
%tmp13 = or i32 %tmp8, %tmp12
327+
%tmp14 = getelementptr inbounds i8, i8* %tmp, i32 -4
328+
%tmp15 = load i8, i8* %tmp14, align 4
; Only the load above (offset -4, the lowest address) is marked 4-byte
; aligned; the other three loads use align 1.
329+
%tmp16 = zext i8 %tmp15 to i32
330+
%tmp17 = shl nuw nsw i32 %tmp16, 24
331+
%tmp18 = or i32 %tmp13, %tmp17
332+
ret i32 %tmp18
333+
}

‎llvm/test/CodeGen/AArch64/load-combine.ll

Lines changed: 140 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -178,3 +178,143 @@ define i64 @load_i64_by_i8_bswap(i64* %arg) {
178178
%tmp37 = or i64 %tmp33, %tmp36
179179
ret i64 %tmp37
180180
}
181+
182+
; i8* p; // p[1] is 4 byte aligned
183+
; (i32) p[1] | ((i32) p[2] << 8) | ((i32) p[3] << 16) | ((i32) p[4] << 24)
184+
define i32 @load_i32_by_i8_nonzero_offset(i32* %arg) {
; Assembles an i32 from four single-byte loads at byte offsets 1..4 from %arg:
; offset 1 supplies bits 0-7, offset 2 bits 8-15, offset 3 bits 16-23 and
; offset 4 bits 24-31.
; The expectations below pin four separate ldrb loads merged with bfi, i.e.
; the byte loads are expected to stay separate for this pattern.
185+
; CHECK-LABEL: load_i32_by_i8_nonzero_offset:
186+
; CHECK: ldrb w8, [x0, #1]
187+
; CHECK-NEXT: ldrb w9, [x0, #2]
188+
; CHECK-NEXT: ldrb w10, [x0, #3]
189+
; CHECK-NEXT: ldrb w11, [x0, #4]
190+
; CHECK-NEXT: bfi w8, w9, #8, #8
191+
; CHECK-NEXT: bfi w8, w10, #16, #8
192+
; CHECK-NEXT: bfi w8, w11, #24, #8
193+
; CHECK-NEXT: mov w0, w8
194+
; CHECK-NEXT: ret
; Input IR: each byte is zero-extended to i32, shifted into position and ORed
; into the running result.
195+
%tmp = bitcast i32* %arg to i8*
196+
%tmp1 = getelementptr inbounds i8, i8* %tmp, i32 1
197+
%tmp2 = load i8, i8* %tmp1, align 4
; Only the load above (offset 1) is marked 4-byte aligned; the other three
; loads use align 1.
198+
%tmp3 = zext i8 %tmp2 to i32
199+
%tmp4 = getelementptr inbounds i8, i8* %tmp, i32 2
200+
%tmp5 = load i8, i8* %tmp4, align 1
201+
%tmp6 = zext i8 %tmp5 to i32
202+
%tmp7 = shl nuw nsw i32 %tmp6, 8
203+
%tmp8 = or i32 %tmp7, %tmp3
204+
%tmp9 = getelementptr inbounds i8, i8* %tmp, i32 3
205+
%tmp10 = load i8, i8* %tmp9, align 1
206+
%tmp11 = zext i8 %tmp10 to i32
207+
%tmp12 = shl nuw nsw i32 %tmp11, 16
208+
%tmp13 = or i32 %tmp8, %tmp12
209+
%tmp14 = getelementptr inbounds i8, i8* %tmp, i32 4
210+
%tmp15 = load i8, i8* %tmp14, align 1
211+
%tmp16 = zext i8 %tmp15 to i32
212+
%tmp17 = shl nuw nsw i32 %tmp16, 24
213+
%tmp18 = or i32 %tmp13, %tmp17
214+
ret i32 %tmp18
215+
}
216+
217+
; i8* p; // p[-4] is 4 byte aligned
218+
; (i32) p[-4] | ((i32) p[-3] << 8) | ((i32) p[-2] << 16) | ((i32) p[-1] << 24)
219+
define i32 @load_i32_by_i8_neg_offset(i32* %arg) {
; Assembles an i32 from four single-byte loads at byte offsets -4..-1 from
; %arg: offset -4 supplies bits 0-7, offset -3 bits 8-15, offset -2 bits 16-23
; and offset -1 bits 24-31.
; The expectations below pin four separate ldurb loads merged with bfi, i.e.
; the byte loads are expected to stay separate for this pattern.
220+
; CHECK-LABEL: load_i32_by_i8_neg_offset:
221+
; CHECK: ldurb w8, [x0, #-4]
222+
; CHECK-NEXT: ldurb w9, [x0, #-3]
223+
; CHECK-NEXT: ldurb w10, [x0, #-2]
224+
; CHECK-NEXT: ldurb w11, [x0, #-1]
225+
; CHECK-NEXT: bfi w8, w9, #8, #8
226+
; CHECK-NEXT: bfi w8, w10, #16, #8
227+
; CHECK-NEXT: bfi w8, w11, #24, #8
228+
; CHECK-NEXT: mov w0, w8
229+
; CHECK-NEXT: ret
; Input IR: each byte is zero-extended to i32, shifted into position and ORed
; into the running result.
230+
%tmp = bitcast i32* %arg to i8*
231+
%tmp1 = getelementptr inbounds i8, i8* %tmp, i32 -4
232+
%tmp2 = load i8, i8* %tmp1, align 4
; Only the load above (offset -4) is marked 4-byte aligned; the other three
; loads use align 1.
233+
%tmp3 = zext i8 %tmp2 to i32
234+
%tmp4 = getelementptr inbounds i8, i8* %tmp, i32 -3
235+
%tmp5 = load i8, i8* %tmp4, align 1
236+
%tmp6 = zext i8 %tmp5 to i32
237+
%tmp7 = shl nuw nsw i32 %tmp6, 8
238+
%tmp8 = or i32 %tmp7, %tmp3
239+
%tmp9 = getelementptr inbounds i8, i8* %tmp, i32 -2
240+
%tmp10 = load i8, i8* %tmp9, align 1
241+
%tmp11 = zext i8 %tmp10 to i32
242+
%tmp12 = shl nuw nsw i32 %tmp11, 16
243+
%tmp13 = or i32 %tmp8, %tmp12
244+
%tmp14 = getelementptr inbounds i8, i8* %tmp, i32 -1
245+
%tmp15 = load i8, i8* %tmp14, align 1
246+
%tmp16 = zext i8 %tmp15 to i32
247+
%tmp17 = shl nuw nsw i32 %tmp16, 24
248+
%tmp18 = or i32 %tmp13, %tmp17
249+
ret i32 %tmp18
250+
}
251+
252+
; i8* p; // p[1] is 4 byte aligned
253+
; (i32) p[4] | ((i32) p[3] << 8) | ((i32) p[2] << 16) | ((i32) p[1] << 24)
254+
define i32 @load_i32_by_i8_nonzero_offset_bswap(i32* %arg) {
; Byte-reversed variant: assembles an i32 from four single-byte loads at byte
; offsets 1..4 from %arg, with offset 4 supplying bits 0-7, offset 3 bits 8-15,
; offset 2 bits 16-23 and offset 1 bits 24-31.
; The expectations below pin four separate ldrb loads merged with bfi, i.e.
; the byte loads are expected to stay separate for this pattern.
255+
; CHECK-LABEL: load_i32_by_i8_nonzero_offset_bswap:
256+
; CHECK: ldrb w8, [x0, #4]
257+
; CHECK-NEXT: ldrb w9, [x0, #3]
258+
; CHECK-NEXT: ldrb w10, [x0, #2]
259+
; CHECK-NEXT: ldrb w11, [x0, #1]
260+
; CHECK-NEXT: bfi w8, w9, #8, #8
261+
; CHECK-NEXT: bfi w8, w10, #16, #8
262+
; CHECK-NEXT: bfi w8, w11, #24, #8
263+
; CHECK-NEXT: mov w0, w8
264+
; CHECK-NEXT: ret
; Input IR: bytes are read from the highest offset down; each is zero-extended
; to i32, shifted into position and ORed into the running result.
265+
%tmp = bitcast i32* %arg to i8*
266+
%tmp1 = getelementptr inbounds i8, i8* %tmp, i32 4
267+
%tmp2 = load i8, i8* %tmp1, align 1
268+
%tmp3 = zext i8 %tmp2 to i32
269+
%tmp4 = getelementptr inbounds i8, i8* %tmp, i32 3
270+
%tmp5 = load i8, i8* %tmp4, align 1
271+
%tmp6 = zext i8 %tmp5 to i32
272+
%tmp7 = shl nuw nsw i32 %tmp6, 8
273+
%tmp8 = or i32 %tmp7, %tmp3
274+
%tmp9 = getelementptr inbounds i8, i8* %tmp, i32 2
275+
%tmp10 = load i8, i8* %tmp9, align 1
276+
%tmp11 = zext i8 %tmp10 to i32
277+
%tmp12 = shl nuw nsw i32 %tmp11, 16
278+
%tmp13 = or i32 %tmp8, %tmp12
279+
%tmp14 = getelementptr inbounds i8, i8* %tmp, i32 1
280+
%tmp15 = load i8, i8* %tmp14, align 4
; Only the load above (offset 1, the lowest address) is marked 4-byte aligned;
; the other three loads use align 1.
281+
%tmp16 = zext i8 %tmp15 to i32
282+
%tmp17 = shl nuw nsw i32 %tmp16, 24
283+
%tmp18 = or i32 %tmp13, %tmp17
284+
ret i32 %tmp18
285+
}
286+
287+
; i8* p; // p[-4] is 4 byte aligned
288+
; (i32) p[-1] | ((i32) p[-2] << 8) | ((i32) p[-3] << 16) | ((i32) p[-4] << 24)
289+
define i32 @load_i32_by_i8_neg_offset_bswap(i32* %arg) {
; Byte-reversed variant: assembles an i32 from four single-byte loads at byte
; offsets -4..-1 from %arg, with offset -1 supplying bits 0-7, offset -2 bits
; 8-15, offset -3 bits 16-23 and offset -4 bits 24-31.
; The expectations below pin four separate ldurb loads merged with bfi, i.e.
; the byte loads are expected to stay separate for this pattern.
290+
; CHECK-LABEL: load_i32_by_i8_neg_offset_bswap:
291+
; CHECK: ldurb w8, [x0, #-1]
292+
; CHECK-NEXT: ldurb w9, [x0, #-2]
293+
; CHECK-NEXT: ldurb w10, [x0, #-3]
294+
; CHECK-NEXT: ldurb w11, [x0, #-4]
295+
; CHECK-NEXT: bfi w8, w9, #8, #8
296+
; CHECK-NEXT: bfi w8, w10, #16, #8
297+
; CHECK-NEXT: bfi w8, w11, #24, #8
298+
; CHECK-NEXT: mov w0, w8
299+
; CHECK-NEXT: ret
; Input IR: bytes are read from the highest offset down; each is zero-extended
; to i32, shifted into position and ORed into the running result.
300+
%tmp = bitcast i32* %arg to i8*
301+
%tmp1 = getelementptr inbounds i8, i8* %tmp, i32 -1
302+
%tmp2 = load i8, i8* %tmp1, align 1
303+
%tmp3 = zext i8 %tmp2 to i32
304+
%tmp4 = getelementptr inbounds i8, i8* %tmp, i32 -2
305+
%tmp5 = load i8, i8* %tmp4, align 1
306+
%tmp6 = zext i8 %tmp5 to i32
307+
%tmp7 = shl nuw nsw i32 %tmp6, 8
308+
%tmp8 = or i32 %tmp7, %tmp3
309+
%tmp9 = getelementptr inbounds i8, i8* %tmp, i32 -3
310+
%tmp10 = load i8, i8* %tmp9, align 1
311+
%tmp11 = zext i8 %tmp10 to i32
312+
%tmp12 = shl nuw nsw i32 %tmp11, 16
313+
%tmp13 = or i32 %tmp8, %tmp12
314+
%tmp14 = getelementptr inbounds i8, i8* %tmp, i32 -4
315+
%tmp15 = load i8, i8* %tmp14, align 4
; Only the load above (offset -4, the lowest address) is marked 4-byte
; aligned; the other three loads use align 1.
316+
%tmp16 = zext i8 %tmp15 to i32
317+
%tmp17 = shl nuw nsw i32 %tmp16, 24
318+
%tmp18 = or i32 %tmp13, %tmp17
319+
ret i32 %tmp18
320+
}

‎llvm/test/CodeGen/ARM/load-combine-big-endian.ll

Lines changed: 180 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -269,3 +269,183 @@ define i64 @load_i64_by_i8(i64* %arg) {
269269
%tmp37 = or i64 %tmp33, %tmp36
270270
ret i64 %tmp37
271271
}
272+
273+
; i8* p; // p[1] is 4 byte aligned
274+
; (i32) p[1] | ((i32) p[2] << 8) | ((i32) p[3] << 16) | ((i32) p[4] << 24)
275+
define i32 @load_i32_by_i8_nonzero_offset(i32* %arg) {
; Assembles an i32 from four single-byte loads at byte offsets 1..4 from %arg:
; offset 1 supplies bits 0-7, offset 2 bits 8-15, offset 3 bits 16-23 and
; offset 4 bits 24-31.
; The expectations below pin four separate ldrb loads combined with shifted
; orr instructions, i.e. the byte loads are expected to stay separate.
; Two sets of expectations follow (default prefix and ARMv6 prefix); as
; written they differ only in the return instruction (mov pc, lr vs. bx lr).
276+
; CHECK-LABEL: load_i32_by_i8_nonzero_offset:
277+
; CHECK: ldrb r1, [r0, #1]
278+
; CHECK-NEXT: ldrb r2, [r0, #2]
279+
; CHECK-NEXT: ldrb r3, [r0, #3]
280+
; CHECK-NEXT: ldrb r0, [r0, #4]
281+
; CHECK-NEXT: orr r1, r1, r2, lsl #8
282+
; CHECK-NEXT: orr r1, r1, r3, lsl #16
283+
; CHECK-NEXT: orr r0, r1, r0, lsl #24
284+
; CHECK-NEXT: mov pc, lr
285+
286+
; CHECK-ARMv6-LABEL: load_i32_by_i8_nonzero_offset:
287+
; CHECK-ARMv6: ldrb r1, [r0, #1]
288+
; CHECK-ARMv6-NEXT: ldrb r2, [r0, #2]
289+
; CHECK-ARMv6-NEXT: ldrb r3, [r0, #3]
290+
; CHECK-ARMv6-NEXT: ldrb r0, [r0, #4]
291+
; CHECK-ARMv6-NEXT: orr r1, r1, r2, lsl #8
292+
; CHECK-ARMv6-NEXT: orr r1, r1, r3, lsl #16
293+
; CHECK-ARMv6-NEXT: orr r0, r1, r0, lsl #24
294+
; CHECK-ARMv6-NEXT: bx lr
295+
296+
%tmp = bitcast i32* %arg to i8*
297+
%tmp1 = getelementptr inbounds i8, i8* %tmp, i32 1
298+
%tmp2 = load i8, i8* %tmp1, align 4
; Only the load above (offset 1) is marked 4-byte aligned; the other three
; loads use align 1.
299+
%tmp3 = zext i8 %tmp2 to i32
300+
%tmp4 = getelementptr inbounds i8, i8* %tmp, i32 2
301+
%tmp5 = load i8, i8* %tmp4, align 1
302+
%tmp6 = zext i8 %tmp5 to i32
303+
%tmp7 = shl nuw nsw i32 %tmp6, 8
304+
%tmp8 = or i32 %tmp7, %tmp3
305+
%tmp9 = getelementptr inbounds i8, i8* %tmp, i32 3
306+
%tmp10 = load i8, i8* %tmp9, align 1
307+
%tmp11 = zext i8 %tmp10 to i32
308+
%tmp12 = shl nuw nsw i32 %tmp11, 16
309+
%tmp13 = or i32 %tmp8, %tmp12
310+
%tmp14 = getelementptr inbounds i8, i8* %tmp, i32 4
311+
%tmp15 = load i8, i8* %tmp14, align 1
312+
%tmp16 = zext i8 %tmp15 to i32
313+
%tmp17 = shl nuw nsw i32 %tmp16, 24
314+
%tmp18 = or i32 %tmp13, %tmp17
315+
ret i32 %tmp18
316+
}
317+
318+
; i8* p; // p[-4] is 4 byte aligned
319+
; (i32) p[-4] | ((i32) p[-3] << 8) | ((i32) p[-2] << 16) | ((i32) p[-1] << 24)
320+
define i32 @load_i32_by_i8_neg_offset(i32* %arg) {
; Assembles an i32 from four single-byte loads at byte offsets -4..-1 from
; %arg: offset -4 supplies bits 0-7, offset -3 bits 8-15, offset -2 bits 16-23
; and offset -1 bits 24-31.
; The expectations below pin four separate ldrb loads combined with shifted
; orr instructions, i.e. the byte loads are expected to stay separate.
; Two sets of expectations follow (default prefix and ARMv6 prefix); as
; written they differ only in the return instruction (mov pc, lr vs. bx lr).
321+
; CHECK-LABEL: load_i32_by_i8_neg_offset:
322+
; CHECK: ldrb r1, [r0, #-4]
323+
; CHECK-NEXT: ldrb r2, [r0, #-3]
324+
; CHECK-NEXT: ldrb r3, [r0, #-2]
325+
; CHECK-NEXT: ldrb r0, [r0, #-1]
326+
; CHECK-NEXT: orr r1, r1, r2, lsl #8
327+
; CHECK-NEXT: orr r1, r1, r3, lsl #16
328+
; CHECK-NEXT: orr r0, r1, r0, lsl #24
329+
; CHECK-NEXT: mov pc, lr
330+
331+
; CHECK-ARMv6-LABEL: load_i32_by_i8_neg_offset:
332+
; CHECK-ARMv6: ldrb r1, [r0, #-4]
333+
; CHECK-ARMv6-NEXT: ldrb r2, [r0, #-3]
334+
; CHECK-ARMv6-NEXT: ldrb r3, [r0, #-2]
335+
; CHECK-ARMv6-NEXT: ldrb r0, [r0, #-1]
336+
; CHECK-ARMv6-NEXT: orr r1, r1, r2, lsl #8
337+
; CHECK-ARMv6-NEXT: orr r1, r1, r3, lsl #16
338+
; CHECK-ARMv6-NEXT: orr r0, r1, r0, lsl #24
339+
; CHECK-ARMv6-NEXT: bx lr
340+
341+
%tmp = bitcast i32* %arg to i8*
342+
%tmp1 = getelementptr inbounds i8, i8* %tmp, i32 -4
343+
%tmp2 = load i8, i8* %tmp1, align 4
; Only the load above (offset -4) is marked 4-byte aligned; the other three
; loads use align 1.
344+
%tmp3 = zext i8 %tmp2 to i32
345+
%tmp4 = getelementptr inbounds i8, i8* %tmp, i32 -3
346+
%tmp5 = load i8, i8* %tmp4, align 1
347+
%tmp6 = zext i8 %tmp5 to i32
348+
%tmp7 = shl nuw nsw i32 %tmp6, 8
349+
%tmp8 = or i32 %tmp7, %tmp3
350+
%tmp9 = getelementptr inbounds i8, i8* %tmp, i32 -2
351+
%tmp10 = load i8, i8* %tmp9, align 1
352+
%tmp11 = zext i8 %tmp10 to i32
353+
%tmp12 = shl nuw nsw i32 %tmp11, 16
354+
%tmp13 = or i32 %tmp8, %tmp12
355+
%tmp14 = getelementptr inbounds i8, i8* %tmp, i32 -1
356+
%tmp15 = load i8, i8* %tmp14, align 1
357+
%tmp16 = zext i8 %tmp15 to i32
358+
%tmp17 = shl nuw nsw i32 %tmp16, 24
359+
%tmp18 = or i32 %tmp13, %tmp17
360+
ret i32 %tmp18
361+
}
362+
363+
; i8* p; // p[1] is 4 byte aligned
364+
; (i32) p[4] | ((i32) p[3] << 8) | ((i32) p[2] << 16) | ((i32) p[1] << 24)
365+
define i32 @load_i32_by_i8_nonzero_offset_bswap(i32* %arg) {
; Byte-reversed variant: assembles an i32 from four single-byte loads at byte
; offsets 1..4 from %arg, with offset 4 supplying bits 0-7, offset 3 bits 8-15,
; offset 2 bits 16-23 and offset 1 bits 24-31.
; The expectations below pin four separate ldrb loads combined with shifted
; orr instructions, i.e. the byte loads are expected to stay separate. The
; expected loads are listed in ascending address order although the IR below
; issues them from the highest offset down.
; Two sets of expectations follow (default prefix and ARMv6 prefix); as
; written they differ only in the return instruction (mov pc, lr vs. bx lr).
366+
; CHECK-LABEL: load_i32_by_i8_nonzero_offset_bswap:
367+
; CHECK: ldrb r1, [r0, #1]
368+
; CHECK-NEXT: ldrb r2, [r0, #2]
369+
; CHECK-NEXT: ldrb r3, [r0, #3]
370+
; CHECK-NEXT: ldrb r0, [r0, #4]
371+
; CHECK-NEXT: orr r0, r0, r3, lsl #8
372+
; CHECK-NEXT: orr r0, r0, r2, lsl #16
373+
; CHECK-NEXT: orr r0, r0, r1, lsl #24
374+
; CHECK-NEXT: mov pc, lr
375+
376+
; CHECK-ARMv6-LABEL: load_i32_by_i8_nonzero_offset_bswap:
377+
; CHECK-ARMv6: ldrb r1, [r0, #1]
378+
; CHECK-ARMv6-NEXT: ldrb r2, [r0, #2]
379+
; CHECK-ARMv6-NEXT: ldrb r3, [r0, #3]
380+
; CHECK-ARMv6-NEXT: ldrb r0, [r0, #4]
381+
; CHECK-ARMv6-NEXT: orr r0, r0, r3, lsl #8
382+
; CHECK-ARMv6-NEXT: orr r0, r0, r2, lsl #16
383+
; CHECK-ARMv6-NEXT: orr r0, r0, r1, lsl #24
384+
; CHECK-ARMv6-NEXT: bx lr
385+
386+
%tmp = bitcast i32* %arg to i8*
387+
%tmp1 = getelementptr inbounds i8, i8* %tmp, i32 4
388+
%tmp2 = load i8, i8* %tmp1, align 1
389+
%tmp3 = zext i8 %tmp2 to i32
390+
%tmp4 = getelementptr inbounds i8, i8* %tmp, i32 3
391+
%tmp5 = load i8, i8* %tmp4, align 1
392+
%tmp6 = zext i8 %tmp5 to i32
393+
%tmp7 = shl nuw nsw i32 %tmp6, 8
394+
%tmp8 = or i32 %tmp7, %tmp3
395+
%tmp9 = getelementptr inbounds i8, i8* %tmp, i32 2
396+
%tmp10 = load i8, i8* %tmp9, align 1
397+
%tmp11 = zext i8 %tmp10 to i32
398+
%tmp12 = shl nuw nsw i32 %tmp11, 16
399+
%tmp13 = or i32 %tmp8, %tmp12
400+
%tmp14 = getelementptr inbounds i8, i8* %tmp, i32 1
401+
%tmp15 = load i8, i8* %tmp14, align 4
; Only the load above (offset 1, the lowest address) is marked 4-byte aligned;
; the other three loads use align 1.
402+
%tmp16 = zext i8 %tmp15 to i32
403+
%tmp17 = shl nuw nsw i32 %tmp16, 24
404+
%tmp18 = or i32 %tmp13, %tmp17
405+
ret i32 %tmp18
406+
}
407+
408+
; i8* p; // p[-4] is 4 byte aligned
409+
; (i32) p[-1] | ((i32) p[-2] << 8) | ((i32) p[-3] << 16) | ((i32) p[-4] << 24)
410+
define i32 @load_i32_by_i8_neg_offset_bswap(i32* %arg) {
; Byte-reversed variant: assembles an i32 from four single-byte loads at byte
; offsets -4..-1 from %arg, with offset -1 supplying bits 0-7, offset -2 bits
; 8-15, offset -3 bits 16-23 and offset -4 bits 24-31.
; The expectations below pin four separate ldrb loads combined with shifted
; orr instructions, i.e. the byte loads are expected to stay separate. The
; expected loads are listed in ascending address order although the IR below
; issues them from the highest offset down.
; Two sets of expectations follow (default prefix and ARMv6 prefix); as
; written they differ only in the return instruction (mov pc, lr vs. bx lr).
411+
; CHECK-LABEL: load_i32_by_i8_neg_offset_bswap:
412+
; CHECK: ldrb r1, [r0, #-4]
413+
; CHECK-NEXT: ldrb r2, [r0, #-3]
414+
; CHECK-NEXT: ldrb r3, [r0, #-2]
415+
; CHECK-NEXT: ldrb r0, [r0, #-1]
416+
; CHECK-NEXT: orr r0, r0, r3, lsl #8
417+
; CHECK-NEXT: orr r0, r0, r2, lsl #16
418+
; CHECK-NEXT: orr r0, r0, r1, lsl #24
419+
; CHECK-NEXT: mov pc, lr
420+
421+
; CHECK-ARMv6-LABEL: load_i32_by_i8_neg_offset_bswap:
422+
; CHECK-ARMv6: ldrb r1, [r0, #-4]
423+
; CHECK-ARMv6-NEXT: ldrb r2, [r0, #-3]
424+
; CHECK-ARMv6-NEXT: ldrb r3, [r0, #-2]
425+
; CHECK-ARMv6-NEXT: ldrb r0, [r0, #-1]
426+
; CHECK-ARMv6-NEXT: orr r0, r0, r3, lsl #8
427+
; CHECK-ARMv6-NEXT: orr r0, r0, r2, lsl #16
428+
; CHECK-ARMv6-NEXT: orr r0, r0, r1, lsl #24
429+
; CHECK-ARMv6-NEXT: bx lr
430+
431+
%tmp = bitcast i32* %arg to i8*
432+
%tmp1 = getelementptr inbounds i8, i8* %tmp, i32 -1
433+
%tmp2 = load i8, i8* %tmp1, align 1
434+
%tmp3 = zext i8 %tmp2 to i32
435+
%tmp4 = getelementptr inbounds i8, i8* %tmp, i32 -2
436+
%tmp5 = load i8, i8* %tmp4, align 1
437+
%tmp6 = zext i8 %tmp5 to i32
438+
%tmp7 = shl nuw nsw i32 %tmp6, 8
439+
%tmp8 = or i32 %tmp7, %tmp3
440+
%tmp9 = getelementptr inbounds i8, i8* %tmp, i32 -3
441+
%tmp10 = load i8, i8* %tmp9, align 1
442+
%tmp11 = zext i8 %tmp10 to i32
443+
%tmp12 = shl nuw nsw i32 %tmp11, 16
444+
%tmp13 = or i32 %tmp8, %tmp12
445+
%tmp14 = getelementptr inbounds i8, i8* %tmp, i32 -4
446+
%tmp15 = load i8, i8* %tmp14, align 4
; Only the load above (offset -4, the lowest address) is marked 4-byte
; aligned; the other three loads use align 1.
447+
%tmp16 = zext i8 %tmp15 to i32
448+
%tmp17 = shl nuw nsw i32 %tmp16, 24
449+
%tmp18 = or i32 %tmp13, %tmp17
450+
ret i32 %tmp18
451+
}

‎llvm/test/CodeGen/ARM/load-combine.ll

Lines changed: 180 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -227,3 +227,183 @@ define i64 @load_i64_by_i8_bswap(i64* %arg) {
227227
%tmp37 = or i64 %tmp33, %tmp36
228228
ret i64 %tmp37
229229
}
230+
231+
; i8* p; // p[1] is 4 byte aligned
232+
; (i32) p[1] | ((i32) p[2] << 8) | ((i32) p[3] << 16) | ((i32) p[4] << 24)
233+
define i32 @load_i32_by_i8_nonzero_offset(i32* %arg) {
; Assembles an i32 from four single-byte loads at byte offsets 1..4 from %arg:
; offset 1 supplies bits 0-7, offset 2 bits 8-15, offset 3 bits 16-23 and
; offset 4 bits 24-31.
; The expectations below pin four separate ldrb loads combined with shifted
; orr instructions, i.e. the byte loads are expected to stay separate.
; Two sets of expectations follow (default prefix and ARMv6 prefix); as
; written they differ only in the return instruction (mov pc, lr vs. bx lr).
234+
; CHECK-LABEL: load_i32_by_i8_nonzero_offset:
235+
; CHECK: ldrb r1, [r0, #1]
236+
; CHECK-NEXT: ldrb r2, [r0, #2]
237+
; CHECK-NEXT: ldrb r3, [r0, #3]
238+
; CHECK-NEXT: ldrb r0, [r0, #4]
239+
; CHECK-NEXT: orr r1, r1, r2, lsl #8
240+
; CHECK-NEXT: orr r1, r1, r3, lsl #16
241+
; CHECK-NEXT: orr r0, r1, r0, lsl #24
242+
; CHECK-NEXT: mov pc, lr
243+
244+
; CHECK-ARMv6-LABEL: load_i32_by_i8_nonzero_offset:
245+
; CHECK-ARMv6: ldrb r1, [r0, #1]
246+
; CHECK-ARMv6-NEXT: ldrb r2, [r0, #2]
247+
; CHECK-ARMv6-NEXT: ldrb r3, [r0, #3]
248+
; CHECK-ARMv6-NEXT: ldrb r0, [r0, #4]
249+
; CHECK-ARMv6-NEXT: orr r1, r1, r2, lsl #8
250+
; CHECK-ARMv6-NEXT: orr r1, r1, r3, lsl #16
251+
; CHECK-ARMv6-NEXT: orr r0, r1, r0, lsl #24
252+
; CHECK-ARMv6-NEXT: bx lr
253+
254+
%tmp = bitcast i32* %arg to i8*
255+
%tmp1 = getelementptr inbounds i8, i8* %tmp, i32 1
256+
%tmp2 = load i8, i8* %tmp1, align 4
; Only the load above (offset 1) is marked 4-byte aligned; the other three
; loads use align 1.
257+
%tmp3 = zext i8 %tmp2 to i32
258+
%tmp4 = getelementptr inbounds i8, i8* %tmp, i32 2
259+
%tmp5 = load i8, i8* %tmp4, align 1
260+
%tmp6 = zext i8 %tmp5 to i32
261+
%tmp7 = shl nuw nsw i32 %tmp6, 8
262+
%tmp8 = or i32 %tmp7, %tmp3
263+
%tmp9 = getelementptr inbounds i8, i8* %tmp, i32 3
264+
%tmp10 = load i8, i8* %tmp9, align 1
265+
%tmp11 = zext i8 %tmp10 to i32
266+
%tmp12 = shl nuw nsw i32 %tmp11, 16
267+
%tmp13 = or i32 %tmp8, %tmp12
268+
%tmp14 = getelementptr inbounds i8, i8* %tmp, i32 4
269+
%tmp15 = load i8, i8* %tmp14, align 1
270+
%tmp16 = zext i8 %tmp15 to i32
271+
%tmp17 = shl nuw nsw i32 %tmp16, 24
272+
%tmp18 = or i32 %tmp13, %tmp17
273+
ret i32 %tmp18
274+
}
275+
276+
; i8* p; // p[-4] is 4 byte aligned
277+
; (i32) p[-4] | ((i32) p[-3] << 8) | ((i32) p[-2] << 16) | ((i32) p[-1] << 24)
278+
define i32 @load_i32_by_i8_neg_offset(i32* %arg) {
; Assembles an i32 from four single-byte loads at byte offsets -4..-1 from
; %arg: offset -4 supplies bits 0-7, offset -3 bits 8-15, offset -2 bits 16-23
; and offset -1 bits 24-31.
; The expectations below pin four separate ldrb loads combined with shifted
; orr instructions, i.e. the byte loads are expected to stay separate.
; Two sets of expectations follow (default prefix and ARMv6 prefix); as
; written they differ only in the return instruction (mov pc, lr vs. bx lr).
279+
; CHECK-LABEL: load_i32_by_i8_neg_offset:
280+
; CHECK: ldrb r1, [r0, #-4]
281+
; CHECK-NEXT: ldrb r2, [r0, #-3]
282+
; CHECK-NEXT: ldrb r3, [r0, #-2]
283+
; CHECK-NEXT: ldrb r0, [r0, #-1]
284+
; CHECK-NEXT: orr r1, r1, r2, lsl #8
285+
; CHECK-NEXT: orr r1, r1, r3, lsl #16
286+
; CHECK-NEXT: orr r0, r1, r0, lsl #24
287+
; CHECK-NEXT: mov pc, lr
288+
289+
; CHECK-ARMv6-LABEL: load_i32_by_i8_neg_offset:
290+
; CHECK-ARMv6: ldrb r1, [r0, #-4]
291+
; CHECK-ARMv6-NEXT: ldrb r2, [r0, #-3]
292+
; CHECK-ARMv6-NEXT: ldrb r3, [r0, #-2]
293+
; CHECK-ARMv6-NEXT: ldrb r0, [r0, #-1]
294+
; CHECK-ARMv6-NEXT: orr r1, r1, r2, lsl #8
295+
; CHECK-ARMv6-NEXT: orr r1, r1, r3, lsl #16
296+
; CHECK-ARMv6-NEXT: orr r0, r1, r0, lsl #24
297+
; CHECK-ARMv6-NEXT: bx lr
298+
299+
%tmp = bitcast i32* %arg to i8*
300+
%tmp1 = getelementptr inbounds i8, i8* %tmp, i32 -4
301+
%tmp2 = load i8, i8* %tmp1, align 4
; Only the load above (offset -4) is marked 4-byte aligned; the other three
; loads use align 1.
302+
%tmp3 = zext i8 %tmp2 to i32
303+
%tmp4 = getelementptr inbounds i8, i8* %tmp, i32 -3
304+
%tmp5 = load i8, i8* %tmp4, align 1
305+
%tmp6 = zext i8 %tmp5 to i32
306+
%tmp7 = shl nuw nsw i32 %tmp6, 8
307+
%tmp8 = or i32 %tmp7, %tmp3
308+
%tmp9 = getelementptr inbounds i8, i8* %tmp, i32 -2
309+
%tmp10 = load i8, i8* %tmp9, align 1
310+
%tmp11 = zext i8 %tmp10 to i32
311+
%tmp12 = shl nuw nsw i32 %tmp11, 16
312+
%tmp13 = or i32 %tmp8, %tmp12
313+
%tmp14 = getelementptr inbounds i8, i8* %tmp, i32 -1
314+
%tmp15 = load i8, i8* %tmp14, align 1
315+
%tmp16 = zext i8 %tmp15 to i32
316+
%tmp17 = shl nuw nsw i32 %tmp16, 24
317+
%tmp18 = or i32 %tmp13, %tmp17
318+
ret i32 %tmp18
319+
}
320+
321+
; i8* p; // p[1] is 4 byte aligned
322+
; (i32) p[4] | ((i32) p[3] << 8) | ((i32) p[2] << 16) | ((i32) p[1] << 24)
323+
define i32 @load_i32_by_i8_nonzero_offset_bswap(i32* %arg) {
; Byte-reversed variant: assembles an i32 from four single-byte loads at byte
; offsets 1..4 from %arg, with offset 4 supplying bits 0-7, offset 3 bits 8-15,
; offset 2 bits 16-23 and offset 1 bits 24-31.
; The expectations below pin four separate ldrb loads combined with shifted
; orr instructions, i.e. the byte loads are expected to stay separate. The
; expected loads are listed in ascending address order although the IR below
; issues them from the highest offset down.
; Two sets of expectations follow (default prefix and ARMv6 prefix); as
; written they differ only in the return instruction (mov pc, lr vs. bx lr).
324+
; CHECK-LABEL: load_i32_by_i8_nonzero_offset_bswap:
325+
; CHECK: ldrb r1, [r0, #1]
326+
; CHECK-NEXT: ldrb r2, [r0, #2]
327+
; CHECK-NEXT: ldrb r3, [r0, #3]
328+
; CHECK-NEXT: ldrb r0, [r0, #4]
329+
; CHECK-NEXT: orr r0, r0, r3, lsl #8
330+
; CHECK-NEXT: orr r0, r0, r2, lsl #16
331+
; CHECK-NEXT: orr r0, r0, r1, lsl #24
332+
; CHECK-NEXT: mov pc, lr
333+
334+
; CHECK-ARMv6-LABEL: load_i32_by_i8_nonzero_offset_bswap:
335+
; CHECK-ARMv6: ldrb r1, [r0, #1]
336+
; CHECK-ARMv6-NEXT: ldrb r2, [r0, #2]
337+
; CHECK-ARMv6-NEXT: ldrb r3, [r0, #3]
338+
; CHECK-ARMv6-NEXT: ldrb r0, [r0, #4]
339+
; CHECK-ARMv6-NEXT: orr r0, r0, r3, lsl #8
340+
; CHECK-ARMv6-NEXT: orr r0, r0, r2, lsl #16
341+
; CHECK-ARMv6-NEXT: orr r0, r0, r1, lsl #24
342+
; CHECK-ARMv6-NEXT: bx lr
343+
344+
%tmp = bitcast i32* %arg to i8*
345+
%tmp1 = getelementptr inbounds i8, i8* %tmp, i32 4
346+
%tmp2 = load i8, i8* %tmp1, align 1
347+
%tmp3 = zext i8 %tmp2 to i32
348+
%tmp4 = getelementptr inbounds i8, i8* %tmp, i32 3
349+
%tmp5 = load i8, i8* %tmp4, align 1
350+
%tmp6 = zext i8 %tmp5 to i32
351+
%tmp7 = shl nuw nsw i32 %tmp6, 8
352+
%tmp8 = or i32 %tmp7, %tmp3
353+
%tmp9 = getelementptr inbounds i8, i8* %tmp, i32 2
354+
%tmp10 = load i8, i8* %tmp9, align 1
355+
%tmp11 = zext i8 %tmp10 to i32
356+
%tmp12 = shl nuw nsw i32 %tmp11, 16
357+
%tmp13 = or i32 %tmp8, %tmp12
358+
%tmp14 = getelementptr inbounds i8, i8* %tmp, i32 1
359+
%tmp15 = load i8, i8* %tmp14, align 4
; Only the load above (offset 1, the lowest address) is marked 4-byte aligned;
; the other three loads use align 1.
360+
%tmp16 = zext i8 %tmp15 to i32
361+
%tmp17 = shl nuw nsw i32 %tmp16, 24
362+
%tmp18 = or i32 %tmp13, %tmp17
363+
ret i32 %tmp18
364+
}
365+
366+
; i8* p; // p[-4] is 4 byte aligned
367+
; (i32) p[-1] | ((i32) p[-2] << 8) | ((i32) p[-3] << 16) | ((i32) p[-4] << 24)
368+
define i32 @load_i32_by_i8_neg_offset_bswap(i32* %arg) {
; Byte-reversed variant: assembles an i32 from four single-byte loads at byte
; offsets -4..-1 from %arg, with offset -1 supplying bits 0-7, offset -2 bits
; 8-15, offset -3 bits 16-23 and offset -4 bits 24-31.
; The expectations below pin four separate ldrb loads combined with shifted
; orr instructions, i.e. the byte loads are expected to stay separate. The
; expected loads are listed in ascending address order although the IR below
; issues them from the highest offset down.
; Two sets of expectations follow (default prefix and ARMv6 prefix); as
; written they differ only in the return instruction (mov pc, lr vs. bx lr).
369+
; CHECK-LABEL: load_i32_by_i8_neg_offset_bswap:
370+
; CHECK: ldrb r1, [r0, #-4]
371+
; CHECK-NEXT: ldrb r2, [r0, #-3]
372+
; CHECK-NEXT: ldrb r3, [r0, #-2]
373+
; CHECK-NEXT: ldrb r0, [r0, #-1]
374+
; CHECK-NEXT: orr r0, r0, r3, lsl #8
375+
; CHECK-NEXT: orr r0, r0, r2, lsl #16
376+
; CHECK-NEXT: orr r0, r0, r1, lsl #24
377+
; CHECK-NEXT: mov pc, lr
378+
379+
; CHECK-ARMv6-LABEL: load_i32_by_i8_neg_offset_bswap:
380+
; CHECK-ARMv6: ldrb r1, [r0, #-4]
381+
; CHECK-ARMv6-NEXT: ldrb r2, [r0, #-3]
382+
; CHECK-ARMv6-NEXT: ldrb r3, [r0, #-2]
383+
; CHECK-ARMv6-NEXT: ldrb r0, [r0, #-1]
384+
; CHECK-ARMv6-NEXT: orr r0, r0, r3, lsl #8
385+
; CHECK-ARMv6-NEXT: orr r0, r0, r2, lsl #16
386+
; CHECK-ARMv6-NEXT: orr r0, r0, r1, lsl #24
387+
; CHECK-ARMv6-NEXT: bx lr
388+
389+
%tmp = bitcast i32* %arg to i8*
390+
%tmp1 = getelementptr inbounds i8, i8* %tmp, i32 -1
391+
%tmp2 = load i8, i8* %tmp1, align 1
392+
%tmp3 = zext i8 %tmp2 to i32
393+
%tmp4 = getelementptr inbounds i8, i8* %tmp, i32 -2
394+
%tmp5 = load i8, i8* %tmp4, align 1
395+
%tmp6 = zext i8 %tmp5 to i32
396+
%tmp7 = shl nuw nsw i32 %tmp6, 8
397+
%tmp8 = or i32 %tmp7, %tmp3
398+
%tmp9 = getelementptr inbounds i8, i8* %tmp, i32 -3
399+
%tmp10 = load i8, i8* %tmp9, align 1
400+
%tmp11 = zext i8 %tmp10 to i32
401+
%tmp12 = shl nuw nsw i32 %tmp11, 16
402+
%tmp13 = or i32 %tmp8, %tmp12
403+
%tmp14 = getelementptr inbounds i8, i8* %tmp, i32 -4
404+
%tmp15 = load i8, i8* %tmp14, align 4
; Only the load above (offset -4, the lowest address) is marked 4-byte
; aligned; the other three loads use align 1.
405+
%tmp16 = zext i8 %tmp15 to i32
406+
%tmp17 = shl nuw nsw i32 %tmp16, 24
407+
%tmp18 = or i32 %tmp13, %tmp17
408+
ret i32 %tmp18
409+
}

‎llvm/test/CodeGen/X86/load-combine.ll

Lines changed: 165 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -574,8 +574,8 @@ define i32 @load_i32_by_i8_bswap_unrelated_load(i32* %arg, i32* %arg1) {
574574
; Non-zero offsets are not supported for now
575575
; i8* p;
576576
; (i32) p[1] | ((i32) p[2] << 8) | ((i32) p[3] << 16) | ((i32) p[4] << 24)
577-
define i32 @load_i32_by_i8_unsupported_offset(i32* %arg) {
578-
; CHECK-LABEL: load_i32_by_i8_unsupported_offset:
577+
define i32 @load_i32_by_i8_nonzero_offset(i32* %arg) {
578+
; CHECK-LABEL: load_i32_by_i8_nonzero_offset:
579579
; CHECK: # BB#0:
580580
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
581581
; CHECK-NEXT: movzbl 1(%eax), %ecx
@@ -590,7 +590,7 @@ define i32 @load_i32_by_i8_unsupported_offset(i32* %arg) {
590590
; CHECK-NEXT: orl %ecx, %eax
591591
; CHECK-NEXT: retl
592592
;
593-
; CHECK64-LABEL: load_i32_by_i8_unsupported_offset:
593+
; CHECK64-LABEL: load_i32_by_i8_nonzero_offset:
594594
; CHECK64: # BB#0:
595595
; CHECK64-NEXT: movzbl 1(%rdi), %eax
596596
; CHECK64-NEXT: movzbl 2(%rdi), %ecx
@@ -626,6 +626,168 @@ define i32 @load_i32_by_i8_unsupported_offset(i32* %arg) {
626626
ret i32 %tmp18
627627
}
628628

629+
; i8* p;
630+
; (i32) p[-4] | ((i32) p[-3] << 8) | ((i32) p[-2] << 16) | ((i32) p[-1] << 24)
631+
define i32 @load_i32_by_i8_neg_offset(i32* %arg) {
; Assembles an i32 from four single-byte loads at byte offsets -4..-1 from
; %arg: offset -4 supplies bits 0-7, offset -3 bits 8-15, offset -2 bits 16-23
; and offset -1 bits 24-31.
; The expectations below pin four separate movzbl loads combined with
; shll/orl, i.e. the byte loads are expected to stay separate.
; Two sets of expectations follow: the first reads %arg from an %esp-relative
; stack slot and returns with retl, the second takes it in %rdi and returns
; with retq.
632+
; CHECK-LABEL: load_i32_by_i8_neg_offset:
633+
; CHECK: # BB#0:
634+
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
635+
; CHECK-NEXT: movzbl -4(%eax), %ecx
636+
; CHECK-NEXT: movzbl -3(%eax), %edx
637+
; CHECK-NEXT: shll $8, %edx
638+
; CHECK-NEXT: orl %ecx, %edx
639+
; CHECK-NEXT: movzbl -2(%eax), %ecx
640+
; CHECK-NEXT: shll $16, %ecx
641+
; CHECK-NEXT: orl %edx, %ecx
642+
; CHECK-NEXT: movzbl -1(%eax), %eax
643+
; CHECK-NEXT: shll $24, %eax
644+
; CHECK-NEXT: orl %ecx, %eax
645+
; CHECK-NEXT: retl
646+
;
647+
; CHECK64-LABEL: load_i32_by_i8_neg_offset:
648+
; CHECK64: # BB#0:
649+
; CHECK64-NEXT: movzbl -4(%rdi), %eax
650+
; CHECK64-NEXT: movzbl -3(%rdi), %ecx
651+
; CHECK64-NEXT: shll $8, %ecx
652+
; CHECK64-NEXT: orl %eax, %ecx
653+
; CHECK64-NEXT: movzbl -2(%rdi), %edx
654+
; CHECK64-NEXT: shll $16, %edx
655+
; CHECK64-NEXT: orl %ecx, %edx
656+
; CHECK64-NEXT: movzbl -1(%rdi), %eax
657+
; CHECK64-NEXT: shll $24, %eax
658+
; CHECK64-NEXT: orl %edx, %eax
659+
; CHECK64-NEXT: retq
660+
661+
%tmp = bitcast i32* %arg to i8*
662+
%tmp1 = getelementptr inbounds i8, i8* %tmp, i32 -4
663+
%tmp2 = load i8, i8* %tmp1, align 1
; All four loads in this function use align 1.
664+
%tmp3 = zext i8 %tmp2 to i32
665+
%tmp4 = getelementptr inbounds i8, i8* %tmp, i32 -3
666+
%tmp5 = load i8, i8* %tmp4, align 1
667+
%tmp6 = zext i8 %tmp5 to i32
668+
%tmp7 = shl nuw nsw i32 %tmp6, 8
669+
%tmp8 = or i32 %tmp7, %tmp3
670+
%tmp9 = getelementptr inbounds i8, i8* %tmp, i32 -2
671+
%tmp10 = load i8, i8* %tmp9, align 1
672+
%tmp11 = zext i8 %tmp10 to i32
673+
%tmp12 = shl nuw nsw i32 %tmp11, 16
674+
%tmp13 = or i32 %tmp8, %tmp12
675+
%tmp14 = getelementptr inbounds i8, i8* %tmp, i32 -1
676+
%tmp15 = load i8, i8* %tmp14, align 1
677+
%tmp16 = zext i8 %tmp15 to i32
678+
%tmp17 = shl nuw nsw i32 %tmp16, 24
679+
%tmp18 = or i32 %tmp13, %tmp17
680+
ret i32 %tmp18
681+
}
682+
683+
; i8* p;
684+
; (i32) p[4] | ((i32) p[3] << 8) | ((i32) p[2] << 16) | ((i32) p[1] << 24)
685+
define i32 @load_i32_by_i8_nonzero_offset_bswap(i32* %arg) {
; Byte-reversed variant: assembles an i32 from four single-byte loads at byte
; offsets 1..4 from %arg, with offset 4 supplying bits 0-7, offset 3 bits 8-15,
; offset 2 bits 16-23 and offset 1 bits 24-31.
; The expectations below pin four separate movzbl loads combined with
; shll/orl, i.e. the byte loads are expected to stay separate.
; Two sets of expectations follow: the first reads %arg from an %esp-relative
; stack slot and returns with retl, the second takes it in %rdi and returns
; with retq.
686+
; CHECK-LABEL: load_i32_by_i8_nonzero_offset_bswap:
687+
; CHECK: # BB#0:
688+
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
689+
; CHECK-NEXT: movzbl 4(%eax), %ecx
690+
; CHECK-NEXT: movzbl 3(%eax), %edx
691+
; CHECK-NEXT: shll $8, %edx
692+
; CHECK-NEXT: orl %ecx, %edx
693+
; CHECK-NEXT: movzbl 2(%eax), %ecx
694+
; CHECK-NEXT: shll $16, %ecx
695+
; CHECK-NEXT: orl %edx, %ecx
696+
; CHECK-NEXT: movzbl 1(%eax), %eax
697+
; CHECK-NEXT: shll $24, %eax
698+
; CHECK-NEXT: orl %ecx, %eax
699+
; CHECK-NEXT: retl
700+
;
701+
; CHECK64-LABEL: load_i32_by_i8_nonzero_offset_bswap:
702+
; CHECK64: # BB#0:
703+
; CHECK64-NEXT: movzbl 4(%rdi), %eax
704+
; CHECK64-NEXT: movzbl 3(%rdi), %ecx
705+
; CHECK64-NEXT: shll $8, %ecx
706+
; CHECK64-NEXT: orl %eax, %ecx
707+
; CHECK64-NEXT: movzbl 2(%rdi), %edx
708+
; CHECK64-NEXT: shll $16, %edx
709+
; CHECK64-NEXT: orl %ecx, %edx
710+
; CHECK64-NEXT: movzbl 1(%rdi), %eax
711+
; CHECK64-NEXT: shll $24, %eax
712+
; CHECK64-NEXT: orl %edx, %eax
713+
; CHECK64-NEXT: retq
714+
715+
%tmp = bitcast i32* %arg to i8*
716+
%tmp1 = getelementptr inbounds i8, i8* %tmp, i32 4
717+
%tmp2 = load i8, i8* %tmp1, align 1
; All four loads in this function use align 1.
718+
%tmp3 = zext i8 %tmp2 to i32
719+
%tmp4 = getelementptr inbounds i8, i8* %tmp, i32 3
720+
%tmp5 = load i8, i8* %tmp4, align 1
721+
%tmp6 = zext i8 %tmp5 to i32
722+
%tmp7 = shl nuw nsw i32 %tmp6, 8
723+
%tmp8 = or i32 %tmp7, %tmp3
724+
%tmp9 = getelementptr inbounds i8, i8* %tmp, i32 2
725+
%tmp10 = load i8, i8* %tmp9, align 1
726+
%tmp11 = zext i8 %tmp10 to i32
727+
%tmp12 = shl nuw nsw i32 %tmp11, 16
728+
%tmp13 = or i32 %tmp8, %tmp12
729+
%tmp14 = getelementptr inbounds i8, i8* %tmp, i32 1
730+
%tmp15 = load i8, i8* %tmp14, align 1
731+
%tmp16 = zext i8 %tmp15 to i32
732+
%tmp17 = shl nuw nsw i32 %tmp16, 24
733+
%tmp18 = or i32 %tmp13, %tmp17
734+
ret i32 %tmp18
735+
}
736+
737+
; i8* p;
738+
; (i32) p[-1] | ((i32) p[-2] << 8) | ((i32) p[-3] << 16) | ((i32) p[-4] << 24)
739+
define i32 @load_i32_by_i8_neg_offset_bswap(i32* %arg) {
; Byte-reversed variant: assembles an i32 from four single-byte loads at byte
; offsets -4..-1 from %arg, with offset -1 supplying bits 0-7, offset -2 bits
; 8-15, offset -3 bits 16-23 and offset -4 bits 24-31.
; The expectations below pin four separate movzbl loads combined with
; shll/orl, i.e. the byte loads are expected to stay separate.
; Two sets of expectations follow: the first reads %arg from an %esp-relative
; stack slot and returns with retl, the second takes it in %rdi and returns
; with retq.
740+
; CHECK-LABEL: load_i32_by_i8_neg_offset_bswap:
741+
; CHECK: # BB#0:
742+
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
743+
; CHECK-NEXT: movzbl -1(%eax), %ecx
744+
; CHECK-NEXT: movzbl -2(%eax), %edx
745+
; CHECK-NEXT: shll $8, %edx
746+
; CHECK-NEXT: orl %ecx, %edx
747+
; CHECK-NEXT: movzbl -3(%eax), %ecx
748+
; CHECK-NEXT: shll $16, %ecx
749+
; CHECK-NEXT: orl %edx, %ecx
750+
; CHECK-NEXT: movzbl -4(%eax), %eax
751+
; CHECK-NEXT: shll $24, %eax
752+
; CHECK-NEXT: orl %ecx, %eax
753+
; CHECK-NEXT: retl
754+
;
755+
; CHECK64-LABEL: load_i32_by_i8_neg_offset_bswap:
756+
; CHECK64: # BB#0:
757+
; CHECK64-NEXT: movzbl -1(%rdi), %eax
758+
; CHECK64-NEXT: movzbl -2(%rdi), %ecx
759+
; CHECK64-NEXT: shll $8, %ecx
760+
; CHECK64-NEXT: orl %eax, %ecx
761+
; CHECK64-NEXT: movzbl -3(%rdi), %edx
762+
; CHECK64-NEXT: shll $16, %edx
763+
; CHECK64-NEXT: orl %ecx, %edx
764+
; CHECK64-NEXT: movzbl -4(%rdi), %eax
765+
; CHECK64-NEXT: shll $24, %eax
766+
; CHECK64-NEXT: orl %edx, %eax
767+
; CHECK64-NEXT: retq
768+
769+
%tmp = bitcast i32* %arg to i8*
770+
%tmp1 = getelementptr inbounds i8, i8* %tmp, i32 -1
771+
%tmp2 = load i8, i8* %tmp1, align 1
; All four loads in this function use align 1.
772+
%tmp3 = zext i8 %tmp2 to i32
773+
%tmp4 = getelementptr inbounds i8, i8* %tmp, i32 -2
774+
%tmp5 = load i8, i8* %tmp4, align 1
775+
%tmp6 = zext i8 %tmp5 to i32
776+
%tmp7 = shl nuw nsw i32 %tmp6, 8
777+
%tmp8 = or i32 %tmp7, %tmp3
778+
%tmp9 = getelementptr inbounds i8, i8* %tmp, i32 -3
779+
%tmp10 = load i8, i8* %tmp9, align 1
780+
%tmp11 = zext i8 %tmp10 to i32
781+
%tmp12 = shl nuw nsw i32 %tmp11, 16
782+
%tmp13 = or i32 %tmp8, %tmp12
783+
%tmp14 = getelementptr inbounds i8, i8* %tmp, i32 -4
784+
%tmp15 = load i8, i8* %tmp14, align 1
785+
%tmp16 = zext i8 %tmp15 to i32
786+
%tmp17 = shl nuw nsw i32 %tmp16, 24
787+
%tmp18 = or i32 %tmp13, %tmp17
788+
ret i32 %tmp18
789+
}
790+
629791
; i8* p; i32 i;
630792
; ((i32) p[i] << 24) | ((i32) p[i + 1] << 16) | ((i32) p[i + 2] << 8) | (i32) p[i + 3]
631793
define i32 @load_i32_by_i8_bswap_base_index_offset(i32* %arg, i32 %arg1) {

0 commit comments

Comments
 (0)
Please sign in to comment.