; RUN: -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu < %s \
; RUN: | FileCheck -check-prefix=NOOPTSWAP %s

+ ; LH: 2016-11-17
+ ; Updated the align attribute from 16 to 8 to keep testing the swap instructions.
+ ; Little-endian codegen has been changed to use lvx and stvx
+ ; instead of lxvd2x/xxswapd and xxswapd/stxvd2x for aligned
+ ; vectors with elements up to 4 bytes.
+
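+ ;
+ ; Illustrative sketch (an assumption about the new codegen, not something
+ ; this test checks); the function name @copy is made up for the example:
+ ;
+ ;   define void @copy(<4 x i32>* %p, <4 x i32>* %q) {
+ ;     %v = load <4 x i32>, <4 x i32>* %p, align 16
+ ;     store <4 x i32> %v, <4 x i32>* %q, align 16
+ ;     ret void
+ ;   }
+ ;
+ ; With 16-byte alignment, little-endian lowering can select lvx/stvx and
+ ; emit no xxswapd, so the lxvd2x/xxswapd and xxswapd/stxvd2x sequences
+ ; this test looks for would disappear. Dropping the alignment to 8 keeps
+ ; the swapped load/store sequences that the NOOPTSWAP checks rely on.
+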
; This test was generated from the following source:
;
; #define N 4096
; }
; }

- @cb = common global [4096 x i32] zeroinitializer, align 16
- @cc = common global [4096 x i32] zeroinitializer, align 16
- @cd = common global [4096 x i32] zeroinitializer, align 16
- @ca = common global [4096 x i32] zeroinitializer, align 16
+ @cb = common global [4096 x i32] zeroinitializer, align 8
+ @cc = common global [4096 x i32] zeroinitializer, align 8
+ @cd = common global [4096 x i32] zeroinitializer, align 8
+ @ca = common global [4096 x i32] zeroinitializer, align 8

define void @foo() {
entry:
@@ -42,63 +48,63 @@ vector.body:
%index = phi i64 [ 0, %entry ], [ %index.next.3, %vector.body ]
%0 = getelementptr inbounds [4096 x i32], [4096 x i32]* @cb, i64 0, i64 %index
%1 = bitcast i32* %0 to <4 x i32>*
- %wide.load = load <4 x i32>, <4 x i32>* %1, align 16
+ %wide.load = load <4 x i32>, <4 x i32>* %1, align 8
%2 = getelementptr inbounds [4096 x i32], [4096 x i32]* @cc, i64 0, i64 %index
%3 = bitcast i32* %2 to <4 x i32>*
- %wide.load13 = load <4 x i32>, <4 x i32>* %3, align 16
+ %wide.load13 = load <4 x i32>, <4 x i32>* %3, align 8
%4 = add nsw <4 x i32> %wide.load13, %wide.load
%5 = getelementptr inbounds [4096 x i32], [4096 x i32]* @cd, i64 0, i64 %index
%6 = bitcast i32* %5 to <4 x i32>*
- %wide.load14 = load <4 x i32>, <4 x i32>* %6, align 16
+ %wide.load14 = load <4 x i32>, <4 x i32>* %6, align 8
%7 = mul nsw <4 x i32> %4, %wide.load14
%8 = getelementptr inbounds [4096 x i32], [4096 x i32]* @ca, i64 0, i64 %index
%9 = bitcast i32* %8 to <4 x i32>*
- store <4 x i32> %7, <4 x i32>* %9, align 16
+ store <4 x i32> %7, <4 x i32>* %9, align 8
%index.next = add nuw nsw i64 %index, 4
%10 = getelementptr inbounds [4096 x i32], [4096 x i32]* @cb, i64 0, i64 %index.next
%11 = bitcast i32* %10 to <4 x i32>*
- %wide.load.1 = load <4 x i32>, <4 x i32>* %11, align 16
+ %wide.load.1 = load <4 x i32>, <4 x i32>* %11, align 8
%12 = getelementptr inbounds [4096 x i32], [4096 x i32]* @cc, i64 0, i64 %index.next
%13 = bitcast i32* %12 to <4 x i32>*
- %wide.load13.1 = load <4 x i32>, <4 x i32>* %13, align 16
+ %wide.load13.1 = load <4 x i32>, <4 x i32>* %13, align 8
%14 = add nsw <4 x i32> %wide.load13.1, %wide.load.1
%15 = getelementptr inbounds [4096 x i32], [4096 x i32]* @cd, i64 0, i64 %index.next
%16 = bitcast i32* %15 to <4 x i32>*
- %wide.load14.1 = load <4 x i32>, <4 x i32>* %16, align 16
+ %wide.load14.1 = load <4 x i32>, <4 x i32>* %16, align 8
%17 = mul nsw <4 x i32> %14, %wide.load14.1
%18 = getelementptr inbounds [4096 x i32], [4096 x i32]* @ca, i64 0, i64 %index.next
%19 = bitcast i32* %18 to <4 x i32>*
- store <4 x i32> %17, <4 x i32>* %19, align 16
+ store <4 x i32> %17, <4 x i32>* %19, align 8
%index.next.1 = add nuw nsw i64 %index.next, 4
%20 = getelementptr inbounds [4096 x i32], [4096 x i32]* @cb, i64 0, i64 %index.next.1
%21 = bitcast i32* %20 to <4 x i32>*
- %wide.load.2 = load <4 x i32>, <4 x i32>* %21, align 16
+ %wide.load.2 = load <4 x i32>, <4 x i32>* %21, align 8
%22 = getelementptr inbounds [4096 x i32], [4096 x i32]* @cc, i64 0, i64 %index.next.1
%23 = bitcast i32* %22 to <4 x i32>*
- %wide.load13.2 = load <4 x i32>, <4 x i32>* %23, align 16
+ %wide.load13.2 = load <4 x i32>, <4 x i32>* %23, align 8
%24 = add nsw <4 x i32> %wide.load13.2, %wide.load.2
%25 = getelementptr inbounds [4096 x i32], [4096 x i32]* @cd, i64 0, i64 %index.next.1
%26 = bitcast i32* %25 to <4 x i32>*
- %wide.load14.2 = load <4 x i32>, <4 x i32>* %26, align 16
+ %wide.load14.2 = load <4 x i32>, <4 x i32>* %26, align 8
%27 = mul nsw <4 x i32> %24, %wide.load14.2
%28 = getelementptr inbounds [4096 x i32], [4096 x i32]* @ca, i64 0, i64 %index.next.1
%29 = bitcast i32* %28 to <4 x i32>*
- store <4 x i32> %27, <4 x i32>* %29, align 16
+ store <4 x i32> %27, <4 x i32>* %29, align 8
%index.next.2 = add nuw nsw i64 %index.next.1, 4
%30 = getelementptr inbounds [4096 x i32], [4096 x i32]* @cb, i64 0, i64 %index.next.2
%31 = bitcast i32* %30 to <4 x i32>*
- %wide.load.3 = load <4 x i32>, <4 x i32>* %31, align 16
+ %wide.load.3 = load <4 x i32>, <4 x i32>* %31, align 8
%32 = getelementptr inbounds [4096 x i32], [4096 x i32]* @cc, i64 0, i64 %index.next.2
%33 = bitcast i32* %32 to <4 x i32>*
- %wide.load13.3 = load <4 x i32>, <4 x i32>* %33, align 16
+ %wide.load13.3 = load <4 x i32>, <4 x i32>* %33, align 8
%34 = add nsw <4 x i32> %wide.load13.3, %wide.load.3
%35 = getelementptr inbounds [4096 x i32], [4096 x i32]* @cd, i64 0, i64 %index.next.2
%36 = bitcast i32* %35 to <4 x i32>*
- %wide.load14.3 = load <4 x i32>, <4 x i32>* %36, align 16
+ %wide.load14.3 = load <4 x i32>, <4 x i32>* %36, align 8
%37 = mul nsw <4 x i32> %34, %wide.load14.3
%38 = getelementptr inbounds [4096 x i32], [4096 x i32]* @ca, i64 0, i64 %index.next.2
%39 = bitcast i32* %38 to <4 x i32>*
- store <4 x i32> %37, <4 x i32>* %39, align 16
+ store <4 x i32> %37, <4 x i32>* %39, align 8
%index.next.3 = add nuw nsw i64 %index.next.2, 4
%40 = icmp eq i64 %index.next.3, 4096
br i1 %40, label %for.end, label %vector.body