; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -slp-vectorizer -S < %s -mtriple=x86_64-unknown-linux -mcpu=corei7-avx | FileCheck %s
;
-; This checks the look-ahead operand reordering heuristic
+; This file tests the look-ahead operand reordering heuristic.
+;
+;
+; This checks that operand reordering will reorder the operands of the adds
+; by taking into consideration the instructions beyond the immediate
+; predecessors.
;
; A[0] B[0] C[0] D[0]  C[1] D[1] A[1] B[1]
;  \   /    \  /        \  /      \   /
;    -       -            -        -
;     \     /              \      /
;        +                    +
;        |                    |
;       S[0]                 S[1]
;
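+;
+; Lane 0 sums (A[0] op B[0]) with (C[0] op D[0]), while lane 1 sums
+; (C[1] op D[1]) with (A[1] op B[1]): the operands of the lane-1 add arrive
+; in swapped order. Both predecessors of each add look alike from one level
+; up, so the reordering has to look through them to the loads to match
+; lane 1 with lane 0.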
-define void @test(double* %array) {
-; CHECK-LABEL: @test(
+define void @lookahead_basic(double* %array) {
+; CHECK-LABEL: @lookahead_basic(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[IDX0:%.*]] = getelementptr inbounds double, double* [[ARRAY:%.*]], i64 0
; CHECK-NEXT:    [[IDX1:%.*]] = getelementptr inbounds double, double* [[ARRAY]], i64 1
@@ -72,3 +77,230 @@ entry:
  store double %addCDAB_1, double *%idx1, align 8
  ret void
}
+
+
+; Check whether the look-ahead operand reordering heuristic will avoid
+; bundling the alt opcodes. The vectorized code should have no shuffles.
+;
+; A[0] B[0] A[0] B[0]  A[1] B[1] A[1] B[1]
+;  \   /    \  /        \  /      \   /
+;    +       -            -        +
+;     \     /              \      /
+;        +                    +
+;        |                    |
+;       S[0]                 S[1]
+;
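+; Since each lane needs both the sum and the difference of the same A/B pair,
+; the expected bundling is one <2 x double> fadd plus one <2 x double> fsub
+; over the same two vector loads, combined directly; note that no
+; shufflevector appears in the CHECK lines below.
+;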
+define void @lookahead_alt1(double* %array) {
+; CHECK-LABEL: @lookahead_alt1(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[IDX0:%.*]] = getelementptr inbounds double, double* [[ARRAY:%.*]], i64 0
+; CHECK-NEXT:    [[IDX1:%.*]] = getelementptr inbounds double, double* [[ARRAY]], i64 1
+; CHECK-NEXT:    [[IDX2:%.*]] = getelementptr inbounds double, double* [[ARRAY]], i64 2
+; CHECK-NEXT:    [[IDX3:%.*]] = getelementptr inbounds double, double* [[ARRAY]], i64 3
+; CHECK-NEXT:    [[IDX4:%.*]] = getelementptr inbounds double, double* [[ARRAY]], i64 4
+; CHECK-NEXT:    [[IDX5:%.*]] = getelementptr inbounds double, double* [[ARRAY]], i64 5
+; CHECK-NEXT:    [[IDX6:%.*]] = getelementptr inbounds double, double* [[ARRAY]], i64 6
+; CHECK-NEXT:    [[IDX7:%.*]] = getelementptr inbounds double, double* [[ARRAY]], i64 7
+; CHECK-NEXT:    [[TMP0:%.*]] = bitcast double* [[IDX0]] to <2 x double>*
+; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x double>, <2 x double>* [[TMP0]], align 8
+; CHECK-NEXT:    [[TMP2:%.*]] = bitcast double* [[IDX2]] to <2 x double>*
+; CHECK-NEXT:    [[TMP3:%.*]] = load <2 x double>, <2 x double>* [[TMP2]], align 8
+; CHECK-NEXT:    [[TMP4:%.*]] = fsub fast <2 x double> [[TMP1]], [[TMP3]]
+; CHECK-NEXT:    [[TMP5:%.*]] = fadd fast <2 x double> [[TMP1]], [[TMP3]]
+; CHECK-NEXT:    [[TMP6:%.*]] = fadd fast <2 x double> [[TMP5]], [[TMP4]]
+; CHECK-NEXT:    [[TMP7:%.*]] = bitcast double* [[IDX0]] to <2 x double>*
+; CHECK-NEXT:    store <2 x double> [[TMP6]], <2 x double>* [[TMP7]], align 8
+; CHECK-NEXT:    ret void
+;
+entry:
+  %idx0 = getelementptr inbounds double, double* %array, i64 0
+  %idx1 = getelementptr inbounds double, double* %array, i64 1
+  %idx2 = getelementptr inbounds double, double* %array, i64 2
+  %idx3 = getelementptr inbounds double, double* %array, i64 3
+  %idx4 = getelementptr inbounds double, double* %array, i64 4
+  %idx5 = getelementptr inbounds double, double* %array, i64 5
+  %idx6 = getelementptr inbounds double, double* %array, i64 6
+  %idx7 = getelementptr inbounds double, double* %array, i64 7
+
+  %A_0 = load double, double *%idx0, align 8
+  %A_1 = load double, double *%idx1, align 8
+  %B_0 = load double, double *%idx2, align 8
+  %B_1 = load double, double *%idx3, align 8
+
+  %addAB_0_L = fadd fast double %A_0, %B_0
+  %subAB_0_R = fsub fast double %A_0, %B_0
+
+  %subAB_1_L = fsub fast double %A_1, %B_1
+  %addAB_1_R = fadd fast double %A_1, %B_1
+
+  %addABCD_0 = fadd fast double %addAB_0_L, %subAB_0_R
+  %addCDAB_1 = fadd fast double %subAB_1_L, %addAB_1_R
+
+  store double %addABCD_0, double *%idx0, align 8
+  store double %addCDAB_1, double *%idx1, align 8
+  ret void
+}
+
+
+; This code should get vectorized all the way to the loads with shuffles for
+; the alt opcodes.
+;
+; A[0] B[0] C[0] D[0]  C[1] D[1] A[1] B[1]
+;  \   /    \  /        \  /      \   /
+;    +       -            +        -
+;     \     /              \      /
+;        +                    +
+;        |                    |
+;       S[0]                 S[1]
+;
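+; Note that the autogenerated CHECK lines below are still fully scalar; they
+; capture the current output, while the comment above describes the desired
+; end state for this pattern.
+;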
+define void @lookahead_alt2(double* %array) {
+; CHECK-LABEL: @lookahead_alt2(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[IDX0:%.*]] = getelementptr inbounds double, double* [[ARRAY:%.*]], i64 0
+; CHECK-NEXT:    [[IDX1:%.*]] = getelementptr inbounds double, double* [[ARRAY]], i64 1
+; CHECK-NEXT:    [[IDX2:%.*]] = getelementptr inbounds double, double* [[ARRAY]], i64 2
+; CHECK-NEXT:    [[IDX3:%.*]] = getelementptr inbounds double, double* [[ARRAY]], i64 3
+; CHECK-NEXT:    [[IDX4:%.*]] = getelementptr inbounds double, double* [[ARRAY]], i64 4
+; CHECK-NEXT:    [[IDX5:%.*]] = getelementptr inbounds double, double* [[ARRAY]], i64 5
+; CHECK-NEXT:    [[IDX6:%.*]] = getelementptr inbounds double, double* [[ARRAY]], i64 6
+; CHECK-NEXT:    [[IDX7:%.*]] = getelementptr inbounds double, double* [[ARRAY]], i64 7
+; CHECK-NEXT:    [[A_0:%.*]] = load double, double* [[IDX0]], align 8
+; CHECK-NEXT:    [[A_1:%.*]] = load double, double* [[IDX1]], align 8
+; CHECK-NEXT:    [[B_0:%.*]] = load double, double* [[IDX2]], align 8
+; CHECK-NEXT:    [[B_1:%.*]] = load double, double* [[IDX3]], align 8
+; CHECK-NEXT:    [[C_0:%.*]] = load double, double* [[IDX4]], align 8
+; CHECK-NEXT:    [[C_1:%.*]] = load double, double* [[IDX5]], align 8
+; CHECK-NEXT:    [[D_0:%.*]] = load double, double* [[IDX6]], align 8
+; CHECK-NEXT:    [[D_1:%.*]] = load double, double* [[IDX7]], align 8
+; CHECK-NEXT:    [[ADDAB_0:%.*]] = fadd fast double [[A_0]], [[B_0]]
+; CHECK-NEXT:    [[SUBCD_0:%.*]] = fsub fast double [[C_0]], [[D_0]]
+; CHECK-NEXT:    [[ADDCD_1:%.*]] = fadd fast double [[C_1]], [[D_1]]
+; CHECK-NEXT:    [[SUBAB_1:%.*]] = fsub fast double [[A_1]], [[B_1]]
+; CHECK-NEXT:    [[ADDABCD_0:%.*]] = fadd fast double [[ADDAB_0]], [[SUBCD_0]]
+; CHECK-NEXT:    [[ADDCDAB_1:%.*]] = fadd fast double [[ADDCD_1]], [[SUBAB_1]]
+; CHECK-NEXT:    store double [[ADDABCD_0]], double* [[IDX0]], align 8
+; CHECK-NEXT:    store double [[ADDCDAB_1]], double* [[IDX1]], align 8
+; CHECK-NEXT:    ret void
+;
+entry:
+  %idx0 = getelementptr inbounds double, double* %array, i64 0
+  %idx1 = getelementptr inbounds double, double* %array, i64 1
+  %idx2 = getelementptr inbounds double, double* %array, i64 2
+  %idx3 = getelementptr inbounds double, double* %array, i64 3
+  %idx4 = getelementptr inbounds double, double* %array, i64 4
+  %idx5 = getelementptr inbounds double, double* %array, i64 5
+  %idx6 = getelementptr inbounds double, double* %array, i64 6
+  %idx7 = getelementptr inbounds double, double* %array, i64 7
+
+  %A_0 = load double, double *%idx0, align 8
+  %A_1 = load double, double *%idx1, align 8
+  %B_0 = load double, double *%idx2, align 8
+  %B_1 = load double, double *%idx3, align 8
+  %C_0 = load double, double *%idx4, align 8
+  %C_1 = load double, double *%idx5, align 8
+  %D_0 = load double, double *%idx6, align 8
+  %D_1 = load double, double *%idx7, align 8
+
+  %addAB_0 = fadd fast double %A_0, %B_0
+  %subCD_0 = fsub fast double %C_0, %D_0
+
+  %addCD_1 = fadd fast double %C_1, %D_1
+  %subAB_1 = fsub fast double %A_1, %B_1
+
+  %addABCD_0 = fadd fast double %addAB_0, %subCD_0
+  %addCDAB_1 = fadd fast double %addCD_1, %subAB_1
+
+  store double %addABCD_0, double *%idx0, align 8
+  store double %addCDAB_1, double *%idx1, align 8
+  ret void
+}
+
+
+;
+; A[0] B[0] C[0] D[0]  A[1] B[2] A[2] B[1]
+;  \   /    \  /      / \   /    \   /
+;    -       -       U    -        -
+;     \     /              \      /
+;        +                    +
+;        |                    |
+;       S[0]                 S[1]
+;
+; SLP should reorder the operands of the RHS add, taking into consideration
+; the cost of external uses. It is more profitable to do so because A[1] has
+; an external use.
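+; In the expected output below, A[1] is loaded together with A[0] as one
+; <2 x double> load, so the external use is served by a single extractelement
+; of that vector rather than keeping the whole lane-1 chain scalar.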
+
+define void @lookahead_external_uses(double* %A, double *%B, double *%C, double *%D, double *%S, double *%Ext1, double *%Ext2) {
+; CHECK-LABEL: @lookahead_external_uses(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[IDXA0:%.*]] = getelementptr inbounds double, double* [[A:%.*]], i64 0
+; CHECK-NEXT:    [[IDXB0:%.*]] = getelementptr inbounds double, double* [[B:%.*]], i64 0
+; CHECK-NEXT:    [[IDXC0:%.*]] = getelementptr inbounds double, double* [[C:%.*]], i64 0
+; CHECK-NEXT:    [[IDXD0:%.*]] = getelementptr inbounds double, double* [[D:%.*]], i64 0
+; CHECK-NEXT:    [[IDXA1:%.*]] = getelementptr inbounds double, double* [[A]], i64 1
+; CHECK-NEXT:    [[IDXB2:%.*]] = getelementptr inbounds double, double* [[B]], i64 2
+; CHECK-NEXT:    [[IDXA2:%.*]] = getelementptr inbounds double, double* [[A]], i64 2
+; CHECK-NEXT:    [[IDXB1:%.*]] = getelementptr inbounds double, double* [[B]], i64 1
+; CHECK-NEXT:    [[B0:%.*]] = load double, double* [[IDXB0]], align 8
+; CHECK-NEXT:    [[C0:%.*]] = load double, double* [[IDXC0]], align 8
+; CHECK-NEXT:    [[D0:%.*]] = load double, double* [[IDXD0]], align 8
+; CHECK-NEXT:    [[TMP0:%.*]] = bitcast double* [[IDXA0]] to <2 x double>*
+; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x double>, <2 x double>* [[TMP0]], align 8
+; CHECK-NEXT:    [[B2:%.*]] = load double, double* [[IDXB2]], align 8
+; CHECK-NEXT:    [[A2:%.*]] = load double, double* [[IDXA2]], align 8
+; CHECK-NEXT:    [[B1:%.*]] = load double, double* [[IDXB1]], align 8
+; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <2 x double> undef, double [[B0]], i32 0
+; CHECK-NEXT:    [[TMP3:%.*]] = insertelement <2 x double> [[TMP2]], double [[B2]], i32 1
+; CHECK-NEXT:    [[TMP4:%.*]] = fsub fast <2 x double> [[TMP1]], [[TMP3]]
+; CHECK-NEXT:    [[TMP5:%.*]] = insertelement <2 x double> undef, double [[C0]], i32 0
+; CHECK-NEXT:    [[TMP6:%.*]] = insertelement <2 x double> [[TMP5]], double [[A2]], i32 1
+; CHECK-NEXT:    [[TMP7:%.*]] = insertelement <2 x double> undef, double [[D0]], i32 0
+; CHECK-NEXT:    [[TMP8:%.*]] = insertelement <2 x double> [[TMP7]], double [[B1]], i32 1
+; CHECK-NEXT:    [[TMP9:%.*]] = fsub fast <2 x double> [[TMP6]], [[TMP8]]
+; CHECK-NEXT:    [[TMP10:%.*]] = fadd fast <2 x double> [[TMP4]], [[TMP9]]
+; CHECK-NEXT:    [[IDXS0:%.*]] = getelementptr inbounds double, double* [[S:%.*]], i64 0
+; CHECK-NEXT:    [[IDXS1:%.*]] = getelementptr inbounds double, double* [[S]], i64 1
+; CHECK-NEXT:    [[TMP11:%.*]] = bitcast double* [[IDXS0]] to <2 x double>*
+; CHECK-NEXT:    store <2 x double> [[TMP10]], <2 x double>* [[TMP11]], align 8
+; CHECK-NEXT:    [[TMP12:%.*]] = extractelement <2 x double> [[TMP1]], i32 1
+; CHECK-NEXT:    store double [[TMP12]], double* [[EXT1:%.*]], align 8
+; CHECK-NEXT:    ret void
+;
+entry:
+  %IdxA0 = getelementptr inbounds double, double* %A, i64 0
+  %IdxB0 = getelementptr inbounds double, double* %B, i64 0
+  %IdxC0 = getelementptr inbounds double, double* %C, i64 0
+  %IdxD0 = getelementptr inbounds double, double* %D, i64 0
+
+  %IdxA1 = getelementptr inbounds double, double* %A, i64 1
+  %IdxB2 = getelementptr inbounds double, double* %B, i64 2
+  %IdxA2 = getelementptr inbounds double, double* %A, i64 2
+  %IdxB1 = getelementptr inbounds double, double* %B, i64 1
+
+  %A0 = load double, double *%IdxA0, align 8
+  %B0 = load double, double *%IdxB0, align 8
+  %C0 = load double, double *%IdxC0, align 8
+  %D0 = load double, double *%IdxD0, align 8
+
+  %A1 = load double, double *%IdxA1, align 8
+  %B2 = load double, double *%IdxB2, align 8
+  %A2 = load double, double *%IdxA2, align 8
+  %B1 = load double, double *%IdxB1, align 8
+
+  %subA0B0 = fsub fast double %A0, %B0
+  %subC0D0 = fsub fast double %C0, %D0
+
+  %subA1B2 = fsub fast double %A1, %B2
+  %subA2B1 = fsub fast double %A2, %B1
+
+  %add0 = fadd fast double %subA0B0, %subC0D0
+  %add1 = fadd fast double %subA1B2, %subA2B1
+
+  %IdxS0 = getelementptr inbounds double, double* %S, i64 0
+  %IdxS1 = getelementptr inbounds double, double* %S, i64 1
+
+  store double %add0, double *%IdxS0, align 8
+  store double %add1, double *%IdxS1, align 8
+
+  ; External use
+  store double %A1, double *%Ext1, align 8
+  ret void
+}