|
| 1 | +; RUN: opt < %s -force-vector-interleave=1 -enable-conflict-detection=false -loop-vectorize -dce -instcombine -S | FileCheck %s |
| 2 | + |
| 3 | +target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128" |
| 4 | +target triple = "aarch64--linux-gnu" |
| 5 | + |
| 6 | +%struct.pair = type { i32, i32 } |
| 7 | + |
| 8 | +; Check vectorization of interleaved access groups with positive dependence |
| 9 | +; distances. In this test, the maximum safe dependence distance for |
| 10 | +; vectorization is 16 bytes. Normally, this would lead to a maximum VF of 4. |
| 11 | +; However, for interleaved groups, the effective VF is VF * IF, where IF is the |
| 12 | +; interleave factor. Here, the maximum safe dependence distance is recomputed |
| 13 | +; as 16 / IF bytes, resulting in VF=2. Since IF=2, we should generate <4 x i32> |
| 14 | +; loads and stores instead of <8 x i32> accesses. |
| 15 | +; |
| 16 | +; Note: LAA's conflict detection optimization has to be disabled for this test |
| 17 | +; to be vectorized. |
| 18 | + |
| 19 | +; struct pair { |
| 20 | +; int x; |
| 21 | +; int y; |
| 22 | +; }; |
| 23 | +; |
| 24 | +; void max_vf(struct pair *restrict p) { |
| 25 | +; for (int i = 0; i < 1000; i++) { |
| 26 | +; p[i + 2].x = p[i].x; |
| 27 | +; p[i + 2].y = p[i].y; |
| 28 | +; } |
| 29 | +; } |
| 30 | + |
| 31 | +; CHECK-LABEL: @max_vf |
| 32 | +; CHECK: load <4 x i32> |
| 33 | +; CHECK: store <4 x i32> |
| 34 | + |
| | +; Copies both i32 fields of p[i] into p[i + 2] on every iteration, giving a |
| | +; store-after-load dependence at a distance of two %struct.pair elements. |
| | +; %p is marked noalias, so the only dependences are within this one array. |
| 35 | +define void @max_vf(%struct.pair* noalias nocapture %p) { |
| 36 | +entry: |
| 37 | + br label %for.body |
| 38 | + |
| 39 | +for.body: |
| | +; %i is the induction variable: starts at 0 and advances by 1 per iteration. |
| 40 | + %i = phi i64 [ 0, %entry ], [ %i.next, %for.body ] |
| | +; %0 = i + 2, the index of the element being written. |
| 41 | + %0 = add nuw nsw i64 %i, 2 |
| | +; Field 0 (x): load from p[i], store to p[i + 2]. |
| 42 | + %p_i.x = getelementptr inbounds %struct.pair, %struct.pair* %p, i64 %i, i32 0 |
| 43 | + %p_i_plus_2.x = getelementptr inbounds %struct.pair, %struct.pair* %p, i64 %0, i32 0 |
| 44 | + %1 = load i32, i32* %p_i.x, align 4 |
| 45 | + store i32 %1, i32* %p_i_plus_2.x, align 4 |
| | +; Field 1 (y): same pattern. Together with the x accesses, these touch both |
| | +; members of each struct, i.e. the interleaved load/store pairs under test. |
| 46 | + %p_i.y = getelementptr inbounds %struct.pair, %struct.pair* %p, i64 %i, i32 1 |
| 47 | + %p_i_plus_2.y = getelementptr inbounds %struct.pair, %struct.pair* %p, i64 %0, i32 1 |
| 48 | + %2 = load i32, i32* %p_i.y, align 4 |
| 49 | + store i32 %2, i32* %p_i_plus_2.y, align 4 |
| | +; Exit once the incremented index reaches 1000 (iterations i = 0 .. 999). |
| 50 | + %i.next = add nuw nsw i64 %i, 1 |
| 51 | + %cond = icmp eq i64 %i.next, 1000 |
| 52 | + br i1 %cond, label %for.exit, label %for.body |
| 53 | + |
| 54 | +for.exit: |
| 55 | + ret void |
| 56 | +} |
0 commit comments