Skip to content

Commit e43198d

Browse files
committedMay 16, 2016
[LV] Ensure safe VF for loops with interleaved accesses
The selection of the vectorization factor currently doesn't consider interleaved accesses. The vectorization factor is based on the maximum safe dependence distance computed by LAA. However, for loops with interleaved groups, we should instead base the vectorization factor on the maximum safe dependence distance divided by the maximum interleave factor of all the interleaved groups. Interleaved accesses not in a group will be scalarized. Differential Revision: http://reviews.llvm.org/D20241 llvm-svn: 269659
1 parent 3df2879 commit e43198d

File tree

2 files changed

+79
-1
lines changed

2 files changed

+79
-1
lines changed
 

‎llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

+23-1
Original file line numberDiff line numberDiff line change
@@ -854,6 +854,14 @@ class InterleavedAccessInfo {
854854
return InterleaveGroupMap.count(Instr);
855855
}
856856

857+
/// \brief Return the maximum interleave factor of all interleaved groups.
///
/// Walks every entry of InterleaveGroupMap and keeps the largest value
/// reported by getFactor(). The running maximum starts at 1, so the result
/// is 1 when the map is empty and is never smaller than 1.
858+
unsigned getMaxInterleaveFactor() const {
859+
unsigned MaxFactor = 1;
860+
for (auto &Entry : InterleaveGroupMap)
861+
MaxFactor = std::max(MaxFactor, Entry.second->getFactor());
862+
return MaxFactor;
863+
}
864+
857865
/// \brief Get the interleave group that \p Instr belongs to.
858866
///
859867
/// \returns nullptr if \p Instr does not belong to any interleave group.
@@ -1334,6 +1342,11 @@ class LoopVectorizationLegality {
13341342
return InterleaveInfo.isInterleaved(Instr);
13351343
}
13361344

1345+
/// \brief Return the maximum interleave factor of all interleaved groups.
///
/// Forwards to InterleaveInfo.getMaxInterleaveFactor().
1346+
unsigned getMaxInterleaveFactor() const {
1347+
return InterleaveInfo.getMaxInterleaveFactor();
1348+
}
1349+
13371350
/// \brief Get the interleaved access group that \p Instr belongs to.
13381351
const InterleaveGroup *getInterleavedAccessGroup(Instruction *Instr) {
13391352
return InterleaveInfo.getInterleaveGroup(Instr);
@@ -5183,8 +5196,17 @@ LoopVectorizationCostModel::selectVectorizationFactor(bool OptForSize) {
51835196
std::tie(SmallestType, WidestType) = getSmallestAndWidestTypes();
51845197
unsigned WidestRegister = TTI.getRegisterBitWidth(true);
51855198
unsigned MaxSafeDepDist = -1U;
5199+
5200+
// Get the maximum safe dependence distance in bits computed by LAA. If the
5201+
// loop contains any interleaved accesses, we divide the dependence distance
5202+
// by the maximum interleave factor of all interleaved groups. Note that
5203+
// although the division ensures correctness, this is a fairly conservative
5204+
// computation because the maximum distance computed by LAA may not involve
5205+
// any of the interleaved accesses.
51865206
if (Legal->getMaxSafeDepDistBytes() != -1U)
5187-
MaxSafeDepDist = Legal->getMaxSafeDepDistBytes() * 8;
5207+
MaxSafeDepDist =
5208+
Legal->getMaxSafeDepDistBytes() * 8 / Legal->getMaxInterleaveFactor();
5209+
51885210
WidestRegister =
51895211
((WidestRegister < MaxSafeDepDist) ? WidestRegister : MaxSafeDepDist);
51905212
unsigned MaxVectorSize = WidestRegister / WidestType;
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,56 @@
1+
; RUN: opt < %s -force-vector-interleave=1 -enable-conflict-detection=false -loop-vectorize -dce -instcombine -S | FileCheck %s
2+
3+
target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128"
4+
target triple = "aarch64--linux-gnu"
5+
6+
%struct.pair = type { i32, i32 }
7+
8+
; Check vectorization of interleaved access groups with positive dependence
9+
; distances. In this test, the maximum safe dependence distance for
10+
; vectorization is 16 bytes. Normally, this would lead to a maximum VF of 4.
11+
; However, for interleaved groups, the effective VF is VF * IF, where IF is the
12+
; interleave factor. Here, the maximum safe dependence distance is recomputed
13+
; as 16 / IF bytes, resulting in VF=2. Since IF=2, we should generate <4 x i32>
14+
; loads and stores instead of <8 x i32> accesses.
15+
;
16+
; Note: LAA's conflict detection optimization has to be disabled for this test
17+
; to be vectorized.
18+
19+
; struct pair {
20+
; int x;
21+
; int y;
22+
; };
23+
;
24+
; void max_vf(struct pair *restrict p) {
25+
; for (int i = 0; i < 1000; i++) {
26+
; p[i + 2].x = p[i].x;
27+
; p[i + 2].y = p[i].y;
28+
; }
29+
; }
30+
31+
; CHECK-LABEL: @max_vf
32+
; CHECK: load <4 x i32>
33+
; CHECK: store <4 x i32>
34+
35+
; Loop body under test: %i starts at 0 and is incremented by 1 each iteration;
; the loop exits once %i.next equals 1000. Each iteration loads field 0 and
; field 1 of element %i of %p and stores them to the same fields of element
; %i + 2 (value %0).
define void @max_vf(%struct.pair* noalias nocapture %p) {
36+
entry:
37+
br label %for.body
38+
39+
for.body:
40+
%i = phi i64 [ 0, %entry ], [ %i.next, %for.body ]
41+
%0 = add nuw nsw i64 %i, 2
42+
%p_i.x = getelementptr inbounds %struct.pair, %struct.pair* %p, i64 %i, i32 0
43+
%p_i_plus_2.x = getelementptr inbounds %struct.pair, %struct.pair* %p, i64 %0, i32 0
44+
%1 = load i32, i32* %p_i.x, align 4
45+
store i32 %1, i32* %p_i_plus_2.x, align 4
46+
%p_i.y = getelementptr inbounds %struct.pair, %struct.pair* %p, i64 %i, i32 1
47+
%p_i_plus_2.y = getelementptr inbounds %struct.pair, %struct.pair* %p, i64 %0, i32 1
48+
%2 = load i32, i32* %p_i.y, align 4
49+
store i32 %2, i32* %p_i_plus_2.y, align 4
50+
%i.next = add nuw nsw i64 %i, 1
51+
%cond = icmp eq i64 %i.next, 1000
52+
br i1 %cond, label %for.exit, label %for.body
53+
54+
for.exit:
55+
ret void
56+
}

0 commit comments

Comments
 (0)
Please sign in to comment.