Skip to content

Commit 1be9b9f

Browse files
author
Changpeng Fang
committed Mar 9, 2017
AMDGPU/SI: Disable unrolling in the loop vectorizer if the loop is not vectorized.
Reviewers: arsenm
Differential Revision: http://reviews.llvm.org/D30719
llvm-svn: 297328
1 parent 8537d99 commit 1be9b9f

File tree

3 files changed

+34
-0
lines changed

3 files changed

+34
-0
lines changed
 

‎llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp

Lines changed: 4 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -155,6 +155,10 @@ bool AMDGPUTTIImpl::isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes,
155155
}
156156

157157
unsigned AMDGPUTTIImpl::getMaxInterleaveFactor(unsigned VF) {
158+
// Disable unrolling if the loop is not vectorized.
159+
if (VF == 1)
160+
return 1;
161+
158162
// Semi-arbitrary large amount.
159163
return 64;
160164
}
Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,2 @@
1+
if not 'AMDGPU' in config.root.targets:
2+
config.unsupported = True
Lines changed: 28 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,28 @@
1+
; RUN: opt -S -mtriple=amdgcn-unknown-amdhsa -mcpu=fiji -loop-vectorize < %s | FileCheck %s
2+
3+
4+
; For AMDGPU, loop unroll in loop vectorizer is disabled when VF==1.
5+
;
6+
; CHECK-LABEL: @small_loop(
7+
; CHECK: store i32
8+
; CHECK-NOT: store i32
9+
; CHECK: ret
10+
define void @small_loop(i32* nocapture %inArray, i32 %size) nounwind {
11+
entry:
12+
%0 = icmp sgt i32 %size, 0
13+
br i1 %0, label %loop, label %exit
14+
15+
loop: ; preds = %entry, %loop
16+
%iv = phi i32 [ %iv1, %loop ], [ 0, %entry ]
17+
%1 = getelementptr inbounds i32, i32* %inArray, i32 %iv
18+
%2 = load i32, i32* %1, align 4
19+
%3 = add nsw i32 %2, 6
20+
store i32 %3, i32* %1, align 4
21+
%iv1 = add i32 %iv, 1
22+
; %lftr.wideiv = trunc i64 %indvars.iv.next to i32
23+
%cond = icmp eq i32 %iv1, %size
24+
br i1 %cond, label %exit, label %loop
25+
26+
exit: ; preds = %loop, %entry
27+
ret void
28+
}

0 commit comments

Comments
 (0)
Please sign in to comment.