Skip to content

Commit 1bd3d00

Browse files
committed Jun 17, 2019
[CodeGen] Check for HardwareLoop Latch ExitBlock
The HardwareLoops pass finds exit blocks with a scevable exit count. If the target specifies to update the loop counter in a register, through a phi, we need to ensure that the exit block is a latch so that we can insert the phi with the correct value for the incoming edge. Differential Revision: https://reviews.llvm.org/D63336 llvm-svn: 363556
1 parent 7dc9176 commit 1bd3d00

File tree

5 files changed

+139
-13
lines changed

5 files changed

+139
-13
lines changed
 

Diff for: ‎llvm/include/llvm/Analysis/TargetTransformInfo.h

+4-6
Original file line numberDiff line numberDiff line change
@@ -448,9 +448,7 @@ class TargetTransformInfo {
448448
void getUnrollingPreferences(Loop *L, ScalarEvolution &,
449449
UnrollingPreferences &UP) const;
450450

451-
/// Attributes of a target dependent hardware loop. Here, the term 'element'
452-
/// describes the work performed by an IR loop that has not been vectorized
453-
/// by the compiler.
451+
/// Attributes of a target dependent hardware loop.
454452
struct HardwareLoopInfo {
455453
HardwareLoopInfo() = delete;
456454
HardwareLoopInfo(Loop *L) : L(L) { }
@@ -459,10 +457,10 @@ class TargetTransformInfo {
459457
BranchInst *ExitBranch = nullptr;
460458
const SCEV *ExitCount = nullptr;
461459
IntegerType *CountType = nullptr;
462-
Value *LoopDecrement = nullptr; // The maximum number of elements
463-
// processed in the loop body.
460+
Value *LoopDecrement = nullptr; // Decrement the loop counter by this
461+
// value in every iteration.
464462
bool IsNestingLegal = false; // Can a hardware loop be a parent to
465-
// another hardware loop.
463+
// another hardware loop?
466464
bool CounterInReg = false; // Should loop counter be updated in
467465
// the loop via a phi?
468466
};

Diff for: ‎llvm/lib/CodeGen/HardwareLoops.cpp

+13-3
Original file line numberDiff line numberDiff line change
@@ -235,7 +235,17 @@ bool HardwareLoops::TryConvertLoop(TTI::HardwareLoopInfo &HWLoopInfo) {
235235

236236
for (SmallVectorImpl<BasicBlock *>::iterator I = ExitingBlocks.begin(),
237237
IE = ExitingBlocks.end(); I != IE; ++I) {
238-
const SCEV *EC = SE->getExitCount(L, *I);
238+
BasicBlock *BB = *I;
239+
240+
// If we pass the updated counter back through a phi, we need to know
241+
// which latch the updated value will be coming from.
242+
if (!L->isLoopLatch(BB)) {
243+
if ((ForceHardwareLoopPHI.getNumOccurrences() && ForceHardwareLoopPHI) ||
244+
HWLoopInfo.CounterInReg)
245+
continue;
246+
}
247+
248+
const SCEV *EC = SE->getExitCount(L, BB);
239249
if (isa<SCEVCouldNotCompute>(EC))
240250
continue;
241251
if (const SCEVConstant *ConstEC = dyn_cast<SCEVConstant>(EC)) {
@@ -251,7 +261,7 @@ bool HardwareLoops::TryConvertLoop(TTI::HardwareLoopInfo &HWLoopInfo) {
251261
// If this exiting block is contained in a nested loop, it is not eligible
252262
// for insertion of the branch-and-decrement since the inner loop would
253263
// end up messing up the value in the CTR.
254-
if (!HWLoopInfo.IsNestingLegal && LI->getLoopFor(*I) != L &&
264+
if (!HWLoopInfo.IsNestingLegal && LI->getLoopFor(BB) != L &&
255265
!ForceNestedLoop)
256266
continue;
257267

@@ -278,7 +288,7 @@ bool HardwareLoops::TryConvertLoop(TTI::HardwareLoopInfo &HWLoopInfo) {
278288
continue;
279289

280290
// Make sure this block ends with a conditional branch.
281-
Instruction *TI = (*I)->getTerminator();
291+
Instruction *TI = BB->getTerminator();
282292
if (!TI)
283293
continue;
284294

Diff for: ‎llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp

-4
Original file line numberDiff line numberDiff line change
@@ -702,10 +702,6 @@ bool ARMTTIImpl::isHardwareLoopProfitable(Loop *L, ScalarEvolution &SE,
702702
if (!ST->hasLOB() || DisableLowOverheadLoops)
703703
return false;
704704

705-
// For now, for simplicity, only support loops with one exit block.
706-
if (!L->getExitBlock())
707-
return false;
708-
709705
if (!SE.hasLoopInvariantBackedgeTakenCount(L))
710706
return false;
711707

Diff for: ‎llvm/test/Transforms/HardwareLoops/ARM/structure.ll

+76
Original file line numberDiff line numberDiff line change
@@ -135,6 +135,82 @@ while.end7:
135135
ret void
136136
}
137137

138+
; CHECK-LABEL: not_rotated
139+
; CHECK-NOT: call void @llvm.set.loop.iterations
140+
; CHECK-NOT: call i32 @llvm.loop.decrement.i32
141+
define void @not_rotated(i32, i16* nocapture, i16 signext) {
142+
br label %4
143+
144+
4:
145+
%5 = phi i32 [ 0, %3 ], [ %19, %18 ]
146+
%6 = icmp eq i32 %5, %0
147+
br i1 %6, label %20, label %7
148+
149+
7:
150+
%8 = mul i32 %5, %0
151+
br label %9
152+
153+
9:
154+
%10 = phi i32 [ %17, %12 ], [ 0, %7 ]
155+
%11 = icmp eq i32 %10, %0
156+
br i1 %11, label %18, label %12
157+
158+
12:
159+
%13 = add i32 %10, %8
160+
%14 = getelementptr inbounds i16, i16* %1, i32 %13
161+
%15 = load i16, i16* %14, align 2
162+
%16 = add i16 %15, %2
163+
store i16 %16, i16* %14, align 2
164+
%17 = add i32 %10, 1
165+
br label %9
166+
167+
18:
168+
%19 = add i32 %5, 1
169+
br label %4
170+
171+
20:
172+
ret void
173+
}
174+
175+
; CHECK-LABEL: multi_latch
176+
; CHECK-NOT: call void @llvm.set.loop.iterations
177+
; CHECK-NOT: call i32 @llvm.loop.decrement
178+
define void @multi_latch(i32* %a, i32* %b, i32 %N) {
179+
entry:
180+
%half = lshr i32 %N, 1
181+
br label %header
182+
183+
header:
184+
%iv = phi i32 [ 0, %entry ], [ %count.next, %latch.0 ], [ %count.next, %latch.1 ]
185+
%cmp = icmp ult i32 %iv, %half
186+
%addr.a = getelementptr i32, i32* %a, i32 %iv
187+
%addr.b = getelementptr i32, i32* %b, i32 %iv
188+
br i1 %cmp, label %if.then, label %if.else
189+
190+
if.then:
191+
store i32 %iv, i32* %addr.a
192+
br label %latch.0
193+
194+
if.else:
195+
store i32 %iv, i32* %addr.b
196+
br label %latch.0
197+
198+
latch.0:
199+
%count.next = add nuw i32 %iv, 1
200+
%cmp.1 = icmp ult i32 %count.next, %half
201+
br i1 %cmp.1, label %header, label %latch.1
202+
203+
latch.1:
204+
%ld = load i32, i32* %addr.a
205+
store i32 %ld, i32* %addr.b
206+
%cmp.2 = icmp ult i32 %count.next, %N
207+
br i1 %cmp.2, label %header, label %latch.1
208+
209+
exit:
210+
ret void
211+
}
212+
213+
138214
declare void @llvm.set.loop.iterations.i32(i32) #0
139215
declare i32 @llvm.loop.decrement.reg.i32.i32.i32(i32, i32) #0
140216

+46
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,46 @@
1+
; RUN: opt -force-hardware-loops=true -hardware-loop-decrement=1 -hardware-loop-counter-bitwidth=32 -hardware-loops -S %s -o - | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-ALLOW
2+
; RUN: opt -force-hardware-loops=true -hardware-loop-decrement=1 -hardware-loop-counter-bitwidth=32 -force-hardware-loop-phi=true -hardware-loops -S %s -o - | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-LATCH
3+
4+
; CHECK-LABEL: not_rotated
5+
; CHECK-LATCH-NOT: call void @llvm.set.loop.iterations
6+
; CHECK-LATCH-NOT: call i1 @llvm.loop.decrement
7+
8+
; CHECK-ALLOW: call void @llvm.set.loop.iterations.i32(i32 %4)
9+
; CHECK-ALLOW: br label %10
10+
11+
; CHECK-ALLOW: [[CMP:%[^ ]+]] = call i1 @llvm.loop.decrement.i32(i32 1)
12+
; CHECK-ALLOW: br i1 [[CMP]], label %13, label %19
13+
14+
define void @not_rotated(i32, i16* nocapture, i16 signext) {
15+
br label %4
16+
17+
4:
18+
%5 = phi i32 [ 0, %3 ], [ %19, %18 ]
19+
%6 = icmp eq i32 %5, %0
20+
br i1 %6, label %20, label %7
21+
22+
7:
23+
%8 = mul i32 %5, %0
24+
br label %9
25+
26+
9:
27+
%10 = phi i32 [ %17, %12 ], [ 0, %7 ]
28+
%11 = icmp eq i32 %10, %0
29+
br i1 %11, label %18, label %12
30+
31+
12:
32+
%13 = add i32 %10, %8
33+
%14 = getelementptr inbounds i16, i16* %1, i32 %13
34+
%15 = load i16, i16* %14, align 2
35+
%16 = add i16 %15, %2
36+
store i16 %16, i16* %14, align 2
37+
%17 = add i32 %10, 1
38+
br label %9
39+
40+
18:
41+
%19 = add i32 %5, 1
42+
br label %4
43+
44+
20:
45+
ret void
46+
}

0 commit comments

Comments
 (0)
Please sign in to comment.