Skip to content

Commit 726c28f

Browse files
committed Jun 2, 2017
[CodeGen] Track trip counts per-scop for performance measurement.
- Add a counter that is incremented once on exit from a scop.
- Test cases got split into two: one to test the cycles, and another one to test trip counts.
- Sample output:
```name=sample-output.txt
scop function, entry block name, exit block name, total time, trip count
warmup, %entry.split, %polly.merge_new_and_old, 5180, 1
f, %entry.split, %polly.merge_new_and_old, 409944, 500
g, %entry.split, %polly.merge_new_and_old, 1226, 1
```

Differential Revision: https://reviews.llvm.org/D33822

llvm-svn: 304543
1 parent 01bf58d commit 726c28f

File tree

5 files changed

+173
-104
lines changed

5 files changed

+173
-104
lines changed
 

‎polly/include/polly/CodeGen/PerfMonitor.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -62,6 +62,9 @@ class PerfMonitor {
6262
/// The total number of cycles spent in the current scop S.
6363
llvm::Value *CyclesInCurrentScopPtr;
6464

65+
/// The total number of times the current scop S is executed.
66+
llvm::Value *TripCountForCurrentScopPtr;
67+
6568
/// The total number of cycles spent within scops.
6669
llvm::Value *CyclesInScopsPtr;
6770

‎polly/lib/CodeGen/PerfMonitor.cpp

Lines changed: 20 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -87,15 +87,18 @@ static std::string GetScopUniqueVarname(const Scop &S) {
8787
std::string EntryString, ExitString;
8888
std::tie(EntryString, ExitString) = S.getEntryExitStr();
8989

90-
Name << "__polly_perf_cycles_in_" << std::string(S.getFunction().getName())
90+
Name << "__polly_perf_in_" << std::string(S.getFunction().getName())
9191
<< "_from__" << EntryString << "__to__" << ExitString;
9292
return Name.str();
9393
}
9494

9595
void PerfMonitor::addScopCounter() {
9696
const std::string varname = GetScopUniqueVarname(S);
97-
TryRegisterGlobal(M, varname.c_str(), Builder.getInt64(0),
97+
TryRegisterGlobal(M, (varname + "_cycles").c_str(), Builder.getInt64(0),
9898
&CyclesInCurrentScopPtr);
99+
100+
TryRegisterGlobal(M, (varname + "_trip_count").c_str(), Builder.getInt64(0),
101+
&TripCountForCurrentScopPtr);
99102
}
100103

101104
void PerfMonitor::addGlobalVariables() {
@@ -160,7 +163,7 @@ Function *PerfMonitor::insertFinalReporting() {
160163

161164
RuntimeDebugBuilder::createCPUPrinter(
162165
Builder, "scop function, "
163-
"entry block name, exit block name, total time\n");
166+
"entry block name, exit block name, total time, trip count\n");
164167
ReturnFromFinal = Builder.CreateRetVoid();
165168
return ExitFn;
166169
}
@@ -179,13 +182,17 @@ void PerfMonitor::AppendScopReporting() {
179182

180183
Value *CyclesInCurrentScop =
181184
Builder.CreateLoad(this->CyclesInCurrentScopPtr, true);
185+
186+
Value *TripCountForCurrentScop =
187+
Builder.CreateLoad(this->TripCountForCurrentScopPtr, true);
188+
182189
std::string EntryName, ExitName;
183190
std::tie(EntryName, ExitName) = S.getEntryExitStr();
184191

185192
// print in CSV for easy parsing with other tools.
186-
RuntimeDebugBuilder::createCPUPrinter(Builder, S.getFunction().getName(),
187-
", ", EntryName, ", ", ExitName, ", ",
188-
CyclesInCurrentScop, "\n");
193+
RuntimeDebugBuilder::createCPUPrinter(
194+
Builder, S.getFunction().getName(), ", ", EntryName, ", ", ExitName, ", ",
195+
CyclesInCurrentScop, ", ", TripCountForCurrentScop, "\n");
189196

190197
ReturnFromFinal = Builder.CreateRetVoid();
191198
}
@@ -288,4 +295,11 @@ void PerfMonitor::insertRegionEnd(Instruction *InsertBefore) {
288295
Value *CyclesInCurrentScop = Builder.CreateLoad(CyclesInCurrentScopPtr, true);
289296
CyclesInCurrentScop = Builder.CreateAdd(CyclesInCurrentScop, CyclesInScop);
290297
Builder.CreateStore(CyclesInCurrentScop, CyclesInCurrentScopPtr, true);
298+
299+
Value *TripCountForCurrentScop =
300+
Builder.CreateLoad(TripCountForCurrentScopPtr, true);
301+
TripCountForCurrentScop =
302+
Builder.CreateAdd(TripCountForCurrentScop, Builder.getInt64(1));
303+
Builder.CreateStore(TripCountForCurrentScop, TripCountForCurrentScopPtr,
304+
true);
291305
}
Lines changed: 75 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,75 @@
1+
; RUN: opt %loadPolly -polly-codegen -polly-codegen-perf-monitoring \
2+
; RUN: -S < %s | FileCheck %s
3+
4+
; void f(long A[], long N) {
5+
; long i;
6+
; if (true)
7+
; for (i = 0; i < N; ++i)
8+
; A[i] = i;
9+
; }
10+
; void g(long A[], long N) {
11+
; long i;
12+
; if (true)
13+
; for (i = 0; i < N; ++i)
14+
; A[i] = i;
15+
; }
16+
17+
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
18+
target triple = "x86_64-unknown-linux-gnu"
19+
20+
define void @f(i64* %A, i64 %N) nounwind {
21+
entry:
22+
fence seq_cst
23+
br label %next
24+
25+
next:
26+
br i1 true, label %for.i, label %return
27+
28+
for.i:
29+
%indvar = phi i64 [ 0, %next], [ %indvar.next, %for.i ]
30+
%scevgep = getelementptr i64, i64* %A, i64 %indvar
31+
store i64 %indvar, i64* %scevgep
32+
%indvar.next = add nsw i64 %indvar, 1
33+
%exitcond = icmp eq i64 %indvar.next, %N
34+
br i1 %exitcond, label %return, label %for.i
35+
36+
return:
37+
fence seq_cst
38+
ret void
39+
}
40+
41+
42+
define void @g(i64* %A, i64 %N) nounwind {
43+
entry:
44+
fence seq_cst
45+
br label %next
46+
47+
next:
48+
br i1 true, label %for.i, label %return
49+
50+
for.i:
51+
%indvar = phi i64 [ 0, %next], [ %indvar.next, %for.i ]
52+
%scevgep = getelementptr i64, i64* %A, i64 %indvar
53+
store i64 %indvar, i64* %scevgep
54+
%indvar.next = add nsw i64 %indvar, 1
55+
%exitcond = icmp eq i64 %indvar.next, %N
56+
br i1 %exitcond, label %return, label %for.i
57+
58+
return:
59+
fence seq_cst
60+
ret void
61+
}
62+
63+
; Declaration of globals - Check for cycles declaration.
64+
; @"__polly_perf_in_f_from__%next__to__%polly.merge_new_and_old_cycles" = weak thread_local(initialexec) constant i64 0
65+
; @"__polly_perf_in_g_from__%next__to__%polly.merge_new_and_old_cycles" = weak thread_local(initialexec) constant i64 0
66+
67+
; Bumping up number of cycles in f
68+
; CHECK: %10 = load volatile i64, i64* @"__polly_perf_in_f_from__%next__to__%polly.merge_new_and_old_cycles"
69+
; CHECK-NEXT: %11 = add i64 %10, %7
70+
; CHECK-NEXT: store volatile i64 %11, i64* @"__polly_perf_in_f_from__%next__to__%polly.merge_new_and_old_cycles"
71+
72+
; Bumping up number of cycles in g
73+
; CHECK: %10 = load volatile i64, i64* @"__polly_perf_in_g_from__%next__to__%polly.merge_new_and_old_cycles"
74+
; CHECK-NEXT: %11 = add i64 %10, %7
75+
; CHECK-NEXT: store volatile i64 %11, i64* @"__polly_perf_in_g_from__%next__to__%polly.merge_new_and_old_cycles"

‎polly/test/Isl/CodeGen/perf_monitoring_per_scop.ll

Lines changed: 0 additions & 98 deletions
This file was deleted.
Lines changed: 75 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,75 @@
1+
; RUN: opt %loadPolly -polly-codegen -polly-codegen-perf-monitoring \
2+
; RUN: -S < %s | FileCheck %s
3+
4+
; void f(long A[], long N) {
5+
; long i;
6+
; if (true)
7+
; for (i = 0; i < N; ++i)
8+
; A[i] = i;
9+
; }
10+
; void g(long A[], long N) {
11+
; long i;
12+
; if (true)
13+
; for (i = 0; i < N; ++i)
14+
; A[i] = i;
15+
; }
16+
17+
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
18+
target triple = "x86_64-unknown-linux-gnu"
19+
20+
define void @f(i64* %A, i64 %N) nounwind {
21+
entry:
22+
fence seq_cst
23+
br label %next
24+
25+
next:
26+
br i1 true, label %for.i, label %return
27+
28+
for.i:
29+
%indvar = phi i64 [ 0, %next], [ %indvar.next, %for.i ]
30+
%scevgep = getelementptr i64, i64* %A, i64 %indvar
31+
store i64 %indvar, i64* %scevgep
32+
%indvar.next = add nsw i64 %indvar, 1
33+
%exitcond = icmp eq i64 %indvar.next, %N
34+
br i1 %exitcond, label %return, label %for.i
35+
36+
return:
37+
fence seq_cst
38+
ret void
39+
}
40+
41+
42+
define void @g(i64* %A, i64 %N) nounwind {
43+
entry:
44+
fence seq_cst
45+
br label %next
46+
47+
next:
48+
br i1 true, label %for.i, label %return
49+
50+
for.i:
51+
%indvar = phi i64 [ 0, %next], [ %indvar.next, %for.i ]
52+
%scevgep = getelementptr i64, i64* %A, i64 %indvar
53+
store i64 %indvar, i64* %scevgep
54+
%indvar.next = add nsw i64 %indvar, 1
55+
%exitcond = icmp eq i64 %indvar.next, %N
56+
br i1 %exitcond, label %return, label %for.i
57+
58+
return:
59+
fence seq_cst
60+
ret void
61+
}
62+
63+
; Declaration of globals - Check for trip count declaration.
64+
; CHECK: @"__polly_perf_in_f_from__%next__to__%polly.merge_new_and_old_trip_count" = weak thread_local(initialexec) constant i64 0
65+
; CHECK: @"__polly_perf_in_g_from__%next__to__%polly.merge_new_and_old_trip_count" = weak thread_local(initialexec) constant i64 0
66+
67+
; Bumping up the trip count of f
68+
; CHECK: %12 = load volatile i64, i64* @"__polly_perf_in_f_from__%next__to__%polly.merge_new_and_old_trip_count"
69+
; CHECK-NEXT: %13 = add i64 %12, 1
70+
; CHECK-NEXT: store volatile i64 %13, i64* @"__polly_perf_in_f_from__%next__to__%polly.merge_new_and_old_trip_count"
71+
72+
; Bumping up the trip count of g
73+
; CHECK: %12 = load volatile i64, i64* @"__polly_perf_in_g_from__%next__to__%polly.merge_new_and_old_trip_count"
74+
; CHECK-NEXT: %13 = add i64 %12, 1
75+
; CHECK-NEXT: store volatile i64 %13, i64* @"__polly_perf_in_g_from__%next__to__%polly.merge_new_and_old_trip_count"

0 commit comments

Comments
 (0)
Please sign in to comment.