Skip to content

Commit 07bee29

Browse files
committed Jun 2, 2017
[CodeGen] Extend Performance Counter to track per-scop information.
Previously, we generated a single performance counter shared by all scops. Now, we generate both the old aggregate information and a per-scop performance counter that provides finer-grained information. This patch needed a way to generate a unique name for a `Scop`. The start region, end region, and function name combined provide a unique `Scop` name, so `Scop` has a new public API that returns its start and end region names. Differential Revision: https://reviews.llvm.org/D33723 llvm-svn: 304528
1 parent af19915 commit 07bee29

File tree

7 files changed

+211
-33
lines changed

7 files changed

+211
-33
lines changed
 

‎polly/include/polly/CodeGen/PerfMonitor.h

+21-1
Original file line numberDiff line numberDiff line change
@@ -25,8 +25,10 @@ class PerfMonitor {
2525
public:
2626
/// Create a new performance monitor.
2727
///
28+
/// @param S The scop for which to generate fine-grained performance
29+
/// monitoring information.
2830
/// @param M The module for which to generate the performance monitor.
29-
PerfMonitor(llvm::Module *M);
31+
PerfMonitor(const Scop &S, llvm::Module *M);
3032

3133
/// Initialize the performance monitor.
3234
///
@@ -48,12 +50,18 @@ class PerfMonitor {
4850
llvm::Module *M;
4951
PollyIRBuilder Builder;
5052

53+
// The scop to profile against.
54+
const Scop &S;
55+
5156
/// Indicates if performance profiling is supported on this architecture.
5257
bool Supported;
5358

5459
/// The cycle counter at the beginning of the program execution.
5560
llvm::Value *CyclesTotalStartPtr;
5661

62+
/// The total number of cycles spent in the current scop S.
63+
llvm::Value *CyclesInCurrentScopPtr;
64+
5765
/// The total number of cycles spent within scops.
5866
llvm::Value *CyclesInScopsPtr;
5967

@@ -89,6 +97,12 @@ class PerfMonitor {
8997
/// into the module (or obtain references to them if they already exist).
9098
void addGlobalVariables();
9199

100+
/// Add per-scop tracking to module.
101+
///
102+
/// Insert the global variable which is used to track the number of cycles
103+
/// this scop runs.
104+
void addScopCounter();
105+
92106
/// Get a reference to the intrinsic "i64 @llvm.x86.rdtscp(i8*)".
93107
///
94108
/// The rdtscp function returns the current value of the processor's
@@ -126,6 +140,12 @@ class PerfMonitor {
126140
/// This function finalizes the performance measurements and prints the
127141
/// results to stdout. It is expected to be registered with 'atexit()'.
128142
llvm::Function *insertFinalReporting();
143+
144+
/// Append Scop reporting data to "__polly_perf_final_reporting".
145+
///
146+
/// This function appends the current scop (S)'s information to the final
147+
/// printing function.
148+
void AppendScopReporting();
129149
};
130150
} // namespace polly
131151

‎polly/include/polly/ScopInfo.h

+8
Original file line numberDiff line numberDiff line change
@@ -2329,6 +2329,14 @@ class Scop {
23292329
/// Check if the SCoP has been optimized by the scheduler.
23302330
bool isOptimized() const { return IsOptimized; }
23312331

2332+
/// Get the name of the entry and exit blocks of this Scop.
2333+
///
2334+
/// These along with the function name can uniquely identify a Scop.
2335+
///
2336+
/// @return std::pair whose first element is the entry name & second element
2337+
/// is the exit name.
2338+
std::pair<std::string, std::string> getEntryExitStr() const;
2339+
23322340
/// Get the name of this Scop.
23332341
std::string getNameStr() const;
23342342

‎polly/lib/Analysis/ScopInfo.cpp

+7-1
Original file line numberDiff line numberDiff line change
@@ -4125,6 +4125,12 @@ std::string Scop::getInvalidContextStr() const {
41254125
}
41264126

41274127
/// Build the printable name of this Scop.
///
/// @return The entry name and the exit name reported by getEntryExitStr(),
///         joined by "---".
std::string Scop::getNameStr() const {
  const std::pair<std::string, std::string> EntryExit = getEntryExitStr();
  return EntryExit.first + "---" + EntryExit.second;
}
4132+
4133+
std::pair<std::string, std::string> Scop::getEntryExitStr() const {
41284134
std::string ExitName, EntryName;
41294135
raw_string_ostream ExitStr(ExitName);
41304136
raw_string_ostream EntryStr(EntryName);
@@ -4138,7 +4144,7 @@ std::string Scop::getNameStr() const {
41384144
} else
41394145
ExitName = "FunctionExit";
41404146

4141-
return EntryName + "---" + ExitName;
4147+
return std::make_pair(EntryName, ExitName);
41424148
}
41434149

41444150
__isl_give isl_set *Scop::getContext() const { return isl_set_copy(Context); }

‎polly/lib/CodeGen/CodeGeneration.cpp

+1-1
Original file line numberDiff line numberDiff line change
@@ -184,7 +184,7 @@ static bool CodeGen(Scop &S, IslAstInfo &AI, LoopInfo &LI, DominatorTree &DT,
184184
IslNodeBuilder NodeBuilder(Builder, Annotator, DL, LI, SE, DT, S, StartBlock);
185185

186186
if (PerfMonitoring) {
187-
PerfMonitor P(EnteringBB->getParent()->getParent());
187+
PerfMonitor P(S, EnteringBB->getParent()->getParent());
188188
P.initialize();
189189
P.insertRegionStart(SplitBlock->getTerminator());
190190

‎polly/lib/CodeGen/PerfMonitor.cpp

+74-27
Original file line numberDiff line numberDiff line change
@@ -11,8 +11,10 @@
1111

1212
#include "polly/CodeGen/PerfMonitor.h"
1313
#include "polly/CodeGen/RuntimeDebugBuilder.h"
14+
#include "polly/ScopInfo.h"
1415
#include "llvm/ADT/Triple.h"
1516
#include "llvm/IR/Intrinsics.h"
17+
#include <sstream>
1618

1719
using namespace llvm;
1820
using namespace polly;
@@ -60,51 +62,73 @@ Function *PerfMonitor::getRDTSCP() {
6062
return Intrinsic::getDeclaration(M, Intrinsic::x86_rdtscp);
6163
}
6264

63-
PerfMonitor::PerfMonitor(Module *M) : M(M), Builder(M->getContext()) {
65+
PerfMonitor::PerfMonitor(const Scop &S, Module *M)
66+
: M(M), Builder(M->getContext()), S(S) {
6467
if (Triple(M->getTargetTriple()).getArch() == llvm::Triple::x86_64)
6568
Supported = true;
6669
else
6770
Supported = false;
6871
}
6972

70-
void PerfMonitor::addGlobalVariables() {
71-
auto TryRegisterGlobal = [=](const char *Name, Constant *InitialValue,
72-
Value **Location) {
73-
*Location = M->getGlobalVariable(Name);
73+
/// Look up the global variable @p Name in @p M, creating it if it is missing.
///
/// @param M            The module to search and, if needed, to add the global
///                     to.
/// @param Name         The name of the global variable.
/// @param InitialValue The initializer of a newly created global; its type is
///                     also used as the type of that global.
/// @param Location     Output parameter that receives the already existing or
///                     the newly created global.
///
/// A newly created global has weak linkage and uses the initial-exec TLS
/// model.
// NOTE(review): the global is created with the constant flag set (third
// constructor argument) although the counters registered through this helper
// are stored to by the generated code — confirm that this is intended.
static void TryRegisterGlobal(Module *M, const char *Name,
                              Constant *InitialValue, Value **Location) {
  *Location = M->getGlobalVariable(Name);

  // Only create the variable when the module does not provide it yet, so that
  // repeated registrations of the same name share one global.
  if (!*Location)
    *Location = new GlobalVariable(
        *M, InitialValue->getType(), true, GlobalValue::WeakAnyLinkage,
        InitialValue, Name, nullptr, GlobalVariable::InitialExecTLSModel);
}
82+
83+
// Generate a unique name that is usable as a LLVM name for a scop to name its
84+
// performance counter.
85+
static std::string GetScopUniqueVarname(const Scop &S) {
86+
std::stringstream Name;
87+
std::string EntryString, ExitString;
88+
std::tie(EntryString, ExitString) = S.getEntryExitStr();
89+
90+
Name << "__polly_perf_cycles_in_" << std::string(S.getFunction().getName())
91+
<< "_from__" << EntryString << "__to__" << ExitString;
92+
return Name.str();
93+
}
7494

75-
if (!*Location)
76-
*Location = new GlobalVariable(
77-
*M, InitialValue->getType(), true, GlobalValue::WeakAnyLinkage,
78-
InitialValue, Name, nullptr, GlobalVariable::InitialExecTLSModel);
79-
};
95+
void PerfMonitor::addScopCounter() {
96+
const std::string varname = GetScopUniqueVarname(S);
97+
TryRegisterGlobal(M, varname.c_str(), Builder.getInt64(0),
98+
&CyclesInCurrentScopPtr);
99+
}
80100

81-
TryRegisterGlobal("__polly_perf_cycles_total_start", Builder.getInt64(0),
101+
void PerfMonitor::addGlobalVariables() {
102+
TryRegisterGlobal(M, "__polly_perf_cycles_total_start", Builder.getInt64(0),
82103
&CyclesTotalStartPtr);
83104

84-
TryRegisterGlobal("__polly_perf_initialized", Builder.getInt1(0),
105+
TryRegisterGlobal(M, "__polly_perf_initialized", Builder.getInt1(0),
85106
&AlreadyInitializedPtr);
86107

87-
TryRegisterGlobal("__polly_perf_cycles_in_scops", Builder.getInt64(0),
108+
TryRegisterGlobal(M, "__polly_perf_cycles_in_scops", Builder.getInt64(0),
88109
&CyclesInScopsPtr);
89110

90-
TryRegisterGlobal("__polly_perf_cycles_in_scop_start", Builder.getInt64(0),
111+
TryRegisterGlobal(M, "__polly_perf_cycles_in_scop_start", Builder.getInt64(0),
91112
&CyclesInScopStartPtr);
92113

93-
TryRegisterGlobal("__polly_perf_write_loation", Builder.getInt32(0),
114+
TryRegisterGlobal(M, "__polly_perf_write_loation", Builder.getInt32(0),
94115
&RDTSCPWriteLocation);
95116
}
96117

97118
static const char *InitFunctionName = "__polly_perf_init";
98119
static const char *FinalReportingFunctionName = "__polly_perf_final";
99120

121+
static BasicBlock *FinalStartBB = nullptr;
122+
static ReturnInst *ReturnFromFinal = nullptr;
123+
100124
Function *PerfMonitor::insertFinalReporting() {
101125
// Create new function.
102126
GlobalValue::LinkageTypes Linkage = Function::WeakODRLinkage;
103127
FunctionType *Ty = FunctionType::get(Builder.getVoidTy(), {}, false);
104128
Function *ExitFn =
105129
Function::Create(Ty, Linkage, FinalReportingFunctionName, M);
106-
BasicBlock *Start = BasicBlock::Create(M->getContext(), "start", ExitFn);
107-
Builder.SetInsertPoint(Start);
130+
FinalStartBB = BasicBlock::Create(M->getContext(), "start", ExitFn);
131+
Builder.SetInsertPoint(FinalStartBB);
108132

109133
if (!Supported) {
110134
RuntimeDebugBuilder::createCPUPrinter(
@@ -128,23 +152,42 @@ Function *PerfMonitor::insertFinalReporting() {
128152
RuntimeDebugBuilder::createCPUPrinter(Builder, "Total: ", CyclesTotal, "\n");
129153
RuntimeDebugBuilder::createCPUPrinter(Builder, "Scops: ", CyclesInScops,
130154
"\n");
131-
132-
// Finalize function.
133-
Builder.CreateRetVoid();
155+
ReturnFromFinal = Builder.CreateRetVoid();
134156
return ExitFn;
135157
}
136158

159+
void PerfMonitor::AppendScopReporting() {
160+
Builder.SetInsertPoint(FinalStartBB);
161+
ReturnFromFinal->eraseFromParent();
162+
163+
Value *CyclesInCurrentScop =
164+
Builder.CreateLoad(this->CyclesInCurrentScopPtr, true);
165+
std::string EntryName, ExitName;
166+
std::tie(EntryName, ExitName) = S.getEntryExitStr();
167+
168+
RuntimeDebugBuilder::createCPUPrinter(
169+
Builder, "Scop(", S.getFunction().getName(), " |from: ", EntryName,
170+
" |to: ", ExitName, "): ", CyclesInCurrentScop, "\n");
171+
172+
ReturnFromFinal = Builder.CreateRetVoid();
173+
}
174+
175+
static Function *FinalReporting = nullptr;
176+
137177
void PerfMonitor::initialize() {
138178
addGlobalVariables();
179+
addScopCounter();
139180

140-
Function *F = M->getFunction(InitFunctionName);
141-
if (F)
142-
return;
181+
// Ensure that we only add the final reporting function once.
182+
// On later invocations, append to the reporting function.
183+
if (!FinalReporting) {
184+
FinalReporting = insertFinalReporting();
185+
186+
Function *InitFn = insertInitFunction(FinalReporting);
187+
addToGlobalConstructors(InitFn);
188+
}
143189

144-
// initialize
145-
Function *FinalReporting = insertFinalReporting();
146-
Function *InitFn = insertInitFunction(FinalReporting);
147-
addToGlobalConstructors(InitFn);
190+
AppendScopReporting();
148191
}
149192

150193
Function *PerfMonitor::insertInitFunction(Function *FinalReporting) {
@@ -223,4 +266,8 @@ void PerfMonitor::insertRegionEnd(Instruction *InsertBefore) {
223266
Value *CyclesInScops = Builder.CreateLoad(CyclesInScopsPtr, true);
224267
CyclesInScops = Builder.CreateAdd(CyclesInScops, CyclesInScop);
225268
Builder.CreateStore(CyclesInScops, CyclesInScopsPtr, true);
269+
270+
Value *CyclesInCurrentScop = Builder.CreateLoad(CyclesInCurrentScopPtr, true);
271+
CyclesInCurrentScop = Builder.CreateAdd(CyclesInCurrentScop, CyclesInScop);
272+
Builder.CreateStore(CyclesInCurrentScop, CyclesInCurrentScopPtr, true);
226273
}

‎polly/test/Isl/CodeGen/perf_monitoring.ll

-3
Original file line numberDiff line numberDiff line change
@@ -49,7 +49,6 @@ return:
4949
; CHECK-NEXT: %8 = load volatile i64, i64* @__polly_perf_cycles_in_scops
5050
; CHECK-NEXT: %9 = add i64 %8, %7
5151
; CHECK-NEXT: store volatile i64 %9, i64* @__polly_perf_cycles_in_scops
52-
; CHECK-NEXT: br label %return
5352

5453

5554
; CHECK: define weak_odr void @__polly_perf_final() {
@@ -66,8 +65,6 @@ return:
6665
; CHECK-NEXT: %9 = call i32 @fflush(i8* null)
6766
; CHECK-NEXT: %10 = call i32 (...) @printf(i8* getelementptr inbounds ([8 x i8], [8 x i8]* @9, i32 0, i32 0), i8 addrspace(4)* getelementptr inbounds ([8 x i8], [8 x i8] addrspace(4)* @7, i32 0, i32 0), i64 %3, i8 addrspace(4)* getelementptr inbounds ([2 x i8], [2 x i8] addrspace(4)* @8, i32 0, i32 0))
6867
; CHECK-NEXT: %11 = call i32 @fflush(i8* null)
69-
; CHECK-NEXT: ret void
70-
; CHECK-NEXT: }
7168

7269

7370
; CHECK: define weak_odr void @__polly_perf_init() {
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,100 @@
1+
; RUN: opt %loadPolly -polly-codegen -polly-codegen-perf-monitoring \
2+
; RUN: -S < %s | FileCheck %s
3+
4+
; void f(long A[], long N) {
5+
; long i;
6+
; if (true)
7+
; for (i = 0; i < N; ++i)
8+
; A[i] = i;
9+
; }
10+
; void g(long A[], long N) {
11+
; long i;
12+
; if (true)
13+
; for (i = 0; i < N; ++i)
14+
; A[i] = i;
15+
; }
16+
17+
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
18+
target triple = "x86_64-unknown-linux-gnu"
19+
20+
define void @f(i64* %A, i64 %N) nounwind {
21+
entry:
22+
fence seq_cst
23+
br label %next
24+
25+
next:
26+
br i1 true, label %for.i, label %return
27+
28+
for.i:
29+
%indvar = phi i64 [ 0, %next], [ %indvar.next, %for.i ]
30+
%scevgep = getelementptr i64, i64* %A, i64 %indvar
31+
store i64 %indvar, i64* %scevgep
32+
%indvar.next = add nsw i64 %indvar, 1
33+
%exitcond = icmp eq i64 %indvar.next, %N
34+
br i1 %exitcond, label %return, label %for.i
35+
36+
return:
37+
fence seq_cst
38+
ret void
39+
}
40+
41+
42+
define void @g(i64* %A, i64 %N) nounwind {
43+
entry:
44+
fence seq_cst
45+
br label %next
46+
47+
next:
48+
br i1 true, label %for.i, label %return
49+
50+
for.i:
51+
%indvar = phi i64 [ 0, %next], [ %indvar.next, %for.i ]
52+
%scevgep = getelementptr i64, i64* %A, i64 %indvar
53+
store i64 %indvar, i64* %scevgep
54+
%indvar.next = add nsw i64 %indvar, 1
55+
%exitcond = icmp eq i64 %indvar.next, %N
56+
br i1 %exitcond, label %return, label %for.i
57+
58+
return:
59+
fence seq_cst
60+
ret void
61+
}
62+
63+
; Declaration of globals
64+
; CHECK: @"__polly_perf_cycles_in_f_from__%next__to__%polly.merge_new_and_old" = weak thread_local(initialexec) constant i64 0
65+
; CHECK: @"__polly_perf_cycles_in_g_from__%next__to__%polly.merge_new_and_old" = weak thread_local(initialexec) constant i64 0
66+
67+
; Bumping up counter in f
68+
; CHECK: polly.merge_new_and_old: ; preds = %polly.exiting, %return.region_exiting
69+
; CHECK-NEXT: %5 = load volatile i64, i64* @__polly_perf_cycles_in_scop_start
70+
; CHECK-NEXT: %6 = call i64 @llvm.x86.rdtscp(i8* bitcast (i32* @__polly_perf_write_loation to i8*))
71+
; CHECK-NEXT: %7 = sub i64 %6, %5
72+
; CHECK-NEXT: %8 = load volatile i64, i64* @__polly_perf_cycles_in_scops
73+
; CHECK-NEXT: %9 = add i64 %8, %7
74+
; CHECK-NEXT: store volatile i64 %9, i64* @__polly_perf_cycles_in_scops
75+
; CHECK-NEXT: %10 = load volatile i64, i64* @"__polly_perf_cycles_in_f_from__%next__to__%polly.merge_new_and_old"
76+
; CHECK-NEXT: %11 = add i64 %10, %7
77+
; CHECK-NEXT: store volatile i64 %11, i64* @"__polly_perf_cycles_in_f_from__%next__to__%polly.merge_new_and_old"
78+
; CHECK-NEXT: br label %return
79+
80+
; Bumping up counter in g
81+
; CHECK: polly.merge_new_and_old: ; preds = %polly.exiting, %return.region_exiting
82+
; CHECK-NEXT: %5 = load volatile i64, i64* @__polly_perf_cycles_in_scop_start
83+
; CHECK-NEXT: %6 = call i64 @llvm.x86.rdtscp(i8* bitcast (i32* @__polly_perf_write_loation to i8*))
84+
; CHECK-NEXT: %7 = sub i64 %6, %5
85+
; CHECK-NEXT: %8 = load volatile i64, i64* @__polly_perf_cycles_in_scops
86+
; CHECK-NEXT: %9 = add i64 %8, %7
87+
; CHECK-NEXT: store volatile i64 %9, i64* @__polly_perf_cycles_in_scops
88+
; CHECK-NEXT: %10 = load volatile i64, i64* @"__polly_perf_cycles_in_g_from__%next__to__%polly.merge_new_and_old"
89+
; CHECK-NEXT: %11 = add i64 %10, %7
90+
; CHECK-NEXT: store volatile i64 %11, i64* @"__polly_perf_cycles_in_g_from__%next__to__%polly.merge_new_and_old"
91+
; CHECK-NEXT: br label %return
92+
93+
; Final reporting prints
94+
; CHECK: %12 = load volatile i64, i64* @"__polly_perf_cycles_in_f_from__%next__to__%polly.merge_new_and_old"
95+
; CHECK-NEXT: %13 = call i32 (...) @printf(i8* getelementptr inbounds ([20 x i8], [20 x i8]* @18, i32 0, i32 0), i8 addrspace(4)* getelementptr inbounds ([6 x i8], [6 x i8] addrspace(4)* @10, i32 0, i32 0), i8 addrspace(4)* getelementptr inbounds ([2 x i8], [2 x i8] addrspace(4)* @11, i32 0, i32 0), i8 addrspace(4)* getelementptr inbounds ([9 x i8], [9 x i8] addrspace(4)* @12, i32 0, i32 0), i8 addrspace(4)* getelementptr inbounds ([6 x i8], [6 x i8] addrspace(4)* @13, i32 0, i32 0), i8 addrspace(4)* getelementptr inbounds ([7 x i8], [7 x i8] addrspace(4)* @14, i32 0, i32 0), i8 addrspace(4)* getelementptr inbounds ([25 x i8], [25 x i8] addrspace(4)* @15, i32 0, i32 0), i8 addrspace(4)* getelementptr inbounds ([4 x i8], [4 x i8] addrspace(4)* @16, i32 0, i32 0), i64 %12, i8 addrspace(4)* getelementptr inbounds ([2 x i8], [2 x i8] addrspace(4)* @17, i32 0, i32 0))
96+
; CHECK-NEXT: %14 = call i32 @fflush(i8* null)
97+
; CHECK-NEXT: %15 = load volatile i64, i64* @"__polly_perf_cycles_in_g_from__%next__to__%polly.merge_new_and_old"
98+
; CHECK-NEXT: %16 = call i32 (...) @printf(i8* getelementptr inbounds ([20 x i8], [20 x i8]* @27, i32 0, i32 0), i8 addrspace(4)* getelementptr inbounds ([6 x i8], [6 x i8] addrspace(4)* @19, i32 0, i32 0), i8 addrspace(4)* getelementptr inbounds ([2 x i8], [2 x i8] addrspace(4)* @20, i32 0, i32 0), i8 addrspace(4)* getelementptr inbounds ([9 x i8], [9 x i8] addrspace(4)* @21, i32 0, i32 0), i8 addrspace(4)* getelementptr inbounds ([6 x i8], [6 x i8] addrspace(4)* @22, i32 0, i32 0), i8 addrspace(4)* getelementptr inbounds ([7 x i8], [7 x i8] addrspace(4)* @23, i32 0, i32 0), i8 addrspace(4)* getelementptr inbounds ([25 x i8], [25 x i8] addrspace(4)* @24, i32 0, i32 0), i8 addrspace(4)* getelementptr inbounds ([4 x i8], [4 x i8] addrspace(4)* @25, i32 0, i32 0), i64 %15, i8 addrspace(4)* getelementptr inbounds ([2 x i8], [2 x i8] addrspace(4)* @26, i32 0, i32 0))
99+
; CHECK-NEXT: %17 = call i32 @fflush(i8* null)
100+
; CHECK-NEXT: ret void

0 commit comments

Comments
 (0)
Please sign in to comment.