Index: polly/trunk/include/polly/CodeGen/PerfMonitor.h =================================================================== --- polly/trunk/include/polly/CodeGen/PerfMonitor.h +++ polly/trunk/include/polly/CodeGen/PerfMonitor.h @@ -62,6 +62,9 @@ /// The total number of cycles spent in the current scop S. llvm::Value *CyclesInCurrentScopPtr; + /// The total number of times the current scop S is executed. + llvm::Value *TripCountForCurrentScopPtr; + /// The total number of cycles spent within scops. llvm::Value *CyclesInScopsPtr; Index: polly/trunk/lib/CodeGen/PerfMonitor.cpp =================================================================== --- polly/trunk/lib/CodeGen/PerfMonitor.cpp +++ polly/trunk/lib/CodeGen/PerfMonitor.cpp @@ -87,15 +87,18 @@ std::string EntryString, ExitString; std::tie(EntryString, ExitString) = S.getEntryExitStr(); - Name << "__polly_perf_cycles_in_" << std::string(S.getFunction().getName()) + Name << "__polly_perf_in_" << std::string(S.getFunction().getName()) << "_from__" << EntryString << "__to__" << ExitString; return Name.str(); } void PerfMonitor::addScopCounter() { const std::string varname = GetScopUniqueVarname(S); - TryRegisterGlobal(M, varname.c_str(), Builder.getInt64(0), + TryRegisterGlobal(M, (varname + "_cycles").c_str(), Builder.getInt64(0), &CyclesInCurrentScopPtr); + + TryRegisterGlobal(M, (varname + "_trip_count").c_str(), Builder.getInt64(0), + &TripCountForCurrentScopPtr); } void PerfMonitor::addGlobalVariables() { @@ -160,7 +163,7 @@ RuntimeDebugBuilder::createCPUPrinter( Builder, "scop function, " - "entry block name, exit block name, total time\n"); + "entry block name, exit block name, total time, trip count\n"); ReturnFromFinal = Builder.CreateRetVoid(); return ExitFn; } @@ -179,13 +182,17 @@ Value *CyclesInCurrentScop = Builder.CreateLoad(this->CyclesInCurrentScopPtr, true); + + Value *TripCountForCurrentScop = + Builder.CreateLoad(this->TripCountForCurrentScopPtr, true); + std::string EntryName, ExitName; 
std::tie(EntryName, ExitName) = S.getEntryExitStr(); // print in CSV for easy parsing with other tools. - RuntimeDebugBuilder::createCPUPrinter(Builder, S.getFunction().getName(), - ", ", EntryName, ", ", ExitName, ", ", - CyclesInCurrentScop, "\n"); + RuntimeDebugBuilder::createCPUPrinter( + Builder, S.getFunction().getName(), ", ", EntryName, ", ", ExitName, ", ", + CyclesInCurrentScop, ", ", TripCountForCurrentScop, "\n"); ReturnFromFinal = Builder.CreateRetVoid(); } @@ -288,4 +295,11 @@ Value *CyclesInCurrentScop = Builder.CreateLoad(CyclesInCurrentScopPtr, true); CyclesInCurrentScop = Builder.CreateAdd(CyclesInCurrentScop, CyclesInScop); Builder.CreateStore(CyclesInCurrentScop, CyclesInCurrentScopPtr, true); + + Value *TripCountForCurrentScop = + Builder.CreateLoad(TripCountForCurrentScopPtr, true); + TripCountForCurrentScop = + Builder.CreateAdd(TripCountForCurrentScop, Builder.getInt64(1)); + Builder.CreateStore(TripCountForCurrentScop, TripCountForCurrentScopPtr, + true); } Index: polly/trunk/test/Isl/CodeGen/perf_monitoring_cycles_per_scop.ll =================================================================== --- polly/trunk/test/Isl/CodeGen/perf_monitoring_cycles_per_scop.ll +++ polly/trunk/test/Isl/CodeGen/perf_monitoring_cycles_per_scop.ll @@ -0,0 +1,75 @@ +; RUN: opt %loadPolly -polly-codegen -polly-codegen-perf-monitoring \ +; RUN: -S < %s | FileCheck %s + +; void f(long A[], long N) { +; long i; +; if (true) +; for (i = 0; i < N; ++i) +; A[i] = i; +; } +; void g(long A[], long N) { +; long i; +; if (true) +; for (i = 0; i < N; ++i) +; A[i] = i; +; } + +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128" +target triple = "x86_64-unknown-linux-gnu" + +define void @f(i64* %A, i64 %N) nounwind { +entry: + fence seq_cst + br label %next + +next: + br i1 true, label %for.i, label %return + +for.i: + %indvar = phi i64 [ 0, %next], [ %indvar.next, %for.i ] + 
%scevgep = getelementptr i64, i64* %A, i64 %indvar + store i64 %indvar, i64* %scevgep + %indvar.next = add nsw i64 %indvar, 1 + %exitcond = icmp eq i64 %indvar.next, %N + br i1 %exitcond, label %return, label %for.i + +return: + fence seq_cst + ret void +} + + +define void @g(i64* %A, i64 %N) nounwind { +entry: + fence seq_cst + br label %next + +next: + br i1 true, label %for.i, label %return + +for.i: + %indvar = phi i64 [ 0, %next], [ %indvar.next, %for.i ] + %scevgep = getelementptr i64, i64* %A, i64 %indvar + store i64 %indvar, i64* %scevgep + %indvar.next = add nsw i64 %indvar, 1 + %exitcond = icmp eq i64 %indvar.next, %N + br i1 %exitcond, label %return, label %for.i + +return: + fence seq_cst + ret void +} + +; Declaration of globals - Check for cycles declaration. +; CHECK: @"__polly_perf_in_f_from__%next__to__%polly.merge_new_and_old_cycles" = weak thread_local(initialexec) constant i64 0 +; CHECK: @"__polly_perf_in_g_from__%next__to__%polly.merge_new_and_old_cycles" = weak thread_local(initialexec) constant i64 0 + +; Bumping up number of cycles in f +; CHECK: %10 = load volatile i64, i64* @"__polly_perf_in_f_from__%next__to__%polly.merge_new_and_old_cycles" +; CHECK-NEXT: %11 = add i64 %10, %7 +; CHECK-NEXT: store volatile i64 %11, i64* @"__polly_perf_in_f_from__%next__to__%polly.merge_new_and_old_cycles" + +; Bumping up number of cycles in g +; CHECK: %10 = load volatile i64, i64* @"__polly_perf_in_g_from__%next__to__%polly.merge_new_and_old_cycles" +; CHECK-NEXT: %11 = add i64 %10, %7 +; CHECK-NEXT: store volatile i64 %11, i64* @"__polly_perf_in_g_from__%next__to__%polly.merge_new_and_old_cycles" Index: polly/trunk/test/Isl/CodeGen/perf_monitoring_per_scop.ll =================================================================== --- polly/trunk/test/Isl/CodeGen/perf_monitoring_per_scop.ll +++ polly/trunk/test/Isl/CodeGen/perf_monitoring_per_scop.ll @@ -1,98 +0,0 @@ -; RUN: opt %loadPolly -polly-codegen -polly-codegen-perf-monitoring \ -; RUN: -S < %s | FileCheck %s - 
-; void f(long A[], long N) { -; long i; -; if (true) -; for (i = 0; i < N; ++i) -; A[i] = i; -; } -; void g(long A[], long N) { -; long i; -; if (true) -; for (i = 0; i < N; ++i) -; A[i] = i; -; } - -target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128" -target triple = "x86_64-unknown-linux-gnu" - -define void @f(i64* %A, i64 %N) nounwind { -entry: - fence seq_cst - br label %next - -next: - br i1 true, label %for.i, label %return - -for.i: - %indvar = phi i64 [ 0, %next], [ %indvar.next, %for.i ] - %scevgep = getelementptr i64, i64* %A, i64 %indvar - store i64 %indvar, i64* %scevgep - %indvar.next = add nsw i64 %indvar, 1 - %exitcond = icmp eq i64 %indvar.next, %N - br i1 %exitcond, label %return, label %for.i - -return: - fence seq_cst - ret void -} - - -define void @g(i64* %A, i64 %N) nounwind { -entry: - fence seq_cst - br label %next - -next: - br i1 true, label %for.i, label %return - -for.i: - %indvar = phi i64 [ 0, %next], [ %indvar.next, %for.i ] - %scevgep = getelementptr i64, i64* %A, i64 %indvar - store i64 %indvar, i64* %scevgep - %indvar.next = add nsw i64 %indvar, 1 - %exitcond = icmp eq i64 %indvar.next, %N - br i1 %exitcond, label %return, label %for.i - -return: - fence seq_cst - ret void -} - -; Declaration of globals -; CHECK: @"__polly_perf_cycles_in_f_from__%next__to__%polly.merge_new_and_old" = weak thread_local(initialexec) constant i64 0 -; CHECK: @"__polly_perf_cycles_in_g_from__%next__to__%polly.merge_new_and_old" = weak thread_local(initialexec) constant i64 0 - -; Bumping up counter in f -; CHECK: polly.merge_new_and_old: ; preds = %polly.exiting, %return.region_exiting -; CHECK-NEXT: %5 = load volatile i64, i64* @__polly_perf_cycles_in_scop_start -; CHECK-NEXT: %6 = call i64 @llvm.x86.rdtscp(i8* bitcast (i32* @__polly_perf_write_loation to i8*)) -; CHECK-NEXT: %7 = sub i64 %6, %5 -; CHECK-NEXT: %8 = load volatile i64, i64* 
@__polly_perf_cycles_in_scops -; CHECK-NEXT: %9 = add i64 %8, %7 -; CHECK-NEXT: store volatile i64 %9, i64* @__polly_perf_cycles_in_scops -; CHECK-NEXT: %10 = load volatile i64, i64* @"__polly_perf_cycles_in_f_from__%next__to__%polly.merge_new_and_old" -; CHECK-NEXT: %11 = add i64 %10, %7 -; CHECK-NEXT: store volatile i64 %11, i64* @"__polly_perf_cycles_in_f_from__%next__to__%polly.merge_new_and_old" -; CHECK-NEXT: br label %return - -; Bumping up counter in g -; CHECK: polly.merge_new_and_old: ; preds = %polly.exiting, %return.region_exiting -; CHECK-NEXT: %5 = load volatile i64, i64* @__polly_perf_cycles_in_scop_start -; CHECK-NEXT: %6 = call i64 @llvm.x86.rdtscp(i8* bitcast (i32* @__polly_perf_write_loation to i8*)) -; CHECK-NEXT: %7 = sub i64 %6, %5 -; CHECK-NEXT: %8 = load volatile i64, i64* @__polly_perf_cycles_in_scops -; CHECK-NEXT: %9 = add i64 %8, %7 -; CHECK-NEXT: store volatile i64 %9, i64* @__polly_perf_cycles_in_scops -; CHECK-NEXT: %10 = load volatile i64, i64* @"__polly_perf_cycles_in_g_from__%next__to__%polly.merge_new_and_old" -; CHECK-NEXT: %11 = add i64 %10, %7 -; CHECK-NEXT: store volatile i64 %11, i64* @"__polly_perf_cycles_in_g_from__%next__to__%polly.merge_new_and_old" -; CHECK-NEXT: br label %return - -; Final reporting prints -; CHECK: %20 = load volatile i64, i64* @"__polly_perf_cycles_in_f_from__%next__to__%polly.merge_new_and_old" -; CHECK-NEXT: %21 = call i32 (...) 
@printf(i8* getelementptr inbounds ([18 x i8], [18 x i8]* @25, i32 0, i32 0), i8 addrspace(4)* getelementptr inbounds ([2 x i8], [2 x i8] addrspace(4)* @18, i32 0, i32 0), i8 addrspace(4)* getelementptr inbounds ([3 x i8], [3 x i8] addrspace(4)* @19, i32 0, i32 0), i8 addrspace(4)* getelementptr inbounds ([6 x i8], [6 x i8] addrspace(4)* @20, i32 0, i32 0), i8 addrspace(4)* getelementptr inbounds ([3 x i8], [3 x i8] addrspace(4)* @21, i32 0, i32 0), i8 addrspace(4)* getelementptr inbounds ([25 x i8], [25 x i8] addrspace(4)* @22, i32 0, i32 0), i8 addrspace(4)* getelementptr inbounds ([3 x i8], [3 x i8] addrspace(4)* @23, i32 0, i32 0), i64 %20, i8 addrspace(4)* getelementptr inbounds ([2 x i8], [2 x i8] addrspace(4)* @24, i32 0, i32 0)) -; CHECK-NEXT: %22 = call i32 @fflush(i8* null) -; CHECK-NEXT: %23 = load volatile i64, i64* @"__polly_perf_cycles_in_g_from__%next__to__%polly.merge_new_and_old" -; CHECK-NEXT: %24 = call i32 (...) @printf(i8* getelementptr inbounds ([18 x i8], [18 x i8]* @33, i32 0, i32 0), i8 addrspace(4)* getelementptr inbounds ([2 x i8], [2 x i8] addrspace(4)* @26, i32 0, i32 0), i8 addrspace(4)* getelementptr inbounds ([3 x i8], [3 x i8] addrspace(4)* @27, i32 0, i32 0), i8 addrspace(4)* getelementptr inbounds ([6 x i8], [6 x i8] addrspace(4)* @28, i32 0, i32 0), i8 addrspace(4)* getelementptr inbounds ([3 x i8], [3 x i8] addrspace(4)* @29, i32 0, i32 0), i8 addrspace(4)* getelementptr inbounds ([25 x i8], [25 x i8] addrspace(4)* @30, i32 0, i32 0), i8 addrspace(4)* getelementptr inbounds ([3 x i8], [3 x i8] addrspace(4)* @31, i32 0, i32 0), i64 %23, i8 addrspace(4)* getelementptr inbounds ([2 x i8], [2 x i8] addrspace(4)* @32, i32 0, i32 0)) Index: polly/trunk/test/Isl/CodeGen/perf_monitoring_trip_counts_per_scop.ll =================================================================== --- polly/trunk/test/Isl/CodeGen/perf_monitoring_trip_counts_per_scop.ll +++ polly/trunk/test/Isl/CodeGen/perf_monitoring_trip_counts_per_scop.ll @@ -0,0 +1,75 @@ 
+; RUN: opt %loadPolly -polly-codegen -polly-codegen-perf-monitoring \ +; RUN: -S < %s | FileCheck %s + +; void f(long A[], long N) { +; long i; +; if (true) +; for (i = 0; i < N; ++i) +; A[i] = i; +; } +; void g(long A[], long N) { +; long i; +; if (true) +; for (i = 0; i < N; ++i) +; A[i] = i; +; } + +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128" +target triple = "x86_64-unknown-linux-gnu" + +define void @f(i64* %A, i64 %N) nounwind { +entry: + fence seq_cst + br label %next + +next: + br i1 true, label %for.i, label %return + +for.i: + %indvar = phi i64 [ 0, %next], [ %indvar.next, %for.i ] + %scevgep = getelementptr i64, i64* %A, i64 %indvar + store i64 %indvar, i64* %scevgep + %indvar.next = add nsw i64 %indvar, 1 + %exitcond = icmp eq i64 %indvar.next, %N + br i1 %exitcond, label %return, label %for.i + +return: + fence seq_cst + ret void +} + + +define void @g(i64* %A, i64 %N) nounwind { +entry: + fence seq_cst + br label %next + +next: + br i1 true, label %for.i, label %return + +for.i: + %indvar = phi i64 [ 0, %next], [ %indvar.next, %for.i ] + %scevgep = getelementptr i64, i64* %A, i64 %indvar + store i64 %indvar, i64* %scevgep + %indvar.next = add nsw i64 %indvar, 1 + %exitcond = icmp eq i64 %indvar.next, %N + br i1 %exitcond, label %return, label %for.i + +return: + fence seq_cst + ret void +} + +; Declaration of globals - Check for trip count declaration. 
+; CHECK: @"__polly_perf_in_f_from__%next__to__%polly.merge_new_and_old_trip_count" = weak thread_local(initialexec) constant i64 0 +; CHECK: @"__polly_perf_in_g_from__%next__to__%polly.merge_new_and_old_trip_count" = weak thread_local(initialexec) constant i64 0 + +; Bumping up the trip count in f +; CHECK: %12 = load volatile i64, i64* @"__polly_perf_in_f_from__%next__to__%polly.merge_new_and_old_trip_count" +; CHECK-NEXT: %13 = add i64 %12, 1 +; CHECK-NEXT: store volatile i64 %13, i64* @"__polly_perf_in_f_from__%next__to__%polly.merge_new_and_old_trip_count" + +; Bumping up the trip count in g +; CHECK: %12 = load volatile i64, i64* @"__polly_perf_in_g_from__%next__to__%polly.merge_new_and_old_trip_count" +; CHECK-NEXT: %13 = add i64 %12, 1 +; CHECK-NEXT: store volatile i64 %13, i64* @"__polly_perf_in_g_from__%next__to__%polly.merge_new_and_old_trip_count"