Index: polly/trunk/include/polly/CodeGen/CodeGeneration.h
===================================================================
--- polly/trunk/include/polly/CodeGen/CodeGeneration.h
+++ polly/trunk/include/polly/CodeGen/CodeGeneration.h
@@ -36,6 +36,9 @@
   PreservedAnalyses run(Scop &S, ScopAnalysisManager &SAM,
                         ScopStandardAnalysisResults &AR, SPMUpdater &U);
 };
+
+/// Set by -polly-codegen-perf-monitoring: add run-time performance monitoring.
+extern bool PerfMonitoring;
 } // namespace polly
 
 #endif // POLLY_CODEGENERATION_H
Index: polly/trunk/lib/CodeGen/CodeGeneration.cpp
===================================================================
--- polly/trunk/lib/CodeGen/CodeGeneration.cpp
+++ polly/trunk/lib/CodeGen/CodeGeneration.cpp
@@ -49,10 +49,14 @@
                             cl::Hidden, cl::init(false), cl::ZeroOrMore,
                             cl::cat(PollyCategory));
 
-static cl::opt<bool>
-    PerfMonitoring("polly-codegen-perf-monitoring",
-                   cl::desc("Add run-time performance monitoring"), cl::Hidden,
-                   cl::init(false), cl::ZeroOrMore, cl::cat(PollyCategory));
+// Storage for -polly-codegen-perf-monitoring. It is declared in
+// CodeGeneration.h so that other translation units can read the flag.
+bool polly::PerfMonitoring;
+static cl::opt<bool, true>
+    XPerfMonitoring("polly-codegen-perf-monitoring",
+                    cl::desc("Add run-time performance monitoring"), cl::Hidden,
+                    cl::location(polly::PerfMonitoring), cl::init(false),
+                    cl::ZeroOrMore, cl::cat(PollyCategory));
 
 namespace polly {
 /// Mark a basic block unreachable.
Index: polly/trunk/lib/CodeGen/PPCGCodeGeneration.cpp
===================================================================
--- polly/trunk/lib/CodeGen/PPCGCodeGeneration.cpp
+++ polly/trunk/lib/CodeGen/PPCGCodeGeneration.cpp
@@ -16,6 +16,7 @@
 #include "polly/CodeGen/CodeGeneration.h"
 #include "polly/CodeGen/IslAst.h"
 #include "polly/CodeGen/IslNodeBuilder.h"
+#include "polly/CodeGen/PerfMonitor.h"
 #include "polly/CodeGen/Utils.h"
 #include "polly/DependenceInfo.h"
 #include "polly/LinkAllPasses.h"
@@ -122,6 +123,8 @@
                cl::desc("Minimal number of compute statements to run on GPU."),
                cl::Hidden, cl::init(10 * 512 * 512));
 
+extern bool polly::PerfMonitoring;
+
 /// Return a unique name for a Scop, which is the scop region with the
 /// function name.
 std::string getUniqueScopName(const Scop *S) {
@@ -3433,6 +3436,23 @@
 
       isl_ast_node_free(Root);
     } else {
+      if (polly::PerfMonitoring) {
+        PerfMonitor P(*S, EnteringBB->getParent()->getParent());
+        P.initialize();
+        P.insertRegionStart(SplitBlock->getTerminator());
+
+        // TODO: Verify that this is the correct exiting block in which to
+        //       place the `end` performance marker. Invariant load hoisting
+        //       changes the CFG in a way that I do not precisely understand,
+        //       so I (Siddharth) should come back to this and think about
+        //       which exiting block to use.
+        auto *ExitingBlock = StartBlock->getUniqueSuccessor();
+        assert(ExitingBlock && "StartBlock must have a unique successor");
+        BasicBlock *MergeBlock = ExitingBlock->getUniqueSuccessor();
+        assert(MergeBlock && "ExitingBlock must have a unique successor");
+        P.insertRegionEnd(MergeBlock->getTerminator());
+      }
+
       NodeBuilder.addParameters(S->getContext().release());
       Value *RTC = NodeBuilder.createRTC(Condition);
       Builder.GetInsertBlock()->getTerminator()->setOperand(0, RTC);