Index: include/polly/CodeGen/CodeGeneration.h
===================================================================
--- include/polly/CodeGen/CodeGeneration.h
+++ include/polly/CodeGen/CodeGeneration.h
@@ -36,6 +36,9 @@
   PreservedAnalyses run(Scop &S, ScopAnalysisManager &SAM,
                         ScopStandardAnalysisResults &AR, SPMUpdater &U);
 };
+
+/// Set by -polly-codegen-perf-monitoring: add run-time performance monitoring.
+extern bool PerfMonitoring;
 } // namespace polly
 
 #endif // POLLY_CODEGENERATION_H
Index: lib/CodeGen/CodeGeneration.cpp
===================================================================
--- lib/CodeGen/CodeGeneration.cpp
+++ lib/CodeGen/CodeGeneration.cpp
@@ -49,10 +49,12 @@
                             cl::Hidden, cl::init(false), cl::ZeroOrMore,
                             cl::cat(PollyCategory));
 
-static cl::opt<bool>
-    PerfMonitoring("polly-codegen-perf-monitoring",
-                   cl::desc("Add run-time performance monitoring"), cl::Hidden,
-                   cl::init(false), cl::ZeroOrMore, cl::cat(PollyCategory));
+bool polly::PerfMonitoring;
+static cl::opt<bool, true>
+    XPerfMonitoring("polly-codegen-perf-monitoring",
+                    cl::desc("Add run-time performance monitoring"), cl::Hidden,
+                    cl::location(polly::PerfMonitoring), cl::init(false),
+                    cl::ZeroOrMore, cl::cat(PollyCategory));
 
 namespace polly {
 /// Mark a basic block unreachable.
@@ -194,12 +196,12 @@
   Annotator.buildAliasScopes(S);
 
   if (PerfMonitoring) {
-    PerfMonitor P(S, EnteringBB->getParent()->getParent());
-    P.initialize();
-    P.insertRegionStart(SplitBlock->getTerminator());
+    PerfMonitor P(S, EnteringBB->getParent()->getParent());
+    P.initialize();
+    P.insertRegionStart(SplitBlock->getTerminator());
 
-    BasicBlock *MergeBlock = ExitBlock->getUniqueSuccessor();
-    P.insertRegionEnd(MergeBlock->getTerminator());
+    BasicBlock *MergeBlock = ExitBlock->getUniqueSuccessor();
+    P.insertRegionEnd(MergeBlock->getTerminator());
   }
 
   // First generate code for the hoisted invariant loads and transitively the
Index: lib/CodeGen/PPCGCodeGeneration.cpp
===================================================================
--- lib/CodeGen/PPCGCodeGeneration.cpp
+++ lib/CodeGen/PPCGCodeGeneration.cpp
@@ -39,6 +39,7 @@
 #include "llvm/Target/TargetMachine.h"
 #include "llvm/Transforms/IPO/PassManagerBuilder.h"
 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "polly/CodeGen/PerfMonitor.h"
 
 #include "isl/union_map.h"
 
@@ -122,6 +123,8 @@
                cl::desc("Minimal number of compute statements to run on GPU."),
                cl::Hidden, cl::init(10 * 512 * 512));
 
+extern bool polly::PerfMonitoring;
+
 /// Return a unique name for a Scop, which is the scop region with the
 /// function name.
 std::string getUniqueScopName(const Scop *S) {
@@ -3433,6 +3436,19 @@
       isl_ast_node_free(Root);
     } else {
 
+      if (polly::PerfMonitoring) {
+        PerfMonitor P(*S, EnteringBB->getParent()->getParent());
+        P.initialize();
+        P.insertRegionStart(SplitBlock->getTerminator());
+
+        // TODO(review): confirm that this is the correct "exiting" BB.
+        auto *ExitingBlock = StartBlock->getUniqueSuccessor();
+        assert(ExitingBlock && "StartBlock has no unique successor");
+        BasicBlock *MergeBlock = ExitingBlock->getUniqueSuccessor();
+        assert(MergeBlock && "exiting block has no unique successor");
+        P.insertRegionEnd(MergeBlock->getTerminator());
+      }
+
       NodeBuilder.addParameters(S->getContext().release());
       Value *RTC = NodeBuilder.createRTC(Condition);
       Builder.GetInsertBlock()->getTerminator()->setOperand(0, RTC);