Index: polly/trunk/include/polly/ScopInfo.h =================================================================== --- polly/trunk/include/polly/ScopInfo.h +++ polly/trunk/include/polly/ScopInfo.h @@ -1633,6 +1633,9 @@ /// Number of copy statements. unsigned CopyStmtsNum; + /// Flag to indicate if the Scop is to be skipped. + bool SkipScop; + typedef std::list StmtSet; /// The statements in this Scop. StmtSet Stmts; @@ -2366,6 +2369,12 @@ /// Check if the SCoP has been optimized by the scheduler. bool isOptimized() const { return IsOptimized; } + /// Mark the SCoP to be skipped by ScopPass passes. + void markAsToBeSkipped() { SkipScop = true; } + + /// Check if the SCoP is to be skipped by ScopPass passes. + bool isToBeSkipped() const { return SkipScop; } + /// Get the name of the entry and exit blocks of this Scop. /// /// These along with the function name can uniquely identify a Scop. Index: polly/trunk/lib/Analysis/ScopInfo.cpp =================================================================== --- polly/trunk/lib/Analysis/ScopInfo.cpp +++ polly/trunk/lib/Analysis/ScopInfo.cpp @@ -3497,8 +3497,8 @@ Scop::Scop(Region &R, ScalarEvolution &ScalarEvolution, LoopInfo &LI, ScopDetection::DetectionContext &DC) : SE(&ScalarEvolution), R(R), name(R.getNameStr()), IsOptimized(false), - HasSingleExitEdge(R.getExitingBlock()), HasErrorBlock(false), - MaxLoopDepth(0), CopyStmtsNum(0), DC(DC), + SkipScop(false), HasSingleExitEdge(R.getExitingBlock()), + HasErrorBlock(false), MaxLoopDepth(0), CopyStmtsNum(0), DC(DC), IslCtx(isl_ctx_alloc(), isl_ctx_free), Context(nullptr), Affinator(this, LI), AssumedContext(nullptr), InvalidContext(nullptr), Schedule(nullptr) { Index: polly/trunk/lib/CodeGen/CodeGeneration.cpp =================================================================== --- polly/trunk/lib/CodeGen/CodeGeneration.cpp +++ polly/trunk/lib/CodeGen/CodeGeneration.cpp @@ -278,6 +278,10 @@ /// Generate LLVM-IR for the SCoP @p S. bool runOnScop(Scop &S) override { + // Skip SCoPs in case they're already code-generated by PPCGCodeGeneration. + if (S.isToBeSkipped()) + return false; + AI = &getAnalysis().getAI(); LI = &getAnalysis().getLoopInfo(); DT = &getAnalysis().getDomTree(); Index: polly/trunk/lib/CodeGen/IslAst.cpp =================================================================== --- polly/trunk/lib/CodeGen/IslAst.cpp +++ polly/trunk/lib/CodeGen/IslAst.cpp @@ -624,6 +624,11 @@ void IslAstInfoWrapperPass::releaseMemory() { Ast.reset(); } bool IslAstInfoWrapperPass::runOnScop(Scop &Scop) { + + // Skip SCoPs in case they're already handled by PPCGCodeGeneration. + if (Scop.isToBeSkipped()) + return false; + const Dependences &D = getAnalysis().getDependences(Dependences::AL_Statement); Index: polly/trunk/lib/CodeGen/PPCGCodeGeneration.cpp =================================================================== --- polly/trunk/lib/CodeGen/PPCGCodeGeneration.cpp +++ polly/trunk/lib/CodeGen/PPCGCodeGeneration.cpp @@ -2840,8 +2840,10 @@ auto PPCGProg = createPPCGProg(PPCGScop); auto PPCGGen = generateGPU(PPCGScop, PPCGProg); - if (PPCGGen->tree) + if (PPCGGen->tree) { generateCode(isl_ast_node_copy(PPCGGen->tree), PPCGProg); + CurrentScop.markAsToBeSkipped(); + } freeOptions(PPCGScop); freePPCGGen(PPCGGen); Index: polly/trunk/lib/Support/RegisterPasses.cpp =================================================================== --- polly/trunk/lib/Support/RegisterPasses.cpp +++ polly/trunk/lib/Support/RegisterPasses.cpp @@ -92,13 +92,15 @@ clEnumValN(CODEGEN_NONE, "none", "No code generation")), cl::Hidden, cl::init(CODEGEN_FULL), cl::ZeroOrMore, cl::cat(PollyCategory)); -enum TargetChoice { TARGET_CPU, TARGET_GPU }; +enum TargetChoice { TARGET_CPU, TARGET_GPU, TARGET_HYBRID }; static cl::opt Target("polly-target", cl::desc("The hardware to target"), cl::values(clEnumValN(TARGET_CPU, "cpu", "generate CPU code") #ifdef GPU_CODEGEN , - clEnumValN(TARGET_GPU, "gpu", "generate GPU code") + clEnumValN(TARGET_GPU, "gpu", "generate GPU code"), + clEnumValN(TARGET_HYBRID, "hybrid", + "generate GPU code (preferably) or CPU code") #endif ), cl::init(TARGET_CPU), cl::ZeroOrMore, cl::cat(PollyCategory)); @@ -314,9 +316,12 @@ if (EnablePruneUnprofitable) PM.add(polly::createPruneUnprofitablePass()); - if (Target == TARGET_GPU) { - // GPU generation provides its own scheduling optimization strategy. - } else { +#ifdef GPU_CODEGEN + if (Target == TARGET_HYBRID) + PM.add( + polly::createPPCGCodeGenerationPass(GPUArchChoice, GPURuntimeChoice)); +#endif + if (Target == TARGET_CPU || Target == TARGET_HYBRID) switch (Optimizer) { case OPTIMIZER_NONE: break; /* Do nothing */ @@ -325,17 +330,11 @@ PM.add(polly::createIslScheduleOptimizerPass()); break; } - } if (ExportJScop) PM.add(polly::createJSONExporterPass()); - if (Target == TARGET_GPU) { -#ifdef GPU_CODEGEN - PM.add( - polly::createPPCGCodeGenerationPass(GPUArchChoice, GPURuntimeChoice)); -#endif - } else { + if (Target == TARGET_CPU || Target == TARGET_HYBRID) switch (CodeGeneration) { case CODEGEN_AST: PM.add(polly::createIslAstInfoWrapperPassPass()); @@ -346,7 +345,11 @@ case CODEGEN_NONE: break; } - } +#ifdef GPU_CODEGEN + else + PM.add( + polly::createPPCGCodeGenerationPass(GPUArchChoice, GPURuntimeChoice)); +#endif // FIXME: This dummy ModulePass keeps some programs from miscompiling, // probably some not correctly preserved analyses. It acts as a barrier to Index: polly/trunk/lib/Transform/ScheduleOptimizer.cpp =================================================================== --- polly/trunk/lib/Transform/ScheduleOptimizer.cpp +++ polly/trunk/lib/Transform/ScheduleOptimizer.cpp @@ -1443,6 +1443,10 @@ bool IslScheduleOptimizer::runOnScop(Scop &S) { + // Skip SCoPs in case they're already optimised by PPCGCodeGeneration + if (S.isToBeSkipped()) + return false; + // Skip empty SCoPs but still allow code generation as it will delete the // loops present but not needed. if (S.getSize() == 0) {