Add a "hybrid" Polly target: GPU code generation is tried first and the CPU
pipeline handles whatever the GPU path did not generate code for.

PPCGCodeGeneration calls Scop::markAsToBeSkipped() on every SCoP for which it
generated code. IslScheduleOptimizer, IslAstInfoWrapperPass and CodeGeneration
return early when Scop::isToBeSkipped() is true.

Review notes:
- SkipScop is initialized right after CopyStmtsNum in the Scop constructor so
  that the initializer list follows the member declaration order (-Wreorder).
- NOTE(review): template arguments on context lines (std::list<...>,
  cl::opt<...>, getAnalysis<...>()) were missing from the copy of this patch
  under review and have been restored from the upstream Polly sources;
  confirm them against the target tree before applying.

Index: include/polly/ScopInfo.h
===================================================================
--- include/polly/ScopInfo.h
+++ include/polly/ScopInfo.h
@@ -1641,6 +1641,9 @@
   /// Number of copy statements.
   unsigned CopyStmtsNum;
 
+  /// Flag to indicate if the Scop is to be skipped.
+  bool SkipScop;
+
   typedef std::list<ScopStmt> StmtSet;
   /// The statements in this Scop.
   StmtSet Stmts;
@@ -2348,6 +2351,12 @@
   /// Check if the SCoP has been optimized by the scheduler.
   bool isOptimized() const { return IsOptimized; }
 
+  /// Mark the SCoP to be skipped by ScopPass passes.
+  void markAsToBeSkipped() { SkipScop = true; }
+
+  /// Check if the SCoP is to be skipped by ScopPass passes.
+  bool isToBeSkipped() const { return SkipScop; }
+
   /// Get the name of the entry and exit blocks of this Scop.
   ///
   /// These along with the function name can uniquely identify a Scop.
Index: lib/Analysis/ScopInfo.cpp
===================================================================
--- lib/Analysis/ScopInfo.cpp
+++ lib/Analysis/ScopInfo.cpp
@@ -3459,8 +3459,8 @@
 Scop::Scop(Region &R, ScalarEvolution &ScalarEvolution, LoopInfo &LI,
            ScopDetection::DetectionContext &DC)
     : SE(&ScalarEvolution), R(R), name(R.getNameStr()), IsOptimized(false),
       HasSingleExitEdge(R.getExitingBlock()), HasErrorBlock(false),
-      MaxLoopDepth(0), CopyStmtsNum(0), DC(DC),
+      MaxLoopDepth(0), CopyStmtsNum(0), SkipScop(false), DC(DC),
       IslCtx(isl_ctx_alloc(), isl_ctx_free), Context(nullptr),
       Affinator(this, LI), AssumedContext(nullptr), InvalidContext(nullptr),
       Schedule(nullptr) {
Index: lib/CodeGen/CodeGeneration.cpp
===================================================================
--- lib/CodeGen/CodeGeneration.cpp
+++ lib/CodeGen/CodeGeneration.cpp
@@ -278,6 +278,10 @@
 
   /// Generate LLVM-IR for the SCoP @p S.
   bool runOnScop(Scop &S) override {
+    // Skip SCoPs in case they're already code-generated by PPCGCodeGeneration.
+    if (S.isToBeSkipped())
+      return false;
+
     AI = &getAnalysis<IslAstInfoWrapperPass>().getAI();
     LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
     DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
Index: lib/CodeGen/IslAst.cpp
===================================================================
--- lib/CodeGen/IslAst.cpp
+++ lib/CodeGen/IslAst.cpp
@@ -624,6 +624,11 @@
 void IslAstInfoWrapperPass::releaseMemory() { Ast.reset(); }
 
 bool IslAstInfoWrapperPass::runOnScop(Scop &Scop) {
+
+  // Skip SCoPs in case they're already handled by PPCGCodeGeneration.
+  if (Scop.isToBeSkipped())
+    return false;
+
   const Dependences &D =
       getAnalysis<DependenceInfo>().getDependences(Dependences::AL_Statement);
 
Index: lib/CodeGen/PPCGCodeGeneration.cpp
===================================================================
--- lib/CodeGen/PPCGCodeGeneration.cpp
+++ lib/CodeGen/PPCGCodeGeneration.cpp
@@ -2840,8 +2840,10 @@
     auto PPCGProg = createPPCGProg(PPCGScop);
     auto PPCGGen = generateGPU(PPCGScop, PPCGProg);
 
-    if (PPCGGen->tree)
+    if (PPCGGen->tree) {
       generateCode(isl_ast_node_copy(PPCGGen->tree), PPCGProg);
+      CurrentScop.markAsToBeSkipped();
+    }
 
     freeOptions(PPCGScop);
     freePPCGGen(PPCGGen);
Index: lib/Support/RegisterPasses.cpp
===================================================================
--- lib/Support/RegisterPasses.cpp
+++ lib/Support/RegisterPasses.cpp
@@ -91,13 +91,15 @@
                clEnumValN(CODEGEN_NONE, "none", "No code generation")),
     cl::Hidden, cl::init(CODEGEN_FULL), cl::ZeroOrMore, cl::cat(PollyCategory));
 
-enum TargetChoice { TARGET_CPU, TARGET_GPU };
+enum TargetChoice { TARGET_CPU, TARGET_GPU, TARGET_HYBRID };
 static cl::opt<TargetChoice>
     Target("polly-target", cl::desc("The hardware to target"),
            cl::values(clEnumValN(TARGET_CPU, "cpu", "generate CPU code")
 #ifdef GPU_CODEGEN
                           ,
-                      clEnumValN(TARGET_GPU, "gpu", "generate GPU code")
+                      clEnumValN(TARGET_GPU, "gpu", "generate GPU code"),
+                      clEnumValN(TARGET_HYBRID, "hybrid",
+                                 "generate GPU code (preferably) or CPU code")
 #endif
                           ),
            cl::init(TARGET_CPU), cl::ZeroOrMore, cl::cat(PollyCategory));
@@ -309,9 +311,12 @@
   if (EnablePruneUnprofitable)
     PM.add(polly::createPruneUnprofitablePass());
 
-  if (Target == TARGET_GPU) {
-    // GPU generation provides its own scheduling optimization strategy.
-  } else {
+#ifdef GPU_CODEGEN
+  if (Target == TARGET_HYBRID)
+    PM.add(
+        polly::createPPCGCodeGenerationPass(GPUArchChoice, GPURuntimeChoice));
+#endif
+  if (Target == TARGET_CPU || Target == TARGET_HYBRID)
     switch (Optimizer) {
     case OPTIMIZER_NONE:
       break; /* Do nothing */
@@ -320,17 +325,11 @@
       PM.add(polly::createIslScheduleOptimizerPass());
       break;
     }
-  }
 
   if (ExportJScop)
     PM.add(polly::createJSONExporterPass());
 
-  if (Target == TARGET_GPU) {
-#ifdef GPU_CODEGEN
-    PM.add(
-        polly::createPPCGCodeGenerationPass(GPUArchChoice, GPURuntimeChoice));
-#endif
-  } else {
+  if (Target == TARGET_CPU || Target == TARGET_HYBRID)
     switch (CodeGeneration) {
     case CODEGEN_AST:
       PM.add(polly::createIslAstInfoWrapperPassPass());
@@ -341,7 +340,11 @@
     case CODEGEN_NONE:
       break;
     }
-  }
+#ifdef GPU_CODEGEN
+  else
+    PM.add(
+        polly::createPPCGCodeGenerationPass(GPUArchChoice, GPURuntimeChoice));
+#endif
 
   // FIXME: This dummy ModulePass keeps some programs from miscompiling,
   // probably some not correctly preserved analyses. It acts as a barrier to
Index: lib/Transform/ScheduleOptimizer.cpp
===================================================================
--- lib/Transform/ScheduleOptimizer.cpp
+++ lib/Transform/ScheduleOptimizer.cpp
@@ -1443,6 +1443,10 @@
 
 bool IslScheduleOptimizer::runOnScop(Scop &S) {
 
+  // Skip SCoPs in case they're already optimized by PPCGCodeGeneration.
+  if (S.isToBeSkipped())
+    return false;
+
   // Skip empty SCoPs but still allow code generation as it will delete the
   // loops present but not needed.
   if (S.getSize() == 0) {