Index: include/polly/ScopInfo.h =================================================================== --- include/polly/ScopInfo.h +++ include/polly/ScopInfo.h @@ -1620,6 +1620,9 @@ /// Number of copy statements. unsigned CopyStmtsNum; + /// Flag to indicate if the Scop is to be skipped. + bool SkipScop; + typedef std::list StmtSet; /// The statements in this Scop. StmtSet Stmts; @@ -2329,6 +2332,12 @@ /// Check if the SCoP has been optimized by the scheduler. bool isOptimized() const { return IsOptimized; } + /// Mark the SCoP to be skipped by ScopPass passes. + void markAsToBeSkipped() { SkipScop = true; } + + /// Check if the SCoP is to be skipped by ScopPass passes. + bool isToBeSkipped() const { return SkipScop; } + /// Get the name of the entry and exit blocks of this Scop. /// /// These along with the function name can uniquely identify a Scop. Index: lib/Analysis/ScopInfo.cpp =================================================================== --- lib/Analysis/ScopInfo.cpp +++ lib/Analysis/ScopInfo.cpp @@ -3426,8 +3426,8 @@ Scop::Scop(Region &R, ScalarEvolution &ScalarEvolution, LoopInfo &LI, ScopDetection::DetectionContext &DC) : SE(&ScalarEvolution), R(R), name(R.getNameStr()), IsOptimized(false), - HasSingleExitEdge(R.getExitingBlock()), HasErrorBlock(false), - MaxLoopDepth(0), CopyStmtsNum(0), DC(DC), + SkipScop(false), HasSingleExitEdge(R.getExitingBlock()), + HasErrorBlock(false), MaxLoopDepth(0), CopyStmtsNum(0), DC(DC), IslCtx(isl_ctx_alloc(), isl_ctx_free), Context(nullptr), Affinator(this, LI), AssumedContext(nullptr), InvalidContext(nullptr), Schedule(nullptr) { Index: lib/CodeGen/CodeGeneration.cpp =================================================================== --- lib/CodeGen/CodeGeneration.cpp +++ lib/CodeGen/CodeGeneration.cpp @@ -268,6 +268,10 @@ /// Generate LLVM-IR for the SCoP @p S. bool runOnScop(Scop &S) override { + // Skip SCoPs in case they're already code-generated by PPCGCodeGeneration. + if (S.isToBeSkipped()) + return false; + AI = &getAnalysis().getAI(); LI = &getAnalysis().getLoopInfo(); DT = &getAnalysis().getDomTree(); Index: lib/CodeGen/IslAst.cpp =================================================================== --- lib/CodeGen/IslAst.cpp +++ lib/CodeGen/IslAst.cpp @@ -624,6 +624,11 @@ void IslAstInfoWrapperPass::releaseMemory() { Ast.reset(); } bool IslAstInfoWrapperPass::runOnScop(Scop &Scop) { + + // Skip SCoPs in case they're already handled by PPCGCodeGeneration. + if (Scop.isToBeSkipped()) + return false; + const Dependences &D = getAnalysis().getDependences(Dependences::AL_Statement); Index: lib/CodeGen/PPCGCodeGeneration.cpp =================================================================== --- lib/CodeGen/PPCGCodeGeneration.cpp +++ lib/CodeGen/PPCGCodeGeneration.cpp @@ -2685,8 +2685,10 @@ auto PPCGProg = createPPCGProg(PPCGScop); auto PPCGGen = generateGPU(PPCGScop, PPCGProg); - if (PPCGGen->tree) + if (PPCGGen->tree) { generateCode(isl_ast_node_copy(PPCGGen->tree), PPCGProg); + CurrentScop.markAsToBeSkipped(); + } freeOptions(PPCGScop); freePPCGGen(PPCGGen); Index: lib/Support/RegisterPasses.cpp =================================================================== --- lib/Support/RegisterPasses.cpp +++ lib/Support/RegisterPasses.cpp @@ -91,13 +91,15 @@ clEnumValN(CODEGEN_NONE, "none", "No code generation")), cl::Hidden, cl::init(CODEGEN_FULL), cl::ZeroOrMore, cl::cat(PollyCategory)); -enum TargetChoice { TARGET_CPU, TARGET_GPU }; +enum TargetChoice { TARGET_CPU, TARGET_GPU, TARGET_Hybrid }; static cl::opt Target("polly-target", cl::desc("The hardware to target"), cl::values(clEnumValN(TARGET_CPU, "cpu", "generate CPU code") #ifdef GPU_CODEGEN , - clEnumValN(TARGET_GPU, "gpu", "generate GPU code") + clEnumValN(TARGET_GPU, "gpu", "generate GPU code"), + clEnumValN(TARGET_Hybrid, "hybrid", + "generate GPU code (preferably) or CPU code") #endif ), cl::init(TARGET_CPU), cl::ZeroOrMore, cl::cat(PollyCategory)); @@ -309,9 +311,7 @@ if (EnablePruneUnprofitable) PM.add(polly::createPruneUnprofitablePass()); - if (Target == TARGET_GPU) { - // GPU generation provides its own scheduling optimization strategy. - } else { + if (Target == TARGET_CPU) { switch (Optimizer) { case OPTIMIZER_NONE: break; /* Do nothing */ @@ -320,17 +320,46 @@ PM.add(polly::createIslScheduleOptimizerPass()); break; } - } - if (ExportJScop) - PM.add(polly::createJSONExporterPass()); + if (ExportJScop) + PM.add(polly::createJSONExporterPass()); - if (Target == TARGET_GPU) { + switch (CodeGeneration) { + case CODEGEN_AST: + PM.add(polly::createIslAstInfoWrapperPassPass()); + break; + case CODEGEN_FULL: + PM.add(polly::createCodeGenerationPass()); + break; + case CODEGEN_NONE: + break; + } + } #ifdef GPU_CODEGEN + else if (Target == TARGET_GPU) { + if (ExportJScop) // Does this make sense ? + PM.add(polly::createJSONExporterPass()); + PM.add( polly::createPPCGCodeGenerationPass(GPUArchChoice, GPURuntimeChoice)); -#endif + } else { + + PM.add( + polly::createPPCGCodeGenerationPass(GPUArchChoice, GPURuntimeChoice)); + + switch (Optimizer) { + case OPTIMIZER_NONE: + break; /* Do nothing */ + + case OPTIMIZER_ISL: + PM.add(polly::createIslScheduleOptimizerPass()); + break; + } + + if (ExportJScop) + PM.add(polly::createJSONExporterPass()); + switch (CodeGeneration) { case CODEGEN_AST: PM.add(polly::createIslAstInfoWrapperPassPass()); @@ -342,6 +371,7 @@ break; } } +#endif // FIXME: This dummy ModulePass keeps some programs from miscompiling, // probably some not correctly preserved analyses. It acts as a barrier to @@ -356,10 +386,12 @@ if (CFGPrinter) PM.add(llvm::createCFGPrinterLegacyPassPass()); - if (Target == TARGET_GPU) { +#ifdef GPU_CODEGEN + if (Target == TARGET_GPU || Target == TARGET_Hybrid) { // Invariant load hoisting not yet supported by GPU code generation. PollyInvariantLoadHoisting = false; } +#endif } static bool shouldEnablePolly() { Index: lib/Transform/ScheduleOptimizer.cpp =================================================================== --- lib/Transform/ScheduleOptimizer.cpp +++ lib/Transform/ScheduleOptimizer.cpp @@ -1449,6 +1449,10 @@ bool IslScheduleOptimizer::runOnScop(Scop &S) { + // Skip SCoPs in case they're already optimised by PPCGCodeGeneration + if (S.isToBeSkipped()) + return false; + // Skip empty SCoPs but still allow code generation as it will delete the // loops present but not needed. if (S.getSize() == 0) {