Index: include/polly/CodeGen/IslAst.h =================================================================== --- include/polly/CodeGen/IslAst.h +++ include/polly/CodeGen/IslAst.h @@ -126,6 +126,9 @@ /// @brief Is this loop a reduction parallel loop? static bool isReductionParallel(__isl_keep isl_ast_node *Node); + /// @brief Will the loop be run as thread parallel? + static bool runAsThreadParallel(__isl_keep isl_ast_node *Node); + /// @brief Get the nodes schedule or a nullptr if not available. static __isl_give isl_union_map *getSchedule(__isl_keep isl_ast_node *Node); Index: lib/CodeGen/IslAst.cpp =================================================================== --- lib/CodeGen/IslAst.cpp +++ lib/CodeGen/IslAst.cpp @@ -42,6 +42,11 @@ using IslAstUserPayload = IslAstInfo::IslAstUserPayload; +static cl::opt<bool> + UseThreads("polly-parallel", + cl::desc("Generate thread parallel code (isl codegen only)"), + cl::init(false), cl::ZeroOrMore, cl::cat(PollyCategory)); + static cl::opt<bool> UseContext("polly-ast-use-context", cl::desc("Use context"), cl::Hidden, cl::init(false), cl::ZeroOrMore, @@ -148,6 +153,7 @@ isl_pw_aff *DD = IslAstInfo::getMinimalDependenceDistance(Node); const std::string BrokenReductionsStr = getBrokenReductionsStr(Node); + const std::string KnownParallelStr = "#pragma known-parallel"; const std::string DepDisPragmaStr = "#pragma minimal dependence distance: "; const std::string SimdPragmaStr = "#pragma simd"; const std::string OmpPragmaStr = "#pragma omp parallel for"; @@ -158,8 +164,10 @@ if (IslAstInfo::isInnermostParallel(Node)) Printer = printLine(Printer, SimdPragmaStr + BrokenReductionsStr); - if (IslAstInfo::isOutermostParallel(Node)) - Printer = printLine(Printer, OmpPragmaStr + BrokenReductionsStr); + if (IslAstInfo::runAsThreadParallel(Node)) + Printer = printLine(Printer, OmpPragmaStr); + else if (IslAstInfo::isOutermostParallel(Node)) + Printer = printLine(Printer, KnownParallelStr + BrokenReductionsStr); isl_pw_aff_free(DD); return 
isl_ast_node_for_print(Node, Printer, Options); @@ -357,7 +365,8 @@ isl_union_map *Schedule = isl_union_map_intersect_domain(S->getSchedule(), S->getDomains()); - if (DetectParallel || PollyVectorizerChoice != VECTORIZER_NONE) { + if (UseThreads || DetectParallel || + PollyVectorizerChoice != VECTORIZER_NONE) { BuildInfo.Deps = &D; BuildInfo.InParallelFor = 0; @@ -444,6 +453,10 @@ return Payload && Payload->IsReductionParallel; } +bool IslAstInfo::runAsThreadParallel(__isl_keep isl_ast_node *Node) { + return isOutermostParallel(Node) && !isReductionParallel(Node) && UseThreads; +} + isl_union_map *IslAstInfo::getSchedule(__isl_keep isl_ast_node *Node) { IslAstUserPayload *Payload = getNodePayload(Node); return Payload ? isl_ast_build_get_schedule(Payload->Build) : nullptr; Index: test/Isl/Ast/OpenMP/multiple_loops_outer_parallel.ll =================================================================== --- test/Isl/Ast/OpenMP/multiple_loops_outer_parallel.ll +++ test/Isl/Ast/OpenMP/multiple_loops_outer_parallel.ll @@ -1,4 +1,4 @@ -; RUN: opt %loadPolly -polly-ast -polly-ast-detect-parallel -analyze < %s | FileCheck %s +; RUN: opt %loadPolly -polly-ast -polly-parallel -analyze < %s | FileCheck %s ; ; void jd(int *A) { ; CHECK: #pragma omp parallel for Index: test/Isl/Ast/OpenMP/nested_loop_both_parallel.ll =================================================================== --- test/Isl/Ast/OpenMP/nested_loop_both_parallel.ll +++ test/Isl/Ast/OpenMP/nested_loop_both_parallel.ll @@ -1,4 +1,4 @@ -; RUN: opt %loadPolly -polly-ast -polly-ast-detect-parallel -analyze < %s | FileCheck %s +; RUN: opt %loadPolly -polly-ast -polly-parallel -analyze < %s | FileCheck %s target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" target triple = "x86_64-pc-linux-gnu" Index: test/Isl/Ast/OpenMP/nested_loop_both_parallel_parametric.ll 
=================================================================== --- test/Isl/Ast/OpenMP/nested_loop_both_parallel_parametric.ll +++ test/Isl/Ast/OpenMP/nested_loop_both_parallel_parametric.ll @@ -1,4 +1,4 @@ -; RUN: opt %loadPolly -polly-ast -polly-ast-detect-parallel -analyze -polly-delinearize < %s | FileCheck %s +; RUN: opt %loadPolly -polly-ast -polly-parallel -analyze -polly-delinearize < %s | FileCheck %s target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" target triple = "x86_64-pc-linux-gnu" ; int A[1024][1024]; Index: test/Isl/Ast/OpenMP/nested_loop_inner_parallel.ll =================================================================== --- test/Isl/Ast/OpenMP/nested_loop_inner_parallel.ll +++ test/Isl/Ast/OpenMP/nested_loop_inner_parallel.ll @@ -1,4 +1,4 @@ -; RUN: opt %loadPolly -polly-ast -polly-ast-detect-parallel -analyze < %s | FileCheck %s +; RUN: opt %loadPolly -polly-ast -polly-parallel -analyze < %s | FileCheck %s target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" target triple = "x86_64-pc-linux-gnu" Index: test/Isl/Ast/OpenMP/nested_loop_outer_parallel.ll =================================================================== --- test/Isl/Ast/OpenMP/nested_loop_outer_parallel.ll +++ test/Isl/Ast/OpenMP/nested_loop_outer_parallel.ll @@ -1,4 +1,4 @@ -; RUN: opt %loadPolly -polly-ast -polly-ast-detect-parallel -analyze < %s | FileCheck %s +; RUN: opt %loadPolly -polly-ast -polly-parallel -analyze < %s | FileCheck %s target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" target triple = "x86_64-pc-linux-gnu" Index: test/Isl/Ast/OpenMP/single_loop_param_non_parallel.ll 
=================================================================== --- test/Isl/Ast/OpenMP/single_loop_param_non_parallel.ll +++ test/Isl/Ast/OpenMP/single_loop_param_non_parallel.ll @@ -1,4 +1,4 @@ -; RUN: opt %loadPolly -polly-ast -polly-ast-detect-parallel -analyze < %s | FileCheck %s +; RUN: opt %loadPolly -polly-ast -polly-parallel -analyze < %s | FileCheck %s target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" target triple = "x86_64-pc-linux-gnu" Index: test/Isl/Ast/OpenMP/single_loop_param_parallel.ll =================================================================== --- test/Isl/Ast/OpenMP/single_loop_param_parallel.ll +++ test/Isl/Ast/OpenMP/single_loop_param_parallel.ll @@ -1,4 +1,4 @@ -; RUN: opt %loadPolly -polly-ast -polly-ast-detect-parallel -analyze < %s | FileCheck %s +; RUN: opt %loadPolly -polly-ast -polly-parallel -analyze < %s | FileCheck %s target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" target triple = "x86_64-pc-linux-gnu" Index: test/Isl/Ast/OpenMP/single_loop_param_parallel_computeout.ll =================================================================== --- test/Isl/Ast/OpenMP/single_loop_param_parallel_computeout.ll +++ test/Isl/Ast/OpenMP/single_loop_param_parallel_computeout.ll @@ -1,4 +1,4 @@ -; RUN: opt %loadPolly -polly-ast -polly-ast-detect-parallel -polly-dependences-computeout=1 -analyze < %s | FileCheck %s +; RUN: opt %loadPolly -polly-ast -polly-parallel -polly-dependences-computeout=1 -analyze < %s | FileCheck %s target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" target triple = "x86_64-pc-linux-gnu" Index: test/Isl/Ast/reduction_clauses_multidimensional_access.ll 
=================================================================== --- test/Isl/Ast/reduction_clauses_multidimensional_access.ll +++ test/Isl/Ast/reduction_clauses_multidimensional_access.ll @@ -1,6 +1,6 @@ ; RUN: opt %loadPolly -polly-delinearize -polly-ast -polly-ast-detect-parallel -analyze < %s | FileCheck %s ; -; CHECK: #pragma omp parallel for reduction (^ : sum) +; CHECK: #pragma known-parallel reduction (^ : sum) ; void f(int N, int M, int P, int sum[P][M]) { ; for (int i = 0; i < N; i++) ; for (int j = 0; j < P; j++) Index: test/Isl/Ast/reduction_clauses_onedimensional_access.ll =================================================================== --- test/Isl/Ast/reduction_clauses_onedimensional_access.ll +++ test/Isl/Ast/reduction_clauses_onedimensional_access.ll @@ -1,6 +1,6 @@ ; RUN: opt %loadPolly -polly-ast -polly-ast-detect-parallel -analyze < %s | FileCheck %s ; -; CHECK: #pragma omp parallel for reduction (^ : sum) +; CHECK: #pragma known-parallel reduction (^ : sum) ; void f(int N, int M, int *sum) { ; for (int i = 0; i < N; i++) ; CHECK: #pragma simd Index: test/Isl/Ast/reduction_different_reduction_clauses.ll =================================================================== --- test/Isl/Ast/reduction_different_reduction_clauses.ll +++ test/Isl/Ast/reduction_different_reduction_clauses.ll @@ -1,7 +1,7 @@ ; RUN: opt %loadPolly -basicaa -polly-ast -polly-ast-detect-parallel -analyze < %s | FileCheck %s ; ; CHECK: #pragma simd reduction (+ : sum{{[1,2]}}, sum{{[1,2]}}) reduction (* : prod) reduction (| : or) reduction (& : and) -; CHECK: #pragma omp parallel for reduction (+ : sum{{[1,2]}}, sum{{[1,2]}}) reduction (* : prod) reduction (| : or) reduction (& : and) +; CHECK: #pragma known-parallel reduction (+ : sum{{[1,2]}}, sum{{[1,2]}}) reduction (* : prod) reduction (| : or) reduction (& : and) ; CHECK: for (int c1 = 0; c1 < N; c1 += 1) ; CHECK: Stmt_for_body(c1); ; Index: test/Isl/Ast/reduction_in_one_dimension.ll 
=================================================================== --- test/Isl/Ast/reduction_in_one_dimension.ll +++ test/Isl/Ast/reduction_in_one_dimension.ll @@ -2,7 +2,7 @@ ; ; Verify that we won't privatize anything in the outer dimension ; -; CHECK: #pragma omp parallel for +; CHECK: #pragma known-parallel ; CHECK: for (int c1 = 0; c1 < 2 * n; c1 += 1) ; CHECK: #pragma simd reduction ; CHECK: for (int c3 = 0; c3 <= 1023; c3 += 1) Index: test/Isl/Ast/reduction_modulo_and_loop_reversal_schedule_2.ll =================================================================== --- test/Isl/Ast/reduction_modulo_and_loop_reversal_schedule_2.ll +++ test/Isl/Ast/reduction_modulo_and_loop_reversal_schedule_2.ll @@ -1,6 +1,6 @@ ; RUN: opt %loadPolly -polly-import-jscop-dir=%S -polly-import-jscop -polly-ast -polly-ast-detect-parallel -analyze < %s | FileCheck %s ; -; CHECK: #pragma omp parallel for reduction +; CHECK: #pragma known-parallel reduction ; CHECK: for (int c0 = 0; c0 <= 2; c0 += 1) { ; CHECK: if (c0 == 2) { ; CHECK: #pragma simd reduction Index: test/Isl/Ast/reduction_modulo_schedule_multiple_dimensions.ll =================================================================== --- test/Isl/Ast/reduction_modulo_schedule_multiple_dimensions.ll +++ test/Isl/Ast/reduction_modulo_schedule_multiple_dimensions.ll @@ -1,6 +1,6 @@ ; RUN: opt %loadPolly -polly-import-jscop-dir=%S -polly-import-jscop -polly-ast -polly-ast-detect-parallel -analyze < %s | FileCheck %s ; -; CHECK: #pragma omp parallel for +; CHECK: #pragma known-parallel ; CHECK: for (int c0 = 0; c0 <= 1; c0 += 1) { ; CHECK: if (c0 == 1) { ; CHECK: for (int c1 = 1; c1 < 2 * n; c1 += 2) Index: test/Isl/Ast/reduction_modulo_schedule_multiple_dimensions_2.ll =================================================================== --- test/Isl/Ast/reduction_modulo_schedule_multiple_dimensions_2.ll +++ test/Isl/Ast/reduction_modulo_schedule_multiple_dimensions_2.ll @@ -2,8 +2,8 @@ ; ; Verify that the outer dimension doesnt't 
carry reduction dependences ; -; CHECK-NOT:#pragma omp parallel for reduction -; CHECK: #pragma omp parallel for +; CHECK-NOT:#pragma known-parallel reduction +; CHECK: #pragma known-parallel ; CHECK: for (int c1 = 0; c1 < 2 * n; c1 += 1) { ; CHECK: if (c1 % 2 == 0) { ; CHECK: #pragma simd reduction Index: test/Isl/Ast/reduction_modulo_schedule_multiple_dimensions_3.ll =================================================================== --- test/Isl/Ast/reduction_modulo_schedule_multiple_dimensions_3.ll +++ test/Isl/Ast/reduction_modulo_schedule_multiple_dimensions_3.ll @@ -2,8 +2,8 @@ ; ; Verify that the outer dimension doesnt't carry reduction dependences ; -; CHECK-NOT:#pragma omp parallel for reduction -; CHECK: #pragma omp parallel for +; CHECK-NOT:#pragma known-parallel reduction +; CHECK: #pragma known-parallel ; CHECK: for (int c1 = 0; c1 < 2 * n; c1 += 1) ; CHECK: #pragma simd reduction ; CHECK: for (int c3 = 0; c3 <= 1023; c3 += 1) { Index: test/Isl/Ast/reduction_modulo_schedule_multiple_dimensions_4.ll =================================================================== --- test/Isl/Ast/reduction_modulo_schedule_multiple_dimensions_4.ll +++ test/Isl/Ast/reduction_modulo_schedule_multiple_dimensions_4.ll @@ -2,8 +2,8 @@ ; ; Verify that the outer dimension doesnt't carry reduction dependences ; -; CHECK-NOT:#pragma omp parallel for reduction -; CHECK: #pragma omp parallel for +; CHECK-NOT:#pragma known-parallel reduction +; CHECK: #pragma known-parallel ; CHECK: for (int c1 = 0; c1 < 2 * n; c1 += 1) ; CHECK: #pragma simd reduction ; CHECK: for (int c3 = -1023; c3 <= 1023; c3 += 1) { Index: test/Isl/Ast/reduction_modulo_schedule_multiple_dimensions_5.ll =================================================================== --- test/Isl/Ast/reduction_modulo_schedule_multiple_dimensions_5.ll +++ test/Isl/Ast/reduction_modulo_schedule_multiple_dimensions_5.ll @@ -2,7 +2,7 @@ ; ; Verify that only the outer dimension needs privatization ; -; CHECK: #pragma omp 
parallel for reduction +; CHECK: #pragma known-parallel reduction ; CHECK: for (int c1 = 0; c1 <= 1023; c1 += 1) { ; CHECK: if (c1 % 2 == 0) { ; CHECK-NOT: #pragma simd reduction Index: test/Isl/Ast/reduction_multiple_dimensions.ll =================================================================== --- test/Isl/Ast/reduction_multiple_dimensions.ll +++ test/Isl/Ast/reduction_multiple_dimensions.ll @@ -1,7 +1,7 @@ ; RUN: opt %loadPolly -polly-ast -polly-ast-detect-parallel -analyze < %s | FileCheck %s ; -; CHECK-NOT:#pragma omp parallel for reduction -; CHECK: #pragma omp parallel for +; CHECK-NOT:#pragma known-parallel reduction +; CHECK: #pragma known-parallel ; CHECK: for (int c1 = 0; c1 <= 2047; c1 += 1) ; CHECK: for (int c3 = 0; c3 <= 1023; c3 += 1) ; CHECK: #pragma simd reduction Index: test/Isl/Ast/reduction_multiple_dimensions_2.ll =================================================================== --- test/Isl/Ast/reduction_multiple_dimensions_2.ll +++ test/Isl/Ast/reduction_multiple_dimensions_2.ll @@ -1,7 +1,7 @@ ; RUN: opt %loadPolly -polly-ast -polly-ast-detect-parallel -analyze < %s | FileCheck %s ; -; CHECK-NOT:#pragma omp parallel for reduction -; CHECK: #pragma omp parallel for +; CHECK-NOT:#pragma known-parallel reduction +; CHECK: #pragma known-parallel ; CHECK: for (int c1 = 0; c1 <= 2047; c1 += 1) ; CHECK: for (int c3 = 0; c3 <= 1023; c3 += 1) ; CHECK: #pragma simd reduction Index: test/Isl/Ast/reduction_multiple_dimensions_3.ll =================================================================== --- test/Isl/Ast/reduction_multiple_dimensions_3.ll +++ test/Isl/Ast/reduction_multiple_dimensions_3.ll @@ -1,7 +1,7 @@ ; RUN: opt %loadPolly -polly-ast -polly-ast-detect-parallel -analyze < %s | FileCheck %s ; -; CHECK-NOT:#pragma omp parallel for reduction -; CHECK: #pragma omp parallel for +; CHECK-NOT:#pragma known-parallel reduction +; CHECK: #pragma known-parallel ; CHECK: for (int c1 = 0; c1 <= 2047; c1 += 1) ; CHECK: for (int c3 = 0; c3 <= 1023; 
c3 += 1) ; CHECK: #pragma simd reduction Index: test/Isl/Ast/reduction_multiple_dimensions_4.ll =================================================================== --- test/Isl/Ast/reduction_multiple_dimensions_4.ll +++ test/Isl/Ast/reduction_multiple_dimensions_4.ll @@ -1,7 +1,7 @@ ; RUN: opt %loadPolly -polly-ast -polly-ast-detect-parallel -analyze < %s | FileCheck %s ; -; CHECK-NOT:#pragma omp parallel for reduction -; CHECK: #pragma omp parallel for +; CHECK-NOT:#pragma known-parallel reduction +; CHECK: #pragma known-parallel ; CHECK: for (int c1 = 0; c1 <= 2047; c1 += 1) ; CHECK: for (int c3 = 0; c3 <= 1023; c3 += 1) ; CHECK: #pragma simd reduction Index: test/Isl/Ast/single_loop_strip_mine.ll =================================================================== --- test/Isl/Ast/single_loop_strip_mine.ll +++ test/Isl/Ast/single_loop_strip_mine.ll @@ -32,7 +32,7 @@ ; CHECK: for (int c1 = 0; c1 <= 1023; c1 += 1) ; CHECK: Stmt_for_body(c1); -; CHECK-VECTOR: #pragma omp parallel for +; CHECK-VECTOR: #pragma known-parallel ; CHECK-VECTOR: for (int c0 = 0; c0 <= 1023; c0 += 4) ; CHECK-VECTOR: #pragma simd ; CHECK-VECTOR: for (int c1 = c0; c1 <= c0 + 3; c1 += 1) Index: test/ScheduleOptimizer/prevectorization.ll =================================================================== --- test/ScheduleOptimizer/prevectorization.ll +++ test/ScheduleOptimizer/prevectorization.ll @@ -54,7 +54,7 @@ attributes #0 = { nounwind uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } -; CHECK: #pragma omp parallel for +; CHECK: #pragma known-parallel ; CHECK: for (int c1 = 0; c1 <= 1535; c1 += 32) ; CHECK: for (int c2 = 0; c2 <= 1535; c2 += 32) ; CHECK: for (int c3 = c1; c3 <= c1 + 31; c3 += 1) @@ -62,7 +62,7 @@ ; CHECK: #pragma simd ; CHECK: for (int c5 = c4; c5 <= c4 + 3; c5 += 1) ; CHECK: 
Stmt_for_body3(c3, c5); -; CHECK: #pragma omp parallel for +; CHECK: #pragma known-parallel ; CHECK: for (int c1 = 0; c1 <= 1535; c1 += 32) ; CHECK: for (int c2 = 0; c2 <= 1535; c2 += 32) ; CHECK: for (int c3 = 0; c3 <= 1535; c3 += 32)