diff --git a/openmp/runtime/src/kmp_dispatch.cpp b/openmp/runtime/src/kmp_dispatch.cpp --- a/openmp/runtime/src/kmp_dispatch.cpp +++ b/openmp/runtime/src/kmp_dispatch.cpp @@ -392,8 +392,7 @@ KD_TRACE(100, ("__kmp_dispatch_init_algorithm: T#%d switching to " "kmp_sch_dynamic_chunked\n", gtid)); - if (pr->u.p.parm1 <= 0) - pr->u.p.parm1 = KMP_DEFAULT_CHUNK; + goto dynamic_init; break; } // if } // case @@ -490,6 +489,7 @@ if ((2L * chunk + 1) * nproc >= tc) { /* chunk size too large, switch to dynamic */ schedule = kmp_sch_dynamic_chunked; + goto dynamic_init; } else { // when remaining iters become less than parm2 - switch to dynamic pr->u.p.parm2 = guided_int_param * nproc * (chunk + 1); @@ -519,6 +519,7 @@ if ((2L * chunk + 1) * nproc >= tc) { /* chunk size too large, switch to dynamic */ schedule = kmp_sch_dynamic_chunked; + goto dynamic_init; } else { /* commonly used term: (2 nproc - 1)/(2 nproc) */ DBL x; @@ -643,10 +644,14 @@ break; case kmp_sch_static_chunked: case kmp_sch_dynamic_chunked: + dynamic_init: if (pr->u.p.parm1 <= 0) pr->u.p.parm1 = KMP_DEFAULT_CHUNK; else if (pr->u.p.parm1 > tc) pr->u.p.parm1 = tc; + // Store the total number of chunks to prevent integer overflow during + // bounds calculations in the get next chunk routine. + pr->u.p.parm2 = (tc / pr->u.p.parm1) + (tc % pr->u.p.parm1 ? 1 : 0); KD_TRACE(100, ("__kmp_dispatch_init_algorithm: T#%d " "kmp_sch_static_chunked/kmp_sch_dynamic_chunked cases\n", gtid)); @@ -1487,28 +1492,32 @@ break; case kmp_sch_dynamic_chunked: { - T chunk = pr->u.p.parm1; + UT chunk_number; + UT chunk_size = pr->u.p.parm1; + UT nchunks = pr->u.p.parm2; KD_TRACE( 100, ("__kmp_dispatch_next_algorithm: T#%d kmp_sch_dynamic_chunked case\n", gtid)); - init = chunk * test_then_inc_acq((volatile ST *)&sh->u.s.iteration); - trip = pr->u.p.tc - 1; - - if ((status = (init <= trip)) == 0) { + chunk_number = test_then_inc_acq((volatile ST *)&sh->u.s.iteration); + status = (chunk_number < nchunks); + if (!status) { *p_lb = 0; *p_ub = 0; if (p_st != NULL) *p_st = 0; } else { + init = chunk_size * chunk_number; + trip = pr->u.p.tc - 1; start = pr->u.p.lb; - limit = chunk + init - 1; incr = pr->u.p.st; - if ((last = (limit >= trip)) != 0) + if ((last = (trip - init < (UT)chunk_size))) limit = trip; + else + limit = chunk_size + init - 1; if (p_st != NULL) *p_st = incr; diff --git a/openmp/runtime/test/worksharing/for/omp_for_dynamic_large_chunk.c b/openmp/runtime/test/worksharing/for/omp_for_dynamic_large_chunk.c new file mode 100644 --- /dev/null +++ b/openmp/runtime/test/worksharing/for/omp_for_dynamic_large_chunk.c @@ -0,0 +1,67 @@ +// RUN: %libomp-compile +// RUN: env OMP_WAIT_POLICY=passive OMP_NUM_THREADS=32 %libomp-run 0 134217728 1 134217728 +// +// This test makes sure that large chunks sizes are handled correctly +// including internal runtime calculations which incorporate the chunk size +// Only one thread should execute all iterations. +#include +#include +#include "omp_testsuite.h" + +typedef unsigned long long ull_t; + +int main(int argc, char **argv) { + int i, j, lb, ub, stride, nthreads, actual_nthreads, chunk; + ull_t num_iters = 0; + ull_t counted_iters = 0; + int errs = 0; + if (argc != 5) { + fprintf(stderr, "error: incorrect number of arguments\n"); + fprintf(stderr, "usage: %s \n", argv[0]); + exit(EXIT_FAILURE); + } + lb = atoi(argv[1]); + ub = atoi(argv[2]); + stride = atoi(argv[3]); + chunk = atoi(argv[4]); + nthreads = omp_get_max_threads(); + if (lb >= ub) { + fprintf(stderr, "error: lb must be less than ub\n"); + exit(EXIT_FAILURE); + } + if (stride <= 0) { + fprintf(stderr, "error: stride must be positive integer\n"); + exit(EXIT_FAILURE); + } + if (chunk <= 0) { + fprintf(stderr, "error: chunk must be positive integer\n"); + exit(EXIT_FAILURE); + } + for (i = lb; i < ub; i += stride) + num_iters++; + + #pragma omp parallel num_threads(nthreads) + { + #pragma omp single + actual_nthreads = omp_get_num_threads(); + + if (actual_nthreads != nthreads) { + printf("did not create enough threads, skipping test.\n"); + } else { + #pragma omp for schedule(dynamic, chunk) + for (i = lb; i < ub; i += stride) { + counted_iters++; + } + } + } + + // Check that the number of iterations executed is correct + if (actual_nthreads == nthreads && counted_iters != num_iters) { + fprintf(stderr, "error: wrong number of final iterations counted! " + "num_iters=%llu, counted_iters=%llu\n", + num_iters, counted_iters); + exit(EXIT_FAILURE); + } + + return EXIT_SUCCESS; +}