diff --git a/openmp/runtime/src/kmp_collapse.cpp b/openmp/runtime/src/kmp_collapse.cpp
--- a/openmp/runtime/src/kmp_collapse.cpp
+++ b/openmp/runtime/src/kmp_collapse.cpp
@@ -27,7 +27,7 @@
 
 // avoid inadevertently using a library based abs
 template <typename T> T __kmp_abs(const T val) {
-  return (val < 0) ? -val: val;
+  return (val < 0) ? -val : val;
 }
 kmp_uint32 __kmp_abs(const kmp_uint32 val) { return val; }
 kmp_uint64 __kmp_abs(const kmp_uint64 val) { return val; }
@@ -36,7 +36,42 @@
 // Common functions for working with rectangular and non-rectangular loops
 //----------------------------------------------------------------------------
 
-template <typename T> int __kmp_sign(T val) { return (T(0) < val) - (val < T(0)); }
+template <typename T> int __kmp_sign(T val) {
+  return (T(0) < val) - (val < T(0));
+}
+
+// Scratch array of n elements of T for the collapse helpers. Uses a small
+// in-object buffer when n elements fit and falls back to __kmp_allocate
+// otherwise; the heap block, if any, is released in the destructor.
+template <typename T> class CollapseAllocator {
+  typedef T *pT;
+
+private:
+  static const size_t allocaSize = 32; // size limit for stack allocations
+                                       // (8 bytes x 4 nested loops)
+  // alignas(T): the buffer is accessed through T*, so give it T's alignment.
+  alignas(T) char stackAlloc[allocaSize];
+  static constexpr size_t maxElemCount = allocaSize / sizeof(T);
+  pT pTAlloc;
+
+public:
+  CollapseAllocator(size_t n) : pTAlloc(reinterpret_cast<pT>(stackAlloc)) {
+    if (n > maxElemCount) {
+      pTAlloc = reinterpret_cast<pT>(__kmp_allocate(n * sizeof(T)));
+    }
+  }
+  ~CollapseAllocator() {
+    if (pTAlloc != reinterpret_cast<pT>(stackAlloc)) {
+      __kmp_free(pTAlloc);
+    }
+  }
+  // Not copyable: a copy would keep pointing at the source object's storage
+  // and its destructor would pass that pointer to __kmp_free.
+  CollapseAllocator(const CollapseAllocator &) = delete;
+  CollapseAllocator &operator=(const CollapseAllocator &) = delete;
+  T &operator[](int index) { return pTAlloc[index]; }
+  operator const pT() { return pTAlloc; }
+};
 
 //----------Loop canonicalization---------------------------------------------
 
@@ -463,8 +498,7 @@
                                    /*out*/ kmp_uint64 *original_ivs,
                                    kmp_index_t n) {
 
-  kmp_iterations_t iterations =
-      (kmp_iterations_t)__kmp_allocate(sizeof(kmp_loop_nest_iv_t) * n);
+  CollapseAllocator<kmp_loop_nest_iv_t> iterations(n);
 
   // First, calc corresponding iteration in every original loop:
   for (kmp_index_t ind = n; ind > 0;) {
@@ -485,7 +519,6 @@
 
     kmp_calc_one_iv_rectang(bounds, /*in/out*/ original_ivs, iterations, ind);
   }
-  __kmp_free(iterations);
 }
 
 //----------------------------------------------------------------------------
@@ -924,9 +957,7 @@
                                      /*out*/ kmp_point_t original_ivs) {
 
   // Iterations in the original space, multiplied by step:
-  kmp_iterations_t iterations =
-      (kmp_iterations_t)__kmp_allocate(sizeof(kmp_loop_nest_iv_t) * n);
-
+  CollapseAllocator<kmp_loop_nest_iv_t> iterations(n);
   for (kmp_index_t ind = n; ind > 0;) {
     --ind;
     iterations[ind] = 0;
@@ -936,7 +967,6 @@
   bool b = kmp_calc_original_ivs_from_iterations(original_bounds_nest, n,
                                                  /*in/out*/ original_ivs,
                                                  /*in/out*/ iterations, 0);
-  __kmp_free(iterations);
   return b;
 }
 
@@ -948,9 +978,7 @@
                                 kmp_index_t n, const kmp_point_t original_ivs,
                                 /*out*/ kmp_point_t next_original_ivs) {
   // Iterations in the original space, multiplied by step (so can be negative):
-  kmp_iterations_t iterations =
-      (kmp_iterations_t)__kmp_allocate(sizeof(kmp_loop_nest_iv_t) * n);
-
+  CollapseAllocator<kmp_loop_nest_iv_t> iterations(n);
   // First, calc corresponding iteration in every original loop:
   for (kmp_index_t ind = 0; ind < n; ++ind) {
     auto bounds = &(original_bounds_nest[ind]);
@@ -969,7 +997,6 @@
 
   bool b = kmp_calc_original_ivs_from_iterations(
       original_bounds_nest, n, /*in/out*/ next_original_ivs, iterations, ind);
-  __kmp_free(iterations);
   return b;
 }
 
@@ -1132,9 +1159,7 @@
     /*out*/ kmp_point_t original_ivs) {
 
   // Iterations in the expanded space:
-  kmp_iterations_t iterations =
-      (kmp_iterations_t)__kmp_allocate(sizeof(kmp_loop_nest_iv_t) * n);
-
+  CollapseAllocator<kmp_loop_nest_iv_t> iterations(n);
   // First, calc corresponding iteration in every modified loop:
   for (kmp_index_t ind = n; ind > 0;) {
     --ind;
@@ -1166,7 +1191,6 @@
       // Too big (or too small for >=).
       if (ind == 0) {
         // Need to reduce to the end.
-        __kmp_free(iterations);
         return false;
       } else {
         // Go to next iteration on outer loop:
@@ -1197,7 +1221,6 @@
     ++ind;
   }
 
-  __kmp_free(iterations);
   return true;
 }
 
@@ -1291,9 +1314,7 @@
 
   kmp_canonicalize_loop_nest(loc, /*in/out*/ original_bounds_nest, n);
 
-  bounds_info_internal_t *updated_bounds_nest =
-      (bounds_info_internal_t *)__kmp_allocate(sizeof(bounds_info_internal_t) *
-                                               n);
+  CollapseAllocator<bounds_info_internal_t> updated_bounds_nest(n);
 
   for (kmp_index_t i = 0; i < n; ++i) {
     updated_bounds_nest[i].b = original_bounds_nest[i];
@@ -1308,7 +1329,6 @@
 
   if (total == 0) {
     // Loop won't execute:
-    __kmp_free(updated_bounds_nest);
     return FALSE;
   }
 
@@ -1322,20 +1342,11 @@
 
   KMP_DEBUG_ASSERT(tid < nth);
 
-  kmp_point_t original_ivs_start =
-      (kmp_point_t)__kmp_allocate(sizeof(kmp_uint64) * n);
-  kmp_point_t original_ivs_end =
-      (kmp_point_t)__kmp_allocate(sizeof(kmp_uint64) * n);
-  kmp_point_t original_ivs_next_start =
-      (kmp_point_t)__kmp_allocate(sizeof(kmp_uint64) * n);
+  CollapseAllocator<kmp_uint64> original_ivs_start(n);
 
   if (!kmp_calc_original_ivs_for_start(original_bounds_nest, n,
                                        /*out*/ original_ivs_start)) {
     // Loop won't execute:
-    __kmp_free(updated_bounds_nest);
-    __kmp_free(original_ivs_start);
-    __kmp_free(original_ivs_end);
-    __kmp_free(original_ivs_next_start);
     return FALSE;
   }
 
@@ -1354,10 +1365,6 @@
   //  if (plastiter != NULL) {
   //    *plastiter = TRUE;
   //  }
-  //  __kmp_free(updated_bounds_nest);
-  //  __kmp_free(original_ivs_start);
-  //  __kmp_free(original_ivs_end);
-  //  __kmp_free(original_ivs_next_start);
   //  return TRUE;
   //}
 
@@ -1391,6 +1398,7 @@
       new_iv += curr_chunk_size - 1;
     }
 
+    CollapseAllocator<kmp_uint64> original_ivs_end(n);
     if ((nth == 1) || (new_iv >= total - 1)) {
       // Do this one till the end - just in case we miscalculated
       // and either too much is left to process or new_iv is a bit too big:
@@ -1421,10 +1429,6 @@
     if (last_iter && (tid != 0)) {
       // We are done, this was last chunk, but no chunk for current thread was
       // found:
-      __kmp_free(updated_bounds_nest);
-      __kmp_free(original_ivs_start);
-      __kmp_free(original_ivs_end);
-      __kmp_free(original_ivs_next_start);
       return FALSE;
     }
 
@@ -1432,6 +1436,7 @@
       // We found the chunk for this thread, now we need to check if it's the
       // last chunk or not:
 
+      CollapseAllocator<kmp_uint64> original_ivs_next_start(n);
       if (last_iter ||
           !kmp_calc_next_original_ivs(original_bounds_nest, n, original_ivs_end,
                                       /*out*/ original_ivs_next_start)) {
@@ -1453,10 +1458,6 @@
         chunk_bounds_nest[i].ub1_u64 = 0;
       }
 
-      __kmp_free(updated_bounds_nest);
-      __kmp_free(original_ivs_start);
-      __kmp_free(original_ivs_end);
-      __kmp_free(original_ivs_next_start);
       return TRUE;
     }
 
@@ -1478,9 +1479,5 @@
                                                original_ivs_start, n);
   }
 
-  __kmp_free(updated_bounds_nest);
-  __kmp_free(original_ivs_start);
-  __kmp_free(original_ivs_end);
-  __kmp_free(original_ivs_next_start);
   return FALSE;
 }