diff --git a/openmp/runtime/src/kmp_lock.h b/openmp/runtime/src/kmp_lock.h
--- a/openmp/runtime/src/kmp_lock.h
+++ b/openmp/runtime/src/kmp_lock.h
@@ -1217,22 +1217,41 @@
        ? __kmp_indirect_get_flags[(lck)->type]((lck)->lock)                   \
        : NULL)
 
-#define KMP_I_LOCK_CHUNK                                                       \
-  1024 // number of kmp_indirect_lock_t objects to be allocated together
+// number of kmp_indirect_lock_t objects to be allocated together
+#define KMP_I_LOCK_CHUNK 1024
+// Keep it a power of 2 since it is used in multiplication & division
+KMP_BUILD_ASSERT((KMP_I_LOCK_CHUNK & (KMP_I_LOCK_CHUNK - 1)) == 0);
+// number of row entries in the initial lock table
+#define KMP_I_LOCK_TABLE_INIT_NROW_PTRS 8
 
 // Lock table for indirect locks.
 typedef struct kmp_indirect_lock_table {
   kmp_indirect_lock_t **table; // blocks of indirect locks allocated
-  kmp_lock_index_t size; // size of the indirect lock table
+  kmp_uint32 nrow_ptrs; // number of *table pointer entries in table
   kmp_lock_index_t next; // index to the next lock to be allocated
+  struct kmp_indirect_lock_table *next_table;
 } kmp_indirect_lock_table_t;
 
 extern kmp_indirect_lock_table_t __kmp_i_lock_table;
 
 // Returns the indirect lock associated with the given index.
-#define KMP_GET_I_LOCK(index)                                                  \
-  (*(__kmp_i_lock_table.table + (index) / KMP_I_LOCK_CHUNK) +                  \
-   (index) % KMP_I_LOCK_CHUNK)
+// Returns nullptr if no lock exists at the given index
+static inline kmp_indirect_lock_t *__kmp_get_i_lock(kmp_lock_index_t idx) {
+  kmp_indirect_lock_table_t *lock_table = &__kmp_i_lock_table;
+  while (lock_table) {
+    kmp_lock_index_t max_locks = lock_table->nrow_ptrs * KMP_I_LOCK_CHUNK;
+    if (idx < max_locks) {
+      kmp_lock_index_t row = idx / KMP_I_LOCK_CHUNK;
+      kmp_lock_index_t col = idx % KMP_I_LOCK_CHUNK;
+      if (!lock_table->table[row] || idx >= lock_table->next)
+        break;
+      return &lock_table->table[row][col];
+    }
+    idx -= max_locks;
+    lock_table = lock_table->next_table;
+  }
+  return nullptr;
+}
 
 // Number of locks in a lock block, which is fixed to "1" now.
 // TODO: No lock block implementation now. If we do support, we need to manage
@@ -1241,8 +1260,9 @@
 
 // Fast lock table lookup without consistency checking
 #define KMP_LOOKUP_I_LOCK(l)                                                   \
-  ((OMP_LOCK_T_SIZE < sizeof(void *)) ? KMP_GET_I_LOCK(KMP_EXTRACT_I_INDEX(l)) \
-                                      : *((kmp_indirect_lock_t **)(l)))
+  ((OMP_LOCK_T_SIZE < sizeof(void *))                                          \
+       ? __kmp_get_i_lock(KMP_EXTRACT_I_INDEX(l))                              \
+       : *((kmp_indirect_lock_t **)(l)))
 
 // Used once in kmp_error.cpp
 extern kmp_int32 __kmp_get_user_lock_owner(kmp_user_lock_p, kmp_uint32);
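A note on the lookup above: __kmp_get_i_lock replaces the old KMP_GET_I_LOCK macro with a walk over the chain of tables. It subtracts each table's capacity (nrow_ptrs * KMP_I_LOCK_CHUNK) from the global index until the index lands inside one table, splits it into a row/column pair, and returns nullptr for a row that was never allocated or an index past that table's next cursor, which takes over the bounds checking callers in kmp_lock.cpp used to do against the old size field. The standalone sketch below mirrors that traversal; lock_table_t, CHUNK, and get_slot are simplified stand-ins for illustration, not the actual kmp_* definitions.

#include <cstdint>
#include <cstdio>
#include <vector>

static constexpr uint32_t CHUNK = 4; // stand-in for KMP_I_LOCK_CHUNK, kept tiny here

struct lock_table_t {
  std::vector<int *> rows;            // one pointer per row of CHUNK slots
  uint32_t next = 0;                  // next free local index in this table
  lock_table_t *next_table = nullptr; // next table in the chain
};

// Same shape as __kmp_get_i_lock: reduce the global index table by table,
// then index the owning row; return nullptr for a slot that was never used.
int *get_slot(lock_table_t *t, uint32_t idx) {
  while (t) {
    uint32_t capacity = static_cast<uint32_t>(t->rows.size()) * CHUNK;
    if (idx < capacity) {
      uint32_t row = idx / CHUNK, col = idx % CHUNK;
      if (!t->rows[row] || idx >= t->next)
        return nullptr; // row never allocated, or slot never handed out
      return &t->rows[row][col];
    }
    idx -= capacity; // index belongs to a table further down the chain
    t = t->next_table;
  }
  return nullptr;
}

int main() {
  int storage[CHUNK] = {10, 11, 12, 13};
  lock_table_t head;
  head.rows = {storage, nullptr}; // row 1 not allocated yet
  head.next = 3;                  // three slots handed out so far
  std::printf("%d\n", *get_slot(&head, 2));        // prints 12
  std::printf("%p\n", (void *)get_slot(&head, 5)); // null: slot never allocated
  return 0;
}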
diff --git a/openmp/runtime/src/kmp_lock.cpp b/openmp/runtime/src/kmp_lock.cpp
--- a/openmp/runtime/src/kmp_lock.cpp
+++ b/openmp/runtime/src/kmp_lock.cpp
@@ -3104,7 +3104,7 @@
                                                           kmp_int32 gtid,
                                                           kmp_indirect_locktag_t tag) {
   kmp_indirect_lock_t *lck;
-  kmp_lock_index_t idx;
+  kmp_lock_index_t idx, table_idx;
 
   __kmp_acquire_lock(&__kmp_global_lock, gtid);
 
@@ -3117,26 +3117,41 @@
     KA_TRACE(20, ("__kmp_allocate_indirect_lock: reusing an existing lock %p\n",
                   lck));
   } else {
-    idx = __kmp_i_lock_table.next;
-    // Check capacity and double the size if it is full
-    if (idx == __kmp_i_lock_table.size) {
-      // Double up the space for block pointers
-      int row = __kmp_i_lock_table.size / KMP_I_LOCK_CHUNK;
-      kmp_indirect_lock_t **new_table = (kmp_indirect_lock_t **)__kmp_allocate(
-          2 * row * sizeof(kmp_indirect_lock_t *));
-      KMP_MEMCPY(new_table, __kmp_i_lock_table.table,
-                 row * sizeof(kmp_indirect_lock_t *));
-      kmp_indirect_lock_t **old_table = __kmp_i_lock_table.table;
-      __kmp_i_lock_table.table = new_table;
-      __kmp_free(old_table);
-      // Allocate new objects in the new blocks
-      for (int i = row; i < 2 * row; ++i)
-        *(__kmp_i_lock_table.table + i) = (kmp_indirect_lock_t *)__kmp_allocate(
-            KMP_I_LOCK_CHUNK * sizeof(kmp_indirect_lock_t));
-      __kmp_i_lock_table.size = 2 * idx;
+    kmp_uint32 row, col;
+    kmp_indirect_lock_table_t *lock_table = &__kmp_i_lock_table;
+    idx = 0;
+    // Find location in list of lock tables to put new lock
+    while (1) {
+      table_idx = lock_table->next; // index within this table
+      idx += lock_table->next; // global index within list of tables
+      if (table_idx < lock_table->nrow_ptrs * KMP_I_LOCK_CHUNK) {
+        row = table_idx / KMP_I_LOCK_CHUNK;
+        col = table_idx % KMP_I_LOCK_CHUNK;
+        // Allocate a new row of locks if necessary
+        if (!lock_table->table[row]) {
+          lock_table->table[row] = (kmp_indirect_lock_t *)__kmp_allocate(
+              sizeof(kmp_indirect_lock_t) * KMP_I_LOCK_CHUNK);
+        }
+        break;
+      }
+      // Allocate a new lock table if necessary with double the capacity
+      if (!lock_table->next_table) {
+        kmp_indirect_lock_table_t *next_table =
+            (kmp_indirect_lock_table_t *)__kmp_allocate(
+                sizeof(kmp_indirect_lock_table_t));
+        next_table->table = (kmp_indirect_lock_t **)__kmp_allocate(
+            sizeof(kmp_indirect_lock_t *) * 2 * lock_table->nrow_ptrs);
+        next_table->nrow_ptrs = 2 * lock_table->nrow_ptrs;
+        next_table->next = 0;
+        next_table->next_table = nullptr;
+        lock_table->next_table = next_table;
+      }
+      lock_table = lock_table->next_table;
+      KMP_ASSERT(lock_table);
     }
-    __kmp_i_lock_table.next++;
-    lck = KMP_GET_I_LOCK(idx);
+    lock_table->next++;
+
+    lck = &lock_table->table[row][col];
     // Allocate a new base lock object
     lck->lock = (kmp_user_lock_p)__kmp_allocate(__kmp_indirect_lock_size[tag]);
     KA_TRACE(20,
@@ -3167,10 +3182,7 @@
     }
     if (OMP_LOCK_T_SIZE < sizeof(void *)) {
       kmp_lock_index_t idx = KMP_EXTRACT_I_INDEX(user_lock);
-      if (idx >= __kmp_i_lock_table.size) {
-        KMP_FATAL(LockIsUninitialized, func);
-      }
-      lck = KMP_GET_I_LOCK(idx);
+      lck = __kmp_get_i_lock(idx);
     } else {
       lck = *((kmp_indirect_lock_t **)user_lock);
     }
@@ -3180,7 +3192,7 @@
     return lck;
   } else {
     if (OMP_LOCK_T_SIZE < sizeof(void *)) {
-      return KMP_GET_I_LOCK(KMP_EXTRACT_I_INDEX(user_lock));
+      return __kmp_get_i_lock(KMP_EXTRACT_I_INDEX(user_lock));
     } else {
       return *((kmp_indirect_lock_t **)user_lock);
     }
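On the allocation side, the new loop never relocates existing storage: when the current table's row pointers are exhausted it links in a fresh kmp_indirect_lock_table_t with twice as many row pointers, whereas the removed code grew the single table by copying the row-pointer array with KMP_MEMCPY and freeing the old one. Capacity therefore doubles with each appended table. The toy program below only prints that growth under the constants this patch defines (KMP_I_LOCK_TABLE_INIT_NROW_PTRS = 8, KMP_I_LOCK_CHUNK = 1024); it is illustrative arithmetic, not runtime code.

#include <cstdint>
#include <cstdio>

// Illustrative capacity math, assuming the defaults introduced in this patch:
// 8 initial row pointers, and chunks of 1024 locks per row.
int main() {
  uint64_t nrow_ptrs = 8;
  const uint64_t chunk = 1024;
  uint64_t cumulative = 0;
  for (int table = 0; table < 5; ++table) {
    uint64_t capacity = nrow_ptrs * chunk; // locks this table can hold
    cumulative += capacity;
    std::printf("table %d: %8llu locks, %8llu cumulative\n", table,
                (unsigned long long)capacity, (unsigned long long)cumulative);
    nrow_ptrs *= 2; // each appended table doubles the row-pointer count
  }
  return 0;
}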
@@ -3323,12 +3335,13 @@
     return;
 
   // Initialize lock index table
-  __kmp_i_lock_table.size = KMP_I_LOCK_CHUNK;
-  __kmp_i_lock_table.table =
-      (kmp_indirect_lock_t **)__kmp_allocate(sizeof(kmp_indirect_lock_t *));
+  __kmp_i_lock_table.nrow_ptrs = KMP_I_LOCK_TABLE_INIT_NROW_PTRS;
+  __kmp_i_lock_table.table = (kmp_indirect_lock_t **)__kmp_allocate(
+      sizeof(kmp_indirect_lock_t *) * KMP_I_LOCK_TABLE_INIT_NROW_PTRS);
   *(__kmp_i_lock_table.table) = (kmp_indirect_lock_t *)__kmp_allocate(
       KMP_I_LOCK_CHUNK * sizeof(kmp_indirect_lock_t));
   __kmp_i_lock_table.next = 0;
+  __kmp_i_lock_table.next_table = nullptr;
 
   // Indirect lock size
   __kmp_indirect_lock_size[locktag_ticket] = sizeof(kmp_ticket_lock_t);
@@ -3393,7 +3406,6 @@
 
 // Clean up the lock table.
 void __kmp_cleanup_indirect_user_locks() {
-  kmp_lock_index_t i;
   int k;
 
   // Clean up locks in the pools first (they were already destroyed before going
@@ -3411,22 +3423,29 @@
     __kmp_indirect_lock_pool[k] = NULL;
   }
   // Clean up the remaining undestroyed locks.
-  for (i = 0; i < __kmp_i_lock_table.next; i++) {
-    kmp_indirect_lock_t *l = KMP_GET_I_LOCK(i);
-    if (l->lock != NULL) {
-      // Locks not destroyed explicitly need to be destroyed here.
-      KMP_I_LOCK_FUNC(l, destroy)(l->lock);
-      KA_TRACE(
-          20,
-          ("__kmp_cleanup_indirect_user_locks: destroy/freeing %p from table\n",
-           l));
-      __kmp_free(l->lock);
+  kmp_indirect_lock_table_t *ptr = &__kmp_i_lock_table;
+  while (ptr) {
+    for (kmp_uint32 row = 0; row < ptr->nrow_ptrs; ++row) {
+      if (!ptr->table[row])
+        continue;
+      for (kmp_uint32 col = 0; col < KMP_I_LOCK_CHUNK; ++col) {
+        kmp_indirect_lock_t *l = &ptr->table[row][col];
+        if (l->lock) {
+          // Locks not destroyed explicitly need to be destroyed here.
+          KMP_I_LOCK_FUNC(l, destroy)(l->lock);
+          KA_TRACE(20, ("__kmp_cleanup_indirect_user_locks: destroy/freeing %p "
+                        "from table\n",
+                        l));
+          __kmp_free(l->lock);
+        }
+      }
+      __kmp_free(ptr->table[row]);
     }
+    kmp_indirect_lock_table_t *next_table = ptr->next_table;
+    if (ptr != &__kmp_i_lock_table)
+      __kmp_free(ptr);
+    ptr = next_table;
   }
-  // Free the table
-  for (i = 0; i < __kmp_i_lock_table.size / KMP_I_LOCK_CHUNK; i++)
-    __kmp_free(__kmp_i_lock_table.table[i]);
-  __kmp_free(__kmp_i_lock_table.table);
 
   __kmp_init_user_locks = FALSE;
 }
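Cleanup walks the same chain: for each table, any still-initialized lock in its allocated rows is destroyed and freed, the rows themselves are freed, and then the table struct is freed unless it is the statically stored head __kmp_i_lock_table. The minimal sketch below mirrors that traversal order; lock_table_t and cleanup are stand-ins rather than the kmp_* types, and the sketch additionally frees the row-pointer array, a sketch-only simplification that the hunk above does not perform.

#include <cstdint>
#include <cstdlib>

static constexpr uint32_t CHUNK = 4; // stand-in for KMP_I_LOCK_CHUNK

struct lock_table_t {
  int **rows = nullptr;               // nrow_ptrs pointers, each row holds CHUNK slots
  uint32_t nrow_ptrs = 0;
  lock_table_t *next_table = nullptr; // next table in the chain
};

// Mirrors the cleanup order: free the lazily allocated rows of every table in
// the chain, then free the dynamically allocated tables, never the static head.
void cleanup(lock_table_t *head) {
  lock_table_t *ptr = head;
  while (ptr) {
    for (uint32_t row = 0; row < ptr->nrow_ptrs; ++row)
      if (ptr->rows[row])
        std::free(ptr->rows[row]); // only rows that were actually allocated
    std::free(ptr->rows);          // row-pointer array (sketch-only detail)
    lock_table_t *next = ptr->next_table;
    if (ptr != head)
      std::free(ptr); // chained tables are heap objects; the head is static
    ptr = next;
  }
}

int main() {
  lock_table_t head;
  head.nrow_ptrs = 2;
  head.rows = (int **)std::calloc(head.nrow_ptrs, sizeof(int *));
  head.rows[0] = (int *)std::calloc(CHUNK, sizeof(int)); // row 1 stays unused
  cleanup(&head);
  return 0;
}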