diff --git a/openmp/runtime/src/kmp_itt.inl b/openmp/runtime/src/kmp_itt.inl
--- a/openmp/runtime/src/kmp_itt.inl
+++ b/openmp/runtime/src/kmp_itt.inl
@@ -630,7 +630,7 @@
 void *__kmp_itt_taskwait_object(int gtid) {
   void *object = NULL;
 #if USE_ITT_NOTIFY
-  if (__itt_sync_create_ptr) {
+  if (UNLIKELY(__itt_sync_create_ptr)) {
     kmp_info_t *thread = __kmp_thread_from_gtid(gtid);
     kmp_taskdata_t *taskdata = thread->th.th_current_task;
     object = reinterpret_cast<void *>(kmp_uintptr_t(taskdata) +
@@ -677,7 +677,7 @@
     void *object // ITT sync object: barrier or taskwait.
     ) {
 #if USE_ITT_NOTIFY
-  if (object != NULL) {
+  if (UNLIKELY(object != NULL)) {
     KMP_ITT_DEBUG_LOCK();
     __itt_sync_cancel(object);
     KMP_ITT_DEBUG_PRINT("[tsk sta] scan( %p )\n", object);
diff --git a/openmp/runtime/src/kmp_tasking.cpp b/openmp/runtime/src/kmp_tasking.cpp
--- a/openmp/runtime/src/kmp_tasking.cpp
+++ b/openmp/runtime/src/kmp_tasking.cpp
@@ -275,7 +275,7 @@
   }
   // Check mutexinoutset dependencies, acquire locks
   kmp_depnode_t *node = tasknew->td_depnode;
-  if (node && (node->dn.mtx_num_locks > 0)) {
+  if (UNLIKELY(node && (node->dn.mtx_num_locks > 0))) {
     for (int i = 0; i < node->dn.mtx_num_locks; ++i) {
       KMP_DEBUG_ASSERT(node->dn.mtx_locks[i] != NULL);
       if (__kmp_test_lock(node->dn.mtx_locks[i], gtid))
@@ -332,7 +332,7 @@
   KA_TRACE(20,
            ("__kmp_push_task: T#%d trying to push task %p.\n", gtid, taskdata));

-  if (taskdata->td_flags.tiedness == TASK_UNTIED) {
+  if (UNLIKELY(taskdata->td_flags.tiedness == TASK_UNTIED)) {
     // untied task needs to increment counter so that the task structure is not
     // freed prematurely
     kmp_int32 counter = 1 + KMP_ATOMIC_INC(&taskdata->td_untied_count);
@@ -344,7 +344,7 @@
   }

   // The first check avoids building task_team thread data if serialized
-  if (taskdata->td_flags.task_serial) {
+  if (UNLIKELY(taskdata->td_flags.task_serial)) {
     KA_TRACE(20, ("__kmp_push_task: T#%d team serialized; returning "
                   "TASK_NOT_PUSHED for task %p\n",
                   gtid, taskdata));
@@ -354,7 +354,7 @@
   // Now that serialized tasks have returned, we can assume that we are not in
   // immediate exec mode
   KMP_DEBUG_ASSERT(__kmp_tasking_mode != tskm_immediate_exec);
-  if (!KMP_TASKING_ENABLED(task_team)) {
+  if (UNLIKELY(!KMP_TASKING_ENABLED(task_team))) {
     __kmp_enable_tasking(task_team, thread);
   }
   KMP_DEBUG_ASSERT(TCR_4(task_team->tt.tt_found_tasks) == TRUE);
@@ -364,7 +364,7 @@
   thread_data = &task_team->tt.tt_threads_data[tid];

   // No lock needed since only owner can allocate
-  if (thread_data->td.td_deque == NULL) {
+  if (UNLIKELY(thread_data->td.td_deque == NULL)) {
     __kmp_alloc_task_deque(thread, thread_data);
   }

@@ -824,7 +824,7 @@
   }
 #endif /* BUILD_TIED_TASK_STACK */

-  if (taskdata->td_flags.tiedness == TASK_UNTIED) {
+  if (UNLIKELY(taskdata->td_flags.tiedness == TASK_UNTIED)) {
     // untied task needs to check the counter so that the task structure is not
     // freed prematurely
     kmp_int32 counter = KMP_ATOMIC_DEC(&taskdata->td_untied_count) - 1;
@@ -851,7 +851,7 @@

   // Check mutexinoutset dependencies, release locks
   kmp_depnode_t *node = taskdata->td_depnode;
-  if (node && (node->dn.mtx_num_locks < 0)) {
+  if (UNLIKELY(node && (node->dn.mtx_num_locks < 0))) {
     // negative num_locks means all locks were acquired
     node->dn.mtx_num_locks = -node->dn.mtx_num_locks;
     for (int i = node->dn.mtx_num_locks - 1; i >= 0; --i) {
@@ -1186,7 +1186,7 @@
   kmp_taskdata_t *parent_task = thread->th.th_current_task;
   size_t shareds_offset;

-  if (!TCR_4(__kmp_init_middle))
+  if (UNLIKELY(!TCR_4(__kmp_init_middle)))
     __kmp_middle_initialize();

   KA_TRACE(10,
("__kmp_task_alloc(enter): T#%d loc=%p, flags=(0x%x) " @@ -1444,8 +1444,8 @@ 30, ("__kmp_invoke_task(enter): T#%d invoking task %p, current_task=%p\n", gtid, taskdata, current_task)); KMP_DEBUG_ASSERT(task); - if (taskdata->td_flags.proxy == TASK_PROXY && - taskdata->td_flags.complete == 1) { + if (UNLIKELY(taskdata->td_flags.proxy == TASK_PROXY && + taskdata->td_flags.complete == 1)) { // This is a proxy task that was already completed but it needs to run // its bottom-half finish KA_TRACE( @@ -1487,7 +1487,7 @@ // TODO: cancel tasks if the parallel region has also been cancelled // TODO: check if this sequence can be hoisted above __kmp_task_start // if cancellation has been enabled for this run ... - if (__kmp_omp_cancellation) { + if (UNLIKELY(__kmp_omp_cancellation)) { thread = __kmp_threads[gtid]; kmp_team_t *this_team = thread->th.th_team; kmp_taskgroup_t *taskgroup = taskdata->td_taskgroup; @@ -1866,7 +1866,7 @@ #if USE_ITT_BUILD void *itt_sync_obj = __kmp_itt_taskwait_object(gtid); - if (itt_sync_obj != NULL) + if (UNLIKELY(itt_sync_obj != NULL)) __kmp_itt_taskwait_starting(gtid, itt_sync_obj); #endif /* USE_ITT_BUILD */ @@ -1886,7 +1886,7 @@ } } #if USE_ITT_BUILD - if (itt_sync_obj != NULL) + if (UNLIKELY(itt_sync_obj != NULL)) __kmp_itt_taskwait_finished(gtid, itt_sync_obj); KMP_FSYNC_ACQUIRED(taskdata); // acquire self - sync with children #endif /* USE_ITT_BUILD */ @@ -1972,7 +1972,7 @@ #if USE_ITT_BUILD void *itt_sync_obj = __kmp_itt_taskwait_object(gtid); - if (itt_sync_obj != NULL) + if (UNLIKELY(itt_sync_obj != NULL)) __kmp_itt_taskwait_starting(gtid, itt_sync_obj); #endif /* USE_ITT_BUILD */ if (!taskdata->td_flags.team_serial) { @@ -1995,7 +1995,7 @@ } } #if USE_ITT_BUILD - if (itt_sync_obj != NULL) + if (UNLIKELY(itt_sync_obj != NULL)) __kmp_itt_taskwait_finished(gtid, itt_sync_obj); #endif /* USE_ITT_BUILD */ @@ -2497,7 +2497,7 @@ // For ITT the taskgroup wait is similar to taskwait until we need to // distinguish them void *itt_sync_obj = __kmp_itt_taskwait_object(gtid); - if (itt_sync_obj != NULL) + if (UNLIKELY(itt_sync_obj != NULL)) __kmp_itt_taskwait_starting(gtid, itt_sync_obj); #endif /* USE_ITT_BUILD */ @@ -2531,7 +2531,7 @@ #endif #if USE_ITT_BUILD - if (itt_sync_obj != NULL) + if (UNLIKELY(itt_sync_obj != NULL)) __kmp_itt_taskwait_finished(gtid, itt_sync_obj); KMP_FSYNC_ACQUIRED(taskdata); // acquire self - sync with descendants #endif /* USE_ITT_BUILD */