diff --git a/openmp/docs/design/Runtimes.rst b/openmp/docs/design/Runtimes.rst
--- a/openmp/docs/design/Runtimes.rst
+++ b/openmp/docs/design/Runtimes.rst
@@ -409,17 +409,17 @@
KMP_BLOCKTIME
"""""""""""""
-Sets the time, in milliseconds, that a thread should wait, after completing
-the execution of a parallel region, before sleeping.
+Sets the time that a thread should wait, after completing the
+execution of a parallel region, before sleeping.
-Use the optional character suffixes: ``s`` (seconds), ``m`` (minutes),
-``h`` (hours), or ``d`` (days) to specify the units.
+Use the optional suffixes: ``ms`` (milliseconds) or ``us`` (microseconds) to
+specify the units. If no suffix is given, the value is in milliseconds.
-Specify infinite for an unlimited wait time.
+Specify ``infinite`` for an unlimited wait time.
| **Default:** 200 milliseconds
| **Related Environment Variable:** ``KMP_LIBRARY``
-| **Example:** ``KMP_BLOCKTIME=1s``
+| **Example:** ``KMP_BLOCKTIME=1ms``
KMP_CPUINFO_FILE
""""""""""""""""
@@ -1341,22 +1341,22 @@
LLVM/OpenMP support for C library routines
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
-Support for calling standard C library routines on GPU targets is provided by
-the `LLVM C Library `_. This project provides two
-static libraries, ``libcgpu.a`` and ``libllvmlibc_rpc_server.a``, which are used
-by the OpenMP runtime to provide ``libc`` support. The ``libcgpu.a`` library
-contains the GPU device code, while ``libllvmlibc_rpc_server.a`` provides the
-interface to the RPC interface. More information on the RPC construction can be
+Support for calling standard C library routines on GPU targets is provided by
+the `LLVM C Library `_. This project provides two
+static libraries, ``libcgpu.a`` and ``libllvmlibc_rpc_server.a``, which are used
+by the OpenMP runtime to provide ``libc`` support. The ``libcgpu.a`` library
+contains the GPU device code, while ``libllvmlibc_rpc_server.a`` provides the
+interface to the RPC interface. More information on the RPC construction can be
found in the `associated documentation `_.
-To provide host services, we run an RPC server inside of the runtime. This
-allows the host to respond to requests made from the GPU asynchronously. For
-``libc`` calls that require an RPC server, such as printing, an external handle
-to the RPC client running on the GPU will be present in the GPU executable. If
-we find this symbol, we will initialize a client and server and run it in the
+To provide host services, we run an RPC server inside of the runtime. This
+allows the host to respond to requests made from the GPU asynchronously. For
+``libc`` calls that require an RPC server, such as printing, an external handle
+to the RPC client running on the GPU will be present in the GPU executable. If
+we find this symbol, we will initialize a client and server and run it in the
background while the kernel is executing.
-For example, consider the following simple OpenMP offloading code. Here we will
+For example, consider the following simple OpenMP offloading code. Here we will
simply print a string to the user from the GPU.
.. code-block:: c++
@@ -1368,11 +1368,11 @@
{ fputs("Hello World!\n", stderr); }
}
-We can compile this using the ``libcgpu.a`` library to resolve the symbols.
-Because this function requires RPC support, this will also pull in an externally
-visible symbol called ``__llvm_libc_rpc_client`` into the device image. When
-loading the device image, the runtime will check for this symbol and initialize
-an RPC interface if it is found. The following example shows the RPC server
+We can compile this using the ``libcgpu.a`` library to resolve the symbols.
+Because this function requires RPC support, this will also pull in an externally
+visible symbol called ``__llvm_libc_rpc_client`` into the device image. When
+loading the device image, the runtime will check for this symbol and initialize
+an RPC interface if it is found. The following example shows the RPC server
being used.
.. code-block:: console
diff --git a/openmp/runtime/src/kmp.h b/openmp/runtime/src/kmp.h
--- a/openmp/runtime/src/kmp.h
+++ b/openmp/runtime/src/kmp.h
@@ -180,6 +180,7 @@
#define KMP_NSEC_PER_SEC 1000000000L
#define KMP_USEC_PER_SEC 1000000L
+#define KMP_NSEC_PER_USEC 1000L
/*!
@ingroup BASIC_TYPES
@@ -1190,13 +1191,13 @@
#define KMP_MAX_STKPADDING (2 * 1024 * 1024)
#define KMP_BLOCKTIME_MULTIPLIER \
- (1000) /* number of blocktime units per second */
+ (1000000) /* number of blocktime units per second */
#define KMP_MIN_BLOCKTIME (0)
#define KMP_MAX_BLOCKTIME \
(INT_MAX) /* Must be this for "infinite" setting the work */
-/* __kmp_blocktime is in milliseconds */
-#define KMP_DEFAULT_BLOCKTIME (__kmp_is_hybrid_cpu() ? (0) : (200))
+/* __kmp_blocktime is in microseconds */
+#define KMP_DEFAULT_BLOCKTIME (__kmp_is_hybrid_cpu() ? (0) : (200000))
#if KMP_USE_MONITOR
#define KMP_DEFAULT_MONITOR_STKSIZE ((size_t)(64 * 1024))
@@ -1223,22 +1224,21 @@
#if KMP_OS_UNIX && (KMP_ARCH_X86 || KMP_ARCH_X86_64)
// HW TSC is used to reduce overhead (clock tick instead of nanosecond).
extern kmp_uint64 __kmp_ticks_per_msec;
+extern kmp_uint64 __kmp_ticks_per_usec;
#if KMP_COMPILER_ICC || KMP_COMPILER_ICX
#define KMP_NOW() ((kmp_uint64)_rdtsc())
#else
#define KMP_NOW() __kmp_hardware_timestamp()
#endif
-#define KMP_NOW_MSEC() (KMP_NOW() / __kmp_ticks_per_msec)
#define KMP_BLOCKTIME_INTERVAL(team, tid) \
- (KMP_BLOCKTIME(team, tid) * __kmp_ticks_per_msec)
+ ((kmp_uint64)KMP_BLOCKTIME(team, tid) * __kmp_ticks_per_usec)
#define KMP_BLOCKING(goal, count) ((goal) > KMP_NOW())
#else
// System time is retrieved sporadically while blocking.
extern kmp_uint64 __kmp_now_nsec();
#define KMP_NOW() __kmp_now_nsec()
-#define KMP_NOW_MSEC() (KMP_NOW() / KMP_USEC_PER_SEC)
#define KMP_BLOCKTIME_INTERVAL(team, tid) \
- (KMP_BLOCKTIME(team, tid) * KMP_USEC_PER_SEC)
+ ((kmp_uint64)KMP_BLOCKTIME(team, tid) * (kmp_uint64)KMP_NSEC_PER_USEC)
#define KMP_BLOCKING(goal, count) ((count) % 1000 != 0 || (goal) > KMP_NOW())
#endif
#endif // KMP_USE_MONITOR
@@ -3351,9 +3351,22 @@
used (fixed) */
extern int __kmp_tp_cached; /* whether threadprivate cache has been created
(__kmpc_threadprivate_cached()) */
-extern int __kmp_dflt_blocktime; /* number of milliseconds to wait before
+extern int __kmp_dflt_blocktime; /* number of microseconds to wait before
blocking (env setting) */
+extern char __kmp_blocktime_units; /* 'm' or 'u' to note units specified */
extern bool __kmp_wpolicy_passive; /* explicitly set passive wait policy */
+
+// Convert raw blocktime from ms to us if needed, clamping to avoid overflow.
+static inline void __kmp_aux_convert_blocktime(int *bt) {
+  if (__kmp_blocktime_units == 'm') {
+    if (*bt > INT_MAX / 1000) { // scaling by 1000 would exceed INT_MAX
+      *bt = INT_MAX / 1000;
+      KMP_INFORM(MaxValueUsing, "kmp_set_blocktime(ms)", *bt); // int value
+    }
+    *bt = *bt * 1000;
+  }
+}
+
#if KMP_USE_MONITOR
extern int
__kmp_monitor_wakeups; /* number of times monitor wakes up per second */
diff --git a/openmp/runtime/src/kmp_csupport.cpp b/openmp/runtime/src/kmp_csupport.cpp
--- a/openmp/runtime/src/kmp_csupport.cpp
+++ b/openmp/runtime/src/kmp_csupport.cpp
@@ -2065,14 +2065,15 @@
}
void kmpc_set_blocktime(int arg) {
- int gtid, tid;
+ int gtid, tid, bt = arg;
kmp_info_t *thread;
gtid = __kmp_entry_gtid();
tid = __kmp_tid_from_gtid(gtid);
thread = __kmp_thread_from_gtid(gtid);
- __kmp_aux_set_blocktime(arg, thread, tid);
+ __kmp_aux_convert_blocktime(&bt);
+ __kmp_aux_set_blocktime(bt, thread, tid);
}
void kmpc_set_library(int arg) {
diff --git a/openmp/runtime/src/kmp_ftn_entry.h b/openmp/runtime/src/kmp_ftn_entry.h
--- a/openmp/runtime/src/kmp_ftn_entry.h
+++ b/openmp/runtime/src/kmp_ftn_entry.h
@@ -112,17 +112,19 @@
#ifdef KMP_STUB
__kmps_set_blocktime(KMP_DEREF arg);
#else
- int gtid, tid;
+ int gtid, tid, bt = (KMP_DEREF arg);
kmp_info_t *thread;
gtid = __kmp_entry_gtid();
tid = __kmp_tid_from_gtid(gtid);
thread = __kmp_thread_from_gtid(gtid);
- __kmp_aux_set_blocktime(KMP_DEREF arg, thread, tid);
+ __kmp_aux_convert_blocktime(&bt);
+ __kmp_aux_set_blocktime(bt, thread, tid);
#endif
}
+// Gets blocktime in units used for KMP_BLOCKTIME, ms otherwise
int FTN_STDCALL FTN_GET_BLOCKTIME(void) {
#ifdef KMP_STUB
return __kmps_get_blocktime();
@@ -136,21 +138,24 @@
/* These must match the settings used in __kmp_wait_sleep() */
if (__kmp_dflt_blocktime == KMP_MAX_BLOCKTIME) {
- KF_TRACE(10, ("kmp_get_blocktime: T#%d(%d:%d), blocktime=%d\n", gtid,
- team->t.t_id, tid, KMP_MAX_BLOCKTIME));
+ KF_TRACE(10, ("kmp_get_blocktime: T#%d(%d:%d), blocktime=%d%cs\n", gtid,
+ team->t.t_id, tid, KMP_MAX_BLOCKTIME, __kmp_blocktime_units));
return KMP_MAX_BLOCKTIME;
}
#ifdef KMP_ADJUST_BLOCKTIME
else if (__kmp_zero_bt && !get__bt_set(team, tid)) {
- KF_TRACE(10, ("kmp_get_blocktime: T#%d(%d:%d), blocktime=%d\n", gtid,
- team->t.t_id, tid, 0));
+ KF_TRACE(10, ("kmp_get_blocktime: T#%d(%d:%d), blocktime=%d%cs\n", gtid,
+ team->t.t_id, tid, 0, __kmp_blocktime_units));
return 0;
}
#endif /* KMP_ADJUST_BLOCKTIME */
else {
- KF_TRACE(10, ("kmp_get_blocktime: T#%d(%d:%d), blocktime=%d\n", gtid,
- team->t.t_id, tid, get__blocktime(team, tid)));
- return get__blocktime(team, tid);
+ int bt = get__blocktime(team, tid);
+ if (__kmp_blocktime_units == 'm')
+ bt = bt / 1000;
+ KF_TRACE(10, ("kmp_get_blocktime: T#%d(%d:%d), blocktime=%d%cs\n", gtid,
+ team->t.t_id, tid, bt, __kmp_blocktime_units));
+ return bt;
}
#endif
}
diff --git a/openmp/runtime/src/kmp_global.cpp b/openmp/runtime/src/kmp_global.cpp
--- a/openmp/runtime/src/kmp_global.cpp
+++ b/openmp/runtime/src/kmp_global.cpp
@@ -154,7 +154,8 @@
int __kmp_hier_threads_per[kmp_hier_layer_e::LAYER_LAST + 1];
kmp_hier_sched_env_t __kmp_hier_scheds = {0, 0, NULL, NULL, NULL};
#endif
-int __kmp_dflt_blocktime = KMP_DEFAULT_BLOCKTIME;
+int __kmp_dflt_blocktime = KMP_DEFAULT_BLOCKTIME; // in microseconds
+char __kmp_blocktime_units = 'm'; // Units specified in KMP_BLOCKTIME
bool __kmp_wpolicy_passive = false;
#if KMP_USE_MONITOR
int __kmp_monitor_wakeups = KMP_MIN_MONITOR_WAKEUPS;
diff --git a/openmp/runtime/src/kmp_runtime.cpp b/openmp/runtime/src/kmp_runtime.cpp
--- a/openmp/runtime/src/kmp_runtime.cpp
+++ b/openmp/runtime/src/kmp_runtime.cpp
@@ -8729,9 +8729,8 @@
}
/* ------------------------------------------------------------------------ */
-
void __kmp_aux_set_blocktime(int arg, kmp_info_t *thread, int tid) {
- int blocktime = arg; /* argument is in milliseconds */
+ int blocktime = arg; /* argument is in microseconds */
#if KMP_USE_MONITOR
int bt_intervals;
#endif
diff --git a/openmp/runtime/src/kmp_settings.h b/openmp/runtime/src/kmp_settings.h
--- a/openmp/runtime/src/kmp_settings.h
+++ b/openmp/runtime/src/kmp_settings.h
@@ -24,7 +24,6 @@
int __kmp_initial_threads_capacity(int req_nproc);
void __kmp_init_dflt_team_nth();
-int __kmp_convert_to_milliseconds(char const *);
int __kmp_default_tp_capacity(int, int, int);
#if KMP_MIC
diff --git a/openmp/runtime/src/kmp_settings.cpp b/openmp/runtime/src/kmp_settings.cpp
--- a/openmp/runtime/src/kmp_settings.cpp
+++ b/openmp/runtime/src/kmp_settings.cpp
@@ -149,70 +149,6 @@
} // __kmp_round4k
#endif
-/* Here, multipliers are like __kmp_convert_to_seconds, but floating-point
- values are allowed, and the return value is in milliseconds. The default
- multiplier is milliseconds. Returns INT_MAX only if the value specified
- matches "infinit*". Returns -1 if specified string is invalid. */
-int __kmp_convert_to_milliseconds(char const *data) {
- int ret, nvalues, factor;
- char mult, extra;
- double value;
-
- if (data == NULL)
- return (-1);
- if (__kmp_str_match("infinit", -1, data))
- return (INT_MAX);
- value = (double)0.0;
- mult = '\0';
-#if KMP_OS_WINDOWS && KMP_MSVC_COMPAT
- // On Windows, each %c parameter needs additional size parameter for sscanf_s
- nvalues = KMP_SSCANF(data, "%lf%c%c", &value, &mult, 1, &extra, 1);
-#else
- nvalues = KMP_SSCANF(data, "%lf%c%c", &value, &mult, &extra);
-#endif
- if (nvalues < 1)
- return (-1);
- if (nvalues == 1)
- mult = '\0';
- if (nvalues == 3)
- return (-1);
-
- if (value < 0)
- return (-1);
-
- switch (mult) {
- case '\0':
- /* default is milliseconds */
- factor = 1;
- break;
- case 's':
- case 'S':
- factor = 1000;
- break;
- case 'm':
- case 'M':
- factor = 1000 * 60;
- break;
- case 'h':
- case 'H':
- factor = 1000 * 60 * 60;
- break;
- case 'd':
- case 'D':
- factor = 1000 * 24 * 60 * 60;
- break;
- default:
- return (-1);
- }
-
- if (value >= ((INT_MAX - 1) / factor))
- ret = INT_MAX - 1; /* Don't allow infinite value here */
- else
- ret = (int)(value * (double)factor); /* truncate to int */
-
- return ret;
-}
-
static int __kmp_strcasecmp_with_sentinel(char const *a, char const *b,
char sentinel) {
if (a == NULL)
@@ -731,24 +667,73 @@
static void __kmp_stg_parse_blocktime(char const *name, char const *value,
void *data) {
- __kmp_dflt_blocktime = __kmp_convert_to_milliseconds(value);
- if (__kmp_dflt_blocktime < 0) {
- __kmp_dflt_blocktime = KMP_DEFAULT_BLOCKTIME;
+ const char *buf = value;
+ const char *next;
+ const int ms_mult = 1000;
+ int multiplier = 1;
+ int num;
+
+ // Read integer blocktime value
+ SKIP_WS(buf);
+ if ((*buf >= '0') && (*buf <= '9')) {
+ next = buf;
+ SKIP_DIGITS(next);
+ num = __kmp_basic_str_to_int(buf);
+ KMP_ASSERT(num >= 0);
+ buf = next;
+ SKIP_WS(buf);
+ } else {
+ num = -1;
+ }
+
+ // Read units: note that __kmp_dflt_blocktime units is now us
+ next = buf;
+ if (*buf == '\0' || __kmp_match_str("ms", buf, &next)) {
+ // units are in ms; convert
+ __kmp_dflt_blocktime = ms_mult * num;
+ __kmp_blocktime_units = 'm';
+ multiplier = ms_mult;
+ } else if (__kmp_match_str("us", buf, &next)) {
+ // units are in us
+ __kmp_dflt_blocktime = num;
+ __kmp_blocktime_units = 'u';
+ } else if (__kmp_match_str("infinite", buf, &next) ||
+ __kmp_match_str("infinity", buf, &next)) {
+ // units are in ms
+ __kmp_dflt_blocktime = KMP_MAX_BLOCKTIME;
+ __kmp_blocktime_units = 'm';
+ multiplier = ms_mult;
+ } else {
+ KMP_WARNING(StgInvalidValue, name, value);
+ // default units are in ms
+ __kmp_dflt_blocktime = ms_mult * num;
+ __kmp_blocktime_units = 'm';
+ multiplier = ms_mult;
+ }
+
+ if (num < 0 && __kmp_dflt_blocktime < 0) { // num out of range
+ __kmp_dflt_blocktime = KMP_DEFAULT_BLOCKTIME; // now in us
__kmp_msg(kmp_ms_warning, KMP_MSG(InvalidValue, name, value),
__kmp_msg_null);
- KMP_INFORM(Using_int_Value, name, __kmp_dflt_blocktime);
+ // Inform in appropriate units
+ KMP_INFORM(Using_int_Value, name, __kmp_dflt_blocktime / multiplier);
__kmp_env_blocktime = FALSE; // Revert to default as if var not set.
+ } else if (num > 0 && __kmp_dflt_blocktime < 0) { // overflow
+ __kmp_dflt_blocktime = KMP_MAX_BLOCKTIME;
+ __kmp_msg(kmp_ms_warning, KMP_MSG(LargeValue, name, value), __kmp_msg_null);
+ KMP_INFORM(MaxValueUsing, name, __kmp_dflt_blocktime / multiplier);
+ __kmp_env_blocktime = TRUE; // KMP_BLOCKTIME was specified.
} else {
if (__kmp_dflt_blocktime < KMP_MIN_BLOCKTIME) {
__kmp_dflt_blocktime = KMP_MIN_BLOCKTIME;
__kmp_msg(kmp_ms_warning, KMP_MSG(SmallValue, name, value),
__kmp_msg_null);
- KMP_INFORM(MinValueUsing, name, __kmp_dflt_blocktime);
+ KMP_INFORM(MinValueUsing, name, __kmp_dflt_blocktime / multiplier);
} else if (__kmp_dflt_blocktime > KMP_MAX_BLOCKTIME) {
__kmp_dflt_blocktime = KMP_MAX_BLOCKTIME;
__kmp_msg(kmp_ms_warning, KMP_MSG(LargeValue, name, value),
__kmp_msg_null);
- KMP_INFORM(MaxValueUsing, name, __kmp_dflt_blocktime);
+ KMP_INFORM(MaxValueUsing, name, __kmp_dflt_blocktime / multiplier);
}
__kmp_env_blocktime = TRUE; // KMP_BLOCKTIME was specified.
}
@@ -768,7 +753,17 @@
static void __kmp_stg_print_blocktime(kmp_str_buf_t *buffer, char const *name,
void *data) {
- __kmp_stg_print_int(buffer, name, __kmp_dflt_blocktime);
+ int num = __kmp_dflt_blocktime;
+ if (__kmp_blocktime_units == 'm') {
+ num = num / 1000;
+ }
+ if (__kmp_env_format) {
+ KMP_STR_BUF_PRINT_NAME_EX(name);
+ } else {
+ __kmp_str_buf_print(buffer, " %s=", name);
+ }
+ __kmp_str_buf_print(buffer, "%d", num);
+ __kmp_str_buf_print(buffer, "%cs\n", __kmp_blocktime_units);
} // __kmp_stg_print_blocktime
// -----------------------------------------------------------------------------
@@ -6097,7 +6092,13 @@
/* KMP_BLOCKTIME */
value = __kmp_env_blk_var(block, "KMP_BLOCKTIME");
if (value) {
- kmpc_set_blocktime(__kmp_dflt_blocktime);
+ int gtid, tid;
+ kmp_info_t *thread;
+
+ gtid = __kmp_entry_gtid();
+ tid = __kmp_tid_from_gtid(gtid);
+ thread = __kmp_thread_from_gtid(gtid);
+ __kmp_aux_set_blocktime(__kmp_dflt_blocktime, thread, tid);
}
/* OMP_NESTED */
diff --git a/openmp/runtime/src/kmp_str.h b/openmp/runtime/src/kmp_str.h
--- a/openmp/runtime/src/kmp_str.h
+++ b/openmp/runtime/src/kmp_str.h
@@ -112,6 +112,7 @@
void __kmp_str_replace(char *str, char search_for, char replace_with);
void __kmp_str_split(char *str, char delim, char **head, char **tail);
char *__kmp_str_token(char *str, char const *delim, char **buf);
+int __kmp_basic_str_to_int(char const *str);
int __kmp_str_to_int(char const *str, char sentinel);
void __kmp_str_to_size(char const *str, size_t *out, size_t dfactor,
diff --git a/openmp/runtime/src/kmp_str.cpp b/openmp/runtime/src/kmp_str.cpp
--- a/openmp/runtime/src/kmp_str.cpp
+++ b/openmp/runtime/src/kmp_str.cpp
@@ -619,6 +619,21 @@
return token;
} // __kmp_str_token
+// Parse the leading decimal digits of str as a non-negative int (0 if none).
+// Saturates at the largest int rather than overflowing, which would be UB.
+int __kmp_basic_str_to_int(char const *str) {
+  const int int_max = (int)(~0u >> 1); // largest int, no <limits.h> needed
+  int result = 0;
+  for (char const *t = str; *t >= '0' && *t <= '9'; ++t) {
+    int digit = *t - '0';
+    if (result > (int_max - digit) / 10)
+      return int_max; // further digits cannot lower a saturated value
+    result = (result * 10) + digit;
+  }
+
+  return result;
+}
+
int __kmp_str_to_int(char const *str, char sentinel) {
int result, factor;
char const *t;
diff --git a/openmp/runtime/src/z_Linux_util.cpp b/openmp/runtime/src/z_Linux_util.cpp
--- a/openmp/runtime/src/z_Linux_util.cpp
+++ b/openmp/runtime/src/z_Linux_util.cpp
@@ -93,6 +93,7 @@
static kmp_mutex_align_t __kmp_wait_mx;
kmp_uint64 __kmp_ticks_per_msec = 1000000;
+kmp_uint64 __kmp_ticks_per_usec = 1000;
#ifdef DEBUG_SUSPEND
static void __kmp_print_cond(char *buffer, kmp_cond_align_t *cond) {
@@ -2002,7 +2003,7 @@
/* Measure clock ticks per millisecond */
void __kmp_initialize_system_tick() {
kmp_uint64 now, nsec2, diff;
- kmp_uint64 delay = 100000; // 50~100 usec on most machines.
+ kmp_uint64 delay = 1000000; // ~450 usec on most machines.
kmp_uint64 nsec = __kmp_now_nsec();
kmp_uint64 goal = __kmp_hardware_timestamp() + delay;
while ((now = __kmp_hardware_timestamp()) < goal)
@@ -2010,9 +2011,11 @@
nsec2 = __kmp_now_nsec();
diff = nsec2 - nsec;
if (diff > 0) {
- kmp_uint64 tpms = ((kmp_uint64)1e6 * (delay + (now - goal)) / diff);
- if (tpms > 0)
- __kmp_ticks_per_msec = tpms;
+ double tpus = 1000.0 * (double)(delay + (now - goal)) / (double)diff;
+ if (tpus > 0.0) {
+ __kmp_ticks_per_msec = (kmp_uint64)(tpus * 1000.0);
+ __kmp_ticks_per_usec = (kmp_uint64)tpus;
+ }
}
}
#endif