Index: libomptarget/src/interface.cpp
===================================================================
--- libomptarget/src/interface.cpp
+++ libomptarget/src/interface.cpp
@@ -21,6 +21,60 @@
 #include
 #include
 
+/// Records the outcome of an offload operation and applies the policy selected
+/// by the target-offload ICV (TargetOffloadKind):
+///  - success: remember that at least one offload has succeeded;
+///  - failure, MANDATORY: print a message and terminate the application;
+///  - failure, DEFAULT: terminate if an earlier offload succeeded, otherwise
+///    switch the ICV to DISABLED so that later entry points fall back to the
+///    host;
+///  - failure, DISABLED: nothing to do.
+/// NOTE(review): HasSuccessfulRuns and TargetOffloadKind are plain globals
+/// updated here without locking - confirm concurrent offloads cannot race.
+static inline void checkOffloadResult(int rc) {
+  if (rc == OFFLOAD_SUCCESS) {
+    HasSuccessfulRuns = true;
+  } else if (TargetOffloadKind == OMP_TGT_OFFLOAD_MANDATORY) {
+    fprintf(stderr, "OMP: WARNING: Target offload failure, terminating "
+        "application\n");
+    exit(OFFLOAD_FAIL);
+  } else if (TargetOffloadKind == OMP_TGT_OFFLOAD_DEFAULT) {
+    if (HasSuccessfulRuns) {
+      fprintf(stderr, "OMP: WARNING: Target offload failure after offload "
+          "success, terminating application due to possible inconsistent state "
+          "of data\n");
+      exit(OFFLOAD_FAIL);
+    } else { // if (!HasSuccessfulRuns)
+      fprintf(stderr, "OMP: WARNING: Target offload failure, falling back to "
+          "the host\n");
+      TargetOffloadKind = OMP_TGT_OFFLOAD_DISABLED;
+    }
+  }
+}
+
+/// Early exit for the void data-mapping entry points once offloading has been
+/// disabled. Wrapped in do { } while (0) so that "MACRO();" expands to exactly
+/// one statement and stays safe inside an unbraced if/else.
+#define CHECK_HOST_FALLBACK_DATA() \
+  do { \
+    if (TargetOffloadKind == OMP_TGT_OFFLOAD_DISABLED) { \
+      DP("Target offload disabled, ignoring \"target data\", " \
+         "\"target enter/exit data\" or \"target update\"\n"); \
+      return; \
+    } \
+  } while (0)
+
+/// Early exit for the int-returning target entry points once offloading has
+/// been disabled: reports OFFLOAD_FAIL to the caller. Uses the same
+/// do { } while (0) wrapping so that "MACRO();" is a single statement.
+#define CHECK_HOST_FALLBACK_TARGET() \
+  do { \
+    if (TargetOffloadKind == OMP_TGT_OFFLOAD_DISABLED) { \
+      DP("Target offload disabled, executing target region on the host\n"); \
+      return OFFLOAD_FAIL; \
+    } \
+  } while (0)
+
 ////////////////////////////////////////////////////////////////////////////////
 /// adds a target shared library to the target execution image
 EXTERN void __tgt_register_lib(__tgt_bin_desc *desc) {
@@ -260,6 +314,7 @@
 /// and passes the data to the device.
EXTERN void __tgt_target_data_begin(int64_t device_id, int32_t arg_num, void **args_base, void **args, int64_t *arg_sizes, int64_t *arg_types) { + CHECK_HOST_FALLBACK_DATA(); DP("Entering data begin region for device %ld with %d mappings\n", device_id, arg_num); @@ -271,6 +309,7 @@ if (CheckDeviceAndCtors(device_id) != OFFLOAD_SUCCESS) { DP("Failed to get device %ld ready\n", device_id); + checkOffloadResult(OFFLOAD_FAIL); return; } @@ -286,12 +325,14 @@ new_args_base, new_args, new_arg_sizes, new_arg_types, false); //target_data_begin(Device, arg_num, args_base, args, arg_sizes, arg_types); - target_data_begin(Device, new_arg_num, new_args_base, new_args, new_arg_sizes, - new_arg_types); + int rc = target_data_begin(Device, new_arg_num, new_args_base, new_args, + new_arg_sizes, new_arg_types); // Cleanup translation memory cleanup_map(new_arg_num, new_args_base, new_args, new_arg_sizes, new_arg_types, arg_num, args_base); + + checkOffloadResult(rc); } EXTERN void __tgt_target_data_begin_nowait(int64_t device_id, int32_t arg_num, @@ -310,6 +351,7 @@ /// created by the last __tgt_target_data_begin. EXTERN void __tgt_target_data_end(int64_t device_id, int32_t arg_num, void **args_base, void **args, int64_t *arg_sizes, int64_t *arg_types) { + CHECK_HOST_FALLBACK_DATA(); DP("Entering data end region with %d mappings\n", arg_num); // No devices available? 
@@ -322,12 +364,13 @@
   RTLsMtx.unlock();
   if (Devices_size <= (size_t)device_id) {
     DP("Device ID %ld does not have a matching RTL.\n", device_id);
+    checkOffloadResult(OFFLOAD_FAIL);
     return;
   }
 
   DeviceTy &Device = Devices[device_id];
   if (!Device.IsInit) {
-    DP("uninit device: ignore");
+    DP("Uninit device: ignore\n");
     return;
   }
 
@@ -341,12 +384,14 @@
       new_args_base, new_args, new_arg_sizes, new_arg_types, false);
 
   //target_data_end(Device, arg_num, args_base, args, arg_sizes, arg_types);
-  target_data_end(Device, new_arg_num, new_args_base, new_args, new_arg_sizes,
-      new_arg_types);
+  int rc = target_data_end(Device, new_arg_num, new_args_base, new_args,
+      new_arg_sizes, new_arg_types);
 
   // Cleanup translation memory
   cleanup_map(new_arg_num, new_args_base, new_args, new_arg_sizes,
       new_arg_types, arg_num, args_base);
+
+  checkOffloadResult(rc);
 }
 
 EXTERN void __tgt_target_data_end_nowait(int64_t device_id, int32_t arg_num,
@@ -362,6 +407,7 @@
 
 EXTERN void __tgt_target_data_update(int64_t device_id, int32_t arg_num,
     void **args_base, void **args, int64_t *arg_sizes, int64_t *arg_types) {
+  CHECK_HOST_FALLBACK_DATA();
   DP("Entering data update with %d mappings\n", arg_num);
 
   // No devices available?
@@ -371,11 +417,15 @@ if (CheckDeviceAndCtors(device_id) != OFFLOAD_SUCCESS) { DP("Failed to get device %ld ready\n", device_id); + checkOffloadResult(OFFLOAD_FAIL); return; } DeviceTy& Device = Devices[device_id]; - target_data_update(Device, arg_num, args_base, args, arg_sizes, arg_types); + int rc = target_data_update(Device, arg_num, args_base, args, arg_sizes, + arg_types); + + checkOffloadResult(rc); } EXTERN void __tgt_target_data_update_nowait( @@ -391,6 +441,7 @@ EXTERN int __tgt_target(int64_t device_id, void *host_ptr, int32_t arg_num, void **args_base, void **args, int64_t *arg_sizes, int64_t *arg_types) { + CHECK_HOST_FALLBACK_TARGET(); DP("Entering target region with entry point " DPxMOD " and device Id %ld\n", DPxPTR(host_ptr), device_id); @@ -400,6 +451,7 @@ if (CheckDeviceAndCtors(device_id) != OFFLOAD_SUCCESS) { DP("Failed to get device %ld ready\n", device_id); + checkOffloadResult(OFFLOAD_FAIL); return OFFLOAD_FAIL; } @@ -421,6 +473,7 @@ cleanup_map(new_arg_num, new_args_base, new_args, new_arg_sizes, new_arg_types, arg_num, args_base); + checkOffloadResult(rc); return rc; } @@ -438,6 +491,7 @@ EXTERN int __tgt_target_teams(int64_t device_id, void *host_ptr, int32_t arg_num, void **args_base, void **args, int64_t *arg_sizes, int64_t *arg_types, int32_t team_num, int32_t thread_limit) { + CHECK_HOST_FALLBACK_TARGET(); DP("Entering target region with entry point " DPxMOD " and device Id %ld\n", DPxPTR(host_ptr), device_id); @@ -447,6 +501,7 @@ if (CheckDeviceAndCtors(device_id) != OFFLOAD_SUCCESS) { DP("Failed to get device %ld ready\n", device_id); + checkOffloadResult(OFFLOAD_FAIL); return OFFLOAD_FAIL; } @@ -469,6 +524,7 @@ cleanup_map(new_arg_num, new_args_base, new_args, new_arg_sizes, new_arg_types, arg_num, args_base); + checkOffloadResult(rc); return rc; } @@ -493,6 +549,7 @@ if (CheckDeviceAndCtors(device_id) != OFFLOAD_SUCCESS) { DP("Failed to get device %ld ready\n", device_id); + checkOffloadResult(OFFLOAD_FAIL); return; } Index: 
libomptarget/src/omptarget.cpp =================================================================== --- libomptarget/src/omptarget.cpp +++ libomptarget/src/omptarget.cpp @@ -25,6 +25,18 @@ int DebugLevel = 0; #endif // OMPTARGET_DEBUG +/// Target offload ICV - can be MANDATORY, DISABLED or DEFAULT. +tgt_offload_kind TargetOffloadKind; + +// Variable that controls the behavior of libomptarget in case of offload +// failure when the offload kind is DEFAULT. It is OK to always fail (we can +// safely fall back to the host, since data on the host is up to date). It is +// not OK to succeed at first and then fail, as the most up to date data might +// be on the device, so falling back to the host is not guaranteed to yield +// correct results. Once an offload has fallen back on the host, every +// subsequent offload will also be executed on the host. +bool HasSuccessfulRuns = false; + /// Map global data and execute pending ctors static int InitLibrary(DeviceTy& Device) { /* @@ -258,6 +270,7 @@ if (rt != OFFLOAD_SUCCESS) { DP("Copying data to device failed.\n"); rc = OFFLOAD_FAIL; + break; } } } @@ -272,6 +285,7 @@ if (rt != OFFLOAD_SUCCESS) { DP("Copying data to device failed.\n"); rc = OFFLOAD_FAIL; + break; } // create shadow pointers for this entry Device.ShadowMtx.lock(); @@ -339,6 +353,7 @@ if (rt != OFFLOAD_SUCCESS) { DP("Copying data from device failed.\n"); rc = OFFLOAD_FAIL; + break; } } } @@ -382,6 +397,7 @@ if (rt != OFFLOAD_SUCCESS) { DP("Deallocating data from device failed.\n"); rc = OFFLOAD_FAIL; + break; } } } @@ -391,8 +407,9 @@ } /// Internal function to pass data to/from the target. -void target_data_update(DeviceTy &Device, int32_t arg_num, - void **args_base, void **args, int64_t *arg_sizes, int64_t *arg_types) { +int target_data_update(DeviceTy &Device, int32_t arg_num, void **args_base, + void **args, int64_t *arg_sizes, int64_t *arg_types) { + int rc = OFFLOAD_SUCCESS; // process each input. 
for (int32_t i = 0; i < arg_num; ++i) { if ((arg_types[i] & OMP_TGT_MAPTYPE_LITERAL) || @@ -408,7 +425,12 @@ if (arg_types[i] & OMP_TGT_MAPTYPE_FROM) { DP("Moving %" PRId64 " bytes (tgt:" DPxMOD ") -> (hst:" DPxMOD ")\n", arg_sizes[i], DPxPTR(TgtPtrBegin), DPxPTR(HstPtrBegin)); - Device.data_retrieve(HstPtrBegin, TgtPtrBegin, MapSize); + int rt = Device.data_retrieve(HstPtrBegin, TgtPtrBegin, MapSize); + if (rt != OFFLOAD_SUCCESS) { + DP("Copying data from device failed.\n"); + rc = OFFLOAD_FAIL; + break; + } uintptr_t lb = (uintptr_t) HstPtrBegin; uintptr_t ub = (uintptr_t) HstPtrBegin + MapSize; @@ -431,7 +453,12 @@ if (arg_types[i] & OMP_TGT_MAPTYPE_TO) { DP("Moving %" PRId64 " bytes (hst:" DPxMOD ") -> (tgt:" DPxMOD ")\n", arg_sizes[i], DPxPTR(HstPtrBegin), DPxPTR(TgtPtrBegin)); - Device.data_submit(TgtPtrBegin, HstPtrBegin, MapSize); + int rt = Device.data_submit(TgtPtrBegin, HstPtrBegin, MapSize); + if (rt != OFFLOAD_SUCCESS) { + DP("Copying data to device failed.\n"); + rc = OFFLOAD_FAIL; + break; + } uintptr_t lb = (uintptr_t) HstPtrBegin; uintptr_t ub = (uintptr_t) HstPtrBegin + MapSize; @@ -452,6 +479,8 @@ Device.ShadowMtx.unlock(); } } + + return rc; } /// performs the same actions as data_begin in case arg_num is Index: libomptarget/src/private.h =================================================================== --- libomptarget/src/private.h +++ libomptarget/src/private.h @@ -24,7 +24,7 @@ extern int target_data_end(DeviceTy &Device, int32_t arg_num, void **args_base, void **args, int64_t *arg_sizes, int64_t *arg_types); -extern void target_data_update(DeviceTy &Device, int32_t arg_num, +extern int target_data_update(DeviceTy &Device, int32_t arg_num, void **args_base, void **args, int64_t *arg_sizes, int64_t *arg_types); extern int target(int64_t device_id, void *host_ptr, int32_t arg_num, @@ -39,6 +39,7 @@ #endif int omp_get_default_device(void) __attribute__((weak)); int32_t __kmpc_omp_taskwait(void *loc_ref, int32_t gtid) __attribute__((weak)); 
+int __kmpc_get_target_offload() __attribute__((weak));
 #ifdef __cplusplus
 }
 #endif
@@ -56,4 +57,15 @@
 #define DP(...) {}
 #endif // OMPTARGET_DEBUG
 
+// Target offload ICV - must match definition in libomp
+enum tgt_offload_kind {
+  OMP_TGT_OFFLOAD_DISABLED = 0,
+  OMP_TGT_OFFLOAD_DEFAULT = 1,
+  OMP_TGT_OFFLOAD_MANDATORY = 2
+};
+extern tgt_offload_kind TargetOffloadKind;
+
+// Variable that controls the behavior in case of offload failure
+extern bool HasSuccessfulRuns;
+
 #endif
Index: libomptarget/src/rtl.cpp
===================================================================
--- libomptarget/src/rtl.cpp
+++ libomptarget/src/rtl.cpp
@@ -45,15 +45,22 @@
   }
 #endif // OMPTARGET_DEBUG
 
-  // Parse environment variable OMP_TARGET_OFFLOAD (if set)
-  char *envStr = getenv("OMP_TARGET_OFFLOAD");
-  if (envStr && !strcmp(envStr, "DISABLED")) {
+  // Query libomp about the kind of target offload. The entry point is declared
+  // weak, so its address is null when the libomp in use does not provide it;
+  // in that case keep the DEFAULT policy instead of calling through a null
+  // pointer.
+  TargetOffloadKind = OMP_TGT_OFFLOAD_DEFAULT;
+  if (__kmpc_get_target_offload)
+    TargetOffloadKind = (tgt_offload_kind) __kmpc_get_target_offload();
+  if (TargetOffloadKind == OMP_TGT_OFFLOAD_DISABLED) {
     DP("Target offloading disabled by environment\n");
     return;
   }
 
   DP("Loading RTLs...\n");
 
+  bool HasDevices = false;
+
   // Attempt to open all the plugins and, if they exist, check if the interface
   // is correct and if they are supporting any devices.
   for (auto *Name : RTLNames) {
@@ -118,12 +125,21 @@
     DP("Registering RTL %s supporting %d devices!\n",
         R.RTLName.c_str(), R.NumberOfDevices);
 
+    HasDevices = true;
+
     // The RTL is valid! Will save the information in the RTLs list.
     AllRTLs.push_back(R);
   }
 
   DP("RTLs loaded!\n");
 
+  // At this point, if the target-offload-icv is DEFAULT and no devices have
+  // been found, the icv falls back to DISABLED.
+  if (TargetOffloadKind == OMP_TGT_OFFLOAD_DEFAULT && !HasDevices) {
+    TargetOffloadKind = OMP_TGT_OFFLOAD_DISABLED;
+    DP("No offload devices found, target-offload-icv set to DISABLED\n");
+  }
+
   return;
 }
 
@@ -192,6 +208,12 @@
   // Attempt to load all plugins available in the system.
std::call_once(initFlag, &RTLsTy::LoadRTLs, this); + // Return immediately if offloading has been disabled + if (TargetOffloadKind == OMP_TGT_OFFLOAD_DISABLED) { + DP("Target offload disabled, skipping registering library\n"); + return; + } + RTLsMtx.lock(); // Register the images with the RTLs that understand them, if any. for (int32_t i = 0; i < desc->NumDeviceImages; ++i) { @@ -280,6 +297,12 @@ } void RTLsTy::UnregisterLib(__tgt_bin_desc *desc) { + // Return immediately if offloading has been disabled + if (TargetOffloadKind == OMP_TGT_OFFLOAD_DISABLED) { + DP("Target offload disabled, skipping unregister library\n"); + return; + } + DP("Unloading target library!\n"); RTLsMtx.lock();