Index: libomptarget/src/interface.cpp =================================================================== --- libomptarget/src/interface.cpp +++ libomptarget/src/interface.cpp @@ -20,6 +20,55 @@ #include #include +#include +#include + +// Store target policy (disabled, mandatory, default) +kmp_target_offload_kind_t TargetOffloadPolicy = tgt_default; +std::mutex TargetOffloadMtx; + +//////////////////////////////////////////////////////////////////////////////// +/// manage the success or failure of a target construct + +static void HandleDefaultTargetOffload() { + TargetOffloadMtx.lock(); + if (TargetOffloadPolicy == tgt_default) { + if (omp_get_num_devices() > 0) { + DP("Default TARGET OFFLOAD policy is now mandatory " + "(devicew were found)\n"); + TargetOffloadPolicy = tgt_mandatory; + } else { + DP("Default TARGET OFFLOAD policy is now disabled " + "(devices were not found)\n"); + TargetOffloadPolicy = tgt_disabled; + } + } + TargetOffloadMtx.unlock(); +} + +static int IsOffloadDisabled() { + if (TargetOffloadPolicy == tgt_default) HandleDefaultTargetOffload(); + return TargetOffloadPolicy == tgt_disabled; +} + +static void HandleTargetOutcome(bool success) { + switch (TargetOffloadPolicy) { + case tgt_disabled: + if (success) { + FatalMessage(1, "expected no offloading while offloading is disabled"); + } + break; + case tgt_default: + DP("Should never reach this test with target offload set to default\n"); + assert(false); + break; + case tgt_mandatory: + if (!success) { + FatalMessage(1, "failure of target construct while offloading is mandatory"); + } + break; + } +} //////////////////////////////////////////////////////////////////////////////// /// adds a target shared library to the target execution image @@ -38,6 +87,8 @@ /// and passes the data to the device. 
EXTERN void __tgt_target_data_begin(int64_t device_id, int32_t arg_num, void **args_base, void **args, int64_t *arg_sizes, int64_t *arg_types) { + if (IsOffloadDisabled()) return; + DP("Entering data begin region for device %" PRId64 " with %d mappings\n", device_id, arg_num); @@ -49,6 +100,7 @@ if (CheckDeviceAndCtors(device_id) != OFFLOAD_SUCCESS) { DP("Failed to get device %" PRId64 " ready\n", device_id); + HandleTargetOutcome(false); return; } @@ -62,7 +114,9 @@ } #endif - target_data_begin(Device, arg_num, args_base, args, arg_sizes, arg_types); + int rc = target_data_begin(Device, arg_num, args_base, + args, arg_sizes, arg_types); + HandleTargetOutcome(rc == OFFLOAD_SUCCESS); } EXTERN void __tgt_target_data_begin_nowait(int64_t device_id, int32_t arg_num, @@ -81,6 +135,7 @@ /// created by the last __tgt_target_data_begin. EXTERN void __tgt_target_data_end(int64_t device_id, int32_t arg_num, void **args_base, void **args, int64_t *arg_sizes, int64_t *arg_types) { + if (IsOffloadDisabled()) return; DP("Entering data end region with %d mappings\n", arg_num); // No devices available? 
@@ -93,12 +148,14 @@ RTLsMtx.unlock(); if (Devices_size <= (size_t)device_id) { DP("Device ID %" PRId64 " does not have a matching RTL.\n", device_id); + HandleTargetOutcome(false); return; } DeviceTy &Device = Devices[device_id]; if (!Device.IsInit) { DP("Uninit device: ignore"); + HandleTargetOutcome(false); return; } @@ -110,7 +167,9 @@ } #endif - target_data_end(Device, arg_num, args_base, args, arg_sizes, arg_types); + int rc = target_data_end(Device, arg_num, args_base, + args, arg_sizes, arg_types); + HandleTargetOutcome(rc == OFFLOAD_SUCCESS); } EXTERN void __tgt_target_data_end_nowait(int64_t device_id, int32_t arg_num, @@ -126,6 +185,7 @@ EXTERN void __tgt_target_data_update(int64_t device_id, int32_t arg_num, void **args_base, void **args, int64_t *arg_sizes, int64_t *arg_types) { + if (IsOffloadDisabled()) return; DP("Entering data update with %d mappings\n", arg_num); // No devices available? @@ -135,11 +195,14 @@ if (CheckDeviceAndCtors(device_id) != OFFLOAD_SUCCESS) { DP("Failed to get device %" PRId64 " ready\n", device_id); + HandleTargetOutcome(false); return; } DeviceTy& Device = Devices[device_id]; - target_data_update(Device, arg_num, args_base, args, arg_sizes, arg_types); + int rc = target_data_update(Device, arg_num, args_base, + args, arg_sizes, arg_types); + HandleTargetOutcome(rc == OFFLOAD_SUCCESS); } EXTERN void __tgt_target_data_update_nowait( @@ -155,6 +218,7 @@ EXTERN int __tgt_target(int64_t device_id, void *host_ptr, int32_t arg_num, void **args_base, void **args, int64_t *arg_sizes, int64_t *arg_types) { + if (IsOffloadDisabled()) return OFFLOAD_FAIL; DP("Entering target region with entry point " DPxMOD " and device Id %" PRId64 "\n", DPxPTR(host_ptr), device_id); @@ -164,6 +228,7 @@ if (CheckDeviceAndCtors(device_id) != OFFLOAD_SUCCESS) { DP("Failed to get device %" PRId64 " ready\n", device_id); + HandleTargetOutcome(false); return OFFLOAD_FAIL; } @@ -177,7 +242,7 @@ int rc = target(device_id, host_ptr, arg_num, args_base, args, 
arg_sizes, arg_types, 0, 0, false /*team*/); - + HandleTargetOutcome(rc == OFFLOAD_SUCCESS); return rc; } @@ -195,6 +260,7 @@ EXTERN int __tgt_target_teams(int64_t device_id, void *host_ptr, int32_t arg_num, void **args_base, void **args, int64_t *arg_sizes, int64_t *arg_types, int32_t team_num, int32_t thread_limit) { + if (IsOffloadDisabled()) return OFFLOAD_FAIL; DP("Entering target region with entry point " DPxMOD " and device Id %" PRId64 "\n", DPxPTR(host_ptr), device_id); @@ -204,6 +270,7 @@ if (CheckDeviceAndCtors(device_id) != OFFLOAD_SUCCESS) { DP("Failed to get device %" PRId64 " ready\n", device_id); + HandleTargetOutcome(false); return OFFLOAD_FAIL; } @@ -217,6 +284,7 @@ int rc = target(device_id, host_ptr, arg_num, args_base, args, arg_sizes, arg_types, team_num, thread_limit, true /*team*/); + HandleTargetOutcome(rc == OFFLOAD_SUCCESS); return rc; } @@ -242,6 +310,7 @@ if (CheckDeviceAndCtors(device_id) != OFFLOAD_SUCCESS) { DP("Failed to get device %" PRId64 " ready\n", device_id); + HandleTargetOutcome(false); return; } Index: libomptarget/src/omptarget.cpp =================================================================== --- libomptarget/src/omptarget.cpp +++ libomptarget/src/omptarget.cpp @@ -20,11 +20,31 @@ #include #include +#include #ifdef OMPTARGET_DEBUG int DebugLevel = 0; #endif // OMPTARGET_DEBUG +//////////////////////////////////////////////////////////////////////////////// +/// support for fatal messages + +// mutex held by FatalMessage while it writes its message to stderr +std::mutex LibomptargetPrintMtx; + +void FatalMessage(const int errorNum, const char *fmt, ...) { + va_list args; + va_start(args, fmt); + LibomptargetPrintMtx.lock(); + fprintf(stderr, "Libomptarget error %d:", errorNum); + vfprintf(stderr, fmt, args); + fprintf(stderr, "\n"); + LibomptargetPrintMtx.unlock(); + va_end(args); + exit(1); +} + + /* All begin addresses for partially mapped structs must be 8-aligned in order * to ensure proper alignment of members. E.g. 
* @@ -212,7 +232,6 @@ int target_data_begin(DeviceTy &Device, int32_t arg_num, void **args_base, void **args, int64_t *arg_sizes, int64_t *arg_types) { // process each input. - int rc = OFFLOAD_SUCCESS; for (int32_t i = 0; i < arg_num; ++i) { // Ignore private variables and arrays - there is no mapping for them. if ((arg_types[i] & OMP_TGT_MAPTYPE_LITERAL) || @@ -257,6 +276,7 @@ if (!Pointer_TgtPtrBegin) { DP("Call to getOrAllocTgtPtr returned null pointer (device failure or " "illegal mapping).\n"); + return OFFLOAD_FAIL; } DP("There are %zu bytes allocated at target address " DPxMOD " - is%s new" "\n", sizeof(void *), DPxPTR(Pointer_TgtPtrBegin), @@ -306,7 +326,7 @@ int rt = Device.data_submit(TgtPtrBegin, HstPtrBegin, data_size); if (rt != OFFLOAD_SUCCESS) { DP("Copying data to device failed.\n"); - rc = OFFLOAD_FAIL; + return OFFLOAD_FAIL; } } } @@ -320,7 +340,7 @@ sizeof(void *)); if (rt != OFFLOAD_SUCCESS) { DP("Copying data to device failed.\n"); - rc = OFFLOAD_FAIL; + return OFFLOAD_FAIL; } // create shadow pointers for this entry Device.ShadowMtx.lock(); @@ -330,13 +350,12 @@ } } - return rc; + return OFFLOAD_SUCCESS; } /// Internal function to undo the mapping and retrieve the data from the device. int target_data_end(DeviceTy &Device, int32_t arg_num, void **args_base, void **args, int64_t *arg_sizes, int64_t *arg_types) { - int rc = OFFLOAD_SUCCESS; // process each input. for (int32_t i = arg_num - 1; i >= 0; --i) { // Ignore private variables and arrays - there is no mapping for them. 
@@ -404,7 +423,7 @@ int rt = Device.data_retrieve(HstPtrBegin, TgtPtrBegin, data_size); if (rt != OFFLOAD_SUCCESS) { DP("Copying data from device failed.\n"); - rc = OFFLOAD_FAIL; + return OFFLOAD_FAIL; } } } @@ -447,17 +466,17 @@ int rt = Device.deallocTgtPtr(HstPtrBegin, data_size, ForceDelete); if (rt != OFFLOAD_SUCCESS) { DP("Deallocating data from device failed.\n"); - rc = OFFLOAD_FAIL; + return OFFLOAD_FAIL; } } } } - return rc; + return OFFLOAD_SUCCESS; } /// Internal function to pass data to/from the target. -void target_data_update(DeviceTy &Device, int32_t arg_num, +int target_data_update(DeviceTy &Device, int32_t arg_num, void **args_base, void **args, int64_t *arg_sizes, int64_t *arg_types) { // process each input. for (int32_t i = 0; i < arg_num; ++i) { @@ -470,11 +489,19 @@ bool IsLast; void *TgtPtrBegin = Device.getTgtPtrBegin(HstPtrBegin, MapSize, IsLast, false); + if (!TgtPtrBegin) { + DP("hst data:" DPxMOD " not found, becomes a noop\n", DPxPTR(HstPtrBegin)); + continue; + } if (arg_types[i] & OMP_TGT_MAPTYPE_FROM) { DP("Moving %" PRId64 " bytes (tgt:" DPxMOD ") -> (hst:" DPxMOD ")\n", arg_sizes[i], DPxPTR(TgtPtrBegin), DPxPTR(HstPtrBegin)); - Device.data_retrieve(HstPtrBegin, TgtPtrBegin, MapSize); + int rt = Device.data_retrieve(HstPtrBegin, TgtPtrBegin, MapSize); + if (rt != OFFLOAD_SUCCESS) { + DP("Copying data from device failed.\n"); + return OFFLOAD_FAIL; + } uintptr_t lb = (uintptr_t) HstPtrBegin; uintptr_t ub = (uintptr_t) HstPtrBegin + MapSize; @@ -497,8 +524,11 @@ if (arg_types[i] & OMP_TGT_MAPTYPE_TO) { DP("Moving %" PRId64 " bytes (hst:" DPxMOD ") -> (tgt:" DPxMOD ")\n", arg_sizes[i], DPxPTR(HstPtrBegin), DPxPTR(TgtPtrBegin)); - Device.data_submit(TgtPtrBegin, HstPtrBegin, MapSize); - + int rt = Device.data_submit(TgtPtrBegin, HstPtrBegin, MapSize); + if (rt != OFFLOAD_SUCCESS) { + DP("Copying data to device failed.\n"); + return OFFLOAD_FAIL; + } uintptr_t lb = (uintptr_t) HstPtrBegin; uintptr_t ub = (uintptr_t) HstPtrBegin + 
MapSize; Device.ShadowMtx.lock(); @@ -512,12 +542,18 @@ DP("Restoring original target pointer value " DPxMOD " for target " "pointer " DPxMOD "\n", DPxPTR(it->second.TgtPtrVal), DPxPTR(it->second.TgtPtrAddr)); - Device.data_submit(it->second.TgtPtrAddr, + rt = Device.data_submit(it->second.TgtPtrAddr, &it->second.TgtPtrVal, sizeof(void *)); + if (rt != OFFLOAD_SUCCESS) { + DP("Copying data to device failed.\n"); + Device.ShadowMtx.unlock(); + return OFFLOAD_FAIL; + } } Device.ShadowMtx.unlock(); } } + return OFFLOAD_SUCCESS; } /// performs the same actions as data_begin in case arg_num is @@ -585,12 +621,8 @@ // Move data to device. int rc = target_data_begin(Device, arg_num, args_base, args, arg_sizes, arg_types); - if (rc != OFFLOAD_SUCCESS) { - DP("Call to target_data_begin failed, skipping target execution.\n"); - // Call target_data_end to dealloc whatever target_data_begin allocated - // and return OFFLOAD_FAIL. - target_data_end(Device, arg_num, args_base, args, arg_sizes, arg_types); + DP("Call to target_data_begin failed, abort target.\n"); return OFFLOAD_FAIL; } @@ -620,30 +652,28 @@ TgtPtrBegin = Device.RTL->data_alloc(Device.RTLDeviceID, arg_sizes[i], HstPtrBegin); if (!TgtPtrBegin) { - DP ("Data allocation for %sprivate array " DPxMOD " failed\n", + DP ("Data allocation for %sprivate array " DPxMOD " failed, " + "abort target.\n", (arg_types[i] & OMP_TGT_MAPTYPE_TO ? 
"first-" : ""), DPxPTR(HstPtrBegin)); - rc = OFFLOAD_FAIL; - break; - } else { - fpArrays.push_back(TgtPtrBegin); - TgtBaseOffset = (intptr_t)HstPtrBase - (intptr_t)HstPtrBegin; + return OFFLOAD_FAIL; + } + fpArrays.push_back(TgtPtrBegin); + TgtBaseOffset = (intptr_t)HstPtrBase - (intptr_t)HstPtrBegin; #ifdef OMPTARGET_DEBUG - void *TgtPtrBase = (void *)((intptr_t)TgtPtrBegin + TgtBaseOffset); - DP("Allocated %" PRId64 " bytes of target memory at " DPxMOD " for " - "%sprivate array " DPxMOD " - pushing target argument " DPxMOD "\n", - arg_sizes[i], DPxPTR(TgtPtrBegin), - (arg_types[i] & OMP_TGT_MAPTYPE_TO ? "first-" : ""), - DPxPTR(HstPtrBegin), DPxPTR(TgtPtrBase)); + void *TgtPtrBase = (void *)((intptr_t)TgtPtrBegin + TgtBaseOffset); + DP("Allocated %" PRId64 " bytes of target memory at " DPxMOD " for " + "%sprivate array " DPxMOD " - pushing target argument " DPxMOD "\n", + arg_sizes[i], DPxPTR(TgtPtrBegin), + (arg_types[i] & OMP_TGT_MAPTYPE_TO ? "first-" : ""), + DPxPTR(HstPtrBegin), DPxPTR(TgtPtrBase)); #endif - // If first-private, copy data from host - if (arg_types[i] & OMP_TGT_MAPTYPE_TO) { - int rt = Device.data_submit(TgtPtrBegin, HstPtrBegin, arg_sizes[i]); - if (rt != OFFLOAD_SUCCESS) { - DP ("Copying data to device failed.\n"); - rc = OFFLOAD_FAIL; - break; - } + // If first-private, copy data from host + if (arg_types[i] & OMP_TGT_MAPTYPE_TO) { + int rt = Device.data_submit(TgtPtrBegin, HstPtrBegin, arg_sizes[i]); + if (rt != OFFLOAD_SUCCESS) { + DP ("Copying data to device failed, failed.\n"); + return OFFLOAD_FAIL; } } } else if (arg_types[i] & OMP_TGT_MAPTYPE_PTR_AND_OBJ) { @@ -675,21 +705,20 @@ Device.loopTripCnt = 0; // Launch device execution. 
- if (rc == OFFLOAD_SUCCESS) { - DP("Launching target execution %s with pointer " DPxMOD " (index=%d).\n", - TargetTable->EntriesBegin[TM->Index].name, - DPxPTR(TargetTable->EntriesBegin[TM->Index].addr), TM->Index); - if (IsTeamConstruct) { - rc = Device.run_team_region(TargetTable->EntriesBegin[TM->Index].addr, - &tgt_args[0], &tgt_offsets[0], tgt_args.size(), team_num, - thread_limit, ltc); - } else { - rc = Device.run_region(TargetTable->EntriesBegin[TM->Index].addr, - &tgt_args[0], &tgt_offsets[0], tgt_args.size()); - } + DP("Launching target execution %s with pointer " DPxMOD " (index=%d).\n", + TargetTable->EntriesBegin[TM->Index].name, + DPxPTR(TargetTable->EntriesBegin[TM->Index].addr), TM->Index); + if (IsTeamConstruct) { + rc = Device.run_team_region(TargetTable->EntriesBegin[TM->Index].addr, + &tgt_args[0], &tgt_offsets[0], tgt_args.size(), team_num, + thread_limit, ltc); } else { - DP("Errors occurred while obtaining target arguments, skipping kernel " - "execution\n"); + rc = Device.run_region(TargetTable->EntriesBegin[TM->Index].addr, + &tgt_args[0], &tgt_offsets[0], tgt_args.size()); + } + if (rc != OFFLOAD_SUCCESS) { + DP ("Executing target region abort target.\n"); + return OFFLOAD_FAIL; } // Deallocate (first-)private arrays @@ -697,18 +726,17 @@ int rt = Device.RTL->data_delete(Device.RTLDeviceID, it); if (rt != OFFLOAD_SUCCESS) { DP("Deallocation of (first-)private arrays failed.\n"); - rc = OFFLOAD_FAIL; + return OFFLOAD_FAIL; } } // Move data from device. 
int rt = target_data_end(Device, arg_num, args_base, args, arg_sizes, arg_types); - if (rt != OFFLOAD_SUCCESS) { - DP("Call to target_data_end failed.\n"); - rc = OFFLOAD_FAIL; + DP("Call to target_data_end failed, abort targe.\n"); + return OFFLOAD_FAIL; } - return rc; + return OFFLOAD_SUCCESS; } Index: libomptarget/src/private.h =================================================================== --- libomptarget/src/private.h +++ libomptarget/src/private.h @@ -24,7 +24,7 @@ extern int target_data_end(DeviceTy &Device, int32_t arg_num, void **args_base, void **args, int64_t *arg_sizes, int64_t *arg_types); -extern void target_data_update(DeviceTy &Device, int32_t arg_num, +extern int target_data_update(DeviceTy &Device, int32_t arg_num, void **args_base, void **args, int64_t *arg_sizes, int64_t *arg_types); extern int target(int64_t device_id, void *host_ptr, int32_t arg_num, @@ -33,16 +33,29 @@ extern int CheckDeviceAndCtors(int64_t device_id); +// enum for OMP_TARGET_OFFLOAD; keep in sync with kmp.h definition +enum kmp_target_offload_kind { + tgt_disabled = 0, + tgt_default = 1, + tgt_mandatory = 2 +}; +typedef enum kmp_target_offload_kind kmp_target_offload_kind_t; +extern kmp_target_offload_kind_t TargetOffloadPolicy; + // Implemented in libomp, they are called from within __tgt_* functions. 
#ifdef __cplusplus extern "C" { #endif +// functions that extract info from libomp; keep in sync int omp_get_default_device(void) __attribute__((weak)); int32_t __kmpc_omp_taskwait(void *loc_ref, int32_t gtid) __attribute__((weak)); +int __kmpc_get_target_offload(void) __attribute__((weak)); #ifdef __cplusplus } #endif +void FatalMessage(const int errorNum, const char *fmt, ...); + #ifdef OMPTARGET_DEBUG extern int DebugLevel; Index: libomptarget/src/rtl.cpp =================================================================== --- libomptarget/src/rtl.cpp +++ libomptarget/src/rtl.cpp @@ -46,9 +46,8 @@ #endif // OMPTARGET_DEBUG // Parse environment variable OMP_TARGET_OFFLOAD (if set) - char *envStr = getenv("OMP_TARGET_OFFLOAD"); - if (envStr && !strcmp(envStr, "DISABLED")) { - DP("Target offloading disabled by environment\n"); + TargetOffloadPolicy = (kmp_target_offload_kind_t) __kmpc_get_target_offload(); + if (TargetOffloadPolicy == tgt_disabled) { return; } @@ -216,7 +215,6 @@ if (!R.isUsed) { // Initialize the device information for the RTL we are about to use. DeviceTy device(&R); - size_t start = Devices.size(); Devices.resize(start + R.NumberOfDevices, device); for (int32_t device_id = 0; device_id < R.NumberOfDevices;