Index: libomptarget/include/omptarget.h =================================================================== --- libomptarget/include/omptarget.h +++ libomptarget/include/omptarget.h @@ -185,6 +185,13 @@ } #endif +/* + * To enable libomptarget debugging: + * 1) uncomment OMPTARGET_DEBUG #define + * 2) set LIBOMPTARGET_DEBUG environment variable before executing: + * e.g. export LIBOMPTARGET_DEBUG=1 + */ +// #define OMPTARGET_DEBUG 1 #ifdef OMPTARGET_DEBUG #include #define DEBUGP(prefix, ...) \ Index: libomptarget/src/device.h =================================================================== --- libomptarget/src/device.h +++ libomptarget/src/device.h @@ -21,6 +21,14 @@ #include #include +// enum for OMP_TARGET_OFFLOAD; keep in sync with kmp.h definition +enum kmp_target_offload_kind { + tgt_disabled = 0, + tgt_default = 1, + tgt_mandatory = 2 +}; +typedef enum kmp_target_offload_kind kmp_target_offload_kind_t; + // Forward declarations. struct RTLInfoTy; struct __tgt_bin_desc; @@ -161,7 +169,8 @@ /// Map between Device ID (i.e. openmp device id) and its DeviceTy. typedef std::vector DevicesTy; extern DevicesTy Devices; - +extern kmp_target_offload_kind_t TargetOffloadPolicy; extern bool device_is_ready(int device_num); +extern void handle_target_outcome(bool success); #endif Index: libomptarget/src/device.cpp =================================================================== --- libomptarget/src/device.cpp +++ libomptarget/src/device.cpp @@ -21,6 +21,8 @@ /// Map between Device ID (i.e. openmp device id) and its DeviceTy. 
DevicesTy Devices; +// Store target policy (disabled, mandatory, default) +kmp_target_offload_kind_t TargetOffloadPolicy; int DeviceTy::associatePtr(void *HstPtrBegin, void *TgtPtrBegin, int64_t Size) { DataMapMtx.lock(); @@ -363,3 +365,26 @@ return true; } + +// manage the success or failure of a target construct +void handle_target_outcome(bool success) +{ + switch (TargetOffloadPolicy) { + case tgt_disabled: + if (success) { + DP("failure of target construct when expecting to fail offloading"); + assert(! success); + } + break; + case tgt_default: + DP("should have determined default -> mandatory/disabled when registering code"); + assert(false); + break; + case tgt_mandatory: + if (! success) { + DP("failure of target construct when expecting to successfully offload"); + assert(success); + } + break; + } +} Index: libomptarget/src/interface.cpp =================================================================== --- libomptarget/src/interface.cpp +++ libomptarget/src/interface.cpp @@ -38,6 +38,8 @@ /// and passes the data to the device. EXTERN void __tgt_target_data_begin(int64_t device_id, int32_t arg_num, void **args_base, void **args, int64_t *arg_sizes, int64_t *arg_types) { + if (TargetOffloadPolicy == tgt_disabled) return; + DP("Entering data begin region for device %" PRId64 " with %d mappings\n", device_id, arg_num); @@ -49,6 +51,7 @@ if (CheckDeviceAndCtors(device_id) != OFFLOAD_SUCCESS) { DP("Failed to get device %" PRId64 " ready\n", device_id); + handle_target_outcome(false); return; } @@ -62,7 +65,9 @@ } #endif - target_data_begin(Device, arg_num, args_base, args, arg_sizes, arg_types); + int rc = target_data_begin(Device, arg_num, args_base, + args, arg_sizes, arg_types); + handle_target_outcome(rc == OFFLOAD_SUCCESS); } EXTERN void __tgt_target_data_begin_nowait(int64_t device_id, int32_t arg_num, @@ -81,6 +86,7 @@ /// created by the last __tgt_target_data_begin. 
EXTERN void __tgt_target_data_end(int64_t device_id, int32_t arg_num, void **args_base, void **args, int64_t *arg_sizes, int64_t *arg_types) { + if (TargetOffloadPolicy == tgt_disabled) return; DP("Entering data end region with %d mappings\n", arg_num); // No devices available? @@ -93,12 +99,14 @@ RTLsMtx.unlock(); if (Devices_size <= (size_t)device_id) { DP("Device ID %" PRId64 " does not have a matching RTL.\n", device_id); + handle_target_outcome(false); return; } DeviceTy &Device = Devices[device_id]; if (!Device.IsInit) { DP("Uninit device: ignore"); + handle_target_outcome(false); return; } @@ -110,7 +118,9 @@ } #endif - target_data_end(Device, arg_num, args_base, args, arg_sizes, arg_types); + int rc = target_data_end(Device, arg_num, args_base, + args, arg_sizes, arg_types); + handle_target_outcome(rc == OFFLOAD_SUCCESS); } EXTERN void __tgt_target_data_end_nowait(int64_t device_id, int32_t arg_num, @@ -126,6 +136,7 @@ EXTERN void __tgt_target_data_update(int64_t device_id, int32_t arg_num, void **args_base, void **args, int64_t *arg_sizes, int64_t *arg_types) { + if (TargetOffloadPolicy == tgt_disabled) return; DP("Entering data update with %d mappings\n", arg_num); // No devices available? 
@@ -135,11 +146,14 @@ if (CheckDeviceAndCtors(device_id) != OFFLOAD_SUCCESS) { DP("Failed to get device %" PRId64 " ready\n", device_id); + handle_target_outcome(false); return; } DeviceTy& Device = Devices[device_id]; - target_data_update(Device, arg_num, args_base, args, arg_sizes, arg_types); + int rc = target_data_update(Device, arg_num, args_base, + args, arg_sizes, arg_types); + handle_target_outcome(rc == OFFLOAD_SUCCESS); } EXTERN void __tgt_target_data_update_nowait( @@ -155,6 +169,7 @@ EXTERN int __tgt_target(int64_t device_id, void *host_ptr, int32_t arg_num, void **args_base, void **args, int64_t *arg_sizes, int64_t *arg_types) { + if (TargetOffloadPolicy == tgt_disabled) return OFFLOAD_FAIL; DP("Entering target region with entry point " DPxMOD " and device Id %" PRId64 "\n", DPxPTR(host_ptr), device_id); @@ -164,6 +179,7 @@ if (CheckDeviceAndCtors(device_id) != OFFLOAD_SUCCESS) { DP("Failed to get device %" PRId64 " ready\n", device_id); + handle_target_outcome(false); return OFFLOAD_FAIL; } @@ -177,7 +193,7 @@ int rc = target(device_id, host_ptr, arg_num, args_base, args, arg_sizes, arg_types, 0, 0, false /*team*/); - + handle_target_outcome(rc == OFFLOAD_SUCCESS); return rc; } @@ -195,6 +211,7 @@ EXTERN int __tgt_target_teams(int64_t device_id, void *host_ptr, int32_t arg_num, void **args_base, void **args, int64_t *arg_sizes, int64_t *arg_types, int32_t team_num, int32_t thread_limit) { + if (TargetOffloadPolicy == tgt_disabled) return OFFLOAD_FAIL; DP("Entering target region with entry point " DPxMOD " and device Id %" PRId64 "\n", DPxPTR(host_ptr), device_id); @@ -204,6 +221,7 @@ if (CheckDeviceAndCtors(device_id) != OFFLOAD_SUCCESS) { DP("Failed to get device %" PRId64 " ready\n", device_id); + handle_target_outcome(false); return OFFLOAD_FAIL; } @@ -217,6 +235,7 @@ int rc = target(device_id, host_ptr, arg_num, args_base, args, arg_sizes, arg_types, team_num, thread_limit, true /*team*/); + handle_target_outcome(rc == OFFLOAD_SUCCESS); return rc; 
} @@ -242,6 +261,7 @@ if (CheckDeviceAndCtors(device_id) != OFFLOAD_SUCCESS) { DP("Failed to get device %" PRId64 " ready\n", device_id); + handle_target_outcome(false); return; } Index: libomptarget/src/omptarget.cpp =================================================================== --- libomptarget/src/omptarget.cpp +++ libomptarget/src/omptarget.cpp @@ -212,7 +212,6 @@ int target_data_begin(DeviceTy &Device, int32_t arg_num, void **args_base, void **args, int64_t *arg_sizes, int64_t *arg_types) { // process each input. - int rc = OFFLOAD_SUCCESS; for (int32_t i = 0; i < arg_num; ++i) { // Ignore private variables and arrays - there is no mapping for them. if ((arg_types[i] & OMP_TGT_MAPTYPE_LITERAL) || @@ -257,6 +256,7 @@ if (!Pointer_TgtPtrBegin) { DP("Call to getOrAllocTgtPtr returned null pointer (device failure or " "illegal mapping).\n"); + return OFFLOAD_FAIL; } DP("There are %zu bytes allocated at target address " DPxMOD " - is%s new" "\n", sizeof(void *), DPxPTR(Pointer_TgtPtrBegin), @@ -306,7 +306,7 @@ int rt = Device.data_submit(TgtPtrBegin, HstPtrBegin, data_size); if (rt != OFFLOAD_SUCCESS) { DP("Copying data to device failed.\n"); - rc = OFFLOAD_FAIL; + return OFFLOAD_FAIL; } } } @@ -320,7 +320,7 @@ sizeof(void *)); if (rt != OFFLOAD_SUCCESS) { DP("Copying data to device failed.\n"); - rc = OFFLOAD_FAIL; + return OFFLOAD_FAIL; } // create shadow pointers for this entry Device.ShadowMtx.lock(); @@ -330,13 +330,12 @@ } } - return rc; + return OFFLOAD_SUCCESS; } /// Internal function to undo the mapping and retrieve the data from the device. int target_data_end(DeviceTy &Device, int32_t arg_num, void **args_base, void **args, int64_t *arg_sizes, int64_t *arg_types) { - int rc = OFFLOAD_SUCCESS; // process each input. for (int32_t i = arg_num - 1; i >= 0; --i) { // Ignore private variables and arrays - there is no mapping for them. 
@@ -404,7 +403,7 @@ int rt = Device.data_retrieve(HstPtrBegin, TgtPtrBegin, data_size); if (rt != OFFLOAD_SUCCESS) { DP("Copying data from device failed.\n"); - rc = OFFLOAD_FAIL; + return OFFLOAD_FAIL; } } } @@ -447,17 +446,17 @@ int rt = Device.deallocTgtPtr(HstPtrBegin, data_size, ForceDelete); if (rt != OFFLOAD_SUCCESS) { DP("Deallocating data from device failed.\n"); - rc = OFFLOAD_FAIL; + return OFFLOAD_FAIL; } } } } - return rc; + return OFFLOAD_SUCCESS; } /// Internal function to pass data to/from the target. -void target_data_update(DeviceTy &Device, int32_t arg_num, +int target_data_update(DeviceTy &Device, int32_t arg_num, void **args_base, void **args, int64_t *arg_sizes, int64_t *arg_types) { // process each input. for (int32_t i = 0; i < arg_num; ++i) { @@ -470,11 +469,19 @@ bool IsLast; void *TgtPtrBegin = Device.getTgtPtrBegin(HstPtrBegin, MapSize, IsLast, false); + if (!TgtPtrBegin) { + DP("hst data:" DPxMOD " not found, becomes a noop\n", DPxPTR(HstPtrBegin)); + continue; + } if (arg_types[i] & OMP_TGT_MAPTYPE_FROM) { DP("Moving %" PRId64 " bytes (tgt:" DPxMOD ") -> (hst:" DPxMOD ")\n", arg_sizes[i], DPxPTR(TgtPtrBegin), DPxPTR(HstPtrBegin)); - Device.data_retrieve(HstPtrBegin, TgtPtrBegin, MapSize); + int rt = Device.data_retrieve(HstPtrBegin, TgtPtrBegin, MapSize); + if (rt != OFFLOAD_SUCCESS) { + DP("Copying data from device failed.\n"); + return OFFLOAD_FAIL; + } uintptr_t lb = (uintptr_t) HstPtrBegin; uintptr_t ub = (uintptr_t) HstPtrBegin + MapSize; @@ -497,8 +504,11 @@ if (arg_types[i] & OMP_TGT_MAPTYPE_TO) { DP("Moving %" PRId64 " bytes (hst:" DPxMOD ") -> (tgt:" DPxMOD ")\n", arg_sizes[i], DPxPTR(HstPtrBegin), DPxPTR(TgtPtrBegin)); - Device.data_submit(TgtPtrBegin, HstPtrBegin, MapSize); - + int rt = Device.data_submit(TgtPtrBegin, HstPtrBegin, MapSize); + if (rt != OFFLOAD_SUCCESS) { + DP("Copying data to device failed.\n"); + return OFFLOAD_FAIL; + } uintptr_t lb = (uintptr_t) HstPtrBegin; uintptr_t ub = (uintptr_t) HstPtrBegin + 
MapSize; Device.ShadowMtx.lock(); @@ -512,12 +522,18 @@ DP("Restoring original target pointer value " DPxMOD " for target " "pointer " DPxMOD "\n", DPxPTR(it->second.TgtPtrVal), DPxPTR(it->second.TgtPtrAddr)); - Device.data_submit(it->second.TgtPtrAddr, + rt = Device.data_submit(it->second.TgtPtrAddr, &it->second.TgtPtrVal, sizeof(void *)); + if (rt != OFFLOAD_SUCCESS) { + DP("Copying data to device failed.\n"); + Device.ShadowMtx.unlock(); + return OFFLOAD_FAIL; + } } Device.ShadowMtx.unlock(); } } + return OFFLOAD_SUCCESS; } /// performs the same actions as data_begin in case arg_num is @@ -585,12 +601,8 @@ // Move data to device. int rc = target_data_begin(Device, arg_num, args_base, args, arg_sizes, arg_types); - if (rc != OFFLOAD_SUCCESS) { - DP("Call to target_data_begin failed, skipping target execution.\n"); - // Call target_data_end to dealloc whatever target_data_begin allocated - // and return OFFLOAD_FAIL. - target_data_end(Device, arg_num, args_base, args, arg_sizes, arg_types); + DP("Call to target_data_begin failed, abort target.\n"); return OFFLOAD_FAIL; } @@ -620,30 +632,28 @@ TgtPtrBegin = Device.RTL->data_alloc(Device.RTLDeviceID, arg_sizes[i], HstPtrBegin); if (!TgtPtrBegin) { - DP ("Data allocation for %sprivate array " DPxMOD " failed\n", + DP ("Data allocation for %sprivate array " DPxMOD " failed, " + "abort target.\n", (arg_types[i] & OMP_TGT_MAPTYPE_TO ? 
"first-" : ""), DPxPTR(HstPtrBegin)); - rc = OFFLOAD_FAIL; - break; - } else { - fpArrays.push_back(TgtPtrBegin); - TgtBaseOffset = (intptr_t)HstPtrBase - (intptr_t)HstPtrBegin; + return OFFLOAD_FAIL; + } + fpArrays.push_back(TgtPtrBegin); + TgtBaseOffset = (intptr_t)HstPtrBase - (intptr_t)HstPtrBegin; #ifdef OMPTARGET_DEBUG - void *TgtPtrBase = (void *)((intptr_t)TgtPtrBegin + TgtBaseOffset); - DP("Allocated %" PRId64 " bytes of target memory at " DPxMOD " for " - "%sprivate array " DPxMOD " - pushing target argument " DPxMOD "\n", - arg_sizes[i], DPxPTR(TgtPtrBegin), - (arg_types[i] & OMP_TGT_MAPTYPE_TO ? "first-" : ""), - DPxPTR(HstPtrBegin), DPxPTR(TgtPtrBase)); + void *TgtPtrBase = (void *)((intptr_t)TgtPtrBegin + TgtBaseOffset); + DP("Allocated %" PRId64 " bytes of target memory at " DPxMOD " for " + "%sprivate array " DPxMOD " - pushing target argument " DPxMOD "\n", + arg_sizes[i], DPxPTR(TgtPtrBegin), + (arg_types[i] & OMP_TGT_MAPTYPE_TO ? "first-" : ""), + DPxPTR(HstPtrBegin), DPxPTR(TgtPtrBase)); #endif - // If first-private, copy data from host - if (arg_types[i] & OMP_TGT_MAPTYPE_TO) { - int rt = Device.data_submit(TgtPtrBegin, HstPtrBegin, arg_sizes[i]); - if (rt != OFFLOAD_SUCCESS) { - DP ("Copying data to device failed.\n"); - rc = OFFLOAD_FAIL; - break; - } + // If first-private, copy data from host + if (arg_types[i] & OMP_TGT_MAPTYPE_TO) { + int rt = Device.data_submit(TgtPtrBegin, HstPtrBegin, arg_sizes[i]); + if (rt != OFFLOAD_SUCCESS) { + DP ("Copying data to device failed, failed.\n"); + return OFFLOAD_FAIL; } } } else if (arg_types[i] & OMP_TGT_MAPTYPE_PTR_AND_OBJ) { @@ -675,21 +685,20 @@ Device.loopTripCnt = 0; // Launch device execution. 
- if (rc == OFFLOAD_SUCCESS) { - DP("Launching target execution %s with pointer " DPxMOD " (index=%d).\n", - TargetTable->EntriesBegin[TM->Index].name, - DPxPTR(TargetTable->EntriesBegin[TM->Index].addr), TM->Index); - if (IsTeamConstruct) { - rc = Device.run_team_region(TargetTable->EntriesBegin[TM->Index].addr, - &tgt_args[0], &tgt_offsets[0], tgt_args.size(), team_num, - thread_limit, ltc); - } else { - rc = Device.run_region(TargetTable->EntriesBegin[TM->Index].addr, - &tgt_args[0], &tgt_offsets[0], tgt_args.size()); - } + DP("Launching target execution %s with pointer " DPxMOD " (index=%d).\n", + TargetTable->EntriesBegin[TM->Index].name, + DPxPTR(TargetTable->EntriesBegin[TM->Index].addr), TM->Index); + if (IsTeamConstruct) { + rc = Device.run_team_region(TargetTable->EntriesBegin[TM->Index].addr, + &tgt_args[0], &tgt_offsets[0], tgt_args.size(), team_num, + thread_limit, ltc); } else { - DP("Errors occurred while obtaining target arguments, skipping kernel " - "execution\n"); + rc = Device.run_region(TargetTable->EntriesBegin[TM->Index].addr, + &tgt_args[0], &tgt_offsets[0], tgt_args.size()); + } + if (rc != OFFLOAD_SUCCESS) { + DP ("Executing target region abort target.\n"); + return OFFLOAD_FAIL; } // Deallocate (first-)private arrays @@ -697,18 +706,17 @@ int rt = Device.RTL->data_delete(Device.RTLDeviceID, it); if (rt != OFFLOAD_SUCCESS) { DP("Deallocation of (first-)private arrays failed.\n"); - rc = OFFLOAD_FAIL; + return OFFLOAD_FAIL; } } // Move data from device. 
int rt = target_data_end(Device, arg_num, args_base, args, arg_sizes, arg_types); - if (rt != OFFLOAD_SUCCESS) { - DP("Call to target_data_end failed.\n"); - rc = OFFLOAD_FAIL; + DP("Call to target_data_end failed, abort targe.\n"); + return OFFLOAD_FAIL; } - return rc; + return OFFLOAD_SUCCESS; } Index: libomptarget/src/private.h =================================================================== --- libomptarget/src/private.h +++ libomptarget/src/private.h @@ -24,7 +24,7 @@ extern int target_data_end(DeviceTy &Device, int32_t arg_num, void **args_base, void **args, int64_t *arg_sizes, int64_t *arg_types); -extern void target_data_update(DeviceTy &Device, int32_t arg_num, +extern int target_data_update(DeviceTy &Device, int32_t arg_num, void **args_base, void **args, int64_t *arg_sizes, int64_t *arg_types); extern int target(int64_t device_id, void *host_ptr, int32_t arg_num, @@ -37,12 +37,15 @@ #ifdef __cplusplus extern "C" { #endif +// functions that extract info from libomp; keep in sync int omp_get_default_device(void) __attribute__((weak)); int32_t __kmpc_omp_taskwait(void *loc_ref, int32_t gtid) __attribute__((weak)); +int __kmpc_get_target_offload(void) __attribute__((weak)); #ifdef __cplusplus } #endif + #ifdef OMPTARGET_DEBUG extern int DebugLevel; Index: libomptarget/src/rtl.cpp =================================================================== --- libomptarget/src/rtl.cpp +++ libomptarget/src/rtl.cpp @@ -46,9 +46,8 @@ #endif // OMPTARGET_DEBUG // Parse environment variable OMP_TARGET_OFFLOAD (if set) - char *envStr = getenv("OMP_TARGET_OFFLOAD"); - if (envStr && !strcmp(envStr, "DISABLED")) { - DP("Target offloading disabled by environment\n"); + TargetOffloadPolicy = (kmp_target_offload_kind_t) __kmpc_get_target_offload(); + if (TargetOffloadPolicy == tgt_disabled) { return; } @@ -216,7 +215,17 @@ if (!R.isUsed) { // Initialize the device information for the RTL we are about to use. 
DeviceTy device(&R); - + if (TargetOffloadPolicy == tgt_default) { + if (R.NumberOfDevices > 0) { + DP("Default TARGET OFFLOAD policy is now mandatory " + "(devicew were found)\n"); + TargetOffloadPolicy = tgt_mandatory; + } else { + DP("Default TARGET OFFLOAD policy is now disabled " + "(devices were not found)\n"); + TargetOffloadPolicy = tgt_disabled; + } + } size_t start = Devices.size(); Devices.resize(start + R.NumberOfDevices, device); for (int32_t device_id = 0; device_id < R.NumberOfDevices;