Changeset View
Changeset View
Standalone View
Standalone View
openmp/libomptarget/src/omptarget.cpp
Show First 20 Lines • Show All 209 Lines • ▼ Show 20 Lines | int CheckDeviceAndCtors(int64_t device_id) { | ||||
return OFFLOAD_SUCCESS; | return OFFLOAD_SUCCESS; | ||||
} | } | ||||
/// Extract the MEMBER_OF field from a map-type word.
///
/// Masks \p type with OMP_TGT_MAPTYPE_MEMBER_OF, shifts the result right by
/// 48 bits and subtracts one, so a zero field yields -1.
/// NOTE(review): -1 presumably means "not a member of any parent entry", and
/// a non-negative value presumably indexes the parent argument - confirm
/// against the callers.
static int32_t member_of(int64_t type) { | static int32_t member_of(int64_t type) { | ||||
return ((type & OMP_TGT_MAPTYPE_MEMBER_OF) >> 48) - 1; | return ((type & OMP_TGT_MAPTYPE_MEMBER_OF) >> 48) - 1; | ||||
} | } | ||||
/// Internal function to do the mapping and transfer the data to the device | /// Internal function to do the mapping and transfer the data to the device | ||||
int target_data_begin(DeviceTy &Device, int32_t arg_num, | int target_data_begin(DeviceTy &Device, int32_t arg_num, void **args_base, | ||||
void **args_base, void **args, int64_t *arg_sizes, int64_t *arg_types) { | void **args, int64_t *arg_sizes, int64_t *arg_types, | ||||
__tgt_async_info *async_info_ptr) { | |||||
// process each input. | // process each input. | ||||
for (int32_t i = 0; i < arg_num; ++i) { | for (int32_t i = 0; i < arg_num; ++i) { | ||||
// Ignore private variables and arrays - there is no mapping for them. | // Ignore private variables and arrays - there is no mapping for them. | ||||
if ((arg_types[i] & OMP_TGT_MAPTYPE_LITERAL) || | if ((arg_types[i] & OMP_TGT_MAPTYPE_LITERAL) || | ||||
(arg_types[i] & OMP_TGT_MAPTYPE_PRIVATE)) | (arg_types[i] & OMP_TGT_MAPTYPE_PRIVATE)) | ||||
continue; | continue; | ||||
void *HstPtrBegin = args[i]; | void *HstPtrBegin = args[i]; | ||||
▲ Show 20 Lines • Show All 83 Lines • ▼ Show 20 Lines | if (arg_types[i] & OMP_TGT_MAPTYPE_TO) { | ||||
if (parent_rc == 1) { | if (parent_rc == 1) { | ||||
copy = true; | copy = true; | ||||
} | } | ||||
} | } | ||||
} | } | ||||
if (copy && !IsHostPtr) { | if (copy && !IsHostPtr) { | ||||
DP("Moving %" PRId64 " bytes (hst:" DPxMOD ") -> (tgt:" DPxMOD ")\n", | DP("Moving %" PRId64 " bytes (hst:" DPxMOD ") -> (tgt:" DPxMOD ")\n", | ||||
data_size, DPxPTR(HstPtrBegin), DPxPTR(TgtPtrBegin)); | data_size, DPxPTR(HstPtrBegin), DPxPTR(TgtPtrBegin)); | ||||
int rt = Device.data_submit(TgtPtrBegin, HstPtrBegin, data_size); | int rt = Device.data_submit(TgtPtrBegin, HstPtrBegin, data_size, | ||||
async_info_ptr); | |||||
if (rt != OFFLOAD_SUCCESS) { | if (rt != OFFLOAD_SUCCESS) { | ||||
DP("Copying data to device failed.\n"); | DP("Copying data to device failed.\n"); | ||||
return OFFLOAD_FAIL; | return OFFLOAD_FAIL; | ||||
} | } | ||||
} | } | ||||
} | } | ||||
if (arg_types[i] & OMP_TGT_MAPTYPE_PTR_AND_OBJ && !IsHostPtr) { | if (arg_types[i] & OMP_TGT_MAPTYPE_PTR_AND_OBJ && !IsHostPtr) { | ||||
DP("Update pointer (" DPxMOD ") -> [" DPxMOD "]\n", | DP("Update pointer (" DPxMOD ") -> [" DPxMOD "]\n", | ||||
DPxPTR(Pointer_TgtPtrBegin), DPxPTR(TgtPtrBegin)); | DPxPTR(Pointer_TgtPtrBegin), DPxPTR(TgtPtrBegin)); | ||||
uint64_t Delta = (uint64_t)HstPtrBegin - (uint64_t)HstPtrBase; | uint64_t Delta = (uint64_t)HstPtrBegin - (uint64_t)HstPtrBase; | ||||
void *TgtPtrBase = (void *)((uint64_t)TgtPtrBegin - Delta); | void *TgtPtrBase = (void *)((uint64_t)TgtPtrBegin - Delta); | ||||
int rt = Device.data_submit(Pointer_TgtPtrBegin, &TgtPtrBase, | int rt = Device.data_submit(Pointer_TgtPtrBegin, &TgtPtrBase, | ||||
sizeof(void *)); | sizeof(void *), async_info_ptr); | ||||
if (rt != OFFLOAD_SUCCESS) { | if (rt != OFFLOAD_SUCCESS) { | ||||
DP("Copying data to device failed.\n"); | DP("Copying data to device failed.\n"); | ||||
return OFFLOAD_FAIL; | return OFFLOAD_FAIL; | ||||
} | } | ||||
// create shadow pointers for this entry | // create shadow pointers for this entry | ||||
Device.ShadowMtx.lock(); | Device.ShadowMtx.lock(); | ||||
Device.ShadowPtrMap[Pointer_HstPtrBegin] = {HstPtrBase, | Device.ShadowPtrMap[Pointer_HstPtrBegin] = {HstPtrBase, | ||||
Pointer_TgtPtrBegin, TgtPtrBase}; | Pointer_TgtPtrBegin, TgtPtrBase}; | ||||
Device.ShadowMtx.unlock(); | Device.ShadowMtx.unlock(); | ||||
} | } | ||||
} | } | ||||
return OFFLOAD_SUCCESS; | return OFFLOAD_SUCCESS; | ||||
} | } | ||||
/// Internal function to undo the mapping and retrieve the data from the device. | /// Internal function to undo the mapping and retrieve the data from the device. | ||||
int target_data_end(DeviceTy &Device, int32_t arg_num, void **args_base, | int target_data_end(DeviceTy &Device, int32_t arg_num, void **args_base, | ||||
void **args, int64_t *arg_sizes, int64_t *arg_types) { | void **args, int64_t *arg_sizes, int64_t *arg_types, | ||||
__tgt_async_info *async_info_ptr) { | |||||
// process each input. | // process each input. | ||||
for (int32_t i = arg_num - 1; i >= 0; --i) { | for (int32_t i = arg_num - 1; i >= 0; --i) { | ||||
// Ignore private variables and arrays - there is no mapping for them. | // Ignore private variables and arrays - there is no mapping for them. | ||||
// Also, ignore the use_device_ptr directive, it has no effect here. | // Also, ignore the use_device_ptr directive, it has no effect here. | ||||
if ((arg_types[i] & OMP_TGT_MAPTYPE_LITERAL) || | if ((arg_types[i] & OMP_TGT_MAPTYPE_LITERAL) || | ||||
(arg_types[i] & OMP_TGT_MAPTYPE_PRIVATE)) | (arg_types[i] & OMP_TGT_MAPTYPE_PRIVATE)) | ||||
continue; | continue; | ||||
▲ Show 20 Lines • Show All 53 Lines • ▼ Show 20 Lines | if ((arg_types[i] & OMP_TGT_MAPTYPE_FROM) || DelEntry) { | ||||
} | } | ||||
} | } | ||||
} | } | ||||
if ((DelEntry || Always || CopyMember) && | if ((DelEntry || Always || CopyMember) && | ||||
!(RTLs->RequiresFlags & OMP_REQ_UNIFIED_SHARED_MEMORY && | !(RTLs->RequiresFlags & OMP_REQ_UNIFIED_SHARED_MEMORY && | ||||
TgtPtrBegin == HstPtrBegin)) { | TgtPtrBegin == HstPtrBegin)) { | ||||
DP("Moving %" PRId64 " bytes (tgt:" DPxMOD ") -> (hst:" DPxMOD ")\n", | DP("Moving %" PRId64 " bytes (tgt:" DPxMOD ") -> (hst:" DPxMOD ")\n", | ||||
data_size, DPxPTR(TgtPtrBegin), DPxPTR(HstPtrBegin)); | data_size, DPxPTR(TgtPtrBegin), DPxPTR(HstPtrBegin)); | ||||
int rt = Device.data_retrieve(HstPtrBegin, TgtPtrBegin, data_size); | int rt = Device.data_retrieve(HstPtrBegin, TgtPtrBegin, data_size, | ||||
async_info_ptr); | |||||
if (rt != OFFLOAD_SUCCESS) { | if (rt != OFFLOAD_SUCCESS) { | ||||
DP("Copying data from device failed.\n"); | DP("Copying data from device failed.\n"); | ||||
return OFFLOAD_FAIL; | return OFFLOAD_FAIL; | ||||
} | } | ||||
} | } | ||||
} | } | ||||
// If we copied back to the host a struct/array containing pointers, we | // If we copied back to the host a struct/array containing pointers, we | ||||
▲ Show 20 Lines • Show All 72 Lines • ▼ Show 20 Lines | if (RTLs->RequiresFlags & OMP_REQ_UNIFIED_SHARED_MEMORY && | ||||
DP("hst data:" DPxMOD " unified and shared, becomes a noop\n", | DP("hst data:" DPxMOD " unified and shared, becomes a noop\n", | ||||
DPxPTR(HstPtrBegin)); | DPxPTR(HstPtrBegin)); | ||||
continue; | continue; | ||||
} | } | ||||
if (arg_types[i] & OMP_TGT_MAPTYPE_FROM) { | if (arg_types[i] & OMP_TGT_MAPTYPE_FROM) { | ||||
DP("Moving %" PRId64 " bytes (tgt:" DPxMOD ") -> (hst:" DPxMOD ")\n", | DP("Moving %" PRId64 " bytes (tgt:" DPxMOD ") -> (hst:" DPxMOD ")\n", | ||||
arg_sizes[i], DPxPTR(TgtPtrBegin), DPxPTR(HstPtrBegin)); | arg_sizes[i], DPxPTR(TgtPtrBegin), DPxPTR(HstPtrBegin)); | ||||
int rt = Device.data_retrieve(HstPtrBegin, TgtPtrBegin, MapSize); | int rt = Device.data_retrieve(HstPtrBegin, TgtPtrBegin, MapSize, nullptr); | ||||
if (rt != OFFLOAD_SUCCESS) { | if (rt != OFFLOAD_SUCCESS) { | ||||
DP("Copying data from device failed.\n"); | DP("Copying data from device failed.\n"); | ||||
return OFFLOAD_FAIL; | return OFFLOAD_FAIL; | ||||
} | } | ||||
uintptr_t lb = (uintptr_t) HstPtrBegin; | uintptr_t lb = (uintptr_t) HstPtrBegin; | ||||
uintptr_t ub = (uintptr_t) HstPtrBegin + MapSize; | uintptr_t ub = (uintptr_t) HstPtrBegin + MapSize; | ||||
Device.ShadowMtx.lock(); | Device.ShadowMtx.lock(); | ||||
Show All 10 Lines | if (arg_types[i] & OMP_TGT_MAPTYPE_FROM) { | ||||
*ShadowHstPtrAddr = it->second.HstPtrVal; | *ShadowHstPtrAddr = it->second.HstPtrVal; | ||||
} | } | ||||
Device.ShadowMtx.unlock(); | Device.ShadowMtx.unlock(); | ||||
} | } | ||||
if (arg_types[i] & OMP_TGT_MAPTYPE_TO) { | if (arg_types[i] & OMP_TGT_MAPTYPE_TO) { | ||||
DP("Moving %" PRId64 " bytes (hst:" DPxMOD ") -> (tgt:" DPxMOD ")\n", | DP("Moving %" PRId64 " bytes (hst:" DPxMOD ") -> (tgt:" DPxMOD ")\n", | ||||
arg_sizes[i], DPxPTR(HstPtrBegin), DPxPTR(TgtPtrBegin)); | arg_sizes[i], DPxPTR(HstPtrBegin), DPxPTR(TgtPtrBegin)); | ||||
int rt = Device.data_submit(TgtPtrBegin, HstPtrBegin, MapSize); | int rt = Device.data_submit(TgtPtrBegin, HstPtrBegin, MapSize, nullptr); | ||||
if (rt != OFFLOAD_SUCCESS) { | if (rt != OFFLOAD_SUCCESS) { | ||||
DP("Copying data to device failed.\n"); | DP("Copying data to device failed.\n"); | ||||
return OFFLOAD_FAIL; | return OFFLOAD_FAIL; | ||||
} | } | ||||
uintptr_t lb = (uintptr_t) HstPtrBegin; | uintptr_t lb = (uintptr_t) HstPtrBegin; | ||||
uintptr_t ub = (uintptr_t) HstPtrBegin + MapSize; | uintptr_t ub = (uintptr_t) HstPtrBegin + MapSize; | ||||
Device.ShadowMtx.lock(); | Device.ShadowMtx.lock(); | ||||
for (ShadowPtrListTy::iterator it = Device.ShadowPtrMap.begin(); | for (ShadowPtrListTy::iterator it = Device.ShadowPtrMap.begin(); | ||||
it != Device.ShadowPtrMap.end(); ++it) { | it != Device.ShadowPtrMap.end(); ++it) { | ||||
void **ShadowHstPtrAddr = (void**) it->first; | void **ShadowHstPtrAddr = (void**) it->first; | ||||
if ((uintptr_t) ShadowHstPtrAddr < lb) | if ((uintptr_t) ShadowHstPtrAddr < lb) | ||||
continue; | continue; | ||||
if ((uintptr_t) ShadowHstPtrAddr >= ub) | if ((uintptr_t) ShadowHstPtrAddr >= ub) | ||||
break; | break; | ||||
DP("Restoring original target pointer value " DPxMOD " for target " | DP("Restoring original target pointer value " DPxMOD " for target " | ||||
"pointer " DPxMOD "\n", DPxPTR(it->second.TgtPtrVal), | "pointer " DPxMOD "\n", DPxPTR(it->second.TgtPtrVal), | ||||
DPxPTR(it->second.TgtPtrAddr)); | DPxPTR(it->second.TgtPtrAddr)); | ||||
rt = Device.data_submit(it->second.TgtPtrAddr, | rt = Device.data_submit(it->second.TgtPtrAddr, | ||||
&it->second.TgtPtrVal, sizeof(void *)); | &it->second.TgtPtrVal, sizeof(void *), nullptr); | ||||
if (rt != OFFLOAD_SUCCESS) { | if (rt != OFFLOAD_SUCCESS) { | ||||
DP("Copying data to device failed.\n"); | DP("Copying data to device failed.\n"); | ||||
Device.ShadowMtx.unlock(); | Device.ShadowMtx.unlock(); | ||||
return OFFLOAD_FAIL; | return OFFLOAD_FAIL; | ||||
} | } | ||||
} | } | ||||
Device.ShadowMtx.unlock(); | Device.ShadowMtx.unlock(); | ||||
} | } | ||||
▲ Show 20 Lines • Show All 65 Lines • ▼ Show 20 Lines | int target(int64_t device_id, void *host_ptr, int32_t arg_num, | ||||
// get target table. | // get target table. | ||||
TrlTblMtx->lock(); | TrlTblMtx->lock(); | ||||
assert(TM->Table->TargetsTable.size() > (size_t)device_id && | assert(TM->Table->TargetsTable.size() > (size_t)device_id && | ||||
"Not expecting a device ID outside the table's bounds!"); | "Not expecting a device ID outside the table's bounds!"); | ||||
__tgt_target_table *TargetTable = TM->Table->TargetsTable[device_id]; | __tgt_target_table *TargetTable = TM->Table->TargetsTable[device_id]; | ||||
TrlTblMtx->unlock(); | TrlTblMtx->unlock(); | ||||
assert(TargetTable && "Global data has not been mapped\n"); | assert(TargetTable && "Global data has not been mapped\n"); | ||||
__tgt_async_info AsyncInfo; | |||||
jdoerfert: Move this to the struct definition. It's C++ after all. | |||||
tianshilei1992: Yep. Will do that.
// Move data to device. | // Move data to device. | ||||
int rc = target_data_begin(Device, arg_num, args_base, args, arg_sizes, | int rc = target_data_begin(Device, arg_num, args_base, args, arg_sizes, | ||||
arg_types); | arg_types, &AsyncInfo); | ||||
if (rc != OFFLOAD_SUCCESS) { | if (rc != OFFLOAD_SUCCESS) { | ||||
DP("Call to target_data_begin failed, abort target.\n"); | DP("Call to target_data_begin failed, abort target.\n"); | ||||
return OFFLOAD_FAIL; | return OFFLOAD_FAIL; | ||||
} | } | ||||
std::vector<void *> tgt_args; | std::vector<void *> tgt_args; | ||||
std::vector<ptrdiff_t> tgt_offsets; | std::vector<ptrdiff_t> tgt_offsets; | ||||
Show All 34 Lines | if (!(arg_types[i] & OMP_TGT_MAPTYPE_TARGET_PARAM)) { | ||||
TgtPtrBegin == HstPtrBegin) { | TgtPtrBegin == HstPtrBegin) { | ||||
DP("Unified memory is active, no need to map lambda captured" | DP("Unified memory is active, no need to map lambda captured" | ||||
"variable (" DPxMOD ")\n", DPxPTR(HstPtrVal)); | "variable (" DPxMOD ")\n", DPxPTR(HstPtrVal)); | ||||
continue; | continue; | ||||
} | } | ||||
DP("Update lambda reference (" DPxMOD ") -> [" DPxMOD "]\n", | DP("Update lambda reference (" DPxMOD ") -> [" DPxMOD "]\n", | ||||
DPxPTR(Pointer_TgtPtrBegin), DPxPTR(TgtPtrBegin)); | DPxPTR(Pointer_TgtPtrBegin), DPxPTR(TgtPtrBegin)); | ||||
int rt = Device.data_submit(TgtPtrBegin, &Pointer_TgtPtrBegin, | int rt = Device.data_submit(TgtPtrBegin, &Pointer_TgtPtrBegin, | ||||
sizeof(void *)); | sizeof(void *), &AsyncInfo); | ||||
if (rt != OFFLOAD_SUCCESS) { | if (rt != OFFLOAD_SUCCESS) { | ||||
DP("Copying data to device failed.\n"); | DP("Copying data to device failed.\n"); | ||||
return OFFLOAD_FAIL; | return OFFLOAD_FAIL; | ||||
} | } | ||||
} | } | ||||
continue; | continue; | ||||
} | } | ||||
void *HstPtrBegin = args[i]; | void *HstPtrBegin = args[i]; | ||||
Show All 24 Lines | #ifdef OMPTARGET_DEBUG | ||||
DP("Allocated %" PRId64 " bytes of target memory at " DPxMOD " for " | DP("Allocated %" PRId64 " bytes of target memory at " DPxMOD " for " | ||||
"%sprivate array " DPxMOD " - pushing target argument " DPxMOD "\n", | "%sprivate array " DPxMOD " - pushing target argument " DPxMOD "\n", | ||||
arg_sizes[i], DPxPTR(TgtPtrBegin), | arg_sizes[i], DPxPTR(TgtPtrBegin), | ||||
(arg_types[i] & OMP_TGT_MAPTYPE_TO ? "first-" : ""), | (arg_types[i] & OMP_TGT_MAPTYPE_TO ? "first-" : ""), | ||||
DPxPTR(HstPtrBegin), DPxPTR(TgtPtrBase)); | DPxPTR(HstPtrBegin), DPxPTR(TgtPtrBase)); | ||||
#endif | #endif | ||||
// If first-private, copy data from host | // If first-private, copy data from host | ||||
if (arg_types[i] & OMP_TGT_MAPTYPE_TO) { | if (arg_types[i] & OMP_TGT_MAPTYPE_TO) { | ||||
int rt = Device.data_submit(TgtPtrBegin, HstPtrBegin, arg_sizes[i]); | int rt = Device.data_submit(TgtPtrBegin, HstPtrBegin, arg_sizes[i], | ||||
&AsyncInfo); | |||||
if (rt != OFFLOAD_SUCCESS) { | if (rt != OFFLOAD_SUCCESS) { | ||||
DP ("Copying data to device failed, failed.\n"); | DP("Copying data to device failed, failed.\n"); | ||||
return OFFLOAD_FAIL; | return OFFLOAD_FAIL; | ||||
} | } | ||||
} | } | ||||
} else if (arg_types[i] & OMP_TGT_MAPTYPE_PTR_AND_OBJ) { | } else if (arg_types[i] & OMP_TGT_MAPTYPE_PTR_AND_OBJ) { | ||||
TgtPtrBegin = Device.getTgtPtrBegin(HstPtrBase, sizeof(void *), IsLast, | TgtPtrBegin = Device.getTgtPtrBegin(HstPtrBase, sizeof(void *), IsLast, | ||||
false, IsHostPtr); | false, IsHostPtr); | ||||
TgtBaseOffset = 0; // no offset for ptrs. | TgtBaseOffset = 0; // no offset for ptrs. | ||||
DP("Obtained target argument " DPxMOD " from host pointer " DPxMOD " to " | DP("Obtained target argument " DPxMOD " from host pointer " DPxMOD " to " | ||||
Show All 29 Lines | #endif | ||||
TblMapMtx->unlock(); | TblMapMtx->unlock(); | ||||
// Launch device execution. | // Launch device execution. | ||||
DP("Launching target execution %s with pointer " DPxMOD " (index=%d).\n", | DP("Launching target execution %s with pointer " DPxMOD " (index=%d).\n", | ||||
TargetTable->EntriesBegin[TM->Index].name, | TargetTable->EntriesBegin[TM->Index].name, | ||||
DPxPTR(TargetTable->EntriesBegin[TM->Index].addr), TM->Index); | DPxPTR(TargetTable->EntriesBegin[TM->Index].addr), TM->Index); | ||||
if (IsTeamConstruct) { | if (IsTeamConstruct) { | ||||
rc = Device.run_team_region(TargetTable->EntriesBegin[TM->Index].addr, | rc = Device.run_team_region(TargetTable->EntriesBegin[TM->Index].addr, | ||||
&tgt_args[0], &tgt_offsets[0], tgt_args.size(), team_num, | &tgt_args[0], &tgt_offsets[0], tgt_args.size(), | ||||
thread_limit, ltc); | team_num, thread_limit, ltc, &AsyncInfo); | ||||
} else { | } else { | ||||
rc = Device.run_region(TargetTable->EntriesBegin[TM->Index].addr, | rc = Device.run_region(TargetTable->EntriesBegin[TM->Index].addr, | ||||
&tgt_args[0], &tgt_offsets[0], tgt_args.size()); | &tgt_args[0], &tgt_offsets[0], tgt_args.size(), | ||||
&AsyncInfo); | |||||
} | } | ||||
if (rc != OFFLOAD_SUCCESS) { | if (rc != OFFLOAD_SUCCESS) { | ||||
DP ("Executing target region abort target.\n"); | DP ("Executing target region abort target.\n"); | ||||
return OFFLOAD_FAIL; | return OFFLOAD_FAIL; | ||||
} | } | ||||
// Deallocate (first-)private arrays | // Deallocate (first-)private arrays | ||||
for (auto it : fpArrays) { | for (auto it : fpArrays) { | ||||
int rt = Device.RTL->data_delete(Device.RTLDeviceID, it); | int rt = Device.RTL->data_delete(Device.RTLDeviceID, it); | ||||
if (rt != OFFLOAD_SUCCESS) { | if (rt != OFFLOAD_SUCCESS) { | ||||
DP("Deallocation of (first-)private arrays failed.\n"); | DP("Deallocation of (first-)private arrays failed.\n"); | ||||
return OFFLOAD_FAIL; | return OFFLOAD_FAIL; | ||||
} | } | ||||
} | } | ||||
// Move data from device. | // Move data from device. | ||||
int rt = target_data_end(Device, arg_num, args_base, args, arg_sizes, | int rt = target_data_end(Device, arg_num, args_base, args, arg_sizes, | ||||
arg_types); | arg_types, &AsyncInfo); | ||||
if (rt != OFFLOAD_SUCCESS) { | if (rt != OFFLOAD_SUCCESS) { | ||||
DP("Call to target_data_end failed, abort targe.\n"); | DP("Call to target_data_end failed, abort targe.\n"); | ||||
return OFFLOAD_FAIL; | return OFFLOAD_FAIL; | ||||
} | } | ||||
return OFFLOAD_SUCCESS; | return Device.RTL->synchronize(device_id, &AsyncInfo); | ||||
} | } |
Move this to the struct definition. It's C++ after all.