diff --git a/openmp/libomptarget/include/omptarget.h b/openmp/libomptarget/include/omptarget.h --- a/openmp/libomptarget/include/omptarget.h +++ b/openmp/libomptarget/include/omptarget.h @@ -49,6 +49,8 @@ OMP_TGT_MAPTYPE_IMPLICIT = 0x200, // copy data to device OMP_TGT_MAPTYPE_CLOSE = 0x400, + // descriptor for non-contiguous target-update + OMP_TGT_MAPTYPE_DESCRIPTOR = 0x100000000000, // member of struct, member given by [16 MSBs] - 1 OMP_TGT_MAPTYPE_MEMBER_OF = 0xffff000000000000 }; @@ -120,6 +122,14 @@ void *Queue = nullptr; }; + +/// This struct is a record of non-contiguous information +struct __tgt_target_non_contig { + int64_t offset; + int64_t count; + int64_t stride; +}; + #ifdef __cplusplus extern "C" { #endif diff --git a/openmp/libomptarget/src/omptarget.cpp b/openmp/libomptarget/src/omptarget.cpp --- a/openmp/libomptarget/src/omptarget.cpp +++ b/openmp/libomptarget/src/omptarget.cpp @@ -485,92 +485,155 @@ return OFFLOAD_SUCCESS; } -/// Internal function to pass data to/from the target. -int target_data_update(DeviceTy &Device, int32_t arg_num, - void **args_base, void **args, int64_t *arg_sizes, int64_t *arg_types) { - // process each input. 
- for (int32_t i = 0; i < arg_num; ++i) { - if ((arg_types[i] & OMP_TGT_MAPTYPE_LITERAL) || - (arg_types[i] & OMP_TGT_MAPTYPE_PRIVATE)) - continue; +static int target_data_contiguous(DeviceTy &Device, void *args_base, + void *HstPtrBegin, int64_t MapSize, + int64_t arg_type) { + bool IsLast, IsHostPtr; + void *TgtPtrBegin = + Device.getTgtPtrBegin(HstPtrBegin, MapSize, IsLast, false, IsHostPtr); + if (!TgtPtrBegin) { + DP("hst data:" DPxMOD " not found, becomes a noop\n", DPxPTR(HstPtrBegin)); + return OFFLOAD_SUCCESS; + } - void *HstPtrBegin = args[i]; - int64_t MapSize = arg_sizes[i]; - bool IsLast, IsHostPtr; - void *TgtPtrBegin = Device.getTgtPtrBegin(HstPtrBegin, MapSize, IsLast, - false, IsHostPtr); - if (!TgtPtrBegin) { - DP("hst data:" DPxMOD " not found, becomes a noop\n", DPxPTR(HstPtrBegin)); - continue; - } + if (RTLs->RequiresFlags & OMP_REQ_UNIFIED_SHARED_MEMORY && + TgtPtrBegin == HstPtrBegin) { + DP("hst data:" DPxMOD " unified and shared, becomes a noop\n", + DPxPTR(HstPtrBegin)); + return OFFLOAD_SUCCESS; + } - if (RTLs->RequiresFlags & OMP_REQ_UNIFIED_SHARED_MEMORY && - TgtPtrBegin == HstPtrBegin) { - DP("hst data:" DPxMOD " unified and shared, becomes a noop\n", - DPxPTR(HstPtrBegin)); - continue; + if (arg_type & OMP_TGT_MAPTYPE_FROM) { + DP("Moving %" PRId64 " bytes (tgt:" DPxMOD ") -> (hst:" DPxMOD ")\n", + MapSize, DPxPTR(TgtPtrBegin), DPxPTR(HstPtrBegin)); + int rt = Device.data_retrieve(HstPtrBegin, TgtPtrBegin, MapSize, nullptr); + if (rt != OFFLOAD_SUCCESS) { + DP("Copying data from device failed.\n"); + return OFFLOAD_FAIL; } - if (arg_types[i] & OMP_TGT_MAPTYPE_FROM) { - DP("Moving %" PRId64 " bytes (tgt:" DPxMOD ") -> (hst:" DPxMOD ")\n", - arg_sizes[i], DPxPTR(TgtPtrBegin), DPxPTR(HstPtrBegin)); - int rt = Device.data_retrieve(HstPtrBegin, TgtPtrBegin, MapSize, nullptr); - if (rt != OFFLOAD_SUCCESS) { - DP("Copying data from device failed.\n"); - return OFFLOAD_FAIL; - } - - uintptr_t lb = (uintptr_t) HstPtrBegin; - uintptr_t ub = 
(uintptr_t) HstPtrBegin + MapSize; - Device.ShadowMtx.lock(); - for (ShadowPtrListTy::iterator it = Device.ShadowPtrMap.begin(); - it != Device.ShadowPtrMap.end(); ++it) { - void **ShadowHstPtrAddr = (void**) it->first; - if ((uintptr_t) ShadowHstPtrAddr < lb) + uintptr_t lb = (uintptr_t)HstPtrBegin; + uintptr_t ub = (uintptr_t)HstPtrBegin + MapSize; + Device.ShadowMtx.lock(); + for (ShadowPtrListTy::iterator it = Device.ShadowPtrMap.begin(); + it != Device.ShadowPtrMap.end(); ++it) { + void **ShadowHstPtrAddr = (void **)it->first; + if ((uintptr_t)ShadowHstPtrAddr < lb) continue; - if ((uintptr_t) ShadowHstPtrAddr >= ub) + if ((uintptr_t)ShadowHstPtrAddr >= ub) break; - DP("Restoring original host pointer value " DPxMOD " for host pointer " - DPxMOD "\n", DPxPTR(it->second.HstPtrVal), - DPxPTR(ShadowHstPtrAddr)); - *ShadowHstPtrAddr = it->second.HstPtrVal; - } - Device.ShadowMtx.unlock(); + DP("Restoring original host pointer value " DPxMOD + " for host pointer " DPxMOD "\n", + DPxPTR(it->second.HstPtrVal), DPxPTR(ShadowHstPtrAddr)); + *ShadowHstPtrAddr = it->second.HstPtrVal; } + Device.ShadowMtx.unlock(); + } - if (arg_types[i] & OMP_TGT_MAPTYPE_TO) { - DP("Moving %" PRId64 " bytes (hst:" DPxMOD ") -> (tgt:" DPxMOD ")\n", - arg_sizes[i], DPxPTR(HstPtrBegin), DPxPTR(TgtPtrBegin)); - int rt = Device.data_submit(TgtPtrBegin, HstPtrBegin, MapSize, nullptr); + if (arg_type & OMP_TGT_MAPTYPE_TO) { + DP("Moving %" PRId64 " bytes (hst:" DPxMOD ") -> (tgt:" DPxMOD ")\n", + MapSize, DPxPTR(HstPtrBegin), DPxPTR(TgtPtrBegin)); + int rt = Device.data_submit(TgtPtrBegin, HstPtrBegin, MapSize, nullptr); + if (rt != OFFLOAD_SUCCESS) { + DP("Copying data to device failed.\n"); + return OFFLOAD_FAIL; + } + + uintptr_t lb = (uintptr_t)HstPtrBegin; + uintptr_t ub = (uintptr_t)HstPtrBegin + MapSize; + Device.ShadowMtx.lock(); + for (ShadowPtrListTy::iterator it = Device.ShadowPtrMap.begin(); + it != Device.ShadowPtrMap.end(); ++it) { + void **ShadowHstPtrAddr = (void **)it->first; + 
if ((uintptr_t)ShadowHstPtrAddr < lb) + continue; + if ((uintptr_t)ShadowHstPtrAddr >= ub) + break; + DP("Restoring original target pointer value " DPxMOD " for target " + "pointer " DPxMOD "\n", + DPxPTR(it->second.TgtPtrVal), DPxPTR(it->second.TgtPtrAddr)); + rt = Device.data_submit(it->second.TgtPtrAddr, + &it->second.TgtPtrVal, sizeof(void *), nullptr); if (rt != OFFLOAD_SUCCESS) { DP("Copying data to device failed.\n"); + Device.ShadowMtx.unlock(); return OFFLOAD_FAIL; } + } + Device.ShadowMtx.unlock(); + } - uintptr_t lb = (uintptr_t) HstPtrBegin; - uintptr_t ub = (uintptr_t) HstPtrBegin + MapSize; - Device.ShadowMtx.lock(); - for (ShadowPtrListTy::iterator it = Device.ShadowPtrMap.begin(); - it != Device.ShadowPtrMap.end(); ++it) { - void **ShadowHstPtrAddr = (void**) it->first; - if ((uintptr_t) ShadowHstPtrAddr < lb) - continue; - if ((uintptr_t) ShadowHstPtrAddr >= ub) - break; - DP("Restoring original target pointer value " DPxMOD " for target " - "pointer " DPxMOD "\n", DPxPTR(it->second.TgtPtrVal), - DPxPTR(it->second.TgtPtrAddr)); - rt = Device.data_submit(it->second.TgtPtrAddr, - &it->second.TgtPtrVal, sizeof(void *), nullptr); - if (rt != OFFLOAD_SUCCESS) { - DP("Copying data to device failed.\n"); - Device.ShadowMtx.unlock(); - return OFFLOAD_FAIL; - } + return OFFLOAD_SUCCESS; +} + +static int target_data_non_contig(DeviceTy &Device, void *arg_base, + __tgt_target_non_contig *non_contig, + int64_t size, int64_t arg_type, int dim, + int dim_size, int64_t offset) { + int rt = OFFLOAD_SUCCESS; + if (dim < dim_size) { + for (int i = 0; i < non_contig[dim].count; ++i) { + int64_t cur_offset = + (non_contig[dim].offset + i) * non_contig[dim].stride; + // we only need to transfer the first element for the last dimension + // since we've already got a contiguous piece. 
+ if (dim != dim_size - 1 || i == 0) { + rt = target_data_non_contig(Device, arg_base, non_contig, size, arg_type, + dim + 1, dim_size, offset + cur_offset); + // Stop the whole process if any contiguous piece returns anything + // other than OFFLOAD_SUCCESS. + if (rt != OFFLOAD_SUCCESS) + return rt; } - Device.ShadowMtx.unlock(); } } + else { + char *ptr = (char *)arg_base + offset; + DP("Transfer of non-contiguous : host ptr %lx offset %ld len %ld\n", + (uint64_t)ptr, offset, size); + rt = target_data_contiguous(Device, arg_base, ptr, size, arg_type); + } + return rt; +} + +static int get_non_contig_merged_dimension(__tgt_target_non_contig *non_contig, + int32_t dim_size) { + int removed_dim = 0; + for (int i = dim_size - 1; i > 0; --i) { + if (non_contig[i].count * non_contig[i].stride == non_contig[i - 1].stride) + removed_dim++; + } + return removed_dim; +} + +/// Internal function to pass data to/from the target. +int target_data_update(DeviceTy &Device, int32_t arg_num, + void **args_base, void **args, int64_t *arg_sizes, int64_t *arg_types) { + // process each input. + for (int32_t i = 0; i < arg_num; ++i) { + if ((arg_types[i] & OMP_TGT_MAPTYPE_LITERAL) || + (arg_types[i] & OMP_TGT_MAPTYPE_PRIVATE)) + continue; + + int rt = OFFLOAD_SUCCESS; + + if (arg_types[i] & OMP_TGT_MAPTYPE_DESCRIPTOR) { + __tgt_target_non_contig *non_contig = (__tgt_target_non_contig *)args[i]; + int32_t dim_size = arg_sizes[i]; + int64_t size = + non_contig[dim_size - 1].count * non_contig[dim_size - 1].stride; + int32_t merged_dim = + get_non_contig_merged_dimension(non_contig, dim_size); + rt = target_data_non_contig( + Device, args_base[i], non_contig, size, arg_types[i], + /*current_dim=*/0, dim_size - merged_dim, /*offset=*/0); + } else { + rt = target_data_contiguous(Device, args_base[i], args[i], arg_sizes[i], + arg_types[i]); + } + if (rt == OFFLOAD_FAIL) + return OFFLOAD_FAIL; + } return OFFLOAD_SUCCESS; }