diff --git a/openmp/libomptarget/include/omptarget.h b/openmp/libomptarget/include/omptarget.h --- a/openmp/libomptarget/include/omptarget.h +++ b/openmp/libomptarget/include/omptarget.h @@ -160,6 +160,14 @@ int32_t depNum, void *depList, int32_t noAliasDepNum, void *noAliasDepList); +void __tgt_target_data_begin_mapper(int64_t device_id, int32_t arg_num, + void **args_base, void **args, + int64_t *arg_sizes, int64_t *arg_types, + void **arg_mappers); +void __tgt_target_data_begin_nowait_mapper( + int64_t device_id, int32_t arg_num, void **args_base, void **args, + int64_t *arg_sizes, int64_t *arg_types, void **arg_mappers, int32_t depNum, + void *depList, int32_t noAliasDepNum, void *noAliasDepList); // passes data from the target, release target memory and destroys the // host-target mapping (top entry from the stack of data maps) created by @@ -171,6 +179,16 @@ int64_t *arg_sizes, int64_t *arg_types, int32_t depNum, void *depList, int32_t noAliasDepNum, void *noAliasDepList); +void __tgt_target_data_end_mapper(int64_t device_id, int32_t arg_num, + void **args_base, void **args, + int64_t *arg_sizes, int64_t *arg_types, + void **arg_mappers); +void __tgt_target_data_end_nowait_mapper(int64_t device_id, int32_t arg_num, + void **args_base, void **args, + int64_t *arg_sizes, int64_t *arg_types, + void **arg_mappers, int32_t depNum, + void *depList, int32_t noAliasDepNum, + void *noAliasDepList); /// passes data to/from the target void __tgt_target_data_update(int64_t device_id, int32_t arg_num, @@ -182,6 +200,14 @@ int32_t depNum, void *depList, int32_t noAliasDepNum, void *noAliasDepList); +void __tgt_target_data_update_mapper(int64_t device_id, int32_t arg_num, + void **args_base, void **args, + int64_t *arg_sizes, int64_t *arg_types, + void **arg_mappers); +void __tgt_target_data_update_nowait_mapper( + int64_t device_id, int32_t arg_num, void **args_base, void **args, + int64_t *arg_sizes, int64_t *arg_types, void **arg_mappers, int32_t depNum, + void *depList, int32_t noAliasDepNum, void *noAliasDepList); // Performs the same actions as data_begin in case arg_num is non-zero // and initiates run of offloaded region on target platform; if arg_num @@ -196,6 +222,15 @@ void **args_base, void **args, int64_t *arg_sizes, int64_t *arg_types, int32_t depNum, void *depList, int32_t noAliasDepNum, void *noAliasDepList); +int __tgt_target_mapper(int64_t device_id, void *host_ptr, int32_t arg_num, + void **args_base, void **args, int64_t *arg_sizes, + int64_t *arg_types, void **arg_mappers); +int __tgt_target_nowait_mapper(int64_t device_id, void *host_ptr, + int32_t arg_num, void **args_base, void **args, + int64_t *arg_sizes, int64_t *arg_types, + void **arg_mappers, int32_t depNum, + void *depList, int32_t noAliasDepNum, + void *noAliasDepList); int __tgt_target_teams(int64_t device_id, void *host_ptr, int32_t arg_num, void **args_base, void **args, int64_t *arg_sizes, @@ -207,6 +242,17 @@ int32_t num_teams, int32_t thread_limit, int32_t depNum, void *depList, int32_t noAliasDepNum, void *noAliasDepList); +int __tgt_target_teams_mapper(int64_t device_id, void *host_ptr, + int32_t arg_num, void **args_base, void **args, + int64_t *arg_sizes, int64_t *arg_types, + void **arg_mappers, int32_t num_teams, + int32_t thread_limit); +int __tgt_target_teams_nowait_mapper( + int64_t device_id, void *host_ptr, int32_t arg_num, void **args_base, + void **args, int64_t *arg_sizes, int64_t *arg_types, void **arg_mappers, + int32_t num_teams, int32_t thread_limit, int32_t depNum, void *depList, + int32_t noAliasDepNum, void *noAliasDepList); + void __kmpc_push_target_tripcount(int64_t device_id, uint64_t loop_tripcount); #ifdef __cplusplus diff --git a/openmp/libomptarget/src/exports b/openmp/libomptarget/src/exports --- a/openmp/libomptarget/src/exports +++ b/openmp/libomptarget/src/exports @@ -13,6 +13,16 @@ __tgt_target_data_update_nowait; __tgt_target_nowait; __tgt_target_teams_nowait; + __tgt_target_data_begin_mapper; + __tgt_target_data_end_mapper; + __tgt_target_data_update_mapper; + __tgt_target_mapper; + __tgt_target_teams_mapper; + __tgt_target_data_begin_nowait_mapper; + __tgt_target_data_end_nowait_mapper; + __tgt_target_data_update_nowait_mapper; + __tgt_target_nowait_mapper; + __tgt_target_teams_nowait_mapper; __tgt_mapper_num_components; __tgt_push_mapper_component; omp_get_num_devices; diff --git a/openmp/libomptarget/src/interface.cpp b/openmp/libomptarget/src/interface.cpp --- a/openmp/libomptarget/src/interface.cpp +++ b/openmp/libomptarget/src/interface.cpp @@ -91,6 +91,24 @@ /// and passes the data to the device. EXTERN void __tgt_target_data_begin(int64_t device_id, int32_t arg_num, void **args_base, void **args, int64_t *arg_sizes, int64_t *arg_types) { + __tgt_target_data_begin_mapper(device_id, arg_num, args_base, args, + arg_sizes, arg_types, nullptr); +} + +EXTERN void __tgt_target_data_begin_nowait(int64_t device_id, int32_t arg_num, + void **args_base, void **args, int64_t *arg_sizes, int64_t *arg_types, + int32_t depNum, void *depList, int32_t noAliasDepNum, + void *noAliasDepList) { + if (depNum + noAliasDepNum > 0) + __kmpc_omp_taskwait(NULL, __kmpc_global_thread_num(NULL)); + + __tgt_target_data_begin_mapper(device_id, arg_num, args_base, args, + arg_sizes, arg_types, nullptr); +} + +EXTERN void __tgt_target_data_begin_mapper(int64_t device_id, int32_t arg_num, + void **args_base, void **args, int64_t *arg_sizes, int64_t *arg_types, + void **arg_mappers) { if (IsOffloadDisabled()) return; DP("Entering data begin region for device %" PRId64 " with %d mappings\n", @@ -119,19 +137,19 @@ #endif int rc = target_data_begin(Device, arg_num, args_base, args, arg_sizes, - arg_types, nullptr); + arg_types, arg_mappers, nullptr); HandleTargetOutcome(rc == OFFLOAD_SUCCESS); } -EXTERN void __tgt_target_data_begin_nowait(int64_t device_id, int32_t arg_num, - void **args_base, void **args, int64_t *arg_sizes, int64_t *arg_types, - int32_t depNum, void *depList, int32_t noAliasDepNum, - void *noAliasDepList) { +EXTERN void __tgt_target_data_begin_nowait_mapper(int64_t device_id, + int32_t arg_num, void **args_base, void **args, int64_t *arg_sizes, + int64_t *arg_types, void **arg_mappers, int32_t depNum, void *depList, + int32_t noAliasDepNum, void *noAliasDepList) { if (depNum + noAliasDepNum > 0) __kmpc_omp_taskwait(NULL, __kmpc_global_thread_num(NULL)); - __tgt_target_data_begin(device_id, arg_num, args_base, args, arg_sizes, - arg_types); + __tgt_target_data_begin_mapper(device_id, arg_num, args_base, args, + arg_sizes, arg_types, arg_mappers); } /// passes data from the target, releases target memory and destroys @@ -139,6 +157,24 @@ /// created by the last __tgt_target_data_begin. EXTERN void __tgt_target_data_end(int64_t device_id, int32_t arg_num, void **args_base, void **args, int64_t *arg_sizes, int64_t *arg_types) { + __tgt_target_data_end_mapper(device_id, arg_num, args_base, args, arg_sizes, + arg_types, nullptr); +} + +EXTERN void __tgt_target_data_end_nowait(int64_t device_id, int32_t arg_num, + void **args_base, void **args, int64_t *arg_sizes, int64_t *arg_types, + int32_t depNum, void *depList, int32_t noAliasDepNum, + void *noAliasDepList) { + if (depNum + noAliasDepNum > 0) + __kmpc_omp_taskwait(NULL, __kmpc_global_thread_num(NULL)); + + __tgt_target_data_end_mapper(device_id, arg_num, args_base, args, arg_sizes, + arg_types, nullptr); +} + +EXTERN void __tgt_target_data_end_mapper(int64_t device_id, int32_t arg_num, + void **args_base, void **args, int64_t *arg_sizes, int64_t *arg_types, + void **arg_mappers) { if (IsOffloadDisabled()) return; DP("Entering data end region with %d mappings\n", arg_num); @@ -172,23 +208,41 @@ #endif int rc = target_data_end(Device, arg_num, args_base, args, arg_sizes, - arg_types, nullptr); + arg_types, arg_mappers, nullptr); HandleTargetOutcome(rc == OFFLOAD_SUCCESS); } -EXTERN void __tgt_target_data_end_nowait(int64_t device_id, int32_t arg_num, +EXTERN void __tgt_target_data_end_nowait_mapper(int64_t device_id, + int32_t arg_num, void **args_base, void **args, int64_t *arg_sizes, + int64_t *arg_types, void **arg_mappers, int32_t depNum, void *depList, + int32_t noAliasDepNum, void *noAliasDepList) { + if (depNum + noAliasDepNum > 0) + __kmpc_omp_taskwait(NULL, __kmpc_global_thread_num(NULL)); + + __tgt_target_data_end_mapper(device_id, arg_num, args_base, args, arg_sizes, + arg_types, arg_mappers); +} + +EXTERN void __tgt_target_data_update(int64_t device_id, int32_t arg_num, + void **args_base, void **args, int64_t *arg_sizes, int64_t *arg_types) { + __tgt_target_data_update_mapper(device_id, arg_num, args_base, args, + arg_sizes, arg_types, nullptr); +} + +EXTERN void __tgt_target_data_update_nowait(int64_t device_id, int32_t arg_num, void **args_base, void **args, int64_t *arg_sizes, int64_t *arg_types, int32_t depNum, void *depList, int32_t noAliasDepNum, void *noAliasDepList) { if (depNum + noAliasDepNum > 0) __kmpc_omp_taskwait(NULL, __kmpc_global_thread_num(NULL)); - __tgt_target_data_end(device_id, arg_num, args_base, args, arg_sizes, - arg_types); + __tgt_target_data_update_mapper(device_id, arg_num, args_base, args, + arg_sizes, arg_types, nullptr); } -EXTERN void __tgt_target_data_update(int64_t device_id, int32_t arg_num, - void **args_base, void **args, int64_t *arg_sizes, int64_t *arg_types) { +EXTERN void __tgt_target_data_update_mapper(int64_t device_id, int32_t arg_num, + void **args_base, void **args, int64_t *arg_sizes, int64_t *arg_types, + void **arg_mappers) { if (IsOffloadDisabled()) return; DP("Entering data update with %d mappings\n", arg_num); @@ -205,23 +259,41 @@ DeviceTy& Device = Devices[device_id]; int rc = target_data_update(Device, arg_num, args_base, - args, arg_sizes, arg_types); + args, arg_sizes, arg_types, arg_mappers); HandleTargetOutcome(rc == OFFLOAD_SUCCESS); } -EXTERN void __tgt_target_data_update_nowait( - int64_t device_id, int32_t arg_num, void **args_base, void **args, - int64_t *arg_sizes, int64_t *arg_types, int32_t depNum, void *depList, +EXTERN void __tgt_target_data_update_nowait_mapper(int64_t device_id, + int32_t arg_num, void **args_base, void **args, int64_t *arg_sizes, + int64_t *arg_types, void **arg_mappers, int32_t depNum, void *depList, int32_t noAliasDepNum, void *noAliasDepList) { if (depNum + noAliasDepNum > 0) __kmpc_omp_taskwait(NULL, __kmpc_global_thread_num(NULL)); - __tgt_target_data_update(device_id, arg_num, args_base, args, arg_sizes, - arg_types); + __tgt_target_data_update_mapper(device_id, arg_num, args_base, args, + arg_sizes, arg_types, arg_mappers); } EXTERN int __tgt_target(int64_t device_id, void *host_ptr, int32_t arg_num, void **args_base, void **args, int64_t *arg_sizes, int64_t *arg_types) { + return __tgt_target_mapper(device_id, host_ptr, arg_num, args_base, args, + arg_sizes, arg_types, nullptr); +} + +EXTERN int __tgt_target_nowait(int64_t device_id, void *host_ptr, + int32_t arg_num, void **args_base, void **args, int64_t *arg_sizes, + int64_t *arg_types, int32_t depNum, void *depList, int32_t noAliasDepNum, + void *noAliasDepList) { + if (depNum + noAliasDepNum > 0) + __kmpc_omp_taskwait(NULL, __kmpc_global_thread_num(NULL)); + + return __tgt_target_mapper(device_id, host_ptr, arg_num, args_base, args, + arg_sizes, arg_types, nullptr); +} + +EXTERN int __tgt_target_mapper(int64_t device_id, void *host_ptr, + int32_t arg_num, void **args_base, void **args, int64_t *arg_sizes, + int64_t *arg_types, void **arg_mappers) { if (IsOffloadDisabled()) return OFFLOAD_FAIL; DP("Entering target region with entry point " DPxMOD " and device Id %" PRId64 "\n", DPxPTR(host_ptr), device_id); @@ -245,25 +317,43 @@ #endif int rc = target(device_id, host_ptr, arg_num, args_base, args, arg_sizes, - arg_types, 0, 0, false /*team*/); + arg_types, arg_mappers, 0, 0, false /*team*/); HandleTargetOutcome(rc == OFFLOAD_SUCCESS); return rc; } -EXTERN int __tgt_target_nowait(int64_t device_id, void *host_ptr, +EXTERN int __tgt_target_nowait_mapper(int64_t device_id, void *host_ptr, int32_t arg_num, void **args_base, void **args, int64_t *arg_sizes, - int64_t *arg_types, int32_t depNum, void *depList, int32_t noAliasDepNum, - void *noAliasDepList) { + int64_t *arg_types, void **arg_mappers, int32_t depNum, void *depList, + int32_t noAliasDepNum, void *noAliasDepList) { if (depNum + noAliasDepNum > 0) __kmpc_omp_taskwait(NULL, __kmpc_global_thread_num(NULL)); - return __tgt_target(device_id, host_ptr, arg_num, args_base, args, arg_sizes, - arg_types); + return __tgt_target_mapper(device_id, host_ptr, arg_num, args_base, args, + arg_sizes, arg_types, arg_mappers); } EXTERN int __tgt_target_teams(int64_t device_id, void *host_ptr, int32_t arg_num, void **args_base, void **args, int64_t *arg_sizes, int64_t *arg_types, int32_t team_num, int32_t thread_limit) { + return __tgt_target_teams_mapper(device_id, host_ptr, arg_num, args_base, + args, arg_sizes, arg_types, nullptr, team_num, thread_limit); +} + +EXTERN int __tgt_target_teams_nowait(int64_t device_id, void *host_ptr, + int32_t arg_num, void **args_base, void **args, int64_t *arg_sizes, + int64_t *arg_types, int32_t team_num, int32_t thread_limit, int32_t depNum, + void *depList, int32_t noAliasDepNum, void *noAliasDepList) { + if (depNum + noAliasDepNum > 0) + __kmpc_omp_taskwait(NULL, __kmpc_global_thread_num(NULL)); + + return __tgt_target_teams_mapper(device_id, host_ptr, arg_num, args_base, + args, arg_sizes, arg_types, nullptr, team_num, thread_limit); +} + +EXTERN int __tgt_target_teams_mapper(int64_t device_id, void *host_ptr, + int32_t arg_num, void **args_base, void **args, int64_t *arg_sizes, + int64_t *arg_types, void **arg_mappers, int32_t team_num, int32_t thread_limit) { if (IsOffloadDisabled()) return OFFLOAD_FAIL; DP("Entering target region with entry point " DPxMOD " and device Id %" PRId64 "\n", DPxPTR(host_ptr), device_id); @@ -287,21 +377,22 @@ #endif int rc = target(device_id, host_ptr, arg_num, args_base, args, arg_sizes, - arg_types, team_num, thread_limit, true /*team*/); + arg_types, arg_mappers, team_num, thread_limit, true /*team*/); HandleTargetOutcome(rc == OFFLOAD_SUCCESS); return rc; } -EXTERN int __tgt_target_teams_nowait(int64_t device_id, void *host_ptr, +EXTERN int __tgt_target_teams_nowait_mapper(int64_t device_id, void *host_ptr, int32_t arg_num, void **args_base, void **args, int64_t *arg_sizes, - int64_t *arg_types, int32_t team_num, int32_t thread_limit, int32_t depNum, - void *depList, int32_t noAliasDepNum, void *noAliasDepList) { + int64_t *arg_types, void **arg_mappers, int32_t team_num, + int32_t thread_limit, int32_t depNum, void *depList, int32_t noAliasDepNum, + void *noAliasDepList) { if (depNum + noAliasDepNum > 0) __kmpc_omp_taskwait(NULL, __kmpc_global_thread_num(NULL)); - return __tgt_target_teams(device_id, host_ptr, arg_num, args_base, args, - arg_sizes, arg_types, team_num, thread_limit); + return __tgt_target_teams_mapper(device_id, host_ptr, arg_num, args_base, + args, arg_sizes, arg_types, arg_mappers, team_num, thread_limit); } // Get the current number of components for a user-defined mapper. diff --git a/openmp/libomptarget/src/omptarget.cpp b/openmp/libomptarget/src/omptarget.cpp --- a/openmp/libomptarget/src/omptarget.cpp +++ b/openmp/libomptarget/src/omptarget.cpp @@ -166,8 +166,8 @@ DP("Has pending ctors... call now\n"); for (auto &entry : lib.second.PendingCtors) { void *ctor = entry; - int rc = target(device_id, ctor, 0, NULL, NULL, NULL, - NULL, 1, 1, true /*team*/); + int rc = target(device_id, ctor, 0, NULL, NULL, NULL, NULL, NULL, 1, + 1, true /*team*/); if (rc != OFFLOAD_SUCCESS) { DP("Running ctor " DPxMOD " failed.\n", DPxPTR(ctor)); Device.PendingGlobalsMtx.unlock(); @@ -214,10 +214,46 @@ return ((type & OMP_TGT_MAPTYPE_MEMBER_OF) >> 48) - 1; } +/// Call the user-defined mapper function followed by the appropriate +// target_data_* function (target_data_{begin,end,update}). +int target_data_mapper(DeviceTy &Device, void *arg_base, + void *arg, int64_t arg_size, int64_t arg_type, void *arg_mapper, + TargetDataFuncPtrTy target_data_function) { + DP("Calling the mapper function " DPxMOD "\n", DPxPTR(arg_mapper)); + + // The mapper function fills up Components. + MapperComponentsTy MapperComponents; + MapperFuncPtrTy MapperFuncPtr = (MapperFuncPtrTy)(arg_mapper); + (*MapperFuncPtr)((void *)&MapperComponents, arg_base, arg, arg_size, + arg_type); + + // Construct new arrays for args_base, args, arg_sizes and arg_types + // using the information in MapperComponents and call the corresponding + // target_data_* function using these new arrays. + std::vector mapper_args_base; + std::vector mapper_args; + std::vector mapper_arg_sizes; + std::vector mapper_arg_types; + + for (auto& C : MapperComponents.Components) { + mapper_args_base.push_back(C.Base); + mapper_args.push_back(C.Begin); + mapper_arg_sizes.push_back(C.Size); + mapper_arg_types.push_back(C.Type); + } + + int rc = target_data_function(Device, MapperComponents.Components.size(), + mapper_args_base.data(), mapper_args.data(), mapper_arg_sizes.data(), + mapper_arg_types.data(), /*arg_mappers*/ nullptr, + /*__tgt_async_info*/ nullptr); + + return rc; +} + /// Internal function to do the mapping and transfer the data to the device int target_data_begin(DeviceTy &Device, int32_t arg_num, void **args_base, void **args, int64_t *arg_sizes, int64_t *arg_types, - __tgt_async_info *async_info_ptr) { + void **arg_mappers, __tgt_async_info *async_info_ptr) { // process each input. for (int32_t i = 0; i < arg_num; ++i) { // Ignore private variables and arrays - there is no mapping for them. @@ -225,6 +261,25 @@ (arg_types[i] & OMP_TGT_MAPTYPE_PRIVATE)) continue; + if (arg_mappers && arg_mappers[i]) { + // Instead of executing the regular path of target_data_begin, call the + // target_data_mapper variant which will call target_data_begin again + // with new arguments. + DP("Calling target_data_mapper for the %dth argument\n", i); + + int rc = target_data_mapper(Device, args_base[i], args[i], arg_sizes[i], + arg_types[i], arg_mappers[i], target_data_begin); + + if (rc != OFFLOAD_SUCCESS) { + DP("Call to target_data_begin via target_data_mapper for custom mapper" + " failed.\n"); + return OFFLOAD_FAIL; + } + + // Skip the rest of this function, continue to the next argument. + continue; + } + void *HstPtrBegin = args[i]; void *HstPtrBase = args_base[i]; int64_t data_size = arg_sizes[i]; @@ -353,7 +408,7 @@ /// Internal function to undo the mapping and retrieve the data from the device. int target_data_end(DeviceTy &Device, int32_t arg_num, void **args_base, void **args, int64_t *arg_sizes, int64_t *arg_types, - __tgt_async_info *async_info_ptr) { + void **arg_mappers, __tgt_async_info *async_info_ptr) { // process each input. for (int32_t i = arg_num - 1; i >= 0; --i) { // Ignore private variables and arrays - there is no mapping for them. @@ -362,6 +417,25 @@ (arg_types[i] & OMP_TGT_MAPTYPE_PRIVATE)) continue; + if (arg_mappers && arg_mappers[i]) { + // Instead of executing the regular path of target_data_end, call the + // target_data_mapper variant which will call target_data_end again + // with new arguments. + DP("Calling target_data_mapper for the %dth argument\n", i); + + int rc = target_data_mapper(Device, args_base[i], args[i], arg_sizes[i], + arg_types[i], arg_mappers[i], target_data_end); + + if (rc != OFFLOAD_SUCCESS) { + DP("Call to target_data_end via target_data_mapper for custom mapper" + " failed.\n"); + return OFFLOAD_FAIL; + } + + // Skip the rest of this function, continue to the next argument. + continue; + } + void *HstPtrBegin = args[i]; int64_t data_size = arg_sizes[i]; // Adjust for proper alignment if this is a combined entry (for structs). @@ -486,14 +560,36 @@ } /// Internal function to pass data to/from the target. +// async_info_ptr is currently unused, added here so target_data_update has the +// same signature as target_data_begin and target_data_end. int target_data_update(DeviceTy &Device, int32_t arg_num, - void **args_base, void **args, int64_t *arg_sizes, int64_t *arg_types) { + void **args_base, void **args, int64_t *arg_sizes, int64_t *arg_types, + void **arg_mappers, __tgt_async_info *async_info_ptr) { // process each input. for (int32_t i = 0; i < arg_num; ++i) { if ((arg_types[i] & OMP_TGT_MAPTYPE_LITERAL) || (arg_types[i] & OMP_TGT_MAPTYPE_PRIVATE)) continue; + if (arg_mappers && arg_mappers[i]) { + // Instead of executing the regular path of target_data_update, call the + // target_data_mapper variant which will call target_data_update again + // with new arguments. + DP("Calling target_data_mapper for the %dth argument\n", i); + + int rc = target_data_mapper(Device, args_base[i], args[i], arg_sizes[i], + arg_types[i], arg_mappers[i], target_data_update); + + if (rc != OFFLOAD_SUCCESS) { + DP("Call to target_data_update via target_data_mapper for custom mapper" + " failed.\n"); + return OFFLOAD_FAIL; + } + + // Skip the rest of this function, continue to the next argument. + continue; + } + void *HstPtrBegin = args[i]; int64_t MapSize = arg_sizes[i]; bool IsLast, IsHostPtr; @@ -589,7 +685,8 @@ /// integer different from zero otherwise. int target(int64_t device_id, void *host_ptr, int32_t arg_num, void **args_base, void **args, int64_t *arg_sizes, int64_t *arg_types, - int32_t team_num, int32_t thread_limit, int IsTeamConstruct) { + void **arg_mappers, int32_t team_num, int32_t thread_limit, + int IsTeamConstruct) { DeviceTy &Device = Devices[device_id]; // Find the table information in the map or look it up in the translation @@ -647,7 +744,7 @@ // Move data to device. int rc = target_data_begin(Device, arg_num, args_base, args, arg_sizes, - arg_types, &AsyncInfo); + arg_types, arg_mappers, &AsyncInfo); if (rc != OFFLOAD_SUCCESS) { DP("Call to target_data_begin failed, abort target.\n"); return OFFLOAD_FAIL; @@ -811,7 +908,7 @@ // Move data from device. int rt = target_data_end(Device, arg_num, args_base, args, arg_sizes, - arg_types, &AsyncInfo); + arg_types, arg_mappers, &AsyncInfo); if (rt != OFFLOAD_SUCCESS) { DP("Call to target_data_end failed, abort targe.\n"); return OFFLOAD_FAIL; diff --git a/openmp/libomptarget/src/private.h b/openmp/libomptarget/src/private.h --- a/openmp/libomptarget/src/private.h +++ b/openmp/libomptarget/src/private.h @@ -19,19 +19,24 @@ extern int target_data_begin(DeviceTy &Device, int32_t arg_num, void **args_base, void **args, int64_t *arg_sizes, - int64_t *arg_types, + int64_t *arg_types, void **arg_mappers, __tgt_async_info *async_info_ptr); extern int target_data_end(DeviceTy &Device, int32_t arg_num, void **args_base, void **args, int64_t *arg_sizes, int64_t *arg_types, + void **arg_mappers, __tgt_async_info *async_info_ptr); extern int target_data_update(DeviceTy &Device, int32_t arg_num, - void **args_base, void **args, int64_t *arg_sizes, int64_t *arg_types); + void **args_base, void **args, + int64_t *arg_sizes, int64_t *arg_types, + void **arg_mappers, + __tgt_async_info *async_info_ptr = nullptr); extern int target(int64_t device_id, void *host_ptr, int32_t arg_num, - void **args_base, void **args, int64_t *arg_sizes, int64_t *arg_types, - int32_t team_num, int32_t thread_limit, int IsTeamConstruct); + void **args_base, void **args, int64_t *arg_sizes, + int64_t *arg_types, void **arg_mappers, int32_t team_num, + int32_t thread_limit, int IsTeamConstruct); extern int CheckDeviceAndCtors(int64_t device_id); @@ -60,8 +65,20 @@ // implementation here. struct MapperComponentsTy { std::vector Components; + int32_t size() { return Components.size(); } }; +// The mapper function pointer type. It follows the signature below: +// void .omp_mapper...(void *rt_mapper_handle, +// void *base, void *begin, +// size_t size, int64_t type); +typedef void (*MapperFuncPtrTy)(void *, void *, void *, int64_t, int64_t); + +// Function pointer type for target_data_* functions (target_data_begin, +// target_data_end and target_data_update). +typedef int (*TargetDataFuncPtrTy)(DeviceTy &, int32_t, void **, void **, + int64_t *, int64_t *, void **, __tgt_async_info *); + //////////////////////////////////////////////////////////////////////////////// // implementation for fatal messages //////////////////////////////////////////////////////////////////////////////// diff --git a/openmp/libomptarget/src/rtl.cpp b/openmp/libomptarget/src/rtl.cpp --- a/openmp/libomptarget/src/rtl.cpp +++ b/openmp/libomptarget/src/rtl.cpp @@ -387,8 +387,8 @@ Device.PendingGlobalsMtx.lock(); if (Device.PendingCtorsDtors[desc].PendingCtors.empty()) { for (auto &dtor : Device.PendingCtorsDtors[desc].PendingDtors) { - int rc = target(Device.DeviceID, dtor, 0, NULL, NULL, NULL, NULL, 1, - 1, true /*team*/); + int rc = target(Device.DeviceID, dtor, 0, NULL, NULL, NULL, NULL, + NULL, 1, 1, true /*team*/); if (rc != OFFLOAD_SUCCESS) { DP("Running destructor " DPxMOD " failed.\n", DPxPTR(dtor)); } diff --git a/openmp/libomptarget/test/mapping/declare_mapper_api.cpp b/openmp/libomptarget/test/mapping/declare_mapper_api.cpp --- a/openmp/libomptarget/test/mapping/declare_mapper_api.cpp +++ b/openmp/libomptarget/test/mapping/declare_mapper_api.cpp @@ -6,6 +6,7 @@ #include #include #include +#include // Data structure definitions copied from OpenMP RTL. struct MapComponentInfoTy { @@ -42,6 +43,6 @@ __tgt_push_mapper_component((void *)&MC, base, begin, size, type); int64_t num = __tgt_mapper_num_components((void *)&MC); // CHECK: num=2 - printf("num=%lld\n", num); + printf("num=%" PRId64 "\n", num); return 0; } diff --git a/openmp/libomptarget/test/mapping/declare_mapper_target.cpp b/openmp/libomptarget/test/mapping/declare_mapper_target.cpp new file mode 100644 --- /dev/null +++ b/openmp/libomptarget/test/mapping/declare_mapper_target.cpp @@ -0,0 +1,37 @@ +// RUN: %libomptarget-compile-run-and-check-aarch64-unknown-linux-gnu +// RUN: %libomptarget-compile-run-and-check-nvptx64-nvidia-cuda +// RUN: %libomptarget-compile-run-and-check-powerpc64-ibm-linux-gnu +// RUN: %libomptarget-compile-run-and-check-powerpc64le-ibm-linux-gnu +// RUN: %libomptarget-compile-run-and-check-x86_64-pc-linux-gnu + +#include +#include + +#define NUM 1024 + +class C { +public: + int *a; +}; + +#pragma omp declare mapper(id: C s) map(s.a[0:NUM]) + +int main() { + C c; + c.a = (int*) malloc(sizeof(int)*NUM); + for (int i = 0; i < NUM; i++) { + c.a[i] = 1; + } + #pragma omp target teams distribute parallel for map(mapper(id),tofrom: c) + for (int i = 0; i < NUM; i++) { + ++c.a[i]; + } + int sum = 0; + for (int i = 0; i < NUM; i++) { + sum += c.a[i]; + } + // CHECK: Sum = 2048 + printf("Sum = %d\n", sum); + return 0; +} + diff --git a/openmp/libomptarget/test/mapping/declare_mapper_target_data.cpp b/openmp/libomptarget/test/mapping/declare_mapper_target_data.cpp new file mode 100644 --- /dev/null +++ b/openmp/libomptarget/test/mapping/declare_mapper_target_data.cpp @@ -0,0 +1,40 @@ +// RUN: %libomptarget-compile-run-and-check-aarch64-unknown-linux-gnu +// RUN: %libomptarget-compile-run-and-check-nvptx64-nvidia-cuda +// RUN: %libomptarget-compile-run-and-check-powerpc64-ibm-linux-gnu +// RUN: %libomptarget-compile-run-and-check-powerpc64le-ibm-linux-gnu +// RUN: %libomptarget-compile-run-and-check-x86_64-pc-linux-gnu + +#include +#include + +#define NUM 1024 + +class C { +public: + int *a; +}; + +#pragma omp declare mapper(id: C s) map(s.a[0:NUM]) + +int main() { + C c; + c.a = (int*) malloc(sizeof(int)*NUM); + for (int i = 0; i < NUM; i++) { + c.a[i] = 1; + } + #pragma omp target data map(mapper(id),tofrom: c) + { + #pragma omp target teams distribute parallel for + for (int i = 0; i < NUM; i++) { + ++c.a[i]; + } + } + int sum = 0; + for (int i = 0; i < NUM; i++) { + sum += c.a[i]; + } + // CHECK: Sum = 2048 + printf("Sum = %d\n", sum); + return 0; +} + diff --git a/openmp/libomptarget/test/mapping/declare_mapper_target_data_enter_exit.cpp b/openmp/libomptarget/test/mapping/declare_mapper_target_data_enter_exit.cpp new file mode 100644 --- /dev/null +++ b/openmp/libomptarget/test/mapping/declare_mapper_target_data_enter_exit.cpp @@ -0,0 +1,39 @@ +// RUN: %libomptarget-compile-run-and-check-aarch64-unknown-linux-gnu +// RUN: %libomptarget-compile-run-and-check-nvptx64-nvidia-cuda +// RUN: %libomptarget-compile-run-and-check-powerpc64-ibm-linux-gnu +// RUN: %libomptarget-compile-run-and-check-powerpc64le-ibm-linux-gnu +// RUN: %libomptarget-compile-run-and-check-x86_64-pc-linux-gnu + +#include +#include + +#define NUM 1024 + +class C { +public: + int *a; +}; + +#pragma omp declare mapper(id: C s) map(s.a[0:NUM]) + +int main() { + C c; + c.a = (int*) malloc(sizeof(int)*NUM); + for (int i = 0; i < NUM; i++) { + c.a[i] = 1; + } + #pragma omp target enter data map(mapper(id),to: c) + #pragma omp target teams distribute parallel for + for (int i = 0; i < NUM; i++) { + ++c.a[i]; + } + #pragma omp target exit data map(mapper(id),from: c) + int sum = 0; + for (int i = 0; i < NUM; i++) { + sum += c.a[i]; + } + // CHECK: Sum = 2048 + printf("Sum = %d\n", sum); + return 0; +} + diff --git a/openmp/libomptarget/test/mapping/declare_mapper_target_update.cpp b/openmp/libomptarget/test/mapping/declare_mapper_target_update.cpp new file mode 100644 --- /dev/null +++ b/openmp/libomptarget/test/mapping/declare_mapper_target_update.cpp @@ -0,0 +1,61 @@ +// RUN: %libomptarget-compile-run-and-check-aarch64-unknown-linux-gnu +// RUN: %libomptarget-compile-run-and-check-nvptx64-nvidia-cuda +// RUN: %libomptarget-compile-run-and-check-powerpc64-ibm-linux-gnu +// RUN: %libomptarget-compile-run-and-check-powerpc64le-ibm-linux-gnu +// RUN: %libomptarget-compile-run-and-check-x86_64-pc-linux-gnu + +#include +#include + +#define NUM 1024 + +class C { +public: + int *a; +}; + +#pragma omp declare mapper(id: C s) map(s.a[0:NUM]) + +int main() { + C c; + int sum = 0; + c.a = (int*) malloc(sizeof(int)*NUM); + for (int i = 0; i < NUM; i++) { + c.a[i] = 1; + } + #pragma omp target enter data map(mapper(id),alloc: c) + #pragma omp target teams distribute parallel for + for (int i = 0; i < NUM; i++) { + c.a[i] = 0; + } + #pragma omp target update from(mapper(id): c) + for (int i = 0; i < NUM; i++) { + sum += c.a[i]; + } + // CHECK: Sum (after first update from) = 0 + printf("Sum (after first update from) = %d\n", sum); + for (int i = 0; i < NUM; i++) { + c.a[i] = 1; + } + #pragma omp target update to(mapper(id): c) + #pragma omp target teams distribute parallel for + for (int i = 0; i < NUM; i++) { + ++c.a[i]; + } + sum = 0; + for (int i = 0; i < NUM; i++) { + sum += c.a[i]; + } + // CHECK: Sum (after update to) = 1024 + printf("Sum (after update to) = %d\n", sum); + #pragma omp target update from(mapper(id): c) + sum = 0; + for (int i = 0; i < NUM; i++) { + sum += c.a[i]; + } + // CHECK: Sum (after second update from) = 2048 + printf("Sum (after second update from) = %d\n", sum); + #pragma omp target exit data map(mapper(id),delete: c) + return 0; +} +