Index: openmp/libomptarget/include/omptarget.h =================================================================== --- openmp/libomptarget/include/omptarget.h +++ openmp/libomptarget/include/omptarget.h @@ -151,6 +151,14 @@ int32_t depNum, void *depList, int32_t noAliasDepNum, void *noAliasDepList); +void __tgt_target_data_begin_mapper(int64_t device_id, int32_t arg_num, + void **args_base, void **args, + int64_t *arg_sizes, int64_t *arg_types, + void **arg_mappers); +void __tgt_target_data_begin_nowait_mapper( + int64_t device_id, int32_t arg_num, void **args_base, void **args, + int64_t *arg_sizes, int64_t *arg_types, void **arg_mappers, int32_t depNum, + void *depList, int32_t noAliasDepNum, void *noAliasDepList); // passes data from the target, release target memory and destroys the // host-target mapping (top entry from the stack of data maps) created by @@ -162,6 +170,16 @@ int64_t *arg_sizes, int64_t *arg_types, int32_t depNum, void *depList, int32_t noAliasDepNum, void *noAliasDepList); +void __tgt_target_data_end_mapper(int64_t device_id, int32_t arg_num, + void **args_base, void **args, + int64_t *arg_sizes, int64_t *arg_types, + void **arg_mappers); +void __tgt_target_data_end_nowait_mapper(int64_t device_id, int32_t arg_num, + void **args_base, void **args, + int64_t *arg_sizes, int64_t *arg_types, + void **arg_mappers, int32_t depNum, + void *depList, int32_t noAliasDepNum, + void *noAliasDepList); /// passes data to/from the target void __tgt_target_data_update(int64_t device_id, int32_t arg_num, @@ -173,6 +191,14 @@ int32_t depNum, void *depList, int32_t noAliasDepNum, void *noAliasDepList); +void __tgt_target_data_update_mapper(int64_t device_id, int32_t arg_num, + void **args_base, void **args, + int64_t *arg_sizes, int64_t *arg_types, + void **arg_mappers); +void __tgt_target_data_update_nowait_mapper( + int64_t device_id, int32_t arg_num, void **args_base, void **args, + int64_t *arg_sizes, int64_t *arg_types, void **arg_mappers, int32_t depNum, 
+ void *depList, int32_t noAliasDepNum, void *noAliasDepList); // Performs the same actions as data_begin in case arg_num is non-zero // and initiates run of offloaded region on target platform; if arg_num @@ -187,6 +213,15 @@ void **args_base, void **args, int64_t *arg_sizes, int64_t *arg_types, int32_t depNum, void *depList, int32_t noAliasDepNum, void *noAliasDepList); +int __tgt_target_mapper(int64_t device_id, void *host_ptr, int32_t arg_num, + void **args_base, void **args, int64_t *arg_sizes, + int64_t *arg_types, void **arg_mappers); +int __tgt_target_nowait_mapper(int64_t device_id, void *host_ptr, + int32_t arg_num, void **args_base, void **args, + int64_t *arg_sizes, int64_t *arg_types, + void **arg_mappers, int32_t depNum, + void *depList, int32_t noAliasDepNum, + void *noAliasDepList); int __tgt_target_teams(int64_t device_id, void *host_ptr, int32_t arg_num, void **args_base, void **args, int64_t *arg_sizes, @@ -198,6 +233,17 @@ int32_t num_teams, int32_t thread_limit, int32_t depNum, void *depList, int32_t noAliasDepNum, void *noAliasDepList); +int __tgt_target_teams_mapper(int64_t device_id, void *host_ptr, + int32_t arg_num, void **args_base, void **args, + int64_t *arg_sizes, int64_t *arg_types, + void **arg_mappers, int32_t num_teams, + int32_t thread_limit); +int __tgt_target_teams_nowait_mapper( + int64_t device_id, void *host_ptr, int32_t arg_num, void **args_base, + void **args, int64_t *arg_sizes, int64_t *arg_types, void **arg_mappers, + int32_t num_teams, int32_t thread_limit, int32_t depNum, void *depList, + int32_t noAliasDepNum, void *noAliasDepList); + void __kmpc_push_target_tripcount(int64_t device_id, uint64_t loop_tripcount); #ifdef __cplusplus Index: openmp/libomptarget/src/exports =================================================================== --- openmp/libomptarget/src/exports +++ openmp/libomptarget/src/exports @@ -13,6 +13,16 @@ __tgt_target_data_update_nowait; __tgt_target_nowait; __tgt_target_teams_nowait; + 
__tgt_target_data_begin_mapper; + __tgt_target_data_end_mapper; + __tgt_target_data_update_mapper; + __tgt_target_mapper; + __tgt_target_teams_mapper; + __tgt_target_data_begin_nowait_mapper; + __tgt_target_data_end_nowait_mapper; + __tgt_target_data_update_nowait_mapper; + __tgt_target_nowait_mapper; + __tgt_target_teams_nowait_mapper; __tgt_mapper_num_components; __tgt_push_mapper_component; omp_get_num_devices; Index: openmp/libomptarget/src/interface.cpp =================================================================== --- openmp/libomptarget/src/interface.cpp +++ openmp/libomptarget/src/interface.cpp @@ -91,6 +91,26 @@ /// and passes the data to the device. EXTERN void __tgt_target_data_begin(int64_t device_id, int32_t arg_num, void **args_base, void **args, int64_t *arg_sizes, int64_t *arg_types) { + __tgt_target_data_begin_mapper(device_id, arg_num, args_base, args, arg_sizes, + arg_types, nullptr); +} + +EXTERN void __tgt_target_data_begin_nowait(int64_t device_id, int32_t arg_num, + void **args_base, void **args, int64_t *arg_sizes, int64_t *arg_types, + int32_t depNum, void *depList, int32_t noAliasDepNum, + void *noAliasDepList) { + if (depNum + noAliasDepNum > 0) + __kmpc_omp_taskwait(NULL, __kmpc_global_thread_num(NULL)); + + __tgt_target_data_begin_mapper(device_id, arg_num, args_base, args, arg_sizes, + arg_types, nullptr); +} + +EXTERN void __tgt_target_data_begin_mapper(int64_t device_id, int32_t arg_num, + void **args_base, void **args, + int64_t *arg_sizes, + int64_t *arg_types, + void **arg_mappers) { if (IsOffloadDisabled()) return; DP("Entering data begin region for device %" PRId64 " with %d mappings\n", @@ -118,20 +138,20 @@ } #endif - int rc = target_data_begin(Device, arg_num, args_base, - args, arg_sizes, arg_types); + int rc = target_data_begin(Device, arg_num, args_base, args, arg_sizes, + arg_types, arg_mappers); HandleTargetOutcome(rc == OFFLOAD_SUCCESS); } -EXTERN void __tgt_target_data_begin_nowait(int64_t device_id, int32_t 
arg_num, - void **args_base, void **args, int64_t *arg_sizes, int64_t *arg_types, - int32_t depNum, void *depList, int32_t noAliasDepNum, - void *noAliasDepList) { +EXTERN void __tgt_target_data_begin_nowait_mapper( + int64_t device_id, int32_t arg_num, void **args_base, void **args, + int64_t *arg_sizes, int64_t *arg_types, void **arg_mappers, int32_t depNum, + void *depList, int32_t noAliasDepNum, void *noAliasDepList) { if (depNum + noAliasDepNum > 0) __kmpc_omp_taskwait(NULL, __kmpc_global_thread_num(NULL)); - __tgt_target_data_begin(device_id, arg_num, args_base, args, arg_sizes, - arg_types); + __tgt_target_data_begin_mapper(device_id, arg_num, args_base, args, arg_sizes, + arg_types, arg_mappers); } /// passes data from the target, releases target memory and destroys @@ -139,6 +159,25 @@ /// created by the last __tgt_target_data_begin. EXTERN void __tgt_target_data_end(int64_t device_id, int32_t arg_num, void **args_base, void **args, int64_t *arg_sizes, int64_t *arg_types) { + __tgt_target_data_end_mapper(device_id, arg_num, args_base, args, arg_sizes, + arg_types, nullptr); +} + +EXTERN void __tgt_target_data_end_nowait(int64_t device_id, int32_t arg_num, + void **args_base, void **args, int64_t *arg_sizes, int64_t *arg_types, + int32_t depNum, void *depList, int32_t noAliasDepNum, + void *noAliasDepList) { + if (depNum + noAliasDepNum > 0) + __kmpc_omp_taskwait(NULL, __kmpc_global_thread_num(NULL)); + + __tgt_target_data_end_mapper(device_id, arg_num, args_base, args, arg_sizes, + arg_types, nullptr); +} + +EXTERN void __tgt_target_data_end_mapper(int64_t device_id, int32_t arg_num, + void **args_base, void **args, + int64_t *arg_sizes, int64_t *arg_types, + void **arg_mappers) { if (IsOffloadDisabled()) return; DP("Entering data end region with %d mappings\n", arg_num); @@ -171,24 +210,44 @@ } #endif - int rc = target_data_end(Device, arg_num, args_base, - args, arg_sizes, arg_types); + int rc = target_data_end(Device, arg_num, args_base, args, 
arg_sizes, + arg_types, arg_mappers); HandleTargetOutcome(rc == OFFLOAD_SUCCESS); } -EXTERN void __tgt_target_data_end_nowait(int64_t device_id, int32_t arg_num, - void **args_base, void **args, int64_t *arg_sizes, int64_t *arg_types, - int32_t depNum, void *depList, int32_t noAliasDepNum, - void *noAliasDepList) { +EXTERN void __tgt_target_data_end_nowait_mapper( + int64_t device_id, int32_t arg_num, void **args_base, void **args, + int64_t *arg_sizes, int64_t *arg_types, void **arg_mappers, int32_t depNum, + void *depList, int32_t noAliasDepNum, void *noAliasDepList) { if (depNum + noAliasDepNum > 0) __kmpc_omp_taskwait(NULL, __kmpc_global_thread_num(NULL)); - __tgt_target_data_end(device_id, arg_num, args_base, args, arg_sizes, - arg_types); + __tgt_target_data_end_mapper(device_id, arg_num, args_base, args, arg_sizes, + arg_types, arg_mappers); } EXTERN void __tgt_target_data_update(int64_t device_id, int32_t arg_num, void **args_base, void **args, int64_t *arg_sizes, int64_t *arg_types) { + __tgt_target_data_update_mapper(device_id, arg_num, args_base, args, + arg_sizes, arg_types, nullptr); +} + +EXTERN void __tgt_target_data_update_nowait( + int64_t device_id, int32_t arg_num, void **args_base, void **args, + int64_t *arg_sizes, int64_t *arg_types, int32_t depNum, void *depList, + int32_t noAliasDepNum, void *noAliasDepList) { + if (depNum + noAliasDepNum > 0) + __kmpc_omp_taskwait(NULL, __kmpc_global_thread_num(NULL)); + + __tgt_target_data_update_mapper(device_id, arg_num, args_base, args, + arg_sizes, arg_types, nullptr); +} + +EXTERN void __tgt_target_data_update_mapper(int64_t device_id, int32_t arg_num, + void **args_base, void **args, + int64_t *arg_sizes, + int64_t *arg_types, + void **arg_mappers) { if (IsOffloadDisabled()) return; DP("Entering data update with %d mappings\n", arg_num); @@ -204,24 +263,43 @@ } DeviceTy& Device = Devices[device_id]; - int rc = target_data_update(Device, arg_num, args_base, - args, arg_sizes, arg_types); + int rc = 
target_data_update(Device, arg_num, args_base, args, arg_sizes, + arg_types, arg_mappers); HandleTargetOutcome(rc == OFFLOAD_SUCCESS); } -EXTERN void __tgt_target_data_update_nowait( +EXTERN void __tgt_target_data_update_nowait_mapper( int64_t device_id, int32_t arg_num, void **args_base, void **args, - int64_t *arg_sizes, int64_t *arg_types, int32_t depNum, void *depList, - int32_t noAliasDepNum, void *noAliasDepList) { + int64_t *arg_sizes, int64_t *arg_types, void **arg_mappers, int32_t depNum, + void *depList, int32_t noAliasDepNum, void *noAliasDepList) { if (depNum + noAliasDepNum > 0) __kmpc_omp_taskwait(NULL, __kmpc_global_thread_num(NULL)); - __tgt_target_data_update(device_id, arg_num, args_base, args, arg_sizes, - arg_types); + __tgt_target_data_update_mapper(device_id, arg_num, args_base, args, + arg_sizes, arg_types, arg_mappers); } EXTERN int __tgt_target(int64_t device_id, void *host_ptr, int32_t arg_num, void **args_base, void **args, int64_t *arg_sizes, int64_t *arg_types) { + return __tgt_target_mapper(device_id, host_ptr, arg_num, args_base, args, + arg_sizes, arg_types, nullptr); +} + +EXTERN int __tgt_target_nowait(int64_t device_id, void *host_ptr, + int32_t arg_num, void **args_base, void **args, int64_t *arg_sizes, + int64_t *arg_types, int32_t depNum, void *depList, int32_t noAliasDepNum, + void *noAliasDepList) { + if (depNum + noAliasDepNum > 0) + __kmpc_omp_taskwait(NULL, __kmpc_global_thread_num(NULL)); + + return __tgt_target_mapper(device_id, host_ptr, arg_num, args_base, args, + arg_sizes, arg_types, nullptr); +} + +EXTERN int __tgt_target_mapper(int64_t device_id, void *host_ptr, + int32_t arg_num, void **args_base, void **args, + int64_t *arg_sizes, int64_t *arg_types, + void **arg_mappers) { if (IsOffloadDisabled()) return OFFLOAD_FAIL; DP("Entering target region with entry point " DPxMOD " and device Id %" PRId64 "\n", DPxPTR(host_ptr), device_id); @@ -245,25 +323,48 @@ #endif int rc = target(device_id, host_ptr, arg_num, 
args_base, args, arg_sizes, - arg_types, 0, 0, false /*team*/); + arg_types, arg_mappers, 0, 0, false /*team*/); HandleTargetOutcome(rc == OFFLOAD_SUCCESS); return rc; } -EXTERN int __tgt_target_nowait(int64_t device_id, void *host_ptr, - int32_t arg_num, void **args_base, void **args, int64_t *arg_sizes, - int64_t *arg_types, int32_t depNum, void *depList, int32_t noAliasDepNum, - void *noAliasDepList) { +EXTERN int __tgt_target_nowait_mapper(int64_t device_id, void *host_ptr, + int32_t arg_num, void **args_base, + void **args, int64_t *arg_sizes, + int64_t *arg_types, void **arg_mappers, + int32_t depNum, void *depList, + int32_t noAliasDepNum, + void *noAliasDepList) { if (depNum + noAliasDepNum > 0) __kmpc_omp_taskwait(NULL, __kmpc_global_thread_num(NULL)); - return __tgt_target(device_id, host_ptr, arg_num, args_base, args, arg_sizes, - arg_types); + return __tgt_target_mapper(device_id, host_ptr, arg_num, args_base, args, + arg_sizes, arg_types, arg_mappers); } EXTERN int __tgt_target_teams(int64_t device_id, void *host_ptr, int32_t arg_num, void **args_base, void **args, int64_t *arg_sizes, int64_t *arg_types, int32_t team_num, int32_t thread_limit) { + return __tgt_target_teams_mapper(device_id, host_ptr, arg_num, args_base, + args, arg_sizes, arg_types, nullptr, + team_num, thread_limit); +} + +EXTERN int __tgt_target_teams_nowait(int64_t device_id, void *host_ptr, + int32_t arg_num, void **args_base, void **args, int64_t *arg_sizes, + int64_t *arg_types, int32_t team_num, int32_t thread_limit, int32_t depNum, + void *depList, int32_t noAliasDepNum, void *noAliasDepList) { + if (depNum + noAliasDepNum > 0) + __kmpc_omp_taskwait(NULL, __kmpc_global_thread_num(NULL)); + + return __tgt_target_teams_mapper(device_id, host_ptr, arg_num, args_base, + args, arg_sizes, arg_types, nullptr, + team_num, thread_limit); +} + +EXTERN int __tgt_target_teams_mapper(int64_t device_id, void *host_ptr, + int32_t arg_num, void **args_base, void **args, int64_t *arg_sizes, + 
int64_t *arg_types, void **arg_mappers, int32_t team_num, int32_t thread_limit) { if (IsOffloadDisabled()) return OFFLOAD_FAIL; DP("Entering target region with entry point " DPxMOD " and device Id %" PRId64 "\n", DPxPTR(host_ptr), device_id); @@ -286,22 +387,25 @@ } #endif - int rc = target(device_id, host_ptr, arg_num, args_base, args, arg_sizes, - arg_types, team_num, thread_limit, true /*team*/); + int rc = + target(device_id, host_ptr, arg_num, args_base, args, arg_sizes, + arg_types, arg_mappers, team_num, thread_limit, true /*team*/); HandleTargetOutcome(rc == OFFLOAD_SUCCESS); return rc; } -EXTERN int __tgt_target_teams_nowait(int64_t device_id, void *host_ptr, - int32_t arg_num, void **args_base, void **args, int64_t *arg_sizes, - int64_t *arg_types, int32_t team_num, int32_t thread_limit, int32_t depNum, - void *depList, int32_t noAliasDepNum, void *noAliasDepList) { +EXTERN int __tgt_target_teams_nowait_mapper( + int64_t device_id, void *host_ptr, int32_t arg_num, void **args_base, + void **args, int64_t *arg_sizes, int64_t *arg_types, void **arg_mappers, + int32_t team_num, int32_t thread_limit, int32_t depNum, void *depList, + int32_t noAliasDepNum, void *noAliasDepList) { if (depNum + noAliasDepNum > 0) __kmpc_omp_taskwait(NULL, __kmpc_global_thread_num(NULL)); - return __tgt_target_teams(device_id, host_ptr, arg_num, args_base, args, - arg_sizes, arg_types, team_num, thread_limit); + return __tgt_target_teams_mapper(device_id, host_ptr, arg_num, args_base, + args, arg_sizes, arg_types, arg_mappers, + team_num, thread_limit); } // Get the current number of components for a user-defined mapper. Index: openmp/libomptarget/src/omptarget.cpp =================================================================== --- openmp/libomptarget/src/omptarget.cpp +++ openmp/libomptarget/src/omptarget.cpp @@ -161,8 +161,8 @@ DP("Has pending ctors... 
call now\n"); for (auto &entry : lib.second.PendingCtors) { void *ctor = entry; - int rc = target(device_id, ctor, 0, NULL, NULL, NULL, - NULL, 1, 1, true /*team*/); + int rc = target(device_id, ctor, 0, NULL, NULL, NULL, NULL, NULL, 1, + 1, true /*team*/); if (rc != OFFLOAD_SUCCESS) { DP("Running ctor " DPxMOD " failed.\n", DPxPTR(ctor)); Device.PendingGlobalsMtx.unlock(); @@ -336,8 +336,9 @@ } /// Internal function to do the mapping and transfer the data to the device -int target_data_begin(DeviceTy &Device, int32_t arg_num, - void **args_base, void **args, int64_t *arg_sizes, int64_t *arg_types) { +int target_data_begin(DeviceTy &Device, int32_t arg_num, void **args_base, + void **args, int64_t *arg_sizes, int64_t *arg_types, + void **arg_mappers) { // process each input. for (int32_t i = 0; i < arg_num; ++i) { // Ignore private variables and arrays - there is no mapping for them. @@ -345,14 +346,46 @@ (arg_types[i] & OMP_TGT_MAPTYPE_PRIVATE)) continue; - // Helper function to get the base address and type. - auto &&GetBegin = [&args](int32_t Idx) { return args[Idx]; }; - auto &&GetType = [&arg_types](int32_t Idx) { return arg_types[Idx]; }; - int rt = target_data_begin_component(Device, args[i], arg_sizes[i], - arg_types[i], &args_base[i], i, - arg_num, GetBegin, GetType); - if (rt != OFFLOAD_SUCCESS) - return OFFLOAD_FAIL; + // If a valid user-defined mapper is attached, use the associated mapper + // function to complete data mapping. + if (arg_mappers && arg_mappers[i]) { + DP("Call the mapper function " DPxMOD " for the %dth argument\n", + DPxPTR(arg_mappers[i]), i); + // The mapper function fills up Components. 
+ MapperComponentsTy Components; + MapperFuncPtrTy MapperFuncPtr = (MapperFuncPtrTy)(arg_mappers[i]); + (*MapperFuncPtr)((void *)&Components, args_base[i], args[i], arg_sizes[i], + arg_types[i]); + if (Components.size() >= 0xffff) { + DP("The number of components exceed the limitation\n"); + return OFFLOAD_FAIL; + } + + // Map each component filled up by the mapper function. + for (int32_t j = 0, e = Components.size(); j < e; ++j) { + // Helper function to get the base address and type. + auto &&GetBegin = [&Components](int32_t Idx) { + return Components.get(Idx)->Begin; + }; + auto &&GetType = [&Components](int32_t Idx) { + return Components.get(Idx)->Type; + }; + int rt = target_data_begin_component( + Device, Components.get(j)->Begin, Components.get(j)->Size, + Components.get(j)->Type, &args_base[i], j, e, GetBegin, GetType); + if (rt != OFFLOAD_SUCCESS) + return OFFLOAD_FAIL; + } + } else { + // Helper function to get the base address and type. + auto &&GetBegin = [&args](int32_t Idx) { return args[Idx]; }; + auto &&GetType = [&arg_types](int32_t Idx) { return arg_types[Idx]; }; + int rt = target_data_begin_component(Device, args[i], arg_sizes[i], + arg_types[i], &args_base[i], i, + arg_num, GetBegin, GetType); + if (rt != OFFLOAD_SUCCESS) + return OFFLOAD_FAIL; + } } return OFFLOAD_SUCCESS; @@ -483,7 +516,8 @@ /// Internal function to undo the mapping and retrieve the data from the device. int target_data_end(DeviceTy &Device, int32_t arg_num, void **args_base, - void **args, int64_t *arg_sizes, int64_t *arg_types) { + void **args, int64_t *arg_sizes, int64_t *arg_types, + void **arg_mappers) { // process each input. for (int32_t i = arg_num - 1; i >= 0; --i) { // Ignore private variables and arrays - there is no mapping for them. @@ -492,14 +526,46 @@ (arg_types[i] & OMP_TGT_MAPTYPE_PRIVATE)) continue; - // Helper function to get the base address and type. 
- auto &&GetBegin = [&args](int32_t Idx) { return args[Idx]; }; - auto &&GetType = [&arg_types](int32_t Idx) { return arg_types[Idx]; }; - int rt = - target_data_end_component(Device, args[i], arg_sizes[i], arg_types[i], - i, arg_num, GetBegin, GetType); - if (rt != OFFLOAD_SUCCESS) - return OFFLOAD_FAIL; + // If a valid user-defined mapper is attached, use the associated mapper + // function to complete data mapping. + if (arg_mappers && arg_mappers[i]) { + DP("Call the mapper function " DPxMOD " for the %dth argument\n", + DPxPTR(arg_mappers[i]), i); + // The mapper function fills up Components. + MapperComponentsTy Components; + MapperFuncPtrTy MapperFuncPtr = (MapperFuncPtrTy)(arg_mappers[i]); + (*MapperFuncPtr)((void *)&Components, args_base[i], args[i], arg_sizes[i], + arg_types[i]); + if (Components.size() >= 0xffff) { + DP("The number of components exceed the limitation\n"); + return OFFLOAD_FAIL; + } + + // Map each component filled up by the mapper function. + for (int32_t j = 0, e = Components.size(); j < e; ++j) { + // Helper function to get the base address and type. + auto &&GetBegin = [&Components](int32_t Idx) { + return Components.get(Idx)->Begin; + }; + auto &&GetType = [&Components](int32_t Idx) { + return Components.get(Idx)->Type; + }; + int rt = target_data_end_component( + Device, Components.get(j)->Begin, Components.get(j)->Size, + Components.get(j)->Type, j, e, GetBegin, GetType); + if (rt != OFFLOAD_SUCCESS) + return OFFLOAD_FAIL; + } + } else { + // Helper function to get the base address and type. + auto &&GetBegin = [&args](int32_t Idx) { return args[Idx]; }; + auto &&GetType = [&arg_types](int32_t Idx) { return arg_types[Idx]; }; + int rt = + target_data_end_component(Device, args[i], arg_sizes[i], arg_types[i], + i, arg_num, GetBegin, GetType); + if (rt != OFFLOAD_SUCCESS) + return OFFLOAD_FAIL; + } } return OFFLOAD_SUCCESS; @@ -587,18 +653,45 @@ } /// Internal function to pass data to/from the target. 
-int target_data_update(DeviceTy &Device, int32_t arg_num, - void **args_base, void **args, int64_t *arg_sizes, int64_t *arg_types) { +int target_data_update(DeviceTy &Device, int32_t arg_num, void **args_base, + void **args, int64_t *arg_sizes, int64_t *arg_types, + void **arg_mappers) { // process each input. for (int32_t i = 0; i < arg_num; ++i) { if ((arg_types[i] & OMP_TGT_MAPTYPE_LITERAL) || (arg_types[i] & OMP_TGT_MAPTYPE_PRIVATE)) continue; - int rt = target_data_update_component(Device, args[i], arg_sizes[i], - arg_types[i]); - if (rt != OFFLOAD_SUCCESS) - return OFFLOAD_FAIL; + // If a valid user-defined mapper is attached, use the associated mapper + // function to complete data mapping. + if (arg_mappers && arg_mappers[i]) { + DP("Call the mapper function " DPxMOD " for the %dth argument\n", + DPxPTR(arg_mappers[i]), i); + // The mapper function fills up Components. + MapperComponentsTy Components; + MapperFuncPtrTy MapperFuncPtr = (MapperFuncPtrTy)(arg_mappers[i]); + (*MapperFuncPtr)((void *)&Components, args_base[i], args[i], arg_sizes[i], + arg_types[i]); + if (Components.size() >= 0xffff) { + DP("The number of components exceed the limitation\n"); + return OFFLOAD_FAIL; + } + + // Map each component filled up by the mapper function. + for (int32_t j = 0, e = Components.size(); j < e; ++j) { + int rt = target_data_update_component(Device, Components.get(j)->Begin, + Components.get(j)->Size, + Components.get(j)->Type); + if (rt != OFFLOAD_SUCCESS) + return OFFLOAD_FAIL; + } + } + else { + int rt = target_data_update_component(Device, args[i], arg_sizes[i], + arg_types[i]); + if (rt != OFFLOAD_SUCCESS) + return OFFLOAD_FAIL; + } } return OFFLOAD_SUCCESS; @@ -617,9 +710,10 @@ /// performs the same action as data_update and data_end above. This function /// returns 0 if it was able to transfer the execution to a target and an /// integer different from zero otherwise. 
-int target(int64_t device_id, void *host_ptr, int32_t arg_num, - void **args_base, void **args, int64_t *arg_sizes, int64_t *arg_types, - int32_t team_num, int32_t thread_limit, int IsTeamConstruct) { +int target(int64_t device_id, void *host_ptr, int32_t arg_num, void **args_base, + void **args, int64_t *arg_sizes, int64_t *arg_types, + void **arg_mappers, int32_t team_num, int32_t thread_limit, + int IsTeamConstruct) { DeviceTy &Device = Devices[device_id]; // Find the table information in the map or look it up in the translation @@ -675,7 +769,7 @@ // Move data to device. int rc = target_data_begin(Device, arg_num, args_base, args, arg_sizes, - arg_types); + arg_types, arg_mappers); if (rc != OFFLOAD_SUCCESS) { DP("Call to target_data_begin failed, abort target.\n"); return OFFLOAD_FAIL; @@ -837,7 +931,7 @@ // Move data from device. int rt = target_data_end(Device, arg_num, args_base, args, arg_sizes, - arg_types); + arg_types, arg_mappers); if (rt != OFFLOAD_SUCCESS) { DP("Call to target_data_end failed, abort targe.\n"); return OFFLOAD_FAIL; Index: openmp/libomptarget/src/private.h =================================================================== --- openmp/libomptarget/src/private.h +++ openmp/libomptarget/src/private.h @@ -15,20 +15,25 @@ #include +#include #include extern int target_data_begin(DeviceTy &Device, int32_t arg_num, - void **args_base, void **args, int64_t *arg_sizes, int64_t *arg_types); + void **args_base, void **args, int64_t *arg_sizes, + int64_t *arg_types, void **arg_mappers); extern int target_data_end(DeviceTy &Device, int32_t arg_num, void **args_base, - void **args, int64_t *arg_sizes, int64_t *arg_types); + void **args, int64_t *arg_sizes, int64_t *arg_types, + void **arg_mappers); extern int target_data_update(DeviceTy &Device, int32_t arg_num, - void **args_base, void **args, int64_t *arg_sizes, int64_t *arg_types); + void **args_base, void **args, int64_t *arg_sizes, + int64_t *arg_types, void **arg_mappers); extern int 
target(int64_t device_id, void *host_ptr, int32_t arg_num, - void **args_base, void **args, int64_t *arg_sizes, int64_t *arg_types, - int32_t team_num, int32_t thread_limit, int IsTeamConstruct); + void **args_base, void **args, int64_t *arg_sizes, + int64_t *arg_types, void **arg_mappers, int32_t team_num, + int32_t thread_limit, int IsTeamConstruct); extern int CheckDeviceAndCtors(int64_t device_id); @@ -57,8 +62,19 @@ // implementation here. struct MapperComponentsTy { std::vector Components; + int32_t size() { return Components.size(); } + MapComponentInfoTy *get(int32_t i) { + assert(i < size() && "Try to access a component that does not exist"); + return &Components[i]; + } }; +// The mapper function pointer type. It follows the signature below: +// void .omp_mapper...(void *rt_mapper_handle, +// void *base, void *begin, +// int64_t size, int64_t type); +typedef void (*MapperFuncPtrTy)(void *, void *, void *, int64_t, int64_t); + //////////////////////////////////////////////////////////////////////////////// // implemtation for fatal messages //////////////////////////////////////////////////////////////////////////////// Index: openmp/libomptarget/src/rtl.cpp =================================================================== --- openmp/libomptarget/src/rtl.cpp +++ openmp/libomptarget/src/rtl.cpp @@ -352,8 +352,8 @@ Device.PendingGlobalsMtx.lock(); if (Device.PendingCtorsDtors[desc].PendingCtors.empty()) { for (auto &dtor : Device.PendingCtorsDtors[desc].PendingDtors) { - int rc = target(Device.DeviceID, dtor, 0, NULL, NULL, NULL, NULL, 1, - 1, true /*team*/); + int rc = target(Device.DeviceID, dtor, 0, NULL, NULL, NULL, NULL, + NULL, 1, 1, true /*team*/); if (rc != OFFLOAD_SUCCESS) { DP("Running destructor " DPxMOD " failed.\n", DPxPTR(dtor)); }