Index: libomptarget/include/omptarget.h =================================================================== --- libomptarget/include/omptarget.h +++ libomptarget/include/omptarget.h @@ -151,6 +151,14 @@ int32_t depNum, void *depList, int32_t noAliasDepNum, void *noAliasDepList); +void __tgt_target_data_begin_mapper(int64_t device_id, int32_t arg_num, + void **args_base, void **args, + int64_t *arg_sizes, int64_t *arg_types, + void **arg_mappers); +void __tgt_target_data_begin_nowait_mapper( + int64_t device_id, int32_t arg_num, void **args_base, void **args, + int64_t *arg_sizes, int64_t *arg_types, void **arg_mappers, int32_t depNum, + void *depList, int32_t noAliasDepNum, void *noAliasDepList); // passes data from the target, release target memory and destroys the // host-target mapping (top entry from the stack of data maps) created by @@ -162,6 +170,16 @@ int64_t *arg_sizes, int64_t *arg_types, int32_t depNum, void *depList, int32_t noAliasDepNum, void *noAliasDepList); +void __tgt_target_data_end_mapper(int64_t device_id, int32_t arg_num, + void **args_base, void **args, + int64_t *arg_sizes, int64_t *arg_types, + void **arg_mappers); +void __tgt_target_data_end_nowait_mapper(int64_t device_id, int32_t arg_num, + void **args_base, void **args, + int64_t *arg_sizes, int64_t *arg_types, + void **arg_mappers, int32_t depNum, + void *depList, int32_t noAliasDepNum, + void *noAliasDepList); /// passes data to/from the target void __tgt_target_data_update(int64_t device_id, int32_t arg_num, @@ -173,6 +191,14 @@ int32_t depNum, void *depList, int32_t noAliasDepNum, void *noAliasDepList); +void __tgt_target_data_update_mapper(int64_t device_id, int32_t arg_num, + void **args_base, void **args, + int64_t *arg_sizes, int64_t *arg_types, + void **arg_mappers); +void __tgt_target_data_update_nowait_mapper( + int64_t device_id, int32_t arg_num, void **args_base, void **args, + int64_t *arg_sizes, int64_t *arg_types, void **arg_mappers, int32_t depNum, + void *depList, int32_t noAliasDepNum, void *noAliasDepList); // Performs the same actions as data_begin in case arg_num is non-zero // and initiates run of offloaded region on target platform; if arg_num @@ -187,6 +213,15 @@ void **args_base, void **args, int64_t *arg_sizes, int64_t *arg_types, int32_t depNum, void *depList, int32_t noAliasDepNum, void *noAliasDepList); +int __tgt_target_mapper(int64_t device_id, void *host_ptr, int32_t arg_num, + void **args_base, void **args, int64_t *arg_sizes, + int64_t *arg_types, void **arg_mappers); +int __tgt_target_nowait_mapper(int64_t device_id, void *host_ptr, + int32_t arg_num, void **args_base, void **args, + int64_t *arg_sizes, int64_t *arg_types, + void **arg_mappers, int32_t depNum, + void *depList, int32_t noAliasDepNum, + void *noAliasDepList); int __tgt_target_teams(int64_t device_id, void *host_ptr, int32_t arg_num, void **args_base, void **args, int64_t *arg_sizes, @@ -198,6 +233,17 @@ int32_t num_teams, int32_t thread_limit, int32_t depNum, void *depList, int32_t noAliasDepNum, void *noAliasDepList); +int __tgt_target_teams_mapper(int64_t device_id, void *host_ptr, + int32_t arg_num, void **args_base, void **args, + int64_t *arg_sizes, int64_t *arg_types, + void **arg_mappers, int32_t num_teams, + int32_t thread_limit); +int __tgt_target_teams_nowait_mapper( + int64_t device_id, void *host_ptr, int32_t arg_num, void **args_base, + void **args, int64_t *arg_sizes, int64_t *arg_types, void **arg_mappers, + int32_t num_teams, int32_t thread_limit, int32_t depNum, void *depList, + int32_t noAliasDepNum, void *noAliasDepList); + void __kmpc_push_target_tripcount(int64_t device_id, uint64_t loop_tripcount); #ifdef __cplusplus Index: libomptarget/src/exports =================================================================== --- libomptarget/src/exports +++ libomptarget/src/exports @@ -13,6 +13,16 @@ __tgt_target_data_update_nowait; __tgt_target_nowait; __tgt_target_teams_nowait; + __tgt_target_data_begin_mapper; + __tgt_target_data_end_mapper; + __tgt_target_data_update_mapper; + __tgt_target_mapper; + __tgt_target_teams_mapper; + __tgt_target_data_begin_nowait_mapper; + __tgt_target_data_end_nowait_mapper; + __tgt_target_data_update_nowait_mapper; + __tgt_target_nowait_mapper; + __tgt_target_teams_nowait_mapper; __tgt_mapper_num_components; __tgt_push_mapper_component; omp_get_num_devices; Index: libomptarget/src/interface.cpp =================================================================== --- libomptarget/src/interface.cpp +++ libomptarget/src/interface.cpp @@ -91,6 +91,26 @@ /// and passes the data to the device. EXTERN void __tgt_target_data_begin(int64_t device_id, int32_t arg_num, void **args_base, void **args, int64_t *arg_sizes, int64_t *arg_types) { + __tgt_target_data_begin_mapper(device_id, arg_num, args_base, args, arg_sizes, + arg_types, nullptr); +} + +EXTERN void __tgt_target_data_begin_nowait(int64_t device_id, int32_t arg_num, + void **args_base, void **args, int64_t *arg_sizes, int64_t *arg_types, + int32_t depNum, void *depList, int32_t noAliasDepNum, + void *noAliasDepList) { + if (depNum + noAliasDepNum > 0) + __kmpc_omp_taskwait(NULL, 0); + + __tgt_target_data_begin_mapper(device_id, arg_num, args_base, args, arg_sizes, + arg_types, nullptr); +} + +EXTERN void __tgt_target_data_begin_mapper(int64_t device_id, int32_t arg_num, + void **args_base, void **args, + int64_t *arg_sizes, + int64_t *arg_types, + void **arg_mappers) { if (IsOffloadDisabled()) return; DP("Entering data begin region for device %" PRId64 " with %d mappings\n", @@ -118,20 +138,20 @@ } #endif - int rc = target_data_begin(Device, arg_num, args_base, - args, arg_sizes, arg_types); + int rc = target_data_begin(Device, arg_num, args_base, args, arg_sizes, + arg_types, arg_mappers); HandleTargetOutcome(rc == OFFLOAD_SUCCESS); } -EXTERN void __tgt_target_data_begin_nowait(int64_t device_id, int32_t arg_num, - void **args_base, void **args, int64_t *arg_sizes, int64_t *arg_types, - int32_t depNum, void *depList, int32_t noAliasDepNum, - void *noAliasDepList) { +EXTERN void __tgt_target_data_begin_nowait_mapper( + int64_t device_id, int32_t arg_num, void **args_base, void **args, + int64_t *arg_sizes, int64_t *arg_types, void **arg_mappers, int32_t depNum, + void *depList, int32_t noAliasDepNum, void *noAliasDepList) { if (depNum + noAliasDepNum > 0) __kmpc_omp_taskwait(NULL, __kmpc_global_thread_num(NULL)); - __tgt_target_data_begin(device_id, arg_num, args_base, args, arg_sizes, - arg_types); + __tgt_target_data_begin_mapper(device_id, arg_num, args_base, args, arg_sizes, + arg_types, arg_mappers); } /// passes data from the target, releases target memory and destroys @@ -139,6 +159,25 @@ /// created by the last __tgt_target_data_begin. EXTERN void __tgt_target_data_end(int64_t device_id, int32_t arg_num, void **args_base, void **args, int64_t *arg_sizes, int64_t *arg_types) { + __tgt_target_data_end_mapper(device_id, arg_num, args_base, args, arg_sizes, + arg_types, nullptr); +} + +EXTERN void __tgt_target_data_end_nowait(int64_t device_id, int32_t arg_num, + void **args_base, void **args, int64_t *arg_sizes, int64_t *arg_types, + int32_t depNum, void *depList, int32_t noAliasDepNum, + void *noAliasDepList) { + if (depNum + noAliasDepNum > 0) + __kmpc_omp_taskwait(NULL, 0); + + __tgt_target_data_end_mapper(device_id, arg_num, args_base, args, arg_sizes, + arg_types, nullptr); +} + +EXTERN void __tgt_target_data_end_mapper(int64_t device_id, int32_t arg_num, + void **args_base, void **args, + int64_t *arg_sizes, int64_t *arg_types, + void **arg_mappers) { if (IsOffloadDisabled()) return; DP("Entering data end region with %d mappings\n", arg_num); @@ -171,24 +210,44 @@ } #endif - int rc = target_data_end(Device, arg_num, args_base, - args, arg_sizes, arg_types); + int rc = target_data_end(Device, arg_num, args_base, args, arg_sizes, + arg_types, arg_mappers); HandleTargetOutcome(rc == OFFLOAD_SUCCESS); } -EXTERN void __tgt_target_data_end_nowait(int64_t device_id, int32_t arg_num, - void **args_base, void **args, int64_t *arg_sizes, int64_t *arg_types, - int32_t depNum, void *depList, int32_t noAliasDepNum, - void *noAliasDepList) { +EXTERN void __tgt_target_data_end_nowait_mapper( + int64_t device_id, int32_t arg_num, void **args_base, void **args, + int64_t *arg_sizes, int64_t *arg_types, void **arg_mappers, int32_t depNum, + void *depList, int32_t noAliasDepNum, void *noAliasDepList) { if (depNum + noAliasDepNum > 0) __kmpc_omp_taskwait(NULL, __kmpc_global_thread_num(NULL)); - __tgt_target_data_end(device_id, arg_num, args_base, args, arg_sizes, - arg_types); + __tgt_target_data_end_mapper(device_id, arg_num, args_base, args, arg_sizes, + arg_types, arg_mappers); } EXTERN void __tgt_target_data_update(int64_t device_id, int32_t arg_num, void **args_base, void **args, int64_t *arg_sizes, int64_t *arg_types) { + __tgt_target_data_update_mapper(device_id, arg_num, args_base, args, + arg_sizes, arg_types, nullptr); +} + +EXTERN void __tgt_target_data_update_nowait( + int64_t device_id, int32_t arg_num, void **args_base, void **args, + int64_t *arg_sizes, int64_t *arg_types, int32_t depNum, void *depList, + int32_t noAliasDepNum, void *noAliasDepList) { + if (depNum + noAliasDepNum > 0) + __kmpc_omp_taskwait(NULL, 0); + + __tgt_target_data_update_mapper(device_id, arg_num, args_base, args, + arg_sizes, arg_types, nullptr); +} + +EXTERN void __tgt_target_data_update_mapper(int64_t device_id, int32_t arg_num, + void **args_base, void **args, + int64_t *arg_sizes, + int64_t *arg_types, + void **arg_mappers) { if (IsOffloadDisabled()) return; DP("Entering data update with %d mappings\n", arg_num); @@ -204,24 +263,43 @@ } DeviceTy& Device = Devices[device_id]; - int rc = target_data_update(Device, arg_num, args_base, - args, arg_sizes, arg_types); + int rc = target_data_update(Device, arg_num, args_base, args, arg_sizes, + arg_types, arg_mappers); HandleTargetOutcome(rc == OFFLOAD_SUCCESS); } -EXTERN void __tgt_target_data_update_nowait( +EXTERN void __tgt_target_data_update_nowait_mapper( int64_t device_id, int32_t arg_num, void **args_base, void **args, - int64_t *arg_sizes, int64_t *arg_types, int32_t depNum, void *depList, - int32_t noAliasDepNum, void *noAliasDepList) { + int64_t *arg_sizes, int64_t *arg_types, void **arg_mappers, int32_t depNum, + void *depList, int32_t noAliasDepNum, void *noAliasDepList) { if (depNum + noAliasDepNum > 0) __kmpc_omp_taskwait(NULL, __kmpc_global_thread_num(NULL)); - __tgt_target_data_update(device_id, arg_num, args_base, args, arg_sizes, - arg_types); + __tgt_target_data_update_mapper(device_id, arg_num, args_base, args, + arg_sizes, arg_types, arg_mappers); } EXTERN int __tgt_target(int64_t device_id, void *host_ptr, int32_t arg_num, void **args_base, void **args, int64_t *arg_sizes, int64_t *arg_types) { + return __tgt_target_mapper(device_id, host_ptr, arg_num, args_base, args, + arg_sizes, arg_types, nullptr); +} + +EXTERN int __tgt_target_nowait(int64_t device_id, void *host_ptr, + int32_t arg_num, void **args_base, void **args, int64_t *arg_sizes, + int64_t *arg_types, int32_t depNum, void *depList, int32_t noAliasDepNum, + void *noAliasDepList) { + if (depNum + noAliasDepNum > 0) + __kmpc_omp_taskwait(NULL, 0); + + return __tgt_target_mapper(device_id, host_ptr, arg_num, args_base, args, + arg_sizes, arg_types, nullptr); +} + +EXTERN int __tgt_target_mapper(int64_t device_id, void *host_ptr, + int32_t arg_num, void **args_base, void **args, + int64_t *arg_sizes, int64_t *arg_types, + void **arg_mappers) { if (IsOffloadDisabled()) return OFFLOAD_FAIL; DP("Entering target region with entry point " DPxMOD " and device Id %" PRId64 "\n", DPxPTR(host_ptr), device_id); @@ -245,25 +323,48 @@ #endif int rc = target(device_id, host_ptr, arg_num, args_base, args, arg_sizes, - arg_types, 0, 0, false /*team*/); + arg_types, arg_mappers, 0, 0, false /*team*/); HandleTargetOutcome(rc == OFFLOAD_SUCCESS); return rc; } -EXTERN int __tgt_target_nowait(int64_t device_id, void *host_ptr, - int32_t arg_num, void **args_base, void **args, int64_t *arg_sizes, - int64_t *arg_types, int32_t depNum, void *depList, int32_t noAliasDepNum, - void *noAliasDepList) { +EXTERN int __tgt_target_nowait_mapper(int64_t device_id, void *host_ptr, + int32_t arg_num, void **args_base, + void **args, int64_t *arg_sizes, + int64_t *arg_types, void **arg_mappers, + int32_t depNum, void *depList, + int32_t noAliasDepNum, + void *noAliasDepList) { if (depNum + noAliasDepNum > 0) __kmpc_omp_taskwait(NULL, __kmpc_global_thread_num(NULL)); - return __tgt_target(device_id, host_ptr, arg_num, args_base, args, arg_sizes, - arg_types); + return __tgt_target_mapper(device_id, host_ptr, arg_num, args_base, args, + arg_sizes, arg_types, arg_mappers); } EXTERN int __tgt_target_teams(int64_t device_id, void *host_ptr, int32_t arg_num, void **args_base, void **args, int64_t *arg_sizes, int64_t *arg_types, int32_t team_num, int32_t thread_limit) { + return __tgt_target_teams_mapper(device_id, host_ptr, arg_num, args_base, + args, arg_sizes, arg_types, nullptr, + team_num, thread_limit); +} + +EXTERN int __tgt_target_teams_nowait(int64_t device_id, void *host_ptr, + int32_t arg_num, void **args_base, void **args, int64_t *arg_sizes, + int64_t *arg_types, int32_t team_num, int32_t thread_limit, int32_t depNum, + void *depList, int32_t noAliasDepNum, void *noAliasDepList) { + if (depNum + noAliasDepNum > 0) + __kmpc_omp_taskwait(NULL, 0); + + return __tgt_target_teams_mapper(device_id, host_ptr, arg_num, args_base, + args, arg_sizes, arg_types, nullptr, + team_num, thread_limit); +} + +EXTERN int __tgt_target_teams_mapper(int64_t device_id, void *host_ptr, + int32_t arg_num, void **args_base, void **args, int64_t *arg_sizes, + int64_t *arg_types, void **arg_mappers, int32_t team_num, int32_t thread_limit) { if (IsOffloadDisabled()) return OFFLOAD_FAIL; DP("Entering target region with entry point " DPxMOD " and device Id %" PRId64 "\n", DPxPTR(host_ptr), device_id); @@ -286,22 +387,25 @@ } #endif - int rc = target(device_id, host_ptr, arg_num, args_base, args, arg_sizes, - arg_types, team_num, thread_limit, true /*team*/); + int rc = + target(device_id, host_ptr, arg_num, args_base, args, arg_sizes, + arg_types, arg_mappers, team_num, thread_limit, true /*team*/); HandleTargetOutcome(rc == OFFLOAD_SUCCESS); return rc; } -EXTERN int __tgt_target_teams_nowait(int64_t device_id, void *host_ptr, - int32_t arg_num, void **args_base, void **args, int64_t *arg_sizes, - int64_t *arg_types, int32_t team_num, int32_t thread_limit, int32_t depNum, - void *depList, int32_t noAliasDepNum, void *noAliasDepList) { +EXTERN int __tgt_target_teams_nowait_mapper( + int64_t device_id, void *host_ptr, int32_t arg_num, void **args_base, + void **args, int64_t *arg_sizes, int64_t *arg_types, void **arg_mappers, + int32_t team_num, int32_t thread_limit, int32_t depNum, void *depList, + int32_t noAliasDepNum, void *noAliasDepList) { if (depNum + noAliasDepNum > 0) __kmpc_omp_taskwait(NULL, __kmpc_global_thread_num(NULL)); - return __tgt_target_teams(device_id, host_ptr, arg_num, args_base, args, - arg_sizes, arg_types, team_num, thread_limit); + return __tgt_target_teams_mapper(device_id, host_ptr, arg_num, args_base, + args, arg_sizes, arg_types, arg_mappers, + team_num, thread_limit); } // Get the current number of components for a user-defined mapper. Index: libomptarget/src/omptarget.cpp =================================================================== --- libomptarget/src/omptarget.cpp +++ libomptarget/src/omptarget.cpp @@ -161,8 +161,8 @@ DP("Has pending ctors... call now\n"); for (auto &entry : lib.second.PendingCtors) { void *ctor = entry; - int rc = target(device_id, ctor, 0, NULL, NULL, NULL, - NULL, 1, 1, true /*team*/); + int rc = target(device_id, ctor, 0, NULL, NULL, NULL, NULL, NULL, 1, + 1, true /*team*/); if (rc != OFFLOAD_SUCCESS) { DP("Running ctor " DPxMOD " failed.\n", DPxPTR(ctor)); Device.PendingGlobalsMtx.unlock(); @@ -209,9 +209,359 @@ return ((type & OMP_TGT_MAPTYPE_MEMBER_OF) >> 48) - 1; } +/// Map components filled by a user-defined mapper function, when entering a +/// target data region. +int target_data_begin_map_components(DeviceTy &Device, + MapperComponentsTy &Components) { + // Process each component. + for (int32_t i = 0; i < Components.size(); ++i) { + void *HstPtrBegin = Components.get(i)->Begin; + void *HstPtrBase = Components.get(i)->Base; + int64_t Size = Components.get(i)->Size; + int64_t Type = Components.get(i)->Type; + + // Adjust for proper alignment if this is a combined entry (for structs). + // Look at the next argument - if that is MEMBER_OF this one, then this one + // is a combined entry. + int64_t padding = 0; + const int next_i = i + 1; + if (member_of(Type) < 0 && next_i < Components.size() && + member_of(Components.get(next_i)->Type) == i) { + padding = (int64_t)HstPtrBegin % alignment; + if (padding) { + DP("Using a padding of %" PRId64 " bytes for begin address " DPxMOD + "\n", + padding, DPxPTR(HstPtrBegin)); + HstPtrBegin = (char *)HstPtrBegin - padding; + Size += padding; + } + } + + // Address of pointer on the host and device, respectively. + void *Pointer_HstPtrBegin, *Pointer_TgtPtrBegin; + bool IsNew, Pointer_IsNew; + bool IsHostPtr = false; + bool IsImplicit = Type & OMP_TGT_MAPTYPE_IMPLICIT; + // Force the creation of a device side copy of the data when: + // a close map modifier was associated with a map that contained a to. + bool HasCloseModifier = Type & OMP_TGT_MAPTYPE_CLOSE; + // UpdateRef is based on MEMBER_OF instead of TARGET_PARAM because if we + // have reached this point via __tgt_target_data_begin and not __tgt_target + // then no argument is marked as TARGET_PARAM ("omp target data map" is not + // associated with a target region, so there are no target parameters). This + // may be considered a hack, we could revise the scheme in the future. + bool UpdateRef = !(Type & OMP_TGT_MAPTYPE_MEMBER_OF); + if (Type & OMP_TGT_MAPTYPE_PTR_AND_OBJ) { + DP("Has a pointer entry: \n"); + // base is address of pointer. + Pointer_TgtPtrBegin = Device.getOrAllocTgtPtr( + HstPtrBase, HstPtrBase, sizeof(void *), Pointer_IsNew, IsHostPtr, + IsImplicit, UpdateRef, HasCloseModifier); + if (!Pointer_TgtPtrBegin) { + DP("Call to getOrAllocTgtPtr returned null pointer (device failure or " + "illegal mapping).\n"); + return OFFLOAD_FAIL; + } + DP("There are %zu bytes allocated at target address " DPxMOD " - is%s new" + "\n", + sizeof(void *), DPxPTR(Pointer_TgtPtrBegin), + (Pointer_IsNew ? "" : " not")); + Pointer_HstPtrBegin = HstPtrBase; + // modify current entry. + HstPtrBase = *(void **)HstPtrBase; + UpdateRef = true; // subsequently update ref count of pointee + } + + void *TgtPtrBegin = + Device.getOrAllocTgtPtr(HstPtrBegin, HstPtrBase, Size, IsNew, IsHostPtr, + IsImplicit, UpdateRef, HasCloseModifier); + if (!TgtPtrBegin && Size) { + // If Size==0, then the argument could be a zero-length pointer to + // NULL, so getOrAlloc() returning NULL is not an error. + DP("Call to getOrAllocTgtPtr returned null pointer (device failure or " + "illegal mapping).\n"); + } + DP("There are %" PRId64 " bytes allocated at target address " DPxMOD + " - is%s new\n", + Size, DPxPTR(TgtPtrBegin), (IsNew ? "" : " not")); + + if (Type & OMP_TGT_MAPTYPE_TO) { + bool copy = false; + if (!(RTLs.RequiresFlags & OMP_REQ_UNIFIED_SHARED_MEMORY) || + HasCloseModifier) { + if (IsNew || (Type & OMP_TGT_MAPTYPE_ALWAYS)) { + copy = true; + } else if (Type & OMP_TGT_MAPTYPE_MEMBER_OF) { + // Copy data only if the "parent" struct has RefCount==1. + int32_t parent_idx = member_of(Type); + long parent_rc = + Device.getMapEntryRefCnt(Components.get(parent_idx)->Begin); + assert(parent_rc > 0 && "parent struct not found"); + if (parent_rc == 1) { + copy = true; + } + } + } + + if (copy && !IsHostPtr) { + DP("Moving %" PRId64 " bytes (hst:" DPxMOD ") -> (tgt:" DPxMOD ")\n", + Size, DPxPTR(HstPtrBegin), DPxPTR(TgtPtrBegin)); + int rt = Device.data_submit(TgtPtrBegin, HstPtrBegin, Size); + if (rt != OFFLOAD_SUCCESS) { + DP("Copying data to device failed.\n"); + return OFFLOAD_FAIL; + } + } + } + + if (Type & OMP_TGT_MAPTYPE_PTR_AND_OBJ && !IsHostPtr) { + DP("Update pointer (" DPxMOD ") -> [" DPxMOD "]\n", + DPxPTR(Pointer_TgtPtrBegin), DPxPTR(TgtPtrBegin)); + uint64_t Delta = (uint64_t)HstPtrBegin - (uint64_t)HstPtrBase; + void *TgtPtrBase = (void *)((uint64_t)TgtPtrBegin - Delta); + int rt = + Device.data_submit(Pointer_TgtPtrBegin, &TgtPtrBase, sizeof(void *)); + if (rt != OFFLOAD_SUCCESS) { + DP("Copying data to device failed.\n"); + return OFFLOAD_FAIL; + } + // create shadow pointers for this entry + Device.ShadowMtx.lock(); + Device.ShadowPtrMap[Pointer_HstPtrBegin] = { + HstPtrBase, Pointer_TgtPtrBegin, TgtPtrBase}; + Device.ShadowMtx.unlock(); + } + } + + return OFFLOAD_SUCCESS; +} + +/// Map components filled by a user-defined mapper function, when exiting a +/// target data region. +int target_data_end_map_components(DeviceTy &Device, + MapperComponentsTy &Components) { + // Process each component. + for (int32_t i = 0; i < Components.size(); ++i) { + void *HstPtrBegin = Components.get(i)->Begin; + int64_t Size = Components.get(i)->Size; + int64_t Type = Components.get(i)->Type; + + // Adjust for proper alignment if this is a combined entry (for structs). + // Look at the next argument - if that is MEMBER_OF this one, then this one + // is a combined entry. + int64_t padding = 0; + const int next_i = i + 1; + if (member_of(Type) < 0 && next_i < Components.size() && + member_of(Components.get(next_i)->Type) == i) { + padding = (int64_t)HstPtrBegin % alignment; + if (padding) { + DP("Using a padding of %" PRId64 " bytes for begin address " DPxMOD + "\n", + padding, DPxPTR(HstPtrBegin)); + HstPtrBegin = (char *)HstPtrBegin - padding; + Size += padding; + } + } + + bool IsLast, IsHostPtr; + bool UpdateRef = !(Type & OMP_TGT_MAPTYPE_MEMBER_OF) || + (Type & OMP_TGT_MAPTYPE_PTR_AND_OBJ); + bool ForceDelete = Type & OMP_TGT_MAPTYPE_DELETE; + bool HasCloseModifier = Type & OMP_TGT_MAPTYPE_CLOSE; + + // If PTR_AND_OBJ, HstPtrBegin is address of pointee + void *TgtPtrBegin = + Device.getTgtPtrBegin(HstPtrBegin, Size, IsLast, UpdateRef, IsHostPtr); + DP("There are %" PRId64 " bytes allocated at target address " DPxMOD + " - is%s last\n", + Size, DPxPTR(TgtPtrBegin), (IsLast ? "" : " not")); + + bool DelEntry = IsLast || ForceDelete; + + if ((Type & OMP_TGT_MAPTYPE_MEMBER_OF) && + !(Type & OMP_TGT_MAPTYPE_PTR_AND_OBJ)) { + DelEntry = false; // protect parent struct from being deallocated + } + + if ((Type & OMP_TGT_MAPTYPE_FROM) || DelEntry) { + // Move data back to the host + if (Type & OMP_TGT_MAPTYPE_FROM) { + bool Always = Type & OMP_TGT_MAPTYPE_ALWAYS; + bool CopyMember = false; + if (!(RTLs.RequiresFlags & OMP_REQ_UNIFIED_SHARED_MEMORY) || + HasCloseModifier) { + if ((Type & OMP_TGT_MAPTYPE_MEMBER_OF) && + !(Type & OMP_TGT_MAPTYPE_PTR_AND_OBJ)) { + // Copy data only if the "parent" struct has RefCount==1. + int32_t parent_idx = member_of(Type); + long parent_rc = + Device.getMapEntryRefCnt(Components.get(parent_idx)->Begin); + assert(parent_rc > 0 && "parent struct not found"); + if (parent_rc == 1) { + CopyMember = true; + } + } + } + + if ((DelEntry || Always || CopyMember) && + !(RTLs.RequiresFlags & OMP_REQ_UNIFIED_SHARED_MEMORY && + TgtPtrBegin == HstPtrBegin)) { + DP("Moving %" PRId64 " bytes (tgt:" DPxMOD ") -> (hst:" DPxMOD ")\n", + Size, DPxPTR(TgtPtrBegin), DPxPTR(HstPtrBegin)); + int rt = Device.data_retrieve(HstPtrBegin, TgtPtrBegin, Size); + if (rt != OFFLOAD_SUCCESS) { + DP("Copying data from device failed.\n"); + return OFFLOAD_FAIL; + } + } + } + + // If we copied back to the host a struct/array containing pointers, we + // need to restore the original host pointer values from their shadow + // copies. If the struct is going to be deallocated, remove any remaining + // shadow pointer entries for this struct. + uintptr_t lb = (uintptr_t)HstPtrBegin; + uintptr_t ub = (uintptr_t)HstPtrBegin + Size; + Device.ShadowMtx.lock(); + for (ShadowPtrListTy::iterator it = Device.ShadowPtrMap.begin(); + it != Device.ShadowPtrMap.end();) { + void **ShadowHstPtrAddr = (void **)it->first; + + // An STL map is sorted on its keys; use this property + // to quickly determine when to break out of the loop. + if ((uintptr_t)ShadowHstPtrAddr < lb) { + ++it; + continue; + } + if ((uintptr_t)ShadowHstPtrAddr >= ub) + break; + + // If we copied the struct to the host, we need to restore the pointer. + if (Type & OMP_TGT_MAPTYPE_FROM) { + DP("Restoring original host pointer value " DPxMOD " for host " + "pointer " DPxMOD "\n", + DPxPTR(it->second.HstPtrVal), DPxPTR(ShadowHstPtrAddr)); + *ShadowHstPtrAddr = it->second.HstPtrVal; + } + // If the struct is to be deallocated, remove the shadow entry. + if (DelEntry) { + DP("Removing shadow pointer " DPxMOD "\n", DPxPTR(ShadowHstPtrAddr)); + it = Device.ShadowPtrMap.erase(it); + } else { + ++it; + } + } + Device.ShadowMtx.unlock(); + + // Deallocate map + if (DelEntry) { + int rt = Device.deallocTgtPtr(HstPtrBegin, Size, ForceDelete, + HasCloseModifier); + if (rt != OFFLOAD_SUCCESS) { + DP("Deallocating data from device failed.\n"); + return OFFLOAD_FAIL; + } + } + } + } + + return OFFLOAD_SUCCESS; +} + +/// Map components filled by a user-defined mapper function, when updating a +/// target data region. +int target_data_update_map_components(DeviceTy &Device, + MapperComponentsTy &Components) { + // Process each component. + for (int32_t i = 0; i < Components.size(); ++i) { + void *HstPtrBegin = Components.get(i)->Begin; + int64_t Size = Components.get(i)->Size; + int64_t Type = Components.get(i)->Type; + + bool IsLast, IsHostPtr; + void *TgtPtrBegin = + Device.getTgtPtrBegin(HstPtrBegin, Size, IsLast, false, IsHostPtr); + if (!TgtPtrBegin) { + DP("hst data:" DPxMOD " not found, becomes a noop\n", + DPxPTR(HstPtrBegin)); + continue; + } + + if (RTLs.RequiresFlags & OMP_REQ_UNIFIED_SHARED_MEMORY && + TgtPtrBegin == HstPtrBegin) { + DP("hst data:" DPxMOD " unified and shared, becomes a noop\n", + DPxPTR(HstPtrBegin)); + continue; + } + + if (Type & OMP_TGT_MAPTYPE_FROM) { + DP("Moving %" PRId64 " bytes (tgt:" DPxMOD ") -> (hst:" DPxMOD ")\n", + Size, DPxPTR(TgtPtrBegin), DPxPTR(HstPtrBegin)); + int rt = Device.data_retrieve(HstPtrBegin, TgtPtrBegin, Size); + if (rt != OFFLOAD_SUCCESS) { + DP("Copying data from device failed.\n"); + return OFFLOAD_FAIL; + } + + uintptr_t lb = (uintptr_t)HstPtrBegin; + uintptr_t ub = (uintptr_t)HstPtrBegin + Size; + Device.ShadowMtx.lock(); + for (ShadowPtrListTy::iterator it = Device.ShadowPtrMap.begin(); + it != Device.ShadowPtrMap.end(); ++it) { + void **ShadowHstPtrAddr = (void **)it->first; + if ((uintptr_t)ShadowHstPtrAddr < lb) + continue; + if ((uintptr_t)ShadowHstPtrAddr >= ub) + break; + DP("Restoring original host pointer value " DPxMOD + " for host pointer " DPxMOD "\n", + DPxPTR(it->second.HstPtrVal), DPxPTR(ShadowHstPtrAddr)); + *ShadowHstPtrAddr = it->second.HstPtrVal; + } + Device.ShadowMtx.unlock(); + } + + if (Type & OMP_TGT_MAPTYPE_TO) { + DP("Moving %" PRId64 " bytes (hst:" DPxMOD ") -> (tgt:" DPxMOD ")\n", + Size, DPxPTR(HstPtrBegin), DPxPTR(TgtPtrBegin)); + int rt = Device.data_submit(TgtPtrBegin, HstPtrBegin, Size); + if (rt != OFFLOAD_SUCCESS) { + DP("Copying data to device failed.\n"); + return OFFLOAD_FAIL; + } + + uintptr_t lb = (uintptr_t)HstPtrBegin; + uintptr_t ub = (uintptr_t)HstPtrBegin + Size; + Device.ShadowMtx.lock(); + for (ShadowPtrListTy::iterator it = Device.ShadowPtrMap.begin(); + it != Device.ShadowPtrMap.end(); ++it) { + void **ShadowHstPtrAddr = (void **)it->first; + if ((uintptr_t)ShadowHstPtrAddr < lb) + continue; + if ((uintptr_t)ShadowHstPtrAddr >= ub) + break; + DP("Restoring original target pointer value " DPxMOD " for target " + "pointer " DPxMOD "\n", + DPxPTR(it->second.TgtPtrVal), DPxPTR(it->second.TgtPtrAddr)); + rt = Device.data_submit(it->second.TgtPtrAddr, &it->second.TgtPtrVal, + sizeof(void *)); + if (rt != OFFLOAD_SUCCESS) { + DP("Copying data to device failed.\n"); + Device.ShadowMtx.unlock(); + return OFFLOAD_FAIL; + } + } + Device.ShadowMtx.unlock(); + } + } + + return OFFLOAD_SUCCESS; +} + /// Internal function to do the mapping and transfer the data to the device -int target_data_begin(DeviceTy &Device, int32_t arg_num, - void **args_base, void **args, int64_t *arg_sizes, int64_t *arg_types) { +int target_data_begin(DeviceTy &Device, int32_t arg_num, void **args_base, + void **args, int64_t *arg_sizes, int64_t *arg_types, + void **arg_mappers) { // process each input. for (int32_t i = 0; i < arg_num; ++i) { // Ignore private variables and arrays - there is no mapping for them. @@ -223,6 +573,35 @@ void *HstPtrBase = args_base[i]; int64_t data_size = arg_sizes[i]; + // If a valid user-defined mapper is attached, use the associated mapper + // function to complete data mapping. + if (arg_mappers && arg_mappers[i]) { + DP("Call the mapper function " DPxMOD " for the %dth argument\n", + DPxPTR(arg_mappers[i]), i); + MapperComponentsTy Components; + // The mapper function follows the signature below: + // void .omp_mapper...(void *rt_mapper_handle, + // void *base, void *begin, + // size_t size, int64_t type); + void (*mapper_func_ptr)(void *, void *, void *, int64_t, int64_t); + mapper_func_ptr = + (void (*)(void *, void *, void *, int64_t, int64_t))(arg_mappers[i]); + // The mapper function fills up Components. + (*mapper_func_ptr)((void *)&Components, HstPtrBase, HstPtrBegin, + data_size, arg_types[i]); + if (Components.size() >= 0xffff) { + DP("The number of components exceed the limitation\n"); + return OFFLOAD_FAIL; + } + // Map the components filled up by the mapper function. + int rt = target_data_begin_map_components(Device, Components); + if (rt != OFFLOAD_SUCCESS) { + DP("Failed to map the components specified by a user-defined mapper\n"); + return OFFLOAD_FAIL; + } + continue; + } + // Adjust for proper alignment if this is a combined entry (for structs). // Look at the next argument - if that is MEMBER_OF this one, then this one // is a combined entry. @@ -344,7 +723,8 @@ /// Internal function to undo the mapping and retrieve the data from the device. int target_data_end(DeviceTy &Device, int32_t arg_num, void **args_base, - void **args, int64_t *arg_sizes, int64_t *arg_types) { + void **args, int64_t *arg_sizes, int64_t *arg_types, + void **arg_mappers) { // process each input. for (int32_t i = arg_num - 1; i >= 0; --i) { // Ignore private variables and arrays - there is no mapping for them. @@ -354,7 +734,38 @@ continue; void *HstPtrBegin = args[i]; + void *HstPtrBase = args_base[i]; int64_t data_size = arg_sizes[i]; + + // If a valid user-defined mapper is attached, use the associated mapper + // function to complete data mapping. + if (arg_mappers && arg_mappers[i]) { + DP("Call the mapper function " DPxMOD " for the %dth argument\n", + DPxPTR(arg_mappers[i]), i); + MapperComponentsTy Components; + // The mapper function follows the signature below: + // void .omp_mapper...(void *rt_mapper_handle, + // void *base, void *begin, + // size_t size, int64_t type); + void (*mapper_func_ptr)(void *, void *, void *, int64_t, int64_t); + mapper_func_ptr = + (void (*)(void *, void *, void *, int64_t, int64_t))(arg_mappers[i]); + // The mapper function fills up Components. + (*mapper_func_ptr)((void *)&Components, HstPtrBase, HstPtrBegin, + data_size, arg_types[i]); + if (Components.size() >= 0xffff) { + DP("The number of components exceed the limitation\n"); + return OFFLOAD_FAIL; + } + // Map the components filled up by the mapper function. + int rt = target_data_end_map_components(Device, Components); + if (rt != OFFLOAD_SUCCESS) { + DP("Failed to map the components specified by a user-defined mapper\n"); + return OFFLOAD_FAIL; + } + continue; + } + // Adjust for proper alignment if this is a combined entry (for structs). // Look at the next argument - if that is MEMBER_OF this one, then this one // is a combined entry. @@ -476,8 +887,9 @@ } /// Internal function to pass data to/from the target. -int target_data_update(DeviceTy &Device, int32_t arg_num, - void **args_base, void **args, int64_t *arg_sizes, int64_t *arg_types) { +int target_data_update(DeviceTy &Device, int32_t arg_num, void **args_base, + void **args, int64_t *arg_sizes, int64_t *arg_types, + void **arg_mappers) { // process each input. for (int32_t i = 0; i < arg_num; ++i) { if ((arg_types[i] & OMP_TGT_MAPTYPE_LITERAL) || @@ -485,7 +897,38 @@ continue; void *HstPtrBegin = args[i]; + void *HstPtrBase = args_base[i]; int64_t MapSize = arg_sizes[i]; + + // If a valid user-defined mapper is attached, use the associated mapper + // function to complete data mapping. + if (arg_mappers && arg_mappers[i]) { + DP("Call the mapper function " DPxMOD " for the %dth argument\n", + DPxPTR(arg_mappers[i]), i); + MapperComponentsTy Components; + // The mapper function follows the signature below: + // void .omp_mapper...(void *rt_mapper_handle, + // void *base, void *begin, + // size_t size, int64_t type); + void (*mapper_func_ptr)(void *, void *, void *, int64_t, int64_t); + mapper_func_ptr = + (void (*)(void *, void *, void *, int64_t, int64_t))(arg_mappers[i]); + // The mapper function fills up Components. + (*mapper_func_ptr)((void *)&Components, HstPtrBase, HstPtrBegin, + MapSize, arg_types[i]); + if (Components.size() >= 0xffff) { + DP("The number of components exceed the limitation\n"); + return OFFLOAD_FAIL; + } + // Map the components filled up by the mapper function. + int rt = target_data_update_map_components(Device, Components); + if (rt != OFFLOAD_SUCCESS) { + DP("Failed to map the components specified by a user-defined mapper\n"); + return OFFLOAD_FAIL; + } + continue; + } + bool IsLast, IsHostPtr; void *TgtPtrBegin = Device.getTgtPtrBegin(HstPtrBegin, MapSize, IsLast, false, IsHostPtr); @@ -577,9 +1020,10 @@ /// performs the same action as data_update and data_end above. This function /// returns 0 if it was able to transfer the execution to a target and an /// integer different from zero otherwise. -int target(int64_t device_id, void *host_ptr, int32_t arg_num, - void **args_base, void **args, int64_t *arg_sizes, int64_t *arg_types, - int32_t team_num, int32_t thread_limit, int IsTeamConstruct) { +int target(int64_t device_id, void *host_ptr, int32_t arg_num, void **args_base, + void **args, int64_t *arg_sizes, int64_t *arg_types, + void **arg_mappers, int32_t team_num, int32_t thread_limit, + int IsTeamConstruct) { DeviceTy &Device = Devices[device_id]; // Find the table information in the map or look it up in the translation @@ -635,7 +1079,7 @@ // Move data to device. int rc = target_data_begin(Device, arg_num, args_base, args, arg_sizes, - arg_types); + arg_types, arg_mappers); if (rc != OFFLOAD_SUCCESS) { DP("Call to target_data_begin failed, abort target.\n"); return OFFLOAD_FAIL; @@ -797,7 +1241,7 @@ // Move data from device. int rt = target_data_end(Device, arg_num, args_base, args, arg_sizes, - arg_types); + arg_types, arg_mappers); if (rt != OFFLOAD_SUCCESS) { DP("Call to target_data_end failed, abort targe.\n"); return OFFLOAD_FAIL; Index: libomptarget/src/private.h =================================================================== --- libomptarget/src/private.h +++ libomptarget/src/private.h @@ -15,20 +15,25 @@ #include +#include #include extern int target_data_begin(DeviceTy &Device, int32_t arg_num, - void **args_base, void **args, int64_t *arg_sizes, int64_t *arg_types); + void **args_base, void **args, int64_t *arg_sizes, + int64_t *arg_types, void **arg_mappers); extern int target_data_end(DeviceTy &Device, int32_t arg_num, void **args_base, - void **args, int64_t *arg_sizes, int64_t *arg_types); + void **args, int64_t *arg_sizes, int64_t *arg_types, + void **arg_mappers); extern int target_data_update(DeviceTy &Device, int32_t arg_num, - void **args_base, void **args, int64_t *arg_sizes, int64_t *arg_types); + void **args_base, void **args, int64_t *arg_sizes, + int64_t *arg_types, void **arg_mappers); extern int target(int64_t device_id, void *host_ptr, int32_t arg_num, - void **args_base, void **args, int64_t *arg_sizes, int64_t *arg_types, - int32_t team_num, int32_t thread_limit, int IsTeamConstruct); + void **args_base, void **args, int64_t *arg_sizes, + int64_t *arg_types, void **arg_mappers, int32_t team_num, + int32_t thread_limit, int IsTeamConstruct); extern int CheckDeviceAndCtors(int64_t device_id); @@ -57,6 +62,11 @@ // implementation here. struct MapperComponentsTy { std::vector Components; + int32_t size() { return Components.size(); } + MapComponentInfoTy *get(int32_t i) { + assert(i < size() && "Try to access a component that does not exist"); + return &Components[i]; + } }; //////////////////////////////////////////////////////////////////////////////// Index: libomptarget/src/rtl.cpp =================================================================== --- libomptarget/src/rtl.cpp +++ libomptarget/src/rtl.cpp @@ -352,8 +352,8 @@ Device.PendingGlobalsMtx.lock(); if (Device.PendingCtorsDtors[desc].PendingCtors.empty()) { for (auto &dtor : Device.PendingCtorsDtors[desc].PendingDtors) { - int rc = target(Device.DeviceID, dtor, 0, NULL, NULL, NULL, NULL, 1, - 1, true /*team*/); + int rc = target(Device.DeviceID, dtor, 0, NULL, NULL, NULL, NULL, + NULL, 1, 1, true /*team*/); if (rc != OFFLOAD_SUCCESS) { DP("Running destructor " DPxMOD " failed.\n", DPxPTR(dtor)); } Index: libomptarget/test/mapping/declare_mapper_target.cpp =================================================================== --- /dev/null +++ libomptarget/test/mapping/declare_mapper_target.cpp @@ -0,0 +1,36 @@ +// RUN: %libomptarget-compile-run-and-check-aarch64-unknown-linux-gnu +// RUN: %libomptarget-compile-run-and-check-nvptx64-nvidia-cuda +// RUN: %libomptarget-compile-run-and-check-powerpc64-ibm-linux-gnu +// RUN: %libomptarget-compile-run-and-check-powerpc64le-ibm-linux-gnu +// RUN: %libomptarget-compile-run-and-check-x86_64-pc-linux-gnu + +#include +#include + +#define NUM 1024 + +class C { +public: + int *a; +}; + +#pragma omp declare mapper(id: C s) map(s.a[0:NUM]) + +int main() { + C c; + c.a = (int*) malloc(sizeof(int)*NUM); + for (int i = 0; i < NUM; i++) { + c.a[i] = 1; + } + #pragma omp target teams distribute parallel for map(mapper(id),tofrom: c) + for (int i = 0; i < NUM; i++) { + ++c.a[i]; + } + int sum = 0; + for (int i = 0; i < NUM; i++) { + sum += c.a[i]; + } + // CHECK: Sum = 2048 + printf("Sum = %d\n", sum); + return 0; +} Index: libomptarget/test/mapping/declare_mapper_target_data.cpp =================================================================== --- /dev/null +++ libomptarget/test/mapping/declare_mapper_target_data.cpp @@ -0,0 +1,39 @@ +// RUN: %libomptarget-compile-run-and-check-aarch64-unknown-linux-gnu +// RUN: %libomptarget-compile-run-and-check-nvptx64-nvidia-cuda +// RUN: %libomptarget-compile-run-and-check-powerpc64-ibm-linux-gnu +// RUN: %libomptarget-compile-run-and-check-powerpc64le-ibm-linux-gnu +// RUN: %libomptarget-compile-run-and-check-x86_64-pc-linux-gnu + +#include +#include + +#define NUM 1024 + +class C { +public: + int *a; +}; + +#pragma omp declare mapper(id: C s) map(s.a[0:NUM]) + +int main() { + C c; + c.a = (int*) malloc(sizeof(int)*NUM); + for (int i = 0; i < NUM; i++) { + c.a[i] = 1; + } + #pragma omp target data map(mapper(id),tofrom: c) + { + #pragma omp target teams distribute parallel for + for (int i = 0; i < NUM; i++) { + ++c.a[i]; + } + } + int sum = 0; + for (int i = 0; i < NUM; i++) { + sum += c.a[i]; + } + // CHECK: Sum = 2048 + printf("Sum = %d\n", sum); + return 0; +} Index: libomptarget/test/mapping/declare_mapper_target_data_enter_exit.cpp =================================================================== --- /dev/null +++ libomptarget/test/mapping/declare_mapper_target_data_enter_exit.cpp @@ -0,0 +1,38 @@ +// RUN: %libomptarget-compile-run-and-check-aarch64-unknown-linux-gnu +// RUN: %libomptarget-compile-run-and-check-nvptx64-nvidia-cuda +// RUN: %libomptarget-compile-run-and-check-powerpc64-ibm-linux-gnu +// RUN: %libomptarget-compile-run-and-check-powerpc64le-ibm-linux-gnu +// RUN: %libomptarget-compile-run-and-check-x86_64-pc-linux-gnu + +#include +#include + +#define NUM 1024 + +class C { +public: + int *a; +}; + +#pragma omp declare mapper(id: C s) map(s.a[0:NUM]) + +int main() { + C c; + c.a = (int*) malloc(sizeof(int)*NUM); + for (int i = 0; i < NUM; i++) { + c.a[i] = 1; + } + #pragma omp target enter data map(mapper(id),to: c) + #pragma omp target teams distribute parallel for + for (int i = 0; i < NUM; i++) { + ++c.a[i]; + } + #pragma omp target exit data map(mapper(id),from: c) + int sum = 0; + for (int i = 0; i < NUM; i++) { + sum += c.a[i]; + } + // CHECK: Sum = 2048 + printf("Sum = %d\n", sum); + return 0; +} Index: libomptarget/test/mapping/declare_mapper_target_update.cpp =================================================================== --- /dev/null +++ libomptarget/test/mapping/declare_mapper_target_update.cpp @@ -0,0 +1,60 @@ +// RUN: %libomptarget-compile-run-and-check-aarch64-unknown-linux-gnu +// RUN: %libomptarget-compile-run-and-check-nvptx64-nvidia-cuda +// RUN: %libomptarget-compile-run-and-check-powerpc64-ibm-linux-gnu +// RUN: %libomptarget-compile-run-and-check-powerpc64le-ibm-linux-gnu +// RUN: %libomptarget-compile-run-and-check-x86_64-pc-linux-gnu + +#include +#include + +#define NUM 1024 + +class C { +public: + int *a; +}; + +#pragma omp declare mapper(id: C s) map(s.a[0:NUM]) + +int main() { + C c; + int sum = 0; + c.a = (int*) malloc(sizeof(int)*NUM); + for (int i = 0; i < NUM; i++) { + c.a[i] = 1; + } + #pragma omp target enter data map(mapper(id),alloc: c) + #pragma omp target teams distribute parallel for + for (int i = 0; i < NUM; i++) { + c.a[i] = 0; + } + #pragma omp target update from(mapper(id): c) + for (int i = 0; i < NUM; i++) { + sum += c.a[i]; + } + // CHECK: Sum (after first update from) = 0 + printf("Sum (after first update from) = %d\n", sum); + for (int i = 0; i < NUM; i++) { + c.a[i] = 1; + } + #pragma omp target update to(mapper(id): c) + #pragma omp target teams distribute parallel for + for (int i = 0; i < NUM; i++) { + ++c.a[i]; + } + sum = 0; + for (int i = 0; i < NUM; i++) { + sum += c.a[i]; + } + // CHECK: Sum (after update to) = 1024 + printf("Sum (after update to) = %d\n", sum); + #pragma omp target update from(mapper(id): c) + sum = 0; + for (int i = 0; i < NUM; i++) { + sum += c.a[i]; + } + // CHECK: Sum (after second update from) = 2048 + printf("Sum (after second update from) = %d\n", sum); + #pragma omp target exit data map(mapper(id),delete: c) + return 0; +}