Index: libomptarget/include/omptarget.h
===================================================================
--- libomptarget/include/omptarget.h
+++ libomptarget/include/omptarget.h
@@ -151,6 +151,14 @@
                                     int32_t depNum, void *depList,
                                     int32_t noAliasDepNum,
                                     void *noAliasDepList);
+void __tgt_target_data_begin_mapper(int64_t device_id, int32_t arg_num,
+                                    void **args_base, void **args,
+                                    int64_t *arg_sizes, int64_t *arg_types,
+                                    void **arg_mappers);
+void __tgt_target_data_begin_nowait_mapper(
+    int64_t device_id, int32_t arg_num, void **args_base, void **args,
+    int64_t *arg_sizes, int64_t *arg_types, void **arg_mappers, int32_t depNum,
+    void *depList, int32_t noAliasDepNum, void *noAliasDepList);
 
 // passes data from the target, release target memory and destroys the
 // host-target mapping (top entry from the stack of data maps) created by
@@ -162,6 +170,16 @@
                                   int64_t *arg_sizes, int64_t *arg_types,
                                   int32_t depNum, void *depList,
                                   int32_t noAliasDepNum, void *noAliasDepList);
+void __tgt_target_data_end_mapper(int64_t device_id, int32_t arg_num,
+                                  void **args_base, void **args,
+                                  int64_t *arg_sizes, int64_t *arg_types,
+                                  void **arg_mappers);
+void __tgt_target_data_end_nowait_mapper(int64_t device_id, int32_t arg_num,
+                                         void **args_base, void **args,
+                                         int64_t *arg_sizes, int64_t *arg_types,
+                                         void **arg_mappers, int32_t depNum,
+                                         void *depList, int32_t noAliasDepNum,
+                                         void *noAliasDepList);
 
 /// passes data to/from the target
 void __tgt_target_data_update(int64_t device_id, int32_t arg_num,
@@ -173,6 +191,14 @@
                                      int32_t depNum, void *depList,
                                      int32_t noAliasDepNum,
                                      void *noAliasDepList);
+void __tgt_target_data_update_mapper(int64_t device_id, int32_t arg_num,
+                                     void **args_base, void **args,
+                                     int64_t *arg_sizes, int64_t *arg_types,
+                                     void **arg_mappers);
+void __tgt_target_data_update_nowait_mapper(
+    int64_t device_id, int32_t arg_num, void **args_base, void **args,
+    int64_t *arg_sizes, int64_t *arg_types, void **arg_mappers, int32_t depNum,
+    void *depList, int32_t noAliasDepNum, void *noAliasDepList);
 
 // Performs the same actions as data_begin in case arg_num is non-zero
 // and initiates run of offloaded region on target platform; if arg_num
@@ -187,6 +213,15 @@
                         void **args_base, void **args, int64_t *arg_sizes,
                         int64_t *arg_types, int32_t depNum, void *depList,
                         int32_t noAliasDepNum, void *noAliasDepList);
+int __tgt_target_mapper(int64_t device_id, void *host_ptr, int32_t arg_num,
+                        void **args_base, void **args, int64_t *arg_sizes,
+                        int64_t *arg_types, void **arg_mappers);
+int __tgt_target_nowait_mapper(int64_t device_id, void *host_ptr,
+                               int32_t arg_num, void **args_base, void **args,
+                               int64_t *arg_sizes, int64_t *arg_types,
+                               void **arg_mappers, int32_t depNum,
+                               void *depList, int32_t noAliasDepNum,
+                               void *noAliasDepList);
 
 int __tgt_target_teams(int64_t device_id, void *host_ptr, int32_t arg_num,
                        void **args_base, void **args, int64_t *arg_sizes,
@@ -198,6 +233,17 @@
                               int32_t num_teams, int32_t thread_limit,
                               int32_t depNum, void *depList,
                               int32_t noAliasDepNum, void *noAliasDepList);
+int __tgt_target_teams_mapper(int64_t device_id, void *host_ptr,
+                              int32_t arg_num, void **args_base, void **args,
+                              int64_t *arg_sizes, int64_t *arg_types,
+                              void **arg_mappers, int32_t num_teams,
+                              int32_t thread_limit);
+int __tgt_target_teams_nowait_mapper(
+    int64_t device_id, void *host_ptr, int32_t arg_num, void **args_base,
+    void **args, int64_t *arg_sizes, int64_t *arg_types, void **arg_mappers,
+    int32_t num_teams, int32_t thread_limit, int32_t depNum, void *depList,
+    int32_t noAliasDepNum, void *noAliasDepList);
+
 void __kmpc_push_target_tripcount(int64_t device_id, uint64_t loop_tripcount);
 
 #ifdef __cplusplus
Index: libomptarget/src/exports
===================================================================
--- libomptarget/src/exports
+++ libomptarget/src/exports
@@ -13,6 +13,16 @@
     __tgt_target_data_update_nowait;
     __tgt_target_nowait;
     __tgt_target_teams_nowait;
+    __tgt_target_data_begin_mapper;
+    __tgt_target_data_end_mapper;
+    __tgt_target_data_update_mapper;
+    __tgt_target_mapper;
+    __tgt_target_teams_mapper;
+    __tgt_target_data_begin_nowait_mapper;
+    __tgt_target_data_end_nowait_mapper;
+    __tgt_target_data_update_nowait_mapper;
+    __tgt_target_nowait_mapper;
+    __tgt_target_teams_nowait_mapper;
     __tgt_mapper_num_components;
     __tgt_push_mapper_component;
     omp_get_num_devices;
Index: libomptarget/src/interface.cpp
===================================================================
--- libomptarget/src/interface.cpp
+++ libomptarget/src/interface.cpp
@@ -91,6 +91,26 @@
 /// and passes the data to the device.
 EXTERN void __tgt_target_data_begin(int64_t device_id, int32_t arg_num,
     void **args_base, void **args, int64_t *arg_sizes, int64_t *arg_types) {
+  __tgt_target_data_begin_mapper(device_id, arg_num, args_base, args, arg_sizes,
+                                 arg_types, nullptr);
+}
+
+EXTERN void __tgt_target_data_begin_nowait(int64_t device_id, int32_t arg_num,
+    void **args_base, void **args, int64_t *arg_sizes, int64_t *arg_types,
+    int32_t depNum, void *depList, int32_t noAliasDepNum,
+    void *noAliasDepList) {
+  if (depNum + noAliasDepNum > 0)
+    __kmpc_omp_taskwait(NULL, __kmpc_global_thread_num(NULL));
+
+  __tgt_target_data_begin_mapper(device_id, arg_num, args_base, args, arg_sizes,
+                                 arg_types, nullptr);
+}
+
+EXTERN void __tgt_target_data_begin_mapper(int64_t device_id, int32_t arg_num,
+                                           void **args_base, void **args,
+                                           int64_t *arg_sizes,
+                                           int64_t *arg_types,
+                                           void **arg_mappers) {
   if (IsOffloadDisabled()) return;
 
   DP("Entering data begin region for device %" PRId64 " with %d mappings\n",
@@ -118,20 +138,20 @@
   }
 #endif
 
-  int rc = target_data_begin(Device, arg_num, args_base,
-      args, arg_sizes, arg_types);
+  int rc = target_data_begin(Device, arg_num, args_base, args, arg_sizes,
+                             arg_types, arg_mappers);
   HandleTargetOutcome(rc == OFFLOAD_SUCCESS);
 }
 
-EXTERN void __tgt_target_data_begin_nowait(int64_t device_id, int32_t arg_num,
-    void **args_base, void **args, int64_t *arg_sizes, int64_t *arg_types,
-    int32_t depNum, void *depList, int32_t noAliasDepNum,
-    void *noAliasDepList) {
+EXTERN void __tgt_target_data_begin_nowait_mapper(
+    int64_t device_id, int32_t arg_num, void **args_base, void **args,
+    int64_t *arg_sizes, int64_t *arg_types, void **arg_mappers, int32_t depNum,
+    void *depList, int32_t noAliasDepNum, void *noAliasDepList) {
   if (depNum + noAliasDepNum > 0)
     __kmpc_omp_taskwait(NULL, __kmpc_global_thread_num(NULL));
 
-  __tgt_target_data_begin(device_id, arg_num, args_base, args, arg_sizes,
-                          arg_types);
+  __tgt_target_data_begin_mapper(device_id, arg_num, args_base, args, arg_sizes,
+                                 arg_types, arg_mappers);
 }
 
 /// passes data from the target, releases target memory and destroys
@@ -139,6 +159,25 @@
 /// created by the last __tgt_target_data_begin.
 EXTERN void __tgt_target_data_end(int64_t device_id, int32_t arg_num,
     void **args_base, void **args, int64_t *arg_sizes, int64_t *arg_types) {
+  __tgt_target_data_end_mapper(device_id, arg_num, args_base, args, arg_sizes,
+                               arg_types, nullptr);
+}
+
+EXTERN void __tgt_target_data_end_nowait(int64_t device_id, int32_t arg_num,
+    void **args_base, void **args, int64_t *arg_sizes, int64_t *arg_types,
+    int32_t depNum, void *depList, int32_t noAliasDepNum,
+    void *noAliasDepList) {
+  if (depNum + noAliasDepNum > 0)
+    __kmpc_omp_taskwait(NULL, __kmpc_global_thread_num(NULL));
+
+  __tgt_target_data_end_mapper(device_id, arg_num, args_base, args, arg_sizes,
+                               arg_types, nullptr);
+}
+
+EXTERN void __tgt_target_data_end_mapper(int64_t device_id, int32_t arg_num,
+                                         void **args_base, void **args,
+                                         int64_t *arg_sizes, int64_t *arg_types,
+                                         void **arg_mappers) {
   if (IsOffloadDisabled()) return;
   DP("Entering data end region with %d mappings\n", arg_num);
 
@@ -171,24 +210,44 @@
   }
 #endif
 
-  int rc = target_data_end(Device, arg_num, args_base,
-      args, arg_sizes, arg_types);
+  int rc = target_data_end(Device, arg_num, args_base, args, arg_sizes,
+                           arg_types, arg_mappers);
   HandleTargetOutcome(rc == OFFLOAD_SUCCESS);
 }
 
-EXTERN void __tgt_target_data_end_nowait(int64_t device_id, int32_t arg_num,
-    void **args_base, void **args, int64_t *arg_sizes, int64_t *arg_types,
-    int32_t depNum, void *depList, int32_t noAliasDepNum,
-    void *noAliasDepList) {
+EXTERN void __tgt_target_data_end_nowait_mapper(
+    int64_t device_id, int32_t arg_num, void **args_base, void **args,
+    int64_t *arg_sizes, int64_t *arg_types, void **arg_mappers, int32_t depNum,
+    void *depList, int32_t noAliasDepNum, void *noAliasDepList) {
   if (depNum + noAliasDepNum > 0)
     __kmpc_omp_taskwait(NULL, __kmpc_global_thread_num(NULL));
 
-  __tgt_target_data_end(device_id, arg_num, args_base, args, arg_sizes,
-                        arg_types);
+  __tgt_target_data_end_mapper(device_id, arg_num, args_base, args, arg_sizes,
+                               arg_types, arg_mappers);
 }
 
 EXTERN void __tgt_target_data_update(int64_t device_id, int32_t arg_num,
     void **args_base, void **args, int64_t *arg_sizes, int64_t *arg_types) {
+  __tgt_target_data_update_mapper(device_id, arg_num, args_base, args,
+                                  arg_sizes, arg_types, nullptr);
+}
+
+EXTERN void __tgt_target_data_update_nowait(
+    int64_t device_id, int32_t arg_num, void **args_base, void **args,
+    int64_t *arg_sizes, int64_t *arg_types, int32_t depNum, void *depList,
+    int32_t noAliasDepNum, void *noAliasDepList) {
+  if (depNum + noAliasDepNum > 0)
+    __kmpc_omp_taskwait(NULL, __kmpc_global_thread_num(NULL));
+
+  __tgt_target_data_update_mapper(device_id, arg_num, args_base, args,
+                                  arg_sizes, arg_types, nullptr);
+}
+
+EXTERN void __tgt_target_data_update_mapper(int64_t device_id, int32_t arg_num,
+                                            void **args_base, void **args,
+                                            int64_t *arg_sizes,
+                                            int64_t *arg_types,
+                                            void **arg_mappers) {
   if (IsOffloadDisabled()) return;
   DP("Entering data update with %d mappings\n", arg_num);
 
@@ -204,24 +263,43 @@
   }
 
   DeviceTy& Device = Devices[device_id];
-  int rc = target_data_update(Device, arg_num, args_base,
-      args, arg_sizes, arg_types);
+  int rc = target_data_update(Device, arg_num, args_base, args, arg_sizes,
+                              arg_types, arg_mappers);
   HandleTargetOutcome(rc == OFFLOAD_SUCCESS);
 }
 
-EXTERN void __tgt_target_data_update_nowait(
+EXTERN void __tgt_target_data_update_nowait_mapper(
     int64_t device_id, int32_t arg_num, void **args_base, void **args,
-    int64_t *arg_sizes, int64_t *arg_types, int32_t depNum, void *depList,
-    int32_t noAliasDepNum, void *noAliasDepList) {
+    int64_t *arg_sizes, int64_t *arg_types, void **arg_mappers, int32_t depNum,
+    void *depList, int32_t noAliasDepNum, void *noAliasDepList) {
   if (depNum + noAliasDepNum > 0)
     __kmpc_omp_taskwait(NULL, __kmpc_global_thread_num(NULL));
 
-  __tgt_target_data_update(device_id, arg_num, args_base, args, arg_sizes,
-                           arg_types);
+  __tgt_target_data_update_mapper(device_id, arg_num, args_base, args,
+                                  arg_sizes, arg_types, arg_mappers);
 }
 
 EXTERN int __tgt_target(int64_t device_id, void *host_ptr, int32_t arg_num,
     void **args_base, void **args, int64_t *arg_sizes, int64_t *arg_types) {
+  return __tgt_target_mapper(device_id, host_ptr, arg_num, args_base, args,
+                             arg_sizes, arg_types, nullptr);
+}
+
+EXTERN int __tgt_target_nowait(int64_t device_id, void *host_ptr,
+    int32_t arg_num, void **args_base, void **args, int64_t *arg_sizes,
+    int64_t *arg_types, int32_t depNum, void *depList, int32_t noAliasDepNum,
+    void *noAliasDepList) {
+  if (depNum + noAliasDepNum > 0)
+    __kmpc_omp_taskwait(NULL, __kmpc_global_thread_num(NULL));
+
+  return __tgt_target_mapper(device_id, host_ptr, arg_num, args_base, args,
+                             arg_sizes, arg_types, nullptr);
+}
+
+EXTERN int __tgt_target_mapper(int64_t device_id, void *host_ptr,
+                               int32_t arg_num, void **args_base, void **args,
+                               int64_t *arg_sizes, int64_t *arg_types,
+                               void **arg_mappers) {
   if (IsOffloadDisabled()) return OFFLOAD_FAIL;
   DP("Entering target region with entry point " DPxMOD " and device Id %"
       PRId64 "\n", DPxPTR(host_ptr), device_id);
@@ -245,25 +323,48 @@
 #endif
 
   int rc = target(device_id, host_ptr, arg_num, args_base, args, arg_sizes,
-      arg_types, 0, 0, false /*team*/);
+                  arg_types, arg_mappers, 0, 0, false /*team*/);
   HandleTargetOutcome(rc == OFFLOAD_SUCCESS);
   return rc;
 }
 
-EXTERN int __tgt_target_nowait(int64_t device_id, void *host_ptr,
-    int32_t arg_num, void **args_base, void **args, int64_t *arg_sizes,
-    int64_t *arg_types, int32_t depNum, void *depList, int32_t noAliasDepNum,
-    void *noAliasDepList) {
+EXTERN int __tgt_target_nowait_mapper(int64_t device_id, void *host_ptr,
+                                      int32_t arg_num, void **args_base,
+                                      void **args, int64_t *arg_sizes,
+                                      int64_t *arg_types, void **arg_mappers,
+                                      int32_t depNum, void *depList,
+                                      int32_t noAliasDepNum,
+                                      void *noAliasDepList) {
   if (depNum + noAliasDepNum > 0)
     __kmpc_omp_taskwait(NULL, __kmpc_global_thread_num(NULL));
 
-  return __tgt_target(device_id, host_ptr, arg_num, args_base, args, arg_sizes,
-                      arg_types);
+  return __tgt_target_mapper(device_id, host_ptr, arg_num, args_base, args,
+                             arg_sizes, arg_types, arg_mappers);
 }
 
 EXTERN int __tgt_target_teams(int64_t device_id, void *host_ptr,
     int32_t arg_num, void **args_base, void **args, int64_t *arg_sizes,
     int64_t *arg_types, int32_t team_num, int32_t thread_limit) {
+  return __tgt_target_teams_mapper(device_id, host_ptr, arg_num, args_base,
+                                   args, arg_sizes, arg_types, nullptr,
+                                   team_num, thread_limit);
+}
+
+EXTERN int __tgt_target_teams_nowait(int64_t device_id, void *host_ptr,
+    int32_t arg_num, void **args_base, void **args, int64_t *arg_sizes,
+    int64_t *arg_types, int32_t team_num, int32_t thread_limit, int32_t depNum,
+    void *depList, int32_t noAliasDepNum, void *noAliasDepList) {
+  if (depNum + noAliasDepNum > 0)
+    __kmpc_omp_taskwait(NULL, __kmpc_global_thread_num(NULL));
+
+  return __tgt_target_teams_mapper(device_id, host_ptr, arg_num, args_base,
+                                   args, arg_sizes, arg_types, nullptr,
+                                   team_num, thread_limit);
+}
+
+EXTERN int __tgt_target_teams_mapper(int64_t device_id, void *host_ptr,
+    int32_t arg_num, void **args_base, void **args, int64_t *arg_sizes,
+    int64_t *arg_types, void **arg_mappers, int32_t team_num, int32_t thread_limit) {
   if (IsOffloadDisabled()) return OFFLOAD_FAIL;
   DP("Entering target region with entry point " DPxMOD " and device Id %"
       PRId64 "\n", DPxPTR(host_ptr), device_id);
@@ -286,22 +387,25 @@
   }
 #endif
 
-  int rc = target(device_id, host_ptr, arg_num, args_base, args, arg_sizes,
-      arg_types, team_num, thread_limit, true /*team*/);
+  int rc =
+      target(device_id, host_ptr, arg_num, args_base, args, arg_sizes,
+             arg_types, arg_mappers, team_num, thread_limit, true /*team*/);
   HandleTargetOutcome(rc == OFFLOAD_SUCCESS);
 
   return rc;
 }
 
-EXTERN int __tgt_target_teams_nowait(int64_t device_id, void *host_ptr,
-    int32_t arg_num, void **args_base, void **args, int64_t *arg_sizes,
-    int64_t *arg_types, int32_t team_num, int32_t thread_limit, int32_t depNum,
-    void *depList, int32_t noAliasDepNum, void *noAliasDepList) {
+EXTERN int __tgt_target_teams_nowait_mapper(
+    int64_t device_id, void *host_ptr, int32_t arg_num, void **args_base,
+    void **args, int64_t *arg_sizes, int64_t *arg_types, void **arg_mappers,
+    int32_t team_num, int32_t thread_limit, int32_t depNum, void *depList,
+    int32_t noAliasDepNum, void *noAliasDepList) {
   if (depNum + noAliasDepNum > 0)
     __kmpc_omp_taskwait(NULL, __kmpc_global_thread_num(NULL));
 
-  return __tgt_target_teams(device_id, host_ptr, arg_num, args_base, args,
-                            arg_sizes, arg_types, team_num, thread_limit);
+  return __tgt_target_teams_mapper(device_id, host_ptr, arg_num, args_base,
+                                   args, arg_sizes, arg_types, arg_mappers,
+                                   team_num, thread_limit);
 }
 
 // Get the current number of components for a user-defined mapper.
Index: libomptarget/src/omptarget.cpp
===================================================================
--- libomptarget/src/omptarget.cpp
+++ libomptarget/src/omptarget.cpp
@@ -161,8 +161,8 @@
         DP("Has pending ctors... call now\n");
         for (auto &entry : lib.second.PendingCtors) {
           void *ctor = entry;
-          int rc = target(device_id, ctor, 0, NULL, NULL, NULL,
-                          NULL, 1, 1, true /*team*/);
+          int rc = target(device_id, ctor, 0, NULL, NULL, NULL, NULL, NULL, 1,
+                          1, true /*team*/);
           if (rc != OFFLOAD_SUCCESS) {
             DP("Running ctor " DPxMOD " failed.\n", DPxPTR(ctor));
             Device.PendingGlobalsMtx.unlock();
@@ -209,9 +209,132 @@
   return ((type & OMP_TGT_MAPTYPE_MEMBER_OF) >> 48) - 1;
 }
 
+/// Map one component when entering a target data region.
+int target_data_begin_component(DeviceTy &Device, void *HstPtrBegin,
+                                int64_t Size, int64_t Type, bool IsParentOfNext,
+                                void *ParentBegin, void **HstPtrBasePtr) {
+  // Adjust for proper alignment if this is a combined entry (for structs).
+  // Look at the next argument - if that is MEMBER_OF this one, then this one
+  // is a combined entry.
+  int64_t padding = 0;
+  if (member_of(Type) < 0 && IsParentOfNext) {
+    padding = (int64_t)HstPtrBegin % alignment;
+    if (padding) {
+      DP("Using a padding of %" PRId64 " bytes for begin address " DPxMOD "\n",
+         padding, DPxPTR(HstPtrBegin));
+      HstPtrBegin = (char *)HstPtrBegin - padding;
+      Size += padding;
+    }
+  }
+
+  // Address of pointer on the host and device, respectively.
+  void *Pointer_HstPtrBegin, *Pointer_TgtPtrBegin;
+  bool IsNew, Pointer_IsNew;
+  bool IsHostPtr = false;
+  bool IsImplicit = Type & OMP_TGT_MAPTYPE_IMPLICIT;
+  // Force the creation of a device side copy of the data when:
+  // a close map modifier was associated with a map that contained a to.
+  bool HasCloseModifier = Type & OMP_TGT_MAPTYPE_CLOSE;
+  // UpdateRef is based on MEMBER_OF instead of TARGET_PARAM because if we
+  // have reached this point via __tgt_target_data_begin and not __tgt_target
+  // then no argument is marked as TARGET_PARAM ("omp target data map" is not
+  // associated with a target region, so there are no target parameters). This
+  // may be considered a hack, we could revise the scheme in the future.
+  bool UpdateRef = !(Type & OMP_TGT_MAPTYPE_MEMBER_OF);
+  void *HstPtrBase = *HstPtrBasePtr;
+  if (Type & OMP_TGT_MAPTYPE_PTR_AND_OBJ) {
+    DP("Has a pointer entry: \n");
+    // base is address of pointer.
+    Pointer_TgtPtrBegin = Device.getOrAllocTgtPtr(
+        HstPtrBase, HstPtrBase, sizeof(void *), Pointer_IsNew, IsHostPtr,
+        IsImplicit, UpdateRef, HasCloseModifier);
+    if (!Pointer_TgtPtrBegin) {
+      DP("Call to getOrAllocTgtPtr returned null pointer (device failure or "
+         "illegal mapping).\n");
+      return OFFLOAD_FAIL;
+    }
+    DP("There are %zu bytes allocated at target address " DPxMOD " - is%s new"
+       "\n",
+       sizeof(void *), DPxPTR(Pointer_TgtPtrBegin),
+       (Pointer_IsNew ? "" : " not"));
+    Pointer_HstPtrBegin = HstPtrBase;
+    // modify current entry.
+    HstPtrBase = *(void **)HstPtrBase;
+    UpdateRef = true; // subsequently update ref count of pointee
+  }
+
+  void *TgtPtrBegin =
+      Device.getOrAllocTgtPtr(HstPtrBegin, HstPtrBase, Size, IsNew, IsHostPtr,
+                              IsImplicit, UpdateRef, HasCloseModifier);
+  if (!TgtPtrBegin && Size) {
+    // If Size==0, then the argument could be a zero-length pointer to
+    // NULL, so getOrAlloc() returning NULL is not an error.
+    DP("Call to getOrAllocTgtPtr returned null pointer (device failure or "
+       "illegal mapping).\n");
+  }
+  DP("There are %" PRId64 " bytes allocated at target address " DPxMOD
+     " - is%s new\n",
+     Size, DPxPTR(TgtPtrBegin), (IsNew ? "" : " not"));
+
+  if (Type & OMP_TGT_MAPTYPE_RETURN_PARAM) {
+    uintptr_t Delta = (uintptr_t)HstPtrBegin - (uintptr_t)HstPtrBase;
+    void *TgtPtrBase = (void *)((uintptr_t)TgtPtrBegin - Delta);
+    DP("Returning device pointer " DPxMOD "\n", DPxPTR(TgtPtrBase));
+    HstPtrBase = TgtPtrBase;
+  }
+
+  if (Type & OMP_TGT_MAPTYPE_TO) {
+    bool copy = false;
+    if (!(RTLs.RequiresFlags & OMP_REQ_UNIFIED_SHARED_MEMORY) ||
+        HasCloseModifier) {
+      if (IsNew || (Type & OMP_TGT_MAPTYPE_ALWAYS)) {
+        copy = true;
+      } else if (Type & OMP_TGT_MAPTYPE_MEMBER_OF) {
+        // Copy data only if the "parent" struct has RefCount==1.
+        long parent_rc = Device.getMapEntryRefCnt(ParentBegin);
+        assert(parent_rc > 0 && "parent struct not found");
+        if (parent_rc == 1) {
+          copy = true;
+        }
+      }
+    }
+
+    if (copy && !IsHostPtr) {
+      DP("Moving %" PRId64 " bytes (hst:" DPxMOD ") -> (tgt:" DPxMOD ")\n",
+         Size, DPxPTR(HstPtrBegin), DPxPTR(TgtPtrBegin));
+      int rt = Device.data_submit(TgtPtrBegin, HstPtrBegin, Size);
+      if (rt != OFFLOAD_SUCCESS) {
+        DP("Copying data to device failed.\n");
+        return OFFLOAD_FAIL;
+      }
+    }
+  }
+
+  if (Type & OMP_TGT_MAPTYPE_PTR_AND_OBJ && !IsHostPtr) {
+    DP("Update pointer (" DPxMOD ") -> [" DPxMOD "]\n",
+       DPxPTR(Pointer_TgtPtrBegin), DPxPTR(TgtPtrBegin));
+    uint64_t Delta = (uint64_t)HstPtrBegin - (uint64_t)HstPtrBase;
+    void *TgtPtrBase = (void *)((uint64_t)TgtPtrBegin - Delta);
+    int rt =
+        Device.data_submit(Pointer_TgtPtrBegin, &TgtPtrBase, sizeof(void *));
+    if (rt != OFFLOAD_SUCCESS) {
+      DP("Copying data to device failed.\n");
+      return OFFLOAD_FAIL;
+    }
+    // create shadow pointers for this entry
+    Device.ShadowMtx.lock();
+    Device.ShadowPtrMap[Pointer_HstPtrBegin] = {HstPtrBase, Pointer_TgtPtrBegin,
+                                                TgtPtrBase};
+    Device.ShadowMtx.unlock();
+  }
+
+  return OFFLOAD_SUCCESS;
+}
+
 /// Internal function to do the mapping and transfer the data to the device
-int target_data_begin(DeviceTy &Device, int32_t arg_num,
-    void **args_base, void **args, int64_t *arg_sizes, int64_t *arg_types) {
+int target_data_begin(DeviceTy &Device, int32_t arg_num, void **args_base,
+                      void **args, int64_t *arg_sizes, int64_t *arg_types,
+                      void **arg_mappers) {
   // process each input.
   for (int32_t i = 0; i < arg_num; ++i) {
     // Ignore private variables and arrays - there is no mapping for them.
@@ -219,123 +342,174 @@
         (arg_types[i] & OMP_TGT_MAPTYPE_PRIVATE))
       continue;
 
-    void *HstPtrBegin = args[i];
-    void *HstPtrBase = args_base[i];
-    int64_t data_size = arg_sizes[i];
-
-    // Adjust for proper alignment if this is a combined entry (for structs).
-    // Look at the next argument - if that is MEMBER_OF this one, then this one
-    // is a combined entry.
-    int64_t padding = 0;
-    const int next_i = i+1;
-    if (member_of(arg_types[i]) < 0 && next_i < arg_num &&
-        member_of(arg_types[next_i]) == i) {
-      padding = (int64_t)HstPtrBegin % alignment;
-      if (padding) {
-        DP("Using a padding of %" PRId64 " bytes for begin address " DPxMOD
-            "\n", padding, DPxPTR(HstPtrBegin));
-        HstPtrBegin = (char *) HstPtrBegin - padding;
-        data_size += padding;
-      }
-    }
-
-    // Address of pointer on the host and device, respectively.
-    void *Pointer_HstPtrBegin, *Pointer_TgtPtrBegin;
-    bool IsNew, Pointer_IsNew;
-    bool IsHostPtr = false;
-    bool IsImplicit = arg_types[i] & OMP_TGT_MAPTYPE_IMPLICIT;
-    // Force the creation of a device side copy of the data when:
-    // a close map modifier was associated with a map that contained a to.
-    bool HasCloseModifier = arg_types[i] & OMP_TGT_MAPTYPE_CLOSE;
-    // UpdateRef is based on MEMBER_OF instead of TARGET_PARAM because if we
-    // have reached this point via __tgt_target_data_begin and not __tgt_target
-    // then no argument is marked as TARGET_PARAM ("omp target data map" is not
-    // associated with a target region, so there are no target parameters). This
-    // may be considered a hack, we could revise the scheme in the future.
-    bool UpdateRef = !(arg_types[i] & OMP_TGT_MAPTYPE_MEMBER_OF);
-    if (arg_types[i] & OMP_TGT_MAPTYPE_PTR_AND_OBJ) {
-      DP("Has a pointer entry: \n");
-      // base is address of pointer.
-      Pointer_TgtPtrBegin = Device.getOrAllocTgtPtr(HstPtrBase, HstPtrBase,
-          sizeof(void *), Pointer_IsNew, IsHostPtr, IsImplicit, UpdateRef,
-          HasCloseModifier);
-      if (!Pointer_TgtPtrBegin) {
-        DP("Call to getOrAllocTgtPtr returned null pointer (device failure or "
-            "illegal mapping).\n");
+    // If a valid user-defined mapper is attached, use the associated mapper
+    // function to complete data mapping.
+    if (arg_mappers && arg_mappers[i]) {
+      DP("Call the mapper function " DPxMOD " for the %dth argument\n",
+         DPxPTR(arg_mappers[i]), i);
+      // The mapper function fills up Components.
+      MapperComponentsTy Components;
+      MapperFuncPtrTy MapperFuncPtr = (MapperFuncPtrTy)(arg_mappers[i]);
+      (*MapperFuncPtr)((void *)&Components, args_base[i], args[i], arg_sizes[i],
+                       arg_types[i]);
+      if (Components.size() >= 0xffff) {
+        DP("The number of components exceed the limitation\n");
         return OFFLOAD_FAIL;
       }
-      DP("There are %zu bytes allocated at target address " DPxMOD " - is%s new"
-          "\n", sizeof(void *), DPxPTR(Pointer_TgtPtrBegin),
-          (Pointer_IsNew ? "" : " not"));
-      Pointer_HstPtrBegin = HstPtrBase;
-      // modify current entry.
-      HstPtrBase = *(void **)HstPtrBase;
-      UpdateRef = true; // subsequently update ref count of pointee
+      // Map each component filled up by the mapper function.
+      for (int32_t j = 0; j < Components.size(); ++j) {
+        const int next_j = j + 1;
+        bool IsParentOfNext = next_j < Components.size() &&
+                              member_of(Components.get(next_j)->Type) == j;
+        void *ParentBegin;
+        int64_t Type = Components.get(j)->Type;
+        if (Type & OMP_TGT_MAPTYPE_MEMBER_OF) {
+          int32_t parent_idx = member_of(Type);
+          ParentBegin = Components.get(parent_idx)->Begin;
+        }
+        int rt = target_data_begin_component(
+            Device, Components.get(j)->Begin, Components.get(j)->Size, Type,
+            IsParentOfNext, ParentBegin, &args_base[i]);
+        if (rt != OFFLOAD_SUCCESS) {
+          DP("Failed to map the components specified by a user-defined "
+             "mapper\n");
+          return OFFLOAD_FAIL;
+        }
+      }
+    } else {
+      const int next_i = i + 1;
+      bool IsParentOfNext =
+          next_i < arg_num && member_of(arg_types[next_i]) == i;
+      void *ParentBegin;
+      if (arg_types[i] & OMP_TGT_MAPTYPE_MEMBER_OF) {
+        int32_t parent_idx = member_of(arg_types[i]);
+        ParentBegin = args[parent_idx];
+      }
+      int rt = target_data_begin_component(Device, args[i], arg_sizes[i],
+                                           arg_types[i], IsParentOfNext,
+                                           ParentBegin, &args_base[i]);
+      if (rt != OFFLOAD_SUCCESS)
+        return OFFLOAD_FAIL;
     }
+  }
 
-    void *TgtPtrBegin = Device.getOrAllocTgtPtr(HstPtrBegin, HstPtrBase,
-        data_size, IsNew, IsHostPtr, IsImplicit, UpdateRef, HasCloseModifier);
-    if (!TgtPtrBegin && data_size) {
-      // If data_size==0, then the argument could be a zero-length pointer to
-      // NULL, so getOrAlloc() returning NULL is not an error.
-      DP("Call to getOrAllocTgtPtr returned null pointer (device failure or "
-          "illegal mapping).\n");
-    }
-    DP("There are %" PRId64 " bytes allocated at target address " DPxMOD
-        " - is%s new\n", data_size, DPxPTR(TgtPtrBegin),
-        (IsNew ? "" : " not"));
-
-    if (arg_types[i] & OMP_TGT_MAPTYPE_RETURN_PARAM) {
-      uintptr_t Delta = (uintptr_t)HstPtrBegin - (uintptr_t)HstPtrBase;
-      void *TgtPtrBase = (void *)((uintptr_t)TgtPtrBegin - Delta);
-      DP("Returning device pointer " DPxMOD "\n", DPxPTR(TgtPtrBase));
-      args_base[i] = TgtPtrBase;
+  return OFFLOAD_SUCCESS;
+}
+
+/// Map one component when exiting a target data region.
+int target_data_end_component(DeviceTy &Device, void *HstPtrBegin, int64_t Size,
+                              int64_t Type, bool IsParentOfNext,
+                              void *ParentBegin) {
+  // Adjust for proper alignment if this is a combined entry (for structs).
+  // Look at the next argument - if that is MEMBER_OF this one, then this one
+  // is a combined entry.
+  int64_t padding = 0;
+  if (member_of(Type) < 0 && IsParentOfNext) {
+    padding = (int64_t)HstPtrBegin % alignment;
+    if (padding) {
+      DP("Using a padding of %" PRId64 " bytes for begin address " DPxMOD "\n",
+         padding, DPxPTR(HstPtrBegin));
+      HstPtrBegin = (char *)HstPtrBegin - padding;
+      Size += padding;
     }
+  }
 
-    if (arg_types[i] & OMP_TGT_MAPTYPE_TO) {
-      bool copy = false;
+  bool IsLast, IsHostPtr;
+  bool UpdateRef = !(Type & OMP_TGT_MAPTYPE_MEMBER_OF) ||
+                   (Type & OMP_TGT_MAPTYPE_PTR_AND_OBJ);
+  bool ForceDelete = Type & OMP_TGT_MAPTYPE_DELETE;
+  bool HasCloseModifier = Type & OMP_TGT_MAPTYPE_CLOSE;
+
+  // If PTR_AND_OBJ, HstPtrBegin is address of pointee
+  void *TgtPtrBegin =
+      Device.getTgtPtrBegin(HstPtrBegin, Size, IsLast, UpdateRef, IsHostPtr);
+  DP("There are %" PRId64 " bytes allocated at target address " DPxMOD
+     " - is%s last\n",
+     Size, DPxPTR(TgtPtrBegin), (IsLast ? "" : " not"));
+
+  bool DelEntry = IsLast || ForceDelete;
+
+  if ((Type & OMP_TGT_MAPTYPE_MEMBER_OF) &&
+      !(Type & OMP_TGT_MAPTYPE_PTR_AND_OBJ)) {
+    DelEntry = false; // protect parent struct from being deallocated
+  }
+
+  if ((Type & OMP_TGT_MAPTYPE_FROM) || DelEntry) {
+    // Move data back to the host
+    if (Type & OMP_TGT_MAPTYPE_FROM) {
+      bool Always = Type & OMP_TGT_MAPTYPE_ALWAYS;
+      bool CopyMember = false;
       if (!(RTLs.RequiresFlags & OMP_REQ_UNIFIED_SHARED_MEMORY) ||
           HasCloseModifier) {
-        if (IsNew || (arg_types[i] & OMP_TGT_MAPTYPE_ALWAYS)) {
-          copy = true;
-        } else if (arg_types[i] & OMP_TGT_MAPTYPE_MEMBER_OF) {
+        if ((Type & OMP_TGT_MAPTYPE_MEMBER_OF) &&
+            !(Type & OMP_TGT_MAPTYPE_PTR_AND_OBJ)) {
           // Copy data only if the "parent" struct has RefCount==1.
-          int32_t parent_idx = member_of(arg_types[i]);
-          long parent_rc = Device.getMapEntryRefCnt(args[parent_idx]);
+          long parent_rc = Device.getMapEntryRefCnt(ParentBegin);
           assert(parent_rc > 0 && "parent struct not found");
           if (parent_rc == 1) {
-            copy = true;
+            CopyMember = true;
           }
         }
       }
 
-      if (copy && !IsHostPtr) {
-        DP("Moving %" PRId64 " bytes (hst:" DPxMOD ") -> (tgt:" DPxMOD ")\n",
-            data_size, DPxPTR(HstPtrBegin), DPxPTR(TgtPtrBegin));
-        int rt = Device.data_submit(TgtPtrBegin, HstPtrBegin, data_size);
+      if ((DelEntry || Always || CopyMember) &&
+          !(RTLs.RequiresFlags & OMP_REQ_UNIFIED_SHARED_MEMORY &&
+            TgtPtrBegin == HstPtrBegin)) {
+        DP("Moving %" PRId64 " bytes (tgt:" DPxMOD ") -> (hst:" DPxMOD ")\n",
+           Size, DPxPTR(TgtPtrBegin), DPxPTR(HstPtrBegin));
+        int rt = Device.data_retrieve(HstPtrBegin, TgtPtrBegin, Size);
         if (rt != OFFLOAD_SUCCESS) {
-          DP("Copying data to device failed.\n");
+          DP("Copying data from device failed.\n");
           return OFFLOAD_FAIL;
         }
       }
     }
 
-    if (arg_types[i] & OMP_TGT_MAPTYPE_PTR_AND_OBJ && !IsHostPtr) {
-      DP("Update pointer (" DPxMOD ") -> [" DPxMOD "]\n",
-          DPxPTR(Pointer_TgtPtrBegin), DPxPTR(TgtPtrBegin));
-      uint64_t Delta = (uint64_t)HstPtrBegin - (uint64_t)HstPtrBase;
-      void *TgtPtrBase = (void *)((uint64_t)TgtPtrBegin - Delta);
-      int rt = Device.data_submit(Pointer_TgtPtrBegin, &TgtPtrBase,
-          sizeof(void *));
+    // If we copied back to the host a struct/array containing pointers, we
+    // need to restore the original host pointer values from their shadow
+    // copies. If the struct is going to be deallocated, remove any remaining
+    // shadow pointer entries for this struct.
+    uintptr_t lb = (uintptr_t)HstPtrBegin;
+    uintptr_t ub = (uintptr_t)HstPtrBegin + Size;
+    Device.ShadowMtx.lock();
+    for (ShadowPtrListTy::iterator it = Device.ShadowPtrMap.begin();
+         it != Device.ShadowPtrMap.end();) {
+      void **ShadowHstPtrAddr = (void **)it->first;
+
+      // An STL map is sorted on its keys; use this property
+      // to quickly determine when to break out of the loop.
+      if ((uintptr_t)ShadowHstPtrAddr < lb) {
+        ++it;
+        continue;
+      }
+      if ((uintptr_t)ShadowHstPtrAddr >= ub)
+        break;
+
+      // If we copied the struct to the host, we need to restore the pointer.
+      if (Type & OMP_TGT_MAPTYPE_FROM) {
+        DP("Restoring original host pointer value " DPxMOD " for host "
+           "pointer " DPxMOD "\n",
+           DPxPTR(it->second.HstPtrVal), DPxPTR(ShadowHstPtrAddr));
+        *ShadowHstPtrAddr = it->second.HstPtrVal;
+      }
+      // If the struct is to be deallocated, remove the shadow entry.
+      if (DelEntry) {
+        DP("Removing shadow pointer " DPxMOD "\n", DPxPTR(ShadowHstPtrAddr));
+        it = Device.ShadowPtrMap.erase(it);
+      } else {
+        ++it;
+      }
+    }
+    Device.ShadowMtx.unlock();
+
+    // Deallocate map
+    if (DelEntry) {
+      int rt = Device.deallocTgtPtr(HstPtrBegin, Size, ForceDelete,
+                                    HasCloseModifier);
       if (rt != OFFLOAD_SUCCESS) {
-        DP("Copying data to device failed.\n");
+        DP("Deallocating data from device failed.\n");
         return OFFLOAD_FAIL;
       }
-      // create shadow pointers for this entry
-      Device.ShadowMtx.lock();
-      Device.ShadowPtrMap[Pointer_HstPtrBegin] = {HstPtrBase,
-          Pointer_TgtPtrBegin, TgtPtrBase};
-      Device.ShadowMtx.unlock();
     }
   }
 
@@ -344,7 +518,8 @@
 
 /// Internal function to undo the mapping and retrieve the data from the device.
 int target_data_end(DeviceTy &Device, int32_t arg_num, void **args_base,
-    void **args, int64_t *arg_sizes, int64_t *arg_types) {
+                    void **args, int64_t *arg_sizes, int64_t *arg_types,
+                    void **arg_mappers) {
   // process each input.
   for (int32_t i = arg_num - 1; i >= 0; --i) {
     // Ignore private variables and arrays - there is no mapping for them.
@@ -353,214 +528,185 @@
         (arg_types[i] & OMP_TGT_MAPTYPE_PRIVATE))
       continue;
 
-    void *HstPtrBegin = args[i];
-    int64_t data_size = arg_sizes[i];
-    // Adjust for proper alignment if this is a combined entry (for structs).
-    // Look at the next argument - if that is MEMBER_OF this one, then this one
-    // is a combined entry.
-    int64_t padding = 0;
-    const int next_i = i+1;
-    if (member_of(arg_types[i]) < 0 && next_i < arg_num &&
-        member_of(arg_types[next_i]) == i) {
-      padding = (int64_t)HstPtrBegin % alignment;
-      if (padding) {
-        DP("Using a padding of %" PRId64 " bytes for begin address " DPxMOD
-            "\n", padding, DPxPTR(HstPtrBegin));
-        HstPtrBegin = (char *) HstPtrBegin - padding;
-        data_size += padding;
+    // If a valid user-defined mapper is attached, use the associated mapper
+    // function to complete data mapping.
+    if (arg_mappers && arg_mappers[i]) {
+      DP("Call the mapper function " DPxMOD " for the %dth argument\n",
+         DPxPTR(arg_mappers[i]), i);
+      // The mapper function fills up Components.
+      MapperComponentsTy Components;
+      MapperFuncPtrTy MapperFuncPtr = (MapperFuncPtrTy)(arg_mappers[i]);
+      (*MapperFuncPtr)((void *)&Components, args_base[i], args[i], arg_sizes[i],
+                       arg_types[i]);
+      if (Components.size() >= 0xffff) {
+        DP("The number of components exceed the limitation\n");
+        return OFFLOAD_FAIL;
+      }
+      // Map each component filled up by the mapper function.
+      for (int32_t j = 0; j < Components.size(); ++j) {
+        const int next_j = j + 1;
+        bool IsParentOfNext = next_j < Components.size() &&
+                              member_of(Components.get(next_j)->Type) == j;
+        void *ParentBegin;
+        int64_t Type = Components.get(j)->Type;
+        if (Type & OMP_TGT_MAPTYPE_MEMBER_OF) {
+          int32_t parent_idx = member_of(Type);
+          ParentBegin = Components.get(parent_idx)->Begin;
+        }
+        int rt = target_data_end_component(Device, Components.get(j)->Begin,
+                                           Components.get(j)->Size, Type,
+                                           IsParentOfNext, ParentBegin);
+        if (rt != OFFLOAD_SUCCESS) {
+          DP("Failed to map the components specified by a user-defined "
+             "mapper\n");
+          return OFFLOAD_FAIL;
+        }
+      }
+    } else {
+      const int next_i = i + 1;
+      bool IsParentOfNext =
+          next_i < arg_num && member_of(arg_types[next_i]) == i;
+      void *ParentBegin;
+      if (arg_types[i] & OMP_TGT_MAPTYPE_MEMBER_OF) {
+        int32_t parent_idx = member_of(arg_types[i]);
+        ParentBegin = args[parent_idx];
       }
+      int rt =
+          target_data_end_component(Device, args[i], arg_sizes[i], arg_types[i],
+                                    IsParentOfNext, ParentBegin);
+      if (rt != OFFLOAD_SUCCESS)
+        return OFFLOAD_FAIL;
     }
+  }
 
-    bool IsLast, IsHostPtr;
-    bool UpdateRef = !(arg_types[i] & OMP_TGT_MAPTYPE_MEMBER_OF) ||
-        (arg_types[i] & OMP_TGT_MAPTYPE_PTR_AND_OBJ);
-    bool ForceDelete = arg_types[i] & OMP_TGT_MAPTYPE_DELETE;
-    bool HasCloseModifier = arg_types[i] & OMP_TGT_MAPTYPE_CLOSE;
+  return OFFLOAD_SUCCESS;
+}
 
-    // If PTR_AND_OBJ, HstPtrBegin is address of pointee
-    void *TgtPtrBegin = Device.getTgtPtrBegin(HstPtrBegin, data_size, IsLast,
-        UpdateRef, IsHostPtr);
-    DP("There are %" PRId64 " bytes allocated at target address " DPxMOD
-        " - is%s last\n", data_size, DPxPTR(TgtPtrBegin),
-        (IsLast ? "" : " not"));
+/// Map one component when updating a target data region.
+int target_data_update_component(DeviceTy &Device, void *HstPtrBegin,
+                                 int64_t Size, int64_t Type) {
+  bool IsLast, IsHostPtr;
+  void *TgtPtrBegin =
+      Device.getTgtPtrBegin(HstPtrBegin, Size, IsLast, false, IsHostPtr);
+  if (!TgtPtrBegin) {
+    DP("hst data:" DPxMOD " not found, becomes a noop\n", DPxPTR(HstPtrBegin));
+    return OFFLOAD_SUCCESS;
+  }
 
-    bool DelEntry = IsLast || ForceDelete;
+  if (RTLs.RequiresFlags & OMP_REQ_UNIFIED_SHARED_MEMORY &&
+      TgtPtrBegin == HstPtrBegin) {
+    DP("hst data:" DPxMOD " unified and shared, becomes a noop\n",
+       DPxPTR(HstPtrBegin));
+    return OFFLOAD_SUCCESS;
+  }
 
-    if ((arg_types[i] & OMP_TGT_MAPTYPE_MEMBER_OF) &&
-        !(arg_types[i] & OMP_TGT_MAPTYPE_PTR_AND_OBJ)) {
-      DelEntry = false; // protect parent struct from being deallocated
+  if (Type & OMP_TGT_MAPTYPE_FROM) {
+    DP("Moving %" PRId64 " bytes (tgt:" DPxMOD ") -> (hst:" DPxMOD ")\n", Size,
+       DPxPTR(TgtPtrBegin), DPxPTR(HstPtrBegin));
+    int rt = Device.data_retrieve(HstPtrBegin, TgtPtrBegin, Size);
+    if (rt != OFFLOAD_SUCCESS) {
+      DP("Copying data from device failed.\n");
+      return OFFLOAD_FAIL;
     }
 
-    if ((arg_types[i] & OMP_TGT_MAPTYPE_FROM) || DelEntry) {
-      // Move data back to the host
-      if (arg_types[i] & OMP_TGT_MAPTYPE_FROM) {
-        bool Always = arg_types[i] & OMP_TGT_MAPTYPE_ALWAYS;
-        bool CopyMember = false;
-        if (!(RTLs.RequiresFlags & OMP_REQ_UNIFIED_SHARED_MEMORY) ||
-            HasCloseModifier) {
-          if ((arg_types[i] & OMP_TGT_MAPTYPE_MEMBER_OF) &&
-              !(arg_types[i] & OMP_TGT_MAPTYPE_PTR_AND_OBJ)) {
-            // Copy data only if the "parent" struct has RefCount==1.
-            int32_t parent_idx = member_of(arg_types[i]);
-            long parent_rc = Device.getMapEntryRefCnt(args[parent_idx]);
-            assert(parent_rc > 0 && "parent struct not found");
-            if (parent_rc == 1) {
-              CopyMember = true;
-            }
-          }
-        }
-
-        if ((DelEntry || Always || CopyMember) &&
-            !(RTLs.RequiresFlags & OMP_REQ_UNIFIED_SHARED_MEMORY &&
-              TgtPtrBegin == HstPtrBegin)) {
-          DP("Moving %" PRId64 " bytes (tgt:" DPxMOD ") -> (hst:" DPxMOD ")\n",
-              data_size, DPxPTR(TgtPtrBegin), DPxPTR(HstPtrBegin));
-          int rt = Device.data_retrieve(HstPtrBegin, TgtPtrBegin, data_size);
-          if (rt != OFFLOAD_SUCCESS) {
-            DP("Copying data from device failed.\n");
-            return OFFLOAD_FAIL;
-          }
-        }
-      }
+    uintptr_t lb = (uintptr_t)HstPtrBegin;
+    uintptr_t ub = (uintptr_t)HstPtrBegin + Size;
+    Device.ShadowMtx.lock();
+    for (ShadowPtrListTy::iterator it = Device.ShadowPtrMap.begin();
+         it != Device.ShadowPtrMap.end(); ++it) {
+      void **ShadowHstPtrAddr = (void **)it->first;
+      if ((uintptr_t)ShadowHstPtrAddr < lb)
+        continue;
+      if ((uintptr_t)ShadowHstPtrAddr >= ub)
+        break;
+      DP("Restoring original host pointer value " DPxMOD
+         " for host pointer " DPxMOD "\n",
+         DPxPTR(it->second.HstPtrVal), DPxPTR(ShadowHstPtrAddr));
+      *ShadowHstPtrAddr = it->second.HstPtrVal;
+    }
+    Device.ShadowMtx.unlock();
+  }
 
-      // If we copied back to the host a struct/array containing pointers, we
-      // need to restore the original host pointer values from their shadow
-      // copies. If the struct is going to be deallocated, remove any remaining
-      // shadow pointer entries for this struct.
-      uintptr_t lb = (uintptr_t) HstPtrBegin;
-      uintptr_t ub = (uintptr_t) HstPtrBegin + data_size;
-      Device.ShadowMtx.lock();
-      for (ShadowPtrListTy::iterator it = Device.ShadowPtrMap.begin();
-           it != Device.ShadowPtrMap.end();) {
-        void **ShadowHstPtrAddr = (void**) it->first;
-
-        // An STL map is sorted on its keys; use this property
-        // to quickly determine when to break out of the loop.
-        if ((uintptr_t) ShadowHstPtrAddr < lb) {
-          ++it;
-          continue;
-        }
-        if ((uintptr_t) ShadowHstPtrAddr >= ub)
-          break;
-
-        // If we copied the struct to the host, we need to restore the pointer.
-        if (arg_types[i] & OMP_TGT_MAPTYPE_FROM) {
-          DP("Restoring original host pointer value " DPxMOD " for host "
-              "pointer " DPxMOD "\n", DPxPTR(it->second.HstPtrVal),
-              DPxPTR(ShadowHstPtrAddr));
-          *ShadowHstPtrAddr = it->second.HstPtrVal;
-        }
-        // If the struct is to be deallocated, remove the shadow entry.
-        if (DelEntry) {
-          DP("Removing shadow pointer " DPxMOD "\n", DPxPTR(ShadowHstPtrAddr));
-          it = Device.ShadowPtrMap.erase(it);
-        } else {
-          ++it;
-        }
-      }
-      Device.ShadowMtx.unlock();
+  if (Type & OMP_TGT_MAPTYPE_TO) {
+    DP("Moving %" PRId64 " bytes (hst:" DPxMOD ") -> (tgt:" DPxMOD ")\n", Size,
+       DPxPTR(HstPtrBegin), DPxPTR(TgtPtrBegin));
+    int rt = Device.data_submit(TgtPtrBegin, HstPtrBegin, Size);
+    if (rt != OFFLOAD_SUCCESS) {
+      DP("Copying data to device failed.\n");
+      return OFFLOAD_FAIL;
+    }
 
-      // Deallocate map
-      if (DelEntry) {
-        int rt = Device.deallocTgtPtr(HstPtrBegin, data_size, ForceDelete,
-                                      HasCloseModifier);
-        if (rt != OFFLOAD_SUCCESS) {
-          DP("Deallocating data from device failed.\n");
-          return OFFLOAD_FAIL;
-        }
+    uintptr_t lb = (uintptr_t)HstPtrBegin;
+    uintptr_t ub = (uintptr_t)HstPtrBegin + Size;
+    Device.ShadowMtx.lock();
+    for (ShadowPtrListTy::iterator it = Device.ShadowPtrMap.begin();
+         it != Device.ShadowPtrMap.end(); ++it) {
+      void **ShadowHstPtrAddr = (void **)it->first;
+      if ((uintptr_t)ShadowHstPtrAddr < lb)
+        continue;
+      if ((uintptr_t)ShadowHstPtrAddr >= ub)
+        break;
+      DP("Restoring original target pointer value " DPxMOD " for target "
+         "pointer " DPxMOD "\n",
+         DPxPTR(it->second.TgtPtrVal), DPxPTR(it->second.TgtPtrAddr));
+      rt = Device.data_submit(it->second.TgtPtrAddr, &it->second.TgtPtrVal,
+                              sizeof(void *));
+      if (rt != OFFLOAD_SUCCESS) {
+        DP("Copying data to device failed.\n");
+        Device.ShadowMtx.unlock();
+        return OFFLOAD_FAIL;
       }
     }
+    Device.ShadowMtx.unlock();
   }
 
   return OFFLOAD_SUCCESS;
 }
 
 /// Internal function to pass data to/from the target.
-int target_data_update(DeviceTy &Device, int32_t arg_num,
-    void **args_base, void **args, int64_t *arg_sizes, int64_t *arg_types) {
+int target_data_update(DeviceTy &Device, int32_t arg_num, void **args_base,
+                       void **args, int64_t *arg_sizes, int64_t *arg_types,
+                       void **arg_mappers) {
   // process each input.
   for (int32_t i = 0; i < arg_num; ++i) {
     if ((arg_types[i] & OMP_TGT_MAPTYPE_LITERAL) ||
         (arg_types[i] & OMP_TGT_MAPTYPE_PRIVATE))
       continue;
 
-    void *HstPtrBegin = args[i];
-    int64_t MapSize = arg_sizes[i];
-    bool IsLast, IsHostPtr;
-    void *TgtPtrBegin = Device.getTgtPtrBegin(HstPtrBegin, MapSize, IsLast,
-        false, IsHostPtr);
-    if (!TgtPtrBegin) {
-      DP("hst data:" DPxMOD " not found, becomes a noop\n", DPxPTR(HstPtrBegin));
-      continue;
-    }
-
-    if (RTLs.RequiresFlags & OMP_REQ_UNIFIED_SHARED_MEMORY &&
-        TgtPtrBegin == HstPtrBegin) {
-      DP("hst data:" DPxMOD " unified and shared, becomes a noop\n",
-         DPxPTR(HstPtrBegin));
-      continue;
-    }
-
-    if (arg_types[i] & OMP_TGT_MAPTYPE_FROM) {
-      DP("Moving %" PRId64 " bytes (tgt:" DPxMOD ") -> (hst:" DPxMOD ")\n",
-          arg_sizes[i], DPxPTR(TgtPtrBegin), DPxPTR(HstPtrBegin));
-      int rt = Device.data_retrieve(HstPtrBegin, TgtPtrBegin, MapSize);
-      if (rt != OFFLOAD_SUCCESS) {
-        DP("Copying data from device failed.\n");
-        return OFFLOAD_FAIL;
-      }
-
-      uintptr_t lb = (uintptr_t) HstPtrBegin;
-      uintptr_t ub = (uintptr_t) HstPtrBegin + MapSize;
-      Device.ShadowMtx.lock();
-      for (ShadowPtrListTy::iterator it = Device.ShadowPtrMap.begin();
-          it != Device.ShadowPtrMap.end(); ++it) {
-        void **ShadowHstPtrAddr = (void**) it->first;
-        if ((uintptr_t) ShadowHstPtrAddr < lb)
-          continue;
-        if ((uintptr_t) ShadowHstPtrAddr >= ub)
-          break;
-        DP("Restoring original host pointer value " DPxMOD " for host pointer "
-            DPxMOD "\n", DPxPTR(it->second.HstPtrVal),
-            DPxPTR(ShadowHstPtrAddr));
-        *ShadowHstPtrAddr = it->second.HstPtrVal;
-      }
-      Device.ShadowMtx.unlock();
-    }
-
-    if (arg_types[i] & OMP_TGT_MAPTYPE_TO) {
-      DP("Moving %" PRId64 " bytes (hst:" DPxMOD ") -> (tgt:" DPxMOD ")\n",
-          arg_sizes[i], DPxPTR(HstPtrBegin), DPxPTR(TgtPtrBegin));
-      int rt = Device.data_submit(TgtPtrBegin, HstPtrBegin, MapSize);
-      if (rt != OFFLOAD_SUCCESS) {
-        DP("Copying data to device failed.\n");
+    // If a valid user-defined mapper is attached, use the associated mapper
+    // function to complete data mapping.
+    if (arg_mappers && arg_mappers[i]) {
+      DP("Call the mapper function " DPxMOD " for the %dth argument\n",
+         DPxPTR(arg_mappers[i]), i);
+      // The mapper function fills up Components.
+      MapperComponentsTy Components;
+      MapperFuncPtrTy MapperFuncPtr = (MapperFuncPtrTy)(arg_mappers[i]);
+      (*MapperFuncPtr)((void *)&Components, args_base[i], args[i], arg_sizes[i],
+                       arg_types[i]);
+      if (Components.size() >= 0xffff) {
+        DP("The number of components exceed the limitation\n");
         return OFFLOAD_FAIL;
       }
-
-      uintptr_t lb = (uintptr_t) HstPtrBegin;
-      uintptr_t ub = (uintptr_t) HstPtrBegin + MapSize;
-      Device.ShadowMtx.lock();
-      for (ShadowPtrListTy::iterator it = Device.ShadowPtrMap.begin();
-          it != Device.ShadowPtrMap.end(); ++it) {
-        void **ShadowHstPtrAddr = (void**) it->first;
-        if ((uintptr_t) ShadowHstPtrAddr < lb)
-          continue;
-        if ((uintptr_t) ShadowHstPtrAddr >= ub)
-          break;
-        DP("Restoring original target pointer value " DPxMOD " for target "
-            "pointer " DPxMOD "\n", DPxPTR(it->second.TgtPtrVal),
-            DPxPTR(it->second.TgtPtrAddr));
-        rt = Device.data_submit(it->second.TgtPtrAddr,
-            &it->second.TgtPtrVal, sizeof(void *));
+      // Map each component filled up by the mapper function.
+      for (int32_t j = 0; j < Components.size(); ++j) {
+        int rt = target_data_update_component(Device, Components.get(j)->Begin,
+                                              Components.get(j)->Size,
+                                              Components.get(j)->Type);
         if (rt != OFFLOAD_SUCCESS) {
-          DP("Copying data to device failed.\n");
-          Device.ShadowMtx.unlock();
+          DP("Failed to map the components specified by a user-defined "
+             "mapper\n");
           return OFFLOAD_FAIL;
         }
       }
-      Device.ShadowMtx.unlock();
+    }
+    else {
+      int rt = target_data_update_component(Device, args[i], arg_sizes[i],
+                                            arg_types[i]);
+      if (rt != OFFLOAD_SUCCESS)
+        return OFFLOAD_FAIL;
     }
   }
+
   return OFFLOAD_SUCCESS;
 }
 
@@ -577,9 +723,10 @@
 /// performs the same action as data_update and data_end above. This function
 /// returns 0 if it was able to transfer the execution to a target and an
 /// integer different from zero otherwise.
-int target(int64_t device_id, void *host_ptr, int32_t arg_num,
-    void **args_base, void **args, int64_t *arg_sizes, int64_t *arg_types,
-    int32_t team_num, int32_t thread_limit, int IsTeamConstruct) {
+int target(int64_t device_id, void *host_ptr, int32_t arg_num, void **args_base,
+           void **args, int64_t *arg_sizes, int64_t *arg_types,
+           void **arg_mappers, int32_t team_num, int32_t thread_limit,
+           int IsTeamConstruct) {
   DeviceTy &Device = Devices[device_id];
 
   // Find the table information in the map or look it up in the translation
@@ -635,7 +782,7 @@
 
   // Move data to device.
   int rc = target_data_begin(Device, arg_num, args_base, args, arg_sizes,
-      arg_types);
+                             arg_types, arg_mappers);
   if (rc != OFFLOAD_SUCCESS) {
     DP("Call to target_data_begin failed, abort target.\n");
     return OFFLOAD_FAIL;
@@ -797,7 +944,7 @@
 
   // Move data from device.
   int rt = target_data_end(Device, arg_num, args_base, args, arg_sizes,
-      arg_types);
+                           arg_types, arg_mappers);
   if (rt != OFFLOAD_SUCCESS) {
     DP("Call to target_data_end failed, abort targe.\n");
     return OFFLOAD_FAIL;
Index: libomptarget/src/private.h
===================================================================
--- libomptarget/src/private.h
+++ libomptarget/src/private.h
@@ -15,20 +15,25 @@
 
 #include <omptarget.h>
 
+#include <cassert>
 #include <cstdint>
 
 extern int target_data_begin(DeviceTy &Device, int32_t arg_num,
-    void **args_base, void **args, int64_t *arg_sizes, int64_t *arg_types);
+                             void **args_base, void **args, int64_t *arg_sizes,
+                             int64_t *arg_types, void **arg_mappers);
 
 extern int target_data_end(DeviceTy &Device, int32_t arg_num, void **args_base,
-    void **args, int64_t *arg_sizes, int64_t *arg_types);
+                           void **args, int64_t *arg_sizes, int64_t *arg_types,
+                           void **arg_mappers);
 
 extern int target_data_update(DeviceTy &Device, int32_t arg_num,
-    void **args_base, void **args, int64_t *arg_sizes, int64_t *arg_types);
+                              void **args_base, void **args, int64_t *arg_sizes,
+                              int64_t *arg_types, void **arg_mappers);
 
 extern int target(int64_t device_id, void *host_ptr, int32_t arg_num,
-    void **args_base, void **args, int64_t *arg_sizes, int64_t *arg_types,
-    int32_t team_num, int32_t thread_limit, int IsTeamConstruct);
+                  void **args_base, void **args, int64_t *arg_sizes,
+                  int64_t *arg_types, void **arg_mappers, int32_t team_num,
+                  int32_t thread_limit, int IsTeamConstruct);
 
 extern int CheckDeviceAndCtors(int64_t device_id);
 
@@ -57,8 +62,19 @@
 // implementation here.
 struct MapperComponentsTy {
   std::vector<MapComponentInfoTy> Components;
+  int32_t size() { return Components.size(); }
+  MapComponentInfoTy *get(int32_t i) {
+    assert(i < size() && "Try to access a component that does not exist");
+    return &Components[i];
+  }
 };
 
+// The mapper function pointer type. It follows the signature below:
+// void .omp_mapper.<type_name>.<mapper_id>.(void *rt_mapper_handle,
+//                                           void *base, void *begin,
+//                                           size_t size, int64_t type);
+typedef void (*MapperFuncPtrTy)(void *, void *, void *, int64_t, int64_t);
+
 ////////////////////////////////////////////////////////////////////////////////
 // implemtation for fatal messages
 ////////////////////////////////////////////////////////////////////////////////
Index: libomptarget/src/rtl.cpp
===================================================================
--- libomptarget/src/rtl.cpp
+++ libomptarget/src/rtl.cpp
@@ -352,8 +352,8 @@
         Device.PendingGlobalsMtx.lock();
         if (Device.PendingCtorsDtors[desc].PendingCtors.empty()) {
           for (auto &dtor : Device.PendingCtorsDtors[desc].PendingDtors) {
-            int rc = target(Device.DeviceID, dtor, 0, NULL, NULL, NULL, NULL, 1,
-                1, true /*team*/);
+            int rc = target(Device.DeviceID, dtor, 0, NULL, NULL, NULL, NULL,
+                            NULL, 1, 1, true /*team*/);
             if (rc != OFFLOAD_SUCCESS) {
               DP("Running destructor " DPxMOD " failed.\n", DPxPTR(dtor));
             }
Index: libomptarget/test/mapping/declare_mapper_target.cpp
===================================================================
--- /dev/null
+++ libomptarget/test/mapping/declare_mapper_target.cpp
@@ -0,0 +1,36 @@
+// RUN: %libomptarget-compile-run-and-check-aarch64-unknown-linux-gnu
+// RUN: %libomptarget-compile-run-and-check-nvptx64-nvidia-cuda
+// RUN: %libomptarget-compile-run-and-check-powerpc64-ibm-linux-gnu
+// RUN: %libomptarget-compile-run-and-check-powerpc64le-ibm-linux-gnu
+// RUN: %libomptarget-compile-run-and-check-x86_64-pc-linux-gnu
+
+#include <cstdio>
+#include <cstdlib>
+
+#define NUM 1024
+
+class C {
+public:
+  int *a;
+};
+
+#pragma omp declare mapper(id: C s) map(s.a[0:NUM])
+
+int main() {
+  C c;
+  c.a = (int*) malloc(sizeof(int)*NUM);
+  for (int i = 0; i < NUM; i++) {
+    c.a[i] = 1;
+  }
+  #pragma omp target teams distribute parallel for map(mapper(id),tofrom: c)
+  for (int i = 0; i < NUM; i++) {
+    ++c.a[i];
+  }
+  int sum = 0;
+  for (int i = 0; i < NUM; i++) {
+    sum += c.a[i];
+  }
+  // CHECK: Sum = 2048
+  printf("Sum = %d\n", sum);
+  return 0;
+}
Index: libomptarget/test/mapping/declare_mapper_target_data.cpp
===================================================================
--- /dev/null
+++ libomptarget/test/mapping/declare_mapper_target_data.cpp
@@ -0,0 +1,39 @@
+// RUN: %libomptarget-compile-run-and-check-aarch64-unknown-linux-gnu
+// RUN: %libomptarget-compile-run-and-check-nvptx64-nvidia-cuda
+// RUN: %libomptarget-compile-run-and-check-powerpc64-ibm-linux-gnu
+// RUN: %libomptarget-compile-run-and-check-powerpc64le-ibm-linux-gnu
+// RUN: %libomptarget-compile-run-and-check-x86_64-pc-linux-gnu
+
+#include <cstdio>
+#include <cstdlib>
+
+#define NUM 1024
+
+class C {
+public:
+  int *a;
+};
+
+#pragma omp declare mapper(id: C s) map(s.a[0:NUM])
+
+int main() {
+  C c;
+  c.a = (int*) malloc(sizeof(int)*NUM);
+  for (int i = 0; i < NUM; i++) {
+    c.a[i] = 1;
+  }
+  #pragma omp target data map(mapper(id),tofrom: c)
+  {
+  #pragma omp target teams distribute parallel for
+  for (int i = 0; i < NUM; i++) {
+    ++c.a[i];
+  }
+  }
+  int sum = 0;
+  for (int i = 0; i < NUM; i++) {
+    sum += c.a[i];
+  }
+  // CHECK: Sum = 2048
+  printf("Sum = %d\n", sum);
+  return 0;
+}
Index: libomptarget/test/mapping/declare_mapper_target_data_enter_exit.cpp
===================================================================
--- /dev/null
+++ libomptarget/test/mapping/declare_mapper_target_data_enter_exit.cpp
@@ -0,0 +1,38 @@
+// RUN: %libomptarget-compile-run-and-check-aarch64-unknown-linux-gnu
+// RUN: %libomptarget-compile-run-and-check-nvptx64-nvidia-cuda
+// RUN: %libomptarget-compile-run-and-check-powerpc64-ibm-linux-gnu
+// RUN: %libomptarget-compile-run-and-check-powerpc64le-ibm-linux-gnu
+// RUN: %libomptarget-compile-run-and-check-x86_64-pc-linux-gnu
+
+#include <cstdio>
+#include <cstdlib>
+
+#define NUM 1024
+
+class C {
+public:
+  int *a;
+};
+
+#pragma omp declare mapper(id: C s) map(s.a[0:NUM])
+
+int main() {
+  C c;
+  c.a = (int*) malloc(sizeof(int)*NUM);
+  for (int i = 0; i < NUM; i++) {
+    c.a[i] = 1;
+  }
+  #pragma omp target enter data map(mapper(id),to: c)
+  #pragma omp target teams distribute parallel for
+  for (int i = 0; i < NUM; i++) {
+    ++c.a[i];
+  }
+  #pragma omp target exit data map(mapper(id),from: c)
+  int sum = 0;
+  for (int i = 0; i < NUM; i++) {
+    sum += c.a[i];
+  }
+  // CHECK: Sum = 2048
+  printf("Sum = %d\n", sum);
+  return 0;
+}
Index: libomptarget/test/mapping/declare_mapper_target_update.cpp
===================================================================
--- /dev/null
+++ libomptarget/test/mapping/declare_mapper_target_update.cpp
@@ -0,0 +1,60 @@
+// RUN: %libomptarget-compile-run-and-check-aarch64-unknown-linux-gnu
+// RUN: %libomptarget-compile-run-and-check-nvptx64-nvidia-cuda
+// RUN: %libomptarget-compile-run-and-check-powerpc64-ibm-linux-gnu
+// RUN: %libomptarget-compile-run-and-check-powerpc64le-ibm-linux-gnu
+// RUN: %libomptarget-compile-run-and-check-x86_64-pc-linux-gnu
+
+#include <cstdio>
+#include <cstdlib>
+
+#define NUM 1024
+
+class C {
+public:
+  int *a;
+};
+
+#pragma omp declare mapper(id: C s) map(s.a[0:NUM])
+
+int main() {
+  C c;
+  int sum = 0;
+  c.a = (int*) malloc(sizeof(int)*NUM);
+  for (int i = 0; i < NUM; i++) {
+    c.a[i] = 1;
+  }
+  #pragma omp target enter data map(mapper(id),alloc: c)
+  #pragma omp target teams distribute parallel for
+  for (int i = 0; i < NUM; i++) {
+    c.a[i] = 0;
+  }
+  #pragma omp target update from(mapper(id): c)
+  for (int i = 0; i < NUM; i++) {
+    sum += c.a[i];
+  }
+  // CHECK: Sum (after first update from) = 0
+  printf("Sum (after first update from) = %d\n", sum);
+  for (int i = 0; i < NUM; i++) {
+    c.a[i] = 1;
+  }
+  #pragma omp target update to(mapper(id): c)
+  #pragma omp target teams distribute parallel for
+  for (int i = 0; i < NUM; i++) {
+    ++c.a[i];
+  }
+  sum = 0;
+  for (int i = 0; i < NUM; i++) {
+    sum += c.a[i];
+  }
+  // CHECK: Sum (after update to) = 1024
+  printf("Sum (after update to) = %d\n", sum);
+  #pragma omp target update from(mapper(id): c)
+  sum = 0;
+  for (int i = 0; i < NUM; i++) {
+    sum += c.a[i];
+  }
+  // CHECK: Sum (after second update from) = 2048
+  printf("Sum (after second update from) = %d\n", sum);
+  #pragma omp target exit data map(mapper(id),delete: c)
+  return 0;
+}