diff --git a/openmp/libomptarget/include/device.h b/openmp/libomptarget/include/device.h
--- a/openmp/libomptarget/include/device.h
+++ b/openmp/libomptarget/include/device.h
@@ -247,17 +247,17 @@
       : KeyValue(HDTT->HstPtrBegin), HDTT(HDTT) {}
   HostDataToTargetTy *HDTT;
 };
-inline bool operator<(const HostDataToTargetMapKeyTy &lhs,
-                      const uintptr_t &rhs) {
-  return lhs.KeyValue < rhs;
+inline bool operator<(const HostDataToTargetMapKeyTy &LHS,
+                      const uintptr_t &RHS) {
+  return LHS.KeyValue < RHS;
 }
-inline bool operator<(const uintptr_t &lhs,
-                      const HostDataToTargetMapKeyTy &rhs) {
-  return lhs < rhs.KeyValue;
+inline bool operator<(const uintptr_t &LHS,
+                      const HostDataToTargetMapKeyTy &RHS) {
+  return LHS < RHS.KeyValue;
 }
-inline bool operator<(const HostDataToTargetMapKeyTy &lhs,
-                      const HostDataToTargetMapKeyTy &rhs) {
-  return lhs.KeyValue < rhs.KeyValue;
+inline bool operator<(const HostDataToTargetMapKeyTy &LHS,
+                      const HostDataToTargetMapKeyTy &RHS) {
+  return LHS.KeyValue < RHS.KeyValue;
 }

 struct LookupResult {
@@ -395,7 +395,7 @@
   // calls to RTL
   int32_t initOnce();

-  __tgt_target_table *load_binary(void *Img);
+  __tgt_target_table *loadBinary(void *Img);

   // device memory allocation/deallocation routines
   /// Allocates \p Size bytes on the device, host or shared memory space
@@ -469,7 +469,7 @@
   void deinit();
 };

-extern bool device_is_ready(int device_num);
+extern bool deviceIsReady(int DeviceNum);

 /// Struct for the data required to handle plugins
 struct PluginManager {
diff --git a/openmp/libomptarget/include/omptarget.h b/openmp/libomptarget/include/omptarget.h
--- a/openmp/libomptarget/include/omptarget.h
+++ b/openmp/libomptarget/include/omptarget.h
@@ -204,164 +204,161 @@
 int omp_get_num_devices(void);
 int omp_get_device_num(void);
 int omp_get_initial_device(void);
-void *omp_target_alloc(size_t size, int device_num);
-void omp_target_free(void *device_ptr, int device_num);
-int omp_target_is_present(const void *ptr, int device_num);
-int omp_target_memcpy(void *dst, const void *src, size_t length,
-                      size_t dst_offset, size_t src_offset, int dst_device,
-                      int src_device);
-int omp_target_memcpy_rect(void *dst, const void *src, size_t element_size,
-                           int num_dims, const size_t *volume,
-                           const size_t *dst_offsets, const size_t *src_offsets,
-                           const size_t *dst_dimensions,
-                           const size_t *src_dimensions, int dst_device,
-                           int src_device);
-int omp_target_associate_ptr(const void *host_ptr, const void *device_ptr,
-                             size_t size, size_t device_offset, int device_num);
-int omp_target_disassociate_ptr(const void *host_ptr, int device_num);
+void *omp_target_alloc(size_t Size, int DeviceNum);
+void omp_target_free(void *DevicePtr, int DeviceNum);
+int omp_target_is_present(const void *Ptr, int DeviceNum);
+int omp_target_memcpy(void *Dst, const void *Src, size_t Length,
+                      size_t DstOffset, size_t SrcOffset, int DstDevice,
+                      int SrcDevice);
+int omp_target_memcpy_rect(void *Dst, const void *Src, size_t ElementSize,
+                           int NumDims, const size_t *Volume,
+                           const size_t *DstOffsets, const size_t *SrcOffsets,
+                           const size_t *DstDimensions,
+                           const size_t *SrcDimensions, int DstDevice,
+                           int SrcDevice);
+int omp_target_associate_ptr(const void *HostPtr, const void *DevicePtr,
+                             size_t Size, size_t DeviceOffset, int DeviceNum);
+int omp_target_disassociate_ptr(const void *HostPtr, int DeviceNum);

 /// Explicit target memory allocators
 /// Using the llvm_ prefix until they become part of the OpenMP standard.
-void *llvm_omp_target_alloc_device(size_t size, int device_num);
-void *llvm_omp_target_alloc_host(size_t size, int device_num);
-void *llvm_omp_target_alloc_shared(size_t size, int device_num);
+void *llvm_omp_target_alloc_device(size_t Size, int DeviceNum);
+void *llvm_omp_target_alloc_host(size_t Size, int DeviceNum);
+void *llvm_omp_target_alloc_shared(size_t Size, int DeviceNum);

 /// Dummy target so we have a symbol for generating host fallback.
 void *llvm_omp_target_dynamic_shared_alloc();

 /// add the clauses of the requires directives in a given file
-void __tgt_register_requires(int64_t flags);
+void __tgt_register_requires(int64_t Flags);

 /// adds a target shared library to the target execution image
-void __tgt_register_lib(__tgt_bin_desc *desc);
+void __tgt_register_lib(__tgt_bin_desc *Desc);

 /// Initialize all RTLs at once
 void __tgt_init_all_rtls();

 /// removes a target shared library from the target execution image
-void __tgt_unregister_lib(__tgt_bin_desc *desc);
+void __tgt_unregister_lib(__tgt_bin_desc *Desc);

 // creates the host to target data mapping, stores it in the
 // libomptarget.so internal structure (an entry in a stack of data maps) and
 // passes the data to the device;
-void __tgt_target_data_begin(int64_t device_id, int32_t arg_num,
-                             void **args_base, void **args, int64_t *arg_sizes,
-                             int64_t *arg_types);
-void __tgt_target_data_begin_nowait(int64_t device_id, int32_t arg_num,
-                                    void **args_base, void **args,
-                                    int64_t *arg_sizes, int64_t *arg_types,
-                                    int32_t depNum, void *depList,
-                                    int32_t noAliasDepNum,
-                                    void *noAliasDepList);
-void __tgt_target_data_begin_mapper(ident_t *loc, int64_t device_id,
-                                    int32_t arg_num, void **args_base,
-                                    void **args, int64_t *arg_sizes,
-                                    int64_t *arg_types,
-                                    map_var_info_t *arg_names,
-                                    void **arg_mappers);
+void __tgt_target_data_begin(int64_t DeviceId, int32_t ArgNum, void **ArgsBase,
+                             void **Args, int64_t *ArgSizes, int64_t *ArgTypes);
+void __tgt_target_data_begin_nowait(int64_t DeviceId, int32_t ArgNum,
+                                    void **ArgsBase, void **Args,
+                                    int64_t *ArgSizes, int64_t *ArgTypes,
+                                    int32_t DepNum, void *DepList,
+                                    int32_t NoAliasDepNum,
+                                    void *NoAliasDepList);
+void __tgt_target_data_begin_mapper(ident_t *Loc, int64_t DeviceId,
+                                    int32_t ArgNum, void **ArgsBase,
+                                    void **Args, int64_t *ArgSizes,
+                                    int64_t *ArgTypes, map_var_info_t *ArgNames,
+                                    void **ArgMappers);
 void __tgt_target_data_begin_nowait_mapper(
-    ident_t *loc, int64_t device_id, int32_t arg_num, void **args_base,
-    void **args, int64_t *arg_sizes, int64_t *arg_types,
-    map_var_info_t *arg_names, void **arg_mappers, int32_t depNum,
-    void *depList, int32_t noAliasDepNum, void *noAliasDepList);
+    ident_t *Loc, int64_t DeviceId, int32_t ArgNum, void **ArgsBase,
+    void **Args, int64_t *ArgSizes, int64_t *ArgTypes, map_var_info_t *ArgNames,
+    void **ArgMappers, int32_t DepNum, void *DepList, int32_t NoAliasDepNum,
+    void *NoAliasDepList);

 // passes data from the target, release target memory and destroys the
 // host-target mapping (top entry from the stack of data maps) created by
 // the last __tgt_target_data_begin
-void __tgt_target_data_end(int64_t device_id, int32_t arg_num, void **args_base,
-                           void **args, int64_t *arg_sizes, int64_t *arg_types);
-void __tgt_target_data_end_nowait(int64_t device_id, int32_t arg_num,
-                                  void **args_base, void **args,
-                                  int64_t *arg_sizes, int64_t *arg_types,
-                                  int32_t depNum, void *depList,
-                                  int32_t noAliasDepNum, void *noAliasDepList);
-void __tgt_target_data_end_mapper(ident_t *loc, int64_t device_id,
-                                  int32_t arg_num, void **args_base,
-                                  void **args, int64_t *arg_sizes,
-                                  int64_t *arg_types, map_var_info_t *arg_names,
-                                  void **arg_mappers);
+void __tgt_target_data_end(int64_t DeviceId, int32_t ArgNum, void **ArgsBase,
+                           void **Args, int64_t *ArgSizes, int64_t *ArgTypes);
+void __tgt_target_data_end_nowait(int64_t DeviceId, int32_t ArgNum,
+                                  void **ArgsBase, void **Args,
+                                  int64_t *ArgSizes, int64_t *ArgTypes,
+                                  int32_t DepNum, void *DepList,
+                                  int32_t NoAliasDepNum, void *NoAliasDepList);
+void __tgt_target_data_end_mapper(ident_t *Loc, int64_t DeviceId,
+                                  int32_t ArgNum, void **ArgsBase, void **Args,
+                                  int64_t *ArgSizes, int64_t *ArgTypes,
+                                  map_var_info_t *ArgNames, void **ArgMappers);
 void __tgt_target_data_end_nowait_mapper(
-    ident_t *loc, int64_t device_id, int32_t arg_num, void **args_base,
-    void **args, int64_t *arg_sizes, int64_t *arg_types,
-    map_var_info_t *arg_names, void **arg_mappers, int32_t depNum,
-    void *depList, int32_t noAliasDepNum, void *noAliasDepList);
+    ident_t *Loc, int64_t DeviceId, int32_t ArgNum, void **ArgsBase,
+    void **Args, int64_t *ArgSizes, int64_t *ArgTypes, map_var_info_t *ArgNames,
+    void **ArgMappers, int32_t DepNum, void *DepList, int32_t NoAliasDepNum,
+    void *NoAliasDepList);

 /// passes data to/from the target
-void __tgt_target_data_update(int64_t device_id, int32_t arg_num,
-                              void **args_base, void **args, int64_t *arg_sizes,
-                              int64_t *arg_types);
-void __tgt_target_data_update_nowait(int64_t device_id, int32_t arg_num,
-                                     void **args_base, void **args,
-                                     int64_t *arg_sizes, int64_t *arg_types,
-                                     int32_t depNum, void *depList,
-                                     int32_t noAliasDepNum,
-                                     void *noAliasDepList);
-void __tgt_target_data_update_mapper(ident_t *loc, int64_t device_id,
-                                     int32_t arg_num, void **args_base,
-                                     void **args, int64_t *arg_sizes,
-                                     int64_t *arg_types,
-                                     map_var_info_t *arg_names,
-                                     void **arg_mappers);
+void __tgt_target_data_update(int64_t DeviceId, int32_t ArgNum, void **ArgsBase,
+                              void **Args, int64_t *ArgSizes,
+                              int64_t *ArgTypes);
+void __tgt_target_data_update_nowait(int64_t DeviceId, int32_t ArgNum,
+                                     void **ArgsBase, void **Args,
+                                     int64_t *ArgSizes, int64_t *ArgTypes,
+                                     int32_t DepNum, void *DepList,
+                                     int32_t NoAliasDepNum,
+                                     void *NoAliasDepList);
+void __tgt_target_data_update_mapper(ident_t *Loc, int64_t DeviceId,
+                                     int32_t ArgNum, void **ArgsBase,
+                                     void **Args, int64_t *ArgSizes,
+                                     int64_t *ArgTypes,
+                                     map_var_info_t *ArgNames,
+                                     void **ArgMappers);
 void __tgt_target_data_update_nowait_mapper(
-    ident_t *loc, int64_t device_id, int32_t arg_num, void **args_base,
-    void **args, int64_t *arg_sizes, int64_t *arg_types,
-    map_var_info_t *arg_names, void **arg_mappers, int32_t depNum,
-    void *depList, int32_t noAliasDepNum, void *noAliasDepList);
+    ident_t *Loc, int64_t DeviceId, int32_t ArgNum, void **ArgsBase,
+    void **Args, int64_t *ArgSizes, int64_t *ArgTypes, map_var_info_t *ArgNames,
+    void **ArgMappers, int32_t DepNum, void *DepList, int32_t NoAliasDepNum,
+    void *NoAliasDepList);

-// Performs the same actions as data_begin in case arg_num is non-zero
-// and initiates run of offloaded region on target platform; if arg_num
+// Performs the same actions as data_begin in case ArgNum is non-zero
+// and initiates run of offloaded region on target platform; if ArgNum
 // is non-zero after the region execution is done it also performs the
 // same action as data_end above. The following types are used; this
 // function returns 0 if it was able to transfer the execution to a
 // target and an int different from zero otherwise.
-int __tgt_target(int64_t device_id, void *host_ptr, int32_t arg_num,
-                 void **args_base, void **args, int64_t *arg_sizes,
-                 int64_t *arg_types);
-int __tgt_target_nowait(int64_t device_id, void *host_ptr, int32_t arg_num,
-                        void **args_base, void **args, int64_t *arg_sizes,
-                        int64_t *arg_types, int32_t depNum, void *depList,
-                        int32_t noAliasDepNum, void *noAliasDepList);
-int __tgt_target_mapper(ident_t *loc, int64_t device_id, void *host_ptr,
-                        int32_t arg_num, void **args_base, void **args,
-                        int64_t *arg_sizes, int64_t *arg_types,
-                        map_var_info_t *arg_names, void **arg_mappers);
-int __tgt_target_nowait_mapper(ident_t *loc, int64_t device_id, void *host_ptr,
-                               int32_t arg_num, void **args_base, void **args,
-                               int64_t *arg_sizes, int64_t *arg_types,
-                               map_var_info_t *arg_names, void **arg_mappers,
-                               int32_t depNum, void *depList,
-                               int32_t noAliasDepNum, void *noAliasDepList);
-
-int __tgt_target_teams(int64_t device_id, void *host_ptr, int32_t arg_num,
-                       void **args_base, void **args, int64_t *arg_sizes,
-                       int64_t *arg_types, int32_t num_teams,
-                       int32_t thread_limit);
-int __tgt_target_teams_nowait(int64_t device_id, void *host_ptr,
-                              int32_t arg_num, void **args_base, void **args,
-                              int64_t *arg_sizes, int64_t *arg_types,
-                              int32_t num_teams, int32_t thread_limit,
-                              int32_t depNum, void *depList,
-                              int32_t noAliasDepNum, void *noAliasDepList);
-int __tgt_target_teams_mapper(ident_t *loc, int64_t device_id, void *host_ptr,
-                              int32_t arg_num, void **args_base, void **args,
-                              int64_t *arg_sizes, int64_t *arg_types,
-                              map_var_info_t *arg_names, void **arg_mappers,
-                              int32_t num_teams, int32_t thread_limit);
+int __tgt_target(int64_t DeviceId, void *HostPtr, int32_t ArgNum,
+                 void **ArgsBase, void **Args, int64_t *ArgSizes,
+                 int64_t *ArgTypes);
+int __tgt_target_nowait(int64_t DeviceId, void *HostPtr, int32_t ArgNum,
+                        void **ArgsBase, void **Args, int64_t *ArgSizes,
+                        int64_t *ArgTypes, int32_t DepNum, void *DepList,
+                        int32_t NoAliasDepNum, void *NoAliasDepList);
+int __tgt_target_mapper(ident_t *Loc, int64_t DeviceId, void *HostPtr,
+                        int32_t ArgNum, void **ArgsBase, void **Args,
+                        int64_t *ArgSizes, int64_t *ArgTypes,
+                        map_var_info_t *ArgNames, void **ArgMappers);
+int __tgt_target_nowait_mapper(ident_t *Loc, int64_t DeviceId, void *HostPtr,
+                               int32_t ArgNum, void **ArgsBase, void **Args,
+                               int64_t *ArgSizes, int64_t *ArgTypes,
+                               map_var_info_t *ArgNames, void **ArgMappers,
+                               int32_t DepNum, void *DepList,
+                               int32_t NoAliasDepNum, void *NoAliasDepList);
+
+int __tgt_target_teams(int64_t DeviceId, void *HostPtr, int32_t ArgNum,
+                       void **ArgsBase, void **Args, int64_t *ArgSizes,
+                       int64_t *ArgTypes, int32_t NumTeams,
+                       int32_t ThreadLimit);
+int __tgt_target_teams_nowait(int64_t DeviceId, void *HostPtr, int32_t ArgNum,
+                              void **ArgsBase, void **Args, int64_t *ArgSizes,
+                              int64_t *ArgTypes, int32_t NumTeams,
+                              int32_t ThreadLimit, int32_t DepNum,
+                              void *DepList, int32_t NoAliasDepNum,
+                              void *NoAliasDepList);
+int __tgt_target_teams_mapper(ident_t *Loc, int64_t DeviceId, void *HostPtr,
+                              int32_t ArgNum, void **ArgsBase, void **Args,
+                              int64_t *ArgSizes, int64_t *ArgTypes,
+                              map_var_info_t *ArgNames, void **ArgMappers,
+                              int32_t NumTeams, int32_t ThreadLimit);
 int __tgt_target_teams_nowait_mapper(
-    ident_t *loc, int64_t device_id, void *host_ptr, int32_t arg_num,
-    void **args_base, void **args, int64_t *arg_sizes, int64_t *arg_types,
-    map_var_info_t *arg_names, void **arg_mappers, int32_t num_teams,
-    int32_t thread_limit, int32_t depNum, void *depList, int32_t noAliasDepNum,
-    void *noAliasDepList);
+    ident_t *Loc, int64_t DeviceId, void *HostPtr, int32_t ArgNum,
+    void **ArgsBase, void **Args, int64_t *ArgSizes, int64_t *ArgTypes,
+    map_var_info_t *ArgNames, void **ArgMappers, int32_t NumTeams,
+    int32_t ThreadLimit, int32_t DepNum, void *DepList, int32_t NoAliasDepNum,
+    void *NoAliasDepList);

-void __kmpc_push_target_tripcount(int64_t device_id, uint64_t loop_tripcount);
+void __kmpc_push_target_tripcount(int64_t DeviceId, uint64_t LoopTripcount);

-void __kmpc_push_target_tripcount_mapper(ident_t *loc, int64_t device_id,
-                                         uint64_t loop_tripcount);
+void __kmpc_push_target_tripcount_mapper(ident_t *Loc, int64_t DeviceId,
+                                         uint64_t LoopTripcount);

 void __tgt_set_info_flag(uint32_t);

-int __tgt_print_device_info(int64_t device_id);
+int __tgt_print_device_info(int64_t DeviceId);

 #ifdef __cplusplus
 }
 #endif
diff --git a/openmp/libomptarget/include/omptargetplugin.h b/openmp/libomptarget/include/omptargetplugin.h
--- a/openmp/libomptarget/include/omptargetplugin.h
+++ b/openmp/libomptarget/include/omptargetplugin.h
@@ -131,12 +131,12 @@
 int32_t __tgt_rtl_run_target_team_region(int32_t ID, void *Entry, void **Args,
                                          ptrdiff_t *Offsets, int32_t NumArgs,
                                          int32_t NumTeams, int32_t ThreadLimit,
-                                         uint64_t loop_tripcount);
+                                         uint64_t LoopTripcount);

 // Asynchronous version of __tgt_rtl_run_target_team_region
 int32_t __tgt_rtl_run_target_team_region_async(
     int32_t ID, void *Entry, void **Args, ptrdiff_t *Offsets, int32_t NumArgs,
-    int32_t NumTeams, int32_t ThreadLimit, uint64_t loop_tripcount,
+    int32_t NumTeams, int32_t ThreadLimit, uint64_t LoopTripcount,
     __tgt_async_info *AsyncInfo);

 // Device synchronization. In case of success, return zero. Otherwise, return an
diff --git a/openmp/libomptarget/include/rtl.h b/openmp/libomptarget/include/rtl.h
--- a/openmp/libomptarget/include/rtl.h
+++ b/openmp/libomptarget/include/rtl.h
@@ -116,7 +116,7 @@
   release_async_info_ty *release_async_info = nullptr;

   // Are there images associated with this RTL.
-  bool isUsed = false;
+  bool IsUsed = false;

   // Mutex for thread-safety when calling RTL interface functions.
   // It is easier to enforce thread-safety at the libomptarget level,
@@ -138,7 +138,7 @@
   explicit RTLsTy() = default;

   // Register the clauses of the requires directive.
-  void RegisterRequires(int64_t flags);
+  void registerRequires(int64_t Flags);

   // Initialize RTL if it has not been initialized
   void initRTLonce(RTLInfoTy &RTL);
@@ -147,15 +147,15 @@
   void initAllRTLs();

   // Register a shared library with all (compatible) RTLs.
-  void RegisterLib(__tgt_bin_desc *desc);
+  void registerLib(__tgt_bin_desc *Desc);

   // Unregister a shared library from all RTLs.
-  void UnregisterLib(__tgt_bin_desc *desc);
+  void unregisterLib(__tgt_bin_desc *Desc);

   // Mutex-like object to guarantee thread-safety and unique initialization
   // (i.e. the library attempts to load the RTLs (plugins) only once).
-  std::once_flag initFlag;
-  void LoadRTLs(); // not thread-safe
+  std::once_flag InitFlag;
+  void loadRTLs(); // not thread-safe
 };

 /// Map between the host entry begin and the translation table. Each
@@ -179,8 +179,8 @@
   TranslationTable *Table = nullptr; // table associated with the host ptr.
   uint32_t Index = 0; // index in which the host ptr translated entry is found.
   TableMap() = default;
-  TableMap(TranslationTable *table, uint32_t index)
-      : Table(table), Index(index) {}
+  TableMap(TranslationTable *Table, uint32_t Index)
+      : Table(Table), Index(Index) {}
 };
 typedef std::map<void *, TableMap> HostPtrToTableMapTy;
diff --git a/openmp/libomptarget/plugins/amdgpu/src/rtl.cpp b/openmp/libomptarget/plugins/amdgpu/src/rtl.cpp
--- a/openmp/libomptarget/plugins/amdgpu/src/rtl.cpp
+++ b/openmp/libomptarget/plugins/amdgpu/src/rtl.cpp
@@ -24,8 +24,8 @@
 #include
 #include
-#include "interop_hsa.h"
 #include "impl_runtime.h"
+#include "interop_hsa.h"
 #include "internal.h"
 #include "rt.h"
@@ -43,8 +43,8 @@
 // linked as --whole-archive to override the weak symbols that are used to
 // implement a fallback for toolchains that do not yet have a hostrpc library.
 extern "C" {
-uint64_t hostrpc_assign_buffer(hsa_agent_t agent, hsa_queue_t *this_Q,
-                               uint32_t device_id);
+uint64_t hostrpc_assign_buffer(hsa_agent_t Agent, hsa_queue_t *ThisQ,
+                               uint32_t DeviceId);
 hsa_status_t hostrpc_init();
 hsa_status_t hostrpc_terminate();
@@ -53,10 +53,10 @@
   return HSA_STATUS_SUCCESS;
 }
 __attribute__((weak)) uint64_t hostrpc_assign_buffer(hsa_agent_t, hsa_queue_t *,
-                                                     uint32_t device_id) {
+                                                     uint32_t DeviceId) {
   DP("Warning: Attempting to assign hostrpc to device %u, but hostrpc library "
      "missing\n",
-     device_id);
+     DeviceId);
   return 0;
 }
 }
@@ -82,22 +82,22 @@
 #include "elf_common.h"

 namespace hsa {
-template <typename C> hsa_status_t iterate_agents(C cb) {
-  auto L = [](hsa_agent_t agent, void *data) -> hsa_status_t {
-    C *unwrapped = static_cast<C *>(data);
-    return (*unwrapped)(agent);
+template <typename C> hsa_status_t iterate_agents(C Cb) {
+  auto L = [](hsa_agent_t Agent, void *Data) -> hsa_status_t {
+    C *Unwrapped = static_cast<C *>(Data);
+    return (*Unwrapped)(Agent);
   };
-  return hsa_iterate_agents(L, static_cast<void *>(&cb));
+  return hsa_iterate_agents(L, static_cast<void *>(&Cb));
 }

 template <typename C>
-hsa_status_t amd_agent_iterate_memory_pools(hsa_agent_t Agent, C cb) {
-  auto L = [](hsa_amd_memory_pool_t MemoryPool, void *data) -> hsa_status_t {
-    C *unwrapped = static_cast<C *>(data);
-    return (*unwrapped)(MemoryPool);
+hsa_status_t amd_agent_iterate_memory_pools(hsa_agent_t Agent, C Cb) {
+  auto L = [](hsa_amd_memory_pool_t MemoryPool, void *Data) -> hsa_status_t {
+    C *Unwrapped = static_cast<C *>(Data);
+    return (*Unwrapped)(MemoryPool);
   };
-  return hsa_amd_agent_iterate_memory_pools(Agent, L, static_cast<void *>(&cb));
+  return hsa_amd_agent_iterate_memory_pools(Agent, L, static_cast<void *>(&Cb));
 }

 } // namespace hsa
@@ -110,22 +110,22 @@
 struct KernelArgPool {
 private:
-  static pthread_mutex_t mutex;
+  static pthread_mutex_t Mutex;

 public:
-  uint32_t kernarg_segment_size;
-  void *kernarg_region = nullptr;
-  std::queue<int> free_kernarg_segments;
+  uint32_t KernargSegmentSize;
+  void *KernargRegion = nullptr;
+  std::queue<int> FreeKernargSegments;

-  uint32_t kernarg_size_including_implicit() {
-    return kernarg_segment_size + sizeof(impl_implicit_args_t);
+  uint32_t kernargSizeIncludingImplicit() {
+    return KernargSegmentSize + sizeof(impl_implicit_args_t);
   }

   ~KernelArgPool() {
-    if (kernarg_region) {
-      auto r = hsa_amd_memory_pool_free(kernarg_region);
-      if (r != HSA_STATUS_SUCCESS) {
-        DP("hsa_amd_memory_pool_free failed: %s\n", get_error_string(r));
+    if (KernargRegion) {
+      auto R = hsa_amd_memory_pool_free(KernargRegion);
+      if (R != HSA_STATUS_SUCCESS) {
+        DP("hsa_amd_memory_pool_free failed: %s\n", get_error_string(R));
       }
     }
   }
@@ -135,77 +135,76 @@
   KernelArgPool(const KernelArgPool &) = delete;
   KernelArgPool(KernelArgPool &&) = delete;

-  KernelArgPool(uint32_t kernarg_segment_size,
-                hsa_amd_memory_pool_t &memory_pool)
-      : kernarg_segment_size(kernarg_segment_size) {
+  KernelArgPool(uint32_t KernargSegmentSize, hsa_amd_memory_pool_t &MemoryPool)
+      : KernargSegmentSize(KernargSegmentSize) {

     // impl uses one pool per kernel for all gpus, with a fixed upper size
     // preserving that exact scheme here, including the queue
-    hsa_status_t err = hsa_amd_memory_pool_allocate(
-        memory_pool, kernarg_size_including_implicit() * MAX_NUM_KERNELS, 0,
-        &kernarg_region);
+    hsa_status_t Err = hsa_amd_memory_pool_allocate(
+        MemoryPool, kernargSizeIncludingImplicit() * MAX_NUM_KERNELS, 0,
+        &KernargRegion);

-    if (err != HSA_STATUS_SUCCESS) {
-      DP("hsa_amd_memory_pool_allocate failed: %s\n", get_error_string(err));
-      kernarg_region = nullptr; // paranoid
+    if (Err != HSA_STATUS_SUCCESS) {
+      DP("hsa_amd_memory_pool_allocate failed: %s\n", get_error_string(Err));
+      KernargRegion = nullptr; // paranoid
       return;
     }

-    err = core::allow_access_to_all_gpu_agents(kernarg_region);
-    if (err != HSA_STATUS_SUCCESS) {
+    Err = core::allow_access_to_all_gpu_agents(KernargRegion);
+    if (Err != HSA_STATUS_SUCCESS) {
       DP("hsa allow_access_to_all_gpu_agents failed: %s\n",
-         get_error_string(err));
-      auto r = hsa_amd_memory_pool_free(kernarg_region);
-      if (r != HSA_STATUS_SUCCESS) {
+         get_error_string(Err));
+      auto R = hsa_amd_memory_pool_free(KernargRegion);
+      if (R != HSA_STATUS_SUCCESS) {
         // if free failed, can't do anything more to resolve it
-        DP("hsa memory poll free failed: %s\n", get_error_string(err));
+        DP("hsa memory poll free failed: %s\n", get_error_string(Err));
       }
-      kernarg_region = nullptr;
+      KernargRegion = nullptr;
       return;
     }

-    for (int i = 0; i < MAX_NUM_KERNELS; i++) {
-      free_kernarg_segments.push(i);
+    for (int I = 0; I < MAX_NUM_KERNELS; I++) {
+      FreeKernargSegments.push(I);
     }
   }

-  void *allocate(uint64_t arg_num) {
-    assert((arg_num * sizeof(void *)) == kernarg_segment_size);
-    lock l(&mutex);
-    void *res = nullptr;
-    if (!free_kernarg_segments.empty()) {
+  void *allocate(uint64_t ArgNum) {
+    assert((ArgNum * sizeof(void *)) == KernargSegmentSize);
+    Lock L(&Mutex);
+    void *Res = nullptr;
+    if (!FreeKernargSegments.empty()) {

-      int free_idx = free_kernarg_segments.front();
-      res = static_cast<void *>(static_cast<char *>(kernarg_region) +
-                                (free_idx * kernarg_size_including_implicit()));
-      assert(free_idx == pointer_to_index(res));
-      free_kernarg_segments.pop();
+      int FreeIdx = FreeKernargSegments.front();
+      Res = static_cast<void *>(static_cast<char *>(KernargRegion) +
+                                (FreeIdx * kernargSizeIncludingImplicit()));
+      assert(FreeIdx == pointerToIndex(Res));
+      FreeKernargSegments.pop();
     }

-    return res;
+    return Res;
   }

-  void deallocate(void *ptr) {
-    lock l(&mutex);
-    int idx = pointer_to_index(ptr);
-    free_kernarg_segments.push(idx);
+  void deallocate(void *Ptr) {
+    Lock L(&Mutex);
+    int Idx = pointerToIndex(Ptr);
+    FreeKernargSegments.push(Idx);
   }

 private:
-  int pointer_to_index(void *ptr) {
-    ptrdiff_t bytes =
-        static_cast<char *>(ptr) - static_cast<char *>(kernarg_region);
-    assert(bytes >= 0);
-    assert(bytes % kernarg_size_including_implicit() == 0);
-    return bytes / kernarg_size_including_implicit();
-  }
-  struct lock {
-    lock(pthread_mutex_t *m) : m(m) { pthread_mutex_lock(m); }
-    ~lock() { pthread_mutex_unlock(m); }
-    pthread_mutex_t *m;
+  int pointerToIndex(void *Ptr) {
+    ptrdiff_t Bytes =
+        static_cast<char *>(Ptr) - static_cast<char *>(KernargRegion);
+    assert(Bytes >= 0);
+    assert(Bytes % kernargSizeIncludingImplicit() == 0);
+    return Bytes / kernargSizeIncludingImplicit();
+  }
+  struct Lock {
+    Lock(pthread_mutex_t *M) : M(M) { pthread_mutex_lock(M); }
+    ~Lock() { pthread_mutex_unlock(M); }
+    pthread_mutex_t *M;
   };
 };
-pthread_mutex_t KernelArgPool::mutex = PTHREAD_MUTEX_INITIALIZER;
+pthread_mutex_t KernelArgPool::Mutex = PTHREAD_MUTEX_INITIALIZER;

 std::unordered_map<std::string, std::unique_ptr<KernelArgPool>>
     KernelArgPoolMap;
@@ -214,23 +213,23 @@
 struct KernelTy {
   llvm::omp::OMPTgtExecModeFlags ExecutionMode;
   int16_t ConstWGSize;
-  int32_t device_id;
+  int32_t DeviceId;
   void *CallStackAddr = nullptr;
   const char *Name;

-  KernelTy(llvm::omp::OMPTgtExecModeFlags _ExecutionMode, int16_t _ConstWGSize,
-           int32_t _device_id, void *_CallStackAddr, const char *_Name,
-           uint32_t _kernarg_segment_size,
+  KernelTy(llvm::omp::OMPTgtExecModeFlags ExecutionMode, int16_t ConstWgSize,
+           int32_t DeviceId, void *CallStackAddr, const char *Name,
+           uint32_t KernargSegmentSize,
            hsa_amd_memory_pool_t &KernArgMemoryPool)
-      : ExecutionMode(_ExecutionMode), ConstWGSize(_ConstWGSize),
-        device_id(_device_id), CallStackAddr(_CallStackAddr), Name(_Name) {
+      : ExecutionMode(ExecutionMode), ConstWGSize(ConstWgSize),
+        DeviceId(DeviceId), CallStackAddr(CallStackAddr), Name(Name) {
     DP("Construct kernelinfo: ExecMode %d\n", ExecutionMode);

-    std::string N(_Name);
+    std::string N(Name);
     if (KernelArgPoolMap.find(N) == KernelArgPoolMap.end()) {
       KernelArgPoolMap.insert(
           std::make_pair(N, std::unique_ptr<KernelArgPool>(new KernelArgPool(
-                                _kernarg_segment_size, KernArgMemoryPool))));
+                                KernargSegmentSize, KernArgMemoryPool))));
     }
   }
 };
@@ -239,43 +238,43 @@
 /// FIXME: we may need this to be per device and per library.
 std::list<KernelTy> KernelsList;

-template <typename Callback> static hsa_status_t FindAgents(Callback CB) {
+template <typename Callback> static hsa_status_t findAgents(Callback CB) {

-  hsa_status_t err =
-      hsa::iterate_agents([&](hsa_agent_t agent) -> hsa_status_t {
-        hsa_device_type_t device_type;
+  hsa_status_t Err =
+      hsa::iterate_agents([&](hsa_agent_t Agent) -> hsa_status_t {
+        hsa_device_type_t DeviceType;
         // get_info fails iff HSA runtime not yet initialized
-        hsa_status_t err =
-            hsa_agent_get_info(agent, HSA_AGENT_INFO_DEVICE, &device_type);
+        hsa_status_t Err =
+            hsa_agent_get_info(Agent, HSA_AGENT_INFO_DEVICE, &DeviceType);

-        if (err != HSA_STATUS_SUCCESS) {
+        if (Err != HSA_STATUS_SUCCESS) {
           if (print_kernel_trace > 0)
-            DP("rtl.cpp: err %s\n", get_error_string(err));
+            DP("rtl.cpp: err %s\n", get_error_string(Err));

-          return err;
+          return Err;
         }

-        CB(device_type, agent);
+        CB(DeviceType, Agent);
         return HSA_STATUS_SUCCESS;
       });

   // iterate_agents fails iff HSA runtime not yet initialized
-  if (print_kernel_trace > 0 && err != HSA_STATUS_SUCCESS) {
-    DP("rtl.cpp: err %s\n", get_error_string(err));
+  if (print_kernel_trace > 0 && Err != HSA_STATUS_SUCCESS) {
+    DP("rtl.cpp: err %s\n", get_error_string(Err));
   }

-  return err;
+  return Err;
 }

-static void callbackQueue(hsa_status_t status, hsa_queue_t *source,
-                          void *data) {
-  if (status != HSA_STATUS_SUCCESS) {
-    const char *status_string;
-    if (hsa_status_string(status, &status_string) != HSA_STATUS_SUCCESS) {
-      status_string = "unavailable";
+static void callbackQueue(hsa_status_t Status, hsa_queue_t *Source,
+                          void *Data) {
+  if (Status != HSA_STATUS_SUCCESS) {
+    const char *StatusString;
+    if (hsa_status_string(Status, &StatusString) != HSA_STATUS_SUCCESS) {
+      StatusString = "unavailable";
     }
-    DP("[%s:%d] GPU error in queue %p %d (%s)\n", __FILE__, __LINE__, source,
-       status, status_string);
+    DP("[%s:%d] GPU error in queue %p %d (%s)\n", __FILE__, __LINE__, Source,
+       Status, StatusString);
     abort();
   }
 }
@@ -292,15 +291,15 @@
   return false;
 }

-void packet_store_release(uint32_t *packet, uint16_t header, uint16_t rest) {
-  __atomic_store_n(packet, header | (rest << 16), __ATOMIC_RELEASE);
+void packetStoreRelease(uint32_t *Packet, uint16_t Header, uint16_t Rest) {
+  __atomic_store_n(Packet, Header | (Rest << 16), __ATOMIC_RELEASE);
 }

-uint16_t create_header() {
-  uint16_t header = HSA_PACKET_TYPE_KERNEL_DISPATCH << HSA_PACKET_HEADER_TYPE;
-  header |= HSA_FENCE_SCOPE_SYSTEM << HSA_PACKET_HEADER_ACQUIRE_FENCE_SCOPE;
-  header |= HSA_FENCE_SCOPE_SYSTEM << HSA_PACKET_HEADER_RELEASE_FENCE_SCOPE;
-  return header;
+uint16_t createHeader() {
+  uint16_t Header = HSA_PACKET_TYPE_KERNEL_DISPATCH << HSA_PACKET_HEADER_TYPE;
+  Header |= HSA_FENCE_SCOPE_SYSTEM << HSA_PACKET_HEADER_ACQUIRE_FENCE_SCOPE;
+  Header |= HSA_FENCE_SCOPE_SYSTEM << HSA_PACKET_HEADER_RELEASE_FENCE_SCOPE;
+  return Header;
 }

 hsa_status_t isValidMemoryPool(hsa_amd_memory_pool_t MemoryPool) {
@@ -329,9 +328,9 @@
   std::vector<hsa_amd_memory_pool_t> *Result =
       static_cast<std::vector<hsa_amd_memory_pool_t> *>(Data);

-  hsa_status_t err;
-  if ((err = isValidMemoryPool(MemoryPool)) != HSA_STATUS_SUCCESS) {
-    return err;
+  hsa_status_t Err;
+  if ((Err = isValidMemoryPool(MemoryPool)) != HSA_STATUS_SUCCESS) {
+    return Err;
   }

   Result->push_back(MemoryPool);
@@ -378,47 +377,47 @@
 // multiple threads (one scheduler per device)
 class HSAQueueScheduler {
 public:
-  HSAQueueScheduler() : current(0) {}
+  HSAQueueScheduler() : Current(0) {}

   HSAQueueScheduler(const HSAQueueScheduler &) = delete;

-  HSAQueueScheduler(HSAQueueScheduler &&q) {
-    current = q.current.load();
-    for (uint8_t i = 0; i < NUM_QUEUES_PER_DEVICE; i++) {
-      HSAQueues[i] = q.HSAQueues[i];
-      q.HSAQueues[i] = nullptr;
+  HSAQueueScheduler(HSAQueueScheduler &&Q) {
+    Current = Q.Current.load();
+    for (uint8_t I = 0; I < NUM_QUEUES_PER_DEVICE; I++) {
+      HSAQueues[I] = Q.HSAQueues[I];
+      Q.HSAQueues[I] = nullptr;
     }
   }

   // \return false if any HSA queue creation fails
-  bool CreateQueues(hsa_agent_t HSAAgent, uint32_t queue_size) {
-    for (uint8_t i = 0; i < NUM_QUEUES_PER_DEVICE; i++) {
+  bool createQueues(hsa_agent_t HSAAgent, uint32_t QueueSize) {
+    for (uint8_t I = 0; I < NUM_QUEUES_PER_DEVICE; I++) {
       hsa_queue_t *Q = nullptr;
-      hsa_status_t rc =
-          hsa_queue_create(HSAAgent, queue_size, HSA_QUEUE_TYPE_MULTI,
+      hsa_status_t Rc =
+          hsa_queue_create(HSAAgent, QueueSize, HSA_QUEUE_TYPE_MULTI,
                            callbackQueue, NULL, UINT32_MAX, UINT32_MAX, &Q);
-      if (rc != HSA_STATUS_SUCCESS) {
-        DP("Failed to create HSA queue %d\n", i);
+      if (Rc != HSA_STATUS_SUCCESS) {
+        DP("Failed to create HSA queue %d\n", I);
         return false;
       }
-      HSAQueues[i] = Q;
+      HSAQueues[I] = Q;
     }
     return true;
   }

   ~HSAQueueScheduler() {
-    for (uint8_t i = 0; i < NUM_QUEUES_PER_DEVICE; i++) {
-      if (HSAQueues[i]) {
-        hsa_status_t err = hsa_queue_destroy(HSAQueues[i]);
-        if (err != HSA_STATUS_SUCCESS)
+    for (uint8_t I = 0; I < NUM_QUEUES_PER_DEVICE; I++) {
+      if (HSAQueues[I]) {
+        hsa_status_t Err = hsa_queue_destroy(HSAQueues[I]);
+        if (Err != HSA_STATUS_SUCCESS)
           DP("Error destroying HSA queue");
       }
     }
   }

   // \return next queue to use for device
-  hsa_queue_t *Next() {
-    return HSAQueues[(current.fetch_add(1, std::memory_order_relaxed)) %
+  hsa_queue_t *next() {
+    return HSAQueues[(Current.fetch_add(1, std::memory_order_relaxed)) %
                      NUM_QUEUES_PER_DEVICE];
   }

@@ -426,7 +425,7 @@
   // Number of queues per device
   enum : uint8_t { NUM_QUEUES_PER_DEVICE = 4 };
   hsa_queue_t *HSAQueues[NUM_QUEUES_PER_DEVICE] = {};
-  std::atomic<uint8_t> current;
+  std::atomic<uint8_t> Current;
 };

 /// Class containing all the device information
@@ -449,7 +448,7 @@

   // load binary populates symbol tables and mutates various global state
   // run uses those symbol tables
-  std::shared_timed_mutex load_run_lock;
+  std::shared_timed_mutex LoadRunLock;

   int NumberOfDevices = 0;

@@ -480,7 +479,7 @@
   // Resource pools
   SignalPoolT FreeSignalPool;

-  bool hostcall_required = false;
+  bool HostcallRequired = false;

   std::vector<hsa_executable_t> HSAExecutables;

@@ -496,15 +495,15 @@
   std::vector<hsa_amd_memory_pool_t> DeviceFineGrainedMemoryPools;
   std::vector<hsa_amd_memory_pool_t> DeviceCoarseGrainedMemoryPools;

-  struct implFreePtrDeletor {
-    void operator()(void *p) {
-      core::Runtime::Memfree(p); // ignore failure to free
+  struct ImplFreePtrDeletor {
+    void operator()(void *P) {
+      core::Runtime::Memfree(P); // ignore failure to free
     }
   };

   // device_State shared across loaded binaries, error if inconsistent size
-  std::vector<std::pair<std::unique_ptr<void, implFreePtrDeletor>, uint64_t>>
-      deviceStateStore;
+  std::vector<std::pair<std::unique_ptr<void, ImplFreePtrDeletor>, uint64_t>>
+      DeviceStateStore;

   static const unsigned HardTeamLimit =
       (1 << 16) - 1; // 64K needed to fit in uint16
@@ -516,101 +515,101 @@
   static_assert(getGridValue<32>().GV_Max_Teams ==
                     getGridValue<64>().GV_Max_Teams,
                 "");
-  static const int Max_Teams = getGridValue<64>().GV_Max_Teams;
+  static const int MaxTeams = getGridValue<64>().GV_Max_Teams;

   static_assert(getGridValue<32>().GV_Max_WG_Size ==
                     getGridValue<64>().GV_Max_WG_Size,
                 "");
-  static const int Max_WG_Size = getGridValue<64>().GV_Max_WG_Size;
+  static const int MaxWgSize = getGridValue<64>().GV_Max_WG_Size;

   static_assert(getGridValue<32>().GV_Default_WG_Size ==
                     getGridValue<64>().GV_Default_WG_Size,
                 "");
-  static const int Default_WG_Size = getGridValue<64>().GV_Default_WG_Size;
+  static const int DefaultWgSize = getGridValue<64>().GV_Default_WG_Size;

-  using MemcpyFunc = hsa_status_t (*)(hsa_signal_t, void *, void *, size_t size,
+  using MemcpyFunc = hsa_status_t (*)(hsa_signal_t, void *, void *, size_t Size,
                                       hsa_agent_t, hsa_amd_memory_pool_t);
-  hsa_status_t freesignalpool_memcpy(void *dest, void *src, size_t size,
-                                     MemcpyFunc Func, int32_t deviceId) {
-    hsa_agent_t agent = HSAAgents[deviceId];
-    hsa_signal_t s = FreeSignalPool.pop();
-    if (s.handle == 0) {
+  hsa_status_t freesignalpoolMemcpy(void *Dest, void *Src, size_t Size,
+                                    MemcpyFunc Func, int32_t DeviceId) {
+    hsa_agent_t Agent = HSAAgents[DeviceId];
+    hsa_signal_t S = FreeSignalPool.pop();
+    if (S.handle == 0) {
       return HSA_STATUS_ERROR;
     }
-    hsa_status_t r = Func(s, dest, src, size, agent, HostFineGrainedMemoryPool);
-    FreeSignalPool.push(s);
-    return r;
+    hsa_status_t R = Func(S, Dest, Src, Size, Agent, HostFineGrainedMemoryPool);
+    FreeSignalPool.push(S);
+    return R;
   }

-  hsa_status_t freesignalpool_memcpy_d2h(void *dest, void *src, size_t size,
-                                         int32_t deviceId) {
-    return freesignalpool_memcpy(dest, src, size, impl_memcpy_d2h, deviceId);
+  hsa_status_t freesignalpoolMemcpyD2H(void *Dest, void *Src, size_t Size,
+                                       int32_t DeviceId) {
+    return freesignalpoolMemcpy(Dest, Src, Size, impl_memcpy_d2h, DeviceId);
   }

-  hsa_status_t freesignalpool_memcpy_h2d(void *dest, void *src, size_t size,
-                                         int32_t deviceId) {
-    return freesignalpool_memcpy(dest, src, size, impl_memcpy_h2d, deviceId);
+  hsa_status_t freesignalpoolMemcpyH2D(void *Dest, void *Src, size_t Size,
+                                       int32_t DeviceId) {
+    return freesignalpoolMemcpy(Dest, Src, Size, impl_memcpy_h2d, DeviceId);
   }

-  static void printDeviceInfo(int32_t device_id, hsa_agent_t agent) {
+  static void printDeviceInfo(int32_t DeviceId, hsa_agent_t Agent) {
     char TmpChar[1000];
-    uint16_t major, minor;
+    uint16_t Major, Minor;
     uint32_t TmpUInt;
     uint32_t TmpUInt2;
     uint32_t CacheSize[4];
     bool TmpBool;
-    uint16_t workgroupMaxDim[3];
-    hsa_dim3_t gridMaxDim;
+    uint16_t WorkgroupMaxDim[3];
+    hsa_dim3_t GridMaxDim;

     // Getting basic information about HSA and Device
     core::checkResult(
-        hsa_system_get_info(HSA_SYSTEM_INFO_VERSION_MAJOR, &major),
+        hsa_system_get_info(HSA_SYSTEM_INFO_VERSION_MAJOR, &Major),
         "Error from hsa_system_get_info when obtaining "
         "HSA_SYSTEM_INFO_VERSION_MAJOR\n");
     core::checkResult(
-        hsa_system_get_info(HSA_SYSTEM_INFO_VERSION_MINOR, &minor),
+        hsa_system_get_info(HSA_SYSTEM_INFO_VERSION_MINOR, &Minor),
         "Error from hsa_system_get_info when obtaining "
         "HSA_SYSTEM_INFO_VERSION_MINOR\n");
-    printf(" HSA Runtime Version: \t\t%u.%u \n", major, minor);
-    printf(" HSA OpenMP Device Number: \t\t%d \n", device_id);
+    printf(" HSA Runtime Version: \t\t%u.%u \n", Major, Minor);
+    printf(" HSA OpenMP Device Number: \t\t%d \n", DeviceId);
     core::checkResult(
         hsa_agent_get_info(
-            agent, (hsa_agent_info_t)HSA_AMD_AGENT_INFO_PRODUCT_NAME, TmpChar),
+            Agent, (hsa_agent_info_t)HSA_AMD_AGENT_INFO_PRODUCT_NAME, TmpChar),
         "Error returned from hsa_agent_get_info when obtaining "
         "HSA_AMD_AGENT_INFO_PRODUCT_NAME\n");
     printf(" Product Name: \t\t\t%s \n", TmpChar);
-    core::checkResult(hsa_agent_get_info(agent, HSA_AGENT_INFO_NAME, TmpChar),
+    core::checkResult(hsa_agent_get_info(Agent, HSA_AGENT_INFO_NAME, TmpChar),
                       "Error returned from hsa_agent_get_info when obtaining "
                       "HSA_AGENT_INFO_NAME\n");
     printf(" Device Name: \t\t\t%s \n", TmpChar);
     core::checkResult(
-        hsa_agent_get_info(agent, HSA_AGENT_INFO_VENDOR_NAME, TmpChar),
+        hsa_agent_get_info(Agent, HSA_AGENT_INFO_VENDOR_NAME, TmpChar),
         "Error returned from hsa_agent_get_info when obtaining "
        "HSA_AGENT_INFO_NAME\n");
     printf(" Vendor Name: \t\t\t%s \n", TmpChar);
-    hsa_device_type_t devType;
+    hsa_device_type_t DevType;
     core::checkResult(
-        hsa_agent_get_info(agent, HSA_AGENT_INFO_DEVICE, &devType),
+        hsa_agent_get_info(Agent, HSA_AGENT_INFO_DEVICE, &DevType),
         "Error returned from hsa_agent_get_info when obtaining "
         "HSA_AGENT_INFO_DEVICE\n");
     printf(" Device Type: \t\t\t%s \n",
-           devType == HSA_DEVICE_TYPE_CPU
+           DevType == HSA_DEVICE_TYPE_CPU
               ? "CPU"
-               : (devType == HSA_DEVICE_TYPE_GPU
+               : (DevType == HSA_DEVICE_TYPE_GPU
                       ? "GPU"
-                      : (devType == HSA_DEVICE_TYPE_DSP ? "DSP" : "UNKNOWN")));
+                      : (DevType == HSA_DEVICE_TYPE_DSP ? "DSP" : "UNKNOWN")));
     core::checkResult(
-        hsa_agent_get_info(agent, HSA_AGENT_INFO_QUEUES_MAX, &TmpUInt),
+        hsa_agent_get_info(Agent, HSA_AGENT_INFO_QUEUES_MAX, &TmpUInt),
         "Error returned from hsa_agent_get_info when obtaining "
         "HSA_AGENT_INFO_QUEUES_MAX\n");
     printf(" Max Queues: \t\t\t%u \n", TmpUInt);
     core::checkResult(
-        hsa_agent_get_info(agent, HSA_AGENT_INFO_QUEUE_MIN_SIZE, &TmpUInt),
+        hsa_agent_get_info(Agent, HSA_AGENT_INFO_QUEUE_MIN_SIZE, &TmpUInt),
         "Error returned from hsa_agent_get_info when obtaining "
         "HSA_AGENT_INFO_QUEUE_MIN_SIZE\n");
     printf(" Queue Min Size: \t\t\t%u \n", TmpUInt);
     core::checkResult(
-        hsa_agent_get_info(agent, HSA_AGENT_INFO_QUEUE_MAX_SIZE, &TmpUInt),
+        hsa_agent_get_info(Agent, HSA_AGENT_INFO_QUEUE_MAX_SIZE, &TmpUInt),
         "Error returned from hsa_agent_get_info when obtaining "
         "HSA_AGENT_INFO_QUEUE_MAX_SIZE\n");
     printf(" Queue Max Size: \t\t\t%u \n", TmpUInt);
@@ -622,18 +621,18 @@
     // hsa_agent_iterate_caches and hsa_cache_get_info breaks execution during
     // runtime.
core::checkResult( - hsa_agent_get_info(agent, HSA_AGENT_INFO_CACHE_SIZE, CacheSize), + hsa_agent_get_info(Agent, HSA_AGENT_INFO_CACHE_SIZE, CacheSize), "Error returned from hsa_agent_get_info when obtaining " "HSA_AGENT_INFO_CACHE_SIZE\n"); - for (int i = 0; i < 4; i++) { - if (CacheSize[i]) { - printf(" L%u: \t\t\t\t%u bytes\n", i, CacheSize[i]); + for (int I = 0; I < 4; I++) { + if (CacheSize[I]) { + printf(" L%u: \t\t\t\t%u bytes\n", I, CacheSize[I]); } } core::checkResult( - hsa_agent_get_info(agent, + hsa_agent_get_info(Agent, (hsa_agent_info_t)HSA_AMD_AGENT_INFO_CACHELINE_SIZE, &TmpUInt), "Error returned from hsa_agent_get_info when obtaining " @@ -641,51 +640,51 @@ printf(" Cacheline Size: \t\t\t%u \n", TmpUInt); core::checkResult( hsa_agent_get_info( - agent, (hsa_agent_info_t)HSA_AMD_AGENT_INFO_MAX_CLOCK_FREQUENCY, + Agent, (hsa_agent_info_t)HSA_AMD_AGENT_INFO_MAX_CLOCK_FREQUENCY, &TmpUInt), "Error returned from hsa_agent_get_info when obtaining " "HSA_AMD_AGENT_INFO_MAX_CLOCK_FREQUENCY\n"); printf(" Max Clock Freq(MHz): \t\t%u \n", TmpUInt); core::checkResult( hsa_agent_get_info( - agent, (hsa_agent_info_t)HSA_AMD_AGENT_INFO_COMPUTE_UNIT_COUNT, + Agent, (hsa_agent_info_t)HSA_AMD_AGENT_INFO_COMPUTE_UNIT_COUNT, &TmpUInt), "Error returned from hsa_agent_get_info when obtaining " "HSA_AMD_AGENT_INFO_COMPUTE_UNIT_COUNT\n"); printf(" Compute Units: \t\t\t%u \n", TmpUInt); core::checkResult(hsa_agent_get_info( - agent, + Agent, (hsa_agent_info_t)HSA_AMD_AGENT_INFO_NUM_SIMDS_PER_CU, &TmpUInt), "Error returned from hsa_agent_get_info when obtaining " "HSA_AMD_AGENT_INFO_NUM_SIMDS_PER_CU\n"); printf(" SIMD per CU: \t\t\t%u \n", TmpUInt); core::checkResult( - hsa_agent_get_info(agent, HSA_AGENT_INFO_FAST_F16_OPERATION, &TmpBool), + hsa_agent_get_info(Agent, HSA_AGENT_INFO_FAST_F16_OPERATION, &TmpBool), "Error returned from hsa_agent_get_info when obtaining " "HSA_AMD_AGENT_INFO_NUM_SIMDS_PER_CU\n"); printf(" Fast F16 Operation: \t\t%s \n", (TmpBool ? 
"TRUE" : "FALSE")); core::checkResult( - hsa_agent_get_info(agent, HSA_AGENT_INFO_WAVEFRONT_SIZE, &TmpUInt2), + hsa_agent_get_info(Agent, HSA_AGENT_INFO_WAVEFRONT_SIZE, &TmpUInt2), "Error returned from hsa_agent_get_info when obtaining " "HSA_AGENT_INFO_WAVEFRONT_SIZE\n"); printf(" Wavefront Size: \t\t\t%u \n", TmpUInt2); core::checkResult( - hsa_agent_get_info(agent, HSA_AGENT_INFO_WORKGROUP_MAX_SIZE, &TmpUInt), + hsa_agent_get_info(Agent, HSA_AGENT_INFO_WORKGROUP_MAX_SIZE, &TmpUInt), "Error returned from hsa_agent_get_info when obtaining " "HSA_AGENT_INFO_WORKGROUP_MAX_SIZE\n"); printf(" Workgroup Max Size: \t\t%u \n", TmpUInt); - core::checkResult(hsa_agent_get_info(agent, + core::checkResult(hsa_agent_get_info(Agent, HSA_AGENT_INFO_WORKGROUP_MAX_DIM, - workgroupMaxDim), + WorkgroupMaxDim), "Error returned from hsa_agent_get_info when obtaining " "HSA_AGENT_INFO_WORKGROUP_MAX_DIM\n"); printf(" Workgroup Max Size per Dimension:\n"); - printf(" x: \t\t\t\t%u\n", workgroupMaxDim[0]); - printf(" y: \t\t\t\t%u\n", workgroupMaxDim[1]); - printf(" z: \t\t\t\t%u\n", workgroupMaxDim[2]); + printf(" x: \t\t\t\t%u\n", WorkgroupMaxDim[0]); + printf(" y: \t\t\t\t%u\n", WorkgroupMaxDim[1]); + printf(" z: \t\t\t\t%u\n", WorkgroupMaxDim[2]); core::checkResult(hsa_agent_get_info( - agent, + Agent, (hsa_agent_info_t)HSA_AMD_AGENT_INFO_MAX_WAVES_PER_CU, &TmpUInt), "Error returned from hsa_agent_get_info when obtaining " @@ -693,49 +692,49 @@ printf(" Max Waves Per CU: \t\t\t%u \n", TmpUInt); printf(" Max Work-item Per CU: \t\t%u \n", TmpUInt * TmpUInt2); core::checkResult( - hsa_agent_get_info(agent, HSA_AGENT_INFO_GRID_MAX_SIZE, &TmpUInt), + hsa_agent_get_info(Agent, HSA_AGENT_INFO_GRID_MAX_SIZE, &TmpUInt), "Error returned from hsa_agent_get_info when obtaining " "HSA_AGENT_INFO_GRID_MAX_SIZE\n"); printf(" Grid Max Size: \t\t\t%u \n", TmpUInt); core::checkResult( - hsa_agent_get_info(agent, HSA_AGENT_INFO_GRID_MAX_DIM, &gridMaxDim), + hsa_agent_get_info(Agent, HSA_AGENT_INFO_GRID_MAX_DIM, &GridMaxDim), "Error returned from hsa_agent_get_info when obtaining " "HSA_AGENT_INFO_GRID_MAX_DIM\n"); printf(" Grid Max Size per Dimension: \t\t\n"); - printf(" x: \t\t\t\t%u\n", gridMaxDim.x); - printf(" y: \t\t\t\t%u\n", gridMaxDim.y); - printf(" z: \t\t\t\t%u\n", gridMaxDim.z); + printf(" x: \t\t\t\t%u\n", GridMaxDim.x); + printf(" y: \t\t\t\t%u\n", GridMaxDim.y); + printf(" z: \t\t\t\t%u\n", GridMaxDim.z); core::checkResult( - hsa_agent_get_info(agent, HSA_AGENT_INFO_FBARRIER_MAX_SIZE, &TmpUInt), + hsa_agent_get_info(Agent, HSA_AGENT_INFO_FBARRIER_MAX_SIZE, &TmpUInt), "Error returned from hsa_agent_get_info when obtaining " "HSA_AGENT_INFO_FBARRIER_MAX_SIZE\n"); printf(" Max fbarriers/Workgrp: \t\t%u\n", TmpUInt); printf(" Memory Pools:\n"); - auto CB_mem = [](hsa_amd_memory_pool_t region, void *data) -> hsa_status_t { + auto CbMem = [](hsa_amd_memory_pool_t Region, void *Data) -> hsa_status_t { std::string TmpStr; - size_t size; - bool alloc, access; - hsa_amd_segment_t segment; - hsa_amd_memory_pool_global_flag_t globalFlags; + size_t Size; + bool Alloc, Access; + hsa_amd_segment_t Segment; + hsa_amd_memory_pool_global_flag_t GlobalFlags; core::checkResult( hsa_amd_memory_pool_get_info( - region, HSA_AMD_MEMORY_POOL_INFO_GLOBAL_FLAGS, &globalFlags), + Region, HSA_AMD_MEMORY_POOL_INFO_GLOBAL_FLAGS, &GlobalFlags), "Error returned from hsa_amd_memory_pool_get_info when obtaining " "HSA_AMD_MEMORY_POOL_INFO_GLOBAL_FLAGS\n"); core::checkResult(hsa_amd_memory_pool_get_info( - region, HSA_AMD_MEMORY_POOL_INFO_SEGMENT, 
&segment), + Region, HSA_AMD_MEMORY_POOL_INFO_SEGMENT, &Segment), "Error returned from hsa_amd_memory_pool_get_info when " "obtaining HSA_AMD_MEMORY_POOL_INFO_SEGMENT\n"); - switch (segment) { + switch (Segment) { case HSA_AMD_SEGMENT_GLOBAL: TmpStr = "GLOBAL; FLAGS: "; - if (HSA_AMD_MEMORY_POOL_GLOBAL_FLAG_KERNARG_INIT & globalFlags) + if (HSA_AMD_MEMORY_POOL_GLOBAL_FLAG_KERNARG_INIT & GlobalFlags) TmpStr += "KERNARG, "; - if (HSA_AMD_MEMORY_POOL_GLOBAL_FLAG_FINE_GRAINED & globalFlags) + if (HSA_AMD_MEMORY_POOL_GLOBAL_FLAG_FINE_GRAINED & GlobalFlags) TmpStr += "FINE GRAINED, "; - if (HSA_AMD_MEMORY_POOL_GLOBAL_FLAG_COARSE_GRAINED & globalFlags) + if (HSA_AMD_MEMORY_POOL_GLOBAL_FLAG_COARSE_GRAINED & GlobalFlags) TmpStr += "COARSE GRAINED, "; break; case HSA_AMD_SEGMENT_READONLY: @@ -751,46 +750,46 @@ printf(" Pool %s: \n", TmpStr.c_str()); core::checkResult(hsa_amd_memory_pool_get_info( - region, HSA_AMD_MEMORY_POOL_INFO_SIZE, &size), + Region, HSA_AMD_MEMORY_POOL_INFO_SIZE, &Size), "Error returned from hsa_amd_memory_pool_get_info when " "obtaining HSA_AMD_MEMORY_POOL_INFO_SIZE\n"); - printf(" Size: \t\t\t\t %zu bytes\n", size); + printf(" Size: \t\t\t\t %zu bytes\n", Size); core::checkResult( hsa_amd_memory_pool_get_info( - region, HSA_AMD_MEMORY_POOL_INFO_RUNTIME_ALLOC_ALLOWED, &alloc), + Region, HSA_AMD_MEMORY_POOL_INFO_RUNTIME_ALLOC_ALLOWED, &Alloc), "Error returned from hsa_amd_memory_pool_get_info when obtaining " "HSA_AMD_MEMORY_POOL_INFO_RUNTIME_ALLOC_ALLOWED\n"); - printf(" Allocatable: \t\t\t %s\n", (alloc ? "TRUE" : "FALSE")); + printf(" Allocatable: \t\t\t %s\n", (Alloc ? "TRUE" : "FALSE")); core::checkResult( hsa_amd_memory_pool_get_info( - region, HSA_AMD_MEMORY_POOL_INFO_RUNTIME_ALLOC_GRANULE, &size), + Region, HSA_AMD_MEMORY_POOL_INFO_RUNTIME_ALLOC_GRANULE, &Size), "Error returned from hsa_amd_memory_pool_get_info when obtaining " "HSA_AMD_MEMORY_POOL_INFO_RUNTIME_ALLOC_GRANULE\n"); - printf(" Runtime Alloc Granule: \t\t %zu bytes\n", size); + printf(" Runtime Alloc Granule: \t\t %zu bytes\n", Size); core::checkResult( hsa_amd_memory_pool_get_info( - region, HSA_AMD_MEMORY_POOL_INFO_RUNTIME_ALLOC_ALIGNMENT, &size), + Region, HSA_AMD_MEMORY_POOL_INFO_RUNTIME_ALLOC_ALIGNMENT, &Size), "Error returned from hsa_amd_memory_pool_get_info when obtaining " "HSA_AMD_MEMORY_POOL_INFO_RUNTIME_ALLOC_ALIGNMENT\n"); - printf(" Runtime Alloc alignment: \t %zu bytes\n", size); + printf(" Runtime Alloc alignment: \t %zu bytes\n", Size); core::checkResult( hsa_amd_memory_pool_get_info( - region, HSA_AMD_MEMORY_POOL_INFO_ACCESSIBLE_BY_ALL, &access), + Region, HSA_AMD_MEMORY_POOL_INFO_ACCESSIBLE_BY_ALL, &Access), "Error returned from hsa_amd_memory_pool_get_info when obtaining " "HSA_AMD_MEMORY_POOL_INFO_ACCESSIBLE_BY_ALL\n"); printf(" Accessable by all: \t\t %s\n", - (access ? "TRUE" : "FALSE")); + (Access ? "TRUE" : "FALSE")); return HSA_STATUS_SUCCESS; }; // Iterate over all the memory regions for this agent. 
Get the memory region // type and size - hsa_amd_agent_iterate_memory_pools(agent, CB_mem, nullptr); + hsa_amd_agent_iterate_memory_pools(Agent, CbMem, nullptr); printf(" ISAs:\n"); - auto CB_isas = [](hsa_isa_t isa, void *data) -> hsa_status_t { + auto CBIsas = [](hsa_isa_t Isa, void *Data) -> hsa_status_t { char TmpChar[1000]; - core::checkResult(hsa_isa_get_info_alt(isa, HSA_ISA_INFO_NAME, TmpChar), + core::checkResult(hsa_isa_get_info_alt(Isa, HSA_ISA_INFO_NAME, TmpChar), "Error returned from hsa_isa_get_info_alt when " "obtaining HSA_ISA_INFO_NAME\n"); printf(" Name: \t\t\t\t %s\n", TmpChar); @@ -799,26 +798,26 @@ }; // Iterate over all the memory regions for this agent. Get the memory region // type and size - hsa_agent_iterate_isas(agent, CB_isas, nullptr); + hsa_agent_iterate_isas(Agent, CBIsas, nullptr); } // Record entry point associated with device - void addOffloadEntry(int32_t device_id, __tgt_offload_entry entry) { - assert(device_id < (int32_t)FuncGblEntries.size() && + void addOffloadEntry(int32_t DeviceId, __tgt_offload_entry Entry) { + assert(DeviceId < (int32_t)FuncGblEntries.size() && "Unexpected device id!"); - FuncOrGblEntryTy &E = FuncGblEntries[device_id].back(); + FuncOrGblEntryTy &E = FuncGblEntries[DeviceId].back(); - E.Entries.push_back(entry); + E.Entries.push_back(Entry); } // Return true if the entry is associated with device - bool findOffloadEntry(int32_t device_id, void *addr) { - assert(device_id < (int32_t)FuncGblEntries.size() && + bool findOffloadEntry(int32_t DeviceId, void *Addr) { + assert(DeviceId < (int32_t)FuncGblEntries.size() && "Unexpected device id!"); - FuncOrGblEntryTy &E = FuncGblEntries[device_id].back(); + FuncOrGblEntryTy &E = FuncGblEntries[DeviceId].back(); - for (auto &it : E.Entries) { - if (it.addr == addr) + for (auto &It : E.Entries) { + if (It.addr == Addr) return true; } @@ -826,33 +825,33 @@ } // Return the pointer to the target entries table - __tgt_target_table *getOffloadEntriesTable(int32_t device_id) { - assert(device_id < (int32_t)FuncGblEntries.size() && + __tgt_target_table *getOffloadEntriesTable(int32_t DeviceId) { + assert(DeviceId < (int32_t)FuncGblEntries.size() && "Unexpected device id!"); - FuncOrGblEntryTy &E = FuncGblEntries[device_id].back(); + FuncOrGblEntryTy &E = FuncGblEntries[DeviceId].back(); - int32_t size = E.Entries.size(); + int32_t Size = E.Entries.size(); // Table is empty - if (!size) + if (!Size) return 0; - __tgt_offload_entry *begin = &E.Entries[0]; - __tgt_offload_entry *end = &E.Entries[size - 1]; + __tgt_offload_entry *Begin = &E.Entries[0]; + __tgt_offload_entry *End = &E.Entries[Size - 1]; // Update table info according to the entries and return the pointer - E.Table.EntriesBegin = begin; - E.Table.EntriesEnd = ++end; + E.Table.EntriesBegin = Begin; + E.Table.EntriesEnd = ++End; return &E.Table; } // Clear entries table for a device - void clearOffloadEntriesTable(int device_id) { - assert(device_id < (int32_t)FuncGblEntries.size() && + void clearOffloadEntriesTable(int DeviceId) { + assert(DeviceId < (int32_t)FuncGblEntries.size() && "Unexpected device id!"); - FuncGblEntries[device_id].emplace_back(); - FuncOrGblEntryTy &E = FuncGblEntries[device_id].back(); + FuncGblEntries[DeviceId].emplace_back(); + FuncOrGblEntryTy &E = FuncGblEntries[DeviceId].back(); // KernelArgPoolMap.clear(); E.Entries.clear(); E.Table.EntriesBegin = E.Table.EntriesEnd = 0; @@ -954,13 +953,13 @@ } static int readEnv(const char *Env, int Default = -1) { - const char *envStr = getenv(Env); - int res = Default; - 
if (envStr) { - res = std::stoi(envStr); - DP("Parsed %s=%d\n", Env, res); + const char *EnvStr = getenv(Env); + int Res = Default; + if (EnvStr) { + Res = std::stoi(EnvStr); + DP("Parsed %s=%d\n", Env, Res); } - return res; + return Res; } RTLDeviceInfoTy() { @@ -977,13 +976,13 @@ return; } - if (char *envStr = getenv("LIBOMPTARGET_KERNEL_TRACE")) - print_kernel_trace = atoi(envStr); + if (char *EnvStr = getenv("LIBOMPTARGET_KERNEL_TRACE")) + print_kernel_trace = atoi(EnvStr); else print_kernel_trace = 0; - hsa_status_t err = core::atl_init_gpu_context(); - if (err != HSA_STATUS_SUCCESS) { + hsa_status_t Err = core::atl_init_gpu_context(); + if (Err != HSA_STATUS_SUCCESS) { DP("Error when initializing " GETNAME(TARGET_NAME) "\n"); return; } @@ -991,14 +990,14 @@ // Init hostcall soon after initializing hsa hostrpc_init(); - err = FindAgents([&](hsa_device_type_t DeviceType, hsa_agent_t Agent) { + Err = findAgents([&](hsa_device_type_t DeviceType, hsa_agent_t Agent) { if (DeviceType == HSA_DEVICE_TYPE_CPU) { CPUAgents.push_back(Agent); } else { HSAAgents.push_back(Agent); } }); - if (err != HSA_STATUS_SUCCESS) + if (Err != HSA_STATUS_SUCCESS) return; NumberOfDevices = (int)HSAAgents.size(); @@ -1006,9 +1005,8 @@ if (NumberOfDevices == 0) { DP("There are no devices supporting HSA.\n"); return; - } else { - DP("There are %d devices supporting HSA.\n", NumberOfDevices); } + DP("There are %d devices supporting HSA.\n", NumberOfDevices); // Init the device info HSAQueueSchedulers.reserve(NumberOfDevices); @@ -1020,55 +1018,55 @@ WarpSize.resize(NumberOfDevices); NumTeams.resize(NumberOfDevices); NumThreads.resize(NumberOfDevices); - deviceStateStore.resize(NumberOfDevices); + DeviceStateStore.resize(NumberOfDevices); KernelInfoTable.resize(NumberOfDevices); SymbolInfoTable.resize(NumberOfDevices); DeviceCoarseGrainedMemoryPools.resize(NumberOfDevices); DeviceFineGrainedMemoryPools.resize(NumberOfDevices); - err = setupDevicePools(HSAAgents); - if (err != HSA_STATUS_SUCCESS) { + Err = setupDevicePools(HSAAgents); + if (Err != HSA_STATUS_SUCCESS) { DP("Setup for Device Memory Pools failed\n"); return; } - err = setupHostMemoryPools(CPUAgents); - if (err != HSA_STATUS_SUCCESS) { + Err = setupHostMemoryPools(CPUAgents); + if (Err != HSA_STATUS_SUCCESS) { DP("Setup for Host Memory Pools failed\n"); return; } - for (int i = 0; i < NumberOfDevices; i++) { - uint32_t queue_size = 0; + for (int I = 0; I < NumberOfDevices; I++) { + uint32_t QueueSize = 0; { - hsa_status_t err = hsa_agent_get_info( - HSAAgents[i], HSA_AGENT_INFO_QUEUE_MAX_SIZE, &queue_size); - if (err != HSA_STATUS_SUCCESS) { - DP("HSA query QUEUE_MAX_SIZE failed for agent %d\n", i); + hsa_status_t Err = hsa_agent_get_info( + HSAAgents[I], HSA_AGENT_INFO_QUEUE_MAX_SIZE, &QueueSize); + if (Err != HSA_STATUS_SUCCESS) { + DP("HSA query QUEUE_MAX_SIZE failed for agent %d\n", I); return; } enum { MaxQueueSize = 4096 }; - if (queue_size > MaxQueueSize) { - queue_size = MaxQueueSize; + if (QueueSize > MaxQueueSize) { + QueueSize = MaxQueueSize; } } { HSAQueueScheduler QSched; - if (!QSched.CreateQueues(HSAAgents[i], queue_size)) + if (!QSched.createQueues(HSAAgents[I], QueueSize)) return; HSAQueueSchedulers.emplace_back(std::move(QSched)); } - deviceStateStore[i] = {nullptr, 0}; + DeviceStateStore[I] = {nullptr, 0}; } - for (int i = 0; i < NumberOfDevices; i++) { - ThreadsPerGroup[i] = RTLDeviceInfoTy::Default_WG_Size; - GroupsPerDevice[i] = RTLDeviceInfoTy::DefaultNumTeams; - ComputeUnits[i] = 1; - DP("Device %d: Initial groupsPerDevice %d & 
threadsPerGroup %d\n", i, - GroupsPerDevice[i], ThreadsPerGroup[i]); + for (int I = 0; I < NumberOfDevices; I++) { + ThreadsPerGroup[I] = RTLDeviceInfoTy::DefaultWgSize; + GroupsPerDevice[I] = RTLDeviceInfoTy::DefaultNumTeams; + ComputeUnits[I] = 1; + DP("Device %d: Initial groupsPerDevice %d & threadsPerGroup %d\n", I, + GroupsPerDevice[I], ThreadsPerGroup[I]); } // Get environment variables regarding teams @@ -1092,7 +1090,7 @@ } // Run destructors on types that use HSA before // impl_finalize removes access to it - deviceStateStore.clear(); + DeviceStateStore.clear(); KernelArgPoolMap.clear(); // Terminate hostrpc before finalizing hsa hostrpc_terminate(); @@ -1121,15 +1119,15 @@ // Return success if we are not copying back to host from target. if (!HstPtr) return OFFLOAD_SUCCESS; - hsa_status_t err; + hsa_status_t Err; DP("Retrieve data %ld bytes, (tgt:%016llx) -> (hst:%016llx).\n", Size, (long long unsigned)(Elf64_Addr)TgtPtr, (long long unsigned)(Elf64_Addr)HstPtr); - err = DeviceInfo.freesignalpool_memcpy_d2h(HstPtr, TgtPtr, (size_t)Size, - DeviceId); + Err = DeviceInfo.freesignalpoolMemcpyD2H(HstPtr, TgtPtr, (size_t)Size, + DeviceId); - if (err != HSA_STATUS_SUCCESS) { + if (Err != HSA_STATUS_SUCCESS) { DP("Error when copying data from device to host. Pointers: " "host = 0x%016lx, device = 0x%016lx, size = %lld\n", (Elf64_Addr)HstPtr, (Elf64_Addr)TgtPtr, (unsigned long long)Size); @@ -1144,7 +1142,7 @@ int32_t dataSubmit(int32_t DeviceId, void *TgtPtr, void *HstPtr, int64_t Size, __tgt_async_info *AsyncInfo) { assert(AsyncInfo && "AsyncInfo is nullptr"); - hsa_status_t err; + hsa_status_t Err; assert(DeviceId < DeviceInfo.NumberOfDevices && "Device ID too large"); // Return success if we are not doing host to target. if (!HstPtr) @@ -1153,9 +1151,9 @@ DP("Submit data %ld bytes, (hst:%016llx) -> (tgt:%016llx).\n", Size, (long long unsigned)(Elf64_Addr)HstPtr, (long long unsigned)(Elf64_Addr)TgtPtr); - err = DeviceInfo.freesignalpool_memcpy_h2d(TgtPtr, HstPtr, (size_t)Size, - DeviceId); - if (err != HSA_STATUS_SUCCESS) { + Err = DeviceInfo.freesignalpoolMemcpyH2D(TgtPtr, HstPtr, (size_t)Size, + DeviceId); + if (Err != HSA_STATUS_SUCCESS) { DP("Error when copying data from host to device. Pointers: " "host = 0x%016lx, device = 0x%016lx, size = %lld\n", (Elf64_Addr)HstPtr, (Elf64_Addr)TgtPtr, (unsigned long long)Size); @@ -1193,70 +1191,69 @@ } // Determine launch values for kernel. -struct launchVals { +struct LaunchVals { int WorkgroupSize; int GridSize; }; -launchVals getLaunchVals(int WarpSize, EnvironmentVariables Env, +LaunchVals getLaunchVals(int WarpSize, EnvironmentVariables Env, int ConstWGSize, llvm::omp::OMPTgtExecModeFlags ExecutionMode, - int num_teams, int thread_limit, - uint64_t loop_tripcount, int DeviceNumTeams) { + int NumTeams, int ThreadLimit, uint64_t LoopTripcount, + int DeviceNumTeams) { - int threadsPerGroup = RTLDeviceInfoTy::Default_WG_Size; - int num_groups = 0; + int ThreadsPerGroup = RTLDeviceInfoTy::DefaultWgSize; + int NumGroups = 0; - int Max_Teams = - Env.MaxTeamsDefault > 0 ? Env.MaxTeamsDefault : DeviceNumTeams; - if (Max_Teams > RTLDeviceInfoTy::HardTeamLimit) - Max_Teams = RTLDeviceInfoTy::HardTeamLimit; + int MaxTeams = Env.MaxTeamsDefault > 0 ? 
Env.MaxTeamsDefault : DeviceNumTeams; + if (MaxTeams > RTLDeviceInfoTy::HardTeamLimit) + MaxTeams = RTLDeviceInfoTy::HardTeamLimit; if (print_kernel_trace & STARTUP_DETAILS) { - DP("RTLDeviceInfoTy::Max_Teams: %d\n", RTLDeviceInfoTy::Max_Teams); - DP("Max_Teams: %d\n", Max_Teams); + DP("RTLDeviceInfoTy::Max_Teams: %d\n", RTLDeviceInfoTy::MaxTeams); + DP("Max_Teams: %d\n", MaxTeams); DP("RTLDeviceInfoTy::Warp_Size: %d\n", WarpSize); - DP("RTLDeviceInfoTy::Max_WG_Size: %d\n", RTLDeviceInfoTy::Max_WG_Size); + DP("RTLDeviceInfoTy::Max_WG_Size: %d\n", RTLDeviceInfoTy::MaxWgSize); DP("RTLDeviceInfoTy::Default_WG_Size: %d\n", - RTLDeviceInfoTy::Default_WG_Size); - DP("thread_limit: %d\n", thread_limit); - DP("threadsPerGroup: %d\n", threadsPerGroup); + RTLDeviceInfoTy::DefaultWgSize); + DP("thread_limit: %d\n", ThreadLimit); + DP("threadsPerGroup: %d\n", ThreadsPerGroup); DP("ConstWGSize: %d\n", ConstWGSize); } // check for thread_limit() clause - if (thread_limit > 0) { - threadsPerGroup = thread_limit; - DP("Setting threads per block to requested %d\n", thread_limit); + if (ThreadLimit > 0) { + ThreadsPerGroup = ThreadLimit; + DP("Setting threads per block to requested %d\n", ThreadLimit); // Add master warp for GENERIC if (ExecutionMode == llvm::omp::OMPTgtExecModeFlags::OMP_TGT_EXEC_MODE_GENERIC) { - threadsPerGroup += WarpSize; + ThreadsPerGroup += WarpSize; DP("Adding master wavefront: +%d threads\n", WarpSize); } - if (threadsPerGroup > RTLDeviceInfoTy::Max_WG_Size) { // limit to max - threadsPerGroup = RTLDeviceInfoTy::Max_WG_Size; - DP("Setting threads per block to maximum %d\n", threadsPerGroup); + if (ThreadsPerGroup > RTLDeviceInfoTy::MaxWgSize) { // limit to max + ThreadsPerGroup = RTLDeviceInfoTy::MaxWgSize; + DP("Setting threads per block to maximum %d\n", ThreadsPerGroup); } } // check flat_max_work_group_size attr here - if (threadsPerGroup > ConstWGSize) { - threadsPerGroup = ConstWGSize; + if (ThreadsPerGroup > ConstWGSize) { + ThreadsPerGroup = ConstWGSize; DP("Reduced threadsPerGroup to flat-attr-group-size limit %d\n", - threadsPerGroup); + ThreadsPerGroup); } if (print_kernel_trace & STARTUP_DETAILS) - DP("threadsPerGroup: %d\n", threadsPerGroup); - DP("Preparing %d threads\n", threadsPerGroup); + DP("threadsPerGroup: %d\n", ThreadsPerGroup); + DP("Preparing %d threads\n", ThreadsPerGroup); // Set default num_groups (teams) if (Env.TeamLimit > 0) - num_groups = (Max_Teams < Env.TeamLimit) ? Max_Teams : Env.TeamLimit; + NumGroups = (MaxTeams < Env.TeamLimit) ? MaxTeams : Env.TeamLimit; else - num_groups = Max_Teams; - DP("Set default num of groups %d\n", num_groups); + NumGroups = MaxTeams; + DP("Set default num of groups %d\n", NumGroups); if (print_kernel_trace & STARTUP_DETAILS) { - DP("num_groups: %d\n", num_groups); - DP("num_teams: %d\n", num_teams); + DP("num_groups: %d\n", NumGroups); + DP("num_teams: %d\n", NumTeams); } // Reduce num_groups if threadsPerGroup exceeds RTLDeviceInfoTy::Max_WG_Size @@ -1264,240 +1261,237 @@ // or when user goes crazy with num_teams clause. // FIXME: We cant distinguish between a constant or variable thread limit. // So we only handle constant thread_limits. - if (threadsPerGroup > - RTLDeviceInfoTy::Default_WG_Size) // 256 < threadsPerGroup <= 1024 + if (ThreadsPerGroup > + RTLDeviceInfoTy::DefaultWgSize) // 256 < threadsPerGroup <= 1024 // Should we round threadsPerGroup up to nearest WarpSize // here? 
- num_groups = (Max_Teams * RTLDeviceInfoTy::Max_WG_Size) / threadsPerGroup; + NumGroups = (MaxTeams * RTLDeviceInfoTy::MaxWgSize) / ThreadsPerGroup; // check for num_teams() clause - if (num_teams > 0) { - num_groups = (num_teams < num_groups) ? num_teams : num_groups; + if (NumTeams > 0) { + NumGroups = (NumTeams < NumGroups) ? NumTeams : NumGroups; } if (print_kernel_trace & STARTUP_DETAILS) { - DP("num_groups: %d\n", num_groups); + DP("num_groups: %d\n", NumGroups); DP("Env.NumTeams %d\n", Env.NumTeams); DP("Env.TeamLimit %d\n", Env.TeamLimit); } if (Env.NumTeams > 0) { - num_groups = (Env.NumTeams < num_groups) ? Env.NumTeams : num_groups; + NumGroups = (Env.NumTeams < NumGroups) ? Env.NumTeams : NumGroups; DP("Modifying teams based on Env.NumTeams %d\n", Env.NumTeams); } else if (Env.TeamLimit > 0) { - num_groups = (Env.TeamLimit < num_groups) ? Env.TeamLimit : num_groups; + NumGroups = (Env.TeamLimit < NumGroups) ? Env.TeamLimit : NumGroups; DP("Modifying teams based on Env.TeamLimit%d\n", Env.TeamLimit); } else { - if (num_teams <= 0) { - if (loop_tripcount > 0) { + if (NumTeams <= 0) { + if (LoopTripcount > 0) { if (ExecutionMode == llvm::omp::OMPTgtExecModeFlags::OMP_TGT_EXEC_MODE_SPMD) { // round up to the nearest integer - num_groups = ((loop_tripcount - 1) / threadsPerGroup) + 1; + NumGroups = ((LoopTripcount - 1) / ThreadsPerGroup) + 1; } else if (ExecutionMode == llvm::omp::OMPTgtExecModeFlags::OMP_TGT_EXEC_MODE_GENERIC) { - num_groups = loop_tripcount; + NumGroups = LoopTripcount; } else /* OMP_TGT_EXEC_MODE_GENERIC_SPMD */ { // This is a generic kernel that was transformed to use SPMD-mode // execution but uses Generic-mode semantics for scheduling. - num_groups = loop_tripcount; + NumGroups = LoopTripcount; } DP("Using %d teams due to loop trip count %" PRIu64 " and number of " "threads per block %d\n", - num_groups, loop_tripcount, threadsPerGroup); + NumGroups, LoopTripcount, ThreadsPerGroup); } } else { - num_groups = num_teams; + NumGroups = NumTeams; } - if (num_groups > Max_Teams) { - num_groups = Max_Teams; + if (NumGroups > MaxTeams) { + NumGroups = MaxTeams; if (print_kernel_trace & STARTUP_DETAILS) - DP("Limiting num_groups %d to Max_Teams %d \n", num_groups, Max_Teams); + DP("Limiting num_groups %d to Max_Teams %d \n", NumGroups, MaxTeams); } - if (num_groups > num_teams && num_teams > 0) { - num_groups = num_teams; + if (NumGroups > NumTeams && NumTeams > 0) { + NumGroups = NumTeams; if (print_kernel_trace & STARTUP_DETAILS) - DP("Limiting num_groups %d to clause num_teams %d \n", num_groups, - num_teams); + DP("Limiting num_groups %d to clause num_teams %d \n", NumGroups, + NumTeams); } } // num_teams clause always honored, no matter what, unless DEFAULT is active. - if (num_teams > 0) { - num_groups = num_teams; + if (NumTeams > 0) { + NumGroups = NumTeams; // Cap num_groups to EnvMaxTeamsDefault if set. 
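The trip-count branch just above is plain ceiling division, and it is worth seeing in isolation. A minimal, self-contained sketch with illustrative numbers (the helper name and values are mine, not the plugin's); the MaxTeamsDefault cap the diff applies resumes just below.

#include <cstdint>
#include <cstdio>

// Assumes TripCount > 0, mirroring the `LoopTripcount > 0` guard above.
static uint64_t teamsForTripcount(uint64_t TripCount, uint64_t ThreadsPerGroup) {
  // Round up so every iteration is covered: ceil(TripCount / ThreadsPerGroup).
  return (TripCount - 1) / ThreadsPerGroup + 1;
}

int main() {
  // 10000 iterations at 256 threads per team needs 40 teams, not 39:
  // 39 * 256 = 9984 would leave 16 iterations uncovered.
  printf("%llu\n", (unsigned long long)teamsForTripcount(10000, 256));
  return 0;
}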
- if (Env.MaxTeamsDefault > 0 && num_groups > Env.MaxTeamsDefault) - num_groups = Env.MaxTeamsDefault; + if (Env.MaxTeamsDefault > 0 && NumGroups > Env.MaxTeamsDefault) + NumGroups = Env.MaxTeamsDefault; } if (print_kernel_trace & STARTUP_DETAILS) { - DP("threadsPerGroup: %d\n", threadsPerGroup); - DP("num_groups: %d\n", num_groups); - DP("loop_tripcount: %ld\n", loop_tripcount); + DP("threadsPerGroup: %d\n", ThreadsPerGroup); + DP("num_groups: %d\n", NumGroups); + DP("loop_tripcount: %ld\n", LoopTripcount); } - DP("Final %d num_groups and %d threadsPerGroup\n", num_groups, - threadsPerGroup); + DP("Final %d num_groups and %d threadsPerGroup\n", NumGroups, + ThreadsPerGroup); - launchVals res; - res.WorkgroupSize = threadsPerGroup; - res.GridSize = threadsPerGroup * num_groups; - return res; + LaunchVals Res; + Res.WorkgroupSize = ThreadsPerGroup; + Res.GridSize = ThreadsPerGroup * NumGroups; + return Res; } -static uint64_t acquire_available_packet_id(hsa_queue_t *queue) { - uint64_t packet_id = hsa_queue_add_write_index_relaxed(queue, 1); - bool full = true; - while (full) { - full = - packet_id >= (queue->size + hsa_queue_load_read_index_scacquire(queue)); +static uint64_t acquireAvailablePacketId(hsa_queue_t *Queue) { + uint64_t PacketId = hsa_queue_add_write_index_relaxed(Queue, 1); + bool Full = true; + while (Full) { + Full = + PacketId >= (Queue->size + hsa_queue_load_read_index_scacquire(Queue)); } - return packet_id; + return PacketId; } -int32_t runRegionLocked(int32_t device_id, void *tgt_entry_ptr, void **tgt_args, - ptrdiff_t *tgt_offsets, int32_t arg_num, - int32_t num_teams, int32_t thread_limit, - uint64_t loop_tripcount) { +int32_t runRegionLocked(int32_t DeviceId, void *TgtEntryPtr, void **TgtArgs, + ptrdiff_t *TgtOffsets, int32_t ArgNum, int32_t NumTeams, + int32_t ThreadLimit, uint64_t LoopTripcount) { // Set the context we are using // update thread limit content in gpu memory if un-initialized or specified // from host - DP("Run target team region thread_limit %d\n", thread_limit); + DP("Run target team region thread_limit %d\n", ThreadLimit); // All args are references. 
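acquireAvailablePacketId above hands out a monotonically increasing 64-bit write index, spinning while the queue is full; the dispatch code further down maps that index onto a ring-buffer slot with a mask. A hedged sketch of the combined pattern, using the same HSA calls the diff uses (the helper name and header path are assumptions); the kernel-argument setup then resumes below.

#include <hsa/hsa.h> // ROCm HSA runtime header; install path may vary

// Reserve one dispatch slot. The spin provides backpressure: the write
// index may run at most Queue->size packets ahead of the read index.
static hsa_kernel_dispatch_packet_t *claimSlot(hsa_queue_t *Queue,
                                               uint64_t *OutPacketId) {
  uint64_t PacketId = hsa_queue_add_write_index_relaxed(Queue, 1);
  while (PacketId >= Queue->size + hsa_queue_load_read_index_scacquire(Queue))
    ; // queue full; wait for the packet processor to drain entries
  *OutPacketId = PacketId;
  // size is a power of two, so masking replaces the modulo.
  const uint32_t Mask = Queue->size - 1;
  return (hsa_kernel_dispatch_packet_t *)Queue->base_address +
         (PacketId & Mask);
}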
-  std::vector<void *> args(arg_num);
-  std::vector<void *> ptrs(arg_num);
+  std::vector<void *> Args(ArgNum);
+  std::vector<void *> Ptrs(ArgNum);

-  DP("Arg_num: %d\n", arg_num);
-  for (int32_t i = 0; i < arg_num; ++i) {
-    ptrs[i] = (void *)((intptr_t)tgt_args[i] + tgt_offsets[i]);
-    args[i] = &ptrs[i];
-    DP("Offseted base: arg[%d]:" DPxMOD "\n", i, DPxPTR(ptrs[i]));
+  DP("Arg_num: %d\n", ArgNum);
+  for (int32_t I = 0; I < ArgNum; ++I) {
+    Ptrs[I] = (void *)((intptr_t)TgtArgs[I] + TgtOffsets[I]);
+    Args[I] = &Ptrs[I];
+    DP("Offseted base: arg[%d]:" DPxMOD "\n", I, DPxPTR(Ptrs[I]));
   }

-  KernelTy *KernelInfo = (KernelTy *)tgt_entry_ptr;
+  KernelTy *KernelInfo = (KernelTy *)TgtEntryPtr;

-  std::string kernel_name = std::string(KernelInfo->Name);
+  std::string KernelName = std::string(KernelInfo->Name);
   auto &KernelInfoTable = DeviceInfo.KernelInfoTable;
-  if (KernelInfoTable[device_id].find(kernel_name) ==
-      KernelInfoTable[device_id].end()) {
-    DP("Kernel %s not found\n", kernel_name.c_str());
+  if (KernelInfoTable[DeviceId].find(KernelName) ==
+      KernelInfoTable[DeviceId].end()) {
+    DP("Kernel %s not found\n", KernelName.c_str());
     return OFFLOAD_FAIL;
   }
   const atl_kernel_info_t KernelInfoEntry =
-      KernelInfoTable[device_id][kernel_name];
-  const uint32_t group_segment_size =
+      KernelInfoTable[DeviceId][KernelName];
+  const uint32_t GroupSegmentSize =
       KernelInfoEntry.group_segment_size + DeviceInfo.Env.DynamicMemSize;
-  const uint32_t sgpr_count = KernelInfoEntry.sgpr_count;
-  const uint32_t vgpr_count = KernelInfoEntry.vgpr_count;
-  const uint32_t sgpr_spill_count = KernelInfoEntry.sgpr_spill_count;
-  const uint32_t vgpr_spill_count = KernelInfoEntry.vgpr_spill_count;
+  const uint32_t SgprCount = KernelInfoEntry.sgpr_count;
+  const uint32_t VgprCount = KernelInfoEntry.vgpr_count;
+  const uint32_t SgprSpillCount = KernelInfoEntry.sgpr_spill_count;
+  const uint32_t VgprSpillCount = KernelInfoEntry.vgpr_spill_count;

-  assert(arg_num == (int)KernelInfoEntry.explicit_argument_count);
+  assert(ArgNum == (int)KernelInfoEntry.explicit_argument_count);

   /*
    * Set limit based on ThreadsPerGroup and GroupsPerDevice
    */
-  launchVals LV =
-      getLaunchVals(DeviceInfo.WarpSize[device_id], DeviceInfo.Env,
+  LaunchVals LV =
+      getLaunchVals(DeviceInfo.WarpSize[DeviceId], DeviceInfo.Env,
                     KernelInfo->ConstWGSize, KernelInfo->ExecutionMode,
-                    num_teams,      // From run_region arg
-                    thread_limit,   // From run_region arg
-                    loop_tripcount, // From run_region arg
-                    DeviceInfo.NumTeams[KernelInfo->device_id]);
+                    NumTeams,      // From run_region arg
+                    ThreadLimit,   // From run_region arg
+                    LoopTripcount, // From run_region arg
+                    DeviceInfo.NumTeams[KernelInfo->DeviceId]);
   const int GridSize = LV.GridSize;
   const int WorkgroupSize = LV.WorkgroupSize;

   if (print_kernel_trace >= LAUNCH) {
-    int num_groups = GridSize / WorkgroupSize;
+    int NumGroups = GridSize / WorkgroupSize;
     // enum modes are SPMD, GENERIC, NONE 0,1,2
     // if doing rtl timing, print to stderr, unless stdout requested.
-    bool traceToStdout = print_kernel_trace & (RTL_TO_STDOUT | RTL_TIMING);
-    fprintf(traceToStdout ? stdout : stderr,
+    bool TraceToStdout = print_kernel_trace & (RTL_TO_STDOUT | RTL_TIMING);
+    fprintf(TraceToStdout ?
stdout : stderr, "DEVID:%2d SGN:%1d ConstWGSize:%-4d args:%2d teamsXthrds:(%4dX%4d) " "reqd:(%4dX%4d) lds_usage:%uB sgpr_count:%u vgpr_count:%u " "sgpr_spill_count:%u vgpr_spill_count:%u tripcount:%lu n:%s\n", - device_id, KernelInfo->ExecutionMode, KernelInfo->ConstWGSize, - arg_num, num_groups, WorkgroupSize, num_teams, thread_limit, - group_segment_size, sgpr_count, vgpr_count, sgpr_spill_count, - vgpr_spill_count, loop_tripcount, KernelInfo->Name); + DeviceId, KernelInfo->ExecutionMode, KernelInfo->ConstWGSize, + ArgNum, NumGroups, WorkgroupSize, NumTeams, ThreadLimit, + GroupSegmentSize, SgprCount, VgprCount, SgprSpillCount, + VgprSpillCount, LoopTripcount, KernelInfo->Name); } // Run on the device. { - hsa_queue_t *queue = DeviceInfo.HSAQueueSchedulers[device_id].Next(); - if (!queue) { + hsa_queue_t *Queue = DeviceInfo.HSAQueueSchedulers[DeviceId].next(); + if (!Queue) { return OFFLOAD_FAIL; } - uint64_t packet_id = acquire_available_packet_id(queue); + uint64_t PacketId = acquireAvailablePacketId(Queue); - const uint32_t mask = queue->size - 1; // size is a power of 2 - hsa_kernel_dispatch_packet_t *packet = - (hsa_kernel_dispatch_packet_t *)queue->base_address + - (packet_id & mask); + const uint32_t Mask = Queue->size - 1; // size is a power of 2 + hsa_kernel_dispatch_packet_t *Packet = + (hsa_kernel_dispatch_packet_t *)Queue->base_address + (PacketId & Mask); // packet->header is written last - packet->setup = UINT16_C(1) << HSA_KERNEL_DISPATCH_PACKET_SETUP_DIMENSIONS; - packet->workgroup_size_x = WorkgroupSize; - packet->workgroup_size_y = 1; - packet->workgroup_size_z = 1; - packet->reserved0 = 0; - packet->grid_size_x = GridSize; - packet->grid_size_y = 1; - packet->grid_size_z = 1; - packet->private_segment_size = KernelInfoEntry.private_segment_size; - packet->group_segment_size = group_segment_size; - packet->kernel_object = KernelInfoEntry.kernel_object; - packet->kernarg_address = 0; // use the block allocator - packet->reserved2 = 0; // impl writes id_ here - packet->completion_signal = {0}; // may want a pool of signals + Packet->setup = UINT16_C(1) << HSA_KERNEL_DISPATCH_PACKET_SETUP_DIMENSIONS; + Packet->workgroup_size_x = WorkgroupSize; + Packet->workgroup_size_y = 1; + Packet->workgroup_size_z = 1; + Packet->reserved0 = 0; + Packet->grid_size_x = GridSize; + Packet->grid_size_y = 1; + Packet->grid_size_z = 1; + Packet->private_segment_size = KernelInfoEntry.private_segment_size; + Packet->group_segment_size = GroupSegmentSize; + Packet->kernel_object = KernelInfoEntry.kernel_object; + Packet->kernarg_address = 0; // use the block allocator + Packet->reserved2 = 0; // impl writes id_ here + Packet->completion_signal = {0}; // may want a pool of signals KernelArgPool *ArgPool = nullptr; - void *kernarg = nullptr; + void *KernArg = nullptr; { - auto it = KernelArgPoolMap.find(std::string(KernelInfo->Name)); - if (it != KernelArgPoolMap.end()) { - ArgPool = (it->second).get(); + auto It = KernelArgPoolMap.find(std::string(KernelInfo->Name)); + if (It != KernelArgPoolMap.end()) { + ArgPool = (It->second).get(); } } if (!ArgPool) { DP("Warning: No ArgPool for %s on device %d\n", KernelInfo->Name, - device_id); + DeviceId); } { if (ArgPool) { - assert(ArgPool->kernarg_segment_size == (arg_num * sizeof(void *))); - kernarg = ArgPool->allocate(arg_num); + assert(ArgPool->KernargSegmentSize == (ArgNum * sizeof(void *))); + KernArg = ArgPool->allocate(ArgNum); } - if (!kernarg) { + if (!KernArg) { DP("Allocate kernarg failed\n"); return OFFLOAD_FAIL; } // Copy explicit 
arguments
-      for (int i = 0; i < arg_num; i++) {
-        memcpy((char *)kernarg + sizeof(void *) * i, args[i], sizeof(void *));
+      for (int I = 0; I < ArgNum; I++) {
+        memcpy((char *)KernArg + sizeof(void *) * I, Args[I], sizeof(void *));
       }

       // Initialize implicit arguments. TODO: Which of these can be dropped
-      impl_implicit_args_t *impl_args =
-          reinterpret_cast<impl_implicit_args_t *>(
-              static_cast<char *>(kernarg) + ArgPool->kernarg_segment_size);
-      memset(impl_args, 0,
+      impl_implicit_args_t *ImplArgs = reinterpret_cast<impl_implicit_args_t *>(
+          static_cast<char *>(KernArg) + ArgPool->KernargSegmentSize);
+      memset(ImplArgs, 0,
              sizeof(impl_implicit_args_t)); // may not be necessary
-      impl_args->offset_x = 0;
-      impl_args->offset_y = 0;
-      impl_args->offset_z = 0;
+      ImplArgs->offset_x = 0;
+      ImplArgs->offset_y = 0;
+      ImplArgs->offset_z = 0;

       // assign a hostcall buffer for the selected Q
-      if (__atomic_load_n(&DeviceInfo.hostcall_required, __ATOMIC_ACQUIRE)) {
+      if (__atomic_load_n(&DeviceInfo.HostcallRequired, __ATOMIC_ACQUIRE)) {
         // hostrpc_assign_buffer is not thread safe, and this function is
         // under a multiple reader lock, not a writer lock.
-        static pthread_mutex_t hostcall_init_lock = PTHREAD_MUTEX_INITIALIZER;
-        pthread_mutex_lock(&hostcall_init_lock);
-        uint64_t buffer = hostrpc_assign_buffer(DeviceInfo.HSAAgents[device_id],
-                                                queue, device_id);
-        pthread_mutex_unlock(&hostcall_init_lock);
-        if (!buffer) {
+        static pthread_mutex_t HostcallInitLock = PTHREAD_MUTEX_INITIALIZER;
+        pthread_mutex_lock(&HostcallInitLock);
+        uint64_t Buffer = hostrpc_assign_buffer(DeviceInfo.HSAAgents[DeviceId],
+                                                Queue, DeviceId);
+        pthread_mutex_unlock(&HostcallInitLock);
+        if (!Buffer) {
           DP("hostrpc_assign_buffer failed, gpu would dereference null and "
              "error\n");
           return OFFLOAD_FAIL;
@@ -1512,109 +1506,109 @@
         // the offset from msgpack. Clang is not annotating it at present.
         uint64_t Offset =
             sizeof(void *) * (KernelInfoEntry.explicit_argument_count + 3);
-        if ((Offset + 8) > ArgPool->kernarg_size_including_implicit()) {
+        if ((Offset + 8) > ArgPool->kernargSizeIncludingImplicit()) {
           DP("Bad offset of hostcall: %lu, exceeds kernarg size w/ implicit "
              "args: %d\n",
-             Offset + 8, ArgPool->kernarg_size_including_implicit());
+             Offset + 8, ArgPool->kernargSizeIncludingImplicit());
         } else {
-          memcpy(static_cast<char *>(kernarg) + Offset, &buffer, 8);
+          memcpy(static_cast<char *>(KernArg) + Offset, &Buffer, 8);
         }
       }

       // initialise pointer for implicit_argument_count == 0 ABI
-        impl_args->hostcall_ptr = buffer;
+        ImplArgs->hostcall_ptr = Buffer;
       }

-      packet->kernarg_address = kernarg;
+      Packet->kernarg_address = KernArg;
     }

-    hsa_signal_t s = DeviceInfo.FreeSignalPool.pop();
-    if (s.handle == 0) {
+    hsa_signal_t S = DeviceInfo.FreeSignalPool.pop();
+    if (S.handle == 0) {
       DP("Failed to get signal instance\n");
       return OFFLOAD_FAIL;
     }
-    packet->completion_signal = s;
-    hsa_signal_store_relaxed(packet->completion_signal, 1);
+    Packet->completion_signal = S;
+    hsa_signal_store_relaxed(Packet->completion_signal, 1);

     // Publish the packet indicating it is ready to be processed
-    core::packet_store_release(reinterpret_cast<uint32_t *>(packet),
-                               core::create_header(), packet->setup);
+    core::packetStoreRelease(reinterpret_cast<uint32_t *>(Packet),
+                             core::createHeader(), Packet->setup);

     // Since the packet is already published, its contents must not be
     // accessed any more
-    hsa_signal_store_relaxed(queue->doorbell_signal, packet_id);
+    hsa_signal_store_relaxed(Queue->doorbell_signal, PacketId);

-    while (hsa_signal_wait_scacquire(s, HSA_SIGNAL_CONDITION_EQ, 0, UINT64_MAX,
+    while (hsa_signal_wait_scacquire(S, HSA_SIGNAL_CONDITION_EQ, 0, UINT64_MAX,
                                      HSA_WAIT_STATE_BLOCKED) != 0)
       ;

     assert(ArgPool);
-    ArgPool->deallocate(kernarg);
-    DeviceInfo.FreeSignalPool.push(s);
+    ArgPool->deallocate(KernArg);
+    DeviceInfo.FreeSignalPool.push(S);
   }

   DP("Kernel completed\n");
   return OFFLOAD_SUCCESS;
 }

-bool elf_machine_id_is_amdgcn(__tgt_device_image *image) {
-  const uint16_t amdgcnMachineID = 224; // EM_AMDGPU may not be in system elf.h
-  int32_t r = elf_check_machine(image, amdgcnMachineID);
-  if (!r) {
+bool elfMachineIdIsAmdgcn(__tgt_device_image *Image) {
+  const uint16_t AmdgcnMachineID = 224; // EM_AMDGPU may not be in system elf.h
+  int32_t R = elf_check_machine(Image, AmdgcnMachineID);
+  if (!R) {
     DP("Supported machine ID not found\n");
   }
-  return r;
+  return R;
 }

-uint32_t elf_e_flags(__tgt_device_image *image) {
-  char *img_begin = (char *)image->ImageStart;
-  size_t img_size = (char *)image->ImageEnd - img_begin;
+uint32_t elfEFlags(__tgt_device_image *Image) {
+  char *ImgBegin = (char *)Image->ImageStart;
+  size_t ImgSize = (char *)Image->ImageEnd - ImgBegin;

-  Elf *e = elf_memory(img_begin, img_size);
-  if (!e) {
+  Elf *E = elf_memory(ImgBegin, ImgSize);
+  if (!E) {
     DP("Unable to get ELF handle: %s!\n", elf_errmsg(-1));
     return 0;
   }

-  Elf64_Ehdr *eh64 = elf64_getehdr(e);
+  Elf64_Ehdr *Eh64 = elf64_getehdr(E);

-  if (!eh64) {
+  if (!Eh64) {
     DP("Unable to get machine ID from ELF file!\n");
-    elf_end(e);
+    elf_end(E);
     return 0;
   }

-  uint32_t Flags = eh64->e_flags;
+  uint32_t Flags = Eh64->e_flags;

-  elf_end(e);
+  elf_end(E);
   DP("ELF Flags: 0x%x\n", Flags);
   return Flags;
 }

-template <typename T> bool enforce_upper_bound(T *value, T upper) {
-  bool changed = *value > upper;
-  if (changed) {
-    *value = upper;
+template <typename T> bool enforceUpperBound(T *Value, T Upper) {
+  bool Changed = *Value > Upper;
+  if (Changed) {
+    *Value = Upper;
   }
-  return changed;
+  return Changed;
 }

-Elf64_Shdr *find_only_SHT_HASH(Elf *elf) {
+Elf64_Shdr *findOnlyShtHash(Elf *Elf) {
   size_t N;
-  int rc = elf_getshdrnum(elf, &N);
-  if (rc != 0) {
+  int Rc = elf_getshdrnum(Elf, &N);
+  if (Rc != 0) {
     return nullptr;
   }

-  Elf64_Shdr *result = nullptr;
-  for (size_t i = 0; i < N; i++) {
-    Elf_Scn *scn = elf_getscn(elf, i);
-    if (scn) {
-      Elf64_Shdr *shdr = elf64_getshdr(scn);
-      if (shdr) {
-        if (shdr->sh_type == SHT_HASH) {
-          if (result == nullptr) {
-            result = shdr;
+  Elf64_Shdr *Result = nullptr;
+  for (size_t I = 0; I < N; I++) {
+    Elf_Scn *Scn = elf_getscn(Elf, I);
+    if (Scn) {
+      Elf64_Shdr *Shdr = elf64_getshdr(Scn);
+      if (Shdr) {
+        if (Shdr->sh_type == SHT_HASH) {
+          if (Result == nullptr) {
+            Result = Shdr;
           } else {
             // multiple SHT_HASH sections not handled
             return nullptr;
@@ -1623,154 +1617,153 @@
       }
     }
   }
-  return result;
+  return Result;
 }

-const Elf64_Sym *elf_lookup(Elf *elf, char *base, Elf64_Shdr *section_hash,
-                            const char *symname) {
+const Elf64_Sym *elfLookup(Elf *Elf, char *Base, Elf64_Shdr *SectionHash,
+                           const char *Symname) {

-  assert(section_hash);
-  size_t section_symtab_index = section_hash->sh_link;
-  Elf64_Shdr *section_symtab =
-      elf64_getshdr(elf_getscn(elf, section_symtab_index));
-  size_t section_strtab_index = section_symtab->sh_link;
+  assert(SectionHash);
+  size_t SectionSymtabIndex = SectionHash->sh_link;
+  Elf64_Shdr *SectionSymtab =
+      elf64_getshdr(elf_getscn(Elf, SectionSymtabIndex));
+  size_t SectionStrtabIndex = SectionSymtab->sh_link;

-  const Elf64_Sym *symtab =
-      reinterpret_cast<const Elf64_Sym *>(base + section_symtab->sh_offset);
+  const Elf64_Sym *Symtab =
+      reinterpret_cast<const Elf64_Sym *>(Base + SectionSymtab->sh_offset);

-  const uint32_t *hashtab =
-      reinterpret_cast<const uint32_t *>(base + section_hash->sh_offset);
+  const uint32_t *Hashtab =
+      reinterpret_cast<const uint32_t *>(Base + SectionHash->sh_offset);

   // Layout:
   //   nbucket
   //   nchain
   //   bucket[nbucket]
   //   chain[nchain]
-  uint32_t nbucket = hashtab[0];
-  const uint32_t *bucket = &hashtab[2];
-  const uint32_t *chain = &hashtab[nbucket + 2];
-
-  const size_t max = strlen(symname) + 1;
-  const uint32_t hash = elf_hash(symname);
-  for (uint32_t i = bucket[hash % nbucket]; i != 0; i = chain[i]) {
-    char *n = elf_strptr(elf, section_strtab_index, symtab[i].st_name);
-    if (strncmp(symname, n, max) == 0) {
-      return &symtab[i];
+  uint32_t Nbucket = Hashtab[0];
+  const uint32_t *Bucket = &Hashtab[2];
+  const uint32_t *Chain = &Hashtab[Nbucket + 2];
+
+  const size_t Max = strlen(Symname) + 1;
+  const uint32_t Hash = elf_hash(Symname);
+  for (uint32_t I = Bucket[Hash % Nbucket]; I != 0; I = Chain[I]) {
+    char *N = elf_strptr(Elf, SectionStrtabIndex, Symtab[I].st_name);
+    if (strncmp(Symname, N, Max) == 0) {
+      return &Symtab[I];
     }
   }

   return nullptr;
 }

-struct symbol_info {
-  void *addr = nullptr;
-  uint32_t size = UINT32_MAX;
-  uint32_t sh_type = SHT_NULL;
+struct SymbolInfo {
+  void *Addr = nullptr;
+  uint32_t Size = UINT32_MAX;
+  uint32_t ShType = SHT_NULL;
 };

-int get_symbol_info_without_loading(Elf *elf, char *base, const char *symname,
-                                    symbol_info *res) {
-  if (elf_kind(elf) != ELF_K_ELF) {
+int getSymbolInfoWithoutLoading(Elf *Elf, char *Base, const char *Symname,
                                 SymbolInfo *Res) {
+  if (elf_kind(Elf) != ELF_K_ELF) {
     return 1;
   }

-  Elf64_Shdr *section_hash = find_only_SHT_HASH(elf);
-  if (!section_hash) {
+  Elf64_Shdr *SectionHash = findOnlyShtHash(Elf);
+  if (!SectionHash) {
     return 1;
   }

-  const Elf64_Sym *sym = elf_lookup(elf, base, section_hash, symname);
-  if (!sym) {
+  const Elf64_Sym *Sym = elfLookup(Elf, Base, SectionHash, Symname);
+  if (!Sym) {
     return 1;
   }

-  if (sym->st_size > UINT32_MAX) {
+  if (Sym->st_size > UINT32_MAX) {
     return 1;
   }

-  if (sym->st_shndx == SHN_UNDEF) {
+  if (Sym->st_shndx == SHN_UNDEF) {
     return 1;
   }

-  Elf_Scn *section = elf_getscn(elf, sym->st_shndx);
-  if (!section) {
+  Elf_Scn *Section = elf_getscn(Elf, Sym->st_shndx);
+  if (!Section) {
     return 1;
   }

-  Elf64_Shdr *header = elf64_getshdr(section);
-  if (!header) {
+  Elf64_Shdr *Header = elf64_getshdr(Section);
+  if (!Header) {
     return 1;
   }

-  res->addr = sym->st_value + base;
-  res->size = static_cast<uint32_t>(sym->st_size);
-  res->sh_type = header->sh_type;
+  Res->Addr = Sym->st_value + Base;
+  Res->Size = static_cast<uint32_t>(Sym->st_size);
+  Res->ShType = Header->sh_type;
   return 0;
 }

-int get_symbol_info_without_loading(char *base, size_t img_size,
-                                    const char *symname, symbol_info *res) {
-  Elf *elf = elf_memory(base, img_size);
-  if (elf) {
-    int rc = get_symbol_info_without_loading(elf, base, symname, res);
-    elf_end(elf);
-    return rc;
+int getSymbolInfoWithoutLoading(char *Base, size_t ImgSize, const char *Symname,
                                 SymbolInfo *Res) {
+  Elf *Elf = elf_memory(Base, ImgSize);
+  if (Elf) {
+    int Rc = getSymbolInfoWithoutLoading(Elf, Base, Symname, Res);
+    elf_end(Elf);
+    return Rc;
   }
   return 1;
 }

-hsa_status_t interop_get_symbol_info(char *base, size_t img_size,
-                                     const char *symname, void **var_addr,
-                                     uint32_t *var_size) {
-  symbol_info si;
-  int rc = get_symbol_info_without_loading(base, img_size, symname, &si);
-  if (rc == 0) {
-    *var_addr = si.addr;
-    *var_size = si.size;
+hsa_status_t interopGetSymbolInfo(char *Base, size_t ImgSize,
                                   const char *SymName, void **VarAddr,
                                   uint32_t *VarSize) {
+  SymbolInfo SI;
+  int Rc = getSymbolInfoWithoutLoading(Base, ImgSize, SymName, &SI);
+  if (Rc == 0) {
+    *VarAddr = SI.Addr;
+    *VarSize = SI.Size;
     return HSA_STATUS_SUCCESS;
-  } else {
-    return HSA_STATUS_ERROR;
   }
+  return HSA_STATUS_ERROR;
 }

 template <typename C>
-hsa_status_t module_register_from_memory_to_place(
+hsa_status_t moduleRegisterFromMemoryToPlace(
     std::map<std::string, atl_kernel_info_t> &KernelInfoTable,
     std::map<std::string, atl_symbol_info_t> &SymbolInfoTable,
-    void *module_bytes, size_t module_size, int DeviceId, C cb,
+    void *ModuleBytes, size_t ModuleSize, int DeviceId, C Cb,
     std::vector<hsa_executable_t> &HSAExecutables) {
-  auto L = [](void *data, size_t size, void *cb_state) -> hsa_status_t {
-    C *unwrapped = static_cast<C *>(cb_state);
-    return (*unwrapped)(data, size);
+  auto L = [](void *Data, size_t Size, void *CbState) -> hsa_status_t {
+    C *Unwrapped = static_cast<C *>(CbState);
+    return (*Unwrapped)(Data, Size);
   };
   return core::RegisterModuleFromMemory(
-      KernelInfoTable, SymbolInfoTable, module_bytes, module_size,
-      DeviceInfo.HSAAgents[DeviceId], L, static_cast<void *>(&cb),
+      KernelInfoTable, SymbolInfoTable, ModuleBytes, ModuleSize,
+      DeviceInfo.HSAAgents[DeviceId], L, static_cast<void *>(&Cb),
       HSAExecutables);
 }

-uint64_t get_device_State_bytes(char *ImageStart, size_t img_size) {
-  uint64_t device_State_bytes = 0;
+uint64_t getDeviceStateBytes(char *ImageStart, size_t ImgSize) {
+  uint64_t DeviceStateBytes = 0;
   {
     // If this is the deviceRTL, get the state variable size
-    symbol_info size_si;
-    int rc = get_symbol_info_without_loading(
-        ImageStart, img_size, "omptarget_nvptx_device_State_size", &size_si);
+    SymbolInfo SizeSi;
+    int Rc = getSymbolInfoWithoutLoading(
        ImageStart, ImgSize, "omptarget_nvptx_device_State_size", &SizeSi);

-    if (rc == 0) {
-      if (size_si.size != sizeof(uint64_t)) {
+    if (Rc == 0) {
+      if (SizeSi.Size != sizeof(uint64_t)) {
         DP("Found device_State_size variable with wrong size\n");
         return 0;
       }

      // Read number of
bytes directly from the elf - memcpy(&device_State_bytes, size_si.addr, sizeof(uint64_t)); + memcpy(&DeviceStateBytes, SizeSi.Addr, sizeof(uint64_t)); } } - return device_State_bytes; + return DeviceStateBytes; } -struct device_environment { +struct DeviceEnvironment { // initialise an DeviceEnvironmentTy in the deviceRTL // patches around differences in the deviceRTL between trunk, aomp, // rocmcc. Over time these differences will tend to zero and this class @@ -1786,134 +1779,132 @@ // gpu (trunk) and initialize after loading. const char *sym() { return "omptarget_device_environment"; } - DeviceEnvironmentTy host_device_env; - symbol_info si; - bool valid = false; + DeviceEnvironmentTy HostDeviceEnv; + SymbolInfo SI; + bool Valid = false; - __tgt_device_image *image; - const size_t img_size; + __tgt_device_image *Image; + const size_t ImgSize; - device_environment(int device_id, int number_devices, int dynamic_mem_size, - __tgt_device_image *image, const size_t img_size) - : image(image), img_size(img_size) { + DeviceEnvironment(int DeviceId, int NumberDevices, int DynamicMemSize, + __tgt_device_image *Image, const size_t ImgSize) + : Image(Image), ImgSize(ImgSize) { - host_device_env.NumDevices = number_devices; - host_device_env.DeviceNum = device_id; - host_device_env.DebugKind = 0; - host_device_env.DynamicMemSize = dynamic_mem_size; - if (char *envStr = getenv("LIBOMPTARGET_DEVICE_RTL_DEBUG")) { - host_device_env.DebugKind = std::stoi(envStr); - } + HostDeviceEnv.NumDevices = NumberDevices; + HostDeviceEnv.DeviceNum = DeviceId; + HostDeviceEnv.DebugKind = 0; + HostDeviceEnv.DynamicMemSize = DynamicMemSize; + if (char *EnvStr = getenv("LIBOMPTARGET_DEVICE_RTL_DEBUG")) + HostDeviceEnv.DebugKind = std::stoi(EnvStr); - int rc = get_symbol_info_without_loading((char *)image->ImageStart, - img_size, sym(), &si); - if (rc != 0) { + int Rc = getSymbolInfoWithoutLoading((char *)Image->ImageStart, ImgSize, + sym(), &SI); + if (Rc != 0) { DP("Finding global device environment '%s' - symbol missing.\n", sym()); return; } - if (si.size > sizeof(host_device_env)) { - DP("Symbol '%s' has size %u, expected at most %zu.\n", sym(), si.size, - sizeof(host_device_env)); + if (SI.Size > sizeof(HostDeviceEnv)) { + DP("Symbol '%s' has size %u, expected at most %zu.\n", sym(), SI.Size, + sizeof(HostDeviceEnv)); return; } - valid = true; + Valid = true; } - bool in_image() { return si.sh_type != SHT_NOBITS; } + bool inImage() { return SI.ShType != SHT_NOBITS; } - hsa_status_t before_loading(void *data, size_t size) { - if (valid) { - if (in_image()) { + hsa_status_t beforeLoading(void *Data, size_t Size) { + if (Valid) { + if (inImage()) { DP("Setting global device environment before load (%u bytes)\n", - si.size); - uint64_t offset = (char *)si.addr - (char *)image->ImageStart; - void *pos = (char *)data + offset; - memcpy(pos, &host_device_env, si.size); + SI.Size); + uint64_t Offset = (char *)SI.Addr - (char *)Image->ImageStart; + void *Pos = (char *)Data + Offset; + memcpy(Pos, &HostDeviceEnv, SI.Size); } } return HSA_STATUS_SUCCESS; } - hsa_status_t after_loading() { - if (valid) { - if (!in_image()) { + hsa_status_t afterLoading() { + if (Valid) { + if (!inImage()) { DP("Setting global device environment after load (%u bytes)\n", - si.size); - int device_id = host_device_env.DeviceNum; - auto &SymbolInfo = DeviceInfo.SymbolInfoTable[device_id]; - void *state_ptr; - uint32_t state_ptr_size; - hsa_status_t err = interop_hsa_get_symbol_info( - SymbolInfo, device_id, sym(), &state_ptr, &state_ptr_size); - 
if (err != HSA_STATUS_SUCCESS) { + SI.Size); + int DeviceId = HostDeviceEnv.DeviceNum; + auto &SymbolInfo = DeviceInfo.SymbolInfoTable[DeviceId]; + void *StatePtr; + uint32_t StatePtrSize; + hsa_status_t Err = interop_hsa_get_symbol_info( + SymbolInfo, DeviceId, sym(), &StatePtr, &StatePtrSize); + if (Err != HSA_STATUS_SUCCESS) { DP("failed to find %s in loaded image\n", sym()); - return err; + return Err; } - if (state_ptr_size != si.size) { - DP("Symbol had size %u before loading, %u after\n", state_ptr_size, - si.size); + if (StatePtrSize != SI.Size) { + DP("Symbol had size %u before loading, %u after\n", StatePtrSize, + SI.Size); return HSA_STATUS_ERROR; } - return DeviceInfo.freesignalpool_memcpy_h2d(state_ptr, &host_device_env, - state_ptr_size, device_id); + return DeviceInfo.freesignalpoolMemcpyH2D(StatePtr, &HostDeviceEnv, + StatePtrSize, DeviceId); } } return HSA_STATUS_SUCCESS; } }; -hsa_status_t impl_calloc(void **ret_ptr, size_t size, int DeviceId) { - uint64_t rounded = 4 * ((size + 3) / 4); - void *ptr; +hsa_status_t implCalloc(void **RetPtr, size_t Size, int DeviceId) { + uint64_t Rounded = 4 * ((Size + 3) / 4); + void *Ptr; hsa_amd_memory_pool_t MemoryPool = DeviceInfo.getDeviceMemoryPool(DeviceId); - hsa_status_t err = hsa_amd_memory_pool_allocate(MemoryPool, rounded, 0, &ptr); - if (err != HSA_STATUS_SUCCESS) { - return err; + hsa_status_t Err = hsa_amd_memory_pool_allocate(MemoryPool, Rounded, 0, &Ptr); + if (Err != HSA_STATUS_SUCCESS) { + return Err; } - hsa_status_t rc = hsa_amd_memory_fill(ptr, 0, rounded / 4); - if (rc != HSA_STATUS_SUCCESS) { - DP("zero fill device_state failed with %u\n", rc); - core::Runtime::Memfree(ptr); + hsa_status_t Rc = hsa_amd_memory_fill(Ptr, 0, Rounded / 4); + if (Rc != HSA_STATUS_SUCCESS) { + DP("zero fill device_state failed with %u\n", Rc); + core::Runtime::Memfree(Ptr); return HSA_STATUS_ERROR; } - *ret_ptr = ptr; + *RetPtr = Ptr; return HSA_STATUS_SUCCESS; } -bool image_contains_symbol(void *data, size_t size, const char *sym) { - symbol_info si; - int rc = get_symbol_info_without_loading((char *)data, size, sym, &si); - return (rc == 0) && (si.addr != nullptr); +bool imageContainsSymbol(void *Data, size_t Size, const char *Sym) { + SymbolInfo SI; + int Rc = getSymbolInfoWithoutLoading((char *)Data, Size, Sym, &SI); + return (Rc == 0) && (SI.Addr != nullptr); } } // namespace namespace core { -hsa_status_t allow_access_to_all_gpu_agents(void *ptr) { +hsa_status_t allow_access_to_all_gpu_agents(void *Ptr) { return hsa_amd_agents_allow_access(DeviceInfo.HSAAgents.size(), - &DeviceInfo.HSAAgents[0], NULL, ptr); + &DeviceInfo.HSAAgents[0], NULL, Ptr); } } // namespace core extern "C" { -int32_t __tgt_rtl_is_valid_binary(__tgt_device_image *image) { - return elf_machine_id_is_amdgcn(image); +int32_t __tgt_rtl_is_valid_binary(__tgt_device_image *Image) { + return elfMachineIdIsAmdgcn(Image); } int __tgt_rtl_number_of_devices() { // If the construction failed, no methods are safe to call if (DeviceInfo.ConstructionSucceeded) { return DeviceInfo.NumberOfDevices; - } else { - DP("AMDGPU plugin construction failed. Zero devices available\n"); - return 0; } + DP("AMDGPU plugin construction failed. 
Zero devices available\n");
+  return 0;
 }

 int64_t __tgt_rtl_init_requires(int64_t RequiresFlags) {
@@ -1922,109 +1913,107 @@
   return RequiresFlags;
 }

-int32_t __tgt_rtl_init_device(int device_id) {
-  hsa_status_t err;
+int32_t __tgt_rtl_init_device(int DeviceId) {
+  hsa_status_t Err;

   // this is per device id init
-  DP("Initialize the device id: %d\n", device_id);
+  DP("Initialize the device id: %d\n", DeviceId);

-  hsa_agent_t agent = DeviceInfo.HSAAgents[device_id];
+  hsa_agent_t Agent = DeviceInfo.HSAAgents[DeviceId];

   // Get number of Compute Unit
-  uint32_t compute_units = 0;
-  err = hsa_agent_get_info(
-      agent, (hsa_agent_info_t)HSA_AMD_AGENT_INFO_COMPUTE_UNIT_COUNT,
-      &compute_units);
-  if (err != HSA_STATUS_SUCCESS) {
-    DeviceInfo.ComputeUnits[device_id] = 1;
+  uint32_t ComputeUnits = 0;
+  Err = hsa_agent_get_info(
+      Agent, (hsa_agent_info_t)HSA_AMD_AGENT_INFO_COMPUTE_UNIT_COUNT,
+      &ComputeUnits);
+  if (Err != HSA_STATUS_SUCCESS) {
+    DeviceInfo.ComputeUnits[DeviceId] = 1;
     DP("Error getting compute units : setting to 1\n");
   } else {
-    DeviceInfo.ComputeUnits[device_id] = compute_units;
-    DP("Using %d compute units per grid\n", DeviceInfo.ComputeUnits[device_id]);
+    DeviceInfo.ComputeUnits[DeviceId] = ComputeUnits;
+    DP("Using %d compute units per grid\n", DeviceInfo.ComputeUnits[DeviceId]);
   }

   char GetInfoName[64]; // 64 max size returned by get info
-  err = hsa_agent_get_info(agent, (hsa_agent_info_t)HSA_AGENT_INFO_NAME,
+  Err = hsa_agent_get_info(Agent, (hsa_agent_info_t)HSA_AGENT_INFO_NAME,
                            (void *)GetInfoName);
-  if (err)
-    DeviceInfo.GPUName[device_id] = "--unknown gpu--";
+  if (Err)
+    DeviceInfo.GPUName[DeviceId] = "--unknown gpu--";
   else {
-    DeviceInfo.GPUName[device_id] = GetInfoName;
+    DeviceInfo.GPUName[DeviceId] = GetInfoName;
   }

   if (print_kernel_trace & STARTUP_DETAILS)
-    DP("Device#%-2d CU's: %2d %s\n", device_id,
-       DeviceInfo.ComputeUnits[device_id],
-       DeviceInfo.GPUName[device_id].c_str());
+    DP("Device#%-2d CU's: %2d %s\n", DeviceId,
+       DeviceInfo.ComputeUnits[DeviceId], DeviceInfo.GPUName[DeviceId].c_str());
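Every hardware query in this init path has the same shape: ask hsa_agent_get_info for one attribute and substitute a safe default when the call fails, exactly as the compute-unit lookup above does. A minimal sketch of that pattern (standalone helper, illustrative only; header paths may vary by install):

#include <hsa/hsa.h>         // ROCm HSA runtime header
#include <hsa/hsa_ext_amd.h> // for the AMD vendor attribute below

// Query the compute-unit count, falling back to 1 on failure, mirroring
// the fallback chosen above.
static uint32_t computeUnitsOrDefault(hsa_agent_t Agent) {
  uint32_t Count = 0;
  hsa_status_t Err = hsa_agent_get_info(
      Agent, (hsa_agent_info_t)HSA_AMD_AGENT_INFO_COMPUTE_UNIT_COUNT, &Count);
  // The result comes back through the out-parameter and is unspecified on
  // error, so only trust it when the status is success.
  return (Err == HSA_STATUS_SUCCESS) ? Count : 1;
}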
   // Query attributes to determine number of threads/block and blocks/grid.
-  uint16_t workgroup_max_dim[3];
-  err = hsa_agent_get_info(agent, HSA_AGENT_INFO_WORKGROUP_MAX_DIM,
-                           &workgroup_max_dim);
-  if (err != HSA_STATUS_SUCCESS) {
-    DeviceInfo.GroupsPerDevice[device_id] = RTLDeviceInfoTy::DefaultNumTeams;
+  uint16_t WorkgroupMaxDim[3];
+  Err = hsa_agent_get_info(Agent, HSA_AGENT_INFO_WORKGROUP_MAX_DIM,
+                           &WorkgroupMaxDim);
+  if (Err != HSA_STATUS_SUCCESS) {
+    DeviceInfo.GroupsPerDevice[DeviceId] = RTLDeviceInfoTy::DefaultNumTeams;
     DP("Error getting grid dims: num groups : %d\n",
        RTLDeviceInfoTy::DefaultNumTeams);
-  } else if (workgroup_max_dim[0] <= RTLDeviceInfoTy::HardTeamLimit) {
-    DeviceInfo.GroupsPerDevice[device_id] = workgroup_max_dim[0];
-    DP("Using %d ROCm blocks per grid\n",
-       DeviceInfo.GroupsPerDevice[device_id]);
+  } else if (WorkgroupMaxDim[0] <= RTLDeviceInfoTy::HardTeamLimit) {
+    DeviceInfo.GroupsPerDevice[DeviceId] = WorkgroupMaxDim[0];
+    DP("Using %d ROCm blocks per grid\n", DeviceInfo.GroupsPerDevice[DeviceId]);
   } else {
-    DeviceInfo.GroupsPerDevice[device_id] = RTLDeviceInfoTy::HardTeamLimit;
+    DeviceInfo.GroupsPerDevice[DeviceId] = RTLDeviceInfoTy::HardTeamLimit;
     DP("Max ROCm blocks per grid %d exceeds the hard team limit %d, capping "
        "at the hard limit\n",
-       workgroup_max_dim[0], RTLDeviceInfoTy::HardTeamLimit);
+       WorkgroupMaxDim[0], RTLDeviceInfoTy::HardTeamLimit);
   }

   // Get thread limit
-  hsa_dim3_t grid_max_dim;
-  err = hsa_agent_get_info(agent, HSA_AGENT_INFO_GRID_MAX_DIM, &grid_max_dim);
-  if (err == HSA_STATUS_SUCCESS) {
-    DeviceInfo.ThreadsPerGroup[device_id] =
-        reinterpret_cast<uint32_t *>(&grid_max_dim)[0] /
-        DeviceInfo.GroupsPerDevice[device_id];
-
-    if (DeviceInfo.ThreadsPerGroup[device_id] == 0) {
-      DeviceInfo.ThreadsPerGroup[device_id] = RTLDeviceInfoTy::Max_WG_Size;
-      DP("Default thread limit: %d\n", RTLDeviceInfoTy::Max_WG_Size);
-    } else if (enforce_upper_bound(&DeviceInfo.ThreadsPerGroup[device_id],
-                                   RTLDeviceInfoTy::Max_WG_Size)) {
-      DP("Capped thread limit: %d\n", RTLDeviceInfoTy::Max_WG_Size);
+  hsa_dim3_t GridMaxDim;
+  Err = hsa_agent_get_info(Agent, HSA_AGENT_INFO_GRID_MAX_DIM, &GridMaxDim);
+  if (Err == HSA_STATUS_SUCCESS) {
+    DeviceInfo.ThreadsPerGroup[DeviceId] =
+        reinterpret_cast<uint32_t *>(&GridMaxDim)[0] /
+        DeviceInfo.GroupsPerDevice[DeviceId];
+
+    if (DeviceInfo.ThreadsPerGroup[DeviceId] == 0) {
+      DeviceInfo.ThreadsPerGroup[DeviceId] = RTLDeviceInfoTy::MaxWgSize;
+      DP("Default thread limit: %d\n", RTLDeviceInfoTy::MaxWgSize);
+    } else if (enforceUpperBound(&DeviceInfo.ThreadsPerGroup[DeviceId],
+                                 RTLDeviceInfoTy::MaxWgSize)) {
+      DP("Capped thread limit: %d\n", RTLDeviceInfoTy::MaxWgSize);
     } else {
       DP("Using ROCm Queried thread limit: %d\n",
-         DeviceInfo.ThreadsPerGroup[device_id]);
+         DeviceInfo.ThreadsPerGroup[DeviceId]);
     }
   } else {
-    DeviceInfo.ThreadsPerGroup[device_id] = RTLDeviceInfoTy::Max_WG_Size;
+    DeviceInfo.ThreadsPerGroup[DeviceId] = RTLDeviceInfoTy::MaxWgSize;
     DP("Error getting max block dimension, use default:%d \n",
-       RTLDeviceInfoTy::Max_WG_Size);
+       RTLDeviceInfoTy::MaxWgSize);
   }

   // Get wavefront size
-  uint32_t wavefront_size = 0;
-  err =
-      hsa_agent_get_info(agent, HSA_AGENT_INFO_WAVEFRONT_SIZE, &wavefront_size);
-  if (err == HSA_STATUS_SUCCESS) {
-    DP("Queried wavefront size: %d\n", wavefront_size);
-    DeviceInfo.WarpSize[device_id] = wavefront_size;
+  uint32_t WavefrontSize = 0;
+  Err =
+      hsa_agent_get_info(Agent, HSA_AGENT_INFO_WAVEFRONT_SIZE, &WavefrontSize);
+  if (Err == HSA_STATUS_SUCCESS) {
+    DP("Queried wavefront size: %d\n", WavefrontSize);
+    DeviceInfo.WarpSize[DeviceId] = WavefrontSize;
   } else { //
TODO: Burn the wavefront size into the code object DP("Warning: Unknown wavefront size, assuming 64\n"); - DeviceInfo.WarpSize[device_id] = 64; + DeviceInfo.WarpSize[DeviceId] = 64; } // Adjust teams to the env variables if (DeviceInfo.Env.TeamLimit > 0 && - (enforce_upper_bound(&DeviceInfo.GroupsPerDevice[device_id], - DeviceInfo.Env.TeamLimit))) { + (enforceUpperBound(&DeviceInfo.GroupsPerDevice[DeviceId], + DeviceInfo.Env.TeamLimit))) { DP("Capping max groups per device to OMP_TEAM_LIMIT=%d\n", DeviceInfo.Env.TeamLimit); } // Set default number of teams if (DeviceInfo.Env.NumTeams > 0) { - DeviceInfo.NumTeams[device_id] = DeviceInfo.Env.NumTeams; + DeviceInfo.NumTeams[DeviceId] = DeviceInfo.Env.NumTeams; DP("Default number of teams set according to environment %d\n", DeviceInfo.Env.NumTeams); } else { @@ -2034,63 +2023,63 @@ TeamsPerCU = std::stoi(TeamsPerCUEnvStr); } - DeviceInfo.NumTeams[device_id] = - TeamsPerCU * DeviceInfo.ComputeUnits[device_id]; + DeviceInfo.NumTeams[DeviceId] = + TeamsPerCU * DeviceInfo.ComputeUnits[DeviceId]; DP("Default number of teams = %d * number of compute units %d\n", - TeamsPerCU, DeviceInfo.ComputeUnits[device_id]); + TeamsPerCU, DeviceInfo.ComputeUnits[DeviceId]); } - if (enforce_upper_bound(&DeviceInfo.NumTeams[device_id], - DeviceInfo.GroupsPerDevice[device_id])) { + if (enforceUpperBound(&DeviceInfo.NumTeams[DeviceId], + DeviceInfo.GroupsPerDevice[DeviceId])) { DP("Default number of teams exceeds device limit, capping at %d\n", - DeviceInfo.GroupsPerDevice[device_id]); + DeviceInfo.GroupsPerDevice[DeviceId]); } // Adjust threads to the env variables if (DeviceInfo.Env.TeamThreadLimit > 0 && - (enforce_upper_bound(&DeviceInfo.NumThreads[device_id], - DeviceInfo.Env.TeamThreadLimit))) { + (enforceUpperBound(&DeviceInfo.NumThreads[DeviceId], + DeviceInfo.Env.TeamThreadLimit))) { DP("Capping max number of threads to OMP_TEAMS_THREAD_LIMIT=%d\n", DeviceInfo.Env.TeamThreadLimit); } // Set default number of threads - DeviceInfo.NumThreads[device_id] = RTLDeviceInfoTy::Default_WG_Size; + DeviceInfo.NumThreads[DeviceId] = RTLDeviceInfoTy::DefaultWgSize; DP("Default number of threads set according to library's default %d\n", - RTLDeviceInfoTy::Default_WG_Size); - if (enforce_upper_bound(&DeviceInfo.NumThreads[device_id], - DeviceInfo.ThreadsPerGroup[device_id])) { + RTLDeviceInfoTy::DefaultWgSize); + if (enforceUpperBound(&DeviceInfo.NumThreads[DeviceId], + DeviceInfo.ThreadsPerGroup[DeviceId])) { DP("Default number of threads exceeds device limit, capping at %d\n", - DeviceInfo.ThreadsPerGroup[device_id]); + DeviceInfo.ThreadsPerGroup[DeviceId]); } DP("Device %d: default limit for groupsPerDevice %d & threadsPerGroup %d\n", - device_id, DeviceInfo.GroupsPerDevice[device_id], - DeviceInfo.ThreadsPerGroup[device_id]); + DeviceId, DeviceInfo.GroupsPerDevice[DeviceId], + DeviceInfo.ThreadsPerGroup[DeviceId]); - DP("Device %d: wavefront size %d, total threads %d x %d = %d\n", device_id, - DeviceInfo.WarpSize[device_id], DeviceInfo.ThreadsPerGroup[device_id], - DeviceInfo.GroupsPerDevice[device_id], - DeviceInfo.GroupsPerDevice[device_id] * - DeviceInfo.ThreadsPerGroup[device_id]); + DP("Device %d: wavefront size %d, total threads %d x %d = %d\n", DeviceId, + DeviceInfo.WarpSize[DeviceId], DeviceInfo.ThreadsPerGroup[DeviceId], + DeviceInfo.GroupsPerDevice[DeviceId], + DeviceInfo.GroupsPerDevice[DeviceId] * + DeviceInfo.ThreadsPerGroup[DeviceId]); return OFFLOAD_SUCCESS; } static __tgt_target_table * -__tgt_rtl_load_binary_locked(int32_t device_id, 
__tgt_device_image *image); - -__tgt_target_table *__tgt_rtl_load_binary(int32_t device_id, - __tgt_device_image *image) { - DeviceInfo.load_run_lock.lock(); - __tgt_target_table *res = __tgt_rtl_load_binary_locked(device_id, image); - DeviceInfo.load_run_lock.unlock(); - return res; +__tgt_rtl_load_binary_locked(int32_t DeviceId, __tgt_device_image *Image); + +__tgt_target_table *__tgt_rtl_load_binary(int32_t DeviceId, + __tgt_device_image *Image) { + DeviceInfo.LoadRunLock.lock(); + __tgt_target_table *Res = __tgt_rtl_load_binary_locked(DeviceId, Image); + DeviceInfo.LoadRunLock.unlock(); + return Res; } -__tgt_target_table *__tgt_rtl_load_binary_locked(int32_t device_id, - __tgt_device_image *image) { - // This function loads the device image onto gpu[device_id] and does other +__tgt_target_table *__tgt_rtl_load_binary_locked(int32_t DeviceId, + __tgt_device_image *Image) { + // This function loads the device image onto gpu[DeviceId] and does other // per-image initialization work. Specifically: // // - Initialize an DeviceEnvironmentTy instance embedded in the @@ -2112,53 +2101,51 @@ // runtime behaviour. E.g. launching a kernel or using dma to get eight bytes // back from the gpu vs a hashtable lookup on the host. - const size_t img_size = (char *)image->ImageEnd - (char *)image->ImageStart; + const size_t ImgSize = (char *)Image->ImageEnd - (char *)Image->ImageStart; - DeviceInfo.clearOffloadEntriesTable(device_id); + DeviceInfo.clearOffloadEntriesTable(DeviceId); // We do not need to set the ELF version because the caller of this function // had to do that to decide the right runtime to use - if (!elf_machine_id_is_amdgcn(image)) { + if (!elfMachineIdIsAmdgcn(Image)) return NULL; - } { - auto env = - device_environment(device_id, DeviceInfo.NumberOfDevices, - DeviceInfo.Env.DynamicMemSize, image, img_size); - - auto &KernelInfo = DeviceInfo.KernelInfoTable[device_id]; - auto &SymbolInfo = DeviceInfo.SymbolInfoTable[device_id]; - hsa_status_t err = module_register_from_memory_to_place( - KernelInfo, SymbolInfo, (void *)image->ImageStart, img_size, device_id, - [&](void *data, size_t size) { - if (image_contains_symbol(data, size, "needs_hostcall_buffer")) { - __atomic_store_n(&DeviceInfo.hostcall_required, true, + auto Env = DeviceEnvironment(DeviceId, DeviceInfo.NumberOfDevices, + DeviceInfo.Env.DynamicMemSize, Image, ImgSize); + + auto &KernelInfo = DeviceInfo.KernelInfoTable[DeviceId]; + auto &SymbolInfo = DeviceInfo.SymbolInfoTable[DeviceId]; + hsa_status_t Err = moduleRegisterFromMemoryToPlace( + KernelInfo, SymbolInfo, (void *)Image->ImageStart, ImgSize, DeviceId, + [&](void *Data, size_t Size) { + if (imageContainsSymbol(Data, Size, "needs_hostcall_buffer")) { + __atomic_store_n(&DeviceInfo.HostcallRequired, true, __ATOMIC_RELEASE); } - return env.before_loading(data, size); + return Env.beforeLoading(Data, Size); }, DeviceInfo.HSAExecutables); - check("Module registering", err); - if (err != HSA_STATUS_SUCCESS) { - const char *DeviceName = DeviceInfo.GPUName[device_id].c_str(); - const char *ElfName = get_elf_mach_gfx_name(elf_e_flags(image)); + check("Module registering", Err); + if (Err != HSA_STATUS_SUCCESS) { + const char *DeviceName = DeviceInfo.GPUName[DeviceId].c_str(); + const char *ElfName = get_elf_mach_gfx_name(elfEFlags(Image)); if (strcmp(DeviceName, ElfName) != 0) { DP("Possible gpu arch mismatch: device:%s, image:%s please check" " compiler flag: -march=\n", DeviceName, ElfName); } else { - DP("Error loading image onto GPU: %s\n", get_error_string(err)); + 
DP("Error loading image onto GPU: %s\n", get_error_string(Err)); } return NULL; } - err = env.after_loading(); - if (err != HSA_STATUS_SUCCESS) { + Err = Env.afterLoading(); + if (Err != HSA_STATUS_SUCCESS) { return NULL; } } @@ -2170,18 +2157,18 @@ // needs to be assigned to a pointer to an array of size device_state_bytes // If absent, it has been deadstripped and needs no setup. - void *state_ptr; - uint32_t state_ptr_size; - auto &SymbolInfoMap = DeviceInfo.SymbolInfoTable[device_id]; - hsa_status_t err = interop_hsa_get_symbol_info( - SymbolInfoMap, device_id, "omptarget_nvptx_device_State", &state_ptr, - &state_ptr_size); + void *StatePtr; + uint32_t StatePtrSize; + auto &SymbolInfoMap = DeviceInfo.SymbolInfoTable[DeviceId]; + hsa_status_t Err = interop_hsa_get_symbol_info( + SymbolInfoMap, DeviceId, "omptarget_nvptx_device_State", &StatePtr, + &StatePtrSize); - if (err != HSA_STATUS_SUCCESS) { + if (Err != HSA_STATUS_SUCCESS) { DP("No device_state symbol found, skipping initialization\n"); } else { - if (state_ptr_size < sizeof(void *)) { - DP("unexpected size of state_ptr %u != %zu\n", state_ptr_size, + if (StatePtrSize < sizeof(void *)) { + DP("unexpected size of state_ptr %u != %zu\n", StatePtrSize, sizeof(void *)); return NULL; } @@ -2189,39 +2176,39 @@ // if it's larger than a void*, assume it's a bss array and no further // initialization is required. Only try to set up a pointer for // sizeof(void*) - if (state_ptr_size == sizeof(void *)) { - uint64_t device_State_bytes = - get_device_State_bytes((char *)image->ImageStart, img_size); - if (device_State_bytes == 0) { + if (StatePtrSize == sizeof(void *)) { + uint64_t DeviceStateBytes = + getDeviceStateBytes((char *)Image->ImageStart, ImgSize); + if (DeviceStateBytes == 0) { DP("Can't initialize device_State, missing size information\n"); return NULL; } - auto &dss = DeviceInfo.deviceStateStore[device_id]; - if (dss.first.get() == nullptr) { - assert(dss.second == 0); - void *ptr = NULL; - hsa_status_t err = impl_calloc(&ptr, device_State_bytes, device_id); - if (err != HSA_STATUS_SUCCESS) { + auto &DSS = DeviceInfo.DeviceStateStore[DeviceId]; + if (DSS.first.get() == nullptr) { + assert(DSS.second == 0); + void *Ptr = NULL; + hsa_status_t Err = implCalloc(&Ptr, DeviceStateBytes, DeviceId); + if (Err != HSA_STATUS_SUCCESS) { DP("Failed to allocate device_state array\n"); return NULL; } - dss = { - std::unique_ptr{ptr}, - device_State_bytes, + DSS = { + std::unique_ptr{Ptr}, + DeviceStateBytes, }; } - void *ptr = dss.first.get(); - if (device_State_bytes != dss.second) { + void *Ptr = DSS.first.get(); + if (DeviceStateBytes != DSS.second) { DP("Inconsistent sizes of device_State unsupported\n"); return NULL; } // write ptr to device memory so it can be used by later kernels - err = DeviceInfo.freesignalpool_memcpy_h2d(state_ptr, &ptr, - sizeof(void *), device_id); - if (err != HSA_STATUS_SUCCESS) { + Err = DeviceInfo.freesignalpoolMemcpyH2D(StatePtr, &Ptr, sizeof(void *), + DeviceId); + if (Err != HSA_STATUS_SUCCESS) { DP("memcpy install of state_ptr failed\n"); return NULL; } @@ -2237,80 +2224,80 @@ // Find the symbols in the module by name. 
The name can be obtain by // concatenating the host entry name with the target name - __tgt_offload_entry *HostBegin = image->EntriesBegin; - __tgt_offload_entry *HostEnd = image->EntriesEnd; + __tgt_offload_entry *HostBegin = Image->EntriesBegin; + __tgt_offload_entry *HostEnd = Image->EntriesEnd; - for (__tgt_offload_entry *e = HostBegin; e != HostEnd; ++e) { + for (__tgt_offload_entry *E = HostBegin; E != HostEnd; ++E) { - if (!e->addr) { + if (!E->addr) { // The host should have always something in the address to // uniquely identify the target region. DP("Analyzing host entry '' (size = %lld)...\n", - (unsigned long long)e->size); + (unsigned long long)E->size); return NULL; } - if (e->size) { - __tgt_offload_entry entry = *e; + if (E->size) { + __tgt_offload_entry Entry = *E; - void *varptr; - uint32_t varsize; + void *Varptr; + uint32_t Varsize; - auto &SymbolInfoMap = DeviceInfo.SymbolInfoTable[device_id]; - hsa_status_t err = interop_hsa_get_symbol_info( - SymbolInfoMap, device_id, e->name, &varptr, &varsize); + auto &SymbolInfoMap = DeviceInfo.SymbolInfoTable[DeviceId]; + hsa_status_t Err = interop_hsa_get_symbol_info( + SymbolInfoMap, DeviceId, E->name, &Varptr, &Varsize); - if (err != HSA_STATUS_SUCCESS) { + if (Err != HSA_STATUS_SUCCESS) { // Inform the user what symbol prevented offloading - DP("Loading global '%s' (Failed)\n", e->name); + DP("Loading global '%s' (Failed)\n", E->name); return NULL; } - if (varsize != e->size) { - DP("Loading global '%s' - size mismatch (%u != %lu)\n", e->name, - varsize, e->size); + if (Varsize != E->size) { + DP("Loading global '%s' - size mismatch (%u != %lu)\n", E->name, + Varsize, E->size); return NULL; } DP("Entry point " DPxMOD " maps to global %s (" DPxMOD ")\n", - DPxPTR(e - HostBegin), e->name, DPxPTR(varptr)); - entry.addr = (void *)varptr; + DPxPTR(E - HostBegin), E->name, DPxPTR(Varptr)); + Entry.addr = (void *)Varptr; - DeviceInfo.addOffloadEntry(device_id, entry); + DeviceInfo.addOffloadEntry(DeviceId, Entry); if (DeviceInfo.RequiresFlags & OMP_REQ_UNIFIED_SHARED_MEMORY && - e->flags & OMP_DECLARE_TARGET_LINK) { + E->flags & OMP_DECLARE_TARGET_LINK) { // If unified memory is present any target link variables // can access host addresses directly. There is no longer a // need for device copies. 
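For the copy below, recall what the device-side entry for a "declare target link" variable is: a pointer-sized slot. Under unified shared memory the runtime patches that slot with a host address so device code dereferences host memory directly; only sizeof(void *) bytes move, never the pointee. A schematic sketch under that assumption (the names and the copy callback are hypothetical, not the plugin's API):

#include <cstddef>

// Hypothetical illustration: write the host address of a linked variable
// into the pointer-sized device global that represents it.
static int patchLinkPointerForUsm(void *DevGlobalAddr, void *HostAddrOfVar,
                                  int (*CopyH2D)(void *TgtPtr, void *HstPtr,
                                                 size_t Bytes)) {
  // Copy the pointer value itself (8 bytes on a 64-bit host), not the data
  // it points to; afterwards, device loads through the global reach host
  // memory via unified shared memory.
  return CopyH2D(DevGlobalAddr, &HostAddrOfVar, sizeof(void *));
}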
- err = DeviceInfo.freesignalpool_memcpy_h2d(varptr, e->addr, - sizeof(void *), device_id); - if (err != HSA_STATUS_SUCCESS) + Err = DeviceInfo.freesignalpoolMemcpyH2D(Varptr, E->addr, + sizeof(void *), DeviceId); + if (Err != HSA_STATUS_SUCCESS) DP("Error when copying USM\n"); DP("Copy linked variable host address (" DPxMOD ")" "to device address (" DPxMOD ")\n", - DPxPTR(*((void **)e->addr)), DPxPTR(varptr)); + DPxPTR(*((void **)E->addr)), DPxPTR(Varptr)); } continue; } - DP("to find the kernel name: %s size: %lu\n", e->name, strlen(e->name)); + DP("to find the kernel name: %s size: %lu\n", E->name, strlen(E->name)); // errors in kernarg_segment_size previously treated as = 0 (or as undef) - uint32_t kernarg_segment_size = 0; - auto &KernelInfoMap = DeviceInfo.KernelInfoTable[device_id]; - hsa_status_t err = HSA_STATUS_SUCCESS; - if (!e->name) { - err = HSA_STATUS_ERROR; + uint32_t KernargSegmentSize = 0; + auto &KernelInfoMap = DeviceInfo.KernelInfoTable[DeviceId]; + hsa_status_t Err = HSA_STATUS_SUCCESS; + if (!E->name) { + Err = HSA_STATUS_ERROR; } else { - std::string kernelStr = std::string(e->name); - auto It = KernelInfoMap.find(kernelStr); + std::string KernelStr = std::string(E->name); + auto It = KernelInfoMap.find(KernelStr); if (It != KernelInfoMap.end()) { - atl_kernel_info_t info = It->second; - kernarg_segment_size = info.kernel_segment_size; + atl_kernel_info_t Info = It->second; + KernargSegmentSize = Info.kernel_segment_size; } else { - err = HSA_STATUS_ERROR; + Err = HSA_STATUS_ERROR; } } @@ -2319,27 +2306,27 @@ llvm::omp::OMPTgtExecModeFlags::OMP_TGT_EXEC_MODE_GENERIC; // get flat group size if present, else Default_WG_Size - int16_t WGSizeVal = RTLDeviceInfoTy::Default_WG_Size; + int16_t WGSizeVal = RTLDeviceInfoTy::DefaultWgSize; // get Kernel Descriptor if present. 
// Keep struct in sync wih getTgtAttributeStructQTy in CGOpenMPRuntime.cpp struct KernDescValType { uint16_t Version; uint16_t TSize; - uint16_t WG_Size; + uint16_t WGSize; }; struct KernDescValType KernDescVal; - std::string KernDescNameStr(e->name); + std::string KernDescNameStr(E->name); KernDescNameStr += "_kern_desc"; const char *KernDescName = KernDescNameStr.c_str(); void *KernDescPtr; uint32_t KernDescSize; void *CallStackAddr = nullptr; - err = interop_get_symbol_info((char *)image->ImageStart, img_size, - KernDescName, &KernDescPtr, &KernDescSize); + Err = interopGetSymbolInfo((char *)Image->ImageStart, ImgSize, KernDescName, + &KernDescPtr, &KernDescSize); - if (err == HSA_STATUS_SUCCESS) { + if (Err == HSA_STATUS_SUCCESS) { if ((size_t)KernDescSize != sizeof(KernDescVal)) DP("Loading global computation properties '%s' - size mismatch (%u != " "%lu)\n", @@ -2355,29 +2342,29 @@ DP("After loading global for %s KernDesc \n", KernDescName); DP("KernDesc: Version: %d\n", KernDescVal.Version); DP("KernDesc: TSize: %d\n", KernDescVal.TSize); - DP("KernDesc: WG_Size: %d\n", KernDescVal.WG_Size); + DP("KernDesc: WG_Size: %d\n", KernDescVal.WGSize); - if (KernDescVal.WG_Size == 0) { - KernDescVal.WG_Size = RTLDeviceInfoTy::Default_WG_Size; - DP("Setting KernDescVal.WG_Size to default %d\n", KernDescVal.WG_Size); + if (KernDescVal.WGSize == 0) { + KernDescVal.WGSize = RTLDeviceInfoTy::DefaultWgSize; + DP("Setting KernDescVal.WG_Size to default %d\n", KernDescVal.WGSize); } - WGSizeVal = KernDescVal.WG_Size; + WGSizeVal = KernDescVal.WGSize; DP("WGSizeVal %d\n", WGSizeVal); - check("Loading KernDesc computation property", err); + check("Loading KernDesc computation property", Err); } else { DP("Warning: Loading KernDesc '%s' - symbol not found, ", KernDescName); // Flat group size - std::string WGSizeNameStr(e->name); + std::string WGSizeNameStr(E->name); WGSizeNameStr += "_wg_size"; const char *WGSizeName = WGSizeNameStr.c_str(); void *WGSizePtr; uint32_t WGSize; - err = interop_get_symbol_info((char *)image->ImageStart, img_size, - WGSizeName, &WGSizePtr, &WGSize); + Err = interopGetSymbolInfo((char *)Image->ImageStart, ImgSize, WGSizeName, + &WGSizePtr, &WGSize); - if (err == HSA_STATUS_SUCCESS) { + if (Err == HSA_STATUS_SUCCESS) { if ((size_t)WGSize != sizeof(int16_t)) { DP("Loading global computation properties '%s' - size mismatch (%u " "!= " @@ -2390,12 +2377,12 @@ DP("After loading global for %s WGSize = %d\n", WGSizeName, WGSizeVal); - if (WGSizeVal < RTLDeviceInfoTy::Default_WG_Size || - WGSizeVal > RTLDeviceInfoTy::Max_WG_Size) { + if (WGSizeVal < RTLDeviceInfoTy::DefaultWgSize || + WGSizeVal > RTLDeviceInfoTy::MaxWgSize) { DP("Error wrong WGSize value specified in HSA code object file: " "%d\n", WGSizeVal); - WGSizeVal = RTLDeviceInfoTy::Default_WG_Size; + WGSizeVal = RTLDeviceInfoTy::DefaultWgSize; } } else { DP("Warning: Loading WGSize '%s' - symbol not found, " @@ -2403,28 +2390,28 @@ WGSizeName, WGSizeVal); } - check("Loading WGSize computation property", err); + check("Loading WGSize computation property", Err); } // Read execution mode from global in binary - std::string ExecModeNameStr(e->name); + std::string ExecModeNameStr(E->name); ExecModeNameStr += "_exec_mode"; const char *ExecModeName = ExecModeNameStr.c_str(); void *ExecModePtr; - uint32_t varsize; - err = interop_get_symbol_info((char *)image->ImageStart, img_size, - ExecModeName, &ExecModePtr, &varsize); + uint32_t VarSize; + Err = interopGetSymbolInfo((char *)Image->ImageStart, ImgSize, ExecModeName, + 
&ExecModePtr, &VarSize); - if (err == HSA_STATUS_SUCCESS) { - if ((size_t)varsize != sizeof(llvm::omp::OMPTgtExecModeFlags)) { + if (Err == HSA_STATUS_SUCCESS) { + if ((size_t)VarSize != sizeof(llvm::omp::OMPTgtExecModeFlags)) { DP("Loading global computation properties '%s' - size mismatch(%u != " "%lu)\n", - ExecModeName, varsize, sizeof(llvm::omp::OMPTgtExecModeFlags)); + ExecModeName, VarSize, sizeof(llvm::omp::OMPTgtExecModeFlags)); return NULL; } - memcpy(&ExecModeVal, ExecModePtr, (size_t)varsize); + memcpy(&ExecModeVal, ExecModePtr, (size_t)VarSize); DP("After loading global for %s ExecMode = %d\n", ExecModeName, ExecModeVal); @@ -2442,152 +2429,146 @@ "GENERIC (1)\n", ExecModeName); } - check("Loading computation property", err); + check("Loading computation property", Err); - KernelsList.push_back(KernelTy(ExecModeVal, WGSizeVal, device_id, - CallStackAddr, e->name, kernarg_segment_size, + KernelsList.push_back(KernelTy(ExecModeVal, WGSizeVal, DeviceId, + CallStackAddr, E->name, KernargSegmentSize, DeviceInfo.KernArgPool)); - __tgt_offload_entry entry = *e; - entry.addr = (void *)&KernelsList.back(); - DeviceInfo.addOffloadEntry(device_id, entry); - DP("Entry point %ld maps to %s\n", e - HostBegin, e->name); + __tgt_offload_entry Entry = *E; + Entry.addr = (void *)&KernelsList.back(); + DeviceInfo.addOffloadEntry(DeviceId, Entry); + DP("Entry point %ld maps to %s\n", E - HostBegin, E->name); } - return DeviceInfo.getOffloadEntriesTable(device_id); + return DeviceInfo.getOffloadEntriesTable(DeviceId); } -void *__tgt_rtl_data_alloc(int device_id, int64_t size, void *, int32_t kind) { - void *ptr = NULL; - assert(device_id < DeviceInfo.NumberOfDevices && "Device ID too large"); +void *__tgt_rtl_data_alloc(int DeviceId, int64_t Size, void *, int32_t Kind) { + void *Ptr = NULL; + assert(DeviceId < DeviceInfo.NumberOfDevices && "Device ID too large"); - if (kind != TARGET_ALLOC_DEFAULT) { + if (Kind != TARGET_ALLOC_DEFAULT) { REPORT("Invalid target data allocation kind or requested allocator not " "implemented yet\n"); return NULL; } - hsa_amd_memory_pool_t MemoryPool = DeviceInfo.getDeviceMemoryPool(device_id); - hsa_status_t err = hsa_amd_memory_pool_allocate(MemoryPool, size, 0, &ptr); - DP("Tgt alloc data %ld bytes, (tgt:%016llx).\n", size, - (long long unsigned)(Elf64_Addr)ptr); - ptr = (err == HSA_STATUS_SUCCESS) ? ptr : NULL; - return ptr; + hsa_amd_memory_pool_t MemoryPool = DeviceInfo.getDeviceMemoryPool(DeviceId); + hsa_status_t Err = hsa_amd_memory_pool_allocate(MemoryPool, Size, 0, &Ptr); + DP("Tgt alloc data %ld bytes, (tgt:%016llx).\n", Size, + (long long unsigned)(Elf64_Addr)Ptr); + Ptr = (Err == HSA_STATUS_SUCCESS) ? 
Ptr : NULL;
+  return Ptr;
}
-int32_t __tgt_rtl_data_submit(int device_id, void *tgt_ptr, void *hst_ptr,
-                              int64_t size) {
-  assert(device_id < DeviceInfo.NumberOfDevices && "Device ID too large");
+int32_t __tgt_rtl_data_submit(int DeviceId, void *TgtPtr, void *HstPtr,
+                              int64_t Size) {
+  assert(DeviceId < DeviceInfo.NumberOfDevices && "Device ID too large");
  __tgt_async_info AsyncInfo;
-  int32_t rc = dataSubmit(device_id, tgt_ptr, hst_ptr, size, &AsyncInfo);
-  if (rc != OFFLOAD_SUCCESS)
+  int32_t Rc = dataSubmit(DeviceId, TgtPtr, HstPtr, Size, &AsyncInfo);
+  if (Rc != OFFLOAD_SUCCESS)
    return OFFLOAD_FAIL;
-  return __tgt_rtl_synchronize(device_id, &AsyncInfo);
+  return __tgt_rtl_synchronize(DeviceId, &AsyncInfo);
}
-int32_t __tgt_rtl_data_submit_async(int device_id, void *tgt_ptr, void *hst_ptr,
-                                    int64_t size, __tgt_async_info *AsyncInfo) {
-  assert(device_id < DeviceInfo.NumberOfDevices && "Device ID too large");
+int32_t __tgt_rtl_data_submit_async(int DeviceId, void *TgtPtr, void *HstPtr,
+                                    int64_t Size, __tgt_async_info *AsyncInfo) {
+  assert(DeviceId < DeviceInfo.NumberOfDevices && "Device ID too large");
  if (AsyncInfo) {
    initAsyncInfo(AsyncInfo);
-    return dataSubmit(device_id, tgt_ptr, hst_ptr, size, AsyncInfo);
-  } else {
-    return __tgt_rtl_data_submit(device_id, tgt_ptr, hst_ptr, size);
+    return dataSubmit(DeviceId, TgtPtr, HstPtr, Size, AsyncInfo);
  }
+  return __tgt_rtl_data_submit(DeviceId, TgtPtr, HstPtr, Size);
}
-int32_t __tgt_rtl_data_retrieve(int device_id, void *hst_ptr, void *tgt_ptr,
-                                int64_t size) {
-  assert(device_id < DeviceInfo.NumberOfDevices && "Device ID too large");
+int32_t __tgt_rtl_data_retrieve(int DeviceId, void *HstPtr, void *TgtPtr,
+                                int64_t Size) {
+  assert(DeviceId < DeviceInfo.NumberOfDevices && "Device ID too large");
  __tgt_async_info AsyncInfo;
-  int32_t rc = dataRetrieve(device_id, hst_ptr, tgt_ptr, size, &AsyncInfo);
-  if (rc != OFFLOAD_SUCCESS)
+  int32_t Rc = dataRetrieve(DeviceId, HstPtr, TgtPtr, Size, &AsyncInfo);
+  if (Rc != OFFLOAD_SUCCESS)
    return OFFLOAD_FAIL;
-  return __tgt_rtl_synchronize(device_id, &AsyncInfo);
+  return __tgt_rtl_synchronize(DeviceId, &AsyncInfo);
}
-int32_t __tgt_rtl_data_retrieve_async(int device_id, void *hst_ptr,
-                                      void *tgt_ptr, int64_t size,
+int32_t __tgt_rtl_data_retrieve_async(int DeviceId, void *HstPtr, void *TgtPtr,
+                                      int64_t Size,
                                      __tgt_async_info *AsyncInfo) {
  assert(AsyncInfo && "AsyncInfo is nullptr");
-  assert(device_id < DeviceInfo.NumberOfDevices && "Device ID too large");
+  assert(DeviceId < DeviceInfo.NumberOfDevices && "Device ID too large");
  initAsyncInfo(AsyncInfo);
-  return dataRetrieve(device_id, hst_ptr, tgt_ptr, size, AsyncInfo);
+  return dataRetrieve(DeviceId, HstPtr, TgtPtr, Size, AsyncInfo);
}
-int32_t __tgt_rtl_data_delete(int device_id, void *tgt_ptr) {
-  assert(device_id < DeviceInfo.NumberOfDevices && "Device ID too large");
-  hsa_status_t err;
-  DP("Tgt free data (tgt:%016llx).\n", (long long unsigned)(Elf64_Addr)tgt_ptr);
-  err = core::Runtime::Memfree(tgt_ptr);
-  if (err != HSA_STATUS_SUCCESS) {
+int32_t __tgt_rtl_data_delete(int DeviceId, void *TgtPtr) {
+  assert(DeviceId < DeviceInfo.NumberOfDevices && "Device ID too large");
+  hsa_status_t Err;
+  DP("Tgt free data (tgt:%016llx).\n", (long long unsigned)(Elf64_Addr)TgtPtr);
+  Err = core::Runtime::Memfree(TgtPtr);
+  if (Err != HSA_STATUS_SUCCESS) {
    DP("Error when freeing HSA memory\n");
    return OFFLOAD_FAIL;
  }
  return OFFLOAD_SUCCESS;
}
-int32_t __tgt_rtl_run_target_team_region(int32_t device_id, void *tgt_entry_ptr,
-                                         void **tgt_args,
- ptrdiff_t *tgt_offsets, - int32_t arg_num, int32_t num_teams, - int32_t thread_limit, - uint64_t loop_tripcount) { +int32_t __tgt_rtl_run_target_team_region(int32_t DeviceId, void *TgtEntryPtr, + void **TgtArgs, ptrdiff_t *TgtOffsets, + int32_t ArgNum, int32_t NumTeams, + int32_t ThreadLimit, + uint64_t LoopTripcount) { - DeviceInfo.load_run_lock.lock_shared(); - int32_t res = - runRegionLocked(device_id, tgt_entry_ptr, tgt_args, tgt_offsets, arg_num, - num_teams, thread_limit, loop_tripcount); + DeviceInfo.LoadRunLock.lock_shared(); + int32_t Res = runRegionLocked(DeviceId, TgtEntryPtr, TgtArgs, TgtOffsets, + ArgNum, NumTeams, ThreadLimit, LoopTripcount); - DeviceInfo.load_run_lock.unlock_shared(); - return res; + DeviceInfo.LoadRunLock.unlock_shared(); + return Res; } -int32_t __tgt_rtl_run_target_region(int32_t device_id, void *tgt_entry_ptr, - void **tgt_args, ptrdiff_t *tgt_offsets, - int32_t arg_num) { +int32_t __tgt_rtl_run_target_region(int32_t DeviceId, void *TgtEntryPtr, + void **TgtArgs, ptrdiff_t *TgtOffsets, + int32_t ArgNum) { // use one team and one thread // fix thread num - int32_t team_num = 1; - int32_t thread_limit = 0; // use default - return __tgt_rtl_run_target_team_region(device_id, tgt_entry_ptr, tgt_args, - tgt_offsets, arg_num, team_num, - thread_limit, 0); + int32_t TeamNum = 1; + int32_t ThreadLimit = 0; // use default + return __tgt_rtl_run_target_team_region(DeviceId, TgtEntryPtr, TgtArgs, + TgtOffsets, ArgNum, TeamNum, + ThreadLimit, 0); } int32_t __tgt_rtl_run_target_team_region_async( - int32_t device_id, void *tgt_entry_ptr, void **tgt_args, - ptrdiff_t *tgt_offsets, int32_t arg_num, int32_t num_teams, - int32_t thread_limit, uint64_t loop_tripcount, - __tgt_async_info *AsyncInfo) { + int32_t DeviceId, void *TgtEntryPtr, void **TgtArgs, ptrdiff_t *TgtOffsets, + int32_t ArgNum, int32_t NumTeams, int32_t ThreadLimit, + uint64_t LoopTripcount, __tgt_async_info *AsyncInfo) { assert(AsyncInfo && "AsyncInfo is nullptr"); initAsyncInfo(AsyncInfo); - DeviceInfo.load_run_lock.lock_shared(); - int32_t res = - runRegionLocked(device_id, tgt_entry_ptr, tgt_args, tgt_offsets, arg_num, - num_teams, thread_limit, loop_tripcount); + DeviceInfo.LoadRunLock.lock_shared(); + int32_t Res = runRegionLocked(DeviceId, TgtEntryPtr, TgtArgs, TgtOffsets, + ArgNum, NumTeams, ThreadLimit, LoopTripcount); - DeviceInfo.load_run_lock.unlock_shared(); - return res; + DeviceInfo.LoadRunLock.unlock_shared(); + return Res; } -int32_t __tgt_rtl_run_target_region_async(int32_t device_id, - void *tgt_entry_ptr, void **tgt_args, - ptrdiff_t *tgt_offsets, - int32_t arg_num, +int32_t __tgt_rtl_run_target_region_async(int32_t DeviceId, void *TgtEntryPtr, + void **TgtArgs, ptrdiff_t *TgtOffsets, + int32_t ArgNum, __tgt_async_info *AsyncInfo) { // use one team and one thread // fix thread num - int32_t team_num = 1; - int32_t thread_limit = 0; // use default - return __tgt_rtl_run_target_team_region_async( - device_id, tgt_entry_ptr, tgt_args, tgt_offsets, arg_num, team_num, - thread_limit, 0, AsyncInfo); + int32_t TeamNum = 1; + int32_t ThreadLimit = 0; // use default + return __tgt_rtl_run_target_team_region_async(DeviceId, TgtEntryPtr, TgtArgs, + TgtOffsets, ArgNum, TeamNum, + ThreadLimit, 0, AsyncInfo); } -int32_t __tgt_rtl_synchronize(int32_t device_id, __tgt_async_info *AsyncInfo) { +int32_t __tgt_rtl_synchronize(int32_t DeviceId, __tgt_async_info *AsyncInfo) { assert(AsyncInfo && "AsyncInfo is nullptr"); // Cuda asserts that AsyncInfo->Queue is non-null, but this invariant @@ -2599,11 
+2580,11 @@
  return OFFLOAD_SUCCESS;
}
-void __tgt_rtl_print_device_info(int32_t device_id) {
-  // TODO: Assertion to see if device_id is correct
+void __tgt_rtl_print_device_info(int32_t DeviceId) {
+  // TODO: Assertion to see if DeviceId is correct
  // NOTE: We don't need to set context for print device info.
-  DeviceInfo.printDeviceInfo(device_id, DeviceInfo.HSAAgents[device_id]);
+  DeviceInfo.printDeviceInfo(DeviceId, DeviceInfo.HSAAgents[DeviceId]);
}
} // extern "C"
diff --git a/openmp/libomptarget/plugins/common/elf_common/elf_common.h b/openmp/libomptarget/plugins/common/elf_common/elf_common.h
--- a/openmp/libomptarget/plugins/common/elf_common/elf_common.h
+++ b/openmp/libomptarget/plugins/common/elf_common/elf_common.h
@@ -18,10 +18,10 @@
/// Return non-zero, if the given \p image is an ELF object, which
/// e_machine matches \p target_id; return zero otherwise.
-EXTERN int32_t elf_check_machine(__tgt_device_image *image, uint16_t target_id);
+EXTERN int32_t elf_check_machine(__tgt_device_image *Image, uint16_t TargetId);
/// Return non-zero, if the given \p image is an ET_DYN ELF object;
/// return zero otherwise.
-EXTERN int32_t elf_is_dynamic(__tgt_device_image *image);
+EXTERN int32_t elf_is_dynamic(__tgt_device_image *Image);
#endif // LLVM_OPENMP_LIBOMPTARGET_PLUGINS_COMMON_ELF_COMMON_ELF_COMMON_H
diff --git a/openmp/libomptarget/plugins/common/elf_common/elf_common.cpp b/openmp/libomptarget/plugins/common/elf_common/elf_common.cpp
--- a/openmp/libomptarget/plugins/common/elf_common/elf_common.cpp
+++ b/openmp/libomptarget/plugins/common/elf_common/elf_common.cpp
@@ -67,22 +67,22 @@
}
// Check whether an image is valid for execution on target_id
-int32_t elf_check_machine(__tgt_device_image *image, uint16_t target_id) {
-  auto CheckMachine = [target_id](const ELFObjectFileBase *Object) {
-    return target_id == Object->getEMachine();
+int32_t elf_check_machine(__tgt_device_image *Image, uint16_t TargetId) {
+  auto CheckMachine = [TargetId](const ELFObjectFileBase *Object) {
+    return TargetId == Object->getEMachine();
  };
-  return withBytesAsElf(reinterpret_cast<char *>(image->ImageStart),
-                        reinterpret_cast<char *>(image->ImageEnd),
+  return withBytesAsElf(reinterpret_cast<char *>(Image->ImageStart),
+                        reinterpret_cast<char *>(Image->ImageEnd),
                        CheckMachine);
}
-int32_t elf_is_dynamic(__tgt_device_image *image) {
+int32_t elf_is_dynamic(__tgt_device_image *Image) {
  auto CheckDynType = [](const ELFObjectFileBase *Object) {
    uint16_t Type = Object->getEType();
    DP("ELF Type: %d\n", Type);
    return Type == ET_DYN;
  };
-  return withBytesAsElf(reinterpret_cast<char *>(image->ImageStart),
-                        reinterpret_cast<char *>(image->ImageEnd),
+  return withBytesAsElf(reinterpret_cast<char *>(Image->ImageStart),
+                        reinterpret_cast<char *>(Image->ImageEnd),
                        CheckDynType);
}
diff --git a/openmp/libomptarget/plugins/cuda/src/rtl.cpp b/openmp/libomptarget/plugins/cuda/src/rtl.cpp
--- a/openmp/libomptarget/plugins/cuda/src/rtl.cpp
+++ b/openmp/libomptarget/plugins/cuda/src/rtl.cpp
@@ -86,8 +86,8 @@
/// Maximal number of threads per block for this kernel.
int MaxThreadsPerBlock = 0; - KernelTy(CUfunction _Func, llvm::omp::OMPTgtExecModeFlags _ExecutionMode) - : Func(_Func), ExecutionMode(_ExecutionMode) {} + KernelTy(CUfunction Func, llvm::omp::OMPTgtExecModeFlags ExecutionMode) + : Func(Func), ExecutionMode(ExecutionMode) {} }; namespace { @@ -437,9 +437,9 @@ bool UseMemoryManager = true; // Record entry point associated with device - void addOffloadEntry(const int DeviceId, const __tgt_offload_entry entry) { + void addOffloadEntry(const int DeviceId, const __tgt_offload_entry Entry) { FuncOrGblEntryTy &E = DeviceData[DeviceId].FuncGblEntries.back(); - E.Entries.push_back(entry); + E.Entries.push_back(Entry); } // Return a pointer to the entry associated with the pointer @@ -1255,19 +1255,19 @@ return (Err == CUDA_SUCCESS) ? OFFLOAD_SUCCESS : OFFLOAD_FAIL; } - void printDeviceInfo(int32_t device_id) { + void printDeviceInfo(int32_t DeviceId) { char TmpChar[1000]; std::string TmpStr; size_t TmpSt; int TmpInt, TmpInt2, TmpInt3; CUdevice Device; - checkResult(cuDeviceGet(&Device, device_id), + checkResult(cuDeviceGet(&Device, DeviceId), "Error returned from cuCtxGetDevice\n"); cuDriverGetVersion(&TmpInt); printf(" CUDA Driver Version: \t\t%d \n", TmpInt); - printf(" CUDA Device Number: \t\t%d \n", device_id); + printf(" CUDA Device Number: \t\t%d \n", DeviceId); checkResult(cuDeviceGetName(TmpChar, 1000, Device), "Error returned from cuDeviceGetName\n"); printf(" Device Name: \t\t\t%s \n", TmpChar); @@ -1515,8 +1515,8 @@ extern "C" { #endif -int32_t __tgt_rtl_is_valid_binary(__tgt_device_image *image) { - return elf_check_machine(image, /* EM_CUDA */ 190); +int32_t __tgt_rtl_is_valid_binary(__tgt_device_image *Image) { + return elf_check_machine(Image, /* EM_CUDA */ 190); } int32_t __tgt_rtl_number_of_devices() { return DeviceRTL.getNumOfDevices(); } @@ -1527,211 +1527,204 @@ return RequiresFlags; } -int32_t __tgt_rtl_is_data_exchangable(int32_t src_dev_id, int dst_dev_id) { - if (DeviceRTL.isValidDeviceId(src_dev_id) && - DeviceRTL.isValidDeviceId(dst_dev_id)) +int32_t __tgt_rtl_is_data_exchangable(int32_t SrcDevId, int DstDevId) { + if (DeviceRTL.isValidDeviceId(SrcDevId) && + DeviceRTL.isValidDeviceId(DstDevId)) return 1; return 0; } -int32_t __tgt_rtl_init_device(int32_t device_id) { - assert(DeviceRTL.isValidDeviceId(device_id) && "device_id is invalid"); +int32_t __tgt_rtl_init_device(int32_t DeviceId) { + assert(DeviceRTL.isValidDeviceId(DeviceId) && "device_id is invalid"); // Context is set when init the device. - return DeviceRTL.initDevice(device_id); + return DeviceRTL.initDevice(DeviceId); } -int32_t __tgt_rtl_deinit_device(int32_t device_id) { - assert(DeviceRTL.isValidDeviceId(device_id) && "device_id is invalid"); +int32_t __tgt_rtl_deinit_device(int32_t DeviceId) { + assert(DeviceRTL.isValidDeviceId(DeviceId) && "device_id is invalid"); // Context is set when deinit the device. 
- return DeviceRTL.deinitDevice(device_id); + return DeviceRTL.deinitDevice(DeviceId); } -__tgt_target_table *__tgt_rtl_load_binary(int32_t device_id, - __tgt_device_image *image) { - assert(DeviceRTL.isValidDeviceId(device_id) && "device_id is invalid"); +__tgt_target_table *__tgt_rtl_load_binary(int32_t DeviceId, + __tgt_device_image *Image) { + assert(DeviceRTL.isValidDeviceId(DeviceId) && "device_id is invalid"); - if (DeviceRTL.setContext(device_id) != OFFLOAD_SUCCESS) + if (DeviceRTL.setContext(DeviceId) != OFFLOAD_SUCCESS) return nullptr; - return DeviceRTL.loadBinary(device_id, image); + return DeviceRTL.loadBinary(DeviceId, Image); } -void *__tgt_rtl_data_alloc(int32_t device_id, int64_t size, void *, - int32_t kind) { - assert(DeviceRTL.isValidDeviceId(device_id) && "device_id is invalid"); +void *__tgt_rtl_data_alloc(int32_t DeviceId, int64_t Size, void *, + int32_t Kind) { + assert(DeviceRTL.isValidDeviceId(DeviceId) && "device_id is invalid"); - if (DeviceRTL.setContext(device_id) != OFFLOAD_SUCCESS) + if (DeviceRTL.setContext(DeviceId) != OFFLOAD_SUCCESS) return nullptr; - return DeviceRTL.dataAlloc(device_id, size, (TargetAllocTy)kind); + return DeviceRTL.dataAlloc(DeviceId, Size, (TargetAllocTy)Kind); } -int32_t __tgt_rtl_data_submit(int32_t device_id, void *tgt_ptr, void *hst_ptr, - int64_t size) { - assert(DeviceRTL.isValidDeviceId(device_id) && "device_id is invalid"); +int32_t __tgt_rtl_data_submit(int32_t DeviceId, void *TgtPtr, void *HstPtr, + int64_t Size) { + assert(DeviceRTL.isValidDeviceId(DeviceId) && "device_id is invalid"); // Context is set in __tgt_rtl_data_submit_async. __tgt_async_info AsyncInfo; - const int32_t rc = __tgt_rtl_data_submit_async(device_id, tgt_ptr, hst_ptr, - size, &AsyncInfo); - if (rc != OFFLOAD_SUCCESS) + const int32_t Rc = + __tgt_rtl_data_submit_async(DeviceId, TgtPtr, HstPtr, Size, &AsyncInfo); + if (Rc != OFFLOAD_SUCCESS) return OFFLOAD_FAIL; - return __tgt_rtl_synchronize(device_id, &AsyncInfo); + return __tgt_rtl_synchronize(DeviceId, &AsyncInfo); } -int32_t __tgt_rtl_data_submit_async(int32_t device_id, void *tgt_ptr, - void *hst_ptr, int64_t size, - __tgt_async_info *async_info_ptr) { - assert(DeviceRTL.isValidDeviceId(device_id) && "device_id is invalid"); - assert(async_info_ptr && "async_info_ptr is nullptr"); +int32_t __tgt_rtl_data_submit_async(int32_t DeviceId, void *TgtPtr, + void *HstPtr, int64_t Size, + __tgt_async_info *AsyncInfoPtr) { + assert(DeviceRTL.isValidDeviceId(DeviceId) && "device_id is invalid"); + assert(AsyncInfoPtr && "async_info_ptr is nullptr"); - if (DeviceRTL.setContext(device_id) != OFFLOAD_SUCCESS) + if (DeviceRTL.setContext(DeviceId) != OFFLOAD_SUCCESS) return OFFLOAD_FAIL; - return DeviceRTL.dataSubmit(device_id, tgt_ptr, hst_ptr, size, - async_info_ptr); + return DeviceRTL.dataSubmit(DeviceId, TgtPtr, HstPtr, Size, AsyncInfoPtr); } -int32_t __tgt_rtl_data_retrieve(int32_t device_id, void *hst_ptr, void *tgt_ptr, - int64_t size) { - assert(DeviceRTL.isValidDeviceId(device_id) && "device_id is invalid"); +int32_t __tgt_rtl_data_retrieve(int32_t DeviceId, void *HstPtr, void *TgtPtr, + int64_t Size) { + assert(DeviceRTL.isValidDeviceId(DeviceId) && "device_id is invalid"); // Context is set in __tgt_rtl_data_retrieve_async. 
__tgt_async_info AsyncInfo; - const int32_t rc = __tgt_rtl_data_retrieve_async(device_id, hst_ptr, tgt_ptr, - size, &AsyncInfo); - if (rc != OFFLOAD_SUCCESS) + const int32_t Rc = + __tgt_rtl_data_retrieve_async(DeviceId, HstPtr, TgtPtr, Size, &AsyncInfo); + if (Rc != OFFLOAD_SUCCESS) return OFFLOAD_FAIL; - return __tgt_rtl_synchronize(device_id, &AsyncInfo); + return __tgt_rtl_synchronize(DeviceId, &AsyncInfo); } -int32_t __tgt_rtl_data_retrieve_async(int32_t device_id, void *hst_ptr, - void *tgt_ptr, int64_t size, - __tgt_async_info *async_info_ptr) { - assert(DeviceRTL.isValidDeviceId(device_id) && "device_id is invalid"); - assert(async_info_ptr && "async_info_ptr is nullptr"); +int32_t __tgt_rtl_data_retrieve_async(int32_t DeviceId, void *HstPtr, + void *TgtPtr, int64_t Size, + __tgt_async_info *AsyncInfoPtr) { + assert(DeviceRTL.isValidDeviceId(DeviceId) && "device_id is invalid"); + assert(AsyncInfoPtr && "async_info_ptr is nullptr"); - if (DeviceRTL.setContext(device_id) != OFFLOAD_SUCCESS) + if (DeviceRTL.setContext(DeviceId) != OFFLOAD_SUCCESS) return OFFLOAD_FAIL; - return DeviceRTL.dataRetrieve(device_id, hst_ptr, tgt_ptr, size, - async_info_ptr); + return DeviceRTL.dataRetrieve(DeviceId, HstPtr, TgtPtr, Size, AsyncInfoPtr); } -int32_t __tgt_rtl_data_exchange_async(int32_t src_dev_id, void *src_ptr, - int dst_dev_id, void *dst_ptr, - int64_t size, +int32_t __tgt_rtl_data_exchange_async(int32_t SrcDevId, void *SrcPtr, + int DstDevId, void *DstPtr, int64_t Size, __tgt_async_info *AsyncInfo) { - assert(DeviceRTL.isValidDeviceId(src_dev_id) && "src_dev_id is invalid"); - assert(DeviceRTL.isValidDeviceId(dst_dev_id) && "dst_dev_id is invalid"); + assert(DeviceRTL.isValidDeviceId(SrcDevId) && "src_dev_id is invalid"); + assert(DeviceRTL.isValidDeviceId(DstDevId) && "dst_dev_id is invalid"); assert(AsyncInfo && "AsyncInfo is nullptr"); - if (DeviceRTL.setContext(src_dev_id) != OFFLOAD_SUCCESS) + if (DeviceRTL.setContext(SrcDevId) != OFFLOAD_SUCCESS) return OFFLOAD_FAIL; - return DeviceRTL.dataExchange(src_dev_id, src_ptr, dst_dev_id, dst_ptr, size, + return DeviceRTL.dataExchange(SrcDevId, SrcPtr, DstDevId, DstPtr, Size, AsyncInfo); } -int32_t __tgt_rtl_data_exchange(int32_t src_dev_id, void *src_ptr, - int32_t dst_dev_id, void *dst_ptr, - int64_t size) { - assert(DeviceRTL.isValidDeviceId(src_dev_id) && "src_dev_id is invalid"); - assert(DeviceRTL.isValidDeviceId(dst_dev_id) && "dst_dev_id is invalid"); +int32_t __tgt_rtl_data_exchange(int32_t SrcDevId, void *SrcPtr, + int32_t DstDevId, void *DstPtr, int64_t Size) { + assert(DeviceRTL.isValidDeviceId(SrcDevId) && "src_dev_id is invalid"); + assert(DeviceRTL.isValidDeviceId(DstDevId) && "dst_dev_id is invalid"); // Context is set in __tgt_rtl_data_exchange_async. 
__tgt_async_info AsyncInfo; - const int32_t rc = __tgt_rtl_data_exchange_async( - src_dev_id, src_ptr, dst_dev_id, dst_ptr, size, &AsyncInfo); - if (rc != OFFLOAD_SUCCESS) + const int32_t Rc = __tgt_rtl_data_exchange_async(SrcDevId, SrcPtr, DstDevId, + DstPtr, Size, &AsyncInfo); + if (Rc != OFFLOAD_SUCCESS) return OFFLOAD_FAIL; - return __tgt_rtl_synchronize(src_dev_id, &AsyncInfo); + return __tgt_rtl_synchronize(SrcDevId, &AsyncInfo); } -int32_t __tgt_rtl_data_delete(int32_t device_id, void *tgt_ptr) { - assert(DeviceRTL.isValidDeviceId(device_id) && "device_id is invalid"); +int32_t __tgt_rtl_data_delete(int32_t DeviceId, void *TgtPtr) { + assert(DeviceRTL.isValidDeviceId(DeviceId) && "device_id is invalid"); - if (DeviceRTL.setContext(device_id) != OFFLOAD_SUCCESS) + if (DeviceRTL.setContext(DeviceId) != OFFLOAD_SUCCESS) return OFFLOAD_FAIL; - return DeviceRTL.dataDelete(device_id, tgt_ptr); + return DeviceRTL.dataDelete(DeviceId, TgtPtr); } -int32_t __tgt_rtl_run_target_team_region(int32_t device_id, void *tgt_entry_ptr, - void **tgt_args, - ptrdiff_t *tgt_offsets, - int32_t arg_num, int32_t team_num, - int32_t thread_limit, - uint64_t loop_tripcount) { - assert(DeviceRTL.isValidDeviceId(device_id) && "device_id is invalid"); +int32_t __tgt_rtl_run_target_team_region(int32_t DeviceId, void *TgtEntryPtr, + void **TgtArgs, ptrdiff_t *TgtOffsets, + int32_t ArgNum, int32_t TeamNum, + int32_t ThreadLimit, + uint64_t LoopTripcount) { + assert(DeviceRTL.isValidDeviceId(DeviceId) && "device_id is invalid"); // Context is set in __tgt_rtl_run_target_team_region_async. __tgt_async_info AsyncInfo; - const int32_t rc = __tgt_rtl_run_target_team_region_async( - device_id, tgt_entry_ptr, tgt_args, tgt_offsets, arg_num, team_num, - thread_limit, loop_tripcount, &AsyncInfo); - if (rc != OFFLOAD_SUCCESS) + const int32_t Rc = __tgt_rtl_run_target_team_region_async( + DeviceId, TgtEntryPtr, TgtArgs, TgtOffsets, ArgNum, TeamNum, ThreadLimit, + LoopTripcount, &AsyncInfo); + if (Rc != OFFLOAD_SUCCESS) return OFFLOAD_FAIL; - return __tgt_rtl_synchronize(device_id, &AsyncInfo); + return __tgt_rtl_synchronize(DeviceId, &AsyncInfo); } int32_t __tgt_rtl_run_target_team_region_async( - int32_t device_id, void *tgt_entry_ptr, void **tgt_args, - ptrdiff_t *tgt_offsets, int32_t arg_num, int32_t team_num, - int32_t thread_limit, uint64_t loop_tripcount, - __tgt_async_info *async_info_ptr) { - assert(DeviceRTL.isValidDeviceId(device_id) && "device_id is invalid"); + int32_t DeviceId, void *TgtEntryPtr, void **TgtArgs, ptrdiff_t *TgtOffsets, + int32_t ArgNum, int32_t TeamNum, int32_t ThreadLimit, + uint64_t LoopTripcount, __tgt_async_info *AsyncInfoPtr) { + assert(DeviceRTL.isValidDeviceId(DeviceId) && "device_id is invalid"); - if (DeviceRTL.setContext(device_id) != OFFLOAD_SUCCESS) + if (DeviceRTL.setContext(DeviceId) != OFFLOAD_SUCCESS) return OFFLOAD_FAIL; - return DeviceRTL.runTargetTeamRegion( - device_id, tgt_entry_ptr, tgt_args, tgt_offsets, arg_num, team_num, - thread_limit, loop_tripcount, async_info_ptr); + return DeviceRTL.runTargetTeamRegion(DeviceId, TgtEntryPtr, TgtArgs, + TgtOffsets, ArgNum, TeamNum, ThreadLimit, + LoopTripcount, AsyncInfoPtr); } -int32_t __tgt_rtl_run_target_region(int32_t device_id, void *tgt_entry_ptr, - void **tgt_args, ptrdiff_t *tgt_offsets, - int32_t arg_num) { - assert(DeviceRTL.isValidDeviceId(device_id) && "device_id is invalid"); +int32_t __tgt_rtl_run_target_region(int32_t DeviceId, void *TgtEntryPtr, + void **TgtArgs, ptrdiff_t *TgtOffsets, + int32_t ArgNum) { + 
assert(DeviceRTL.isValidDeviceId(DeviceId) && "device_id is invalid"); // Context is set in __tgt_rtl_run_target_region_async. __tgt_async_info AsyncInfo; - const int32_t rc = __tgt_rtl_run_target_region_async( - device_id, tgt_entry_ptr, tgt_args, tgt_offsets, arg_num, &AsyncInfo); - if (rc != OFFLOAD_SUCCESS) + const int32_t Rc = __tgt_rtl_run_target_region_async( + DeviceId, TgtEntryPtr, TgtArgs, TgtOffsets, ArgNum, &AsyncInfo); + if (Rc != OFFLOAD_SUCCESS) return OFFLOAD_FAIL; - return __tgt_rtl_synchronize(device_id, &AsyncInfo); + return __tgt_rtl_synchronize(DeviceId, &AsyncInfo); } -int32_t __tgt_rtl_run_target_region_async(int32_t device_id, - void *tgt_entry_ptr, void **tgt_args, - ptrdiff_t *tgt_offsets, - int32_t arg_num, - __tgt_async_info *async_info_ptr) { - assert(DeviceRTL.isValidDeviceId(device_id) && "device_id is invalid"); +int32_t __tgt_rtl_run_target_region_async(int32_t DeviceId, void *TgtEntryPtr, + void **TgtArgs, ptrdiff_t *TgtOffsets, + int32_t ArgNum, + __tgt_async_info *AsyncInfoPtr) { + assert(DeviceRTL.isValidDeviceId(DeviceId) && "device_id is invalid"); // Context is set in __tgt_rtl_run_target_team_region_async. return __tgt_rtl_run_target_team_region_async( - device_id, tgt_entry_ptr, tgt_args, tgt_offsets, arg_num, + DeviceId, TgtEntryPtr, TgtArgs, TgtOffsets, ArgNum, /* team num*/ 1, /* thread_limit */ 1, /* loop_tripcount */ 0, - async_info_ptr); + AsyncInfoPtr); } -int32_t __tgt_rtl_synchronize(int32_t device_id, - __tgt_async_info *async_info_ptr) { - assert(DeviceRTL.isValidDeviceId(device_id) && "device_id is invalid"); - assert(async_info_ptr && "async_info_ptr is nullptr"); - assert(async_info_ptr->Queue && "async_info_ptr->Queue is nullptr"); +int32_t __tgt_rtl_synchronize(int32_t DeviceId, + __tgt_async_info *AsyncInfoPtr) { + assert(DeviceRTL.isValidDeviceId(DeviceId) && "device_id is invalid"); + assert(AsyncInfoPtr && "async_info_ptr is nullptr"); + assert(AsyncInfoPtr->Queue && "async_info_ptr->Queue is nullptr"); // NOTE: We don't need to set context for stream sync. - return DeviceRTL.synchronize(device_id, async_info_ptr); + return DeviceRTL.synchronize(DeviceId, AsyncInfoPtr); } void __tgt_rtl_set_info_flag(uint32_t NewInfoLevel) { @@ -1739,89 +1732,88 @@ InfoLevel.store(NewInfoLevel); } -void __tgt_rtl_print_device_info(int32_t device_id) { - assert(DeviceRTL.isValidDeviceId(device_id) && "device_id is invalid"); +void __tgt_rtl_print_device_info(int32_t DeviceId) { + assert(DeviceRTL.isValidDeviceId(DeviceId) && "device_id is invalid"); // NOTE: We don't need to set context for print device info. 
- DeviceRTL.printDeviceInfo(device_id); + DeviceRTL.printDeviceInfo(DeviceId); } -int32_t __tgt_rtl_create_event(int32_t device_id, void **event) { - assert(event && "event is nullptr"); +int32_t __tgt_rtl_create_event(int32_t DeviceId, void **Event) { + assert(Event && "event is nullptr"); - if (DeviceRTL.setContext(device_id) != OFFLOAD_SUCCESS) + if (DeviceRTL.setContext(DeviceId) != OFFLOAD_SUCCESS) return OFFLOAD_FAIL; - return DeviceRTL.createEvent(device_id, event); + return DeviceRTL.createEvent(DeviceId, Event); } -int32_t __tgt_rtl_record_event(int32_t device_id, void *event_ptr, - __tgt_async_info *async_info_ptr) { - assert(async_info_ptr && "async_info_ptr is nullptr"); - assert(async_info_ptr->Queue && "async_info_ptr->Queue is nullptr"); - assert(event_ptr && "event_ptr is nullptr"); +int32_t __tgt_rtl_record_event(int32_t DeviceId, void *EventPtr, + __tgt_async_info *AsyncInfoPtr) { + assert(AsyncInfoPtr && "async_info_ptr is nullptr"); + assert(AsyncInfoPtr->Queue && "async_info_ptr->Queue is nullptr"); + assert(EventPtr && "event_ptr is nullptr"); // NOTE: We might not need to set context for event record. - return recordEvent(event_ptr, async_info_ptr); + return recordEvent(EventPtr, AsyncInfoPtr); } -int32_t __tgt_rtl_wait_event(int32_t device_id, void *event_ptr, - __tgt_async_info *async_info_ptr) { - assert(DeviceRTL.isValidDeviceId(device_id) && "device_id is invalid"); - assert(async_info_ptr && "async_info_ptr is nullptr"); - assert(event_ptr && "event is nullptr"); +int32_t __tgt_rtl_wait_event(int32_t DeviceId, void *EventPtr, + __tgt_async_info *AsyncInfoPtr) { + assert(DeviceRTL.isValidDeviceId(DeviceId) && "device_id is invalid"); + assert(AsyncInfoPtr && "async_info_ptr is nullptr"); + assert(EventPtr && "event is nullptr"); // If we don't have a queue we need to set the context. - if (!async_info_ptr->Queue && - DeviceRTL.setContext(device_id) != OFFLOAD_SUCCESS) + if (!AsyncInfoPtr->Queue && DeviceRTL.setContext(DeviceId) != OFFLOAD_SUCCESS) return OFFLOAD_FAIL; - return DeviceRTL.waitEvent(device_id, async_info_ptr, event_ptr); + return DeviceRTL.waitEvent(DeviceId, AsyncInfoPtr, EventPtr); } -int32_t __tgt_rtl_sync_event(int32_t device_id, void *event_ptr) { - assert(event_ptr && "event is nullptr"); +int32_t __tgt_rtl_sync_event(int32_t DeviceId, void *EventPtr) { + assert(EventPtr && "event is nullptr"); // NOTE: We might not need to set context for event sync. 
-  return syncEvent(event_ptr);
+  return syncEvent(EventPtr);
}
-int32_t __tgt_rtl_destroy_event(int32_t device_id, void *event_ptr) {
-  assert(event_ptr && "event is nullptr");
+int32_t __tgt_rtl_destroy_event(int32_t DeviceId, void *EventPtr) {
+  assert(EventPtr && "event is nullptr");
-  if (DeviceRTL.setContext(device_id) != OFFLOAD_SUCCESS)
+  if (DeviceRTL.setContext(DeviceId) != OFFLOAD_SUCCESS)
    return OFFLOAD_FAIL;
-  return DeviceRTL.destroyEvent(device_id, event_ptr);
+  return DeviceRTL.destroyEvent(DeviceId, EventPtr);
}
-int32_t __tgt_rtl_release_async_info(int32_t device_id,
-                                     __tgt_async_info *async_info) {
-  assert(DeviceRTL.isValidDeviceId(device_id) && "device_id is invalid");
-  assert(async_info && "async_info is nullptr");
+int32_t __tgt_rtl_release_async_info(int32_t DeviceId,
+                                     __tgt_async_info *AsyncInfo) {
+  assert(DeviceRTL.isValidDeviceId(DeviceId) && "device_id is invalid");
+  assert(AsyncInfo && "async_info is nullptr");
-  if (DeviceRTL.setContext(device_id) != OFFLOAD_SUCCESS)
+  if (DeviceRTL.setContext(DeviceId) != OFFLOAD_SUCCESS)
    return OFFLOAD_FAIL;
-  return DeviceRTL.releaseAsyncInfo(device_id, async_info);
+  return DeviceRTL.releaseAsyncInfo(DeviceId, AsyncInfo);
}
-int32_t __tgt_rtl_init_async_info(int32_t device_id,
-                                  __tgt_async_info **async_info) {
-  assert(DeviceRTL.isValidDeviceId(device_id) && "device_id is invalid");
-  assert(async_info && "async_info is nullptr");
+int32_t __tgt_rtl_init_async_info(int32_t DeviceId,
+                                  __tgt_async_info **AsyncInfo) {
+  assert(DeviceRTL.isValidDeviceId(DeviceId) && "device_id is invalid");
+  assert(AsyncInfo && "async_info is nullptr");
-  if (DeviceRTL.setContext(device_id) != OFFLOAD_SUCCESS)
+  if (DeviceRTL.setContext(DeviceId) != OFFLOAD_SUCCESS)
    return OFFLOAD_FAIL;
-  return DeviceRTL.initAsyncInfo(device_id, async_info);
+  return DeviceRTL.initAsyncInfo(DeviceId, AsyncInfo);
}
-int32_t __tgt_rtl_init_device_info(int32_t device_id,
-                                   __tgt_device_info *device_info_ptr,
-                                   const char **err_str) {
-  assert(DeviceRTL.isValidDeviceId(device_id) && "device_id is invalid");
-  assert(device_info_ptr && "device_info_ptr is nullptr");
+int32_t __tgt_rtl_init_device_info(int32_t DeviceId,
+                                   __tgt_device_info *DeviceInfoPtr,
+                                   const char **ErrStr) {
+  assert(DeviceRTL.isValidDeviceId(DeviceId) && "device_id is invalid");
+  assert(DeviceInfoPtr && "device_info_ptr is nullptr");
-  if (DeviceRTL.setContext(device_id) != OFFLOAD_SUCCESS)
+  if (DeviceRTL.setContext(DeviceId) != OFFLOAD_SUCCESS)
    return OFFLOAD_FAIL;
-  return DeviceRTL.initDeviceInfo(device_id, device_info_ptr, err_str);
+  return DeviceRTL.initDeviceInfo(DeviceId, DeviceInfoPtr, ErrStr);
}
#ifdef __cplusplus
diff --git a/openmp/libomptarget/plugins/generic-elf-64bit/src/rtl.cpp b/openmp/libomptarget/plugins/generic-elf-64bit/src/rtl.cpp
--- a/openmp/libomptarget/plugins/generic-elf-64bit/src/rtl.cpp
+++ b/openmp/libomptarget/plugins/generic-elf-64bit/src/rtl.cpp
@@ -58,26 +58,27 @@
  std::list<DynLibTy> DynLibs;
  // Record entry point associated with device.
- void createOffloadTable(int32_t device_id, __tgt_offload_entry *begin, - __tgt_offload_entry *end) { - assert(device_id < (int32_t)FuncGblEntries.size() && + void createOffloadTable(int32_t DeviceId, __tgt_offload_entry *Begin, + __tgt_offload_entry *End) { + assert(DeviceId < (int32_t)FuncGblEntries.size() && "Unexpected device id!"); - FuncGblEntries[device_id].emplace_back(); - FuncOrGblEntryTy &E = FuncGblEntries[device_id].back(); + FuncGblEntries[DeviceId].emplace_back(); + FuncOrGblEntryTy &E = FuncGblEntries[DeviceId].back(); - E.Table.EntriesBegin = begin; - E.Table.EntriesEnd = end; + E.Table.EntriesBegin = Begin; + E.Table.EntriesEnd = End; } // Return true if the entry is associated with device. - bool findOffloadEntry(int32_t device_id, void *addr) { - assert(device_id < (int32_t)FuncGblEntries.size() && + bool findOffloadEntry(int32_t DeviceId, void *Addr) { + assert(DeviceId < (int32_t)FuncGblEntries.size() && "Unexpected device id!"); - FuncOrGblEntryTy &E = FuncGblEntries[device_id].back(); + FuncOrGblEntryTy &E = FuncGblEntries[DeviceId].back(); - for (__tgt_offload_entry *i = E.Table.EntriesBegin, *e = E.Table.EntriesEnd; - i < e; ++i) { - if (i->addr == addr) + for (__tgt_offload_entry *I = E.Table.EntriesBegin, + *End = E.Table.EntriesEnd; + I < End; ++I) { + if (I->addr == Addr) return true; } @@ -85,22 +86,22 @@ } // Return the pointer to the target entries table. - __tgt_target_table *getOffloadEntriesTable(int32_t device_id) { - assert(device_id < (int32_t)FuncGblEntries.size() && + __tgt_target_table *getOffloadEntriesTable(int32_t DeviceId) { + assert(DeviceId < (int32_t)FuncGblEntries.size() && "Unexpected device id!"); - FuncOrGblEntryTy &E = FuncGblEntries[device_id].back(); + FuncOrGblEntryTy &E = FuncGblEntries[DeviceId].back(); return &E.Table; } - RTLDeviceInfoTy(int32_t num_devices) { FuncGblEntries.resize(num_devices); } + RTLDeviceInfoTy(int32_t NumDevices) { FuncGblEntries.resize(NumDevices); } ~RTLDeviceInfoTy() { // Close dynamic libraries - for (auto &lib : DynLibs) { - if (lib.Handle) { - dlclose(lib.Handle); - remove(lib.FileName.c_str()); + for (auto &Lib : DynLibs) { + if (Lib.Handle) { + dlclose(Lib.Handle); + remove(Lib.FileName.c_str()); } } } @@ -112,29 +113,29 @@ extern "C" { #endif -int32_t __tgt_rtl_is_valid_binary(__tgt_device_image *image) { +int32_t __tgt_rtl_is_valid_binary(__tgt_device_image *Image) { // If we don't have a valid ELF ID we can just fail. 
#if TARGET_ELF_ID < 1 return 0; #else - return elf_check_machine(image, TARGET_ELF_ID); + return elf_check_machine(Image, TARGET_ELF_ID); #endif } int32_t __tgt_rtl_number_of_devices() { return NUMBER_OF_DEVICES; } -int32_t __tgt_rtl_init_device(int32_t device_id) { return OFFLOAD_SUCCESS; } +int32_t __tgt_rtl_init_device(int32_t DeviceId) { return OFFLOAD_SUCCESS; } -__tgt_target_table *__tgt_rtl_load_binary(int32_t device_id, - __tgt_device_image *image) { +__tgt_target_table *__tgt_rtl_load_binary(int32_t DeviceId, + __tgt_device_image *Image) { - DP("Dev %d: load binary from " DPxMOD " image\n", device_id, - DPxPTR(image->ImageStart)); + DP("Dev %d: load binary from " DPxMOD " image\n", DeviceId, + DPxPTR(Image->ImageStart)); - assert(device_id >= 0 && device_id < NUMBER_OF_DEVICES && "bad dev id"); + assert(DeviceId >= 0 && DeviceId < NUMBER_OF_DEVICES && "bad dev id"); - size_t ImageSize = (size_t)image->ImageEnd - (size_t)image->ImageStart; - size_t NumEntries = (size_t)(image->EntriesEnd - image->EntriesBegin); + size_t ImageSize = (size_t)Image->ImageEnd - (size_t)Image->ImageStart; + size_t NumEntries = (size_t)(Image->EntriesEnd - Image->EntriesBegin); DP("Expecting to have %zd entries defined.\n", NumEntries); // Is the library version incompatible with the header file? @@ -144,47 +145,47 @@ } // Obtain elf handler - Elf *e = elf_memory((char *)image->ImageStart, ImageSize); - if (!e) { + Elf *E = elf_memory((char *)Image->ImageStart, ImageSize); + if (!E) { DP("Unable to get ELF handle: %s!\n", elf_errmsg(-1)); return NULL; } - if (elf_kind(e) != ELF_K_ELF) { + if (elf_kind(E) != ELF_K_ELF) { DP("Invalid Elf kind!\n"); - elf_end(e); + elf_end(E); return NULL; } // Find the entries section offset - Elf_Scn *section = 0; - Elf64_Off entries_offset = 0; + Elf_Scn *Section = 0; + Elf64_Off EntriesOffset = 0; - size_t shstrndx; + size_t Shstrndx; - if (elf_getshdrstrndx(e, &shstrndx)) { + if (elf_getshdrstrndx(E, &Shstrndx)) { DP("Unable to get ELF strings index!\n"); - elf_end(e); + elf_end(E); return NULL; } - while ((section = elf_nextscn(e, section))) { - GElf_Shdr hdr; - gelf_getshdr(section, &hdr); + while ((Section = elf_nextscn(E, Section))) { + GElf_Shdr Hdr; + gelf_getshdr(Section, &Hdr); - if (!strcmp(elf_strptr(e, shstrndx, hdr.sh_name), OFFLOADSECTIONNAME)) { - entries_offset = hdr.sh_addr; + if (!strcmp(elf_strptr(E, Shstrndx, Hdr.sh_name), OFFLOADSECTIONNAME)) { + EntriesOffset = Hdr.sh_addr; break; } } - if (!entries_offset) { + if (!EntriesOffset) { DP("Entries Section Offset Not Found\n"); - elf_end(e); + elf_end(E); return NULL; } - DP("Offset of entries section is (" DPxMOD ").\n", DPxPTR(entries_offset)); + DP("Offset of entries section is (" DPxMOD ").\n", DPxPTR(EntriesOffset)); // load dynamic library and get the entry points. We use the dl library // to do the loading of the library, but we could do it directly to avoid the @@ -192,148 +193,147 @@ // // 1) Create tmp file with the library contents. // 2) Use dlopen to load the file and dlsym to retrieve the symbols. 
- char tmp_name[] = "/tmp/tmpfile_XXXXXX"; - int tmp_fd = mkstemp(tmp_name); + char TmpName[] = "/tmp/tmpfile_XXXXXX"; + int TmpFd = mkstemp(TmpName); - if (tmp_fd == -1) { - elf_end(e); + if (TmpFd == -1) { + elf_end(E); return NULL; } - FILE *ftmp = fdopen(tmp_fd, "wb"); + FILE *Ftmp = fdopen(TmpFd, "wb"); - if (!ftmp) { - elf_end(e); + if (!Ftmp) { + elf_end(E); return NULL; } - fwrite(image->ImageStart, ImageSize, 1, ftmp); - fclose(ftmp); + fwrite(Image->ImageStart, ImageSize, 1, Ftmp); + fclose(Ftmp); - DynLibTy Lib = {tmp_name, dlopen(tmp_name, RTLD_LAZY)}; + DynLibTy Lib = {TmpName, dlopen(TmpName, RTLD_LAZY)}; if (!Lib.Handle) { DP("Target library loading error: %s\n", dlerror()); - elf_end(e); + elf_end(E); return NULL; } DeviceInfo.DynLibs.push_back(Lib); - struct link_map *libInfo = (struct link_map *)Lib.Handle; + struct link_map *LibInfo = (struct link_map *)Lib.Handle; // The place where the entries info is loaded is the library base address // plus the offset determined from the ELF file. - Elf64_Addr entries_addr = libInfo->l_addr + entries_offset; + Elf64_Addr EntriesAddr = LibInfo->l_addr + EntriesOffset; DP("Pointer to first entry to be loaded is (" DPxMOD ").\n", - DPxPTR(entries_addr)); + DPxPTR(EntriesAddr)); // Table of pointers to all the entries in the target. - __tgt_offload_entry *entries_table = (__tgt_offload_entry *)entries_addr; + __tgt_offload_entry *EntriesTable = (__tgt_offload_entry *)EntriesAddr; - __tgt_offload_entry *entries_begin = &entries_table[0]; - __tgt_offload_entry *entries_end = entries_begin + NumEntries; + __tgt_offload_entry *EntriesBegin = &EntriesTable[0]; + __tgt_offload_entry *EntriesEnd = EntriesBegin + NumEntries; - if (!entries_begin) { + if (!EntriesBegin) { DP("Can't obtain entries begin\n"); - elf_end(e); + elf_end(E); return NULL; } DP("Entries table range is (" DPxMOD ")->(" DPxMOD ")\n", - DPxPTR(entries_begin), DPxPTR(entries_end)); - DeviceInfo.createOffloadTable(device_id, entries_begin, entries_end); + DPxPTR(EntriesBegin), DPxPTR(EntriesEnd)); + DeviceInfo.createOffloadTable(DeviceId, EntriesBegin, EntriesEnd); - elf_end(e); + elf_end(E); - return DeviceInfo.getOffloadEntriesTable(device_id); + return DeviceInfo.getOffloadEntriesTable(DeviceId); } -void __tgt_rtl_print_device_info(int32_t device_id) { +void __tgt_rtl_print_device_info(int32_t DeviceId) { printf(" This is a generic-elf-64bit device\n"); } // Sample implementation of explicit memory allocator. For this plugin all kinds // are equivalent to each other. 
-void *__tgt_rtl_data_alloc(int32_t device_id, int64_t size, void *hst_ptr,
-                           int32_t kind) {
-  void *ptr = NULL;
+void *__tgt_rtl_data_alloc(int32_t DeviceId, int64_t Size, void *HstPtr,
+                           int32_t Kind) {
+  void *Ptr = NULL;
-  switch (kind) {
+  switch (Kind) {
  case TARGET_ALLOC_DEVICE:
  case TARGET_ALLOC_HOST:
  case TARGET_ALLOC_SHARED:
  case TARGET_ALLOC_DEFAULT:
-    ptr = malloc(size);
+    Ptr = malloc(Size);
    break;
  default:
    REPORT("Invalid target data allocation kind");
  }
-  return ptr;
+  return Ptr;
}
-int32_t __tgt_rtl_data_submit(int32_t device_id, void *tgt_ptr, void *hst_ptr,
-                              int64_t size) {
-  memcpy(tgt_ptr, hst_ptr, size);
+int32_t __tgt_rtl_data_submit(int32_t DeviceId, void *TgtPtr, void *HstPtr,
+                              int64_t Size) {
+  memcpy(TgtPtr, HstPtr, Size);
  return OFFLOAD_SUCCESS;
}
-int32_t __tgt_rtl_data_retrieve(int32_t device_id, void *hst_ptr, void *tgt_ptr,
-                                int64_t size) {
-  memcpy(hst_ptr, tgt_ptr, size);
+int32_t __tgt_rtl_data_retrieve(int32_t DeviceId, void *HstPtr, void *TgtPtr,
+                                int64_t Size) {
+  memcpy(HstPtr, TgtPtr, Size);
  return OFFLOAD_SUCCESS;
}
-int32_t __tgt_rtl_data_delete(int32_t device_id, void *tgt_ptr) {
-  free(tgt_ptr);
+int32_t __tgt_rtl_data_delete(int32_t DeviceId, void *TgtPtr) {
+  free(TgtPtr);
  return OFFLOAD_SUCCESS;
}
-int32_t __tgt_rtl_run_target_team_region(int32_t device_id, void *tgt_entry_ptr,
-                                         void **tgt_args,
-                                         ptrdiff_t *tgt_offsets,
-                                         int32_t arg_num, int32_t team_num,
-                                         int32_t thread_limit,
-                                         uint64_t loop_tripcount /*not used*/) {
+int32_t __tgt_rtl_run_target_team_region(int32_t DeviceId, void *TgtEntryPtr,
+                                         void **TgtArgs, ptrdiff_t *TgtOffsets,
+                                         int32_t ArgNum, int32_t TeamNum,
+                                         int32_t ThreadLimit,
+                                         uint64_t LoopTripcount /*not used*/) {
  // ignore team num and thread limit.
  // Use libffi to launch execution.
-  ffi_cif cif;
+  ffi_cif Cif;
  // All args are references.
-  std::vector<ffi_type *> args_types(arg_num, &ffi_type_pointer);
-  std::vector<void *> args(arg_num);
-  std::vector<void *> ptrs(arg_num);
+  std::vector<ffi_type *> ArgsTypes(ArgNum, &ffi_type_pointer);
+  std::vector<void *> Args(ArgNum);
+  std::vector<void *> Ptrs(ArgNum);
-  for (int32_t i = 0; i < arg_num; ++i) {
-    ptrs[i] = (void *)((intptr_t)tgt_args[i] + tgt_offsets[i]);
-    args[i] = &ptrs[i];
+  for (int32_t I = 0; I < ArgNum; ++I) {
+    Ptrs[I] = (void *)((intptr_t)TgtArgs[I] + TgtOffsets[I]);
+    Args[I] = &Ptrs[I];
  }
-  ffi_status status = ffi_prep_cif(&cif, FFI_DEFAULT_ABI, arg_num,
-                                   &ffi_type_void, &args_types[0]);
+  ffi_status Status = ffi_prep_cif(&Cif, FFI_DEFAULT_ABI, ArgNum,
+                                   &ffi_type_void, &ArgsTypes[0]);
-  assert(status == FFI_OK && "Unable to prepare target launch!");
+  assert(Status == FFI_OK && "Unable to prepare target launch!");
-  if (status != FFI_OK)
+  if (Status != FFI_OK)
    return OFFLOAD_FAIL;
-  DP("Running entry point at " DPxMOD "...\n", DPxPTR(tgt_entry_ptr));
+  DP("Running entry point at " DPxMOD "...\n", DPxPTR(TgtEntryPtr));
-  void (*entry)(void);
-  *((void **)&entry) = tgt_entry_ptr;
-  ffi_call(&cif, entry, NULL, &args[0]);
+  void (*Entry)(void);
+  *((void **)&Entry) = TgtEntryPtr;
+  ffi_call(&Cif, Entry, NULL, &Args[0]);
  return OFFLOAD_SUCCESS;
}
-int32_t __tgt_rtl_run_target_region(int32_t device_id, void *tgt_entry_ptr,
-                                    void **tgt_args, ptrdiff_t *tgt_offsets,
-                                    int32_t arg_num) {
+int32_t __tgt_rtl_run_target_region(int32_t DeviceId, void *TgtEntryPtr,
+                                    void **TgtArgs, ptrdiff_t *TgtOffsets,
+                                    int32_t ArgNum) {
  // use one team and one thread.
- return __tgt_rtl_run_target_team_region(device_id, tgt_entry_ptr, tgt_args, - tgt_offsets, arg_num, 1, 1, 0); + return __tgt_rtl_run_target_team_region(DeviceId, TgtEntryPtr, TgtArgs, + TgtOffsets, ArgNum, 1, 1, 0); } #ifdef __cplusplus diff --git a/openmp/libomptarget/src/api.cpp b/openmp/libomptarget/src/api.cpp --- a/openmp/libomptarget/src/api.cpp +++ b/openmp/libomptarget/src/api.cpp @@ -41,66 +41,66 @@ EXTERN int omp_get_initial_device(void) { TIMESCOPE(); - int hostDevice = omp_get_num_devices(); - DP("Call to omp_get_initial_device returning %d\n", hostDevice); - return hostDevice; + int HostDevice = omp_get_num_devices(); + DP("Call to omp_get_initial_device returning %d\n", HostDevice); + return HostDevice; } -EXTERN void *omp_target_alloc(size_t size, int device_num) { - return targetAllocExplicit(size, device_num, TARGET_ALLOC_DEFAULT, __func__); +EXTERN void *omp_target_alloc(size_t Size, int DeviceNum) { + return targetAllocExplicit(Size, DeviceNum, TARGET_ALLOC_DEFAULT, __func__); } -EXTERN void *llvm_omp_target_alloc_device(size_t size, int device_num) { - return targetAllocExplicit(size, device_num, TARGET_ALLOC_DEVICE, __func__); +EXTERN void *llvm_omp_target_alloc_device(size_t Size, int DeviceNum) { + return targetAllocExplicit(Size, DeviceNum, TARGET_ALLOC_DEVICE, __func__); } -EXTERN void *llvm_omp_target_alloc_host(size_t size, int device_num) { - return targetAllocExplicit(size, device_num, TARGET_ALLOC_HOST, __func__); +EXTERN void *llvm_omp_target_alloc_host(size_t Size, int DeviceNum) { + return targetAllocExplicit(Size, DeviceNum, TARGET_ALLOC_HOST, __func__); } -EXTERN void *llvm_omp_target_alloc_shared(size_t size, int device_num) { - return targetAllocExplicit(size, device_num, TARGET_ALLOC_SHARED, __func__); +EXTERN void *llvm_omp_target_alloc_shared(size_t Size, int DeviceNum) { + return targetAllocExplicit(Size, DeviceNum, TARGET_ALLOC_SHARED, __func__); } EXTERN void *llvm_omp_target_dynamic_shared_alloc() { return nullptr; } EXTERN void *llvm_omp_get_dynamic_shared() { return nullptr; } -EXTERN void omp_target_free(void *device_ptr, int device_num) { +EXTERN void omp_target_free(void *DevicePtr, int DeviceNum) { TIMESCOPE(); DP("Call to omp_target_free for device %d and address " DPxMOD "\n", - device_num, DPxPTR(device_ptr)); + DeviceNum, DPxPTR(DevicePtr)); - if (!device_ptr) { + if (!DevicePtr) { DP("Call to omp_target_free with NULL ptr\n"); return; } - if (device_num == omp_get_initial_device()) { - free(device_ptr); + if (DeviceNum == omp_get_initial_device()) { + free(DevicePtr); DP("omp_target_free deallocated host ptr\n"); return; } - if (!device_is_ready(device_num)) { + if (!deviceIsReady(DeviceNum)) { DP("omp_target_free returns, nothing to do\n"); return; } - PM->Devices[device_num]->deleteData(device_ptr); + PM->Devices[DeviceNum]->deleteData(DevicePtr); DP("omp_target_free deallocated device ptr\n"); } -EXTERN int omp_target_is_present(const void *ptr, int device_num) { +EXTERN int omp_target_is_present(const void *Ptr, int DeviceNum) { TIMESCOPE(); DP("Call to omp_target_is_present for device %d and address " DPxMOD "\n", - device_num, DPxPTR(ptr)); + DeviceNum, DPxPTR(Ptr)); - if (!ptr) { + if (!Ptr) { DP("Call to omp_target_is_present with NULL ptr, returning false\n"); return false; } - if (device_num == omp_get_initial_device()) { + if (DeviceNum == omp_get_initial_device()) { DP("Call to omp_target_is_present on host, returning true\n"); return true; } @@ -108,13 +108,13 @@ PM->RTLsMtx.lock(); size_t DevicesSize = 
PM->Devices.size(); PM->RTLsMtx.unlock(); - if (DevicesSize <= (size_t)device_num) { + if (DevicesSize <= (size_t)DeviceNum) { DP("Call to omp_target_is_present with invalid device ID, returning " "false\n"); return false; } - DeviceTy &Device = *PM->Devices[device_num]; + DeviceTy &Device = *PM->Devices[DeviceNum]; bool IsLast; // not used bool IsHostPtr; // omp_target_is_present tests whether a host pointer refers to storage that @@ -122,32 +122,32 @@ // only check 1 byte. Cannot set size 0 which checks whether the pointer (zero // lengh array) is mapped instead of the referred storage. TargetPointerResultTy TPR = - Device.getTgtPtrBegin(const_cast(ptr), 1, IsLast, + Device.getTgtPtrBegin(const_cast(Ptr), 1, IsLast, /*UpdateRefCount=*/false, /*UseHoldRefCount=*/false, IsHostPtr); - int rc = (TPR.TargetPointer != NULL); + int Rc = (TPR.TargetPointer != NULL); // Under unified memory the host pointer can be returned by the // getTgtPtrBegin() function which means that there is no device // corresponding point for ptr. This function should return false // in that situation. if (PM->RTLs.RequiresFlags & OMP_REQ_UNIFIED_SHARED_MEMORY) - rc = !IsHostPtr; - DP("Call to omp_target_is_present returns %d\n", rc); - return rc; + Rc = !IsHostPtr; + DP("Call to omp_target_is_present returns %d\n", Rc); + return Rc; } -EXTERN int omp_target_memcpy(void *dst, const void *src, size_t length, - size_t dst_offset, size_t src_offset, - int dst_device, int src_device) { +EXTERN int omp_target_memcpy(void *Dst, const void *Src, size_t Length, + size_t DstOffset, size_t SrcOffset, int DstDevice, + int SrcDevice) { TIMESCOPE(); DP("Call to omp_target_memcpy, dst device %d, src device %d, " "dst addr " DPxMOD ", src addr " DPxMOD ", dst offset %zu, " "src offset %zu, length %zu\n", - dst_device, src_device, DPxPTR(dst), DPxPTR(src), dst_offset, src_offset, - length); + DstDevice, SrcDevice, DPxPTR(Dst), DPxPTR(Src), DstOffset, SrcOffset, + Length); - if (!dst || !src || length <= 0) { - if (length == 0) { + if (!Dst || !Src || Length <= 0) { + if (Length == 0) { DP("Call to omp_target_memcpy with zero length, nothing to do\n"); return OFFLOAD_SUCCESS; } @@ -156,180 +156,180 @@ return OFFLOAD_FAIL; } - if (src_device != omp_get_initial_device() && !device_is_ready(src_device)) { + if (SrcDevice != omp_get_initial_device() && !deviceIsReady(SrcDevice)) { REPORT("omp_target_memcpy returns OFFLOAD_FAIL\n"); return OFFLOAD_FAIL; } - if (dst_device != omp_get_initial_device() && !device_is_ready(dst_device)) { + if (DstDevice != omp_get_initial_device() && !deviceIsReady(DstDevice)) { REPORT("omp_target_memcpy returns OFFLOAD_FAIL\n"); return OFFLOAD_FAIL; } - int rc = OFFLOAD_SUCCESS; - void *srcAddr = (char *)const_cast(src) + src_offset; - void *dstAddr = (char *)dst + dst_offset; + int Rc = OFFLOAD_SUCCESS; + void *SrcAddr = (char *)const_cast(Src) + SrcOffset; + void *DstAddr = (char *)Dst + DstOffset; - if (src_device == omp_get_initial_device() && - dst_device == omp_get_initial_device()) { + if (SrcDevice == omp_get_initial_device() && + DstDevice == omp_get_initial_device()) { DP("copy from host to host\n"); - const void *p = memcpy(dstAddr, srcAddr, length); - if (p == NULL) - rc = OFFLOAD_FAIL; - } else if (src_device == omp_get_initial_device()) { + const void *P = memcpy(DstAddr, SrcAddr, Length); + if (P == NULL) + Rc = OFFLOAD_FAIL; + } else if (SrcDevice == omp_get_initial_device()) { DP("copy from host to device\n"); - DeviceTy &DstDev = *PM->Devices[dst_device]; + DeviceTy &DstDev = 
*PM->Devices[DstDevice]; AsyncInfoTy AsyncInfo(DstDev); - rc = DstDev.submitData(dstAddr, srcAddr, length, AsyncInfo); - } else if (dst_device == omp_get_initial_device()) { + Rc = DstDev.submitData(DstAddr, SrcAddr, Length, AsyncInfo); + } else if (DstDevice == omp_get_initial_device()) { DP("copy from device to host\n"); - DeviceTy &SrcDev = *PM->Devices[src_device]; + DeviceTy &SrcDev = *PM->Devices[SrcDevice]; AsyncInfoTy AsyncInfo(SrcDev); - rc = SrcDev.retrieveData(dstAddr, srcAddr, length, AsyncInfo); + Rc = SrcDev.retrieveData(DstAddr, SrcAddr, Length, AsyncInfo); } else { DP("copy from device to device\n"); - DeviceTy &SrcDev = *PM->Devices[src_device]; - DeviceTy &DstDev = *PM->Devices[dst_device]; + DeviceTy &SrcDev = *PM->Devices[SrcDevice]; + DeviceTy &DstDev = *PM->Devices[DstDevice]; // First try to use D2D memcpy which is more efficient. If fails, fall back // to unefficient way. if (SrcDev.isDataExchangable(DstDev)) { AsyncInfoTy AsyncInfo(SrcDev); - rc = SrcDev.dataExchange(srcAddr, DstDev, dstAddr, length, AsyncInfo); - if (rc == OFFLOAD_SUCCESS) + Rc = SrcDev.dataExchange(SrcAddr, DstDev, DstAddr, Length, AsyncInfo); + if (Rc == OFFLOAD_SUCCESS) return OFFLOAD_SUCCESS; } - void *buffer = malloc(length); + void *Buffer = malloc(Length); { AsyncInfoTy AsyncInfo(SrcDev); - rc = SrcDev.retrieveData(buffer, srcAddr, length, AsyncInfo); + Rc = SrcDev.retrieveData(Buffer, SrcAddr, Length, AsyncInfo); } - if (rc == OFFLOAD_SUCCESS) { + if (Rc == OFFLOAD_SUCCESS) { AsyncInfoTy AsyncInfo(SrcDev); - rc = DstDev.submitData(dstAddr, buffer, length, AsyncInfo); + Rc = DstDev.submitData(DstAddr, Buffer, Length, AsyncInfo); } - free(buffer); + free(Buffer); } - DP("omp_target_memcpy returns %d\n", rc); - return rc; + DP("omp_target_memcpy returns %d\n", Rc); + return Rc; } -EXTERN int omp_target_memcpy_rect( - void *dst, const void *src, size_t element_size, int num_dims, - const size_t *volume, const size_t *dst_offsets, const size_t *src_offsets, - const size_t *dst_dimensions, const size_t *src_dimensions, int dst_device, - int src_device) { +EXTERN int +omp_target_memcpy_rect(void *Dst, const void *Src, size_t ElementSize, + int NumDims, const size_t *Volume, + const size_t *DstOffsets, const size_t *SrcOffsets, + const size_t *DstDimensions, const size_t *SrcDimensions, + int DstDevice, int SrcDevice) { TIMESCOPE(); DP("Call to omp_target_memcpy_rect, dst device %d, src device %d, " "dst addr " DPxMOD ", src addr " DPxMOD ", dst offsets " DPxMOD ", " "src offsets " DPxMOD ", dst dims " DPxMOD ", src dims " DPxMOD ", " "volume " DPxMOD ", element size %zu, num_dims %d\n", - dst_device, src_device, DPxPTR(dst), DPxPTR(src), DPxPTR(dst_offsets), - DPxPTR(src_offsets), DPxPTR(dst_dimensions), DPxPTR(src_dimensions), - DPxPTR(volume), element_size, num_dims); + DstDevice, SrcDevice, DPxPTR(Dst), DPxPTR(Src), DPxPTR(DstOffsets), + DPxPTR(SrcOffsets), DPxPTR(DstDimensions), DPxPTR(SrcDimensions), + DPxPTR(Volume), ElementSize, NumDims); - if (!(dst || src)) { + if (!(Dst || Src)) { DP("Call to omp_target_memcpy_rect returns max supported dimensions %d\n", INT_MAX); return INT_MAX; } - if (!dst || !src || element_size < 1 || num_dims < 1 || !volume || - !dst_offsets || !src_offsets || !dst_dimensions || !src_dimensions) { + if (!Dst || !Src || ElementSize < 1 || NumDims < 1 || !Volume || + !DstOffsets || !SrcOffsets || !DstDimensions || !SrcDimensions) { REPORT("Call to omp_target_memcpy_rect with invalid arguments\n"); return OFFLOAD_FAIL; } - int rc; - if (num_dims == 1) { - rc = 
omp_target_memcpy( - dst, src, element_size * volume[0], element_size * dst_offsets[0], - element_size * src_offsets[0], dst_device, src_device); + int Rc; + if (NumDims == 1) { + Rc = omp_target_memcpy(Dst, Src, ElementSize * Volume[0], + ElementSize * DstOffsets[0], + ElementSize * SrcOffsets[0], DstDevice, SrcDevice); } else { - size_t dst_slice_size = element_size; - size_t src_slice_size = element_size; - for (int i = 1; i < num_dims; ++i) { - dst_slice_size *= dst_dimensions[i]; - src_slice_size *= src_dimensions[i]; + size_t DstSliceSize = ElementSize; + size_t SrcSliceSize = ElementSize; + for (int I = 1; I < NumDims; ++I) { + DstSliceSize *= DstDimensions[I]; + SrcSliceSize *= SrcDimensions[I]; } - size_t dst_off = dst_offsets[0] * dst_slice_size; - size_t src_off = src_offsets[0] * src_slice_size; - for (size_t i = 0; i < volume[0]; ++i) { - rc = omp_target_memcpy_rect( - (char *)dst + dst_off + dst_slice_size * i, - (char *)const_cast(src) + src_off + src_slice_size * i, - element_size, num_dims - 1, volume + 1, dst_offsets + 1, - src_offsets + 1, dst_dimensions + 1, src_dimensions + 1, dst_device, - src_device); - - if (rc) { + size_t DstOff = DstOffsets[0] * DstSliceSize; + size_t SrcOff = SrcOffsets[0] * SrcSliceSize; + for (size_t I = 0; I < Volume[0]; ++I) { + Rc = omp_target_memcpy_rect( + (char *)Dst + DstOff + DstSliceSize * I, + (char *)const_cast(Src) + SrcOff + SrcSliceSize * I, + ElementSize, NumDims - 1, Volume + 1, DstOffsets + 1, SrcOffsets + 1, + DstDimensions + 1, SrcDimensions + 1, DstDevice, SrcDevice); + + if (Rc) { DP("Recursive call to omp_target_memcpy_rect returns unsuccessfully\n"); - return rc; + return Rc; } } } - DP("omp_target_memcpy_rect returns %d\n", rc); - return rc; + DP("omp_target_memcpy_rect returns %d\n", Rc); + return Rc; } -EXTERN int omp_target_associate_ptr(const void *host_ptr, - const void *device_ptr, size_t size, - size_t device_offset, int device_num) { +EXTERN int omp_target_associate_ptr(const void *HostPtr, const void *DevicePtr, + size_t Size, size_t DeviceOffset, + int DeviceNum) { TIMESCOPE(); DP("Call to omp_target_associate_ptr with host_ptr " DPxMOD ", " "device_ptr " DPxMOD ", size %zu, device_offset %zu, device_num %d\n", - DPxPTR(host_ptr), DPxPTR(device_ptr), size, device_offset, device_num); + DPxPTR(HostPtr), DPxPTR(DevicePtr), Size, DeviceOffset, DeviceNum); - if (!host_ptr || !device_ptr || size <= 0) { + if (!HostPtr || !DevicePtr || Size <= 0) { REPORT("Call to omp_target_associate_ptr with invalid arguments\n"); return OFFLOAD_FAIL; } - if (device_num == omp_get_initial_device()) { + if (DeviceNum == omp_get_initial_device()) { REPORT("omp_target_associate_ptr: no association possible on the host\n"); return OFFLOAD_FAIL; } - if (!device_is_ready(device_num)) { + if (!deviceIsReady(DeviceNum)) { REPORT("omp_target_associate_ptr returns OFFLOAD_FAIL\n"); return OFFLOAD_FAIL; } - DeviceTy &Device = *PM->Devices[device_num]; - void *device_addr = (void *)((uint64_t)device_ptr + (uint64_t)device_offset); - int rc = Device.associatePtr(const_cast(host_ptr), - const_cast(device_addr), size); - DP("omp_target_associate_ptr returns %d\n", rc); - return rc; + DeviceTy &Device = *PM->Devices[DeviceNum]; + void *DeviceAddr = (void *)((uint64_t)DevicePtr + (uint64_t)DeviceOffset); + int Rc = Device.associatePtr(const_cast(HostPtr), + const_cast(DeviceAddr), Size); + DP("omp_target_associate_ptr returns %d\n", Rc); + return Rc; } -EXTERN int omp_target_disassociate_ptr(const void *host_ptr, int device_num) { +EXTERN int 
omp_target_disassociate_ptr(const void *HostPtr, int DeviceNum) {
  TIMESCOPE();
  DP("Call to omp_target_disassociate_ptr with host_ptr " DPxMOD ", "
     "device_num %d\n",
-     DPxPTR(host_ptr), device_num);
+     DPxPTR(HostPtr), DeviceNum);

-  if (!host_ptr) {
+  if (!HostPtr) {
    REPORT("Call to omp_target_disassociate_ptr with invalid host_ptr\n");
    return OFFLOAD_FAIL;
  }

-  if (device_num == omp_get_initial_device()) {
+  if (DeviceNum == omp_get_initial_device()) {
    REPORT(
        "omp_target_disassociate_ptr: no association possible on the host\n");
    return OFFLOAD_FAIL;
  }

-  if (!device_is_ready(device_num)) {
+  if (!deviceIsReady(DeviceNum)) {
    REPORT("omp_target_disassociate_ptr returns OFFLOAD_FAIL\n");
    return OFFLOAD_FAIL;
  }

-  DeviceTy &Device = *PM->Devices[device_num];
-  int rc = Device.disassociatePtr(const_cast<void *>(host_ptr));
-  DP("omp_target_disassociate_ptr returns %d\n", rc);
-  return rc;
+  DeviceTy &Device = *PM->Devices[DeviceNum];
+  int Rc = Device.disassociatePtr(const_cast<void *>(HostPtr));
+  DP("omp_target_disassociate_ptr returns %d\n", Rc);
+  return Rc;
}
diff --git a/openmp/libomptarget/src/device.cpp b/openmp/libomptarget/src/device.cpp
--- a/openmp/libomptarget/src/device.cpp
+++ b/openmp/libomptarget/src/device.cpp
@@ -58,8 +58,8 @@
  if (DeviceID == -1 || !(getInfoLevel() & OMP_INFOTYPE_DUMP_TABLE))
    return;

-  ident_t loc = {0, 0, 0, 0, ";libomptarget;libomptarget;0;0;;"};
-  dumpTargetPointerMappings(&loc, *this);
+  ident_t Loc = {0, 0, 0, 0, ";libomptarget;libomptarget;0;0;;"};
+  dumpTargetPointerMappings(&Loc, *this);
}

int DeviceTy::associatePtr(void *HstPtrBegin, void *TgtPtrBegin, int64_t Size) {
@@ -70,21 +70,20 @@
  if (It != HDTTMap->end()) {
    HostDataToTargetTy &HDTT = *It->HDTT;
    // Mapping already exists
-    bool isValid = HDTT.HstPtrEnd == (uintptr_t)HstPtrBegin + Size &&
+    bool IsValid = HDTT.HstPtrEnd == (uintptr_t)HstPtrBegin + Size &&
                   HDTT.TgtPtrBegin == (uintptr_t)TgtPtrBegin;
-    if (isValid) {
+    if (IsValid) {
      DP("Attempt to re-associate the same device ptr+offset with the same "
         "host ptr, nothing to do\n");
      return OFFLOAD_SUCCESS;
-    } else {
-      REPORT("Not allowed to re-associate a different device ptr+offset with "
-             "the same host ptr\n");
-      return OFFLOAD_FAIL;
    }
+    REPORT("Not allowed to re-associate a different device ptr+offset with "
+           "the same host ptr\n");
+    return OFFLOAD_FAIL;
  }

  // Mapping does not exist, allocate it with refCount=INF
-  const HostDataToTargetTy &newEntry =
+  const HostDataToTargetTy &NewEntry =
      *HDTTMap
           ->emplace(new HostDataToTargetTy(
               /*HstPtrBase=*/(uintptr_t)HstPtrBegin,
@@ -97,10 +96,10 @@
  DP("Creating new map entry: HstBase=" DPxMOD ", HstBegin=" DPxMOD
     ", HstEnd=" DPxMOD ", TgtBegin=" DPxMOD ", DynRefCount=%s, "
     "HoldRefCount=%s\n",
-     DPxPTR(newEntry.HstPtrBase), DPxPTR(newEntry.HstPtrBegin),
-     DPxPTR(newEntry.HstPtrEnd), DPxPTR(newEntry.TgtPtrBegin),
-     newEntry.dynRefCountToStr().c_str(), newEntry.holdRefCountToStr().c_str());
-  (void)newEntry;
+     DPxPTR(NewEntry.HstPtrBase), DPxPTR(NewEntry.HstPtrBegin),
+     DPxPTR(NewEntry.HstPtrEnd), DPxPTR(NewEntry.TgtPtrBegin),
+     NewEntry.dynRefCountToStr().c_str(), NewEntry.holdRefCountToStr().c_str());
+  (void)NewEntry;

  return OFFLOAD_SUCCESS;
}
@@ -141,71 +140,71 @@
LookupResult DeviceTy::lookupMapping(HDTTMapAccessorTy &HDTTMap,
                                     void *HstPtrBegin, int64_t Size) {
-  uintptr_t hp = (uintptr_t)HstPtrBegin;
-  LookupResult lr;
+  uintptr_t HP = (uintptr_t)HstPtrBegin;
+  LookupResult LR;

  DP("Looking up mapping(HstPtrBegin=" DPxMOD ", Size=%" PRId64 ")...\n",
-     DPxPTR(hp), Size);
+     DPxPTR(HP), Size);

  if (HDTTMap->empty())
-    return lr;
+    return LR;

-  auto upper = HDTTMap->upper_bound(hp);
+  auto Upper = HDTTMap->upper_bound(HP);

  if (Size == 0) {
    // specification v5.1 Pointer Initialization for Device Data Environments
    // upper_bound satisfies
    // std::prev(upper)->HDTT.HstPtrBegin <= hp < upper->HDTT.HstPtrBegin
-    if (upper != HDTTMap->begin()) {
-      lr.Entry = std::prev(upper)->HDTT;
-      auto &HT = *lr.Entry;
+    if (Upper != HDTTMap->begin()) {
+      LR.Entry = std::prev(Upper)->HDTT;
+      auto &HT = *LR.Entry;
      // the left side of the extended address range is satisfied:
      // hp >= HT.HstPtrBegin || hp >= HT.HstPtrBase
-      lr.Flags.IsContained = hp < HT.HstPtrEnd || hp < HT.HstPtrBase;
+      LR.Flags.IsContained = HP < HT.HstPtrEnd || HP < HT.HstPtrBase;
    }

-    if (!lr.Flags.IsContained && upper != HDTTMap->end()) {
-      lr.Entry = upper->HDTT;
-      auto &HT = *lr.Entry;
+    if (!LR.Flags.IsContained && Upper != HDTTMap->end()) {
+      LR.Entry = Upper->HDTT;
+      auto &HT = *LR.Entry;
      // the right side of the extended address range is satisfied:
      // hp < HT.HstPtrEnd || hp < HT.HstPtrBase
-      lr.Flags.IsContained = hp >= HT.HstPtrBase;
+      LR.Flags.IsContained = HP >= HT.HstPtrBase;
    }
  } else {
    // check the left bin
-    if (upper != HDTTMap->begin()) {
-      lr.Entry = std::prev(upper)->HDTT;
-      auto &HT = *lr.Entry;
+    if (Upper != HDTTMap->begin()) {
+      LR.Entry = std::prev(Upper)->HDTT;
+      auto &HT = *LR.Entry;
      // Is it contained?
-      lr.Flags.IsContained = hp >= HT.HstPtrBegin && hp < HT.HstPtrEnd &&
-                             (hp + Size) <= HT.HstPtrEnd;
+      LR.Flags.IsContained = HP >= HT.HstPtrBegin && HP < HT.HstPtrEnd &&
+                             (HP + Size) <= HT.HstPtrEnd;
      // Does it extend beyond the mapped region?
-      lr.Flags.ExtendsAfter = hp < HT.HstPtrEnd && (hp + Size) > HT.HstPtrEnd;
+      LR.Flags.ExtendsAfter = HP < HT.HstPtrEnd && (HP + Size) > HT.HstPtrEnd;
    }

    // check the right bin
-    if (!(lr.Flags.IsContained || lr.Flags.ExtendsAfter) &&
-        upper != HDTTMap->end()) {
-      lr.Entry = upper->HDTT;
-      auto &HT = *lr.Entry;
+    if (!(LR.Flags.IsContained || LR.Flags.ExtendsAfter) &&
+        Upper != HDTTMap->end()) {
+      LR.Entry = Upper->HDTT;
+      auto &HT = *LR.Entry;
      // Does it extend into an already mapped region?
-      lr.Flags.ExtendsBefore =
-          hp < HT.HstPtrBegin && (hp + Size) > HT.HstPtrBegin;
+      LR.Flags.ExtendsBefore =
+          HP < HT.HstPtrBegin && (HP + Size) > HT.HstPtrBegin;
      // Does it extend beyond the mapped region?
-      lr.Flags.ExtendsAfter = hp < HT.HstPtrEnd && (hp + Size) > HT.HstPtrEnd;
+      LR.Flags.ExtendsAfter = HP < HT.HstPtrEnd && (HP + Size) > HT.HstPtrEnd;
    }

-    if (lr.Flags.ExtendsBefore) {
+    if (LR.Flags.ExtendsBefore) {
      DP("WARNING: Pointer is not mapped but section extends into already "
         "mapped data\n");
    }
-    if (lr.Flags.ExtendsAfter) {
+    if (LR.Flags.ExtendsAfter) {
      DP("WARNING: Pointer is already mapped but section extends beyond mapped "
         "region\n");
    }
  }

-  return lr;
+  return LR;
}

TargetPointerResultTy DeviceTy::getTargetPointer(
@@ -368,11 +367,11 @@
  bool IsNew = false;
  IsHostPtr = false;
  IsLast = false;
-  LookupResult lr = lookupMapping(HDTTMap, HstPtrBegin, Size);
+  LookupResult LR = lookupMapping(HDTTMap, HstPtrBegin, Size);

-  if (lr.Flags.IsContained ||
-      (!MustContain && (lr.Flags.ExtendsBefore || lr.Flags.ExtendsAfter))) {
-    auto &HT = *lr.Entry;
+  if (LR.Flags.IsContained ||
+      (!MustContain && (LR.Flags.ExtendsBefore || LR.Flags.ExtendsAfter))) {
+    auto &HT = *LR.Entry;
    IsLast = HT.decShouldRemove(UseHoldRefCount, ForceDelete);

    if (ForceDelete) {
@@ -403,13 +402,13 @@
    }
    const char *DynRefCountAction = UseHoldRefCount ? "" : RefCountAction;
    const char *HoldRefCountAction = UseHoldRefCount ?
RefCountAction : ""; - uintptr_t tp = HT.TgtPtrBegin + ((uintptr_t)HstPtrBegin - HT.HstPtrBegin); + uintptr_t TP = HT.TgtPtrBegin + ((uintptr_t)HstPtrBegin - HT.HstPtrBegin); INFO(OMP_INFOTYPE_MAPPING_EXISTS, DeviceID, "Mapping exists with HstPtrBegin=" DPxMOD ", TgtPtrBegin=" DPxMOD ", " "Size=%" PRId64 ", DynRefCount=%s%s, HoldRefCount=%s%s\n", - DPxPTR(HstPtrBegin), DPxPTR(tp), Size, HT.dynRefCountToStr().c_str(), + DPxPTR(HstPtrBegin), DPxPTR(TP), Size, HT.dynRefCountToStr().c_str(), DynRefCountAction, HT.holdRefCountToStr().c_str(), HoldRefCountAction); - TargetPointer = (void *)tp; + TargetPointer = (void *)TP; } else if (PM->RTLs.RequiresFlags & OMP_REQ_UNIFIED_SHARED_MEMORY) { // If the value isn't found in the mapping and unified shared memory // is on then it means we have stumbled upon a value which we need to @@ -421,18 +420,18 @@ TargetPointer = HstPtrBegin; } - return {{IsNew, IsHostPtr}, lr.Entry, TargetPointer}; + return {{IsNew, IsHostPtr}, LR.Entry, TargetPointer}; } // Return the target pointer begin (where the data will be moved). void *DeviceTy::getTgtPtrBegin(HDTTMapAccessorTy &HDTTMap, void *HstPtrBegin, int64_t Size) { - uintptr_t hp = (uintptr_t)HstPtrBegin; - LookupResult lr = lookupMapping(HDTTMap, HstPtrBegin, Size); - if (lr.Flags.IsContained || lr.Flags.ExtendsBefore || lr.Flags.ExtendsAfter) { - auto &HT = *lr.Entry; - uintptr_t tp = HT.TgtPtrBegin + (hp - HT.HstPtrBegin); - return (void *)tp; + uintptr_t HP = (uintptr_t)HstPtrBegin; + LookupResult LR = lookupMapping(HDTTMap, HstPtrBegin, Size); + if (LR.Flags.IsContained || LR.Flags.ExtendsBefore || LR.Flags.ExtendsAfter) { + auto &HT = *LR.Entry; + uintptr_t TP = HT.TgtPtrBegin + (HP - HT.HstPtrBegin); + return (void *)TP; } return NULL; @@ -500,8 +499,7 @@ // is still false, return OFFLOAD_FAIL. if (IsInit) return OFFLOAD_SUCCESS; - else - return OFFLOAD_FAIL; + return OFFLOAD_FAIL; } void DeviceTy::deinit() { @@ -510,10 +508,9 @@ } // Load binary to device. 
-__tgt_target_table *DeviceTy::load_binary(void *Img) {
+__tgt_target_table *DeviceTy::loadBinary(void *Img) {
  std::lock_guard<decltype(RTL->Mtx)> LG(RTL->Mtx);
-  __tgt_target_table *rc = RTL->load_binary(RTLDeviceID, Img);
-  return rc;
+  return RTL->load_binary(RTLDeviceID, Img);
}

void *DeviceTy::allocData(int64_t Size, void *HstPtr, int32_t Kind) {
@@ -542,9 +539,8 @@
  if (!AsyncInfo || !RTL->data_submit_async || !RTL->synchronize)
    return RTL->data_submit(RTLDeviceID, TgtPtrBegin, HstPtrBegin, Size);
-  else
-    return RTL->data_submit_async(RTLDeviceID, TgtPtrBegin, HstPtrBegin, Size,
-                                  AsyncInfo);
+  return RTL->data_submit_async(RTLDeviceID, TgtPtrBegin, HstPtrBegin, Size,
+                                AsyncInfo);
}

// Retrieve data from device
@@ -564,9 +560,8 @@
  if (!RTL->data_retrieve_async || !RTL->synchronize)
    return RTL->data_retrieve(RTLDeviceID, HstPtrBegin, TgtPtrBegin, Size);
-  else
-    return RTL->data_retrieve_async(RTLDeviceID, HstPtrBegin, TgtPtrBegin, Size,
-                                    AsyncInfo);
+  return RTL->data_retrieve_async(RTLDeviceID, HstPtrBegin, TgtPtrBegin, Size,
+                                  AsyncInfo);
}

// Copy data from current device to destination device directly
@@ -576,9 +571,9 @@
    assert(RTL->data_exchange && "RTL->data_exchange is nullptr");
    return RTL->data_exchange(RTLDeviceID, SrcPtr, DstDev.RTLDeviceID, DstPtr,
                              Size);
-  } else
-    return RTL->data_exchange_async(RTLDeviceID, SrcPtr, DstDev.RTLDeviceID,
-                                    DstPtr, Size, AsyncInfo);
+  }
+  return RTL->data_exchange_async(RTLDeviceID, SrcPtr, DstDev.RTLDeviceID,
+                                  DstPtr, Size, AsyncInfo);
}

// Run region on device
@@ -588,9 +583,8 @@
  if (!RTL->run_region || !RTL->synchronize)
    return RTL->run_region(RTLDeviceID, TgtEntryPtr, TgtVarsPtr, TgtOffsets,
                           TgtVarsSize);
-  else
-    return RTL->run_region_async(RTLDeviceID, TgtEntryPtr, TgtVarsPtr,
-                                 TgtOffsets, TgtVarsSize, AsyncInfo);
+  return RTL->run_region_async(RTLDeviceID, TgtEntryPtr, TgtVarsPtr, TgtOffsets,
+                               TgtVarsSize, AsyncInfo);
}

// Run region on device
@@ -611,10 +605,9 @@
    return RTL->run_team_region(RTLDeviceID, TgtEntryPtr, TgtVarsPtr, TgtOffsets,
                                TgtVarsSize, NumTeams, ThreadLimit,
                                LoopTripCount);
-  else
-    return RTL->run_team_region_async(RTLDeviceID, TgtEntryPtr, TgtVarsPtr,
-                                      TgtOffsets, TgtVarsSize, NumTeams,
-                                      ThreadLimit, LoopTripCount, AsyncInfo);
+  return RTL->run_team_region_async(RTLDeviceID, TgtEntryPtr, TgtVarsPtr,
+                                    TgtOffsets, TgtVarsSize, NumTeams,
+                                    ThreadLimit, LoopTripCount, AsyncInfo);
}

// Whether data can be copied to DstDevice directly
@@ -672,8 +665,8 @@
/// Check whether a device has an associated RTL and initialize it if it's not
/// already initialized.
-bool device_is_ready(int device_num) {
-  DP("Checking whether device %d is ready.\n", device_num);
+bool deviceIsReady(int DeviceNum) {
+  DP("Checking whether device %d is ready.\n", DeviceNum);
  // Devices.size() can only change while registering a new
  // library, so try to acquire the lock of RTLs' mutex.
  size_t DevicesSize;
  {
    std::lock_guard<decltype(PM->RTLsMtx)> LG(PM->RTLsMtx);
    DevicesSize = PM->Devices.size();
  }
-  if (DevicesSize <= (size_t)device_num) {
-    DP("Device ID %d does not have a matching RTL\n", device_num);
+  if (DevicesSize <= (size_t)DeviceNum) {
+    DP("Device ID %d does not have a matching RTL\n", DeviceNum);
    return false;
  }

  // Get device info
-  DeviceTy &Device = *PM->Devices[device_num];
+  DeviceTy &Device = *PM->Devices[DeviceNum];

-  DP("Is the device %d (local ID %d) initialized? %d\n", device_num,
%d\n", DeviceNum, Device.RTLDeviceID, Device.IsInit); // Init the device if not done before if (!Device.IsInit && Device.initOnce() != OFFLOAD_SUCCESS) { - DP("Failed to init device %d\n", device_num); + DP("Failed to init device %d\n", DeviceNum); return false; } - DP("Device %d is ready to use.\n", device_num); + DP("Device %d is ready to use.\n", DeviceNum); return true; } diff --git a/openmp/libomptarget/src/interface.cpp b/openmp/libomptarget/src/interface.cpp --- a/openmp/libomptarget/src/interface.cpp +++ b/openmp/libomptarget/src/interface.cpp @@ -23,24 +23,24 @@ //////////////////////////////////////////////////////////////////////////////// /// adds requires flags -EXTERN void __tgt_register_requires(int64_t flags) { +EXTERN void __tgt_register_requires(int64_t Flags) { TIMESCOPE(); - PM->RTLs.RegisterRequires(flags); + PM->RTLs.registerRequires(Flags); } //////////////////////////////////////////////////////////////////////////////// /// adds a target shared library to the target execution image -EXTERN void __tgt_register_lib(__tgt_bin_desc *desc) { +EXTERN void __tgt_register_lib(__tgt_bin_desc *Desc) { TIMESCOPE(); - std::call_once(PM->RTLs.initFlag, &RTLsTy::LoadRTLs, &PM->RTLs); + std::call_once(PM->RTLs.InitFlag, &RTLsTy::loadRTLs, &PM->RTLs); for (auto &RTL : PM->RTLs.AllRTLs) { if (RTL.register_lib) { - if ((*RTL.register_lib)(desc) != OFFLOAD_SUCCESS) { + if ((*RTL.register_lib)(Desc) != OFFLOAD_SUCCESS) { DP("Could not register library with %s", RTL.RTLName.c_str()); } } } - PM->RTLs.RegisterLib(desc); + PM->RTLs.registerLib(Desc); } //////////////////////////////////////////////////////////////////////////////// @@ -49,12 +49,12 @@ //////////////////////////////////////////////////////////////////////////////// /// unloads a target shared library -EXTERN void __tgt_unregister_lib(__tgt_bin_desc *desc) { +EXTERN void __tgt_unregister_lib(__tgt_bin_desc *Desc) { TIMESCOPE(); - PM->RTLs.UnregisterLib(desc); + PM->RTLs.unregisterLib(Desc); for (auto &RTL : PM->RTLs.UsedRTLs) { if (RTL->unregister_lib) { - if ((*RTL->unregister_lib)(desc) != OFFLOAD_SUCCESS) { + if ((*RTL->unregister_lib)(Desc) != OFFLOAD_SUCCESS) { DP("Could not register library with %s", RTL->RTLName.c_str()); } } @@ -64,384 +64,383 @@ /// creates host-to-target data mapping, stores it in the /// libomptarget.so internal structure (an entry in a stack of data maps) /// and passes the data to the device. 
-EXTERN void __tgt_target_data_begin(int64_t device_id, int32_t arg_num, - void **args_base, void **args, - int64_t *arg_sizes, int64_t *arg_types) { +EXTERN void __tgt_target_data_begin(int64_t DeviceId, int32_t ArgNum, + void **ArgsBase, void **Args, + int64_t *ArgSizes, int64_t *ArgTypes) { TIMESCOPE(); - __tgt_target_data_begin_mapper(nullptr, device_id, arg_num, args_base, args, - arg_sizes, arg_types, nullptr, nullptr); + __tgt_target_data_begin_mapper(nullptr, DeviceId, ArgNum, ArgsBase, Args, + ArgSizes, ArgTypes, nullptr, nullptr); } -EXTERN void __tgt_target_data_begin_nowait(int64_t device_id, int32_t arg_num, - void **args_base, void **args, - int64_t *arg_sizes, - int64_t *arg_types, int32_t depNum, - void *depList, int32_t noAliasDepNum, - void *noAliasDepList) { +EXTERN void __tgt_target_data_begin_nowait(int64_t DeviceId, int32_t ArgNum, + void **ArgsBase, void **Args, + int64_t *ArgSizes, int64_t *ArgTypes, + int32_t DepNum, void *DepList, + int32_t NoAliasDepNum, + void *NoAliasDepList) { TIMESCOPE(); - __tgt_target_data_begin_mapper(nullptr, device_id, arg_num, args_base, args, - arg_sizes, arg_types, nullptr, nullptr); + __tgt_target_data_begin_mapper(nullptr, DeviceId, ArgNum, ArgsBase, Args, + ArgSizes, ArgTypes, nullptr, nullptr); } -EXTERN void __tgt_target_data_begin_mapper(ident_t *loc, int64_t device_id, - int32_t arg_num, void **args_base, - void **args, int64_t *arg_sizes, - int64_t *arg_types, - map_var_info_t *arg_names, - void **arg_mappers) { - TIMESCOPE_WITH_IDENT(loc); +EXTERN void __tgt_target_data_begin_mapper(ident_t *Loc, int64_t DeviceId, + int32_t ArgNum, void **ArgsBase, + void **Args, int64_t *ArgSizes, + int64_t *ArgTypes, + map_var_info_t *ArgNames, + void **ArgMappers) { + TIMESCOPE_WITH_IDENT(Loc); DP("Entering data begin region for device %" PRId64 " with %d mappings\n", - device_id, arg_num); - if (checkDeviceAndCtors(device_id, loc)) { - DP("Not offloading to device %" PRId64 "\n", device_id); + DeviceId, ArgNum); + if (checkDeviceAndCtors(DeviceId, Loc)) { + DP("Not offloading to device %" PRId64 "\n", DeviceId); return; } - DeviceTy &Device = *PM->Devices[device_id]; + DeviceTy &Device = *PM->Devices[DeviceId]; if (getInfoLevel() & OMP_INFOTYPE_KERNEL_ARGS) - printKernelArguments(loc, device_id, arg_num, arg_sizes, arg_types, - arg_names, "Entering OpenMP data region"); + printKernelArguments(Loc, DeviceId, ArgNum, ArgSizes, ArgTypes, ArgNames, + "Entering OpenMP data region"); #ifdef OMPTARGET_DEBUG - for (int i = 0; i < arg_num; ++i) { + for (int I = 0; I < ArgNum; ++I) { DP("Entry %2d: Base=" DPxMOD ", Begin=" DPxMOD ", Size=%" PRId64 ", Type=0x%" PRIx64 ", Name=%s\n", - i, DPxPTR(args_base[i]), DPxPTR(args[i]), arg_sizes[i], arg_types[i], - (arg_names) ? getNameFromMapping(arg_names[i]).c_str() : "unknown"); + I, DPxPTR(ArgsBase[I]), DPxPTR(Args[I]), ArgSizes[I], ArgTypes[I], + (ArgNames) ? 
getNameFromMapping(ArgNames[I]).c_str() : "unknown"); } #endif AsyncInfoTy AsyncInfo(Device); - int rc = targetDataBegin(loc, Device, arg_num, args_base, args, arg_sizes, - arg_types, arg_names, arg_mappers, AsyncInfo); - if (rc == OFFLOAD_SUCCESS) - rc = AsyncInfo.synchronize(); - handleTargetOutcome(rc == OFFLOAD_SUCCESS, loc); + int Rc = targetDataBegin(Loc, Device, ArgNum, ArgsBase, Args, ArgSizes, + ArgTypes, ArgNames, ArgMappers, AsyncInfo); + if (Rc == OFFLOAD_SUCCESS) + Rc = AsyncInfo.synchronize(); + handleTargetOutcome(Rc == OFFLOAD_SUCCESS, Loc); } EXTERN void __tgt_target_data_begin_nowait_mapper( - ident_t *loc, int64_t device_id, int32_t arg_num, void **args_base, - void **args, int64_t *arg_sizes, int64_t *arg_types, - map_var_info_t *arg_names, void **arg_mappers, int32_t depNum, - void *depList, int32_t noAliasDepNum, void *noAliasDepList) { - TIMESCOPE_WITH_IDENT(loc); - - __tgt_target_data_begin_mapper(loc, device_id, arg_num, args_base, args, - arg_sizes, arg_types, arg_names, arg_mappers); + ident_t *Loc, int64_t DeviceId, int32_t ArgNum, void **ArgsBase, + void **Args, int64_t *ArgSizes, int64_t *ArgTypes, map_var_info_t *ArgNames, + void **ArgMappers, int32_t DepNum, void *DepList, int32_t NoAliasDepNum, + void *NoAliasDepList) { + TIMESCOPE_WITH_IDENT(Loc); + + __tgt_target_data_begin_mapper(Loc, DeviceId, ArgNum, ArgsBase, Args, + ArgSizes, ArgTypes, ArgNames, ArgMappers); } /// passes data from the target, releases target memory and destroys /// the host-target mapping (top entry from the stack of data maps) /// created by the last __tgt_target_data_begin. -EXTERN void __tgt_target_data_end(int64_t device_id, int32_t arg_num, - void **args_base, void **args, - int64_t *arg_sizes, int64_t *arg_types) { +EXTERN void __tgt_target_data_end(int64_t DeviceId, int32_t ArgNum, + void **ArgsBase, void **Args, + int64_t *ArgSizes, int64_t *ArgTypes) { TIMESCOPE(); - __tgt_target_data_end_mapper(nullptr, device_id, arg_num, args_base, args, - arg_sizes, arg_types, nullptr, nullptr); + __tgt_target_data_end_mapper(nullptr, DeviceId, ArgNum, ArgsBase, Args, + ArgSizes, ArgTypes, nullptr, nullptr); } -EXTERN void __tgt_target_data_end_nowait(int64_t device_id, int32_t arg_num, - void **args_base, void **args, - int64_t *arg_sizes, int64_t *arg_types, - int32_t depNum, void *depList, - int32_t noAliasDepNum, - void *noAliasDepList) { +EXTERN void __tgt_target_data_end_nowait(int64_t DeviceId, int32_t ArgNum, + void **ArgsBase, void **Args, + int64_t *ArgSizes, int64_t *ArgTypes, + int32_t DepNum, void *DepList, + int32_t NoAliasDepNum, + void *NoAliasDepList) { TIMESCOPE(); - __tgt_target_data_end_mapper(nullptr, device_id, arg_num, args_base, args, - arg_sizes, arg_types, nullptr, nullptr); + __tgt_target_data_end_mapper(nullptr, DeviceId, ArgNum, ArgsBase, Args, + ArgSizes, ArgTypes, nullptr, nullptr); } -EXTERN void __tgt_target_data_end_mapper(ident_t *loc, int64_t device_id, - int32_t arg_num, void **args_base, - void **args, int64_t *arg_sizes, - int64_t *arg_types, - map_var_info_t *arg_names, - void **arg_mappers) { - TIMESCOPE_WITH_IDENT(loc); - DP("Entering data end region with %d mappings\n", arg_num); - if (checkDeviceAndCtors(device_id, loc)) { - DP("Not offloading to device %" PRId64 "\n", device_id); +EXTERN void __tgt_target_data_end_mapper(ident_t *Loc, int64_t DeviceId, + int32_t ArgNum, void **ArgsBase, + void **Args, int64_t *ArgSizes, + int64_t *ArgTypes, + map_var_info_t *ArgNames, + void **ArgMappers) { + TIMESCOPE_WITH_IDENT(Loc); + DP("Entering data end 
region with %d mappings\n", ArgNum); + if (checkDeviceAndCtors(DeviceId, Loc)) { + DP("Not offloading to device %" PRId64 "\n", DeviceId); return; } - DeviceTy &Device = *PM->Devices[device_id]; + DeviceTy &Device = *PM->Devices[DeviceId]; if (getInfoLevel() & OMP_INFOTYPE_KERNEL_ARGS) - printKernelArguments(loc, device_id, arg_num, arg_sizes, arg_types, - arg_names, "Exiting OpenMP data region"); + printKernelArguments(Loc, DeviceId, ArgNum, ArgSizes, ArgTypes, ArgNames, + "Exiting OpenMP data region"); #ifdef OMPTARGET_DEBUG - for (int i = 0; i < arg_num; ++i) { + for (int I = 0; I < ArgNum; ++I) { DP("Entry %2d: Base=" DPxMOD ", Begin=" DPxMOD ", Size=%" PRId64 ", Type=0x%" PRIx64 ", Name=%s\n", - i, DPxPTR(args_base[i]), DPxPTR(args[i]), arg_sizes[i], arg_types[i], - (arg_names) ? getNameFromMapping(arg_names[i]).c_str() : "unknown"); + I, DPxPTR(ArgsBase[I]), DPxPTR(Args[I]), ArgSizes[I], ArgTypes[I], + (ArgNames) ? getNameFromMapping(ArgNames[I]).c_str() : "unknown"); } #endif AsyncInfoTy AsyncInfo(Device); - int rc = targetDataEnd(loc, Device, arg_num, args_base, args, arg_sizes, - arg_types, arg_names, arg_mappers, AsyncInfo); - if (rc == OFFLOAD_SUCCESS) - rc = AsyncInfo.synchronize(); - handleTargetOutcome(rc == OFFLOAD_SUCCESS, loc); + int Rc = targetDataEnd(Loc, Device, ArgNum, ArgsBase, Args, ArgSizes, + ArgTypes, ArgNames, ArgMappers, AsyncInfo); + if (Rc == OFFLOAD_SUCCESS) + Rc = AsyncInfo.synchronize(); + handleTargetOutcome(Rc == OFFLOAD_SUCCESS, Loc); } EXTERN void __tgt_target_data_end_nowait_mapper( - ident_t *loc, int64_t device_id, int32_t arg_num, void **args_base, - void **args, int64_t *arg_sizes, int64_t *arg_types, - map_var_info_t *arg_names, void **arg_mappers, int32_t depNum, - void *depList, int32_t noAliasDepNum, void *noAliasDepList) { - TIMESCOPE_WITH_IDENT(loc); - - __tgt_target_data_end_mapper(loc, device_id, arg_num, args_base, args, - arg_sizes, arg_types, arg_names, arg_mappers); + ident_t *Loc, int64_t DeviceId, int32_t ArgNum, void **ArgsBase, + void **Args, int64_t *ArgSizes, int64_t *ArgTypes, map_var_info_t *ArgNames, + void **ArgMappers, int32_t DepNum, void *DepList, int32_t NoAliasDepNum, + void *NoAliasDepList) { + TIMESCOPE_WITH_IDENT(Loc); + + __tgt_target_data_end_mapper(Loc, DeviceId, ArgNum, ArgsBase, Args, ArgSizes, + ArgTypes, ArgNames, ArgMappers); } -EXTERN void __tgt_target_data_update(int64_t device_id, int32_t arg_num, - void **args_base, void **args, - int64_t *arg_sizes, int64_t *arg_types) { +EXTERN void __tgt_target_data_update(int64_t DeviceId, int32_t ArgNum, + void **ArgsBase, void **Args, + int64_t *ArgSizes, int64_t *ArgTypes) { TIMESCOPE(); - __tgt_target_data_update_mapper(nullptr, device_id, arg_num, args_base, args, - arg_sizes, arg_types, nullptr, nullptr); + __tgt_target_data_update_mapper(nullptr, DeviceId, ArgNum, ArgsBase, Args, + ArgSizes, ArgTypes, nullptr, nullptr); } EXTERN void __tgt_target_data_update_nowait( - int64_t device_id, int32_t arg_num, void **args_base, void **args, - int64_t *arg_sizes, int64_t *arg_types, int32_t depNum, void *depList, - int32_t noAliasDepNum, void *noAliasDepList) { + int64_t DeviceId, int32_t ArgNum, void **ArgsBase, void **Args, + int64_t *ArgSizes, int64_t *ArgTypes, int32_t DepNum, void *DepList, + int32_t NoAliasDepNum, void *NoAliasDepList) { TIMESCOPE(); - __tgt_target_data_update_mapper(nullptr, device_id, arg_num, args_base, args, - arg_sizes, arg_types, nullptr, nullptr); + __tgt_target_data_update_mapper(nullptr, DeviceId, ArgNum, ArgsBase, Args, + ArgSizes, 
ArgTypes, nullptr, nullptr); } -EXTERN void __tgt_target_data_update_mapper(ident_t *loc, int64_t device_id, - int32_t arg_num, void **args_base, - void **args, int64_t *arg_sizes, - int64_t *arg_types, - map_var_info_t *arg_names, - void **arg_mappers) { - TIMESCOPE_WITH_IDENT(loc); - DP("Entering data update with %d mappings\n", arg_num); - if (checkDeviceAndCtors(device_id, loc)) { - DP("Not offloading to device %" PRId64 "\n", device_id); +EXTERN void __tgt_target_data_update_mapper(ident_t *Loc, int64_t DeviceId, + int32_t ArgNum, void **ArgsBase, + void **Args, int64_t *ArgSizes, + int64_t *ArgTypes, + map_var_info_t *ArgNames, + void **ArgMappers) { + TIMESCOPE_WITH_IDENT(Loc); + DP("Entering data update with %d mappings\n", ArgNum); + if (checkDeviceAndCtors(DeviceId, Loc)) { + DP("Not offloading to device %" PRId64 "\n", DeviceId); return; } if (getInfoLevel() & OMP_INFOTYPE_KERNEL_ARGS) - printKernelArguments(loc, device_id, arg_num, arg_sizes, arg_types, - arg_names, "Updating OpenMP data"); + printKernelArguments(Loc, DeviceId, ArgNum, ArgSizes, ArgTypes, ArgNames, + "Updating OpenMP data"); - DeviceTy &Device = *PM->Devices[device_id]; + DeviceTy &Device = *PM->Devices[DeviceId]; AsyncInfoTy AsyncInfo(Device); - int rc = targetDataUpdate(loc, Device, arg_num, args_base, args, arg_sizes, - arg_types, arg_names, arg_mappers, AsyncInfo); - if (rc == OFFLOAD_SUCCESS) - rc = AsyncInfo.synchronize(); - handleTargetOutcome(rc == OFFLOAD_SUCCESS, loc); + int Rc = targetDataUpdate(Loc, Device, ArgNum, ArgsBase, Args, ArgSizes, + ArgTypes, ArgNames, ArgMappers, AsyncInfo); + if (Rc == OFFLOAD_SUCCESS) + Rc = AsyncInfo.synchronize(); + handleTargetOutcome(Rc == OFFLOAD_SUCCESS, Loc); } EXTERN void __tgt_target_data_update_nowait_mapper( - ident_t *loc, int64_t device_id, int32_t arg_num, void **args_base, - void **args, int64_t *arg_sizes, int64_t *arg_types, - map_var_info_t *arg_names, void **arg_mappers, int32_t depNum, - void *depList, int32_t noAliasDepNum, void *noAliasDepList) { - TIMESCOPE_WITH_IDENT(loc); - - __tgt_target_data_update_mapper(loc, device_id, arg_num, args_base, args, - arg_sizes, arg_types, arg_names, arg_mappers); + ident_t *Loc, int64_t DeviceId, int32_t ArgNum, void **ArgsBase, + void **Args, int64_t *ArgSizes, int64_t *ArgTypes, map_var_info_t *ArgNames, + void **ArgMappers, int32_t DepNum, void *DepList, int32_t NoAliasDepNum, + void *NoAliasDepList) { + TIMESCOPE_WITH_IDENT(Loc); + + __tgt_target_data_update_mapper(Loc, DeviceId, ArgNum, ArgsBase, Args, + ArgSizes, ArgTypes, ArgNames, ArgMappers); } -EXTERN int __tgt_target(int64_t device_id, void *host_ptr, int32_t arg_num, - void **args_base, void **args, int64_t *arg_sizes, - int64_t *arg_types) { +EXTERN int __tgt_target(int64_t DeviceId, void *HostPtr, int32_t ArgNum, + void **ArgsBase, void **Args, int64_t *ArgSizes, + int64_t *ArgTypes) { TIMESCOPE(); - return __tgt_target_mapper(nullptr, device_id, host_ptr, arg_num, args_base, - args, arg_sizes, arg_types, nullptr, nullptr); + return __tgt_target_mapper(nullptr, DeviceId, HostPtr, ArgNum, ArgsBase, Args, + ArgSizes, ArgTypes, nullptr, nullptr); } -EXTERN int __tgt_target_nowait(int64_t device_id, void *host_ptr, - int32_t arg_num, void **args_base, void **args, - int64_t *arg_sizes, int64_t *arg_types, - int32_t depNum, void *depList, - int32_t noAliasDepNum, void *noAliasDepList) { +EXTERN int __tgt_target_nowait(int64_t DeviceId, void *HostPtr, int32_t ArgNum, + void **ArgsBase, void **Args, int64_t *ArgSizes, + int64_t *ArgTypes, int32_t DepNum, 
void *DepList, + int32_t NoAliasDepNum, void *NoAliasDepList) { TIMESCOPE(); - return __tgt_target_mapper(nullptr, device_id, host_ptr, arg_num, args_base, - args, arg_sizes, arg_types, nullptr, nullptr); + return __tgt_target_mapper(nullptr, DeviceId, HostPtr, ArgNum, ArgsBase, Args, + ArgSizes, ArgTypes, nullptr, nullptr); } -EXTERN int __tgt_target_mapper(ident_t *loc, int64_t device_id, void *host_ptr, - int32_t arg_num, void **args_base, void **args, - int64_t *arg_sizes, int64_t *arg_types, - map_var_info_t *arg_names, void **arg_mappers) { - TIMESCOPE_WITH_IDENT(loc); +EXTERN int __tgt_target_mapper(ident_t *Loc, int64_t DeviceId, void *HostPtr, + int32_t ArgNum, void **ArgsBase, void **Args, + int64_t *ArgSizes, int64_t *ArgTypes, + map_var_info_t *ArgNames, void **ArgMappers) { + TIMESCOPE_WITH_IDENT(Loc); DP("Entering target region with entry point " DPxMOD " and device Id %" PRId64 "\n", - DPxPTR(host_ptr), device_id); - if (checkDeviceAndCtors(device_id, loc)) { - DP("Not offloading to device %" PRId64 "\n", device_id); + DPxPTR(HostPtr), DeviceId); + if (checkDeviceAndCtors(DeviceId, Loc)) { + DP("Not offloading to device %" PRId64 "\n", DeviceId); return OMP_TGT_FAIL; } if (getInfoLevel() & OMP_INFOTYPE_KERNEL_ARGS) - printKernelArguments(loc, device_id, arg_num, arg_sizes, arg_types, - arg_names, "Entering OpenMP kernel"); + printKernelArguments(Loc, DeviceId, ArgNum, ArgSizes, ArgTypes, ArgNames, + "Entering OpenMP kernel"); #ifdef OMPTARGET_DEBUG - for (int i = 0; i < arg_num; ++i) { + for (int I = 0; I < ArgNum; ++I) { DP("Entry %2d: Base=" DPxMOD ", Begin=" DPxMOD ", Size=%" PRId64 ", Type=0x%" PRIx64 ", Name=%s\n", - i, DPxPTR(args_base[i]), DPxPTR(args[i]), arg_sizes[i], arg_types[i], - (arg_names) ? getNameFromMapping(arg_names[i]).c_str() : "unknown"); + I, DPxPTR(ArgsBase[I]), DPxPTR(Args[I]), ArgSizes[I], ArgTypes[I], + (ArgNames) ? 
getNameFromMapping(ArgNames[I]).c_str() : "unknown"); } #endif - DeviceTy &Device = *PM->Devices[device_id]; + DeviceTy &Device = *PM->Devices[DeviceId]; AsyncInfoTy AsyncInfo(Device); - int rc = target(loc, Device, host_ptr, arg_num, args_base, args, arg_sizes, - arg_types, arg_names, arg_mappers, 0, 0, false /*team*/, - AsyncInfo); - if (rc == OFFLOAD_SUCCESS) - rc = AsyncInfo.synchronize(); - handleTargetOutcome(rc == OFFLOAD_SUCCESS, loc); - assert(rc == OFFLOAD_SUCCESS && "__tgt_target_mapper unexpected failure!"); + int Rc = + target(Loc, Device, HostPtr, ArgNum, ArgsBase, Args, ArgSizes, ArgTypes, + ArgNames, ArgMappers, 0, 0, false /*team*/, AsyncInfo); + if (Rc == OFFLOAD_SUCCESS) + Rc = AsyncInfo.synchronize(); + handleTargetOutcome(Rc == OFFLOAD_SUCCESS, Loc); + assert(Rc == OFFLOAD_SUCCESS && "__tgt_target_mapper unexpected failure!"); return OMP_TGT_SUCCESS; } EXTERN int __tgt_target_nowait_mapper( - ident_t *loc, int64_t device_id, void *host_ptr, int32_t arg_num, - void **args_base, void **args, int64_t *arg_sizes, int64_t *arg_types, - map_var_info_t *arg_names, void **arg_mappers, int32_t depNum, - void *depList, int32_t noAliasDepNum, void *noAliasDepList) { - TIMESCOPE_WITH_IDENT(loc); - - return __tgt_target_mapper(loc, device_id, host_ptr, arg_num, args_base, args, - arg_sizes, arg_types, arg_names, arg_mappers); + ident_t *Loc, int64_t DeviceId, void *HostPtr, int32_t ArgNum, + void **ArgsBase, void **Args, int64_t *ArgSizes, int64_t *ArgTypes, + map_var_info_t *ArgNames, void **ArgMappers, int32_t DepNum, void *DepList, + int32_t NoAliasDepNum, void *NoAliasDepList) { + TIMESCOPE_WITH_IDENT(Loc); + + return __tgt_target_mapper(Loc, DeviceId, HostPtr, ArgNum, ArgsBase, Args, + ArgSizes, ArgTypes, ArgNames, ArgMappers); } -EXTERN int __tgt_target_teams(int64_t device_id, void *host_ptr, - int32_t arg_num, void **args_base, void **args, - int64_t *arg_sizes, int64_t *arg_types, - int32_t team_num, int32_t thread_limit) { +EXTERN int __tgt_target_teams(int64_t DeviceId, void *HostPtr, int32_t ArgNum, + void **ArgsBase, void **Args, int64_t *ArgSizes, + int64_t *ArgTypes, int32_t TeamNum, + int32_t ThreadLimit) { TIMESCOPE(); - return __tgt_target_teams_mapper(nullptr, device_id, host_ptr, arg_num, - args_base, args, arg_sizes, arg_types, - nullptr, nullptr, team_num, thread_limit); + return __tgt_target_teams_mapper(nullptr, DeviceId, HostPtr, ArgNum, ArgsBase, + Args, ArgSizes, ArgTypes, nullptr, nullptr, + TeamNum, ThreadLimit); } -EXTERN int __tgt_target_teams_nowait(int64_t device_id, void *host_ptr, - int32_t arg_num, void **args_base, - void **args, int64_t *arg_sizes, - int64_t *arg_types, int32_t team_num, - int32_t thread_limit, int32_t depNum, - void *depList, int32_t noAliasDepNum, - void *noAliasDepList) { +EXTERN int __tgt_target_teams_nowait(int64_t DeviceId, void *HostPtr, + int32_t ArgNum, void **ArgsBase, + void **Args, int64_t *ArgSizes, + int64_t *ArgTypes, int32_t TeamNum, + int32_t ThreadLimit, int32_t DepNum, + void *DepList, int32_t NoAliasDepNum, + void *NoAliasDepList) { TIMESCOPE(); - return __tgt_target_teams_mapper(nullptr, device_id, host_ptr, arg_num, - args_base, args, arg_sizes, arg_types, - nullptr, nullptr, team_num, thread_limit); + return __tgt_target_teams_mapper(nullptr, DeviceId, HostPtr, ArgNum, ArgsBase, + Args, ArgSizes, ArgTypes, nullptr, nullptr, + TeamNum, ThreadLimit); } -EXTERN int __tgt_target_teams_mapper(ident_t *loc, int64_t device_id, - void *host_ptr, int32_t arg_num, - void **args_base, void **args, - int64_t 
*arg_sizes, int64_t *arg_types, - map_var_info_t *arg_names, - void **arg_mappers, int32_t team_num, - int32_t thread_limit) { +EXTERN int __tgt_target_teams_mapper(ident_t *Loc, int64_t DeviceId, + void *HostPtr, int32_t ArgNum, + void **ArgsBase, void **Args, + int64_t *ArgSizes, int64_t *ArgTypes, + map_var_info_t *ArgNames, + void **ArgMappers, int32_t TeamNum, + int32_t ThreadLimit) { DP("Entering target region with entry point " DPxMOD " and device Id %" PRId64 "\n", - DPxPTR(host_ptr), device_id); - if (checkDeviceAndCtors(device_id, loc)) { - DP("Not offloading to device %" PRId64 "\n", device_id); + DPxPTR(HostPtr), DeviceId); + if (checkDeviceAndCtors(DeviceId, Loc)) { + DP("Not offloading to device %" PRId64 "\n", DeviceId); return OMP_TGT_FAIL; } if (getInfoLevel() & OMP_INFOTYPE_KERNEL_ARGS) - printKernelArguments(loc, device_id, arg_num, arg_sizes, arg_types, - arg_names, "Entering OpenMP kernel"); + printKernelArguments(Loc, DeviceId, ArgNum, ArgSizes, ArgTypes, ArgNames, + "Entering OpenMP kernel"); #ifdef OMPTARGET_DEBUG - for (int i = 0; i < arg_num; ++i) { + for (int I = 0; I < ArgNum; ++I) { DP("Entry %2d: Base=" DPxMOD ", Begin=" DPxMOD ", Size=%" PRId64 ", Type=0x%" PRIx64 ", Name=%s\n", - i, DPxPTR(args_base[i]), DPxPTR(args[i]), arg_sizes[i], arg_types[i], - (arg_names) ? getNameFromMapping(arg_names[i]).c_str() : "unknown"); + I, DPxPTR(ArgsBase[I]), DPxPTR(Args[I]), ArgSizes[I], ArgTypes[I], + (ArgNames) ? getNameFromMapping(ArgNames[I]).c_str() : "unknown"); } #endif - DeviceTy &Device = *PM->Devices[device_id]; + DeviceTy &Device = *PM->Devices[DeviceId]; AsyncInfoTy AsyncInfo(Device); - int rc = target(loc, Device, host_ptr, arg_num, args_base, args, arg_sizes, - arg_types, arg_names, arg_mappers, team_num, thread_limit, + int Rc = target(Loc, Device, HostPtr, ArgNum, ArgsBase, Args, ArgSizes, + ArgTypes, ArgNames, ArgMappers, TeamNum, ThreadLimit, true /*team*/, AsyncInfo); - if (rc == OFFLOAD_SUCCESS) - rc = AsyncInfo.synchronize(); - handleTargetOutcome(rc == OFFLOAD_SUCCESS, loc); - assert(rc == OFFLOAD_SUCCESS && + if (Rc == OFFLOAD_SUCCESS) + Rc = AsyncInfo.synchronize(); + handleTargetOutcome(Rc == OFFLOAD_SUCCESS, Loc); + assert(Rc == OFFLOAD_SUCCESS && "__tgt_target_teams_mapper unexpected failure!"); return OMP_TGT_SUCCESS; } EXTERN int __tgt_target_teams_nowait_mapper( - ident_t *loc, int64_t device_id, void *host_ptr, int32_t arg_num, - void **args_base, void **args, int64_t *arg_sizes, int64_t *arg_types, - map_var_info_t *arg_names, void **arg_mappers, int32_t team_num, - int32_t thread_limit, int32_t depNum, void *depList, int32_t noAliasDepNum, - void *noAliasDepList) { - TIMESCOPE_WITH_IDENT(loc); - - return __tgt_target_teams_mapper(loc, device_id, host_ptr, arg_num, args_base, - args, arg_sizes, arg_types, arg_names, - arg_mappers, team_num, thread_limit); + ident_t *Loc, int64_t DeviceId, void *HostPtr, int32_t ArgNum, + void **ArgsBase, void **Args, int64_t *ArgSizes, int64_t *ArgTypes, + map_var_info_t *ArgNames, void **ArgMappers, int32_t TeamNum, + int32_t ThreadLimit, int32_t DepNum, void *DepList, int32_t NoAliasDepNum, + void *NoAliasDepList) { + TIMESCOPE_WITH_IDENT(Loc); + + return __tgt_target_teams_mapper(Loc, DeviceId, HostPtr, ArgNum, ArgsBase, + Args, ArgSizes, ArgTypes, ArgNames, + ArgMappers, TeamNum, ThreadLimit); } // Get the current number of components for a user-defined mapper. 
-EXTERN int64_t __tgt_mapper_num_components(void *rt_mapper_handle) {
+EXTERN int64_t __tgt_mapper_num_components(void *RtMapperHandle) {
  TIMESCOPE();
-  auto *MapperComponentsPtr = (struct MapperComponentsTy *)rt_mapper_handle;
-  int64_t size = MapperComponentsPtr->Components.size();
+  auto *MapperComponentsPtr = (struct MapperComponentsTy *)RtMapperHandle;
+  int64_t Size = MapperComponentsPtr->Components.size();
  DP("__tgt_mapper_num_components(Handle=" DPxMOD ") returns %" PRId64 "\n",
-     DPxPTR(rt_mapper_handle), size);
-  return size;
+     DPxPTR(RtMapperHandle), Size);
+  return Size;
}

// Push back one component for a user-defined mapper.
-EXTERN void __tgt_push_mapper_component(void *rt_mapper_handle, void *base,
-                                        void *begin, int64_t size, int64_t type,
-                                        void *name) {
+EXTERN void __tgt_push_mapper_component(void *RtMapperHandle, void *Base,
+                                        void *Begin, int64_t Size, int64_t Type,
+                                        void *Name) {
  TIMESCOPE();
  DP("__tgt_push_mapper_component(Handle=" DPxMOD
     ") adds an entry (Base=" DPxMOD ", Begin=" DPxMOD ", Size=%" PRId64
     ", Type=0x%" PRIx64 ", Name=%s).\n",
-     DPxPTR(rt_mapper_handle), DPxPTR(base), DPxPTR(begin), size, type,
-     (name) ? getNameFromMapping(name).c_str() : "unknown");
-  auto *MapperComponentsPtr = (struct MapperComponentsTy *)rt_mapper_handle;
+     DPxPTR(RtMapperHandle), DPxPTR(Base), DPxPTR(Begin), Size, Type,
+     (Name) ? getNameFromMapping(Name).c_str() : "unknown");
+  auto *MapperComponentsPtr = (struct MapperComponentsTy *)RtMapperHandle;
  MapperComponentsPtr->Components.push_back(
-      MapComponentInfoTy(base, begin, size, type, name));
+      MapComponentInfoTy(Base, Begin, Size, Type, Name));
}

-EXTERN void __kmpc_push_target_tripcount(int64_t device_id,
-                                         uint64_t loop_tripcount) {
-  __kmpc_push_target_tripcount_mapper(nullptr, device_id, loop_tripcount);
+EXTERN void __kmpc_push_target_tripcount(int64_t DeviceId,
+                                         uint64_t LoopTripcount) {
+  __kmpc_push_target_tripcount_mapper(nullptr, DeviceId, LoopTripcount);
}

-EXTERN void __kmpc_push_target_tripcount_mapper(ident_t *loc, int64_t device_id,
-                                                uint64_t loop_tripcount) {
-  TIMESCOPE_WITH_IDENT(loc);
-  if (checkDeviceAndCtors(device_id, loc)) {
-    DP("Not offloading to device %" PRId64 "\n", device_id);
+EXTERN void __kmpc_push_target_tripcount_mapper(ident_t *Loc, int64_t DeviceId,
+                                                uint64_t LoopTripcount) {
+  TIMESCOPE_WITH_IDENT(Loc);
+  if (checkDeviceAndCtors(DeviceId, Loc)) {
+    DP("Not offloading to device %" PRId64 "\n", DeviceId);
    return;
  }

-  DP("__kmpc_push_target_tripcount(%" PRId64 ", %" PRIu64 ")\n", device_id,
-     loop_tripcount);
+  DP("__kmpc_push_target_tripcount(%" PRId64 ", %" PRIu64 ")\n", DeviceId,
+     LoopTripcount);
  PM->TblMapMtx.lock();
-  PM->Devices[device_id]->LoopTripCnt.emplace(__kmpc_global_thread_num(NULL),
-                                              loop_tripcount);
+  PM->Devices[DeviceId]->LoopTripCnt.emplace(__kmpc_global_thread_num(NULL),
+                                             LoopTripcount);
  PM->TblMapMtx.unlock();
}
@@ -454,7 +453,7 @@
  }
}

-EXTERN int __tgt_print_device_info(int64_t device_id) {
-  return PM->Devices[device_id]->printDeviceInfo(
-      PM->Devices[device_id]->RTLDeviceID);
+EXTERN int __tgt_print_device_info(int64_t DeviceId) {
+  return PM->Devices[DeviceId]->printDeviceInfo(
+      PM->Devices[DeviceId]->RTLDeviceID);
}
diff --git a/openmp/libomptarget/src/interop.cpp b/openmp/libomptarget/src/interop.cpp
--- a/openmp/libomptarget/src/interop.cpp
+++ b/openmp/libomptarget/src/interop.cpp
@@ -62,81 +62,81 @@
                         omp_interop_property_t Property, int *Err);

template <>
-intptr_t getProperty<intptr_t>(omp_interop_val_t &interop_val,
-                               omp_interop_property_t property, int *err) {
-  switch (property) {
+intptr_t getProperty<intptr_t>(omp_interop_val_t &InteropVal,
+                               omp_interop_property_t Property, int *Err) {
+  switch (Property) {
  case omp_ipr_fr_id:
-    return interop_val.backend_type_id;
+    return InteropVal.backend_type_id;
  case omp_ipr_vendor:
-    return interop_val.vendor_id;
+    return InteropVal.vendor_id;
  case omp_ipr_device_num:
-    return interop_val.device_id;
+    return InteropVal.device_id;
  default:;
  }
-  getTypeMismatch(property, err);
+  getTypeMismatch(Property, Err);
  return 0;
}

template <>
-const char *getProperty<const char *>(omp_interop_val_t &interop_val,
-                                      omp_interop_property_t property,
-                                      int *err) {
-  switch (property) {
+const char *getProperty<const char *>(omp_interop_val_t &InteropVal,
+                                      omp_interop_property_t Property,
+                                      int *Err) {
+  switch (Property) {
  case omp_ipr_fr_id:
-    return interop_val.interop_type == kmp_interop_type_tasksync
+    return InteropVal.interop_type == kmp_interop_type_tasksync
               ? "tasksync"
               : "device+context";
  case omp_ipr_vendor_name:
-    return getVendorIdToStr(interop_val.vendor_id);
+    return getVendorIdToStr(InteropVal.vendor_id);
  default:
-    getTypeMismatch(property, err);
+    getTypeMismatch(Property, Err);
    return nullptr;
  }
}

template <>
-void *getProperty<void *>(omp_interop_val_t &interop_val,
-                          omp_interop_property_t property, int *err) {
-  switch (property) {
+void *getProperty<void *>(omp_interop_val_t &InteropVal,
+                          omp_interop_property_t Property, int *Err) {
+  switch (Property) {
  case omp_ipr_device:
-    if (interop_val.device_info.Device)
-      return interop_val.device_info.Device;
-    *err = omp_irc_no_value;
-    return const_cast<char *>(interop_val.err_str);
+    if (InteropVal.device_info.Device)
+      return InteropVal.device_info.Device;
+    *Err = omp_irc_no_value;
+    return const_cast<char *>(InteropVal.err_str);
  case omp_ipr_device_context:
-    return interop_val.device_info.Context;
+    return InteropVal.device_info.Context;
  case omp_ipr_targetsync:
-    return interop_val.async_info->Queue;
+    return InteropVal.async_info->Queue;
  default:;
  }
-  getTypeMismatch(property, err);
+  getTypeMismatch(Property, Err);
  return nullptr;
}

-bool getPropertyCheck(omp_interop_val_t **interop_ptr,
-                      omp_interop_property_t property, int *err) {
-  if (err)
-    *err = omp_irc_success;
-  if (!interop_ptr) {
-    if (err)
-      *err = omp_irc_empty;
+bool getPropertyCheck(omp_interop_val_t **InteropPtr,
+                      omp_interop_property_t Property, int *Err) {
+  if (Err)
+    *Err = omp_irc_success;
+  if (!InteropPtr) {
+    if (Err)
+      *Err = omp_irc_empty;
    return false;
  }
-  if (property >= 0 || property < omp_ipr_first) {
-    if (err)
-      *err = omp_irc_out_of_range;
+  if (Property >= 0 || Property < omp_ipr_first) {
+    if (Err)
+      *Err = omp_irc_out_of_range;
    return false;
  }
-  if (property == omp_ipr_targetsync &&
-      (*interop_ptr)->interop_type != kmp_interop_type_tasksync) {
-    if (err)
-      *err = omp_irc_other;
+  if (Property == omp_ipr_targetsync &&
+      (*InteropPtr)->interop_type != kmp_interop_type_tasksync) {
+    if (Err)
+      *Err = omp_irc_other;
    return false;
  }
-  if ((property == omp_ipr_device || property == omp_ipr_device_context) &&
-      (*interop_ptr)->interop_type == kmp_interop_type_tasksync) {
-    if (err)
-      *err = omp_irc_other;
+  if ((Property == omp_ipr_device || Property == omp_ipr_device_context) &&
+      (*InteropPtr)->interop_type == kmp_interop_type_tasksync) {
+    if (Err)
+      *Err = omp_irc_other;
    return false;
  }
  return true;
@@ -181,105 +181,105 @@
#ifdef __cplusplus
extern "C" {
#endif
-void __tgt_interop_init(ident_t *loc_ref, kmp_int32 gtid,
-                        omp_interop_val_t *&interop_ptr,
-                        kmp_interop_type_t interop_type, kmp_int32 device_id,
-                        kmp_int64
ndeps, kmp_depend_info_t *dep_list, - kmp_int32 have_nowait) { - kmp_int32 ndeps_noalias = 0; - kmp_depend_info_t *noalias_dep_list = NULL; - assert(interop_type != kmp_interop_type_unknown && +void __tgt_interop_init(ident_t *LocRef, kmp_int32 Gtid, + omp_interop_val_t *&InteropPtr, + kmp_interop_type_t InteropType, kmp_int32 DeviceId, + kmp_int64 Ndeps, kmp_depend_info_t *DepList, + kmp_int32 HaveNowait) { + kmp_int32 NdepsNoalias = 0; + kmp_depend_info_t *NoaliasDepList = NULL; + assert(InteropType != kmp_interop_type_unknown && "Cannot initialize with unknown interop_type!"); - if (device_id == -1) { - device_id = omp_get_default_device(); + if (DeviceId == -1) { + DeviceId = omp_get_default_device(); } - if (interop_type == kmp_interop_type_tasksync) { - __kmpc_omp_wait_deps(loc_ref, gtid, ndeps, dep_list, ndeps_noalias, - noalias_dep_list); + if (InteropType == kmp_interop_type_tasksync) { + __kmpc_omp_wait_deps(LocRef, Gtid, Ndeps, DepList, NdepsNoalias, + NoaliasDepList); } - interop_ptr = new omp_interop_val_t(device_id, interop_type); - if (!device_is_ready(device_id)) { - interop_ptr->err_str = "Device not ready!"; + InteropPtr = new omp_interop_val_t(DeviceId, InteropType); + if (!deviceIsReady(DeviceId)) { + InteropPtr->err_str = "Device not ready!"; return; } - DeviceTy &Device = *PM->Devices[device_id]; + DeviceTy &Device = *PM->Devices[DeviceId]; if (!Device.RTL || !Device.RTL->init_device_info || - Device.RTL->init_device_info(device_id, &(interop_ptr)->device_info, - &(interop_ptr)->err_str)) { - delete interop_ptr; - interop_ptr = omp_interop_none; + Device.RTL->init_device_info(DeviceId, &(InteropPtr)->device_info, + &(InteropPtr)->err_str)) { + delete InteropPtr; + InteropPtr = omp_interop_none; } - if (interop_type == kmp_interop_type_tasksync) { + if (InteropType == kmp_interop_type_tasksync) { if (!Device.RTL || !Device.RTL->init_async_info || - Device.RTL->init_async_info(device_id, &(interop_ptr)->async_info)) { - delete interop_ptr; - interop_ptr = omp_interop_none; + Device.RTL->init_async_info(DeviceId, &(InteropPtr)->async_info)) { + delete InteropPtr; + InteropPtr = omp_interop_none; } } } -void __tgt_interop_use(ident_t *loc_ref, kmp_int32 gtid, - omp_interop_val_t *&interop_ptr, kmp_int32 device_id, - kmp_int32 ndeps, kmp_depend_info_t *dep_list, - kmp_int32 have_nowait) { - kmp_int32 ndeps_noalias = 0; - kmp_depend_info_t *noalias_dep_list = NULL; - assert(interop_ptr && "Cannot use nullptr!"); - omp_interop_val_t *interop_val = interop_ptr; - if (device_id == -1) { - device_id = omp_get_default_device(); +void __tgt_interop_use(ident_t *LocRef, kmp_int32 Gtid, + omp_interop_val_t *&InteropPtr, kmp_int32 DeviceId, + kmp_int32 Ndeps, kmp_depend_info_t *DepList, + kmp_int32 HaveNowait) { + kmp_int32 NdepsNoalias = 0; + kmp_depend_info_t *NoaliasDepList = NULL; + assert(InteropPtr && "Cannot use nullptr!"); + omp_interop_val_t *InteropVal = InteropPtr; + if (DeviceId == -1) { + DeviceId = omp_get_default_device(); } - assert(interop_val != omp_interop_none && + assert(InteropVal != omp_interop_none && "Cannot use uninitialized interop_ptr!"); - assert((device_id == -1 || interop_val->device_id == device_id) && + assert((DeviceId == -1 || InteropVal->device_id == DeviceId) && "Inconsistent device-id usage!"); - if (!device_is_ready(device_id)) { - interop_ptr->err_str = "Device not ready!"; + if (!deviceIsReady(DeviceId)) { + InteropPtr->err_str = "Device not ready!"; return; } - if (interop_val->interop_type == kmp_interop_type_tasksync) { - 
__kmpc_omp_wait_deps(loc_ref, gtid, ndeps, dep_list, ndeps_noalias, - noalias_dep_list); + if (InteropVal->interop_type == kmp_interop_type_tasksync) { + __kmpc_omp_wait_deps(LocRef, Gtid, Ndeps, DepList, NdepsNoalias, + NoaliasDepList); } // TODO Flush the queue associated with the interop through the plugin } -void __tgt_interop_destroy(ident_t *loc_ref, kmp_int32 gtid, - omp_interop_val_t *&interop_ptr, kmp_int32 device_id, - kmp_int32 ndeps, kmp_depend_info_t *dep_list, - kmp_int32 have_nowait) { - kmp_int32 ndeps_noalias = 0; - kmp_depend_info_t *noalias_dep_list = NULL; - assert(interop_ptr && "Cannot use nullptr!"); - omp_interop_val_t *interop_val = interop_ptr; - if (device_id == -1) { - device_id = omp_get_default_device(); +void __tgt_interop_destroy(ident_t *LocRef, kmp_int32 Gtid, + omp_interop_val_t *&InteropPtr, kmp_int32 DeviceId, + kmp_int32 Ndeps, kmp_depend_info_t *DepList, + kmp_int32 HaveNowait) { + kmp_int32 NdepsNoalias = 0; + kmp_depend_info_t *NoaliasDepList = NULL; + assert(InteropPtr && "Cannot use nullptr!"); + omp_interop_val_t *InteropVal = InteropPtr; + if (DeviceId == -1) { + DeviceId = omp_get_default_device(); } - if (interop_val == omp_interop_none) + if (InteropVal == omp_interop_none) return; - assert((device_id == -1 || interop_val->device_id == device_id) && + assert((DeviceId == -1 || InteropVal->device_id == DeviceId) && "Inconsistent device-id usage!"); - if (!device_is_ready(device_id)) { - interop_ptr->err_str = "Device not ready!"; + if (!deviceIsReady(DeviceId)) { + InteropPtr->err_str = "Device not ready!"; return; } - if (interop_val->interop_type == kmp_interop_type_tasksync) { - __kmpc_omp_wait_deps(loc_ref, gtid, ndeps, dep_list, ndeps_noalias, - noalias_dep_list); + if (InteropVal->interop_type == kmp_interop_type_tasksync) { + __kmpc_omp_wait_deps(LocRef, Gtid, Ndeps, DepList, NdepsNoalias, + NoaliasDepList); } // TODO Flush the queue associated with the interop through the plugin // TODO Signal out dependences - delete interop_ptr; - interop_ptr = omp_interop_none; + delete InteropPtr; + InteropPtr = omp_interop_none; } #ifdef __cplusplus } // extern "C" diff --git a/openmp/libomptarget/src/omptarget.cpp b/openmp/libomptarget/src/omptarget.cpp --- a/openmp/libomptarget/src/omptarget.cpp +++ b/openmp/libomptarget/src/omptarget.cpp @@ -70,13 +70,13 @@ static const int64_t Alignment = 8; /// Map global data and execute pending ctors -static int InitLibrary(DeviceTy &Device) { +static int initLibrary(DeviceTy &Device) { /* * Map global data */ - int32_t device_id = Device.DeviceID; - int rc = OFFLOAD_SUCCESS; - bool supportsEmptyImages = Device.RTL->supports_empty_images && + int32_t DeviceId = Device.DeviceID; + int Rc = OFFLOAD_SUCCESS; + bool SupportsEmptyImages = Device.RTL->supports_empty_images && Device.RTL->supports_empty_images() > 0; std::lock_guard LG( @@ -88,50 +88,50 @@ &PM->HostEntriesBeginToTransTable[HostEntriesBegin]; if (TransTable->HostTable.EntriesBegin == TransTable->HostTable.EntriesEnd && - !supportsEmptyImages) { + !SupportsEmptyImages) { // No host entry so no need to proceed continue; } - if (TransTable->TargetsTable[device_id] != 0) { + if (TransTable->TargetsTable[DeviceId] != 0) { // Library entries have already been processed continue; } // 1) get image. 
- assert(TransTable->TargetsImages.size() > (size_t)device_id && + assert(TransTable->TargetsImages.size() > (size_t)DeviceId && "Not expecting a device ID outside the table's bounds!"); - __tgt_device_image *img = TransTable->TargetsImages[device_id]; - if (!img) { - REPORT("No image loaded for device id %d.\n", device_id); - rc = OFFLOAD_FAIL; + __tgt_device_image *Img = TransTable->TargetsImages[DeviceId]; + if (!Img) { + REPORT("No image loaded for device id %d.\n", DeviceId); + Rc = OFFLOAD_FAIL; break; } // 2) load image into the target table. - __tgt_target_table *TargetTable = TransTable->TargetsTable[device_id] = - Device.load_binary(img); + __tgt_target_table *TargetTable = TransTable->TargetsTable[DeviceId] = + Device.loadBinary(Img); // Unable to get table for this image: invalidate image and fail. if (!TargetTable) { REPORT("Unable to generate entries table for device id %d.\n", - device_id); - TransTable->TargetsImages[device_id] = 0; - rc = OFFLOAD_FAIL; + DeviceId); + TransTable->TargetsImages[DeviceId] = 0; + Rc = OFFLOAD_FAIL; break; } // Verify whether the two table sizes match. - size_t hsize = + size_t Hsize = TransTable->HostTable.EntriesEnd - TransTable->HostTable.EntriesBegin; - size_t tsize = TargetTable->EntriesEnd - TargetTable->EntriesBegin; + size_t Tsize = TargetTable->EntriesEnd - TargetTable->EntriesBegin; // Invalid image for these host entries! - if (hsize != tsize) { + if (Hsize != Tsize) { REPORT( "Host and Target tables mismatch for device id %d [%zx != %zx].\n", - device_id, hsize, tsize); - TransTable->TargetsImages[device_id] = 0; - TransTable->TargetsTable[device_id] = 0; - rc = OFFLOAD_FAIL; + DeviceId, Hsize, Tsize); + TransTable->TargetsImages[DeviceId] = 0; + TransTable->TargetsTable[DeviceId] = 0; + Rc = OFFLOAD_FAIL; break; } @@ -175,8 +175,8 @@ } } - if (rc != OFFLOAD_SUCCESS) { - return rc; + if (Rc != OFFLOAD_SUCCESS) { + return Rc; } /* @@ -185,22 +185,22 @@ if (!Device.PendingCtorsDtors.empty()) { AsyncInfoTy AsyncInfo(Device); // Call all ctors for all libraries registered so far - for (auto &lib : Device.PendingCtorsDtors) { - if (!lib.second.PendingCtors.empty()) { + for (auto &Lib : Device.PendingCtorsDtors) { + if (!Lib.second.PendingCtors.empty()) { DP("Has pending ctors... call now\n"); - for (auto &entry : lib.second.PendingCtors) { - void *ctor = entry; - int rc = - target(nullptr, Device, ctor, 0, nullptr, nullptr, nullptr, + for (auto &Entry : Lib.second.PendingCtors) { + void *Ctor = Entry; + int Rc = + target(nullptr, Device, Ctor, 0, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, 1, 1, true /*team*/, AsyncInfo); - if (rc != OFFLOAD_SUCCESS) { - REPORT("Running ctor " DPxMOD " failed.\n", DPxPTR(ctor)); + if (Rc != OFFLOAD_SUCCESS) { + REPORT("Running ctor " DPxMOD " failed.\n", DPxPTR(Ctor)); return OFFLOAD_FAIL; } } // Clear the list to indicate that this device has been used - lib.second.PendingCtors.clear(); - DP("Done with pending ctors for lib " DPxMOD "\n", DPxPTR(lib.first)); + Lib.second.PendingCtors.clear(); + DP("Done with pending ctors for lib " DPxMOD "\n", DPxPTR(Lib.first)); } } // All constructors have been issued, wait for them now. 
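The pending-ctor bookkeeping drained above exists because a declare-target global with a dynamic initializer lowers to a constructor kernel that has to run on the device before any user kernel reads the global. A hedged, user-level illustration (computeSeed is a hypothetical function):

  #pragma omp declare target
  int computeSeed();               // hypothetical function
  int DeviceSeed = computeSeed();  // dynamic initializer; roughly speaking it
                                   // becomes a device ctor, queued in
                                   // PendingCtorsDtors and launched here
  #pragma omp end declare target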
@@ -232,10 +232,10 @@ FAILURE_MESSAGE("Consult https://openmp.llvm.org/design/Runtimes.html " "for debugging options.\n"); - SourceInfo info(Loc); - if (info.isAvailible()) - fprintf(stderr, "%s:%d:%d: ", info.getFilename(), info.getLine(), - info.getColumn()); + SourceInfo Info(Loc); + if (Info.isAvailible()) + fprintf(stderr, "%s:%d:%d: ", Info.getFilename(), Info.getLine(), + Info.getColumn()); else FAILURE_MESSAGE("Source location information not present. Compile with " "-g or -gline-tables-only.\n"); @@ -308,7 +308,7 @@ } // Is device ready? - if (!device_is_ready(DeviceID)) { + if (!deviceIsReady(DeviceID)) { REPORT("Device %" PRId64 " is not ready.\n", DeviceID); handleTargetOutcome(false, Loc); return true; @@ -324,7 +324,7 @@ Device.PendingGlobalsMtx); HasPendingGlobals = Device.HasPendingGlobals; } - if (HasPendingGlobals && InitLibrary(Device) != OFFLOAD_SUCCESS) { + if (HasPendingGlobals && initLibrary(Device) != OFFLOAD_SUCCESS) { REPORT("Failed to init globals on device %" PRId64 "\n", DeviceID); handleTargetOutcome(false, Loc); return true; @@ -333,54 +333,53 @@ return false; } -static int32_t getParentIndex(int64_t type) { - return ((type & OMP_TGT_MAPTYPE_MEMBER_OF) >> 48) - 1; +static int32_t getParentIndex(int64_t Type) { + return ((Type & OMP_TGT_MAPTYPE_MEMBER_OF) >> 48) - 1; } -void *targetAllocExplicit(size_t size, int device_num, int kind, - const char *name) { +void *targetAllocExplicit(size_t Size, int DeviceNum, int Kind, + const char *Name) { TIMESCOPE(); - DP("Call to %s for device %d requesting %zu bytes\n", name, device_num, size); + DP("Call to %s for device %d requesting %zu bytes\n", Name, DeviceNum, Size); - if (size <= 0) { - DP("Call to %s with non-positive length\n", name); + if (Size <= 0) { + DP("Call to %s with non-positive length\n", Name); return NULL; } - void *rc = NULL; + void *Rc = NULL; - if (device_num == omp_get_initial_device()) { - rc = malloc(size); - DP("%s returns host ptr " DPxMOD "\n", name, DPxPTR(rc)); - return rc; + if (DeviceNum == omp_get_initial_device()) { + Rc = malloc(Size); + DP("%s returns host ptr " DPxMOD "\n", Name, DPxPTR(Rc)); + return Rc; } - if (!device_is_ready(device_num)) { - DP("%s returns NULL ptr\n", name); + if (!deviceIsReady(DeviceNum)) { + DP("%s returns NULL ptr\n", Name); return NULL; } - DeviceTy &Device = *PM->Devices[device_num]; - rc = Device.allocData(size, nullptr, kind); - DP("%s returns device ptr " DPxMOD "\n", name, DPxPTR(rc)); - return rc; + DeviceTy &Device = *PM->Devices[DeviceNum]; + Rc = Device.allocData(Size, nullptr, Kind); + DP("%s returns device ptr " DPxMOD "\n", Name, DPxPTR(Rc)); + return Rc; } /// Call the user-defined mapper function followed by the appropriate // targetData* function (targetData{Begin,End,Update}). -int targetDataMapper(ident_t *loc, DeviceTy &Device, void *arg_base, void *arg, - int64_t arg_size, int64_t arg_type, - map_var_info_t arg_names, void *arg_mapper, - AsyncInfoTy &AsyncInfo, - TargetDataFuncPtrTy target_data_function) { - TIMESCOPE_WITH_IDENT(loc); - DP("Calling the mapper function " DPxMOD "\n", DPxPTR(arg_mapper)); +int targetDataMapper(ident_t *Loc, DeviceTy &Device, void *ArgBase, void *Arg, + int64_t ArgSize, int64_t ArgType, map_var_info_t ArgNames, + void *ArgMapper, AsyncInfoTy &AsyncInfo, + TargetDataFuncPtrTy TargetDataFunction) { + TIMESCOPE_WITH_IDENT(Loc); + DP("Calling the mapper function " DPxMOD "\n", DPxPTR(ArgMapper)); // The mapper function fills up Components. 
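// A hedged sketch of the callback shape behind MapperFuncPtrTy: a
// compiler-generated mapper receives the handle plus the base/begin/size/
// type/name of the mapped object and reports every component back through
// __tgt_push_mapper_component. For a hypothetical
//   struct Vec { double *Data; int64_t Len; };
// the generated mapper would look roughly like:
//   void omp_mapper_Vec(void *Handle, void *Base, void *Begin, int64_t Size,
//                       int64_t Type, void *Name) {
//     Vec *V = (Vec *)Begin;
//     // the struct itself, then the pointee section Data[0:Len]
//     __tgt_push_mapper_component(Handle, Base, Begin, sizeof(Vec), Type,
//                                 Name);
//     __tgt_push_mapper_component(Handle, &V->Data, V->Data,
//                                 V->Len * (int64_t)sizeof(double),
//                                 Type | OMP_TGT_MAPTYPE_PTR_AND_OBJ, Name);
//   }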
MapperComponentsTy MapperComponents; - MapperFuncPtrTy MapperFuncPtr = (MapperFuncPtrTy)(arg_mapper); - (*MapperFuncPtr)((void *)&MapperComponents, arg_base, arg, arg_size, arg_type, - arg_names); + MapperFuncPtrTy MapperFuncPtr = (MapperFuncPtrTy)(ArgMapper); + (*MapperFuncPtr)((void *)&MapperComponents, ArgBase, Arg, ArgSize, ArgType, + ArgNames); // Construct new arrays for args_base, args, arg_sizes and arg_types // using the information in MapperComponents and call the corresponding @@ -400,40 +399,40 @@ MapperArgNames[I] = C.Name; } - int rc = target_data_function(loc, Device, MapperComponents.Components.size(), - MapperArgsBase.data(), MapperArgs.data(), - MapperArgSizes.data(), MapperArgTypes.data(), - MapperArgNames.data(), /*arg_mappers*/ nullptr, - AsyncInfo, /*FromMapper=*/true); + int Rc = TargetDataFunction(Loc, Device, MapperComponents.Components.size(), + MapperArgsBase.data(), MapperArgs.data(), + MapperArgSizes.data(), MapperArgTypes.data(), + MapperArgNames.data(), /*arg_mappers*/ nullptr, + AsyncInfo, /*FromMapper=*/true); - return rc; + return Rc; } /// Internal function to do the mapping and transfer the data to the device -int targetDataBegin(ident_t *loc, DeviceTy &Device, int32_t arg_num, - void **args_base, void **args, int64_t *arg_sizes, - int64_t *arg_types, map_var_info_t *arg_names, - void **arg_mappers, AsyncInfoTy &AsyncInfo, +int targetDataBegin(ident_t *Loc, DeviceTy &Device, int32_t ArgNum, + void **ArgsBase, void **Args, int64_t *ArgSizes, + int64_t *ArgTypes, map_var_info_t *ArgNames, + void **ArgMappers, AsyncInfoTy &AsyncInfo, bool FromMapper) { // process each input. - for (int32_t i = 0; i < arg_num; ++i) { + for (int32_t I = 0; I < ArgNum; ++I) { // Ignore private variables and arrays - there is no mapping for them. - if ((arg_types[i] & OMP_TGT_MAPTYPE_LITERAL) || - (arg_types[i] & OMP_TGT_MAPTYPE_PRIVATE)) + if ((ArgTypes[I] & OMP_TGT_MAPTYPE_LITERAL) || + (ArgTypes[I] & OMP_TGT_MAPTYPE_PRIVATE)) continue; - if (arg_mappers && arg_mappers[i]) { + if (ArgMappers && ArgMappers[I]) { // Instead of executing the regular path of targetDataBegin, call the // targetDataMapper variant which will call targetDataBegin again // with new arguments. - DP("Calling targetDataMapper for the %dth argument\n", i); + DP("Calling targetDataMapper for the %dth argument\n", I); - map_var_info_t arg_name = (!arg_names) ? nullptr : arg_names[i]; - int rc = targetDataMapper(loc, Device, args_base[i], args[i], - arg_sizes[i], arg_types[i], arg_name, - arg_mappers[i], AsyncInfo, targetDataBegin); + map_var_info_t ArgName = (!ArgNames) ? nullptr : ArgNames[I]; + int Rc = targetDataMapper(Loc, Device, ArgsBase[I], Args[I], ArgSizes[I], + ArgTypes[I], ArgName, ArgMappers[I], AsyncInfo, + targetDataBegin); - if (rc != OFFLOAD_SUCCESS) { + if (Rc != OFFLOAD_SUCCESS) { REPORT("Call to targetDataBegin via targetDataMapper for custom mapper" " failed.\n"); return OFFLOAD_FAIL; @@ -443,46 +442,46 @@ continue; } - void *HstPtrBegin = args[i]; - void *HstPtrBase = args_base[i]; - int64_t data_size = arg_sizes[i]; - map_var_info_t HstPtrName = (!arg_names) ? nullptr : arg_names[i]; + void *HstPtrBegin = Args[I]; + void *HstPtrBase = ArgsBase[I]; + int64_t DataSize = ArgSizes[I]; + map_var_info_t HstPtrName = (!ArgNames) ? nullptr : ArgNames[I]; // Adjust for proper alignment if this is a combined entry (for structs). // Look at the next argument - if that is MEMBER_OF this one, then this one // is a combined entry. 
- int64_t padding = 0; - const int next_i = i + 1; - if (getParentIndex(arg_types[i]) < 0 && next_i < arg_num && - getParentIndex(arg_types[next_i]) == i) { - padding = (int64_t)HstPtrBegin % Alignment; - if (padding) { + int64_t Padding = 0; + const int NextI = I + 1; + if (getParentIndex(ArgTypes[I]) < 0 && NextI < ArgNum && + getParentIndex(ArgTypes[NextI]) == I) { + Padding = (int64_t)HstPtrBegin % Alignment; + if (Padding) { DP("Using a padding of %" PRId64 " bytes for begin address " DPxMOD "\n", - padding, DPxPTR(HstPtrBegin)); - HstPtrBegin = (char *)HstPtrBegin - padding; - data_size += padding; + Padding, DPxPTR(HstPtrBegin)); + HstPtrBegin = (char *)HstPtrBegin - Padding; + DataSize += Padding; } } // Address of pointer on the host and device, respectively. - void *Pointer_HstPtrBegin, *PointerTgtPtrBegin; - TargetPointerResultTy Pointer_TPR; + void *PointerHstPtrBegin, *PointerTgtPtrBegin; + TargetPointerResultTy PointerTpr; bool IsHostPtr = false; - bool IsImplicit = arg_types[i] & OMP_TGT_MAPTYPE_IMPLICIT; + bool IsImplicit = ArgTypes[I] & OMP_TGT_MAPTYPE_IMPLICIT; // Force the creation of a device side copy of the data when: // a close map modifier was associated with a map that contained a to. - bool HasCloseModifier = arg_types[i] & OMP_TGT_MAPTYPE_CLOSE; - bool HasPresentModifier = arg_types[i] & OMP_TGT_MAPTYPE_PRESENT; - bool HasHoldModifier = arg_types[i] & OMP_TGT_MAPTYPE_OMPX_HOLD; + bool HasCloseModifier = ArgTypes[I] & OMP_TGT_MAPTYPE_CLOSE; + bool HasPresentModifier = ArgTypes[I] & OMP_TGT_MAPTYPE_PRESENT; + bool HasHoldModifier = ArgTypes[I] & OMP_TGT_MAPTYPE_OMPX_HOLD; // UpdateRef is based on MEMBER_OF instead of TARGET_PARAM because if we // have reached this point via __tgt_target_data_begin and not __tgt_target // then no argument is marked as TARGET_PARAM ("omp target data map" is not // associated with a target region, so there are no target parameters). This // may be considered a hack, we could revise the scheme in the future. bool UpdateRef = - !(arg_types[i] & OMP_TGT_MAPTYPE_MEMBER_OF) && !(FromMapper && i == 0); - if (arg_types[i] & OMP_TGT_MAPTYPE_PTR_AND_OBJ) { + !(ArgTypes[I] & OMP_TGT_MAPTYPE_MEMBER_OF) && !(FromMapper && I == 0); + if (ArgTypes[I] & OMP_TGT_MAPTYPE_PTR_AND_OBJ) { DP("Has a pointer entry: \n"); // Base is address of pointer. // @@ -497,12 +496,12 @@ // entry for a global that might not already be allocated by the time the // PTR_AND_OBJ entry is handled below, and so the allocation might fail // when HasPresentModifier. - Pointer_TPR = Device.getTargetPointer( + PointerTpr = Device.getTargetPointer( HstPtrBase, HstPtrBase, sizeof(void *), /*HstPtrName=*/nullptr, /*HasFlagTo=*/false, /*HasFlagAlways=*/false, IsImplicit, UpdateRef, HasCloseModifier, HasPresentModifier, HasHoldModifier, AsyncInfo); - PointerTgtPtrBegin = Pointer_TPR.TargetPointer; - IsHostPtr = Pointer_TPR.Flags.IsHostPointer; + PointerTgtPtrBegin = PointerTpr.TargetPointer; + IsHostPtr = PointerTpr.Flags.IsHostPointer; if (!PointerTgtPtrBegin) { REPORT("Call to getTargetPointer returned null pointer (%s).\n", HasPresentModifier ? "'present' map type modifier" @@ -512,27 +511,27 @@ DP("There are %zu bytes allocated at target address " DPxMOD " - is%s new" "\n", sizeof(void *), DPxPTR(PointerTgtPtrBegin), - (Pointer_TPR.Flags.IsNewEntry ? "" : " not")); - Pointer_HstPtrBegin = HstPtrBase; + (PointerTpr.Flags.IsNewEntry ? "" : " not")); + PointerHstPtrBegin = HstPtrBase; // modify current entry. 
HstPtrBase = *(void **)HstPtrBase; // No need to update pointee ref count for the first element of the // subelement that comes from mapper. UpdateRef = - (!FromMapper || i != 0); // subsequently update ref count of pointee + (!FromMapper || I != 0); // subsequently update ref count of pointee } - const bool HasFlagTo = arg_types[i] & OMP_TGT_MAPTYPE_TO; - const bool HasFlagAlways = arg_types[i] & OMP_TGT_MAPTYPE_ALWAYS; + const bool HasFlagTo = ArgTypes[I] & OMP_TGT_MAPTYPE_TO; + const bool HasFlagAlways = ArgTypes[I] & OMP_TGT_MAPTYPE_ALWAYS; auto TPR = Device.getTargetPointer( - HstPtrBegin, HstPtrBase, data_size, HstPtrName, HasFlagTo, - HasFlagAlways, IsImplicit, UpdateRef, HasCloseModifier, - HasPresentModifier, HasHoldModifier, AsyncInfo); + HstPtrBegin, HstPtrBase, DataSize, HstPtrName, HasFlagTo, HasFlagAlways, + IsImplicit, UpdateRef, HasCloseModifier, HasPresentModifier, + HasHoldModifier, AsyncInfo); void *TgtPtrBegin = TPR.TargetPointer; IsHostPtr = TPR.Flags.IsHostPointer; // If data_size==0, then the argument could be a zero-length pointer to // NULL, so getOrAlloc() returning NULL is not an error. - if (!TgtPtrBegin && (data_size || HasPresentModifier)) { + if (!TgtPtrBegin && (DataSize || HasPresentModifier)) { REPORT("Call to getTargetPointer returned null pointer (%s).\n", HasPresentModifier ? "'present' map type modifier" : "device failure or illegal mapping"); @@ -540,16 +539,16 @@ } DP("There are %" PRId64 " bytes allocated at target address " DPxMOD " - is%s new\n", - data_size, DPxPTR(TgtPtrBegin), (TPR.Flags.IsNewEntry ? "" : " not")); + DataSize, DPxPTR(TgtPtrBegin), (TPR.Flags.IsNewEntry ? "" : " not")); - if (arg_types[i] & OMP_TGT_MAPTYPE_RETURN_PARAM) { + if (ArgTypes[I] & OMP_TGT_MAPTYPE_RETURN_PARAM) { uintptr_t Delta = (uintptr_t)HstPtrBegin - (uintptr_t)HstPtrBase; void *TgtPtrBase = (void *)((uintptr_t)TgtPtrBegin - Delta); DP("Returning device pointer " DPxMOD "\n", DPxPTR(TgtPtrBase)); - args_base[i] = TgtPtrBase; + ArgsBase[I] = TgtPtrBase; } - if (arg_types[i] & OMP_TGT_MAPTYPE_PTR_AND_OBJ && !IsHostPtr) { + if (ArgTypes[I] & OMP_TGT_MAPTYPE_PTR_AND_OBJ && !IsHostPtr) { // Check whether we need to update the pointer on the device bool UpdateDevPtr = false; @@ -557,7 +556,7 @@ void *ExpectedTgtPtrBase = (void *)((uint64_t)TgtPtrBegin - Delta); Device.ShadowMtx.lock(); - auto Entry = Device.ShadowPtrMap.find(Pointer_HstPtrBegin); + auto Entry = Device.ShadowPtrMap.find(PointerHstPtrBegin); // If this pointer is not in the map we need to insert it. If the map // contains a stale entry, we need to update it (e.g. if the pointee was // deallocated and later on is reallocated at another device address). 
The @@ -572,14 +571,14 @@ if (Entry == Device.ShadowPtrMap.end() || Entry->second.TgtPtrVal != ExpectedTgtPtrBase) { // create or update shadow pointers for this entry - Device.ShadowPtrMap[Pointer_HstPtrBegin] = { + Device.ShadowPtrMap[PointerHstPtrBegin] = { HstPtrBase, PointerTgtPtrBegin, ExpectedTgtPtrBase}; - Pointer_TPR.Entry->setMayContainAttachedPointers(); + PointerTpr.Entry->setMayContainAttachedPointers(); UpdateDevPtr = true; } if (UpdateDevPtr) { - std::lock_guard LG(*Pointer_TPR.Entry); + std::lock_guard LG(*PointerTpr.Entry); Device.ShadowMtx.unlock(); DP("Update pointer (" DPxMOD ") -> [" DPxMOD "]\n", @@ -594,7 +593,7 @@ REPORT("Copying data to device failed.\n"); return OFFLOAD_FAIL; } - if (Pointer_TPR.Entry->addEventIfNecessary(Device, AsyncInfo) != + if (PointerTpr.Entry->addEventIfNecessary(Device, AsyncInfo) != OFFLOAD_SUCCESS) return OFFLOAD_FAIL; } else @@ -675,7 +674,7 @@ } // namespace /// Internal function to undo the mapping and retrieve the data from the device. -int targetDataEnd(ident_t *loc, DeviceTy &Device, int32_t ArgNum, +int targetDataEnd(ident_t *Loc, DeviceTy &Device, int32_t ArgNum, void **ArgBases, void **Args, int64_t *ArgSizes, int64_t *ArgTypes, map_var_info_t *ArgNames, void **ArgMappers, AsyncInfoTy &AsyncInfo, bool FromMapper) { @@ -697,7 +696,7 @@ DP("Calling targetDataMapper for the %dth argument\n", I); map_var_info_t ArgName = (!ArgNames) ? nullptr : ArgNames[I]; - Ret = targetDataMapper(loc, Device, ArgBases[I], Args[I], ArgSizes[I], + Ret = targetDataMapper(Loc, Device, ArgBases[I], Args[I], ArgSizes[I], ArgTypes[I], ArgName, ArgMappers[I], AsyncInfo, targetDataEnd); @@ -909,10 +908,10 @@ return Ret; } -static int targetDataContiguous(ident_t *loc, DeviceTy &Device, void *ArgsBase, +static int targetDataContiguous(ident_t *Loc, DeviceTy &Device, void *ArgsBase, void *HstPtrBegin, int64_t ArgSize, int64_t ArgType, AsyncInfoTy &AsyncInfo) { - TIMESCOPE_WITH_IDENT(loc); + TIMESCOPE_WITH_IDENT(Loc); bool IsLast, IsHostPtr; TargetPointerResultTy TPR = Device.getTgtPtrBegin( HstPtrBegin, ArgSize, IsLast, /*UpdateRefCount=*/false, @@ -985,13 +984,13 @@ return OFFLOAD_SUCCESS; } -static int targetDataNonContiguous(ident_t *loc, DeviceTy &Device, +static int targetDataNonContiguous(ident_t *Loc, DeviceTy &Device, void *ArgsBase, __tgt_target_non_contig *NonContig, uint64_t Size, int64_t ArgType, int CurrentDim, int DimSize, uint64_t Offset, AsyncInfoTy &AsyncInfo) { - TIMESCOPE_WITH_IDENT(loc); + TIMESCOPE_WITH_IDENT(Loc); int Ret = OFFLOAD_SUCCESS; if (CurrentDim < DimSize) { for (unsigned int I = 0; I < NonContig[CurrentDim].Count; ++I) { @@ -1000,7 +999,7 @@ // we only need to transfer the first element for the last dimension // since we've already got a contiguous piece. if (CurrentDim != DimSize - 1 || I == 0) { - Ret = targetDataNonContiguous(loc, Device, ArgsBase, NonContig, Size, + Ret = targetDataNonContiguous(Loc, Device, ArgsBase, NonContig, Size, ArgType, CurrentDim + 1, DimSize, Offset + CurOffset, AsyncInfo); // Stop the whole process if any contiguous piece returns anything @@ -1014,7 +1013,7 @@ DP("Transfer of non-contiguous : host ptr " DPxMOD " offset %" PRIu64 " len %" PRIu64 "\n", DPxPTR(Ptr), Offset, Size); - Ret = targetDataContiguous(loc, Device, ArgsBase, Ptr, Size, ArgType, + Ret = targetDataContiguous(Loc, Device, ArgsBase, Ptr, Size, ArgType, AsyncInfo); } return Ret; @@ -1031,7 +1030,7 @@ } /// Internal function to pass data to/from the target. 
-int targetDataUpdate(ident_t *loc, DeviceTy &Device, int32_t ArgNum, +int targetDataUpdate(ident_t *Loc, DeviceTy &Device, int32_t ArgNum, void **ArgsBase, void **Args, int64_t *ArgSizes, int64_t *ArgTypes, map_var_info_t *ArgNames, void **ArgMappers, AsyncInfoTy &AsyncInfo, bool) { @@ -1048,7 +1047,7 @@ DP("Calling targetDataMapper for the %dth argument\n", I); map_var_info_t ArgName = (!ArgNames) ? nullptr : ArgNames[I]; - int Ret = targetDataMapper(loc, Device, ArgsBase[I], Args[I], ArgSizes[I], + int Ret = targetDataMapper(Loc, Device, ArgsBase[I], Args[I], ArgSizes[I], ArgTypes[I], ArgName, ArgMappers[I], AsyncInfo, targetDataUpdate); @@ -1071,10 +1070,10 @@ NonContig[DimSize - 1].Count * NonContig[DimSize - 1].Stride; int32_t MergedDim = getNonContigMergedDimension(NonContig, DimSize); Ret = targetDataNonContiguous( - loc, Device, ArgsBase[I], NonContig, Size, ArgTypes[I], + Loc, Device, ArgsBase[I], NonContig, Size, ArgTypes[I], /*current_dim=*/0, DimSize - MergedDim, /*offset=*/0, AsyncInfo); } else { - Ret = targetDataContiguous(loc, Device, ArgsBase[I], Args[I], ArgSizes[I], + Ret = targetDataContiguous(Loc, Device, ArgsBase[I], Args[I], ArgSizes[I], ArgTypes[I], AsyncInfo); } if (Ret == OFFLOAD_FAIL) @@ -1321,7 +1320,7 @@ /// Process data before launching the kernel, including calling targetDataBegin /// to map and transfer data to target device, transferring (first-)private /// variables. -static int processDataBefore(ident_t *loc, int64_t DeviceId, void *HostPtr, +static int processDataBefore(ident_t *Loc, int64_t DeviceId, void *HostPtr, int32_t ArgNum, void **ArgBases, void **Args, int64_t *ArgSizes, int64_t *ArgTypes, map_var_info_t *ArgNames, void **ArgMappers, @@ -1329,9 +1328,9 @@ std::vector &TgtOffsets, PrivateArgumentManagerTy &PrivateArgumentManager, AsyncInfoTy &AsyncInfo) { - TIMESCOPE_WITH_NAME_AND_IDENT("mappingBeforeTargetRegion", loc); + TIMESCOPE_WITH_NAME_AND_IDENT("mappingBeforeTargetRegion", Loc); DeviceTy &Device = *PM->Devices[DeviceId]; - int Ret = targetDataBegin(loc, Device, ArgNum, ArgBases, Args, ArgSizes, + int Ret = targetDataBegin(Loc, Device, ArgNum, ArgBases, Args, ArgSizes, ArgTypes, ArgNames, ArgMappers, AsyncInfo); if (Ret != OFFLOAD_SUCCESS) { REPORT("Call to targetDataBegin failed, abort target.\n"); @@ -1452,17 +1451,17 @@ /// Process data after launching the kernel, including transferring data back to /// host if needed and deallocating target memory of (first-)private variables. -static int processDataAfter(ident_t *loc, int64_t DeviceId, void *HostPtr, +static int processDataAfter(ident_t *Loc, int64_t DeviceId, void *HostPtr, int32_t ArgNum, void **ArgBases, void **Args, int64_t *ArgSizes, int64_t *ArgTypes, map_var_info_t *ArgNames, void **ArgMappers, PrivateArgumentManagerTy &PrivateArgumentManager, AsyncInfoTy &AsyncInfo) { - TIMESCOPE_WITH_NAME_AND_IDENT("mappingAfterTargetRegion", loc); + TIMESCOPE_WITH_NAME_AND_IDENT("mappingAfterTargetRegion", Loc); DeviceTy &Device = *PM->Devices[DeviceId]; // Move data from device. - int Ret = targetDataEnd(loc, Device, ArgNum, ArgBases, Args, ArgSizes, + int Ret = targetDataEnd(Loc, Device, ArgNum, ArgBases, Args, ArgSizes, ArgTypes, ArgNames, ArgMappers, AsyncInfo); if (Ret != OFFLOAD_SUCCESS) { REPORT("Call to targetDataEnd failed, abort target.\n"); @@ -1486,7 +1485,7 @@ /// performs the same action as data_update and data_end above. This function /// returns 0 if it was able to transfer the execution to a target and an /// integer different from zero otherwise. 
-int target(ident_t *loc, DeviceTy &Device, void *HostPtr, int32_t ArgNum, +int target(ident_t *Loc, DeviceTy &Device, void *HostPtr, int32_t ArgNum, void **ArgBases, void **Args, int64_t *ArgSizes, int64_t *ArgTypes, map_var_info_t *ArgNames, void **ArgMappers, int32_t TeamNum, int32_t ThreadLimit, int IsTeamConstruct, AsyncInfoTy &AsyncInfo) { @@ -1527,7 +1526,7 @@ int Ret; if (ArgNum) { // Process data, such as data mapping, before launching the kernel - Ret = processDataBefore(loc, DeviceId, HostPtr, ArgNum, ArgBases, Args, + Ret = processDataBefore(Loc, DeviceId, HostPtr, ArgNum, ArgBases, Args, ArgSizes, ArgTypes, ArgNames, ArgMappers, TgtArgs, TgtOffsets, PrivateArgumentManager, AsyncInfo); if (Ret != OFFLOAD_SUCCESS) { @@ -1543,7 +1542,7 @@ { TIMESCOPE_WITH_NAME_AND_IDENT( - IsTeamConstruct ? "runTargetTeamRegion" : "runTargetRegion", loc); + IsTeamConstruct ? "runTargetTeamRegion" : "runTargetRegion", Loc); if (IsTeamConstruct) Ret = Device.runTeamRegion(TgtEntryPtr, &TgtArgs[0], &TgtOffsets[0], TgtArgs.size(), TeamNum, ThreadLimit, @@ -1561,7 +1560,7 @@ if (ArgNum) { // Transfer data back and deallocate target memory for (first-)private // variables - Ret = processDataAfter(loc, DeviceId, HostPtr, ArgNum, ArgBases, Args, + Ret = processDataAfter(Loc, DeviceId, HostPtr, ArgNum, ArgBases, Args, ArgSizes, ArgTypes, ArgNames, ArgMappers, PrivateArgumentManager, AsyncInfo); if (Ret != OFFLOAD_SUCCESS) { diff --git a/openmp/libomptarget/src/private.h b/openmp/libomptarget/src/private.h --- a/openmp/libomptarget/src/private.h +++ b/openmp/libomptarget/src/private.h @@ -20,34 +20,34 @@ #include -extern int targetDataBegin(ident_t *loc, DeviceTy &Device, int32_t arg_num, - void **args_base, void **args, int64_t *arg_sizes, - int64_t *arg_types, map_var_info_t *arg_names, - void **arg_mappers, AsyncInfoTy &AsyncInfo, +extern int targetDataBegin(ident_t *Loc, DeviceTy &Device, int32_t ArgNum, + void **ArgsBase, void **Args, int64_t *ArgSizes, + int64_t *ArgTypes, map_var_info_t *ArgNames, + void **ArgMappers, AsyncInfoTy &AsyncInfo, bool FromMapper = false); -extern int targetDataEnd(ident_t *loc, DeviceTy &Device, int32_t ArgNum, +extern int targetDataEnd(ident_t *Loc, DeviceTy &Device, int32_t ArgNum, void **ArgBases, void **Args, int64_t *ArgSizes, - int64_t *ArgTypes, map_var_info_t *arg_names, + int64_t *ArgTypes, map_var_info_t *ArgNames, void **ArgMappers, AsyncInfoTy &AsyncInfo, bool FromMapper = false); -extern int targetDataUpdate(ident_t *loc, DeviceTy &Device, int32_t arg_num, - void **args_base, void **args, int64_t *arg_sizes, - int64_t *arg_types, map_var_info_t *arg_names, - void **arg_mappers, AsyncInfoTy &AsyncInfo, +extern int targetDataUpdate(ident_t *Loc, DeviceTy &Device, int32_t ArgNum, + void **ArgsBase, void **Args, int64_t *ArgSizes, + int64_t *ArgTypes, map_var_info_t *ArgNames, + void **ArgMappers, AsyncInfoTy &AsyncInfo, bool FromMapper = false); -extern int target(ident_t *loc, DeviceTy &Device, void *HostPtr, int32_t ArgNum, +extern int target(ident_t *Loc, DeviceTy &Device, void *HostPtr, int32_t ArgNum, void **ArgBases, void **Args, int64_t *ArgSizes, - int64_t *ArgTypes, map_var_info_t *arg_names, + int64_t *ArgTypes, map_var_info_t *ArgNames, void **ArgMappers, int32_t TeamNum, int32_t ThreadLimit, int IsTeamConstruct, AsyncInfoTy &AsyncInfo); extern void handleTargetOutcome(bool Success, ident_t *Loc); extern bool checkDeviceAndCtors(int64_t &DeviceID, ident_t *Loc); -extern void *targetAllocExplicit(size_t size, int device_num, int kind, - const char 
*name); +extern void *targetAllocExplicit(size_t Size, int DeviceNum, int Kind, + const char *Name); // This structure stores information of a mapped memory region. struct MapComponentInfoTy { @@ -156,33 +156,33 @@ const int32_t ArgNum, const int64_t *ArgSizes, const int64_t *ArgTypes, const map_var_info_t *ArgNames, const char *RegionType) { - SourceInfo info(Loc); + SourceInfo Info(Loc); INFO(OMP_INFOTYPE_ALL, DeviceId, "%s at %s:%d:%d with %d arguments:\n", - RegionType, info.getFilename(), info.getLine(), info.getColumn(), + RegionType, Info.getFilename(), Info.getLine(), Info.getColumn(), ArgNum); - for (int32_t i = 0; i < ArgNum; ++i) { - const map_var_info_t varName = (ArgNames) ? ArgNames[i] : nullptr; - const char *type = nullptr; - const char *implicit = - (ArgTypes[i] & OMP_TGT_MAPTYPE_IMPLICIT) ? "(implicit)" : ""; - if (ArgTypes[i] & OMP_TGT_MAPTYPE_TO && ArgTypes[i] & OMP_TGT_MAPTYPE_FROM) - type = "tofrom"; - else if (ArgTypes[i] & OMP_TGT_MAPTYPE_TO) - type = "to"; - else if (ArgTypes[i] & OMP_TGT_MAPTYPE_FROM) - type = "from"; - else if (ArgTypes[i] & OMP_TGT_MAPTYPE_PRIVATE) - type = "private"; - else if (ArgTypes[i] & OMP_TGT_MAPTYPE_LITERAL) - type = "firstprivate"; - else if (ArgSizes[i] != 0) - type = "alloc"; + for (int32_t I = 0; I < ArgNum; ++I) { + const map_var_info_t VarName = (ArgNames) ? ArgNames[I] : nullptr; + const char *Type = nullptr; + const char *Implicit = + (ArgTypes[I] & OMP_TGT_MAPTYPE_IMPLICIT) ? "(implicit)" : ""; + if (ArgTypes[I] & OMP_TGT_MAPTYPE_TO && ArgTypes[I] & OMP_TGT_MAPTYPE_FROM) + Type = "tofrom"; + else if (ArgTypes[I] & OMP_TGT_MAPTYPE_TO) + Type = "to"; + else if (ArgTypes[I] & OMP_TGT_MAPTYPE_FROM) + Type = "from"; + else if (ArgTypes[I] & OMP_TGT_MAPTYPE_PRIVATE) + Type = "private"; + else if (ArgTypes[I] & OMP_TGT_MAPTYPE_LITERAL) + Type = "firstprivate"; + else if (ArgSizes[I] != 0) + Type = "alloc"; else - type = "use_address"; + Type = "use_address"; - INFO(OMP_INFOTYPE_ALL, DeviceId, "%s(%s)[%" PRId64 "] %s\n", type, - getNameFromMapping(varName).c_str(), ArgSizes[i], implicit); + INFO(OMP_INFOTYPE_ALL, DeviceId, "%s(%s)[%" PRId64 "] %s\n", Type, + getNameFromMapping(VarName).c_str(), ArgSizes[I], Implicit); } } diff --git a/openmp/libomptarget/src/rtl.cpp b/openmp/libomptarget/src/rtl.cpp --- a/openmp/libomptarget/src/rtl.cpp +++ b/openmp/libomptarget/src/rtl.cpp @@ -78,7 +78,7 @@ #endif } -void RTLsTy::LoadRTLs() { +void RTLsTy::loadRTLs() { // Parse environment variable OMP_TARGET_OFFLOAD (if set) PM->TargetOffloadPolicy = (kmp_target_offload_kind_t)__kmpc_get_target_offload(); @@ -92,9 +92,9 @@ // is correct and if they are supporting any devices. for (auto *Name : RTLNames) { DP("Loading library '%s'...\n", Name); - void *dynlib_handle = dlopen(Name, RTLD_NOW); + void *DynlibHandle = dlopen(Name, RTLD_NOW); - if (!dynlib_handle) { + if (!DynlibHandle) { // Library does not exist or cannot be found. 
DP("Unable to load library '%s': %s!\n", Name, dlerror()); continue; @@ -110,34 +110,34 @@ bool ValidPlugin = true; if (!(*((void **)&R.is_valid_binary) = - dlsym(dynlib_handle, "__tgt_rtl_is_valid_binary"))) + dlsym(DynlibHandle, "__tgt_rtl_is_valid_binary"))) ValidPlugin = false; if (!(*((void **)&R.number_of_devices) = - dlsym(dynlib_handle, "__tgt_rtl_number_of_devices"))) + dlsym(DynlibHandle, "__tgt_rtl_number_of_devices"))) ValidPlugin = false; if (!(*((void **)&R.init_device) = - dlsym(dynlib_handle, "__tgt_rtl_init_device"))) + dlsym(DynlibHandle, "__tgt_rtl_init_device"))) ValidPlugin = false; if (!(*((void **)&R.load_binary) = - dlsym(dynlib_handle, "__tgt_rtl_load_binary"))) + dlsym(DynlibHandle, "__tgt_rtl_load_binary"))) ValidPlugin = false; if (!(*((void **)&R.data_alloc) = - dlsym(dynlib_handle, "__tgt_rtl_data_alloc"))) + dlsym(DynlibHandle, "__tgt_rtl_data_alloc"))) ValidPlugin = false; if (!(*((void **)&R.data_submit) = - dlsym(dynlib_handle, "__tgt_rtl_data_submit"))) + dlsym(DynlibHandle, "__tgt_rtl_data_submit"))) ValidPlugin = false; if (!(*((void **)&R.data_retrieve) = - dlsym(dynlib_handle, "__tgt_rtl_data_retrieve"))) + dlsym(DynlibHandle, "__tgt_rtl_data_retrieve"))) ValidPlugin = false; if (!(*((void **)&R.data_delete) = - dlsym(dynlib_handle, "__tgt_rtl_data_delete"))) + dlsym(DynlibHandle, "__tgt_rtl_data_delete"))) ValidPlugin = false; if (!(*((void **)&R.run_region) = - dlsym(dynlib_handle, "__tgt_rtl_run_target_region"))) + dlsym(DynlibHandle, "__tgt_rtl_run_target_region"))) ValidPlugin = false; if (!(*((void **)&R.run_team_region) = - dlsym(dynlib_handle, "__tgt_rtl_run_target_team_region"))) + dlsym(DynlibHandle, "__tgt_rtl_run_target_team_region"))) ValidPlugin = false; // Invalid plugin @@ -155,7 +155,7 @@ continue; } - R.LibraryHandler = dynlib_handle; + R.LibraryHandler = DynlibHandle; #ifdef OMPTARGET_DEBUG R.RTLName = Name; @@ -166,48 +166,45 @@ // Optional functions *((void **)&R.deinit_device) = - dlsym(dynlib_handle, "__tgt_rtl_deinit_device"); + dlsym(DynlibHandle, "__tgt_rtl_deinit_device"); *((void **)&R.init_requires) = - dlsym(dynlib_handle, "__tgt_rtl_init_requires"); + dlsym(DynlibHandle, "__tgt_rtl_init_requires"); *((void **)&R.data_submit_async) = - dlsym(dynlib_handle, "__tgt_rtl_data_submit_async"); + dlsym(DynlibHandle, "__tgt_rtl_data_submit_async"); *((void **)&R.data_retrieve_async) = - dlsym(dynlib_handle, "__tgt_rtl_data_retrieve_async"); + dlsym(DynlibHandle, "__tgt_rtl_data_retrieve_async"); *((void **)&R.run_region_async) = - dlsym(dynlib_handle, "__tgt_rtl_run_target_region_async"); + dlsym(DynlibHandle, "__tgt_rtl_run_target_region_async"); *((void **)&R.run_team_region_async) = - dlsym(dynlib_handle, "__tgt_rtl_run_target_team_region_async"); - *((void **)&R.synchronize) = dlsym(dynlib_handle, "__tgt_rtl_synchronize"); + dlsym(DynlibHandle, "__tgt_rtl_run_target_team_region_async"); + *((void **)&R.synchronize) = dlsym(DynlibHandle, "__tgt_rtl_synchronize"); *((void **)&R.data_exchange) = - dlsym(dynlib_handle, "__tgt_rtl_data_exchange"); + dlsym(DynlibHandle, "__tgt_rtl_data_exchange"); *((void **)&R.data_exchange_async) = - dlsym(dynlib_handle, "__tgt_rtl_data_exchange_async"); + dlsym(DynlibHandle, "__tgt_rtl_data_exchange_async"); *((void **)&R.is_data_exchangable) = - dlsym(dynlib_handle, "__tgt_rtl_is_data_exchangable"); - *((void **)&R.register_lib) = - dlsym(dynlib_handle, "__tgt_rtl_register_lib"); + dlsym(DynlibHandle, "__tgt_rtl_is_data_exchangable"); + *((void **)&R.register_lib) = dlsym(DynlibHandle, 
"__tgt_rtl_register_lib"); *((void **)&R.unregister_lib) = - dlsym(dynlib_handle, "__tgt_rtl_unregister_lib"); + dlsym(DynlibHandle, "__tgt_rtl_unregister_lib"); *((void **)&R.supports_empty_images) = - dlsym(dynlib_handle, "__tgt_rtl_supports_empty_images"); + dlsym(DynlibHandle, "__tgt_rtl_supports_empty_images"); *((void **)&R.set_info_flag) = - dlsym(dynlib_handle, "__tgt_rtl_set_info_flag"); + dlsym(DynlibHandle, "__tgt_rtl_set_info_flag"); *((void **)&R.print_device_info) = - dlsym(dynlib_handle, "__tgt_rtl_print_device_info"); - *((void **)&R.create_event) = - dlsym(dynlib_handle, "__tgt_rtl_create_event"); - *((void **)&R.record_event) = - dlsym(dynlib_handle, "__tgt_rtl_record_event"); - *((void **)&R.wait_event) = dlsym(dynlib_handle, "__tgt_rtl_wait_event"); - *((void **)&R.sync_event) = dlsym(dynlib_handle, "__tgt_rtl_sync_event"); + dlsym(DynlibHandle, "__tgt_rtl_print_device_info"); + *((void **)&R.create_event) = dlsym(DynlibHandle, "__tgt_rtl_create_event"); + *((void **)&R.record_event) = dlsym(DynlibHandle, "__tgt_rtl_record_event"); + *((void **)&R.wait_event) = dlsym(DynlibHandle, "__tgt_rtl_wait_event"); + *((void **)&R.sync_event) = dlsym(DynlibHandle, "__tgt_rtl_sync_event"); *((void **)&R.destroy_event) = - dlsym(dynlib_handle, "__tgt_rtl_destroy_event"); + dlsym(DynlibHandle, "__tgt_rtl_destroy_event"); *((void **)&R.release_async_info) = - dlsym(dynlib_handle, "__tgt_rtl_release_async_info"); + dlsym(DynlibHandle, "__tgt_rtl_release_async_info"); *((void **)&R.init_async_info) = - dlsym(dynlib_handle, "__tgt_rtl_init_async_info"); + dlsym(DynlibHandle, "__tgt_rtl_init_async_info"); *((void **)&R.init_device_info) = - dlsym(dynlib_handle, "__tgt_rtl_init_device_info"); + dlsym(DynlibHandle, "__tgt_rtl_init_device_info"); } DP("RTLs loaded!\n"); @@ -218,9 +215,9 @@ //////////////////////////////////////////////////////////////////////////////// // Functionality for registering libs -static void RegisterImageIntoTranslationTable(TranslationTable &TT, +static void registerImageIntoTranslationTable(TranslationTable &TT, RTLInfoTy &RTL, - __tgt_device_image *image) { + __tgt_device_image *Image) { // same size, as when we increase one, we also increase the other. assert(TT.TargetsTable.size() == TT.TargetsImages.size() && @@ -236,11 +233,11 @@ } // Register the image in all devices for this target type. - for (int32_t i = 0; i < RTL.NumberOfDevices; ++i) { + for (int32_t I = 0; I < RTL.NumberOfDevices; ++I) { // If we are changing the image we are also invalidating the target table. - if (TT.TargetsImages[RTL.Idx + i] != image) { - TT.TargetsImages[RTL.Idx + i] = image; - TT.TargetsTable[RTL.Idx + i] = 0; // lazy initialization of target table. + if (TT.TargetsImages[RTL.Idx + I] != Image) { + TT.TargetsImages[RTL.Idx + I] = Image; + TT.TargetsTable[RTL.Idx + I] = 0; // lazy initialization of target table. 
} } } @@ -248,29 +245,29 @@ //////////////////////////////////////////////////////////////////////////////// // Functionality for registering Ctors/Dtors -static void RegisterGlobalCtorsDtorsForImage(__tgt_bin_desc *desc, - __tgt_device_image *img, +static void registerGlobalCtorsDtorsForImage(__tgt_bin_desc *Desc, + __tgt_device_image *Img, RTLInfoTy *RTL) { - for (int32_t i = 0; i < RTL->NumberOfDevices; ++i) { - DeviceTy &Device = *PM->Devices[RTL->Idx + i]; + for (int32_t I = 0; I < RTL->NumberOfDevices; ++I) { + DeviceTy &Device = *PM->Devices[RTL->Idx + I]; Device.PendingGlobalsMtx.lock(); Device.HasPendingGlobals = true; - for (__tgt_offload_entry *entry = img->EntriesBegin; - entry != img->EntriesEnd; ++entry) { - if (entry->flags & OMP_DECLARE_TARGET_CTOR) { + for (__tgt_offload_entry *Entry = Img->EntriesBegin; + Entry != Img->EntriesEnd; ++Entry) { + if (Entry->flags & OMP_DECLARE_TARGET_CTOR) { DP("Adding ctor " DPxMOD " to the pending list.\n", - DPxPTR(entry->addr)); - Device.PendingCtorsDtors[desc].PendingCtors.push_back(entry->addr); - } else if (entry->flags & OMP_DECLARE_TARGET_DTOR) { + DPxPTR(Entry->addr)); + Device.PendingCtorsDtors[Desc].PendingCtors.push_back(Entry->addr); + } else if (Entry->flags & OMP_DECLARE_TARGET_DTOR) { // Dtors are pushed in reverse order so they are executed from end // to beginning when unregistering the library! DP("Adding dtor " DPxMOD " to the pending list.\n", - DPxPTR(entry->addr)); - Device.PendingCtorsDtors[desc].PendingDtors.push_front(entry->addr); + DPxPTR(Entry->addr)); + Device.PendingCtorsDtors[Desc].PendingDtors.push_front(Entry->addr); } - if (entry->flags & OMP_DECLARE_TARGET_LINK) { + if (Entry->flags & OMP_DECLARE_TARGET_LINK) { DP("The \"link\" attribute is not yet supported!\n"); } } @@ -278,16 +275,16 @@ } } -void RTLsTy::RegisterRequires(int64_t flags) { +void RTLsTy::registerRequires(int64_t Flags) { // TODO: add more elaborate check. // Minimal check: only set requires flags if previous value // is undefined. This ensures that only the first call to this // function will set the requires flags. All subsequent calls // will be checked for compatibility. - assert(flags != OMP_REQ_UNDEFINED && + assert(Flags != OMP_REQ_UNDEFINED && "illegal undefined flag for requires directive!"); if (RequiresFlags == OMP_REQ_UNDEFINED) { - RequiresFlags = flags; + RequiresFlags = Flags; return; } @@ -297,17 +294,17 @@ // - unified_address // - unified_shared_memory if ((RequiresFlags & OMP_REQ_REVERSE_OFFLOAD) != - (flags & OMP_REQ_REVERSE_OFFLOAD)) { + (Flags & OMP_REQ_REVERSE_OFFLOAD)) { FATAL_MESSAGE0( 1, "'#pragma omp requires reverse_offload' not used consistently!"); } if ((RequiresFlags & OMP_REQ_UNIFIED_ADDRESS) != - (flags & OMP_REQ_UNIFIED_ADDRESS)) { + (Flags & OMP_REQ_UNIFIED_ADDRESS)) { FATAL_MESSAGE0( 1, "'#pragma omp requires unified_address' not used consistently!"); } if ((RequiresFlags & OMP_REQ_UNIFIED_SHARED_MEMORY) != - (flags & OMP_REQ_UNIFIED_SHARED_MEMORY)) { + (Flags & OMP_REQ_UNIFIED_SHARED_MEMORY)) { FATAL_MESSAGE0( 1, "'#pragma omp requires unified_shared_memory' not used consistently!"); @@ -316,21 +313,21 @@ // TODO: insert any other missing checks DP("New requires flags %" PRId64 " compatible with existing %" PRId64 "!\n", - flags, RequiresFlags); + Flags, RequiresFlags); } void RTLsTy::initRTLonce(RTLInfoTy &R) { // If this RTL is not already in use, initialize it. 
- if (!R.isUsed && R.NumberOfDevices != 0) { + if (!R.IsUsed && R.NumberOfDevices != 0) { // Initialize the device information for the RTL we are about to use. const size_t Start = PM->Devices.size(); PM->Devices.reserve(Start + R.NumberOfDevices); - for (int32_t device_id = 0; device_id < R.NumberOfDevices; device_id++) { + for (int32_t DeviceId = 0; DeviceId < R.NumberOfDevices; DeviceId++) { PM->Devices.push_back(std::make_unique(&R)); // global device ID - PM->Devices[Start + device_id]->DeviceID = Start + device_id; + PM->Devices[Start + DeviceId]->DeviceID = Start + DeviceId; // RTL local device ID - PM->Devices[Start + device_id]->RTLDeviceID = device_id; + PM->Devices[Start + DeviceId]->RTLDeviceID = DeviceId; } // Initialize the index of this RTL and save it in the used RTLs. @@ -339,7 +336,7 @@ : UsedRTLs.back()->Idx + UsedRTLs.back()->NumberOfDevices; assert((size_t)R.Idx == Start && "RTL index should equal the number of devices used so far."); - R.isUsed = true; + R.IsUsed = true; UsedRTLs.push_back(&R); DP("RTL " DPxMOD " has index %d!\n", DPxPTR(R.LibraryHandler), R.Idx); @@ -351,58 +348,58 @@ initRTLonce(R); } -void RTLsTy::RegisterLib(__tgt_bin_desc *desc) { +void RTLsTy::registerLib(__tgt_bin_desc *Desc) { PM->RTLsMtx.lock(); // Register the images with the RTLs that understand them, if any. - for (int32_t i = 0; i < desc->NumDeviceImages; ++i) { + for (int32_t I = 0; I < Desc->NumDeviceImages; ++I) { // Obtain the image. - __tgt_device_image *img = &desc->DeviceImages[i]; + __tgt_device_image *Img = &Desc->DeviceImages[I]; RTLInfoTy *FoundRTL = nullptr; // Scan the RTLs that have associated images until we find one that supports // the current image. for (auto &R : AllRTLs) { - if (!R.is_valid_binary(img)) { + if (!R.is_valid_binary(Img)) { DP("Image " DPxMOD " is NOT compatible with RTL %s!\n", - DPxPTR(img->ImageStart), R.RTLName.c_str()); + DPxPTR(Img->ImageStart), R.RTLName.c_str()); continue; } DP("Image " DPxMOD " is compatible with RTL %s!\n", - DPxPTR(img->ImageStart), R.RTLName.c_str()); + DPxPTR(Img->ImageStart), R.RTLName.c_str()); initRTLonce(R); // Initialize (if necessary) translation table for this library. PM->TrlTblMtx.lock(); - if (!PM->HostEntriesBeginToTransTable.count(desc->HostEntriesBegin)) { - PM->HostEntriesBeginRegistrationOrder.push_back(desc->HostEntriesBegin); + if (!PM->HostEntriesBeginToTransTable.count(Desc->HostEntriesBegin)) { + PM->HostEntriesBeginRegistrationOrder.push_back(Desc->HostEntriesBegin); TranslationTable &TransTable = - (PM->HostEntriesBeginToTransTable)[desc->HostEntriesBegin]; - TransTable.HostTable.EntriesBegin = desc->HostEntriesBegin; - TransTable.HostTable.EntriesEnd = desc->HostEntriesEnd; + (PM->HostEntriesBeginToTransTable)[Desc->HostEntriesBegin]; + TransTable.HostTable.EntriesBegin = Desc->HostEntriesBegin; + TransTable.HostTable.EntriesEnd = Desc->HostEntriesEnd; } // Retrieve translation table for this library. 
TranslationTable &TransTable = - (PM->HostEntriesBeginToTransTable)[desc->HostEntriesBegin]; + (PM->HostEntriesBeginToTransTable)[Desc->HostEntriesBegin]; - DP("Registering image " DPxMOD " with RTL %s!\n", DPxPTR(img->ImageStart), + DP("Registering image " DPxMOD " with RTL %s!\n", DPxPTR(Img->ImageStart), R.RTLName.c_str()); - RegisterImageIntoTranslationTable(TransTable, R, img); + registerImageIntoTranslationTable(TransTable, R, Img); PM->TrlTblMtx.unlock(); FoundRTL = &R; // Load ctors/dtors for static objects - RegisterGlobalCtorsDtorsForImage(desc, img, FoundRTL); + registerGlobalCtorsDtorsForImage(Desc, Img, FoundRTL); // if an RTL was found we are done - proceed to register the next image break; } if (!FoundRTL) { - DP("No RTL found for image " DPxMOD "!\n", DPxPTR(img->ImageStart)); + DP("No RTL found for image " DPxMOD "!\n", DPxPTR(Img->ImageStart)); } } PM->RTLsMtx.unlock(); @@ -410,14 +407,14 @@ DP("Done registering entries!\n"); } -void RTLsTy::UnregisterLib(__tgt_bin_desc *desc) { +void RTLsTy::unregisterLib(__tgt_bin_desc *Desc) { DP("Unloading target library!\n"); PM->RTLsMtx.lock(); // Find which RTL understands each image, if any. - for (int32_t i = 0; i < desc->NumDeviceImages; ++i) { + for (int32_t I = 0; I < Desc->NumDeviceImages; ++I) { // Obtain the image. - __tgt_device_image *img = &desc->DeviceImages[i]; + __tgt_device_image *Img = &Desc->DeviceImages[I]; RTLInfoTy *FoundRTL = NULL; @@ -425,36 +422,36 @@ // the current image. We only need to scan RTLs that are already being used. for (auto *R : UsedRTLs) { - assert(R->isUsed && "Expecting used RTLs."); + assert(R->IsUsed && "Expecting used RTLs."); - if (!R->is_valid_binary(img)) { + if (!R->is_valid_binary(Img)) { DP("Image " DPxMOD " is NOT compatible with RTL " DPxMOD "!\n", - DPxPTR(img->ImageStart), DPxPTR(R->LibraryHandler)); + DPxPTR(Img->ImageStart), DPxPTR(R->LibraryHandler)); continue; } DP("Image " DPxMOD " is compatible with RTL " DPxMOD "!\n", - DPxPTR(img->ImageStart), DPxPTR(R->LibraryHandler)); + DPxPTR(Img->ImageStart), DPxPTR(R->LibraryHandler)); FoundRTL = R; // Execute dtors for static objects if the device has been used, i.e. // if its PendingCtors list has been emptied. - for (int32_t i = 0; i < FoundRTL->NumberOfDevices; ++i) { - DeviceTy &Device = *PM->Devices[FoundRTL->Idx + i]; + for (int32_t I = 0; I < FoundRTL->NumberOfDevices; ++I) { + DeviceTy &Device = *PM->Devices[FoundRTL->Idx + I]; Device.PendingGlobalsMtx.lock(); - if (Device.PendingCtorsDtors[desc].PendingCtors.empty()) { + if (Device.PendingCtorsDtors[Desc].PendingCtors.empty()) { AsyncInfoTy AsyncInfo(Device); - for (auto &dtor : Device.PendingCtorsDtors[desc].PendingDtors) { - int rc = target(nullptr, Device, dtor, 0, nullptr, nullptr, nullptr, + for (auto &Dtor : Device.PendingCtorsDtors[Desc].PendingDtors) { + int Rc = target(nullptr, Device, Dtor, 0, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, 1, 1, true /*team*/, AsyncInfo); - if (rc != OFFLOAD_SUCCESS) { - DP("Running destructor " DPxMOD " failed.\n", DPxPTR(dtor)); + if (Rc != OFFLOAD_SUCCESS) { + DP("Running destructor " DPxMOD " failed.\n", DPxPTR(Dtor)); } } // Remove this library's entry from PendingCtorsDtors - Device.PendingCtorsDtors.erase(desc); + Device.PendingCtorsDtors.erase(Desc); // All constructors have been issued, wait for them now. 
if (AsyncInfo.synchronize() != OFFLOAD_SUCCESS) DP("Failed synchronizing destructors kernels.\n"); @@ -463,7 +460,7 @@ } DP("Unregistered image " DPxMOD " from RTL " DPxMOD "!\n", - DPxPTR(img->ImageStart), DPxPTR(R->LibraryHandler)); + DPxPTR(Img->ImageStart), DPxPTR(R->LibraryHandler)); break; } @@ -471,7 +468,7 @@ // if no RTL was found proceed to unregister the next image if (!FoundRTL) { DP("No RTLs in use support the image " DPxMOD "!\n", - DPxPTR(img->ImageStart)); + DPxPTR(Img->ImageStart)); } } PM->RTLsMtx.unlock(); @@ -479,22 +476,22 @@ // Remove entries from PM->HostPtrToTableMap PM->TblMapMtx.lock(); - for (__tgt_offload_entry *cur = desc->HostEntriesBegin; - cur < desc->HostEntriesEnd; ++cur) { - PM->HostPtrToTableMap.erase(cur->addr); + for (__tgt_offload_entry *Cur = Desc->HostEntriesBegin; + Cur < Desc->HostEntriesEnd; ++Cur) { + PM->HostPtrToTableMap.erase(Cur->addr); } // Remove translation table for this descriptor. auto TransTable = - PM->HostEntriesBeginToTransTable.find(desc->HostEntriesBegin); + PM->HostEntriesBeginToTransTable.find(Desc->HostEntriesBegin); if (TransTable != PM->HostEntriesBeginToTransTable.end()) { DP("Removing translation table for descriptor " DPxMOD "\n", - DPxPTR(desc->HostEntriesBegin)); + DPxPTR(Desc->HostEntriesBegin)); PM->HostEntriesBeginToTransTable.erase(TransTable); } else { DP("Translation table for descriptor " DPxMOD " cannot be found, probably " "it has been already removed.\n", - DPxPTR(desc->HostEntriesBegin)); + DPxPTR(Desc->HostEntriesBegin)); } PM->TblMapMtx.unlock();
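Note: the repeated *((void **)&R.symbol) = dlsym(DynlibHandle, "...") lines in loadRTLs above all use the same idiom: resolve a plugin entry point by name and store the result through a void ** alias of the matching function-pointer member, so optional entry points that are missing simply stay null. A minimal self-contained sketch of that idiom follows; the PluginVTable and loadPlugin names are illustrative and not part of libomptarget, while the dlopen/dlsym calls and the two __tgt_rtl_* symbol names come from the patch. Build with -ldl on Linux.

#include <cstdint>
#include <cstdio>
#include <dlfcn.h>

// Slots for plugin entry points, mirroring the RTLInfoTy members above.
struct PluginVTable {
  int32_t (*NumberOfDevices)();     // required symbol
  void (*PrintDeviceInfo)(int32_t); // optional symbol, may stay null
};

static bool loadPlugin(const char *Path, PluginVTable &VT) {
  void *Handle = dlopen(Path, RTLD_NOW);
  if (!Handle) {
    fprintf(stderr, "Unable to load '%s': %s\n", Path, dlerror());
    return false;
  }
  // dlsym returns void *; assigning through a void ** alias of the member
  // avoids a reinterpret_cast per entry point, as in loadRTLs.
  *((void **)&VT.NumberOfDevices) =
      dlsym(Handle, "__tgt_rtl_number_of_devices");
  *((void **)&VT.PrintDeviceInfo) =
      dlsym(Handle, "__tgt_rtl_print_device_info");
  // Only the required entry point decides whether the plugin is usable.
  return VT.NumberOfDevices != nullptr;
}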
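Similarly, getParentIndex in omptarget.cpp above decodes the MEMBER_OF encoding: the upper 16 bits of the 64-bit map-type word carry a 1-based parent index, and subtracting one yields a 0-based index where -1 means "not a member of any combined entry". A small standalone sketch under that assumption (the mask value matches OMP_TGT_MAPTYPE_MEMBER_OF in omptarget.h; the main() harness is illustrative):

#include <cassert>
#include <cstdint>

// Upper 16 bits of the 64-bit map type hold the 1-based parent index.
constexpr int64_t OMP_TGT_MAPTYPE_MEMBER_OF =
    static_cast<int64_t>(0xffff000000000000ULL);

static int32_t getParentIndex(int64_t Type) {
  return ((Type & OMP_TGT_MAPTYPE_MEMBER_OF) >> 48) - 1;
}

int main() {
  // A member of argument #0 stores 1 in the upper bits, #1 stores 2, etc.
  assert(getParentIndex((int64_t)1 << 48) == 0);
  assert(getParentIndex((int64_t)2 << 48) == 1);
  // No MEMBER_OF bits set decodes to -1: not part of a combined struct entry.
  assert(getParentIndex(0) == -1);
  return 0;
}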