diff --git a/openmp/libomptarget/plugins/amdgpu/impl/atmi.h b/openmp/libomptarget/plugins/amdgpu/impl/atmi.h --- a/openmp/libomptarget/plugins/amdgpu/impl/atmi.h +++ b/openmp/libomptarget/plugins/amdgpu/impl/atmi.h @@ -13,24 +13,6 @@ * @{ */ -/** - * @brief Status codes. - */ -typedef enum atmi_status_t { - /** - * The function has been executed successfully. - */ - ATMI_STATUS_SUCCESS = 0, - /** - * A undocumented error has occurred. - */ - ATMI_STATUS_UNKNOWN = 1, - /** - * A generic error has occurred. - */ - ATMI_STATUS_ERROR = 2, -} atmi_status_t; - /** * @brief Device Types. */ diff --git a/openmp/libomptarget/plugins/amdgpu/impl/atmi.cpp b/openmp/libomptarget/plugins/amdgpu/impl/atmi.cpp --- a/openmp/libomptarget/plugins/amdgpu/impl/atmi.cpp +++ b/openmp/libomptarget/plugins/amdgpu/impl/atmi.cpp @@ -54,22 +54,22 @@ } }; -atmi_status_t atmi_memcpy_h2d(hsa_signal_t signal, void *deviceDest, - const void *hostSrc, size_t size, - hsa_agent_t agent) { +hsa_status_t atmi_memcpy_h2d(hsa_signal_t signal, void *deviceDest, + const void *hostSrc, size_t size, + hsa_agent_t agent) { hsa_status_t rc = hsa_memory_copy(deviceDest, hostSrc, size); // hsa_memory_copy sometimes fails in situations where // allocate + copy succeeds. Looks like it might be related to // locking part of a read only segment. Fall back for now. if (rc == HSA_STATUS_SUCCESS) { - return ATMI_STATUS_SUCCESS; + return HSA_STATUS_SUCCESS; } void *tempHostPtr; atmi_mem_place_t CPU = ATMI_MEM_PLACE_CPU_MEM(0, 0, 0); - atmi_status_t ret = atmi_malloc(&tempHostPtr, size, CPU); - if (ret != ATMI_STATUS_SUCCESS) { + hsa_status_t ret = atmi_malloc(&tempHostPtr, size, CPU); + if (ret != HSA_STATUS_SUCCESS) { DEBUG_PRINT("atmi_malloc: Unable to alloc %d bytes for temp scratch\n", size); return ret; @@ -79,27 +79,27 @@ if (invoke_hsa_copy(signal, deviceDest, tempHostPtr, size, agent) != HSA_STATUS_SUCCESS) { - return ATMI_STATUS_ERROR; + return HSA_STATUS_ERROR; } - return ATMI_STATUS_SUCCESS; + return HSA_STATUS_SUCCESS; } -atmi_status_t atmi_memcpy_d2h(hsa_signal_t signal, void *dest, - const void *deviceSrc, size_t size, - hsa_agent_t agent) { +hsa_status_t atmi_memcpy_d2h(hsa_signal_t signal, void *dest, + const void *deviceSrc, size_t size, + hsa_agent_t agent) { hsa_status_t rc = hsa_memory_copy(dest, deviceSrc, size); // hsa_memory_copy sometimes fails in situations where // allocate + copy succeeds. Looks like it might be related to // locking part of a read only segment. Fall back for now. if (rc == HSA_STATUS_SUCCESS) { - return ATMI_STATUS_SUCCESS; + return HSA_STATUS_SUCCESS; } void *tempHostPtr; atmi_mem_place_t CPU = ATMI_MEM_PLACE_CPU_MEM(0, 0, 0); - atmi_status_t ret = atmi_malloc(&tempHostPtr, size, CPU); - if (ret != ATMI_STATUS_SUCCESS) { + hsa_status_t ret = atmi_malloc(&tempHostPtr, size, CPU); + if (ret != HSA_STATUS_SUCCESS) { DEBUG_PRINT("atmi_malloc: Unable to alloc %d bytes for temp scratch\n", size); return ret; @@ -108,15 +108,15 @@ if (invoke_hsa_copy(signal, tempHostPtr, deviceSrc, size, agent) != HSA_STATUS_SUCCESS) { - return ATMI_STATUS_ERROR; + return HSA_STATUS_ERROR; } memcpy(dest, tempHostPtr, size); - return ATMI_STATUS_SUCCESS; + return HSA_STATUS_SUCCESS; } -atmi_status_t atmi_free(void *ptr) { return core::Runtime::Memfree(ptr); } +hsa_status_t atmi_free(void *ptr) { return core::Runtime::Memfree(ptr); } -atmi_status_t atmi_malloc(void **ptr, size_t size, atmi_mem_place_t place) { +hsa_status_t atmi_malloc(void **ptr, size_t size, atmi_mem_place_t place) { return core::Runtime::Malloc(ptr, size, place); } diff --git a/openmp/libomptarget/plugins/amdgpu/impl/atmi_interop_hsa.h b/openmp/libomptarget/plugins/amdgpu/impl/atmi_interop_hsa.h --- a/openmp/libomptarget/plugins/amdgpu/impl/atmi_interop_hsa.h +++ b/openmp/libomptarget/plugins/amdgpu/impl/atmi_interop_hsa.h @@ -40,15 +40,13 @@ * @param[in] var_size Pointer to a non-NULL @p uint variable that will * hold the size of the global symbol object. * - * @retval ::ATMI_STATUS_SUCCESS The function has executed successfully. + * @retval ::HSA_STATUS_SUCCESS The function has executed successfully. * - * @retval ::ATMI_STATUS_ERROR If @p symbol, @p var_addr or @p var_size are + * @retval ::HSA_STATUS_ERROR If @p symbol, @p var_addr or @p var_size are * invalid * location in the current node, or if ATMI is not initialized. - * - * @retval ::ATMI_STATUS_UNKNOWN The function encountered errors. */ -atmi_status_t atmi_interop_hsa_get_symbol_info( +hsa_status_t atmi_interop_hsa_get_symbol_info( const std::map &SymbolInfoTable, atmi_mem_place_t place, const char *symbol, void **var_addr, unsigned int *var_size); @@ -69,15 +67,13 @@ * @param[in] value Pointer to a non-NULL @p uint variable that will * hold the return value of the kernel property. * - * @retval ::ATMI_STATUS_SUCCESS The function has executed successfully. + * @retval ::HSA_STATUS_SUCCESS The function has executed successfully. * - * @retval ::ATMI_STATUS_ERROR If @p symbol, @p var_addr or @p var_size are + * @retval ::HSA_STATUS_ERROR If @p symbol, @p var_addr or @p var_size are * invalid * location in the current node, or if ATMI is not initialized. - * - * @retval ::ATMI_STATUS_UNKNOWN The function encountered errors. */ -atmi_status_t atmi_interop_hsa_get_kernel_info( +hsa_status_t atmi_interop_hsa_get_kernel_info( const std::map &KernelInfoTable, atmi_mem_place_t place, const char *kernel_name, hsa_executable_symbol_info_t info, uint32_t *value); diff --git a/openmp/libomptarget/plugins/amdgpu/impl/atmi_interop_hsa.cpp b/openmp/libomptarget/plugins/amdgpu/impl/atmi_interop_hsa.cpp --- a/openmp/libomptarget/plugins/amdgpu/impl/atmi_interop_hsa.cpp +++ b/openmp/libomptarget/plugins/amdgpu/impl/atmi_interop_hsa.cpp @@ -6,7 +6,7 @@ #include "atmi_interop_hsa.h" #include "internal.h" -atmi_status_t atmi_interop_hsa_get_symbol_info( +hsa_status_t atmi_interop_hsa_get_symbol_info( const std::map &SymbolInfoTable, atmi_mem_place_t place, const char *symbol, void **var_addr, unsigned int *var_size) { @@ -21,10 +21,10 @@ atmi_machine_t *machine = atmi_machine_get_info(); if (!symbol || !var_addr || !var_size || !machine) - return ATMI_STATUS_ERROR; + return HSA_STATUS_ERROR; if (place.dev_id < 0 || place.dev_id >= machine->device_count_by_type[place.dev_type]) - return ATMI_STATUS_ERROR; + return HSA_STATUS_ERROR; // get the symbol info std::string symbolStr = std::string(symbol); @@ -33,15 +33,15 @@ atl_symbol_info_t info = It->second; *var_addr = reinterpret_cast(info.addr); *var_size = info.size; - return ATMI_STATUS_SUCCESS; + return HSA_STATUS_SUCCESS; } else { *var_addr = NULL; *var_size = 0; - return ATMI_STATUS_ERROR; + return HSA_STATUS_ERROR; } } -atmi_status_t atmi_interop_hsa_get_kernel_info( +hsa_status_t atmi_interop_hsa_get_kernel_info( const std::map &KernelInfoTable, atmi_mem_place_t place, const char *kernel_name, hsa_executable_symbol_info_t kernel_info, uint32_t *value) { @@ -55,12 +55,12 @@ atmi_machine_t *machine = atmi_machine_get_info(); if (!kernel_name || !value || !machine) - return ATMI_STATUS_ERROR; + return HSA_STATUS_ERROR; if (place.dev_id < 0 || place.dev_id >= machine->device_count_by_type[place.dev_type]) - return ATMI_STATUS_ERROR; + return HSA_STATUS_ERROR; - atmi_status_t status = ATMI_STATUS_SUCCESS; + hsa_status_t status = HSA_STATUS_SUCCESS; // get the kernel info std::string kernelStr = std::string(kernel_name); auto It = KernelInfoTable.find(kernelStr); @@ -79,12 +79,12 @@ break; default: *value = 0; - status = ATMI_STATUS_ERROR; + status = HSA_STATUS_ERROR; break; } } else { *value = 0; - status = ATMI_STATUS_ERROR; + status = HSA_STATUS_ERROR; } return status; diff --git a/openmp/libomptarget/plugins/amdgpu/impl/atmi_runtime.h b/openmp/libomptarget/plugins/amdgpu/impl/atmi_runtime.h --- a/openmp/libomptarget/plugins/amdgpu/impl/atmi_runtime.h +++ b/openmp/libomptarget/plugins/amdgpu/impl/atmi_runtime.h @@ -45,17 +45,15 @@ * * @param[in] cb_state void* passed to on_deserialized_data callback * - * @retval ::ATMI_STATUS_SUCCESS The function has executed successfully. + * @retval ::HSA_STATUS_SUCCESS The function has executed successfully. * - * @retval ::ATMI_STATUS_ERROR The function encountered errors. - * - * @retval ::ATMI_STATUS_UNKNOWN The function encountered errors. + * @retval ::HSA_STATUS_ERROR The function encountered errors. * */ -atmi_status_t atmi_module_register_from_memory_to_place( +hsa_status_t atmi_module_register_from_memory_to_place( void *module_bytes, size_t module_size, atmi_place_t place, - atmi_status_t (*on_deserialized_data)(void *data, size_t size, - void *cb_state), + hsa_status_t (*on_deserialized_data)(void *data, size_t size, + void *cb_state), void *cb_state); /** @} */ @@ -96,14 +94,12 @@ * * @param[in] place The memory place in the system to perform the allocation. * - * @retval ::ATMI_STATUS_SUCCESS The function has executed successfully. - * - * @retval ::ATMI_STATUS_ERROR The function encountered errors. + * @retval ::HSA_STATUS_SUCCESS The function has executed successfully. * - * @retval ::ATMI_STATUS_UNKNOWN The function encountered errors. + * @retval ::HSA_STATUS_ERROR The function encountered errors. * */ -atmi_status_t atmi_malloc(void **ptr, size_t size, atmi_mem_place_t place); +hsa_status_t atmi_malloc(void **ptr, size_t size, atmi_mem_place_t place); /** * @brief Frees memory that was previously allocated. @@ -114,22 +110,20 @@ * * @param[in] ptr The pointer to the memory that has to be freed. * - * @retval ::ATMI_STATUS_SUCCESS The function has executed successfully. - * - * @retval ::ATMI_STATUS_ERROR The function encountered errors. + * @retval ::HSA_STATUS_SUCCESS The function has executed successfully. * - * @retval ::ATMI_STATUS_UNKNOWN The function encountered errors. + * @retval ::HSA_STATUS_ERROR The function encountered errors. * */ -atmi_status_t atmi_free(void *ptr); +hsa_status_t atmi_free(void *ptr); -atmi_status_t atmi_memcpy_h2d(hsa_signal_t signal, void *deviceDest, - const void *hostSrc, size_t size, - hsa_agent_t agent); +hsa_status_t atmi_memcpy_h2d(hsa_signal_t signal, void *deviceDest, + const void *hostSrc, size_t size, + hsa_agent_t agent); -atmi_status_t atmi_memcpy_d2h(hsa_signal_t sig, void *hostDest, - const void *deviceSrc, size_t size, - hsa_agent_t agent); +hsa_status_t atmi_memcpy_d2h(hsa_signal_t sig, void *hostDest, + const void *deviceSrc, size_t size, + hsa_agent_t agent); /** @} */ diff --git a/openmp/libomptarget/plugins/amdgpu/impl/data.cpp b/openmp/libomptarget/plugins/amdgpu/impl/data.cpp --- a/openmp/libomptarget/plugins/amdgpu/impl/data.cpp +++ b/openmp/libomptarget/plugins/amdgpu/impl/data.cpp @@ -46,7 +46,7 @@ return HSA_STATUS_SUCCESS; } -atmi_status_t Runtime::Malloc(void **ptr, size_t size, atmi_mem_place_t place) { +hsa_status_t Runtime::Malloc(void **ptr, size_t size, atmi_mem_place_t place) { hsa_amd_memory_pool_t pool = get_memory_pool_by_mem_place(place); hsa_status_t err = hsa_amd_memory_pool_allocate(pool, size, 0, ptr); DEBUG_PRINT("Malloced [%s %d] %p\n", @@ -57,14 +57,14 @@ err = register_allocation(*ptr, size, place); } - return (err == HSA_STATUS_SUCCESS) ? ATMI_STATUS_SUCCESS : ATMI_STATUS_ERROR; + return (err == HSA_STATUS_SUCCESS) ? HSA_STATUS_SUCCESS : HSA_STATUS_ERROR; } -atmi_status_t Runtime::Memfree(void *ptr) { +hsa_status_t Runtime::Memfree(void *ptr) { hsa_status_t err = hsa_amd_memory_pool_free(ptr); DEBUG_PRINT("Freed %p\n", ptr); - return (err == HSA_STATUS_SUCCESS) ? ATMI_STATUS_SUCCESS : ATMI_STATUS_ERROR; + return (err == HSA_STATUS_SUCCESS) ? HSA_STATUS_SUCCESS : HSA_STATUS_ERROR; } } // namespace core diff --git a/openmp/libomptarget/plugins/amdgpu/impl/internal.h b/openmp/libomptarget/plugins/amdgpu/impl/internal.h --- a/openmp/libomptarget/plugins/amdgpu/impl/internal.h +++ b/openmp/libomptarget/plugins/amdgpu/impl/internal.h @@ -184,7 +184,7 @@ extern std::vector atl_gpu_kernarg_pools; namespace core { -atmi_status_t atl_init_gpu_context(); +hsa_status_t atl_init_gpu_context(); hsa_status_t init_hsa(); hsa_status_t finalize_hsa(); @@ -219,6 +219,6 @@ } // namespace core const char *get_error_string(hsa_status_t err); -const char *get_atmi_error_string(atmi_status_t err); +const char *get_atmi_error_string(hsa_status_t err); #endif // SRC_RUNTIME_INCLUDE_INTERNAL_H_ diff --git a/openmp/libomptarget/plugins/amdgpu/impl/rt.h b/openmp/libomptarget/plugins/amdgpu/impl/rt.h --- a/openmp/libomptarget/plugins/amdgpu/impl/rt.h +++ b/openmp/libomptarget/plugins/amdgpu/impl/rt.h @@ -52,16 +52,16 @@ // machine info static atmi_machine_t *GetMachineInfo(); // modules - static atmi_status_t RegisterModuleFromMemory( + static hsa_status_t RegisterModuleFromMemory( void *, size_t, atmi_place_t, - atmi_status_t (*on_deserialized_data)(void *data, size_t size, - void *cb_state), + hsa_status_t (*on_deserialized_data)(void *data, size_t size, + void *cb_state), void *cb_state, std::vector &HSAExecutables); // data - static atmi_status_t Memcpy(hsa_signal_t, void *, const void *, size_t); - static atmi_status_t Memfree(void *); - static atmi_status_t Malloc(void **, size_t, atmi_mem_place_t); + static hsa_status_t Memcpy(hsa_signal_t, void *, const void *, size_t); + static hsa_status_t Memfree(void *); + static hsa_status_t Malloc(void **, size_t, atmi_mem_place_t); int getMaxQueueSize() const { return env_.getMaxQueueSize(); } int getDebugMode() const { return env_.getDebugMode(); } diff --git a/openmp/libomptarget/plugins/amdgpu/impl/system.cpp b/openmp/libomptarget/plugins/amdgpu/impl/system.cpp --- a/openmp/libomptarget/plugins/amdgpu/impl/system.cpp +++ b/openmp/libomptarget/plugins/amdgpu/impl/system.cpp @@ -558,27 +558,27 @@ return HSA_STATUS_SUCCESS; } -atmi_status_t atl_init_gpu_context() { +hsa_status_t atl_init_gpu_context() { if (atlc.struct_initialized == false) atmi_init_context_structs(); if (atlc.g_gpu_initialized != false) - return ATMI_STATUS_SUCCESS; + return HSA_STATUS_SUCCESS; hsa_status_t err; err = init_hsa(); if (err != HSA_STATUS_SUCCESS) - return ATMI_STATUS_ERROR; + return HSA_STATUS_ERROR; err = hsa_amd_register_system_event_handler(callbackEvent, NULL); if (err != HSA_STATUS_SUCCESS) { printf("[%s:%d] %s failed: %s\n", __FILE__, __LINE__, "Registering the system for memory faults", get_error_string(err)); - return ATMI_STATUS_ERROR; + return HSA_STATUS_ERROR; } init_tasks(); atlc.g_gpu_initialized = true; - return ATMI_STATUS_SUCCESS; + return HSA_STATUS_SUCCESS; } static bool isImplicit(KernelArgMD::ValueKind value_kind) { @@ -1087,12 +1087,12 @@ return HSA_STATUS_SUCCESS; } -atmi_status_t RegisterModuleFromMemory( +hsa_status_t RegisterModuleFromMemory( std::map &KernelInfoTable, std::map &SymbolInfoTable, void *module_bytes, size_t module_size, atmi_place_t place, - atmi_status_t (*on_deserialized_data)(void *data, size_t size, - void *cb_state), + hsa_status_t (*on_deserialized_data)(void *data, size_t size, + void *cb_state), void *cb_state, std::vector &HSAExecutables) { hsa_status_t err; int gpu = place.device_id; @@ -1108,7 +1108,7 @@ if (err != HSA_STATUS_SUCCESS) { printf("[%s:%d] %s failed: %s\n", __FILE__, __LINE__, "Query the agent profile", get_error_string(err)); - return ATMI_STATUS_ERROR; + return HSA_STATUS_ERROR; } // FIXME: Assume that every profile is FULL until we understand how to build // GCN with base profile @@ -1119,7 +1119,7 @@ if (err != HSA_STATUS_SUCCESS) { printf("[%s:%d] %s failed: %s\n", __FILE__, __LINE__, "Create the executable", get_error_string(err)); - return ATMI_STATUS_ERROR; + return HSA_STATUS_ERROR; } bool module_load_success = false; @@ -1152,9 +1152,9 @@ // Mutating the device image here avoids another allocation & memcpy void *code_object_alloc_data = reinterpret_cast(code_object.handle); - atmi_status_t atmi_err = + hsa_status_t atmi_err = on_deserialized_data(code_object_alloc_data, module_size, cb_state); - if (atmi_err != ATMI_STATUS_SUCCESS) { + if (atmi_err != HSA_STATUS_SUCCESS) { printf("[%s:%d] %s failed: %s\n", __FILE__, __LINE__, "Error in deserialized_data callback", get_atmi_error_string(atmi_err)); @@ -1181,7 +1181,7 @@ if (err != HSA_STATUS_SUCCESS) { printf("[%s:%d] %s failed: %s\n", __FILE__, __LINE__, "Freeze the executable", get_error_string(err)); - return ATMI_STATUS_ERROR; + return HSA_STATUS_ERROR; } err = hsa::executable_iterate_symbols( @@ -1193,14 +1193,14 @@ if (err != HSA_STATUS_SUCCESS) { printf("[%s:%d] %s failed: %s\n", __FILE__, __LINE__, "Iterating over symbols for execuatable", get_error_string(err)); - return ATMI_STATUS_ERROR; + return HSA_STATUS_ERROR; } // save the executable and destroy during finalize HSAExecutables.push_back(executable); - return ATMI_STATUS_SUCCESS; + return HSA_STATUS_SUCCESS; } else { - return ATMI_STATUS_ERROR; + return HSA_STATUS_ERROR; } } diff --git a/openmp/libomptarget/plugins/amdgpu/impl/utils.cpp b/openmp/libomptarget/plugins/amdgpu/impl/utils.cpp --- a/openmp/libomptarget/plugins/amdgpu/impl/utils.cpp +++ b/openmp/libomptarget/plugins/amdgpu/impl/utils.cpp @@ -19,14 +19,12 @@ /* * Helper functions */ -const char *get_atmi_error_string(atmi_status_t err) { +const char *get_atmi_error_string(hsa_status_t err) { switch (err) { - case ATMI_STATUS_SUCCESS: - return "ATMI_STATUS_SUCCESS"; - case ATMI_STATUS_UNKNOWN: - return "ATMI_STATUS_UNKNOWN"; - case ATMI_STATUS_ERROR: - return "ATMI_STATUS_ERROR"; + case HSA_STATUS_SUCCESS: + return "HSA_STATUS_SUCCESS"; + case HSA_STATUS_ERROR: + return "HSA_STATUS_ERROR"; default: return ""; } diff --git a/openmp/libomptarget/plugins/amdgpu/src/rtl.cpp b/openmp/libomptarget/plugins/amdgpu/src/rtl.cpp --- a/openmp/libomptarget/plugins/amdgpu/src/rtl.cpp +++ b/openmp/libomptarget/plugins/amdgpu/src/rtl.cpp @@ -74,7 +74,7 @@ #ifdef OMPTARGET_DEBUG #define check(msg, status) \ - if (status != ATMI_STATUS_SUCCESS) { \ + if (status != HSA_STATUS_SUCCESS) { \ DP(#msg " failed\n"); \ } else { \ DP(#msg " succeeded\n"); \ @@ -87,12 +87,12 @@ #include "elf_common.h" namespace core { -atmi_status_t RegisterModuleFromMemory( +hsa_status_t RegisterModuleFromMemory( std::map &KernelInfo, std::map &SymbolInfoTable, void *, size_t, atmi_place_t, - atmi_status_t (*on_deserialized_data)(void *data, size_t size, - void *cb_state), + hsa_status_t (*on_deserialized_data)(void *data, size_t size, + void *cb_state), void *cb_state, std::vector &HSAExecutables); } @@ -374,27 +374,27 @@ static const int Default_WG_Size = llvm::omp::AMDGPUGpuGridValues[llvm::omp::GVIDX::GV_Default_WG_Size]; - using MemcpyFunc = atmi_status_t (*)(hsa_signal_t, void *, const void *, - size_t size, hsa_agent_t); - atmi_status_t freesignalpool_memcpy(void *dest, const void *src, size_t size, - MemcpyFunc Func, int32_t deviceId) { + using MemcpyFunc = hsa_status_t (*)(hsa_signal_t, void *, const void *, + size_t size, hsa_agent_t); + hsa_status_t freesignalpool_memcpy(void *dest, const void *src, size_t size, + MemcpyFunc Func, int32_t deviceId) { hsa_agent_t agent = HSAAgents[deviceId]; hsa_signal_t s = FreeSignalPool.pop(); if (s.handle == 0) { - return ATMI_STATUS_ERROR; + return HSA_STATUS_ERROR; } - atmi_status_t r = Func(s, dest, src, size, agent); + hsa_status_t r = Func(s, dest, src, size, agent); FreeSignalPool.push(s); return r; } - atmi_status_t freesignalpool_memcpy_d2h(void *dest, const void *src, - size_t size, int32_t deviceId) { + hsa_status_t freesignalpool_memcpy_d2h(void *dest, const void *src, + size_t size, int32_t deviceId) { return freesignalpool_memcpy(dest, src, size, atmi_memcpy_d2h, deviceId); } - atmi_status_t freesignalpool_memcpy_h2d(void *dest, const void *src, - size_t size, int32_t deviceId) { + hsa_status_t freesignalpool_memcpy_h2d(void *dest, const void *src, + size_t size, int32_t deviceId) { return freesignalpool_memcpy(dest, src, size, atmi_memcpy_h2d, deviceId); } @@ -466,8 +466,8 @@ print_kernel_trace = 0; DP("Start initializing HSA-ATMI\n"); - atmi_status_t err = core::atl_init_gpu_context(); - if (err != ATMI_STATUS_SUCCESS) { + hsa_status_t err = core::atl_init_gpu_context(); + if (err != HSA_STATUS_SUCCESS) { DP("Error when initializing HSA-ATMI\n"); return; } @@ -613,7 +613,7 @@ // Return success if we are not copying back to host from target. if (!HstPtr) return OFFLOAD_SUCCESS; - atmi_status_t err; + hsa_status_t err; DP("Retrieve data %ld bytes, (tgt:%016llx) -> (hst:%016llx).\n", Size, (long long unsigned)(Elf64_Addr)TgtPtr, (long long unsigned)(Elf64_Addr)HstPtr); @@ -621,7 +621,7 @@ err = DeviceInfo.freesignalpool_memcpy_d2h(HstPtr, TgtPtr, (size_t)Size, DeviceId); - if (err != ATMI_STATUS_SUCCESS) { + if (err != HSA_STATUS_SUCCESS) { DP("Error when copying data from device to host. Pointers: " "host = 0x%016lx, device = 0x%016lx, size = %lld\n", (Elf64_Addr)HstPtr, (Elf64_Addr)TgtPtr, (unsigned long long)Size); @@ -636,7 +636,7 @@ int32_t dataSubmit(int32_t DeviceId, void *TgtPtr, void *HstPtr, int64_t Size, __tgt_async_info *AsyncInfo) { assert(AsyncInfo && "AsyncInfo is nullptr"); - atmi_status_t err; + hsa_status_t err; assert(DeviceId < DeviceInfo.NumberOfDevices && "Device ID too large"); // Return success if we are not doing host to target. if (!HstPtr) @@ -647,7 +647,7 @@ (long long unsigned)(Elf64_Addr)TgtPtr); err = DeviceInfo.freesignalpool_memcpy_h2d(TgtPtr, HstPtr, (size_t)Size, DeviceId); - if (err != ATMI_STATUS_SUCCESS) { + if (err != HSA_STATUS_SUCCESS) { DP("Error when copying data from host to device. Pointers: " "host = 0x%016lx, device = 0x%016lx, size = %lld\n", (Elf64_Addr)HstPtr, (Elf64_Addr)TgtPtr, (unsigned long long)Size); @@ -998,27 +998,27 @@ return 1; } -atmi_status_t interop_get_symbol_info(char *base, size_t img_size, - const char *symname, void **var_addr, - uint32_t *var_size) { +hsa_status_t interop_get_symbol_info(char *base, size_t img_size, + const char *symname, void **var_addr, + uint32_t *var_size) { symbol_info si; int rc = get_symbol_info_without_loading(base, img_size, symname, &si); if (rc == 0) { *var_addr = si.addr; *var_size = si.size; - return ATMI_STATUS_SUCCESS; + return HSA_STATUS_SUCCESS; } else { - return ATMI_STATUS_ERROR; + return HSA_STATUS_ERROR; } } template -atmi_status_t module_register_from_memory_to_place( +hsa_status_t module_register_from_memory_to_place( std::map &KernelInfoTable, std::map &SymbolInfoTable, void *module_bytes, size_t module_size, atmi_place_t place, C cb, std::vector &HSAExecutables) { - auto L = [](void *data, size_t size, void *cb_state) -> atmi_status_t { + auto L = [](void *data, size_t size, void *cb_state) -> hsa_status_t { C *unwrapped = static_cast(cb_state); return (*unwrapped)(data, size); }; @@ -1120,7 +1120,7 @@ bool in_image() { return si.sh_type != SHT_NOBITS; } - atmi_status_t before_loading(void *data, size_t size) { + hsa_status_t before_loading(void *data, size_t size) { if (valid) { if (in_image()) { DP("Setting global device environment before load (%u bytes)\n", @@ -1130,10 +1130,10 @@ memcpy(pos, &host_device_env, si.size); } } - return ATMI_STATUS_SUCCESS; + return HSA_STATUS_SUCCESS; } - atmi_status_t after_loading() { + hsa_status_t after_loading() { if (valid) { if (!in_image()) { DP("Setting global device environment after load (%u bytes)\n", @@ -1142,10 +1142,10 @@ auto &SymbolInfo = DeviceInfo.SymbolInfoTable[device_id]; void *state_ptr; uint32_t state_ptr_size; - atmi_status_t err = atmi_interop_hsa_get_symbol_info( + hsa_status_t err = atmi_interop_hsa_get_symbol_info( SymbolInfo, get_gpu_mem_place(device_id), sym(), &state_ptr, &state_ptr_size); - if (err != ATMI_STATUS_SUCCESS) { + if (err != HSA_STATUS_SUCCESS) { DP("failed to find %s in loaded image\n", sym()); return err; } @@ -1153,23 +1153,23 @@ if (state_ptr_size != si.size) { DP("Symbol had size %u before loading, %u after\n", state_ptr_size, si.size); - return ATMI_STATUS_ERROR; + return HSA_STATUS_ERROR; } return DeviceInfo.freesignalpool_memcpy_h2d(state_ptr, &host_device_env, state_ptr_size, device_id); } } - return ATMI_STATUS_SUCCESS; + return HSA_STATUS_SUCCESS; } }; -static atmi_status_t atmi_calloc(void **ret_ptr, size_t size, - atmi_mem_place_t place) { +static hsa_status_t atmi_calloc(void **ret_ptr, size_t size, + atmi_mem_place_t place) { uint64_t rounded = 4 * ((size + 3) / 4); void *ptr; - atmi_status_t err = atmi_malloc(&ptr, rounded, place); - if (err != ATMI_STATUS_SUCCESS) { + hsa_status_t err = atmi_malloc(&ptr, rounded, place); + if (err != HSA_STATUS_SUCCESS) { return err; } @@ -1177,11 +1177,11 @@ if (rc != HSA_STATUS_SUCCESS) { fprintf(stderr, "zero fill device_state failed with %u\n", rc); atmi_free(ptr); - return ATMI_STATUS_ERROR; + return HSA_STATUS_ERROR; } *ret_ptr = ptr; - return ATMI_STATUS_SUCCESS; + return HSA_STATUS_SUCCESS; } static bool image_contains_symbol(void *data, size_t size, const char *sym) { @@ -1231,7 +1231,7 @@ auto &KernelInfo = DeviceInfo.KernelInfoTable[device_id]; auto &SymbolInfo = DeviceInfo.SymbolInfoTable[device_id]; - atmi_status_t err = module_register_from_memory_to_place( + hsa_status_t err = module_register_from_memory_to_place( KernelInfo, SymbolInfo, (void *)image->ImageStart, img_size, get_gpu_place(device_id), [&](void *data, size_t size) { @@ -1244,7 +1244,7 @@ DeviceInfo.HSAExecutables); check("Module registering", err); - if (err != ATMI_STATUS_SUCCESS) { + if (err != HSA_STATUS_SUCCESS) { fprintf(stderr, "Possible gpu arch mismatch: device:%s, image:%s please check" " compiler flag: -march=\n", @@ -1254,7 +1254,7 @@ } err = env.after_loading(); - if (err != ATMI_STATUS_SUCCESS) { + if (err != HSA_STATUS_SUCCESS) { return NULL; } } @@ -1269,11 +1269,11 @@ void *state_ptr; uint32_t state_ptr_size; auto &SymbolInfoMap = DeviceInfo.SymbolInfoTable[device_id]; - atmi_status_t err = atmi_interop_hsa_get_symbol_info( + hsa_status_t err = atmi_interop_hsa_get_symbol_info( SymbolInfoMap, get_gpu_mem_place(device_id), "omptarget_nvptx_device_State", &state_ptr, &state_ptr_size); - if (err != ATMI_STATUS_SUCCESS) { + if (err != HSA_STATUS_SUCCESS) { DP("No device_state symbol found, skipping initialization\n"); } else { if (state_ptr_size < sizeof(void *)) { @@ -1297,9 +1297,9 @@ if (dss.first.get() == nullptr) { assert(dss.second == 0); void *ptr = NULL; - atmi_status_t err = atmi_calloc(&ptr, device_State_bytes, - get_gpu_mem_place(device_id)); - if (err != ATMI_STATUS_SUCCESS) { + hsa_status_t err = atmi_calloc(&ptr, device_State_bytes, + get_gpu_mem_place(device_id)); + if (err != HSA_STATUS_SUCCESS) { DP("Failed to allocate device_state array\n"); return NULL; } @@ -1318,7 +1318,7 @@ // write ptr to device memory so it can be used by later kernels err = DeviceInfo.freesignalpool_memcpy_h2d(state_ptr, &ptr, sizeof(void *), device_id); - if (err != ATMI_STATUS_SUCCESS) { + if (err != HSA_STATUS_SUCCESS) { DP("memcpy install of state_ptr failed\n"); return NULL; } @@ -1354,11 +1354,11 @@ uint32_t varsize; auto &SymbolInfoMap = DeviceInfo.SymbolInfoTable[device_id]; - atmi_status_t err = atmi_interop_hsa_get_symbol_info( + hsa_status_t err = atmi_interop_hsa_get_symbol_info( SymbolInfoMap, get_gpu_mem_place(device_id), e->name, &varptr, &varsize); - if (err != ATMI_STATUS_SUCCESS) { + if (err != HSA_STATUS_SUCCESS) { // Inform the user what symbol prevented offloading DP("Loading global '%s' (Failed)\n", e->name); return NULL; @@ -1383,7 +1383,7 @@ // need for device copies. err = DeviceInfo.freesignalpool_memcpy_h2d(varptr, e->addr, sizeof(void *), device_id); - if (err != ATMI_STATUS_SUCCESS) + if (err != HSA_STATUS_SUCCESS) DP("Error when copying USM\n"); DP("Copy linked variable host address (" DPxMOD ")" "to device address (" DPxMOD ")\n", @@ -1398,7 +1398,7 @@ atmi_mem_place_t place = get_gpu_mem_place(device_id); uint32_t kernarg_segment_size; auto &KernelInfoMap = DeviceInfo.KernelInfoTable[device_id]; - atmi_status_t err = atmi_interop_hsa_get_kernel_info( + hsa_status_t err = atmi_interop_hsa_get_kernel_info( KernelInfoMap, place, e->name, HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_KERNARG_SEGMENT_SIZE, &kernarg_segment_size); @@ -1436,7 +1436,7 @@ err = interop_get_symbol_info((char *)image->ImageStart, img_size, KernDescName, &KernDescPtr, &KernDescSize); - if (err == ATMI_STATUS_SUCCESS) { + if (err == HSA_STATUS_SUCCESS) { if ((size_t)KernDescSize != sizeof(KernDescVal)) DP("Loading global computation properties '%s' - size mismatch (%u != " "%lu)\n", @@ -1478,7 +1478,7 @@ err = interop_get_symbol_info((char *)image->ImageStart, img_size, ExecModeName, &ExecModePtr, &varsize); - if (err == ATMI_STATUS_SUCCESS) { + if (err == HSA_STATUS_SUCCESS) { if ((size_t)varsize != sizeof(int8_t)) { DP("Loading global computation properties '%s' - size mismatch(%u != " "%lu)\n", @@ -1515,7 +1515,7 @@ err = interop_get_symbol_info((char *)image->ImageStart, img_size, WGSizeName, &WGSizePtr, &WGSize); - if (err == ATMI_STATUS_SUCCESS) { + if (err == HSA_STATUS_SUCCESS) { if ((size_t)WGSize != sizeof(int16_t)) { DP("Loading global computation properties '%s' - size mismatch (%u " "!= " @@ -1566,10 +1566,10 @@ return NULL; } - atmi_status_t err = atmi_malloc(&ptr, size, get_gpu_mem_place(device_id)); + hsa_status_t err = atmi_malloc(&ptr, size, get_gpu_mem_place(device_id)); DP("Tgt alloc data %ld bytes, (tgt:%016llx).\n", size, (long long unsigned)(Elf64_Addr)ptr); - ptr = (err == ATMI_STATUS_SUCCESS) ? ptr : NULL; + ptr = (err == HSA_STATUS_SUCCESS) ? ptr : NULL; return ptr; } @@ -1617,10 +1617,10 @@ int32_t __tgt_rtl_data_delete(int device_id, void *tgt_ptr) { assert(device_id < DeviceInfo.NumberOfDevices && "Device ID too large"); - atmi_status_t err; + hsa_status_t err; DP("Tgt free data (tgt:%016llx).\n", (long long unsigned)(Elf64_Addr)tgt_ptr); err = atmi_free(tgt_ptr); - if (err != ATMI_STATUS_SUCCESS) { + if (err != HSA_STATUS_SUCCESS) { DP("Error when freeing CUDA memory\n"); return OFFLOAD_FAIL; }