diff --git a/clang/lib/Driver/ToolChains/AMDGPUOpenMP.cpp b/clang/lib/Driver/ToolChains/AMDGPUOpenMP.cpp --- a/clang/lib/Driver/ToolChains/AMDGPUOpenMP.cpp +++ b/clang/lib/Driver/ToolChains/AMDGPUOpenMP.cpp @@ -161,6 +161,12 @@ BCLibs.push_back(Args.MakeArgString(Lib)); } + if (Args.hasFlag(options::OPT_fgpu_sanitize, + options::OPT_fno_gpu_sanitize,false)) { + BCLibs.push_back("asanrtl.bc"); /* appended here, ahead of the Fortran libm entry below, so the addBCLib loop reaches it earlier in BCLibs order; hasFlag defaults to false */ + } + + // Add libm for Fortran. if (C.getDriver().IsFlangMode()) { BCLibs.push_back(Args.MakeArgString("libm-amdgcn-" + SubArchName + ".bc")); @@ -192,12 +198,7 @@ std::string(WaveFrontSizeBC)}); } - if (Args.hasFlag(options::OPT_fgpu_sanitize, - options::OPT_fno_gpu_sanitize, false)) { - BCLibs.push_back("asanrtl.bc"); - } - - for (auto Lib : BCLibs) + for (auto Lib : BCLibs) addBCLib(C.getDriver(), Args, CmdArgs, LibraryPaths, Lib, /* PostClang Link? */ false); diff --git a/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp b/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp --- a/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp +++ b/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp @@ -1715,13 +1715,15 @@ // Instrument generic addresses in supported addressspaces. 
IRBuilder<> IRB(InsertBefore);
   Value *AddrLong = IRB.CreatePointerCast(Addr, IRB.getInt8PtrTy());
+  // Continue only when the address is neither shared nor private.
+
   Value *IsShared = IRB.CreateCall(AMDGPUAddressShared, {AddrLong});
   Value *IsPrivate = IRB.CreateCall(AMDGPUAddressPrivate, {AddrLong});
   Value *IsSharedOrPrivate = IRB.CreateOr(IsShared, IsPrivate);
   Value *Cmp = IRB.CreateICmpNE(IRB.getTrue(), IsSharedOrPrivate);
-  Value *AddrSpaceZeroLanding =
-      SplitBlockAndInsertIfThen(Cmp, InsertBefore, false);
+  Value *AddrSpaceZeroLanding = SplitBlockAndInsertIfThen(Cmp, InsertBefore, false);
   InsertBefore = cast<Instruction>(AddrSpaceZeroLanding);
+
   return InsertBefore;
 }
diff --git a/openmp/libomptarget/hostrpc/services/CMakeLists.txt b/openmp/libomptarget/hostrpc/services/CMakeLists.txt
--- a/openmp/libomptarget/hostrpc/services/CMakeLists.txt
+++ b/openmp/libomptarget/hostrpc/services/CMakeLists.txt
@@ -1,7 +1,12 @@
 if (OPENMP_ENABLE_LIBOMPTARGET_HSA)
-  add_library(hostrpc_services STATIC hostcall.cpp hostrpc_execute_service.c hostrpc_externs.c)
+  add_library(hostrpc_services STATIC hostcall.cpp hostrpc_execute_service.c hostrpc_externs.c devsanitizer.cpp)
   set_property(TARGET hostrpc_services PROPERTY POSITION_INDEPENDENT_CODE ON)
   find_path(HSA_INCLUDE hsa.h HINTS ${CMAKE_INSTALL_PREFIX}/include/hsa ${CMAKE_INSTALL_PREFIX}/../include/hsa /opt/rocm/include/hsa PATH_SUFFIXES hsa)
   include_directories(${HSA_INCLUDE})
+  # Locate the host ASan runtime instead of linking a developer-local absolute path; override with -DHOSTRPC_ASAN_RUNTIME=<file>.
+  find_library(HOSTRPC_ASAN_RUNTIME NAMES clang_rt.asan-x86_64 HINTS ${CMAKE_INSTALL_PREFIX}/lib/clang/14.0.0/lib/linux ${CMAKE_INSTALL_PREFIX}/../lib/clang/14.0.0/lib/linux)
+  if (HOSTRPC_ASAN_RUNTIME)
+    target_link_libraries(hostrpc_services ${HOSTRPC_ASAN_RUNTIME})
+  endif()
 endif()
diff --git a/openmp/libomptarget/hostrpc/services/hostcall.cpp b/openmp/libomptarget/hostrpc/services/hostcall.cpp
--- a/openmp/libomptarget/hostrpc/services/hostcall.cpp
+++ b/openmp/libomptarget/hostrpc/services/hostcall.cpp
@@ -1,3 +1,4 @@
+#include "../src/hostrpc.h"
 #include "hostrpc_internal.h"
 #include "hsa.h"
 #include "../plugins/amdgpu/impl/rt.h"
@@ -143,16 +144,60 @@
 }
 extern "C" void hostrpc_execute_service(uint32_t service, uint32_t device_id,
-                                        uint64_t *payload);
+                                        uint64_t *payload,
uint64_t *activemask);
+// Returns HOSTRPC_SUCCESS unless device_vrm is incompatible with HOSTRPC_VRM.
+static hostrpc_status_t hostrpc_version_check(unsigned int device_vrm) {
+  // Identical version words need no diagnostics.
+  if (device_vrm == (unsigned int)HOSTRPC_VRM)
+    return HOSTRPC_SUCCESS;
+  // Shifting out the low 6 bits leaves the combined version/release field.
+  unsigned int device_version_release = device_vrm >> 6;
+  if (device_version_release != HOSTRPC_VERSION_RELEASE) {
+    printf("ERROR Incompatible device and host version \n Device "
+           "release(%u)\n Host Version(%u)\n",
+           device_version_release, (unsigned int)HOSTRPC_VERSION_RELEASE);
+    return HOSTRPC_WRONGVERSION_ERROR;
+  }
+  if (device_vrm > HOSTRPC_VRM) {
+    printf("ERROR Incompatible device and host version \n Device "
+           "version(%u)\n Host version(%u)\n",
+           device_vrm, (unsigned int)HOSTRPC_VRM);
+    printf("Upgrade libomptarget runtime on your system. \n");
+    return HOSTRPC_OLDHOSTVERSIONMOD_ERROR;
+  }
+  if (device_vrm < HOSTRPC_VRM) {
+    unsigned int host_ver = ((unsigned int)HOSTRPC_VRM) >> 12;
+    unsigned int host_rel = (((unsigned int)HOSTRPC_VRM) << 20) >> 26;
+    unsigned int host_mod = (((unsigned int)HOSTRPC_VRM) << 26) >> 26;
+    unsigned int dev_ver = ((unsigned int)device_vrm) >> 12;
+    unsigned int dev_rel = (((unsigned int)device_vrm) << 20) >> 26;
+    unsigned int dev_mod = (((unsigned int)device_vrm) << 26) >> 26;
+    printf("WARNING: Device mod version < host mod version \n Device "
+           "version: %u.%u.%u\n Host version: %u.%u.%u\n",
+           dev_ver, dev_rel, dev_mod, host_ver, host_rel, host_mod);
+    printf("Consider rebuild binary with more recent compiler.\n");
+  }
+  return HOSTRPC_SUCCESS;
+}
+
+static void hostrpc_abort(int rc) {
+  printf("hostrpc_abort called with code %d\n", rc);
+  abort();
+}
+
+extern "C" void handleSanitizerService(payload_t *payload, uint64_t activemask, uint32_t *dev_ptr);
 
 void amd_hostcall_consumer_t::process_packets(buffer_t *buffer,
                                               uint64_t ready_stack) const {
-  // This function is always called from consume_packets, which owns
+  // Static and initialised: the flag is tested before it is first assigned.
+  static bool hostrpc_version_checked = false;
+  // This function is always called from consume_packets, which owns
   // the lock for the critical data.
WHEN_DEBUG(std::cout << "process packets starting with " << ready_stack << std::endl); + // Each wave can submit at most one packet at a time, and all // waves independently push ready packets. The stack of packets at // this point cannot contain multiple packets from the same wave, @@ -177,14 +222,33 @@ WHEN_DEBUG(std::cout << "activemask: " << std::hex << activemask << std::endl); + // split the 32-bit service number into service_id and VRM to be checked + // if device hostrpc or stubs are ahead of this host runtime. + uint service = header->service; + uint service_id = (service << 16) >> 16; /* keeps the low 16 bits, assuming uint is 32 bits wide */ + if (!hostrpc_version_checked) { + uint device_vrm = ((uint)service >> 16); /* the remaining high bits are the version word passed to the check */ + hostrpc_status_t err = hostrpc_version_check(device_vrm); + if (err != HOSTRPC_SUCCESS) + hostrpc_abort(err); + hostrpc_version_checked = true; + } + if(service_id == HOSTRPC_SERVICE_SANITIZER) { + handleSanitizerService(payload,activemask,&(buffer->device_id)); /* called once for the whole packet, outside the per-lane loop */ + //activemask zeroed to avoid subsequent handling for each work-item. + activemask = 0; + } + // TODO: One could use ffs to skip inactive lanes faster. 
+ if(activemask) { /* not entered when the sanitizer branch above cleared activemask */ for (uint32_t wi = 0; wi != 64; ++wi) { uint64_t flag = activemask & ((uint64_t)1 << wi); if (flag == 0) continue; uint64_t *slot = payload->slots[wi]; - hostrpc_execute_service(header->service, buffer->device_id, slot); + hostrpc_execute_service(service_id, (buffer->device_id), slot,&activemask); } + } __atomic_store_n(&header->control, reset_ready_flag(header->control), std::memory_order_release); } @@ -398,7 +462,7 @@ hsa_status_t device_malloc(void **mem, size_t size, int device_id) { return core::Runtime::DeviceMalloc(mem, size, device_id); } - + hsa_status_t atmi_free(void *mem) { return core::Runtime::Memfree(mem); } diff --git a/openmp/libomptarget/hostrpc/services/hostrpc_execute_service.c b/openmp/libomptarget/hostrpc/services/hostrpc_execute_service.c --- a/openmp/libomptarget/hostrpc/services/hostrpc_execute_service.c +++ b/openmp/libomptarget/hostrpc/services/hostrpc_execute_service.c @@ -40,6 +40,7 @@ #include #include +#if 0 /* disabled here: this patch also declares hostrpc_status_t in hostrpc_internal.h */ // Error codes for service handler functions used in this file // Some error codes may be returned to device stub functions. typedef enum hostrpc_status_t { @@ -59,6 +60,7 @@ HOSTRPC_OLDHOSTVERSIONMOD_ERROR = 13, HOSTRPC_INVALIDSERVICE_ERROR = 14, } hostrpc_status_t; +#endif // MAXVARGS is more than a static array size. // It is for user vargs functions only. @@ -263,6 +265,7 @@ payload[1] = (uint64_t)num_zeros; } +#if 0 /* disabled here: this patch adds a version check to hostcall.cpp */ // FIXME: Clean up this diagnostic and die properly static bool hostrpc_version_checked; static hostrpc_status_t hostrpc_version_check(unsigned int device_vrm) { @@ -296,6 +299,7 @@ } return HOSTRPC_SUCCESS; } +#endif static void hostrpc_abort(int rc) { printf("hostrpc_abort called with code %d\n", rc); @@ -305,12 +309,13 @@ // The architecture-specific implementation of hostrpc will // call this single external function for each service request. // Host service functions are architecturally independent. 
-extern void hostrpc_execute_service(uint32_t service, uint32_t device_id,
-                                    uint64_t *payload) {
-
+extern void hostrpc_execute_service(uint32_t service_id, uint32_t device_id,
+                                    uint64_t *payload, uint64_t *activemask) {
+#if 0
   // split the 32-bit service number into service_id and VRM to be checked
   // if device hostrpc or stubs are ahead of this host runtime.
   uint service_id = (service << 16) >> 16;
+  // Disabled: hostcall.cpp splits the service word and checks the version.
   if (!hostrpc_version_checked) {
     uint device_vrm = ((uint)service >> 16);
     hostrpc_status_t err = hostrpc_version_check(device_vrm);
@@ -318,8 +323,11 @@
       hostrpc_abort(err);
     hostrpc_version_checked = true;
   }
+#endif
+  // device_id arrives by value, matching the extern "C" declaration and the
+  // call in hostcall.cpp; activemask is accepted but not read in this file.
 
-  switch (service_id) {
+  switch (service_id) {
   case HOSTRPC_SERVICE_PRINTF:
     hostrpc_handler_SERVICE_PRINTF(device_id, payload);
     break;
@@ -350,12 +358,19 @@
   case HOSTRPC_SERVICE_FUNCTIONCALL:
     hostrpc_handler_SERVICE_FUNCTIONCALL(device_id, payload);
     break;
+  case HOSTRPC_SERVICE_DEVMEM:
+    fprintf(stderr, "NO HOSTRPC_SERVICE_DEVMEM handler \n");
+    break;
+  case HOSTRPC_SERVICE_SANITIZER:
+    fprintf(stderr, "ERROR: HOSTRPC_SERVICE_SANITIZER must be handled before hostrpc_execute_service\n");
+    abort();
+    break;
   case HOSTRPC_SERVICE_DEMO:
     hostrpc_handler_SERVICE_DEMO(device_id, payload);
     break;
   default:
-    hostrpc_abort(HOSTRPC_INVALIDSERVICE_ERROR);
-    printf("ERROR: hostrpc got a bad service id:%d\n", service);
+    printf("ERROR: hostrpc got a bad service id:%u\n", service_id);
+    hostrpc_abort(HOSTRPC_INVALIDSERVICE_ERROR);
   }
 }
 
diff --git a/openmp/libomptarget/hostrpc/services/hostrpc_internal.h b/openmp/libomptarget/hostrpc/services/hostrpc_internal.h
--- a/openmp/libomptarget/hostrpc/services/hostrpc_internal.h
+++ b/openmp/libomptarget/hostrpc/services/hostrpc_internal.h
@@ -9,6 +9,25 @@
 extern "C" {
 #endif
 
+// Error codes for hostrpc functions.
+typedef enum hostrpc_status_t { + HOSTRPC_SUCCESS = 0, + HOSTRPC_STATUS_UNKNOWN = 1, + HOSTRPC_STATUS_ERROR = 2, + HOSTRPC_STATUS_TERMINATE = 3, + HOSTRPC_DATA_USED_ERROR = 4, + HOSTRPC_ADDINT_ERROR = 5, + HOSTRPC_ADDFLOAT_ERROR = 6, + HOSTRPC_ADDSTRING_ERROR = 7, + HOSTRPC_UNSUPPORTED_ID_ERROR = 8, + HOSTRPC_INVALID_ID_ERROR = 9, + HOSTRPC_ERROR_INVALID_REQUEST = 10, + HOSTRPC_EXCEED_MAXVARGS_ERROR = 11, + HOSTRPC_WRONGVERSION_ERROR = 12, /* returned by hostrpc_version_check when the device release differs from the host's */ + HOSTRPC_OLDHOSTVERSIONMOD_ERROR = 13, /* returned by hostrpc_version_check when the device version word is greater than the host's */ + HOSTRPC_INVALIDSERVICE_ERROR = 14, /* passed to hostrpc_abort for a service id with no case in hostrpc_execute_service */ +} hostrpc_status_t; + /** \file Support library for invoking host services from the device. * * The hostcall consumer defined here is used by the language runtime diff --git a/openmp/libomptarget/hostrpc/src/hostrpc.h b/openmp/libomptarget/hostrpc/src/hostrpc.h --- a/openmp/libomptarget/hostrpc/src/hostrpc.h +++ b/openmp/libomptarget/hostrpc/src/hostrpc.h @@ -111,6 +111,8 @@ HOSTRPC_SERVICE_VARFNDOUBLE, HOSTRPC_SERVICE_FPRINTF, HOSTRPC_SERVICE_FTNASSIGN, + HOSTRPC_SERVICE_DEVMEM, + HOSTRPC_SERVICE_SANITIZER, }; typedef enum hostcall_service_id hostcall_service_id_t; diff --git a/openmp/libomptarget/hostrpc/src/hostrpc.cpp b/openmp/libomptarget/hostrpc/src/hostrpc.cpp --- a/openmp/libomptarget/hostrpc/src/hostrpc.cpp +++ b/openmp/libomptarget/hostrpc/src/hostrpc.cpp @@ -156,6 +156,22 @@ return unionarg.dval; } +EXTERN void __ockl_sanitizer_report(uint64_t addr, uint64_t pc, uint64_t wgidx, + uint64_t wgidy, uint64_t wgidz, + uint64_t wave_id, uint64_t is_read, + uint64_t access_size) { + hostrpc_result_t value = + hostrpc_invoke(PACK_VERS(HOSTRPC_SERVICE_SANITIZER), addr, pc, wgidx, + wgidy, wgidz, wave_id, is_read, access_size); + return (void)value.arg0; +} + +EXTERN uint64_t __hostrpc_devmem_request(uint64_t addr, uint64_t size) { + hostrpc_result_t result = hostrpc_invoke(PACK_VERS(HOSTRPC_SERVICE_DEVMEM), + addr, size, 0, 0, 0, 0, 0, 0); + return (uint64_t)result.arg0; +} + // 
-----------------------------------------------------------------------------
 //
 // vector_product_zeros: Example stub to demonstrate hostrpc services