Summary of this patch (OpenMP libomptarget, AMDGPU plugin):
  * impl/internal.h: replaces the queue/template fields that followed
    hostcall_ptr in impl_implicit_args_t with three placeholder fields
    (unused0..unused2), adds a static_assert that the struct is 56 bytes, and
    drops the arg_alignments/arg_offsets/arg_sizes members of
    atl_kernel_info_t.
  * impl/system.cpp: drops KernelArgMD::typeName_ and KernelArgMD::align_, the
    KernelMD class, the ".type_name" metadata branch and the per-argument
    push_back calls; info.kernel_segment_size no longer has
    sizeof(impl_implicit_args_t) added to it.
  * src/rtl.cpp: kernarg_segment_size is taken from info.kernel_segment_size
    without subtracting sizeof(impl_implicit_args_t), and the local arg_sizes
    vector is removed.

NOTE(review): line structure was restored from a whitespace-collapsed copy.
Blank context lines were placed so that every hunk matches its header counts;
leading indentation inside hunks is a reconstruction and should be checked
against the target tree (or applied with whitespace-insensitive matching).
NOTE(review): template argument lists appear to have been stripped by the same
extraction (e.g. "std::vector arg_sizes", "std::map ArgValueKind"); they are
left exactly as found and must be restored from the upstream sources before
this patch can apply cleanly.

diff --git a/openmp/libomptarget/plugins/amdgpu/impl/internal.h b/openmp/libomptarget/plugins/amdgpu/impl/internal.h
--- a/openmp/libomptarget/plugins/amdgpu/impl/internal.h
+++ b/openmp/libomptarget/plugins/amdgpu/impl/internal.h
@@ -38,13 +38,11 @@
   unsigned long offset_y;
   unsigned long offset_z;
   unsigned long hostcall_ptr;
-  char num_gpu_queues;
-  unsigned long gpu_queue_ptr;
-  char num_cpu_queues;
-  unsigned long cpu_worker_signals;
-  unsigned long cpu_queue_ptr;
-  unsigned long kernarg_template_ptr;
+  unsigned long unused0;
+  unsigned long unused1;
+  unsigned long unused2;
 } impl_implicit_args_t;
+static_assert(sizeof(impl_implicit_args_t) == 56, "");
 
 // ---------------------- Kernel Start -------------
 typedef struct atl_kernel_info_s {
@@ -57,9 +55,6 @@
   uint32_t vgpr_spill_count;
   uint32_t kernel_segment_size;
   uint32_t num_args;
-  std::vector arg_alignments;
-  std::vector arg_offsets;
-  std::vector arg_sizes;
 } atl_kernel_info_t;
 
 typedef struct atl_symbol_info_s {
diff --git a/openmp/libomptarget/plugins/amdgpu/impl/system.cpp b/openmp/libomptarget/plugins/amdgpu/impl/system.cpp
--- a/openmp/libomptarget/plugins/amdgpu/impl/system.cpp
+++ b/openmp/libomptarget/plugins/amdgpu/impl/system.cpp
@@ -62,26 +62,16 @@
   };
 
   KernelArgMD()
-      : name_(std::string()), typeName_(std::string()), size_(0), offset_(0),
-        align_(0), valueKind_(ValueKind::Unknown) {}
+      : name_(std::string()), size_(0), offset_(0),
+        valueKind_(ValueKind::Unknown) {}
 
   // fields
   std::string name_;
-  std::string typeName_;
   uint32_t size_;
   uint32_t offset_;
-  uint32_t align_;
   ValueKind valueKind_;
 };
 
-class KernelMD {
-public:
-  KernelMD() : kernargSegmentSize_(0ull) {}
-
-  // fields
-  uint64_t kernargSegmentSize_;
-};
-
 static const std::map ArgValueKind = {
     // v3
     // {"by_value", KernelArgMD::ValueKind::ByValue},
@@ -320,10 +310,6 @@
       foronly_string(value, [&](size_t N, const unsigned char *str) {
         kernelarg->name_ = std::string(str, str + N);
       });
-    } else if (message_is_string(key, ".type_name")) {
-      foronly_string(value, [&](size_t N, const unsigned char *str) {
-        kernelarg->typeName_ = std::string(str, str + N);
-      });
     } else if (message_is_string(key, ".size")) {
       foronly_unsigned(value, [&](uint64_t x) { kernelarg->size_ = x; });
     } else if (message_is_string(key, ".offset")) {
@@ -395,7 +381,7 @@
       return HSA_STATUS_ERROR_INVALID_CODE_OBJECT;
     }
 
-    atl_kernel_info_t info = {0, 0, 0, 0, 0, 0, 0, 0, 0, {}, {}, {}};
+    atl_kernel_info_t info = {0, 0, 0, 0, 0, 0, 0, 0, 0};
 
     uint64_t sgpr_count, vgpr_count, sgpr_spill_count, vgpr_spill_count;
     msgpack_errors += map_lookup_uint64_t(element, ".sgpr_count", &sgpr_count);
@@ -479,13 +465,10 @@
                  "iterate args map in kernel args metadata");
           return HSA_STATUS_ERROR_INVALID_CODE_OBJECT;
         }
-        // populate info with sizes and offsets
-        info.arg_sizes.push_back(lcArg.size_);
         // v3 has offset field and not align field
         size_t new_offset = lcArg.offset_;
         size_t padding = new_offset - offset;
         offset = new_offset;
-        info.arg_offsets.push_back(lcArg.offset_);
         DP("Arg[%lu] \"%s\" (%u, %u)\n", i, lcArg.name_.c_str(), lcArg.size_,
            lcArg.offset_);
         offset += lcArg.size_;
@@ -501,12 +484,9 @@
       }
     }
 
-    // add size of implicit args, e.g.: offset x, y and z and pipe pointer, but
-    // do not count the compiler set implicit args, but set your own implicit
-    // args by discounting the compiler set implicit args
+    // TODO: Probably don't want this arithmetic
     info.kernel_segment_size =
-        (hasHiddenArgs ? kernel_explicit_args_size : kernel_segment_size) +
-        sizeof(impl_implicit_args_t);
+        (hasHiddenArgs ? kernel_explicit_args_size : kernel_segment_size);
     DP("[%s: kernarg seg size] (%lu --> %u)\n", kernelName.c_str(),
        kernel_segment_size, info.kernel_segment_size);
 
diff --git a/openmp/libomptarget/plugins/amdgpu/src/rtl.cpp b/openmp/libomptarget/plugins/amdgpu/src/rtl.cpp
--- a/openmp/libomptarget/plugins/amdgpu/src/rtl.cpp
+++ b/openmp/libomptarget/plugins/amdgpu/src/rtl.cpp
@@ -1631,22 +1631,12 @@
         auto It = KernelInfoMap.find(kernelStr);
         if (It != KernelInfoMap.end()) {
           atl_kernel_info_t info = It->second;
-          // return the size for non-implicit args
-          kernarg_segment_size =
-              info.kernel_segment_size - sizeof(impl_implicit_args_t);
+          kernarg_segment_size = info.kernel_segment_size;
         } else {
           err = HSA_STATUS_ERROR;
         }
       }
 
-      // each arg is a void * in this openmp implementation
-      uint32_t arg_num = kernarg_segment_size / sizeof(void *);
-      std::vector arg_sizes(arg_num);
-      for (std::vector::iterator it = arg_sizes.begin();
-           it != arg_sizes.end(); it++) {
-        *it = sizeof(void *);
-      }
-
       // default value GENERIC (in case symbol is missing from cubin file)
       llvm::omp::OMPTgtExecModeFlags ExecModeVal =
           llvm::omp::OMPTgtExecModeFlags::OMP_TGT_EXEC_MODE_GENERIC;