Patch summary: adds sgpr_count, vgpr_count, sgpr_spill_count and vgpr_spill_count to atl_kernel_info_t (internal.h), fills them from the ".sgpr_count", ".vgpr_count", ".sgpr_spill_count" and ".vgpr_spill_count" kernel metadata keys (system.cpp), and prints them together with loop_tripcount in the print_kernel_trace output (rtl.cpp). In rtl.cpp the KernelInfoTable entry is bound by const reference so the lookup does not copy the entry and its std::vector members on every launch.
diff --git a/openmp/libomptarget/plugins/amdgpu/impl/internal.h b/openmp/libomptarget/plugins/amdgpu/impl/internal.h --- a/openmp/libomptarget/plugins/amdgpu/impl/internal.h +++ b/openmp/libomptarget/plugins/amdgpu/impl/internal.h @@ -97,6 +97,10 @@ uint64_t kernel_object; uint32_t group_segment_size; uint32_t private_segment_size; + uint32_t sgpr_count; + uint32_t vgpr_count; + uint32_t sgpr_spill_count; + uint32_t vgpr_spill_count; uint32_t kernel_segment_size; uint32_t num_args; std::vector arg_alignments; diff --git a/openmp/libomptarget/plugins/amdgpu/impl/system.cpp b/openmp/libomptarget/plugins/amdgpu/impl/system.cpp --- a/openmp/libomptarget/plugins/amdgpu/impl/system.cpp +++ b/openmp/libomptarget/plugins/amdgpu/impl/system.cpp @@ -832,7 +832,31 @@ msgpack_errors += map_lookup_string(element, ".symbol", &symbolName); msgpackErrorCheck(strings lookup in kernel metadata, msgpack_errors); - atl_kernel_info_t info = {0, 0, 0, 0, 0, {}, {}, {}}; + atl_kernel_info_t info = {0, 0, 0, 0, 0, 0, 0, 0, 0, {}, {}, {}}; + + uint64_t sgpr_count, vgpr_count, sgpr_spill_count, vgpr_spill_count; + msgpack_errors += map_lookup_uint64_t(element, ".sgpr_count", &sgpr_count); + msgpackErrorCheck(sgpr count metadata lookup in kernel metadata, + msgpack_errors); + info.sgpr_count = sgpr_count; + + msgpack_errors += map_lookup_uint64_t(element, ".vgpr_count", &vgpr_count); + msgpackErrorCheck(vgpr count metadata lookup in kernel metadata, + msgpack_errors); + info.vgpr_count = vgpr_count; + + msgpack_errors += + map_lookup_uint64_t(element, ".sgpr_spill_count", &sgpr_spill_count); + msgpackErrorCheck(sgpr spill count metadata lookup in kernel metadata, + msgpack_errors); + info.sgpr_spill_count = sgpr_spill_count; + + msgpack_errors += + map_lookup_uint64_t(element, ".vgpr_spill_count", &vgpr_spill_count); + msgpackErrorCheck(vgpr spill count metadata lookup in kernel metadata, + msgpack_errors); + info.vgpr_spill_count = vgpr_spill_count; + size_t kernel_explicit_args_size = 0; 
uint64_t kernel_segment_size; msgpack_errors += map_lookup_uint64_t(element, ".kernarg_segment_size", diff --git a/openmp/libomptarget/plugins/amdgpu/src/rtl.cpp b/openmp/libomptarget/plugins/amdgpu/src/rtl.cpp --- a/openmp/libomptarget/plugins/amdgpu/src/rtl.cpp +++ b/openmp/libomptarget/plugins/amdgpu/src/rtl.cpp @@ -1759,6 +1759,19 @@ KernelTy *KernelInfo = (KernelTy *)tgt_entry_ptr; + std::string kernel_name = std::string(KernelInfo->Name); + uint32_t sgpr_count, vgpr_count, sgpr_spill_count, vgpr_spill_count; + + { + assert(KernelInfoTable[device_id].find(kernel_name) != + KernelInfoTable[device_id].end()); + const auto &it = KernelInfoTable[device_id][kernel_name]; + sgpr_count = it.sgpr_count; + vgpr_count = it.vgpr_count; + sgpr_spill_count = it.sgpr_spill_count; + vgpr_spill_count = it.vgpr_spill_count; + } + /* * Set limit based on ThreadsPerGroup and GroupsPerDevice */ @@ -1780,10 +1793,12 @@ bool traceToStdout = print_kernel_trace & (RTL_TO_STDOUT | RTL_TIMING); fprintf(traceToStdout ? stdout : stderr, "DEVID:%2d SGN:%1d ConstWGSize:%-4d args:%2d teamsXthrds:(%4dX%4d) " - "reqd:(%4dX%4d) n:%s\n", + "reqd:(%4dX%4d) sgpr_count:%u vgpr_count:%u sgpr_spill_count:%u " + "vgpr_spill_count:%u tripcount:%lu n:%s\n", device_id, KernelInfo->ExecutionMode, KernelInfo->ConstWGSize, arg_num, num_groups, threadsPerGroup, num_teams, thread_limit, - KernelInfo->Name); + sgpr_count, vgpr_count, sgpr_spill_count, vgpr_spill_count, + loop_tripcount, KernelInfo->Name); } // Run on the device. @@ -1812,7 +1827,6 @@ packet->reserved2 = 0; // atmi writes id_ here packet->completion_signal = {0}; // may want a pool of signals - std::string kernel_name = std::string(KernelInfo->Name); { assert(KernelInfoTable[device_id].find(kernel_name) != KernelInfoTable[device_id].end());