diff --git a/openmp/libomptarget/plugins/amdgpu/src/rtl.cpp b/openmp/libomptarget/plugins/amdgpu/src/rtl.cpp --- a/openmp/libomptarget/plugins/amdgpu/src/rtl.cpp +++ b/openmp/libomptarget/plugins/amdgpu/src/rtl.cpp @@ -1788,10 +1788,12 @@ return OFFLOAD_FAIL; } + uint32_t group_segment_size; uint32_t sgpr_count, vgpr_count, sgpr_spill_count, vgpr_spill_count; { auto it = KernelInfoTable[device_id][kernel_name]; + group_segment_size = it.group_segment_size; sgpr_count = it.sgpr_count; vgpr_count = it.vgpr_count; sgpr_spill_count = it.sgpr_spill_count; @@ -1819,12 +1821,12 @@ bool traceToStdout = print_kernel_trace & (RTL_TO_STDOUT | RTL_TIMING); fprintf(traceToStdout ? stdout : stderr, "DEVID:%2d SGN:%1d ConstWGSize:%-4d args:%2d teamsXthrds:(%4dX%4d) " - "reqd:(%4dX%4d) sgpr_count:%u vgpr_count:%u sgpr_spill_count:%u " - "vgpr_spill_count:%u tripcount:%lu n:%s\n", + "reqd:(%4dX%4d) lds_usage:%uB sgpr_count:%u vgpr_count:%u " + "sgpr_spill_count:%u vgpr_spill_count:%u tripcount:%lu n:%s\n", device_id, KernelInfo->ExecutionMode, KernelInfo->ConstWGSize, arg_num, num_groups, threadsPerGroup, num_teams, thread_limit, - sgpr_count, vgpr_count, sgpr_spill_count, vgpr_spill_count, - loop_tripcount, KernelInfo->Name); + group_segment_size, sgpr_count, vgpr_count, sgpr_spill_count, + vgpr_spill_count, loop_tripcount, KernelInfo->Name); } // Run on the device.