diff --git a/openmp/libomptarget/plugins-nextgen/amdgpu/src/rtl.cpp b/openmp/libomptarget/plugins-nextgen/amdgpu/src/rtl.cpp --- a/openmp/libomptarget/plugins-nextgen/amdgpu/src/rtl.cpp +++ b/openmp/libomptarget/plugins-nextgen/amdgpu/src/rtl.cpp @@ -2616,31 +2616,39 @@ // General Info auto ConstWGSize = getDefaultNumThreads(GenericDevice); auto NumGroups = NumBlocks; - auto ThreadsPerGroup = getDefaultNumThreads(GenericDevice); - auto NumTeams = KernelArgs.NumTeams[0]; // Only first dimension - auto ThreadLimit = KernelArgs.ThreadLimit[0]; // Only first dimension + auto ThreadsPerGroup = NumThreads; // Kernel Arguments Info auto ArgNum = KernelArgs.NumArgs; auto LoopTripCount = KernelArgs.Tripcount; + auto ThreadLimitX = KernelArgs.ThreadLimit[0]; - // Details for AMDGPU kernels + // Details for AMDGPU kernels (read from image) + // https://www.llvm.org/docs/AMDGPUUsage.html#code-object-v4-metadata auto GroupSegmentSize = (*KernelInfo).GroupSegmentList; auto SGPRCount = (*KernelInfo).SGPRCount; auto VGPRCount = (*KernelInfo).VGPRCount; auto SGPRSpillCount = (*KernelInfo).SGPRSpillCount; auto VGPRSpillCount = (*KernelInfo).VGPRSpillCount; - - // TODO set correctly once host services available - auto HostCallRequired = false; + auto MaxFlatWorkgroupSize = (*KernelInfo).MaxFlatWorkgroupSize; + + // Prints additional launch info that contains the following. + // Num Args: The number of kernel arguments + // Teams x Thrds: The number of teams and the number of threads actually + // running. + // MaxFlatWorkgroupSize: Maximum flat work-group size supported by the + // kernel in work-items + // LDS Usage: Amount of bytes used in LDS storage + // S/VGPR Count: the number of S/V GPRs occupied by the kernel + // S/VGPR Spill Count: how many S/VGPRs are spilled by the kernel + // Tripcount: loop tripcount for the kernel INFO(OMP_INFOTYPE_PLUGIN_KERNEL, GenericDevice.getDeviceId(), - "SGN:%s ConstWGSize:%d args:%d teamsXthrds:(%4luX%4d) " - "reqd:(%4dX%4d) lds_usage:%uB sgpr_count:%u vgpr_count:%u " - "sgpr_spill_count:%u vgpr_spill_count:%u tripcount:%lu rpc:%d n:%s\n", - getExecutionModeName(), ConstWGSize, ArgNum, NumGroups, ThreadsPerGroup, - NumTeams, ThreadLimit, GroupSegmentSize, SGPRCount, VGPRCount, - SGPRSpillCount, VGPRSpillCount, LoopTripCount, HostCallRequired, - getName()); + "Num Args: %d Teams x Thrds: %4lux%4u (MaxFlatWorkGroupSize: %u) LDS " + "Usage: %uB SGPR Count: %u VGPR Count:%u " + "SGPR Spill Count: %u VGPR Spill Count: %u Tripcount: %lu\n", + ArgNum, NumGroups, ThreadsPerGroup, MaxFlatWorkgroupSize, + GroupSegmentSize, SGPRCount, VGPRCount, SGPRSpillCount, VGPRSpillCount, + LoopTripCount); return Plugin::success(); } diff --git a/openmp/libomptarget/plugins-nextgen/amdgpu/utils/UtilitiesRTL.h b/openmp/libomptarget/plugins-nextgen/amdgpu/utils/UtilitiesRTL.h --- a/openmp/libomptarget/plugins-nextgen/amdgpu/utils/UtilitiesRTL.h +++ b/openmp/libomptarget/plugins-nextgen/amdgpu/utils/UtilitiesRTL.h @@ -146,6 +146,10 @@ uint32_t KernelSegmentSize; uint32_t ExplicitArgumentCount; uint32_t ImplicitArgumentCount; + uint32_t RequestedWorkgroupSize[3]; + uint32_t WorkgroupSizeHint[3]; + uint32_t WavefronSize; + uint32_t MaxFlatWorkgroupSize; }; namespace { @@ -194,6 +198,18 @@ return DK.getString() == SK; }; + const auto getSequenceOfThreeInts = [](msgpack::DocNode &DN, + uint32_t *Vals) { + assert(DN.isArray()); + auto DNA = DN.getArray(); + + int i = 0; + for (auto DNABegin = DNA.begin(), DNAEnd = DNA.end(); DNABegin != DNAEnd; + ++DNABegin) { + Vals[i++] = DNABegin->getUInt(); + } + }; + if (isKey(V.first, ".name")) { KernelName = V.second.toString(); } else if (isKey(V.first, ".sgpr_count")) { @@ -208,6 +224,14 @@ KernelData.PrivateSegmentSize = V.second.getUInt(); } else if (isKey(V.first, ".group_segement_fixed_size")) { KernelData.GroupSegmentList = V.second.getUInt(); + } else if (isKey(V.first, ".reqd_workgroup_size")) { + getSequenceOfThreeInts(V.second, KernelData.RequestedWorkgroupSize); + } else if (isKey(V.first, ".workgroup_size_hint")) { + getSequenceOfThreeInts(V.second, KernelData.WorkgroupSizeHint); + } else if (isKey(V.first, ".wavefront_size")) { + KernelData.WavefronSize = V.second.getUInt(); + } else if (isKey(V.first, ".max_flat_workgroup_size")) { + KernelData.MaxFlatWorkgroupSize = V.second.getUInt(); } return Error::success(); @@ -295,6 +319,7 @@ return Error::success(); } + } // namespace utils } // namespace plugin } // namespace target