diff --git a/openmp/libomptarget/plugins-nextgen/amdgpu/src/rtl.cpp b/openmp/libomptarget/plugins-nextgen/amdgpu/src/rtl.cpp
--- a/openmp/libomptarget/plugins-nextgen/amdgpu/src/rtl.cpp
+++ b/openmp/libomptarget/plugins-nextgen/amdgpu/src/rtl.cpp
@@ -373,10 +373,20 @@
   Expected<hsa_executable_symbol_t>
   findDeviceSymbol(GenericDeviceTy &Device, StringRef SymbolName) const;
 
+  /// Get additional info for a kernel, e.g., register and spill counts.
+  Expected<utils::KernelMetaDataTy> getKernelInfo(StringRef Identifier) const {
+    auto It = KernelInfoMap.find(Identifier); // single lookup, reused below
+    if (It == KernelInfoMap.end())
+      return createStringError(inconvertibleErrorCode(),
+                               "No kernel info entry for identifier");
+    return It->second;
+  }
+
 private:
   /// The exectuable loaded on the agent.
   hsa_executable_t Executable;
   hsa_code_object_t CodeObject;
+  StringMap<utils::KernelMetaDataTy> KernelInfoMap;
 };
 
 /// Class implementing the AMDGPU kernel functionalities which derives from the
@@ -426,6 +436,13 @@
     // TODO: Read the kernel descriptor for the max threads per block. May be
     // read from the image.
 
+    // Get additional kernel info read from image
+    auto KernelInfoOrErr = AMDImage.getKernelInfo(getName());
+    if (KernelInfoOrErr)
+      KernelInfo = *KernelInfoOrErr;
+    else // Metadata is diagnostics-only; a missing entry must not fail init.
+      consumeError(KernelInfoOrErr.takeError());
+
     return Plugin::success();
   }
 
@@ -434,6 +451,11 @@
                    uint64_t NumBlocks, KernelArgsTy &KernelArgs, void *Args,
                    AsyncInfoWrapperTy &AsyncInfoWrapper) const override;
 
+  /// Print more elaborate kernel launch info for AMDGPU
+  Error printLaunchInfo(GenericDeviceTy &GenericDevice,
+                        KernelArgsTy &KernelArgs, uint32_t NumThreads,
+                        uint64_t NumBlocks) const override;
+
   /// The default number of blocks is common to the whole device.
   uint32_t getDefaultNumBlocks(GenericDeviceTy &GenericDevice) const override {
     return GenericDevice.getDefaultNumBlocks();
@@ -462,6 +484,9 @@
 
   /// The size of implicit kernel arguments.
   const uint32_t ImplicitArgsSize;
+
+  /// Additional info for the AMD GPU kernel; zeroed until read from the image.
+  utils::KernelMetaDataTy KernelInfo{};
 };
 
 /// Class representing an HSA signal. Signals are used to define dependencies
@@ -2200,6 +2225,10 @@
   if (Result)
     return Plugin::error("Loaded HSA executable does not validate");
 
+  if (auto Err =
+          utils::readAMDGPUMetaDataFromImage(getMemoryBuffer(), KernelInfoMap))
+    return Err;
+
   return Plugin::success();
 }
 
@@ -2571,6 +2600,44 @@
                                    GroupSize, ArgsMemoryManager);
 }
 
+Error AMDGPUKernelTy::printLaunchInfo(GenericDeviceTy &GenericDevice,
+                                      KernelArgsTy &KernelArgs,
+                                      uint32_t NumThreads,
+                                      uint64_t NumBlocks) const {
+  // Only do all this when the output is requested
+  if (getInfoLevel() & OMP_INFOTYPE_PLUGIN_KERNEL) {
+    // General Info
+    auto DeviceId = GenericDevice.getDeviceId();
+    auto ConstWGSize = getDefaultNumThreads(GenericDevice);
+    unsigned long long NumGroups = NumBlocks; // widened to match %llu
+    auto ThreadsPerGroup = NumThreads; // threads used for this launch
+    auto NumTeams = KernelArgs.NumTeams[0]; // Only first dimension
+    auto ThreadLimit = KernelArgs.ThreadLimit[0]; // Only first dimension
+
+    // Kernel Arguments Info
+    auto ArgNum = KernelArgs.NumArgs;
+    unsigned long long LoopTripCount = KernelArgs.Tripcount; // for %llu
+
+    // Custom KernelType info from AMDGPU reader
+    auto GroupSegmentSize = KernelInfo.group_segment_size;
+    auto SgprCount = KernelInfo.sgpr_count;
+    auto VgprCount = KernelInfo.vgpr_count;
+    auto SgprSpillCount = KernelInfo.sgpr_spill_count;
+    auto VgprSpillCount = KernelInfo.vgpr_spill_count;
+
+    auto HostCallRequired = false;
+    INFO(OMP_INFOTYPE_PLUGIN_KERNEL, GenericDevice.getDeviceId(),
+         "DEVID:%2d SGN:%s ConstWGSize:%-4u args:%2u teamsXthrds:(%4lluX%4u) "
+         "reqd:(%4uX%4u) lds_usage:%uB sgpr_count:%u vgpr_count:%u "
+         "sgpr_spill_count:%u vgpr_spill_count:%u tripcount:%llu rpc:%d n:%s\n",
+         DeviceId, getExecutionModeName(), ConstWGSize, ArgNum, NumGroups,
+         ThreadsPerGroup, NumTeams, ThreadLimit, GroupSegmentSize, SgprCount,
+         VgprCount, SgprSpillCount, VgprSpillCount, LoopTripCount,
+         HostCallRequired, getName());
+  }
+  return Plugin::success();
+}
+
 GenericPluginTy *Plugin::createPlugin() { return new AMDGPUPluginTy(); }
 
GenericDeviceTy *Plugin::createDevice(int32_t DeviceId, int32_t NumDevices) {
diff --git a/openmp/libomptarget/plugins-nextgen/amdgpu/utils/UtilitiesRTL.h b/openmp/libomptarget/plugins-nextgen/amdgpu/utils/UtilitiesRTL.h
--- a/openmp/libomptarget/plugins-nextgen/amdgpu/utils/UtilitiesRTL.h
+++ b/openmp/libomptarget/plugins-nextgen/amdgpu/utils/UtilitiesRTL.h
@@ -17,6 +17,14 @@
 
 #include "llvm/ADT/StringMap.h"
 #include "llvm/ADT/StringRef.h"
+#include "llvm/Support/Error.h"
+
+#include "llvm/BinaryFormat/AMDGPUMetadataVerifier.h"
+#include "llvm/BinaryFormat/ELF.h"
+#include "llvm/BinaryFormat/MsgPackDocument.h"
+#include "llvm/Support/MemoryBufferRef.h"
+
+#include "llvm/Support/YAMLTraits.h"
 
 namespace llvm {
 namespace omp {
@@ -127,6 +135,180 @@
   return true;
 }
 
+/// Per-kernel metadata read from the AMDGPU code object notes. Fields whose
+/// key is absent from the image keep their zero default.
+struct KernelMetaDataTy {
+  uint64_t kernel_object = 0;
+  uint32_t group_segment_size = 0;
+  uint32_t private_segment_size = 0;
+  uint32_t sgpr_count = 0;
+  uint32_t vgpr_count = 0;
+  uint32_t sgpr_spill_count = 0;
+  uint32_t vgpr_spill_count = 0;
+  uint32_t kernel_segment_size = 0;
+  uint32_t explicit_argument_count = 0;
+  uint32_t implicit_argument_count = 0;
+  std::string kind;
+};
+namespace {
+
+/// Reads the AMDGPU specific per-kernel-metadata from an image.
+class KernelInfoReader {
+public:
+  KernelInfoReader(StringMap<KernelMetaDataTy> &KIM) : KernelInfoMap(KIM) {}
+
+  /// Process ELF note to read AMDGPU metadata from respective information
+  /// fields. Notes that are not named "AMDGPU" or do not verify are skipped.
+  Error processNote(const object::ELF64LE::Note &Note) {
+    auto Name = Note.getName();
+    auto Type = Note.getType();
+    if (Name != "AMDGPU")
+      return Error::success(); // We are not interested in other things
+
+    assert(Type == ELF::NT_AMDGPU_METADATA && "Parse AMDGPU MetaData");
+    auto Desc = Note.getDesc();
+    StringRef MsgPackString =
+        StringRef(reinterpret_cast<const char *>(Desc.data()), Desc.size());
+    msgpack::Document MsgPackDoc;
+    if (!MsgPackDoc.readFromBlob(MsgPackString, /*Multi=*/false))
+      return Error::success();
+
+    AMDGPU::HSAMD::V3::MetadataVerifier Verifier(true);
+    if (!Verifier.verify(MsgPackDoc.getRoot()))
+      return Error::success();
+
+    auto Root = MsgPackDoc.getRoot();
+    auto RootMap = Root.getMap(true);
+
+    if (auto Err = iterateAMDKernels(RootMap))
+      return Err;
+
+    return Error::success();
+  }
+
+private:
+  /// Extracts the relevant information via simple string look-up in the msgpack
+  /// document elements.
+  Error extractKernelData(msgpack::MapDocNode::MapTy::value_type V,
+                          std::string &KernelName,
+                          KernelMetaDataTy &KernelData) {
+    if (!V.first.isString())
+      return Error::success();
+
+    const auto isKey = [](const msgpack::DocNode &DK, StringRef SK) {
+      return DK.getString() == SK;
+    };
+
+    if (isKey(V.first, ".name")) {
+      KernelName =
+          V.second.toString(); // getString() returns not what you expect
+    } else if (isKey(V.first, ".sgpr_count")) {
+      KernelData.sgpr_count = V.second.getUInt();
+    } else if (isKey(V.first, ".sgpr_spill_count")) {
+      KernelData.sgpr_spill_count = V.second.getUInt();
+    } else if (isKey(V.first, ".vgpr_count")) {
+      KernelData.vgpr_count = V.second.getUInt();
+    } else if (isKey(V.first, ".vgpr_spill_count")) {
+      KernelData.vgpr_spill_count = V.second.getUInt();
+    } else if (isKey(V.first, ".private_segment_fixed_size")) {
+      KernelData.private_segment_size = V.second.getUInt();
+    } else if (isKey(V.first, ".group_segment_fixed_size")) {
+      KernelData.group_segment_size = V.second.getUInt();
+    }
+
+    return Error::success();
+  }
+
+  /// Get the "amdhsa.kernels" element from the msgpack Document
+  Expected<msgpack::ArrayDocNode> getAMDKernelsArray(msgpack::MapDocNode &MDN) {
+    auto Res = MDN.find("amdhsa.kernels");
+    if (Res == MDN.end())
+      return createStringError(inconvertibleErrorCode(),
+                               "Could not find amdhsa.kernels key");
+
+    auto Pair = *Res;
+    assert(Pair.second.isArray() &&
+           "AMDGPU kernel entries are arrays of entries");
+
+    return Pair.second.getArray();
+  }
+
+  /// Iterate all entries for one "amdhsa.kernels" entry. Each entry is a
+  /// MapDocNode that either maps a string to a single value (most of them) or
+  /// to another array of things. Currently, we only handle the case that maps
+  /// to scalar value.
+  Error generateKernelInfo(msgpack::ArrayDocNode::ArrayTy::iterator It) {
+    KernelMetaDataTy KernelData;
+    std::string KernelName;
+    auto Entry = (*It).getMap();
+    for (auto MI = Entry.begin(), E = Entry.end(); MI != E; ++MI)
+      if (auto Err = extractKernelData(*MI, KernelName, KernelData))
+        return Err;
+
+    KernelInfoMap.insert({KernelName, KernelData});
+    return Error::success();
+  }
+
+  /// Go over the list of AMD kernels in the "amdhsa.kernels" entry
+  Error iterateAMDKernels(msgpack::MapDocNode &MDN) {
+    auto KernelsOrErr = getAMDKernelsArray(MDN);
+    if (auto Err = KernelsOrErr.takeError())
+      return Err;
+
+    auto KernelsArr = *KernelsOrErr;
+    for (auto It = KernelsArr.begin(), E = KernelsArr.end(); It != E; ++It) {
+      if (!It->isMap())
+        continue; // we expect pairs
+
+      // Obtain the value for the different entries. Each array entry is a
+      // MapDocNode
+      if (auto Err = generateKernelInfo(It))
+        return Err;
+    }
+    return Error::success();
+  }
+
+  // Kernel names are the keys
+  StringMap<KernelMetaDataTy> &KernelInfoMap;
+};
+} // namespace
+
+/// Reads the AMDGPU specific metadata from the ELF file and populates the
+/// KernelInfoMap, keyed by kernel name. Returns the first error reported while
+/// opening the ELF image, iterating its notes, or processing a note.
+inline Error
+readAMDGPUMetaDataFromImage(MemoryBufferRef MemBuffer,
+                            StringMap<KernelMetaDataTy> &KernelInfoMap) {
+  auto ELFOrError = object::ELF64LEFile::create(MemBuffer.getBuffer());
+  if (auto Err = ELFOrError.takeError())
+    return Err;
+
+  const object::ELF64LEFile ELFObj = ELFOrError.get();
+
+  ArrayRef<object::ELF64LE::Shdr> Sections = cantFail(ELFObj.sections());
+  KernelInfoReader Reader(KernelInfoMap);
+  for (const auto &S : Sections) {
+    if (S.sh_type != ELF::SHT_NOTE)
+      continue;
+
+    // Out-parameter of notes(); scoped per section so that every instance is
+    // checked before it is destroyed.
+    Error Err = Error::success();
+    for (const auto N : ELFObj.notes(S, Err)) {
+      if (Err)
+        return Err;
+      // Fills the KernelInfoTable entries in the reader
+      if ((Err = Reader.processNote(N)))
+        return Err;
+    }
+    // The note range ends early when a malformed note is hit and reports that
+    // through Err, so it has to be inspected after the loop as well.
+    if (Err)
+      return Err;
+  }
+
+  return Error::success();
+}
 } // namespace utils
 } // namespace plugin
 } // namespace target
diff --git a/openmp/libomptarget/plugins-nextgen/common/PluginInterface/PluginInterface.h b/openmp/libomptarget/plugins-nextgen/common/PluginInterface/PluginInterface.h
--- a/openmp/libomptarget/plugins-nextgen/common/PluginInterface/PluginInterface.h
+++ b/openmp/libomptarget/plugins-nextgen/common/PluginInterface/PluginInterface.h
@@ -196,6 +196,26 @@
     return false;
   }
 
+protected:
+  /// Get the execution mode name of the kernel.
+  const char *getExecutionModeName() const {
+    switch (ExecutionMode) {
+    case OMP_TGT_EXEC_MODE_SPMD:
+      return "SPMD";
+    case OMP_TGT_EXEC_MODE_GENERIC:
+      return "Generic";
+    case OMP_TGT_EXEC_MODE_GENERIC_SPMD:
+      return "Generic-SPMD";
+    }
+    llvm_unreachable("Unknown execution mode!");
+  }
+
+  /// When overridden, prints plugin-specific kernel launch information.
+  /// Otherwise, default info.
+  virtual Error printLaunchInfo(GenericDeviceTy &GenericDevice,
+                                KernelArgsTy &KernelArgs, uint32_t NumThreads,
+                                uint64_t NumBlocks) const;
+
 private:
   /// Prepare the arguments before launching the kernel.
   void *prepareArgs(GenericDeviceTy &GenericDevice, void **ArgPtrs,
@@ -225,19 +245,6 @@
   }
   bool isSPMDMode() const { return ExecutionMode == OMP_TGT_EXEC_MODE_SPMD; }
 
-  /// Get the execution mode name of the kernel.
-  const char *getExecutionModeName() const {
-    switch (ExecutionMode) {
-    case OMP_TGT_EXEC_MODE_SPMD:
-      return "SPMD";
-    case OMP_TGT_EXEC_MODE_GENERIC:
-      return "Generic";
-    case OMP_TGT_EXEC_MODE_GENERIC_SPMD:
-      return "Generic-SPMD";
-    }
-    llvm_unreachable("Unknown execution mode!");
-  }
-
   /// The kernel name.
   const char *Name;
 
diff --git a/openmp/libomptarget/plugins-nextgen/common/PluginInterface/PluginInterface.cpp b/openmp/libomptarget/plugins-nextgen/common/PluginInterface/PluginInterface.cpp
--- a/openmp/libomptarget/plugins-nextgen/common/PluginInterface/PluginInterface.cpp
+++ b/openmp/libomptarget/plugins-nextgen/common/PluginInterface/PluginInterface.cpp
@@ -218,6 +218,18 @@
   return initImpl(GenericDevice, Image);
 }
 
+/// Default launch report: kernel name, block and thread counts, execution mode.
+Error GenericKernelTy::printLaunchInfo(GenericDeviceTy &GenericDevice,
+                                       KernelArgsTy &KernelArgs,
+                                       uint32_t NumThreads,
+                                       uint64_t NumBlocks) const {
+  INFO(OMP_INFOTYPE_PLUGIN_KERNEL, GenericDevice.getDeviceId(),
+       "Launching kernel %s with %" PRIu64
+       " blocks and %d threads in %s mode\n",
+       getName(), NumBlocks, NumThreads, getExecutionModeName());
+  return Plugin::success();
+}
+
 Error GenericKernelTy::launch(GenericDeviceTy &GenericDevice, void **ArgPtrs,
                               ptrdiff_t *ArgOffsets, KernelArgsTy &KernelArgs,
                               AsyncInfoWrapperTy &AsyncInfoWrapper) const {
@@ -232,10 +244,10 @@
   uint64_t NumBlocks = getNumBlocks(GenericDevice, KernelArgs.NumTeams,
                                     KernelArgs.Tripcount, NumThreads);
 
-  INFO(OMP_INFOTYPE_PLUGIN_KERNEL, GenericDevice.getDeviceId(),
-       "Launching kernel %s with %" PRIu64
-       " blocks and %d threads in %s mode\n",
-       getName(), NumBlocks, NumThreads, getExecutionModeName());
+  // Report the launch configuration; an error from the report stops the launch.
+  if (Error Err =
+          printLaunchInfo(GenericDevice, KernelArgs, NumThreads, NumBlocks))
+    return Err;
 
   return launchImpl(GenericDevice, NumThreads, NumBlocks, KernelArgs,
                     KernelArgsPtr, AsyncInfoWrapper);