diff --git a/openmp/libomptarget/plugins-nextgen/amdgpu/src/rtl.cpp b/openmp/libomptarget/plugins-nextgen/amdgpu/src/rtl.cpp
--- a/openmp/libomptarget/plugins-nextgen/amdgpu/src/rtl.cpp
+++ b/openmp/libomptarget/plugins-nextgen/amdgpu/src/rtl.cpp
@@ -373,10 +373,22 @@
   Expected<hsa_executable_symbol_t>
   findDeviceSymbol(GenericDeviceTy &Device, StringRef SymbolName) const;
 
+  /// Look up the metadata (e.g., register spill counts) recorded for the
+  /// kernel named \p Identifier; empty if the image had no such entry.
+  std::optional<utils::KernelMetaDataTy>
+  getKernelInfo(StringRef Identifier) const {
+    auto Entry = KernelInfoMap.find(Identifier);
+    if (Entry != KernelInfoMap.end())
+      return Entry->second;
+
+    return std::nullopt;
+  }
+
 private:
   /// The exectuable loaded on the agent.
   hsa_executable_t Executable;
   hsa_code_object_t CodeObject;
+  StringMap<utils::KernelMetaDataTy> KernelInfoMap;
 };
 
 /// Class implementing the AMDGPU kernel functionalities which derives from the
@@ -426,6 +438,12 @@
     // TODO: Read the kernel descriptor for the max threads per block. May be
     // read from the image.
 
+    // Get additional kernel info read from image
+    KernelInfo = AMDImage.getKernelInfo(getName());
+    if (!KernelInfo.has_value())
+      INFO(OMP_INFOTYPE_PLUGIN_KERNEL, Device.getDeviceId(),
+           "Could not read extra information for kernel %s.", getName());
+
     return Plugin::success();
   }
 
@@ -434,6 +452,11 @@
                    uint64_t NumBlocks, KernelArgsTy &KernelArgs, void *Args,
                    AsyncInfoWrapperTy &AsyncInfoWrapper) const override;
 
+  /// Print more elaborate kernel launch info for AMDGPU
+  Error printLaunchInfoDetails(GenericDeviceTy &GenericDevice,
+                               KernelArgsTy &KernelArgs, uint32_t NumThreads,
+                               uint64_t NumBlocks) const override;
+
   /// The default number of blocks is common to the whole device.
   uint32_t getDefaultNumBlocks(GenericDeviceTy &GenericDevice) const override {
     return GenericDevice.getDefaultNumBlocks();
@@ -462,6 +485,9 @@
 
   /// The size of implicit kernel arguments.
   const uint32_t ImplicitArgsSize;
+
+  /// Additional Info for the AMD GPU Kernel
+  std::optional<utils::KernelMetaDataTy> KernelInfo;
 };
 
 /// Class representing an HSA signal. Signals are used to define dependencies
@@ -2200,6 +2226,10 @@
   if (Result)
     return Plugin::error("Loaded HSA executable does not validate");
 
+  if (auto Err =
+          utils::readAMDGPUMetaDataFromImage(getMemoryBuffer(), KernelInfoMap))
+    return Err;
+
   return Plugin::success();
 }
 
@@ -2571,6 +2601,50 @@
                                  GroupSize, ArgsMemoryManager);
 }
 
+/// Print launch geometry plus register/LDS usage read from the image metadata.
+Error AMDGPUKernelTy::printLaunchInfoDetails(GenericDeviceTy &GenericDevice,
+                                             KernelArgsTy &KernelArgs,
+                                             uint32_t NumThreads,
+                                             uint64_t NumBlocks) const {
+  // Only do all this when the output is requested
+  if (!(getInfoLevel() & OMP_INFOTYPE_PLUGIN_KERNEL))
+    return Plugin::success();
+
+  // We don't have data to print additional info, but no hard error
+  if (!KernelInfo.has_value())
+    return Plugin::success();
+
+  // General Info. Locals get explicit types so they match the format string.
+  const uint32_t ConstWGSize = getDefaultNumThreads(GenericDevice);
+  const auto NumGroups = static_cast<unsigned long long>(NumBlocks);
+  const uint32_t ThreadsPerGroup = NumThreads; // Threads actually launched
+  const uint32_t NumTeams = KernelArgs.NumTeams[0]; // Only first dimension
+  const uint32_t ThreadLimit = KernelArgs.ThreadLimit[0]; // Only first dim.
+
+  // Kernel Arguments Info
+  const uint32_t ArgNum = KernelArgs.NumArgs;
+  auto LoopTripCount = static_cast<unsigned long long>(KernelArgs.Tripcount);
+
+  // Details for AMDGPU kernels
+  const uint32_t GroupSegmentSize = KernelInfo->GroupSegmentList;
+  const uint32_t SGPRCount = KernelInfo->SGPRCount;
+  const uint32_t VGPRCount = KernelInfo->VGPRCount;
+  const uint32_t SGPRSpillCount = KernelInfo->SGPRSpillCount;
+  const uint32_t VGPRSpillCount = KernelInfo->VGPRSpillCount;
+
+  // TODO set correctly once host services available
+  auto HostCallRequired = false;
+  INFO(OMP_INFOTYPE_PLUGIN_KERNEL, GenericDevice.getDeviceId(),
+       "SGN:%s ConstWGSize:%u args:%u teamsXthrds:(%4lluX%4u) "
+       "reqd:(%4uX%4u) lds_usage:%uB sgpr_count:%u vgpr_count:%u "
+       "sgpr_spill_count:%u vgpr_spill_count:%u tripcount:%llu rpc:%d n:%s\n",
+       getExecutionModeName(), ConstWGSize, ArgNum, NumGroups, ThreadsPerGroup,
+       NumTeams, ThreadLimit, GroupSegmentSize, SGPRCount, VGPRCount,
+       SGPRSpillCount, VGPRSpillCount, LoopTripCount, HostCallRequired,
+       getName());
+  return Plugin::success();
+}
+
 GenericPluginTy *Plugin::createPlugin() { return new AMDGPUPluginTy(); }
 
 GenericDeviceTy *Plugin::createDevice(int32_t DeviceId, int32_t NumDevices) {
diff --git a/openmp/libomptarget/plugins-nextgen/amdgpu/utils/UtilitiesRTL.h b/openmp/libomptarget/plugins-nextgen/amdgpu/utils/UtilitiesRTL.h
--- a/openmp/libomptarget/plugins-nextgen/amdgpu/utils/UtilitiesRTL.h
+++ b/openmp/libomptarget/plugins-nextgen/amdgpu/utils/UtilitiesRTL.h
@@ -17,6 +17,14 @@
 
 #include "llvm/ADT/StringMap.h"
 #include "llvm/ADT/StringRef.h"
+#include "llvm/Support/Error.h"
+
+#include "llvm/BinaryFormat/AMDGPUMetadataVerifier.h"
+#include "llvm/BinaryFormat/ELF.h"
+#include "llvm/BinaryFormat/MsgPackDocument.h"
+#include "llvm/Support/MemoryBufferRef.h"
+
+#include "llvm/Support/YAMLTraits.h"
 
 namespace llvm {
 namespace omp {
@@ -127,6 +135,166 @@
   return true;
 }
 
+struct KernelMetaDataTy { // Fields default to 0 when the metadata lacks them
+  uint64_t KernelObject = 0;       // Not filled in by KernelInfoReader
+  uint32_t GroupSegmentList = 0;   // Printed as "lds_usage" (bytes) at launch
+  uint32_t PrivateSegmentSize = 0; // From ".private_segment_fixed_size"
+  uint32_t SGPRCount = 0;          // From ".sgpr_count"
+  uint32_t VGPRCount = 0;          // From ".vgpr_count"
+  uint32_t SGPRSpillCount = 0;     // From ".sgpr_spill_count"
+  uint32_t VGPRSpillCount = 0;     // From ".vgpr_spill_count"
+  uint32_t KernelSegmentSize = 0;     // Not filled in by KernelInfoReader
+  uint32_t ExplicitArgumentCount = 0; // Not filled in by KernelInfoReader
+  uint32_t ImplicitArgumentCount = 0; // Not filled in by KernelInfoReader
+};
+namespace {
+
+/// Reads the AMDGPU specific per-kernel-metadata from an image.
+class KernelInfoReader {
+public:
+  KernelInfoReader(StringMap<KernelMetaDataTy> &KIM) : KernelInfoMap(KIM) {}
+
+  /// Process ELF note to read AMDGPU metadata from respective information
+  /// fields. Notes that are not AMDGPU metadata or do not parse are skipped.
+  Error processNote(const object::ELF64LE::Note &Note) {
+    if (Note.getName() != "AMDGPU")
+      return Error::success(); // We are not interested in other things
+
+    assert(Note.getType() == ELF::NT_AMDGPU_METADATA &&
+           "Parse AMDGPU MetaData");
+    auto Desc = Note.getDesc();
+    StringRef MsgPackString =
+        StringRef(reinterpret_cast<const char *>(Desc.data()), Desc.size());
+    msgpack::Document MsgPackDoc;
+    if (!MsgPackDoc.readFromBlob(MsgPackString, /*Multi=*/false))
+      return Error::success(); // Unreadable blob: no extra info, no error
+
+    AMDGPU::HSAMD::V3::MetadataVerifier Verifier(/*Strict=*/true);
+    if (!Verifier.verify(MsgPackDoc.getRoot()))
+      return Error::success(); // Unverified metadata is ignored as well
+
+    auto RootMap = MsgPackDoc.getRoot().getMap(true);
+
+    if (auto Err = iterateAMDKernels(RootMap))
+      return Err;
+
+    return Error::success();
+  }
+
+private:
+  /// Extracts the relevant information via simple string look-up in the msgpack
+  /// document elements.
+  Error extractKernelData(msgpack::MapDocNode::MapTy::value_type V,
+                          std::string &KernelName,
+                          KernelMetaDataTy &KernelData) {
+    if (!V.first.isString())
+      return Error::success(); // Only string keys are looked at
+
+    const auto isKey = [](const msgpack::DocNode &DK, StringRef SK) {
+      return DK.getString() == SK;
+    };
+
+    if (isKey(V.first, ".name")) {
+      KernelName = V.second.toString();
+    } else if (isKey(V.first, ".sgpr_count")) {
+      KernelData.SGPRCount = V.second.getUInt();
+    } else if (isKey(V.first, ".sgpr_spill_count")) {
+      KernelData.SGPRSpillCount = V.second.getUInt();
+    } else if (isKey(V.first, ".vgpr_count")) {
+      KernelData.VGPRCount = V.second.getUInt();
+    } else if (isKey(V.first, ".vgpr_spill_count")) {
+      KernelData.VGPRSpillCount = V.second.getUInt();
+    } else if (isKey(V.first, ".private_segment_fixed_size")) {
+      KernelData.PrivateSegmentSize = V.second.getUInt();
+    } else if (isKey(V.first, ".group_segment_fixed_size")) {
+      KernelData.GroupSegmentList = V.second.getUInt();
+    }
+
+    return Error::success();
+  }
+
+  /// Get the "amdhsa.kernels" element from the msgpack Document
+  Expected<msgpack::ArrayDocNode> getAMDKernelsArray(msgpack::MapDocNode &MDN) {
+    auto Res = MDN.find("amdhsa.kernels");
+    if (Res == MDN.end())
+      return createStringError(inconvertibleErrorCode(),
+                               "Could not find amdhsa.kernels key");
+
+    auto Pair = *Res;
+    assert(Pair.second.isArray() &&
+           "AMDGPU kernel entries are arrays of entries");
+
+    return Pair.second.getArray();
+  }
+
+  /// Iterate all entries for one "amdhsa.kernels" entry. Each entry is a
+  /// MapDocNode that either maps a string to a single value (most of them) or
+  /// to another array of things. Currently, we only handle the case that maps
+  /// to scalar value.
+  Error generateKernelInfo(msgpack::ArrayDocNode::ArrayTy::iterator It) {
+    KernelMetaDataTy KernelData{}; // Zero the fields the entry does not set
+    std::string KernelName;
+    auto Entry = (*It).getMap();
+    for (auto MI = Entry.begin(), E = Entry.end(); MI != E; ++MI)
+      if (auto Err = extractKernelData(*MI, KernelName, KernelData))
+        return Err;
+
+    KernelInfoMap.insert({KernelName, KernelData});
+    return Error::success();
+  }
+
+  /// Go over the list of AMD kernels in the "amdhsa.kernels" entry
+  Error iterateAMDKernels(msgpack::MapDocNode &MDN) {
+    auto KernelsOrErr = getAMDKernelsArray(MDN);
+    if (auto Err = KernelsOrErr.takeError())
+      return Err;
+
+    auto KernelsArr = *KernelsOrErr;
+    for (auto It = KernelsArr.begin(), E = KernelsArr.end(); It != E; ++It) {
+      if (!It->isMap())
+        continue; // we expect <key,value> pairs
+
+      // Obtain the value for the different entries. Each array entry is a
+      // MapDocNode
+      if (auto Err = generateKernelInfo(It))
+        return Err;
+    }
+    return Error::success();
+  }
+
+  // Kernel names are the keys
+  StringMap<KernelMetaDataTy> &KernelInfoMap;
+};
+} // namespace
+
+/// Reads the AMDGPU specific metadata from the ELF image in \p MemBuffer and
+/// populates \p KernelInfoMap; ELF parsing failures are returned as an Error.
+Error readAMDGPUMetaDataFromImage(MemoryBufferRef MemBuffer,
+                                  StringMap<KernelMetaDataTy> &KernelInfoMap) {
+  auto ELFOrError = object::ELF64LEFile::create(MemBuffer.getBuffer());
+  if (!ELFOrError)
+    return ELFOrError.takeError();
+
+  const object::ELF64LEFile ELFObj = ELFOrError.get();
+  auto SectionsOrError = ELFObj.sections();
+  if (!SectionsOrError)
+    return SectionsOrError.takeError();
+  KernelInfoReader Reader(KernelInfoMap);
+  for (const auto &S : *SectionsOrError) {
+    if (S.sh_type != ELF::SHT_NOTE)
+      continue;
+    Error Err = Error::success(); // Out-parameter of the note iterator
+    for (const auto N : ELFObj.notes(S, Err)) {
+      if (Err) // Also marks Err as checked so it can be reassigned below
+        return Err;
+      if ((Err = Reader.processNote(N))) // Fills the KernelInfoMap entries
+        return Err;
+    }
+    if (Err) // A failed iteration ends the loop, so look at Err afterwards
+      return Err;
+  }
+  return Error::success();
+}
 } // namespace utils
 } // namespace plugin
 } // namespace target
diff --git a/openmp/libomptarget/plugins-nextgen/common/PluginInterface/PluginInterface.h b/openmp/libomptarget/plugins-nextgen/common/PluginInterface/PluginInterface.h
--- a/openmp/libomptarget/plugins-nextgen/common/PluginInterface/PluginInterface.h
+++ b/openmp/libomptarget/plugins-nextgen/common/PluginInterface/PluginInterface.h
@@ -196,6 +196,32 @@
     return false;
   }
 
+protected:
+  /// Get the execution mode name: "SPMD", "Generic" or "Generic-SPMD".
+  const char *getExecutionModeName() const {
+    switch (ExecutionMode) {
+    case OMP_TGT_EXEC_MODE_SPMD:
+      return "SPMD";
+    case OMP_TGT_EXEC_MODE_GENERIC:
+      return "Generic";
+    case OMP_TGT_EXEC_MODE_GENERIC_SPMD:
+      return "Generic-SPMD";
+    }
+    llvm_unreachable("Unknown execution mode!"); // No case above matched
+  }
+
+  /// Prints generic kernel launch information.
+  Error printLaunchInfo(GenericDeviceTy &GenericDevice,
+                        KernelArgsTy &KernelArgs, uint32_t NumThreads,
+                        uint64_t NumBlocks) const;
+
+  /// Prints plugin-specific kernel launch information after generic kernel
+  /// launch information
+  virtual Error printLaunchInfoDetails(GenericDeviceTy &GenericDevice,
+                                       KernelArgsTy &KernelArgs,
+                                       uint32_t NumThreads,
+                                       uint64_t NumBlocks) const;
+
 private:
   /// Prepare the arguments before launching the kernel.
   void *prepareArgs(GenericDeviceTy &GenericDevice, void **ArgPtrs,
@@ -225,19 +251,6 @@
   }
   bool isSPMDMode() const { return ExecutionMode == OMP_TGT_EXEC_MODE_SPMD; }
 
-  /// Get the execution mode name of the kernel.
-  const char *getExecutionModeName() const {
-    switch (ExecutionMode) {
-    case OMP_TGT_EXEC_MODE_SPMD:
-      return "SPMD";
-    case OMP_TGT_EXEC_MODE_GENERIC:
-      return "Generic";
-    case OMP_TGT_EXEC_MODE_GENERIC_SPMD:
-      return "Generic-SPMD";
-    }
-    llvm_unreachable("Unknown execution mode!");
-  }
-
   /// The kernel name.
   const char *Name;
 
diff --git a/openmp/libomptarget/plugins-nextgen/common/PluginInterface/PluginInterface.cpp b/openmp/libomptarget/plugins-nextgen/common/PluginInterface/PluginInterface.cpp
--- a/openmp/libomptarget/plugins-nextgen/common/PluginInterface/PluginInterface.cpp
+++ b/openmp/libomptarget/plugins-nextgen/common/PluginInterface/PluginInterface.cpp
@@ -218,6 +218,25 @@
   return initImpl(GenericDevice, Image);
 }
 
+Error GenericKernelTy::printLaunchInfo(GenericDeviceTy &GenericDevice,
+                                       KernelArgsTy &KernelArgs,
+                                       uint32_t NumThreads,
+                                       uint64_t NumBlocks) const {
+  INFO(OMP_INFOTYPE_PLUGIN_KERNEL, GenericDevice.getDeviceId(),
+       "Launching kernel %s with %" PRIu64
+       " blocks and %d threads in %s mode\n", // Generic one-line summary
+       getName(), NumBlocks, NumThreads, getExecutionModeName());
+  return printLaunchInfoDetails(GenericDevice, KernelArgs, NumThreads,
+                                NumBlocks); // Then the plugin-specific part
+}
+
+Error GenericKernelTy::printLaunchInfoDetails(GenericDeviceTy &GenericDevice,
+                                              KernelArgsTy &KernelArgs,
+                                              uint32_t NumThreads,
+                                              uint64_t NumBlocks) const {
+  return Plugin::success(); // Base implementation prints nothing extra
+}
+
 Error GenericKernelTy::launch(GenericDeviceTy &GenericDevice, void **ArgPtrs,
                               ptrdiff_t *ArgOffsets, KernelArgsTy &KernelArgs,
                               AsyncInfoWrapperTy &AsyncInfoWrapper) const {
@@ -232,10 +251,9 @@
   uint64_t NumBlocks = getNumBlocks(GenericDevice, KernelArgs.NumTeams,
                                     KernelArgs.Tripcount, NumThreads);
 
-  INFO(OMP_INFOTYPE_PLUGIN_KERNEL, GenericDevice.getDeviceId(),
-       "Launching kernel %s with %" PRIu64
-       " blocks and %d threads in %s mode\n",
-       getName(), NumBlocks, NumThreads, getExecutionModeName());
+  if (auto Err =
+          printLaunchInfo(GenericDevice, KernelArgs, NumThreads, NumBlocks))
+    return Err;
 
   return launchImpl(GenericDevice, NumThreads, NumBlocks, KernelArgs,
                     KernelArgsPtr, AsyncInfoWrapper);
diff --git a/openmp/libomptarget/test/offloading/info.c b/openmp/libomptarget/test/offloading/info.c
--- a/openmp/libomptarget/test/offloading/info.c
+++ b/openmp/libomptarget/test/offloading/info.c
@@ -1,7 +1,9 @@
 // RUN: %libomptarget-compile-generic \
 // RUN:     -gline-tables-only -fopenmp-extensions
 // RUN: env LIBOMPTARGET_INFO=63 %libomptarget-run-generic 2>&1 | \
-// RUN:   %fcheck-generic -allow-empty -check-prefix=INFO
+// RUN:   %fcheck-generic -allow-empty -check-prefixes=INFO
+// RUN: env LIBOMPTARGET_INFO=63 %libomptarget-run-amdgcn-amd-amdhsa 2>&1 | \
+// RUN:   %fcheck-amdgcn-amd-amdhsa -allow-empty -check-prefixes=INFO,AMDGPU
 
 #include <omp.h>
 #include <stdio.h>
@@ -37,6 +39,7 @@
 // INFO: info: Entering OpenMP kernel at info.c:{{[0-9]+}}:{{[0-9]+}} with 1 arguments:
 // INFO: info: firstprivate(val)[4]
 // INFO: info: Launching kernel __omp_offloading_{{.*}}main{{.*}} with {{[0-9]+}} blocks and {{[0-9]+}} threads in Generic mode
+// AMDGPU: AMDGPU device {{[0-9]}} info: SGN:Generic ConstWGSize:{{[0-9]+}} args:{{[0-9]}} teamsXthrds:({{   [0-9]+}}X {{[0-9]+}}) reqd:(   {{[0-9]+}}X   {{[0-9]+}}) lds_usage:{{[0-9]+}}B sgpr_count:{{[0-9]+}} vgpr_count:{{[0-9]+}} sgpr_spill_count:{{[0-9]+}} vgpr_spill_count:{{[0-9]+}} tripcount:{{[0-9]+}} rpc:0 n:__omp_offloading_{{.*}}main{{.*}}
 // INFO: info: OpenMP Host-Device pointer mappings after block at info.c:{{[0-9]+}}:{{[0-9]+}}:
 // INFO: info: Host Ptr           Target Ptr         Size (B) DynRefCount HoldRefCount Declaration
 // INFO: info: {{.*}}             {{.*}}             256      1           0            C[0:64] at info.c:{{[0-9]+}}:{{[0-9]+}}