diff --git a/openmp/libomptarget/plugins/amdgpu/impl/get_elf_mach_gfx_name.h b/openmp/libomptarget/plugins/amdgpu/impl/get_elf_mach_gfx_name.h
--- a/openmp/libomptarget/plugins/amdgpu/impl/get_elf_mach_gfx_name.h
+++ b/openmp/libomptarget/plugins/amdgpu/impl/get_elf_mach_gfx_name.h
@@ -57,4 +57,6 @@
   COV5_HEAPV1_PTR_SIZE = 8
 };
 
+const uint16_t getImplicitArgsSize(uint16_t Version);
+
 #endif
diff --git a/openmp/libomptarget/plugins/amdgpu/impl/get_elf_mach_gfx_name.cpp b/openmp/libomptarget/plugins/amdgpu/impl/get_elf_mach_gfx_name.cpp
--- a/openmp/libomptarget/plugins/amdgpu/impl/get_elf_mach_gfx_name.cpp
+++ b/openmp/libomptarget/plugins/amdgpu/impl/get_elf_mach_gfx_name.cpp
@@ -79,3 +79,8 @@
     return "--unknown gfx";
   }
 }
+
+const uint16_t getImplicitArgsSize(uint16_t Version) {
+  return Version < ELFABIVERSION_AMDGPU_HSA_V5 ? IMPLICITARGS::COV4_SIZE
+                                               : IMPLICITARGS::COV5_SIZE;
+}
diff --git a/openmp/libomptarget/plugins/amdgpu/src/rtl.cpp b/openmp/libomptarget/plugins/amdgpu/src/rtl.cpp
--- a/openmp/libomptarget/plugins/amdgpu/src/rtl.cpp
+++ b/openmp/libomptarget/plugins/amdgpu/src/rtl.cpp
@@ -125,9 +125,10 @@
   uint32_t KernargSegmentSize;
   void *KernargRegion = nullptr;
   std::queue<int> FreeKernargSegments;
+  uint16_t CodeObjectVersion;
 
   uint32_t kernargSizeIncludingImplicit() {
-    return KernargSegmentSize + sizeof(AMDGPUImplicitArgsTy);
+    return KernargSegmentSize + getImplicitArgsSize(CodeObjectVersion);
   }
 
   ~KernelArgPool() {
@@ -144,8 +145,10 @@
   KernelArgPool(const KernelArgPool &) = delete;
   KernelArgPool(KernelArgPool &&) = delete;
 
-  KernelArgPool(uint32_t KernargSegmentSize, hsa_amd_memory_pool_t &MemoryPool)
-      : KernargSegmentSize(KernargSegmentSize) {
+  KernelArgPool(uint32_t KernargSegmentSize, hsa_amd_memory_pool_t &MemoryPool,
+                uint16_t CodeObjectVersion)
+      : KernargSegmentSize(KernargSegmentSize),
+        CodeObjectVersion(CodeObjectVersion) {
 
     // impl uses one pool per kernel for all gpus, with a fixed upper size
     // preserving that exact scheme here, including the queue<int>
@@ -229,16 +232,16 @@
   KernelTy(llvm::omp::OMPTgtExecModeFlags ExecutionMode, int16_t ConstWgSize,
            int32_t DeviceId, void *CallStackAddr, const char *Name,
            uint32_t KernargSegmentSize,
-           hsa_amd_memory_pool_t &KernArgMemoryPool)
+           hsa_amd_memory_pool_t &KernArgMemoryPool, uint16_t CodeObjectVersion)
       : ExecutionMode(ExecutionMode), ConstWGSize(ConstWgSize),
         DeviceId(DeviceId), CallStackAddr(CallStackAddr), Name(Name) {
     DP("Construct kernelinfo: ExecMode %d\n", ExecutionMode);
 
     std::string N(Name);
     if (KernelArgPoolMap.find(N) == KernelArgPoolMap.end()) {
-      KernelArgPoolMap.insert(
-          std::make_pair(N, std::unique_ptr<KernelArgPool>(new KernelArgPool(
-                                KernargSegmentSize, KernArgMemoryPool))));
+      KernelArgPoolMap.insert(std::make_pair(
+          N, std::unique_ptr<KernelArgPool>(new KernelArgPool(
+                 KernargSegmentSize, KernArgMemoryPool, CodeObjectVersion))));
     }
   }
 };
@@ -475,6 +478,7 @@
   std::vector<int> WarpSize;
   std::vector<std::string> GPUName;
   std::vector<std::string> TargetID;
+  uint16_t CodeObjectVersion;
 
   // OpenMP properties
   std::vector<int> NumTeams;
@@ -1363,6 +1367,27 @@
   return PacketId;
 }
 
+const uint16_t getCodeObjectVersionFromELF(__tgt_device_image *Image) {
+  char *ImageBegin = (char *)Image->ImageStart;
+  size_t ImageSize = (char *)Image->ImageEnd - ImageBegin;
+
+  StringRef Buffer = StringRef(ImageBegin, ImageSize);
+  auto ElfOrErr = ObjectFile::createELFObjectFile(MemoryBufferRef(Buffer, ""),
+                                                  /*InitContent=*/false);
+  if (!ElfOrErr) {
+    REPORT("Failed to load ELF: %s\n", toString(ElfOrErr.takeError()).c_str());
+    return 1;
+  }
+
+  if (const auto *ELFObj = dyn_cast<ELF64LEObjectFile>(ElfOrErr->get())) {
+    auto Header = ELFObj->getELFFile().getHeader();
+    uint16_t Version = (uint8_t)(Header.e_ident[EI_ABIVERSION]);
+    DP("ELFABIVERSION Version: %u\n", Version);
+    return Version;
+  }
+  return 0;
+}
+
 int32_t runRegionLocked(int32_t DeviceId, void *TgtEntryPtr, void **TgtArgs,
                         ptrdiff_t *TgtOffsets, int32_t ArgNum, int32_t NumTeams,
                         int32_t ThreadLimit, uint64_t LoopTripcount) {
@@ -1440,6 +1465,7 @@
   }
 
   uint64_t PacketId = acquireAvailablePacketId(Queue);
+  uint16_t CodeObjectVersion = DeviceInfo().CodeObjectVersion;
   const uint32_t Mask = Queue->size - 1; // size is a power of 2
   hsa_kernel_dispatch_packet_t *Packet =
       (hsa_kernel_dispatch_packet_t *)Queue->base_address + (PacketId & Mask);
@@ -1488,13 +1514,9 @@
    }
 
     // Initialize implicit arguments. TODO: Which of these can be dropped
-    AMDGPUImplicitArgsTy *ImplArgs = reinterpret_cast<AMDGPUImplicitArgsTy *>(
-        static_cast<char *>(KernArg) + ArgPool->KernargSegmentSize);
-    memset(ImplArgs, 0,
-           sizeof(AMDGPUImplicitArgsTy)); // may not be necessary
-    ImplArgs->OffsetX = 0;
-    ImplArgs->OffsetY = 0;
-    ImplArgs->OffsetZ = 0;
+    uint8_t *ImplArgs =
+        static_cast<uint8_t *>(KernArg) + sizeof(void *) * ArgNum;
+    memset(ImplArgs, 0, getImplicitArgsSize(CodeObjectVersion));
 
     uint64_t Buffer = 0;
     // assign a hostcall buffer for the selected Q
@@ -1515,9 +1537,15 @@
 
     DP("Implicit argument count: %d\n",
        KernelInfoEntry.implicit_argument_count);
-    DP("Setting Hostcall buffer for COV4\n");
-    memcpy(&ImplArgs[IMPLICITARGS::COV4_HOSTCALL_PTR_OFFSET], &Buffer,
-           IMPLICITARGS::HOSTCALL_PTR_SIZE);
+
+    if (CodeObjectVersion < llvm::ELF::ELFABIVERSION_AMDGPU_HSA_V5) {
+      DP("Setting Hostcall buffer for COV4\n");
+      memcpy(&ImplArgs[IMPLICITARGS::COV4_HOSTCALL_PTR_OFFSET], &Buffer,
+             IMPLICITARGS::HOSTCALL_PTR_SIZE);
+    } else {
+      DP("Code object version 5 is not yet supported\n");
+      return OFFLOAD_FAIL;
+    }
 
     Packet->kernarg_address = KernArg;
   }
@@ -2087,6 +2115,8 @@
   if (!elfMachineIdIsAmdgcn(Image))
     return NULL;
 
+  DeviceInfo().CodeObjectVersion = getCodeObjectVersionFromELF(Image);
+
   {
     auto Env =
         DeviceEnvironment(DeviceId, DeviceInfo().NumberOfDevices,
@@ -2410,7 +2440,8 @@
     KernelsList.push_back(KernelTy(ExecModeVal, WGSizeVal, DeviceId,
                                    CallStackAddr, E->name, KernargSegmentSize,
-                                   DeviceInfo().KernArgPool));
+                                   DeviceInfo().KernArgPool,
+                                   DeviceInfo().CodeObjectVersion));
 
     __tgt_offload_entry Entry = *E;
     Entry.addr = (void *)&KernelsList.back();
     DeviceInfo().addOffloadEntry(DeviceId, Entry);