diff --git a/clang/lib/CodeGen/CGOpenMPRuntimeAMDGCN.h b/clang/lib/CodeGen/CGOpenMPRuntimeAMDGCN.h
deleted file mode 100644
--- a/clang/lib/CodeGen/CGOpenMPRuntimeAMDGCN.h
+++ /dev/null
@@ -1,40 +0,0 @@
-//===--- CGOpenMPRuntimeAMDGCN.h - Interface to OpenMP AMDGCN Runtimes ---===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-// This provides a class for OpenMP runtime code generation specialized to
-// AMDGCN targets from generalized CGOpenMPRuntimeGPU class.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_CLANG_LIB_CODEGEN_CGOPENMPRUNTIMEAMDGCN_H
-#define LLVM_CLANG_LIB_CODEGEN_CGOPENMPRUNTIMEAMDGCN_H
-
-#include "CGOpenMPRuntime.h"
-#include "CGOpenMPRuntimeGPU.h"
-#include "CodeGenFunction.h"
-#include "clang/AST/StmtOpenMP.h"
-
-namespace clang {
-namespace CodeGen {
-
-class CGOpenMPRuntimeAMDGCN final : public CGOpenMPRuntimeGPU {
-
-public:
-  explicit CGOpenMPRuntimeAMDGCN(CodeGenModule &CGM);
-
-  /// Get the GPU warp size.
-  llvm::Value *getGPUWarpSize(CodeGenFunction &CGF) override;
-
-  /// Get the id of the current thread on the GPU.
-  llvm::Value *getGPUThreadID(CodeGenFunction &CGF) override;
-};
-
-} // namespace CodeGen
-} // namespace clang
-
-#endif // LLVM_CLANG_LIB_CODEGEN_CGOPENMPRUNTIMEAMDGCN_H
diff --git a/clang/lib/CodeGen/CGOpenMPRuntimeAMDGCN.cpp b/clang/lib/CodeGen/CGOpenMPRuntimeAMDGCN.cpp
deleted file mode 100644
--- a/clang/lib/CodeGen/CGOpenMPRuntimeAMDGCN.cpp
+++ /dev/null
@@ -1,48 +0,0 @@
-//===-- CGOpenMPRuntimeAMDGCN.cpp - Interface to OpenMP AMDGCN Runtimes --===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-// This provides a class for OpenMP runtime code generation specialized to
-// AMDGCN targets from generalized CGOpenMPRuntimeGPU class.
-//
-//===----------------------------------------------------------------------===//
-
-#include "CGOpenMPRuntimeAMDGCN.h"
-#include "CGOpenMPRuntimeGPU.h"
-#include "CodeGenFunction.h"
-#include "clang/AST/Attr.h"
-#include "clang/AST/DeclOpenMP.h"
-#include "clang/AST/StmtOpenMP.h"
-#include "clang/AST/StmtVisitor.h"
-#include "clang/Basic/Cuda.h"
-#include "llvm/ADT/SmallPtrSet.h"
-#include "llvm/Frontend/OpenMP/OMPGridValues.h"
-#include "llvm/IR/IntrinsicsAMDGPU.h"
-
-using namespace clang;
-using namespace CodeGen;
-using namespace llvm::omp;
-
-CGOpenMPRuntimeAMDGCN::CGOpenMPRuntimeAMDGCN(CodeGenModule &CGM)
-    : CGOpenMPRuntimeGPU(CGM) {
-  if (!CGM.getLangOpts().OpenMPIsDevice)
-    llvm_unreachable("OpenMP AMDGCN can only handle device code.");
-}
-
-llvm::Value *CGOpenMPRuntimeAMDGCN::getGPUWarpSize(CodeGenFunction &CGF) {
-  CGBuilderTy &Bld = CGF.Builder;
-  // return constant compile-time target-specific warp size
-  unsigned WarpSize = CGF.getTarget().getGridValue().GV_Warp_Size;
-  return Bld.getInt32(WarpSize);
-}
-
-llvm::Value *CGOpenMPRuntimeAMDGCN::getGPUThreadID(CodeGenFunction &CGF) {
-  CGBuilderTy &Bld = CGF.Builder;
-  llvm::Function *F =
-      CGF.CGM.getIntrinsic(llvm::Intrinsic::amdgcn_workitem_id_x);
-  return Bld.CreateCall(F, llvm::None, "nvptx_tid");
-}
diff --git a/clang/lib/CodeGen/CGOpenMPRuntimeGPU.h b/clang/lib/CodeGen/CGOpenMPRuntimeGPU.h
--- a/clang/lib/CodeGen/CGOpenMPRuntimeGPU.h
+++ b/clang/lib/CodeGen/CGOpenMPRuntimeGPU.h
@@ -176,10 +176,10 @@
   /// and NVPTX.
 
   /// Get the GPU warp size.
-  virtual llvm::Value *getGPUWarpSize(CodeGenFunction &CGF) = 0;
+  llvm::Value *getGPUWarpSize(CodeGenFunction &CGF);
 
   /// Get the id of the current thread on the GPU.
-  virtual llvm::Value *getGPUThreadID(CodeGenFunction &CGF) = 0;
+  llvm::Value *getGPUThreadID(CodeGenFunction &CGF);
 
   /// Get the maximum number of threads in a block of the GPU.
   llvm::Value *getGPUNumThreads(CodeGenFunction &CGF);
diff --git a/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp b/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp
--- a/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp
+++ b/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp
@@ -12,7 +12,6 @@
 //===----------------------------------------------------------------------===//
 
 #include "CGOpenMPRuntimeGPU.h"
-#include "CGOpenMPRuntimeNVPTX.h"
 #include "CodeGenFunction.h"
 #include "clang/AST/Attr.h"
 #include "clang/AST/DeclOpenMP.h"
@@ -21,6 +20,7 @@
 #include "clang/Basic/Cuda.h"
 #include "llvm/ADT/SmallPtrSet.h"
 #include "llvm/Frontend/OpenMP/OMPGridValues.h"
+#include "llvm/IR/IntrinsicsAMDGPU.h"
 #include "llvm/IR/IntrinsicsNVPTX.h"
 #include "llvm/Support/MathExtras.h"
 
@@ -1197,7 +1197,7 @@
 CGOpenMPRuntimeGPU::CGOpenMPRuntimeGPU(CodeGenModule &CGM)
     : CGOpenMPRuntime(CGM, "_", "$") {
   if (!CGM.getLangOpts().OpenMPIsDevice)
-    llvm_unreachable("OpenMP NVPTX can only handle device code.");
+    llvm_unreachable("OpenMP can only handle device code.");
 
   llvm::OpenMPIRBuilder &OMPBuilder = getOMPBuilder();
   if (CGM.getLangOpts().OpenMPTargetNewRuntime) {
@@ -3960,3 +3960,17 @@
   }
   return Bld.CreateCall(F, llvm::None, "nvptx_num_threads");
 }
+
+llvm::Value *CGOpenMPRuntimeGPU::getGPUThreadID(CodeGenFunction &CGF) {
+  // Emit a call to the argument-less device runtime thread-id query.
+  llvm::FunctionCallee ThreadIdFn = OMPBuilder.getOrCreateRuntimeFunction(
+      CGM.getModule(), OMPRTL___kmpc_get_hardware_thread_id_in_block);
+  return CGF.EmitRuntimeCall(ThreadIdFn, ArrayRef<llvm::Value *>{});
+}
+
+llvm::Value *CGOpenMPRuntimeGPU::getGPUWarpSize(CodeGenFunction &CGF) {
+  // The warp size is obtained by calling __kmpc_get_warp_size at run time.
+  llvm::FunctionCallee WarpSizeFn = OMPBuilder.getOrCreateRuntimeFunction(
+      CGM.getModule(), OMPRTL___kmpc_get_warp_size);
+  return CGF.EmitRuntimeCall(WarpSizeFn, ArrayRef<llvm::Value *>{});
+}
diff --git a/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.h b/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.h
deleted file mode 100644
--- a/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.h
+++ /dev/null
@@ -1,40 +0,0 @@
-//===----- CGOpenMPRuntimeNVPTX.h - Interface to OpenMP NVPTX Runtimes ----===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-// This provides a class for OpenMP runtime code generation specialized to NVPTX
-// targets from generalized CGOpenMPRuntimeGPU class.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_CLANG_LIB_CODEGEN_CGOPENMPRUNTIMENVPTX_H
-#define LLVM_CLANG_LIB_CODEGEN_CGOPENMPRUNTIMENVPTX_H
-
-#include "CGOpenMPRuntime.h"
-#include "CGOpenMPRuntimeGPU.h"
-#include "CodeGenFunction.h"
-#include "clang/AST/StmtOpenMP.h"
-
-namespace clang {
-namespace CodeGen {
-
-class CGOpenMPRuntimeNVPTX final : public CGOpenMPRuntimeGPU {
-
-public:
-  explicit CGOpenMPRuntimeNVPTX(CodeGenModule &CGM);
-
-  /// Get the GPU warp size.
-  llvm::Value *getGPUWarpSize(CodeGenFunction &CGF) override;
-
-  /// Get the id of the current thread on the GPU.
-  llvm::Value *getGPUThreadID(CodeGenFunction &CGF) override;
-};
-
-} // CodeGen namespace.
-} // clang namespace.
-
-#endif // LLVM_CLANG_LIB_CODEGEN_CGOPENMPRUNTIMENVPTX_H
diff --git a/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp b/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp
deleted file mode 100644
--- a/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp
+++ /dev/null
@@ -1,48 +0,0 @@
-//===---- CGOpenMPRuntimeNVPTX.cpp - Interface to OpenMP NVPTX Runtimes ---===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-// This provides a class for OpenMP runtime code generation specialized to NVPTX
-// targets from generalized CGOpenMPRuntimeGPU class.
-//
-//===----------------------------------------------------------------------===//
-
-#include "CGOpenMPRuntimeNVPTX.h"
-#include "CGOpenMPRuntimeGPU.h"
-#include "CodeGenFunction.h"
-#include "clang/AST/Attr.h"
-#include "clang/AST/DeclOpenMP.h"
-#include "clang/AST/StmtOpenMP.h"
-#include "clang/AST/StmtVisitor.h"
-#include "clang/Basic/Cuda.h"
-#include "llvm/ADT/SmallPtrSet.h"
-#include "llvm/IR/IntrinsicsNVPTX.h"
-
-using namespace clang;
-using namespace CodeGen;
-using namespace llvm::omp;
-
-CGOpenMPRuntimeNVPTX::CGOpenMPRuntimeNVPTX(CodeGenModule &CGM)
-    : CGOpenMPRuntimeGPU(CGM) {
-  if (!CGM.getLangOpts().OpenMPIsDevice)
-    llvm_unreachable("OpenMP NVPTX can only handle device code.");
-}
-
-llvm::Value *CGOpenMPRuntimeNVPTX::getGPUWarpSize(CodeGenFunction &CGF) {
-  return CGF.EmitRuntimeCall(
-      llvm::Intrinsic::getDeclaration(
-          &CGF.CGM.getModule(), llvm::Intrinsic::nvvm_read_ptx_sreg_warpsize),
-      "nvptx_warp_size");
-}
-
-llvm::Value *CGOpenMPRuntimeNVPTX::getGPUThreadID(CodeGenFunction &CGF) {
-  CGBuilderTy &Bld = CGF.Builder;
-  llvm::Function *F;
-  F = llvm::Intrinsic::getDeclaration(
-      &CGF.CGM.getModule(), llvm::Intrinsic::nvvm_read_ptx_sreg_tid_x);
-  return Bld.CreateCall(F, llvm::None, "nvptx_tid");
-}
diff --git a/clang/lib/CodeGen/CMakeLists.txt b/clang/lib/CodeGen/CMakeLists.txt
--- a/clang/lib/CodeGen/CMakeLists.txt
+++ b/clang/lib/CodeGen/CMakeLists.txt
@@ -59,9 +59,7 @@
   CGObjCRuntime.cpp
   CGOpenCLRuntime.cpp
   CGOpenMPRuntime.cpp
-  CGOpenMPRuntimeAMDGCN.cpp
   CGOpenMPRuntimeGPU.cpp
-  CGOpenMPRuntimeNVPTX.cpp
   CGRecordLayoutBuilder.cpp
   CGStmt.cpp
   CGStmtOpenMP.cpp
diff --git a/clang/lib/CodeGen/CodeGenModule.cpp b/clang/lib/CodeGen/CodeGenModule.cpp
--- a/clang/lib/CodeGen/CodeGenModule.cpp
+++ b/clang/lib/CodeGen/CodeGenModule.cpp
@@ -19,8 +19,7 @@
 #include "CGObjCRuntime.h"
 #include "CGOpenCLRuntime.h"
 #include "CGOpenMPRuntime.h"
-#include "CGOpenMPRuntimeAMDGCN.h"
-#include "CGOpenMPRuntimeNVPTX.h"
+#include "CGOpenMPRuntimeGPU.h"
 #include "CodeGenFunction.h"
 #include "CodeGenPGO.h"
 #include "ConstantEmitter.h"
@@ -244,14 +243,10 @@
   switch (getTriple().getArch()) {
   case llvm::Triple::nvptx:
   case llvm::Triple::nvptx64:
-    assert(getLangOpts().OpenMPIsDevice &&
-           "OpenMP NVPTX is only prepared to deal with device code.");
-    OpenMPRuntime.reset(new CGOpenMPRuntimeNVPTX(*this));
-    break;
   case llvm::Triple::amdgcn:
     assert(getLangOpts().OpenMPIsDevice &&
-           "OpenMP AMDGCN is only prepared to deal with device code.");
-    OpenMPRuntime.reset(new CGOpenMPRuntimeAMDGCN(*this));
+           "OpenMP AMDGPU/NVPTX is only prepared to deal with device code.");
+    OpenMPRuntime.reset(new CGOpenMPRuntimeGPU(*this));
     break;
   default:
     if (LangOpts.OpenMPSimd)
diff --git a/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def b/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def
--- a/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def
+++ b/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def
@@ -455,6 +455,8 @@
 __OMP_RTL(__kmpc_warp_active_thread_mask, false, Int64,)
 __OMP_RTL(__kmpc_syncwarp, false, Void, Int64)
 
+__OMP_RTL(__kmpc_get_warp_size, false, Int32, )
+
 __OMP_RTL(__kmpc_is_generic_main_thread_id, false, Int8, Int32)
 
 __OMP_RTL(__last, false, Void, )
diff --git a/openmp/libomptarget/DeviceRTL/src/Mapping.cpp b/openmp/libomptarget/DeviceRTL/src/Mapping.cpp
--- a/openmp/libomptarget/DeviceRTL/src/Mapping.cpp
+++ b/openmp/libomptarget/DeviceRTL/src/Mapping.cpp
@@ -277,5 +277,10 @@
   FunctionTracingRAII();
   return impl::getNumHardwareThreadsInBlock();
 }
+
+__attribute__((noinline)) uint32_t __kmpc_get_warp_size() {
+  FunctionTracingRAII();
+  return impl::getWarpSize(); // Thin wrapper; called from clang-emitted code.
+}
 }
 #pragma omp end declare target