Index: clang/lib/CodeGen/CodeGenFunction.cpp
===================================================================
--- clang/lib/CodeGen/CodeGenFunction.cpp
+++ clang/lib/CodeGen/CodeGenFunction.cpp
@@ -907,10 +907,28 @@
   // If we're in C++ mode and the function name is "main", it is guaranteed
   // to be norecurse by the standard (3.6.1.3 "The function main shall not be
   // used within a program").
-  if (getLangOpts().CPlusPlus)
-    if (const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(D))
-      if (FD->isMain())
-        Fn->addFnAttr(llvm::Attribute::NoRecurse);
+  //
+  // OpenCL C 2.0 v2.2-11 s6.9.i:
+  //     Recursion is not supported.
+  //
+  // OpenCL C++ 1.0 v2.1-11 s2.9:
+  //     recursive function calls (ISO C++ Section 5.2.2, item 9) unless
+  //     they are a compile-time constant expression.
+  //
+  // SYCL v2.2 s2.10:
+  //     kernels cannot include RTTI information, exception classes,
+  //     recursive code, virtual functions or make use of C++ libraries that
+  //     are not compiled for the device.
+  //
+  // ToDo: clang does not support CUDA/HIP dynamic parallelism, therefore
+  // CUDA/HIP kernel can be marked with norecurse. This may change in the
+  // future.
+  if (const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(D)) {
+    if ((getLangOpts().CPlusPlus && FD->isMain()) || getLangOpts().OpenCL ||
+        getLangOpts().SYCLIsDevice ||
+        (getLangOpts().CUDA && FD->hasAttr<CUDAGlobalAttr>()))
+      Fn->addFnAttr(llvm::Attribute::NoRecurse);
+  }
 
   if (const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(D))
     if (FD->usesFPIntrin())
Index: clang/test/CodeGenCUDA/norecurse.cu
===================================================================
--- /dev/null
+++ clang/test/CodeGenCUDA/norecurse.cu
@@ -0,0 +1,15 @@
+// REQUIRES: amdgpu-registered-target
+// REQUIRES: nvptx-registered-target
+
+// RUN: %clang_cc1 -triple nvptx-nvidia-cuda -fcuda-is-device \
+// RUN:     -emit-llvm -disable-llvm-passes -o - %s | FileCheck %s
+
+// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -fcuda-is-device \
+// RUN:     -emit-llvm -disable-llvm-passes -o - -x hip %s | FileCheck %s
+
+#include "Inputs/cuda.h"
+
+__global__ void kernel1(int a) {}
+// CHECK: define{{.*}}@_Z7kernel1i{{.*}}#[[ATTR:[0-9]*]]
+
+// CHECK: attributes #[[ATTR]] = {{.*}}norecurse
Index: clang/test/CodeGenOpenCL/norecurse.cl
===================================================================
--- /dev/null
+++ clang/test/CodeGenOpenCL/norecurse.cl
@@ -0,0 +1,6 @@
+// RUN: %clang_cc1 -O0 -emit-llvm -o - %s | FileCheck %s
+
+kernel void kernel1(int a) {}
+// CHECK: define{{.*}}@kernel1{{.*}}#[[ATTR:[0-9]*]]
+
+// CHECK: attributes #[[ATTR]] = {{.*}}norecurse
Index: clang/test/SemaCUDA/call-kernel-from-kernel.cu
===================================================================
--- /dev/null
+++ clang/test/SemaCUDA/call-kernel-from-kernel.cu
@@ -0,0 +1,9 @@
+// RUN: %clang_cc1 %s --std=c++11 -triple x86_64-unknown-linux -emit-llvm -o - \
+// RUN:   -verify -fsyntax-only -verify-ignore-unexpected=note
+
+#include "Inputs/cuda.h"
+
+__global__ void kernel1();
+__global__ void kernel2() {
+  kernel1<<<1,1>>>(); // expected-error {{reference to __global__ function 'kernel1' in __global__ function}}
+}