Index: clang/lib/CodeGen/CodeGenFunction.cpp =================================================================== --- clang/lib/CodeGen/CodeGenFunction.cpp +++ clang/lib/CodeGen/CodeGenFunction.cpp @@ -907,10 +907,28 @@ // If we're in C++ mode and the function name is "main", it is guaranteed // to be norecurse by the standard (3.6.1.3 "The function main shall not be // used within a program"). - if (getLangOpts().CPlusPlus) - if (const FunctionDecl *FD = dyn_cast_or_null(D)) - if (FD->isMain()) - Fn->addFnAttr(llvm::Attribute::NoRecurse); + // + // OpenCL C 2.0 v2.2-11 s6.9.i: + // Recursion is not supported. + // + // OpenCL C++ 1.0 v2.1-11 s2.9: + // recursive function calls (ISO C++ Section 5.2.2, item 9) unless + // they are a compile-time constant expression. + // + // SYCL v2.2 s2.10: + // kernels cannot include RTTI information, exception classes, + // recursive code, virtual functions or make use of C++ libraries that + // are not compiled for the device. + // + // ToDo: clang does not support CUDA/HIP dynamic parallelism, therefore + // CUDA/HIP kernel can be marked with norecurse. This may change in the + // future. + if (const FunctionDecl *FD = dyn_cast_or_null(D)) { + if ((getLangOpts().CPlusPlus && FD->isMain()) || getLangOpts().OpenCL || + getLangOpts().SYCLIsDevice || + (getLangOpts().CUDA && FD->hasAttr())) + Fn->addFnAttr(llvm::Attribute::NoRecurse); + } if (const FunctionDecl *FD = dyn_cast_or_null(D)) if (FD->usesFPIntrin()) Index: clang/test/CodeGenCUDA/norecurse.cu =================================================================== --- /dev/null +++ clang/test/CodeGenCUDA/norecurse.cu @@ -0,0 +1,15 @@ +// REQUIRES: amdgpu-registered-target +// REQUIRES: nvptx-registered-target + +// RUN: %clang_cc1 -triple nvptx-nvidia-cuda -fcuda-is-device \ +// RUN: -emit-llvm -disable-llvm-passes -o - %s | FileCheck %s + +// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -fcuda-is-device \ +// RUN: -emit-llvm -disable-llvm-passes -o - -x hip %s | FileCheck %s + +#include "Inputs/cuda.h" + +__global__ void kernel1(int a) {} +// CHECK: define{{.*}}@_Z7kernel1i{{.*}}#[[ATTR:[0-9]*]] + +// CHECK: attributes #[[ATTR]] = {{.*}}norecurse Index: clang/test/CodeGenOpenCL/norecurse.cl =================================================================== --- /dev/null +++ clang/test/CodeGenOpenCL/norecurse.cl @@ -0,0 +1,6 @@ +// RUN: %clang_cc1 -O0 -emit-llvm -o - %s | FileCheck %s + +kernel void kernel1(int a) {} +// CHECK: define{{.*}}@kernel1{{.*}}#[[ATTR:[0-9]*]] + +// CHECK: attributes #[[ATTR]] = {{.*}}norecurse Index: clang/test/SemaCUDA/call-kernel-from-kernel.cu =================================================================== --- /dev/null +++ clang/test/SemaCUDA/call-kernel-from-kernel.cu @@ -0,0 +1,9 @@ +// RUN: %clang_cc1 %s --std=c++11 -triple x86_64-unknown-linux -emit-llvm -o - \ +// RUN: -verify -fsyntax-only -verify-ignore-unexpected=note + +#include "Inputs/cuda.h" + +__global__ void kernel1(); +__global__ void kernel2() { + kernel1<<<1,1>>>(); // expected-error {{reference to __global__ function 'kernel1' in __global__ function}} +}