Index: lib/CodeGen/CodeGenModule.cpp =================================================================== --- lib/CodeGen/CodeGenModule.cpp +++ lib/CodeGen/CodeGenModule.cpp @@ -813,6 +813,14 @@ false); F->setAttributes(llvm::AttributeSet::get(getLLVMContext(), AttributeList)); F->setCallingConv(static_cast(CallingConv)); + + if (getLangOpts().CUDA && getLangOpts().CUDAIsDevice) { + // Conservatively, mark all functions in CUDA as convergent (meaning, they + // may call an intrinsically convergent op, such as __syncthreads(), and so + // can't have certain optimizations applied around them). LLVM will remove + // this attribute where it safely can. + F->addFnAttr(llvm::Attribute::Convergent); + } } /// Determines whether the language options require us to model Index: test/CodeGenCUDA/convergent.cu =================================================================== --- /dev/null +++ test/CodeGenCUDA/convergent.cu @@ -0,0 +1,35 @@ +// REQUIRES: x86-registered-target +// REQUIRES: nvptx-registered-target + +// RUN: %clang_cc1 -fcuda-is-device -triple nvptx-nvidia-cuda -emit-llvm \ +// RUN: -disable-llvm-passes -o - %s | FileCheck -check-prefix DEVICE %s + +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -emit-llvm \ +// RUN: -disable-llvm-passes -o - %s | \ +// RUN: FileCheck -check-prefix HOST %s + +#include "Inputs/cuda.h" + +// DEVICE: Function Attrs: +// DEVICE-SAME: convergent +// DEVICE-NEXT: define void @_Z3foov +__device__ void foo() {} + +// HOST: Function Attrs: +// HOST-NOT: convergent +// HOST-NEXT: define void @_Z3barv +// DEVICE: Function Attrs: +// DEVICE-SAME: convergent +// DEVICE-NEXT: define void @_Z3barv +__host__ __device__ void baz(); +__host__ __device__ void bar() { baz(); } + +// DEVICE: declare void @_Z3bazv() [[BAZ_ATTR:#[0-9]+]] +// DEVICE: attributes [[BAZ_ATTR]] = { +// DEVICE-SAME: convergent +// DEVICE-SAME: } + +// HOST: declare void @_Z3bazv() [[BAZ_ATTR:#[0-9]+]] +// HOST: attributes [[BAZ_ATTR]] = { +// HOST-NOT: convergent +// NOST-SAME: }