Index: lib/CodeGen/CodeGenModule.cpp
===================================================================
--- lib/CodeGen/CodeGenModule.cpp
+++ lib/CodeGen/CodeGenModule.cpp
@@ -1875,6 +1875,14 @@
                                                           B));
   }
 
+  if (getLangOpts().CUDA && getLangOpts().CUDAIsDevice) {
+    // Conservatively, mark all functions in CUDA as convergent (meaning, they
+    // may call an intrinsically convergent op, such as __syncthreads(), and so
+    // can't have certain optimizations applied around them). LLVM will remove
+    // this attribute where it safely can.
+    F->addFnAttr(llvm::Attribute::Convergent);
+  }
+
   if (!DontDefer) {
     // All MSVC dtors other than the base dtor are linkonce_odr and delegate to
     // each other bottoming out with the base dtor. Therefore we emit non-base
Index: test/CodeGenCUDA/convergent.cu
===================================================================
--- /dev/null
+++ test/CodeGenCUDA/convergent.cu
@@ -0,0 +1,34 @@
+// REQUIRES: x86-registered-target
+// REQUIRES: nvptx-registered-target
+
+// RUN: %clang_cc1 -fcuda-is-device -triple nvptx-nvidia-cuda -emit-llvm \
+// RUN:   -o - %s | FileCheck -check-prefix DEVICE %s
+
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -emit-llvm -o - %s | \
+// RUN:   FileCheck -check-prefix HOST %s
+
+#include "Inputs/cuda.h"
+
+// DEVICE: Function Attrs:
+// DEVICE-SAME: convergent
+// DEVICE-NEXT: define void @_Z3foov
+__device__ void foo() {}
+
+// HOST: Function Attrs:
+// HOST-NOT: convergent
+// HOST-NEXT: define void @_Z3barv
+// DEVICE: Function Attrs:
+// DEVICE-SAME: convergent
+// DEVICE-NEXT: define void @_Z3barv
+__host__ __device__ void baz();
+__host__ __device__ void bar() { baz(); }
+
+// DEVICE: declare void @_Z3bazv() [[BAZ_ATTR:#[0-9]+]]
+// DEVICE: attributes [[BAZ_ATTR]] = {
+// DEVICE-SAME: convergent
+// DEVICE-SAME: }
+
+// HOST: declare void @_Z3bazv() [[BAZ_ATTR:#[0-9]+]]
+// HOST: attributes [[BAZ_ATTR]] = {
+// HOST-NOT: convergent
+// HOST-SAME: }
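
For reviewers unfamiliar with the attribute, here is an illustrative sketch, not part of the patch, of the hazard that marking device functions convergent guards against. The names (helper, kernel, out, n) are hypothetical; the point is only that convergent forbids transformations that add new control dependencies to a call, such as sinking it into a divergent branch.

__device__ void helper();  // Opaque to the optimizer; may call __syncthreads().

__global__ void kernel(int *out, int n) {
  // All threads in the block reach this call together, so any barrier
  // inside helper() is executed by the full block.
  helper();
  if (threadIdx.x < n) {   // Divergent branch: only some threads take it.
    out[threadIdx.x] = 1;
  }
  // Without `convergent` on the call, an optimizer would be free to sink it
  // into the divergent `if` above, adding a new control dependence. Then
  // only a subset of threads would reach a __syncthreads() inside helper(),
  // which is undefined behavior and typically deadlocks on the GPU. With
  // `convergent`, that transform is illegal; as the patch comment notes,
  // LLVM strips the attribute again wherever it can prove it is unneeded.
}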