Index: lib/CodeGen/CGCall.cpp =================================================================== --- lib/CodeGen/CGCall.cpp +++ lib/CodeGen/CGCall.cpp @@ -3139,7 +3139,15 @@ if (CGM.getLangOpts().ObjCAutoRefCount) AddObjCARCExceptionMetadata(Inst); - return llvm::CallSite(Inst); + llvm::CallSite CS(Inst); + // All calls in CUDA device mode must conservatively be marked as convergent, + // preventing some optimizations. The optimizer can remove this if it can + // prove the the callee is not convergent. + if (CGM.getLangOpts().CUDA && CGM.getLangOpts().CUDAIsDevice) { + CS.addAttribute(llvm::AttributeSet::FunctionIndex, + llvm::Attribute::Convergent); + } + return CS; } /// \brief Store a non-aggregate value to an address to initialize it. For @@ -3539,6 +3547,14 @@ Attrs.addAttribute(getLLVMContext(), llvm::AttributeSet::FunctionIndex, llvm::Attribute::NoInline); + // All calls in CUDA device code are conservatively marked as convergent. The + // optimizer is able to remove this attribute if it can prove that the callee + // is not convergent. + if (CGM.getLangOpts().CUDA && CGM.getLangOpts().CUDAIsDevice) + Attrs = + Attrs.addAttribute(getLLVMContext(), llvm::AttributeSet::FunctionIndex, + llvm::Attribute::Convergent); + CS.setAttributes(Attrs); CS.setCallingConv(static_cast(CallingConv)); Index: test/CodeGenCUDA/convergent.cu =================================================================== --- test/CodeGenCUDA/convergent.cu +++ test/CodeGenCUDA/convergent.cu @@ -22,12 +22,16 @@ // DEVICE-SAME: convergent // DEVICE-NEXT: define void @_Z3barv __host__ __device__ void baz(); -__host__ __device__ void bar() { baz(); } +__host__ __device__ void bar() { + // DEVICE: call void @_Z3bazv() [[CALL_ATTR:#[0-9]+]] + baz(); +} // DEVICE: declare void @_Z3bazv() [[BAZ_ATTR:#[0-9]+]] // DEVICE: attributes [[BAZ_ATTR]] = { // DEVICE-SAME: convergent // DEVICE-SAME: } +// DEVICE: attributes [[CALL_ATTR]] = { convergent } // HOST: declare void @_Z3bazv() [[BAZ_ATTR:#[0-9]+]] // HOST: attributes [[BAZ_ATTR]] = { Index: test/CodeGenCUDA/device-var-init.cu =================================================================== --- test/CodeGenCUDA/device-var-init.cu +++ test/CodeGenCUDA/device-var-init.cu @@ -382,7 +382,7 @@ // CHECK: call void @_ZN4NETCC1IJEEEDpT_(%struct.NETC* %netc) // CHECK: call void @_ZN7EC_I_ECC1Ev(%struct.EC_I_EC* %ec_i_ec) // CHECK: call void @_ZN8EC_I_EC1C1Ev(%struct.EC_I_EC1* %ec_i_ec1) -// CHECK: call void @_ZN5T_V_TC1Ev(%struct.T_V_T* %t_v_t) #3 +// CHECK: call void @_ZN5T_V_TC1Ev(%struct.T_V_T* %t_v_t) // CHECK: call void @_ZN7T_B_NECC1Ev(%struct.T_B_NEC* %t_b_nec) // CHECK: call void @_ZN7T_F_NECC1Ev(%struct.T_F_NEC* %t_f_nec) // CHECK: call void @_ZN8T_FA_NECC1Ev(%struct.T_FA_NEC* %t_fa_nec)