Add a ConvergentFunctions language option and a -fconvergent-functions cc1 flag.

LangOptions::assumeFunctionsAreConvergent() now returns the new
ConvergentFunctions option instead of computing
(CUDA && CUDAIsDevice) || OpenCL inline. CompilerInvocation sets the option
for OpenCL, for CUDA device compilation, or when -fconvergent-functions is
passed, so existing OpenCL/CUDA behaviour is kept and other inputs can opt in.

The option is declared as a Flag (not Joined): it takes no value and is only
tested for presence with Args.hasArg().

NOTE(review): this patch was recovered from a whitespace-collapsed copy.
Line breaks, leading indentation and blank context lines were reconstructed
to match the hunk line counts, and the stripped template arguments
(Group<f_Group>, Group<Action_Group>, MetaVarName<"<directory>">) were
restored from the surrounding conventions -- confirm against the tree before
applying (or apply with `patch -l`).

Index: clang/include/clang/Basic/LangOptions.h
===================================================================
--- clang/include/clang/Basic/LangOptions.h
+++ clang/include/clang/Basic/LangOptions.h
@@ -312,7 +312,7 @@
   }
 
   bool assumeFunctionsAreConvergent() const {
-    return (CUDA && CUDAIsDevice) || OpenCL;
+    return ConvergentFunctions;
   }
 
   /// Return the OpenCL C or C++ version as a VersionTuple.
Index: clang/include/clang/Basic/LangOptions.def
===================================================================
--- clang/include/clang/Basic/LangOptions.def
+++ clang/include/clang/Basic/LangOptions.def
@@ -122,6 +122,7 @@
 LANGOPT(ConstStrings , 1, 0, "const-qualified string support")
 ENUM_LANGOPT(LaxVectorConversions, LaxVectorConversionKind, 2,
              LaxVectorConversionKind::All, "lax vector conversions")
+LANGOPT(ConvergentFunctions, 1, 1, "Assume convergent functions")
 LANGOPT(AltiVec , 1, 0, "AltiVec-style vector initializers")
 LANGOPT(ZVector , 1, 0, "System z vector extensions")
 LANGOPT(Exceptions , 1, 0, "exception handling")
Index: clang/include/clang/Driver/Options.td
===================================================================
--- clang/include/clang/Driver/Options.td
+++ clang/include/clang/Driver/Options.td
@@ -548,6 +548,9 @@
   MetaVarName<"<directory>">;
 def c : Flag<["-"], "c">, Flags<[DriverOption]>, Group<Action_Group>,
   HelpText<"Only run preprocess, compile, and assemble steps">;
+def fconvergent_functions : Flag<["-"], "fconvergent-functions">, Group<f_Group>, Flags<[CC1Option]>,
+  HelpText<"Assume functions may be convergent">;
+
 def cuda_device_only : Flag<["--"], "cuda-device-only">,
   HelpText<"Compile CUDA code for device only">;
 def cuda_host_only : Flag<["--"], "cuda-host-only">,
Index: clang/lib/Frontend/CompilerInvocation.cpp
===================================================================
--- clang/lib/Frontend/CompilerInvocation.cpp
+++ clang/lib/Frontend/CompilerInvocation.cpp
@@ -2756,6 +2756,9 @@
   Opts.BlocksRuntimeOptional = Args.hasArg(OPT_fblocks_runtime_optional);
   Opts.Coroutines = Opts.CPlusPlus2a || Args.hasArg(OPT_fcoroutines_ts);
 
+  Opts.ConvergentFunctions = Opts.OpenCL || (Opts.CUDA && Opts.CUDAIsDevice) ||
+    Args.hasArg(OPT_fconvergent_functions);
+
   Opts.DoubleSquareBracketAttributes =
       Args.hasFlag(OPT_fdouble_square_bracket_attributes,
                    OPT_fno_double_square_bracket_attributes,
Index: clang/test/CodeGen/convergent-functions.cpp
===================================================================
--- /dev/null
+++ clang/test/CodeGen/convergent-functions.cpp
@@ -0,0 +1,8 @@
+// RUN: %clang_cc1 -triple i386-pc-win32 -emit-llvm -fconvergent-functions -o - < %s | FileCheck -check-prefix=CONVFUNC %s
+// RUN: %clang_cc1 -triple i386-pc-win32 -emit-llvm -o - < %s | FileCheck -check-prefix=NOCONVFUNC %s
+
+// Test that the -fconvergent-functions flag works
+
+// CONVFUNC: attributes #0 = { convergent {{.*}} }
+// NOCONVFUNC-NOT: convergent
+void func() { }
Index: clang/test/CodeGenCUDA/propagate-metadata.cu
===================================================================
--- clang/test/CodeGenCUDA/propagate-metadata.cu
+++ clang/test/CodeGenCUDA/propagate-metadata.cu
@@ -11,7 +11,7 @@
 
 // Build the bitcode library. This is not built in CUDA mode, otherwise it
 // might have incompatible attributes. This mirrors how libdevice is built.
-// RUN: %clang_cc1 -x c++ -emit-llvm-bc -ftrapping-math -DLIB \
+// RUN: %clang_cc1 -x c++ -fconvergent-functions -emit-llvm-bc -ftrapping-math -DLIB \
 // RUN: %s -o %t.bc -triple nvptx-unknown-unknown
 
 // RUN: %clang_cc1 -x cuda %s -emit-llvm -mlink-builtin-bitcode %t.bc -o - \
@@ -53,7 +53,8 @@
 
 // Check the attribute list.
 // CHECK: attributes [[attr]] = {
-// CHECK: "no-trapping-math"="true"
+// CHECK-SAME: convergent
+// CHECK-SAME: "no-trapping-math"="true"
 
 // FTZ-SAME: "nvptx-f32ftz"="true"
 // NOFTZ-NOT: "nvptx-f32ftz"="true"