diff --git a/clang/include/clang/Basic/LangOptions.def b/clang/include/clang/Basic/LangOptions.def --- a/clang/include/clang/Basic/LangOptions.def +++ b/clang/include/clang/Basic/LangOptions.def @@ -219,6 +219,7 @@ LANGOPT(CUDAHostDeviceConstexpr, 1, 1, "treating unattributed constexpr functions as __host__ __device__") LANGOPT(CUDADeviceApproxTranscendentals, 1, 0, "using approximate transcendental functions") LANGOPT(GPURelocatableDeviceCode, 1, 0, "generate relocatable device code") +LANGOPT(CUDAForceLambdaODR, 1, 0, "force lambda naming following one definition rule (ODR)") LANGOPT(SYCLIsDevice , 1, 0, "Generate code for SYCL device") diff --git a/clang/include/clang/Driver/CC1Options.td b/clang/include/clang/Driver/CC1Options.td --- a/clang/include/clang/Driver/CC1Options.td +++ b/clang/include/clang/Driver/CC1Options.td @@ -863,6 +863,8 @@ HelpText<"Allow variadic functions in CUDA device code.">; def fno_cuda_host_device_constexpr : Flag<["-"], "fno-cuda-host-device-constexpr">, HelpText<"Don't treat unattributed constexpr functions as __host__ __device__.">; +def fcuda_force_lambda_odr : Flag<["-"], "fcuda-force-lambda-odr">, + HelpText<"Force lambda naming following one definition rule (ODR).">; //===----------------------------------------------------------------------===// // OpenMP Options diff --git a/clang/include/clang/Sema/Sema.h b/clang/include/clang/Sema/Sema.h --- a/clang/include/clang/Sema/Sema.h +++ b/clang/include/clang/Sema/Sema.h @@ -1090,10 +1090,10 @@ /// block literal. /// \param[out] ManglingContextDecl - Returns the ManglingContextDecl /// associated with the context, if relevant. - MangleNumberingContext *getCurrentMangleNumberContext( - const DeclContext *DC, - Decl *&ManglingContextDecl); - + MangleNumberingContext * + getCurrentMangleNumberContext(const DeclContext *DC, + Decl *&ManglingContextDecl, + bool SkpNoODRChk = false); /// SpecialMemberOverloadResult - The overloading result for a special member /// function. 
diff --git a/clang/lib/Frontend/CompilerInvocation.cpp b/clang/lib/Frontend/CompilerInvocation.cpp --- a/clang/lib/Frontend/CompilerInvocation.cpp +++ b/clang/lib/Frontend/CompilerInvocation.cpp @@ -2431,6 +2431,9 @@ if (Args.hasArg(OPT_fno_cuda_host_device_constexpr)) Opts.CUDAHostDeviceConstexpr = 0; + if (Args.hasArg(OPT_fcuda_force_lambda_odr)) + Opts.CUDAForceLambdaODR = 1; + if (Opts.CUDAIsDevice && Args.hasArg(OPT_fcuda_approx_transcendentals)) Opts.CUDADeviceApproxTranscendentals = 1; diff --git a/clang/lib/Sema/SemaLambda.cpp b/clang/lib/Sema/SemaLambda.cpp --- a/clang/lib/Sema/SemaLambda.cpp +++ b/clang/lib/Sema/SemaLambda.cpp @@ -272,9 +272,8 @@ return false; } -MangleNumberingContext * -Sema::getCurrentMangleNumberContext(const DeclContext *DC, - Decl *&ManglingContextDecl) { +MangleNumberingContext *Sema::getCurrentMangleNumberContext( + const DeclContext *DC, Decl *&ManglingContextDecl, bool SkpNoODRChk) { // Compute the context for allocating mangling numbers in the current // expression, if the ABI requires them. 
ManglingContextDecl = ExprEvalContexts.back().ManglingContextDecl; @@ -322,7 +321,8 @@ case Normal: { // -- the bodies of non-exported nonspecialized template functions // -- the bodies of inline functions - if ((IsInNonspecializedTemplate && + if (SkpNoODRChk || + (IsInNonspecializedTemplate && !(ManglingContextDecl && isa<ParmVarDecl>(ManglingContextDecl))) || isInInlineFunction(CurContext)) { ManglingContextDecl = nullptr; @@ -337,7 +337,7 @@ case StaticDataMember: // -- the initializers of nonspecialized static members of template classes - if (!IsInNonspecializedTemplate) { + if (!SkpNoODRChk && !IsInNonspecializedTemplate) { ManglingContextDecl = nullptr; return nullptr; } @@ -441,9 +441,9 @@ Class->setLambdaMangling(Mangling->first, Mangling->second); } else { Decl *ManglingContextDecl; - if (MangleNumberingContext *MCtx = - getCurrentMangleNumberContext(Class->getDeclContext(), - ManglingContextDecl)) { + if (MangleNumberingContext *MCtx = getCurrentMangleNumberContext( + Class->getDeclContext(), ManglingContextDecl, + getLangOpts().CUDAForceLambdaODR)) { unsigned ManglingNumber = MCtx->getManglingNumber(Method); Class->setLambdaMangling(ManglingNumber, ManglingContextDecl); } diff --git a/clang/test/CodeGenCUDA/unnamed-types.cu b/clang/test/CodeGenCUDA/unnamed-types.cu new file mode 100644 --- /dev/null +++ b/clang/test/CodeGenCUDA/unnamed-types.cu @@ -0,0 +1,34 @@ +// RUN: %clang_cc1 -std=c++11 -x hip -triple x86_64-linux-gnu -fcuda-force-lambda-odr -emit-llvm %s -o - | FileCheck %s --check-prefix=HOST +// RUN: %clang_cc1 -std=c++11 -x hip -triple amdgcn-amd-amdhsa -fcuda-force-lambda-odr -fcuda-is-device -emit-llvm %s -o - | FileCheck %s --check-prefix=DEVICE + +#include "Inputs/cuda.h" + +// HOST: @0 = private unnamed_addr constant [43 x i8] c"_Z2k0IZZ2f1PfENKUlS0_E_clES0_EUlfE_EvS0_T_\00", align 1 + +__device__ float d0(float x) { + return [](float x) { return x + 2.f; }(x); +} + +__device__ float d1(float x) { + return [](float x) { return x * 2.f; }(x); +} + +// 
DEVICE: amdgpu_kernel void @_Z2k0IZZ2f1PfENKUlS0_E_clES0_EUlfE_EvS0_T_( +template <typename F> +__global__ void k0(float *p, F f) { + p[0] = f(p[0]) + d0(p[1]) + d1(p[2]); +} + +void f0(float *p) { + [](float *p) { + *p = 1.f; + }(p); +} + +void f1(float *p) { + [](float *p) { + k0<<<1,1>>>(p, [] __device__ (float x) { return x + 1.f; }); + }(p); +} +// HOST: @__hip_register_globals +// HOST: __hipRegisterFunction{{.*}}@_Z2k0IZZ2f1PfENKUlS0_E_clES0_EUlfE_EvS0_T_{{.*}}@0