diff --git a/clang/include/clang/Basic/DiagnosticCommonKinds.td b/clang/include/clang/Basic/DiagnosticCommonKinds.td --- a/clang/include/clang/Basic/DiagnosticCommonKinds.td +++ b/clang/include/clang/Basic/DiagnosticCommonKinds.td @@ -304,6 +304,11 @@ def err_openclcxx_not_supported : Error< "'%0' is not supported in C++ for OpenCL">; +// HIP +def warn_ignore_hip_only_option : Warning< + "'%0' is ignored since it is only supported for HIP">, + InGroup; + // OpenMP def err_omp_more_one_clause : Error< "directive '#pragma omp %0' cannot contain more than one '%1' clause%select{| with '%3' name modifier| with 'source' dependence}2">; diff --git a/clang/include/clang/Basic/DiagnosticGroups.td b/clang/include/clang/Basic/DiagnosticGroups.td --- a/clang/include/clang/Basic/DiagnosticGroups.td +++ b/clang/include/clang/Basic/DiagnosticGroups.td @@ -1077,6 +1077,10 @@ // compiling CUDA C/C++ but which is not compatible with the CUDA spec. def CudaCompat : DiagGroup<"cuda-compat">; +// A warning group for warnings about features supported by HIP but +// ignored by CUDA. +def HIPOnly : DiagGroup<"hip-only">; + // Warnings which cause linking of the runtime libraries like // libc and the CRT to be skipped. def AVRRtlibLinkingQuirks : DiagGroup<"avr-rtlib-linking-quirks">; diff --git a/clang/include/clang/Basic/LangOptions.def b/clang/include/clang/Basic/LangOptions.def --- a/clang/include/clang/Basic/LangOptions.def +++ b/clang/include/clang/Basic/LangOptions.def @@ -224,6 +224,7 @@ LANGOPT(CUDAHostDeviceConstexpr, 1, 1, "treating unattributed constexpr functions as __host__ __device__") LANGOPT(CUDADeviceApproxTranscendentals, 1, 0, "using approximate transcendental functions") LANGOPT(GPURelocatableDeviceCode, 1, 0, "generate relocatable device code") +LANGOPT(GPUAllowDeviceInit, 1, 0, "allowing device side global init functions for HIP") LANGOPT(SYCLIsDevice , 1, 0, "Generate code for SYCL device") diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td --- a/clang/include/clang/Driver/Options.td +++ b/clang/include/clang/Driver/Options.td @@ -602,6 +602,9 @@ def fhip_new_launch_api : Flag<["-"], "fhip-new-launch-api">, Flags<[CC1Option]>, HelpText<"Use new kernel launching API for HIP.">; def fno_hip_new_launch_api : Flag<["-"], "fno-hip-new-launch-api">; +def fgpu_allow_device_init : Flag<["-"], "fgpu-allow-device-init">, + Flags<[CC1Option]>, HelpText<"Allow device side init function in HIP">; +def fno_gpu_allow_device_init : Flag<["-"], "fno-gpu-allow-device-init">; def libomptarget_nvptx_path_EQ : Joined<["--"], "libomptarget-nvptx-path=">, Group, HelpText<"Path to libomptarget-nvptx libraries">; def dD : Flag<["-"], "dD">, Group, Flags<[CC1Option]>, diff --git a/clang/lib/CodeGen/CGDeclCXX.cpp b/clang/lib/CodeGen/CGDeclCXX.cpp --- a/clang/lib/CodeGen/CGDeclCXX.cpp +++ b/clang/lib/CodeGen/CGDeclCXX.cpp @@ -437,7 +437,7 @@ // that are of class type, cannot have a non-empty constructor. All // the checks have been done in Sema by now. Whatever initializers // are allowed are empty and we just need to ignore them here. - if (getLangOpts().CUDA && getLangOpts().CUDAIsDevice && + if (getLangOpts().CUDAIsDevice && !getLangOpts().GPUAllowDeviceInit && (D->hasAttr() || D->hasAttr() || D->hasAttr())) return; @@ -608,6 +608,11 @@ Fn->setCallingConv(llvm::CallingConv::SPIR_KERNEL); } + if (getLangOpts().HIP) { + Fn->setCallingConv(llvm::CallingConv::AMDGPU_KERNEL); + Fn->addFnAttr("device-init"); + } + CXXGlobalInits.clear(); } diff --git a/clang/lib/Driver/ToolChains/HIP.cpp b/clang/lib/Driver/ToolChains/HIP.cpp --- a/clang/lib/Driver/ToolChains/HIP.cpp +++ b/clang/lib/Driver/ToolChains/HIP.cpp @@ -292,6 +292,10 @@ false)) CC1Args.push_back("-fgpu-rdc"); + if (DriverArgs.hasFlag(options::OPT_fgpu_allow_device_init, + options::OPT_fno_gpu_allow_device_init, false)) + CC1Args.push_back("-fgpu-allow-device-init"); + // Default to "hidden" visibility, as object level linking will not be // supported for the foreseeable future. if (!DriverArgs.hasArg(options::OPT_fvisibility_EQ, diff --git a/clang/lib/Frontend/CompilerInvocation.cpp b/clang/lib/Frontend/CompilerInvocation.cpp --- a/clang/lib/Frontend/CompilerInvocation.cpp +++ b/clang/lib/Frontend/CompilerInvocation.cpp @@ -2528,6 +2528,13 @@ Opts.CUDADeviceApproxTranscendentals = 1; Opts.GPURelocatableDeviceCode = Args.hasArg(OPT_fgpu_rdc); + if (Args.hasArg(OPT_fgpu_allow_device_init)) { + if (Opts.HIP) + Opts.GPUAllowDeviceInit = 1; + else + Diags.Report(diag::warn_ignore_hip_only_option) + << Args.getLastArg(OPT_fgpu_allow_device_init)->getAsString(Args); + } Opts.HIPUseNewLaunchAPI = Args.hasArg(OPT_fhip_new_launch_api); if (Opts.ObjC) { diff --git a/clang/lib/Sema/SemaCUDA.cpp b/clang/lib/Sema/SemaCUDA.cpp --- a/clang/lib/Sema/SemaCUDA.cpp +++ b/clang/lib/Sema/SemaCUDA.cpp @@ -492,6 +492,8 @@ const Expr *Init = VD->getInit(); if (VD->hasAttr() || VD->hasAttr() || VD->hasAttr()) { + if (LangOpts.GPUAllowDeviceInit) + return; assert(!VD->isStaticLocal() || VD->hasAttr()); bool AllowedInit = false; if (const CXXConstructExpr *CE = dyn_cast(Init)) diff --git a/clang/test/CodeGenCUDA/device-init-fun.cu b/clang/test/CodeGenCUDA/device-init-fun.cu new file mode 100644 --- /dev/null +++ b/clang/test/CodeGenCUDA/device-init-fun.cu @@ -0,0 +1,19 @@ +// REQUIRES: amdgpu-registered-target + +// RUN: %clang_cc1 -triple amdgcn -fcuda-is-device -std=c++11 \ +// RUN: -fgpu-allow-device-init -x hip \ +// RUN: -fno-threadsafe-statics -emit-llvm -o - %s \ +// RUN: | FileCheck %s + +#include "Inputs/cuda.h" + +// CHECK: define internal amdgpu_kernel void @_GLOBAL__sub_I_device_init_fun.cu() #[[ATTR:[0-9]*]] +// CHECK: attributes #[[ATTR]] = {{.*}}"device-init" + +__device__ void f(); + +struct A { + __device__ A() { f(); } +}; + +__device__ A a; diff --git a/clang/test/Frontend/warn-device-init-fun.cu b/clang/test/Frontend/warn-device-init-fun.cu new file mode 100644 --- /dev/null +++ b/clang/test/Frontend/warn-device-init-fun.cu @@ -0,0 +1,8 @@ +// REQUIRES: nvptx-registered-target + +// RUN: %clang_cc1 -triple nvptx -fcuda-is-device \ +// RUN: -fgpu-allow-device-init \ +// RUN: %s 2>&1 | FileCheck %s + +// CHECK: warning: '-fgpu-allow-device-init' is ignored since it is only supported for HIP +