Index: clang/include/clang/Basic/Attr.td =================================================================== --- clang/include/clang/Basic/Attr.td +++ clang/include/clang/Basic/Attr.td @@ -1775,8 +1775,9 @@ } def NoInline : DeclOrStmtAttr { - let Spellings = [GCC<"noinline">, CXX11<"clang", "noinline">, - C2x<"clang", "noinline">, Declspec<"noinline">]; + let Spellings = [Keyword<"__noinline__">, GCC<"noinline">, + CXX11<"clang", "noinline">, C2x<"clang", "noinline">, + Declspec<"noinline">]; let Accessors = [Accessor<"isClangNoInline", [CXX11<"clang", "noinline">, C2x<"clang", "noinline">]>]; let Documentation = [NoInlineDocs]; Index: clang/include/clang/Basic/DiagnosticParseKinds.td =================================================================== --- clang/include/clang/Basic/DiagnosticParseKinds.td +++ clang/include/clang/Basic/DiagnosticParseKinds.td @@ -86,6 +86,9 @@ "type nullability specifier %0 is a Clang extension">, InGroup>; +def ext_cuda_noinline_keyword : Extension< + "__noinline__ keyword is a Clang extension for CUDA/HIP">, DefaultIgnore; + def err_empty_enum : Error<"use of empty enum">; def ext_ident_list_in_param : Extension< Index: clang/include/clang/Basic/Features.def =================================================================== --- clang/include/clang/Basic/Features.def +++ clang/include/clang/Basic/Features.def @@ -270,5 +270,8 @@ FEATURE(cxx_abi_relative_vtable, LangOpts.CPlusPlus && LangOpts.RelativeCXXABIVTables) +// CUDA/HIP Extensions +EXTENSION(cuda_noinline_keyword, LangOpts.CUDA) + #undef EXTENSION #undef FEATURE Index: clang/include/clang/Basic/TokenKinds.def =================================================================== --- clang/include/clang/Basic/TokenKinds.def +++ clang/include/clang/Basic/TokenKinds.def @@ -599,6 +599,9 @@ // C++ for OpenCL s2.3.1: addrspace_cast operator KEYWORD(addrspace_cast , KEYOPENCLCXX) +// CUDA/HIP function attributes +KEYWORD(__noinline__ , KEYCUDA) + // OpenMP Type Traits UNARY_EXPR_OR_TYPE_TRAIT(__builtin_omp_required_simd_align, OpenMPRequiredSimdAlign, KEYALL) Index: clang/include/clang/Parse/Parser.h =================================================================== --- clang/include/clang/Parse/Parser.h +++ clang/include/clang/Parse/Parser.h @@ -2824,6 +2824,7 @@ void ParseOpenCLKernelAttributes(ParsedAttributes &attrs); void ParseOpenCLQualifiers(ParsedAttributes &Attrs); void ParseNullabilityTypeSpecifiers(ParsedAttributes &attrs); + void ParseCUDAFunctionAttributes(ParsedAttributes &attrs); VersionTuple ParseVersionTuple(SourceRange &Range); void ParseAvailabilityAttribute(IdentifierInfo &Availability, Index: clang/lib/Basic/IdentifierTable.cpp =================================================================== --- clang/lib/Basic/IdentifierTable.cpp +++ clang/lib/Basic/IdentifierTable.cpp @@ -108,6 +108,7 @@ KEYOPENCLCXX = 0x400000, KEYMSCOMPAT = 0x800000, KEYSYCL = 0x1000000, + KEYCUDA = 0x2000000, KEYALLCXX = KEYCXX | KEYCXX11 | KEYCXX20, KEYALL = (0x1ffffff & ~KEYNOMS18 & ~KEYNOOPENCL) // KEYNOMS18 and KEYNOOPENCL are used to exclude. @@ -158,6 +159,8 @@ return KS_Future; if (LangOpts.isSYCL() && (Flags & KEYSYCL)) return KS_Enabled; + if (LangOpts.CUDA && (Flags & KEYCUDA)) + return KS_Enabled; return KS_Disabled; } Index: clang/lib/Parse/ParseDecl.cpp =================================================================== --- clang/lib/Parse/ParseDecl.cpp +++ clang/lib/Parse/ParseDecl.cpp @@ -897,6 +897,18 @@ } } +void Parser::ParseCUDAFunctionAttributes(ParsedAttributes &attrs) { + while (Tok.is(tok::kw___noinline__)) { + IdentifierInfo *AttrName = Tok.getIdentifierInfo(); + SourceLocation AttrNameLoc = ConsumeToken(); + attrs.addNew(AttrName, AttrNameLoc, nullptr, AttrNameLoc, nullptr, 0, + ParsedAttr::AS_Keyword); + if (getLangOpts().CUDA) { + Diag(Tok, diag::ext_cuda_noinline_keyword); + } + } +} + void Parser::ParseOpenCLQualifiers(ParsedAttributes &Attrs) { IdentifierInfo *AttrName = Tok.getIdentifierInfo(); SourceLocation AttrNameLoc = Tok.getLocation(); @@ -3690,6 +3702,11 @@ ParseOpenCLKernelAttributes(DS.getAttributes()); continue; + // CUDA/HIP single token adornments. + case tok::kw___noinline__: + ParseCUDAFunctionAttributes(DS.getAttributes()); + continue; + // Nullability type specifiers. case tok::kw__Nonnull: case tok::kw__Nullable: Index: clang/test/CodeGenCUDA/noinline.cu =================================================================== --- /dev/null +++ clang/test/CodeGenCUDA/noinline.cu @@ -0,0 +1,30 @@ +// optimization is needed, otherwise by default all functions have noinline. + +// RUN: %clang_cc1 -triple nvptx-nvidia-cuda -fcuda-is-device \ +// RUN: -O2 -emit-llvm -o - %s | FileCheck %s + +// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -fcuda-is-device \ +// RUN: -O2 -emit-llvm -o - -x hip %s | FileCheck %s + +// RUN: %clang_cc1 -triple x86_64-unknown-gnu-linux \ +// RUN: -O2 -emit-llvm -o - %s | FileCheck %s + +#include "Inputs/cuda.h" + +__noinline__ __device__ __host__ void fun1() {} + +__attribute__((noinline)) __device__ __host__ void fun2() {} + +__attribute__((__noinline__)) __device__ __host__ void fun3() {} + +[[gnu::__noinline__]] __device__ __host__ void fun4() {} + +__device__ __host__ void fun5() {} + +// CHECK: define{{.*}}@_Z4fun1v{{.*}}#[[ATTR1:[0-9]*]] +// CHECK: define{{.*}}@_Z4fun2v{{.*}}#[[ATTR1:[0-9]*]] +// CHECK: define{{.*}}@_Z4fun3v{{.*}}#[[ATTR1:[0-9]*]] +// CHECK: define{{.*}}@_Z4fun4v{{.*}}#[[ATTR1:[0-9]*]] +// CHECK: define{{.*}}@_Z4fun5v{{.*}}#[[ATTR2:[0-9]*]] +// CHECK: attributes #[[ATTR1]] = {{.*}}noinline +// CHECK-NOT: attributes #[[ATTR2]] = {{.*}}noinline Index: clang/test/Lexer/has_extension.cu =================================================================== --- /dev/null +++ clang/test/Lexer/has_extension.cu @@ -0,0 +1,8 @@ +// RUN: %clang_cc1 -E -triple x86_64-linux-gnu %s -o - | FileCheck %s + +// CHECK: has_noinline_keyword +#if __has_extension(cuda_noinline_keyword) +int has_noinline_keyword(); +#else +int no_noinine_keyword(); +#endif Index: clang/test/SemaCUDA/noinline.cu =================================================================== --- /dev/null +++ clang/test/SemaCUDA/noinline.cu @@ -0,0 +1,12 @@ +// RUN: %clang_cc1 -fsyntax-only -verify=cuda %s +// RUN: %clang_cc1 -fsyntax-only -verify=pedantic -pedantic %s +// RUN: %clang_cc1 -fsyntax-only -verify=cpp -x c++ %s + +// cuda-no-diagnostics + +__noinline__ void fun1() { } // cpp-error {{unknown type name '__noinline__'}} +// pedantic-warning@-1 {{__noinline__ keyword is a Clang extension for CUDA/HIP}} + +__attribute__((noinline)) void fun2() { } +__attribute__((__noinline__)) void fun3() { } +[[gnu::__noinline__]] void fun4() { }