Index: include/clang/Basic/Attr.td =================================================================== --- include/clang/Basic/Attr.td +++ include/clang/Basic/Attr.td @@ -581,7 +581,7 @@ def CUDALaunchBounds : InheritableAttr { let Spellings = [GNU<"launch_bounds">]; - let Args = [IntArgument<"MaxThreads">, DefaultIntArgument<"MinBlocks", 0>]; + let Args = [ExprArgument<"MaxThreads">, ExprArgument<"MinBlocks", 1>]; let LangOpts = [CUDA]; let Subjects = SubjectList<[ObjCMethod, FunctionLike], WarnDiag, "ExpectedFunctionOrMethod">; Index: include/clang/Basic/DiagnosticSemaKinds.td =================================================================== --- include/clang/Basic/DiagnosticSemaKinds.td +++ include/clang/Basic/DiagnosticSemaKinds.td @@ -2103,6 +2103,9 @@ "use 'isEqual:' instead">; def err_attribute_argument_is_zero : Error< "%0 attribute must be greater than 0">; +def warn_cuda_launch_bounds_argument_n_is_negative : Warning< + "%0 attribute parameter %1 is negative which may result in kernel launch failure">, + InGroup; def err_property_function_in_objc_container : Error< "use of Objective-C property in function nested in Objective-C " "container not supported, move function outside its container">; Index: lib/CodeGen/TargetInfo.cpp =================================================================== --- lib/CodeGen/TargetInfo.cpp +++ lib/CodeGen/TargetInfo.cpp @@ -5077,18 +5077,26 @@ // Create !{, metadata !"kernel", i32 1} node addNVVMMetadata(F, "kernel", 1); } - if (FD->hasAttr()) { + if (CUDALaunchBoundsAttr *Attr = FD->getAttr()) { // Create !{, metadata !"maxntidx", i32 } node - addNVVMMetadata(F, "maxntidx", - FD->getAttr()->getMaxThreads()); - // min blocks is a default argument for CUDALaunchBoundsAttr, so getting a - // zero value from getMinBlocks either means it was not specified in - // __launch_bounds__ or the user specified a 0 value. In both cases, we - // don't have to add a PTX directive. - int MinCTASM = FD->getAttr()->getMinBlocks(); - if (MinCTASM > 0) { - // Create !{, metadata !"minctasm", i32 } node - addNVVMMetadata(F, "minctasm", MinCTASM); + llvm::APSInt MaxThreads; + if (Attr->getMaxThreads()->EvaluateAsInt(MaxThreads, M.getContext())) { + if (MaxThreads > 0) + addNVVMMetadata(F, "maxntidx", MaxThreads.getExtValue()); + } else + llvm_unreachable("launch_bounds arg 1 evaluation failed."); + + // min blocks is an optional argument for CUDALaunchBoundsAttr. If it was + // not specified in __launch_bounds__ or if the user specified a 0 value, + // we don't have to add a PTX directive. + if (Attr->getMinBlocks()) { + llvm::APSInt MinBlocks; + if (Attr->getMinBlocks()->EvaluateAsInt(MinBlocks, M.getContext())) { + if (MinBlocks > 0) + // Create !{, metadata !"minctasm", i32 } node + addNVVMMetadata(F, "minctasm", MinBlocks.getExtValue()); + } else + llvm_unreachable("launch_bounds arg 2 evaluation failed."); } } } Index: lib/Sema/SemaDeclAttr.cpp =================================================================== --- lib/Sema/SemaDeclAttr.cpp +++ lib/Sema/SemaDeclAttr.cpp @@ -208,12 +208,11 @@ /// \brief If Expr is a valid integer constant, get the value of the integer /// expression and return success or failure. May output an error. -static bool checkUInt32Argument(Sema &S, const AttributeList &Attr, - const Expr *Expr, uint32_t &Val, - unsigned Idx = UINT_MAX) { - llvm::APSInt I(32); +static bool checkIntArgument(Sema &S, const AttributeList &Attr, + const Expr *Expr, llvm::APSInt &Value, + unsigned Idx = UINT_MAX) { if (Expr->isTypeDependent() || Expr->isValueDependent() || - !Expr->isIntegerConstantExpr(I, S.Context)) { + !Expr->isIntegerConstantExpr(Value, S.Context)) { if (Idx != UINT_MAX) S.Diag(Attr.getLoc(), diag::err_attribute_argument_n_type) << Attr.getName() << Idx << AANT_ArgumentIntegerConstant @@ -224,7 +223,19 @@ << Expr->getSourceRange(); return false; } + return true; +} +/// \brief If Expr is a valid integer constant, get unsigned 32-bit value of the +/// integer expression and return success or failure. May output an error. +static bool checkUInt32Argument(Sema &S, const AttributeList &Attr, + const Expr *Expr, uint32_t &Val, + unsigned Idx = UINT_MAX) { + llvm::APSInt I(32); + if (!checkIntArgument(S, Attr, Expr, I, Idx)) + return false; + + // Make sure we can fit it in 32 bits. if (!I.isIntN(32)) { S.Diag(Expr->getExprLoc(), diag::err_ice_too_large) << I.toString(10, false) << 32 << /* Unsigned */ 1; @@ -3457,20 +3468,50 @@ return false; } +static bool isAcceptableLaunchBoundsArgument(Sema &S, const AttributeList &Attr, + Expr *E, unsigned Idx) { + // If expression does not depend on template instantiation, check its + // evaluated vaule and report any issues. + if (!E->isInstantiationDependent()) { + llvm::APSInt Value; + if (!checkIntArgument(S, Attr, E, Value, Idx)) + return false; + if (Value < 0) + S.Diag(Attr.getLoc(), + diag::warn_cuda_launch_bounds_argument_n_is_negative) + << Attr.getName() << Idx << AANT_ArgumentIntegerConstant + << E->getSourceRange(); + return true; + } + + // For template arguments only check if it's an integer, and do the final + // checks during template instantiation. We also can't deal with variadic + // template arguments. + if (!E->getType()->isIntegerType() || E->containsUnexpandedParameterPack()) { + S.Diag(Attr.getLoc(), diag::err_attribute_argument_n_type) + << Attr.getName() << Idx << AANT_ArgumentIntegerConstant + << E->getSourceRange(); + } + return true; +} + static void handleLaunchBoundsAttr(Sema &S, Decl *D, const AttributeList &Attr) { - uint32_t MaxThreads, MinBlocks = 0; - if (!checkUInt32Argument(S, Attr, Attr.getArgAsExpr(0), MaxThreads, 1)) + if (!checkAttributeAtLeastNumArgs(S, Attr, 1) || + !checkAttributeAtMostNumArgs(S, Attr, 2)) return; - if (Attr.getNumArgs() > 1 && !checkUInt32Argument(S, Attr, - Attr.getArgAsExpr(1), - MinBlocks, 2)) + + Expr *MaxThreads = Attr.getArgAsExpr(0); + if (!isAcceptableLaunchBoundsArgument(S, Attr, MaxThreads, 0)) + return; + + Expr *MinBlocks = Attr.getNumArgs() > 1 ? Attr.getArgAsExpr(1) : nullptr; + if (MinBlocks && !isAcceptableLaunchBoundsArgument(S, Attr, MinBlocks, 1)) return; - D->addAttr(::new (S.Context) - CUDALaunchBoundsAttr(Attr.getRange(), S.Context, - MaxThreads, MinBlocks, - Attr.getAttributeSpellingListIndex())); + D->addAttr(::new (S.Context) CUDALaunchBoundsAttr( + Attr.getRange(), S.Context, MaxThreads, MinBlocks, + Attr.getAttributeSpellingListIndex())); } static void handleArgumentWithTypeTagAttr(Sema &S, Decl *D, Index: lib/Sema/SemaTemplateInstantiateDecl.cpp =================================================================== --- lib/Sema/SemaTemplateInstantiateDecl.cpp +++ lib/Sema/SemaTemplateInstantiateDecl.cpp @@ -202,6 +202,66 @@ New->addAttr(EIA); } +/// Get a substitute expression for TemplateArgExpr(). Check whether it is a +/// valid integer constant, make sure it can be evaluated and warn if the value +/// is negative. Returns integer constant Expr on success, nullptr otherwise. +/// May output an error. +static Expr * +getLaunchBoundsArgument(Sema &S, + const MultiLevelTemplateArgumentList &TemplateArgs, + const Attr &Attr, Expr *TemplateArgExpr, unsigned Idx) { + if (!TemplateArgExpr) + return nullptr; + + // LaunchBounds arguments are constant expressions + EnterExpressionEvaluationContext Unevaluated(S, Sema::ConstantEvaluated); + + ExprResult ExprResult = S.SubstExpr(TemplateArgExpr, TemplateArgs); + if (ExprResult.isInvalid()) { + S.Diag(Attr.getLocation(), diag::err_attribute_argument_n_type) + << &Attr << Idx << AANT_ArgumentIntegerConstant + << TemplateArgExpr->getSourceRange(); + return nullptr; + } + Expr *E = ExprResult.getAs(); + llvm::APSInt Value; + if (E->isTypeDependent() || E->isValueDependent() || + !E->isIntegerConstantExpr(Value, S.Context)) { + S.Diag(Attr.getLocation(), diag::err_attribute_argument_n_type) + << &Attr << Idx << AANT_ArgumentIntegerConstant << E->getSourceRange(); + return nullptr; + } + if (Value < 0) + S.Diag(Attr.getLocation(), + diag::warn_cuda_launch_bounds_argument_n_is_negative) + << &Attr << Idx << AANT_ArgumentIntegerConstant << E->getSourceRange(); + return E; +} + +// Constructs and adds to New a new instance of CUDALaunchBoundsAttr using +// template A as the base and arguments from TemplateArgs. +static void instantiateDependentCUDALaunchBoundsAttr( + Sema &S, const MultiLevelTemplateArgumentList &TemplateArgs, + const CUDALaunchBoundsAttr &A, Decl *New) { + + Expr *MaxThreadsExpr = + getLaunchBoundsArgument(S, TemplateArgs, A, A.getMaxThreads(), 0); + if (!MaxThreadsExpr) + return; + + Expr *MinBlocksExpr = nullptr; + if (A.getMinBlocks()) { + MinBlocksExpr = + getLaunchBoundsArgument(S, TemplateArgs, A, A.getMinBlocks(), 1); + if (!MinBlocksExpr) + return; + } + + New->addAttr(::new (S.Context) CUDALaunchBoundsAttr( + A.getRange(), S.Context, MaxThreadsExpr, MinBlocksExpr, + A.getSpellingListIndex())); +} + void Sema::InstantiateAttrs(const MultiLevelTemplateArgumentList &TemplateArgs, const Decl *Tmpl, Decl *New, LateInstantiatedAttrVec *LateAttrs, @@ -233,6 +293,12 @@ continue; } + if (const CUDALaunchBoundsAttr *CUDALaunchBounds = + dyn_cast(TmplAttr)) { + instantiateDependentCUDALaunchBoundsAttr(*this, TemplateArgs, + *CUDALaunchBounds, New); + continue; + } // Existing DLL attribute on the instantiation takes precedence. if (TmplAttr->getKind() == attr::DLLExport || TmplAttr->getKind() == attr::DLLImport) { Index: test/CodeGenCUDA/launch-bounds.cu =================================================================== --- test/CodeGenCUDA/launch-bounds.cu +++ test/CodeGenCUDA/launch-bounds.cu @@ -28,3 +28,47 @@ } // CHECK: !{{[0-9]+}} = !{void ()* @Kernel2, !"maxntidx", i32 256} + +template +__global__ void +__launch_bounds__(max_threads_per_block) +Kernel3() +{ +} + +template void Kernel3(); +// CHECK: !{{[0-9]+}} = !{void ()* @{{.*}}Kernel3{{.*}}, !"maxntidx", i32 256} + +template +__global__ void +__launch_bounds__(max_threads_per_block, min_blocks_per_mp) +Kernel4() +{ +} +template void Kernel4(); + +// CHECK: !{{[0-9]+}} = !{void ()* @{{.*}}Kernel4{{.*}}, !"maxntidx", i32 256} +// CHECK: !{{[0-9]+}} = !{void ()* @{{.*}}Kernel4{{.*}}, !"minctasm", i32 2} + +const int constint = 100; +template +__global__ void +__launch_bounds__(max_threads_per_block + constint, + min_blocks_per_mp + max_threads_per_block) +Kernel5() +{ +} +template void Kernel5(); + +// CHECK: !{{[0-9]+}} = !{void ()* @{{.*}}Kernel5{{.*}}, !"maxntidx", i32 356} +// CHECK: !{{[0-9]+}} = !{void ()* @{{.*}}Kernel5{{.*}}, !"minctasm", i32 258} + +// Make sure we don't emit negative launch bounds values. +__global__ void +__launch_bounds__( -MAX_THREADS_PER_BLOCK, -MIN_BLOCKS_PER_MP ) +Kernel6() +{ +} +// CHECK: !{{[0-9]+}} = !{void ()* @{{.*}}Kernel6{{.*}}, !"kernel", i32 1} +// CHECK-NOT: !{{[0-9]+}} = !{void ()* @{{.*}}Kernel5{{.*}}, !"maxntidx", +// CHECK-NOT: !{{[0-9]+}} = !{void ()* @{{.*}}Kernel6{{.*}}, !"minctasm", Index: test/SemaCUDA/launch_bounds.cu =================================================================== --- test/SemaCUDA/launch_bounds.cu +++ test/SemaCUDA/launch_bounds.cu @@ -2,10 +2,31 @@ #include "Inputs/cuda.h" -__launch_bounds__(128, 7) void Test1(void); -__launch_bounds__(128) void Test2(void); +__launch_bounds__(128, 7) void Test2Args(void); +__launch_bounds__(128) void Test1Arg(void); -__launch_bounds__(1, 2, 3) void Test3(void); // expected-error {{'launch_bounds' attribute takes no more than 2 arguments}} -__launch_bounds__() void Test4(void); // expected-error {{'launch_bounds' attribute takes at least 1 argument}} +__launch_bounds__(-128, 7) void TestNegArg1(void); // expected-warning {{'launch_bounds' attribute parameter 0 is negative which may result in kernel launch failure}} +__launch_bounds__(128, -7) void TestNegArg2(void); // expected-warning {{'launch_bounds' attribute parameter 1 is negative which may result in kernel launch failure}} -int Test5 __launch_bounds__(128, 7); // expected-warning {{'launch_bounds' attribute only applies to functions and methods}} +__launch_bounds__(1, 2, 3) void Test3Args(void); // expected-error {{'launch_bounds' attribute takes no more than 2 arguments}} +__launch_bounds__() void TestNoArgs(void); // expected-error {{'launch_bounds' attribute takes at least 1 argument}} + +int TestNoFunction __launch_bounds__(128, 7); // expected-warning {{'launch_bounds' attribute only applies to functions and methods}} + +__launch_bounds__(true) void TestBool(void); +__launch_bounds__(128.0) void TestFP(void); // expected-error {{'launch_bounds' attribute requires parameter 0 to be an integer constant}} +__launch_bounds__((void*)0) void TestNullptr(void); // expected-error {{'launch_bounds' attribute requires parameter 0 to be an integer constant}} +int nonconstint = 256; +__launch_bounds__(nonconstint) void TestNonConstInt(void); // expected-error {{'launch_bounds' attribute requires parameter 0 to be an integer constant}} +const int constint = 512; +__launch_bounds__(constint) void TestConstInt(void); + +template __launch_bounds__(a, b) void TestTmpl2Args(void) {} +template void TestTmpl2Args<128,7>(void); + +template __launch_bounds__(a) void TestTmpl1Arg(void) {} +template void TestTmpl1Arg<128>(void); + +template +__launch_bounds__(a + b, c + constint) void TestTmplExpr(void) {} +template void TestTmplExpr<128, 3, 7>(void);