Index: include/clang/Basic/Attr.td =================================================================== --- include/clang/Basic/Attr.td +++ include/clang/Basic/Attr.td @@ -581,7 +581,7 @@ def CUDALaunchBounds : InheritableAttr { let Spellings = [GNU<"launch_bounds">]; - let Args = [IntArgument<"MaxThreads">, DefaultIntArgument<"MinBlocks", 0>]; + let Args = [ExprArgument<"MaxThreads">, ExprArgument<"MinBlocks", 1>]; let LangOpts = [CUDA]; let Subjects = SubjectList<[ObjCMethod, FunctionLike], WarnDiag, "ExpectedFunctionOrMethod">; Index: lib/CodeGen/TargetInfo.cpp =================================================================== --- lib/CodeGen/TargetInfo.cpp +++ lib/CodeGen/TargetInfo.cpp @@ -5079,16 +5079,24 @@ } if (FD->hasAttr()) { // Create !{, metadata !"maxntidx", i32 } node - addNVVMMetadata(F, "maxntidx", - FD->getAttr()->getMaxThreads()); - // min blocks is a default argument for CUDALaunchBoundsAttr, so getting a - // zero value from getMinBlocks either means it was not specified in - // __launch_bounds__ or the user specified a 0 value. In both cases, we + CUDALaunchBoundsAttr *Attr = FD->getAttr(); + llvm::APSInt MaxThreads; + if (Attr->getMaxThreads()->EvaluateAsInt(MaxThreads, M.getContext())) + addNVVMMetadata(F, "maxntidx", MaxThreads.getExtValue()); + else + llvm_unreachable("launch_bounds arg 1 evaluation failed."); + + // min blocks is a default argument for CUDALaunchBoundsAttr. If it was + // not specified in __launch_bounds__ or the user specified a 0 value, we // don't have to add a PTX directive. - int MinCTASM = FD->getAttr()->getMinBlocks(); - if (MinCTASM > 0) { - // Create !{, metadata !"minctasm", i32 } node - addNVVMMetadata(F, "minctasm", MinCTASM); + if (Attr->getMinBlocks()) { + llvm::APSInt MinBlocks; + if (Attr->getMinBlocks()->EvaluateAsInt(MinBlocks, M.getContext())) { + if (MinBlocks > 0) + // Create !{, metadata !"minctasm", i32 } node + addNVVMMetadata(F, "minctasm", MinBlocks.getExtValue()); + } else + llvm_unreachable("launch_bounds arg 2 evaluation failed."); } } } Index: lib/Sema/SemaDeclAttr.cpp =================================================================== --- lib/Sema/SemaDeclAttr.cpp +++ lib/Sema/SemaDeclAttr.cpp @@ -3457,20 +3457,38 @@ return false; } +static bool isAcceptableLaunchBoundsArgument(Sema &S, Expr *E) { + return E->getType()->isIntegerType() && + !E->containsUnexpandedParameterPack() && + (E->isInstantiationDependent() || E->isEvaluatable(S.Context)); +} + static void handleLaunchBoundsAttr(Sema &S, Decl *D, const AttributeList &Attr) { - uint32_t MaxThreads, MinBlocks = 0; - if (!checkUInt32Argument(S, Attr, Attr.getArgAsExpr(0), MaxThreads, 1)) + if (!checkAttributeAtLeastNumArgs(S, Attr, 1) || + !checkAttributeAtMostNumArgs(S, Attr, 2)) return; - if (Attr.getNumArgs() > 1 && !checkUInt32Argument(S, Attr, - Attr.getArgAsExpr(1), - MinBlocks, 2)) + + Expr *MaxThreads = Attr.getArgAsExpr(0); + if (!isAcceptableLaunchBoundsArgument(S, MaxThreads)) { + S.Diag(Attr.getLoc(), diag::err_attribute_argument_n_type) + << Attr.getName() << 0 << AANT_ArgumentIntegerConstant + << MaxThreads->getSourceRange(); + return; + } + + Expr *MinBlocks = Attr.getNumArgs() > 1 ? Attr.getArgAsExpr(1) : nullptr; + if (MinBlocks && !isAcceptableLaunchBoundsArgument(S, MinBlocks)) { + S.Diag(Attr.getLoc(), diag::err_attribute_argument_n_type) + << Attr.getName() << 1 << AANT_ArgumentIntegerConstant + << MinBlocks->getSourceRange(); return; + } - D->addAttr(::new (S.Context) - CUDALaunchBoundsAttr(Attr.getRange(), S.Context, - MaxThreads, MinBlocks, - Attr.getAttributeSpellingListIndex())); + D->addAttr(::new (S.Context) CUDALaunchBoundsAttr( + Attr.getRange(), S.Context, MaxThreads, MinBlocks, + Attr.getAttributeSpellingListIndex())); + return; } static void handleArgumentWithTypeTagAttr(Sema &S, Decl *D, Index: lib/Sema/SemaTemplateInstantiateDecl.cpp =================================================================== --- lib/Sema/SemaTemplateInstantiateDecl.cpp +++ lib/Sema/SemaTemplateInstantiateDecl.cpp @@ -202,6 +202,37 @@ New->addAttr(EIA); } +static void instantiateDependentCUDALaunchBoundsAttr( + Sema &S, const MultiLevelTemplateArgumentList &TemplateArgs, + const CUDALaunchBoundsAttr *A, Decl *New) { + + // LaunchBounds arguments are constant expressions + EnterExpressionEvaluationContext Unevaluated(S, Sema::ConstantEvaluated); + ExprResult MaxThreadsResult = S.SubstExpr(A->getMaxThreads(), TemplateArgs); + if (MaxThreadsResult.isInvalid()) { + S.Diag(A->getLocation(), diag::err_attribute_argument_n_type) + << A->getSpelling() << 0 << AANT_ArgumentIntegerConstant + << A->getMaxThreads()->getSourceRange(); + return; + } + + Expr *MinBlocksExpr = A->getMinBlocks(); + if (MinBlocksExpr) { + ExprResult MinBlocksResult = S.SubstExpr(A->getMinBlocks(), TemplateArgs); + if (MinBlocksResult.isInvalid()) { + S.Diag(A->getLocation(), diag::err_attribute_argument_n_type) + << A->getSpelling() << 1 << AANT_ArgumentIntegerConstant + << A->getMinBlocks()->getSourceRange(); + return; + } + MinBlocksExpr = MinBlocksResult.getAs(); + } + + New->addAttr(::new (S.Context) CUDALaunchBoundsAttr( + A->getRange(), S.Context, MaxThreadsResult.getAs(), MinBlocksExpr, + A->getSpellingListIndex())); +} + void Sema::InstantiateAttrs(const MultiLevelTemplateArgumentList &TemplateArgs, const Decl *Tmpl, Decl *New, LateInstantiatedAttrVec *LateAttrs, @@ -233,6 +264,13 @@ continue; } + const CUDALaunchBoundsAttr *CUDALaunchBounds = + dyn_cast(TmplAttr); + if (CUDALaunchBounds) { + instantiateDependentCUDALaunchBoundsAttr(*this, TemplateArgs, + CUDALaunchBounds, New); + continue; + } // Existing DLL attribute on the instantiation takes precedence. if (TmplAttr->getKind() == attr::DLLExport || TmplAttr->getKind() == attr::DLLImport) { Index: test/CodeGenCUDA/launch-bounds.cu =================================================================== --- test/CodeGenCUDA/launch-bounds.cu +++ test/CodeGenCUDA/launch-bounds.cu @@ -28,3 +28,23 @@ } // CHECK: !{{[0-9]+}} = !{void ()* @Kernel2, !"maxntidx", i32 256} + +template +__global__ void +__launch_bounds__(max_threads_per_block) +Kernel3() +{ +} + +template void Kernel3(); +// CHECK: !{{[0-9]+}} = !{void ()* @{{.*}}Kernel3{{.*}}, !"maxntidx", i32 256} + +template +__global__ void +__launch_bounds__(max_threads_per_block, min_blocks_per_mp) +Kernel4() +{ +} +template void Kernel4(); +// CHECK: !{{[0-9]+}} = !{void ()* @{{.*}}Kernel4{{.*}}, !"maxntidx", i32 256} +// CHECK: !{{[0-9]+}} = !{void ()* @{{.*}}Kernel4{{.*}}, !"minctasm", i32 2} Index: test/SemaCUDA/launch_bounds.cu =================================================================== --- test/SemaCUDA/launch_bounds.cu +++ test/SemaCUDA/launch_bounds.cu @@ -9,3 +9,9 @@ __launch_bounds__() void Test4(void); // expected-error {{'launch_bounds' attribute takes at least 1 argument}} int Test5 __launch_bounds__(128, 7); // expected-warning {{'launch_bounds' attribute only applies to functions and methods}} + +template __launch_bounds__(a, b) void Test6(void) {} +template void Test6<128,7>(void); + +template __launch_bounds__(a) void Test7(void) {} +template void Test7<128>(void);