diff --git a/.gitignore b/.gitignore new file mode 100644 --- /dev/null +++ b/.gitignore @@ -0,0 +1 @@ +build.*/ diff --git a/clang/include/clang/Basic/Attr.td b/clang/include/clang/Basic/Attr.td --- a/clang/include/clang/Basic/Attr.td +++ b/clang/include/clang/Basic/Attr.td @@ -1484,14 +1484,14 @@ def AMDGPUFlatWorkGroupSize : InheritableAttr { let Spellings = [Clang<"amdgpu_flat_work_group_size", 0>]; - let Args = [UnsignedArgument<"Min">, UnsignedArgument<"Max">]; + let Args = [ExprArgument<"Min">, ExprArgument<"Max">]; let Documentation = [AMDGPUFlatWorkGroupSizeDocs]; let Subjects = SubjectList<[Function], ErrorDiag, "kernel functions">; } def AMDGPUWavesPerEU : InheritableAttr { let Spellings = [Clang<"amdgpu_waves_per_eu", 0>]; - let Args = [UnsignedArgument<"Min">, UnsignedArgument<"Max", 1>]; + let Args = [ExprArgument<"Min">, ExprArgument<"Max", 1>]; let Documentation = [AMDGPUWavesPerEUDocs]; let Subjects = SubjectList<[Function], ErrorDiag, "kernel functions">; } diff --git a/clang/include/clang/Sema/Sema.h b/clang/include/clang/Sema/Sema.h --- a/clang/include/clang/Sema/Sema.h +++ b/clang/include/clang/Sema/Sema.h @@ -1048,6 +1048,10 @@ EK_Decltype, EK_TemplateArgument, EK_Other } ExprContext; + /// If we are checking arguments of a template, this is the template + /// under check. + TemplateDecl *Template; + ExpressionEvaluationContextRecord(ExpressionEvaluationContext Context, unsigned NumCleanupObjects, CleanupInfo ParentCleanup, @@ -1056,7 +1060,7 @@ : Context(Context), ParentCleanup(ParentCleanup), NumCleanupObjects(NumCleanupObjects), NumTypos(0), ManglingContextDecl(ManglingContextDecl), MangleNumbering(), - ExprContext(ExprContext) {} + ExprContext(ExprContext), Template(nullptr) {} /// Retrieve the mangling numbering context, used to consistently /// number constructs like lambdas for mangling. @@ -6537,10 +6541,12 @@ bool CheckTemplateArgument(TemplateTypeParmDecl *Param, TypeSourceInfo *Arg); - ExprResult CheckTemplateArgument(NonTypeTemplateParmDecl *Param, - QualType InstantiatedParamType, Expr *Arg, - TemplateArgument &Converted, - CheckTemplateArgumentKind CTAK = CTAK_Specified); + ExprResult + CheckTemplateArgument(NonTypeTemplateParmDecl *Param, + QualType InstantiatedParamType, Expr *Arg, + TemplateArgument &Converted, + CheckTemplateArgumentKind CTAK = CTAK_Specified, + TemplateDecl *Template = nullptr); bool CheckTemplateTemplateArgument(TemplateParameterList *Params, TemplateArgumentLoc &Arg); @@ -8659,6 +8665,16 @@ void AddXConsumedAttr(Decl *D, SourceRange SR, unsigned SpellingIndex, RetainOwnershipKind K, bool IsTemplateInstantiation); + /// addAMDGPUFlatWorkGroupSizeAttr - Adds an amdgpu_flat_work_group_size + /// attribute to a particular declaration. + void addAMDGPUFlatWorkGroupSizeAttr(SourceRange AttrRange, Decl *D, Expr *Min, + Expr *Max, unsigned SpellingListIndex); + + /// addAMDGPUWavePersEUAttr - Adds an amdgpu_waves_per_eu attribute to a + /// particular declaration. + void addAMDGPUWavesPerEUAttr(SourceRange AttrRange, Decl *D, Expr *Min, + Expr *Max, unsigned SpellingListIndex); + bool checkNSReturnsRetainedReturnType(SourceLocation loc, QualType type); //===--------------------------------------------------------------------===// diff --git a/clang/lib/CodeGen/TargetInfo.cpp b/clang/lib/CodeGen/TargetInfo.cpp --- a/clang/lib/CodeGen/TargetInfo.cpp +++ b/clang/lib/CodeGen/TargetInfo.cpp @@ -7797,8 +7797,16 @@ const auto *FlatWGS = FD->getAttr(); if (ReqdWGS || FlatWGS) { - unsigned Min = FlatWGS ? FlatWGS->getMin() : 0; - unsigned Max = FlatWGS ? FlatWGS->getMax() : 0; + unsigned Min = 0; + unsigned Max = 0; + if (FlatWGS) { + Min = FlatWGS->getMin() + ->EvaluateKnownConstInt(M.getContext()) + .getExtValue(); + Max = FlatWGS->getMax() + ->EvaluateKnownConstInt(M.getContext()) + .getExtValue(); + } if (ReqdWGS && Min == 0 && Max == 0) Min = Max = ReqdWGS->getXDim() * ReqdWGS->getYDim() * ReqdWGS->getZDim(); @@ -7812,8 +7820,12 @@ } if (const auto *Attr = FD->getAttr()) { - unsigned Min = Attr->getMin(); - unsigned Max = Attr->getMax(); + unsigned Min = + Attr->getMin()->EvaluateKnownConstInt(M.getContext()).getExtValue(); + unsigned Max = Attr->getMax() ? Attr->getMax() + ->EvaluateKnownConstInt(M.getContext()) + .getExtValue() + : 0; if (Min != 0) { assert((Max == 0 || Min <= Max) && "Min must be less than or equal Max"); diff --git a/clang/lib/Sema/SemaCUDA.cpp b/clang/lib/Sema/SemaCUDA.cpp --- a/clang/lib/Sema/SemaCUDA.cpp +++ b/clang/lib/Sema/SemaCUDA.cpp @@ -675,9 +675,22 @@ bool Sema::CheckCUDACall(SourceLocation Loc, FunctionDecl *Callee) { assert(getLangOpts().CUDA && "Should only be called during CUDA compilation"); assert(Callee && "Callee may not be null."); + + auto &ExprEvalCtx = ExprEvalContexts.back(); + if (ExprEvalCtx.isUnevaluated()) + return true; + + FunctionDecl *Caller = nullptr; + if (auto *Template = ExprEvalContexts.back().Template) { + if (auto *FD = dyn_cast(Template->getTemplatedDecl())) + Caller = FD; + } else if (ExprEvalCtx.isConstantEvaluated()) + return true; + // FIXME: Is bailing out early correct here? Should we instead assume that // the caller is a global initializer? - FunctionDecl *Caller = dyn_cast(CurContext); + if (!Caller) + Caller = dyn_cast(CurContext); if (!Caller) return true; diff --git a/clang/lib/Sema/SemaDeclAttr.cpp b/clang/lib/Sema/SemaDeclAttr.cpp --- a/clang/lib/Sema/SemaDeclAttr.cpp +++ b/clang/lib/Sema/SemaDeclAttr.cpp @@ -245,11 +245,11 @@ !Expr->isIntegerConstantExpr(I, S.Context)) { if (Idx != UINT_MAX) S.Diag(getAttrLoc(AI), diag::err_attribute_argument_n_type) - << AI << Idx << AANT_ArgumentIntegerConstant + << &AI << Idx << AANT_ArgumentIntegerConstant << Expr->getSourceRange(); else S.Diag(getAttrLoc(AI), diag::err_attribute_argument_type) - << AI << AANT_ArgumentIntegerConstant << Expr->getSourceRange(); + << &AI << AANT_ArgumentIntegerConstant << Expr->getSourceRange(); return false; } @@ -261,7 +261,7 @@ if (StrictlyUnsigned && I.isSigned() && I.isNegative()) { S.Diag(getAttrLoc(AI), diag::err_attribute_requires_positive_integer) - << AI << /*non-negative*/ 1; + << &AI << /*non-negative*/ 1; return false; } @@ -5853,57 +5853,115 @@ } } -static void handleAMDGPUFlatWorkGroupSizeAttr(Sema &S, Decl *D, - const ParsedAttr &AL) { +static bool +checkAMDGPUFlatWorkGroupSizeArguments(Sema &S, Expr *MinExpr, Expr *MaxExpr, + const AMDGPUFlatWorkGroupSizeAttr &Attr) { + // Accept template arguments for now as they depend on something else. + // We'll get to check them when they eventually get instantiated. + if (MinExpr->isValueDependent() || MaxExpr->isValueDependent()) + return false; + uint32_t Min = 0; - Expr *MinExpr = AL.getArgAsExpr(0); - if (!checkUInt32Argument(S, AL, MinExpr, Min)) - return; + if (!checkUInt32Argument(S, Attr, MinExpr, Min, 0)) + return true; uint32_t Max = 0; - Expr *MaxExpr = AL.getArgAsExpr(1); - if (!checkUInt32Argument(S, AL, MaxExpr, Max)) - return; + if (!checkUInt32Argument(S, Attr, MaxExpr, Max, 1)) + return true; if (Min == 0 && Max != 0) { - S.Diag(AL.getLoc(), diag::err_attribute_argument_invalid) << AL << 0; - return; + S.Diag(Attr.getLocation(), diag::err_attribute_argument_invalid) + << &Attr << 0; + return true; } if (Min > Max) { - S.Diag(AL.getLoc(), diag::err_attribute_argument_invalid) << AL << 1; - return; + S.Diag(Attr.getLocation(), diag::err_attribute_argument_invalid) + << &Attr << 1; + return true; } - D->addAttr(::new (S.Context) - AMDGPUFlatWorkGroupSizeAttr(AL.getLoc(), S.Context, Min, Max, - AL.getAttributeSpellingListIndex())); + return false; } -static void handleAMDGPUWavesPerEUAttr(Sema &S, Decl *D, const ParsedAttr &AL) { - uint32_t Min = 0; - Expr *MinExpr = AL.getArgAsExpr(0); - if (!checkUInt32Argument(S, AL, MinExpr, Min)) +void Sema::addAMDGPUFlatWorkGroupSizeAttr(SourceRange AttrRange, Decl *D, + Expr *MinExpr, Expr *MaxExpr, + unsigned SpellingListIndex) { + AMDGPUFlatWorkGroupSizeAttr TmpAttr(AttrRange, Context, MinExpr, MaxExpr, + SpellingListIndex); + + if (checkAMDGPUFlatWorkGroupSizeArguments(*this, MinExpr, MaxExpr, TmpAttr)) return; + D->addAttr(::new (Context) AMDGPUFlatWorkGroupSizeAttr( + AttrRange, Context, MinExpr, MaxExpr, SpellingListIndex)); +} + +static void handleAMDGPUFlatWorkGroupSizeAttr(Sema &S, Decl *D, + const ParsedAttr &AL) { + Expr *MinExpr = AL.getArgAsExpr(0); + Expr *MaxExpr = AL.getArgAsExpr(1); + + S.addAMDGPUFlatWorkGroupSizeAttr(AL.getRange(), D, MinExpr, MaxExpr, + AL.getAttributeSpellingListIndex()); +} + +static bool checkAMDGPUWavesPerEUArguments(Sema &S, Expr *MinExpr, + Expr *MaxExpr, + const AMDGPUWavesPerEUAttr &Attr) { + if (S.DiagnoseUnexpandedParameterPack(MinExpr) || + (MaxExpr && S.DiagnoseUnexpandedParameterPack(MaxExpr))) + return true; + + // Accept template arguments for now as they depend on something else. + // We'll get to check them when they eventually get instantiated. + if (MinExpr->isValueDependent() || (MaxExpr && MaxExpr->isValueDependent())) + return false; + + uint32_t Min = 0; + if (!checkUInt32Argument(S, Attr, MinExpr, Min, 0)) + return true; + uint32_t Max = 0; - if (AL.getNumArgs() == 2) { - Expr *MaxExpr = AL.getArgAsExpr(1); - if (!checkUInt32Argument(S, AL, MaxExpr, Max)) - return; - } + if (MaxExpr && !checkUInt32Argument(S, Attr, MaxExpr, Max, 1)) + return true; if (Min == 0 && Max != 0) { - S.Diag(AL.getLoc(), diag::err_attribute_argument_invalid) << AL << 0; - return; + S.Diag(Attr.getLocation(), diag::err_attribute_argument_invalid) + << &Attr << 0; + return true; } if (Max != 0 && Min > Max) { - S.Diag(AL.getLoc(), diag::err_attribute_argument_invalid) << AL << 1; - return; + S.Diag(Attr.getLocation(), diag::err_attribute_argument_invalid) + << &Attr << 1; + return true; } - D->addAttr(::new (S.Context) - AMDGPUWavesPerEUAttr(AL.getLoc(), S.Context, Min, Max, - AL.getAttributeSpellingListIndex())); + return false; +} + +void Sema::addAMDGPUWavesPerEUAttr(SourceRange AttrRange, Decl *D, + Expr *MinExpr, Expr *MaxExpr, + unsigned SpellingListIndex) { + AMDGPUWavesPerEUAttr TmpAttr(AttrRange, Context, MinExpr, MaxExpr, + SpellingListIndex); + + if (checkAMDGPUWavesPerEUArguments(*this, MinExpr, MaxExpr, TmpAttr)) + return; + + D->addAttr(::new (Context) AMDGPUWavesPerEUAttr(AttrRange, Context, MinExpr, + MaxExpr, SpellingListIndex)); +} + +static void handleAMDGPUWavesPerEUAttr(Sema &S, Decl *D, const ParsedAttr &AL) { + if (!checkAttributeAtLeastNumArgs(S, AL, 1) || + !checkAttributeAtMostNumArgs(S, AL, 2)) + return; + + Expr *MinExpr = AL.getArgAsExpr(0); + Expr *MaxExpr = (AL.getNumArgs() > 1) ? AL.getArgAsExpr(1) : nullptr; + + S.addAMDGPUWavesPerEUAttr(AL.getRange(), D, MinExpr, MaxExpr, + AL.getAttributeSpellingListIndex()); } static void handleAMDGPUNumSGPRAttr(Sema &S, Decl *D, const ParsedAttr &AL) { diff --git a/clang/lib/Sema/SemaTemplate.cpp b/clang/lib/Sema/SemaTemplate.cpp --- a/clang/lib/Sema/SemaTemplate.cpp +++ b/clang/lib/Sema/SemaTemplate.cpp @@ -4555,6 +4555,7 @@ EnterExpressionEvaluationContext ConstantEvaluated( SemaRef, Sema::ExpressionEvaluationContext::ConstantEvaluated); + SemaRef.ExprEvalContexts.back().Template = Template; return SemaRef.SubstExpr(Param->getDefaultArgument(), TemplateArgLists); } @@ -4805,8 +4806,8 @@ TemplateArgument Result; unsigned CurSFINAEErrors = NumSFINAEErrors; ExprResult Res = - CheckTemplateArgument(NTTP, NTTPType, Arg.getArgument().getAsExpr(), - Result, CTAK); + CheckTemplateArgument(NTTP, NTTPType, Arg.getArgument().getAsExpr(), + Result, CTAK, dyn_cast(Template)); if (Res.isInvalid()) return true; // If the current template argument causes an error, give up now. @@ -6175,6 +6176,22 @@ return true; } +namespace { +FunctionDecl *GetFunctionDecl(Expr *Arg) { + Expr *E = Arg; + if (UnaryOperator *UO = dyn_cast(E)) { + E = UO ? UO->getSubExpr() : nullptr; + } + if (DeclRefExpr *DRE = dyn_cast_or_null(E)) { + ValueDecl *Entity = DRE ? DRE->getDecl() : nullptr; + if (Entity) { + if (auto Callee = dyn_cast(Entity)) + return Callee; + } + } + return nullptr; +} +} // namespace /// Check a template argument against its corresponding /// non-type template parameter. /// @@ -6185,7 +6202,8 @@ ExprResult Sema::CheckTemplateArgument(NonTypeTemplateParmDecl *Param, QualType ParamType, Expr *Arg, TemplateArgument &Converted, - CheckTemplateArgumentKind CTAK) { + CheckTemplateArgumentKind CTAK, + TemplateDecl *Template) { SourceLocation StartLoc = Arg->getBeginLoc(); // If the parameter type somehow involves auto, deduce the type now. @@ -6272,6 +6290,7 @@ // a constant-evaluated context. EnterExpressionEvaluationContext ConstantEvaluated( *this, Sema::ExpressionEvaluationContext::ConstantEvaluated); + ExprEvalContexts.back().Template = Template; if (getLangOpts().CPlusPlus17) { // C++17 [temp.arg.nontype]p1: @@ -6592,6 +6611,10 @@ return ExprError(); } + if (auto *FD = GetFunctionDecl(Arg)) + if (getLangOpts().CUDA && !CheckCUDACall(Arg->getBeginLoc(), FD)) + return ExprError(); + if (!ParamType->isMemberPointerType()) { if (CheckTemplateArgumentAddressOfObjectOrFunction(*this, Param, ParamType, diff --git a/clang/lib/Sema/SemaTemplateInstantiateDecl.cpp b/clang/lib/Sema/SemaTemplateInstantiateDecl.cpp --- a/clang/lib/Sema/SemaTemplateInstantiateDecl.cpp +++ b/clang/lib/Sema/SemaTemplateInstantiateDecl.cpp @@ -344,6 +344,51 @@ Attr.getRange()); } +static void instantiateDependentAMDGPUFlatWorkGroupSizeAttr( + Sema &S, const MultiLevelTemplateArgumentList &TemplateArgs, + const AMDGPUFlatWorkGroupSizeAttr &Attr, Decl *New) { + // Both min and max expression are constant expressions. + EnterExpressionEvaluationContext Unevaluated( + S, Sema::ExpressionEvaluationContext::ConstantEvaluated); + + ExprResult Result = S.SubstExpr(Attr.getMin(), TemplateArgs); + if (Result.isInvalid()) + return; + Expr *MinExpr = Result.getAs(); + + Result = S.SubstExpr(Attr.getMax(), TemplateArgs); + if (Result.isInvalid()) + return; + Expr *MaxExpr = Result.getAs(); + + S.addAMDGPUFlatWorkGroupSizeAttr(Attr.getLocation(), New, MinExpr, MaxExpr, + Attr.getSpellingListIndex()); +} + +static void instantiateDependentAMDGPUWavesPerEUAttr( + Sema &S, const MultiLevelTemplateArgumentList &TemplateArgs, + const AMDGPUWavesPerEUAttr &Attr, Decl *New) { + // Both min and max expression are constant expressions. + EnterExpressionEvaluationContext Unevaluated( + S, Sema::ExpressionEvaluationContext::ConstantEvaluated); + + ExprResult Result = S.SubstExpr(Attr.getMin(), TemplateArgs); + if (Result.isInvalid()) + return; + Expr *MinExpr = Result.getAs(); + + Expr *MaxExpr = nullptr; + if (auto Max = Attr.getMax()) { + Result = S.SubstExpr(Max, TemplateArgs); + if (Result.isInvalid()) + return; + MaxExpr = Result.getAs(); + } + + S.addAMDGPUWavesPerEUAttr(Attr.getLocation(), New, MinExpr, MaxExpr, + Attr.getSpellingListIndex()); +} + void Sema::InstantiateAttrsForDecl( const MultiLevelTemplateArgumentList &TemplateArgs, const Decl *Tmpl, Decl *New, LateInstantiatedAttrVec *LateAttrs, @@ -437,6 +482,18 @@ continue; } + if (const AMDGPUFlatWorkGroupSizeAttr *AMDGPUFlatWorkGroupSize = + dyn_cast(TmplAttr)) { + instantiateDependentAMDGPUFlatWorkGroupSizeAttr( + *this, TemplateArgs, *AMDGPUFlatWorkGroupSize, New); + } + + if (const AMDGPUWavesPerEUAttr *AMDGPUFlatWorkGroupSize = + dyn_cast(TmplAttr)) { + instantiateDependentAMDGPUWavesPerEUAttr(*this, TemplateArgs, + *AMDGPUFlatWorkGroupSize, New); + } + // Existing DLL attribute on the instantiation takes precedence. if (TmplAttr->getKind() == attr::DLLExport || TmplAttr->getKind() == attr::DLLImport) { diff --git a/clang/test/SemaCUDA/amdgpu-attrs.cu b/clang/test/SemaCUDA/amdgpu-attrs.cu --- a/clang/test/SemaCUDA/amdgpu-attrs.cu +++ b/clang/test/SemaCUDA/amdgpu-attrs.cu @@ -1,4 +1,4 @@ -// RUN: %clang_cc1 -fsyntax-only -verify %s +// RUN: %clang_cc1 -std=c++11 -fsyntax-only -verify %s #include "Inputs/cuda.h" @@ -78,3 +78,119 @@ // expected-error@+2{{attribute 'intel_reqd_sub_group_size' can only be applied to an OpenCL kernel function}} __attribute__((intel_reqd_sub_group_size(64))) __global__ void intel_reqd_sub_group_size_64() {} + +// expected-error@+1{{'amdgpu_flat_work_group_size' attribute requires parameter 0 to be an integer constant}} +__attribute__((amdgpu_flat_work_group_size("32", 64))) +__global__ void non_int_min_flat_work_group_size_32_64() {} +// expected-error@+1{{'amdgpu_flat_work_group_size' attribute requires parameter 1 to be an integer constant}} +__attribute__((amdgpu_flat_work_group_size(32, "64"))) +__global__ void non_int_max_flat_work_group_size_32_64() {} + +int nc_min = 32, nc_max = 64; +// expected-error@+1{{'amdgpu_flat_work_group_size' attribute requires parameter 0 to be an integer constant}} +__attribute__((amdgpu_flat_work_group_size(nc_min, 64))) +__global__ void non_cint_min_flat_work_group_size_32_64() {} +// expected-error@+1{{'amdgpu_flat_work_group_size' attribute requires parameter 1 to be an integer constant}} +__attribute__((amdgpu_flat_work_group_size(32, nc_max))) +__global__ void non_cint_max_flat_work_group_size_32_64() {} + +const int c_min = 16, c_max = 32; +__attribute__((amdgpu_flat_work_group_size(c_min * 2, 64))) +__global__ void cint_min_flat_work_group_size_32_64() {} +__attribute__((amdgpu_flat_work_group_size(32, c_max * 2))) +__global__ void cint_max_flat_work_group_size_32_64() {} + +// expected-error@+3{{'T' does not refer to a value}} +// expected-note@+1{{declared here}} +template +__attribute__((amdgpu_flat_work_group_size(T, 64))) +__global__ void template_class_min_flat_work_group_size_32_64() {} +// expected-error@+3{{'T' does not refer to a value}} +// expected-note@+1{{declared here}} +template +__attribute__((amdgpu_flat_work_group_size(32, T))) +__global__ void template_class_max_flat_work_group_size_32_64() {} + +template +__attribute__((amdgpu_flat_work_group_size(a, b))) +__global__ void template_flat_work_group_size_32_64() {} +template __global__ void template_flat_work_group_size_32_64<32, 64>(); + +template +__attribute__((amdgpu_flat_work_group_size(a + b, b + c))) +__global__ void template_complex_flat_work_group_size_32_64() {} +template __global__ void template_complex_flat_work_group_size_32_64<16, 16, 48>(); + +unsigned ipow2(unsigned n) { return n == 0 ? 1 : 2 * ipow2(n - 1); } +constexpr unsigned ce_ipow2(unsigned n) { return n == 0 ? 1 : 2 * ce_ipow2(n - 1); } + +__attribute__((amdgpu_flat_work_group_size(ce_ipow2(5), ce_ipow2(6)))) +__global__ void cexpr_flat_work_group_size_32_64() {} +// expected-error@+1{{'amdgpu_flat_work_group_size' attribute requires parameter 0 to be an integer constant}} +__attribute__((amdgpu_flat_work_group_size(ipow2(5), 64))) +__global__ void non_cexpr_min_flat_work_group_size_32_64() {} +// expected-error@+1{{'amdgpu_flat_work_group_size' attribute requires parameter 1 to be an integer constant}} +__attribute__((amdgpu_flat_work_group_size(32, ipow2(6)))) +__global__ void non_cexpr_max_flat_work_group_size_32_64() {} + +// expected-error@+1{{'amdgpu_waves_per_eu' attribute requires parameter 0 to be an integer constant}} +__attribute__((amdgpu_waves_per_eu("2"))) +__global__ void non_int_min_waves_per_eu_2() {} +// expected-error@+1{{'amdgpu_waves_per_eu' attribute requires parameter 1 to be an integer constant}} +__attribute__((amdgpu_waves_per_eu(2, "4"))) +__global__ void non_int_max_waves_per_eu_2_4() {} + +// expected-error@+1{{'amdgpu_waves_per_eu' attribute requires parameter 0 to be an integer constant}} +__attribute__((amdgpu_waves_per_eu(nc_min))) +__global__ void non_cint_min_waves_per_eu_2() {} +// expected-error@+1{{'amdgpu_waves_per_eu' attribute requires parameter 1 to be an integer constant}} +__attribute__((amdgpu_waves_per_eu(2, nc_max))) +__global__ void non_cint_min_waves_per_eu_2_4() {} + +__attribute__((amdgpu_waves_per_eu(c_min / 8))) +__global__ void cint_min_waves_per_eu_2() {} +__attribute__((amdgpu_waves_per_eu(c_min / 8, c_max / 8))) +__global__ void cint_min_waves_per_eu_2_4() {} + +// expected-error@+3{{'T' does not refer to a value}} +// expected-note@+1{{declared here}} +template +__attribute__((amdgpu_waves_per_eu(T))) +__global__ void cint_min_waves_per_eu_2() {} +// expected-error@+3{{'T' does not refer to a value}} +// expected-note@+1{{declared here}} +template +__attribute__((amdgpu_waves_per_eu(2, T))) +__global__ void cint_min_waves_per_eu_2_4() {} + +template +__attribute__((amdgpu_waves_per_eu(a))) +__global__ void template_waves_per_eu_2() {} +template __global__ void template_waves_per_eu_2<2>(); + +template +__attribute__((amdgpu_waves_per_eu(a, b))) +__global__ void template_waves_per_eu_2_4() {} +template __global__ void template_waves_per_eu_2_4<2, 4>(); + +template +__attribute__((amdgpu_waves_per_eu(a + b, c - b))) +__global__ void template_complex_waves_per_eu_2_4() {} +template __global__ void template_complex_waves_per_eu_2_4<1, 1, 5>(); + +// expected-error@+2{{expression contains unexpanded parameter pack 'Args'}} +template +__attribute__((amdgpu_waves_per_eu(Args))) +__global__ void template_waves_per_eu_2() {} +template __global__ void template_waves_per_eu_2<2, 4>(); + +__attribute__((amdgpu_waves_per_eu(ce_ipow2(1)))) +__global__ void cexpr_waves_per_eu_2() {} +__attribute__((amdgpu_waves_per_eu(ce_ipow2(1), ce_ipow2(2)))) +__global__ void cexpr_waves_per_eu_2_4() {} +// expected-error@+1{{'amdgpu_waves_per_eu' attribute requires parameter 0 to be an integer constant}} +__attribute__((amdgpu_waves_per_eu(ipow2(1)))) +__global__ void non_cexpr_waves_per_eu_2() {} +// expected-error@+1{{'amdgpu_waves_per_eu' attribute requires parameter 1 to be an integer constant}} +__attribute__((amdgpu_waves_per_eu(2, ipow2(2)))) +__global__ void non_cexpr_waves_per_eu_2_4() {} diff --git a/clang/test/SemaCUDA/kernel-template-with-func-arg.cu b/clang/test/SemaCUDA/kernel-template-with-func-arg.cu new file mode 100644 --- /dev/null +++ b/clang/test/SemaCUDA/kernel-template-with-func-arg.cu @@ -0,0 +1,57 @@ +// RUN: %clang_cc1 -fsyntax-only -verify %s + +#include "Inputs/cuda.h" + +struct C { + __device__ void devfun() {} + void hostfun() {} + template __device__ void devtempfun() {} + __device__ __host__ void devhostfun() {} +}; + +__device__ void devfun() {} +__host__ void hostfun() {} +template __device__ void devtempfun() {} +__device__ __host__ void devhostfun() {} + +template __global__ void kernel() { devF();} +template __global__ void kernel2(T *p) { (p->*devF)(); } + +template<> __global__ void kernel(); +template<> __global__ void kernel(); // expected-error {{no function template matches function template specialization 'kernel'}} + // expected-note@-5 {{candidate template ignored: invalid explicitly-specified argument for template parameter 'devF'}} +template<> __global__ void kernel >(); +template<> __global__ void kernel(); + +template<> __global__ void kernel<&devfun>(); +template<> __global__ void kernel<&hostfun>(); // expected-error {{no function template matches function template specialization 'kernel'}} + // expected-note@-11 {{candidate template ignored: invalid explicitly-specified argument for template parameter 'devF'}} +template<> __global__ void kernel<&devtempfun >(); +template<> __global__ void kernel<&devhostfun>(); + +template<> __global__ void kernel2(C *p); +template<> __global__ void kernel2(C *p); // expected-error {{no function template matches function template specialization 'kernel2'}} + // expected-note@-16 {{candidate template ignored: invalid explicitly-specified argument for template parameter 'devF'}} +template<> __global__ void kernel2 >(C *p); +template<> __global__ void kernel2(C *p); + +void fun() { + kernel<&devfun><<<1,1>>>(); + kernel<&hostfun><<<1,1>>>(); // expected-error {{no matching function for call to 'kernel'}} + // expected-note@-24 {{candidate template ignored: invalid explicitly-specified argument for template parameter 'devF'}} + kernel<&devtempfun ><<<1,1>>>(); + kernel<&devhostfun><<<1,1>>>(); + + kernel<<<1,1>>>(); + kernel<<<1,1>>>(); // expected-error {{no matching function for call to 'kernel'}} + // expected-note@-30 {{candidate template ignored: invalid explicitly-specified argument for template parameter 'devF'}} + kernel ><<<1,1>>>(); + kernel<<<1,1>>>(); + + C a; + kernel2<<<1,1>>>(&a); + kernel2<<<1,1>>>(&a); // expected-error {{no matching function for call to 'kernel2'}} + // expected-note@-36 {{candidate template ignored: invalid explicitly-specified argument for template parameter 'devF'}} + kernel2 ><<<1,1>>>(&a); + kernel2<<<1,1>>>(&a); +} diff --git a/clang/test/SemaOpenCL/amdgpu-attrs.cl b/clang/test/SemaOpenCL/amdgpu-attrs.cl --- a/clang/test/SemaOpenCL/amdgpu-attrs.cl +++ b/clang/test/SemaOpenCL/amdgpu-attrs.cl @@ -27,12 +27,12 @@ __attribute__((amdgpu_num_sgpr(32))) void func_num_sgpr_32() {} // expected-error {{'amdgpu_num_sgpr' attribute only applies to kernel functions}} __attribute__((amdgpu_num_vgpr(64))) void func_num_vgpr_64() {} // expected-error {{'amdgpu_num_vgpr' attribute only applies to kernel functions}} -__attribute__((amdgpu_flat_work_group_size("ABC", "ABC"))) kernel void kernel_flat_work_group_size_ABC_ABC() {} // expected-error {{'amdgpu_flat_work_group_size' attribute requires an integer constant}} -__attribute__((amdgpu_flat_work_group_size(32, "ABC"))) kernel void kernel_flat_work_group_size_32_ABC() {} // expected-error {{'amdgpu_flat_work_group_size' attribute requires an integer constant}} -__attribute__((amdgpu_flat_work_group_size("ABC", 64))) kernel void kernel_flat_work_group_size_ABC_64() {} // expected-error {{'amdgpu_flat_work_group_size' attribute requires an integer constant}} -__attribute__((amdgpu_waves_per_eu("ABC"))) kernel void kernel_waves_per_eu_ABC() {} // expected-error {{'amdgpu_waves_per_eu' attribute requires an integer constant}} -__attribute__((amdgpu_waves_per_eu(2, "ABC"))) kernel void kernel_waves_per_eu_2_ABC() {} // expected-error {{'amdgpu_waves_per_eu' attribute requires an integer constant}} -__attribute__((amdgpu_waves_per_eu("ABC", 4))) kernel void kernel_waves_per_eu_ABC_4() {} // expected-error {{'amdgpu_waves_per_eu' attribute requires an integer constant}} +__attribute__((amdgpu_flat_work_group_size("ABC", "ABC"))) kernel void kernel_flat_work_group_size_ABC_ABC() {} // expected-error {{'amdgpu_flat_work_group_size' attribute requires parameter 0 to be an integer constant}} +__attribute__((amdgpu_flat_work_group_size(32, "ABC"))) kernel void kernel_flat_work_group_size_32_ABC() {} // expected-error {{'amdgpu_flat_work_group_size' attribute requires parameter 1 to be an integer constant}} +__attribute__((amdgpu_flat_work_group_size("ABC", 64))) kernel void kernel_flat_work_group_size_ABC_64() {} // expected-error {{'amdgpu_flat_work_group_size' attribute requires parameter 0 to be an integer constant}} +__attribute__((amdgpu_waves_per_eu("ABC"))) kernel void kernel_waves_per_eu_ABC() {} // expected-error {{'amdgpu_waves_per_eu' attribute requires parameter 0 to be an integer constant}} +__attribute__((amdgpu_waves_per_eu(2, "ABC"))) kernel void kernel_waves_per_eu_2_ABC() {} // expected-error {{'amdgpu_waves_per_eu' attribute requires parameter 1 to be an integer constant}} +__attribute__((amdgpu_waves_per_eu("ABC", 4))) kernel void kernel_waves_per_eu_ABC_4() {} // expected-error {{'amdgpu_waves_per_eu' attribute requires parameter 0 to be an integer constant}} __attribute__((amdgpu_num_sgpr("ABC"))) kernel void kernel_num_sgpr_ABC() {} // expected-error {{'amdgpu_num_sgpr' attribute requires an integer constant}} __attribute__((amdgpu_num_vgpr("ABC"))) kernel void kernel_num_vgpr_ABC() {} // expected-error {{'amdgpu_num_vgpr' attribute requires an integer constant}} diff --git a/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp b/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp --- a/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp @@ -398,8 +398,9 @@ const TargetRegisterClass *OpRC = TLI->isTypeLegal(OpVT) ? TLI->getRegClassFor(OpVT) : nullptr; const TargetRegisterClass *IIRC = - II ? TRI->getAllocatableClass(TII->getRegClass(*II, IIOpNum, TRI, *MF)) - : nullptr; + II ? TII->getRegClass(*II, IIOpNum, TRI, *MF) : nullptr; + assert(!II || IIOpNum < II->getNumOperands() || !IIRC); + IIRC = TRI->getAllocatableClass(IIRC); if (OpRC && IIRC && OpRC != IIRC && TargetRegisterInfo::isVirtualRegister(VReg)) {