Index: include/clang/Basic/Attr.td =================================================================== --- include/clang/Basic/Attr.td +++ include/clang/Basic/Attr.td @@ -1050,24 +1050,37 @@ // // FIXME: This provides a sub-optimal error message if you attempt to // use this in CUDA, since CUDA does not use the same terminology. -def AMDGPUNumVGPR : InheritableAttr { - let Spellings = [GNU<"amdgpu_num_vgpr">]; - let Args = [UnsignedArgument<"NumVGPR">]; - let Documentation = [AMDGPUNumVGPRDocs]; - +// // FIXME: This should be for OpenCLKernelFunction, but is not to // workaround needing to see kernel attribute before others to know if // this should be rejected on non-kernels. - let Subjects = SubjectList<[Function], ErrorDiag, - "ExpectedKernelFunction">; + +def AMDGPUFlatWorkGroupSize : InheritableAttr { + let Spellings = [GNU<"amdgpu_flat_work_group_size">]; + let Args = [UnsignedArgument<"Min">, UnsignedArgument<"Max">]; + let Documentation = [AMDGPUFlatWorkGroupSizeDocs]; + let Subjects = SubjectList<[Function], ErrorDiag, "ExpectedKernelFunction">; +} + +def AMDGPUWavesPerEU : InheritableAttr { + let Spellings = [GNU<"amdgpu_waves_per_eu">]; + let Args = [UnsignedArgument<"Min">, VariadicUnsignedArgument<"Max">]; + let Documentation = [AMDGPUWavesPerEUDocs]; + let Subjects = SubjectList<[Function], ErrorDiag, "ExpectedKernelFunction">; } def AMDGPUNumSGPR : InheritableAttr { let Spellings = [GNU<"amdgpu_num_sgpr">]; let Args = [UnsignedArgument<"NumSGPR">]; - let Documentation = [AMDGPUNumSGPRDocs]; - let Subjects = SubjectList<[Function], ErrorDiag, - "ExpectedKernelFunction">; + let Documentation = [AMDGPUNumSGPRNumVGPRDocs]; + let Subjects = SubjectList<[Function], ErrorDiag, "ExpectedKernelFunction">; +} + +def AMDGPUNumVGPR : InheritableAttr { + let Spellings = [GNU<"amdgpu_num_vgpr">]; + let Args = [UnsignedArgument<"NumVGPR">]; + let Documentation = [AMDGPUNumSGPRNumVGPRDocs]; + let Subjects = SubjectList<[Function], ErrorDiag, "ExpectedKernelFunction">; } def NoSplitStack : InheritableAttr { Index: include/clang/Basic/AttrDocs.td =================================================================== --- include/clang/Basic/AttrDocs.td +++ include/clang/Basic/AttrDocs.td @@ -889,12 +889,12 @@ enumerator, a non-static data member, or a label. .. code-block: c++ - #include - - [[maybe_unused]] void f([[maybe_unused]] bool thing1, - [[maybe_unused]] bool thing2) { - [[maybe_unused]] bool b = thing1 && thing2; - assert(b); + #include + + [[maybe_unused]] void f([[maybe_unused]] bool thing1, + [[maybe_unused]] bool thing2) { + [[maybe_unused]] bool b = thing1 && thing2; + assert(b); } }]; } @@ -911,15 +911,15 @@ `void`. .. code-block: c++ - struct [[nodiscard]] error_info { /*...*/ }; - error_info enable_missile_safety_mode(); - - void launch_missiles(); - void test_missiles() { - enable_missile_safety_mode(); // diagnoses - launch_missiles(); - } - error_info &foo(); + struct [[nodiscard]] error_info { /*...*/ }; + error_info enable_missile_safety_mode(); + + void launch_missiles(); + void test_missiles() { + enable_missile_safety_mode(); // diagnoses + launch_missiles(); + } + error_info &foo(); void f() { foo(); } // Does not diagnose, error_info is a reference. }]; } @@ -1076,64 +1076,108 @@ }]; } -def DocCatAMDGPURegisterAttributes : - DocumentationCategory<"AMD GPU Register Attributes"> { - let Content = [{ -Clang supports attributes for controlling register usage on AMD GPU -targets. These attributes may be attached to a kernel function -definition and is an optimization hint to the backend for the maximum -number of registers to use. This is useful in cases where register -limited occupancy is known to be an important factor for the -performance for the kernel. - -The semantics are as follows: - -- The backend will attempt to limit the number of used registers to - the specified value, but the exact number used is not - guaranteed. The number used may be rounded up to satisfy the - allocation requirements or ABI constraints of the subtarget. For - example, on Southern Islands VGPRs may only be allocated in - increments of 4, so requesting a limit of 39 VGPRs will really - attempt to use up to 40. Requesting more registers than the - subtarget supports will truncate to the maximum allowed. The backend - may also use fewer registers than requested whenever possible. - -- 0 implies the default no limit on register usage. - -- Ignored on older VLIW subtargets which did not have separate scalar - and vector registers, R600 through Northern Islands. - -}]; -} - - -def AMDGPUNumVGPRDocs : Documentation { - let Category = DocCatAMDGPURegisterAttributes; - let Content = [{ -Clang supports the -``__attribute__((amdgpu_num_vgpr()))`` attribute on AMD -Southern Islands GPUs and later for controlling the number of vector -registers. A typical value would be between 4 and 256 in increments -of 4. -}]; -} +def DocCatAMDGPUAttributes : DocumentationCategory<"AMD GPU Attributes">; -def AMDGPUNumSGPRDocs : Documentation { - let Category = DocCatAMDGPURegisterAttributes; +def AMDGPUFlatWorkGroupSizeDocs : Documentation { + let Category = DocCatAMDGPUAttributes; let Content = [{ +The flat work-group size is the number of work-items in the work-group size +specified when the kernel is dispatched. It is the product of the sizes of the +x, y, and z dimension of the work-group. Clang supports the -``__attribute__((amdgpu_num_sgpr()))`` attribute on AMD -Southern Islands GPUs and later for controlling the number of scalar -registers. A typical value would be between 8 and 104 in increments of -8. - -Due to common instruction constraints, an additional 2-4 SGPRs are -typically required for internal use depending on features used. This -value is a hint for the total number of SGPRs to use, and not the -number of user SGPRs, so no special consideration needs to be given -for these. -}]; +``__attribute__((amdgpu_flat_work_group_size(, )))`` attribute for the +AMDGPU target. This attribute may be attached to a kernel function definition +and is an optimization hint. + +```` parameter specifies the minimum flat work-group size (must be greater +than or equal to 1), and ```` parameter specifies the maximum flat +work-group size (must be less than ````) to which all dispatches of the +kernel will conform. + +If specified, the AMDGPU target backend might be able to produce better machine +code for barriers and perform scratch promotion by estimating available group +segment size. + +An error will be given if: + - Specified values violate subtarget specifications; + - Specified values are not compatible with values provided through other + attributes. + }]; +} + +def AMDGPUWavesPerEUDocs : Documentation { + let Category = DocCatAMDGPUAttributes; + let Content = [{ +A compute unit (CU) is responsible for executing the wavefronts of a work-group. +It is composed of one or more execution units (EU), which are responsible for +executing the wavefronts. An EU can have enough resources to maintain the state +of more than one executing wavefront. This allows an EU to hide latency by +switching between wavefronts in a similar way to symmetric multithreading on a +CPU. In order to allow the state for multiple wavefronts to fit on an EU, the +resources used by a single wavefront have to be limited. For example, the number +of SGPRs and VGPRs. Limiting such resources can allow greater latency hiding, +but can result in having to spill some register state to memory. + +Clang supports the ``__attribute__((amdgpu_waves_per_eu([, ])))`` +attribute for the AMDGPU target. This attribute may be attached to a kernel +function definition and is an optimization hint. + +```` parameter specifies the requested minimum number of waves per EU (must +be greater than or equal to 1), and *optional* ```` parameter specifies the +requested maximum number of waves per EU. If ```` is omitted, then there is +no restriction on the maximum number of waves per EU other than the one dictated +by the hardware for which the kernel is compiled. + +If specified, this attribute allows an advanced developer to tune the number of +wavefronts that are capable of fitting within the resources of an EU. The AMDGPU +target backend can use this information to limit resources, such as number of +SGPRs, number of VGPRs, size of available group and private memory segments, in +such a way that guarantees that at least ```` wavefronts and at most +```` wavefronts are able to fit within the resources of an EU. Requesting +more wavefronts can hide memory latency but limits available registers which +can result in spilling. Requesting fewer wavefronts can help reduce cache +thrashing, but can reduce memory latency hiding. + +This attribute controls the machine code generated by the AMDGPU target backend +to ensure it is capable of meeting the requested values. However, when the +kernel is executed there may be other reasons that prevent meeting the request, +for example, there may be wavefronts from other kernels executing on the EU. + +The error will be given if: + - Specified values violate subtarget specifications; + - Specified values are not compatible with values provided through other + attributes; + - The AMDGPU target backend is unable to create machine code that can meet the + request. + }]; +} + +def AMDGPUNumSGPRNumVGPRDocs : Documentation { + let Category = DocCatAMDGPUAttributes; + let Content = [{ +Clang supports the ``__attribute__((amdgpu_num_sgpr()))`` and +``__attribute__((amdgpu_num_vgpr()))`` attributes for the AMDGPU +target. These attributes may be attached to a kernel function definition and are +an optimization hint. + +If these attributes are specified, then the AMDGPU target backend will attempt +to limit the number of SGPRs and/or VGPRs used to the specified value(s). The +number of used SGPRs and/or VGPRs may further be rounded up to satisfy the +allocation requirements or constraints of the subtarget. + +These attributes can be used to test the AMDGPU target backend. It is +recommended that the ``amdgpu_waves_per_eu`` attribute be used to control +resources such as SGPRs and VGPRs since it is aware of the limits for different +subtargets. + +An error will be given if: + - Specified values violate subtarget specifications; + - Specified values are not compatible with values provided through other + attributes; + - The AMDGPU target backend is unable to create machine code that can meet the + request. + }]; } def DocCatCallingConvs : DocumentationCategory<"Calling Conventions"> { Index: include/clang/Basic/DiagnosticSemaKinds.td =================================================================== --- include/clang/Basic/DiagnosticSemaKinds.td +++ include/clang/Basic/DiagnosticSemaKinds.td @@ -2381,6 +2381,8 @@ "'%0' parameter must have pointer%select{| to unqualified pointer}1 type; " "type here is %2">; +def err_attribute_argument_invalid : Error< + "%0 attribute parameter is invalid: %1">; def err_attribute_argument_is_zero : Error< "%0 attribute must be greater than 0">; def warn_attribute_argument_n_negative : Warning< Index: lib/CodeGen/TargetInfo.cpp =================================================================== --- lib/CodeGen/TargetInfo.cpp +++ lib/CodeGen/TargetInfo.cpp @@ -6946,25 +6946,57 @@ static void appendOpenCLVersionMD (CodeGen::CodeGenModule &CGM); void AMDGPUTargetCodeGenInfo::setTargetAttributes( - const Decl *D, - llvm::GlobalValue *GV, - CodeGen::CodeGenModule &M) const { + const Decl *D, + llvm::GlobalValue *GV, + CodeGen::CodeGenModule &M) const { const FunctionDecl *FD = dyn_cast_or_null(D); if (!FD) return; - if (const auto Attr = FD->getAttr()) { - llvm::Function *F = cast(GV); - uint32_t NumVGPR = Attr->getNumVGPR(); - if (NumVGPR != 0) - F->addFnAttr("amdgpu_num_vgpr", llvm::utostr(NumVGPR)); + llvm::Function *F = cast(GV); + + if (const auto Attr = FD->getAttr()) { + unsigned Min = Attr->getMin(); + unsigned Max = Attr->getMax(); + + if (Min != 0) { + assert(Min <= Max && "Min must be less than or equal Max"); + + std::string AttrVal = llvm::utostr(Min) + "," + llvm::utostr(Max); + F->addFnAttr("amdgpu-flat-work-group-size", AttrVal); + } else { + assert(Max == 0 && "Max must be zero"); + } + } + + if (const auto Attr = FD->getAttr()) { + unsigned Min = Attr->getMin(); + unsigned Max = *Attr->max_begin(); + + if (Min != 0) { + assert((Max == 0 || Min <= Max) && "Min must be less than or equal Max"); + + std::string AttrVal = llvm::utostr(Min); + if (Max != 0) + AttrVal = AttrVal + "," + llvm::utostr(Max); + F->addFnAttr("amdgpu-waves-per-eu", AttrVal); + } else { + assert(Max == 0 && "Max must be zero"); + } } if (const auto Attr = FD->getAttr()) { - llvm::Function *F = cast(GV); unsigned NumSGPR = Attr->getNumSGPR(); + if (NumSGPR != 0) - F->addFnAttr("amdgpu_num_sgpr", llvm::utostr(NumSGPR)); + F->addFnAttr("amdgpu-num-sgpr", llvm::utostr(NumSGPR)); + } + + if (const auto Attr = FD->getAttr()) { + uint32_t NumVGPR = Attr->getNumVGPR(); + + if (NumVGPR != 0) + F->addFnAttr("amdgpu-num-vgpr", llvm::utostr(NumVGPR)); } appendOpenCLVersionMD(M); Index: lib/Sema/SemaDeclAttr.cpp =================================================================== --- lib/Sema/SemaDeclAttr.cpp +++ lib/Sema/SemaDeclAttr.cpp @@ -4941,29 +4941,94 @@ } } -static void handleAMDGPUNumVGPRAttr(Sema &S, Decl *D, +static void handleAMDGPUFlatWorkGroupSizeAttr(Sema &S, Decl *D, + const AttributeList &Attr) { + uint32_t Min = 0; + Expr *MinExpr = static_cast(Attr.getArgAsExpr(0)); + if (!checkUInt32Argument(S, Attr, MinExpr, Min)) + return; + + uint32_t Max = 0; + Expr *MaxExpr = static_cast(Attr.getArgAsExpr(1)); + if (!checkUInt32Argument(S, Attr, MaxExpr, Max)) + return; + + if (Min == 0 && Max != 0) { + S.Diag(Attr.getLoc(), diag::err_attribute_argument_invalid) + << Attr.getName() + << "maximum flat work-group size must be zero since minimum flat " + "work-group size is zero"; + return; + } + if (Min > Max) { + S.Diag(Attr.getLoc(), diag::err_attribute_argument_invalid) + << Attr.getName() + << "minimum flat work-group size must not be greater than maximum flat " + "work-group size"; + return; + } + + D->addAttr(::new (S.Context) + AMDGPUFlatWorkGroupSizeAttr(Attr.getLoc(), S.Context, Min, Max, + Attr.getAttributeSpellingListIndex())); +} + +static void handleAMDGPUWavesPerEUAttr(Sema &S, Decl *D, + const AttributeList &Attr) { + uint32_t Min = 0; + Expr *MinExpr = static_cast(Attr.getArgAsExpr(0)); + if (!checkUInt32Argument(S, Attr, MinExpr, Min)) + return; + + uint32_t Max = 0; + if (Attr.getNumArgs() == 2) { + Expr *MaxExpr = static_cast(Attr.getArgAsExpr(1)); + if (!checkUInt32Argument(S, Attr, MaxExpr, Max)) + return; + } + + if (Min == 0 && Max != 0) { + S.Diag(Attr.getLoc(), diag::err_attribute_argument_invalid) + << Attr.getName() + << "maximum number of waves per execution unit must be zero since " + "minimum number of waves per execution unit is zero"; + return; + } + if (Max != 0 && Min > Max) { + S.Diag(Attr.getLoc(), diag::err_attribute_argument_invalid) + << Attr.getName() + << "minimum number of waves per execution unit must not be greater than " + "maximum number of waves per execution unit"; + return; + } + + D->addAttr(::new (S.Context) + AMDGPUWavesPerEUAttr(Attr.getLoc(), S.Context, + Min, &Max, sizeof(Max), + Attr.getAttributeSpellingListIndex())); +} + +static void handleAMDGPUNumSGPRAttr(Sema &S, Decl *D, const AttributeList &Attr) { - uint32_t NumRegs; - Expr *NumRegsExpr = static_cast(Attr.getArgAsExpr(0)); - if (!checkUInt32Argument(S, Attr, NumRegsExpr, NumRegs)) + uint32_t NumSGPR = 0; + Expr *NumSGPRExpr = static_cast(Attr.getArgAsExpr(0)); + if (!checkUInt32Argument(S, Attr, NumSGPRExpr, NumSGPR)) return; D->addAttr(::new (S.Context) - AMDGPUNumVGPRAttr(Attr.getLoc(), S.Context, - NumRegs, + AMDGPUNumSGPRAttr(Attr.getLoc(), S.Context, NumSGPR, Attr.getAttributeSpellingListIndex())); } -static void handleAMDGPUNumSGPRAttr(Sema &S, Decl *D, +static void handleAMDGPUNumVGPRAttr(Sema &S, Decl *D, const AttributeList &Attr) { - uint32_t NumRegs; - Expr *NumRegsExpr = static_cast(Attr.getArgAsExpr(0)); - if (!checkUInt32Argument(S, Attr, NumRegsExpr, NumRegs)) + uint32_t NumVGPR = 0; + Expr *NumVGPRExpr = static_cast(Attr.getArgAsExpr(0)); + if (!checkUInt32Argument(S, Attr, NumVGPRExpr, NumVGPR)) return; D->addAttr(::new (S.Context) - AMDGPUNumSGPRAttr(Attr.getLoc(), S.Context, - NumRegs, + AMDGPUNumVGPRAttr(Attr.getLoc(), S.Context, NumVGPR, Attr.getAttributeSpellingListIndex())); } @@ -5417,12 +5482,18 @@ case AttributeList::AT_NoMips16: handleSimpleAttribute(S, D, Attr); break; - case AttributeList::AT_AMDGPUNumVGPR: - handleAMDGPUNumVGPRAttr(S, D, Attr); + case AttributeList::AT_AMDGPUFlatWorkGroupSize: + handleAMDGPUFlatWorkGroupSizeAttr(S, D, Attr); + break; + case AttributeList::AT_AMDGPUWavesPerEU: + handleAMDGPUWavesPerEUAttr(S, D, Attr); break; case AttributeList::AT_AMDGPUNumSGPR: handleAMDGPUNumSGPRAttr(S, D, Attr); break; + case AttributeList::AT_AMDGPUNumVGPR: + handleAMDGPUNumVGPRAttr(S, D, Attr); + break; case AttributeList::AT_IBAction: handleSimpleAttribute(S, D, Attr); break; @@ -5974,7 +6045,11 @@ } else if (Attr *A = D->getAttr()) { Diag(D->getLocation(), diag::err_opencl_kernel_attr) << A; D->setInvalidDecl(); - } else if (Attr *A = D->getAttr()) { + } else if (Attr *A = D->getAttr()) { + Diag(D->getLocation(), diag::err_attribute_wrong_decl_type) + << A << ExpectedKernelFunction; + D->setInvalidDecl(); + } else if (Attr *A = D->getAttr()) { Diag(D->getLocation(), diag::err_attribute_wrong_decl_type) << A << ExpectedKernelFunction; D->setInvalidDecl(); @@ -5982,6 +6057,10 @@ Diag(D->getLocation(), diag::err_attribute_wrong_decl_type) << A << ExpectedKernelFunction; D->setInvalidDecl(); + } else if (Attr *A = D->getAttr()) { + Diag(D->getLocation(), diag::err_attribute_wrong_decl_type) + << A << ExpectedKernelFunction; + D->setInvalidDecl(); } } } Index: test/CodeGenOpenCL/amdgpu-attrs.cl =================================================================== --- test/CodeGenOpenCL/amdgpu-attrs.cl +++ test/CodeGenOpenCL/amdgpu-attrs.cl @@ -0,0 +1,166 @@ +// RUN: %clang_cc1 -triple amdgcn-- -target-cpu tahiti -O0 -emit-llvm -o - %s | FileCheck %s +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -O0 -emit-llvm -verify -o - %s | FileCheck -check-prefix=X86 %s + +__attribute__((amdgpu_flat_work_group_size(0, 0))) // expected-no-diagnostics +kernel void flat_work_group_size_0_0() {} +__attribute__((amdgpu_waves_per_eu(0))) // expected-no-diagnostics +kernel void waves_per_eu_0() {} +__attribute__((amdgpu_waves_per_eu(0, 0))) // expected-no-diagnostics +kernel void waves_per_eu_0_0() {} +__attribute__((amdgpu_num_sgpr(0))) // expected-no-diagnostics +kernel void num_sgpr0() {} +__attribute__((amdgpu_num_vgpr(0))) // expected-no-diagnostics +kernel void num_vgpr0() {} + +__attribute__((amdgpu_flat_work_group_size(0, 0), amdgpu_waves_per_eu(0))) // expected-no-diagnostics +kernel void flat_work_group_size_0_0_waves_per_eu_0() {} +__attribute__((amdgpu_flat_work_group_size(0, 0), amdgpu_waves_per_eu(0, 0))) // expected-no-diagnostics +kernel void flat_work_group_size_0_0_waves_per_eu_0_0() {} +__attribute__((amdgpu_flat_work_group_size(0, 0), amdgpu_num_sgpr(0))) // expected-no-diagnostics +kernel void flat_work_group_size_0_0_num_sgpr_0() {} +__attribute__((amdgpu_flat_work_group_size(0, 0), amdgpu_num_vgpr(0))) // expected-no-diagnostics +kernel void flat_work_group_size_0_0_num_vgpr_0() {} +__attribute__((amdgpu_waves_per_eu(0), amdgpu_num_sgpr(0))) // expected-no-diagnostics +kernel void waves_per_eu_0_num_sgpr_0() {} +__attribute__((amdgpu_waves_per_eu(0), amdgpu_num_vgpr(0))) // expected-no-diagnostics +kernel void waves_per_eu_0_num_vgpr_0() {} +__attribute__((amdgpu_waves_per_eu(0, 0), amdgpu_num_sgpr(0))) // expected-no-diagnostics +kernel void waves_per_eu_0_0_num_sgpr_0() {} +__attribute__((amdgpu_waves_per_eu(0, 0), amdgpu_num_vgpr(0))) // expected-no-diagnostics +kernel void waves_per_eu_0_0_num_vgpr_0() {} +__attribute__((amdgpu_num_sgpr(0), amdgpu_num_vgpr(0))) // expected-no-diagnostics +kernel void num_sgpr_0_num_vgpr_0() {} + +__attribute__((amdgpu_flat_work_group_size(0, 0), amdgpu_waves_per_eu(0), amdgpu_num_sgpr(0))) // expected-no-diagnostics +kernel void flat_work_group_size_0_0_waves_per_eu_0_num_sgpr_0() {} +__attribute__((amdgpu_flat_work_group_size(0, 0), amdgpu_waves_per_eu(0), amdgpu_num_vgpr(0))) // expected-no-diagnostics +kernel void flat_work_group_size_0_0_waves_per_eu_0_num_vgpr_0() {} +__attribute__((amdgpu_flat_work_group_size(0, 0), amdgpu_waves_per_eu(0, 0), amdgpu_num_sgpr(0))) // expected-no-diagnostics +kernel void flat_work_group_size_0_0_waves_per_eu_0_0_num_sgpr_0() {} +__attribute__((amdgpu_flat_work_group_size(0, 0), amdgpu_waves_per_eu(0, 0), amdgpu_num_vgpr(0))) // expected-no-diagnostics +kernel void flat_work_group_size_0_0_waves_per_eu_0_0_num_vgpr_0() {} + +__attribute__((amdgpu_flat_work_group_size(0, 0), amdgpu_waves_per_eu(0), amdgpu_num_sgpr(0), amdgpu_num_vgpr(0))) // expected-no-diagnostics +kernel void flat_work_group_size_0_0_waves_per_eu_0_num_sgpr_0_num_vgpr_0() {} +__attribute__((amdgpu_flat_work_group_size(0, 0), amdgpu_waves_per_eu(0, 0), amdgpu_num_sgpr(0), amdgpu_num_vgpr(0))) // expected-no-diagnostics +kernel void flat_work_group_size_0_0_waves_per_eu_0_0_num_sgpr_0_num_vgpr_0() {} + +__attribute__((amdgpu_flat_work_group_size(32, 64))) // expected-no-diagnostics +kernel void flat_work_group_size_32_64() { +// CHECK: define amdgpu_kernel void @flat_work_group_size_32_64() [[FLAT_WORK_GROUP_SIZE_32_64:#[0-9]+]] +} +__attribute__((amdgpu_waves_per_eu(2))) // expected-no-diagnostics +kernel void waves_per_eu_2() { +// CHECK: define amdgpu_kernel void @waves_per_eu_2() [[WAVES_PER_EU_2:#[0-9]+]] +} +__attribute__((amdgpu_waves_per_eu(2, 4))) // expected-no-diagnostics +kernel void waves_per_eu_2_4() { +// CHECK: define amdgpu_kernel void @waves_per_eu_2_4() [[WAVES_PER_EU_2_4:#[0-9]+]] +} +__attribute__((amdgpu_num_sgpr(32))) // expected-no-diagnostics +kernel void num_sgpr_32() { +// CHECK: define amdgpu_kernel void @num_sgpr_32() [[NUM_SGPR_32:#[0-9]+]] +} +__attribute__((amdgpu_num_vgpr(64))) // expected-no-diagnostics +kernel void num_vgpr_64() { +// CHECK: define amdgpu_kernel void @num_vgpr_64() [[NUM_VGPR_64:#[0-9]+]] +} + +__attribute__((amdgpu_flat_work_group_size(32, 64), amdgpu_waves_per_eu(2))) // expected-no-diagnostics +kernel void flat_work_group_size_32_64_waves_per_eu_2() { +// CHECK: define amdgpu_kernel void @flat_work_group_size_32_64_waves_per_eu_2() [[FLAT_WORK_GROUP_SIZE_32_64_WAVES_PER_EU_2:#[0-9]+]] +} +__attribute__((amdgpu_flat_work_group_size(32, 64), amdgpu_waves_per_eu(2, 4))) // expected-no-diagnostics +kernel void flat_work_group_size_32_64_waves_per_eu_2_4() { +// CHECK: define amdgpu_kernel void @flat_work_group_size_32_64_waves_per_eu_2_4() [[FLAT_WORK_GROUP_SIZE_32_64_WAVES_PER_EU_2_4:#[0-9]+]] +} +__attribute__((amdgpu_flat_work_group_size(32, 64), amdgpu_num_sgpr(32))) // expected-no-diagnostics +kernel void flat_work_group_size_32_64_num_sgpr_32() { +// CHECK: define amdgpu_kernel void @flat_work_group_size_32_64_num_sgpr_32() [[FLAT_WORK_GROUP_SIZE_32_64_NUM_SGPR_32:#[0-9]+]] +} +__attribute__((amdgpu_flat_work_group_size(32, 64), amdgpu_num_vgpr(64))) // expected-no-diagnostics +kernel void flat_work_group_size_32_64_num_vgpr_64() { +// CHECK: define amdgpu_kernel void @flat_work_group_size_32_64_num_vgpr_64() [[FLAT_WORK_GROUP_SIZE_32_64_NUM_VGPR_64:#[0-9]+]] +} +__attribute__((amdgpu_waves_per_eu(2), amdgpu_num_sgpr(32))) // expected-no-diagnostics +kernel void waves_per_eu_2_num_sgpr_32() { +// CHECK: define amdgpu_kernel void @waves_per_eu_2_num_sgpr_32() [[WAVES_PER_EU_2_NUM_SGPR_32:#[0-9]+]] +} +__attribute__((amdgpu_waves_per_eu(2), amdgpu_num_vgpr(64))) // expected-no-diagnostics +kernel void waves_per_eu_2_num_vgpr_64() { +// CHECK: define amdgpu_kernel void @waves_per_eu_2_num_vgpr_64() [[WAVES_PER_EU_2_NUM_VGPR_64:#[0-9]+]] +} +__attribute__((amdgpu_waves_per_eu(2, 4), amdgpu_num_sgpr(32))) // expected-no-diagnostics +kernel void waves_per_eu_2_4_num_sgpr_32() { +// CHECK: define amdgpu_kernel void @waves_per_eu_2_4_num_sgpr_32() [[WAVES_PER_EU_2_4_NUM_SGPR_32:#[0-9]+]] +} +__attribute__((amdgpu_waves_per_eu(2, 4), amdgpu_num_vgpr(64))) // expected-no-diagnostics +kernel void waves_per_eu_2_4_num_vgpr_64() { +// CHECK: define amdgpu_kernel void @waves_per_eu_2_4_num_vgpr_64() [[WAVES_PER_EU_2_4_NUM_VGPR_64:#[0-9]+]] +} +__attribute__((amdgpu_num_sgpr(32), amdgpu_num_vgpr(64))) // expected-no-diagnostics +kernel void num_sgpr_32_num_vgpr_64() { +// CHECK: define amdgpu_kernel void @num_sgpr_32_num_vgpr_64() [[NUM_SGPR_32_NUM_VGPR_64:#[0-9]+]] +} + +__attribute__((amdgpu_flat_work_group_size(32, 64), amdgpu_waves_per_eu(2), amdgpu_num_sgpr(32))) +kernel void flat_work_group_size_32_64_waves_per_eu_2_num_sgpr_32() { +// CHECK: define amdgpu_kernel void @flat_work_group_size_32_64_waves_per_eu_2_num_sgpr_32() [[FLAT_WORK_GROUP_SIZE_32_64_WAVES_PER_EU_2_NUM_SGPR_32:#[0-9]+]] +} +__attribute__((amdgpu_flat_work_group_size(32, 64), amdgpu_waves_per_eu(2), amdgpu_num_vgpr(64))) +kernel void flat_work_group_size_32_64_waves_per_eu_2_num_vgpr_64() { +// CHECK: define amdgpu_kernel void @flat_work_group_size_32_64_waves_per_eu_2_num_vgpr_64() [[FLAT_WORK_GROUP_SIZE_32_64_WAVES_PER_EU_2_NUM_VGPR_64:#[0-9]+]] +} +__attribute__((amdgpu_flat_work_group_size(32, 64), amdgpu_waves_per_eu(2, 4), amdgpu_num_sgpr(32))) +kernel void flat_work_group_size_32_64_waves_per_eu_2_4_num_sgpr_32() { +// CHECK: define amdgpu_kernel void @flat_work_group_size_32_64_waves_per_eu_2_4_num_sgpr_32() [[FLAT_WORK_GROUP_SIZE_32_64_WAVES_PER_EU_2_4_NUM_SGPR_32:#[0-9]+]] +} +__attribute__((amdgpu_flat_work_group_size(32, 64), amdgpu_waves_per_eu(2, 4), amdgpu_num_vgpr(64))) +kernel void flat_work_group_size_32_64_waves_per_eu_2_4_num_vgpr_64() { +// CHECK: define amdgpu_kernel void @flat_work_group_size_32_64_waves_per_eu_2_4_num_vgpr_64() [[FLAT_WORK_GROUP_SIZE_32_64_WAVES_PER_EU_2_4_NUM_VGPR_64:#[0-9]+]] +} + +__attribute__((amdgpu_flat_work_group_size(32, 64), amdgpu_waves_per_eu(2), amdgpu_num_sgpr(32), amdgpu_num_vgpr(64))) // expected-no-diagnostics +kernel void flat_work_group_size_32_64_waves_per_eu_2_num_sgpr_32_num_vgpr_64() { +// CHECK: define amdgpu_kernel void @flat_work_group_size_32_64_waves_per_eu_2_num_sgpr_32_num_vgpr_64() [[FLAT_WORK_GROUP_SIZE_32_64_WAVES_PER_EU_2_NUM_SGPR_32_NUM_VGPR_64:#[0-9]+]] +} +__attribute__((amdgpu_flat_work_group_size(32, 64), amdgpu_waves_per_eu(2, 4), amdgpu_num_sgpr(32), amdgpu_num_vgpr(64))) // expected-no-diagnostics +kernel void flat_work_group_size_32_64_waves_per_eu_2_4_num_sgpr_32_num_vgpr_64() { +// CHECK: define amdgpu_kernel void @flat_work_group_size_32_64_waves_per_eu_2_4_num_sgpr_32_num_vgpr_64() [[FLAT_WORK_GROUP_SIZE_32_64_WAVES_PER_EU_2_4_NUM_SGPR_32_NUM_VGPR_64:#[0-9]+]] +} + +// Make sure this is silently accepted on other targets. +// X86-NOT: "amdgpu-flat-work-group-size" +// X86-NOT: "amdgpu-waves-per-eu" +// X86-NOT: "amdgpu-num-vgpr" +// X86-NOT: "amdgpu-num-sgpr" + +// CHECK-NOT: "amdgpu-flat-work-group-size"="0,0" +// CHECK-NOT: "amdgpu-waves-per-eu"="0" +// CHECK-NOT: "amdgpu-waves-per-eu"="0,0" +// CHECK-NOT: "amdgpu-num-sgpr"="0" +// CHECK-NOT: "amdgpu-num-vgpr"="0" + +// CHECK-DAG: attributes [[FLAT_WORK_GROUP_SIZE_32_64]] = { nounwind "amdgpu-flat-work-group-size"="32,64" +// CHECK-DAG: attributes [[WAVES_PER_EU_2]] = { nounwind "amdgpu-waves-per-eu"="2" +// CHECK-DAG: attributes [[WAVES_PER_EU_2_4]] = { nounwind "amdgpu-waves-per-eu"="2,4" +// CHECK-DAG: attributes [[NUM_SGPR_32]] = { nounwind "amdgpu-num-sgpr"="32" +// CHECK-DAG: attributes [[NUM_VGPR_64]] = { nounwind "amdgpu-num-vgpr"="64" + +// CHECK-DAG: attributes [[FLAT_WORK_GROUP_SIZE_32_64_WAVES_PER_EU_2]] = { nounwind "amdgpu-flat-work-group-size"="32,64" "amdgpu-waves-per-eu"="2" +// CHECK-DAG: attributes [[FLAT_WORK_GROUP_SIZE_32_64_WAVES_PER_EU_2_4]] = { nounwind "amdgpu-flat-work-group-size"="32,64" "amdgpu-waves-per-eu"="2,4" +// CHECK-DAG: attributes [[FLAT_WORK_GROUP_SIZE_32_64_NUM_SGPR_32]] = { nounwind "amdgpu-flat-work-group-size"="32,64" "amdgpu-num-sgpr"="32" +// CHECK-DAG: attributes [[FLAT_WORK_GROUP_SIZE_32_64_NUM_VGPR_64]] = { nounwind "amdgpu-flat-work-group-size"="32,64" "amdgpu-num-vgpr"="64" +// CHECK-DAG: attributes [[WAVES_PER_EU_2_NUM_SGPR_32]] = { nounwind "amdgpu-num-sgpr"="32" "amdgpu-waves-per-eu"="2" +// CHECK-DAG: attributes [[WAVES_PER_EU_2_NUM_VGPR_64]] = { nounwind "amdgpu-num-vgpr"="64" "amdgpu-waves-per-eu"="2" +// CHECK-DAG: attributes [[WAVES_PER_EU_2_4_NUM_SGPR_32]] = { nounwind "amdgpu-num-sgpr"="32" "amdgpu-waves-per-eu"="2,4" +// CHECK-DAG: attributes [[WAVES_PER_EU_2_4_NUM_VGPR_64]] = { nounwind "amdgpu-num-vgpr"="64" "amdgpu-waves-per-eu"="2,4" +// CHECK-DAG: attributes [[NUM_SGPR_32_NUM_VGPR_64]] = { nounwind "amdgpu-num-sgpr"="32" "amdgpu-num-vgpr"="64" + +// CHECK-DAG: attributes [[FLAT_WORK_GROUP_SIZE_32_64_WAVES_PER_EU_2_NUM_SGPR_32]] = { nounwind "amdgpu-flat-work-group-size"="32,64" "amdgpu-num-sgpr"="32" "amdgpu-waves-per-eu"="2" +// CHECK-DAG: attributes [[FLAT_WORK_GROUP_SIZE_32_64_WAVES_PER_EU_2_NUM_VGPR_64]] = { nounwind "amdgpu-flat-work-group-size"="32,64" "amdgpu-num-vgpr"="64" "amdgpu-waves-per-eu"="2" +// CHECK-DAG: attributes [[FLAT_WORK_GROUP_SIZE_32_64_WAVES_PER_EU_2_4_NUM_SGPR_32]] = { nounwind "amdgpu-flat-work-group-size"="32,64" "amdgpu-num-sgpr"="32" "amdgpu-waves-per-eu"="2,4" +// CHECK-DAG: attributes [[FLAT_WORK_GROUP_SIZE_32_64_WAVES_PER_EU_2_4_NUM_VGPR_64]] = { nounwind "amdgpu-flat-work-group-size"="32,64" "amdgpu-num-vgpr"="64" "amdgpu-waves-per-eu"="2,4" + +// CHECK-DAG: attributes [[FLAT_WORK_GROUP_SIZE_32_64_WAVES_PER_EU_2_NUM_SGPR_32_NUM_VGPR_64]] = { nounwind "amdgpu-flat-work-group-size"="32,64" "amdgpu-num-sgpr"="32" "amdgpu-num-vgpr"="64" "amdgpu-waves-per-eu"="2" +// CHECK-DAG: attributes [[FLAT_WORK_GROUP_SIZE_32_64_WAVES_PER_EU_2_4_NUM_SGPR_32_NUM_VGPR_64]] = { nounwind "amdgpu-flat-work-group-size"="32,64" "amdgpu-num-sgpr"="32" "amdgpu-num-vgpr"="64" "amdgpu-waves-per-eu"="2,4" Index: test/CodeGenOpenCL/amdgpu-num-gpr-attr.cl =================================================================== --- test/CodeGenOpenCL/amdgpu-num-gpr-attr.cl +++ test/CodeGenOpenCL/amdgpu-num-gpr-attr.cl @@ -1,48 +0,0 @@ -// RUN: %clang_cc1 -triple amdgcn-- -target-cpu tahiti -O0 -emit-llvm -o - %s | FileCheck %s -// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -O0 -emit-llvm -verify -o - %s | FileCheck -check-prefix=X86 %s - -// Make sure this is silently accepted on other targets. - -__attribute__((amdgpu_num_vgpr(64))) // expected-no-diagnostics -kernel void test_num_vgpr64() { -// CHECK: define amdgpu_kernel void @test_num_vgpr64() [[ATTR_VGPR64:#[0-9]+]] -} - -__attribute__((amdgpu_num_sgpr(32))) // expected-no-diagnostics -kernel void test_num_sgpr32() { -// CHECK: define amdgpu_kernel void @test_num_sgpr32() [[ATTR_SGPR32:#[0-9]+]] -} - -__attribute__((amdgpu_num_vgpr(64), amdgpu_num_sgpr(32))) // expected-no-diagnostics -kernel void test_num_vgpr64_sgpr32() { -// CHECK: define amdgpu_kernel void @test_num_vgpr64_sgpr32() [[ATTR_VGPR64_SGPR32:#[0-9]+]] - -} - -__attribute__((amdgpu_num_sgpr(20), amdgpu_num_vgpr(40))) // expected-no-diagnostics -kernel void test_num_sgpr20_vgpr40() { -// CHECK: define amdgpu_kernel void @test_num_sgpr20_vgpr40() [[ATTR_SGPR20_VGPR40:#[0-9]+]] -} - -__attribute__((amdgpu_num_vgpr(0))) // expected-no-diagnostics -kernel void test_num_vgpr0() { -} - -__attribute__((amdgpu_num_sgpr(0))) // expected-no-diagnostics -kernel void test_num_sgpr0() { -} - -__attribute__((amdgpu_num_vgpr(0), amdgpu_num_sgpr(0))) // expected-no-diagnostics -kernel void test_num_vgpr0_sgpr0() { -} - - -// X86-NOT: "amdgpu_num_vgpr" -// X86-NOT: "amdgpu_num_sgpr" - -// CHECK-NOT: "amdgpu_num_vgpr"="0" -// CHECK-NOT: "amdgpu_num_sgpr"="0" -// CHECK-DAG: attributes [[ATTR_VGPR64]] = { nounwind "amdgpu_num_vgpr"="64" -// CHECK-DAG: attributes [[ATTR_SGPR32]] = { nounwind "amdgpu_num_sgpr"="32" -// CHECK-DAG: attributes [[ATTR_VGPR64_SGPR32]] = { nounwind "amdgpu_num_sgpr"="32" "amdgpu_num_vgpr"="64" -// CHECK-DAG: attributes [[ATTR_SGPR20_VGPR40]] = { nounwind "amdgpu_num_sgpr"="20" "amdgpu_num_vgpr"="40" Index: test/SemaCUDA/amdgpu-attrs.cu =================================================================== --- test/SemaCUDA/amdgpu-attrs.cu +++ test/SemaCUDA/amdgpu-attrs.cu @@ -0,0 +1,110 @@ +// RUN: %clang_cc1 -fsyntax-only -verify %s + +#include "Inputs/cuda.h" + + +// expected-error@+2 {{'amdgpu_flat_work_group_size' attribute only applies to kernel functions}} +__attribute__((amdgpu_flat_work_group_size(32, 64))) +__global__ void flat_work_group_size_32_64() {} + +// expected-error@+2 {{'amdgpu_waves_per_eu' attribute only applies to kernel functions}} +__attribute__((amdgpu_waves_per_eu(2))) +__global__ void waves_per_eu_2() {} + +// expected-error@+2 {{'amdgpu_waves_per_eu' attribute only applies to kernel functions}} +__attribute__((amdgpu_waves_per_eu(2, 4))) +__global__ void waves_per_eu_2_4() {} + +// expected-error@+2 {{'amdgpu_num_sgpr' attribute only applies to kernel functions}} +__attribute__((amdgpu_num_sgpr(32))) +__global__ void num_sgpr_32() {} + +// expected-error@+2 {{'amdgpu_num_vgpr' attribute only applies to kernel functions}} +__attribute__((amdgpu_num_vgpr(64))) +__global__ void num_vgpr_64() {} + + +// expected-error@+3 {{'amdgpu_flat_work_group_size' attribute only applies to kernel functions}} +// fixme-expected-error@+2 {{'amdgpu_waves_per_eu' attribute only applies to kernel functions}} +__attribute__((amdgpu_flat_work_group_size(32, 64), amdgpu_waves_per_eu(2))) +__global__ void flat_work_group_size_32_64_waves_per_eu_2() {} + +// expected-error@+3 {{'amdgpu_flat_work_group_size' attribute only applies to kernel functions}} +// fixme-expected-error@+2 {{'amdgpu_waves_per_eu' attribute only applies to kernel functions}} +__attribute__((amdgpu_flat_work_group_size(32, 64), amdgpu_waves_per_eu(2, 4))) +__global__ void flat_work_group_size_32_64_waves_per_eu_2_4() {} + +// expected-error@+3 {{'amdgpu_flat_work_group_size' attribute only applies to kernel functions}} +// fixme-expected-error@+2 {{'amdgpu_num_sgpr' attribute only applies to kernel functions}} +__attribute__((amdgpu_flat_work_group_size(32, 64), amdgpu_num_sgpr(32))) +__global__ void flat_work_group_size_32_64_num_sgpr_32() {} + +// expected-error@+3 {{'amdgpu_flat_work_group_size' attribute only applies to kernel functions}} +// fixme-expected-error@+2 {{'amdgpu_num_vgpr' attribute only applies to kernel functions}} +__attribute__((amdgpu_flat_work_group_size(32, 64), amdgpu_num_vgpr(64))) +__global__ void flat_work_group_size_32_64_num_vgpr_64() {} + +// expected-error@+3 {{'amdgpu_waves_per_eu' attribute only applies to kernel functions}} +// fixme-expected-error@+2 {{'amdgpu_num_sgpr' attribute only applies to kernel functions}} +__attribute__((amdgpu_waves_per_eu(2), amdgpu_num_sgpr(32))) +__global__ void waves_per_eu_2_num_sgpr_32() {} + +// expected-error@+3 {{'amdgpu_waves_per_eu' attribute only applies to kernel functions}} +// fixme-expected-error@+2 {{'amdgpu_num_vgpr' attribute only applies to kernel functions}} +__attribute__((amdgpu_waves_per_eu(2), amdgpu_num_vgpr(64))) +__global__ void waves_per_eu_2_num_vgpr_64() {} + +// expected-error@+3 {{'amdgpu_waves_per_eu' attribute only applies to kernel functions}} +// fixme-expected-error@+2 {{'amdgpu_num_sgpr' attribute only applies to kernel functions}} +__attribute__((amdgpu_waves_per_eu(2, 4), amdgpu_num_sgpr(32))) +__global__ void waves_per_eu_2_4_num_sgpr_32() {} + +// expected-error@+3 {{'amdgpu_waves_per_eu' attribute only applies to kernel functions}} +// fixme-expected-error@+2 {{'amdgpu_num_vgpr' attribute only applies to kernel functions}} +__attribute__((amdgpu_waves_per_eu(2, 4), amdgpu_num_vgpr(64))) +__global__ void waves_per_eu_2_4_num_vgpr_64() {} + +// expected-error@+3 {{'amdgpu_num_sgpr' attribute only applies to kernel functions}} +// fixme-expected-error@+2 {{'amdgpu_num_vgpr' attribute only applies to kernel functions}} +__attribute__((amdgpu_num_sgpr(32), amdgpu_num_vgpr(64))) +__global__ void num_sgpr_32_num_vgpr_64() {} + + +// expected-error@+4 {{'amdgpu_flat_work_group_size' attribute only applies to kernel functions}} +// fixme-expected-error@+3 {{'amdgpu_waves_per_eu' attribute only applies to kernel functions}} +// fixme-expected-error@+2 {{'amdgpu_num_sgpr' attribute only applies to kernel functions}} +__attribute__((amdgpu_flat_work_group_size(32, 64), amdgpu_waves_per_eu(2), amdgpu_num_sgpr(32))) +__global__ void flat_work_group_size_32_64_waves_per_eu_2_num_sgpr_32() {} + +// expected-error@+4 {{'amdgpu_flat_work_group_size' attribute only applies to kernel functions}} +// fixme-expected-error@+3 {{'amdgpu_waves_per_eu' attribute only applies to kernel functions}} +// fixme-expected-error@+2 {{'amdgpu_num_vgpr' attribute only applies to kernel functions}} +__attribute__((amdgpu_flat_work_group_size(32, 64), amdgpu_waves_per_eu(2), amdgpu_num_vgpr(64))) +__global__ void flat_work_group_size_32_64_waves_per_eu_2_num_vgpr_64() {} + +// expected-error@+4 {{'amdgpu_flat_work_group_size' attribute only applies to kernel functions}} +// fixme-expected-error@+3 {{'amdgpu_waves_per_eu' attribute only applies to kernel functions}} +// fixme-expected-error@+2 {{'amdgpu_num_sgpr' attribute only applies to kernel functions}} +__attribute__((amdgpu_flat_work_group_size(32, 64), amdgpu_waves_per_eu(2, 4), amdgpu_num_sgpr(32))) +__global__ void flat_work_group_size_32_64_waves_per_eu_2_4_num_sgpr_32() {} + +// expected-error@+4 {{'amdgpu_flat_work_group_size' attribute only applies to kernel functions}} +// fixme-expected-error@+3 {{'amdgpu_waves_per_eu' attribute only applies to kernel functions}} +// fixme-expected-error@+2 {{'amdgpu_num_vgpr' attribute only applies to kernel functions}} +__attribute__((amdgpu_flat_work_group_size(32, 64), amdgpu_waves_per_eu(2, 4), amdgpu_num_vgpr(64))) +__global__ void flat_work_group_size_32_64_waves_per_eu_2_4_num_vgpr_64() {} + + +// expected-error@+5 {{'amdgpu_flat_work_group_size' attribute only applies to kernel functions}} +// fixme-expected-error@+4 {{'amdgpu_waves_per_eu' attribute only applies to kernel functions}} +// fixme-expected-error@+3 {{'amdgpu_num_sgpr' attribute only applies to kernel functions}} +// fixme-expected-error@+2 {{'amdgpu_num_vgpr' attribute only applies to kernel functions}} +__attribute__((amdgpu_flat_work_group_size(32, 64), amdgpu_waves_per_eu(2), amdgpu_num_sgpr(32), amdgpu_num_vgpr(64))) +__global__ void flat_work_group_size_32_64_waves_per_eu_2_num_sgpr_32_num_vgpr_64() {} + +// expected-error@+5 {{'amdgpu_flat_work_group_size' attribute only applies to kernel functions}} +// fixme-expected-error@+4 {{'amdgpu_waves_per_eu' attribute only applies to kernel functions}} +// fixme-expected-error@+3 {{'amdgpu_num_sgpr' attribute only applies to kernel functions}} +// fixme-expected-error@+2 {{'amdgpu_num_vgpr' attribute only applies to kernel functions}} +__attribute__((amdgpu_flat_work_group_size(32, 64), amdgpu_waves_per_eu(2, 4), amdgpu_num_sgpr(32), amdgpu_num_vgpr(64))) +__global__ void flat_work_group_size_32_64_waves_per_eu_2_4_num_sgpr_32_num_vgpr_64() {} Index: test/SemaCUDA/amdgpu-num-gpr-attr.cu =================================================================== --- test/SemaCUDA/amdgpu-num-gpr-attr.cu +++ test/SemaCUDA/amdgpu-num-gpr-attr.cu @@ -1,14 +0,0 @@ -// RUN: %clang_cc1 -fsyntax-only -verify %s - -#include "Inputs/cuda.h" - -__attribute__((amdgpu_num_vgpr(64))) -__global__ void test_num_vgpr() { } // expected-error {{'amdgpu_num_vgpr' attribute only applies to kernel functions}} - -__attribute__((amdgpu_num_sgpr(32))) -__global__ void test_num_sgpr() { } // expected-error {{'amdgpu_num_sgpr' attribute only applies to kernel functions}} - -// fixme-expected-error@+3 {{'amdgpu_num_sgpr' attribute only applies to kernel functions}} -// expected-error@+2 {{'amdgpu_num_vgpr' attribute only applies to kernel functions}} -__attribute__((amdgpu_num_sgpr(32), amdgpu_num_vgpr(64))) -__global__ void test_num_vgpr_num_sgpr() { } Index: test/SemaOpenCL/amdgpu-attrs.cl =================================================================== --- test/SemaOpenCL/amdgpu-attrs.cl +++ test/SemaOpenCL/amdgpu-attrs.cl @@ -0,0 +1,64 @@ +// RUN: %clang_cc1 -triple amdgcn-- -verify -fsyntax-only %s + +typedef __attribute__((amdgpu_flat_work_group_size(32, 64))) struct struct_flat_work_group_size_32_64 { // expected-error {{'amdgpu_flat_work_group_size' attribute only applies to kernel functions}} + int x; + float y; +} struct_flat_work_group_size_32_64; +typedef __attribute__((amdgpu_waves_per_eu(2))) struct struct_waves_per_eu_2 { // expected-error {{'amdgpu_waves_per_eu' attribute only applies to kernel functions}} + int x; + float y; +} struct_waves_per_eu_2; +typedef __attribute__((amdgpu_waves_per_eu(2, 4))) struct struct_waves_per_eu_2_4 { // expected-error {{'amdgpu_waves_per_eu' attribute only applies to kernel functions}} + int x; + float y; +} struct_waves_per_eu_2_4; +typedef __attribute__((amdgpu_num_sgpr(32))) struct struct_num_sgpr_32 { // expected-error {{'amdgpu_num_sgpr' attribute only applies to kernel functions}} + int x; + float y; +} struct_num_sgpr_32; +typedef __attribute__((amdgpu_num_vgpr(64))) struct struct_num_vgpr_64 { // expected-error {{'amdgpu_num_vgpr' attribute only applies to kernel functions}} + int x; + float y; +} struct_num_vgpr_64; + +__attribute__((amdgpu_flat_work_group_size(32, 64))) void func_flat_work_group_size_32_64() {} // expected-error {{'amdgpu_flat_work_group_size' attribute only applies to kernel functions}} +__attribute__((amdgpu_waves_per_eu(2))) void func_waves_per_eu_2() {} // expected-error {{'amdgpu_waves_per_eu' attribute only applies to kernel functions}} +__attribute__((amdgpu_waves_per_eu(2, 4))) void func_waves_per_eu_2_4() {} // expected-error {{'amdgpu_waves_per_eu' attribute only applies to kernel functions}} +__attribute__((amdgpu_num_sgpr(32))) void func_num_sgpr_32() {} // expected-error {{'amdgpu_num_sgpr' attribute only applies to kernel functions}} +__attribute__((amdgpu_num_vgpr(64))) void func_num_vgpr_64() {} // expected-error {{'amdgpu_num_vgpr' attribute only applies to kernel functions}} + +__attribute__((amdgpu_flat_work_group_size("ABC", "ABC"))) kernel void kernel_flat_work_group_size_ABC_ABC() {} // expected-error {{'amdgpu_flat_work_group_size' attribute requires an integer constant}} +__attribute__((amdgpu_flat_work_group_size(32, "ABC"))) kernel void kernel_flat_work_group_size_32_ABC() {} // expected-error {{'amdgpu_flat_work_group_size' attribute requires an integer constant}} +__attribute__((amdgpu_flat_work_group_size("ABC", 64))) kernel void kernel_flat_work_group_size_ABC_64() {} // expected-error {{'amdgpu_flat_work_group_size' attribute requires an integer constant}} +__attribute__((amdgpu_waves_per_eu("ABC"))) kernel void kernel_waves_per_eu_ABC() {} // expected-error {{'amdgpu_waves_per_eu' attribute requires an integer constant}} +__attribute__((amdgpu_waves_per_eu(2, "ABC"))) kernel void kernel_waves_per_eu_2_ABC() {} // expected-error {{'amdgpu_waves_per_eu' attribute requires an integer constant}} +__attribute__((amdgpu_waves_per_eu("ABC", 4))) kernel void kernel_waves_per_eu_ABC_4() {} // expected-error {{'amdgpu_waves_per_eu' attribute requires an integer constant}} +__attribute__((amdgpu_num_sgpr("ABC"))) kernel void kernel_num_sgpr_ABC() {} // expected-error {{'amdgpu_num_sgpr' attribute requires an integer constant}} +__attribute__((amdgpu_num_vgpr("ABC"))) kernel void kernel_num_vgpr_ABC() {} // expected-error {{'amdgpu_num_vgpr' attribute requires an integer constant}} + +__attribute__((amdgpu_flat_work_group_size(4294967296, 4294967296))) kernel void kernel_flat_work_group_size_L_L() {} // expected-error {{integer constant expression evaluates to value 4294967296 that cannot be represented in a 32-bit unsigned integer type}} +__attribute__((amdgpu_flat_work_group_size(32, 4294967296))) kernel void kernel_flat_work_group_size_32_L() {} // expected-error {{integer constant expression evaluates to value 4294967296 that cannot be represented in a 32-bit unsigned integer type}} +__attribute__((amdgpu_flat_work_group_size(4294967296, 64))) kernel void kernel_flat_work_group_size_L_64() {} // expected-error {{integer constant expression evaluates to value 4294967296 that cannot be represented in a 32-bit unsigned integer type}} +__attribute__((amdgpu_waves_per_eu(4294967296))) kernel void kernel_waves_per_eu_L() {} // expected-error {{integer constant expression evaluates to value 4294967296 that cannot be represented in a 32-bit unsigned integer type}} +__attribute__((amdgpu_waves_per_eu(2, 4294967296))) kernel void kernel_waves_per_eu_2_L() {} // expected-error {{integer constant expression evaluates to value 4294967296 that cannot be represented in a 32-bit unsigned integer type}} +__attribute__((amdgpu_waves_per_eu(4294967296, 4))) kernel void kernel_waves_per_eu_L_4() {} // expected-error {{integer constant expression evaluates to value 4294967296 that cannot be represented in a 32-bit unsigned integer type}} +__attribute__((amdgpu_num_sgpr(4294967296))) kernel void kernel_num_sgpr_L() {} // expected-error {{integer constant expression evaluates to value 4294967296 that cannot be represented in a 32-bit unsigned integer type}} +__attribute__((amdgpu_num_vgpr(4294967296))) kernel void kernel_num_vgpr_L() {} // expected-error {{integer constant expression evaluates to value 4294967296 that cannot be represented in a 32-bit unsigned integer type}} + +__attribute__((amdgpu_flat_work_group_size(0, 64))) kernel void kernel_flat_work_group_size_0_64() {} // expected-error {{'amdgpu_flat_work_group_size' attribute parameter is invalid: maximum flat work-group size must be zero since minimum flat work-group size is zero}} +__attribute__((amdgpu_waves_per_eu(0, 4))) kernel void kernel_waves_per_eu_0_4() {} // expected-error {{'amdgpu_waves_per_eu' attribute parameter is invalid: maximum number of waves per execution unit must be zero since minimum number of waves per execution unit is zero}} + +__attribute__((amdgpu_flat_work_group_size(64, 32))) kernel void kernel_flat_work_group_size_64_32() {} // expected-error {{'amdgpu_flat_work_group_size' attribute parameter is invalid: minimum flat work-group size must not be greater than maximum flat work-group size}} +__attribute__((amdgpu_waves_per_eu(4, 2))) kernel void kernel_waves_per_eu_4_2() {} // expected-error {{'amdgpu_waves_per_eu' attribute parameter is invalid: minimum number of waves per execution unit must not be greater than maximum number of waves per execution unit}} + +__attribute__((amdgpu_flat_work_group_size(0, 0))) kernel void kernel_flat_work_group_size_0_0() {} +__attribute__((amdgpu_waves_per_eu(0))) kernel void kernel_waves_per_eu_0() {} +__attribute__((amdgpu_waves_per_eu(0, 0))) kernel void kernel_waves_per_eu_0_0() {} +__attribute__((amdgpu_num_sgpr(0))) kernel void kernel_num_sgpr_0() {} +__attribute__((amdgpu_num_vgpr(0))) kernel void kernel_num_vgpr_0() {} + +kernel __attribute__((amdgpu_flat_work_group_size(32, 64))) void kernel_flat_work_group_size_32_64() {} +kernel __attribute__((amdgpu_waves_per_eu(2))) void kernel_waves_per_eu_2() {} +kernel __attribute__((amdgpu_waves_per_eu(2, 4))) void kernel_waves_per_eu_2_4() {} +kernel __attribute__((amdgpu_num_sgpr(32))) void kernel_num_sgpr_32() {} +kernel __attribute__((amdgpu_num_vgpr(64))) void kernel_num_vgpr_64() {} Index: test/SemaOpenCL/amdgpu-num-register-attrs.cl =================================================================== --- test/SemaOpenCL/amdgpu-num-register-attrs.cl +++ test/SemaOpenCL/amdgpu-num-register-attrs.cl @@ -1,40 +0,0 @@ -// RUN: %clang_cc1 -triple r600-- -verify -fsyntax-only %s - -typedef __attribute__((amdgpu_num_vgpr(128))) struct FooStruct { // expected-error {{'amdgpu_num_vgpr' attribute only applies to kernel functions}} - int x; - float y; -} FooStruct; - - -__attribute__((amdgpu_num_vgpr("ABC"))) kernel void foo2() {} // expected-error {{'amdgpu_num_vgpr' attribute requires an integer constant}} -__attribute__((amdgpu_num_sgpr("ABC"))) kernel void foo3() {} // expected-error {{'amdgpu_num_sgpr' attribute requires an integer constant}} - - -__attribute__((amdgpu_num_vgpr(40))) void foo4() {} // expected-error {{'amdgpu_num_vgpr' attribute only applies to kernel functions}} -__attribute__((amdgpu_num_sgpr(64))) void foo5() {} // expected-error {{'amdgpu_num_sgpr' attribute only applies to kernel functions}} - -__attribute__((amdgpu_num_vgpr(40))) kernel void foo7() {} -__attribute__((amdgpu_num_sgpr(64))) kernel void foo8() {} -__attribute__((amdgpu_num_vgpr(40), amdgpu_num_sgpr(64))) kernel void foo9() {} - -// Check 0 VGPR is accepted. -__attribute__((amdgpu_num_vgpr(0))) kernel void foo10() {} - -// Check 0 SGPR is accepted. -__attribute__((amdgpu_num_sgpr(0))) kernel void foo11() {} - -// Check both 0 SGPR and VGPR is accepted. -__attribute__((amdgpu_num_vgpr(0), amdgpu_num_sgpr(0))) kernel void foo12() {} - -// Too large VGPR value. -__attribute__((amdgpu_num_vgpr(4294967296))) kernel void foo13() {} // expected-error {{integer constant expression evaluates to value 4294967296 that cannot be represented in a 32-bit unsigned integer type}} - -__attribute__((amdgpu_num_sgpr(4294967296))) kernel void foo14() {} // expected-error {{integer constant expression evaluates to value 4294967296 that cannot be represented in a 32-bit unsigned integer type}} - -__attribute__((amdgpu_num_sgpr(4294967296), amdgpu_num_vgpr(4294967296))) kernel void foo15() {} // expected-error 2 {{integer constant expression evaluates to value 4294967296 that cannot be represented in a 32-bit unsigned integer type}} - - -// Make sure it is accepted with kernel keyword before the attribute. -kernel __attribute__((amdgpu_num_vgpr(40))) void foo16() {} - -kernel __attribute__((amdgpu_num_sgpr(40))) void foo17() {}