Index: include/clang/Basic/Attr.td =================================================================== --- include/clang/Basic/Attr.td +++ include/clang/Basic/Attr.td @@ -115,6 +115,10 @@ def FunctionLike : SubsetSubjectgetFunctionType(false) != NULL}]>; +def KernelFunction : SubsetSubjecthasAttr() || S->hasAttr() +}]>; + // HasFunctionProto is a more strict version of FunctionLike, so it should // never be specified in a Subjects list along with FunctionLike (due to the // inclusive nature of subject testing). @@ -243,6 +247,7 @@ let OSes = ["Win32"]; } def TargetMips : TargetArch<["mips", "mipsel"]>; +def TargetAMDGPU : TargetArch<["r600"]>; class Attr { // The various ways in which an attribute can be spelled in source @@ -885,6 +890,22 @@ let Documentation = [Undocumented]; } +def AMDGPUNumVGPR : InheritableAttr, TargetSpecificAttr { + let Spellings = [GNU<"amdgpu_num_vgpr">]; + let Args = [UnsignedArgument<"NumVGPR">]; + let Documentation = [AMDGPUNumVGPRDocs]; + let Subjects = SubjectList<[KernelFunction], ErrorDiag, + "ExpectedKernelFunction">; +} + +def AMDGPUNumSGPR : InheritableAttr, TargetSpecificAttr { + let Spellings = [GNU<"amdgpu_num_sgpr">]; + let Args = [UnsignedArgument<"NumSGPR">]; + let Documentation = [AMDGPUNumSGPRDocs]; + let Subjects = SubjectList<[KernelFunction], ErrorDiag, + "ExpectedKernelFunction">; +} + def NoSplitStack : InheritableAttr { let Spellings = [GCC<"no_split_stack">]; let Subjects = SubjectList<[Function], ErrorDiag>; Index: include/clang/Basic/AttrDocs.td =================================================================== --- include/clang/Basic/AttrDocs.td +++ include/clang/Basic/AttrDocs.td @@ -673,6 +673,65 @@ }]; } +def DocCatAMDGPURegisterAttributes : + DocumentationCategory<"AMD GPU Register Attributes"> { + let Content = [{ +Clang supports attributes for controlling register usage on AMD GPU +targets. 
These attributes may be attached to a kernel function +definition and are an optimization hint to the backend for the maximum +number of registers to use. This is useful in cases where register +limited occupancy is known to be an important factor for the +performance of the kernel. + +The semantics are as follows: + +- The backend will attempt to limit the number of used registers to + the specified value, but the exact number used is not + guaranteed. The number used may be rounded up to satisfy the + allocation requirements or ABI constraints of the subtarget. For + example, on Southern Islands VGPRs may only be allocated in + increments of 4, so requesting a limit of 39 VGPRs will really + attempt to use up to 40. Requesting more registers than the + subtarget supports will truncate to the maximum allowed. The backend + may also use fewer registers than requested whenever possible. + +- 0 implies the default of no limit on register usage. + +- Ignored on older VLIW subtargets which did not have separate scalar +and vector registers, R600 through Northern Islands. +}]; +} + + +def AMDGPUNumVGPRDocs : Documentation { + let Category = DocCatAMDGPURegisterAttributes; + let Content = [{ +Clang supports the +``__attribute__((amdgpu_num_vgpr(<num_registers>)))`` attribute on AMD +Southern Islands GPUs and later for controlling the number of vector +registers. A typical value would be between 4 and 256 in increments +of 4. +}]; +} + +def AMDGPUNumSGPRDocs : Documentation { + let Category = DocCatAMDGPURegisterAttributes; + let Content = [{ + +Clang supports the +``__attribute__((amdgpu_num_sgpr(<num_registers>)))`` attribute on AMD +Southern Islands GPUs and later for controlling the number of scalar +registers. A typical value would be between 8 and 104 in increments of +8. + +Due to common instruction constraints, an additional 2-4 SGPRs are +typically required for internal use depending on features used. 
This +value is a hint for the total number of SGPRs to use, and not the +number of user SGPRs, so no special consideration needs to be given +for these. +}]; +} + def DocCatCallingConvs : DocumentationCategory<"Calling Conventions"> { let Content = [{ Clang supports several different calling conventions, depending on the target Index: include/clang/Basic/DiagnosticSemaKinds.td =================================================================== --- include/clang/Basic/DiagnosticSemaKinds.td +++ include/clang/Basic/DiagnosticSemaKinds.td @@ -2244,7 +2244,7 @@ "Objective-C instance methods|init methods of interface or class extension declarations|" "variables, functions and classes|Objective-C protocols|" "functions and global variables|structs or typedefs|" - "interface or protocol declarations}1">, + "interface or protocol declarations|kernel functions}1">, InGroup; def err_attribute_wrong_decl_type : Error; def warn_type_attribute_wrong_type : Warning< Index: include/clang/Sema/AttributeList.h =================================================================== --- include/clang/Sema/AttributeList.h +++ include/clang/Sema/AttributeList.h @@ -844,7 +844,8 @@ ExpectedObjectiveCProtocol, ExpectedFunctionGlobalVarMethodOrProperty, ExpectedStructOrTypedef, - ExpectedObjectiveCInterfaceOrProtocol + ExpectedObjectiveCInterfaceOrProtocol, + ExpectedKernelFunction }; } // end namespace clang Index: lib/CodeGen/TargetInfo.cpp =================================================================== --- lib/CodeGen/TargetInfo.cpp +++ lib/CodeGen/TargetInfo.cpp @@ -20,6 +20,7 @@ #include "clang/AST/RecordLayout.h" #include "clang/CodeGen/CGFunctionInfo.h" #include "clang/Frontend/CodeGenOptions.h" +#include "llvm/ADT/StringExtras.h" #include "llvm/ADT/Triple.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/Type.h" @@ -6082,6 +6083,45 @@ return AddrTyped; } +//===----------------------------------------------------------------------===// +// AMDGPU ABI Implementation 
+//===----------------------------------------------------------------------===// + +namespace { + +class AMDGPUTargetCodeGenInfo : public TargetCodeGenInfo { +public: + AMDGPUTargetCodeGenInfo(CodeGenTypes &CGT) + : TargetCodeGenInfo(new DefaultABIInfo(CGT)) {} + void SetTargetAttributes(const Decl *D, llvm::GlobalValue *GV, + CodeGen::CodeGenModule &M) const override; +}; + +} // end anonymous namespace + +void AMDGPUTargetCodeGenInfo::SetTargetAttributes( + const Decl *D, + llvm::GlobalValue *GV, + CodeGen::CodeGenModule &M) const { + const FunctionDecl *FD = dyn_cast<FunctionDecl>(D); + if (!FD) + return; + + if (const auto *Attr = FD->getAttr<AMDGPUNumVGPRAttr>()) { + llvm::Function *F = cast<llvm::Function>(GV); + unsigned NumVGPR = Attr->getNumVGPR(); // 0 means no limit: emit no IR attribute. + if (NumVGPR != 0) + F->addFnAttr("amdgpu_num_vgpr", llvm::utostr(NumVGPR)); + } + + if (const auto *Attr = FD->getAttr<AMDGPUNumSGPRAttr>()) { + llvm::Function *F = cast<llvm::Function>(GV); + unsigned NumSGPR = Attr->getNumSGPR(); // 0 means no limit: emit no IR attribute. + if (NumSGPR != 0) + F->addFnAttr("amdgpu_num_sgpr", llvm::utostr(NumSGPR)); + } +} + //===----------------------------------------------------------------------===// // SPARC v9 ABI Implementation. 
@@ -7143,6 +7183,8 @@ } case llvm::Triple::hexagon: return *(TheTargetCodeGenInfo = new HexagonTargetCodeGenInfo(Types)); + case llvm::Triple::r600: + return *(TheTargetCodeGenInfo = new AMDGPUTargetCodeGenInfo(Types)); case llvm::Triple::sparcv9: return *(TheTargetCodeGenInfo = new SparcV9TargetCodeGenInfo(Types)); case llvm::Triple::xcore: Index: lib/Sema/SemaDeclAttr.cpp =================================================================== --- lib/Sema/SemaDeclAttr.cpp +++ lib/Sema/SemaDeclAttr.cpp @@ -3940,6 +3940,32 @@ handleARMInterruptAttr(S, D, Attr); } +static void handleAMDGPUNumVGPRAttr(Sema &S, Decl *D, + const AttributeList &Attr) { + uint32_t NumRegs; + Expr *NumRegsExpr = Attr.getArgAsExpr(0); + if (!checkUInt32Argument(S, Attr, NumRegsExpr, NumRegs)) + return; // Bad argument: do not attach the attribute. + + D->addAttr(::new (S.Context) + AMDGPUNumVGPRAttr(Attr.getLoc(), S.Context, + NumRegs, + Attr.getAttributeSpellingListIndex())); +} + +static void handleAMDGPUNumSGPRAttr(Sema &S, Decl *D, + const AttributeList &Attr) { + uint32_t NumRegs; + Expr *NumRegsExpr = Attr.getArgAsExpr(0); + if (!checkUInt32Argument(S, Attr, NumRegsExpr, NumRegs)) + return; // Bad argument: do not attach the attribute. + + D->addAttr(::new (S.Context) + AMDGPUNumSGPRAttr(Attr.getLoc(), S.Context, + NumRegs, + Attr.getAttributeSpellingListIndex())); +} + static void handleX86ForceAlignArgPointerAttr(Sema &S, Decl *D, const AttributeList& Attr) { // If we try to apply it to a function pointer, don't warn, but don't @@ -4247,6 +4273,12 @@ case AttributeList::AT_NoMips16: handleSimpleAttribute<NoMips16Attr>(S, D, Attr); break; + case AttributeList::AT_AMDGPUNumVGPR: + handleAMDGPUNumVGPRAttr(S, D, Attr); + break; + case AttributeList::AT_AMDGPUNumSGPR: + handleAMDGPUNumSGPRAttr(S, D, Attr); + break; case AttributeList::AT_IBAction: handleSimpleAttribute<IBActionAttr>(S, D, Attr); break; Index: test/CodeGenOpenCL/amdgpu-num-gpr-attr.cl =================================================================== --- /dev/null +++ test/CodeGenOpenCL/amdgpu-num-gpr-attr.cl @@ 
-0,0 +1,41 @@ +// RUN: %clang_cc1 -triple r600-- -target-cpu tahiti -O0 -emit-llvm -o - %s | FileCheck %s + +__attribute__((amdgpu_num_vgpr(64))) +kernel void test_num_vgpr64() { +// CHECK: define void @test_num_vgpr64() [[ATTR_VGPR64:#[0-9]+]] +} + +__attribute__((amdgpu_num_sgpr(32))) +kernel void test_num_sgpr32() { +// CHECK: define void @test_num_sgpr32() [[ATTR_SGPR32:#[0-9]+]] +} + +__attribute__((amdgpu_num_vgpr(64), amdgpu_num_sgpr(32))) +kernel void test_num_vgpr64_sgpr32() { +// CHECK: define void @test_num_vgpr64_sgpr32() [[ATTR_VGPR64_SGPR32:#[0-9]+]] + +} + +__attribute__((amdgpu_num_sgpr(20), amdgpu_num_vgpr(40))) +kernel void test_num_sgpr20_vgpr40() { +// CHECK: define void @test_num_sgpr20_vgpr40() [[ATTR_SGPR20_VGPR40:#[0-9]+]] +} + +__attribute__((amdgpu_num_vgpr(0))) +kernel void test_num_vgpr0() { +} + +__attribute__((amdgpu_num_sgpr(0))) +kernel void test_num_sgpr0() { +} + +__attribute__((amdgpu_num_vgpr(0), amdgpu_num_sgpr(0))) +kernel void test_num_vgpr0_sgpr0() { +} + +// CHECK-DAG: attributes [[ATTR_VGPR64]] = { nounwind "amdgpu_num_vgpr"="64" +// CHECK-DAG: attributes [[ATTR_SGPR32]] = { nounwind "amdgpu_num_sgpr"="32" +// CHECK-DAG: attributes [[ATTR_VGPR64_SGPR32]] = { nounwind "amdgpu_num_sgpr"="32" "amdgpu_num_vgpr"="64" +// CHECK-DAG: attributes [[ATTR_SGPR20_VGPR40]] = { nounwind "amdgpu_num_sgpr"="20" "amdgpu_num_vgpr"="40" +// CHECK-NOT: "amdgpu_num_vgpr"="0" +// CHECK-NOT: "amdgpu_num_sgpr"="0" Index: test/SemaOpenCL/amdgpu-num-register-attrs.cl =================================================================== --- /dev/null +++ test/SemaOpenCL/amdgpu-num-register-attrs.cl @@ -0,0 +1,34 @@ +// RUN: %clang_cc1 -triple r600-- -verify -fsyntax-only %s + +typedef __attribute__((amdgpu_num_vgpr(128))) struct FooStruct { // expected-error {{'amdgpu_num_vgpr' attribute only applies to kernel functions}} + int x; + float y; +} FooStruct; + + +__attribute__((amdgpu_num_vgpr("ABC"))) kernel void foo2() {} // expected-error 
{{'amdgpu_num_vgpr' attribute requires an integer constant}} +__attribute__((amdgpu_num_sgpr("ABC"))) kernel void foo3() {} // expected-error {{'amdgpu_num_sgpr' attribute requires an integer constant}} + + +__attribute__((amdgpu_num_vgpr(40))) void foo4() {} // expected-error {{'amdgpu_num_vgpr' attribute only applies to kernel functions}} +__attribute__((amdgpu_num_sgpr(64))) void foo5() {} // expected-error {{'amdgpu_num_sgpr' attribute only applies to kernel functions}} + +__attribute__((amdgpu_num_vgpr(40))) kernel void foo7() {} +__attribute__((amdgpu_num_sgpr(64))) kernel void foo8() {} +__attribute__((amdgpu_num_vgpr(40), amdgpu_num_sgpr(64))) kernel void foo9() {} + +// Check that 0 VGPR is accepted. +__attribute__((amdgpu_num_vgpr(0))) kernel void foo10() {} + +// Check that 0 SGPR is accepted. +__attribute__((amdgpu_num_sgpr(0))) kernel void foo11() {} + +// Check that 0 for both SGPR and VGPR is accepted. +__attribute__((amdgpu_num_vgpr(0), amdgpu_num_sgpr(0))) kernel void foo12() {} + +// Values that do not fit in 32 bits are rejected (VGPR, SGPR, then both). +__attribute__((amdgpu_num_vgpr(4294967296))) kernel void foo13() {} // expected-error {{integer constant expression evaluates to value 4294967296 that cannot be represented in a 32-bit unsigned integer type}} + +__attribute__((amdgpu_num_sgpr(4294967296))) kernel void foo14() {} // expected-error {{integer constant expression evaluates to value 4294967296 that cannot be represented in a 32-bit unsigned integer type}} + +__attribute__((amdgpu_num_sgpr(4294967296), amdgpu_num_vgpr(4294967296))) kernel void foo15() {} // expected-error 2 {{integer constant expression evaluates to value 4294967296 that cannot be represented in a 32-bit unsigned integer type}}