Index: include/clang/Basic/Attr.td
===================================================================
--- include/clang/Basic/Attr.td
+++ include/clang/Basic/Attr.td
@@ -115,6 +115,11 @@
 def FunctionLike : SubsetSubject<DeclBase,
                                   [{S->getFunctionType(false) != NULL}]>;
 
+// Matches function declarations that carry the OpenCL kernel attribute.
+def KernelFunction : SubsetSubject<Function, [{
+  S->hasAttr<OpenCLKernelAttr>()
+}]>;
+
 // HasFunctionProto is a more strict version of FunctionLike, so it should
 // never be specified in a Subjects list along with FunctionLike (due to the
 // inclusive nature of subject testing).
@@ -241,6 +246,7 @@
   let OSes = ["Win32"];
 }
 def TargetMips : TargetArch<["mips", "mipsel"]>;
+def TargetAMDGPU : TargetArch<["r600"]>;
 
 class Attr {
   // The various ways in which an attribute can be spelled in source
@@ -859,6 +865,26 @@
   let Documentation = [Undocumented];
 }
 
+// Optimization hint limiting the number of VGPRs the backend uses for a
+// kernel function; see AMDGPURegisterDocs.
+def AMDGPUNumVGPR : InheritableAttr, TargetSpecificAttr<TargetAMDGPU> {
+  let Spellings = [GCC<"amdgpu_num_vgpr">];
+  let Args = [UnsignedArgument<"NumVGPR">];
+  let Documentation = [AMDGPURegisterDocs];
+  let Subjects = SubjectList<[KernelFunction], ErrorDiag,
+                             "ExpectedKernelFunction">;
+}
+
+// Optimization hint limiting the number of SGPRs the backend uses for a
+// kernel function; see AMDGPURegisterDocs.
+def AMDGPUNumSGPR : InheritableAttr, TargetSpecificAttr<TargetAMDGPU> {
+  let Spellings = [GCC<"amdgpu_num_sgpr">];
+  let Args = [UnsignedArgument<"NumSGPR">];
+  let Documentation = [AMDGPURegisterDocs];
+  let Subjects = SubjectList<[KernelFunction], ErrorDiag,
+                             "ExpectedKernelFunction">;
+}
+
 def NoSplitStack : InheritableAttr {
   let Spellings = [GCC<"no_split_stack">];
   let Subjects = SubjectList<[Function], ErrorDiag>;
Index: include/clang/Basic/AttrDocs.td
===================================================================
--- include/clang/Basic/AttrDocs.td
+++ include/clang/Basic/AttrDocs.td
@@ -673,6 +673,31 @@
   }];
 }
 
+def AMDGPURegisterDocs : Documentation {
+  let Category = DocCatFunction;
+  let Content = [{
+Clang supports the
+``__attribute__((amdgpu_num_vgpr(<num_registers>)))`` and
+``__attribute__((amdgpu_num_sgpr(<num_registers>)))`` attributes on
+AMD GPU targets. These attributes may be attached to a kernel function
+definition and are an optimization hint to the backend for the maximum
+number of registers to use. This is useful in cases where register-limited
+occupancy is known to be an important factor for the performance of the
+kernel.
+
+The semantics are as follows:
+
+- The backend will attempt to limit the number of used registers to
+  the specified values, but the exact number used is not
+  guaranteed. The number used may be rounded up to satisfy the
+  allocation requirements or ABI constraints of the subtarget. The
+  backend may also use fewer registers than requested whenever
+  possible.
+
+- A value of 0 means the default: no limit on register usage.
+  }];
+}
+
 def DocCatCallingConvs : DocumentationCategory<"Calling Conventions"> {
   let Content = [{
 Clang supports several different calling conventions, depending on the target
Index: include/clang/Basic/DiagnosticSemaKinds.td
===================================================================
--- include/clang/Basic/DiagnosticSemaKinds.td
+++ include/clang/Basic/DiagnosticSemaKinds.td
@@ -2237,7 +2237,7 @@
   "Objective-C instance methods|init methods of interface or class extension declarations|"
   "variables, functions and classes|Objective-C protocols|"
   "functions and global variables|structs or typedefs|"
-  "interface or protocol declarations}1">,
+  "interface or protocol declarations|kernel functions}1">,
   InGroup<IgnoredAttributes>;
 def err_attribute_wrong_decl_type : Error<warn_attribute_wrong_decl_type.Text>;
 def warn_type_attribute_wrong_type : Warning<
Index: include/clang/Sema/AttributeList.h
===================================================================
--- include/clang/Sema/AttributeList.h
+++ include/clang/Sema/AttributeList.h
@@ -843,7 +843,8 @@
   ExpectedObjectiveCProtocol,
   ExpectedFunctionGlobalVarMethodOrProperty,
   ExpectedStructOrTypedef,
-  ExpectedObjectiveCInterfaceOrProtocol
+  ExpectedObjectiveCInterfaceOrProtocol,
+  ExpectedKernelFunction
 };
 
 } // end namespace clang
Index: lib/CodeGen/TargetInfo.cpp
===================================================================
--- lib/CodeGen/TargetInfo.cpp
+++ lib/CodeGen/TargetInfo.cpp
@@ -20,6 +20,7 @@
 #include "clang/AST/RecordLayout.h"
 #include "clang/CodeGen/CGFunctionInfo.h"
 #include "clang/Frontend/CodeGenOptions.h"
+#include "llvm/ADT/StringExtras.h"
 #include "llvm/ADT/Triple.h"
 #include "llvm/IR/DataLayout.h"
 #include "llvm/IR/Type.h"
@@ -6172,5 +6173,49 @@
   return AddrTyped;
 }
 
+//===----------------------------------------------------------------------===//
+// AMDGPU ABI Implementation
+//===----------------------------------------------------------------------===//
+
+namespace {
+
+/// Target code generation hooks for AMDGPU: default ABI lowering plus
+/// forwarding of the amdgpu_num_vgpr / amdgpu_num_sgpr attributes to the IR.
+class AMDGPUTargetCodeGenInfo : public TargetCodeGenInfo {
+public:
+  AMDGPUTargetCodeGenInfo(CodeGenTypes &CGT)
+    : TargetCodeGenInfo(new DefaultABIInfo(CGT)) {}
+  void SetTargetAttributes(const Decl *D, llvm::GlobalValue *GV,
+                           CodeGen::CodeGenModule &M) const override;
+};
+
+} // end anonymous namespace
+
+/// Copy the register-count attributes of the function declaration \p D onto
+/// the llvm::Function \p GV as string function attributes.
+///
+/// Non-function declarations are ignored. The attribute documentation defines
+/// 0 as "the default no limit", so a zero count is not forwarded.
+void AMDGPUTargetCodeGenInfo::SetTargetAttributes(
+    const Decl *D, llvm::GlobalValue *GV, CodeGen::CodeGenModule &M) const {
+  const FunctionDecl *FD = dyn_cast<FunctionDecl>(D);
+  if (!FD)
+    return;
+
+  if (const AMDGPUNumVGPRAttr *Attr = FD->getAttr<AMDGPUNumVGPRAttr>()) {
+    llvm::Function *F = cast<llvm::Function>(GV);
+    const unsigned NumVGPR = Attr->getNumVGPR();
+    if (NumVGPR != 0)
+      F->addFnAttr("amdgpu_num_vgpr", llvm::utostr(NumVGPR));
+  }
+
+  if (const AMDGPUNumSGPRAttr *Attr = FD->getAttr<AMDGPUNumSGPRAttr>()) {
+    llvm::Function *F = cast<llvm::Function>(GV);
+    const unsigned NumSGPR = Attr->getNumSGPR();
+    if (NumSGPR != 0)
+      F->addFnAttr("amdgpu_num_sgpr", llvm::utostr(NumSGPR));
+  }
+}
+
 //===----------------------------------------------------------------------===//
 // SPARC v9 ABI Implementation.
@@ -7233,6 +7278,8 @@
   }
   case llvm::Triple::hexagon:
     return *(TheTargetCodeGenInfo = new HexagonTargetCodeGenInfo(Types));
+  case llvm::Triple::r600:
+    return *(TheTargetCodeGenInfo = new AMDGPUTargetCodeGenInfo(Types));
   case llvm::Triple::sparcv9:
     return *(TheTargetCodeGenInfo = new SparcV9TargetCodeGenInfo(Types));
   case llvm::Triple::xcore:
Index: lib/Sema/SemaDeclAttr.cpp
===================================================================
--- lib/Sema/SemaDeclAttr.cpp
+++ lib/Sema/SemaDeclAttr.cpp
@@ -3940,6 +3940,39 @@
     handleARMInterruptAttr(S, D, Attr);
 }
 
+/// Handle the amdgpu_num_vgpr (\p IsVGPR true) and amdgpu_num_sgpr
+/// (\p IsVGPR false) attributes.
+///
+/// Expects exactly one argument. On success the matching AMDGPUNumVGPRAttr or
+/// AMDGPUNumSGPRAttr is attached to \p D; otherwise the handler returns
+/// without attaching anything.
+static void handleAMDGPUNumRegAttr(Sema &S, Decl *D,
+                                   const AttributeList &Attr,
+                                   bool IsVGPR) {
+  if (!checkAttributeNumArgs(S, Attr, 1))
+    return;
+
+  // NOTE(review): checkUInt32Argument is assumed to be the shared helper
+  // defined earlier in this file (outside this patch) -- confirm it exists on
+  // the target branch. It is expected to keep the "requires an integer
+  // constant" diagnostic for non-constant arguments and to reject values that
+  // do not fit in 32 bits instead of letting them truncate silently.
+  uint32_t NumRegs;
+  Expr *NumRegsExpr = static_cast<Expr *>(Attr.getArgAsExpr(0));
+  if (!checkUInt32Argument(S, Attr, NumRegsExpr, NumRegs))
+    return;
+
+  if (IsVGPR) {
+    D->addAttr(::new (S.Context)
+               AMDGPUNumVGPRAttr(Attr.getLoc(), S.Context, NumRegs,
+                                 Attr.getAttributeSpellingListIndex()));
+  } else {
+    D->addAttr(::new (S.Context)
+               AMDGPUNumSGPRAttr(Attr.getLoc(), S.Context, NumRegs,
+                                 Attr.getAttributeSpellingListIndex()));
+  }
+}
+
 static void handleX86ForceAlignArgPointerAttr(Sema &S, Decl *D,
                                               const AttributeList& Attr) {
   // If we try to apply it to a function pointer, don't warn, but don't
@@ -4247,6 +4280,12 @@
   case AttributeList::AT_NoMips16:
     handleSimpleAttribute<NoMips16Attr>(S, D, Attr);
     break;
+  case AttributeList::AT_AMDGPUNumVGPR:
+    handleAMDGPUNumRegAttr(S, D, Attr, /*IsVGPR=*/true);
+    break;
+  case AttributeList::AT_AMDGPUNumSGPR:
+    handleAMDGPUNumRegAttr(S, D, Attr, /*IsVGPR=*/false);
+    break;
   case AttributeList::AT_IBAction:
     handleSimpleAttribute<IBActionAttr>(S, D, Attr);
     break;
Index: test/CodeGenOpenCL/amdgpu-num-gpr-attr.cl
===================================================================
--- /dev/null
+++ test/CodeGenOpenCL/amdgpu-num-gpr-attr.cl
@@ -0,0 +1,31 @@
+// RUN: %clang_cc1 -triple r600-- -target-cpu tahiti -O0 -emit-llvm -o - %s | FileCheck %s
+
+// Each register-count attribute on a kernel must show up as a string
+// function attribute in the emitted IR.
+
+__attribute__((amdgpu_num_vgpr(64)))
+kernel void test_num_vgpr64() {
+// CHECK: define void @test_num_vgpr64() [[ATTR_VGPR64:#[0-9]+]]
+}
+
+__attribute__((amdgpu_num_sgpr(32)))
+kernel void test_num_sgpr32() {
+// CHECK: define void @test_num_sgpr32() [[ATTR_SGPR32:#[0-9]+]]
+}
+
+__attribute__((amdgpu_num_vgpr(64), amdgpu_num_sgpr(32)))
+kernel void test_num_vgpr64_sgpr32() {
+// CHECK: define void @test_num_vgpr64_sgpr32() [[ATTR_VGPR64_SGPR32:#[0-9]+]]
+
+}
+
+__attribute__((amdgpu_num_sgpr(20), amdgpu_num_vgpr(40)))
+kernel void test_num_sgpr20_vgpr40() {
+// CHECK: define void @test_num_sgpr20_vgpr40() [[ATTR_SGPR20_VGPR40:#[0-9]+]]
+}
+
+
+// CHECK-DAG: attributes [[ATTR_VGPR64]] = { nounwind "amdgpu_num_vgpr"="64"
+// CHECK-DAG: attributes [[ATTR_SGPR32]] = { nounwind "amdgpu_num_sgpr"="32"
+// CHECK-DAG: attributes [[ATTR_VGPR64_SGPR32]] = { nounwind "amdgpu_num_sgpr"="32" "amdgpu_num_vgpr"="64"
+// CHECK-DAG: attributes [[ATTR_SGPR20_VGPR40]] = { nounwind "amdgpu_num_sgpr"="20" "amdgpu_num_vgpr"="40"
Index: test/SemaOpenCL/amdgpu-num-register-attrs.cl
===================================================================
--- /dev/null
+++ test/SemaOpenCL/amdgpu-num-register-attrs.cl
@@ -0,0 +1,24 @@
+// RUN: %clang_cc1 -triple r600-- -verify -fsyntax-only %s
+
+// The attributes are rejected on declarations that are not functions.
+typedef __attribute__((amdgpu_num_vgpr(128))) struct FooStruct { // expected-error {{'amdgpu_num_vgpr' attribute only applies to kernel functions}}
+  int x;
+  float y;
+} FooStruct;
+
+
+// The argument must be an integer constant.
+__attribute__((amdgpu_num_vgpr("ABC"))) kernel void foo2() {} // expected-error {{'amdgpu_num_vgpr' attribute requires an integer constant}}
+__attribute__((amdgpu_num_sgpr("ABC"))) kernel void foo3() {} // expected-error {{'amdgpu_num_sgpr' attribute requires an integer constant}}
+
+
+// Functions that are not kernels are rejected.
+__attribute__((amdgpu_num_vgpr(40))) void foo4() {} // expected-error {{'amdgpu_num_vgpr' attribute only applies to kernel functions}}
+__attribute__((amdgpu_num_sgpr(64))) void foo5() {} // expected-error {{'amdgpu_num_sgpr' attribute only applies to kernel functions}}
+
+// Kernels accept either attribute, alone or combined.
+__attribute__((amdgpu_num_vgpr(40))) kernel void foo7() {}
+__attribute__((amdgpu_num_sgpr(64))) kernel void foo8() {}
+__attribute__((amdgpu_num_vgpr(40), amdgpu_num_sgpr(64))) kernel void foo9() {}
+
+