Index: include/clang/Basic/Attr.td =================================================================== --- include/clang/Basic/Attr.td +++ include/clang/Basic/Attr.td @@ -987,7 +987,30 @@ let Args = [UnsignedArgument<"NumSGPR">]; let Documentation = [AMDGPUNumSGPRDocs]; let Subjects = SubjectList<[Function], ErrorDiag, - "ExpectedKernelFunction">; + "ExpectedKernelFunction">; +} + +def AMDGPUToolsInsertNops : InheritableAttr { + let Spellings = [GNU<"amdgpu_tools_insert_nops">]; + let Documentation = [AMDGPUToolsInsertNopsDocs]; + let Subjects = SubjectList<[Function], ErrorDiag, + "ExpectedKernelFunction">; +} + +def AMDGPUToolsNumReservedVGPR : InheritableAttr { + let Spellings = [GNU<"amdgpu_tools_num_reserved_vgpr">]; + let Args = [UnsignedArgument<"NumReservedVGPR">]; + let Documentation = [AMDGPUToolsNumReservedVGPRDocs]; + let Subjects = SubjectList<[Function], ErrorDiag, + "ExpectedKernelFunction">; +} + +def AMDGPUToolsNumReservedSGPR : InheritableAttr { + let Spellings = [GNU<"amdgpu_tools_num_reserved_sgpr">]; + let Args = [UnsignedArgument<"NumReservedSGPR">]; + let Documentation = [AMDGPUToolsNumReservedSGPRDocs]; + let Subjects = SubjectList<[Function], ErrorDiag, + "ExpectedKernelFunction">; } def NoSplitStack : InheritableAttr { Index: include/clang/Basic/AttrDocs.td =================================================================== --- include/clang/Basic/AttrDocs.td +++ include/clang/Basic/AttrDocs.td @@ -936,6 +936,70 @@ }]; } +def DocCatAMDGPUToolsAttributes : + DocumentationCategory<"AMD GPU Tools Attributes"> { + let Content = [{ + +Clang supports the following AMD GPU attributes for tools, such as debuggers and +profilers: + }]; +} + +def AMDGPUToolsInsertNopsDocs : Documentation { + let Category = DocCatAMDGPUToolsAttributes; + let Content = [{ + +Clang supports the ``__attribute__((amdgpu_tools_insert_nops))`` attribute on +AMD Southern Islands GPUs and later. 
If specified, it causes AMD GPU Backend to +insert two nop instructions for each high level source statement: one nop +instruction is inserted before the first ISA instruction of the high level +source statement, and one nop instruction is inserted after the last ISA +instruction of the high level source statement. + +In addition to specifying this attribute manually, clang can add this attribute +for each kernel function in the translation unit if the +``--amdgpu-tools-insert-nops`` clang command line option is specified. + }]; +} + +def AMDGPUToolsNumReservedVGPRDocs : Documentation { + let Category = DocCatAMDGPUToolsAttributes; + let Content = [{ + +Clang supports the ``__attribute__((amdgpu_tools_num_reserved_vgpr(<N>)))`` +attribute on AMD Southern Islands GPUs and later. If specified, it causes AMD +GPU Backend to reserve ``<N>`` vector registers and not to use those +registers throughout kernel function execution. The index of the first reserved +vector register is recorded in the ``amd_kernel_code_t``. + +In addition to specifying this attribute manually, clang can add this attribute +for each kernel function in the translation unit if the +``--amdgpu-tools-num-reserved-vgpr=<N>`` clang command line option is +specified. In this case, the option takes precedence over the attribute. + +See also: ``amdgpu_num_vgpr`` attribute. + }]; +} + +def AMDGPUToolsNumReservedSGPRDocs : Documentation { + let Category = DocCatAMDGPUToolsAttributes; + let Content = [{ + +Clang supports the ``__attribute__((amdgpu_tools_num_reserved_sgpr(<N>)))`` +attribute on AMD Southern Islands GPUs and later. If specified, it causes AMD +GPU Backend to reserve ``<N>`` scalar registers and not to use those +registers throughout kernel function execution. The index of the first reserved +scalar register is recorded in the ``amd_kernel_code_t``. 
+ +In addition to specifying this attribute manually, clang can add this attribute +for each kernel function in the translation unit if the +``--amdgpu-tools-num-reserved-sgpr=<N>`` clang command line option is +specified. In this case, the option takes precedence over the attribute. + +See also: ``amdgpu_num_sgpr`` attribute. + }]; +} + def DocCatCallingConvs : DocumentationCategory<"Calling Conventions"> { let Content = [{ Clang supports several different calling conventions, depending on the target Index: include/clang/Driver/Options.td =================================================================== --- include/clang/Driver/Options.td +++ include/clang/Driver/Options.td @@ -352,6 +352,17 @@ def Z_Joined : Joined<["-"], "Z">; def all__load : Flag<["-"], "all_load">; def allowable__client : Separate<["-"], "allowable_client">; +def amdgpu_tools_insert_nops : + Flag<["--"], "amdgpu-tools-insert-nops">, Flags<[CC1Option, HelpHidden]>, + HelpText<"Insert two nop instructions for each high level source statement">; +def amdgpu_tools_num_reserved_vgpr : + Joined<["--"], "amdgpu-tools-num-reserved-vgpr=">, + Flags<[CC1Option, HelpHidden]>, + HelpText<"Reserve <N> vector registers">, MetaVarName<"<N>">; +def amdgpu_tools_num_reserved_sgpr : + Joined<["--"], "amdgpu-tools-num-reserved-sgpr=">, + Flags<[CC1Option, HelpHidden]>, + HelpText<"Reserve <N> scalar registers">, MetaVarName<"<N>">; def ansi : Flag<["-", "--"], "ansi">; def arch__errors__fatal : Flag<["-"], "arch_errors_fatal">; def arch : Separate<["-"], "arch">, Flags<[DriverOption]>; Index: include/clang/Frontend/CodeGenOptions.def =================================================================== --- include/clang/Frontend/CodeGenOptions.def +++ include/clang/Frontend/CodeGenOptions.def @@ -167,6 +167,16 @@ ///< alignment, if not 0. VALUE_CODEGENOPT(StackProbeSize , 32, 4096) ///< Overrides default stack ///< probe size, even if 0. 
+ +/// \brief Control setting of ``amdgpu_tools_insert_nops`` attribute +CODEGENOPT(AMDGPUToolsInsertNopsOpt, 1, 0) +/// \brief Control setting of ``amdgpu_tools_num_reserved_vgpr(<N>)`` +/// attribute +VALUE_CODEGENOPT(AMDGPUToolsNumReservedVGPROpt, 32, 0) +/// \brief Control setting of ``amdgpu_tools_num_reserved_sgpr(<N>)`` +/// attribute +VALUE_CODEGENOPT(AMDGPUToolsNumReservedSGPROpt, 32, 0) + CODEGENOPT(DebugColumnInfo, 1, 0) ///< Whether or not to use column information ///< in debug info. Index: lib/CodeGen/CGCall.cpp =================================================================== --- lib/CodeGen/CGCall.cpp +++ lib/CodeGen/CGCall.cpp @@ -1595,6 +1595,20 @@ } } + if (getTarget().getTriple().getArch() == llvm::Triple::amdgcn) { + // Add AMDGPU Tools attributes if needed. + if (CodeGenOpts.AMDGPUToolsInsertNopsOpt) + FuncAttrs.addAttribute("amdgpu-tools-insert-nops"); + if (CodeGenOpts.AMDGPUToolsNumReservedVGPROpt) + FuncAttrs.addAttribute( + "amdgpu-tools-num-reserved-vgpr", + llvm::utostr(CodeGenOpts.AMDGPUToolsNumReservedVGPROpt)); + if (CodeGenOpts.AMDGPUToolsNumReservedSGPROpt) + FuncAttrs.addAttribute( + "amdgpu-tools-num-reserved-sgpr", + llvm::utostr(CodeGenOpts.AMDGPUToolsNumReservedSGPROpt)); + } + if (getLangOpts().CUDA && getLangOpts().CUDAIsDevice) { // Conservatively, mark all functions and calls in CUDA as convergent // (meaning, they may call an intrinsically convergent op, such as Index: lib/CodeGen/TargetInfo.cpp =================================================================== --- lib/CodeGen/TargetInfo.cpp +++ lib/CodeGen/TargetInfo.cpp @@ -6598,6 +6598,30 @@ if (NumSGPR != 0) F->addFnAttr("amdgpu_num_sgpr", llvm::utostr(NumSGPR)); } + + if (FD->hasAttr<AMDGPUToolsInsertNopsAttr>()) { + llvm::Function *F = cast<llvm::Function>(GV); + if (!F->hasFnAttribute("amdgpu-tools-insert-nops")) + F->addFnAttr("amdgpu-tools-insert-nops"); + } + + if (const auto *Attr = FD->getAttr<AMDGPUToolsNumReservedVGPRAttr>()) { + llvm::Function *F = cast<llvm::Function>(GV); + unsigned NumReservedVGPR = Attr->getNumReservedVGPR(); + 
if (!F->hasFnAttribute("amdgpu-tools-num-reserved-vgpr") && + NumReservedVGPR != 0) + F->addFnAttr( + "amdgpu-tools-num-reserved-vgpr", llvm::utostr(NumReservedVGPR)); + } + + if (const auto *Attr = FD->getAttr<AMDGPUToolsNumReservedSGPRAttr>()) { + llvm::Function *F = cast<llvm::Function>(GV); + unsigned NumReservedSGPR = Attr->getNumReservedSGPR(); + if (!F->hasFnAttribute("amdgpu-tools-num-reserved-sgpr") && + NumReservedSGPR != 0) + F->addFnAttr( + "amdgpu-tools-num-reserved-sgpr", llvm::utostr(NumReservedSGPR)); + } } Index: lib/Driver/Tools.cpp =================================================================== --- lib/Driver/Tools.cpp +++ lib/Driver/Tools.cpp @@ -4813,6 +4813,26 @@ CmdArgs.push_back("-mstack-probe-size=0"); } + // Translate AMDGPU Tools arguments. + if (Args.hasArg(options::OPT_amdgpu_tools_insert_nops)) + CmdArgs.push_back("--amdgpu-tools-insert-nops"); + if (Args.hasArg(options::OPT_amdgpu_tools_num_reserved_vgpr)) { + StringRef Size = + Args.getLastArgValue(options::OPT_amdgpu_tools_num_reserved_vgpr); + + if (!Size.empty()) + CmdArgs.push_back( + Args.MakeArgString("--amdgpu-tools-num-reserved-vgpr=" + Size)); + } + if (Args.hasArg(options::OPT_amdgpu_tools_num_reserved_sgpr)) { + StringRef Size = + Args.getLastArgValue(options::OPT_amdgpu_tools_num_reserved_sgpr); + + if (!Size.empty()) + CmdArgs.push_back( + Args.MakeArgString("--amdgpu-tools-num-reserved-sgpr=" + Size)); + } + switch (getToolChain().getArch()) { case llvm::Triple::aarch64: case llvm::Triple::aarch64_be: Index: lib/Frontend/CompilerInvocation.cpp =================================================================== --- lib/Frontend/CompilerInvocation.cpp +++ lib/Frontend/CompilerInvocation.cpp @@ -664,6 +664,21 @@ Opts.StackProbeSize = StackProbeSize; } + // Set up AMDGPU Tools arguments. 
+ Opts.AMDGPUToolsInsertNopsOpt = Args.hasArg(OPT_amdgpu_tools_insert_nops); + if (Arg *A = Args.getLastArg(OPT_amdgpu_tools_num_reserved_vgpr)) { + StringRef Val = A->getValue(); + unsigned AMDGPUToolsNumReservedVGPROpt = Opts.AMDGPUToolsNumReservedVGPROpt; + Val.getAsInteger(0, AMDGPUToolsNumReservedVGPROpt); + Opts.AMDGPUToolsNumReservedVGPROpt = AMDGPUToolsNumReservedVGPROpt; + } + if (Arg *A = Args.getLastArg(OPT_amdgpu_tools_num_reserved_sgpr)) { + StringRef Val = A->getValue(); + unsigned AMDGPUToolsNumReservedSGPROpt = Opts.AMDGPUToolsNumReservedSGPROpt; + Val.getAsInteger(0, AMDGPUToolsNumReservedSGPROpt); + Opts.AMDGPUToolsNumReservedSGPROpt = AMDGPUToolsNumReservedSGPROpt; + } + if (Arg *A = Args.getLastArg(OPT_fobjc_dispatch_method_EQ)) { StringRef Name = A->getValue(); unsigned Method = llvm::StringSwitch<unsigned>(Name) Index: lib/Sema/SemaDeclAttr.cpp =================================================================== --- lib/Sema/SemaDeclAttr.cpp +++ lib/Sema/SemaDeclAttr.cpp @@ -4703,30 +4703,18 @@ } } -static void handleAMDGPUNumVGPRAttr(Sema &S, Decl *D, - const AttributeList &Attr) { - uint32_t NumRegs; - Expr *NumRegsExpr = static_cast<Expr *>(Attr.getArgAsExpr(0)); - if (!checkUInt32Argument(S, Attr, NumRegsExpr, NumRegs)) - return; - - D->addAttr(::new (S.Context) - AMDGPUNumVGPRAttr(Attr.getLoc(), S.Context, - NumRegs, - Attr.getAttributeSpellingListIndex())); -} - -static void handleAMDGPUNumSGPRAttr(Sema &S, Decl *D, - const AttributeList &Attr) { - uint32_t NumRegs; - Expr *NumRegsExpr = static_cast<Expr *>(Attr.getArgAsExpr(0)); - if (!checkUInt32Argument(S, Attr, NumRegsExpr, NumRegs)) +template <typename AMDGPUAttrType> +static void handleAMDGPUUInt32Attr(Sema &S, Decl *D, + const AttributeList &Attr) { + uint32_t UInt32Arg; + Expr *UInt32ArgExpr = static_cast<Expr *>(Attr.getArgAsExpr(0)); + if (!checkUInt32Argument(S, Attr, UInt32ArgExpr, UInt32Arg)) return; D->addAttr(::new (S.Context) - AMDGPUNumSGPRAttr(Attr.getLoc(), S.Context, - NumRegs, - Attr.getAttributeSpellingListIndex())); + 
AMDGPUAttrType(Attr.getLoc(), S.Context, + UInt32Arg, + Attr.getAttributeSpellingListIndex())); } static void handleX86ForceAlignArgPointerAttr(Sema &S, Decl *D, @@ -5133,10 +5121,19 @@ handleSimpleAttribute(S, D, Attr); break; case AttributeList::AT_AMDGPUNumVGPR: - handleAMDGPUNumVGPRAttr(S, D, Attr); + handleAMDGPUUInt32Attr<AMDGPUNumVGPRAttr>(S, D, Attr); break; case AttributeList::AT_AMDGPUNumSGPR: - handleAMDGPUNumSGPRAttr(S, D, Attr); + handleAMDGPUUInt32Attr<AMDGPUNumSGPRAttr>(S, D, Attr); + break; + case AttributeList::AT_AMDGPUToolsInsertNops: + handleSimpleAttribute<AMDGPUToolsInsertNopsAttr>(S, D, Attr); + break; + case AttributeList::AT_AMDGPUToolsNumReservedVGPR: + handleAMDGPUUInt32Attr<AMDGPUToolsNumReservedVGPRAttr>(S, D, Attr); + break; + case AttributeList::AT_AMDGPUToolsNumReservedSGPR: + handleAMDGPUUInt32Attr<AMDGPUToolsNumReservedSGPRAttr>(S, D, Attr); break; case AttributeList::AT_IBAction: handleSimpleAttribute<IBActionAttr>(S, D, Attr); @@ -5653,6 +5650,18 @@ Diag(D->getLocation(), diag::err_attribute_wrong_decl_type) << A << ExpectedKernelFunction; D->setInvalidDecl(); + } else if (Attr *A = D->getAttr<AMDGPUToolsInsertNopsAttr>()) { + Diag(D->getLocation(), diag::err_attribute_wrong_decl_type) + << A << ExpectedKernelFunction; + D->setInvalidDecl(); + } else if (Attr *A = D->getAttr<AMDGPUToolsNumReservedVGPRAttr>()) { + Diag(D->getLocation(), diag::err_attribute_wrong_decl_type) + << A << ExpectedKernelFunction; + D->setInvalidDecl(); + } else if (Attr *A = D->getAttr<AMDGPUToolsNumReservedSGPRAttr>()) { + Diag(D->getLocation(), diag::err_attribute_wrong_decl_type) + << A << ExpectedKernelFunction; + D->setInvalidDecl(); } } } Index: test/CodeGenOpenCL/amdgpu-tools-attrs-opts-precedence.cl =================================================================== --- /dev/null +++ test/CodeGenOpenCL/amdgpu-tools-attrs-opts-precedence.cl @@ -0,0 +1,22 @@ +// RUN: %clang_cc1 --amdgpu-tools-num-reserved-vgpr=4 -triple amdgcn--amdhsa -target-cpu kaveri -O0 -emit-llvm -o - %s | FileCheck -check-prefix=PREC1 %s +// RUN: %clang_cc1 --amdgpu-tools-num-reserved-sgpr=8 -triple amdgcn--amdhsa -target-cpu kaveri -O0 -emit-llvm -o - %s | FileCheck -check-prefix=PREC2 %s +// RUN: %clang_cc1 --amdgpu-tools-num-reserved-vgpr=4 
--amdgpu-tools-num-reserved-sgpr=8 -triple amdgcn--amdhsa -target-cpu kaveri -O0 -emit-llvm -o - %s | FileCheck -check-prefix=PREC3 %s + +__attribute__((amdgpu_tools_num_reserved_vgpr(8))) // expected-no-diagnostics +kernel void test_prec1() { +// PREC1: define void @test_prec1() [[ATTR_PREC1:#[0-9]+]] +} + +__attribute__((amdgpu_tools_num_reserved_sgpr(4))) // expected-no-diagnostics +kernel void test_prec2() { +// PREC2: define void @test_prec2() [[ATTR_PREC2:#[0-9]+]] +} + +__attribute__((amdgpu_tools_num_reserved_vgpr(8), amdgpu_tools_num_reserved_sgpr(4))) // expected-no-diagnostics +kernel void test_prec3() { +// PREC3: define void @test_prec3() [[ATTR_PREC3:#[0-9]+]] +} + +// PREC1-DAG: attributes [[ATTR_PREC1]] = { nounwind "amdgpu-tools-num-reserved-vgpr"="4" +// PREC2-DAG: attributes [[ATTR_PREC2]] = { nounwind "amdgpu-tools-num-reserved-sgpr"="8" +// PREC3-DAG: attributes [[ATTR_PREC3]] = { nounwind "amdgpu-tools-num-reserved-sgpr"="8" "amdgpu-tools-num-reserved-vgpr"="4" Index: test/CodeGenOpenCL/amdgpu-tools-attrs.cl =================================================================== --- /dev/null +++ test/CodeGenOpenCL/amdgpu-tools-attrs.cl @@ -0,0 +1,59 @@ +// RUN: %clang_cc1 -triple amdgcn--amdhsa -target-cpu kaveri -O0 -emit-llvm -o - %s | FileCheck %s +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -O0 -emit-llvm -verify -o - %s | FileCheck -check-prefix=X86 %s + +__attribute__((amdgpu_tools_insert_nops)) // expected-no-diagnostics +kernel void test_tools_insert_nops() { +// CHECK: define void @test_tools_insert_nops() [[ATTR_NOP:#[0-9]+]] +} + +__attribute__((amdgpu_tools_num_reserved_vgpr(8))) // expected-no-diagnostics +kernel void test_tools_num_reserved_vgpr8() { +// CHECK: define void @test_tools_num_reserved_vgpr8() [[ATTR_VGPR8:#[0-9]+]] +} + +__attribute__((amdgpu_tools_insert_nops, amdgpu_tools_num_reserved_vgpr(8))) // expected-no-diagnostics +kernel void test_tools_insert_nops_num_reserved_vgpr8() { +// CHECK: define void 
@test_tools_insert_nops_num_reserved_vgpr8() [[ATTR_NOP_VGPR8:#[0-9]+]] +} + +__attribute__((amdgpu_tools_num_reserved_sgpr(4))) // expected-no-diagnostics +kernel void test_tools_num_reserved_sgpr4() { +// CHECK: define void @test_tools_num_reserved_sgpr4() [[ATTR_SGPR4:#[0-9]+]] +} + +__attribute__((amdgpu_tools_insert_nops, amdgpu_tools_num_reserved_sgpr(4))) // expected-no-diagnostics +kernel void test_tools_insert_nops_num_reserved_sgpr4() { +// CHECK: define void @test_tools_insert_nops_num_reserved_sgpr4() [[ATTR_NOP_SGPR4:#[0-9]+]] +} + +__attribute__((amdgpu_tools_num_reserved_vgpr(8), amdgpu_tools_num_reserved_sgpr(4))) // expected-no-diagnostics +kernel void test_tools_num_reserved_vgpr8_sgpr4() { +// CHECK: define void @test_tools_num_reserved_vgpr8_sgpr4() [[ATTR_VGPR8_SGPR4:#[0-9]+]] +} + +__attribute__((amdgpu_tools_insert_nops, amdgpu_tools_num_reserved_vgpr(8), amdgpu_tools_num_reserved_sgpr(4))) // expected-no-diagnostics +kernel void test_tools_insert_nops_num_reserved_vgpr8_sgpr4() { +// CHECK: define void @test_tools_insert_nops_num_reserved_vgpr8_sgpr4() [[ATTR_NOP_VGPR8_SGPR4:#[0-9]+]] +} + +__attribute__((amdgpu_tools_num_reserved_vgpr(0))) // expected-no-diagnostics +kernel void test_tools_num_reserved_vgpr0() {} + +__attribute__((amdgpu_tools_num_reserved_sgpr(0))) // expected-no-diagnostics +kernel void test_tools_num_reserved_sgpr0() {} + +__attribute__((amdgpu_tools_num_reserved_vgpr(0), amdgpu_tools_num_reserved_sgpr(0))) // expected-no-diagnostics +kernel void test_tools_num_reserved_vgpr0_sgpr0() {} + +// CHECK-DAG: attributes [[ATTR_NOP]] = { nounwind "amdgpu-tools-insert-nops" +// CHECK-DAG: attributes [[ATTR_VGPR8]] = { nounwind "amdgpu-tools-num-reserved-vgpr"="8" +// CHECK-DAG: attributes [[ATTR_NOP_VGPR8]] = { nounwind "amdgpu-tools-insert-nops" "amdgpu-tools-num-reserved-vgpr"="8" +// CHECK-DAG: attributes [[ATTR_SGPR4]] = { nounwind "amdgpu-tools-num-reserved-sgpr"="4" +// CHECK-DAG: attributes [[ATTR_NOP_SGPR4]] = { nounwind 
"amdgpu-tools-insert-nops" "amdgpu-tools-num-reserved-sgpr"="4" +// CHECK-DAG: attributes [[ATTR_VGPR8_SGPR4]] = { nounwind "amdgpu-tools-num-reserved-sgpr"="4" "amdgpu-tools-num-reserved-vgpr"="8" +// CHECK-DAG: attributes [[ATTR_NOP_VGPR8_SGPR4]] = { nounwind "amdgpu-tools-insert-nops" "amdgpu-tools-num-reserved-sgpr"="4" "amdgpu-tools-num-reserved-vgpr"="8" +// CHECK-NOT: "amdgpu-tools-num-reserved-vgpr"="0" +// CHECK-NOT: "amdgpu-tools-num-reserved-sgpr"="0" +// X86-NOT: "amdgpu-tools-insert-nops" +// X86-NOT: "amdgpu-tools-num-reserved-vgpr" +// X86-NOT: "amdgpu-tools-num-reserved-sgpr" Index: test/CodeGenOpenCL/amdgpu-tools-opts.cl =================================================================== --- /dev/null +++ test/CodeGenOpenCL/amdgpu-tools-opts.cl @@ -0,0 +1,60 @@ +// RUN: %clang_cc1 --amdgpu-tools-insert-nops -triple amdgcn--amdhsa -target-cpu kaveri -O0 -emit-llvm -o - %s | FileCheck -check-prefix=NOP %s +// RUN: %clang_cc1 --amdgpu-tools-num-reserved-vgpr=8 -triple amdgcn--amdhsa -target-cpu kaveri -O0 -emit-llvm -o - %s | FileCheck -check-prefix=VGPR8 %s +// RUN: %clang_cc1 --amdgpu-tools-insert-nops --amdgpu-tools-num-reserved-vgpr=8 -triple amdgcn--amdhsa -target-cpu kaveri -O0 -emit-llvm -o - %s | FileCheck -check-prefix=NOP_VGPR8 %s +// RUN: %clang_cc1 --amdgpu-tools-num-reserved-sgpr=4 -triple amdgcn--amdhsa -target-cpu kaveri -O0 -emit-llvm -o - %s | FileCheck -check-prefix=SGPR4 %s +// RUN: %clang_cc1 --amdgpu-tools-insert-nops --amdgpu-tools-num-reserved-sgpr=4 -triple amdgcn--amdhsa -target-cpu kaveri -O0 -emit-llvm -o - %s | FileCheck -check-prefix=NOP_SGPR4 %s +// RUN: %clang_cc1 --amdgpu-tools-num-reserved-vgpr=8 --amdgpu-tools-num-reserved-sgpr=4 -triple amdgcn--amdhsa -target-cpu kaveri -O0 -emit-llvm -o - %s | FileCheck -check-prefix=VGPR8_SGPR4 %s +// RUN: %clang_cc1 --amdgpu-tools-insert-nops --amdgpu-tools-num-reserved-vgpr=8 --amdgpu-tools-num-reserved-sgpr=4 -triple amdgcn--amdhsa -target-cpu kaveri -O0 -emit-llvm -o - %s | 
FileCheck -check-prefix=NOP_VGPR8_SGPR4 %s +// RUN: %clang_cc1 --amdgpu-tools-num-reserved-vgpr=0 -triple amdgcn--amdhsa -target-cpu kaveri -O0 -emit-llvm -o - %s | FileCheck -check-prefix=VGPR0 %s +// RUN: %clang_cc1 --amdgpu-tools-insert-nops --amdgpu-tools-num-reserved-vgpr=0 -triple amdgcn--amdhsa -target-cpu kaveri -O0 -emit-llvm -o - %s | FileCheck -check-prefix=NOP_VGPR0 %s +// RUN: %clang_cc1 --amdgpu-tools-num-reserved-sgpr=0 -triple amdgcn--amdhsa -target-cpu kaveri -O0 -emit-llvm -o - %s | FileCheck -check-prefix=SGPR0 %s +// RUN: %clang_cc1 --amdgpu-tools-insert-nops --amdgpu-tools-num-reserved-sgpr=0 -triple amdgcn--amdhsa -target-cpu kaveri -O0 -emit-llvm -o - %s | FileCheck -check-prefix=NOP_SGPR0 %s +// RUN: %clang_cc1 --amdgpu-tools-num-reserved-vgpr=0 --amdgpu-tools-num-reserved-sgpr=0 -triple amdgcn--amdhsa -target-cpu kaveri -O0 -emit-llvm -o - %s | FileCheck -check-prefix=VGPR0_SGPR0 %s +// RUN: %clang_cc1 --amdgpu-tools-insert-nops --amdgpu-tools-num-reserved-vgpr=0 --amdgpu-tools-num-reserved-sgpr=0 -triple amdgcn--amdhsa -target-cpu kaveri -O0 -emit-llvm -o - %s | FileCheck -check-prefix=NOP_VGPR0_SGPR0 %s +// RUN: %clang_cc1 --amdgpu-tools-insert-nops -triple x86_64-unknown-linux-gnu -O0 -emit-llvm -verify -o - %s | FileCheck -check-prefix=X86_NOP %s +// RUN: %clang_cc1 --amdgpu-tools-num-reserved-vgpr=8 -triple x86_64-unknown-linux-gnu -O0 -emit-llvm -verify -o - %s | FileCheck -check-prefix=X86_VGPR8 %s +// RUN: %clang_cc1 --amdgpu-tools-insert-nops --amdgpu-tools-num-reserved-vgpr=8 -triple x86_64-unknown-linux-gnu -O0 -emit-llvm -verify -o - %s | FileCheck -check-prefix=X86_NOP_VGPR8 %s +// RUN: %clang_cc1 --amdgpu-tools-num-reserved-sgpr=4 -triple x86_64-unknown-linux-gnu -O0 -emit-llvm -verify -o - %s | FileCheck -check-prefix=X86_SGPR4 %s +// RUN: %clang_cc1 --amdgpu-tools-insert-nops --amdgpu-tools-num-reserved-sgpr=4 -triple x86_64-unknown-linux-gnu -O0 -emit-llvm -verify -o - %s | FileCheck -check-prefix=X86_NOP_SGPR4 %s +// RUN: 
%clang_cc1 --amdgpu-tools-num-reserved-vgpr=8 --amdgpu-tools-num-reserved-sgpr=4 -triple x86_64-unknown-linux-gnu -O0 -emit-llvm -verify -o - %s | FileCheck -check-prefix=X86_VGPR8_SGPR4 %s +// RUN: %clang_cc1 --amdgpu-tools-insert-nops --amdgpu-tools-num-reserved-vgpr=8 --amdgpu-tools-num-reserved-sgpr=4 -triple x86_64-unknown-linux-gnu -O0 -emit-llvm -verify -o - %s | FileCheck -check-prefix=X86_NOP_VGPR8_SGPR4 %s +// RUN: %clang_cc1 --amdgpu-tools-num-reserved-vgpr=0 -triple x86_64-unknown-linux-gnu -O0 -emit-llvm -verify -o - %s | FileCheck -check-prefix=X86_VGPR0 %s +// RUN: %clang_cc1 --amdgpu-tools-num-reserved-sgpr=0 -triple x86_64-unknown-linux-gnu -O0 -emit-llvm -verify -o - %s | FileCheck -check-prefix=X86_SGPR0 %s + +kernel void foo() { // expected-no-diagnostics +// NOP: define void @foo() [[ATTR_NOP:#[0-9]+]] +// VGPR8: define void @foo() [[ATTR_VGPR8:#[0-9]+]] +// NOP_VGPR8: define void @foo() [[ATTR_NOP_VGPR8:#[0-9]+]] +// SGPR4: define void @foo() [[ATTR_SGPR4:#[0-9]+]] +// NOP_SGPR4: define void @foo() [[ATTR_NOP_SGPR4:#[0-9]+]] +// VGPR8_SGPR4: define void @foo() [[ATTR_VGPR8_SGPR4:#[0-9]+]] +// NOP_VGPR8_SGPR4: define void @foo() [[ATTR_NOP_VGPR8_SGPR4:#[0-9]+]] +} + +kernel void another_foo() { // expected-no-diagnostics +// NOP: define void @another_foo() [[ATTR_SECOND_NOP:#[0-9]+]] +} + +// NOP-DAG: attributes [[ATTR_NOP]] = { nounwind "amdgpu-tools-insert-nops" +// NOP-DAG: attributes [[ATTR_SECOND_NOP]] = { nounwind "amdgpu-tools-insert-nops" +// VGPR8-DAG: attributes [[ATTR_VGPR8]] = { nounwind "amdgpu-tools-num-reserved-vgpr"="8" +// NOP_VGPR8-DAG: attributes [[ATTR_NOP_VGPR8]] = { nounwind "amdgpu-tools-insert-nops" "amdgpu-tools-num-reserved-vgpr"="8" +// SGPR4-DAG: attributes [[ATTR_SGPR4]] = { nounwind "amdgpu-tools-num-reserved-sgpr"="4" +// NOP_SGPR4-DAG: attributes [[ATTR_NOP_SGPR4]] = { nounwind "amdgpu-tools-insert-nops" "amdgpu-tools-num-reserved-sgpr"="4" +// VGPR8_SGPR4-DAG: attributes [[ATTR_VGPR8_SGPR4]] = { nounwind 
"amdgpu-tools-num-reserved-sgpr"="4" "amdgpu-tools-num-reserved-vgpr"="8" +// NOP_VGPR8_SGPR4-DAG: attributes [[ATTR_NOP_VGPR8_SGPR4]] = { nounwind "amdgpu-tools-insert-nops" "amdgpu-tools-num-reserved-sgpr"="4" "amdgpu-tools-num-reserved-vgpr"="8" +// VGPR0-NOT: "amdgpu-tools-num-reserved-vgpr"="0" +// NOP_VGPR0-NOT: "amdgpu-tools-insert-nops" "amdgpu-tools-num-reserved-vgpr"="0" +// SGPR0-NOT: "amdgpu-tools-num-reserved-sgpr"="0" +// NOP_SGPR0-NOT: "amdgpu-tools-insert-nops" "amdgpu-tools-num-reserved-sgpr"="0" +// VGPR0_SGPR0-NOT: "amdgpu-tools-num-reserved-vgpr"="0" "amdgpu-tools-num-reserved-sgpr"="0" +// NOP_VGPR0_SGPR0-NOT: "amdgpu-tools-insert-nops" "amdgpu-tools-num-reserved-sgpr"="0" "amdgpu-tools-num-reserved-vgpr"="0" +// X86_NOP-NOT: "amdgpu-tools-insert-nops" +// X86_VGPR8-NOT: "amdgpu-tools-num-reserved-vgpr" +// X86_NOP_VGPR8-NOT: "amdgpu-tools-insert-nops" "amdgpu-tools-num-reserved-vgpr"="8" +// X86_SGPR4-NOT: "amdgpu-tools-num-reserved-sgpr" +// X86_NOP_SGPR4-NOT: "amdgpu-tools-insert-nops" "amdgpu-tools-num-reserved-sgpr"="4" +// X86_VGPR8_SGPR4-NOT: "amdgpu-tools-num-reserved-sgpr"="4" "amdgpu-tools-num-reserved-vgpr"="8" +// X86_NOP_VGPR8_SGPR4-NOT: "amdgpu-tools-insert-nops" "amdgpu-tools-num-reserved-sgpr"="4" "amdgpu-tools-num-reserved-vgpr"="8" +// X86_VGPR0-NOT: "amdgpu-tools-num-reserved-vgpr" +// X86_SGPR0-NOT: "amdgpu-tools-num-reserved-sgpr" Index: test/SemaOpenCL/amdgpu-tools-attrs.cl =================================================================== --- /dev/null +++ test/SemaOpenCL/amdgpu-tools-attrs.cl @@ -0,0 +1,50 @@ +// RUN: %clang_cc1 -triple amdgcn--amdhsa -verify -fsyntax-only %s + +// Check attributes with non kernel function +__attribute__((amdgpu_tools_insert_nops)) void not_kernel0() {} // expected-error {{'amdgpu_tools_insert_nops' attribute only applies to kernel functions}} +__attribute__((amdgpu_tools_num_reserved_vgpr(8))) void not_kernel1() {} // expected-error {{'amdgpu_tools_num_reserved_vgpr' attribute only 
applies to kernel functions}} +__attribute__((amdgpu_tools_num_reserved_sgpr(4))) void not_kernel2() {} // expected-error {{'amdgpu_tools_num_reserved_sgpr' attribute only applies to kernel functions}} +typedef __attribute__((amdgpu_tools_insert_nops)) struct foo0_s { // expected-error {{'amdgpu_tools_insert_nops' attribute only applies to kernel functions}} + int x; + int y; +} foo0_t; +typedef __attribute__((amdgpu_tools_num_reserved_vgpr(8))) struct foo1_s { // expected-error {{'amdgpu_tools_num_reserved_vgpr' attribute only applies to kernel functions}} + int x; + int y; +} foo1_t; +typedef __attribute__((amdgpu_tools_num_reserved_sgpr(4))) struct foo2_s { // expected-error {{'amdgpu_tools_num_reserved_sgpr' attribute only applies to kernel functions}} + int x; + int y; +} foo2_t; + +// Check non integer attribute values. +__attribute__((amdgpu_tools_num_reserved_vgpr("ABC"))) kernel void foo3() {} // expected-error {{'amdgpu_tools_num_reserved_vgpr' attribute requires an integer constant}} +__attribute__((amdgpu_tools_num_reserved_sgpr("DEF"))) kernel void foo4() {} // expected-error {{'amdgpu_tools_num_reserved_sgpr' attribute requires an integer constant}} + +// Check large attribute values. +__attribute__((amdgpu_tools_num_reserved_vgpr(4294967296))) kernel void foo5() {} // expected-error {{integer constant expression evaluates to value 4294967296 that cannot be represented in a 32-bit unsigned integer type}} +__attribute__((amdgpu_tools_num_reserved_sgpr(4294967296))) kernel void foo6() {} // expected-error {{integer constant expression evaluates to value 4294967296 that cannot be represented in a 32-bit unsigned integer type}} +__attribute__((amdgpu_tools_num_reserved_vgpr(4294967296), amdgpu_tools_num_reserved_sgpr(4294967296))) kernel void foo7() {} // expected-error 2 {{integer constant expression evaluates to value 4294967296 that cannot be represented in a 32-bit unsigned integer type}} + +// Check valid attributes. 
+__attribute__((amdgpu_tools_insert_nops)) kernel void foo8() {} +__attribute__((amdgpu_tools_num_reserved_vgpr(8))) kernel void foo9() {} +__attribute__((amdgpu_tools_insert_nops, amdgpu_tools_num_reserved_vgpr(8))) kernel void foo10() {} +__attribute__((amdgpu_tools_num_reserved_sgpr(4))) kernel void foo11() {} +__attribute__((amdgpu_tools_insert_nops, amdgpu_tools_num_reserved_sgpr(4))) kernel void foo12() {} +__attribute__((amdgpu_tools_num_reserved_vgpr(8), amdgpu_tools_num_reserved_sgpr(4))) kernel void foo13() {} +__attribute__((amdgpu_tools_insert_nops, amdgpu_tools_num_reserved_vgpr(8), amdgpu_tools_num_reserved_sgpr(4))) kernel void foo14() {} +// Make sure 0 VGPRs is accepted. +__attribute__((amdgpu_tools_num_reserved_vgpr(0))) kernel void foo15() {} +// Make sure 0 SGPRs is accepted. +__attribute__((amdgpu_tools_num_reserved_sgpr(0))) kernel void foo16() {} +// Make sure 0 VGPRs and 0 SGPRs is accepted. +__attribute__((amdgpu_tools_num_reserved_vgpr(0), amdgpu_tools_num_reserved_sgpr(0))) kernel void foo17() {} +// Make sure kernel keyword can go before __attribute__ keyword. +kernel __attribute__((amdgpu_tools_insert_nops)) void foo18() {} +kernel __attribute__((amdgpu_tools_num_reserved_vgpr(8))) void foo19() {} +kernel __attribute__((amdgpu_tools_insert_nops, amdgpu_tools_num_reserved_vgpr(8))) void foo20() {} +kernel __attribute__((amdgpu_tools_num_reserved_sgpr(4))) void foo21() {} +kernel __attribute__((amdgpu_tools_insert_nops, amdgpu_tools_num_reserved_sgpr(4))) void foo22() {} +kernel __attribute__((amdgpu_tools_num_reserved_vgpr(8), amdgpu_tools_num_reserved_sgpr(4))) void foo23() {} +kernel __attribute__((amdgpu_tools_insert_nops, amdgpu_tools_num_reserved_vgpr(8), amdgpu_tools_num_reserved_sgpr(4))) void foo24() {}