Index: llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
===================================================================
--- llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
+++ llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
@@ -779,8 +779,45 @@
 
   ProgInfo.SGPRBlocks = IsaInfo::getNumSGPRBlocks(
       &STM, ProgInfo.NumSGPRsForWavesPerEU);
+  // Apply -amdgpu-override-sgpr-blocks: the value must pass the isUInt width
+  // check and may not be below the block count computed above.
+  // NOTE(review): isUInt width restored from a garbled copy; please confirm.
+  if (auto OverrideSGPRBlocks = AMDGPU::overrideSGPRBlocks()) {
+    if (!isUInt<amdhsa::COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT_WIDTH>(
+            *OverrideSGPRBlocks)) {
+      OutContext.reportError({}, "requested override of sgpr blocks (" +
+                                 Twine(*OverrideSGPRBlocks) + ") is out "
+                                 "of range");
+    } else if (ProgInfo.SGPRBlocks > *OverrideSGPRBlocks) {
+      OutContext.reportError({}, "requested override of sgpr blocks (" +
+                                 Twine(*OverrideSGPRBlocks) + ") is smaller "
+                                 "than the required number of sgpr blocks (" +
+                                 Twine(ProgInfo.SGPRBlocks) + ")");
+    } else {
+      ProgInfo.SGPRBlocks = *OverrideSGPRBlocks;
+    }
+  }
+
   ProgInfo.VGPRBlocks = IsaInfo::getNumVGPRBlocks(
       &STM, ProgInfo.NumVGPRsForWavesPerEU);
+  // Apply -amdgpu-override-vgpr-blocks: the value must pass the isUInt width
+  // check and may not be below the block count computed above.
+  // NOTE(review): isUInt width restored from a garbled copy; please confirm.
+  if (auto OverrideVGPRBlocks = AMDGPU::overrideVGPRBlocks()) {
+    if (!isUInt<amdhsa::COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT_WIDTH>(
+            *OverrideVGPRBlocks)) {
+      OutContext.reportError({}, "requested override of vgpr blocks (" +
+                                 Twine(*OverrideVGPRBlocks) + ") is out "
+                                 "of range");
+    } else if (ProgInfo.VGPRBlocks > *OverrideVGPRBlocks) {
+      OutContext.reportError({}, "requested override of vgpr blocks (" +
+                                 Twine(*OverrideVGPRBlocks) + ") is smaller "
+                                 "than the required number of vgpr blocks (" +
+                                 Twine(ProgInfo.VGPRBlocks) + ")");
+    } else {
+      ProgInfo.VGPRBlocks = *OverrideVGPRBlocks;
+    }
+  }
 
   const SIModeRegisterDefaults Mode = MFI->getMode();
 
Index: llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
===================================================================
--- llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
+++ llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
@@ -4811,6 +4811,27 @@
                          SGPRBlocks))
     return true;
 
+  // Apply the block overrides ahead of the isUInt range check that follows, so
+  // an overridden VGPRBlocks is still validated against the field width.
+  if (auto OverrideVGPRBlocks = AMDGPU::overrideVGPRBlocks()) {
+    if (VGPRBlocks > *OverrideVGPRBlocks) {
+      return TokError("requested override of vgpr blocks (" +
+                      Twine(*OverrideVGPRBlocks) + ") is smaller "
+                      "than the required number of vgpr blocks (" +
+                      Twine(VGPRBlocks) + ")");
+    }
+    VGPRBlocks = *OverrideVGPRBlocks;
+  }
+  if (auto OverrideSGPRBlocks = AMDGPU::overrideSGPRBlocks()) {
+    if (SGPRBlocks > *OverrideSGPRBlocks) {
+      return TokError("requested override of sgpr blocks (" +
+                      Twine(*OverrideSGPRBlocks) + ") is smaller "
+                      "than the required number of sgpr blocks (" +
+                      Twine(SGPRBlocks) + ")");
+    }
+    SGPRBlocks = *OverrideSGPRBlocks;
+  }
+
   if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT_WIDTH>(
           VGPRBlocks))
     return OutOfRangeError(VGPRRange);
Index: llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
===================================================================
--- llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
+++ llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
@@ -51,6 +51,14 @@
 /// false otherwise.
 bool isHsaAbiVersion3Or4(const MCSubtargetInfo *STI);
 
+/// \returns Requested override of VGPR blocks, which can be directly written
+/// into COMPUTE_PGM_RSRC1.VGPRS. None if override is not requested.
+Optional<unsigned> overrideVGPRBlocks();
+
+/// \returns Requested override of SGPR blocks, which can be directly written
+/// into COMPUTE_PGM_RSRC1.SGPRS. None if override is not requested.
+Optional<unsigned> overrideSGPRBlocks();
+
 struct GcnBufferFormatInfo {
   unsigned Format;
   unsigned BitsPerComp;
Index: llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
===================================================================
--- llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
+++ llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
@@ -33,6 +33,16 @@
   llvm::cl::desc("AMDHSA Code Object Version"), llvm::cl::init(4),
   llvm::cl::ZeroOrMore);
 
+static llvm::cl::opt<unsigned> AmdgpuOverrideVGPRBlocks(
+  "amdgpu-override-vgpr-blocks", llvm::cl::Hidden,
+  llvm::cl::desc("Override VGPR blocks in COMPUTE_PGM_RSRC1.VGPRS"),
+  llvm::cl::init(std::numeric_limits<unsigned>::max()), llvm::cl::ZeroOrMore);
+
+static llvm::cl::opt<unsigned> AmdgpuOverrideSGPRBlocks(
+  "amdgpu-override-sgpr-blocks", llvm::cl::Hidden,
+  llvm::cl::desc("Override SGPR blocks in COMPUTE_PGM_RSRC1.SGPRS"),
+  llvm::cl::init(std::numeric_limits<unsigned>::max()), llvm::cl::ZeroOrMore);
+
 namespace {
 
 /// \returns Bit mask for given bit \p Shift and bit \p Width.
@@ -127,6 +137,20 @@
   return isHsaAbiVersion3(STI) || isHsaAbiVersion4(STI);
 }
 
+Optional<unsigned> overrideVGPRBlocks() {
+  unsigned VGPRBlocks = AmdgpuOverrideVGPRBlocks;
+  if (AmdgpuOverrideVGPRBlocks.getNumOccurrences())
+    return VGPRBlocks;
+  return None;
+}
+
+Optional<unsigned> overrideSGPRBlocks() {
+  unsigned SGPRBlocks = AmdgpuOverrideSGPRBlocks;
+  if (AmdgpuOverrideSGPRBlocks.getNumOccurrences())
+    return SGPRBlocks;
+  return None;
+}
+
 #define GET_MIMGBaseOpcodesTable_IMPL
 #define GET_MIMGDimInfoTable_IMPL
 #define GET_MIMGInfoTable_IMPL
Index: llvm/test/CodeGen/AMDGPU/override-svgpr-blocks-err.ll
===================================================================
--- /dev/null
+++ llvm/test/CodeGen/AMDGPU/override-svgpr-blocks-err.ll
@@ -0,0 +1,51 @@
+; RUN: not llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -amdgpu-override-sgpr-blocks=0 -amdgpu-override-vgpr-blocks=0 < %s 2>&1 | FileCheck --check-prefix=ERR %s
+
+; ERR: error: requested override of sgpr blocks (0) is smaller than the required number of sgpr blocks (1)
+; ERR: error: requested override of vgpr blocks (0) is smaller than the required number of vgpr blocks (1)
+
+@var = addrspace(1) global float 0.0
+
+define amdgpu_kernel void @test1() {
+  ; 1 SGPR Block.
+  %one = load volatile i32, i32 addrspace(4)* undef
+  %two = load volatile i32, i32 addrspace(4)* undef
+  %three = load volatile i32, i32 addrspace(4)* undef
+  %four = load volatile i32, i32 addrspace(4)* undef
+  %five = load volatile i32, i32 addrspace(4)* undef
+  %six = load volatile i32, i32 addrspace(4)* undef
+  %seven = load volatile i32, i32 addrspace(4)* undef
+  %eight = load volatile i32, i32 addrspace(4)* undef
+  %nine = load volatile i32, i32 addrspace(4)* undef
+  %ten = load volatile i32, i32 addrspace(4)* undef
+  %eleven = load volatile i32, i32 addrspace(4)* undef
+  call void asm sideeffect "", "s,s,s,s,s,s,s,s,s,s"(i32 %one, i32 %two, i32 %three, i32 %four, i32 %five, i32 %six, i32 %seven, i32 %eight, i32 %nine, i32 %ten)
+  store volatile i32 %one, i32 addrspace(1)* undef
+  store volatile i32 %two, i32 addrspace(1)* undef
+  store volatile i32 %three, i32 addrspace(1)* undef
+  store volatile i32 %four, i32 addrspace(1)* undef
+  store volatile i32 %five, i32 addrspace(1)* undef
+  store volatile i32 %six, i32 addrspace(1)* undef
+  store volatile i32 %seven, i32 addrspace(1)* undef
+  store volatile i32 %eight, i32 addrspace(1)* undef
+  store volatile i32 %nine, i32 addrspace(1)* undef
+  store volatile i32 %ten, i32 addrspace(1)* undef
+  store volatile i32 %eleven, i32 addrspace(1)* undef
+
+  ; 1 VGPR Block.
+  %val0 = load volatile float, float addrspace(1)* @var
+  %val1 = load volatile float, float addrspace(1)* @var
+  %val2 = load volatile float, float addrspace(1)* @var
+  %val3 = load volatile float, float addrspace(1)* @var
+  %val4 = load volatile float, float addrspace(1)* @var
+  %val5 = load volatile float, float addrspace(1)* @var
+  %val6 = load volatile float, float addrspace(1)* @var
+  store volatile float %val0, float addrspace(1)* @var
+  store volatile float %val1, float addrspace(1)* @var
+  store volatile float %val2, float addrspace(1)* @var
+  store volatile float %val3, float addrspace(1)* @var
+  store volatile float %val4, float addrspace(1)* @var
+  store volatile float %val5, float addrspace(1)* @var
+  store volatile float %val6, float addrspace(1)* @var
+
+  ret void
+}
Index: llvm/test/CodeGen/AMDGPU/override-svgpr-blocks.ll
===================================================================
--- /dev/null
+++ llvm/test/CodeGen/AMDGPU/override-svgpr-blocks.ll
@@ -0,0 +1,45 @@
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefix=GCN -check-prefix=NO-OVERRIDE-ASM %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -filetype=obj < %s | llvm-objdump -s -j .rodata - | FileCheck -check-prefix=NO-OVERRIDE-OBJ %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -amdgpu-override-sgpr-blocks=1 -amdgpu-override-vgpr-blocks=3 < %s | FileCheck -check-prefix=GCN -check-prefix=OVERRIDE-ASM %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -filetype=obj -amdgpu-override-sgpr-blocks=1 -amdgpu-override-vgpr-blocks=3 < %s | llvm-objdump -s -j .rodata - | FileCheck -check-prefix=OVERRIDE-OBJ %s
+; RUN: not llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -amdgpu-override-sgpr-blocks=16 < %s 2>&1 | FileCheck --check-prefix=ERR-SGPRS-OUT-OF-RANGE %s
+; RUN: not llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -amdgpu-override-vgpr-blocks=64 < %s 2>&1 | FileCheck --check-prefix=ERR-VGPRS-OUT-OF-RANGE %s
+
+; ERR-SGPRS-OUT-OF-RANGE: error: requested override of sgpr blocks (16) is out of range
+; ERR-VGPRS-OUT-OF-RANGE: error: requested override of vgpr blocks (64) is out of range
+
+; NO-OVERRIDE-OBJ: 0030 0000af00 88000000 01000000 00000000
+; NO-OVERRIDE-OBJ: 0070 0000af00 8c000000 09000000 00000000
+; OVERRIDE-OBJ: 0030 4300af00 88000000 01000000 00000000
+; OVERRIDE-OBJ: 0070 4300af00 8c000000 09000000 00000000
+
+; GCN-LABEL: {{^}}test1
+; GCN: NumSgprs: 0
+; GCN: NumVgprs: 0
+; NO-OVERRIDE-ASM: SGPRBlocks: 0
+; NO-OVERRIDE-ASM: VGPRBlocks: 0
+; OVERRIDE-ASM: SGPRBlocks: 1
+; OVERRIDE-ASM: VGPRBlocks: 3
+define amdgpu_kernel void @test1() {
+entry:
+  ret void
+}
+
+; GCN-LABEL: {{^}}test2
+; GCN: NumSgprs: 8
+; GCN: NumVgprs: 3
+; NO-OVERRIDE-ASM: SGPRBlocks: 0
+; NO-OVERRIDE-ASM: VGPRBlocks: 0
+; OVERRIDE-ASM: SGPRBlocks: 1
+; OVERRIDE-ASM: VGPRBlocks: 3
+define amdgpu_kernel void @test2(
+    half addrspace(1)* %r,
+    half addrspace(1)* %a,
+    half addrspace(1)* %b) {
+entry:
+  %a.val = load volatile half, half addrspace(1)* %a
+  %b.val = load volatile half, half addrspace(1)* %b
+  %r.val = fadd half %a.val, %b.val
+  store half %r.val, half addrspace(1)* %r
+  ret void
+}
Index: llvm/test/MC/AMDGPU/override-svgpr-blocks-err.s
===================================================================
--- /dev/null
+++ llvm/test/MC/AMDGPU/override-svgpr-blocks-err.s
@@ -0,0 +1,29 @@
+// RUN: not llvm-mc -triple amdgcn-amd-amdhsa -mcpu=gfx900 -filetype=obj -amdgpu-override-sgpr-blocks=1 -amdgpu-override-vgpr-blocks=1 < %s 2>&1 | FileCheck -check-prefix=ERR %s
+
+// ERR: error: requested override of vgpr blocks (1) is smaller than the required number of vgpr blocks (3)
+// ERR: error: requested override of sgpr blocks (1) is smaller than the required number of sgpr blocks (2)
+
+.text
+.p2align 8
+.type test1,@function
+test1:
+  s_endpgm
+
+.p2align 8
+.type test2,@function
+test2:
+  s_endpgm
+
+.rodata
+
+.p2align 6
+.amdhsa_kernel test1
+  .amdhsa_next_free_vgpr 16
+  .amdhsa_next_free_sgpr 0
+.end_amdhsa_kernel
+
+.p2align 6
+.amdhsa_kernel test2
+  .amdhsa_next_free_vgpr 0
+  .amdhsa_next_free_sgpr 16
+.end_amdhsa_kernel
Index: llvm/test/MC/AMDGPU/override-svgpr-blocks.s
===================================================================
--- /dev/null
+++ llvm/test/MC/AMDGPU/override-svgpr-blocks.s
@@ -0,0 +1,32 @@
+// RUN: llvm-mc -triple amdgcn-amd-amdhsa -mcpu=gfx900 -filetype=obj < %s | llvm-objdump -s -j .rodata - | FileCheck -check-prefix=NO-OVERRIDE %s
+// RUN: llvm-mc -triple amdgcn-amd-amdhsa -mcpu=gfx900 -filetype=obj -amdgpu-override-sgpr-blocks=5 -amdgpu-override-vgpr-blocks=7 < %s | llvm-objdump -s -j .rodata - | FileCheck -check-prefix=OVERRIDE %s
+
+// NO-OVERRIDE: 0030 0000ac00 80000000 00000000 00000000
+// NO-OVERRIDE: 0070 4000ac00 80000000 00000000 00000000
+// OVERRIDE: 0030 4701ac00 80000000 00000000 00000000
+// OVERRIDE: 0070 4701ac00 80000000 00000000 00000000
+
+.text
+.p2align 8
+.type test1,@function
+test1:
+  s_endpgm
+
+.p2align 8
+.type test2,@function
+test2:
+  s_endpgm
+
+.rodata
+
+.p2align 6
+.amdhsa_kernel test1
+  .amdhsa_next_free_vgpr 0
+  .amdhsa_next_free_sgpr 0
+.end_amdhsa_kernel
+
+.p2align 6
+.amdhsa_kernel test2
+  .amdhsa_next_free_vgpr 1
+  .amdhsa_next_free_sgpr 3
+.end_amdhsa_kernel