diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp --- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp +++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp @@ -431,9 +431,13 @@ } unsigned getNumSGPRBlocks(const MCSubtargetInfo *STI, unsigned NumSGPRs) { - NumSGPRs = alignTo(std::max(1u, NumSGPRs), getSGPREncodingGranule(STI)); + // Even though granularity is 8, the roundup must be an even number of + // 8-granules for GFX9. + unsigned Alignment = isGFX9(*STI) ? getSGPREncodingGranule(STI) * 2 + : getSGPREncodingGranule(STI); // SGPRBlocks is actual number of SGPR blocks minus 1. - return NumSGPRs / getSGPREncodingGranule(STI) - 1; + unsigned NumSGPRBlocks = divideCeil(std::max(1u, NumSGPRs), Alignment) - 1; + return isGFX9(*STI) ? NumSGPRBlocks * 2 : NumSGPRBlocks; } unsigned getVGPRAllocGranule(const MCSubtargetInfo *STI, diff --git a/llvm/test/MC/AMDGPU/hsa-v3.s b/llvm/test/MC/AMDGPU/hsa-v3.s --- a/llvm/test/MC/AMDGPU/hsa-v3.s +++ b/llvm/test/MC/AMDGPU/hsa-v3.s @@ -8,19 +8,22 @@ // READOBJ: Section Headers // READOBJ: .text PROGBITS {{[0-9a-f]+}} {{[0-9a-f]+}} {{[0-9a-f]+}} {{[0-9]+}} AX {{[0-9]+}} {{[0-9]+}} 256 -// READOBJ: .rodata PROGBITS {{[0-9a-f]+}} {{[0-9a-f]+}} 000100 {{[0-9]+}} A {{[0-9]+}} {{[0-9]+}} 64 +// READOBJ: .rodata PROGBITS {{[0-9a-f]+}} {{[0-9a-f]+}} 000140 {{[0-9]+}} A {{[0-9]+}} {{[0-9]+}} 64 // READOBJ: Relocation section '.rela.rodata' at offset // READOBJ: 0000000000000010 {{[0-9a-f]+}}00000005 R_AMDGPU_REL64 0000000000000000 .text + 10 // READOBJ: 0000000000000050 {{[0-9a-f]+}}00000005 R_AMDGPU_REL64 0000000000000000 .text + 110 // READOBJ: 0000000000000090 {{[0-9a-f]+}}00000005 R_AMDGPU_REL64 0000000000000000 .text + 210 // READOBJ: 00000000000000d0 {{[0-9a-f]+}}00000005 R_AMDGPU_REL64 0000000000000000 .text + 310 +// READOBJ: 0000000000000110 {{[0-9a-f]+}}00000005 R_AMDGPU_REL64 0000000000000000 .text + 410 // READOBJ: Symbol table '.symtab' contains {{[0-9]+}} entries: // READOBJ: {{[0-9]+}}: 0000000000000100 0 FUNC LOCAL PROTECTED 2 complete // READOBJ: {{[0-9]+}}: 0000000000000040 64 OBJECT LOCAL DEFAULT 3 complete.kd // READOBJ: {{[0-9]+}}: 0000000000000300 0 FUNC LOCAL PROTECTED 2 disabled_user_sgpr // READOBJ: {{[0-9]+}}: 00000000000000c0 64 OBJECT LOCAL DEFAULT 3 disabled_user_sgpr.kd +// READOBJ: {{[0-9]+}}: 0000000000000400 0 FUNC LOCAL PROTECTED 2 gfx9_sgpr +// READOBJ: {{[0-9]+}}: 0000000000000100 64 OBJECT LOCAL DEFAULT 3 gfx9_sgpr.kd // READOBJ: {{[0-9]+}}: 0000000000000000 0 FUNC LOCAL PROTECTED 2 minimal // READOBJ: {{[0-9]+}}: 0000000000000000 64 OBJECT LOCAL DEFAULT 3 minimal.kd // READOBJ: {{[0-9]+}}: 0000000000000200 0 FUNC LOCAL PROTECTED 2 special_sgpr @@ -37,7 +40,7 @@ // OBJDUMP-NEXT: 0040 01000000 01000000 00000000 00000000 // OBJDUMP-NEXT: 0050 00000000 00000000 00000000 00000000 // OBJDUMP-NEXT: 0060 00000000 00000000 00000000 00000000 -// OBJDUMP-NEXT: 0070 c2500104 1f0f007f 7f000000 00000000 +// OBJDUMP-NEXT: 0070 82500104 1f0f007f 7f000000 00000000 // special_sgpr // OBJDUMP-NEXT: 0080 00000000 00000000 00000000 00000000 // OBJDUMP-NEXT: 0090 00000000 00000000 00000000 00000000 @@ -48,6 +51,12 @@ // OBJDUMP-NEXT: 00d0 00000000 00000000 00000000 00000000 // OBJDUMP-NEXT: 00e0 00000000 00000000 00000000 00000000 // OBJDUMP-NEXT: 00f0 0000ac00 80000000 00000000 00000000 +// gfx9_sgpr +// OBJDUMP-NEXT: 0100 00000000 00000000 00000000 00000000 +// OBJDUMP-NEXT: 0110 00000000 00000000 00000000 00000000 +// OBJDUMP-NEXT: 0120 00000000 00000000 00000000 00000000 +// OBJDUMP-NEXT: 0130 0001ac00 80000000 00000000 00000000 + .text // ASM: .text @@ -75,6 +84,12 @@ disabled_user_sgpr: s_endpgm +.p2align 8 +.type gfx9_sgpr,@function +gfx9_sgpr: + s_endpgm + + .rodata // ASM: .rodata @@ -209,6 +224,19 @@ // ASM-NEXT: .amdhsa_next_free_sgpr 0 // ASM: .end_amdhsa_kernel +// Test GRANULATED_WAVEFRONT_SGPR_COUNT for GFX9 +.p2align 6 +.amdhsa_kernel gfx9_sgpr + .amdhsa_next_free_vgpr 0 + .amdhsa_next_free_sgpr 33 +.end_amdhsa_kernel + +// ASM: .amdhsa_kernel gfx9_sgpr +// ASM: .amdhsa_next_free_vgpr 0 +// ASM-NEXT: .amdhsa_next_free_sgpr 33 +// ASM: .end_amdhsa_kernel + + .section .foo .byte .amdgcn.gfx_generation_number