diff --git a/llvm/docs/AMDGPUUsage.rst b/llvm/docs/AMDGPUUsage.rst --- a/llvm/docs/AMDGPUUsage.rst +++ b/llvm/docs/AMDGPUUsage.rst @@ -12193,6 +12193,8 @@ :ref:`amdgpu-amdhsa-compute_pgm_rsrc1-gfx6-gfx10-table`. ``.amdhsa_forward_progress`` 0 GFX10 Controls FWD_PROGRESS in :ref:`amdgpu-amdhsa-compute_pgm_rsrc1-gfx6-gfx10-table`. + ``.amdhsa_shared_vgpr_count`` 0 GFX10 Controls SHARED_VGPR_COUNT in + :ref:`amdgpu-amdhsa-compute_pgm_rsrc3-gfx10-table`. ``.amdhsa_exception_fp_ieee_invalid_op`` 0 GFX6-GFX10 Controls ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION in :ref:`amdgpu-amdhsa-compute_pgm_rsrc2-gfx6-gfx10-table`. ``.amdhsa_exception_fp_denorm_src`` 0 GFX6-GFX10 Controls ENABLE_EXCEPTION_FP_DENORMAL_SOURCE in diff --git a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp --- a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp +++ b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp @@ -4566,6 +4566,7 @@ SMRange VGPRRange; uint64_t NextFreeVGPR = 0; uint64_t AccumOffset = 0; + uint64_t SharedVGPRCount = 0; SMRange SGPRRange; uint64_t NextFreeSGPR = 0; unsigned UserSGPRCount = 0; @@ -4785,6 +4786,13 @@ return Error(IDRange.Start, "directive requires gfx10+", IDRange); PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FWD_PROGRESS, Val, ValRange); + } else if (ID == ".amdhsa_shared_vgpr_count") { + if (IVersion.Major < 10) + return Error(IDRange.Start, "directive requires gfx10+", IDRange); + SharedVGPRCount = Val; + PARSE_BITS_ENTRY(KD.compute_pgm_rsrc3, + COMPUTE_PGM_RSRC3_GFX10_SHARED_VGPR_COUNT, + Val, ValRange); } else if (ID == ".amdhsa_exception_fp_ieee_invalid_op") { PARSE_BITS_ENTRY( KD.compute_pgm_rsrc2, @@ -4867,6 +4875,11 @@ (AccumOffset / 4 - 1)); } + if (IVersion.Major == 10) { + if (SharedVGPRCount > alignTo(std::max((uint64_t)1, NextFreeVGPR), 4)) + return TokError("shared_vgpr_count exceeds total VGPR allocation"); + } + getTargetStreamer().EmitAmdhsaKernelDescriptor( getSTI(), KernelName, KD, NextFreeVGPR, NextFreeSGPR, ReserveVCC, ReserveFlatScr); diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp --- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp +++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp @@ -434,6 +434,9 @@ PRINT_FIELD(OS, ".amdhsa_forward_progress", KD, compute_pgm_rsrc1, amdhsa::COMPUTE_PGM_RSRC1_FWD_PROGRESS); + PRINT_FIELD(OS, ".amdhsa_shared_vgpr_count", KD, + compute_pgm_rsrc3, + amdhsa::COMPUTE_PGM_RSRC3_GFX10_SHARED_VGPR_COUNT); } PRINT_FIELD( OS, ".amdhsa_exception_fp_ieee_invalid_op", KD, diff --git a/llvm/test/MC/AMDGPU/hsa-diag-v3.s b/llvm/test/MC/AMDGPU/hsa-diag-v3.s --- a/llvm/test/MC/AMDGPU/hsa-diag-v3.s +++ b/llvm/test/MC/AMDGPU/hsa-diag-v3.s @@ -225,6 +225,26 @@ .amdhsa_forward_progress 5 .end_amdhsa_kernel +// GCN-LABEL: warning: test_amdhsa_shared_vgpr_count +// NONGFX10: error: directive requires gfx10+ +// GFX10: error: .amdhsa_next_free_vgpr directive is required +// NONAMDHSA: error: unknown directive +.warning "test_amdhsa_shared_vgpr_count" +.amdhsa_kernel test_amdhsa_shared_vgpr_count + .amdhsa_shared_vgpr_count 8 +.end_amdhsa_kernel + +// GCN-LABEL: warning: test_amdhsa_shared_vgpr_count_invalid +// NONGFX10: error: directive requires gfx10+ +// GFX10: error: shared_vgpr_count exceeds total VGPR allocation +// NONAMDHSA: error: unknown directive +.warning "test_amdhsa_shared_vgpr_count_invalid" +.amdhsa_kernel test_amdhsa_shared_vgpr_count_invalid + .amdhsa_next_free_vgpr 0 + .amdhsa_next_free_sgpr 0 + .amdhsa_shared_vgpr_count 8 +.end_amdhsa_kernel + // GCN-LABEL: warning: test_next_free_vgpr_invalid // AMDHSA: error: .amdgcn.next_free_{v,s}gpr symbols must be absolute expressions // NONAMDHSA-NOT: error: diff --git a/llvm/test/MC/AMDGPU/hsa-gfx10-v3.s b/llvm/test/MC/AMDGPU/hsa-gfx10-v3.s --- a/llvm/test/MC/AMDGPU/hsa-gfx10-v3.s +++ b/llvm/test/MC/AMDGPU/hsa-gfx10-v3.s @@ -68,11 +68,13 @@ .amdhsa_kernel minimal .amdhsa_next_free_vgpr 0 .amdhsa_next_free_sgpr 0 + .amdhsa_shared_vgpr_count 0 .end_amdhsa_kernel // ASM: .amdhsa_kernel minimal // ASM: .amdhsa_next_free_vgpr 0 // ASM-NEXT: .amdhsa_next_free_sgpr 0 +// ASM: .amdhsa_shared_vgpr_count 0 // ASM: .end_amdhsa_kernel // Test that we can specify all available directives with non-default values. @@ -152,6 +154,7 @@ // ASM-NEXT: .amdhsa_workgroup_processor_mode 1 // ASM-NEXT: .amdhsa_memory_ordered 1 // ASM-NEXT: .amdhsa_forward_progress 1 +// ASM-NEXT: .amdhsa_shared_vgpr_count 0 // ASM-NEXT: .amdhsa_exception_fp_ieee_invalid_op 1 // ASM-NEXT: .amdhsa_exception_fp_denorm_src 1 // ASM-NEXT: .amdhsa_exception_fp_ieee_div_zero 1