diff --git a/llvm/docs/AMDGPUUsage.rst b/llvm/docs/AMDGPUUsage.rst --- a/llvm/docs/AMDGPUUsage.rst +++ b/llvm/docs/AMDGPUUsage.rst @@ -4451,8 +4451,10 @@ ======= ======= =============================== =========================================================================== Bits Size Field Name Description ======= ======= =============================== =========================================================================== - 3:0 4 bits SHARED_VGPR_COUNT Number of shared VGPRs for wavefront size 64. Granularity 8. Value 0-120. - compute_pgm_rsrc1.vgprs + shared_vgpr_cnt cannot exceed 64. + 3:0 4 bits SHARED_VGPR_COUNT Number of shared VGPR blocks when executing in subvector mode. For + wavefront size 64 the value is 0-15, representing 0-120 VGPRs (granularity + of 8), such that (compute_pgm_rsrc1.vgprs +1)*4 + shared_vgpr_count*8 does + not exceed 256. For wavefront size 32 shared_vgpr_count must be 0. 31:4 28 Reserved, must be 0. bits 32 **Total size 4 bytes.** @@ -12372,6 +12374,8 @@ :ref:`amdgpu-amdhsa-compute_pgm_rsrc1-gfx6-gfx10-table`. ``.amdhsa_forward_progress`` 0 GFX10 Controls FWD_PROGRESS in :ref:`amdgpu-amdhsa-compute_pgm_rsrc1-gfx6-gfx10-table`. + ``.amdhsa_shared_vgpr_count`` 0 GFX10 Controls SHARED_VGPR_COUNT in + :ref:`amdgpu-amdhsa-compute_pgm_rsrc3-gfx10-table`. ``.amdhsa_exception_fp_ieee_invalid_op`` 0 GFX6-GFX10 Controls ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION in :ref:`amdgpu-amdhsa-compute_pgm_rsrc2-gfx6-gfx10-table`. 
``.amdhsa_exception_fp_denorm_src`` 0 GFX6-GFX10 Controls ENABLE_EXCEPTION_FP_DENORMAL_SOURCE in diff --git a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp --- a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp +++ b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp @@ -4645,6 +4645,7 @@ SMRange VGPRRange; uint64_t NextFreeVGPR = 0; uint64_t AccumOffset = 0; + uint64_t SharedVGPRCount = 0; SMRange SGPRRange; uint64_t NextFreeSGPR = 0; @@ -4872,6 +4873,13 @@ return Error(IDRange.Start, "directive requires gfx10+", IDRange); PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FWD_PROGRESS, Val, ValRange); + } else if (ID == ".amdhsa_shared_vgpr_count") { + if (IVersion.Major < 10) + return Error(IDRange.Start, "directive requires gfx10+", IDRange); + SharedVGPRCount = Val; + PARSE_BITS_ENTRY(KD.compute_pgm_rsrc3, + COMPUTE_PGM_RSRC3_GFX10_SHARED_VGPR_COUNT, Val, + ValRange); } else if (ID == ".amdhsa_exception_fp_ieee_invalid_op") { PARSE_BITS_ENTRY( KD.compute_pgm_rsrc2, @@ -4961,6 +4969,19 @@ (AccumOffset / 4 - 1)); } + if (IVersion.Major == 10) { + // SharedVGPRCount < 16 checked by PARSE_BITS_ENTRY + if (SharedVGPRCount && EnableWavefrontSize32) { + return TokError("shared_vgpr_count directive not valid on " + "wavefront size 32"); + } + if (SharedVGPRCount * 2 + VGPRBlocks > 63) { + return TokError("shared_vgpr_count*2 + " + "compute_pgm_rsrc1.GRANULATED_WORKITEM_VGPR_COUNT cannot " + "exceed 63\n"); + } + } + getTargetStreamer().EmitAmdhsaKernelDescriptor( getSTI(), KernelName, KD, NextFreeVGPR, NextFreeSGPR, ReserveVCC, ReserveFlatScr); diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp --- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp +++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp @@ -447,6 +447,8 @@ PRINT_FIELD(OS, ".amdhsa_forward_progress", KD, compute_pgm_rsrc1, 
amdhsa::COMPUTE_PGM_RSRC1_FWD_PROGRESS); + PRINT_FIELD(OS, ".amdhsa_shared_vgpr_count", KD, compute_pgm_rsrc3, + amdhsa::COMPUTE_PGM_RSRC3_GFX10_SHARED_VGPR_COUNT); } PRINT_FIELD( OS, ".amdhsa_exception_fp_ieee_invalid_op", KD, diff --git a/llvm/test/MC/AMDGPU/hsa-diag-v3.s b/llvm/test/MC/AMDGPU/hsa-diag-v3.s --- a/llvm/test/MC/AMDGPU/hsa-diag-v3.s +++ b/llvm/test/MC/AMDGPU/hsa-diag-v3.s @@ -225,6 +225,49 @@ .amdhsa_forward_progress 5 .end_amdhsa_kernel +// GCN-LABEL: warning: test_amdhsa_shared_vgpr_count_invalid1 +// NONGFX10: error: directive requires gfx10+ +// GFX10: error: .amdhsa_next_free_vgpr directive is required +// NONAMDHSA: error: unknown directive +.warning "test_amdhsa_shared_vgpr_count_invalid1" +.amdhsa_kernel test_amdhsa_shared_vgpr_count_invalid1 + .amdhsa_shared_vgpr_count 8 +.end_amdhsa_kernel + +// GCN-LABEL: warning: test_amdhsa_shared_vgpr_count_invalid2 +// NONGFX10: error: directive requires gfx10+ +// GFX10: error: shared_vgpr_count directive not valid on wavefront size 32 +// NONAMDHSA: error: unknown directive +.warning "test_amdhsa_shared_vgpr_count_invalid2" +.amdhsa_kernel test_amdhsa_shared_vgpr_count_invalid2 + .amdhsa_next_free_vgpr 16 + .amdhsa_next_free_sgpr 0 + .amdhsa_shared_vgpr_count 8 + .amdhsa_wavefront_size32 1 +.end_amdhsa_kernel + +// GCN-LABEL: warning: test_amdhsa_shared_vgpr_count_invalid3 +// NONGFX10: error: directive requires gfx10+ +// GFX10: error: value out of range +// NONAMDHSA: error: unknown directive +.warning "test_amdhsa_shared_vgpr_count_invalid3" +.amdhsa_kernel test_amdhsa_shared_vgpr_count_invalid3 + .amdhsa_next_free_vgpr 32 + .amdhsa_next_free_sgpr 0 + .amdhsa_shared_vgpr_count 16 +.end_amdhsa_kernel + +// GCN-LABEL: warning: test_amdhsa_shared_vgpr_count_invalid4 +// NONGFX10: error: directive requires gfx10+ +// GFX10: error: shared_vgpr_count*2 + compute_pgm_rsrc1.GRANULATED_WORKITEM_VGPR_COUNT cannot exceed 63 +// NONAMDHSA: error: unknown directive +.warning 
"test_amdhsa_shared_vgpr_count_invalid4" +.amdhsa_kernel test_amdhsa_shared_vgpr_count_invalid4 + .amdhsa_next_free_vgpr 273 + .amdhsa_next_free_sgpr 0 + .amdhsa_shared_vgpr_count 15 +.end_amdhsa_kernel + // GCN-LABEL: warning: test_next_free_vgpr_invalid // AMDHSA: error: .amdgcn.next_free_{v,s}gpr symbols must be absolute expressions // NONAMDHSA-NOT: error: diff --git a/llvm/test/MC/AMDGPU/hsa-gfx10-v3.s b/llvm/test/MC/AMDGPU/hsa-gfx10-v3.s --- a/llvm/test/MC/AMDGPU/hsa-gfx10-v3.s +++ b/llvm/test/MC/AMDGPU/hsa-gfx10-v3.s @@ -68,11 +68,13 @@ .amdhsa_kernel minimal .amdhsa_next_free_vgpr 0 .amdhsa_next_free_sgpr 0 + .amdhsa_shared_vgpr_count 0 .end_amdhsa_kernel // ASM: .amdhsa_kernel minimal // ASM: .amdhsa_next_free_vgpr 0 // ASM-NEXT: .amdhsa_next_free_sgpr 0 +// ASM: .amdhsa_shared_vgpr_count 0 // ASM: .end_amdhsa_kernel // Test that we can specify all available directives with non-default values. @@ -153,6 +155,7 @@ // ASM-NEXT: .amdhsa_workgroup_processor_mode 1 // ASM-NEXT: .amdhsa_memory_ordered 1 // ASM-NEXT: .amdhsa_forward_progress 1 +// ASM-NEXT: .amdhsa_shared_vgpr_count 0 // ASM-NEXT: .amdhsa_exception_fp_ieee_invalid_op 1 // ASM-NEXT: .amdhsa_exception_fp_denorm_src 1 // ASM-NEXT: .amdhsa_exception_fp_ieee_div_zero 1