Index: llvm/trunk/include/llvm/Support/AMDHSAKernelDescriptor.h =================================================================== --- llvm/trunk/include/llvm/Support/AMDHSAKernelDescriptor.h +++ llvm/trunk/include/llvm/Support/AMDHSAKernelDescriptor.h @@ -143,6 +143,7 @@ KERNEL_CODE_PROPERTY(ENABLE_SGPR_FLAT_SCRATCH_INIT, 5, 1), KERNEL_CODE_PROPERTY(ENABLE_SGPR_PRIVATE_SEGMENT_SIZE, 6, 1), KERNEL_CODE_PROPERTY(RESERVED0, 7, 3), + KERNEL_CODE_PROPERTY(ENABLE_WAVEFRONT_SIZE32, 10, 1), // GFX10+ KERNEL_CODE_PROPERTY(RESERVED1, 11, 5), }; #undef KERNEL_CODE_PROPERTY Index: llvm/trunk/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp =================================================================== --- llvm/trunk/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp +++ llvm/trunk/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp @@ -361,6 +361,10 @@ KernelCodeProperties |= amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT; } + if (MF.getSubtarget().isWave32()) { + KernelCodeProperties |= + amdhsa::KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32; + } return KernelCodeProperties; } @@ -1081,6 +1085,10 @@ MD->setSpiPsInputEna(MFI->getPSInputEnable()); MD->setSpiPsInputAddr(MFI->getPSInputAddr()); } + + const GCNSubtarget &STM = MF.getSubtarget(); + if (STM.isWave32()) + MD->setWave32(MF.getFunction().getCallingConv()); } // This is supposed to be log2(Size) Index: llvm/trunk/lib/Target/AMDGPU/AMDKernelCodeT.h =================================================================== --- llvm/trunk/lib/Target/AMDGPU/AMDKernelCodeT.h +++ llvm/trunk/lib/Target/AMDGPU/AMDKernelCodeT.h @@ -126,8 +126,12 @@ AMD_CODE_PROPERTY_ENABLE_SGPR_GRID_WORKGROUP_COUNT_Z_WIDTH = 1, AMD_CODE_PROPERTY_ENABLE_SGPR_GRID_WORKGROUP_COUNT_Z = ((1 << AMD_CODE_PROPERTY_ENABLE_SGPR_GRID_WORKGROUP_COUNT_Z_WIDTH) - 1) << AMD_CODE_PROPERTY_ENABLE_SGPR_GRID_WORKGROUP_COUNT_Z_SHIFT, - AMD_CODE_PROPERTY_RESERVED1_SHIFT = 10, - AMD_CODE_PROPERTY_RESERVED1_WIDTH = 6, + AMD_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32_SHIFT = 10, + AMD_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32_WIDTH = 1, + AMD_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32 = ((1 << AMD_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32_WIDTH) - 1) << AMD_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32_SHIFT, + + AMD_CODE_PROPERTY_RESERVED1_SHIFT = 11, + AMD_CODE_PROPERTY_RESERVED1_WIDTH = 5, AMD_CODE_PROPERTY_RESERVED1 = ((1 << AMD_CODE_PROPERTY_RESERVED1_WIDTH) - 1) << AMD_CODE_PROPERTY_RESERVED1_SHIFT, /// Control wave ID base counter for GDS ordered-append. Used to set Index: llvm/trunk/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp =================================================================== --- llvm/trunk/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp +++ llvm/trunk/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp @@ -3433,6 +3433,14 @@ KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE, Val, ValRange); UserSGPRCount += 1; + } else if (ID == ".amdhsa_wavefront_size32") { + if (IVersion.Major < 10) + return getParser().Error(IDRange.Start, "directive requires gfx10+", + IDRange); + EnableWavefrontSize32 = Val; + PARSE_BITS_ENTRY(KD.kernel_code_properties, + KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32, + Val, ValRange); } else if (ID == ".amdhsa_system_sgpr_private_segment_wavefront_offset") { PARSE_BITS_ENTRY( KD.compute_pgm_rsrc2, @@ -3680,6 +3688,30 @@ } Lex(); + if (ID == "enable_wavefront_size32") { + if (Header.code_properties & AMD_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32) { + if (!isGFX10()) + return TokError("enable_wavefront_size32=1 is only allowed on GFX10+"); + if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32]) + return TokError("enable_wavefront_size32=1 requires +WavefrontSize32"); + } else { + if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64]) + return TokError("enable_wavefront_size32=0 requires +WavefrontSize64"); + } + } + + if (ID == "wavefront_size") { + if (Header.wavefront_size == 5) { + if (!isGFX10()) + return TokError("wavefront_size=5 is only allowed on GFX10+"); + if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32]) + return TokError("wavefront_size=5 requires +WavefrontSize32"); + } else if (Header.wavefront_size == 6) { + if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64]) + return TokError("wavefront_size=6 requires +WavefrontSize64"); + } + } + if (ID == "enable_wgp_mode") { if (G_00B848_WGP_MODE(Header.compute_pgm_resource_registers) && !isGFX10()) return TokError("enable_wgp_mode=1 is only allowed on GFX10+"); Index: llvm/trunk/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp =================================================================== --- llvm/trunk/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp +++ llvm/trunk/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp @@ -284,6 +284,10 @@ PRINT_FIELD(OS, ".amdhsa_user_sgpr_private_segment_size", KD, kernel_code_properties, amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE); + if (IVersion.Major >= 10) + PRINT_FIELD(OS, ".amdhsa_wavefront_size32", KD, + kernel_code_properties, + amdhsa::KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32); PRINT_FIELD( OS, ".amdhsa_system_sgpr_private_segment_wavefront_offset", KD, compute_pgm_rsrc2, Index: llvm/trunk/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp =================================================================== --- llvm/trunk/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp +++ llvm/trunk/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp @@ -457,6 +457,10 @@ Header.private_segment_alignment = 4; if (Version.Major >= 10) { + if (STI->getFeatureBits().test(FeatureWavefrontSize32)) { + Header.wavefront_size = 5; + Header.code_properties |= AMD_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32; + } Header.compute_pgm_resource_registers |= S_00B848_WGP_MODE(STI->getFeatureBits().test(FeatureCuMode) ? 0 : 1) | S_00B848_MEM_ORDERED(1); @@ -480,6 +484,9 @@ AMDHSA_BITS_SET(KD.compute_pgm_rsrc2, amdhsa::COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X, 1); if (Version.Major >= 10) { + AMDHSA_BITS_SET(KD.kernel_code_properties, + amdhsa::KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32, + STI->getFeatureBits().test(FeatureWavefrontSize32) ? 1 : 0); AMDHSA_BITS_SET(KD.compute_pgm_rsrc1, amdhsa::COMPUTE_PGM_RSRC1_WGP_MODE, STI->getFeatureBits().test(FeatureCuMode) ? 0 : 1); Index: llvm/trunk/lib/Target/AMDGPU/Utils/AMDGPUPALMetadata.h =================================================================== --- llvm/trunk/lib/Target/AMDGPU/Utils/AMDGPUPALMetadata.h +++ llvm/trunk/lib/Target/AMDGPU/Utils/AMDGPUPALMetadata.h @@ -80,6 +80,10 @@ // Set the scratch size in the metadata. void setScratchSize(unsigned CC, unsigned Val); + // Set the hardware register bit in PAL metadata to enable wave32 on the + // shader of the given calling convention. + void setWave32(unsigned CC); + // Emit the accumulated PAL metadata as asm directives. // This is called from AMDGPUTargetAsmStreamer::Finish(). void toString(std::string &S); Index: llvm/trunk/lib/Target/AMDGPU/Utils/AMDGPUPALMetadata.cpp =================================================================== --- llvm/trunk/lib/Target/AMDGPU/Utils/AMDGPUPALMetadata.cpp +++ llvm/trunk/lib/Target/AMDGPU/Utils/AMDGPUPALMetadata.cpp @@ -233,6 +233,29 @@ getHwStage(CC)[".scratch_memory_size"] = MsgPackDoc.getNode(Val); } +// Set the hardware register bit in PAL metadata to enable wave32 on the +// shader of the given calling convention. +void AMDGPUPALMetadata::setWave32(unsigned CC) { + switch (CC) { + case CallingConv::AMDGPU_HS: + setRegister(PALMD::R_A2D5_VGT_SHADER_STAGES_EN, S_028B54_HS_W32_EN(1)); + break; + case CallingConv::AMDGPU_GS: + setRegister(PALMD::R_A2D5_VGT_SHADER_STAGES_EN, S_028B54_GS_W32_EN(1)); + break; + case CallingConv::AMDGPU_VS: + setRegister(PALMD::R_A2D5_VGT_SHADER_STAGES_EN, S_028B54_VS_W32_EN(1)); + break; + case CallingConv::AMDGPU_PS: + setRegister(PALMD::R_A1B6_SPI_PS_IN_CONTROL, S_0286D8_PS_W32_EN(1)); + break; + case CallingConv::AMDGPU_CS: + setRegister(PALMD::R_2E00_COMPUTE_DISPATCH_INITIATOR, + S_00B800_CS_W32_EN(1)); + break; + } +} + // Convert a register number to name, for display by toString(). // Returns nullptr if none. static const char *getRegisterName(unsigned RegNum) { Index: llvm/trunk/lib/Target/AMDGPU/Utils/AMDKernelCodeTInfo.h =================================================================== --- llvm/trunk/lib/Target/AMDGPU/Utils/AMDKernelCodeTInfo.h +++ llvm/trunk/lib/Target/AMDGPU/Utils/AMDKernelCodeTInfo.h @@ -109,6 +109,7 @@ CODEPROP(enable_sgpr_grid_workgroup_count_x, ENABLE_SGPR_GRID_WORKGROUP_COUNT_X), CODEPROP(enable_sgpr_grid_workgroup_count_y, ENABLE_SGPR_GRID_WORKGROUP_COUNT_Y), CODEPROP(enable_sgpr_grid_workgroup_count_z, ENABLE_SGPR_GRID_WORKGROUP_COUNT_Z), +CODEPROP(enable_wavefront_size32, ENABLE_WAVEFRONT_SIZE32), CODEPROP(enable_ordered_append_gds, ENABLE_ORDERED_APPEND_GDS), CODEPROP(private_element_size, PRIVATE_ELEMENT_SIZE), CODEPROP(is_ptr64, IS_PTR64), Index: llvm/trunk/test/CodeGen/AMDGPU/hsa-metadata-kernel-code-props-v3.ll =================================================================== --- llvm/trunk/test/CodeGen/AMDGPU/hsa-metadata-kernel-code-props-v3.ll +++ llvm/trunk/test/CodeGen/AMDGPU/hsa-metadata-kernel-code-props-v3.ll @@ -1,6 +1,7 @@ -; RUN: llc -mattr=+code-object-v3 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx700 -enable-misched=0 -filetype=obj -o - < %s | llvm-readelf --notes | FileCheck --check-prefix=CHECK --check-prefix=GFX700 --check-prefix=NOTES %s -; RUN: llc -mattr=+code-object-v3 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 -enable-misched=0 -filetype=obj -o - < %s | llvm-readelf --notes | FileCheck --check-prefix=CHECK --check-prefix=GFX803 --check-prefix=NOTES %s -; RUN: llc -mattr=+code-object-v3 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -enable-misched=0 -filetype=obj -o - < %s | llvm-readelf --notes | FileCheck --check-prefix=CHECK --check-prefix=GFX900 --check-prefix=NOTES %s +; RUN: llc -mattr=+code-object-v3 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx700 -enable-misched=0 -filetype=obj -o - < %s | llvm-readobj -elf-output-style=GNU -notes | FileCheck --check-prefix=CHECK --check-prefix=GFX700 --check-prefix=WAVE64 --check-prefix=NOTES %s +; RUN: llc -mattr=+code-object-v3 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 -enable-misched=0 -filetype=obj -o - < %s | llvm-readobj -elf-output-style=GNU -notes | FileCheck --check-prefix=CHECK --check-prefix=GFX803 --check-prefix=WAVE64 --check-prefix=NOTES %s +; RUN: llc -mattr=+code-object-v3 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -enable-misched=0 -filetype=obj -o - < %s | llvm-readobj -elf-output-style=GNU -notes | FileCheck --check-prefix=CHECK --check-prefix=GFX900 --check-prefix=WAVE64 --check-prefix=NOTES %s +; run: llc -mattr=+code-object-v3 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -enable-misched=0 -filetype=obj -o - < %s | llvm-readobj -elf-output-style=GNU -notes | FileCheck --check-prefix=CHECK --check-prefix=GFX1010 --check-prefix=WAVE32 --check-prefix=NOTES %s @var = addrspace(1) global float 0.0 @@ -14,10 +15,12 @@ ; CHECK: .max_flat_workgroup_size: 256 ; CHECK: .name: test ; CHECK: .private_segment_fixed_size: 0 -; CHECK: .sgpr_count: 8 +; WAVE64: .sgpr_count: 8 +; WAVE32: .sgpr_count: 10 ; CHECK: .symbol: test.kd ; CHECK: .vgpr_count: 6 -; CHECK: .wavefront_size: 64 +; WAVE64: .wavefront_size: 64 +; WAVE32: .wavefront_size: 32 define amdgpu_kernel void @test( half addrspace(1)* %r, half addrspace(1)* %a, @@ -34,6 +37,7 @@ ; GFX700: .sgpr_spill_count: 40 ; GFX803: .sgpr_spill_count: 24 ; GFX900: .sgpr_spill_count: 24 +; GFX1010: .sgpr_spill_count: 24 ; CHECK: .symbol: num_spilled_sgprs.kd define amdgpu_kernel void @num_spilled_sgprs( i32 addrspace(1)* %out0, i32 addrspace(1)* %out1, [8 x i32], Index: llvm/trunk/test/CodeGen/AMDGPU/hsa-metadata-wavefrontsize.ll =================================================================== --- llvm/trunk/test/CodeGen/AMDGPU/hsa-metadata-wavefrontsize.ll +++ llvm/trunk/test/CodeGen/AMDGPU/hsa-metadata-wavefrontsize.ll @@ -0,0 +1,14 @@ +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -mattr=-code-object-v3,+wavefrontsize32,-wavefrontsize64 < %s | FileCheck -check-prefixes=GCN,GFX10-32 %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -mattr=-code-object-v3,-wavefrontsize32,+wavefrontsize64 < %s | FileCheck -check-prefixes=GCN,GFX10-64 %s + +; GCN: --- +; GCN: Kernels: +; GCN: - Name: wavefrontsize +; GCN: CodeProps: +; GFX10-32: WavefrontSize: 32 +; GFX10-64: WavefrontSize: 64 +; GCN: ... +define amdgpu_kernel void @wavefrontsize() { +entry: + ret void +} Index: llvm/trunk/test/MC/AMDGPU/hsa-diag-v3.s =================================================================== --- llvm/trunk/test/MC/AMDGPU/hsa-diag-v3.s +++ llvm/trunk/test/MC/AMDGPU/hsa-diag-v3.s @@ -1,4 +1,5 @@ // RUN: not llvm-mc -mattr=+code-object-v3 -triple amdgcn-amd-amdhsa -mcpu=gfx803 -mattr=+xnack -show-encoding %s 2>&1 >/dev/null | FileCheck %s +// RUN: not llvm-mc -mattr=+code-object-v3 -triple amdgcn-amd-amdhsa -mcpu=gfx1010 -mattr=+xnack -show-encoding %s 2>&1 >/dev/null | FileCheck %s --check-prefix=GFX10 // RUN: not llvm-mc -mattr=+code-object-v3 -triple amdgcn-amd- -mcpu=gfx803 -mattr=+xnack -show-encoding %s 2>&1 >/dev/null | FileCheck %s --check-prefix=NOT-AMDHSA .text @@ -44,6 +45,46 @@ // CHECK: error: expected .amdhsa_ directive or .end_amdhsa_kernel .end_amdhsa_kernel +.amdhsa_kernel foo + .amdhsa_wavefront_size32 1 + // CHECK: error: directive requires gfx10+ +.end_amdhsa_kernel + +.amdhsa_kernel foo + .amdhsa_workgroup_processor_mode 1 + // CHECK: error: directive requires gfx10+ +.end_amdhsa_kernel + +.amdhsa_kernel foo + .amdhsa_memory_ordered 1 + // CHECK: error: directive requires gfx10+ +.end_amdhsa_kernel + +.amdhsa_kernel foo + .amdhsa_forward_progress 1 + // CHECK: error: directive requires gfx10+ +.end_amdhsa_kernel + +.amdhsa_kernel foo + .amdhsa_wavefront_size32 5 + // GFX10: error: value out of range +.end_amdhsa_kernel + +.amdhsa_kernel foo + .amdhsa_workgroup_processor_mode 5 + // GFX10: error: value out of range +.end_amdhsa_kernel + +.amdhsa_kernel foo + .amdhsa_memory_ordered 5 + // GFX10: error: value out of range +.end_amdhsa_kernel + +.amdhsa_kernel foo + .amdhsa_forward_progress 5 + // GFX10: error: value out of range +.end_amdhsa_kernel + .set .amdgcn.next_free_vgpr, "foo" v_mov_b32_e32 v0, s0 // CHECK: error: .amdgcn.next_free_{v,s}gpr symbols must be absolute expressions Index: llvm/trunk/test/MC/AMDGPU/hsa-gfx10-v3.s =================================================================== --- llvm/trunk/test/MC/AMDGPU/hsa-gfx10-v3.s +++ llvm/trunk/test/MC/AMDGPU/hsa-gfx10-v3.s @@ -0,0 +1,223 @@ +// RUN: llvm-mc -mattr=+code-object-v3 -triple amdgcn-amd-amdhsa -mcpu=gfx1010 -mattr=+xnack < %s | FileCheck --check-prefix=ASM %s +// RUN: llvm-mc -mattr=+code-object-v3 -triple amdgcn-amd-amdhsa -mcpu=gfx1010 -mattr=+xnack -filetype=obj < %s > %t +// RUN: llvm-readobj -elf-output-style=GNU -sections -symbols -relocations %t | FileCheck --check-prefix=READOBJ %s +// RUN: llvm-objdump -s -j .rodata %t | FileCheck --check-prefix=OBJDUMP %s + +// big endian not supported +// XFAIL: powerpc-, powerpc64-, s390x, mips-, mips64-, sparc + +// READOBJ: Section Headers +// READOBJ: .text PROGBITS {{[0-9a-f]+}} {{[0-9a-f]+}} {{[0-9a-f]+}} {{[0-9]+}} AX {{[0-9]+}} {{[0-9]+}} 256 +// READOBJ: .rodata PROGBITS {{[0-9a-f]+}} {{[0-9a-f]+}} 0000c0 {{[0-9]+}} A {{[0-9]+}} {{[0-9]+}} 64 + +// READOBJ: Relocation section '.rela.rodata' at offset +// READOBJ: 0000000000000010 {{[0-9a-f]+}}00000005 R_AMDGPU_REL64 0000000000000000 .text + 10 +// READOBJ: 0000000000000050 {{[0-9a-f]+}}00000005 R_AMDGPU_REL64 0000000000000000 .text + 110 +// READOBJ: 0000000000000090 {{[0-9a-f]+}}00000005 R_AMDGPU_REL64 0000000000000000 .text + 210 + +// READOBJ: Symbol table '.symtab' contains {{[0-9]+}} entries: +// READOBJ: {{[0-9]+}}: 0000000000000100 0 FUNC LOCAL PROTECTED 2 complete +// READOBJ: {{[0-9]+}}: 0000000000000040 64 OBJECT LOCAL DEFAULT 3 complete.kd +// READOBJ: {{[0-9]+}}: 0000000000000000 0 FUNC LOCAL PROTECTED 2 minimal +// READOBJ: {{[0-9]+}}: 0000000000000000 64 OBJECT LOCAL DEFAULT 3 minimal.kd +// READOBJ: {{[0-9]+}}: 0000000000000200 0 FUNC LOCAL PROTECTED 2 special_sgpr +// READOBJ: {{[0-9]+}}: 0000000000000080 64 OBJECT LOCAL DEFAULT 3 special_sgpr.kd + +// OBJDUMP: Contents of section .rodata +// Note, relocation for KERNEL_CODE_ENTRY_BYTE_OFFSET is not resolved here. +// minimal +// OBJDUMP-NEXT: 0000 00000000 00000000 00000000 00000000 +// OBJDUMP-NEXT: 0010 00000000 00000000 00000000 00000000 +// OBJDUMP-NEXT: 0020 00000000 00000000 00000000 00000000 +// OBJDUMP-NEXT: 0030 0000ac60 80000000 00000000 00000000 +// complete +// OBJDUMP-NEXT: 0040 01000000 01000000 00000000 00000000 +// OBJDUMP-NEXT: 0050 00000000 00000000 00000000 00000000 +// OBJDUMP-NEXT: 0060 00000000 00000000 00000000 00000000 +// OBJDUMP-NEXT: 0070 015001e4 1f0f007f 7f040000 00000000 +// special_sgpr +// OBJDUMP-NEXT: 0080 00000000 00000000 00000000 00000000 +// OBJDUMP-NEXT: 0090 00000000 00000000 00000000 00000000 +// OBJDUMP-NEXT: 00a0 00000000 00000000 00000000 00000000 +// OBJDUMP-NEXT: 00b0 00000060 80000000 00000000 00000000 + +.text +// ASM: .text + +.amdgcn_target "amdgcn-amd-amdhsa--gfx1010+xnack" +// ASM: .amdgcn_target "amdgcn-amd-amdhsa--gfx1010+xnack" + +.p2align 8 +.type minimal,@function +minimal: + s_endpgm + +.p2align 8 +.type complete,@function +complete: + s_endpgm + +.p2align 8 +.type special_sgpr,@function +special_sgpr: + s_endpgm + +.rodata +// ASM: .rodata + +// Test that only specifying required directives is allowed, and that defaulted +// values are omitted. +.p2align 6 +.amdhsa_kernel minimal + .amdhsa_next_free_vgpr 0 + .amdhsa_next_free_sgpr 0 +.end_amdhsa_kernel + +// ASM: .amdhsa_kernel minimal +// ASM: .amdhsa_next_free_vgpr 0 +// ASM-NEXT: .amdhsa_next_free_sgpr 0 +// ASM: .end_amdhsa_kernel + +// Test that we can specify all available directives with non-default values. +.p2align 6 +.amdhsa_kernel complete + .amdhsa_group_segment_fixed_size 1 + .amdhsa_private_segment_fixed_size 1 + .amdhsa_user_sgpr_private_segment_buffer 1 + .amdhsa_user_sgpr_dispatch_ptr 1 + .amdhsa_user_sgpr_queue_ptr 1 + .amdhsa_user_sgpr_kernarg_segment_ptr 1 + .amdhsa_user_sgpr_dispatch_id 1 + .amdhsa_user_sgpr_flat_scratch_init 1 + .amdhsa_user_sgpr_private_segment_size 1 + .amdhsa_wavefront_size32 1 + .amdhsa_system_sgpr_private_segment_wavefront_offset 1 + .amdhsa_system_sgpr_workgroup_id_x 0 + .amdhsa_system_sgpr_workgroup_id_y 1 + .amdhsa_system_sgpr_workgroup_id_z 1 + .amdhsa_system_sgpr_workgroup_info 1 + .amdhsa_system_vgpr_workitem_id 1 + .amdhsa_next_free_vgpr 9 + .amdhsa_next_free_sgpr 27 + .amdhsa_reserve_vcc 0 + .amdhsa_reserve_flat_scratch 0 + .amdhsa_reserve_xnack_mask 0 + .amdhsa_float_round_mode_32 1 + .amdhsa_float_round_mode_16_64 1 + .amdhsa_float_denorm_mode_32 1 + .amdhsa_float_denorm_mode_16_64 0 + .amdhsa_dx10_clamp 0 + .amdhsa_ieee_mode 0 + .amdhsa_fp16_overflow 1 + .amdhsa_workgroup_processor_mode 1 + .amdhsa_memory_ordered 1 + .amdhsa_forward_progress 1 + .amdhsa_exception_fp_ieee_invalid_op 1 + .amdhsa_exception_fp_denorm_src 1 + .amdhsa_exception_fp_ieee_div_zero 1 + .amdhsa_exception_fp_ieee_overflow 1 + .amdhsa_exception_fp_ieee_underflow 1 + .amdhsa_exception_fp_ieee_inexact 1 + .amdhsa_exception_int_div_zero 1 +.end_amdhsa_kernel + +// ASM: .amdhsa_kernel complete +// ASM-NEXT: .amdhsa_group_segment_fixed_size 1 +// ASM-NEXT: .amdhsa_private_segment_fixed_size 1 +// ASM-NEXT: .amdhsa_user_sgpr_private_segment_buffer 1 +// ASM-NEXT: .amdhsa_user_sgpr_dispatch_ptr 1 +// ASM-NEXT: .amdhsa_user_sgpr_queue_ptr 1 +// ASM-NEXT: .amdhsa_user_sgpr_kernarg_segment_ptr 1 +// ASM-NEXT: .amdhsa_user_sgpr_dispatch_id 1 +// ASM-NEXT: .amdhsa_user_sgpr_flat_scratch_init 1 +// ASM-NEXT: .amdhsa_user_sgpr_private_segment_size 1 +// ASM-NEXT: .amdhsa_wavefront_size32 1 +// ASM-NEXT: .amdhsa_system_sgpr_private_segment_wavefront_offset 1 +// ASM-NEXT: .amdhsa_system_sgpr_workgroup_id_x 0 +// ASM-NEXT: .amdhsa_system_sgpr_workgroup_id_y 1 +// ASM-NEXT: .amdhsa_system_sgpr_workgroup_id_z 1 +// ASM-NEXT: .amdhsa_system_sgpr_workgroup_info 1 +// ASM-NEXT: .amdhsa_system_vgpr_workitem_id 1 +// ASM-NEXT: .amdhsa_next_free_vgpr 9 +// ASM-NEXT: .amdhsa_next_free_sgpr 27 +// ASM-NEXT: .amdhsa_reserve_vcc 0 +// ASM-NEXT: .amdhsa_reserve_flat_scratch 0 +// ASM-NEXT: .amdhsa_reserve_xnack_mask 0 +// ASM-NEXT: .amdhsa_float_round_mode_32 1 +// ASM-NEXT: .amdhsa_float_round_mode_16_64 1 +// ASM-NEXT: .amdhsa_float_denorm_mode_32 1 +// ASM-NEXT: .amdhsa_float_denorm_mode_16_64 0 +// ASM-NEXT: .amdhsa_dx10_clamp 0 +// ASM-NEXT: .amdhsa_ieee_mode 0 +// ASM-NEXT: .amdhsa_fp16_overflow 1 +// ASM-NEXT: .amdhsa_workgroup_processor_mode 1 +// ASM-NEXT: .amdhsa_memory_ordered 1 +// ASM-NEXT: .amdhsa_forward_progress 1 +// ASM-NEXT: .amdhsa_exception_fp_ieee_invalid_op 1 +// ASM-NEXT: .amdhsa_exception_fp_denorm_src 1 +// ASM-NEXT: .amdhsa_exception_fp_ieee_div_zero 1 +// ASM-NEXT: .amdhsa_exception_fp_ieee_overflow 1 +// ASM-NEXT: .amdhsa_exception_fp_ieee_underflow 1 +// ASM-NEXT: .amdhsa_exception_fp_ieee_inexact 1 +// ASM-NEXT: .amdhsa_exception_int_div_zero 1 +// ASM-NEXT: .end_amdhsa_kernel + +// Test that we are including special SGPR usage in the granulated count. +.p2align 6 +.amdhsa_kernel special_sgpr + // Same next_free_sgpr as "complete", but... + .amdhsa_next_free_sgpr 27 + // ...on GFX10+ this should require an additional 6 SGPRs, pushing us from + // 3 granules to 4 + .amdhsa_reserve_flat_scratch 1 + + .amdhsa_reserve_vcc 0 + .amdhsa_reserve_xnack_mask 0 + + .amdhsa_float_denorm_mode_16_64 0 + .amdhsa_dx10_clamp 0 + .amdhsa_ieee_mode 0 + .amdhsa_next_free_vgpr 0 +.end_amdhsa_kernel + +// ASM: .amdhsa_kernel special_sgpr +// ASM: .amdhsa_next_free_vgpr 0 +// ASM-NEXT: .amdhsa_next_free_sgpr 27 +// ASM-NEXT: .amdhsa_reserve_vcc 0 +// ASM-NEXT: .amdhsa_reserve_xnack_mask 0 +// ASM: .amdhsa_float_denorm_mode_16_64 0 +// ASM-NEXT: .amdhsa_dx10_clamp 0 +// ASM-NEXT: .amdhsa_ieee_mode 0 +// ASM: .end_amdhsa_kernel + +.section .foo + +.byte .amdgcn.gfx_generation_number +// ASM: .byte 10 + +.byte .amdgcn.next_free_vgpr +// ASM: .byte 0 +.byte .amdgcn.next_free_sgpr +// ASM: .byte 0 + +v_mov_b32_e32 v7, s10 + +.byte .amdgcn.next_free_vgpr +// ASM: .byte 8 +.byte .amdgcn.next_free_sgpr +// ASM: .byte 11 + +.set .amdgcn.next_free_vgpr, 0 +.set .amdgcn.next_free_sgpr, 0 + +.byte .amdgcn.next_free_vgpr +// ASM: .byte 0 +.byte .amdgcn.next_free_sgpr +// ASM: .byte 0 + +v_mov_b32_e32 v16, s3 + +.byte .amdgcn.next_free_vgpr +// ASM: .byte 17 +.byte .amdgcn.next_free_sgpr +// ASM: .byte 4 Index: llvm/trunk/test/MC/AMDGPU/hsa-gfx10.s =================================================================== --- llvm/trunk/test/MC/AMDGPU/hsa-gfx10.s +++ llvm/trunk/test/MC/AMDGPU/hsa-gfx10.s @@ -0,0 +1,284 @@ +// RUN: llvm-mc -triple amdgcn--amdhsa -mcpu=gfx1010 -mattr=-WavefrontSize32,+WavefrontSize64,-code-object-v3 -show-encoding %s | FileCheck %s --check-prefix=ASM +// RUN: llvm-mc -filetype=obj -triple amdgcn--amdhsa -mcpu=gfx1010 -mattr=-WavefrontSize32,+WavefrontSize64,-code-object-v3 -show-encoding %s | llvm-readobj -symbols -s -sd | FileCheck %s --check-prefix=ELF + +// ELF: Section { +// ELF: Name: .text +// ELF: Type: SHT_PROGBITS (0x1) +// ELF: Flags [ (0x6) +// ELF: SHF_ALLOC (0x2) +// ELF: SHF_EXECINSTR (0x4) + +// ELF: SHT_NOTE +// ELF: 0000: 04000000 08000000 01000000 414D4400 +// ELF: 0010: 02000000 00000000 04000000 1B000000 +// ELF: 0020: 03000000 414D4400 04000700 07000000 +// ELF: 0030: 00000000 00000000 414D4400 414D4447 +// ELF: 0040: 50550000 +// We can't check binary representation of metadata note: it is different on +// Windows and Linux because of carriage return on Windows + +// ELF: Symbol { +// ELF: Name: amd_kernel_code_t_minimal +// ELF: Type: AMDGPU_HSA_KERNEL (0xA) +// ELF: Section: .text +// ELF: } +// ELF: Symbol { +// ELF: Name: amd_kernel_code_t_test_all +// ELF: Type: AMDGPU_HSA_KERNEL (0xA) +// ELF: Section: .text +// ELF: } + +.text +// ASM: .text + +.hsa_code_object_version 2,0 +// ASM: .hsa_code_object_version 2,0 + +.hsa_code_object_isa 7,0,0,"AMD","AMDGPU" +// ASM: .hsa_code_object_isa 7,0,0,"AMD","AMDGPU" + +.amd_amdgpu_hsa_metadata + Version: [ 3, 0 ] + Kernels: + - Name: amd_kernel_code_t_test_all + SymbolName: amd_kernel_code_t_test_all@kd + - Name: amd_kernel_code_t_minimal + SymbolName: amd_kernel_code_t_minimal@kd +.end_amd_amdgpu_hsa_metadata + +// ASM: .amd_amdgpu_hsa_metadata +// ASM: Version: [ 3, 0 ] +// ASM: Kernels: +// ASM: - Name: amd_kernel_code_t_test_all +// ASM: SymbolName: 'amd_kernel_code_t_test_all@kd' +// ASM: - Name: amd_kernel_code_t_minimal +// ASM: SymbolName: 'amd_kernel_code_t_minimal@kd' +// ASM: .end_amd_amdgpu_hsa_metadata + +.amdgpu_hsa_kernel amd_kernel_code_t_test_all +.amdgpu_hsa_kernel amd_kernel_code_t_minimal + +amd_kernel_code_t_test_all: +; Test all amd_kernel_code_t members with non-default values. +.amd_kernel_code_t + kernel_code_version_major = 100 + kernel_code_version_minor = 100 + machine_kind = 0 + machine_version_major = 5 + machine_version_minor = 5 + machine_version_stepping = 5 + kernel_code_entry_byte_offset = 512 + kernel_code_prefetch_byte_size = 1 + max_scratch_backing_memory_byte_size = 1 + compute_pgm_rsrc1_vgprs = 1 + compute_pgm_rsrc1_sgprs = 1 + compute_pgm_rsrc1_priority = 1 + compute_pgm_rsrc1_float_mode = 1 + compute_pgm_rsrc1_priv = 1 + compute_pgm_rsrc1_dx10_clamp = 1 + compute_pgm_rsrc1_debug_mode = 1 + compute_pgm_rsrc1_ieee_mode = 1 + compute_pgm_rsrc1_wgp_mode = 0 + compute_pgm_rsrc1_mem_ordered = 0 + compute_pgm_rsrc1_fwd_progress = 1 + compute_pgm_rsrc2_scratch_en = 1 + compute_pgm_rsrc2_user_sgpr = 1 + compute_pgm_rsrc2_tgid_x_en = 1 + compute_pgm_rsrc2_tgid_y_en = 1 + compute_pgm_rsrc2_tgid_z_en = 1 + compute_pgm_rsrc2_tg_size_en = 1 + compute_pgm_rsrc2_tidig_comp_cnt = 1 + compute_pgm_rsrc2_excp_en_msb = 1 + compute_pgm_rsrc2_lds_size = 1 + compute_pgm_rsrc2_excp_en = 1 + enable_sgpr_private_segment_buffer = 1 + enable_sgpr_dispatch_ptr = 1 + enable_sgpr_queue_ptr = 1 + enable_sgpr_kernarg_segment_ptr = 1 + enable_sgpr_dispatch_id = 1 + enable_sgpr_flat_scratch_init = 1 + enable_sgpr_private_segment_size = 1 + enable_sgpr_grid_workgroup_count_x = 1 + enable_sgpr_grid_workgroup_count_y = 1 + enable_sgpr_grid_workgroup_count_z = 1 + enable_ordered_append_gds = 1 + private_element_size = 1 + is_ptr64 = 1 + is_dynamic_callstack = 1 + is_debug_enabled = 1 + is_xnack_enabled = 1 + workitem_private_segment_byte_size = 1 + workgroup_group_segment_byte_size = 1 + gds_segment_byte_size = 1 + kernarg_segment_byte_size = 1 + workgroup_fbarrier_count = 1 + wavefront_sgpr_count = 1 + workitem_vgpr_count = 1 + reserved_vgpr_first = 1 + reserved_vgpr_count = 1 + reserved_sgpr_first = 1 + reserved_sgpr_count = 1 + debug_wavefront_private_segment_offset_sgpr = 1 + debug_private_segment_buffer_sgpr = 1 + kernarg_segment_alignment = 5 + group_segment_alignment = 5 + private_segment_alignment = 5 + wavefront_size = 6 + call_convention = 1 + runtime_loader_kernel_symbol = 1 +.end_amd_kernel_code_t + +// ASM-LABEL: {{^}}amd_kernel_code_t_test_all: +// ASM: .amd_kernel_code_t +// ASM: amd_code_version_major = 100 +// ASM: amd_code_version_minor = 100 +// ASM: amd_machine_kind = 0 +// ASM: amd_machine_version_major = 5 +// ASM: amd_machine_version_minor = 5 +// ASM: amd_machine_version_stepping = 5 +// ASM: kernel_code_entry_byte_offset = 512 +// ASM: kernel_code_prefetch_byte_size = 1 +// ASM: granulated_workitem_vgpr_count = 1 +// ASM: granulated_wavefront_sgpr_count = 1 +// ASM: priority = 1 +// ASM: float_mode = 1 +// ASM: priv = 1 +// ASM: enable_dx10_clamp = 1 +// ASM: debug_mode = 1 +// ASM: enable_ieee_mode = 1 +// ASM: enable_wgp_mode = 0 +// ASM: enable_mem_ordered = 0 +// ASM: enable_fwd_progress = 1 +// ASM: enable_sgpr_private_segment_wave_byte_offset = 1 +// ASM: user_sgpr_count = 1 +// ASM: enable_sgpr_workgroup_id_x = 1 +// ASM: enable_sgpr_workgroup_id_y = 1 +// ASM: enable_sgpr_workgroup_id_z = 1 +// ASM: enable_sgpr_workgroup_info = 1 +// ASM: enable_vgpr_workitem_id = 1 +// ASM: enable_exception_msb = 1 +// ASM: granulated_lds_size = 1 +// ASM: enable_exception = 1 +// ASM: enable_sgpr_private_segment_buffer = 1 +// ASM: enable_sgpr_dispatch_ptr = 1 +// ASM: enable_sgpr_queue_ptr = 1 +// ASM: enable_sgpr_kernarg_segment_ptr = 1 +// ASM: enable_sgpr_dispatch_id = 1 +// ASM: enable_sgpr_flat_scratch_init = 1 +// ASM: enable_sgpr_private_segment_size = 1 +// ASM: enable_sgpr_grid_workgroup_count_x = 1 +// ASM: enable_sgpr_grid_workgroup_count_y = 1 +// ASM: enable_sgpr_grid_workgroup_count_z = 1 +// ASM: enable_ordered_append_gds = 1 +// ASM: private_element_size = 1 +// ASM: is_ptr64 = 1 +// ASM: is_dynamic_callstack = 1 +// ASM: is_debug_enabled = 1 +// ASM: is_xnack_enabled = 1 +// ASM: workitem_private_segment_byte_size = 1 +// ASM: workgroup_group_segment_byte_size = 1 +// ASM: gds_segment_byte_size = 1 +// ASM: kernarg_segment_byte_size = 1 +// ASM: workgroup_fbarrier_count = 1 +// ASM: wavefront_sgpr_count = 1 +// ASM: workitem_vgpr_count = 1 +// ASM: reserved_vgpr_first = 1 +// ASM: reserved_vgpr_count = 1 +// ASM: reserved_sgpr_first = 1 +// ASM: reserved_sgpr_count = 1 +// ASM: debug_wavefront_private_segment_offset_sgpr = 1 +// ASM: debug_private_segment_buffer_sgpr = 1 +// ASM: kernarg_segment_alignment = 5 +// ASM: group_segment_alignment = 5 +// ASM: private_segment_alignment = 5 +// ASM: wavefront_size = 6 +// ASM: call_convention = 1 +// ASM: runtime_loader_kernel_symbol = 1 +// ASM: .end_amd_kernel_code_t + +amd_kernel_code_t_minimal: +.amd_kernel_code_t + enable_sgpr_kernarg_segment_ptr = 1 + is_ptr64 = 1 + granulated_workitem_vgpr_count = 1 + granulated_wavefront_sgpr_count = 1 + user_sgpr_count = 2 + kernarg_segment_byte_size = 16 + wavefront_sgpr_count = 8 +// wavefront_sgpr_count = 7 +; wavefront_sgpr_count = 7 +// Make sure a blank line won't break anything: + +// Make sure a line with whitespace won't break anything: + + workitem_vgpr_count = 16 +.end_amd_kernel_code_t + +// ASM-LABEL: {{^}}amd_kernel_code_t_minimal: +// ASM: .amd_kernel_code_t +// ASM: amd_code_version_major = 1 +// ASM: amd_code_version_minor = 2 +// ASM: amd_machine_kind = 1 +// ASM: amd_machine_version_major = 10 +// ASM: amd_machine_version_minor = 1 +// ASM: amd_machine_version_stepping = 0 +// ASM: kernel_code_entry_byte_offset = 256 +// ASM: kernel_code_prefetch_byte_size = 0 +// ASM: granulated_workitem_vgpr_count = 1 +// ASM: granulated_wavefront_sgpr_count = 1 +// ASM: priority = 0 +// ASM: float_mode = 0 +// ASM: priv = 0 +// ASM: enable_dx10_clamp = 0 +// ASM: debug_mode = 0 +// ASM: enable_ieee_mode = 0 +// ASM: enable_wgp_mode = 1 +// ASM: enable_mem_ordered = 1 +// ASM: enable_fwd_progress = 0 +// ASM: enable_sgpr_private_segment_wave_byte_offset = 0 +// ASM: user_sgpr_count = 2 +// ASM: enable_sgpr_workgroup_id_x = 0 +// ASM: enable_sgpr_workgroup_id_y = 0 +// ASM: enable_sgpr_workgroup_id_z = 0 +// ASM: enable_sgpr_workgroup_info = 0 +// ASM: enable_vgpr_workitem_id = 0 +// ASM: enable_exception_msb = 0 +// ASM: granulated_lds_size = 0 +// ASM: enable_exception = 0 +// ASM: enable_sgpr_private_segment_buffer = 0 +// ASM: enable_sgpr_dispatch_ptr = 0 +// ASM: enable_sgpr_queue_ptr = 0 +// ASM: enable_sgpr_kernarg_segment_ptr = 1 +// ASM: enable_sgpr_dispatch_id = 0 +// ASM: enable_sgpr_flat_scratch_init = 0 +// ASM: enable_sgpr_private_segment_size = 0 +// ASM: enable_sgpr_grid_workgroup_count_x = 0 +// ASM: enable_sgpr_grid_workgroup_count_y = 0 +// ASM: enable_sgpr_grid_workgroup_count_z = 0 +// ASM: enable_wavefront_size32 = 0 +// ASM: enable_ordered_append_gds = 0 +// ASM: private_element_size = 0 +// ASM: is_ptr64 = 1 +// ASM: is_dynamic_callstack = 0 +// ASM: is_debug_enabled = 0 +// ASM: is_xnack_enabled = 0 +// ASM: workitem_private_segment_byte_size = 0 +// ASM: workgroup_group_segment_byte_size = 0 +// ASM: gds_segment_byte_size = 0 +// ASM: kernarg_segment_byte_size = 16 +// ASM: workgroup_fbarrier_count = 0 +// ASM: wavefront_sgpr_count = 8 +// ASM: workitem_vgpr_count = 16 +// ASM: reserved_vgpr_first = 0 +// ASM: reserved_vgpr_count = 0 +// ASM: reserved_sgpr_first = 0 +// ASM: reserved_sgpr_count = 0 +// ASM: debug_wavefront_private_segment_offset_sgpr = 0 +// ASM: debug_private_segment_buffer_sgpr = 0 +// ASM: kernarg_segment_alignment = 4 +// ASM: group_segment_alignment = 4 +// ASM: private_segment_alignment = 4 +// ASM: wavefront_size = 6 +// ASM: call_convention = -1 +// ASM: runtime_loader_kernel_symbol = 0 +// ASM: .end_amd_kernel_code_t Index: llvm/trunk/test/MC/AMDGPU/hsa-wave-size.s =================================================================== --- llvm/trunk/test/MC/AMDGPU/hsa-wave-size.s +++ llvm/trunk/test/MC/AMDGPU/hsa-wave-size.s @@ -0,0 +1,65 @@ +// RUN: not llvm-mc -triple=amdgcn-amd-amdhsa -mcpu=gfx700 -mattr=-code-object-v3 %s | FileCheck --check-prefixes=GCN,GFX7 %s +// RUN: not llvm-mc -triple=amdgcn-amd-amdhsa -mcpu=gfx1010 -mattr=-code-object-v3,+WavefrontSize32,-WavefrontSize64 %s | FileCheck --check-prefixes=GCN,GFX10-W32 %s +// RUN: not llvm-mc -triple=amdgcn-amd-amdhsa -mcpu=gfx1010 -mattr=-code-object-v3,-WavefrontSize32,+WavefrontSize64 %s | FileCheck --check-prefixes=GCN,GFX10-W64 %s + +// RUN: not llvm-mc -triple=amdgcn-amd-amdhsa -mcpu=gfx700 -mattr=-code-object-v3 %s 2>&1 | FileCheck --check-prefixes=GCN-ERR,GFX7-ERR %s +// RUN: not llvm-mc -triple=amdgcn-amd-amdhsa -mcpu=gfx1010 -mattr=-code-object-v3,+WavefrontSize32,-WavefrontSize64 %s 2>&1 | FileCheck --check-prefixes=GCN-ERR,GFX10-W32-ERR %s +// RUN: not llvm-mc -triple=amdgcn-amd-amdhsa -mcpu=gfx1010 -mattr=-code-object-v3,-WavefrontSize32,+WavefrontSize64 %s 2>&1 | FileCheck --check-prefixes=GCN-ERR,GFX10-W64-ERR %s + +// GCN: test0: +// GFX7: enable_wavefront_size32 = 0 +// GFX7: wavefront_size = 6 +// GFX10-W32: enable_wavefront_size32 = 1 +// GFX10-W32: wavefront_size = 5 +// GFX10-W64: enable_wavefront_size32 = 0 +// GFX10-W64: wavefront_size = 6 +.amdgpu_hsa_kernel test0 +test0: +.amd_kernel_code_t +.end_amd_kernel_code_t + +// GCN: test1: +// GFX7: enable_wavefront_size32 = 0 +// GFX7: wavefront_size = 6 +// GFX10-W32-ERR: error: enable_wavefront_size32=0 requires +WavefrontSize64 +// GFX10-W64: enable_wavefront_size32 = 0 +// GFX10-W64: wavefront_size = 6 +.amdgpu_hsa_kernel test1 +test1: +.amd_kernel_code_t + enable_wavefront_size32 = 0 +.end_amd_kernel_code_t + +// GCN: test2: +// GFX7: enable_wavefront_size32 = 0 +// GFX7: wavefront_size = 6 +// GFX10-W32-ERR: error: wavefront_size=6 requires +WavefrontSize64 +// GFX10-W64: enable_wavefront_size32 = 0 +// GFX10-W64: wavefront_size = 6 +.amdgpu_hsa_kernel test2 +test2: +.amd_kernel_code_t + wavefront_size = 6 +.end_amd_kernel_code_t + +// GCN: test3: +// GFX7-ERR: error: enable_wavefront_size32=1 is only allowed on GFX10+ +// GFX10-W32: enable_wavefront_size32 = 1 +// GFX10-W32: wavefront_size = 5 +// GFX10-W64-ERR: error: enable_wavefront_size32=1 requires +WavefrontSize32 +.amdgpu_hsa_kernel test3 +test3: +.amd_kernel_code_t + enable_wavefront_size32 = 1 +.end_amd_kernel_code_t + +// GCN: test4: +// GFX7-ERR: error: wavefront_size=5 is only allowed on GFX10+ +// GFX10-W32: enable_wavefront_size32 = 1 +// GFX10-W32: wavefront_size = 5 +// GFX10-W64-ERR: error: wavefront_size=5 requires +WavefrontSize32 +.amdgpu_hsa_kernel test4 +test4: +.amd_kernel_code_t + wavefront_size = 5 +.end_amd_kernel_code_t Index: llvm/trunk/test/MC/AMDGPU/hsa.s =================================================================== --- llvm/trunk/test/MC/AMDGPU/hsa.s +++ llvm/trunk/test/MC/AMDGPU/hsa.s @@ -120,7 +120,7 @@ kernarg_segment_alignment = 5 group_segment_alignment = 5 private_segment_alignment = 5 - wavefront_size = 5 + wavefront_size = 6 call_convention = 1 runtime_loader_kernel_symbol = 1 .end_amd_kernel_code_t @@ -185,7 +185,7 @@ // ASM: kernarg_segment_alignment = 5 // ASM: group_segment_alignment = 5 // ASM: private_segment_alignment = 5 -// ASM: wavefront_size = 5 +// ASM: wavefront_size = 6 // ASM: call_convention = 1 // ASM: runtime_loader_kernel_symbol = 1 // ASM: .end_amd_kernel_code_t Index: llvm/trunk/test/MC/AMDGPU/hsa_isa_version_attrs.s =================================================================== --- llvm/trunk/test/MC/AMDGPU/hsa_isa_version_attrs.s +++ llvm/trunk/test/MC/AMDGPU/hsa_isa_version_attrs.s @@ -1,6 +1,8 @@ // RUN: llvm-mc -arch=amdgcn -mcpu=gfx801 -mattr=-code-object-v3,-fast-fmaf -show-encoding %s | FileCheck --check-prefix=GFX8 %s // RUN: llvm-mc -arch=amdgcn -mcpu=gfx900 -mattr=-code-object-v3,-mad-mix-insts -show-encoding %s | FileCheck --check-prefix=GFX9 %s +// RUN: llvm-mc -arch=amdgcn -mcpu=gfx1010 -mattr=-code-object-v3,-WavefrontSize32 -show-encoding %s | FileCheck --check-prefix=GFX10 %s .hsa_code_object_isa // GFX8: .hsa_code_object_isa 8,0,1,"AMD","AMDGPU" // GFX9: .hsa_code_object_isa 9,0,0,"AMD","AMDGPU" +// GFX10: .hsa_code_object_isa 10,1,0,"AMD","AMDGPU"