Index: lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.cpp =================================================================== --- lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.cpp +++ lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.cpp @@ -713,36 +713,6 @@ emitHiddenKernelArgs(Func, Offset, Args); - // TODO: What about other languages? - if (Func.getParent()->getNamedMetadata("opencl.ocl.version")) { - auto &DL = Func.getParent()->getDataLayout(); - auto Int64Ty = Type::getInt64Ty(Func.getContext()); - - emitKernelArg(DL, Int64Ty, "hidden_global_offset_x", Offset, Args); - emitKernelArg(DL, Int64Ty, "hidden_global_offset_y", Offset, Args); - emitKernelArg(DL, Int64Ty, "hidden_global_offset_z", Offset, Args); - - auto Int8PtrTy = - Type::getInt8PtrTy(Func.getContext(), AMDGPUAS::GLOBAL_ADDRESS); - - // Emit "printf buffer" argument if printf is used, otherwise emit dummy - // "none" argument. - if (Func.getParent()->getNamedMetadata("llvm.printf.fmts")) - emitKernelArg(DL, Int8PtrTy, "hidden_printf_buffer", Offset, Args); - else - emitKernelArg(DL, Int8PtrTy, "hidden_none", Offset, Args); - - // Emit "default queue" and "completion action" arguments if enqueue kernel - // is used, otherwise emit dummy "none" arguments. - if (Func.hasFnAttribute("calls-enqueue-kernel")) { - emitKernelArg(DL, Int8PtrTy, "hidden_default_queue", Offset, Args); - emitKernelArg(DL, Int8PtrTy, "hidden_completion_action", Offset, Args); - } else { - emitKernelArg(DL, Int8PtrTy, "hidden_none", Offset, Args); - emitKernelArg(DL, Int8PtrTy, "hidden_none", Offset, Args); - } - } - Kern[".args"] = Args; } Index: test/CodeGen/AMDGPU/hsa-metadata-enqueue-kernel-v3.ll =================================================================== --- test/CodeGen/AMDGPU/hsa-metadata-enqueue-kernel-v3.ll +++ test/CodeGen/AMDGPU/hsa-metadata-enqueue-kernel-v3.ll @@ -1,9 +1,9 @@ -; RUN: llc -mattr=+code-object-v3 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -filetype=obj -o - < %s | llvm-readobj -elf-output-style=GNU -notes | FileCheck --check-prefix=CHECK --check-prefix=GFX900 --check-prefix=NOTES %s -; RUN: llc -mattr=+code-object-v3 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -amdgpu-dump-hsa-metadata -amdgpu-verify-hsa-metadata -filetype=obj -o - < %s 2>&1 | FileCheck --check-prefix=PARSER %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mattr=+code-object-v3 -mcpu=gfx900 -filetype=obj -o - < %s | llvm-readobj -elf-output-style=GNU -notes | FileCheck --check-prefix=CHECK --check-prefix=GFX900 --check-prefix=NOTES %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mattr=+code-object-v3 -mcpu=gfx900 -amdgpu-dump-hsa-metadata -amdgpu-verify-hsa-metadata -filetype=obj -o - < %s 2>&1 | FileCheck --check-prefix=PARSER %s ; CHECK: --- -; CHECK: amdhsa.kernels: -; CHECK: - .args: +; CHECK: amdhsa.kernels: +; CHECK: - .args: ; CHECK-NEXT: - .name: a ; CHECK-NEXT: .offset: 0 ; CHECK-NEXT: .size: 1 @@ -25,18 +25,18 @@ ; CHECK-NOT: .value_kind: hidden_default_queue ; CHECK-NOT: .value_kind: hidden_completion_action ; CHECK: .language: OpenCL C -; CHECK-NEXT: .language_version: +; CHECK-NEXT: .language_version: ; CHECK-NEXT: - 2 ; CHECK-NEXT: - 0 ; CHECK: .name: test_non_enqueue_kernel_caller ; CHECK: .symbol: test_non_enqueue_kernel_caller.kd -define amdgpu_kernel void @test_non_enqueue_kernel_caller(i8 %a) +define amdgpu_kernel void @test_non_enqueue_kernel_caller(i8 %a) #0 !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !3 !kernel_arg_base_type !3 !kernel_arg_type_qual !4 { ret void } -; CHECK: - .args: +; CHECK: - .args: ; CHECK-NEXT: - .name: a ; CHECK-NEXT: .offset: 0 ; CHECK-NEXT: .size: 1 @@ -71,12 +71,12 @@ ; CHECK-NEXT: .value_kind: hidden_completion_action ; CHECK-NEXT: .value_type: i8 ; CHECK: .language: OpenCL C -; CHECK-NEXT: .language_version: +; CHECK-NEXT: .language_version: ; CHECK-NEXT: - 2 ; CHECK-NEXT: - 0 ; CHECK: .name: test_enqueue_kernel_caller ; CHECK: .symbol: test_enqueue_kernel_caller.kd -define amdgpu_kernel void @test_enqueue_kernel_caller(i8 %a) #0 +define amdgpu_kernel void @test_enqueue_kernel_caller(i8 %a) #1 !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !3 !kernel_arg_base_type !3 !kernel_arg_type_qual !4 { ret void @@ -87,7 +87,8 @@ ; CHECK-NEXT: - 0 ; CHECK-NOT: amdhsa.printf: -attributes #0 = { "calls-enqueue-kernel" } +attributes #0 = { "amdgpu-implicitarg-num-bytes"="48" } +attributes #1 = { "calls-enqueue-kernel" "amdgpu-implicitarg-num-bytes"="48" } !1 = !{i32 0} !2 = !{!"none"} @@ -97,5 +98,4 @@ !opencl.ocl.version = !{!90} !90 = !{i32 2, i32 0} - ; PARSER: AMDGPU HSA Metadata Parser Test: PASS Index: test/CodeGen/AMDGPU/hsa-metadata-from-llvm-ir-full-v3.ll =================================================================== --- test/CodeGen/AMDGPU/hsa-metadata-from-llvm-ir-full-v3.ll +++ test/CodeGen/AMDGPU/hsa-metadata-from-llvm-ir-full-v3.ll @@ -1,9 +1,9 @@ -; RUN: llc -mattr=+code-object-v3 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx700 -filetype=obj -o - < %s | llvm-readobj -elf-output-style=GNU -notes | FileCheck --check-prefix=CHECK --check-prefix=GFX700 --check-prefix=NOTES %s -; RUN: llc -mattr=+code-object-v3 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx802 -filetype=obj -o - < %s | llvm-readobj -elf-output-style=GNU -notes | FileCheck --check-prefix=CHECK --check-prefix=GFX802 --check-prefix=NOTES %s -; RUN: llc -mattr=+code-object-v3 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -filetype=obj -o - < %s | llvm-readobj -elf-output-style=GNU -notes | FileCheck --check-prefix=CHECK --check-prefix=GFX900 --check-prefix=NOTES %s -; RUN: llc -mattr=+code-object-v3 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx700 -amdgpu-dump-hsa-metadata -amdgpu-verify-hsa-metadata -filetype=obj -o - < %s 2>&1 | FileCheck --check-prefix=PARSER %s -; RUN: llc -mattr=+code-object-v3 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx802 -amdgpu-dump-hsa-metadata -amdgpu-verify-hsa-metadata -filetype=obj -o - < %s 2>&1 | FileCheck --check-prefix=PARSER %s -; RUN: llc -mattr=+code-object-v3 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -amdgpu-dump-hsa-metadata -amdgpu-verify-hsa-metadata -filetype=obj -o - < %s 2>&1 | FileCheck --check-prefix=PARSER %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx700 -mattr=+code-object-v3 -filetype=obj -o - < %s | llvm-readobj -elf-output-style=GNU -notes | FileCheck --check-prefix=CHECK --check-prefix=GFX700 --check-prefix=NOTES %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx802 -mattr=+code-object-v3 -filetype=obj -o - < %s | llvm-readobj -elf-output-style=GNU -notes | FileCheck --check-prefix=CHECK --check-prefix=GFX802 --check-prefix=NOTES %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -mattr=+code-object-v3 -filetype=obj -o - < %s | llvm-readobj -elf-output-style=GNU -notes | FileCheck --check-prefix=CHECK --check-prefix=GFX900 --check-prefix=NOTES %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx700 -mattr=+code-object-v3 -amdgpu-dump-hsa-metadata -amdgpu-verify-hsa-metadata -filetype=obj -o - < %s 2>&1 | FileCheck --check-prefix=PARSER %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx802 -mattr=+code-object-v3 -amdgpu-dump-hsa-metadata -amdgpu-verify-hsa-metadata -filetype=obj -o - < %s 2>&1 | FileCheck --check-prefix=PARSER %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -mattr=+code-object-v3 -amdgpu-dump-hsa-metadata -amdgpu-verify-hsa-metadata -filetype=obj -o - < %s 2>&1 | FileCheck --check-prefix=PARSER %s %struct.A = type { i8, float } %opencl.image1d_t = type opaque @@ -17,8 +17,8 @@ @__test_block_invoke_kernel_runtime_handle = external addrspace(1) externally_initialized constant i8 addrspace(1)* ; CHECK: --- -; CHECK-NEXT: amdhsa.kernels: -; CHECK-NEXT: - .args: +; CHECK-NEXT: amdhsa.kernels: +; CHECK-NEXT: - .args: ; CHECK-NEXT: - .name: a ; CHECK-NEXT: .offset: 0 ; CHECK-NEXT: .size: 1 @@ -45,18 +45,18 @@ ; CHECK-NEXT: .value_kind: hidden_printf_buffer ; CHECK-NEXT: .value_type: i8 ; CHECK: .language: OpenCL C -; CHECK-NEXT: .language_version: +; CHECK-NEXT: .language_version: ; CHECK-NEXT: - 2 ; CHECK-NEXT: - 0 ; CHECK: .name: test_char ; CHECK: .symbol: test_char.kd -define amdgpu_kernel void @test_char(i8 %a) +define amdgpu_kernel void @test_char(i8 %a) #0 !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !9 !kernel_arg_base_type !9 !kernel_arg_type_qual !4 { ret void } -; CHECK: - .args: +; CHECK: - .args: ; CHECK-NEXT: - .name: a ; CHECK-NEXT: .offset: 0 ; CHECK-NEXT: .size: 4 @@ -86,18 +86,18 @@ ; CHECK-NEXT: .value_kind: hidden_none ; CHECK-NEXT: .value_type: i8 ; CHECK: .language: OpenCL C -; CHECK-NEXT: .language_version: +; CHECK-NEXT: .language_version: ; CHECK-NEXT: - 2 ; CHECK-NEXT: - 0 ; CHECK: .name: test_ushort2 ; CHECK: .symbol: test_ushort2.kd -define amdgpu_kernel void @test_ushort2(<2 x i16> %a) +define amdgpu_kernel void @test_ushort2(<2 x i16> %a) #0 !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !10 !kernel_arg_base_type !10 !kernel_arg_type_qual !4 { ret void } -; CHECK: - .args: +; CHECK: - .args: ; CHECK-NEXT: - .name: a ; CHECK-NEXT: .offset: 0 ; CHECK-NEXT: .size: 16 @@ -122,18 +122,18 @@ ; CHECK-NEXT: .value_kind: hidden_printf_buffer ; CHECK-NEXT: .value_type: i8 ; CHECK: .language: OpenCL C -; CHECK-NEXT: .language_version: +; CHECK-NEXT: .language_version: ; CHECK-NEXT: - 2 ; CHECK-NEXT: - 0 ; CHECK: .name: test_int3 ; CHECK: .symbol: test_int3.kd -define amdgpu_kernel void @test_int3(<3 x i32> %a) +define amdgpu_kernel void @test_int3(<3 x i32> %a) #0 !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !11 !kernel_arg_base_type !11 !kernel_arg_type_qual !4 { ret void } -; CHECK: - .args: +; CHECK: - .args: ; CHECK-NEXT: - .name: a ; CHECK-NEXT: .offset: 0 ; CHECK-NEXT: .size: 32 @@ -158,18 +158,18 @@ ; CHECK-NEXT: .value_kind: hidden_printf_buffer ; CHECK-NEXT: .value_type: i8 ; CHECK: .language: OpenCL C -; CHECK-NEXT: .language_version: +; CHECK-NEXT: .language_version: ; CHECK-NEXT: - 2 ; CHECK-NEXT: - 0 ; CHECK: .name: test_ulong4 ; CHECK: .symbol: test_ulong4.kd -define amdgpu_kernel void @test_ulong4(<4 x i64> %a) +define amdgpu_kernel void @test_ulong4(<4 x i64> %a) #0 !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !12 !kernel_arg_base_type !12 !kernel_arg_type_qual !4 { ret void } -; CHECK: - .args: +; CHECK: - .args: ; CHECK-NEXT: - .name: a ; CHECK-NEXT: .offset: 0 ; CHECK-NEXT: .size: 16 @@ -194,18 +194,18 @@ ; CHECK-NEXT: .value_kind: hidden_printf_buffer ; CHECK-NEXT: .value_type: i8 ; CHECK: .language: OpenCL C -; CHECK-NEXT: .language_version: +; CHECK-NEXT: .language_version: ; CHECK-NEXT: - 2 ; CHECK-NEXT: - 0 ; CHECK: .name: test_half8 ; CHECK: .symbol: test_half8.kd -define amdgpu_kernel void @test_half8(<8 x half> %a) +define amdgpu_kernel void @test_half8(<8 x half> %a) #0 !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !13 !kernel_arg_base_type !13 !kernel_arg_type_qual !4 { ret void } -; CHECK: - .args: +; CHECK: - .args: ; CHECK-NEXT: - .name: a ; CHECK-NEXT: .offset: 0 ; CHECK-NEXT: .size: 64 @@ -230,18 +230,18 @@ ; CHECK-NEXT: .value_kind: hidden_printf_buffer ; CHECK-NEXT: .value_type: i8 ; CHECK: .language: OpenCL C -; CHECK-NEXT: .language_version: +; CHECK-NEXT: .language_version: ; CHECK-NEXT: - 2 ; CHECK-NEXT: - 0 ; CHECK: .name: test_float16 ; CHECK: .symbol: test_float16.kd -define amdgpu_kernel void @test_float16(<16 x float> %a) +define amdgpu_kernel void @test_float16(<16 x float> %a) #0 !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !14 !kernel_arg_base_type !14 !kernel_arg_type_qual !4 { ret void } -; CHECK: - .args: +; CHECK: - .args: ; CHECK-NEXT: - .name: a ; CHECK-NEXT: .offset: 0 ; CHECK-NEXT: .size: 128 @@ -266,18 +266,18 @@ ; CHECK-NEXT: .value_kind: hidden_printf_buffer ; CHECK-NEXT: .value_type: i8 ; CHECK: .language: OpenCL C -; CHECK-NEXT: .language_version: +; CHECK-NEXT: .language_version: ; CHECK-NEXT: - 2 ; CHECK-NEXT: - 0 ; CHECK: .name: test_double16 ; CHECK: .symbol: test_double16.kd -define amdgpu_kernel void @test_double16(<16 x double> %a) +define amdgpu_kernel void @test_double16(<16 x double> %a) #0 !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !15 !kernel_arg_base_type !15 !kernel_arg_type_qual !4 { ret void } -; CHECK: - .args: +; CHECK: - .args: ; CHECK-NEXT: - .address_space: global ; CHECK-NEXT: .name: a ; CHECK-NEXT: .offset: 0 @@ -303,18 +303,18 @@ ; CHECK-NEXT: .value_kind: hidden_printf_buffer ; CHECK-NEXT: .value_type: i8 ; CHECK: .language: OpenCL C -; CHECK-NEXT: .language_version: +; CHECK-NEXT: .language_version: ; CHECK-NEXT: - 2 ; CHECK-NEXT: - 0 ; CHECK: .name: test_pointer ; CHECK: .symbol: test_pointer.kd -define amdgpu_kernel void @test_pointer(i32 addrspace(1)* %a) +define amdgpu_kernel void @test_pointer(i32 addrspace(1)* %a) #0 !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !16 !kernel_arg_base_type !16 !kernel_arg_type_qual !4 { ret void } -; CHECK: - .args: +; CHECK: - .args: ; CHECK-NEXT: - .address_space: global ; CHECK-NEXT: .name: a ; CHECK-NEXT: .offset: 0 @@ -340,18 +340,18 @@ ; CHECK-NEXT: .value_kind: hidden_printf_buffer ; CHECK-NEXT: .value_type: i8 ; CHECK: .language: OpenCL C -; CHECK-NEXT: .language_version: +; CHECK-NEXT: .language_version: ; CHECK-NEXT: - 2 ; CHECK-NEXT: - 0 ; CHECK: .name: test_image ; CHECK: .symbol: test_image.kd -define amdgpu_kernel void @test_image(%opencl.image2d_t addrspace(1)* %a) +define amdgpu_kernel void @test_image(%opencl.image2d_t addrspace(1)* %a) #0 !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !17 !kernel_arg_base_type !17 !kernel_arg_type_qual !4 { ret void } -; CHECK: - .args: +; CHECK: - .args: ; CHECK-NEXT: - .name: a ; CHECK-NEXT: .offset: 0 ; CHECK-NEXT: .size: 4 @@ -376,18 +376,18 @@ ; CHECK-NEXT: .value_kind: hidden_printf_buffer ; CHECK-NEXT: .value_type: i8 ; CHECK: .language: OpenCL C -; CHECK-NEXT: .language_version: +; CHECK-NEXT: .language_version: ; CHECK-NEXT: - 2 ; CHECK-NEXT: - 0 ; CHECK: .name: test_sampler ; CHECK: .symbol: test_sampler.kd -define amdgpu_kernel void @test_sampler(i32 %a) +define amdgpu_kernel void @test_sampler(i32 %a) #0 !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !18 !kernel_arg_base_type !18 !kernel_arg_type_qual !4 { ret void } -; CHECK: - .args: +; CHECK: - .args: ; CHECK-NEXT: - .address_space: global ; CHECK-NEXT: .name: a ; CHECK-NEXT: .offset: 0 @@ -413,18 +413,18 @@ ; CHECK-NEXT: .value_kind: hidden_printf_buffer ; CHECK-NEXT: .value_type: i8 ; CHECK: .language: OpenCL C -; CHECK-NEXT: .language_version: +; CHECK-NEXT: .language_version: ; CHECK-NEXT: - 2 ; CHECK-NEXT: - 0 ; CHECK: .name: test_queue ; CHECK: .symbol: test_queue.kd -define amdgpu_kernel void @test_queue(%opencl.queue_t addrspace(1)* %a) +define amdgpu_kernel void @test_queue(%opencl.queue_t addrspace(1)* %a) #0 !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !19 !kernel_arg_base_type !19 !kernel_arg_type_qual !4 { ret void } -; CHECK: - .args: +; CHECK: - .args: ; CHECK-NEXT: - .address_space: private ; CHECK-NEXT: .name: a ; CHECK-NEXT: .offset: 0 @@ -450,18 +450,18 @@ ; CHECK-NEXT: .value_kind: hidden_printf_buffer ; CHECK-NEXT: .value_type: i8 ; CHECK: .language: OpenCL C -; CHECK-NEXT: .language_version: +; CHECK-NEXT: .language_version: ; CHECK-NEXT: - 2 ; CHECK-NEXT: - 0 ; CHECK: .name: test_struct ; CHECK: .symbol: test_struct.kd -define amdgpu_kernel void @test_struct(%struct.A addrspace(5)* byval %a) +define amdgpu_kernel void @test_struct(%struct.A addrspace(5)* byval %a) #0 !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !20 !kernel_arg_base_type !20 !kernel_arg_type_qual !4 { ret void } -; CHECK: - .args: +; CHECK: - .args: ; CHECK-NEXT: - .name: a ; CHECK-NEXT: .offset: 0 ; CHECK-NEXT: .size: 16 @@ -486,18 +486,18 @@ ; CHECK-NEXT: .value_kind: hidden_printf_buffer ; CHECK-NEXT: .value_type: i8 ; CHECK: .language: OpenCL C -; CHECK-NEXT: .language_version: +; CHECK-NEXT: .language_version: ; CHECK-NEXT: - 2 ; CHECK-NEXT: - 0 ; CHECK: .name: test_i128 ; CHECK: .symbol: test_i128.kd -define amdgpu_kernel void @test_i128(i128 %a) +define amdgpu_kernel void @test_i128(i128 %a) #0 !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !21 !kernel_arg_base_type !21 !kernel_arg_type_qual !4 { ret void } -; CHECK: - .args: +; CHECK: - .args: ; CHECK-NEXT: - .name: a ; CHECK-NEXT: .offset: 0 ; CHECK-NEXT: .size: 4 @@ -534,18 +534,18 @@ ; CHECK-NEXT: .value_kind: hidden_printf_buffer ; CHECK-NEXT: .value_type: i8 ; CHECK: .language: OpenCL C -; CHECK-NEXT: .language_version: +; CHECK-NEXT: .language_version: ; CHECK-NEXT: - 2 ; CHECK-NEXT: - 0 ; CHECK: .name: test_multi_arg ; CHECK: .symbol: test_multi_arg.kd -define amdgpu_kernel void @test_multi_arg(i32 %a, <2 x i16> %b, <3 x i8> %c) +define amdgpu_kernel void @test_multi_arg(i32 %a, <2 x i16> %b, <3 x i8> %c) #0 !kernel_arg_addr_space !22 !kernel_arg_access_qual !23 !kernel_arg_type !24 !kernel_arg_base_type !24 !kernel_arg_type_qual !25 { ret void } -; CHECK: - .args: +; CHECK: - .args: ; CHECK-NEXT: - .address_space: global ; CHECK-NEXT: .name: g ; CHECK-NEXT: .offset: 0 @@ -586,20 +586,20 @@ ; CHECK-NEXT: .value_kind: hidden_printf_buffer ; CHECK-NEXT: .value_type: i8 ; CHECK: .language: OpenCL C -; CHECK-NEXT: .language_version: +; CHECK-NEXT: .language_version: ; CHECK-NEXT: - 2 ; CHECK-NEXT: - 0 ; CHECK: .name: test_addr_space ; CHECK: .symbol: test_addr_space.kd define amdgpu_kernel void @test_addr_space(i32 addrspace(1)* %g, i32 addrspace(4)* %c, - i32 addrspace(3)* %l) + i32 addrspace(3)* %l) #0 !kernel_arg_addr_space !50 !kernel_arg_access_qual !23 !kernel_arg_type !51 !kernel_arg_base_type !51 !kernel_arg_type_qual !25 { ret void } -; CHECK: - .args: +; CHECK: - .args: ; CHECK-NEXT: - .address_space: global ; CHECK-NEXT: .is_volatile: true ; CHECK-NEXT: .name: a @@ -643,20 +643,20 @@ ; CHECK-NEXT: .value_kind: hidden_printf_buffer ; CHECK-NEXT: .value_type: i8 ; CHECK: .language: OpenCL C -; CHECK-NEXT: .language_version: +; CHECK-NEXT: .language_version: ; CHECK-NEXT: - 2 ; CHECK-NEXT: - 0 ; CHECK: .name: test_type_qual ; CHECK: .symbol: test_type_qual.kd define amdgpu_kernel void @test_type_qual(i32 addrspace(1)* %a, i32 addrspace(1)* %b, - %opencl.pipe_t addrspace(1)* %c) + %opencl.pipe_t addrspace(1)* %c) #0 !kernel_arg_addr_space !22 !kernel_arg_access_qual !23 !kernel_arg_type !51 !kernel_arg_base_type !51 !kernel_arg_type_qual !70 { ret void } -; CHECK: - .args: +; CHECK: - .args: ; CHECK-NEXT: - .access: read_only ; CHECK-NEXT: .address_space: global ; CHECK-NEXT: .name: ro @@ -699,20 +699,20 @@ ; CHECK-NEXT: .value_kind: hidden_printf_buffer ; CHECK-NEXT: .value_type: i8 ; CHECK: .language: OpenCL C -; CHECK-NEXT: .language_version: +; CHECK-NEXT: .language_version: ; CHECK-NEXT: - 2 ; CHECK-NEXT: - 0 ; CHECK: .name: test_access_qual ; CHECK: .symbol: test_access_qual.kd define amdgpu_kernel void @test_access_qual(%opencl.image1d_t addrspace(1)* %ro, %opencl.image2d_t addrspace(1)* %wo, - %opencl.image3d_t addrspace(1)* %rw) + %opencl.image3d_t addrspace(1)* %rw) #0 !kernel_arg_addr_space !60 !kernel_arg_access_qual !61 !kernel_arg_type !62 !kernel_arg_base_type !62 !kernel_arg_type_qual !25 { ret void } -; CHECK: - .args: +; CHECK: - .args: ; CHECK-NEXT: - .name: a ; CHECK-NEXT: .offset: 0 ; CHECK-NEXT: .size: 4 @@ -737,19 +737,19 @@ ; CHECK-NEXT: .value_kind: hidden_printf_buffer ; CHECK-NEXT: .value_type: i8 ; CHECK: .language: OpenCL C -; CHECK-NEXT: .language_version: +; CHECK-NEXT: .language_version: ; CHECK-NEXT: - 2 ; CHECK-NEXT: - 0 ; CHECK: .name: test_vec_type_hint_half ; CHECK: .symbol: test_vec_type_hint_half.kd ; CHECK: .vec_type_hint: half -define amdgpu_kernel void @test_vec_type_hint_half(i32 %a) +define amdgpu_kernel void @test_vec_type_hint_half(i32 %a) #0 !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !3 !kernel_arg_base_type !3 !kernel_arg_type_qual !4 !vec_type_hint !26 { ret void } -; CHECK: - .args: +; CHECK: - .args: ; CHECK-NEXT: - .name: a ; CHECK-NEXT: .offset: 0 ; CHECK-NEXT: .size: 4 @@ -774,19 +774,19 @@ ; CHECK-NEXT: .value_kind: hidden_printf_buffer ; CHECK-NEXT: .value_type: i8 ; CHECK: .language: OpenCL C -; CHECK-NEXT: .language_version: +; CHECK-NEXT: .language_version: ; CHECK-NEXT: - 2 ; CHECK-NEXT: - 0 ; CHECK: .name: test_vec_type_hint_float ; CHECK: .symbol: test_vec_type_hint_float.kd ; CHECK: .vec_type_hint: float -define amdgpu_kernel void @test_vec_type_hint_float(i32 %a) +define amdgpu_kernel void @test_vec_type_hint_float(i32 %a) #0 !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !3 !kernel_arg_base_type !3 !kernel_arg_type_qual !4 !vec_type_hint !27 { ret void } -; CHECK: - .args: +; CHECK: - .args: ; CHECK-NEXT: - .name: a ; CHECK-NEXT: .offset: 0 ; CHECK-NEXT: .size: 4 @@ -811,19 +811,19 @@ ; CHECK-NEXT: .value_kind: hidden_printf_buffer ; CHECK-NEXT: .value_type: i8 ; CHECK: .language: OpenCL C -; CHECK-NEXT: .language_version: +; CHECK-NEXT: .language_version: ; CHECK-NEXT: - 2 ; CHECK-NEXT: - 0 ; CHECK: .name: test_vec_type_hint_double ; CHECK: .symbol: test_vec_type_hint_double.kd ; CHECK: .vec_type_hint: double -define amdgpu_kernel void @test_vec_type_hint_double(i32 %a) +define amdgpu_kernel void @test_vec_type_hint_double(i32 %a) #0 !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !3 !kernel_arg_base_type !3 !kernel_arg_type_qual !4 !vec_type_hint !28 { ret void } -; CHECK: - .args: +; CHECK: - .args: ; CHECK-NEXT: - .name: a ; CHECK-NEXT: .offset: 0 ; CHECK-NEXT: .size: 4 @@ -848,19 +848,19 @@ ; CHECK-NEXT: .value_kind: hidden_printf_buffer ; CHECK-NEXT: .value_type: i8 ; CHECK: .language: OpenCL C -; CHECK-NEXT: .language_version: +; CHECK-NEXT: .language_version: ; CHECK-NEXT: - 2 ; CHECK-NEXT: - 0 ; CHECK: .name: test_vec_type_hint_char ; CHECK: .symbol: test_vec_type_hint_char.kd ; CHECK: .vec_type_hint: char -define amdgpu_kernel void @test_vec_type_hint_char(i32 %a) +define amdgpu_kernel void @test_vec_type_hint_char(i32 %a) #0 !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !3 !kernel_arg_base_type !3 !kernel_arg_type_qual !4 !vec_type_hint !29 { ret void } -; CHECK: - .args: +; CHECK: - .args: ; CHECK-NEXT: - .name: a ; CHECK-NEXT: .offset: 0 ; CHECK-NEXT: .size: 4 @@ -885,19 +885,19 @@ ; CHECK-NEXT: .value_kind: hidden_printf_buffer ; CHECK-NEXT: .value_type: i8 ; CHECK: .language: OpenCL C -; CHECK-NEXT: .language_version: +; CHECK-NEXT: .language_version: ; CHECK-NEXT: - 2 ; CHECK-NEXT: - 0 ; CHECK: .name: test_vec_type_hint_short ; CHECK: .symbol: test_vec_type_hint_short.kd ; CHECK: .vec_type_hint: short -define amdgpu_kernel void @test_vec_type_hint_short(i32 %a) +define amdgpu_kernel void @test_vec_type_hint_short(i32 %a) #0 !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !3 !kernel_arg_base_type !3 !kernel_arg_type_qual !4 !vec_type_hint !30 { ret void } -; CHECK: - .args: +; CHECK: - .args: ; CHECK-NEXT: - .name: a ; CHECK-NEXT: .offset: 0 ; CHECK-NEXT: .size: 4 @@ -922,19 +922,19 @@ ; CHECK-NEXT: .value_kind: hidden_printf_buffer ; CHECK-NEXT: .value_type: i8 ; CHECK: .language: OpenCL C -; CHECK-NEXT: .language_version: +; CHECK-NEXT: .language_version: ; CHECK-NEXT: - 2 ; CHECK-NEXT: - 0 ; CHECK: .name: test_vec_type_hint_long ; CHECK: .symbol: test_vec_type_hint_long.kd ; CHECK: .vec_type_hint: long -define amdgpu_kernel void @test_vec_type_hint_long(i32 %a) +define amdgpu_kernel void @test_vec_type_hint_long(i32 %a) #0 !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !3 !kernel_arg_base_type !3 !kernel_arg_type_qual !4 !vec_type_hint !31 { ret void } -; CHECK: - .args: +; CHECK: - .args: ; CHECK-NEXT: - .name: a ; CHECK-NEXT: .offset: 0 ; CHECK-NEXT: .size: 4 @@ -959,19 +959,19 @@ ; CHECK-NEXT: .value_kind: hidden_printf_buffer ; CHECK-NEXT: .value_type: i8 ; CHECK: .language: OpenCL C -; CHECK-NEXT: .language_version: +; CHECK-NEXT: .language_version: ; CHECK-NEXT: - 2 ; CHECK-NEXT: - 0 ; CHECK: .name: test_vec_type_hint_unknown ; CHECK: .symbol: test_vec_type_hint_unknown.kd ; CHECK: .vec_type_hint: unknown -define amdgpu_kernel void @test_vec_type_hint_unknown(i32 %a) +define amdgpu_kernel void @test_vec_type_hint_unknown(i32 %a) #0 !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !3 !kernel_arg_base_type !3 !kernel_arg_type_qual !4 !vec_type_hint !32 { ret void } -; CHECK: - .args: +; CHECK: - .args: ; CHECK-NEXT: - .name: a ; CHECK-NEXT: .offset: 0 ; CHECK-NEXT: .size: 4 @@ -996,24 +996,24 @@ ; CHECK-NEXT: .value_kind: hidden_printf_buffer ; CHECK-NEXT: .value_type: i8 ; CHECK: .language: OpenCL C -; CHECK-NEXT: .language_version: +; CHECK-NEXT: .language_version: ; CHECK-NEXT: - 2 ; CHECK-NEXT: - 0 ; CHECK: .name: test_reqd_wgs_vec_type_hint -; CHECK: .reqd_workgroup_size: +; CHECK: .reqd_workgroup_size: ; CHECK-NEXT: - 1 ; CHECK-NEXT: - 2 ; CHECK-NEXT: - 4 ; CHECK: .symbol: test_reqd_wgs_vec_type_hint.kd ; CHECK: .vec_type_hint: int -define amdgpu_kernel void @test_reqd_wgs_vec_type_hint(i32 %a) +define amdgpu_kernel void @test_reqd_wgs_vec_type_hint(i32 %a) #0 !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !3 !kernel_arg_base_type !3 !kernel_arg_type_qual !4 !vec_type_hint !5 !reqd_work_group_size !6 { ret void } -; CHECK: - .args: +; CHECK: - .args: ; CHECK-NEXT: - .name: a ; CHECK-NEXT: .offset: 0 ; CHECK-NEXT: .size: 4 @@ -1038,24 +1038,24 @@ ; CHECK-NEXT: .value_kind: hidden_printf_buffer ; CHECK-NEXT: .value_type: i8 ; CHECK: .language: OpenCL C -; CHECK-NEXT: .language_version: +; CHECK-NEXT: .language_version: ; CHECK-NEXT: - 2 ; CHECK-NEXT: - 0 ; CHECK: .name: test_wgs_hint_vec_type_hint ; CHECK: .symbol: test_wgs_hint_vec_type_hint.kd ; CHECK: .vec_type_hint: uint4 -; CHECK: .workgroup_size_hint: +; CHECK: .workgroup_size_hint: ; CHECK-NEXT: - 8 ; CHECK-NEXT: - 16 ; CHECK-NEXT: - 32 -define amdgpu_kernel void @test_wgs_hint_vec_type_hint(i32 %a) +define amdgpu_kernel void @test_wgs_hint_vec_type_hint(i32 %a) #0 !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !3 !kernel_arg_base_type !3 !kernel_arg_type_qual !4 !vec_type_hint !7 !work_group_size_hint !8 { ret void } -; CHECK: - .args: +; CHECK: - .args: ; CHECK-NEXT: - .address_space: global ; CHECK-NEXT: .name: a ; CHECK-NEXT: .offset: 0 @@ -1081,18 +1081,18 @@ ; CHECK-NEXT: .value_kind: hidden_printf_buffer ; CHECK-NEXT: .value_type: i8 ; CHECK: .language: OpenCL C -; CHECK-NEXT: .language_version: +; CHECK-NEXT: .language_version: ; CHECK-NEXT: - 2 ; CHECK-NEXT: - 0 ; CHECK: .name: test_arg_ptr_to_ptr ; CHECK: .symbol: test_arg_ptr_to_ptr.kd -define amdgpu_kernel void @test_arg_ptr_to_ptr(i32 addrspace(5)* addrspace(1)* %a) +define amdgpu_kernel void @test_arg_ptr_to_ptr(i32 addrspace(5)* addrspace(1)* %a) #0 !kernel_arg_addr_space !81 !kernel_arg_access_qual !2 !kernel_arg_type !80 !kernel_arg_base_type !80 !kernel_arg_type_qual !4 { ret void } -; CHECK: - .args: +; CHECK: - .args: ; CHECK-NEXT: - .address_space: private ; CHECK-NEXT: .name: a ; CHECK-NEXT: .offset: 0 @@ -1118,18 +1118,18 @@ ; CHECK-NEXT: .value_kind: hidden_printf_buffer ; CHECK-NEXT: .value_type: i8 ; CHECK: .language: OpenCL C -; CHECK-NEXT: .language_version: +; CHECK-NEXT: .language_version: ; CHECK-NEXT: - 2 ; CHECK-NEXT: - 0 ; CHECK: .name: test_arg_struct_contains_ptr ; CHECK: .symbol: test_arg_struct_contains_ptr.kd -define amdgpu_kernel void @test_arg_struct_contains_ptr(%struct.B addrspace(5)* byval %a) +define amdgpu_kernel void @test_arg_struct_contains_ptr(%struct.B addrspace(5)* byval %a) #0 !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !82 !kernel_arg_base_type !82 !kernel_arg_type_qual !4 { ret void } -; CHECK: - .args: +; CHECK: - .args: ; CHECK-NEXT: - .name: a ; CHECK-NEXT: .offset: 0 ; CHECK-NEXT: .size: 16 @@ -1154,18 +1154,18 @@ ; CHECK-NEXT: .value_kind: hidden_printf_buffer ; CHECK-NEXT: .value_type: i8 ; CHECK: .language: OpenCL C -; CHECK-NEXT: .language_version: +; CHECK-NEXT: .language_version: ; CHECK-NEXT: - 2 ; CHECK-NEXT: - 0 ; CHECK: .name: test_arg_vector_of_ptr ; CHECK: .symbol: test_arg_vector_of_ptr.kd -define amdgpu_kernel void @test_arg_vector_of_ptr(<2 x i32 addrspace(1)*> %a) +define amdgpu_kernel void @test_arg_vector_of_ptr(<2 x i32 addrspace(1)*> %a) #0 !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !83 !kernel_arg_base_type !83 !kernel_arg_type_qual !4 { ret void } -; CHECK: - .args: +; CHECK: - .args: ; CHECK-NEXT: - .address_space: global ; CHECK-NEXT: .name: a ; CHECK-NEXT: .offset: 0 @@ -1191,19 +1191,19 @@ ; CHECK-NEXT: .value_kind: hidden_printf_buffer ; CHECK-NEXT: .value_type: i8 ; CHECK: .language: OpenCL C -; CHECK-NEXT: .language_version: +; CHECK-NEXT: .language_version: ; CHECK-NEXT: - 2 ; CHECK-NEXT: - 0 ; CHECK: .name: test_arg_unknown_builtin_type ; CHECK: .symbol: test_arg_unknown_builtin_type.kd define amdgpu_kernel void @test_arg_unknown_builtin_type( - %opencl.clk_event_t addrspace(1)* %a) + %opencl.clk_event_t addrspace(1)* %a) #0 !kernel_arg_addr_space !81 !kernel_arg_access_qual !2 !kernel_arg_type !84 !kernel_arg_base_type !84 !kernel_arg_type_qual !4 { ret void } -; CHECK: - .args: +; CHECK: - .args: ; CHECK-NEXT: - .address_space: global ; CHECK-NEXT: .name: a ; CHECK-NEXT: .offset: 0 @@ -1259,25 +1259,32 @@ ; CHECK-NEXT: .type_name: 'char16 addrspace(5)*' ; CHECK-NEXT: .value_kind: dynamic_shared_pointer ; CHECK-NEXT: .value_type: i8 -; CHECK-NEXT: - .offset: 32 +; CHECK-NEXT: - .address_space: local +; CHECK-NEXT: .name: h +; CHECK-NEXT: .offset: 32 +; CHECK-NEXT: .pointee_align: 1 +; CHECK-NEXT: .size: 4 +; CHECK-NEXT: .value_kind: dynamic_shared_pointer +; CHECK-NEXT: .value_type: struct +; CHECK-NEXT: - .offset: 40 ; CHECK-NEXT: .size: 8 ; CHECK-NEXT: .value_kind: hidden_global_offset_x ; CHECK-NEXT: .value_type: i64 -; CHECK-NEXT: - .offset: 40 +; CHECK-NEXT: - .offset: 48 ; CHECK-NEXT: .size: 8 ; CHECK-NEXT: .value_kind: hidden_global_offset_y ; CHECK-NEXT: .value_type: i64 -; CHECK-NEXT: - .offset: 48 +; CHECK-NEXT: - .offset: 56 ; CHECK-NEXT: .size: 8 ; CHECK-NEXT: .value_kind: hidden_global_offset_z ; CHECK-NEXT: .value_type: i64 ; CHECK-NEXT: - .address_space: global -; CHECK-NEXT: .offset: 56 +; CHECK-NEXT: .offset: 64 ; CHECK-NEXT: .size: 8 ; CHECK-NEXT: .value_kind: hidden_printf_buffer ; CHECK-NEXT: .value_type: i8 ; CHECK: .language: OpenCL C -; CHECK-NEXT: .language_version: +; CHECK-NEXT: .language_version: ; CHECK-NEXT: - 2 ; CHECK-NEXT: - 0 ; CHECK: .name: test_pointee_align @@ -1288,13 +1295,112 @@ <3 x i8> addrspace(3)* %d, <4 x i8> addrspace(3)* %e, <8 x i8> addrspace(3)* %f, - <16 x i8> addrspace(3)* %g) + <16 x i8> addrspace(3)* %g, + {} addrspace(3)* %h) #0 !kernel_arg_addr_space !91 !kernel_arg_access_qual !92 !kernel_arg_type !93 !kernel_arg_base_type !93 !kernel_arg_type_qual !94 { ret void } -; CHECK: - .args: +; CHECK: - .args: +; CHECK-NEXT: - .address_space: global +; CHECK-NEXT: .name: a +; CHECK-NEXT: .offset: 0 +; CHECK-NEXT: .size: 8 +; CHECK-NEXT: .type_name: 'long addrspace(5)*' +; CHECK-NEXT: .value_kind: global_buffer +; CHECK-NEXT: .value_type: i64 +; CHECK-NEXT: - .address_space: local +; CHECK-NEXT: .name: b +; CHECK-NEXT: .offset: 8 +; CHECK-NEXT: .pointee_align: 8 +; CHECK-NEXT: .size: 4 +; CHECK-NEXT: .type_name: 'char addrspace(5)*' +; CHECK-NEXT: .value_kind: dynamic_shared_pointer +; CHECK-NEXT: .value_type: i8 +; CHECK-NEXT: - .address_space: local +; CHECK-NEXT: .name: c +; CHECK-NEXT: .offset: 12 +; CHECK-NEXT: .pointee_align: 32 +; CHECK-NEXT: .size: 4 +; CHECK-NEXT: .type_name: 'char2 addrspace(5)*' +; CHECK-NEXT: .value_kind: dynamic_shared_pointer +; CHECK-NEXT: .value_type: i8 +; CHECK-NEXT: - .address_space: local +; CHECK-NEXT: .name: d +; CHECK-NEXT: .offset: 16 +; CHECK-NEXT: .pointee_align: 64 +; CHECK-NEXT: .size: 4 +; CHECK-NEXT: .type_name: 'char3 addrspace(5)*' +; CHECK-NEXT: .value_kind: dynamic_shared_pointer +; CHECK-NEXT: .value_type: i8 +; CHECK-NEXT: - .address_space: local +; CHECK-NEXT: .name: e +; CHECK-NEXT: .offset: 20 +; CHECK-NEXT: .pointee_align: 256 +; CHECK-NEXT: .size: 4 +; CHECK-NEXT: .type_name: 'char4 addrspace(5)*' +; CHECK-NEXT: .value_kind: dynamic_shared_pointer +; CHECK-NEXT: .value_type: i8 +; CHECK-NEXT: - .address_space: local +; CHECK-NEXT: .name: f +; CHECK-NEXT: .offset: 24 +; CHECK-NEXT: .pointee_align: 128 +; CHECK-NEXT: .size: 4 +; CHECK-NEXT: .type_name: 'char8 addrspace(5)*' +; CHECK-NEXT: .value_kind: dynamic_shared_pointer +; CHECK-NEXT: .value_type: i8 +; CHECK-NEXT: - .address_space: local +; CHECK-NEXT: .name: g +; CHECK-NEXT: .offset: 28 +; CHECK-NEXT: .pointee_align: 1024 +; CHECK-NEXT: .size: 4 +; CHECK-NEXT: .type_name: 'char16 addrspace(5)*' +; CHECK-NEXT: .value_kind: dynamic_shared_pointer +; CHECK-NEXT: .value_type: i8 +; CHECK-NEXT: - .address_space: local +; CHECK-NEXT: .name: h +; CHECK-NEXT: .offset: 32 +; CHECK-NEXT: .pointee_align: 16 +; CHECK-NEXT: .size: 4 +; CHECK-NEXT: .value_kind: dynamic_shared_pointer +; CHECK-NEXT: .value_type: struct +; CHECK-NEXT: - .offset: 40 +; CHECK-NEXT: .size: 8 +; CHECK-NEXT: .value_kind: hidden_global_offset_x +; CHECK-NEXT: .value_type: i64 +; CHECK-NEXT: - .offset: 48 +; CHECK-NEXT: .size: 8 +; CHECK-NEXT: .value_kind: hidden_global_offset_y +; CHECK-NEXT: .value_type: i64 +; CHECK-NEXT: - .offset: 56 +; CHECK-NEXT: .size: 8 +; CHECK-NEXT: .value_kind: hidden_global_offset_z +; CHECK-NEXT: .value_type: i64 +; CHECK-NEXT: - .address_space: global +; CHECK-NEXT: .offset: 64 +; CHECK-NEXT: .size: 8 +; CHECK-NEXT: .value_kind: hidden_printf_buffer +; CHECK-NEXT: .value_type: i8 +; CHECK: .language: OpenCL C +; CHECK-NEXT: .language_version: +; CHECK-NEXT: - 2 +; CHECK-NEXT: - 0 +; CHECK: .name: test_pointee_align_attribute +; CHECK: .symbol: test_pointee_align_attribute.kd +define amdgpu_kernel void @test_pointee_align_attribute(i64 addrspace(1)* align 16 %a, + i8 addrspace(3)* align 8 %b, + <2 x i8> addrspace(3)* align 32 %c, + <3 x i8> addrspace(3)* align 64 %d, + <4 x i8> addrspace(3)* align 256 %e, + <8 x i8> addrspace(3)* align 128 %f, + <16 x i8> addrspace(3)* align 1024 %g, + {} addrspace(3)* align 16 %h) #0 + !kernel_arg_addr_space !91 !kernel_arg_access_qual !92 !kernel_arg_type !93 + !kernel_arg_base_type !93 !kernel_arg_type_qual !94 { + ret void +} +; CHECK: - .args: ; CHECK-NEXT: - .name: arg ; CHECK-NEXT: .offset: 0 ; CHECK-NEXT: .size: 25 @@ -1320,19 +1426,19 @@ ; CHECK-NEXT: .value_type: i8 ; CHECK: .device_enqueue_symbol: __test_block_invoke_kernel_runtime_handle ; CHECK: .language: OpenCL C -; CHECK-NEXT: .language_version: +; CHECK-NEXT: .language_version: ; CHECK-NEXT: - 2 ; CHECK-NEXT: - 0 ; CHECK: .name: __test_block_invoke_kernel ; CHECK: .symbol: __test_block_invoke_kernel.kd define amdgpu_kernel void @__test_block_invoke_kernel( - <{ i32, i32, i8*, i8 addrspace(1)*, i8 }> %arg) #0 + <{ i32, i32, i8*, i8 addrspace(1)*, i8 }> %arg) #1 !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !110 !kernel_arg_base_type !110 !kernel_arg_type_qual !4 { ret void } -; CHECK: - .args: +; CHECK: - .args: ; CHECK-NEXT: - .name: a ; CHECK-NEXT: .offset: 0 ; CHECK-NEXT: .size: 1 @@ -1367,18 +1473,18 @@ ; CHECK-NEXT: .value_kind: hidden_completion_action ; CHECK-NEXT: .value_type: i8 ; CHECK: .language: OpenCL C -; CHECK-NEXT: .language_version: +; CHECK-NEXT: .language_version: ; CHECK-NEXT: - 2 ; CHECK-NEXT: - 0 ; CHECK: .name: test_enqueue_kernel_caller ; CHECK: .symbol: test_enqueue_kernel_caller.kd -define amdgpu_kernel void @test_enqueue_kernel_caller(i8 %a) #1 +define amdgpu_kernel void @test_enqueue_kernel_caller(i8 %a) #2 !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !9 !kernel_arg_base_type !9 !kernel_arg_type_qual !4 { ret void } -; CHECK: - .args: +; CHECK: - .args: ; CHECK-NEXT: - .name: ptr ; CHECK-NEXT: .offset: 0 ; CHECK-NEXT: .size: 8 @@ -1397,8 +1503,9 @@ ; CHECK-NEXT: - 1 ; CHECK-NEXT: - 0 -attributes #0 = { "runtime-handle"="__test_block_invoke_kernel_runtime_handle" } -attributes #1 = { "calls-enqueue-kernel" } +attributes #0 = { "amdgpu-implicitarg-num-bytes"="48" } +attributes #1 = { "amdgpu-implicitarg-num-bytes"="48" "runtime-handle"="__test_block_invoke_kernel_runtime_handle" } +attributes #2 = { "amdgpu-implicitarg-num-bytes"="48" "calls-enqueue-kernel" } !llvm.printf.fmts = !{!100, !101} Index: test/CodeGen/AMDGPU/hsa-metadata-from-llvm-ir-full.ll =================================================================== --- test/CodeGen/AMDGPU/hsa-metadata-from-llvm-ir-full.ll +++ test/CodeGen/AMDGPU/hsa-metadata-from-llvm-ir-full.ll @@ -1248,13 +1248,13 @@ ; CHECK-NEXT: PointeeAlign: 16 ; CHECK-NEXT: AddrSpaceQual: Local ; CHECK-NEXT: AccQual: Default -; CHECK-NEXT: - Name: h -; CHECK-NEXT: Size: 4 -; CHECK-NEXT: Align: 4 -; CHECK-NEXT: ValueKind: DynamicSharedPointer -; CHECK-NEXT: ValueType: Struct -; CHECK-NEXT: PointeeAlign: 1 -; CHECK-NEXT: AddrSpaceQual: Local +; CHECK-NEXT: - Name: h +; CHECK-NEXT: Size: 4 +; CHECK-NEXT: Align: 4 +; CHECK-NEXT: ValueKind: DynamicSharedPointer +; CHECK-NEXT: ValueType: Struct +; CHECK-NEXT: PointeeAlign: 1 +; CHECK-NEXT: AddrSpaceQual: Local ; CHECK-NEXT: - Size: 8 ; CHECK-NEXT: Align: 8 ; CHECK-NEXT: ValueKind: HiddenGlobalOffsetX Index: test/CodeGen/AMDGPU/hsa-metadata-hidden-args-v3.ll =================================================================== --- test/CodeGen/AMDGPU/hsa-metadata-hidden-args-v3.ll +++ test/CodeGen/AMDGPU/hsa-metadata-hidden-args-v3.ll @@ -1,10 +1,217 @@ -; RUN: llc -mattr=+code-object-v3 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx700 -filetype=obj -o - < %s | llvm-readobj -elf-output-style=GNU -notes | FileCheck --check-prefix=CHECK --check-prefix=GFX700 --check-prefix=NOTES %s -; RUN: llc -mattr=+code-object-v3 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 -filetype=obj -o - < %s | llvm-readobj -elf-output-style=GNU -notes | FileCheck --check-prefix=CHECK --check-prefix=GFX803 --check-prefix=NOTES %s -; RUN: llc -mattr=+code-object-v3 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -filetype=obj -o - < %s | llvm-readobj -elf-output-style=GNU -notes | FileCheck --check-prefix=CHECK --check-prefix=GFX900 --check-prefix=NOTES %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx700 -mattr=+code-object-v3 -filetype=obj -o - < %s | llvm-readobj -elf-output-style=GNU -notes | FileCheck --check-prefix=CHECK --check-prefix=GFX700 --check-prefix=NOTES %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 -mattr=+code-object-v3 -filetype=obj -o - < %s | llvm-readobj -elf-output-style=GNU -notes | FileCheck --check-prefix=CHECK --check-prefix=GFX803 --check-prefix=NOTES %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -mattr=+code-object-v3 -filetype=obj -o - < %s | llvm-readobj -elf-output-style=GNU -notes | FileCheck --check-prefix=CHECK --check-prefix=GFX900 --check-prefix=NOTES %s ; CHECK: --- -; CHECK: amdhsa.kernels: -; CHECK: - .args: +; CHECK: amdhsa.kernels: + +; CHECK: - .args: +; CHECK-NEXT: - .address_space: global +; CHECK-NEXT: .name: r +; CHECK-NEXT: .offset: 0 +; CHECK-NEXT: .size: 8 +; CHECK-NEXT: .value_kind: global_buffer +; CHECK-NEXT: .value_type: f16 +; CHECK-NEXT: - .address_space: global +; CHECK-NEXT: .name: a +; CHECK-NEXT: .offset: 8 +; CHECK-NEXT: .size: 8 +; CHECK-NEXT: .value_kind: global_buffer +; CHECK-NEXT: .value_type: f16 +; CHECK-NEXT: - .address_space: global +; CHECK-NEXT: .name: b +; CHECK-NEXT: .offset: 16 +; CHECK-NEXT: .size: 8 +; CHECK-NEXT: .value_kind: global_buffer +; CHECK-NEXT: .value_type: f16 +; CHECK: .name: test0 +; CHECK: .symbol: test0.kd +define amdgpu_kernel void @test0( + half addrspace(1)* %r, + half addrspace(1)* %a, + half addrspace(1)* %b) { +entry: + %a.val = load half, half addrspace(1)* %a + %b.val = load half, half addrspace(1)* %b + %r.val = fadd half %a.val, %b.val + store half %r.val, half addrspace(1)* %r + ret void +} + +; CHECK: - .args: +; CHECK-NEXT: - .address_space: global +; CHECK-NEXT: .name: r +; CHECK-NEXT: .offset: 0 +; CHECK-NEXT: .size: 8 +; CHECK-NEXT: .value_kind: global_buffer +; CHECK-NEXT: .value_type: f16 +; CHECK-NEXT: - .address_space: global +; CHECK-NEXT: .name: a +; CHECK-NEXT: .offset: 8 +; CHECK-NEXT: .size: 8 +; CHECK-NEXT: .value_kind: global_buffer +; CHECK-NEXT: .value_type: f16 +; CHECK-NEXT: - .address_space: global +; CHECK-NEXT: .name: b +; CHECK-NEXT: .offset: 16 +; CHECK-NEXT: .size: 8 +; CHECK-NEXT: .value_kind: global_buffer +; CHECK-NEXT: .value_type: f16 +; CHECK-NEXT: - .offset: 24 +; CHECK-NEXT: .size: 8 +; CHECK-NEXT: .value_kind: hidden_global_offset_x +; CHECK-NEXT: .value_type: i64 +; CHECK: .name: test8 +; CHECK: .symbol: test8.kd +define amdgpu_kernel void @test8( + half addrspace(1)* %r, + half addrspace(1)* %a, + half addrspace(1)* %b) #0 { +entry: + %a.val = load half, half addrspace(1)* %a + %b.val = load half, half addrspace(1)* %b + %r.val = fadd half %a.val, %b.val + store half %r.val, half addrspace(1)* %r + ret void +} + +; CHECK: - .args: +; CHECK-NEXT: - .address_space: global +; CHECK-NEXT: .name: r +; CHECK-NEXT: .offset: 0 +; CHECK-NEXT: .size: 8 +; CHECK-NEXT: .value_kind: global_buffer +; CHECK-NEXT: .value_type: f16 +; CHECK-NEXT: - .address_space: global +; CHECK-NEXT: .name: a +; CHECK-NEXT: .offset: 8 +; CHECK-NEXT: .size: 8 +; CHECK-NEXT: .value_kind: global_buffer +; CHECK-NEXT: .value_type: f16 +; CHECK-NEXT: - .address_space: global +; CHECK-NEXT: .name: b +; CHECK-NEXT: .offset: 16 +; CHECK-NEXT: .size: 8 +; CHECK-NEXT: .value_kind: global_buffer +; CHECK-NEXT: .value_type: f16 +; CHECK-NEXT: - .offset: 24 +; CHECK-NEXT: .size: 8 +; CHECK-NEXT: .value_kind: hidden_global_offset_x +; CHECK-NEXT: .value_type: i64 +; CHECK-NEXT: - .offset: 32 +; CHECK-NEXT: .size: 8 +; CHECK-NEXT: .value_kind: hidden_global_offset_y +; CHECK-NEXT: .value_type: i64 +; CHECK: .name: test16 +; CHECK: .symbol: test16.kd +define amdgpu_kernel void @test16( + half addrspace(1)* %r, + half addrspace(1)* %a, + half addrspace(1)* %b) #1 { +entry: + %a.val = load half, half addrspace(1)* %a + %b.val = load half, half addrspace(1)* %b + %r.val = fadd half %a.val, %b.val + store half %r.val, half addrspace(1)* %r + ret void +} + +; CHECK: - .args: +; CHECK-NEXT: - .address_space: global +; CHECK-NEXT: .name: r +; CHECK-NEXT: .offset: 0 +; CHECK-NEXT: .size: 8 +; CHECK-NEXT: .value_kind: global_buffer +; CHECK-NEXT: .value_type: f16 +; CHECK-NEXT: - .address_space: global +; CHECK-NEXT: .name: a +; CHECK-NEXT: .offset: 8 +; CHECK-NEXT: .size: 8 +; CHECK-NEXT: .value_kind: global_buffer +; CHECK-NEXT: .value_type: f16 +; CHECK-NEXT: - .address_space: global +; CHECK-NEXT: .name: b +; CHECK-NEXT: .offset: 16 +; CHECK-NEXT: .size: 8 +; CHECK-NEXT: .value_kind: global_buffer +; CHECK-NEXT: .value_type: f16 +; CHECK-NEXT: - .offset: 24 +; CHECK-NEXT: .size: 8 +; CHECK-NEXT: .value_kind: hidden_global_offset_x +; CHECK-NEXT: .value_type: i64 +; CHECK-NEXT: - .offset: 32 +; CHECK-NEXT: .size: 8 +; CHECK-NEXT: .value_kind: hidden_global_offset_y +; CHECK-NEXT: .value_type: i64 +; CHECK-NEXT: - .offset: 40 +; CHECK-NEXT: .size: 8 +; CHECK-NEXT: .value_kind: hidden_global_offset_z +; CHECK-NEXT: .value_type: i64 +; CHECK: .name: test24 +; CHECK: .symbol: test24.kd +define amdgpu_kernel void @test24( + half addrspace(1)* %r, + half addrspace(1)* %a, + half addrspace(1)* %b) #2 { +entry: + %a.val = load half, half addrspace(1)* %a + %b.val = load half, half addrspace(1)* %b + %r.val = fadd half %a.val, %b.val + store half %r.val, half addrspace(1)* %r + ret void +} + +; CHECK: - .args: +; CHECK-NEXT: - .address_space: global +; CHECK-NEXT: .name: r +; CHECK-NEXT: .offset: 0 +; CHECK-NEXT: .size: 8 +; CHECK-NEXT: .value_kind: global_buffer +; CHECK-NEXT: .value_type: f16 +; CHECK-NEXT: - .address_space: global +; CHECK-NEXT: .name: a +; CHECK-NEXT: .offset: 8 +; CHECK-NEXT: .size: 8 +; CHECK-NEXT: .value_kind: global_buffer +; CHECK-NEXT: .value_type: f16 +; CHECK-NEXT: - .address_space: global +; CHECK-NEXT: .name: b +; CHECK-NEXT: .offset: 16 +; CHECK-NEXT: .size: 8 +; CHECK-NEXT: .value_kind: global_buffer +; CHECK-NEXT: .value_type: f16 +; CHECK-NEXT: - .offset: 24 +; CHECK-NEXT: .size: 8 +; CHECK-NEXT: .value_kind: hidden_global_offset_x +; CHECK-NEXT: .value_type: i64 +; CHECK-NEXT: - .offset: 32 +; CHECK-NEXT: .size: 8 +; CHECK-NEXT: .value_kind: hidden_global_offset_y +; CHECK-NEXT: .value_type: i64 +; CHECK-NEXT: - .offset: 40 +; CHECK-NEXT: .size: 8 +; CHECK-NEXT: .value_kind: hidden_global_offset_z +; CHECK-NEXT: .value_type: i64 +; CHECK-NEXT: - .address_space: global +; CHECK-NEXT: .offset: 48 +; CHECK-NEXT: .size: 8 +; CHECK-NEXT: .value_kind: hidden_none +; CHECK-NEXT: .value_type: i8 +; CHECK: .name: test32 +; CHECK: .symbol: test32.kd +define amdgpu_kernel void @test32( + half addrspace(1)* %r, + half addrspace(1)* %a, + half addrspace(1)* %b) #3 { +entry: + %a.val = load half, half addrspace(1)* %a + %b.val = load half, half addrspace(1)* %b + %r.val = fadd half %a.val, %b.val + store half %r.val, half addrspace(1)* %r + ret void +} + +; CHECK: - .args: ; CHECK-NEXT: - .address_space: global ; CHECK-NEXT: .name: r ; CHECK-NEXT: .offset: 0 @@ -50,16 +257,12 @@ ; CHECK-NEXT: .size: 8 ; CHECK-NEXT: .value_kind: hidden_none ; CHECK-NEXT: .value_type: i8 -; CHECK: .language: OpenCL C -; CHECK-NEXT: .language_version: -; CHECK-NEXT: - 2 -; CHECK-NEXT: - 0 -; CHECK: .name: test -; CHECK: .symbol: test.kd -define amdgpu_kernel void @test( +; CHECK: .name: test48 +; CHECK: .symbol: test48.kd +define amdgpu_kernel void @test48( half addrspace(1)* %r, half addrspace(1)* %a, - half addrspace(1)* %b) { + half addrspace(1)* %b) #4 { entry: %a.val = load half, half addrspace(1)* %a %b.val = load half, half addrspace(1)* %b @@ -72,5 +275,8 @@ ; CHECK-NEXT: - 1 ; CHECK-NEXT: - 0 -!opencl.ocl.version = !{!0} -!0 = !{i32 2, i32 0} +attributes #0 = { "amdgpu-implicitarg-num-bytes"="8" } +attributes #1 = { "amdgpu-implicitarg-num-bytes"="16" } +attributes #2 = { "amdgpu-implicitarg-num-bytes"="24" } +attributes #3 = { "amdgpu-implicitarg-num-bytes"="32" } +attributes #4 = { "amdgpu-implicitarg-num-bytes"="48" }