Index: docs/AMDGPUUsage.rst =================================================================== --- docs/AMDGPUUsage.rst +++ docs/AMDGPUUsage.rst @@ -1391,6 +1391,11 @@ the ancestor tree for determining when the parent kernel has finished. + "HiddenMultiGridSyncArg" + A global address space pointer for + multi-grid synchronization is + passed in the kernarg. + "ValueType" string Required Kernel argument value type. Only present if "ValueKind" is "ByValue". For vector data @@ -1880,6 +1885,11 @@ the ancestor tree for determining when the parent kernel has finished. + "hidden_multigrid_sync_arg" + A global address space pointer for + multi-grid synchronization is + passed in the kernarg. + ".value_type" string Required Kernel argument value type. Only present if ".value_kind" is "by_value". For vector data @@ -5696,6 +5706,8 @@ enqueue_kernel. 6 8 8 OpenCL address of AqlWrap struct used by enqueue_kernel. + 7 8 8 Pointer argument used for Multi-grid + synchronization. ======== ==== ========= =========================================== ..
_amdgpu-hcc: Index: include/llvm/Support/AMDGPUMetadata.h =================================================================== --- include/llvm/Support/AMDGPUMetadata.h +++ include/llvm/Support/AMDGPUMetadata.h @@ -74,6 +74,7 @@ HiddenPrintfBuffer = 11, HiddenDefaultQueue = 12, HiddenCompletionAction = 13, + HiddenMultiGridSyncArg = 14, Unknown = 0xff }; Index: lib/BinaryFormat/AMDGPUMetadataVerifier.cpp =================================================================== --- lib/BinaryFormat/AMDGPUMetadataVerifier.cpp +++ lib/BinaryFormat/AMDGPUMetadataVerifier.cpp @@ -121,6 +121,7 @@ .Case("hidden_printf_buffer", true) .Case("hidden_default_queue", true) .Case("hidden_completion_action", true) + .Case("hidden_multigrid_sync_arg", true) .Default(false); })) return false; Index: lib/Support/AMDGPUMetadata.cpp =================================================================== --- lib/Support/AMDGPUMetadata.cpp +++ lib/Support/AMDGPUMetadata.cpp @@ -65,6 +65,8 @@ YIO.enumCase(EN, "HiddenDefaultQueue", ValueKind::HiddenDefaultQueue); YIO.enumCase(EN, "HiddenCompletionAction", ValueKind::HiddenCompletionAction); + YIO.enumCase(EN, "HiddenMultiGridSyncArg", + ValueKind::HiddenMultiGridSyncArg); } }; Index: lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.cpp =================================================================== --- lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.cpp +++ lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.cpp @@ -435,6 +435,10 @@ emitKernelArg(DL, Int8PtrTy, ValueKind::HiddenNone); } } + + // Emit the pointer argument for multi-grid object. + if (HiddenArgNumBytes >= 56) + emitKernelArg(DL, Int8PtrTy, ValueKind::HiddenMultiGridSyncArg); } bool MetadataStreamerV2::emitTo(AMDGPUTargetStreamer &TargetStreamer) { @@ -864,6 +868,10 @@ emitKernelArg(DL, Int8PtrTy, "hidden_none", Offset, Args); } } + + // Emit the pointer argument for multi-grid object. 
+ if (HiddenArgNumBytes >= 56) + emitKernelArg(DL, Int8PtrTy, "hidden_multigrid_sync_arg", Offset, Args); } msgpack::MapDocNode Index: test/CodeGen/AMDGPU/hsa-metadata-from-llvm-ir-full-v3.ll =================================================================== --- test/CodeGen/AMDGPU/hsa-metadata-from-llvm-ir-full-v3.ll +++ test/CodeGen/AMDGPU/hsa-metadata-from-llvm-ir-full-v3.ll @@ -44,6 +44,8 @@ ; CHECK-NOT: .value_kind: hidden_completion_action ; CHECK-NEXT: .value_kind: hidden_printf_buffer ; CHECK-NEXT: .value_type: i8 +; CHECK: .value_kind: hidden_multigrid_sync_arg +; CHECK-NEXT: .value_type: i8 ; CHECK: .language: OpenCL C ; CHECK-NEXT: .language_version: ; CHECK-NEXT: - 2 @@ -85,6 +87,16 @@ ; CHECK-NEXT: .size: 8 ; CHECK-NEXT: .value_kind: hidden_none ; CHECK-NEXT: .value_type: i8 +; CHECK-NEXT: - .address_space: global +; CHECK-NEXT: .offset: 48 +; CHECK-NEXT: .size: 8 +; CHECK-NEXT: .value_kind: hidden_none +; CHECK-NEXT: .value_type: i8 +; CHECK-NEXT: - .address_space: global +; CHECK-NEXT: .offset: 56 +; CHECK-NEXT: .size: 8 +; CHECK-NEXT: .value_kind: hidden_multigrid_sync_arg +; CHECK-NEXT: .value_type: i8 ; CHECK: .language: OpenCL C ; CHECK-NEXT: .language_version: ; CHECK-NEXT: - 2 @@ -121,6 +133,21 @@ ; CHECK-NEXT: .size: 8 ; CHECK-NEXT: .value_kind: hidden_printf_buffer ; CHECK-NEXT: .value_type: i8 +; CHECK-NEXT: - .address_space: global +; CHECK-NEXT: .offset: 48 +; CHECK-NEXT: .size: 8 +; CHECK-NEXT: .value_kind: hidden_none +; CHECK-NEXT: .value_type: i8 +; CHECK-NEXT: - .address_space: global +; CHECK-NEXT: .offset: 56 +; CHECK-NEXT: .size: 8 +; CHECK-NEXT: .value_kind: hidden_none +; CHECK-NEXT: .value_type: i8 +; CHECK-NEXT: - .address_space: global +; CHECK-NEXT: .offset: 64 +; CHECK-NEXT: .size: 8 +; CHECK-NEXT: .value_kind: hidden_multigrid_sync_arg +; CHECK-NEXT: .value_type: i8 ; CHECK: .language: OpenCL C ; CHECK-NEXT: .language_version: ; CHECK-NEXT: - 2 @@ -157,6 +184,21 @@ ; CHECK-NEXT: .size: 8 ; CHECK-NEXT: .value_kind: 
hidden_printf_buffer ; CHECK-NEXT: .value_type: i8 +; CHECK-NEXT: - .address_space: global +; CHECK-NEXT: .offset: 64 +; CHECK-NEXT: .size: 8 +; CHECK-NEXT: .value_kind: hidden_none +; CHECK-NEXT: .value_type: i8 +; CHECK-NEXT: - .address_space: global +; CHECK-NEXT: .offset: 72 +; CHECK-NEXT: .size: 8 +; CHECK-NEXT: .value_kind: hidden_none +; CHECK-NEXT: .value_type: i8 +; CHECK-NEXT: - .address_space: global +; CHECK-NEXT: .offset: 80 +; CHECK-NEXT: .size: 8 +; CHECK-NEXT: .value_kind: hidden_multigrid_sync_arg +; CHECK-NEXT: .value_type: i8 ; CHECK: .language: OpenCL C ; CHECK-NEXT: .language_version: ; CHECK-NEXT: - 2 @@ -193,6 +235,21 @@ ; CHECK-NEXT: .size: 8 ; CHECK-NEXT: .value_kind: hidden_printf_buffer ; CHECK-NEXT: .value_type: i8 +; CHECK-NEXT: - .address_space: global +; CHECK-NEXT: .offset: 48 +; CHECK-NEXT: .size: 8 +; CHECK-NEXT: .value_kind: hidden_none +; CHECK-NEXT: .value_type: i8 +; CHECK-NEXT: - .address_space: global +; CHECK-NEXT: .offset: 56 +; CHECK-NEXT: .size: 8 +; CHECK-NEXT: .value_kind: hidden_none +; CHECK-NEXT: .value_type: i8 +; CHECK-NEXT: - .address_space: global +; CHECK-NEXT: .offset: 64 +; CHECK-NEXT: .size: 8 +; CHECK-NEXT: .value_kind: hidden_multigrid_sync_arg +; CHECK-NEXT: .value_type: i8 ; CHECK: .language: OpenCL C ; CHECK-NEXT: .language_version: ; CHECK-NEXT: - 2 @@ -229,6 +286,21 @@ ; CHECK-NEXT: .size: 8 ; CHECK-NEXT: .value_kind: hidden_printf_buffer ; CHECK-NEXT: .value_type: i8 +; CHECK-NEXT: - .address_space: global +; CHECK-NEXT: .offset: 96 +; CHECK-NEXT: .size: 8 +; CHECK-NEXT: .value_kind: hidden_none +; CHECK-NEXT: .value_type: i8 +; CHECK-NEXT: - .address_space: global +; CHECK-NEXT: .offset: 104 +; CHECK-NEXT: .size: 8 +; CHECK-NEXT: .value_kind: hidden_none +; CHECK-NEXT: .value_type: i8 +; CHECK-NEXT: - .address_space: global +; CHECK-NEXT: .offset: 112 +; CHECK-NEXT: .size: 8 +; CHECK-NEXT: .value_kind: hidden_multigrid_sync_arg +; CHECK-NEXT: .value_type: i8 ; CHECK: .language: OpenCL C ; CHECK-NEXT: 
.language_version: ; CHECK-NEXT: - 2 @@ -265,6 +337,21 @@ ; CHECK-NEXT: .size: 8 ; CHECK-NEXT: .value_kind: hidden_printf_buffer ; CHECK-NEXT: .value_type: i8 +; CHECK-NEXT: - .address_space: global +; CHECK-NEXT: .offset: 160 +; CHECK-NEXT: .size: 8 +; CHECK-NEXT: .value_kind: hidden_none +; CHECK-NEXT: .value_type: i8 +; CHECK-NEXT: - .address_space: global +; CHECK-NEXT: .offset: 168 +; CHECK-NEXT: .size: 8 +; CHECK-NEXT: .value_kind: hidden_none +; CHECK-NEXT: .value_type: i8 +; CHECK-NEXT: - .address_space: global +; CHECK-NEXT: .offset: 176 +; CHECK-NEXT: .size: 8 +; CHECK-NEXT: .value_kind: hidden_multigrid_sync_arg +; CHECK-NEXT: .value_type: i8 ; CHECK: .language: OpenCL C ; CHECK-NEXT: .language_version: ; CHECK-NEXT: - 2 @@ -302,6 +389,21 @@ ; CHECK-NEXT: .size: 8 ; CHECK-NEXT: .value_kind: hidden_printf_buffer ; CHECK-NEXT: .value_type: i8 +; CHECK-NEXT: - .address_space: global +; CHECK-NEXT: .offset: 40 +; CHECK-NEXT: .size: 8 +; CHECK-NEXT: .value_kind: hidden_none +; CHECK-NEXT: .value_type: i8 +; CHECK-NEXT: - .address_space: global +; CHECK-NEXT: .offset: 48 +; CHECK-NEXT: .size: 8 +; CHECK-NEXT: .value_kind: hidden_none +; CHECK-NEXT: .value_type: i8 +; CHECK-NEXT: - .address_space: global +; CHECK-NEXT: .offset: 56 +; CHECK-NEXT: .size: 8 +; CHECK-NEXT: .value_kind: hidden_multigrid_sync_arg +; CHECK-NEXT: .value_type: i8 ; CHECK: .language: OpenCL C ; CHECK-NEXT: .language_version: ; CHECK-NEXT: - 2 @@ -339,6 +441,21 @@ ; CHECK-NEXT: .size: 8 ; CHECK-NEXT: .value_kind: hidden_printf_buffer ; CHECK-NEXT: .value_type: i8 +; CHECK-NEXT: - .address_space: global +; CHECK-NEXT: .offset: 40 +; CHECK-NEXT: .size: 8 +; CHECK-NEXT: .value_kind: hidden_none +; CHECK-NEXT: .value_type: i8 +; CHECK-NEXT: - .address_space: global +; CHECK-NEXT: .offset: 48 +; CHECK-NEXT: .size: 8 +; CHECK-NEXT: .value_kind: hidden_none +; CHECK-NEXT: .value_type: i8 +; CHECK-NEXT: - .address_space: global +; CHECK-NEXT: .offset: 56 +; CHECK-NEXT: .size: 8 +; CHECK-NEXT: 
.value_kind: hidden_multigrid_sync_arg +; CHECK-NEXT: .value_type: i8 ; CHECK: .language: OpenCL C ; CHECK-NEXT: .language_version: ; CHECK-NEXT: - 2 @@ -375,6 +492,21 @@ ; CHECK-NEXT: .size: 8 ; CHECK-NEXT: .value_kind: hidden_printf_buffer ; CHECK-NEXT: .value_type: i8 +; CHECK-NEXT: - .address_space: global +; CHECK-NEXT: .offset: 40 +; CHECK-NEXT: .size: 8 +; CHECK-NEXT: .value_kind: hidden_none +; CHECK-NEXT: .value_type: i8 +; CHECK-NEXT: - .address_space: global +; CHECK-NEXT: .offset: 48 +; CHECK-NEXT: .size: 8 +; CHECK-NEXT: .value_kind: hidden_none +; CHECK-NEXT: .value_type: i8 +; CHECK-NEXT: - .address_space: global +; CHECK-NEXT: .offset: 56 +; CHECK-NEXT: .size: 8 +; CHECK-NEXT: .value_kind: hidden_multigrid_sync_arg +; CHECK-NEXT: .value_type: i8 ; CHECK: .language: OpenCL C ; CHECK-NEXT: .language_version: ; CHECK-NEXT: - 2 @@ -412,6 +544,21 @@ ; CHECK-NEXT: .size: 8 ; CHECK-NEXT: .value_kind: hidden_printf_buffer ; CHECK-NEXT: .value_type: i8 +; CHECK-NEXT: - .address_space: global +; CHECK-NEXT: .offset: 40 +; CHECK-NEXT: .size: 8 +; CHECK-NEXT: .value_kind: hidden_none +; CHECK-NEXT: .value_type: i8 +; CHECK-NEXT: - .address_space: global +; CHECK-NEXT: .offset: 48 +; CHECK-NEXT: .size: 8 +; CHECK-NEXT: .value_kind: hidden_none +; CHECK-NEXT: .value_type: i8 +; CHECK-NEXT: - .address_space: global +; CHECK-NEXT: .offset: 56 +; CHECK-NEXT: .size: 8 +; CHECK-NEXT: .value_kind: hidden_multigrid_sync_arg +; CHECK-NEXT: .value_type: i8 ; CHECK: .language: OpenCL C ; CHECK-NEXT: .language_version: ; CHECK-NEXT: - 2 @@ -449,6 +596,21 @@ ; CHECK-NEXT: .size: 8 ; CHECK-NEXT: .value_kind: hidden_printf_buffer ; CHECK-NEXT: .value_type: i8 +; CHECK-NEXT: - .address_space: global +; CHECK-NEXT: .offset: 40 +; CHECK-NEXT: .size: 8 +; CHECK-NEXT: .value_kind: hidden_none +; CHECK-NEXT: .value_type: i8 +; CHECK-NEXT: - .address_space: global +; CHECK-NEXT: .offset: 48 +; CHECK-NEXT: .size: 8 +; CHECK-NEXT: .value_kind: hidden_none +; CHECK-NEXT: .value_type: i8 
+; CHECK-NEXT: - .address_space: global +; CHECK-NEXT: .offset: 56 +; CHECK-NEXT: .size: 8 +; CHECK-NEXT: .value_kind: hidden_multigrid_sync_arg +; CHECK-NEXT: .value_type: i8 ; CHECK: .language: OpenCL C ; CHECK-NEXT: .language_version: ; CHECK-NEXT: - 2 @@ -485,6 +647,21 @@ ; CHECK-NEXT: .size: 8 ; CHECK-NEXT: .value_kind: hidden_printf_buffer ; CHECK-NEXT: .value_type: i8 +; CHECK-NEXT: - .address_space: global +; CHECK-NEXT: .offset: 48 +; CHECK-NEXT: .size: 8 +; CHECK-NEXT: .value_kind: hidden_none +; CHECK-NEXT: .value_type: i8 +; CHECK-NEXT: - .address_space: global +; CHECK-NEXT: .offset: 56 +; CHECK-NEXT: .size: 8 +; CHECK-NEXT: .value_kind: hidden_none +; CHECK-NEXT: .value_type: i8 +; CHECK-NEXT: - .address_space: global +; CHECK-NEXT: .offset: 64 +; CHECK-NEXT: .size: 8 +; CHECK-NEXT: .value_kind: hidden_multigrid_sync_arg +; CHECK-NEXT: .value_type: i8 ; CHECK: .language: OpenCL C ; CHECK-NEXT: .language_version: ; CHECK-NEXT: - 2 @@ -533,6 +710,21 @@ ; CHECK-NEXT: .size: 8 ; CHECK-NEXT: .value_kind: hidden_printf_buffer ; CHECK-NEXT: .value_type: i8 +; CHECK-NEXT: - .address_space: global +; CHECK-NEXT: .offset: 48 +; CHECK-NEXT: .size: 8 +; CHECK-NEXT: .value_kind: hidden_none +; CHECK-NEXT: .value_type: i8 +; CHECK-NEXT: - .address_space: global +; CHECK-NEXT: .offset: 56 +; CHECK-NEXT: .size: 8 +; CHECK-NEXT: .value_kind: hidden_none +; CHECK-NEXT: .value_type: i8 +; CHECK-NEXT: - .address_space: global +; CHECK-NEXT: .offset: 64 +; CHECK-NEXT: .size: 8 +; CHECK-NEXT: .value_kind: hidden_multigrid_sync_arg +; CHECK-NEXT: .value_type: i8 ; CHECK: .language: OpenCL C ; CHECK-NEXT: .language_version: ; CHECK-NEXT: - 2 @@ -585,6 +777,21 @@ ; CHECK-NEXT: .size: 8 ; CHECK-NEXT: .value_kind: hidden_printf_buffer ; CHECK-NEXT: .value_type: i8 +; CHECK-NEXT: - .address_space: global +; CHECK-NEXT: .offset: 56 +; CHECK-NEXT: .size: 8 +; CHECK-NEXT: .value_kind: hidden_none +; CHECK-NEXT: .value_type: i8 +; CHECK-NEXT: - .address_space: global +; CHECK-NEXT: 
.offset: 64 +; CHECK-NEXT: .size: 8 +; CHECK-NEXT: .value_kind: hidden_none +; CHECK-NEXT: .value_type: i8 +; CHECK-NEXT: - .address_space: global +; CHECK-NEXT: .offset: 72 +; CHECK-NEXT: .size: 8 +; CHECK-NEXT: .value_kind: hidden_multigrid_sync_arg +; CHECK-NEXT: .value_type: i8 ; CHECK: .language: OpenCL C ; CHECK-NEXT: .language_version: ; CHECK-NEXT: - 2 @@ -642,6 +849,21 @@ ; CHECK-NEXT: .size: 8 ; CHECK-NEXT: .value_kind: hidden_printf_buffer ; CHECK-NEXT: .value_type: i8 +; CHECK-NEXT: - .address_space: global +; CHECK-NEXT: .offset: 56 +; CHECK-NEXT: .size: 8 +; CHECK-NEXT: .value_kind: hidden_none +; CHECK-NEXT: .value_type: i8 +; CHECK-NEXT: - .address_space: global +; CHECK-NEXT: .offset: 64 +; CHECK-NEXT: .size: 8 +; CHECK-NEXT: .value_kind: hidden_none +; CHECK-NEXT: .value_type: i8 +; CHECK-NEXT: - .address_space: global +; CHECK-NEXT: .offset: 72 +; CHECK-NEXT: .size: 8 +; CHECK-NEXT: .value_kind: hidden_multigrid_sync_arg +; CHECK-NEXT: .value_type: i8 ; CHECK: .language: OpenCL C ; CHECK-NEXT: .language_version: ; CHECK-NEXT: - 2 @@ -698,6 +920,21 @@ ; CHECK-NEXT: .size: 8 ; CHECK-NEXT: .value_kind: hidden_printf_buffer ; CHECK-NEXT: .value_type: i8 +; CHECK-NEXT: - .address_space: global +; CHECK-NEXT: .offset: 56 +; CHECK-NEXT: .size: 8 +; CHECK-NEXT: .value_kind: hidden_none +; CHECK-NEXT: .value_type: i8 +; CHECK-NEXT: - .address_space: global +; CHECK-NEXT: .offset: 64 +; CHECK-NEXT: .size: 8 +; CHECK-NEXT: .value_kind: hidden_none +; CHECK-NEXT: .value_type: i8 +; CHECK-NEXT: - .address_space: global +; CHECK-NEXT: .offset: 72 +; CHECK-NEXT: .size: 8 +; CHECK-NEXT: .value_kind: hidden_multigrid_sync_arg +; CHECK-NEXT: .value_type: i8 ; CHECK: .language: OpenCL C ; CHECK-NEXT: .language_version: ; CHECK-NEXT: - 2 @@ -736,6 +973,21 @@ ; CHECK-NEXT: .size: 8 ; CHECK-NEXT: .value_kind: hidden_printf_buffer ; CHECK-NEXT: .value_type: i8 +; CHECK-NEXT: - .address_space: global +; CHECK-NEXT: .offset: 40 +; CHECK-NEXT: .size: 8 +; CHECK-NEXT: 
.value_kind: hidden_none +; CHECK-NEXT: .value_type: i8 +; CHECK-NEXT: - .address_space: global +; CHECK-NEXT: .offset: 48 +; CHECK-NEXT: .size: 8 +; CHECK-NEXT: .value_kind: hidden_none +; CHECK-NEXT: .value_type: i8 +; CHECK-NEXT: - .address_space: global +; CHECK-NEXT: .offset: 56 +; CHECK-NEXT: .size: 8 +; CHECK-NEXT: .value_kind: hidden_multigrid_sync_arg +; CHECK-NEXT: .value_type: i8 ; CHECK: .language: OpenCL C ; CHECK-NEXT: .language_version: ; CHECK-NEXT: - 2 @@ -773,6 +1025,21 @@ ; CHECK-NEXT: .size: 8 ; CHECK-NEXT: .value_kind: hidden_printf_buffer ; CHECK-NEXT: .value_type: i8 +; CHECK-NEXT: - .address_space: global +; CHECK-NEXT: .offset: 40 +; CHECK-NEXT: .size: 8 +; CHECK-NEXT: .value_kind: hidden_none +; CHECK-NEXT: .value_type: i8 +; CHECK-NEXT: - .address_space: global +; CHECK-NEXT: .offset: 48 +; CHECK-NEXT: .size: 8 +; CHECK-NEXT: .value_kind: hidden_none +; CHECK-NEXT: .value_type: i8 +; CHECK-NEXT: - .address_space: global +; CHECK-NEXT: .offset: 56 +; CHECK-NEXT: .size: 8 +; CHECK-NEXT: .value_kind: hidden_multigrid_sync_arg +; CHECK-NEXT: .value_type: i8 ; CHECK: .language: OpenCL C ; CHECK-NEXT: .language_version: ; CHECK-NEXT: - 2 @@ -810,6 +1077,21 @@ ; CHECK-NEXT: .size: 8 ; CHECK-NEXT: .value_kind: hidden_printf_buffer ; CHECK-NEXT: .value_type: i8 +; CHECK-NEXT: - .address_space: global +; CHECK-NEXT: .offset: 40 +; CHECK-NEXT: .size: 8 +; CHECK-NEXT: .value_kind: hidden_none +; CHECK-NEXT: .value_type: i8 +; CHECK-NEXT: - .address_space: global +; CHECK-NEXT: .offset: 48 +; CHECK-NEXT: .size: 8 +; CHECK-NEXT: .value_kind: hidden_none +; CHECK-NEXT: .value_type: i8 +; CHECK-NEXT: - .address_space: global +; CHECK-NEXT: .offset: 56 +; CHECK-NEXT: .size: 8 +; CHECK-NEXT: .value_kind: hidden_multigrid_sync_arg +; CHECK-NEXT: .value_type: i8 ; CHECK: .language: OpenCL C ; CHECK-NEXT: .language_version: ; CHECK-NEXT: - 2 @@ -847,6 +1129,21 @@ ; CHECK-NEXT: .size: 8 ; CHECK-NEXT: .value_kind: hidden_printf_buffer ; CHECK-NEXT: .value_type: 
i8 +; CHECK-NEXT: - .address_space: global +; CHECK-NEXT: .offset: 40 +; CHECK-NEXT: .size: 8 +; CHECK-NEXT: .value_kind: hidden_none +; CHECK-NEXT: .value_type: i8 +; CHECK-NEXT: - .address_space: global +; CHECK-NEXT: .offset: 48 +; CHECK-NEXT: .size: 8 +; CHECK-NEXT: .value_kind: hidden_none +; CHECK-NEXT: .value_type: i8 +; CHECK-NEXT: - .address_space: global +; CHECK-NEXT: .offset: 56 +; CHECK-NEXT: .size: 8 +; CHECK-NEXT: .value_kind: hidden_multigrid_sync_arg +; CHECK-NEXT: .value_type: i8 ; CHECK: .language: OpenCL C ; CHECK-NEXT: .language_version: ; CHECK-NEXT: - 2 @@ -884,6 +1181,21 @@ ; CHECK-NEXT: .size: 8 ; CHECK-NEXT: .value_kind: hidden_printf_buffer ; CHECK-NEXT: .value_type: i8 +; CHECK-NEXT: - .address_space: global +; CHECK-NEXT: .offset: 40 +; CHECK-NEXT: .size: 8 +; CHECK-NEXT: .value_kind: hidden_none +; CHECK-NEXT: .value_type: i8 +; CHECK-NEXT: - .address_space: global +; CHECK-NEXT: .offset: 48 +; CHECK-NEXT: .size: 8 +; CHECK-NEXT: .value_kind: hidden_none +; CHECK-NEXT: .value_type: i8 +; CHECK-NEXT: - .address_space: global +; CHECK-NEXT: .offset: 56 +; CHECK-NEXT: .size: 8 +; CHECK-NEXT: .value_kind: hidden_multigrid_sync_arg +; CHECK-NEXT: .value_type: i8 ; CHECK: .language: OpenCL C ; CHECK-NEXT: .language_version: ; CHECK-NEXT: - 2 @@ -921,6 +1233,21 @@ ; CHECK-NEXT: .size: 8 ; CHECK-NEXT: .value_kind: hidden_printf_buffer ; CHECK-NEXT: .value_type: i8 +; CHECK-NEXT: - .address_space: global +; CHECK-NEXT: .offset: 40 +; CHECK-NEXT: .size: 8 +; CHECK-NEXT: .value_kind: hidden_none +; CHECK-NEXT: .value_type: i8 +; CHECK-NEXT: - .address_space: global +; CHECK-NEXT: .offset: 48 +; CHECK-NEXT: .size: 8 +; CHECK-NEXT: .value_kind: hidden_none +; CHECK-NEXT: .value_type: i8 +; CHECK-NEXT: - .address_space: global +; CHECK-NEXT: .offset: 56 +; CHECK-NEXT: .size: 8 +; CHECK-NEXT: .value_kind: hidden_multigrid_sync_arg +; CHECK-NEXT: .value_type: i8 ; CHECK: .language: OpenCL C ; CHECK-NEXT: .language_version: ; CHECK-NEXT: - 2 @@ -958,6 
+1285,21 @@ ; CHECK-NEXT: .size: 8 ; CHECK-NEXT: .value_kind: hidden_printf_buffer ; CHECK-NEXT: .value_type: i8 +; CHECK-NEXT: - .address_space: global +; CHECK-NEXT: .offset: 40 +; CHECK-NEXT: .size: 8 +; CHECK-NEXT: .value_kind: hidden_none +; CHECK-NEXT: .value_type: i8 +; CHECK-NEXT: - .address_space: global +; CHECK-NEXT: .offset: 48 +; CHECK-NEXT: .size: 8 +; CHECK-NEXT: .value_kind: hidden_none +; CHECK-NEXT: .value_type: i8 +; CHECK-NEXT: - .address_space: global +; CHECK-NEXT: .offset: 56 +; CHECK-NEXT: .size: 8 +; CHECK-NEXT: .value_kind: hidden_multigrid_sync_arg +; CHECK-NEXT: .value_type: i8 ; CHECK: .language: OpenCL C ; CHECK-NEXT: .language_version: ; CHECK-NEXT: - 2 @@ -995,6 +1337,21 @@ ; CHECK-NEXT: .size: 8 ; CHECK-NEXT: .value_kind: hidden_printf_buffer ; CHECK-NEXT: .value_type: i8 +; CHECK-NEXT: - .address_space: global +; CHECK-NEXT: .offset: 40 +; CHECK-NEXT: .size: 8 +; CHECK-NEXT: .value_kind: hidden_none +; CHECK-NEXT: .value_type: i8 +; CHECK-NEXT: - .address_space: global +; CHECK-NEXT: .offset: 48 +; CHECK-NEXT: .size: 8 +; CHECK-NEXT: .value_kind: hidden_none +; CHECK-NEXT: .value_type: i8 +; CHECK-NEXT: - .address_space: global +; CHECK-NEXT: .offset: 56 +; CHECK-NEXT: .size: 8 +; CHECK-NEXT: .value_kind: hidden_multigrid_sync_arg +; CHECK-NEXT: .value_type: i8 ; CHECK: .language: OpenCL C ; CHECK-NEXT: .language_version: ; CHECK-NEXT: - 2 @@ -1037,6 +1394,21 @@ ; CHECK-NEXT: .size: 8 ; CHECK-NEXT: .value_kind: hidden_printf_buffer ; CHECK-NEXT: .value_type: i8 +; CHECK-NEXT: - .address_space: global +; CHECK-NEXT: .offset: 40 +; CHECK-NEXT: .size: 8 +; CHECK-NEXT: .value_kind: hidden_none +; CHECK-NEXT: .value_type: i8 +; CHECK-NEXT: - .address_space: global +; CHECK-NEXT: .offset: 48 +; CHECK-NEXT: .size: 8 +; CHECK-NEXT: .value_kind: hidden_none +; CHECK-NEXT: .value_type: i8 +; CHECK-NEXT: - .address_space: global +; CHECK-NEXT: .offset: 56 +; CHECK-NEXT: .size: 8 +; CHECK-NEXT: .value_kind: hidden_multigrid_sync_arg +; 
CHECK-NEXT: .value_type: i8 ; CHECK: .language: OpenCL C ; CHECK-NEXT: .language_version: ; CHECK-NEXT: - 2 @@ -1080,6 +1452,21 @@ ; CHECK-NEXT: .size: 8 ; CHECK-NEXT: .value_kind: hidden_printf_buffer ; CHECK-NEXT: .value_type: i8 +; CHECK-NEXT: - .address_space: global +; CHECK-NEXT: .offset: 40 +; CHECK-NEXT: .size: 8 +; CHECK-NEXT: .value_kind: hidden_none +; CHECK-NEXT: .value_type: i8 +; CHECK-NEXT: - .address_space: global +; CHECK-NEXT: .offset: 48 +; CHECK-NEXT: .size: 8 +; CHECK-NEXT: .value_kind: hidden_none +; CHECK-NEXT: .value_type: i8 +; CHECK-NEXT: - .address_space: global +; CHECK-NEXT: .offset: 56 +; CHECK-NEXT: .size: 8 +; CHECK-NEXT: .value_kind: hidden_multigrid_sync_arg +; CHECK-NEXT: .value_type: i8 ; CHECK: .language: OpenCL C ; CHECK-NEXT: .language_version: ; CHECK-NEXT: - 2 @@ -1117,6 +1504,21 @@ ; CHECK-NEXT: .size: 8 ; CHECK-NEXT: .value_kind: hidden_printf_buffer ; CHECK-NEXT: .value_type: i8 +; CHECK-NEXT: - .address_space: global +; CHECK-NEXT: .offset: 40 +; CHECK-NEXT: .size: 8 +; CHECK-NEXT: .value_kind: hidden_none +; CHECK-NEXT: .value_type: i8 +; CHECK-NEXT: - .address_space: global +; CHECK-NEXT: .offset: 48 +; CHECK-NEXT: .size: 8 +; CHECK-NEXT: .value_kind: hidden_none +; CHECK-NEXT: .value_type: i8 +; CHECK-NEXT: - .address_space: global +; CHECK-NEXT: .offset: 56 +; CHECK-NEXT: .size: 8 +; CHECK-NEXT: .value_kind: hidden_multigrid_sync_arg +; CHECK-NEXT: .value_type: i8 ; CHECK: .language: OpenCL C ; CHECK-NEXT: .language_version: ; CHECK-NEXT: - 2 @@ -1153,6 +1555,21 @@ ; CHECK-NEXT: .size: 8 ; CHECK-NEXT: .value_kind: hidden_printf_buffer ; CHECK-NEXT: .value_type: i8 +; CHECK-NEXT: - .address_space: global +; CHECK-NEXT: .offset: 48 +; CHECK-NEXT: .size: 8 +; CHECK-NEXT: .value_kind: hidden_none +; CHECK-NEXT: .value_type: i8 +; CHECK-NEXT: - .address_space: global +; CHECK-NEXT: .offset: 56 +; CHECK-NEXT: .size: 8 +; CHECK-NEXT: .value_kind: hidden_none +; CHECK-NEXT: .value_type: i8 +; CHECK-NEXT: - .address_space: 
global +; CHECK-NEXT: .offset: 64 +; CHECK-NEXT: .size: 8 +; CHECK-NEXT: .value_kind: hidden_multigrid_sync_arg +; CHECK-NEXT: .value_type: i8 ; CHECK: .language: OpenCL C ; CHECK-NEXT: .language_version: ; CHECK-NEXT: - 2 @@ -1190,6 +1607,21 @@ ; CHECK-NEXT: .size: 8 ; CHECK-NEXT: .value_kind: hidden_printf_buffer ; CHECK-NEXT: .value_type: i8 +; CHECK-NEXT: - .address_space: global +; CHECK-NEXT: .offset: 40 +; CHECK-NEXT: .size: 8 +; CHECK-NEXT: .value_kind: hidden_none +; CHECK-NEXT: .value_type: i8 +; CHECK-NEXT: - .address_space: global +; CHECK-NEXT: .offset: 48 +; CHECK-NEXT: .size: 8 +; CHECK-NEXT: .value_kind: hidden_none +; CHECK-NEXT: .value_type: i8 +; CHECK-NEXT: - .address_space: global +; CHECK-NEXT: .offset: 56 +; CHECK-NEXT: .size: 8 +; CHECK-NEXT: .value_kind: hidden_multigrid_sync_arg +; CHECK-NEXT: .value_type: i8 ; CHECK: .language: OpenCL C ; CHECK-NEXT: .language_version: ; CHECK-NEXT: - 2 @@ -1283,6 +1715,21 @@ ; CHECK-NEXT: .size: 8 ; CHECK-NEXT: .value_kind: hidden_printf_buffer ; CHECK-NEXT: .value_type: i8 +; CHECK-NEXT: - .address_space: global +; CHECK-NEXT: .offset: 72 +; CHECK-NEXT: .size: 8 +; CHECK-NEXT: .value_kind: hidden_none +; CHECK-NEXT: .value_type: i8 +; CHECK-NEXT: - .address_space: global +; CHECK-NEXT: .offset: 80 +; CHECK-NEXT: .size: 8 +; CHECK-NEXT: .value_kind: hidden_none +; CHECK-NEXT: .value_type: i8 +; CHECK-NEXT: - .address_space: global +; CHECK-NEXT: .offset: 88 +; CHECK-NEXT: .size: 8 +; CHECK-NEXT: .value_kind: hidden_multigrid_sync_arg +; CHECK-NEXT: .value_type: i8 ; CHECK: .language: OpenCL C ; CHECK-NEXT: .language_version: ; CHECK-NEXT: - 2 @@ -1382,6 +1829,21 @@ ; CHECK-NEXT: .size: 8 ; CHECK-NEXT: .value_kind: hidden_printf_buffer ; CHECK-NEXT: .value_type: i8 +; CHECK-NEXT: - .address_space: global +; CHECK-NEXT: .offset: 72 +; CHECK-NEXT: .size: 8 +; CHECK-NEXT: .value_kind: hidden_none +; CHECK-NEXT: .value_type: i8 +; CHECK-NEXT: - .address_space: global +; CHECK-NEXT: .offset: 80 +; CHECK-NEXT: 
.size: 8 +; CHECK-NEXT: .value_kind: hidden_none +; CHECK-NEXT: .value_type: i8 +; CHECK-NEXT: - .address_space: global +; CHECK-NEXT: .offset: 88 +; CHECK-NEXT: .size: 8 +; CHECK-NEXT: .value_kind: hidden_multigrid_sync_arg +; CHECK-NEXT: .value_type: i8 ; CHECK: .language: OpenCL C ; CHECK-NEXT: .language_version: ; CHECK-NEXT: - 2 @@ -1424,6 +1886,21 @@ ; CHECK-NEXT: .size: 8 ; CHECK-NEXT: .value_kind: hidden_printf_buffer ; CHECK-NEXT: .value_type: i8 +; CHECK-NEXT: - .address_space: global +; CHECK-NEXT: .offset: 64 +; CHECK-NEXT: .size: 8 +; CHECK-NEXT: .value_kind: hidden_none +; CHECK-NEXT: .value_type: i8 +; CHECK-NEXT: - .address_space: global +; CHECK-NEXT: .offset: 72 +; CHECK-NEXT: .size: 8 +; CHECK-NEXT: .value_kind: hidden_none +; CHECK-NEXT: .value_type: i8 +; CHECK-NEXT: - .address_space: global +; CHECK-NEXT: .offset: 80 +; CHECK-NEXT: .size: 8 +; CHECK-NEXT: .value_kind: hidden_multigrid_sync_arg +; CHECK-NEXT: .value_type: i8 ; CHECK: .device_enqueue_symbol: __test_block_invoke_kernel_runtime_handle ; CHECK: .language: OpenCL C ; CHECK-NEXT: .language_version: @@ -1472,6 +1949,11 @@ ; CHECK-NEXT: .size: 8 ; CHECK-NEXT: .value_kind: hidden_completion_action ; CHECK-NEXT: .value_type: i8 +; CHECK-NEXT: - .address_space: global +; CHECK-NEXT: .offset: 56 +; CHECK-NEXT: .size: 8 +; CHECK-NEXT: .value_kind: hidden_multigrid_sync_arg +; CHECK-NEXT: .value_type: i8 ; CHECK: .language: OpenCL C ; CHECK-NEXT: .language_version: ; CHECK-NEXT: - 2 @@ -1503,9 +1985,9 @@ ; CHECK-NEXT: - 1 ; CHECK-NEXT: - 0 -attributes #0 = { "amdgpu-implicitarg-num-bytes"="48" } -attributes #1 = { "amdgpu-implicitarg-num-bytes"="48" "runtime-handle"="__test_block_invoke_kernel_runtime_handle" } -attributes #2 = { "amdgpu-implicitarg-num-bytes"="48" "calls-enqueue-kernel" } +attributes #0 = { "amdgpu-implicitarg-num-bytes"="56" } +attributes #1 = { "amdgpu-implicitarg-num-bytes"="56" "runtime-handle"="__test_block_invoke_kernel_runtime_handle" } +attributes #2 = { 
"amdgpu-implicitarg-num-bytes"="56" "calls-enqueue-kernel" } !llvm.printf.fmts = !{!100, !101} Index: test/CodeGen/AMDGPU/hsa-metadata-from-llvm-ir-full.ll =================================================================== --- test/CodeGen/AMDGPU/hsa-metadata-from-llvm-ir-full.ll +++ test/CodeGen/AMDGPU/hsa-metadata-from-llvm-ir-full.ll @@ -54,6 +54,9 @@ ; CHECK-NEXT: AddrSpaceQual: Global ; CHECK-NOT: ValueKind: HiddenDefaultQueue ; CHECK-NOT: ValueKind: HiddenCompletionAction +; CHECK: ValueKind: HiddenMultiGridSyncArg +; CHECK-NEXT: ValueType: I8 +; CHECK-NEXT: AddrSpaceQual: Global define amdgpu_kernel void @test_char(i8 %a) #0 !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !9 !kernel_arg_base_type !9 !kernel_arg_type_qual !4 { @@ -89,6 +92,21 @@ ; CHECK-NEXT: ValueKind: HiddenPrintfBuffer ; CHECK-NEXT: ValueType: I8 ; CHECK-NEXT: AddrSpaceQual: Global +; CHECK-NEXT: - Size: 8 +; CHECK-NEXT: Align: 8 +; CHECK-NEXT: ValueKind: HiddenNone +; CHECK-NEXT: ValueType: I8 +; CHECK-NEXT: AddrSpaceQual: Global +; CHECK-NEXT: - Size: 8 +; CHECK-NEXT: Align: 8 +; CHECK-NEXT: ValueKind: HiddenNone +; CHECK-NEXT: ValueType: I8 +; CHECK-NEXT: AddrSpaceQual: Global +; CHECK-NEXT: - Size: 8 +; CHECK-NEXT: Align: 8 +; CHECK-NEXT: ValueKind: HiddenMultiGridSyncArg +; CHECK-NEXT: ValueType: I8 +; CHECK-NEXT: AddrSpaceQual: Global define amdgpu_kernel void @test_ushort2(<2 x i16> %a) #0 !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !10 !kernel_arg_base_type !10 !kernel_arg_type_qual !4 { @@ -124,6 +142,21 @@ ; CHECK-NEXT: ValueKind: HiddenPrintfBuffer ; CHECK-NEXT: ValueType: I8 ; CHECK-NEXT: AddrSpaceQual: Global +; CHECK-NEXT: - Size: 8 +; CHECK-NEXT: Align: 8 +; CHECK-NEXT: ValueKind: HiddenNone +; CHECK-NEXT: ValueType: I8 +; CHECK-NEXT: AddrSpaceQual: Global +; CHECK-NEXT: - Size: 8 +; CHECK-NEXT: Align: 8 +; CHECK-NEXT: ValueKind: HiddenNone +; CHECK-NEXT: ValueType: I8 +; CHECK-NEXT: AddrSpaceQual: Global +; CHECK-NEXT: - 
Size: 8 +; CHECK-NEXT: Align: 8 +; CHECK-NEXT: ValueKind: HiddenMultiGridSyncArg +; CHECK-NEXT: ValueType: I8 +; CHECK-NEXT: AddrSpaceQual: Global define amdgpu_kernel void @test_int3(<3 x i32> %a) #0 !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !11 !kernel_arg_base_type !11 !kernel_arg_type_qual !4 { @@ -159,6 +192,21 @@ ; CHECK-NEXT: ValueKind: HiddenPrintfBuffer ; CHECK-NEXT: ValueType: I8 ; CHECK-NEXT: AddrSpaceQual: Global +; CHECK-NEXT: - Size: 8 +; CHECK-NEXT: Align: 8 +; CHECK-NEXT: ValueKind: HiddenNone +; CHECK-NEXT: ValueType: I8 +; CHECK-NEXT: AddrSpaceQual: Global +; CHECK-NEXT: - Size: 8 +; CHECK-NEXT: Align: 8 +; CHECK-NEXT: ValueKind: HiddenNone +; CHECK-NEXT: ValueType: I8 +; CHECK-NEXT: AddrSpaceQual: Global +; CHECK-NEXT: - Size: 8 +; CHECK-NEXT: Align: 8 +; CHECK-NEXT: ValueKind: HiddenMultiGridSyncArg +; CHECK-NEXT: ValueType: I8 +; CHECK-NEXT: AddrSpaceQual: Global define amdgpu_kernel void @test_ulong4(<4 x i64> %a) #0 !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !12 !kernel_arg_base_type !12 !kernel_arg_type_qual !4 { @@ -194,6 +242,21 @@ ; CHECK-NEXT: ValueKind: HiddenPrintfBuffer ; CHECK-NEXT: ValueType: I8 ; CHECK-NEXT: AddrSpaceQual: Global +; CHECK-NEXT: - Size: 8 +; CHECK-NEXT: Align: 8 +; CHECK-NEXT: ValueKind: HiddenNone +; CHECK-NEXT: ValueType: I8 +; CHECK-NEXT: AddrSpaceQual: Global +; CHECK-NEXT: - Size: 8 +; CHECK-NEXT: Align: 8 +; CHECK-NEXT: ValueKind: HiddenNone +; CHECK-NEXT: ValueType: I8 +; CHECK-NEXT: AddrSpaceQual: Global +; CHECK-NEXT: - Size: 8 +; CHECK-NEXT: Align: 8 +; CHECK-NEXT: ValueKind: HiddenMultiGridSyncArg +; CHECK-NEXT: ValueType: I8 +; CHECK-NEXT: AddrSpaceQual: Global define amdgpu_kernel void @test_half8(<8 x half> %a) #0 !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !13 !kernel_arg_base_type !13 !kernel_arg_type_qual !4 { @@ -229,6 +292,21 @@ ; CHECK-NEXT: ValueKind: HiddenPrintfBuffer ; CHECK-NEXT: ValueType: I8 ; CHECK-NEXT: 
AddrSpaceQual: Global +; CHECK-NEXT: - Size: 8 +; CHECK-NEXT: Align: 8 +; CHECK-NEXT: ValueKind: HiddenNone +; CHECK-NEXT: ValueType: I8 +; CHECK-NEXT: AddrSpaceQual: Global +; CHECK-NEXT: - Size: 8 +; CHECK-NEXT: Align: 8 +; CHECK-NEXT: ValueKind: HiddenNone +; CHECK-NEXT: ValueType: I8 +; CHECK-NEXT: AddrSpaceQual: Global +; CHECK-NEXT: - Size: 8 +; CHECK-NEXT: Align: 8 +; CHECK-NEXT: ValueKind: HiddenMultiGridSyncArg +; CHECK-NEXT: ValueType: I8 +; CHECK-NEXT: AddrSpaceQual: Global define amdgpu_kernel void @test_float16(<16 x float> %a) #0 !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !14 !kernel_arg_base_type !14 !kernel_arg_type_qual !4 { @@ -264,6 +342,21 @@ ; CHECK-NEXT: ValueKind: HiddenPrintfBuffer ; CHECK-NEXT: ValueType: I8 ; CHECK-NEXT: AddrSpaceQual: Global +; CHECK-NEXT: - Size: 8 +; CHECK-NEXT: Align: 8 +; CHECK-NEXT: ValueKind: HiddenNone +; CHECK-NEXT: ValueType: I8 +; CHECK-NEXT: AddrSpaceQual: Global +; CHECK-NEXT: - Size: 8 +; CHECK-NEXT: Align: 8 +; CHECK-NEXT: ValueKind: HiddenNone +; CHECK-NEXT: ValueType: I8 +; CHECK-NEXT: AddrSpaceQual: Global +; CHECK-NEXT: - Size: 8 +; CHECK-NEXT: Align: 8 +; CHECK-NEXT: ValueKind: HiddenMultiGridSyncArg +; CHECK-NEXT: ValueType: I8 +; CHECK-NEXT: AddrSpaceQual: Global define amdgpu_kernel void @test_double16(<16 x double> %a) #0 !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !15 !kernel_arg_base_type !15 !kernel_arg_type_qual !4 { @@ -300,6 +393,21 @@ ; CHECK-NEXT: ValueKind: HiddenPrintfBuffer ; CHECK-NEXT: ValueType: I8 ; CHECK-NEXT: AddrSpaceQual: Global +; CHECK-NEXT: - Size: 8 +; CHECK-NEXT: Align: 8 +; CHECK-NEXT: ValueKind: HiddenNone +; CHECK-NEXT: ValueType: I8 +; CHECK-NEXT: AddrSpaceQual: Global +; CHECK-NEXT: - Size: 8 +; CHECK-NEXT: Align: 8 +; CHECK-NEXT: ValueKind: HiddenNone +; CHECK-NEXT: ValueType: I8 +; CHECK-NEXT: AddrSpaceQual: Global +; CHECK-NEXT: - Size: 8 +; CHECK-NEXT: Align: 8 +; CHECK-NEXT: ValueKind: HiddenMultiGridSyncArg +; 
CHECK-NEXT: ValueType: I8 +; CHECK-NEXT: AddrSpaceQual: Global define amdgpu_kernel void @test_pointer(i32 addrspace(1)* %a) #0 !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !16 !kernel_arg_base_type !16 !kernel_arg_type_qual !4 { @@ -336,6 +444,21 @@ ; CHECK-NEXT: ValueKind: HiddenPrintfBuffer ; CHECK-NEXT: ValueType: I8 ; CHECK-NEXT: AddrSpaceQual: Global +; CHECK-NEXT: - Size: 8 +; CHECK-NEXT: Align: 8 +; CHECK-NEXT: ValueKind: HiddenNone +; CHECK-NEXT: ValueType: I8 +; CHECK-NEXT: AddrSpaceQual: Global +; CHECK-NEXT: - Size: 8 +; CHECK-NEXT: Align: 8 +; CHECK-NEXT: ValueKind: HiddenNone +; CHECK-NEXT: ValueType: I8 +; CHECK-NEXT: AddrSpaceQual: Global +; CHECK-NEXT: - Size: 8 +; CHECK-NEXT: Align: 8 +; CHECK-NEXT: ValueKind: HiddenMultiGridSyncArg +; CHECK-NEXT: ValueType: I8 +; CHECK-NEXT: AddrSpaceQual: Global define amdgpu_kernel void @test_image(%opencl.image2d_t addrspace(1)* %a) #0 !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !17 !kernel_arg_base_type !17 !kernel_arg_type_qual !4 { @@ -371,6 +494,21 @@ ; CHECK-NEXT: ValueKind: HiddenPrintfBuffer ; CHECK-NEXT: ValueType: I8 ; CHECK-NEXT: AddrSpaceQual: Global +; CHECK-NEXT: - Size: 8 +; CHECK-NEXT: Align: 8 +; CHECK-NEXT: ValueKind: HiddenNone +; CHECK-NEXT: ValueType: I8 +; CHECK-NEXT: AddrSpaceQual: Global +; CHECK-NEXT: - Size: 8 +; CHECK-NEXT: Align: 8 +; CHECK-NEXT: ValueKind: HiddenNone +; CHECK-NEXT: ValueType: I8 +; CHECK-NEXT: AddrSpaceQual: Global +; CHECK-NEXT: - Size: 8 +; CHECK-NEXT: Align: 8 +; CHECK-NEXT: ValueKind: HiddenMultiGridSyncArg +; CHECK-NEXT: ValueType: I8 +; CHECK-NEXT: AddrSpaceQual: Global define amdgpu_kernel void @test_sampler(i32 %a) #0 !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !18 !kernel_arg_base_type !18 !kernel_arg_type_qual !4 { @@ -407,6 +545,21 @@ ; CHECK-NEXT: ValueKind: HiddenPrintfBuffer ; CHECK-NEXT: ValueType: I8 ; CHECK-NEXT: AddrSpaceQual: Global +; CHECK-NEXT: - Size: 8 +; CHECK-NEXT: 
Align: 8 +; CHECK-NEXT: ValueKind: HiddenNone +; CHECK-NEXT: ValueType: I8 +; CHECK-NEXT: AddrSpaceQual: Global +; CHECK-NEXT: - Size: 8 +; CHECK-NEXT: Align: 8 +; CHECK-NEXT: ValueKind: HiddenNone +; CHECK-NEXT: ValueType: I8 +; CHECK-NEXT: AddrSpaceQual: Global +; CHECK-NEXT: - Size: 8 +; CHECK-NEXT: Align: 8 +; CHECK-NEXT: ValueKind: HiddenMultiGridSyncArg +; CHECK-NEXT: ValueType: I8 +; CHECK-NEXT: AddrSpaceQual: Global define amdgpu_kernel void @test_queue(%opencl.queue_t addrspace(1)* %a) #0 !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !19 !kernel_arg_base_type !19 !kernel_arg_type_qual !4 { @@ -443,6 +596,21 @@ ; CHECK-NEXT: ValueKind: HiddenPrintfBuffer ; CHECK-NEXT: ValueType: I8 ; CHECK-NEXT: AddrSpaceQual: Global +; CHECK-NEXT: - Size: 8 +; CHECK-NEXT: Align: 8 +; CHECK-NEXT: ValueKind: HiddenNone +; CHECK-NEXT: ValueType: I8 +; CHECK-NEXT: AddrSpaceQual: Global +; CHECK-NEXT: - Size: 8 +; CHECK-NEXT: Align: 8 +; CHECK-NEXT: ValueKind: HiddenNone +; CHECK-NEXT: ValueType: I8 +; CHECK-NEXT: AddrSpaceQual: Global +; CHECK-NEXT: - Size: 8 +; CHECK-NEXT: Align: 8 +; CHECK-NEXT: ValueKind: HiddenMultiGridSyncArg +; CHECK-NEXT: ValueType: I8 +; CHECK-NEXT: AddrSpaceQual: Global define amdgpu_kernel void @test_struct(%struct.A addrspace(5)* byval %a) #0 !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !20 !kernel_arg_base_type !20 !kernel_arg_type_qual !4 { @@ -478,6 +646,21 @@ ; CHECK-NEXT: ValueKind: HiddenPrintfBuffer ; CHECK-NEXT: ValueType: I8 ; CHECK-NEXT: AddrSpaceQual: Global +; CHECK-NEXT: - Size: 8 +; CHECK-NEXT: Align: 8 +; CHECK-NEXT: ValueKind: HiddenNone +; CHECK-NEXT: ValueType: I8 +; CHECK-NEXT: AddrSpaceQual: Global +; CHECK-NEXT: - Size: 8 +; CHECK-NEXT: Align: 8 +; CHECK-NEXT: ValueKind: HiddenNone +; CHECK-NEXT: ValueType: I8 +; CHECK-NEXT: AddrSpaceQual: Global +; CHECK-NEXT: - Size: 8 +; CHECK-NEXT: Align: 8 +; CHECK-NEXT: ValueKind: HiddenMultiGridSyncArg +; CHECK-NEXT: ValueType: I8 +; 
CHECK-NEXT: AddrSpaceQual: Global define amdgpu_kernel void @test_i128(i128 %a) #0 !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !21 !kernel_arg_base_type !21 !kernel_arg_type_qual !4 { @@ -527,6 +710,21 @@ ; CHECK-NEXT: ValueKind: HiddenPrintfBuffer ; CHECK-NEXT: ValueType: I8 ; CHECK-NEXT: AddrSpaceQual: Global +; CHECK-NEXT: - Size: 8 +; CHECK-NEXT: Align: 8 +; CHECK-NEXT: ValueKind: HiddenNone +; CHECK-NEXT: ValueType: I8 +; CHECK-NEXT: AddrSpaceQual: Global +; CHECK-NEXT: - Size: 8 +; CHECK-NEXT: Align: 8 +; CHECK-NEXT: ValueKind: HiddenNone +; CHECK-NEXT: ValueType: I8 +; CHECK-NEXT: AddrSpaceQual: Global +; CHECK-NEXT: - Size: 8 +; CHECK-NEXT: Align: 8 +; CHECK-NEXT: ValueKind: HiddenMultiGridSyncArg +; CHECK-NEXT: ValueType: I8 +; CHECK-NEXT: AddrSpaceQual: Global define amdgpu_kernel void @test_multi_arg(i32 %a, <2 x i16> %b, <3 x i8> %c) #0 !kernel_arg_addr_space !22 !kernel_arg_access_qual !23 !kernel_arg_type !24 !kernel_arg_base_type !24 !kernel_arg_type_qual !25 { @@ -580,6 +778,21 @@ ; CHECK-NEXT: ValueKind: HiddenPrintfBuffer ; CHECK-NEXT: ValueType: I8 ; CHECK-NEXT: AddrSpaceQual: Global +; CHECK-NEXT: - Size: 8 +; CHECK-NEXT: Align: 8 +; CHECK-NEXT: ValueKind: HiddenNone +; CHECK-NEXT: ValueType: I8 +; CHECK-NEXT: AddrSpaceQual: Global +; CHECK-NEXT: - Size: 8 +; CHECK-NEXT: Align: 8 +; CHECK-NEXT: ValueKind: HiddenNone +; CHECK-NEXT: ValueType: I8 +; CHECK-NEXT: AddrSpaceQual: Global +; CHECK-NEXT: - Size: 8 +; CHECK-NEXT: Align: 8 +; CHECK-NEXT: ValueKind: HiddenMultiGridSyncArg +; CHECK-NEXT: ValueType: I8 +; CHECK-NEXT: AddrSpaceQual: Global define amdgpu_kernel void @test_addr_space(i32 addrspace(1)* %g, i32 addrspace(4)* %c, i32 addrspace(3)* %l) #0 @@ -638,6 +851,21 @@ ; CHECK-NEXT: ValueKind: HiddenPrintfBuffer ; CHECK-NEXT: ValueType: I8 ; CHECK-NEXT: AddrSpaceQual: Global +; CHECK-NEXT: - Size: 8 +; CHECK-NEXT: Align: 8 +; CHECK-NEXT: ValueKind: HiddenNone +; CHECK-NEXT: ValueType: I8 +; CHECK-NEXT: AddrSpaceQual: 
Global +; CHECK-NEXT: - Size: 8 +; CHECK-NEXT: Align: 8 +; CHECK-NEXT: ValueKind: HiddenNone +; CHECK-NEXT: ValueType: I8 +; CHECK-NEXT: AddrSpaceQual: Global +; CHECK-NEXT: - Size: 8 +; CHECK-NEXT: Align: 8 +; CHECK-NEXT: ValueKind: HiddenMultiGridSyncArg +; CHECK-NEXT: ValueType: I8 +; CHECK-NEXT: AddrSpaceQual: Global define amdgpu_kernel void @test_type_qual(i32 addrspace(1)* %a, i32 addrspace(1)* %b, %opencl.pipe_t addrspace(1)* %c) #0 @@ -692,6 +920,21 @@ ; CHECK-NEXT: ValueKind: HiddenPrintfBuffer ; CHECK-NEXT: ValueType: I8 ; CHECK-NEXT: AddrSpaceQual: Global +; CHECK-NEXT: - Size: 8 +; CHECK-NEXT: Align: 8 +; CHECK-NEXT: ValueKind: HiddenNone +; CHECK-NEXT: ValueType: I8 +; CHECK-NEXT: AddrSpaceQual: Global +; CHECK-NEXT: - Size: 8 +; CHECK-NEXT: Align: 8 +; CHECK-NEXT: ValueKind: HiddenNone +; CHECK-NEXT: ValueType: I8 +; CHECK-NEXT: AddrSpaceQual: Global +; CHECK-NEXT: - Size: 8 +; CHECK-NEXT: Align: 8 +; CHECK-NEXT: ValueKind: HiddenMultiGridSyncArg +; CHECK-NEXT: ValueType: I8 +; CHECK-NEXT: AddrSpaceQual: Global define amdgpu_kernel void @test_access_qual(%opencl.image1d_t addrspace(1)* %ro, %opencl.image2d_t addrspace(1)* %wo, %opencl.image3d_t addrspace(1)* %rw) #0 @@ -731,6 +974,21 @@ ; CHECK-NEXT: ValueKind: HiddenPrintfBuffer ; CHECK-NEXT: ValueType: I8 ; CHECK-NEXT: AddrSpaceQual: Global +; CHECK-NEXT: - Size: 8 +; CHECK-NEXT: Align: 8 +; CHECK-NEXT: ValueKind: HiddenNone +; CHECK-NEXT: ValueType: I8 +; CHECK-NEXT: AddrSpaceQual: Global +; CHECK-NEXT: - Size: 8 +; CHECK-NEXT: Align: 8 +; CHECK-NEXT: ValueKind: HiddenNone +; CHECK-NEXT: ValueType: I8 +; CHECK-NEXT: AddrSpaceQual: Global +; CHECK-NEXT: - Size: 8 +; CHECK-NEXT: Align: 8 +; CHECK-NEXT: ValueKind: HiddenMultiGridSyncArg +; CHECK-NEXT: ValueType: I8 +; CHECK-NEXT: AddrSpaceQual: Global define amdgpu_kernel void @test_vec_type_hint_half(i32 %a) #0 !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !3 !kernel_arg_base_type !3 !kernel_arg_type_qual !4 !vec_type_hint 
!26 { @@ -768,6 +1026,21 @@ ; CHECK-NEXT: ValueKind: HiddenPrintfBuffer ; CHECK-NEXT: ValueType: I8 ; CHECK-NEXT: AddrSpaceQual: Global +; CHECK-NEXT: - Size: 8 +; CHECK-NEXT: Align: 8 +; CHECK-NEXT: ValueKind: HiddenNone +; CHECK-NEXT: ValueType: I8 +; CHECK-NEXT: AddrSpaceQual: Global +; CHECK-NEXT: - Size: 8 +; CHECK-NEXT: Align: 8 +; CHECK-NEXT: ValueKind: HiddenNone +; CHECK-NEXT: ValueType: I8 +; CHECK-NEXT: AddrSpaceQual: Global +; CHECK-NEXT: - Size: 8 +; CHECK-NEXT: Align: 8 +; CHECK-NEXT: ValueKind: HiddenMultiGridSyncArg +; CHECK-NEXT: ValueType: I8 +; CHECK-NEXT: AddrSpaceQual: Global define amdgpu_kernel void @test_vec_type_hint_float(i32 %a) #0 !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !3 !kernel_arg_base_type !3 !kernel_arg_type_qual !4 !vec_type_hint !27 { @@ -805,6 +1078,21 @@ ; CHECK-NEXT: ValueKind: HiddenPrintfBuffer ; CHECK-NEXT: ValueType: I8 ; CHECK-NEXT: AddrSpaceQual: Global +; CHECK-NEXT: - Size: 8 +; CHECK-NEXT: Align: 8 +; CHECK-NEXT: ValueKind: HiddenNone +; CHECK-NEXT: ValueType: I8 +; CHECK-NEXT: AddrSpaceQual: Global +; CHECK-NEXT: - Size: 8 +; CHECK-NEXT: Align: 8 +; CHECK-NEXT: ValueKind: HiddenNone +; CHECK-NEXT: ValueType: I8 +; CHECK-NEXT: AddrSpaceQual: Global +; CHECK-NEXT: - Size: 8 +; CHECK-NEXT: Align: 8 +; CHECK-NEXT: ValueKind: HiddenMultiGridSyncArg +; CHECK-NEXT: ValueType: I8 +; CHECK-NEXT: AddrSpaceQual: Global define amdgpu_kernel void @test_vec_type_hint_double(i32 %a) #0 !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !3 !kernel_arg_base_type !3 !kernel_arg_type_qual !4 !vec_type_hint !28 { @@ -842,6 +1130,21 @@ ; CHECK-NEXT: ValueKind: HiddenPrintfBuffer ; CHECK-NEXT: ValueType: I8 ; CHECK-NEXT: AddrSpaceQual: Global +; CHECK-NEXT: - Size: 8 +; CHECK-NEXT: Align: 8 +; CHECK-NEXT: ValueKind: HiddenNone +; CHECK-NEXT: ValueType: I8 +; CHECK-NEXT: AddrSpaceQual: Global +; CHECK-NEXT: - Size: 8 +; CHECK-NEXT: Align: 8 +; CHECK-NEXT: ValueKind: HiddenNone +; CHECK-NEXT: 
ValueType: I8 +; CHECK-NEXT: AddrSpaceQual: Global +; CHECK-NEXT: - Size: 8 +; CHECK-NEXT: Align: 8 +; CHECK-NEXT: ValueKind: HiddenMultiGridSyncArg +; CHECK-NEXT: ValueType: I8 +; CHECK-NEXT: AddrSpaceQual: Global define amdgpu_kernel void @test_vec_type_hint_char(i32 %a) #0 !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !3 !kernel_arg_base_type !3 !kernel_arg_type_qual !4 !vec_type_hint !29 { @@ -879,6 +1182,21 @@ ; CHECK-NEXT: ValueKind: HiddenPrintfBuffer ; CHECK-NEXT: ValueType: I8 ; CHECK-NEXT: AddrSpaceQual: Global +; CHECK-NEXT: - Size: 8 +; CHECK-NEXT: Align: 8 +; CHECK-NEXT: ValueKind: HiddenNone +; CHECK-NEXT: ValueType: I8 +; CHECK-NEXT: AddrSpaceQual: Global +; CHECK-NEXT: - Size: 8 +; CHECK-NEXT: Align: 8 +; CHECK-NEXT: ValueKind: HiddenNone +; CHECK-NEXT: ValueType: I8 +; CHECK-NEXT: AddrSpaceQual: Global +; CHECK-NEXT: - Size: 8 +; CHECK-NEXT: Align: 8 +; CHECK-NEXT: ValueKind: HiddenMultiGridSyncArg +; CHECK-NEXT: ValueType: I8 +; CHECK-NEXT: AddrSpaceQual: Global define amdgpu_kernel void @test_vec_type_hint_short(i32 %a) #0 !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !3 !kernel_arg_base_type !3 !kernel_arg_type_qual !4 !vec_type_hint !30 { @@ -916,6 +1234,21 @@ ; CHECK-NEXT: ValueKind: HiddenPrintfBuffer ; CHECK-NEXT: ValueType: I8 ; CHECK-NEXT: AddrSpaceQual: Global +; CHECK-NEXT: - Size: 8 +; CHECK-NEXT: Align: 8 +; CHECK-NEXT: ValueKind: HiddenNone +; CHECK-NEXT: ValueType: I8 +; CHECK-NEXT: AddrSpaceQual: Global +; CHECK-NEXT: - Size: 8 +; CHECK-NEXT: Align: 8 +; CHECK-NEXT: ValueKind: HiddenNone +; CHECK-NEXT: ValueType: I8 +; CHECK-NEXT: AddrSpaceQual: Global +; CHECK-NEXT: - Size: 8 +; CHECK-NEXT: Align: 8 +; CHECK-NEXT: ValueKind: HiddenMultiGridSyncArg +; CHECK-NEXT: ValueType: I8 +; CHECK-NEXT: AddrSpaceQual: Global define amdgpu_kernel void @test_vec_type_hint_long(i32 %a) #0 !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !3 !kernel_arg_base_type !3 
!kernel_arg_type_qual !4 !vec_type_hint !31 { @@ -953,6 +1286,21 @@ ; CHECK-NEXT: ValueKind: HiddenPrintfBuffer ; CHECK-NEXT: ValueType: I8 ; CHECK-NEXT: AddrSpaceQual: Global +; CHECK-NEXT: - Size: 8 +; CHECK-NEXT: Align: 8 +; CHECK-NEXT: ValueKind: HiddenNone +; CHECK-NEXT: ValueType: I8 +; CHECK-NEXT: AddrSpaceQual: Global +; CHECK-NEXT: - Size: 8 +; CHECK-NEXT: Align: 8 +; CHECK-NEXT: ValueKind: HiddenNone +; CHECK-NEXT: ValueType: I8 +; CHECK-NEXT: AddrSpaceQual: Global +; CHECK-NEXT: - Size: 8 +; CHECK-NEXT: Align: 8 +; CHECK-NEXT: ValueKind: HiddenMultiGridSyncArg +; CHECK-NEXT: ValueType: I8 +; CHECK-NEXT: AddrSpaceQual: Global define amdgpu_kernel void @test_vec_type_hint_unknown(i32 %a) #0 !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !3 !kernel_arg_base_type !3 !kernel_arg_type_qual !4 !vec_type_hint !32 { @@ -991,6 +1339,21 @@ ; CHECK-NEXT: ValueKind: HiddenPrintfBuffer ; CHECK-NEXT: ValueType: I8 ; CHECK-NEXT: AddrSpaceQual: Global +; CHECK-NEXT: - Size: 8 +; CHECK-NEXT: Align: 8 +; CHECK-NEXT: ValueKind: HiddenNone +; CHECK-NEXT: ValueType: I8 +; CHECK-NEXT: AddrSpaceQual: Global +; CHECK-NEXT: - Size: 8 +; CHECK-NEXT: Align: 8 +; CHECK-NEXT: ValueKind: HiddenNone +; CHECK-NEXT: ValueType: I8 +; CHECK-NEXT: AddrSpaceQual: Global +; CHECK-NEXT: - Size: 8 +; CHECK-NEXT: Align: 8 +; CHECK-NEXT: ValueKind: HiddenMultiGridSyncArg +; CHECK-NEXT: ValueType: I8 +; CHECK-NEXT: AddrSpaceQual: Global define amdgpu_kernel void @test_reqd_wgs_vec_type_hint(i32 %a) #0 !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !3 !kernel_arg_base_type !3 !kernel_arg_type_qual !4 !vec_type_hint !5 @@ -1030,6 +1393,21 @@ ; CHECK-NEXT: ValueKind: HiddenPrintfBuffer ; CHECK-NEXT: ValueType: I8 ; CHECK-NEXT: AddrSpaceQual: Global +; CHECK-NEXT: - Size: 8 +; CHECK-NEXT: Align: 8 +; CHECK-NEXT: ValueKind: HiddenNone +; CHECK-NEXT: ValueType: I8 +; CHECK-NEXT: AddrSpaceQual: Global +; CHECK-NEXT: - Size: 8 +; CHECK-NEXT: Align: 8 +; 
CHECK-NEXT: ValueKind: HiddenNone +; CHECK-NEXT: ValueType: I8 +; CHECK-NEXT: AddrSpaceQual: Global +; CHECK-NEXT: - Size: 8 +; CHECK-NEXT: Align: 8 +; CHECK-NEXT: ValueKind: HiddenMultiGridSyncArg +; CHECK-NEXT: ValueType: I8 +; CHECK-NEXT: AddrSpaceQual: Global define amdgpu_kernel void @test_wgs_hint_vec_type_hint(i32 %a) #0 !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !3 !kernel_arg_base_type !3 !kernel_arg_type_qual !4 !vec_type_hint !7 @@ -1067,6 +1445,21 @@ ; CHECK-NEXT: ValueKind: HiddenPrintfBuffer ; CHECK-NEXT: ValueType: I8 ; CHECK-NEXT: AddrSpaceQual: Global +; CHECK-NEXT: - Size: 8 +; CHECK-NEXT: Align: 8 +; CHECK-NEXT: ValueKind: HiddenNone +; CHECK-NEXT: ValueType: I8 +; CHECK-NEXT: AddrSpaceQual: Global +; CHECK-NEXT: - Size: 8 +; CHECK-NEXT: Align: 8 +; CHECK-NEXT: ValueKind: HiddenNone +; CHECK-NEXT: ValueType: I8 +; CHECK-NEXT: AddrSpaceQual: Global +; CHECK-NEXT: - Size: 8 +; CHECK-NEXT: Align: 8 +; CHECK-NEXT: ValueKind: HiddenMultiGridSyncArg +; CHECK-NEXT: ValueType: I8 +; CHECK-NEXT: AddrSpaceQual: Global define amdgpu_kernel void @test_arg_ptr_to_ptr(i32 addrspace(5)* addrspace(1)* %a) #0 !kernel_arg_addr_space !81 !kernel_arg_access_qual !2 !kernel_arg_type !80 !kernel_arg_base_type !80 !kernel_arg_type_qual !4 { @@ -1138,6 +1531,21 @@ ; CHECK-NEXT: ValueKind: HiddenPrintfBuffer ; CHECK-NEXT: ValueType: I8 ; CHECK-NEXT: AddrSpaceQual: Global +; CHECK-NEXT: - Size: 8 +; CHECK-NEXT: Align: 8 +; CHECK-NEXT: ValueKind: HiddenNone +; CHECK-NEXT: ValueType: I8 +; CHECK-NEXT: AddrSpaceQual: Global +; CHECK-NEXT: - Size: 8 +; CHECK-NEXT: Align: 8 +; CHECK-NEXT: ValueKind: HiddenNone +; CHECK-NEXT: ValueType: I8 +; CHECK-NEXT: AddrSpaceQual: Global +; CHECK-NEXT: - Size: 8 +; CHECK-NEXT: Align: 8 +; CHECK-NEXT: ValueKind: HiddenMultiGridSyncArg +; CHECK-NEXT: ValueType: I8 +; CHECK-NEXT: AddrSpaceQual: Global define amdgpu_kernel void @test_arg_vector_of_ptr(<2 x i32 addrspace(1)*> %a) #0 !kernel_arg_addr_space !1 
!kernel_arg_access_qual !2 !kernel_arg_type !83 !kernel_arg_base_type !83 !kernel_arg_type_qual !4 { @@ -1174,6 +1582,21 @@ ; CHECK-NEXT: ValueKind: HiddenPrintfBuffer ; CHECK-NEXT: ValueType: I8 ; CHECK-NEXT: AddrSpaceQual: Global +; CHECK-NEXT: - Size: 8 +; CHECK-NEXT: Align: 8 +; CHECK-NEXT: ValueKind: HiddenNone +; CHECK-NEXT: ValueType: I8 +; CHECK-NEXT: AddrSpaceQual: Global +; CHECK-NEXT: - Size: 8 +; CHECK-NEXT: Align: 8 +; CHECK-NEXT: ValueKind: HiddenNone +; CHECK-NEXT: ValueType: I8 +; CHECK-NEXT: AddrSpaceQual: Global +; CHECK-NEXT: - Size: 8 +; CHECK-NEXT: Align: 8 +; CHECK-NEXT: ValueKind: HiddenMultiGridSyncArg +; CHECK-NEXT: ValueType: I8 +; CHECK-NEXT: AddrSpaceQual: Global define amdgpu_kernel void @test_arg_unknown_builtin_type( %opencl.clk_event_t addrspace(1)* %a) #0 !kernel_arg_addr_space !81 !kernel_arg_access_qual !2 !kernel_arg_type !84 @@ -1272,6 +1695,21 @@ ; CHECK-NEXT: ValueKind: HiddenPrintfBuffer ; CHECK-NEXT: ValueType: I8 ; CHECK-NEXT: AddrSpaceQual: Global +; CHECK-NEXT: - Size: 8 +; CHECK-NEXT: Align: 8 +; CHECK-NEXT: ValueKind: HiddenNone +; CHECK-NEXT: ValueType: I8 +; CHECK-NEXT: AddrSpaceQual: Global +; CHECK-NEXT: - Size: 8 +; CHECK-NEXT: Align: 8 +; CHECK-NEXT: ValueKind: HiddenNone +; CHECK-NEXT: ValueType: I8 +; CHECK-NEXT: AddrSpaceQual: Global +; CHECK-NEXT: - Size: 8 +; CHECK-NEXT: Align: 8 +; CHECK-NEXT: ValueKind: HiddenMultiGridSyncArg +; CHECK-NEXT: ValueType: I8 +; CHECK-NEXT: AddrSpaceQual: Global define amdgpu_kernel void @test_pointee_align(i64 addrspace(1)* %a, i8 addrspace(3)* %b, <2 x i8> addrspace(3)* %c, @@ -1376,6 +1814,21 @@ ; CHECK-NEXT: ValueKind: HiddenPrintfBuffer ; CHECK-NEXT: ValueType: I8 ; CHECK-NEXT: AddrSpaceQual: Global +; CHECK-NEXT: - Size: 8 +; CHECK-NEXT: Align: 8 +; CHECK-NEXT: ValueKind: HiddenNone +; CHECK-NEXT: ValueType: I8 +; CHECK-NEXT: AddrSpaceQual: Global +; CHECK-NEXT: - Size: 8 +; CHECK-NEXT: Align: 8 +; CHECK-NEXT: ValueKind: HiddenNone +; CHECK-NEXT: ValueType: I8 +; 
CHECK-NEXT: AddrSpaceQual: Global +; CHECK-NEXT: - Size: 8 +; CHECK-NEXT: Align: 8 +; CHECK-NEXT: ValueKind: HiddenMultiGridSyncArg +; CHECK-NEXT: ValueType: I8 +; CHECK-NEXT: AddrSpaceQual: Global define amdgpu_kernel void @test_pointee_align_attribute(i64 addrspace(1)* align 16 %a, i8 addrspace(3)* align 8 %b, <2 x i8> addrspace(3)* align 32 %c, @@ -1421,6 +1874,21 @@ ; CHECK-NEXT: ValueKind: HiddenPrintfBuffer ; CHECK-NEXT: ValueType: I8 ; CHECK-NEXT: AddrSpaceQual: Global +; CHECK-NEXT: - Size: 8 +; CHECK-NEXT: Align: 8 +; CHECK-NEXT: ValueKind: HiddenNone +; CHECK-NEXT: ValueType: I8 +; CHECK-NEXT: AddrSpaceQual: Global +; CHECK-NEXT: - Size: 8 +; CHECK-NEXT: Align: 8 +; CHECK-NEXT: ValueKind: HiddenNone +; CHECK-NEXT: ValueType: I8 +; CHECK-NEXT: AddrSpaceQual: Global +; CHECK-NEXT: - Size: 8 +; CHECK-NEXT: Align: 8 +; CHECK-NEXT: ValueKind: HiddenMultiGridSyncArg +; CHECK-NEXT: ValueType: I8 +; CHECK-NEXT: AddrSpaceQual: Global define amdgpu_kernel void @__test_block_invoke_kernel( <{ i32, i32, i8*, i8 addrspace(1)*, i8 }> %arg) #1 !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !110 @@ -1467,6 +1935,11 @@ ; CHECK-NEXT: ValueKind: HiddenCompletionAction ; CHECK-NEXT: ValueType: I8 ; CHECK-NEXT: AddrSpaceQual: Global +; CHECK-NEXT: - Size: 8 +; CHECK-NEXT: Align: 8 +; CHECK-NEXT: ValueKind: HiddenMultiGridSyncArg +; CHECK-NEXT: ValueType: I8 +; CHECK-NEXT: AddrSpaceQual: Global define amdgpu_kernel void @test_enqueue_kernel_caller(i8 %a) #2 !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !9 !kernel_arg_base_type !9 !kernel_arg_type_qual !4 { @@ -1484,9 +1957,9 @@ ret void } -attributes #0 = { "amdgpu-implicitarg-num-bytes"="48" } -attributes #1 = { "amdgpu-implicitarg-num-bytes"="48" "runtime-handle"="__test_block_invoke_kernel_runtime_handle" } -attributes #2 = { "amdgpu-implicitarg-num-bytes"="48" "calls-enqueue-kernel" } +attributes #0 = { "amdgpu-implicitarg-num-bytes"="56" } +attributes #1 = { 
"amdgpu-implicitarg-num-bytes"="56" "runtime-handle"="__test_block_invoke_kernel_runtime_handle" } +attributes #2 = { "amdgpu-implicitarg-num-bytes"="56" "calls-enqueue-kernel" } !llvm.printf.fmts = !{!100, !101} Index: test/CodeGen/AMDGPU/hsa-metadata-hidden-args-v3.ll =================================================================== --- test/CodeGen/AMDGPU/hsa-metadata-hidden-args-v3.ll +++ test/CodeGen/AMDGPU/hsa-metadata-hidden-args-v3.ll @@ -271,6 +271,71 @@ ret void } +; CHECK: - .args: +; CHECK-NEXT: - .address_space: global +; CHECK-NEXT: .name: r +; CHECK-NEXT: .offset: 0 +; CHECK-NEXT: .size: 8 +; CHECK-NEXT: .value_kind: global_buffer +; CHECK-NEXT: .value_type: f16 +; CHECK-NEXT: - .address_space: global +; CHECK-NEXT: .name: a +; CHECK-NEXT: .offset: 8 +; CHECK-NEXT: .size: 8 +; CHECK-NEXT: .value_kind: global_buffer +; CHECK-NEXT: .value_type: f16 +; CHECK-NEXT: - .address_space: global +; CHECK-NEXT: .name: b +; CHECK-NEXT: .offset: 16 +; CHECK-NEXT: .size: 8 +; CHECK-NEXT: .value_kind: global_buffer +; CHECK-NEXT: .value_type: f16 +; CHECK-NEXT: - .offset: 24 +; CHECK-NEXT: .size: 8 +; CHECK-NEXT: .value_kind: hidden_global_offset_x +; CHECK-NEXT: .value_type: i64 +; CHECK-NEXT: - .offset: 32 +; CHECK-NEXT: .size: 8 +; CHECK-NEXT: .value_kind: hidden_global_offset_y +; CHECK-NEXT: .value_type: i64 +; CHECK-NEXT: - .offset: 40 +; CHECK-NEXT: .size: 8 +; CHECK-NEXT: .value_kind: hidden_global_offset_z +; CHECK-NEXT: .value_type: i64 +; CHECK-NEXT: - .address_space: global +; CHECK-NEXT: .offset: 48 +; CHECK-NEXT: .size: 8 +; CHECK-NEXT: .value_kind: hidden_none +; CHECK-NEXT: .value_type: i8 +; CHECK-NEXT: - .address_space: global +; CHECK-NEXT: .offset: 56 +; CHECK-NEXT: .size: 8 +; CHECK-NEXT: .value_kind: hidden_none +; CHECK-NEXT: .value_type: i8 +; CHECK-NEXT: - .address_space: global +; CHECK-NEXT: .offset: 64 +; CHECK-NEXT: .size: 8 +; CHECK-NEXT: .value_kind: hidden_none +; CHECK-NEXT: .value_type: i8 +; CHECK-NEXT: - .address_space: global 
+; CHECK-NEXT: .offset: 72 +; CHECK-NEXT: .size: 8 +; CHECK-NEXT: .value_kind: hidden_multigrid_sync_arg +; CHECK-NEXT: .value_type: i8 +; CHECK: .name: test56 +; CHECK: .symbol: test56.kd +define amdgpu_kernel void @test56( + half addrspace(1)* %r, + half addrspace(1)* %a, + half addrspace(1)* %b) #5 { +entry: + %a.val = load half, half addrspace(1)* %a + %b.val = load half, half addrspace(1)* %b + %r.val = fadd half %a.val, %b.val + store half %r.val, half addrspace(1)* %r + ret void +} + ; CHECK: amdhsa.version: ; CHECK-NEXT: - 1 ; CHECK-NEXT: - 0 @@ -280,3 +345,4 @@ attributes #2 = { "amdgpu-implicitarg-num-bytes"="24" } attributes #3 = { "amdgpu-implicitarg-num-bytes"="32" } attributes #4 = { "amdgpu-implicitarg-num-bytes"="48" } +attributes #5 = { "amdgpu-implicitarg-num-bytes"="56" } Index: test/CodeGen/AMDGPU/hsa-metadata-hidden-args.ll =================================================================== --- test/CodeGen/AMDGPU/hsa-metadata-hidden-args.ll +++ test/CodeGen/AMDGPU/hsa-metadata-hidden-args.ll @@ -278,8 +278,75 @@ ret void } +; CHECK: - Name: test56 +; CHECK: SymbolName: 'test56@kd' +; CHECK: Args: +; CHECK-NEXT: - Name: r +; CHECK-NEXT: Size: 8 +; CHECK-NEXT: Align: 8 +; CHECK-NEXT: ValueKind: GlobalBuffer +; CHECK-NEXT: ValueType: F16 +; CHECK-NEXT: AddrSpaceQual: Global +; CHECK-NEXT: - Name: a +; CHECK-NEXT: Size: 8 +; CHECK-NEXT: Align: 8 +; CHECK-NEXT: ValueKind: GlobalBuffer +; CHECK-NEXT: ValueType: F16 +; CHECK-NEXT: AddrSpaceQual: Global +; CHECK-NEXT: - Name: b +; CHECK-NEXT: Size: 8 +; CHECK-NEXT: Align: 8 +; CHECK-NEXT: ValueKind: GlobalBuffer +; CHECK-NEXT: ValueType: F16 +; CHECK-NEXT: AddrSpaceQual: Global +; CHECK-NEXT: - Size: 8 +; CHECK-NEXT: Align: 8 +; CHECK-NEXT: ValueKind: HiddenGlobalOffsetX +; CHECK-NEXT: ValueType: I64 +; CHECK-NEXT: - Size: 8 +; CHECK-NEXT: Align: 8 +; CHECK-NEXT: ValueKind: HiddenGlobalOffsetY +; CHECK-NEXT: ValueType: I64 +; CHECK-NEXT: - Size: 8 +; CHECK-NEXT: Align: 8 +; CHECK-NEXT: ValueKind: 
HiddenGlobalOffsetZ +; CHECK-NEXT: ValueType: I64 +; CHECK-NEXT: - Size: 8 +; CHECK-NEXT: Align: 8 +; CHECK-NEXT: ValueKind: HiddenNone +; CHECK-NEXT: ValueType: I8 +; CHECK-NEXT: AddrSpaceQual: Global +; CHECK-NEXT: - Size: 8 +; CHECK-NEXT: Align: 8 +; CHECK-NEXT: ValueKind: HiddenNone +; CHECK-NEXT: ValueType: I8 +; CHECK-NEXT: AddrSpaceQual: Global +; CHECK-NEXT: - Size: 8 +; CHECK-NEXT: Align: 8 +; CHECK-NEXT: ValueKind: HiddenNone +; CHECK-NEXT: ValueType: I8 +; CHECK-NEXT: AddrSpaceQual: Global +; CHECK-NEXT: - Size: 8 +; CHECK-NEXT: Align: 8 +; CHECK-NEXT: ValueKind: HiddenMultiGridSyncArg +; CHECK-NEXT: ValueType: I8 +; CHECK-NEXT: AddrSpaceQual: Global +; CHECK-NEXT: CodeProps: +define amdgpu_kernel void @test56( + half addrspace(1)* %r, + half addrspace(1)* %a, + half addrspace(1)* %b) #5 { +entry: + %a.val = load half, half addrspace(1)* %a + %b.val = load half, half addrspace(1)* %b + %r.val = fadd half %a.val, %b.val + store half %r.val, half addrspace(1)* %r + ret void +} + attributes #0 = { "amdgpu-implicitarg-num-bytes"="8" } attributes #1 = { "amdgpu-implicitarg-num-bytes"="16" } attributes #2 = { "amdgpu-implicitarg-num-bytes"="24" } attributes #3 = { "amdgpu-implicitarg-num-bytes"="32" } attributes #4 = { "amdgpu-implicitarg-num-bytes"="48" } +attributes #5 = { "amdgpu-implicitarg-num-bytes"="56" }