[AMDGPU] Add a hidden "hostcall buffer" kernel argument to the HSA metadata

This patch teaches both AMDGPU HSA metadata streamers (code object V2 and V3)
about a new hidden kernel argument that carries the runtime hostcall buffer.

* A new value kind is introduced: "HiddenHostcallBuffer" in the V2 (YAML)
  spelling and "hidden_hostcall_buffer" in the V3 spelling. It is documented
  in AMDGPUUsage.rst, added to the ValueKind enum (value 15), to the YAML enum
  mapping and to the list of kinds accepted by the metadata verifier.
* MetadataStreamer now stores a reference to an MCContext, which
  AMDGPUAsmPrinter supplies (OutContext), so the streamers can report errors.
* When HiddenArgNumBytes >= 32, the hidden argument emitted after the three
  global offsets is chosen as follows:
    - the module has "llvm.printf.fmts" named metadata   -> printf buffer
    - the module has a function "__ockl_hostcall_internal" -> hostcall buffer
    - both are present -> error "Cannot use both hostcall and printf."
      and no further hidden arguments are emitted for that kernel
    - neither is present -> dummy "none" argument
* New tests cover hostcall present/absent for V2 and V3, plus the error case
  for V2; the two existing "full" tests assert the hostcall kind is absent.

NOTE(review): leading whitespace / column alignment of context lines below was
reconstructed following LLVM conventions - confirm against the tree before
applying (or apply with whitespace-insensitive matching).

diff --git a/llvm/docs/AMDGPUUsage.rst b/llvm/docs/AMDGPUUsage.rst
--- a/llvm/docs/AMDGPUUsage.rst
+++ b/llvm/docs/AMDGPUUsage.rst
@@ -1382,6 +1382,11 @@
                                                   to the runtime printf buffer
                                                   is passed in kernarg.
 
+                                                "HiddenHostcallBuffer"
+                                                  A global address space pointer
+                                                  to the runtime hostcall buffer
+                                                  is passed in kernarg.
+
                                                 "HiddenDefaultQueue"
                                                   A global address space pointer
                                                   to the OpenCL device enqueue
@@ -1876,6 +1881,11 @@
                                                   to the runtime printf buffer
                                                   is passed in kernarg.
 
+                                                "hidden_hostcall_buffer"
+                                                  A global address space pointer
+                                                  to the runtime hostcall buffer
+                                                  is passed in kernarg.
+
                                                 "hidden_default_queue"
                                                   A global address space pointer
                                                   to the OpenCL device enqueue
diff --git a/llvm/include/llvm/Support/AMDGPUMetadata.h b/llvm/include/llvm/Support/AMDGPUMetadata.h
--- a/llvm/include/llvm/Support/AMDGPUMetadata.h
+++ b/llvm/include/llvm/Support/AMDGPUMetadata.h
@@ -75,6 +75,7 @@
   HiddenDefaultQueue     = 12,
   HiddenCompletionAction = 13,
   HiddenMultiGridSyncArg = 14,
+  HiddenHostcallBuffer   = 15,
   Unknown                = 0xff
 };
 
diff --git a/llvm/lib/BinaryFormat/AMDGPUMetadataVerifier.cpp b/llvm/lib/BinaryFormat/AMDGPUMetadataVerifier.cpp
--- a/llvm/lib/BinaryFormat/AMDGPUMetadataVerifier.cpp
+++ b/llvm/lib/BinaryFormat/AMDGPUMetadataVerifier.cpp
@@ -119,6 +119,7 @@
                                  .Case("hidden_global_offset_z", true)
                                  .Case("hidden_none", true)
                                  .Case("hidden_printf_buffer", true)
+                                 .Case("hidden_hostcall_buffer", true)
                                  .Case("hidden_default_queue", true)
                                  .Case("hidden_completion_action", true)
                                  .Case("hidden_multigrid_sync_arg", true)
diff --git a/llvm/lib/Support/AMDGPUMetadata.cpp b/llvm/lib/Support/AMDGPUMetadata.cpp
--- a/llvm/lib/Support/AMDGPUMetadata.cpp
+++ b/llvm/lib/Support/AMDGPUMetadata.cpp
@@ -62,6 +62,7 @@
     YIO.enumCase(EN, "HiddenGlobalOffsetZ", ValueKind::HiddenGlobalOffsetZ);
     YIO.enumCase(EN, "HiddenNone", ValueKind::HiddenNone);
     YIO.enumCase(EN, "HiddenPrintfBuffer", ValueKind::HiddenPrintfBuffer);
+    YIO.enumCase(EN, "HiddenHostcallBuffer", ValueKind::HiddenHostcallBuffer);
     YIO.enumCase(EN, "HiddenDefaultQueue", ValueKind::HiddenDefaultQueue);
     YIO.enumCase(EN, "HiddenCompletionAction",
                  ValueKind::HiddenCompletionAction);
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
--- a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
@@ -102,9 +102,9 @@
                                    std::unique_ptr<MCStreamer> Streamer)
   : AsmPrinter(TM, std::move(Streamer)) {
     if (IsaInfo::hasCodeObjectV3(getGlobalSTI()))
-      HSAMetadataStream.reset(new MetadataStreamerV3());
+      HSAMetadataStream.reset(new MetadataStreamerV3(OutContext));
     else
-      HSAMetadataStream.reset(new MetadataStreamerV2());
+      HSAMetadataStream.reset(new MetadataStreamerV2(OutContext));
 }
 
 StringRef AMDGPUAsmPrinter::getPassName() const {
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.h b/llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.h
--- a/llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.h
@@ -27,6 +27,7 @@
 class Argument;
 class DataLayout;
 class Function;
+class MCContext;
 class MDNode;
 class Module;
 struct SIProgramInfo;
@@ -36,7 +37,11 @@ namespace HSAMD {
 
 class MetadataStreamer {
+protected:
+  MCContext &OutContext;
+
 public:
+  explicit MetadataStreamer(MCContext &C) : OutContext(C) {}
   virtual ~MetadataStreamer(){};
 
   virtual bool emitTo(AMDGPUTargetStreamer &TargetStreamer) = 0;
 
@@ -105,7 +110,7 @@
   }
 
 public:
-  MetadataStreamerV3() = default;
+  explicit MetadataStreamerV3(MCContext &C) : MetadataStreamer(C) {}
   ~MetadataStreamerV3() = default;
 
   bool emitTo(AMDGPUTargetStreamer &TargetStreamer) override;
@@ -171,7 +176,7 @@
   }
 
 public:
-  MetadataStreamerV2() = default;
+  explicit MetadataStreamerV2(MCContext &C) : MetadataStreamer(C) {}
   ~MetadataStreamerV2() = default;
 
   bool emitTo(AMDGPUTargetStreamer &TargetStreamer) override;
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.cpp b/llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.cpp
--- a/llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.cpp
@@ -22,6 +22,7 @@
 #include "llvm/ADT/StringSwitch.h"
 #include "llvm/IR/Constants.h"
 #include "llvm/IR/Module.h"
+#include "llvm/MC/MCContext.h"
 #include "llvm/Support/raw_ostream.h"
 
 namespace llvm {
@@ -419,8 +420,16 @@
-  // Emit "printf buffer" argument if printf is used, otherwise emit dummy
-  // "none" argument.
+  // Emit "printf buffer" or "hostcall buffer" argument if printf or hostcall
+  // is used (using both is an error), otherwise emit dummy "none" argument.
   if (HiddenArgNumBytes >= 32) {
-    if (Func.getParent()->getNamedMetadata("llvm.printf.fmts"))
+    bool HostcallUsed =
+        Func.getParent()->getFunction("__ockl_hostcall_internal");
+    if (Func.getParent()->getNamedMetadata("llvm.printf.fmts")) {
+      if (HostcallUsed) {
+        OutContext.reportError({}, "Cannot use both hostcall and printf.");
+        return;
+      }
       emitKernelArg(DL, Int8PtrTy, ValueKind::HiddenPrintfBuffer);
+    } else if (HostcallUsed)
+      emitKernelArg(DL, Int8PtrTy, ValueKind::HiddenHostcallBuffer);
     else
       emitKernelArg(DL, Int8PtrTy, ValueKind::HiddenNone);
   }
@@ -852,8 +861,16 @@
-  // Emit "printf buffer" argument if printf is used, otherwise emit dummy
-  // "none" argument.
+  // Emit "printf buffer" or "hostcall buffer" argument if printf or hostcall
+  // is used (using both is an error), otherwise emit dummy "none" argument.
   if (HiddenArgNumBytes >= 32) {
-    if (Func.getParent()->getNamedMetadata("llvm.printf.fmts"))
+    bool HostcallUsed =
+        Func.getParent()->getFunction("__ockl_hostcall_internal");
+    if (Func.getParent()->getNamedMetadata("llvm.printf.fmts")) {
+      if (HostcallUsed) {
+        OutContext.reportError({}, "Cannot use both hostcall and printf.");
+        return;
+      }
       emitKernelArg(DL, Int8PtrTy, "hidden_printf_buffer", Offset, Args);
+    } else if (HostcallUsed)
+      emitKernelArg(DL, Int8PtrTy, "hidden_hostcall_buffer", Offset, Args);
     else
       emitKernelArg(DL, Int8PtrTy, "hidden_none", Offset, Args);
   }
diff --git a/llvm/test/CodeGen/AMDGPU/hsa-metadata-from-llvm-ir-full-v3.ll b/llvm/test/CodeGen/AMDGPU/hsa-metadata-from-llvm-ir-full-v3.ll
--- a/llvm/test/CodeGen/AMDGPU/hsa-metadata-from-llvm-ir-full-v3.ll
+++ b/llvm/test/CodeGen/AMDGPU/hsa-metadata-from-llvm-ir-full-v3.ll
@@ -42,6 +42,7 @@
 ; CHECK-NEXT: .size: 8
 ; CHECK-NOT: .value_kind: hidden_default_queue
 ; CHECK-NOT: .value_kind: hidden_completion_action
+; CHECK-NOT: .value_kind: hidden_hostcall_buffer
 ; CHECK-NEXT: .value_kind: hidden_printf_buffer
 ; CHECK-NEXT: .value_type: i8
 ; CHECK: .value_kind: hidden_multigrid_sync_arg
diff --git a/llvm/test/CodeGen/AMDGPU/hsa-metadata-from-llvm-ir-full.ll b/llvm/test/CodeGen/AMDGPU/hsa-metadata-from-llvm-ir-full.ll
--- a/llvm/test/CodeGen/AMDGPU/hsa-metadata-from-llvm-ir-full.ll
+++ b/llvm/test/CodeGen/AMDGPU/hsa-metadata-from-llvm-ir-full.ll
@@ -49,6 +49,7 @@
 ; CHECK-NEXT: ValueType: I64
 ; CHECK-NEXT: - Size: 8
 ; CHECK-NEXT: Align: 8
+; CHECK-NOT: ValueKind: HiddenHostcallBuffer
 ; CHECK-NEXT: ValueKind: HiddenPrintfBuffer
 ; CHECK-NEXT: ValueType: I8
 ; CHECK-NEXT: AddrSpaceQual: Global
diff --git a/llvm/test/CodeGen/AMDGPU/hsa-metadata-hostcall-absent-v3.ll b/llvm/test/CodeGen/AMDGPU/hsa-metadata-hostcall-absent-v3.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/hsa-metadata-hostcall-absent-v3.ll
@@ -0,0 +1,55 @@
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mattr=+code-object-v3 -mcpu=gfx900 -filetype=obj -o - < %s | llvm-readelf --notes | FileCheck %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mattr=+code-object-v3 -mcpu=gfx900 -amdgpu-dump-hsa-metadata -amdgpu-verify-hsa-metadata -filetype=obj -o - < %s 2>&1 | FileCheck --check-prefix=PARSER %s
+
+; CHECK: ---
+; CHECK: amdhsa.kernels:
+; CHECK: - .args:
+; CHECK-NEXT: - .name: a
+; CHECK-NEXT: .offset: 0
+; CHECK-NEXT: .size: 1
+; CHECK-NEXT: .type_name: char
+; CHECK-NEXT: .value_kind: by_value
+; CHECK-NEXT: .value_type: i8
+; CHECK-NEXT: - .offset: 8
+; CHECK-NEXT: .size: 8
+; CHECK-NEXT: .value_kind: hidden_global_offset_x
+; CHECK-NEXT: .value_type: i64
+; CHECK-NEXT: - .offset: 16
+; CHECK-NEXT: .size: 8
+; CHECK-NEXT: .value_kind: hidden_global_offset_y
+; CHECK-NEXT: .value_type: i64
+; CHECK-NEXT: - .offset: 24
+; CHECK-NEXT: .size: 8
+; CHECK-NEXT: .value_kind: hidden_global_offset_z
+; CHECK-NEXT: .value_type: i64
+
+; CHECK-NOT: .value_kind: hidden_hostcall_buffer
+
+; CHECK: .language: OpenCL C
+; CHECK-NEXT: .language_version:
+; CHECK-NEXT: - 2
+; CHECK-NEXT: - 0
+; CHECK: .name: test_kernel
+; CHECK: .symbol: test_kernel.kd
+
+define amdgpu_kernel void @test_kernel(i8 %a) #0
+    !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !3
+    !kernel_arg_base_type !3 !kernel_arg_type_qual !4 {
+  ret void
+}
+
+; CHECK: amdhsa.version:
+; CHECK-NEXT: - 1
+; CHECK-NEXT: - 0
+
+attributes #0 = { "amdgpu-implicitarg-num-bytes"="48" }
+
+!1 = !{i32 0}
+!2 = !{!"none"}
+!3 = !{!"char"}
+!4 = !{!""}
+
+!opencl.ocl.version = !{!90}
+!90 = !{i32 2, i32 0}
+
+; PARSER: AMDGPU HSA Metadata Parser Test: PASS
diff --git a/llvm/test/CodeGen/AMDGPU/hsa-metadata-hostcall-absent.ll b/llvm/test/CodeGen/AMDGPU/hsa-metadata-hostcall-absent.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/hsa-metadata-hostcall-absent.ll
@@ -0,0 +1,52 @@
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mattr=-code-object-v3 -mcpu=gfx900 -filetype=obj -o - < %s | llvm-readelf --notes | FileCheck %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mattr=-code-object-v3 -mcpu=gfx900 -amdgpu-dump-hsa-metadata -amdgpu-verify-hsa-metadata -filetype=obj -o - < %s 2>&1 | FileCheck --check-prefix=PARSER %s
+
+; CHECK: ---
+; CHECK: Version: [ 1, 0 ]
+; CHECK: Kernels:
+
+; CHECK: - Name: test_kernel
+; CHECK-NEXT: SymbolName: 'test_kernel@kd'
+; CHECK-NEXT: Language: OpenCL C
+; CHECK-NEXT: LanguageVersion: [ 2, 0 ]
+; CHECK-NEXT: Args:
+; CHECK-NEXT: - Name: a
+; CHECK-NEXT: TypeName: char
+; CHECK-NEXT: Size: 1
+; CHECK-NEXT: Align: 1
+; CHECK-NEXT: ValueKind: ByValue
+; CHECK-NEXT: ValueType: I8
+; CHECK-NEXT: AccQual: Default
+; CHECK-NEXT: - Size: 8
+; CHECK-NEXT: Align: 8
+; CHECK-NEXT: ValueKind: HiddenGlobalOffsetX
+; CHECK-NEXT: ValueType: I64
+; CHECK-NEXT: - Size: 8
+; CHECK-NEXT: Align: 8
+; CHECK-NEXT: ValueKind: HiddenGlobalOffsetY
+; CHECK-NEXT: ValueType: I64
+; CHECK-NEXT: - Size: 8
+; CHECK-NEXT: Align: 8
+; CHECK-NEXT: ValueKind: HiddenGlobalOffsetZ
+; CHECK-NEXT: ValueType: I64
+; CHECK-NOT: ValueKind: HiddenHostcallBuffer
+; CHECK-NOT: ValueKind: HiddenDefaultQueue
+; CHECK-NOT: ValueKind: HiddenCompletionAction
+
+define amdgpu_kernel void @test_kernel(i8 %a) #0
+    !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !3
+    !kernel_arg_base_type !3 !kernel_arg_type_qual !4 {
+  ret void
+}
+
+attributes #0 = { "amdgpu-implicitarg-num-bytes"="48" }
+
+!1 = !{i32 0}
+!2 = !{!"none"}
+!3 = !{!"char"}
+!4 = !{!""}
+
+!opencl.ocl.version = !{!90}
+!90 = !{i32 2, i32 0}
+
+; PARSER: AMDGPU HSA Metadata Parser Test: PASS
diff --git a/llvm/test/CodeGen/AMDGPU/hsa-metadata-hostcall-no-printf.ll b/llvm/test/CodeGen/AMDGPU/hsa-metadata-hostcall-no-printf.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/hsa-metadata-hostcall-no-printf.ll
@@ -0,0 +1,23 @@
+; RUN: not llc -mtriple=amdgcn-amd-amdhsa -mattr=-code-object-v3 -mcpu=gfx900 -filetype=obj -o - < %s 2>&1 | FileCheck %s
+
+declare <2 x i64> @__ockl_hostcall_internal(i8* %0, i32 %1, i64 %2, i64 %3, i64 %4, i64 %5, i64 %6, i64 %7, i64 %8, i64 %9)
+
+define amdgpu_kernel void @test_kernel(i8 %a) #0
+    !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !3
+    !kernel_arg_base_type !3 !kernel_arg_type_qual !4 {
+  ret void
+}
+
+attributes #0 = { "amdgpu-implicitarg-num-bytes"="48" }
+
+!1 = !{i32 0}
+!2 = !{!"none"}
+!3 = !{!"char"}
+!4 = !{!""}
+
+!llvm.printf.fmts = !{!100}
+!100 = !{!"placeholder"}
+!opencl.ocl.version = !{!90}
+!90 = !{i32 2, i32 0}
+
+; CHECK: LLVM ERROR: Cannot use both hostcall and printf.
diff --git a/llvm/test/CodeGen/AMDGPU/hsa-metadata-hostcall-present-v3.ll b/llvm/test/CodeGen/AMDGPU/hsa-metadata-hostcall-present-v3.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/hsa-metadata-hostcall-present-v3.ll
@@ -0,0 +1,59 @@
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mattr=+code-object-v3 -mcpu=gfx900 -filetype=obj -o - < %s | llvm-readelf --notes | FileCheck %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mattr=+code-object-v3 -mcpu=gfx900 -amdgpu-dump-hsa-metadata -amdgpu-verify-hsa-metadata -filetype=obj -o - < %s 2>&1 | FileCheck --check-prefix=PARSER %s
+
+; CHECK: ---
+; CHECK: amdhsa.kernels:
+; CHECK: - .args:
+; CHECK-NEXT: - .name: a
+; CHECK-NEXT: .offset: 0
+; CHECK-NEXT: .size: 1
+; CHECK-NEXT: .type_name: char
+; CHECK-NEXT: .value_kind: by_value
+; CHECK-NEXT: .value_type: i8
+; CHECK-NEXT: - .offset: 8
+; CHECK-NEXT: .size: 8
+; CHECK-NEXT: .value_kind: hidden_global_offset_x
+; CHECK-NEXT: .value_type: i64
+; CHECK-NEXT: - .offset: 16
+; CHECK-NEXT: .size: 8
+; CHECK-NEXT: .value_kind: hidden_global_offset_y
+; CHECK-NEXT: .value_type: i64
+; CHECK-NEXT: - .offset: 24
+; CHECK-NEXT: .size: 8
+; CHECK-NEXT: .value_kind: hidden_global_offset_z
+; CHECK-NEXT: .value_type: i64
+; CHECK-NEXT: - .address_space: global
+; CHECK-NEXT: .offset: 32
+; CHECK-NEXT: .size: 8
+; CHECK-NEXT: .value_kind: hidden_hostcall_buffer
+; CHECK-NEXT: .value_type: i8
+; CHECK: .language: OpenCL C
+; CHECK-NEXT: .language_version:
+; CHECK-NEXT: - 2
+; CHECK-NEXT: - 0
+; CHECK: .name: test_kernel
+; CHECK: .symbol: test_kernel.kd
+
+declare <2 x i64> @__ockl_hostcall_internal(i8* %0, i32 %1, i64 %2, i64 %3, i64 %4, i64 %5, i64 %6, i64 %7, i64 %8, i64 %9)
+
+define amdgpu_kernel void @test_kernel(i8 %a) #0
+    !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !3
+    !kernel_arg_base_type !3 !kernel_arg_type_qual !4 {
+  ret void
+}
+
+; CHECK: amdhsa.version:
+; CHECK-NEXT: - 1
+; CHECK-NEXT: - 0
+
+attributes #0 = { "amdgpu-implicitarg-num-bytes"="48" }
+
+!1 = !{i32 0}
+!2 = !{!"none"}
+!3 = !{!"char"}
+!4 = !{!""}
+
+!opencl.ocl.version = !{!90}
+!90 = !{i32 2, i32 0}
+
+; PARSER: AMDGPU HSA Metadata Parser Test: PASS
diff --git a/llvm/test/CodeGen/AMDGPU/hsa-metadata-hostcall-present.ll b/llvm/test/CodeGen/AMDGPU/hsa-metadata-hostcall-present.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/hsa-metadata-hostcall-present.ll
@@ -0,0 +1,58 @@
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mattr=-code-object-v3 -mcpu=gfx900 -filetype=obj -o - < %s | llvm-readelf --notes | FileCheck %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mattr=-code-object-v3 -mcpu=gfx900 -amdgpu-dump-hsa-metadata -amdgpu-verify-hsa-metadata -filetype=obj -o - < %s 2>&1 | FileCheck --check-prefix=PARSER %s
+
+; CHECK: ---
+; CHECK: Version: [ 1, 0 ]
+; CHECK: Kernels:
+
+; CHECK: - Name: test_kernel
+; CHECK-NEXT: SymbolName: 'test_kernel@kd'
+; CHECK-NEXT: Language: OpenCL C
+; CHECK-NEXT: LanguageVersion: [ 2, 0 ]
+; CHECK-NEXT: Args:
+; CHECK-NEXT: - Name: a
+; CHECK-NEXT: TypeName: char
+; CHECK-NEXT: Size: 1
+; CHECK-NEXT: Align: 1
+; CHECK-NEXT: ValueKind: ByValue
+; CHECK-NEXT: ValueType: I8
+; CHECK-NEXT: AccQual: Default
+; CHECK-NEXT: - Size: 8
+; CHECK-NEXT: Align: 8
+; CHECK-NEXT: ValueKind: HiddenGlobalOffsetX
+; CHECK-NEXT: ValueType: I64
+; CHECK-NEXT: - Size: 8
+; CHECK-NEXT: Align: 8
+; CHECK-NEXT: ValueKind: HiddenGlobalOffsetY
+; CHECK-NEXT: ValueType: I64
+; CHECK-NEXT: - Size: 8
+; CHECK-NEXT: Align: 8
+; CHECK-NEXT: ValueKind: HiddenGlobalOffsetZ
+; CHECK-NEXT: ValueType: I64
+; CHECK-NEXT: - Size: 8
+; CHECK-NEXT: Align: 8
+; CHECK-NEXT: ValueKind: HiddenHostcallBuffer
+; CHECK-NEXT: ValueType: I8
+; CHECK-NEXT: AddrSpaceQual: Global
+; CHECK-NOT: ValueKind: HiddenDefaultQueue
+; CHECK-NOT: ValueKind: HiddenCompletionAction
+
+declare <2 x i64> @__ockl_hostcall_internal(i8* %0, i32 %1, i64 %2, i64 %3, i64 %4, i64 %5, i64 %6, i64 %7, i64 %8, i64 %9)
+
+define amdgpu_kernel void @test_kernel(i8 %a) #0
+    !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !3
+    !kernel_arg_base_type !3 !kernel_arg_type_qual !4 {
+  ret void
+}
+
+attributes #0 = { "amdgpu-implicitarg-num-bytes"="48" }
+
+!1 = !{i32 0}
+!2 = !{!"none"}
+!3 = !{!"char"}
+!4 = !{!""}
+
+!opencl.ocl.version = !{!90}
+!90 = !{i32 2, i32 0}
+
+; PARSER: AMDGPU HSA Metadata Parser Test: PASS