diff --git a/clang/lib/CodeGen/TargetInfo.cpp b/clang/lib/CodeGen/TargetInfo.cpp --- a/clang/lib/CodeGen/TargetInfo.cpp +++ b/clang/lib/CodeGen/TargetInfo.cpp @@ -22,6 +22,7 @@ #include "clang/Basic/CodeGenOptions.h" #include "clang/Basic/DiagnosticFrontend.h" #include "clang/Basic/Builtins.h" +#include "clang/Basic/TargetOptions.h" #include "clang/CodeGen/CGFunctionInfo.h" #include "clang/CodeGen/SwiftCallingConv.h" #include "llvm/ADT/SmallBitVector.h" @@ -9237,9 +9238,11 @@ const FunctionDecl *FD, llvm::Function *F, CodeGenModule &M) const { const auto *ReqdWGS = M.getLangOpts().OpenCL ? FD->getAttr() : nullptr; - const bool IsOpenCLKernel = - M.getLangOpts().OpenCL && FD->hasAttr(); + const bool IsOpenCL = M.getLangOpts().OpenCL; + const bool IsOpenCLKernel = IsOpenCL && FD->hasAttr(); const bool IsHIPKernel = M.getLangOpts().HIP && FD->hasAttr(); + const TargetOptions::CodeObjectVersionKind CodeObjectVersion = + M.getTarget().getTargetOpts().CodeObjectVersion; const auto *FlatWGS = FD->getAttr(); if (ReqdWGS || FlatWGS) { @@ -9307,6 +9310,14 @@ if (NumVGPR != 0) F->addFnAttr("amdgpu-num-vgpr", llvm::utostr(NumVGPR)); } + + // OpenCL pre-COV_5 does not support hostcall, so mark all OpenCL functions + // as "amdgpu-no-hostcall-ptr" unless we are compiling device-libs (COV_None) + // which adopt the COV of the module they are linked with. + if (IsOpenCL && CodeObjectVersion != TargetOptions::COV_None && + CodeObjectVersion < TargetOptions::COV_5) { + F->addFnAttr("amdgpu-no-hostcall-ptr"); + } } void AMDGPUTargetCodeGenInfo::setTargetAttributes( diff --git a/clang/test/CodeGenOpenCL/amdgpu-attrs.cl b/clang/test/CodeGenOpenCL/amdgpu-attrs.cl --- a/clang/test/CodeGenOpenCL/amdgpu-attrs.cl +++ b/clang/test/CodeGenOpenCL/amdgpu-attrs.cl @@ -1,5 +1,6 @@ // RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -target-cpu tahiti -O0 -emit-llvm -o - %s | FileCheck %s // RUN: %clang_cc1 -triple amdgcn-- -target-cpu tahiti -O0 -emit-llvm -o - %s | FileCheck %s +// RUN: %clang_cc1 -mcode-object-version=5 -triple amdgcn-amd-amdhsa -target-cpu tahiti -O0 -emit-llvm -o - %s | FileCheck -check-prefix=V5 %s // RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -O0 -emit-llvm -verify -o - %s | FileCheck -check-prefix=X86 %s __attribute__((amdgpu_flat_work_group_size(0, 0))) // expected-no-diagnostics @@ -161,33 +162,35 @@ // CHECK-NOT: "amdgpu-num-sgpr"="0" // CHECK-NOT: "amdgpu-num-vgpr"="0" -// CHECK-DAG: attributes [[FLAT_WORK_GROUP_SIZE_32_64]] = {{.*}} "amdgpu-flat-work-group-size"="32,64" -// CHECK-DAG: attributes [[FLAT_WORK_GROUP_SIZE_64_64]] = {{.*}} "amdgpu-flat-work-group-size"="64,64" -// CHECK-DAG: attributes [[FLAT_WORK_GROUP_SIZE_16_128]] = {{.*}} "amdgpu-flat-work-group-size"="16,128" +// CHECK-DAG: attributes [[FLAT_WORK_GROUP_SIZE_32_64]] = {{.*}} "amdgpu-flat-work-group-size"="32,64" "amdgpu-no-hostcall-ptr" +// CHECK-DAG: attributes [[FLAT_WORK_GROUP_SIZE_64_64]] = {{.*}} "amdgpu-flat-work-group-size"="64,64" "amdgpu-no-hostcall-ptr" +// CHECK-DAG: attributes [[FLAT_WORK_GROUP_SIZE_16_128]] = {{.*}} "amdgpu-flat-work-group-size"="16,128" "amdgpu-no-hostcall-ptr" -// CHECK-DAG: attributes [[WAVES_PER_EU_2]] = {{.*}} "amdgpu-flat-work-group-size"="1,256" "amdgpu-waves-per-eu"="2" +// CHECK-DAG: attributes [[WAVES_PER_EU_2]] = {{.*}} "amdgpu-flat-work-group-size"="1,256" "amdgpu-no-hostcall-ptr" "amdgpu-waves-per-eu"="2" -// CHECK-DAG: attributes [[WAVES_PER_EU_2_4]] = {{.*}} "amdgpu-flat-work-group-size"="1,256" "amdgpu-waves-per-eu"="2,4" -// CHECK-DAG: attributes [[NUM_SGPR_32]] = {{.*}} "amdgpu-flat-work-group-size"="1,256" "amdgpu-num-sgpr"="32" -// CHECK-DAG: attributes [[NUM_VGPR_64]] = {{.*}} "amdgpu-flat-work-group-size"="1,256" "amdgpu-num-vgpr"="64" +// CHECK-DAG: attributes [[WAVES_PER_EU_2_4]] = {{.*}} "amdgpu-flat-work-group-size"="1,256" "amdgpu-no-hostcall-ptr" "amdgpu-waves-per-eu"="2,4" +// CHECK-DAG: attributes [[NUM_SGPR_32]] = {{.*}} "amdgpu-flat-work-group-size"="1,256" "amdgpu-no-hostcall-ptr" "amdgpu-num-sgpr"="32" +// CHECK-DAG: attributes [[NUM_VGPR_64]] = {{.*}} "amdgpu-flat-work-group-size"="1,256" "amdgpu-no-hostcall-ptr" "amdgpu-num-vgpr"="64" -// CHECK-DAG: attributes [[FLAT_WORK_GROUP_SIZE_32_64_WAVES_PER_EU_2]] = {{.*}} "amdgpu-flat-work-group-size"="32,64" "amdgpu-waves-per-eu"="2" -// CHECK-DAG: attributes [[FLAT_WORK_GROUP_SIZE_32_64_WAVES_PER_EU_2_4]] = {{.*}} "amdgpu-flat-work-group-size"="32,64" "amdgpu-waves-per-eu"="2,4" -// CHECK-DAG: attributes [[FLAT_WORK_GROUP_SIZE_32_64_NUM_SGPR_32]] = {{.*}} "amdgpu-flat-work-group-size"="32,64" "amdgpu-num-sgpr"="32" -// CHECK-DAG: attributes [[FLAT_WORK_GROUP_SIZE_32_64_NUM_VGPR_64]] = {{.*}} "amdgpu-flat-work-group-size"="32,64" "amdgpu-num-vgpr"="64" -// CHECK-DAG: attributes [[WAVES_PER_EU_2_NUM_SGPR_32]] = {{.*}} "amdgpu-flat-work-group-size"="1,256" "amdgpu-num-sgpr"="32" "amdgpu-waves-per-eu"="2" -// CHECK-DAG: attributes [[WAVES_PER_EU_2_NUM_VGPR_64]] = {{.*}} "amdgpu-flat-work-group-size"="1,256" "amdgpu-num-vgpr"="64" "amdgpu-waves-per-eu"="2" -// CHECK-DAG: attributes [[WAVES_PER_EU_2_4_NUM_SGPR_32]] = {{.*}} "amdgpu-flat-work-group-size"="1,256" "amdgpu-num-sgpr"="32" "amdgpu-waves-per-eu"="2,4" -// CHECK-DAG: attributes [[WAVES_PER_EU_2_4_NUM_VGPR_64]] = {{.*}} "amdgpu-flat-work-group-size"="1,256" "amdgpu-num-vgpr"="64" "amdgpu-waves-per-eu"="2,4" -// CHECK-DAG: attributes [[NUM_SGPR_32_NUM_VGPR_64]] = {{.*}} "amdgpu-flat-work-group-size"="1,256" "amdgpu-num-sgpr"="32" "amdgpu-num-vgpr"="64" +// CHECK-DAG: attributes [[FLAT_WORK_GROUP_SIZE_32_64_WAVES_PER_EU_2]] = {{.*}} "amdgpu-flat-work-group-size"="32,64" "amdgpu-no-hostcall-ptr" "amdgpu-waves-per-eu"="2" +// CHECK-DAG: attributes [[FLAT_WORK_GROUP_SIZE_32_64_WAVES_PER_EU_2_4]] = {{.*}} "amdgpu-flat-work-group-size"="32,64" "amdgpu-no-hostcall-ptr" "amdgpu-waves-per-eu"="2,4" +// CHECK-DAG: attributes [[FLAT_WORK_GROUP_SIZE_32_64_NUM_SGPR_32]] = {{.*}} "amdgpu-flat-work-group-size"="32,64" "amdgpu-no-hostcall-ptr" "amdgpu-num-sgpr"="32" +// CHECK-DAG: attributes [[FLAT_WORK_GROUP_SIZE_32_64_NUM_VGPR_64]] = {{.*}} "amdgpu-flat-work-group-size"="32,64" "amdgpu-no-hostcall-ptr" "amdgpu-num-vgpr"="64" +// CHECK-DAG: attributes [[WAVES_PER_EU_2_NUM_SGPR_32]] = {{.*}} "amdgpu-flat-work-group-size"="1,256" "amdgpu-no-hostcall-ptr" "amdgpu-num-sgpr"="32" "amdgpu-waves-per-eu"="2" +// CHECK-DAG: attributes [[WAVES_PER_EU_2_NUM_VGPR_64]] = {{.*}} "amdgpu-flat-work-group-size"="1,256" "amdgpu-no-hostcall-ptr" "amdgpu-num-vgpr"="64" "amdgpu-waves-per-eu"="2" +// CHECK-DAG: attributes [[WAVES_PER_EU_2_4_NUM_SGPR_32]] = {{.*}} "amdgpu-flat-work-group-size"="1,256" "amdgpu-no-hostcall-ptr" "amdgpu-num-sgpr"="32" "amdgpu-waves-per-eu"="2,4" +// CHECK-DAG: attributes [[WAVES_PER_EU_2_4_NUM_VGPR_64]] = {{.*}} "amdgpu-flat-work-group-size"="1,256" "amdgpu-no-hostcall-ptr" "amdgpu-num-vgpr"="64" "amdgpu-waves-per-eu"="2,4" +// CHECK-DAG: attributes [[NUM_SGPR_32_NUM_VGPR_64]] = {{.*}} "amdgpu-flat-work-group-size"="1,256" "amdgpu-no-hostcall-ptr" "amdgpu-num-sgpr"="32" "amdgpu-num-vgpr"="64" -// CHECK-DAG: attributes [[FLAT_WORK_GROUP_SIZE_32_64_WAVES_PER_EU_2_NUM_SGPR_32]] = {{.*}} "amdgpu-flat-work-group-size"="32,64" "amdgpu-num-sgpr"="32" "amdgpu-waves-per-eu"="2" -// CHECK-DAG: attributes [[FLAT_WORK_GROUP_SIZE_32_64_WAVES_PER_EU_2_NUM_VGPR_64]] = {{.*}} "amdgpu-flat-work-group-size"="32,64" "amdgpu-num-vgpr"="64" "amdgpu-waves-per-eu"="2" -// CHECK-DAG: attributes [[FLAT_WORK_GROUP_SIZE_32_64_WAVES_PER_EU_2_4_NUM_SGPR_32]] = {{.*}} "amdgpu-flat-work-group-size"="32,64" "amdgpu-num-sgpr"="32" "amdgpu-waves-per-eu"="2,4" -// CHECK-DAG: attributes [[FLAT_WORK_GROUP_SIZE_32_64_WAVES_PER_EU_2_4_NUM_VGPR_64]] = {{.*}} "amdgpu-flat-work-group-size"="32,64" "amdgpu-num-vgpr"="64" "amdgpu-waves-per-eu"="2,4" +// CHECK-DAG: attributes [[FLAT_WORK_GROUP_SIZE_32_64_WAVES_PER_EU_2_NUM_SGPR_32]] = {{.*}} "amdgpu-flat-work-group-size"="32,64" "amdgpu-no-hostcall-ptr" "amdgpu-num-sgpr"="32" "amdgpu-waves-per-eu"="2" +// CHECK-DAG: attributes [[FLAT_WORK_GROUP_SIZE_32_64_WAVES_PER_EU_2_NUM_VGPR_64]] = {{.*}} "amdgpu-flat-work-group-size"="32,64" "amdgpu-no-hostcall-ptr" "amdgpu-num-vgpr"="64" "amdgpu-waves-per-eu"="2" +// CHECK-DAG: attributes [[FLAT_WORK_GROUP_SIZE_32_64_WAVES_PER_EU_2_4_NUM_SGPR_32]] = {{.*}} "amdgpu-flat-work-group-size"="32,64" "amdgpu-no-hostcall-ptr" "amdgpu-num-sgpr"="32" "amdgpu-waves-per-eu"="2,4" +// CHECK-DAG: attributes [[FLAT_WORK_GROUP_SIZE_32_64_WAVES_PER_EU_2_4_NUM_VGPR_64]] = {{.*}} "amdgpu-flat-work-group-size"="32,64" "amdgpu-no-hostcall-ptr" "amdgpu-num-vgpr"="64" "amdgpu-waves-per-eu"="2,4" -// CHECK-DAG: attributes [[FLAT_WORK_GROUP_SIZE_32_64_WAVES_PER_EU_2_NUM_SGPR_32_NUM_VGPR_64]] = {{.*}} "amdgpu-flat-work-group-size"="32,64" "amdgpu-num-sgpr"="32" "amdgpu-num-vgpr"="64" "amdgpu-waves-per-eu"="2" -// CHECK-DAG: attributes [[FLAT_WORK_GROUP_SIZE_32_64_WAVES_PER_EU_2_4_NUM_SGPR_32_NUM_VGPR_64]] = {{.*}} "amdgpu-flat-work-group-size"="32,64" "amdgpu-num-sgpr"="32" "amdgpu-num-vgpr"="64" "amdgpu-waves-per-eu"="2,4" +// CHECK-DAG: attributes [[FLAT_WORK_GROUP_SIZE_32_64_WAVES_PER_EU_2_NUM_SGPR_32_NUM_VGPR_64]] = {{.*}} "amdgpu-flat-work-group-size"="32,64" "amdgpu-no-hostcall-ptr" "amdgpu-num-sgpr"="32" "amdgpu-num-vgpr"="64" "amdgpu-waves-per-eu"="2" +// CHECK-DAG: attributes [[FLAT_WORK_GROUP_SIZE_32_64_WAVES_PER_EU_2_4_NUM_SGPR_32_NUM_VGPR_64]] = {{.*}} "amdgpu-flat-work-group-size"="32,64" "amdgpu-no-hostcall-ptr" "amdgpu-num-sgpr"="32" "amdgpu-num-vgpr"="64" "amdgpu-waves-per-eu"="2,4" // CHECK-DAG: attributes [[A_FUNCTION]] = {{.*}} -// CHECK-DAG: attributes [[DEFAULT_KERNEL_ATTRS]] = {{.*}} "amdgpu-flat-work-group-size"="1,256" +// CHECK-DAG: attributes [[DEFAULT_KERNEL_ATTRS]] = {{.*}} "amdgpu-flat-work-group-size"="1,256" "amdgpu-no-hostcall-ptr" + +// V5-NOT: "amdgpu-no-hostcall-ptr" diff --git a/clang/test/CodeGenOpenCL/amdgpu-printf.cl b/clang/test/CodeGenOpenCL/amdgpu-printf.cl new file mode 100644 --- /dev/null +++ b/clang/test/CodeGenOpenCL/amdgpu-printf.cl @@ -0,0 +1,46 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py +// RUN: %clang_cc1 -cl-std=CL1.2 -triple amdgcn-amd-amdhsa -disable-llvm-passes -emit-llvm -o - %s | FileCheck %s + +int printf(__constant const char* st, ...) __attribute__((format(printf, 1, 2))); + +// CHECK-LABEL: @test_printf_noargs( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[CALL:%.*]] = call i32 (i8 addrspace(4)*, ...) @printf(i8 addrspace(4)* noundef getelementptr inbounds ([1 x i8], [1 x i8] addrspace(4)* @.str, i64 0, i64 0)) #[[ATTR4:[0-9]+]] +// CHECK-NEXT: ret void +// +__kernel void test_printf_noargs() { + printf(""); +} + +// CHECK-LABEL: @test_printf_int( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[I_ADDR:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-NEXT: store i32 [[I:%.*]], i32 addrspace(5)* [[I_ADDR]], align 4, !tbaa [[TBAA8:![0-9]+]] +// CHECK-NEXT: [[TMP0:%.*]] = load i32, i32 addrspace(5)* [[I_ADDR]], align 4, !tbaa [[TBAA8]] +// CHECK-NEXT: [[CALL:%.*]] = call i32 (i8 addrspace(4)*, ...) @printf(i8 addrspace(4)* noundef getelementptr inbounds ([3 x i8], [3 x i8] addrspace(4)* @.str.1, i64 0, i64 0), i32 noundef [[TMP0]]) #[[ATTR4]] +// CHECK-NEXT: ret void +// +__kernel void test_printf_int(int i) { + printf("%d", i); +} + +// CHECK-LABEL: @test_printf_str_int( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[I_ADDR:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-NEXT: [[S:%.*]] = alloca [4 x i8], align 1, addrspace(5) +// CHECK-NEXT: store i32 [[I:%.*]], i32 addrspace(5)* [[I_ADDR]], align 4, !tbaa [[TBAA8]] +// CHECK-NEXT: [[TMP0:%.*]] = bitcast [4 x i8] addrspace(5)* [[S]] to i8 addrspace(5)* +// CHECK-NEXT: call void @llvm.lifetime.start.p5i8(i64 4, i8 addrspace(5)* [[TMP0]]) #[[ATTR5:[0-9]+]] +// CHECK-NEXT: [[TMP1:%.*]] = bitcast [4 x i8] addrspace(5)* [[S]] to i8 addrspace(5)* +// CHECK-NEXT: call void @llvm.memcpy.p5i8.p4i8.i64(i8 addrspace(5)* align 1 [[TMP1]], i8 addrspace(4)* align 1 getelementptr inbounds ([4 x i8], [4 x i8] addrspace(4)* @__const.test_printf_str_int.s, i32 0, i32 0), i64 4, i1 false) +// CHECK-NEXT: [[ARRAYDECAY:%.*]] = getelementptr inbounds [4 x i8], [4 x i8] addrspace(5)* [[S]], i64 0, i64 0 +// CHECK-NEXT: [[TMP2:%.*]] = load i32, i32 addrspace(5)* [[I_ADDR]], align 4, !tbaa [[TBAA8]] +// CHECK-NEXT: [[CALL:%.*]] = call i32 (i8 addrspace(4)*, ...) @printf(i8 addrspace(4)* noundef getelementptr inbounds ([6 x i8], [6 x i8] addrspace(4)* @.str.2, i64 0, i64 0), i8 addrspace(5)* noundef [[ARRAYDECAY]], i32 noundef [[TMP2]]) #[[ATTR4]] +// CHECK-NEXT: [[TMP3:%.*]] = bitcast [4 x i8] addrspace(5)* [[S]] to i8 addrspace(5)* +// CHECK-NEXT: call void @llvm.lifetime.end.p5i8(i64 4, i8 addrspace(5)* [[TMP3]]) #[[ATTR5]] +// CHECK-NEXT: ret void +// +__kernel void test_printf_str_int(int i) { + char s[] = "foo"; + printf("%s:%d", s, i); +} diff --git a/llvm/docs/AMDGPUUsage.rst b/llvm/docs/AMDGPUUsage.rst --- a/llvm/docs/AMDGPUUsage.rst +++ b/llvm/docs/AMDGPUUsage.rst @@ -2826,12 +2826,16 @@ "HiddenPrintfBuffer" A global address space pointer to the runtime printf buffer - is passed in kernarg. + is passed in kernarg. Mutually + exclusive with + "HiddenHostcallBuffer". "HiddenHostcallBuffer" A global address space pointer to the runtime hostcall buffer - is passed in kernarg. + is passed in kernarg. Mutually + exclusive with + "HiddenPrintfBuffer". "HiddenDefaultQueue" A global address space pointer @@ -3351,12 +3355,18 @@ "hidden_printf_buffer" A global address space pointer to the runtime printf buffer - is passed in kernarg. + is passed in kernarg. Mutually + exclusive with + "hidden_hostcall_buffer" + before Code Object V5. "hidden_hostcall_buffer" A global address space pointer to the runtime hostcall buffer - is passed in kernarg. + is passed in kernarg. Mutually + exclusive with + "hidden_printf_buffer" + before Code Object V5. "hidden_default_queue" A global address space pointer diff --git a/llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.cpp b/llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.cpp --- a/llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.cpp @@ -401,17 +401,14 @@ auto Int8PtrTy = Type::getInt8PtrTy(Func.getContext(), AMDGPUAS::GLOBAL_ADDRESS); - // Emit "printf buffer" argument if printf is used, otherwise emit dummy - // "none" argument. + // Emit "printf buffer" argument if printf is used, emit "hostcall buffer" + // if "hostcall" module flag is set, otherwise emit dummy "none" argument. if (HiddenArgNumBytes >= 32) { if (Func.getParent()->getNamedMetadata("llvm.printf.fmts")) emitKernelArg(DL, Int8PtrTy, Align(8), ValueKind::HiddenPrintfBuffer); - else if (!Func.hasFnAttribute("amdgpu-no-hostcall-ptr")) { - // The printf runtime binding pass should have ensured that hostcall and - // printf are not used in the same module. - assert(!Func.getParent()->getNamedMetadata("llvm.printf.fmts")); + else if (!Func.hasFnAttribute("amdgpu-no-hostcall-ptr")) emitKernelArg(DL, Int8PtrTy, Align(8), ValueKind::HiddenHostcallBuffer); - } else + else emitKernelArg(DL, Int8PtrTy, Align(8), ValueKind::HiddenNone); } @@ -826,13 +823,10 @@ if (M->getNamedMetadata("llvm.printf.fmts")) emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_printf_buffer", Offset, Args); - else if (MFI.hasHostcallPtr()) { - // The printf runtime binding pass should have ensured that hostcall and - // printf are not used in the same module. - assert(!M->getNamedMetadata("llvm.printf.fmts")); + else if (MFI.hasHostcallPtr()) emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_hostcall_buffer", Offset, Args); - } else + else emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_none", Offset, Args); } diff --git a/llvm/lib/Target/AMDGPU/AMDGPUPrintfRuntimeBinding.cpp b/llvm/lib/Target/AMDGPU/AMDGPUPrintfRuntimeBinding.cpp --- a/llvm/lib/Target/AMDGPU/AMDGPUPrintfRuntimeBinding.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUPrintfRuntimeBinding.cpp @@ -563,15 +563,6 @@ if (Printfs.empty()) return false; - if (auto HostcallFunction = M.getFunction("__ockl_hostcall_internal")) { - for (auto &U : HostcallFunction->uses()) { - if (auto *CI = dyn_cast(U.getUser())) { - M.getContext().emitError( - CI, "Cannot use both printf and hostcall in the same module"); - } - } - } - TD = &M.getDataLayout(); return lowerPrintfForGpu(M); diff --git a/llvm/test/CodeGen/AMDGPU/hsa-metadata-from-llvm-ir-full-v3.ll b/llvm/test/CodeGen/AMDGPU/hsa-metadata-from-llvm-ir-full-v3.ll --- a/llvm/test/CodeGen/AMDGPU/hsa-metadata-from-llvm-ir-full-v3.ll +++ b/llvm/test/CodeGen/AMDGPU/hsa-metadata-from-llvm-ir-full-v3.ll @@ -1894,9 +1894,9 @@ ; CHECK-NEXT: - 1 ; CHECK-NEXT: - 0 -attributes #0 = { optnone noinline "amdgpu-implicitarg-num-bytes"="56" } -attributes #1 = { optnone noinline "amdgpu-implicitarg-num-bytes"="56" "runtime-handle"="__test_block_invoke_kernel_runtime_handle" } -attributes #2 = { optnone noinline "amdgpu-implicitarg-num-bytes"="56" "calls-enqueue-kernel" } +attributes #0 = { optnone noinline "amdgpu-implicitarg-num-bytes"="56" "amdgpu-no-hostcall-ptr" } +attributes #1 = { optnone noinline "amdgpu-implicitarg-num-bytes"="56" "amdgpu-no-hostcall-ptr" "runtime-handle"="__test_block_invoke_kernel_runtime_handle" } +attributes #2 = { optnone noinline "amdgpu-implicitarg-num-bytes"="56" "amdgpu-no-hostcall-ptr" "calls-enqueue-kernel" } !llvm.printf.fmts = !{!100, !101} diff --git a/llvm/test/CodeGen/AMDGPU/opencl-printf-and-hostcall.ll b/llvm/test/CodeGen/AMDGPU/opencl-printf-and-hostcall.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/opencl-printf-and-hostcall.ll @@ -0,0 +1,19 @@ +; RUN: opt -S -mtriple=amdgcn-unknown-unknown -amdgpu-printf-runtime-binding < %s 2>&1 | FileCheck %s + +@.str = private unnamed_addr addrspace(4) constant [6 x i8] c"%s:%d\00", align 1 + +define amdgpu_kernel void @test_kernel(i32 %n) { +entry: + %str = alloca [9 x i8], align 1, addrspace(5) + %arraydecay = getelementptr inbounds [9 x i8], [9 x i8] addrspace(5)* %str, i32 0, i32 0 + %call1 = call i32 (i8 addrspace(4)*, ...) @printf(i8 addrspace(4)* getelementptr inbounds ([6 x i8], [6 x i8] addrspace(4)* @.str, i32 0, i32 0), i8 addrspace(5)* %arraydecay, i32 %n) + %call2 = call <2 x i64> (i8*, i32, i64, i64, i64, i64, i64, i64, i64, i64) @__ockl_hostcall_internal(i8* undef, i32 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7, i64 8, i64 9) + ret void +} + +declare i32 @printf(i8 addrspace(4)*, ...) + +declare <2 x i64> @__ockl_hostcall_internal(i8*, i32, i64, i64, i64, i64, i64, i64, i64, i64) + +; CHECK-NOT: error: +; CHECK-NOT: warning: diff --git a/llvm/test/CodeGen/AMDGPU/opencl-printf-no-hostcall.ll b/llvm/test/CodeGen/AMDGPU/opencl-printf-no-hostcall.ll deleted file mode 100644 --- a/llvm/test/CodeGen/AMDGPU/opencl-printf-no-hostcall.ll +++ /dev/null @@ -1,18 +0,0 @@ -; RUN: not opt -S -mtriple=amdgcn-unknown-unknown -amdgpu-printf-runtime-binding < %s 2>&1 | FileCheck %s - -@.str = private unnamed_addr addrspace(2) constant [6 x i8] c"%s:%d\00", align 1 - -define amdgpu_kernel void @test_kernel(i32 %n) { -entry: - %str = alloca [9 x i8], align 1 - %arraydecay = getelementptr inbounds [9 x i8], [9 x i8]* %str, i32 0, i32 0 - %call1 = call i32 (i8 addrspace(2)*, ...) @printf(i8 addrspace(2)* getelementptr inbounds ([6 x i8], [6 x i8] addrspace(2)* @.str, i32 0, i32 0), i8* %arraydecay, i32 %n) - %call2 = call <2 x i64> (i8*, i32, i64, i64, i64, i64, i64, i64, i64, i64) @__ockl_hostcall_internal(i8* undef, i32 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7, i64 8, i64 9) - ret void -} - -declare i32 @printf(i8 addrspace(2)*, ...) - -declare <2 x i64> @__ockl_hostcall_internal(i8*, i32, i64, i64, i64, i64, i64, i64, i64, i64) - -; CHECK: error: Cannot use both printf and hostcall in the same module