diff --git a/clang/test/CodeGenOpenCL/amdgpu-printf.cl b/clang/test/CodeGenOpenCL/amdgpu-printf.cl new file mode 100644 --- /dev/null +++ b/clang/test/CodeGenOpenCL/amdgpu-printf.cl @@ -0,0 +1,46 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py +// RUN: %clang_cc1 -cl-std=CL1.2 -triple amdgcn-amd-amdhsa -disable-llvm-passes -emit-llvm -o - %s | FileCheck %s + +int printf(__constant const char* st, ...) __attribute__((format(printf, 1, 2))); + +// CHECK-LABEL: @test_printf_noargs( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[CALL:%.*]] = call i32 (i8 addrspace(4)*, ...) @printf(i8 addrspace(4)* noundef getelementptr inbounds ([1 x i8], [1 x i8] addrspace(4)* @.str, i64 0, i64 0)) #[[ATTR4:[0-9]+]] +// CHECK-NEXT: ret void +// +__kernel void test_printf_noargs() { + printf(""); +} + +// CHECK-LABEL: @test_printf_int( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[I_ADDR:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-NEXT: store i32 [[I:%.*]], i32 addrspace(5)* [[I_ADDR]], align 4, !tbaa [[TBAA8:![0-9]+]] +// CHECK-NEXT: [[TMP0:%.*]] = load i32, i32 addrspace(5)* [[I_ADDR]], align 4, !tbaa [[TBAA8]] +// CHECK-NEXT: [[CALL:%.*]] = call i32 (i8 addrspace(4)*, ...) @printf(i8 addrspace(4)* noundef getelementptr inbounds ([3 x i8], [3 x i8] addrspace(4)* @.str.1, i64 0, i64 0), i32 noundef [[TMP0]]) #[[ATTR4]] +// CHECK-NEXT: ret void +// +__kernel void test_printf_int(int i) { + printf("%d", i); +} + +// CHECK-LABEL: @test_printf_str_int( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[I_ADDR:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-NEXT: [[S:%.*]] = alloca [4 x i8], align 1, addrspace(5) +// CHECK-NEXT: store i32 [[I:%.*]], i32 addrspace(5)* [[I_ADDR]], align 4, !tbaa [[TBAA8]] +// CHECK-NEXT: [[TMP0:%.*]] = bitcast [4 x i8] addrspace(5)* [[S]] to i8 addrspace(5)* +// CHECK-NEXT: call void @llvm.lifetime.start.p5i8(i64 4, i8 addrspace(5)* [[TMP0]]) #[[ATTR5:[0-9]+]] +// CHECK-NEXT: [[TMP1:%.*]] = bitcast [4 x i8] addrspace(5)* [[S]] to i8 addrspace(5)* +// CHECK-NEXT: call void @llvm.memcpy.p5i8.p4i8.i64(i8 addrspace(5)* align 1 [[TMP1]], i8 addrspace(4)* align 1 getelementptr inbounds ([4 x i8], [4 x i8] addrspace(4)* @__const.test_printf_str_int.s, i32 0, i32 0), i64 4, i1 false) +// CHECK-NEXT: [[ARRAYDECAY:%.*]] = getelementptr inbounds [4 x i8], [4 x i8] addrspace(5)* [[S]], i64 0, i64 0 +// CHECK-NEXT: [[TMP2:%.*]] = load i32, i32 addrspace(5)* [[I_ADDR]], align 4, !tbaa [[TBAA8]] +// CHECK-NEXT: [[CALL:%.*]] = call i32 (i8 addrspace(4)*, ...) @printf(i8 addrspace(4)* noundef getelementptr inbounds ([6 x i8], [6 x i8] addrspace(4)* @.str.2, i64 0, i64 0), i8 addrspace(5)* noundef [[ARRAYDECAY]], i32 noundef [[TMP2]]) #[[ATTR4]] +// CHECK-NEXT: [[TMP3:%.*]] = bitcast [4 x i8] addrspace(5)* [[S]] to i8 addrspace(5)* +// CHECK-NEXT: call void @llvm.lifetime.end.p5i8(i64 4, i8 addrspace(5)* [[TMP3]]) #[[ATTR5]] +// CHECK-NEXT: ret void +// +__kernel void test_printf_str_int(int i) { + char s[] = "foo"; + printf("%s:%d", s, i); +} diff --git a/llvm/docs/AMDGPUUsage.rst b/llvm/docs/AMDGPUUsage.rst --- a/llvm/docs/AMDGPUUsage.rst +++ b/llvm/docs/AMDGPUUsage.rst @@ -2827,12 +2827,16 @@ "HiddenPrintfBuffer" A global address space pointer to the runtime printf buffer - is passed in kernarg. + is passed in kernarg. Mutually + exclusive with + "HiddenHostcallBuffer". "HiddenHostcallBuffer" A global address space pointer to the runtime hostcall buffer - is passed in kernarg. + is passed in kernarg. Mutually + exclusive with + "HiddenPrintfBuffer". "HiddenDefaultQueue" A global address space pointer @@ -3352,12 +3356,18 @@ "hidden_printf_buffer" A global address space pointer to the runtime printf buffer - is passed in kernarg. + is passed in kernarg. Mutually + exclusive with + "hidden_hostcall_buffer" + before Code Object V5. "hidden_hostcall_buffer" A global address space pointer to the runtime hostcall buffer - is passed in kernarg. + is passed in kernarg. Mutually + exclusive with + "hidden_printf_buffer" + before Code Object V5. "hidden_default_queue" A global address space pointer diff --git a/llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.cpp b/llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.cpp --- a/llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.cpp @@ -401,17 +401,15 @@ auto Int8PtrTy = Type::getInt8PtrTy(Func.getContext(), AMDGPUAS::GLOBAL_ADDRESS); - // Emit "printf buffer" argument if printf is used, otherwise emit dummy - // "none" argument. if (HiddenArgNumBytes >= 32) { + // We forbid the use of features requiring hostcall when compiling OpenCL + // before code object V5, which makes the mutual exclusion between the + // "printf buffer" and "hostcall buffer" here sound. if (Func.getParent()->getNamedMetadata("llvm.printf.fmts")) emitKernelArg(DL, Int8PtrTy, Align(8), ValueKind::HiddenPrintfBuffer); - else if (!Func.hasFnAttribute("amdgpu-no-hostcall-ptr")) { - // The printf runtime binding pass should have ensured that hostcall and - // printf are not used in the same module. - assert(!Func.getParent()->getNamedMetadata("llvm.printf.fmts")); + else if (!Func.hasFnAttribute("amdgpu-no-hostcall-ptr")) emitKernelArg(DL, Int8PtrTy, Align(8), ValueKind::HiddenHostcallBuffer); - } else + else emitKernelArg(DL, Int8PtrTy, Align(8), ValueKind::HiddenNone); } @@ -820,19 +818,17 @@ auto Int8PtrTy = Type::getInt8PtrTy(Func.getContext(), AMDGPUAS::GLOBAL_ADDRESS); - // Emit "printf buffer" argument if printf is used, emit "hostcall buffer" - // if "hostcall" module flag is set, otherwise emit dummy "none" argument. if (HiddenArgNumBytes >= 32) { + // We forbid the use of features requiring hostcall when compiling OpenCL + // before code object V5, which makes the mutual exclusion between the + // "printf buffer" and "hostcall buffer" here sound. if (M->getNamedMetadata("llvm.printf.fmts")) emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_printf_buffer", Offset, Args); - else if (MFI.hasHostcallPtr()) { - // The printf runtime binding pass should have ensured that hostcall and - // printf are not used in the same module. - assert(!M->getNamedMetadata("llvm.printf.fmts")); + else if (MFI.hasHostcallPtr()) emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_hostcall_buffer", Offset, Args); - } else + else emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_none", Offset, Args); } diff --git a/llvm/lib/Target/AMDGPU/AMDGPUPrintfRuntimeBinding.cpp b/llvm/lib/Target/AMDGPU/AMDGPUPrintfRuntimeBinding.cpp --- a/llvm/lib/Target/AMDGPU/AMDGPUPrintfRuntimeBinding.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUPrintfRuntimeBinding.cpp @@ -563,15 +563,6 @@ if (Printfs.empty()) return false; - if (auto HostcallFunction = M.getFunction("__ockl_hostcall_internal")) { - for (auto &U : HostcallFunction->uses()) { - if (auto *CI = dyn_cast(U.getUser())) { - M.getContext().emitError( - CI, "Cannot use both printf and hostcall in the same module"); - } - } - } - TD = &M.getDataLayout(); return lowerPrintfForGpu(M); diff --git a/llvm/test/CodeGen/AMDGPU/opencl-printf-and-hostcall.ll b/llvm/test/CodeGen/AMDGPU/opencl-printf-and-hostcall.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/opencl-printf-and-hostcall.ll @@ -0,0 +1,19 @@ +; RUN: opt -S -mtriple=amdgcn-unknown-unknown -amdgpu-printf-runtime-binding < %s 2>&1 | FileCheck %s + +@.str = private unnamed_addr addrspace(4) constant [6 x i8] c"%s:%d\00", align 1 + +define amdgpu_kernel void @test_kernel(i32 %n) { +entry: + %str = alloca [9 x i8], align 1, addrspace(5) + %arraydecay = getelementptr inbounds [9 x i8], [9 x i8] addrspace(5)* %str, i32 0, i32 0 + %call1 = call i32 (i8 addrspace(4)*, ...) @printf(i8 addrspace(4)* getelementptr inbounds ([6 x i8], [6 x i8] addrspace(4)* @.str, i32 0, i32 0), i8 addrspace(5)* %arraydecay, i32 %n) + %call2 = call <2 x i64> (i8*, i32, i64, i64, i64, i64, i64, i64, i64, i64) @__ockl_hostcall_internal(i8* undef, i32 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7, i64 8, i64 9) + ret void +} + +declare i32 @printf(i8 addrspace(4)*, ...) + +declare <2 x i64> @__ockl_hostcall_internal(i8*, i32, i64, i64, i64, i64, i64, i64, i64, i64) + +; CHECK-NOT: error: +; CHECK-NOT: warning: diff --git a/llvm/test/CodeGen/AMDGPU/opencl-printf-no-hostcall.ll b/llvm/test/CodeGen/AMDGPU/opencl-printf-no-hostcall.ll deleted file mode 100644 --- a/llvm/test/CodeGen/AMDGPU/opencl-printf-no-hostcall.ll +++ /dev/null @@ -1,18 +0,0 @@ -; RUN: not opt -S -mtriple=amdgcn-unknown-unknown -amdgpu-printf-runtime-binding < %s 2>&1 | FileCheck %s - -@.str = private unnamed_addr addrspace(2) constant [6 x i8] c"%s:%d\00", align 1 - -define amdgpu_kernel void @test_kernel(i32 %n) { -entry: - %str = alloca [9 x i8], align 1 - %arraydecay = getelementptr inbounds [9 x i8], [9 x i8]* %str, i32 0, i32 0 - %call1 = call i32 (i8 addrspace(2)*, ...) @printf(i8 addrspace(2)* getelementptr inbounds ([6 x i8], [6 x i8] addrspace(2)* @.str, i32 0, i32 0), i8* %arraydecay, i32 %n) - %call2 = call <2 x i64> (i8*, i32, i64, i64, i64, i64, i64, i64, i64, i64) @__ockl_hostcall_internal(i8* undef, i32 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7, i64 8, i64 9) - ret void -} - -declare i32 @printf(i8 addrspace(2)*, ...) - -declare <2 x i64> @__ockl_hostcall_internal(i8*, i32, i64, i64, i64, i64, i64, i64, i64, i64) - -; CHECK: error: Cannot use both printf and hostcall in the same module