diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
@@ -687,11 +687,12 @@
 
   PB.registerPipelineEarlySimplificationEPCallback(
       [this](ModulePassManager &PM, OptimizationLevel Level) {
+        PM.addPass(AMDGPUPrintfRuntimeBindingPass());
+
         if (Level == OptimizationLevel::O0)
           return;
 
         PM.addPass(AMDGPUUnifyMetadataPass());
-        PM.addPass(AMDGPUPrintfRuntimeBindingPass());
 
         if (InternalizeSymbols) {
           PM.addPass(InternalizePass(mustPreserveGV));
diff --git a/llvm/test/CodeGen/AMDGPU/opencl-printf-pipeline.ll b/llvm/test/CodeGen/AMDGPU/opencl-printf-pipeline.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/opencl-printf-pipeline.ll
@@ -0,0 +1,30 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 2
+; RUN: opt -mtriple=amdgcn--amdhsa -passes=amdgpu-printf-runtime-binding -S < %s | FileCheck %s
+; RUN: opt -mtriple=amdgcn--amdhsa -O0 -S < %s | FileCheck %s
+
+; Check that the call to printf is replaced by a __printf_alloc sequence when
+; AMDGPUPrintfRuntimeBindingPass runs directly and as part of the -O0 pipeline.
+
+@.str = private unnamed_addr addrspace(4) constant [7 x i8] c"hello\0A\00", align 1
+
+define void @foo() {
+; CHECK-LABEL: define void @foo() {
+; CHECK-NEXT:    [[PRINTF_ALLOC_FN:%.*]] = call ptr addrspace(1) @__printf_alloc(i32 4)
+; CHECK-NEXT:    br label [[DOTSPLIT:%.*]]
+; CHECK:       .split:
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp ne ptr addrspace(1) [[PRINTF_ALLOC_FN]], null
+; CHECK-NEXT:    br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP3:%.*]]
+; CHECK:       2:
+; CHECK-NEXT:    [[PRINTBUFFID:%.*]] = getelementptr i8, ptr addrspace(1) [[PRINTF_ALLOC_FN]], i32 0
+; CHECK-NEXT:    [[PRINTBUFFIDCAST:%.*]] = bitcast ptr addrspace(1) [[PRINTBUFFID]] to ptr addrspace(1)
+; CHECK-NEXT:    store i32 1, ptr addrspace(1) [[PRINTBUFFIDCAST]], align 4
+; CHECK-NEXT:    [[PRINTBUFFGEP:%.*]] = getelementptr i8, ptr addrspace(1) [[PRINTF_ALLOC_FN]], i32 4
+; CHECK-NEXT:    br label [[TMP3]]
+; CHECK:       3:
+; CHECK-NEXT:    ret void
+;
+  %call = call i32 @printf(ptr addrspace(4) @.str)
+  ret void
+}
+
+declare hidden i32 @printf(ptr addrspace(4), ...)