Summary of this patch (descriptive text only; it sits above the first
"diff --git" header and is not part of any hunk):

* AMDGPUTargetMachine.cpp: inside the callback registered with
  PB.registerPipelineEarlySimplificationEPCallback, the
  PM.addPass(AMDGPUPrintfRuntimeBindingPass()) call is moved from after
  AMDGPUUnifyMetadataPass to before the `Level == OptimizationLevel::O0`
  early return, so the pass is added at every optimization level,
  including O0.
* opencl-printf-pipeline.ll: new test that runs the pass on its own and
  through the -O0, -O1, -O2 and -O3 pipelines, and checks that no call to
  @printf remains in @foo.

diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
@@ -682,11 +682,12 @@
 
   PB.registerPipelineEarlySimplificationEPCallback(
       [this](ModulePassManager &PM, OptimizationLevel Level) {
+        PM.addPass(AMDGPUPrintfRuntimeBindingPass());
+
         if (Level == OptimizationLevel::O0)
           return;
 
         PM.addPass(AMDGPUUnifyMetadataPass());
-        PM.addPass(AMDGPUPrintfRuntimeBindingPass());
 
         if (InternalizeSymbols) {
           PM.addPass(InternalizePass(mustPreserveGV));
diff --git a/llvm/test/CodeGen/AMDGPU/opencl-printf-pipeline.ll b/llvm/test/CodeGen/AMDGPU/opencl-printf-pipeline.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/opencl-printf-pipeline.ll
@@ -0,0 +1,18 @@
+; RUN: opt -mtriple=amdgcn--amdhsa -passes=amdgpu-printf-runtime-binding -S < %s | FileCheck %s
+; RUN: opt -mtriple=amdgcn--amdhsa -O0 -S < %s | FileCheck %s
+; RUN: opt -mtriple=amdgcn--amdhsa -O1 -S < %s | FileCheck %s
+; RUN: opt -mtriple=amdgcn--amdhsa -O2 -S < %s | FileCheck %s
+; RUN: opt -mtriple=amdgcn--amdhsa -O3 -S < %s | FileCheck %s
+
+@.str = private unnamed_addr addrspace(4) constant [7 x i8] c"hello\0A\00", align 1
+
+; Check that AMDGPUPrintfRuntimeBindingPass removes the call to printf, both when it
+; is run on its own and when it runs as part of the -O0, -O1, -O2 and -O3 pipelines.
+; CHECK-LABEL: define void @foo
+; CHECK-NOT: call i32{{.*}}@printf
+define void @foo() {
+  %call = call i32 (ptr addrspace(4), ...) @printf(ptr addrspace(4) noundef @.str)
+  ret void
+}
+
+declare hidden i32 @printf(ptr addrspace(4) noundef, ...)