diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.h b/llvm/lib/Target/AMDGPU/AMDGPU.h --- a/llvm/lib/Target/AMDGPU/AMDGPU.h +++ b/llvm/lib/Target/AMDGPU/AMDGPU.h @@ -10,8 +10,9 @@ #ifndef LLVM_LIB_TARGET_AMDGPU_AMDGPU_H #define LLVM_LIB_TARGET_AMDGPU_AMDGPU_H -#include "llvm/IR/IntrinsicsR600.h" // TODO: Sink this. #include "llvm/IR/IntrinsicsAMDGPU.h" // TODO: Sink this. +#include "llvm/IR/IntrinsicsR600.h" // TODO: Sink this. +#include "llvm/IR/PassManager.h" #include "llvm/Support/CodeGen.h" namespace llvm { @@ -75,6 +76,14 @@ FunctionPass *createAMDGPURewriteOutArgumentsPass(); FunctionPass *createSIModeRegisterPass(); +struct AMDGPUSimplifyLibCallsPass : PassInfoMixin { + PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM); +}; + +struct AMDGPUUseNativeCallsPass : PassInfoMixin { + PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM); +}; + void initializeAMDGPUDAGToDAGISelPass(PassRegistry&); void initializeAMDGPUMachineCFGStructurizerPass(PassRegistry&); diff --git a/llvm/lib/Target/AMDGPU/AMDGPULibCalls.cpp b/llvm/lib/Target/AMDGPU/AMDGPULibCalls.cpp --- a/llvm/lib/Target/AMDGPU/AMDGPULibCalls.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPULibCalls.cpp @@ -26,6 +26,7 @@ #include "llvm/IR/Intrinsics.h" #include "llvm/IR/LLVMContext.h" #include "llvm/IR/Module.h" +#include "llvm/IR/PassManager.h" #include "llvm/IR/ValueSymbolTable.h" #include "llvm/InitializePasses.h" #include "llvm/Support/Debug.h" @@ -1750,6 +1751,40 @@ return Changed; } +PreservedAnalyses AMDGPUSimplifyLibCallsPass::run(Function &F, + FunctionAnalysisManager &AM) { + AMDGPULibCalls Simplifier; + Simplifier.initNativeFuncs(); + + bool Changed = false; + auto AA = &AM.getResult(F); + + LLVM_DEBUG(dbgs() << "AMDIC: process function "; + F.printAsOperand(dbgs(), false, F.getParent()); dbgs() << '\n';); + + for (auto &BB : F) { + for (BasicBlock::iterator I = BB.begin(), E = BB.end(); I != E;) { + // Ignore non-calls. + CallInst *CI = dyn_cast(I); + ++I; + // Ignore intrinsics that do not become real instructions. + if (!CI || isa(CI) || CI->isLifetimeStartOrEnd()) + continue; + + // Ignore indirect calls. + Function *Callee = CI->getCalledFunction(); + if (Callee == 0) + continue; + + LLVM_DEBUG(dbgs() << "AMDIC: try folding " << *CI << "\n"; + dbgs().flush()); + if (Simplifier.fold(CI, AA)) + Changed = true; + } + } + return Changed ? PreservedAnalyses::none() : PreservedAnalyses::all(); +} + bool AMDGPUUseNativeCalls::runOnFunction(Function &F) { if (skipFunction(F) || UseNative.empty()) return false; @@ -1772,3 +1807,32 @@ } return Changed; } + +PreservedAnalyses AMDGPUUseNativeCallsPass::run(Function &F, + FunctionAnalysisManager &AM) { + if (UseNative.empty()) + return PreservedAnalyses::all(); + + AMDGPULibCalls Simplifier; + Simplifier.initNativeFuncs(); + + bool Changed = false; + for (auto &BB : F) { + for (BasicBlock::iterator I = BB.begin(), E = BB.end(); I != E;) { + // Ignore non-calls. + CallInst *CI = dyn_cast(I); + ++I; + if (!CI) + continue; + + // Ignore indirect calls. + Function *Callee = CI->getCalledFunction(); + if (Callee == 0) + continue; + + if (Simplifier.useNative(CI)) + Changed = true; + } + } + return Changed ? PreservedAnalyses::none() : PreservedAnalyses::all(); +} diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.h b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.h --- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.h +++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.h @@ -56,6 +56,9 @@ void adjustPassManager(PassManagerBuilder &) override; + void registerPassBuilderCallbacks(PassBuilder &PB, + bool DebugPassManager) override; + /// Get the integer value of a null pointer in the given address space. static int64_t getNullPointerValue(unsigned AddrSpace) { return (AddrSpace == AMDGPUAS::LOCAL_ADDRESS || diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp --- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp @@ -40,8 +40,10 @@ #include "llvm/IR/Attributes.h" #include "llvm/IR/Function.h" #include "llvm/IR/LegacyPassManager.h" +#include "llvm/IR/PassManager.h" #include "llvm/InitializePasses.h" #include "llvm/Pass.h" +#include "llvm/Passes/PassBuilder.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Compiler.h" #include "llvm/Support/TargetRegistry.h" @@ -52,6 +54,7 @@ #include "llvm/Transforms/Scalar.h" #include "llvm/Transforms/Scalar/GVN.h" #include "llvm/Transforms/Utils.h" +#include "llvm/Transforms/Utils/SimplifyLibCalls.h" #include "llvm/Transforms/Vectorize.h" #include @@ -482,6 +485,33 @@ }); } +void AMDGPUTargetMachine::registerPassBuilderCallbacks(PassBuilder &PB, + bool DebugPassManager) { + PB.registerPipelineParsingCallback( + [](StringRef PassName, FunctionPassManager &PM, + ArrayRef) { + if (PassName == "amdgpu-simplifylib") { + PM.addPass(AMDGPUSimplifyLibCallsPass()); + return true; + } + if (PassName == "amdgpu-usenative") { + PM.addPass(AMDGPUUseNativeCallsPass()); + return true; + } + return false; + }); + + PB.registerPipelineStartEPCallback([DebugPassManager]( + ModulePassManager &PM, + PassBuilder::OptimizationLevel Level) { + FunctionPassManager FPM(DebugPassManager); + FPM.addPass(AMDGPUUseNativeCallsPass()); + if (EnableLibCallSimplify && Level != PassBuilder::OptimizationLevel::O0) + FPM.addPass(AMDGPUSimplifyLibCallsPass()); + PM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM))); + }); +} + //===----------------------------------------------------------------------===// // R600 Target Machine (R600 -> Cayman) //===----------------------------------------------------------------------===// diff --git a/llvm/lib/Target/AMDGPU/CMakeLists.txt b/llvm/lib/Target/AMDGPU/CMakeLists.txt --- a/llvm/lib/Target/AMDGPU/CMakeLists.txt +++ b/llvm/lib/Target/AMDGPU/CMakeLists.txt @@ -152,6 +152,7 @@ Core IPO MC + Passes AMDGPUDesc AMDGPUInfo AMDGPUUtils diff --git a/llvm/test/CodeGen/AMDGPU/simplify-libcalls.ll b/llvm/test/CodeGen/AMDGPU/simplify-libcalls.ll --- a/llvm/test/CodeGen/AMDGPU/simplify-libcalls.ll +++ b/llvm/test/CodeGen/AMDGPU/simplify-libcalls.ll @@ -1,6 +1,9 @@ ; RUN: opt -S -O1 -mtriple=amdgcn-- -amdgpu-simplify-libcall < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=GCN-POSTLINK %s ; RUN: opt -S -O1 -mtriple=amdgcn-- -amdgpu-simplify-libcall -amdgpu-prelink <%s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=GCN-PRELINK %s ; RUN: opt -S -O1 -mtriple=amdgcn-- -amdgpu-use-native -amdgpu-prelink < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=GCN-NATIVE %s +; RUN: opt -S -passes='default' -mtriple=amdgcn-- -amdgpu-simplify-libcall < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=GCN-POSTLINK %s +; RUN: opt -S -passes='default' -mtriple=amdgcn-- -amdgpu-simplify-libcall -amdgpu-prelink <%s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=GCN-PRELINK %s +; RUN: opt -S -passes='default' -mtriple=amdgcn-- -amdgpu-use-native -amdgpu-prelink < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=GCN-NATIVE %s ; GCN-LABEL: {{^}}define amdgpu_kernel void @test_sincos ; GCN-POSTLINK: call fast float @_Z3sinf( diff --git a/llvm/test/CodeGen/AMDGPU/simplify-libcalls2.ll b/llvm/test/CodeGen/AMDGPU/simplify-libcalls2.ll --- a/llvm/test/CodeGen/AMDGPU/simplify-libcalls2.ll +++ b/llvm/test/CodeGen/AMDGPU/simplify-libcalls2.ll @@ -1,5 +1,6 @@ ; REQUIRES: asserts ; RUN: opt -S -amdgpu-simplifylib -debug-only=amdgpu-simplifylib -mtriple=amdgcn-unknown-amdhsa -disable-output < %s 2>&1 | FileCheck %s +; RUN: opt -S -passes=amdgpu-simplifylib -debug-only=amdgpu-simplifylib -mtriple=amdgcn-unknown-amdhsa -disable-output < %s 2>&1 | FileCheck %s ; CHECK-NOT: AMDIC: try folding call void @llvm.lifetime.start.p0i8 ; CHECK-NOT: AMDIC: try folding call void @llvm.lifetime.end.p0i8 diff --git a/llvm/utils/gn/secondary/llvm/lib/Target/AMDGPU/BUILD.gn b/llvm/utils/gn/secondary/llvm/lib/Target/AMDGPU/BUILD.gn --- a/llvm/utils/gn/secondary/llvm/lib/Target/AMDGPU/BUILD.gn +++ b/llvm/utils/gn/secondary/llvm/lib/Target/AMDGPU/BUILD.gn @@ -112,6 +112,7 @@ "//llvm/lib/CodeGen/SelectionDAG", "//llvm/lib/IR", "//llvm/lib/MC", + "//llvm/lib/Passes", "//llvm/lib/Support", "//llvm/lib/Target", "//llvm/lib/Transforms/IPO",