Index: llvm/include/llvm/InitializePasses.h
===================================================================
--- llvm/include/llvm/InitializePasses.h
+++ llvm/include/llvm/InitializePasses.h
@@ -194,6 +194,7 @@
 void initializeIndVarSimplifyLegacyPassPass(PassRegistry&);
 void initializeIndirectBrExpandPassPass(PassRegistry&);
 void initializeInferAddressSpacesPass(PassRegistry&);
+void initializeInferArgAddressSpacesPass(PassRegistry&);
 void initializeInferFunctionAttrsLegacyPassPass(PassRegistry&);
 void initializeInjectTLIMappingsLegacyPass(PassRegistry &);
 void initializeInlineCostAnalysisPass(PassRegistry&);
Index: llvm/include/llvm/Transforms/IPO/InferArgumentAddressSpaces.h
===================================================================
--- /dev/null
+++ llvm/include/llvm/Transforms/IPO/InferArgumentAddressSpaces.h
@@ -0,0 +1,27 @@
+//===-- InferArgumentAddressSpaces.h --------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// Infer pointer argument address spaces for local functions based on their
+/// call sites. When every call site passes a pointer with the same address
+/// space for an argument, that address space can be inferred in the callee.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TRANSFORMS_IPO_INFERARGUMENTADDRESSSPACES_H
+#define LLVM_TRANSFORMS_IPO_INFERARGUMENTADDRESSSPACES_H
+
+namespace llvm {
+class Pass;
+
+/// Create a pass to infer function arguments' address spaces.
+Pass *createInferArgAddressSpacesPass();
+
+} // end namespace llvm
+
+#endif // LLVM_TRANSFORMS_IPO_INFERARGUMENTADDRESSSPACES_H
Index: llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
===================================================================
--- llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
+++ llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
@@ -50,6 +50,7 @@
 #include "llvm/Transforms/IPO.h"
 #include "llvm/Transforms/IPO/AlwaysInliner.h"
 #include "llvm/Transforms/IPO/GlobalDCE.h"
+#include "llvm/Transforms/IPO/InferArgumentAddressSpaces.h"
 #include "llvm/Transforms/IPO/Internalize.h"
 #include "llvm/Transforms/IPO/PassManagerBuilder.h"
 #include "llvm/Transforms/Scalar.h"
@@ -401,6 +402,7 @@
   initializeAMDGPUResourceUsageAnalysisPass(*PR);
   initializeGCNNSAReassignPass(*PR);
   initializeGCNPreRAOptimizationsPass(*PR);
+  initializeInferArgAddressSpacesPass(*PR);
 }
 
 static std::unique_ptr<TargetLoweringObjectFile> createTLOF(const Triple &TT) {
@@ -1025,6 +1027,9 @@
   // without ever running any passes on the second.
   addPass(createBarrierNoopPass());
 
+  if (TM.getOptLevel() > CodeGenOpt::None)
+    addPass(createInferArgAddressSpacesPass());
+
   // Handle uses of OpenCL image2d_t, image3d_t and sampler_t arguments.
   if (TM.getTargetTriple().getArch() == Triple::r600)
     addPass(createR600OpenCLImageTypeLoweringPass());
Index: llvm/lib/Transforms/IPO/CMakeLists.txt
===================================================================
--- llvm/lib/Transforms/IPO/CMakeLists.txt
+++ llvm/lib/Transforms/IPO/CMakeLists.txt
@@ -22,6 +22,7 @@
   HotColdSplitting.cpp
   IPO.cpp
   IROutliner.cpp
+  InferArgumentAddressSpaces.cpp
   InferFunctionAttrs.cpp
   InlineSimple.cpp
   Inliner.cpp
Index: llvm/lib/Transforms/IPO/IPO.cpp
===================================================================
--- llvm/lib/Transforms/IPO/IPO.cpp
+++ llvm/lib/Transforms/IPO/IPO.cpp
@@ -39,6 +39,7 @@
   initializeIROutlinerLegacyPassPass(Registry);
   initializeAlwaysInlinerLegacyPassPass(Registry);
   initializeSimpleInlinerPass(Registry);
+  initializeInferArgAddressSpacesPass(Registry);
   initializeInferFunctionAttrsLegacyPassPass(Registry);
   initializeInternalizeLegacyPassPass(Registry);
   initializeLoopExtractorLegacyPassPass(Registry);
Index: llvm/lib/Transforms/IPO/InferArgumentAddressSpaces.cpp
===================================================================
--- /dev/null
+++ llvm/lib/Transforms/IPO/InferArgumentAddressSpaces.cpp
@@ -0,0 +1,185 @@
+//===- InferArgumentAddressSpaces.cpp -------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// Infer pointer argument address spaces for local functions based on their
+/// call sites. When every call site passes a pointer with the same address
+/// space for an argument, that address space can be inferred in the callee.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/IPO/InferArgumentAddressSpaces.h"
+#include "llvm/ADT/SCCIterator.h"
+#include "llvm/ADT/SetVector.h"
+#include "llvm/Analysis/CallGraph.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/InitializePasses.h"
+#include "llvm/Support/Debug.h"
+#include <climits>
+
+#define DEBUG_TYPE "infer-argument-address-spaces"
+
+using namespace llvm;
+
+namespace {
+class InferArgAddressSpaces : public ModulePass {
+private:
+  bool handleFunction(Function &F);
+
+public:
+  static char ID;
+
+  InferArgAddressSpaces() : ModulePass(ID) {}
+
+  bool runOnModule(Module &M) override;
+
+  StringRef getPassName() const override {
+    return "Infer Argument Address Spaces";
+  }
+
+  void getAnalysisUsage(AnalysisUsage &AU) const override {
+    AU.setPreservesAll();
+    AU.addRequired<CallGraphWrapperPass>();
+    ModulePass::getAnalysisUsage(AU);
+  }
+};
+
+} // end anonymous namespace
+
+char InferArgAddressSpaces::ID = 0;
+
+INITIALIZE_PASS(InferArgAddressSpaces, DEBUG_TYPE,
+                "Infer Argument Address Spaces", false, false)
+
+// Return the address space of the pointer \p V if it can be deduced,
+// UINT_MAX otherwise.
+static unsigned getPointerAddressSpace(const Value *V) {
+  // Strip bitcasts and GEPs to get to the underlying pointer.
+  for ( ; ; ) {
+    if (auto *BC = dyn_cast<BitCastInst>(V)) {
+      V = BC->getOperand(0);
+      continue;
+    }
+
+    if (auto *GEP = dyn_cast<GetElementPtrInst>(V)) {
+      V = GEP->getPointerOperand();
+      continue;
+    }
+
+    break;
+  }
+
+  if (auto *AC = dyn_cast<AddrSpaceCastInst>(V))
+    return AC->getSrcAddressSpace();
+
+  return UINT_MAX;
+}
+
+bool InferArgAddressSpaces::handleFunction(Function &F) {
+  LLVM_DEBUG(dbgs() << "Infer argument address spaces running on '"
+                    << F.getName() << "'\n");
+
+  if (F.use_empty())
+    return false;
+
+  // Pairs of pointer argument index and its inferred address space.
+  std::list<std::pair<unsigned, unsigned>> PointerArgs;
+  unsigned I = 0;
+  for (Argument &Arg : F.args()) {
+    if (auto *PtrTy = dyn_cast<PointerType>(Arg.getType()))
+      // Skip pointers which already have a specific address space, and
+      // generic pointers already coerced to a specific address space.
+      // Essentially make sure the coercion only goes in one direction.
+      if (!PtrTy->getAddressSpace() &&
+          !(Arg.hasOneUse() && isa<AddrSpaceCastInst>(*Arg.user_begin())))
+        PointerArgs.push_back(std::make_pair(I, UINT_MAX));
+    ++I;
+  }
+
+  if (PointerArgs.empty())
+    return false;
+
+  FunctionType *FTy = F.getFunctionType();
+  for (auto *U : F.users()) {
+    CallInst *CI = dyn_cast<CallInst>(U);
+    if (!CI || CI->getCalledFunction() != &F)
+      return false;
+
+    decltype(PointerArgs)::iterator Next;
+    for (auto I = PointerArgs.begin(), E = PointerArgs.end(); I != E;
+         I = Next) {
+      Next = std::next(I);
+      Value *Op = CI->getArgOperand(I->first);
+      unsigned AS = getPointerAddressSpace(Op);
+      unsigned RecordedAS = I->second;
+
+      // Drop arguments for which we cannot determine the address space at
+      // all, for which different call sites use different address spaces, or
+      // for which a call site uses the address space the pointer already has.
+      if (AS == UINT_MAX || (AS != RecordedAS && RecordedAS != UINT_MAX) ||
+          AS == FTy->getParamType(I->first)->getPointerAddressSpace()) {
+        PointerArgs.erase(I);
+        if (PointerArgs.empty())
+          return false;
+        continue;
+      }
+
+      I->second = AS;
+    }
+  }
+
+  // We have a list of pointer arguments with a unique address space across
+  // all call sites. Now we can actually infer their address spaces.
+  bool Changed = false;
+  BasicBlock &Entry = F.getEntryBlock();
+  IRBuilder<> B(&Entry, Entry.getFirstInsertionPt());
+  for (auto I : PointerArgs) {
+    Argument *Arg = F.getArg(I.first);
+    LLVM_DEBUG(dbgs() << "  Coerce argument '" << Arg->getName()
+                      << "' to address space " << I.second << '\n');
+
+    PointerType *PT = cast<PointerType>(Arg->getType());
+    PointerType *NewPT = PointerType::getWithSamePointeeType(PT, I.second);
+    Value *Cast =
+        B.CreateAddrSpaceCast(Arg, NewPT, Twine(Arg->getName(), ".coerce"));
+    Value *CastBack =
+        B.CreateAddrSpaceCast(Cast, PT, Twine(Arg->getName(), ".ptr"));
+    Arg->replaceUsesWithIf(CastBack,
+                           [Cast](Use &U) { return U.getUser() != Cast; });
+    Changed = true;
+  }
+
+  return Changed;
+}
+
+bool InferArgAddressSpaces::runOnModule(Module &M) {
+  if (skipModule(M))
+    return false;
+
+  CallGraphWrapperPass &CGPass = getAnalysis<CallGraphWrapperPass>();
+  const CallGraph &CG = CGPass.getCallGraph();
+
+  SetVector<Function *> Worklist;
+  for (scc_iterator<const CallGraph *> CGI = scc_begin(&CG); !CGI.isAtEnd();
+       ++CGI) {
+    for (const CallGraphNode *I : *CGI) {
+      Function *F = I->getFunction();
+      if (!F || F->isDeclaration() || F->hasAvailableExternallyLinkage())
+        continue;
+
+      Worklist.insert(F);
+    }
+  }
+
+  bool Changed = false;
+
+  // Process functions in reverse order, starting from the callers.
+  while (!Worklist.empty())
+    Changed |= handleFunction(*Worklist.pop_back_val());
+
+  return Changed;
+}
+
+Pass *llvm::createInferArgAddressSpacesPass() {
+  return new InferArgAddressSpaces();
+}
Index: llvm/test/CodeGen/AMDGPU/infer-arg-addrspaces.ll
===================================================================
--- /dev/null
+++ llvm/test/CodeGen/AMDGPU/infer-arg-addrspaces.ll
@@ -0,0 +1,26 @@
+; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck --check-prefix=GCN %s
+
+; GCN-LABEL: {{^}}bar:
+; GCN: global_store_dword
+define internal fastcc void @bar(ptr nocapture noundef writeonly %p) #0 {
+entry:
+  store i32 0, ptr %p, align 4
+  ret void
+}
+
+; GCN-LABEL: {{^}}foo:
+define internal fastcc void @foo(ptr nocapture noundef writeonly %p) #0 {
+entry:
+  tail call fastcc void @bar(ptr noundef %p)
+  ret void
+}
+
+; GCN-LABEL: {{^}}caller:
+define dso_local amdgpu_kernel void @caller(ptr addrspace(1) nocapture noundef writeonly align 4 %p) {
+entry:
+  %c = addrspacecast ptr addrspace(1) %p to ptr
+  tail call fastcc void @foo(ptr noundef %c)
+  ret void
+}
+
+attributes #0 = { noinline }
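
For illustration only, not part of the patch: with the pipeline change above, @bar from the test is expected to come out of this pass roughly as sketched below; the .coerce/.ptr value names are the Twine suffixes used in handleFunction. The argument itself keeps its generic pointer type and only its uses are rerouted through the cast pair, so the existing InferAddressSpaces function pass that runs later in the AMDGPU pipeline can fold the casts and the store can select global_store_dword.

  ; Sketch of @bar after infer-argument-address-spaces, before InferAddressSpaces.
  define internal fastcc void @bar(ptr nocapture noundef writeonly %p) #0 {
  entry:
    %p.coerce = addrspacecast ptr %p to ptr addrspace(1)
    %p.ptr = addrspacecast ptr addrspace(1) %p.coerce to ptr
    store i32 0, ptr %p.ptr, align 4
    ret void
  }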