diff --git a/llvm/include/llvm/Transforms/Scalar/InferAddressSpaces.h b/llvm/include/llvm/Transforms/Scalar/InferAddressSpaces.h
new file mode 100644
--- /dev/null
+++ b/llvm/include/llvm/Transforms/Scalar/InferAddressSpaces.h
@@ -0,0 +1,27 @@
+//===- InferAddressSpaces.h - --------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TRANSFORMS_SCALAR_INFERADDRESSSPACES_H
+#define LLVM_TRANSFORMS_SCALAR_INFERADDRESSSPACES_H
+
+#include "llvm/IR/PassManager.h"
+
+namespace llvm {
+
+struct InferAddressSpacesPass : PassInfoMixin<InferAddressSpacesPass> {
+  InferAddressSpacesPass();
+  InferAddressSpacesPass(unsigned AddressSpace);
+  PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
+
+private:
+  unsigned FlatAddrSpace = 0;
+};
+
+} // end namespace llvm
+
+#endif // LLVM_TRANSFORMS_SCALAR_INFERADDRESSSPACES_H
diff --git a/llvm/lib/Passes/PassBuilder.cpp b/llvm/lib/Passes/PassBuilder.cpp
--- a/llvm/lib/Passes/PassBuilder.cpp
+++ b/llvm/lib/Passes/PassBuilder.cpp
@@ -102,10 +102,10 @@
 #include "llvm/Transforms/IPO/GlobalOpt.h"
 #include "llvm/Transforms/IPO/GlobalSplit.h"
 #include "llvm/Transforms/IPO/HotColdSplitting.h"
+#include "llvm/Transforms/IPO/IROutliner.h"
 #include "llvm/Transforms/IPO/InferFunctionAttrs.h"
 #include "llvm/Transforms/IPO/Inliner.h"
 #include "llvm/Transforms/IPO/Internalize.h"
-#include "llvm/Transforms/IPO/IROutliner.h"
 #include "llvm/Transforms/IPO/LoopExtractor.h"
 #include "llvm/Transforms/IPO/LowerTypeTests.h"
 #include "llvm/Transforms/IPO/MergeFunctions.h"
@@ -154,6 +154,7 @@
 #include "llvm/Transforms/Scalar/IVUsersPrinter.h"
 #include "llvm/Transforms/Scalar/IndVarSimplify.h"
 #include "llvm/Transforms/Scalar/InductiveRangeCheckElimination.h"
+#include "llvm/Transforms/Scalar/InferAddressSpaces.h"
 #include "llvm/Transforms/Scalar/InstSimplifyPass.h"
 #include "llvm/Transforms/Scalar/JumpThreading.h"
 #include "llvm/Transforms/Scalar/LICM.h"
diff --git a/llvm/lib/Passes/PassRegistry.def b/llvm/lib/Passes/PassRegistry.def
--- a/llvm/lib/Passes/PassRegistry.def
+++ b/llvm/lib/Passes/PassRegistry.def
@@ -227,6 +227,7 @@
 FUNCTION_PASS("gvn-hoist", GVNHoistPass())
 FUNCTION_PASS("gvn-sink", GVNSinkPass())
 FUNCTION_PASS("helloworld", HelloWorldPass())
+FUNCTION_PASS("infer-address-spaces", InferAddressSpacesPass())
 FUNCTION_PASS("instcombine", InstCombinePass())
 FUNCTION_PASS("instcount", InstCountPass())
 FUNCTION_PASS("instsimplify", InstSimplifyPass())
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
@@ -54,6 +54,7 @@
 #include "llvm/Transforms/IPO/PassManagerBuilder.h"
 #include "llvm/Transforms/Scalar.h"
 #include "llvm/Transforms/Scalar/GVN.h"
+#include "llvm/Transforms/Scalar/InferAddressSpaces.h"
 #include "llvm/Transforms/Utils.h"
 #include "llvm/Transforms/Utils/SimplifyLibCalls.h"
 #include "llvm/Transforms/Vectorize.h"
@@ -523,13 +524,20 @@
   PB.registerCGSCCOptimizerLateEPCallback(
       [this, DebugPassManager](CGSCCPassManager &PM,
                                PassBuilder::OptimizationLevel Level) {
-        if (Level != PassBuilder::OptimizationLevel::O0) {
         FunctionPassManager FPM(DebugPassManager);
-          // Promote alloca to vector before SROA and loop unroll. If we manage
-          // to eliminate allocas before unroll we may choose to unroll less.
-          FPM.addPass(AMDGPUPromoteAllocaToVectorPass(*this));
+
+        // Add infer address spaces pass to the opt pipeline after inlining
+        // but before SROA to increase SROA opportunities.
+        FPM.addPass(InferAddressSpacesPass());
+
+        if (Level != PassBuilder::OptimizationLevel::O0) {
+          // Promote alloca to vector before SROA and loop unroll. If we
+          // manage to eliminate allocas before unroll we may choose to unroll
+          // less.
+          FPM.addPass(AMDGPUPromoteAllocaToVectorPass(*this));
+        }
+
         PM.addPass(createCGSCCToFunctionPassAdaptor(std::move(FPM)));
-        }
       });
 }
diff --git a/llvm/lib/Transforms/Scalar/InferAddressSpaces.cpp b/llvm/lib/Transforms/Scalar/InferAddressSpaces.cpp
--- a/llvm/lib/Transforms/Scalar/InferAddressSpaces.cpp
+++ b/llvm/lib/Transforms/Scalar/InferAddressSpaces.cpp
@@ -88,6 +88,7 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include "llvm/Transforms/Scalar/InferAddressSpaces.h"
 #include "llvm/ADT/ArrayRef.h"
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/DenseSet.h"
@@ -108,6 +109,7 @@
 #include "llvm/IR/Intrinsics.h"
 #include "llvm/IR/LLVMContext.h"
 #include "llvm/IR/Operator.h"
+#include "llvm/IR/PassManager.h"
 #include "llvm/IR/Type.h"
 #include "llvm/IR/Use.h"
 #include "llvm/IR/User.h"
@@ -146,13 +148,7 @@
 using ValueToAddrSpaceMapTy = DenseMap<const Value *, unsigned>;
 using PostorderStackTy = llvm::SmallVector<PointerIntPair<Value *, 1, bool>, 4>;
 
-/// InferAddressSpaces
 class InferAddressSpaces : public FunctionPass {
-  const TargetTransformInfo *TTI = nullptr;
-  const DataLayout *DL = nullptr;
-
-  /// Target specific address space which uses of should be replaced if
-  /// possible.
   unsigned FlatAddrSpace = 0;
 
 public:
@@ -168,8 +164,16 @@
   }
 
   bool runOnFunction(Function &F) override;
+};
+
+class InferAddressSpacesImpl {
+  const TargetTransformInfo *TTI = nullptr;
+  const DataLayout *DL = nullptr;
+
+  /// Target specific address space which uses of should be replaced if
+  /// possible.
+  unsigned FlatAddrSpace = 0;
 
-private:
   // Returns the new address space of V if updated; otherwise, returns None.
   Optional<unsigned> updateAddressSpace(const Value &V,
@@ -211,6 +215,11 @@
       const ValueToValueMapTy &ValueWithNewAddrSpace,
       SmallVectorImpl<const Use *> *UndefUsesToFix) const;
   unsigned joinAddressSpaces(unsigned AS1, unsigned AS2) const;
+
+public:
+  InferAddressSpacesImpl(const TargetTransformInfo *TTI, unsigned FlatAddrSpace)
+      : TTI(TTI), FlatAddrSpace(FlatAddrSpace) {}
+  bool run(Function &F);
 };
 
 } // end anonymous namespace
@@ -326,9 +335,9 @@
   }
 }
 
-bool InferAddressSpaces::rewriteIntrinsicOperands(IntrinsicInst *II,
-                                                  Value *OldV,
-                                                  Value *NewV) const {
+bool InferAddressSpacesImpl::rewriteIntrinsicOperands(IntrinsicInst *II,
+                                                      Value *OldV,
+                                                      Value *NewV) const {
   Module *M = II->getParent()->getParent()->getParent();
 
   switch (II->getIntrinsicID()) {
@@ -355,7 +364,7 @@
   }
 }
 
-void InferAddressSpaces::collectRewritableIntrinsicOperands(
+void InferAddressSpacesImpl::collectRewritableIntrinsicOperands(
     IntrinsicInst *II, PostorderStackTy &PostorderStack,
     DenseSet<Value *> &Visited) const {
   auto IID = II->getIntrinsicID();
@@ -380,7 +389,7 @@
 // Returns all flat address expressions in function F. The elements are
 // If V is an unvisited flat address expression, appends V to PostorderStack
 // and marks it as visited.
-void InferAddressSpaces::appendsFlatAddressExpressionToPostorderStack(
+void InferAddressSpacesImpl::appendsFlatAddressExpressionToPostorderStack(
     Value *V, PostorderStackTy &PostorderStack,
     DenseSet<Value *> &Visited) const {
   assert(V->getType()->isPointerTy());
@@ -414,7 +423,7 @@
 // Returns all flat address expressions in function F. The elements are ordered
 // ordered in postorder.
 std::vector<WeakTrackingVH>
-InferAddressSpaces::collectFlatAddressExpressions(Function &F) const {
+InferAddressSpacesImpl::collectFlatAddressExpressions(Function &F) const {
   // This function implements a non-recursive postorder traversal of a partial
   // use-def graph of function F.
   PostorderStackTy PostorderStack;
@@ -524,7 +533,7 @@
 //
 // This may also return nullptr in the case the instruction could not be
 // rewritten.
-Value *InferAddressSpaces::cloneInstructionWithNewAddressSpace(
+Value *InferAddressSpacesImpl::cloneInstructionWithNewAddressSpace(
     Instruction *I, unsigned NewAddrSpace,
     const ValueToValueMapTy &ValueWithNewAddrSpace,
     SmallVectorImpl<const Use *> *UndefUsesToFix) const {
@@ -709,10 +718,10 @@
 // expression whose address space needs to be modified, in postorder.
 //
 // See cloneInstructionWithNewAddressSpace for the meaning of UndefUsesToFix.
-Value *InferAddressSpaces::cloneValueWithNewAddressSpace(
-    Value *V, unsigned NewAddrSpace,
-    const ValueToValueMapTy &ValueWithNewAddrSpace,
-    SmallVectorImpl<const Use *> *UndefUsesToFix) const {
+Value *InferAddressSpacesImpl::cloneValueWithNewAddressSpace(
+    Value *V, unsigned NewAddrSpace,
+    const ValueToValueMapTy &ValueWithNewAddrSpace,
+    SmallVectorImpl<const Use *> *UndefUsesToFix) const {
   // All values in Postorder are flat address expressions.
   assert(V->getType()->getPointerAddressSpace() == FlatAddrSpace &&
          isAddressExpression(*V, *DL, TTI));
@@ -735,8 +744,8 @@
 // Defines the join operation on the address space lattice (see the file header
 // comments).
-unsigned InferAddressSpaces::joinAddressSpaces(unsigned AS1,
-                                               unsigned AS2) const {
+unsigned InferAddressSpacesImpl::joinAddressSpaces(unsigned AS1,
+                                                   unsigned AS2) const {
   if (AS1 == FlatAddrSpace || AS2 == FlatAddrSpace)
     return FlatAddrSpace;
@@ -749,11 +758,7 @@
   return (AS1 == AS2) ? AS1 : FlatAddrSpace;
 }
 
-bool InferAddressSpaces::runOnFunction(Function &F) {
-  if (skipFunction(F))
-    return false;
-
-  TTI = &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
+bool InferAddressSpacesImpl::run(Function &F) {
   DL = &F.getParent()->getDataLayout();
 
   if (AssumeDefaultIsFlatAddressSpace)
@@ -780,7 +785,7 @@
 // Constants need to be tracked through RAUW to handle cases with nested
 // constant expressions, so wrap values in WeakTrackingVH.
-void InferAddressSpaces::inferAddressSpaces(
+void InferAddressSpacesImpl::inferAddressSpaces(
     ArrayRef<WeakTrackingVH> Postorder,
     ValueToAddrSpaceMapTy *InferredAddrSpace) const {
   SetVector<Value *> Worklist(Postorder.begin(), Postorder.end());
@@ -824,7 +829,7 @@
   }
 }
 
-Optional<unsigned> InferAddressSpaces::updateAddressSpace(
+Optional<unsigned> InferAddressSpacesImpl::updateAddressSpace(
     const Value &V, const ValueToAddrSpaceMapTy &InferredAddrSpace) const {
   assert(InferredAddrSpace.count(&V));
@@ -970,7 +975,8 @@
 // \p returns true if it is OK to change the address space of constant \p C with
 // a ConstantExpr addrspacecast.
-bool InferAddressSpaces::isSafeToCastConstAddrSpace(Constant *C, unsigned NewAS) const {
+bool InferAddressSpacesImpl::isSafeToCastConstAddrSpace(Constant *C,
+                                                        unsigned NewAS) const {
   assert(NewAS != UninitializedAddressSpace);
 
   unsigned SrcAS = C->getType()->getPointerAddressSpace();
@@ -1009,7 +1015,7 @@
   return I;
 }
 
-bool InferAddressSpaces::rewriteWithNewAddressSpaces(
+bool InferAddressSpacesImpl::rewriteWithNewAddressSpaces(
     const TargetTransformInfo &TTI, ArrayRef<WeakTrackingVH> Postorder,
     const ValueToAddrSpaceMapTy &InferredAddrSpace, Function *F) const {
   // For each address expression to be modified, creates a clone of it with its
@@ -1180,6 +1186,34 @@
   return true;
 }
 
+bool InferAddressSpaces::runOnFunction(Function &F) {
+  if (skipFunction(F))
+    return false;
+
+  return InferAddressSpacesImpl(
+             &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F),
+             FlatAddrSpace)
+      .run(F);
+}
+
 FunctionPass *llvm::createInferAddressSpacesPass(unsigned AddressSpace) {
   return new InferAddressSpaces(AddressSpace);
 }
+
+InferAddressSpacesPass::InferAddressSpacesPass()
+    : FlatAddrSpace(UninitializedAddressSpace) {}
+InferAddressSpacesPass::InferAddressSpacesPass(unsigned AddressSpace)
+    : FlatAddrSpace(AddressSpace) {}
+
+PreservedAnalyses InferAddressSpacesPass::run(Function &F,
+                                              FunctionAnalysisManager &AM) {
+  bool Changed =
+      InferAddressSpacesImpl(&AM.getResult<TargetIRAnalysis>(F), FlatAddrSpace)
+          .run(F);
+  if (Changed) {
+    PreservedAnalyses PA;
+    PA.preserveSet<CFGAnalyses>();
+    return PA;
+  }
+  return PreservedAnalyses::all();
+}
diff --git a/llvm/test/CodeGen/AMDGPU/infer-addrpace-pipeline.ll b/llvm/test/CodeGen/AMDGPU/infer-addrpace-pipeline.ll
--- a/llvm/test/CodeGen/AMDGPU/infer-addrpace-pipeline.ll
+++ b/llvm/test/CodeGen/AMDGPU/infer-addrpace-pipeline.ll
@@ -1,9 +1,14 @@
-; RUN: opt -mtriple=amdgcn--amdhsa -disable-output -disable-verify -debug-pass=Structure -O2 %s 2>&1 | FileCheck -check-prefix=GCN %s
+; RUN: opt -mtriple=amdgcn--amdhsa -disable-output -disable-verify -debug-pass=Structure -O2 %s -enable-new-pm=0 2>&1 | FileCheck -check-prefix=LPM %s
+; RUN: opt -mtriple=amdgcn--amdhsa -disable-output -disable-verify -debug-pass-manager -passes='default<O2>' %s 2>&1 | FileCheck -check-prefix=NPM %s
 
-; GCN: Function Integration/Inlining
-; GCN: FunctionPass Manager
-; GCN: Infer address spaces
-; GCN: SROA
+; LPM: Function Integration/Inlining
+; LPM: FunctionPass Manager
+; LPM: Infer address spaces
+; LPM: SROA
+
+; NPM: Running pass: InlinerPass
+; NPM: Running pass: InferAddressSpacesPass
+; NPM: Running pass: SROA
 
 define void @empty() {
   ret void
diff --git a/llvm/test/Transforms/InferAddressSpaces/AMDGPU/infer-address-space.ll b/llvm/test/Transforms/InferAddressSpaces/AMDGPU/infer-address-space.ll
--- a/llvm/test/Transforms/InferAddressSpaces/AMDGPU/infer-address-space.ll
+++ b/llvm/test/Transforms/InferAddressSpaces/AMDGPU/infer-address-space.ll
@@ -1,4 +1,5 @@
 ; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -infer-address-spaces %s | FileCheck %s
+; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -passes=infer-address-spaces %s | FileCheck %s
 
 ; Ports of most of test/CodeGen/NVPTX/access-non-generic.ll
 
 @scalar = internal addrspace(3) global float 0.0, align 4
diff --git a/llvm/tools/opt/opt.cpp b/llvm/tools/opt/opt.cpp
--- a/llvm/tools/opt/opt.cpp
+++ b/llvm/tools/opt/opt.cpp
@@ -481,7 +481,6 @@
       "unreachableblockelim",
       "verify-safepoint-ir",
      "divergence",
-      "infer-address-spaces",
      "atomic-expand",
      "hardware-loops",
      "type-promotion",
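
For reference, a minimal sketch of the rewrite this pass performs, in the spirit of the infer-address-space.ll test updated above. The kernel below is hypothetical and not part of the patch; both the legacy spelling (-infer-address-spaces) and the new-PM spelling (-passes=infer-address-spaces) apply the same transform. Because the flat pointer provably originates from an addrspace(3) global, the load through the addrspacecast can be rewritten to a direct addrspace(3) load:

; Illustrative input (hypothetical @lds and @load_from_lds; typed-pointer syntax).
@lds = internal addrspace(3) global float 0.0, align 4

define float @load_from_lds() {
  %p = addrspacecast float addrspace(3)* @lds to float*
  %v = load float, float* %p
  ret float %v
}

; Expected rewrite: the addrspacecast is bypassed and the load uses LDS directly:
;   %v = load float, float addrspace(3)* @lds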