diff --git a/clang/test/CodeGen/thinlto-distributed-newpm.ll b/clang/test/CodeGen/thinlto-distributed-newpm.ll
--- a/clang/test/CodeGen/thinlto-distributed-newpm.ll
+++ b/clang/test/CodeGen/thinlto-distributed-newpm.ll
@@ -47,11 +47,11 @@
 ; CHECK-O: Running pass: PromotePass
 ; CHECK-O: Running analysis: DominatorTreeAnalysis on main
 ; CHECK-O: Running analysis: AssumptionAnalysis on main
+; CHECK-O: Running analysis: TargetIRAnalysis on main
 ; CHECK-O: Running pass: DeadArgumentEliminationPass
 ; CHECK-O: Running pass: InstCombinePass on main
 ; CHECK-O: Running analysis: TargetLibraryAnalysis on main
 ; CHECK-O: Running analysis: OptimizationRemarkEmitterAnalysis on main
-; CHECK-O: Running analysis: TargetIRAnalysis on main
 ; CHECK-O: Running analysis: AAManager on main
 ; CHECK-O: Running analysis: BasicAA on main
 ; CHECK-O: Running analysis: ScopedNoAliasAA on main
diff --git a/llvm/include/llvm/Analysis/AssumptionCache.h b/llvm/include/llvm/Analysis/AssumptionCache.h
--- a/llvm/include/llvm/Analysis/AssumptionCache.h
+++ b/llvm/include/llvm/Analysis/AssumptionCache.h
@@ -29,6 +29,7 @@
 class AssumeInst;
 class Function;
 class raw_ostream;
+class TargetTransformInfo;
 class Value;
 
 /// A cache of \@llvm.assume calls within a function.
@@ -59,6 +60,8 @@
   /// We track this to lazily populate our assumptions.
   Function &F;
 
+  TargetTransformInfo *TTI;
+
   /// Vector of weak value handles to calls of the \@llvm.assume
   /// intrinsic.
   SmallVector<ResultElem, 4> AssumeHandles;
@@ -103,7 +106,8 @@
 public:
   /// Construct an AssumptionCache from a function by scanning all of
   /// its instructions.
-  AssumptionCache(Function &F) : F(F) {}
+  AssumptionCache(Function &F, TargetTransformInfo *TTI = nullptr)
+      : F(F), TTI(TTI) {}
 
   /// This cache is designed to be self-updating and so it should never be
   /// invalidated.
@@ -174,9 +178,7 @@
 public:
   using Result = AssumptionCache;
 
-  AssumptionCache run(Function &F, FunctionAnalysisManager &) {
-    return AssumptionCache(F);
-  }
+  AssumptionCache run(Function &F, FunctionAnalysisManager &);
 };
 
 /// Printer pass for the \c AssumptionAnalysis results.
diff --git a/llvm/include/llvm/Analysis/TargetTransformInfo.h b/llvm/include/llvm/Analysis/TargetTransformInfo.h
--- a/llvm/include/llvm/Analysis/TargetTransformInfo.h
+++ b/llvm/include/llvm/Analysis/TargetTransformInfo.h
@@ -30,6 +30,7 @@
 #include "llvm/Support/DataTypes.h"
 #include "llvm/Support/InstructionCost.h"
 #include <functional>
+#include <utility>
 
 namespace llvm {
 
@@ -389,6 +390,9 @@
 
   unsigned getAssumedAddrSpace(const Value *V) const;
 
+  std::pair<const Value *, unsigned>
+  getPredicatedAddrSpace(const Value *V) const;
+
   /// Rewrite intrinsic call \p II such that \p OldV will be replaced with \p
   /// NewV, which has a different address space. This should happen for every
   /// operand index that collectFlatAddressOperands returned for the intrinsic.
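The hook added above returns a (pointer, address space) pair when the queried value is a target's
"pointer is in address space X" predicate, and (nullptr, -1) otherwise. A minimal illustrative
sketch of that contract follows; it is not part of this patch, and the intrinsic ID and
address-space number passed in are hypothetical placeholders, not anything the patch defines.

// Illustrative sketch only (not part of the patch): the shape of a
// getPredicatedAddrSpace-style query. `IsInSpacePred` and `QueriedAS` are
// stand-ins for a target's own predicate intrinsic and address-space number.
#include "llvm/IR/IntrinsicInst.h"
#include <utility>

static std::pair<const llvm::Value *, unsigned>
sketchPredicatedAddrSpace(const llvm::Value *V,
                          llvm::Intrinsic::ID IsInSpacePred,
                          unsigned QueriedAS) {
  // If V is a call to the predicate, report which pointer is being tested and
  // which address space the predicate asserts it lies in.
  if (const auto *II = llvm::dyn_cast<llvm::IntrinsicInst>(V))
    if (II->getIntrinsicID() == IsInSpacePred)
      return std::make_pair(II->getArgOperand(0), QueriedAS);
  // Otherwise report "no information", matching the default implementation.
  return std::make_pair(nullptr, -1);
}

The AMDGPU and NVPTX overrides later in this patch follow the same shape, mapping
llvm.amdgcn.is.shared/is.private and llvm.nvvm.isspacep.* to their address spaces.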
@@ -1464,6 +1468,8 @@
   virtual bool
   canHaveNonUndefGlobalInitializerInAddressSpace(unsigned AS) const = 0;
   virtual unsigned getAssumedAddrSpace(const Value *V) const = 0;
+  virtual std::pair<const Value *, unsigned>
+  getPredicatedAddrSpace(const Value *V) const = 0;
   virtual Value *rewriteIntrinsicWithAddressSpace(IntrinsicInst *II,
                                                   Value *OldV,
                                                   Value *NewV) const = 0;
@@ -1797,6 +1803,11 @@
     return Impl.getAssumedAddrSpace(V);
   }
 
+  std::pair<const Value *, unsigned>
+  getPredicatedAddrSpace(const Value *V) const override {
+    return Impl.getPredicatedAddrSpace(V);
+  }
+
   Value *rewriteIntrinsicWithAddressSpace(IntrinsicInst *II, Value *OldV,
                                           Value *NewV) const override {
     return Impl.rewriteIntrinsicWithAddressSpace(II, OldV, NewV);
diff --git a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
--- a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
+++ b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
@@ -24,6 +24,7 @@
 #include "llvm/IR/Operator.h"
 #include "llvm/IR/PatternMatch.h"
 #include "llvm/IR/Type.h"
+#include <utility>
 
 using namespace llvm::PatternMatch;
 
@@ -110,6 +111,11 @@
 
   unsigned getAssumedAddrSpace(const Value *V) const { return -1; }
 
+  std::pair<const Value *, unsigned>
+  getPredicatedAddrSpace(const Value *V) const {
+    return std::make_pair(nullptr, -1);
+  }
+
   Value *rewriteIntrinsicWithAddressSpace(IntrinsicInst *II, Value *OldV,
                                           Value *NewV) const {
     return nullptr;
diff --git a/llvm/include/llvm/CodeGen/BasicTTIImpl.h b/llvm/include/llvm/CodeGen/BasicTTIImpl.h
--- a/llvm/include/llvm/CodeGen/BasicTTIImpl.h
+++ b/llvm/include/llvm/CodeGen/BasicTTIImpl.h
@@ -283,6 +283,11 @@
     return getTLI()->getTargetMachine().getAssumedAddrSpace(V);
   }
 
+  std::pair<const Value *, unsigned>
+  getPredicatedAddrSpace(const Value *V) const {
+    return getTLI()->getTargetMachine().getPredicatedAddrSpace(V);
+  }
+
   Value *rewriteIntrinsicWithAddressSpace(IntrinsicInst *II, Value *OldV,
                                           Value *NewV) const {
     return nullptr;
diff --git a/llvm/include/llvm/Target/TargetMachine.h b/llvm/include/llvm/Target/TargetMachine.h
--- a/llvm/include/llvm/Target/TargetMachine.h
+++ b/llvm/include/llvm/Target/TargetMachine.h
@@ -25,6 +25,7 @@
 #include "llvm/Target/CGPassBuilderOption.h"
 #include "llvm/Target/TargetOptions.h"
 #include <string>
+#include <utility>
 
 namespace llvm {
 
@@ -319,6 +320,18 @@
   /// properties.
   virtual unsigned getAssumedAddrSpace(const Value *V) const { return -1; }
 
+  /// If the specified predicate checks whether a generic pointer falls within
+  /// a specified address space, return that generic pointer and the address
+  /// space being queried.
+  ///
+  /// Such predicates could be specified in @llvm.assume intrinsics for the
+  /// optimizer to assume that the given generic pointer always falls within
+  /// the address space based on that predicate.
+  virtual std::pair<const Value *, unsigned>
+  getPredicatedAddrSpace(const Value *V) const {
+    return std::make_pair(nullptr, -1);
+  }
+
   /// Get a \c TargetIRAnalysis appropriate for the target.
   ///
   /// This is used to construct the new pass manager's target IR analysis pass,
diff --git a/llvm/lib/Analysis/AssumptionCache.cpp b/llvm/lib/Analysis/AssumptionCache.cpp
--- a/llvm/lib/Analysis/AssumptionCache.cpp
+++ b/llvm/lib/Analysis/AssumptionCache.cpp
@@ -11,11 +11,12 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "llvm/Analysis/AssumeBundleQueries.h"
 #include "llvm/Analysis/AssumptionCache.h"
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/SmallPtrSet.h"
 #include "llvm/ADT/SmallVector.h"
+#include "llvm/Analysis/AssumeBundleQueries.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
 #include "llvm/IR/BasicBlock.h"
 #include "llvm/IR/Function.h"
 #include "llvm/IR/InstrTypes.h"
@@ -56,7 +57,7 @@
 }
 
 static void
-findAffectedValues(CallBase *CI,
+findAffectedValues(CallBase *CI, TargetTransformInfo *TTI,
                    SmallVectorImpl<AssumptionCache::ResultElem> &Affected) {
   // Note: This code must be kept in-sync with the code in
   // computeKnownBitsFromAssume in ValueTracking.
@@ -124,11 +125,19 @@
         match(B, m_ConstantInt()))
       AddAffected(X);
   }
+
+  if (TTI) {
+    const Value *Ptr;
+    unsigned AS;
+    std::tie(Ptr, AS) = TTI->getPredicatedAddrSpace(Cond);
+    if (Ptr)
+      AddAffected(const_cast<Value *>(Ptr->stripInBoundsOffsets()));
+  }
 }
 
 void AssumptionCache::updateAffectedValues(AssumeInst *CI) {
   SmallVector<AssumptionCache::ResultElem, 16> Affected;
-  findAffectedValues(CI, Affected);
+  findAffectedValues(CI, TTI, Affected);
 
   for (auto &AV : Affected) {
     auto &AVV = getOrInsertAffectedValues(AV.Assume);
@@ -141,7 +150,7 @@
 
 void AssumptionCache::unregisterAssumption(AssumeInst *CI) {
   SmallVector<AssumptionCache::ResultElem, 16> Affected;
-  findAffectedValues(CI, Affected);
+  findAffectedValues(CI, TTI, Affected);
 
   for (auto &AV : Affected) {
     auto AVI = AffectedValues.find_as(AV.Assume);
@@ -248,6 +257,12 @@
   updateAffectedValues(CI);
 }
 
+AssumptionCache AssumptionAnalysis::run(Function &F,
+                                        FunctionAnalysisManager &FAM) {
+  auto &TTI = FAM.getResult<TargetIRAnalysis>(F);
+  return AssumptionCache(F, &TTI);
+}
+
 AnalysisKey AssumptionAnalysis::Key;
 
 PreservedAnalyses AssumptionPrinterPass::run(Function &F,
@@ -278,10 +293,13 @@
   if (I != AssumptionCaches.end())
     return *I->second;
 
+  auto *TTIWP = getAnalysisIfAvailable<TargetTransformInfoWrapperPass>();
+  auto *TTI = TTIWP ? &TTIWP->getTTI(F) : nullptr;
+
   // Ok, build a new cache by scanning the function, insert it and the value
   // handle into our map, and return the newly populated cache.
   auto IP = AssumptionCaches.insert(std::make_pair(
-      FunctionCallbackVH(&F, this), std::make_unique<AssumptionCache>(F)));
+      FunctionCallbackVH(&F, this), std::make_unique<AssumptionCache>(F, TTI)));
   assert(IP.second && "Scanning function already in the map?");
   return *IP.first->second;
 }
diff --git a/llvm/lib/Analysis/TargetTransformInfo.cpp b/llvm/lib/Analysis/TargetTransformInfo.cpp
--- a/llvm/lib/Analysis/TargetTransformInfo.cpp
+++ b/llvm/lib/Analysis/TargetTransformInfo.cpp
@@ -268,6 +268,11 @@
   return TTIImpl->getAssumedAddrSpace(V);
 }
 
+std::pair<const Value *, unsigned>
+TargetTransformInfo::getPredicatedAddrSpace(const Value *V) const {
+  return TTIImpl->getPredicatedAddrSpace(V);
+}
+
 Value *TargetTransformInfo::rewriteIntrinsicWithAddressSpace(
     IntrinsicInst *II, Value *OldV, Value *NewV) const {
   return TTIImpl->rewriteIntrinsicWithAddressSpace(II, OldV, NewV);
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.h b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.h
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.h
@@ -17,6 +17,7 @@
 #include "GCNSubtarget.h"
 #include "llvm/CodeGen/TargetPassConfig.h"
 #include "llvm/Target/TargetMachine.h"
+#include <utility>
 
 namespace llvm {
 
@@ -63,6 +64,9 @@
   bool isNoopAddrSpaceCast(unsigned SrcAS, unsigned DestAS) const override;
 
   unsigned getAssumedAddrSpace(const Value *V) const override;
+
+  std::pair<const Value *, unsigned>
+  getPredicatedAddrSpace(const Value *V) const override;
 };
 
 //===----------------------------------------------------------------------===//
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
@@ -36,8 +36,10 @@
 #include "llvm/CodeGen/Passes.h"
 #include "llvm/CodeGen/RegAllocRegistry.h"
 #include "llvm/CodeGen/TargetPassConfig.h"
+#include "llvm/IR/IntrinsicsAMDGPU.h"
 #include "llvm/IR/LegacyPassManager.h"
 #include "llvm/IR/PassManager.h"
+#include "llvm/IR/PatternMatch.h"
 #include "llvm/InitializePasses.h"
 #include "llvm/MC/TargetRegistry.h"
 #include "llvm/Passes/PassBuilder.h"
@@ -780,6 +782,33 @@
   return AMDGPUAS::GLOBAL_ADDRESS;
 }
 
+std::pair<const Value *, unsigned>
+AMDGPUTargetMachine::getPredicatedAddrSpace(const Value *V) const {
+  if (auto *II = dyn_cast<IntrinsicInst>(V)) {
+    switch (II->getIntrinsicID()) {
+    case Intrinsic::amdgcn_is_shared:
+      return std::make_pair(II->getArgOperand(0), AMDGPUAS::LOCAL_ADDRESS);
+    case Intrinsic::amdgcn_is_private:
+      return std::make_pair(II->getArgOperand(0), AMDGPUAS::PRIVATE_ADDRESS);
+    default:
+      break;
+    }
+    return std::make_pair(nullptr, -1);
+  }
+
+  // Check the global pointer predication based on
+  // (!is_shared(p) && !is_private(p)). Note that logical 'and' is commutative
+  // and the order of 'is_shared' and 'is_private' is not significant.
+  Value *Ptr;
+  if (match(
+          const_cast<Value *>(V),
+          m_c_And(m_Not(m_Intrinsic<Intrinsic::amdgcn_is_shared>(m_Value(Ptr))),
+                  m_Not(m_Intrinsic<Intrinsic::amdgcn_is_private>(
+                      m_Deferred(Ptr))))))
+    return std::make_pair(Ptr, AMDGPUAS::GLOBAL_ADDRESS);
+
+  return std::make_pair(nullptr, -1);
+}
+
 //===----------------------------------------------------------------------===//
 // GCN Target Machine (SI+)
 //===----------------------------------------------------------------------===//
diff --git a/llvm/lib/Target/NVPTX/NVPTXTargetMachine.h b/llvm/lib/Target/NVPTX/NVPTXTargetMachine.h
--- a/llvm/lib/Target/NVPTX/NVPTXTargetMachine.h
+++ b/llvm/lib/Target/NVPTX/NVPTXTargetMachine.h
@@ -16,6 +16,7 @@
 #include "ManagedStringPool.h"
 #include "NVPTXSubtarget.h"
 #include "llvm/Target/TargetMachine.h"
+#include <utility>
 
 namespace llvm {
 
@@ -69,6 +70,9 @@
   bool isMachineVerifierClean() const override {
     return false;
   }
+
+  std::pair<const Value *, unsigned>
+  getPredicatedAddrSpace(const Value *V) const override;
 }; // NVPTXTargetMachine.
 
 class NVPTXTargetMachine32 : public NVPTXTargetMachine {
diff --git a/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp b/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp
--- a/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp
@@ -23,6 +23,7 @@
 #include "llvm/Analysis/TargetTransformInfo.h"
 #include "llvm/CodeGen/Passes.h"
 #include "llvm/CodeGen/TargetPassConfig.h"
+#include "llvm/IR/IntrinsicsNVPTX.h"
 #include "llvm/IR/LegacyPassManager.h"
 #include "llvm/MC/TargetRegistry.h"
 #include "llvm/Pass.h"
@@ -240,6 +241,25 @@
   return TargetTransformInfo(NVPTXTTIImpl(this, F));
 }
 
+std::pair<const Value *, unsigned>
+NVPTXTargetMachine::getPredicatedAddrSpace(const Value *V) const {
+  if (auto *II = dyn_cast<IntrinsicInst>(V)) {
+    switch (II->getIntrinsicID()) {
+    case Intrinsic::nvvm_isspacep_const:
+      return std::make_pair(II->getArgOperand(0), llvm::ADDRESS_SPACE_CONST);
+    case Intrinsic::nvvm_isspacep_global:
+      return std::make_pair(II->getArgOperand(0), llvm::ADDRESS_SPACE_GLOBAL);
+    case Intrinsic::nvvm_isspacep_local:
+      return std::make_pair(II->getArgOperand(0), llvm::ADDRESS_SPACE_LOCAL);
+    case Intrinsic::nvvm_isspacep_shared:
+      return std::make_pair(II->getArgOperand(0), llvm::ADDRESS_SPACE_SHARED);
+    default:
+      break;
+    }
+  }
+  return std::make_pair(nullptr, -1);
+}
+
 void NVPTXPassConfig::addEarlyCSEOrGVNPass() {
   if (getOptLevel() == CodeGenOpt::Aggressive)
     addPass(createGVNPass());
diff --git a/llvm/lib/Transforms/Scalar/InferAddressSpaces.cpp b/llvm/lib/Transforms/Scalar/InferAddressSpaces.cpp
--- a/llvm/lib/Transforms/Scalar/InferAddressSpaces.cpp
+++ b/llvm/lib/Transforms/Scalar/InferAddressSpaces.cpp
@@ -96,10 +96,13 @@
 #include "llvm/ADT/Optional.h"
 #include "llvm/ADT/SetVector.h"
 #include "llvm/ADT/SmallVector.h"
+#include "llvm/Analysis/AssumptionCache.h"
 #include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/Analysis/ValueTracking.h"
 #include "llvm/IR/BasicBlock.h"
 #include "llvm/IR/Constant.h"
 #include "llvm/IR/Constants.h"
+#include "llvm/IR/Dominators.h"
 #include "llvm/IR/Function.h"
 #include "llvm/IR/IRBuilder.h"
 #include "llvm/IR/InstIterator.h"
@@ -115,6 +118,7 @@
 #include "llvm/IR/User.h"
 #include "llvm/IR/Value.h"
 #include "llvm/IR/ValueHandle.h"
+#include "llvm/InitializePasses.h"
 #include "llvm/Pass.h"
 #include "llvm/Support/Casting.h"
 #include "llvm/Support/CommandLine.h"
@@ -146,6 +150,14 @@
 namespace {
 
 using ValueToAddrSpaceMapTy = DenseMap<const Value *, unsigned>;
+
+// Different from ValueToAddrSpaceMapTy, where a new addrspace is inferred on
+// the *def* of a value, PredicatedAddrSpaceMapTy is a map where a new
+// addrspace is inferred on the *use* of a pointer. This map is introduced to
+// infer the addrspace from the addrspace predicate assumption built from an
+// assume intrinsic. In that scenario, only specific uses (under a valid
+// assumption context) could be inferred with a new addrspace.
+using PredicatedAddrSpaceMapTy =
+    DenseMap<std::pair<const Value *, const Value *>, unsigned>;
 using PostorderStackTy = llvm::SmallVector<PointerIntPair<Value *, 1, bool>, 4>;
 
 class InferAddressSpaces : public FunctionPass {
@@ -160,6 +172,8 @@
   void getAnalysisUsage(AnalysisUsage &AU) const override {
     AU.setPreservesCFG();
+    AU.addPreserved<DominatorTreeWrapperPass>();
+    AU.addRequired<AssumptionCacheTracker>();
     AU.addRequired<TargetTransformInfoWrapperPass>();
   }
@@ -167,6 +181,8 @@
 };
 
 class InferAddressSpacesImpl {
+  AssumptionCache &AC;
+  DominatorTree *DT = nullptr;
   const TargetTransformInfo *TTI = nullptr;
   const DataLayout *DL = nullptr;
@@ -177,18 +193,21 @@
 
   // Returns the new address space of V if updated; otherwise, returns None.
   Optional<unsigned>
   updateAddressSpace(const Value &V,
-                     const ValueToAddrSpaceMapTy &InferredAddrSpace) const;
+                     const ValueToAddrSpaceMapTy &InferredAddrSpace,
+                     PredicatedAddrSpaceMapTy &PredicatedAS) const;
 
   // Tries to infer the specific address space of each address expression in
   // Postorder.
   void inferAddressSpaces(ArrayRef<WeakTrackingVH> Postorder,
-                          ValueToAddrSpaceMapTy *InferredAddrSpace) const;
+                          ValueToAddrSpaceMapTy *InferredAddrSpace,
+                          PredicatedAddrSpaceMapTy &PredicatedAS) const;
 
   bool isSafeToCastConstAddrSpace(Constant *C, unsigned NewAS) const;
 
   Value *cloneInstructionWithNewAddressSpace(
       Instruction *I, unsigned NewAddrSpace,
       const ValueToValueMapTy &ValueWithNewAddrSpace,
+      const PredicatedAddrSpaceMapTy &PredicatedAS,
      SmallVectorImpl<const Use *> *UndefUsesToFix) const;
 
   // Changes the flat address expressions in function F to point to specific
@@ -196,7 +215,8 @@
   // all flat expressions in the use-def graph of function F.
   bool rewriteWithNewAddressSpaces(
       const TargetTransformInfo &TTI, ArrayRef<WeakTrackingVH> Postorder,
-      const ValueToAddrSpaceMapTy &InferredAddrSpace, Function *F) const;
+      const ValueToAddrSpaceMapTy &InferredAddrSpace,
+      const PredicatedAddrSpaceMapTy &PredicatedAS, Function *F) const;
 
   void appendsFlatAddressExpressionToPostorderStack(
       Value *V, PostorderStackTy &PostorderStack,
@@ -211,14 +231,18 @@
   std::vector<WeakTrackingVH> collectFlatAddressExpressions(Function &F) const;
 
   Value *cloneValueWithNewAddressSpace(
-      Value *V, unsigned NewAddrSpace,
-      const ValueToValueMapTy &ValueWithNewAddrSpace,
-      SmallVectorImpl<const Use *> *UndefUsesToFix) const;
+      Value *V, unsigned NewAddrSpace,
+      const ValueToValueMapTy &ValueWithNewAddrSpace,
+      const PredicatedAddrSpaceMapTy &PredicatedAS,
+      SmallVectorImpl<const Use *> *UndefUsesToFix) const;
   unsigned joinAddressSpaces(unsigned AS1, unsigned AS2) const;
 
+  Optional<unsigned> getPredicatedAddrSpace(const Value &V, Value *Opnd) const;
+
 public:
-  InferAddressSpacesImpl(const TargetTransformInfo *TTI, unsigned FlatAddrSpace)
-      : TTI(TTI), FlatAddrSpace(FlatAddrSpace) {}
+  InferAddressSpacesImpl(AssumptionCache &AC, DominatorTree *DT,
+                         const TargetTransformInfo *TTI, unsigned FlatAddrSpace)
+      : AC(AC), DT(DT), TTI(TTI), FlatAddrSpace(FlatAddrSpace) {}
   bool run(Function &F);
 };
@@ -232,8 +256,12 @@
 } // end namespace llvm
 
-INITIALIZE_PASS(InferAddressSpaces, DEBUG_TYPE, "Infer address spaces",
-                false, false)
+INITIALIZE_PASS_BEGIN(InferAddressSpaces, DEBUG_TYPE, "Infer address spaces",
+                      false, false)
+INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
+INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
+INITIALIZE_PASS_END(InferAddressSpaces, DEBUG_TYPE, "Infer address spaces",
+                    false, false)
 
 // Check whether that's no-op pointer bicast using a pair of
 // `ptrtoint`/`inttoptr` due to the missing no-op pointer bitcast over
@@ -505,6 +533,7 @@
 static Value *operandWithNewAddressSpaceOrCreateUndef(
     const Use &OperandUse, unsigned NewAddrSpace,
     const ValueToValueMapTy &ValueWithNewAddrSpace,
+    const PredicatedAddrSpaceMapTy &PredicatedAS,
     SmallVectorImpl<const Use *> *UndefUsesToFix) {
   Value *Operand = OperandUse.get();
@@ -517,6 +546,18 @@
   if (Value *NewOperand = ValueWithNewAddrSpace.lookup(Operand))
     return NewOperand;
 
+  Instruction *Inst = cast<Instruction>(OperandUse.getUser());
+  auto I = PredicatedAS.find(std::make_pair(Inst, Operand));
+  if (I != PredicatedAS.end()) {
+    // Insert an addrspacecast on that operand before the user.
+    unsigned NewAS = I->second;
+    Type *NewPtrTy = PointerType::getWithSamePointeeType(
+        cast<PointerType>(Operand->getType()), NewAS);
+    auto *NewI = new AddrSpaceCastInst(Operand, NewPtrTy);
+    NewI->insertBefore(Inst);
+    return NewI;
+  }
+
   UndefUsesToFix->push_back(&OperandUse);
   return UndefValue::get(NewPtrTy);
 }
@@ -536,6 +577,7 @@
 Value *InferAddressSpacesImpl::cloneInstructionWithNewAddressSpace(
     Instruction *I, unsigned NewAddrSpace,
     const ValueToValueMapTy &ValueWithNewAddrSpace,
+    const PredicatedAddrSpaceMapTy &PredicatedAS,
     SmallVectorImpl<const Use *> *UndefUsesToFix) const {
   Type *NewPtrType = PointerType::getWithSamePointeeType(
       cast<PointerType>(I->getType()), NewAddrSpace);
@@ -557,7 +599,7 @@
     assert(II->getIntrinsicID() == Intrinsic::ptrmask);
     Value *NewPtr = operandWithNewAddressSpaceOrCreateUndef(
         II->getArgOperandUse(0), NewAddrSpace, ValueWithNewAddrSpace,
-        UndefUsesToFix);
+        PredicatedAS, UndefUsesToFix);
     Value *Rewrite =
         TTI->rewriteIntrinsicWithAddressSpace(II, II->getArgOperand(0), NewPtr);
     if (Rewrite) {
@@ -586,7 +628,8 @@
       NewPointerOperands.push_back(nullptr);
     else
       NewPointerOperands.push_back(operandWithNewAddressSpaceOrCreateUndef(
-          OperandUse, NewAddrSpace, ValueWithNewAddrSpace, UndefUsesToFix));
+          OperandUse, NewAddrSpace, ValueWithNewAddrSpace, PredicatedAS,
+          UndefUsesToFix));
   }
 
   switch (I->getOpcode()) {
@@ -724,6 +767,7 @@
 Value *InferAddressSpacesImpl::cloneValueWithNewAddressSpace(
     Value *V, unsigned NewAddrSpace,
     const ValueToValueMapTy &ValueWithNewAddrSpace,
+    const PredicatedAddrSpaceMapTy &PredicatedAS,
     SmallVectorImpl<const Use *> *UndefUsesToFix) const {
   // All values in Postorder are flat address expressions.
   assert(V->getType()->getPointerAddressSpace() == FlatAddrSpace &&
@@ -731,7 +775,7 @@
   if (Instruction *I = dyn_cast<Instruction>(V)) {
     Value *NewV = cloneInstructionWithNewAddressSpace(
-        I, NewAddrSpace, ValueWithNewAddrSpace, UndefUsesToFix);
+        I, NewAddrSpace, ValueWithNewAddrSpace, PredicatedAS, UndefUsesToFix);
     if (Instruction *NewI = dyn_cast_or_null<Instruction>(NewV)) {
       if (NewI->getParent() == nullptr) {
         NewI->insertBefore(I);
@@ -779,18 +823,21 @@
   // Runs a data-flow analysis to refine the address spaces of every expression
   // in Postorder.
   ValueToAddrSpaceMapTy InferredAddrSpace;
-  inferAddressSpaces(Postorder, &InferredAddrSpace);
+  PredicatedAddrSpaceMapTy PredicatedAS;
+  inferAddressSpaces(Postorder, &InferredAddrSpace, PredicatedAS);
 
   // Changes the address spaces of the flat address expressions who are inferred
   // to point to a specific address space.
-  return rewriteWithNewAddressSpaces(*TTI, Postorder, InferredAddrSpace, &F);
+  return rewriteWithNewAddressSpaces(*TTI, Postorder, InferredAddrSpace,
+                                     PredicatedAS, &F);
 }
 
 // Constants need to be tracked through RAUW to handle cases with nested
 // constant expressions, so wrap values in WeakTrackingVH.
 void InferAddressSpacesImpl::inferAddressSpaces(
     ArrayRef<WeakTrackingVH> Postorder,
-    ValueToAddrSpaceMapTy *InferredAddrSpace) const {
+    ValueToAddrSpaceMapTy *InferredAddrSpace,
+    PredicatedAddrSpaceMapTy &PredicatedAS) const {
   SetVector<Value *> Worklist(Postorder.begin(), Postorder.end());
   // Initially, all expressions are in the uninitialized address space.
   for (Value *V : Postorder)
@@ -802,7 +849,8 @@
     // Tries to update the address space of the stack top according to the
     // address spaces of its operands.
     LLVM_DEBUG(dbgs() << "Updating the address space of\n " << *V << '\n');
-    Optional<unsigned> NewAS = updateAddressSpace(*V, *InferredAddrSpace);
+    Optional<unsigned> NewAS =
+        updateAddressSpace(*V, *InferredAddrSpace, PredicatedAS);
     if (!NewAS.hasValue())
       continue;
     // If any updates are made, grabs its users to the worklist because
@@ -832,8 +880,35 @@
   }
 }
 
+Optional<unsigned>
+InferAddressSpacesImpl::getPredicatedAddrSpace(const Value &V,
+                                               Value *Opnd) const {
+  const Instruction *I = dyn_cast<Instruction>(&V);
+  if (!I)
+    return None;
+
+  Opnd = Opnd->stripInBoundsOffsets();
+  const BasicBlock *BB = I->getParent();
+  for (auto &AssumeVH : AC.assumptionsFor(Opnd)) {
+    if (!AssumeVH)
+      continue;
+    CallInst *CI = cast<CallInst>(AssumeVH);
+    if (!isValidAssumeForContext(CI, I, DT))
+      continue;
+
+    const Value *Ptr;
+    unsigned AS;
+    std::tie(Ptr, AS) = TTI->getPredicatedAddrSpace(CI->getArgOperand(0));
+    if (Ptr)
+      return AS;
+  }
+
+  return None;
+}
+
 Optional<unsigned> InferAddressSpacesImpl::updateAddressSpace(
-    const Value &V, const ValueToAddrSpaceMapTy &InferredAddrSpace) const {
+    const Value &V, const ValueToAddrSpaceMapTy &InferredAddrSpace,
+    PredicatedAddrSpaceMapTy &PredicatedAS) const {
   assert(InferredAddrSpace.count(&V));
 
   // The new inferred address space equals the join of the address spaces
@@ -878,10 +953,20 @@
   // Otherwise, infer the address space from its pointer operands.
   for (Value *PtrOperand : getPointerOperands(V, *DL, TTI)) {
     auto I = InferredAddrSpace.find(PtrOperand);
-    unsigned OperandAS =
-        I != InferredAddrSpace.end()
-            ? I->second
-            : PtrOperand->getType()->getPointerAddressSpace();
+    unsigned OperandAS;
+    if (I == InferredAddrSpace.end()) {
+      OperandAS = PtrOperand->getType()->getPointerAddressSpace();
+      if (OperandAS == FlatAddrSpace) {
+        // Check AC for assumption dominating V.
+        auto AS = getPredicatedAddrSpace(V, PtrOperand);
+        if (AS) {
+          OperandAS = AS.getValue();
+          // Record this use with the predicated AS.
+          PredicatedAS[std::make_pair(&V, PtrOperand)] = OperandAS;
+        }
+      }
+    } else
+      OperandAS = I->second;
 
     // join(flat, *) = flat. So we can break if NewAS is already flat.
     NewAS = joinAddressSpaces(NewAS, OperandAS);
@@ -1026,7 +1111,8 @@
 bool InferAddressSpacesImpl::rewriteWithNewAddressSpaces(
     const TargetTransformInfo &TTI, ArrayRef<WeakTrackingVH> Postorder,
-    const ValueToAddrSpaceMapTy &InferredAddrSpace, Function *F) const {
+    const ValueToAddrSpaceMapTy &InferredAddrSpace,
+    const PredicatedAddrSpaceMapTy &PredicatedAS, Function *F) const {
   // For each address expression to be modified, creates a clone of it with its
   // pointer operands converted to the new address space. Since the pointer
   // operands are converted, the clone is naturally in the new address space by
@@ -1042,8 +1128,9 @@
       continue;
 
     if (V->getType()->getPointerAddressSpace() != NewAddrSpace) {
-      Value *New = cloneValueWithNewAddressSpace(
-          V, NewAddrSpace, ValueWithNewAddrSpace, &UndefUsesToFix);
+      Value *New =
+          cloneValueWithNewAddressSpace(V, NewAddrSpace, ValueWithNewAddrSpace,
+                                        PredicatedAS, &UndefUsesToFix);
       if (New)
         ValueWithNewAddrSpace[V] = New;
     }
@@ -1199,7 +1286,10 @@
   if (skipFunction(F))
     return false;
 
+  auto *DTWP = getAnalysisIfAvailable<DominatorTreeWrapperPass>();
+  DominatorTree *DT = DTWP ? &DTWP->getDomTree() : nullptr;
   return InferAddressSpacesImpl(
+             getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F), DT,
              &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F),
              FlatAddrSpace)
       .run(F);
@@ -1217,11 +1307,14 @@
 PreservedAnalyses InferAddressSpacesPass::run(Function &F,
                                               FunctionAnalysisManager &AM) {
   bool Changed =
-      InferAddressSpacesImpl(&AM.getResult<TargetIRAnalysis>(F), FlatAddrSpace)
+      InferAddressSpacesImpl(AM.getResult<AssumptionAnalysis>(F),
+                             AM.getCachedResult<DominatorTreeAnalysis>(F),
+                             &AM.getResult<TargetIRAnalysis>(F), FlatAddrSpace)
           .run(F);
   if (Changed) {
     PreservedAnalyses PA;
     PA.preserveSet<CFGAnalyses>();
+    PA.preserve<DominatorTreeAnalysis>();
     return PA;
   }
   return PreservedAnalyses::all();
diff --git a/llvm/test/Other/loop-pm-invalidation.ll b/llvm/test/Other/loop-pm-invalidation.ll
--- a/llvm/test/Other/loop-pm-invalidation.ll
+++ b/llvm/test/Other/loop-pm-invalidation.ll
@@ -22,6 +22,7 @@
 ; CHECK-LOOP-INV-NEXT: Running analysis: LoopAnalysis
 ; CHECK-LOOP-INV-NEXT: Running analysis: DominatorTreeAnalysis
 ; CHECK-LOOP-INV-NEXT: Running analysis: AssumptionAnalysis
+; CHECK-LOOP-INV-NEXT: Running analysis: TargetIRAnalysis
 ; CHECK-LOOP-INV-NEXT: Running pass: LCSSAPass
 ; CHECK-LOOP-INV-NEXT: Running pass: InvalidateAnalysisPass<{{.*}}LoopAnalysis
 ; CHECK-LOOP-INV-NEXT: Invalidating analysis: LoopAnalysis
@@ -33,6 +34,7 @@
 ; CHECK-SCEV-INV-NEXT: Running analysis: LoopAnalysis
 ; CHECK-SCEV-INV-NEXT: Running analysis: DominatorTreeAnalysis
 ; CHECK-SCEV-INV-NEXT: Running analysis: AssumptionAnalysis
+; CHECK-SCEV-INV-NEXT: Running analysis: TargetIRAnalysis
 ; CHECK-SCEV-INV-NEXT: Running pass: LCSSAPass
 ; CHECK-SCEV-INV-NEXT: Running pass: InvalidateAnalysisPass<{{.*}}ScalarEvolutionAnalysis
 ; CHECK-SCEV-INV-NEXT: Running pass: LoopSimplifyPass
@@ -47,11 +49,11 @@
 ; CHECK-LOOP-INV-NEXT: Running analysis: LoopAnalysis
 ; CHECK-LOOP-INV-NEXT: Running analysis: DominatorTreeAnalysis
 ; CHECK-LOOP-INV-NEXT: Running analysis: AssumptionAnalysis
+; CHECK-LOOP-INV-NEXT: Running analysis: TargetIRAnalysis
 ; CHECK-LOOP-INV-NEXT: Running pass: LCSSAPass
 ; CHECK-LOOP-INV-NEXT: Running analysis: AAManager
 ; CHECK-LOOP-INV-NEXT: Running analysis: TargetLibraryAnalysis
 ; CHECK-LOOP-INV-NEXT: Running analysis: ScalarEvolutionAnalysis
-; CHECK-LOOP-INV-NEXT: Running analysis: TargetIRAnalysis
 ; CHECK-LOOP-INV-NEXT: Running analysis: InnerAnalysisManagerProxy<{{.*}}Loop
 ; CHECK-LOOP-INV-NEXT: Running pass: NoOpLoopPass
 ; CHECK-LOOP-INV-NEXT: Running pass: InvalidateAnalysisPass<{{.*}}LoopAnalysis
@@ -70,11 +72,11 @@
 ; CHECK-SCEV-INV-NEXT: Running analysis: LoopAnalysis
 ; CHECK-SCEV-INV-NEXT: Running analysis: DominatorTreeAnalysis
 ; CHECK-SCEV-INV-NEXT: Running analysis: AssumptionAnalysis
+; CHECK-SCEV-INV-NEXT: Running analysis: TargetIRAnalysis
 ; CHECK-SCEV-INV-NEXT: Running pass: LCSSAPass
 ; CHECK-SCEV-INV-NEXT: Running analysis: AAManager
 ; CHECK-SCEV-INV-NEXT: Running analysis: TargetLibraryAnalysis
 ; CHECK-SCEV-INV-NEXT: Running analysis: ScalarEvolutionAnalysis
-; CHECK-SCEV-INV-NEXT: Running analysis: TargetIRAnalysis
 ; CHECK-SCEV-INV-NEXT: Running analysis: InnerAnalysisManagerProxy<{{.*}}Loop
 ; CHECK-SCEV-INV-NEXT: Running pass: NoOpLoopPass
 ; CHECK-SCEV-INV-NEXT: Running pass: InvalidateAnalysisPass<{{.*}}ScalarEvolutionAnalysis
@@ -103,11 +105,11 @@
 ; CHECK-LOOP-INV-NEXT: Running analysis: LoopAnalysis
 ; CHECK-LOOP-INV-NEXT: Running analysis: DominatorTreeAnalysis
 ; CHECK-LOOP-INV-NEXT: Running analysis: AssumptionAnalysis
+; CHECK-LOOP-INV-NEXT: Running analysis: TargetIRAnalysis
 ; CHECK-LOOP-INV-NEXT: Running pass: LCSSAPass
 ; CHECK-LOOP-INV-NEXT: Running analysis: AAManager
 ; CHECK-LOOP-INV-NEXT: Running analysis: TargetLibraryAnalysis
 ; CHECK-LOOP-INV-NEXT: Running analysis: ScalarEvolutionAnalysis
-; CHECK-LOOP-INV-NEXT: Running analysis: TargetIRAnalysis
 ; CHECK-LOOP-INV-NEXT: Running analysis: InnerAnalysisManagerProxy<{{.*}}Loop
 ; CHECK-LOOP-INV-NEXT: Running pass: NoOpLoopPass
 ; CHECK-LOOP-INV-NEXT: Running pass: NoOpLoopPass
@@ -129,11 +131,11 @@
 ; CHECK-SCEV-INV-NEXT: Running analysis: LoopAnalysis
 ; CHECK-SCEV-INV-NEXT: Running analysis: DominatorTreeAnalysis
 ; CHECK-SCEV-INV-NEXT: Running analysis: AssumptionAnalysis
+; CHECK-SCEV-INV-NEXT: Running analysis: TargetIRAnalysis
 ; CHECK-SCEV-INV-NEXT: Running pass: LCSSAPass
 ; CHECK-SCEV-INV-NEXT: Running analysis: AAManager
 ; CHECK-SCEV-INV-NEXT: Running analysis: TargetLibraryAnalysis
 ; CHECK-SCEV-INV-NEXT: Running analysis: ScalarEvolutionAnalysis
-; CHECK-SCEV-INV-NEXT: Running analysis: TargetIRAnalysis
 ; CHECK-SCEV-INV-NEXT: Running analysis: InnerAnalysisManagerProxy<{{.*}}Loop
 ; CHECK-SCEV-INV-NEXT: Running pass: NoOpLoopPass
 ; CHECK-SCEV-INV-NEXT: Running pass: NoOpLoopPass
@@ -172,11 +174,11 @@
 ; CHECK-LOOP-INV-NEXT: Running analysis: LoopAnalysis
 ; CHECK-LOOP-INV-NEXT: Running analysis: DominatorTreeAnalysis
 ; CHECK-LOOP-INV-NEXT: Running analysis: AssumptionAnalysis
+; CHECK-LOOP-INV-NEXT: Running analysis: TargetIRAnalysis
 ; CHECK-LOOP-INV-NEXT: Running pass: LCSSAPass
 ; CHECK-LOOP-INV-NEXT: Running analysis: AAManager
 ; CHECK-LOOP-INV-NEXT: Running analysis: TargetLibraryAnalysis
 ; CHECK-LOOP-INV-NEXT: Running analysis: ScalarEvolutionAnalysis
-; CHECK-LOOP-INV-NEXT: Running analysis: TargetIRAnalysis
 ; CHECK-LOOP-INV-NEXT: Running analysis: InnerAnalysisManagerProxy<{{.*}}Loop
 ; CHECK-LOOP-INV-NEXT: Running pass: NoOpLoopPass
 ; CHECK-LOOP-INV-NEXT: Running pass: InvalidateAnalysisPass<{{.*}}LoopAnalysis
@@ -195,11 +197,11 @@
 ; CHECK-SCEV-INV-NEXT: Running analysis: LoopAnalysis
 ; CHECK-SCEV-INV-NEXT: Running analysis: DominatorTreeAnalysis
 ; CHECK-SCEV-INV-NEXT: Running analysis: AssumptionAnalysis
+; CHECK-SCEV-INV-NEXT: Running analysis: TargetIRAnalysis
 ; CHECK-SCEV-INV-NEXT: Running pass: LCSSAPass
 ; CHECK-SCEV-INV-NEXT: Running analysis: AAManager
 ; CHECK-SCEV-INV-NEXT: Running analysis: TargetLibraryAnalysis
 ; CHECK-SCEV-INV-NEXT: Running analysis: ScalarEvolutionAnalysis
-; CHECK-SCEV-INV-NEXT: Running analysis: TargetIRAnalysis
 ; CHECK-SCEV-INV-NEXT: Running analysis: InnerAnalysisManagerProxy<{{.*}}Loop
 ; CHECK-SCEV-INV-NEXT: Running pass: NoOpLoopPass
 ; CHECK-SCEV-INV-NEXT: Running pass: InvalidateAnalysisPass<{{.*}}ScalarEvolutionAnalysis
@@ -216,11 +218,11 @@
 ; CHECK-SCEV-INV-AFTER-DELETE-NEXT: Running analysis: LoopAnalysis
 ; CHECK-SCEV-INV-AFTER-DELETE-NEXT: Running analysis: DominatorTreeAnalysis
 ; CHECK-SCEV-INV-AFTER-DELETE-NEXT: Running analysis: AssumptionAnalysis
+; CHECK-SCEV-INV-AFTER-DELETE-NEXT: Running analysis: TargetIRAnalysis
 ; CHECK-SCEV-INV-AFTER-DELETE-NEXT: Running pass: LCSSAPass
 ; CHECK-SCEV-INV-AFTER-DELETE-NEXT: Running analysis: AAManager
 ; CHECK-SCEV-INV-AFTER-DELETE-NEXT: Running analysis: TargetLibraryAnalysis
 ; CHECK-SCEV-INV-AFTER-DELETE-NEXT: Running analysis: ScalarEvolutionAnalysis
-; CHECK-SCEV-INV-AFTER-DELETE-NEXT: Running analysis: TargetIRAnalysis
 ; CHECK-SCEV-INV-AFTER-DELETE-NEXT: Running analysis: InnerAnalysisManagerProxy<{{.*}}Loop
 ; CHECK-SCEV-INV-AFTER-DELETE-NEXT: Running pass: NoOpLoopPass
 ; CHECK-SCEV-INV-AFTER-DELETE-NEXT: Running pass: LoopDeletionPass
diff --git a/llvm/test/Other/new-pass-manager.ll b/llvm/test/Other/new-pass-manager.ll
--- a/llvm/test/Other/new-pass-manager.ll
+++ b/llvm/test/Other/new-pass-manager.ll
@@ -340,6 +340,7 @@
 ; CHECK-REPEAT-LOOP-PASS-NEXT: Running analysis: LoopAnalysis
 ; CHECK-REPEAT-LOOP-PASS-NEXT: Running analysis: DominatorTreeAnalysis
 ; CHECK-REPEAT-LOOP-PASS-NEXT: Running analysis: AssumptionAnalysis
+; CHECK-REPEAT-LOOP-PASS-NEXT: Running analysis: TargetIRAnalysis
 ; CHECK-REPEAT-LOOP-PASS-NEXT: Invalidating analysis: PreservedCFGCheckerAnalysis on foo
 ; CHECK-REPEAT-LOOP-PASS-NEXT: Running pass: LCSSAPass
 ; CHECK-REPEAT-LOOP-PASS-NEXT: Running analysis: PreservedCFGCheckerAnalysis on foo
@@ -350,7 +351,6 @@
 ; CHECK-REPEAT-LOOP-PASS-NEXT: Running analysis: TypeBasedAA
 ; CHECK-REPEAT-LOOP-PASS-NEXT: Running analysis: OuterAnalysisManagerProxy
 ; CHECK-REPEAT-LOOP-PASS-NEXT: Running analysis: ScalarEvolutionAnalysis
-; CHECK-REPEAT-LOOP-PASS-NEXT: Running analysis: TargetIRAnalysis
 ; CHECK-REPEAT-LOOP-PASS-NEXT: Running analysis: InnerAnalysisManagerProxy<{{.*}}>
 ; CHECK-REPEAT-LOOP-PASS-NEXT: Running pass: RepeatedPass
 ; CHECK-REPEAT-LOOP-PASS-NEXT: Running pass: NoOpLoopPass
diff --git a/llvm/test/Other/new-pm-lto-defaults.ll b/llvm/test/Other/new-pm-lto-defaults.ll
--- a/llvm/test/Other/new-pm-lto-defaults.ll
+++ b/llvm/test/Other/new-pm-lto-defaults.ll
@@ -51,6 +51,7 @@
 ; CHECK-O-NEXT: Running analysis: AAManager
 ; CHECK-O-NEXT: Running analysis: BasicAA
 ; CHECK-O1-NEXT: Running analysis: AssumptionAnalysis on foo
+; CHECK-O1-NEXT: Running analysis: TargetIRAnalysis
 ; CHECK-O1-NEXT: Running analysis: DominatorTreeAnalysis
 ; CHECK-O-NEXT: Running analysis: ScopedNoAliasAA
 ; CHECK-O-NEXT: Running analysis: TypeBasedAA
diff --git a/llvm/test/Other/new-pm-thinlto-prelink-pgo-defaults.ll b/llvm/test/Other/new-pm-thinlto-prelink-pgo-defaults.ll
--- a/llvm/test/Other/new-pm-thinlto-prelink-pgo-defaults.ll
+++ b/llvm/test/Other/new-pm-thinlto-prelink-pgo-defaults.ll
@@ -104,20 +104,16 @@
 ; CHECK-O-NEXT: Running analysis: AAManager
 ; CHECK-O-NEXT: Running analysis: BasicAA
 ; CHECK-O-NEXT: Running analysis: AssumptionAnalysis
+; CHECK-O-NEXT: Running analysis: TargetIRAnalysis
 ; CHECK-O-NEXT: Running analysis: DominatorTreeAnalysis
 ; CHECK-O-NEXT: Running analysis: ScopedNoAliasAA
 ; CHECK-O-NEXT: Running analysis: TypeBasedAA
 ; CHECK-O-NEXT: Running analysis: OuterAnalysisManagerProxy
 ; CHECK-O3-NEXT: Running pass: ArgumentPromotionPass
-; CHECK-O3-NEXT: Running analysis: TargetIRAnalysis
 ; CHECK-O2-NEXT: Running pass: OpenMPOptCGSCCPass
 ; CHECK-O3-NEXT: Running pass: OpenMPOptCGSCCPass
 ; CHECK-O-NEXT: Running pass: SROA
 ; CHECK-O-NEXT: Running pass: EarlyCSEPass
-; CHECK-O1-NEXT: Running analysis: TargetIRAnalysis on foo
-; CHECK-O2-NEXT: Running analysis: TargetIRAnalysis on foo
-; CHECK-Os-NEXT: Running analysis: TargetIRAnalysis on foo
-; CHECK-Oz-NEXT: Running analysis: TargetIRAnalysis on foo
 ; CHECK-O-NEXT: Running analysis: MemorySSAAnalysis
 ; CHECK-O23SZ-NEXT: Running pass: SpeculativeExecutionPass
 ; CHECK-O23SZ-NEXT: Running pass: JumpThreadingPass
diff --git a/llvm/test/Transforms/InferAddressSpaces/AMDGPU/builtin-assumed-addrspace.ll b/llvm/test/Transforms/InferAddressSpaces/AMDGPU/builtin-assumed-addrspace.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/Transforms/InferAddressSpaces/AMDGPU/builtin-assumed-addrspace.ll
@@ -0,0 +1,93 @@
+; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -infer-address-spaces -o - %s | FileCheck %s
+
+; CHECK-LABEL: @f0
+; CHECK: addrspacecast float* {{%.*}} to float addrspace(3)*
+; CHECK: getelementptr inbounds float, float addrspace(3)*
+; CHECK: load float, float addrspace(3)*
+define float @f0(float* %p) {
+entry:
+  %0 = bitcast float* %p to i8*
+  %1 = call i1 @llvm.amdgcn.is.shared(i8* %0)
+  tail call void @llvm.assume(i1 %1)
+  %2 = tail call i32 @llvm.amdgcn.workitem.id.x()
+  %idxprom = zext i32 %2 to i64
+  %arrayidx = getelementptr inbounds float, float* %p, i64 %idxprom
+  %3 = load float, float* %arrayidx, align 4
+  ret float %3
+}
+
+; CHECK-LABEL: @f1
+; CHECK: addrspacecast float* {{%.*}} to float addrspace(5)*
+; CHECK: getelementptr inbounds float, float addrspace(5)*
+; CHECK: load float, float addrspace(5)*
+define float @f1(float* %p) {
+entry:
+  %0 = bitcast float* %p to i8*
+  %1 = call i1 @llvm.amdgcn.is.private(i8* %0)
+  tail call void @llvm.assume(i1 %1)
+  %2 = tail call i32 @llvm.amdgcn.workitem.id.x()
+  %idxprom = zext i32 %2 to i64
+  %arrayidx = getelementptr inbounds float, float* %p, i64 %idxprom
+  %3 = load float, float* %arrayidx, align 4
+  ret float %3
+}
+
+; CHECK-LABEL: @f2
+; CHECK: addrspacecast float* {{%.*}} to float addrspace(1)*
+; CHECK: getelementptr inbounds float, float addrspace(1)*
+; CHECK: load float, float addrspace(1)*
+define float @f2(float* %p) {
+entry:
+  %0 = bitcast float* %p to i8*
+  %1 = call i1 @llvm.amdgcn.is.private(i8* %0)
+  %2 = xor i1 %1, -1
+  %3 = call i1 @llvm.amdgcn.is.shared(i8* %0)
+  %4 = xor i1 %3, -1
+  %5 = and i1 %2, %4
+  tail call void @llvm.assume(i1 %5)
+  %6 = tail call i32 @llvm.amdgcn.workitem.id.x()
+  %idxprom = zext i32 %6 to i64
+  %arrayidx = getelementptr inbounds float, float* %p, i64 %idxprom
+  %7 = load float, float* %arrayidx, align 4
+  ret float %7
+}
+
+; CHECK-LABEL: @g0
+; CHECK: if.then:
+; CHECK: addrspacecast float* {{%.*}} to float addrspace(3)*
+; CHECK: getelementptr inbounds float, float addrspace(3)*
+; CHECK: load float, float addrspace(3)*
+; CHECK: if.end:
+; CHECK: getelementptr inbounds float, float*
+; CHECK: load float, float*
+define float @g0(i32 %c, float* %p) {
+entry:
+  %tobool.not = icmp eq i32 %c, 0
+  br i1 %tobool.not, label %if.end, label %if.then
+
+if.then:
+  %0 = bitcast float* %p to i8*
+  %1 = call i1 @llvm.amdgcn.is.shared(i8* %0)
+  tail call void @llvm.assume(i1 %1)
+  %2 = tail call i32 @llvm.amdgcn.workitem.id.x()
+  %idxprom = zext i32 %2 to i64
+  %arrayidx = getelementptr inbounds float, float* %p, i64 %idxprom
+  %3 = load float, float* %arrayidx, align 4
+  %add = fadd float %3, 0.
+  br label %if.end
+
+if.end:
+  %s = phi float [ %add, %if.then ], [ 0., %entry ]
+  %4 = tail call i32 @llvm.amdgcn.workitem.id.y()
+  %idxprom2 = zext i32 %4 to i64
+  %arrayidx2 = getelementptr inbounds float, float* %p, i64 %idxprom2
+  %5 = load float, float* %arrayidx2, align 4
+  %add2 = fadd float %s, %5
+  ret float %add2
+}
+
+declare void @llvm.assume(i1)
+declare i1 @llvm.amdgcn.is.shared(i8* nocapture)
+declare i1 @llvm.amdgcn.is.private(i8* nocapture)
+declare i32 @llvm.amdgcn.workitem.id.x()
+declare i32 @llvm.amdgcn.workitem.id.y()
diff --git a/llvm/test/Transforms/InferAddressSpaces/NVPTX/builtin-assumed-addrspace.ll b/llvm/test/Transforms/InferAddressSpaces/NVPTX/builtin-assumed-addrspace.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/Transforms/InferAddressSpaces/NVPTX/builtin-assumed-addrspace.ll
@@ -0,0 +1,107 @@
+; RUN: opt -S -mtriple=nvptx64-nvidia-cuda -infer-address-spaces -o - %s | FileCheck %s
+
+; CHECK-LABEL: @f0
+; CHECK: addrspacecast float* {{%.*}} to float addrspace(4)*
+; CHECK: getelementptr inbounds float, float addrspace(4)*
+; CHECK: load float, float addrspace(4)*
+define float @f0(float* %p) {
+entry:
+  %0 = bitcast float* %p to i8*
+  %1 = call i1 @llvm.nvvm.isspacep.const(i8* %0)
+  tail call void @llvm.assume(i1 %1)
+  %2 = tail call i32 @llvm.nvvm.read.ptx.sreg.tid.x()
+  %idxprom = zext i32 %2 to i64
+  %arrayidx = getelementptr inbounds float, float* %p, i64 %idxprom
+  %3 = load float, float* %arrayidx, align 4
+  ret float %3
+}
+
+; CHECK-LABEL: @f1
+; CHECK: addrspacecast float* {{%.*}} to float addrspace(1)*
+; CHECK: getelementptr inbounds float, float addrspace(1)*
+; CHECK: load float, float addrspace(1)*
+define float @f1(float* %p) {
+entry:
+  %0 = bitcast float* %p to i8*
+  %1 = call i1 @llvm.nvvm.isspacep.global(i8* %0)
+  tail call void @llvm.assume(i1 %1)
+  %2 = tail call i32 @llvm.nvvm.read.ptx.sreg.tid.x()
+  %idxprom = zext i32 %2 to i64
+  %arrayidx = getelementptr inbounds float, float* %p, i64 %idxprom
+  %3 = load float, float* %arrayidx, align 4
+  ret float %3
+}
+
+; CHECK-LABEL: @f2
+; CHECK: addrspacecast float* {{%.*}} to float addrspace(5)*
+; CHECK: getelementptr inbounds float, float addrspace(5)*
+; CHECK: load float, float addrspace(5)*
+define float @f2(float* %p) {
+entry:
+  %0 = bitcast float* %p to i8*
+  %1 = call i1 @llvm.nvvm.isspacep.local(i8* %0)
+  tail call void @llvm.assume(i1 %1)
+  %2 = tail call i32 @llvm.nvvm.read.ptx.sreg.tid.x()
+  %idxprom = zext i32 %2 to i64
+  %arrayidx = getelementptr inbounds float, float* %p, i64 %idxprom
+  %3 = load float, float* %arrayidx, align 4
+  ret float %3
+}
+
+; CHECK-LABEL: @f3
+; CHECK: addrspacecast float* {{%.*}} to float addrspace(3)*
+; CHECK: getelementptr inbounds float, float addrspace(3)*
+; CHECK: load float, float addrspace(3)*
+define float @f3(float* %p) {
+entry:
+  %0 = bitcast float* %p to i8*
+  %1 = call i1 @llvm.nvvm.isspacep.shared(i8* %0)
+  tail call void @llvm.assume(i1 %1)
+  %2 = tail call i32 @llvm.nvvm.read.ptx.sreg.tid.x()
+  %idxprom = zext i32 %2 to i64
+  %arrayidx = getelementptr inbounds float, float* %p, i64 %idxprom
+  %3 = load float, float* %arrayidx, align 4
+  ret float %3
+}
+
+; CHECK-LABEL: @g0
+; CHECK: if.then:
+; CHECK: addrspacecast float* {{%.*}} to float addrspace(3)*
+; CHECK: getelementptr inbounds float, float addrspace(3)*
+; CHECK: load float, float addrspace(3)*
+; CHECK: if.end:
+; CHECK: getelementptr inbounds float, float*
+; CHECK: load float, float*
+define float @g0(i32 %c, float* %p) {
+entry:
+  %tobool.not = icmp eq i32 %c, 0
+  br i1 %tobool.not, label %if.end, label %if.then
+
+if.then:
+  %0 = bitcast float* %p to i8*
+  %1 = call i1 @llvm.nvvm.isspacep.shared(i8* %0)
+  tail call void @llvm.assume(i1 %1)
+  %2 = tail call i32 @llvm.nvvm.read.ptx.sreg.tid.x()
+  %idxprom = zext i32 %2 to i64
+  %arrayidx = getelementptr inbounds float, float* %p, i64 %idxprom
+  %3 = load float, float* %arrayidx, align 4
+  %add = fadd float %3, 0.
+  br label %if.end
+
+if.end:
+  %s = phi float [ %add, %if.then ], [ 0., %entry ]
+  %4 = tail call i32 @llvm.nvvm.read.ptx.sreg.tid.y()
+  %idxprom2 = zext i32 %4 to i64
+  %arrayidx2 = getelementptr inbounds float, float* %p, i64 %idxprom2
+  %5 = load float, float* %arrayidx2, align 4
+  %add2 = fadd float %s, %5
+  ret float %add2
+}
+
+declare void @llvm.assume(i1)
+declare i1 @llvm.nvvm.isspacep.const(i8*)
+declare i1 @llvm.nvvm.isspacep.global(i8*)
+declare i1 @llvm.nvvm.isspacep.local(i8*)
+declare i1 @llvm.nvvm.isspacep.shared(i8*)
+declare i32 @llvm.nvvm.read.ptx.sreg.tid.x()
+declare i32 @llvm.nvvm.read.ptx.sreg.tid.y()
diff --git a/llvm/test/Transforms/LoopRotate/pr35210.ll b/llvm/test/Transforms/LoopRotate/pr35210.ll
--- a/llvm/test/Transforms/LoopRotate/pr35210.ll
+++ b/llvm/test/Transforms/LoopRotate/pr35210.ll
@@ -11,11 +11,11 @@
 ; CHECK-NEXT: Running analysis: LoopAnalysis on f
 ; CHECK-NEXT: Running analysis: DominatorTreeAnalysis on f
 ; CHECK-NEXT: Running analysis: AssumptionAnalysis on f
+; CHECK-NEXT: Running analysis: TargetIRAnalysis on f
 ; CHECK-NEXT: Running pass: LCSSAPass on f
 ; CHECK-NEXT: Running analysis: AAManager on f
 ; CHECK-NEXT: Running analysis: TargetLibraryAnalysis on f
 ; CHECK-NEXT: Running analysis: ScalarEvolutionAnalysis on f
-; CHECK-NEXT: Running analysis: TargetIRAnalysis on f
 ; CHECK-NEXT: Running analysis: InnerAnalysisManagerProxy{{.*}} on f
 ; CHECK-NEXT: Running pass: LoopRotatePass on Loop at depth 1 containing: %bb,%bb4
 ; CHECK-NEXT: Folding loop latch bb4 into bb
@@ -29,12 +29,12 @@
 ; MSSA-NEXT: Running analysis: LoopAnalysis on f
 ; MSSA-NEXT: Running analysis: DominatorTreeAnalysis on f
 ; MSSA-NEXT: Running analysis: AssumptionAnalysis on f
+; MSSA-NEXT: Running analysis: TargetIRAnalysis on f
 ; MSSA-NEXT: Running pass: LCSSAPass on f
 ; MSSA-NEXT: Running analysis: MemorySSAAnalysis on f
 ; MSSA-NEXT: Running analysis: AAManager on f
 ; MSSA-NEXT: Running analysis: TargetLibraryAnalysis on f
 ; MSSA-NEXT: Running analysis: ScalarEvolutionAnalysis on f
-; MSSA-NEXT: Running analysis: TargetIRAnalysis on f
 ; MSSA-NEXT: Running analysis: InnerAnalysisManagerProxy{{.*}} on f
 ; MSSA-NEXT: Running pass: LoopRotatePass on Loop at depth 1 containing: %bb,%bb4
 ; MSSA-NEXT: Folding loop latch bb4 into bb
diff --git a/llvm/unittests/Analysis/AssumeBundleQueriesTest.cpp b/llvm/unittests/Analysis/AssumeBundleQueriesTest.cpp
--- a/llvm/unittests/Analysis/AssumeBundleQueriesTest.cpp
+++ b/llvm/unittests/Analysis/AssumeBundleQueriesTest.cpp
@@ -518,8 +518,7 @@
   BasicBlock::iterator First = F->begin()->begin();
   BasicBlock::iterator Second = F->begin()->begin();
   Second++;
-  AssumptionCacheTracker ACT;
-  AssumptionCache &AC = ACT.getAssumptionCache(*F);
+  AssumptionCache AC(*F);
   auto AR = AC.assumptionsFor(F->getArg(3));
   ASSERT_EQ(AR.size(), 0u);
   AR = AC.assumptionsFor(F->getArg(1));
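For context, the source-level pattern this change is meant to pick up can be written directly
against existing Clang builtins. The snippet below is a hedged sketch, not something this patch
adds or requires: it assumes HIP/AMDGPU device code, where __builtin_amdgcn_is_shared lowers to
llvm.amdgcn.is.shared and __builtin_assume lowers to llvm.assume, yielding IR shaped like the f0
test above.

// Hedged usage sketch (HIP device code assumed; not part of the patch).
// The assume tells the optimizer that p is an LDS pointer, so
// InferAddressSpaces can rewrite the dominated access to addrspace(3).
__device__ void scale_lds(float *p, int i, float s) {
  __builtin_assume(__builtin_amdgcn_is_shared(p));
  p[i] *= s;
}

On the NVPTX side the analogous predicates are the llvm.nvvm.isspacep.* intrinsics exercised by
the second new test file.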