Index: include/polly/ScopInfo.h
===================================================================
--- include/polly/ScopInfo.h
+++ include/polly/ScopInfo.h
@@ -64,6 +64,47 @@
 class ScopStmt;
 class ScopInfo;
 
+class MemAccInst : public Instruction {
+public:
+  Value *getValueOperand() {
+    if (isa<StoreInst>(this))
+      return cast<StoreInst>(this)->getValueOperand();
+    else
+      return this;
+  }
+  const Value *getValueOperand() const {
+    if (isa<StoreInst>(this))
+      return cast<StoreInst>(this)->getValueOperand();
+    else
+      return this;
+  }
+
+  Value *getPointerOperand() {
+    return getOperand(isa<StoreInst>(this)
+                          ? StoreInst::getPointerOperandIndex()
+                          : LoadInst::getPointerOperandIndex());
+  }
+  const Value *getPointerOperand() const {
+    return getOperand(isa<StoreInst>(this)
+                          ? StoreInst::getPointerOperandIndex()
+                          : LoadInst::getPointerOperandIndex());
+  }
+
+  bool isLoad() const { return isa<LoadInst>(this); }
+  bool isStore() const { return isa<StoreInst>(this); }
+
+  static inline bool classof(const LoadInst *I) { return true; }
+  static inline bool classof(const StoreInst *I) { return true; }
+  static inline bool classof(const Instruction *I) {
+    return isa<LoadInst>(I) || isa<StoreInst>(I);
+  }
+  static inline bool classof(const Value *V) {
+    return isa<Instruction>(V) && classof(cast<Instruction>(V));
+  }
+};
+
+typedef DenseMap<ScopStmt *, Value *> OutgoingValueMapTy;
+
 //===---------------------------------------------------------------------===//
 /// Maps from a loop to the affine function expressing its backedge taken count.
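Review note: the point of MemAccInst is that call sites stop duplicating load/store special cases; the classof overloads make it usable with the usual isa/cast/dyn_cast machinery. A minimal usage sketch (the helper below is hypothetical, not part of this patch):

    // Hypothetical helper: one code path for both loads and stores.
    static void inspectAccess(Instruction *Inst) {
      auto *Acc = dyn_cast<MemAccInst>(Inst);
      if (!Acc)
        return; // neither a load nor a store
      Value *Ptr = Acc->getPointerOperand(); // unified pointer operand
      Value *Val = Acc->getValueOperand();   // the load itself, or the stored value
      (void)Ptr;
      (void)Val;
    }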
@@ -316,7 +357,7 @@
   /// Note that there can also be a scalar write access for %PHI if used in a
   /// different BasicBlock, i.e. there can be a %PHI.phiops as well as a
   /// %PHI.s2a.
-  enum AccessOrigin { EXPLICIT, SCALAR, PHI };
+  enum AccessOrigin { EXPLICIT, SCALAR, PHI, MAPPED };
 
   /// @brief The access type of a memory access
   ///
@@ -447,6 +488,12 @@
   isl_map *NewAccessRelation;
   // @}
 
+  // For MAPPED locations
+  // @{
+  // Take the memory access information from here.
+  MemAccInst *MappedAliasAddr;
+  // @}
+
   unsigned getElemSizeInBytes() const { return ElemBytes; }
 
   bool isAffine() const { return IsAffine; }
@@ -527,7 +574,8 @@
                AccessType Type, Value *BaseAddress, unsigned ElemBytes,
                bool Affine, ArrayRef<const SCEV *> Subscripts,
                ArrayRef<const SCEV *> Sizes, Value *AccessValue,
-               AccessOrigin Origin, StringRef BaseName);
+               AccessOrigin Origin, StringRef BaseName,
+               MemAccInst *MappedAliasAddr);
   ~MemoryAccess();
 
   /// @brief Get the type of a memory access.
@@ -621,6 +669,13 @@
   /// statement.
   bool isStrideZero(__isl_take const isl_map *Schedule) const;
 
+  bool isMapped() const { return Origin == MAPPED; }
+
+  MemAccInst *getMappedAliasAddr() const {
+    assert(isMapped());
+    return MappedAliasAddr;
+  }
+
   /// @brief Whether this is an access of an explicit load or store in the IR.
   bool isExplicit() const { return Origin == EXPLICIT; }
 
@@ -1683,6 +1738,8 @@
   // The ScalarEvolution to help building Scop.
   ScalarEvolution *SE;
 
+  DominatorTree *DT;
+
   // LoopInfo for information about loops
   LoopInfo *LI;
 
@@ -1716,18 +1773,17 @@
   void clear();
 
   // Build the SCoP for Region @p R.
-  void buildScop(Region &R, DominatorTree &DT);
+  void buildScop(Region &R);
 
-  /// @brief Build an instance of MemoryAccess from the Load/Store instruction.
-  ///
-  /// @param Inst       The Load/Store instruction that access the memory
-  /// @param L          The parent loop of the instruction
-  /// @param R          The region on which to build the data access dictionary.
-  /// @param BoxedLoops The set of loops that are overapproximated in @p R.
-  /// @param ScopRIL    The required invariant loads equivalence classes.
-  void buildMemoryAccess(Instruction *Inst, Loop *L, Region *R,
-                         const ScopDetection::BoxedLoopsSetTy *BoxedLoops,
-                         const InvariantLoadsSetTy &ScopRIL);
+  Value *extractBasePointer(Value *Addr);
+
+  const ScopArrayInfo *getOrCreateSAI(MemAccInst *SI);
+
+  MemoryAccess *addMemoryAccessForMem(BasicBlock *BB, Instruction *AccInst,
+                                      MemoryAccess::AccessType AccType,
+                                      Value *AccessValue,
+                                      MemoryAccess::AccessOrigin Origin,
+                                      MemAccInst *AddrAlias);
 
   /// @brief Analyze and extract the cross-BB scalar dependences (or,
   /// dataflow dependencies) of an instruction.
@@ -1769,6 +1825,16 @@
                             Region *NonAffineSubRegion = nullptr,
                             bool IsExitBlock = false);
 
+  DenseMap<Instruction *, MemoryAccess *> ScalarWrites;
+  DenseMap<std::pair<Instruction *, ScopStmt *>, MemoryAccess *> ScalarReads;
+
+  MemoryAccess *getScalarWrite(Instruction *Val) const {
+    auto Iter = ScalarWrites.find(Val);
+    if (Iter == ScalarWrites.end())
+      return nullptr;
+    return Iter->second;
+  }
+
   /// @brief Create a new MemoryAccess object and add it to #AccFuncMap.
   ///
   /// @param BB          The block where the access takes place.
@@ -1785,30 +1851,18 @@
   ///
   /// @return The newly created MemoryAccess instance or NULL if the access
   ///         would have no effect.
-  MemoryAccess *addMemoryAccess(BasicBlock *BB, Instruction *Inst,
-                                MemoryAccess::AccessType Type,
-                                Value *BaseAddress, unsigned ElemBytes,
-                                bool Affine, Value *AccessValue,
-                                ArrayRef<const SCEV *> Subscripts,
-                                ArrayRef<const SCEV *> Sizes,
-                                MemoryAccess::AccessOrigin Origin);
+  MemoryAccess *addMemoryAccess(
+      BasicBlock *BB, Instruction *Inst, MemoryAccess::AccessType Type,
+      Value *BaseAddress, unsigned ElemBytes, bool Affine, Value *AccessValue,
+      ArrayRef<const SCEV *> Subscripts, ArrayRef<const SCEV *> Sizes,
+      MemoryAccess::AccessOrigin Origin, MemAccInst *AddrAlias);
 
   /// @brief Create a MemoryAccess that represents either a LoadInst or
   /// StoreInst.
   ///
   /// @param MemAccInst  The LoadInst or StoreInst.
-  /// @param Type        The kind of access.
-  /// @param BaseAddress The accessed array's base address.
-  /// @param ElemBytes   Size of accessed array element.
-  /// @param IsAffine    Whether all subscripts are affine expressions.
-  /// @param Subscripts  Access subscripts per dimension.
-  /// @param Sizes       The array dimension's sizes.
-  /// @param AccessValue Value read or written.
   /// @see AccessOrigin
-  void addExplicitAccess(Instruction *MemAccInst, MemoryAccess::AccessType Type,
-                         Value *BaseAddress, unsigned ElemBytes, bool IsAffine,
-                         ArrayRef<const SCEV *> Subscripts,
-                         ArrayRef<const SCEV *> Sizes, Value *AccessValue);
+  void addExplicitAccess(MemAccInst *MemAccInst);
 
   /// @brief Ensure that there is a MemoryAccess that writes a value's
   /// definition.
@@ -1858,6 +1912,164 @@
   /// @see AccessOrigin
   void addPHIReadAccess(PHINode *PHI);
 
+  bool isRedundantMemAcc(MemAccInst *AccInst) {
+    if (AccInst->isLoad())
+      return isRedundantLoad(cast<LoadInst>(AccInst));
+    return isRedundantStore(cast<StoreInst>(AccInst));
+  }
+
+  bool isRedundantPHIRead(PHINode *PHI) {
+    auto PHIAddrAlias = getPHIMappedAddr(PHI);
+    if (!PHIAddrAlias)
+      return false;
+
+    auto PHIStmt = scop->getStmtForBasicBlock(PHI->getParent());
+    bool CrossStmtUse = false;
+    bool IntraStmtUse = false;
+    for (auto &Use : PHI->uses()) {
+      auto User = dyn_cast<Instruction>(Use.getUser());
+      if (!User)
+        return false; // Be conservative with unknown users.
+
+      // Ignore redundant users.
+      if (auto StoreUser = dyn_cast<StoreInst>(User))
+        if (isRedundantStore(StoreUser))
+          continue;
+
+      auto UseStmt = scop->getStmtForBasicBlock(User->getParent());
+      if (UseStmt == PHIStmt)
+        IntraStmtUse = true;
+      else
+        CrossStmtUse = true;
+    }
+
+    if (IntraStmtUse)
+      return false;
+
+    if (CrossStmtUse) {
+      auto ScalarAddrAlias = getScalarMappedAddr(PHI);
+      if (!ScalarAddrAlias)
+        return false;
+      if (ScalarAddrAlias != PHIAddrAlias)
+        return false;
+    }
+
+    return true;
+  }
+
+  bool isRedundantScalarRead(Instruction *Val, ScopStmt *UseStmt) {
+    auto AddrAlias = getScalarMappedAddr(Val);
+    if (!AddrAlias)
+      return false;
+
+    for (auto &Use : Val->uses()) {
+      auto User = dyn_cast<Instruction>(Use.getUser());
+      if (!User)
+        return false; // Play safe with strange users.
+
+      // Not redundant if this is a non-redundant use within the same Stmt.
+      if (scop->getStmtForBasicBlock(User->getParent()) == UseStmt) {
+        if (auto StoreUser = dyn_cast<StoreInst>(User)) {
+          if (!isRedundantStore(StoreUser))
+            return false;
+        } else
+          return false;
+      }
+    }
+    return true;
+  }
+
+  bool isRedundantLoad(LoadInst *LI) {
+    auto LoadAddr = SE->getSCEV(LI->getPointerOperand());
+    for (auto &Use : LI->uses()) {
+      auto User = Use.getUser();
+
+      if (auto PHIUser = dyn_cast<PHINode>(User)) {
+        if (auto StoreAlias = getPHIMappedAddr(PHIUser)) {
+          auto StoreAddr = SE->getSCEV(StoreAlias->getPointerOperand());
+          if (StoreAddr == LoadAddr)
+            continue;
+        }
+      }
+
+      return false;
+    }
+    return true;
+  }
+
+  bool isRedundantStore(StoreInst *SI) {
+    auto Scalar = dyn_cast<Instruction>(SI->getValueOperand());
+    if (!Scalar)
+      return false; // Constants
+
+    auto AddrAlias = getScalarMappedAddr(Scalar);
+    if (!AddrAlias) {
+      auto PHIScalar = dyn_cast<PHINode>(Scalar);
+      if (PHIScalar)
+        AddrAlias = getPHIMappedAddr(PHIScalar);
+    }
+    if (!AddrAlias)
+      return false;
+
+    return SE->getSCEV(AddrAlias->getPointerOperand()) ==
+           SE->getSCEV(SI->getPointerOperand());
+  }
+
+  DenseMap<Instruction *, MemAccInst *> CollapseScalar;
+  DenseMap<PHINode *, MemAccInst *> CollapsePHI;
+  DenseMap<MemAccInst *, OutgoingValueMapTy> OutgoingCollapsedValue;
+
+  Value *getOutgoingCollapsedValue(MemAccInst *AddrAlias, ScopStmt *Node) {
+    auto AddrIter = OutgoingCollapsedValue.find(AddrAlias);
+    if (AddrIter == OutgoingCollapsedValue.end())
+      return nullptr;
+    auto &Map = AddrIter->second;
+    auto ValIter = Map.find(Node);
+    if (ValIter == Map.end())
+      return nullptr;
+    return ValIter->second;
+  }
+
+  // IDEA: Replace the MemAccInst by a tuple/object (const SCEV *, llvm::Type,
+  // size, AAMetadata).
+  MemAccInst *getScalarMappedAddr(Instruction *Inst) {
+    auto Iter = CollapseScalar.find(Inst);
+    if (Iter == CollapseScalar.end())
+      return nullptr;
+    return Iter->getSecond();
+  }
+
+  MemAccInst *getPHIMappedAddr(PHINode *PHI) {
+    auto Iter = CollapsePHI.find(PHI);
+    if (Iter == CollapsePHI.end())
+      return nullptr;
+    return Iter->getSecond();
+  }
+
+  bool isComputableAtEndingOf(Value *Val, BasicBlock *BB);
+
+  bool isStoreRedundant(StoreInst *SI) {
+    auto Scalar = dyn_cast<Instruction>(SI->getValueOperand());
+    if (!Scalar)
+      return false;
+    return cast<StoreInst>(getScalarMappedAddr(Scalar)) == SI;
+  }
+
+  void greedyCollapse(Region &R);
+  void greedyCollapseStore(Region &R, MemAccInst *Store);
+
+  DenseMap<Instruction *, OutgoingValueMapTy> AliveValuesCache;
+  OutgoingValueMapTy &getLiveEdges(Instruction *Val);
+
+  DenseMap<PHINode *, OutgoingValueMapTy> PHIEdgesCache;
+  OutgoingValueMapTy &getPHIEdges(PHINode *PHI);
+
+  bool mayConflict(MemAccInst *SI, MemAccInst *LI);
+
+  /// Edges where writing a value to the array doesn't matter because it will
+  /// be overwritten anyway.
+  OutgoingValueMapTy computeNoUseZones(MemAccInst *SI);
+
 public:
   static char ID;
   explicit ScopInfo();
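Review note: the collapse maps declared above are easiest to follow on the kind of code this patch targets. After -loop-rotate and -licm, a reduction keeps the array cell in a register and the loop body only sees a PHI; illustrative C (compare the un-XFAILed test/ScopInfo/licm_reduction.ll at the end of this patch):

    /* original: every access to B[j] is an explicit load/store */
    for (int i = 0; i < n; i += 1)
      B[j] += i;

    /* after LICM: only a scalar flows through the loop */
    double s = B[j];                /* hoisted load  */
    for (int i = 0; i < n; i += 1)
      s += i;                       /* s becomes a PHI */
    B[j] = s;                       /* sunk store    */

CollapseScalar/CollapsePHI record that s and its PHI can be mapped back onto B[j], so the statement is modeled as an array access instead of an opaque scalar dependence.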
Index: lib/Analysis/ScopInfo.cpp
===================================================================
--- lib/Analysis/ScopInfo.cpp
+++ lib/Analysis/ScopInfo.cpp
@@ -32,6 +32,7 @@
 #include "llvm/Analysis/AliasAnalysis.h"
 #include "llvm/Analysis/LoopInfo.h"
 #include "llvm/Analysis/LoopIterator.h"
+#include "llvm/Analysis/PostDominators.h"
 #include "llvm/Analysis/RegionIterator.h"
 #include "llvm/Analysis/ScalarEvolutionExpressions.h"
 #include "llvm/Support/Debug.h"
@@ -91,6 +92,10 @@
     cl::Hidden, cl::ZeroOrMore, cl::init(true), cl::cat(PollyCategory));
 
+static cl::opt<bool> ScalarCollapse(
+    "polly-scalar-collapse", cl::desc("Try mapping scalars to array elements"),
+    cl::Hidden, cl::ZeroOrMore, cl::init(true), cl::cat(PollyCategory));
+
 //===----------------------------------------------------------------------===//
 
 // Create a sequence of two schedules. Either argument may be null and is
@@ -644,13 +649,14 @@
                            Value *BaseAddress, unsigned ElemBytes, bool Affine,
                            ArrayRef<const SCEV *> Subscripts,
                            ArrayRef<const SCEV *> Sizes, Value *AccessValue,
-                           AccessOrigin Origin, StringRef BaseName)
+                           AccessOrigin Origin, StringRef BaseName,
+                           MemAccInst *AddrAlias)
     : Id(Id), Origin(Origin), AccType(Type), RedType(RT_NONE), Statement(Stmt),
       BaseAddr(BaseAddress), BaseName(BaseName), ElemBytes(ElemBytes),
       Sizes(Sizes.begin(), Sizes.end()), AccessInstruction(AccessInst),
       AccessValue(AccessValue), IsAffine(Affine),
       Subscripts(Subscripts.begin(), Subscripts.end()), AccessRelation(nullptr),
-      NewAccessRelation(nullptr) {}
+      NewAccessRelation(nullptr), MappedAliasAddr(AddrAlias) {}
 
 void MemoryAccess::realignParams() {
   isl_space *ParamSpace = Statement->getParent()->getParamSpace();
@@ -685,8 +691,12 @@
     break;
   }
   OS << "[Reduction Type: " << getReductionType() << "] ";
-  OS << "[Scalar: " << isImplicit() << "]\n";
-  OS.indent(16) << getOriginalAccessRelationStr() << ";\n";
+  if (isMapped())
+    OS << "[MAPPED]\n";
+  else
+    OS << "[Scalar: " << isImplicit() << "]\n";
+  if (AccessRelation)
+    OS.indent(16) << getOriginalAccessRelationStr() << ";\n";
   if (hasNewAccessRelation())
     OS.indent(11) << "new: " << getNewAccessRelationStr() << ";\n";
 }
@@ -874,7 +884,8 @@
 }
 
 void ScopStmt::addPHIRead(MemoryAccess *Access) {
-  assert(Access->isPHI() && Access->isRead());
+  assert(Access->isPHI() || Access->isMapped());
+  assert(Access->isRead());
   addLeadingLoad(Access);
 }
@@ -3263,29 +3274,51 @@
 extern MapInsnToMemAcc InsnToMemAcc;
 
-void ScopInfo::buildMemoryAccess(
-    Instruction *Inst, Loop *L, Region *R,
-    const ScopDetection::BoxedLoopsSetTy *BoxedLoops,
-    const InvariantLoadsSetTy &ScopRIL) {
+Value *ScopInfo::extractBasePointer(Value *Addr) {
+  Loop *L = isa<Instruction>(Addr)
+                ? LI->getLoopFor(cast<Instruction>(Addr)->getParent())
+                : nullptr;
+  const SCEV *AccessFunction = SE->getSCEVAtScope(Addr, L);
+  const SCEVUnknown *BasePointer =
+      dyn_cast<SCEVUnknown>(SE->getPointerBase(AccessFunction));
+  return BasePointer->getValue();
+}
+
+const ScopArrayInfo *ScopInfo::getOrCreateSAI(MemAccInst *SI) {
+  auto Addr = SI->getPointerOperand();
+  auto BasePtr = extractBasePointer(Addr);
+
+  return scop->getOrCreateScopArrayInfo(
+      BasePtr, Addr->getType()->getPointerElementType(),
+      ArrayRef<const SCEV *>());
+}
+
+MemoryAccess *ScopInfo::addMemoryAccessForMem(BasicBlock *BB,
+                                              Instruction *AccInst,
+                                              MemoryAccess::AccessType Type,
+                                              Value *AccessValue,
+                                              MemoryAccess::AccessOrigin Origin,
+                                              MemAccInst *AddrAlias) {
+  auto R = &scop->getRegion();
+  Loop *L = LI->getLoopFor(AddrAlias->getParent());
+  auto BoxedLoops = SD->getBoxedLoops(R);
+  auto StoreAlias = Origin == MemoryAccess::MAPPED ? AddrAlias : nullptr;
+  auto &ScopRIL = *SD->getRequiredInvariantLoads(R);
+  assert(AccessValue->getType() == AddrAlias->getValueOperand()->getType());
+
   unsigned Size;
-  Type *SizeType;
-  Value *Val;
-  enum MemoryAccess::AccessType Type;
+  llvm::Type *SizeType;
 
-  if (LoadInst *Load = dyn_cast<LoadInst>(Inst)) {
+  if (LoadInst *Load = dyn_cast<LoadInst>(AddrAlias)) {
     SizeType = Load->getType();
     Size = TD->getTypeStoreSize(SizeType);
-    Type = MemoryAccess::READ;
-    Val = Load;
   } else {
-    StoreInst *Store = cast<StoreInst>(Inst);
+    StoreInst *Store = cast<StoreInst>(AddrAlias);
     SizeType = Store->getValueOperand()->getType();
     Size = TD->getTypeStoreSize(SizeType);
-    Type = MemoryAccess::MUST_WRITE;
-    Val = Store->getValueOperand();
   }
 
-  auto Address = getPointerOperand(*Inst);
+  auto Address = getPointerOperand(*AddrAlias);
 
   const SCEV *AccessFunction = SE->getSCEVAtScope(Address, L);
   const SCEVUnknown *BasePointer =
@@ -3333,19 +3366,19 @@
       SizesSCEV.push_back(SE->getSCEV(ConstantInt::get(
           IntegerType::getInt64Ty(BasePtr->getContext()), Size)));
 
-      addExplicitAccess(Inst, Type, BasePointer->getValue(), Size, true,
-                        Subscripts, SizesSCEV, Val);
-      return;
+      return addMemoryAccess(BB, AccInst, Type, BasePointer->getValue(), Size,
+                             true, AccessValue, Subscripts, SizesSCEV, Origin,
+                             StoreAlias);
     }
   }
 
-  auto AccItr = InsnToMemAcc.find(Inst);
+  auto AccItr = InsnToMemAcc.find(AddrAlias);
   if (PollyDelinearize && AccItr != InsnToMemAcc.end()) {
-    addExplicitAccess(Inst, Type, BasePointer->getValue(), Size, true,
-                      AccItr->second.DelinearizedSubscripts,
-                      AccItr->second.Shape->DelinearizedSizes, Val);
-    return;
+    return addMemoryAccess(
+        BB, AccInst, Type, BasePointer->getValue(), Size, true, AccessValue,
+        AccItr->second.DelinearizedSubscripts,
+        AccItr->second.Shape->DelinearizedSizes, Origin, StoreAlias);
   }
 
   // Check if the access depends on a loop contained in a non-affine subregion.
@@ -3370,14 +3403,14 @@
   // FIXME: Size of the number of bytes of an array element, not the number of
   // elements as probably intended here.
   const SCEV *SizeSCEV =
-      SE->getConstant(TD->getIntPtrType(Inst->getContext()), Size);
+      SE->getConstant(TD->getIntPtrType(AddrAlias->getContext()), Size);
 
   if (!IsAffine && Type == MemoryAccess::MUST_WRITE)
     Type = MemoryAccess::MAY_WRITE;
 
-  addExplicitAccess(Inst, Type, BasePointer->getValue(), Size, IsAffine,
-                    ArrayRef<const SCEV *>(AccessFunction),
-                    ArrayRef<const SCEV *>(SizeSCEV), Val);
+  return addMemoryAccess(BB, AccInst, Type, BasePointer->getValue(), Size,
+                         IsAffine, AccessValue,
+                         ArrayRef<const SCEV *>(AccessFunction),
+                         ArrayRef<const SCEV *>(SizeSCEV), Origin, StoreAlias);
 }
 
 void ScopInfo::buildAccessFunctions(Region &R, Region &SR) {
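Review note: the defining property of addMemoryAccessForMem is that the affine access function is derived from AddrAlias (the load or store the value was mapped to), while AccessValue and Origin still describe the scalar being modeled. For a PHI collapsed onto a store to A[i], the printed access relations would change roughly like this (hypothetical isl notation in Polly's output style):

    ; before collapsing: modeled as a scalar location
    ;   { Stmt_body[i] -> MemRef_phi[] }
    ; after collapsing: relation inherited from the aliased store
    ;   { Stmt_body[i] -> MemRef_A[i] }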
@@ -3413,10 +3446,6 @@
 void ScopInfo::buildAccessFunctions(Region &R, BasicBlock &BB,
                                     Region *NonAffineSubRegion,
                                     bool IsExitBlock) {
-  Loop *L = LI->getLoopFor(&BB);
-
-  // The set of loops contained in non-affine subregions that are part of R.
-  const ScopDetection::BoxedLoopsSetTy *BoxedLoops = SD->getBoxedLoops(&R);
 
   // The set of loads that are required to be invariant.
   auto &ScopRIL = *SD->getRequiredInvariantLoads(&R);
@@ -3436,8 +3465,8 @@
       // invariant and will be hoisted for the SCoP to be processed. Though,
       // there might be other invariant accesses that will be hoisted and
       // that would allow to make a non-affine access affine.
-      if (isa<LoadInst>(Inst) || isa<StoreInst>(Inst))
-        buildMemoryAccess(Inst, L, &R, BoxedLoops, ScopRIL);
+      if (auto AccInst = dyn_cast<MemAccInst>(Inst))
+        addExplicitAccess(AccInst);
 
       if (isIgnoredIntrinsic(Inst))
         continue;
@@ -3454,13 +3483,11 @@
   }
 }
 
-MemoryAccess *ScopInfo::addMemoryAccess(BasicBlock *BB, Instruction *Inst,
-                                        MemoryAccess::AccessType Type,
-                                        Value *BaseAddress, unsigned ElemBytes,
-                                        bool Affine, Value *AccessValue,
-                                        ArrayRef<const SCEV *> Subscripts,
-                                        ArrayRef<const SCEV *> Sizes,
-                                        MemoryAccess::AccessOrigin Origin) {
+MemoryAccess *ScopInfo::addMemoryAccess(
+    BasicBlock *BB, Instruction *Inst, MemoryAccess::AccessType Type,
+    Value *BaseAddress, unsigned ElemBytes, bool Affine, Value *AccessValue,
+    ArrayRef<const SCEV *> Subscripts, ArrayRef<const SCEV *> Sizes,
+    MemoryAccess::AccessOrigin Origin, MemAccInst *AddrAlias) {
   ScopStmt *Stmt = scop->getStmtForBasicBlock(BB);
 
   // Do not create a memory access for anything not in the SCoP. It would be
@@ -3488,24 +3515,294 @@
   }
 
   AccList.emplace_back(Stmt, Inst, Id, Type, BaseAddress, ElemBytes, Affine,
-                       Subscripts, Sizes, AccessValue, Origin, BaseName);
+                       Subscripts, Sizes, AccessValue, Origin, BaseName,
+                       AddrAlias);
   return &AccList.back();
 }
 
-void ScopInfo::addExplicitAccess(
-    Instruction *MemAccInst, MemoryAccess::AccessType Type, Value *BaseAddress,
-    unsigned ElemBytes, bool IsAffine, ArrayRef<const SCEV *> Subscripts,
-    ArrayRef<const SCEV *> Sizes, Value *AccessValue) {
-  assert(isa<LoadInst>(MemAccInst) || isa<StoreInst>(MemAccInst));
-  assert(isa<LoadInst>(MemAccInst) == (Type == MemoryAccess::READ));
-  MemoryAccess *Acc = addMemoryAccess(
-      MemAccInst->getParent(), MemAccInst, Type, BaseAddress, ElemBytes,
-      IsAffine, AccessValue, Subscripts, Sizes, MemoryAccess::EXPLICIT);
+void ScopInfo::greedyCollapse(Region &R) {
+  for (auto BB : R.blocks()) {
+    for (auto &I : *BB) {
+      auto AccInst = dyn_cast<MemAccInst>(&I);
+      if (!AccInst)
+        continue;
+
+// WORKAROUND AHEAD:
+// getOrCreateScopArrayInfo requires that the base SAI of an indirect
+// access is created before the indirect access itself. This relies on
+// requesting the base accesses first. The workaround may still fail
+// because there is no guarantee that basic blocks are iterated through
+// in a topological order of the dominance tree (i.e. the base access
+// may sit in a BB before the indirect one, yet be visited in reverse
+// order).
+#if 1
+      getOrCreateSAI(AccInst);
+#endif
+    }
+  }
+
+  for (auto BB : R.blocks()) {
+    for (auto &I : *BB) {
+      if (!isa<StoreInst>(I))
+        continue;
+      greedyCollapseStore(R, cast<MemAccInst>(&I));
+    }
+  }
+}
+
+static bool isConflicting(OutgoingValueMapTy &Superset,
+                          OutgoingValueMapTy &Subset) {
+  for (auto &Edge : Subset) {
+    auto Stmt = Edge.first;
+    auto Val = Edge.second;
+    auto Iter = Superset.find(Stmt);
+    if (Iter == Superset.end())
+      return true;
+    if (!Iter->second)
+      continue;
+    if (Iter->second != Val)
+      return true;
+  }
+  return false;
+}
+
+static void fixOutgoingValues(OutgoingValueMapTy &Superset,
+                              OutgoingValueMapTy &Subset) {
+  for (auto &Edge : Subset) {
+    auto Stmt = Edge.first;
+    auto Val = Edge.second;
+    Superset[Stmt] = Val;
+  }
+}
+
+void ScopInfo::greedyCollapseStore(Region &R, MemAccInst *Store) {
+  auto Addr = Store->getPointerOperand();
+  auto StoredVal = dyn_cast<Instruction>(Store->getValueOperand());
+  if (!StoredVal)
+    return;
+
+  auto &NoUseZone = OutgoingCollapsedValue[Store] = computeNoUseZones(Store);
+  // if (NoUseZone.empty()) return; // TODO: Make this an option.
+
+  SmallVector<Instruction *, 8> Worklist;
+  Worklist.push_back(StoredVal);
+
+  do {
+    auto Val = Worklist.pop_back_val();
+
+    if (CollapseScalar.count(Val))
+      continue; // Already collapsed to this or another addr
+
+    // Don't try to demote synthesizable values.
+    // TODO: Might allow collapsing to multiple addresses
+    if (canSynthesize(Val, LI, SE, &R))
+      continue;
+
+    // If the address is not available at Val's location, don't collapse it.
+    // TODO: It might be available further downstream if we allow partial
+    // collapse
+    if (!isComputableAtEndingOf(Addr, Val->getParent()))
+      continue;
+
+    auto &Edges = getLiveEdges(Val);
+    if (isConflicting(NoUseZone, Edges))
+      continue;
+
+    if (!Edges.empty()) {
+      CollapseScalar[Val] = Store;
+      fixOutgoingValues(NoUseZone, Edges);
+    }
+
+    if (auto PHI = dyn_cast<PHINode>(Val)) {
+      if (CollapsePHI.count(PHI))
+        continue; // Already collapsed to this or another addr
+
+      // Check addr availability.
+      if (std::any_of(PHI->block_begin(), PHI->block_end(),
+                      [=](BasicBlock *PredBB) {
+                        return !isComputableAtEndingOf(Addr, PredBB);
+                      }))
+        continue;
+
+      auto &PHIEdges = getPHIEdges(PHI);
+      if (isConflicting(NoUseZone, PHIEdges))
+        continue;
+
+      CollapsePHI[PHI] = Store;
+      fixOutgoingValues(NoUseZone, PHIEdges);
+    }
+
+    for (auto &Use : Val->operands()) {
+      auto UseVal = dyn_cast<Instruction>(Use.get());
+      if (UseVal)
+        Worklist.push_back(UseVal);
+    }
+  } while (!Worklist.empty());
+}
+
+static bool isExitingBB(ScopStmt *Stmt, BasicBlock *BB) {
+  if (Stmt->isBlockStmt())
+    return true;
+  auto SubRegion = Stmt->getRegion();
+  for (auto Exiting : predecessors(SubRegion->getExit())) {
+    if (!SubRegion->contains(Exiting))
+      continue;
+    if (Exiting == BB)
+      return true;
+  }
+  return false;
+}
+
+OutgoingValueMapTy &ScopInfo::getLiveEdges(Instruction *Val) {
+  auto It = AliveValuesCache.find(Val);
+  if (It != AliveValuesCache.end())
+    return It->second;
+
+  auto &ScopRegion = scop->getRegion();
+
+  auto &Edges = AliveValuesCache[Val];
+  for (auto &Use : Val->uses()) {
+    auto User = Use.getUser();
+
+    Instruction *EffectiveUser = dyn_cast<Instruction>(User);
+    if (!EffectiveUser)
+      continue;
+    BasicBlock *EffectiveUserBB = EffectiveUser->getParent();
+
+    if (auto PHI = dyn_cast<PHINode>(User)) {
+      auto i = Use.getOperandNo();
+      EffectiveUserBB = PHI->getIncomingBlock(i);
+      EffectiveUser = EffectiveUserBB->getTerminator();
+    }
+
+    // FIXME: Does this handle split diamond flow between def and use?
+    auto UserNode = DT->getNode(EffectiveUserBB);
+    while (UserNode->getBlock() != Val->getParent()) {
+      auto ParentNode = UserNode->getIDom();
+      auto BB = ParentNode->getBlock();
+      if (!ScopRegion.contains(BB))
+        break;
+
+      auto Node = scop->getStmtForBasicBlock(BB);
+      if (Node && isExitingBB(Node, BB))
+        Edges[Node] = Val;
+      UserNode = ParentNode;
+    }
+  }
+  return Edges;
+}
+
+OutgoingValueMapTy &ScopInfo::getPHIEdges(PHINode *PHI) {
+  auto It = PHIEdgesCache.find(PHI);
+  if (It != PHIEdgesCache.end())
+    return It->second;
+
+  auto &ScopRegion = scop->getRegion();
+
+  // TODO: Maybe we need to mark all edges from the incoming value definition.
+  OutgoingValueMapTy &Result = PHIEdgesCache[PHI];
+  int NumIncoming = PHI->getNumIncomingValues();
+  for (int i = 0; i < NumIncoming; i += 1) {
+    auto IncomingValue = PHI->getIncomingValue(i);
+    auto IncomingBlock = PHI->getIncomingBlock(i);
+    if (!ScopRegion.contains(IncomingBlock))
+      continue;
+
+    auto Node = scop->getStmtForBasicBlock(IncomingBlock);
+    if (!Node)
+      continue;
+    if (isExitingBB(Node, IncomingBlock))
+      Result[Node] = IncomingValue;
+  }
+  return Result;
+}
+
+bool ScopInfo::mayConflict(MemAccInst *SI, MemAccInst *LI) {
+  auto AATest = AA->alias(MemoryLocation::get(SI), MemoryLocation::get(LI));
+  if (AATest == NoAlias)
+    return false;
+
+  auto LoadSAI = getOrCreateSAI(LI);
+  if (LoadSAI->getBasePtrOriginSAI())
+    LoadSAI = LoadSAI->getBasePtrOriginSAI();
+
+  auto WriteSAI = getOrCreateSAI(SI);
+  if (WriteSAI->getBasePtrOriginSAI())
+    WriteSAI = WriteSAI->getBasePtrOriginSAI();
+
+  return LoadSAI == WriteSAI;
+}
+
+OutgoingValueMapTy ScopInfo::computeNoUseZones(MemAccInst *SI) {
+  auto StoreBB = SI->getParent();
+  auto &PD = getAnalysis<PostDominatorTree>();
+
+  OutgoingValueMapTy Result;
+
+  SmallVector<BasicBlock *, 8> Worklist;
+  SmallVector<BasicBlock *, 8> Postdominated;
+  DenseSet<BasicBlock *> ReachableRead;
+  PD.getDescendants(SI->getParent(), Postdominated);
+  for (auto BB : Postdominated) {
+    for (auto &Inst : *BB) {
+      if (!isa<LoadInst>(Inst))
+        continue;
+      auto &Load = cast<LoadInst>(Inst);
+      if (mayConflict(SI, cast<MemAccInst>(&Load))) {
+        Worklist.push_back(BB);
+        break;
+      }
+    }
+  }
+
+  // TODO: Add the function return to the worklist; unless the array is local,
+  // the caller is free to read the memory.
+  while (!Worklist.empty()) {
+    auto BB = Worklist.pop_back_val();
+    if (ReachableRead.count(BB))
+      continue;
+    if (!PD.dominates(StoreBB, BB))
+      continue;
+    ReachableRead.insert(BB);
+    for (auto Pred : predecessors(BB))
+      Worklist.push_back(Pred);
+  }
+
+  for (auto BB : Postdominated) {
+    if (ReachableRead.count(BB))
+      continue;
+
+    for (auto Pred : predecessors(BB)) {
+      auto Stmt = scop->getStmtForBasicBlock(Pred);
+      if (!Stmt)
+        continue;
+      if (isExitingBB(Stmt, Pred))
+        Result[Stmt] = nullptr;
+    }
+  }
+
+  return Result;
+}
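Review note: computeNoUseZones carries the correctness argument of the collapse: a statement's outgoing edge belongs to the zone when the store postdominates it and no conflicting read can execute before the store runs again, so clobbering the array cell along that edge cannot change the program's result. A minimal illustrative case (hypothetical C, not from the patch):

    for (int i = 0; i < n; i += 1)
      A[j] = i; /* store S */
    /* Nothing reads A[j] between two executions of S, so the loop's
       back edge is a no-use zone for S: a scalar mapped onto A[j] may
       keep its live value there. */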
+
+void ScopInfo::addExplicitAccess(MemAccInst *MemAccInst) {
+  if (isRedundantMemAcc(MemAccInst))
+    return;
+
+  MemoryAccess *Acc = addMemoryAccessForMem(
+      MemAccInst->getParent(), MemAccInst,
+      MemAccInst->isStore() ? MemoryAccess::MUST_WRITE : MemoryAccess::READ,
+      MemAccInst->getValueOperand(), MemoryAccess::EXPLICIT, MemAccInst);
   if (Acc)
     Acc->getStatement()->addExplicitAccess(Acc);
 }
 
 void ScopInfo::ensureScalarStore(Instruction *Value) {
+  // MAPPED stores are added in a postprocessing step.
+  auto AddrAlias = getScalarMappedAddr(Value);
+  if (AddrAlias)
+    return;
+
   ScopStmt *Stmt = scop->getStmtForBasicBlock(Value->getParent());
 
   // Value not defined within the SCoP.
@@ -3519,7 +3816,7 @@
   MemoryAccess *Acc =
       addMemoryAccess(Value->getParent(), Value, MemoryAccess::MUST_WRITE,
                       Value, 1, true, Value, ArrayRef<const SCEV *>(),
-                      ArrayRef<const SCEV *>(), MemoryAccess::SCALAR);
+                      ArrayRef<const SCEV *>(), MemoryAccess::SCALAR, nullptr);
   if (Acc)
     Stmt->addScalarWrite(Acc);
 }
@@ -3561,9 +3858,20 @@
   if (UserStmt->lookupScalarReadOf(Value))
     return;
 
-  MemoryAccess *Acc = addMemoryAccess(
-      UserBB, nullptr, MemoryAccess::READ, Value, 1, true, Value,
-      ArrayRef<const SCEV *>(), ArrayRef<const SCEV *>(), MemoryAccess::SCALAR);
+  MemoryAccess *Acc;
+  auto AddrAlias = ValueInst ? getScalarMappedAddr(ValueInst) : nullptr;
+  if (AddrAlias) {
+    if (ValueInst && isRedundantScalarRead(ValueInst, UserStmt))
+      return;
+    Acc = addMemoryAccessForMem(UserBB, nullptr, MemoryAccess::READ, Value,
+                                MemoryAccess::SCALAR,
+                                cast<MemAccInst>(AddrAlias));
+  } else {
+    Acc = addMemoryAccess(UserBB, nullptr, MemoryAccess::READ, Value, 1, true,
+                          Value, ArrayRef<const SCEV *>(),
+                          ArrayRef<const SCEV *>(), MemoryAccess::SCALAR,
+                          nullptr);
+  }
 
   if (!Acc)
     return;
@@ -3576,6 +3884,10 @@
 void ScopInfo::ensurePHIWriteAccess(PHINode *PHI, BasicBlock *IncomingBlock,
                                     Value *IncomingValue, bool IsExitBlock) {
+  // MAPPED writes are added in a postprocessing step.
+  auto AddrAlias = getPHIMappedAddr(PHI);
+  if (AddrAlias)
+    return;
   ScopStmt *PHIStmt = scop->getStmtForBasicBlock(PHI->getParent());
   ScopStmt *IncomingStmt = scop->getStmtForBasicBlock(IncomingBlock);
@@ -3602,24 +3914,38 @@
       IncomingBlock, IncomingBlock->getTerminator(), MemoryAccess::MUST_WRITE,
       PHI, 1, true, IncomingStmt->isRegionStmt() ? PHI : IncomingValue,
       ArrayRef<const SCEV *>(), ArrayRef<const SCEV *>(),
-      IsExitBlock ? MemoryAccess::SCALAR : MemoryAccess::PHI);
+      IsExitBlock ? MemoryAccess::SCALAR : MemoryAccess::PHI, nullptr);
   assert(Acc);
   IncomingStmt->addPHIWrite(Acc);
 }
 
 void ScopInfo::addPHIReadAccess(PHINode *PHI) {
-  MemoryAccess *Acc = addMemoryAccess(
-      PHI->getParent(), PHI, MemoryAccess::READ, PHI, 1, true, PHI,
-      ArrayRef<const SCEV *>(), ArrayRef<const SCEV *>(), MemoryAccess::PHI);
+  MemoryAccess *Acc;
+  auto AddrAlias = getPHIMappedAddr(PHI);
+  if (AddrAlias) {
+    if (isRedundantPHIRead(PHI))
+      return;
+    Acc = addMemoryAccessForMem(PHI->getParent(), PHI, MemoryAccess::READ, PHI,
+                                MemoryAccess::MAPPED,
+                                cast<MemAccInst>(AddrAlias));
+  } else {
+    Acc = addMemoryAccess(PHI->getParent(), PHI, MemoryAccess::READ, PHI, 1,
+                          true, PHI, ArrayRef<const SCEV *>(),
+                          ArrayRef<const SCEV *>(), MemoryAccess::PHI,
+                          nullptr);
+  }
+
   if (Acc)
     Acc->getStatement()->addPHIRead(Acc);
 }
 
-void ScopInfo::buildScop(Region &R, DominatorTree &DT) {
+void ScopInfo::buildScop(Region &R) {
   unsigned MaxLoopDepth = getMaxLoopDepthInRegion(R, *LI, *SD);
-  scop = new Scop(R, AccFuncMap, *SD, *SE, DT, *LI, ctx, MaxLoopDepth);
+  scop = new Scop(R, AccFuncMap, *SD, *SE, *DT, *LI, ctx, MaxLoopDepth);
 
   buildStmts(R);
+
+  if (ScalarCollapse)
+    greedyCollapse(R);
+
   buildAccessFunctions(R, R);
 
   // In case the region does not have an exiting block we will later (during
@@ -3632,6 +3958,51 @@
   if (!R.getExitingBlock())
     buildAccessFunctions(R, *R.getExit(), nullptr, /* IsExitBlock */ true);
 
+  // TODO: Ensure that escaping values are actually written.
+  for (auto &AddrIter : OutgoingCollapsedValue) {
+    auto Addr = AddrIter.first;
+    auto &Map = AddrIter.second;
+    for (auto &BBIter : Map) {
+      auto Node = BBIter.first;
+      auto OutVal = BBIter.second;
+
+      if (!OutVal)
+        continue; // Happens if in a no-use zone, but no scalar/PHI has been
+                  // mapped to it.
+
+      if (auto OutLoad = dyn_cast<LoadInst>(OutVal)) {
+        if (SE->getSCEV(OutLoad->getPointerOperand()) ==
+            SE->getSCEV(Addr->getPointerOperand()))
+          continue;
+      }
+
+      if (auto OutInst = dyn_cast<Instruction>(OutVal)) {
+        auto OutDefBB = OutInst->getParent();
+        if (R.contains(OutDefBB)) {
+          auto OutDefRegion = scop->getStmtForBasicBlock(OutDefBB);
+          if (OutDefRegion != Node)
+            continue; // Store is redundant; it only needs to be stored in the
+                      // block defining it.
+        }
+      }
+
+      auto SomeBB = Node->isRegionStmt() ? Node->getRegion()->getEntry()
+                                         : Node->getBasicBlock();
+      // TODO: Replace the BasicBlock argument of addMemoryAccess with
+      // ScopStmt; the AccessInstruction also doesn't make sense here.
+      auto Acc = addMemoryAccessForMem(
+          SomeBB, SomeBB->getTerminator(), MemoryAccess::MUST_WRITE, OutVal,
+          MemoryAccess::MAPPED, cast<MemAccInst>(Addr));
+      assert(Acc);
+      Acc->getStatement()->addTrailingWrite(Acc);
+    }
+  }
+
+  OutgoingCollapsedValue.clear();
+
   scop->init(*AA);
 }
@@ -3645,6 +4016,11 @@
 }
 
 void ScopInfo::clear() {
+  CollapseScalar.clear();
+  CollapsePHI.clear();
+  OutgoingCollapsedValue.clear();
+  AliveValuesCache.clear();
+  PHIEdgesCache.clear();
   AccFuncMap.clear();
   if (scop) {
     delete scop;
@@ -3667,6 +4043,7 @@
   AU.addRequired<LoopInfoWrapperPass>();
   AU.addRequired<RegionInfoPass>();
   AU.addRequired<DominatorTreeWrapperPass>();
+  AU.addRequired<PostDominatorTree>();
   AU.addRequiredTransitive<ScalarEvolutionWrapperPass>();
   AU.addRequiredTransitive<ScopDetection>();
   AU.addRequired<AAResultsWrapperPass>();
@@ -3684,9 +4061,9 @@
   LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
   AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
   TD = &F->getParent()->getDataLayout();
-  DominatorTree &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
+  DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
 
-  buildScop(*R, DT);
+  buildScop(*R);
 
   DEBUG(scop->print(dbgs()));
 
@@ -3703,6 +4080,135 @@
   return false;
 }
 
+struct SCEVExpandibilityChecker
+    : public SCEVVisitor<SCEVExpandibilityChecker, bool> {
+private:
+  BasicBlock *LocationBlock;
+  bool IncludeBlock;
+  DominatorTree *DT;
+
+  SCEVExpandibilityChecker(BasicBlock *LocationBlock, bool IncludeBlock,
+                           DominatorTree *DT)
+      : LocationBlock(LocationBlock), IncludeBlock(IncludeBlock), DT(DT) {}
+
+public:
+  static bool isExpandableBefore(const SCEV *S, BasicBlock *LocationBlock,
+                                 bool IncludeBlock, DominatorTree *DT) {
+    SCEVExpandibilityChecker Checker(LocationBlock, IncludeBlock, DT);
+    return Checker.visit(S);
+  }
+
+  bool visit(const SCEV *Expr) {
+    return SCEVVisitor<SCEVExpandibilityChecker, bool>::visit(Expr);
+  }
+
+  bool visitConstant(const SCEVConstant *Constant) { return true; }
+
+  bool visitTruncateExpr(const SCEVTruncateExpr *Expr) {
+    return visit(Expr->getOperand());
+  }
+
+  bool visitZeroExtendExpr(const SCEVZeroExtendExpr *Expr) {
+    return visit(Expr->getOperand());
+  }
+
+  bool visitSignExtendExpr(const SCEVSignExtendExpr *Expr) {
+    return visit(Expr->getOperand());
+  }
+
+  bool visitAddExpr(const SCEVAddExpr *Expr) {
+    for (int i = 0, e = Expr->getNumOperands(); i < e; ++i)
+      if (!visit(Expr->getOperand(i)))
+        return false;
+
+    return true;
+  }
+
+  bool visitMulExpr(const SCEVMulExpr *Expr) {
+    for (int i = 0, e = Expr->getNumOperands(); i < e; ++i)
+      if (!visit(Expr->getOperand(i)))
+        return false;
+
+    return true;
+  }
+
+  bool visitUDivExpr(const SCEVUDivExpr *Expr) {
+    if (!visit(Expr->getLHS()))
+      return false;
+
+    if (!visit(Expr->getRHS()))
+      return false;
+
+    return true;
+  }
+
+  bool visitAddRecExpr(const SCEVAddRecExpr *Expr) {
+    if (!visit(Expr->getStart()))
+      return false;
+
+    for (size_t i = 0; i < Expr->getNumOperands(); ++i)
+      if (!visit(Expr->getOperand(i)))
+        return false;
+
+    auto L = Expr->getLoop();
+    return L->contains(LocationBlock);
+  }
+
+  bool visitSMaxExpr(const SCEVSMaxExpr *Expr) {
+    for (size_t i = 0; i < Expr->getNumOperands(); ++i)
+      if (!visit(Expr->getOperand(i)))
+        return false;
+
+    return true;
+  }
+
+  bool visitUMaxExpr(const SCEVUMaxExpr *Expr) {
+    for (size_t i = 0; i < Expr->getNumOperands(); ++i)
+      if (!visit(Expr->getOperand(i)))
+        return false;
+
+    return true;
+  }
+
+  bool visitUnknown(const SCEVUnknown *Expr) {
+    auto Val = Expr->getValue();
+    if (isa<Argument>(Val))
+      return true;
+
+    Instruction *Inst = dyn_cast<Instruction>(Val);
+
+    // Strange case
+    if (!Inst)
+      return false;
+
+    auto InstBB = Inst->getParent();
+    if (InstBB == LocationBlock)
+      return IncludeBlock;
+
+    return DT->dominates(InstBB, LocationBlock);
+  }
+};
+
+bool ScopInfo::isComputableAtEndingOf(Value *Val, BasicBlock *BB) {
+  if (SE->isSCEVable(Val->getType()))
+    if (const SCEV *Scev = SE->getSCEV(Val))
+      if (!isa<SCEVCouldNotCompute>(Scev))
+        if (SCEVExpandibilityChecker::isExpandableBefore(Scev, BB, true, DT))
+          return true;
+
+  auto Inst = dyn_cast<Instruction>(Val);
+  if (!Inst)
+    return false;
+  auto InstBB = Inst->getParent();
+
+  if (InstBB == BB)
+    return true;
+  return DT->dominates(InstBB, BB);
+}
+
 char ScopInfo::ID = 0;
 
 Pass *polly::createScopInfoPass() { return new ScopInfo(); }
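Review note: the new test below exercises the redundant-access logic on two back-to-back read-modify-write pairs on the same cell. Conceptually (illustrative C, assuming the collapse fires):

    A[i] += 1;   /* load L1, store S1 */
    A[i] += 1;   /* load L2, store S2 */

Once the intermediate value is mapped onto A[i], S1 is shadowed by S2 before any conflicting read outside the statement, so isRedundantStore should be able to drop it, leaving a single must-write to A[i] per iteration.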
Index: test/Isl/CodeGen/doubleacc.ll
===================================================================
--- /dev/null
+++ test/Isl/CodeGen/doubleacc.ll
@@ -0,0 +1,42 @@
+; RUN: opt %loadPolly -polly-detect-unprofitable -polly-no-early-exit -polly-scops -analyze < %s | FileCheck %s -check-prefix=SCOPS
+; RUN: opt %loadPolly -polly-detect-unprofitable -polly-no-early-exit -polly-codegen -S < %s | FileCheck %s -check-prefix=CODEGEN
+;
+; void foo(int n, float A[n]) {
+;   for (long i = 0; i < n; i += 1) {
+;     A[i] += 1;
+;     A[i] += 1;
+;   }
+; }
+;
+target datalayout = "e-m:w-i64:64-f80:128-n8:16:32:64-S128"
+
+define void @foo(i32 %n, float* %A) #1 {
+entry:
+  %tmp = sext i32 %n to i64
+  br label %for.cond
+
+for.cond:                                         ; preds = %for.inc, %entry
+  %indvars.iv = phi i64 [ %indvars.iv.next, %for.inc ], [ 0, %entry ]
+  %cmp = icmp slt i64 %indvars.iv, %tmp
+  br i1 %cmp, label %for.body, label %for.end
+
+for.body:                                         ; preds = %for.cond
+  %arrayidx = getelementptr inbounds float, float* %A, i64 %indvars.iv
+  %tmp1 = load float, float* %arrayidx, align 4
+  %add = fadd float %tmp1, 1.000000e+00
+  store float %add, float* %arrayidx, align 4
+  %arrayidx2 = getelementptr inbounds float, float* %A, i64 %indvars.iv
+  %tmp2 = load float, float* %arrayidx2, align 4
+  %add3 = fadd float %tmp2, 1.000000e+00
+  store float %add3, float* %arrayidx2, align 4
+  br label %for.inc
+
+for.inc:                                          ; preds = %for.body
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  br label %for.cond
+
+for.end:                                          ; preds = %for.cond
+  ret void
+}
+
+attributes #1 = { nounwind uwtable "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+sse,+sse2" "unsafe-fp-math"="false" "use-soft-float"="false" }
Index: test/ScopInfo/licm_reduction.ll
===================================================================
--- test/ScopInfo/licm_reduction.ll
+++ test/ScopInfo/licm_reduction.ll
@@ -1,7 +1,5 @@
-; RUN: opt %loadPolly -basicaa -loop-rotate -indvars -polly-prepare -polly-scops -analyze < %s | FileCheck %s
-; RUN: opt %loadPolly -basicaa -loop-rotate -indvars -licm -polly-prepare -polly-scops -analyze < %s | FileCheck %s
-;
-; XFAIL: *
+; RUN: opt %loadPolly -polly-detect-unprofitable -basicaa -loop-rotate -indvars -polly-prepare -polly-scops -analyze < %s | FileCheck %s
+; RUN: opt %loadPolly -polly-detect-unprofitable -basicaa -loop-rotate -indvars -licm -polly-prepare -polly-scops -analyze < %s | FileCheck %s
 ;
 ; void test(int n, double B[static const restrict n], int j) {
 ;   for (int i = 0; i < n; i += 1) {
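Review note: the transformation defaults to on (cl::init(true)). For triaging, it can be disabled with the usual boolean option syntax, assuming %loadPolly expands to the typical -load invocation:

    opt -load LLVMPolly.so -polly-scalar-collapse=false -polly-scops -analyze input.ll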