Index: CMakeLists.txt =================================================================== --- CMakeLists.txt +++ CMakeLists.txt @@ -189,12 +189,17 @@ lib/External/isl/imath/*.c) list( REMOVE_ITEM files ${jsonfiles} ${islfiles}) +set(MAYBE_IGNORE_LINEENDINGS) +if (WIN32) + set(MAYBE_IGNORE_LINEENDINGS "--strip-trailing-cr") +endif () + set(check_format_depends) set(update_format_depends) set(i 0) foreach (file IN LISTS files) add_custom_command(OUTPUT polly-check-format${i} - COMMAND clang-format -style=llvm ${file} | diff -u ${file} - + COMMAND clang-format -style=llvm ${file} | diff ${MAYBE_IGNORE_LINEENDINGS} -u ${file} - VERBATIM COMMENT "Checking format of ${file}..." ) Index: include/polly/CodeGen/BlockGenerators.h =================================================================== --- include/polly/CodeGen/BlockGenerators.h +++ include/polly/CodeGen/BlockGenerators.h @@ -366,7 +366,8 @@ /// @param Inst The instruction that might need reloaded values. /// @param BBMap A mapping from old values to their new values in this block. virtual void generateScalarLoads(ScopStmt &Stmt, const Instruction *Inst, - ValueMapT &BBMap); + ValueMapT &BBMap, LoopToScevMapT <S, + isl_id_to_ast_expr *NewAccesses); /// @brief Generate the scalar stores for the given statement. /// @@ -378,7 +379,8 @@ /// @param BB The basic block we generate code for. /// @param BBMap A mapping from old values to their new values in this block. virtual void generateScalarStores(ScopStmt &Stmt, BasicBlock *BB, - ValueMapT &BBMAp); + ValueMapT &BBMAp, LoopToScevMapT <S, + isl_id_to_ast_expr *NewAccesses); /// @brief Handle users of @p Inst outside the SCoP. /// @@ -447,6 +449,11 @@ LoopToScevMapT <S, isl_id_to_ast_expr *NewAccesses); + Value *generateLocationAccessed(ScopStmt &Stmt, const MemoryAccess &MA, + const Value *Pointer, ValueMapT &BBMap, + LoopToScevMapT <S, + isl_id_to_ast_expr *NewAccesses); + /// @param NewAccesses A map from memory access ids to new ast expressions, /// which may contain new access expressions for certain /// memory accesses. @@ -732,7 +739,8 @@ /// @param Inst The instruction that might need reloaded values. /// @param BBMap A mapping from old values to their new values in this block. virtual void generateScalarLoads(ScopStmt &Stmt, const Instruction *Inst, - ValueMapT &BBMap) override; + ValueMapT &BBMap, LoopToScevMapT <S, + isl_id_to_ast_expr *NewAccesses) override; /// @brief Generate the scalar stores for the given statement. /// @@ -744,7 +752,8 @@ /// @param BB The basic block we generate code for. /// @param BBMap A mapping from old values to their new values in this block. virtual void generateScalarStores(ScopStmt &Stmt, BasicBlock *BB, - ValueMapT &BBMAp) override; + ValueMapT &BBMAp, LoopToScevMapT <S, + isl_id_to_ast_expr *NewAccesses) override; /// @brief Copy a single PHI instruction. /// Index: include/polly/ScopInfo.h =================================================================== --- include/polly/ScopInfo.h +++ include/polly/ScopInfo.h @@ -66,6 +66,47 @@ class Comparison; class SCEVAffFunc; +// TODO: Rename to LoadOrStoreInst/MemAccInst, MemInst could include fences etc. +class MemInst : public Instruction { +public: + Value *getScalar() { + if (isa(this)) + return cast(this)->getValueOperand(); + else + return this; + } + const Value *getScalar() const { + if (isa(this)) + return cast(this)->getValueOperand(); + else + return this; + } + + // RENAME: getAccessedAddr + Value *getPointerOperand() { + return getOperand(isa(this) + ? 
StoreInst::getPointerOperandIndex() + : LoadInst::getPointerOperandIndex()); + } + const Value *getPointerOperand() const { + return getOperand(isa(this) + ? StoreInst::getPointerOperandIndex() + : LoadInst::getPointerOperandIndex()); + } + + bool isLoad() const { return isa(this); } + bool isStore() const { return isa(this); } + + static inline bool classof(const LoadInst *I) { return true; } + static inline bool classof(const StoreInst *I) { return true; } + static inline bool classof(const Instruction *I) { + return isa(I) || isa(I); + } + static inline bool classof(const Value *V) { + return isa(V) && classof(cast(V)); + } +}; + class Comparison { const SCEV *LHS; const SCEV *RHS; @@ -214,10 +255,19 @@ /// @brief Represent memory accesses in statements. class MemoryAccess { + friend class ScopInfo; friend class Scop; friend class ScopStmt; public: + enum AccessOrigin { + AO_MEMINST, + AO_DEDICATED_SCALAR, + AO_MAPPED_SCALAR, + AO_DEDICATED_PHI, + AO_MAPPED_PHI + }; + /// @brief The access type of a memory access /// /// There are three kind of access types: @@ -267,7 +317,7 @@ isl_id *Id; /// @brief Is this MemoryAccess modeling special PHI node accesses? - bool IsPHI; + AccessOrigin Origin; /// @brief Whether it a reading or writing access, and if writing, whether it /// is conditional (MAY_WRITE). @@ -351,12 +401,20 @@ isl_map *NewAccessRelation; // @} + // For Mapped locations + // @{ + // Take the memory access information from here + StoreInst *MappedAliasAddr; + // @} + unsigned getElemSizeInBytes() const { return ElemBytes; } bool isAffine() const { return IsAffine; } /// @brief Is this MemoryAccess modeling special PHI node accesses? - bool isPHI() const { return IsPHI; } + bool isPHI() const { return isDedicatedPHI() || isMappedPHI(); } + bool isDedicatedPHI() const { return Origin == AO_DEDICATED_PHI; } + bool isMappedPHI() const { return Origin == AO_MAPPED_PHI; } void printIR(raw_ostream &OS) const; @@ -437,8 +495,9 @@ MemoryAccess(Instruction *AccessInst, __isl_take isl_id *Id, AccessType Type, Value *BaseAddress, const SCEV *Offset, unsigned ElemBytes, bool Affine, ArrayRef Subscripts, - ArrayRef Sizes, Value *AccessValue, bool IsPHI, - StringRef BaseName); + ArrayRef Sizes, Value *AccessValue, + AccessOrigin Origin, StringRef BaseName, + StoreInst *MappedAliasAddr); ~MemoryAccess(); /// @brief Get the type of a memory access. @@ -533,7 +592,24 @@ bool isStrideZero(__isl_take const isl_map *Schedule) const; /// @brief Check if this is a scalar memory access. - bool isScalar() const; + // TODO: obsolete + // bool isScalar() const; + + bool isImplicit() const { return Origin != AO_MEMINST; } + bool isDedicated() const { return isDedicatedScalar() || isDedicatedPHI(); } + + bool isScalar() const { return isDedicatedScalar() || isMappedScalar(); } + bool isDedicatedScalar() const { return Origin == AO_DEDICATED_SCALAR; } + bool isMappedScalar() const { return Origin == AO_MAPPED_SCALAR; } + Value *getRepresentedScalar() const { + assert(isScalar()); + return AccessValue; + } + bool isMapped() const { return isMappedScalar() || isMappedPHI(); } + StoreInst *getMappedAliasAddr() const { + assert(isMapped()); + return MappedAliasAddr; + } /// @brief Get the statement that contains this memory access. ScopStmt *getStatement() const { return Statement; } @@ -563,6 +639,64 @@ /// @brief Print the MemoryAccess to stderr. 
void dump() const; + +#ifndef NDEBUG + void verify() const { + switch (Origin) { + case AO_MEMINST: + if (isRead() && !isWrite()) { + assert(isa(AccessInstruction)); + assert(cast(AccessInstruction) == AccessValue); + } else if (!isRead() && isWrite()) { + assert(isa(AccessInstruction)); + assert(cast(AccessInstruction)->getValueOperand() == + AccessValue); + } else + llvm_unreachable("Either read or write"); + break; + case AO_DEDICATED_SCALAR: + assert(BaseAddr == AccessValue); + if (isRead() && !isWrite()) { + } else if (!isRead() && isWrite()) { + assert(AccessValue == AccessInstruction); + } else + llvm_unreachable("Unconsistent access type"); + break; + case AO_MAPPED_SCALAR: + assert(BaseAddr != AccessValue); + if (isRead() && !isWrite()) { + } else if (!isRead() && isWrite()) { + } else + llvm_unreachable("Unconsistent access type"); + break; + case AO_DEDICATED_PHI: + if (isRead() && !isWrite()) { + assert(isa(AccessInstruction)); + assert(cast(AccessValue) + ->getBasicBlockIndex(AccessInstruction->getParent()) >= 0); + } else if (!isRead() && isWrite()) { + assert(isa(AccessValue)); + assert(isa(AccessInstruction)); + assert(AccessValue == AccessInstruction); + assert(BaseAddr == AccessValue); + } else + llvm_unreachable("Either read or write"); + break; + case AO_MAPPED_PHI: + if (isRead() && !isWrite()) { + assert(isa(AccessInstruction)); + assert(cast(AccessValue) + ->getBasicBlockIndex(AccessInstruction->getParent()) >= 0); + } else if (!isRead() && isWrite()) { + assert(isa(AccessValue)); + assert(isa(AccessInstruction)); + assert(AccessValue == AccessInstruction); + } else + llvm_unreachable("Either read or write"); + break; + } + } +#endif }; llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, @@ -827,6 +961,12 @@ /// @return The loop at a certain dimension. const Loop *getLoopForDimension(unsigned Dimension) const; + Loop *getInnermostLoop() const { + if (NestLoops.empty()) + return nullptr; + return NestLoops.back(); + } + /// @brief Align the parameters in the statement to the scop context void realignParams(); @@ -1120,6 +1260,70 @@ } //@} + SmallVector getAccessFunctionsFor(Instruction *Inst) { + SmallVector Result; + auto BB = Inst->getParent(); + if (!AccFuncMap.count(BB)) + return Result; + + auto &Accs = AccFuncMap[BB]; + for (auto &Acc : Accs) { + if (Acc.getAccessInstruction() == Inst) + Result.push_back(&Acc); + } + + return Result; + } + + MemoryAccess *getScalarWriteAccessFor(Instruction *Inst) const { + auto BB = Inst->getParent(); + if (!AccFuncMap.count(BB)) + return nullptr; + + MemoryAccess *Result = nullptr; + for (auto &Acc : AccFuncMap[BB]) { + if (Acc.getAccessInstruction() != Inst) + continue; + if (!Acc.isWrite()) + continue; + if (!Acc.isScalar()) + continue; + + assert(!Result); + Result = &Acc; +#ifdef NDEBUG + break; +#endif + } + return Result; + } + + MemoryAccess *getScalarReadAccessFor(const Use &Use) { + auto Inst = cast(Use.getUser()); + + auto BB = Inst->getParent(); + if (!AccFuncMap.count(BB)) + return nullptr; + + MemoryAccess *Result = nullptr; + for (auto &Acc : AccFuncMap[BB]) { + if (Acc.getAccessInstruction() != Inst) + continue; + if (!Acc.isRead()) + continue; + if (!Acc.isScalar()) + continue; + // TODO: Check if matching OperandNo + + assert(!Result); + Result = &Acc; +#ifdef NDEBUG + break; +#endif + } + return Result; + } + /// @brief Print data access information. /// /// @param OS The output stream the access functions is printed to. 
@@ -1407,6 +1611,12 @@ return O; } +typedef std::pair EdgeTy; +typedef DenseSet EdgeSetTy; + +typedef DenseMap OutgoingValueMapTy; +// typedef DenseMap OutgoingValueMapTy; + ///===---------------------------------------------------------------------===// /// @brief Build the Polly IR (Scop and ScopStmt) on a Region. /// @@ -1418,6 +1628,8 @@ // The ScalarEvolution to help building Scop. ScalarEvolution *SE; + DominatorTree *DT; + // LoopInfo for information about loops LoopInfo *LI; @@ -1430,6 +1642,8 @@ // Target data for element size computing. const DataLayout *TD; + RegionInfo *RI; + // Access function of statements (currently BasicBlocks) . // // This owns all the MemoryAccess objects of the Scop created in this pass. It @@ -1450,13 +1664,22 @@ // Build the SCoP for Region @p R. Scop *buildScop(Region &R, DominatorTree &DT); + Value *extractBasePointer(Value *Addr); + const ScopArrayInfo *getOrCreateSAI(MemInst *SI); + + MemoryAccess *addMemoryAccessForMem(Instruction *AccessInstruction, + Value *Scalar, + enum MemoryAccess::AccessType Type, + MemInst *Inst, + MemoryAccess::AccessOrigin Origin); + /// @brief Build an instance of MemoryAccess from the Load/Store instruction. /// /// @param Inst The Load/Store instruction that access the memory /// @param L The parent loop of the instruction /// @param R The region on which to build the data access dictionary. /// @param BoxedLoops The set of loops that are overapproximated in @p R. - void buildMemoryAccess(Instruction *Inst, Loop *L, Region *R, + void buildMemoryAccess(MemInst *Inst, Loop *L, Region *R, const ScopDetection::BoxedLoopsSetTy *BoxedLoops); /// @brief Analyze and extract the cross-BB scalar dependences (or, @@ -1468,7 +1691,7 @@ /// /// @return True if the Instruction is used in other BB and a scalar write /// Access is required. - bool buildScalarDependences(Instruction *Inst, Region *R, + void buildScalarDependences(Instruction *Inst, Region *R, Region *NonAffineSubRegio); /// @brief Create MemoryAccesses for the given PHI node in the given region. @@ -1510,19 +1733,22 @@ /// @param Subscripts Access subscripts per dimension. /// @param Sizes The array diminsion's sizes. /// @param IsPHI Whether this is an emulated PHI node. 
- void addMemoryAccess(BasicBlock *BB, Instruction *Inst, - MemoryAccess::AccessType Type, Value *BaseAddress, - const SCEV *Offset, unsigned ElemBytes, bool Affine, - Value *AccessValue, ArrayRef Subscripts, - ArrayRef Sizes, bool IsPHI); + MemoryAccess * + addMemoryAccess(BasicBlock *BB, Instruction *Inst, + MemoryAccess::AccessType Type, Value *BaseAddress, + const SCEV *Offset, unsigned ElemBytes, bool Affine, + Value *AccessValue, ArrayRef Subscripts, + ArrayRef Sizes, + MemoryAccess::AccessOrigin Origin, StoreInst *AddrAlias); void addMemoryAccess(BasicBlock *BB, Instruction *Inst, MemoryAccess::AccessType Type, Value *BaseAddress, const SCEV *Offset, unsigned ElemBytes, bool Affine, - Value *AccessValue, bool IsPHI = false) { + Value *AccessValue, MemoryAccess::AccessOrigin Origin, + StoreInst *AddrAlias) { addMemoryAccess(BB, Inst, Type, BaseAddress, Offset, ElemBytes, Affine, AccessValue, ArrayRef(), - ArrayRef(), IsPHI); + ArrayRef(), Origin, AddrAlias); } void addMemoryAccess(BasicBlock *BB, Instruction *Inst, @@ -1531,9 +1757,393 @@ ArrayRef Subscripts, ArrayRef Sizes, Value *AccessValue) { addMemoryAccess(BB, Inst, Type, BaseAddress, Offset, ElemBytes, Affine, - AccessValue, Subscripts, Sizes, false); + AccessValue, Subscripts, Sizes, MemoryAccess::AO_MEMINST, + nullptr); + } + + void addScalarWriteAccess(Instruction *Inst) { + auto Store = getScalarMappedAddr(Inst); + if (!Store) { + addMemoryAccess(Inst->getParent(), Inst, MemoryAccess::MUST_WRITE, Inst, + ZeroOffset, 1, true, Inst, ArrayRef(), + ArrayRef(), + MemoryAccess::AO_DEDICATED_SCALAR, nullptr); + return; + } + return; + } + + void addScalarReadAccess(Instruction *Inst, Instruction *Scalar) { + auto Store = getScalarMappedAddr(Scalar); + if (!Store) { + addMemoryAccess(Inst->getParent(), Inst, MemoryAccess::READ, Scalar, + ZeroOffset, 1, true, Scalar, ArrayRef(), + ArrayRef(), + MemoryAccess::AO_DEDICATED_SCALAR, nullptr); + return; + } + +#if 0 + // Check whether redundant + bool AllUsersMapped = true; + for (auto User : Inst->users()) { + if (User == Store) + continue; + + auto UserInst = dyn_cast(User); + if (!UserInst) { + AllUsersMapped = false; + break; + } + + if (getPHIMappedAddr(UserInst) != Store) { + AllUsersMapped = false; + break; + } + } + if (AllUsersMapped) + return; +#endif + + addMemoryAccessForMem(Inst, Scalar, MemoryAccess::READ, + cast(Store), MemoryAccess::AO_MAPPED_SCALAR); + } + + void addPHIWriteAccess(TerminatorInst *Inst, PHINode *PHI, Value *AccessValue, + bool IsPHI = false) { + auto AddrAlias = getPHIMappedAddr(PHI); + if (!AddrAlias) { + addMemoryAccess( + Inst->getParent(), Inst, MemoryAccess::MUST_WRITE, PHI, ZeroOffset, 1, + true, AccessValue, ArrayRef(), ArrayRef(), + IsPHI ? MemoryAccess::AO_DEDICATED_PHI : MemoryAccess::AO_MEMINST, + nullptr); + } + } + void addPHIReadAccess(PHINode *PHI, bool IsPHI = false) { + auto AddrAlias = getPHIMappedAddr(PHI); + if (!AddrAlias) { + addMemoryAccess( + PHI->getParent(), PHI, MemoryAccess::READ, PHI, ZeroOffset, 1, true, + PHI, ArrayRef(), ArrayRef(), + IsPHI ? MemoryAccess::AO_DEDICATED_PHI : MemoryAccess::AO_MEMINST, + nullptr); + return; + } + + // if (AddrAlias == getScalarMappedAddr(PHI)) + // return; + addMemoryAccessForMem( + PHI, PHI, MemoryAccess::READ, cast(AddrAlias), + IsPHI ? 
MemoryAccess::AO_MAPPED_PHI : MemoryAccess::AO_MEMINST); + } + + DenseMap ScalarWrites; + DenseMap, MemoryAccess *> ScalarReads; + + // DenseMap, MemoryAccess *> + // MappedWrites; + // DenseMap, MemoryAccess *> MappedReads; + + MemoryAccess *getScalarWrite(Instruction *Val) const { + auto Iter = ScalarWrites.find(Val); + if (Iter == ScalarWrites.end()) + return nullptr; + return Iter->second; } + MemoryAccess *getScalarRead(Instruction *Val, RegionNode *Node) const { + auto Iter = ScalarReads.find(std::make_pair(Val, Node)); + if (Iter == ScalarReads.end()) + return nullptr; + return Iter->second; + } + + MemoryAccess *ensureScalarWrite(Instruction *Val) { + +#if 0 + auto LastWrite = getLastWriteInBB(Val); + if (LastWrite != Val) { + return Acc = ensureScalarWrite(LastWrite); + } + + auto Store = getScalarMappedAddr(Val); + if (Store) { + if (isa(Val) && Store == getPHIMappedAddr(cast(Val))) + return Acc = nullptr; + if (isa(Val) && isRedundantMemAcc(cast(Val),Store)) + return Acc = nullptr; + return Acc = addMemoryAccessForMem(Val, Val, MemoryAccess::MUST_WRITE, + cast(Store)); + } +#endif + + auto Store = getScalarMappedAddr(Val); + if (Store) { + return nullptr; // Written at the end of buildScop +#if 0 + auto Node = findStmtRegion(Val->getParent()); + auto &Acc = MappedWrites[std::make_pair(Store, Node)]; + if (Acc) + return Acc; + + if (Node->isSubRegion()) { + llvm_unreachable("unimplemented"); + } else { + auto BB = Node->getNodeAs(); + return Acc = addMemoryAccessForMem(BB->getTerminator(), Val, + MemoryAccess::MUST_WRITE, + cast(Store)); + } +#endif + } + + auto Iter = ScalarWrites.find(Val); + if (Iter != ScalarWrites.end()) + return Iter->second; + + auto &Acc = ScalarWrites[Val]; + return Acc = addMemoryAccess( + Val->getParent(), Val, MemoryAccess::MUST_WRITE, Val, ZeroOffset, + 1, true, Val, ArrayRef(), ArrayRef(), + MemoryAccess::AO_DEDICATED_SCALAR, nullptr); + } + + Instruction *getLastWriteInBB(Instruction *Inst) { + auto BB = Inst->getParent(); + auto InstMapped = getScalarMappedAddr(Inst); + + // Since scalars are SSA they have only one write/def + if (!InstMapped) + return Inst; + + for (auto &Followup : make_range(++BasicBlock::iterator(Inst), BB->end())) { + auto FollowMapped = getScalarMappedAddr(&Followup); + if (FollowMapped == InstMapped) + return &Followup; + } + return Inst; + } + +#if 0 + bool isRedundantMemAcc(MemInst *AccInst, StoreInst *ScalarMapped) { + if (!ScalarMapped) + return false; + + return SE->getSCEV(ScalarMapped->getPointerOperand()) == + SE->getSCEV(AccInst->getPointerOperand()); + } + + bool isRedundantMemAcc(MemInst *AccInst) { + auto Scalar = dyn_cast(AccInst->getScalar()); + if (!Scalar) + return false; + + return isRedundantMemAcc(AccInst, getScalarMappedAddr(Scalar)); + } +#endif + + bool isRedundantMemAcc(MemInst *AccInst) { + if (AccInst->isLoad()) + return isRedundantLoad(cast(AccInst)); + return isRedundantStore(cast(AccInst)); + } + + bool isRedundantPHIRead(PHINode *PHI) { + auto PHIAddrAlias = getPHIMappedAddr(PHI); + if (!PHIAddrAlias) + return false; + + auto PHIStmt = findStmtRegion(PHI->getParent()); + for (auto &Use : PHI->uses()) { + auto User = dyn_cast(Use.getUser()); + if (!User) + return false; //??? + + // Not redundant if this PHI has a non-redundant use within the same Stmt. 
+ if (findStmtRegion(User->getParent()) == PHIStmt) { + if (auto StoreUser = dyn_cast(User)) { + if (!isRedundantStore(StoreUser)) + return false; + } else + return false; + } + } + return true; + } + + bool isRedundantLoad(LoadInst *LI) { + auto LoadAddr = SE->getSCEV(LI->getPointerOperand()); + for (auto &Use : LI->uses()) { + auto User = Use.getUser(); + + if (auto PHIUser = dyn_cast(User)) { + if (auto StoreAlias = getPHIMappedAddr(PHIUser)) { + auto StoreAddr = SE->getSCEV(StoreAlias->getPointerOperand()); + if (StoreAddr == LoadAddr) + continue; + } + } + + return false; + } + return true; + } + + bool isRedundantStore(StoreInst *SI) { + auto Scalar = dyn_cast(SI->getValueOperand()); + if (!Scalar) + return false; // Constants + + auto AddrAlias = getScalarMappedAddr(Scalar); + if (!AddrAlias) { + auto PHIScalar = dyn_cast(Scalar); + if (PHIScalar) + AddrAlias = getPHIMappedAddr(PHIScalar); + } + if (!AddrAlias) + return false; + + return SE->getSCEV(AddrAlias->getPointerOperand()) == + SE->getSCEV(SI->getPointerOperand()); + } + + // TODO: Passing the RegionNode* as parameter might be cheaper than this + // search. + RegionNode *findStmtRegion(BasicBlock *BB) { + auto &ScopRegion = scop->getRegion(); + assert(ScopRegion.contains(BB)); + + SmallVector Hierarchy; + + auto R = RI->getRegionFor(BB); + while (ScopRegion.contains(R)) { + Hierarchy.push_back(R); + if (R == &ScopRegion) + break; + R = R->getParent(); + } + + for (auto R : reverse(Hierarchy)) { + if (SD->isNonAffineSubRegion(R, &ScopRegion)) + return R->getNode(); + } + + return ScopRegion.getBBNode(BB); + } + + MemoryAccess *ensureScalarRead(Value *Val, RegionNode *Node); + + void buildEscapingDependences(Instruction *Inst) { + auto &R = scop->getRegion(); + + // Escaping uses + for (User *U : Inst->users()) { + Instruction *UI = dyn_cast(U); + + // Ignore the strange user + if (UI == 0) + continue; + + if (!R.contains(UI->getParent())) { + // TODO: Exit node PHIs + auto Acc = ensureScalarWrite(UI); + assert(Acc && "Writing excaping values is mandatory"); + break; + } + } + } + + // TODO: Maybe these are not required to callapse everywhere + DenseMap CollapseScalar; + DenseMap CollapsePHI; + // DenseSet RedundantStores; + // DenseSet RedundantPHIs; + + DenseMap OutgoingCollapsedValue; + // DenseMap,Value*> OutgoingCollapsedValue; + + Value *getOutgoingCollapsedValue(StoreInst *AddrAlias, RegionNode *Node) { + auto AddrIter = OutgoingCollapsedValue.find(AddrAlias); + if (AddrIter == OutgoingCollapsedValue.end()) + return nullptr; + auto &Map = AddrIter->second; + auto ValIter = Map.find(Node); + if (ValIter == Map.end()) + return nullptr; + return ValIter->second; + } + + // TODO: Replace the StoreInst by a tuple/object (const SCEV*, llvm::Type, + // size, AAMetadata) or MemInst + StoreInst *getScalarMappedAddr(Instruction *Inst) { + auto Iter = CollapseScalar.find(Inst); + if (Iter == CollapseScalar.end()) + return nullptr; + return Iter->getSecond(); + } + + StoreInst *getPHIMappedAddr(PHINode *PHI) { + auto Iter = CollapsePHI.find(PHI); + if (Iter == CollapsePHI.end()) + return nullptr; + return Iter->getSecond(); + } + + bool isComputableAtBeginningOf(Value *Val, RegionNode *Node) { + if (Node->isSubRegion()) + return isComputableAtBeginningOf(Val, + Node->getNodeAs()->getEntry()); + return isComputableAtBeginningOf(Val, Node->getNodeAs()); + } + + bool isComputableAtBeginningOf(Value *Val, BasicBlock *BB); + bool isComputableAtEndingOf(Value *Val, BasicBlock *BB); + + bool isStoreRedundant(StoreInst *SI) { + auto Scalar 
= dyn_cast(SI->getValueOperand()); + if (!Scalar) + return false; + return getScalarMappedAddr(Scalar) == SI; + } + + bool isStoreRedundant(PHINode *PHI) { + auto SI = getPHIMappedAddr(PHI); + if (!SI) + return false; + + for (auto &Incoming : PHI->incoming_values()) { + auto UsedValue = dyn_cast(Incoming.get()); + if (!UsedValue) { + // Writing a constant + // We can perfectly emulate this by writing this constant to the array + // However, it is not perfectly redundant then + // FIXME: Is this method required at all?? + return false; + } + auto Loc = getScalarMappedAddr(UsedValue); + if (Loc != SI) + return false; + } + return true; + } + + void greedyCollapse(Region &R); + void greedyCollapseStore(Region &R, StoreInst *Store); + + DenseMap AliveValuesCache; + OutgoingValueMapTy &getLiveEdges(Instruction *Val); + + DenseMap PHIEdgesCache; + OutgoingValueMapTy &getPHIEdges(PHINode *PHI); + + bool mayRead(StoreInst *SI, LoadInst *LI); + + /// Edges where writing a value to the array doesn't matter, because it will + /// be overwritten anyway + OutgoingValueMapTy computeNoUseZones(StoreInst *SI); + // EdgeSetTy computeNoChangeZones(Value *Addr); + public: static char ID; explicit ScopInfo(); Index: lib/Analysis/ScopInfo.cpp =================================================================== --- lib/Analysis/ScopInfo.cpp +++ lib/Analysis/ScopInfo.cpp @@ -34,7 +34,9 @@ #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/RegionIterator.h" +//#include "llvm/Analysis/RegionInfo.h" #include "llvm/Analysis/ScalarEvolutionExpressions.h" +#include "llvm/Analysis/PostDominators.h" #include "llvm/Support/Debug.h" #include "isl/aff.h" #include "isl/constraint.h" @@ -92,6 +94,10 @@ cl::Hidden, cl::ZeroOrMore, cl::init(true), cl::cat(PollyCategory)); +static cl::opt ScalarCollapse( + "polly-scalar-collapse", cl::desc("Try mapping scalars to array elements"), + cl::Hidden, cl::ZeroOrMore, cl::init(true), cl::cat(PollyCategory)); + //===----------------------------------------------------------------------===// /// Helper Classes @@ -617,13 +623,15 @@ const SCEV *Offset, unsigned ElemBytes, bool Affine, ArrayRef Subscripts, ArrayRef Sizes, Value *AccessValue, - bool IsPHI, StringRef BaseName) - : Id(Id), IsPHI(IsPHI), AccType(Type), RedType(RT_NONE), Statement(nullptr), - BaseAddr(BaseAddress), BaseName(BaseName), ElemBytes(ElemBytes), - Sizes(Sizes.begin(), Sizes.end()), AccessInstruction(AccessInst), - AccessValue(AccessValue), Offset(Offset), IsAffine(Affine), - Subscripts(Subscripts.begin(), Subscripts.end()), AccessRelation(nullptr), - NewAccessRelation(nullptr) {} + AccessOrigin Origin, StringRef BaseName, + StoreInst *AddrAlias) + : Id(Id), Origin(Origin), AccType(Type), RedType(RT_NONE), + Statement(nullptr), BaseAddr(BaseAddress), BaseName(BaseName), + ElemBytes(ElemBytes), Sizes(Sizes.begin(), Sizes.end()), + AccessInstruction(AccessInst), AccessValue(AccessValue), Offset(Offset), + IsAffine(Affine), Subscripts(Subscripts.begin(), Subscripts.end()), + AccessRelation(nullptr), NewAccessRelation(nullptr), + MappedAliasAddr(AddrAlias) {} void MemoryAccess::realignParams() { isl_space *ParamSpace = Statement->getParent()->getParamSpace(); @@ -658,7 +666,7 @@ break; } OS << "[Reduction Type: " << getReductionType() << "] "; - OS << "[Scalar: " << isScalar() << "]\n"; + // OS << "[Scalar: " << isScalar() << "]\n"; OS.indent(16) << getOriginalAccessRelationStr() << ";\n"; if (hasNewAccessRelation()) OS.indent(11) << "new: " << getNewAccessRelationStr() << 
";\n"; @@ -740,11 +748,11 @@ bool MemoryAccess::isStrideZero(const isl_map *Schedule) const { return isStrideX(Schedule, 0); } - +#if 0 bool MemoryAccess::isScalar() const { return isl_map_n_out(AccessRelation) == 0; } - +#endif bool MemoryAccess::isStrideOne(const isl_map *Schedule) const { return isStrideX(Schedule, 1); } @@ -2718,15 +2726,24 @@ if (!IsExitBlock && canSynthesize(PHI, LI, SE, &R)) return; - // PHI nodes are modeled as if they had been demoted prior to the SCoP - // detection. Hence, the PHI is a load of a new memory location in which the - // incoming value was written at the end of the incoming basic block. - bool OnlyNonAffineSubRegionOperands = true; - for (unsigned u = 0; u < PHI->getNumIncomingValues(); u++) { - Value *Op = PHI->getIncomingValue(u); - BasicBlock *OpBB = PHI->getIncomingBlock(u); + auto StoreAlias = getPHIMappedAddr(PHI); + if (!StoreAlias) { - // Do not build scalar dependences inside a non-affine subregion. + // PHI nodes are modeled as if they had been demoted prior to the SCoP + // detection. Hence, the PHI is a load of a new memory location in which the + // incoming value was written at the end of the incoming basic block. + // bool OnlyNonAffineSubRegionOperands = true; + for (unsigned u = 0; u < PHI->getNumIncomingValues(); u++) { + Value *Op = PHI->getIncomingValue(u); + BasicBlock *OpBB = PHI->getIncomingBlock(u); + + if (!R.contains(OpBB)) + continue; + + ensureScalarRead(Op, NonAffineSubRegion ? NonAffineSubRegion->getNode() + : R.getBBNode(OpBB)); +// TODO: Do not build scalar dependences inside a non-affine subregion. (Why?) +#if 0 if (NonAffineSubRegion && NonAffineSubRegion->contains(OpBB)) continue; @@ -2741,34 +2758,60 @@ // As we pretend there is a use (or more precise a write) of OpI in OpBB // we have to insert a scalar dependence from the definition of OpI to // OpBB if the definition is not in OpBB. + // FIXME: What happens if this scalar already is demoted e.g. because it + // is used in another BB? if (OpIBB != OpBB) { - addMemoryAccess(OpBB, PHI, MemoryAccess::READ, OpI, ZeroOffset, 1, true, - OpI); - addMemoryAccess(OpIBB, OpI, MemoryAccess::MUST_WRITE, OpI, ZeroOffset, - 1, true, OpI); + addScalarReadAccess(PHI, OpI); + addScalarWriteAccess(OpI); } } // Always use the terminator of the incoming basic block as the access // instruction. - OpI = OpBB->getTerminator(); - - addMemoryAccess(OpBB, OpI, MemoryAccess::MUST_WRITE, PHI, ZeroOffset, 1, - true, Op, /* IsPHI */ !IsExitBlock); + auto WriterInst = OpBB->getTerminator(); +#endif + addPHIWriteAccess(OpBB->getTerminator(), PHI, Op, + /* IsPHI */ !IsExitBlock); + } } - if (!OnlyNonAffineSubRegionOperands) { - addMemoryAccess(PHI->getParent(), PHI, MemoryAccess::READ, PHI, ZeroOffset, - 1, true, PHI, - /* IsPHI */ !IsExitBlock); + if (StoreAlias) { + if (isRedundantPHIRead(PHI)) + return; + + // addMemoryAccessForMem(PHI, PHI, MemoryAccess::READ, + // cast(StoreAlias), false); + // addPHIReadAccess(PHI, /* IsPHI */ !IsExitBlock); } + + // if (!OnlyNonAffineSubRegionOperands) { + addPHIReadAccess(PHI, /* IsPHI */ !IsExitBlock); // TODO: IsPHI without + // meaning, as the only + // ever used such + // MemoryAccesses are + // IsPHI=true + //} } -bool ScopInfo::buildScalarDependences(Instruction *Inst, Region *R, +void ScopInfo::buildScalarDependences(Instruction *Inst, Region *R, Region *NonAffineSubRegion) { + auto InstBB = Inst->getParent(); + for (auto &Ops : Inst->operands()) { + ensureScalarRead(Ops.get(), NonAffineSubRegion + ? 
NonAffineSubRegion->getNode() + : R->getBBNode(InstBB)); + } + +// non-mapped scalars are written on request in ensureScalarRead or +// buildEscapeDependences +// if (getScalarMappedAddr(Inst)) { +// ensureScalarWrite(Inst); +//} + +#if 0 bool canSynthesizeInst = canSynthesize(Inst, LI, SE, R); if (isIgnoredIntrinsic(Inst)) - return false; + return; bool AnyCrossStmtUse = false; BasicBlock *ParentBB = Inst->getParent(); @@ -2780,7 +2823,13 @@ if (UI == 0) continue; + // Skip PHI nodes in the region as they handle their operands on their own. + if (isa(UI)) + continue; + BasicBlock *UseParent = UI->getParent(); + ensureScalarRead(Inst, UseParent); + continue; // Ignore the users in the same BB (statement) if (UseParent == ParentBB) @@ -2816,10 +2865,10 @@ // Do not build a read access that is not in the current SCoP // Use the def instruction as base address of the MemoryAccess, so that it // will become the name of the scalar access in the polyhedral form. - addMemoryAccess(UseParent, UI, MemoryAccess::READ, Inst, ZeroOffset, 1, - true, Inst); + addScalarReadAccess(UI, Inst); } + if (ModelReadOnlyScalars) { for (Value *Op : Inst->operands()) { if (canSynthesize(Op, LI, SE, R)) @@ -2836,15 +2885,138 @@ ZeroOffset, 1, true, Op); } } - - return AnyCrossStmtUse; +#endif } extern MapInsnToMemAcc InsnToMemAcc; +Value *ScopInfo::extractBasePointer(Value *Addr) { + Loop *L = isa(Addr) + ? LI->getLoopFor(cast(Addr)->getParent()) + : nullptr; + const SCEV *AccessFunction = SE->getSCEVAtScope(Addr, L); + const SCEVUnknown *BasePointer = + dyn_cast(SE->getPointerBase(AccessFunction)); + return BasePointer->getValue(); +} + +const ScopArrayInfo *ScopInfo::getOrCreateSAI(MemInst *SI) { + // TODO: Refactor with ScopInfo::buildMemoryAccess + auto Addr = SI->getPointerOperand(); + auto BasePtr = extractBasePointer(Addr); + auto EltType = Addr->getType()->getPointerElementType(); + + auto AccItr = InsnToMemAcc.find(SI); + if (PollyDelinearize && AccItr != InsnToMemAcc.end()) { + return scop->getOrCreateScopArrayInfo( + BasePtr, EltType, AccItr->second.Shape->DelinearizedSizes); + } + + auto Size = TD->getTypeStoreSize(EltType); + auto SizeSCEV = SE->getConstant(ZeroOffset->getType(), Size); + return scop->getOrCreateScopArrayInfo( + BasePtr, Addr->getType()->getPointerElementType(), + ArrayRef(SizeSCEV)); +} + +MemoryAccess * +ScopInfo::addMemoryAccessForMem(Instruction *AccessInstruction, Value *Scalar, + enum MemoryAccess::AccessType Type, + MemInst *Inst, + MemoryAccess::AccessOrigin Origin) { + auto &R = scop->getRegion(); + + Value *Val = Inst->getScalar(); + auto *SizeType = Val->getType(); + auto Size = TD->getTypeStoreSize(SizeType); + auto Addr = Inst->getPointerOperand(); + + Loop *L = isa(Addr) + ? LI->getLoopFor(cast(Addr)->getParent()) + : nullptr; + const SCEV *AccessFunction = SE->getSCEVAtScope(Addr, L); + const SCEVUnknown *BasePointer = + dyn_cast(SE->getPointerBase(AccessFunction)); + + assert(BasePointer && "Could not find base pointer"); + AccessFunction = SE->getMinusSCEV(AccessFunction, BasePointer); + + auto AccItr = InsnToMemAcc.find(Inst); + if (PollyDelinearize && AccItr != InsnToMemAcc.end()) { + return addMemoryAccess(AccessInstruction->getParent(), AccessInstruction, + Type, BasePointer->getValue(), AccessFunction, Size, + true, Scalar, AccItr->second.DelinearizedSubscripts, + AccItr->second.Shape->DelinearizedSizes, Origin, + dyn_cast(Inst)); + } + + // Check if the access depends on a loop contained in a non-affine subregion. 
+ auto BoxedLoops = SD->getBoxedLoops(&R); + bool isVariantInNonAffineLoop = false; + if (BoxedLoops) { + SetVector Loops; + findLoops(AccessFunction, Loops); + for (const Loop *L : Loops) + if (BoxedLoops->count(L)) + isVariantInNonAffineLoop = true; + } + + bool IsAffine = + !isVariantInNonAffineLoop && + isAffineExpr(&R, AccessFunction, *SE, BasePointer->getValue()); + + if (!IsAffine && Type == MemoryAccess::MUST_WRITE) + Type = MemoryAccess::MAY_WRITE; + + // FIXME: Is this correct? this should refer to the the number of elements in + // the array, not the size of a single element + auto SCEVSize = SE->getConstant(ZeroOffset->getType(), Size); + + return addMemoryAccess( + AccessInstruction->getParent(), AccessInstruction, Type, + BasePointer->getValue(), AccessFunction, Size, IsAffine, Scalar, + ArrayRef(AccessFunction), ArrayRef(SCEVSize), + Origin, dyn_cast(Inst)); +} + void ScopInfo::buildMemoryAccess( - Instruction *Inst, Loop *L, Region *R, + MemInst *Inst, Loop *L, Region *R, const ScopDetection::BoxedLoopsSetTy *BoxedLoops) { + +#if 0 + bool RefWithinBB = false; + if (isa(Inst)) { + for (auto User : Inst->users()) { + if (isa(User) && cast(User)->getParent()==Inst->getParent()) { + RefWithinBB = true; + break; + } + } + } + + else { + assert(Inst->isStore()); + RefWithinBB = isa(Inst->getScalar()) && cast(Inst->getScalar())->getParent() == Inst->getParent(); + } +#endif + +#if 1 + if (isRedundantMemAcc(Inst)) + return; +#endif + + // Never write mapped writes, they are always added in buildScop() + if (Inst->isStore()) { + auto Scalar = dyn_cast(Inst); + if (Scalar && getScalarMappedAddr(Scalar)) + return; + } + + addMemoryAccessForMem(Inst, Inst->getScalar(), + isa(Inst) ? MemoryAccess::READ + : MemoryAccess::MUST_WRITE, + Inst, MemoryAccess::AO_MEMINST); +#if 0 unsigned Size; Type *SizeType; Value *Val; @@ -2935,13 +3107,17 @@ SmallVector Subscripts, Sizes; Subscripts.push_back(AccessFunction); - Sizes.push_back(SE->getConstant(ZeroOffset->getType(), Size)); + Sizes.push_back(SE->getConstant( + ZeroOffset->getType(), Size)); // TODO: Is this correct? this should refer + // to the the number of elements in the + // array, not the size of a single element if (!IsAffine && Type == MemoryAccess::MUST_WRITE) Type = MemoryAccess::MAY_WRITE; addMemoryAccess(Inst->getParent(), Inst, Type, BasePointer->getValue(), AccessFunction, Size, IsAffine, Subscripts, Sizes, Val); +#endif } void ScopInfo::buildAccessFunctions(Region &R, Region &SR) { @@ -2967,6 +3143,21 @@ // The set of loops contained in non-affine subregions that are part of R. 
const ScopDetection::BoxedLoopsSetTy *BoxedLoops = SD->getBoxedLoops(&R); + for (auto &Inst : BB) { + if (isa(Inst)) { + buildPHIAccesses(cast(&Inst), R, NonAffineSubRegion, + IsExitBlock); + } else { + buildScalarDependences(&Inst, &R, NonAffineSubRegion); + if (isa(Inst)) { + buildMemoryAccess(cast(&Inst), L, &R, BoxedLoops); + } + } + + buildEscapingDependences(&Inst); + } + +#if 0 for (BasicBlock::iterator I = BB.begin(), E = --BB.end(); I != E; ++I) { Instruction *Inst = I; @@ -2978,25 +3169,29 @@ if (!PHI && IsExitBlock) break; - if (isa(Inst) || isa(Inst)) - buildMemoryAccess(Inst, L, &R, BoxedLoops); + if (isa(Inst)) + buildMemoryAccess(cast(Inst), L, &R, BoxedLoops); if (isIgnoredIntrinsic(Inst)) continue; - + + if (!isa(Inst)) buildScalarDependences(Inst, &R, NonAffineSubRegion); +#if 0 if (buildScalarDependences(Inst, &R, NonAffineSubRegion)) { if (!isa(Inst)) - addMemoryAccess(&BB, Inst, MemoryAccess::MUST_WRITE, Inst, ZeroOffset, - 1, true, Inst); + addScalarWriteAccess(Inst); } +#endif } +#endif } -void ScopInfo::addMemoryAccess( +MemoryAccess *ScopInfo::addMemoryAccess( BasicBlock *BB, Instruction *Inst, MemoryAccess::AccessType Type, Value *BaseAddress, const SCEV *Offset, unsigned ElemBytes, bool Affine, Value *AccessValue, ArrayRef Subscripts, - ArrayRef Sizes, bool IsPHI = false) { + ArrayRef Sizes, MemoryAccess::AccessOrigin Origin, + StoreInst *AddrAlias) { AccFuncSetType &AccList = AccFuncMap[BB]; size_t Identifier = AccList.size(); @@ -3007,12 +3202,462 @@ isl_id *Id = isl_id_alloc(ctx, IdName.c_str(), nullptr); AccList.emplace_back(Inst, Id, Type, BaseAddress, Offset, ElemBytes, Affine, - Subscripts, Sizes, AccessValue, IsPHI, BaseName); + Subscripts, Sizes, AccessValue, Origin, BaseName, + AddrAlias); + return &AccList.back(); +} + +// TODO: RENAME: ensureScalarAvailability +// TODO: BasicBlock *BB -> RegionNode * +MemoryAccess *ScopInfo::ensureScalarRead(Value *Val, RegionNode *Node) { + auto Inst = dyn_cast(Val); + + // Constants do not require scalar demotion + if (!Inst) + return nullptr; + + // If the instruction can be synthesized and the user is in the region we do + // not need to add scalar dependences. + auto &ScopRegion = scop->getRegion(); + if (canSynthesize(Inst, LI, SE, &ScopRegion)) + return nullptr; + + if (!ScopRegion.contains(Inst)) + return nullptr; // It's a parameter + +#if 0 + auto StoreAlias = getScalarMappedAddr(Inst); + if (StoreAlias) { + auto MappedIter = MappedReads.insert( + std::make_pair(std::make_pair(StoreAlias, Node), nullptr)); + auto &Acc = MappedIter.first->second; + if (!MappedIter.second) + return Acc; + + ensureScalarWrite(Inst); + + if (Node->isSubRegion()) { + llvm_unreachable("unimplemented"); + } else { + auto BB = Node->getNodeAs(); + Acc = + addMemoryAccessForMem(&BB->front(), Inst /*without meaning ?!?*/, + MemoryAccess::READ, cast(StoreAlias)); + } + return Acc; + } +#endif + + auto Pair = std::make_pair(Inst, Node); + auto Iter = ScalarReads.find(Pair); + if (Iter != ScalarReads.end()) + return Iter->second; + + auto &Acc = ScalarReads[Pair]; + // Acc = nullptr; + + if (isIgnoredIntrinsic(Inst)) + return Acc = nullptr; + + // auto &ScopRegion = scop->getRegion(); + // assert(ScopRegion.contains(Node)); + + if (findStmtRegion(Inst->getParent()) == Node) + return nullptr; + +// if (ScopRegion.contains(BB)) { +// If the instruction can be synthesized and the user is in the region we do +// not need to add scalar dependences. 
+// if (canSynthesize(Inst, LI, SE, &ScopRegion)) +// return Acc = nullptr; + +#if 0 + if (isa(Inst) && isRedundantMemAcc(cast(Inst))) { + // The value might be defined and stored outside the Stmt, only the load + // (wich we will optimize away) is within this block/ + // FIXME: What if there are multiple loads of the address in the same block, + // i.e. + // %1 = load %addr + // %1.modif = %1 + // store %1.modif, %addr + // %2 = load %addr + // %2.modif = %2 + // store %2.modif, %addr + // Will work fine if every scalar is mapped to %addr, but what if not? + // What should happen is that the scalars are never mapped because there are potential reads of the same addr. + // TODO: write a test case + int a = 0; + } else { + // Scalar def-use chains within the same ScopStmt (either a BB or non-affine + // region) do not need scalar demotion. + if (findStmtRegion(Inst->getParent()) == findStmtRegion(BB)) + return Acc = nullptr; + } +#else +// if (findStmtRegion(Inst->getParent()) == Node) +// return Acc = nullptr; +#endif + + // TODO: Implement ModelReadOnlyScalars; Is original implementation triggered + // at all?? + assert(ScopRegion.contains(Inst) && + "non-scop scalar should be handled as parameter"); + ensureScalarWrite(Inst); + + // Escaping values are required to be written, but handled separately at code + // generation + // if (!ScopRegion.contains(Inst)) + // return nullptr; + + //} else { + // if(!ScopRegion.contains(Val)) + // return nullptr; + // + // ensureScalarWrite(Inst); + //} + + BasicBlock *BB; + if (Node->isSubRegion()) { + BB = Node->getNodeAs()->getEntry(); + } else { + BB = Node->getNodeAs(); + } + + auto Store = getScalarMappedAddr(Inst); + if (Store) { +#if 0 + // Check whether redundant + bool AllUsersMapped = true; + for (auto User : Val->users()) { + if (User == Store) + continue; + + auto UserInst = dyn_cast(User); + if (!UserInst) { + AllUsersMapped = false; + break; + } + + if (getPHIMappedAddr(UserInst) != Store) { + AllUsersMapped = false; + break; + } + } + if (AllUsersMapped) + return; +#endif + + return Acc = addMemoryAccessForMem(&BB->front(), Inst, MemoryAccess::READ, + cast(Store), + MemoryAccess::AO_MAPPED_SCALAR); + } else { + return Acc = addMemoryAccess( + BB, &BB->front(), MemoryAccess::READ, Inst, ZeroOffset, 1, true, + Inst, ArrayRef(), ArrayRef(), + MemoryAccess::AO_DEDICATED_SCALAR, nullptr); + } +} + +static AliasSet *getAliasSetFor(AliasSetTracker *AST, MemInst *I) { + auto Scalar = I->getScalar(); + auto &DL = I->getModule()->getDataLayout(); + AAMDNodes AAInfo; + I->getAAMetadata(AAInfo); + return &AST->getAliasSetForPointer( + Scalar, DL.getTypeStoreSize(Scalar->getType()), AAInfo); +} + +void ScopInfo::greedyCollapse(Region &R) { + for (auto BB : R.blocks()) { + for (auto &I : *BB) { + auto AccInst = dyn_cast(&I); + if (!AccInst) + continue; + + // WORKAROUND AHEAD: + // getOrCreateScopArrayInfo requires that the base SAI of indirect + // accesses are already created. This relies on requesting those first. + // This workaround may still fail because there is no guarantee that basic + // blocks are iterated through in an topological order of the dominance + // tree (i.e. 
the base access in a BB before the indirect one, but + // iterated through in reverse order) + getOrCreateSAI(AccInst); + } + } + + for (auto BB : R.blocks()) { + for (auto &I : *BB) { + if (!isa(I)) + continue; + greedyCollapseStore(R, cast(&I)); + } + } +} + +static bool isSubset(EdgeSetTy &Superset, EdgeSetTy &Subset) { + for (auto &Edge : Subset) { + if (!Superset.count(Edge)) + return false; + } + return true; +} + +static bool isConflicting(OutgoingValueMapTy &Superset, + OutgoingValueMapTy &Subset) { + for (auto &Edge : Subset) { + auto BB = Edge.first; + auto Val = Edge.second; + auto Iter = Superset.find(BB); + if (Iter == Superset.end()) + return true; + if (!Iter->second) + continue; + if (Iter->second != Val) + return true; + } + return false; +} + +static void subtractSet(EdgeSetTy &Set, EdgeSetTy &ToRemove) { + for (auto &Edge : ToRemove) { + Set.erase(Edge); + } +} + +static void fixOutgoingValues(OutgoingValueMapTy &Superset, + OutgoingValueMapTy &Subset) { + for (auto &Edge : Subset) { + auto BB = Edge.first; + auto Val = Edge.second; + Superset[BB] = Val; + } +} + +void ScopInfo::greedyCollapseStore(Region &R, StoreInst *Store) { + auto SAI = getOrCreateSAI(cast(Store)); + auto Addr = Store->getPointerOperand(); + auto StoredVal = dyn_cast(Store->getValueOperand()); + if (!StoredVal) + return; + + auto &NoUseZone = OutgoingCollapsedValue[Store] = computeNoUseZones(Store); + // if (NoUseZone.empty()) return; //TODO: Make this an option + SmallVector Worklist; + Worklist.push_back(StoredVal); + + do { + auto Val = Worklist.pop_back_val(); + + if (CollapseScalar.count(Val)) + continue; // Already collapsed to this or another addr + + // Don't try to demote synthesizable values + // TODO: Might allow collapsing to multiple addresses + if (canSynthesize(Val, LI, SE, &R)) + continue; + + // If the address is not available at Val's location, don't collapse it + // TODO: It might be available further downstream if we allow partial + // collapse + if (!isComputableAtEndingOf(Addr, Val->getParent())) + continue; + + auto &Edges = getLiveEdges(Val); + if (isConflicting(NoUseZone, Edges)) + continue; + + if (!Edges.empty()) { // || (isa(Val) && + // isRedundantMemAcc(cast(Val), Store) + // /*Ensure that these are detected as redundant + // later*/)) { + CollapseScalar[Val] = Store; + fixOutgoingValues(NoUseZone, Edges); + } + if (auto PHI = dyn_cast(Val)) { + if (CollapsePHI.count(PHI)) + continue; // Already collapsed to this or another addr + + // Check addr availablability + if (std::any_of(PHI->block_begin(), PHI->block_end(), + [=](BasicBlock *PredBB) { + return !isComputableAtEndingOf(Addr, PredBB); + })) + continue; + + auto &PHIEdges = getPHIEdges(PHI); + if (isConflicting(NoUseZone, PHIEdges)) + continue; + + CollapsePHI[PHI] = Store; + fixOutgoingValues(NoUseZone, PHIEdges); + } + + for (auto &Use : Val->operands()) { + auto UseVal = dyn_cast(Use.get()); + if (UseVal) + Worklist.push_back(UseVal); + } + + } while (!Worklist.empty()); +} + +static bool isExitingBB(RegionNode *Node, BasicBlock *BB) { + if (!Node->isSubRegion()) + return true; + auto SubRegion = Node->getNodeAs(); + for (auto Exiting : predecessors(SubRegion->getExit())) { + if (!SubRegion->contains(Exiting)) + continue; + if (Exiting == BB) + return true; + } + return false; +} + +OutgoingValueMapTy &ScopInfo::getLiveEdges(Instruction *Val) { + auto It = AliveValuesCache.find(Val); + if (It != AliveValuesCache.end()) + return It->second; + + auto &ScopRegion = scop->getRegion(); + + auto &Edges = 
AliveValuesCache[Val]; + for (auto &Use : Val->uses()) { + auto User = Use.getUser(); + + Instruction *EffectiveUser = dyn_cast(User); + if (!EffectiveUser) + continue; + BasicBlock *EffectiveUserBB = EffectiveUser->getParent(); + + if (auto PHI = dyn_cast(User)) { + auto i = Use.getOperandNo(); + EffectiveUserBB = PHI->getIncomingBlock(i); + EffectiveUser = EffectiveUserBB->getTerminator(); + } + + auto UserNode = DT->getNode(EffectiveUserBB); + while (UserNode->getBlock() != Val->getParent()) { + auto ParentNode = UserNode->getIDom(); + auto BB = ParentNode->getBlock(); + if (!ScopRegion.contains(BB)) + continue; + + auto Node = findStmtRegion(BB); + if (isExitingBB(Node, BB)) + Edges[Node] = Val; + UserNode = ParentNode; + } + } + return Edges; +} + +OutgoingValueMapTy &ScopInfo::getPHIEdges(PHINode *PHI) { + auto It = PHIEdgesCache.find(PHI); + if (It != PHIEdgesCache.end()) + return It->second; + + auto &ScopRegion = scop->getRegion(); + + // TODO: Maybe we need to mark all edges from the incoming value definition + OutgoingValueMapTy &Result = PHIEdgesCache[PHI]; + // auto Parent = PHI->getParent(); + auto NumIncoming = PHI->getNumIncomingValues(); + for (auto i = 0; i < NumIncoming; i += 1) { + auto IncomingValue = PHI->getIncomingValue(i); + auto IncomingBlock = PHI->getIncomingBlock(i); + if (!ScopRegion.contains(IncomingBlock)) + continue; + + auto Node = findStmtRegion(IncomingBlock); + if (isExitingBB(Node, IncomingBlock)) + Result[Node] = IncomingValue; + } + return Result; +} + +bool ScopInfo::mayRead(StoreInst *SI, LoadInst *LI) { + auto AATest = AA->alias(MemoryLocation::get(SI), MemoryLocation::get(LI)); + if (AATest == NoAlias) + return false; + + auto LoadSAI = getOrCreateSAI(cast(LI)); + if (LoadSAI->getBasePtrOriginSAI()) + LoadSAI = LoadSAI->getBasePtrOriginSAI(); + + auto WriteSAI = getOrCreateSAI(cast(SI)); + if (WriteSAI->getBasePtrOriginSAI()) + WriteSAI = WriteSAI->getBasePtrOriginSAI(); + + return LoadSAI == WriteSAI; +} + +OutgoingValueMapTy ScopInfo::computeNoUseZones(StoreInst *SI) { + auto &ScopRegion = scop->getRegion(); + auto Addrs = SI->getPointerOperand(); + auto SAI = getOrCreateSAI(cast(SI)); + + auto &R = scop->getRegion(); + auto RI = R.getRegionInfo(); + + OutgoingValueMapTy Result; + + auto StoreBB = SI->getParent(); + auto &PD = getAnalysis(); + auto BBNode = PD.getNode(StoreBB); + + SmallVector Worklist; + SmallVector Postdominated; + DenseSet ReachableRead; + // Postdominated.push_back(SI->getParent()); + PD.getDescendants(SI->getParent(), Postdominated); + for (auto Node : Postdominated) { + for (auto &Inst : *Node) { + if (!isa(Inst)) + continue; + auto &LI = cast(Inst); + if (mayRead(SI, &LI)) { + Worklist.push_back(Node); + break; + } + } + } + // TODO: Add function return to Worklist; unless the array is local the caller + // is free to read the memory + while (!Worklist.empty()) { + auto BB = Worklist.pop_back_val(); + if (ReachableRead.count(BB)) + continue; + if (!PD.dominates(StoreBB, BB)) + continue; + ReachableRead.insert(BB); + for (auto Pred : predecessors(BB)) { + Worklist.push_back(Pred); + } + } + + for (auto Node : Postdominated) { + if (ReachableRead.count(Node)) + continue; + + for (auto Pred : predecessors(Node)) { + if (!ScopRegion.contains(Pred)) + continue; + auto Node = findStmtRegion(Pred); + if (isExitingBB(Node, Pred)) + Result[Node] = nullptr; + } + } + + return Result; } Scop *ScopInfo::buildScop(Region &R, DominatorTree &DT) { unsigned MaxLoopDepth = getMaxLoopDepthInRegion(R, *LI, *SD); - Scop *S = new Scop(R, 
AccFuncMap, *SE, DT, ctx, MaxLoopDepth); + scop = new Scop(R, AccFuncMap, *SE, DT, ctx, MaxLoopDepth); + + if (ScalarCollapse) + greedyCollapse(R); buildAccessFunctions(R, R); @@ -3023,11 +3668,82 @@ // To handle these PHI nodes later we will now model their operands as scalar // accesses. Note that we do not model anything in the exit block if we have // an exiting block in the region, as there will not be any splitting later. - if (!R.getExitingBlock()) - buildAccessFunctions(R, *R.getExit(), nullptr, /* IsExitBlock */ true); + if (!R.getExitingBlock()) { + // buildAccessFunctions(R, *R.getExit(), nullptr, /* IsExitBlock */ true); + for (auto &Inst : *R.getExit()) { + if (!isa(Inst)) + break; + // auto PHI = dyn_cast(&Inst); + // if (!PHI) + // break; + buildEscapingDependences(&Inst); + // buildPHIAccesses(PHI, R, nullptr, true); + } + } - S->init(*LI, *SD, *AA); - return S; +#if 1 + // TODO: Ensure that escaping values are actually written + for (auto &AddrIter : OutgoingCollapsedValue) { + auto Addr = AddrIter.first; + auto &Map = AddrIter.second; + for (auto &BBIter : Map) { + auto Node = BBIter.first; + auto OutVal = BBIter.second; + + if (!OutVal) + continue; // Happens if in no-use zone, but no scalar/PHI has been + // mapped to it. + + if (auto LI = dyn_cast(OutVal)) { + if (SE->getSCEV(LI->getPointerOperand()) == + SE->getSCEV(Addr->getPointerOperand())) + continue; + } + + MemoryAccess::AccessOrigin Origin = MemoryAccess::AO_MAPPED_PHI; + + if (auto OutInst = dyn_cast(OutVal)) { + auto OutDefBB = OutInst->getParent(); + if (R.contains(OutDefBB)) { + auto OutDefRegion = findStmtRegion(OutInst->getParent()); + if (OutDefRegion == Node) + Origin = MemoryAccess::AO_MAPPED_SCALAR; + else + continue; // Store is redundant, needs only to be stored in the + // block defining it. + } else { + // This Value is defined before the SCoP, so has not been written to + // Addr yet. 
+ Origin = MemoryAccess::AO_MAPPED_PHI; + } + } + + if (Node->isSubRegion()) { + auto SubRegion = Node->getNodeAs(); + for (auto Exiting : predecessors(SubRegion->getExit())) { + if (!SubRegion->contains(Exiting)) + continue; + if (isa(OutVal) && + !DT.dominates(cast(OutVal)->getParent(), Exiting)) + continue; + addMemoryAccessForMem(Exiting->getTerminator(), OutVal, + MemoryAccess::MUST_WRITE, cast(Addr), + Origin); + } + } else { + auto BB = Node->getNodeAs(); + addMemoryAccessForMem(BB->getTerminator(), OutVal, + MemoryAccess::MUST_WRITE, cast(Addr), + Origin); + } + } + } + + OutgoingCollapsedValue.clear(); +#endif + + scop->init(*LI, *SD, *AA); + return scop; } void ScopInfo::print(raw_ostream &OS, const Module *) const { @@ -3041,6 +3757,11 @@ } void ScopInfo::clear() { + CollapseScalar.clear(); + CollapsePHI.clear(); + OutgoingCollapsedValue.clear(); + AliveValuesCache.clear(); + PHIEdgesCache.clear(); AccFuncMap.clear(); if (scop) { delete scop; @@ -3064,6 +3785,7 @@ AU.addRequired(); AU.addRequired(); AU.addRequired(); + AU.addRequired(); AU.addRequiredTransitive(); AU.addRequiredTransitive(); AU.addRequired(); @@ -3080,11 +3802,12 @@ SE = &getAnalysis().getSE(); LI = &getAnalysis().getLoopInfo(); AA = &getAnalysis().getAAResults(); + RI = &getAnalysis().getRegionInfo(); TD = &F->getParent()->getDataLayout(); - DominatorTree &DT = getAnalysis().getDomTree(); + DT = &getAnalysis().getDomTree(); ZeroOffset = SE->getConstant(TD->getIntPtrType(F->getContext()), 0); - scop = buildScop(*R, DT); + scop = buildScop(*R, *DT); DEBUG(scop->print(dbgs())); @@ -3101,6 +3824,154 @@ return false; } +struct SCEVExpandibilityChecker + : public SCEVVisitor { +public: +private: + BasicBlock *LocationBlock; + bool IncludeBlock; + DominatorTree *DT; + + SCEVExpandibilityChecker(BasicBlock *LocationBlock, bool IncludeBlock, + DominatorTree *DT) + : LocationBlock(LocationBlock), IncludeBlock(IncludeBlock), DT(DT) {} + +public: + static bool isExpandableBefore(const SCEV *S, BasicBlock *LocationBlock, + bool IncludeBlock, DominatorTree *DT) { + SCEVExpandibilityChecker Checker(LocationBlock, IncludeBlock, DT); + return Checker.visit(S); + } + + bool visit(const SCEV *Expr) { + return SCEVVisitor::visit(Expr); + } + + bool visitConstant(const SCEVConstant *Constant) { return true; } + + bool visitTruncateExpr(const SCEVTruncateExpr *Expr) { + return visit(Expr->getOperand()); + } + + bool visitZeroExtendExpr(const SCEVZeroExtendExpr *Expr) { + return visit(Expr->getOperand()); + } + + bool visitSignExtendExpr(const SCEVSignExtendExpr *Expr) { + return visit(Expr->getOperand()); + } + + bool visitAddExpr(const SCEVAddExpr *Expr) { + for (int i = 0, e = Expr->getNumOperands(); i < e; ++i) + if (!visit(Expr->getOperand(i))) + return false; + + return true; + } + + bool visitMulExpr(const SCEVMulExpr *Expr) { + for (int i = 0, e = Expr->getNumOperands(); i < e; ++i) + if (!visit(Expr->getOperand(i))) + return false; + + return true; + } + + bool visitUDivExpr(const SCEVUDivExpr *Expr) { + if (!visit(Expr->getLHS())) + return false; + + if (!visit(Expr->getRHS())) + return false; + + return true; + } + + bool visitAddRecExpr(const SCEVAddRecExpr *Expr) { + if (!visit(Expr->getStart())) + return false; + + for (size_t i = 0; i < Expr->getNumOperands(); ++i) + if (!visit(Expr->getOperand(i))) + return false; + + auto L = Expr->getLoop(); + return L->contains(LocationBlock); + } + + bool visitSMaxExpr(const SCEVSMaxExpr *Expr) { + for (size_t i = 0; i < Expr->getNumOperands(); ++i) + if 
(!visit(Expr->getOperand(i))) + return false; + + return true; + } + + bool visitUMaxExpr(const SCEVUMaxExpr *Expr) { + for (size_t i = 0; i < Expr->getNumOperands(); ++i) + if (!visit(Expr->getOperand(i))) + return false; + + return true; + } + + bool visitUnknown(const SCEVUnknown *Expr) { + auto Val = Expr->getValue(); + if (isa(Val)) + return true; + + Instruction *Inst = dyn_cast(Val); + + // Strange case + if (!Inst) + return false; + + auto InstBB = Inst->getParent(); + if (InstBB == LocationBlock) + return IncludeBlock; + + return DT->dominates(InstBB, LocationBlock); + } +}; + +bool ScopInfo::isComputableAtBeginningOf(Value *Val, BasicBlock *BB) { + if (SE->isSCEVable(Val->getType())) { + if (const SCEV *Scev = SE->getSCEV(Val)) + if (!isa(Scev)) { + if (SCEVExpandibilityChecker::isExpandableBefore(Scev, BB, false, DT)) + return true; + } + } + + auto Inst = cast(Val); + if (!Inst) + return false; + auto InstBB = Inst->getParent(); + + if (InstBB == BB) + return false; + return DT->dominates(InstBB, BB); +} + +bool ScopInfo::isComputableAtEndingOf(Value *Val, BasicBlock *BB) { + if (SE->isSCEVable(Val->getType())) { + if (const SCEV *Scev = SE->getSCEV(Val)) + if (!isa(Scev)) { + if (SCEVExpandibilityChecker::isExpandableBefore(Scev, BB, true, DT)) + return true; + } + } + + auto Inst = dyn_cast(Val); + if (!Inst) + return false; + auto InstBB = Inst->getParent(); + + if (InstBB == BB) + return true; + return DT->dominates(InstBB, BB); +} + char ScopInfo::ID = 0; Pass *polly::createScopInfoPass() { return new ScopInfo(); } Index: lib/CodeGen/BlockGenerators.cpp =================================================================== --- lib/CodeGen/BlockGenerators.cpp +++ lib/CodeGen/BlockGenerators.cpp @@ -191,7 +191,12 @@ ScopStmt &Stmt, const Instruction *Inst, const Value *Pointer, ValueMapT &BBMap, LoopToScevMapT <S, isl_id_to_ast_expr *NewAccesses) { const MemoryAccess &MA = Stmt.getAccessFor(Inst); + return generateLocationAccessed(Stmt, MA, Pointer, BBMap, LTS, NewAccesses); +} +Value *BlockGenerator::generateLocationAccessed( + ScopStmt &Stmt, const MemoryAccess &MA, const Value *Pointer, + ValueMapT &BBMap, LoopToScevMapT <S, isl_id_to_ast_expr *NewAccesses) { isl_ast_expr *AccessExpr = isl_id_to_ast_expr_get(NewAccesses, MA.getId()); if (AccessExpr) { @@ -199,7 +204,7 @@ return ExprBuilder->create(AccessExpr); } - return getNewValue(Stmt, Pointer, BBMap, LTS, getLoopForInst(Inst)); + return getNewValue(Stmt, Pointer, BBMap, LTS, Stmt.getInnermostLoop()); } Loop *BlockGenerator::getLoopForInst(const llvm::Instruction *Inst) { @@ -243,7 +248,7 @@ isl_id_to_ast_expr *NewAccesses) { // First check for possible scalar dependences for this instruction. - generateScalarLoads(Stmt, Inst, BBMap); + generateScalarLoads(Stmt, Inst, BBMap, LTS, NewAccesses); // Terminator instructions control the control flow. They are explicitly // expressed in the clast and do not need to be copied. 
@@ -316,14 +321,36 @@
   Builder.SetInsertPoint(CopyBB->begin());
   EntryBB = &CopyBB->getParent()->getEntryBlock();
 
+  for (MemoryAccess *MA : Stmt) {
+    if (!MA->isRead())
+      continue;
+    if (!MA->isMapped())
+      continue;
+
+    auto MappedAlias = MA->getMappedAliasAddr();
+    assert(MappedAlias);
+
+    // from generateScalarLoad
+    auto AddrAlias = MA->getMappedAliasAddr();
+    const Value *Pointer = AddrAlias->getPointerOperand();
+    Value *NewPointer =
+        generateLocationAccessed(Stmt, *MA, Pointer, BBMap, LTS, NewAccesses);
+    auto *ScalarLoad =
+        Builder.CreateAlignedLoad(NewPointer, AddrAlias->getAlignment(),
+                                  AddrAlias->getName() + "_p_mapped_");
+    auto RepresentedValue = MA->getAccessValue();
+
+    BBMap[RepresentedValue] = ScalarLoad;
+  }
+
   for (Instruction &Inst : *BB)
     copyInstruction(Stmt, &Inst, BBMap, LTS, NewAccesses);
 
   // After a basic block was copied store all scalars that escape this block
   // in their alloca. First the scalars that have dependences inside the SCoP,
   // then the ones that might escape the SCoP.
-  generateScalarStores(Stmt, BB, BBMap);
+  generateScalarStores(Stmt, BB, BBMap, LTS, NewAccesses);
 
   const Region &R = Stmt.getParent()->getRegion();
   for (Instruction &Inst : *BB)
     handleOutsideUsers(R, &Inst, BBMap[&Inst]);
@@ -400,19 +427,34 @@
 
 void BlockGenerator::generateScalarLoads(ScopStmt &Stmt,
                                          const Instruction *Inst,
-                                         ValueMapT &BBMap) {
+                                         ValueMapT &BBMap, LoopToScevMapT &LTS,
+                                         isl_id_to_ast_expr *NewAccesses) {
   auto *MAL = Stmt.lookupAccessesFor(Inst);
 
   if (!MAL)
     return;
 
   for (MemoryAccess *MA : *MAL) {
-    if (!MA->isScalar() || !MA->isRead())
+    if (!MA->isRead())
+      continue;
+    if (MA->isMapped())
      continue;
 
-    auto *Address = getOrCreateAlloca(*MA);
-    BBMap[MA->getBaseAddr()] =
-        Builder.CreateLoad(Address, Address->getName() + ".reload");
+    Value *ScalarLoad;
+    if (MA->isMapped()) {
+      // from generateScalarLoad
+      auto AddrAlias = MA->getMappedAliasAddr();
+      const Value *Pointer = AddrAlias->getPointerOperand();
+      Value *NewPointer =
+          generateLocationAccessed(Stmt, *MA, Pointer, BBMap, LTS, NewAccesses);
+      ScalarLoad =
+          Builder.CreateAlignedLoad(NewPointer, AddrAlias->getAlignment(),
+                                    AddrAlias->getName() + "_p_scalar_");
+    } else {
+      auto *Address = getOrCreateAlloca(*MA);
+      ScalarLoad = Builder.CreateLoad(Address, Address->getName() + ".reload");
+    }
+    BBMap[MA->getBaseAddr()] = ScalarLoad;
   }
 }
@@ -453,7 +495,8 @@
 }
 
 void BlockGenerator::generateScalarStores(ScopStmt &Stmt, BasicBlock *BB,
-                                          ValueMapT &BBMap) {
+                                          ValueMapT &BBMap, LoopToScevMapT &LTS,
+                                          isl_id_to_ast_expr *NewAccesses) {
   const Region &R = Stmt.getParent()->getRegion();
 
   assert(Stmt.isBlockStmt() && BB == Stmt.getBasicBlock() &&
          "Region statements need to use the generateScalarStores() "
          "function in the RegionGenerator");
@@ -461,14 +504,23 @@
 
   for (MemoryAccess *MA : Stmt) {
-    if (!MA->isScalar() || MA->isRead())
+    if (!MA->isWrite())
+      continue;
+    if (!MA->isImplicit())
       continue;
 
     Value *Val = MA->getAccessValue();
 
-    auto *Address = getOrCreateAlloca(*MA);
-    Val = getNewScalarValue(Val, R, BBMap);
-    Builder.CreateStore(Val, Address);
+    if (MA->isMapped()) {
+      auto AddrAlias = MA->getMappedAliasAddr();
+      const Value *Pointer = AddrAlias->getPointerOperand();
+      Value *NewPointer =
+          generateLocationAccessed(Stmt, *MA, Pointer, BBMap, LTS, NewAccesses);
+      Builder.CreateAlignedStore(Val, NewPointer, AddrAlias->getAlignment());
+    } else {
+      auto *Address = getOrCreateAlloca(*MA);
+      Builder.CreateStore(Val, Address);
+    }
   }
 }
@@ -1091,7 +1143,8 @@
 
 void RegionGenerator::generateScalarLoads(ScopStmt &Stmt,
                                           const Instruction *Inst,
-                                          ValueMapT &BBMap) {
+                                          ValueMapT &BBMap, LoopToScevMapT &LTS,
+                                          isl_id_to_ast_expr *NewAccesses) {
   // Inside a non-affine region PHI nodes are copied not demoted. Once the
   // phi is copied it will reload all inputs from outside the region, hence
@@ -1100,11 +1153,14 @@
   if (isa<PHINode>(Inst))
     return;
 
-  return BlockGenerator::generateScalarLoads(Stmt, Inst, BBMap);
+  return BlockGenerator::generateScalarLoads(Stmt, Inst, BBMap, LTS,
+                                             NewAccesses);
 }
 
 void RegionGenerator::generateScalarStores(ScopStmt &Stmt, BasicBlock *BB,
-                                           ValueMapT &BBMap) {
+                                           ValueMapT &BBMap,
+                                           LoopToScevMapT &LTS,
+                                           isl_id_to_ast_expr *NewAccesses) {
   const Region &R = Stmt.getParent()->getRegion();
 
   assert(Stmt.getRegion() &&
Index: test/Isl/CodeGen/doubleacc.ll
===================================================================
--- /dev/null
+++ test/Isl/CodeGen/doubleacc.ll
@@ -0,0 +1,42 @@
+; RUN: opt %loadPolly -polly-detect-unprofitable -polly-no-early-exit -polly-scops -analyze < %s | FileCheck %s -check-prefix=SCOPS
+; RUN: opt %loadPolly -polly-detect-unprofitable -polly-no-early-exit -polly-codegen -S < %s | FileCheck %s -check-prefix=CODEGEN
+;
+; void foo(long n, float A[n]) {
+;   for (long i = 0; i < n; i += 1) {
+;     A[i] += 1;
+;     A[i] += 1;
+;   }
+; }
+;
+target datalayout = "e-m:w-i64:64-f80:128-n8:16:32:64-S128"
+
+define void @foo(i32 %n, float* %A) #1 {
+entry:
+  %tmp = sext i32 %n to i64
+  br label %for.cond
+
+for.cond:                                         ; preds = %for.inc, %entry
+  %indvars.iv = phi i64 [ %indvars.iv.next, %for.inc ], [ 0, %entry ]
+  %cmp = icmp slt i64 %indvars.iv, %tmp
+  br i1 %cmp, label %for.body, label %for.end
+
+for.body:                                         ; preds = %for.cond
+  %arrayidx = getelementptr inbounds float, float* %A, i64 %indvars.iv
+  %tmp1 = load float, float* %arrayidx, align 4
+  %add = fadd float %tmp1, 1.000000e+00
+  store float %add, float* %arrayidx, align 4
+  %arrayidx2 = getelementptr inbounds float, float* %A, i64 %indvars.iv
+  %tmp2 = load float, float* %arrayidx2, align 4
+  %add3 = fadd float %tmp2, 1.000000e+00
+  store float %add3, float* %arrayidx2, align 4
+  br label %for.inc
+
+for.inc:                                          ; preds = %for.body
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  br label %for.cond
+
+for.end:                                          ; preds = %for.cond
+  ret void
+}
+
+attributes #1 = { nounwind uwtable "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+sse,+sse2" "unsafe-fp-math"="false" "use-soft-float"="false" }
Index: test/ScopInfo/licm_reduction.ll
===================================================================
--- test/ScopInfo/licm_reduction.ll
+++ test/ScopInfo/licm_reduction.ll
@@ -1,7 +1,5 @@
-; RUN: opt %loadPolly -basicaa -loop-rotate -indvars -polly-prepare -polly-scops -analyze < %s | FileCheck %s
-; RUN: opt %loadPolly -basicaa -loop-rotate -indvars -licm -polly-prepare -polly-scops -analyze < %s | FileCheck %s
-;
-; XFAIL: *
+; RUN: opt %loadPolly -polly-detect-unprofitable -basicaa -loop-rotate -indvars -polly-prepare -polly-scops -analyze < %s | FileCheck %s
+; RUN: opt %loadPolly -polly-detect-unprofitable -basicaa -loop-rotate -indvars -licm -polly-prepare -polly-scops -analyze < %s | FileCheck %s
 ;
 ; void test(int n, double B[static const restrict n], int j) {
 ;   for (int i = 0; i < n; i += 1) {