Index: include/polly/CodeGen/BlockGenerators.h =================================================================== --- include/polly/CodeGen/BlockGenerators.h +++ include/polly/CodeGen/BlockGenerators.h @@ -338,7 +338,9 @@ /// @param R The current SCoP region. /// @param Inst The current instruction we check. /// @param InstCopy The copy of the instruction @p Inst in the optimized SCoP. - void handleOutsideUsers(const Region &R, Instruction *Inst, Value *InstCopy); + /// @param Address If given it is used as the escape address for @p Inst. + void handleOutsideUsers(const Region &R, Instruction *Inst, Value *InstCopy, + AllocaInst *Address = nullptr); /// @brief Initialize the memory of demoted scalars. /// Index: include/polly/ScopDetection.h =================================================================== --- include/polly/ScopDetection.h +++ include/polly/ScopDetection.h @@ -223,13 +223,6 @@ /// @return True if all blocks in R are valid, false otherwise. bool allBlocksValid(DetectionContext &Context) const; - /// @brief Check the exit block of a region is valid. - /// - /// @param Context The context of scop detection. - /// - /// @return True if the exit of R is valid, false otherwise. - bool isValidExit(DetectionContext &Context) const; - /// @brief Check if a region is a Scop. /// /// @param Context The context of scop detection. Index: include/polly/ScopInfo.h =================================================================== --- include/polly/ScopInfo.h +++ include/polly/ScopInfo.h @@ -243,8 +243,8 @@ /// could allow us to handle the above example. ReductionType RedType = RT_NONE; - /// @brief The access instruction of this memory access. - Instruction *Inst; + /// @brief The access value that caused this memory access. + Value *AccessValue; /// Updated access relation read from JSCOP file. isl_map *newAccessRelation; @@ -307,13 +307,13 @@ /// @brief Create a memory access from an access in LLVM-IR. /// /// @param Access The memory access. - /// @param AccInst The access instruction. + /// @param AccessVal The access value that caused this memory access. /// @param Statement The statement that contains the access. /// @param SAI The ScopArrayInfo object for this base pointer. /// @param Identifier An identifier that is unique for all memory accesses /// belonging to the same scop statement. - MemoryAccess(const IRAccess &Access, Instruction *AccInst, - ScopStmt *Statement, const ScopArrayInfo *SAI, int Identifier); + MemoryAccess(const IRAccess &Access, Value *AccessVale, ScopStmt *Statement, + const ScopArrayInfo *SAI, int Identifier); ~MemoryAccess(); @@ -377,8 +377,13 @@ const std::string &getBaseName() const { return BaseName; } + /// @brief Return the access value of this memory access. + Value *getAccessValue() const { return AccessValue; } + /// @brief Return the access instruction of this memory access. - Instruction *getAccessInstruction() const { return Inst; } + Instruction *getAccessInstruction() const { + return dyn_cast(AccessValue); + } /// Get the stride of this memory access in the specified Schedule. Schedule /// is a map from the statement to a schedule where the innermost dimension is @@ -496,8 +501,8 @@ typedef SmallVector MemoryAccessVec; MemoryAccessVec MemAccs; - /// @brief Mapping from instructions to (scalar) memory accesses. - DenseMap InstructionToAccess; + /// @brief Mapping from values to (scalar) memory accesses they induced. + DenseMap ValueToAccess; //@} @@ -635,31 +640,30 @@ /// @brief Return true if this statement represents a whole region. bool isRegionStmt() const { return R != nullptr; } - /// @brief Return the (scalar) memory accesses for @p Inst. - const MemoryAccessList &getAccessesFor(const Instruction *Inst) const { - MemoryAccessList *MAL = lookupAccessesFor(Inst); + /// @brief Return the (scalar) memory accesses for @p Val. + const MemoryAccessList &getAccessesFor(const Value *Val) const { + MemoryAccessList *MAL = lookupAccessesFor(Val); assert(MAL && "Cannot get memory accesses because they do not exist!"); return *MAL; } - /// @brief Return the (scalar) memory accesses for @p Inst if any. - MemoryAccessList *lookupAccessesFor(const Instruction *Inst) const { - auto It = InstructionToAccess.find(Inst); - return It == InstructionToAccess.end() ? nullptr : It->getSecond(); + /// @brief Return the (scalar) memory accesses for @p Val if any. + MemoryAccessList *lookupAccessesFor(const Value *Val) const { + auto It = ValueToAccess.find(Val); + return It == ValueToAccess.end() ? nullptr : It->getSecond(); } - /// @brief Return the __first__ (scalar) memory access for @p Inst. - const MemoryAccess &getAccessFor(const Instruction *Inst) const { - MemoryAccess *MA = lookupAccessFor(Inst); + /// @brief Return the __first__ (scalar) memory access for @p Val. + const MemoryAccess &getAccessFor(const Value *Val) const { + MemoryAccess *MA = lookupAccessFor(Val); assert(MA && "Cannot get memory access because it does not exist!"); return *MA; } - /// @brief Return the __first__ (scalar) memory access for @p Inst if any. - MemoryAccess *lookupAccessFor(const Instruction *Inst) const { - auto It = InstructionToAccess.find(Inst); - return It == InstructionToAccess.end() ? nullptr - : &It->getSecond()->front(); + /// @brief Return the __first__ (scalar) memory access for @p Val if any. + MemoryAccess *lookupAccessFor(const Value *Val) const { + auto It = ValueToAccess.find(Val); + return It == ValueToAccess.end() ? nullptr : &It->getSecond()->front(); } void setBasicBlock(BasicBlock *Block) { @@ -769,6 +773,9 @@ /// Flag to indicate that the scheduler actually optimized the SCoP. bool IsOptimized; + /// @brief True if the underlying region has a single exiting block. + bool HasSingleExitEdge; + /// Max loop depth. unsigned MaxLoopDepth; @@ -1113,6 +1120,9 @@ /// @brief Align the parameters in the statement to the scop context void realignParams(); + /// @brief Return true if the underlying region has a single exiting block. + bool hasSingleExitEdge() const { return HasSingleExitEdge; } + /// @brief Print the static control part. /// /// @param OS The output stream the static control part is printed to. Index: include/polly/TempScopInfo.h =================================================================== --- include/polly/TempScopInfo.h +++ include/polly/TempScopInfo.h @@ -129,7 +129,7 @@ /// Mapping BBs to its condition constrains typedef std::map BBCondMapType; -typedef std::vector> AccFuncSetType; +typedef std::vector> AccFuncSetType; typedef std::map AccFuncMapType; //===---------------------------------------------------------------------===// @@ -289,8 +289,9 @@ /// @param R The SCoP region /// @param Functions The access functions of the current BB /// @param NonAffineSubRegion The non affine sub-region @p PHI is in. + /// @param IsExitBlock Flag to indicate that @p PHI is in the exit BB. void buildPHIAccesses(PHINode *PHI, Region &R, AccFuncSetType &Functions, - Region *NonAffineSubRegion); + Region *NonAffineSubRegion, bool IsExitBlock = false); /// @brief Build the access functions for the subregion @p SR. /// @@ -303,8 +304,10 @@ /// @param R The SCoP region. /// @param BB A basic block in @p R. /// @param NonAffineSubRegion The non affine sub-region @p BB is in. + /// @param IsExitBlock Flag to indicate that @p BB is in the exit BB. void buildAccessFunctions(Region &R, BasicBlock &BB, - Region *NonAffineSubRegion = nullptr); + Region *NonAffineSubRegion = nullptr, + bool IsExitBlock = false); public: static char ID; Index: lib/Analysis/ScopDetection.cpp =================================================================== --- lib/Analysis/ScopDetection.cpp +++ lib/Analysis/ScopDetection.cpp @@ -760,8 +760,7 @@ DEBUG(dbgs() << "\t\tTrying " << ExpandedRegion->getNameStr() << "\n"); // Only expand when we did not collect errors. - // Check the exit first (cheap) - if (isValidExit(Context) && !Context.Log.hasErrors()) { + if (!Context.Log.hasErrors()) { // If the exit is valid check all blocks // - if true, a valid region was found => store it + keep expanding // - if false, .tbd. => stop (should this really end the loop?) @@ -903,18 +902,6 @@ return true; } -bool ScopDetection::isValidExit(DetectionContext &Context) const { - - // PHI nodes are not allowed in the exit basic block. - if (BasicBlock *Exit = Context.CurRegion.getExit()) { - BasicBlock::iterator I = Exit->begin(); - if (I != Exit->end() && isa(*I)) - return invalid(Context, /*Assert=*/true, I); - } - - return true; -} - bool ScopDetection::isValidRegion(DetectionContext &Context) const { Region &CurRegion = Context.CurRegion; @@ -957,9 +944,6 @@ &(CurRegion.getEntry()->getParent()->getEntryBlock())) return invalid(Context, /*Assert=*/true, CurRegion.getEntry()); - if (!isValidExit(Context)) - return false; - if (!allBlocksValid(Context)) return false; Index: lib/Analysis/ScopInfo.cpp =================================================================== --- lib/Analysis/ScopInfo.cpp +++ lib/Analysis/ScopInfo.cpp @@ -440,11 +440,11 @@ return AccessRelation; } -MemoryAccess::MemoryAccess(const IRAccess &Access, Instruction *AccInst, +MemoryAccess::MemoryAccess(const IRAccess &Access, Value *AccessVal, ScopStmt *Statement, const ScopArrayInfo *SAI, int Identifier) - : AccType(getMemoryAccessType(Access)), Statement(Statement), Inst(AccInst), - newAccessRelation(nullptr) { + : AccType(getMemoryAccessType(Access)), Statement(Statement), + AccessValue(AccessVal), newAccessRelation(nullptr) { isl_ctx *Ctx = Statement->getIslCtx(); BaseAddr = Access.getBase(); @@ -666,15 +666,10 @@ } // @brief Get the data-type of the elements accessed -static Type *getAccessType(IRAccess &Access, Instruction *AccessInst) { - if (Access.isPHI()) - return Access.getBase()->getType(); - - if (StoreInst *Store = dyn_cast(AccessInst)) +static Type *getAccessType(IRAccess &Access, Value *AccessValue) { + if (StoreInst *Store = dyn_cast(AccessValue)) return Store->getValueOperand()->getType(); - if (BranchInst *Branch = dyn_cast(AccessInst)) - return Branch->getCondition()->getType(); - return AccessInst->getType(); + return AccessValue->getType(); } void ScopStmt::buildAccesses(TempScop &tempScop, BasicBlock *Block, @@ -685,8 +680,8 @@ for (auto &AccessPair : *AFS) { IRAccess &Access = AccessPair.first; - Instruction *AccessInst = AccessPair.second; - Type *ElementType = getAccessType(Access, AccessInst); + Value *AccessValue = AccessPair.second; + Type *ElementType = getAccessType(Access, AccessValue); const ScopArrayInfo *SAI = getParent()->getOrCreateScopArrayInfo( Access.getBase(), ElementType, Access.Sizes, Access.isPHI()); @@ -694,10 +689,10 @@ if (isApproximated && Access.isWrite()) Access.setMayWrite(); - MemoryAccessList *&MAL = InstructionToAccess[AccessInst]; + MemoryAccessList *&MAL = ValueToAccess[AccessValue]; if (!MAL) MAL = new MemoryAccessList(); - MAL->emplace_front(Access, AccessInst, this, SAI, MemAccs.size()); + MAL->emplace_front(Access, AccessValue, this, SAI, MemAccs.size()); MemAccs.push_back(&MAL->front()); } } @@ -971,6 +966,8 @@ // First collect candidate load-store reduction chains by iterating over all // stores and collecting possible reduction loads. for (MemoryAccess *StoreMA : MemAccs) { + if (StoreMA->isScalar()) + continue; if (StoreMA->isRead()) continue; @@ -1061,7 +1058,7 @@ } ScopStmt::~ScopStmt() { - DeleteContainerSeconds(InstructionToAccess); + DeleteContainerSeconds(ValueToAccess); isl_set_free(Domain); } @@ -1464,7 +1461,8 @@ Scop::Scop(Region &R, ScalarEvolution &ScalarEvolution, isl_ctx *Context, unsigned MaxLoopDepth) : SE(&ScalarEvolution), R(R), IsOptimized(false), - MaxLoopDepth(MaxLoopDepth), IslCtx(Context), Affinator(this) {} + HasSingleExitEdge(R.getExitingBlock()), MaxLoopDepth(MaxLoopDepth), + IslCtx(Context), Affinator(this) {} void Scop::initFromTempScop(TempScop &TempScop, LoopInfo &LI, ScopDetection &SD) { Index: lib/Analysis/TempScopInfo.cpp =================================================================== --- lib/Analysis/TempScopInfo.cpp +++ lib/Analysis/TempScopInfo.cpp @@ -107,8 +107,18 @@ void TempScopInfo::buildPHIAccesses(PHINode *PHI, Region &R, AccFuncSetType &Functions, - Region *NonAffineSubRegion) { - if (canSynthesize(PHI, LI, SE, &R)) + Region *NonAffineSubRegion, + bool IsExitBlock) { + + // PHI nodes that are in the ExitBlock of the region, hence if IsExitBlock is + // true, are not modelt as ordinary PHI nodes as they are not part of the + // region. However, we model the operands in the predecessor blocks that are + // part of the region as regular scalar accesses. + + // If we can synthesize a PHI we can skip it, however only if it is in + // the region. If it is not it can only be in the exit block of the region. + // In this case we model the operands but not the PHI itself. + if (PHI->getParent() != R.getExit() && canSynthesize(PHI, LI, SE, &R)) return; // PHI nodes are modeled as if they had been demoted prior to the SCoP @@ -142,19 +152,14 @@ } } - // If the operand is a constant, global or argument we use the terminator - // of the incoming basic block as the access instruction. - if (!OpI) - OpI = OpBB->getTerminator(); - IRAccess ScalarAccess(IRAccess::MUST_WRITE, PHI, ZeroOffset, 1, true, - /* IsPHI */ true); - AccFuncMap[OpBB].push_back(std::make_pair(ScalarAccess, OpI)); + /* IsPHI */ !IsExitBlock); + AccFuncMap[OpBB].push_back(std::make_pair(ScalarAccess, Op)); } if (!OnlyNonAffineSubRegionOperands) { IRAccess ScalarAccess(IRAccess::READ, PHI, ZeroOffset, 1, true, - /* IsPHI */ true); + /* IsPHI */ !IsExitBlock); Functions.push_back(std::make_pair(ScalarAccess, PHI)); } } @@ -306,7 +311,8 @@ } void TempScopInfo::buildAccessFunctions(Region &R, BasicBlock &BB, - Region *NonAffineSubRegion) { + Region *NonAffineSubRegion, + bool IsExitBlock) { AccFuncSetType Functions; Loop *L = LI->getLoopFor(&BB); @@ -315,13 +321,19 @@ for (BasicBlock::iterator I = BB.begin(), E = --BB.end(); I != E; ++I) { Instruction *Inst = I; + + PHINode *PHI = dyn_cast(Inst); + if (PHI) + buildPHIAccesses(PHI, R, Functions, NonAffineSubRegion, IsExitBlock); + + // For the exit block we stop modeling after the last PHI node. + if (!PHI && IsExitBlock) + break; + if (isa(Inst) || isa(Inst)) Functions.push_back( std::make_pair(buildIRAccess(Inst, L, &R, BoxedLoops), Inst)); - if (PHINode *PHI = dyn_cast(Inst)) - buildPHIAccesses(PHI, R, Functions, NonAffineSubRegion); - if (!isa(Inst) && buildScalarDependences(Inst, &R, NonAffineSubRegion)) { // If the Instruction is used outside the statement, we need to build the @@ -454,6 +466,15 @@ buildAccessFunctions(R, R); + // In case the region does not have an exiting block we will later (during + // code generation) split the exit block. This will move potential PHI nodes + // in the exit block into the region that are at this point not part of it. + // To handle these PHI nodes later on we will model their operands as scalar + // accesses. Note that we do not model anything in the exit block if we have + // a exiting block in the region, hence there will not be any splitting later. + if (!R.getExitingBlock()) + buildAccessFunctions(R, *R.getExit(), nullptr, /* IsExitBlock */ true); + for (const auto &BB : R.blocks()) buildCondition(BB, R); Index: lib/CodeGen/BlockGenerators.cpp =================================================================== --- lib/CodeGen/BlockGenerators.cpp +++ lib/CodeGen/BlockGenerators.cpp @@ -375,9 +375,7 @@ } void BlockGenerator::handleOutsideUsers(const Region &R, Instruction *Inst, - Value *InstCopy) { - BasicBlock *ExitBB = R.getExit(); - + Value *InstCopy, AllocaInst *Address) { EscapeUserVectorTy EscapeUsers; for (User *U : Inst->users()) { @@ -386,7 +384,7 @@ if (!UI) continue; - if (R.contains(UI) && ExitBB != UI->getParent()) + if (R.contains(UI)) continue; EscapeUsers.push_back(UI); @@ -405,18 +403,18 @@ return; // Get or create an escape alloca for this instruction. - bool IsNew; - AllocaInst *ScalarAddr = - getOrCreateAlloca(Inst, ScalarMap, ".escape", &IsNew); + bool IsNew = false; + if (!Address) + Address = getOrCreateAlloca(Inst, ScalarMap, ".escape", &IsNew); // Remember that this instruction has escape uses and the escape alloca. - EscapeMap[Inst] = std::make_pair(ScalarAddr, std::move(EscapeUsers)); + EscapeMap[Inst] = std::make_pair(Address, std::move(EscapeUsers)); // If the escape alloca was just created store the instruction in there, // otherwise that happened already. if (IsNew) { assert(InstCopy && "Except PHIs every instruction should have a copy!"); - Builder.CreateStore(InstCopy, ScalarAddr); + Builder.CreateStore(InstCopy, Address); } } @@ -499,7 +497,6 @@ continue; Instruction *Base = cast(MA->getBaseAddr()); - Instruction *Inst = MA->getAccessInstruction(); Value *Val = nullptr; AllocaInst *Address = nullptr; @@ -512,7 +509,7 @@ Val = BasePHI->getIncomingValue(PHIIdx); } else { Address = getOrCreateAlloca(Base, ScalarMap, ".s2a"); - Val = Inst; + Val = MA->getAccessValue(); } Val = getNewScalarValue(Val, R, ScalarMap, BBMap, GlobalMap); Builder.CreateStore(Val, Address); @@ -599,6 +596,18 @@ } void BlockGenerator::finalizeSCoP(Scop &S, ValueMapT &GlobalMap) { + if (!S.hasSingleExitEdge()) { + for (Instruction &I : *S.getRegion().getExitingBlock()) { + PHINode *PHI = dyn_cast(&I); + if (!PHI) + break; + assert(PHI->getNumUses() == 1); + assert(ScalarMap.count(PHI->user_back())); + AllocaInst *Address = ScalarMap[PHI->user_back()]; + handleOutsideUsers(S.getRegion(), PHI, nullptr, Address); + } + } + createScalarInitialization(S.getRegion(), GlobalMap); createScalarFinalization(S.getRegion()); } @@ -1127,11 +1136,12 @@ continue; Instruction *ScalarBase = cast(MA->getBaseAddr()); - Instruction *ScalarInst = MA->getAccessInstruction(); + Value *ScalarVal = MA->getAccessValue(); + Instruction *ScalarInst = dyn_cast(ScalarVal); PHINode *ScalarBasePHI = dyn_cast(ScalarBase); // Only generate accesses that belong to this basic block. - if (ScalarInst->getParent() != BB) + if (ScalarInst && ScalarInst->getParent() != BB) continue; Value *Val = nullptr; @@ -1139,11 +1149,13 @@ if (MA->getScopArrayInfo()->isPHI()) { int PHIIdx = ScalarBasePHI->getBasicBlockIndex(BB); + if (PHIIdx < 0) + continue; ScalarAddr = getOrCreateAlloca(ScalarBase, PHIOpMap, ".phiops"); Val = ScalarBasePHI->getIncomingValue(PHIIdx); } else { ScalarAddr = getOrCreateAlloca(ScalarBase, ScalarMap, ".s2a"); - Val = ScalarInst; + Val = ScalarVal; } Val = getNewScalarValue(Val, R, ScalarMap, BBMap, GlobalMap);