Index: include/polly/CodeGen/BlockGenerators.h =================================================================== --- include/polly/CodeGen/BlockGenerators.h +++ include/polly/CodeGen/BlockGenerators.h @@ -340,6 +340,13 @@ /// @param InstCopy The copy of the instruction @p Inst in the optimized SCoP. void handleOutsideUsers(const Region &R, Instruction *Inst, Value *InstCopy); + /// @brief Handle PHIs in the exiting block. + /// + /// CodeGeneration::createExitingBlockForExitNodePHIs creates a dedicated exiting BB for PHIs. These are not referenced by any ScopStmt and therefore must be processed explicitely. + /// + /// @param R The current SCoP region. + void handleExitingPHIs(Region &R, ValueMapT &GlobalMap); + /// @brief Initialize the memory of demoted scalars. /// /// If a PHI node was demoted and one of its predecessor blocks was outside @@ -364,7 +371,7 @@ /// we return the old value. If the value can still not be derived, this /// function will assert. /// - /// @param Stmt The statement to code generate. + /// @param Region The Region of the SCoP. /// @param Old The old Value. /// @param BBMap A mapping from old values to their new values /// (for values recalculated within this basic block). @@ -382,7 +389,7 @@ /// @returns o The old value, if it is still valid. /// o The new value, if available. /// o NULL, if no value is found. - Value *getNewValue(ScopStmt &Stmt, const Value *Old, ValueMapT &BBMap, + Value *getNewValue(Region &R, const Value *Old, ValueMapT &BBMap, ValueMapT &GlobalMap, LoopToScevMapT <S, Loop *L) const; void copyInstScalar(ScopStmt &Stmt, const Instruction *Inst, ValueMapT &BBMap, Index: include/polly/ScopDetection.h =================================================================== --- include/polly/ScopDetection.h +++ include/polly/ScopDetection.h @@ -223,13 +223,6 @@ /// @return True if all blocks in R are valid, false otherwise. bool allBlocksValid(DetectionContext &Context) const; - /// @brief Check the exit block of a region is valid. - /// - /// @param Context The context of scop detection. - /// - /// @return True if the exit of R is valid, false otherwise. - bool isValidExit(DetectionContext &Context) const; - /// @brief Check if a region is a Scop. /// /// @param Context The context of scop detection. Index: include/polly/Support/ScopHelper.h =================================================================== --- include/polly/Support/ScopHelper.h +++ include/polly/Support/ScopHelper.h @@ -14,6 +14,8 @@ #ifndef POLLY_SUPPORT_IRHELPER_H #define POLLY_SUPPORT_IRHELPER_H +#include "llvm/ADT/ArrayRef.h" + namespace llvm { class Type; class Instruction; @@ -88,16 +90,63 @@ /// The parameters are the same as for the creation of a SCEVExpander as well /// as the call to SCEVExpander::expandCodeFor: /// -/// @param S The current Scop. +/// @param R The current region. /// @param SE The Scalar Evolution pass. /// @param DL The module data layout. /// @param Name The suffix added to the new instruction names. /// @param E The expression for which code is actually generated. /// @param Ty The type of the resulting code. /// @param IP The insertion point for the new code. -llvm::Value *expandCodeFor(Scop &S, llvm::ScalarEvolution &SE, +llvm::Value *expandCodeFor(llvm::Region &R, llvm::ScalarEvolution &SE, const llvm::DataLayout &DL, const char *Name, const llvm::SCEV *E, llvm::Type *Ty, llvm::Instruction *IP); + +/// @brief Move a set of incoming edges to a new successor. +/// +/// Similar to llvm::SplitPredecessors, but instead of creating a new +/// predecessor, this creates a new successor. +/// Preserves LCSSA because all PHIs will exist in both BasicBlocks. A PHI node +/// is created in the successor for every PHI in the original block, even if +/// there is only one incoming edge. PHI nodes in the orginal block are never +/// removed, even if they have fewer than two incoming edges left. +/// We generally cannot update RegionInfo since it could invalidate regions, +/// depending on the list of edges. +/// +/// Before: After: +/// +/// NonPred[0] NonPred[0] +/// \ / \ / +/// /---->Entry /-->Entry / +/// | | | \ / +/// \--BlockInRegion | Entry.split +/// | | +/// \-------BlockInRegion +/// +/// If Entry is split, neither "Entry" nor "Entry.split" can be made the +/// region's entry node. "Entry" cannot because there is en edge from an +/// ouside-of-region block NonPred[0] to another region block, violating the +/// single entry requirement of a region. The new block "Entry.split" cannot be +/// the entry block because there is an exiting edge to "Entry", which would +/// then be an alternative to the region's other exit block. +/// There is a similar situation with the exit block conflicting with other +/// regions. +/// Hence, the caller of this function should make sure such cases do not happen +/// and update RegionInfo accordingly. +/// +/// @param BB Block to split. +/// @param NonPreds Set of incoming edges to move to the new successor. +/// @param Suffix Suffix to append to the name of the new block. +/// @param DT DominatorTree to update. +/// @param LI LoopInfo to update. +/// @param SE ScalarEvolution to update. +/// +/// @return The newly created successor of BB or null if the block could not be +/// split. +llvm::BasicBlock * +splitBlockNonPredecessors(llvm::BasicBlock *BB, + llvm::ArrayRef NonPreds, + llvm::StringRef Suffix, llvm::DominatorTree *DT, + llvm::LoopInfo *LI, llvm::ScalarEvolution *SE); } #endif Index: include/polly/TempScopInfo.h =================================================================== --- include/polly/TempScopInfo.h +++ include/polly/TempScopInfo.h @@ -306,10 +306,19 @@ /// /// @param R The SCoP region. /// @param BB A basic block in @p R. + /// @param OnlyPHIs Process only the PHI nodes of the block. Used for the region's exit block. /// @param NonAffineSubRegion The non affine sub-region @p BB is in. - void buildAccessFunctions(Region &R, BasicBlock &BB, + void buildAccessFunctions(Region &R, BasicBlock &BB, bool OnlyPHIs, Region *NonAffineSubRegion = nullptr); + /// @brief Determine whether an instruction belongs to the Scop. + /// + /// @param R The region representing the scop. + /// @param Inst The instruction to query. + /// + /// @return True iff the instruction belongs to the scop. + bool scopContains(llvm::Region *R, llvm::Instruction *Inst) const; + public: static char ID; explicit TempScopInfo() : RegionPass(ID), TempScopOfRegion(nullptr) {} Index: lib/Analysis/ScopDetection.cpp =================================================================== --- lib/Analysis/ScopDetection.cpp +++ lib/Analysis/ScopDetection.cpp @@ -761,7 +761,7 @@ // Only expand when we did not collect errors. // Check the exit first (cheap) - if (isValidExit(Context) && !Context.Log.hasErrors()) { + if (!Context.Log.hasErrors()) { // If the exit is valid check all blocks // - if true, a valid region was found => store it + keep expanding // - if false, .tbd. => stop (should this really end the loop?) @@ -903,18 +903,6 @@ return true; } -bool ScopDetection::isValidExit(DetectionContext &Context) const { - - // PHI nodes are not allowed in the exit basic block. - if (BasicBlock *Exit = Context.CurRegion.getExit()) { - BasicBlock::iterator I = Exit->begin(); - if (I != Exit->end() && isa(*I)) - return invalid(Context, /*Assert=*/true, I); - } - - return true; -} - bool ScopDetection::isValidRegion(DetectionContext &Context) const { Region &CurRegion = Context.CurRegion; @@ -957,9 +945,6 @@ &(CurRegion.getEntry()->getParent()->getEntryBlock())) return invalid(Context, /*Assert=*/true, CurRegion.getEntry()); - if (!isValidExit(Context)) - return false; - if (!allBlocksValid(Context)) return false; Index: lib/Analysis/TempScopInfo.cpp =================================================================== --- lib/Analysis/TempScopInfo.cpp +++ lib/Analysis/TempScopInfo.cpp @@ -179,7 +179,11 @@ BasicBlock *UseParent = UI->getParent(); // Ignore the users in the same BB (statement) - if (UseParent == ParentBB) + // There will be a split in the Exit BB after the last PHI instruction which + // we have to consider. PHI nodes in the exit block are in a different block + // than non-PHI instructions. + if ((UseParent == ParentBB) && + !(UseParent == R->getExit() && isa(Inst) != isa(UI))) continue; // Do not build scalar dependences inside a non-affine subregion. @@ -187,7 +191,7 @@ continue; // Check whether or not the use is in the SCoP. - if (!R->contains(UseParent)) { + if (!scopContains(R, UI)) { AnyCrossStmtUse = true; continue; } @@ -222,7 +226,7 @@ continue; if (Instruction *OpInst = dyn_cast(Op)) - if (R->contains(OpInst)) + if (scopContains(R, OpInst)) continue; IRAccess ScalarAccess(IRAccess::READ, Op, ZeroOffset, 1, true, Op); @@ -298,7 +302,7 @@ if (SD->isNonAffineSubRegion(&SR, &R)) { for (BasicBlock *BB : SR.blocks()) - buildAccessFunctions(R, *BB, &SR); + buildAccessFunctions(R, *BB, false, &SR); return; } @@ -306,11 +310,10 @@ if (I->isSubRegion()) buildAccessFunctions(R, *I->getNodeAs()); else - buildAccessFunctions(R, *I->getNodeAs()); + buildAccessFunctions(R, *I->getNodeAs(), false); } -void TempScopInfo::buildAccessFunctions(Region &R, BasicBlock &BB, - Region *NonAffineSubRegion) { +void TempScopInfo::buildAccessFunctions(Region &R, BasicBlock &BB, bool OnlyPHIs, Region *NonAffineSubRegion) { AccFuncSetType Functions; Loop *L = LI->getLoopFor(&BB); @@ -319,6 +322,12 @@ for (BasicBlock::iterator I = BB.begin(), E = --BB.end(); I != E; ++I) { Instruction *Inst = I; + + // The PHI nodes of the exit block are precessed as if they were in the scop's region. We need to create memory accesses for the PHI's arguments such that we can demote it to an alloca location. + // A block always starts with its PHI nodes and hence once we encouter a non-PHI, subsequent instruction can be skipped. + if (OnlyPHIs && !isa(Inst)) + break; + if (isa(Inst) || isa(Inst)) Functions.push_back( std::make_pair(buildIRAccess(Inst, L, &R, BoxedLoops), Inst)); @@ -343,6 +352,18 @@ Accs.insert(Accs.end(), Functions.begin(), Functions.end()); } +bool TempScopInfo::scopContains(Region *R, llvm::Instruction *Inst) const { + if (R->contains(Inst)) + return true; + + // The region will be extended later to also contain the PHI nodes in the exit + // block. + if (Inst->getParent() == R->getExit() && isa(Inst)) + return true; + + return false; +} + Comparison TempScopInfo::buildAffineCondition(Value &V, bool inverted) { if (ConstantInt *C = dyn_cast(&V)) { // If this is always true condition, we will create 0 <= 1, @@ -459,6 +480,13 @@ buildAccessFunctions(R, R); + // Simple instructions outside of the scop's region that depend on a scalar inside the scop (an escaping value) can be handled at code generation by just replacing the operand with the scop's value at exit time. (More exactly, a PHI node in the merge block will select between the orginal scalar and the generated one, depending on whether the original or generated version has been executed) + // + // If there is a PHI instruction in the scop's region this is not directly possible. The escaping value depends on which edge is taken from the region. This property is not preserved/modeled in SCoPs; the generated code always has just one exiting edge. + // We therefore model these exit node PHIs like other PHIs in there region: The PHI value is demoted to a memory location and the incoming blocks write the incoming values to that location such that the value can be loaded from there instead. The dependencies from these memory accesses ensure that either only one of the is stored, or at least stored on the correct order after code generation. + // Usually there is also a read access for the PHI, but ScopInfo does not create a statement for the exit block such they must be handled explicitely in the code generator. + buildAccessFunctions(R, *R.getExit(), true); + for (const auto &BB : R.blocks()) buildCondition(BB, R); Index: lib/CodeGen/BlockGenerators.cpp =================================================================== --- lib/CodeGen/BlockGenerators.cpp +++ lib/CodeGen/BlockGenerators.cpp @@ -88,7 +88,7 @@ EntryBB(nullptr), PHIOpMap(PHIOpMap), ScalarMap(ScalarMap), EscapeMap(EscapeMap) {} -Value *BlockGenerator::getNewValue(ScopStmt &Stmt, const Value *Old, +Value *BlockGenerator::getNewValue(Region &R, const Value *Old, ValueMapT &BBMap, ValueMapT &GlobalMap, LoopToScevMapT <S, Loop *L) const { // We assume constants never change. @@ -116,15 +116,14 @@ VTV.insert(GlobalMap.begin(), GlobalMap.end()); NewScev = SCEVParameterRewriter::rewrite(NewScev, SE, VTV); - Scop &S = *Stmt.getParent(); const DataLayout &DL = - S.getRegion().getEntry()->getParent()->getParent()->getDataLayout(); + R.getEntry()->getParent()->getParent()->getDataLayout(); auto IP = Builder.GetInsertPoint(); assert(IP != Builder.GetInsertBlock()->end() && "Only instructions can be insert points for SCEVExpander"); Value *Expanded = - expandCodeFor(S, SE, DL, "polly", NewScev, Old->getType(), IP); + expandCodeFor(R, SE, DL, "polly", NewScev, Old->getType(), IP); BBMap[Old] = Expanded; return Expanded; @@ -137,7 +136,7 @@ // A scop-constant value defined by an instruction executed outside the scop. if (const Instruction *Inst = dyn_cast(Old)) - if (!Stmt.getParent()->getRegion().contains(Inst->getParent())) + if (!R.contains(Inst->getParent())) return const_cast(Old); // The scalar dependence is neither available nor SCEVCodegenable. @@ -155,11 +154,11 @@ return; Instruction *NewInst = Inst->clone(); + Region &R = Stmt.getParent()->getRegion(); // Replace old operands with the new ones. for (Value *OldOperand : Inst->operands()) { - Value *NewOperand = getNewValue(Stmt, OldOperand, BBMap, GlobalMap, LTS, - getLoopForInst(Inst)); + Value *NewOperand = getNewValue(R, OldOperand, BBMap, GlobalMap, LTS, getLoopForInst(Inst)); if (!NewOperand) { assert(!isa(NewInst) && @@ -207,7 +206,7 @@ NewPointer = getNewAccessOperand(Stmt, MA); else NewPointer = - getNewValue(Stmt, Pointer, BBMap, GlobalMap, LTS, getLoopForInst(Inst)); + getNewValue(Stmt.getParent()->getRegion(), Pointer, BBMap, GlobalMap, LTS, getLoopForInst(Inst)); return NewPointer; } @@ -234,7 +233,7 @@ const Value *Pointer = Store->getPointerOperand(); Value *NewPointer = generateLocationAccessed(Stmt, Store, Pointer, BBMap, GlobalMap, LTS); - Value *ValueOperand = getNewValue(Stmt, Store->getValueOperand(), BBMap, + Value *ValueOperand = getNewValue(Stmt.getParent()->getRegion(), Store->getValueOperand(), BBMap, GlobalMap, LTS, getLoopForInst(Store)); Builder.CreateAlignedStore(ValueOperand, NewPointer, Store->getAlignment()); @@ -255,7 +254,7 @@ Loop *L = getLoopForInst(Inst); if ((Stmt.isBlockStmt() || !Stmt.getRegion()->contains(L)) && canSynthesize(Inst, &LI, &SE, &Stmt.getParent()->getRegion())) { - Value *NewValue = getNewValue(Stmt, Inst, BBMap, GlobalMap, LTS, L); + Value *NewValue = getNewValue(Stmt.getParent()->getRegion(), Inst, BBMap, GlobalMap, LTS, L); BBMap[Inst] = NewValue; return; } @@ -382,6 +381,7 @@ if (EscapeMap.count(Inst)) return; + BasicBlock *EnteringBB = R.getEnteringBlock(); EscapeUserVectorTy EscapeUsers; for (User *U : Inst->users()) { @@ -390,7 +390,7 @@ if (!UI) continue; - if (R.contains(UI)) + if (R.contains(UI) || EnteringBB == UI->getParent()) continue; EscapeUsers.push_back(UI); @@ -506,6 +506,42 @@ } } +void BlockGenerator::handleExitingPHIs(Region &R, ValueMapT &GlobalMap) { + // The exit block of the __unoptimized__ region. + BasicBlock *OrigExitingBB = R.getExitingBlock(); + // The merge block __just after__ the region and the optimized region. + BasicBlock *MergeBB = R.getExit(); + + // The exit block of the __optimized__ region. + BasicBlock *OptExitingBB = *(pred_begin(MergeBB)); + if (OptExitingBB == OrigExitingBB) + OptExitingBB = *(++pred_begin(MergeBB)); + + EntryBB = &MergeBB->getParent()->getEntryBlock(); + Builder.SetInsertPoint(OptExitingBB->getTerminator()); + + // That's what copyBB does to PHI instructions + for (Instruction &Inst : *OrigExitingBB) { + if (isa(Inst)) + break; + assert(isa(Inst)); + + Value *Val = nullptr; + if (canSynthesize(&Inst, &LI, &SE, &R)) { + ValueMapT EmptyLocalMap; + LoopToScevMapT EmptyLoopMap; + Val = getNewValue(R, &Inst, EmptyLocalMap, GlobalMap, EmptyLoopMap, nullptr); + } else { + bool IsNew = true; + auto Address = getOrCreateAlloca(&Inst, PHIOpMap, ".phiops", &IsNew); + assert(!IsNew && "Some generated block must have written the value as it is undefined otherwise"); + Val = Builder.CreateLoad(Address, Address->getName() + ".reload"); + } + + handleOutsideUsers(R, &Inst, Val); + } +} + void BlockGenerator::createScalarInitialization(Region &R, ValueMapT &GlobalMap) { // The split block __just before__ the region and optimized region. @@ -586,8 +622,11 @@ } void BlockGenerator::finalizeSCoP(Scop &S, ValueMapT &GlobalMap) { - createScalarInitialization(S.getRegion(), GlobalMap); - createScalarFinalization(S.getRegion()); + Region &R = S.getRegion(); + + handleExitingPHIs(R, GlobalMap); + createScalarInitialization(R, GlobalMap); + createScalarFinalization(R); } VectorBlockGenerator::VectorBlockGenerator(BlockGenerator &BlockGen, @@ -613,7 +652,7 @@ for (int Lane = 0; Lane < Width; Lane++) Vector = Builder.CreateInsertElement( - Vector, getNewValue(Stmt, Old, ScalarMaps[Lane], GlobalMaps[Lane], + Vector, getNewValue(Stmt.getParent()->getRegion(), Old, ScalarMaps[Lane], GlobalMaps[Lane], VLTS[Lane], L), Builder.getInt32(Lane)); @@ -1157,7 +1196,7 @@ Value *Op = PHI->getIncomingValueForBlock(IncomingBB); OpCopy = - getNewValue(Stmt, Op, BBCopyMap, GlobalMap, LTS, getLoopForInst(PHI)); + getNewValue(Stmt.getParent()->getRegion(), Op, BBCopyMap, GlobalMap, LTS, getLoopForInst(PHI)); } else { if (PHICopy->getBasicBlockIndex(BBCopy) >= 0) Index: lib/CodeGen/CodeGeneration.cpp =================================================================== --- lib/CodeGen/CodeGeneration.cpp +++ lib/CodeGen/CodeGeneration.cpp @@ -93,6 +93,69 @@ return true; } + // Create a dedicated block for exit node PHIs. + // This special block is made the exiting block such that the PHIs become included into the region. + // The exit block itself keeps its identity and have a single predecessor. The dedicated PHI block is returned. + BasicBlock *createExitingBlockForExitNodePHIs(Region *R) { + BasicBlock *ExitBB = R->getExit(); + + SmallVector NonPreds; + for (BasicBlock *P : predecessors(ExitBB)) + if (!R->contains(P)) + NonPreds.push_back(P); + + // Preds[0] Preds[1] otherBB // + // \ | ________/ // + // \ | / // + // ExitBB // + BasicBlock *NewExitBlock = + splitBlockNonPredecessors(ExitBB, NonPreds, ".exit", DT, LI, SE); + // Preds[0] Preds[1] otherBB // + // \ / / // + // ExitBB / // + // \ / // + // NewExitBlock // + + // Set the region of the newly created block. + Region *RegionOfExit = RI->getRegionFor(ExitBB); + RI->setRegionFor(NewExitBlock, RegionOfExit); + + // If there was a region with ExitBB as entry block, change it to + // NewExitBlock. + while (RegionOfExit && !RegionOfExit->isTopLevelRegion() && + RegionOfExit->getEntry() == ExitBB) { + RegionOfExit->replaceEntry(NewExitBlock); + RegionOfExit = RegionOfExit->getParent(); + } + + // Change the exit node of R, but not its subregions because they can keep + // ExitBB as their exit block. + // This ensures that the region of ExitBB is always R itself. + R->replaceExit(NewExitBlock); + RI->setRegionFor(ExitBB, R); + + // Make NewExitBlock the new exit block of all other regions that previously + // had ExitBB as exit block. + for (BasicBlock *PotentialExiting : predecessors(NewExitBlock)) { + if (PotentialExiting == NewExitBlock) + continue; + + Region *OtherR = RI->getRegionFor(PotentialExiting); + while (OtherR && !OtherR->isTopLevelRegion() && + OtherR->getExit() == ExitBB) { + OtherR->replaceExit(NewExitBlock); + OtherR = OtherR->getParent(); + } + } + + assert(R->contains(ExitBB)); + assert(!R->contains(NewExitBlock)); + assert(R->getExit() == NewExitBlock); + assert(R->getExitingBlock() == ExitBB); + + return ExitBB; // The block containing the PHIs + } + // CodeGeneration adds a lot of BBs without updating the RegionInfo // We make all created BBs belong to the scop's parent region without any // nested structure to keep the RegionInfo verifier happy. @@ -123,6 +186,7 @@ Annotator.buildAliasScopes(S); + createExitingBlockForExitNodePHIs(R); simplifyRegion(R, DT, LI, RI); assert(R->isSimple()); BasicBlock *EnteringBB = S.getRegion().getEnteringBlock(); Index: lib/CodeGen/IslExprBuilder.cpp =================================================================== --- lib/CodeGen/IslExprBuilder.cpp +++ lib/CodeGen/IslExprBuilder.cpp @@ -149,7 +149,7 @@ const SCEV *DimSCEV = SAI->getDimensionSize(u - 1); Value *DimSize = - expandCodeFor(S, SE, DL, "polly", DimSCEV, DimSCEV->getType(), + expandCodeFor(S.getRegion(), SE, DL, "polly", DimSCEV, DimSCEV->getType(), Builder.GetInsertPoint()); Type *Ty = getWidestType(DimSize->getType(), IndexOp->getType()); Index: lib/CodeGen/IslNodeBuilder.cpp =================================================================== --- lib/CodeGen/IslNodeBuilder.cpp +++ lib/CodeGen/IslNodeBuilder.cpp @@ -741,6 +741,6 @@ Value *IslNodeBuilder::generateSCEV(const SCEV *Expr) { Instruction *InsertLocation = --(Builder.GetInsertBlock()->end()); - return expandCodeFor(S, SE, DL, "polly", Expr, Expr->getType(), + return expandCodeFor(S.getRegion(), SE, DL, "polly", Expr, Expr->getType(), InsertLocation); } Index: lib/Support/SCEVValidator.cpp =================================================================== --- lib/Support/SCEVValidator.cpp +++ lib/Support/SCEVValidator.cpp @@ -483,6 +483,9 @@ bool visitUnknown(const SCEVUnknown *Expr) { Instruction *Inst = dyn_cast(Expr->getValue()); + if (Inst && isa(Inst) && Inst->getParent() == R->getExit()) + return true; + // Return true when Inst is defined inside the region R. if (Inst && R->contains(Inst)) return true; Index: lib/Support/ScopHelper.cpp =================================================================== --- lib/Support/ScopHelper.cpp +++ lib/Support/ScopHelper.cpp @@ -355,9 +355,143 @@ ///} }; -Value *polly::expandCodeFor(Scop &S, ScalarEvolution &SE, const DataLayout &DL, +Value *polly::expandCodeFor(Region &R, ScalarEvolution &SE, const DataLayout &DL, const char *Name, const SCEV *E, Type *Ty, Instruction *IP) { - ScopExpander Expander(S.getRegion(), SE, DL, Name); + ScopExpander Expander(R, SE, DL, Name); return Expander.expandCodeFor(E, Ty, IP); } + +BasicBlock *polly::splitBlockNonPredecessors(BasicBlock *BB, + ArrayRef NonPreds, + llvm::StringRef Suffix, + DominatorTree *DT, LoopInfo *LI, + ScalarEvolution *SE) { + assert(BB); + + // Do not attempt to split blocks that cannot be split. + if (!BB->canSplitPredecessors()) + return nullptr; + + // Currently we cannot handle landing pads. + if (BB->isLandingPad()) + return nullptr; + + // Before: + // + // NonPreds[0] // + // \ / // + // BB // + // / \ // + + // Create new basic block, insert right after the original block. + auto OldName = BB->getName(); + BasicBlock *NewBB = + splitBlock(BB, BB->getFirstInsertionPt(), DT, LI, nullptr); + NewBB->setName(OldName + Suffix); + + // Create new PHIs into the new block. + for (auto I = BB->begin(); isa(I); ++I) { + PHINode *OldPHI = cast(&*I); + PHINode *NewPHI = PHINode::Create(OldPHI->getType(), NonPreds.size() + 1, + OldPHI->getName() + ".np", + NewBB->getFirstInsertionPt()); + + // Replaces uses of the original PHI by the new ones. + OldPHI->replaceAllUsesWith(NewPHI); + + // Add an edge from the original block (this adds a use) + NewPHI->addIncoming(OldPHI, BB); + + // Move the incoming block's values. + for (BasicBlock *Edge : NonPreds) { + BasicBlock *RedirectedEdge = Edge == BB ? NewBB : Edge; + auto i = OldPHI->getBasicBlockIndex(RedirectedEdge); + assert(i >= 0); + Value *Val = OldPHI->getIncomingValue(i); + NewPHI->addIncoming(Val, RedirectedEdge); + OldPHI->removeIncomingValue(i, false); + + if (SE) + SE->forgetValue(OldPHI); + } + } + + // If there were no edges to move, there is nothing more to do. + if (NonPreds.empty()) + return NewBB; + + // Move all non-pred edges to the new BB. + for (BasicBlock *Edge : NonPreds) { + BasicBlock *RedirectedEdge = Edge == BB ? NewBB : Edge; + assert(!isa(RedirectedEdge->getTerminator()) && + "Cannot split an edge from an IndirectBrInst"); + RedirectedEdge->getTerminator()->replaceUsesOfWith(BB, NewBB); + } + + if (DT) { + // DominatorTree::splitBlock assumes that it is the predecessor which is + // new. In order to reuse that function, we recreate the situation as if it + // was. + auto BBNode = DT->getNode(BB); + auto NewBBNode = DT->getNode(NewBB); + DT->changeImmediateDominator(NewBBNode, BBNode->getIDom()); + DT->eraseNode(BB); + + DT->splitBlock(BB); + } + + if (LI) { + BasicBlock *OldBB = BB; + Loop *L = LI->getLoopFor(OldBB); + while (L) { + assert(L->contains(OldBB)); + + bool OldBBReachableFromInside = false; + for (BasicBlock *Pred : predecessors(OldBB)) { + if (Pred == NewBB || L->contains(Pred)) { + OldBBReachableFromInside = true; + break; + } + } + + bool NewBBReachableFromOutside = false; + for (auto Pred : make_range(pred_begin(NewBB), pred_end(NewBB))) { + if (Pred == OldBB) + continue; + + if (Pred != NewBB && !L->contains(Pred)) { + NewBBReachableFromOutside = true; + break; + } + } + + if (NewBBReachableFromOutside || !OldBBReachableFromInside) { + // Remove BB from the loop and make the NewBB the next loop header. + // Since NewBB dominates OldBB, it can be the new header. + if (L->getHeader() == OldBB) + L->moveToHeader(NewBB); + } + + if (!OldBBReachableFromInside) { + // OldBB cannot be reached from the loop header anymore, i.e. it has + // been excluded from the loop. + L->removeBlockFromLoop(BB); + LI->changeLoopFor(BB, L->getParentLoop()); + } + + L = L->getParentLoop(); + } + } + + // After: + // + // NonPreds[0] // + // \ / // + // BB / // + // \ / // + // NewBB // + // / \ // + + return NewBB; +} \ No newline at end of file Index: test/Isl/CodeGen/inner_scev_sdiv_2.ll =================================================================== --- test/Isl/CodeGen/inner_scev_sdiv_2.ll +++ test/Isl/CodeGen/inner_scev_sdiv_2.ll @@ -1,4 +1,4 @@ -; RUN: opt %loadPolly -S -polly-no-early-exit -polly-detect-unprofitable -polly-codegen < %s | FileCheck %s +; RUN: opt %loadPolly -S -polly-no-early-exit -polly-detect-unprofitable -polly-codegen < %s ; ; The SCEV expression in this test case refers to a sequence of sdiv ; instructions, which are part of different bbs in the SCoP. When code Index: test/Isl/CodeGen/loop_with_conditional_entry_edge_split_hard_case.ll =================================================================== --- test/Isl/CodeGen/loop_with_conditional_entry_edge_split_hard_case.ll +++ test/Isl/CodeGen/loop_with_conditional_entry_edge_split_hard_case.ll @@ -19,10 +19,10 @@ entry: br label %while.begin -; CHECK-LABEL: while.begin.region_exiting: -; CHECK: br label %polly.merge_new_and_old - ; CHECK-LABEL: while.begin: +; CHECK: br label %polly.merge_new_and_old + +; CHECK-LABEL: while.begin.exit: while.begin: ; CHECK: %call = call i32 @f() %call = call i32 @f() Index: test/Isl/CodeGen/phi_loop_carried_float.ll =================================================================== --- test/Isl/CodeGen/phi_loop_carried_float.ll +++ test/Isl/CodeGen/phi_loop_carried_float.ll @@ -13,7 +13,10 @@ ; CHECK-DAG: %tmp.0.phiops = alloca float ; CHECK-NOT: %tmp7{{[.*]}} = alloca float -; CHECK-LABEL: exit: +; CHECK-LABEL: polly.merge_new_and_old: +; CHECK-NEXT: br label %exit.exit + +; CHECK-LABEL: exit.exit: ; CHECK-NEXT: ret ; CHECK-LABEL: polly.start: Index: test/Isl/CodeGen/phi_loop_carried_float_escape.ll =================================================================== --- test/Isl/CodeGen/phi_loop_carried_float_escape.ll +++ test/Isl/CodeGen/phi_loop_carried_float_escape.ll @@ -8,8 +8,11 @@ ; } ; CHECK-LABEL: polly.merge_new_and_old: -; CHECK-NEXT: %tmp.0.merge = phi float [ %tmp.0.final_reload, %polly.merge ], [ %tmp.0, %bb8 ] -; CHECK-NEXT: br label %exit +; CHECK-NEXT: %tmp.0.merge = phi float [ %tmp.0.final_reload, %polly.merge ], [ %tmp.0, %exit ] +; CHECK-NEXT: br label %exit.exit + +; CHECK-LABEL: exit.exit: +; CHECK-NEXT: ret float %tmp.0.merge ; CHECK-LABEL: polly.start: ; CHECK-NEXT: store float 0.000000e+00, float* %tmp.0.phiops Index: test/Isl/CodeGen/phi_scalar_simple_1.ll =================================================================== --- test/Isl/CodeGen/phi_scalar_simple_1.ll +++ test/Isl/CodeGen/phi_scalar_simple_1.ll @@ -22,7 +22,7 @@ br label %for.cond ; CHECK-LABEL: polly.merge_new_and_old: -; CHECK: %x.addr.0.merge = phi i32 [ %x.addr.0.final_reload, %polly.merge ], [ %x.addr.0, %for.cond ] +; CHECK: %x.addr.0.merge = phi i32 [ %x.addr.0.final_reload, %polly.merge ], [ %x.addr.0, %for.end6 ] ; CHECK: ret i32 %x.addr.0.merge ; CHECK-LABEL: polly.start: Index: test/Isl/CodeGen/phi_scalar_simple_2.ll =================================================================== --- test/Isl/CodeGen/phi_scalar_simple_2.ll +++ test/Isl/CodeGen/phi_scalar_simple_2.ll @@ -24,7 +24,7 @@ br label %for.cond ; CHECK-LABEL: polly.merge_new_and_old: -; CHECK: %x.addr.0.merge = phi i32 [ %x.addr.0.final_reload, %polly.merge ], [ %x.addr.0, %for.cond ] +; CHECK: %x.addr.0.merge = phi i32 [ %x.addr.0.final_reload, %polly.merge ], [ %x.addr.0, %for.end7 ] ; CHECK: ret i32 %x.addr.0.merge ; CHECK-LABEL: polly.start: Index: test/ScopDetect/keep_going_expansion.ll =================================================================== --- test/ScopDetect/keep_going_expansion.ll +++ test/ScopDetect/keep_going_expansion.ll @@ -42,4 +42,4 @@ ret i32 %add } -; CHECK: Valid Region for Scop: for.body => for.cond2.preheader +; CHECK: Valid Region for Scop: for.body => for.body4 Index: test/ScopDetect/multidim_indirect_access.ll =================================================================== --- test/ScopDetect/multidim_indirect_access.ll +++ test/ScopDetect/multidim_indirect_access.ll @@ -3,16 +3,12 @@ ; The outer loop of this function will correctly not be recognized with the ; message: ; -; Non affine access function: (sext i32 %tmp to i64) +; SCEV of PHI node refers to SSA names in region ; ; The access A[x] might mistakenly be treated as a multidimensional access with ; dimension size x. This test will check that we correctly invalidate the ; region and do not detect an outer SCoP. ; -; FIXME: -; We should detect the inner region but the PHI node in the exit blocks -; prohibits that. -; ; void f(int *A, long N) { ; int j = 0; ; while (N > j) { @@ -25,6 +21,7 @@ ; } ; } ; +; CHECK: Valid Region for Scop: bb1 => bb0 ; CHECK-NOT: Valid Region for Scop: bb0 => bb13 ; target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" Index: test/ScopDetect/non-affine-loop-condition-dependent-access_2.ll =================================================================== --- test/ScopDetect/non-affine-loop-condition-dependent-access_2.ll +++ test/ScopDetect/non-affine-loop-condition-dependent-access_2.ll @@ -7,9 +7,9 @@ ; innermost loop as a SCoP of depth 1, we have to reject the loop nest if not ; both, non-affine loops as well as non-affine accesses are allowed. ; -; REJECTNONAFFINELOOPS: Valid Region for Scop: bb15 => bb26 +; REJECTNONAFFINELOOPS: Valid Region for Scop: bb15 => bb13 ; REJECTNONAFFINELOOPS-NOT: Valid -; ALLOWNONAFFINELOOPS: Valid Region for Scop: bb15 => bb26 +; ALLOWNONAFFINELOOPS: Valid Region for Scop: bb15 => bb13 ; ALLOWNONAFFINELOOPS-NOT: Valid ; ALLOWNONAFFINELOOPSANDACCESSES: Valid Region for Scop: bb11 => bb29 ; Index: test/ScopDetect/non-affine-loop-condition-dependent-access_3.ll =================================================================== --- test/ScopDetect/non-affine-loop-condition-dependent-access_3.ll +++ test/ScopDetect/non-affine-loop-condition-dependent-access_3.ll @@ -7,9 +7,9 @@ ; innermost loop as a SCoP of depth 1, we have to reject the loop nest if not ; both, non-affine loops as well as non-affine accesses are allowed. ; -; REJECTNONAFFINELOOPS: Valid Region for Scop: bb15 => bb26 +; REJECTNONAFFINELOOPS: Valid Region for Scop: bb15 => bb13 ; REJECTNONAFFINELOOPS-NOT: Valid -; ALLOWNONAFFINELOOPS: Valid Region for Scop: bb15 => bb26 +; ALLOWNONAFFINELOOPS: Valid Region for Scop: bb15 => bb13 ; ALLOWNONAFFINELOOPS-NOT: Valid ; ALLOWNONAFFINELOOPSANDACCESSES: Valid Region for Scop: bb11 => bb29 ; Index: test/ScopDetect/phi_with_multi_exiting_edges.ll =================================================================== --- test/ScopDetect/phi_with_multi_exiting_edges.ll +++ test/ScopDetect/phi_with_multi_exiting_edges.ll @@ -1,7 +1,5 @@ ; RUN: opt %loadPolly -polly-detect-unprofitable -polly-detect -analyze -S < %s | FileCheck %s ; -; XFAIL: * -; ; Region with an exit node that has a PHI node multiple incoming edges from ; inside the region. Motivation for supporting such cases in Polly. ; Index: test/ScopDetect/phi_with_multi_exiting_edges_2.ll =================================================================== --- /dev/null +++ test/ScopDetect/phi_with_multi_exiting_edges_2.ll @@ -0,0 +1,35 @@ +; RUN: opt %loadPolly -polly-detect-unprofitable -polly-detect -analyze -S < %s | FileCheck %s + +define float @foo(float* %A, i64 %param) { +entry: + br label %entry.split + +entry.split: + %branchcond = icmp slt i64 %param, 64 + br i1 %branchcond, label %loopA, label %loopB + +loopA: + %indvarA = phi i64 [0, %entry.split], [%indvar.nextA, %loopA] + %indvar.nextA = add i64 %indvarA, 1 + %valA = load float, float* %A + %sumA = fadd float %valA, %valA + store float %valA, float* %A + %cndA = icmp eq i64 %indvar.nextA, 100 + br i1 %cndA, label %next, label %loopA + +loopB: + %indvarB = phi i64 [0, %entry.split], [%indvar.nextB, %loopB] + %indvar.nextB = add i64 %indvarB, 1 + %valB = load float, float* %A + %sumB = fadd float %valB, %valB + store float %valB, float* %A + %cndB = icmp eq i64 %indvar.nextB, 100 + br i1 %cndB, label %next, label %loopB + +next: + %result = phi float [%sumA, %loopA], [%sumB, %loopB] + ret float %result + +} + +; CHECK: Valid Region for Scop: entry.split => next Index: test/ScopDetect/simple_non_single_entry.ll =================================================================== --- test/ScopDetect/simple_non_single_entry.ll +++ test/ScopDetect/simple_non_single_entry.ll @@ -65,4 +65,4 @@ ret void } -; CHECK: Valid Region for Scop: next => for.i.head1 +; CHECK: Valid Region for Scop: next => for.i Index: test/ScopInfo/NonAffine/non-affine-loop-condition-dependent-access_2.ll =================================================================== --- test/ScopInfo/NonAffine/non-affine-loop-condition-dependent-access_2.ll +++ test/ScopInfo/NonAffine/non-affine-loop-condition-dependent-access_2.ll @@ -9,7 +9,7 @@ ; access. ; ; INNERMOST: Function: f -; INNERMOST: Region: %bb15---%bb26 +; INNERMOST: Region: %bb15---%bb13 ; INNERMOST: Max Loop Depth: 1 ; INNERMOST: p0: {0,+,{0,+,-1}<%bb11>}<%bb13> ; INNERMOST: p1: {0,+,{0,+,1}<%bb11>}<%bb13> Index: test/ScopInfo/NonAffine/non-affine-loop-condition-dependent-access_3.ll =================================================================== --- test/ScopInfo/NonAffine/non-affine-loop-condition-dependent-access_3.ll +++ test/ScopInfo/NonAffine/non-affine-loop-condition-dependent-access_3.ll @@ -9,7 +9,7 @@ ; access. ; ; INNERMOST: Function: f -; INNERMOST: Region: %bb15---%bb26 +; INNERMOST: Region: %bb15---%bb13 ; INNERMOST: Max Loop Depth: 1 ; INNERMOST: Context: ; INNERMOST: [p_0, p_1, p_2] -> { : p_0 >= 0 and p_0 <= 2147483647 and p_1 >= 0 and p_1 <= 4096 and p_2 >= 0 and p_2 <= 4096 }