Index: include/polly/CodeGen/BlockGenerators.h =================================================================== --- include/polly/CodeGen/BlockGenerators.h +++ include/polly/CodeGen/BlockGenerators.h @@ -156,6 +156,7 @@ /// This will initialize and finalize the scalar variables we demoted during /// the code generation. /// + /// @see handleExitingPHIs(Scop &, ValueMapT &) /// @see createScalarInitialization(Scop &) /// @see createScalarFinalization(Region &) void finalizeSCoP(Scop &S); @@ -388,6 +389,19 @@ /// SCoP. void handleOutsideUsers(const Region &R, Instruction *Inst, Value *InstCopy); + /// @brief Handle PHIs in the exiting block. + /// + /// PHI nodes in the exiting block have been in the region exit block before + /// region simplification (a pre transformation run before code generation). + /// As the region exit is not part of the region they have not been modeled in + /// the SCoP, hence general code generation did not consider these PHI nodes. + /// However, they have to be "replicated" in the optimized version of the SCoP + /// and these results need to be merged with the original PHI nodes in order + /// to get the correct live out scalar values. + /// + /// @param S The current SCoP. + void handleExitingPHIs(Scop &S); + /// @brief Initialize the memory of demoted scalars. /// /// @param S The scop for which to generate the scalar initializers. @@ -409,7 +423,7 @@ /// we return the old value. If the value can still not be derived, this /// function will assert. /// - /// @param Stmt The statement to code generate. + /// @param S The parent SCoP. /// @param Old The old Value. /// @param BBMap A mapping from old values to their new values /// (for values recalculated within this basic block). @@ -424,7 +438,7 @@ /// @returns o The old value, if it is still valid. /// o The new value, if available. /// o NULL, if no value is found. - Value *getNewValue(ScopStmt &Stmt, const Value *Old, ValueMapT &BBMap, + Value *getNewValue(Scop &S, const Value *Old, ValueMapT &BBMap, LoopToScevMapT <S, Loop *L) const; void copyInstScalar(ScopStmt &Stmt, const Instruction *Inst, ValueMapT &BBMap, Index: include/polly/ScopDetection.h =================================================================== --- include/polly/ScopDetection.h +++ include/polly/ScopDetection.h @@ -223,13 +223,6 @@ /// @return True if all blocks in R are valid, false otherwise. bool allBlocksValid(DetectionContext &Context) const; - /// @brief Check the exit block of a region is valid. - /// - /// @param Context The context of scop detection. - /// - /// @return True if the exit of R is valid, false otherwise. - bool isValidExit(DetectionContext &Context) const; - /// @brief Check if a region is a Scop. /// /// @param Context The context of scop detection. Index: include/polly/Support/ScopHelper.h =================================================================== --- include/polly/Support/ScopHelper.h +++ include/polly/Support/ScopHelper.h @@ -14,6 +14,8 @@ #ifndef POLLY_SUPPORT_IRHELPER_H #define POLLY_SUPPORT_IRHELPER_H +#include "llvm/ADT/ArrayRef.h" + namespace llvm { class Type; class Instruction; @@ -99,5 +101,52 @@ const llvm::DataLayout &DL, const char *Name, const llvm::SCEV *E, llvm::Type *Ty, llvm::Instruction *IP); + +/// @brief Move a set of incoming edges to a new successor. +/// +/// Similar to llvm::SplitPredecessors, but instead of creating a new +/// predecessor, this creates a new successor. +/// Preserves LCSSA because all PHIs will exist in both BasicBlocks. A PHI node +/// is created in the successor for every PHI in the original block, even if +/// there is only one incoming edge. PHI nodes in the orginal block are never +/// removed, even if they have fewer than two incoming edges left. +/// We generally cannot update RegionInfo since it could invalidate regions, +/// depending on the list of edges. +/// +/// Before: After: +/// +/// NonPred[0] NonPred[0] +/// \ / \ / +/// /---->Entry /-->Entry / +/// | | | \ / +/// \--BlockInRegion | Entry.split +/// | | +/// \-------BlockInRegion +/// +/// If Entry is split, neither "Entry" nor "Entry.split" can be made the +/// region's entry node. "Entry" cannot because there is en edge from an +/// ouside-of-region block NonPred[0] to another region block, violating the +/// single entry requirement of a region. The new block "Entry.split" cannot be +/// the entry block because there is an exiting edge to "Entry", which would +/// then be an alternative to the region's other exit block. +/// There is a similar situation with the exit block conflicting with other +/// regions. +/// Hence, the caller of this function should make sure such cases do not happen +/// and update RegionInfo accordingly. +/// +/// @param BB Block to split. +/// @param NonPreds Set of incoming edges to move to the new successor. +/// @param Suffix Suffix to append to the name of the new block. +/// @param DT DominatorTree to update. +/// @param LI LoopInfo to update. +/// @param SE ScalarEvolution to update. +/// +/// @return The newly created successor of BB or null if the block could not be +/// split. +llvm::BasicBlock * +splitBlockNonPredecessors(llvm::BasicBlock *BB, + llvm::ArrayRef NonPreds, + llvm::StringRef Suffix, llvm::DominatorTree *DT, + llvm::LoopInfo *LI, llvm::ScalarEvolution *SE); } #endif Index: include/polly/TempScopInfo.h =================================================================== --- include/polly/TempScopInfo.h +++ include/polly/TempScopInfo.h @@ -269,10 +269,25 @@ /// /// @param R The SCoP region. /// @param BB A basic block in @p R. + /// @param OnlyPHIs Process only the PHI nodes of the block. Used + /// for the region's exit block. /// @param NonAffineSubRegion The non affine sub-region @p BB is in. - void buildAccessFunctions(Region &R, BasicBlock &BB, + void buildAccessFunctions(Region &R, BasicBlock &BB, bool OnlyPHIs, Region *NonAffineSubRegion = nullptr); + /// @brief Determine whether an instruction logically belongs to the Scop. + /// + /// In addition to the instruction in the scop's region, the PHI node in the + /// region's exit block is also considered to belong into the region. When the + /// region is simplified (single exiting node), the PHIs are necessarily moved + /// (or replicated) into the region. + /// + /// @param R The region representing the scop. + /// @param Inst The instruction to query. + /// + /// @return True iff the instruction belongs to the scop. + bool scopContains(llvm::Region &R, llvm::Instruction *Inst) const; + public: static char ID; explicit TempScopInfo() : RegionPass(ID), TempScopOfRegion(nullptr) {} Index: lib/Analysis/ScopDetection.cpp =================================================================== --- lib/Analysis/ScopDetection.cpp +++ lib/Analysis/ScopDetection.cpp @@ -815,7 +815,7 @@ // Only expand when we did not collect errors. // Check the exit first (cheap) - if (isValidExit(Context) && !Context.Log.hasErrors()) { + if (!Context.Log.hasErrors()) { // If the exit is valid check all blocks // - if true, a valid region was found => store it + keep expanding // - if false, .tbd. => stop (should this really end the loop?) @@ -957,18 +957,6 @@ return true; } -bool ScopDetection::isValidExit(DetectionContext &Context) const { - - // PHI nodes are not allowed in the exit basic block. - if (BasicBlock *Exit = Context.CurRegion.getExit()) { - BasicBlock::iterator I = Exit->begin(); - if (I != Exit->end() && isa(*I)) - return invalid(Context, /*Assert=*/true, I); - } - - return true; -} - bool ScopDetection::isValidRegion(DetectionContext &Context) const { Region &CurRegion = Context.CurRegion; @@ -1014,9 +1002,6 @@ if (!DetectUnprofitable && !hasMoreThanOneLoop(&CurRegion)) invalid(Context, /*Assert=*/true, &CurRegion); - if (!isValidExit(Context)) - return false; - if (!allBlocksValid(Context)) return false; Index: lib/Analysis/TempScopInfo.cpp =================================================================== --- lib/Analysis/TempScopInfo.cpp +++ lib/Analysis/TempScopInfo.cpp @@ -160,9 +160,19 @@ if (UI == 0) continue; + // Check whether or not the use is in the SCoP. + // Uses outside the SCoP are called "exposed" and it's value must be + // reloaded after the SCoP's execution to be used by the original user. + if (!scopContains(*R, UI)) { + AnyCrossStmtUse = true; + continue; + } + BasicBlock *UseParent = UI->getParent(); // Ignore the users in the same BB (statement) + // Use-def chains within the same statement do not need to be modelled as + // memory location. if (UseParent == ParentBB) continue; @@ -170,12 +180,6 @@ if (NonAffineSubRegion && NonAffineSubRegion->contains(UseParent)) continue; - // Check whether or not the use is in the SCoP. - if (!R->contains(UseParent)) { - AnyCrossStmtUse = true; - continue; - } - // If the instruction can be synthesized and the user is in the region // we do not need to add scalar dependences. if (canSynthesizeInst) @@ -206,7 +210,7 @@ continue; if (Instruction *OpInst = dyn_cast(Op)) - if (R->contains(OpInst)) + if (scopContains(*R, OpInst)) continue; if (isa(Op)) @@ -285,7 +289,7 @@ if (SD->isNonAffineSubRegion(&SR, &R)) { for (BasicBlock *BB : SR.blocks()) - buildAccessFunctions(R, *BB, &SR); + buildAccessFunctions(R, *BB, false, &SR); return; } @@ -293,10 +297,11 @@ if (I->isSubRegion()) buildAccessFunctions(R, *I->getNodeAs()); else - buildAccessFunctions(R, *I->getNodeAs()); + buildAccessFunctions(R, *I->getNodeAs(), false); } void TempScopInfo::buildAccessFunctions(Region &R, BasicBlock &BB, + bool OnlyPHIs, Region *NonAffineSubRegion) { AccFuncSetType Functions; Loop *L = LI->getLoopFor(&BB); @@ -306,6 +311,15 @@ for (BasicBlock::iterator I = BB.begin(), E = --BB.end(); I != E; ++I) { Instruction *Inst = I; + + // The PHI nodes of the exit block are precessed as if they were in the + // scop's region. We need to create memory accesses for the PHI's arguments + // such that we can demote it to an alloca location. + // A block always starts with its PHI nodes and hence once we encouter a + // non-PHI, subsequent instruction can be skipped. + if (OnlyPHIs && !isa(Inst)) + break; + if (isa(Inst) || isa(Inst)) Functions.push_back( std::make_pair(buildIRAccess(Inst, L, &R, BoxedLoops), Inst)); @@ -334,11 +348,40 @@ Accs.insert(Accs.end(), Functions.begin(), Functions.end()); } +bool TempScopInfo::scopContains(Region &R, llvm::Instruction *Inst) const { + if (R.contains(Inst)) + return true; + + return (Inst->getParent() == R.getExit()) && isa(Inst); +} + TempScop *TempScopInfo::buildTempScop(Region &R) { TempScop *TScop = new TempScop(R, AccFuncMap); buildAccessFunctions(R, R); + // Simple instructions outside of the scop's region that depend on a scalar + // inside the scop (an escaping value) can be handled at code generation by + // just replacing the operand with the scop's value at exit time. (More + // exactly, a PHI node in the merge block will select between the orginal + // scalar and the generated one, depending on whether the original or + // generated version has been executed) + // + // If there is a PHI instruction in the scop's region this is not directly + // possible. The escaping value depends on which edge is taken from the + // region. This property is not preserved/modeled in SCoPs; the generated code + // always has just one exiting edge. + // We therefore model these exit node PHIs like other PHIs in there region: + // The PHI value is demoted to a memory location and the incoming blocks write + // the incoming values to that location such that the value can be loaded from + // there instead. The dependencies from these memory accesses ensure that + // either only one of the is stored, or at least stored on the correct order + // after code generation. + // Usually there is also a read access for the PHI, but ScopInfo does not + // create a statement for the exit block such they must be handled explicitely + // in the code generator. + buildAccessFunctions(R, *R.getExit(), true); + return TScop; } Index: lib/CodeGen/BlockGenerators.cpp =================================================================== --- lib/CodeGen/BlockGenerators.cpp +++ lib/CodeGen/BlockGenerators.cpp @@ -92,9 +92,8 @@ EntryBB(nullptr), PHIOpMap(PHIOpMap), ScalarMap(ScalarMap), EscapeMap(EscapeMap), GlobalMap(GlobalMap) {} -Value *BlockGenerator::getNewValue(ScopStmt &Stmt, const Value *Old, - ValueMapT &BBMap, LoopToScevMapT <S, - Loop *L) const { +Value *BlockGenerator::getNewValue(Scop &S, const Value *Old, ValueMapT &BBMap, + LoopToScevMapT <S, Loop *L) const { // We assume constants never change. // This avoids map lookups for many calls to this function. if (isa(Old)) @@ -120,7 +119,6 @@ VTV.insert(GlobalMap.begin(), GlobalMap.end()); NewScev = SCEVParameterRewriter::rewrite(NewScev, SE, VTV); - Scop &S = *Stmt.getParent(); const DataLayout &DL = S.getRegion().getEntry()->getParent()->getParent()->getDataLayout(); auto IP = Builder.GetInsertPoint(); @@ -141,7 +139,7 @@ // A scop-constant value defined by an instruction executed outside the scop. if (const Instruction *Inst = dyn_cast(Old)) - if (!Stmt.getParent()->getRegion().contains(Inst->getParent())) + if (!S.getRegion().contains(Inst->getParent())) return const_cast(Old); // The scalar dependence is neither available nor SCEVCodegenable. @@ -161,8 +159,8 @@ // Replace old operands with the new ones. for (Value *OldOperand : Inst->operands()) { - Value *NewOperand = - getNewValue(Stmt, OldOperand, BBMap, LTS, getLoopForInst(Inst)); + Value *NewOperand = getNewValue(*Stmt.getParent(), OldOperand, BBMap, LTS, + getLoopForInst(Inst)); if (!NewOperand) { assert(!isa(NewInst) && @@ -193,7 +191,8 @@ return ExprBuilder->create(AccessExpr); } - return getNewValue(Stmt, Pointer, BBMap, LTS, getLoopForInst(Inst)); + return getNewValue(*Stmt.getParent(), Pointer, BBMap, LTS, + getLoopForInst(Inst)); } Loop *BlockGenerator::getLoopForInst(const llvm::Instruction *Inst) { @@ -217,8 +216,8 @@ const Value *Pointer = Store->getPointerOperand(); Value *NewPointer = generateLocationAccessed(Stmt, Store, Pointer, BBMap, LTS, NewAccesses); - Value *ValueOperand = getNewValue(Stmt, Store->getValueOperand(), BBMap, LTS, - getLoopForInst(Store)); + Value *ValueOperand = getNewValue(*Stmt.getParent(), Store->getValueOperand(), + BBMap, LTS, getLoopForInst(Store)); Builder.CreateAlignedStore(ValueOperand, NewPointer, Store->getAlignment()); } @@ -238,7 +237,7 @@ Loop *L = getLoopForInst(Inst); if ((Stmt.isBlockStmt() || !Stmt.getRegion()->contains(L)) && canSynthesize(Inst, &LI, &SE, &Stmt.getParent()->getRegion())) { - Value *NewValue = getNewValue(Stmt, Inst, BBMap, LTS, L); + Value *NewValue = getNewValue(*Stmt.getParent(), Inst, BBMap, LTS, L); BBMap[Inst] = NewValue; return; } @@ -456,6 +455,56 @@ } } +void BlockGenerator::handleExitingPHIs(Scop &S) { + Region &R = S.getRegion(); + + // The exit block of the __unoptimized__ region. + BasicBlock *OrigExitingBB = R.getExitingBlock(); + // The merge block __just after__ the region and the optimized region. + BasicBlock *MergeBB = R.getExit(); + + // The exit block of the __optimized__ region. + BasicBlock *OptExitingBB = *(pred_begin(MergeBB)); + if (OptExitingBB == OrigExitingBB) + OptExitingBB = *(++pred_begin(MergeBB)); + + EntryBB = &MergeBB->getParent()->getEntryBlock(); + Builder.SetInsertPoint(OptExitingBB->getTerminator()); + // TODO: Refactor with same functionality to find the optimized exiting BB in + // createScalarFinalization. + + // Due to region simplification the exit node PHIs are now in the region's + // exiting node. CodeGeneration creates this new exiting node unconditionally + // such that there are only such PHIs in this node. + // Because there is no ScopStmt for this BB, we iterate manually over all + // instructions and process its instructions as they would if they were inside + // the region. + for (Instruction &Inst : *OrigExitingBB) { + if (Inst.isTerminator()) + break; + assert(isa(Inst)); + + // Get the PHI's value. + Value *Val; + if (canSynthesize(&Inst, &LI, &SE, &R)) { + LoopToScevMapT EmptyLoopMap; + ValueMapT EmptyValueMap; + Val = getNewValue(S, &Inst, EmptyValueMap, EmptyLoopMap, nullptr); + } else { + Value *Address = getOrCreatePHIAlloca(&Inst); + Val = Builder.CreateLoad(Address, Address->getName() + ".reload"); + } + + // Store the value as scalar (and update ScalarMap), as expected for + // escaping values. + Value *S2a = getOrCreateScalarAlloca(&Inst); + Builder.CreateStore(Val, S2a); + + // Update the set of escaping values. + handleOutsideUsers(R, &Inst, Val); + } +} + void BlockGenerator::createScalarInitialization(Scop &S) { Region &R = S.getRegion(); // The split block __just before__ the region and optimized region. @@ -552,8 +601,11 @@ } void BlockGenerator::finalizeSCoP(Scop &S) { + Region &R = S.getRegion(); + + handleExitingPHIs(S); createScalarInitialization(S); - createScalarFinalization(S.getRegion()); + createScalarFinalization(R); } VectorBlockGenerator::VectorBlockGenerator(BlockGenerator &BlockGen, @@ -576,7 +628,8 @@ for (int Lane = 0; Lane < Width; Lane++) Vector = Builder.CreateInsertElement( - Vector, getNewValue(Stmt, Old, ScalarMaps[Lane], VLTS[Lane], L), + Vector, + getNewValue(*Stmt.getParent(), Old, ScalarMaps[Lane], VLTS[Lane], L), Builder.getInt32(Lane)); VectorMap[Old] = Vector; @@ -1107,7 +1160,8 @@ ValueMapT &BBCopyMap = RegionMaps[BBCopy]; Value *Op = PHI->getIncomingValueForBlock(IncomingBB); - OpCopy = getNewValue(Stmt, Op, BBCopyMap, LTS, getLoopForInst(PHI)); + OpCopy = + getNewValue(*Stmt.getParent(), Op, BBCopyMap, LTS, getLoopForInst(PHI)); } else { if (PHICopy->getBasicBlockIndex(BBCopy) >= 0) Index: lib/CodeGen/CodeGeneration.cpp =================================================================== --- lib/CodeGen/CodeGeneration.cpp +++ lib/CodeGen/CodeGeneration.cpp @@ -93,6 +93,70 @@ return true; } + // Create a dedicated block for exit node PHIs. + // This special block is made the exiting block such that the PHIs become + // included into the region. The exit block itself keeps its identity and have + // a single predecessor. The dedicated PHI block is returned. + BasicBlock *createExitingBlockForExitNodePHIs(Region *R) { + BasicBlock *ExitBB = R->getExit(); + + SmallVector NonPreds; + for (BasicBlock *P : predecessors(ExitBB)) + if (!R->contains(P)) + NonPreds.push_back(P); + + // Preds[0] Preds[1] otherBB // + // \ | ________/ // + // \ | / // + // ExitBB // + BasicBlock *NewExitBlock = + splitBlockNonPredecessors(ExitBB, NonPreds, ".exit", DT, LI, SE); + // Preds[0] Preds[1] otherBB // + // \ / / // + // ExitBB / // + // \ / // + // NewExitBlock // + + // Set the region of the newly created block. + Region *RegionOfExit = RI->getRegionFor(ExitBB); + RI->setRegionFor(NewExitBlock, RegionOfExit); + + // If there was a region with ExitBB as entry block, change it to + // NewExitBlock. + while (RegionOfExit && !RegionOfExit->isTopLevelRegion() && + RegionOfExit->getEntry() == ExitBB) { + RegionOfExit->replaceEntry(NewExitBlock); + RegionOfExit = RegionOfExit->getParent(); + } + + // Change the exit node of R, but not its subregions because they can keep + // ExitBB as their exit block. + // This ensures that the region of ExitBB is always R itself. + R->replaceExit(NewExitBlock); + RI->setRegionFor(ExitBB, R); + + // Make NewExitBlock the new exit block of all other regions that previously + // had ExitBB as exit block. + for (BasicBlock *PotentialExiting : predecessors(NewExitBlock)) { + if (PotentialExiting == NewExitBlock) + continue; + + Region *OtherR = RI->getRegionFor(PotentialExiting); + while (OtherR && !OtherR->isTopLevelRegion() && + OtherR->getExit() == ExitBB) { + OtherR->replaceExit(NewExitBlock); + OtherR = OtherR->getParent(); + } + } + + assert(R->contains(ExitBB)); + assert(!R->contains(NewExitBlock)); + assert(R->getExit() == NewExitBlock); + assert(R->getExitingBlock() == ExitBB); + + return ExitBB; // The block containing the PHIs + } + // CodeGeneration adds a lot of BBs without updating the RegionInfo // We make all created BBs belong to the scop's parent region without any // nested structure to keep the RegionInfo verifier happy. @@ -123,6 +187,7 @@ Annotator.buildAliasScopes(S); + createExitingBlockForExitNodePHIs(R); simplifyRegion(R, DT, LI, RI); assert(R->isSimple()); BasicBlock *EnteringBB = S.getRegion().getEnteringBlock(); Index: lib/Support/SCEVValidator.cpp =================================================================== --- lib/Support/SCEVValidator.cpp +++ lib/Support/SCEVValidator.cpp @@ -483,6 +483,12 @@ bool visitUnknown(const SCEVUnknown *Expr) { Instruction *Inst = dyn_cast(Expr->getValue()); + // For non-simple regions, the value of a PHI depends on the control flow + // when exiting the region. Hence, we consider this a region dependency. + if (Inst && isa(Inst) && Inst->getParent() == R->getExit() && + !R->getExitingBlock()) + return true; + // Return true when Inst is defined inside the region R. if (Inst && R->contains(Inst)) return true; Index: lib/Support/ScopHelper.cpp =================================================================== --- lib/Support/ScopHelper.cpp +++ lib/Support/ScopHelper.cpp @@ -361,3 +361,135 @@ ScopExpander Expander(S.getRegion(), SE, DL, Name); return Expander.expandCodeFor(E, Ty, IP); } + +BasicBlock *polly::splitBlockNonPredecessors(BasicBlock *BB, + ArrayRef NonPreds, + llvm::StringRef Suffix, + DominatorTree *DT, LoopInfo *LI, + ScalarEvolution *SE) { + // Do not attempt to split blocks that cannot be split. + if (!BB->canSplitPredecessors()) + return nullptr; + + // Currently we cannot handle landing pads. + if (BB->isLandingPad()) + return nullptr; + + // Before: + // + // NonPreds[0] // + // \ / // + // BB // + // / \ // + + // Create new basic block, insert right after the original block. + auto OldName = BB->getName(); + BasicBlock *NewBB = + splitBlock(BB, BB->getFirstInsertionPt(), DT, LI, nullptr); + NewBB->setName(OldName + Suffix); + + // Create new PHIs into the new block. + for (auto I = BB->begin(); isa(I); ++I) { + PHINode *OldPHI = cast(&*I); + PHINode *NewPHI = PHINode::Create(OldPHI->getType(), NonPreds.size() + 1, + OldPHI->getName() + ".np", + NewBB->getFirstInsertionPt()); + + // Replaces uses of the original PHI by the new ones. + OldPHI->replaceAllUsesWith(NewPHI); + + // Add an edge from the original block (this adds a use) + NewPHI->addIncoming(OldPHI, BB); + + // Move the incoming block's values. + for (BasicBlock *Edge : NonPreds) { + BasicBlock *RedirectedEdge = Edge == BB ? NewBB : Edge; + auto i = OldPHI->getBasicBlockIndex(RedirectedEdge); + assert(i >= 0); + Value *Val = OldPHI->getIncomingValue(i); + NewPHI->addIncoming(Val, RedirectedEdge); + OldPHI->removeIncomingValue(i, false); + + if (SE) + SE->forgetValue(OldPHI); + } + } + + // If there were no edges to move, there is nothing more to do. + if (NonPreds.empty()) + return NewBB; + + // Move all non-pred edges to the new BB. + for (BasicBlock *Edge : NonPreds) { + BasicBlock *RedirectedEdge = Edge == BB ? NewBB : Edge; + assert(!isa(RedirectedEdge->getTerminator()) && + "Cannot split an edge from an IndirectBrInst"); + RedirectedEdge->getTerminator()->replaceUsesOfWith(BB, NewBB); + } + + if (DT) { + // DominatorTree::splitBlock assumes that it is the predecessor which is + // new. In order to reuse that function, we recreate the situation as if it + // was. + auto BBNode = DT->getNode(BB); + auto NewBBNode = DT->getNode(NewBB); + DT->changeImmediateDominator(NewBBNode, BBNode->getIDom()); + DT->eraseNode(BB); + + DT->splitBlock(BB); + } + + if (LI) { + BasicBlock *OldBB = BB; + Loop *L = LI->getLoopFor(OldBB); + while (L) { + assert(L->contains(OldBB)); + + bool OldBBReachableFromInside = false; + for (BasicBlock *Pred : predecessors(OldBB)) { + if (Pred == NewBB || L->contains(Pred)) { + OldBBReachableFromInside = true; + break; + } + } + + bool NewBBReachableFromOutside = false; + for (auto Pred : make_range(pred_begin(NewBB), pred_end(NewBB))) { + if (Pred == OldBB) + continue; + + if (Pred != NewBB && !L->contains(Pred)) { + NewBBReachableFromOutside = true; + break; + } + } + + if (NewBBReachableFromOutside || !OldBBReachableFromInside) { + // Remove BB from the loop and make the NewBB the next loop header. + // Since NewBB dominates OldBB, it can be the new header. + if (L->getHeader() == OldBB) + L->moveToHeader(NewBB); + } + + if (!OldBBReachableFromInside) { + // OldBB cannot be reached from the loop header anymore, i.e. it has + // been excluded from the loop. + L->removeBlockFromLoop(BB); + LI->changeLoopFor(BB, L->getParentLoop()); + } + + L = L->getParentLoop(); + } + } + + // After: + // + // NonPreds[0] // + // \ / // + // BB / // + // \ / // + // NewBB // + // / \ // + + return NewBB; +} \ No newline at end of file Index: test/Isl/CodeGen/inner_scev_sdiv_2.ll =================================================================== --- test/Isl/CodeGen/inner_scev_sdiv_2.ll +++ test/Isl/CodeGen/inner_scev_sdiv_2.ll @@ -1,4 +1,4 @@ -; RUN: opt %loadPolly -S -polly-no-early-exit -polly-detect-unprofitable -polly-codegen < %s | FileCheck %s +; RUN: opt %loadPolly -S -polly-no-early-exit -polly-detect-unprofitable -polly-codegen < %s ; ; The SCEV expression in this test case refers to a sequence of sdiv ; instructions, which are part of different bbs in the SCoP. When code Index: test/Isl/CodeGen/loop_with_conditional_entry_edge_split_hard_case.ll =================================================================== --- test/Isl/CodeGen/loop_with_conditional_entry_edge_split_hard_case.ll +++ test/Isl/CodeGen/loop_with_conditional_entry_edge_split_hard_case.ll @@ -19,10 +19,10 @@ entry: br label %while.begin -; CHECK-LABEL: while.begin.region_exiting: -; CHECK: br label %polly.merge_new_and_old - ; CHECK-LABEL: while.begin: +; CHECK: br label %polly.merge_new_and_old + +; CHECK-LABEL: while.begin.exit: while.begin: ; CHECK: %call = call i32 @f() %call = call i32 @f() Index: test/Isl/CodeGen/phi_loop_carried_float.ll =================================================================== --- test/Isl/CodeGen/phi_loop_carried_float.ll +++ test/Isl/CodeGen/phi_loop_carried_float.ll @@ -13,7 +13,10 @@ ; CHECK-DAG: %tmp.0.phiops = alloca float ; CHECK-NOT: %tmp7{{[.*]}} = alloca float -; CHECK-LABEL: exit: +; CHECK-LABEL: polly.merge_new_and_old: +; CHECK-NEXT: br label %exit.exit + +; CHECK-LABEL: exit.exit: ; CHECK-NEXT: ret ; CHECK-LABEL: polly.start: Index: test/Isl/CodeGen/phi_loop_carried_float_escape.ll =================================================================== --- test/Isl/CodeGen/phi_loop_carried_float_escape.ll +++ test/Isl/CodeGen/phi_loop_carried_float_escape.ll @@ -12,8 +12,11 @@ ; } ; CHECK-LABEL: polly.merge_new_and_old: -; CHECK-NEXT: %tmp.0.merge = phi float [ %tmp.0.final_reload, %polly.merge ], [ %tmp.0, %bb8 ] -; CHECK-NEXT: br label %exit +; CHECK-NEXT: %tmp.0.merge = phi float [ %tmp.0.final_reload, %polly.merge ], [ %tmp.0, %exit ] +; CHECK-NEXT: br label %exit.exit + +; CHECK-LABEL: exit.exit: +; CHECK-NEXT: ret float %tmp.0.merge ; CHECK-LABEL: polly.start: ; CHECK-NEXT: store float 0.000000e+00, float* %tmp.0.phiops Index: test/Isl/CodeGen/phi_scalar_simple_1.ll =================================================================== --- test/Isl/CodeGen/phi_scalar_simple_1.ll +++ test/Isl/CodeGen/phi_scalar_simple_1.ll @@ -22,7 +22,7 @@ br label %for.cond ; CHECK-LABEL: polly.merge_new_and_old: -; CHECK: %x.addr.0.merge = phi i32 [ %x.addr.0.final_reload, %polly.merge ], [ %x.addr.0, %for.cond ] +; CHECK: %x.addr.0.merge = phi i32 [ %x.addr.0.final_reload, %polly.merge ], [ %x.addr.0, %for.end6 ] ; CHECK: ret i32 %x.addr.0.merge ; CHECK-LABEL: polly.start: Index: test/Isl/CodeGen/phi_scalar_simple_2.ll =================================================================== --- test/Isl/CodeGen/phi_scalar_simple_2.ll +++ test/Isl/CodeGen/phi_scalar_simple_2.ll @@ -24,7 +24,7 @@ br label %for.cond ; CHECK-LABEL: polly.merge_new_and_old: -; CHECK: %x.addr.0.merge = phi i32 [ %x.addr.0.final_reload, %polly.merge ], [ %x.addr.0, %for.cond ] +; CHECK: %x.addr.0.merge = phi i32 [ %x.addr.0.final_reload, %polly.merge ], [ %x.addr.0, %for.end7 ] ; CHECK: ret i32 %x.addr.0.merge ; CHECK-LABEL: polly.start: Index: test/ScopDetect/keep_going_expansion.ll =================================================================== --- test/ScopDetect/keep_going_expansion.ll +++ test/ScopDetect/keep_going_expansion.ll @@ -42,4 +42,4 @@ ret i32 %add } -; CHECK: Valid Region for Scop: for.body => for.cond2.preheader +; CHECK: Valid Region for Scop: for.body => for.body4 Index: test/ScopDetect/multidim_indirect_access.ll =================================================================== --- test/ScopDetect/multidim_indirect_access.ll +++ test/ScopDetect/multidim_indirect_access.ll @@ -3,16 +3,12 @@ ; The outer loop of this function will correctly not be recognized with the ; message: ; -; Non affine access function: (sext i32 %tmp to i64) +; SCEV of PHI node refers to SSA names in region ; ; The access A[x] might mistakenly be treated as a multidimensional access with ; dimension size x. This test will check that we correctly invalidate the ; region and do not detect an outer SCoP. ; -; FIXME: -; We should detect the inner region but the PHI node in the exit blocks -; prohibits that. -; ; void f(int *A, long N) { ; int j = 0; ; while (N > j) { @@ -25,6 +21,7 @@ ; } ; } ; +; CHECK: Valid Region for Scop: bb1 => bb0 ; CHECK-NOT: Valid Region for Scop: bb0 => bb13 ; target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" Index: test/ScopDetect/non-affine-loop-condition-dependent-access_2.ll =================================================================== --- test/ScopDetect/non-affine-loop-condition-dependent-access_2.ll +++ test/ScopDetect/non-affine-loop-condition-dependent-access_2.ll @@ -14,9 +14,9 @@ ; innermost loop as a SCoP of depth 1, we have to reject the loop nest if not ; both, non-affine loops as well as non-affine accesses are allowed. ; -; REJECTNONAFFINELOOPS: Valid Region for Scop: bb15 => bb26 +; REJECTNONAFFINELOOPS: Valid Region for Scop: bb15 => bb13 ; REJECTNONAFFINELOOPS-NOT: Valid -; ALLOWNONAFFINELOOPS: Valid Region for Scop: bb15 => bb26 +; ALLOWNONAFFINELOOPS: Valid Region for Scop: bb15 => bb13 ; ALLOWNONAFFINELOOPS-NOT: Valid ; ALLOWNONAFFINELOOPSANDACCESSES: Valid Region for Scop: bb11 => bb29 ; Index: test/ScopDetect/non-affine-loop-condition-dependent-access_3.ll =================================================================== --- test/ScopDetect/non-affine-loop-condition-dependent-access_3.ll +++ test/ScopDetect/non-affine-loop-condition-dependent-access_3.ll @@ -14,9 +14,9 @@ ; innermost loop as a SCoP of depth 1, we have to reject the loop nest if not ; both, non-affine loops as well as non-affine accesses are allowed. ; -; REJECTNONAFFINELOOPS: Valid Region for Scop: bb15 => bb26 +; REJECTNONAFFINELOOPS: Valid Region for Scop: bb15 => bb13 ; REJECTNONAFFINELOOPS-NOT: Valid -; ALLOWNONAFFINELOOPS: Valid Region for Scop: bb15 => bb26 +; ALLOWNONAFFINELOOPS: Valid Region for Scop: bb15 => bb13 ; ALLOWNONAFFINELOOPS-NOT: Valid ; ALLOWNONAFFINELOOPSANDACCESSES: Valid Region for Scop: bb11 => bb29 ; Index: test/ScopDetect/phi_with_multi_exiting_edges.ll =================================================================== --- test/ScopDetect/phi_with_multi_exiting_edges.ll +++ test/ScopDetect/phi_with_multi_exiting_edges.ll @@ -1,7 +1,5 @@ ; RUN: opt %loadPolly -polly-detect-unprofitable -polly-detect -analyze -S < %s | FileCheck %s ; -; XFAIL: * -; ; Region with an exit node that has a PHI node multiple incoming edges from ; inside the region. Motivation for supporting such cases in Polly. ; Index: test/ScopDetect/phi_with_multi_exiting_edges_2.ll =================================================================== --- /dev/null +++ test/ScopDetect/phi_with_multi_exiting_edges_2.ll @@ -0,0 +1,35 @@ +; RUN: opt %loadPolly -polly-detect-unprofitable -polly-detect -analyze -S < %s | FileCheck %s + +define float @foo(float* %A, i64 %param) { +entry: + br label %entry.split + +entry.split: + %branchcond = icmp slt i64 %param, 64 + br i1 %branchcond, label %loopA, label %loopB + +loopA: + %indvarA = phi i64 [0, %entry.split], [%indvar.nextA, %loopA] + %indvar.nextA = add i64 %indvarA, 1 + %valA = load float, float* %A + %sumA = fadd float %valA, %valA + store float %valA, float* %A + %cndA = icmp eq i64 %indvar.nextA, 100 + br i1 %cndA, label %next, label %loopA + +loopB: + %indvarB = phi i64 [0, %entry.split], [%indvar.nextB, %loopB] + %indvar.nextB = add i64 %indvarB, 1 + %valB = load float, float* %A + %sumB = fadd float %valB, %valB + store float %valB, float* %A + %cndB = icmp eq i64 %indvar.nextB, 100 + br i1 %cndB, label %next, label %loopB + +next: + %result = phi float [%sumA, %loopA], [%sumB, %loopB] + ret float %result + +} + +; CHECK: Valid Region for Scop: entry.split => next Index: test/ScopDetect/simple_non_single_entry.ll =================================================================== --- test/ScopDetect/simple_non_single_entry.ll +++ test/ScopDetect/simple_non_single_entry.ll @@ -65,4 +65,4 @@ ret void } -; CHECK: Valid Region for Scop: next => for.i.head1 +; CHECK: Valid Region for Scop: next => for.i Index: test/ScopInfo/NonAffine/non-affine-loop-condition-dependent-access_2.ll =================================================================== --- test/ScopInfo/NonAffine/non-affine-loop-condition-dependent-access_2.ll +++ test/ScopInfo/NonAffine/non-affine-loop-condition-dependent-access_2.ll @@ -16,7 +16,7 @@ ; access. ; ; INNERMOST: Function: f -; INNERMOST: Region: %bb15---%bb26 +; INNERMOST: Region: %bb15---%bb13 ; INNERMOST: Max Loop Depth: 1 ; INNERMOST: p0: {0,+,{0,+,1}<%bb11>}<%bb13> ; INNERMOST: p1: {0,+,{0,+,-1}<%bb11>}<%bb13> Index: test/ScopInfo/NonAffine/non-affine-loop-condition-dependent-access_3.ll =================================================================== --- test/ScopInfo/NonAffine/non-affine-loop-condition-dependent-access_3.ll +++ test/ScopInfo/NonAffine/non-affine-loop-condition-dependent-access_3.ll @@ -15,7 +15,7 @@ ; access. ; ; INNERMOST: Function: f -; INNERMOST: Region: %bb15---%bb26 +; INNERMOST: Region: %bb15---%bb13 ; INNERMOST: Max Loop Depth: 1 ; INNERMOST: Context: ; INNERMOST: [p_0, p_1, p_2] -> { : p_0 >= 0 and p_0 <= 2147483647 and p_1 >= 0 and p_1 <= 4096 and p_2 >= 0 and p_2 <= 4096 }