Index: include/polly/CodeGen/BlockGenerators.h =================================================================== --- include/polly/CodeGen/BlockGenerators.h +++ include/polly/CodeGen/BlockGenerators.h @@ -340,6 +340,15 @@ /// @param InstCopy The copy of the instruction @p Inst in the optimized SCoP. void handleOutsideUsers(const Region &R, Instruction *Inst, Value *InstCopy); + /// @brief Handle PHIs in the exiting block. + /// + /// Such PHIs can be created by region simplification. They have not been + /// considered byTempScopInfo/ScopInfo and in consequence not by any ScopStmt + /// materialized by BlockGenerator. + /// + /// @param R The current SCoP region. + void handleExitingPHIs(Region &R, ValueMapT &GlobalMap); + /// @brief Initialize the memory of demoted scalars. /// /// If a PHI node was demoted and one of its predecessor blocks was outside @@ -364,7 +373,7 @@ /// we return the old value. If the value can still not be derived, this /// function will assert. /// - /// @param Stmt The statement to code generate. + /// @param Region The Region of the SCoP. /// @param Old The old Value. /// @param BBMap A mapping from old values to their new values /// (for values recalculated within this basic block). @@ -382,7 +391,7 @@ /// @returns o The old value, if it is still valid. /// o The new value, if available. /// o NULL, if no value is found. - Value *getNewValue(ScopStmt &Stmt, const Value *Old, ValueMapT &BBMap, + Value *getNewValue(Region &R, const Value *Old, ValueMapT &BBMap, ValueMapT &GlobalMap, LoopToScevMapT <S, Loop *L) const; void copyInstScalar(ScopStmt &Stmt, const Instruction *Inst, ValueMapT &BBMap, Index: include/polly/CodeGen/IslNodeBuilder.h =================================================================== --- include/polly/CodeGen/IslNodeBuilder.h +++ include/polly/CodeGen/IslNodeBuilder.h @@ -92,6 +92,21 @@ // ivs. IslExprBuilder::IDToValueTy IDToValue; + /// @brief Move SDiv and SRem instructions to before the region. + /// + /// SCEVValidator accepts SDiv and SRem instruction for which no SCEV + /// representation exists. Correspondingly, SCEVExpander does not generate + /// code for it and instead directly uses the result of the SDiv/SRem + /// instructions. Those might be defined only in the region and hence + /// referencing them in the prologue setting up parameters yields invalid + /// code. + /// We avoid this by moving all SDivs and SRems to before the prologue so + /// referencing them for parameters is valid. + /// + /// @param ValueReplacer Receives the associations of values defined inside + /// the SCoP to equivalent ones available at the entry of the optimized SCoP. + void moveInvariantDivRem(ValueToValueMap &ValueReplacer); + /// Generate code for a given SCEV* /// /// This function generates code for a given SCEV expression. It generated Index: include/polly/CodeGen/Utils.h =================================================================== --- include/polly/CodeGen/Utils.h +++ include/polly/CodeGen/Utils.h @@ -17,12 +17,23 @@ class Pass; class Value; class BasicBlock; +class DominatorTree; +class LoopInfo; +class RegionInfo; } namespace polly { class Scop; +/// @brief Alternative to llvm::SplitCriticalEdge. +/// +/// Creates a new block which branches to Succ. The edge to split is redirected to the new block. +// +/// The issue with llvm::SplitCriticalEdge is that it does nothing if the edge is not critical. +/// The issue with llvm::SplitEdge is that it does not always create the middle block, but reuses Prev/Succ if it can. We always want a new middle block. +llvm::BasicBlock *splitEdge(llvm::BasicBlock *Prev, llvm::BasicBlock *Succ, const char *Suffix, llvm::DominatorTree *DT, llvm::LoopInfo *LI, llvm::RegionInfo *RI); + /// @brief Execute a Scop conditionally wrt @p RTC. /// /// In the CFG the optimized code of the Scop is generated next to the Index: include/polly/ScopDetection.h =================================================================== --- include/polly/ScopDetection.h +++ include/polly/ScopDetection.h @@ -223,13 +223,6 @@ /// @return True if all blocks in R are valid, false otherwise. bool allBlocksValid(DetectionContext &Context) const; - /// @brief Check the exit block of a region is valid. - /// - /// @param Context The context of scop detection. - /// - /// @return True if the exit of R is valid, false otherwise. - bool isValidExit(DetectionContext &Context) const; - /// @brief Check if a region is a Scop. /// /// @param Context The context of scop detection. Index: include/polly/Support/ScopHelper.h =================================================================== --- include/polly/Support/ScopHelper.h +++ include/polly/Support/ScopHelper.h @@ -14,6 +14,8 @@ #ifndef POLLY_SUPPORT_IRHELPER_H #define POLLY_SUPPORT_IRHELPER_H +#include "llvm/ADT/ArrayRef.h" + namespace llvm { class Type; class Instruction; @@ -87,9 +89,56 @@ /// /// The parameters are the same as for the creation of a SCEVExpander as well /// as the call to SCEVExpander::expandCodeFor. -llvm::Value *expandCodeFor(Scop &S, llvm::ScalarEvolution &SE, +llvm::Value *expandCodeFor(llvm::Region &R, llvm::ScalarEvolution &SE, const llvm::DataLayout &DL, const char *Name, const llvm::SCEV *E, llvm::Type *Ty, llvm::Instruction *IP); + +/// @brief Move a set of incoming edges to a new successor. +/// +/// Similar to llvm::SplitPredecessors, but instead of creating a new +/// predecessor, this creates a new successor. +/// Preserves LCSSA because all PHIs will exist in both BasicBlocks. A PHI node +/// is created in the successor for every PHI in the original block, even if +/// there is only one incoming edge. PHI nodes in the orginal block are never +/// removed, even if they have fewer than two incoming edges left. +/// We generally cannot update RegionInfo since it could invalidate regions, +/// depending on the list of edges. +/// +/// Before: After: +/// +/// NonPred[0] NonPred[0] +/// \ / \ / +/// /---->Entry /-->Entry / +/// | | | \ / +/// \--BlockInRegion | Entry.split +/// | | +/// \-------BlockInRegion +/// +/// If Entry is split. neither "Entry" nor "Entry.split" can be made the +/// region's entry node. "Entry" cannot because there is en edge from an +/// ouside-of-region block NonPred[0] to another region block, violating the +/// single entry requirement of a region. The new block "Entry.split" cannot be +/// the entry block because there is an exiting edge to "Entry", which would +/// then be an alternative to the region's other exit block. +/// There is a similar situation with the exit block conflicting with other +/// regions. +/// Hence, the caller of this function should make sure such cases do not happen +/// and update RegionInfo accordingly. +/// +/// @param BB Block to split. +/// @param NonPreds Set of incoming edges to move to the new successor. +/// @param Suffix Suffix to append to the name of the new block. +/// @param DT DominatorTree to update. +/// @param LI LoopInfo to update. +/// @param SE ScalarEvolution to update. +/// +/// @return The newly created successor of BB or null if the block could not be +/// split. +llvm::BasicBlock * +splitBlockNonPredecessors(llvm::BasicBlock *BB, + llvm::ArrayRef NonPreds, + llvm::StringRef Suffix, llvm::DominatorTree *DT, + llvm::LoopInfo *LI, llvm::ScalarEvolution *SE); } #endif Index: include/polly/TempScopInfo.h =================================================================== --- include/polly/TempScopInfo.h +++ include/polly/TempScopInfo.h @@ -306,10 +306,19 @@ /// /// @param R The SCoP region. /// @param BB A basic block in @p R. + /// @param OnlyPHIs Process only the PHI nodes of the block. Used for the region's exit block. /// @param NonAffineSubRegion The non affine sub-region @p BB is in. - void buildAccessFunctions(Region &R, BasicBlock &BB, + void buildAccessFunctions(Region &R, BasicBlock &BB, bool OnlyPHIs, Region *NonAffineSubRegion = nullptr); + /// @brief Determine whether an instruction belongs to the Scop. + /// + /// @param R The region representing the scop. + /// @param Inst The instruction to query. + /// + /// @return True iff the instruction belongs to the scop. + bool scopContains(llvm::Region *R, llvm::Instruction *Inst) const; + public: static char ID; explicit TempScopInfo() : RegionPass(ID), TempScopOfRegion(nullptr) {} Index: lib/Analysis/ScopDetection.cpp =================================================================== --- lib/Analysis/ScopDetection.cpp +++ lib/Analysis/ScopDetection.cpp @@ -761,7 +761,7 @@ // Only expand when we did not collect errors. // Check the exit first (cheap) - if (isValidExit(Context) && !Context.Log.hasErrors()) { + if (!Context.Log.hasErrors()) { // If the exit is valid check all blocks // - if true, a valid region was found => store it + keep expanding // - if false, .tbd. => stop (should this really end the loop?) @@ -903,18 +903,6 @@ return true; } -bool ScopDetection::isValidExit(DetectionContext &Context) const { - - // PHI nodes are not allowed in the exit basic block. - if (BasicBlock *Exit = Context.CurRegion.getExit()) { - BasicBlock::iterator I = Exit->begin(); - if (I != Exit->end() && isa(*I)) - return invalid(Context, /*Assert=*/true, I); - } - - return true; -} - bool ScopDetection::isValidRegion(DetectionContext &Context) const { Region &CurRegion = Context.CurRegion; @@ -957,9 +945,6 @@ &(CurRegion.getEntry()->getParent()->getEntryBlock())) return invalid(Context, /*Assert=*/true, CurRegion.getEntry()); - if (!isValidExit(Context)) - return false; - if (!allBlocksValid(Context)) return false; Index: lib/Analysis/TempScopInfo.cpp =================================================================== --- lib/Analysis/TempScopInfo.cpp +++ lib/Analysis/TempScopInfo.cpp @@ -179,7 +179,11 @@ BasicBlock *UseParent = UI->getParent(); // Ignore the users in the same BB (statement) - if (UseParent == ParentBB) + // There will be a split in the Exit BB after the last PHI instruction which + // we have to consider. PHI nodes in the exit block are in a different block + // than non-PHI instructions. + if ((UseParent == ParentBB) && + !(UseParent == R->getExit() && isa(Inst) != isa(UI))) continue; // Do not build scalar dependences inside a non-affine subregion. @@ -187,7 +191,7 @@ continue; // Check whether or not the use is in the SCoP. - if (!R->contains(UseParent)) { + if (!scopContains(R, UI)) { AnyCrossStmtUse = true; continue; } @@ -222,7 +226,7 @@ continue; if (Instruction *OpInst = dyn_cast(Op)) - if (R->contains(OpInst)) + if (scopContains(R, OpInst)) continue; IRAccess ScalarAccess(IRAccess::READ, Op, ZeroOffset, 1, true, Op); @@ -298,7 +302,7 @@ if (SD->isNonAffineSubRegion(&SR, &R)) { for (BasicBlock *BB : SR.blocks()) - buildAccessFunctions(R, *BB, &SR); + buildAccessFunctions(R, *BB, false, &SR); return; } @@ -306,10 +310,10 @@ if (I->isSubRegion()) buildAccessFunctions(R, *I->getNodeAs()); else - buildAccessFunctions(R, *I->getNodeAs()); + buildAccessFunctions(R, *I->getNodeAs(), false); } -void TempScopInfo::buildAccessFunctions(Region &R, BasicBlock &BB, +void TempScopInfo::buildAccessFunctions(Region &R, BasicBlock &BB, bool OnlyPHIs, Region *NonAffineSubRegion) { AccFuncSetType Functions; Loop *L = LI->getLoopFor(&BB); @@ -319,6 +323,13 @@ for (BasicBlock::iterator I = BB.begin(), E = --BB.end(); I != E; ++I) { Instruction *Inst = I; + + // The PHI nodes of the exit block are later moved into the region so handle + // them like any other PHI. All following instructions do not belong to the + // region and are skipped. + if (OnlyPHIs && !isa(Inst)) + break; + if (isa(Inst) || isa(Inst)) Functions.push_back( std::make_pair(buildIRAccess(Inst, L, &R, BoxedLoops), Inst)); @@ -343,6 +354,18 @@ Accs.insert(Accs.end(), Functions.begin(), Functions.end()); } +bool TempScopInfo::scopContains(Region *R, llvm::Instruction *Inst) const { + if (R->contains(Inst)) + return true; + + // The region will be extended later to also contain the PHI nodes in the exit + // block. + if (Inst->getParent() == R->getExit() && isa(Inst)) + return true; + + return false; +} + Comparison TempScopInfo::buildAffineCondition(Value &V, bool inverted) { if (ConstantInt *C = dyn_cast(&V)) { // If this is always true condition, we will create 0 <= 1, @@ -458,6 +481,7 @@ TempScop *TScop = new TempScop(R, BBConds, AccFuncMap); buildAccessFunctions(R, R); + buildAccessFunctions(R, *R.getExit(), true); for (const auto &BB : R.blocks()) buildCondition(BB, R); Index: lib/CodeGen/BlockGenerators.cpp =================================================================== --- lib/CodeGen/BlockGenerators.cpp +++ lib/CodeGen/BlockGenerators.cpp @@ -17,6 +17,7 @@ #include "polly/CodeGen/BlockGenerators.h" #include "polly/CodeGen/CodeGeneration.h" #include "polly/CodeGen/IslExprBuilder.h" +#include "polly/CodeGen/Utils.h" #include "polly/Options.h" #include "polly/Support/GICHelper.h" #include "polly/Support/SCEVValidator.h" @@ -88,7 +89,7 @@ EntryBB(nullptr), PHIOpMap(PHIOpMap), ScalarMap(ScalarMap), EscapeMap(EscapeMap) {} -Value *BlockGenerator::getNewValue(ScopStmt &Stmt, const Value *Old, +Value *BlockGenerator::getNewValue(Region &R, const Value *Old, ValueMapT &BBMap, ValueMapT &GlobalMap, LoopToScevMapT <S, Loop *L) const { // We assume constants never change. @@ -116,15 +117,14 @@ VTV.insert(GlobalMap.begin(), GlobalMap.end()); NewScev = SCEVParameterRewriter::rewrite(NewScev, SE, VTV); - Scop &S = *Stmt.getParent(); const DataLayout &DL = - S.getRegion().getEntry()->getParent()->getParent()->getDataLayout(); + R.getEntry()->getParent()->getParent()->getDataLayout(); auto IP = Builder.GetInsertPoint(); assert(IP != Builder.GetInsertBlock()->end() && "Only instructions can be insert points for SCEVExpander"); Value *Expanded = - expandCodeFor(S, SE, DL, "polly", NewScev, Old->getType(), IP); + expandCodeFor(R, SE, DL, "polly", NewScev, Old->getType(), IP); BBMap[Old] = Expanded; return Expanded; @@ -137,7 +137,7 @@ // A scop-constant value defined by an instruction executed outside the scop. if (const Instruction *Inst = dyn_cast(Old)) - if (!Stmt.getParent()->getRegion().contains(Inst->getParent())) + if (!R.contains(Inst->getParent())) return const_cast(Old); // The scalar dependence is neither available nor SCEVCodegenable. @@ -158,7 +158,7 @@ // Replace old operands with the new ones. for (Value *OldOperand : Inst->operands()) { - Value *NewOperand = getNewValue(Stmt, OldOperand, BBMap, GlobalMap, LTS, + Value *NewOperand = getNewValue(Stmt.getParent()->getRegion(), OldOperand, BBMap, GlobalMap, LTS, getLoopForInst(Inst)); if (!NewOperand) { @@ -207,7 +207,7 @@ NewPointer = getNewAccessOperand(Stmt, MA); else NewPointer = - getNewValue(Stmt, Pointer, BBMap, GlobalMap, LTS, getLoopForInst(Inst)); + getNewValue(Stmt.getParent()->getRegion(), Pointer, BBMap, GlobalMap, LTS, getLoopForInst(Inst)); return NewPointer; } @@ -234,7 +234,7 @@ const Value *Pointer = Store->getPointerOperand(); Value *NewPointer = generateLocationAccessed(Stmt, Store, Pointer, BBMap, GlobalMap, LTS); - Value *ValueOperand = getNewValue(Stmt, Store->getValueOperand(), BBMap, + Value *ValueOperand = getNewValue(Stmt.getParent()->getRegion(), Store->getValueOperand(), BBMap, GlobalMap, LTS, getLoopForInst(Store)); Builder.CreateAlignedStore(ValueOperand, NewPointer, Store->getAlignment()); @@ -255,7 +255,7 @@ Loop *L = getLoopForInst(Inst); if ((Stmt.isBlockStmt() || !Stmt.getRegion()->contains(L)) && canSynthesize(Inst, &LI, &SE, &Stmt.getParent()->getRegion())) { - Value *NewValue = getNewValue(Stmt, Inst, BBMap, GlobalMap, LTS, L); + Value *NewValue = getNewValue(Stmt.getParent()->getRegion(), Inst, BBMap, GlobalMap, LTS, L); BBMap[Inst] = NewValue; return; } @@ -376,6 +376,14 @@ Value *InstCopy) { BasicBlock *EnteringBB = R.getEnteringBlock(); + // If there are escape users we get the alloca for this instruction and put + // it in the EscapeMap for later finalization. However, if the alloca was not + // created by an already handled scalar dependence we have to initialize it + // also. Lastly, if the instruction was copied multiple times we already did + // this and can exit. + if (EscapeMap.count(Inst)) + return; + EscapeUserVectorTy EscapeUsers; for (User *U : Inst->users()) { @@ -394,14 +402,6 @@ if (EscapeUsers.empty()) return; - // If there are escape users we get the alloca for this instruction and put - // it in the EscapeMap for later finalization. However, if the alloca was not - // created by an already handled scalar dependence we have to initialize it - // also. Lastly, if the instruction was copied multiple times we already did - // this and can exit. - if (EscapeMap.count(Inst)) - return; - // Get or create an escape alloca for this instruction. bool IsNew; AllocaInst *ScalarAddr = @@ -508,6 +508,46 @@ } } +void BlockGenerator::handleExitingPHIs(Region &R, ValueMapT &GlobalMap) { + // The exit block of the __unoptimized__ region. + BasicBlock *OrigExitingBB = R.getExitingBlock(); + // The merge block __just after__ the region and the optimized region. + BasicBlock *MergeBB = R.getExit(); + + // The exit block of the __optimized__ region. + BasicBlock *OptExitingBB = *(pred_begin(MergeBB)); + if (OptExitingBB == OrigExitingBB) + OptExitingBB = *(++pred_begin(MergeBB)); + + // Not strictly necessary, but makes exit value handling more explicit + //OptExitingBB = splitEdge(OptExitingBB, MergeBB, ".scop_exit", &DT, &LI, nullptr); + //OptExitingBB->setName("polly.scop_exit"); + + EntryBB = &MergeBB->getParent()->getEntryBlock(); + Builder.SetInsertPoint(OptExitingBB->getTerminator()); + + // That's what copyBB does to PHI instructions + for (Instruction &Inst : *OrigExitingBB) { + if (isa(Inst)) + break; + assert(isa(Inst)); + + Value *Val; + if (canSynthesize(&Inst, &LI, &SE, &R)) { + ValueMapT EmptyLocalMap; + LoopToScevMapT EmptyLoopMap; + Val = getNewValue(R, &Inst, EmptyLocalMap, GlobalMap, EmptyLoopMap, nullptr); + } else { + bool IsNew = true; + auto Address = getOrCreateAlloca(&Inst, PHIOpMap, ".phiops", &IsNew); + assert(!IsNew && "Some generated block must have written the value as it is undefined otherwise"); + Val = Builder.CreateLoad(Address, Address->getName() + ".reload"); + } + + handleOutsideUsers(R, &Inst, Val); //TODO: nullptr OK? + } +} + void BlockGenerator::createScalarInitialization(Region &R, ValueMapT &GlobalMap) { // The split block __just before__ the region and optimized region. @@ -588,8 +628,11 @@ } void BlockGenerator::finalizeSCoP(Scop &S, ValueMapT &GlobalMap) { - createScalarInitialization(S.getRegion(), GlobalMap); - createScalarFinalization(S.getRegion()); + Region &R = S.getRegion(); + + handleExitingPHIs(R,GlobalMap); + createScalarInitialization(R, GlobalMap); + createScalarFinalization(R); } VectorBlockGenerator::VectorBlockGenerator(BlockGenerator &BlockGen, @@ -615,7 +658,7 @@ for (int Lane = 0; Lane < Width; Lane++) Vector = Builder.CreateInsertElement( - Vector, getNewValue(Stmt, Old, ScalarMaps[Lane], GlobalMaps[Lane], + Vector, getNewValue(Stmt.getParent()->getRegion(), Old, ScalarMaps[Lane], GlobalMaps[Lane], VLTS[Lane], L), Builder.getInt32(Lane)); @@ -1126,6 +1169,9 @@ AllocaInst *ScalarAddr = nullptr; if (MA->getScopArrayInfo()->isPHI()) + //if (!R.contains(ScalarBasePHI)) + // continue; + ScalarAddr = getOrCreateAlloca(ScalarBase, PHIOpMap, ".phiops"); else ScalarAddr = getOrCreateAlloca(ScalarBase, ScalarMap, ".s2a"); @@ -1159,7 +1205,7 @@ Value *Op = PHI->getIncomingValueForBlock(IncomingBB); OpCopy = - getNewValue(Stmt, Op, BBCopyMap, GlobalMap, LTS, getLoopForInst(PHI)); + getNewValue(Stmt.getParent()->getRegion(), Op, BBCopyMap, GlobalMap, LTS, getLoopForInst(PHI)); } else { if (PHICopy->getBasicBlockIndex(BBCopy) >= 0) Index: lib/CodeGen/CodeGeneration.cpp =================================================================== --- lib/CodeGen/CodeGeneration.cpp +++ lib/CodeGen/CodeGeneration.cpp @@ -93,6 +93,74 @@ return true; } + // Create a dedicated block for exit node PHIs. + // This special block is made the exiting block so the PHIs become included + // into the region. + // The exit block itself keeps its identity. + BasicBlock *createExitingBlockForExitNodePHIs(Region *R) { + BasicBlock *ExitBB = R->getExit(); + + SmallVector NonPreds; + for (BasicBlock *P : predecessors(ExitBB)) + if (!R->contains(P)) + NonPreds.push_back(P); + + // Preds[0] Preds[1] otherBB // + // \ | ________/ // + // \ | / // + // ExitBB // + BasicBlock *NewExitBlock = + splitBlockNonPredecessors(ExitBB, NonPreds, ".exit", DT, LI, SE); + // Preds[0] Preds[1] otherBB // + // \ / / // + // ExitBB / // + // \ / // + // NewExitBlock // + + // Set the region of the newly created block. + Region *RegionOfExit = RI->getRegionFor(ExitBB); + RI->setRegionFor(NewExitBlock, RegionOfExit); + + // If there was a region with ExitBB as entry block, change it to + // NewExitBlock. + while (RegionOfExit && !RegionOfExit->isTopLevelRegion() && + RegionOfExit->getEntry() == ExitBB) { + RegionOfExit->replaceEntry(NewExitBlock); + RegionOfExit = RegionOfExit->getParent(); + } + + // Change the exit node of R, but not its subregions because they can keep + // ExitBB as their exit block. + // This ensures that the region of ExitBB is always R itself. + R->replaceExit(NewExitBlock); + RI->setRegionFor(ExitBB, R); + + // Make NewExitBlock the new exit block of all other regions that previously + // had ExitBB as exit block. + for (BasicBlock *PotentialExiting : predecessors(NewExitBlock)) { + if (PotentialExiting == NewExitBlock) + continue; + + Region *OtherR = RI->getRegionFor(PotentialExiting); + while (OtherR && !OtherR->isTopLevelRegion() && + OtherR->getExit() == ExitBB) { + OtherR->replaceExit(NewExitBlock); + OtherR = OtherR->getParent(); + } + } + + assert(R->contains(ExitBB)); + assert(!R->contains(NewExitBlock)); + assert(R->getExit() == NewExitBlock); + assert(R->getExitingBlock() == ExitBB); + +#ifndef NDEBUG + RI->verifyAnalysis(); +#endif + + return ExitBB; // The block containing the PHIs + } + // CodeGeneration adds a lot of BBs without updating the RegionInfo // We make all created BBs belong to the scop's parent region without any // nested structure to keep the RegionInfo verifier happy. @@ -123,6 +191,7 @@ Annotator.buildAliasScopes(S); + createExitingBlockForExitNodePHIs(R); simplifyRegion(R, DT, LI, RI); assert(R->isSimple()); BasicBlock *EnteringBB = S.getRegion().getEnteringBlock(); Index: lib/CodeGen/IslExprBuilder.cpp =================================================================== --- lib/CodeGen/IslExprBuilder.cpp +++ lib/CodeGen/IslExprBuilder.cpp @@ -149,7 +149,7 @@ const SCEV *DimSCEV = SAI->getDimensionSize(u - 1); Value *DimSize = - expandCodeFor(S, SE, DL, "polly", DimSCEV, DimSCEV->getType(), + expandCodeFor(S.getRegion(), SE, DL, "polly", DimSCEV, DimSCEV->getType(), Builder.GetInsertPoint()); Type *Ty = getWidestType(DimSize->getType(), IndexOp->getType()); Index: lib/CodeGen/IslNodeBuilder.cpp =================================================================== --- lib/CodeGen/IslNodeBuilder.cpp +++ lib/CodeGen/IslNodeBuilder.cpp @@ -741,6 +741,6 @@ Value *IslNodeBuilder::generateSCEV(const SCEV *Expr) { Instruction *InsertLocation = --(Builder.GetInsertBlock()->end()); - return expandCodeFor(S, SE, DL, "polly", Expr, Expr->getType(), + return expandCodeFor(S.getRegion(), SE, DL, "polly", Expr, Expr->getType(), InsertLocation); } Index: lib/CodeGen/Utils.cpp =================================================================== --- lib/CodeGen/Utils.cpp +++ lib/CodeGen/Utils.cpp @@ -21,18 +21,7 @@ using namespace llvm; -// Alternative to llvm::SplitCriticalEdge. -// -// Creates a new block which branches to Succ. The edge to split is redirected -// to the new block. -// -// The issue with llvm::SplitCriticalEdge is that it does nothing if the edge is -// not critical. -// The issue with llvm::SplitEdge is that it does not always create the middle -// block, but reuses Prev/Succ if it can. We always want a new middle block. -static BasicBlock *splitEdge(BasicBlock *Prev, BasicBlock *Succ, - const char *Suffix, DominatorTree *DT, - LoopInfo *LI, RegionInfo *RI) { +BasicBlock *polly::splitEdge(BasicBlock *Prev, BasicBlock *Succ, const char *Suffix, DominatorTree *DT, LoopInfo *LI, RegionInfo *RI) { assert(Prev && Succ); // Before: @@ -48,7 +37,7 @@ // llvm::SplitCriticalEdge is more efficient than // llvm::SplitBlockPredecessors, which is more general. In the future we might // either modify llvm::SplitCriticalEdge to allow skipping the critical edge - // check; or Copy&Pase it here. + // check; or Copy&Paste it here. BasicBlock *MiddleBlock = SplitBlockPredecessors( Succ, ArrayRef(Prev), Suffix, DT, LI); @@ -60,6 +49,9 @@ } else { RI->setRegionFor(MiddleBlock, SuccRegion); } + + + } // After: @@ -103,6 +95,17 @@ splitEdge(EnteringBB, EntryBB, ".split_new_and_old", &DT, &LI, &RI); SplitBlock->setName("polly.split_new_and_old"); +// If EntryBB is the exit block of the region that includes Prev, exclude SplitBlock from that region by making it itself the exit block. This is trivially possible because there is just one edge to EnteringBB. +// This is necessary because we will add an outgoing edge from SplitBlock, which would violate the single exit block requirement of PrevRegion. + Region *PrevRegion = RI.getRegionFor(EnteringBB); + while (PrevRegion->getExit() == EntryBB) { + PrevRegion->replaceExit(SplitBlock); + PrevRegion = PrevRegion->getParent(); +} +RI.setRegionFor(SplitBlock, PrevRegion); + + + // Create a join block BasicBlock *ExitingBB = R.getExitingBlock(); BasicBlock *ExitBB = R.getExit(); @@ -174,5 +177,11 @@ // ExitBB // // / \ // +#ifndef NDEBUG + DT.verifyDomTree(); + LI.verify(); + RI.verifyAnalysis(); +#endif + return StartBlock; } Index: lib/Support/SCEVValidator.cpp =================================================================== --- lib/Support/SCEVValidator.cpp +++ lib/Support/SCEVValidator.cpp @@ -483,6 +483,9 @@ bool visitUnknown(const SCEVUnknown *Expr) { Instruction *Inst = dyn_cast(Expr->getValue()); + if (Inst && isa(Inst) && Inst->getParent() == R->getExit()) + return true; + // Return true when Inst is defined inside the region R. if (Inst && R->contains(Inst)) return true; Index: lib/Support/ScopHelper.cpp =================================================================== --- lib/Support/ScopHelper.cpp +++ lib/Support/ScopHelper.cpp @@ -258,15 +258,15 @@ struct ScopExpander : SCEVVisitor { friend struct SCEVVisitor; - explicit ScopExpander(Scop &S, ScalarEvolution &SE, const DataLayout &DL, + explicit ScopExpander(Region &R, ScalarEvolution &SE, const DataLayout &DL, const char *Name) - : Expander(SCEVExpander(SE, DL, Name)), StartIP(nullptr), SE(SE), S(S) {} + : Expander(SCEVExpander(SE, DL, Name)), StartIP(nullptr), SE(SE), R(R) {} Value *expandCodeFor(const SCEV *E, Type *Ty, Instruction *I) { // If we generate code in the region we will immediately fall back to the // SCEVExpander, otherwise we will stop at all unknowns in the SCEV and if // needed replace them by copies computed in the entering block. - if (!S.getRegion().contains(I)) + if (!R.contains(I)) E = visit(E); return Expander.expandCodeFor(E, Ty, I); } @@ -275,10 +275,10 @@ SCEVExpander Expander; Instruction *StartIP; ScalarEvolution &SE; - Scop &S; + Region &R; void setStartIP() { - StartIP = S.getRegion().getEnteringBlock()->getTerminator(); + StartIP = R.getEnteringBlock()->getTerminator(); } const SCEV *visitUnknown(const SCEVUnknown *E) { @@ -287,7 +287,7 @@ Inst->getOpcode() != Instruction::SDiv)) return E; - if (!S.getRegion().contains(Inst)) + if (!R.contains(Inst)) return E; if (!StartIP) @@ -350,9 +350,143 @@ } }; -Value *polly::expandCodeFor(Scop &S, ScalarEvolution &SE, const DataLayout &DL, +Value *polly::expandCodeFor(Region &R, ScalarEvolution &SE, const DataLayout &DL, const char *Name, const SCEV *E, Type *Ty, Instruction *IP) { - ScopExpander Expander(S, SE, DL, Name); + ScopExpander Expander(R, SE, DL, Name); return Expander.expandCodeFor(E, Ty, IP); } + +BasicBlock *polly::splitBlockNonPredecessors(BasicBlock *BB, + ArrayRef NonPreds, + llvm::StringRef Suffix, + DominatorTree *DT, LoopInfo *LI, + ScalarEvolution *SE) { + assert(BB); + + // Do not attempt to split blocks that cannot be split. + if (!BB->canSplitPredecessors()) + return nullptr; + + // Currently we cannot handle landing pads. + if (BB->isLandingPad()) + return nullptr; + + // Before: + // + // NonPreds[0] // + // \ / // + // BB // + // / \ // + + // Create new basic block, insert right after the original block. + auto OldName = BB->getName(); + BasicBlock *NewBB = + splitBlock(BB, BB->getFirstInsertionPt(), DT, LI, nullptr); + NewBB->setName(OldName + Suffix); + + // Create new PHIs into the new block. + for (auto I = BB->begin(); isa(I); ++I) { + PHINode *OldPHI = cast(&*I); + PHINode *NewPHI = PHINode::Create(OldPHI->getType(), NonPreds.size() + 1, + OldPHI->getName() + ".np", + NewBB->getFirstInsertionPt()); + + // Replaces uses of the original PHI by the new ones. + OldPHI->replaceAllUsesWith(NewPHI); + + // Add an edge from the original block (this adds a use) + NewPHI->addIncoming(OldPHI, BB); + + // Move the incoming block's values. + for (BasicBlock *Edge : NonPreds) { + BasicBlock *RedirectedEdge = Edge == BB ? NewBB : Edge; + auto i = OldPHI->getBasicBlockIndex(RedirectedEdge); + assert(i >= 0); + Value *Val = OldPHI->getIncomingValue(i); + NewPHI->addIncoming(Val, RedirectedEdge); + OldPHI->removeIncomingValue(i, false); + + if (SE) + SE->forgetValue(OldPHI); + } + } + + // If there were no edges to move, there is nothing more to do. + if (NonPreds.empty()) + return NewBB; + + // Move all non-pred edges to the new BB. + for (BasicBlock *Edge : NonPreds) { + BasicBlock *RedirectedEdge = Edge == BB ? NewBB : Edge; + assert(!isa(RedirectedEdge->getTerminator()) && + "Cannot split an edge from an IndirectBrInst"); + RedirectedEdge->getTerminator()->replaceUsesOfWith(BB, NewBB); + } + + if (DT) { + // DominatorTree::splitBlock assumes that it is the predecessor which is + // new. In order to reuse that function, we recreate the situation as if it + // was. + auto BBNode = DT->getNode(BB); + auto NewBBNode = DT->getNode(NewBB); + DT->changeImmediateDominator(NewBBNode, BBNode->getIDom()); + DT->eraseNode(BB); + + DT->splitBlock(BB); + } + + if (LI) { + BasicBlock *OldBB = BB; + Loop *L = LI->getLoopFor(OldBB); + while (L) { + assert(L->contains(OldBB)); + + bool OldBBReachableFromInside = false; + for (BasicBlock *Pred : predecessors(OldBB)) { + if (Pred == NewBB || L->contains(Pred)) { + OldBBReachableFromInside = true; + break; + } + } + + bool NewBBReachableFromOutside = false; + for (auto Pred : make_range(pred_begin(NewBB), pred_end(NewBB))) { + if (Pred == OldBB) + continue; + + if (Pred != NewBB && !L->contains(Pred)) { + NewBBReachableFromOutside = true; + break; + } + } + + if (NewBBReachableFromOutside || !OldBBReachableFromInside) { + // Remove BB from the loop and make the NewBB the next loop header. + // Since NewBB dominates OldBB, it can be the new header. + if (L->getHeader() == OldBB) + L->moveToHeader(NewBB); + } + + if (!OldBBReachableFromInside) { + // OldBB cannot be reached from the loop header anymore, i.e. it has + // been excluded from the loop. + L->removeBlockFromLoop(BB); + LI->changeLoopFor(BB, L->getParentLoop()); + } + + L = L->getParentLoop(); + } + } + + // After: + // + // NonPreds[0] // + // \ / // + // BB / // + // \ / // + // NewBB // + // / \ // + + return NewBB; +} \ No newline at end of file Index: test/Isl/CodeGen/inner_scev_2.ll =================================================================== --- test/Isl/CodeGen/inner_scev_2.ll +++ test/Isl/CodeGen/inner_scev_2.ll @@ -1,4 +1,4 @@ -; RUN: opt %loadPolly -S -polly-no-early-exit -polly-detect-unprofitable -polly-codegen < %s | FileCheck %s +; RUN: opt %loadPolly -S -polly-no-early-exit -polly-detect-unprofitable -polly-codegen < %s ; ; The SCEV expression in this test case refers to a sequence of sdiv ; instructions, which are part of different bbs in the SCoP. When code Index: test/Isl/CodeGen/loop_with_conditional_entry_edge_split_hard_case.ll =================================================================== --- test/Isl/CodeGen/loop_with_conditional_entry_edge_split_hard_case.ll +++ test/Isl/CodeGen/loop_with_conditional_entry_edge_split_hard_case.ll @@ -19,10 +19,10 @@ entry: br label %while.begin -; CHECK-LABEL: while.begin.region_exiting: -; CHECK: br label %polly.merge_new_and_old - ; CHECK-LABEL: while.begin: +; CHECK: br label %polly.merge_new_and_old + +; CHECK-LABEL: while.begin.exit: while.begin: ; CHECK: %call = call i32 @f() %call = call i32 @f() Index: test/Isl/CodeGen/phi_loop_carried_float.ll =================================================================== --- test/Isl/CodeGen/phi_loop_carried_float.ll +++ test/Isl/CodeGen/phi_loop_carried_float.ll @@ -13,7 +13,10 @@ ; CHECK-DAG: %tmp.0.phiops = alloca float ; CHECK-NOT: %tmp7{{[.*]}} = alloca float -; CHECK-LABEL: exit: +; CHECK-LABEL: polly.merge_new_and_old: +; CHECK-NEXT: br label %exit.exit + +; CHECK-LABEL: exit.exit: ; CHECK-NEXT: ret ; CHECK-LABEL: polly.start: Index: test/Isl/CodeGen/phi_loop_carried_float_escape.ll =================================================================== --- test/Isl/CodeGen/phi_loop_carried_float_escape.ll +++ test/Isl/CodeGen/phi_loop_carried_float_escape.ll @@ -8,8 +8,11 @@ ; } ; CHECK-LABEL: polly.merge_new_and_old: -; CHECK-NEXT: %tmp.0.merge = phi float [ %tmp.0.final_reload, %polly.merge ], [ %tmp.0, %bb8 ] -; CHECK-NEXT: br label %exit +; CHECK-NEXT: %tmp.0.merge = phi float [ %tmp.0.final_reload, %polly.merge ], [ %tmp.0, %exit ] +; CHECK-NEXT: br label %exit.exit + +; CHECK-LABEL: exit.exit: +; CHECK-NEXT: ret float %tmp.0.merge ; CHECK-LABEL: polly.start: ; CHECK-NEXT: store float 0.000000e+00, float* %tmp.0.phiops Index: test/Isl/CodeGen/phi_scalar_simple_1.ll =================================================================== --- test/Isl/CodeGen/phi_scalar_simple_1.ll +++ test/Isl/CodeGen/phi_scalar_simple_1.ll @@ -22,7 +22,7 @@ br label %for.cond ; CHECK-LABEL: polly.merge_new_and_old: -; CHECK: %x.addr.0.merge = phi i32 [ %x.addr.0.final_reload, %polly.merge ], [ %x.addr.0, %for.cond ] +; CHECK: %x.addr.0.merge = phi i32 [ %x.addr.0.final_reload, %polly.merge ], [ %x.addr.0, %for.end6 ] ; CHECK: ret i32 %x.addr.0.merge ; CHECK-LABEL: polly.start: Index: test/Isl/CodeGen/phi_scalar_simple_2.ll =================================================================== --- test/Isl/CodeGen/phi_scalar_simple_2.ll +++ test/Isl/CodeGen/phi_scalar_simple_2.ll @@ -24,7 +24,7 @@ br label %for.cond ; CHECK-LABEL: polly.merge_new_and_old: -; CHECK: %x.addr.0.merge = phi i32 [ %x.addr.0.final_reload, %polly.merge ], [ %x.addr.0, %for.cond ] +; CHECK: %x.addr.0.merge = phi i32 [ %x.addr.0.final_reload, %polly.merge ], [ %x.addr.0, %for.end7 ] ; CHECK: ret i32 %x.addr.0.merge ; CHECK-LABEL: polly.start: Index: test/ScopDetect/keep_going_expansion.ll =================================================================== --- test/ScopDetect/keep_going_expansion.ll +++ test/ScopDetect/keep_going_expansion.ll @@ -42,4 +42,4 @@ ret i32 %add } -; CHECK: Valid Region for Scop: for.body => for.cond2.preheader +; CHECK: Valid Region for Scop: for.body => for.body4 Index: test/ScopDetect/multidim_indirect_access.ll =================================================================== --- test/ScopDetect/multidim_indirect_access.ll +++ test/ScopDetect/multidim_indirect_access.ll @@ -10,9 +10,6 @@ ; treated as a multidimensional access with dimension size x. This test will ; check that we correctly invalidate the region and do not detect a outer SCoP. ; -; FIXME: -; We should detect the inner region but the PHI node in the exit blocks that. -; ; void f(int *A, long N) { ; int j = 0; ; while (N > j) { @@ -25,8 +22,8 @@ ; } ; } ; -; INDEPENDENT-NOT: Valid -; NON_INDEPENDENT-NOT: Valid +; INDEPENDENT-NOT: Valid Region for Scop: bb0 => bb1 +; NON_INDEPENDENT-NOT: Valid Region for Scop: bb0 => bb1 ; target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" Index: test/ScopDetect/non-affine-loop-condition-dependent-access_2.ll =================================================================== --- test/ScopDetect/non-affine-loop-condition-dependent-access_2.ll +++ test/ScopDetect/non-affine-loop-condition-dependent-access_2.ll @@ -7,9 +7,9 @@ ; innermost loop as a SCoP of depth 1, we have to reject the loop nest if not ; both, non-affine loops as well as non-affine accesses are allowed. ; -; REJECTNONAFFINELOOPS: Valid Region for Scop: bb15 => bb26 +; REJECTNONAFFINELOOPS: Valid Region for Scop: bb15 => bb13 ; REJECTNONAFFINELOOPS-NOT: Valid -; ALLOWNONAFFINELOOPS: Valid Region for Scop: bb15 => bb26 +; ALLOWNONAFFINELOOPS: Valid Region for Scop: bb15 => bb13 ; ALLOWNONAFFINELOOPS-NOT: Valid ; ALLOWNONAFFINELOOPSANDACCESSES: Valid Region for Scop: bb11 => bb29 ; Index: test/ScopDetect/non-affine-loop-condition-dependent-access_3.ll =================================================================== --- test/ScopDetect/non-affine-loop-condition-dependent-access_3.ll +++ test/ScopDetect/non-affine-loop-condition-dependent-access_3.ll @@ -7,9 +7,9 @@ ; innermost loop as a SCoP of depth 1, we have to reject the loop nest if not ; both, non-affine loops as well as non-affine accesses are allowed. ; -; REJECTNONAFFINELOOPS: Valid Region for Scop: bb15 => bb26 +; REJECTNONAFFINELOOPS: Valid Region for Scop: bb15 => bb13 ; REJECTNONAFFINELOOPS-NOT: Valid -; ALLOWNONAFFINELOOPS: Valid Region for Scop: bb15 => bb26 +; ALLOWNONAFFINELOOPS: Valid Region for Scop: bb15 => bb13 ; ALLOWNONAFFINELOOPS-NOT: Valid ; ALLOWNONAFFINELOOPSANDACCESSES: Valid Region for Scop: bb11 => bb29 ; Index: test/ScopDetect/phi_with_multi_exiting_edges.ll =================================================================== --- test/ScopDetect/phi_with_multi_exiting_edges.ll +++ test/ScopDetect/phi_with_multi_exiting_edges.ll @@ -1,7 +1,5 @@ ; RUN: opt %loadPolly -polly-detect-unprofitable -polly-detect -analyze -S < %s | FileCheck %s ; -; XFAIL: * -; ; Region with an exit node that has a PHI node multiple incoming edges from ; inside the region. Motivation for supporting such cases in Polly. ; Index: test/ScopDetect/phi_with_multi_exiting_edges_2.ll =================================================================== --- /dev/null +++ test/ScopDetect/phi_with_multi_exiting_edges_2.ll @@ -0,0 +1,35 @@ +; RUN: opt %loadPolly -polly-detect-unprofitable -polly-detect -analyze -S < %s | FileCheck %s + +define float @foo(float* %A, i64 %param) { +entry: + br label %entry.split + +entry.split: + %branchcond = icmp slt i64 %param, 64 + br i1 %branchcond, label %loopA, label %loopB + +loopA: + %indvarA = phi i64 [0, %entry.split], [%indvar.nextA, %loopA] + %indvar.nextA = add i64 %indvarA, 1 + %valA = load float, float* %A + %sumA = fadd float %valA, %valA + store float %valA, float* %A + %cndA = icmp eq i64 %indvar.nextA, 100 + br i1 %cndA, label %next, label %loopA + +loopB: + %indvarB = phi i64 [0, %entry.split], [%indvar.nextB, %loopB] + %indvar.nextB = add i64 %indvarB, 1 + %valB = load float, float* %A + %sumB = fadd float %valB, %valB + store float %valB, float* %A + %cndB = icmp eq i64 %indvar.nextB, 100 + br i1 %cndB, label %next, label %loopB + +next: + %result = phi float [%sumA, %loopA], [%sumB, %loopB] + ret float %result + +} + +; CHECK: Valid Region for Scop: entry.split => next Index: test/ScopDetect/simple_non_single_entry.ll =================================================================== --- test/ScopDetect/simple_non_single_entry.ll +++ test/ScopDetect/simple_non_single_entry.ll @@ -65,4 +65,4 @@ ret void } -; CHECK: Valid Region for Scop: next => for.i.head1 +; CHECK: Valid Region for Scop: next => for.i Index: test/ScopInfo/NonAffine/non-affine-loop-condition-dependent-access_2.ll =================================================================== --- test/ScopInfo/NonAffine/non-affine-loop-condition-dependent-access_2.ll +++ test/ScopInfo/NonAffine/non-affine-loop-condition-dependent-access_2.ll @@ -9,7 +9,7 @@ ; access. ; ; INNERMOST: Function: f -; INNERMOST: Region: %bb15---%bb26 +; INNERMOST: Region: %bb15---%bb13 ; INNERMOST: Max Loop Depth: 1 ; INNERMOST: p0: {0,+,{0,+,-1}<%bb11>}<%bb13> ; INNERMOST: p1: {0,+,{0,+,1}<%bb11>}<%bb13> Index: test/ScopInfo/NonAffine/non-affine-loop-condition-dependent-access_3.ll =================================================================== --- test/ScopInfo/NonAffine/non-affine-loop-condition-dependent-access_3.ll +++ test/ScopInfo/NonAffine/non-affine-loop-condition-dependent-access_3.ll @@ -9,7 +9,7 @@ ; access. ; ; INNERMOST: Function: f -; INNERMOST: Region: %bb15---%bb26 +; INNERMOST: Region: %bb15---%bb13 ; INNERMOST: Max Loop Depth: 1 ; INNERMOST: Context: ; INNERMOST: [p_0, p_1, p_2] -> { : p_0 >= 0 and p_0 <= 2147483647 and p_1 >= 0 and p_1 <= 4096 and p_2 >= 0 and p_2 <= 4096 }