Index: include/polly/CodeGen/BlockGenerators.h =================================================================== --- include/polly/CodeGen/BlockGenerators.h +++ include/polly/CodeGen/BlockGenerators.h @@ -78,10 +78,12 @@ /// an original value appearing in this mapping is replaced /// with the new value it is mapped to. /// @param ExprBuilder An expression builder to generate new access functions. + /// @param StartBlock The first basic block after the RTC. BlockGenerator(PollyIRBuilder &Builder, LoopInfo &LI, ScalarEvolution &SE, DominatorTree &DT, ScalarAllocaMapTy &ScalarMap, ScalarAllocaMapTy &PHIOpMap, EscapeUsersAllocaMapTy &EscapeMap, - ValueMapT &GlobalMap, IslExprBuilder *ExprBuilder = nullptr); + ValueMapT &GlobalMap, IslExprBuilder *ExprBuilder, + BasicBlock *StartBlock); /// Copy the basic block. /// @@ -315,6 +317,9 @@ /// code generation. ValueMapT &GlobalMap; + /// The first basic block after the RTC. + BasicBlock *StartBlock; + /// Split @p BB to create a new one we can use to clone @p BB in. BasicBlock *splitBB(BasicBlock *BB); Index: include/polly/CodeGen/IslExprBuilder.h =================================================================== --- include/polly/CodeGen/IslExprBuilder.h +++ include/polly/CodeGen/IslExprBuilder.h @@ -114,18 +114,25 @@ /// Construct an IslExprBuilder. /// - /// @param Builder The IRBuilder used to construct the isl_ast_expr[ession]. - /// The insert location of this IRBuilder defines WHERE the - /// corresponding LLVM-IR is generated. - /// - /// @param IDToValue The isl_ast_expr[ession] may reference parameters or - /// variables (identified by an isl_id). The IDTOValue map - /// specifies the LLVM-IR Values that correspond to these - /// parameters and variables. + /// @param Builder The IRBuilder used to construct the + /// isl_ast_expr[ession]. The insert location of this + /// IRBuilder defines WHERE the corresponding LLVM-IR + /// is generated. 
+ /// @param IDToValue The isl_ast_expr[ession] may reference parameters or + /// variables (identified by an isl_id). The IDToValue map + /// specifies the LLVM-IR Values that correspond to these + /// parameters and variables. + /// @param GlobalMap A mapping from llvm::Values used in the original scop + /// region to a new set of llvm::Values. + /// @param DL DataLayout for the current Module. + /// @param SE ScalarEvolution analysis for the current function. + /// @param DT DominatorTree analysis for the current function. + /// @param LI LoopInfo analysis for the current function. + /// @param StartBlock The first basic block after the RTC. IslExprBuilder(Scop &S, PollyIRBuilder &Builder, IDToValueTy &IDToValue, ValueMapT &GlobalMap, const llvm::DataLayout &DL, llvm::ScalarEvolution &SE, llvm::DominatorTree &DT, - llvm::LoopInfo &LI); + llvm::LoopInfo &LI, llvm::BasicBlock *StartBlock); /// Create LLVM-IR for an isl_ast_expr[ession]. /// @@ -201,6 +208,7 @@ llvm::ScalarEvolution &SE; llvm::DominatorTree &DT; llvm::LoopInfo &LI; + llvm::BasicBlock *StartBlock; llvm::Value *createOp(__isl_take isl_ast_expr *Expr); llvm::Value *createOpUnary(__isl_take isl_ast_expr *Expr); Index: include/polly/CodeGen/IslNodeBuilder.h =================================================================== --- include/polly/CodeGen/IslNodeBuilder.h +++ include/polly/CodeGen/IslNodeBuilder.h @@ -56,12 +56,14 @@ public: IslNodeBuilder(PollyIRBuilder &Builder, ScopAnnotator &Annotator, Pass *P, const DataLayout &DL, LoopInfo &LI, ScalarEvolution &SE, - DominatorTree &DT, Scop &S) + DominatorTree &DT, Scop &S, BasicBlock *StartBlock) : S(S), Builder(Builder), Annotator(Annotator), - ExprBuilder(S, Builder, IDToValue, ValueMap, DL, SE, DT, LI), + ExprBuilder(S, Builder, IDToValue, ValueMap, DL, SE, DT, LI, + StartBlock), BlockGen(Builder, LI, SE, DT, ScalarMap, PHIOpMap, EscapeMap, ValueMap, - &ExprBuilder), - RegionGen(BlockGen), P(P), DL(DL), LI(LI), SE(SE), DT(DT) {} + &ExprBuilder, 
StartBlock), + RegionGen(BlockGen), P(P), DL(DL), LI(LI), SE(SE), DT(DT), + StartBlock(StartBlock) {} virtual ~IslNodeBuilder() {} @@ -138,6 +140,7 @@ LoopInfo &LI; ScalarEvolution &SE; DominatorTree &DT; + BasicBlock *StartBlock; /// The current iteration of out-of-scop loops /// Index: include/polly/Support/ScopHelper.h =================================================================== --- include/polly/Support/ScopHelper.h +++ include/polly/Support/ScopHelper.h @@ -333,18 +333,21 @@ /// The parameters are the same as for the creation of a SCEVExpander as well /// as the call to SCEVExpander::expandCodeFor: /// -/// @param S The current Scop. -/// @param SE The Scalar Evolution pass. -/// @param DL The module data layout. -/// @param Name The suffix added to the new instruction names. -/// @param E The expression for which code is actually generated. -/// @param Ty The type of the resulting code. -/// @param IP The insertion point for the new code. -/// @param VMap A remaping of values used in @p E. +/// @param S The current Scop. +/// @param SE The Scalar Evolution pass. +/// @param DL The module data layout. +/// @param Name The suffix added to the new instruction names. +/// @param E The expression for which code is actually generated. +/// @param Ty The type of the resulting code. +/// @param IP The insertion point for the new code. +/// @param VMap A remapping of values used in @p E. +/// @param RTCBB The last block of the RTC. Used to insert loop-invariant +/// instructions in rare cases. llvm::Value *expandCodeFor(Scop &S, llvm::ScalarEvolution &SE, const llvm::DataLayout &DL, const char *Name, const llvm::SCEV *E, llvm::Type *Ty, - llvm::Instruction *IP, ValueMapT *VMap = nullptr); + llvm::Instruction *IP, ValueMapT *VMap, + llvm::BasicBlock *RTCBB); /// Check if the block is a error block. 
/// Index: lib/CodeGen/BlockGenerators.cpp =================================================================== --- lib/CodeGen/BlockGenerators.cpp +++ lib/CodeGen/BlockGenerators.cpp @@ -48,16 +48,14 @@ cl::desc("Add printf calls that show the values loaded/stored."), cl::Hidden, cl::init(false), cl::ZeroOrMore, cl::cat(PollyCategory)); -BlockGenerator::BlockGenerator(PollyIRBuilder &B, LoopInfo &LI, - ScalarEvolution &SE, DominatorTree &DT, - ScalarAllocaMapTy &ScalarMap, - ScalarAllocaMapTy &PHIOpMap, - EscapeUsersAllocaMapTy &EscapeMap, - ValueMapT &GlobalMap, - IslExprBuilder *ExprBuilder) +BlockGenerator::BlockGenerator( + PollyIRBuilder &B, LoopInfo &LI, ScalarEvolution &SE, DominatorTree &DT, + ScalarAllocaMapTy &ScalarMap, ScalarAllocaMapTy &PHIOpMap, + EscapeUsersAllocaMapTy &EscapeMap, ValueMapT &GlobalMap, + IslExprBuilder *ExprBuilder, BasicBlock *StartBlock) : Builder(B), LI(LI), SE(SE), ExprBuilder(ExprBuilder), DT(DT), EntryBB(nullptr), PHIOpMap(PHIOpMap), ScalarMap(ScalarMap), - EscapeMap(EscapeMap), GlobalMap(GlobalMap) {} + EscapeMap(EscapeMap), GlobalMap(GlobalMap), StartBlock(StartBlock) {} Value *BlockGenerator::trySynthesizeNewValue(ScopStmt &Stmt, Value *Old, ValueMapT &BBMap, @@ -85,7 +83,8 @@ assert(IP != Builder.GetInsertBlock()->end() && "Only instructions can be insert points for SCEVExpander"); Value *Expanded = - expandCodeFor(S, SE, DL, "polly", NewScev, Old->getType(), &*IP, &VTV); + expandCodeFor(S, SE, DL, "polly", NewScev, Old->getType(), &*IP, &VTV, + StartBlock->getSinglePredecessor()); BBMap[Old] = Expanded; return Expanded; @@ -524,18 +523,9 @@ void BlockGenerator::createScalarInitialization(Scop &S) { BasicBlock *ExitBB = S.getExit(); + BasicBlock *PreEntryBB = S.getEnteringBlock(); - // The split block __just before__ the region and optimized region. 
- BasicBlock *SplitBB = S.getEnteringBlock(); - BranchInst *SplitBBTerm = cast(SplitBB->getTerminator()); - assert(SplitBBTerm->getNumSuccessors() == 2 && "Bad region entering block!"); - - // Get the start block of the __optimized__ region. - BasicBlock *StartBB = SplitBBTerm->getSuccessor(0); - if (StartBB == S.getEntry()) - StartBB = SplitBBTerm->getSuccessor(1); - - Builder.SetInsertPoint(&*StartBB->begin()); + Builder.SetInsertPoint(&*StartBlock->begin()); for (auto &Array : S.arrays()) { if (Array->getNumberOfDimensions() != 0) @@ -544,15 +534,15 @@ // For PHI nodes, the only values we need to store are the ones that // reach the PHI node from outside the region. In general there should // only be one such incoming edge and this edge should enter through - // 'SplitBB'. + // 'PreEntryBB'. auto PHI = cast(Array->getBasePtr()); for (auto BI = PHI->block_begin(), BE = PHI->block_end(); BI != BE; BI++) - if (!S.contains(*BI) && *BI != SplitBB) + if (!S.contains(*BI) && *BI != PreEntryBB) llvm_unreachable("Incoming edges from outside the scop should always " - "come from SplitBB"); + "come from PreEntryBB"); - int Idx = PHI->getBasicBlockIndex(SplitBB); + int Idx = PHI->getBasicBlockIndex(PreEntryBB); if (Idx < 0) continue; Index: lib/CodeGen/CodeGeneration.cpp =================================================================== --- lib/CodeGen/CodeGeneration.cpp +++ lib/CodeGen/CodeGeneration.cpp @@ -133,8 +133,6 @@ assert(EnteringBB); PollyIRBuilder Builder = createPollyIRBuilder(EnteringBB, Annotator); - IslNodeBuilder NodeBuilder(Builder, Annotator, this, *DL, *LI, *SE, *DT, S); - // Only build the run-time condition and parameters _after_ having // introduced the conditional branch. 
This is important as the conditional // branch will guard the original scop from new induction variables that @@ -145,6 +143,9 @@ executeScopConditionally(S, this, Builder.getTrue()); auto *SplitBlock = StartBlock->getSinglePredecessor(); + IslNodeBuilder NodeBuilder(Builder, Annotator, this, *DL, *LI, *SE, *DT, S, + StartBlock); + // First generate code for the hoisted invariant loads and transitively the // parameters they reference. Afterwards, for the remaining parameters that // might reference the hoisted loads. Finally, build the runtime check Index: lib/CodeGen/IslExprBuilder.cpp =================================================================== --- lib/CodeGen/IslExprBuilder.cpp +++ lib/CodeGen/IslExprBuilder.cpp @@ -41,9 +41,10 @@ IslExprBuilder::IslExprBuilder(Scop &S, PollyIRBuilder &Builder, IDToValueTy &IDToValue, ValueMapT &GlobalMap, const DataLayout &DL, ScalarEvolution &SE, - DominatorTree &DT, LoopInfo &LI) + DominatorTree &DT, LoopInfo &LI, + BasicBlock *StartBlock) : S(S), Builder(Builder), IDToValue(IDToValue), GlobalMap(GlobalMap), - DL(DL), SE(SE), DT(DT), LI(LI) { + DL(DL), SE(SE), DT(DT), LI(LI), StartBlock(StartBlock) { OverflowState = (OTMode == OT_ALWAYS) ? 
Builder.getFalse() : nullptr; } @@ -284,7 +285,8 @@ DimSCEV = SCEVParameterRewriter::rewrite(DimSCEV, SE, Map); Value *DimSize = expandCodeFor(S, SE, DL, "polly", DimSCEV, DimSCEV->getType(), - &*Builder.GetInsertPoint()); + &*Builder.GetInsertPoint(), nullptr, + StartBlock->getSinglePredecessor()); Type *Ty = getWidestType(DimSize->getType(), IndexOp->getType()); Index: lib/CodeGen/IslNodeBuilder.cpp =================================================================== --- lib/CodeGen/IslNodeBuilder.cpp +++ lib/CodeGen/IslNodeBuilder.cpp @@ -1286,7 +1286,8 @@ "Insert location points after last valid instruction"); Instruction *InsertLocation = &*Builder.GetInsertPoint(); return expandCodeFor(S, SE, DL, "polly", Expr, Expr->getType(), - InsertLocation, &ValueMap); + InsertLocation, &ValueMap, + StartBlock->getSinglePredecessor()); } /// The AST expression we generate to perform the run-time check assumes Index: lib/CodeGen/PPCGCodeGeneration.cpp =================================================================== --- lib/CodeGen/PPCGCodeGeneration.cpp +++ lib/CodeGen/PPCGCodeGeneration.cpp @@ -146,8 +146,10 @@ public: GPUNodeBuilder(PollyIRBuilder &Builder, ScopAnnotator &Annotator, Pass *P, const DataLayout &DL, LoopInfo &LI, ScalarEvolution &SE, - DominatorTree &DT, Scop &S, gpu_prog *Prog) - : IslNodeBuilder(Builder, Annotator, P, DL, LI, SE, DT, S), Prog(Prog) { + DominatorTree &DT, Scop &S, BasicBlock *StartBlock, + gpu_prog *Prog) + : IslNodeBuilder(Builder, Annotator, P, DL, LI, SE, DT, S, StartBlock), + Prog(Prog) { getExprBuilder().setIDToSAI(&IDToSAI); } @@ -2398,9 +2400,6 @@ PollyIRBuilder Builder = createPollyIRBuilder(EnteringBB, Annotator); - GPUNodeBuilder NodeBuilder(Builder, Annotator, this, *DL, *LI, *SE, *DT, *S, - Prog); - // Only build the run-time condition and parameters _after_ having // introduced the conditional branch. 
This is important as the conditional // branch will guard the original scop from new induction variables that @@ -2410,6 +2409,9 @@ BasicBlock *StartBlock = executeScopConditionally(*S, this, Builder.getTrue()); + GPUNodeBuilder NodeBuilder(Builder, Annotator, this, *DL, *LI, *SE, *DT, *S, + StartBlock, Prog); + // TODO: Handle LICM auto SplitBlock = StartBlock->getSinglePredecessor(); Builder.SetInsertPoint(SplitBlock->getTerminator()); Index: lib/CodeGen/Utils.cpp =================================================================== --- lib/CodeGen/Utils.cpp +++ lib/CodeGen/Utils.cpp @@ -196,6 +196,26 @@ // | // // ExitBB // // / \ // + // + + // Split the edge between SplitBlock and EntryBB, to avoid a critical edge. + splitEdge(SplitBlock, EntryBB, ".pre_entry_bb", &DT, &LI, &RI); + + // \ / // + // EnteringBB // + // | // + // SplitBlock---------\ // + // | | // + // PreEntryBB | // + // _____|_____ | // + // / EntryBB \ StartBlock // + // | (region) | | // + // \_ExitingBB_/ ExitingBlock // + // | | // + // MergeBlock---------/ // + // | // + // ExitBB // + // / \ // return StartBlock; } Index: lib/Support/ScopHelper.cpp =================================================================== --- lib/Support/ScopHelper.cpp +++ lib/Support/ScopHelper.cpp @@ -226,9 +226,10 @@ friend struct SCEVVisitor; explicit ScopExpander(const Region &R, ScalarEvolution &SE, - const DataLayout &DL, const char *Name, ValueMapT *VMap) + const DataLayout &DL, const char *Name, ValueMapT *VMap, + BasicBlock *RTCBB) : Expander(SCEVExpander(SE, DL, Name)), SE(SE), Name(Name), R(R), - VMap(VMap) {} + VMap(VMap), RTCBB(RTCBB) {} Value *expandCodeFor(const SCEV *E, Type *Ty, Instruction *I) { // If we generate code in the region we will immediately fall back to the @@ -245,6 +246,7 @@ const char *Name; const Region &R; ValueMapT *VMap; + BasicBlock *RTCBB; const SCEV *visitGenericInst(const SCEVUnknown *E, Instruction *Inst, Instruction *IP) { @@ -280,15 +282,14 @@ return visit(NewE); } - 
auto *EnteringBB = R.getEnteringBlock(); Instruction *Inst = dyn_cast(E->getValue()); Instruction *IP; if (Inst && !R.contains(Inst)) IP = Inst; - else if (Inst && EnteringBB->getParent() == Inst->getFunction()) - IP = EnteringBB->getTerminator(); + else if (Inst && RTCBB->getParent() == Inst->getFunction()) + IP = RTCBB->getTerminator(); else - IP = EnteringBB->getParent()->getEntryBlock().getTerminator(); + IP = RTCBB->getParent()->getEntryBlock().getTerminator(); if (!Inst || (Inst->getOpcode() != Instruction::SRem && Inst->getOpcode() != Instruction::SDiv)) @@ -363,8 +364,9 @@ Value *polly::expandCodeFor(Scop &S, ScalarEvolution &SE, const DataLayout &DL, const char *Name, const SCEV *E, Type *Ty, - Instruction *IP, ValueMapT *VMap) { - ScopExpander Expander(S.getRegion(), SE, DL, Name, VMap); + Instruction *IP, ValueMapT *VMap, + BasicBlock *RTCBB) { + ScopExpander Expander(S.getRegion(), SE, DL, Name, VMap, RTCBB); return Expander.expandCodeFor(E, Ty, IP); }