Index: include/polly/Support/ScopHelper.h
===================================================================
--- include/polly/Support/ScopHelper.h
+++ include/polly/Support/ScopHelper.h
@@ -14,6 +14,8 @@
 #ifndef POLLY_SUPPORT_IRHELPER_H
 #define POLLY_SUPPORT_IRHELPER_H
 
+#include "llvm/ADT/ArrayRef.h"
+
 namespace llvm {
 class Type;
 class Instruction;
@@ -26,6 +28,10 @@
 class Region;
 class Pass;
 class BasicBlock;
+class StringRef;
+class DominatorTree;
+class RegionInfo;
+class ScalarEvolution;
 }
 
 namespace polly {
@@ -51,16 +57,13 @@
 bool hasInvokeEdge(const llvm::PHINode *PN);
 
 llvm::Value *getPointerOperand(llvm::Instruction &Inst);
-llvm::BasicBlock *createSingleExitEdge(llvm::Region *R, llvm::Pass *P);
 
-/// @brief Simplify the region in a SCoP to have a single unconditional entry
-///        edge and a single exit edge.
+/// @brief Simplify the region to have a single unconditional entry edge and a
+/// single exit edge.
 ///
-/// @param S  The SCoP that is simplified.
+/// @param R  The region to be simplified
 /// @param P  The pass that is currently running.
-///
-/// @return The unique entering block for the region.
-llvm::BasicBlock *simplifyRegion(polly::Scop *S, llvm::Pass *P);
+void simplifyRegion(llvm::Region *R, llvm::Pass *P);
 
 /// @brief Split the entry block of a function to store the newly inserted
 ///        allocations outside of all Scops.
@@ -69,5 +72,46 @@
 /// @param P          The pass that currently running.
 ///
 void splitEntryBlockForAlloca(llvm::BasicBlock *EntryBlock, llvm::Pass *P);
+
+/// @brief Split the block into two successive blocks.
+///
+/// Like llvm::SplitBlock but also preserves RegionInfo and a suffix other than
+/// ".split"
+///
+/// @param Old Block to split; will be predecessor of the new BasicBlock.
+/// @param SplitPt All instructions after and including this instruction are
+/// moved to the new block.
+/// @param Suffix The new block's name is the old block's name with this suffix
+/// appended.
+/// @param DT DominatorTree to update.
+/// @param LI LoopInfo to update.
+/// @param RI RegionInfo to update.
+///
+/// @return The new block, unconditional successor of the old one.
+llvm::BasicBlock *splitBlock(llvm::BasicBlock *Old, llvm::Instruction *SplitPt,
+                             llvm::StringRef Suffix, llvm::DominatorTree *DT,
+                             llvm::LoopInfo *LI, llvm::RegionInfo *RI);
+
+/// @brief Move a set of incoming edges to a new successor.
+///
+/// Similar to llvm::SplitPredecessors, but instead of creating a new
+/// predecessor, this creates a new successor.
+/// We generally cannot update RegionInfo since it could make them invalid; do
+/// this on a higher level where consistency is ensured.
+/// Preserves LCSSA since it all PHIs will exist in both BasicBlocks
+///
+/// @param BB Block to split.
+/// @param NonPreds Set of incoming edges to move to the new successor.
+/// @param Suffix Suffic to append to the name of the new block.
+/// @param DT DominatorTree to update.
+/// @param LI LoopIndo to update.
+/// @param SE ScalarEvolution to update.
+///
+/// @return The newly created successor of BB.
+llvm::BasicBlock *
+splitBlockNonPredecessors(llvm::BasicBlock *BB,
+                          llvm::ArrayRef<llvm::BasicBlock *> NonPreds,
+                          llvm::StringRef Suffix, llvm::DominatorTree *DT,
+                          llvm::LoopInfo *LI, llvm::ScalarEvolution *SE);
 }
 #endif
Index: lib/CodeGen/CodeGeneration.cpp
===================================================================
--- lib/CodeGen/CodeGeneration.cpp
+++ lib/CodeGen/CodeGeneration.cpp
@@ -53,6 +53,7 @@
   IslAstInfo *AI;
   DominatorTree *DT;
   ScalarEvolution *SE;
+  RegionInfo *RI;
   ///}
 
   /// @brief The loop annotator to generate llvm.loop metadata.
@@ -91,6 +92,18 @@
     return true;
   }
 
+  // CodeGeneration adds a lot of BBs without updating the RegionInfo
+  // We make all created BBs belong to the scop's parent region without any
+  // nested structure to keep the RegionInfo verifier happy.
+  void fixRegionInfo(Function *F, Region *ParentRegion) {
+    for (auto &BB : *F) {
+      if (RI->getRegionFor(&BB))
+        continue;
+
+      RI->setRegionFor(&BB, ParentRegion);
+    }
+  }
+
   bool runOnScop(Scop &S) override {
     AI = &getAnalysis<IslAstInfo>();
 
@@ -103,13 +116,20 @@
     DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
     SE = &getAnalysis<ScalarEvolution>();
     DL = &S.getRegion().getEntry()->getParent()->getParent()->getDataLayout();
+    RI = &getAnalysis<RegionInfoPass>().getRegionInfo();
 
     assert(!S.getRegion().isTopLevelRegion() &&
            "Top level regions are not supported");
 
     Annotator.buildAliasScopes(S);
 
-    BasicBlock *EnteringBB = simplifyRegion(&S, this);
+    Region *R = &S.getRegion();
+    assert(!R->isTopLevelRegion());
+    simplifyRegion(R, this);
+
+    assert(R->isSimple());
+    BasicBlock *EnteringBB = S.getRegion().getEnteringBlock();
+    assert(EnteringBB);
     PollyIRBuilder Builder = createPollyIRBuilder(EnteringBB, Annotator);
 
     IslNodeBuilder NodeBuilder(Builder, Annotator, this, *DL, *LI, *SE, *DT, S);
@@ -132,6 +152,7 @@
     NodeBuilder.create(AstRoot);
 
     NodeBuilder.finalizeSCoP(S);
+    fixRegionInfo(EnteringBB->getParent(), R->getParent());
 
     assert(!verifyGeneratedFunction(S, *EnteringBB->getParent()) &&
            "Verification of generated function failed");
Index: lib/CodeGen/Utils.cpp
===================================================================
--- lib/CodeGen/Utils.cpp
+++ lib/CodeGen/Utils.cpp
@@ -18,71 +18,129 @@
 #include "llvm/Analysis/RegionInfo.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "polly/Support/ScopHelper.h"
 
 using namespace llvm;
 
+// splitEdge - alternative to llvm::SplitCriticalEdge
+// llvm::SplitCriticalEdge does nothing if the edge is not critical
+// llvm::SplitEdge is unpredictable in which block it creates
+static BasicBlock *splitEdge(BasicBlock *Prev, BasicBlock *Succ,
+                             const char *Suffix, DominatorTree *DT,
+                             LoopInfo *LI, RegionInfo *RI) {
+  assert(Prev && Succ);
+
+#ifndef NDEBUG
+  DT->verifyDomTree();
+  LI->verify();
+  RI->verifyAnalysis();
+#endif
+
+  // Before:
+  /*   \    /     /   */
+  /*    Prev     /    */
+  /*     |  \___/     */
+  /*     |   ___      */
+  /*     |  /   \     */
+  /*    Succ     \    */
+  /*   /    \     \   */
+
+  // It might be more efficient to either change llvm::SplitCriticalEdge than to
+  // llvm::SplitBlockPredecessors optionally forcing it to do something or to
+  // copy&paste it here without the check whether it is a critical edges
+  BasicBlock *MiddleBlock = SplitBlockPredecessors(
+      Succ, ArrayRef<BasicBlock *>(Prev), Suffix, DT, LI);
+
+  if (RI) {
+    Region *PrevRegion = RI->getRegionFor(Prev);
+    Region *SuccRegion = RI->getRegionFor(Succ);
+    if (PrevRegion->contains(MiddleBlock)) {
+      RI->setRegionFor(MiddleBlock, PrevRegion);
+    } else {
+      RI->setRegionFor(MiddleBlock, SuccRegion);
+    }
+#if 0
+		if (PrevRegion->getExit() == Succ) {
+			// MiddleBlock got "sucked" into the predecessor region
+			RI->setRegionFor(MiddleBlock, PrevRegion);
+		} else if (SuccRegion->getEntry() != Succ || SuccRegion->contains(Prev)) {
+			// This case also applies to the TopLevelRegion
+			RI->setRegionFor(MiddleBlock, PrevRegion);
+		} else {
+			RI->setRegionFor(MiddleBlock, SuccRegion->getParent());
+		}
+#endif
+  }
+
+// After:
+/*   \    /     /   */
+/*    Prev     /    */
+/*     |  \___/     */
+/*     |            */
+/* MiddleBlock      */
+/*     |   ___      */
+/*     |  /   \     */
+/*    Succ     \    */
+/*   /    \     \   */
+
+#ifndef NDEBUG
+  DT->verifyDomTree();
+  LI->verify();
+  RI->verifyAnalysis();
+#endif
+
+  return MiddleBlock;
+}
+
 BasicBlock *polly::executeScopConditionally(Scop &S, Pass *P, Value *RTC) {
-  BasicBlock *StartBlock, *SplitBlock, *NewBlock;
   Region &R = S.getRegion();
   PollyIRBuilder Builder(R.getEntry());
   DominatorTree &DT = P->getAnalysis<DominatorTreeWrapperPass>().getDomTree();
   RegionInfo &RI = P->getAnalysis<RegionInfoPass>().getRegionInfo();
   LoopInfo &LI = P->getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
 
-  // Split the entry edge of the region and generate a new basic block on this
-  // edge. This function also updates ScopInfo and RegionInfo.
-  NewBlock = SplitEdge(R.getEnteringBlock(), R.getEntry(), &DT, &LI);
-  if (DT.dominates(R.getEntry(), NewBlock)) {
-    BasicBlock *OldBlock = R.getEntry();
-    std::string OldName = OldBlock->getName();
-
-    // Update ScopInfo.
-    for (ScopStmt &Stmt : S)
-      if (Stmt.getBasicBlock() == OldBlock) {
-        Stmt.setBasicBlock(NewBlock);
-        break;
-      }
-
-    // Update RegionInfo.
-    SplitBlock = OldBlock;
-    OldBlock->setName("polly.split");
-    NewBlock->setName(OldName);
-    R.replaceEntryRecursive(NewBlock);
-    RI.setRegionFor(NewBlock, &R);
-  } else {
-    RI.setRegionFor(NewBlock, R.getParent());
-    SplitBlock = NewBlock;
-  }
+  // Create a fork block
+  BasicBlock *EnteringBB = R.getEnteringBlock();
+  BasicBlock *EntryBB = R.getEntry();
+  assert(EnteringBB && "Must be a simple region");
+  BasicBlock *JunctionBlock =
+      splitEdge(EnteringBB, EntryBB, ".split_new_and_old", &DT, &LI, &RI);
+  JunctionBlock->setName("polly.split_new_and_old");
+
+  // Create a join block
+  BasicBlock *ExitingBB = R.getExitingBlock();
+  BasicBlock *ExitBB = R.getExit();
+  assert(ExitingBB && "Must be a simple region");
+  BasicBlock *MergeBlock =
+      splitEdge(ExitingBB, ExitBB, ".merge_new_and_old", &DT, &LI, &RI);
+  MergeBlock->setName("polly.merge_new_and_old");
 
-  SplitBlock->setName("polly.split_new_and_old");
-  Function *F = SplitBlock->getParent();
-  StartBlock = BasicBlock::Create(F->getContext(), "polly.start", F);
-  SplitBlock->getTerminator()->eraseFromParent();
-  Builder.SetInsertPoint(SplitBlock);
+  // exclude the join block from the region
+  R.replaceExitRecursive(MergeBlock);
+  RI.setRegionFor(MergeBlock, R.getParent());
+
+  // Create the start block
+  Function *F = JunctionBlock->getParent();
+  BasicBlock *StartBlock =
+      BasicBlock::Create(F->getContext(), "polly.start", F);
+  JunctionBlock->getTerminator()->eraseFromParent();
+  Builder.SetInsertPoint(JunctionBlock);
   Builder.CreateCondBr(RTC, StartBlock, R.getEntry());
-  if (Loop *L = LI.getLoopFor(SplitBlock))
+  if (Loop *L = LI.getLoopFor(JunctionBlock))
     L->addBasicBlockToLoop(StartBlock, LI);
-  DT.addNewBlock(StartBlock, SplitBlock);
-  Builder.SetInsertPoint(StartBlock);
-
-  BasicBlock *MergeBlock;
-
-  if (R.getExit()->getSinglePredecessor())
-    // No splitEdge required.  A block with a single predecessor cannot have
-    // PHI nodes that would complicate life.
-    MergeBlock = R.getExit();
-  else {
-    MergeBlock = SplitEdge(R.getExitingBlock(), R.getExit(), &DT, &LI);
-    // SplitEdge will never split R.getExit(), as R.getExit() has more than
-    // one predecessor. Hence, mergeBlock is always a newly generated block.
-    R.replaceExitRecursive(MergeBlock);
-    RI.setRegionFor(MergeBlock, &R);
-  }
+  DT.addNewBlock(StartBlock, JunctionBlock);
+  RI.setRegionFor(StartBlock, RI.getRegionFor(JunctionBlock));
 
+  // Connect start block to the join block
+  Builder.SetInsertPoint(StartBlock);
   Builder.CreateBr(MergeBlock);
-  MergeBlock->setName("polly.merge_new_and_old");
+  DT.changeImmediateDominator(MergeBlock, JunctionBlock);
+
+#ifndef NDEBUG
+  DT.verifyDomTree();
+  LI.verify();
+  RI.verifyAnalysis();
+#endif
 
-  if (DT.dominates(SplitBlock, MergeBlock))
-    DT.changeImmediateDominator(MergeBlock, SplitBlock);
   return StartBlock;
 }
Index: lib/Support/ScopHelper.cpp
===================================================================
--- lib/Support/ScopHelper.cpp
+++ lib/Support/ScopHelper.cpp
@@ -23,6 +23,7 @@
 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
 
 using namespace llvm;
+using namespace polly;
 
 #define DEBUG_TYPE "polly-scop-helper"
 
@@ -76,97 +77,142 @@
   return false;
 }
 
-BasicBlock *polly::createSingleExitEdge(Region *R, Pass *P) {
-  BasicBlock *BB = R->getExit();
-
-  SmallVector<BasicBlock *, 4> Preds;
-  for (pred_iterator PI = pred_begin(BB), PE = pred_end(BB); PI != PE; ++PI)
-    if (R->contains(*PI))
-      Preds.push_back(*PI);
-
-  auto *DTWP = P->getAnalysisIfAvailable<DominatorTreeWrapperPass>();
+// Ensures that there is just one predecessor to the entry node from outside the
+// region.
+// If ExclusiveEnteringBB, it also ensures that the entering block has only one
+// edge which is to the entry.
+// The identity of the region entry node is preserved.
+static void simplifyRegionEntry(Region *R, Pass *P) {
+  assert(R);
+  if (R->isTopLevelRegion())
+    return;
+
+  auto *DTWP =
+      P ? P->getAnalysisIfAvailable<DominatorTreeWrapperPass>() : nullptr;
   auto *DT = DTWP ? &DTWP->getDomTree() : nullptr;
-  auto *LIWP = P->getAnalysisIfAvailable<LoopInfoWrapperPass>();
+  auto *LIWP = P ? P->getAnalysisIfAvailable<LoopInfoWrapperPass>() : nullptr;
   auto *LI = LIWP ? &LIWP->getLoopInfo() : nullptr;
+  auto RI = R->getRegionInfo();
 
-  return SplitBlockPredecessors(BB, Preds, ".region", DT, LI);
-}
-
-static void replaceScopAndRegionEntry(polly::Scop *S, BasicBlock *OldEntry,
-                                      BasicBlock *NewEntry) {
-  if (polly::ScopStmt *Stmt = S->getStmtForBasicBlock(OldEntry))
-    Stmt->setBasicBlock(NewEntry);
-
-  S->getRegion().replaceEntryRecursive(NewEntry);
-}
-
-BasicBlock *polly::simplifyRegion(Scop *S, Pass *P) {
-  Region *R = &S->getRegion();
-
-  // The entering block for the region.
   BasicBlock *EnteringBB = R->getEnteringBlock();
-  BasicBlock *OldEntry = R->getEntry();
-  BasicBlock *NewEntry = nullptr;
+  BasicBlock *Entry = R->getEntry();
 
-  auto *DTWP = P->getAnalysisIfAvailable<DominatorTreeWrapperPass>();
-  auto *DT = DTWP ? &DTWP->getDomTree() : nullptr;
-  auto *LIWP = P->getAnalysisIfAvailable<LoopInfoWrapperPass>();
-  auto *LI = LIWP ? &LIWP->getLoopInfo() : nullptr;
+  // Before (one of):
+  /*                       \    /            */
+  /*                      EnteringBB         */
+  /*                        |    \------>    */
+  /*   \   /                |                */
+  /*   Entry <--\         Entry <--\         */
+  /*   /   \    /         /   \    /         */
+  /*        ....               ....          */
 
   // Create single entry edge if the region has multiple entry edges.
   if (!EnteringBB) {
-    NewEntry = SplitBlock(OldEntry, OldEntry->begin(), DT, LI);
-    EnteringBB = OldEntry;
-  }
+    SmallVector<BasicBlock *, 4> Preds;
+    for (pred_iterator PI = pred_begin(Entry), PE = pred_end(Entry); PI != PE;
+         ++PI)
+      if (!R->contains(*PI))
+        Preds.push_back(*PI);
+
+    BasicBlock *NewEntering =
+        SplitBlockPredecessors(Entry, Preds, ".region_entering", DT, LI);
+
+    for (auto ExitPred :
+         make_range(pred_begin(NewEntering), pred_end(NewEntering))) {
+      Region *RegionOfPred = RI->getRegionFor(ExitPred);
+      if (RegionOfPred->getExit() != Entry)
+        continue;
+
+      while (!RegionOfPred->isTopLevelRegion() &&
+             RegionOfPred->getExit() == Entry) {
+        RegionOfPred->replaceExit(NewEntering);
+        RegionOfPred = RegionOfPred->getParent();
+      }
+    }
 
-  // Create an unconditional entry edge.
-  if (EnteringBB->getTerminator()->getNumSuccessors() != 1) {
-    BasicBlock *EntryBB = NewEntry ? NewEntry : OldEntry;
-    BasicBlock *SplitEdgeBB = SplitEdge(EnteringBB, EntryBB, DT, LI);
-
-    // Once the edge between EnteringBB and EntryBB is split, two cases arise.
-    // The first is simple. The new block is inserted between EnteringBB and
-    // EntryBB. In this case no further action is needed. However it might
-    // happen (if the splitted edge is not critical) that the new block is
-    // inserted __after__ EntryBB causing the following situation:
-    //
-    // EnteringBB
-    //    _|_
-    //    | |
-    //    |  \-> some_other_BB_not_in_R
-    //    V
-    // EntryBB
-    //    |
-    //    V
-    // SplitEdgeBB
-    //
-    // In this case we need to swap the role of EntryBB and SplitEdgeBB.
-
-    // Check which case SplitEdge produced:
-    if (SplitEdgeBB->getTerminator()->getSuccessor(0) == EntryBB) {
-      // First (simple) case.
-      EnteringBB = SplitEdgeBB;
-    } else {
-      // Second (complicated) case.
-      NewEntry = SplitEdgeBB;
-      EnteringBB = EntryBB;
+    Region *AncestorR = R->getParent();
+    RI->setRegionFor(NewEntering, AncestorR);
+    while (!AncestorR->isTopLevelRegion() && AncestorR->getEntry() == Entry) {
+      AncestorR->replaceEntry(NewEntering);
+      AncestorR = AncestorR->getParent();
     }
 
-    EnteringBB->setName("polly.entering.block");
+    EnteringBB = NewEntering;
   }
+  assert(R->getEnteringBlock() == EnteringBB);
+
+// After:
+/*    \    /      */
+/*  EnteringBB    */
+/*     |          */
+/*     |          */
+/*   Entry <--\   */
+/*   /   \    /   */
+/*        ....    */
+}
 
-  if (NewEntry)
-    replaceScopAndRegionEntry(S, OldEntry, NewEntry);
-
-  // Create single exit edge if the region has multiple exit edges.
-  if (!R->getExitingBlock()) {
-    BasicBlock *NewExiting = createSingleExitEdge(R, P);
-    (void)NewExiting;
-    assert(NewExiting == R->getExitingBlock() &&
-           "Did not create a single exiting block");
+// Ensure that the region has a single block that branches to the exit node.
+// If exclusiveExitNode the sole edge to the exit node will be from the region.
+// If emptyExitNode the exit node will contain nothing but a terminator
+// instruction.
+static void simplifyRegionExit(Region *R, Pass *P) {
+  assert(R);
+  if (R->isTopLevelRegion())
+    return;
+
+  auto *DTWP =
+      P ? P->getAnalysisIfAvailable<DominatorTreeWrapperPass>() : nullptr;
+  auto *DT = DTWP ? &DTWP->getDomTree() : nullptr;
+  auto *LIWP = P ? P->getAnalysisIfAvailable<LoopInfoWrapperPass>() : nullptr;
+  auto *LI = LIWP ? &LIWP->getLoopInfo() : nullptr;
+  auto RI = R->getRegionInfo();
+
+  BasicBlock *ExitBB = R->getExit();
+  BasicBlock *ExitingBB = R->getExitingBlock();
+
+  // Before:
+  /*   (Region)   ______/  */
+  /*      \  |   /         */
+  /*       ExitBB          */
+  /*       /    \          */
+
+  if (!ExitingBB) {
+    SmallVector<BasicBlock *, 4> Preds;
+    for (pred_iterator PI = pred_begin(ExitBB), PE = pred_end(ExitBB); PI != PE;
+         ++PI)
+      if (R->contains(*PI))
+        Preds.push_back(*PI);
+
+    /*  Preds[0] Preds[1]      otherBB */
+    /*         \  |  ________/         */
+    /*          \ | /                  */
+    /*           BB                    */
+    ExitingBB =
+        SplitBlockPredecessors(ExitBB, Preds, ".region_exiting", DT, LI);
+    /* Preds[0] Preds[1]   otherBB     */
+    /*        \  /        /            */
+    /*      BB.exiting   /             */
+    /*               \  /              */
+    /*                BB               */
+
+    RI->setRegionFor(ExitingBB, R);
+    R->replaceExitRecursive(ExitingBB);
+    R->replaceExit(ExitBB);
   }
+  assert(ExitingBB == R->getExitingBlock());
+
+// After:
+/*         \   /                */
+/*        ExitingBB     _____/  */
+/*              \      /        */
+/*               ExitBB         */
+/*               /    \         */
+}
 
-  return EnteringBB;
+void polly::simplifyRegion(Region *R, Pass *P) {
+  simplifyRegionEntry(R, P);
+  simplifyRegionExit(R, P);
+  assert(R->isSimple());
 }
 
 void polly::splitEntryBlockForAlloca(BasicBlock *EntryBlock, Pass *P) {
@@ -176,13 +222,172 @@
   while (isa<AllocaInst>(I))
     ++I;
 
-  auto *DTWP = P->getAnalysisIfAvailable<DominatorTreeWrapperPass>();
+  auto *DTWP =
+      P ? P->getAnalysisIfAvailable<DominatorTreeWrapperPass>() : nullptr;
   auto *DT = DTWP ? &DTWP->getDomTree() : nullptr;
-  auto *LIWP = P->getAnalysisIfAvailable<LoopInfoWrapperPass>();
+  auto *LIWP = P ? P->getAnalysisIfAvailable<LoopInfoWrapperPass>() : nullptr;
   auto *LI = LIWP ? &LIWP->getLoopInfo() : nullptr;
+  auto RIP = P ? P->getAnalysisIfAvailable<RegionInfoPass>() : nullptr;
+  auto RI = RIP ? &RIP->getRegionInfo() : nullptr;
+
+  // SplitBlock updates DT, DF, LI and RI.
+  splitBlock(EntryBlock, I, ".split", DT, LI, RI);
+}
+
+llvm::BasicBlock *polly::splitBlock(llvm::BasicBlock *Old,
+                                    llvm::Instruction *SplitPt,
+                                    llvm::StringRef Suffix,
+                                    llvm::DominatorTree *DT, llvm::LoopInfo *LI,
+                                    llvm::RegionInfo *RI) {
+  assert(Old && SplitPt);
+
+  // Before:
+  /*	 \   /    */
+  /*	  Old     */
+  /*	 /   \    */
+  auto OldName = Old->getName();
+  BasicBlock *NewBlock = llvm::SplitBlock(Old, SplitPt, DT, LI);
+  NewBlock->setName(OldName + Suffix);
+  // After:
+  /*	 \   /    */
+  /*	  Old     */
+  /*	   |      */
+  /*  NewBlock      */
+  /*	 /   \    */
+
+  if (RI) {
+    Region *R = RI->getRegionFor(Old);
+    RI->setRegionFor(NewBlock, R);
+  }
+
+  return NewBlock;
+}
+
+BasicBlock *polly::splitBlockNonPredecessors(
+    BasicBlock *BB, ArrayRef<BasicBlock *> NonPreds, llvm::StringRef Suffix,
+    DominatorTree *DT, LoopInfo *LI, /*RegionInfo *RI,*/ ScalarEvolution *SE) {
+  assert(BB);
+
+  // Do not attempt to split that which cannot be split.
+  if (!BB->canSplitPredecessors())
+    return nullptr;
+
+  // Currently cannot handle landing pads
+  if (BB->isLandingPad())
+    return nullptr;
+
+  // Before:
+  /*          NonPreds[0]  */
+  /*      \  /             */
+  /*       BB              */
+  /*      /  \             */
+
+  // Create new basic block, insert right after the original block.
+  BasicBlock *NewBB =
+      splitBlock(BB, BB->getFirstInsertionPt(), Suffix, DT, LI, nullptr);
+
+  // Create new PHIs into the new block
+  for (auto I = BB->begin(); isa<PHINode>(I); ++I) {
+    PHINode *OldPHI = cast<PHINode>(&*I);
+    PHINode *NewPHI = PHINode::Create(OldPHI->getType(), NonPreds.size() + 1,
+                                      OldPHI->getName() + ".np",
+                                      NewBB->getFirstInsertionPt());
+
+    // Replaces uses of the original PHI by the new ones
+    OldPHI->replaceAllUsesWith(NewPHI);
+
+    // Add an edge from the original block (this adds a use)
+    NewPHI->addIncoming(OldPHI, BB);
+
+    // Move the incoming block's values
+    for (auto Edge : NonPreds) {
+      BasicBlock *RedirectedEdge = Edge == BB ? NewBB : Edge;
+      auto i = OldPHI->getBasicBlockIndex(RedirectedEdge);
+      assert(i >= 0);
+      Value *Val = OldPHI->getIncomingValue(i);
+      NewPHI->addIncoming(Val, RedirectedEdge);
+      OldPHI->removeIncomingValue(i, false);
+
+      if (SE)
+        SE->forgetValue(OldPHI);
+    }
+  }
+
+  // If there were no edges to move, there is nothing more to do
+  if (NonPreds.empty())
+    return NewBB;
+
+  // Move all non-pred edges to the new BB
+  for (auto Edge : NonPreds) {
+    BasicBlock *RedirectedEdge = Edge == BB ? NewBB : Edge;
+    assert(!isa<IndirectBrInst>(RedirectedEdge->getTerminator()) &&
+           "Cannot split an edge from an IndirectBrInst");
+    RedirectedEdge->getTerminator()->replaceUsesOfWith(BB, NewBB);
+  }
+
+  // After:
+  /*               NonPreds[0]  */
+  /*      \         /           */
+  /*       BB      /            */
+  /*         \    /             */
+  /*          NewBB             */
+  /*          /  \              */
+
+  if (DT) {
+    // DominatorTree::splitBlock assumes that it is the predecessor which is
+    // new.
+    // In order to reuse that function, we recreated the situtation as if it
+    // was.
+    auto BBNode = DT->getNode(BB);
+    auto NewBBNode = DT->getNode(NewBB);
+    DT->changeImmediateDominator(NewBBNode, BBNode->getIDom());
+    DT->eraseNode(BB);
+
+    DT->splitBlock(BB);
+  }
+
+  if (LI) {
+    BasicBlock *OldBB = BB;
+    Loop *L = LI->getLoopFor(OldBB);
+    while (L) {
+      assert(L->contains(OldBB));
+
+      bool OldBBReachableFromInside = false;
+      for (auto Pred : make_range(pred_begin(OldBB), pred_end(OldBB))) {
+        if (Pred == NewBB || L->contains(Pred)) {
+          OldBBReachableFromInside = true;
+          break;
+        }
+      }
+
+      bool NewBBReachableFromOutside = false;
+      for (auto Pred : make_range(pred_begin(NewBB), pred_end(NewBB))) {
+        if (Pred == OldBB)
+          continue;
+
+        if (Pred != NewBB && !L->contains(Pred)) {
+          NewBBReachableFromOutside = true;
+          break;
+        }
+      }
+
+      if (NewBBReachableFromOutside || !OldBBReachableFromInside) {
+        // Remove BB from the loop and make the NewBB the next loop header
+        // Since NewBB dominates OldBB, it can be the new header
+        if (L->getHeader() == OldBB)
+          L->moveToHeader(NewBB);
+      }
+
+      if (!OldBBReachableFromInside) {
+        // OldBB cannot be reached from the loop header anymore, i.e. it has
+        // been excluded from the loop
+        L->removeBlockFromLoop(BB);
+        LI->changeLoopFor(BB, L->getParentLoop());
+      }
+
+      L = L->getParentLoop();
+    }
+  }
 
-  // SplitBlock updates DT, DF and LI.
-  BasicBlock *NewEntry = SplitBlock(EntryBlock, I, DT, LI);
-  if (RegionInfoPass *RIP = P->getAnalysisIfAvailable<RegionInfoPass>())
-    RIP->getRegionInfo().splitBlock(NewEntry, EntryBlock);
+  return NewBB;
 }
Index: test/Isl/CodeGen/loop_with_conditional_entry_edge_splited_hard_case.ll
===================================================================
--- test/Isl/CodeGen/loop_with_conditional_entry_edge_splited_hard_case.ll
+++ /dev/null
@@ -1,63 +0,0 @@
-; RUN: opt %loadPolly -polly-detect-unprofitable -polly-no-early-exit -polly-codegen -S < %s | FileCheck %s
-;
-; Test case to trigger the hard way of creating a unique entering
-; edge for the SCoP. It is triggered because the entering edge
-; here: %while.begin --> %if is __not__ critical.
-;
-;    int f(void);
-;    void jd(int b, int *A) {
-;      while (f()) {
-;        if (b)
-;          for (int i = 0; i < 1024; i++)
-;            A[i] = i;
-;      }
-;    }
-;
-target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
-
-define void @jd(i32 %b, i32* %A) {
-entry:
-  br label %while.begin
-
-; CHECK: while.begin:
-while.begin:
-; CHECK:  %call = call i32 @f()
-  %call = call i32 @f()
-; CHECK:  %tobool = icmp eq i32 %call, 0
-  %tobool = icmp eq i32 %call, 0
-; CHECK:  br i1 %tobool, label %while.end, label %polly.entering.block
-  br i1 %tobool, label %while.end, label %if
-
-; CHECK: polly.entering.block:
-; CHECK:   br label %polly.split_new_and_old
-
-; CHECK: polly.split_new_and_old:
-; CHECK:   br i1 true, label %polly.start, label %if.split
-
-; CHECK: if.split:
-if:                                               ; preds = %while.begin
-; CHECK: %tobool2 = icmp eq i32 %b, 0
-  %tobool2 = icmp eq i32 %b, 0
-; CHECK: br i1 %tobool2, label %while.begin{{[a-zA-Z._]*}}, label %for.cond
-  br i1 %tobool2, label %while.begin, label %for.cond
-
-for.cond:                                         ; preds = %for.inc, %if
-  %indvars.iv = phi i64 [ %indvars.iv.next, %for.inc ], [ 0, %if ]
-  %exitcond = icmp ne i64 %indvars.iv, 1024
-  br i1 %exitcond, label %for.body, label %while.begin
-
-for.body:                                         ; preds = %for.cond
-  %arrayidx = getelementptr inbounds i32, i32* %A, i64 %indvars.iv
-  %tmp = trunc i64 %indvars.iv to i32
-  store i32 %tmp, i32* %arrayidx, align 4
-  br label %for.inc
-
-for.inc:                                          ; preds = %for.body
-  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
-  br label %for.cond
-
-while.end:                                        ; preds = %entry, %for.cond
-  ret void
-}
-
-declare i32 @f()
Index: test/Isl/CodeGen/loop_with_conditional_entry_edge_splitted_hard_case.ll
===================================================================
--- /dev/null
+++ test/Isl/CodeGen/loop_with_conditional_entry_edge_splitted_hard_case.ll
@@ -0,0 +1,63 @@
+; RUN: opt %loadPolly -polly-detect-unprofitable -polly-no-early-exit -polly-codegen -S < %s | FileCheck %s
+;
+; Test case to trigger the hard way of creating a unique entering
+; edge for the SCoP. It is triggered because the entering edge
+; here: %while.begin --> %if is __not__ critical.
+;
+;    int f(void);
+;    void jd(int b, int *A) {
+;      while (f()) {
+;        if (b)
+;          for (int i = 0; i < 1024; i++)
+;            A[i] = i;
+;      }
+;    }
+;
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+
+define void @jd(i32 %b, i32* %A) {
+entry:
+  br label %while.begin
+
+; CHECK-LABEL: while.begin.region_exiting:
+; CHECK:         br label %polly.merge_new_and_old
+
+; CHECK-LABEL: while.begin:
+while.begin:
+; CHECK:  %call = call i32 @f()
+  %call = call i32 @f()
+; CHECK:  %tobool = icmp eq i32 %call, 0
+  %tobool = icmp eq i32 %call, 0
+; CHECK:  br i1 %tobool, label %while.end, label %polly.split_new_and_old
+  br i1 %tobool, label %while.end, label %if
+
+; CHECK-LABEL: polly.split_new_and_old:
+; CHECK:   br i1 true, label %polly.start, label %if
+
+; CHECK-LABEL: if:
+if:                                               ; preds = %while.begin
+; CHECK: %tobool2 = icmp eq i32 %b, 0
+  %tobool2 = icmp eq i32 %b, 0
+; CHECK: br i1 %tobool2, label %while.begin{{[a-zA-Z._]*}}, label %for.cond
+  br i1 %tobool2, label %while.begin, label %for.cond
+
+for.cond:                                         ; preds = %for.inc, %if
+  %indvars.iv = phi i64 [ %indvars.iv.next, %for.inc ], [ 0, %if ]
+  %exitcond = icmp ne i64 %indvars.iv, 1024
+  br i1 %exitcond, label %for.body, label %while.begin
+
+for.body:                                         ; preds = %for.cond
+  %arrayidx = getelementptr inbounds i32, i32* %A, i64 %indvars.iv
+  %tmp = trunc i64 %indvars.iv to i32
+  store i32 %tmp, i32* %arrayidx, align 4
+  br label %for.inc
+
+for.inc:                                          ; preds = %for.body
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  br label %for.cond
+
+while.end:                                        ; preds = %entry, %for.cond
+  ret void
+}
+
+declare i32 @f()
Index: test/Isl/CodeGen/phi_loop_carried_float.ll
===================================================================
--- test/Isl/CodeGen/phi_loop_carried_float.ll
+++ test/Isl/CodeGen/phi_loop_carried_float.ll
@@ -12,29 +12,29 @@
 ; CHECK-NOT:    %tmp7{{[.*]}} = alloca float
 ; CHECK-DAG:    %tmp.0.phiops = alloca float
 ; CHECK-NOT:    %tmp7{{[.*]}} = alloca float
-;
-; CHECK:      polly.merge_new_and_old:
-; CHECK-NEXT:  ret
-;
-; CHECK:      polly.start:
-; CHECK-NEXT:   store float 0.000000e+00, float* %tmp.0.phiops
 
-; CHECK:      polly.merge:
-; CHECK-NEXT:   br label %polly.merge_new_and_old
+; CHECK-LABEL: exit:
+; CHECK-NEXT:    ret
+
+; CHECK-LABEL: polly.start:
+; CHECK-NEXT:    store float 0.000000e+00, float* %tmp.0.phiops
+
+; CHECK-LABEL: polly.merge:
+; CHECK-NEXT:    br label %polly.merge_new_and_old
 
-; CHECK:      polly.stmt.bb1{{[0-9]*}}:
-; CHECK-NEXT:   %tmp.0.phiops.reload[[R1:[0-9]*]] = load float, float* %tmp.0.phiops
-; CHECK:        store float %tmp.0.phiops.reload[[R1]], float* %tmp.0.s2a
+; CHECK-LABEL: polly.stmt.bb1{{[0-9]*}}:
+; CHECK-NEXT:    %tmp.0.phiops.reload[[R1:[0-9]*]] = load float, float* %tmp.0.phiops
+; CHECK:         store float %tmp.0.phiops.reload[[R1]], float* %tmp.0.s2a
 
-; CHECK:      polly.stmt.bb1{{[0-9]*}}:
-; CHECK-NEXT:   %tmp.0.phiops.reload[[R2:[0-9]*]] = load float, float* %tmp.0.phiops
-; CHECK:        store float %tmp.0.phiops.reload[[R2]], float* %tmp.0.s2a
+; CHECK-LABEL: polly.stmt.bb1{{[0-9]*}}:
+; CHECK-NEXT:    %tmp.0.phiops.reload[[R2:[0-9]*]] = load float, float* %tmp.0.phiops
+; CHECK:         store float %tmp.0.phiops.reload[[R2]], float* %tmp.0.s2a
 
-; CHECK: polly.stmt.bb4:                                   ; preds = %polly.then3
-; CHECK:   %tmp[[R5:[0-9]*]]_p_scalar_ = load float, float* %scevgep, align 4, !alias.scope !0, !noalias !2
-; CHECK:   %tmp.0.s2a.reload[[R3:[0-9]*]] = load float, float* %tmp.0.s2a
-; CHECK:   %p_tmp[[R4:[0-9]*]] = fadd float %tmp.0.s2a.reload[[R3]], %tmp[[R5]]_p_scalar_
-; CHECK:   store float %p_tmp[[R4]], float* %tmp.0.phiops
+; CHECK-LABEL: polly.stmt.bb4:                                   ; preds = %polly.then3
+; CHECK:         %tmp[[R5:[0-9]*]]_p_scalar_ = load float, float* %scevgep, align 4, !alias.scope !0, !noalias !2
+; CHECK:         %tmp.0.s2a.reload[[R3:[0-9]*]] = load float, float* %tmp.0.s2a
+; CHECK:         %p_tmp[[R4:[0-9]*]] = fadd float %tmp.0.s2a.reload[[R3]], %tmp[[R5]]_p_scalar_
+; CHECK:         store float %p_tmp[[R4]], float* %tmp.0.phiops
 
 target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
 
Index: test/Isl/CodeGen/phi_loop_carried_float_escape.ll
===================================================================
--- test/Isl/CodeGen/phi_loop_carried_float_escape.ll
+++ test/Isl/CodeGen/phi_loop_carried_float_escape.ll
@@ -6,31 +6,31 @@
 ;        tmp += A[i];
 ;      return tmp;
 ;    }
-;
-; CHECK:      polly.merge_new_and_old:
-; CHECK-NEXT:   %tmp.0.merge = phi float [ %tmp.0.final_reload, %polly.merge ], [ %tmp.0, %bb8 ]
-; CHECK-NEXT:   ret float %tmp.0.merge
-;
-; CHECK:      polly.start:
-; CHECK-NEXT:   store float 0.000000e+00, float* %tmp.0.phiops
-
-; CHECK:      polly.merge:
-; CHECK-NEXT:   %tmp.0.final_reload = load float, float* %tmp.0.s2a
-; CHECK-NEXT:   br label %polly.merge_new_and_old
-
-; CHECK:      polly.stmt.bb1{{[0-9]*}}:
-; CHECK-NEXT:   %tmp.0.phiops.reload[[R1:[0-9]*]] = load float, float* %tmp.0.phiops
-; CHECK-:       store float %tmp.0.phiops.reload[[R1]], float* %tmp.0.s2a
-
-; CHECK:      polly.stmt.bb1{{[0-9]*}}:
-; CHECK-NEXT:   %tmp.0.phiops.reload[[R2:[0-9]*]] = load float, float* %tmp.0.phiops
-; CHECK:        store float %tmp.0.phiops.reload[[R2]], float* %tmp.0.s2a
-
-; CHECK: polly.stmt.bb4:                                   ; preds = %polly.then3
-; CHECK:   %tmp[[R5:[0-9]*]]_p_scalar_ = load float, float* %scevgep, align 4, !alias.scope !0, !noalias !2
-; CHECK:   %tmp.0.s2a.reload[[R3:[0-9]*]] = load float, float* %tmp.0.s2a
-; CHECK:   %p_tmp[[R4:[0-9]*]] = fadd float %tmp.0.s2a.reload[[R3]], %tmp[[R5]]_p_scalar_
-; CHECK:   store float %p_tmp[[R4]], float* %tmp.0.phiops
+
+; CHECK-LABEL: polly.merge_new_and_old:
+; CHECK-NEXT:    %tmp.0.merge = phi float [ %tmp.0.final_reload, %polly.merge ], [ %tmp.0, %bb8 ]
+; CHECK-NEXT:    br label %exit
+
+; CHECK-LABEL: polly.start:
+; CHECK-NEXT:    store float 0.000000e+00, float* %tmp.0.phiops
+
+; CHECK-LABEL: polly.merge:
+; CHECK-NEXT:    %tmp.0.final_reload = load float, float* %tmp.0.s2a
+; CHECK-NEXT:    br label %polly.merge_new_and_old
+
+; CHECK-LABEL: polly.stmt.bb1{{[0-9]*}}:
+; CHECK-NEXT:    %tmp.0.phiops.reload[[R1:[0-9]*]] = load float, float* %tmp.0.phiops
+; CHECK-:        store float %tmp.0.phiops.reload[[R1]], float* %tmp.0.s2a
+
+; CHECK-LABEL: polly.stmt.bb1{{[0-9]*}}:
+; CHECK-NEXT:    %tmp.0.phiops.reload[[R2:[0-9]*]] = load float, float* %tmp.0.phiops
+; CHECK:         store float %tmp.0.phiops.reload[[R2]], float* %tmp.0.s2a
+
+; CHECK-LABEL: polly.stmt.bb4:                                   ; preds = %polly.then3
+; CHECK:         %tmp[[R5:[0-9]*]]_p_scalar_ = load float, float* %scevgep, align 4, !alias.scope !0, !noalias !2
+; CHECK:         %tmp.0.s2a.reload[[R3:[0-9]*]] = load float, float* %tmp.0.s2a
+; CHECK:         %p_tmp[[R4:[0-9]*]] = fadd float %tmp.0.s2a.reload[[R3]], %tmp[[R5]]_p_scalar_
+; CHECK:         store float %p_tmp[[R4]], float* %tmp.0.phiops
 
 target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
 
Index: test/Isl/CodeGen/simple_non_single_entry.ll
===================================================================
--- test/Isl/CodeGen/simple_non_single_entry.ll
+++ test/Isl/CodeGen/simple_non_single_entry.ll
@@ -65,6 +65,6 @@
   ret void
 }
 
-; CHECK: Create LLVM-IR from SCoPs' for region: 'next.split => polly.merge_new_and_old'
+; CHECK: Create LLVM-IR from SCoPs' for region: 'next => polly.merge_new_and_old'
 ; CHECK-CODE: polly.split_new_and_old
 ; CHECK-CODE: polly.merge_new_and_old