Index: include/polly/CodeGen/BlockGenerators.h
===================================================================
--- include/polly/CodeGen/BlockGenerators.h
+++ include/polly/CodeGen/BlockGenerators.h
@@ -387,6 +387,17 @@
   /// the original value in the non-optimized SCoP.
   void createScalarFinalization(Region &R);
 
+  /// @brief Recompute all scalars needed in this statement.
+  ///
+  /// During SCoP creation scalars can be virtually moved to simplify the SCoP
+  /// description as well as the dependences. However, they are only moved if
+  /// we can recompute them in the statements in which they are used. This
+  /// method will perform the recomputation before we clone the original
+  /// statement into the new, optimized region, thus ensuring all scalars are
+  /// available.
+  void recomputeDependentScalars(ScopStmt &Stmt, ValueMapT &BBMap,
+                                 LoopToScevMapT &LTS,
+                                 isl_id_to_ast_expr *NewAccesses);
+
   /// @brief Try to synthesize a new value
   ///
   /// Given an old value, we try to synthesize it in a new context from its
@@ -409,7 +420,7 @@
   /// @returns o A newly synthesized value.
   ///          o NULL, if synthesizing the value failed.
   Value *trySynthesizeNewValue(ScopStmt &Stmt, Value *Old, ValueMapT &BBMap,
-                               LoopToScevMapT &LTS, Loop *L) const;
+                               LoopToScevMapT &LTS, Loop *L);
 
   /// @brief Get the new version of a value.
   ///
@@ -431,15 +442,18 @@
   /// @param L       The loop that surrounded the instruction that referenced
   ///                this value in the original code. This loop is used to
   ///                evaluate the scalar evolution at the right scope.
+  /// @param TryOnly Flag to indicate that nullptr is a valid return value
+  ///                if no new value was found.
   ///
   /// @returns o The old value, if it is still valid.
   ///          o The new value, if available.
-  ///          o NULL, if no value is found.
+  ///          o NULL, if no value is found and TryOnly is set.
+  ///          o Otherwise, a trap is triggered.
   Value *getNewValue(ScopStmt &Stmt, Value *Old, ValueMapT &BBMap,
-                     LoopToScevMapT &LTS, Loop *L) const;
+                     LoopToScevMapT &LTS, Loop *L, bool TryOnly = false);
 
   void copyInstScalar(ScopStmt &Stmt, Instruction *Inst, ValueMapT &BBMap,
-                      LoopToScevMapT &LTS);
+                      LoopToScevMapT &LTS, bool Recompute = false);
 
   /// @brief Get the innermost loop that surrounds an instruction.
   ///
@@ -500,8 +514,13 @@
   /// @param NewAccesses A map from memory access ids to new ast expressions,
   ///                    which may contain new access expressions for certain
   ///                    memory accesses.
+  /// @param Recompute   Flag to indicate that the instruction is a scalar
+  ///                    that needs to be recomputed in this statement. It
+  ///                    basically forces us to copy not only the instruction
+  ///                    but also all operands if we cannot find a local or
+  ///                    global mapping.
   void copyInstruction(ScopStmt &Stmt, Instruction *Inst, ValueMapT &BBMap,
-                       LoopToScevMapT &LTS, isl_id_to_ast_expr *NewAccesses);
+                       LoopToScevMapT &LTS, isl_id_to_ast_expr *NewAccesses,
+                       bool Recompute = false);
 
   /// @brief Helper to get the newest version of @p ScalarValue.
  ///
Index: include/polly/LinkAllPasses.h
===================================================================
--- include/polly/LinkAllPasses.h
+++ include/polly/LinkAllPasses.h
@@ -33,7 +33,6 @@
 llvm::Pass *createDOTOnlyViewerPass();
 llvm::Pass *createDOTPrinterPass();
 llvm::Pass *createDOTViewerPass();
-llvm::Pass *createIndependentBlocksPass();
 llvm::Pass *createJSONExporterPass();
 llvm::Pass *createJSONImporterPass();
 llvm::Pass *createPollyCanonicalizePass();
@@ -43,7 +42,6 @@
 llvm::Pass *createCodeGenerationPass();
 llvm::Pass *createIslScheduleOptimizerPass();
 
-extern char &IndependentBlocksID;
 extern char &CodePreparationID;
 }
 
@@ -64,7 +62,6 @@
       polly::createDOTOnlyViewerPass();
       polly::createDOTPrinterPass();
       polly::createDOTViewerPass();
-      polly::createIndependentBlocksPass();
       polly::createJSONExporterPass();
       polly::createJSONImporterPass();
       polly::createScopDetectionPass();
@@ -81,7 +78,6 @@
 class PassRegistry;
 void initializeCodePreparationPass(llvm::PassRegistry &);
 void initializeDeadCodeElimPass(llvm::PassRegistry &);
-void initializeIndependentBlocksPass(llvm::PassRegistry &);
 void initializeJSONExporterPass(llvm::PassRegistry &);
 void initializeJSONImporterPass(llvm::PassRegistry &);
 void initializeIslAstInfoPass(llvm::PassRegistry &);
Index: include/polly/ScopInfo.h
===================================================================
--- include/polly/ScopInfo.h
+++ include/polly/ScopInfo.h
@@ -511,6 +511,12 @@
   /// @param SAI Info object for the accessed array.
   void buildAccessRelation(const ScopArrayInfo *SAI);
 
+  /// @brief Copy this memory access into the given statement @p Stmt.
+  ///
+  /// @param AccList The list that contains all accesses for @p Stmt.
+  /// @param Stmt    The statement the copied access should reside in.
+  MemoryAccess *copy(AccFuncSetType &AccList, ScopStmt *Stmt) const;
+
 public:
   /// @brief Create a new MemoryAccess.
   ///
@@ -534,7 +540,7 @@
   ~MemoryAccess();
 
   /// @brief Get the type of a memory access.
-  enum AccessType getType() { return AccType; }
+  enum AccessType getType() const { return AccType; }
 
   /// @brief Is this a reduction like access?
   bool isReductionLike() const { return RedType != RT_NONE; }
@@ -772,6 +778,9 @@
 
   std::string BaseName;
 
+  /// @brief Set of scalar values that need to be recomputed in this statement.
+  SetVector<Instruction *> DependentScalars;
+
   /// Build the statement.
   //@{
   void buildDomain();
@@ -910,7 +919,18 @@
   }
 
   /// @brief Add @p Access to this statement's list of accesses.
-  void addAccess(MemoryAccess *Access);
+  ///
+  /// @param Access The access to add.
+  /// @param Front  Flag to indicate whether the access should be added at
+  ///               the front of the access list.
+  void addAccess(MemoryAccess *Access, bool Front = false);
+
+  /// @brief Remove the memory access @p MA from this statement.
+  ///
+  /// @param MA     The access to remove.
+  /// @param OnlyMA Flag to indicate whether only @p MA should be removed
+  ///               (true) or all accesses caused by the access instruction
+  ///               of @p MA (false).
+  void removeMemoryAccess(MemoryAccess *MA, bool OnlyMA);
 
   /// @brief Move the memory access in @p InvMAs to @p InvariantEquivClasses.
   ///
@@ -941,6 +961,14 @@
   /// @brief Get the isl AST build.
   __isl_keep isl_ast_build *getAstBuild() const { return Build; }
 
+  /// @brief Add a scalar that needs to be recomputed in this statement.
+  void addDependentScalar(Instruction *Inst) { DependentScalars.insert(Inst); }
+
+  /// @brief Return the scalars that need to be recomputed in this statement.
+  const SetVector<Instruction *> &getDependentScalars() const {
+    return DependentScalars;
+  }
+
   /// @brief Restrict the domain of the statement.
  ///
  /// @param NewDomain The new statement domain.
@@ -1225,6 +1253,25 @@
   /// Required inv. loads: LB[0], LB[1], (V, if it may alias with A or LB)
   void hoistInvariantLoads();
 
+  /// @brief Check if we can recompute all instructions in @p Stmt.
+  ///
+  /// @param Stmt  The statement we want to recompute @p Insts in.
+  /// @param Insts The instructions we need to recompute.
+  ///
+  /// @returns True, if all instructions can be recomputed in @p Stmt.
+  bool canRecomputeInStmt(ScopStmt &Stmt,
+                          SmallPtrSet<Instruction *, 8> &Insts);
+
+  /// @brief Simplify the scalar accesses in this SCoP.
+  ///
+  /// Scalar accesses are often not needed and are only caused by the
+  /// placement of instructions in the code. Additionally, it is sometimes
+  /// possible to recompute scalars to avoid communication. As scalars
+  /// basically sequentialize all loops they are in, we try to avoid scalar
+  /// accesses as much as possible. To this end, we virtually move them here
+  /// and later recompute them during code generation. This allows more
+  /// freedom for the scheduler while we do not need to change the original
+  /// code region at all.
+  void simplifyScalarAccesses();
+
   /// @brief Build the Context of the Scop.
   void buildContext();
Index: lib/Analysis/ScopInfo.cpp
===================================================================
--- lib/Analysis/ScopInfo.cpp
+++ lib/Analysis/ScopInfo.cpp
@@ -58,6 +58,8 @@
 
 STATISTIC(ScopFound, "Number of valid Scops");
 STATISTIC(RichScopFound, "Number of Scops containing a loop");
+STATISTIC(ScalarsEliminated, "Number of scalars eliminated (moved/recomputed)");
+STATISTIC(StatementsEliminated, "Number of statements eliminated");
 
 static cl::opt<bool> ModelReadOnlyScalars(
     "polly-analyze-read-only-scalars",
@@ -652,6 +654,17 @@
       Subscripts(Subscripts.begin(), Subscripts.end()),
       AccessRelation(nullptr), NewAccessRelation(nullptr) {}
 
+MemoryAccess *MemoryAccess::copy(AccFuncSetType &AccList,
+                                 ScopStmt *Stmt) const {
+  AccList.emplace_back(Stmt, getAccessInstruction(), getId(), getType(),
+                       getBaseAddr(), getElemSizeInBytes(), isAffine(),
+                       Subscripts, Sizes, getAccessValue(), Origin,
+                       getBaseName());
+  MemoryAccess *CopyMA = &AccList.back();
+  CopyMA->buildAccessRelation(getScopArrayInfo());
+  return CopyMA;
+}
+
 void MemoryAccess::realignParams() {
   isl_space *ParamSpace = Statement->getParent()->getParamSpace();
   AccessRelation = isl_map_align_params(AccessRelation, ParamSpace);
@@ -825,14 +838,17 @@
   }
 }
 
-void ScopStmt::addAccess(MemoryAccess *Access) {
+void ScopStmt::addAccess(MemoryAccess *Access, bool Front) {
   Instruction *AccessInst = Access->getAccessInstruction();
 
   MemoryAccessList *&MAL = InstructionToAccess[AccessInst];
   if (!MAL)
     MAL = new MemoryAccessList();
   MAL->emplace_front(Access);
-  MemAccs.push_back(MAL->front());
+  if (Front)
+    MemAccs.insert(MemAccs.begin(), MAL->front());
+  else
+    MemAccs.push_back(MAL->front());
 }
 
 void ScopStmt::realignParams() {
@@ -1354,6 +1370,38 @@
 
 void ScopStmt::dump() const { print(dbgs()); }
 
+void ScopStmt::removeMemoryAccess(MemoryAccess *MA, bool OnlyMA) {
+  auto &MAL = *lookupAccessesFor(MA->getAccessInstruction());
+  MAL.reverse();
+
+  auto MALIt = MAL.begin();
+  auto MALLastIt = MAL.before_begin();
+  auto MALEnd = MAL.end();
+  auto MemAccsIt = MemAccs.begin();
+  while (true) {
+
+    while (OnlyMA && MALIt != MALEnd && (MA != *MALIt)) {
+      MALLastIt++;
+      MALIt++;
+    }
+
+    if (MALIt == MALEnd)
+      break;
+
+    while (*MemAccsIt != *MALIt)
+      MemAccsIt++;
+
+    MemAccs.erase(MemAccsIt);
+    MALIt = MAL.erase_after(MALLastIt);
+  }
+
+  if (!MAL.empty())
+    return;
+
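+  // All accesses of the access instruction have been removed above, so we
+  // can also drop the now empty access list and its entry in the
+  // instruction-to-access map.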
+  InstructionToAccess.erase(MA->getAccessInstruction());
+  delete &MAL;
+}
+
 void ScopStmt::hoistMemoryAccesses(MemoryAccessList &InvMAs,
                                    InvariantAccessesTy &InvariantEquivClasses) {
 
@@ -1362,24 +1410,8 @@
   // order uses is needed because the MemAccs is a vector and the order in
   // which the accesses of each memory access list (MAL) are stored in this
   // vector is reversed.
-  for (MemoryAccess *MA : InvMAs) {
-    auto &MAL = *lookupAccessesFor(MA->getAccessInstruction());
-    MAL.reverse();
-
-    auto MALIt = MAL.begin();
-    auto MALEnd = MAL.end();
-    auto MemAccsIt = MemAccs.begin();
-    while (MALIt != MALEnd) {
-      while (*MemAccsIt != *MALIt)
-        MemAccsIt++;
-
-      MALIt++;
-      MemAccs.erase(MemAccsIt);
-    }
-
-    InstructionToAccess.erase(MA->getAccessInstruction());
-    delete &MAL;
-  }
+  for (MemoryAccess *MA : InvMAs)
+    removeMemoryAccess(MA, false);
 
   // Get the context under which this statement, hence the memory accesses, are
   // executed.
@@ -2449,6 +2481,7 @@
   buildAliasChecks(AA);
 
   hoistInvariantLoads();
+  simplifyScalarAccesses();
   simplifySCoP(false);
 }
 
@@ -2503,6 +2536,7 @@
 
       StmtMap.erase(Stmt.getBasicBlock());
       StmtIt = Stmts.erase(StmtIt);
+      StatementsEliminated++;
       continue;
     }
 
@@ -2650,6 +2684,207 @@
                 compareInvariantAccesses);
 }
 
+bool Scop::canRecomputeInStmt(ScopStmt &Stmt,
+                              SmallPtrSet<Instruction *, 8> &Insts) {
+  if (Insts.empty())
+    return true;
+
+  // TODO: Check if we can actually move the instructions.
+  return false;
+}
+
+void Scop::simplifyScalarAccesses() {
+  using OutsideOperandsSetTy =
+      SmallVector<std::pair<Instruction *, Instruction *>, 4>;
+  using InstructionSetTy = SmallPtrSet<Instruction *, 8>;
+  using NonTrivialOperandsPairTy =
+      std::pair<InstructionSetTy, OutsideOperandsSetTy>;
+  DenseMap<Instruction *, NonTrivialOperandsPairTy> NonTrivialOperandsMap;
+
+  // First, iterate over all implicit write accesses, hence scalar
+  // definitions, and collect all operands that might have side effects or
+  // read memory, as well as all operands that are defined outside the SCoP.
+  // The former are needed to decide if we can recompute the scalar
+  // definition in another statement. The latter are needed to add read-only
+  // scalar accesses to the statement in which the scalar is recomputed. This
+  // allows us to identify values that are needed, e.g., for parallel code
+  // generation.
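+  //
+  // As a hypothetical illustration, for
+  //   %out = ...                    ; defined before the SCoP
+  //   %ld  = load float, float* %P  ; defined inside the SCoP
+  //   %def = fadd float %ld, %out
+  // the load %ld becomes a side-effect operand of %def (it reads memory)
+  // and the pair (%out, %def) becomes an outside operand of %def.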
+
+  SmallPtrSet<Instruction *, 8> Visited;
+  for (ScopStmt &Stmt : *this) {
+    for (MemoryAccess *MA : Stmt) {
+      if (MA->isExplicit() || MA->isRead() || MA->isPHI())
+        continue;
+
+      Instruction *AccessInst = MA->getAccessInstruction();
+      if (isa<TerminatorInst>(AccessInst))
+        AccessInst = cast<Instruction>(MA->getBaseAddr());
+
+      DEBUG(dbgs() << "Check operand tree of " << *AccessInst << "\n");
+
+      auto &NonTrivialOperands = NonTrivialOperandsMap[AccessInst];
+      auto &SideEffectOperands = NonTrivialOperands.first;
+      auto &OutsideOperands = NonTrivialOperands.second;
+
+      SmallPtrSet<Instruction *, 8> Worklist;
+      Worklist.insert(AccessInst);
+      Visited.clear();
+
+      while (!Worklist.empty()) {
+        Instruction *Inst = *Worklist.begin();
+        Worklist.erase(Inst);
+
+        if (!Visited.insert(Inst).second || !R.contains(Inst))
+          continue;
+
+        for (auto &InstOp : Inst->operands())
+          if (Instruction *InstOpInst = dyn_cast<Instruction>(InstOp)) {
+            if (R.contains(InstOpInst))
+              Worklist.insert(InstOpInst);
+            else
+              OutsideOperands.push_back(std::make_pair(InstOpInst, Inst));
+          }
+
+        if (Inst->mayHaveSideEffects() || Inst->mayReadFromMemory())
+          SideEffectOperands.insert(Inst);
+
+        if (isa<PHINode>(Inst) && !canSynthesize(Inst, &LI, SE, &R))
+          SideEffectOperands.insert(Inst);
+      }
+
+      DEBUG({
+        dbgs() << "\tSideEffectOperands: {\n";
+        for (auto *Op : SideEffectOperands)
+          dbgs() << "\t\t" << *Op << "\n";
+        dbgs() << "\t}\n";
+        dbgs() << "\tOutsideOperands: {\n";
+        for (auto &Op : OutsideOperands)
+          dbgs() << "\t\t" << *Op.first << "\n";
+        dbgs() << "\t}\n";
+      });
+    }
+  }
+
+  // In the second step we traverse all implicit read accesses, hence scalar
+  // uses in statements that do not define the scalar. However, at the moment
+  // we exclude PHIs to simplify the logic. For each use we will check if we
+  // can recompute the definition in the statement that contains the use (see
+  // canRecomputeInStmt()). If so we will:
+  //   o Add the definition to the dependent scalars set of the use statement.
+  //   o Add read accesses for all values defined prior to the SCoP if they
+  //     were present in the definition statement.
+  //   o Remove the use access from the use statement as it will be recomputed
+  //     and does not need to be communicated anymore.
+
+  for (ScopStmt &Stmt : *this) {
+    BasicBlock *StmtBB = Stmt.isBlockStmt() ? Stmt.getBasicBlock()
+                                            : Stmt.getRegion()->getEntry();
+    AccFuncSetType &AccList = AccFuncMap[StmtBB];
+
+    SmallVector<MemoryAccess *, 4> AdditionalAccesses;
+    SmallVector<MemoryAccess *, 4> ResolvedAccesses;
+
+    for (MemoryAccess *MA : Stmt) {
+      if (MA->isExplicit() || MA->isWrite() || MA->isPHI())
+        continue;
+
+      Instruction *DefInst = dyn_cast<Instruction>(MA->getAccessValue());
+      if (!DefInst)
+        continue;
+
+      ScopStmt *DefStmt = getStmtForBasicBlock(DefInst->getParent());
+      if (!DefStmt)
+        continue;
+
+      auto &NonTrivialOperands = NonTrivialOperandsMap[DefInst];
+      auto &SideEffectOperands = NonTrivialOperands.first;
+      if (!canRecomputeInStmt(Stmt, SideEffectOperands))
+        continue;
+
+      auto &OutsideOperands = NonTrivialOperands.second;
+      for (auto &OutsideOperandPair : OutsideOperands) {
+        Instruction *OutsideOperand = OutsideOperandPair.first;
+        Instruction *OutsideUser = OutsideOperandPair.second;
+        ScopStmt *UserStmt = getStmtForBasicBlock(OutsideUser->getParent());
+        assert(UserStmt);
+        auto *UseMAs = UserStmt->lookupAccessesFor(OutsideUser);
+        if (!UseMAs)
+          continue;
+
+        for (const MemoryAccess *UseMA : *UseMAs)
+          if (UseMA->getBaseAddr() == OutsideOperand)
+            AdditionalAccesses.push_back(UseMA->copy(AccList, &Stmt));
+      }
+
+      Stmt.addDependentScalar(DefInst);
+      ResolvedAccesses.push_back(MA);
+    }
+
+    ScalarsEliminated += ResolvedAccesses.size();
+    for (MemoryAccess *MA : ResolvedAccesses)
+      Stmt.removeMemoryAccess(MA, true);
+    for (MemoryAccess *MA : AdditionalAccesses)
+      Stmt.addAccess(MA, true);
+  }
+
+  // In the third and final step we iterate over the scalar definitions in
+  // the SCoP again. We will check if we removed the accesses for all users
+  // of the scalar in the SCoP. If so, we can safely remove the scalar write
+  // access, as all users will recompute the value. As we currently cannot
+  // simply use this logic to recompute values at the exit of the SCoP, we
+  // will not remove scalars that escape the SCoP.
+  //
+  // TODO: Introduce an exit ScopStmt that collects all escaping users so we
+  //       can recompute escaping values in this exit statement and remove
+  //       them from others.
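+  //
+  // For example, if %def is defined in Stmt0 and each use of %def inside
+  // the SCoP was resolved above, the scalar write in Stmt0 is dead and can
+  // be removed. If %def is additionally used after the SCoP (it escapes),
+  // the write has to remain.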
+
+  for (ScopStmt &Stmt : *this) {
+    SmallVector<MemoryAccess *, 4> ResolvedAccesses;
+    for (MemoryAccess *MA : Stmt) {
+      if (MA->isExplicit() || MA->isRead() || MA->isPHI())
+        continue;
+
+      Instruction *AccessInst = MA->getAccessInstruction();
+      if (isa<TerminatorInst>(AccessInst))
+        AccessInst = cast<Instruction>(MA->getBaseAddr());
+
+      if (!R.contains(AccessInst))
+        continue;
+
+      bool AllUsersRemoved = true;
+      for (auto *User : AccessInst->users()) {
+        auto *UserInst = cast<Instruction>(User);
+
+        auto *UserStmt = getStmtForBasicBlock(UserInst->getParent());
+        if (!UserStmt) {
+          AllUsersRemoved = false;
+          break;
+        }
+
+        auto *UserMAs = UserStmt->lookupAccessesFor(UserInst);
+        if (!UserMAs)
+          continue;
+
+        for (auto *UserMA : *UserMAs) {
+          if (UserMA->isExplicit() || UserMA->isWrite())
+            continue;
+
+          AllUsersRemoved = false;
+          break;
+        }
+      }
+
+      if (!AllUsersRemoved)
+        continue;
+
+      ResolvedAccesses.push_back(MA);
+    }
+
+    ScalarsEliminated += ResolvedAccesses.size();
+    for (MemoryAccess *MA : ResolvedAccesses)
+      Stmt.removeMemoryAccess(MA, true);
+  }
+}
+
 const ScopArrayInfo *
 Scop::getOrCreateScopArrayInfo(Value *BasePtr, Type *AccessType,
                                ArrayRef<const SCEV *> Sizes, bool IsPHI) {
@@ -3603,7 +3838,6 @@
 }
 
 void ScopInfo::getAnalysisUsage(AnalysisUsage &AU) const {
-  AU.addRequiredID(IndependentBlocksID);
  AU.addRequired();
  AU.addRequired();
  AU.addRequired();
Index: lib/CMakeLists.txt
===================================================================
--- lib/CMakeLists.txt
+++ lib/CMakeLists.txt
@@ -49,7 +49,6 @@
   Transform/Canonicalization.cpp
   Transform/CodePreparation.cpp
   Transform/DeadCodeElimination.cpp
-  Transform/IndependentBlocks.cpp
   Transform/ScheduleOptimizer.cpp
   ${POLLY_HEADER_FILES}
   )
Index: lib/CodeGen/BlockGenerators.cpp
===================================================================
--- lib/CodeGen/BlockGenerators.cpp
+++ lib/CodeGen/BlockGenerators.cpp
@@ -59,14 +59,33 @@
       EntryBB(nullptr), PHIOpMap(PHIOpMap), ScalarMap(ScalarMap),
       EscapeMap(EscapeMap), GlobalMap(GlobalMap) {}
 
+void BlockGenerator::recomputeDependentScalars(
+    ScopStmt &Stmt, ValueMapT &BBMap, LoopToScevMapT &LTS,
+    isl_id_to_ast_expr *NewAccesses) {
+
+  for (auto *Inst : Stmt.getDependentScalars())
+    if (!GlobalMap.count(Inst))
+      copyInstruction(Stmt, Inst, BBMap, LTS, NewAccesses, true);
+}
+
 Value *BlockGenerator::trySynthesizeNewValue(ScopStmt &Stmt, Value *Old,
                                              ValueMapT &BBMap,
-                                             LoopToScevMapT &LTS,
-                                             Loop *L) const {
+                                             LoopToScevMapT &LTS, Loop *L) {
   if (SE.isSCEVable(Old->getType()))
    if (const SCEV *Scev = SE.getSCEVAtScope(const_cast<Value *>(Old), L)) {
      if (!isa<SCEVCouldNotCompute>(Scev)) {
        const SCEV *NewScev = apply(Scev, LTS, SE);
+
+        // Recompute scalars needed for this SCEV.
+        const Region &R = Stmt.getParent()->getRegion();
+        SetVector<Value *> Values;
+        findValues(NewScev, Values);
+        for (Value *Val : Values) {
+          if (Instruction *Inst = dyn_cast<Instruction>(Val))
+            if (R.contains(Inst))
+              copyInstScalar(Stmt, Inst, BBMap, LTS, true);
+        }
+
        ValueMapT VTV;
        VTV.insert(BBMap.begin(), BBMap.end());
        VTV.insert(GlobalMap.begin(), GlobalMap.end());
@@ -80,7 +99,6 @@
              "Only instructions can be insert points for SCEVExpander");
        Value *Expanded = expandCodeFor(S, SE, DL, "polly", NewScev,
                                        Old->getType(), IP, &VTV);
-       BBMap[Old] = Expanded;
        return Expanded;
      }
 
@@ -90,7 +108,7 @@
 }
 
 Value *BlockGenerator::getNewValue(ScopStmt &Stmt, Value *Old, ValueMapT &BBMap,
-                                   LoopToScevMapT &LTS, Loop *L) const {
+                                   LoopToScevMapT &LTS, Loop *L, bool TryOnly) {
   // We assume constants never change.
   // This avoids map lookups for many calls to this function.
   if (isa<Constant>(Old))
@@ -121,29 +139,44 @@
   if (!Stmt.getParent()->getRegion().contains(Inst->getParent()))
     return const_cast<Value *>(Old);
 
+  if (TryOnly)
+    return nullptr;
+
   // The scalar dependence is neither available nor SCEVCodegenable.
   llvm_unreachable("Unexpected scalar dependence in region!");
-  return nullptr;
 }
 
 void BlockGenerator::copyInstScalar(ScopStmt &Stmt, Instruction *Inst,
-                                    ValueMapT &BBMap, LoopToScevMapT &LTS) {
+                                    ValueMapT &BBMap, LoopToScevMapT &LTS,
+                                    bool Recompute) {
   // We do not generate debug intrinsics as we did not investigate how to
   // copy them correctly. At the current state, they just crash the code
   // generation as the meta-data operands are not correctly copied.
   if (isa<DbgInfoIntrinsic>(Inst))
     return;
 
+  const Region &R = Stmt.getParent()->getRegion();
   Instruction *NewInst = Inst->clone();
 
   // Replace old operands with the new ones.
   for (Value *OldOperand : Inst->operands()) {
-    Value *NewOperand =
-        getNewValue(Stmt, OldOperand, BBMap, LTS, getLoopForInst(Inst));
+    Value *NewOperand = getNewValue(Stmt, OldOperand, BBMap, LTS,
+                                    getLoopForInst(Inst), Recompute);
+
+    if (Recompute) {
+      Instruction *NewOperandInst = dyn_cast_or_null<Instruction>(NewOperand);
+      if (!NewOperand || (NewOperandInst && R.contains(NewOperandInst))) {
+        if (Instruction *OldOperandInst = dyn_cast<Instruction>(OldOperand)) {
+          copyInstScalar(Stmt, OldOperandInst, BBMap, LTS, Recompute);
+          NewOperand = BBMap[OldOperand];
+        }
+      }
+    }
 
     if (!NewOperand) {
       assert(!isa<StoreInst>(NewInst) &&
              "Store instructions are always needed!");
+      assert(!Recompute && "Recompute copy should never fail");
       delete NewInst;
       return;
     }
 
@@ -230,10 +263,13 @@
 
 void BlockGenerator::copyInstruction(ScopStmt &Stmt, Instruction *Inst,
                                      ValueMapT &BBMap, LoopToScevMapT &LTS,
-                                     isl_id_to_ast_expr *NewAccesses) {
+                                     isl_id_to_ast_expr *NewAccesses,
+                                     bool Recompute) {
 
-  // First check for possible scalar dependences for this instruction.
-  generateScalarLoads(Stmt, Inst, BBMap);
+  // First check for possible scalar dependences for this instruction if we
+  // are not recomputing a scalar.
+  if (!Recompute)
+    generateScalarLoads(Stmt, Inst, BBMap);
 
   // Terminator instructions control the control flow. They are explicitly
   // expressed in the clast and do not need to be copied.
@@ -270,7 +306,7 @@
   if (isIgnoredIntrinsic(Inst))
     return;
 
-  copyInstScalar(Stmt, Inst, BBMap, LTS);
+  copyInstScalar(Stmt, Inst, BBMap, LTS, Recompute);
 }
 
 void BlockGenerator::copyStmt(ScopStmt &Stmt, LoopToScevMapT &LTS,
@@ -305,6 +341,8 @@
   Builder.SetInsertPoint(CopyBB->begin());
   EntryBB = &CopyBB->getParent()->getEntryBlock();
 
+  recomputeDependentScalars(Stmt, BBMap, LTS, NewAccesses);
+
   for (Instruction &Inst : *BB)
     copyInstruction(Stmt, &Inst, BBMap, LTS, NewAccesses);
Index: lib/CodeGen/CodeGeneration.cpp
===================================================================
--- lib/CodeGen/CodeGeneration.cpp
+++ lib/CodeGen/CodeGeneration.cpp
@@ -192,7 +192,6 @@
     // region tree.
AU.addPreserved(); AU.addPreserved(); - AU.addPreservedID(IndependentBlocksID); } }; } Index: lib/Makefile =================================================================== --- lib/Makefile +++ lib/Makefile @@ -135,7 +135,6 @@ Transform/Canonicalization.cpp \ Transform/CodePreparation.cpp \ Transform/DeadCodeElimination.cpp \ - Transform/IndependentBlocks.cpp \ Transform/ScheduleOptimizer.cpp \ ${GPGPU_FILES} \ ${ISL_CODEGEN_FILES} \ Index: lib/Support/RegisterPasses.cpp =================================================================== --- lib/Support/RegisterPasses.cpp +++ lib/Support/RegisterPasses.cpp @@ -146,7 +146,6 @@ initializeCodePreparationPass(Registry); initializeDeadCodeElimPass(Registry); initializeDependenceInfoPass(Registry); - initializeIndependentBlocksPass(Registry); initializeJSONExporterPass(Registry); initializeJSONImporterPass(Registry); initializeIslAstInfoPass(Registry); Index: lib/Transform/IndependentBlocks.cpp =================================================================== --- lib/Transform/IndependentBlocks.cpp +++ /dev/null @@ -1,373 +0,0 @@ -//===------ IndependentBlocks.cpp - Create Independent Blocks in Regions --===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// Create independent blocks in the regions detected by ScopDetection. -// -//===----------------------------------------------------------------------===// -// -#include "polly/LinkAllPasses.h" -#include "polly/Options.h" -#include "polly/ScopDetection.h" -#include "polly/Support/ScopHelper.h" -#include "llvm/Analysis/AliasAnalysis.h" -#include "llvm/Analysis/BasicAliasAnalysis.h" -#include "llvm/Analysis/DominanceFrontier.h" -#include "llvm/Analysis/GlobalsModRef.h" -#include "llvm/Analysis/LoopInfo.h" -#include "llvm/Analysis/PostDominators.h" -#include "llvm/Analysis/RegionInfo.h" -#include "llvm/Analysis/ScalarEvolutionAliasAnalysis.h" -#include "llvm/Analysis/ValueTracking.h" -#include "llvm/IR/IntrinsicInst.h" -#include "llvm/Support/CommandLine.h" -#include "llvm/Support/Debug.h" -#include "llvm/Transforms/Utils/Local.h" -#include - -using namespace polly; -using namespace llvm; - -#define DEBUG_TYPE "polly-independent" - -namespace { -struct IndependentBlocks : public FunctionPass { - RegionInfo *RI; - ScalarEvolution *SE; - ScopDetection *SD; - LoopInfo *LI; - - BasicBlock *AllocaBlock; - - static char ID; - - IndependentBlocks() : FunctionPass(ID) {} - - // Create new code for every instruction operator that can be expressed by a - // SCEV. Like this there are just two types of instructions left: - // - // 1. Instructions that only reference loop ivs or parameters outside the - // region. - // - // 2. Instructions that are not used for any memory modification. (These - // will be ignored later on.) - // - // Blocks containing only these kind of instructions are called independent - // blocks as they can be scheduled arbitrarily. - bool createIndependentBlocks(BasicBlock *BB, const Region *R); - bool createIndependentBlocks(const Region *R); - - // Elimination on the Scop to eliminate the scalar dependences come with - // trivially dead instructions. - bool eliminateDeadCode(const Region *R); - - //===--------------------------------------------------------------------===// - /// Non trivial scalar dependences checking functions. 
-  /// Non trivial scalar dependences occur when the def and use are located in
-  /// different BBs and we can not move them into the same one. This will
-  /// prevent use from schedule BBs arbitrarily.
-  ///
-  /// @brief This function checks if a scalar value that is part of the
-  ///        Scop is used outside of the Scop.
-  ///
-  /// @param Use The use of the instruction.
-  /// @param R   The maximum region in the Scop.
-  ///
-  /// @return Return true if the Use of an instruction and the instruction
-  ///         itself form a non trivial scalar dependence.
-  static bool isEscapeUse(const Value *Use, const Region *R);
-
-  //===--------------------------------------------------------------------===//
-  /// Operand tree moving functions.
-  /// Trivial scalar dependences can eliminate by move the def to the same BB
-  /// that containing use.
-  ///
-  /// @brief Check if the instruction can be moved to another place safely.
-  ///
-  /// @param Inst The instruction.
-  ///
-  /// @return Return true if the instruction can be moved safely, false
-  ///         otherwise.
-  static bool isSafeToMove(Instruction *Inst);
-
-  typedef std::map<Instruction *, Instruction *> ReplacedMapType;
-
-  /// @brief Move all safe to move instructions in the Operand Tree (DAG) to
-  ///        eliminate trivial scalar dependences.
-  ///
-  /// @param Inst        The root of the operand Tree.
-  /// @param R           The maximum region in the Scop.
-  /// @param ReplacedMap The map that mapping original instruction to the moved
-  ///                    instruction.
-  /// @param InsertPos   The insert position of the moved instructions.
-  void moveOperandTree(Instruction *Inst, const Region *R,
-                       ReplacedMapType &ReplacedMap, Instruction *InsertPos);
-
-  bool isIndependentBlock(const Region *R, BasicBlock *BB) const;
-  bool areAllBlocksIndependent(const Region *R) const;
-
-  bool runOnFunction(Function &F);
-  void verifyAnalysis() const;
-  void verifyScop(const Region *R) const;
-  void getAnalysisUsage(AnalysisUsage &AU) const;
-};
-}
-
-bool IndependentBlocks::isSafeToMove(Instruction *Inst) {
-  if (Inst->mayReadFromMemory() || Inst->mayWriteToMemory())
-    return false;
-
-  return isSafeToSpeculativelyExecute(Inst);
-}
-
-void IndependentBlocks::moveOperandTree(Instruction *Inst, const Region *R,
-                                        ReplacedMapType &ReplacedMap,
-                                        Instruction *InsertPos) {
-  BasicBlock *CurBB = Inst->getParent();
-
-  // Depth first traverse the operand tree (or operand dag, because we will
-  // stop at PHINodes, so there are no cycle).
-  typedef Instruction::op_iterator ChildIt;
-  std::vector<std::pair<Instruction *, ChildIt>> WorkStack;
-
-  WorkStack.push_back(std::make_pair(Inst, Inst->op_begin()));
-  DenseSet<Instruction *> VisitedSet;
-
-  while (!WorkStack.empty()) {
-    Instruction *CurInst = WorkStack.back().first;
-    ChildIt It = WorkStack.back().second;
-    DEBUG(dbgs() << "Checking Operand of Node:\n" << *CurInst << "\n------>\n");
-    if (It == CurInst->op_end()) {
-      // Insert the new instructions in topological order.
-      if (!CurInst->getParent()) {
-        CurInst->insertBefore(InsertPos);
-        SE->forgetValue(CurInst);
-      }
-
-      WorkStack.pop_back();
-    } else {
-      // for each node N,
-      Instruction *Operand = dyn_cast<Instruction>(*It);
-      ++WorkStack.back().second;
-
-      // Can not move no instruction value.
-      if (Operand == 0)
-        continue;
-
-      DEBUG(dbgs() << "For Operand:\n" << *Operand << "\n--->");
-
-      // If the Scop Region does not contain N, skip it and all its operands and
-      // continue: because we reach a "parameter".
-      // FIXME: we must keep the predicate instruction inside the Scop,
-      // otherwise it will be translated to a load instruction, and we can not
-      // handle load as affine predicate at this moment.
-      if (!R->contains(Operand) && !isa<TerminatorInst>(CurInst)) {
-        DEBUG(dbgs() << "Out of region.\n");
-        continue;
-      }
-
-      if (canSynthesize(Operand, LI, SE, R)) {
-        DEBUG(dbgs() << "is IV.\n");
-        continue;
-      }
-
-      // We can not move the operand, a non trivial scalar dependence found!
-      if (!isSafeToMove(Operand)) {
-        DEBUG(dbgs() << "Can not move!\n");
-        continue;
-      }
-
-      // Do not need to move instruction if it is contained in the same BB with
-      // the root instruction.
-      if (Operand->getParent() == CurBB) {
-        DEBUG(dbgs() << "No need to move.\n");
-        // Try to move its operand, but do not visit an instuction twice.
-        if (VisitedSet.insert(Operand).second)
-          WorkStack.push_back(std::make_pair(Operand, Operand->op_begin()));
-        continue;
-      }
-
-      // Now we need to move Operand to CurBB.
-      // Check if we already moved it.
-      ReplacedMapType::iterator At = ReplacedMap.find(Operand);
-      if (At != ReplacedMap.end()) {
-        DEBUG(dbgs() << "Moved.\n");
-        Instruction *MovedOp = At->second;
-        It->set(MovedOp);
-        SE->forgetValue(MovedOp);
-      } else {
-        // Note that NewOp is not inserted in any BB now, we will insert it when
-        // it popped form the work stack, so it will be inserted in topological
-        // order.
-        Instruction *NewOp = Operand->clone();
-        NewOp->setName(Operand->getName() + ".moved.to." + CurBB->getName());
-        DEBUG(dbgs() << "Move to " << *NewOp << "\n");
-        It->set(NewOp);
-        ReplacedMap.insert(std::make_pair(Operand, NewOp));
-        SE->forgetValue(Operand);
-
-        // Process its operands, but do not visit an instuction twice.
-        if (VisitedSet.insert(NewOp).second)
-          WorkStack.push_back(std::make_pair(NewOp, NewOp->op_begin()));
-      }
-    }
-  }
-
-  SE->forgetValue(Inst);
-}
-
-bool IndependentBlocks::createIndependentBlocks(BasicBlock *BB,
-                                                const Region *R) {
-  std::vector<Instruction *> WorkList;
-  for (Instruction &Inst : *BB)
-    if (!isSafeToMove(&Inst) && !canSynthesize(&Inst, LI, SE, R))
-      WorkList.push_back(&Inst);
-
-  ReplacedMapType ReplacedMap;
-  Instruction *InsertPos = BB->getFirstNonPHIOrDbg();
-
-  for (Instruction *Inst : WorkList)
-    if (!isa<PHINode>(Inst))
-      moveOperandTree(Inst, R, ReplacedMap, InsertPos);
-
-  // The BB was changed if we replaced any operand.
-  return !ReplacedMap.empty();
-}
-
-bool IndependentBlocks::createIndependentBlocks(const Region *R) {
-  bool Changed = false;
-
-  for (BasicBlock *BB : R->blocks())
-    Changed |= createIndependentBlocks(BB, R);
-
-  return Changed;
-}
-
-bool IndependentBlocks::eliminateDeadCode(const Region *R) {
-  std::vector<Instruction *> WorkList;
-
-  // Find all trivially dead instructions.
-  for (BasicBlock *BB : R->blocks())
-    for (Instruction &Inst : *BB)
-      if (!isIgnoredIntrinsic(&Inst) && isInstructionTriviallyDead(&Inst))
-        WorkList.push_back(&Inst);
-
-  if (WorkList.empty())
-    return false;
-
-  // Delete them so the cross BB scalar dependences come with them will
-  // also be eliminated.
-  while (!WorkList.empty()) {
-    RecursivelyDeleteTriviallyDeadInstructions(WorkList.back());
-    WorkList.pop_back();
-  }
-
-  return true;
-}
-
-bool IndependentBlocks::isEscapeUse(const Value *Use, const Region *R) {
-  // Non-instruction user will never escape.
-  if (!isa<Instruction>(Use))
-    return false;
-
-  return !R->contains(cast<Instruction>(Use));
-}
-
-bool IndependentBlocks::isIndependentBlock(const Region *R,
-                                           BasicBlock *BB) const {
-  for (Instruction &Inst : *BB) {
-    if (canSynthesize(&Inst, LI, SE, R))
-      continue;
-    if (isIgnoredIntrinsic(&Inst))
-      continue;
-
-    // A value inside the Scop is referenced outside.
-    for (User *U : Inst.users()) {
-      if (isEscapeUse(U, R)) {
-        DEBUG(dbgs() << "Instruction not independent:\n");
-        DEBUG(dbgs() << "Instruction used outside the Scop!\n");
-        DEBUG(Inst.print(dbgs()));
-        DEBUG(dbgs() << "\n");
-        return false;
-      }
-    }
-  }
-
-  return true;
-}
-
-bool IndependentBlocks::areAllBlocksIndependent(const Region *R) const {
-  for (BasicBlock *BB : R->blocks())
-    if (!isIndependentBlock(R, BB))
-      return false;
-
-  return true;
-}
-
-void IndependentBlocks::getAnalysisUsage(AnalysisUsage &AU) const {
-  // FIXME: If we set preserves cfg, the cfg only passes do not need to
-  // be "addPreserved"?
-  AU.addPreserved();
-  AU.addPreserved();
-  AU.addPreserved();
-  AU.addPreserved();
-  AU.addPreserved();
-  AU.addPreserved();
-  AU.addRequired();
-  AU.addPreserved();
-  AU.addRequired();
-  AU.addPreserved();
-  AU.addRequired();
-  AU.addPreserved();
-  AU.addPreserved();
-  AU.addRequired();
-  AU.addPreserved();
-}
-
-bool IndependentBlocks::runOnFunction(llvm::Function &F) {
-
-  bool Changed = false;
-
-  RI = &getAnalysis<RegionInfoPass>().getRegionInfo();
-  LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
-  SD = &getAnalysis<ScopDetection>();
-  SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
-
-  AllocaBlock = &F.getEntryBlock();
-
-  DEBUG(dbgs() << "Run IndepBlock on " << F.getName() << '\n');
-
-  for (const Region *R : *SD) {
-    Changed |= createIndependentBlocks(R);
-    Changed |= eliminateDeadCode(R);
-  }
-
-  verifyAnalysis();
-
-  return Changed;
-}
-
-void IndependentBlocks::verifyAnalysis() const {}
-
-void IndependentBlocks::verifyScop(const Region *R) const {
-  assert(areAllBlocksIndependent(R) && "Cannot generate independent blocks");
-}
-
-char IndependentBlocks::ID = 0;
-char &polly::IndependentBlocksID = IndependentBlocks::ID;
-
-Pass *polly::createIndependentBlocksPass() { return new IndependentBlocks(); }
-
-INITIALIZE_PASS_BEGIN(IndependentBlocks, "polly-independent",
-                      "Polly - Create independent blocks", false, false);
-INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass);
-INITIALIZE_PASS_DEPENDENCY(RegionInfoPass);
-INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass);
-INITIALIZE_PASS_DEPENDENCY(ScopDetection);
-INITIALIZE_PASS_END(IndependentBlocks, "polly-independent",
-                    "Polly - Create independent blocks", false, false)
Index: test/IndependentBlocks/inter_bb_scalar_dep.ll
===================================================================
--- test/IndependentBlocks/inter_bb_scalar_dep.ll
+++ /dev/null
@@ -1,48 +0,0 @@
-; RUN: opt %loadPolly -basicaa -polly-independent -S < %s | FileCheck %s -check-prefix=SCALARACCESS
-
-; void f(long A[], int N, int *init_ptr) {
-;   long i, j;
-;
-;   for (i = 0; i < N; ++i) {
-;     init = *init_ptr;
-;     for (i = 0; i < N; ++i) {
-;       A[i] = init + 2;
-;     }
-;   }
-; }
-
-target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
-
-define void @f(i64* noalias %A, i64 %N, i64* noalias %init_ptr) nounwind {
-entry:
-
-; SCALARACCESS-NOT: alloca
-  br label %for.i
-
-for.i:
-  %indvar.i = phi i64 [ 0, %entry ], [ %indvar.i.next, %for.i.end ]
-  %indvar.i.next = add nsw i64 %indvar.i, 1
-  br label %entry.next
-
-entry.next:
-  %init = load i64, i64* %init_ptr
-; SCALARACCESS-NOT:
store - br label %for.j - -for.j: - %indvar.j = phi i64 [ 0, %entry.next ], [ %indvar.j.next, %for.j ] - %init_plus_two = add i64 %init, 2 -; SCALARACCESS: %init_plus_two = add i64 %init, 2 - %scevgep = getelementptr i64, i64* %A, i64 %indvar.j - store i64 %init_plus_two, i64* %scevgep - %indvar.j.next = add nsw i64 %indvar.j, 1 - %exitcond.j = icmp eq i64 %indvar.j.next, %N - br i1 %exitcond.j, label %for.i.end, label %for.j - -for.i.end: - %exitcond.i = icmp eq i64 %indvar.i.next, %N - br i1 %exitcond.i, label %return, label %for.i - -return: - ret void -} Index: test/IndependentBlocks/intra_and_inter_bb_scalar_dep.ll =================================================================== --- test/IndependentBlocks/intra_and_inter_bb_scalar_dep.ll +++ /dev/null @@ -1,56 +0,0 @@ -; RUN: opt %loadPolly -basicaa -polly-independent -S < %s | FileCheck %s -check-prefix=SCALARACCESS - -; void f(long A[], int N, int *init_ptr) { -; long i, j; -; -; for (i = 0; i < N; ++i) { -; init = *init_ptr; -; for (i = 0; i < N; ++i) { -; init2 = *init_ptr; -; A[i] = init + init2; -; } -; } -; } - -target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128" - -define void @f(i64* noalias %A, i64 %N, i64* noalias %init_ptr) nounwind { -entry: - -; SCALARACCESS-NOT: alloca - br label %for.i - -for.i: - %indvar.i = phi i64 [ 0, %entry ], [ %indvar.i.next, %for.i.end ] - %indvar.i.next = add nsw i64 %indvar.i, 1 - br label %entry.next - -entry.next: - %init = load i64, i64* %init_ptr -; SCALARACCESS-NOT: store - br label %for.j - -for.j: - %indvar.j = phi i64 [ 0, %entry.next ], [ %indvar.j.next, %for.j ] - %init_2 = load i64, i64* %init_ptr - %init_sum = add i64 %init, %init_2 - -; The SCEV of %init_sum is (%init + %init_2). It is referring to both an -; UnknownValue in the same and in a different basic block. We want only the -; reference to the different basic block to be replaced. 
- -; SCALARACCESS: %init_2 = load i64, i64* %init_ptr -; SCALARACCESS: %init_sum = add i64 %init, %init_2 - %scevgep = getelementptr i64, i64* %A, i64 %indvar.j - store i64 %init_sum, i64* %scevgep - %indvar.j.next = add nsw i64 %indvar.j, 1 - %exitcond.j = icmp eq i64 %indvar.j.next, %N - br i1 %exitcond.j, label %for.i.end, label %for.j - -for.i.end: - %exitcond.i = icmp eq i64 %indvar.i.next, %N - br i1 %exitcond.i, label %return, label %for.i - -return: - ret void -} Index: test/IndependentBlocks/intra_bb_scalar_dep.ll =================================================================== --- test/IndependentBlocks/intra_bb_scalar_dep.ll +++ /dev/null @@ -1,50 +0,0 @@ -; RUN: opt %loadPolly -basicaa -polly-independent -S < %s | FileCheck %s - -; void f(long A[], int N, int *init_ptr) { -; long i, j; -; -; for (i = 0; i < N; ++i) { -; for (i = 0; i < N; ++i) { -; init = *init_ptr; -; A[i] = init + 2; -; } -; } -; } - -target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128" - -define void @f(i64* noalias %A, i64 %N, i64* noalias %init_ptr) nounwind { -entry: - -; CHECK: entry -; CHECK: br label %for.i - br label %for.i - -for.i: - %indvar.i = phi i64 [ 0, %entry ], [ %indvar.i.next, %for.i.end ] - %indvar.i.next = add nsw i64 %indvar.i, 1 - br label %entry.next - -entry.next: - br label %for.j - -for.j: - %indvar.j = phi i64 [ 0, %entry.next ], [ %indvar.j.next, %for.j ] - %init = load i64, i64* %init_ptr - %init_plus_two = add i64 %init, 2 -; The scalar evolution of %init_plus_two is (2 + %init). So we have a -; non-trivial scalar evolution referring to a value in the same basic block. -; We want to ensure that this scalar is not translated into a memory copy. - %scevgep = getelementptr i64, i64* %A, i64 %indvar.j - store i64 %init_plus_two, i64* %scevgep - %indvar.j.next = add nsw i64 %indvar.j, 1 - %exitcond.j = icmp eq i64 %indvar.j.next, %N - br i1 %exitcond.j, label %for.i.end, label %for.j - -for.i.end: - %exitcond.i = icmp eq i64 %indvar.i.next, %N - br i1 %exitcond.i, label %return, label %for.i - -return: - ret void -} Index: test/IndependentBlocks/phi_outside_scop.ll =================================================================== --- test/IndependentBlocks/phi_outside_scop.ll +++ /dev/null @@ -1,33 +0,0 @@ -; RUN: opt %loadPolly -basicaa -polly-independent -S < %s | FileCheck %s -check-prefix=SCALAR -target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" - -define void @phi_nodes_outside() { -entry: - br label %for.i.1 - -for.i.1: - %i.1 = phi i32 [ %i.1.next, %for.i.1 ], [ 0, %entry ] - %i.1.next = add nsw i32 %i.1, 1 - br i1 false, label %for.i.1 , label %for.i.2.preheader - -for.i.2.preheader: - br label %for.i.2 - -for.i.2: -; The value of %i.1.next is used outside of the scop in a PHI node. 
- %i.2 = phi i32 [ %i.2.next , %for.i.2 ], [ %i.1.next, %for.i.2.preheader ] - %i.2.next = add nsw i32 %i.2, 1 - fence seq_cst - br i1 false, label %for.i.2, label %cleanup - -cleanup: - ret void -} - -; SCALAR-NOT: alloca - -; SCALAR: for.i.2.preheader: -; SCALAR-NOT: load - -; SCALAR: for.i.2: -; SCALAR: %i.2 = phi i32 [ %i.2.next, %for.i.2 ], [ %i.1.next, %for.i.2.preheader ] Index: test/IndependentBlocks/scalar_to_array.ll =================================================================== --- test/IndependentBlocks/scalar_to_array.ll +++ /dev/null @@ -1,222 +0,0 @@ -; RUN: opt %loadPolly -basicaa -polly-independent -S < %s | FileCheck %s -check-prefix=SCALARACCESS -; RAUN: opt %loadPolly -basicaa -polly-independent < %s -S | FileCheck %s -check-prefix=SCALARACCESS - -target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" - -@A = common global [1024 x float] zeroinitializer, align 8 - -define i32 @empty() nounwind { -entry: - fence seq_cst - br label %for.cond - -for.cond: - %indvar = phi i64 [ %indvar.next, %for.inc ], [ 0, %entry ] - %exitcond = icmp ne i64 %indvar, 1024 - br i1 %exitcond, label %for.body, label %return - -for.body: - br label %for.inc - -for.inc: - %indvar.next = add i64 %indvar, 1 - br label %for.cond - -return: - fence seq_cst - ret i32 0 -} - - -; SCALARACCESS-LABEL: @array_access() -define i32 @array_access() nounwind { -entry: - fence seq_cst - br label %for.cond -; SCALARACCESS: entry: -; SCALARACCESS-NOT: alloca - -for.cond: - %indvar = phi i64 [ %indvar.next, %for.inc ], [ 0, %entry ] - %exitcond = icmp ne i64 %indvar, 1024 - br i1 %exitcond, label %for.body, label %return - -for.body: - %arrayidx = getelementptr [1024 x float], [1024 x float]* @A, i64 0, i64 %indvar - %float = uitofp i64 %indvar to float - store float %float, float* %arrayidx - br label %for.inc - -; SCALARACCESS: for.body: -; SCALARACCESS: %float = uitofp i64 %indvar to float -; SCALARACCESS: store float %float, float* %arrayidx - -for.inc: - %indvar.next = add i64 %indvar, 1 - br label %for.cond - -return: - fence seq_cst - ret i32 0 -} - -; SCALARACCESS-LABEL: @intra_scop_dep() -define i32 @intra_scop_dep() nounwind { -entry: - fence seq_cst - br label %for.cond - -; SCALARACCESS: entry: -; SCALARACCESS-NOT: alloca -; SCALARACCESS: fence - -for.cond: - %indvar = phi i64 [ %indvar.next, %for.inc ], [ 0, %entry ] - %exitcond = icmp ne i64 %indvar, 1024 - br i1 %exitcond, label %for.body.a, label %return - -for.body.a: - %arrayidx = getelementptr [1024 x float], [1024 x float]* @A, i64 0, i64 %indvar - %scalar = load float, float* %arrayidx - br label %for.body.b - -; SCALARACCESS: for.body.a: -; SCALARACCESS: %arrayidx = getelementptr [1024 x float], [1024 x float]* @A, i64 0, i64 %indvar -; SCALARACCESS: %scalar = load float, float* %arrayidx -; SCALARACCESS-NOT: store -; SCALARACCESS: br label %for.body.b - -for.body.b: - %arrayidx2 = getelementptr [1024 x float], [1024 x float]* @A, i64 0, i64 %indvar - %float = uitofp i64 %indvar to float - %sum = fadd float %scalar, %float - store float %sum, float* %arrayidx2 - br label %for.inc - -; SCALARACCESS: for.body.b: -; SCALARACCESS: %arrayidx2 = getelementptr [1024 x float], [1024 x float]* @A, i64 0, i64 %indvar -; SCALARACCESS: %float = uitofp i64 %indvar to float -; SCALARACCESS-NOT: load -; SCALARACCESS: %sum = fadd float %scalar, %float -; SCALARACCESS: store float %sum, float* %arrayidx2 -; SCALARACCESS: br label %for.inc - 
-for.inc: - %indvar.next = add i64 %indvar, 1 - br label %for.cond - -return: - fence seq_cst - ret i32 0 -} - -; It is not possible to have a scop which accesses a scalar element that is -; a global variable. All global variables are pointers containing possibly -; a single element. - -; SCALARACCESS-LABEL: @use_after_scop() -define i32 @use_after_scop() nounwind { -entry: - fence seq_cst - br label %for.head - -; SCALARACCESS: entry: -; SCALARACCESS-NOT: alloca -; SCALARACCESS: fence - -for.head: - %indvar = phi i64 [ %indvar.next, %for.inc ], [ 0, %entry ] - br label %for.body - -for.body: - %arrayidx = getelementptr [1024 x float], [1024 x float]* @A, i64 0, i64 %indvar - %scalar = load float, float* %arrayidx - br label %for.inc - -; SCALARACCESS: for.body: -; SCALARACCESS: %scalar = load float, float* %arrayidx -; SCALARACCESS-NOT: store float %scalar - -for.inc: - %indvar.next = add i64 %indvar, 1 - %exitcond = icmp ne i64 %indvar, 1024 - br i1 %exitcond, label %for.head, label %for.after - -for.after: - fence seq_cst - %return_value = fptosi float %scalar to i32 - br label %return - -; SCALARACCESS: for.after: -; SCALARACCESS: fence seq_cst -; SCALARACCESS: %return_value = fptosi float %scalar to i32 - -return: - ret i32 %return_value -} - -; We currently do not transform scalar references, that have only read accesses -; in the scop. There are two reasons for this: -; -; o We don't introduce additional memory references which may yield to compile -; time overhead. -; o For integer values, such a translation may block the use of scalar -; evolution on those values. -; -; SCALARACCESS-LABEL: @before_scop() -define i32 @before_scop() nounwind { -entry: - br label %preheader - -preheader: - %scalar = fadd float 4.0, 5.0 - fence seq_cst - br label %for.cond - -for.cond: - %indvar = phi i64 [ %indvar.next, %for.inc ], [ 0, %preheader ] - %exitcond = icmp ne i64 %indvar, 1024 - br i1 %exitcond, label %for.body, label %return - -for.body: - %arrayidx = getelementptr [1024 x float], [1024 x float]* @A, i64 0, i64 %indvar - store float %scalar, float* %arrayidx - br label %for.inc - -; SCALARACCESS: for.body: -; SCALARACCESS: store float %scalar, float* %arrayidx - -for.inc: - %indvar.next = add i64 %indvar, 1 - br label %for.cond - -return: - fence seq_cst - ret i32 0 -} - -; Currently not working -; SCALARACCESS-LABEL: @param_before_scop( -define i32 @param_before_scop(float %scalar) nounwind { -entry: - fence seq_cst - br label %for.cond - -for.cond: - %indvar = phi i64 [ %indvar.next, %for.inc ], [ 0, %entry ] - %exitcond = icmp ne i64 %indvar, 1024 - br i1 %exitcond, label %for.body, label %return - -for.body: - %arrayidx = getelementptr [1024 x float], [1024 x float]* @A, i64 0, i64 %indvar - store float %scalar, float* %arrayidx - br label %for.inc - -for.inc: - %indvar.next = add i64 %indvar, 1 - br label %for.cond - -return: - fence seq_cst - ret i32 0 -} Index: test/IndependentBlocks/scev-invalidated.ll =================================================================== --- test/IndependentBlocks/scev-invalidated.ll +++ /dev/null @@ -1,21 +0,0 @@ -; RUN: opt %loadPolly -polly-independent < %s -target datalayout ="e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" - -define void @arc_either() { -entry: - %ang2.2.reg2mem = alloca i64 - br i1 undef, label %return, label %if.then6 - -if.then6: - %rem7 = srem i64 undef, 1474560 - br i1 false, label %if.else, label %return - -if.else: - 
%add16 = add nsw i64 %rem7, 1474560 - %rem7.add16 = select i1 undef, i64 %rem7, i64 %add16 - store i64 %rem7.add16, i64* %ang2.2.reg2mem - br label %return - -return: - ret void -} Index: test/Isl/CodeGen/20110312-Fail-without-basicaa.ll =================================================================== --- test/Isl/CodeGen/20110312-Fail-without-basicaa.ll +++ /dev/null @@ -1,26 +0,0 @@ -; This should be run without alias analysis enabled. -;RUN: opt %loadPolly -polly-independent < %s -target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32-n8:16:32" - -define i32 @main() nounwind { -entry: - %t.02.reg2mem = alloca float - br label %entry.split - -entry.split: ; preds = %entry - store float 0.000000e+00, float* %t.02.reg2mem - br label %for.body - -for.body: ; preds = %for.body, %entry.split - %j.01 = phi i32 [ 0, %entry.split ], [ %inc1, %for.body ] - %t.02.reload = load float, float* %t.02.reg2mem - %inc = fadd float %t.02.reload, 1.000000e+00 - %inc1 = add nsw i32 %j.01, 1 - %exitcond = icmp eq i32 %inc1, 5000001 - store float %inc, float* %t.02.reg2mem - br i1 %exitcond, label %for.end, label %for.body - -for.end: ; preds = %for.body - %conv = fptosi float %inc to i32 - ret i32 %conv -} Index: test/Isl/CodeGen/OpenMP/invariant_base_pointer_preloaded_different_bb.ll =================================================================== --- test/Isl/CodeGen/OpenMP/invariant_base_pointer_preloaded_different_bb.ll +++ test/Isl/CodeGen/OpenMP/invariant_base_pointer_preloaded_different_bb.ll @@ -7,8 +7,8 @@ ; for (int i = 1; i < 1000; i++) ; A[i] += /* split bb */ A[0]; ; } -; A[0] tmp (unused) A -; CHECK: %polly.par.userContext = alloca { float, float*, float* } +; A[0] A +; CHECK: %polly.par.userContext = alloca { float, float* } ; ; CHECK: %polly.subfn.storeaddr.polly.access.A.load = getelementptr inbounds ; CHECK: store float %polly.access.A.load, float* %polly.subfn.storeaddr.polly.access.A.load Index: test/Isl/CodeGen/eliminate-multiple-scalar-fp-reads.ll =================================================================== --- /dev/null +++ test/Isl/CodeGen/eliminate-multiple-scalar-fp-reads.ll @@ -0,0 +1,90 @@ +; RUN: opt %loadPolly -polly-scops -analyze < %s | FileCheck %s --check-prefix=SCOP +; RUN: opt %loadPolly -polly-codegen -S < %s | FileCheck %s +; +; SCOP-NOT: Scalar: 1 +; SCOP-NOT: ReadAccess +; +; Verify the original region is untouched but all computation is moved to the +; only place it is needed in the generated region. 
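+;
+; With the reads recomputed, no access in the SCoP is modeled as a scalar
+; ([Scalar: 1]) and no read access remains, which is what the SCOP-NOT lines
+; above verify; the chain of additions is materialized again in
+; polly.stmt.for.body.f right before the store.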
+; +; CHECK: for.body.f: +; CHECK-NEXT: %idxprom = sext i32 %i.0 to i64 +; CHECK-NEXT: %arrayidx = getelementptr inbounds float, float* %A, i64 %idxprom +; CHECK-NEXT: store float %add5, float* %arrayidx +; +; CHECK: polly.stmt.for.body.f: +; CHECK: %0 = trunc i64 %polly.indvar to i32 +; CHECK: %1 = shl i32 %0, 1 +; CHECK: %p_conv = sitofp i32 %1 to float +; CHECK: %p_add = fadd float %p_conv, %p_conv +; CHECK: %p_add3 = fadd float %p_conv, %p_add +; CHECK: %p_add1 = fadd float %p_add, %p_conv +; CHECK: %p_add4 = fadd float %p_add3, %p_add1 +; CHECK: %p_add2 = fadd float %p_conv, %p_conv +; CHECK: %p_add5 = fadd float %p_add4, %p_add2 +; CHECK: %scevgep = getelementptr float, float* %A, i64 %polly.indvar +; CHECK: store float %p_add5, float* %scevgep +; +; void f(float *A) { +; for (int i = 0; i < 1000; i++) { +; float a = i * 2; +; /* split BB */ +; float b = a + a; +; /* split BB */ +; float c = b + a; +; /* split BB */ +; float d = a + a; +; /* split BB */ +; float e = a + b + c + d; +; /* split BB */ +; A[i] = e; +; } +; } +; +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" + +define void @f(float* %A) { +entry: + br label %for.cond + +for.cond: ; preds = %for.inc, %entry + %i.0 = phi i32 [ 0, %entry ], [ %inc, %for.inc ] + %cmp = icmp slt i32 %i.0, 1000 + br i1 %cmp, label %for.body.a, label %for.end + +for.body.a: + %mul = mul nsw i32 %i.0, 2 + %conv = sitofp i32 %mul to float + br label %for.body.b + +for.body.b: + %add = fadd float %conv, %conv + br label %for.body.c + +for.body.c: + %add1 = fadd float %add, %conv + br label %for.body.d + +for.body.d: + %add2 = fadd float %conv, %conv + br label %for.body.e + +for.body.e: + %add3 = fadd float %conv, %add + %add4 = fadd float %add3, %add1 + %add5 = fadd float %add4, %add2 + br label %for.body.f + +for.body.f: + %idxprom = sext i32 %i.0 to i64 + %arrayidx = getelementptr inbounds float, float* %A, i64 %idxprom + store float %add5, float* %arrayidx, align 4 + br label %for.inc + +for.inc: ; preds = %for.body + %inc = add nsw i32 %i.0, 1 + br label %for.cond + +for.end: ; preds = %for.cond + ret void +} Index: test/Isl/CodeGen/eliminate-multiple-scalar-reads.ll =================================================================== --- /dev/null +++ test/Isl/CodeGen/eliminate-multiple-scalar-reads.ll @@ -0,0 +1,82 @@ +; RUN: opt %loadPolly -polly-scops -analyze < %s | FileCheck %s --check-prefix=SCOP +; RUN: opt %loadPolly -polly-codegen -S < %s | FileCheck %s +; +; SCOP-NOT: Scalar: 1 +; SCOP-NOT: ReadAccess +; +; Verify the original region is untouched but all computation is moved to the +; only place it is needed in the generated region. 
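+;
+; Note that the recomputed integer chain constant-folds: with a = 2 * i we
+; get e = a + 2a + 3a + 2a = 16 * i, which is why the generated statement
+; below contains only a truncate, a shift (shl ... 4), and the store.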
+; +; CHECK: for.body.f: +; CHECK-NEXT: %idxprom6 = sext i32 %i.0 to i64 +; CHECK-NEXT: %arrayidx7 = getelementptr inbounds i32, i32* %A, i64 %idxprom6 +; CHECK-NEXT: store i32 %add5, i32* %arrayidx7, align 4 +; +; CHECK: polly.stmt.for.body.f: +; CHECK: %scevgep = getelementptr i32, i32* %A, i64 %polly.indvar +; CHECK: %0 = trunc i64 %polly.indvar to i32 +; CHECK: %1 = shl i32 %0, 4 +; CHECK: store i32 %1, i32* %scevgep +; +; void f(int *A) { +; for (int i = 0; i < 1000; i++) { +; int a = i * 2; +; /* split BB */ +; int b = a + a; +; /* split BB */ +; int c = b + a; +; /* split BB */ +; int d = a + a; +; /* split BB */ +; int e = a + b + c + d; +; /* split BB */ +; A[i] = e; +; } +; } +; +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" + +define void @f(i32* %A) { +entry: + br label %for.cond + +for.cond: ; preds = %for.inc, %entry + %i.0 = phi i32 [ 0, %entry ], [ %inc, %for.inc ] + %cmp = icmp slt i32 %i.0, 1000 + br i1 %cmp, label %for.body.a, label %for.end + +for.body.a: ; preds = %for.cond + %tmp = mul nsw i32 %i.0, 2 + br label %for.body.b + +for.body.b: + %add = add nsw i32 %tmp, %tmp + br label %for.body.c + +for.body.c: + %add1 = add nsw i32 %add, %tmp + br label %for.body.d + +for.body.d: + %add2 = add nsw i32 %tmp, %tmp + br label %for.body.e + +for.body.e: + %add3 = add nsw i32 %tmp, %add + %add4 = add nsw i32 %add3, %add1 + %add5 = add nsw i32 %add4, %add2 + br label %for.body.f + +for.body.f: + %idxprom6 = sext i32 %i.0 to i64 + %arrayidx7 = getelementptr inbounds i32, i32* %A, i64 %idxprom6 + store i32 %add5, i32* %arrayidx7, align 4 + br label %for.inc + +for.inc: ; preds = %for.body + %inc = add nsw i32 %i.0, 1 + br label %for.cond + +for.end: ; preds = %for.cond + ret void +} Index: test/Isl/CodeGen/eliminate-scalars-with-outside-load.ll =================================================================== --- /dev/null +++ test/Isl/CodeGen/eliminate-scalars-with-outside-load.ll @@ -0,0 +1,61 @@ +; RUN: opt %loadPolly -polly-scops -analyze < %s | FileCheck %s +; RUN: opt %loadPolly -polly-codegen -S < %s | FileCheck %s --check-prefix=CODEGEN +; +; Verify that we will virtually move %mul but also the read of %tmp to the +; for.body.split block. 
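+;
+; Because %tmp is loaded before the SCoP, the statement that recomputes %mul
+; still needs a read access for MemRef_tmp; it is the access copied over
+; from Stmt_for_body (via MemoryAccess::copy).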
+; +; TODO: Remove read only statements +; CHECK: Stmt_for_body +; CHECK: ReadAccess := [Reduction Type: NONE] [Scalar: 1] +; CHECK: { Stmt_for_body[i0] -> MemRef_tmp[] }; +; CHECK-NOT: Access +; CHECK: Stmt_for_body_split +; CHECK-NOT: MemRef_mul +; CHECK: ReadAccess := [Reduction Type: NONE] [Scalar: 1] +; CHECK: { Stmt_for_body_split[i0] -> MemRef_tmp[] }; +; CHECK: MustWriteAccess := [Reduction Type: NONE] [Scalar: 0] +; CHECK: { Stmt_for_body_split[i0] -> MemRef_A[i0] }; +; +; CODEGEN: polly.stmt.for.body.split: +; CODEGEN-NEXT: %p_mul1 = fmul float %tmp, 2.000000e+00 +; CODEGEN-NEXT: %scevgep = getelementptr float, float* %A, i64 %polly.indvar +; CODEGEN-NEXT: store float %p_mul1, +; +; void f(float *A) { +; float x = A[-1]; +; for (int i = 0; i < 1000; i++) { +; float a = x * 2; +; /* split BB */ +; A[i] = a; +; } +; } +; +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" + +define void @f(float* %A) { +entry: + %arrayidx = getelementptr inbounds float, float* %A, i64 -1 + %tmp = load float, float* %arrayidx, align 4 + br label %for.cond + +for.cond: ; preds = %for.inc, %entry + %indvars.iv = phi i64 [ %indvars.iv.next, %for.inc ], [ 0, %entry ] + %exitcond = icmp ne i64 %indvars.iv, 1000 + br i1 %exitcond, label %for.body, label %for.end + +for.body: ; preds = %for.cond + %mul = fmul float %tmp, 2.000000e+00 + br label %for.body.split + +for.body.split: ; preds = %for.cond + %arrayidx1 = getelementptr inbounds float, float* %A, i64 %indvars.iv + store float %mul, float* %arrayidx1, align 4 + br label %for.inc + +for.inc: ; preds = %for.body + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + br label %for.cond + +for.end: ; preds = %for.cond + ret void +} Index: test/Isl/CodeGen/non-affine-phi-node-expansion-3.ll =================================================================== --- test/Isl/CodeGen/non-affine-phi-node-expansion-3.ll +++ test/Isl/CodeGen/non-affine-phi-node-expansion-3.ll @@ -14,12 +14,14 @@ ; CHECK-LABEL: polly.stmt.loop: ; CHECK-NEXT: %polly.subregion.iv = phi i32 [ 0, %polly.stmt.loop.entry ] -; CHECK-NEXT: %p_val0 = fadd float 1.000000e+00, 2.000000e+00 ; CHECK-NEXT: %p_val1 = fadd float 1.000000e+00, 2.000000e+00 ; CHECK-NEXT: %p_val2 = fadd float 1.000000e+00, 2.000000e+00 +; CHECK-NEXT: %p_val0 = fadd float 1.000000e+00, 2.000000e+00 +; CHECK-NEXT: %p_val11 = fadd float 1.000000e+00, 2.000000e+00 +; CHECK-NEXT: %p_val22 = fadd float 1.000000e+00, 2.000000e+00 ; CHECK-NEXT: store float %p_val0, float* %merge.phiops -; CHECK-NEXT: store float %p_val1, float* %val1.s2a -; CHECK-NEXT: store float %p_val2, float* %val2.s2a +; CHECK-NEXT: store float %p_val11, float* %val1.s2a +; CHECK-NEXT: store float %p_val22, float* %val2.s2a ; FIXME -> The last two writes are not really needed and can be dropped if the ; incoming block of the PHI and the value that is used share the same @@ -29,13 +31,17 @@ br i1 %cond1, label %branch2, label %backedge ; CHECK-LABEL: polly.stmt.branch1: -; CHECK-NEXT: store float %p_val1, float* %merge.phiops +; CHECK-NEXT: %p_val13 = fadd float 1.000000e+00, 2.000000e+00 +; CHECK-NEXT: %p_val24 = fadd float 1.000000e+00, 2.000000e+00 +; CHECK-NEXT: store float %p_val13, float* %merge.phiops branch2: br label %backedge ; CHECK-LABEL: polly.stmt.branch2: -; CHECK-NEXT: store float %p_val2, float* %merge.phiops +; CHECK-NEXT: %p_val15 = fadd float 1.000000e+00, 2.000000e+00 +; CHECK-NEXT: %p_val26 = fadd float 1.000000e+00, 2.000000e+00 +; CHECK-NEXT: store float %p_val26, float* %merge.phiops backedge: %merge = phi float [%val0, 
   %merge = phi float [%val0, %loop], [%val1, %branch1], [%val2, %branch2]
Index: test/Isl/CodeGen/srem-in-other-bb.ll
===================================================================
--- test/Isl/CodeGen/srem-in-other-bb.ll
+++ test/Isl/CodeGen/srem-in-other-bb.ll
@@ -1,5 +1,4 @@
-; RUN: opt %loadPolly -polly-codegen -S \
-; RUN: < %s | FileCheck %s
+; RUN: opt %loadPolly -polly-codegen -S < %s | FileCheck %s
 ;
 ; void pos(float *A, long n) {
 ;   for (long i = 0; i < 100; i++)
@@ -7,8 +6,8 @@
 ; }
 ;
 ; CHECK: polly.stmt.bb3:
-; CHECK: %p_tmp.moved.to.bb3 = srem i64 %n, 42
-; CHECK: %p_tmp3 = getelementptr inbounds float, float* %A, i64 %p_tmp.moved.to.bb3
+; CHECK: %[[rem:[._a-zA-Z0-9]*]] = srem i64 %n, 42
+; CHECK: getelementptr inbounds float, float* %A, i64 %[[rem]]
 
 define void @pos(float* %A, i64 %n) {
 bb:
Index: test/ScopInfo/eliminate-scalar-caused-by-load-reduction-2.ll
===================================================================
--- /dev/null
+++ test/ScopInfo/eliminate-scalar-caused-by-load-reduction-2.ll
@@ -0,0 +1,56 @@
+; RUN: opt %loadPolly -polly-scops -analyze < %s | FileCheck %s
+; RUN: opt %loadPolly -polly-codegen -S < %s | FileCheck %s --check-prefix=CODEGEN
+;
+; This is a negative test. We should move the load to the split block
+; and remove all scalar accesses; however, at the moment we only move
+; instructions that are trivially safe to move. All three checks should
+; be negated at some point. This also verifies that we currently do not
+; try to move only part of the scalar operand chain, i.e., the %add instruction.
+;
+; CHECK: Scalar: 1
+; CHECK: Scalar: 1
+; CHECK-NOT: Reduction: +
+;
+; These checks should stay, as they verify that we did not modify the original region:
+;
+; CODEGEN: for.body.split:
+; CODEGEN-NEXT: %arrayidx2 = getelementptr inbounds i32, i32* %A, i64 %indvars.iv
+; CODEGEN-NEXT: store i32 %add, i32* %arrayidx2, align 4
+;
+; void f(int *A) {
+;   for (int i = 0; i < 1000; i++) {
+;     int x = A[i] + 3;
+;     /* split BB */
+;     A[i] = x;
+;   }
+; }
+;
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+
+define void @f(i32* %A) {
+entry:
+  br label %for.cond
+
+for.cond:                                         ; preds = %for.inc, %entry
+  %indvars.iv = phi i64 [ %indvars.iv.next, %for.inc ], [ 0, %entry ]
+  %exitcond = icmp ne i64 %indvars.iv, 1000
+  br i1 %exitcond, label %for.body, label %for.end
+
+for.body:                                         ; preds = %for.cond
+  %arrayidx = getelementptr inbounds i32, i32* %A, i64 %indvars.iv
+  %tmp = load i32, i32* %arrayidx, align 4
+  %add = add nsw i32 %tmp, 3
+  br label %for.body.split
+
+for.body.split:
+  %arrayidx2 = getelementptr inbounds i32, i32* %A, i64 %indvars.iv
+  store i32 %add, i32* %arrayidx2, align 4
+  br label %for.inc
+
+for.inc:                                          ; preds = %for.body.split
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  br label %for.cond
+
+for.end:                                          ; preds = %for.cond
+  ret void
+}
Index: test/ScopInfo/eliminate-scalar-caused-by-load-reduction.ll
===================================================================
--- /dev/null
+++ test/ScopInfo/eliminate-scalar-caused-by-load-reduction.ll
@@ -0,0 +1,48 @@
+; RUN: opt %loadPolly -polly-scops -analyze < %s | FileCheck %s
+;
+; This is a negative test. We should move the load to the split block
+; and remove all scalar accesses; however, at the moment we only move
+; instructions that are trivially safe to move. All three checks should
+; be negated at some point.
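+;
+; (Editor's note, an assumption about the current heuristic rather than
+; documented behavior: the load is presumably not "trivially safe" to move
+; because sinking it into the split block changes the point at which memory
+; is read, which is only sound if no aliasing write can execute in between;
+; proving that requires alias analysis, unlike the purely arithmetic
+; instructions recomputed in the other tests.)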
+;
+; CHECK: Scalar: 1
+; CHECK: Scalar: 1
+; CHECK-NOT: Reduction: +
+;
+; void f(int *A) {
+;   for (int i = 0; i < 1000; i++) {
+;     int x = A[i];
+;     /* split BB */
+;     A[i] = x + 3;
+;   }
+; }
+;
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+
+define void @f(i32* %A) {
+entry:
+  br label %for.cond
+
+for.cond:                                         ; preds = %for.inc, %entry
+  %indvars.iv = phi i64 [ %indvars.iv.next, %for.inc ], [ 0, %entry ]
+  %exitcond = icmp ne i64 %indvars.iv, 1000
+  br i1 %exitcond, label %for.body, label %for.end
+
+for.body:                                         ; preds = %for.cond
+  %arrayidx = getelementptr inbounds i32, i32* %A, i64 %indvars.iv
+  %tmp = load i32, i32* %arrayidx, align 4
+  br label %for.body.split
+
+for.body.split:
+  %add = add nsw i32 %tmp, 3
+  %arrayidx2 = getelementptr inbounds i32, i32* %A, i64 %indvars.iv
+  store i32 %add, i32* %arrayidx2, align 4
+  br label %for.inc
+
+for.inc:                                          ; preds = %for.body.split
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  br label %for.cond
+
+for.end:                                          ; preds = %for.cond
+  ret void
+}
Index: test/ScopInfo/independent-blocks-never-stop-on-big-scop.ll
===================================================================
--- test/ScopInfo/independent-blocks-never-stop-on-big-scop.ll
+++ /dev/null
@@ -1,199 +0,0 @@
-; RUN: opt %loadPolly -polly-independent < %s
-target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
-
-define i32 @main() nounwind uwtable readnone {
-  %arr = alloca [100 x i32], align 16
-  br label %1
-
-;