Index: polly/trunk/include/polly/ScopBuilder.h
===================================================================
--- polly/trunk/include/polly/ScopBuilder.h
+++ polly/trunk/include/polly/ScopBuilder.h
@@ -235,6 +235,12 @@
   /// separator is found.
   void buildSequentialBlockStmts(BasicBlock *BB);
 
+  /// Create one or more ScopStmts for @p BB using equivalence classes.
+  ///
+  /// Instructions of a basic block that belong to the same equivalence class
+  /// are added to the same statement.
+  void buildEqivClassBlockStmts(BasicBlock *BB);
+
   /// Create ScopStmt for all BBs and non-affine subregions of @p SR.
   ///
   /// @param SR A subregion of @p R.
Index: polly/trunk/lib/Analysis/ScopBuilder.cpp
===================================================================
--- polly/trunk/lib/Analysis/ScopBuilder.cpp
+++ polly/trunk/lib/Analysis/ScopBuilder.cpp
@@ -25,6 +25,7 @@
 #include "llvm/ADT/APInt.h"
 #include "llvm/ADT/ArrayRef.h"
 #include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/EquivalenceClasses.h"
 #include "llvm/ADT/SetVector.h"
 #include "llvm/ADT/Statistic.h"
 #include "llvm/Analysis/AliasAnalysis.h"
@@ -102,14 +103,16 @@
     cl::desc("Disable multiplicative reductions"), cl::Hidden, cl::ZeroOrMore,
     cl::init(false), cl::cat(PollyCategory));
 
-enum class GranularityChoice { BasicBlocks };
+enum class GranularityChoice { BasicBlocks, ScalarIndepependence };
 
 static cl::opt<GranularityChoice> StmtGranularity(
     "polly-stmt-granularity",
     cl::desc(
         "Algorithm to use for splitting basic blocks into multiple statements"),
     cl::values(clEnumValN(GranularityChoice::BasicBlocks, "bb",
-                          "One statement per basic block")),
+                          "One statement per basic block"),
+               clEnumValN(GranularityChoice::ScalarIndepependence,
+                          "scalar-indep", "Scalar independence heuristic")),
     cl::init(GranularityChoice::BasicBlocks), cl::cat(PollyCategory));
 
 void ScopBuilder::buildPHIAccesses(ScopStmt *PHIStmt, PHINode *PHI,
@@ -701,6 +704,178 @@
   scop->addScopStmt(BB, SurroundingLoop, Instructions, Count);
 }
 
+/// Is @p Inst an ordered instruction?
+///
+/// An unordered instruction is an instruction, such that a sequence of
+/// unordered instructions can be permuted without changing semantics. Any
+/// instruction for which this is not always the case is ordered.
+static bool isOrderedInstruction(Instruction *Inst) {
+  return Inst->mayHaveSideEffects() || Inst->mayReadOrWriteMemory();
+}
+
+/// Join instructions to the same statement if one uses the scalar result of the
+/// other.
+static void joinOperandTree(EquivalenceClasses<Instruction *> &UnionFind,
+                            ArrayRef<Instruction *> ModeledInsts) {
+  for (Instruction *Inst : ModeledInsts) {
+    if (isa<PHINode>(Inst))
+      continue;
+
+    for (Use &Op : Inst->operands()) {
+      Instruction *OpInst = dyn_cast<Instruction>(Op.get());
+      if (!OpInst)
+        continue;
+
+      // Check if OpInst is in the BB and is a modeled instruction.
+      auto OpVal = UnionFind.findValue(OpInst);
+      if (OpVal == UnionFind.end())
+        continue;
+
+      UnionFind.unionSets(Inst, OpInst);
+    }
+  }
+}
+
+/// Join instructions that are used as incoming value in successor PHIs into the
+/// epilogue.
+static void
+joinIncomingPHIValuesIntoEpilogue(EquivalenceClasses<Instruction *> &UnionFind,
+                                  ArrayRef<Instruction *> ModeledInsts,
+                                  BasicBlock *BB) {
+  for (BasicBlock *Succ : successors(BB)) {
+    for (Instruction &SuccInst : *Succ) {
+      PHINode *SuccPHI = dyn_cast<PHINode>(&SuccInst);
+      if (!SuccPHI)
+        break;
+
+      Value *IncomingVal = SuccPHI->getIncomingValueForBlock(BB);
+      Instruction *IncomingInst = dyn_cast<Instruction>(IncomingVal);
+      if (!IncomingInst)
+        continue;
+      if (IncomingInst->getParent() != BB)
+        continue;
+      if (UnionFind.findValue(IncomingInst) == UnionFind.end())
+        continue;
+
+      UnionFind.unionSets(nullptr, IncomingInst);
+    }
+  }
+}
+
+/// Ensure that the order of ordered instructions does not change.
+///
+/// If we encounter an ordered instruction enclosed in instructions belonging to
+/// a different statement (which might as well contain ordered instructions, but
+/// this is not tested here), join them.
+static void
+joinOrderedInstructions(EquivalenceClasses<Instruction *> &UnionFind,
+                        ArrayRef<Instruction *> ModeledInsts) {
+  SetVector<Instruction *> SeenLeaders;
+  for (Instruction *Inst : ModeledInsts) {
+    if (!isOrderedInstruction(Inst))
+      continue;
+
+    Instruction *Leader = UnionFind.getLeaderValue(Inst);
+    bool Inserted = SeenLeaders.insert(Leader);
+    if (Inserted)
+      continue;
+
+    // Merge statements to close holes. Say, we have already seen statements A
+    // and B, in this order. Then we see an instruction of A again and we would
+    // see the pattern "A B A". This function joins all statements until the
+    // only seen occurrence of A.
+    for (Instruction *Prev : reverse(SeenLeaders)) {
+      // Items added to 'SeenLeaders' are leaders, but may have lost their
+      // leadership status when merged into another statement.
+      Instruction *PrevLeader = UnionFind.getLeaderValue(SeenLeaders.back());
+      if (PrevLeader == Leader)
+        break;
+      UnionFind.unionSets(Prev, Leader);
+    }
+  }
+}
+
+/// Also ensure that the epilogue is the last statement relative to all ordered
+/// instructions.
+///
+/// This is basically joinOrderedInstructions() but using the epilogue as
+/// 'ordered instruction'.
+static void joinAllAfterEpilogue(EquivalenceClasses<Instruction *> &UnionFind,
+                                 ArrayRef<Instruction *> ModeledInsts) {
+  bool EpilogueSeen = false;
+  for (Instruction *Inst : ModeledInsts) {
+    auto PHIWritesLeader = UnionFind.findLeader(nullptr);
+    auto InstLeader = UnionFind.findLeader(Inst);
+
+    if (PHIWritesLeader == InstLeader)
+      EpilogueSeen = true;
+
+    if (!isOrderedInstruction(Inst))
+      continue;
+
+    if (EpilogueSeen)
+      UnionFind.unionSets(PHIWritesLeader, InstLeader);
+  }
+}
+
+void ScopBuilder::buildEqivClassBlockStmts(BasicBlock *BB) {
+  Loop *L = LI.getLoopFor(BB);
+
+  // Extracting out modeled instructions saves us from checking
+  // shouldModelInst() repeatedly.
+  SmallVector<Instruction *, 32> ModeledInsts;
+  EquivalenceClasses<Instruction *> UnionFind;
+  for (Instruction &Inst : *BB) {
+    if (!shouldModelInst(&Inst, L))
+      continue;
+    ModeledInsts.push_back(&Inst);
+    UnionFind.insert(&Inst);
+  }
+
+  // 'nullptr' represents the last statement for a basic block. It contains no
+  // instructions, but holds the PHI write accesses for successor basic blocks.
+  // If a PHI has an incoming value defined in this BB, it can also be merged
+  // with other statements.
+  // TODO: We wouldn't need this if we would add PHIWrites into the statement
+  // that defines the incoming value (if in the BB) instead of always the last,
+  // so we could unconditionally always add a last statement.
+  UnionFind.insert(nullptr);
+
+  joinOperandTree(UnionFind, ModeledInsts);
+  joinIncomingPHIValuesIntoEpilogue(UnionFind, ModeledInsts, BB);
+  joinOrderedInstructions(UnionFind, ModeledInsts);
+  joinAllAfterEpilogue(UnionFind, ModeledInsts);
+
+  // The list of instructions for statement (statement represented by the leader
+  // instruction). The order of statements instructions is reversed such that
+  // the epilogue is first. This makes it easier to ensure that the epilogue is
+  // the last statement.
+  MapVector<Instruction *, std::vector<Instruction *>> LeaderToInstList;
+
+  // Ensure that the epilogue is last.
+  LeaderToInstList[nullptr];
+
+  // Collect the instructions of all leaders. UnionFind's member iterator
+  // unfortunately are not in any specific order.
+  for (Instruction &Inst : reverse(*BB)) {
+    auto LeaderIt = UnionFind.findLeader(&Inst);
+    if (LeaderIt == UnionFind.member_end())
+      continue;
+
+    std::vector<Instruction *> &InstList = LeaderToInstList[*LeaderIt];
+    InstList.push_back(&Inst);
+  }
+
+  // Finally build the statements.
+  int Count = 0;
+  for (auto &Instructions : reverse(LeaderToInstList)) {
+    std::vector<Instruction *> &InstList = Instructions.second;
+    std::reverse(InstList.begin(), InstList.end());
+    scop->addScopStmt(BB, L, std::move(InstList), Count);
+    Count += 1;
+  }
+}
+
 void ScopBuilder::buildStmts(Region &SR) {
   if (scop->isNonAffineSubRegion(&SR)) {
     std::vector<Instruction *> Instructions;
@@ -722,6 +897,9 @@
       case GranularityChoice::BasicBlocks:
         buildSequentialBlockStmts(BB);
         break;
+      case GranularityChoice::ScalarIndepependence:
+        buildEqivClassBlockStmts(BB);
+        break;
       }
     }
 }
Index: polly/trunk/test/ScopInfo/granularity_scalar-indep.ll
===================================================================
--- polly/trunk/test/ScopInfo/granularity_scalar-indep.ll
+++ polly/trunk/test/ScopInfo/granularity_scalar-indep.ll
@@ -0,0 +1,68 @@
+; RUN: opt %loadPolly -polly-stmt-granularity=scalar-indep -polly-print-instructions -polly-scops -analyze < %s | FileCheck %s -match-full-lines
+;
+; Split a block into two independent statements that share no scalar.
+; This case has the instructions of the two statements interleaved, such that
+; splitting the BasicBlock in the middle would cause a scalar dependency.
+;
+; for (int j = 0; j < n; j += 1) {
+; body:
+;   double valA = A[0];
+;   double valB = 21.0 + 21.0;
+;   A[0] = valA;
+;   B[0] = valB;
+; }
+;
+define void @func(i32 %n, double* noalias nonnull %A, double* noalias nonnull %B) {
+entry:
+  br label %for
+
+for:
+  %j = phi i32 [0, %entry], [%j.inc, %inc]
+  %j.cmp = icmp slt i32 %j, %n
+  br i1 %j.cmp, label %body, label %exit
+
+    body:
+      %valA = load double, double* %A
+      %valB = fadd double 21.0, 21.0
+      store double %valA, double* %A
+      store double %valB, double* %B
+      br label %inc
+
+inc:
+  %j.inc = add nuw nsw i32 %j, 1
+  br label %for
+
+exit:
+  br label %return
+
+return:
+  ret void
+}
+
+
+; CHECK:      Statements {
+; CHECK-NEXT:     Stmt_body
+; CHECK-NEXT:         Domain :=
+; CHECK-NEXT:             [n] -> { Stmt_body[i0] : 0 <= i0 < n };
+; CHECK-NEXT:         Schedule :=
+; CHECK-NEXT:             [n] -> { Stmt_body[i0] -> [i0, 0] };
+; CHECK-NEXT:         ReadAccess :=       [Reduction Type: NONE] [Scalar: 0]
+; CHECK-NEXT:             [n] -> { Stmt_body[i0] -> MemRef_A[0] };
+; CHECK-NEXT:         MustWriteAccess :=  [Reduction Type: NONE] [Scalar: 0]
+; CHECK-NEXT:             [n] -> { Stmt_body[i0] -> MemRef_A[0] };
+; CHECK-NEXT:         Instructions {
+; CHECK-NEXT:               %valA = load double, double* %A
+; CHECK-NEXT:               store double %valA, double* %A
+; CHECK-NEXT:         }
+; CHECK-NEXT:     Stmt_body1
+; CHECK-NEXT:         Domain :=
+; CHECK-NEXT:             [n] -> { Stmt_body1[i0] : 0 <= i0 < n };
+; CHECK-NEXT:         Schedule :=
+; CHECK-NEXT:             [n] -> { Stmt_body1[i0] -> [i0, 1] };
+; CHECK-NEXT:         MustWriteAccess :=  [Reduction Type: NONE] [Scalar: 0]
+; CHECK-NEXT:             [n] -> { Stmt_body1[i0] -> MemRef_B[0] };
+; CHECK-NEXT:         Instructions {
+; CHECK-NEXT:               %valB = fadd double 2.100000e+01, 2.100000e+01
+; CHECK-NEXT:               store double %valB, double* %B
+; CHECK-NEXT:         }
+; CHECK-NEXT: }
Index: polly/trunk/test/ScopInfo/granularity_scalar-indep_epilogue.ll
===================================================================
--- polly/trunk/test/ScopInfo/granularity_scalar-indep_epilogue.ll
+++ polly/trunk/test/ScopInfo/granularity_scalar-indep_epilogue.ll
@@ -0,0 +1,68 @@
+; RUN: opt %loadPolly -polly-stmt-granularity=scalar-indep -polly-print-instructions -polly-scops -analyze < %s | FileCheck %s -match-full-lines
+;
+; Split a block into two independent statements that share no scalar.
+; This case has an independent statement just for PHI writes.
+;
+; for (int j = 0; j < n; j += 1) {
+; bodyA:
+;   double valA = A[0];
+;   A[0] = valA;
+;
+; bodyB:
+;   phi = 42.0;
+; }
+;
+define void @func(i32 %n, double* noalias nonnull %A) {
+entry:
+  br label %for
+
+for:
+  %j = phi i32 [0, %entry], [%j.inc, %inc]
+  %j.cmp = icmp slt i32 %j, %n
+  br i1 %j.cmp, label %bodyA, label %exit
+
+    bodyA:
+      %valA = load double, double* %A
+      store double %valA, double* %A
+      br label %bodyB
+
+    bodyB:
+      %phi = phi double [42.0, %bodyA]
+      br label %inc
+
+inc:
+  %j.inc = add nuw nsw i32 %j, 1
+  br label %for
+
+exit:
+  br label %return
+
+return:
+  ret void
+}
+
+
+; CHECK:      Statements {
+; CHECK-NEXT:     Stmt_bodyA
+; CHECK-NEXT:         Domain :=
+; CHECK-NEXT:             [n] -> { Stmt_bodyA[i0] : 0 <= i0 < n };
+; CHECK-NEXT:         Schedule :=
+; CHECK-NEXT:             [n] -> { Stmt_bodyA[i0] -> [i0, 0] };
+; CHECK-NEXT:         ReadAccess :=       [Reduction Type: NONE] [Scalar: 0]
+; CHECK-NEXT:             [n] -> { Stmt_bodyA[i0] -> MemRef_A[0] };
+; CHECK-NEXT:         MustWriteAccess :=  [Reduction Type: NONE] [Scalar: 0]
+; CHECK-NEXT:             [n] -> { Stmt_bodyA[i0] -> MemRef_A[0] };
+; CHECK-NEXT:         Instructions {
+; CHECK-NEXT:             %valA = load double, double* %A
+; CHECK-NEXT:             store double %valA, double* %A
+; CHECK-NEXT:         }
+; CHECK-NEXT:     Stmt_bodyA1
+; CHECK-NEXT:         Domain :=
+; CHECK-NEXT:             [n] -> { Stmt_bodyA1[i0] : 0 <= i0 < n };
+; CHECK-NEXT:         Schedule :=
+; CHECK-NEXT:             [n] -> { Stmt_bodyA1[i0] -> [i0, 1] };
+; CHECK-NEXT:         MustWriteAccess :=  [Reduction Type: NONE] [Scalar: 1]
+; CHECK-NEXT:             [n] -> { Stmt_bodyA1[i0] -> MemRef_phi__phi[] };
+; CHECK-NEXT:         Instructions {
+; CHECK-NEXT:         }
+; CHECK-NEXT: }
Index: polly/trunk/test/ScopInfo/granularity_scalar-indep_epilogue_last.ll
===================================================================
--- polly/trunk/test/ScopInfo/granularity_scalar-indep_epilogue_last.ll
+++ polly/trunk/test/ScopInfo/granularity_scalar-indep_epilogue_last.ll
@@ -0,0 +1,72 @@
+; RUN: opt %loadPolly -polly-stmt-granularity=scalar-indep -polly-print-instructions -polly-scops -analyze < %s | FileCheck %s -match-full-lines
+;
+; Check that the statement where the PHI writes are in always comes last.
+; Otherwise we'd need to introduce a scalar dependency from the statement
+; defining a value to the last statement of a basic block.
+;
+; for (int j = 0; j < n; j += 1) {
+; bodyA:
+;   double valA = A[0];
+;   A[0] = valA;
+;   double valB = B[0];
+;   B[0] = valB;
+;
+; bodyB:
+;   phi = valA;
+; }
+;
+define void @func(i32 %n, double* noalias nonnull %A, double* noalias nonnull %B) {
+entry:
+  br label %for
+
+for:
+  %j = phi i32 [0, %entry], [%j.inc, %inc]
+  %j.cmp = icmp slt i32 %j, %n
+  br i1 %j.cmp, label %bodyA, label %exit
+
+    bodyA:
+      %valA = load double, double* %A
+      store double %valA, double* %A
+      %valB = load double, double* %B
+      store double %valB, double* %B
+      br label %bodyB
+
+    bodyB:
+      %phi = phi double [%valA, %bodyA]
+      br label %inc
+
+inc:
+  %j.inc = add nuw nsw i32 %j, 1
+  br label %for
+
+exit:
+  br label %return
+
+return:
+  ret void
+}
+
+
+; CHECK:      Statements {
+; CHECK-NEXT:     Stmt_bodyA
+; CHECK-NEXT:         Domain :=
+; CHECK-NEXT:             [n] -> { Stmt_bodyA[i0] : 0 <= i0 < n };
+; CHECK-NEXT:         Schedule :=
+; CHECK-NEXT:             [n] -> { Stmt_bodyA[i0] -> [i0] };
+; CHECK-NEXT:         ReadAccess :=       [Reduction Type: NONE] [Scalar: 0]
+; CHECK-NEXT:             [n] -> { Stmt_bodyA[i0] -> MemRef_A[0] };
+; CHECK-NEXT:         MustWriteAccess :=  [Reduction Type: NONE] [Scalar: 0]
+; CHECK-NEXT:             [n] -> { Stmt_bodyA[i0] -> MemRef_A[0] };
+; CHECK-NEXT:         ReadAccess :=       [Reduction Type: NONE] [Scalar: 0]
+; CHECK-NEXT:             [n] -> { Stmt_bodyA[i0] -> MemRef_B[0] };
+; CHECK-NEXT:         MustWriteAccess :=  [Reduction Type: NONE] [Scalar: 0]
+; CHECK-NEXT:             [n] -> { Stmt_bodyA[i0] -> MemRef_B[0] };
+; CHECK-NEXT:         MustWriteAccess :=  [Reduction Type: NONE] [Scalar: 1]
+; CHECK-NEXT:             [n] -> { Stmt_bodyA[i0] -> MemRef_phi__phi[] };
+; CHECK-NEXT:         Instructions {
+; CHECK-NEXT:               %valA = load double, double* %A
+; CHECK-NEXT:               store double %valA, double* %A
+; CHECK-NEXT:               %valB = load double, double* %B
+; CHECK-NEXT:               store double %valB, double* %B
+; CHECK-NEXT:         }
+; CHECK-NEXT: }
Index: polly/trunk/test/ScopInfo/granularity_scalar-indep_noepilogue.ll
===================================================================
--- polly/trunk/test/ScopInfo/granularity_scalar-indep_noepilogue.ll
+++ polly/trunk/test/ScopInfo/granularity_scalar-indep_noepilogue.ll
@@ -0,0 +1,61 @@
+; RUN: opt %loadPolly -polly-stmt-granularity=scalar-indep -polly-print-instructions -polly-scops -analyze < %s | FileCheck %s -match-full-lines
+;
+; This case has no explicit epilogue for PHI writes because it would
+; have a scalar dependency to the previous statement.
+;
+; for (int j = 0; j < n; j += 1) {
+; bodyA:
+;   double valA = A[0];
+;   A[0] = valA;
+;
+; bodyB:
+;   phi = valA;
+; }
+;
+define void @func(i32 %n, double* noalias nonnull %A) {
+entry:
+  br label %for
+
+for:
+  %j = phi i32 [0, %entry], [%j.inc, %inc]
+  %j.cmp = icmp slt i32 %j, %n
+  br i1 %j.cmp, label %bodyA, label %exit
+
+    bodyA:
+      %valA = load double, double* %A
+      store double %valA, double* %A
+      br label %bodyB
+
+    bodyB:
+      %phi = phi double [%valA, %bodyA]
+      br label %inc
+
+inc:
+  %j.inc = add nuw nsw i32 %j, 1
+  br label %for
+
+exit:
+  br label %return
+
+return:
+  ret void
+}
+
+
+; CHECK:      Statements {
+; CHECK-NEXT:     Stmt_bodyA
+; CHECK-NEXT:         Domain :=
+; CHECK-NEXT:             [n] -> { Stmt_bodyA[i0] : 0 <= i0 < n };
+; CHECK-NEXT:         Schedule :=
+; CHECK-NEXT:             [n] -> { Stmt_bodyA[i0] -> [i0] };
+; CHECK-NEXT:         ReadAccess :=       [Reduction Type: NONE] [Scalar: 0]
+; CHECK-NEXT:             [n] -> { Stmt_bodyA[i0] -> MemRef_A[0] };
+; CHECK-NEXT:         MustWriteAccess :=  [Reduction Type: NONE] [Scalar: 0]
+; CHECK-NEXT:             [n] -> { Stmt_bodyA[i0] -> MemRef_A[0] };
+; CHECK-NEXT:         MustWriteAccess :=  [Reduction Type: NONE] [Scalar: 1]
+; CHECK-NEXT:             [n] -> { Stmt_bodyA[i0] -> MemRef_phi__phi[] };
+; CHECK-NEXT:         Instructions {
+; CHECK-NEXT:               %valA = load double, double* %A
+; CHECK-NEXT:               store double %valA, double* %A
+; CHECK-NEXT:         }
+; CHECK-NEXT: }
Index: polly/trunk/test/ScopInfo/granularity_scalar-indep_ordered.ll
===================================================================
--- polly/trunk/test/ScopInfo/granularity_scalar-indep_ordered.ll
+++ polly/trunk/test/ScopInfo/granularity_scalar-indep_ordered.ll
@@ -0,0 +1,62 @@
+; RUN: opt %loadPolly -polly-stmt-granularity=scalar-indep -polly-print-instructions -polly-scops -analyze < %s | FileCheck %s -match-full-lines
+;
+; This case cannot be split into two statements because the order of
+; loads and store would be violated.
+;
+; for (int j = 0; j < n; j += 1) {
+; body:
+;   double valA = A[0];
+;   double valB = B[0];
+;   A[0] = valA;
+;   A[0] = valB;
+; }
+;
+define void @func(i32 %n, double* noalias nonnull %A, double* noalias nonnull %B) {
+entry:
+  br label %for
+
+for:
+  %j = phi i32 [0, %entry], [%j.inc, %inc]
+  %j.cmp = icmp slt i32 %j, %n
+  br i1 %j.cmp, label %body, label %exit
+
+    body:
+      %valA = load double, double* %A
+      %valB = load double, double* %B
+      store double %valA, double* %A
+      store double %valB, double* %A
+      br label %inc
+
+inc:
+  %j.inc = add nuw nsw i32 %j, 1
+  br label %for
+
+exit:
+  br label %return
+
+return:
+  ret void
+}
+
+
+; CHECK:     Statements {
+; CHECK-NEXT:      Stmt_body
+; CHECK-NEXT:          Domain :=
+; CHECK-NEXT:              [n] -> { Stmt_body[i0] : 0 <= i0 < n };
+; CHECK-NEXT:          Schedule :=
+; CHECK-NEXT:              [n] -> { Stmt_body[i0] -> [i0] };
+; CHECK-NEXT:          ReadAccess :=       [Reduction Type: NONE] [Scalar: 0]
+; CHECK-NEXT:              [n] -> { Stmt_body[i0] -> MemRef_A[0] };
+; CHECK-NEXT:          ReadAccess :=       [Reduction Type: NONE] [Scalar: 0]
+; CHECK-NEXT:              [n] -> { Stmt_body[i0] -> MemRef_B[0] };
+; CHECK-NEXT:          MustWriteAccess :=  [Reduction Type: NONE] [Scalar: 0]
+; CHECK-NEXT:              [n] -> { Stmt_body[i0] -> MemRef_A[0] };
+; CHECK-NEXT:          MustWriteAccess :=  [Reduction Type: NONE] [Scalar: 0]
+; CHECK-NEXT:              [n] -> { Stmt_body[i0] -> MemRef_A[0] };
+; CHECK-NEXT:          Instructions {
+; CHECK-NEXT:                %valA = load double, double* %A
+; CHECK-NEXT:                %valB = load double, double* %B
+; CHECK-NEXT:                store double %valA, double* %A
+; CHECK-NEXT:                store double %valB, double* %A
+; CHECK-NEXT:          }
+; CHECK-NEXT: }