Index: polly/trunk/include/polly/CodeGen/BlockGenerators.h
===================================================================
--- polly/trunk/include/polly/CodeGen/BlockGenerators.h
+++ polly/trunk/include/polly/CodeGen/BlockGenerators.h
@@ -136,31 +136,129 @@
 
   /// @brief Maps to resolve scalar dependences for PHI operands and scalars.
   ///
-  /// Usage example:
-  ///
-  ///  x1 = ...   // x1 will be inserted in the ScalarMap and PhiOpMap.
-  ///  for (i=0...N) {
-  ///      x2 = phi(x1, add) // x2 will be inserted in the ScalarMap, x1 and
-  ///                        // add are mapped in the PHIOpMap.
-  ///      add = x2 + A[i];  // add will be inserted in the ScalarMap and
-  ///                        // the PhiOpMap.
-  ///  }
-  ///  print(x1)  // x1 is mapped in the ScalarMap.
-  ///  print(x2)  // x2 is mapped in the ScalarMap.
-  ///  print(add) // add is mapped in the ScalarMap.
+  /// When translating code that contains scalar dependences as they result from
+  /// inter-block scalar dependences (including the use of data carrying
+  /// PHI nodes), we do not directly regenerate in-register SSA code, but
+  /// instead allocate some stack memory through which these scalar values are
+  /// passed. Only a later pass of -mem2reg will then (re)introduce in-register
+  /// computations.
+  ///
+  /// To keep track of the memory location(s) used to store the data computed by
+  /// a given SSA instruction, we use the maps 'ScalarMap' and 'PHIOpMap'. Each
+  /// maps a given scalar value to a junk of stack allocated memory.
+  ///
+  /// 'ScalarMap' is used for normal scalar dependences that go from a scalar
+  /// definition to its use. Such dependences are lowered by directly writing
+  /// the value an instruction computes into the corresponding chunk of memory
+  /// and reading it back from this chunk of memory right before every use of
+  /// this original scalar value. The memory locations in 'ScalarMap' end with
+  /// '.s2a'.
+  ///
+  /// 'PHIOpMap' is used to model PHI nodes. For each PHI nodes we introduce,
+  /// besides the memory in 'ScalarMap', a second chunk of memory into which we
+  /// write at the end of each basic block preceeding the PHI instruction the
+  /// value passed through this basic block. At the place where the PHI node is
+  /// executed, we replace the PHI node with a load from the corresponding
+  /// memory location in the 'PHIOpMap' table. The memory locations in
+  /// 'PHIOpMap' end with '.phiops'.
+  ///
+  /// An access to/from memory that belongs to a PHI node is in the ScopInfo
+  /// always modeled with the name of the PHI node. However, in reality PHI
+  /// nodes can introduce reads/writes to two different memory locations, the
+  /// normal '.s2a' locations and the special '.phiops' locations. We do not
+  /// track this difference in the polyhedral description, but only through
+  /// the content of the two maps 'ScalarMap' and 'PHIOpMap'.
+  ///
+  /// Example:
+  ///
+  ///                              Input C Code
+  ///                              ============
+  ///
+  ///                 S1:      x1 = ...
+  ///                          for (i=0...N) {
+  ///                 S2:           x2 = phi(x1, add)
+  ///                 S3:           add = x2 + 42;
+  ///                          }
+  ///                 S4:      print(x1)
+  ///                          print(x2)
+  ///                          print(add)
+  ///
+  ///
+  ///        Unmodified IR                         IR After expansion
+  ///        =============                         ==================
+  ///
+  /// S1:   x1 = ...                     S1:    x1 = ...
+  ///                                           x1.s2a = s1
+  ///                                           x2.phiops = s1
+  ///        |                                    |
+  ///        |   <--<--<--<--<                    |   <--<--<--<--<
+  ///        | /              \                   | /              \
+  ///        V V               \                  V V               \
+  /// S2:  x2 = phi (x1, add)   |        S2:    x2 = x2.phiops       |
+  ///                           |               x2.s2a = x2          |
+  ///                           |                                    |
+  /// S3:  add = x2 + 42        |        S3:    add = x2 + 42        |
+  ///                           |               add.s2a = add        |
+  ///                           |               x2.phiops = add      |
+  ///        | \               /                  | \               /
+  ///        |  \             /                   |  \             /
+  ///        |   >-->-->-->-->                    |   >-->-->-->-->
+  ///        V                                    V
+  ///
+  ///                                    S4:    x1 = x1.s2a
+  /// S4:  ... = x1                             ... = x1
+  ///                                           x2 = x2.s2a
+  ///      ... = x2                             ... = x2
+  ///                                           add = add.s2a
+  ///      ... = add                            ... = add
+  ///
+  ///      ScalarMap = { x1 -> x1.s2a, x2 -> x2.s2a, add -> add.s2a }
+  ///      PHIOpMap =  { x2 -> x2.phiops }
+  ///
+  ///
+  ///  ??? Why does a PHI-node require two memory chunks ???
+  ///
+  ///  One may wonder why a PHI node requires two memory chunks and not just
+  ///  all data is stored in a single location. The following example tries
+  ///  to store all data in .s2a and drops the .phiops location:
+  ///
+  ///      S1:    x1 = ...
+  ///             x1.s2a = s1
+  ///             x2.s2a = s1             // use .s2a instead of .phiops
+  ///               |
+  ///               |   <--<--<--<--<
+  ///               | /              \
+  ///               V V               \
+  ///      S2:    x2 = x2.s2a          |  // value is same as above, but read
+  ///                                  |  // from .s2a
+  ///                                  |
+  ///             x2.s2a = x2          |  // store into .s2a as normal
+  ///                                  |
+  ///      S3:    add = x2 + 42        |
+  ///             add.s2a = add        |
+  ///             x2.s2a = add         |  // use s2a instead of .phiops
+  ///               | \               /   // !!! This is wrong, as x2.s2a now
+  ///               |   >-->-->-->-->     // contains add instead of x2.
+  ///               V
+  ///
+  ///      S4:    x1 = x1.s2a
+  ///             ... = x1
+  ///             x2 = x2.s2a             // !!! We now read 'add' instead of
+  ///             ... = x2                // 'x2'
+  ///             add = add.s2a
+  ///             ... = add
+  ///
+  ///  As visible in the example, the SSA value of the PHI node may still be
+  ///  needed _after_ the basic block, which could conceptually branch to the
+  ///  PHI node, has been run and has overwritten the PHI's old value. Hence, a
+  ///  single memory location is not enough to code-generate a PHI node.
   ///
   ///{
-
-  /// The PHIOpMap is used to get the alloca to communicate a value to a PHI
-  /// node, hence when the operand of a PHI is demoted the corresponding write
-  /// access will use the PHIOpMap to look for the correct alloca. PHI nodes
-  /// will then read that location in order to get the correct/current operand
-  /// value.
+  ///
+  /// @brief Memory locations used for the special PHI node modeling.
   ScalarAllocaMapTy &PHIOpMap;
 
-  /// The ScalarMap is used in __all__ other cases, thus always when a scalar
-  /// variable is read/written and the write is not because the scalar is a PHI
-  /// operand.
+  /// @brief Memory locations used to model scalar dependences.
   ScalarAllocaMapTy &ScalarMap;
   ///}