Index: lib/Analysis/ScalarEvolutionNormalization.cpp
===================================================================
--- lib/Analysis/ScalarEvolutionNormalization.cpp
+++ lib/Analysis/ScalarEvolutionNormalization.cpp
@@ -51,40 +51,47 @@
   transform(AR->operands(), std::back_inserter(Operands),
             [&](const SCEV *Op) { return visit(Op); });
 
-  // Conservatively use AnyWrap until/unless we need FlagNW.
-  const SCEV *Result =
-      SE.getAddRecExpr(Operands, AR->getLoop(), SCEV::FlagAnyWrap);
-  switch (Kind) {
-  case Normalize:
-    // We want to normalize step expression, because otherwise we might not be
-    // able to denormalize to the original expression.
+  if (!Pred(AR))
+    return SE.getAddRecExpr(Operands, AR->getLoop(), SCEV::FlagAnyWrap);
+
+  // Normalization and denormalization are fancy names for decrementing and
+  // incrementing a SCEV expression with respect to a set of loops.  Since
+  // Pred(AR) has returned true, we know we need to normalize or denormalize AR
+  // with respect to its loop.
+
+  if (Kind == Denormalize) {
+    // Denormalization / "partial increment" is essentially the same as \c
+    // SCEVAddRecExpr::getPostIncExpr.  Here we use an explicit loop to make the
+    // symmetry with Normalization clear.
+    for (int i = 0, e = Operands.size() - 1; i < e; i++)
+      Operands[i] = SE.getAddExpr(Operands[i], Operands[i + 1]);
+  } else {
+    assert(Kind == Normalize && "Only two possibilities!");
+
+    // Normalization / "partial decrement" is a bit more subtle.  Since
+    // incrementing a SCEV expression (in general) changes the step of the SCEV
+    // expression as well, we cannot use the step of the current expression.
+    // Instead, we have to use the step of the very expression we're trying to
+    // compute!
+    //
+    // We solve the issue by recursively building up the result, starting from
+    // the "least significant" operand in the add recurrence:
     //
-    // Here is an example what will happen if we don't normalize step:
-    //  ORIGINAL ISE:
-    //    {(100 /u {1,+,1}<%bb16>),+,(100 /u {1,+,1}<%bb16>)}<%bb25>
-    //  NORMALIZED ISE:
-    //    {((-1 * (100 /u {1,+,1}<%bb16>)) + (100 /u {0,+,1}<%bb16>)),+,
-    //     (100 /u {0,+,1}<%bb16>)}<%bb25>
-    //  DENORMALIZED BACK ISE:
-    //    {((2 * (100 /u {1,+,1}<%bb16>)) + (-1 * (100 /u {2,+,1}<%bb16>))),+,
-    //     (100 /u {1,+,1}<%bb16>)}<%bb25>
-    //  Note that the initial value changes after normalization +
-    //  denormalization, which isn't correct.
-    if (Pred(AR)) {
-      const SCEV *TransformedStep = visit(AR->getStepRecurrence(SE));
-      Result = SE.getMinusSCEV(Result, TransformedStep);
-    }
-    break;
-  case Denormalize:
-    // Here we want to normalize step expressions for the same reasons, as
-    // stated above.
-    if (Pred(AR)) {
-      const SCEV *TransformedStep = visit(AR->getStepRecurrence(SE));
-      Result = SE.getAddExpr(Result, TransformedStep);
-    }
-    break;
+    // Base case:
+    //   Single operand add recurrence.  It's its own normalization.
+    //
+    // N-operand case:
+    //   {S_{N-1},+,S_{N-2},+,...,+,S_0} = S
+    //
+    //   Since the step recurrence of S is {S_{N-2},+,...,+,S_0}, we know its
+    //   normalization by induction .  We subtract the normalized step
+    //   recurrence from S_{N-1} to get the normalization of S.
+
+    for (int i = Operands.size() - 2; i >= 0; i--)
+      Operands[i] = SE.getMinusSCEV(Operands[i], Operands[i + 1]);
   }
-  return Result;
+
+  return SE.getAddRecExpr(Operands, AR->getLoop(), SCEV::FlagAnyWrap);
 }
 
 const SCEV *llvm::normalizeForPostIncUse(const SCEV *S,
Index: unittests/Analysis/ScalarEvolutionTest.cpp
===================================================================
--- unittests/Analysis/ScalarEvolutionTest.cpp
+++ unittests/Analysis/ScalarEvolutionTest.cpp
@@ -640,6 +640,25 @@
       "for.end.loopexit: "
       "  ret void "
       "} "
+      " "
+      "define void @f_2(i32 %a, i32 %b, i32 %c, i32 %d) "
+      "    local_unnamed_addr { "
+      "entry: "
+      "  br label %loop_0 "
+      " "
+      "loop_0: "
+      "  br i1 undef, label %loop_0, label %loop_1 "
+      " "
+      "loop_1: "
+      "  br i1 undef, label %loop_2, label %loop_1 "
+      " "
+      " "
+      "loop_2: "
+      "  br i1 undef, label %end, label %loop_2 "
+      " "
+      "end: "
+      "  ret void "
+      "} "
       ,
       Err, C);
 
@@ -664,7 +683,85 @@
     auto *D1 = denormalizeForPostIncUse(N1, Loops, SE);
     EXPECT_EQ(S1, D1) << *S1 << " " << *D1;
   });
-}
 
+  runWithSE(*M, "f_2", [&](Function &F, LoopInfo &LI, ScalarEvolution &SE) {
+    auto *L2 = *LI.begin();
+    auto *L1 = *std::next(LI.begin());
+    auto *L0 = *std::next(LI.begin(), 2);
+
+    auto GetAddRec = [&SE](const Loop *L, std::initializer_list<const SCEV *> Ops) {
+      SmallVector<const SCEV *, 4> OpsCopy(Ops);
+      return SE.getAddRecExpr(OpsCopy, L, SCEV::FlagAnyWrap);
+    };
+
+    auto GetAdd = [&SE](std::initializer_list<const SCEV *> Ops) {
+      SmallVector<const SCEV *, 4> OpsCopy(Ops);
+      return SE.getAddExpr(OpsCopy, SCEV::FlagAnyWrap);
+    };
+
+    // We first populate the AddRecs vector with a few "interesting" SCEV
+    // expressions, and then we go through the list and assert that each
+    // expression in it has an invertible normalization.
+
+    std::vector<const SCEV *> Exprs;
+    {
+      const SCEV *V0 = SE.getSCEV(&*F.arg_begin());
+      const SCEV *V1 = SE.getSCEV(&*std::next(F.arg_begin(), 1));
+      const SCEV *V2 = SE.getSCEV(&*std::next(F.arg_begin(), 2));
+      const SCEV *V3 = SE.getSCEV(&*std::next(F.arg_begin(), 3));
+
+      Exprs.push_back(GetAddRec(L0, {V0}));             // 0
+      Exprs.push_back(GetAddRec(L0, {V0, V1}));         // 1
+      Exprs.push_back(GetAddRec(L0, {V0, V1, V2}));     // 2
+      Exprs.push_back(GetAddRec(L0, {V0, V1, V2, V3})); // 3
+
+      Exprs.push_back(
+          GetAddRec(L1, {Exprs[1], Exprs[2], Exprs[3], Exprs[0]})); // 4
+      Exprs.push_back(
+          GetAddRec(L1, {Exprs[1], Exprs[2], Exprs[0], Exprs[3]})); // 5
+      Exprs.push_back(
+          GetAddRec(L1, {Exprs[1], Exprs[3], Exprs[3], Exprs[1]})); // 6
+
+      Exprs.push_back(GetAdd({Exprs[6], Exprs[3], V2})); // 7
+
+      Exprs.push_back(
+          GetAddRec(L2, {Exprs[4], Exprs[3], Exprs[3], Exprs[5]})); // 8
+
+      Exprs.push_back(
+          GetAddRec(L2, {Exprs[4], Exprs[6], Exprs[7], Exprs[3], V0})); // 9
+    }
+
+    std::vector<PostIncLoopSet> LoopSets;
+    for (int i = 0; i < 8; i++) {
+      LoopSets.emplace_back();
+      if (i & 1)
+        LoopSets.back().insert(L0);
+      if (i & 2)
+        LoopSets.back().insert(L1);
+      if (i & 4)
+        LoopSets.back().insert(L2);
+    }
+
+    for (const auto &LoopSet : LoopSets)
+      for (auto *S : Exprs) {
+        {
+          auto *N = llvm::normalizeForPostIncUse(S, LoopSet, SE);
+          auto *D = llvm::denormalizeForPostIncUse(N, LoopSet, SE);
+
+          // Normalization and then denormalizing better give us back the same
+          // value.
+          EXPECT_EQ(S, D) << "S = " << *S << "  D = " << *D << " N = " << *N;
+        }
+        {
+          auto *D = llvm::denormalizeForPostIncUse(S, LoopSet, SE);
+          auto *N = llvm::normalizeForPostIncUse(D, LoopSet, SE);
+
+          // Denormalization and then normalizing better give us back the same
+          // value.
+          EXPECT_EQ(S, N) << "S = " << *S << "  N = " << *N;
+        }
+      }
+  });
+}
 }  // end anonymous namespace
 }  // end namespace llvm