Index: lib/Transforms/Utils/LoopUtils.cpp
===================================================================
--- lib/Transforms/Utils/LoopUtils.cpp
+++ lib/Transforms/Utils/LoopUtils.cpp
@@ -356,11 +356,46 @@
   // If we think Phi may have been type-promoted, we also need to ensure that
   // all source operands of the reduction are either SExtInsts or ZEstInsts. If
   // so, we will be able to evaluate the reduction in the narrower bit width.
-  if (Start != Phi)
+  if (Start != Phi) {
     if (!getSourceExtensionKind(Start, ExitInstruction, RecurrenceType,
                                 IsSigned, VisitedInsts, CastInsts))
       return false;
 
+    // We need to be certain that we are dealing with a type-promoted Phi, so
+    // check that the value leaving the loop is used only by an LCSSA Phi node
+    // that is either dead or truncated to the type we expect.
+    PHINode *LCSSAPhi = nullptr;
+    for (User *U : ExitInstruction->users()) {
+      Instruction *UI = cast<Instruction>(U);
+      if (!TheLoop->contains(UI->getParent())) {
+        // Two users outside the loop, we should not have it.
+        if (LCSSAPhi)
+          return false;
+
+        PHINode *PN = dyn_cast<PHINode>(UI);
+        // LCSSA Phi node we are looking for is a Phi with 1 input.
+        if (!PN || PN->getNumOperands() != 1)
+          return false;
+        LCSSAPhi = PN;
+      }
+    }
+
+    if (LCSSAPhi) {
+      // If the LCSSA Phi has uses, it must have exactly one, and that use must
+      // be a trunc to RecurrenceType. Otherwise we are not dealing with a
+      // type-promoted Phi node.
+      if (LCSSAPhi->hasNUsesOrMore(2))
+        return false;
+
+      if (LCSSAPhi->hasOneUse()) {
+        TruncInst *Trunc =
+            dyn_cast<TruncInst>(LCSSAPhi->use_begin()->getUser());
+        if (!Trunc || Trunc->getType() != RecurrenceType)
+          return false;
+      }
+    }
+  }
+
   // We found a reduction var if we have reached the original phi node and we
   // only have a single instruction with out-of-loop users.
 
Index: test/Transforms/LoopVectorize/and-plus.ll
===================================================================
--- test/Transforms/LoopVectorize/and-plus.ll
+++ test/Transforms/LoopVectorize/and-plus.ll
@@ -0,0 +1,109 @@
+; RUN: opt < %s  -loop-vectorize -S | FileCheck %s
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128-ni:1"
+target triple = "x86_64-unknown-linux-gnu"
+
+; This should not be vectorized. The current algorithm tries to choose type i1
+; for vectorization because the accumulator is and-ed with 1, but the result of
+; this calculation does not fit into type i1.
+; TODO: We can vectorize this loop with type i32. For this, we need to be able
+; to undo the changes made by lookThroughAnd after we find out that our idea
+; that we are dealing with an extended Phi of type i1 was wrong.
+define i8 @test_01(i8 %c) #0 {
+
+; CHECK-LABEL: @test_01(
+; CHECK-NOT:   vector.body:
+; CHECK-NOT:   zext i1 {{.*}} to i8
+
+entry:
+  br label %loop
+
+exit:                                           ; preds = %loop
+  ret i8 %accum.plus                 ; full i8 result is used outside the loop
+
+loop:                                            ; preds = %loop, %entry
+  %accum.phi = phi i8 [ %c, %entry ], [ %accum.plus, %loop ]
+  %iv = phi i32 [ 1, %entry ], [ %iv.next, %loop ]
+  %accum.and = and i8 %accum.phi, 1  ; keep only the low bit of the accumulator
+  %accum.plus = add nuw nsw i8 %accum.and, 3 ; 3 or 4, which does not fit in i1
+  %iv.next = add nuw nsw i32 %iv, 1
+  %cond = icmp ugt i32 %iv, 191      ; leave the loop once %iv exceeds 191
+  br i1 %cond, label %exit, label %loop
+}
+
+; This can be vectorized with type i1 because the result is not used.
+define void @test_02(i8 %c) #0 {
+
+; CHECK-LABEL: @test_02(
+; CHECK:   vector.body:
+; CHECK:   zext i1 {{.*}} to i8
+
+entry:
+  br label %loop
+
+exit:                                           ; preds = %loop
+  %lcssa = phi i8 [ %accum.plus, %loop ] ; single-input phi with no users
+  ret void
+
+loop:                                            ; preds = %loop, %entry
+  %accum.phi = phi i8 [ %c, %entry ], [ %accum.plus, %loop ]
+  %iv = phi i32 [ 1, %entry ], [ %iv.next, %loop ]
+  %accum.and = and i8 %accum.phi, 1  ; keep only the low bit of the accumulator
+  %accum.plus = add nuw nsw i8 %accum.and, 3 ; 3 or 4, which does not fit in i1
+  %iv.next = add nuw nsw i32 %iv, 1
+  %cond = icmp ugt i32 %iv, 191      ; leave the loop once %iv exceeds 191
+  br i1 %cond, label %exit, label %loop
+}
+
+; This can be vectorized with type i1 because the result is truncated properly.
+define i1 @test_03(i8 %c) #0 {
+
+; CHECK-LABEL: @test_03(
+; CHECK:   vector.body:
+; CHECK:   zext i1 {{.*}} to i8
+
+entry:
+  br label %loop
+
+exit:                                           ; preds = %loop
+  %lcssa = phi i8 [ %accum.plus, %loop ] ; single-input phi, used only by %trunc
+  %trunc = trunc i8 %lcssa to i1     ; narrows the loop result to i1
+  ret i1 %trunc
+
+loop:                                            ; preds = %loop, %entry
+  %accum.phi = phi i8 [ %c, %entry ], [ %accum.plus, %loop ]
+  %iv = phi i32 [ 1, %entry ], [ %iv.next, %loop ]
+  %accum.and = and i8 %accum.phi, 1  ; keep only the low bit of the accumulator
+  %accum.plus = add nuw nsw i8 %accum.and, 3 ; 3 or 4 before the trunc to i1
+  %iv.next = add nuw nsw i32 %iv, 1
+  %cond = icmp ugt i32 %iv, 191      ; leave the loop once %iv exceeds 191
+  br i1 %cond, label %exit, label %loop
+}
+
+; This cannot be vectorized with type i1 because the result is truncated to a
+; wrong type.
+; TODO: It can also be vectorized with type i32 (or maybe i4?) if we learn to
+; undo lookThroughAnd.
+define i4 @test_04(i8 %c) #0 {
+
+; CHECK-LABEL: @test_04(
+; CHECK-NOT:   vector.body:
+; CHECK-NOT:   zext i1 {{.*}} to i8
+
+entry:
+  br label %loop
+
+exit:                                           ; preds = %loop
+  %lcssa = phi i8 [ %accum.plus, %loop ] ; single-input phi, used only by %trunc
+  %trunc = trunc i8 %lcssa to i4     ; narrows to i4, which is wider than i1
+  ret i4 %trunc
+
+loop:                                            ; preds = %loop, %entry
+  %accum.phi = phi i8 [ %c, %entry ], [ %accum.plus, %loop ]
+  %iv = phi i32 [ 1, %entry ], [ %iv.next, %loop ]
+  %accum.and = and i8 %accum.phi, 1  ; keep only the low bit of the accumulator
+  %accum.plus = add nuw nsw i8 %accum.and, 3 ; 3 or 4, which does not fit in i1
+  %iv.next = add nuw nsw i32 %iv, 1
+  %cond = icmp ugt i32 %iv, 191      ; leave the loop once %iv exceeds 191
+  br i1 %cond, label %exit, label %loop
+}