diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -3219,6 +3219,14 @@
   Optional<MDNode *> VectorizedLoopID =
       makeFollowupLoopID(OrigLoopID, {LLVMLoopVectorizeFollowupAll,
                                       LLVMLoopVectorizeFollowupVectorized});
+
+  // If VF*UF is 2 and the vector loop is not skipped, the remainder runs once.
+  if (VF * UF == 2 && !areSafetyChecksAdded() &&
+      !Cost->requiresScalarEpilogue()) {
+    ScalarLatchBr->setCondition(
+        Builder.getInt1(ScalarLatchBr->getSuccessor(0) == LoopExitBlock));
+  }
+
   if (VectorizedLoopID.hasValue()) {
     L->setLoopID(VectorizedLoopID.getValue());
@@ -3691,7 +3699,7 @@
   // loop iterations are now distributed among them. Note that original loop
   // represented by LoopScalarBody becomes remainder loop after vectorization.
   //
-  // For cases like foldTailByMasking() and requiresScalarEpiloque() we may
+  // For cases like foldTailByMasking() and requiresScalarEpilogue() we may
   // end up getting slightly roughened result but that should be OK since
   // profile is not inherently precise anyway. Note also possible bypass of
   // vector code caused by legality checks is ignored, assigning all the weight
diff --git a/llvm/test/Transforms/LoopVectorize/ARM/sphinx.ll b/llvm/test/Transforms/LoopVectorize/ARM/sphinx.ll
--- a/llvm/test/Transforms/LoopVectorize/ARM/sphinx.ll
+++ b/llvm/test/Transforms/LoopVectorize/ARM/sphinx.ll
@@ -93,7 +93,7 @@
 ; CHECK-NEXT:    [[SUB127]] = fsub fast double [[DVAL1_4131]], [[MUL126]]
 ; CHECK-NEXT:    [[INC129]] = add nuw nsw i32 [[I_2132]], 1
 ; CHECK-NEXT:    [[EXITCOND143:%.*]] = icmp eq i32 [[INC129]], [[T]]
-; CHECK-NEXT:    br i1 [[EXITCOND143]], label [[OUTEREND]], label [[INNERLOOP]], !llvm.loop !2
+; CHECK-NEXT:    br i1 true, label [[OUTEREND]], label [[INNERLOOP]], !llvm.loop !2
 ; CHECK:       outerend:
 ; CHECK-NEXT:    [[SUB127_LCSSA:%.*]] = phi double [ [[SUB127]], [[INNERLOOP]] ], [ [[TMP18]], [[MIDDLE_BLOCK]] ]
 ; CHECK-NEXT:    [[CONV138:%.*]] = fptosi double [[SUB127_LCSSA]] to i32
diff --git a/llvm/test/Transforms/LoopVectorize/SystemZ/predicated-first-order-recurrence.ll b/llvm/test/Transforms/LoopVectorize/SystemZ/predicated-first-order-recurrence.ll
--- a/llvm/test/Transforms/LoopVectorize/SystemZ/predicated-first-order-recurrence.ll
+++ b/llvm/test/Transforms/LoopVectorize/SystemZ/predicated-first-order-recurrence.ll
@@ -79,7 +79,7 @@
 ; CHECK-NEXT:    store i32 [[SCALAR_RECUR]], i32* [[B_PTR]], align 4
 ; CHECK-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
 ; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 5
-; CHECK-NEXT:    br i1 [[EXITCOND]], label [[EXIT]], label [[LOOP]], !llvm.loop !2
+; CHECK-NEXT:    br i1 true, label [[EXIT]], label [[LOOP]], !llvm.loop !2
 ; CHECK:       exit:
 ; CHECK-NEXT:    ret void
 ;
diff --git a/llvm/test/Transforms/LoopVectorize/X86/constant-fold.ll b/llvm/test/Transforms/LoopVectorize/X86/constant-fold.ll
--- a/llvm/test/Transforms/LoopVectorize/X86/constant-fold.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/constant-fold.ll
@@ -30,7 +30,7 @@
 ; CHECK-NEXT:    store <2 x i16*> , <2 x i16*>* [[TMP4]], align 8
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add i32 [[INDEX]], 2
 ; CHECK-NEXT:    [[TMP5:%.*]] = icmp eq i32 [[INDEX_NEXT]], 2
-; CHECK-NEXT:    br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop !0
+; CHECK-NEXT:    br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], [[LOOP0:!llvm.loop !.*]]
 ; CHECK:       middle.block:
 ; CHECK-NEXT:    [[CMP_N:%.*]] = icmp eq i32 2, 2
 ; CHECK-NEXT:    br i1 [[CMP_N]], label [[BB3:%.*]], label [[SCALAR_PH]]
@@ -44,10 +44,10 @@
 ; CHECK-NEXT:    [[_TMP4:%.*]] = bitcast %rec8* [[_TMP2]] to i16*
 ; CHECK-NEXT:    [[_TMP6:%.*]] = sext i16 [[C_1_0]] to i64
 ; CHECK-NEXT:    [[_TMP7:%.*]] = getelementptr [2 x i16*], [2 x i16*]* @b, i16 0, i64 [[_TMP6]]
-; CHECK-NEXT:    store i16* [[_TMP4]], i16** [[_TMP7]]
+; CHECK-NEXT:    store i16* [[_TMP4]], i16** [[_TMP7]], align 8
 ; CHECK-NEXT:    [[_TMP9]] = add nsw i16 [[C_1_0]], 1
 ; CHECK-NEXT:    [[_TMP11:%.*]] = icmp slt i16 [[_TMP9]], 2
-; CHECK-NEXT:    br i1 [[_TMP11]], label [[BB2]], label [[BB3]], !llvm.loop !2
+; CHECK-NEXT:    br i1 false, label [[BB2]], label [[BB3]], [[LOOP2:!llvm.loop !.*]]
 ; CHECK:       bb3:
 ; CHECK-NEXT:    ret void
 ;
diff --git a/llvm/test/Transforms/LoopVectorize/if-pred-stores.ll b/llvm/test/Transforms/LoopVectorize/if-pred-stores.ll
--- a/llvm/test/Transforms/LoopVectorize/if-pred-stores.ll
+++ b/llvm/test/Transforms/LoopVectorize/if-pred-stores.ll
@@ -39,19 +39,18 @@
 ; UNROLL-NEXT:    [[CMP_N:%.*]] = icmp eq i64 128, 128
 ; UNROLL-NEXT:    br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[FOR_BODY:%.*]]
 ; UNROLL:       for.body:
-; UNROLL-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_INC:%.*]] ], [ 128, [[MIDDLE_BLOCK]] ]
-; UNROLL-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[F]], i64 [[INDVARS_IV]]
+; UNROLL-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[F]], i64 128
 ; UNROLL-NEXT:    [[TMP9:%.*]] = load i32, i32* [[ARRAYIDX]], align 4
 ; UNROLL-NEXT:    [[CMP1:%.*]] = icmp sgt i32 [[TMP9]], 100
-; UNROLL-NEXT:    br i1 [[CMP1]], label [[IF_THEN:%.*]], label [[FOR_INC]]
+; UNROLL-NEXT:    br i1 [[CMP1]], label [[IF_THEN:%.*]], label [[FOR_INC:%.*]]
 ; UNROLL:       if.then:
 ; UNROLL-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP9]], 20
 ; UNROLL-NEXT:    store i32 [[ADD]], i32* [[ARRAYIDX]], align 4
 ; UNROLL-NEXT:    br label [[FOR_INC]]
 ; UNROLL:       for.inc:
-; UNROLL-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
+; UNROLL-NEXT:    [[INDVARS_IV_NEXT:%.*]] = add nuw nsw i64 128, 1
 ; UNROLL-NEXT:    [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 128
-; UNROLL-NEXT:    br i1 [[EXITCOND]], label [[FOR_END]], label [[FOR_BODY]], [[LOOP2:!llvm.loop !.*]]
+; UNROLL-NEXT:    br label [[FOR_END]]
 ; UNROLL:       for.end:
 ; UNROLL-NEXT:    ret i32 0
 ;
@@ -104,7 +103,7 @@
 ; UNROLL-NOSIMPLIFY:       for.inc:
 ; UNROLL-NOSIMPLIFY-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
 ; UNROLL-NOSIMPLIFY-NEXT:    [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 128
-; UNROLL-NOSIMPLIFY-NEXT:    br i1 [[EXITCOND]], label [[FOR_END]], label [[FOR_BODY]], [[LOOP2:!llvm.loop !.*]]
+; UNROLL-NOSIMPLIFY-NEXT:    br i1 true, label [[FOR_END]], label [[FOR_BODY]], [[LOOP2:!llvm.loop !.*]]
 ; UNROLL-NOSIMPLIFY:       for.end:
 ; UNROLL-NOSIMPLIFY-NEXT:    ret i32 0
 ;
@@ -144,19 +143,18 @@
 ; VEC-NEXT:    [[CMP_N:%.*]] = icmp eq i64 128, 128
 ; VEC-NEXT:    br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[FOR_BODY:%.*]]
 ; VEC:       for.body:
-; VEC-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_INC:%.*]] ], [ 128, [[MIDDLE_BLOCK]] ]
-; VEC-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[F]], i64 [[INDVARS_IV]]
+; VEC-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[F]], i64 128
 ; VEC-NEXT:    [[TMP14:%.*]] = load i32, i32* [[ARRAYIDX]], align 4
 ; VEC-NEXT:    [[CMP1:%.*]] = icmp sgt i32 [[TMP14]], 100
-; VEC-NEXT:    br i1 [[CMP1]], label [[IF_THEN:%.*]], label [[FOR_INC]]
+; VEC-NEXT:    br i1 [[CMP1]], label [[IF_THEN:%.*]], label [[FOR_INC:%.*]]
 ; VEC:       if.then:
 ; VEC-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP14]], 20
 ; VEC-NEXT:    store i32 [[ADD]], i32* [[ARRAYIDX]], align 4
 ; VEC-NEXT:    br label [[FOR_INC]]
 ; VEC:       for.inc:
-; VEC-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
+; VEC-NEXT:    [[INDVARS_IV_NEXT:%.*]] = add nuw nsw i64 128, 1
 ; VEC-NEXT:    [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 128
-; VEC-NEXT:    br i1 [[EXITCOND]], label [[FOR_END]], label [[FOR_BODY]], [[LOOP2:!llvm.loop !.*]]
+; VEC-NEXT:    br label [[FOR_END]]
 ; VEC:       for.end:
 ; VEC-NEXT:    ret i32 0
 ;
@@ -196,24 +194,7 @@
 ; UNROLL-NEXT:  entry:
 ; UNROLL-NEXT:    [[TMP0:%.*]] = xor i1 [[COND:%.*]], true
 ; UNROLL-NEXT:    call void @llvm.assume(i1 [[TMP0]])
-; UNROLL-NEXT:    br label [[FOR_BODY14:%.*]]
-; UNROLL:       for.body14:
-; UNROLL-NEXT:    [[INDVARS_IV3:%.*]] = phi i64 [ [[INDVARS_IV_NEXT4:%.*]], [[FOR_INC23:%.*]] ], [ undef, [[ENTRY:%.*]] ]
-; UNROLL-NEXT:    [[INEWCHUNKS_120:%.*]] = phi i32 [ [[INEWCHUNKS_2:%.*]], [[FOR_INC23]] ], [ undef, [[ENTRY]] ]
-; UNROLL-NEXT:    [[ARRAYIDX16:%.*]] = getelementptr inbounds [768 x i32], [768 x i32]* undef, i64 0, i64 [[INDVARS_IV3]]
-; UNROLL-NEXT:    [[TMP:%.*]] = load i32, i32* [[ARRAYIDX16]], align 4
-; UNROLL-NEXT:    br i1 undef, label [[IF_THEN18:%.*]], label [[FOR_INC23]]
-; UNROLL:       if.then18:
-; UNROLL-NEXT:    store i32 2, i32* [[ARRAYIDX16]], align 4
-; UNROLL-NEXT:    [[INC21:%.*]] = add nsw i32 [[INEWCHUNKS_120]], 1
-; UNROLL-NEXT:    br label [[FOR_INC23]]
-; UNROLL:       for.inc23:
-; UNROLL-NEXT:    [[INEWCHUNKS_2]] = phi i32 [ [[INC21]], [[IF_THEN18]] ], [ [[INEWCHUNKS_120]], [[FOR_BODY14]] ]
-; UNROLL-NEXT:    [[INDVARS_IV_NEXT4]] = add nsw i64 [[INDVARS_IV3]], 1
-; UNROLL-NEXT:    [[TMP1:%.*]] = trunc i64 [[INDVARS_IV3]] to i32
-; UNROLL-NEXT:    [[CMP13:%.*]] = icmp slt i32 [[TMP1]], 0
-; UNROLL-NEXT:    call void @llvm.assume(i1 [[CMP13]])
-; UNROLL-NEXT:    br label [[FOR_BODY14]]
+; UNROLL-NEXT:    unreachable
 ;
 ; UNROLL-NOSIMPLIFY-LABEL: @bug18724(
 ; UNROLL-NOSIMPLIFY-NEXT:  entry:
@@ -279,7 +260,7 @@
 ; UNROLL-NOSIMPLIFY-NEXT:    [[INDVARS_IV_NEXT4]] = add nsw i64 [[INDVARS_IV3]], 1
 ; UNROLL-NOSIMPLIFY-NEXT:    [[TMP1:%.*]] = trunc i64 [[INDVARS_IV3]] to i32
 ; UNROLL-NOSIMPLIFY-NEXT:    [[CMP13:%.*]] = icmp slt i32 [[TMP1]], 0
-; UNROLL-NOSIMPLIFY-NEXT:    br i1 [[CMP13]], label [[FOR_BODY14]], label [[FOR_INC26_LOOPEXIT]], [[LOOP4:!llvm.loop !.*]]
+; UNROLL-NOSIMPLIFY-NEXT:    br i1 false, label [[FOR_BODY14]], label [[FOR_INC26_LOOPEXIT]], [[LOOP4:!llvm.loop !.*]]
 ; UNROLL-NOSIMPLIFY:       for.inc26.loopexit:
 ; UNROLL-NOSIMPLIFY-NEXT:    [[INEWCHUNKS_2_LCSSA:%.*]] = phi i32 [ [[INEWCHUNKS_2]], [[FOR_INC23]] ], [ [[BIN_RDX]], [[MIDDLE_BLOCK]] ]
 ; UNROLL-NOSIMPLIFY-NEXT:    br label [[FOR_INC26]]
@@ -291,24 +272,7 @@
 ; VEC-NEXT:  entry:
 ; VEC-NEXT:    [[TMP0:%.*]] = xor i1 [[COND:%.*]], true
 ; VEC-NEXT:    call void @llvm.assume(i1 [[TMP0]])
-; VEC-NEXT:    br label [[FOR_BODY14:%.*]]
-; VEC:       for.body14:
-; VEC-NEXT:    [[INDVARS_IV3:%.*]] = phi i64 [ [[INDVARS_IV_NEXT4:%.*]], [[FOR_INC23:%.*]] ], [ undef, [[ENTRY:%.*]] ]
-; VEC-NEXT:    [[INEWCHUNKS_120:%.*]] = phi i32 [ [[INEWCHUNKS_2:%.*]], [[FOR_INC23]] ], [ undef, [[ENTRY]] ]
-; VEC-NEXT:    [[ARRAYIDX16:%.*]] = getelementptr inbounds [768 x i32], [768 x i32]* undef, i64 0, i64 [[INDVARS_IV3]]
-; VEC-NEXT:    [[TMP:%.*]] = load i32, i32* [[ARRAYIDX16]], align 4
-; VEC-NEXT:    br i1 undef, label [[IF_THEN18:%.*]], label [[FOR_INC23]]
-; VEC:       if.then18:
-; VEC-NEXT:    store i32 2, i32* [[ARRAYIDX16]], align 4
-; VEC-NEXT:    [[INC21:%.*]] = add nsw i32 [[INEWCHUNKS_120]], 1
-; VEC-NEXT:    br label [[FOR_INC23]]
-; VEC:       for.inc23:
-; VEC-NEXT:    [[INEWCHUNKS_2]] = phi i32 [ [[INC21]], [[IF_THEN18]] ], [ [[INEWCHUNKS_120]], [[FOR_BODY14]] ]
-; VEC-NEXT:    [[INDVARS_IV_NEXT4]] = add nsw i64 [[INDVARS_IV3]], 1
-; VEC-NEXT:    [[TMP1:%.*]] = trunc i64 [[INDVARS_IV3]] to i32
-; VEC-NEXT:    [[CMP13:%.*]] = icmp slt i32 [[TMP1]], 0
-; VEC-NEXT:    call void @llvm.assume(i1 [[CMP13]])
-; VEC-NEXT:    br label [[FOR_BODY14]]
+; VEC-NEXT:    unreachable
 ;
 entry:
   br label %for.body9
@@ -371,26 +335,24 @@
 ; UNROLL:       pred.store.continue6:
 ; UNROLL-NEXT:    [[INDEX_NEXT]] = add i64 [[INDEX]], 2
 ; UNROLL-NEXT:    [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], undef
-; UNROLL-NEXT:    br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], [[LOOP3:!llvm.loop !.*]]
+; UNROLL-NEXT:    br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], [[LOOP2:!llvm.loop !.*]]
 ; UNROLL:       middle.block:
 ; UNROLL-NEXT:    [[CMP_N:%.*]] = icmp eq i64 undef, undef
 ; UNROLL-NEXT:    br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[FOR_BODY:%.*]]
 ; UNROLL:       for.body:
-; UNROLL-NEXT:    [[TMP0:%.*]] = phi i64 [ [[TMP6:%.*]], [[FOR_INC:%.*]] ], [ undef, [[MIDDLE_BLOCK]] ]
-; UNROLL-NEXT:    [[TMP1:%.*]] = phi i64 [ [[TMP7:%.*]], [[FOR_INC]] ], [ undef, [[MIDDLE_BLOCK]] ]
-; UNROLL-NEXT:    [[TMP2:%.*]] = getelementptr i8, i8* undef, i64 [[TMP0]]
+; UNROLL-NEXT:    [[TMP2:%.*]] = getelementptr i8, i8* undef, i64 undef
 ; UNROLL-NEXT:    [[TMP3:%.*]] = load i8, i8* [[TMP2]], align 1
-; UNROLL-NEXT:    br i1 [[C]], label [[IF_THEN:%.*]], label [[FOR_INC]]
+; UNROLL-NEXT:    br i1 [[C]], label [[IF_THEN:%.*]], label [[FOR_INC:%.*]]
 ; UNROLL:       if.then:
 ; UNROLL-NEXT:    [[TMP4:%.*]] = zext i8 [[TMP3]] to i32
 ; UNROLL-NEXT:    [[TMP5:%.*]] = trunc i32 [[TMP4]] to i8
 ; UNROLL-NEXT:    store i8 [[TMP5]], i8* [[TMP2]], align 1
 ; UNROLL-NEXT:    br label [[FOR_INC]]
 ; UNROLL:       for.inc:
-; UNROLL-NEXT:    [[TMP6]] = add nuw nsw i64 [[TMP0]], 1
-; UNROLL-NEXT:    [[TMP7]] = add i64 [[TMP1]], -1
+; UNROLL-NEXT:    [[TMP6:%.*]] = add nuw nsw i64 undef, 1
+; UNROLL-NEXT:    [[TMP7:%.*]] = add i64 undef, -1
 ; UNROLL-NEXT:    [[TMP8:%.*]] = icmp eq i64 [[TMP7]], 0
-; UNROLL-NEXT:    br i1 [[TMP8]], label [[FOR_END]], label [[FOR_BODY]], [[LOOP4:!llvm.loop !.*]]
+; UNROLL-NEXT:    br label [[FOR_END]]
 ; UNROLL:       for.end:
 ; UNROLL-NEXT:    ret void
 ;
@@ -449,7 +411,7 @@
 ; UNROLL-NOSIMPLIFY-NEXT:    [[TMP6]] = add nuw nsw i64 [[TMP0]], 1
 ; UNROLL-NOSIMPLIFY-NEXT:    [[TMP7]] = add i64 [[TMP1]], -1
 ; UNROLL-NOSIMPLIFY-NEXT:    [[TMP8:%.*]] = icmp eq i64 [[TMP7]], 0
-; UNROLL-NOSIMPLIFY-NEXT:    br i1 [[TMP8]], label [[FOR_END]], label [[FOR_BODY]], [[LOOP6:!llvm.loop !.*]]
+; UNROLL-NOSIMPLIFY-NEXT:    br i1 true, label [[FOR_END]], label [[FOR_BODY]], [[LOOP6:!llvm.loop !.*]]
 ; UNROLL-NOSIMPLIFY:       for.end:
 ; UNROLL-NOSIMPLIFY-NEXT:    ret void
 ;
@@ -489,26 +451,24 @@
 ; VEC:       pred.store.continue3:
 ; VEC-NEXT:    [[INDEX_NEXT]] = add i64 [[INDEX]], 2
 ; VEC-NEXT:    [[TMP15:%.*]] = icmp eq i64 [[INDEX_NEXT]], undef
-; VEC-NEXT:    br i1 [[TMP15]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], [[LOOP4:!llvm.loop !.*]]
+; VEC-NEXT:    br i1 [[TMP15]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], [[LOOP2:!llvm.loop !.*]]
 ; VEC:       middle.block:
 ; VEC-NEXT:    [[CMP_N:%.*]] = icmp eq i64 undef, undef
 ; VEC-NEXT:    br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[FOR_BODY:%.*]]
 ; VEC:       for.body:
-; VEC-NEXT:    [[TMP0:%.*]] = phi i64 [ [[TMP6:%.*]], [[FOR_INC:%.*]] ], [ undef, [[MIDDLE_BLOCK]] ]
-; VEC-NEXT:    [[TMP1:%.*]] = phi i64 [ [[TMP7:%.*]], [[FOR_INC]] ], [ undef, [[MIDDLE_BLOCK]] ]
-; VEC-NEXT:    [[TMP2:%.*]] = getelementptr i8, i8* undef, i64 [[TMP0]]
+; VEC-NEXT:    [[TMP2:%.*]] = getelementptr i8, i8* undef, i64 undef
 ; VEC-NEXT:    [[TMP3:%.*]] = load i8, i8* [[TMP2]], align 1
-; VEC-NEXT:    br i1 [[C]], label [[IF_THEN:%.*]], label [[FOR_INC]]
+; VEC-NEXT:    br i1 [[C]], label [[IF_THEN:%.*]], label [[FOR_INC:%.*]]
 ; VEC:       if.then:
 ; VEC-NEXT:    [[TMP4:%.*]] = zext i8 [[TMP3]] to i32
 ; VEC-NEXT:    [[TMP5:%.*]] = trunc i32 [[TMP4]] to i8
 ; VEC-NEXT:    store i8 [[TMP5]], i8* [[TMP2]], align 1
 ; VEC-NEXT:    br label [[FOR_INC]]
 ; VEC:       for.inc:
-; VEC-NEXT:    [[TMP6]] = add nuw nsw i64 [[TMP0]], 1
-; VEC-NEXT:    [[TMP7]] = add i64 [[TMP1]], -1
+; VEC-NEXT:    [[TMP6:%.*]] = add nuw nsw i64 undef, 1
+; VEC-NEXT:    [[TMP7:%.*]] = add i64 undef, -1
 ; VEC-NEXT:    [[TMP8:%.*]] = icmp eq i64 [[TMP7]], 0
-; VEC-NEXT:    br i1 [[TMP8]], label [[FOR_END]], label [[FOR_BODY]], [[LOOP5:!llvm.loop !.*]]
+; VEC-NEXT:    br label [[FOR_END]]
 ; VEC:       for.end:
 ; VEC-NEXT:    ret void
 ;
diff --git a/llvm/test/Transforms/LoopVectorize/memdep-fold-tail.ll b/llvm/test/Transforms/LoopVectorize/memdep-fold-tail.ll
--- a/llvm/test/Transforms/LoopVectorize/memdep-fold-tail.ll
+++ b/llvm/test/Transforms/LoopVectorize/memdep-fold-tail.ll
@@ -67,7 +67,7 @@
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add i32 [[INDEX]], 2
 ; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], <i32 2, i32 2>
 ; CHECK-NEXT:    [[TMP14:%.*]] = icmp eq i32 [[INDEX_NEXT]], 16
-; CHECK-NEXT:    br i1 [[TMP14]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop !0
+; CHECK-NEXT:    br i1 [[TMP14]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], [[LOOP0:!llvm.loop !.*]]
 ; CHECK:       middle.block:
 ; CHECK-NEXT:    br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]]
 ; CHECK:       scalar.ph:
@@ -82,7 +82,7 @@
 ; CHECK-NEXT:    store i8 7, i8* [[AJP3]], align 8
 ; CHECK-NEXT:    [[J_NEXT]] = add nuw nsw i32 [[J]], 1
 ; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp eq i32 [[J_NEXT]], 15
-; CHECK-NEXT:    br i1 [[EXITCOND]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop !2
+; CHECK-NEXT:    br i1 true, label [[FOR_END]], label [[FOR_BODY]], [[LOOP2:!llvm.loop !.*]]
 ; CHECK:       for.end:
 ; CHECK-NEXT:    ret void
 ;
diff --git a/llvm/test/Transforms/LoopVectorize/pr44488-predication.ll b/llvm/test/Transforms/LoopVectorize/pr44488-predication.ll
--- a/llvm/test/Transforms/LoopVectorize/pr44488-predication.ll
+++ b/llvm/test/Transforms/LoopVectorize/pr44488-predication.ll
@@ -52,7 +52,7 @@
 ; CHECK-NEXT:    store i16 [[TMP18]], i16* @v_39, align 1
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add i32 [[INDEX]], 2
 ; CHECK-NEXT:    [[TMP19:%.*]] = icmp eq i32 [[INDEX_NEXT]], 12
-; CHECK-NEXT:    br i1 [[TMP19]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop !0
+; CHECK-NEXT:    br i1 [[TMP19]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], [[LOOP0:!llvm.loop !.*]]
 ; CHECK:       middle.block:
 ; CHECK-NEXT:    [[CMP_N:%.*]] = icmp eq i32 12, 12
 ; CHECK-NEXT:    br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]]
@@ -75,7 +75,7 @@
 ; CHECK-NEXT:    store i16 [[COND6]], i16* @v_39, align 1
 ; CHECK-NEXT:    [[INC7]] = add nsw i16 [[I_07]], 1
 ; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i16 [[INC7]], 111
-; CHECK-NEXT:    br i1 [[CMP]], label [[FOR_BODY]], label [[EXIT]], !llvm.loop !2
+; CHECK-NEXT:    br i1 false, label [[FOR_BODY]], label [[EXIT]], [[LOOP2:!llvm.loop !.*]]
 ; CHECK:       exit:
 ; CHECK-NEXT:    [[RV:%.*]] = load i16, i16* @v_39, align 1
 ; CHECK-NEXT:    ret i16 [[RV]]
diff --git a/llvm/test/Transforms/LoopVectorize/pr44547.ll b/llvm/test/Transforms/LoopVectorize/pr44547.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/Transforms/LoopVectorize/pr44547.ll
@@ -0,0 +1,62 @@
+; RUN: opt -S -loop-vectorize -simplifycfg -force-vector-width=2 -force-vector-interleave=1 < %s | FileCheck %s
+
+target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
+
+;CHECK-LABEL: @single_iter_remainder(
+define void @single_iter_remainder(i16* noalias nocapture readonly %a, i16* noalias nocapture readonly %b, i16* noalias nocapture %c, i32 %n) {
+entry:
+  %cmp7 = icmp eq i32 %n, 0
+  br i1 %cmp7, label %for.cond.cleanup, label %for.body
+
+for.cond.cleanup:                                 ; preds = %for.body, %entry
+  ret void
+
+;CHECK: vector.body:
+;CHECK: for.body:
+;CHECK: br label %for.cond.cleanup
+for.body:                                         ; preds = %entry, %for.body
+  %i.011 = phi i32 [ %inc, %for.body ], [ 0, %entry ]
+  %a.addr.010 = phi i16* [ %incdec.ptr, %for.body ], [ %a, %entry ]
+  %c.addr.09 = phi i16* [ %incdec.ptr4, %for.body ], [ %c, %entry ]
+  %b.addr.08 = phi i16* [ %incdec.ptr1, %for.body ], [ %b, %entry ]
+  %incdec.ptr = getelementptr inbounds i16, i16* %a.addr.010, i64 1
+  %0 = load i16, i16* %a.addr.010, align 2
+  %incdec.ptr1 = getelementptr inbounds i16, i16* %b.addr.08, i64 1
+  %1 = load i16, i16* %b.addr.08, align 2
+  %add = add i16 %1, %0
+  %incdec.ptr4 = getelementptr inbounds i16, i16* %c.addr.09, i64 1
+  store i16 %add, i16* %c.addr.09, align 2
+  %inc = add nuw nsw i32 %i.011, 1
+  %exitcond = icmp eq i32 %inc, %n
+  br i1 %exitcond, label %for.cond.cleanup, label %for.body
+}
+
+;CHECK-LABEL: @single_iter_remainder_checks(
+define void @single_iter_remainder_checks(i16* %a, i16* %b, i16* %c, i32 %n) {
+entry:
+  %cmp7 = icmp eq i32 %n, 0
+  br i1 %cmp7, label %for.cond.cleanup, label %for.body
+
+for.cond.cleanup:                                 ; preds = %for.body, %entry
+  ret void
+
+;CHECK: vector.body:
+;CHECK: for.body:
+;CHECK-NOT: br label %for.cond.cleanup
+;CHECK: br i1 %exitcond, label %for.cond.cleanup, label %for.body
+for.body:                                         ; preds = %entry, %for.body
+  %i.011 = phi i32 [ %inc, %for.body ], [ 0, %entry ]
+  %a.addr.010 = phi i16* [ %incdec.ptr, %for.body ], [ %a, %entry ]
+  %c.addr.09 = phi i16* [ %incdec.ptr4, %for.body ], [ %c, %entry ]
+  %b.addr.08 = phi i16* [ %incdec.ptr1, %for.body ], [ %b, %entry ]
+  %incdec.ptr = getelementptr inbounds i16, i16* %a.addr.010, i64 1
+  %0 = load i16, i16* %a.addr.010, align 2
+  %incdec.ptr1 = getelementptr inbounds i16, i16* %b.addr.08, i64 1
+  %1 = load i16, i16* %b.addr.08, align 2
+  %add = add i16 %1, %0
+  %incdec.ptr4 = getelementptr inbounds i16, i16* %c.addr.09, i64 1
+  store i16 %add, i16* %c.addr.09, align 2
+  %inc = add nuw nsw i32 %i.011, 1
+  %exitcond = icmp eq i32 %inc, %n
+  br i1 %exitcond, label %for.cond.cleanup, label %for.body
+}