diff --git a/llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp b/llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp
--- a/llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp
+++ b/llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp
@@ -2490,12 +2490,11 @@
   // Start - |Step| * Backedge > Start
   Value *Add = nullptr, *Sub = nullptr;
   if (PointerType *ARPtrTy = dyn_cast<PointerType>(ARTy)) {
-    const SCEV *MulS = SE.getSCEV(MulV);
-    const SCEV *NegMulS = SE.getNegativeSCEV(MulS);
-    Add = Builder.CreateBitCast(expandAddToGEP(MulS, ARPtrTy, Ty, StartValue),
-                                ARPtrTy);
-    Sub = Builder.CreateBitCast(
-        expandAddToGEP(NegMulS, ARPtrTy, Ty, StartValue), ARPtrTy);
+    StartValue = InsertNoopCastOfTo(
+        StartValue, Builder.getInt8PtrTy(ARPtrTy->getAddressSpace()));
+    Value *NegMulV = Builder.CreateNeg(MulV);
+    Add = Builder.CreateGEP(Builder.getInt8Ty(), StartValue, MulV);
+    Sub = Builder.CreateGEP(Builder.getInt8Ty(), StartValue, NegMulV);
   } else {
     Add = Builder.CreateAdd(StartValue, MulV);
     Sub = Builder.CreateSub(StartValue, MulV);
diff --git a/llvm/test/Analysis/LoopAccessAnalysis/wrapping-pointer-ni.ll b/llvm/test/Analysis/LoopAccessAnalysis/wrapping-pointer-ni.ll
--- a/llvm/test/Analysis/LoopAccessAnalysis/wrapping-pointer-ni.ll
+++ b/llvm/test/Analysis/LoopAccessAnalysis/wrapping-pointer-ni.ll
@@ -17,10 +17,11 @@
 ; LV: [[OFMul:%[^ ]*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 4, i64 [[Step:%[^ ]*]])
 ; LV-NEXT: [[OFMulResult:%[^ ]*]] = extractvalue { i64, i1 } [[OFMul]], 0
 ; LV-NEXT: [[OFMulOverflow:%[^ ]*]] = extractvalue { i64, i1 } [[OFMul]], 1
-; LV-NEXT: [[PosGEP:%[^ ]*]] = getelementptr i32, i32 addrspace(13)* [[Base:%[^ ]*]], i64 [[Step]]
-; LV-NEXT: [[NegGEP:%[^ ]*]] = getelementptr i32, i32 addrspace(13)* [[Base]], i64 [[NegStep:%[^ ]*]]
-; LV-NEXT: icmp ugt i32 addrspace(13)* [[NegGEP]], [[Base]]
-; LV-NEXT: icmp ult i32 addrspace(13)* [[PosGEP]], [[Base]]
+; LV: [[OFNegMulResult:%[^ ]*]] = sub i64 0, [[OFMulResult]]
+; LV-NEXT: [[PosGEP:%[^ ]*]] = getelementptr i8, i8 addrspace(13)* [[Base:%[^ ]*]], i64 [[OFMulResult]]
+; LV-NEXT: [[NegGEP:%[^ ]*]] = getelementptr i8, i8 addrspace(13)* [[Base]], i64 [[OFNegMulResult]]
+; LV-NEXT: icmp ugt i8 addrspace(13)* [[NegGEP]], [[Base]]
+; LV-NEXT: icmp ult i8 addrspace(13)* [[PosGEP]], [[Base]]
 ; LV-NOT: inttoptr
 ; LV-NOT: ptrtoint
 top:
diff --git a/llvm/test/Transforms/LoopDistribute/scev-inserted-runtime-check.ll b/llvm/test/Transforms/LoopDistribute/scev-inserted-runtime-check.ll
--- a/llvm/test/Transforms/LoopDistribute/scev-inserted-runtime-check.ll
+++ b/llvm/test/Transforms/LoopDistribute/scev-inserted-runtime-check.ll
@@ -29,18 +29,15 @@
 ; CHECK-NEXT: [[MUL2:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 8, i64 [[TMP0]])
 ; CHECK-NEXT: [[MUL_RESULT3:%.*]] = extractvalue { i64, i1 } [[MUL2]], 0
 ; CHECK-NEXT: [[MUL_OVERFLOW4:%.*]] = extractvalue { i64, i1 } [[MUL2]], 1
-; CHECK-NEXT: [[UGLYGEP:%.*]] = getelementptr i8, i8* [[A5]], i64 [[MUL_RESULT3]]
-; CHECK-NEXT: [[TMP11:%.*]] = bitcast i8* [[UGLYGEP]] to i32*
-; CHECK-NEXT: [[TMP12:%.*]] = sub i64 [[MUL_RESULT3]], -8
-; CHECK-NEXT: [[TMP13:%.*]] = sub i64 8, [[TMP12]]
-; CHECK-NEXT: [[UGLYGEP6:%.*]] = getelementptr i8, i8* [[A5]], i64 [[TMP13]]
-; CHECK-NEXT: [[TMP14:%.*]] = bitcast i8* [[UGLYGEP6]] to i32*
-; CHECK-NEXT: [[TMP15:%.*]] = icmp ugt i32* [[TMP14]], [[A]]
-; CHECK-NEXT: [[TMP16:%.*]] = icmp ult i32* [[TMP11]], [[A]]
-; CHECK-NEXT: [[TMP17:%.*]] = select i1 false, i1 [[TMP15]], i1 [[TMP16]]
-; CHECK-NEXT: [[TMP18:%.*]] = or i1 [[TMP17]], [[MUL_OVERFLOW4]]
-; CHECK-NEXT: [[TMP19:%.*]] = or i1 [[TMP10]], [[TMP18]]
-; CHECK-NEXT: br i1 [[TMP19]], label [[FOR_BODY_PH_LVER_ORIG:%.*]], label [[FOR_BODY_PH_LDIST1:%.*]]
+; CHECK-NEXT: [[TMP11:%.*]] = sub i64 0, [[MUL_RESULT3]]
+; CHECK-NEXT: [[TMP12:%.*]] = getelementptr i8, i8* [[A5]], i64 [[MUL_RESULT3]]
+; CHECK-NEXT: [[TMP13:%.*]] = getelementptr i8, i8* [[A5]], i64 [[TMP11]]
+; CHECK-NEXT: [[TMP14:%.*]] = icmp ugt i8* [[TMP13]], [[A5]]
+; CHECK-NEXT: [[TMP15:%.*]] = icmp ult i8* [[TMP12]], [[A5]]
+; CHECK-NEXT: [[TMP16:%.*]] = select i1 false, i1 [[TMP14]], i1 [[TMP15]]
+; CHECK-NEXT: [[TMP17:%.*]] = or i1 [[TMP16]], [[MUL_OVERFLOW4]]
+; CHECK-NEXT: [[TMP18:%.*]] = or i1 [[TMP10]], [[TMP17]]
+; CHECK-NEXT: br i1 [[TMP18]], label [[FOR_BODY_PH_LVER_ORIG:%.*]], label [[FOR_BODY_PH_LDIST1:%.*]]
 ; CHECK: for.body.ph.lver.orig:
 ; CHECK-NEXT: br label [[FOR_BODY_LVER_ORIG:%.*]]
 ; CHECK: for.body.lver.orig:
@@ -101,10 +98,10 @@
 ; CHECK-NEXT: [[ARRAYIDXC:%.*]] = getelementptr inbounds i32, i32* [[C]], i64 [[MUL_EXT]]
 ; CHECK-NEXT: store i32 [[MULC]], i32* [[ARRAYIDXC]], align 4
 ; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[ADD]], [[N]]
-; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_END_LOOPEXIT7:%.*]], label [[FOR_BODY]]
+; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_END_LOOPEXIT6:%.*]], label [[FOR_BODY]]
 ; CHECK: for.end.loopexit:
 ; CHECK-NEXT: br label [[FOR_END:%.*]]
-; CHECK: for.end.loopexit7:
+; CHECK: for.end.loopexit6:
 ; CHECK-NEXT: br label [[FOR_END]]
 ; CHECK: for.end:
 ; CHECK-NEXT: ret void
@@ -181,18 +178,15 @@
 ; CHECK-NEXT: [[MUL2:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 8, i64 [[TMP0]])
 ; CHECK-NEXT: [[MUL_RESULT3:%.*]] = extractvalue { i64, i1 } [[MUL2]], 0
 ; CHECK-NEXT: [[MUL_OVERFLOW4:%.*]] = extractvalue { i64, i1 } [[MUL2]], 1
-; CHECK-NEXT: [[UGLYGEP:%.*]] = getelementptr i8, i8* bitcast (i32* getelementptr inbounds ([8192 x i32], [8192 x i32]* @global_a, i64 0, i64 42) to i8*), i64 [[MUL_RESULT3]]
-; CHECK-NEXT: [[TMP11:%.*]] = bitcast i8* [[UGLYGEP]] to [8192 x i32]*
-; CHECK-NEXT: [[TMP12:%.*]] = sub i64 [[MUL_RESULT3]], -8
-; CHECK-NEXT: [[TMP13:%.*]] = sub i64 8, [[TMP12]]
-; CHECK-NEXT: [[UGLYGEP5:%.*]] = getelementptr i8, i8* bitcast (i32* getelementptr inbounds ([8192 x i32], [8192 x i32]* @global_a, i64 0, i64 42) to i8*), i64 [[TMP13]]
-; CHECK-NEXT: [[TMP14:%.*]] = bitcast i8* [[UGLYGEP5]] to [8192 x i32]*
-; CHECK-NEXT: [[TMP15:%.*]] = icmp ugt [8192 x i32]* [[TMP14]], bitcast (i32* getelementptr inbounds ([8192 x i32], [8192 x i32]* @global_a, i64 0, i64 42) to [8192 x i32]*)
-; CHECK-NEXT: [[TMP16:%.*]] = icmp ult [8192 x i32]* [[TMP11]], bitcast (i32* getelementptr inbounds ([8192 x i32], [8192 x i32]* @global_a, i64 0, i64 42) to [8192 x i32]*)
-; CHECK-NEXT: [[TMP17:%.*]] = select i1 false, i1 [[TMP15]], i1 [[TMP16]]
-; CHECK-NEXT: [[TMP18:%.*]] = or i1 [[TMP17]], [[MUL_OVERFLOW4]]
-; CHECK-NEXT: [[TMP19:%.*]] = or i1 [[TMP10]], [[TMP18]]
-; CHECK-NEXT: br i1 [[TMP19]], label [[FOR_BODY_PH_LVER_ORIG:%.*]], label [[FOR_BODY_PH_LDIST1:%.*]]
+; CHECK-NEXT: [[TMP11:%.*]] = sub i64 0, [[MUL_RESULT3]]
+; CHECK-NEXT: [[TMP12:%.*]] = getelementptr i8, i8* bitcast (i32* getelementptr inbounds ([8192 x i32], [8192 x i32]* @global_a, i64 0, i64 42) to i8*), i64 [[MUL_RESULT3]]
+; CHECK-NEXT: [[TMP13:%.*]] = getelementptr i8, i8* bitcast (i32* getelementptr inbounds ([8192 x i32], [8192 x i32]* @global_a, i64 0, i64 42) to i8*), i64 [[TMP11]]
+; CHECK-NEXT: [[TMP14:%.*]] = icmp ugt i8* [[TMP13]], bitcast (i32* getelementptr inbounds ([8192 x i32], [8192 x i32]* @global_a, i64 0, i64 42) to i8*)
+; CHECK-NEXT: [[TMP15:%.*]] = icmp ult i8* [[TMP12]], bitcast (i32* getelementptr inbounds ([8192 x i32], [8192 x i32]* @global_a, i64 0, i64 42) to i8*)
+; CHECK-NEXT: [[TMP16:%.*]] = select i1 false, i1 [[TMP14]], i1 [[TMP15]]
+; CHECK-NEXT: [[TMP17:%.*]] = or i1 [[TMP16]], [[MUL_OVERFLOW4]]
+; CHECK-NEXT: [[TMP18:%.*]] = or i1 [[TMP10]], [[TMP17]]
+; CHECK-NEXT: br i1 [[TMP18]], label [[FOR_BODY_PH_LVER_ORIG:%.*]], label [[FOR_BODY_PH_LDIST1:%.*]]
 ; CHECK: for.body.ph.lver.orig:
 ; CHECK-NEXT: br label [[FOR_BODY_LVER_ORIG:%.*]]
 ; CHECK: for.body.lver.orig:
@@ -253,10 +247,10 @@
 ; CHECK-NEXT: [[ARRAYIDXC:%.*]] = getelementptr inbounds i32, i32* [[C]], i64 [[MUL_EXT]]
 ; CHECK-NEXT: store i32 [[MULC]], i32* [[ARRAYIDXC]], align 4
 ; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[ADD]], [[N]]
-; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_END_LOOPEXIT6:%.*]], label [[FOR_BODY]]
+; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_END_LOOPEXIT5:%.*]], label [[FOR_BODY]]
 ; CHECK: for.end.loopexit:
 ; CHECK-NEXT: br label [[FOR_END:%.*]]
-; CHECK: for.end.loopexit6:
+; CHECK: for.end.loopexit5:
 ; CHECK-NEXT: br label [[FOR_END]]
 ; CHECK: for.end:
 ; CHECK-NEXT: ret void
diff --git a/llvm/test/Transforms/LoopVersioning/wrapping-pointer-versioning.ll b/llvm/test/Transforms/LoopVersioning/wrapping-pointer-versioning.ll
--- a/llvm/test/Transforms/LoopVersioning/wrapping-pointer-versioning.ll
+++ b/llvm/test/Transforms/LoopVersioning/wrapping-pointer-versioning.ll
@@ -46,18 +46,15 @@
 ; LV-NEXT: [[MUL2:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 4, i64 [[TMP0]])
 ; LV-NEXT: [[MUL_RESULT3:%.*]] = extractvalue { i64, i1 } [[MUL2]], 0
 ; LV-NEXT: [[MUL_OVERFLOW4:%.*]] = extractvalue { i64, i1 } [[MUL2]], 1
-; LV-NEXT: [[UGLYGEP:%.*]] = getelementptr i8, i8* [[A5]], i64 [[MUL_RESULT3]]
-; LV-NEXT: [[TMP11:%.*]] = bitcast i8* [[UGLYGEP]] to i16*
-; LV-NEXT: [[TMP12:%.*]] = sub i64 [[MUL_RESULT3]], -4
-; LV-NEXT: [[TMP13:%.*]] = sub i64 4, [[TMP12]]
-; LV-NEXT: [[UGLYGEP6:%.*]] = getelementptr i8, i8* [[A5]], i64 [[TMP13]]
-; LV-NEXT: [[TMP14:%.*]] = bitcast i8* [[UGLYGEP6]] to i16*
-; LV-NEXT: [[TMP15:%.*]] = icmp ugt i16* [[TMP14]], [[A]]
-; LV-NEXT: [[TMP16:%.*]] = icmp ult i16* [[TMP11]], [[A]]
-; LV-NEXT: [[TMP17:%.*]] = select i1 false, i1 [[TMP15]], i1 [[TMP16]]
-; LV-NEXT: [[TMP18:%.*]] = or i1 [[TMP17]], [[MUL_OVERFLOW4]]
-; LV-NEXT: [[TMP19:%.*]] = or i1 [[TMP10]], [[TMP18]]
-; LV-NEXT: br i1 [[TMP19]], label [[FOR_BODY_PH_LVER_ORIG:%.*]], label [[FOR_BODY_PH:%.*]]
+; LV-NEXT: [[TMP11:%.*]] = sub i64 0, [[MUL_RESULT3]]
+; LV-NEXT: [[TMP12:%.*]] = getelementptr i8, i8* [[A5]], i64 [[MUL_RESULT3]]
+; LV-NEXT: [[TMP13:%.*]] = getelementptr i8, i8* [[A5]], i64 [[TMP11]]
+; LV-NEXT: [[TMP14:%.*]] = icmp ugt i8* [[TMP13]], [[A5]]
+; LV-NEXT: [[TMP15:%.*]] = icmp ult i8* [[TMP12]], [[A5]]
+; LV-NEXT: [[TMP16:%.*]] = select i1 false, i1 [[TMP14]], i1 [[TMP15]]
+; LV-NEXT: [[TMP17:%.*]] = or i1 [[TMP16]], [[MUL_OVERFLOW4]]
+; LV-NEXT: [[TMP18:%.*]] = or i1 [[TMP10]], [[TMP17]]
+; LV-NEXT: br i1 [[TMP18]], label [[FOR_BODY_PH_LVER_ORIG:%.*]], label [[FOR_BODY_PH:%.*]]
 ; LV: for.body.ph.lver.orig:
 ; LV-NEXT: br label [[FOR_BODY_LVER_ORIG:%.*]]
 ; LV: for.body.lver.orig:
@@ -91,10 +88,10 @@
 ; LV-NEXT: [[INC]] = add nuw nsw i64 [[IND]], 1
 ; LV-NEXT: [[INC1]] = add i32 [[IND1]], 1
 ; LV-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INC]], [[N]]
-; LV-NEXT: br i1 [[EXITCOND]], label [[FOR_END_LOOPEXIT7:%.*]], label [[FOR_BODY]]
+; LV-NEXT: br i1 [[EXITCOND]], label [[FOR_END_LOOPEXIT6:%.*]], label [[FOR_BODY]]
 ; LV: for.end.loopexit:
 ; LV-NEXT: br label [[FOR_END:%.*]]
-; LV: for.end.loopexit7:
+; LV: for.end.loopexit6:
 ; LV-NEXT: br label [[FOR_END]]
 ; LV: for.end:
 ; LV-NEXT: ret void
@@ -181,18 +178,15 @@
 ; LV-NEXT: [[MUL_RESULT3:%.*]] = extractvalue { i64, i1 } [[MUL2]], 0
 ; LV-NEXT: [[MUL_OVERFLOW4:%.*]] = extractvalue { i64, i1 } [[MUL2]], 1
 ; LV-NEXT: [[SCEVGEP5:%.*]] = bitcast i16* [[SCEVGEP]] to i8*
-; LV-NEXT: [[UGLYGEP:%.*]] = getelementptr i8, i8* [[SCEVGEP5]], i64 [[MUL_RESULT3]]
-; LV-NEXT: [[TMP15:%.*]] = bitcast i8* [[UGLYGEP]] to i16*
-; LV-NEXT: [[TMP16:%.*]] = sub i64 [[MUL_RESULT3]], -4
-; LV-NEXT: [[TMP17:%.*]] = sub i64 4, [[TMP16]]
-; LV-NEXT: [[UGLYGEP6:%.*]] = getelementptr i8, i8* [[SCEVGEP5]], i64 [[TMP17]]
-; LV-NEXT: [[TMP18:%.*]] = bitcast i8* [[UGLYGEP6]] to i16*
-; LV-NEXT: [[TMP19:%.*]] = icmp ugt i16* [[TMP18]], [[SCEVGEP]]
-; LV-NEXT: [[TMP20:%.*]] = icmp ult i16* [[TMP15]], [[SCEVGEP]]
-; LV-NEXT: [[TMP21:%.*]] = select i1 true, i1 [[TMP19]], i1 [[TMP20]]
-; LV-NEXT: [[TMP22:%.*]] = or i1 [[TMP21]], [[MUL_OVERFLOW4]]
-; LV-NEXT: [[TMP23:%.*]] = or i1 [[TMP11]], [[TMP22]]
-; LV-NEXT: br i1 [[TMP23]], label [[FOR_BODY_PH_LVER_ORIG:%.*]], label [[FOR_BODY_PH:%.*]]
+; LV-NEXT: [[TMP15:%.*]] = sub i64 0, [[MUL_RESULT3]]
+; LV-NEXT: [[TMP16:%.*]] = getelementptr i8, i8* [[SCEVGEP5]], i64 [[MUL_RESULT3]]
+; LV-NEXT: [[TMP17:%.*]] = getelementptr i8, i8* [[SCEVGEP5]], i64 [[TMP15]]
+; LV-NEXT: [[TMP18:%.*]] = icmp ugt i8* [[TMP17]], [[SCEVGEP5]]
+; LV-NEXT: [[TMP19:%.*]] = icmp ult i8* [[TMP16]], [[SCEVGEP5]]
+; LV-NEXT: [[TMP20:%.*]] = select i1 true, i1 [[TMP18]], i1 [[TMP19]]
+; LV-NEXT: [[TMP21:%.*]] = or i1 [[TMP20]], [[MUL_OVERFLOW4]]
+; LV-NEXT: [[TMP22:%.*]] = or i1 [[TMP11]], [[TMP21]]
+; LV-NEXT: br i1 [[TMP22]], label [[FOR_BODY_PH_LVER_ORIG:%.*]], label [[FOR_BODY_PH:%.*]]
 ; LV: for.body.ph.lver.orig:
 ; LV-NEXT: br label [[FOR_BODY_LVER_ORIG:%.*]]
 ; LV: for.body.lver.orig:
@@ -226,10 +220,10 @@
 ; LV-NEXT: [[INC]] = add nuw nsw i64 [[IND]], 1
 ; LV-NEXT: [[DEC]] = sub i32 [[IND1]], 1
 ; LV-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INC]], [[N]]
-; LV-NEXT: br i1 [[EXITCOND]], label [[FOR_END_LOOPEXIT7:%.*]], label [[FOR_BODY]]
+; LV-NEXT: br i1 [[EXITCOND]], label [[FOR_END_LOOPEXIT6:%.*]], label [[FOR_BODY]]
 ; LV: for.end.loopexit:
 ; LV-NEXT: br label [[FOR_END:%.*]]
-; LV: for.end.loopexit7:
+; LV: for.end.loopexit6:
 ; LV-NEXT: br label [[FOR_END]]
 ; LV: for.end:
 ; LV-NEXT: ret void
@@ -295,18 +289,15 @@
 ; LV-NEXT: [[MUL2:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 4, i64 [[TMP0]])
 ; LV-NEXT: [[MUL_RESULT3:%.*]] = extractvalue { i64, i1 } [[MUL2]], 0
 ; LV-NEXT: [[MUL_OVERFLOW4:%.*]] = extractvalue { i64, i1 } [[MUL2]], 1
-; LV-NEXT: [[UGLYGEP:%.*]] = getelementptr i8, i8* [[A5]], i64 [[MUL_RESULT3]]
-; LV-NEXT: [[TMP11:%.*]] = bitcast i8* [[UGLYGEP]] to i16*
-; LV-NEXT: [[TMP12:%.*]] = sub i64 [[MUL_RESULT3]], -4
-; LV-NEXT: [[TMP13:%.*]] = sub i64 4, [[TMP12]]
-; LV-NEXT: [[UGLYGEP6:%.*]] = getelementptr i8, i8* [[A5]], i64 [[TMP13]]
-; LV-NEXT: [[TMP14:%.*]] = bitcast i8* [[UGLYGEP6]] to i16*
-; LV-NEXT: [[TMP15:%.*]] = icmp ugt i16* [[TMP14]], [[A]]
-; LV-NEXT: [[TMP16:%.*]] = icmp ult i16* [[TMP11]], [[A]]
-; LV-NEXT: [[TMP17:%.*]] = select i1 false, i1 [[TMP15]], i1 [[TMP16]]
-; LV-NEXT: [[TMP18:%.*]] = or i1 [[TMP17]], [[MUL_OVERFLOW4]]
-; LV-NEXT: [[TMP19:%.*]] = or i1 [[TMP10]], [[TMP18]]
-; LV-NEXT: br i1 [[TMP19]], label [[FOR_BODY_PH_LVER_ORIG:%.*]], label [[FOR_BODY_PH:%.*]]
+; LV-NEXT: [[TMP11:%.*]] = sub i64 0, [[MUL_RESULT3]]
+; LV-NEXT: [[TMP12:%.*]] = getelementptr i8, i8* [[A5]], i64 [[MUL_RESULT3]]
+; LV-NEXT: [[TMP13:%.*]] = getelementptr i8, i8* [[A5]], i64 [[TMP11]]
+; LV-NEXT: [[TMP14:%.*]] = icmp ugt i8* [[TMP13]], [[A5]]
+; LV-NEXT: [[TMP15:%.*]] = icmp ult i8* [[TMP12]], [[A5]]
+; LV-NEXT: [[TMP16:%.*]] = select i1 false, i1 [[TMP14]], i1 [[TMP15]]
+; LV-NEXT: [[TMP17:%.*]] = or i1 [[TMP16]], [[MUL_OVERFLOW4]]
+; LV-NEXT: [[TMP18:%.*]] = or i1 [[TMP10]], [[TMP17]]
+; LV-NEXT: br i1 [[TMP18]], label [[FOR_BODY_PH_LVER_ORIG:%.*]], label [[FOR_BODY_PH:%.*]]
 ; LV: for.body.ph.lver.orig:
 ; LV-NEXT: br label [[FOR_BODY_LVER_ORIG:%.*]]
 ; LV: for.body.lver.orig:
@@ -340,10 +331,10 @@
 ; LV-NEXT: [[INC]] = add nuw nsw i64 [[IND]], 1
 ; LV-NEXT: [[INC1]] = add i32 [[IND1]], 1
 ; LV-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INC]], [[N]]
-; LV-NEXT: br i1 [[EXITCOND]], label [[FOR_END_LOOPEXIT7:%.*]], label [[FOR_BODY]]
+; LV-NEXT: br i1 [[EXITCOND]], label [[FOR_END_LOOPEXIT6:%.*]], label [[FOR_BODY]]
 ; LV: for.end.loopexit:
 ; LV-NEXT: br label [[FOR_END:%.*]]
-; LV: for.end.loopexit7:
+; LV: for.end.loopexit6:
 ; LV-NEXT: br label [[FOR_END]]
 ; LV: for.end:
 ; LV-NEXT: ret void
@@ -404,18 +395,15 @@
 ; LV-NEXT: [[MUL_RESULT3:%.*]] = extractvalue { i64, i1 } [[MUL2]], 0
 ; LV-NEXT: [[MUL_OVERFLOW4:%.*]] = extractvalue { i64, i1 } [[MUL2]], 1
 ; LV-NEXT: [[SCEVGEP5:%.*]] = bitcast i16* [[SCEVGEP]] to i8*
-; LV-NEXT: [[UGLYGEP:%.*]] = getelementptr i8, i8* [[SCEVGEP5]], i64 [[MUL_RESULT3]]
-; LV-NEXT: [[TMP13:%.*]] = bitcast i8* [[UGLYGEP]] to i16*
-; LV-NEXT: [[TMP14:%.*]] = sub i64 [[MUL_RESULT3]], -4
-; LV-NEXT: [[TMP15:%.*]] = sub i64 4, [[TMP14]]
-; LV-NEXT: [[UGLYGEP6:%.*]] = getelementptr i8, i8* [[SCEVGEP5]], i64 [[TMP15]]
-; LV-NEXT: [[TMP16:%.*]] = bitcast i8* [[UGLYGEP6]] to i16*
-; LV-NEXT: [[TMP17:%.*]] = icmp ugt i16* [[TMP16]], [[SCEVGEP]]
-; LV-NEXT: [[TMP18:%.*]] = icmp ult i16* [[TMP13]], [[SCEVGEP]]
-; LV-NEXT: [[TMP19:%.*]] = select i1 true, i1 [[TMP17]], i1 [[TMP18]]
-; LV-NEXT: [[TMP20:%.*]] = or i1 [[TMP19]], [[MUL_OVERFLOW4]]
-; LV-NEXT: [[TMP21:%.*]] = or i1 [[TMP11]], [[TMP20]]
-; LV-NEXT: br i1 [[TMP21]], label [[FOR_BODY_PH_LVER_ORIG:%.*]], label [[FOR_BODY_PH:%.*]]
+; LV-NEXT: [[TMP13:%.*]] = sub i64 0, [[MUL_RESULT3]]
+; LV-NEXT: [[TMP14:%.*]] = getelementptr i8, i8* [[SCEVGEP5]], i64 [[MUL_RESULT3]]
+; LV-NEXT: [[TMP15:%.*]] = getelementptr i8, i8* [[SCEVGEP5]], i64 [[TMP13]]
+; LV-NEXT: [[TMP16:%.*]] = icmp ugt i8* [[TMP15]], [[SCEVGEP5]]
+; LV-NEXT: [[TMP17:%.*]] = icmp ult i8* [[TMP14]], [[SCEVGEP5]]
+; LV-NEXT: [[TMP18:%.*]] = select i1 true, i1 [[TMP16]], i1 [[TMP17]]
+; LV-NEXT: [[TMP19:%.*]] = or i1 [[TMP18]], [[MUL_OVERFLOW4]]
+; LV-NEXT: [[TMP20:%.*]] = or i1 [[TMP11]], [[TMP19]]
+; LV-NEXT: br i1 [[TMP20]], label [[FOR_BODY_PH_LVER_ORIG:%.*]], label [[FOR_BODY_PH:%.*]]
 ; LV: for.body.ph.lver.orig:
 ; LV-NEXT: br label [[FOR_BODY_LVER_ORIG:%.*]]
 ; LV: for.body.lver.orig:
@@ -449,10 +437,10 @@
 ; LV-NEXT: [[INC]] = add nuw nsw i64 [[IND]], 1
 ; LV-NEXT: [[DEC]] = sub i32 [[IND1]], 1
 ; LV-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INC]], [[N]]
-; LV-NEXT: br i1 [[EXITCOND]], label [[FOR_END_LOOPEXIT7:%.*]], label [[FOR_BODY]]
+; LV-NEXT: br i1 [[EXITCOND]], label [[FOR_END_LOOPEXIT6:%.*]], label [[FOR_BODY]]
 ; LV: for.end.loopexit:
 ; LV-NEXT: br label [[FOR_END:%.*]]
-; LV: for.end.loopexit7:
+; LV: for.end.loopexit6:
 ; LV-NEXT: br label [[FOR_END]]
 ; LV: for.end:
 ; LV-NEXT: ret void
@@ -521,18 +509,15 @@
 ; LV-NEXT: [[MUL_RESULT3:%.*]] = extractvalue { i64, i1 } [[MUL2]], 0
 ; LV-NEXT: [[MUL_OVERFLOW4:%.*]] = extractvalue { i64, i1 } [[MUL2]], 1
 ; LV-NEXT: [[SCEVGEP5:%.*]] = bitcast i16* [[SCEVGEP]] to i8*
-; LV-NEXT: [[UGLYGEP:%.*]] = getelementptr i8, i8* [[SCEVGEP5]], i64 [[MUL_RESULT3]]
-; LV-NEXT: [[TMP13:%.*]] = bitcast i8* [[UGLYGEP]] to i16*
-; LV-NEXT: [[TMP14:%.*]] = sub i64 [[MUL_RESULT3]], -4
-; LV-NEXT: [[TMP15:%.*]] = sub i64 4, [[TMP14]]
-; LV-NEXT: [[UGLYGEP6:%.*]] = getelementptr i8, i8* [[SCEVGEP5]], i64 [[TMP15]]
-; LV-NEXT: [[TMP16:%.*]] = bitcast i8* [[UGLYGEP6]] to i16*
-; LV-NEXT: [[TMP17:%.*]] = icmp ugt i16* [[TMP16]], [[SCEVGEP]]
-; LV-NEXT: [[TMP18:%.*]] = icmp ult i16* [[TMP13]], [[SCEVGEP]]
-; LV-NEXT: [[TMP19:%.*]] = select i1 true, i1 [[TMP17]], i1 [[TMP18]]
-; LV-NEXT: [[TMP20:%.*]] = or i1 [[TMP19]], [[MUL_OVERFLOW4]]
-; LV-NEXT: [[TMP21:%.*]] = or i1 [[TMP11]], [[TMP20]]
-; LV-NEXT: br i1 [[TMP21]], label [[FOR_BODY_PH_LVER_ORIG:%.*]], label [[FOR_BODY_PH:%.*]]
+; LV-NEXT: [[TMP13:%.*]] = sub i64 0, [[MUL_RESULT3]]
+; LV-NEXT: [[TMP14:%.*]] = getelementptr i8, i8* [[SCEVGEP5]], i64 [[MUL_RESULT3]]
+; LV-NEXT: [[TMP15:%.*]] = getelementptr i8, i8* [[SCEVGEP5]], i64 [[TMP13]]
+; LV-NEXT: [[TMP16:%.*]] = icmp ugt i8* [[TMP15]], [[SCEVGEP5]]
+; LV-NEXT: [[TMP17:%.*]] = icmp ult i8* [[TMP14]], [[SCEVGEP5]]
+; LV-NEXT: [[TMP18:%.*]] = select i1 true, i1 [[TMP16]], i1 [[TMP17]]
+; LV-NEXT: [[TMP19:%.*]] = or i1 [[TMP18]], [[MUL_OVERFLOW4]]
+; LV-NEXT: [[TMP20:%.*]] = or i1 [[TMP11]], [[TMP19]]
+; LV-NEXT: br i1 [[TMP20]], label [[FOR_BODY_PH_LVER_ORIG:%.*]], label [[FOR_BODY_PH:%.*]]
 ; LV: for.body.ph.lver.orig:
 ; LV-NEXT: br label [[FOR_BODY_LVER_ORIG:%.*]]
 ; LV: for.body.lver.orig:
@@ -564,10 +549,10 @@
 ; LV-NEXT: [[INC]] = add nuw nsw i64 [[IND]], 1
 ; LV-NEXT: [[DEC]] = sub i32 [[IND1]], 1
 ; LV-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INC]], [[N]]
-; LV-NEXT: br i1 [[EXITCOND]], label [[FOR_END_LOOPEXIT7:%.*]], label [[FOR_BODY]]
+; LV-NEXT: br i1 [[EXITCOND]], label [[FOR_END_LOOPEXIT6:%.*]], label [[FOR_BODY]]
 ; LV: for.end.loopexit:
 ; LV-NEXT: br label [[FOR_END:%.*]]
-; LV: for.end.loopexit7:
+; LV: for.end.loopexit6:
 ; LV-NEXT: br label [[FOR_END]]
 ; LV: for.end:
 ; LV-NEXT: ret void