diff --git a/llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp b/llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp
--- a/llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp
+++ b/llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp
@@ -2461,15 +2461,11 @@
 
   IntegerType *Ty =
       IntegerType::get(Loc->getContext(), SE.getTypeSizeInBits(ARTy));
-  Type *ARExpandTy = DL.isNonIntegralPointerType(ARTy) ? ARTy : Ty;
 
   Value *StepValue = expandCodeForImpl(Step, Ty, Loc, false);
   Value *NegStepValue =
       expandCodeForImpl(SE.getNegativeSCEV(Step), Ty, Loc, false);
-  Value *StartValue = expandCodeForImpl(
-      isa<PointerType>(ARExpandTy) ? Start
-                                   : SE.getPtrToIntExpr(Start, ARExpandTy),
-      ARExpandTy, Loc, false);
+  Value *StartValue = expandCodeForImpl(Start, ARTy, Loc, false);
 
   ConstantInt *Zero =
       ConstantInt::get(Loc->getContext(), APInt::getNullValue(DstBits));
@@ -2493,7 +2489,7 @@
   // Start + |Step| * Backedge < Start
   // Start - |Step| * Backedge > Start
   Value *Add = nullptr, *Sub = nullptr;
-  if (PointerType *ARPtrTy = dyn_cast<PointerType>(ARExpandTy)) {
+  if (PointerType *ARPtrTy = dyn_cast<PointerType>(ARTy)) {
     const SCEV *MulS = SE.getSCEV(MulV);
     const SCEV *NegMulS = SE.getNegativeSCEV(MulS);
     Add = Builder.CreateBitCast(expandAddToGEP(MulS, ARPtrTy, Ty, StartValue),
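Not part of the patch, for review context: with ARExpandTy gone, the overflow check for a pointer-typed AddRec is now expanded entirely in the pointer domain instead of going through ptrtoint (previously only non-integral pointer types were exempted from the ptrtoint path). A rough before/after sketch of the emitted check, distilled from the updated CHECK lines below for @f (8-byte stride); value names are illustrative, and %mul.result stands for |Step| * backedge-taken-count:

  ; Before: cast the start pointer to i64 and do the wrap checks there.
  %a.int = ptrtoint i32* %a to i64
  %hi    = add i64 %a.int, %mul.result    ; Start + |Step| * BTC
  %lo    = sub i64 %a.int, %mul.result    ; Start - |Step| * BTC
  %w.dec = icmp ugt i64 %lo, %a.int       ; wrapped while stepping down?
  %w.inc = icmp ult i64 %hi, %a.int       ; wrapped while stepping up?

  ; After: keep the start as a pointer and step it with byte-typed GEPs.
  %a.i8   = bitcast i32* %a to i8*
  %hi.gep = getelementptr i8, i8* %a.i8, i64 %mul.result  ; Start + |Step| * BTC
  %t0     = sub i64 %mul.result, -8                       ; |Step| * BTC + 8
  %t1     = sub i64 8, %t0                                ; -(|Step| * BTC)
  %lo.gep = getelementptr i8, i8* %a.i8, i64 %t1          ; Start - |Step| * BTC
  %w.dec  = icmp ugt i8* %lo.gep, %a.i8   ; wrapped while stepping down?
  %w.inc  = icmp ult i8* %hi.gep, %a.i8   ; wrapped while stepping up?

(In the actual output the GEP results are bitcast back to the AddRec's pointer type before the compares; that detail is elided here.)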
diff --git a/llvm/test/Transforms/LoopDistribute/scev-inserted-runtime-check.ll b/llvm/test/Transforms/LoopDistribute/scev-inserted-runtime-check.ll
--- a/llvm/test/Transforms/LoopDistribute/scev-inserted-runtime-check.ll
+++ b/llvm/test/Transforms/LoopDistribute/scev-inserted-runtime-check.ll
@@ -9,7 +9,7 @@
 define void @f(i32* noalias %a, i32* noalias %b, i32* noalias %c, i32* noalias %d, i32* noalias %e, i64 %N) {
 ; CHECK-LABEL: @f(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[A2:%.*]] = ptrtoint i32* [[A:%.*]] to i64
+; CHECK-NEXT:    [[A5:%.*]] = bitcast i32* [[A:%.*]] to i8*
 ; CHECK-NEXT:    br label [[FOR_BODY_LVER_CHECK:%.*]]
 ; CHECK:       for.body.lver.check:
 ; CHECK-NEXT:    [[TMP0:%.*]] = add i64 [[N:%.*]], -1
@@ -26,17 +26,21 @@
 ; CHECK-NEXT:    [[TMP8:%.*]] = or i1 [[TMP6]], [[TMP7]]
 ; CHECK-NEXT:    [[TMP9:%.*]] = or i1 [[TMP8]], [[MUL_OVERFLOW]]
 ; CHECK-NEXT:    [[TMP10:%.*]] = or i1 false, [[TMP9]]
-; CHECK-NEXT:    [[MUL3:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 8, i64 [[TMP0]])
-; CHECK-NEXT:    [[MUL_RESULT4:%.*]] = extractvalue { i64, i1 } [[MUL3]], 0
-; CHECK-NEXT:    [[MUL_OVERFLOW5:%.*]] = extractvalue { i64, i1 } [[MUL3]], 1
-; CHECK-NEXT:    [[TMP11:%.*]] = add i64 [[A2]], [[MUL_RESULT4]]
-; CHECK-NEXT:    [[TMP12:%.*]] = sub i64 [[A2]], [[MUL_RESULT4]]
-; CHECK-NEXT:    [[TMP13:%.*]] = icmp ugt i64 [[TMP12]], [[A2]]
-; CHECK-NEXT:    [[TMP14:%.*]] = icmp ult i64 [[TMP11]], [[A2]]
-; CHECK-NEXT:    [[TMP15:%.*]] = select i1 false, i1 [[TMP13]], i1 [[TMP14]]
-; CHECK-NEXT:    [[TMP16:%.*]] = or i1 [[TMP15]], [[MUL_OVERFLOW5]]
-; CHECK-NEXT:    [[TMP17:%.*]] = or i1 [[TMP10]], [[TMP16]]
-; CHECK-NEXT:    br i1 [[TMP17]], label [[FOR_BODY_PH_LVER_ORIG:%.*]], label [[FOR_BODY_PH_LDIST1:%.*]]
+; CHECK-NEXT:    [[MUL2:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 8, i64 [[TMP0]])
+; CHECK-NEXT:    [[MUL_RESULT3:%.*]] = extractvalue { i64, i1 } [[MUL2]], 0
+; CHECK-NEXT:    [[MUL_OVERFLOW4:%.*]] = extractvalue { i64, i1 } [[MUL2]], 1
+; CHECK-NEXT:    [[UGLYGEP:%.*]] = getelementptr i8, i8* [[A5]], i64 [[MUL_RESULT3]]
+; CHECK-NEXT:    [[TMP11:%.*]] = bitcast i8* [[UGLYGEP]] to i32*
+; CHECK-NEXT:    [[TMP12:%.*]] = sub i64 [[MUL_RESULT3]], -8
+; CHECK-NEXT:    [[TMP13:%.*]] = sub i64 8, [[TMP12]]
+; CHECK-NEXT:    [[UGLYGEP6:%.*]] = getelementptr i8, i8* [[A5]], i64 [[TMP13]]
+; CHECK-NEXT:    [[TMP14:%.*]] = bitcast i8* [[UGLYGEP6]] to i32*
+; CHECK-NEXT:    [[TMP15:%.*]] = icmp ugt i32* [[TMP14]], [[A]]
+; CHECK-NEXT:    [[TMP16:%.*]] = icmp ult i32* [[TMP11]], [[A]]
+; CHECK-NEXT:    [[TMP17:%.*]] = select i1 false, i1 [[TMP15]], i1 [[TMP16]]
+; CHECK-NEXT:    [[TMP18:%.*]] = or i1 [[TMP17]], [[MUL_OVERFLOW4]]
+; CHECK-NEXT:    [[TMP19:%.*]] = or i1 [[TMP10]], [[TMP18]]
+; CHECK-NEXT:    br i1 [[TMP19]], label [[FOR_BODY_PH_LVER_ORIG:%.*]], label [[FOR_BODY_PH_LDIST1:%.*]]
 ; CHECK:       for.body.ph.lver.orig:
 ; CHECK-NEXT:    br label [[FOR_BODY_LVER_ORIG:%.*]]
 ; CHECK:       for.body.lver.orig:
@@ -97,10 +101,10 @@
 ; CHECK-NEXT:    [[ARRAYIDXC:%.*]] = getelementptr inbounds i32, i32* [[C]], i64 [[MUL_EXT]]
 ; CHECK-NEXT:    store i32 [[MULC]], i32* [[ARRAYIDXC]], align 4
 ; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp eq i64 [[ADD]], [[N]]
-; CHECK-NEXT:    br i1 [[EXITCOND]], label [[FOR_END_LOOPEXIT6:%.*]], label [[FOR_BODY]]
+; CHECK-NEXT:    br i1 [[EXITCOND]], label [[FOR_END_LOOPEXIT7:%.*]], label [[FOR_BODY]]
 ; CHECK:       for.end.loopexit:
 ; CHECK-NEXT:    br label [[FOR_END:%.*]]
-; CHECK:       for.end.loopexit6:
+; CHECK:       for.end.loopexit7:
 ; CHECK-NEXT:    br label [[FOR_END]]
 ; CHECK:       for.end:
 ; CHECK-NEXT:    ret void
@@ -177,14 +181,18 @@
 ; CHECK-NEXT:    [[MUL2:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 8, i64 [[TMP0]])
 ; CHECK-NEXT:    [[MUL_RESULT3:%.*]] = extractvalue { i64, i1 } [[MUL2]], 0
 ; CHECK-NEXT:    [[MUL_OVERFLOW4:%.*]] = extractvalue { i64, i1 } [[MUL2]], 1
-; CHECK-NEXT:    [[TMP11:%.*]] = add i64 add (i64 ptrtoint ([8192 x i32]* @global_a to i64), i64 168), [[MUL_RESULT3]]
-; CHECK-NEXT:    [[TMP12:%.*]] = sub i64 add (i64 ptrtoint ([8192 x i32]* @global_a to i64), i64 168), [[MUL_RESULT3]]
-; CHECK-NEXT:    [[TMP13:%.*]] = icmp ugt i64 [[TMP12]], add (i64 ptrtoint ([8192 x i32]* @global_a to i64), i64 168)
-; CHECK-NEXT:    [[TMP14:%.*]] = icmp ult i64 [[TMP11]], add (i64 ptrtoint ([8192 x i32]* @global_a to i64), i64 168)
-; CHECK-NEXT:    [[TMP15:%.*]] = select i1 false, i1 [[TMP13]], i1 [[TMP14]]
-; CHECK-NEXT:    [[TMP16:%.*]] = or i1 [[TMP15]], [[MUL_OVERFLOW4]]
-; CHECK-NEXT:    [[TMP17:%.*]] = or i1 [[TMP10]], [[TMP16]]
-; CHECK-NEXT:    br i1 [[TMP17]], label [[FOR_BODY_PH_LVER_ORIG:%.*]], label [[FOR_BODY_PH_LDIST1:%.*]]
+; CHECK-NEXT:    [[UGLYGEP:%.*]] = getelementptr i8, i8* bitcast (i32* getelementptr inbounds ([8192 x i32], [8192 x i32]* @global_a, i64 0, i64 42) to i8*), i64 [[MUL_RESULT3]]
+; CHECK-NEXT:    [[TMP11:%.*]] = bitcast i8* [[UGLYGEP]] to [8192 x i32]*
+; CHECK-NEXT:    [[TMP12:%.*]] = sub i64 [[MUL_RESULT3]], -8
+; CHECK-NEXT:    [[TMP13:%.*]] = sub i64 8, [[TMP12]]
+; CHECK-NEXT:    [[UGLYGEP5:%.*]] = getelementptr i8, i8* bitcast (i32* getelementptr inbounds ([8192 x i32], [8192 x i32]* @global_a, i64 0, i64 42) to i8*), i64 [[TMP13]]
+; CHECK-NEXT:    [[TMP14:%.*]] = bitcast i8* [[UGLYGEP5]] to [8192 x i32]*
+; CHECK-NEXT:    [[TMP15:%.*]] = icmp ugt [8192 x i32]* [[TMP14]], bitcast (i32* getelementptr inbounds ([8192 x i32], [8192 x i32]* @global_a, i64 0, i64 42) to [8192 x i32]*)
+; CHECK-NEXT:    [[TMP16:%.*]] = icmp ult [8192 x i32]* [[TMP11]], bitcast (i32* getelementptr inbounds ([8192 x i32], [8192 x i32]* @global_a, i64 0, i64 42) to [8192 x i32]*)
+; CHECK-NEXT:    [[TMP17:%.*]] = select i1 false, i1 [[TMP15]], i1 [[TMP16]]
+; CHECK-NEXT:    [[TMP18:%.*]] = or i1 [[TMP17]], [[MUL_OVERFLOW4]]
+; CHECK-NEXT:    [[TMP19:%.*]] = or i1 [[TMP10]], [[TMP18]]
+; CHECK-NEXT:    br i1 [[TMP19]], label [[FOR_BODY_PH_LVER_ORIG:%.*]], label [[FOR_BODY_PH_LDIST1:%.*]]
 ; CHECK:       for.body.ph.lver.orig:
 ; CHECK-NEXT:    br label [[FOR_BODY_LVER_ORIG:%.*]]
 ; CHECK:       for.body.lver.orig:
@@ -245,10 +253,10 @@
 ; CHECK-NEXT:    [[ARRAYIDXC:%.*]] = getelementptr inbounds i32, i32* [[C]], i64 [[MUL_EXT]]
 ; CHECK-NEXT:    store i32 [[MULC]], i32* [[ARRAYIDXC]], align 4
 ; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp eq i64 [[ADD]], [[N]]
-; CHECK-NEXT:    br i1 [[EXITCOND]], label [[FOR_END_LOOPEXIT5:%.*]], label [[FOR_BODY]]
+; CHECK-NEXT:    br i1 [[EXITCOND]], label [[FOR_END_LOOPEXIT6:%.*]], label [[FOR_BODY]]
 ; CHECK:       for.end.loopexit:
 ; CHECK-NEXT:    br label [[FOR_END:%.*]]
-; CHECK:       for.end.loopexit5:
+; CHECK:       for.end.loopexit6:
 ; CHECK-NEXT:    br label [[FOR_END]]
 ; CHECK:       for.end:
 ; CHECK-NEXT:    ret void
diff --git a/llvm/test/Transforms/LoopVersioning/wrapping-pointer-versioning.ll b/llvm/test/Transforms/LoopVersioning/wrapping-pointer-versioning.ll
--- a/llvm/test/Transforms/LoopVersioning/wrapping-pointer-versioning.ll
+++ b/llvm/test/Transforms/LoopVersioning/wrapping-pointer-versioning.ll
@@ -28,7 +28,7 @@
 define void @f1(i16* noalias %a,
 ; LV-LABEL: @f1(
 ; LV-NEXT:  for.body.lver.check:
-; LV-NEXT:    [[A2:%.*]] = ptrtoint i16* [[A:%.*]] to i64
+; LV-NEXT:    [[A5:%.*]] = bitcast i16* [[A:%.*]] to i8*
 ; LV-NEXT:    [[TMP0:%.*]] = add i64 [[N:%.*]], -1
 ; LV-NEXT:    [[TMP1:%.*]] = trunc i64 [[TMP0]] to i32
 ; LV-NEXT:    [[MUL1:%.*]] = call { i32, i1 } @llvm.umul.with.overflow.i32(i32 2, i32 [[TMP1]])
@@ -43,17 +43,21 @@
 ; LV-NEXT:    [[TMP8:%.*]] = or i1 [[TMP6]], [[TMP7]]
 ; LV-NEXT:    [[TMP9:%.*]] = or i1 [[TMP8]], [[MUL_OVERFLOW]]
 ; LV-NEXT:    [[TMP10:%.*]] = or i1 false, [[TMP9]]
-; LV-NEXT:    [[MUL3:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 4, i64 [[TMP0]])
-; LV-NEXT:    [[MUL_RESULT4:%.*]] = extractvalue { i64, i1 } [[MUL3]], 0
-; LV-NEXT:    [[MUL_OVERFLOW5:%.*]] = extractvalue { i64, i1 } [[MUL3]], 1
-; LV-NEXT:    [[TMP11:%.*]] = add i64 [[A2]], [[MUL_RESULT4]]
-; LV-NEXT:    [[TMP12:%.*]] = sub i64 [[A2]], [[MUL_RESULT4]]
-; LV-NEXT:    [[TMP13:%.*]] = icmp ugt i64 [[TMP12]], [[A2]]
-; LV-NEXT:    [[TMP14:%.*]] = icmp ult i64 [[TMP11]], [[A2]]
-; LV-NEXT:    [[TMP15:%.*]] = select i1 false, i1 [[TMP13]], i1 [[TMP14]]
-; LV-NEXT:    [[TMP16:%.*]] = or i1 [[TMP15]], [[MUL_OVERFLOW5]]
-; LV-NEXT:    [[TMP17:%.*]] = or i1 [[TMP10]], [[TMP16]]
-; LV-NEXT:    br i1 [[TMP17]], label [[FOR_BODY_PH_LVER_ORIG:%.*]], label [[FOR_BODY_PH:%.*]]
+; LV-NEXT:    [[MUL2:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 4, i64 [[TMP0]])
+; LV-NEXT:    [[MUL_RESULT3:%.*]] = extractvalue { i64, i1 } [[MUL2]], 0
+; LV-NEXT:    [[MUL_OVERFLOW4:%.*]] = extractvalue { i64, i1 } [[MUL2]], 1
+; LV-NEXT:    [[UGLYGEP:%.*]] = getelementptr i8, i8* [[A5]], i64 [[MUL_RESULT3]]
+; LV-NEXT:    [[TMP11:%.*]] = bitcast i8* [[UGLYGEP]] to i16*
+; LV-NEXT:    [[TMP12:%.*]] = sub i64 [[MUL_RESULT3]], -4
+; LV-NEXT:    [[TMP13:%.*]] = sub i64 4, [[TMP12]]
+; LV-NEXT:    [[UGLYGEP6:%.*]] = getelementptr i8, i8* [[A5]], i64 [[TMP13]]
+; LV-NEXT:    [[TMP14:%.*]] = bitcast i8* [[UGLYGEP6]] to i16*
+; LV-NEXT:    [[TMP15:%.*]] = icmp ugt i16* [[TMP14]], [[A]]
+; LV-NEXT:    [[TMP16:%.*]] = icmp ult i16* [[TMP11]], [[A]]
+; LV-NEXT:    [[TMP17:%.*]] = select i1 false, i1 [[TMP15]], i1 [[TMP16]]
+; LV-NEXT:    [[TMP18:%.*]] = or i1 [[TMP17]], [[MUL_OVERFLOW4]]
+; LV-NEXT:    [[TMP19:%.*]] = or i1 [[TMP10]], [[TMP18]]
+; LV-NEXT:    br i1 [[TMP19]], label [[FOR_BODY_PH_LVER_ORIG:%.*]], label [[FOR_BODY_PH:%.*]]
 ; LV:       for.body.ph.lver.orig:
 ; LV-NEXT:    br label [[FOR_BODY_LVER_ORIG:%.*]]
 ; LV:       for.body.lver.orig:
@@ -87,10 +91,10 @@
 ; LV-NEXT:    [[INC]] = add nuw nsw i64 [[IND]], 1
 ; LV-NEXT:    [[INC1]] = add i32 [[IND1]], 1
 ; LV-NEXT:    [[EXITCOND:%.*]] = icmp eq i64 [[INC]], [[N]]
-; LV-NEXT:    br i1 [[EXITCOND]], label [[FOR_END_LOOPEXIT6:%.*]], label [[FOR_BODY]]
+; LV-NEXT:    br i1 [[EXITCOND]], label [[FOR_END_LOOPEXIT7:%.*]], label [[FOR_BODY]]
 ; LV:       for.end.loopexit:
 ; LV-NEXT:    br label [[FOR_END:%.*]]
-; LV:       for.end.loopexit6:
+; LV:       for.end.loopexit7:
 ; LV-NEXT:    br label [[FOR_END]]
 ; LV:       for.end:
 ; LV-NEXT:    ret void
@@ -153,7 +157,6 @@
 define void @f2(i16* noalias %a,
 ; LV-LABEL: @f2(
 ; LV-NEXT:  for.body.lver.check:
-; LV-NEXT:    [[A2:%.*]] = ptrtoint i16* [[A:%.*]] to i64
 ; LV-NEXT:    [[TRUNCN:%.*]] = trunc i64 [[N:%.*]] to i32
 ; LV-NEXT:    [[TMP0:%.*]] = add i64 [[N]], -1
 ; LV-NEXT:    [[TMP1:%.*]] = shl i32 [[TRUNCN]], 1
@@ -172,19 +175,24 @@
 ; LV-NEXT:    [[TMP11:%.*]] = or i1 false, [[TMP10]]
 ; LV-NEXT:    [[TMP12:%.*]] = trunc i64 [[N]] to i31
 ; LV-NEXT:    [[TMP13:%.*]] = zext i31 [[TMP12]] to i64
-; LV-NEXT:    [[TMP14:%.*]] = shl nuw nsw i64 [[TMP13]], 2
-; LV-NEXT:    [[TMP15:%.*]] = add i64 [[A2]], [[TMP14]]
-; LV-NEXT:    [[MUL3:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 4, i64 [[TMP0]])
-; LV-NEXT:    [[MUL_RESULT4:%.*]] = extractvalue { i64, i1 } [[MUL3]], 0
-; LV-NEXT:    [[MUL_OVERFLOW5:%.*]] = extractvalue { i64, i1 } [[MUL3]], 1
-; LV-NEXT:    [[TMP16:%.*]] = add i64 [[TMP15]], [[MUL_RESULT4]]
-; LV-NEXT:    [[TMP17:%.*]] = sub i64 [[TMP15]], [[MUL_RESULT4]]
-; LV-NEXT:    [[TMP18:%.*]] = icmp ugt i64 [[TMP17]], [[TMP15]]
-; LV-NEXT:    [[TMP19:%.*]] = icmp ult i64 [[TMP16]], [[TMP15]]
-; LV-NEXT:    [[TMP20:%.*]] = select i1 true, i1 [[TMP18]], i1 [[TMP19]]
-; LV-NEXT:    [[TMP21:%.*]] = or i1 [[TMP20]], [[MUL_OVERFLOW5]]
-; LV-NEXT:    [[TMP22:%.*]] = or i1 [[TMP11]], [[TMP21]]
-; LV-NEXT:    br i1 [[TMP22]], label [[FOR_BODY_PH_LVER_ORIG:%.*]], label [[FOR_BODY_PH:%.*]]
+; LV-NEXT:    [[TMP14:%.*]] = shl nuw nsw i64 [[TMP13]], 1
+; LV-NEXT:    [[SCEVGEP:%.*]] = getelementptr i16, i16* [[A:%.*]], i64 [[TMP14]]
+; LV-NEXT:    [[MUL2:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 4, i64 [[TMP0]])
+; LV-NEXT:    [[MUL_RESULT3:%.*]] = extractvalue { i64, i1 } [[MUL2]], 0
+; LV-NEXT:    [[MUL_OVERFLOW4:%.*]] = extractvalue { i64, i1 } [[MUL2]], 1
+; LV-NEXT:    [[SCEVGEP5:%.*]] = bitcast i16* [[SCEVGEP]] to i8*
+; LV-NEXT:    [[UGLYGEP:%.*]] = getelementptr i8, i8* [[SCEVGEP5]], i64 [[MUL_RESULT3]]
+; LV-NEXT:    [[TMP15:%.*]] = bitcast i8* [[UGLYGEP]] to i16*
+; LV-NEXT:    [[TMP16:%.*]] = sub i64 [[MUL_RESULT3]], -4
+; LV-NEXT:    [[TMP17:%.*]] = sub i64 4, [[TMP16]]
+; LV-NEXT:    [[UGLYGEP6:%.*]] = getelementptr i8, i8* [[SCEVGEP5]], i64 [[TMP17]]
+; LV-NEXT:    [[TMP18:%.*]] = bitcast i8* [[UGLYGEP6]] to i16*
+; LV-NEXT:    [[TMP19:%.*]] = icmp ugt i16* [[TMP18]], [[SCEVGEP]]
+; LV-NEXT:    [[TMP20:%.*]] = icmp ult i16* [[TMP15]], [[SCEVGEP]]
+; LV-NEXT:    [[TMP21:%.*]] = select i1 true, i1 [[TMP19]], i1 [[TMP20]]
+; LV-NEXT:    [[TMP22:%.*]] = or i1 [[TMP21]], [[MUL_OVERFLOW4]]
+; LV-NEXT:    [[TMP23:%.*]] = or i1 [[TMP11]], [[TMP22]]
+; LV-NEXT:    br i1 [[TMP23]], label [[FOR_BODY_PH_LVER_ORIG:%.*]], label [[FOR_BODY_PH:%.*]]
 ; LV:       for.body.ph.lver.orig:
 ; LV-NEXT:    br label [[FOR_BODY_LVER_ORIG:%.*]]
 ; LV:       for.body.lver.orig:
@@ -218,10 +226,10 @@
 ; LV-NEXT:    [[INC]] = add nuw nsw i64 [[IND]], 1
 ; LV-NEXT:    [[DEC]] = sub i32 [[IND1]], 1
 ; LV-NEXT:    [[EXITCOND:%.*]] = icmp eq i64 [[INC]], [[N]]
-; LV-NEXT:    br i1 [[EXITCOND]], label [[FOR_END_LOOPEXIT6:%.*]], label [[FOR_BODY]]
+; LV-NEXT:    br i1 [[EXITCOND]], label [[FOR_END_LOOPEXIT7:%.*]], label [[FOR_BODY]]
 ; LV:       for.end.loopexit:
 ; LV-NEXT:    br label [[FOR_END:%.*]]
-; LV:       for.end.loopexit6:
+; LV:       for.end.loopexit7:
 ; LV-NEXT:    br label [[FOR_END]]
 ; LV:       for.end:
 ; LV-NEXT:    ret void
@@ -269,7 +277,7 @@
 define void @f3(i16* noalias %a,
 ; LV-LABEL: @f3(
 ; LV-NEXT:  for.body.lver.check:
-; LV-NEXT:    [[A2:%.*]] = ptrtoint i16* [[A:%.*]] to i64
+; LV-NEXT:    [[A5:%.*]] = bitcast i16* [[A:%.*]] to i8*
 ; LV-NEXT:    [[TMP0:%.*]] = add i64 [[N:%.*]], -1
 ; LV-NEXT:    [[TMP1:%.*]] = trunc i64 [[TMP0]] to i32
 ; LV-NEXT:    [[MUL1:%.*]] = call { i32, i1 } @llvm.umul.with.overflow.i32(i32 2, i32 [[TMP1]])
@@ -284,17 +292,21 @@
 ; LV-NEXT:    [[TMP8:%.*]] = or i1 [[TMP6]], [[TMP7]]
 ; LV-NEXT:    [[TMP9:%.*]] = or i1 [[TMP8]], [[MUL_OVERFLOW]]
 ; LV-NEXT:    [[TMP10:%.*]] = or i1 false, [[TMP9]]
-; LV-NEXT:    [[MUL3:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 4, i64 [[TMP0]])
-; LV-NEXT:    [[MUL_RESULT4:%.*]] = extractvalue { i64, i1 } [[MUL3]], 0
-; LV-NEXT:    [[MUL_OVERFLOW5:%.*]] = extractvalue { i64, i1 } [[MUL3]], 1
-; LV-NEXT:    [[TMP11:%.*]] = add i64 [[A2]], [[MUL_RESULT4]]
-; LV-NEXT:    [[TMP12:%.*]] = sub i64 [[A2]], [[MUL_RESULT4]]
-; LV-NEXT:    [[TMP13:%.*]] = icmp ugt i64 [[TMP12]], [[A2]]
-; LV-NEXT:    [[TMP14:%.*]] = icmp ult i64 [[TMP11]], [[A2]]
-; LV-NEXT:    [[TMP15:%.*]] = select i1 false, i1 [[TMP13]], i1 [[TMP14]]
-; LV-NEXT:    [[TMP16:%.*]] = or i1 [[TMP15]], [[MUL_OVERFLOW5]]
-; LV-NEXT:    [[TMP17:%.*]] = or i1 [[TMP10]], [[TMP16]]
-; LV-NEXT:    br i1 [[TMP17]], label [[FOR_BODY_PH_LVER_ORIG:%.*]], label [[FOR_BODY_PH:%.*]]
+; LV-NEXT:    [[MUL2:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 4, i64 [[TMP0]])
+; LV-NEXT:    [[MUL_RESULT3:%.*]] = extractvalue { i64, i1 } [[MUL2]], 0
+; LV-NEXT:    [[MUL_OVERFLOW4:%.*]] = extractvalue { i64, i1 } [[MUL2]], 1
+; LV-NEXT:    [[UGLYGEP:%.*]] = getelementptr i8, i8* [[A5]], i64 [[MUL_RESULT3]]
+; LV-NEXT:    [[TMP11:%.*]] = bitcast i8* [[UGLYGEP]] to i16*
+; LV-NEXT:    [[TMP12:%.*]] = sub i64 [[MUL_RESULT3]], -4
+; LV-NEXT:    [[TMP13:%.*]] = sub i64 4, [[TMP12]]
+; LV-NEXT:    [[UGLYGEP6:%.*]] = getelementptr i8, i8* [[A5]], i64 [[TMP13]]
+; LV-NEXT:    [[TMP14:%.*]] = bitcast i8* [[UGLYGEP6]] to i16*
+; LV-NEXT:    [[TMP15:%.*]] = icmp ugt i16* [[TMP14]], [[A]]
+; LV-NEXT:    [[TMP16:%.*]] = icmp ult i16* [[TMP11]], [[A]]
+; LV-NEXT:    [[TMP17:%.*]] = select i1 false, i1 [[TMP15]], i1 [[TMP16]]
+; LV-NEXT:    [[TMP18:%.*]] = or i1 [[TMP17]], [[MUL_OVERFLOW4]]
+; LV-NEXT:    [[TMP19:%.*]] = or i1 [[TMP10]], [[TMP18]]
+; LV-NEXT:    br i1 [[TMP19]], label [[FOR_BODY_PH_LVER_ORIG:%.*]], label [[FOR_BODY_PH:%.*]]
 ; LV:       for.body.ph.lver.orig:
 ; LV-NEXT:    br label [[FOR_BODY_LVER_ORIG:%.*]]
 ; LV:       for.body.lver.orig:
@@ -328,10 +340,10 @@
 ; LV-NEXT:    [[INC]] = add nuw nsw i64 [[IND]], 1
 ; LV-NEXT:    [[INC1]] = add i32 [[IND1]], 1
 ; LV-NEXT:    [[EXITCOND:%.*]] = icmp eq i64 [[INC]], [[N]]
-; LV-NEXT:    br i1 [[EXITCOND]], label [[FOR_END_LOOPEXIT6:%.*]], label [[FOR_BODY]]
+; LV-NEXT:    br i1 [[EXITCOND]], label [[FOR_END_LOOPEXIT7:%.*]], label [[FOR_BODY]]
 ; LV:       for.end.loopexit:
 ; LV-NEXT:    br label [[FOR_END:%.*]]
-; LV:       for.end.loopexit6:
+; LV:       for.end.loopexit7:
 ; LV-NEXT:    br label [[FOR_END]]
 ; LV:       for.end:
 ; LV-NEXT:    ret void
@@ -370,7 +382,6 @@
 define void @f4(i16* noalias %a,
 ; LV-LABEL: @f4(
 ; LV-NEXT:  for.body.lver.check:
-; LV-NEXT:    [[A2:%.*]] = ptrtoint i16* [[A:%.*]] to i64
 ; LV-NEXT:    [[TRUNCN:%.*]] = trunc i64 [[N:%.*]] to i32
 ; LV-NEXT:    [[TMP0:%.*]] = add i64 [[N]], -1
 ; LV-NEXT:    [[TMP1:%.*]] = shl i32 [[TRUNCN]], 1
@@ -388,17 +399,21 @@
 ; LV-NEXT:    [[TMP10:%.*]] = or i1 [[TMP9]], [[MUL_OVERFLOW]]
 ; LV-NEXT:    [[TMP11:%.*]] = or i1 false, [[TMP10]]
 ; LV-NEXT:    [[TMP12:%.*]] = sext i32 [[TMP1]] to i64
-; LV-NEXT:    [[TMP13:%.*]] = shl nsw i64 [[TMP12]], 1
-; LV-NEXT:    [[TMP14:%.*]] = add i64 [[A2]], [[TMP13]]
-; LV-NEXT:    [[MUL3:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 4, i64 [[TMP0]])
-; LV-NEXT:    [[MUL_RESULT4:%.*]] = extractvalue { i64, i1 } [[MUL3]], 0
-; LV-NEXT:    [[MUL_OVERFLOW5:%.*]] = extractvalue { i64, i1 } [[MUL3]], 1
-; LV-NEXT:    [[TMP15:%.*]] = add i64 [[TMP14]], [[MUL_RESULT4]]
-; LV-NEXT:    [[TMP16:%.*]] = sub i64 [[TMP14]], [[MUL_RESULT4]]
-; LV-NEXT:    [[TMP17:%.*]] = icmp ugt i64 [[TMP16]], [[TMP14]]
-; LV-NEXT:    [[TMP18:%.*]] = icmp ult i64 [[TMP15]], [[TMP14]]
+; LV-NEXT:    [[SCEVGEP:%.*]] = getelementptr i16, i16* [[A:%.*]], i64 [[TMP12]]
+; LV-NEXT:    [[MUL2:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 4, i64 [[TMP0]])
+; LV-NEXT:    [[MUL_RESULT3:%.*]] = extractvalue { i64, i1 } [[MUL2]], 0
+; LV-NEXT:    [[MUL_OVERFLOW4:%.*]] = extractvalue { i64, i1 } [[MUL2]], 1
+; LV-NEXT:    [[SCEVGEP5:%.*]] = bitcast i16* [[SCEVGEP]] to i8*
+; LV-NEXT:    [[UGLYGEP:%.*]] = getelementptr i8, i8* [[SCEVGEP5]], i64 [[MUL_RESULT3]]
+; LV-NEXT:    [[TMP13:%.*]] = bitcast i8* [[UGLYGEP]] to i16*
+; LV-NEXT:    [[TMP14:%.*]] = sub i64 [[MUL_RESULT3]], -4
+; LV-NEXT:    [[TMP15:%.*]] = sub i64 4, [[TMP14]]
+; LV-NEXT:    [[UGLYGEP6:%.*]] = getelementptr i8, i8* [[SCEVGEP5]], i64 [[TMP15]]
+; LV-NEXT:    [[TMP16:%.*]] = bitcast i8* [[UGLYGEP6]] to i16*
+; LV-NEXT:    [[TMP17:%.*]] = icmp ugt i16* [[TMP16]], [[SCEVGEP]]
+; LV-NEXT:    [[TMP18:%.*]] = icmp ult i16* [[TMP13]], [[SCEVGEP]]
 ; LV-NEXT:    [[TMP19:%.*]] = select i1 true, i1 [[TMP17]], i1 [[TMP18]]
-; LV-NEXT:    [[TMP20:%.*]] = or i1 [[TMP19]], [[MUL_OVERFLOW5]]
+; LV-NEXT:    [[TMP20:%.*]] = or i1 [[TMP19]], [[MUL_OVERFLOW4]]
 ; LV-NEXT:    [[TMP21:%.*]] = or i1 [[TMP11]], [[TMP20]]
 ; LV-NEXT:    br i1 [[TMP21]], label [[FOR_BODY_PH_LVER_ORIG:%.*]], label [[FOR_BODY_PH:%.*]]
 ; LV:       for.body.ph.lver.orig:
@@ -434,10 +449,10 @@
 ; LV-NEXT:    [[INC]] = add nuw nsw i64 [[IND]], 1
 ; LV-NEXT:    [[DEC]] = sub i32 [[IND1]], 1
 ; LV-NEXT:    [[EXITCOND:%.*]] = icmp eq i64 [[INC]], [[N]]
-; LV-NEXT:    br i1 [[EXITCOND]], label [[FOR_END_LOOPEXIT6:%.*]], label [[FOR_BODY]]
+; LV-NEXT:    br i1 [[EXITCOND]], label [[FOR_END_LOOPEXIT7:%.*]], label [[FOR_BODY]]
 ; LV:       for.end.loopexit:
 ; LV-NEXT:    br label [[FOR_END:%.*]]
-; LV:       for.end.loopexit6:
+; LV:       for.end.loopexit7:
 ; LV-NEXT:    br label [[FOR_END]]
 ; LV:       for.end:
 ; LV-NEXT:    ret void
@@ -484,7 +499,6 @@
 define void @f5(i16* noalias %a,
 ; LV-LABEL: @f5(
 ; LV-NEXT:  for.body.lver.check:
-; LV-NEXT:    [[A2:%.*]] = ptrtoint i16* [[A:%.*]] to i64
 ; LV-NEXT:    [[TRUNCN:%.*]] = trunc i64 [[N:%.*]] to i32
 ; LV-NEXT:    [[TMP0:%.*]] = add i64 [[N]], -1
 ; LV-NEXT:    [[TMP1:%.*]] = shl i32 [[TRUNCN]], 1
@@ -502,17 +516,21 @@
 ; LV-NEXT:    [[TMP10:%.*]] = or i1 [[TMP9]], [[MUL_OVERFLOW]]
 ; LV-NEXT:    [[TMP11:%.*]] = or i1 false, [[TMP10]]
 ; LV-NEXT:    [[TMP12:%.*]] = sext i32 [[TMP1]] to i64
-; LV-NEXT:    [[TMP13:%.*]] = shl nsw i64 [[TMP12]], 1
-; LV-NEXT:    [[TMP14:%.*]] = add i64 [[A2]], [[TMP13]]
-; LV-NEXT:    [[MUL3:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 4, i64 [[TMP0]])
-; LV-NEXT:    [[MUL_RESULT4:%.*]] = extractvalue { i64, i1 } [[MUL3]], 0
-; LV-NEXT:    [[MUL_OVERFLOW5:%.*]] = extractvalue { i64, i1 } [[MUL3]], 1
-; LV-NEXT:    [[TMP15:%.*]] = add i64 [[TMP14]], [[MUL_RESULT4]]
-; LV-NEXT:    [[TMP16:%.*]] = sub i64 [[TMP14]], [[MUL_RESULT4]]
-; LV-NEXT:    [[TMP17:%.*]] = icmp ugt i64 [[TMP16]], [[TMP14]]
-; LV-NEXT:    [[TMP18:%.*]] = icmp ult i64 [[TMP15]], [[TMP14]]
+; LV-NEXT:    [[SCEVGEP:%.*]] = getelementptr i16, i16* [[A:%.*]], i64 [[TMP12]]
+; LV-NEXT:    [[MUL2:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 4, i64 [[TMP0]])
+; LV-NEXT:    [[MUL_RESULT3:%.*]] = extractvalue { i64, i1 } [[MUL2]], 0
+; LV-NEXT:    [[MUL_OVERFLOW4:%.*]] = extractvalue { i64, i1 } [[MUL2]], 1
+; LV-NEXT:    [[SCEVGEP5:%.*]] = bitcast i16* [[SCEVGEP]] to i8*
+; LV-NEXT:    [[UGLYGEP:%.*]] = getelementptr i8, i8* [[SCEVGEP5]], i64 [[MUL_RESULT3]]
+; LV-NEXT:    [[TMP13:%.*]] = bitcast i8* [[UGLYGEP]] to i16*
+; LV-NEXT:    [[TMP14:%.*]] = sub i64 [[MUL_RESULT3]], -4
+; LV-NEXT:    [[TMP15:%.*]] = sub i64 4, [[TMP14]]
+; LV-NEXT:    [[UGLYGEP6:%.*]] = getelementptr i8, i8* [[SCEVGEP5]], i64 [[TMP15]]
+; LV-NEXT:    [[TMP16:%.*]] = bitcast i8* [[UGLYGEP6]] to i16*
+; LV-NEXT:    [[TMP17:%.*]] = icmp ugt i16* [[TMP16]], [[SCEVGEP]]
+; LV-NEXT:    [[TMP18:%.*]] = icmp ult i16* [[TMP13]], [[SCEVGEP]]
 ; LV-NEXT:    [[TMP19:%.*]] = select i1 true, i1 [[TMP17]], i1 [[TMP18]]
-; LV-NEXT:    [[TMP20:%.*]] = or i1 [[TMP19]], [[MUL_OVERFLOW5]]
+; LV-NEXT:    [[TMP20:%.*]] = or i1 [[TMP19]], [[MUL_OVERFLOW4]]
 ; LV-NEXT:    [[TMP21:%.*]] = or i1 [[TMP11]], [[TMP20]]
 ; LV-NEXT:    br i1 [[TMP21]], label [[FOR_BODY_PH_LVER_ORIG:%.*]], label [[FOR_BODY_PH:%.*]]
 ; LV:       for.body.ph.lver.orig:
@@ -546,10 +564,10 @@
 ; LV-NEXT:    [[INC]] = add nuw nsw i64 [[IND]], 1
 ; LV-NEXT:    [[DEC]] = sub i32 [[IND1]], 1
 ; LV-NEXT:    [[EXITCOND:%.*]] = icmp eq i64 [[INC]], [[N]]
-; LV-NEXT:    br i1 [[EXITCOND]], label [[FOR_END_LOOPEXIT6:%.*]], label [[FOR_BODY]]
+; LV-NEXT:    br i1 [[EXITCOND]], label [[FOR_END_LOOPEXIT7:%.*]], label [[FOR_BODY]]
 ; LV:       for.end.loopexit:
 ; LV-NEXT:    br label [[FOR_END:%.*]]
-; LV:       for.end.loopexit6:
+; LV:       for.end.loopexit7:
 ; LV-NEXT:    br label [[FOR_END]]
 ; LV:       for.end:
 ; LV-NEXT:    ret void
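A note on the churn in the CHECK lines: the renumbered suffixes (e.g. [[A2]] becoming [[A5]], [[MUL3]] becoming [[MUL2]], for.end.loopexit6 becoming for.end.loopexit7) are not hand edits. The new expansion changes which auto-generated value and block names collide, and the assertions are regenerated wholesale. Presumably they were refreshed with something like the following; the exact invocation is an assumption, not shown by the patch:

  llvm/utils/update_test_checks.py --opt-binary=build/bin/opt \
      llvm/test/Transforms/LoopDistribute/scev-inserted-runtime-check.ll \
      llvm/test/Transforms/LoopVersioning/wrapping-pointer-versioning.ll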