diff --git a/llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp b/llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp
--- a/llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp
+++ b/llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp
@@ -2461,15 +2461,11 @@
 
   IntegerType *Ty =
       IntegerType::get(Loc->getContext(), SE.getTypeSizeInBits(ARTy));
-  Type *ARExpandTy = DL.isNonIntegralPointerType(ARTy) ? ARTy : Ty;
 
   Value *StepValue = expandCodeForImpl(Step, Ty, Loc, false);
   Value *NegStepValue =
       expandCodeForImpl(SE.getNegativeSCEV(Step), Ty, Loc, false);
-  Value *StartValue = expandCodeForImpl(
-      isa<PointerType>(ARExpandTy) ? Start
-                                   : SE.getPtrToIntExpr(Start, ARExpandTy),
-      ARExpandTy, Loc, false);
+  Value *StartValue = expandCodeForImpl(Start, ARTy, Loc, false);
 
   ConstantInt *Zero =
       ConstantInt::get(Loc->getContext(), APInt::getNullValue(DstBits));
@@ -2493,7 +2489,7 @@
   // Start + |Step| * Backedge < Start
   // Start - |Step| * Backedge > Start
   Value *Add = nullptr, *Sub = nullptr;
-  if (PointerType *ARPtrTy = dyn_cast<PointerType>(ARExpandTy)) {
+  if (PointerType *ARPtrTy = dyn_cast<PointerType>(ARTy)) {
     const SCEV *MulS = SE.getSCEV(MulV);
     const SCEV *NegMulS = SE.getNegativeSCEV(MulS);
     Add = Builder.CreateBitCast(expandAddToGEP(MulS, ARPtrTy, Ty, StartValue),
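Not part of the patch, for review context: with ARExpandTy gone, the overflow check for a pointer-typed AddRec is now expanded entirely in the pointer domain instead of going through ptrtoint (previously only non-integral pointer types were exempted from the ptrtoint path). A rough before/after sketch of the emitted check, distilled from the updated CHECK lines below for @f (8-byte stride); value names are illustrative, and %mul.result stands for |Step| * backedge-taken-count:

  ; Before: cast the start pointer to i64 and do the wrap checks there.
  %a.int = ptrtoint i32* %a to i64
  %hi    = add i64 %a.int, %mul.result    ; Start + |Step| * BTC
  %lo    = sub i64 %a.int, %mul.result    ; Start - |Step| * BTC
  %w.dec = icmp ugt i64 %lo, %a.int       ; wrapped while stepping down?
  %w.inc = icmp ult i64 %hi, %a.int       ; wrapped while stepping up?

  ; After: keep the start as a pointer and step it with byte-typed GEPs.
  %a.i8   = bitcast i32* %a to i8*
  %hi.gep = getelementptr i8, i8* %a.i8, i64 %mul.result  ; Start + |Step| * BTC
  %t0     = sub i64 %mul.result, -8                       ; |Step| * BTC + 8
  %t1     = sub i64 8, %t0                                ; -(|Step| * BTC)
  %lo.gep = getelementptr i8, i8* %a.i8, i64 %t1          ; Start - |Step| * BTC
  %w.dec  = icmp ugt i8* %lo.gep, %a.i8   ; wrapped while stepping down?
  %w.inc  = icmp ult i8* %hi.gep, %a.i8   ; wrapped while stepping up?

(In the actual output the GEP results are bitcast back to the AddRec's pointer type before the compares; that detail is elided here.)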
diff --git a/llvm/test/Transforms/LoopDistribute/scev-inserted-runtime-check.ll b/llvm/test/Transforms/LoopDistribute/scev-inserted-runtime-check.ll
--- a/llvm/test/Transforms/LoopDistribute/scev-inserted-runtime-check.ll
+++ b/llvm/test/Transforms/LoopDistribute/scev-inserted-runtime-check.ll
@@ -9,7 +9,7 @@
 define void @f(i32* noalias %a, i32* noalias %b, i32* noalias %c, i32* noalias %d, i32* noalias %e, i64 %N) {
 ; CHECK-LABEL: @f(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[A2:%.*]] = ptrtoint i32* [[A:%.*]] to i64
+; CHECK-NEXT:    [[A5:%.*]] = bitcast i32* [[A:%.*]] to i8*
 ; CHECK-NEXT:    br label [[FOR_BODY_LVER_CHECK:%.*]]
 ; CHECK:       for.body.lver.check:
 ; CHECK-NEXT:    [[TMP0:%.*]] = add i64 [[N:%.*]], -1
@@ -26,17 +26,21 @@
 ; CHECK-NEXT:    [[TMP8:%.*]] = or i1 [[TMP6]], [[TMP7]]
 ; CHECK-NEXT:    [[TMP9:%.*]] = or i1 [[TMP8]], [[MUL_OVERFLOW]]
 ; CHECK-NEXT:    [[TMP10:%.*]] = or i1 false, [[TMP9]]
-; CHECK-NEXT:    [[MUL3:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 8, i64 [[TMP0]])
-; CHECK-NEXT:    [[MUL_RESULT4:%.*]] = extractvalue { i64, i1 } [[MUL3]], 0
-; CHECK-NEXT:    [[MUL_OVERFLOW5:%.*]] = extractvalue { i64, i1 } [[MUL3]], 1
-; CHECK-NEXT:    [[TMP11:%.*]] = add i64 [[A2]], [[MUL_RESULT4]]
-; CHECK-NEXT:    [[TMP12:%.*]] = sub i64 [[A2]], [[MUL_RESULT4]]
-; CHECK-NEXT:    [[TMP13:%.*]] = icmp ugt i64 [[TMP12]], [[A2]]
-; CHECK-NEXT:    [[TMP14:%.*]] = icmp ult i64 [[TMP11]], [[A2]]
-; CHECK-NEXT:    [[TMP15:%.*]] = select i1 false, i1 [[TMP13]], i1 [[TMP14]]
-; CHECK-NEXT:    [[TMP16:%.*]] = or i1 [[TMP15]], [[MUL_OVERFLOW5]]
-; CHECK-NEXT:    [[TMP17:%.*]] = or i1 [[TMP10]], [[TMP16]]
-; CHECK-NEXT:    br i1 [[TMP17]], label [[FOR_BODY_PH_LVER_ORIG:%.*]], label [[FOR_BODY_PH_LDIST1:%.*]]
+; CHECK-NEXT:    [[MUL2:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 8, i64 [[TMP0]])
+; CHECK-NEXT:    [[MUL_RESULT3:%.*]] = extractvalue { i64, i1 } [[MUL2]], 0
+; CHECK-NEXT:    [[MUL_OVERFLOW4:%.*]] = extractvalue { i64, i1 } [[MUL2]], 1
+; CHECK-NEXT:    [[UGLYGEP:%.*]] = getelementptr i8, i8* [[A5]], i64 [[MUL_RESULT3]]
+; CHECK-NEXT:    [[TMP11:%.*]] = bitcast i8* [[UGLYGEP]] to i32*
+; CHECK-NEXT:    [[TMP12:%.*]] = sub i64 [[MUL_RESULT3]], -8
+; CHECK-NEXT:    [[TMP13:%.*]] = sub i64 8, [[TMP12]]
+; CHECK-NEXT:    [[UGLYGEP6:%.*]] = getelementptr i8, i8* [[A5]], i64 [[TMP13]]
+; CHECK-NEXT:    [[TMP14:%.*]] = bitcast i8* [[UGLYGEP6]] to i32*
+; CHECK-NEXT:    [[TMP15:%.*]] = icmp ugt i32* [[TMP14]], [[A]]
+; CHECK-NEXT:    [[TMP16:%.*]] = icmp ult i32* [[TMP11]], [[A]]
+; CHECK-NEXT:    [[TMP17:%.*]] = select i1 false, i1 [[TMP15]], i1 [[TMP16]]
+; CHECK-NEXT:    [[TMP18:%.*]] = or i1 [[TMP17]], [[MUL_OVERFLOW4]]
+; CHECK-NEXT:    [[TMP19:%.*]] = or i1 [[TMP10]], [[TMP18]]
+; CHECK-NEXT:    br i1 [[TMP19]], label [[FOR_BODY_PH_LVER_ORIG:%.*]], label [[FOR_BODY_PH_LDIST1:%.*]]
 ; CHECK:       for.body.ph.lver.orig:
 ; CHECK-NEXT:    br label [[FOR_BODY_LVER_ORIG:%.*]]
 ; CHECK:       for.body.lver.orig:
@@ -97,10 +101,10 @@
 ; CHECK-NEXT:    [[ARRAYIDXC:%.*]] = getelementptr inbounds i32, i32* [[C]], i64 [[MUL_EXT]]
 ; CHECK-NEXT:    store i32 [[MULC]], i32* [[ARRAYIDXC]], align 4
 ; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp eq i64 [[ADD]], [[N]]
-; CHECK-NEXT:    br i1 [[EXITCOND]], label [[FOR_END_LOOPEXIT6:%.*]], label [[FOR_BODY]]
+; CHECK-NEXT:    br i1 [[EXITCOND]], label [[FOR_END_LOOPEXIT7:%.*]], label [[FOR_BODY]]
 ; CHECK:       for.end.loopexit:
 ; CHECK-NEXT:    br label [[FOR_END:%.*]]
-; CHECK:       for.end.loopexit6:
+; CHECK:       for.end.loopexit7:
 ; CHECK-NEXT:    br label [[FOR_END]]
 ; CHECK:       for.end:
 ; CHECK-NEXT:    ret void
@@ -177,14 +181,18 @@
 ; CHECK-NEXT:    [[MUL2:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 8, i64 [[TMP0]])
 ; CHECK-NEXT:    [[MUL_RESULT3:%.*]] = extractvalue { i64, i1 } [[MUL2]], 0
 ; CHECK-NEXT:    [[MUL_OVERFLOW4:%.*]] = extractvalue { i64, i1 } [[MUL2]], 1
-; CHECK-NEXT:    [[TMP11:%.*]] = add i64 add (i64 ptrtoint ([8192 x i32]* @global_a to i64), i64 168), [[MUL_RESULT3]]
-; CHECK-NEXT:    [[TMP12:%.*]] = sub i64 add (i64 ptrtoint ([8192 x i32]* @global_a to i64), i64 168), [[MUL_RESULT3]]
-; CHECK-NEXT:    [[TMP13:%.*]] = icmp ugt i64 [[TMP12]], add (i64 ptrtoint ([8192 x i32]* @global_a to i64), i64 168)
-; CHECK-NEXT:    [[TMP14:%.*]] = icmp ult i64 [[TMP11]], add (i64 ptrtoint ([8192 x i32]* @global_a to i64), i64 168)
-; CHECK-NEXT:    [[TMP15:%.*]] = select i1 false, i1 [[TMP13]], i1 [[TMP14]]
-; CHECK-NEXT:    [[TMP16:%.*]] = or i1 [[TMP15]], [[MUL_OVERFLOW4]]
-; CHECK-NEXT:    [[TMP17:%.*]] = or i1 [[TMP10]], [[TMP16]]
-; CHECK-NEXT:    br i1 [[TMP17]], label [[FOR_BODY_PH_LVER_ORIG:%.*]], label [[FOR_BODY_PH_LDIST1:%.*]]
+; CHECK-NEXT:    [[UGLYGEP:%.*]] = getelementptr i8, i8* bitcast (i32* getelementptr inbounds ([8192 x i32], [8192 x i32]* @global_a, i64 0, i64 42) to i8*), i64 [[MUL_RESULT3]]
+; CHECK-NEXT:    [[TMP11:%.*]] = bitcast i8* [[UGLYGEP]] to [8192 x i32]*
+; CHECK-NEXT:    [[TMP12:%.*]] = sub i64 [[MUL_RESULT3]], -8
+; CHECK-NEXT:    [[TMP13:%.*]] = sub i64 8, [[TMP12]]
+; CHECK-NEXT:    [[UGLYGEP5:%.*]] = getelementptr i8, i8* bitcast (i32* getelementptr inbounds ([8192 x i32], [8192 x i32]* @global_a, i64 0, i64 42) to i8*), i64 [[TMP13]]
+; CHECK-NEXT:    [[TMP14:%.*]] = bitcast i8* [[UGLYGEP5]] to [8192 x i32]*
+; CHECK-NEXT:    [[TMP15:%.*]] = icmp ugt [8192 x i32]* [[TMP14]], bitcast (i32* getelementptr inbounds ([8192 x i32], [8192 x i32]* @global_a, i64 0, i64 42) to [8192 x i32]*)
+; CHECK-NEXT:    [[TMP16:%.*]] = icmp ult [8192 x i32]* [[TMP11]], bitcast (i32* getelementptr inbounds ([8192 x i32], [8192 x i32]* @global_a, i64 0, i64 42) to [8192 x i32]*)
+; CHECK-NEXT:    [[TMP17:%.*]] = select i1 false, i1 [[TMP15]], i1 [[TMP16]]
+; CHECK-NEXT:    [[TMP18:%.*]] = or i1 [[TMP17]], [[MUL_OVERFLOW4]]
+; CHECK-NEXT:    [[TMP19:%.*]] = or i1 [[TMP10]], [[TMP18]]
+; CHECK-NEXT:    br i1 [[TMP19]], label [[FOR_BODY_PH_LVER_ORIG:%.*]], label [[FOR_BODY_PH_LDIST1:%.*]]
 ; CHECK:       for.body.ph.lver.orig:
 ; CHECK-NEXT:    br label [[FOR_BODY_LVER_ORIG:%.*]]
 ; CHECK:       for.body.lver.orig:
@@ -245,10 +253,10 @@
 ; CHECK-NEXT:    [[ARRAYIDXC:%.*]] = getelementptr inbounds i32, i32* [[C]], i64 [[MUL_EXT]]
 ; CHECK-NEXT:    store i32 [[MULC]], i32* [[ARRAYIDXC]], align 4
 ; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp eq i64 [[ADD]], [[N]]
-; CHECK-NEXT:    br i1 [[EXITCOND]], label [[FOR_END_LOOPEXIT5:%.*]], label [[FOR_BODY]]
+; CHECK-NEXT:    br i1 [[EXITCOND]], label [[FOR_END_LOOPEXIT6:%.*]], label [[FOR_BODY]]
 ; CHECK:       for.end.loopexit:
 ; CHECK-NEXT:    br label [[FOR_END:%.*]]
-; CHECK:       for.end.loopexit5:
+; CHECK:       for.end.loopexit6:
 ; CHECK-NEXT:    br label [[FOR_END]]
 ; CHECK:       for.end:
 ; CHECK-NEXT:    ret void
diff --git a/llvm/test/Transforms/LoopVersioning/wrapping-pointer-versioning.ll b/llvm/test/Transforms/LoopVersioning/wrapping-pointer-versioning.ll
--- a/llvm/test/Transforms/LoopVersioning/wrapping-pointer-versioning.ll
+++ b/llvm/test/Transforms/LoopVersioning/wrapping-pointer-versioning.ll
@@ -28,7 +28,7 @@
 define void @f1(i16* noalias %a,
 ; LV-LABEL: @f1(
 ; LV-NEXT:  for.body.lver.check:
-; LV-NEXT:    [[A2:%.*]] = ptrtoint i16* [[A:%.*]] to i64
+; LV-NEXT:    [[A5:%.*]] = bitcast i16* [[A:%.*]] to i8*
 ; LV-NEXT:    [[TMP0:%.*]] = add i64 [[N:%.*]], -1
 ; LV-NEXT:    [[TMP1:%.*]] = trunc i64 [[TMP0]] to i32
 ; LV-NEXT:    [[MUL1:%.*]] = call { i32, i1 } @llvm.umul.with.overflow.i32(i32 2, i32 [[TMP1]])
@@ -43,17 +43,21 @@
 ; LV-NEXT:    [[TMP8:%.*]] = or i1 [[TMP6]], [[TMP7]]
 ; LV-NEXT:    [[TMP9:%.*]] = or i1 [[TMP8]], [[MUL_OVERFLOW]]
 ; LV-NEXT:    [[TMP10:%.*]] = or i1 false, [[TMP9]]
-; LV-NEXT:    [[MUL3:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 4, i64 [[TMP0]])
-; LV-NEXT:    [[MUL_RESULT4:%.*]] = extractvalue { i64, i1 } [[MUL3]], 0
-; LV-NEXT:    [[MUL_OVERFLOW5:%.*]] = extractvalue { i64, i1 } [[MUL3]], 1
-; LV-NEXT:    [[TMP11:%.*]] = add i64 [[A2]], [[MUL_RESULT4]]
-; LV-NEXT:    [[TMP12:%.*]] = sub i64 [[A2]], [[MUL_RESULT4]]
-; LV-NEXT:    [[TMP13:%.*]] = icmp ugt i64 [[TMP12]], [[A2]]
-; LV-NEXT:    [[TMP14:%.*]] = icmp ult i64 [[TMP11]], [[A2]]
-; LV-NEXT:    [[TMP15:%.*]] = select i1 false, i1 [[TMP13]], i1 [[TMP14]]
-; LV-NEXT:    [[TMP16:%.*]] = or i1 [[TMP15]], [[MUL_OVERFLOW5]]
-; LV-NEXT:    [[TMP17:%.*]] = or i1 [[TMP10]], [[TMP16]]
-; LV-NEXT:    br i1 [[TMP17]], label [[FOR_BODY_PH_LVER_ORIG:%.*]], label [[FOR_BODY_PH:%.*]]
+; LV-NEXT:    [[MUL2:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 4, i64 [[TMP0]])
+; LV-NEXT:    [[MUL_RESULT3:%.*]] = extractvalue { i64, i1 } [[MUL2]], 0
+; LV-NEXT:    [[MUL_OVERFLOW4:%.*]] = extractvalue { i64, i1 } [[MUL2]], 1
+; LV-NEXT:    [[UGLYGEP:%.*]] = getelementptr i8, i8* [[A5]], i64 [[MUL_RESULT3]]
+; LV-NEXT:    [[TMP11:%.*]] = bitcast i8* [[UGLYGEP]] to i16*
+; LV-NEXT:    [[TMP12:%.*]] = sub i64 [[MUL_RESULT3]], -4
+; LV-NEXT:    [[TMP13:%.*]] = sub i64 4, [[TMP12]]
+; LV-NEXT:    [[UGLYGEP6:%.*]] = getelementptr i8, i8* [[A5]], i64 [[TMP13]]
+; LV-NEXT:    [[TMP14:%.*]] = bitcast i8* [[UGLYGEP6]] to i16*
+; LV-NEXT:    [[TMP15:%.*]] = icmp ugt i16* [[TMP14]], [[A]]
+; LV-NEXT:    [[TMP16:%.*]] = icmp ult i16* [[TMP11]], [[A]]
+; LV-NEXT:    [[TMP17:%.*]] = select i1 false, i1 [[TMP15]], i1 [[TMP16]]
+; LV-NEXT:    [[TMP18:%.*]] = or i1 [[TMP17]], [[MUL_OVERFLOW4]]
+; LV-NEXT:    [[TMP19:%.*]] = or i1 [[TMP10]], [[TMP18]]
+; LV-NEXT:    br i1 [[TMP19]], label [[FOR_BODY_PH_LVER_ORIG:%.*]], label [[FOR_BODY_PH:%.*]]
 ; LV:       for.body.ph.lver.orig:
 ; LV-NEXT:    br label [[FOR_BODY_LVER_ORIG:%.*]]
 ; LV:       for.body.lver.orig:
@@ -87,10 +91,10 @@
 ; LV-NEXT:    [[INC]] = add nuw nsw i64 [[IND]], 1
 ; LV-NEXT:    [[INC1]] = add i32 [[IND1]], 1
 ; LV-NEXT:    [[EXITCOND:%.*]] = icmp eq i64 [[INC]], [[N]]
-; LV-NEXT:    br i1 [[EXITCOND]], label [[FOR_END_LOOPEXIT6:%.*]], label [[FOR_BODY]]
+; LV-NEXT:    br i1 [[EXITCOND]], label [[FOR_END_LOOPEXIT7:%.*]], label [[FOR_BODY]]
 ; LV:       for.end.loopexit:
 ; LV-NEXT:    br label [[FOR_END:%.*]]
-; LV:       for.end.loopexit6:
+; LV:       for.end.loopexit7:
 ; LV-NEXT:    br label [[FOR_END]]
 ; LV:       for.end:
 ; LV-NEXT:    ret void
@@ -153,7 +157,6 @@
 define void @f2(i16* noalias %a,
 ; LV-LABEL: @f2(
 ; LV-NEXT:  for.body.lver.check:
-; LV-NEXT:    [[A2:%.*]] = ptrtoint i16* [[A:%.*]] to i64
 ; LV-NEXT:    [[TRUNCN:%.*]] = trunc i64 [[N:%.*]] to i32
 ; LV-NEXT:    [[TMP0:%.*]] = add i64 [[N]], -1
 ; LV-NEXT:    [[TMP1:%.*]] = shl i32 [[TRUNCN]], 1
@@ -172,19 +175,24 @@
 ; LV-NEXT:    [[TMP11:%.*]] = or i1 false, [[TMP10]]
 ; LV-NEXT:    [[TMP12:%.*]] = trunc i64 [[N]] to i31
 ; LV-NEXT:    [[TMP13:%.*]] = zext i31 [[TMP12]] to i64
-; LV-NEXT:    [[TMP14:%.*]] = shl nuw nsw i64 [[TMP13]], 2
-; LV-NEXT:    [[TMP15:%.*]] = add i64 [[A2]], [[TMP14]]
-; LV-NEXT:    [[MUL3:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 4, i64 [[TMP0]])
-; LV-NEXT:    [[MUL_RESULT4:%.*]] = extractvalue { i64, i1 } [[MUL3]], 0
-; LV-NEXT:    [[MUL_OVERFLOW5:%.*]] = extractvalue { i64, i1 } [[MUL3]], 1
-; LV-NEXT:    [[TMP16:%.*]] = add i64 [[TMP15]], [[MUL_RESULT4]]
-; LV-NEXT:    [[TMP17:%.*]] = sub i64 [[TMP15]], [[MUL_RESULT4]]
-; LV-NEXT:    [[TMP18:%.*]] = icmp ugt i64 [[TMP17]], [[TMP15]]
-; LV-NEXT:    [[TMP19:%.*]] = icmp ult i64 [[TMP16]], [[TMP15]]
-; LV-NEXT:    [[TMP20:%.*]] = select i1 true, i1 [[TMP18]], i1 [[TMP19]]
-; LV-NEXT:    [[TMP21:%.*]] = or i1 [[TMP20]], [[MUL_OVERFLOW5]]
-; LV-NEXT:    [[TMP22:%.*]] = or i1 [[TMP11]], [[TMP21]]
-; LV-NEXT:    br i1 [[TMP22]], label [[FOR_BODY_PH_LVER_ORIG:%.*]], label [[FOR_BODY_PH:%.*]]
+; LV-NEXT:    [[TMP14:%.*]] = shl nuw nsw i64 [[TMP13]], 1
+; LV-NEXT:    [[SCEVGEP:%.*]] = getelementptr i16, i16* [[A:%.*]], i64 [[TMP14]]
+; LV-NEXT:    [[MUL2:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 4, i64 [[TMP0]])
+; LV-NEXT:    [[MUL_RESULT3:%.*]] = extractvalue { i64, i1 } [[MUL2]], 0
+; LV-NEXT:    [[MUL_OVERFLOW4:%.*]] = extractvalue { i64, i1 } [[MUL2]], 1
+; LV-NEXT:    [[SCEVGEP5:%.*]] = bitcast i16* [[SCEVGEP]] to i8*
+; LV-NEXT:    [[UGLYGEP:%.*]] = getelementptr i8, i8* [[SCEVGEP5]], i64 [[MUL_RESULT3]]
+; LV-NEXT:    [[TMP15:%.*]] = bitcast i8* [[UGLYGEP]] to i16*
+; LV-NEXT:    [[TMP16:%.*]] = sub i64 [[MUL_RESULT3]], -4
+; LV-NEXT:    [[TMP17:%.*]] = sub i64 4, [[TMP16]]
+; LV-NEXT:    [[UGLYGEP6:%.*]] = getelementptr i8, i8* [[SCEVGEP5]], i64 [[TMP17]]
+; LV-NEXT:    [[TMP18:%.*]] = bitcast i8* [[UGLYGEP6]] to i16*
+; LV-NEXT:    [[TMP19:%.*]] = icmp ugt i16* [[TMP18]], [[SCEVGEP]]
+; LV-NEXT:    [[TMP20:%.*]] = icmp ult i16* [[TMP15]], [[SCEVGEP]]
+; LV-NEXT:    [[TMP21:%.*]] = select i1 true, i1 [[TMP19]], i1 [[TMP20]]
+; LV-NEXT:    [[TMP22:%.*]] = or i1 [[TMP21]], [[MUL_OVERFLOW4]]
+; LV-NEXT:    [[TMP23:%.*]] = or i1 [[TMP11]], [[TMP22]]
+; LV-NEXT:    br i1 [[TMP23]], label [[FOR_BODY_PH_LVER_ORIG:%.*]], label [[FOR_BODY_PH:%.*]]
 ; LV:       for.body.ph.lver.orig:
 ; LV-NEXT:    br label [[FOR_BODY_LVER_ORIG:%.*]]
 ; LV:       for.body.lver.orig:
@@ -218,10 +226,10 @@
 ; LV-NEXT:    [[INC]] = add nuw nsw i64 [[IND]], 1
 ; LV-NEXT:    [[DEC]] = sub i32 [[IND1]], 1
 ; LV-NEXT:    [[EXITCOND:%.*]] = icmp eq i64 [[INC]], [[N]]
-; LV-NEXT:    br i1 [[EXITCOND]], label [[FOR_END_LOOPEXIT6:%.*]], label [[FOR_BODY]]
+; LV-NEXT:    br i1 [[EXITCOND]], label [[FOR_END_LOOPEXIT7:%.*]], label [[FOR_BODY]]
 ; LV:       for.end.loopexit:
 ; LV-NEXT:    br label [[FOR_END:%.*]]
-; LV:       for.end.loopexit6:
+; LV:       for.end.loopexit7:
 ; LV-NEXT:    br label [[FOR_END]]
 ; LV:       for.end:
 ; LV-NEXT:    ret void
@@ -269,7 +277,7 @@
 define void @f3(i16* noalias %a,
 ; LV-LABEL: @f3(
 ; LV-NEXT:  for.body.lver.check:
-; LV-NEXT:    [[A2:%.*]] = ptrtoint i16* [[A:%.*]] to i64
+; LV-NEXT:    [[A5:%.*]] = bitcast i16* [[A:%.*]] to i8*
 ; LV-NEXT:    [[TMP0:%.*]] = add i64 [[N:%.*]], -1
 ; LV-NEXT:    [[TMP1:%.*]] = trunc i64 [[TMP0]] to i32
 ; LV-NEXT:    [[MUL1:%.*]] = call { i32, i1 } @llvm.umul.with.overflow.i32(i32 2, i32 [[TMP1]])
@@ -284,17 +292,21 @@
 ; LV-NEXT:    [[TMP8:%.*]] = or i1 [[TMP6]], [[TMP7]]
 ; LV-NEXT:    [[TMP9:%.*]] = or i1 [[TMP8]], [[MUL_OVERFLOW]]
 ; LV-NEXT:    [[TMP10:%.*]] = or i1 false, [[TMP9]]
-; LV-NEXT:    [[MUL3:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 4, i64 [[TMP0]])
-; LV-NEXT:    [[MUL_RESULT4:%.*]] = extractvalue { i64, i1 } [[MUL3]], 0
-; LV-NEXT:    [[MUL_OVERFLOW5:%.*]] = extractvalue { i64, i1 } [[MUL3]], 1
-; LV-NEXT:    [[TMP11:%.*]] = add i64 [[A2]], [[MUL_RESULT4]]
-; LV-NEXT:    [[TMP12:%.*]] = sub i64 [[A2]], [[MUL_RESULT4]]
-; LV-NEXT:    [[TMP13:%.*]] = icmp ugt i64 [[TMP12]], [[A2]]
-; LV-NEXT:    [[TMP14:%.*]] = icmp ult i64 [[TMP11]], [[A2]]
-; LV-NEXT:    [[TMP15:%.*]] = select i1 false, i1 [[TMP13]], i1 [[TMP14]]
-; LV-NEXT:    [[TMP16:%.*]] = or i1 [[TMP15]], [[MUL_OVERFLOW5]]
-; LV-NEXT:    [[TMP17:%.*]] = or i1 [[TMP10]], [[TMP16]]
-; LV-NEXT:    br i1 [[TMP17]], label [[FOR_BODY_PH_LVER_ORIG:%.*]], label [[FOR_BODY_PH:%.*]]
+; LV-NEXT:    [[MUL2:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 4, i64 [[TMP0]])
+; LV-NEXT:    [[MUL_RESULT3:%.*]] = extractvalue { i64, i1 } [[MUL2]], 0
+; LV-NEXT:    [[MUL_OVERFLOW4:%.*]] = extractvalue { i64, i1 } [[MUL2]], 1
+; LV-NEXT:    [[UGLYGEP:%.*]] = getelementptr i8, i8* [[A5]], i64 [[MUL_RESULT3]]
+; LV-NEXT:    [[TMP11:%.*]] = bitcast i8* [[UGLYGEP]] to i16*
+; LV-NEXT:    [[TMP12:%.*]] = sub i64 [[MUL_RESULT3]], -4
+; LV-NEXT:    [[TMP13:%.*]] = sub i64 4, [[TMP12]]
+; LV-NEXT:    [[UGLYGEP6:%.*]] = getelementptr i8, i8* [[A5]], i64 [[TMP13]]
+; LV-NEXT:    [[TMP14:%.*]] = bitcast i8* [[UGLYGEP6]] to i16*
+; LV-NEXT:    [[TMP15:%.*]] = icmp ugt i16* [[TMP14]], [[A]]
+; LV-NEXT:    [[TMP16:%.*]] = icmp ult i16* [[TMP11]], [[A]]
+; LV-NEXT:    [[TMP17:%.*]] = select i1 false, i1 [[TMP15]], i1 [[TMP16]]
+; LV-NEXT:    [[TMP18:%.*]] = or i1 [[TMP17]], [[MUL_OVERFLOW4]]
+; LV-NEXT:    [[TMP19:%.*]] = or i1 [[TMP10]], [[TMP18]]
+; LV-NEXT:    br i1 [[TMP19]], label [[FOR_BODY_PH_LVER_ORIG:%.*]], label [[FOR_BODY_PH:%.*]]
 ; LV:       for.body.ph.lver.orig:
 ; LV-NEXT:    br label [[FOR_BODY_LVER_ORIG:%.*]]
 ; LV:       for.body.lver.orig:
@@ -328,10 +340,10 @@
 ; LV-NEXT:    [[INC]] = add nuw nsw i64 [[IND]], 1
 ; LV-NEXT:    [[INC1]] = add i32 [[IND1]], 1
 ; LV-NEXT:    [[EXITCOND:%.*]] = icmp eq i64 [[INC]], [[N]]
-; LV-NEXT:    br i1 [[EXITCOND]], label [[FOR_END_LOOPEXIT6:%.*]], label [[FOR_BODY]]
+; LV-NEXT:    br i1 [[EXITCOND]], label [[FOR_END_LOOPEXIT7:%.*]], label [[FOR_BODY]]
 ; LV:       for.end.loopexit:
 ; LV-NEXT:    br label [[FOR_END:%.*]]
-; LV:       for.end.loopexit6:
+; LV:       for.end.loopexit7:
 ; LV-NEXT:    br label [[FOR_END]]
 ; LV:       for.end:
 ; LV-NEXT:    ret void
@@ -370,7 +382,6 @@
 define void @f4(i16* noalias %a,
 ; LV-LABEL: @f4(
 ; LV-NEXT:  for.body.lver.check:
-; LV-NEXT:    [[A2:%.*]] = ptrtoint i16* [[A:%.*]] to i64
 ; LV-NEXT:    [[TRUNCN:%.*]] = trunc i64 [[N:%.*]] to i32
 ; LV-NEXT:    [[TMP0:%.*]] = add i64 [[N]], -1
 ; LV-NEXT:    [[TMP1:%.*]] = shl i32 [[TRUNCN]], 1
@@ -388,17 +399,21 @@
 ; LV-NEXT:    [[TMP10:%.*]] = or i1 [[TMP9]], [[MUL_OVERFLOW]]
 ; LV-NEXT:    [[TMP11:%.*]] = or i1 false, [[TMP10]]
 ; LV-NEXT:    [[TMP12:%.*]] = sext i32 [[TMP1]] to i64
-; LV-NEXT:    [[TMP13:%.*]] = shl nsw i64 [[TMP12]], 1
-; LV-NEXT:    [[TMP14:%.*]] = add i64 [[A2]], [[TMP13]]
-; LV-NEXT:    [[MUL3:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 4, i64 [[TMP0]])
-; LV-NEXT:    [[MUL_RESULT4:%.*]] = extractvalue { i64, i1 } [[MUL3]], 0
-; LV-NEXT:    [[MUL_OVERFLOW5:%.*]] = extractvalue { i64, i1 } [[MUL3]], 1
-; LV-NEXT:    [[TMP15:%.*]] = add i64 [[TMP14]], [[MUL_RESULT4]]
-; LV-NEXT:    [[TMP16:%.*]] = sub i64 [[TMP14]], [[MUL_RESULT4]]
-; LV-NEXT:    [[TMP17:%.*]] = icmp ugt i64 [[TMP16]], [[TMP14]]
-; LV-NEXT:    [[TMP18:%.*]] = icmp ult i64 [[TMP15]], [[TMP14]]
+; LV-NEXT:    [[SCEVGEP:%.*]] = getelementptr i16, i16* [[A:%.*]], i64 [[TMP12]]
+; LV-NEXT:    [[MUL2:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 4, i64 [[TMP0]])
+; LV-NEXT:    [[MUL_RESULT3:%.*]] = extractvalue { i64, i1 } [[MUL2]], 0
+; LV-NEXT:    [[MUL_OVERFLOW4:%.*]] = extractvalue { i64, i1 } [[MUL2]], 1
+; LV-NEXT:    [[SCEVGEP5:%.*]] = bitcast i16* [[SCEVGEP]] to i8*
+; LV-NEXT:    [[UGLYGEP:%.*]] = getelementptr i8, i8* [[SCEVGEP5]], i64 [[MUL_RESULT3]]
+; LV-NEXT:    [[TMP13:%.*]] = bitcast i8* [[UGLYGEP]] to i16*
+; LV-NEXT:    [[TMP14:%.*]] = sub i64 [[MUL_RESULT3]], -4
+; LV-NEXT:    [[TMP15:%.*]] = sub i64 4, [[TMP14]]
+; LV-NEXT:    [[UGLYGEP6:%.*]] = getelementptr i8, i8* [[SCEVGEP5]], i64 [[TMP15]]
+; LV-NEXT:    [[TMP16:%.*]] = bitcast i8* [[UGLYGEP6]] to i16*
+; LV-NEXT:    [[TMP17:%.*]] = icmp ugt i16* [[TMP16]], [[SCEVGEP]]
+; LV-NEXT:    [[TMP18:%.*]] = icmp ult i16* [[TMP13]], [[SCEVGEP]]
 ; LV-NEXT:    [[TMP19:%.*]] = select i1 true, i1 [[TMP17]], i1 [[TMP18]]
-; LV-NEXT:    [[TMP20:%.*]] = or i1 [[TMP19]], [[MUL_OVERFLOW5]]
+; LV-NEXT:    [[TMP20:%.*]] = or i1 [[TMP19]], [[MUL_OVERFLOW4]]
 ; LV-NEXT:    [[TMP21:%.*]] = or i1 [[TMP11]], [[TMP20]]
 ; LV-NEXT:    br i1 [[TMP21]], label [[FOR_BODY_PH_LVER_ORIG:%.*]], label [[FOR_BODY_PH:%.*]]
 ; LV:       for.body.ph.lver.orig:
@@ -434,10 +449,10 @@
 ; LV-NEXT:    [[INC]] = add nuw nsw i64 [[IND]], 1
 ; LV-NEXT:    [[DEC]] = sub i32 [[IND1]], 1
 ; LV-NEXT:    [[EXITCOND:%.*]] = icmp eq i64 [[INC]], [[N]]
-; LV-NEXT:    br i1 [[EXITCOND]], label [[FOR_END_LOOPEXIT6:%.*]], label [[FOR_BODY]]
+; LV-NEXT:    br i1 [[EXITCOND]], label [[FOR_END_LOOPEXIT7:%.*]], label [[FOR_BODY]]
 ; LV:       for.end.loopexit:
 ; LV-NEXT:    br label [[FOR_END:%.*]]
-; LV:       for.end.loopexit6:
+; LV:       for.end.loopexit7:
 ; LV-NEXT:    br label [[FOR_END]]
 ; LV:       for.end:
 ; LV-NEXT:    ret void
@@ -484,7 +499,6 @@
 define void @f5(i16* noalias %a,
 ; LV-LABEL: @f5(
 ; LV-NEXT:  for.body.lver.check:
-; LV-NEXT:    [[A2:%.*]] = ptrtoint i16* [[A:%.*]] to i64
 ; LV-NEXT:    [[TRUNCN:%.*]] = trunc i64 [[N:%.*]] to i32
 ; LV-NEXT:    [[TMP0:%.*]] = add i64 [[N]], -1
 ; LV-NEXT:    [[TMP1:%.*]] = shl i32 [[TRUNCN]], 1
@@ -502,17 +516,21 @@
 ; LV-NEXT:    [[TMP10:%.*]] = or i1 [[TMP9]], [[MUL_OVERFLOW]]
 ; LV-NEXT:    [[TMP11:%.*]] = or i1 false, [[TMP10]]
 ; LV-NEXT:    [[TMP12:%.*]] = sext i32 [[TMP1]] to i64
-; LV-NEXT:    [[TMP13:%.*]] = shl nsw i64 [[TMP12]], 1
-; LV-NEXT:    [[TMP14:%.*]] = add i64 [[A2]], [[TMP13]]
-; LV-NEXT:    [[MUL3:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 4, i64 [[TMP0]])
-; LV-NEXT:    [[MUL_RESULT4:%.*]] = extractvalue { i64, i1 } [[MUL3]], 0
-; LV-NEXT:    [[MUL_OVERFLOW5:%.*]] = extractvalue { i64, i1 } [[MUL3]], 1
-; LV-NEXT:    [[TMP15:%.*]] = add i64 [[TMP14]], [[MUL_RESULT4]]
-; LV-NEXT:    [[TMP16:%.*]] = sub i64 [[TMP14]], [[MUL_RESULT4]]
-; LV-NEXT:    [[TMP17:%.*]] = icmp ugt i64 [[TMP16]], [[TMP14]]
-; LV-NEXT:    [[TMP18:%.*]] = icmp ult i64 [[TMP15]], [[TMP14]]
+; LV-NEXT:    [[SCEVGEP:%.*]] = getelementptr i16, i16* [[A:%.*]], i64 [[TMP12]]
+; LV-NEXT:    [[MUL2:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 4, i64 [[TMP0]])
+; LV-NEXT:    [[MUL_RESULT3:%.*]] = extractvalue { i64, i1 } [[MUL2]], 0
+; LV-NEXT:    [[MUL_OVERFLOW4:%.*]] = extractvalue { i64, i1 } [[MUL2]], 1
+; LV-NEXT:    [[SCEVGEP5:%.*]] = bitcast i16* [[SCEVGEP]] to i8*
+; LV-NEXT:    [[UGLYGEP:%.*]] = getelementptr i8, i8* [[SCEVGEP5]], i64 [[MUL_RESULT3]]
+; LV-NEXT:    [[TMP13:%.*]] = bitcast i8* [[UGLYGEP]] to i16*
+; LV-NEXT:    [[TMP14:%.*]] = sub i64 [[MUL_RESULT3]], -4
+; LV-NEXT:    [[TMP15:%.*]] = sub i64 4, [[TMP14]]
+; LV-NEXT:    [[UGLYGEP6:%.*]] = getelementptr i8, i8* [[SCEVGEP5]], i64 [[TMP15]]
+; LV-NEXT:    [[TMP16:%.*]] = bitcast i8* [[UGLYGEP6]] to i16*
+; LV-NEXT:    [[TMP17:%.*]] = icmp ugt i16* [[TMP16]], [[SCEVGEP]]
+; LV-NEXT:    [[TMP18:%.*]] = icmp ult i16* [[TMP13]], [[SCEVGEP]]
 ; LV-NEXT:    [[TMP19:%.*]] = select i1 true, i1 [[TMP17]], i1 [[TMP18]]
-; LV-NEXT:    [[TMP20:%.*]] = or i1 [[TMP19]], [[MUL_OVERFLOW5]]
+; LV-NEXT:    [[TMP20:%.*]] = or i1 [[TMP19]], [[MUL_OVERFLOW4]]
 ; LV-NEXT:    [[TMP21:%.*]] = or i1 [[TMP11]], [[TMP20]]
 ; LV-NEXT:    br i1 [[TMP21]], label [[FOR_BODY_PH_LVER_ORIG:%.*]], label [[FOR_BODY_PH:%.*]]
 ; LV:       for.body.ph.lver.orig:
@@ -546,10 +564,10 @@
 ; LV-NEXT:    [[INC]] = add nuw nsw i64 [[IND]], 1
 ; LV-NEXT:    [[DEC]] = sub i32 [[IND1]], 1
 ; LV-NEXT:    [[EXITCOND:%.*]] = icmp eq i64 [[INC]], [[N]]
-; LV-NEXT:    br i1 [[EXITCOND]], label [[FOR_END_LOOPEXIT6:%.*]], label [[FOR_BODY]]
+; LV-NEXT:    br i1 [[EXITCOND]], label [[FOR_END_LOOPEXIT7:%.*]], label [[FOR_BODY]]
 ; LV:       for.end.loopexit:
 ; LV-NEXT:    br label [[FOR_END:%.*]]
-; LV:       for.end.loopexit6:
+; LV:       for.end.loopexit7:
 ; LV-NEXT:    br label [[FOR_END]]
 ; LV:       for.end:
 ; LV-NEXT:    ret void
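A note on the churn in the CHECK lines: the renumbered suffixes (e.g. [[A2]] becoming [[A5]], [[MUL3]] becoming [[MUL2]], for.end.loopexit6 becoming for.end.loopexit7) are not hand edits. The new expansion changes which auto-generated value and block names collide, and the assertions are regenerated wholesale. Presumably they were refreshed with something like the following; the exact invocation is an assumption, not shown by the patch:

  llvm/utils/update_test_checks.py --opt-binary=build/bin/opt \
      llvm/test/Transforms/LoopDistribute/scev-inserted-runtime-check.ll \
      llvm/test/Transforms/LoopVersioning/wrapping-pointer-versioning.ll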