Index: llvm/lib/Analysis/ScalarEvolution.cpp =================================================================== --- llvm/lib/Analysis/ScalarEvolution.cpp +++ llvm/lib/Analysis/ScalarEvolution.cpp @@ -8126,7 +8126,7 @@ "Predicate should be always true!"); } - return SE->getUMinFromMismatchedTypes(Ops); + return SE->getUMinFromMismatchedTypes(Ops, /* Sequential */ true); } /// Get the exact not taken count for this loop exit. Index: llvm/test/Transforms/IndVarSimplify/ARM/code-size.ll =================================================================== --- llvm/test/Transforms/IndVarSimplify/ARM/code-size.ll +++ llvm/test/Transforms/IndVarSimplify/ARM/code-size.ll @@ -135,13 +135,15 @@ ; CHECK-V8M-NEXT: loop.preheader: ; CHECK-V8M-NEXT: [[UMAX:%.*]] = call i32 @llvm.umax.i32(i32 [[N:%.*]], i32 1) ; CHECK-V8M-NEXT: [[TMP0:%.*]] = add i32 [[UMAX]], -1 -; CHECK-V8M-NEXT: [[UMIN:%.*]] = call i32 @llvm.umin.i32(i32 [[LENGTH:%.*]], i32 [[TMP0]]) -; CHECK-V8M-NEXT: [[TMP1:%.*]] = icmp ne i32 [[LENGTH]], [[UMIN]] +; CHECK-V8M-NEXT: [[TMP1:%.*]] = icmp eq i32 [[LENGTH:%.*]], 0 +; CHECK-V8M-NEXT: [[UMIN:%.*]] = call i32 @llvm.umin.i32(i32 [[TMP0]], i32 [[LENGTH]]) +; CHECK-V8M-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 0, i32 [[UMIN]] +; CHECK-V8M-NEXT: [[TMP3:%.*]] = icmp ne i32 [[LENGTH]], [[TMP2]] ; CHECK-V8M-NEXT: br label [[LOOP:%.*]] ; CHECK-V8M: loop: ; CHECK-V8M-NEXT: [[LOOP_ACC:%.*]] = phi i32 [ [[LOOP_ACC_NEXT:%.*]], [[GUARDED:%.*]] ], [ 0, [[LOOP_PREHEADER:%.*]] ] ; CHECK-V8M-NEXT: [[I:%.*]] = phi i32 [ [[I_NEXT:%.*]], [[GUARDED]] ], [ 0, [[LOOP_PREHEADER]] ] -; CHECK-V8M-NEXT: br i1 [[TMP1]], label [[GUARDED]], label [[DEOPT:%.*]], !prof !0 +; CHECK-V8M-NEXT: br i1 [[TMP3]], label [[GUARDED]], label [[DEOPT:%.*]], !prof [[PROF0:![0-9]+]] ; CHECK-V8M: deopt: ; CHECK-V8M-NEXT: call void @prevent_merging() ; CHECK-V8M-NEXT: ret i32 -1 @@ -161,13 +163,15 @@ ; CHECK-V8A-NEXT: loop.preheader: ; CHECK-V8A-NEXT: [[UMAX:%.*]] = call i32 @llvm.umax.i32(i32 [[N:%.*]], i32 1) ; CHECK-V8A-NEXT: [[TMP0:%.*]] = add i32 [[UMAX]], -1 -; CHECK-V8A-NEXT: [[UMIN:%.*]] = call i32 @llvm.umin.i32(i32 [[LENGTH:%.*]], i32 [[TMP0]]) -; CHECK-V8A-NEXT: [[TMP1:%.*]] = icmp ne i32 [[LENGTH]], [[UMIN]] +; CHECK-V8A-NEXT: [[TMP1:%.*]] = icmp eq i32 [[LENGTH:%.*]], 0 +; CHECK-V8A-NEXT: [[UMIN:%.*]] = call i32 @llvm.umin.i32(i32 [[TMP0]], i32 [[LENGTH]]) +; CHECK-V8A-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 0, i32 [[UMIN]] +; CHECK-V8A-NEXT: [[TMP3:%.*]] = icmp ne i32 [[LENGTH]], [[TMP2]] ; CHECK-V8A-NEXT: br label [[LOOP:%.*]] ; CHECK-V8A: loop: ; CHECK-V8A-NEXT: [[LOOP_ACC:%.*]] = phi i32 [ [[LOOP_ACC_NEXT:%.*]], [[GUARDED:%.*]] ], [ 0, [[LOOP_PREHEADER:%.*]] ] ; CHECK-V8A-NEXT: [[I:%.*]] = phi i32 [ [[I_NEXT:%.*]], [[GUARDED]] ], [ 0, [[LOOP_PREHEADER]] ] -; CHECK-V8A-NEXT: br i1 [[TMP1]], label [[GUARDED]], label [[DEOPT:%.*]], !prof !0 +; CHECK-V8A-NEXT: br i1 [[TMP3]], label [[GUARDED]], label [[DEOPT:%.*]], !prof [[PROF0:![0-9]+]] ; CHECK-V8A: deopt: ; CHECK-V8A-NEXT: call void @prevent_merging() ; CHECK-V8A-NEXT: ret i32 -1 @@ -216,13 +220,15 @@ ; CHECK-V8M-LABEL: @test2( ; CHECK-V8M-NEXT: loop.preheader: ; CHECK-V8M-NEXT: [[TMP0:%.*]] = add i32 [[N:%.*]], -1 -; CHECK-V8M-NEXT: [[UMIN:%.*]] = call i32 @llvm.umin.i32(i32 [[LENGTH:%.*]], i32 [[TMP0]]) -; CHECK-V8M-NEXT: [[TMP1:%.*]] = icmp ne i32 [[LENGTH]], [[UMIN]] +; CHECK-V8M-NEXT: [[TMP1:%.*]] = icmp eq i32 [[LENGTH:%.*]], 0 +; CHECK-V8M-NEXT: [[UMIN:%.*]] = call i32 @llvm.umin.i32(i32 [[TMP0]], i32 [[LENGTH]]) +; CHECK-V8M-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 0, i32 [[UMIN]] +; CHECK-V8M-NEXT: [[TMP3:%.*]] = icmp ne i32 [[LENGTH]], [[TMP2]] ; CHECK-V8M-NEXT: br label [[LOOP:%.*]] ; CHECK-V8M: loop: ; CHECK-V8M-NEXT: [[LOOP_ACC:%.*]] = phi i32 [ [[LOOP_ACC_NEXT:%.*]], [[GUARDED:%.*]] ], [ 0, [[LOOP_PREHEADER:%.*]] ] ; CHECK-V8M-NEXT: [[I:%.*]] = phi i32 [ [[I_NEXT:%.*]], [[GUARDED]] ], [ 0, [[LOOP_PREHEADER]] ] -; CHECK-V8M-NEXT: br i1 [[TMP1]], label [[GUARDED]], label [[DEOPT:%.*]], !prof !0 +; CHECK-V8M-NEXT: br i1 [[TMP3]], label [[GUARDED]], label [[DEOPT:%.*]], !prof [[PROF0]] ; CHECK-V8M: deopt: ; CHECK-V8M-NEXT: call void @prevent_merging() ; CHECK-V8M-NEXT: ret i32 -1 @@ -241,13 +247,15 @@ ; CHECK-V8A-LABEL: @test2( ; CHECK-V8A-NEXT: loop.preheader: ; CHECK-V8A-NEXT: [[TMP0:%.*]] = add i32 [[N:%.*]], -1 -; CHECK-V8A-NEXT: [[UMIN:%.*]] = call i32 @llvm.umin.i32(i32 [[LENGTH:%.*]], i32 [[TMP0]]) -; CHECK-V8A-NEXT: [[TMP1:%.*]] = icmp ne i32 [[LENGTH]], [[UMIN]] +; CHECK-V8A-NEXT: [[TMP1:%.*]] = icmp eq i32 [[LENGTH:%.*]], 0 +; CHECK-V8A-NEXT: [[UMIN:%.*]] = call i32 @llvm.umin.i32(i32 [[TMP0]], i32 [[LENGTH]]) +; CHECK-V8A-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 0, i32 [[UMIN]] +; CHECK-V8A-NEXT: [[TMP3:%.*]] = icmp ne i32 [[LENGTH]], [[TMP2]] ; CHECK-V8A-NEXT: br label [[LOOP:%.*]] ; CHECK-V8A: loop: ; CHECK-V8A-NEXT: [[LOOP_ACC:%.*]] = phi i32 [ [[LOOP_ACC_NEXT:%.*]], [[GUARDED:%.*]] ], [ 0, [[LOOP_PREHEADER:%.*]] ] ; CHECK-V8A-NEXT: [[I:%.*]] = phi i32 [ [[I_NEXT:%.*]], [[GUARDED]] ], [ 0, [[LOOP_PREHEADER]] ] -; CHECK-V8A-NEXT: br i1 [[TMP1]], label [[GUARDED]], label [[DEOPT:%.*]], !prof !0 +; CHECK-V8A-NEXT: br i1 [[TMP3]], label [[GUARDED]], label [[DEOPT:%.*]], !prof [[PROF0]] ; CHECK-V8A: deopt: ; CHECK-V8A-NEXT: call void @prevent_merging() ; CHECK-V8A-NEXT: ret i32 -1 @@ -294,16 +302,17 @@ ; CHECK-V8M-LABEL: @two_range_checks( ; CHECK-V8M-NEXT: loop.preheader: ; CHECK-V8M-NEXT: [[UMIN:%.*]] = call i32 @llvm.umin.i32(i32 [[LENGTH_2:%.*]], i32 [[LENGTH_1:%.*]]) -; CHECK-V8M-NEXT: [[UMIN1:%.*]] = call i32 @llvm.umin.i32(i32 [[LENGTH_2]], i32 [[LENGTH_1]]) ; CHECK-V8M-NEXT: [[UMAX:%.*]] = call i32 @llvm.umax.i32(i32 [[N:%.*]], i32 1) ; CHECK-V8M-NEXT: [[TMP0:%.*]] = add i32 [[UMAX]], -1 -; CHECK-V8M-NEXT: [[UMIN2:%.*]] = call i32 @llvm.umin.i32(i32 [[UMIN1]], i32 [[TMP0]]) -; CHECK-V8M-NEXT: [[TMP1:%.*]] = icmp ne i32 [[UMIN]], [[UMIN2]] +; CHECK-V8M-NEXT: [[TMP1:%.*]] = icmp eq i32 [[UMIN]], 0 +; CHECK-V8M-NEXT: [[UMIN1:%.*]] = call i32 @llvm.umin.i32(i32 [[TMP0]], i32 [[UMIN]]) +; CHECK-V8M-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 0, i32 [[UMIN1]] +; CHECK-V8M-NEXT: [[TMP3:%.*]] = icmp ne i32 [[UMIN]], [[TMP2]] ; CHECK-V8M-NEXT: br label [[LOOP:%.*]] ; CHECK-V8M: loop: ; CHECK-V8M-NEXT: [[LOOP_ACC:%.*]] = phi i32 [ [[LOOP_ACC_NEXT:%.*]], [[GUARDED:%.*]] ], [ 0, [[LOOP_PREHEADER:%.*]] ] ; CHECK-V8M-NEXT: [[I:%.*]] = phi i32 [ [[I_NEXT:%.*]], [[GUARDED]] ], [ 0, [[LOOP_PREHEADER]] ] -; CHECK-V8M-NEXT: br i1 [[TMP1]], label [[GUARDED]], label [[DEOPT:%.*]], !prof !0 +; CHECK-V8M-NEXT: br i1 [[TMP3]], label [[GUARDED]], label [[DEOPT:%.*]], !prof [[PROF0]] ; CHECK-V8M: deopt: ; CHECK-V8M-NEXT: call void @prevent_merging() ; CHECK-V8M-NEXT: ret i32 -1 @@ -325,16 +334,17 @@ ; CHECK-V8A-LABEL: @two_range_checks( ; CHECK-V8A-NEXT: loop.preheader: ; CHECK-V8A-NEXT: [[UMIN:%.*]] = call i32 @llvm.umin.i32(i32 [[LENGTH_2:%.*]], i32 [[LENGTH_1:%.*]]) -; CHECK-V8A-NEXT: [[UMIN1:%.*]] = call i32 @llvm.umin.i32(i32 [[LENGTH_2]], i32 [[LENGTH_1]]) ; CHECK-V8A-NEXT: [[UMAX:%.*]] = call i32 @llvm.umax.i32(i32 [[N:%.*]], i32 1) ; CHECK-V8A-NEXT: [[TMP0:%.*]] = add i32 [[UMAX]], -1 -; CHECK-V8A-NEXT: [[UMIN2:%.*]] = call i32 @llvm.umin.i32(i32 [[UMIN1]], i32 [[TMP0]]) -; CHECK-V8A-NEXT: [[TMP1:%.*]] = icmp ne i32 [[UMIN]], [[UMIN2]] +; CHECK-V8A-NEXT: [[TMP1:%.*]] = icmp eq i32 [[UMIN]], 0 +; CHECK-V8A-NEXT: [[UMIN1:%.*]] = call i32 @llvm.umin.i32(i32 [[TMP0]], i32 [[UMIN]]) +; CHECK-V8A-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 0, i32 [[UMIN1]] +; CHECK-V8A-NEXT: [[TMP3:%.*]] = icmp ne i32 [[UMIN]], [[TMP2]] ; CHECK-V8A-NEXT: br label [[LOOP:%.*]] ; CHECK-V8A: loop: ; CHECK-V8A-NEXT: [[LOOP_ACC:%.*]] = phi i32 [ [[LOOP_ACC_NEXT:%.*]], [[GUARDED:%.*]] ], [ 0, [[LOOP_PREHEADER:%.*]] ] ; CHECK-V8A-NEXT: [[I:%.*]] = phi i32 [ [[I_NEXT:%.*]], [[GUARDED]] ], [ 0, [[LOOP_PREHEADER]] ] -; CHECK-V8A-NEXT: br i1 [[TMP1]], label [[GUARDED]], label [[DEOPT:%.*]], !prof !0 +; CHECK-V8A-NEXT: br i1 [[TMP3]], label [[GUARDED]], label [[DEOPT:%.*]], !prof [[PROF0]] ; CHECK-V8A: deopt: ; CHECK-V8A-NEXT: call void @prevent_merging() ; CHECK-V8A-NEXT: ret i32 -1 @@ -390,17 +400,17 @@ ; CHECK-V8M-NEXT: loop.preheader: ; CHECK-V8M-NEXT: [[UMIN:%.*]] = call i32 @llvm.umin.i32(i32 [[LENGTH_3:%.*]], i32 [[LENGTH_2:%.*]]) ; CHECK-V8M-NEXT: [[UMIN1:%.*]] = call i32 @llvm.umin.i32(i32 [[UMIN]], i32 [[LENGTH_1:%.*]]) -; CHECK-V8M-NEXT: [[UMIN2:%.*]] = call i32 @llvm.umin.i32(i32 [[LENGTH_3]], i32 [[LENGTH_2]]) -; CHECK-V8M-NEXT: [[UMIN3:%.*]] = call i32 @llvm.umin.i32(i32 [[UMIN2]], i32 [[LENGTH_1]]) ; CHECK-V8M-NEXT: [[UMAX:%.*]] = call i32 @llvm.umax.i32(i32 [[N:%.*]], i32 1) ; CHECK-V8M-NEXT: [[TMP0:%.*]] = add i32 [[UMAX]], -1 -; CHECK-V8M-NEXT: [[UMIN4:%.*]] = call i32 @llvm.umin.i32(i32 [[UMIN3]], i32 [[TMP0]]) -; CHECK-V8M-NEXT: [[TMP1:%.*]] = icmp ne i32 [[UMIN1]], [[UMIN4]] +; CHECK-V8M-NEXT: [[TMP1:%.*]] = icmp eq i32 [[UMIN1]], 0 +; CHECK-V8M-NEXT: [[UMIN2:%.*]] = call i32 @llvm.umin.i32(i32 [[TMP0]], i32 [[UMIN1]]) +; CHECK-V8M-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 0, i32 [[UMIN2]] +; CHECK-V8M-NEXT: [[TMP3:%.*]] = icmp ne i32 [[UMIN1]], [[TMP2]] ; CHECK-V8M-NEXT: br label [[LOOP:%.*]] ; CHECK-V8M: loop: ; CHECK-V8M-NEXT: [[LOOP_ACC:%.*]] = phi i32 [ [[LOOP_ACC_NEXT:%.*]], [[GUARDED:%.*]] ], [ 0, [[LOOP_PREHEADER:%.*]] ] ; CHECK-V8M-NEXT: [[I:%.*]] = phi i32 [ [[I_NEXT:%.*]], [[GUARDED]] ], [ 0, [[LOOP_PREHEADER]] ] -; CHECK-V8M-NEXT: br i1 [[TMP1]], label [[GUARDED]], label [[DEOPT:%.*]], !prof !0 +; CHECK-V8M-NEXT: br i1 [[TMP3]], label [[GUARDED]], label [[DEOPT:%.*]], !prof [[PROF0]] ; CHECK-V8M: deopt: ; CHECK-V8M-NEXT: call void @prevent_merging() ; CHECK-V8M-NEXT: ret i32 -1 @@ -426,17 +436,17 @@ ; CHECK-V8A-NEXT: loop.preheader: ; CHECK-V8A-NEXT: [[UMIN:%.*]] = call i32 @llvm.umin.i32(i32 [[LENGTH_3:%.*]], i32 [[LENGTH_2:%.*]]) ; CHECK-V8A-NEXT: [[UMIN1:%.*]] = call i32 @llvm.umin.i32(i32 [[UMIN]], i32 [[LENGTH_1:%.*]]) -; CHECK-V8A-NEXT: [[UMIN2:%.*]] = call i32 @llvm.umin.i32(i32 [[LENGTH_3]], i32 [[LENGTH_2]]) -; CHECK-V8A-NEXT: [[UMIN3:%.*]] = call i32 @llvm.umin.i32(i32 [[UMIN2]], i32 [[LENGTH_1]]) ; CHECK-V8A-NEXT: [[UMAX:%.*]] = call i32 @llvm.umax.i32(i32 [[N:%.*]], i32 1) ; CHECK-V8A-NEXT: [[TMP0:%.*]] = add i32 [[UMAX]], -1 -; CHECK-V8A-NEXT: [[UMIN4:%.*]] = call i32 @llvm.umin.i32(i32 [[UMIN3]], i32 [[TMP0]]) -; CHECK-V8A-NEXT: [[TMP1:%.*]] = icmp ne i32 [[UMIN1]], [[UMIN4]] +; CHECK-V8A-NEXT: [[TMP1:%.*]] = icmp eq i32 [[UMIN1]], 0 +; CHECK-V8A-NEXT: [[UMIN2:%.*]] = call i32 @llvm.umin.i32(i32 [[TMP0]], i32 [[UMIN1]]) +; CHECK-V8A-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 0, i32 [[UMIN2]] +; CHECK-V8A-NEXT: [[TMP3:%.*]] = icmp ne i32 [[UMIN1]], [[TMP2]] ; CHECK-V8A-NEXT: br label [[LOOP:%.*]] ; CHECK-V8A: loop: ; CHECK-V8A-NEXT: [[LOOP_ACC:%.*]] = phi i32 [ [[LOOP_ACC_NEXT:%.*]], [[GUARDED:%.*]] ], [ 0, [[LOOP_PREHEADER:%.*]] ] ; CHECK-V8A-NEXT: [[I:%.*]] = phi i32 [ [[I_NEXT:%.*]], [[GUARDED]] ], [ 0, [[LOOP_PREHEADER]] ] -; CHECK-V8A-NEXT: br i1 [[TMP1]], label [[GUARDED]], label [[DEOPT:%.*]], !prof !0 +; CHECK-V8A-NEXT: br i1 [[TMP3]], label [[GUARDED]], label [[DEOPT:%.*]], !prof [[PROF0]] ; CHECK-V8A: deopt: ; CHECK-V8A-NEXT: call void @prevent_merging() ; CHECK-V8A-NEXT: ret i32 -1 @@ -499,17 +509,21 @@ define i32 @distinct_checks(i32* %array.1, i32 %length.1, i32* %array.2, i32 %length.2, i32* %array.3, i32 %length.3, i32 %n) #0 { ; CHECK-V8M-LABEL: @distinct_checks( ; CHECK-V8M-NEXT: loop.preheader: -; CHECK-V8M-NEXT: [[UMIN:%.*]] = call i32 @llvm.umin.i32(i32 [[LENGTH_2:%.*]], i32 [[LENGTH_1:%.*]]) ; CHECK-V8M-NEXT: [[UMAX:%.*]] = call i32 @llvm.umax.i32(i32 [[N:%.*]], i32 1) ; CHECK-V8M-NEXT: [[TMP0:%.*]] = add i32 [[UMAX]], -1 -; CHECK-V8M-NEXT: [[UMIN1:%.*]] = call i32 @llvm.umin.i32(i32 [[UMIN]], i32 [[TMP0]]) -; CHECK-V8M-NEXT: [[TMP1:%.*]] = icmp ne i32 [[LENGTH_1]], [[UMIN1]] -; CHECK-V8M-NEXT: [[TMP2:%.*]] = icmp ne i32 [[LENGTH_2]], [[UMIN1]] +; CHECK-V8M-NEXT: [[TMP1:%.*]] = icmp eq i32 [[LENGTH_1:%.*]], 0 +; CHECK-V8M-NEXT: [[TMP2:%.*]] = icmp eq i32 [[LENGTH_2:%.*]], 0 +; CHECK-V8M-NEXT: [[TMP3:%.*]] = select i1 [[TMP1]], i1 true, i1 [[TMP2]] +; CHECK-V8M-NEXT: [[UMIN:%.*]] = call i32 @llvm.umin.i32(i32 [[TMP0]], i32 [[LENGTH_2]]) +; CHECK-V8M-NEXT: [[UMIN1:%.*]] = call i32 @llvm.umin.i32(i32 [[UMIN]], i32 [[LENGTH_1]]) +; CHECK-V8M-NEXT: [[TMP4:%.*]] = select i1 [[TMP3]], i32 0, i32 [[UMIN1]] +; CHECK-V8M-NEXT: [[TMP5:%.*]] = icmp ne i32 [[LENGTH_1]], [[TMP4]] +; CHECK-V8M-NEXT: [[TMP6:%.*]] = icmp ne i32 [[LENGTH_2]], [[TMP4]] ; CHECK-V8M-NEXT: br label [[LOOP:%.*]] ; CHECK-V8M: loop: ; CHECK-V8M-NEXT: [[LOOP_ACC:%.*]] = phi i32 [ [[LOOP_ACC_NEXT:%.*]], [[GUARDED1:%.*]] ], [ 0, [[LOOP_PREHEADER:%.*]] ] ; CHECK-V8M-NEXT: [[I:%.*]] = phi i32 [ [[I_NEXT:%.*]], [[GUARDED1]] ], [ 0, [[LOOP_PREHEADER]] ] -; CHECK-V8M-NEXT: br i1 [[TMP1]], label [[GUARDED:%.*]], label [[DEOPT:%.*]], !prof !0 +; CHECK-V8M-NEXT: br i1 [[TMP5]], label [[GUARDED:%.*]], label [[DEOPT:%.*]], !prof [[PROF0]] ; CHECK-V8M: deopt: ; CHECK-V8M-NEXT: call void @prevent_merging() ; CHECK-V8M-NEXT: ret i32 -1 @@ -518,7 +532,7 @@ ; CHECK-V8M-NEXT: [[ARRAY_1_I_PTR:%.*]] = getelementptr inbounds i32, i32* [[ARRAY_1:%.*]], i64 [[I_I64]] ; CHECK-V8M-NEXT: [[ARRAY_1_I:%.*]] = load i32, i32* [[ARRAY_1_I_PTR]], align 4 ; CHECK-V8M-NEXT: [[LOOP_ACC_1:%.*]] = add i32 [[LOOP_ACC]], [[ARRAY_1_I]] -; CHECK-V8M-NEXT: br i1 [[TMP2]], label [[GUARDED1]], label [[DEOPT2:%.*]], !prof !0 +; CHECK-V8M-NEXT: br i1 [[TMP6]], label [[GUARDED1]], label [[DEOPT2:%.*]], !prof [[PROF0]] ; CHECK-V8M: deopt2: ; CHECK-V8M-NEXT: call void @prevent_merging() ; CHECK-V8M-NEXT: ret i32 -1 @@ -535,17 +549,21 @@ ; ; CHECK-V8A-LABEL: @distinct_checks( ; CHECK-V8A-NEXT: loop.preheader: -; CHECK-V8A-NEXT: [[UMIN:%.*]] = call i32 @llvm.umin.i32(i32 [[LENGTH_2:%.*]], i32 [[LENGTH_1:%.*]]) ; CHECK-V8A-NEXT: [[UMAX:%.*]] = call i32 @llvm.umax.i32(i32 [[N:%.*]], i32 1) ; CHECK-V8A-NEXT: [[TMP0:%.*]] = add i32 [[UMAX]], -1 -; CHECK-V8A-NEXT: [[UMIN1:%.*]] = call i32 @llvm.umin.i32(i32 [[UMIN]], i32 [[TMP0]]) -; CHECK-V8A-NEXT: [[TMP1:%.*]] = icmp ne i32 [[LENGTH_1]], [[UMIN1]] -; CHECK-V8A-NEXT: [[TMP2:%.*]] = icmp ne i32 [[LENGTH_2]], [[UMIN1]] +; CHECK-V8A-NEXT: [[TMP1:%.*]] = icmp eq i32 [[LENGTH_1:%.*]], 0 +; CHECK-V8A-NEXT: [[TMP2:%.*]] = icmp eq i32 [[LENGTH_2:%.*]], 0 +; CHECK-V8A-NEXT: [[TMP3:%.*]] = select i1 [[TMP1]], i1 true, i1 [[TMP2]] +; CHECK-V8A-NEXT: [[UMIN:%.*]] = call i32 @llvm.umin.i32(i32 [[TMP0]], i32 [[LENGTH_2]]) +; CHECK-V8A-NEXT: [[UMIN1:%.*]] = call i32 @llvm.umin.i32(i32 [[UMIN]], i32 [[LENGTH_1]]) +; CHECK-V8A-NEXT: [[TMP4:%.*]] = select i1 [[TMP3]], i32 0, i32 [[UMIN1]] +; CHECK-V8A-NEXT: [[TMP5:%.*]] = icmp ne i32 [[LENGTH_1]], [[TMP4]] +; CHECK-V8A-NEXT: [[TMP6:%.*]] = icmp ne i32 [[LENGTH_2]], [[TMP4]] ; CHECK-V8A-NEXT: br label [[LOOP:%.*]] ; CHECK-V8A: loop: ; CHECK-V8A-NEXT: [[LOOP_ACC:%.*]] = phi i32 [ [[LOOP_ACC_NEXT:%.*]], [[GUARDED1:%.*]] ], [ 0, [[LOOP_PREHEADER:%.*]] ] ; CHECK-V8A-NEXT: [[I:%.*]] = phi i32 [ [[I_NEXT:%.*]], [[GUARDED1]] ], [ 0, [[LOOP_PREHEADER]] ] -; CHECK-V8A-NEXT: br i1 [[TMP1]], label [[GUARDED:%.*]], label [[DEOPT:%.*]], !prof !0 +; CHECK-V8A-NEXT: br i1 [[TMP5]], label [[GUARDED:%.*]], label [[DEOPT:%.*]], !prof [[PROF0]] ; CHECK-V8A: deopt: ; CHECK-V8A-NEXT: call void @prevent_merging() ; CHECK-V8A-NEXT: ret i32 -1 @@ -554,7 +572,7 @@ ; CHECK-V8A-NEXT: [[ARRAY_1_I_PTR:%.*]] = getelementptr inbounds i32, i32* [[ARRAY_1:%.*]], i64 [[I_I64]] ; CHECK-V8A-NEXT: [[ARRAY_1_I:%.*]] = load i32, i32* [[ARRAY_1_I_PTR]], align 4 ; CHECK-V8A-NEXT: [[LOOP_ACC_1:%.*]] = add i32 [[LOOP_ACC]], [[ARRAY_1_I]] -; CHECK-V8A-NEXT: br i1 [[TMP2]], label [[GUARDED1]], label [[DEOPT2:%.*]], !prof !0 +; CHECK-V8A-NEXT: br i1 [[TMP6]], label [[GUARDED1]], label [[DEOPT2:%.*]], !prof [[PROF0]] ; CHECK-V8A: deopt2: ; CHECK-V8A-NEXT: call void @prevent_merging() ; CHECK-V8A-NEXT: ret i32 -1 @@ -612,13 +630,15 @@ ; CHECK-V8M-NEXT: loop.preheader: ; CHECK-V8M-NEXT: [[UMAX:%.*]] = call i32 @llvm.umax.i32(i32 [[N:%.*]], i32 1) ; CHECK-V8M-NEXT: [[TMP0:%.*]] = add i32 [[UMAX]], -1 -; CHECK-V8M-NEXT: [[UMIN:%.*]] = call i32 @llvm.umin.i32(i32 [[LENGTH:%.*]], i32 [[TMP0]]) -; CHECK-V8M-NEXT: [[TMP1:%.*]] = icmp ne i32 [[LENGTH]], [[UMIN]] +; CHECK-V8M-NEXT: [[TMP1:%.*]] = icmp eq i32 [[LENGTH:%.*]], 0 +; CHECK-V8M-NEXT: [[UMIN:%.*]] = call i32 @llvm.umin.i32(i32 [[TMP0]], i32 [[LENGTH]]) +; CHECK-V8M-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 0, i32 [[UMIN]] +; CHECK-V8M-NEXT: [[TMP3:%.*]] = icmp ne i32 [[LENGTH]], [[TMP2]] ; CHECK-V8M-NEXT: br label [[LOOP:%.*]] ; CHECK-V8M: loop: ; CHECK-V8M-NEXT: [[LOOP_ACC:%.*]] = phi i32 [ [[LOOP_ACC_NEXT:%.*]], [[GUARDED1:%.*]] ], [ 0, [[LOOP_PREHEADER:%.*]] ] ; CHECK-V8M-NEXT: [[I:%.*]] = phi i32 [ [[I_NEXT:%.*]], [[GUARDED1]] ], [ 0, [[LOOP_PREHEADER]] ] -; CHECK-V8M-NEXT: br i1 [[TMP1]], label [[GUARDED:%.*]], label [[DEOPT:%.*]], !prof !0 +; CHECK-V8M-NEXT: br i1 [[TMP3]], label [[GUARDED:%.*]], label [[DEOPT:%.*]], !prof [[PROF0]] ; CHECK-V8M: deopt: ; CHECK-V8M-NEXT: call void @prevent_merging() ; CHECK-V8M-NEXT: ret i32 -1 @@ -627,7 +647,7 @@ ; CHECK-V8M-NEXT: [[ARRAY_1_I_PTR:%.*]] = getelementptr inbounds i32, i32* [[ARRAY_1:%.*]], i64 [[I_I64]] ; CHECK-V8M-NEXT: [[ARRAY_1_I:%.*]] = load i32, i32* [[ARRAY_1_I_PTR]], align 4 ; CHECK-V8M-NEXT: [[LOOP_ACC_1:%.*]] = add i32 [[LOOP_ACC]], [[ARRAY_1_I]] -; CHECK-V8M-NEXT: br i1 true, label [[GUARDED1]], label [[DEOPT2:%.*]], !prof !0 +; CHECK-V8M-NEXT: br i1 true, label [[GUARDED1]], label [[DEOPT2:%.*]], !prof [[PROF0]] ; CHECK-V8M: deopt2: ; CHECK-V8M-NEXT: call void @prevent_merging() ; CHECK-V8M-NEXT: ret i32 -1 @@ -646,13 +666,15 @@ ; CHECK-V8A-NEXT: loop.preheader: ; CHECK-V8A-NEXT: [[UMAX:%.*]] = call i32 @llvm.umax.i32(i32 [[N:%.*]], i32 1) ; CHECK-V8A-NEXT: [[TMP0:%.*]] = add i32 [[UMAX]], -1 -; CHECK-V8A-NEXT: [[UMIN:%.*]] = call i32 @llvm.umin.i32(i32 [[LENGTH:%.*]], i32 [[TMP0]]) -; CHECK-V8A-NEXT: [[TMP1:%.*]] = icmp ne i32 [[LENGTH]], [[UMIN]] +; CHECK-V8A-NEXT: [[TMP1:%.*]] = icmp eq i32 [[LENGTH:%.*]], 0 +; CHECK-V8A-NEXT: [[UMIN:%.*]] = call i32 @llvm.umin.i32(i32 [[TMP0]], i32 [[LENGTH]]) +; CHECK-V8A-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 0, i32 [[UMIN]] +; CHECK-V8A-NEXT: [[TMP3:%.*]] = icmp ne i32 [[LENGTH]], [[TMP2]] ; CHECK-V8A-NEXT: br label [[LOOP:%.*]] ; CHECK-V8A: loop: ; CHECK-V8A-NEXT: [[LOOP_ACC:%.*]] = phi i32 [ [[LOOP_ACC_NEXT:%.*]], [[GUARDED1:%.*]] ], [ 0, [[LOOP_PREHEADER:%.*]] ] ; CHECK-V8A-NEXT: [[I:%.*]] = phi i32 [ [[I_NEXT:%.*]], [[GUARDED1]] ], [ 0, [[LOOP_PREHEADER]] ] -; CHECK-V8A-NEXT: br i1 [[TMP1]], label [[GUARDED:%.*]], label [[DEOPT:%.*]], !prof !0 +; CHECK-V8A-NEXT: br i1 [[TMP3]], label [[GUARDED:%.*]], label [[DEOPT:%.*]], !prof [[PROF0]] ; CHECK-V8A: deopt: ; CHECK-V8A-NEXT: call void @prevent_merging() ; CHECK-V8A-NEXT: ret i32 -1 @@ -661,7 +683,7 @@ ; CHECK-V8A-NEXT: [[ARRAY_1_I_PTR:%.*]] = getelementptr inbounds i32, i32* [[ARRAY_1:%.*]], i64 [[I_I64]] ; CHECK-V8A-NEXT: [[ARRAY_1_I:%.*]] = load i32, i32* [[ARRAY_1_I_PTR]], align 4 ; CHECK-V8A-NEXT: [[LOOP_ACC_1:%.*]] = add i32 [[LOOP_ACC]], [[ARRAY_1_I]] -; CHECK-V8A-NEXT: br i1 true, label [[GUARDED1]], label [[DEOPT2:%.*]], !prof !0 +; CHECK-V8A-NEXT: br i1 true, label [[GUARDED1]], label [[DEOPT2:%.*]], !prof [[PROF0]] ; CHECK-V8A: deopt2: ; CHECK-V8A-NEXT: call void @prevent_merging() ; CHECK-V8A-NEXT: ret i32 -1 @@ -721,17 +743,19 @@ ; CHECK-V8M-LABEL: @different_ivs( ; CHECK-V8M-NEXT: loop.preheader: ; CHECK-V8M-NEXT: [[N64:%.*]] = zext i32 [[N:%.*]] to i64 +; CHECK-V8M-NEXT: [[TMP0:%.*]] = zext i32 [[LENGTH:%.*]] to i64 ; CHECK-V8M-NEXT: [[UMAX:%.*]] = call i64 @llvm.umax.i64(i64 [[N64]], i64 1) -; CHECK-V8M-NEXT: [[TMP0:%.*]] = add nsw i64 [[UMAX]], -1 -; CHECK-V8M-NEXT: [[TMP1:%.*]] = zext i32 [[LENGTH:%.*]] to i64 -; CHECK-V8M-NEXT: [[UMIN:%.*]] = call i64 @llvm.umin.i64(i64 [[TMP0]], i64 [[TMP1]]) -; CHECK-V8M-NEXT: [[TMP2:%.*]] = zext i32 [[LENGTH]] to i64 -; CHECK-V8M-NEXT: [[TMP3:%.*]] = icmp ne i64 [[TMP2]], [[UMIN]] +; CHECK-V8M-NEXT: [[TMP1:%.*]] = add nsw i64 [[UMAX]], -1 +; CHECK-V8M-NEXT: [[TMP2:%.*]] = icmp eq i64 [[TMP0]], 0 +; CHECK-V8M-NEXT: [[UMIN:%.*]] = call i64 @llvm.umin.i64(i64 [[TMP1]], i64 [[TMP0]]) +; CHECK-V8M-NEXT: [[TMP3:%.*]] = select i1 [[TMP2]], i64 0, i64 [[UMIN]] +; CHECK-V8M-NEXT: [[TMP4:%.*]] = zext i32 [[LENGTH]] to i64 +; CHECK-V8M-NEXT: [[TMP5:%.*]] = icmp ne i64 [[TMP4]], [[TMP3]] ; CHECK-V8M-NEXT: br label [[LOOP:%.*]] ; CHECK-V8M: loop: ; CHECK-V8M-NEXT: [[LOOP_ACC:%.*]] = phi i32 [ [[LOOP_ACC_NEXT:%.*]], [[GUARDED:%.*]] ], [ 0, [[LOOP_PREHEADER:%.*]] ] ; CHECK-V8M-NEXT: [[I:%.*]] = phi i64 [ [[I_NEXT:%.*]], [[GUARDED]] ], [ 0, [[LOOP_PREHEADER]] ] -; CHECK-V8M-NEXT: br i1 [[TMP3]], label [[GUARDED]], label [[DEOPT:%.*]], !prof !0 +; CHECK-V8M-NEXT: br i1 [[TMP5]], label [[GUARDED]], label [[DEOPT:%.*]], !prof [[PROF0]] ; CHECK-V8M: deopt: ; CHECK-V8M-NEXT: call void @prevent_merging() ; CHECK-V8M-NEXT: ret i32 -1 @@ -749,17 +773,19 @@ ; CHECK-V8A-LABEL: @different_ivs( ; CHECK-V8A-NEXT: loop.preheader: ; CHECK-V8A-NEXT: [[N64:%.*]] = zext i32 [[N:%.*]] to i64 +; CHECK-V8A-NEXT: [[TMP0:%.*]] = zext i32 [[LENGTH:%.*]] to i64 ; CHECK-V8A-NEXT: [[UMAX:%.*]] = call i64 @llvm.umax.i64(i64 [[N64]], i64 1) -; CHECK-V8A-NEXT: [[TMP0:%.*]] = add nsw i64 [[UMAX]], -1 -; CHECK-V8A-NEXT: [[TMP1:%.*]] = zext i32 [[LENGTH:%.*]] to i64 -; CHECK-V8A-NEXT: [[UMIN:%.*]] = call i64 @llvm.umin.i64(i64 [[TMP0]], i64 [[TMP1]]) -; CHECK-V8A-NEXT: [[TMP2:%.*]] = zext i32 [[LENGTH]] to i64 -; CHECK-V8A-NEXT: [[TMP3:%.*]] = icmp ne i64 [[TMP2]], [[UMIN]] +; CHECK-V8A-NEXT: [[TMP1:%.*]] = add nsw i64 [[UMAX]], -1 +; CHECK-V8A-NEXT: [[TMP2:%.*]] = icmp eq i64 [[TMP0]], 0 +; CHECK-V8A-NEXT: [[UMIN:%.*]] = call i64 @llvm.umin.i64(i64 [[TMP1]], i64 [[TMP0]]) +; CHECK-V8A-NEXT: [[TMP3:%.*]] = select i1 [[TMP2]], i64 0, i64 [[UMIN]] +; CHECK-V8A-NEXT: [[TMP4:%.*]] = zext i32 [[LENGTH]] to i64 +; CHECK-V8A-NEXT: [[TMP5:%.*]] = icmp ne i64 [[TMP4]], [[TMP3]] ; CHECK-V8A-NEXT: br label [[LOOP:%.*]] ; CHECK-V8A: loop: ; CHECK-V8A-NEXT: [[LOOP_ACC:%.*]] = phi i32 [ [[LOOP_ACC_NEXT:%.*]], [[GUARDED:%.*]] ], [ 0, [[LOOP_PREHEADER:%.*]] ] ; CHECK-V8A-NEXT: [[I:%.*]] = phi i64 [ [[I_NEXT:%.*]], [[GUARDED]] ], [ 0, [[LOOP_PREHEADER]] ] -; CHECK-V8A-NEXT: br i1 [[TMP3]], label [[GUARDED]], label [[DEOPT:%.*]], !prof !0 +; CHECK-V8A-NEXT: br i1 [[TMP5]], label [[GUARDED]], label [[DEOPT:%.*]], !prof [[PROF0]] ; CHECK-V8A: deopt: ; CHECK-V8A-NEXT: call void @prevent_merging() ; CHECK-V8A-NEXT: ret i32 -1 Index: llvm/test/Transforms/IndVarSimplify/loop-predication.ll =================================================================== --- llvm/test/Transforms/IndVarSimplify/loop-predication.ll +++ llvm/test/Transforms/IndVarSimplify/loop-predication.ll @@ -9,13 +9,15 @@ ; CHECK-NEXT: loop.preheader: ; CHECK-NEXT: [[UMAX:%.*]] = call i32 @llvm.umax.i32(i32 [[N:%.*]], i32 1) ; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[UMAX]], -1 -; CHECK-NEXT: [[UMIN:%.*]] = call i32 @llvm.umin.i32(i32 [[LENGTH:%.*]], i32 [[TMP0]]) -; CHECK-NEXT: [[TMP1:%.*]] = icmp ne i32 [[LENGTH]], [[UMIN]] +; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i32 [[LENGTH:%.*]], 0 +; CHECK-NEXT: [[UMIN:%.*]] = call i32 @llvm.umin.i32(i32 [[TMP0]], i32 [[LENGTH]]) +; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 0, i32 [[UMIN]] +; CHECK-NEXT: [[TMP3:%.*]] = icmp ne i32 [[LENGTH]], [[TMP2]] ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: ; CHECK-NEXT: [[LOOP_ACC:%.*]] = phi i32 [ [[LOOP_ACC_NEXT:%.*]], [[GUARDED:%.*]] ], [ 0, [[LOOP_PREHEADER:%.*]] ] ; CHECK-NEXT: [[I:%.*]] = phi i32 [ [[I_NEXT:%.*]], [[GUARDED]] ], [ 0, [[LOOP_PREHEADER]] ] -; CHECK-NEXT: br i1 [[TMP1]], label [[GUARDED]], label [[DEOPT:%.*]], !prof !0 +; CHECK-NEXT: br i1 [[TMP3]], label [[GUARDED]], label [[DEOPT:%.*]], !prof [[PROF0:![0-9]+]] ; CHECK: deopt: ; CHECK-NEXT: call void @prevent_merging() ; CHECK-NEXT: ret i32 -1 @@ -67,7 +69,7 @@ ; CHECK-NEXT: [[LOOP_ACC:%.*]] = phi i32 [ [[LOOP_ACC_NEXT:%.*]], [[GUARDED:%.*]] ], [ 0, [[LOOP_PREHEADER:%.*]] ] ; CHECK-NEXT: [[I:%.*]] = phi i32 [ [[I_NEXT:%.*]], [[GUARDED]] ], [ 0, [[LOOP_PREHEADER]] ] ; CHECK-NEXT: [[WITHIN_BOUNDS:%.*]] = icmp ult i32 [[I]], [[LENGTH:%.*]] -; CHECK-NEXT: br i1 [[WITHIN_BOUNDS]], label [[GUARDED]], label [[DEOPT:%.*]], !prof !0 +; CHECK-NEXT: br i1 [[WITHIN_BOUNDS]], label [[GUARDED]], label [[DEOPT:%.*]], !prof [[PROF0]] ; CHECK: deopt: ; CHECK-NEXT: call void @prevent_merging() ; CHECK-NEXT: ret i32 -1 @@ -124,7 +126,7 @@ ; CHECK-NEXT: [[I:%.*]] = phi i32 [ [[I_NEXT:%.*]], [[GUARDED]] ], [ 0, [[LOOP_PREHEADER]] ] ; CHECK-NEXT: call void @maythrow() ; CHECK-NEXT: [[WITHIN_BOUNDS:%.*]] = icmp ult i32 [[I]], [[LENGTH:%.*]] -; CHECK-NEXT: br i1 [[WITHIN_BOUNDS]], label [[GUARDED]], label [[DEOPT:%.*]], !prof !0 +; CHECK-NEXT: br i1 [[WITHIN_BOUNDS]], label [[GUARDED]], label [[DEOPT:%.*]], !prof [[PROF0]] ; CHECK: deopt: ; CHECK-NEXT: call void @prevent_merging() ; CHECK-NEXT: ret i32 -1 @@ -175,13 +177,15 @@ ; CHECK-LABEL: @test2( ; CHECK-NEXT: loop.preheader: ; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[N:%.*]], -1 -; CHECK-NEXT: [[UMIN:%.*]] = call i32 @llvm.umin.i32(i32 [[LENGTH:%.*]], i32 [[TMP0]]) -; CHECK-NEXT: [[TMP1:%.*]] = icmp ne i32 [[LENGTH]], [[UMIN]] +; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i32 [[LENGTH:%.*]], 0 +; CHECK-NEXT: [[UMIN:%.*]] = call i32 @llvm.umin.i32(i32 [[TMP0]], i32 [[LENGTH]]) +; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 0, i32 [[UMIN]] +; CHECK-NEXT: [[TMP3:%.*]] = icmp ne i32 [[LENGTH]], [[TMP2]] ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: ; CHECK-NEXT: [[LOOP_ACC:%.*]] = phi i32 [ [[LOOP_ACC_NEXT:%.*]], [[GUARDED:%.*]] ], [ 0, [[LOOP_PREHEADER:%.*]] ] ; CHECK-NEXT: [[I:%.*]] = phi i32 [ [[I_NEXT:%.*]], [[GUARDED]] ], [ 0, [[LOOP_PREHEADER]] ] -; CHECK-NEXT: br i1 [[TMP1]], label [[GUARDED]], label [[DEOPT:%.*]], !prof !0 +; CHECK-NEXT: br i1 [[TMP3]], label [[GUARDED]], label [[DEOPT:%.*]], !prof [[PROF0]] ; CHECK: deopt: ; CHECK-NEXT: call void @prevent_merging() ; CHECK-NEXT: ret i32 -1 @@ -229,16 +233,17 @@ ; CHECK-LABEL: @two_range_checks( ; CHECK-NEXT: loop.preheader: ; CHECK-NEXT: [[UMIN:%.*]] = call i32 @llvm.umin.i32(i32 [[LENGTH_2:%.*]], i32 [[LENGTH_1:%.*]]) -; CHECK-NEXT: [[UMIN1:%.*]] = call i32 @llvm.umin.i32(i32 [[LENGTH_2]], i32 [[LENGTH_1]]) ; CHECK-NEXT: [[UMAX:%.*]] = call i32 @llvm.umax.i32(i32 [[N:%.*]], i32 1) ; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[UMAX]], -1 -; CHECK-NEXT: [[UMIN2:%.*]] = call i32 @llvm.umin.i32(i32 [[UMIN1]], i32 [[TMP0]]) -; CHECK-NEXT: [[TMP1:%.*]] = icmp ne i32 [[UMIN]], [[UMIN2]] +; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i32 [[UMIN]], 0 +; CHECK-NEXT: [[UMIN1:%.*]] = call i32 @llvm.umin.i32(i32 [[TMP0]], i32 [[UMIN]]) +; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 0, i32 [[UMIN1]] +; CHECK-NEXT: [[TMP3:%.*]] = icmp ne i32 [[UMIN]], [[TMP2]] ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: ; CHECK-NEXT: [[LOOP_ACC:%.*]] = phi i32 [ [[LOOP_ACC_NEXT:%.*]], [[GUARDED:%.*]] ], [ 0, [[LOOP_PREHEADER:%.*]] ] ; CHECK-NEXT: [[I:%.*]] = phi i32 [ [[I_NEXT:%.*]], [[GUARDED]] ], [ 0, [[LOOP_PREHEADER]] ] -; CHECK-NEXT: br i1 [[TMP1]], label [[GUARDED]], label [[DEOPT:%.*]], !prof !0 +; CHECK-NEXT: br i1 [[TMP3]], label [[GUARDED]], label [[DEOPT:%.*]], !prof [[PROF0]] ; CHECK: deopt: ; CHECK-NEXT: call void @prevent_merging() ; CHECK-NEXT: ret i32 -1 @@ -294,17 +299,17 @@ ; CHECK-NEXT: loop.preheader: ; CHECK-NEXT: [[UMIN:%.*]] = call i32 @llvm.umin.i32(i32 [[LENGTH_3:%.*]], i32 [[LENGTH_2:%.*]]) ; CHECK-NEXT: [[UMIN1:%.*]] = call i32 @llvm.umin.i32(i32 [[UMIN]], i32 [[LENGTH_1:%.*]]) -; CHECK-NEXT: [[UMIN2:%.*]] = call i32 @llvm.umin.i32(i32 [[LENGTH_3]], i32 [[LENGTH_2]]) -; CHECK-NEXT: [[UMIN3:%.*]] = call i32 @llvm.umin.i32(i32 [[UMIN2]], i32 [[LENGTH_1]]) ; CHECK-NEXT: [[UMAX:%.*]] = call i32 @llvm.umax.i32(i32 [[N:%.*]], i32 1) ; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[UMAX]], -1 -; CHECK-NEXT: [[UMIN4:%.*]] = call i32 @llvm.umin.i32(i32 [[UMIN3]], i32 [[TMP0]]) -; CHECK-NEXT: [[TMP1:%.*]] = icmp ne i32 [[UMIN1]], [[UMIN4]] +; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i32 [[UMIN1]], 0 +; CHECK-NEXT: [[UMIN2:%.*]] = call i32 @llvm.umin.i32(i32 [[TMP0]], i32 [[UMIN1]]) +; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 0, i32 [[UMIN2]] +; CHECK-NEXT: [[TMP3:%.*]] = icmp ne i32 [[UMIN1]], [[TMP2]] ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: ; CHECK-NEXT: [[LOOP_ACC:%.*]] = phi i32 [ [[LOOP_ACC_NEXT:%.*]], [[GUARDED:%.*]] ], [ 0, [[LOOP_PREHEADER:%.*]] ] ; CHECK-NEXT: [[I:%.*]] = phi i32 [ [[I_NEXT:%.*]], [[GUARDED]] ], [ 0, [[LOOP_PREHEADER]] ] -; CHECK-NEXT: br i1 [[TMP1]], label [[GUARDED]], label [[DEOPT:%.*]], !prof !0 +; CHECK-NEXT: br i1 [[TMP3]], label [[GUARDED]], label [[DEOPT:%.*]], !prof [[PROF0]] ; CHECK: deopt: ; CHECK-NEXT: call void @prevent_merging() ; CHECK-NEXT: ret i32 -1 @@ -367,17 +372,21 @@ define i32 @distinct_checks(i32* %array.1, i32 %length.1, i32* %array.2, i32 %length.2, i32* %array.3, i32 %length.3, i32 %n) { ; CHECK-LABEL: @distinct_checks( ; CHECK-NEXT: loop.preheader: -; CHECK-NEXT: [[UMIN:%.*]] = call i32 @llvm.umin.i32(i32 [[LENGTH_2:%.*]], i32 [[LENGTH_1:%.*]]) ; CHECK-NEXT: [[UMAX:%.*]] = call i32 @llvm.umax.i32(i32 [[N:%.*]], i32 1) ; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[UMAX]], -1 -; CHECK-NEXT: [[UMIN1:%.*]] = call i32 @llvm.umin.i32(i32 [[UMIN]], i32 [[TMP0]]) -; CHECK-NEXT: [[TMP1:%.*]] = icmp ne i32 [[LENGTH_1]], [[UMIN1]] -; CHECK-NEXT: [[TMP2:%.*]] = icmp ne i32 [[LENGTH_2]], [[UMIN1]] +; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i32 [[LENGTH_1:%.*]], 0 +; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i32 [[LENGTH_2:%.*]], 0 +; CHECK-NEXT: [[TMP3:%.*]] = select i1 [[TMP1]], i1 true, i1 [[TMP2]] +; CHECK-NEXT: [[UMIN:%.*]] = call i32 @llvm.umin.i32(i32 [[TMP0]], i32 [[LENGTH_2]]) +; CHECK-NEXT: [[UMIN1:%.*]] = call i32 @llvm.umin.i32(i32 [[UMIN]], i32 [[LENGTH_1]]) +; CHECK-NEXT: [[TMP4:%.*]] = select i1 [[TMP3]], i32 0, i32 [[UMIN1]] +; CHECK-NEXT: [[TMP5:%.*]] = icmp ne i32 [[LENGTH_1]], [[TMP4]] +; CHECK-NEXT: [[TMP6:%.*]] = icmp ne i32 [[LENGTH_2]], [[TMP4]] ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: ; CHECK-NEXT: [[LOOP_ACC:%.*]] = phi i32 [ [[LOOP_ACC_NEXT:%.*]], [[GUARDED1:%.*]] ], [ 0, [[LOOP_PREHEADER:%.*]] ] ; CHECK-NEXT: [[I:%.*]] = phi i32 [ [[I_NEXT:%.*]], [[GUARDED1]] ], [ 0, [[LOOP_PREHEADER]] ] -; CHECK-NEXT: br i1 [[TMP1]], label [[GUARDED:%.*]], label [[DEOPT:%.*]], !prof !0 +; CHECK-NEXT: br i1 [[TMP5]], label [[GUARDED:%.*]], label [[DEOPT:%.*]], !prof [[PROF0]] ; CHECK: deopt: ; CHECK-NEXT: call void @prevent_merging() ; CHECK-NEXT: ret i32 -1 @@ -386,7 +395,7 @@ ; CHECK-NEXT: [[ARRAY_1_I_PTR:%.*]] = getelementptr inbounds i32, i32* [[ARRAY_1:%.*]], i64 [[I_I64]] ; CHECK-NEXT: [[ARRAY_1_I:%.*]] = load i32, i32* [[ARRAY_1_I_PTR]], align 4 ; CHECK-NEXT: [[LOOP_ACC_1:%.*]] = add i32 [[LOOP_ACC]], [[ARRAY_1_I]] -; CHECK-NEXT: br i1 [[TMP2]], label [[GUARDED1]], label [[DEOPT2:%.*]], !prof !0 +; CHECK-NEXT: br i1 [[TMP6]], label [[GUARDED1]], label [[DEOPT2:%.*]], !prof [[PROF0]] ; CHECK: deopt2: ; CHECK-NEXT: call void @prevent_merging() ; CHECK-NEXT: ret i32 -1 @@ -444,13 +453,15 @@ ; CHECK-NEXT: loop.preheader: ; CHECK-NEXT: [[UMAX:%.*]] = call i32 @llvm.umax.i32(i32 [[N:%.*]], i32 1) ; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[UMAX]], -1 -; CHECK-NEXT: [[UMIN:%.*]] = call i32 @llvm.umin.i32(i32 [[LENGTH:%.*]], i32 [[TMP0]]) -; CHECK-NEXT: [[TMP1:%.*]] = icmp ne i32 [[LENGTH]], [[UMIN]] +; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i32 [[LENGTH:%.*]], 0 +; CHECK-NEXT: [[UMIN:%.*]] = call i32 @llvm.umin.i32(i32 [[TMP0]], i32 [[LENGTH]]) +; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 0, i32 [[UMIN]] +; CHECK-NEXT: [[TMP3:%.*]] = icmp ne i32 [[LENGTH]], [[TMP2]] ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: ; CHECK-NEXT: [[LOOP_ACC:%.*]] = phi i32 [ [[LOOP_ACC_NEXT:%.*]], [[GUARDED1:%.*]] ], [ 0, [[LOOP_PREHEADER:%.*]] ] ; CHECK-NEXT: [[I:%.*]] = phi i32 [ [[I_NEXT:%.*]], [[GUARDED1]] ], [ 0, [[LOOP_PREHEADER]] ] -; CHECK-NEXT: br i1 [[TMP1]], label [[GUARDED:%.*]], label [[DEOPT:%.*]], !prof !0 +; CHECK-NEXT: br i1 [[TMP3]], label [[GUARDED:%.*]], label [[DEOPT:%.*]], !prof [[PROF0]] ; CHECK: deopt: ; CHECK-NEXT: call void @prevent_merging() ; CHECK-NEXT: ret i32 -1 @@ -459,7 +470,7 @@ ; CHECK-NEXT: [[ARRAY_1_I_PTR:%.*]] = getelementptr inbounds i32, i32* [[ARRAY_1:%.*]], i64 [[I_I64]] ; CHECK-NEXT: [[ARRAY_1_I:%.*]] = load i32, i32* [[ARRAY_1_I_PTR]], align 4 ; CHECK-NEXT: [[LOOP_ACC_1:%.*]] = add i32 [[LOOP_ACC]], [[ARRAY_1_I]] -; CHECK-NEXT: br i1 true, label [[GUARDED1]], label [[DEOPT2:%.*]], !prof !0 +; CHECK-NEXT: br i1 true, label [[GUARDED1]], label [[DEOPT2:%.*]], !prof [[PROF0]] ; CHECK: deopt2: ; CHECK-NEXT: call void @prevent_merging() ; CHECK-NEXT: ret i32 -1 @@ -520,7 +531,7 @@ ; CHECK: loop: ; CHECK-NEXT: [[LOOP_ACC:%.*]] = phi i32 [ [[LOOP_ACC_NEXT:%.*]], [[GUARDED:%.*]] ], [ 0, [[LOOP_PREHEADER:%.*]] ] ; CHECK-NEXT: [[I:%.*]] = phi i32 [ [[I_NEXT:%.*]], [[GUARDED]] ], [ 0, [[LOOP_PREHEADER]] ] -; CHECK-NEXT: br i1 false, label [[GUARDED]], label [[DEOPT:%.*]], !prof !0 +; CHECK-NEXT: br i1 false, label [[GUARDED]], label [[DEOPT:%.*]], !prof [[PROF0]] ; CHECK: deopt: ; CHECK-NEXT: call void @prevent_merging() ; CHECK-NEXT: ret i32 -1 @@ -569,7 +580,7 @@ ; CHECK-NEXT: loop.preheader: ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: -; CHECK-NEXT: br i1 false, label [[GUARDED:%.*]], label [[DEOPT:%.*]], !prof !0 +; CHECK-NEXT: br i1 false, label [[GUARDED:%.*]], label [[DEOPT:%.*]], !prof [[PROF0]] ; CHECK: deopt: ; CHECK-NEXT: call void @prevent_merging() ; CHECK-NEXT: ret i32 -1 @@ -601,7 +612,7 @@ ; CHECK: loop: ; CHECK-NEXT: [[I:%.*]] = phi i32 [ [[I_NEXT:%.*]], [[GUARDED:%.*]] ], [ 400, [[LOOP_PREHEADER:%.*]] ] ; CHECK-NEXT: [[WITHIN_BOUNDS:%.*]] = icmp ult i32 [[I]], [[LENGTH:%.*]] -; CHECK-NEXT: br i1 [[WITHIN_BOUNDS]], label [[GUARDED]], label [[DEOPT:%.*]], !prof !0 +; CHECK-NEXT: br i1 [[WITHIN_BOUNDS]], label [[GUARDED]], label [[DEOPT:%.*]], !prof [[PROF0]] ; CHECK: deopt: ; CHECK-NEXT: call void @prevent_merging() ; CHECK-NEXT: ret i32 -1 @@ -635,17 +646,19 @@ ; CHECK-LABEL: @different_ivs( ; CHECK-NEXT: loop.preheader: ; CHECK-NEXT: [[N64:%.*]] = zext i32 [[N:%.*]] to i64 +; CHECK-NEXT: [[TMP0:%.*]] = zext i32 [[LENGTH:%.*]] to i64 ; CHECK-NEXT: [[UMAX:%.*]] = call i64 @llvm.umax.i64(i64 [[N64]], i64 1) -; CHECK-NEXT: [[TMP0:%.*]] = add nsw i64 [[UMAX]], -1 -; CHECK-NEXT: [[TMP1:%.*]] = zext i32 [[LENGTH:%.*]] to i64 -; CHECK-NEXT: [[UMIN:%.*]] = call i64 @llvm.umin.i64(i64 [[TMP0]], i64 [[TMP1]]) -; CHECK-NEXT: [[TMP2:%.*]] = zext i32 [[LENGTH]] to i64 -; CHECK-NEXT: [[TMP3:%.*]] = icmp ne i64 [[TMP2]], [[UMIN]] +; CHECK-NEXT: [[TMP1:%.*]] = add nsw i64 [[UMAX]], -1 +; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i64 [[TMP0]], 0 +; CHECK-NEXT: [[UMIN:%.*]] = call i64 @llvm.umin.i64(i64 [[TMP1]], i64 [[TMP0]]) +; CHECK-NEXT: [[TMP3:%.*]] = select i1 [[TMP2]], i64 0, i64 [[UMIN]] +; CHECK-NEXT: [[TMP4:%.*]] = zext i32 [[LENGTH]] to i64 +; CHECK-NEXT: [[TMP5:%.*]] = icmp ne i64 [[TMP4]], [[TMP3]] ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: ; CHECK-NEXT: [[LOOP_ACC:%.*]] = phi i32 [ [[LOOP_ACC_NEXT:%.*]], [[GUARDED:%.*]] ], [ 0, [[LOOP_PREHEADER:%.*]] ] ; CHECK-NEXT: [[I:%.*]] = phi i64 [ [[I_NEXT:%.*]], [[GUARDED]] ], [ 0, [[LOOP_PREHEADER]] ] -; CHECK-NEXT: br i1 [[TMP3]], label [[GUARDED]], label [[DEOPT:%.*]], !prof !0 +; CHECK-NEXT: br i1 [[TMP5]], label [[GUARDED]], label [[DEOPT:%.*]], !prof [[PROF0]] ; CHECK: deopt: ; CHECK-NEXT: call void @prevent_merging() ; CHECK-NEXT: ret i32 -1 @@ -707,7 +720,7 @@ ; CHECK-NEXT: [[I:%.*]] = phi i64 [ [[I_NEXT:%.*]], [[GUARDED]] ], [ 0, [[LOOP_PREHEADER]] ] ; CHECK-NEXT: [[J:%.*]] = phi i32 [ [[J_NEXT:%.*]], [[GUARDED]] ], [ [[J_START]], [[LOOP_PREHEADER]] ] ; CHECK-NEXT: [[WITHIN_BOUNDS:%.*]] = icmp ult i32 [[J]], [[LENGTH]] -; CHECK-NEXT: br i1 [[WITHIN_BOUNDS]], label [[GUARDED]], label [[DEOPT:%.*]], !prof !0 +; CHECK-NEXT: br i1 [[WITHIN_BOUNDS]], label [[GUARDED]], label [[DEOPT:%.*]], !prof [[PROF0]] ; CHECK: deopt: ; CHECK-NEXT: call void @prevent_merging() ; CHECK-NEXT: ret i32 -1 @@ -771,14 +784,14 @@ ; CHECK-NEXT: [[LOOP_ACC:%.*]] = phi i32 [ [[LOOP_ACC_NEXT:%.*]], [[GUARDED2:%.*]] ], [ 0, [[LOOP_PREHEADER:%.*]] ] ; CHECK-NEXT: [[I:%.*]] = phi i32 [ [[I_NEXT:%.*]], [[GUARDED2]] ], [ 0, [[LOOP_PREHEADER]] ] ; CHECK-NEXT: [[WITHIN_BOUNDS:%.*]] = icmp ult i32 [[I]], [[LENGTH:%.*]] -; CHECK-NEXT: br i1 [[WITHIN_BOUNDS]], label [[GUARDED:%.*]], label [[DEOPT:%.*]], !prof !0 +; CHECK-NEXT: br i1 [[WITHIN_BOUNDS]], label [[GUARDED:%.*]], label [[DEOPT:%.*]], !prof [[PROF0]] ; CHECK: deopt: ; CHECK-NEXT: [[RESULT:%.*]] = phi i32 [ [[LOOP_ACC]], [[LOOP]] ] ; CHECK-NEXT: call void @prevent_merging() ; CHECK-NEXT: ret i32 [[RESULT]] ; CHECK: guarded: ; CHECK-NEXT: [[WITHIN_BOUNDS2:%.*]] = icmp ult i32 [[I]], [[LENGTH2:%.*]] -; CHECK-NEXT: br i1 [[WITHIN_BOUNDS2]], label [[GUARDED2]], label [[DEOPT2:%.*]], !prof !0 +; CHECK-NEXT: br i1 [[WITHIN_BOUNDS2]], label [[GUARDED2]], label [[DEOPT2:%.*]], !prof [[PROF0]] ; CHECK: deopt2: ; CHECK-NEXT: call void @prevent_merging() ; CHECK-NEXT: ret i32 -1 Index: llvm/test/Transforms/PhaseOrdering/AArch64/peel-multiple-unreachable-exits-for-vectorization.ll =================================================================== --- llvm/test/Transforms/PhaseOrdering/AArch64/peel-multiple-unreachable-exits-for-vectorization.ll +++ llvm/test/Transforms/PhaseOrdering/AArch64/peel-multiple-unreachable-exits-for-vectorization.ll @@ -30,55 +30,59 @@ ; CHECK-NEXT: [[C_PEEL:%.*]] = icmp sgt i64 [[N:%.*]], 0 ; CHECK-NEXT: br i1 [[C_PEEL]], label [[LOOP_PREHEADER:%.*]], label [[EXIT:%.*]] ; CHECK: loop.preheader: -; CHECK-NEXT: [[UMIN:%.*]] = call i64 @llvm.umin.i64(i64 [[SUB_I7_PEEL]], i64 [[SUB_I]]) ; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[N]], -1 -; CHECK-NEXT: [[UMIN16:%.*]] = call i64 @llvm.umin.i64(i64 [[UMIN]], i64 [[TMP0]]) -; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[UMIN16]], 1 -; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP1]], 5 -; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[LOOP_PREHEADER22:%.*]], label [[VECTOR_PH:%.*]] +; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i64* [[END_I]], [[START_I]] +; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i64* [[END_I4_PEEL]], [[START_I2_PEEL]] +; CHECK-NEXT: [[TMP3:%.*]] = select i1 [[TMP1]], i1 true, i1 [[TMP2]] +; CHECK-NEXT: [[UMIN:%.*]] = call i64 @llvm.umin.i64(i64 [[TMP0]], i64 [[SUB_I7_PEEL]]) +; CHECK-NEXT: [[UMIN16:%.*]] = call i64 @llvm.umin.i64(i64 [[UMIN]], i64 [[SUB_I]]) +; CHECK-NEXT: [[UMIN16_OP:%.*]] = add i64 [[UMIN16]], 1 +; CHECK-NEXT: [[TMP4:%.*]] = select i1 [[TMP3]], i64 1, i64 [[UMIN16_OP]] +; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP4]], 5 +; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[LOOP_PREHEADER21:%.*]], label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: -; CHECK-NEXT: [[N_MOD_VF:%.*]] = and i64 [[TMP1]], 3 -; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i64 [[N_MOD_VF]], 0 -; CHECK-NEXT: [[TMP3:%.*]] = select i1 [[TMP2]], i64 4, i64 [[N_MOD_VF]] -; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP1]], [[TMP3]] +; CHECK-NEXT: [[N_MOD_VF:%.*]] = and i64 [[TMP4]], 3 +; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i64 [[N_MOD_VF]], 0 +; CHECK-NEXT: [[TMP6:%.*]] = select i1 [[TMP5]], i64 4, i64 [[N_MOD_VF]] +; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP4]], [[TMP6]] ; CHECK-NEXT: [[IND_END:%.*]] = add i64 [[N_VEC]], 1 -; CHECK-NEXT: [[TMP4:%.*]] = insertelement <2 x i64> , i64 [[SUM_NEXT_PEEL]], i64 0 +; CHECK-NEXT: [[TMP7:%.*]] = insertelement <2 x i64> , i64 [[SUM_NEXT_PEEL]], i64 0 ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <2 x i64> [ [[TMP4]], [[VECTOR_PH]] ], [ [[TMP15:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[VEC_PHI18:%.*]] = phi <2 x i64> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP16:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <2 x i64> [ [[TMP7]], [[VECTOR_PH]] ], [ [[TMP18:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI17:%.*]] = phi <2 x i64> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP19:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = or i64 [[INDEX]], 1 -; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i64, i64* [[START_I]], i64 [[OFFSET_IDX]] -; CHECK-NEXT: [[TMP6:%.*]] = bitcast i64* [[TMP5]] to <2 x i64>* -; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i64>, <2 x i64>* [[TMP6]], align 4 -; CHECK-NEXT: [[TMP7:%.*]] = getelementptr i64, i64* [[TMP5]], i64 2 -; CHECK-NEXT: [[TMP8:%.*]] = bitcast i64* [[TMP7]] to <2 x i64>* -; CHECK-NEXT: [[WIDE_LOAD19:%.*]] = load <2 x i64>, <2 x i64>* [[TMP8]], align 4 -; CHECK-NEXT: [[TMP9:%.*]] = getelementptr i64, i64* [[START_I2_PEEL]], i64 [[OFFSET_IDX]] -; CHECK-NEXT: [[TMP10:%.*]] = bitcast i64* [[TMP9]] to <2 x i64>* -; CHECK-NEXT: [[WIDE_LOAD20:%.*]] = load <2 x i64>, <2 x i64>* [[TMP10]], align 4 -; CHECK-NEXT: [[TMP11:%.*]] = getelementptr i64, i64* [[TMP9]], i64 2 -; CHECK-NEXT: [[TMP12:%.*]] = bitcast i64* [[TMP11]] to <2 x i64>* -; CHECK-NEXT: [[WIDE_LOAD21:%.*]] = load <2 x i64>, <2 x i64>* [[TMP12]], align 4 -; CHECK-NEXT: [[TMP13:%.*]] = add <2 x i64> [[WIDE_LOAD]], [[VEC_PHI]] -; CHECK-NEXT: [[TMP14:%.*]] = add <2 x i64> [[WIDE_LOAD19]], [[VEC_PHI18]] -; CHECK-NEXT: [[TMP15]] = add <2 x i64> [[TMP13]], [[WIDE_LOAD20]] -; CHECK-NEXT: [[TMP16]] = add <2 x i64> [[TMP14]], [[WIDE_LOAD21]] +; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i64, i64* [[START_I]], i64 [[OFFSET_IDX]] +; CHECK-NEXT: [[TMP9:%.*]] = bitcast i64* [[TMP8]] to <2 x i64>* +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i64>, <2 x i64>* [[TMP9]], align 4 +; CHECK-NEXT: [[TMP10:%.*]] = getelementptr i64, i64* [[TMP8]], i64 2 +; CHECK-NEXT: [[TMP11:%.*]] = bitcast i64* [[TMP10]] to <2 x i64>* +; CHECK-NEXT: [[WIDE_LOAD18:%.*]] = load <2 x i64>, <2 x i64>* [[TMP11]], align 4 +; CHECK-NEXT: [[TMP12:%.*]] = getelementptr i64, i64* [[START_I2_PEEL]], i64 [[OFFSET_IDX]] +; CHECK-NEXT: [[TMP13:%.*]] = bitcast i64* [[TMP12]] to <2 x i64>* +; CHECK-NEXT: [[WIDE_LOAD19:%.*]] = load <2 x i64>, <2 x i64>* [[TMP13]], align 4 +; CHECK-NEXT: [[TMP14:%.*]] = getelementptr i64, i64* [[TMP12]], i64 2 +; CHECK-NEXT: [[TMP15:%.*]] = bitcast i64* [[TMP14]] to <2 x i64>* +; CHECK-NEXT: [[WIDE_LOAD20:%.*]] = load <2 x i64>, <2 x i64>* [[TMP15]], align 4 +; CHECK-NEXT: [[TMP16:%.*]] = add <2 x i64> [[WIDE_LOAD]], [[VEC_PHI]] +; CHECK-NEXT: [[TMP17:%.*]] = add <2 x i64> [[WIDE_LOAD18]], [[VEC_PHI17]] +; CHECK-NEXT: [[TMP18]] = add <2 x i64> [[TMP16]], [[WIDE_LOAD19]] +; CHECK-NEXT: [[TMP19]] = add <2 x i64> [[TMP17]], [[WIDE_LOAD20]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; CHECK-NEXT: [[TMP17:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP17]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] +; CHECK-NEXT: [[TMP20:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[TMP20]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: [[BIN_RDX:%.*]] = add <2 x i64> [[TMP16]], [[TMP15]] -; CHECK-NEXT: [[TMP18:%.*]] = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> [[BIN_RDX]]) -; CHECK-NEXT: br label [[LOOP_PREHEADER22]] +; CHECK-NEXT: [[BIN_RDX:%.*]] = add <2 x i64> [[TMP19]], [[TMP18]] +; CHECK-NEXT: [[TMP21:%.*]] = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> [[BIN_RDX]]) +; CHECK-NEXT: br label [[LOOP_PREHEADER21]] ; CHECK: loop.preheader21: ; CHECK-NEXT: [[IV_PH:%.*]] = phi i64 [ 1, [[LOOP_PREHEADER]] ], [ [[IND_END]], [[MIDDLE_BLOCK]] ] -; CHECK-NEXT: [[SUM_PH:%.*]] = phi i64 [ [[SUM_NEXT_PEEL]], [[LOOP_PREHEADER]] ], [ [[TMP18]], [[MIDDLE_BLOCK]] ] +; CHECK-NEXT: [[SUM_PH:%.*]] = phi i64 [ [[SUM_NEXT_PEEL]], [[LOOP_PREHEADER]] ], [ [[TMP21]], [[MIDDLE_BLOCK]] ] ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: -; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[IV_NEXT:%.*]], [[AT_WITH_INT_CONVERSION_EXIT12:%.*]] ], [ [[IV_PH]], [[LOOP_PREHEADER22]] ] -; CHECK-NEXT: [[SUM:%.*]] = phi i64 [ [[SUM_NEXT:%.*]], [[AT_WITH_INT_CONVERSION_EXIT12]] ], [ [[SUM_PH]], [[LOOP_PREHEADER22]] ] +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[IV_NEXT:%.*]], [[AT_WITH_INT_CONVERSION_EXIT12:%.*]] ], [ [[IV_PH]], [[LOOP_PREHEADER21]] ] +; CHECK-NEXT: [[SUM:%.*]] = phi i64 [ [[SUM_NEXT:%.*]], [[AT_WITH_INT_CONVERSION_EXIT12]] ], [ [[SUM_PH]], [[LOOP_PREHEADER21]] ] ; CHECK-NEXT: [[INRANGE_I:%.*]] = icmp ult i64 [[SUB_I]], [[IV]] ; CHECK-NEXT: br i1 [[INRANGE_I]], label [[ERROR_I:%.*]], label [[AT_WITH_INT_CONVERSION_EXIT:%.*]] ; CHECK: error.i: @@ -154,64 +158,70 @@ ; CHECK-NEXT: [[COND_PEEL:%.*]] = icmp sgt i64 [[N:%.*]], 0 ; CHECK-NEXT: br i1 [[COND_PEEL]], label [[LOOP_PREHEADER:%.*]], label [[EXIT:%.*]] ; CHECK: loop.preheader: -; CHECK-NEXT: [[UMIN:%.*]] = call i64 @llvm.umin.i64(i64 [[SUB_I19_PEEL]], i64 [[SUB_I7_PEEL]]) -; CHECK-NEXT: [[UMIN28:%.*]] = call i64 @llvm.umin.i64(i64 [[UMIN]], i64 [[SUB_I]]) ; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[N]], -1 -; CHECK-NEXT: [[UMIN29:%.*]] = call i64 @llvm.umin.i64(i64 [[UMIN28]], i64 [[TMP0]]) -; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[UMIN29]], 1 -; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP1]], 5 -; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[LOOP_PREHEADER37:%.*]], label [[VECTOR_PH:%.*]] +; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i64* [[END_I]], [[START_I]] +; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i64* [[END_I4_PEEL]], [[START_I2_PEEL]] +; CHECK-NEXT: [[TMP3:%.*]] = icmp eq i64* [[END_I16_PEEL]], [[START_I14_PEEL]] +; CHECK-NEXT: [[TMP4:%.*]] = select i1 [[TMP1]], i1 true, i1 [[TMP2]] +; CHECK-NEXT: [[TMP5:%.*]] = select i1 [[TMP4]], i1 true, i1 [[TMP3]] +; CHECK-NEXT: [[UMIN:%.*]] = call i64 @llvm.umin.i64(i64 [[TMP0]], i64 [[SUB_I19_PEEL]]) +; CHECK-NEXT: [[UMIN28:%.*]] = call i64 @llvm.umin.i64(i64 [[UMIN]], i64 [[SUB_I7_PEEL]]) +; CHECK-NEXT: [[UMIN29:%.*]] = call i64 @llvm.umin.i64(i64 [[UMIN28]], i64 [[SUB_I]]) +; CHECK-NEXT: [[UMIN29_OP:%.*]] = add i64 [[UMIN29]], 1 +; CHECK-NEXT: [[TMP6:%.*]] = select i1 [[TMP5]], i64 1, i64 [[UMIN29_OP]] +; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP6]], 5 +; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[LOOP_PREHEADER36:%.*]], label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: -; CHECK-NEXT: [[N_MOD_VF:%.*]] = and i64 [[TMP1]], 3 -; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i64 [[N_MOD_VF]], 0 -; CHECK-NEXT: [[TMP3:%.*]] = select i1 [[TMP2]], i64 4, i64 [[N_MOD_VF]] -; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP1]], [[TMP3]] +; CHECK-NEXT: [[N_MOD_VF:%.*]] = and i64 [[TMP6]], 3 +; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[N_MOD_VF]], 0 +; CHECK-NEXT: [[TMP8:%.*]] = select i1 [[TMP7]], i64 4, i64 [[N_MOD_VF]] +; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP6]], [[TMP8]] ; CHECK-NEXT: [[IND_END:%.*]] = add i64 [[N_VEC]], 1 -; CHECK-NEXT: [[TMP4:%.*]] = insertelement <2 x i64> , i64 [[SUM_NEXT_PEEL]], i64 0 +; CHECK-NEXT: [[TMP9:%.*]] = insertelement <2 x i64> , i64 [[SUM_NEXT_PEEL]], i64 0 ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <2 x i64> [ [[TMP4]], [[VECTOR_PH]] ], [ [[TMP21:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[VEC_PHI31:%.*]] = phi <2 x i64> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP22:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <2 x i64> [ [[TMP9]], [[VECTOR_PH]] ], [ [[TMP26:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI30:%.*]] = phi <2 x i64> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP27:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = or i64 [[INDEX]], 1 -; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i64, i64* [[START_I]], i64 [[OFFSET_IDX]] -; CHECK-NEXT: [[TMP6:%.*]] = bitcast i64* [[TMP5]] to <2 x i64>* -; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i64>, <2 x i64>* [[TMP6]], align 4 -; CHECK-NEXT: [[TMP7:%.*]] = getelementptr i64, i64* [[TMP5]], i64 2 -; CHECK-NEXT: [[TMP8:%.*]] = bitcast i64* [[TMP7]] to <2 x i64>* -; CHECK-NEXT: [[WIDE_LOAD32:%.*]] = load <2 x i64>, <2 x i64>* [[TMP8]], align 4 -; CHECK-NEXT: [[TMP9:%.*]] = getelementptr i64, i64* [[START_I2_PEEL]], i64 [[OFFSET_IDX]] -; CHECK-NEXT: [[TMP10:%.*]] = bitcast i64* [[TMP9]] to <2 x i64>* -; CHECK-NEXT: [[WIDE_LOAD33:%.*]] = load <2 x i64>, <2 x i64>* [[TMP10]], align 4 -; CHECK-NEXT: [[TMP11:%.*]] = getelementptr i64, i64* [[TMP9]], i64 2 -; CHECK-NEXT: [[TMP12:%.*]] = bitcast i64* [[TMP11]] to <2 x i64>* -; CHECK-NEXT: [[WIDE_LOAD34:%.*]] = load <2 x i64>, <2 x i64>* [[TMP12]], align 4 -; CHECK-NEXT: [[TMP13:%.*]] = getelementptr i64, i64* [[START_I14_PEEL]], i64 [[OFFSET_IDX]] -; CHECK-NEXT: [[TMP14:%.*]] = bitcast i64* [[TMP13]] to <2 x i64>* -; CHECK-NEXT: [[WIDE_LOAD35:%.*]] = load <2 x i64>, <2 x i64>* [[TMP14]], align 4 -; CHECK-NEXT: [[TMP15:%.*]] = getelementptr i64, i64* [[TMP13]], i64 2 -; CHECK-NEXT: [[TMP16:%.*]] = bitcast i64* [[TMP15]] to <2 x i64>* -; CHECK-NEXT: [[WIDE_LOAD36:%.*]] = load <2 x i64>, <2 x i64>* [[TMP16]], align 4 -; CHECK-NEXT: [[TMP17:%.*]] = add <2 x i64> [[WIDE_LOAD]], [[VEC_PHI]] -; CHECK-NEXT: [[TMP18:%.*]] = add <2 x i64> [[WIDE_LOAD32]], [[VEC_PHI31]] -; CHECK-NEXT: [[TMP19:%.*]] = add <2 x i64> [[TMP17]], [[WIDE_LOAD33]] -; CHECK-NEXT: [[TMP20:%.*]] = add <2 x i64> [[TMP18]], [[WIDE_LOAD34]] -; CHECK-NEXT: [[TMP21]] = add <2 x i64> [[TMP19]], [[WIDE_LOAD35]] -; CHECK-NEXT: [[TMP22]] = add <2 x i64> [[TMP20]], [[WIDE_LOAD36]] +; CHECK-NEXT: [[TMP10:%.*]] = getelementptr i64, i64* [[START_I]], i64 [[OFFSET_IDX]] +; CHECK-NEXT: [[TMP11:%.*]] = bitcast i64* [[TMP10]] to <2 x i64>* +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i64>, <2 x i64>* [[TMP11]], align 4 +; CHECK-NEXT: [[TMP12:%.*]] = getelementptr i64, i64* [[TMP10]], i64 2 +; CHECK-NEXT: [[TMP13:%.*]] = bitcast i64* [[TMP12]] to <2 x i64>* +; CHECK-NEXT: [[WIDE_LOAD31:%.*]] = load <2 x i64>, <2 x i64>* [[TMP13]], align 4 +; CHECK-NEXT: [[TMP14:%.*]] = getelementptr i64, i64* [[START_I2_PEEL]], i64 [[OFFSET_IDX]] +; CHECK-NEXT: [[TMP15:%.*]] = bitcast i64* [[TMP14]] to <2 x i64>* +; CHECK-NEXT: [[WIDE_LOAD32:%.*]] = load <2 x i64>, <2 x i64>* [[TMP15]], align 4 +; CHECK-NEXT: [[TMP16:%.*]] = getelementptr i64, i64* [[TMP14]], i64 2 +; CHECK-NEXT: [[TMP17:%.*]] = bitcast i64* [[TMP16]] to <2 x i64>* +; CHECK-NEXT: [[WIDE_LOAD33:%.*]] = load <2 x i64>, <2 x i64>* [[TMP17]], align 4 +; CHECK-NEXT: [[TMP18:%.*]] = getelementptr i64, i64* [[START_I14_PEEL]], i64 [[OFFSET_IDX]] +; CHECK-NEXT: [[TMP19:%.*]] = bitcast i64* [[TMP18]] to <2 x i64>* +; CHECK-NEXT: [[WIDE_LOAD34:%.*]] = load <2 x i64>, <2 x i64>* [[TMP19]], align 4 +; CHECK-NEXT: [[TMP20:%.*]] = getelementptr i64, i64* [[TMP18]], i64 2 +; CHECK-NEXT: [[TMP21:%.*]] = bitcast i64* [[TMP20]] to <2 x i64>* +; CHECK-NEXT: [[WIDE_LOAD35:%.*]] = load <2 x i64>, <2 x i64>* [[TMP21]], align 4 +; CHECK-NEXT: [[TMP22:%.*]] = add <2 x i64> [[WIDE_LOAD]], [[VEC_PHI]] +; CHECK-NEXT: [[TMP23:%.*]] = add <2 x i64> [[WIDE_LOAD31]], [[VEC_PHI30]] +; CHECK-NEXT: [[TMP24:%.*]] = add <2 x i64> [[TMP22]], [[WIDE_LOAD32]] +; CHECK-NEXT: [[TMP25:%.*]] = add <2 x i64> [[TMP23]], [[WIDE_LOAD33]] +; CHECK-NEXT: [[TMP26]] = add <2 x i64> [[TMP24]], [[WIDE_LOAD34]] +; CHECK-NEXT: [[TMP27]] = add <2 x i64> [[TMP25]], [[WIDE_LOAD35]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; CHECK-NEXT: [[TMP23:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP23]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] +; CHECK-NEXT: [[TMP28:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[TMP28]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: [[BIN_RDX:%.*]] = add <2 x i64> [[TMP22]], [[TMP21]] -; CHECK-NEXT: [[TMP24:%.*]] = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> [[BIN_RDX]]) -; CHECK-NEXT: br label [[LOOP_PREHEADER37]] +; CHECK-NEXT: [[BIN_RDX:%.*]] = add <2 x i64> [[TMP27]], [[TMP26]] +; CHECK-NEXT: [[TMP29:%.*]] = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> [[BIN_RDX]]) +; CHECK-NEXT: br label [[LOOP_PREHEADER36]] ; CHECK: loop.preheader36: ; CHECK-NEXT: [[IV_PH:%.*]] = phi i64 [ 1, [[LOOP_PREHEADER]] ], [ [[IND_END]], [[MIDDLE_BLOCK]] ] -; CHECK-NEXT: [[SUM_PH:%.*]] = phi i64 [ [[SUM_NEXT_PEEL]], [[LOOP_PREHEADER]] ], [ [[TMP24]], [[MIDDLE_BLOCK]] ] +; CHECK-NEXT: [[SUM_PH:%.*]] = phi i64 [ [[SUM_NEXT_PEEL]], [[LOOP_PREHEADER]] ], [ [[TMP29]], [[MIDDLE_BLOCK]] ] ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: -; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[IV_NEXT:%.*]], [[AT_WITH_INT_CONVERSION_EXIT24:%.*]] ], [ [[IV_PH]], [[LOOP_PREHEADER37]] ] -; CHECK-NEXT: [[SUM:%.*]] = phi i64 [ [[SUM_NEXT:%.*]], [[AT_WITH_INT_CONVERSION_EXIT24]] ], [ [[SUM_PH]], [[LOOP_PREHEADER37]] ] +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[IV_NEXT:%.*]], [[AT_WITH_INT_CONVERSION_EXIT24:%.*]] ], [ [[IV_PH]], [[LOOP_PREHEADER36]] ] +; CHECK-NEXT: [[SUM:%.*]] = phi i64 [ [[SUM_NEXT:%.*]], [[AT_WITH_INT_CONVERSION_EXIT24]] ], [ [[SUM_PH]], [[LOOP_PREHEADER36]] ] ; CHECK-NEXT: [[INRANGE_I:%.*]] = icmp ult i64 [[SUB_I]], [[IV]] ; CHECK-NEXT: br i1 [[INRANGE_I]], label [[ERROR_I:%.*]], label [[AT_WITH_INT_CONVERSION_EXIT:%.*]] ; CHECK: error.i: