Index: llvm/lib/Transforms/Scalar/LoopRerollPass.cpp =================================================================== --- llvm/lib/Transforms/Scalar/LoopRerollPass.cpp +++ llvm/lib/Transforms/Scalar/LoopRerollPass.cpp @@ -989,7 +989,6 @@ LoopIncs.push_back(cast<Instruction>(IVU)); } findRootsRecursive(IV, SmallInstructionSet()); - LoopIncs.push_back(IV); } else { if (!findRootsBase(IV, SmallInstructionSet())) return false; @@ -1069,6 +1068,18 @@ } } + // Allow simple arithmetic operations in def-use chain between the induction + // variable and roots, which are allowed in the findRootsRecursive function. + if (std::abs(Inc) == 1) { + DenseSet<Instruction *> V; + collectInLoopUserSet(IV, Exclude, PossibleRedSet, V); + for (auto *I : V) { + if (I != IV && !isSimpleArithmeticOp(I)) + return false; + Uses[I].set(IL_All); + } + } + // Make sure the loop increments are also accounted for. Exclude.clear(); Index: llvm/test/Transforms/LoopReroll/extra_instr.ll =================================================================== --- /dev/null +++ llvm/test/Transforms/LoopReroll/extra_instr.ll @@ -0,0 +1,696 @@ +; RUN: opt -S -loop-reroll %s | FileCheck %s +target triple = "aarch64--linux-gnu" + +declare void @bar(i32) + +define void @rerollable_simple([2 x i32]* nocapture %a) { +entry: + br label %loop + +loop: + +;CHECK-LABEL: @rerollable_simple +;CHECK: loop: +;CHECK-NEXT: %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] +;CHECK-NEXT: %scevgep + + ; base instruction + %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] + + ; NO unrerollable instructions + + ; extra simple arithmetic operations, used by root instructions + %plus20 = add nuw nsw i64 %iv, 20 + %plus10 = add nuw nsw i64 %iv, 10 + + ; root instruction 0 + %ldptr0 = getelementptr inbounds [2 x i32], [2 x i32]* %a, i64 %plus20, i64 0 + %value0 = load i32, i32* %ldptr0, align 4 + %stptr0 = getelementptr inbounds [2 x i32], [2 x i32]* %a, i64 %plus10, i64 0 + store i32 %value0, i32* %stptr0, align 4 + + ; root instruction 1 + %ldptr1 = 
getelementptr inbounds [2 x i32], [2 x i32]* %a, i64 %plus20, i64 1 + %value1 = load i32, i32* %ldptr1, align 4 + %stptr1 = getelementptr inbounds [2 x i32], [2 x i32]* %a, i64 %plus10, i64 1 + store i32 %value1, i32* %stptr1, align 4 + + ; loop-increment and latch + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond = icmp eq i64 %iv.next, 5 + br i1 %exitcond, label %exit, label %loop + +exit: + ret void +} + +define void @unrerollable_simple([2 x i32]* nocapture %a) { +entry: + br label %loop + +loop: + +;CHECK-LABEL: @unrerollable_simple +;CHECK: loop: +;CHECK-NEXT: %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] +;CHECK-NEXT: %stptrx = getelementptr inbounds [2 x i32], [2 x i32]* %a, i64 %iv, i64 0 +;CHECK-NEXT: store i32 999, i32* %stptrx, align 4 + + ; base instruction + %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] + + ; unrerollable instructions + %stptrx = getelementptr inbounds [2 x i32], [2 x i32]* %a, i64 %iv, i64 0 + store i32 999, i32* %stptrx, align 4 + + ; extra simple arithmetic operations, used by root instructions + %plus20 = add nuw nsw i64 %iv, 20 + %plus10 = add nuw nsw i64 %iv, 10 + + ; root instruction 0 + %ldptr0 = getelementptr inbounds [2 x i32], [2 x i32]* %a, i64 %plus20, i64 0 + %value0 = load i32, i32* %ldptr0, align 4 + %stptr0 = getelementptr inbounds [2 x i32], [2 x i32]* %a, i64 %plus10, i64 0 + store i32 %value0, i32* %stptr0, align 4 + + ; root instruction 1 + %ldptr1 = getelementptr inbounds [2 x i32], [2 x i32]* %a, i64 %plus20, i64 1 + %value1 = load i32, i32* %ldptr1, align 4 + %stptr1 = getelementptr inbounds [2 x i32], [2 x i32]* %a, i64 %plus10, i64 1 + store i32 %value1, i32* %stptr1, align 4 + + ; loop-increment and latch + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond = icmp eq i64 %iv.next, 5 + br i1 %exitcond, label %exit, label %loop + +exit: + ret void +} + +define void @rerollable_inc_sym() { +entry: + br label %loop + +exit: + ret void + +loop: + +;CHECK-LABEL: @rerollable_inc_sym +;CHECK: loop: +;CHECK-NEXT: %iv 
= phi i32 [ 0, %entry ], [ %iv.next, %loop ] +;CHECK-NEXT: %0 = add i32 %iv, 28 +;CHECK-NEXT: %1 = add i32 %iv, 24 + + ; induction variable + %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop ] + + ; scale instruction + %iv.mul3 = mul nuw nsw i32 %iv, 3 + + ; extra simple arithmetic operations, used by root instructions + %iv.scaled = add nuw nsw i32 %iv.mul3, 20 + + ; root set 1 + + ; base + %iv.scaled.add4 = add nuw nsw i32 %iv.scaled, 4 + %iv.scaled.add4.div5 = udiv i32 %iv.scaled.add4, 5 + tail call void @bar(i32 %iv.scaled.add4.div5) + ; root 1 + %iv.scaled.add5 = add nuw nsw i32 %iv.scaled, 5 + %iv.scaled.add5.div5 = udiv i32 %iv.scaled.add5, 5 + tail call void @bar(i32 %iv.scaled.add5.div5) + ; root 2 + %iv.scaled.add6 = add nuw nsw i32 %iv.scaled, 6 + %iv.scaled.add6.div5 = udiv i32 %iv.scaled.add6, 5 + tail call void @bar(i32 %iv.scaled.add6.div5) + + ; root set 2 + + ; base + %iv.scaled.add8 = add nuw nsw i32 %iv.scaled, 8 + %iv.scaled.add8.div5 = udiv i32 %iv.scaled.add8, 5 + tail call void @bar(i32 %iv.scaled.add8.div5) + ; root 1 + %iv.scaled.add9 = add nuw nsw i32 %iv.scaled, 9 + %iv.scaled.add9.div5 = udiv i32 %iv.scaled.add9, 5 + tail call void @bar(i32 %iv.scaled.add9.div5) + ; root 2 + %iv.scaled.add10 = add nuw nsw i32 %iv.scaled, 10 + %iv.scaled.add10.div5 = udiv i32 %iv.scaled.add10, 5 + tail call void @bar(i32 %iv.scaled.add10.div5) + + ; latch + %iv.next = add nuw nsw i32 %iv, 1 + %cmp = icmp ult i32 %iv.next, 3 + br i1 %cmp, label %loop, label %exit +} + +define void @unrerollable_inc_sym() { +entry: + br label %loop + +exit: + ret void + +loop: + +;CHECK-LABEL: @unrerollable_inc_sym +;CHECK: loop: +;CHECK-NEXT: %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop ] +;CHECK-NEXT: %iv.mul3 = mul nuw nsw i32 %iv, 3 +;CHECK-NEXT: %iv.scaled = add nuw nsw i32 %iv.mul3, 20 +;CHECK-NEXT: %iv.mul7 = mul nuw nsw i32 %iv, 7 +;CHECK-NEXT: tail call void @bar(i32 %iv.mul7) + + ; induction variable + %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop ] + + ; scale 
instruction + %iv.mul3 = mul nuw nsw i32 %iv, 3 + + ; extra simple arithmetic operations, used by root instructions + %iv.scaled = add nuw nsw i32 %iv.mul3, 20 + + ; unrerollable instructions + %iv.mul7 = mul nuw nsw i32 %iv, 7 + tail call void @bar(i32 %iv.mul7) + + ; root set 1 + + ; base + %iv.scaled.add4 = add nuw nsw i32 %iv.scaled, 4 + %iv.scaled.add4.div5 = udiv i32 %iv.scaled.add4, 5 + tail call void @bar(i32 %iv.scaled.add4.div5) + ; root 1 + %iv.scaled.add5 = add nuw nsw i32 %iv.scaled, 5 + %iv.scaled.add5.div5 = udiv i32 %iv.scaled.add5, 5 + tail call void @bar(i32 %iv.scaled.add5.div5) + ; root 2 + %iv.scaled.add6 = add nuw nsw i32 %iv.scaled, 6 + %iv.scaled.add6.div5 = udiv i32 %iv.scaled.add6, 5 + tail call void @bar(i32 %iv.scaled.add6.div5) + + ; root set 2 + + ; base + %iv.scaled.add8 = add nuw nsw i32 %iv.scaled, 8 + %iv.scaled.add8.div5 = udiv i32 %iv.scaled.add8, 5 + tail call void @bar(i32 %iv.scaled.add8.div5) + ; root 1 + %iv.scaled.add9 = add nuw nsw i32 %iv.scaled, 9 + %iv.scaled.add9.div5 = udiv i32 %iv.scaled.add9, 5 + tail call void @bar(i32 %iv.scaled.add9.div5) + ; root 2 + %iv.scaled.add10 = add nuw nsw i32 %iv.scaled, 10 + %iv.scaled.add10.div5 = udiv i32 %iv.scaled.add10, 5 + tail call void @bar(i32 %iv.scaled.add10.div5) + + ; latch + %iv.next = add nuw nsw i32 %iv, 1 + %cmp = icmp ult i32 %iv.next, 3 + br i1 %cmp, label %loop, label %exit +} + +define void @rerollable_inc_asym() { +entry: + br label %loop + +exit: + ret void + +loop: + +;CHECK-LABEL: @rerollable_inc_asym +;CHECK: loop: +;CHECK-NEXT: %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop ] +;CHECK-NEXT: %0 = add i32 %iv, 24 +;CHECK-NEXT: %1 = add i32 %iv, 20 + + ; induction variable + %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop ] + + ; scale instruction + %iv.mul3 = mul nuw nsw i32 %iv, 3 + + ; extra simple arithmetic operations, used by root instructions + %iv.scaled = add nuw nsw i32 %iv.mul3, 20 + + ; root set 1 + + ; base + %iv.scaled.div5 = udiv i32 %iv.scaled, 5 + 
tail call void @bar(i32 %iv.scaled.div5) + ; root 1 + %iv.scaled.add1 = add nuw nsw i32 %iv.scaled, 1 + %iv.scaled.add1.div5 = udiv i32 %iv.scaled.add1, 5 + tail call void @bar(i32 %iv.scaled.add1.div5) + ; root 2 + %iv.scaled.add2 = add nuw nsw i32 %iv.scaled, 2 + %iv.scaled.add2.div5 = udiv i32 %iv.scaled.add2, 5 + tail call void @bar(i32 %iv.scaled.add2.div5) + + ; root set 2 + + ; base + %iv.scaled.add4 = add nuw nsw i32 %iv.scaled, 4 + %iv.scaled.add4.div5 = udiv i32 %iv.scaled.add4, 5 + tail call void @bar(i32 %iv.scaled.add4.div5) + ; root 1 + %iv.scaled.add5 = add nuw nsw i32 %iv.scaled, 5 + %iv.scaled.add5.div5 = udiv i32 %iv.scaled.add5, 5 + tail call void @bar(i32 %iv.scaled.add5.div5) + ; root 2 + %iv.scaled.add6 = add nuw nsw i32 %iv.scaled, 6 + %iv.scaled.add6.div5 = udiv i32 %iv.scaled.add6, 5 + tail call void @bar(i32 %iv.scaled.add6.div5) + + ; latch + %iv.next = add nuw nsw i32 %iv, 1 + %cmp = icmp ult i32 %iv.next, 3 + br i1 %cmp, label %loop, label %exit +} + +define void @unrerollable_inc_asym() { +entry: + br label %loop + +exit: + ret void + +loop: + +;CHECK-LABEL: @unrerollable_inc_asym +;CHECK: loop: +;CHECK-NEXT: %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop ] +;CHECK-NEXT: %iv.mul3 = mul nuw nsw i32 %iv, 3 +;CHECK-NEXT: %iv.scaled = add nuw nsw i32 %iv.mul3, 20 +;CHECK-NEXT: %iv.mul7 = mul nuw nsw i32 %iv, 7 +;CHECK-NEXT: tail call void @bar(i32 %iv.mul7) + + ; induction variable + %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop ] + + ; scale instruction + %iv.mul3 = mul nuw nsw i32 %iv, 3 + + ; extra simple arithmetic operations, used by root instructions + %iv.scaled = add nuw nsw i32 %iv.mul3, 20 + + ; unrerollable instructions + %iv.mul7 = mul nuw nsw i32 %iv, 7 + tail call void @bar(i32 %iv.mul7) + + ; root set 1 + + ; base + %iv.scaled.div5 = udiv i32 %iv.scaled, 5 + tail call void @bar(i32 %iv.scaled.div5) + ; root 1 + %iv.scaled.add1 = add nuw nsw i32 %iv.scaled, 1 + %iv.scaled.add1.div5 = udiv i32 %iv.scaled.add1, 5 + tail call 
void @bar(i32 %iv.scaled.add1.div5) + ; root 2 + %iv.scaled.add2 = add nuw nsw i32 %iv.scaled, 2 + %iv.scaled.add2.div5 = udiv i32 %iv.scaled.add2, 5 + tail call void @bar(i32 %iv.scaled.add2.div5) + + ; root set 2 + + ; base + %iv.scaled.add4 = add nuw nsw i32 %iv.scaled, 4 + %iv.scaled.add4.div5 = udiv i32 %iv.scaled.add4, 5 + tail call void @bar(i32 %iv.scaled.add4.div5) + ; root 1 + %iv.scaled.add5 = add nuw nsw i32 %iv.scaled, 5 + %iv.scaled.add5.div5 = udiv i32 %iv.scaled.add5, 5 + tail call void @bar(i32 %iv.scaled.add5.div5) + ; root 2 + %iv.scaled.add6 = add nuw nsw i32 %iv.scaled, 6 + %iv.scaled.add6.div5 = udiv i32 %iv.scaled.add6, 5 + tail call void @bar(i32 %iv.scaled.add6.div5) + + ; latch + %iv.next = add nuw nsw i32 %iv, 1 + %cmp = icmp ult i32 %iv.next, 3 + br i1 %cmp, label %loop, label %exit +} + +define void @rerollable_dec_sym() { +entry: + br label %loop + +exit: + ret void + +loop: + +;CHECK-LABEL: @rerollable_dec_sym +;CHECK: loop: +;CHECK-NEXT: %indvar = phi i32 [ %indvar.next, %loop ], [ 0, %entry ] +;CHECK-NEXT: %0 = mul nsw i32 %indvar, -1 +;CHECK-NEXT: %1 = add i32 %0, 336 + + ; induction variable + %iv = phi i32 [ 108, %entry ], [ %iv.next, %loop ] + + ; scale instruction + %iv.mul3 = mul nuw nsw i32 %iv, 3 + + ; extra simple arithmetic operations, used by root instructions + %iv.scaled = add nuw nsw i32 %iv.mul3, 20 + + ; root set 1 + + ; base + %iv.scaled.sub4 = add nuw nsw i32 %iv.scaled, -4 + %iv.scaled.sub4.div5 = udiv i32 %iv.scaled.sub4, 5 + tail call void @bar(i32 %iv.scaled.sub4.div5) + ; root 1 + %iv.scaled.sub5 = add nuw nsw i32 %iv.scaled, -5 + %iv.scaled.sub5.div5 = udiv i32 %iv.scaled.sub5, 5 + tail call void @bar(i32 %iv.scaled.sub5.div5) + ; root 2 + %iv.scaled.sub6 = add nuw nsw i32 %iv.scaled, -6 + %iv.scaled.sub6.div5 = udiv i32 %iv.scaled.sub6, 5 + tail call void @bar(i32 %iv.scaled.sub6.div5) + + ; root set 2 + + ; base + %iv.scaled.sub8 = add nuw nsw i32 %iv.scaled, -8 + %iv.scaled.sub8.div5 = udiv i32 
%iv.scaled.sub8, 5 + tail call void @bar(i32 %iv.scaled.sub8.div5) + ; root 1 + %iv.scaled.sub9 = add nuw nsw i32 %iv.scaled, -9 + %iv.scaled.sub9.div5 = udiv i32 %iv.scaled.sub9, 5 + tail call void @bar(i32 %iv.scaled.sub9.div5) + ; root 2 + %iv.scaled.sub10 = add nuw nsw i32 %iv.scaled, -10 + %iv.scaled.sub10.div5 = udiv i32 %iv.scaled.sub10, 5 + tail call void @bar(i32 %iv.scaled.sub10.div5) + + ; latch + %iv.next = add nuw nsw i32 %iv, -1 + %cmp = icmp uge i32 %iv.next, 100 + br i1 %cmp, label %loop, label %exit +} + +define void @unrerollable_dec_sym() { +entry: + br label %loop + +exit: + ret void + +loop: + +;CHECK-LABEL: @unrerollable_dec_sym +;CHECK: loop: +;CHECK-NEXT: %iv = phi i32 [ 108, %entry ], [ %iv.next, %loop ] +;CHECK-NEXT: %iv.mul3 = mul nuw nsw i32 %iv, 3 +;CHECK-NEXT: %iv.scaled = add nuw nsw i32 %iv.mul3, 20 +;CHECK-NEXT: %iv.mul7 = mul nuw nsw i32 %iv, 7 +;CHECK-NEXT: tail call void @bar(i32 %iv.mul7) + + ; induction variable + %iv = phi i32 [ 108, %entry ], [ %iv.next, %loop ] + + ; scale instruction + %iv.mul3 = mul nuw nsw i32 %iv, 3 + + ; extra simple arithmetic operations, used by root instructions + %iv.scaled = add nuw nsw i32 %iv.mul3, 20 + + ; unrerollable instructions + %iv.mul7 = mul nuw nsw i32 %iv, 7 + tail call void @bar(i32 %iv.mul7) + + ; root set 1 + + ; base + %iv.scaled.sub4 = add nuw nsw i32 %iv.scaled, -4 + %iv.scaled.sub4.div5 = udiv i32 %iv.scaled.sub4, 5 + tail call void @bar(i32 %iv.scaled.sub4.div5) + ; root 1 + %iv.scaled.sub5 = add nuw nsw i32 %iv.scaled, -5 + %iv.scaled.sub5.div5 = udiv i32 %iv.scaled.sub5, 5 + tail call void @bar(i32 %iv.scaled.sub5.div5) + ; root 2 + %iv.scaled.sub6 = add nuw nsw i32 %iv.scaled, -6 + %iv.scaled.sub6.div5 = udiv i32 %iv.scaled.sub6, 5 + tail call void @bar(i32 %iv.scaled.sub6.div5) + + ; root set 2 + + ; base + %iv.scaled.sub8 = add nuw nsw i32 %iv.scaled, -8 + %iv.scaled.sub8.div5 = udiv i32 %iv.scaled.sub8, 5 + tail call void @bar(i32 %iv.scaled.sub8.div5) + ; root 1 + 
%iv.scaled.sub9 = add nuw nsw i32 %iv.scaled, -9 + %iv.scaled.sub9.div5 = udiv i32 %iv.scaled.sub9, 5 + tail call void @bar(i32 %iv.scaled.sub9.div5) + ; root 2 + %iv.scaled.sub10 = add nuw nsw i32 %iv.scaled, -10 + %iv.scaled.sub10.div5 = udiv i32 %iv.scaled.sub10, 5 + tail call void @bar(i32 %iv.scaled.sub10.div5) + + ; latch + %iv.next = add nuw nsw i32 %iv, -1 + %cmp = icmp uge i32 %iv.next, 100 + br i1 %cmp, label %loop, label %exit +} + +define void @rerollable_dec_asym() { +entry: + br label %loop + +exit: + ret void + +loop: + +;CHECK-LABEL: @rerollable_dec_asym +;CHECK: loop: +;CHECK-NEXT: %indvar = phi i32 [ %indvar.next, %loop ], [ 0, %entry ] +;CHECK-NEXT: %0 = mul nsw i32 %indvar, -1 +;CHECK-NEXT: %1 = add i32 %0, 340 + + ; induction variable + %iv = phi i32 [ 108, %entry ], [ %iv.next, %loop ] + + ; scale instruction + %iv.mul3 = mul nuw nsw i32 %iv, 3 + + ; extra simple arithmetic operations, used by root instructions + %iv.scaled = add nuw nsw i32 %iv.mul3, 20 + + ; root set 1 + + ; base + %iv.scaled.div5 = udiv i32 %iv.scaled, 5 + tail call void @bar(i32 %iv.scaled.div5) + ; root 1 + %iv.scaled.sub1 = add nuw nsw i32 %iv.scaled, -1 + %iv.scaled.sub1.div5 = udiv i32 %iv.scaled.sub1, 5 + tail call void @bar(i32 %iv.scaled.sub1.div5) + ; root 2 + %iv.scaled.sub2 = add nuw nsw i32 %iv.scaled, -2 + %iv.scaled.sub2.div5 = udiv i32 %iv.scaled.sub2, 5 + tail call void @bar(i32 %iv.scaled.sub2.div5) + + ; root set 2 + + ; base + %iv.scaled.sub4 = add nuw nsw i32 %iv.scaled, -4 + %iv.scaled.sub4.div5 = udiv i32 %iv.scaled.sub4, 5 + tail call void @bar(i32 %iv.scaled.sub4.div5) + ; root 1 + %iv.scaled.sub5 = add nuw nsw i32 %iv.scaled, -5 + %iv.scaled.sub5.div5 = udiv i32 %iv.scaled.sub5, 5 + tail call void @bar(i32 %iv.scaled.sub5.div5) + ; root 2 + %iv.scaled.sub6 = add nuw nsw i32 %iv.scaled, -6 + %iv.scaled.sub6.div5 = udiv i32 %iv.scaled.sub6, 5 + tail call void @bar(i32 %iv.scaled.sub6.div5) + + ; latch + %iv.next = add nuw nsw i32 %iv, -1 + %cmp = icmp 
uge i32 %iv.next, 100 + br i1 %cmp, label %loop, label %exit +} + +define void @unrerollable_dec_asym() { +entry: + br label %loop + +exit: + ret void + +loop: + +;CHECK-LABEL: @unrerollable_dec_asym +;CHECK: loop: +;CHECK-NEXT: %iv = phi i32 [ 108, %entry ], [ %iv.next, %loop ] +;CHECK-NEXT: %iv.mul3 = mul nuw nsw i32 %iv, 3 +;CHECK-NEXT: %iv.scaled = add nuw nsw i32 %iv.mul3, 20 +;CHECK-NEXT: %iv.mul7 = mul nuw nsw i32 %iv, 7 +;CHECK-NEXT: tail call void @bar(i32 %iv.mul7) + + ; induction variable + %iv = phi i32 [ 108, %entry ], [ %iv.next, %loop ] + + ; scale instruction + %iv.mul3 = mul nuw nsw i32 %iv, 3 + + ; extra simple arithmetic operations, used by root instructions + %iv.scaled = add nuw nsw i32 %iv.mul3, 20 + + ; unrerollable instructions + %iv.mul7 = mul nuw nsw i32 %iv, 7 + tail call void @bar(i32 %iv.mul7) + + ; root set 1 + + ; base + %iv.scaled.div5 = udiv i32 %iv.scaled, 5 + tail call void @bar(i32 %iv.scaled.div5) + ; root 1 + %iv.scaled.sub1 = add nuw nsw i32 %iv.scaled, -1 + %iv.scaled.sub1.div5 = udiv i32 %iv.scaled.sub1, 5 + tail call void @bar(i32 %iv.scaled.sub1.div5) + ; root 2 + %iv.scaled.sub2 = add nuw nsw i32 %iv.scaled, -2 + %iv.scaled.sub2.div5 = udiv i32 %iv.scaled.sub2, 5 + tail call void @bar(i32 %iv.scaled.sub2.div5) + + ; root set 2 + + ; base + %iv.scaled.sub4 = add nuw nsw i32 %iv.scaled, -4 + %iv.scaled.sub4.div5 = udiv i32 %iv.scaled.sub4, 5 + tail call void @bar(i32 %iv.scaled.sub4.div5) + ; root 1 + %iv.scaled.sub5 = add nuw nsw i32 %iv.scaled, -5 + %iv.scaled.sub5.div5 = udiv i32 %iv.scaled.sub5, 5 + tail call void @bar(i32 %iv.scaled.sub5.div5) + ; root 2 + %iv.scaled.sub6 = add nuw nsw i32 %iv.scaled, -6 + %iv.scaled.sub6.div5 = udiv i32 %iv.scaled.sub6, 5 + tail call void @bar(i32 %iv.scaled.sub6.div5) + + ; latch + %iv.next = add nuw nsw i32 %iv, -1 + %cmp = icmp uge i32 %iv.next, 100 + br i1 %cmp, label %loop, label %exit +} + +define void @rerollable_inc3_sym() { +entry: + br label %loop + +exit: + ret void + 
+loop: + +;CHECK-LABEL: @rerollable_inc3_sym +;CHECK: loop: +;CHECK-NEXT: %indvar = phi i32 [ %indvar.next, %loop ], [ 0, %entry ] +;CHECK-NEXT: %0 = add i32 %indvar, 8 +;CHECK-NEXT: %1 = add i32 %indvar, 4 + + ; induction variable + %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop ] + + ; root set 1 + + ; base + %iv.add4 = add nuw nsw i32 %iv, 4 + %iv.add4.div5 = udiv i32 %iv.add4, 5 + tail call void @bar(i32 %iv.add4.div5) + ; root 1 + %iv.add5 = add nuw nsw i32 %iv, 5 + %iv.add5.div5 = udiv i32 %iv.add5, 5 + tail call void @bar(i32 %iv.add5.div5) + ; root 2 + %iv.add6 = add nuw nsw i32 %iv, 6 + %iv.add6.div5 = udiv i32 %iv.add6, 5 + tail call void @bar(i32 %iv.add6.div5) + + ; root set 2 + + ; base + %iv.add8 = add nuw nsw i32 %iv, 8 + %iv.add8.div5 = udiv i32 %iv.add8, 5 + tail call void @bar(i32 %iv.add8.div5) + ; root 1 + %iv.add9 = add nuw nsw i32 %iv, 9 + %iv.add9.div5 = udiv i32 %iv.add9, 5 + tail call void @bar(i32 %iv.add9.div5) + ; root 2 + %iv.add10 = add nuw nsw i32 %iv, 10 + %iv.add10.div5 = udiv i32 %iv.add10, 5 + tail call void @bar(i32 %iv.add10.div5) + + ; latch + %iv.next = add nuw nsw i32 %iv, 3 + %cmp = icmp ult i32 %iv.next, 9 + br i1 %cmp, label %loop, label %exit +} + +define void @rerollable_inc3_asym() { +entry: + br label %loop + +exit: + ret void + +loop: + +;CHECK-LABEL: @rerollable_inc3_asym +;CHECK: loop: +;CHECK-NEXT: %indvar = phi i32 [ %indvar.next, %loop ], [ 0, %entry ] +;CHECK-NEXT: %0 = add i32 %indvar, 4 +;CHECK-NEXT: %iv.div5 = udiv i32 %indvar, 5 + + ; induction variable + %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop ] + + ; root set 1 + + ; base + %iv.div5 = udiv i32 %iv, 5 + tail call void @bar(i32 %iv.div5) + ; root 1 + %iv.add1 = add nuw nsw i32 %iv, 1 + %iv.add1.div5 = udiv i32 %iv.add1, 5 + tail call void @bar(i32 %iv.add1.div5) + ; root 2 + %iv.add2 = add nuw nsw i32 %iv, 2 + %iv.add2.div5 = udiv i32 %iv.add2, 5 + tail call void @bar(i32 %iv.add2.div5) + + ; root set 2 + + ; base + %iv.add4 = add nuw nsw i32 %iv, 4 + 
%iv.add4.div5 = udiv i32 %iv.add4, 5 + tail call void @bar(i32 %iv.add4.div5) + ; root 1 + %iv.add5 = add nuw nsw i32 %iv, 5 + %iv.add5.div5 = udiv i32 %iv.add5, 5 + tail call void @bar(i32 %iv.add5.div5) + ; root 2 + %iv.add6 = add nuw nsw i32 %iv, 6 + %iv.add6.div5 = udiv i32 %iv.add6, 5 + tail call void @bar(i32 %iv.add6.div5) + + ; latch + %iv.next = add nuw nsw i32 %iv, 3 + %cmp = icmp ult i32 %iv.next, 9 + br i1 %cmp, label %loop, label %exit +}