LoopReroll: abort rerolling when an instruction collected into V (via
collectInLoopUserSet on LoopIncs) may have side effects, instead of
unconditionally marking it with IL_All.

The new test extra_instr.ll has one loop that is still rerolled and two loops
containing an extra store whose instructions must be left in place.

Index: llvm/lib/Transforms/Scalar/LoopRerollPass.cpp
===================================================================
--- llvm/lib/Transforms/Scalar/LoopRerollPass.cpp
+++ llvm/lib/Transforms/Scalar/LoopRerollPass.cpp
@@ -1080,6 +1080,14 @@
   DenseSet V;
   collectInLoopUserSet(LoopIncs, Exclude, PossibleRedSet, V);
   for (auto *I : V) {
+    // Abort rerolling if an instruction that is not part of any root set may
+    // have side effects (see the debug message below).
+    if (I->mayHaveSideEffects()) {
+      LLVM_DEBUG(dbgs() << "LRR: Aborting - An instruction which does not "
+                        << "belong to any root set may have side effects: "
+                        << *I << "\n");
+      return false;
+    }
     Uses[I].set(IL_All);
   }
 
Index: llvm/test/Transforms/LoopReroll/extra_instr.ll
===================================================================
--- /dev/null
+++ llvm/test/Transforms/LoopReroll/extra_instr.ll
@@ -0,0 +1,135 @@
+; RUN: opt -S -loop-reroll %s | FileCheck %s
+target triple = "aarch64--linux-gnu"
+
+define void @rerollable1([2 x i32]* nocapture %a) {
+entry:
+  br label %loop
+
+loop:
+
+; CHECK-LABEL: loop:
+; CHECK-NEXT: %indvars = phi i64 [ 0, %entry ], [ %indvars.next, %loop ]
+; CHECK-NEXT: %scevgep
+
+  ; base instruction
+  %indvars = phi i64 [ 0, %entry ], [ %indvars.next, %loop ]
+
+  ; NO unrerollable instructions
+
+  ; extra simple arithmetic operations, used by root instructions
+  %plus20 = add nuw nsw i64 %indvars, 20
+  %plus10 = add nuw nsw i64 %indvars, 10
+
+  ; root instruction 0
+  %ldptr0 = getelementptr inbounds [2 x i32], [2 x i32]* %a, i64 %plus20, i64 0
+  %value0 = load i32, i32* %ldptr0, align 4
+  %stptr0 = getelementptr inbounds [2 x i32], [2 x i32]* %a, i64 %plus10, i64 0
+  store i32 %value0, i32* %stptr0, align 4
+
+  ; root instruction 1
+  %ldptr1 = getelementptr inbounds [2 x i32], [2 x i32]* %a, i64 %plus20, i64 1
+  %value1 = load i32, i32* %ldptr1, align 4
+  %stptr1 = getelementptr inbounds [2 x i32], [2 x i32]* %a, i64 %plus10, i64 1
+  store i32 %value1, i32* %stptr1, align 4
+
+  ; loop-increment
+  %indvars.next = add nuw nsw i64 %indvars, 1
+
+  ; latch
+  %exitcond = icmp eq i64 %indvars.next, 5
+  br i1 %exitcond, label %exit, label %loop
+
+exit:
+  ret void
+}
+
+define void @unrerollable1([2 x i32]* nocapture %a) {
+entry:
+  br label %loop
+
+loop:
+
+; CHECK-LABEL: loop:
+; CHECK-NEXT: %indvars = phi i64 [ 0, %entry ], [ %indvars.next, %loop ]
+; CHECK-NEXT: %stptrx = getelementptr inbounds [2 x i32], [2 x i32]* %a, i64 %indvars, i64 0
+; CHECK-NEXT: store i32 999, i32* %stptrx, align 4
+
+  ; base instruction
+  %indvars = phi i64 [ 0, %entry ], [ %indvars.next, %loop ]
+
+  ; unrerollable instructions using %indvars
+  %stptrx = getelementptr inbounds [2 x i32], [2 x i32]* %a, i64 %indvars, i64 0
+  store i32 999, i32* %stptrx, align 4
+
+  ; extra simple arithmetic operations, used by root instructions
+  %plus20 = add nuw nsw i64 %indvars, 20
+  %plus10 = add nuw nsw i64 %indvars, 10
+
+  ; root instruction 0
+  %ldptr0 = getelementptr inbounds [2 x i32], [2 x i32]* %a, i64 %plus20, i64 0
+  %value0 = load i32, i32* %ldptr0, align 4
+  %stptr0 = getelementptr inbounds [2 x i32], [2 x i32]* %a, i64 %plus10, i64 0
+  store i32 %value0, i32* %stptr0, align 4
+
+  ; root instruction 1
+  %ldptr1 = getelementptr inbounds [2 x i32], [2 x i32]* %a, i64 %plus20, i64 1
+  %value1 = load i32, i32* %ldptr1, align 4
+  %stptr1 = getelementptr inbounds [2 x i32], [2 x i32]* %a, i64 %plus10, i64 1
+  store i32 %value1, i32* %stptr1, align 4
+
+  ; loop-increment
+  %indvars.next = add nuw nsw i64 %indvars, 1
+
+  ; latch
+  %exitcond = icmp eq i64 %indvars.next, 5
+  br i1 %exitcond, label %exit, label %loop
+
+exit:
+  ret void
+}
+
+define void @unrerollable2([2 x i32]* nocapture %a) {
+entry:
+  br label %loop
+
+loop:
+
+; CHECK-LABEL: loop:
+; CHECK-NEXT: %indvars = phi i64 [ 0, %entry ], [ %indvars.next, %loop ]
+; CHECK-NEXT: %indvars.next = add nuw nsw i64 %indvars, 1
+; CHECK-NEXT: %stptrx = getelementptr inbounds [2 x i32], [2 x i32]* %a, i64 %indvars.next, i64 0
+; CHECK-NEXT: store i32 999, i32* %stptrx, align 4
+
+  ; base instruction
+  %indvars = phi i64 [ 0, %entry ], [ %indvars.next, %loop ]
+
+  ; loop-increment
+  %indvars.next = add nuw nsw i64 %indvars, 1
+
+  ; unrerollable instructions using %indvars.next
+  %stptrx = getelementptr inbounds [2 x i32], [2 x i32]* %a, i64 %indvars.next, i64 0
+  store i32 999, i32* %stptrx, align 4
+
+  ; extra simple arithmetic operations, used by root instructions
+  %plus20 = add nuw nsw i64 %indvars, 20
+  %plus10 = add nuw nsw i64 %indvars, 10
+
+  ; root instruction 0
+  %ldptr0 = getelementptr inbounds [2 x i32], [2 x i32]* %a, i64 %plus20, i64 0
+  %value0 = load i32, i32* %ldptr0, align 4
+  %stptr0 = getelementptr inbounds [2 x i32], [2 x i32]* %a, i64 %plus10, i64 0
+  store i32 %value0, i32* %stptr0, align 4
+
+  ; root instruction 1
+  %ldptr1 = getelementptr inbounds [2 x i32], [2 x i32]* %a, i64 %plus20, i64 1
+  %value1 = load i32, i32* %ldptr1, align 4
+  %stptr1 = getelementptr inbounds [2 x i32], [2 x i32]* %a, i64 %plus10, i64 1
+  store i32 %value1, i32* %stptr1, align 4
+
+  ; latch
+  %exitcond = icmp eq i64 %indvars.next, 5
+  br i1 %exitcond, label %exit, label %loop
+
+exit:
+  ret void
+}