diff --git a/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp b/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp --- a/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp +++ b/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp @@ -745,6 +745,21 @@ return (uint64_t)(LoopSize - UP.BEInsns) * UP.Count + UP.BEInsns; } +/// Helper struct whose destructor resets UP.PeelCount to zero if the loop's +/// actual peel count has not been computed yet. Without this reset, a peel +/// count set via TTI or -unroll-peel-count could remain in place for loops +/// that do not support peeling. +struct PeelCountReset { + bool PeelCountComputed = false; + TargetTransformInfo::UnrollingPreferences &UP; + + PeelCountReset(TargetTransformInfo::UnrollingPreferences &UP) : UP(UP) {} + + ~PeelCountReset() { + if (!PeelCountComputed) + UP.PeelCount = 0; + } +}; // Returns true if unroll count was set explicitly. // Calculates unroll count and writes it to UP.Count. // Unless IgnoreUser is true, will also use metadata and command-line options @@ -760,6 +775,7 @@ bool MaxOrZero, unsigned &TripMultiple, unsigned LoopSize, TargetTransformInfo::UnrollingPreferences &UP, bool &UseUpperBound) { + PeelCountReset PCR(UP); // Check for explicit Count. // 1st priority is unroll count set by "unroll-count" option. bool UserUnrollCount = UnrollCount.getNumOccurrences() > 0; @@ -838,6 +854,8 @@ UseUpperBound = (FullUnrollMaxTripCount == FullUnrollTripCount); TripCount = FullUnrollTripCount; TripMultiple = UP.UpperBound ? 1 : TripMultiple; + UP.PeelCount = 0; // Make sure we do not try to peel, if + // -unroll-peel-count or TTI set PeelCount. return ExplicitUnroll; } else { // The loop isn't that small, but we still can fully unroll it if that @@ -852,6 +870,8 @@ UseUpperBound = (FullUnrollMaxTripCount == FullUnrollTripCount); TripCount = FullUnrollTripCount; TripMultiple = UP.UpperBound ? 1 : TripMultiple; + UP.PeelCount = 0; // Make sure we do not try to peel, if + // -unroll-peel-count or TTI set PeelCount. 
return ExplicitUnroll; } } @@ -860,6 +880,7 @@ // 4th priority is loop peeling. computePeelCount(L, LoopSize, UP, TripCount, SE); + PCR.PeelCountComputed = true; if (UP.PeelCount) { UP.Runtime = false; UP.Count = 1; diff --git a/llvm/test/Transforms/LoopUnroll/pr33437.ll b/llvm/test/Transforms/LoopUnroll/pr33437.ll --- a/llvm/test/Transforms/LoopUnroll/pr33437.ll +++ b/llvm/test/Transforms/LoopUnroll/pr33437.ll @@ -33,39 +33,24 @@ ; CHECK-LABEL: @tinky2( ; CHECK-NEXT: entry: ; CHECK-NEXT: br label [[NEXT:%.*]] -; CHECK: loopexit.loopexit: -; CHECK-NEXT: br label [[LOOPEXIT:%.*]] ; CHECK: loopexit: ; CHECK-NEXT: ret void ; CHECK: next: -; CHECK-NEXT: br label [[LOOP_PEEL_BEGIN:%.*]] -; CHECK: loop.peel.begin: -; CHECK-NEXT: br label [[LOOP_PEEL:%.*]] -; CHECK: loop.peel: -; CHECK-NEXT: [[CALL593_PEEL:%.*]] = tail call zeroext i8 @patatino() -; CHECK-NEXT: [[B_NEXT_PEEL:%.*]] = add i32 0, 1 -; CHECK-NEXT: [[COND_PEEL:%.*]] = icmp ne i32 0, 30 -; CHECK-NEXT: br i1 [[COND_PEEL]], label [[LOOP_PEEL_NEXT:%.*]], label [[LOOPEXIT]] -; CHECK: loop.peel.next: -; CHECK-NEXT: br label [[LOOP_PEEL_NEXT1:%.*]] -; CHECK: loop.peel.next1: -; CHECK-NEXT: br label [[NEXT_PEEL_NEWPH:%.*]] -; CHECK: next.peel.newph: ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: -; CHECK-NEXT: [[B:%.*]] = phi i32 [ [[B_NEXT_PEEL]], [[NEXT_PEEL_NEWPH]] ], [ [[B_NEXT_3:%.*]], [[LOOP_2:%.*]] ] +; CHECK-NEXT: [[B:%.*]] = phi i32 [ 0, [[NEXT]] ], [ [[B_NEXT_3:%.*]], [[LOOP_3:%.*]] ] ; CHECK-NEXT: [[CALL593:%.*]] = tail call zeroext i8 @patatino() ; CHECK-NEXT: [[B_NEXT:%.*]] = add nuw nsw i32 [[B]], 1 ; CHECK-NEXT: [[CALL593_1:%.*]] = tail call zeroext i8 @patatino() ; CHECK-NEXT: [[B_NEXT_1:%.*]] = add nuw nsw i32 [[B_NEXT]], 1 -; CHECK-NEXT: [[COND_1:%.*]] = icmp ne i32 [[B_NEXT]], 30 -; CHECK-NEXT: br i1 [[COND_1]], label [[LOOP_2]], label [[LOOPEXIT_LOOPEXIT:%.*]], !llvm.loop !0 -; CHECK: loop.2: ; CHECK-NEXT: [[CALL593_2:%.*]] = tail call zeroext i8 @patatino() ; CHECK-NEXT: [[B_NEXT_2:%.*]] = add 
nuw nsw i32 [[B_NEXT_1]], 1 +; CHECK-NEXT: [[COND_2:%.*]] = icmp ne i32 [[B_NEXT_1]], 30 +; CHECK-NEXT: br i1 [[COND_2]], label [[LOOP_3]], label [[LOOPEXIT:%.*]] +; CHECK: loop.3: ; CHECK-NEXT: [[CALL593_3:%.*]] = tail call zeroext i8 @patatino() ; CHECK-NEXT: [[B_NEXT_3]] = add nuw nsw i32 [[B_NEXT_2]], 1 -; CHECK-NEXT: br label [[LOOP]], !llvm.loop !2 +; CHECK-NEXT: br label [[LOOP]], !llvm.loop !0 ; entry: br label %next diff --git a/llvm/test/Transforms/LoopUnroll/pr45939-peel-count-and-complete-unroll.ll b/llvm/test/Transforms/LoopUnroll/pr45939-peel-count-and-complete-unroll.ll --- a/llvm/test/Transforms/LoopUnroll/pr45939-peel-count-and-complete-unroll.ll +++ b/llvm/test/Transforms/LoopUnroll/pr45939-peel-count-and-complete-unroll.ll @@ -38,45 +38,19 @@ ; ; PEEL2UNROLL2-LABEL: @test1( ; PEEL2UNROLL2-NEXT: entry: -; PEEL2UNROLL2-NEXT: br label [[FOR_BODY_PEEL_BEGIN:%.*]] -; PEEL2UNROLL2: for.body.peel.begin: -; PEEL2UNROLL2-NEXT: br label [[FOR_BODY_PEEL:%.*]] -; PEEL2UNROLL2: for.body.peel: -; PEEL2UNROLL2-NEXT: [[ARRAYIDX_PEEL:%.*]] = getelementptr inbounds [8 x i32], [8 x i32]* @a, i64 0, i64 0 -; PEEL2UNROLL2-NEXT: [[TMP0:%.*]] = trunc i64 0 to i32 -; PEEL2UNROLL2-NEXT: store i32 [[TMP0]], i32* [[ARRAYIDX_PEEL]], align 4 -; PEEL2UNROLL2-NEXT: [[INDVARS_IV_NEXT_PEEL:%.*]] = add nuw nsw i64 0, 1 -; PEEL2UNROLL2-NEXT: [[EXITCOND_PEEL:%.*]] = icmp ne i64 [[INDVARS_IV_NEXT_PEEL]], 8 -; PEEL2UNROLL2-NEXT: br i1 [[EXITCOND_PEEL]], label [[FOR_BODY_PEEL_NEXT:%.*]], label [[FOR_EXIT:%.*]] -; PEEL2UNROLL2: for.body.peel.next: -; PEEL2UNROLL2-NEXT: br label [[FOR_BODY_PEEL2:%.*]] -; PEEL2UNROLL2: for.body.peel2: -; PEEL2UNROLL2-NEXT: [[ARRAYIDX_PEEL3:%.*]] = getelementptr inbounds [8 x i32], [8 x i32]* @a, i64 0, i64 [[INDVARS_IV_NEXT_PEEL]] -; PEEL2UNROLL2-NEXT: [[TMP1:%.*]] = trunc i64 [[INDVARS_IV_NEXT_PEEL]] to i32 -; PEEL2UNROLL2-NEXT: store i32 [[TMP1]], i32* [[ARRAYIDX_PEEL3]], align 4 -; PEEL2UNROLL2-NEXT: [[INDVARS_IV_NEXT_PEEL4:%.*]] = add nuw nsw i64 
[[INDVARS_IV_NEXT_PEEL]], 1 -; PEEL2UNROLL2-NEXT: [[EXITCOND_PEEL5:%.*]] = icmp ne i64 [[INDVARS_IV_NEXT_PEEL4]], 8 -; PEEL2UNROLL2-NEXT: br i1 [[EXITCOND_PEEL5]], label [[FOR_BODY_PEEL_NEXT1:%.*]], label [[FOR_EXIT]] -; PEEL2UNROLL2: for.body.peel.next1: -; PEEL2UNROLL2-NEXT: br label [[FOR_BODY_PEEL_NEXT6:%.*]] -; PEEL2UNROLL2: for.body.peel.next6: -; PEEL2UNROLL2-NEXT: br label [[ENTRY_PEEL_NEWPH:%.*]] -; PEEL2UNROLL2: entry.peel.newph: ; PEEL2UNROLL2-NEXT: br label [[FOR_BODY:%.*]] ; PEEL2UNROLL2: for.body: -; PEEL2UNROLL2-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT_PEEL4]], [[ENTRY_PEEL_NEWPH]] ], [ [[INDVARS_IV_NEXT_1:%.*]], [[FOR_BODY]] ] +; PEEL2UNROLL2-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT_1:%.*]], [[FOR_BODY]] ] ; PEEL2UNROLL2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [8 x i32], [8 x i32]* @a, i64 0, i64 [[INDVARS_IV]] -; PEEL2UNROLL2-NEXT: [[TMP2:%.*]] = trunc i64 [[INDVARS_IV]] to i32 -; PEEL2UNROLL2-NEXT: store i32 [[TMP2]], i32* [[ARRAYIDX]], align 4 +; PEEL2UNROLL2-NEXT: [[TMP0:%.*]] = trunc i64 [[INDVARS_IV]] to i32 +; PEEL2UNROLL2-NEXT: store i32 [[TMP0]], i32* [[ARRAYIDX]], align 4 ; PEEL2UNROLL2-NEXT: [[INDVARS_IV_NEXT:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 1 ; PEEL2UNROLL2-NEXT: [[ARRAYIDX_1:%.*]] = getelementptr inbounds [8 x i32], [8 x i32]* @a, i64 0, i64 [[INDVARS_IV_NEXT]] -; PEEL2UNROLL2-NEXT: [[TMP3:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32 -; PEEL2UNROLL2-NEXT: store i32 [[TMP3]], i32* [[ARRAYIDX_1]], align 4 +; PEEL2UNROLL2-NEXT: [[TMP1:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32 +; PEEL2UNROLL2-NEXT: store i32 [[TMP1]], i32* [[ARRAYIDX_1]], align 4 ; PEEL2UNROLL2-NEXT: [[INDVARS_IV_NEXT_1]] = add nuw nsw i64 [[INDVARS_IV_NEXT]], 1 ; PEEL2UNROLL2-NEXT: [[EXITCOND_1:%.*]] = icmp ne i64 [[INDVARS_IV_NEXT_1]], 8 -; PEEL2UNROLL2-NEXT: br i1 [[EXITCOND_1]], label [[FOR_BODY]], label [[FOR_EXIT_LOOPEXIT:%.*]], !llvm.loop !0 -; PEEL2UNROLL2: for.exit.loopexit: -; PEEL2UNROLL2-NEXT: 
br label [[FOR_EXIT]] +; PEEL2UNROLL2-NEXT: br i1 [[EXITCOND_1]], label [[FOR_BODY]], label [[FOR_EXIT:%.*]], !llvm.loop !0 ; PEEL2UNROLL2: for.exit: ; PEEL2UNROLL2-NEXT: ret void ; diff --git a/llvm/test/Transforms/LoopUnroll/pr45940-unroll-peel-unpeelable-loop.ll b/llvm/test/Transforms/LoopUnroll/pr45940-unroll-peel-unpeelable-loop.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Transforms/LoopUnroll/pr45940-unroll-peel-unpeelable-loop.ll @@ -0,0 +1,54 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt -loop-unroll -unroll-peel-count=2 -unroll-count=4 -S %s | FileCheck %s + +; The loop in @main can be unrolled but not peeled (due to limitations of the +; peeling implementation). Make sure we do not peel (or crash) when +; -unroll-peel-count is set. + +declare void @use(i32) + +define void @main() { +; CHECK-LABEL: @main( +; CHECK-NEXT: entry: +; CHECK-NEXT: br label [[FOR_HEADER:%.*]] +; CHECK: for.header: +; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT_3:%.*]], [[FOR_LATCH_3:%.*]] ] +; CHECK-NEXT: br i1 false, label [[FOR_LATCH:%.*]], label [[FOR_EXIT:%.*]] +; CHECK: for.latch: +; CHECK-NEXT: [[IV_NEXT:%.*]] = add nuw nsw i32 [[IV]], 1 +; CHECK-NEXT: call void @use(i32 [[IV_NEXT]]) +; CHECK-NEXT: br i1 false, label [[FOR_EXIT]], label [[FOR_HEADER_1:%.*]] +; CHECK: for.exit: +; CHECK-NEXT: ret void +; CHECK: for.header.1: +; CHECK-NEXT: br i1 false, label [[FOR_LATCH_1:%.*]], label [[FOR_EXIT]] +; CHECK: for.latch.1: +; CHECK-NEXT: [[IV_NEXT_1:%.*]] = add nuw nsw i32 [[IV_NEXT]], 1 +; CHECK-NEXT: call void @use(i32 [[IV_NEXT_1]]) +; CHECK-NEXT: br i1 false, label [[FOR_LATCH_2:%.*]], label [[FOR_EXIT]] +; CHECK: for.latch.2: +; CHECK-NEXT: [[IV_NEXT_2:%.*]] = add nuw nsw i32 [[IV_NEXT_1]], 1 +; CHECK-NEXT: call void @use(i32 [[IV_NEXT_2]]) +; CHECK-NEXT: br i1 false, label [[FOR_LATCH_3]], label [[FOR_EXIT]] +; CHECK: for.latch.3: +; CHECK-NEXT: [[IV_NEXT_3]] = add nuw nsw i32 [[IV_NEXT_2]], 1 +; 
CHECK-NEXT: call void @use(i32 [[IV_NEXT_3]]) +; CHECK-NEXT: br label [[FOR_HEADER]], !llvm.loop !0 +; +entry: + br label %for.header + +for.header: + %iv = phi i32 [ 0, %entry ], [ %iv.next, %for.latch ] + %c = icmp eq i32 %iv, 200 + br i1 %c, label %for.latch, label %for.exit + +for.latch: + %ec = icmp eq i32 %iv, 300 + %iv.next = add i32 %iv, 1 + call void @use(i32 %iv.next) + br i1 %ec, label %for.exit, label %for.header + +for.exit: ; preds = %for.latch, %for.header + ret void +}