diff --git a/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp b/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
--- a/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
@@ -625,6 +625,13 @@
   // We can only promote stores in this block if they are unconditionally
   // executed in the loop.  For a block to be unconditionally executed, it has
   // to dominate all the exit blocks of the loop.  Verify this now.
+  //
+  // This check must not be skipped for loops whose exit test sits in the
+  // header rather than in the latch.  A block that follows the exit test in
+  // such a loop runs only BECount times, while the idiom expansion sizes the
+  // memset/memcpy for BECount + 1 iterations, so promoting its stores would
+  // write one element more than the loop does (see
+  // test/Transforms/LoopIdiom/memset-unrotated-loop.ll).
   for (BasicBlock *ExitBlock : ExitBlocks)
     if (!DT->dominates(BB, ExitBlock))
       return false;
diff --git a/llvm/test/Transforms/LoopIdiom/memset-unrotated-loop.ll b/llvm/test/Transforms/LoopIdiom/memset-unrotated-loop.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/Transforms/LoopIdiom/memset-unrotated-loop.ll
@@ -0,0 +1,39 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -loop-idiom -S %s | FileCheck %s
+
+; The store sits in the latch of a loop whose only exit test is in the header,
+; so it executes end - start times, one fewer than the header.  A memset sized
+; from the backedge-taken count plus one would also write the byte at %end
+; (and would write a byte even when %start == %end), so the loop must be kept.
+
+define void @test(i8* noalias nonnull align 1 %start, i8* %end) unnamed_addr {
+; CHECK-LABEL: @test(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br label [[LOOP_HEADER:%.*]]
+; CHECK:       loop.header:
+; CHECK-NEXT:    [[PTR_IV:%.*]] = phi i8* [ [[START:%.*]], [[ENTRY:%.*]] ], [ [[PTR_IV_NEXT:%.*]], [[LOOP_LATCH:%.*]] ]
+; CHECK-NEXT:    [[_12_I:%.*]] = icmp eq i8* [[PTR_IV]], [[END:%.*]]
+; CHECK-NEXT:    br i1 [[_12_I]], label [[EXIT:%.*]], label [[LOOP_LATCH]]
+; CHECK:       loop.latch:
+; CHECK-NEXT:    [[PTR_IV_NEXT]] = getelementptr inbounds i8, i8* [[PTR_IV]], i64 1
+; CHECK-NEXT:    store i8 1, i8* [[PTR_IV]], align 1
+; CHECK-NEXT:    br label [[LOOP_HEADER]]
+; CHECK:       exit:
+; CHECK-NEXT:    ret void
+;
+entry:
+  br label %loop.header
+
+loop.header:
+  %ptr.iv = phi i8* [ %start, %entry ], [ %ptr.iv.next, %loop.latch ]
+  %_12.i = icmp eq i8* %ptr.iv, %end
+  br i1 %_12.i, label %exit, label %loop.latch
+
+loop.latch:
+  %ptr.iv.next = getelementptr inbounds i8, i8* %ptr.iv, i64 1
+  store i8 1, i8* %ptr.iv, align 1
+  br label %loop.header
+
+exit:
+  ret void
+}