diff --git a/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp --- a/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp +++ b/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp @@ -2350,6 +2350,82 @@ return NewCond; } +// Remove any nowrap flags that occur on instructions in the control flow +// between current position of Cond and its planned new position. +// +// This is to account for situations where an increment that occurs after the +// Cond was allowed to wrap when its (poison) value could not possibly flow into +// the Cond, but with the Cond's new position after LSR, the poison *does* flow +// into the Cond, yielding undefined behavior. +void removeNoWrapBetweenCondAndTermBr(ICmpInst *Cond, BranchInst *TermBr) { + // In the common case, the Cond and the TermBr will be in the same block... + auto FinalIP = BasicBlock::iterator(TermBr); + bool SameBlock = true; + // ... but this does not hold in general + if (Cond->getParent() != TermBr->getParent()) { + // initial processing instead stops with the Cond block's end instruction, + // and we'll deal with the rest of the instructions below. + FinalIP = Cond->getParent()->end(); + SameBlock = false; + } + + auto InterimIP = BasicBlock::iterator(Cond); + ++InterimIP; + while (InterimIP != FinalIP) { + Instruction *InterimInst = &*InterimIP; + LLVM_DEBUG(dbgs() << " Will be moving Cond: "; Cond->print(dbgs()); + dbgs() << " past InterimInst: "; InterimInst->print(dbgs()); + dbgs() << " so, drop its poison generating flags.\n"); + InterimInst->dropPoisonGeneratingFlags(); + ++InterimIP; + } + + // Easy and common case: We're done. + if (SameBlock) + return; + + // Otherwise: traverse successors to find remaining instructions between Cond + // and TermBr. + SmallVector Worklist; + SmallPtrSet Visited; + for (BasicBlock *Succ : successors(Cond->getParent())) { + Visited.insert(Succ); + Worklist.push_back(Succ); + } + + while (!Worklist.empty()) { + BasicBlock *BB = Worklist.pop_back_val(); + + // For every BB that doesn't have TermBr, we'll need to keep processing its + // successors. + bool add_successors = true; + + for (Instruction &InterimInst : *BB) { + if (&InterimInst != TermBr) { + LLVM_DEBUG(dbgs() << " Will be moving Cond: "; Cond->print(dbgs()); + dbgs() << " past InterimInst: "; InterimInst.print(dbgs()); + dbgs() << " so, drop its poison generating flags.\n"); + InterimInst.dropPoisonGeneratingFlags(); + } else { + // found the terminating condition; no need to process remainder of + // *this* block (nor its successors). But we may have other blocks that + // do still need processing in our worklist, so cannot just return here. + add_successors = false; + break; + } + } + + if (add_successors) { + for (BasicBlock *Succ : successors(BB)) { + // Don't process the same BB twice + if (!Visited.insert(Succ).second) + continue; + Worklist.push_back(Succ); + } + } + } +} + /// Change loop terminating condition to use the postinc iv when possible. void LSRInstance::OptimizeLoopTermCond() { @@ -2470,6 +2546,11 @@ // possible for it to have multiple users. If it is not immediately before // the exiting block branch, move it. if (&*++BasicBlock::iterator(Cond) != TermBr) { + + // Remove any nowrap flags that occur on instructions in the control flow + // between current position of Cond and its planned new position. + removeNoWrapBetweenCondAndTermBr(Cond, TermBr); + if (Cond->hasOneUse()) { Cond->moveBefore(TermBr); } else { diff --git a/llvm/test/Transforms/LoopStrengthReduce/X86/nested-loop.ll b/llvm/test/Transforms/LoopStrengthReduce/X86/nested-loop.ll --- a/llvm/test/Transforms/LoopStrengthReduce/X86/nested-loop.ll +++ b/llvm/test/Transforms/LoopStrengthReduce/X86/nested-loop.ll @@ -40,7 +40,7 @@ ; CHECK-NEXT: br label [[FOR_INC]] ; CHECK: for.inc: ; CHECK-NEXT: [[INDVARS_IV_NEXT3]] = add nuw nsw i64 [[INDVARS_IV2]], 1 -; CHECK-NEXT: [[LSR_IV_NEXT2]] = add nuw nsw i64 [[LSR_IV1]], [[T0]] +; CHECK-NEXT: [[LSR_IV_NEXT2]] = add i64 [[LSR_IV1]], [[T0]] ; CHECK-NEXT: [[CMP:%.*]] = icmp slt i64 [[INDVARS_IV_NEXT3]], [[T1]] ; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_END_LOOPEXIT:%.*]] ; CHECK: for.end.loopexit: diff --git a/llvm/test/Transforms/LoopStrengthReduce/dont_move_nowrap_op_before_test.ll b/llvm/test/Transforms/LoopStrengthReduce/dont_move_nowrap_op_before_test.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Transforms/LoopStrengthReduce/dont_move_nowrap_op_before_test.ll @@ -0,0 +1,41 @@ +; RUN: opt < %s -loop-reduce -S | FileCheck %s + +; If the output of a no-wrapping operation flows into the loop test expression, +; then we cannot move the loop test expression after that operation without also +; removing the no-wrap flag. + +target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +declare void @dummy(i8 zeroext) + +define i8 @test() { + br label %loop.1 + +; If loop test is moved, then no-wrap must be removed. +; (also double-checks loop test actually moved, to avoid false negative). + +; CHECK: %x.2 = phi i8 [ 0, %0 ], [ %x.5, %loop.2 ] +; CHECK-NEXT: call void @dummy(i8 %x.2) +; CHECK-NEXT: %x.4 = add i8 %x.2, 1 +; CHECK: %x.5 = add i8 %x.4, 0 +; CHECK-NEXT: %x.3 = icmp eq i8 %x.5, 0 + +loop.1: + %x.2 = phi i8 [ 0, %0 ], [ %x.5, %loop.2 ] + call void @dummy(i8 %x.2) + + %x.3 = icmp eq i8 %x.2, -1 + ; after final loop test, this produces (unused) poison ... + %x.4 = add nuw i8 %x.2, 1 + ; (in general, the icmp and cond br may be in distinct blocks) + br label %loop.2 + +loop.2: + %x.5 = add i8 %x.4, 0 + ; ... but when test is moved here by LSR, poison becomes *used*. + br i1 %x.3, label %done, label %loop.1 + +done: + ret i8 %x.2 +}