Index: llvm/lib/Analysis/ScalarEvolutionExpander.cpp =================================================================== --- llvm/lib/Analysis/ScalarEvolutionExpander.cpp +++ llvm/lib/Analysis/ScalarEvolutionExpander.cpp @@ -1732,49 +1732,52 @@ // Compute an insertion point for this SCEV object. Hoist the instructions // as far out in the loop nest as possible. Instruction *InsertPt = &*Builder.GetInsertPoint(); - for (Loop *L = SE.LI.getLoopFor(Builder.GetInsertBlock());; - L = L->getParentLoop()) - if (SE.isLoopInvariant(S, L)) { - if (!L) break; - if (BasicBlock *Preheader = L->getLoopPreheader()) - InsertPt = Preheader->getTerminator(); - else { - // LSR sets the insertion point for AddRec start/step values to the - // block start to simplify value reuse, even though it's an invalid - // position. SCEVExpander must correct for this in all cases. - InsertPt = &*L->getHeader()->getFirstInsertionPt(); - } - } else { - // We can move insertion point only if there is no div or rem operations - // otherwise we are risky to move it over the check for zero denominator. - auto SafeToHoist = [](const SCEV *S) { - return !SCEVExprContains(S, [](const SCEV *S) { - if (const auto *D = dyn_cast(S)) { - if (const auto *SC = dyn_cast(D->getRHS())) - // Division by non-zero constants can be hoisted. - return SC->getValue()->isZero(); - // All other divisions should not be moved as they may be - // divisions by zero and should be kept within the - // conditions of the surrounding loops that guard their - // execution (see PR35406). - return true; - } - return false; - }); - }; - // If the SCEV is computable at this level, insert it into the header - // after the PHIs (and after any other instructions that we've inserted - // there) so that it is guaranteed to dominate any user inside the loop. - if (L && SE.hasComputableLoopEvolution(S, L) && !PostIncLoops.count(L) && - SafeToHoist(S)) - InsertPt = &*L->getHeader()->getFirstInsertionPt(); - while (InsertPt->getIterator() != Builder.GetInsertPoint() && - (isInsertedInstruction(InsertPt) || - isa(InsertPt))) { - InsertPt = &*std::next(InsertPt->getIterator()); + + // We can move insertion point only if there is no div or rem operations + // otherwise we are risky to move it over the check for zero denominator. + auto SafeToHoist = [](const SCEV *S) { + return !SCEVExprContains(S, [](const SCEV *S) { + if (const auto *D = dyn_cast(S)) { + if (const auto *SC = dyn_cast(D->getRHS())) + // Division by non-zero constants can be hoisted. + return SC->getValue()->isZero(); + // All other divisions should not be moved as they may be + // divisions by zero and should be kept within the + // conditions of the surrounding loops that guard their + // execution (see PR35406). + return true; + } + return false; + }); + }; + if (SafeToHoist(S)) { + for (Loop *L = SE.LI.getLoopFor(Builder.GetInsertBlock());; + L = L->getParentLoop()) { + if (SE.isLoopInvariant(S, L)) { + if (!L) break; + if (BasicBlock *Preheader = L->getLoopPreheader()) + InsertPt = Preheader->getTerminator(); + else { + // LSR sets the insertion point for AddRec start/step values to the + // block start to simplify value reuse, even though it's an invalid + // position. SCEVExpander must correct for this in all cases. + InsertPt = &*L->getHeader()->getFirstInsertionPt(); + } + } else { + // If the SCEV is computable at this level, insert it into the header + // after the PHIs (and after any other instructions that we've inserted + // there) so that it is guaranteed to dominate any user inside the loop. + if (L && SE.hasComputableLoopEvolution(S, L) && !PostIncLoops.count(L)) + InsertPt = &*L->getHeader()->getFirstInsertionPt(); + while (InsertPt->getIterator() != Builder.GetInsertPoint() && + (isInsertedInstruction(InsertPt) || + isa(InsertPt))) { + InsertPt = &*std::next(InsertPt->getIterator()); + } + break; } - break; } + } // Check to see if we already expanded this here. auto I = InsertedExpressions.find(std::make_pair(S, InsertPt)); Index: llvm/test/Transforms/LoopVectorize/pr30806.ll =================================================================== --- /dev/null +++ llvm/test/Transforms/LoopVectorize/pr30806.ll @@ -0,0 +1,65 @@ +; RUN: opt -loop-vectorize -S < %s 2>&1 | FileCheck %s + +; Produced from test-case: +; +; void testGuardedInnerLoop(uint32_t *ptr, uint32_t denom, uint32_t numer, uint32_t outer_lim) +; { +; for(uint32_t outer_i = 0; outer_i < outer_lim; ++outer_i) { +; if (denom > 0) { +; const uint32_t lim = numer / denom; +; +; for (uint32_t i = 0; i < lim; ++i) +; ptr[i] = 1; +; } +; } +; } + + +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128-ni:1" +target triple = "x86_64-unknown-linux-gnu" + +define void @testGuardedInnerLoop(i32* %ptr, i32 %denom, i32 %numer, i32 %outer_lim) { +entry: + %cmp1 = icmp eq i32 %outer_lim, 0 + br i1 %cmp1, label %exit, label %loop1.preheader + +; Verify that a 'udiv' does not appear between the 'loop1.preheader' label, and +; whatever label comes next. +loop1.preheader: +; CHECK-LABEL: loop1.preheader: +; CHECK-NOT: udiv +; CHECK-LABEL: : + br label %loop1 + +loop1: + %outer_i = phi i32 [ %inc1, %loop2.exit ], [ 0, %loop1.preheader ] + %0 = add i32 %denom, -1 + %1 = icmp ult i32 %0, %numer + br i1 %1, label %loop2.preheader, label %loop2.exit + +; Verify that a 'udiv' does appear between the 'loop2.preheader' label, and +; whatever label comes next. +loop2.preheader: +; CHECK-LABEL: loop2.preheader: +; CHECK: udiv +; CHECK-LABEL: : + %lim = udiv i32 %numer, %denom + %2 = zext i32 %lim to i64 + br label %loop2 + +loop2: + %indvar.loop2 = phi i64 [ 0, %loop2.preheader ], [ %indvar.loop2.next, %loop2 ] + %arrayidx = getelementptr inbounds i32, i32* %ptr, i64 %indvar.loop2 + store i32 1, i32* %arrayidx, align 4 + %indvar.loop2.next = add nuw nsw i64 %indvar.loop2, 1 + %cmp2 = icmp ult i64 %indvar.loop2.next, %2 + br i1 %cmp2, label %loop2, label %loop2.exit + +loop2.exit: + %inc1 = add nuw i32 %outer_i, 1 + %exitcond = icmp eq i32 %inc1, %outer_lim + br i1 %exitcond, label %exit, label %loop1 + +exit: + ret void +}