Index: llvm/trunk/lib/Analysis/ScalarEvolutionExpander.cpp =================================================================== --- llvm/trunk/lib/Analysis/ScalarEvolutionExpander.cpp +++ llvm/trunk/lib/Analysis/ScalarEvolutionExpander.cpp @@ -1731,49 +1731,55 @@ // Compute an insertion point for this SCEV object. Hoist the instructions // as far out in the loop nest as possible. Instruction *InsertPt = &*Builder.GetInsertPoint(); - for (Loop *L = SE.LI.getLoopFor(Builder.GetInsertBlock());; - L = L->getParentLoop()) - if (SE.isLoopInvariant(S, L)) { - if (!L) break; - if (BasicBlock *Preheader = L->getLoopPreheader()) - InsertPt = Preheader->getTerminator(); - else { - // LSR sets the insertion point for AddRec start/step values to the - // block start to simplify value reuse, even though it's an invalid - // position. SCEVExpander must correct for this in all cases. - InsertPt = &*L->getHeader()->getFirstInsertionPt(); - } - } else { - // We can move insertion point only if there is no div or rem operations - // otherwise we are risky to move it over the check for zero denominator. - auto SafeToHoist = [](const SCEV *S) { - return !SCEVExprContains(S, [](const SCEV *S) { - if (const auto *D = dyn_cast(S)) { - if (const auto *SC = dyn_cast(D->getRHS())) - // Division by non-zero constants can be hoisted. - return SC->getValue()->isZero(); - // All other divisions should not be moved as they may be - // divisions by zero and should be kept within the - // conditions of the surrounding loops that guard their - // execution (see PR35406). - return true; - } - return false; - }); - }; - // If the SCEV is computable at this level, insert it into the header - // after the PHIs (and after any other instructions that we've inserted - // there) so that it is guaranteed to dominate any user inside the loop. - if (L && SE.hasComputableLoopEvolution(S, L) && !PostIncLoops.count(L) && - SafeToHoist(S)) - InsertPt = &*L->getHeader()->getFirstInsertionPt(); - while (InsertPt->getIterator() != Builder.GetInsertPoint() && - (isInsertedInstruction(InsertPt) || - isa(InsertPt))) { - InsertPt = &*std::next(InsertPt->getIterator()); + + // We can move insertion point only if there is no div or rem operations + // otherwise we are risky to move it over the check for zero denominator. + auto SafeToHoist = [](const SCEV *S) { + return !SCEVExprContains(S, [](const SCEV *S) { + if (const auto *D = dyn_cast(S)) { + if (const auto *SC = dyn_cast(D->getRHS())) + // Division by non-zero constants can be hoisted. + return SC->getValue()->isZero(); + // All other divisions should not be moved as they may be + // divisions by zero and should be kept within the + // conditions of the surrounding loops that guard their + // execution (see PR35406). + return true; + } + return false; + }); + }; + if (SafeToHoist(S)) { + for (Loop *L = SE.LI.getLoopFor(Builder.GetInsertBlock());; + L = L->getParentLoop()) { + if (SE.isLoopInvariant(S, L)) { + if (!L) break; + if (BasicBlock *Preheader = L->getLoopPreheader()) + InsertPt = Preheader->getTerminator(); + else + // LSR sets the insertion point for AddRec start/step values to the + // block start to simplify value reuse, even though it's an invalid + // position. SCEVExpander must correct for this in all cases. + InsertPt = &*L->getHeader()->getFirstInsertionPt(); + } else { + // If the SCEV is computable at this level, insert it into the header + // after the PHIs (and after any other instructions that we've inserted + // there) so that it is guaranteed to dominate any user inside the loop. + if (L && SE.hasComputableLoopEvolution(S, L) && !PostIncLoops.count(L)) + InsertPt = &*L->getHeader()->getFirstInsertionPt(); + while (InsertPt->getIterator() != Builder.GetInsertPoint() && + (isInsertedInstruction(InsertPt) || + isa(InsertPt))) + InsertPt = &*std::next(InsertPt->getIterator()); + break; } - break; } + } + + // IndVarSimplify sometimes sets the insertion point at the block start, even + // when there are PHIs at that point. We must correct for this. + if (isa(*InsertPt)) + InsertPt = &*InsertPt->getParent()->getFirstInsertionPt(); // Check to see if we already expanded this here. auto I = InsertedExpressions.find(std::make_pair(S, InsertPt)); Index: llvm/trunk/test/Transforms/LoopVectorize/pr30806-phi-scev.ll =================================================================== --- llvm/trunk/test/Transforms/LoopVectorize/pr30806-phi-scev.ll +++ llvm/trunk/test/Transforms/LoopVectorize/pr30806-phi-scev.ll @@ -0,0 +1,66 @@ +; RUN: opt -S -indvars < %s | FileCheck %s + +; Produced from the test-case: +; +; extern void foo(char *, unsigned , unsigned *); +; extern void bar(int *, long); +; extern char *processBuf(char *); +; +; extern unsigned theSize; +; +; void foo(char *buf, unsigned denominator, unsigned *flag) { +; int incr = (int) (theSize / denominator); +; int inx = 0; +; while (*flag) { +; int itmp = inx + incr; +; int i = (int) theSize; +; bar(&i, (long) itmp); +; buf = processBuf(buf); +; inx = itmp; +; } +; } + +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" + +@theSize = external local_unnamed_addr global i32, align 4 + +define void @foo(i8* %buf, i32 %denominator, i32* %flag) local_unnamed_addr { +entry: + %i = alloca i32, align 4 + %0 = load i32, i32* @theSize, align 4 + %div = udiv i32 %0, %denominator + %1 = load i32, i32* %flag, align 4 + %tobool5 = icmp eq i32 %1, 0 + br i1 %tobool5, label %while.end, label %while.body.lr.ph + +while.body.lr.ph: ; preds = %entry + %2 = bitcast i32* %i to i8* + br label %while.body + +while.body: ; preds = %while.body.lr.ph, %while.body +; Check that there are two PHIs followed by a 'sext' in the same block, and that +; the test does not crash. +; CHECK: phi +; CHECK-NEXT: phi +; CHECK-NEXT: sext + %buf.addr.07 = phi i8* [ %buf, %while.body.lr.ph ], [ %call, %while.body ] + %inx.06 = phi i32 [ 0, %while.body.lr.ph ], [ %add, %while.body ] + %add = add nsw i32 %inx.06, %div + %3 = load i32, i32* @theSize, align 4 + store i32 %3, i32* %i, align 4 + %conv = sext i32 %add to i64 + call void @bar(i32* nonnull %i, i64 %conv) + %call = call i8* @processBuf(i8* %buf.addr.07) + %4 = load i32, i32* %flag, align 4 + %tobool = icmp eq i32 %4, 0 + br i1 %tobool, label %while.end.loopexit, label %while.body + +while.end.loopexit: ; preds = %while.body + br label %while.end + +while.end: ; preds = %while.end.loopexit, %entry + ret void +} + +declare void @bar(i32*, i64) local_unnamed_addr +declare i8* @processBuf(i8*) local_unnamed_addr Index: llvm/trunk/test/Transforms/LoopVectorize/pr30806.ll =================================================================== --- llvm/trunk/test/Transforms/LoopVectorize/pr30806.ll +++ llvm/trunk/test/Transforms/LoopVectorize/pr30806.ll @@ -0,0 +1,65 @@ +; RUN: opt -loop-vectorize -S < %s 2>&1 | FileCheck %s + +; Produced from test-case: +; +; void testGuardedInnerLoop(uint32_t *ptr, uint32_t denom, uint32_t numer, uint32_t outer_lim) +; { +; for(uint32_t outer_i = 0; outer_i < outer_lim; ++outer_i) { +; if (denom > 0) { +; const uint32_t lim = numer / denom; +; +; for (uint32_t i = 0; i < lim; ++i) +; ptr[i] = 1; +; } +; } +; } + + +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128-ni:1" +target triple = "x86_64-unknown-linux-gnu" + +define void @testGuardedInnerLoop(i32* %ptr, i32 %denom, i32 %numer, i32 %outer_lim) { +entry: + %cmp1 = icmp eq i32 %outer_lim, 0 + br i1 %cmp1, label %exit, label %loop1.preheader + +; Verify that a 'udiv' does not appear between the 'loop1.preheader' label, and +; whatever label comes next. +loop1.preheader: +; CHECK-LABEL: loop1.preheader: +; CHECK-NOT: udiv +; CHECK-LABEL: : + br label %loop1 + +loop1: + %outer_i = phi i32 [ %inc1, %loop2.exit ], [ 0, %loop1.preheader ] + %0 = add i32 %denom, -1 + %1 = icmp ult i32 %0, %numer + br i1 %1, label %loop2.preheader, label %loop2.exit + +; Verify that a 'udiv' does appear between the 'loop2.preheader' label, and +; whatever label comes next. +loop2.preheader: +; CHECK-LABEL: loop2.preheader: +; CHECK: udiv +; CHECK-LABEL: : + %lim = udiv i32 %numer, %denom + %2 = zext i32 %lim to i64 + br label %loop2 + +loop2: + %indvar.loop2 = phi i64 [ 0, %loop2.preheader ], [ %indvar.loop2.next, %loop2 ] + %arrayidx = getelementptr inbounds i32, i32* %ptr, i64 %indvar.loop2 + store i32 1, i32* %arrayidx, align 4 + %indvar.loop2.next = add nuw nsw i64 %indvar.loop2, 1 + %cmp2 = icmp ult i64 %indvar.loop2.next, %2 + br i1 %cmp2, label %loop2, label %loop2.exit + +loop2.exit: + %inc1 = add nuw i32 %outer_i, 1 + %exitcond = icmp eq i32 %inc1, %outer_lim + br i1 %exitcond, label %exit, label %loop1 + +exit: + ret void +}