Index: llvm/include/llvm/Analysis/ScalarEvolutionExpander.h =================================================================== --- llvm/include/llvm/Analysis/ScalarEvolutionExpander.h +++ llvm/include/llvm/Analysis/ScalarEvolutionExpander.h @@ -316,8 +316,10 @@ SmallPtrSetImpl &Processed); /// Insert the specified binary operator, doing a small amount of work to - /// avoid inserting an obviously redundant operation. - Value *InsertBinop(Instruction::BinaryOps Opcode, Value *LHS, Value *RHS); + /// avoid inserting an obviously redundant operation, and hoisting to an + /// outer loop when the opportunity is there and it is safe. + Value *InsertBinop(Instruction::BinaryOps Opcode, Value *LHS, Value *RHS, + bool IsSafeToHoist = true); /// Arrange for there to be a cast of V to Ty at IP, reusing an existing /// cast if a suitable one exists, moving an existing cast if a suitable one Index: llvm/lib/Analysis/ScalarEvolutionExpander.cpp =================================================================== --- llvm/lib/Analysis/ScalarEvolutionExpander.cpp +++ llvm/lib/Analysis/ScalarEvolutionExpander.cpp @@ -167,9 +167,10 @@ } /// InsertBinop - Insert the specified binary operator, doing a small amount -/// of work to avoid inserting an obviously redundant operation. +/// of work to avoid inserting an obviously redundant operation, and hoisting +/// to an outer loop when the opportunity is there and it is safe. Value *SCEVExpander::InsertBinop(Instruction::BinaryOps Opcode, - Value *LHS, Value *RHS) { + Value *LHS, Value *RHS, bool IsSafeToHoist) { // Fold a binop with constant operands. if (Constant *CLHS = dyn_cast(LHS)) if (Constant *CRHS = dyn_cast(RHS)) @@ -211,14 +212,16 @@ DebugLoc Loc = Builder.GetInsertPoint()->getDebugLoc(); SCEVInsertPointGuard Guard(Builder, this); - // Move the insertion point out of as many loops as we can. - while (const Loop *L = SE.LI.getLoopFor(Builder.GetInsertBlock())) { - if (!L->isLoopInvariant(LHS) || !L->isLoopInvariant(RHS)) break; - BasicBlock *Preheader = L->getLoopPreheader(); - if (!Preheader) break; + if (IsSafeToHoist) { + // Move the insertion point out of as many loops as we can. + while (const Loop *L = SE.LI.getLoopFor(Builder.GetInsertBlock())) { + if (!L->isLoopInvariant(LHS) || !L->isLoopInvariant(RHS)) break; + BasicBlock *Preheader = L->getLoopPreheader(); + if (!Preheader) break; - // Ok, move up a level. - Builder.SetInsertPoint(Preheader->getTerminator()); + // Ok, move up a level. + Builder.SetInsertPoint(Preheader->getTerminator()); + } } // If we haven't found this binop, insert it. @@ -839,15 +842,18 @@ Type *Ty = SE.getEffectiveSCEVType(S->getType()); Value *LHS = expandCodeFor(S->getLHS(), Ty); + bool IsSafeToHoist = false; // Most divisions are assumed not safe to hoist. if (const SCEVConstant *SC = dyn_cast(S->getRHS())) { const APInt &RHS = SC->getAPInt(); if (RHS.isPowerOf2()) return InsertBinop(Instruction::LShr, LHS, ConstantInt::get(Ty, RHS.logBase2())); + // Division by non-zero constants are safe to hoist. + IsSafeToHoist = !SC->getValue()->isZero(); } Value *RHS = expandCodeFor(S->getRHS(), Ty); - return InsertBinop(Instruction::UDiv, LHS, RHS); + return InsertBinop(Instruction::UDiv, LHS, RHS, IsSafeToHoist); } /// Move parts of Base into Rest to leave Base with the minimal Index: llvm/test/Transforms/LoopVectorize/pr38697.ll =================================================================== --- /dev/null +++ llvm/test/Transforms/LoopVectorize/pr38697.ll @@ -0,0 +1,78 @@ +; RUN: opt -loop-vectorize -force-vector-width=2 -S < %s 2>&1 | FileCheck %s + +; Produced from test-case: +; +; void testCountIncrLoop(unsigned char *ptr, int lim, int count, int val) +; { +; int inx = 0; +; for (int outer_i = 0; outer_i < lim; ++outer_i) { +; if (count > 0) { // At runtime, 'count' is 0, so the following code is dead. +; int result = val; +; int tmp = count; +; +; while (tmp < 8) { +; result += val >> tmp; +; tmp += count; +; } +; +; ptr[inx++] = (unsigned char) result; +; } +; } +; } + +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +define void @testCountIncrLoop(i8* %ptr, i32 %lim, i32 %count, i32 %val) { +entry: + %cmp1 = icmp sgt i32 %lim, 0 + br i1 %cmp1, label %loop1.preheader, label %exit + +; Verify that a 'udiv' does not appear between the 'loop1.preheader' label, and +; the 'while.cond.preheader' label. And verify that a 'udiv' does appear at +; some point after the 'while.cond.preheader' label. +; CHECK: loop1.preheader: +; CHECK-NOT: udiv +; CHECK: while.cond.preheader: +; CHECK: udiv + +loop1.preheader: + %cmp2 = icmp sgt i32 %count, 0 + %cmp4 = icmp slt i32 %count, 8 + br label %loop1.body + +loop1.body: + %outer_i = phi i32 [ 0, %loop1.preheader ], [ %outer_i.1, %loop1.inc ] + %inx.1 = phi i32 [ 0, %loop1.preheader ], [ %inx.2, %loop1.inc ] + br i1 %cmp2, label %while.cond.preheader, label %loop1.inc + +while.cond.preheader: + br i1 %cmp4, label %while.body, label %while.end + +while.body: + %tmp = phi i32 [ %add3, %while.body ], [ %count, %while.cond.preheader ] + %result.1 = phi i32 [ %add, %while.body ], [ %val, %while.cond.preheader ] + %shr = ashr i32 %val, %tmp + %add = add nsw i32 %shr, %result.1 + %add3 = add nsw i32 %tmp, %count + %cmp3 = icmp slt i32 %add3, 8 + br i1 %cmp3, label %while.body, label %while.end + +while.end: + %result.0.lcssa = phi i32 [ %val, %while.cond.preheader ], [ %add, %while.body ] + %conv = trunc i32 %result.0.lcssa to i8 + %inc = add nsw i32 %inx.1, 1 + %idxprom = sext i32 %inx.1 to i64 + %arrayidx = getelementptr inbounds i8, i8* %ptr, i64 %idxprom + store i8 %conv, i8* %arrayidx, align 1 + br label %loop1.inc + +loop1.inc: + %inx.2 = phi i32 [ %inc, %while.end ], [ %inx.1, %loop1.body ] + %outer_i.1 = add nuw nsw i32 %outer_i, 1 + %exitcond = icmp eq i32 %outer_i.1, %lim + br i1 %exitcond, label %exit, label %loop1.body + +exit: + ret void +}