Index: llvm/trunk/include/llvm/Analysis/ScalarEvolutionExpander.h =================================================================== --- llvm/trunk/include/llvm/Analysis/ScalarEvolutionExpander.h +++ llvm/trunk/include/llvm/Analysis/ScalarEvolutionExpander.h @@ -197,6 +197,13 @@ /// block. Value *expandCodeFor(const SCEV *SH, Type *Ty, Instruction *I); + /// \brief Insert code to directly compute the specified SCEV expression + /// into the program. The inserted code is inserted into the SCEVExpander's + /// current insertion point. If a type is specified, the result will be + /// expanded to have that type, with a cast if necessary. + Value *expandCodeFor(const SCEV *SH, Type *Ty = nullptr); + + /// \brief Generates a code sequence that evaluates this predicate. /// The inserted instructions will be at position \p Loc. /// The result will be of type i1 and will have a value of 0 when the @@ -254,6 +261,15 @@ void enableLSRMode() { LSRMode = true; } + /// \brief Set the current insertion point. This is useful if multiple calls + /// to expandCodeFor() are going to be made with the same insert point and + /// the insert point may be moved during one of the expansions (e.g. if the + /// insert point is not a block terminator). \p IP must not be null. + void setInsertPoint(Instruction *IP) { + assert(IP && "Cannot set a null insertion point!"); + Builder.SetInsertPoint(IP); + } + /// \brief Clear the current insertion point. This is useful if the /// instruction that had been serving as the insertion point may have been /// deleted. @@ -325,12 +341,6 @@ Value *expand(const SCEV *S); - /// \brief Insert code to directly compute the specified SCEV expression - /// into the program. The inserted code is inserted into the SCEVExpander's - /// current insertion point. If a type is specified, the result will be - /// expanded to have that type, with a cast if necessary. - Value *expandCodeFor(const SCEV *SH, Type *Ty = nullptr); - /// \brief Determine the most "relevant" loop for the given SCEV. 
const Loop *getRelevantLoop(const SCEV *); Index: llvm/trunk/lib/Analysis/ScalarEvolutionExpander.cpp =================================================================== --- llvm/trunk/lib/Analysis/ScalarEvolutionExpander.cpp +++ llvm/trunk/lib/Analysis/ScalarEvolutionExpander.cpp @@ -1610,8 +1610,7 @@ Value *SCEVExpander::expandCodeFor(const SCEV *SH, Type *Ty, Instruction *IP) { - assert(IP); - Builder.SetInsertPoint(IP); + setInsertPoint(IP); return expandCodeFor(SH, Ty); } Index: llvm/trunk/lib/Transforms/Scalar/LoopStrengthReduce.cpp =================================================================== --- llvm/trunk/lib/Transforms/Scalar/LoopStrengthReduce.cpp +++ llvm/trunk/lib/Transforms/Scalar/LoopStrengthReduce.cpp @@ -4444,6 +4444,7 @@ // Determine an input position which will be dominated by the operands and // which will dominate the result. IP = AdjustInsertPositionForExpand(IP, LF, LU, Rewriter); + Rewriter.setInsertPoint(&*IP); // Inform the Rewriter if we have a post-increment use, so that it can // perform an advantageous expansion. @@ -4475,7 +4476,7 @@ LF.UserInst, LF.OperandValToReplace, Loops, SE, DT); - Ops.push_back(SE.getUnknown(Rewriter.expandCodeFor(Reg, nullptr, &*IP))); + Ops.push_back(SE.getUnknown(Rewriter.expandCodeFor(Reg, nullptr))); } // Expand the ScaledReg portion. @@ -4493,14 +4494,14 @@ // Expand ScaleReg as if it was part of the base regs. if (F.Scale == 1) Ops.push_back( - SE.getUnknown(Rewriter.expandCodeFor(ScaledS, nullptr, &*IP))); + SE.getUnknown(Rewriter.expandCodeFor(ScaledS, nullptr))); else { // An interesting way of "folding" with an icmp is to use a negated // scale, which we'll implement by inserting it into the other operand // of the icmp. 
assert(F.Scale == -1 && "The only scale supported by ICmpZero uses is -1!"); - ICmpScaledV = Rewriter.expandCodeFor(ScaledS, nullptr, &*IP); + ICmpScaledV = Rewriter.expandCodeFor(ScaledS, nullptr); } } else { // Otherwise just expand the scaled register and an explicit scale, @@ -4510,11 +4511,11 @@ // Unless the addressing mode will not be folded. if (!Ops.empty() && LU.Kind == LSRUse::Address && isAMCompletelyFolded(TTI, LU, F)) { - Value *FullV = Rewriter.expandCodeFor(SE.getAddExpr(Ops), Ty, &*IP); + Value *FullV = Rewriter.expandCodeFor(SE.getAddExpr(Ops), Ty); Ops.clear(); Ops.push_back(SE.getUnknown(FullV)); } - ScaledS = SE.getUnknown(Rewriter.expandCodeFor(ScaledS, nullptr, &*IP)); + ScaledS = SE.getUnknown(Rewriter.expandCodeFor(ScaledS, nullptr)); if (F.Scale != 1) ScaledS = SE.getMulExpr(ScaledS, SE.getConstant(ScaledS->getType(), F.Scale)); @@ -4526,7 +4527,7 @@ if (F.BaseGV) { // Flush the operand list to suppress SCEVExpander hoisting. if (!Ops.empty()) { - Value *FullV = Rewriter.expandCodeFor(SE.getAddExpr(Ops), Ty, &*IP); + Value *FullV = Rewriter.expandCodeFor(SE.getAddExpr(Ops), Ty); Ops.clear(); Ops.push_back(SE.getUnknown(FullV)); } @@ -4536,7 +4537,7 @@ // Flush the operand list to suppress SCEVExpander hoisting of both folded and // unfolded offsets. LSR assumes they both live next to their uses. if (!Ops.empty()) { - Value *FullV = Rewriter.expandCodeFor(SE.getAddExpr(Ops), Ty, &*IP); + Value *FullV = Rewriter.expandCodeFor(SE.getAddExpr(Ops), Ty); Ops.clear(); Ops.push_back(SE.getUnknown(FullV)); } @@ -4572,7 +4573,7 @@ const SCEV *FullS = Ops.empty() ? SE.getConstant(IntTy, 0) : SE.getAddExpr(Ops); - Value *FullV = Rewriter.expandCodeFor(FullS, Ty, &*IP); + Value *FullV = Rewriter.expandCodeFor(FullS, Ty); // We're done expanding now, so reset the rewriter. 
Rewriter.clearPostInc(); Index: llvm/trunk/test/Transforms/LoopStrengthReduce/X86/pr28719.ll =================================================================== --- llvm/trunk/test/Transforms/LoopStrengthReduce/X86/pr28719.ll +++ llvm/trunk/test/Transforms/LoopStrengthReduce/X86/pr28719.ll @@ -0,0 +1,47 @@ +; RUN: opt < %s -loop-reduce -S | FileCheck %s + +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +@a = global i32 0, align 4 +@b = global i8 0, align 1 +@c = global [4 x i8] zeroinitializer, align 1 + +; PR28719: just make sure -loop-reduce doesn't generate code with uses not dominated by defs. +; CHECK-LABEL: @main( +define i32 @main() { +entry: + %a0 = load i32, i32* @a, align 4 + %cmpa = icmp slt i32 %a0, 4 + br i1 %cmpa, label %preheader, label %for.end + +preheader: + %b0 = load i8, i8* @b, align 1 + %b0sext = sext i8 %b0 to i64 + br label %for.body + +for.body: + %iv = phi i64 [ 0, %preheader ], [ %iv.next, %lor.false ] + %mul = mul nsw i64 %b0sext, %iv + %multrunc = trunc i64 %mul to i32 + %cmp = icmp eq i32 %multrunc, 0 + br i1 %cmp, label %lor.false, label %if.then + +lor.false: + %cgep = getelementptr inbounds [4 x i8], [4 x i8]* @c, i64 0, i64 %iv + %ci = load i8, i8* %cgep, align 1 + %cisext = sext i8 %ci to i32 + %ivtrunc = trunc i64 %iv to i32 + %cmp2 = icmp eq i32 %cisext, %ivtrunc + %iv.next = add i64 %iv, 1 + br i1 %cmp2, label %for.body, label %if.then + +if.then: + tail call void @abort() + unreachable + +for.end: + ret i32 0 +} + +declare void @abort()