Index: llvm/trunk/lib/Transforms/Scalar/LoopStrengthReduce.cpp =================================================================== --- llvm/trunk/lib/Transforms/Scalar/LoopStrengthReduce.cpp +++ llvm/trunk/lib/Transforms/Scalar/LoopStrengthReduce.cpp @@ -4315,28 +4315,10 @@ LSRInstance::HoistInsertPosition(BasicBlock::iterator IP, const SmallVectorImpl<Instruction *> &Inputs) const { + Instruction *Tentative = &*IP; for (;;) { - const Loop *IPLoop = LI.getLoopFor(IP->getParent()); - unsigned IPLoopDepth = IPLoop ? IPLoop->getLoopDepth() : 0; - - BasicBlock *IDom; - for (DomTreeNode *Rung = DT.getNode(IP->getParent()); ; ) { - if (!Rung) return IP; - Rung = Rung->getIDom(); - if (!Rung) return IP; - IDom = Rung->getBlock(); - - // Don't climb into a loop though. - const Loop *IDomLoop = LI.getLoopFor(IDom); - unsigned IDomDepth = IDomLoop ? IDomLoop->getLoopDepth() : 0; - if (IDomDepth <= IPLoopDepth && - (IDomDepth != IPLoopDepth || IDomLoop == IPLoop)) - break; - } - bool AllDominate = true; Instruction *BetterPos = nullptr; - Instruction *Tentative = IDom->getTerminator(); // Don't bother attempting to insert before a catchswitch, their basic block // cannot have other non-PHI instructions. if (isa<CatchSwitchInst>(Tentative)) @@ -4349,7 +4331,7 @@ } // Attempt to find an insert position in the middle of the block, // instead of at the end, so that it can be used for other expansions. - if (IDom == Inst->getParent() && + if (Tentative->getParent() == Inst->getParent() && (!BetterPos || !DT.dominates(Inst, BetterPos))) BetterPos = &*std::next(BasicBlock::iterator(Inst)); } @@ -4359,6 +4341,26 @@ IP = BetterPos->getIterator(); else IP = Tentative->getIterator(); + + const Loop *IPLoop = LI.getLoopFor(IP->getParent()); + unsigned IPLoopDepth = IPLoop ?
IPLoop->getLoopDepth() : 0; + + BasicBlock *IDom; + for (DomTreeNode *Rung = DT.getNode(IP->getParent()); ; ) { + if (!Rung) return IP; + Rung = Rung->getIDom(); + if (!Rung) return IP; + IDom = Rung->getBlock(); + + // Don't climb into a loop though. + const Loop *IDomLoop = LI.getLoopFor(IDom); + unsigned IDomDepth = IDomLoop ? IDomLoop->getLoopDepth() : 0; + if (IDomDepth <= IPLoopDepth && + (IDomDepth != IPLoopDepth || IDomLoop == IPLoop)) + break; + } + + Tentative = IDom->getTerminator(); } return IP; Index: llvm/trunk/test/Transforms/LoopStrengthReduce/AArch64/lsr-reuse.ll =================================================================== --- llvm/trunk/test/Transforms/LoopStrengthReduce/AArch64/lsr-reuse.ll +++ llvm/trunk/test/Transforms/LoopStrengthReduce/AArch64/lsr-reuse.ll @@ -0,0 +1,34 @@ +; RUN: llc -mtriple=arm64-unknown-unknown -print-lsr-output < %s 2>&1 | FileCheck %s + +declare void @foo(i64) + +; Verify that redundant adds aren't inserted by LSR. +; CHECK-LABEL: @bar( +define void @bar(double* %A) { +entry: + br label %while.cond + +while.cond: +; CHECK-LABEL: while.cond: +; CHECK: add i64 %lsr.iv, 1 +; CHECK-NOT: add i64 %lsr.iv, 1 +; CHECK-LABEL: land.rhs: + %indvars.iv28 = phi i64 [ %indvars.iv.next29, %land.rhs ], [ 50, %entry ] + %cmp = icmp sgt i64 %indvars.iv28, 0 + br i1 %cmp, label %land.rhs, label %while.end + +land.rhs: + %indvars.iv.next29 = add nsw i64 %indvars.iv28, -1 + %arrayidx = getelementptr inbounds double, double* %A, i64 %indvars.iv.next29 + %Aload = load double, double* %arrayidx, align 8 + %cmp1 = fcmp oeq double %Aload, 0.000000e+00 + br i1 %cmp1, label %while.cond, label %if.end + +while.end: + %indvars.iv28.lcssa = phi i64 [ %indvars.iv28, %while.cond ] + tail call void @foo(i64 %indvars.iv28.lcssa) + br label %if.end + +if.end: + ret void +} Index: llvm/trunk/test/Transforms/LoopStrengthReduce/scev-insertpt-bug.ll =================================================================== --- 
llvm/trunk/test/Transforms/LoopStrengthReduce/scev-insertpt-bug.ll +++ llvm/trunk/test/Transforms/LoopStrengthReduce/scev-insertpt-bug.ll @@ -0,0 +1,47 @@ +; RUN: opt < %s -loop-reduce -S + +; Test that SCEV insert points don't get corrupted and cause an +; invalid instruction to be inserted in a block other than its parent. +; See http://reviews.llvm.org/D20703 for context. +define void @test() { +entry: + %bf.load = load i32, i32* null, align 4 + %bf.clear = lshr i32 %bf.load, 1 + %div = and i32 %bf.clear, 134217727 + %sub = add nsw i32 %div, -1 + %0 = zext i32 %sub to i64 + br label %while.cond + +while.cond: ; preds = %cond.end, %entry + %indvars.iv = phi i64 [ %indvars.iv.next, %cond.end ], [ 0, %entry ] + %cmp = icmp eq i64 %indvars.iv, %0 + br i1 %cmp, label %cleanup16, label %while.body + +while.body: ; preds = %while.cond + %1 = trunc i64 %indvars.iv to i32 + %mul = shl i32 %1, 1 + %add = add nuw i32 %mul, 2 + %cmp3 = icmp ult i32 %add, 0 + br i1 %cmp3, label %if.end, label %if.then + +if.then: ; preds = %while.body + unreachable + +if.end: ; preds = %while.body + br i1 false, label %cond.end, label %cond.true + +cond.true: ; preds = %if.end + br label %cond.end + +cond.end: ; preds = %cond.true, %if.end + %add7 = add i32 %1, 1 + %cmp12 = icmp ugt i32 %add7, %sub + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + br i1 %cmp12, label %if.then13, label %while.cond + +if.then13: ; preds = %cond.end + unreachable + +cleanup16: ; preds = %while.cond + ret void +}