# Patch summary (leading commentary; everything before the first "Index:"
# header is not part of any hunk).
#
# lib/Transforms/Scalar/LoopStrengthReduce.cpp
#   Reorders the body of the search loop in LSRInstance::HoistInsertPosition.
#   - `Tentative` is now declared before the loop and initialised to the
#     instruction at the incoming insert position (`&*IP`).
#   - The walk up the dominator tree (the `Rung` loop, including the
#     "Don't climb into a loop" depth check) is removed from the top of the
#     loop body and re-added at the bottom, after `IP` has been updated.
#     It now ends by setting `Tentative = IDom->getTerminator()` for the
#     next iteration.
#   - The "same block" test for `BetterPos` compares against
#     `Tentative->getParent()` instead of `IDom`, since `IDom` is no longer
#     computed before that test runs.
#   Net effect visible in the hunks: the first iteration checks the inputs
#   against the instruction at the original IP, instead of starting at the
#   terminator of a dominating block.
#
# test/Transforms/LoopStrengthReduce/AArch64/lsr-reuse.ll (new file)
#   FileCheck test run through `llc -mtriple=arm64-unknown-unknown
#   -print-lsr-output`. Between the `while.cond:` and `land.rhs:` labels it
#   expects one `add i64 %lsr.iv, 1` and no second occurrence.
#
# NOTE(review): the context line `const SmallVectorImpl &Inputs)` looks like
# it lost a template argument list when this patch was extracted -- confirm
# against the tree before applying; it is left exactly as received here.
# NOTE(review): leading whitespace inside the hunks was reconstructed from a
# whitespace-collapsed copy; the hunk line counts match the @@ headers, but
# verify indentation against the target file.
Index: lib/Transforms/Scalar/LoopStrengthReduce.cpp
===================================================================
--- lib/Transforms/Scalar/LoopStrengthReduce.cpp
+++ lib/Transforms/Scalar/LoopStrengthReduce.cpp
@@ -4331,28 +4331,10 @@
 LSRInstance::HoistInsertPosition(BasicBlock::iterator IP,
                                  const SmallVectorImpl &Inputs)
                                                                          const {
+  Instruction *Tentative = &*IP;
   for (;;) {
-    const Loop *IPLoop = LI.getLoopFor(IP->getParent());
-    unsigned IPLoopDepth = IPLoop ? IPLoop->getLoopDepth() : 0;
-
-    BasicBlock *IDom;
-    for (DomTreeNode *Rung = DT.getNode(IP->getParent()); ; ) {
-      if (!Rung) return IP;
-      Rung = Rung->getIDom();
-      if (!Rung) return IP;
-      IDom = Rung->getBlock();
-
-      // Don't climb into a loop though.
-      const Loop *IDomLoop = LI.getLoopFor(IDom);
-      unsigned IDomDepth = IDomLoop ? IDomLoop->getLoopDepth() : 0;
-      if (IDomDepth <= IPLoopDepth &&
-          (IDomDepth != IPLoopDepth || IDomLoop == IPLoop))
-        break;
-    }
-
     bool AllDominate = true;
     Instruction *BetterPos = nullptr;
-    Instruction *Tentative = IDom->getTerminator();
     for (Instruction *Inst : Inputs) {
       if (Inst == Tentative || !DT.dominates(Inst, Tentative)) {
         AllDominate = false;
@@ -4360,7 +4342,7 @@
       }
       // Attempt to find an insert position in the middle of the block,
       // instead of at the end, so that it can be used for other expansions.
-      if (IDom == Inst->getParent() &&
+      if (Tentative->getParent() == Inst->getParent() &&
           (!BetterPos || !DT.dominates(Inst, BetterPos)))
         BetterPos = &*std::next(BasicBlock::iterator(Inst));
     }
@@ -4370,6 +4352,26 @@
       IP = BetterPos->getIterator();
     else
       IP = Tentative->getIterator();
+
+    const Loop *IPLoop = LI.getLoopFor(IP->getParent());
+    unsigned IPLoopDepth = IPLoop ? IPLoop->getLoopDepth() : 0;
+
+    BasicBlock *IDom;
+    for (DomTreeNode *Rung = DT.getNode(IP->getParent()); ; ) {
+      if (!Rung) return IP;
+      Rung = Rung->getIDom();
+      if (!Rung) return IP;
+      IDom = Rung->getBlock();
+
+      // Don't climb into a loop though.
+      const Loop *IDomLoop = LI.getLoopFor(IDom);
+      unsigned IDomDepth = IDomLoop ? IDomLoop->getLoopDepth() : 0;
+      if (IDomDepth <= IPLoopDepth &&
+          (IDomDepth != IPLoopDepth || IDomLoop == IPLoop))
+        break;
+    }
+
+    Tentative = IDom->getTerminator();
   }
 
   return IP;
Index: test/Transforms/LoopStrengthReduce/AArch64/lsr-reuse.ll
===================================================================
--- /dev/null
+++ test/Transforms/LoopStrengthReduce/AArch64/lsr-reuse.ll
@@ -0,0 +1,34 @@
+; RUN: llc -mtriple=arm64-unknown-unknown -print-lsr-output < %s 2>&1 | FileCheck %s
+
+declare void @foo(i64)
+
+; Verify that redundant adds aren't inserted by LSR.
+; CHECK-LABEL: @bar(
+define void @bar(double* %A) {
+entry:
+  br label %while.cond
+
+while.cond:
+; CHECK-LABEL: while.cond:
+; CHECK: add i64 %lsr.iv, 1
+; CHECK-NOT: add i64 %lsr.iv, 1
+; CHECK-LABEL: land.rhs:
+  %indvars.iv28 = phi i64 [ %indvars.iv.next29, %land.rhs ], [ 50, %entry ]
+  %cmp = icmp sgt i64 %indvars.iv28, 0
+  br i1 %cmp, label %land.rhs, label %while.end
+
+land.rhs:
+  %indvars.iv.next29 = add nsw i64 %indvars.iv28, -1
+  %arrayidx = getelementptr inbounds double, double* %A, i64 %indvars.iv.next29
+  %Aload = load double, double* %arrayidx, align 8
+  %cmp1 = fcmp oeq double %Aload, 0.000000e+00
+  br i1 %cmp1, label %while.cond, label %if.end
+
+while.end:
+  %indvars.iv28.lcssa = phi i64 [ %indvars.iv28, %while.cond ]
+  tail call void @foo(i64 %indvars.iv28.lcssa)
+  br label %if.end
+
+if.end:
+  ret void
+}