Index: lib/Transforms/Scalar/IndVarSimplify.cpp
===================================================================
--- lib/Transforms/Scalar/IndVarSimplify.cpp
+++ lib/Transforms/Scalar/IndVarSimplify.cpp
@@ -147,6 +147,7 @@
 
   bool canLoopBeDeleted(Loop *L, SmallVector<RewritePhi, 8> &RewritePhiSet);
   void rewriteLoopExitValues(Loop *L, SCEVExpander &Rewriter);
+  bool hasHardUserWithinLoop(const Loop *L, const Instruction *I) const;
   void rewriteFirstIterationLoopExitValues(Loop *L);
 
   Value *linearFunctionTestReplace(Loop *L, const SCEV *BackedgeTakenCount,
@@ -542,6 +543,34 @@
 // As a side effect, reduces the amount of IV processing within the loop.
 //===----------------------------------------------------------------------===//
 
+/// Return true if I, or any user reachable from it through def-use chains that
+/// stay inside L, may have side effects. Such a "hard" user keeps the
+/// computation alive within the loop, so computing the exit value outside the
+/// loop brings no benefit.
+bool IndVarSimplify::hasHardUserWithinLoop(const Loop *L,
+                                           const Instruction *I) const {
+  SmallPtrSet<const Instruction *, 8> Visited;
+  SmallVector<const Instruction *, 8> WorkList;
+  Visited.insert(I);
+  WorkList.push_back(I);
+  while (!WorkList.empty()) {
+    const Instruction *Curr = WorkList.pop_back_val();
+    // This use is outside the loop, nothing to do.
+    if (!L->contains(Curr))
+      continue;
+    // Do we assume it is a "hard" use which will not be eliminated easily?
+    if (Curr->mayHaveSideEffects())
+      return true;
+    // Otherwise, add all its users to worklist.
+    for (auto U : Curr->users()) {
+      auto *UI = cast<Instruction>(U);
+      if (Visited.insert(UI).second)
+        WorkList.push_back(UI);
+    }
+  }
+  return false;
+}
+
 /// Check to see if this loop has a computable loop-invariant execution count.
 /// If so, this means that we can compute the final value of any expressions
 /// that are recurrent in the loop, and substitute the exit values from the loop
@@ -616,19 +645,8 @@
         // Computing the value outside of the loop brings no benefit if it is
         // definitely used inside the loop in a way which can not be optimized
         // away.
-        if (ExitValue->getSCEVType()>=scMulExpr) {
-          bool HasHardInternalUses = false;
-          for (auto *IB : Inst->users()) {
-            Instruction *UseInstr = cast<Instruction>(IB);
-            unsigned Opc = UseInstr->getOpcode();
-            if (L->contains(UseInstr) && Opc == Instruction::Call) {
-              HasHardInternalUses = true;
-              break;
-            }
-          }
-          if (HasHardInternalUses)
-            continue;
-        }
+        if (hasHardUserWithinLoop(L, Inst))
+          continue;
 
         bool HighCost = Rewriter.isHighCostExpansion(ExitValue, L, Inst);
         Value *ExitVal =
Index: test/Analysis/ScalarEvolution/pr28705.ll
===================================================================
--- test/Analysis/ScalarEvolution/pr28705.ll
+++ test/Analysis/ScalarEvolution/pr28705.ll
@@ -1,11 +1,11 @@
 ; PR28705
 ; RUN: opt < %s -indvars -S | FileCheck %s
 
-; Check IndVarSimplify replaces the exitval use of the induction var "%inc.i.i"
-; with "%.sroa.speculated + 1".
+; Check IndVarSimplify doesn't replace external use of the induction var
+; "%inc.i.i" with "%.sroa.speculated + 1" because it is not profitable.
 ;
 ; CHECK-LABEL: @foo(
-; CHECK: %[[EXIT:.+]] = sub i32 %.sroa.speculated, -1
+; CHECK: %[[EXIT:.+]] = phi i32 [ %inc.i.i, %for.body650 ]
 ; CHECK: %DB.sroa.9.0.lcssa = phi i32 [ 1, %entry ], [ %[[EXIT]], %loopexit ]
 ;
 define void @foo(i32 %sub.ptr.div.i, i8* %ref.i1174) local_unnamed_addr {
Index: test/Transforms/IndVarSimplify/dont-recompute.ll
===================================================================
--- test/Transforms/IndVarSimplify/dont-recompute.ll
+++ test/Transforms/IndVarSimplify/dont-recompute.ll
@@ -123,3 +123,54 @@
   tail call void @func(i32 %soft_use)
   ret void
 }
+
+; CHECK-LABEL: @test5(
+define void @test5(i32 %m) nounwind uwtable {
+entry:
+  br label %for.body
+
+for.body:                                         ; preds = %for.body, %entry
+  %i.06 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
+  %a.05 = phi i32 [ 0, %entry ], [ %add, %for.body ]
+  %add = add i32 %a.05, %m
+  %soft_use = add i32 %add, 123
+; CHECK: tail call void @func(i32 %soft_use)
+  tail call void @func(i32 %soft_use)
+  %inc = add nsw i32 %i.06, 1
+  %exitcond = icmp eq i32 %inc, 186
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:                                          ; preds = %for.body
+; CHECK: for.end:
+; CHECK-NOT: mul i32 %m, 186
+; CHECK:%add.lcssa = phi i32 [ %add, %for.body ]
+; CHECK-NEXT: tail call void @func(i32 %add.lcssa)
+  tail call void @func(i32 %add)
+  ret void
+}
+
+; CHECK-LABEL: @test6(
+define void @test6(i32 %m, i32* %p) nounwind uwtable {
+entry:
+  br label %for.body
+
+for.body:                                         ; preds = %for.body, %entry
+  %i.06 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
+  %a.05 = phi i32 [ 0, %entry ], [ %add, %for.body ]
+  %add = add i32 %a.05, %m
+  %soft_use = add i32 %add, 123
+; CHECK: store i32 %soft_use, i32* %pidx
+  %pidx = getelementptr i32, i32* %p, i32 %add
+  store i32 %soft_use, i32* %pidx
+  %inc = add nsw i32 %i.06, 1
+  %exitcond = icmp eq i32 %inc, 186
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:                                          ; preds = %for.body
+; CHECK: for.end:
+; CHECK-NOT: mul i32 %m, 186
+; CHECK:%add.lcssa = phi i32 [ %add, %for.body ]
+; CHECK-NEXT: tail call void @func(i32 %add.lcssa)
+  tail call void @func(i32 %add)
+  ret void
+}
Index: test/Transforms/IndVarSimplify/lrev-existing-umin.ll
===================================================================
--- test/Transforms/IndVarSimplify/lrev-existing-umin.ll
+++ test/Transforms/IndVarSimplify/lrev-existing-umin.ll
@@ -1,5 +1,7 @@
 ; RUN: opt -S -indvars < %s | FileCheck %s
 
+; Do not rewrite the user outside the loop because we must keep the instruction
+; inside the loop due to store. Rewrite doesn't give us any profit.
 define void @f(i32 %length.i.88, i32 %length.i, i8* %tmp12, i32 %tmp10, i8* %tmp8) {
 ; CHECK-LABEL: @f(
 not_zero11.preheader:
@@ -24,6 +26,42 @@
 
 main.exit.selector:
 ; CHECK-LABEL: main.exit.selector:
+; CHECK: %tmp22.lcssa = phi i32 [ %tmp22, %not_zero11 ]
+; CHECK: %tmp24 = icmp slt i32 %tmp22.lcssa, %length.
+  %tmp24 = icmp slt i32 %tmp22, %length.i
+  br i1 %tmp24, label %not_zero11.postloop, label %leave
+
+leave:
+  ret void
+
+not_zero11.postloop:
+  ret void
+}
+
+; Rewrite the user outside the loop because there are no hard users inside the loop.
+define void @f1(i32 %length.i.88, i32 %length.i, i8* %tmp12, i32 %tmp10, i8* %tmp8) {
+; CHECK-LABEL: @f1(
+not_zero11.preheader:
+  %tmp13 = icmp ugt i32 %length.i, %length.i.88
+  %tmp14 = select i1 %tmp13, i32 %length.i.88, i32 %length.i
+  %tmp15 = icmp sgt i32 %tmp14, 0
+  br i1 %tmp15, label %not_zero11, label %not_zero11.postloop
+
+not_zero11:
+  %v_1 = phi i32 [ %tmp22, %not_zero11 ], [ 0, %not_zero11.preheader ]
+  %tmp16 = zext i32 %v_1 to i64
+  %tmp17 = getelementptr inbounds i8, i8* %tmp8, i64 %tmp16
+  %tmp18 = load i8, i8* %tmp17, align 1
+  %tmp19 = zext i8 %tmp18 to i32
+  %tmp20 = or i32 %tmp19, %tmp10
+  %tmp21 = trunc i32 %tmp20 to i8
+  %addr22 = getelementptr inbounds i8, i8* %tmp12, i64 %tmp16
+  %tmp22 = add nuw nsw i32 %v_1, 1
+  %tmp23 = icmp slt i32 %tmp22, %tmp14
+  br i1 %tmp23, label %not_zero11, label %main.exit.selector
+
+main.exit.selector:
+; CHECK-LABEL: main.exit.selector:
 ; CHECK: %tmp24 = icmp slt i32 %tmp14, %length.i
   %tmp24 = icmp slt i32 %tmp22, %length.i
   br i1 %tmp24, label %not_zero11.postloop, label %leave