Index: lib/Transforms/Scalar/IndVarSimplify.cpp =================================================================== --- lib/Transforms/Scalar/IndVarSimplify.cpp +++ lib/Transforms/Scalar/IndVarSimplify.cpp @@ -41,6 +41,7 @@ #include "llvm/IR/Instructions.h" #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/LLVMContext.h" +#include "llvm/IR/PatternMatch.h" #include "llvm/IR/Type.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" @@ -135,6 +136,10 @@ PHINode *IndVar, SCEVExpander &Rewriter); void SinkUnusedInvariants(Loop *L); + + Value *ExpandSCEVIfNeeded(SCEVExpander &Rewriter, const SCEV *S, Loop *L, + Instruction *InsertPt, Type *Ty, + bool &IsHighCostExpansion); }; } @@ -496,6 +501,52 @@ }; } +Value *IndVarSimplify::ExpandSCEVIfNeeded(SCEVExpander &Rewriter, const SCEV *S, + Loop *L, Instruction *InsertPt, + Type *ResultTy, + bool &IsHighCostExpansion) { + using namespace llvm::PatternMatch; + + if (!Rewriter.isHighCostExpansion(S, L)) { + IsHighCostExpansion = false; + return Rewriter.expandCodeFor(S, ResultTy, InsertPt); + } + + // Before expanding S into an expensive LLVM expression, see if we can use an + // already existing value as the expansion for S. There is potential to make + // this significantly smarter, but this simple heuristic already gets some + // interesting cases. + + SmallVector Latches; + L->getLoopLatches(Latches); + + for (BasicBlock *BB : Latches) { + ICmpInst::Predicate Pred; + Instruction *LHS, *RHS; + BasicBlock *TrueBB, *FalseBB; + + if (!match(BB->getTerminator(), + m_Br(m_ICmp(Pred, m_Instruction(LHS), m_Instruction(RHS)), + TrueBB, FalseBB))) + continue; + + if (SE->getSCEV(LHS) == S && DT->dominates(LHS, InsertPt)) { + IsHighCostExpansion = false; + return LHS; + } + + if (SE->getSCEV(RHS) == S && DT->dominates(RHS, InsertPt)) { + IsHighCostExpansion = false; + return RHS; + } + } + + // We didn't find anything, fall back to using SCEVExpander. + assert(Rewriter.isHighCostExpansion(S, L) && "this should not have changed!"); + IsHighCostExpansion = true; + return Rewriter.expandCodeFor(S, ResultTy, InsertPt); +} + //===----------------------------------------------------------------------===// // RewriteLoopExitValues - Optimize IV users outside the loop. // As a side effect, reduces the amount of IV processing within the loop. @@ -628,7 +679,9 @@ continue; } - Value *ExitVal = Rewriter.expandCodeFor(ExitValue, PN->getType(), Inst); + bool HighCost = false; + Value *ExitVal = ExpandSCEVIfNeeded(Rewriter, ExitValue, L, Inst, + PN->getType(), HighCost); DEBUG(dbgs() << "INDVARS: RLEV: AfterLoopVal = " << *ExitVal << '\n' << " LoopVal = " << *Inst << "\n"); @@ -637,7 +690,6 @@ DeadInsts.push_back(ExitVal); continue; } - bool HighCost = Rewriter.isHighCostExpansion(ExitValue, L); // Collect all the candidate PHINodes to be rewritten. RewritePhiSet.push_back( Index: test/Transforms/IndVarSimplify/lrev-existing-umin.ll =================================================================== --- /dev/null +++ test/Transforms/IndVarSimplify/lrev-existing-umin.ll @@ -0,0 +1,36 @@ +; RUN: opt -S -indvars < %s | FileCheck %s + +define void @f(i32 %length.i.88, i32 %length.i, i8* %tmp12, i32 %tmp10, i8* %tmp8) { +; CHECK-LABEL: @f( +not_zero11.preheader: + %tmp13 = icmp ugt i32 %length.i, %length.i.88 + %tmp14 = select i1 %tmp13, i32 %length.i.88, i32 %length.i + %tmp15 = icmp sgt i32 %tmp14, 0 + br i1 %tmp15, label %not_zero11, label %not_zero11.postloop + +not_zero11: + %v_1 = phi i32 [ %tmp22, %not_zero11 ], [ 0, %not_zero11.preheader ] + %tmp16 = zext i32 %v_1 to i64 + %tmp17 = getelementptr inbounds i8, i8* %tmp8, i64 %tmp16 + %tmp18 = load i8, i8* %tmp17, align 1 + %tmp19 = zext i8 %tmp18 to i32 + %tmp20 = or i32 %tmp19, %tmp10 + %tmp21 = trunc i32 %tmp20 to i8 + %addr22 = getelementptr inbounds i8, i8* %tmp12, i64 %tmp16 + store i8 %tmp21, i8* %addr22, align 1 + %tmp22 = add nuw nsw i32 %v_1, 1 + %tmp23 = icmp slt i32 %tmp22, %tmp14 + br i1 %tmp23, label %not_zero11, label %main.exit.selector + +main.exit.selector: +; CHECK-LABEL: main.exit.selector: +; CHECK: %tmp24 = icmp slt i32 %tmp14, %length.i + %tmp24 = icmp slt i32 %tmp22, %length.i + br i1 %tmp24, label %not_zero11.postloop, label %leave + +leave: + ret void + +not_zero11.postloop: + ret void +}