Index: lib/Transforms/Scalar/LoopRerollPass.cpp =================================================================== --- lib/Transforms/Scalar/LoopRerollPass.cpp +++ lib/Transforms/Scalar/LoopRerollPass.cpp @@ -163,6 +163,9 @@ // Map between induction variable and its increment DenseMap IVToIncMap; + // For loop with multiple induction variable, remember the one used only to + // control the loop. + Instruction *LoopControlIV; // A chain of isomorphic instructions, identified by a single-use PHI // representing a reduction. Only the last value may be used outside the @@ -350,9 +353,11 @@ ScalarEvolution *SE, AliasAnalysis *AA, TargetLibraryInfo *TLI, DominatorTree *DT, LoopInfo *LI, bool PreserveLCSSA, - DenseMap &IncrMap) + DenseMap &IncrMap, + Instruction *LoopCtrlIV) : Parent(Parent), L(L), SE(SE), AA(AA), TLI(TLI), DT(DT), LI(LI), - PreserveLCSSA(PreserveLCSSA), IV(IV), IVToIncMap(IncrMap) {} + PreserveLCSSA(PreserveLCSSA), IV(IV), IVToIncMap(IncrMap), + LoopControlIV(LoopCtrlIV) {} /// Stage 1: Find all the DAG roots for the induction variable. bool findRoots(); @@ -391,6 +396,7 @@ UsesTy::iterator Start, UsesTy::iterator End); void replaceIV(Instruction *Inst, Instruction *IV, const SCEV *IterCount); + void updateNonLoopCtrlIncr(); LoopReroll *Parent; @@ -421,8 +427,18 @@ UsesTy Uses; // Map between induction variable and its increment DenseMap &IVToIncMap; + Instruction *LoopControlIV; }; + // Check if it is a compare-like instruction whose user is a branch + bool isCompareUsedByBranch(Instruction *I) { + auto *TI = I->getParent()->getTerminator(); + if (!isa(TI) || !isa(I)) + return false; + return I->hasOneUse() && TI->getOperand(0) == I; + }; + + bool isLoopControlIV(Loop *L, Instruction *IV); void collectPossibleIVs(Loop *L, SmallInstructionVector &PossibleIVs); void collectPossibleReductions(Loop *L, ReductionTracker &Reductions); @@ -494,6 +510,60 @@ return CIncSCEV; } +// Check if an IV is only used to control the loop. There are two cases: +// 1. It only has one use which is loop increment, and the increment is only +// used by comparison and the PHI, and the comparison is only used by branch. +// 2. It is used by loop increment and the comparison, the loop increment is +// only used by the PHI, and the comparison is used only by the branch. +bool LoopReroll::isLoopControlIV(Loop *L, Instruction *IV) { + + unsigned IVUses = IV->getNumUses(); + if (IVUses != 2 && IVUses != 1) + return false; + + for (auto *User : IV->users()) { + int32_t IncOrCmpUses = User->getNumUses(); + bool IsCompInst = isCompareUsedByBranch(cast(User)); + + // User can only have one or two uses. + if (IncOrCmpUses != 2 && IncOrCmpUses != 1) + return false; + + // Case 1 + if (IVUses == 1) { + // The only user must be the loop increment. + // The loop increment must have two uses. + if (IsCompInst || IncOrCmpUses != 2) + return false; + } + + // Case 2 + if (IVUses == 2 && IncOrCmpUses != 1) + return false; + + // The users of the IV must be a binary operation or a comparison + if (auto *BO = dyn_cast(User)) { + if (BO->getOpcode() == Instruction::Add) { + // Loop Increment + // User of Loop Increment should be either PHI or CMP + for (auto *UU : User->users()) { + if (PHINode *PN = dyn_cast(UU)) { + if (PN != IV) + return false; + } + // Must be a CMP + else if (!isCompareUsedByBranch(dyn_cast(UU))) + return false; + } + } else + return false; + // Compare : can only have one use, and must be branch + } else if (!IsCompInst) + return false; + } + return true; +} + // Collect the list of loop induction variables with respect to which it might // be possible to reroll the loop. void LoopReroll::collectPossibleIVs(Loop *L, @@ -525,7 +595,14 @@ IVToIncMap[&*I] = IncSCEV->getValue()->getSExtValue(); DEBUG(dbgs() << "LRR: Possible IV: " << *I << " = " << *PHISCEV << "\n"); - PossibleIVs.push_back(&*I); + + if (isLoopControlIV(L, &*I)) { + assert(!LoopControlIV && "Found two loop control only IV"); + LoopControlIV = &(*I); + DEBUG(dbgs() << "LRR: Possible loop control only IV: " << *I << " = " + << *PHISCEV << "\n"); + } else + PossibleIVs.push_back(&*I); } } } @@ -1072,6 +1149,28 @@ Uses[I].set(IL_All); } + // Make sure we mark loop-control-only PHIs as used in all iterations. See + // comment above LoopReroll::isLoopControlIV for more information. + BasicBlock *Header = L->getHeader(); + if (LoopControlIV && LoopControlIV != IV) { + for (auto *U : LoopControlIV->users()) { + Instruction *IVUser = dyn_cast(U); + // IVUser could be loop increment or compare + Uses[IVUser].set(IL_All); + for (auto *UU : IVUser->users()) { + Instruction *UUser = dyn_cast(UU); + // UUser could be compare, PHI or branch + Uses[UUser].set(IL_All); + // Is UUser a compare instruction? + if (UU->hasOneUse()) { + Instruction *BI = dyn_cast(*UUser->user_begin()); + if (BI == cast(Header->getTerminator())) + Uses[BI].set(IL_All); + } + } + } + } + // Make sure all instructions in the loop are in one and only one // set. for (auto &KV : Uses) { @@ -1314,25 +1413,65 @@ ++J; } - // We need to create a new induction variable for each different BaseInst. - for (auto &DRS : RootSets) - // Insert the new induction variable. - replaceIV(DRS.BaseInst, IV, IterCount); + bool HasTwoIVs = LoopControlIV && LoopControlIV != IV; + + if (HasTwoIVs) { + updateNonLoopCtrlIncr(); + replaceIV(LoopControlIV, LoopControlIV, IterCount); + } else + // We need to create a new induction variable for each different BaseInst. + for (auto &DRS : RootSets) + // Insert the new induction variable. + replaceIV(DRS.BaseInst, IV, IterCount); SimplifyInstructionsInBlock(Header, TLI); DeleteDeadPHIs(Header, TLI); } +// For non-loop-control IVs, we only need to update the last increment +// with right amount, then we are done. +void LoopReroll::DAGRootTracker::updateNonLoopCtrlIncr() { + const SCEV *NewInc = nullptr; + for (auto *LoopInc : LoopIncs) { + GetElementPtrInst *GEP = dyn_cast(LoopInc); + const SCEVConstant *COp = nullptr; + if (GEP && LoopInc->getOperand(0)->getType()->isPointerTy()) { + COp = dyn_cast(SE->getSCEV(LoopInc->getOperand(1))); + } else { + COp = dyn_cast(SE->getSCEV(LoopInc->getOperand(0))); + if (!COp) + COp = dyn_cast(SE->getSCEV(LoopInc->getOperand(1))); + } + + assert(COp && "Didn't find constant operand of LoopInc!\n"); + + const APInt &AInt = COp->getValue()->getValue(); + const SCEV *ScaleSCEV = SE->getConstant(COp->getType(), Scale); + if (AInt.isNegative()) { + NewInc = SE->getNegativeSCEV(COp); + NewInc = SE->getUDivExpr(NewInc, ScaleSCEV); + NewInc = SE->getNegativeSCEV(NewInc); + } else + NewInc = SE->getUDivExpr(COp, ScaleSCEV); + + LoopInc->setOperand(1, dyn_cast(NewInc)->getValue()); + } +} + void LoopReroll::DAGRootTracker::replaceIV(Instruction *Inst, Instruction *InstIV, const SCEV *IterCount) { BasicBlock *Header = L->getHeader(); int64_t Inc = IVToIncMap[InstIV]; - bool Negative = Inc < 0; + bool NeedNewIV = InstIV == LoopControlIV; + bool Negative = !NeedNewIV && Inc < 0; const SCEVAddRecExpr *RealIVSCEV = cast(SE->getSCEV(Inst)); const SCEV *Start = RealIVSCEV->getStart(); + if (NeedNewIV) + Start = SE->getConstant(Start->getType(), 0); + const SCEV *SizeOfExpr = nullptr; const SCEV *IncrExpr = SE->getConstant(RealIVSCEV->getType(), Negative ? -1 : 1); @@ -1360,6 +1499,12 @@ if (Uses[BI].find_first() == IL_All) { const SCEV *ICSCEV = RealIVSCEV->evaluateAtIteration(IterCount, *SE); + if (NeedNewIV) + ICSCEV = SE->getMulExpr(IterCount, + SE->getConstant(IterCount->getType(), Scale)); + else + ICSCEV = RealIVSCEV->evaluateAtIteration(IterCount, *SE); + // Iteration count SCEV minus or plus 1 const SCEV *MinusPlus1SCEV = SE->getConstant(ICSCEV->getType(), Negative ? -1 : 1); @@ -1514,7 +1659,7 @@ const SCEV *IterCount, ReductionTracker &Reductions) { DAGRootTracker DAGRoots(this, L, IV, SE, AA, TLI, DT, LI, PreserveLCSSA, - IVToIncMap); + IVToIncMap, LoopControlIV); if (!DAGRoots.findRoots()) return false; @@ -1566,6 +1711,7 @@ // reroll (there may be several possible options). SmallInstructionVector PossibleIVs; IVToIncMap.clear(); + LoopControlIV = nullptr; collectPossibleIVs(L, PossibleIVs); if (PossibleIVs.empty()) { Index: test/Transforms/LoopReroll/complex_reroll.ll =================================================================== --- /dev/null +++ test/Transforms/LoopReroll/complex_reroll.ll @@ -0,0 +1,134 @@ +; RUN: opt -S -loop-reroll %s | FileCheck %s +declare i32 @goo(i32, i32) + +@buf = external global i8* +@aaa = global [16 x i8] c"\01\02\03\04\05\06\07\08\09\0A\0B\0C\0D\0E\0F\10", align 1 + +define i32 @test1(i32 %len) { +entry: + br label %while.body + +while.body: +;CHECK-LABEL: while.body: +;CHECK-NEXT: %indvar = phi i32 [ %indvar.next, %while.body ], [ 0, %entry ] +;CHECK-NEXT: %buf.021 = phi i8* [ getelementptr inbounds ([16 x i8], [16 x i8]* @aaa, i64 0, i64 0), %entry ], [ %add.ptr, %while.body ] +;CHECK-NEXT: %sum44.020 = phi i64 [ 0, %entry ], [ %add, %while.body ] +;CHECK-NEXT: [[T2:%[0-9]+]] = load i8, i8* %buf.021, align 1 +;CHECK-NEXT: %conv = zext i8 [[T2]] to i64 +;CHECK-NEXT: %add = add i64 %conv, %sum44.020 +;CHECK-NEXT: %add.ptr = getelementptr inbounds i8, i8* %buf.021, i64 1 +;CHECK-NEXT: %indvar.next = add i32 %indvar, 1 +;CHECK-NEXT: %exitcond = icmp eq i32 %indvar, 1 +;CHECK-NEXT: br i1 %exitcond, label %while.end, label %while.body + + %dec22 = phi i32 [ 4, %entry ], [ %dec, %while.body ] + %buf.021 = phi i8* [ getelementptr inbounds ([16 x i8], [16 x i8]* @aaa, i64 0, i64 0), %entry ], [ %add.ptr, %while.body ] + %sum44.020 = phi i64 [ 0, %entry ], [ %add9, %while.body ] + %0 = load i8, i8* %buf.021, align 1 + %conv = zext i8 %0 to i64 + %add = add i64 %conv, %sum44.020 + %arrayidx1 = getelementptr inbounds i8, i8* %buf.021, i64 1 + %1 = load i8, i8* %arrayidx1, align 1 + %conv2 = zext i8 %1 to i64 + %add3 = add i64 %add, %conv2 + %arrayidx4 = getelementptr inbounds i8, i8* %buf.021, i64 2 + %2 = load i8, i8* %arrayidx4, align 1 + %conv5 = zext i8 %2 to i64 + %add6 = add i64 %add3, %conv5 + %arrayidx7 = getelementptr inbounds i8, i8* %buf.021, i64 3 + %3 = load i8, i8* %arrayidx7, align 1 + %conv8 = zext i8 %3 to i64 + %add9 = add i64 %add6, %conv8 + %add.ptr = getelementptr inbounds i8, i8* %buf.021, i64 4 + %dec = add nsw i32 %dec22, -1 + %tobool = icmp eq i32 %dec, 0 + br i1 %tobool, label %while.end, label %while.body + +while.end: ; preds = %while.body + %conv11 = trunc i64 %add9 to i32 + %call = tail call i32 @goo(i32 0, i32 %conv11) + unreachable +} + +define i32 @test2(i32 %N, i32* nocapture readonly %a, i32 %S) { +entry: + %cmp.9 = icmp sgt i32 %N, 0 + br i1 %cmp.9, label %for.body.lr.ph, label %for.cond.cleanup + +for.body.lr.ph: + br label %for.body + +for.cond.for.cond.cleanup_crit_edge: + br label %for.cond.cleanup + +for.cond.cleanup: + %S.addr.0.lcssa = phi i32 [ %add2, %for.cond.for.cond.cleanup_crit_edge ], [ %S, %entry ] + ret i32 %S.addr.0.lcssa + +for.body: +;CHECK-LABEL: for.body: +;CHECK-NEXT: %indvar = phi i32 [ %indvar.next, %for.body ], [ 0, %for.body.lr.ph ] +;CHECK-NEXT: %S.addr.011 = phi i32 [ %S, %for.body.lr.ph ], [ %add, %for.body ] +;CHECK-NEXT: %a.addr.010 = phi i32* [ %a, %for.body.lr.ph ], [ %incdec.ptr1, %for.body ] +;CHECK-NEXT: %4 = load i32, i32* %a.addr.010, align 4 +;CHECK-NEXT: %add = add nsw i32 %4, %S.addr.011 +;CHECK-NEXT: %incdec.ptr1 = getelementptr inbounds i32, i32* %a.addr.010, i64 1 +;CHECK-NEXT: %indvar.next = add i32 %indvar, 1 +;CHECK-NEXT: %exitcond = icmp eq i32 %indvar, %3 +;CHECK-NEXT: br i1 %exitcond, label %for.cond.for.cond.cleanup_crit_edge, label %for.body + + %i.012 = phi i32 [ 0, %for.body.lr.ph ], [ %add3, %for.body ] + %S.addr.011 = phi i32 [ %S, %for.body.lr.ph ], [ %add2, %for.body ] + %a.addr.010 = phi i32* [ %a, %for.body.lr.ph ], [ %incdec.ptr1, %for.body ] + %incdec.ptr = getelementptr inbounds i32, i32* %a.addr.010, i64 1 + %0 = load i32, i32* %a.addr.010, align 4 + %add = add nsw i32 %0, %S.addr.011 + %incdec.ptr1 = getelementptr inbounds i32, i32* %a.addr.010, i64 2 + %1 = load i32, i32* %incdec.ptr, align 4 + %add2 = add nsw i32 %add, %1 + %add3 = add nsw i32 %i.012, 2 + %cmp = icmp slt i32 %add3, %N + br i1 %cmp, label %for.body, label %for.cond.for.cond.cleanup_crit_edge +} + +define i32 @test3(i32* nocapture readonly %buf, i32 %len) #0 { +entry: + %cmp10 = icmp sgt i32 %len, 1 + br i1 %cmp10, label %while.body.preheader, label %while.end + +while.body.preheader: ; preds = %entry + br label %while.body + +while.body: ; preds = %while.body.preheader, %while.body +;CHECK-LABEL: while.body: +;CHECK-NEXT: %indvar = phi i32 [ %indvar.next, %while.body ], [ 0, %while.body.preheader ] +;CHECK-NEXT: %S.012 = phi i32 [ %add, %while.body ], [ undef, %while.body.preheader ] +;CHECK-NEXT: %buf.addr.011 = phi i32* [ %add.ptr, %while.body ], [ %buf, %while.body.preheader ] +;CHECK-NEXT: %4 = load i32, i32* %buf.addr.011, align 4 +;CHECK-NEXT: %add = add nsw i32 %4, %S.012 +;CHECK-NEXT: %add.ptr = getelementptr inbounds i32, i32* %buf.addr.011, i64 -1 +;CHECK-NEXT: %indvar.next = add i32 %indvar, 1 +;CHECK-NEXT: %exitcond = icmp eq i32 %indvar, %3 +;CHECK-NEXT: br i1 %exitcond, label %while.end.loopexit, label %while.body + + %i.013 = phi i32 [ %sub, %while.body ], [ %len, %while.body.preheader ] + %S.012 = phi i32 [ %add2, %while.body ], [ undef, %while.body.preheader ] + %buf.addr.011 = phi i32* [ %add.ptr, %while.body ], [ %buf, %while.body.preheader ] + %0 = load i32, i32* %buf.addr.011, align 4 + %add = add nsw i32 %0, %S.012 + %arrayidx1 = getelementptr inbounds i32, i32* %buf.addr.011, i64 -1 + %1 = load i32, i32* %arrayidx1, align 4 + %add2 = add nsw i32 %add, %1 + %add.ptr = getelementptr inbounds i32, i32* %buf.addr.011, i64 -2 + %sub = add nsw i32 %i.013, -2 + %cmp = icmp sgt i32 %sub, 1 + br i1 %cmp, label %while.body, label %while.end.loopexit + +while.end.loopexit: ; preds = %while.body + br label %while.end + +while.end: ; preds = %while.end.loopexit, %entry + %S.0.lcssa = phi i32 [ undef, %entry ], [ %add2, %while.end.loopexit ] + ret i32 %S.0.lcssa +} +