Index: llvm/trunk/lib/Transforms/Scalar/LoopRerollPass.cpp =================================================================== --- llvm/trunk/lib/Transforms/Scalar/LoopRerollPass.cpp +++ llvm/trunk/lib/Transforms/Scalar/LoopRerollPass.cpp @@ -70,10 +70,6 @@ STATISTIC(NumRerolledLoops, "Number of rerolled loops"); static cl::opt -MaxInc("max-reroll-increment", cl::init(2048), cl::Hidden, - cl::desc("The maximum increment for loop rerolling")); - -static cl::opt NumToleratedFailedMatches("reroll-num-tolerated-failed-matches", cl::init(400), cl::Hidden, cl::desc("The maximum number of failures to tolerate" @@ -398,8 +394,8 @@ /// Stage 3: Assuming validate() returned true, perform the /// replacement. - /// @param IterCount The maximum iteration count of L. - void replace(const SCEV *IterCount); + /// @param BackedgeTakenCount The backedge-taken count of L. + void replace(const SCEV *BackedgeTakenCount); protected: using UsesTy = MapVector; @@ -429,8 +425,7 @@ bool instrDependsOn(Instruction *I, UsesTy::iterator Start, UsesTy::iterator End); - void replaceIV(Instruction *Inst, Instruction *IV, const SCEV *IterCount); - void updateNonLoopCtrlIncr(); + void replaceIV(DAGRootSet &DRS, const SCEV *Start, const SCEV *IncrExpr); LoopReroll *Parent; @@ -483,8 +478,8 @@ void collectPossibleIVs(Loop *L, SmallInstructionVector &PossibleIVs); void collectPossibleReductions(Loop *L, ReductionTracker &Reductions); - bool reroll(Instruction *IV, Loop *L, BasicBlock *Header, const SCEV *IterCount, - ReductionTracker &Reductions); + bool reroll(Instruction *IV, Loop *L, BasicBlock *Header, + const SCEV *BackedgeTakenCount, ReductionTracker &Reductions); }; } // end anonymous namespace @@ -511,48 +506,6 @@ return false; } -static const SCEVConstant *getIncrmentFactorSCEV(ScalarEvolution *SE, - const SCEV *SCEVExpr, - Instruction &IV) { - const SCEVMulExpr *MulSCEV = dyn_cast(SCEVExpr); - - // If StepRecurrence of a SCEVExpr is a constant (c1 * c2, c2 = sizeof(ptr)), - // Return c1. - if (!MulSCEV && IV.getType()->isPointerTy()) - if (const SCEVConstant *IncSCEV = dyn_cast(SCEVExpr)) { - const PointerType *PTy = cast(IV.getType()); - Type *ElTy = PTy->getElementType(); - const SCEV *SizeOfExpr = - SE->getSizeOfExpr(SE->getEffectiveSCEVType(IV.getType()), ElTy); - if (IncSCEV->getValue()->getValue().isNegative()) { - const SCEV *NewSCEV = - SE->getUDivExpr(SE->getNegativeSCEV(SCEVExpr), SizeOfExpr); - return dyn_cast(SE->getNegativeSCEV(NewSCEV)); - } else { - return dyn_cast(SE->getUDivExpr(SCEVExpr, SizeOfExpr)); - } - } - - if (!MulSCEV) - return nullptr; - - // If StepRecurrence of a SCEVExpr is a c * sizeof(x), where c is constant, - // Return c. - const SCEVConstant *CIncSCEV = nullptr; - for (const SCEV *Operand : MulSCEV->operands()) { - if (const SCEVConstant *Constant = dyn_cast(Operand)) { - CIncSCEV = Constant; - } else if (const SCEVUnknown *Unknown = dyn_cast(Operand)) { - Type *AllocTy; - if (!Unknown->isSizeOf(AllocTy)) - break; - } else { - return nullptr; - } - } - return CIncSCEV; -} - // Check if an IV is only used to control the loop. There are two cases: // 1. It only has one use which is loop increment, and the increment is only // used by comparison and the PHI (could has sext with nsw in between), and the @@ -633,16 +586,8 @@ continue; if (!PHISCEV->isAffine()) continue; - const SCEVConstant *IncSCEV = nullptr; - if (I->getType()->isPointerTy()) - IncSCEV = - getIncrmentFactorSCEV(SE, PHISCEV->getStepRecurrence(*SE), *I); - else - IncSCEV = dyn_cast(PHISCEV->getStepRecurrence(*SE)); + auto IncSCEV = dyn_cast(PHISCEV->getStepRecurrence(*SE)); if (IncSCEV) { - const APInt &AInt = IncSCEV->getValue()->getValue().abs(); - if (IncSCEV->getValue()->isZero() || AInt.uge(MaxInc)) - continue; IVToIncMap[&*I] = IncSCEV->getValue()->getSExtValue(); LLVM_DEBUG(dbgs() << "LRR: Possible IV: " << *I << " = " << *PHISCEV << "\n"); @@ -1463,8 +1408,20 @@ return true; } -void LoopReroll::DAGRootTracker::replace(const SCEV *IterCount) { +void LoopReroll::DAGRootTracker::replace(const SCEV *BackedgeTakenCount) { BasicBlock *Header = L->getHeader(); + + // Compute the start and increment for each BaseInst before we start erasing + // instructions. + SmallVector StartExprs; + SmallVector IncrExprs; + for (auto &DRS : RootSets) { + const SCEVAddRecExpr *IVSCEV = + cast(SE->getSCEV(DRS.BaseInst)); + StartExprs.push_back(IVSCEV->getStart()); + IncrExprs.push_back(SE->getMinusSCEV(SE->getSCEV(DRS.Roots[0]), IVSCEV)); + } + // Remove instructions associated with non-base iterations. for (BasicBlock::reverse_iterator J = Header->rbegin(), JE = Header->rend(); J != JE;) { @@ -1478,74 +1435,47 @@ ++J; } - bool HasTwoIVs = LoopControlIV && LoopControlIV != IV; + // Rewrite each BaseInst using SCEV. + for (size_t i = 0, e = RootSets.size(); i != e; ++i) + // Insert the new induction variable. + replaceIV(RootSets[i], StartExprs[i], IncrExprs[i]); - if (HasTwoIVs) { - updateNonLoopCtrlIncr(); - replaceIV(LoopControlIV, LoopControlIV, IterCount); - } else - // We need to create a new induction variable for each different BaseInst. - for (auto &DRS : RootSets) - // Insert the new induction variable. - replaceIV(DRS.BaseInst, IV, IterCount); + { // Limit the lifetime of SCEVExpander. + BranchInst *BI = cast(Header->getTerminator()); + const DataLayout &DL = Header->getModule()->getDataLayout(); + SCEVExpander Expander(*SE, DL, "reroll"); + auto Zero = SE->getZero(BackedgeTakenCount->getType()); + auto One = SE->getOne(BackedgeTakenCount->getType()); + auto NewIVSCEV = SE->getAddRecExpr(Zero, One, L, SCEV::FlagAnyWrap); + Value *NewIV = + Expander.expandCodeFor(NewIVSCEV, BackedgeTakenCount->getType(), + Header->getFirstNonPHIOrDbg()); + // FIXME: This arithmetic can overflow. + auto TripCount = SE->getAddExpr(BackedgeTakenCount, One); + auto ScaledTripCount = SE->getMulExpr( + TripCount, SE->getConstant(BackedgeTakenCount->getType(), Scale)); + auto ScaledBECount = SE->getMinusSCEV(ScaledTripCount, One); + Value *TakenCount = + Expander.expandCodeFor(ScaledBECount, BackedgeTakenCount->getType(), + Header->getFirstNonPHIOrDbg()); + Value *Cond = + new ICmpInst(BI, CmpInst::ICMP_EQ, NewIV, TakenCount, "exitcond"); + BI->setCondition(Cond); + + if (BI->getSuccessor(1) != Header) + BI->swapSuccessors(); + } SimplifyInstructionsInBlock(Header, TLI); DeleteDeadPHIs(Header, TLI); } -// For non-loop-control IVs, we only need to update the last increment -// with right amount, then we are done. -void LoopReroll::DAGRootTracker::updateNonLoopCtrlIncr() { - const SCEV *NewInc = nullptr; - for (auto *LoopInc : LoopIncs) { - GetElementPtrInst *GEP = dyn_cast(LoopInc); - const SCEVConstant *COp = nullptr; - if (GEP && LoopInc->getOperand(0)->getType()->isPointerTy()) { - COp = dyn_cast(SE->getSCEV(LoopInc->getOperand(1))); - } else { - COp = dyn_cast(SE->getSCEV(LoopInc->getOperand(0))); - if (!COp) - COp = dyn_cast(SE->getSCEV(LoopInc->getOperand(1))); - } - - assert(COp && "Didn't find constant operand of LoopInc!\n"); - - const APInt &AInt = COp->getValue()->getValue(); - const SCEV *ScaleSCEV = SE->getConstant(COp->getType(), Scale); - if (AInt.isNegative()) { - NewInc = SE->getNegativeSCEV(COp); - NewInc = SE->getUDivExpr(NewInc, ScaleSCEV); - NewInc = SE->getNegativeSCEV(NewInc); - } else - NewInc = SE->getUDivExpr(COp, ScaleSCEV); - - LoopInc->setOperand(1, dyn_cast(NewInc)->getValue()); - } -} - -void LoopReroll::DAGRootTracker::replaceIV(Instruction *Inst, - Instruction *InstIV, - const SCEV *IterCount) { +void LoopReroll::DAGRootTracker::replaceIV(DAGRootSet &DRS, + const SCEV *Start, + const SCEV *IncrExpr) { BasicBlock *Header = L->getHeader(); - int64_t Inc = IVToIncMap[InstIV]; - bool NeedNewIV = InstIV == LoopControlIV; - bool Negative = !NeedNewIV && Inc < 0; - - const SCEVAddRecExpr *RealIVSCEV = cast(SE->getSCEV(Inst)); - const SCEV *Start = RealIVSCEV->getStart(); - - if (NeedNewIV) - Start = SE->getConstant(Start->getType(), 0); - - const SCEV *SizeOfExpr = nullptr; - const SCEV *IncrExpr = - SE->getConstant(RealIVSCEV->getType(), Negative ? -1 : 1); - if (auto *PTy = dyn_cast(Inst->getType())) { - Type *ElTy = PTy->getElementType(); - SizeOfExpr = - SE->getSizeOfExpr(SE->getEffectiveSCEVType(Inst->getType()), ElTy); - IncrExpr = SE->getMulExpr(IncrExpr, SizeOfExpr); - } + Instruction *Inst = DRS.BaseInst; + const SCEV *NewIVSCEV = SE->getAddRecExpr(Start, IncrExpr, L, SCEV::FlagAnyWrap); @@ -1558,54 +1488,6 @@ for (auto &KV : Uses) if (KV.second.find_first() == 0) KV.first->replaceUsesOfWith(Inst, NewIV); - - if (BranchInst *BI = dyn_cast(Header->getTerminator())) { - // FIXME: Why do we need this check? - if (Uses[BI].find_first() == IL_All) { - const SCEV *ICSCEV = RealIVSCEV->evaluateAtIteration(IterCount, *SE); - - if (NeedNewIV) - ICSCEV = SE->getMulExpr(IterCount, - SE->getConstant(IterCount->getType(), Scale)); - - // Iteration count SCEV minus or plus 1 - const SCEV *MinusPlus1SCEV = - SE->getConstant(ICSCEV->getType(), Negative ? -1 : 1); - if (Inst->getType()->isPointerTy()) { - assert(SizeOfExpr && "SizeOfExpr is not initialized"); - MinusPlus1SCEV = SE->getMulExpr(MinusPlus1SCEV, SizeOfExpr); - } - - const SCEV *ICMinusPlus1SCEV = SE->getMinusSCEV(ICSCEV, MinusPlus1SCEV); - // Iteration count minus 1 - Instruction *InsertPtr = nullptr; - if (isa(ICMinusPlus1SCEV)) { - InsertPtr = BI; - } else { - BasicBlock *Preheader = L->getLoopPreheader(); - if (!Preheader) - Preheader = InsertPreheaderForLoop(L, DT, LI, PreserveLCSSA); - InsertPtr = Preheader->getTerminator(); - } - - if (!isa(NewIV->getType()) && NeedNewIV && - (SE->getTypeSizeInBits(NewIV->getType()) < - SE->getTypeSizeInBits(ICMinusPlus1SCEV->getType()))) { - IRBuilder<> Builder(BI); - Builder.SetCurrentDebugLocation(BI->getDebugLoc()); - NewIV = Builder.CreateSExt(NewIV, ICMinusPlus1SCEV->getType()); - } - Value *ICMinusPlus1 = Expander.expandCodeFor( - ICMinusPlus1SCEV, NewIV->getType(), InsertPtr); - - Value *Cond = - new ICmpInst(BI, CmpInst::ICMP_EQ, NewIV, ICMinusPlus1, "exitcond"); - BI->setCondition(Cond); - - if (BI->getSuccessor(1) != Header) - BI->swapSuccessors(); - } - } } } @@ -1722,7 +1604,7 @@ // f(%iv) or part of some f(%iv.i). If all of that is true (and all reductions // have been validated), then we reroll the loop. bool LoopReroll::reroll(Instruction *IV, Loop *L, BasicBlock *Header, - const SCEV *IterCount, + const SCEV *BackedgeTakenCount, ReductionTracker &Reductions) { DAGRootTracker DAGRoots(this, L, IV, SE, AA, TLI, DT, LI, PreserveLCSSA, IVToIncMap, LoopControlIV); @@ -1740,7 +1622,7 @@ // making changes! Reductions.replaceSelected(); - DAGRoots.replace(IterCount); + DAGRoots.replace(BackedgeTakenCount); ++NumRerolledLoops; return true; @@ -1769,10 +1651,10 @@ if (!SE->hasLoopInvariantBackedgeTakenCount(L)) return false; - const SCEV *LIBETC = SE->getBackedgeTakenCount(L); - const SCEV *IterCount = SE->getAddExpr(LIBETC, SE->getOne(LIBETC->getType())); + const SCEV *BackedgeTakenCount = SE->getBackedgeTakenCount(L); LLVM_DEBUG(dbgs() << "\n Before Reroll:\n" << *(L->getHeader()) << "\n"); - LLVM_DEBUG(dbgs() << "LRR: iteration count = " << *IterCount << "\n"); + LLVM_DEBUG(dbgs() << "LRR: backedge-taken count = " << *BackedgeTakenCount + << "\n"); // First, we need to find the induction variable with respect to which we can // reroll (there may be several possible options). @@ -1793,7 +1675,7 @@ // For each possible IV, collect the associated possible set of 'root' nodes // (i+1, i+2, etc.). for (Instruction *PossibleIV : PossibleIVs) - if (reroll(PossibleIV, L, Header, IterCount, Reductions)) { + if (reroll(PossibleIV, L, Header, BackedgeTakenCount, Reductions)) { Changed = true; break; } Index: llvm/trunk/test/Transforms/LoopReroll/basic.ll =================================================================== --- llvm/trunk/test/Transforms/LoopReroll/basic.ll +++ llvm/trunk/test/Transforms/LoopReroll/basic.ll @@ -79,11 +79,12 @@ ; CHECK: for.body: ; CHECK: %indvar = phi i64 [ %indvar.next, %for.body ], [ 0, %entry ] +; CHECK: %0 = trunc i64 %indvar to i32 ; CHECK: %call = tail call i32 @foo(i32 0) #1 ; CHECK: %arrayidx = getelementptr inbounds i32, i32* %x, i64 %indvar ; CHECK: store i32 %call, i32* %arrayidx, align 4 ; CHECK: %indvar.next = add i64 %indvar, 1 -; CHECK: %exitcond = icmp eq i64 %indvar, 1499 +; CHECK: %exitcond = icmp eq i32 %0, 1499 ; CHECK: br i1 %exitcond, label %for.end, label %for.body ; CHECK: ret @@ -205,15 +206,16 @@ ; CHECK: for.body: ; CHECK: %indvar = phi i64 [ %indvar.next, %for.body ], [ 0, %entry ] +; CHECK: %0 = trunc i64 %indvar to i32 ; CHECK: %arrayidx = getelementptr inbounds float, float* %b, i64 %indvar -; CHECK: %0 = load float, float* %arrayidx, align 4 -; CHECK: %mul = fmul float %0, %alpha +; CHECK: %1 = load float, float* %arrayidx, align 4 +; CHECK: %mul = fmul float %1, %alpha ; CHECK: %arrayidx2 = getelementptr inbounds float, float* %a, i64 %indvar -; CHECK: %1 = load float, float* %arrayidx2, align 4 -; CHECK: %add = fadd float %1, %mul +; CHECK: %2 = load float, float* %arrayidx2, align 4 +; CHECK: %add = fadd float %2, %mul ; CHECK: store float %add, float* %arrayidx2, align 4 ; CHECK: %indvar.next = add i64 %indvar, 1 -; CHECK: %exitcond = icmp eq i64 %indvar, 3199 +; CHECK: %exitcond = icmp eq i32 %0, 3199 ; CHECK: br i1 %exitcond, label %for.end, label %for.body ; CHECK: ret @@ -302,18 +304,19 @@ ; CHECK: for.body: ; CHECK: %indvar = phi i64 [ %indvar.next, %for.body ], [ 0, %entry ] +; CHECK: %0 = trunc i64 %indvar to i32 ; CHECK: %arrayidx = getelementptr inbounds i32, i32* %ip, i64 %indvar -; CHECK: %0 = load i32, i32* %arrayidx, align 4 -; CHECK: %idxprom1 = sext i32 %0 to i64 +; CHECK: %1 = load i32, i32* %arrayidx, align 4 +; CHECK: %idxprom1 = sext i32 %1 to i64 ; CHECK: %arrayidx2 = getelementptr inbounds float, float* %b, i64 %idxprom1 -; CHECK: %1 = load float, float* %arrayidx2, align 4 -; CHECK: %mul = fmul float %1, %alpha +; CHECK: %2 = load float, float* %arrayidx2, align 4 +; CHECK: %mul = fmul float %2, %alpha ; CHECK: %arrayidx4 = getelementptr inbounds float, float* %a, i64 %indvar -; CHECK: %2 = load float, float* %arrayidx4, align 4 -; CHECK: %add = fadd float %2, %mul +; CHECK: %3 = load float, float* %arrayidx4, align 4 +; CHECK: %add = fadd float %3, %mul ; CHECK: store float %add, float* %arrayidx4, align 4 ; CHECK: %indvar.next = add i64 %indvar, 1 -; CHECK: %exitcond = icmp eq i64 %indvar, 3199 +; CHECK: %exitcond = icmp eq i32 %0, 3199 ; CHECK: br i1 %exitcond, label %for.end, label %for.body ; CHECK: ret @@ -374,8 +377,8 @@ ; CHECK: %arrayidx6 = getelementptr inbounds i32, i32* %x, i64 %0 ; CHECK: store i32 %call, i32* %arrayidx6, align 4 ; CHECK: %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 -; CHECK: %exitcond2 = icmp eq i64 %0, 1505 -; CHECK: br i1 %exitcond2, label %for.end, label %for.body +; CHECK: %exitcond1 = icmp eq i64 %indvars.iv, 1499 +; CHECK: br i1 %exitcond1, label %for.end, label %for.body for.end: ; preds = %for.body ret void @@ -434,8 +437,8 @@ ; CHECK: %arrayidx6 = getelementptr inbounds i32, i32* %x, i64 %0 ; CHECK: store i32 %call, i32* %arrayidx6, align 4 ; CHECK: %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 -; CHECK: %exitcond2 = icmp eq i64 %indvars.iv, 1499 -; CHECK: br i1 %exitcond2, label %for.end, label %for.body +; CHECK: %exitcond1 = icmp eq i64 %indvars.iv, 1499 +; CHECK: br i1 %exitcond1, label %for.end, label %for.body for.end: ; preds = %for.body ret void @@ -481,7 +484,7 @@ ; CHECK: %arrayidx = getelementptr inbounds i32, i32* %x, i64 %0 ; CHECK: store i32 %call, i32* %arrayidx, align 4 ; CHECK: %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 -; CHECK: %exitcond1 = icmp eq i64 %0, 1502 +; CHECK: %exitcond1 = icmp eq i64 %indvars.iv, 1499 ; CHECK: br i1 %exitcond1, label %for.end, label %for.body for.end: ; preds = %for.body @@ -599,8 +602,8 @@ ; CHECK-NEXT: %scevgep = getelementptr i32, i32* %x, i64 %indvars.iv ; CHECK-NEXT: store i32 %call, i32* %scevgep, align 4 ; CHECK-NEXT: %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 -; CHECK-NEXT: %exitcond2 = icmp eq i32* %scevgep, %scevgep1 -; CHECK-NEXT: br i1 %exitcond2, label %for.end, label %for.body +; CHECK-NEXT: %exitcond1 = icmp eq i64 %indvars.iv, 1499 +; CHECK-NEXT: br i1 %exitcond1, label %for.end, label %for.body for.end: ; preds = %for.body ret void @@ -738,6 +741,50 @@ ret void } +define void @pointer_bitcast_baseinst(i16* %arg, i8* %arg1, i64 %arg2) { +; CHECK-LABEL: @pointer_bitcast_baseinst( +; CHECK: bb3: +; CHECK-NEXT: %indvar = phi i64 [ %indvar.next, %bb3 ], [ 0, %bb ] +; CHECK-NEXT: %4 = shl i64 %indvar, 3 +; CHECK-NEXT: %5 = add i64 %4, 1 +; CHECK-NEXT: %tmp5 = shl nuw i64 %5, 1 +; CHECK-NEXT: %tmp6 = getelementptr i8, i8* %arg1, i64 %tmp5 +; CHECK-NEXT: %tmp7 = bitcast i8* %tmp6 to <8 x i16>* +; CHECK-NEXT: %tmp8 = load <8 x i16>, <8 x i16>* %tmp7, align 2 +; CHECK-NEXT: %tmp13 = getelementptr i16, i16* %arg, i64 %5 +; CHECK-NEXT: %tmp14 = bitcast i16* %tmp13 to <8 x i16>* +; CHECK-NEXT: store <8 x i16> %tmp8, <8 x i16>* %tmp14, align 2 +; CHECK-NEXT: %indvar.next = add i64 %indvar, 1 +; CHECK-NEXT: %exitcond = icmp eq i64 %indvar, %3 +; CHECK-NEXT: br i1 %exitcond, label %bb19, label %bb3 +bb: + br label %bb3 + +bb3: ; preds = %bb3, %bb + %tmp = phi i64 [ 1, %bb ], [ %tmp17, %bb3 ] + %tmp4 = add nuw i64 %tmp, 8 + %tmp5 = shl nuw i64 %tmp, 1 + %tmp6 = getelementptr i8, i8* %arg1, i64 %tmp5 + %tmp7 = bitcast i8* %tmp6 to <8 x i16>* + %tmp8 = load <8 x i16>, <8 x i16>* %tmp7, align 2 + %tmp9 = shl i64 %tmp4, 1 + %tmp10 = getelementptr i8, i8* %arg1, i64 %tmp9 + %tmp11 = bitcast i8* %tmp10 to <8 x i16>* + %tmp12 = load <8 x i16>, <8 x i16>* %tmp11, align 2 + %tmp13 = getelementptr i16, i16* %arg, i64 %tmp + %tmp14 = bitcast i16* %tmp13 to <8 x i16>* + store <8 x i16> %tmp8, <8 x i16>* %tmp14, align 2 + %tmp15 = getelementptr i16, i16* %arg, i64 %tmp4 + %tmp16 = bitcast i16* %tmp15 to <8 x i16>* + store <8 x i16> %tmp12, <8 x i16>* %tmp16, align 2 + %tmp17 = add nuw nsw i64 %tmp, 16 + %tmp18 = icmp eq i64 %tmp17, %arg2 + br i1 %tmp18, label %bb19, label %bb3 + +bb19: ; preds = %bb3 + ret void +} + attributes #0 = { nounwind uwtable } attributes #1 = { nounwind } Index: llvm/trunk/test/Transforms/LoopReroll/complex_reroll.ll =================================================================== --- llvm/trunk/test/Transforms/LoopReroll/complex_reroll.ll +++ llvm/trunk/test/Transforms/LoopReroll/complex_reroll.ll @@ -10,15 +10,15 @@ while.body: ;CHECK-LABEL: while.body: -;CHECK-NEXT: %indvar = phi i32 [ %indvar.next, %while.body ], [ 0, %entry ] -;CHECK-NEXT: %buf.021 = phi i8* [ getelementptr inbounds ([16 x i8], [16 x i8]* @aaa, i64 0, i64 0), %entry ], [ %add.ptr, %while.body ] +;CHECK-NEXT: %indvar = phi i64 [ %indvar.next, %while.body ], [ 0, %entry ] ;CHECK-NEXT: %sum44.020 = phi i64 [ 0, %entry ], [ %add, %while.body ] -;CHECK-NEXT: [[T2:%[0-9]+]] = load i8, i8* %buf.021, align 1 +;CHECK-NEXT: %0 = trunc i64 %indvar to i32 +;CHECK-NEXT: %scevgep = getelementptr [16 x i8], [16 x i8]* @aaa, i64 0, i64 %indvar +;CHECK-NEXT: [[T2:%[0-9]+]] = load i8, i8* %scevgep, align 1 ;CHECK-NEXT: %conv = zext i8 [[T2]] to i64 ;CHECK-NEXT: %add = add i64 %conv, %sum44.020 -;CHECK-NEXT: %add.ptr = getelementptr inbounds i8, i8* %buf.021, i64 1 -;CHECK-NEXT: %indvar.next = add i32 %indvar, 1 -;CHECK-NEXT: %exitcond = icmp eq i32 %indvar, 1 +;CHECK-NEXT: %indvar.next = add i64 %indvar, 1 +;CHECK-NEXT: %exitcond = icmp eq i32 %0, 15 ;CHECK-NEXT: br i1 %exitcond, label %while.end, label %while.body %dec22 = phi i32 [ 4, %entry ], [ %dec, %while.body ] @@ -67,14 +67,14 @@ for.body: ;CHECK-LABEL: for.body: -;CHECK-NEXT: %indvar = phi i32 [ %indvar.next, %for.body ], [ 0, %for.body.lr.ph ] +;CHECK-NEXT: %indvar = phi i64 [ %indvar.next, %for.body ], [ 0, %for.body.lr.ph ] ;CHECK-NEXT: %S.addr.011 = phi i32 [ %S, %for.body.lr.ph ], [ %add, %for.body ] -;CHECK-NEXT: %a.addr.010 = phi i32* [ %a, %for.body.lr.ph ], [ %incdec.ptr1, %for.body ] -;CHECK-NEXT: %4 = load i32, i32* %a.addr.010, align 4 -;CHECK-NEXT: %add = add nsw i32 %4, %S.addr.011 -;CHECK-NEXT: %incdec.ptr1 = getelementptr inbounds i32, i32* %a.addr.010, i64 1 -;CHECK-NEXT: %indvar.next = add i32 %indvar, 1 -;CHECK-NEXT: %exitcond = icmp eq i32 %indvar, %3 +;CHECK-NEXT: %4 = trunc i64 %indvar to i32 +;CHECK-NEXT: %scevgep = getelementptr i32, i32* %a, i64 %indvar +;CHECK-NEXT: %5 = load i32, i32* %scevgep, align 4 +;CHECK-NEXT: %add = add nsw i32 %5, %S.addr.011 +;CHECK-NEXT: %indvar.next = add i64 %indvar, 1 +;CHECK-NEXT: %exitcond = icmp eq i32 %4, %3 ;CHECK-NEXT: br i1 %exitcond, label %for.cond.for.cond.cleanup_crit_edge, label %for.body %i.012 = phi i32 [ 0, %for.body.lr.ph ], [ %add3, %for.body ] @@ -101,14 +101,15 @@ while.body: ; preds = %while.body.preheader, %while.body ;CHECK-LABEL: while.body: -;CHECK-NEXT: %indvar = phi i32 [ %indvar.next, %while.body ], [ 0, %while.body.preheader ] +;CHECK-NEXT: %indvar = phi i64 [ %indvar.next, %while.body ], [ 0, %while.body.preheader ] ;CHECK-NEXT: %S.012 = phi i32 [ %add, %while.body ], [ undef, %while.body.preheader ] -;CHECK-NEXT: %buf.addr.011 = phi i32* [ %add.ptr, %while.body ], [ %buf, %while.body.preheader ] -;CHECK-NEXT: %4 = load i32, i32* %buf.addr.011, align 4 -;CHECK-NEXT: %add = add nsw i32 %4, %S.012 -;CHECK-NEXT: %add.ptr = getelementptr inbounds i32, i32* %buf.addr.011, i64 -1 -;CHECK-NEXT: %indvar.next = add i32 %indvar, 1 -;CHECK-NEXT: %exitcond = icmp eq i32 %indvar, %3 +;CHECK-NEXT: %4 = trunc i64 %indvar to i32 +;CHECK-NEXT: %5 = mul i64 %indvar, -1 +;CHECK-NEXT: %scevgep = getelementptr i32, i32* %buf, i64 %5 +;CHECK-NEXT: %6 = load i32, i32* %scevgep, align 4 +;CHECK-NEXT: %add = add nsw i32 %6, %S.012 +;CHECK-NEXT: %indvar.next = add i64 %indvar, 1 +;CHECK-NEXT: %exitcond = icmp eq i32 %4, %3 ;CHECK-NEXT: br i1 %exitcond, label %while.end.loopexit, label %while.body %i.013 = phi i32 [ %sub, %while.body ], [ %len, %while.body.preheader ] Index: llvm/trunk/test/Transforms/LoopReroll/indvar_with_ext.ll =================================================================== --- llvm/trunk/test/Transforms/LoopReroll/indvar_with_ext.ll +++ llvm/trunk/test/Transforms/LoopReroll/indvar_with_ext.ll @@ -14,18 +14,16 @@ while.body: ;CHECK-LABEL: while.body: -;CHECK-NEXT: %indvars.iv.i423 = phi i64 [ %indvars.iv.next.i424, %while.body ], [ 0, %while.body.preheader ] -;CHECK-NEXT: [[T1:%[0-9]+]] = trunc i64 %indvars.iv.i423 to i32 -;CHECK-NEXT: %arrayidx62.i = getelementptr inbounds float, float* %arrayidx200, i64 %indvars.iv.i423 +;CHECK-NEXT: %indvar = phi i64 [ %indvar.next, %while.body ], [ 0, %while.body.preheader ] +;CHECK-NEXT: %arrayidx62.i = getelementptr inbounds float, float* %arrayidx200, i64 %indvar ;CHECK-NEXT: %t1 = load float, float* %arrayidx62.i, align 4 -;CHECK-NEXT: %arrayidx64.i = getelementptr inbounds float, float* %arrayidx164, i64 %indvars.iv.i423 +;CHECK-NEXT: %arrayidx64.i = getelementptr inbounds float, float* %arrayidx164, i64 %indvar ;CHECK-NEXT: %t2 = load float, float* %arrayidx64.i, align 4 ;CHECK-NEXT: %mul65.i = fmul fast float %t2, %t22 ;CHECK-NEXT: %add66.i = fadd fast float %mul65.i, %t1 ;CHECK-NEXT: store float %add66.i, float* %arrayidx62.i, align 4 -;CHECK-NEXT: %indvars.iv.next.i424 = add i64 %indvars.iv.i423, 1 -;CHECK-NEXT: [[T2:%[0-9]+]] = sext i32 [[T1]] to i64 -;CHECK-NEXT: %exitcond = icmp eq i64 [[T2]], %{{[0-9]+}} +;CHECK-NEXT: %indvar.next = add i64 %indvar, 1 +;CHECK-NEXT: %exitcond = icmp eq i64 %indvar, %{{[0-9]+}} ;CHECK-NEXT: br i1 %exitcond, label %while.end.loopexit, label %while.body %indvars.iv.i423 = phi i64 [ %indvars.iv.next.i424, %while.body ], [ 0, %while.body.preheader ] @@ -69,7 +67,7 @@ for.body: ; preds = %for.body.preheader, %for.body -;CHECK: for.body: +;CHECK-LABEL: for.body: ;CHECK-NEXT: %indvar = phi i64 [ %indvar.next, %for.body ], [ 0, %for.body.preheader ] ;CHECK-NEXT: %arrayidx = getelementptr inbounds i32, i32* %y, i64 %indvar ;CHECK-NEXT: [[T1:%[0-9]+]] = load i32, i32* %arrayidx, align 4 @@ -111,7 +109,7 @@ for.body: ; preds = %for.body.preheader, %for.body -;CHECK: for.body: +;CHECK-LABEL: for.body: ;CHECK: %add12 = add i8 %i.022, 2 ;CHECK-NEXT: %conv = sext i8 %add12 to i32 ;CHECK-NEXT: %cmp = icmp slt i32 %conv, %n @@ -153,7 +151,7 @@ for.body: ; preds = %for.body.preheader, %for.body -;CHECK: for.body: +;CHECK-LABEL: for.body: ;CHECK-NEXT: %indvar = phi i64 [ %indvar.next, %for.body ], [ 0, %for.body.preheader ] ;CHECK-NEXT: %arrayidx = getelementptr inbounds i32, i32* %y, i64 %indvar ;CHECK-NEXT: [[T1:%[0-9]+]] = load i32, i32* %arrayidx, align 4 Index: llvm/trunk/test/Transforms/LoopReroll/nonconst_lb.ll =================================================================== --- llvm/trunk/test/Transforms/LoopReroll/nonconst_lb.ll +++ llvm/trunk/test/Transforms/LoopReroll/nonconst_lb.ll @@ -53,20 +53,19 @@ ; CHECK: %1 = sub i32 %0, %m ; CHECK: %2 = lshr i32 %1, 2 ; CHECK: %3 = shl i32 %2, 2 -; CHECK: %4 = add i32 %m, %3 -; CHECK: %5 = add i32 %4, 3 +; CHECK: %4 = add i32 %3, 3 ; CHECK: br label %for.body ; CHECK: for.body: ; preds = %for.body, %for.body.preheader ; CHECK: %indvar = phi i32 [ 0, %for.body.preheader ], [ %indvar.next, %for.body ] -; CHECK: %6 = add i32 %m, %indvar -; CHECK: %arrayidx = getelementptr inbounds i32, i32* %B, i32 %6 -; CHECK: %7 = load i32, i32* %arrayidx, align 4 -; CHECK: %mul = shl nsw i32 %7, 2 -; CHECK: %arrayidx2 = getelementptr inbounds i32, i32* %A, i32 %6 +; CHECK: %5 = add i32 %m, %indvar +; CHECK: %arrayidx = getelementptr inbounds i32, i32* %B, i32 %5 +; CHECK: %6 = load i32, i32* %arrayidx, align 4 +; CHECK: %mul = shl nsw i32 %6, 2 +; CHECK: %arrayidx2 = getelementptr inbounds i32, i32* %A, i32 %5 ; CHECK: store i32 %mul, i32* %arrayidx2, align 4 ; CHECK: %indvar.next = add i32 %indvar, 1 -; CHECK: %exitcond = icmp eq i32 %6, %5 +; CHECK: %exitcond = icmp eq i32 %indvar, %4 ; CHECK: br i1 %exitcond, label %for.end.loopexit, label %for.body ;void daxpy_ur(int n,float da,float *dx,float *dy) @@ -133,20 +132,19 @@ ; CHECK: %1 = sub i32 %0, %rem ; CHECK: %2 = lshr i32 %1, 2 ; CHECK: %3 = shl i32 %2, 2 -; CHECK: %4 = add i32 %rem, %3 -; CHECK: %5 = add i32 %4, 3 +; CHECK: %4 = add i32 %3, 3 ; CHECK: br label %for.body ; CHECK: for.body: ; CHECK: %indvar = phi i32 [ 0, %for.body.preheader ], [ %indvar.next, %for.body ] -; CHECK: %6 = add i32 %rem, %indvar -; CHECK: %arrayidx = getelementptr inbounds float, float* %dy, i32 %6 -; CHECK: %7 = load float, float* %arrayidx, align 4 -; CHECK: %arrayidx1 = getelementptr inbounds float, float* %dx, i32 %6 -; CHECK: %8 = load float, float* %arrayidx1, align 4 -; CHECK: %mul = fmul float %8, %da -; CHECK: %add = fadd float %7, %mul +; CHECK: %5 = add i32 %rem, %indvar +; CHECK: %arrayidx = getelementptr inbounds float, float* %dy, i32 %5 +; CHECK: %6 = load float, float* %arrayidx, align 4 +; CHECK: %arrayidx1 = getelementptr inbounds float, float* %dx, i32 %5 +; CHECK: %7 = load float, float* %arrayidx1, align 4 +; CHECK: %mul = fmul float %7, %da +; CHECK: %add = fadd float %6, %mul ; CHECK: store float %add, float* %arrayidx, align 4 ; CHECK: %indvar.next = add i32 %indvar, 1 -; CHECK: %exitcond = icmp eq i32 %6, %5 +; CHECK: %exitcond = icmp eq i32 %indvar, %4 ; CHECK: br i1 %exitcond, label %for.end.loopexit, label %for.body Index: llvm/trunk/test/Transforms/LoopReroll/ptrindvar.ll =================================================================== --- llvm/trunk/test/Transforms/LoopReroll/ptrindvar.ll +++ llvm/trunk/test/Transforms/LoopReroll/ptrindvar.ll @@ -17,7 +17,7 @@ ;CHECK-NEXT: %4 = load i32, i32* %scevgep, align 4 ;CHECK-NEXT: %add = add nsw i32 %4, %S.011 ;CHECK-NEXT: %indvar.next = add i64 %indvar, 1 -;CHECK-NEXT: %exitcond = icmp eq i32* %scevgep, %scevgep5 +;CHECK-NEXT: %exitcond = icmp eq i64 %indvar, %3 ;CHECK-NEXT: br i1 %exitcond, label %while.end.loopexit, label %while.body %S.011 = phi i32 [ %add2, %while.body ], [ undef, %while.body.preheader ] @@ -57,7 +57,7 @@ ;CHECK-NEXT: %5 = load i32, i32* %scevgep, align 4 ;CHECK-NEXT: %add = add nsw i32 %5, %S.011 ;CHECK-NEXT: %indvar.next = add i64 %indvar, 1 -;CHECK-NEXT: %exitcond = icmp eq i32* %scevgep, %scevgep5 +;CHECK-NEXT: %exitcond = icmp eq i64 %indvar, %3 ;CHECK-NEXT: br i1 %exitcond, label %while.end.loopexit, label %while.body %S.011 = phi i32 [ %add2, %while.body ], [ undef, %while.body.preheader ] Index: llvm/trunk/test/Transforms/LoopReroll/reduction.ll =================================================================== --- llvm/trunk/test/Transforms/LoopReroll/reduction.ll +++ llvm/trunk/test/Transforms/LoopReroll/reduction.ll @@ -35,10 +35,10 @@ ; CHECK: %indvar = phi i64 [ %indvar.next, %for.body ], [ 0, %entry ] ; CHECK: %r.029 = phi i32 [ 0, %entry ], [ %add, %for.body ] ; CHECK: %arrayidx = getelementptr inbounds i32, i32* %x, i64 %indvar -; CHECK: %0 = load i32, i32* %arrayidx, align 4 -; CHECK: %add = add nsw i32 %0, %r.029 +; CHECK: %1 = load i32, i32* %arrayidx, align 4 +; CHECK: %add = add nsw i32 %1, %r.029 ; CHECK: %indvar.next = add i64 %indvar, 1 -; CHECK: %exitcond = icmp eq i64 %indvar, 399 +; CHECK: %exitcond = icmp eq i32 %0, 399 ; CHECK: br i1 %exitcond, label %for.end, label %for.body ; CHECK: ret @@ -80,10 +80,10 @@ ; CHECK: %indvar = phi i64 [ %indvar.next, %for.body ], [ 0, %entry ] ; CHECK: %r.029 = phi float [ 0.000000e+00, %entry ], [ %add, %for.body ] ; CHECK: %arrayidx = getelementptr inbounds float, float* %x, i64 %indvar -; CHECK: %0 = load float, float* %arrayidx, align 4 -; CHECK: %add = fadd float %0, %r.029 +; CHECK: %1 = load float, float* %arrayidx, align 4 +; CHECK: %add = fadd float %1, %r.029 ; CHECK: %indvar.next = add i64 %indvar, 1 -; CHECK: %exitcond = icmp eq i64 %indvar, 399 +; CHECK: %exitcond = icmp eq i32 %0, 399 ; CHECK: br i1 %exitcond, label %for.end, label %for.body ; CHECK: ret