Index: lib/Transforms/Scalar/LoopRerollPass.cpp =================================================================== --- lib/Transforms/Scalar/LoopRerollPass.cpp +++ lib/Transforms/Scalar/LoopRerollPass.cpp @@ -371,11 +371,12 @@ protected: typedef MapVector UsesTy; - bool findRootsRecursive(Instruction *IVU, + void findRootsRecursive(Instruction *IVU, SmallInstructionSet SubsumedInsts); bool findRootsBase(Instruction *IVU, SmallInstructionSet SubsumedInsts); bool collectPossibleRoots(Instruction *Base, std::map &Roots); + bool validateRootSet(DAGRootSet &DRS); bool collectUsedInstructions(SmallInstructionSet &PossibleRedSet); void collectInLoopUserSet(const SmallInstructionVector &Roots, @@ -827,7 +828,8 @@ Roots[V] = cast(I); } - if (Roots.empty()) + // Make sure we have at least two roots. + if (Roots.empty() || (Roots.size() == 1 && BaseUsers.empty())) return false; // If we found non-loop-inc, non-root users of Base, assume they are @@ -861,40 +863,61 @@ return true; } -bool LoopReroll::DAGRootTracker:: +void LoopReroll::DAGRootTracker:: findRootsRecursive(Instruction *I, SmallInstructionSet SubsumedInsts) { // Does the user look like it could be part of a root set? // All its users must be simple arithmetic ops. if (I->getNumUses() > IL_MaxRerollIterations) - return false; + return; - if ((I->getOpcode() == Instruction::Mul || - I->getOpcode() == Instruction::PHI) && - I != IV && - findRootsBase(I, SubsumedInsts)) - return true; + if (I != IV && findRootsBase(I, SubsumedInsts)) + return; SubsumedInsts.insert(I); for (User *V : I->users()) { - Instruction *I = dyn_cast(V); + Instruction *I = cast(V); if (is_contained(LoopIncs, I)) continue; - if (!I || !isSimpleArithmeticOp(I) || - !findRootsRecursive(I, SubsumedInsts)) - return false; + if (!isSimpleArithmeticOp(I)) + continue; + + // The recursive call makes a copy of SubsumedInsts. + findRootsRecursive(I, SubsumedInsts); } +} + +bool LoopReroll::DAGRootTracker::validateRootSet(DAGRootSet &DRS) { + if (DRS.Roots.empty()) + return false; + + // Consider a DAGRootSet with N-1 roots (so N different values including + // BaseInst). + // Define d = Roots[0] - BaseInst, which should be the same as + // Roots[I] - Roots[I-1] for all I in [1..N). + // Define D = BaseInst@J - BaseInst@J-1, where "@J" means the value at the + // loop iteration J. + // + // Now, For the loop iterations to be consecutive: + // D = d * N + const auto *ADR = dyn_cast(SE->getSCEV(DRS.BaseInst)); + if (!ADR) + return false; + unsigned N = DRS.Roots.size() + 1; + const SCEV *StepSCEV = SE->getMinusSCEV(SE->getSCEV(DRS.Roots[0]), ADR); + const SCEV *ScaleSCEV = SE->getConstant(StepSCEV->getType(), N); + if (ADR->getStepRecurrence(*SE) != SE->getMulExpr(StepSCEV, ScaleSCEV)) + return false; + return true; } bool LoopReroll::DAGRootTracker:: findRootsBase(Instruction *IVU, SmallInstructionSet SubsumedInsts) { - - // The base instruction needs to be a multiply so - // that we can erase it. - if (IVU->getOpcode() != Instruction::Mul && - IVU->getOpcode() != Instruction::PHI) + // The base of a RootSet must be an AddRec, so it can be erased. + const auto *IVU_ADR = dyn_cast(SE->getSCEV(IVU)); + if (!IVU_ADR || IVU_ADR->getLoop() != L) return false; std::map V; @@ -910,6 +933,8 @@ DAGRootSet DRS; DRS.BaseInst = nullptr; + SmallVector PotentialRootSets; + for (auto &KV : V) { if (!DRS.BaseInst) { DRS.BaseInst = KV.second; @@ -920,13 +945,22 @@ DRS.Roots.push_back(KV.second); } else { // Linear sequence terminated. - RootSets.push_back(DRS); + if (!validateRootSet(DRS)) + return false; + + // Construct a new DAGRootSet with the next sequence. + PotentialRootSets.push_back(DRS); DRS.BaseInst = KV.second; - DRS.SubsumedInsts = SubsumedInsts; DRS.Roots.clear(); } } - RootSets.push_back(DRS); + + if (!validateRootSet(DRS)) + return false; + + PotentialRootSets.push_back(DRS); + + RootSets.append(PotentialRootSets.begin(), PotentialRootSets.end()); return true; } @@ -940,8 +974,7 @@ if (isLoopIncrement(IVU, IV)) LoopIncs.push_back(cast(IVU)); } - if (!findRootsRecursive(IV, SmallInstructionSet())) - return false; + findRootsRecursive(IV, SmallInstructionSet()); LoopIncs.push_back(IV); } else { if (!findRootsBase(IV, SmallInstructionSet())) @@ -961,31 +994,6 @@ } } - // And ensure all loop iterations are consecutive. We rely on std::map - // providing ordered traversal. - for (auto &V : RootSets) { - const auto *ADR = dyn_cast(SE->getSCEV(V.BaseInst)); - if (!ADR) - return false; - - // Consider a DAGRootSet with N-1 roots (so N different values including - // BaseInst). - // Define d = Roots[0] - BaseInst, which should be the same as - // Roots[I] - Roots[I-1] for all I in [1..N). - // Define D = BaseInst@J - BaseInst@J-1, where "@J" means the value at the - // loop iteration J. - // - // Now, For the loop iterations to be consecutive: - // D = d * N - - unsigned N = V.Roots.size() + 1; - const SCEV *StepSCEV = SE->getMinusSCEV(SE->getSCEV(V.Roots[0]), ADR); - const SCEV *ScaleSCEV = SE->getConstant(StepSCEV->getType(), N); - if (ADR->getStepRecurrence(*SE) != SE->getMulExpr(StepSCEV, ScaleSCEV)) { - DEBUG(dbgs() << "LRR: Aborting because iterations are not consecutive\n"); - return false; - } - } Scale = RootSets[0].Roots.size() + 1; if (Scale > IL_MaxRerollIterations) { @@ -1498,8 +1506,8 @@ { // Limit the lifetime of SCEVExpander. const DataLayout &DL = Header->getModule()->getDataLayout(); SCEVExpander Expander(*SE, DL, "reroll"); - Value *NewIV = - Expander.expandCodeFor(NewIVSCEV, InstIV->getType(), &Header->front()); + Value *NewIV = Expander.expandCodeFor(NewIVSCEV, Inst->getType(), + Header->getFirstNonPHIOrDbg()); for (auto &KV : Uses) if (KV.second.find_first() == 0) Index: test/Transforms/LoopReroll/basic.ll =================================================================== --- test/Transforms/LoopReroll/basic.ll +++ test/Transforms/LoopReroll/basic.ll @@ -575,6 +575,37 @@ ret void } +define void @gep-indexing(i32* nocapture %x) { +entry: + %call = tail call i32 @foo(i32 0) #1 + br label %for.body + +for.body: ; preds = %for.body, %entry + %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] + %0 = mul nsw i64 %indvars.iv, 3 + %arrayidx = getelementptr inbounds i32, i32* %x, i64 %0 + store i32 %call, i32* %arrayidx, align 4 + %arrayidx4 = getelementptr inbounds i32, i32* %arrayidx, i64 1 + store i32 %call, i32* %arrayidx4, align 4 + %arrayidx9 = getelementptr inbounds i32, i32* %arrayidx, i64 2 + store i32 %call, i32* %arrayidx9, align 4 + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + %exitcond = icmp eq i64 %indvars.iv.next, 500 + br i1 %exitcond, label %for.end, label %for.body + +; CHECK-LABEL: @gep-indexing +; CHECK: for.body: +; CHECK-NEXT: %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] +; CHECK-NEXT: %scevgep = getelementptr i32, i32* %x, i64 %indvars.iv +; CHECK-NEXT: store i32 %call, i32* %scevgep, align 4 +; CHECK-NEXT: %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 +; CHECK-NEXT: %exitcond2 = icmp eq i32* %scevgep, %scevgep1 +; CHECK-NEXT: br i1 %exitcond2, label %for.end, label %for.body + +for.end: ; preds = %for.body + ret void +} + define void @unordered_atomic_ops(i32* noalias %buf_0, i32* noalias %buf_1) { ; CHECK-LABEL: @unordered_atomic_ops(