Index: lib/Transforms/Scalar/LoopRerollPass.cpp =================================================================== --- lib/Transforms/Scalar/LoopRerollPass.cpp +++ lib/Transforms/Scalar/LoopRerollPass.cpp @@ -394,9 +394,14 @@ const SmallInstructionSet &Final, DenseSet &Users); - UsesTy::iterator nextInstr(int Val, UsesTy &In, UsesTy::iterator I); + UsesTy::iterator nextInstr(int Val, UsesTy &In, + const SmallInstructionSet &Exclude, + UsesTy::iterator *StartI=nullptr); bool isBaseInst(Instruction *I); bool isRootInst(Instruction *I); + bool instrDependsOn(Instruction *I, + UsesTy::iterator Start, + UsesTy::iterator End); LoopReroll *Parent; @@ -941,10 +946,16 @@ } +/// Get the next instruction in "In" that is a member of set Val. +/// Start searching from StartI, and do not return anything in Exclude. +/// If StartI is not given, start from In.begin(). LoopReroll::DAGRootTracker::UsesTy::iterator LoopReroll::DAGRootTracker::nextInstr(int Val, UsesTy &In, - UsesTy::iterator I) { - while (I != In.end() && I->second.test(Val) == 0) + const SmallInstructionSet &Exclude, + UsesTy::iterator *StartI) { + UsesTy::iterator I = StartI ? *StartI : In.begin(); + while (I != In.end() && (I->second.test(Val) == 0 || + Exclude.count(I->first) != 0)) ++I; return I; } @@ -965,6 +976,19 @@ return false; } +/// Return true if instruction I depends on any instruction between +/// Start and End. +bool LoopReroll::DAGRootTracker::instrDependsOn(Instruction *I, + UsesTy::iterator Start, + UsesTy::iterator End) { + for (auto *U : I->users()) { + for (auto It = Start; It != End; ++It) + if (U == It->first) + return true; + } + return false; +} + bool LoopReroll::DAGRootTracker::validate(ReductionTracker &Reductions) { // We now need to check for equivalence of the use graph of each root with // that of the primary induction variable (excluding the roots). Our goal @@ -1022,8 +1046,9 @@ DenseMap BaseMap; // Compare iteration Iter to the base. - auto BaseIt = nextInstr(0, Uses, Uses.begin()); - auto RootIt = nextInstr(Iter, Uses, Uses.begin()); + SmallInstructionSet Visited; + auto BaseIt = nextInstr(0, Uses, Visited); + auto RootIt = nextInstr(Iter, Uses, Visited); auto LastRootIt = Uses.begin(); while (BaseIt != Uses.end() && RootIt != Uses.end()) { @@ -1033,20 +1058,54 @@ // Skip over the IV or root instructions; only match their users. bool Continue = false; if (isBaseInst(BaseInst)) { - BaseIt = nextInstr(0, Uses, ++BaseIt); + Visited.insert(BaseInst); + BaseIt = nextInstr(0, Uses, Visited); Continue = true; } if (isRootInst(RootInst)) { LastRootIt = RootIt; - RootIt = nextInstr(Iter, Uses, ++RootIt); + Visited.insert(RootInst); + RootIt = nextInstr(Iter, Uses, Visited); Continue = true; } if (Continue) continue; + if (!BaseInst->isSameOperationAs(RootInst)) { + // Last chance saloon. We don't try and solve the full isomorphism problem, + // but try and at least catch the case where two instructions *of different + // types* are round the wrong way. We won't be able to efficiently tell, + // given two ADD instructions, which way around we should match them, but + // given an ADD and a SUB, we can at least infer which one is which. + // + // This should allow us to deal with a greater subset of the isomorphism + // problem. + auto TryIt = RootIt; + while (TryIt != Uses.end() && + !BaseInst->isSameOperationAs(TryIt->first)) { + ++TryIt; + TryIt = nextInstr(Iter, Uses, Visited, &TryIt); + } + + if (TryIt == Uses.end() || + instrDependsOn(TryIt->first, RootIt, TryIt)) { + DEBUG(dbgs() << "LRR: iteration root match failed at " << *BaseInst << + " vs. " << *RootInst << "\n"); + return false; + } + + RootIt = TryIt; + RootInst = TryIt->first; + } + // All instructions between the last root and this root - // belong to some other iteration. If they belong to a + // may belong to some other iteration. If they belong to a // future iteration, then they're dangerous to alias with. - for (; LastRootIt != RootIt; ++LastRootIt) { + // + // Note that because we allow a limited amount of flexibility in the order + // that we visit nodes, LastRootIt might be *before* RootIt, in which + // case we've already checked this set of instructions so we shouldn't + // do anything. + for (; LastRootIt < RootIt; ++LastRootIt) { Instruction *I = LastRootIt->first; if (LastRootIt->second.find_first() < (int)Iter) continue; @@ -1062,12 +1121,6 @@ FutureSideEffects = true; } - if (!BaseInst->isSameOperationAs(RootInst)) { - DEBUG(dbgs() << "LRR: iteration root match failed at " << *BaseInst << - " vs. " << *RootInst << "\n"); - return false; - } - // Make sure that this instruction, which is in the use set of this // root instruction, does not also belong to the base set or the set of // some other root instruction. @@ -1174,8 +1227,10 @@ BaseMap.insert(std::make_pair(RootInst, BaseInst)); LastRootIt = RootIt; - BaseIt = nextInstr(0, Uses, ++BaseIt); - RootIt = nextInstr(Iter, Uses, ++RootIt); + Visited.insert(BaseInst); + Visited.insert(RootInst); + BaseIt = nextInstr(0, Uses, Visited); + RootIt = nextInstr(Iter, Uses, Visited); } assert (BaseIt == Uses.end() && RootIt == Uses.end() && "Mismatched set sizes!"); Index: test/Transforms/LoopReroll/basic.ll =================================================================== --- test/Transforms/LoopReroll/basic.ll +++ test/Transforms/LoopReroll/basic.ll @@ -488,6 +488,63 @@ ret void } +; int foo(int a); +; void bar2(int *x, int y, int z) { +; for (int i = 0; i < 500; i += 3) { +; foo(i+y+i*z); // Slightly reordered instruction order +; foo(i+1+y+(i+1)*z); +; foo(i+2+y+(i+2)*z); +; } +; } + +; Function Attrs: nounwind uwtable +define void @bar2(i32* nocapture readnone %x, i32 %y, i32 %z) #0 { +entry: + br label %for.body + +for.body: ; preds = %for.body, %entry + %i.08 = phi i32 [ 0, %entry ], [ %add3, %for.body ] + + %tmp2 = mul i32 %i.08, %z + %tmp1 = add i32 %i.08, %y + %tmp3 = add i32 %tmp2, %tmp1 + %call = tail call i32 @foo(i32 %tmp3) #1 + + %add = add nsw i32 %i.08, 1 + %tmp2a = mul i32 %add, %z + %tmp1a = add i32 %add, %y + %tmp3a = add i32 %tmp2a, %tmp1a + %calla = tail call i32 @foo(i32 %tmp3a) #1 + + %add2 = add nsw i32 %i.08, 2 + %tmp2b = mul i32 %add2, %z + %tmp1b = add i32 %add2, %y + %tmp3b = add i32 %tmp2b, %tmp1b + %callb = tail call i32 @foo(i32 %tmp3b) #1 + + %add3 = add nsw i32 %i.08, 3 + + %exitcond = icmp eq i32 %add3, 500 + br i1 %exitcond, label %for.end, label %for.body + +; CHECK-LABEL: @bar2 + +; CHECK: for.body: +; CHECK: %indvar = phi i32 [ %indvar.next, %for.body ], [ 0, %entry ] +; CHECK: %tmp2 = mul i32 %indvar, %z +; CHECK: %tmp1 = add i32 %indvar, %y +; CHECK: %tmp3 = add i32 %tmp2, %tmp1 +; CHECK: %call = tail call i32 @foo(i32 %tmp3) #1 +; CHECK: %indvar.next = add i32 %indvar, 1 +; CHECK: %exitcond1 = icmp eq i32 %indvar, 497 +; CHECK: br i1 %exitcond1, label %for.end, label %for.body + +; CHECK: ret + +for.end: ; preds = %for.body + ret void +} + attributes #0 = { nounwind uwtable } attributes #1 = { nounwind }