diff --git a/llvm/lib/Transforms/Scalar/LoopInterchange.cpp b/llvm/lib/Transforms/Scalar/LoopInterchange.cpp --- a/llvm/lib/Transforms/Scalar/LoopInterchange.cpp +++ b/llvm/lib/Transforms/Scalar/LoopInterchange.cpp @@ -324,9 +324,9 @@ /// LoopInterchangeLegality checks if it is legal to interchange the loop. class LoopInterchangeLegality { public: - LoopInterchangeLegality(Loop *Outer, Loop *Inner, ScalarEvolution *SE, - OptimizationRemarkEmitter *ORE) - : OuterLoop(Outer), InnerLoop(Inner), SE(SE), ORE(ORE) {} + LoopInterchangeLegality(Loop *Outer, Loop *Inner, DominatorTree *DT, + ScalarEvolution *SE, OptimizationRemarkEmitter *ORE) + : OuterLoop(Outer), InnerLoop(Inner), DT(DT), SE(SE), ORE(ORE) {} /// Check if the loops can be interchanged. bool canInterchangeLoops(unsigned InnerLoopId, unsigned OuterLoopId, @@ -357,6 +357,7 @@ Loop *OuterLoop; Loop *InnerLoop; + DominatorTree *DT; ScalarEvolution *SE; /// Interface to emit optimization remarks. @@ -542,7 +543,7 @@ std::vector> &DependencyMatrix) { LLVM_DEBUG(dbgs() << "Processing InnerLoopId = " << InnerLoopId << " and OuterLoopId = " << OuterLoopId << "\n"); - LoopInterchangeLegality LIL(OuterLoop, InnerLoop, SE, ORE); + LoopInterchangeLegality LIL(OuterLoop, InnerLoop, DT, SE, ORE); if (!LIL.canInterchangeLoops(InnerLoopId, OuterLoopId, DependencyMatrix)) { LLVM_DEBUG(dbgs() << "Not interchanging loops. Cannot prove legality.\n"); return false; @@ -967,6 +968,107 @@ return true; } +// Get a list of blocks that are guarded by branches operating on loop variant +// conditions, excluding the backedges of the loop. +static DenseSet getLoopVariantGuardedBlocks(Loop *L) { + SmallVector Worklist; + DenseSet GuardedBlocks; + + const BasicBlock *Header = L->getHeader(); + + for (BasicBlock *BB : L->getBlocks()) { + if (BranchInst *BI = dyn_cast(BB->getTerminator())) { + if (BI->isUnconditional()) + continue; + Value *Cond = BI->getCondition(); + if (!L->isLoopInvariant(Cond)) + Worklist.push_back(BB); + } + } + + while (!Worklist.empty()) { + BasicBlock *BB = Worklist.pop_back_val(); + for (BasicBlock *Succ : successors(BB)) { + // If this is a backedge or the successor is not contained in the loop. + if (Succ == Header || !L->contains(Succ)) + continue; + auto Result = GuardedBlocks.insert(Succ); + if (Result.second) + Worklist.push_back(Succ); + } + } + + LLVM_DEBUG({ + dbgs() << "List of guarded blocks:"; + for (BasicBlock *BB : GuardedBlocks) + dbgs() << " " << BB->getName(); + dbgs() << "\n"; + }); + + return GuardedBlocks; +} + +// Check whether there are unsafe instructions guarded by control flow. That is, +// if an unsafe instruction directly or indirectly depends on how the control +// flow goes, the loop interchange should be considered illegal. +static bool haveControlFlowGuardedUnsafeInstructions(Loop *L, + DominatorTree *DT) { + const BasicBlock *LoopLatch = L->getLoopLatch(); + const DenseSet LoopVariantGuardedBlocks = + getLoopVariantGuardedBlocks(L); + + DenseSet GuardedInst; + SmallVector Worklist; + + // First of all, we put all instructions located in basic blocks that are + // directly guarded by control flow into the worklist. This includes PHI nodes + // that depend on such blocks as well. + for (BasicBlock *BB : L->getBlocks()) { + // Skip blocks which are only guarded by branches operating on loop + // invariant conditions. + if (!LoopVariantGuardedBlocks.count(BB)) + continue; + bool Dominate = DT->dominates(BB, LoopLatch); + for (Instruction &I : *BB) { + bool Guarded = !Dominate; + if (PHINode *PHI = dyn_cast(&I)) { + for (BasicBlock *OpBlock : PHI->blocks()) + Guarded |= !DT->dominates(OpBlock, LoopLatch); + } + + if (Guarded) { + GuardedInst.insert(&I); + Worklist.push_back(&I); + } + } + } + + // Calculate all instructions that are directly or indirectly dependant on + // instructions in the worklist. + while (!Worklist.empty()) { + Instruction *Inst = Worklist.pop_back_val(); + for (User *U : Inst->users()) { + if (Instruction *I = dyn_cast(U)) { + auto Result = GuardedInst.insert(I); + if (Result.second) + Worklist.push_back(I); + } + } + } + + // Check if either the loop contains unsafe instructions guarded by + // control flow. + for (BasicBlock *BB : L->getBlocks()) { + for (Instruction &I : *BB) { + if (!I.mayHaveSideEffects() && !I.mayReadFromMemory()) + continue; + if (GuardedInst.find(&I) != GuardedInst.end()) + return true; + } + } + return false; +} + bool LoopInterchangeLegality::canInterchangeLoops(unsigned InnerLoopId, unsigned OuterLoopId, CharMatrix &DepMatrix) { @@ -1045,6 +1147,19 @@ return false; } + if (haveControlFlowGuardedUnsafeInstructions(InnerLoop, DT)) { + LLVM_DEBUG(dbgs() << "Instructions guarded by control flow may break " + "loop interchange.\n"); + ORE->emit([&]() { + return OptimizationRemarkMissed( + DEBUG_TYPE, "InvalidInstructionWithControlFlow", + InnerLoop->getStartLoc(), InnerLoop->getHeader()) + << "Instructions guarded by control flow may break loop " + "interchange.\n"; + }); + return false; + } + return true; } diff --git a/llvm/test/Transforms/LoopInterchange/control-flow.ll b/llvm/test/Transforms/LoopInterchange/control-flow.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Transforms/LoopInterchange/control-flow.ll @@ -0,0 +1,187 @@ +; RUN: opt -loop-interchange -debug-only=loop-interchange 2>&1 %s | FileCheck %s + +; Test case of PR48057. + +; CHECK: Instructions guarded by control flow may break loop interchange. +; CHECK-NEXT: Not interchanging loops. Cannot prove legality. + +@b = dso_local global [7 x [8 x i8]] [[8 x i8] zeroinitializer, [8 x i8] zeroinitializer, [8 x i8] zeroinitializer, [8 x i8] zeroinitializer, [8 x i8] c"\05\00\00\00\00\00\00\00", [8 x i8] zeroinitializer, [8 x i8] c"\02\03\00\00\00\00\00\00"], align 16 +@c = dso_local global i32 0, align 4 +@d = dso_local global i32 0, align 4 +@e = dso_local global i16 0, align 2 + +define internal fastcc void @func() { +entry: + %.pr = load i32, i32* @c, align 4 + %cmp2 = icmp slt i32 %.pr, 8 + br i1 %cmp2, label %for.cond1.preheader.preheader, label %for.end13 + +for.cond1.preheader.preheader: ; preds = %entry + %0 = sext i32 %.pr to i64 + %1 = sub i32 7, %.pr + %2 = zext i32 %1 to i64 + %3 = add i64 %0, %2 + br label %for.cond1.preheader + +for.cond1.preheader: ; preds = %for.cond1.preheader.preheader, %for.inc12 + %indvars.iv4 = phi i64 [ %0, %for.cond1.preheader.preheader ], [ %indvars.iv.next5, %for.inc12 ] + br label %for.body2 + +for.body2: ; preds = %for.cond1.preheader, %land.end + %indvars.iv = phi i64 [ 4, %for.cond1.preheader ], [ %indvars.iv.next, %land.end ] + %4 = add nuw nsw i64 %indvars.iv, 2 + %arrayidx4 = getelementptr inbounds [7 x [8 x i8]], [7 x [8 x i8]]* @b, i64 0, i64 %4, i64 %indvars.iv4 + %5 = load i8, i8* %arrayidx4, align 1 + %tobool5.not = icmp eq i8 %5, 0 + br i1 %tobool5.not, label %land.end, label %land.rhs + +land.rhs: ; preds = %for.body2 + %arrayidx8 = getelementptr inbounds [7 x [8 x i8]], [7 x [8 x i8]]* @b, i64 0, i64 %indvars.iv, i64 0 + %6 = load i8, i8* %arrayidx8, align 8 + %conv9 = sext i8 %6 to i16 + store i16 %conv9, i16* @e, align 2 + br label %land.end + +land.end: ; preds = %land.rhs, %for.body2 + %indvars.iv.next = add nsw i64 %indvars.iv, -1 + %tobool.not = icmp eq i64 %indvars.iv.next, 0 + br i1 %tobool.not, label %for.inc12, label %for.body2 + +for.inc12: ; preds = %land.end + %indvars.iv.next5 = add nsw i64 %indvars.iv4, 1 + %exitcond = icmp ne i64 %indvars.iv.next5, 8 + br i1 %exitcond, label %for.cond1.preheader, label %for.cond.for.end13_crit_edge + +for.cond.for.end13_crit_edge: ; preds = %for.inc12 + %7 = add i64 %3, 1 + %8 = trunc i64 %7 to i32 + store i32 0, i32* @d, align 4 + store i32 %8, i32* @c, align 4 + br label %for.end13 + +for.end13: ; preds = %for.cond.for.end13_crit_edge, %entry + ret void +} + +; Test case of PR47523. + +; CHECK: Instructions guarded by control flow may break loop interchange. +; CHECK-NEXT: Not interchanging loops. Cannot prove legality. + +%struct.a = type { i8 } + +@f = dso_local local_unnamed_addr global [4 x [9 x i32]] [[9 x i32] [i32 5, i32 3, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0], [9 x i32] zeroinitializer, [9 x i32] zeroinitializer, [9 x i32] zeroinitializer], align 4 +@g = common dso_local local_unnamed_addr global i32 0, align 4 +@h = common dso_local global [1 x %struct.a] zeroinitializer, align 1 + +define dso_local i32 @main() local_unnamed_addr { +for.cond2.preheader.preheader.i: + %0 = sext i32 undef to i64 + br label %for.cond2.preheader.i + +for.cond2.preheader.i: ; preds = %for.end11.i, %for.cond2.preheader.preheader.i + %indvars.iv20.i = phi i64 [ %0, %for.cond2.preheader.preheader.i ], [ %indvars.iv.next21.i, %for.end11.i ] + br label %for.body4.i + +for.body4.i: ; preds = %if.end.i, %for.cond2.preheader.i + %indvars.iv.i = phi i64 [ 0, %for.cond2.preheader.i ], [ %indvars.iv.next.i, %if.end.i ] + %arrayidx6.i = getelementptr inbounds [4 x [9 x i32]], [4 x [9 x i32]]* @f, i64 0, i64 %indvars.iv.i, i64 %indvars.iv20.i + %1 = load i32, i32* %arrayidx6.i, align 4 + %tobool.i = icmp eq i32 %1, 0 + br i1 %tobool.i, label %land.end.i, label %land.rhs.i + +land.rhs.i: ; preds = %for.body4.i + store i16 3, i16* bitcast (i32* @g to i16*), align 4 + br label %land.end.i + +land.end.i: ; preds = %land.rhs.i, %for.body4.i + br i1 icmp eq (i64 urem (i64 zext (i16 ptrtoint ([1 x %struct.a]* @h to i16) to i64), i64 4073709551606), i64 0), label %if.end.i, label %if.then.i + +if.then.i: ; preds = %land.end.i + %2 = load i16, i16* bitcast (i32* @g to i16*), align 4 + %inc.i = add i16 %2, 1 + store i16 %inc.i, i16* bitcast (i32* @g to i16*), align 4 + br label %if.end.i + +if.end.i: ; preds = %if.then.i, %land.end.i + %indvars.iv.next.i = add nuw nsw i64 %indvars.iv.i, 1 + %exitcond.i = icmp eq i64 %indvars.iv.next.i, 3 + br i1 %exitcond.i, label %for.end11.i, label %for.body4.i + +for.end11.i: ; preds = %if.end.i + %indvars.iv.next21.i = add nsw i64 %indvars.iv20.i, 1 + %cmp.i = icmp slt i64 %indvars.iv20.i, 2 + br i1 %cmp.i, label %for.cond2.preheader.i, label %for.cond.for.end14_crit_edge.i + +for.cond.for.end14_crit_edge.i: ; preds = %for.end11.i + br label %h.exit + +h.exit: ; preds = %for.cond.for.end14_crit_edge.i + %3 = load i32, i32* @g, align 4 + ret i32 0 +} + +; void control_flow(bool *cond) { +; for (int i = 0; i < 100; ++i) { +; for (int j = 0; j < 100; ++j) { +; int *ptr = *cond ? &B : &C; +; A[j][i] = *ptr; +; } +; } +; } +; +; Interchange is illegal because the value of *ptr depends on the value of *cond. +; The store into A[j][i] is implicitly guarded by control flow according to its operands. + +@A = dso_local global [100 x [100 x i32]] zeroinitializer, align 16 +@B = dso_local global i32 0, align 4 +@C = dso_local global i32 0, align 4 + +; CHECK: Instructions guarded by control flow may break loop interchange. +; CHECK-NEXT: Not interchanging loops. Cannot prove legality. +define dso_local void @_Z12control_flowPVb(i8* %cond) #0 { +entry: + br label %for.body + +for.body: ; preds = %entry, %for.inc6 + %i.02 = phi i32 [ 0, %entry ], [ %inc7, %for.inc6 ] + br label %for.body3 + +for.body3: ; preds = %for.body, %for.inc + %j.01 = phi i32 [ 0, %for.body ], [ %inc, %for.inc ] + %0 = load i8, i8* %cond, align 1 + %tobool = trunc i8 %0 to i1 + br i1 %tobool, label %if.then, label %if.else + +if.then: ; preds = %for.body3 + br label %if.end + +if.else: ; preds = %for.body3 + br label %if.end + +if.end: ; preds = %if.else, %if.then + %ptr.0 = phi i32* [ @B, %if.then ], [ @C, %if.else ] + %1 = load i32, i32* %ptr.0, align 4 + %idxprom = sext i32 %j.01 to i64 + %arrayidx = getelementptr inbounds [100 x [100 x i32]], [100 x [100 x i32]]* @A, i64 0, i64 %idxprom + %idxprom4 = sext i32 %i.02 to i64 + %arrayidx5 = getelementptr inbounds [100 x i32], [100 x i32]* %arrayidx, i64 0, i64 %idxprom4 + store i32 %1, i32* %arrayidx5, align 4 + br label %for.inc + +for.inc: ; preds = %if.end + %inc = add nsw i32 %j.01, 1 + %cmp2 = icmp slt i32 %inc, 100 + br i1 %cmp2, label %for.body3, label %for.end + +for.end: ; preds = %for.inc + br label %for.inc6 + +for.inc6: ; preds = %for.end + %inc7 = add nsw i32 %i.02, 1 + %cmp = icmp slt i32 %inc7, 100 + br i1 %cmp, label %for.body, label %for.end8 + +for.end8: ; preds = %for.inc6 + ret void +}