diff --git a/llvm/lib/Transforms/Scalar/LoopInterchange.cpp b/llvm/lib/Transforms/Scalar/LoopInterchange.cpp --- a/llvm/lib/Transforms/Scalar/LoopInterchange.cpp +++ b/llvm/lib/Transforms/Scalar/LoopInterchange.cpp @@ -342,6 +342,10 @@ return OuterInnerReductions; } + const SmallVectorImpl<PHINode *> &getOuterLoopInductions() const { + return OuterLoopInductions; + } + private: bool tightlyNested(Loop *Outer, Loop *Inner); bool containsUnsafeInstructions(BasicBlock *BB); @@ -365,6 +369,9 @@ /// Set of reduction PHIs taking part of a reduction across the inner and /// outer loop. SmallPtrSet<PHINode *, 4> OuterInnerReductions; + + /// Set of outer loop induction PHIs. + SmallVector<PHINode *, 8> OuterLoopInductions; }; /// LoopInterchangeProfitability checks if it is profitable to interchange the @@ -831,19 +838,7 @@ return true; } - // TODO: Currently we handle only loops with 1 induction variable. - if (Inductions.size() != 1) { - LLVM_DEBUG(dbgs() << "Loops with more than 1 induction variables are not " - << "supported currently.\n"); - ORE->emit([&]() { - return OptimizationRemarkMissed(DEBUG_TYPE, "MultiIndutionOuter", - OuterLoop->getStartLoc(), - OuterLoop->getHeader()) - << "Only outer loops with 1 induction variable can be " - "interchanged currently."; - }); - return true; - } + OuterLoopInductions = Inductions; Inductions.clear(); if (!findInductionAndReductions(InnerLoop, Inductions, nullptr)) { @@ -1713,14 +1708,20 @@ for (PHINode &PHI : InnerLoopHeader->phis()) if (OuterInnerReductions.contains(&PHI)) InnerLoopPHIs.push_back(cast<PHINode>(&PHI)); - for (PHINode &PHI : OuterLoopHeader->phis()) - if (OuterInnerReductions.contains(&PHI)) - OuterLoopPHIs.push_back(cast<PHINode>(&PHI)); + + auto &OuterLoopInductions = LIL.getOuterLoopInductions(); + for (PHINode &PHI : OuterLoopHeader->phis()) { + if (std::find(OuterLoopInductions.begin(), OuterLoopInductions.end(), + &PHI) != OuterLoopInductions.end()) + continue; + OuterLoopPHIs.push_back(cast<PHINode>(&PHI)); + } // Now move the remaining reduction PHIs from outer to 
inner loop header and // vice versa. The PHI nodes must be part of a reduction across the inner and // outer loop and all the remains to do is and updating the incoming blocks. for (PHINode *PHI : OuterLoopPHIs) { + LLVM_DEBUG(dbgs() << "Outer loop reduction PHIs:\n"; PHI->dump();); PHI->moveBefore(InnerLoopHeader->getFirstNonPHI()); assert(OuterInnerReductions.count(PHI) && "Expected a reduction PHI node"); } diff --git a/llvm/test/Transforms/LoopInterchange/interchangeable-outerloop-multiple-indvars.ll b/llvm/test/Transforms/LoopInterchange/interchangeable-outerloop-multiple-indvars.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Transforms/LoopInterchange/interchangeable-outerloop-multiple-indvars.ll @@ -0,0 +1,191 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt < %s --basic-aa -loop-interchange -verify-dom-info -verify-loop-info -verify-scev -verify-loop-lcssa -S | FileCheck %s + +@c = common dso_local local_unnamed_addr global i32 0, align 4 +@e = common dso_local local_unnamed_addr global i32 0, align 4 +@d = common dso_local local_unnamed_addr global i32 0, align 4 +@b = common dso_local local_unnamed_addr global [2 x [10 x i32]] zeroinitializer, align 4 +@a = common dso_local local_unnamed_addr global i32 0, align 4 + +; int a, c, d, e; +; int b[2][10]; +; void fn1() { +; for (; c && e; c++,e++) { +; d = 5; +; for (; d; d--) +; a |= b[d][c + 9]; +; } +; } +define void @test1() { +; CHECK-LABEL: @test1( +; CHECK: for.cond2.preheader: +; CHECK: [[INDVARS_IV16:%.*]] = phi i64 [ [[TMP5:%.*]], [[FOR_COND2_PREHEADER_LR_PH:%.*]] ], [ [[INDVARS_IV_NEXT17:%.*]], [[FOR_INC7:%.*]] ] +; CHECK: [[TMP3:%.*]] = phi i32 [ [[TMP1:%.*]], [[FOR_COND2_PREHEADER_LR_PH]] ], [ [[INC8:%.*]], [[FOR_INC7]] ] +; CHECK: [[OR13:%.*]] = phi i32 [ [[OR:%.*]], [[FOR_INC7]] ], [ [[OR_LCSSA15:%.*]], [[FOR_COND2_PREHEADER_LR_PH]] ] +; CHECK: for.body4: +; CHECK: [[INDVARS_IV:%.*]] = phi i64 [ [[TMP9:%.*]], [[FOR_BODY4_SPLIT:%.*]] ], [ 5, 
[[FOR_BODY4_PREHEADER:%.*]] ] +; CHECK: [[OR_LCSSA15]] = phi i32 [ [[A_PROMOTED14:%.*]], [[FOR_BODY4_PREHEADER]] ], [ [[OR_LCSSA:%.*]], [[FOR_BODY4_SPLIT:%.*]] ] +; CHECK: br label [[FOR_COND2_PREHEADER_LR_PH]] +; CHECK: for.body4.split: +; CHECK: [[OR_LCSSA]] = phi i32 [ [[OR]], [[FOR_INC7]] ] +; CHECK: [[TMP7:%.*]] = phi i64 [ [[INDVARS_IV_NEXT17]], [[FOR_INC7]] ] +; CHECK: [[TMP8:%.*]] = phi i32 [ [[INC8]], [[FOR_INC7]] ] +; CHECK: for.inc7: +; CHECK: [[INDVARS_IV_NEXT17]] = add nsw i64 [[INDVARS_IV16]], 1 +; CHECK: [[INC8]] = add nsw i32 [[TMP3]], 1 +; CHECK: for.cond.for.end9_crit_edge: +; CHECK: [[INC_LCSSA_WIDE:%.*]] = phi i64 [ [[TMP7]], [[FOR_BODY4_SPLIT]] ] +; CHECK: [[INC8_LCSSA:%.*]] = phi i32 [ [[TMP8]], [[FOR_BODY4_SPLIT]] ] +; CHECK: [[OR_LCSSA_LCSSA:%.*]] = phi i32 [ [[OR_LCSSA]], [[FOR_BODY4_SPLIT]] ] + + + +entry: + %0 = load i32, i32* @c, align 4 + %tobool11 = icmp ne i32 %0, 0 + %1 = load i32, i32* @e, align 4 + %tobool112 = icmp ne i32 %1, 0 + %2 = and i1 %tobool11, %tobool112 + br i1 %2, label %for.cond2.preheader.lr.ph, label %for.end9 + +for.cond2.preheader.lr.ph: ; preds = %entry + %a.promoted14 = load i32, i32* @a, align 4 + %3 = sext i32 %0 to i64 + br label %for.cond2.preheader + +for.cond2.preheader: ; preds = %for.cond2.preheader.lr.ph, %for.inc7 + %indvars.iv16 = phi i64 [ %3, %for.cond2.preheader.lr.ph ], [ %indvars.iv.next17, %for.inc7 ] + %or.lcssa15 = phi i32 [ %a.promoted14, %for.cond2.preheader.lr.ph ], [ %or.lcssa, %for.inc7 ] + %4 = phi i32 [ %1, %for.cond2.preheader.lr.ph ], [ %inc8, %for.inc7 ] + %5 = add nsw i64 %indvars.iv16, 9 + br label %for.body4 + +for.body4: ; preds = %for.cond2.preheader, %for.body4 + %indvars.iv = phi i64 [ 5, %for.cond2.preheader ], [ %indvars.iv.next, %for.body4 ] + %or13 = phi i32 [ %or.lcssa15, %for.cond2.preheader ], [ %or, %for.body4 ] + %arrayidx6 = getelementptr inbounds [2 x [10 x i32]], [2 x [10 x i32]]* @b, i64 0, i64 %indvars.iv, i64 %5 + %6 = load i32, i32* %arrayidx6, align 4 + %or = 
or i32 %or13, %6 + %indvars.iv.next = add nsw i64 %indvars.iv, -1 + %tobool3 = icmp eq i64 %indvars.iv.next, 0 + br i1 %tobool3, label %for.inc7, label %for.body4 + +for.inc7: ; preds = %for.body4 + %or.lcssa = phi i32 [ %or, %for.body4 ] + %indvars.iv.next17 = add nsw i64 %indvars.iv16, 1 + %inc8 = add nsw i32 %4, 1 + %7 = trunc i64 %indvars.iv.next17 to i32 + %tobool = icmp ne i32 %7, 0 + %tobool1 = icmp ne i32 %inc8, 0 + %8 = and i1 %tobool, %tobool1 + br i1 %8, label %for.cond2.preheader, label %for.cond.for.end9_crit_edge + +for.cond.for.end9_crit_edge: ; preds = %for.inc7 + %inc.lcssa.wide = phi i64 [ %indvars.iv.next17, %for.inc7 ] + %inc8.lcssa = phi i32 [ %inc8, %for.inc7 ] + %or.lcssa.lcssa = phi i32 [ %or.lcssa, %for.inc7 ] + %9 = trunc i64 %inc.lcssa.wide to i32 + store i32 0, i32* @d, align 4 + store i32 %or.lcssa.lcssa, i32* @a, align 4 + store i32 %9, i32* @c, align 4 + store i32 %inc8.lcssa, i32* @e, align 4 + br label %for.end9 + +for.end9: ; preds = %for.cond.for.end9_crit_edge, %entry + ret void +} + +; int a, c, d, e; +; int b[2][10]; +; void fn1() { +; for (; c && e; c++,e++) { +; d = 5; +; for (; d; d--) +; a |= b[d+e][c+1]; +; } +; } + +define void @test2() { +; CHECK-LABEL: @test2( +; CHECK: for.cond2.preheader: +; CHECK: [[INDVARS_IV19:%.*]] = phi i64 [ [[TMP4:%.*]], [[FOR_COND2_PREHEADER_LR_PH:%.*]] ], [ [[INDVARS_IV_NEXT20:%.*]], [[FOR_INC7:%.*]] ] +; CHECK: [[INDVARS_IV17:%.*]] = phi i64 [ [[TMP3:%.*]], [[FOR_COND2_PREHEADER_LR_PH]] ], [ [[INDVARS_IV_NEXT18:%.*]], [[FOR_INC7]] ] +; CHECK: [[OR13:%.*]] = phi i32 [ [[OR:%.*]], [[FOR_INC7]] ], [ [[OR_LCSSA15:%.*]], [[FOR_COND2_PREHEADER_LR_PH]] ] +; CHECK: for.body4: +; CHECK: [[INDVARS_IV:%.*]] = phi i64 [ [[TMP9:%.*]], [[FOR_BODY4_SPLIT:%.*]] ], [ 5, [[FOR_BODY4_PREHEADER]] ] +; CHECK: [[OR_LCSSA15]] = phi i32 [ [[A_PROMOTED14]], [[FOR_BODY4_PREHEADER]] ], [ [[OR_LCSSA:%.*]], [[FOR_BODY4_SPLIT]] ] +; CHECK: for.body4.split1: +; CHECK: [[INDVARS_IV19_ADD:%.*]] = add nsw i64 
[[INDVARS_IV19]], 1 +; CHECK: [[TMP5:%.*]] = add nsw i64 [[INDVARS_IV]], [[INDVARS_IV17]] +; CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [2 x [10 x i32]], [2 x [10 x i32]]* @b, i64 0, i64 [[TMP5]], i64 [[INDVARS_IV19_ADD]] +; CHECK: for.body4.split: +; CHECK: [[OR_LCSSA]] = phi i32 [ [[OR]], [[FOR_INC7]] ] +; CHECK: [[TMP7:%.*]] = phi i64 [ [[INDVARS_IV_NEXT20]], [[FOR_INC7]] ] +; CHECK: [[TMP8:%.*]] = phi i64 [ [[INDVARS_IV_NEXT18]], [[FOR_INC7]] ] +; CHECK: for.inc7: +; CHECK: [[INDVARS_IV_NEXT20]] = add nsw i64 [[INDVARS_IV19]], 1 +; CHECK: [[INDVARS_IV_NEXT18]] = add nsw i64 [[INDVARS_IV17]], 1 +; CHECK: for.cond.for.end9_crit_edge: +; CHECK: [[INC_LCSSA_WIDE:%.*]] = phi i64 [ [[TMP7]], [[FOR_BODY4_SPLIT]] ] +; CHECK: [[INC8_LCSSA_WIDE:%.*]] = phi i64 [ [[TMP8]], [[FOR_BODY4_SPLIT]] ] +; CHECK: [[OR_LCSSA_LCSSA:%.*]] = phi i32 [ [[OR_LCSSA]], [[FOR_BODY4_SPLIT]] ] + + +entry: + %0 = load i32, i32* @c, align 4 + %tobool11 = icmp ne i32 %0, 0 + %1 = load i32, i32* @e, align 4 + %tobool112 = icmp ne i32 %1, 0 + %2 = and i1 %tobool11, %tobool112 + br i1 %2, label %for.cond2.preheader.lr.ph, label %for.end9 + +for.cond2.preheader.lr.ph: ; preds = %entry + %a.promoted14 = load i32, i32* @a, align 4 + %3 = sext i32 %1 to i64 + %4 = sext i32 %0 to i64 + br label %for.cond2.preheader + +for.cond2.preheader: ; preds = %for.cond2.preheader.lr.ph, %for.inc7 + %indvars.iv19 = phi i64 [ %4, %for.cond2.preheader.lr.ph ], [ %indvars.iv.next20, %for.inc7 ] + %indvars.iv17 = phi i64 [ %3, %for.cond2.preheader.lr.ph ], [ %indvars.iv.next18, %for.inc7 ] + %or.lcssa15 = phi i32 [ %a.promoted14, %for.cond2.preheader.lr.ph ], [ %or.lcssa, %for.inc7 ] + br label %for.body4 + +for.body4: ; preds = %for.cond2.preheader, %for.body4 + %indvars.iv = phi i64 [ 5, %for.cond2.preheader ], [ %indvars.iv.next, %for.body4 ] + %or13 = phi i32 [ %or.lcssa15, %for.cond2.preheader ], [ %or, %for.body4 ] + %indvars.iv19.add = add nsw i64 %indvars.iv19, 1 + %5 = add nsw i64 %indvars.iv, 
%indvars.iv17 + %arrayidx6 = getelementptr inbounds [2 x [10 x i32]], [2 x [10 x i32]]* @b, i64 0, i64 %5, i64 %indvars.iv19.add + %6 = load i32, i32* %arrayidx6, align 4 + %or = or i32 %or13, %6 + %indvars.iv.next = add nsw i64 %indvars.iv, -1 + %tobool3 = icmp eq i64 %indvars.iv.next, 0 + br i1 %tobool3, label %for.inc7, label %for.body4 + +for.inc7: ; preds = %for.body4 + %or.lcssa = phi i32 [ %or, %for.body4 ] + %indvars.iv.next20 = add nsw i64 %indvars.iv19, 1 + %indvars.iv.next18 = add nsw i64 %indvars.iv17, 1 + %7 = trunc i64 %indvars.iv.next20 to i32 + %tobool = icmp ne i32 %7, 0 + %8 = trunc i64 %indvars.iv.next18 to i32 + %tobool1 = icmp ne i32 %8, 0 + %9 = and i1 %tobool, %tobool1 + br i1 %9, label %for.cond2.preheader, label %for.cond.for.end9_crit_edge + +for.cond.for.end9_crit_edge: ; preds = %for.inc7 + %inc.lcssa.wide = phi i64 [ %indvars.iv.next20, %for.inc7 ] + %inc8.lcssa.wide = phi i64 [ %indvars.iv.next18, %for.inc7 ] + %or.lcssa.lcssa = phi i32 [ %or.lcssa, %for.inc7 ] + %10 = trunc i64 %inc.lcssa.wide to i32 + %11 = trunc i64 %inc8.lcssa.wide to i32 + store i32 0, i32* @d, align 4 + store i32 %or.lcssa.lcssa, i32* @a, align 4 + store i32 %10, i32* @c, align 4 + store i32 %11, i32* @e, align 4 + br label %for.end9 + +for.end9: ; preds = %for.cond.for.end9_crit_edge, %entry + ret void +}