Index: llvm/lib/Transforms/Scalar/LoopInterchange.cpp =================================================================== --- llvm/lib/Transforms/Scalar/LoopInterchange.cpp +++ llvm/lib/Transforms/Scalar/LoopInterchange.cpp @@ -332,9 +332,13 @@ bool canInterchangeLoops(unsigned InnerLoopId, unsigned OuterLoopId, CharMatrix &DepMatrix); + /// Discover induction PHIs in the header of \p L. Induction + /// PHIs are added to \p Inductions. + bool findInductions(Loop *L, SmallVectorImpl &Inductions); + /// Check if the loop structure is understood. We do not handle triangular /// loops for now. - bool isLoopStructureUnderstood(PHINode *InnerInductionVar); + bool isLoopStructureUnderstood(); bool currentLimitations(); @@ -342,6 +346,10 @@ return OuterInnerReductions; } + const SmallVectorImpl &getInnerLoopInductions() const { + return InnerLoopInductions; + } + private: bool tightlyNested(Loop *Outer, Loop *Inner); bool containsUnsafeInstructions(BasicBlock *BB); @@ -365,6 +373,9 @@ /// Set of reduction PHIs taking part of a reduction across the inner and /// outer loop. SmallPtrSet OuterInnerReductions; + + /// Set of inner loop induction PHIs + SmallVector InnerLoopInductions; }; /// LoopInterchangeProfitability checks if it is profitable to interchange the @@ -635,25 +646,26 @@ return true; } -bool LoopInterchangeLegality::isLoopStructureUnderstood( - PHINode *InnerInduction) { - unsigned Num = InnerInduction->getNumOperands(); +bool LoopInterchangeLegality::isLoopStructureUnderstood() { BasicBlock *InnerLoopPreheader = InnerLoop->getLoopPreheader(); - for (unsigned i = 0; i < Num; ++i) { - Value *Val = InnerInduction->getOperand(i); - if (isa(Val)) - continue; - Instruction *I = dyn_cast(Val); - if (!I) - return false; - // TODO: Handle triangular loops. - // e.g. 
for(int i=0;igetIncomingBlock(IncomBlockIndx) == - InnerLoopPreheader && - !OuterLoop->isLoopInvariant(I)) { - return false; + for (PHINode *InnerInduction : InnerLoopInductions) { + unsigned Num = InnerInduction->getNumOperands(); + for (unsigned i = 0; i < Num; ++i) { + Value *Val = InnerInduction->getOperand(i); + if (isa(Val)) + continue; + Instruction *I = dyn_cast(Val); + if (!I) + return false; + // TODO: Handle triangular loops. + // e.g. for(int i=0;igetIncomingBlock(IncomBlockIndx) == + InnerLoopPreheader && + !OuterLoop->isLoopInvariant(I)) { + return false; + } } } @@ -683,12 +695,12 @@ // InnerInduction, or a binary operator that involves // InnerInduction and a constant. std::function IsPathToIndVar; - IsPathToIndVar = [&InnerInduction, &IsPathToIndVar](Value *V) -> bool { - if (V == InnerInduction) + IsPathToIndVar = [this, &IsPathToIndVar](const Value *V) -> bool { + if (llvm::is_contained(InnerLoopInductions, V)) return true; if (isa(V)) return true; - Instruction *I = dyn_cast(V); + const Instruction *I = dyn_cast(V); if (!I) return false; if (isa(I)) @@ -699,6 +711,13 @@ return false; }; + // In case of multiple inner loop indvars, it is okay if LHS and RHS + // are both inner indvar related variables. + if (IsPathToIndVar(Op0) && IsPathToIndVar(Op1)) + return true; + + // Otherwise we check if the cmp instruction compares an inner indvar + // related variable (Left) with an outer loop invariant (Right). if (IsPathToIndVar(Op0) && !isa(Op0)) { Left = Op0; Right = Op1; @@ -814,7 +833,6 @@ return true; } - PHINode *InnerInductionVar; SmallVector Inductions; if (!findInductionAndReductions(OuterLoop, Inductions, InnerLoop)) { LLVM_DEBUG( @@ -859,24 +877,8 @@ return true; } - // TODO: Currently we handle only loops with 1 induction variable. - if (Inductions.size() != 1) { - LLVM_DEBUG( - dbgs() << "We currently only support loops with 1 induction variable." 
- << "Failed to interchange due to current limitation\n"); - ORE->emit([&]() { - return OptimizationRemarkMissed(DEBUG_TYPE, "MultiInductionInner", - InnerLoop->getStartLoc(), - InnerLoop->getHeader()) - << "Only inner loops with 1 induction variable can be " - "interchanged currently."; - }); - return true; - } - InnerInductionVar = Inductions.pop_back_val(); - // TODO: Triangular loops are not handled for now. - if (!isLoopStructureUnderstood(InnerInductionVar)) { + if (!isLoopStructureUnderstood()) { LLVM_DEBUG(dbgs() << "Loop structure not understood by pass\n"); ORE->emit([&]() { return OptimizationRemarkMissed(DEBUG_TYPE, "UnsupportedStructureInner", @@ -890,6 +892,16 @@ return false; } +bool LoopInterchangeLegality::findInductions( + Loop *L, SmallVectorImpl &Inductions) { + for (PHINode &PHI : L->getHeader()->phis()) { + InductionDescriptor ID; + if (InductionDescriptor::isInductionPHI(&PHI, L, SE, ID)) + Inductions.push_back(&PHI); + } + return !Inductions.empty(); +} + // We currently only support LCSSA PHI nodes in the inner loop exit, if their // users are either reduction PHIs or PHIs outside the outer loop (which means // the we are only interested in the final value after the loop). 
@@ -1018,6 +1030,11 @@ return false; } + if (!findInductions(InnerLoop, InnerLoopInductions)) { + LLVM_DEBUG(dbgs() << "Cound not find inner loop induction variables.\n"); + return false; + } + if (!areInnerLoopLatchPHIsSupported(OuterLoop, InnerLoop)) { LLVM_DEBUG(dbgs() << "Found unsupported PHI nodes in inner loop latch.\n"); ORE->emit([&]() { @@ -1274,25 +1291,25 @@ bool LoopInterchangeTransform::transform() { bool Transformed = false; - Instruction *InnerIndexVar; if (InnerLoop->getSubLoops().empty()) { BasicBlock *InnerLoopPreHeader = InnerLoop->getLoopPreheader(); LLVM_DEBUG(dbgs() << "Splitting the inner loop latch\n"); - PHINode *InductionPHI = getInductionVariable(InnerLoop, SE); - if (!InductionPHI) { + auto &InductionPHIs = LIL.getInnerLoopInductions(); + if (InductionPHIs.empty()) { LLVM_DEBUG(dbgs() << "Failed to find the point to split loop latch \n"); return false; } - if (InductionPHI->getIncomingBlock(0) == InnerLoopPreHeader) - InnerIndexVar = dyn_cast(InductionPHI->getIncomingValue(1)); - else - InnerIndexVar = dyn_cast(InductionPHI->getIncomingValue(0)); - - // Ensure that InductionPHI is the first Phi node. - if (&InductionPHI->getParent()->front() != InductionPHI) - InductionPHI->moveBefore(&InductionPHI->getParent()->front()); + SmallVector InnerIndexVarList; + for (PHINode *CurInductionPHI : InductionPHIs) { + if (CurInductionPHI->getIncomingBlock(0) == InnerLoopPreHeader) + InnerIndexVarList.push_back( + dyn_cast(CurInductionPHI->getIncomingValue(1))); + else + InnerIndexVarList.push_back( + dyn_cast(CurInductionPHI->getIncomingValue(0))); + } // Create a new latch block for the inner loop. 
We split at the // current latch's terminator and then move the condition and all @@ -1304,7 +1321,7 @@ SmallSetVector WorkList; unsigned i = 0; - auto MoveInstructions = [&i, &WorkList, this, InductionPHI, NewLatch]() { + auto MoveInstructions = [&i, &WorkList, this, &InductionPHIs, NewLatch]() { for (; i < WorkList.size(); i++) { // Duplicate instruction and move it the new latch. Update uses that // have been moved. @@ -1316,7 +1333,8 @@ for (Use &U : llvm::make_early_inc_range(WorkList[i]->uses())) { Instruction *UserI = cast(U.getUser()); if (!InnerLoop->contains(UserI->getParent()) || - UserI->getParent() == NewLatch || UserI == InductionPHI) + UserI->getParent() == NewLatch || + llvm::is_contained(InductionPHIs, UserI)) U.set(NewI); } // Add operands of moved instruction to the worklist, except if they are @@ -1325,7 +1343,7 @@ Instruction *OpI = dyn_cast(Op); if (!OpI || this->LI->getLoopFor(OpI->getParent()) != this->InnerLoop || - OpI == InductionPHI) + llvm::is_contained(InductionPHIs, OpI)) continue; WorkList.insert(OpI); } @@ -1339,7 +1357,8 @@ if (CondI) WorkList.insert(CondI); MoveInstructions(); - WorkList.insert(cast(InnerIndexVar)); + for (Instruction *InnerIndexVar : InnerIndexVarList) + WorkList.insert(cast(InnerIndexVar)); MoveInstructions(); // Splits the inner loops phi nodes out into a separate basic block. 
@@ -1612,7 +1631,6 @@ updateSuccessor(InnerLoopLatchPredecessorBI, InnerLoopLatch, InnerLoopLatchSuccessor, DTUpdates); - if (OuterLoopLatchBI->getSuccessor(0) == OuterLoopHeader) OuterLoopLatchSuccessor = OuterLoopLatchBI->getSuccessor(1); else @@ -1639,7 +1657,8 @@ SmallVector InnerLoopPHIs, OuterLoopPHIs; for (PHINode &PHI : InnerLoopHeader->phis()) if (OuterInnerReductions.contains(&PHI)) - InnerLoopPHIs.push_back(cast(&PHI)); + InnerLoopPHIs.push_back(&PHI); + for (PHINode &PHI : OuterLoopHeader->phis()) if (OuterInnerReductions.contains(&PHI)) OuterLoopPHIs.push_back(cast(&PHI)); @@ -1652,6 +1671,7 @@ assert(OuterInnerReductions.count(PHI) && "Expected a reduction PHI node"); } for (PHINode *PHI : InnerLoopPHIs) { + LLVM_DEBUG(dbgs() << "Inner loop reduction PHIs:\n"; PHI->dump();); PHI->moveBefore(OuterLoopHeader->getFirstNonPHI()); assert(OuterInnerReductions.count(PHI) && "Expected a reduction PHI node"); } Index: llvm/test/Transforms/LoopInterchange/interchangeable-innerloop-multiple-indvars.ll =================================================================== --- /dev/null +++ llvm/test/Transforms/LoopInterchange/interchangeable-innerloop-multiple-indvars.ll @@ -0,0 +1,240 @@ +; RUN: opt < %s -basic-aa -loop-interchange -verify-dom-info -verify-loop-info -verify-scev -verify-loop-lcssa -S | FileCheck %s + +@b = common dso_local local_unnamed_addr global [200 x [200 x i32]] zeroinitializer, align 4 +@a = common dso_local local_unnamed_addr global i32 0, align 4 + +;; int a, c, d, e; +;; int b[200][200]; +;; void fn1() { +;; for (c = 0; c < 100; c++) { +;; for (d = 5, e = 5; d > 0, e > 0; d--, e--) +;; a |= b[d][c + 9]; +;; } +;; } +; +; There are multiple inner loop indvars and only one +; of them is used in the loop exit condition at the +; inner loop latch. 
+; +define void @test1() { +; CHECK-LABEL: @test1( +; CHECK: for.body: +; CHECK: [[INDVARS_OUTER:%.*]] = phi i64 [ [[INDVARS_OUTER_NEXT:%.*]], [[FOR_INC7:%.*]] ], [ 0, [[FOR_BODY_PREHEADER:%.*]] ] +; CHECK: [[OR_REDUCTION_INNER:%.*]] = phi i32 [ [[OR:%.*]], [[FOR_INC7]] ], [ [[OR_REDUCTION_OUTER:%.*]], [[FOR_BODY_PREHEADER]] ] +; CHECK: br label %for.body3.split +; CHECK: for.body3: +; CHECK: [[INDVAR0:%.*]] = phi i64 [ [[TMP0:%.*]], [[FOR_BODY3_SPLIT:%.*]] ], [ 5, [[FOR_BODY3_PREHEADER:%.*]] ] +; CHECK: [[INDVAR1:%.*]] = phi i32 [ [[TMP1:%.*]], [[FOR_BODY3_SPLIT]] ], [ 5, [[FOR_BODY3_PREHEADER]] ] +; CHECK: [[OR_REDUCTION_OUTER]] = phi i32 [ [[OR_LCSSA:%.*]], [[FOR_BODY3_SPLIT]] ], [ [[A:%.*]], [[FOR_BODY3_PREHEADER]] ] +; CHECK: for.body3.split1: +; CHECK: [[ARRAYIDX5:%.*]] = getelementptr inbounds [200 x [200 x i32]], [200 x [200 x i32]]* @b, i64 0, i64 [[INDVAR0]], i64 [[INDEX:%.*]] +; CHECK: [[LOAD_VAL:%.*]] = load i32, i32* [[ARRAYIDX5]], align 4 +; CHECK: [[OR]] = or i32 [[OR_REDUCTION_INNER]], [[LOAD_VAL]] +; CHECK: br label %for.inc7 +; CHECK: for.body3.split: +; CHECK: [[OR_LCSSA]] = phi i32 [ [[OR]], [[FOR_INC7]] ] +; CHECK: [[TMP0]] = add nsw i64 [[INDVAR0]], -1 +; CHECK: [[TMP1]] = add nsw i32 [[INDVAR1]], -1 +; CHECK: [[TMP2:%.*]] = icmp eq i32 [[TMP1]], 0 +; CHECK: br i1 [[TMP2]], label %for.cond.for.end8_crit_edge, label %for.body3 +; CHECK: for.inc7: +; CHECK: [[INDVARS_OUTER_NEXT]] = add nsw i64 [[INDVARS_OUTER]], 1 +; CHECK: br i1 [[TOBOOL:%.*]], label %for.body3.split, label %for.body +; CHECK: for.cond.for.end8_crit_edge: +; CHECK: [[OR_LCSSA_LCSSA:%.*]] = phi i32 [ [[OR_LCSSA]], [[FOR_BODY3_SPLIT]] ] + +entry: + %a = load i32, i32* @a + br label %for.body + +for.body: ; preds = %for.body.lr.ph, %for.inc7 + %indvars.outer = phi i64 [ 0, %entry ], [ %indvars.outer.next, %for.inc7 ] + %or.reduction.outer = phi i32 [ %a, %entry ], [ %or.lcssa, %for.inc7 ] + %index = add nsw i64 %indvars.outer, 9 + br label %for.body3 + +for.body3: ; preds = 
%for.body, %for.body3 + %or.reduction.inner = phi i32 [ %or.reduction.outer, %for.body ], [ %or, %for.body3 ] + %indvar0 = phi i64 [ 5, %for.body ], [ %indvar0.next, %for.body3 ] + %indvar1 = phi i32 [ 5, %for.body ], [ %indvar1.next, %for.body3 ] + %arrayidx5 = getelementptr inbounds [200 x [200 x i32]], [200 x [200 x i32]]* @b, i64 0, i64 %indvar0, i64 %index + %load.val = load i32, i32* %arrayidx5, align 4 + %or = or i32 %or.reduction.inner, %load.val + %indvar0.next = add nsw i64 %indvar0, -1 + %indvar1.next = add nsw i32 %indvar1, -1 + %tobool2 = icmp eq i32 %indvar1.next, 0 + br i1 %tobool2, label %for.inc7, label %for.body3 + +for.inc7: ; preds = %for.body3 + %or.lcssa = phi i32 [ %or, %for.body3 ] + %indvars.outer.next = add nsw i64 %indvars.outer, 1 + %indvars.outer.next.trunc = trunc i64 %indvars.outer.next to i32 + %tobool = icmp eq i32 %indvars.outer.next.trunc, 100 + br i1 %tobool, label %for.cond.for.end8_crit_edge, label %for.body + +for.cond.for.end8_crit_edge: ; preds = %for.inc7 + %or.lcssa.lcssa = phi i32 [ %or.lcssa, %for.inc7 ] + store i32 %or.lcssa.lcssa, i32* @a + br label %for.end8 + +for.end8: ; preds = %for.cond.for.end8_crit_edge, %entry + ret void +} + +;; int a, c, d, e; +;; int b[200][200]; +;; void fn1() { +;; for (; c; c++) { +;; for (d = 5, e = 6; d + e > 0; d--, e = e - 2) +;; a |= b[d][c + 9]; +;; } +;; } +; +; All inner loop indvars are used in the inner latch. 
+; +define void @test2() { +; CHECK-LABEL: @test2( +; CHECK: for.body: +; CHECK: [[INDVARS_OUTER:%.*]] = phi i64 [ [[INDVARS_OUTER_NEXT:%.*]], [[FOR_INC7:%.*]] ], [ 0, [[FOR_BODY_PREHEADER:%.*]] ] +; CHECK: [[OR_REDUCTION_INNER:%.*]] = phi i32 [ [[OR:%.*]], [[FOR_INC7]] ], [ [[OR_REDUCTION_OUTER:%.*]], [[FOR_BODY_PREHEADER]] ] +; CHECK: [[INDEX:%.*]] = add nsw i64 [[INDVARS_OUTER]], 9 +; CHECK: for.body3: +; CHECK: [[INDVAR0:%.*]] = phi i64 [ [[TMP2:%.*]], [[FOR_BODY3_SPLIT:%.*]] ], [ 5, [[FOR_BODY3_PREHEADER]] ] +; CHECK: [[INDVAR1:%.*]] = phi i32 [ [[TMP0:%.*]], [[FOR_BODY3_SPLIT]] ], [ 6, [[FOR_BODY3_PREHEADER]] ] +; CHECK: [[OR_REDUCTION_OUTER]] = phi i32 [ [[OR_LCSSA:%.*]], [[FOR_BODY3_SPLIT]] ], [ [[A]], [[FOR_BODY3_PREHEADER]] ] +; CHECK: for.body3.split1: +; CHECK: [[ARRAYIDX5:%.*]] = getelementptr inbounds [200 x [200 x i32]], [200 x [200 x i32]]* @b, i64 0, i64 [[INDVAR0]], i64 [[INDEX]] +; CHECK: [[LOAD_VAL:%.*]] = load i32, i32* [[ARRAYIDX5]], align 4 +; CHECK: [[OR]] = or i32 [[OR_REDUCTION_INNER]], [[LOAD_VAL]] +; CHECK: for.body3.split: +; CHECK: [[OR_LCSSA]] = phi i32 [ [[OR]], [[FOR_INC7]] ] +; CHECK: [[TMP0]] = add nsw i32 [[INDVAR1]], -2 +; CHECK: [[TMP1:%.*]] = sext i32 [[TMP0]] to i64 +; CHECK: [[TMP2]] = add nsw i64 [[INDVAR0]], -1 +; CHECK: [[TMP3:%.*]] = add nsw i64 [[TMP2]], [[TMP1]] +; CHECK: [[TMP4:%.*]] = icmp eq i64 [[TMP3]], 0 +; CHECK: br i1 [[TMP4]], label %for.cond.for.end8_crit_edge, label %for.body3 +; CHECK: for.inc7: +; CHECK: [[INDVARS_OUTER_NEXT]] = add nsw i64 [[INDVARS_OUTER]], 1 +; CHECK: br i1 [[TOBOOL:%.*]], label %for.body3.split, label %for.body +; CHECK: for.cond.for.end8_crit_edge: +; CHECK: [[OR_LCSSA_LCSSA:%.*]] = phi i32 [ [[OR_LCSSA]], [[FOR_BODY3_SPLIT]] ] +; +entry: + %a = load i32, i32* @a + br label %for.body + +for.body: ; preds = %for.body.lr.ph, %for.inc7 + %indvars.outer = phi i64 [ 0, %entry ], [ %indvars.outer.next, %for.inc7 ] + %or.reduction.outer = phi i32 [ %a, %entry ], [ %or.lcssa, %for.inc7 ] + 
%index = add nsw i64 %indvars.outer, 9 + br label %for.body3 + +for.body3: ; preds = %for.body, %for.body3 + %or.reduction.inner = phi i32 [ %or.reduction.outer, %for.body ], [ %or, %for.body3 ] + %indvar0 = phi i64 [ 5, %for.body ], [ %indvar0.next, %for.body3 ] + %indvar1 = phi i32 [ 6, %for.body ], [ %indvar1.next, %for.body3 ] + %arrayidx5 = getelementptr inbounds [200 x [200 x i32]], [200 x [200 x i32]]* @b, i64 0, i64 %indvar0, i64 %index + %load.val = load i32, i32* %arrayidx5, align 4 + %or = or i32 %or.reduction.inner, %load.val + %indvar0.next = add nsw i64 %indvar0, -1 + %indvar1.next = add nsw i32 %indvar1, -2 + %indvar1.next.ext = sext i32 %indvar1.next to i64 + %indvars.add = add nsw i64 %indvar0.next, %indvar1.next.ext + %tobool2 = icmp eq i64 %indvars.add, 0 + br i1 %tobool2, label %for.inc7, label %for.body3 + +for.inc7: ; preds = %for.body3 + %or.lcssa = phi i32 [ %or, %for.body3 ] + %indvars.outer.next = add nsw i64 %indvars.outer, 1 + %indvars.outer.next.trunc = trunc i64 %indvars.outer.next to i32 + %tobool = icmp eq i32 %indvars.outer.next.trunc, 100 + br i1 %tobool, label %for.cond.for.end8_crit_edge, label %for.body + +for.cond.for.end8_crit_edge: ; preds = %for.inc7 + %or.lcssa.lcssa = phi i32 [ %or.lcssa, %for.inc7 ] + store i32 %or.lcssa.lcssa, i32* @a + br label %for.end8 + +for.end8: ; preds = %for.cond.for.end8_crit_edge, %entry + ret void +} + +;; int a, c, d, e; +;; int b[200][200]; +;; void fn1() { +;; for (; c; c++) { +;; d = 5; +;; e = 49; +;; for (; d != e; d++, e--) +;; a |= b[d][c + 9]; +;; } +;; } +; +; Two inner loop indvars are involved in the inner loop exit +; condition as LHS and RHS. 
+define void @test3() { +; CHECK-LABEL: @test3( +; CHECK: for.body: +; CHECK: [[INDVARS_OUTER:%.*]] = phi i64 [ [[INDVARS_OUTER_NEXT:%.*]], [[FOR_INC7:%.*]] ], [ 0, [[FOR_BODY_PREHEADER:%.*]] ] +; CHECK: [[OR_REDUCTION_INNER:%.*]] = phi i32 [ [[OR:%.*]], [[FOR_INC7]] ], [ [[OR_REDUCTION_OUTER:%.*]], [[FOR_BODY_PREHEADER]] ] +; CHECK: [[INDEX:%.*]] = add nsw i64 [[INDVARS_OUTER]], 9 +; CHECK: br label %for.body3.split +; CHECK: for.body3: +; CHECK: [[INDVAR0:%.*]] = phi i32 [ [[TMP1:%.*]], [[FOR_BODY3_SPLIT:%.*]] ], [ 5, [[FOR_BODY3_PREHEADER]] ] +; CHECK: [[INDVAR1:%.*]] = phi i32 [ [[TMP0:%.*]], [[FOR_BODY3_SPLIT]] ], [ 49, [[FOR_BODY3_PREHEADER]] ] +; CHECK: [[OR_REDUCTION_OUTER]] = phi i32 [ [[OR_LCSSA:%.*]], [[FOR_BODY3_SPLIT]] ], [ [[A]], [[FOR_BODY3_PREHEADER]] ] +; CHECK: for.body3.split1: +; CHECK: [[ARRAYIDX5:%.*]] = getelementptr inbounds [200 x [200 x i32]], [200 x [200 x i32]]* @b, i64 0, i32 [[INDVAR0]], i64 [[INDEX]] +; CHECK: [[LOAD_VAL:%.*]] = load i32, i32* [[ARRAYIDX5]], align 4 +; CHECK: [[OR]] = or i32 [[OR_REDUCTION_INNER]], [[LOAD_VAL]] +; CHECK: br label %for.inc7 +; CHECK: for.body3.split: +; CHECK: [[OR_LCSSA]] = phi i32 [ [[OR]], [[FOR_INC7]] ] +; CHECK: [[TMP0]] = add nsw i32 [[INDVAR1]], -1 +; CHECK: [[TMP1]] = add nsw i32 [[INDVAR0]], 1 +; CHECK: [[TMP2:%.*]] = icmp eq i32 [[TMP1]], [[TMP0]] +; CHECK: br i1 [[TMP2]], label %for.cond.for.end8_crit_edge, label %for.body3 +; CHECK: for.inc7: +; CHECK: [[INDVARS_OUTER_NEXT]] = add nsw i64 [[INDVARS_OUTER]], 1 +; CHECK: br i1 [[TOBOOL:%.*]], label %for.body3.split, label %for.body +; CHECK: for.cond.for.end8_crit_edge: +; CHECK: [[OR_LCSSA_LCSSA:%.*]] = phi i32 [ [[OR_LCSSA]], [[FOR_BODY3_SPLIT]] ] +; + +entry: + %a = load i32, i32* @a + br label %for.body + +for.body: ; preds = %for.body.lr.ph, %for.inc7 + %indvars.outer = phi i64 [ 0, %entry ], [ %indvars.outer.next, %for.inc7 ] + %or.reduction.outer = phi i32 [ %a, %entry ], [ %or.lcssa, %for.inc7 ] + %index = add nsw i64 %indvars.outer, 
9 + br label %for.body3 + +for.body3: ; preds = %for.body, %for.body3 + %or.reduction.inner = phi i32 [ %or.reduction.outer, %for.body ], [ %or, %for.body3 ] + %indvar0 = phi i32 [ 5, %for.body ], [ %indvar0.next, %for.body3 ] + %indvar1 = phi i32 [ 49, %for.body ], [ %indvar1.next, %for.body3 ] + %arrayidx5 = getelementptr inbounds [200 x [200 x i32]], [200 x [200 x i32]]* @b, i64 0, i32 %indvar0, i64 %index + %load.val = load i32, i32* %arrayidx5, align 4 + %or = or i32 %or.reduction.inner, %load.val + %indvar0.next = add nsw i32 %indvar0, 1 + %indvar1.next = add nsw i32 %indvar1, -1 + %tobool2 = icmp eq i32 %indvar0.next, %indvar1.next + br i1 %tobool2, label %for.inc7, label %for.body3 + +for.inc7: ; preds = %for.body3 + %or.lcssa = phi i32 [ %or, %for.body3 ] + %indvars.outer.next = add nsw i64 %indvars.outer, 1 + %tobool = icmp eq i64 %indvars.outer.next, 100 + br i1 %tobool, label %for.cond.for.end8_crit_edge, label %for.body + +for.cond.for.end8_crit_edge: ; preds = %for.inc7 + %or.lcssa.lcssa = phi i32 [ %or.lcssa, %for.inc7 ] + store i32 %or.lcssa.lcssa, i32* @a + br label %for.end8 + +for.end8: ; preds = %for.cond.for.end8_crit_edge, %entry + ret void +} \ No newline at end of file