diff --git a/llvm/lib/Transforms/Scalar/LoopInterchange.cpp b/llvm/lib/Transforms/Scalar/LoopInterchange.cpp --- a/llvm/lib/Transforms/Scalar/LoopInterchange.cpp +++ b/llvm/lib/Transforms/Scalar/LoopInterchange.cpp @@ -1661,15 +1661,21 @@ // replaced by Inners'. OuterLoopLatchSuccessor->replacePhiUsesWith(OuterLoopLatch, InnerLoopLatch); + auto &OuterInnerReductions = LIL.getOuterInnerReductions(); + (void)OuterInnerReductions; + // Now update the reduction PHIs in the inner and outer loop headers. SmallVector InnerLoopPHIs, OuterLoopPHIs; - for (PHINode &PHI : drop_begin(InnerLoopHeader->phis())) + for (PHINode &PHI : InnerLoopHeader->phis()) { + if (OuterInnerReductions.find(&PHI) == OuterInnerReductions.end()) + continue; InnerLoopPHIs.push_back(cast(&PHI)); - for (PHINode &PHI : drop_begin(OuterLoopHeader->phis())) + } + for (PHINode &PHI : OuterLoopHeader->phis()) { + if (OuterInnerReductions.find(&PHI) == OuterInnerReductions.end()) + continue; OuterLoopPHIs.push_back(cast(&PHI)); - - auto &OuterInnerReductions = LIL.getOuterInnerReductions(); - (void)OuterInnerReductions; + } // Now move the remaining reduction PHIs from outer to inner loop header and // vice versa. The PHI nodes must be part of a reduction across the inner and diff --git a/llvm/test/Transforms/LoopInterchange/pr43326.ll b/llvm/test/Transforms/LoopInterchange/pr43326.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Transforms/LoopInterchange/pr43326.ll @@ -0,0 +1,86 @@ +; RUN: opt < %s -basic-aa -loop-interchange -pass-remarks-missed='loop-interchange' -pass-remarks-output=%t -S \ +; RUN: -verify-dom-info -verify-loop-info -verify-loop-lcssa -stats 2>&1 +; RUN: FileCheck --input-file=%t --check-prefix=REMARKS %s + +@a = global i32 0 +@b = global i8 0 +@c = global i32 0 +@d = global i32 0 +@e = global [1 x [1 x i32]] zeroinitializer + +; REMARKS: --- !Passed +; REMARKS-NEXT: Pass: loop-interchange +; REMARKS-NEXT: Name: Interchanged +; REMARKS-NEXT: Function: pr43326 + +define void @pr43326() { +entry: + %0 = load i32, i32* @a + %tobool.not2 = icmp eq i32 %0, 0 + br i1 %tobool.not2, label %for.end14, label %for.body.lr.ph + +for.body.lr.ph: ; preds = %entry + %d.promoted = load i32, i32* @d + %a.promoted = load i32, i32* @a + br label %for.body + +for.body: ; preds = %for.body.lr.ph, %for.inc12 + %inc1312 = phi i32 [ %a.promoted, %for.body.lr.ph ], [ %inc13, %for.inc12 ] + %xor.lcssa.lcssa11 = phi i32 [ %d.promoted, %for.body.lr.ph ], [ %xor.lcssa.lcssa, %for.inc12 ] + br label %for.body3 + +for.body3: ; preds = %for.body, %for.inc10 + %xor.lcssa9 = phi i32 [ %xor.lcssa.lcssa11, %for.body ], [ %xor.lcssa, %for.inc10 ] + %dec7 = phi i8 [ 0, %for.body ], [ %dec, %for.inc10 ] + %idxprom8 = sext i8 %dec7 to i64 + br label %for.body7 + +for.body7: ; preds = %for.body3, %for.inc + %xor5 = phi i32 [ %xor.lcssa9, %for.body3 ], [ %xor, %for.inc ] + %inc4 = phi i32 [ 0, %for.body3 ], [ %inc, %for.inc ] + %idxprom = sext i32 %inc4 to i64 + %arrayidx9 = getelementptr inbounds [1 x [1 x i32]], [1 x [1 x i32]]* @e, i64 0, i64 %idxprom, i64 %idxprom8 + %1 = load i32, i32* %arrayidx9 + %xor = xor i32 %xor5, %1 + br label %for.inc + +for.inc: ; preds = %for.body7 + %inc = add nsw i32 %inc4, 1 + %cmp5 = icmp slt i32 %inc, 1 + br i1 %cmp5, label %for.body7, label %for.end + +for.end: ; preds = %for.inc + %xor.lcssa = phi i32 [ %xor, %for.inc ] + %inc.lcssa = phi i32 [ %inc, %for.inc ] + br label %for.inc10 + +for.inc10: ; preds = %for.end + %dec = add i8 %dec7, -1 + %cmp = icmp sgt i8 %dec, -1 + br i1 %cmp, label %for.body3, label %for.end11 + +for.end11: ; preds = %for.inc10 + %xor.lcssa.lcssa = phi i32 [ %xor.lcssa, %for.inc10 ] + %dec.lcssa = phi i8 [ %dec, %for.inc10 ] + %inc.lcssa.lcssa = phi i32 [ %inc.lcssa, %for.inc10 ] + br label %for.inc12 + +for.inc12: ; preds = %for.end11 + %inc13 = add nsw i32 %inc1312, 1 + %tobool.not = icmp eq i32 %inc13, 0 + br i1 %tobool.not, label %for.cond.for.end14_crit_edge, label %for.body + +for.cond.for.end14_crit_edge: ; preds = %for.inc12 + %inc13.lcssa = phi i32 [ %inc13, %for.inc12 ] + %inc.lcssa.lcssa.lcssa = phi i32 [ %inc.lcssa.lcssa, %for.inc12 ] + %xor.lcssa.lcssa.lcssa = phi i32 [ %xor.lcssa.lcssa, %for.inc12 ] + %dec.lcssa.lcssa = phi i8 [ %dec.lcssa, %for.inc12 ] + store i8 %dec.lcssa.lcssa, i8* @b + store i32 %xor.lcssa.lcssa.lcssa, i32* @d + store i32 %inc.lcssa.lcssa.lcssa, i32* @c + store i32 %inc13.lcssa, i32* @a + br label %for.end14 + +for.end14: ; preds = %for.cond.for.end14_crit_edge, %entry + ret void +} diff --git a/llvm/test/Transforms/LoopInterchange/pr48212.ll b/llvm/test/Transforms/LoopInterchange/pr48212.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Transforms/LoopInterchange/pr48212.ll @@ -0,0 +1,54 @@ +; RUN: opt < %s -basic-aa -loop-interchange -pass-remarks-missed='loop-interchange' -pass-remarks-output=%t -S \ +; RUN: -verify-dom-info -verify-loop-info -verify-loop-lcssa 2>&1 +; RUN: FileCheck --input-file=%t --check-prefix=REMARKS %s + +; REMARKS: --- !Passed +; REMARKS-NEXT: Pass: loop-interchange +; REMARKS-NEXT: Name: Interchanged +; REMARKS-NEXT: Function: pr48212 + +define void @pr48212([5 x i32]* %filter) #0 { +entry: + br label %L1 + +L1: ; preds = %entry + br label %for.body + +for.body: ; preds = %L1, %for.inc6 + %temp.04 = phi i32 [ undef, %L1 ], [ %temp.1.lcssa, %for.inc6 ] + %k1.03 = phi i32 [ 0, %L1 ], [ %inc7, %for.inc6 ] + br label %L2 + +L2: ; preds = %for.body + br label %for.body3 + +for.body3: ; preds = %L2, %for.inc + %temp.12 = phi i32 [ %temp.04, %L2 ], [ %add, %for.inc ] + %k2.01 = phi i32 [ 0, %L2 ], [ %inc, %for.inc ] + %idxprom = sext i32 %k2.01 to i64 + %arrayidx = getelementptr inbounds [5 x i32], [5 x i32]* %filter, i64 %idxprom + %idxprom4 = sext i32 %k1.03 to i64 + %arrayidx5 = getelementptr inbounds [5 x i32], [5 x i32]* %arrayidx, i64 0, i64 %idxprom4 + %0 = load i32, i32* %arrayidx5 + %add = add nsw i32 %temp.12, %0 + br label %for.inc + +for.inc: ; preds = %for.body3 + %inc = add nsw i32 %k2.01, 1 + %cmp2 = icmp slt i32 %inc, 3 + br i1 %cmp2, label %for.body3, label %for.end + +for.end: ; preds = %for.inc + %temp.1.lcssa = phi i32 [ %add, %for.inc ] + br label %for.inc6 + +for.inc6: ; preds = %for.end + %inc7 = add nsw i32 %k1.03, 1 + %cmp = icmp slt i32 %inc7, 5 + br i1 %cmp, label %for.body, label %for.end8 + +for.end8: ; preds = %for.inc6 + ret void +} + +