diff --git a/llvm/lib/Transforms/Scalar/LoopInterchange.cpp b/llvm/lib/Transforms/Scalar/LoopInterchange.cpp --- a/llvm/lib/Transforms/Scalar/LoopInterchange.cpp +++ b/llvm/lib/Transforms/Scalar/LoopInterchange.cpp @@ -323,9 +323,9 @@ /// LoopInterchangeLegality checks if it is legal to interchange the loop. class LoopInterchangeLegality { public: - LoopInterchangeLegality(Loop *Outer, Loop *Inner, ScalarEvolution *SE, - OptimizationRemarkEmitter *ORE) - : OuterLoop(Outer), InnerLoop(Inner), SE(SE), ORE(ORE) {} + LoopInterchangeLegality(Loop *Outer, Loop *Inner, DominatorTree *DT, + ScalarEvolution *SE, OptimizationRemarkEmitter *ORE) + : OuterLoop(Outer), InnerLoop(Inner), DT(DT), SE(SE), ORE(ORE) {} /// Check if the loops can be interchanged. bool canInterchangeLoops(unsigned InnerLoopId, unsigned OuterLoopId, @@ -356,6 +356,7 @@ Loop *OuterLoop; Loop *InnerLoop; + DominatorTree *DT; ScalarEvolution *SE; /// Interface to emit optimization remarks. @@ -539,7 +540,7 @@ std::vector<std::vector<char>> &DependencyMatrix) { LLVM_DEBUG(dbgs() << "Processing InnerLoopId = " << InnerLoopId << " and OuterLoopId = " << OuterLoopId << "\n"); - LoopInterchangeLegality LIL(OuterLoop, InnerLoop, SE, ORE); + LoopInterchangeLegality LIL(OuterLoop, InnerLoop, DT, SE, ORE); if (!LIL.canInterchangeLoops(InnerLoopId, OuterLoopId, DependencyMatrix)) { LLVM_DEBUG(dbgs() << "Not interchanging loops. Cannot prove legality.\n"); return false; @@ -1014,6 +1015,26 @@ return false; } + // Check whether the outer or the inner loop contains instructions that are + // guarded by control flow and have side effects. + for (BasicBlock *BB : OuterLoop->blocks()) { + Loop *L = InnerLoop->contains(BB) ? 
InnerLoop : OuterLoop; + if (DT->dominates(BB, L->getLoopLatch())) + continue; + if (containsUnsafeInstructions(BB)) { + LLVM_DEBUG(dbgs() << "Instructions guarded by control flow may break " + "loop interchange.\n"); + ORE->emit([&]() { + return OptimizationRemarkMissed( + DEBUG_TYPE, "InvalidInstructionWithControlFlow", + OuterLoop->getStartLoc(), OuterLoop->getHeader()) + << "Instructions guarded by control flow may break loop " + "interchange.\n"; + }); + return false; + } + } + return true; } diff --git a/llvm/test/Transforms/LoopInterchange/pr48057.ll b/llvm/test/Transforms/LoopInterchange/pr48057.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Transforms/LoopInterchange/pr48057.ll @@ -0,0 +1,103 @@ +; REQUIRES: asserts +; RUN: opt -loop-interchange -debug-only=loop-interchange 2>&1 %s | FileCheck %s + +; %land.rhs is guarded by control flow inside the inner loop and stores to @e, +; so the legality check is expected to reject the interchange (PR48057). +; CHECK: Instructions guarded by control flow may break loop interchange. +; CHECK-NEXT: Not interchanging loops. Cannot prove legality. + +target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +@b = dso_local local_unnamed_addr global [7 x [8 x i8]] [[8 x i8] zeroinitializer, [8 x i8] zeroinitializer, [8 x i8] zeroinitializer, [8 x i8] zeroinitializer, [8 x i8] c"\05\00\00\00\00\00\00\00", [8 x i8] zeroinitializer, [8 x i8] c"\02\03\00\00\00\00\00\00"], align 16 +@.str = private unnamed_addr constant [4 x i8] c"%d\0A\00", align 1 +@e = dso_local local_unnamed_addr global i16 0, align 2 +@c = dso_local local_unnamed_addr global i32 0, align 4 +@d = dso_local local_unnamed_addr global i32 0, align 4 + +; Function Attrs: nounwind uwtable +define dso_local i32 @main() local_unnamed_addr #0 { +entry: + call fastcc void @f() + %0 = load i16, i16* @e, align 2, !tbaa !2 + %conv = sext i16 %0 to i32 + %call1 = call i32 (i8*, ...) 
@printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i64 0, i64 0), i32 %conv) + ret i32 0 +} + +; Function Attrs: nofree nounwind +declare dso_local noundef i32 @printf(i8* nocapture noundef readonly, ...) local_unnamed_addr #1 + +; Function Attrs: nofree norecurse nounwind uwtable +define internal fastcc void @f() unnamed_addr #2 { +entry: + %.pr = load i32, i32* @c, align 4, !tbaa !6 + %cmp2 = icmp slt i32 %.pr, 8 + br i1 %cmp2, label %for.cond1.preheader.preheader, label %for.end13 + +for.cond1.preheader.preheader: ; preds = %entry + %0 = sext i32 %.pr to i64 + %1 = sub i32 7, %.pr + %2 = zext i32 %1 to i64 + %3 = add i64 %0, %2 + br label %for.cond1.preheader + +for.cond1.preheader: ; preds = %for.cond1.preheader.preheader, %for.inc12 + %indvars.iv4 = phi i64 [ %0, %for.cond1.preheader.preheader ], [ %indvars.iv.next5, %for.inc12 ] + br label %for.body2 + +for.body2: ; preds = %for.cond1.preheader, %land.end + %indvars.iv = phi i64 [ 4, %for.cond1.preheader ], [ %indvars.iv.next, %land.end ] + %4 = add nuw nsw i64 %indvars.iv, 2 + %arrayidx4 = getelementptr inbounds [7 x [8 x i8]], [7 x [8 x i8]]* @b, i64 0, i64 %4, i64 %indvars.iv4 + %5 = load i8, i8* %arrayidx4, align 1, !tbaa !8 + %tobool5.not = icmp eq i8 %5, 0 + br i1 %tobool5.not, label %land.end, label %land.rhs + +land.rhs: ; preds = %for.body2 + %arrayidx8 = getelementptr inbounds [7 x [8 x i8]], [7 x [8 x i8]]* @b, i64 0, i64 %indvars.iv, i64 0 + %6 = load i8, i8* %arrayidx8, align 8, !tbaa !8 + %conv9 = sext i8 %6 to i16 + store i16 %conv9, i16* @e, align 2, !tbaa !2 + br label %land.end + +land.end: ; preds = %land.rhs, %for.body2 + %indvars.iv.next = add nsw i64 %indvars.iv, -1 + %tobool.not = icmp eq i64 %indvars.iv.next, 0 + br i1 %tobool.not, label %for.inc12, label %for.body2, !llvm.loop !9 + +for.inc12: ; preds = %land.end + %indvars.iv.next5 = add nsw i64 %indvars.iv4, 1 + %exitcond = icmp ne i64 %indvars.iv.next5, 8 + br i1 %exitcond, label 
%for.cond1.preheader, label %for.cond.for.end13_crit_edge, !llvm.loop !11 + +for.cond.for.end13_crit_edge: ; preds = %for.inc12 + %7 = add i64 %3, 1 + %8 = trunc i64 %7 to i32 + store i32 0, i32* @d, align 4, !tbaa !6 + store i32 %8, i32* @c, align 4, !tbaa !6 + br label %for.end13 + +for.end13: ; preds = %for.cond.for.end13_crit_edge, %entry + ret void +} + +attributes #0 = { nounwind uwtable "frame-pointer"="none" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" } +attributes #1 = { nofree nounwind "frame-pointer"="none" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" } +attributes #2 = { nofree norecurse nounwind uwtable "frame-pointer"="none" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" } + +!llvm.module.flags = !{!0} +!llvm.ident = !{!1} + +!0 = !{i32 1, !"wchar_size", i32 4} +!1 = !{!"clang version 13.0.0 (git@github.com:llvm/llvm-project.git 2bfd21a3510e5fb8378fd863060e600da20c00b7)"} +!2 = !{!3, !3, i64 0} +!3 = !{!"short", !4, i64 0} +!4 = !{!"omnipotent char", !5, i64 0} +!5 = !{!"Simple C/C++ TBAA"} +!6 = !{!7, !7, i64 0} +!7 = !{!"int", !4, i64 0} +!8 = !{!4, !4, i64 0} +!9 = distinct !{!9, !10} +!10 = !{!"llvm.loop.mustprogress"} +!11 = distinct !{!11, !10}