Index: llvm/lib/Transforms/Scalar/LoopInterchange.cpp =================================================================== --- llvm/lib/Transforms/Scalar/LoopInterchange.cpp +++ llvm/lib/Transforms/Scalar/LoopInterchange.cpp @@ -948,6 +948,65 @@ return true; } +static bool isInductionOfLoop(Loop *L, const SCEV *Expr) { + const SCEVAddRecExpr *AddRec = dyn_cast(Expr); + if (!AddRec || !AddRec->isAffine()) + return false; + if (AddRec->getLoop() == L) + return true; + else + return isInductionOfLoop(L, AddRec->getStart()); +} + +// Check if the affine load access is the induction of the innterloop and +// outerloop. +static bool checkInductionAccess(Loop *OuterLoop, Loop *InnerLoop, + ScalarEvolution *SE, Value *Op) { + auto *PHI = dyn_cast(Op); + if (!isa(*Op) || InnerLoop->isLoopInvariant(Op) || PHI) + return false; + if (LoadInst *Ld = dyn_cast(Op)) { + Value *LdAddr = Ld->getPointerOperand(); + return isInductionOfLoop(InnerLoop, SE->getSCEV(LdAddr)) && + isInductionOfLoop(OuterLoop, SE->getSCEV(LdAddr)); + } else + return any_of(cast(Op)->operands(), [&](const Use &U) { + return checkInductionAccess(OuterLoop, InnerLoop, SE, U.get()); + }); +} + +static bool inLoopAffineConditions(Loop *OuterLoop, Loop *InnerLoop, + ScalarEvolution *SE) { + using ValueVector = SmallVector; + ValueVector BranchInstr; + BasicBlock *InnerLatch = InnerLoop->getLoopLatch(); + Instruction *InnerLoopBranch = + dyn_cast(InnerLatch->getTerminator()); + + for (BasicBlock *BB : InnerLoop->blocks()) { + for (Instruction &I : *BB) { + auto *BrInst = dyn_cast(&I); + if (BrInst && BrInst->isConditional() && &I != InnerLoopBranch) + BranchInstr.push_back(&I); + } + } + + // For each conditional branch instruction, we need to check if the + // condition is affine + for (ValueVector::iterator I = BranchInstr.begin(), IE = BranchInstr.end(); + I != IE; ++I) { + Value *Condition = cast(*I)->getOperand(0); + Instruction *Cmp = dyn_cast(Condition); + if (Cmp) { + Value *Op0 = Cmp->getOperand(0); + Value *Op1 = Cmp->getOperand(1); + return checkInductionAccess(OuterLoop, InnerLoop, SE, Op0) || + checkInductionAccess(OuterLoop, InnerLoop, SE, Op1); + } + } + return false; +} + bool LoopInterchangeLegality::canInterchangeLoops(unsigned InnerLoopId, unsigned OuterLoopId, CharMatrix &DepMatrix) { @@ -1026,6 +1085,18 @@ return false; } + if (inLoopAffineConditions(OuterLoop, InnerLoop, SE)) { + LLVM_DEBUG( + dbgs() << "Found affine value as control-flow conditions in loop.\n"); + ORE->emit([&]() { + return OptimizationRemarkMissed(DEBUG_TYPE, "AffineConditionInLoop", + OuterLoop->getStartLoc(), + OuterLoop->getHeader()) + << "Found affine value as control-flow conditions in loop."; + }); + return false; + } + return true; } Index: llvm/test/Transforms/LoopInterchange/affine-load-condition-no-interchange.ll =================================================================== --- /dev/null +++ llvm/test/Transforms/LoopInterchange/affine-load-condition-no-interchange.ll @@ -0,0 +1,66 @@ +; REQUIRES: asserts +; RUN: opt < %s -basicaa -loop-interchange -S -debug 2>&1 | FileCheck %s + +target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128" +target triple = "aarch64-unknown-linux-gnu" + +@b = dso_local local_unnamed_addr global [7 x [8 x i8]] [[8 x i8] zeroinitializer, [8 x i8] zeroinitializer, [8 x i8] zeroinitializer, [8 x i8] zeroinitializer, [8 x i8] c"\05\00\00\00\00\00\00\00", [8 x i8] zeroinitializer, [8 x i8] c"\02\03\00\00\00\00\00\00"], align 1 +@e = common dso_local local_unnamed_addr global i16 0, align 2 + +;; The following case should not be interchanged, +;; since the affine load is a if condition. +;; +;; char b[][8] = {{}, {}, {}, {}, {5}, {}, 2, 3}; +;; int c, d; +;; short e; +;; static char f() { +;; for (; c <= 7; c++) { +;; d = 4; +;; for (; d; d--) +;; b[d + 2][c] && (e = b[d][0]); +;; } +;; } + +; CHECK: Found affine value as control-flow conditions in loop. +; CHECK: Not interchanging loops. Cannot prove legality. + + +define i32 @foo() { +outer.preheader: + br label %outer.header + +outer.header: ; preds = %outer.latch, %outer.preheader + %indvars.c = phi i64 [ 0, %outer.preheader ], [ %indvars.c.next, %outer.latch ] + br label %inner.header + +inner.header: ; preds = %inner.latch, %outer.header + %indvars.iv.d = phi i64 [ 4, %outer.header ], [ %indvars.d.next, %inner.latch ] + %0 = add nuw nsw i64 %indvars.iv.d, 2 + %arrayidx4.i = getelementptr inbounds [7 x [8 x i8]], [7 x [8 x i8]]* @b, i64 0, i64 %0, i64 %indvars.c + %1 = load i8, i8* %arrayidx4.i, align 1 + %tobool5.i = icmp eq i8 %1, 0 + br i1 %tobool5.i, label %inner.latch, label %land.rhs.i + +land.rhs.i: ; preds = %inner.header + %arrayidx8.i = getelementptr inbounds [7 x [8 x i8]], [7 x [8 x i8]]* @b, i64 0, i64 %indvars.iv.d, i64 0 + %2 = load i8, i8* %arrayidx8.i, align 1 + %conv9.i = zext i8 %2 to i16 + store i16 %conv9.i, i16* @e, align 2 + br label %inner.latch + +inner.latch: ; preds = %land.rhs.i, %inner.header + %indvars.d.next = add nsw i64 %indvars.iv.d, -1 + %tobool.i = icmp eq i64 %indvars.d.next, 0 + br i1 %tobool.i, label %outer.latch, label %inner.header + +outer.latch: ; preds = %inner.latch + %indvars.c.next = add nsw i64 %indvars.c, 1 + %cmp.i = icmp slt i64 %indvars.c, 7 + br i1 %cmp.i, label %outer.header, label %outer.exit + +outer.exit: ; preds = %outer.latch + br label %f.exit + +f.exit: ; preds = %outer.exit + ret i32 0 +}