Index: llvm/lib/Transforms/Scalar/LoopInterchange.cpp =================================================================== --- llvm/lib/Transforms/Scalar/LoopInterchange.cpp +++ llvm/lib/Transforms/Scalar/LoopInterchange.cpp @@ -947,6 +947,77 @@ return true; } +static bool containsInductionOfLoop(Loop *L, const SCEV *Expr) { + const SCEVAddRecExpr *AddRec = dyn_cast(Expr); + if (AddRec == nullptr || !AddRec->isAffine()) + return false; + if (AddRec->getLoop() == L) + return true; + else + return containsInductionOfLoop(L, AddRec->getStart()); +} + +// If a value/variable is loaded from non-affine access and did finite number +// of calculation, we call it non-affine value/variable. If a value/variable +// is loaded from affine access and did finite number of calculation, we call +// it affine value/variable. +static bool checkNonAffineValue(Value *Op, ScalarEvolution *SE, Loop *OuterLoop, + Loop *InnerLoop) { + if (!isa(*Op) || OuterLoop->isLoopInvariant(Op)) + return false; + // Check if Op is the induction variable of any loop contained in OuterLoop. + // If the Op is the induction variable of the OuterLoop or InnerLoop. It is + // safe to store to the affine value. Otherwise, the assignment of the + // induction variable to the affine variable is similar to assign a + // non-affine value to the affine variable. + if (PHINode *PHI = dyn_cast(Op)) { + return !(containsInductionOfLoop(InnerLoop, SE->getSCEV(PHI)) || + containsInductionOfLoop(OuterLoop, SE->getSCEV(PHI))); + } + + if (LoadInst *Ld = dyn_cast(Op)) { + Value *LdAddr = Ld->getPointerOperand(); + // If the value is loaded from a non-affine access, we need to prevent + // interchanging when it is assigned to an affine variable. + return !(containsInductionOfLoop(InnerLoop, SE->getSCEV(LdAddr)) || + containsInductionOfLoop(OuterLoop, SE->getSCEV(LdAddr))); + } else { + return any_of(cast(Op)->operands(), [&](const Use &U) { + return checkNonAffineValue(U.get(), SE, OuterLoop, InnerLoop); + }); + } +} + +static bool nonAffineValueAssignedToAffine(Loop *InnerLoop, Loop *OuterLoop, + ScalarEvolution *SE) { + + using ValueVector = SmallVector; + ValueVector AffineStInstr; + + for (BasicBlock *BB : OuterLoop->blocks()) { + for (Instruction &I : *BB) { + if (auto *St = dyn_cast(&I)) { + Value *StAddr = St->getPointerOperand(); + const SCEVAddRecExpr *StAddrAddRec = + dyn_cast(SE->getSCEV(StAddr)); + if (StAddrAddRec != nullptr && StAddrAddRec->isAffine()) + AffineStInstr.push_back(&I); + } + } + } + + // For each affine store instruction, we need to check if the store value is + // non-affine. + for (ValueVector::iterator I = AffineStInstr.begin(), + IE = AffineStInstr.end(); + I != IE; ++I) { + Value *Op = cast(*I)->getOperand(0); + if (checkNonAffineValue(Op, SE, OuterLoop, InnerLoop)) + return true; + } + return false; +} + bool LoopInterchangeLegality::canInterchangeLoops(unsigned InnerLoopId, unsigned OuterLoopId, CharMatrix &DepMatrix) { @@ -1025,6 +1096,17 @@ return false; } + if (nonAffineValueAssignedToAffine(InnerLoop, OuterLoop, SE)) { + LLVM_DEBUG(dbgs() << "Found non-affine value assigned by affine access.\n"); + ORE->emit([&]() { + return OptimizationRemarkMissed( + DEBUG_TYPE, "NonAffineValueAssignedToAffine", + OuterLoop->getStartLoc(), OuterLoop->getHeader()) + << "Found non-affine value assigned by affine access."; + }); + return false; + } + return true; } Index: llvm/test/Transforms/LoopInterchange/non-affine-store-to-affine.ll =================================================================== --- /dev/null +++ llvm/test/Transforms/LoopInterchange/non-affine-store-to-affine.ll @@ -0,0 +1,59 @@ +; REQUIRES: asserts +; RUN: opt < %s -basicaa -loop-interchange -S -debug 2>&1 | FileCheck %s + +target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128" +target triple = "aarch64-unknown-linux-gnu" + +@f = dso_local local_unnamed_addr global i32* undef, align 8 +@c = common dso_local local_unnamed_addr global [4 x [4 x i32]] zeroinitializer, align 4 + +;; The following case should not be interchanged. +;; But interchange can not be prevent by dependence matrix. +;; +;; int* f; +;; int c[4][4]; +;; +;; void foo() { +;; for (int i = 0; i <= 3; i++) { +;; for (int j = 0; j <= 3; j++) { +;; *f ^= 0x1000; +;; c[j][i] = *f; +;; } +;; } +;; } + +; CHECK: Found non-affine value assigned by affine access. +; CHECK: Not interchanging loops. Cannot prove legality. + +define void @foo() { +entry: + %0 = load i32*, i32** @f, align 8 + br label %outer.header + +outer.header: ; preds = %outer.latch, %entry + %indvars.iv.i = phi i64 [ 0, %entry ], [ %indvars.iv.next.i, %outer.latch ] + br label %inner.header + +inner.header: ; preds = %for.body4.i, %outer.header + %indvars.iv.j = phi i64 [ 0, %outer.header ], [ %indvars.iv.next.j, %inner.header ] + %1 = load i32, i32* %0, align 4 + %xor.i = xor i32 %1, 4096 + store i32 %xor.i, i32* %0, align 4 + %arrayidx6.i = getelementptr inbounds [4 x [4 x i32]], [4 x [4 x i32]]* @c, i64 0, i64 %indvars.iv.j, i64 %indvars.iv.i + store i32 %xor.i, i32* %arrayidx6.i, align 4 + %indvars.iv.next.j = add nuw nsw i64 %indvars.iv.j, 1 + %exitcond.i = icmp eq i64 %indvars.iv.next.j, 4 + br i1 %exitcond.i, label %outer.latch, label %inner.header + +outer.latch: ; preds = %inner.header + %indvars.iv.next.i = add nuw nsw i64 %indvars.iv.i, 1 + %exitcond20.i = icmp eq i64 %indvars.iv.next.i, 4 + br i1 %exitcond20.i, label %outer.exit, label %outer.header + +outer.exit: ; preds = %outer.latch + br label %exit + +exit: ; preds = %outer.exit + ret void +} +