Index: llvm/lib/Transforms/Scalar/LoopInterchange.cpp
===================================================================
--- llvm/lib/Transforms/Scalar/LoopInterchange.cpp
+++ llvm/lib/Transforms/Scalar/LoopInterchange.cpp
@@ -948,6 +948,86 @@
   return true;
 }
 
+// A value loaded from a non-affine access (plus a finite number of
+// computations on it) is called a non-affine value.
+// A value loaded from an affine access (plus a finite number of
+// computations on it) is called an affine value.
+static bool checkNonAffineValue(Value *Op, ScalarEvolution *SE, Loop *OutLoop,
+                                Loop *InnerLoop) {
+  if (!isa<Instruction>(*Op))
+    return false;
+  else if (OutLoop->isLoopInvariant(Op))
+    return false;
+  // Check if Op is the induction variable of any loop contained in OutLoop.
+  else {
+    auto *PHIOut = OutLoop->getInductionVariable(*SE);
+    auto *PHI2 = dyn_cast<PHINode>(Op);
+    if (PHIOut && PHI2 && PHIOut == PHI2) {
+      return false;
+    }
+    auto &SubLoops = OutLoop->getSubLoopsVector();
+    for (auto &L : SubLoops) {
+      auto *PHI = L->getInductionVariable(*SE);
+      if (PHI && PHI2 && PHI == PHI2) {
+        if (L == InnerLoop)
+          return false;
+        else
+          return true;
+      }
+    }
+  }
+
+  if (LoadInst *Ld = dyn_cast<LoadInst>(Op)) {
+    Value *LdAddr = Ld->getPointerOperand();
+    const SCEVAddRecExpr *LdAddrAddRec =
+        dyn_cast<SCEVAddRecExpr>(SE->getSCEV(LdAddr));
+    if ((!LdAddrAddRec || !LdAddrAddRec->isAffine()) &&
+        !OutLoop->isLoopInvariant(Op))
+      return true;
+    else
+      return false;
+  } else {
+    // Not a load: recurse into the operands; the value is non-affine if any
+    // operand is.
+    bool Res = false;
+    auto *I = cast<Instruction>(Op);
+    for (Use &U : I->operands())
+      Res |= checkNonAffineValue(U.get(), SE, OutLoop, InnerLoop);
+    return Res;
+  }
+}
+
+// Return true if any store with an affine address inside OutLoop stores a
+// non-affine value (see checkNonAffineValue above).
+static bool nonAffineValueAssignedToAffine(Loop *InnerLoop, Loop *OutLoop,
+                                           ScalarEvolution *SE) {
+
+  using ValueVector = SmallVector<Instruction *, 8>;
+  ValueVector AffineStInstr;
+
+  // Collect all stores in OutLoop whose address is an affine AddRec.
+  for (BasicBlock *BB : OutLoop->blocks()) {
+    for (Instruction &I : *BB) {
+      if (auto *St = dyn_cast<StoreInst>(&I)) {
+        Value *StAddr = St->getPointerOperand();
+        const SCEVAddRecExpr *StAddrAddRec =
+            dyn_cast<SCEVAddRecExpr>(SE->getSCEV(StAddr));
+        if (StAddrAddRec && StAddrAddRec->isAffine())
+          AffineStInstr.push_back(&I);
+      }
+    }
+  }
+
+  // For each affine store instruction, check whether the stored value is
+  // affine. Loop-invariant values are trivially affine, so skip them rather
+  // than bailing out of the whole scan.
+  for (ValueVector::iterator I = AffineStInstr.begin(),
+                             IE = AffineStInstr.end();
+       I != IE; ++I) {
+    Value *Op = cast<StoreInst>(*I)->getOperand(0);
+    if (OutLoop->isLoopInvariant(Op))
+      continue;
+    else if (checkNonAffineValue(Op, SE, OutLoop, InnerLoop))
+      return true;
+  }
+  return false;
+}
+
 bool LoopInterchangeLegality::canInterchangeLoops(unsigned InnerLoopId,
                                                   unsigned OuterLoopId,
                                                   CharMatrix &DepMatrix) {
@@ -1026,6 +1106,17 @@
     return false;
   }
 
+  if (nonAffineValueAssignedToAffine(InnerLoop, OuterLoop, SE)) {
+    LLVM_DEBUG(dbgs() << "Found non-affine value assigned by affine access.\n");
+    ORE->emit([&]() {
+      return OptimizationRemarkMissed(
+                 DEBUG_TYPE, "NonAffineValueAssignedToAffine",
+                 OuterLoop->getStartLoc(), OuterLoop->getHeader())
+             << "Found non-affine value assigned by affine access.";
+    });
+    return false;
+  }
+
   return true;
 }
 
Index: llvm/test/Transforms/LoopInterchange/non-affine-store-to-affine.ll
===================================================================
--- /dev/null
+++ llvm/test/Transforms/LoopInterchange/non-affine-store-to-affine.ll
@@ -0,0 +1,55 @@
+; RUN: opt < %s -basicaa -loop-interchange -S -debug 2>&1 | FileCheck %s
+
+target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
+target triple = "aarch64-unknown-linux-gnu"
+
+@f = dso_local local_unnamed_addr global i32* undef, align 8
+@c = common dso_local local_unnamed_addr global [4 x [4 x i32]] zeroinitializer, align 4
+
+;; The following case should not be interchanged.
+;; But the interchange cannot be prevented by the dependence matrix.
+;;
+;; int* f;
+;; int c[4][4];
+;;
+;; main() {
+;;   for (int i = 0; i <= 3; i++) {
+;;     for (int j = 0; j <= 3; j++) {
+;;       *f ^= 0x1000;
+;;       c[j][i] = *f;
+;;     }
+;;   }
+;; }
+
+; CHECK: Found non-affine value assigned by affine access.
+
+; CHECK: Not interchanging loops. Cannot prove legality.
+
+define dso_local i32 @main() local_unnamed_addr #0 {
+entry:
+  %0 = load i32*, i32** @f, align 8
+  br label %for.cond1.preheader.i
+
+for.cond1.preheader.i:                            ; preds = %for.cond.cleanup3.i, %entry
+  %indvars.iv18.i = phi i64 [ 0, %entry ], [ %indvars.iv.next19.i, %for.cond.cleanup3.i ]
+  br label %for.body4.i
+
+for.cond.cleanup3.i:                              ; preds = %for.body4.i
+  %indvars.iv.next19.i = add nuw nsw i64 %indvars.iv18.i, 1
+  %exitcond20.i = icmp eq i64 %indvars.iv.next19.i, 4
+  br i1 %exitcond20.i, label %for.body, label %for.cond1.preheader.i
+
+for.body4.i:                                      ; preds = %for.body4.i, %for.cond1.preheader.i
+  %indvars.iv.i = phi i64 [ 0, %for.cond1.preheader.i ], [ %indvars.iv.next.i, %for.body4.i ]
+  %1 = load i32, i32* %0, align 4
+  %xor.i = xor i32 %1, 4096
+  store i32 %xor.i, i32* %0, align 4
+  %arrayidx6.i = getelementptr inbounds [4 x [4 x i32]], [4 x [4 x i32]]* @c, i64 0, i64 %indvars.iv.i, i64 %indvars.iv18.i
+  store i32 %xor.i, i32* %arrayidx6.i, align 4
+  %indvars.iv.next.i = add nuw nsw i64 %indvars.iv.i, 1
+  %exitcond.i = icmp eq i64 %indvars.iv.next.i, 4
+  br i1 %exitcond.i, label %for.cond.cleanup3.i, label %for.body4.i
+
+for.body:                                         ; preds = %for.body, %for.cond.cleanup3.i
+  br label %for.body
+}