Index: llvm/trunk/lib/Transforms/Scalar/LoopInterchange.cpp =================================================================== --- llvm/trunk/lib/Transforms/Scalar/LoopInterchange.cpp +++ llvm/trunk/lib/Transforms/Scalar/LoopInterchange.cpp @@ -339,16 +339,10 @@ bool currentLimitations(); - bool hasInnerLoopReduction() { return InnerLoopHasReduction; } - private: bool tightlyNested(Loop *Outer, Loop *Inner); - bool containsUnsafeInstructionsInHeader(BasicBlock *BB); - bool areAllUsesReductions(Instruction *Ins, Loop *L); - bool containsUnsafeInstructionsInLatch(BasicBlock *BB); - bool findInductionAndReductions(Loop *L, - SmallVector &Inductions, - SmallVector &Reductions); + bool containsUnsafeInstructions(BasicBlock *BB); + bool findInductions(Loop *L, SmallVector &Inductions); Loop *OuterLoop; Loop *InnerLoop; @@ -358,7 +352,6 @@ /// Interface to emit optimization remarks. OptimizationRemarkEmitter *ORE; - bool InnerLoopHasReduction = false; }; /// LoopInterchangeProfitability checks if it is profitable to interchange the @@ -391,11 +384,9 @@ public: LoopInterchangeTransform(Loop *Outer, Loop *Inner, ScalarEvolution *SE, LoopInfo *LI, DominatorTree *DT, - BasicBlock *LoopNestExit, - bool InnerLoopContainsReductions) + BasicBlock *LoopNestExit) : OuterLoop(Outer), InnerLoop(Inner), SE(SE), LI(LI), DT(DT), - LoopExit(LoopNestExit), - InnerLoopHasReduction(InnerLoopContainsReductions) {} + LoopExit(LoopNestExit) {} /// Interchange OuterLoop and InnerLoop. bool transform(); @@ -420,7 +411,6 @@ LoopInfo *LI; DominatorTree *DT; BasicBlock *LoopExit; - bool InnerLoopHasReduction; }; // Main LoopInterchange Pass. @@ -571,7 +561,7 @@ }); LoopInterchangeTransform LIT(OuterLoop, InnerLoop, SE, LI, DT, - LoopNestExit, LIL.hasInnerLoopReduction()); + LoopNestExit); LIT.transform(); LLVM_DEBUG(dbgs() << "Loops interchanged.\n"); LoopsInterchanged++; @@ -581,42 +571,12 @@ } // end anonymous namespace -bool LoopInterchangeLegality::areAllUsesReductions(Instruction *Ins, Loop *L) { - return llvm::none_of(Ins->users(), [=](User *U) -> bool { - auto *UserIns = dyn_cast(U); - RecurrenceDescriptor RD; - return !UserIns || !RecurrenceDescriptor::isReductionPHI(UserIns, L, RD); +bool LoopInterchangeLegality::containsUnsafeInstructions(BasicBlock *BB) { + return any_of(*BB, [](const Instruction &I) { + return I.mayHaveSideEffects() || I.mayReadFromMemory(); }); } -bool LoopInterchangeLegality::containsUnsafeInstructionsInHeader( - BasicBlock *BB) { - for (Instruction &I : *BB) { - // Load corresponding to reduction PHI's are safe while concluding if - // tightly nested. - if (LoadInst *L = dyn_cast(&I)) { - if (!areAllUsesReductions(L, InnerLoop)) - return true; - } else if (I.mayHaveSideEffects() || I.mayReadFromMemory()) - return true; - } - return false; -} - -bool LoopInterchangeLegality::containsUnsafeInstructionsInLatch( - BasicBlock *BB) { - for (Instruction &I : *BB) { - // Stores corresponding to reductions are safe while concluding if tightly - // nested. - if (StoreInst *L = dyn_cast(&I)) { - if (!isa(L->getOperand(0))) - return true; - } else if (I.mayHaveSideEffects() || I.mayReadFromMemory()) - return true; - } - return false; -} - bool LoopInterchangeLegality::tightlyNested(Loop *OuterLoop, Loop *InnerLoop) { BasicBlock *OuterLoopHeader = OuterLoop->getHeader(); BasicBlock *InnerLoopPreHeader = InnerLoop->getLoopPreheader(); @@ -640,8 +600,8 @@ LLVM_DEBUG(dbgs() << "Checking instructions in Loop header and Loop latch\n"); // We do not have any basic block in between now make sure the outer header // and outer loop latch doesn't contain any unsafe instructions. - if (containsUnsafeInstructionsInHeader(OuterLoopHeader) || - containsUnsafeInstructionsInLatch(OuterLoopLatch)) + if (containsUnsafeInstructions(OuterLoopHeader) || + containsUnsafeInstructions(OuterLoopLatch)) return false; LLVM_DEBUG(dbgs() << "Loops are perfectly nested\n"); @@ -673,9 +633,8 @@ return true; } -bool LoopInterchangeLegality::findInductionAndReductions( - Loop *L, SmallVector &Inductions, - SmallVector &Reductions) { +bool LoopInterchangeLegality::findInductions( + Loop *L, SmallVector &Inductions) { if (!L->getLoopLatch() || !L->getLoopPredecessor()) return false; for (PHINode &PHI : L->getHeader()->phis()) { @@ -683,11 +642,8 @@ InductionDescriptor ID; if (InductionDescriptor::isInductionPHI(&PHI, L, SE, ID)) Inductions.push_back(&PHI); - else if (RecurrenceDescriptor::isReductionPHI(&PHI, L, RD)) - Reductions.push_back(&PHI); else { - LLVM_DEBUG( - dbgs() << "Failed to recognize PHI as an induction or reduction.\n"); + LLVM_DEBUG(dbgs() << "Failed to recognize PHI as an induction.\n"); return false; } } @@ -737,8 +693,7 @@ PHINode *InnerInductionVar; SmallVector Inductions; - SmallVector Reductions; - if (!findInductionAndReductions(InnerLoop, Inductions, Reductions)) { + if (!findInductions(InnerLoop, Inductions)) { LLVM_DEBUG( dbgs() << "Only inner loops with induction or reduction PHI nodes " << "are supported currently.\n"); @@ -766,12 +721,9 @@ }); return true; } - if (Reductions.size() > 0) - InnerLoopHasReduction = true; InnerInductionVar = Inductions.pop_back_val(); - Reductions.clear(); - if (!findInductionAndReductions(OuterLoop, Inductions, Reductions)) { + if (!findInductions(OuterLoop, Inductions)) { LLVM_DEBUG( dbgs() << "Only outer loops with induction or reduction PHI nodes " << "are supported currently.\n"); @@ -785,20 +737,6 @@ return true; } - // Outer loop cannot have reduction because then loops will not be tightly - // nested. - if (!Reductions.empty()) { - LLVM_DEBUG(dbgs() << "Outer loops with reductions are not supported " - << "currently.\n"); - ORE->emit([&]() { - return OptimizationRemarkMissed(DEBUG_TYPE, "ReductionsOuter", - OuterLoop->getStartLoc(), - OuterLoop->getHeader()) - << "Outer loops with reductions cannot be interchangeed " - "currently."; - }); - return true; - } // TODO: Currently we handle only loops with 1 induction variable. if (Inductions.size() != 1) { LLVM_DEBUG(dbgs() << "Loops with more than 1 induction variables are not " @@ -1449,34 +1387,11 @@ // replaced by Inners'. updateIncomingBlock(OuterLoopLatchSuccessor, OuterLoopLatch, InnerLoopLatch); - // Now update the reduction PHIs in the inner and outer loop headers. - SmallVector InnerLoopPHIs, OuterLoopPHIs; - for (PHINode &PHI : drop_begin(InnerLoopHeader->phis(), 1)) - InnerLoopPHIs.push_back(cast(&PHI)); - for (PHINode &PHI : drop_begin(OuterLoopHeader->phis(), 1)) - OuterLoopPHIs.push_back(cast(&PHI)); - - for (PHINode *PHI : OuterLoopPHIs) - PHI->moveBefore(InnerLoopHeader->getFirstNonPHI()); - - // Move the PHI nodes from the inner loop header to the outer loop header. - // We have to deal with one kind of PHI nodes: - // 1) PHI nodes that are part of inner loop-only reductions. - // We only have to move the PHI node and update the incoming blocks. - for (PHINode *PHI : InnerLoopPHIs) { - PHI->moveBefore(OuterLoopHeader->getFirstNonPHI()); - for (BasicBlock *InBB : PHI->blocks()) { - if (InnerLoop->contains(InBB)) - continue; - - assert(!isa(PHI->getIncomingValueForBlock(InBB)) && - "Unexpected incoming PHI node, reductions in outer loop are not " - "supported yet"); - PHI->replaceAllUsesWith(PHI->getIncomingValueForBlock(InBB)); - PHI->eraseFromParent(); - break; - } - } + // Make sure we have no other PHIs. + auto InnerPhis = drop_begin(InnerLoopHeader->phis(), 1); + auto OuterPhis = drop_begin(OuterLoopHeader->phis(), 1); + assert(begin(InnerPhis) == end(InnerPhis) && "Unexpected PHIs in inner loop"); + assert(begin(OuterPhis) == end(OuterPhis) && "Unexpected PHis in outer loop"); // Update the incoming blocks for moved PHI nodes. updateIncomingBlock(OuterLoopHeader, InnerLoopPreHeader, OuterLoopPreHeader); Index: llvm/trunk/test/Transforms/LoopInterchange/inner-only-reductions.ll =================================================================== --- llvm/trunk/test/Transforms/LoopInterchange/inner-only-reductions.ll +++ llvm/trunk/test/Transforms/LoopInterchange/inner-only-reductions.ll @@ -0,0 +1,124 @@ +; RUN: opt < %s -basicaa -loop-interchange -pass-remarks-missed='loop-interchange' -pass-remarks-output=%t -S \ +; RUN: -verify-dom-info -verify-loop-info 2>&1 | FileCheck -check-prefix=IR %s +; RUN: FileCheck --input-file=%t %s + +; Inner loop only reductions are not supported currently. See discussion at +; D53027 for more information on the required checks. + +@A = common global [500 x [500 x i32]] zeroinitializer +@X = common global i32 0 +@B = common global [500 x [500 x i32]] zeroinitializer +@Y = common global i32 0 + +;; global X + +;; for( int i=1;i&1 | FileCheck %s +; RUN: opt < %s -loop-interchange -verify-dom-info -verify-loop-info -verify-scev -verify-loop-lcssa -loop-interchange-threshold=-1000 -S 2>&1 | FileCheck %s ;; Checks the order of the inner phi nodes does not cause havoc. ;; The inner loop has a reduction into c. The IV is not the first phi. @@ -23,8 +23,6 @@ ; CHECK-NEXT: br label [[FOR2_HEADER:%.*]] ; CHECK: for2.header: ; CHECK-NEXT: [[J:%.*]] = phi i32 [ [[INC17:%.*]], [[FOR2_INC16:%.*]] ], [ 0, [[FOR2_HEADER_PREHEADER]] ] -; CHECK-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds [90 x i32], [90 x i32]* [[C:%.*]], i32 [[I]], i32 [[J]] -; CHECK-NEXT: [[ARRAYIDX14_PROMOTED:%.*]] = load i32, i32* [[ARRAYIDX14]], align 4 ; CHECK-NEXT: br label [[FOR3_SPLIT1:%.*]] ; CHECK: for3.preheader: ; CHECK-NEXT: br label [[FOR3:%.*]] @@ -35,15 +33,14 @@ ; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[K]], [[MUL]] ; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, i16* [[A:%.*]], i32 [[ADD]] ; CHECK-NEXT: [[TMP0:%.*]] = load i16, i16* [[ARRAYIDX]], align 2 -; CHECK-NEXT: [[CONV:%.*]] = sext i16 [[TMP0]] to i32 -; CHECK-NEXT: [[ADD15:%.*]] = add nsw i32 [[CONV]], [[ARRAYIDX14_PROMOTED]] +; CHECK-NEXT: [[ADD15:%.*]] = add nsw i16 [[TMP0]], 1 +; CHECK-NEXT: store i16 [[ADD15]], i16* [[ARRAYIDX]] ; CHECK-NEXT: br label [[FOR2_INC16]] ; CHECK: for3.split: ; CHECK-NEXT: [[INC]] = add nuw nsw i32 [[K]], 1 ; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[INC]], 90 ; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR1_LOOPEXIT:%.*]], label [[FOR3]] ; CHECK: for2.inc16: -; CHECK-NEXT: store i32 [[ADD15]], i32* [[ARRAYIDX14]], align 4 ; CHECK-NEXT: [[INC17]] = add nuw nsw i32 [[J]], 1 ; CHECK-NEXT: [[EXITCOND47:%.*]] = icmp eq i32 [[INC17]], 90 ; CHECK-NEXT: br i1 [[EXITCOND47]], label [[FOR1_INC19]], label [[FOR2_HEADER]] @@ -66,25 +63,20 @@ for2.header: ; preds = %for2.inc16, %for1.header %j = phi i32 [ 0, %for1.header ], [ %inc17, %for2.inc16 ] - %arrayidx14 = getelementptr inbounds [90 x i32], [90 x i32]* %C, i32 %i, i32 %j - %arrayidx14.promoted = load i32, i32* %arrayidx14, align 4 br label %for3 for3: ; preds = %for3, %for2.header - %add1541 = phi i32 [ %arrayidx14.promoted, %for2.header ], [ %add15, %for3 ] %k = phi i32 [ 1, %for2.header ], [ %inc, %for3 ] %add = add nsw i32 %k, %mul %arrayidx = getelementptr inbounds i16, i16* %A, i32 %add %0 = load i16, i16* %arrayidx, align 2 - %conv = sext i16 %0 to i32 - %add15 = add nsw i32 %conv, %add1541 + %add15 = add nsw i16 %0, 1 + store i16 %add15, i16* %arrayidx %inc = add nuw nsw i32 %k, 1 %exitcond = icmp eq i32 %inc, 90 br i1 %exitcond, label %for2.inc16, label %for3 for2.inc16: ; preds = %for.body6 - %add15.lcssa = phi i32 [ %add15, %for3 ] - store i32 %add15.lcssa, i32* %arrayidx14, align 4 %inc17 = add nuw nsw i32 %j, 1 %exitcond47 = icmp eq i32 %inc17, 90 br i1 %exitcond47, label %for1.inc19, label %for2.header Index: llvm/trunk/test/Transforms/LoopInterchange/reductions.ll =================================================================== --- llvm/trunk/test/Transforms/LoopInterchange/reductions.ll +++ llvm/trunk/test/Transforms/LoopInterchange/reductions.ll @@ -1,272 +0,0 @@ -; REQUIRES: asserts -; RUN: opt < %s -basicaa -loop-interchange -verify-dom-info -verify-loop-info -verify-loop-lcssa -S -debug 2>&1 | FileCheck %s - -@A = common global [500 x [500 x i32]] zeroinitializer -@X = common global i32 0 -@B = common global [500 x [500 x i32]] zeroinitializer -@Y = common global i32 0 - -;; for( int i=1;i