Index: include/llvm/Transforms/Utils/LoopRotationUtils.h =================================================================== --- include/llvm/Transforms/Utils/LoopRotationUtils.h +++ include/llvm/Transforms/Utils/LoopRotationUtils.h @@ -20,6 +20,7 @@ class DominatorTree; class Loop; class LoopInfo; +class MemorySSAUpdater; class ScalarEvolution; struct SimplifyQuery; class TargetTransformInfo; @@ -32,8 +33,8 @@ /// LoopRotation. If it is true, the profitability heuristic will be ignored. bool LoopRotation(Loop *L, LoopInfo *LI, const TargetTransformInfo *TTI, AssumptionCache *AC, DominatorTree *DT, ScalarEvolution *SE, - const SimplifyQuery &SQ, bool RotationOnly, - unsigned Threshold, bool IsUtilMode); + MemorySSAUpdater *MSSAU, const SimplifyQuery &SQ, + bool RotationOnly, unsigned Threshold, bool IsUtilMode); } // namespace llvm Index: lib/Transforms/Scalar/LoopRotation.cpp =================================================================== --- lib/Transforms/Scalar/LoopRotation.cpp +++ lib/Transforms/Scalar/LoopRotation.cpp @@ -15,6 +15,8 @@ #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Analysis/LoopPass.h" +#include "llvm/Analysis/MemorySSA.h" +#include "llvm/Analysis/MemorySSAUpdater.h" #include "llvm/Analysis/ScalarEvolution.h" #include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/Support/Debug.h" @@ -40,12 +42,19 @@ const DataLayout &DL = L.getHeader()->getModule()->getDataLayout(); const SimplifyQuery SQ = getBestSimplifyQuery(AR, DL); - bool Changed = LoopRotation(&L, &AR.LI, &AR.TTI, &AR.AC, &AR.DT, &AR.SE, SQ, - false, Threshold, false); + Optional<MemorySSAUpdater> MSSAU; + if (AR.MSSA) + MSSAU = MemorySSAUpdater(AR.MSSA); + bool Changed = LoopRotation(&L, &AR.LI, &AR.TTI, &AR.AC, &AR.DT, &AR.SE, + MSSAU.hasValue() ? 
MSSAU.getPointer() : nullptr, + SQ, false, Threshold, false); if (!Changed) return PreservedAnalyses::all(); + if (AR.MSSA && VerifyMemorySSA) + AR.MSSA->verifyMemorySSA(); + return getLoopPassPreservedAnalyses(); } @@ -68,6 +77,10 @@ void getAnalysisUsage(AnalysisUsage &AU) const override { AU.addRequired<AssumptionCacheTracker>(); AU.addRequired<TargetTransformInfoWrapperPass>(); + if (EnableMSSALoopDependency) { + AU.addRequired<MemorySSAWrapperPass>(); + AU.addPreserved<MemorySSAWrapperPass>(); + } getLoopAnalysisUsage(AU); } @@ -84,8 +97,14 @@ auto *SEWP = getAnalysisIfAvailable<ScalarEvolutionWrapperPass>(); auto *SE = SEWP ? &SEWP->getSE() : nullptr; const SimplifyQuery SQ = getBestSimplifyQuery(*this, F); - return LoopRotation(L, LI, TTI, AC, DT, SE, SQ, false, MaxHeaderSize, - false); + Optional<MemorySSAUpdater> MSSAU; + if (EnableMSSALoopDependency) { + MemorySSA *MSSA = &getAnalysis<MemorySSAWrapperPass>().getMSSA(); + MSSAU = MemorySSAUpdater(MSSA); + } + return LoopRotation(L, LI, TTI, AC, DT, SE, + MSSAU.hasValue() ? MSSAU.getPointer() : nullptr, SQ, + false, MaxHeaderSize, false); } }; } @@ -96,6 +115,7 @@ INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker) INITIALIZE_PASS_DEPENDENCY(LoopPass) INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass) +INITIALIZE_PASS_DEPENDENCY(MemorySSAWrapperPass) INITIALIZE_PASS_END(LoopRotateLegacyPass, "loop-rotate", "Rotate Loops", false, false) Index: lib/Transforms/Utils/LoopRotationUtils.cpp =================================================================== --- lib/Transforms/Utils/LoopRotationUtils.cpp +++ lib/Transforms/Utils/LoopRotationUtils.cpp @@ -20,6 +20,8 @@ #include "llvm/Analysis/GlobalsModRef.h" #include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Analysis/LoopPass.h" +#include "llvm/Analysis/MemorySSA.h" +#include "llvm/Analysis/MemorySSAUpdater.h" #include "llvm/Analysis/ScalarEvolution.h" #include "llvm/Analysis/ScalarEvolutionAliasAnalysis.h" #include "llvm/Analysis/TargetTransformInfo.h" @@ -54,6 +56,7 @@ AssumptionCache *AC; DominatorTree *DT; ScalarEvolution *SE; + MemorySSAUpdater *MSSAU; const SimplifyQuery &SQ; bool RotationOnly; bool 
IsUtilMode; @@ -61,10 +64,11 @@ public: LoopRotate(unsigned MaxHeaderSize, LoopInfo *LI, const TargetTransformInfo *TTI, AssumptionCache *AC, - DominatorTree *DT, ScalarEvolution *SE, const SimplifyQuery &SQ, - bool RotationOnly, bool IsUtilMode) + DominatorTree *DT, ScalarEvolution *SE, MemorySSAUpdater *MSSAU, + const SimplifyQuery &SQ, bool RotationOnly, bool IsUtilMode) : MaxHeaderSize(MaxHeaderSize), LI(LI), TTI(TTI), AC(AC), DT(DT), SE(SE), - SQ(SQ), RotationOnly(RotationOnly), IsUtilMode(IsUtilMode) {} + MSSAU(MSSAU), SQ(SQ), RotationOnly(RotationOnly), + IsUtilMode(IsUtilMode) {} bool processLoop(Loop *L); private: @@ -269,6 +273,8 @@ SE->forgetTopmostLoop(L); LLVM_DEBUG(dbgs() << "LoopRotation: rotating "; L->dump()); + if (MSSAU && VerifyMemorySSA) + MSSAU->getMemorySSA()->verifyMemorySSA(); // Find new Loop header. NewHeader is a Header's one and only successor // that is inside loop. Header's other successor is outside the @@ -385,7 +391,6 @@ // remove the corresponding incoming values from the PHI nodes in OrigHeader. LoopEntryBranch->eraseFromParent(); - SmallVector<PHINode*, 2> InsertedPHIs; // If there were any uses of instructions in the duplicated block outside the // loop, update them, inserting PHI nodes as required @@ -411,6 +416,29 @@ Updates.push_back({DominatorTree::Insert, OrigPreheader, NewHeader}); Updates.push_back({DominatorTree::Delete, OrigPreheader, OrigHeader}); DT->applyUpdates(Updates); + + if (MSSAU) { + ValueMap[OrigHeader] = OrigPreheader; + // Above RewriteUses call may replace map keys (cloned instructions) + // with phis. Re-add mapping (ClonedInstruction, CloneOfInstruction). + // We do not delete existing entry of (PhiNode, CloneOfInstruction), + // since the map is not used further. Ideally we'd set + // Config::FollowRAUW = false for ValueMap before the above call to + // avoid the replacement in the first place. 
+ for (auto *PNI : InsertedPHIs) + if (Instruction *NewInsn = + dyn_cast_or_null<Instruction>(ValueMap.lookup(PNI))) { + Instruction *OldInsn = cast<Instruction>(PNI->getIncomingValue(0)); + if (OldInsn == NewInsn) + OldInsn = cast<Instruction>(PNI->getIncomingValue(1)); + ValueMap[OldInsn] = NewInsn; + } + + MSSAU->updateForClonedBlockIntoPred(OrigHeader, OrigPreheader, ValueMap); + MSSAU->applyUpdates(Updates, *DT); + if (VerifyMemorySSA) + MSSAU->getMemorySSA()->verifyMemorySSA(); + } } // At this point, we've finished our major CFG changes. As part of cloning @@ -433,7 +461,7 @@ // Split the edge to form a real preheader. BasicBlock *NewPH = SplitCriticalEdge( OrigPreheader, NewHeader, - CriticalEdgeSplittingOptions(DT, LI).setPreserveLCSSA()); + CriticalEdgeSplittingOptions(DT, LI, MSSAU).setPreserveLCSSA()); NewPH->setName(NewHeader->getName() + ".lr.ph"); // Preserve canonical loop form, which means that 'Exit' should have only @@ -452,7 +480,7 @@ SplitLatchEdge |= L->getLoopLatch() == ExitPred; BasicBlock *ExitSplit = SplitCriticalEdge( ExitPred, Exit, - CriticalEdgeSplittingOptions(DT, LI).setPreserveLCSSA()); + CriticalEdgeSplittingOptions(DT, LI, MSSAU).setPreserveLCSSA()); ExitSplit->moveBefore(Exit); } assert(SplitLatchEdge && @@ -467,17 +495,27 @@ // With our CFG finalized, update DomTree if it is available. if (DT) DT->deleteEdge(OrigPreheader, Exit); + + // Update MSSA too, if available. + if (MSSAU) + MSSAU->removeEdge(OrigPreheader, Exit); } assert(L->getLoopPreheader() && "Invalid loop preheader after loop rotation"); assert(L->getLoopLatch() && "Invalid loop latch after loop rotation"); + if (MSSAU && VerifyMemorySSA) + MSSAU->getMemorySSA()->verifyMemorySSA(); + // Now that the CFG and DomTree are in a consistent state again, try to merge // the OrigHeader block into OrigLatch. This will succeed if they are // connected by an unconditional branch. This is just a cleanup so the // emitted code isn't too gross in this common case. 
DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Eager); - MergeBlockIntoPredecessor(OrigHeader, &DTU, LI); + MergeBlockIntoPredecessor(OrigHeader, &DTU, LI, MSSAU); + + if (MSSAU && VerifyMemorySSA) + MSSAU->getMemorySSA()->verifyMemorySSA(); LLVM_DEBUG(dbgs() << "LoopRotation: into "; L->dump()); @@ -586,9 +624,14 @@ << LastExit->getName() << "\n"); // Hoist the instructions from Latch into LastExit. + Instruction *FirstLatchInst = &*(Latch->begin()); LastExit->getInstList().splice(BI->getIterator(), Latch->getInstList(), Latch->begin(), Jmp->getIterator()); + // Update MemorySSA + if (MSSAU) + MSSAU->moveAllAfterMergeBlocks(Latch, LastExit, FirstLatchInst); + unsigned FallThruPath = BI->getSuccessor(0) == Latch ? 0 : 1; BasicBlock *Header = Jmp->getSuccessor(0); assert(Header == L->getHeader() && "expected a backward branch"); @@ -604,6 +647,10 @@ if (DT) DT->eraseNode(Latch); Latch->eraseFromParent(); + + if (MSSAU && VerifyMemorySSA) + MSSAU->getMemorySSA()->verifyMemorySSA(); + return true; } @@ -636,11 +683,16 @@ /// The utility to convert a loop into a loop with bottom test. 
bool llvm::LoopRotation(Loop *L, LoopInfo *LI, const TargetTransformInfo *TTI, AssumptionCache *AC, DominatorTree *DT, - ScalarEvolution *SE, const SimplifyQuery &SQ, - bool RotationOnly = true, + ScalarEvolution *SE, MemorySSAUpdater *MSSAU, + const SimplifyQuery &SQ, bool RotationOnly = true, unsigned Threshold = unsigned(-1), bool IsUtilMode = true) { - LoopRotate LR(Threshold, LI, TTI, AC, DT, SE, SQ, RotationOnly, IsUtilMode); + if (MSSAU && VerifyMemorySSA) + MSSAU->getMemorySSA()->verifyMemorySSA(); + LoopRotate LR(Threshold, LI, TTI, AC, DT, SE, MSSAU, SQ, RotationOnly, + IsUtilMode); + if (MSSAU && VerifyMemorySSA) + MSSAU->getMemorySSA()->verifyMemorySSA(); return LR.processLoop(L); } Index: test/Transforms/LoopRotate/2009-01-25-SingleEntryPhi.ll =================================================================== --- test/Transforms/LoopRotate/2009-01-25-SingleEntryPhi.ll +++ test/Transforms/LoopRotate/2009-01-25-SingleEntryPhi.ll @@ -1,4 +1,5 @@ ; RUN: opt < %s -loop-rotate -verify-dom-info -verify-loop-info -disable-output +; RUN: opt < %s -loop-rotate -verify-dom-info -verify-loop-info -enable-mssa-loop-dependency=true -verify-memoryssa -disable-output ; PR3408 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128" target triple = "x86_64-unknown-linux-gnu" Index: test/Transforms/LoopRotate/PhiRename-1.ll =================================================================== --- test/Transforms/LoopRotate/PhiRename-1.ll +++ test/Transforms/LoopRotate/PhiRename-1.ll @@ -1,4 +1,5 @@ ; RUN: opt < %s -loop-rotate -verify-dom-info -verify-loop-info -S | FileCheck %s +; RUN: opt < %s -loop-rotate -verify-dom-info -verify-loop-info -enable-mssa-loop-dependency=true -verify-memoryssa -S | FileCheck %s ; CHECK-NOT: [ {{.}}tmp224 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64" Index: 
test/Transforms/LoopRotate/PhiSelfReference-1.ll =================================================================== --- test/Transforms/LoopRotate/PhiSelfReference-1.ll +++ test/Transforms/LoopRotate/PhiSelfReference-1.ll @@ -1,4 +1,5 @@ ; RUN: opt < %s -loop-rotate -verify-dom-info -verify-loop-info -disable-output +; RUN: opt < %s -loop-rotate -verify-dom-info -verify-loop-info -enable-mssa-loop-dependency=true -verify-memoryssa -disable-output ; ModuleID = 'PhiSelfReference-1.bc' define void @snrm2(i32 %incx) { Index: test/Transforms/LoopRotate/alloca.ll =================================================================== --- test/Transforms/LoopRotate/alloca.ll +++ test/Transforms/LoopRotate/alloca.ll @@ -1,4 +1,5 @@ ; RUN: opt < %s -loop-rotate -S | FileCheck %s +; RUN: opt < %s -loop-rotate -enable-mssa-loop-dependency=true -verify-memoryssa -S | FileCheck %s ; Test alloca in -loop-rotate. Index: test/Transforms/LoopRotate/basic.ll =================================================================== --- test/Transforms/LoopRotate/basic.ll +++ test/Transforms/LoopRotate/basic.ll @@ -1,5 +1,7 @@ ; RUN: opt -S -loop-rotate < %s | FileCheck %s +; RUN: opt -S -loop-rotate -enable-mssa-loop-dependency=true -verify-memoryssa < %s | FileCheck %s ; RUN: opt -S -passes='require<targetir>,require<assumptions>,loop(rotate)' < %s | FileCheck %s +; RUN: opt -S -passes='require<targetir>,require<assumptions>,loop(rotate)' -enable-mssa-loop-dependency=true -verify-memoryssa < %s | FileCheck %s target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" target triple = "x86_64-apple-darwin10.0.0" Index: test/Transforms/LoopRotate/catchret.ll =================================================================== --- test/Transforms/LoopRotate/catchret.ll +++ test/Transforms/LoopRotate/catchret.ll @@ -1,4 +1,5 @@ ; RUN: opt < %s -loop-rotate -S | FileCheck %s +; RUN: opt < %s -loop-rotate -enable-mssa-loop-dependency=true 
-verify-memoryssa -S | FileCheck %s target triple = "x86_64-pc-windows-msvc" Index: test/Transforms/LoopRotate/convergent.ll =================================================================== --- test/Transforms/LoopRotate/convergent.ll +++ test/Transforms/LoopRotate/convergent.ll @@ -1,4 +1,5 @@ ; RUN: opt -S -loop-rotate < %s | FileCheck %s +; RUN: opt -S -loop-rotate -enable-mssa-loop-dependency=true -verify-memoryssa < %s | FileCheck %s @e = global i32 10 Index: test/Transforms/LoopRotate/crash.ll =================================================================== --- test/Transforms/LoopRotate/crash.ll +++ test/Transforms/LoopRotate/crash.ll @@ -1,4 +1,5 @@ ; RUN: opt -loop-rotate -disable-output -verify-dom-info -verify-loop-info < %s +; RUN: opt -loop-rotate -disable-output -verify-dom-info -verify-loop-info -enable-mssa-loop-dependency=true -verify-memoryssa < %s target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" target triple = "x86_64-apple-darwin10.0.0" Index: test/Transforms/LoopRotate/dbg-value-duplicates.ll =================================================================== --- test/Transforms/LoopRotate/dbg-value-duplicates.ll +++ test/Transforms/LoopRotate/dbg-value-duplicates.ll @@ -1,4 +1,5 @@ ; RUN: opt -S -loop-rotate < %s | FileCheck %s +; RUN: opt -S -loop-rotate -enable-mssa-loop-dependency=true -verify-memoryssa < %s | FileCheck %s source_filename = "/tmp/loop.c" target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128" target triple = "x86_64-apple-macosx10.13.0" Index: test/Transforms/LoopRotate/dbgvalue.ll =================================================================== --- test/Transforms/LoopRotate/dbgvalue.ll +++ test/Transforms/LoopRotate/dbgvalue.ll @@ -1,4 +1,5 @@ ; RUN: opt -S -loop-rotate < %s | FileCheck %s +; RUN: opt -S -loop-rotate -enable-mssa-loop-dependency=true -verify-memoryssa < %s | FileCheck %s declare 
void @llvm.dbg.declare(metadata, metadata, metadata) nounwind readnone declare void @llvm.dbg.value(metadata, metadata, metadata) nounwind readnone Index: test/Transforms/LoopRotate/indirectbr.ll =================================================================== --- test/Transforms/LoopRotate/indirectbr.ll +++ test/Transforms/LoopRotate/indirectbr.ll @@ -1,4 +1,5 @@ ; RUN: opt < %s -S -loop-rotate -o - -verify-loop-info -verify-dom-info | FileCheck %s +; RUN: opt < %s -S -loop-rotate -o - -verify-loop-info -verify-dom-info -enable-mssa-loop-dependency=true -verify-memoryssa | FileCheck %s ; PR5502 define void @z80_do_opcodes() nounwind { Index: test/Transforms/LoopRotate/loopexitinglatch.ll =================================================================== --- test/Transforms/LoopRotate/loopexitinglatch.ll +++ test/Transforms/LoopRotate/loopexitinglatch.ll @@ -1,5 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py ; RUN: opt -S -loop-rotate < %s -verify-loop-info -verify-dom-info | FileCheck %s +; RUN: opt -S -loop-rotate < %s -verify-loop-info -verify-dom-info -enable-mssa-loop-dependency=true -verify-memoryssa | FileCheck %s target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64" target triple = "thumbv8m.base-arm-none-eabi" Index: test/Transforms/LoopRotate/multiple-exits.ll =================================================================== --- test/Transforms/LoopRotate/multiple-exits.ll +++ test/Transforms/LoopRotate/multiple-exits.ll @@ -1,4 +1,5 @@ ; RUN: opt -S -loop-rotate < %s -verify-loop-info -verify-dom-info | FileCheck %s +; RUN: opt -S -loop-rotate < %s -verify-loop-info -verify-dom-info -enable-mssa-loop-dependency=true -verify-memoryssa | FileCheck %s target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" target triple = "x86_64-apple-macosx10.8.0" Index: 
test/Transforms/LoopRotate/phi-dbgvalue.ll =================================================================== --- test/Transforms/LoopRotate/phi-dbgvalue.ll +++ test/Transforms/LoopRotate/phi-dbgvalue.ll @@ -1,4 +1,5 @@ ; RUN: opt -S -loop-rotate < %s | FileCheck %s +; RUN: opt -S -loop-rotate -enable-mssa-loop-dependency=true -verify-memoryssa < %s | FileCheck %s ;CHECK-LABEL: func ;CHECK-LABEL: entry Index: test/Transforms/LoopRotate/phi-duplicate.ll =================================================================== --- test/Transforms/LoopRotate/phi-duplicate.ll +++ test/Transforms/LoopRotate/phi-duplicate.ll @@ -1,4 +1,5 @@ ; RUN: opt -S -loop-rotate < %s | FileCheck %s +; RUN: opt -S -loop-rotate -enable-mssa-loop-dependency=true -verify-memoryssa < %s | FileCheck %s target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" target triple = "x86_64-apple-darwin10.0" Index: test/Transforms/LoopRotate/pr22337.ll =================================================================== --- test/Transforms/LoopRotate/pr22337.ll +++ test/Transforms/LoopRotate/pr22337.ll @@ -1,4 +1,5 @@ ; RUN: opt < %s -loop-rotate -S | FileCheck %s +; RUN: opt < %s -loop-rotate -enable-mssa-loop-dependency=true -verify-memoryssa -S | FileCheck %s @a = external global i8, align 4 @tmp = global i8* @a Index: test/Transforms/LoopRotate/pr33701.ll =================================================================== --- test/Transforms/LoopRotate/pr33701.ll +++ test/Transforms/LoopRotate/pr33701.ll @@ -1,4 +1,5 @@ ; RUN: opt < %s -loop-rotate -verify-dom-info -verify-loop-info -disable-output +; RUN: opt < %s -loop-rotate -verify-dom-info -verify-loop-info -enable-mssa-loop-dependency=true -verify-memoryssa -disable-output define void @func() { bb0: Index: test/Transforms/LoopRotate/pr35210.ll =================================================================== --- 
test/Transforms/LoopRotate/pr35210.ll +++ test/Transforms/LoopRotate/pr35210.ll @@ -1,4 +1,5 @@ ;RUN: opt %s -passes='adce,loop(rotate),adce' -S -debug-pass-manager -debug-only=loop-rotate 2>&1 | FileCheck %s +;RUN: opt %s -passes='adce,loop(rotate),adce' -S -debug-pass-manager -debug-only=loop-rotate -enable-mssa-loop-dependency=true -verify-memoryssa 2>&1 | FileCheck %s --check-prefix=MSSA ;REQUIRES: asserts ; This test is to make sure we invalidate the post dominator pass after loop rotate simplifies the loop latch. @@ -31,6 +32,35 @@ ; CHECK-NEXT: Running analysis: PostDominatorTreeAnalysis on f ; CHECK-NEXT: Finished llvm::Function pass manager run. +; MSSA: Starting llvm::Function pass manager run. +; MSSA-NEXT: Running pass: ADCEPass on f +; MSSA-NEXT: Running analysis: PostDominatorTreeAnalysis on f +; MSSA-NEXT: Running pass: FunctionToLoopPassAdaptor{{.*}} on f +; MSSA-NEXT: Starting llvm::Function pass manager run. +; MSSA-NEXT: Running pass: LoopSimplifyPass on f +; MSSA-NEXT: Running analysis: LoopAnalysis on f +; MSSA-NEXT: Running analysis: DominatorTreeAnalysis on f +; MSSA-NEXT: Running analysis: AssumptionAnalysis on f +; MSSA-NEXT: Running pass: LCSSAPass on f +; MSSA-NEXT: Finished llvm::Function pass manager run. +; MSSA-NEXT: Running analysis: MemorySSAAnalysis on f +; MSSA-NEXT: Running analysis: AAManager on f +; MSSA-NEXT: Running analysis: TargetLibraryAnalysis on f +; MSSA-NEXT: Running analysis: ScalarEvolutionAnalysis on f +; MSSA-NEXT: Running analysis: TargetIRAnalysis on f +; MSSA-NEXT: Running analysis: InnerAnalysisManagerProxy{{.*}} on f +; MSSA-NEXT: Starting Loop pass manager run. +; MSSA-NEXT: Running pass: LoopRotatePass on Loop at depth 1 containing: %bb
,%bb4 +; MSSA-NEXT: Folding loop latch bb4 into bb +; MSSA-NEXT: Invalidating all non-preserved analyses for: bb +; MSSA-NEXT: Finished Loop pass manager run. +; MSSA-NEXT: Invalidating all non-preserved analyses for: f +; MSSA-NEXT: Invalidating analysis: PostDominatorTreeAnalysis on f +; MSSA-NEXT: Running pass: ADCEPass on f +; MSSA-NEXT: Running analysis: PostDominatorTreeAnalysis on f +; MSSA-NEXT: Finished llvm::Function pass manager run. + + ; CHECK-LABEL: define i8 @f() { ; CHECK-NEXT : entry: ; CHECK-NEXT : br label %bb @@ -51,6 +81,26 @@ ; CHECK-NEXT : ; CHECK-NEXT : attributes #0 = { noreturn } +; MSSA-LABEL: define i8 @f() { +; MSSA-NEXT : entry: +; MSSA-NEXT : br label %bb +; MSSA-NEXT : +; MSSA-NEXT : bb: ; preds = %bb, %entry +; MSSA-NEXT : %mode.0 = phi i8 [ 0, %entry ], [ %indvar.next, %bb ] +; MSSA-NEXT : %tmp5 = icmp eq i8 %mode.0, 1 +; MSSA-NEXT : %indvar.next = add i8 %mode.0, 1 +; MSSA-NEXT : br i1 %tmp5, label %bb5, label %bb +; MSSA-NEXT : +; MSSA-NEXT : bb5: ; preds = %bb +; MSSA-NEXT : tail call void @raise_exception() #0 +; MSSA-NEXT : unreachable +; MSSA-NEXT : } +; MSSA-NEXT : +; MSSA-NEXT : ; Function Attrs: noreturn +; MSSA-NEXT : declare void @raise_exception() #0 +; MSSA-NEXT : +; MSSA-NEXT : attributes #0 = { noreturn } + define i8 @f() { entry: br label %bb Index: test/Transforms/LoopRotate/pr37205.ll =================================================================== --- test/Transforms/LoopRotate/pr37205.ll +++ test/Transforms/LoopRotate/pr37205.ll @@ -1,5 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py ; RUN: opt -S -indvars -verify -loop-rotate -loop-idiom < %s | FileCheck %s +; RUN: opt -S -indvars -verify -loop-rotate -loop-idiom -enable-mssa-loop-dependency=true -verify-memoryssa < %s | FileCheck %s target triple = "x86_64-unknown-linux-gnu" ; Verify that we invalidate SCEV properly. 
Index: test/Transforms/LoopRotate/preserve-mssa.ll =================================================================== --- /dev/null +++ test/Transforms/LoopRotate/preserve-mssa.ll @@ -0,0 +1,114 @@ +; RUN: opt -S -loop-rotate -enable-mssa-loop-dependency=true -verify-memoryssa < %s | FileCheck %s + +; CHECK-LABEL: @multiedge( +; Function Attrs: nounwind uwtable +define dso_local void @multiedge() local_unnamed_addr #0 { +entry: + br label %retry + +retry: ; preds = %sw.epilog, %entry + br i1 undef, label %cleanup, label %if.end + +if.end: ; preds = %retry + switch i32 undef, label %sw.epilog [ + i32 -3, label %cleanup + i32 -5, label %cleanup + i32 -16, label %cleanup + i32 -25, label %cleanup + ] + +sw.epilog: ; preds = %if.end + br label %retry + +cleanup: ; preds = %if.end, %if.end, %if.end, %if.end, %retry + ret void +} + +; CHECK-LABEL: @read_line( +; Function Attrs: nounwind uwtable +define internal fastcc i32 @read_line(i8* nocapture %f) unnamed_addr #0 { +entry: + br label %for.cond + +for.cond: ; preds = %if.end, %entry + %call = call i8* @prepbuffer(i8* nonnull undef) #4 + %call1 = call i8* @fgets(i8* %call, i32 8192, i8* %f) + br i1 undef, label %if.then, label %if.end + +if.then: ; preds = %for.cond + ret i32 undef + +if.end: ; preds = %for.cond + %call4 = call i64 @strlen(i8* %call) #5 + br label %for.cond +} + +; Function Attrs: nounwind readonly +declare dso_local i8* @prepbuffer(i8*) local_unnamed_addr #1 + +; Function Attrs: nounwind +declare dso_local i8* @fgets(i8*, i32, i8* nocapture) local_unnamed_addr #3 + +; Function Attrs: argmemonly nounwind readonly +declare dso_local i64 @strlen(i8* nocapture) local_unnamed_addr #2 + + + +; Function Attrs: inlinehint norecurse nounwind uwtable +declare dso_local fastcc void @list_Pop() unnamed_addr #6 + +; CHECK-LABEL: @loop3 +; Function Attrs: nounwind uwtable +define dso_local fastcc void @loop3() unnamed_addr #0 { +entry: + br label %for.cond + +for.cond: ; preds = %for.body, %entry + br i1 undef, 
label %for.body, label %for.end81 + +for.body: ; preds = %for.cond + %.idx122.val = load i32, i32* undef, align 8 + call fastcc void @list_Pop() + br label %for.cond + +for.end81: ; preds = %for.cond + ret void +} + +; CHECK-LABEL: @loop4 +; Function Attrs: inlinehint nounwind uwtable +define dso_local fastcc void @loop4() unnamed_addr #6 { +entry: + br label %while.cond + +while.cond: ; preds = %while.body, %entry + br i1 undef, label %while.end, label %while.body + +while.body: ; preds = %while.cond + call fastcc void @cont_BackTrackLastBinding() + br label %while.cond + +while.end: ; preds = %while.cond + call fastcc void @cont_StackInit() + call fastcc void @cont_ResetIndexVarScanner() + ret void +} + +; Function Attrs: inlinehint nounwind uwtable +declare dso_local fastcc void @cont_BackTrackLastBinding() unnamed_addr #6 + +; Function Attrs: inlinehint nounwind uwtable +declare dso_local fastcc void @cont_StackInit() unnamed_addr #6 + +; Function Attrs: inlinehint nounwind uwtable +declare dso_local fastcc void @cont_ResetIndexVarScanner() unnamed_addr #6 + +attributes #0 = { nounwind uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #1 = { "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #2 = { 
argmemonly nounwind readonly "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #3 = { nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #4 = { nounwind } +attributes #5 = { nounwind readonly } +attributes #6 = { inlinehint norecurse nounwind uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } + + Index: test/Transforms/LoopRotate/preserve-scev.ll =================================================================== --- test/Transforms/LoopRotate/preserve-scev.ll +++ test/Transforms/LoopRotate/preserve-scev.ll @@ -1,27 +1,47 @@ ; RUN: opt < %s -loop-rotate -loop-reduce -verify-dom-info -verify-loop-info -disable-output -define fastcc void @foo() nounwind { +define fastcc void @foo(i32* %A, i64 %i) nounwind { BB: br label %BB1 BB1: ; preds = %BB19, %BB + %tttmp1 = getelementptr i32, i32* %A, i64 %i + %tttmp2 = load i32, i32* %tttmp1 + %tttmp3 = add i32 
%tttmp2, 1 + store i32 %tttmp3, i32* %tttmp1 br label %BB4 BB2: ; preds = %BB4 %tmp = bitcast i32 undef to i32 ; [#uses=1] + %tttmp7 = getelementptr i32, i32* %A, i64 %i + %tttmp8 = load i32, i32* %tttmp7 + %tttmp9 = add i32 %tttmp8, 3 + store i32 %tttmp9, i32* %tttmp7 br label %BB4 -BB4: ; preds = %BB3, %BB1 +BB4: ; preds = %BB2, %BB1 %tmp5 = phi i32 [ undef, %BB1 ], [ %tmp, %BB2 ] ; [#uses=1] + %tttmp4 = getelementptr i32, i32* %A, i64 %i + %tttmp5 = load i32, i32* %tttmp4 + %tttmp6 = add i32 %tttmp5, 3 + store i32 %tttmp6, i32* %tttmp4 br i1 false, label %BB8, label %BB2 BB8: ; preds = %BB6 %tmp7 = bitcast i32 %tmp5 to i32 ; [#uses=2] + %tttmp10 = getelementptr i32, i32* %A, i64 %i + %tttmp11 = load i32, i32* %tttmp10 + %tttmp12 = add i32 %tttmp11, 3 + store i32 %tttmp12, i32* %tttmp10 br i1 false, label %BB9, label %BB13 BB9: ; preds = %BB12, %BB8 %tmp10 = phi i32 [ %tmp11, %BB12 ], [ %tmp7, %BB8 ] ; [#uses=2] %tmp11 = add i32 %tmp10, 1 ; [#uses=1] + %tttmp13 = getelementptr i32, i32* %A, i64 %i + %tttmp14 = load i32, i32* %tttmp13 + %tttmp15 = add i32 %tttmp14, 3 + store i32 %tttmp15, i32* %tttmp13 br label %BB12 BB12: ; preds = %BB9 @@ -29,16 +49,28 @@ BB13: ; preds = %BB15, %BB8 %tmp14 = phi i32 [ %tmp16, %BB15 ], [ %tmp7, %BB8 ] ; [#uses=1] + %tttmp16 = getelementptr i32, i32* %A, i64 %i + %tttmp17 = load i32, i32* %tttmp16 + %tttmp18 = add i32 %tttmp17, 3 + store i32 %tttmp18, i32* %tttmp16 br label %BB15 BB15: ; preds = %BB13 %tmp16 = add i32 %tmp14, -1 ; [#uses=1] + %tttmp19 = getelementptr i32, i32* %A, i64 %i + %tttmp20 = load i32, i32* %tttmp19 + %tttmp21 = add i32 %tttmp20, 3 + store i32 %tttmp21, i32* %tttmp19 br i1 false, label %BB13, label %BB18 BB17: ; preds = %BB12 br label %BB19 BB18: ; preds = %BB15 + %tttmp22 = getelementptr i32, i32* %A, i64 %i + %tttmp23 = load i32, i32* %tttmp22 + %tttmp24 = add i32 %tttmp23, 3 + store i32 %tttmp24, i32* %tttmp22 br label %BB19 BB19: ; preds = %BB18, %BB17 Index: 
test/Transforms/LoopRotate/vect.omp.persistence.ll =================================================================== --- test/Transforms/LoopRotate/vect.omp.persistence.ll +++ test/Transforms/LoopRotate/vect.omp.persistence.ll @@ -1,4 +1,5 @@ ; RUN: opt < %s -loop-rotate -S | FileCheck %s +; RUN: opt < %s -loop-rotate -enable-mssa-loop-dependency=true -verify-memoryssa -S | FileCheck %s target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"