Index: llvm/include/llvm/InitializePasses.h =================================================================== --- llvm/include/llvm/InitializePasses.h +++ llvm/include/llvm/InitializePasses.h @@ -197,6 +197,7 @@ void initializeLoopDistributeLegacyPass(PassRegistry&); void initializeLoopExtractorPass(PassRegistry&); void initializeLoopIdiomRecognizeLegacyPassPass(PassRegistry&); +void initializeLoopVersioningIdiomRecognizeLegacyPassPass(PassRegistry&); void initializeLoopInfoWrapperPassPass(PassRegistry&); void initializeLoopInstSimplifyLegacyPassPass(PassRegistry&); void initializeLoopInterchangePass(PassRegistry &); Index: llvm/include/llvm/LinkAllPasses.h =================================================================== --- llvm/include/llvm/LinkAllPasses.h +++ llvm/include/llvm/LinkAllPasses.h @@ -125,6 +125,7 @@ (void) llvm::createLoopUnswitchPass(); (void) llvm::createLoopVersioningLICMPass(); (void) llvm::createLoopIdiomPass(); + (void) llvm::createLoopVersioningIdiomPass(); (void) llvm::createLoopRotatePass(); (void) llvm::createLowerExpectIntrinsicPass(); (void) llvm::createLowerInvokePass(); Index: llvm/include/llvm/Transforms/Scalar.h =================================================================== --- llvm/include/llvm/Transforms/Scalar.h +++ llvm/include/llvm/Transforms/Scalar.h @@ -207,6 +207,13 @@ //===----------------------------------------------------------------------===// // +// LoopVersioningIdiom - This pass recognizes and replaces idioms in loops with the +// help of loop versioning for runtime information. +// +Pass *createLoopVersioningIdiomPass(); + +//===----------------------------------------------------------------------===// +// // LoopVersioningLICM - This pass is a loop versioning pass for LICM. // Pass *createLoopVersioningLICMPass(); Index: llvm/include/llvm/Transforms/Scalar/LoopIdiomRecognize.h =================================================================== --- llvm/include/llvm/Transforms/Scalar/LoopIdiomRecognize.h +++ llvm/include/llvm/Transforms/Scalar/LoopIdiomRecognize.h @@ -28,6 +28,14 @@ PreservedAnalyses run(Loop &L, LoopAnalysisManager &AM, LoopStandardAnalysisResults &AR, LPMUpdater &U); }; + +/// Performs Loop Idiom Recognize Pass with Loop Versioning. +class LoopVersioningIdiomRecognizePass + : public PassInfoMixin { +public: + PreservedAnalyses run(Loop &L, LoopAnalysisManager &AM, + LoopStandardAnalysisResults &AR, LPMUpdater &U); +}; } // end namespace llvm #endif // LLVM_TRANSFORMS_SCALAR_LOOPIDIOMRECOGNIZE_H Index: llvm/lib/Passes/PassBuilder.cpp =================================================================== --- llvm/lib/Passes/PassBuilder.cpp +++ llvm/lib/Passes/PassBuilder.cpp @@ -578,6 +578,11 @@ // llvm.loop.distribute=true or when -enable-loop-distribute is specified. OptimizePM.addPass(LoopDistributePass()); + // Recognize loop patterns and use loop versioning to compute at run-time + // correctness and cost of the transform. + OptimizePM.addPass( + createFunctionToLoopPassAdaptor(LoopVersioningIdiomRecognizePass())); + // Now run the core loop vectorizer. OptimizePM.addPass(LoopVectorizePass()); Index: llvm/lib/Passes/PassRegistry.def =================================================================== --- llvm/lib/Passes/PassRegistry.def +++ llvm/lib/Passes/PassRegistry.def @@ -218,6 +218,7 @@ LOOP_PASS("invalidate", InvalidateAllAnalysesPass()) LOOP_PASS("licm", LICMPass()) LOOP_PASS("loop-idiom", LoopIdiomRecognizePass()) +LOOP_PASS("loop-versioning-idiom", LoopVersioningIdiomRecognizePass()) LOOP_PASS("loop-instsimplify", LoopInstSimplifyPass()) LOOP_PASS("rotate", LoopRotatePass()) LOOP_PASS("no-op-loop", NoOpLoopPass()) Index: llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp =================================================================== --- llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp +++ llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp @@ -22,7 +22,7 @@ // TODO List: // // Future loop memory idioms to recognize: -// memcmp, memmove, strlen, etc. +// memcmp, strlen, etc. // Future floating point idioms to recognize in -ffast-math mode: // fpowi // Future integer operation idioms to recognize: @@ -44,6 +44,7 @@ #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/BasicAliasAnalysis.h" #include "llvm/Analysis/GlobalsModRef.h" +#include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Analysis/LoopAccessAnalysis.h" #include "llvm/Analysis/LoopPass.h" #include "llvm/Analysis/ScalarEvolutionAliasAnalysis.h" @@ -70,6 +71,7 @@ STATISTIC(NumMemSet, "Number of memset's formed from loop stores"); STATISTIC(NumMemCpy, "Number of memcpy's formed from loop load+stores"); +STATISTIC(NumMemMove, "Number of memmove's formed from loop load+stores"); static cl::opt UseLIRCodeSizeHeurs( "use-lir-code-size-heurs", @@ -88,6 +90,7 @@ TargetLibraryInfo *TLI; const TargetTransformInfo *TTI; const DataLayout *DL; + bool UseLoopVersioning; bool ApplyCodeSizeHeuristics; public: @@ -95,9 +98,9 @@ LoopInfo *LI, ScalarEvolution *SE, TargetLibraryInfo *TLI, const TargetTransformInfo *TTI, - const DataLayout *DL) + const DataLayout *DL, bool ULV) : CurLoop(nullptr), AA(AA), DT(DT), LI(LI), SE(SE), TLI(TLI), TTI(TTI), - DL(DL) {} + DL(DL), UseLoopVersioning(ULV) {} bool runOnLoop(Loop *L); @@ -131,6 +134,7 @@ SmallPtrSetImpl &Stores, const SCEVAddRecExpr *Ev, const SCEV *BECount, bool NegStride, bool IsLoopMemset = false); + bool coverLoop(Loop *L, SmallVectorImpl &Insts) const; bool processLoopStoreOfLoopLoad(StoreInst *SI, const SCEV *BECount); bool avoidLIRForMultiBlockLoop(bool IsMemset = false, bool IsLoopMemset = false); @@ -171,7 +175,7 @@ *L->getHeader()->getParent()); const DataLayout *DL = &L->getHeader()->getModule()->getDataLayout(); - LoopIdiomRecognize LIR(AA, DT, LI, SE, TLI, TTI, DL); + LoopIdiomRecognize LIR(AA, DT, LI, SE, TLI, TTI, DL, false); return LIR.runOnLoop(L); } @@ -184,6 +188,44 @@ getLoopAnalysisUsage(AU); } }; + +class LoopVersioningIdiomRecognizeLegacyPass : public LoopPass { +public: + static char ID; + explicit LoopVersioningIdiomRecognizeLegacyPass() : LoopPass(ID) { + initializeLoopVersioningIdiomRecognizeLegacyPassPass( + *PassRegistry::getPassRegistry()); + } + + bool runOnLoop(Loop *L, LPPassManager &LPM) override { + if (skipLoop(L)) + return false; + + AliasAnalysis *AA = &getAnalysis().getAAResults(); + DominatorTree *DT = &getAnalysis().getDomTree(); + LoopInfo *LI = &getAnalysis().getLoopInfo(); + ScalarEvolution *SE = &getAnalysis().getSE(); + TargetLibraryInfo *TLI = + &getAnalysis().getTLI(); + const TargetTransformInfo *TTI = + &getAnalysis().getTTI( + *L->getHeader()->getParent()); + const DataLayout *DL = &L->getHeader()->getModule()->getDataLayout(); + + LoopIdiomRecognize LIR(AA, DT, LI, SE, TLI, TTI, DL, true); + return LIR.runOnLoop(L); + } + + /// This transformation requires natural loop information & requires that + /// loop preheaders be inserted into the CFG. + /// + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.addRequired(); + AU.addRequired(); + getLoopAnalysisUsage(AU); + } +}; + } // End anonymous namespace. PreservedAnalyses LoopIdiomRecognizePass::run(Loop &L, LoopAnalysisManager &AM, @@ -191,7 +233,22 @@ LPMUpdater &) { const auto *DL = &L.getHeader()->getModule()->getDataLayout(); - LoopIdiomRecognize LIR(&AR.AA, &AR.DT, &AR.LI, &AR.SE, &AR.TLI, &AR.TTI, DL); + LoopIdiomRecognize LIR(&AR.AA, &AR.DT, &AR.LI, &AR.SE, &AR.TLI, &AR.TTI, DL, + false); + if (!LIR.runOnLoop(&L)) + return PreservedAnalyses::all(); + + return getLoopPassPreservedAnalyses(); +} + +PreservedAnalyses +LoopVersioningIdiomRecognizePass::run(Loop &L, LoopAnalysisManager &AM, + LoopStandardAnalysisResults &AR, + LPMUpdater &) { + const auto *DL = &L.getHeader()->getModule()->getDataLayout(); + + LoopIdiomRecognize LIR(&AR.AA, &AR.DT, &AR.LI, &AR.SE, &AR.TLI, &AR.TTI, DL, + true); if (!LIR.runOnLoop(&L)) return PreservedAnalyses::all(); @@ -207,7 +264,22 @@ INITIALIZE_PASS_END(LoopIdiomRecognizeLegacyPass, "loop-idiom", "Recognize loop idioms", false, false) +char LoopVersioningIdiomRecognizeLegacyPass::ID = 0; +INITIALIZE_PASS_BEGIN(LoopVersioningIdiomRecognizeLegacyPass, + "loop-versioning-idiom", + "Recognize loop idioms with loop versioning", false, + false) +INITIALIZE_PASS_DEPENDENCY(LoopPass) +INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass) +INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass) +INITIALIZE_PASS_END(LoopVersioningIdiomRecognizeLegacyPass, + "loop-versioning-idiom", + "Recognize loop idioms with loop versioning", false, false) + Pass *llvm::createLoopIdiomPass() { return new LoopIdiomRecognizeLegacyPass(); } +Pass *llvm::createLoopVersioningIdiomPass() { + return new LoopVersioningIdiomRecognizeLegacyPass(); +} static void deleteDeadInstruction(Instruction *I) { I->replaceAllUsesWith(UndefValue::get(I->getType())); @@ -229,7 +301,7 @@ // Disable loop idiom recognition if the function's name is a common idiom. StringRef Name = L->getHeader()->getParent()->getName(); - if (Name == "memset" || Name == "memcpy") + if (Name == "memset" || Name == "memcpy" || Name == "memmove") return false; // Determine if code size heuristics need to be applied. @@ -849,6 +921,58 @@ return true; } +// \brief Check if the instructions in Insts, together with their dependencies +// cover the loop in the sense that the loop could be safely eliminated once +// the instructions in Insts are removed. +bool LoopIdiomRecognize::coverLoop(Loop *L, + SmallVectorImpl &Insts) const { + SmallSet LoopBlocks; + for (auto *B : L->blocks()) + LoopBlocks.insert(B); + + SetVector Worklist(Insts.begin(), Insts.end()); + + // Collect all instructions from the loop that the instructions in Insts + // depend on (plus their dependencies, etc.). These instructions will + // constitute the expression trees that feed those in Insts, but the trees + // will be limited only to instructions contained in the loop. + for (unsigned i = 0; i < Worklist.size(); ++i) { + Instruction *In = Worklist[i]; + for (auto I = In->op_begin(), E = In->op_end(); I != E; ++I) { + Instruction *OpI = dyn_cast(I); + if (!OpI) + continue; + BasicBlock *PB = OpI->getParent(); + if (!LoopBlocks.count(PB)) + continue; + Worklist.insert(OpI); + } + } + + // Scan all instructions in the loop, if any of them have a user outside + // of the loop, or outside of the expressions collected above, then either + // the loop has a side-effect visible outside of it, or there are + // instructions in it that are not involved in the original set Insts. + for (auto *B : L->blocks()) { + for (auto &In : *B) { + if (isa(In) || isa(In)) + continue; + if (!Worklist.count(&In) && In.mayHaveSideEffects()) + return false; + for (const auto &K : In.users()) { + Instruction *UseI = dyn_cast(K); + if (!UseI) + continue; + BasicBlock *UseB = UseI->getParent(); + if (LI->getLoopFor(UseB) != L) + return false; + } + } + } + + return true; +} + /// If the stored value is a strided load in the same loop with the same stride /// this may be transformable into a memcpy. This kicks in for stuff like /// for (i) A[i] = B[i]; @@ -857,26 +981,26 @@ assert(SI->isSimple() && "Expected only non-volatile stores."); Value *StorePtr = SI->getPointerOperand(); - const SCEVAddRecExpr *StoreEv = cast(SE->getSCEV(StorePtr)); + auto *StoreEv = cast(SE->getSCEV(StorePtr)); APInt Stride = getStoreStride(StoreEv); unsigned StoreSize = getStoreSizeInBytes(SI, DL); bool NegStride = StoreSize == -Stride; // The store must be feeding a non-volatile load. - LoadInst *LI = cast(SI->getValueOperand()); - assert(LI->isSimple() && "Expected only non-volatile stores."); + LoadInst *LdI = cast(SI->getValueOperand()); + assert(LdI->isSimple() && "Expected only non-volatile stores."); // See if the pointer expression is an AddRec like {base,+,1} on the current // loop, which indicates a strided load. If we have something else, it's a // random load we can't handle. - const SCEVAddRecExpr *LoadEv = - cast(SE->getSCEV(LI->getPointerOperand())); + auto *LoadEv = cast(SE->getSCEV(LdI->getPointerOperand())); // The trip count of the loop and the base pointer of the addrec SCEV is // guaranteed to be loop invariant, which means that it should dominate the // header. This allows us to insert code for it in the preheader. BasicBlock *Preheader = CurLoop->getLoopPreheader(); - IRBuilder<> Builder(Preheader->getTerminator()); + Instruction *ExpPt = Preheader->getTerminator(); + IRBuilder<> Builder(ExpPt); SCEVExpander Expander(*SE, *DL, "loop-idiom"); const SCEV *StrStart = StoreEv->getStart(); @@ -893,21 +1017,51 @@ // or write the memory region we're storing to. This includes the load that // feeds the stores. Check for an alias by generating the base address and // checking everything. - Value *StoreBasePtr = Expander.expandCodeFor( - StrStart, Builder.getInt8PtrTy(StrAS), Preheader->getTerminator()); + Value *StoreBasePtr = + Expander.expandCodeFor(StrStart, Builder.getInt8PtrTy(StrAS), ExpPt); + Value *LoadBasePtr = nullptr; - SmallPtrSet Stores; - Stores.insert(SI); + bool isMemcpySafe = true; + SmallPtrSet Ignore1; + Ignore1.insert(SI); if (mayLoopAccessLocation(StoreBasePtr, MRI_ModRef, CurLoop, BECount, - StoreSize, *AA, Stores)) { - Expander.clear(); - // If we generated new code for the base pointer, clean up. - RecursivelyDeleteTriviallyDeadInstructions(StoreBasePtr, TLI); - return false; + StoreSize, *AA, Ignore1)) { + if (!UseLoopVersioning) + goto CleanupAndExit; + + // Check if the load is the offending instruction. + Ignore1.insert(LdI); + if (mayLoopAccessLocation(StoreBasePtr, MRI_ModRef, CurLoop, BECount, + StoreSize, *AA, Ignore1)) { + CleanupAndExit: + Expander.clear(); + // If we generated new code for the base pointer, clean up. + if (StoreBasePtr && (LoadBasePtr != StoreBasePtr)) { + RecursivelyDeleteTriviallyDeadInstructions(StoreBasePtr, TLI); + StoreBasePtr = nullptr; + } + if (LoadBasePtr) { + RecursivelyDeleteTriviallyDeadInstructions(LoadBasePtr, TLI); + LoadBasePtr = nullptr; + } + return false; + } + // It worked with the load ignored. + isMemcpySafe = false; + + // In case of a memmove, the call to memmove will be executed instead + // of the loop, so we need to make sure that there is nothing else in + // the loop than the load, store and instructions that these two depend + // on. + SmallVector Insts; + Insts.push_back(SI); + Insts.push_back(LdI); + if (!coverLoop(CurLoop, Insts)) + goto CleanupAndExit; } const SCEV *LdStart = LoadEv->getStart(); - unsigned LdAS = LI->getPointerAddressSpace(); + unsigned LdAS = LdI->getPointerAddressSpace(); // Handle negative strided loops. if (NegStride) @@ -915,26 +1069,36 @@ // For a memcpy, we have to make sure that the input array is not being // mutated by the loop. - Value *LoadBasePtr = Expander.expandCodeFor( + LoadBasePtr = Expander.expandCodeFor( LdStart, Builder.getInt8PtrTy(LdAS), Preheader->getTerminator()); + SmallPtrSet Ignore2; + Ignore2.insert(SI); if (mayLoopAccessLocation(LoadBasePtr, MRI_Mod, CurLoop, BECount, StoreSize, - *AA, Stores)) { - Expander.clear(); - // If we generated new code for the base pointer, clean up. - RecursivelyDeleteTriviallyDeadInstructions(LoadBasePtr, TLI); - RecursivelyDeleteTriviallyDeadInstructions(StoreBasePtr, TLI); - return false; - } + *AA, Ignore2)) + goto CleanupAndExit; if (avoidLIRForMultiBlockLoop()) - return false; + goto CleanupAndExit; // Okay, everything is safe, we can transform this! + BasicBlock *ExitB; + if (!isMemcpySafe) { + // The runtime check needs a single exit block. + SmallVector ExitBlocks; + CurLoop->getUniqueExitBlocks(ExitBlocks); + if (ExitBlocks.size() != 1) + goto CleanupAndExit; + ExitB = ExitBlocks[0]; + } + // The # stored bytes is (BECount+1)*Size. Expand the trip count out to // pointer size if it isn't already. + LLVMContext &Ctx = SI->getContext(); BECount = SE->getTruncateOrZeroExtend(BECount, IntPtrTy); + unsigned Align = std::min(SI->getAlignment(), LdI->getAlignment()); + DebugLoc DLoc = SI->getDebugLoc(); const SCEV *NumBytesS = SE->getAddExpr(BECount, SE->getOne(IntPtrTy), SCEV::FlagNUW); @@ -942,22 +1106,122 @@ NumBytesS = SE->getMulExpr(NumBytesS, SE->getConstant(IntPtrTy, StoreSize), SCEV::FlagNUW); - Value *NumBytes = - Expander.expandCodeFor(NumBytesS, IntPtrTy, Preheader->getTerminator()); + Value *NumBytes = Expander.expandCodeFor(NumBytesS, IntPtrTy, ExpPt); + if (Instruction *In = dyn_cast(NumBytes)) + if (Value *Simp = SimplifyInstruction(In, *DL, TLI, DT)) + NumBytes = Simp; + + CallInst *NewCall; + + if (isMemcpySafe) { + NewCall = Builder.CreateMemCpy(StoreBasePtr, LoadBasePtr, NumBytes, Align); + ++NumMemCpy; + } else { + unsigned Threshold = 0 /* RuntimeMemSizeThreshold */; + if (ConstantInt *CI = dyn_cast(NumBytes)) { + uint64_t C = CI->getZExtValue(); + if (Threshold != 0 && C < Threshold) + goto CleanupAndExit; + /* if (C < CompileTimeMemSizeThreshold) + goto CleanupAndExit; */ + } + + BasicBlock *Header = CurLoop->getHeader(); + Function *Func = Header->getParent(); + Loop *ParentL = LI->getLoopFor(Preheader); + StringRef HeaderName = Header->getName(); + + // Create a new (empty) preheader, and update the PHI nodes in the + // header to use the new preheader. + BasicBlock *NewPreheader = BasicBlock::Create(Ctx, HeaderName+".rtli.ph", + Func, Header); + if (ParentL) + ParentL->addBasicBlockToLoop(NewPreheader, *LI); + IRBuilder<>(NewPreheader).CreateBr(Header); + for (auto &In : *Header) { + PHINode *PN = dyn_cast(&In); + if (!PN) + break; + int bx = PN->getBasicBlockIndex(Preheader); + if (bx >= 0) + PN->setIncomingBlock(bx, NewPreheader); + } + DT->addNewBlock(NewPreheader, Preheader); + DT->changeImmediateDominator(Header, NewPreheader); + + // Check for safe conditions to execute memmove. + // If stride is positive, copying things from higher to lower addresses + // is equivalent to memmove. For negative stride, it's the other way + // around. Copying forward in memory with positive stride may not be + // same as memmove since we may be copying values that we just stored + // in some previous iteration. + Value *LA = Builder.CreatePtrToInt(LoadBasePtr, IntPtrTy); + Value *SA = Builder.CreatePtrToInt(StoreBasePtr, IntPtrTy); + Value *LowA = NegStride ? LA : SA; + Value *HighA = NegStride ? SA : LA; + Value *CmpA = Builder.CreateICmpULT(LowA, HighA); + Value *Cond = CmpA; + + // Check for distance between pointers. + Value *Dist = Builder.CreateSub(HighA, LowA); + Value *CmpD = Builder.CreateICmpSLT(NumBytes, Dist); + Value *CmpEither = Builder.CreateOr(Cond, CmpD); + Cond = CmpEither; + + if (Threshold != 0) { + Type *Ty = NumBytes->getType(); + Value *Thr = ConstantInt::get(Ty, Threshold); + Value *CmpB = Builder.CreateICmpULT(Thr, NumBytes); + Value *CmpBoth = Builder.CreateAnd(Cond, CmpB); + Cond = CmpBoth; + } + BasicBlock *MemmoveB = BasicBlock::Create(Ctx, Header->getName()+".rtli", + Func, NewPreheader); + if (ParentL) + ParentL->addBasicBlockToLoop(MemmoveB, *LI); + Instruction *OldT = Preheader->getTerminator(); + Builder.CreateCondBr(Cond, MemmoveB, NewPreheader); + OldT->eraseFromParent(); + Preheader->setName(Preheader->getName()+".old"); + DT->addNewBlock(MemmoveB, Preheader); + // Find the new immediate dominator of the exit block. + BasicBlock *ExitD = Preheader; + for (auto PI = pred_begin(ExitB), PE = pred_end(ExitB); PI != PE; ++PI) { + BasicBlock *PB = *PI; + ExitD = DT->findNearestCommonDominator(ExitD, PB); + if (!ExitD) + break; + } + // If the prior immediate dominator of ExitB was dominated by the + // old preheader, then the old preheader becomes the new immediate + // dominator. Otherwise don't change anything (because the newly + // added blocks are dominated by the old preheader). + if (ExitD && DT->dominates(Preheader, ExitD)) { + DomTreeNode *BN = DT->getNode(ExitB); + DomTreeNode *DN = DT->getNode(ExitD); + BN->setIDom(DN); + } + + // Add a call to memmove to the conditional block. + IRBuilder<> CondBuilder(MemmoveB); + CondBuilder.CreateBr(ExitB); + CondBuilder.SetInsertPoint(MemmoveB->getTerminator()); + + NewCall = CondBuilder.CreateMemMove(StoreBasePtr, LoadBasePtr, + NumBytes, Align); + ++NumMemMove; + } - CallInst *NewCall = - Builder.CreateMemCpy(StoreBasePtr, LoadBasePtr, NumBytes, - std::min(SI->getAlignment(), LI->getAlignment())); NewCall->setDebugLoc(SI->getDebugLoc()); - DEBUG(dbgs() << " Formed memcpy: " << *NewCall << "\n" - << " from load ptr=" << *LoadEv << " at: " << *LI << "\n" + DEBUG(dbgs() << " Formed " << (isMemcpySafe ? "memcpy: " : "memmove: ") + << *NewCall << "\n" + << " from load ptr=" << *LoadEv << " at: " << *LdI << "\n" << " from store ptr=" << *StoreEv << " at: " << *SI << "\n"); // Okay, the memcpy has been formed. Zap the original store and anything that // feeds into it. deleteDeadInstruction(SI); - ++NumMemCpy; return true; } Index: llvm/lib/Transforms/Scalar/Scalar.cpp =================================================================== --- llvm/lib/Transforms/Scalar/Scalar.cpp +++ llvm/lib/Transforms/Scalar/Scalar.cpp @@ -67,6 +67,7 @@ initializeLoopUnswitchPass(Registry); initializeLoopVersioningLICMPass(Registry); initializeLoopIdiomRecognizeLegacyPassPass(Registry); + initializeLoopVersioningIdiomRecognizeLegacyPassPass(Registry); initializeLowerAtomicLegacyPassPass(Registry); initializeLowerExpectIntrinsicPass(Registry); initializeLowerGuardIntrinsicLegacyPassPass(Registry); Index: llvm/test/Other/new-pm-defaults.ll =================================================================== --- llvm/test/Other/new-pm-defaults.ll +++ llvm/test/Other/new-pm-defaults.ll @@ -137,6 +137,7 @@ ; CHECK-O-NEXT: Running pass: Float2IntPass ; CHECK-O-NEXT: Running pass: FunctionToLoopPassAdaptor<{{.*}}LoopRotatePass ; CHECK-O-NEXT: Running pass: LoopDistributePass +; CHECK-O-NEXT: Running pass: FunctionToLoopPassAdaptor<{{.*}}LoopVersioningIdiomRecognizePass ; CHECK-O-NEXT: Running pass: LoopVectorizePass ; CHECK-O-NEXT: Running analysis: BlockFrequencyAnalysis ; CHECK-O-NEXT: Running analysis: BranchProbabilityAnalysis Index: llvm/test/Transforms/LoopIdiom/memmove.ll =================================================================== --- /dev/null +++ llvm/test/Transforms/LoopIdiom/memmove.ll @@ -0,0 +1,43 @@ +; RUN: opt -S -basicaa -loop-versioning-idiom < %s | FileCheck %s +; RUN: opt -S -basicaa -loop-idiom < %s | FileCheck %s -check-prefix=LOOP-IDIOM + +declare i64 @foo() nounwind + +; CHECK-LABEL: @test1 +; CHECK: call void @llvm.memmove.p0i8.p0i8.i64( +; LOOP-IDIOM-LABEL: @test1 +; LOOP-IDIOM-NOT: call void @llvm.memmove.p0i8.p0i8.i64( + +; Nested loops +define void @test1(i8* nocapture %A, i64 %n) nounwind { +entry: + %call8 = tail call i64 @foo() nounwind + %tobool9 = icmp eq i64 %call8, 0 + br i1 %tobool9, label %while.end, label %for.cond.preheader.lr.ph + +for.cond.preheader.lr.ph: ; preds = %entry + %cmp6 = icmp eq i64 %n, 0 + br label %for.cond.preheader + +while.cond.loopexit: ; preds = %for.body, %for.cond.preheader + %call = tail call i64 @foo() nounwind + %tobool = icmp eq i64 %call, 0 + br i1 %tobool, label %while.end, label %for.cond.preheader + +for.cond.preheader: ; preds = %for.cond.preheader.lr.ph, %while.cond.loopexit + br i1 %cmp6, label %while.cond.loopexit, label %for.body + +for.body: ; preds = %for.cond.preheader, %for.body + %i.07 = phi i64 [ %inc, %for.body ], [ 0, %for.cond.preheader ] + %add = add i64 %i.07, 10 + %arrayidx = getelementptr inbounds i8, i8* %A, i64 %add + %0 = load i8, i8* %arrayidx, align 1 + %arrayidx1 = getelementptr inbounds i8, i8* %A, i64 %i.07 + store i8 %0, i8* %arrayidx1, align 1 + %inc = add i64 %i.07, 1 + %exitcond = icmp eq i64 %inc, %n + br i1 %exitcond, label %while.cond.loopexit, label %for.body + +while.end: ; preds = %while.cond.loopexit, %entry + ret void +} Index: llvm/test/Transforms/LoopIdiom/memset_noidiom.ll =================================================================== --- llvm/test/Transforms/LoopIdiom/memset_noidiom.ll +++ llvm/test/Transforms/LoopIdiom/memset_noidiom.ll @@ -28,3 +28,54 @@ ret i8* %b } +; CHECK-LABEL: @memcpy +; CHECK-NOT: llvm.memcpy +define i8* @memcpy(i8* noalias %dst, i8* noalias %src, i64 %n) nounwind { +entry: + %tobool3 = icmp eq i64 %n, 0 + br i1 %tobool3, label %while.end, label %while.body + +while.body: ; preds = %entry, %while.body + %c2.06 = phi i8* [ %incdec.ptr, %while.body ], [ %src, %entry ] + %c1.05 = phi i8* [ %incdec.ptr1, %while.body ], [ %dst, %entry ] + %n.addr.04 = phi i64 [ %dec, %while.body ], [ %n, %entry ] + %dec = add i64 %n.addr.04, -1 + %incdec.ptr = getelementptr inbounds i8, i8* %c2.06, i64 1 + %0 = load i8, i8* %c2.06, align 1 + %incdec.ptr1 = getelementptr inbounds i8, i8* %c1.05, i64 1 + store i8 %0, i8* %c1.05, align 1 + %tobool = icmp eq i64 %dec, 0 + br i1 %tobool, label %while.end, label %while.body + +while.end: ; preds = %while.body, %entry + ret i8* %dst +} + +; CHECK-LABEL: @memmove +; CHECK-NOT: llvm.memmove +define i8* @memmove(i8* %dst, i8* nocapture %src, i64 %count) nounwind { +entry: + %sub = add i64 %count, -1 + %tobool9 = icmp eq i64 %count, 0 + br i1 %tobool9, label %while.end, label %while.body.lr.ph + +while.body.lr.ph: ; preds = %entry + %add.ptr2 = getelementptr inbounds i8, i8* %src, i64 %sub + %add.ptr = getelementptr inbounds i8, i8* %dst, i64 %sub + br label %while.body + +while.body: ; preds = %while.body.lr.ph, %while.body + %b.012 = phi i8* [ %add.ptr2, %while.body.lr.ph ], [ %incdec.ptr, %while.body ] + %a.011 = phi i8* [ %add.ptr, %while.body.lr.ph ], [ %incdec.ptr3, %while.body ] + %count.addr.010 = phi i64 [ %count, %while.body.lr.ph ], [ %dec, %while.body ] + %dec = add i64 %count.addr.010, -1 + %incdec.ptr = getelementptr inbounds i8, i8* %b.012, i64 -1 + %0 = load i8, i8* %b.012, align 1 + %incdec.ptr3 = getelementptr inbounds i8, i8* %a.011, i64 -1 + store i8 %0, i8* %a.011, align 1 + %tobool = icmp eq i64 %dec, 0 + br i1 %tobool, label %while.end, label %while.body + +while.end: ; preds = %while.body, %entry + ret i8* %dst +}