cl::desc("Force not to create loop versions if the user guarantees that " "the length of each array dimension is a positive value, and " "the multiplication of lengths of all array
SCEVExprConverter(ScalarEvolution &SE) : CurLoop(nullptr), SE(SE) {} const SCEV *convertSCEV(const SCEV *Expr, bool AddRuntimeCheck);
+/// Tries to fold the SCEV with regard to loop guards of CurLoop +const SCEV *SCEVExprConverter::convertSCEV(const SCEV *Expr, bool AddRuntimeCheck) { + switch (Expr->getSCEVType()) { + case scConstant: + case scUnknown: + case scCouldNotCompute: + return Expr; + case scTruncate: { + const SCEVTruncateExpr *Trunc = cast(Expr); + Type *Ty = Trunc->getType(); + const SCEV *NewTrunc = convertSCEV(Trunc->getOperand(), AddRuntimeCheck); + return SE.getTruncateExpr(NewTrunc, Ty); + } + case scZeroExtend: { + const SCEVZeroExtendExpr *Zext = cast(Expr); + Type *Ty = Zext->getType(); + const SCEV *NewZext = convertSCEV(Zext->getOperand(), AddRuntimeCheck); + return SE.getZeroExtendExpr(NewZext, Ty); + } + case scSignExtend: { + // Return original SCEV if expression is not guarded by loop with Zero and + // dont want to add runtime check. Otherwise we fold constant and add appropriate + // runtime check. + const SCEVSignExtendExpr *Sext = cast(Expr); + if (SE.isLoopEntryGuardedByCond(CurLoop, ICmpInst::ICMP_SGE, Sext, SE.getZero(Sext->getType())) == false) { + if (AddRuntimeCheck == false) + return Sext; + if (CheckSltMap[Sext] < 0) + CheckSltMap[Sext] = 0; + } + const SCEV *NewZext = convertSCEV(Sext->getOperand(), AddRuntimeCheck); + return SE.getZeroExtendExpr(NewZext, Sext->getType()); + } + case scAddExpr: { + const SCEVAddExpr *Add = cast(Expr); + const SCEV *NewAdd = convertSCEV(Add->getOperand(0), AddRuntimeCheck); + for (int I = 1, E = Add->getNumOperands(); I != E; ++I) { + NewAdd = SE.getAddExpr(NewAdd, convertSCEV(Add->getOperand(I), AddRuntimeCheck)); + } + return NewAdd; + } + case scMulExpr: { + const SCEVMulExpr *Mul = cast(Expr); + const SCEV *NewMul = convertSCEV(Mul->getOperand(0), AddRuntimeCheck); + for (int I = 1, E = Mul->getNumOperands(); I != E; ++I) { + NewMul = SE.getMulExpr(NewMul, convertSCEV(Mul->getOperand(I), AddRuntimeCheck)); + } + return NewMul; + } + case scUDivExpr: { + const SCEVUDivExpr *UDiv = cast(Expr); + const SCEV *NewLHS = 
llvm_unreachable("Do not expect AddRec here!");
/// Helper function to generate the predicate "X < ConstVal".
/// This function is used to add runtime-check conditions for values that we
/// assume in order to canonicalize the SCEV for comparison.
/// For example: turn sext to zext, eliminate smax.
LLVM_DEBUG(dbgs() << "block is in a subloop, skip\n");
for (unsigned i = 0, e = ExitBlocks.size(); i != e; ++i) - if (!DT->dominates(BB, ExitBlocks[i])) + if (!DT->dominates(BB, ExitBlocks[i])) { + LLVM_DEBUG(dbgs() << "does not dominate all exit blocks to promote the store, abort.\n"); return false; + } bool MadeChange = false; // Look for store instructions, which may be optimized to memset/memcpy. @@ -858,6 +1067,7 @@ Instruction *Inst = &*I++; // Look for memory instructions, which may be optimized to a larger one. if (MemInst *MI = dyn_cast(Inst)) { + LLVM_DEBUG(dbgs() << "found MemInst: " << *MI << "\n"); WeakTrackingVH InstPtr(&*I); if (!(this->*Processor)(MI, BECount)) continue; @@ -944,7 +1154,7 @@ bool LoopIdiomRecognize::processLoopMemSet(MemSetInst *MSI, const SCEV *BECount) { // We can only handle non-volatile memsets with a constant size. - if (MSI->isVolatile() || !isa(MSI->getLength())) + if (MSI->isVolatile()) return false; // If we're not allowed to hack on memset, we fail. @@ -956,28 +1166,104 @@ // See if the pointer expression is an AddRec like {base,+,1} on the current // loop, which indicates a strided store. If we have something else, it's a // random store we can't handle. + const SCEV *PointerSCEV = SE->getSCEV(Pointer); + if (PointerSCEV) + LLVM_DEBUG(dbgs() << "PointerSCEV: " << *PointerSCEV << "\n"); const SCEVAddRecExpr *Ev = dyn_cast(SE->getSCEV(Pointer)); - if (!Ev || Ev->getLoop() != CurLoop || !Ev->isAffine()) + if (!Ev || Ev->getLoop() != CurLoop || !Ev->isAffine()) { + LLVM_DEBUG(dbgs() << "PointerSCEV cannot be converted to SCEVAddRecExpr, abort\n"); return false; - + } + const SCEV *StrideSCEV = Ev->getOperand(1); const SCEV *MemsetSizeSCEV = SE->getSCEV(MSI->getLength()); - if (!MemsetSizeSCEV) + if (!StrideSCEV || !MemsetSizeSCEV) return false; - // Reject memsets that are so large that they overflow an unsigned. 
- uint64_t SizeInBytes = cast(MSI->getLength())->getZExtValue(); - if ((SizeInBytes >> 32) != 0) - return false; + bool NegStride; + const bool IsConstantSize = isa(MSI->getLength()); + if (IsConstantSize) { + // Memset size is constant + // Reject memsets that are so large that they overflow an unsigned. + LLVM_DEBUG(dbgs() << " memset size is constant\n"); + uint64_t SizeInBytes = cast(MSI->getLength())->getZExtValue(); + if ((SizeInBytes >> 32) != 0) + return false; - // Check to see if the stride matches the size of the memset. If so, then we - // know that every byte is touched in the loop. - const SCEVConstant *ConstStride = dyn_cast(Ev->getOperand(1)); - if (!ConstStride) - return false; + // Check to see if the stride matches the size of the memset. If so, then + // we know that every byte is touched in the loop. + const SCEVConstant *ConstStride = dyn_cast(Ev->getOperand(1)); + if (!ConstStride) + return false; - APInt Stride = ConstStride->getAPInt(); - if (SizeInBytes != Stride && SizeInBytes != -Stride) - return false; + APInt Stride = ConstStride->getAPInt(); + if (SizeInBytes != Stride && SizeInBytes != -Stride) + return false; + + NegStride = SizeInBytes == -Stride; + } else { + // Memset size is non-constant + // Check if the stride matches the memset size, by comparing the SCEV + // expressions of the stride and memset size. The two expressions match + // if they are equal. If they match, then we know that every byte is + // touched in the loop. We only handle memset length and stride that + // are invariant for the top level loop. 
// Compare the positive-direction StrideSCEV with MemsetSizeSCEV.
// If processLoopStridedStore successfully made the change, add the required runtime-check information into the list.
LLVM_DEBUG(dbgs() << "Handle negative stride,"
                      << " original Start: " << *Start << "\n");
    Start = getStartForNegStride(Start, BECount, IntIdxTy, StoreSizeSCEV, SE);
    LLVM_DEBUG(dbgs() << "  converted Start: " << *Start << "\n");
LLVM_DEBUG(dbgs() << "abort because TripCount is not TopLoop's invariant "
// Here we check whether the top-level clone has been created yet