Index: llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp =================================================================== --- llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp +++ llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp @@ -106,6 +106,7 @@ STATISTIC(NumMemSet, "Number of memset's formed from loop stores"); STATISTIC(NumMemCpy, "Number of memcpy's formed from loop load+stores"); +STATISTIC(NumStrLen, "Number of strlen's formed from loop loads"); bool DisableLIRP::All; static cl::opt @@ -136,6 +137,11 @@ "with -Os/-Oz"), cl::init(true), cl::Hidden); +static cl::opt DisableStrlenIdiom( + "disable-strlen-idiom", + cl::desc("Disable strlen loop idiom recognize and transform"), + cl::init(false), cl::Hidden); + namespace { class LoopIdiomRecognize { @@ -226,6 +232,7 @@ Value *Var, Instruction *DefX, const DebugLoc &DL, bool ZeroCheck, bool IsCntPhiUsedOutsideLoop); + bool recognizeAndInsertStrlen(); /// @} }; @@ -1228,7 +1235,8 @@ << "] Noncountable Loop %" << CurLoop->getHeader()->getName() << "\n"); - return recognizePopcount() || recognizeAndInsertFFS(); + return recognizePopcount() || recognizeAndInsertFFS() || + recognizeAndInsertStrlen(); } /// Check if the given conditional branch is based on the comparison between @@ -1600,6 +1608,118 @@ return true; } +/// Recognizes a strlen idiom by checking for loops that either keep +/// a variable counter or increment a char pointer and then subtract +/// with the base pointer. +/// +/// If detected, transforms the relevant code to a strlen function +/// call, and returns true; otherwise, returns false. +bool LoopIdiomRecognize::recognizeAndInsertStrlen() { + if (DisableStrlenIdiom) + return false; + + // Give up if the loop has multiple blocks or multiple backedges. + if (CurLoop->getNumBackEdges() != 1 || CurLoop->getNumBlocks() != 1) + return false; + + // It should have a preheader containing nothing but an unconditional branch. + BasicBlock *PH = CurLoop->getLoopPreheader(); + if (!PH || &PH->front() != PH->getTerminator()) + return false; + + // It should have a precondition block + auto *PreCondBB = PH->getSinglePredecessor(); + if (!PreCondBB) + return false; + + auto *PreCondBI = dyn_cast(PreCondBB->getTerminator()); + if (!PreCondBI || PreCondBI->isUnconditional()) + return false; + + // The loop exit must be conditioned on an icmp with 0. + // The icmp operand has to be a load on some SSA reg that increments + // by 1 in the loop. + BasicBlock *LoopBody = *(CurLoop->block_begin()); + + auto *LoopTerm = dyn_cast(LoopBody->getTerminator()); + auto *LoopCond = matchCondition(LoopTerm, LoopBody); + if (!LoopCond) + return false; + auto *LoopLoad = dyn_cast(LoopCond); + if (!LoopLoad || LoopLoad->getPointerAddressSpace() != 0) + return false; + + // See if the pointer expression is an AddRec with step 1 ({n,+,1}) on + // the loop, indicating strlen calculation. + auto *IncPtr = LoopLoad->getPointerOperand(); + const SCEVAddRecExpr *LoadEv = dyn_cast(SE->getSCEV(IncPtr)); + if (!LoadEv || LoadEv->getLoop() != CurLoop || !LoadEv->isAffine()) + return false; + const SCEV *StepRec = LoadEv->getStepRecurrence(*SE); + if (!StepRec || !StepRec->isOne()) + return false; + + // Scan every instruction in the loop to ensure there are no side effects. + for (auto &I : *LoopBody) + if (I.mayHaveSideEffects()) + return false; + + // Check that the loop exit block is valid: + // It needs to have exactly one LCSSA Phi which is an AddRec. + auto *LoopExitBB = + (LoopTerm->getSuccessor(0) == LoopBody ? LoopTerm->getSuccessor(1) + : LoopTerm->getSuccessor(0)); + PHINode *LCSSAPhi = dyn_cast(LoopExitBB->begin()); + if (!LCSSAPhi || LCSSAPhi->getNumIncomingValues() != 1) + return false; + + const SCEVAddRecExpr *LCSSAEv = + dyn_cast(SE->getSCEV(LCSSAPhi->getIncomingValue(0))); + if (!LCSSAEv || !LCSSAEv->isAffine()) + return false; + + auto *CleanupBB = + (PreCondBI->getSuccessor(0) == PH ? PreCondBI->getSuccessor(1) + : PreCondBI->getSuccessor(0)); + Value *ResInst = nullptr; + for (auto &Phi : CleanupBB->phis()) { + if (Phi.getBasicBlockIndex(LoopExitBB) != 1) { + ResInst = Phi.getIncomingValueForBlock(LoopExitBB); + break; + } + } + if (!ResInst) + return false; + + // We can now expand the base of the str and transform the loop + SCEVExpander Expander(*SE, *DL, "loop-idiom"); + SCEVExpanderCleaner ExpCleaner(Expander, *DT); + IRBuilder<> Builder(PH->getTerminator()); + + Value *Expanded = Expander.expandCodeFor( + LCSSAEv->getStart(), + Builder.getInt8PtrTy(LoopLoad->getPointerAddressSpace()), + PH->getTerminator()); + + Value *LibCall = emitStrLen(Expanded, Builder, *DL, TLI); + Value *Strlen = Builder.CreateZExtOrTrunc(LibCall, ResInst->getType()); + ResInst->replaceAllUsesWith(Strlen); + + // Remove the loop-exit branch and delete death instructions + RecursivelyDeleteTriviallyDeadInstructions(ResInst, TLI); + ConstantInt *NewLoopCond = LoopTerm->getSuccessor(0) == LoopBody + ? Builder.getFalse() + : Builder.getTrue(); + LoopTerm->setCondition(NewLoopCond); + deleteDeadInstruction(cast(LoopCond)); + deleteDeadInstruction(cast(IncPtr)); + SE->forgetLoop(CurLoop); + + ++NumStrLen; + ExpCleaner.markResultUsed(); + return true; +} + /// Recognizes a population count idiom in a non-countable loop. /// /// If detected, transforms the relevant code to issue the popcount intrinsic Index: llvm/test/Transforms/LoopIdiom/recognize-strlen.ll =================================================================== --- /dev/null +++ llvm/test/Transforms/LoopIdiom/recognize-strlen.ll @@ -0,0 +1,61 @@ +; RUN: opt -loop-idiom < %s -S | FileCheck %s +target datalayout = "e-m:e-i64:64-n32:64" +target triple = "powerpc64le-unknown-linux-gnu" + +define i64 @valid_strlen1(i8* %Str) { +entry: + %tobool = icmp eq i8* %Str, null + br i1 %tobool, label %cleanup, label %lor.lhs.false + +lor.lhs.false: ; preds = %entry + %0 = load i8, i8* %Str, align 1 + %cmp = icmp eq i8 %0, 0 + br i1 %cmp, label %cleanup, label %for.inc + +; CHECK: call i64 @strlen(i8* %scevgep) +; CHECK: br i1 true +for.inc: ; preds = %lor.lhs.false, %for.inc + %Src.09 = phi i8* [ %incdec.ptr, %for.inc ], [ %Str, %lor.lhs.false ] + %incdec.ptr = getelementptr inbounds i8, i8* %Src.09, i64 1 + %.pr = load i8, i8* %incdec.ptr, align 1 + %tobool2 = icmp eq i8 %.pr, 0 + br i1 %tobool2, label %for.end, label %for.inc + +; CHECK-NOT: sub +for.end: ; preds = %for.inc + %sub.ptr.lhs.cast = ptrtoint i8* %incdec.ptr to i64 + %sub.ptr.rhs.cast = ptrtoint i8* %Str to i64 + %sub.ptr.sub = sub i64 %sub.ptr.lhs.cast, %sub.ptr.rhs.cast + br label %cleanup + +cleanup: ; preds = %lor.lhs.false, %entry, %for.end + %retval.0 = phi i64 [ %sub.ptr.sub, %for.end ], [ 0, %entry ], [ 0, %lor.lhs.false ] + ret i64 %retval.0 +} + + +define i64 @valid_strlen2(i8* %Str) { +entry: + %tobool = icmp eq i8* %Str, null + br i1 %tobool, label %cleanup, label %for.cond + +; CHECK: call i64 @strlen(i8* %Str) +; CHECK: br i1 true +for.cond: ; preds = %entry, %for.cond + %Src.0 = phi i8* [ %incdec.ptr, %for.cond ], [ %Str, %entry ] + %0 = load i8, i8* %Src.0, align 1 + %tobool1 = icmp eq i8 %0, 0 + %incdec.ptr = getelementptr inbounds i8, i8* %Src.0, i64 1 + br i1 %tobool1, label %for.end, label %for.cond + +; CHECK-NOT: sub +for.end: ; preds = %for.cond + %sub.ptr.lhs.cast = ptrtoint i8* %Src.0 to i64 + %sub.ptr.rhs.cast = ptrtoint i8* %Str to i64 + %sub.ptr.sub = sub i64 %sub.ptr.lhs.cast, %sub.ptr.rhs.cast + br label %cleanup + + cleanup: ; preds = %entry, %for.end + %retval.0 = phi i64 [ %sub.ptr.sub, %for.end ], [ 0, %entry ] + ret i64 %retval.0 +}