diff --git a/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp b/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp --- a/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp +++ b/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp @@ -106,6 +106,7 @@ STATISTIC(NumMemSet, "Number of memset's formed from loop stores"); STATISTIC(NumMemCpy, "Number of memcpy's formed from loop load+stores"); +STATISTIC(NumStrLen, "Number of strlen's formed from loop loads"); static cl::opt UseLIRCodeSizeHeurs( "use-lir-code-size-heurs", @@ -113,6 +114,11 @@ "with -Os/-Oz"), cl::init(true), cl::Hidden); +static cl::opt DisableStrlenIdiom( + "disable-strlen-idiom", + cl::desc("Disable strlen loop idiom recognize and transform"), + cl::init(false), cl::Hidden); + namespace { class LoopIdiomRecognize { @@ -203,6 +209,7 @@ Value *Var, Instruction *DefX, const DebugLoc &DL, bool ZeroCheck, bool IsCntPhiUsedOutsideLoop); + bool recognizeAndInsertStrlen(); /// @} }; @@ -1203,7 +1210,8 @@ << "] Noncountable Loop %" << CurLoop->getHeader()->getName() << "\n"); - return recognizePopcount() || recognizeAndInsertFFS(); + return recognizePopcount() || recognizeAndInsertFFS() || + recognizeAndInsertStrlen(); } /// Check if the given conditional branch is based on the comparison between @@ -1575,6 +1583,120 @@ return true; } +/// Recognizes a strlen idiom by checking for loops that either keep +/// a variable counter or increment a char pointer and then subtract +/// with the base pointer. +/// +/// If detected, transforms the relevant code to a strlen function +/// call, and returns true; otherwise, returns false. +bool LoopIdiomRecognize::recognizeAndInsertStrlen() { + if (DisableStrlenIdiom) + return false; + + // Give up if the loop has multiple blocks or multiple backedges. + if (CurLoop->getNumBackEdges() != 1 || CurLoop->getNumBlocks() != 1) + return false; + + // It should have a preheader containing nothing but an unconditional branch. + BasicBlock *PH = CurLoop->getLoopPreheader(); + if (!PH || &PH->front() != PH->getTerminator()) + return false; + auto *EntryBI = dyn_cast(PH->getTerminator()); + if (!EntryBI || EntryBI->isConditional()) + return false; + + // It should have a precondition block where the generated strlen + // function call can be inserted. + auto *PreCondBB = PH->getSinglePredecessor(); + if (!PreCondBB) + return false; + auto *PreCondBI = dyn_cast(PreCondBB->getTerminator()); + if (!PreCondBI || PreCondBI->isUnconditional()) + return false; + + // The loop exit must be conditioned on an icmp with 0. + // The icmp operand has to be a load on some SSA reg that increments + // by 1 in the loop. + BasicBlock *LoopBody = *(CurLoop->block_begin()); + auto *LoopTerm = dyn_cast(LoopBody->getTerminator()); + auto *LoopCond = matchCondition(LoopTerm, LoopBody); + if (!LoopCond) + return false; + auto *LoopLoad = dyn_cast(LoopCond); + if (!LoopLoad) + return false; + + // See if the pointer expression is an AddRec with step 1 ({n,+,1}) on + // the loop, indicating strlen calculation. + auto *IncPtr = LoopLoad->getPointerOperand(); + const SCEVAddRecExpr *LoadEv = dyn_cast(SE->getSCEV(IncPtr)); + if (!LoadEv || LoadEv->getLoop() != CurLoop || !LoadEv->isAffine()) + return false; + const SCEV *StepRec = LoadEv->getStepRecurrence(*SE); + if (!StepRec || !StepRec->isOne()) + return false; + + // Scan every instruction in the loop to ensure there are no side effects. + for (BasicBlock::iterator I = LoopBody->begin(), E = LoopBody->end(); + I != E;) { + Instruction *Inst = &*I++; + if (Inst->mayHaveSideEffects()) + return false; + } + + // Check that the loop exit block is valid: + // It needs to have one LCSSA Phi + auto *LoopExitBB = + (LoopTerm->getSuccessor(0) == LoopBody ? LoopTerm->getSuccessor(1) + : LoopTerm->getSuccessor(0)); + PHINode *LCSSAPhi = dyn_cast(LoopExitBB->begin()); + if (!LCSSAPhi || LCSSAPhi->getNumIncomingValues() != 1) + return false; + + // Find the 'sub' instruction that computes the strlen. First we need + // the base pointer to check the operands of the 'sub'. + auto *BasePtrSCEV = dyn_cast(SE->getPointerBase(LoadEv)); + Value *BasePtr = BasePtrSCEV->getValue(); + auto *SubLHS = dyn_cast(LoopExitBB->getFirstNonPHIOrDbg()); + if (!SubLHS || !SubLHS->hasOneUse()) + return false; + auto *ResInst = dyn_cast(SubLHS->user_back()); + if (!ResInst || ResInst->getOpcode() != Instruction::Sub) + return false; + auto *SubRHS = dyn_cast(ResInst->getOperand(1)); + if (!SubRHS || SubRHS->getPointerOperand() != BasePtr) + return false; + + // We can now expand the base of the str and transform the loop + SCEVExpander Expander(*SE, *DL, "loop-idiom"); + ExpandedValuesCleaner EVC(Expander, TLI); + IRBuilder<> Builder(PH->getTerminator()); + + const SCEV *StrBaseSCEV = LoadEv->getStart(); + Value *Expanded = Expander.expandCodeFor( + StrBaseSCEV, Builder.getInt8PtrTy(LoopLoad->getPointerAddressSpace()), + PH->getTerminator()); + EVC.add(Expanded); + EVC.commit(); + + Value *FuncCall = emitStrLen(Expanded, Builder, *DL, TLI); + Value *Strlen = Builder.CreateZExtOrTrunc(FuncCall, ResInst->getType()); + ResInst->replaceAllUsesWith(Strlen); + + // Remove the loop-exit branch and delete death instructions + RecursivelyDeleteTriviallyDeadInstructions(ResInst, TLI); + ConstantInt *NewLoopCond = LoopTerm->getSuccessor(0) == LoopBody + ? Builder.getFalse() + : Builder.getTrue(); + LoopTerm->setCondition(NewLoopCond); + deleteDeadInstruction(cast(LoopCond)); + deleteDeadInstruction(cast(IncPtr)); + SE->forgetLoop(CurLoop); + + ++NumStrLen; + return true; +} + /// Recognizes a population count idiom in a non-countable loop. /// /// If detected, transforms the relevant code to issue the popcount intrinsic diff --git a/llvm/test/Transforms/LoopIdiom/recognize-strlen.ll b/llvm/test/Transforms/LoopIdiom/recognize-strlen.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Transforms/LoopIdiom/recognize-strlen.ll @@ -0,0 +1,95 @@ +; RUN: opt -loop-idiom < %s -S | FileCheck %s +target datalayout = "e-m:e-i64:64-n32:64" +target triple = "powerpc64le-unknown-linux-gnu" + +define i64 @valid_strlen1(i8* %Str) { +entry: + %tobool = icmp eq i8* %Str, null + br i1 %tobool, label %cleanup, label %lor.lhs.false + +lor.lhs.false: ; preds = %entry + %0 = load i8, i8* %Str, align 1 + %cmp = icmp eq i8 %0, 0 + br i1 %cmp, label %cleanup, label %for.inc + +; CHECK: call i64 @strlen(i8* %Str) +; CHECK: br i1 true +for.inc: ; preds = %lor.lhs.false, %for.inc + %Src.09 = phi i8* [ %incdec.ptr, %for.inc ], [ %Str, %lor.lhs.false ] + %incdec.ptr = getelementptr inbounds i8, i8* %Src.09, i64 1 + %.pr = load i8, i8* %incdec.ptr, align 1 + %tobool2 = icmp eq i8 %.pr, 0 + br i1 %tobool2, label %for.end, label %for.inc + +; CHECK-NOT: sub +for.end: ; preds = %for.inc + %sub.ptr.lhs.cast = ptrtoint i8* %incdec.ptr to i64 + %sub.ptr.rhs.cast = ptrtoint i8* %Str to i64 + %sub.ptr.sub = sub i64 %sub.ptr.lhs.cast, %sub.ptr.rhs.cast + br label %cleanup + +cleanup: ; preds = %lor.lhs.false, %entry, %for.end + %retval.0 = phi i64 [ %sub.ptr.sub, %for.end ], [ 0, %entry ], [ 0, %lor.lhs.false ] + ret i64 %retval.0 +} + + +define i64 @valid_strlen2(i8* %Str) { +entry: + %tobool = icmp eq i8* %Str, null + br i1 %tobool, label %cleanup, label %for.cond + +; CHECK: call i64 @strlen(i8* %Str) +; CHECK: br i1 true +for.cond: ; preds = %entry, %for.cond + %Src.0 = phi i8* [ %incdec.ptr, %for.cond ], [ %Str, %entry ] + %0 = load i8, i8* %Src.0, align 1 + %tobool1 = icmp eq i8 %0, 0 + %incdec.ptr = getelementptr inbounds i8, i8* %Src.0, i64 1 + br i1 %tobool1, label %for.end, label %for.cond + +; CHECK-NOT: sub +for.end: ; preds = %for.cond + %sub.ptr.lhs.cast = ptrtoint i8* %Src.0 to i64 + %sub.ptr.rhs.cast = ptrtoint i8* %Str to i64 + %sub.ptr.sub = sub i64 %sub.ptr.lhs.cast, %sub.ptr.rhs.cast + br label %cleanup + + cleanup: ; preds = %entry, %for.end + %retval.0 = phi i64 [ %sub.ptr.sub, %for.end ], [ 0, %entry ] + ret i64 %retval.0 +} + + +define zeroext i32 @valid_strlen3(i8* %Str) { +entry: + %tobool = icmp eq i8* %Str, null + br i1 %tobool, label %cleanup, label %lor.lhs.false + +lor.lhs.false: ; preds = %entry + %0 = load i8, i8* %Str, align 1 + %cmp = icmp eq i8 %0, 0 + br i1 %cmp, label %cleanup, label %for.inc + +; CHECK: call i64 @strlen(i8* %Str) +; CHECK: br i1 true +for.inc: ; preds = %lor.lhs.false, %for.inc + %Src.010 = phi i8* [ %incdec.ptr, %for.inc ], [ %Str, %lor.lhs.false ] + %incdec.ptr = getelementptr inbounds i8, i8* %Src.010, i64 1 + %.pr = load i8, i8* %incdec.ptr, align 1 + %tobool2 = icmp eq i8 %.pr, 0 + br i1 %tobool2, label %for.end, label %for.inc + +; CHECK: trunc i64 %strlen to i32 +; CHECK-NOT: sub +for.end: ; preds = %for.inc + %sub.ptr.lhs.cast = ptrtoint i8* %incdec.ptr to i64 + %sub.ptr.rhs.cast = ptrtoint i8* %Str to i64 + %sub.ptr.sub = sub i64 %sub.ptr.lhs.cast, %sub.ptr.rhs.cast + %conv3 = trunc i64 %sub.ptr.sub to i32 + br label %cleanup + +cleanup: ; preds = %lor.lhs.false, %entry, %for.end + %retval.0 = phi i32 [ %conv3, %for.end ], [ 0, %entry ], [ 0, %lor.lhs.false ] + ret i32 %retval.0 +}