Index: llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp =================================================================== --- llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp +++ llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp @@ -106,6 +106,7 @@ STATISTIC(NumMemSet, "Number of memset's formed from loop stores"); STATISTIC(NumMemCpy, "Number of memcpy's formed from loop load+stores"); +STATISTIC(NumStrLen, "Number of strlen's formed from loop loads"); static cl::opt UseLIRCodeSizeHeurs( "use-lir-code-size-heurs", @@ -113,6 +114,11 @@ "with -Os/-Oz"), cl::init(true), cl::Hidden); +static cl::opt DisableStrlenIdiom( + "disable-strlen-idiom", + cl::desc("Disable strlen loop idiom recognize and transform"), + cl::init(false), cl::Hidden); + namespace { class LoopIdiomRecognize { @@ -203,6 +209,7 @@ Value *Var, Instruction *DefX, const DebugLoc &DL, bool ZeroCheck, bool IsCntPhiUsedOutsideLoop); + bool recognizeAndInsertStrlen(); /// @} }; @@ -1203,7 +1210,8 @@ << "] Noncountable Loop %" << CurLoop->getHeader()->getName() << "\n"); - return recognizePopcount() || recognizeAndInsertFFS(); + return recognizePopcount() || recognizeAndInsertFFS() || + recognizeAndInsertStrlen(); } /// Check if the given conditional branch is based on the comparison between @@ -1575,6 +1583,153 @@ return true; } +/// Recognizes a strlen idiom by checking for loops that increment +/// a char pointer and then subtract with the base pointer. +/// +/// If detected, transforms the relevant code to a strlen function +/// call, and returns true; otherwise, returns false. +bool LoopIdiomRecognize::recognizeAndInsertStrlen() { + if (DisableStrlenIdiom) + return false; + + // Give up if the loop has multiple blocks or multiple backedges. + if (CurLoop->getNumBackEdges() != 1 || CurLoop->getNumBlocks() != 1) + return false; + + // It should have a preheader containing nothing but an unconditional branch. + BasicBlock *PH = CurLoop->getLoopPreheader(); + if (!PH || &PH->front() != PH->getTerminator()) + return false; + + auto *EntryBI = dyn_cast(PH->getTerminator()); + if (!EntryBI || EntryBI->isConditional()) + return false; + + // It should have a precondition block + auto *PreCondBB = PH->getSinglePredecessor(); + if (!PreCondBB) + return false; + + auto *PreCondBI = dyn_cast(PreCondBB->getTerminator()); + if (!PreCondBI || PreCondBI->isUnconditional()) + return false; + + // The loop exit must be conditioned on an icmp with 0. + // The icmp operand has to be a load on some SSA reg that increments + // by 1 in the loop. + BasicBlock *LoopBody = *(CurLoop->block_begin()); + auto *LoopTerm = dyn_cast(LoopBody->getTerminator()); + auto *LoopCond = matchCondition(LoopTerm, LoopBody); + + if (!LoopCond) + return false; + + auto *LoopLoad = dyn_cast(LoopCond); + if (!LoopLoad || !LoopLoad->getType()->isIntegerTy(8) || + LoopLoad->getPointerAddressSpace() != 0) + return false; + + // See if the pointer expression is an AddRec with step 1 ({n,+,1}) on + // the loop, indicating strlen calculation. + auto *IncPtr = LoopLoad->getPointerOperand(); + const SCEVAddRecExpr *LoadEv = dyn_cast(SE->getSCEV(IncPtr)); + if (!LoadEv || LoadEv->getLoop() != CurLoop || !LoadEv->isAffine()) + return false; + const SCEV *StepRec = LoadEv->getStepRecurrence(*SE); + if (!StepRec || !StepRec->isOne()) + return false; + + // Scan every instruction in the loop to ensure there are no side effects. + for (auto &I : *LoopBody) + if (I.mayHaveSideEffects()) + return false; + + auto *LoopExitBB = + (LoopTerm->getSuccessor(0) == LoopBody ? LoopTerm->getSuccessor(1) + : LoopTerm->getSuccessor(0)); + + // Check that the loop exit block is valid: + // It needs to have exactly one LCSSA Phi which is an AddRec. + bool FirstUseFound = false; + PHINode *LCSSAPhi = NULL; + for (auto &I : *LoopExitBB) { + PHINode *PN = dyn_cast(&I); + if (PN) { + for (unsigned PredBB = 0, E = PN->getNumIncomingValues(); PredBB != E; + ++PredBB) { + if ((PN->getIncomingBlock(PredBB) == LoopLoad->getParent()) && + (PN->getNumIncomingValues() == 1)) { + if (!FirstUseFound) { + LCSSAPhi = PN; + FirstUseFound = true; + } else + return false; + } + } + } + } + + if (!LCSSAPhi) + return false; + + const SCEVAddRecExpr *LCSSAEv = + dyn_cast(SE->getSCEV(LCSSAPhi->getIncomingValue(0))); + + if (!LCSSAEv || !dyn_cast(SE->getPointerBase(LCSSAEv)) || + !LCSSAEv->isAffine()) + return false; + + // We can now expand the base of the str and transform the loop + SCEVExpander Expander(*SE, *DL, "loop-idiom"); + ExpandedValuesCleaner EVC(Expander, TLI); + IRBuilder<> Builder(PH->getTerminator()); + + Value *Expanded = Expander.expandCodeFor( + LoadEv->getStart(), + Builder.getInt8PtrTy(LoopLoad->getPointerAddressSpace()), + PH->getTerminator()); + + if (!Expanded) + return false; + + Value *Strlen = emitStrLen(Expanded, Builder, *DL, TLI); + + if (!Strlen) + return false; + + EVC.add(Expanded); + EVC.commit(); + + // Rewrite LCSSAPhi to point to (%Str + %strlen) + Builder.SetInsertPoint(&*LCSSAPhi->getParent()->getFirstInsertionPt()); + + auto *GEP = Builder.CreateGEP(Expanded, Strlen, "strlen-idiom"); + + LCSSAPhi->replaceAllUsesWith(GEP); + + // Remove the loop-exit branch and delete dead instructions + RecursivelyDeleteTriviallyDeadInstructions(LCSSAPhi, TLI); + + ConstantInt *NewLoopCond = LoopTerm->getSuccessor(0) == LoopBody + ? Builder.getFalse() + : Builder.getTrue(); + LoopTerm->setCondition(NewLoopCond); + deleteDeadInstruction(cast(LoopCond)); + deleteDeadInstruction(cast(IncPtr)); + SE->forgetLoop(CurLoop); + + LLVM_DEBUG(dbgs() << " Formed strlen: " << *Strlen << "\n"); + + ORE.emit([&]() { + return OptimizationRemark(DEBUG_TYPE, "recognizeAndInsertStrlen", + CurLoop->getStartLoc(), PH) + << "Transformed pointer difference into a call to Strlen() function"; + }); + + ++NumStrLen; + return true; +} + /// Recognizes a population count idiom in a non-countable loop. /// /// If detected, transforms the relevant code to issue the popcount intrinsic @@ -1879,3 +2034,4 @@ // loop. The loop would otherwise not be deleted even if it becomes empty. SE->forgetLoop(CurLoop); } + Index: llvm/test/Transforms/LoopIdiom/recognize-strlen.ll =================================================================== --- /dev/null +++ llvm/test/Transforms/LoopIdiom/recognize-strlen.ll @@ -0,0 +1,158 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt -loop-idiom < %s -S | FileCheck %s +target datalayout = "e-m:e-i64:64-n32:64" +target triple = "powerpc64le-unknown-linux-gnu" + +define i64 @valid_strlen1(i8* %Str) { +; CHECK-LABEL: @valid_strlen1( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TOBOOL:%.*]] = icmp eq i8* [[STR:%.*]], null +; CHECK-NEXT: br i1 [[TOBOOL]], label [[CLEANUP:%.*]], label [[LOR_LHS_FALSE:%.*]] +; CHECK: lor.lhs.false: +; CHECK-NEXT: [[TMP0:%.*]] = load i8, i8* [[STR]], align 1 +; CHECK-NEXT: [[CMP:%.*]] = icmp eq i8 [[TMP0]], 0 +; CHECK-NEXT: br i1 [[CMP]], label [[CLEANUP]], label [[FOR_INC_PREHEADER:%.*]] +; CHECK: for.inc.preheader: +; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, i8* [[STR]], i64 1 +; CHECK-NEXT: [[STRLEN:%.*]] = call i64 @strlen(i8* [[SCEVGEP]]) +; CHECK-NEXT: br label [[FOR_INC:%.*]] +; CHECK: for.inc: +; CHECK-NEXT: [[SRC_09:%.*]] = phi i8* [ undef, [[FOR_INC]] ], [ [[STR]], [[FOR_INC_PREHEADER]] ] +; CHECK-NEXT: [[TOBOOL2:%.*]] = icmp eq i8 undef, 0 +; CHECK-NEXT: br i1 true, label [[FOR_END:%.*]], label [[FOR_INC]] +; CHECK: for.end: +; CHECK-NEXT: [[STRLEN_IDIOM:%.*]] = getelementptr i8, i8* [[SCEVGEP]], i64 [[STRLEN]] +; CHECK-NEXT: [[SUB_PTR_LHS_CAST:%.*]] = ptrtoint i8* [[STRLEN_IDIOM]] to i64 +; CHECK-NEXT: [[SUB_PTR_RHS_CAST:%.*]] = ptrtoint i8* [[STR]] to i64 +; CHECK-NEXT: [[SUB_PTR_SUB:%.*]] = sub i64 [[SUB_PTR_LHS_CAST]], [[SUB_PTR_RHS_CAST]] +; CHECK-NEXT: br label [[CLEANUP]] +; CHECK: cleanup: +; CHECK-NEXT: [[RETVAL_0:%.*]] = phi i64 [ [[SUB_PTR_SUB]], [[FOR_END]] ], [ 0, [[ENTRY:%.*]] ], [ 0, [[LOR_LHS_FALSE]] ] +; CHECK-NEXT: ret i64 [[RETVAL_0]] +; +entry: + %tobool = icmp eq i8* %Str, null + br i1 %tobool, label %cleanup, label %lor.lhs.false + +lor.lhs.false: ; preds = %entry + %0 = load i8, i8* %Str, align 1 + %cmp = icmp eq i8 %0, 0 + br i1 %cmp, label %cleanup, label %for.inc + +for.inc: ; preds = %lor.lhs.false, %for.inc + %Src.09 = phi i8* [ %incdec.ptr, %for.inc ], [ %Str, %lor.lhs.false ] + %incdec.ptr = getelementptr inbounds i8, i8* %Src.09, i64 1 + %.pr = load i8, i8* %incdec.ptr, align 1 + %tobool2 = icmp eq i8 %.pr, 0 + br i1 %tobool2, label %for.end, label %for.inc + +for.end: ; preds = %for.inc + %sub.ptr.lhs.cast = ptrtoint i8* %incdec.ptr to i64 + %sub.ptr.rhs.cast = ptrtoint i8* %Str to i64 + %sub.ptr.sub = sub i64 %sub.ptr.lhs.cast, %sub.ptr.rhs.cast + br label %cleanup + +cleanup: ; preds = %lor.lhs.false, %entry, %for.end + %retval.0 = phi i64 [ %sub.ptr.sub, %for.end ], [ 0, %entry ], [ 0, %lor.lhs.false ] + ret i64 %retval.0 +} + + +define i64 @valid_strlen2(i8* %Str) { +; CHECK-LABEL: @valid_strlen2( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TOBOOL:%.*]] = icmp eq i8* [[STR:%.*]], null +; CHECK-NEXT: br i1 [[TOBOOL]], label [[CLEANUP:%.*]], label [[FOR_COND_PREHEADER:%.*]] +; CHECK: for.cond.preheader: +; CHECK-NEXT: [[STRLEN:%.*]] = call i64 @strlen(i8* [[STR]]) +; CHECK-NEXT: br label [[FOR_COND:%.*]] +; CHECK: for.cond: +; CHECK-NEXT: [[TOBOOL1:%.*]] = icmp eq i8 undef, 0 +; CHECK-NEXT: [[INCDEC_PTR:%.*]] = getelementptr inbounds i8, i8* undef, i64 1 +; CHECK-NEXT: br i1 true, label [[FOR_END:%.*]], label [[FOR_COND]] +; CHECK: for.end: +; CHECK-NEXT: [[STRLEN_IDIOM:%.*]] = getelementptr i8, i8* [[STR]], i64 [[STRLEN]] +; CHECK-NEXT: [[SUB_PTR_LHS_CAST:%.*]] = ptrtoint i8* [[STRLEN_IDIOM]] to i64 +; CHECK-NEXT: [[SUB_PTR_RHS_CAST:%.*]] = ptrtoint i8* [[STR]] to i64 +; CHECK-NEXT: [[SUB_PTR_SUB:%.*]] = sub i64 [[SUB_PTR_LHS_CAST]], [[SUB_PTR_RHS_CAST]] +; CHECK-NEXT: br label [[CLEANUP]] +; CHECK: cleanup: +; CHECK-NEXT: [[RETVAL_0:%.*]] = phi i64 [ [[SUB_PTR_SUB]], [[FOR_END]] ], [ 0, [[ENTRY:%.*]] ] +; CHECK-NEXT: ret i64 [[RETVAL_0]] +; +entry: + %tobool = icmp eq i8* %Str, null + br i1 %tobool, label %cleanup, label %for.cond + +for.cond: ; preds = %entry, %for.cond + %Src.0 = phi i8* [ %incdec.ptr, %for.cond ], [ %Str, %entry ] + %0 = load i8, i8* %Src.0, align 1 + %tobool1 = icmp eq i8 %0, 0 + %incdec.ptr = getelementptr inbounds i8, i8* %Src.0, i64 1 + br i1 %tobool1, label %for.end, label %for.cond + +for.end: ; preds = %for.cond + %sub.ptr.lhs.cast = ptrtoint i8* %Src.0 to i64 + %sub.ptr.rhs.cast = ptrtoint i8* %Str to i64 + %sub.ptr.sub = sub i64 %sub.ptr.lhs.cast, %sub.ptr.rhs.cast + br label %cleanup + + cleanup: ; preds = %entry, %for.end + %retval.0 = phi i64 [ %sub.ptr.sub, %for.end ], [ 0, %entry ] + ret i64 %retval.0 +} + + +define void @invalid_strlen3(i8* %s, i32 zeroext %i) { +; CHECK-LABEL: @invalid_strlen3( +; CHECK-NEXT: entry: +; CHECK-NEXT: br label [[WHILE_COND:%.*]] +; CHECK: while.cond: +; CHECK-NEXT: [[S_ADDR_0:%.*]] = phi i8* [ [[S:%.*]], [[ENTRY:%.*]] ], [ [[INCDEC_PTR1:%.*]], [[WHILE_COND]] ] +; CHECK-NEXT: [[TMP0:%.*]] = load i8, i8* [[S_ADDR_0]], align 1 +; CHECK-NEXT: [[TOBOOL_NOT:%.*]] = icmp eq i8 [[TMP0]], 0 +; CHECK-NEXT: [[INCDEC_PTR1]] = getelementptr inbounds i8, i8* [[S_ADDR_0]], i64 1 +; CHECK-NEXT: br i1 [[TOBOOL_NOT]], label [[WHILE_END:%.*]], label [[WHILE_COND]] +; CHECK: while.end: +; CHECK-NEXT: [[S_ADDR_0_LCSSA:%.*]] = phi i8* [ [[S_ADDR_0]], [[WHILE_COND]] ] +; CHECK-NEXT: [[INCDEC_PTR1_LCSSA:%.*]] = phi i8* [ [[INCDEC_PTR1]], [[WHILE_COND]] ] +; CHECK-NEXT: store i8 45, i8* [[S_ADDR_0_LCSSA]], align 1 +; CHECK-NEXT: [[CMP:%.*]] = icmp ult i32 [[I:%.*]], 10 +; CHECK-NEXT: br i1 [[CMP]], label [[IF_THEN:%.*]], label [[IF_END:%.*]] +; CHECK: if.then: +; CHECK-NEXT: store i8 65, i8* [[INCDEC_PTR1_LCSSA]], align 1 +; CHECK-NEXT: br label [[IF_END9:%.*]] +; CHECK: if.end: +; CHECK-NEXT: store i8 66, i8* [[INCDEC_PTR1_LCSSA]], align 1 +; CHECK-NEXT: br label [[IF_END9]] +; CHECK: if.end9: +; CHECK-NEXT: ret void +; +entry: + br label %while.cond + +while.cond: ; preds = %while.cond, %entry + %s.addr.0 = phi i8* [ %s, %entry ], [ %incdec.ptr1, %while.cond ] + %0 = load i8, i8* %s.addr.0, align 1 + %tobool.not = icmp eq i8 %0, 0 + %incdec.ptr1 = getelementptr inbounds i8, i8* %s.addr.0, i64 1 + br i1 %tobool.not, label %while.end, label %while.cond + +while.end: ; preds = %while.cond + %s.addr.0.lcssa = phi i8* [ %s.addr.0, %while.cond ] + %incdec.ptr1.lcssa = phi i8* [ %incdec.ptr1, %while.cond ] + store i8 45, i8* %s.addr.0.lcssa, align 1 + %cmp = icmp ult i32 %i, 10 + br i1 %cmp, label %if.then, label %if.end + +if.then: ; preds = %while.end + store i8 65, i8* %incdec.ptr1.lcssa, align 1 + br label %if.end9 + +if.end: ; preds = %while.end + store i8 66, i8* %incdec.ptr1.lcssa, align 1 + br label %if.end9 + +if.end9: ; preds = %if.end, %if.then + ret void +}