Index: llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp =================================================================== --- llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp +++ llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp @@ -79,6 +79,7 @@ #include "llvm/IR/LLVMContext.h" #include "llvm/IR/Module.h" #include "llvm/IR/PassManager.h" +#include "llvm/IR/PatternMatch.h" #include "llvm/IR/Type.h" #include "llvm/IR/User.h" #include "llvm/IR/Value.h" @@ -101,11 +102,13 @@ #include using namespace llvm; +using namespace llvm::PatternMatch; #define DEBUG_TYPE "loop-idiom" STATISTIC(NumMemSet, "Number of memset's formed from loop stores"); STATISTIC(NumMemCpy, "Number of memcpy's formed from loop load+stores"); +STATISTIC(NumStrLen, "Number of strlen's formed from loop loads"); static cl::opt UseLIRCodeSizeHeurs( "use-lir-code-size-heurs", @@ -113,6 +116,11 @@ "with -Os/-Oz"), cl::init(true), cl::Hidden); +static cl::opt DisableStrLenIdiom( + "disable-strlen-idiom", + cl::desc("Disable strlen loop idiom recognize and transform"), + cl::init(false), cl::Hidden); + namespace { class LoopIdiomRecognize { @@ -204,6 +212,7 @@ const DebugLoc &DL, bool ZeroCheck, bool IsCntPhiUsedOutsideLoop); + bool recognizeAndInsertStrLen(); /// @} }; @@ -1203,12 +1212,44 @@ << "] Noncountable Loop %" << CurLoop->getHeader()->getName() << "\n"); - return recognizePopcount() || recognizeAndInsertFFS(); + return recognizePopcount() || recognizeAndInsertFFS() || + recognizeAndInsertStrLen(); +} + +/// getCandidateSubInstr - Return sub instruction if there is a pointer +/// difference of \p EndAddress and \p StartAddress calculated that is also the +/// only use of EndAddress, else return nullptr to the caller. +static Instruction *getCandidateSubInstr(Instruction *EndAddress, + Value *StartAddress) { + // The pointer to the end address should only have one use which is a pointer + // to int instruction. + if (!EndAddress->hasOneUse()) + return nullptr; + + auto &U = *EndAddress->use_begin(); + if (PtrToIntInst *PToI = dyn_cast(U.getUser())) { + if (!PToI->hasOneUse()) + return nullptr; + Use &PToIUse = *PToI->use_begin(); + + // The only user of the PtrToIntInst should be the sub instruction that + // calculates the difference b/w the two pointer operands. + Instruction *Inst = dyn_cast(PToIUse.getUser()); + if (Inst->getOpcode() != Instruction::Sub || Inst->getOperand(0) != PToI) + return nullptr; + if (match(Inst->getOperand(1), m_PtrToInt(m_Value(StartAddress)))) { + // The candidate sub instruction is found, update the pointer to pass + // back to the caller. + return Inst; + } + } + + return nullptr; } /// Check if the given conditional branch is based on the comparison between -/// a variable and zero, and if the variable is non-zero or zero (JmpOnZero is -/// true), the control yields to the loop entry. If the branch matches the +/// a variable and zero/null, and if the variable is non-zero or zero (JmpOnZero +/// is true), the control yields to the loop entry. If the branch matches the /// behavior, the variable involved in the comparison is returned. This function /// will be called to see if the precondition and postcondition of the loop are /// in desirable form. @@ -1221,9 +1262,11 @@ if (!Cond) return nullptr; - ConstantInt *CmpZero = dyn_cast(Cond->getOperand(1)); - if (!CmpZero || !CmpZero->isZero()) - return nullptr; + if (!isa(Cond->getOperand(1))) { + ConstantInt *CmpZero = dyn_cast(Cond->getOperand(1)); + if (!CmpZero || !CmpZero->isZero()) + return nullptr; + } BasicBlock *TrueSucc = BI->getSuccessor(0); BasicBlock *FalseSucc = BI->getSuccessor(1); @@ -1575,6 +1618,162 @@ return true; } +/// Recognizes a strlen idiom by checking for loops that increment +/// a char pointer and then subtract with the base pointer. +/// +/// If detected, transforms the relevant code to a strlen function +/// call, and returns true; otherwise, returns false. +/// +/// The core idiom we are trying to detect is: +/// \code +/// if (str == NULL) +/// goto loop-exit // the precondition of the loop +/// start = str; +/// do { +/// str++; +/// } while(*str!='\0'); +/// return (str - start); +/// loop-exit: +/// \endcode +/// +/// The transformed output is similar to below c-code: +/// \code +/// if (str == NULL) +/// goto loop-exit // the precondition of the loop +/// return strlen(str); +/// \endcode +bool LoopIdiomRecognize::recognizeAndInsertStrLen() { + if (DisableStrLenIdiom) + return false; + + // Give up if the loop has multiple blocks or multiple backedges. + if (CurLoop->getNumBackEdges() != 1 || CurLoop->getNumBlocks() != 1) + return false; + + // It should have a preheader containing nothing but an unconditional branch. + auto *Pre = CurLoop->getLoopPreheader(); + if (!Pre || &Pre->front() != Pre->getTerminator()) + return false; + + auto *EntryBI = dyn_cast(Pre->getTerminator()); + if (!EntryBI || EntryBI->isConditional()) + return false; + + // It should have a precondition block + auto *PreCondBB = Pre->getSinglePredecessor(); + if (!PreCondBB) + return false; + + // The precondition terminator instruction should skip the loop body based on + // an icmp with zero/null. + if (!matchCondition(dyn_cast(PreCondBB->getTerminator()), Pre)) + return false; + + // The loop exit must be conditioned on an icmp with 0. + // The icmp operand has to be a load on some SSA reg that increments + // by 1 in the loop. + auto *LoopBody = *(CurLoop->block_begin()); + auto *LoopTerm = dyn_cast(LoopBody->getTerminator()); + auto *LoopCond = matchCondition(LoopTerm, LoopBody); + + if (!LoopCond) + return false; + + auto *LoopLoad = dyn_cast(LoopCond); + if (!LoopLoad || !LoopLoad->getType()->isIntegerTy(8) || + LoopLoad->getPointerAddressSpace() != 0) + return false; + + // See if the pointer expression is an AddRec with step 1 ({n,+,1}) on + // the loop, indicating strlen calculation. + auto *IncPtr = LoopLoad->getPointerOperand(); + const SCEVAddRecExpr *LoadEv = dyn_cast(SE->getSCEV(IncPtr)); + if (!LoadEv || LoadEv->getLoop() != CurLoop || !LoadEv->isAffine()) + return false; + auto *StepRec = LoadEv->getStepRecurrence(*SE); + if (!StepRec || !StepRec->isOne()) + return false; + + // Scan every instruction in the loop to ensure there are no side effects. + for (auto &I : *LoopBody) + if (I.mayHaveSideEffects()) + return false; + + auto *LoopExitBB = CurLoop->getExitBlock(); + if (!LoopExitBB) + return false; + + // Check that the loop exit block is valid: + // It needs to have exactly one LCSSA Phi which is an AddRec. + PHINode *LCSSAPhi = nullptr; + for (PHINode &PN : LoopExitBB->phis()) { + if (!LCSSAPhi && PN.getNumIncomingValues() == 1) + LCSSAPhi = &PN; + else + return false; + } + + if (!LCSSAPhi || !SE->isSCEVable(LCSSAPhi->getType())) + return false; + + const SCEVAddRecExpr *LCSSAEv = + dyn_cast(SE->getSCEV(LCSSAPhi->getIncomingValue(0))); + + if (!LCSSAEv || !dyn_cast(SE->getPointerBase(LCSSAEv)) || + !LCSSAEv->isAffine()) + return false; + + // We can now expand the base of the str + SCEVExpander Expander(*SE, *DL, "loop-idiom"); + ExpandedValuesCleaner EVC(Expander, TLI); + IRBuilder<> Builder(Pre->getTerminator()); + + Value *Expanded = Expander.expandCodeFor( + LoadEv->getStart(), + Builder.getInt8PtrTy(LoopLoad->getPointerAddressSpace()), + Pre->getTerminator()); + + if (!Expanded) + return false; + + // Check that the LoopExitBB is calculating the string length + Instruction *ResInst = getCandidateSubInstr(LCSSAPhi, Expanded); + if (!ResInst) + return false; + + Value *StrLen = emitStrLen(Expanded, Builder, *DL, TLI); + if (!StrLen) + return false; + + EVC.add(Expanded); + EVC.commit(); + + // Replace the subtraction instruction by the result of strlen + ResInst->replaceAllUsesWith(StrLen); + + // Remove the loop-exit branch and delete dead instructions + RecursivelyDeleteTriviallyDeadInstructions(ResInst, TLI); + + ConstantInt *NewLoopCond = LoopTerm->getSuccessor(0) == LoopBody + ? Builder.getFalse() + : Builder.getTrue(); + LoopTerm->setCondition(NewLoopCond); + deleteDeadInstruction(cast(LoopCond)); + deleteDeadInstruction(cast(IncPtr)); + SE->forgetLoop(CurLoop); + + LLVM_DEBUG(dbgs() << " Formed strlen: " << *StrLen << "\n"); + + ORE.emit([&]() { + return OptimizationRemark(DEBUG_TYPE, "recognizeAndInsertStrLen", + CurLoop->getStartLoc(), Pre) + << "Transformed pointer difference into a call to strlen() function"; + }); + + ++NumStrLen; + return true; +} + /// Recognizes a population count idiom in a non-countable loop. /// /// If detected, transforms the relevant code to issue the popcount intrinsic Index: llvm/test/Transforms/LoopIdiom/recognize-strlen.ll =================================================================== --- /dev/null +++ llvm/test/Transforms/LoopIdiom/recognize-strlen.ll @@ -0,0 +1,150 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt -loop-idiom < %s -S | FileCheck %s +target datalayout = "e-m:e-i64:64-n32:64" +target triple = "powerpc64le-unknown-linux-gnu" + +define i64 @valid_strlen1(i8* %Str) { +; CHECK-LABEL: @valid_strlen1( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TOBOOL:%.*]] = icmp eq i8* [[STR:%.*]], null +; CHECK-NEXT: br i1 [[TOBOOL]], label [[CLEANUP:%.*]], label [[LOR_LHS_FALSE:%.*]] +; CHECK: lor.lhs.false: +; CHECK-NEXT: [[TMP0:%.*]] = load i8, i8* [[STR]], align 1 +; CHECK-NEXT: [[CMP:%.*]] = icmp eq i8 [[TMP0]], 0 +; CHECK-NEXT: br i1 [[CMP]], label [[CLEANUP]], label [[FOR_INC_PREHEADER:%.*]] +; CHECK: for.inc.preheader: +; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, i8* [[STR]], i64 1 +; CHECK-NEXT: [[STRLEN:%.*]] = call i64 @strlen(i8* [[SCEVGEP]]) +; CHECK-NEXT: br label [[FOR_INC:%.*]] +; CHECK: for.inc: +; CHECK-NEXT: [[SRC_09:%.*]] = phi i8* [ undef, [[FOR_INC]] ], [ [[STR]], [[FOR_INC_PREHEADER]] ] +; CHECK-NEXT: [[TOBOOL2:%.*]] = icmp eq i8 undef, 0 +; CHECK-NEXT: br i1 true, label [[FOR_END:%.*]], label [[FOR_INC]] +; CHECK: for.end: +; CHECK-NEXT: br label [[CLEANUP]] +; CHECK: cleanup: +; CHECK-NEXT: [[RETVAL_0:%.*]] = phi i64 [ [[STRLEN]], [[FOR_END]] ], [ 0, [[ENTRY:%.*]] ], [ 0, [[LOR_LHS_FALSE]] ] +; CHECK-NEXT: ret i64 [[RETVAL_0]] +; +entry: + %tobool = icmp eq i8* %Str, null + br i1 %tobool, label %cleanup, label %lor.lhs.false + +lor.lhs.false: ; preds = %entry + %0 = load i8, i8* %Str, align 1 + %cmp = icmp eq i8 %0, 0 + br i1 %cmp, label %cleanup, label %for.inc + +for.inc: ; preds = %lor.lhs.false, %for.inc + %Src.09 = phi i8* [ %incdec.ptr, %for.inc ], [ %Str, %lor.lhs.false ] + %incdec.ptr = getelementptr inbounds i8, i8* %Src.09, i64 1 + %.pr = load i8, i8* %incdec.ptr, align 1 + %tobool2 = icmp eq i8 %.pr, 0 + br i1 %tobool2, label %for.end, label %for.inc + +for.end: ; preds = %for.inc + %sub.ptr.lhs.cast = ptrtoint i8* %incdec.ptr to i64 + %sub.ptr.rhs.cast = ptrtoint i8* %Str to i64 + %sub.ptr.sub = sub i64 %sub.ptr.lhs.cast, %sub.ptr.rhs.cast + br label %cleanup + +cleanup: ; preds = %lor.lhs.false, %entry, %for.end + %retval.0 = phi i64 [ %sub.ptr.sub, %for.end ], [ 0, %entry ], [ 0, %lor.lhs.false ] + ret i64 %retval.0 +} + + +define i64 @valid_strlen2(i8* %Str) { +; CHECK-LABEL: @valid_strlen2( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TOBOOL:%.*]] = icmp eq i8* [[STR:%.*]], null +; CHECK-NEXT: br i1 [[TOBOOL]], label [[CLEANUP:%.*]], label [[FOR_COND_PREHEADER:%.*]] +; CHECK: for.cond.preheader: +; CHECK-NEXT: [[STRLEN:%.*]] = call i64 @strlen(i8* [[STR]]) +; CHECK-NEXT: br label [[FOR_COND:%.*]] +; CHECK: for.cond: +; CHECK-NEXT: [[TOBOOL1:%.*]] = icmp eq i8 undef, 0 +; CHECK-NEXT: [[INCDEC_PTR:%.*]] = getelementptr inbounds i8, i8* undef, i64 1 +; CHECK-NEXT: br i1 true, label [[FOR_END:%.*]], label [[FOR_COND]] +; CHECK: for.end: +; CHECK-NEXT: br label [[CLEANUP]] +; CHECK: cleanup: +; CHECK-NEXT: [[RETVAL_0:%.*]] = phi i64 [ [[STRLEN]], [[FOR_END]] ], [ 0, [[ENTRY:%.*]] ] +; CHECK-NEXT: ret i64 [[RETVAL_0]] +; +entry: + %tobool = icmp eq i8* %Str, null + br i1 %tobool, label %cleanup, label %for.cond + +for.cond: ; preds = %entry, %for.cond + %Src.0 = phi i8* [ %incdec.ptr, %for.cond ], [ %Str, %entry ] + %0 = load i8, i8* %Src.0, align 1 + %tobool1 = icmp eq i8 %0, 0 + %incdec.ptr = getelementptr inbounds i8, i8* %Src.0, i64 1 + br i1 %tobool1, label %for.end, label %for.cond + +for.end: ; preds = %for.cond + %sub.ptr.lhs.cast = ptrtoint i8* %Src.0 to i64 + %sub.ptr.rhs.cast = ptrtoint i8* %Str to i64 + %sub.ptr.sub = sub i64 %sub.ptr.lhs.cast, %sub.ptr.rhs.cast + br label %cleanup + + cleanup: ; preds = %entry, %for.end + %retval.0 = phi i64 [ %sub.ptr.sub, %for.end ], [ 0, %entry ] + ret i64 %retval.0 +} + + +define void @invalid_strlen3(i8* %s, i32 zeroext %i) { +; CHECK-LABEL: @invalid_strlen3( +; CHECK-NEXT: entry: +; CHECK-NEXT: br label [[WHILE_COND:%.*]] +; CHECK: while.cond: +; CHECK-NEXT: [[S_ADDR_0:%.*]] = phi i8* [ [[S:%.*]], [[ENTRY:%.*]] ], [ [[INCDEC_PTR1:%.*]], [[WHILE_COND]] ] +; CHECK-NEXT: [[TMP0:%.*]] = load i8, i8* [[S_ADDR_0]], align 1 +; CHECK-NEXT: [[TOBOOL_NOT:%.*]] = icmp eq i8 [[TMP0]], 0 +; CHECK-NEXT: [[INCDEC_PTR1]] = getelementptr inbounds i8, i8* [[S_ADDR_0]], i64 1 +; CHECK-NEXT: br i1 [[TOBOOL_NOT]], label [[WHILE_END:%.*]], label [[WHILE_COND]] +; CHECK: while.end: +; CHECK-NEXT: [[S_ADDR_0_LCSSA:%.*]] = phi i8* [ [[S_ADDR_0]], [[WHILE_COND]] ] +; CHECK-NEXT: [[INCDEC_PTR1_LCSSA:%.*]] = phi i8* [ [[INCDEC_PTR1]], [[WHILE_COND]] ] +; CHECK-NEXT: store i8 45, i8* [[S_ADDR_0_LCSSA]], align 1 +; CHECK-NEXT: [[CMP:%.*]] = icmp ult i32 [[I:%.*]], 10 +; CHECK-NEXT: br i1 [[CMP]], label [[IF_THEN:%.*]], label [[IF_END:%.*]] +; CHECK: if.then: +; CHECK-NEXT: store i8 65, i8* [[INCDEC_PTR1_LCSSA]], align 1 +; CHECK-NEXT: br label [[IF_END9:%.*]] +; CHECK: if.end: +; CHECK-NEXT: store i8 66, i8* [[INCDEC_PTR1_LCSSA]], align 1 +; CHECK-NEXT: br label [[IF_END9]] +; CHECK: if.end9: +; CHECK-NEXT: ret void +; +entry: + br label %while.cond + +while.cond: ; preds = %while.cond, %entry + %s.addr.0 = phi i8* [ %s, %entry ], [ %incdec.ptr1, %while.cond ] + %0 = load i8, i8* %s.addr.0, align 1 + %tobool.not = icmp eq i8 %0, 0 + %incdec.ptr1 = getelementptr inbounds i8, i8* %s.addr.0, i64 1 + br i1 %tobool.not, label %while.end, label %while.cond + +while.end: ; preds = %while.cond + %s.addr.0.lcssa = phi i8* [ %s.addr.0, %while.cond ] + %incdec.ptr1.lcssa = phi i8* [ %incdec.ptr1, %while.cond ] + store i8 45, i8* %s.addr.0.lcssa, align 1 + %cmp = icmp ult i32 %i, 10 + br i1 %cmp, label %if.then, label %if.end + +if.then: ; preds = %while.end + store i8 65, i8* %incdec.ptr1.lcssa, align 1 + br label %if.end9 + +if.end: ; preds = %while.end + store i8 66, i8* %incdec.ptr1.lcssa, align 1 + br label %if.end9 + +if.end9: ; preds = %if.end, %if.then + ret void +}