diff --git a/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp b/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp --- a/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp +++ b/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp @@ -79,6 +79,7 @@ #include "llvm/IR/LLVMContext.h" #include "llvm/IR/Module.h" #include "llvm/IR/PassManager.h" +#include "llvm/IR/PatternMatch.h" #include "llvm/IR/Type.h" #include "llvm/IR/User.h" #include "llvm/IR/Value.h" @@ -101,6 +102,7 @@ #include using namespace llvm; +using namespace llvm::PatternMatch; #define DEBUG_TYPE "loop-idiom" @@ -113,6 +115,11 @@ "with -Os/-Oz"), cl::init(true), cl::Hidden); +static cl::opt DisableStrlenIdiom( + "disable-strlen-idiom", + cl::desc("Disable strlen loop idiom recognize and transform"), + cl::init(false), cl::Hidden); + namespace { class LoopIdiomRecognize { @@ -203,6 +210,7 @@ Value *Var, Instruction *DefX, const DebugLoc &DL, bool ZeroCheck, bool IsCntPhiUsedOutsideLoop); + bool recognizeAndInsertStrlen(); /// @} }; @@ -1203,7 +1211,8 @@ << "] Noncountable Loop %" << CurLoop->getHeader()->getName() << "\n"); - return recognizePopcount() || recognizeAndInsertFFS(); + return recognizePopcount() || recognizeAndInsertFFS() || + recognizeAndInsertStrlen(); } /// Check if the given conditional branch is based on the comparison between @@ -1575,6 +1584,211 @@ return true; } +/// Recognizes a strlen idiom by checking for loops that either keep +/// a variable counter or increment a char pointer and then subtract +/// with the base pointer. +/// +/// If detected, transforms the relevant code to a strlen function +/// call, and returns true; otherwise, returns false. +bool LoopIdiomRecognize::recognizeAndInsertStrlen() { + if (DisableStrlenIdiom) + return false; + + // Give up if the loop has multiple blocks or multiple backedges. + if (CurLoop->getNumBackEdges() != 1 || CurLoop->getNumBlocks() != 1) + return false; + + BasicBlock *LoopBody = *(CurLoop->block_begin()); + // The strlen loop idiom can only have a maximum of 7 instructions: + // this arises when there is an increment variable in the loop. + const unsigned MaxLoopSize = 7; + if (LoopBody->size() > MaxLoopSize) + return false; + + // It should have a preheader containing nothing but an unconditional branch. + BasicBlock *PH = CurLoop->getLoopPreheader(); + if (!PH || &PH->front() != PH->getTerminator()) + return false; + auto *EntryBI = dyn_cast(PH->getTerminator()); + if (!EntryBI || EntryBI->isConditional()) + return false; + + // It should have a precondition block where the generated strlen intrinsic + // function can be inserted. + auto *PreCondBB = PH->getSinglePredecessor(); + if (!PreCondBB) + return false; + auto *PreCondBI = dyn_cast(PreCondBB->getTerminator()); + if (!PreCondBI || PreCondBI->isUnconditional()) + return false; + + // The loop exit must be conditioned on an icmp with 0. + // The icmp operand has to be a load on some SSA reg that increments + // by 1 in the loop. + auto *LoopTerm = dyn_cast(LoopBody->getTerminator()); + auto *LoopCond = matchCondition(LoopTerm, LoopBody); + if (!LoopCond || !LoopCond->hasOneUse()) + return false; + auto *LoopLoad = dyn_cast(LoopCond); + if (!LoopLoad) + return false; + + // Check that the vregs (phi and gep) are of valid form. + auto *IncPtr = LoopLoad->getPointerOperand(); + PHINode *LoopPHI = nullptr; + GetElementPtrInst *LoopGEP = nullptr; + if ((LoopGEP = dyn_cast(IncPtr))) { + LoopPHI = dyn_cast(LoopGEP->getPointerOperand()); + if (!LoopPHI || LoopPHI->getParent() != LoopBody) + return false; + // The phi node has to have incoming values from loop body and preheader + if (LoopPHI->getBasicBlockIndex(LoopBody) == -1 || + LoopPHI->getBasicBlockIndex(PH) == -1) + return false; + // The IncPtr could be a phi node if the first char was already null checked + } else if ((LoopPHI = dyn_cast(IncPtr))) { + if (LoopPHI->getBasicBlockIndex(LoopBody) == -1 || + LoopPHI->getBasicBlockIndex(PH) == -1) + return false; + LoopGEP = dyn_cast( + LoopPHI->getIncomingValueForBlock(LoopBody)); + } else + return false; + if (!LoopPHI || !LoopGEP || LoopGEP->getParent() != LoopBody) + return false; + if (LoopPHI->getNumIncomingValues() != 2 || + LoopPHI->getIncomingValueForBlock(LoopBody) != LoopGEP) + return false; + + // The increment can only accumulate constant offset of 1 + const auto &DL = CurLoop->getHeader()->getModule()->getDataLayout(); + APInt Offset(DL.getPointerTypeSizeInBits(LoopGEP->getType()), 0); + if (!LoopGEP->accumulateConstantOffset(DL, Offset)) + return false; + + // The string base (source) must be of pointer type. + Value *StrSrc = LoopPHI->getIncomingValueForBlock(PH); + PointerType *PT = dyn_cast(StrSrc->getType()); + if (!PT) + return false; + // Make sure the char width is 8 + if (PT->getPointerElementType()->getTypeID() != Type::IntegerTyID) + return false; + unsigned PtBW = PT->getPointerElementType()->getIntegerBitWidth(); + if (PtBW != 8) + return false; + if (Offset != 1) + return false; + + Value *ResInstr = nullptr; + if (IncPtr->hasNUses(3)) { + // Check if the increment vreg computes a pointer difference + // with the string base. If so, the loop should do nothing but increment it. + PtrToIntInst *SubLHS = nullptr; + Value *SubRHS = nullptr; + for (auto *U : IncPtr->users()) { + if (U == LoopLoad || U == LoopPHI || U == LoopGEP) + continue; + auto *LHSPhi = dyn_cast(U); + if (!LHSPhi || LHSPhi->getNumIncomingValues() != 1 || + !LHSPhi->hasOneUse()) + return false; + SubLHS = dyn_cast(LHSPhi->user_back()); + break; + } + // Patternmatching IR that subtracts pointers of this form: + // %inc.lcssa = phi i8* [ %inc, %loop ] + // %sub.ptr.lhs = ptrtoint i8* %inc.lcssa to i64 + // %sub.ptr.rhs = ptrtoint i8* %str.base to i64 + // %sub.ptr.sub = sub i64 %sub.ptr.lhs, %sub.ptr.rhs + if (!SubLHS || !SubLHS->hasOneUse()) + return false; + ResInstr = SubLHS->user_back(); + if (!match(ResInstr, m_Sub(m_Specific(SubLHS), m_Value(SubRHS))) || + !isa(SubRHS) || !SubRHS->hasOneUse()) + return false; + // Check that the RHS is the string base + if (cast(SubRHS)->getPointerOperand() != StrSrc) + return false; + + // loop can only have phi, gep, load, icmp, and br + if (LoopBody->size() != 5) + return false; + + } else if (IncPtr->hasNUses(2)) { + // If the increment vreg does not compute a pointer difference, + // check if there is an add instruction that is used as the strlen result. + // If so, the loop should do nothing else. + // Seeking form: + // %add.phi = phi i64 [ %inc, %loop ], [ 0, %loop.preheader ] + // %inc = add i64 %add.phi, 1 + for (BasicBlock::iterator I = LoopBody->begin(), E = LoopBody->end(); + I != E;) { + Instruction *Inst = &*I++; + BinaryOperator *AddOneOp; + if ((AddOneOp = dyn_cast(Inst))) { + if (AddOneOp->getOpcode() != Instruction::Add) + return false; + ConstantInt *AddInc = dyn_cast(AddOneOp->getOperand(1)); + if (!AddInc || !AddInc->isOne()) + return false; + auto *AddPhi = dyn_cast(AddOneOp->getOperand(0)); + if (!AddPhi || !AddPhi->hasOneUse() || + AddPhi->getNumIncomingValues() != 2 || + AddPhi->getBasicBlockIndex(LoopBody) == -1 || + AddPhi->getBasicBlockIndex(PH) == -1 || + AddPhi->getIncomingValueForBlock(LoopBody) != AddOneOp || + AddPhi->getParent() != LoopBody) + return false; + ConstantInt *StartVal = + dyn_cast(AddPhi->getIncomingValueForBlock(PH)); + if (!StartVal || !StartVal->isZero()) + return false; + ResInstr = AddOneOp; + break; + } + } + // loop can only have ptr phi, ptr gep, inc phi, inc add, + // load, icmp, and br. + if (LoopBody->size() != 7) + return false; + } else + return false; + + // Scan every instruction in the loop to ensure it is doing nothing else. + for (BasicBlock::iterator I = LoopBody->begin(), E = LoopBody->end(); + I != E;) { + Instruction *Inst = &*I++; + if (Inst == LoopLoad || Inst == LoopCond) + if (!Inst->hasOneUse()) + return false; + if (Inst->mayHaveSideEffects()) + return false; + if (Inst != LoopTerm && !Inst->getType()->isIntegerTy() && + !Inst->getType()->isPointerTy()) + return false; + } + + // Insert strlen and RAUW + IRBuilder<> Builder(PreCondBI); + Value *FuncCall = emitStrLen(StrSrc, Builder, DL, TLI); + Value *Strlen = Builder.CreateZExtOrTrunc(FuncCall, ResInstr->getType()); + ResInstr->replaceAllUsesWith(Strlen); + RecursivelyDeleteTriviallyDeadInstructions(ResInstr, TLI); + + // Remove the loop-exit branch + ConstantInt *NewLoopCond = LoopTerm->getSuccessor(0) == LoopBody + ? Builder.getFalse() + : Builder.getTrue(); + LoopTerm->setCondition(NewLoopCond); + + // Delete the dead loop instructions + deleteDeadInstruction(cast(LoopCond)); + deleteDeadInstruction(cast(IncPtr)); + SE->forgetLoop(CurLoop); + return true; +} + /// Recognizes a population count idiom in a non-countable loop. /// /// If detected, transforms the relevant code to issue the popcount intrinsic diff --git a/llvm/test/Transforms/LoopIdiom/recognize-strlen.ll b/llvm/test/Transforms/LoopIdiom/recognize-strlen.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Transforms/LoopIdiom/recognize-strlen.ll @@ -0,0 +1,122 @@ +; RUN: opt -loop-idiom < %s -S | FileCheck %s +target datalayout = "e-m:e-i64:64-n32:64" +target triple = "powerpc64le-unknown-linux-gnu" + +define i64 @valid_strlen1(i8* %Str) { +entry: + %tobool = icmp eq i8* %Str, null + br i1 %tobool, label %cleanup, label %lor.lhs.false + +lor.lhs.false: ; preds = %entry + %0 = load i8, i8* %Str, align 1 + %cmp = icmp eq i8 %0, 0 + br i1 %cmp, label %cleanup, label %for.inc + +; CHECK: call i64 @strlen(i8* %Str) +; CHECK: br i1 true +for.inc: ; preds = %lor.lhs.false, %for.inc + %Src.09 = phi i8* [ %incdec.ptr, %for.inc ], [ %Str, %lor.lhs.false ] + %incdec.ptr = getelementptr inbounds i8, i8* %Src.09, i64 1 + %.pr = load i8, i8* %incdec.ptr, align 1 + %tobool2 = icmp eq i8 %.pr, 0 + br i1 %tobool2, label %for.end, label %for.inc + +; CHECK-NOT: sub +for.end: ; preds = %for.inc + %sub.ptr.lhs.cast = ptrtoint i8* %incdec.ptr to i64 + %sub.ptr.rhs.cast = ptrtoint i8* %Str to i64 + %sub.ptr.sub = sub i64 %sub.ptr.lhs.cast, %sub.ptr.rhs.cast + br label %cleanup + +cleanup: ; preds = %lor.lhs.false, %entry, %for.end + %retval.0 = phi i64 [ %sub.ptr.sub, %for.end ], [ 0, %entry ], [ 0, %lor.lhs.false ] + ret i64 %retval.0 +} + + +define i64 @valid_strlen2(i8* %Str) { +entry: + %tobool = icmp eq i8* %Str, null + br i1 %tobool, label %cleanup, label %for.cond + +; CHECK: call i64 @strlen(i8* %Str) +; CHECK: br i1 true +for.cond: ; preds = %entry, %for.cond + %Src.0 = phi i8* [ %incdec.ptr, %for.cond ], [ %Str, %entry ] + %0 = load i8, i8* %Src.0, align 1 + %tobool1 = icmp eq i8 %0, 0 + %incdec.ptr = getelementptr inbounds i8, i8* %Src.0, i64 1 + br i1 %tobool1, label %for.end, label %for.cond + +; CHECK-NOT: sub +for.end: ; preds = %for.cond + %sub.ptr.lhs.cast = ptrtoint i8* %Src.0 to i64 + %sub.ptr.rhs.cast = ptrtoint i8* %Str to i64 + %sub.ptr.sub = sub i64 %sub.ptr.lhs.cast, %sub.ptr.rhs.cast + br label %cleanup + + cleanup: ; preds = %entry, %for.end + %retval.0 = phi i64 [ %sub.ptr.sub, %for.end ], [ 0, %entry ] + ret i64 %retval.0 +} + + +define i64 @valid_strlen3(i8* readonly %Str) { +entry: + %tobool = icmp eq i8* %Str, null + br i1 %tobool, label %cleanup, label %while.cond.preheader + +while.cond.preheader: ; preds = %entry + %0 = load i8, i8* %Str, align 1 + %tobool16 = icmp eq i8 %0, 0 + br i1 %tobool16, label %cleanup, label %while.body + +; CHECK: call i64 @strlen(i8* %Str) +; CHECK: br i1 true +while.body: ; preds = %while.cond.preheader, %while.body + %Len.08 = phi i64 [ %inc, %while.body ], [ 0, %while.cond.preheader ] + %Str.addr.07 = phi i8* [ %incdec.ptr, %while.body ], [ %Str, %while.cond.preheader ] + %incdec.ptr = getelementptr inbounds i8, i8* %Str.addr.07, i64 1 + %inc = add i64 %Len.08, 1 + %1 = load i8, i8* %incdec.ptr, align 1 + %tobool1 = icmp eq i8 %1, 0 + br i1 %tobool1, label %cleanup, label %while.body + +cleanup: ; preds = %while.body, %while.cond.preheader, %entry + %retval.0 = phi i64 [ 0, %entry ], [ 0, %while.cond.preheader ], [ %inc, %while.body ] + ret i64 %retval.0 +} + + +define zeroext i32 @valid_strlen4(i8* %Str) { +entry: + %tobool = icmp eq i8* %Str, null + br i1 %tobool, label %cleanup, label %lor.lhs.false + +lor.lhs.false: ; preds = %entry + %0 = load i8, i8* %Str, align 1 + %cmp = icmp eq i8 %0, 0 + br i1 %cmp, label %cleanup, label %for.inc + +; CHECK: call i64 @strlen(i8* %Str) +; CHECK: br i1 true +for.inc: ; preds = %lor.lhs.false, %for.inc + %Src.010 = phi i8* [ %incdec.ptr, %for.inc ], [ %Str, %lor.lhs.false ] + %incdec.ptr = getelementptr inbounds i8, i8* %Src.010, i64 1 + %.pr = load i8, i8* %incdec.ptr, align 1 + %tobool2 = icmp eq i8 %.pr, 0 + br i1 %tobool2, label %for.end, label %for.inc + +; CHECK: trunc i64 %strlen to i32 +; CHECK-NOT: sub +for.end: ; preds = %for.inc + %sub.ptr.lhs.cast = ptrtoint i8* %incdec.ptr to i64 + %sub.ptr.rhs.cast = ptrtoint i8* %Str to i64 + %sub.ptr.sub = sub i64 %sub.ptr.lhs.cast, %sub.ptr.rhs.cast + %conv3 = trunc i64 %sub.ptr.sub to i32 + br label %cleanup + +cleanup: ; preds = %lor.lhs.false, %entry, %for.end + %retval.0 = phi i32 [ %conv3, %for.end ], [ 0, %entry ], [ 0, %lor.lhs.false ] + ret i32 %retval.0 +}