Index: include/llvm/Analysis/TargetTransformInfo.h =================================================================== --- include/llvm/Analysis/TargetTransformInfo.h +++ include/llvm/Analysis/TargetTransformInfo.h @@ -196,10 +196,17 @@ /// \brief Estimate the cost of a GEP operation when lowered. /// /// This user-based overload adds the ability to check if the GEP can be - /// folded into its users. + /// folded into all of its users. int getGEPCost(const GEPOperator *GEP, ArrayRef Operands) const; + /// \brief Estimate the cost of a GEP operation when lowered. + /// + /// This user-based overload adds the ability to check if the GEP can be + /// folded into its users in \p Users. + int getGEPCost(const GEPOperator *GEP, ArrayRef Operands, + ArrayRef Users) const; + /// \brief Estimate the cost of a EXT operation when lowered. /// /// The contract for this function is the same as \c getOperationCost except @@ -274,9 +281,28 @@ /// list must be the same as the order of the current operands the IR user /// has. /// + /// \p Users is a list of Users which use \p U in the IR. Currently, only GEPs + /// consider the list of Users in the cost calculation. + /// /// The returned cost is defined in terms of \c TargetCostConstants, see its /// comments for a detailed explanation of the cost values. - int getUserCost(const User *U, ArrayRef Operands) const; + int getUserCost(const User *U, ArrayRef Operands, + ArrayRef Users) const; + + /// \brief This is a helper function which passes \p Operands to the + /// three-argument getUserCost with the list of all Users which use \p U. + int getUserCost(const User *U, ArrayRef Operands) const { + SmallVector Users(U->user_begin(), U->user_end()); + return getUserCost(U, Operands, Users); + } + + /// \brief This is a helper function which passes \p Users to the + /// three-argument getUserCost with the operands \p U has. + int getUserCost(const User *U, ArrayRef Users) const { + SmallVector Operands(U->value_op_begin(), + U->value_op_end()); + return getUserCost(U, Operands, Users); + } /// \brief This is a helper function which calls the two-argument getUserCost /// with \p Operands which are the current operands U has. @@ -941,6 +967,9 @@ ArrayRef Operands) = 0; virtual int getGEPCost(const GEPOperator *GEP, ArrayRef Operands) = 0; + virtual int getGEPCost(const GEPOperator *GEP, + ArrayRef Operands, + ArrayRef Users) = 0; virtual int getExtCost(const Instruction *I, const Value *Src) = 0; virtual int getCallCost(FunctionType *FTy, int NumArgs) = 0; virtual int getCallCost(const Function *F, int NumArgs) = 0; @@ -953,8 +982,8 @@ ArrayRef Arguments) = 0; virtual unsigned getEstimatedNumberOfCaseClusters(const SwitchInst &SI, unsigned &JTSize) = 0; - virtual int - getUserCost(const User *U, ArrayRef Operands) = 0; + virtual int getUserCost(const User *U, ArrayRef Operands, + ArrayRef Users) = 0; virtual bool hasBranchDivergence() = 0; virtual bool isSourceOfDivergence(const Value *V) = 0; virtual bool isAlwaysUniform(const Value *V) = 0; @@ -1126,6 +1155,10 @@ ArrayRef Operands) override { return Impl.getGEPCost(GEP, Operands); } + int getGEPCost(const GEPOperator *GEP, ArrayRef Operands, + ArrayRef Users) override { + return Impl.getGEPCost(GEP, Operands, Users); + } int getExtCost(const Instruction *I, const Value *Src) override { return Impl.getExtCost(I, Src); } @@ -1150,8 +1183,9 @@ ArrayRef Arguments) override { return Impl.getIntrinsicCost(IID, RetTy, Arguments); } - int getUserCost(const User *U, ArrayRef Operands) override { - return Impl.getUserCost(U, Operands); + int getUserCost(const User *U, ArrayRef Operands, + ArrayRef Users) override { + return Impl.getUserCost(U, Operands, Users); } bool hasBranchDivergence() override { return Impl.hasBranchDivergence(); } bool isSourceOfDivergence(const Value *V) override { Index: include/llvm/Analysis/TargetTransformInfoImpl.h =================================================================== --- include/llvm/Analysis/TargetTransformInfoImpl.h +++ include/llvm/Analysis/TargetTransformInfoImpl.h @@ -727,6 +727,12 @@ } int getGEPCost(const GEPOperator *GEP, ArrayRef Operands) { + SmallVector Users(GEP->user_begin(), GEP->user_end()); + return getGEPCost(GEP, Operands, Users); + } + + int getGEPCost(const GEPOperator *GEP, ArrayRef Operands, + ArrayRef Users) { if (!isa(GEP)) return TTI::TCC_Basic; @@ -741,7 +747,7 @@ // load/store instructions together with other instructions (e.g., other // GEPs). Handling all such cases must be expensive to be performed // in this function, so we stay conservative for now. - for (const User *U : GEP->users()) { + for (const User *U : Users) { const Operator *UOP = cast(U); const Value *PointerOperand = nullptr; if (auto *LI = dyn_cast(UOP)) @@ -772,7 +778,8 @@ return static_cast(this)->getIntrinsicCost(IID, RetTy, ParamTys); } - unsigned getUserCost(const User *U, ArrayRef Operands) { + unsigned getUserCost(const User *U, ArrayRef Operands, + ArrayRef Users) { if (isa(U)) return TTI::TCC_Free; // Model all PHI nodes as free. @@ -782,8 +789,8 @@ return TTI::TCC_Free; if (const GEPOperator *GEP = dyn_cast(U)) - return static_cast(this)->getGEPCost(GEP, - Operands.drop_front()); + return static_cast(this)->getGEPCost(GEP, Operands.drop_front(), + Users); if (auto CS = ImmutableCallSite(U)) { const Function *F = CS.getCalledFunction(); @@ -816,7 +823,9 @@ int getInstructionLatency(const Instruction *I) { SmallVector Operands(I->value_op_begin(), I->value_op_end()); - if (getUserCost(I, Operands) == TTI::TCC_Free) + SmallVector Users(I->user_begin(), + I->user_end()); + if (getUserCost(I, Operands, Users) == TTI::TCC_Free) return 0; if (isa(I)) Index: include/llvm/CodeGen/BasicTTIImpl.h =================================================================== --- include/llvm/CodeGen/BasicTTIImpl.h +++ include/llvm/CodeGen/BasicTTIImpl.h @@ -194,6 +194,11 @@ return BaseT::getGEPCost(GEP, Operands); } + int getGEPCost(const GEPOperator *GEP, ArrayRef Operands, + ArrayRefUsers) { + return BaseT::getGEPCost(GEP, Operands, Users); + } + int getExtCost(const Instruction *I, const Value *Src) { if (getTLI()->isExtFree(I)) return TargetTransformInfo::TCC_Free; Index: include/llvm/Transforms/Utils/LoopUtils.h =================================================================== --- include/llvm/Transforms/Utils/LoopUtils.h +++ include/llvm/Transforms/Utils/LoopUtils.h @@ -424,8 +424,9 @@ /// instructions of the loop and loop safety information as /// arguments. Diagnostics is emitted via \p ORE. It returns changed status. bool sinkRegion(DomTreeNode *, AliasAnalysis *, LoopInfo *, DominatorTree *, - TargetLibraryInfo *, Loop *, AliasSetTracker *, - LoopSafetyInfo *, OptimizationRemarkEmitter *ORE); + TargetLibraryInfo *, TargetTransformInfo *, Loop *, + AliasSetTracker *, LoopSafetyInfo *, + OptimizationRemarkEmitter *ORE); /// \brief Walk the specified region of the CFG (defined by all blocks /// dominated by the specified block, and that are in the current loop) in depth Index: lib/Analysis/TargetTransformInfo.cpp =================================================================== --- lib/Analysis/TargetTransformInfo.cpp +++ lib/Analysis/TargetTransformInfo.cpp @@ -93,6 +93,12 @@ return TTIImpl->getGEPCost(GEP, Operands); } +int TargetTransformInfo::getGEPCost(const GEPOperator *GEP, + ArrayRef Operands, + ArrayRef Users) const { + return TTIImpl->getGEPCost(GEP, Operands, Users); +} + int TargetTransformInfo::getExtCost(const Instruction *I, const Value *Src) const { return TTIImpl->getExtCost(I, Src); @@ -110,10 +116,10 @@ unsigned &JTSize) const { return TTIImpl->getEstimatedNumberOfCaseClusters(SI, JTSize); } - int TargetTransformInfo::getUserCost(const User *U, - ArrayRef Operands) const { - int Cost = TTIImpl->getUserCost(U, Operands); + ArrayRef Operands, + ArrayRef Users) const { + int Cost = TTIImpl->getUserCost(U, Operands, Users); assert(Cost >= 0 && "TTI should not produce negative costs!"); return Cost; } Index: lib/Target/ARM/ARMTargetTransformInfo.cpp =================================================================== --- lib/Target/ARM/ARMTargetTransformInfo.cpp +++ lib/Target/ARM/ARMTargetTransformInfo.cpp @@ -611,7 +611,9 @@ } SmallVector Operands(I.value_op_begin(), I.value_op_end()); - Cost += getUserCost(&I, Operands); + SmallVector Users(I.user_begin(), + I.user_end()); + Cost += getUserCost(&I, Operands, Users); } UP.Partial = true; Index: lib/Target/Hexagon/HexagonTargetTransformInfo.h =================================================================== --- lib/Target/Hexagon/HexagonTargetTransformInfo.h +++ lib/Target/Hexagon/HexagonTargetTransformInfo.h @@ -70,7 +70,8 @@ /// @} - int getUserCost(const User *U, ArrayRef Operands); + int getUserCost(const User *U, ArrayRef Operands, + ArrayRef Users); // Hexagon specific decision to generate a lookup table. bool shouldBuildLookupTables() const; Index: lib/Target/Hexagon/HexagonTargetTransformInfo.cpp =================================================================== --- lib/Target/Hexagon/HexagonTargetTransformInfo.cpp +++ lib/Target/Hexagon/HexagonTargetTransformInfo.cpp @@ -55,8 +55,8 @@ return getST()->getL1CacheLineSize(); } -int HexagonTTIImpl::getUserCost(const User *U, - ArrayRef Operands) { +int HexagonTTIImpl::getUserCost(const User *U, ArrayRef Operands, + ArrayRef Users) { auto isCastFoldedIntoLoad = [](const CastInst *CI) -> bool { if (!CI->isIntegerCast()) return false; @@ -77,7 +77,7 @@ if (const CastInst *CI = dyn_cast(U)) if (isCastFoldedIntoLoad(CI)) return TargetTransformInfo::TCC_Free; - return BaseT::getUserCost(U, Operands); + return BaseT::getUserCost(U, Operands, Users); } bool HexagonTTIImpl::shouldBuildLookupTables() const { Index: lib/Target/X86/X86TargetTransformInfo.h =================================================================== --- lib/Target/X86/X86TargetTransformInfo.h +++ lib/Target/X86/X86TargetTransformInfo.h @@ -113,7 +113,8 @@ int getIntImmCost(const APInt &Imm, Type *Ty); - unsigned getUserCost(const User *U, ArrayRef Operands); + unsigned getUserCost(const User *U, ArrayRef Operands, + ArrayRef Users); int getIntImmCost(unsigned Opcode, unsigned Idx, const APInt &Imm, Type *Ty); int getIntImmCost(Intrinsic::ID IID, unsigned Idx, const APInt &Imm, Index: lib/Target/X86/X86TargetTransformInfo.cpp =================================================================== --- lib/Target/X86/X86TargetTransformInfo.cpp +++ lib/Target/X86/X86TargetTransformInfo.cpp @@ -2315,7 +2315,8 @@ } unsigned X86TTIImpl::getUserCost(const User *U, - ArrayRef Operands) { + ArrayRef Operands, + ArrayRef Users) { if (isa(U)) { Value *Ptr = U->getOperand(1); // Store instruction with index and scale costs 2 Uops. @@ -2326,7 +2327,7 @@ } return TTI::TCC_Basic; } - return BaseT::getUserCost(U, Operands); + return BaseT::getUserCost(U, Operands, Users); } // Return an average cost of Gather / Scatter instruction, maybe improved later Index: lib/Transforms/Scalar/LICM.cpp =================================================================== --- lib/Transforms/Scalar/LICM.cpp +++ lib/Transforms/Scalar/LICM.cpp @@ -88,15 +88,18 @@ "invariance in loop using invariant start (default = 8)")); static bool inSubLoop(BasicBlock *BB, Loop *CurLoop, LoopInfo *LI); -static bool isNotUsedInLoop(const Instruction &I, const Loop *CurLoop, - const LoopSafetyInfo *SafetyInfo); +static bool isNotUsedOrFreeInLoop(const Instruction &I, const Loop *CurLoop, + const LoopSafetyInfo *SafetyInfo, + TargetTransformInfo *TTI, + bool &FreeInLoop); static bool hoist(Instruction &I, const DominatorTree *DT, const Loop *CurLoop, const LoopSafetyInfo *SafetyInfo, OptimizationRemarkEmitter *ORE); static bool sink(Instruction &I, const LoopInfo *LI, const DominatorTree *DT, const Loop *CurLoop, AliasSetTracker *CurAST, const LoopSafetyInfo *SafetyInfo, - OptimizationRemarkEmitter *ORE); + OptimizationRemarkEmitter *ORE, + bool FreeInLoop); static bool isSafeToExecuteUnconditionally(Instruction &Inst, const DominatorTree *DT, const Loop *CurLoop, @@ -114,8 +117,9 @@ namespace { struct LoopInvariantCodeMotion { bool runOnLoop(Loop *L, AliasAnalysis *AA, LoopInfo *LI, DominatorTree *DT, - TargetLibraryInfo *TLI, ScalarEvolution *SE, - OptimizationRemarkEmitter *ORE, bool DeleteAST); + TargetLibraryInfo *TLI, TargetTransformInfo *TTI, + ScalarEvolution *SE, OptimizationRemarkEmitter *ORE, + bool DeleteAST); DenseMap &getLoopToAliasSetMap() { return LoopToAliasSetMap; @@ -155,6 +159,8 @@ &getAnalysis().getLoopInfo(), &getAnalysis().getDomTree(), &getAnalysis().getTLI(), + &getAnalysis().getTTI( + *L->getHeader()->getParent()), SE ? &SE->getSE() : nullptr, &ORE, false); } @@ -164,6 +170,7 @@ void getAnalysisUsage(AnalysisUsage &AU) const override { AU.setPreservesCFG(); AU.addRequired(); + AU.addRequired(); getLoopAnalysisUsage(AU); } @@ -204,7 +211,8 @@ "cached at a higher level"); LoopInvariantCodeMotion LICM; - if (!LICM.runOnLoop(&L, &AR.AA, &AR.LI, &AR.DT, &AR.TLI, &AR.SE, ORE, true)) + if (!LICM.runOnLoop(&L, &AR.AA, &AR.LI, &AR.DT, &AR.TLI, &AR.TTI, &AR.SE, ORE, + true)) return PreservedAnalyses::all(); auto PA = getLoopPassPreservedAnalyses(); @@ -217,6 +225,7 @@ false, false) INITIALIZE_PASS_DEPENDENCY(LoopPass) INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass) +INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass) INITIALIZE_PASS_END(LegacyLICMPass, "licm", "Loop Invariant Code Motion", false, false) @@ -228,12 +237,10 @@ /// We should delete AST for inner loops in the new pass manager to avoid /// memory leak. /// -bool LoopInvariantCodeMotion::runOnLoop(Loop *L, AliasAnalysis *AA, - LoopInfo *LI, DominatorTree *DT, - TargetLibraryInfo *TLI, - ScalarEvolution *SE, - OptimizationRemarkEmitter *ORE, - bool DeleteAST) { +bool LoopInvariantCodeMotion::runOnLoop( + Loop *L, AliasAnalysis *AA, LoopInfo *LI, DominatorTree *DT, + TargetLibraryInfo *TLI, TargetTransformInfo *TTI, ScalarEvolution *SE, + OptimizationRemarkEmitter *ORE, bool DeleteAST) { bool Changed = false; assert(L->isLCSSAForm(*DT) && "Loop is not in LCSSA form."); @@ -258,7 +265,7 @@ // instructions, we perform another pass to hoist them out of the loop. // if (L->hasDedicatedExits()) - Changed |= sinkRegion(DT->getNode(L->getHeader()), AA, LI, DT, TLI, L, + Changed |= sinkRegion(DT->getNode(L->getHeader()), AA, LI, DT, TLI, TTI, L, CurAST, &SafetyInfo, ORE); if (Preheader) Changed |= hoistRegion(DT->getNode(L->getHeader()), AA, LI, DT, TLI, L, @@ -351,7 +358,8 @@ /// definitions, allowing us to sink a loop body in one pass without iteration. /// bool llvm::sinkRegion(DomTreeNode *N, AliasAnalysis *AA, LoopInfo *LI, - DominatorTree *DT, TargetLibraryInfo *TLI, Loop *CurLoop, + DominatorTree *DT, TargetLibraryInfo *TLI, + TargetTransformInfo *TTI, Loop *CurLoop, AliasSetTracker *CurAST, LoopSafetyInfo *SafetyInfo, OptimizationRemarkEmitter *ORE) { @@ -392,10 +400,14 @@ // outside of the loop. In this case, it doesn't even matter if the // operands of the instruction are loop invariant. // - if (isNotUsedInLoop(I, CurLoop, SafetyInfo) && + bool FreeInLoop = false; + if (isNotUsedOrFreeInLoop(I, CurLoop, SafetyInfo, TTI, + FreeInLoop) && canSinkOrHoistInst(I, AA, DT, CurLoop, CurAST, SafetyInfo, ORE)) { - ++II; - Changed |= sink(I, LI, DT, CurLoop, CurAST, SafetyInfo, ORE); + if (!FreeInLoop) + ++II; + Changed |= sink(I, LI, DT, CurLoop, CurAST, SafetyInfo, ORE, + FreeInLoop); } } } @@ -688,13 +700,33 @@ return true; } +/// Return true if the instruction is free in the loop. +static bool isFreeInLoop(const Instruction &I, const Loop *CurLoop, + const TargetTransformInfo *TTI) { + + SmallVector UsersInLoop; + for (const User *U : I.users()) { + const Instruction *UI = cast(U); + if (CurLoop->contains(UI)) + UsersInLoop.push_back(U); + } + return TTI->getUserCost(&I, UsersInLoop) == + TargetTransformInfo::TCC_Free; +} + /// Return true if the only users of this instruction are outside of /// the loop. If this is true, we can sink the instruction to the exit /// blocks of the loop. /// -static bool isNotUsedInLoop(const Instruction &I, const Loop *CurLoop, - const LoopSafetyInfo *SafetyInfo) { +/// We also return true if the instruction could be folded away in lowering. +/// (e.g., a GEP can be folded into a load as an addressing mode in the loop). +static bool isNotUsedOrFreeInLoop(const Instruction &I, const Loop *CurLoop, + const LoopSafetyInfo *SafetyInfo, + TargetTransformInfo *TTI, + bool &FreeInLoop) { const auto &BlockColors = SafetyInfo->BlockColors; + bool IsFree = isFreeInLoop(I, CurLoop, TTI); + for (const User *U : I.users()) { const Instruction *UI = cast(U); if (const PHINode *PN = dyn_cast(UI)) { @@ -731,8 +763,13 @@ continue; } - if (CurLoop->contains(UI)) + if (CurLoop->contains(UI)) { + if (IsFree) { + FreeInLoop = true; + continue; + } return false; + } } return true; } @@ -806,7 +843,8 @@ static bool sink(Instruction &I, const LoopInfo *LI, const DominatorTree *DT, const Loop *CurLoop, AliasSetTracker *CurAST, const LoopSafetyInfo *SafetyInfo, - OptimizationRemarkEmitter *ORE) { + OptimizationRemarkEmitter *ORE, + bool FreeInLoop) { DEBUG(dbgs() << "LICM sinking instruction: " << I << "\n"); ORE->emit(OptimizationRemark(DEBUG_TYPE, "InstSunk", &I) << "sinking " << ore::NV("Inst", &I)); @@ -827,13 +865,19 @@ // Clones of this instruction. Don't create more than one per exit block! SmallDenseMap SunkCopies; + SmallPtrSet UsersToBeRemoved; // If this instruction is only used outside of the loop, then all users are // PHI nodes in exit blocks due to LCSSA form. Just RAUW them with clones of // the instruction. - while (!I.use_empty()) { - Value::user_iterator UI = I.user_begin(); + for (Value::user_iterator UI = I.user_begin(), UE = I.user_end(); UI != UE;) { auto *User = cast(*UI); + Use &U = UI.getUse(); + ++UI; + + if (CurLoop->contains(User) || UsersToBeRemoved.count(User)) + continue; + if (!DT->isReachableFromEntry(User->getParent())) { User->replaceUsesOfWith(&I, UndefValue::get(I.getType())); continue; @@ -844,7 +888,6 @@ // Surprisingly, instructions can be used outside of loops without any // exits. This can only happen in PHI nodes if the incoming block is // unreachable. - Use &U = UI.getUse(); BasicBlock *BB = PN->getIncomingBlock(U); if (!DT->isReachableFromEntry(BB)) { U = UndefValue::get(I.getType()); @@ -863,12 +906,17 @@ New = SunkCopies[ExitBlock] = CloneInstructionInExitBlock(I, *ExitBlock, *PN, LI, SafetyInfo); + UsersToBeRemoved.insert(PN); PN->replaceAllUsesWith(New); - PN->eraseFromParent(); } - CurAST->deleteValue(&I); - I.eraseFromParent(); + for (auto *User : UsersToBeRemoved) + User->eraseFromParent(); + + if (!FreeInLoop) { + CurAST->deleteValue(&I); + I.eraseFromParent(); + } return Changed; } Index: test/Transforms/LICM/sink-foldable.ll =================================================================== --- /dev/null +++ test/Transforms/LICM/sink-foldable.ll @@ -0,0 +1,147 @@ +; RUN: opt < %s -licm -S | FileCheck %s +target triple = "aarch64--linux-gnueabi" + +; CHECK-LABEL:@test1 +; CHECK-LABEL:loopexit1: +; CHECK: %[[PHI:.+]] = phi i8** [ %arrayidx0, %if.end ] +; CHECK: getelementptr inbounds i8*, i8** %[[PHI]], i64 1 +define i8** @test1(i32 %j, i8** readonly %P, i8* readnone %Q) { +entry: + %cmp0 = icmp slt i32 0, %j + br i1 %cmp0, label %for.body.lr.ph, label %return + +for.body.lr.ph: + br label %for.body + +for.body: + %P.addr = phi i8** [ %P, %for.body.lr.ph ], [ %arrayidx0, %if.end ] + %i0 = phi i32 [ 0, %for.body.lr.ph ], [ %i.add, %if.end] + + %i0.ext = sext i32 %i0 to i64 + %arrayidx0 = getelementptr inbounds i8*, i8** %P.addr, i64 %i0.ext + %l0 = load i8*, i8** %arrayidx0, align 8 + %cmp1 = icmp ugt i8* %l0, %Q + br i1 %cmp1, label %loopexit0, label %if.end + +if.end: ; preds = %for.body + %arrayidx1 = getelementptr inbounds i8*, i8** %arrayidx0, i64 1 + %l1 = load i8*, i8** %arrayidx1, align 8 + %cmp4 = icmp ugt i8* %l1, %Q + %i.add = add nsw i32 %i0, 2 + br i1 %cmp4, label %loopexit1, label %for.body + +loopexit0: + %p1 = phi i8** [%arrayidx0, %for.body] + br label %return + +loopexit1: + %p2 = phi i8** [%arrayidx1, %if.end] + br label %return + +return: + %retval.0 = phi i8** [ %p1, %loopexit0 ], [%p2, %loopexit1], [ null, %entry ] + ret i8** %retval.0 +} + +; CHECK-LABEL: @test2 +; CHECK-LABEL: loopexit2: +; CHECK: %[[PHI:.*]] = phi i8** [ %add.ptr, %if.end ] +; CHECK: getelementptr inbounds i8*, i8** %[[PHI]] +define i8** @test2(i32 %j, i8** readonly %P, i8* readnone %Q) { + +entry: + br label %for.body + +for.cond: + %i.addr.0 = phi i32 [ %add, %if.end ] + %P.addr.0 = phi i8** [ %add.ptr, %if.end ] + %cmp = icmp slt i32 %i.addr.0, %j + br i1 %cmp, label %for.body, label %loopexit0 + +for.body: + %P.addr = phi i8** [ %P, %entry ], [ %P.addr.0, %for.cond ] + %i.addr = phi i32 [ 0, %entry ], [ %i.addr.0, %for.cond ] + + %idx.ext = sext i32 %i.addr to i64 + %add.ptr = getelementptr inbounds i8*, i8** %P.addr, i64 %idx.ext + %l0 = load i8*, i8** %add.ptr, align 8 + + %cmp1 = icmp ugt i8* %l0, %Q + br i1 %cmp1, label %loopexit1, label %if.end + +if.end: + %add.i = add i32 %i.addr, 1 + %idx2.ext = sext i32 %add.i to i64 + %arrayidx2 = getelementptr inbounds i8*, i8** %add.ptr, i64 %idx2.ext + %l1 = load i8*, i8** %arrayidx2, align 8 + %cmp2 = icmp ugt i8* %l1, %Q + %add = add nsw i32 %add.i, 1 + br i1 %cmp2, label %loopexit2, label %for.cond + +loopexit0: + %p0 = phi i8** [ null, %for.cond ] + br label %return + +loopexit1: + %p1 = phi i8** [ %add.ptr, %for.body ] + br label %return + +loopexit2: + %p2 = phi i8** [ %arrayidx2, %if.end ] + br label %return + +return: + %retval.0 = phi i8** [ %p1, %loopexit1 ], [ %p2, %loopexit2 ], [ %p0, %loopexit0 ] + ret i8** %retval.0 +} + + +; CHECK-LABEL: @test3 +; CHECK-LABEL: loopexit1: +; CHECK: %[[ADD:.*]] = phi i64 [ %add, %if.end ] +; CHECK: %[[ADDR:.*]] = phi i8** [ %P.addr, %if.end ] +; CHECK: %[[TRUNC:.*]] = trunc i64 %[[ADD]] to i32 +; CHECK: getelementptr inbounds i8*, i8** %[[ADDR]], i32 %[[TRUNC]] +; CHECK: call void @dummy(i32 %[[TRUNC]]) +define i8** @test3(i64 %j, i8** readonly %P, i8* readnone %Q) { +entry: + %cmp0 = icmp slt i64 0, %j + br i1 %cmp0, label %for.body.lr.ph, label %return + +for.body.lr.ph: + br label %for.body + +for.body: + %P.addr = phi i8** [ %P, %for.body.lr.ph ], [ %arrayidx0, %if.end ] + %i0 = phi i32 [ 0, %for.body.lr.ph ], [ %i.add, %if.end] + + %i0.ext = sext i32 %i0 to i64 + %arrayidx0 = getelementptr inbounds i8*, i8** %P.addr, i64 %i0.ext + %l0 = load i8*, i8** %arrayidx0, align 8 + %cmp1 = icmp ugt i8* %l0, %Q + br i1 %cmp1, label %loopexit0, label %if.end + +if.end: ; preds = %for.body + %add = add i64 %i0.ext, 1 + %trunc = trunc i64 %add to i32 + %arrayidx1 = getelementptr inbounds i8*, i8** %P.addr, i32 %trunc + %l1 = load i8*, i8** %arrayidx1, align 8 + %cmp4 = icmp ugt i8* %l1, %Q + %i.add = add nsw i32 %i0, 2 + br i1 %cmp4, label %loopexit1, label %for.body + +loopexit0: + %p1 = phi i8** [%arrayidx0, %for.body] + br label %return + +loopexit1: + %p2 = phi i8** [%arrayidx1, %if.end] + call void @dummy(i32 %trunc) + br label %return + +return: + %retval.0 = phi i8** [ %p1, %loopexit0 ], [%p2, %loopexit1], [ null, %entry ] + ret i8** %retval.0 +} + +declare void @dummy(i32)