Index: include/llvm/Analysis/IVUsers.h
===================================================================
--- include/llvm/Analysis/IVUsers.h
+++ include/llvm/Analysis/IVUsers.h
@@ -15,6 +15,7 @@
 #ifndef LLVM_ANALYSIS_IVUSERS_H
 #define LLVM_ANALYSIS_IVUSERS_H
 
+#include "llvm/ADT/SmallSet.h"
 #include "llvm/Analysis/LoopAnalysisManager.h"
 #include "llvm/Analysis/LoopPass.h"
 #include "llvm/Analysis/ScalarEvolutionNormalization.h"
@@ -99,6 +100,7 @@
   DominatorTree *DT;
   ScalarEvolution *SE;
   SmallPtrSet<Instruction *, 16> Processed;
+  DenseMap<Value *, SmallPtrSet<Value *, 2>> IVOperandsMap;
 
   /// IVUses - A list of all tracked IV uses of induction variable expressions
   /// we are interested in.
@@ -114,7 +116,8 @@
   IVUsers(IVUsers &&X)
       : L(std::move(X.L)), AC(std::move(X.AC)), DT(std::move(X.DT)),
         SE(std::move(X.SE)), Processed(std::move(X.Processed)),
-        IVUses(std::move(X.IVUses)), EphValues(std::move(X.EphValues)) {
+        IVOperandsMap(std::move(X.IVOperandsMap)), IVUses(std::move(X.IVUses)),
+        EphValues(std::move(X.EphValues)) {
     for (IVStrideUse &U : IVUses)
       U.Parent = this;
   }
@@ -152,6 +155,13 @@
     return Processed.count(Inst);
   }
 
+  /// Return the IV operands used by \p Use, or null if \p Use is not tracked.
+  SmallPtrSetImpl<Value *> *getIVsUsedBy(Value *Use) {
+    if (!IVOperandsMap.count(Use))
+      return nullptr;
+    return &IVOperandsMap[Use];
+  }
+
   void releaseMemory();
 
   void print(raw_ostream &OS, const Module * = nullptr) const;
Index: lib/Analysis/IVUsers.cpp
===================================================================
--- lib/Analysis/IVUsers.cpp
+++ lib/Analysis/IVUsers.cpp
@@ -225,6 +225,9 @@
     if (!isSimplifiedLoopNest(UseBB, DT, LI, SimpleLoopNests))
       return false;
 
+    // Track the IV operand I feeding this user.
+    IVOperandsMap[User].insert(I);
+
     // Descend recursively, but not into PHI nodes outside the current loop.
     // It's important to see the entire expression outside the loop to get
     // choices that depend on addressing mode use right, although we won't
@@ -349,6 +352,7 @@
 void IVUsers::releaseMemory() {
   Processed.clear();
   IVUses.clear();
+  IVOperandsMap.clear();
 }
 
 IVUsersWrapperPass::IVUsersWrapperPass() : LoopPass(ID) {
Index: lib/Transforms/Scalar/LoopStrengthReduce.cpp
===================================================================
--- lib/Transforms/Scalar/LoopStrengthReduce.cpp
+++ lib/Transforms/Scalar/LoopStrengthReduce.cpp
@@ -65,6 +65,7 @@
 #include "llvm/ADT/SmallPtrSet.h"
 #include "llvm/ADT/SmallSet.h"
 #include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Statistic.h"
 #include "llvm/ADT/iterator_range.h"
 #include "llvm/Analysis/IVUsers.h"
 #include "llvm/Analysis/LoopAnalysisManager.h"
@@ -123,12 +124,19 @@
 #define DEBUG_TYPE "loop-reduce"
 
+STATISTIC(NumCheaperInput, "Number of loops where the LSR input was cheaper");
+
 /// MaxIVUsers is an arbitrary threshold that provides an early opportunity for
 /// bail out. This threshold is far beyond the number of users that LSR can
 /// conceivably solve, so it should not affect generated code, but catches the
 /// worst cases before LSR burns too much compile time and stack space.
 static const unsigned MaxIVUsers = 200;
 
+static cl::opt<bool>
+    EnableInitialCostCheck("enable-lsr-input-cost-check", cl::Hidden,
+                           cl::init(true),
+                           cl::desc("Enable the LSR input cost check"));
+
 // Temporary flag to cleanup congruent phis after LSR phi expansion.
 // It's currently disabled until we can determine whether it's truly useful or
 // not. The flag should be removed after the v3.0 release.
@@ -1020,6 +1028,8 @@
 
   void Lose();
 
+  void ApplyWeight();
+
 #ifndef NDEBUG
   // Once any of the metrics loses, they must all remain losers.
   bool isValid() {
@@ -1404,6 +1414,20 @@
   C.ScaleCost = std::numeric_limits<unsigned>::max();
 }
 
+/// Apply some weight to this cost.
+void Cost::ApplyWeight() {
+  // FIXME: For now, simply increase Insns and NumRegs by one and ignore the
+  // other cost criteria.
+  ++C.Insns;
+  ++C.NumRegs;
+  C.AddRecCost = std::numeric_limits<unsigned>::max();
+  C.NumIVMuls = std::numeric_limits<unsigned>::max();
+  C.NumBaseAdds = std::numeric_limits<unsigned>::max();
+  C.ImmCost = std::numeric_limits<unsigned>::max();
+  C.SetupCost = std::numeric_limits<unsigned>::max();
+  C.ScaleCost = std::numeric_limits<unsigned>::max();
+}
+
 /// Choose the lower cost.
 bool Cost::isLess(Cost &Other, const TargetTransformInfo &TTI) {
   if (InsnsCost.getNumOccurrences() > 0 && InsnsCost &&
@@ -1946,6 +1970,35 @@
   std::pair<size_t, int64_t> getUse(const SCEV *&Expr, LSRUse::KindType Kind,
                                     MemAccessTy AccessTy);
 
+  std::pair<size_t, int64_t> getUse(const SCEV *&Expr, LSRUse::KindType Kind,
+                                    MemAccessTy AccessTy,
+                                    SmallVectorImpl<LSRUse> &LsrUses,
+                                    UseMapTy &UseMap);
+
+  LSRUse &GetOrCreateLSRUse(Instruction *UserInst, Value *IVOp, const SCEV *&S,
+                            LSRUse::KindType Kind, MemAccessTy &AccessTy,
+                            PostIncLoopSet &TmpPostIncLoops, UseMapTy &UseMap,
+                            SmallVectorImpl<LSRUse> &LsrUses, size_t &LUIdx);
+
+  bool FormInputGEPFormula(Value *Op, LSRUse &LU, Formula &F);
+
+  bool FormInputLSRUsesAndFormulae(
+      Instruction *UserInst, Value *OperandValToReplace,
+      DenseSet<std::pair<Instruction *, Value *>> &Visited,
+      SmallVectorImpl<LSRUse> &InputUses, UseMapTy &InputUseMap);
+
+  bool FormInputLSRUseAndFormula(
+      Instruction *UserInst, Value *Op,
+      DenseSet<std::pair<Instruction *, Value *>> &Visited,
+      SmallVectorImpl<LSRUse> &InputUses, UseMapTy &InputUseMap);
+
+  bool CollectInputFormulae(SmallVectorImpl<LSRUse> &LSRUses);
+
+  bool IsInputCostCheaperThanSolutionCost(Cost &SolutionCost);
+
+  bool IsInputCostStillCheap(Cost &SolutionCost, Cost &InputCost);
+
   void DeleteUse(LSRUse &LU, size_t LUIdx);
 
   LSRUse *FindUseWithSimilarFormula(const Formula &F, const LSRUse &OrigLU);
@@ -1953,7 +2006,8 @@
   void InsertInitialFormula(const SCEV *S, LSRUse &LU, size_t LUIdx);
   void InsertSupplementalFormula(const SCEV *S, LSRUse &LU, size_t LUIdx);
   void CountRegisters(const Formula &F, size_t LUIdx);
-  bool InsertFormula(LSRUse &LU, unsigned LUIdx, const Formula &F);
+  bool InsertFormula(LSRUse &LU, unsigned LUIdx, const Formula &F,
+                     bool CountReg = true);
 
   void CollectLoopInvariantFixupsAndFormulae();
 
@@ -1996,7 +2050,9 @@
             const Cost &CurCost,
             const SmallPtrSet<const SCEV *, 16> &CurRegs,
             DenseSet<const SCEV *> &VisitedRegs) const;
-  void Solve(SmallVectorImpl<const Formula *> &Solution) const;
+
+  void Solve(SmallVectorImpl<const Formula *> &Solution,
+             Cost &SolutionCost) const;
 
   BasicBlock::iterator
     HoistInsertPosition(BasicBlock::iterator IP,
@@ -2532,6 +2588,14 @@
 std::pair<size_t, int64_t> LSRInstance::getUse(const SCEV *&Expr,
                                                LSRUse::KindType Kind,
                                                MemAccessTy AccessTy) {
+  return getUse(Expr, Kind, AccessTy, Uses, UseMap);
+}
+
+std::pair<size_t, int64_t> LSRInstance::getUse(const SCEV *&Expr,
+                                               LSRUse::KindType Kind,
+                                               MemAccessTy AccessTy,
+                                               SmallVectorImpl<LSRUse> &LUs,
+                                               UseMapTy &UMap) {
   const SCEV *Copy = Expr;
   int64_t Offset = ExtractImmediate(Expr, SE);
@@ -2543,21 +2607,22 @@
   }
 
   std::pair<UseMapTy::iterator, bool> P =
-      UseMap.insert(std::make_pair(LSRUse::SCEVUseKindPair(Expr, Kind), 0));
+      UMap.insert(std::make_pair(LSRUse::SCEVUseKindPair(Expr, Kind), 0));
+
   if (!P.second) {
     // A use already existed with this base.
     size_t LUIdx = P.first->second;
-    LSRUse &LU = Uses[LUIdx];
+    LSRUse &LU = LUs[LUIdx];
     if (reconcileNewOffset(LU, Offset, /*HasBaseReg=*/true, Kind, AccessTy))
       // Reuse this use.
       return std::make_pair(LUIdx, Offset);
   }
 
   // Create a new use.
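+  // (The new use is appended to whichever vector was passed in, so this one
+  // path serves both the real Uses list and the temporary input-cost uses
+  // collected by CollectInputFormulae().)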
-  size_t LUIdx = Uses.size();
+  size_t LUIdx = LUs.size();
   P.first->second = LUIdx;
-  Uses.push_back(LSRUse(Kind, AccessTy));
-  LSRUse &LU = Uses[LUIdx];
+  LUs.push_back(LSRUse(Kind, AccessTy));
+  LSRUse &LU = LUs[LUIdx];
   LU.MinOffset = Offset;
   LU.MaxOffset = Offset;
@@ -3204,6 +3269,126 @@
   }
 }
 
+bool LSRInstance::FormInputLSRUsesAndFormulae(
+    Instruction *UserInst, Value *OperandValToReplace,
+    DenseSet<std::pair<Instruction *, Value *>> &Visited,
+    SmallVectorImpl<LSRUse> &InputUses, UseMapTy &InputUseMap) {
+  SmallVector<std::pair<Instruction *, Value *>, 4> Worklist;
+  Worklist.push_back(std::make_pair(UserInst, OperandValToReplace));
+
+  while (!Worklist.empty()) {
+    std::pair<Instruction *, Value *> Element = Worklist.pop_back_val();
+    Value *Op = Element.second;
+    Instruction *Inst = Element.first;
+
+    if (!FormInputLSRUseAndFormula(Inst, Op, Visited, InputUses, InputUseMap))
+      return false;
+
+    if (!IU.getIVsUsedBy(Op)) {
+      assert(isa<PHINode>(Op) &&
+             "Expect only a PHI at the root of an IV operand chain");
+      continue;
+    }
+    for (auto IV : *IU.getIVsUsedBy(Op))
+      Worklist.push_back(std::make_pair(cast<Instruction>(Op), IV));
+  }
+  return true;
+}
+
+bool LSRInstance::FormInputLSRUseAndFormula(
+    Instruction *UserInst, Value *Op,
+    DenseSet<std::pair<Instruction *, Value *>> &Visited,
+    SmallVectorImpl<LSRUse> &InputUses, UseMapTy &InputUseMap) {
+  if (!Visited.insert(std::make_pair(UserInst, Op)).second)
+    return true;
+
+  LSRUse::KindType Kind = LSRUse::Basic;
+  MemAccessTy AccessTy;
+  if (UserInst && isAddressUse(TTI, UserInst, Op)) {
+    Kind = LSRUse::Address;
+    AccessTy = getAccessType(TTI, UserInst, Op);
+  }
+
+  const SCEV *S = SE.getSCEV(Op);
+  size_t LUIdx;
+  PostIncLoopSet TmpPostIncLoops;
+  LSRUse &LU = GetOrCreateLSRUse(UserInst, Op, S, Kind, AccessTy,
+                                 TmpPostIncLoops, InputUseMap, InputUses,
+                                 LUIdx);
+
+  Formula F;
+
+  if (isa<GEPOperator>(Op)) {
+    // TODO: For now we only consider GEPs directly used in addressing.
+    if (!isAddressUse(TTI, UserInst, Op))
+      return false;
+    // Since we need to model the situation where all IV operands are kept, we
+    // should break down the GEP's SCEV in terms of its IV operands so that we
+    // can reuse the Regs from the IV operands in the GEP. For example, in the
+    // IR below,
+    //
+    //   %iv = phi i32 [ 0, %Preheader ], [ %iv.next, %LoopBody ]
+    //   %gep = getelementptr i32, i32* %Base, i32 %iv
+    //   %iv.next = add i32 %iv, 1
+    //
+    // we cannot simply use the original initial formula (reg({%Base,+,4}))
+    // from SE.getSCEV(). Instead, we have to break down the formula in terms
+    // of the IV operand, like reg(%Base) + 4*reg({0,+,1}), so that the cost
+    // model can see that reg({0,+,1}) is shared.
+    if (!FormInputGEPFormula(Op, LU, F))
+      return false;
+  } else
+    F.initialMatch(S, L, SE);
+
+  if (!isLegalUse(TTI, LU.MinOffset, LU.MaxOffset, LU.Kind, LU.AccessTy, F))
+    return false;
+
+  InsertFormula(LU, LUIdx, F, /*CountReg=*/false);
+
+  // TODO: For now, expect only one formula per LU. If we encounter multiple
+  // formulae, it must be because other GEPs in the same LU were created in a
+  // different form by FormInputGEPFormula(). We do not handle such cases yet.
+  if (LU.Formulae.size() != 1)
+    return false;
+
+  return true;
+}
+
+LSRUse &LSRInstance::GetOrCreateLSRUse(
+    Instruction *UserInst, Value *IVOp, const SCEV *&S, LSRUse::KindType Kind,
+    MemAccessTy &AccessTy, PostIncLoopSet &TmpPostIncLoops, UseMapTy &LsrUseMap,
+    SmallVectorImpl<LSRUse> &LsrUses, size_t &LUIdx) {
+  // Get or create an LSRUse.
+  std::pair<size_t, int64_t> P = getUse(S, Kind, AccessTy, LsrUses, LsrUseMap);
+  LUIdx = P.first;
+  int64_t Offset = P.second;
+  LSRUse &LU = LsrUses[LUIdx];
+
+  // Record the fixup.
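+  // (An LSRFixup records one concrete use site: the user instruction, the IV
+  // operand that would be rewritten, and the immediate offset that getUse()
+  // folded out of the expression.)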
+  LSRFixup &LF = LU.getNewFixup();
+  LF.UserInst = UserInst;
+  LF.OperandValToReplace = IVOp;
+  LF.PostIncLoops = TmpPostIncLoops;
+  LF.Offset = Offset;
+  LU.AllFixupsOutsideLoop &= LF.isUseFullyOutsideLoop(L);
+
+  if (!LU.WidestFixupType ||
+      SE.getTypeSizeInBits(LU.WidestFixupType) <
+          SE.getTypeSizeInBits(LF.OperandValToReplace->getType()))
+    LU.WidestFixupType = LF.OperandValToReplace->getType();
+
+  return LU;
+}
+
+bool LSRInstance::CollectInputFormulae(SmallVectorImpl<LSRUse> &InputUses) {
+  DenseSet<std::pair<Instruction *, Value *>> Visited;
+  UseMapTy InputUseMap;
+
+  for (const IVStrideUse &U : IU)
+    if (!FormInputLSRUsesAndFormulae(U.getUser(), U.getOperandValToReplace(),
+                                     Visited, InputUses, InputUseMap))
+      return false;
+  return true;
+}
+
 void LSRInstance::CollectFixupsAndInitialFormulae() {
   for (const IVStrideUse &U : IU) {
     Instruction *UserInst = U.getUser();
@@ -3262,24 +3447,10 @@
       Factors.insert(-1);
     }
 
-    // Get or create an LSRUse.
-    std::pair<size_t, int64_t> P = getUse(S, Kind, AccessTy);
-    size_t LUIdx = P.first;
-    int64_t Offset = P.second;
-    LSRUse &LU = Uses[LUIdx];
-
-    // Record the fixup.
-    LSRFixup &LF = LU.getNewFixup();
-    LF.UserInst = UserInst;
-    LF.OperandValToReplace = U.getOperandValToReplace();
-    LF.PostIncLoops = TmpPostIncLoops;
-    LF.Offset = Offset;
-    LU.AllFixupsOutsideLoop &= LF.isUseFullyOutsideLoop(L);
-
-    if (!LU.WidestFixupType ||
-        SE.getTypeSizeInBits(LU.WidestFixupType) <
-            SE.getTypeSizeInBits(LF.OperandValToReplace->getType()))
-      LU.WidestFixupType = LF.OperandValToReplace->getType();
+    size_t LUIdx;
+    LSRUse &LU =
+        GetOrCreateLSRUse(UserInst, U.getOperandValToReplace(), S, Kind,
+                          AccessTy, TmpPostIncLoops, UseMap, Uses, LUIdx);
 
     // If this is the first use of this LSRUse, give it a formula.
     if (LU.Formulae.empty()) {
@@ -3327,7 +3498,8 @@
 
 /// If the given formula has not yet been inserted, add it to the list, and
 /// return true. Return false otherwise.
-bool LSRInstance::InsertFormula(LSRUse &LU, unsigned LUIdx, const Formula &F) {
+bool LSRInstance::InsertFormula(LSRUse &LU, unsigned LUIdx, const Formula &F,
+                                bool CountReg) {
   // Do not insert formula that we will not be able to expand.
   assert(isLegalUse(TTI, LU.MinOffset, LU.MaxOffset, LU.Kind, LU.AccessTy, F) &&
          "Formula is illegal");
@@ -3335,7 +3507,95 @@
   if (!LU.InsertFormula(F, *L))
     return false;
 
-  CountRegisters(F, LUIdx);
+  if (CountReg)
+    CountRegisters(F, LUIdx);
   return true;
 }
 
+bool LSRInstance::FormInputGEPFormula(Value *Op, LSRUse &LU, Formula &F) {
+  assert(IU.getIVsUsedBy(Op) && "Expect at least one IV operand in the GEP");
+  GEPOperator *GEP = cast<GEPOperator>(Op);
+  SmallVector<const SCEV *, 4> NonIVOpExprs;
+  SmallVector<std::pair<const SCEV *, const SCEV *>, 4> IVOpExprs;
+  const SCEV *BaseExpr = SE.getSCEV(GEP->getPointerOperand());
+  Type *IntPtrTy = SE.getEffectiveSCEVType(BaseExpr->getType());
+  SCEV::NoWrapFlags Wrap =
+      GEP->isInBounds() ? SCEV::FlagNSW : SCEV::FlagAnyWrap;
+  Type *CurTy = ArrayType::get(GEP->getSourceElementType(), 0);
+
+  if (!IU.getIVsUsedBy(Op)->count(GEP->getPointerOperand()))
+    NonIVOpExprs.push_back(BaseExpr);
+  else
+    IVOpExprs.push_back(std::make_pair(SE.getOne(IntPtrTy), BaseExpr));
+
+  // Note that this loop is inspired by SE.getGEPExpr().
+  for (auto Index = GEP->idx_begin(); Index != GEP->idx_end(); ++Index) {
+    Value *GEPIndex = *Index;
+    const SCEV *IndexExpr = SE.getSCEV(GEPIndex);
+
+    // Compute the (potentially symbolic) offset in bytes for this index.
+    if (StructType *STy = dyn_cast<StructType>(CurTy)) {
+      // For a struct, add the member offset.
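+      // (A struct index is always a ConstantInt in well-formed IR, so the
+      // cast below cannot fail; the field's byte offset comes from the
+      // target's struct layout via getOffsetOfExpr().)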
+      ConstantInt *FieldIndex = cast<SCEVConstant>(IndexExpr)->getValue();
+      unsigned FieldNo = FieldIndex->getZExtValue();
+      const SCEV *FieldOffset = SE.getOffsetOfExpr(IntPtrTy, STy, FieldNo);
+
+      // The field offset will be added as a part of NonIVOps.
+      NonIVOpExprs.push_back(FieldOffset);
+
+      // Update CurTy to the type of the field at FieldIndex.
+      CurTy = STy->getTypeAtIndex(FieldIndex);
+    } else {
+      // Update CurTy to its element type.
+      CurTy = cast<SequentialType>(CurTy)->getElementType();
+      // For an array, find the element offset, explicitly scaled.
+      const SCEV *ElementSize = SE.getSizeOfExpr(IntPtrTy, CurTy);
+      // Getelementptr indices are signed.
+      IndexExpr = SE.getTruncateOrSignExtend(IndexExpr, IntPtrTy);
+
+      // Multiply the index by the element size to compute the element offset.
+      const SCEV *LocalOffset = SE.getMulExpr(IndexExpr, ElementSize, Wrap);
+
+      if (!IU.getIVsUsedBy(Op)->count(GEPIndex))
+        NonIVOpExprs.push_back(LocalOffset);
+      else
+        IVOpExprs.push_back(std::make_pair(ElementSize, IndexExpr));
+    }
+  }
+
+  const SCEV *NonIVOps;
+  if (NonIVOpExprs.empty())
+    NonIVOps = SE.getZero(IntPtrTy);
+  else
+    NonIVOps = SE.getAddExpr(NonIVOpExprs);
+
+  Formula TempF;
+  TempF.initialMatch(NonIVOps, L, SE);
+
+  // Flatten the representation, i.e., reg1 + 1*reg2 => reg1 + reg2, because
+  // we want to place the IV in ScaledReg.
+  TempF.unscale();
+
+  for (auto IVOpExpr : IVOpExprs) {
+    const SCEV *ElementSize = IVOpExpr.first;
+    const SCEV *IVOp = IVOpExpr.second;
+    int64_t Scale = cast<SCEVConstant>(ElementSize)->getAPInt().getSExtValue();
+
+    if (!TempF.BaseRegs.empty() && !TempF.ScaledReg) {
+      TempF.Scale = Scale;
+      TempF.ScaledReg = IVOp;
+    } else {
+      const SCEV *Offset = SE.getMulExpr(IVOp, ElementSize, Wrap);
+      TempF.BaseRegs.push_back(Offset);
+      TempF.HasBaseReg = true;
+    }
+  }
+
+  if (!TempF.isCanonical(*L))
+    return false;
+
+  F = TempF;
   return true;
 }
 
@@ -4841,9 +5101,9 @@
 /// Choose one formula from each use. Return the results in the given Solution
 /// vector.
-void LSRInstance::Solve(SmallVectorImpl<const Formula *> &Solution) const {
+void LSRInstance::Solve(SmallVectorImpl<const Formula *> &Solution,
+                        Cost &SolutionCost) const {
   SmallVector<const Formula *, 8> Workspace;
-  Cost SolutionCost;
   SolutionCost.Lose();
   Cost CurCost;
   SmallPtrSet<const SCEV *, 16> CurRegs;
@@ -4874,6 +5134,43 @@
   assert(Solution.size() == Uses.size() && "Malformed solution!");
 }
 
+bool LSRInstance::IsInputCostStillCheap(Cost &SolutionCost, Cost &InputCost) {
+  if (!InputCost.isLess(SolutionCost, TTI))
+    return false;
+
+  // By giving some weight to the input cost, we conservatively skip using
+  // LSR's selected solution.
+  Cost WeightedInputCost = InputCost;
+  WeightedInputCost.ApplyWeight();
+  return WeightedInputCost.isLess(SolutionCost, TTI);
+}
+
+bool LSRInstance::IsInputCostCheaperThanSolutionCost(Cost &SolutionCost) {
+  SmallVector<LSRUse, 16> InputUses;
+  if (!CollectInputFormulae(InputUses) || InputUses.empty())
+    return false;
+
+  DenseSet<const SCEV *> VisitedRegs;
+  SmallPtrSet<const SCEV *, 16> Regs;
+  Cost InputCost;
+  for (size_t i = 0, e = InputUses.size(); i != e; ++i) {
+    const LSRUse &LU = InputUses[i];
+    assert(LU.Formulae.size() == 1 && "Expect only one formula");
+    InputCost.RateFormula(TTI, LU.Formulae[0], Regs, VisitedRegs, L, SE, DT,
+                          LU);
+
+    if (!IsInputCostStillCheap(SolutionCost, InputCost))
+      return false;
+  }
+
+  LLVM_DEBUG(dbgs() << "\n"
+                       "The input requires ";
+             InputCost.print(dbgs()); dbgs() << ":\n"
+                                                "The chosen solution requires ";
+             SolutionCost.print(dbgs()); dbgs() << ":\n";);
+  return true;
+}
+
 /// Helper for AdjustInsertPositionForExpand. Climb up the dominator tree as
 /// far as we can go while still being dominated by the input positions. This
 /// helps canonicalize the insert position, which encourages sharing.
@@ -5414,7 +5711,8 @@
   NarrowSearchSpaceUsingHeuristics();
 
   SmallVector<const Formula *, 8> Solution;
-  Solve(Solution);
+  Cost SolutionCost;
+  Solve(Solution, SolutionCost);
 
   // Release memory that is no longer needed.
   Factors.clear();
@@ -5424,6 +5722,13 @@
   if (Solution.empty())
     return;
 
+  if (EnableInitialCostCheck &&
+      IsInputCostCheaperThanSolutionCost(SolutionCost)) {
+    ++NumCheaperInput;
+    LLVM_DEBUG(dbgs() << "Skip using LSR's solution.\n");
+    return;
+  }
+
 #ifndef NDEBUG
   // Formulae should be legal.
   for (const LSRUse &LU : Uses) {
Index: test/Transforms/LoopStrengthReduce/2013-01-14-ReuseCast.ll
===================================================================
--- test/Transforms/LoopStrengthReduce/2013-01-14-ReuseCast.ll
+++ test/Transforms/LoopStrengthReduce/2013-01-14-ReuseCast.ll
@@ -1,4 +1,4 @@
-; RUN: opt -loop-reduce -S < %s | FileCheck %s
+; RUN: opt -loop-reduce -S -enable-lsr-input-cost-check=false < %s | FileCheck %s
 ;
 ; LTO of clang, which mistakenly uses no TargetLoweringInfo, causes a
 ; miscompile. ReuseOrCreateCast replaces a ptrtoint operand with undef.
Index: test/Transforms/LoopStrengthReduce/AArch64/skip-lsr-solution.ll
===================================================================
--- /dev/null
+++ test/Transforms/LoopStrengthReduce/AArch64/skip-lsr-solution.ll
@@ -0,0 +1,91 @@
+; REQUIRES: asserts
+; RUN: llc < %s -mtriple=aarch64 -lsr-insns-cost=true -debug-only=loop-reduce 2>&1 | FileCheck %s
+
+; ModuleID = 'loophmmer.c'
+source_filename = "loophmmer.c"
+target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
+target triple = "aarch64-dcg-linux-gnu"
+
+; void test(int M, int *P, int *P2, int *P3, int *P4,
+;           int *P5, int *P6, int *P7, int *P8) {
+;   int t = 0;
+;   for (int k = 1; k <= M; ++k) {
+;     P[k-1] = P[k-1] + P2[k-1];
+;     P[k-1] = P3[k-1] + P4[k-1];
+;     P[k-1] = P6[k-1] + P7[k-1];
+;
+;     if (k < M) {
+;       P6[k] = P8[k] + P[k];
+;       P[k] = P4[k] + P5[k];
+;     }
+;   }
+; }
+
+; CHECK: Skip using LSR's solution
+; CHECK-LABEL: test:
+; CHECK: lsl [[K_IDX:x[0-9]+]], [[K:x[0-9]+]], #2
+; CHECK: sub [[K_1_IDX:x[0-9]+]], [[K_IDX]], #4
+; CHECK: ldr w{{[0-9]+}}, [x{{[0-9]+}}, [[K_1_IDX]]]
+; CHECK: add [[K]], [[K]], #1
+
+define void @test(i32 %M, i32* %P, i32* readonly %P2, i32* readonly %P3, i32* readonly %P4, i32* readonly %P5, i32* %P6, i32* readonly %P7, i32* readonly %P8) {
+entry:
+  %cmp69 = icmp slt i32 %M, 1
+  br i1 %cmp69, label %for.cond.cleanup, label %for.body.preheader
+
+for.body.preheader:                               ; preds = %entry
+  %0 = sext i32 %M to i64
+  %1 = add i32 %M, 1
+  %wide.trip.count = zext i32 %1 to i64
+  br label %for.body
+
+for.cond.cleanup:                                 ; preds = %for.inc, %entry
+  ret void
+
+for.body:                                         ; preds = %for.inc, %for.body.preheader
+  %indvars.iv = phi i64 [ 1, %for.body.preheader ], [ %indvars.iv.next, %for.inc ]
+  %2 = add nsw i64 %indvars.iv, -1
+  %arrayidx = getelementptr inbounds i32, i32* %P, i64 %2
+  %3 = load i32, i32* %arrayidx, align 4
+  %arrayidx3 = getelementptr inbounds i32, i32* %P2, i64 %2
+  %4 = load i32, i32* %arrayidx3, align 4
+  %add = add nsw i32 %4, %3
+  store i32 %add, i32* %arrayidx, align 4
+  %arrayidx9 = getelementptr inbounds i32, i32* %P3, i64 %2
+  %5 = load i32, i32* %arrayidx9, align 4
+  %arrayidx12 = getelementptr inbounds i32, i32* %P4, i64 %2
+  %6 = load i32, i32* %arrayidx12, align 4
+  %add13 = add nsw i32 %6, %5
+  store i32 %add13, i32* %arrayidx, align 4
+
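+  ; Third group: P[k-1] = P6[k-1] + P7[k-1], still indexed by %2 = %iv - 1.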
+  %arrayidx19 = getelementptr inbounds i32, i32* %P6, i64 %2
+  %7 = load i32, i32* %arrayidx19, align 4
+  %arrayidx22 = getelementptr inbounds i32, i32* %P7, i64 %2
+  %8 = load i32, i32* %arrayidx22, align 4
+  %add23 = add nsw i32 %8, %7
+  store i32 %add23, i32* %arrayidx, align 4
+  %cmp27 = icmp slt i64 %indvars.iv, %0
+  br i1 %cmp27, label %if.then, label %for.inc
+
+if.then:                                          ; preds = %for.body
+  %arrayidx29 = getelementptr inbounds i32, i32* %P8, i64 %indvars.iv
+  %9 = load i32, i32* %arrayidx29, align 4
+  %arrayidx31 = getelementptr inbounds i32, i32* %P, i64 %indvars.iv
+  %10 = load i32, i32* %arrayidx31, align 4
+  %add32 = add nsw i32 %10, %9
+  %arrayidx34 = getelementptr inbounds i32, i32* %P6, i64 %indvars.iv
+  store i32 %add32, i32* %arrayidx34, align 4
+  %arrayidx36 = getelementptr inbounds i32, i32* %P4, i64 %indvars.iv
+  %11 = load i32, i32* %arrayidx36, align 4
+  %arrayidx38 = getelementptr inbounds i32, i32* %P5, i64 %indvars.iv
+  %12 = load i32, i32* %arrayidx38, align 4
+  %add39 = add nsw i32 %12, %11
+  store i32 %add39, i32* %arrayidx31, align 4
+  br label %for.inc
+
+for.inc:                                          ; preds = %for.body, %if.then
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  %exitcond = icmp eq i64 %indvars.iv.next, %wide.trip.count
+  br i1 %exitcond, label %for.cond.cleanup, label %for.body
+}
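---
Reviewer note: the heart of the change is the two-step comparison in
IsInputCostStillCheap(). The standalone sketch below models that comparison
outside of LLVM; ModelCost, applyWeight(), and inputStillCheap() are invented
names for illustration, and only the lexicographic fallback of Cost::isLess()
is modeled here (the real comparison also consults TargetTransformInfo and
the -lsr-insns-cost flag). It compiles as plain C++11.

#include <iostream>
#include <limits>
#include <tuple>

// Stand-in for the slice of LSR's Cost that this patch touches.
struct ModelCost {
  unsigned Insns = 0, NumRegs = 0, AddRecCost = 0, NumIVMuls = 0,
           NumBaseAdds = 0, ScaleCost = 0, ImmCost = 0, SetupCost = 0;

  // Lexicographic comparison, mirroring the tuple fallback of Cost::isLess().
  bool isLess(const ModelCost &O) const {
    return std::tie(Insns, NumRegs, AddRecCost, NumIVMuls, NumBaseAdds,
                    ScaleCost, ImmCost, SetupCost) <
           std::tie(O.Insns, O.NumRegs, O.AddRecCost, O.NumIVMuls,
                    O.NumBaseAdds, O.ScaleCost, O.ImmCost, O.SetupCost);
  }

  // Mirrors Cost::ApplyWeight(): penalize Insns and NumRegs by one and make
  // every other criterion maximally bad, so the weighted input only wins
  // when it is strictly better on the first two criteria.
  void applyWeight() {
    ++Insns;
    ++NumRegs;
    AddRecCost = NumIVMuls = NumBaseAdds = ImmCost = SetupCost = ScaleCost =
        std::numeric_limits<unsigned>::max();
  }
};

// Mirrors LSRInstance::IsInputCostStillCheap(): the input must beat the
// solution both before and after the conservative weighting.
bool inputStillCheap(const ModelCost &Solution, ModelCost Input) {
  if (!Input.isLess(Solution))
    return false;
  Input.applyWeight();
  return Input.isLess(Solution);
}

int main() {
  ModelCost Solution, CheapInput, MarginalInput;
  Solution.Insns = 10;      Solution.NumRegs = 6;
  CheapInput.Insns = 8;     CheapInput.NumRegs = 4;
  MarginalInput.Insns = 10; MarginalInput.NumRegs = 5;

  // A clearly cheaper input survives the weighting, so the loop would be
  // left untouched...
  std::cout << inputStillCheap(Solution, CheapInput) << '\n';    // prints 1
  // ...but an input that wins only by a hair does not, and LSR's solution
  // would be used after all.
  std::cout << inputStillCheap(Solution, MarginalInput) << '\n'; // prints 0
  return 0;
}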