Index: lib/Transforms/Scalar/DeadStoreElimination.cpp
===================================================================
--- lib/Transforms/Scalar/DeadStoreElimination.cpp
+++ lib/Transforms/Scalar/DeadStoreElimination.cpp
@@ -44,6 +44,7 @@
 STATISTIC(NumRedundantStores, "Number of redundant stores deleted");
 STATISTIC(NumFastStores, "Number of stores deleted");
 STATISTIC(NumFastOther , "Number of other instrs removed");
+STATISTIC(NumNonLocalStores, "Number of non-local stores deleted");
 
 namespace {
   struct DSE : public FunctionPass {
@@ -477,6 +478,26 @@
   return true;
 }
 
+/// Find all blocks including single-block loops that unconditionally
+/// lead to the block BB and append them to Blocks. We exclude blocks
+/// that form cycles with BB.
+static void
+FindUncondPredsIncludingSimpleLoops(SmallVectorImpl<BasicBlock *> &Blocks,
+                                    BasicBlock *BB, DominatorTree *DT) {
+  for (pred_iterator I = pred_begin(BB), E = pred_end(BB); I != E; ++I) {
+    BasicBlock *Pred = *I;
+    if (Pred == BB)
+      continue;
+    TerminatorInst *PredTI = Pred->getTerminator();
+    // A predecessor qualifies when it either branches unconditionally to BB,
+    // or is a single-block loop (one successor is the block itself) whose
+    // other edge leaves to BB.
+    if ((PredTI->getNumSuccessors() == 1) ||
+        ((PredTI->getNumSuccessors() == 2) &&
+         ((PredTI->getSuccessor(0) == Pred) ||
+          (PredTI->getSuccessor(1) == Pred)))) {
+      if (DT->isReachableFromEntry(Pred) && !DT->dominates(BB, Pred))
+        Blocks.push_back(Pred);
+    }
+  }
+}
 
 //===----------------------------------------------------------------------===//
 // DSE Pass
@@ -554,98 +575,188 @@
     MemDepResult InstDep = MD->getDependency(Inst);
 
-    // Ignore any store where we can't find a local dependence.
-    // FIXME: cross-block DSE would be fun. :)
-    if (!InstDep.isDef() && !InstDep.isClobber())
-      continue;
-
-    // Figure out what location is being stored to.
-    MemoryLocation Loc = getLocForWrite(Inst, *AA);
-
-    // If we didn't get a useful location, fail.
-    if (!Loc.Ptr)
-      continue;
+    if (InstDep.isDef() || InstDep.isClobber()) {
+      // Figure out what location is being stored to.
+      MemoryLocation Loc = getLocForWrite(Inst, *AA);
 
-    while (InstDep.isDef() || InstDep.isClobber()) {
-      // Get the memory clobbered by the instruction we depend on.  MemDep will
-      // skip any instructions that 'Loc' clearly doesn't interact with.  If we
-      // end up depending on a may- or must-aliased load, then we can't optimize
-      // away the store and we bail out.  However, if we depend on on something
-      // that overwrites the memory location we *can* potentially optimize it.
-      //
-      // Find out what memory location the dependent instruction stores.
-      Instruction *DepWrite = InstDep.getInst();
-      MemoryLocation DepLoc = getLocForWrite(DepWrite, *AA);
-      // If we didn't get a useful location, or if it isn't a size, bail out.
-      if (!DepLoc.Ptr)
-        break;
+      // If we didn't get a useful location, fail.
+      if (!Loc.Ptr)
+        continue;
 
-      // If we find a write that is a) removable (i.e., non-volatile), b) is
-      // completely obliterated by the store to 'Loc', and c) which we know that
-      // 'Inst' doesn't load from, then we can remove it.
-      if (isRemovable(DepWrite) &&
-          !isPossibleSelfRead(Inst, Loc, DepWrite, *TLI, *AA)) {
-        int64_t InstWriteOffset, DepWriteOffset;
-        OverwriteResult OR =
-            isOverwrite(Loc, DepLoc, DL, *TLI, DepWriteOffset, InstWriteOffset);
-        if (OR == OverwriteComplete) {
-          DEBUG(dbgs() << "DSE: Remove Dead Store:\n  DEAD: "
-                       << *DepWrite << "\n  KILLER: " << *Inst << '\n');
-
-          // Delete the store and now-dead instructions that feed it.
-          DeleteDeadInstruction(DepWrite, *MD, *TLI);
-          ++NumFastStores;
-          MadeChange = true;
-
-          // DeleteDeadInstruction can delete the current instruction in loop
-          // cases, reset BBI.
-          BBI = Inst;
-          if (BBI != BB.begin())
-            --BBI;
-          break;
-        } else if (OR == OverwriteEnd && isShortenable(DepWrite)) {
-          // TODO: base this on the target vector size so that if the earlier
-          // store was too small to get vector writes anyway then its likely
-          // a good idea to shorten it
-          // Power of 2 vector writes are probably always a bad idea to optimize
-          // as any store/memset/memcpy is likely using vector instructions so
-          // shortening it to not vector size is likely to be slower
-          MemIntrinsic* DepIntrinsic = cast<MemIntrinsic>(DepWrite);
-          unsigned DepWriteAlign = DepIntrinsic->getAlignment();
-          if (llvm::isPowerOf2_64(InstWriteOffset) ||
-              ((DepWriteAlign != 0) && InstWriteOffset % DepWriteAlign == 0)) {
-
-            DEBUG(dbgs() << "DSE: Remove Dead Store:\n  OW END: "
-                         << *DepWrite << "\n  KILLER (offset "
-                         << InstWriteOffset << ", "
-                         << DepLoc.Size << ")"
-                         << *Inst << '\n');
-
-            Value* DepWriteLength = DepIntrinsic->getLength();
-            Value* TrimmedLength = ConstantInt::get(DepWriteLength->getType(),
-                                                    InstWriteOffset -
-                                                    DepWriteOffset);
-            DepIntrinsic->setLength(TrimmedLength);
-            MadeChange = true;
-          }
-        }
-      }
+      while (InstDep.isDef() || InstDep.isClobber()) {
+        // Get the memory clobbered by the instruction we depend on.  MemDep will
+        // skip any instructions that 'Loc' clearly doesn't interact with.  If we
+        // end up depending on a may- or must-aliased load, then we can't optimize
+        // away the store and we bail out.  However, if we depend on on something
+        // that overwrites the memory location we *can* potentially optimize it.
+        //
+        // Find out what memory location the dependent instruction stores.
+        Instruction *DepWrite = InstDep.getInst();
+        MemoryLocation DepLoc = getLocForWrite(DepWrite, *AA);
+        // If we didn't get a useful location, or if it isn't a size, bail out.
+        if (!DepLoc.Ptr)
+          break;
+
+        // If we find a write that is a) removable (i.e., non-volatile), b) is
+        // completely obliterated by the store to 'Loc', and c) which we know that
+        // 'Inst' doesn't load from, then we can remove it.
+        if (isRemovable(DepWrite) &&
+            !isPossibleSelfRead(Inst, Loc, DepWrite, *TLI, *AA)) {
+          int64_t InstWriteOffset, DepWriteOffset;
+          OverwriteResult OR =
+              isOverwrite(Loc, DepLoc, DL, *TLI, DepWriteOffset, InstWriteOffset);
+          if (OR == OverwriteComplete) {
+            DEBUG(dbgs() << "DSE: Remove Dead Store:\n  DEAD: "
+                         << *DepWrite << "\n  KILLER: " << *Inst << '\n');
+
+            // Delete the store and now-dead instructions that feed it.
+            DeleteDeadInstruction(DepWrite, *MD, *TLI);
+            ++NumFastStores;
+            MadeChange = true;
+
+            // DeleteDeadInstruction can delete the current instruction in loop
+            // cases, reset BBI.
+            BBI = Inst;
+            if (BBI != BB.begin())
+              --BBI;
+            break;
+          } else if (OR == OverwriteEnd && isShortenable(DepWrite)) {
+            // TODO: base this on the target vector size so that if the earlier
+            // store was too small to get vector writes anyway then its likely
+            // a good idea to shorten it
+            // Power of 2 vector writes are probably always a bad idea to optimize
+            // as any store/memset/memcpy is likely using vector instructions so
+            // shortening it to not vector size is likely to be slower
+            MemIntrinsic* DepIntrinsic = cast<MemIntrinsic>(DepWrite);
+            unsigned DepWriteAlign = DepIntrinsic->getAlignment();
+            if (llvm::isPowerOf2_64(InstWriteOffset) ||
+                ((DepWriteAlign != 0) && InstWriteOffset % DepWriteAlign == 0)) {
+
+              DEBUG(dbgs() << "DSE: Remove Dead Store:\n  OW END: "
+                           << *DepWrite << "\n  KILLER (offset "
+                           << InstWriteOffset << ", "
+                           << DepLoc.Size << ")"
+                           << *Inst << '\n');
+
+              Value* DepWriteLength = DepIntrinsic->getLength();
+              Value* TrimmedLength = ConstantInt::get(DepWriteLength->getType(),
+                                                      InstWriteOffset -
+                                                      DepWriteOffset);
+              DepIntrinsic->setLength(TrimmedLength);
+              MadeChange = true;
+            }
+          }
+        }
+
+        // If this is a may-aliased store that is clobbering the store value, we
+        // can keep searching past it for another must-aliased pointer that stores
+        // to the same location.  For example, in:
+        //   store -> P
+        //   store -> Q
+        //   store -> P
+        // we can remove the first store to P even though we don't know if P and Q
+        // alias.
+        if (DepWrite == &BB.front()) break;
+
+        // Can't look past this instruction if it might read 'Loc'.
+        if (AA->getModRefInfo(DepWrite, Loc) & MRI_Ref)
+          break;
+
+        InstDep = MD->getPointerDependencyFrom(Loc, false, DepWrite, &BB);
+      }
+    }
 
-      // If this is a may-aliased store that is clobbering the store value, we
-      // can keep searching past it for another must-aliased pointer that stores
-      // to the same location.  For example, in:
-      //   store -> P
-      //   store -> Q
-      //   store -> P
-      // we can remove the first store to P even though we don't know if P and Q
-      // alias.
-      if (DepWrite == &BB.front()) break;
-
-      // Can't look past this instruction if it might read 'Loc'.
-      if (AA->getModRefInfo(DepWrite, Loc) & MRI_Ref)
-        break;
-
-      InstDep = MD->getPointerDependencyFrom(Loc, false, DepWrite, &BB);
-    }
+    // DSE across BB
+    else if (InstDep.isNonLocal()) {
+      if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
+        // Get the location being stored to.
+        // If we didn't get a useful location, bail out.
+        MemoryLocation Loc = getLocForWrite(SI, *AA);
+        if (!Loc.Ptr)
+          continue;
+        Value *AObj = GetUnderlyingObject(SI->getPointerOperand(), DL);
+
+        SmallVector<BasicBlock *, 16> Blocks;
+        FindUncondPredsIncludingSimpleLoops(Blocks, &BB, DT);
+
+        while (!Blocks.empty()) {
+          BasicBlock *PB = Blocks.pop_back_val();
+          TerminatorInst *InstPt = PB->getTerminator();
+          bool StopProcessPBAndItsPreds = false;
+
+          MemDepResult Dep =
+              MD->getPointerDependencyFrom(Loc, false, InstPt, PB, SI);
+          while (Dep.isDef() || Dep.isClobber()) {
+            Instruction *Dependency = Dep.getInst();
+
+            // Filter out false dependency from load to store in loops
+            // looking through phis.
+            if (LoadInst *LI = dyn_cast<LoadInst>(Dependency)) {
+              SmallVector<Value *, 4> Pointers;
+              GetUnderlyingObjects(LI->getPointerOperand(), Pointers, DL);
+              bool NoAlias = true;
+              for (SmallVectorImpl<Value *>::iterator I = Pointers.begin(),
+                   E = Pointers.end(); I != E; ++I) {
+                Value *BObj = *I;
+                if (!AA->isNoAlias(AObj, DL.getTypeStoreSize(AObj->getType()),
+                                   BObj,
+                                   DL.getTypeStoreSize(BObj->getType()))) {
+                  NoAlias = false;
+                  break;
+                }
+              }
+              if (NoAlias) {
+                if (Dependency != &PB->front()) {
+                  Dep = MD->getPointerDependencyFrom(Loc, false, Dependency, PB,
+                                                     SI);
+                  continue;
+                }
+                break; // we reached top of PB and found no dependencies
+              }
+            }
+
+            // If we didn't get a useful location, bail out.
+            MemoryLocation DepLoc = getLocForWrite(Dependency, *AA);
+            if (!DepLoc.Ptr) {
+              StopProcessPBAndItsPreds = true;
+              break;
+            }
+            // If instruction doesn't write memory or not removable, bail out.
+            if (!hasMemoryWrite(Dependency, *TLI) || !isRemovable(Dependency)) {
+              StopProcessPBAndItsPreds = true;
+              break;
+            }
+            // Don't remove a store within a loop while SI is outside the loop;
+            // we need more analysis: e.g. looking for an interferring load
+            // above the store within the loop, etc.
+            if (InstPt->getNumSuccessors() == 2) {
+              StopProcessPBAndItsPreds = true;
+              break;
+            }
+
+            int64_t InstWriteOffset, DepWriteOffset;
+            OverwriteResult OR = isOverwrite(Loc, DepLoc, DL, *TLI,
+                                             DepWriteOffset, InstWriteOffset);
+            if (OR == OverwriteComplete) {
+              DEBUG(dbgs() << "DSE: Remove Non-Local Dead Store:\n  DEAD: "
                           << *Dependency << "\n  KILLER: " << *SI << '\n');
+
+              // Delete the store and now-dead instructions that feed it.
+              DeleteDeadInstruction(Dependency, *MD, *TLI);
+              ++NumNonLocalStores;
+              MadeChange = true;
+              // no need to search for redundant stores up in the block or
+              // up into predecessors; those should have been handled earlier
+            }
+            StopProcessPBAndItsPreds = true;
+            break;
+          }
+
+          if (!StopProcessPBAndItsPreds)
+            FindUncondPredsIncludingSimpleLoops(Blocks, PB, DT);
+        }
+      }
+    }
   }
Index: test/Transforms/DeadStoreElimination/cycle.ll
===================================================================
--- /dev/null
+++ test/Transforms/DeadStoreElimination/cycle.ll
@@ -0,0 +1,112 @@
+; RUN: opt < %s -basicaa -dse -S | FileCheck %s
+target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-n32-S64"
+target triple = "armv7-none-linux-gnueabi"
+
+%"struct.pov::Sym_Table_Struct" = type { i8*, [257 x %"struct.pov::Sym_Table_Entry"*] }
+%"struct.pov::Sym_Table_Entry" = type { %"struct.pov::Sym_Table_Entry"*, i8*, i8*, i32 }
+%"struct.pov::InputFileData" = type { %"class.pov_base::ITextStream"*, i8 }
+%"class.pov_base::ITextStream" = type { i32 (...)**, %"class.pov_base::IStream"*, [512 x i8], i32, i32, i32, i32, i8*, i32, i32 }
+%"class.pov_base::IStream" = type { %"class.pov_base::IOBase" }
+%"class.pov_base::IOBase" = type { i32 (...)**, i8, %struct._IO_FILE*, i32, i32, i8* }
+%struct._IO_FILE = type { i32, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, %struct._IO_marker*, %struct._IO_FILE*, i32, i32, i32, i16, i8, [1 x i8], i8*, i64, i8*, i8*, i8*, i8*, i32, i32, [40 x i8] }
+%struct._IO_marker = type { %struct._IO_marker*, %struct._IO_FILE*, i32 }
+%"struct.pov::Token_Struct" = type { i32, i32, %"struct.pov_base::ITextStream::FilePos", i32, i32, i8*, double, i32, i32, %"class.pov_base::ITextStream"*, i8*, i32*, i8**, i8 }
+%"struct.pov_base::ITextStream::FilePos" = type { i32, i32 }
+%"struct.pov::Cond_Stack_Entry" = type { i32, double, %"class.pov_base::ITextStream"*, %"class.pov_base::ITextStream"*, i8*, i32, %"struct.pov::Pov_Macro_Struct"*, %"struct.pov_base::ITextStream::FilePos" }
+%"struct.pov::Pov_Macro_Struct" = type { i8*, i8*, %"struct.pov_base::ITextStream::FilePos", i32, i32, [56 x i8*] }
+%"struct.pov::Reserved_Word_Struct" = type { i32, i8* }
+%"struct.pov::OPTIONS_STRUCT" = type { i32, i8, i8, i8, i32, i32, [1024 x i8], [1024 x i8], [1024 x i8], [1024 x i8], [1024 x i8], float, float, i32, i32, double, double, i32, [25 x i8*], i32, i32, i32, double, double, i32, i32, double, double, i32, i8, i32, i32, i32, i32, %"struct.pov::FRAMESEQ", double, i32, double, double, double, double, double, i32, i32, i32, i32, i32, i32, i32, i32, i32, double, double, double, i32, i32, i8*, i8*, i32, i8, double, double, i32, i32, i8, i32, i32, [1024 x i8], [6 x %"struct.pov::shelldata"], [1024 x i8], [1024 x i8], i32, i32, i32, i32, i32, i32, %struct.POVMSData }
+%"struct.pov::FRAMESEQ" = type { i32, double, i32, i32, double, i32, i32, double, i32, double, i32, double, i8, i8 }
+%"struct.pov::shelldata" = type { i32, i32, [250 x i8] }
+%struct.POVMSData = type { i32, i32, %union.anon }
+%union.anon = type { i8* }
+%"class.pov_base::OTextStream" = type { i32 (...)**, %"class.pov_base::OStream"*, i8* }
+%"class.pov_base::OStream" = type { %"class.pov_base::IOBase" }
+%"struct.pov::Data_File_Struct" = type { %"class.pov_base::ITextStream"*, %"class.pov_base::OTextStream"*, i8 }
+%"struct.pov::Pov_Array_Struct" = type { i32, i32, i32, [5 x i32], [5 x i32], i8** }
+
+@_ZN3pov6TablesE = global [100 x %"struct.pov::Sym_Table_Struct"*] zeroinitializer, align 4
+@_ZN3pov11Table_IndexE = global i32 0, align 4
+@_ZN3pov18String_Fast_BufferE = global [256 x i8] zeroinitializer, align 1
+@_ZN3pov12String_IndexE = global i32 0, align 4
+@_ZN3pov18String_Buffer_FreeE = global i32 0, align 4
+@_ZN3pov6StringE = global i8* null, align 4
+@_ZN3pov7String2E = global i8* null, align 4
+@_ZN3pov19Current_Token_CountE = global i64 0, align 8
+@_ZN3pov11token_countE = global i32 0, align 4
+@_ZN3pov10line_countE = global i32 10, align 4
+@_ZN3pov18Include_File_IndexE = global i32 0, align 4
+@_ZN3pov10Input_FileE = global %"struct.pov::InputFileData"* null, align 4
+@_ZN3pov13Include_FilesE = global [32 x %"struct.pov::InputFileData"] zeroinitializer, align 4
+@_ZN3pov5TokenE = global %"struct.pov::Token_Struct" zeroinitializer, align 8
+@_ZN3pov9Echo_IndxE = global i32 0, align 4
+@_ZN3pov10Cond_StackE = global %"struct.pov::Cond_Stack_Entry"* null, align 4
+@_ZN3pov8CS_IndexE = global i32 0, align 4
+@_ZN3pov8SkippingE = global i32 0, align 4
+@_ZN3pov12Inside_IfdefE = global i32 0, align 4
+@_ZN3pov15Inside_MacroDefE = global i32 0, align 4
+@_ZN3pov7Got_EOFE = global i32 0, align 4
+@_ZN3pov21Conversion_Util_TableE = global [535 x i32] zeroinitializer, align 4
+
+;
+; CHECK: store i32 64
+;
+define void @_ZN3pov18pre_init_tokenizerEv() {
+entry:
+  store i32 0, i32* getelementptr inbounds (%"struct.pov::Token_Struct", %"struct.pov::Token_Struct"* @_ZN3pov5TokenE, i32 0, i32 2, i32 1), align 4
+  store i32 0, i32* getelementptr inbounds (%"struct.pov::Token_Struct", %"struct.pov::Token_Struct"* @_ZN3pov5TokenE, i32 0, i32 2, i32 0), align 8
+  store i32 0, i32* getelementptr inbounds (%"struct.pov::Token_Struct", %"struct.pov::Token_Struct"* @_ZN3pov5TokenE, i32 0, i32 3), align 8
+  store i8* null, i8** getelementptr inbounds (%"struct.pov::Token_Struct", %"struct.pov::Token_Struct"* @_ZN3pov5TokenE, i32 0, i32 5), align 8
+  store i32 0, i32* getelementptr inbounds (%"struct.pov::Token_Struct", %"struct.pov::Token_Struct"* @_ZN3pov5TokenE, i32 0, i32 7), align 8
+  store i32 0, i32* getelementptr inbounds (%"struct.pov::Token_Struct", %"struct.pov::Token_Struct"* @_ZN3pov5TokenE, i32 0, i32 8), align 4
+  store i8* null, i8** getelementptr inbounds (%"struct.pov::Token_Struct", %"struct.pov::Token_Struct"* @_ZN3pov5TokenE, i32 0, i32 10), align 4
+  store i32 10, i32* @_ZN3pov10line_countE, align 4
+  store i32 0, i32* @_ZN3pov11token_countE, align 4
+  store i64 0, i64* @_ZN3pov19Current_Token_CountE, align 8
+  store i32 0, i32* @_ZN3pov18Include_File_IndexE, align 4
+  store i32 0, i32* @_ZN3pov9Echo_IndxE, align 4
+  store i32 0, i32* @_ZN3pov8CS_IndexE, align 4
+  store i32 0, i32* @_ZN3pov8SkippingE, align 4
+  store i32 0, i32* @_ZN3pov12Inside_IfdefE, align 4
+  store i32 0, i32* @_ZN3pov15Inside_MacroDefE, align 4
+  store %"struct.pov::Cond_Stack_Entry"* null, %"struct.pov::Cond_Stack_Entry"** @_ZN3pov10Cond_StackE, align 4
+  store %"struct.pov::InputFileData"* getelementptr inbounds ([32 x %"struct.pov::InputFileData"], [32 x %"struct.pov::InputFileData"]* @_ZN3pov13Include_FilesE, i32 0, i32 0), %"struct.pov::InputFileData"** @_ZN3pov10Input_FileE, align 4
+  store i32 -1, i32* @_ZN3pov11Table_IndexE, align 4
+  br label %for.body
+
+for.body:
+  %i.019 = phi i32 [ 0, %entry ], [ %inc, %for.inc ], [ %inc20, %for.inc.thread ]
+  %arrayidx = getelementptr inbounds [535 x i32], [535 x i32]* @_ZN3pov21Conversion_Util_TableE, i32 0, i32 %i.019
+  store i32 %i.019, i32* %arrayidx, align 4
+  %cmp1 = icmp slt i32 %i.019, 64
+  br i1 %cmp1, label %for.inc.thread, label %if.else
+
+for.inc.thread:
+  store i32 64, i32* %arrayidx, align 4
+  %inc20 = add nsw i32 %i.019, 1
+  br label %for.body
+
+if.else:
+  %cmp3 = icmp slt i32 %i.019, 77
+  br i1 %cmp3, label %if.then4, label %if.else6
+
+if.then4:
+  store i32 77, i32* %arrayidx, align 4
+  br label %for.inc
+
+if.else6:
+  %cmp7 = icmp slt i32 %i.019, 89
+  br i1 %cmp7, label %if.then8, label %for.inc
+
+if.then8:
+  store i32 89, i32* %arrayidx, align 4
+  br label %for.inc
+
+for.inc:
+  %inc = add nsw i32 %i.019, 1
+  %exitcond = icmp eq i32 %i.019, 534
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:
+  ret void
+}
Index: test/Transforms/DeadStoreElimination/ifthen.ll
===================================================================
--- /dev/null
+++ test/Transforms/DeadStoreElimination/ifthen.ll
@@ -0,0 +1,22 @@
+; RUN: opt < %s -basicaa -dse -S | FileCheck %s
+target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-n32-S64"
+target triple = "armv4t-none-linux-gnueabi"
+
+; CHECK-NOT: stval
+define void @foo(i32* noalias nocapture %a, i32* noalias nocapture readonly %b, i32 %c) {
+entry:
+  %cmp = icmp sgt i32 %c, 0
+  br i1 %cmp, label %if.then, label %if.end
+
+if.then:
+  %0 = load i32, i32* %b, align 4
+  %stval = add nsw i32 %0, 1
+  store i32 %stval, i32* %a, align 4
+  br label %if.end
+
+if.end:
+  %m.0 = phi i32 [ 13, %if.then ], [ 10, %entry ]
+  store i32 %m.0, i32* %a, align 4
+  ret void
+}
+
Index: test/Transforms/DeadStoreElimination/loop.ll
===================================================================
--- /dev/null
+++ test/Transforms/DeadStoreElimination/loop.ll
@@ -0,0 +1,44 @@
+; RUN: opt < %s -basicaa -dse -S | FileCheck %s
+target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-n32-S64"
+target triple = "armv4t-none-linux-gnueabi"
+
+;
+; CHECK-NOT: store i32 0
+;
+define void @sum(i32 %N, i32* noalias nocapture %C, i32* noalias nocapture readonly %A, i32* noalias nocapture readonly %B) {
+entry:
+  %cmp24 = icmp eq i32 %N, 0
+  br i1 %cmp24, label %for.end11, label %for.body
+
+for.body:
+  %i.025 = phi i32 [ %inc10, %for.cond1.for.inc9_crit_edge ], [ 0, %entry ]
+  %arrayidx = getelementptr inbounds i32, i32* %C, i32 %i.025
+  store i32 0, i32* %arrayidx, align 4
+  %mul = mul i32 %i.025, %N
+  %arrayidx4.gep = getelementptr i32, i32* %A, i32 %mul
+  br label %for.body3
+
+for.body3:
+  %0 = phi i32 [ 0, %for.body ], [ %add8, %for.body3 ]
+  %arrayidx4.phi = phi i32* [ %arrayidx4.gep, %for.body ], [ %arrayidx4.inc, %for.body3 ]
+  %arrayidx5.phi = phi i32* [ %B, %for.body ], [ %arrayidx5.inc, %for.body3 ]
+  %j.023 = phi i32 [ 0, %for.body ], [ %inc, %for.body3 ]
+  %1 = load i32, i32* %arrayidx4.phi, align 4
+  %2 = load i32, i32* %arrayidx5.phi, align 4
+  %add6 = add nsw i32 %2, %1
+  %add8 = add nsw i32 %add6, %0
+  %inc = add i32 %j.023, 1
+  %exitcond = icmp ne i32 %inc, %N
+  %arrayidx4.inc = getelementptr i32, i32* %arrayidx4.phi, i32 1
+  %arrayidx5.inc = getelementptr i32, i32* %arrayidx5.phi, i32 1
+  br i1 %exitcond, label %for.body3, label %for.cond1.for.inc9_crit_edge
+
+for.cond1.for.inc9_crit_edge:
+  store i32 %add8, i32* %arrayidx, align 4
+  %inc10 = add i32 %i.025, 1
+  %exitcond26 = icmp ne i32 %inc10, %N
+  br i1 %exitcond26, label %for.body, label %for.end11
+
+for.end11:
+  ret void
+}