Index: include/polly/CodeGen/BlockGenerators.h
===================================================================
--- include/polly/CodeGen/BlockGenerators.h
+++ include/polly/CodeGen/BlockGenerators.h
@@ -361,13 +361,11 @@
   Value *getOrCreateAlloca(Value *ScalarBase, ScalarAllocaMapTy &Map,
                            const char *NameExt);
 
-  /// @brief Generate reload of scalars demoted to memory and needed by @p Inst.
+  /// @brief Generate reload of scalars demoted to memory and needed by @p Stmt.
   ///
   /// @param Stmt  The statement we generate code for.
-  /// @param Inst  The instruction that might need reloaded values.
   /// @param BBMap A mapping from old values to their new values in this block.
-  virtual void generateScalarLoads(ScopStmt &Stmt, const Instruction *Inst,
-                                   ValueMapT &BBMap);
+  virtual void generateScalarLoads(ScopStmt &Stmt, ValueMapT &BBMap);
 
   /// @brief Generate the scalar stores for the given statement.
   ///
@@ -376,14 +374,13 @@
   ///        be demoted to memory.
   ///
   /// @param Stmt  The statement we generate code for.
-  /// @param BB    The basic block we generate code for.
   /// @param LTS   A mapping from loops virtual canonical induction
   ///              variable to their new values
   ///              (for values recalculated in the new ScoP, but not
   ///              within this basic block)
   /// @param BBMap A mapping from old values to their new values in this block.
-  virtual void generateScalarStores(ScopStmt &Stmt, BasicBlock *BB,
-                                    LoopToScevMapT &LTS, ValueMapT &BBMap);
+  virtual void generateScalarStores(ScopStmt &Stmt, LoopToScevMapT &LTS,
+                                    ValueMapT &BBMap);
 
   /// @brief Handle users of @p Inst outside the SCoP.
   ///
@@ -758,13 +755,11 @@
   void addOperandToPHI(ScopStmt &Stmt, const PHINode *PHI, PHINode *PHICopy,
                        BasicBlock *IncomingBB, LoopToScevMapT &LTS);
 
-  /// @brief Generate reload of scalars demoted to memory and needed by @p Inst.
+  /// @brief Generate reload of scalars demoted to memory and needed by @p Stmt.
   ///
   /// @param Stmt  The statement we generate code for.
-  /// @param Inst  The instruction that might need reloaded values.
   /// @param BBMap A mapping from old values to their new values in this block.
-  virtual void generateScalarLoads(ScopStmt &Stmt, const Instruction *Inst,
-                                   ValueMapT &BBMap) override;
+  virtual void generateScalarLoads(ScopStmt &Stmt, ValueMapT &BBMap) override;
 
   /// @brief Generate the scalar stores for the given statement.
   ///
@@ -773,13 +768,11 @@
   ///        be demoted to memory.
   ///
   /// @param Stmt  The statement we generate code for.
-  /// @param BB    The basic block we generate code for.
   /// @param LTS   A mapping from loops virtual canonical induction variable to
   ///              their new values (for values recalculated in the new ScoP,
   ///              but not within this basic block)
   /// @param BBMap A mapping from old values to their new values in this block.
-  virtual void generateScalarStores(ScopStmt &Stmt, BasicBlock *BB,
-                                    LoopToScevMapT &LTS,
+  virtual void generateScalarStores(ScopStmt &Stmt, LoopToScevMapT &LTS,
                                     ValueMapT &BBMAp) override;
 
   /// @brief Copy a single PHI instruction.
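The two header hunks above replace the per-instruction scalar hooks with per-statement ones: all demoted scalars are reloaded once before a statement's instructions are copied, and written back once afterwards. Below is a minimal, self-contained sketch of that ordering; ScopStmt, MemoryAccess, and the access kinds are simplified stand-ins for illustration, not Polly's real classes.

#include <iostream>
#include <vector>

enum class Kind { Explicit, Scalar, PHI }; // simplified access origins
enum class Dir { Read, Write };

struct MemoryAccess {
  Kind Origin;
  Dir Direction;
  const char *Name;
  bool isExplicit() const { return Origin == Kind::Explicit; }
  bool isRead() const { return Direction == Dir::Read; }
  bool isWrite() const { return Direction == Dir::Write; }
};

struct ScopStmt {
  std::vector<MemoryAccess> Accesses;
};

// Models the new generateScalarLoads: iterate the statement's accesses
// instead of looking up accesses per instruction.
void generateScalarLoads(const ScopStmt &Stmt) {
  for (const MemoryAccess &MA : Stmt.Accesses) {
    if (MA.isExplicit() || MA.isWrite())
      continue;
    std::cout << "  reload " << MA.Name << " from its alloca\n";
  }
}

// Models the new generateScalarStores: write back every implicit write.
void generateScalarStores(const ScopStmt &Stmt) {
  for (const MemoryAccess &MA : Stmt.Accesses) {
    if (MA.isExplicit() || MA.isRead())
      continue;
    std::cout << "  store " << MA.Name << " to its alloca\n";
  }
}

int main() {
  ScopStmt Stmt{{{Kind::Scalar, Dir::Read, "%x.s2a"},
                 {Kind::Explicit, Dir::Read, "%A[i]"},
                 {Kind::Scalar, Dir::Write, "%y.s2a"}}};
  generateScalarLoads(Stmt);  // all scalar reloads up front
  std::cout << "  ...copy the statement's instructions...\n";
  generateScalarStores(Stmt); // all scalar write-backs at the end
}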
Index: include/polly/ScopInfo.h
===================================================================
--- include/polly/ScopInfo.h
+++ include/polly/ScopInfo.h
@@ -451,9 +451,6 @@
   bool isAffine() const { return IsAffine; }
 
-  /// @brief Is this MemoryAccess modeling special PHI node accesses?
-  bool isPHI() const { return Origin == PHI; }
-
   __isl_give isl_basic_map *createBasicAccessMap(ScopStmt *Statement);
 
   void assumeNoOutOfBound();
@@ -631,6 +628,9 @@
   ///        nodes.
   bool isImplicit() const { return !isExplicit(); }
 
+  /// @brief Is this MemoryAccess modeling special PHI node accesses?
+  bool isPHI() const { return Origin == PHI; }
+
   /// @brief Get the statement that contains this memory access.
   ScopStmt *getStatement() const { return Statement; }
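The ScopInfo.h change only moves the isPHI() predicate next to the other access-kind queries (isExplicit/isImplicit), so code generation can classify accesses uniformly. The generators below obtain a scalar's demoted stack slot through getOrCreateAlloca(..., Map, NameExt); the following is a hedged standalone sketch of that one-slot-per-scalar memoization. Slot, the map type, and main are illustrative stand-ins; the ".s2a" and ".phiops" suffixes mirror the names visible in the tests.

#include <iostream>
#include <map>
#include <string>

struct Slot {
  std::string Name; // stands in for the llvm::AllocaInst
};

using ScalarAllocaMap = std::map<std::string, Slot>;

// Create the stack slot for a scalar on first use; reuse it afterwards.
Slot &getOrCreateAlloca(const std::string &ScalarBase, ScalarAllocaMap &Map,
                        const char *NameExt) {
  auto It = Map.find(ScalarBase);
  if (It != Map.end())
    return It->second; // already demoted: reuse the existing slot
  Slot &S = Map[ScalarBase];
  S.Name = ScalarBase + NameExt; // e.g. "%tmp.0" -> "%tmp.0.s2a"
  std::cout << "create alloca " << S.Name << "\n";
  return S;
}

int main() {
  ScalarAllocaMap ScalarMap;
  getOrCreateAlloca("%tmp.0", ScalarMap, ".s2a");
  getOrCreateAlloca("%tmp.0", ScalarMap, ".s2a"); // no second alloca
  getOrCreateAlloca("%merge", ScalarMap, ".phiops");
}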
Index: lib/CodeGen/BlockGenerators.cpp
===================================================================
--- lib/CodeGen/BlockGenerators.cpp
+++ lib/CodeGen/BlockGenerators.cpp
@@ -271,10 +271,6 @@
 void BlockGenerator::copyInstruction(ScopStmt &Stmt, Instruction *Inst,
                                      ValueMapT &BBMap, LoopToScevMapT &LTS,
                                      isl_id_to_ast_expr *NewAccesses) {
-
-  // First check for possible scalar dependences for this instruction.
-  generateScalarLoads(Stmt, Inst, BBMap);
-
   // Terminator instructions control the control flow. They are explicitly
   // expressed in the clast and do not need to be copied.
   if (Inst->isTerminator())
@@ -335,24 +331,25 @@
                                    ValueMapT &BBMap, LoopToScevMapT &LTS,
                                    isl_id_to_ast_expr *NewAccesses) {
   BasicBlock *CopyBB = splitBB(BB);
+  Builder.SetInsertPoint(CopyBB->begin());
+  generateScalarLoads(Stmt, BBMap);
+
   copyBB(Stmt, BB, CopyBB, BBMap, LTS, NewAccesses);
+
+  // After a basic block was copied store all scalars that escape this block in
+  // their alloca.
+  generateScalarStores(Stmt, LTS, BBMap);
   return CopyBB;
 }
 
 void BlockGenerator::copyBB(ScopStmt &Stmt, BasicBlock *BB, BasicBlock *CopyBB,
                             ValueMapT &BBMap, LoopToScevMapT &LTS,
                             isl_id_to_ast_expr *NewAccesses) {
-  Builder.SetInsertPoint(CopyBB->begin());
   EntryBB = &CopyBB->getParent()->getEntryBlock();
 
   for (Instruction &Inst : *BB)
     copyInstruction(Stmt, &Inst, BBMap, LTS, NewAccesses);
 
-  // After a basic block was copied store all scalars that escape this block
-  // in their alloca. First the scalars that have dependences inside the SCoP,
-  // then the ones that might escape the SCoP.
-  generateScalarStores(Stmt, BB, LTS, BBMap);
-
   const Region &R = Stmt.getParent()->getRegion();
   for (Instruction &Inst : *BB)
     handleOutsideUsers(R, &Inst, BBMap[&Inst]);
@@ -426,16 +423,9 @@
   EscapeMap[Inst] = std::make_pair(ScalarAddr, std::move(EscapeUsers));
 }
 
-void BlockGenerator::generateScalarLoads(ScopStmt &Stmt,
-                                         const Instruction *Inst,
-                                         ValueMapT &BBMap) {
-  auto *MAL = Stmt.lookupAccessesFor(Inst);
-
-  if (!MAL)
-    return;
-
-  for (MemoryAccess *MA : *MAL) {
-    if (MA->isExplicit() || !MA->isRead())
+void BlockGenerator::generateScalarLoads(ScopStmt &Stmt, ValueMapT &BBMap) {
+  for (MemoryAccess *MA : Stmt) {
+    if (MA->isExplicit() || MA->isWrite())
       continue;
 
     auto *Address = getOrCreateAlloca(*MA);
@@ -491,14 +481,13 @@
   return ScalarValue;
 }
 
-void BlockGenerator::generateScalarStores(ScopStmt &Stmt, BasicBlock *BB,
-                                          LoopToScevMapT &LTS,
+void BlockGenerator::generateScalarStores(ScopStmt &Stmt, LoopToScevMapT &LTS,
                                           ValueMapT &BBMap) {
   const Region &R = Stmt.getParent()->getRegion();
 
-  assert(Stmt.isBlockStmt() && BB == Stmt.getBasicBlock() &&
-         "Region statements need to use the generateScalarStores() "
-         "function in the RegionGenerator");
+  assert(Stmt.isBlockStmt() && "Region statements need to use the "
                               "generateScalarStores() function in the "
                               "RegionGenerator");
 
   for (MemoryAccess *MA : Stmt) {
     if (MA->isExplicit() || MA->isRead())
       continue;
@@ -1008,6 +997,9 @@
   RegionMaps.clear();
   IncompletePHINodeMap.clear();
 
+  // Collection of all values related to this subregion.
+  ValueMapT ValueMap;
+
   // The region represented by the statement.
   Region *R = Stmt.getRegion();
 
@@ -1019,6 +1011,8 @@
   EntryBBCopy->setName("polly.stmt." + EntryBB->getName() + ".entry");
   Builder.SetInsertPoint(EntryBBCopy->begin());
 
+  generateScalarLoads(Stmt, RegionMaps[EntryBBCopy]);
+
   for (auto PI = pred_begin(EntryBB), PE = pred_end(EntryBB); PI != PE; ++PI)
     if (!R->contains(*PI))
       BlockMap[*PI] = EntryBBCopy;
@@ -1043,9 +1037,11 @@
     // Get the mapping for this block and initialize it with the mapping
     // available at its immediate dominator (in the new region).
     ValueMapT &RegionMap = RegionMaps[BBCopy];
-    RegionMap = RegionMaps[BBCopyIDom];
+    if (BBCopy != EntryBBCopy)
+      RegionMap = RegionMaps[BBCopyIDom];
 
     // Copy the block with the BlockGenerator.
+    Builder.SetInsertPoint(BBCopy->begin());
     copyBB(Stmt, BB, BBCopy, RegionMap, LTS, IdToAstExp);
 
     // In order to remap PHI nodes we store also basic block mappings.
@@ -1060,6 +1056,9 @@
     for (auto SI = succ_begin(BB), SE = succ_end(BB); SI != SE; SI++)
       if (R->contains(*SI) && SeenBlocks.insert(*SI).second)
         Blocks.push_back(*SI);
+
+    // Remember the values in case they are visible after this subregion.
+    ValueMap.insert(RegionMap.begin(), RegionMap.end());
   }
 
   // Now create a new dedicated region exit block and add it to the region map.
@@ -1125,26 +1124,33 @@
     LTS[L] = SE.getUnknown(LoopPHI);
   }
 
-  // Reset the old insert point for the build.
-  Builder.SetInsertPoint(ExitBBCopy->begin());
+  // Continue generating code in the exit block.
+  Builder.SetInsertPoint(ExitBBCopy->getFirstInsertionPt());
+
+  // Write values visible to other statements.
+  generateScalarStores(Stmt, LTS, ValueMap);
 }
 
-void RegionGenerator::generateScalarLoads(ScopStmt &Stmt,
-                                          const Instruction *Inst,
-                                          ValueMapT &BBMap) {
+void RegionGenerator::generateScalarLoads(ScopStmt &Stmt, ValueMapT &BBMap) {
+  Region &R = Stmt.getParent()->getRegion();
 
-  // Inside a non-affine region PHI nodes are copied not demoted. Once the
-  // phi is copied it will reload all inputs from outside the region, hence
-  // we do not need to generate code for the read access of the operands of a
-  // PHI.
-  if (isa<PHINode>(Inst))
-    return;
+  for (MemoryAccess *MA : Stmt) {
+    if (MA->isExplicit() || MA->isWrite())
+      continue;
 
-  return BlockGenerator::generateScalarLoads(Stmt, Inst, BBMap);
+    // Only generate loads for PHIs in the entry block. Intra-ScopStmt PHIs
+    // are copied directly.
+    Instruction *AccInst = MA->getAccessInstruction();
+    if (MA->isPHI() && AccInst->getParent() != R.getEntry())
+      continue;
+
+    Value *Address = getOrCreateAlloca(*MA);
+    BBMap[MA->getBaseAddr()] =
+        Builder.CreateLoad(Address, Address->getName() + ".reload");
+  }
 }
 
-void RegionGenerator::generateScalarStores(ScopStmt &Stmt, BasicBlock *BB,
-                                           LoopToScevMapT &LTS,
+void RegionGenerator::generateScalarStores(ScopStmt &Stmt, LoopToScevMapT &LTS,
                                            ValueMapT &BBMap) {
   const Region &R = Stmt.getParent()->getRegion();
 
@@ -1152,23 +1158,65 @@
          "Block statements need to use the generateScalarStores() "
         "function in the BlockGenerator");
 
-  for (MemoryAccess *MA : Stmt) {
+  // This statement's region exit block, not the scop's exit.
+  BasicBlock *StmtExitBB = Stmt.getRegion()->getExit();
 
+  for (MemoryAccess *MA : Stmt) {
     if (MA->isExplicit() || MA->isRead())
       continue;
 
     Instruction *ScalarInst = MA->getAccessInstruction();
-
-    // Only generate accesses that belong to this basic block.
-    if (ScalarInst->getParent() != BB)
+    Value *Val = MA->getAccessValue();
+    Value *BaseAddr = MA->getBaseAddr();
+
+    // Determine whether this store belongs to the write of a PHI's incoming
+    // value. Such writes must happen in any case because the PHI receiving
+    // the value is outside this non-affine region. There are two cases of
+    // PHI writes:
+    // - A regular PHI in the stmt's region exit block, written depending on
+    //   the exiting edge taken.
+    // - A value escaping the scop via a PHI in the scop's exit block. Such
+    //   an access directly writes the scalar's demoted memory instead of the
+    //   PHI's (.s2a instead of .phiops). Escaping writes are attached to the
+    //   terminator instruction, which otherwise never writes scalars.
+    //
+    // In both cases the write takes place at the terminator of one of the
+    // stmt's region exiting blocks.
+    bool IsPHIWrite =
+        (MA->isPHI() && cast<PHINode>(BaseAddr)->getParent() == StmtExitBB) ||
+        (!MA->isPHI() && isa<TerminatorInst>(ScalarInst));
+
+    // Only generate writes for PHIs in exiting blocks. Intra-ScopStmt PHIs
+    // are copied directly.
+    if (MA->isPHI() && !IsPHIWrite)
      continue;
 
-    Value *Val = MA->getAccessValue();
+    // In case we add the store into an exiting block, we need to restore the
+    // insert position for the stores in the exit node afterwards.
+    auto SavedInsertionPoint = Builder.GetInsertPoint();
+
+    // For PHI writes, change the insertion point. For scalar writes, first
+    // check whether any use outside of this non-affine region is possible:
+    // if the definition does not dominate the exit, and therefore any use
+    // that could follow it, no such use-def chain can exist in the original
+    // program because it would be illegal.
+    if (IsPHIWrite) {
+      // Determine the position for PHI writes.
+      BasicBlock *ExitingBB = ScalarInst->getParent();
+      BasicBlock *ExitingBBCopy = BlockMap[ExitingBB];
+      Builder.SetInsertPoint(ExitingBBCopy->getTerminator());
+    } else if (isa<Instruction>(Val) &&
+               !DT.dominates(cast<Instruction>(Val)->getParent(), StmtExitBB))
+      continue;
 
     auto Address = getOrCreateAlloca(*MA);
 
     Val = getNewScalarValue(Val, R, Stmt, LTS, BBMap);
     Builder.CreateStore(Val, Address);
+
+    // Restore the insertion point if necessary.
+    if (IsPHIWrite)
+      Builder.SetInsertPoint(SavedInsertionPoint);
   }
 }
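The subtlest part of the BlockGenerators.cpp changes is the IsPHIWrite classification in RegionGenerator::generateScalarStores. The standalone sketch below models just that predicate, with an Access struct and plain block names standing in for Polly's MemoryAccess and llvm::BasicBlock; it illustrates the two PHI-write cases from the comment above and is not the actual implementation.

#include <iostream>
#include <string>

struct Access {
  bool IsPHI;            // models MemoryAccess::isPHI()
  std::string PHIParent; // block holding the PHI (for PHI accesses)
  bool IsTerminator;     // models isa<TerminatorInst>(AccessInst)
};

// A write is a PHI write if it feeds a PHI sitting in the statement's region
// exit block, or if it escapes the scop via the exit PHI, modeled here as an
// access attached to a terminator.
bool isPHIWrite(const Access &MA, const std::string &StmtExitBB) {
  return (MA.IsPHI && MA.PHIParent == StmtExitBB) ||
         (!MA.IsPHI && MA.IsTerminator);
}

int main() {
  std::string ExitBB = "region.exit";
  Access RegionPHI{true, "region.exit", false}; // written on the exit edge
  Access InnerPHI{true, "if.then", false};      // copied, not written
  Access EscapePHI{false, "", true};            // scop-exit escape write
  std::cout << isPHIWrite(RegionPHI, ExitBB) << "\n"; // 1
  std::cout << isPHIWrite(InnerPHI, ExitBB) << "\n";  // 0 -> skipped
  std::cout << isPHIWrite(EscapePHI, ExitBB) << "\n"; // 1
}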
Index: test/Isl/CodeGen/non-affine-phi-node-expansion-2.ll
===================================================================
--- test/Isl/CodeGen/non-affine-phi-node-expansion-2.ll
+++ test/Isl/CodeGen/non-affine-phi-node-expansion-2.ll
@@ -4,7 +4,7 @@
 
 ; CHECK: polly.stmt.bb3:   ; preds = %polly.stmt.bb3.entry
-; CHECK: %tmp6_p_scalar_ = load double, double* %arg11, !alias.scope !0, !noalias !2
+; CHECK: %tmp6_p_scalar_ = load double, double* %arg12, !alias.scope !0, !noalias !2
 ; CHECK: %p_tmp7 = fadd double 1.000000e+00, %tmp6_p_scalar_
 ; CHECK: %p_tmp8 = fcmp olt double 1.400000e+01, %p_tmp7
 ; CHECK: br i1 %p_tmp8, label %polly.stmt.bb9, label %polly.stmt.bb10
Index: test/Isl/CodeGen/non-affine-phi-node-expansion-3.ll
===================================================================
--- test/Isl/CodeGen/non-affine-phi-node-expansion-3.ll
+++ test/Isl/CodeGen/non-affine-phi-node-expansion-3.ll
@@ -17,13 +17,7 @@
 ; CHECK-NEXT: %p_val0 = fadd float 1.000000e+00, 2.000000e+00
 ; CHECK-NEXT: %p_val1 = fadd float 1.000000e+00, 2.000000e+00
 ; CHECK-NEXT: %p_val2 = fadd float 1.000000e+00, 2.000000e+00
-; CHECK-NEXT: store float %p_val0, float* %merge.phiops
-; CHECK-NEXT: store float %p_val1, float* %val1.s2a
-; CHECK-NEXT: store float %p_val2, float* %val2.s2a
-
-; FIXME -> The last two writes are not really needed and can be dropped if the
-;          incoming block of the PHI and the value that is used share the same
-;          non-affine region.
+
+; CHECK: store float %p_val0, float* %merge.phiops
 
 branch1:
   br i1 %cond1, label %branch2, label %backedge
Index: test/Isl/CodeGen/phi_loop_carried_float.ll
===================================================================
--- test/Isl/CodeGen/phi_loop_carried_float.ll
+++ test/Isl/CodeGen/phi_loop_carried_float.ll
@@ -28,8 +28,8 @@
 ; CHECK: store float %tmp.0.phiops.reload[[R1]], float* %tmp.0.s2a
 
 ; CHECK-LABEL: polly.stmt.bb4:
-; CHECK: %tmp[[R5:[0-9]*]]_p_scalar_ = load float, float* %scevgep, align 4, !alias.scope !0, !noalias !2
 ; CHECK: %tmp.0.s2a.reload[[R3:[0-9]*]] = load float, float* %tmp.0.s2a
+; CHECK: %tmp[[R5:[0-9]*]]_p_scalar_ = load float, float* %scevgep, align 4, !alias.scope !0, !noalias !2
 ; CHECK: %p_tmp[[R4:[0-9]*]] = fadd float %tmp.0.s2a.reload[[R3]], %tmp[[R5]]_p_scalar_
 ; CHECK: store float %p_tmp[[R4]], float* %tmp.0.phiops
Index: test/Isl/CodeGen/phi_loop_carried_float_escape.ll
===================================================================
--- test/Isl/CodeGen/phi_loop_carried_float_escape.ll
+++ test/Isl/CodeGen/phi_loop_carried_float_escape.ll
@@ -28,8 +28,8 @@
 ; CHECK-: store float %tmp.0.phiops.reload[[R1]], float* %tmp.0.s2a
 
 ; CHECK-LABEL: polly.stmt.bb4:
-; CHECK: %tmp[[R5:[0-9]*]]_p_scalar_ = load float, float* %scevgep, align 4, !alias.scope !0, !noalias !2
 ; CHECK: %tmp.0.s2a.reload[[R3:[0-9]*]] = load float, float* %tmp.0.s2a
+; CHECK: %tmp[[R5:[0-9]*]]_p_scalar_ = load float, float* %scevgep, align 4, !alias.scope !0, !noalias !2
 ; CHECK: %p_tmp[[R4:[0-9]*]] = fadd float %tmp.0.s2a.reload[[R3]], %tmp[[R5]]_p_scalar_
 ; CHECK: store float %p_tmp[[R4]], float* %tmp.0.phiops
Index: test/Isl/CodeGen/read-only-scalars.ll
===================================================================
--- test/Isl/CodeGen/read-only-scalars.ll
+++ test/Isl/CodeGen/read-only-scalars.ll
@@ -14,8 +14,8 @@
 ; SCALAR-NEXT: store float %scalar, float* %scalar.s2a
 
 ; SCALAR-LABEL: polly.stmt.stmt1:
-; SCALAR-NEXT: %val_p_scalar_ = load float, float* %A,
 ; SCALAR-NEXT: %scalar.s2a.reload = load float, float* %scalar.s2a
+; SCALAR-NEXT: %val_p_scalar_ = load float, float* %A,
 ; SCALAR-NEXT: %p_sum = fadd float %val_p_scalar_, %scalar.s2a.reload
 
 define void @foo(float* noalias %A, float %scalar) {
Index: test/Isl/CodeGen/scev_expansion_in_nonaffine.ll
===================================================================
--- /dev/null
+++ test/Isl/CodeGen/scev_expansion_in_nonaffine.ll
@@ -0,0 +1,90 @@
+; RUN: opt %loadPolly -polly-codegen -S < %s | FileCheck %s
+
+; bugpoint-reduced testcase of MiBench/consumer-lame/quantize-pvt.c from the
+; test-suite.
+; It features a SCEV that is used in two BasicBlocks within a non-affine
+; subregion where neither of the blocks dominates the other. We check that the
+; SCEV is expanded independently for both BasicBlocks instead of just once for
+; the whole subregion.
+
+; CHECK-LABEL: polly.stmt.if.then.110:
+; CHECK: %64 = mul i64 %polly.indvar16, 30
+; CHECK: %scevgep24 = getelementptr i32, i32* %scevgep23, i64 %64
+; CHECK: store i32 0, i32* %scevgep24
+
+; CHECK-LABEL: polly.stmt.if.else:
+; CHECK: %65 = mul i64 %polly.indvar16, 30
+; CHECK: %scevgep30 = getelementptr i32, i32* %scevgep29, i64 %65
+; CHECK: store i32 21, i32* %scevgep30, align 8
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+%struct.gr_info.4.59.136.224.290 = type { i32, i32, i32, i32, i32, i32, i32, i32, [3 x i32], [3 x i32], i32, i32, i32, i32, i32, i32, i32, i32, i32, i32*, [4 x i32] }
+%struct.gr_info_ss.5.60.137.225.291 = type { %struct.gr_info.4.59.136.224.290 }
+%struct.anon.6.61.138.226.292 = type { [2 x %struct.gr_info_ss.5.60.137.225.291] }
+%struct.III_side_info_t.7.62.139.227.293 = type { i32, i32, i32, [2 x [4 x i32]], [2 x %struct.anon.6.61.138.226.292] }
+%struct.lame_global_flags.3.58.135.223.289 = type { i64, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i8*, i8*, i32, i32, float, i32, i32, i32, i64, i64, i32, i32, i32, i32, i32, i32, i32, i32, float, i32, i32, i32, float, float, float, float, i32, i32, i32, i32, i32, i32, i32, i32 }
+
+@convert_mdct = external global i32, align 4
+@reduce_sidechannel = external global i32, align 4
+
+; Function Attrs: nounwind uwtable
+define void @iteration_init(%struct.lame_global_flags.3.58.135.223.289* %gfp, %struct.III_side_info_t.7.62.139.227.293* %l3_side, [2 x [576 x i32]]* %l3_enc) #0 {
+entry:
+  %resvDrain = getelementptr inbounds %struct.III_side_info_t.7.62.139.227.293, %struct.III_side_info_t.7.62.139.227.293* %l3_side, i64 0, i32 2
+  store i32 0, i32* %resvDrain, align 8
+  store i32 0, i32* @convert_mdct, align 4
+  store i32 0, i32* @reduce_sidechannel, align 4
+  %mode_gr = getelementptr inbounds %struct.lame_global_flags.3.58.135.223.289, %struct.lame_global_flags.3.58.135.223.289* %gfp, i64 0, i32 45
+  %0 = load i32, i32* %mode_gr, align 8
+  %cmp95.145 = icmp sgt i32 %0, 0
+  br i1 %cmp95.145, label %for.cond.98.preheader, label %for.cond.120.preheader
+
+for.cond.98.preheader:                            ; preds = %for.inc.117, %entry
+  %indvars.iv157 = phi i64 [ %indvars.iv.next158, %for.inc.117 ], [ 0, %entry ]
+  %stereo = getelementptr inbounds %struct.lame_global_flags.3.58.135.223.289, %struct.lame_global_flags.3.58.135.223.289* %gfp, i64 0, i32 46
+  %1 = load i32, i32* %stereo, align 4
+  %cmp99.143 = icmp sgt i32 %1, 0
+  br i1 %cmp99.143, label %for.body.101, label %for.inc.117
+
+for.cond.120.preheader:                           ; preds = %for.inc.117, %entry
+  ret void
+
+for.body.101:                                     ; preds = %for.inc.114, %for.cond.98.preheader
+  %indvars.iv155 = phi i64 [ %indvars.iv.next156, %for.inc.114 ], [ 0, %for.cond.98.preheader ]
+  %block_type = getelementptr inbounds %struct.III_side_info_t.7.62.139.227.293, %struct.III_side_info_t.7.62.139.227.293* %l3_side, i64 0, i32 4, i64 %indvars.iv157, i32 0, i64 %indvars.iv155, i32 0, i32 6
+  %2 = load i32, i32* %block_type, align 8
+  %cmp108 = icmp eq i32 %2, 2
+  %sfb_lmax = getelementptr inbounds %struct.III_side_info_t.7.62.139.227.293, %struct.III_side_info_t.7.62.139.227.293* %l3_side, i64 0, i32 4, i64 %indvars.iv157, i32 0, i64 %indvars.iv155, i32 0, i32 16
+  br i1 %cmp108, label %if.then.110, label %if.else
+
+if.then.110:                                      ; preds = %for.body.101
+  store i32 0, i32* %sfb_lmax, align 8
+  %sfb_smax = getelementptr inbounds %struct.III_side_info_t.7.62.139.227.293, %struct.III_side_info_t.7.62.139.227.293* %l3_side, i64 0, i32 4, i64 %indvars.iv157, i32 0, i64 %indvars.iv155, i32 0, i32 17
+  store i32 0, i32* %sfb_smax, align 4
+  br label %for.inc.114
+
+if.else:                                          ; preds = %for.body.101
+  store i32 21, i32* %sfb_lmax, align 8
+  %sfb_smax112 = getelementptr inbounds %struct.III_side_info_t.7.62.139.227.293, %struct.III_side_info_t.7.62.139.227.293* %l3_side, i64 0, i32 4, i64 %indvars.iv157, i32 0, i64 %indvars.iv155, i32 0, i32 17
+  store i32 12, i32* %sfb_smax112, align 4
+  br label %for.inc.114
+
+for.inc.114:                                      ; preds = %if.else, %if.then.110
+  %indvars.iv.next156 = add nuw nsw i64 %indvars.iv155, 1
+  %3 = load i32, i32* %stereo, align 4
+  %4 = sext i32 %3 to i64
+  %cmp99 = icmp slt i64 %indvars.iv.next156, %4
+  br i1 %cmp99, label %for.body.101, label %for.inc.117
+
+for.inc.117:                                      ; preds = %for.inc.114, %for.cond.98.preheader
+  %indvars.iv.next158 = add nuw nsw i64 %indvars.iv157, 1
+  %5 = load i32, i32* %mode_gr, align 8
+  %6 = sext i32 %5 to i64
+  %cmp95 = icmp slt i64 %indvars.iv.next158, %6
+  br i1 %cmp95, label %for.cond.98.preheader, label %for.cond.120.preheader
+}
+
+attributes #0 = { nounwind uwtable "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+mmx,+sse,+sse2" "unsafe-fp-math"="false" "use-soft-float"="false" }
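The new test pins down behavior rather than mechanism: an address expression used in two sibling blocks, neither of which dominates the other, must be materialized separately in each block. The toy model below (illustrative names only, not Polly's actual SCEVExpander usage) shows the per-(expression, block) duplication the CHECK lines expect.

#include <iostream>
#include <map>
#include <string>
#include <utility>

using Key = std::pair<std::string, std::string>; // (expression, block)

// Cache expansions per (expression, block) so sibling blocks each get
// their own copy, while repeated queries within one block reuse it.
std::string expandIn(const std::string &Expr, const std::string &BB,
                     std::map<Key, std::string> &Cache) {
  auto [It, New] = Cache.try_emplace(Key{Expr, BB}, Expr + "@" + BB);
  if (New)
    std::cout << "expand " << Expr << " in " << BB << "\n";
  return It->second;
}

int main() {
  std::map<Key, std::string> Cache;
  // if.then.110 and if.else are siblings: both get an expansion.
  expandIn("{0,+,30}", "polly.stmt.if.then.110", Cache);
  expandIn("{0,+,30}", "polly.stmt.if.else", Cache);
  // A repeated query in the same block reuses the cached value.
  expandIn("{0,+,30}", "polly.stmt.if.then.110", Cache);
}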