Index: include/polly/CodeGen/BlockGenerators.h =================================================================== --- include/polly/CodeGen/BlockGenerators.h +++ include/polly/CodeGen/BlockGenerators.h @@ -338,7 +338,9 @@ /// @param R The current SCoP region. /// @param Inst The current instruction we check. /// @param InstCopy The copy of the instruction @p Inst in the optimized SCoP. - void handleOutsideUsers(const Region &R, Instruction *Inst, Value *InstCopy); + /// @param SAMap The alloca map in which the escape instruction is stored. + void handleOutsideUsers(const Region &R, Instruction *Inst, Value *InstCopy, + ScalarAllocaMapTy &SAMap); /// @brief Initialize the memory of demoted scalars. /// Index: include/polly/ScopDetection.h =================================================================== --- include/polly/ScopDetection.h +++ include/polly/ScopDetection.h @@ -223,13 +223,6 @@ /// @return True if all blocks in R are valid, false otherwise. bool allBlocksValid(DetectionContext &Context) const; - /// @brief Check the exit block of a region is valid. - /// - /// @param Context The context of scop detection. - /// - /// @return True if the exit of R is valid, false otherwise. - bool isValidExit(DetectionContext &Context) const; - /// @brief Check if a region is a Scop. /// /// @param Context The context of scop detection. Index: lib/Analysis/ScopDetection.cpp =================================================================== --- lib/Analysis/ScopDetection.cpp +++ lib/Analysis/ScopDetection.cpp @@ -760,8 +760,7 @@ DEBUG(dbgs() << "\t\tTrying " << ExpandedRegion->getNameStr() << "\n"); // Only expand when we did not collect errors. - // Check the exit first (cheap) - if (isValidExit(Context) && !Context.Log.hasErrors()) { + if (!Context.Log.hasErrors()) { // If the exit is valid check all blocks // - if true, a valid region was found => store it + keep expanding // - if false, .tbd. => stop (should this really end the loop?) 
@@ -903,18 +902,6 @@ return true; } -bool ScopDetection::isValidExit(DetectionContext &Context) const { - - // PHI nodes are not allowed in the exit basic block. - if (BasicBlock *Exit = Context.CurRegion.getExit()) { - BasicBlock::iterator I = Exit->begin(); - if (I != Exit->end() && isa(*I)) - return invalid(Context, /*Assert=*/true, I); - } - - return true; -} - bool ScopDetection::isValidRegion(DetectionContext &Context) const { Region &CurRegion = Context.CurRegion; @@ -957,9 +944,6 @@ &(CurRegion.getEntry()->getParent()->getEntryBlock())) return invalid(Context, /*Assert=*/true, CurRegion.getEntry()); - if (!isValidExit(Context)) - return false; - if (!allBlocksValid(Context)) return false; Index: lib/Analysis/TempScopInfo.cpp =================================================================== --- lib/Analysis/TempScopInfo.cpp +++ lib/Analysis/TempScopInfo.cpp @@ -108,7 +108,10 @@ void TempScopInfo::buildPHIAccesses(PHINode *PHI, Region &R, AccFuncSetType &Functions, Region *NonAffineSubRegion) { - if (canSynthesize(PHI, LI, SE, &R)) + // If we can synthesize a PHI we can skip it, however only if it is in + // the region. If it is not it can only be in the exit block of the region. + // In this case we model the operands but not the PHI itself. + if (PHI->getParent() != R.getExit() && canSynthesize(PHI, LI, SE, &R)) return; // PHI nodes are modeled as if they had been demoted prior to the SCoP @@ -309,18 +312,22 @@ Region *NonAffineSubRegion) { AccFuncSetType Functions; Loop *L = LI->getLoopFor(&BB); + bool isExit = &BB == R.getExit(); // The set of loops contained in non-affine subregions that are part of R. 
const ScopDetection::BoxedLoopsSetTy *BoxedLoops = SD->getBoxedLoops(&R); for (BasicBlock::iterator I = BB.begin(), E = --BB.end(); I != E; ++I) { Instruction *Inst = I; - if (isa(Inst) || isa(Inst)) - Functions.push_back( - std::make_pair(buildIRAccess(Inst, L, &R, BoxedLoops), Inst)); if (PHINode *PHI = dyn_cast(Inst)) buildPHIAccesses(PHI, R, Functions, NonAffineSubRegion); + else if (isExit) + break; + + if (isa(Inst) || isa(Inst)) + Functions.push_back( + std::make_pair(buildIRAccess(Inst, L, &R, BoxedLoops), Inst)); if (!isa(Inst) && buildScalarDependences(Inst, &R, NonAffineSubRegion)) { @@ -454,6 +461,15 @@ buildAccessFunctions(R, R); + // If the exit block of a region has multiple entry edges from within the + // region we model the PHI nodes in the block too. + BasicBlock *ExitBB = R.getExit(); + unsigned NumInRegion = 0; + for (auto *PredBB : predecessors(ExitBB)) + NumInRegion += R.contains(PredBB); + if (NumInRegion > 1) + buildAccessFunctions(R, *ExitBB); + for (const auto &BB : R.blocks()) buildCondition(BB, R); Index: lib/CodeGen/BlockGenerators.cpp =================================================================== --- lib/CodeGen/BlockGenerators.cpp +++ lib/CodeGen/BlockGenerators.cpp @@ -349,7 +349,7 @@ const Region &R = Stmt.getParent()->getRegion(); for (Instruction &Inst : *BB) - handleOutsideUsers(R, &Inst, BBMap[&Inst]); + handleOutsideUsers(R, &Inst, BBMap[&Inst], ScalarMap); } AllocaInst *BlockGenerator::getOrCreateAlloca(Value *ScalarBase, @@ -375,7 +375,8 @@ } void BlockGenerator::handleOutsideUsers(const Region &R, Instruction *Inst, - Value *InstCopy) { + Value *InstCopy, + ScalarAllocaMapTy &SAMap) { BasicBlock *ExitBB = R.getExit(); EscapeUserVectorTy EscapeUsers; @@ -406,8 +407,7 @@ // Get or create an escape alloca for this instruction. 
bool IsNew; - AllocaInst *ScalarAddr = - getOrCreateAlloca(Inst, ScalarMap, ".escape", &IsNew); + AllocaInst *ScalarAddr = getOrCreateAlloca(Inst, SAMap, ".escape", &IsNew); // Remember that this instruction has escape uses and the escape alloca. EscapeMap[Inst] = std::make_pair(ScalarAddr, std::move(EscapeUsers)); @@ -485,6 +485,21 @@ return ScalarValue; } +static PHINode *getSingleInRegionPHIOperandPHI(const Region &R, PHINode *PHI) { + PHINode *OperandPHI = nullptr; + BasicBlock *RegionExitBB = R.getExit(); + for (unsigned u = 0, e = PHI->getNumIncomingValues(); u < e; u++) { + BasicBlock *EntryBB = PHI->getIncomingBlock(u); + if (!R.contains(EntryBB) && EntryBB != RegionExitBB) + continue; + assert(OperandPHI == nullptr); + OperandPHI = cast(PHI->getIncomingValue(u)); + } + + assert(OperandPHI != nullptr); + return OperandPHI; +} + void BlockGenerator::generateScalarStores(ScopStmt &Stmt, BasicBlock *BB, ValueMapT &BBMap, ValueMapT &GlobalMap) { @@ -501,19 +516,38 @@ Instruction *Base = cast(MA->getBaseAddr()); Instruction *Inst = MA->getAccessInstruction(); + // Set for exit node PHIs. + PHINode *ExitBlockPHI = nullptr; + Value *Val = nullptr; AllocaInst *Address = nullptr; if (MA->getScopArrayInfo()->isPHI()) { PHINode *BasePHI = dyn_cast(Base); + + // Handle PHI nodes in the former exit block of the region. + if (!R.contains(BasePHI->getParent())) { + ExitBlockPHI = getSingleInRegionPHIOperandPHI(R, BasePHI); + BasePHI = ExitBlockPHI; + } + int PHIIdx = BasePHI->getBasicBlockIndex(BB); assert(PHIIdx >= 0); - Address = getOrCreateAlloca(Base, PHIOpMap, ".phiops"); + Address = getOrCreateAlloca(BasePHI, PHIOpMap, ".phiops"); Val = BasePHI->getIncomingValue(PHIIdx); } else { Address = getOrCreateAlloca(Base, ScalarMap, ".s2a"); Val = Inst; } + + // Multi-entry exit node PHIs do not escape (as they are not in the region in + // the first place) but their operands do. 
However, when we use the alloca + // for the PHI operands and consider the PHI as escaping (to collect the + // users) the escaping scalar infrastructure will also take care of PHI + // nodes in the former exit block. + if (ExitBlockPHI) + handleOutsideUsers(R, ExitBlockPHI, nullptr, PHIOpMap); + Val = getNewScalarValue(Val, R, ScalarMap, BBMap, GlobalMap); Builder.CreateStore(Val, Address); } @@ -1134,12 +1168,22 @@ if (ScalarInst->getParent() != BB) continue; + // Set for exit node PHIs. + PHINode *ExitBlockPHI = nullptr; + Value *Val = nullptr; AllocaInst *ScalarAddr = nullptr; if (MA->getScopArrayInfo()->isPHI()) { + + // Handle PHI nodes in the former exit block of the region. + if (!R.contains(ScalarBasePHI->getParent())) { + ExitBlockPHI = getSingleInRegionPHIOperandPHI(R, ScalarBasePHI); + ScalarBasePHI = ExitBlockPHI; + } + int PHIIdx = ScalarBasePHI->getBasicBlockIndex(BB); - ScalarAddr = getOrCreateAlloca(ScalarBase, PHIOpMap, ".phiops"); + ScalarAddr = getOrCreateAlloca(ScalarBasePHI, PHIOpMap, ".phiops"); Val = ScalarBasePHI->getIncomingValue(PHIIdx); } else { ScalarAddr = getOrCreateAlloca(ScalarBase, ScalarMap, ".s2a"); @@ -1148,6 +1192,14 @@ Val = getNewScalarValue(Val, R, ScalarMap, BBMap, GlobalMap); Builder.CreateStore(Val, ScalarAddr); + + // Multi-entry exit node PHIs do not escape (as they are not in the region in + // the first place) but their operands do. However, when we use the alloca + // for the PHI operands and consider the PHI as escaping (to collect the + // users) the escaping scalar infrastructure will also take care of PHI + // nodes in the former exit block. 
+ if (ExitBlockPHI) + handleOutsideUsers(R, ExitBlockPHI, nullptr, PHIOpMap); } } Index: test/Isl/CodeGen/phi_in_exit_early_lnt_failure_1.ll =================================================================== --- /dev/null +++ test/Isl/CodeGen/phi_in_exit_early_lnt_failure_1.ll @@ -0,0 +1,42 @@ +; RUN: opt %loadPolly -polly-detect-unprofitable -polly-codegen -polly-no-early-exit -S < %s | FileCheck %s +; +; This caused an lnt crash at some point, just verify it will run through. +; +; CHECK-LABEL: polly.merge_new_and_old: +; CHECK-NEXT: br label %for.body.6 +; +; CHECK-LABEL: for.body.6: +; CHECK-NEXT: %i.14 = phi i32 [ undef, %for.body.6 ], [ 0, %polly.merge_new_and_old ] +; +@recd = external hidden global [255 x i32], align 16 + +define void @rsdec_204(i8* %data_in) { +entry: + br i1 undef, label %if.then, label %for.body + +if.then: ; preds = %entry + unreachable + +for.body: ; preds = %for.body, %entry + %i.05 = phi i32 [ %inc, %for.body ], [ 0, %entry ] + %arrayidx = getelementptr inbounds i8, i8* %data_in, i64 0 + %0 = load i8, i8* %arrayidx, align 1 + %conv = zext i8 %0 to i32 + %arrayidx2 = getelementptr inbounds [255 x i32], [255 x i32]* @recd, i64 0, i64 0 + store i32 %conv, i32* %arrayidx2, align 4 + %inc = add nuw nsw i32 %i.05, 1 + br i1 false, label %for.body, label %for.body.6 + +for.body.6: ; preds = %for.body.6, %for.body + %i.14 = phi i32 [ undef, %for.body.6 ], [ 0, %for.body ] + br i1 undef, label %for.body.6, label %for.body.16 + +for.body.16: ; preds = %for.body.16, %for.body.6 + br i1 undef, label %for.body.16, label %for.body.29 + +for.body.29: ; preds = %for.body.29, %for.body.16 + br i1 undef, label %for.body.29, label %for.end.38 + +for.end.38: ; preds = %for.body.29 + unreachable +} Index: test/Isl/CodeGen/phi_in_exit_early_lnt_failure_2.ll =================================================================== --- /dev/null +++ test/Isl/CodeGen/phi_in_exit_early_lnt_failure_2.ll @@ -0,0 +1,39 @@ +; RUN: opt %loadPolly 
-polly-detect-unprofitable -polly-codegen -polly-no-early-exit -S < %s | FileCheck %s +; +; This caused an lnt crash at some point, just verify it will run through and +; produce the PHI node in the exit we are looking for. +; +; CHECK-LABEL: polly.merge_new_and_old: +; CHECK: %eps1.addr.0.ph.merge = phi double [ %eps1.addr.0.ph.final_reload, %polly.stmt.if.end.47.region_exiting.exit ], [ %eps1.addr.0.ph, %if.end.47.region_exiting ] +; +define void @dbisect(double* %c, double* %b, double %eps1, double* %eps2) { +entry: + br label %entry.split + +entry.split: ; preds = %entry + store double 0.000000e+00, double* %b, align 8 + br i1 false, label %for.inc, label %for.end + +if.end: ; preds = %if.then, %for.body + %arrayidx33 = getelementptr inbounds double, double* %c, i64 0 + %0 = load double, double* %arrayidx33, align 8 + br label %for.inc + +for.inc: ; preds = %if.then.36, %if.end + br i1 false, label %if.end, label %for.cond.for.end_crit_edge + +for.cond.for.end_crit_edge: ; preds = %for.inc + br label %for.end + +for.end: ; preds = %for.cond.for.end_crit_edge, %entry.split + %cmp45 = fcmp ugt double %eps1, 0.000000e+00 + br i1 %cmp45, label %if.end.47, label %if.then.46 + +if.then.46: ; preds = %for.end + %1 = load double, double* %eps2, align 8 + br label %if.end.47 + +if.end.47: ; preds = %if.then.46, %for.end + %eps1.addr.0 = phi double [ %1, %if.then.46 ], [ %eps1, %for.end ] + ret void +} Index: test/Isl/CodeGen/phi_in_exit_early_lnt_failure_3.ll =================================================================== --- /dev/null +++ test/Isl/CodeGen/phi_in_exit_early_lnt_failure_3.ll @@ -0,0 +1,63 @@ +; ModuleID = 'bugpoint-reduced-simplified.bc' +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +%struct.bc_struct.0.2.4.6.8.15.24.27.29.32.38.46.48.92.93.94.95.97.99.100.102.105.107.111.118.119.121 = type { i32, i32, i32, i32, [1024 x i8] } + +; Function Attrs: nounwind uwtable +declare 
%struct.bc_struct.0.2.4.6.8.15.24.27.29.32.38.46.48.92.93.94.95.97.99.100.102.105.107.111.118.119.121* @new_num() #0 + +; Function Attrs: nounwind uwtable +define void @_do_add(%struct.bc_struct.0.2.4.6.8.15.24.27.29.32.38.46.48.92.93.94.95.97.99.100.102.105.107.111.118.119.121* %n2) #0 { +entry: + %call = tail call %struct.bc_struct.0.2.4.6.8.15.24.27.29.32.38.46.48.92.93.94.95.97.99.100.102.105.107.111.118.119.121* @new_num() + %0 = load i32, i32* undef, align 4 + %add.ptr22 = getelementptr inbounds %struct.bc_struct.0.2.4.6.8.15.24.27.29.32.38.46.48.92.93.94.95.97.99.100.102.105.107.111.118.119.121, %struct.bc_struct.0.2.4.6.8.15.24.27.29.32.38.46.48.92.93.94.95.97.99.100.102.105.107.111.118.119.121* %n2, i64 0, i32 4, i64 0 + %add.ptr24 = getelementptr inbounds i8, i8* %add.ptr22, i64 0 + %add.ptr25 = getelementptr inbounds i8, i8* %add.ptr24, i64 -1 + %add.ptr29 = getelementptr inbounds %struct.bc_struct.0.2.4.6.8.15.24.27.29.32.38.46.48.92.93.94.95.97.99.100.102.105.107.111.118.119.121, %struct.bc_struct.0.2.4.6.8.15.24.27.29.32.38.46.48.92.93.94.95.97.99.100.102.105.107.111.118.119.121* %call, i64 0, i32 4, i64 0 + %add.ptr31 = getelementptr inbounds i8, i8* %add.ptr29, i64 0 + %add.ptr32 = getelementptr inbounds i8, i8* %add.ptr31, i64 -1 + br i1 undef, label %if.end.45, label %if.then + +if.then: ; preds = %entry + br i1 undef, label %while.cond.preheader, label %while.cond.38.preheader + +while.cond.38.preheader: ; preds = %if.then + %cmp39.39 = icmp sgt i32 %0, 0 + br i1 %cmp39.39, label %while.body.40.lr.ph, label %if.end.45 + +while.body.40.lr.ph: ; preds = %while.cond.38.preheader + br label %while.body.40 + +while.cond.preheader: ; preds = %if.then + br i1 undef, label %while.body.lr.ph, label %if.end.45 + +while.body.lr.ph: ; preds = %while.cond.preheader + br label %while.body + +while.body: ; preds = %while.body, %while.body.lr.ph + br label %while.body + +while.body.40: ; preds = %while.body.40, %while.body.40.lr.ph + %sumptr.141 = phi i8* [ 
%add.ptr32, %while.body.40.lr.ph ], [ %incdec.ptr42, %while.body.40 ] + %n2ptr.040 = phi i8* [ %add.ptr25, %while.body.40.lr.ph ], [ %incdec.ptr41, %while.body.40 ] + %incdec.ptr41 = getelementptr inbounds i8, i8* %n2ptr.040, i64 -1 + %1 = load i8, i8* %n2ptr.040, align 1 + %incdec.ptr42 = getelementptr inbounds i8, i8* %sumptr.141, i64 -1 + store i8 %1, i8* %sumptr.141, align 1 + br i1 false, label %while.body.40, label %while.cond.38.if.end.45.loopexit9_crit_edge + +while.cond.38.if.end.45.loopexit9_crit_edge: ; preds = %while.body.40 + br label %if.end.45 + +if.end.45: ; preds = %while.cond.38.if.end.45.loopexit9_crit_edge, %while.cond.preheader, %while.cond.38.preheader, %entry + %n2ptr.2 = phi i8* [ %add.ptr25, %entry ], [ %add.ptr25, %while.cond.preheader ], [ undef, %while.cond.38.if.end.45.loopexit9_crit_edge ], [ %add.ptr25, %while.cond.38.preheader ] + ret void +} + +attributes #0 = { nounwind uwtable "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+sse,+sse2" "unsafe-fp-math"="false" "use-soft-float"="false" } + +!llvm.ident = !{!0} + +!0 = !{!"clang version 3.8.0 (http://llvm.org/git/clang.git d72bb21bbd941815bc52c93433564db3b5759ad5)"} Index: test/Isl/CodeGen/phi_in_exit_early_lnt_failure_4.ll =================================================================== --- /dev/null +++ test/Isl/CodeGen/phi_in_exit_early_lnt_failure_4.ll @@ -0,0 +1,64 @@ +; ModuleID = 'bugpoint-reduced-simplified.bc' +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +%struct.ImageParameters.11.35.59.83.107.323.539.755.1019.1043.1187.1235.1355.1379.1403.1427.1499.1571.1667.1739.1835.2051.2123.2339.2387.2843.2867.2891.2915.3587.3803.3826 = type { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, i32, i32, i32, i32, i32, i32, i32, i32, i32, 
i32, i32, i32, i32, i32, i32, i32, i32, i32, i8**, i8**, i32, i32***, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, [9 x [16 x [16 x i16]]], [5 x [16 x [16 x i16]]], [9 x [8 x [8 x i16]]], [2 x [4 x [16 x [16 x i16]]]], [16 x [16 x i16]], [16 x [16 x i32]], i32****, i32***, i32***, i32***, i32****, i32****, %struct.Picture.8.32.56.80.104.320.536.752.1016.1040.1184.1232.1352.1376.1400.1424.1496.1568.1664.1736.1832.2048.2120.2336.2384.2840.2864.2888.2912.3584.3800.3823*, %struct.Slice.7.31.55.79.103.319.535.751.1015.1039.1183.1231.1351.1375.1399.1423.1495.1567.1663.1735.1831.2047.2119.2335.2383.2839.2863.2887.2911.3583.3799.3822*, %struct.macroblock.9.33.57.81.105.321.537.753.1017.1041.1185.1233.1353.1377.1401.1425.1497.1569.1665.1737.1833.2049.2121.2337.2385.2841.2865.2889.2913.3585.3801.3824*, i32*, i32*, i32, i32, i32, i32, [4 x [4 x i32]], i32, i32, i32, i32, i32, double, i32, i32, i32, i32, i16******, i16******, i16******, i16******, [15 x i16], i32, i32, i32, i32, i32, i32, i32, i32, [6 x [32 x i32]], i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, [1 x i32], i32, i32, [2 x i32], i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, %struct.DecRefPicMarking_s.10.34.58.82.106.322.538.754.1018.1042.1186.1234.1354.1378.1402.1426.1498.1570.1666.1738.1834.2050.2122.2338.2386.2842.2866.2890.2914.3586.3802.3825*, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, double**, double***, i32***, double**, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, [3 x [2 x i32]], [2 x i32], i32, i32, i16, i32, i32, i32, i32, i32 } +%struct.Picture.8.32.56.80.104.320.536.752.1016.1040.1184.1232.1352.1376.1400.1424.1496.1568.1664.1736.1832.2048.2120.2336.2384.2840.2864.2888.2912.3584.3800.3823 = type { i32, i32, [100 x 
%struct.Slice.7.31.55.79.103.319.535.751.1015.1039.1183.1231.1351.1375.1399.1423.1495.1567.1663.1735.1831.2047.2119.2335.2383.2839.2863.2887.2911.3583.3799.3822*], i32, float, float, float } +%struct.Slice.7.31.55.79.103.319.535.751.1015.1039.1183.1231.1351.1375.1399.1423.1495.1567.1663.1735.1831.2047.2119.2335.2383.2839.2863.2887.2911.3583.3799.3822 = type { i32, i32, i32, i32, i32, i32, %struct.datapartition.3.27.51.75.99.315.531.747.1011.1035.1179.1227.1347.1371.1395.1419.1491.1563.1659.1731.1827.2043.2115.2331.2379.2835.2859.2883.2907.3579.3795.3818*, %struct.MotionInfoContexts.5.29.53.77.101.317.533.749.1013.1037.1181.1229.1349.1373.1397.1421.1493.1565.1661.1733.1829.2045.2117.2333.2381.2837.2861.2885.2909.3581.3797.3820*, %struct.TextureInfoContexts.6.30.54.78.102.318.534.750.1014.1038.1182.1230.1350.1374.1398.1422.1494.1566.1662.1734.1830.2046.2118.2334.2382.2838.2862.2886.2910.3582.3798.3821*, i32, i32*, i32*, i32*, i32, i32*, i32*, i32*, i32 (i32)*, [3 x [2 x i32]] } +%struct.datapartition.3.27.51.75.99.315.531.747.1011.1035.1179.1227.1347.1371.1395.1419.1491.1563.1659.1731.1827.2043.2115.2331.2379.2835.2859.2883.2907.3579.3795.3818 = type { %struct.Bitstream.1.25.49.73.97.313.529.745.1009.1033.1177.1225.1345.1369.1393.1417.1489.1561.1657.1729.1825.2041.2113.2329.2377.2833.2857.2881.2905.3577.3793.3816*, %struct.EncodingEnvironment.2.26.50.74.98.314.530.746.1010.1034.1178.1226.1346.1370.1394.1418.1490.1562.1658.1730.1826.2042.2114.2330.2378.2834.2858.2882.2906.3578.3794.3817, %struct.EncodingEnvironment.2.26.50.74.98.314.530.746.1010.1034.1178.1226.1346.1370.1394.1418.1490.1562.1658.1730.1826.2042.2114.2330.2378.2834.2858.2882.2906.3578.3794.3817 } +%struct.Bitstream.1.25.49.73.97.313.529.745.1009.1033.1177.1225.1345.1369.1393.1417.1489.1561.1657.1729.1825.2041.2113.2329.2377.2833.2857.2881.2905.3577.3793.3816 = type { i32, i32, i8, i32, i32, i8, i8, i32, i32, i8*, i32 } 
+%struct.EncodingEnvironment.2.26.50.74.98.314.530.746.1010.1034.1178.1226.1346.1370.1394.1418.1490.1562.1658.1730.1826.2042.2114.2330.2378.2834.2858.2882.2906.3578.3794.3817 = type { i32, i32, i32, i32, i32, i8*, i32*, i32, i32 } +%struct.MotionInfoContexts.5.29.53.77.101.317.533.749.1013.1037.1181.1229.1349.1373.1397.1421.1493.1565.1661.1733.1829.2045.2117.2333.2381.2837.2861.2885.2909.3581.3797.3820 = type { [3 x [11 x %struct.BiContextType.4.28.52.76.100.316.532.748.1012.1036.1180.1228.1348.1372.1396.1420.1492.1564.1660.1732.1828.2044.2116.2332.2380.2836.2860.2884.2908.3580.3796.3819]], [2 x [9 x %struct.BiContextType.4.28.52.76.100.316.532.748.1012.1036.1180.1228.1348.1372.1396.1420.1492.1564.1660.1732.1828.2044.2116.2332.2380.2836.2860.2884.2908.3580.3796.3819]], [2 x [10 x %struct.BiContextType.4.28.52.76.100.316.532.748.1012.1036.1180.1228.1348.1372.1396.1420.1492.1564.1660.1732.1828.2044.2116.2332.2380.2836.2860.2884.2908.3580.3796.3819]], [2 x [6 x %struct.BiContextType.4.28.52.76.100.316.532.748.1012.1036.1180.1228.1348.1372.1396.1420.1492.1564.1660.1732.1828.2044.2116.2332.2380.2836.2860.2884.2908.3580.3796.3819]], [4 x %struct.BiContextType.4.28.52.76.100.316.532.748.1012.1036.1180.1228.1348.1372.1396.1420.1492.1564.1660.1732.1828.2044.2116.2332.2380.2836.2860.2884.2908.3580.3796.3819], [4 x %struct.BiContextType.4.28.52.76.100.316.532.748.1012.1036.1180.1228.1348.1372.1396.1420.1492.1564.1660.1732.1828.2044.2116.2332.2380.2836.2860.2884.2908.3580.3796.3819], [3 x %struct.BiContextType.4.28.52.76.100.316.532.748.1012.1036.1180.1228.1348.1372.1396.1420.1492.1564.1660.1732.1828.2044.2116.2332.2380.2836.2860.2884.2908.3580.3796.3819] } +%struct.BiContextType.4.28.52.76.100.316.532.748.1012.1036.1180.1228.1348.1372.1396.1420.1492.1564.1660.1732.1828.2044.2116.2332.2380.2836.2860.2884.2908.3580.3796.3819 = type { i16, i8, i64 } 
+%struct.TextureInfoContexts.6.30.54.78.102.318.534.750.1014.1038.1182.1230.1350.1374.1398.1422.1494.1566.1662.1734.1830.2046.2118.2334.2382.2838.2862.2886.2910.3582.3798.3821 = type { [2 x %struct.BiContextType.4.28.52.76.100.316.532.748.1012.1036.1180.1228.1348.1372.1396.1420.1492.1564.1660.1732.1828.2044.2116.2332.2380.2836.2860.2884.2908.3580.3796.3819], [4 x %struct.BiContextType.4.28.52.76.100.316.532.748.1012.1036.1180.1228.1348.1372.1396.1420.1492.1564.1660.1732.1828.2044.2116.2332.2380.2836.2860.2884.2908.3580.3796.3819], [3 x [4 x %struct.BiContextType.4.28.52.76.100.316.532.748.1012.1036.1180.1228.1348.1372.1396.1420.1492.1564.1660.1732.1828.2044.2116.2332.2380.2836.2860.2884.2908.3580.3796.3819]], [10 x [4 x %struct.BiContextType.4.28.52.76.100.316.532.748.1012.1036.1180.1228.1348.1372.1396.1420.1492.1564.1660.1732.1828.2044.2116.2332.2380.2836.2860.2884.2908.3580.3796.3819]], [10 x [15 x %struct.BiContextType.4.28.52.76.100.316.532.748.1012.1036.1180.1228.1348.1372.1396.1420.1492.1564.1660.1732.1828.2044.2116.2332.2380.2836.2860.2884.2908.3580.3796.3819]], [10 x [15 x %struct.BiContextType.4.28.52.76.100.316.532.748.1012.1036.1180.1228.1348.1372.1396.1420.1492.1564.1660.1732.1828.2044.2116.2332.2380.2836.2860.2884.2908.3580.3796.3819]], [10 x [5 x %struct.BiContextType.4.28.52.76.100.316.532.748.1012.1036.1180.1228.1348.1372.1396.1420.1492.1564.1660.1732.1828.2044.2116.2332.2380.2836.2860.2884.2908.3580.3796.3819]], [10 x [5 x %struct.BiContextType.4.28.52.76.100.316.532.748.1012.1036.1180.1228.1348.1372.1396.1420.1492.1564.1660.1732.1828.2044.2116.2332.2380.2836.2860.2884.2908.3580.3796.3819]], [10 x [15 x %struct.BiContextType.4.28.52.76.100.316.532.748.1012.1036.1180.1228.1348.1372.1396.1420.1492.1564.1660.1732.1828.2044.2116.2332.2380.2836.2860.2884.2908.3580.3796.3819]], [10 x [15 x 
%struct.BiContextType.4.28.52.76.100.316.532.748.1012.1036.1180.1228.1348.1372.1396.1420.1492.1564.1660.1732.1828.2044.2116.2332.2380.2836.2860.2884.2908.3580.3796.3819]] } +%struct.macroblock.9.33.57.81.105.321.537.753.1017.1041.1185.1233.1353.1377.1401.1425.1497.1569.1665.1737.1833.2049.2121.2337.2385.2841.2865.2889.2913.3585.3801.3824 = type { i32, i32, i32, [2 x i32], i32, [8 x i32], %struct.macroblock.9.33.57.81.105.321.537.753.1017.1041.1185.1233.1353.1377.1401.1425.1497.1569.1665.1737.1833.2049.2121.2337.2385.2841.2865.2889.2913.3585.3801.3824*, %struct.macroblock.9.33.57.81.105.321.537.753.1017.1041.1185.1233.1353.1377.1401.1425.1497.1569.1665.1737.1833.2049.2121.2337.2385.2841.2865.2889.2913.3585.3801.3824*, i32, [2 x [4 x [4 x [2 x i32]]]], [16 x i8], [16 x i8], i32, i64, [4 x i32], [4 x i32], i64, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i16, double, i32, i32, i32, i32, i32, i32, i32, i32, i32 } +%struct.DecRefPicMarking_s.10.34.58.82.106.322.538.754.1018.1042.1186.1234.1354.1378.1402.1426.1498.1570.1666.1738.1834.2050.2122.2338.2386.2842.2866.2890.2914.3586.3802.3825 = type { i32, i32, i32, i32, i32, %struct.DecRefPicMarking_s.10.34.58.82.106.322.538.754.1018.1042.1186.1234.1354.1378.1402.1426.1498.1570.1666.1738.1834.2050.2122.2338.2386.2842.2866.2890.2914.3586.3802.3825* } + +@img = external global %struct.ImageParameters.11.35.59.83.107.323.539.755.1019.1043.1187.1235.1355.1379.1403.1427.1499.1571.1667.1739.1835.2051.2123.2339.2387.2843.2867.2891.2915.3587.3803.3826*, align 8 + +; Function Attrs: nounwind uwtable +define void @intrapred_luma() #0 { +entry: + %PredPel = alloca [13 x i16], align 16 + br label %for.body + +for.body: ; preds = %for.body, %entry + br i1 undef, label %for.body, label %for.body.262 + +for.body.262: ; preds = %for.body + %0 = load 
%struct.ImageParameters.11.35.59.83.107.323.539.755.1019.1043.1187.1235.1355.1379.1403.1427.1499.1571.1667.1739.1835.2051.2123.2339.2387.2843.2867.2891.2915.3587.3803.3826*, %struct.ImageParameters.11.35.59.83.107.323.539.755.1019.1043.1187.1235.1355.1379.1403.1427.1499.1571.1667.1739.1835.2051.2123.2339.2387.2843.2867.2891.2915.3587.3803.3826** @img, align 8 + br label %for.body.280 + +for.body.280: ; preds = %for.body.280, %for.body.262 + %indvars.iv66 = phi i64 [ 0, %for.body.262 ], [ %indvars.iv.next67, %for.body.280 ] + %arrayidx282 = getelementptr inbounds [13 x i16], [13 x i16]* %PredPel, i64 0, i64 1 + %arrayidx283 = getelementptr inbounds i16, i16* %arrayidx282, i64 %indvars.iv66 + %1 = load i16, i16* %arrayidx283, align 2 + %arrayidx289 = getelementptr inbounds %struct.ImageParameters.11.35.59.83.107.323.539.755.1019.1043.1187.1235.1355.1379.1403.1427.1499.1571.1667.1739.1835.2051.2123.2339.2387.2843.2867.2891.2915.3587.3803.3826, %struct.ImageParameters.11.35.59.83.107.323.539.755.1019.1043.1187.1235.1355.1379.1403.1427.1499.1571.1667.1739.1835.2051.2123.2339.2387.2843.2867.2891.2915.3587.3803.3826* %0, i64 0, i32 47, i64 0, i64 2, i64 %indvars.iv66 + store i16 %1, i16* %arrayidx289, align 2 + %indvars.iv.next67 = add nuw nsw i64 %indvars.iv66, 1 + br i1 false, label %for.body.280, label %for.end.298 + +for.end.298: ; preds = %for.body.280 + %2 = load %struct.ImageParameters.11.35.59.83.107.323.539.755.1019.1043.1187.1235.1355.1379.1403.1427.1499.1571.1667.1739.1835.2051.2123.2339.2387.2843.2867.2891.2915.3587.3803.3826*, %struct.ImageParameters.11.35.59.83.107.323.539.755.1019.1043.1187.1235.1355.1379.1403.1427.1499.1571.1667.1739.1835.2051.2123.2339.2387.2843.2867.2891.2915.3587.3803.3826** @img, align 8 + br label %for.body.310 + +for.body.310: ; preds = %for.body.310, %for.end.298 + %indvars.iv = phi i64 [ 0, %for.end.298 ], [ %indvars.iv.next, %for.body.310 ] + %arrayidx312 = getelementptr inbounds [13 x i16], [13 x i16]* %PredPel, i64 0, i64 9 + 
%arrayidx313 = getelementptr inbounds i16, i16* %arrayidx312, i64 %indvars.iv + %3 = load i16, i16* %arrayidx313, align 2 + %arrayidx322 = getelementptr inbounds %struct.ImageParameters.11.35.59.83.107.323.539.755.1019.1043.1187.1235.1355.1379.1403.1427.1499.1571.1667.1739.1835.2051.2123.2339.2387.2843.2867.2891.2915.3587.3803.3826, %struct.ImageParameters.11.35.59.83.107.323.539.755.1019.1043.1187.1235.1355.1379.1403.1427.1499.1571.1667.1739.1835.2051.2123.2339.2387.2843.2867.2891.2915.3587.3803.3826* %2, i64 0, i32 47, i64 1, i64 %indvars.iv, i64 1 + store i16 %3, i16* %arrayidx322, align 2 + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + br i1 false, label %for.body.310, label %for.end.328 + +for.end.328: ; preds = %for.body.310 + ret void +} + +attributes #0 = { nounwind uwtable "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+sse,+sse2" "unsafe-fp-math"="false" "use-soft-float"="false" } + +!llvm.ident = !{!0} + +!0 = !{!"clang version 3.8.0 (http://llvm.org/git/clang.git d72bb21bbd941815bc52c93433564db3b5759ad5)"} Index: test/Isl/CodeGen/phi_in_exit_early_lnt_failure_5.ll =================================================================== --- /dev/null +++ test/Isl/CodeGen/phi_in_exit_early_lnt_failure_5.ll @@ -0,0 +1,46 @@ +; ModuleID = 'bugpoint-reduced-simplified.bc' +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +; Function Attrs: nounwind uwtable +define void @_vorbis_apply_window(float* %d) #0 { +entry: + %0 = load float*, float** undef, align 8 + %div23.neg = sdiv i64 0, -4 + %sub24 = add i64 0, %div23.neg + br i1 undef, label %for.body.34, label %for.end.42 + +for.body.34: ; preds = %for.body.34, %entry + br i1 false, label %for.body.34, label %for.end.42 + +for.end.42: ; preds = %for.body.34, %entry + %sext = shl i64 
%sub24, 32 + %conv48.4 = ashr exact i64 %sext, 32 + %cmp49.5 = icmp slt i64 %conv48.4, 0 + br i1 %cmp49.5, label %for.body.51.lr.ph, label %for.cond.60.preheader + +for.body.51.lr.ph: ; preds = %for.end.42 + %div44 = sdiv i64 0, 2 + %sub45 = add nsw i64 %div44, 4294967295 + %1 = trunc i64 %sub45 to i32 + %2 = sext i32 %1 to i64 + br label %for.body.51 + +for.cond.60.preheader: ; preds = %for.body.51, %for.end.42 + ret void + +for.body.51: ; preds = %for.body.51, %for.body.51.lr.ph + %indvars.iv16 = phi i64 [ %2, %for.body.51.lr.ph ], [ undef, %for.body.51 ] + %arrayidx53 = getelementptr inbounds float, float* %0, i64 %indvars.iv16 + %3 = load float, float* %arrayidx53, align 4 + %arrayidx55 = getelementptr inbounds float, float* %d, i64 0 + %mul56 = fmul float %3, undef + store float %mul56, float* %arrayidx55, align 4 + br i1 false, label %for.body.51, label %for.cond.60.preheader +} + +attributes #0 = { nounwind uwtable "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+sse,+sse2" "unsafe-fp-math"="false" "use-soft-float"="false" } + +!llvm.ident = !{!0} + +!0 = !{!"clang version 3.8.0 (http://llvm.org/git/clang.git d72bb21bbd941815bc52c93433564db3b5759ad5)"} Index: test/Isl/CodeGen/phi_with_multi_exiting_edges_2.ll =================================================================== --- /dev/null +++ test/Isl/CodeGen/phi_with_multi_exiting_edges_2.ll @@ -0,0 +1,36 @@ +; RUN: opt %loadPolly -polly-detect-unprofitable -polly-codegen -polly-no-early-exit -S < %s | FileCheck %s + +define float @foo(float* %A, i64 %param) { +entry: + br label %entry.split + +entry.split: + %branchcond = icmp slt i64 %param, 64 + br i1 %branchcond, label %loopA, label %loopB + +loopA: + %indvarA = phi i64 [0, %entry.split], [%indvar.nextA, %loopA] + %indvar.nextA = add i64 %indvarA, 1 + %valA = load float, float* %A + 
%sumA = fadd float %valA, %valA + store float %valA, float* %A + %cndA = icmp eq i64 %indvar.nextA, 100 + br i1 %cndA, label %next, label %loopA + +loopB: + %indvarB = phi i64 [0, %entry.split], [%indvar.nextB, %loopB] + %indvar.nextB = add i64 %indvarB, 1 + %valB = load float, float* %A + %sumB = fadd float %valB, %valB + store float %valB, float* %A + %cndB = icmp eq i64 %indvar.nextB, 100 + br i1 %cndB, label %next, label %loopB + +next: + %result = phi float [%sumA, %loopA], [%sumB, %loopB] + ret float %result + +} + +; CHECK: Valid Region for Scop: entry.split => next + Index: test/Isl/CodeGen/phi_with_one_exit_edge.ll =================================================================== --- /dev/null +++ test/Isl/CodeGen/phi_with_one_exit_edge.ll @@ -0,0 +1,26 @@ +; RUN: opt %loadPolly -polly-detect-unprofitable -polly-codegen -polly-no-early-exit -S < %s | FileCheck %s + +define float @foo(float* %A, i64 %param) { +entry: + br label %entry.split + +entry.split: + br label %loopA + +loopA: + %indvarA = phi i64 [0, %entry.split], [%indvar.nextA, %loopA] + %indvar.nextA = add i64 %indvarA, 1 + %valA = load float, float* %A + %sumA = fadd float %valA, %valA + store float %valA, float* %A + %cndA = icmp eq i64 %indvar.nextA, 100 + br i1 %cndA, label %next, label %loopA + +next: + %result = phi float [%sumA, %loopA] + ret float %result + +} + +; CHECK: Valid Region for Scop: entry.split => next +