Index: include/polly/CodeGen/BlockGenerators.h =================================================================== --- include/polly/CodeGen/BlockGenerators.h +++ include/polly/CodeGen/BlockGenerators.h @@ -340,6 +340,15 @@ /// @param InstCopy The copy of the instruction @p Inst in the optimized SCoP. void handleOutsideUsers(const Region &R, Instruction *Inst, Value *InstCopy); + /// @brief Handle PHIs in the exiting block. + /// + /// Such PHIs can be created by region simplification. They have not been + /// considered byTempScopInfo/ScopInfo and in consequence not by any ScopStmt + /// materialized by BlockGenerator. + /// + /// @param R The current SCoP region. + void handleExitingPHIs(Region &R); + /// @brief Initialize the memory of demoted scalars. /// /// If a PHI node was demoted and one of its predecessor blocks was outside Index: include/polly/ScopDetection.h =================================================================== --- include/polly/ScopDetection.h +++ include/polly/ScopDetection.h @@ -223,13 +223,6 @@ /// @return True if all blocks in R are valid, false otherwise. bool allBlocksValid(DetectionContext &Context) const; - /// @brief Check the exit block of a region is valid. - /// - /// @param Context The context of scop detection. - /// - /// @return True if the exit of R is valid, false otherwise. - bool isValidExit(DetectionContext &Context) const; - /// @brief Check if a region is a Scop. /// /// @param Context The context of scop detection. Index: lib/Analysis/ScopDetection.cpp =================================================================== --- lib/Analysis/ScopDetection.cpp +++ lib/Analysis/ScopDetection.cpp @@ -761,7 +761,7 @@ // Only expand when we did not collect errors. // Check the exit first (cheap) - if (isValidExit(Context) && !Context.Log.hasErrors()) { + if (!Context.Log.hasErrors()) { // If the exit is valid check all blocks // - if true, a valid region was found => store it + keep expanding // - if false, .tbd. => stop (should this really end the loop?) @@ -903,18 +903,6 @@ return true; } -bool ScopDetection::isValidExit(DetectionContext &Context) const { - - // PHI nodes are not allowed in the exit basic block. - if (BasicBlock *Exit = Context.CurRegion.getExit()) { - BasicBlock::iterator I = Exit->begin(); - if (I != Exit->end() && isa(*I)) - return invalid(Context, /*Assert=*/true, I); - } - - return true; -} - bool ScopDetection::isValidRegion(DetectionContext &Context) const { Region &CurRegion = Context.CurRegion; @@ -957,9 +945,6 @@ &(CurRegion.getEntry()->getParent()->getEntryBlock())) return invalid(Context, /*Assert=*/true, CurRegion.getEntry()); - if (!isValidExit(Context)) - return false; - if (!allBlocksValid(Context)) return false; Index: lib/Analysis/TempScopInfo.cpp =================================================================== --- lib/Analysis/TempScopInfo.cpp +++ lib/Analysis/TempScopInfo.cpp @@ -315,6 +315,13 @@ for (BasicBlock::iterator I = BB.begin(), E = --BB.end(); I != E; ++I) { Instruction *Inst = I; + + // The PHI nodes of the exit block are later moved into the region so handle + // them like any other PHI. All following instructions do not belong to the + // region and are skipped. + if (R.getExit() == &BB && !isa(Inst)) + break; + if (isa(Inst) || isa(Inst)) Functions.push_back( std::make_pair(buildIRAccess(Inst, L, &R, BoxedLoops), Inst)); @@ -453,6 +460,7 @@ TempScop *TScop = new TempScop(R, BBConds, AccFuncMap); buildAccessFunctions(R, R); + buildAccessFunctions(R, *R.getExit()); for (const auto &BB : R.blocks()) buildCondition(BB, R); Index: lib/CodeGen/BlockGenerators.cpp =================================================================== --- lib/CodeGen/BlockGenerators.cpp +++ lib/CodeGen/BlockGenerators.cpp @@ -505,6 +505,9 @@ AllocaInst *Address = nullptr; if (MA->getScopArrayInfo()->isPHI()) { + if (!R.contains(Base)) + continue; // PHI in exit node + PHINode *BasePHI = dyn_cast(Base); int PHIIdx = BasePHI->getBasicBlockIndex(BB); assert(PHIIdx >= 0); @@ -519,6 +522,36 @@ } } +void BlockGenerator::handleExitingPHIs(Region &R) { + BasicBlock *Exiting = R.getExitingBlock(); + assert(Exiting && "Must be a simple region"); + for (auto I = Exiting->begin(); isa(I); ++I) { + PHINode *PHI = cast(I); + EscapeUserVectorTy EscapingUsers; + for (auto U : PHI->users()) { + if (!isa(U)) + continue; + auto User = cast(U); + + if (R.contains(User)) + continue; // Inside use + + // There should be just one in the block following + // polly.merge_new_and_old which is another PHI. + EscapingUsers.push_back(User); + } + if (EscapingUsers.empty()) + continue; + + bool IsNew; + AllocaInst *Address = getOrCreateAlloca(PHI, PHIOpMap, ".phiops", &IsNew); + if (!IsNew) + continue; // Already been handled?!? + + EscapeMap[PHI] = std::make_pair(Address, std::move(EscapingUsers)); + } +} + void BlockGenerator::createScalarInitialization(Region &R, ValueMapT &GlobalMap) { // The split block __just before__ the region and optimized region. @@ -599,8 +632,12 @@ } void BlockGenerator::finalizeSCoP(Scop &S, ValueMapT &GlobalMap) { - createScalarInitialization(S.getRegion(), GlobalMap); - createScalarFinalization(S.getRegion()); + Region &R = S.getRegion(); + + // Find additional PHI escapes + handleExitingPHIs(R); + createScalarInitialization(R, GlobalMap); + createScalarFinalization(R); } VectorBlockGenerator::VectorBlockGenerator(BlockGenerator &BlockGen, Index: test/Isl/CodeGen/OpenMP/single_loop.ll =================================================================== --- test/Isl/CodeGen/OpenMP/single_loop.ll +++ test/Isl/CodeGen/OpenMP/single_loop.ll @@ -43,7 +43,7 @@ ; IR-NEXT: call void @llvm.lifetime.end(i64 8, i8* %1) ; IR-NEXT: br label %polly.merge_new_and_old -; IR: define internal void @single_parallel_loop.polly.subfn(i8* %polly.par.userContext) #2 +; IR: define internal void @single_parallel_loop.polly.subfn(i8* %polly.par.userContext) #1 ; IR-LABEL: polly.par.setup: ; IR-NEXT: %polly.par.LBPtr = alloca i64 ; IR-NEXT: %polly.par.UBPtr = alloca i64 @@ -83,7 +83,7 @@ ; IR-LABEL: polly.loop_preheader: ; IR-NEXT: br label %polly.loop_header -; IR: attributes #2 = { "polly.skip.fn" } +; IR: attributes #1 = { "polly.skip.fn" } ; IR-STRIDE4: call void @GOMP_parallel_loop_runtime_start(void (i8*)* @single_parallel_loop.polly.subfn, i8* %polly.par.userContext1, i32 0, i64 0, i64 1024, i64 4) ; IR-STRIDE4: add nsw i64 %polly.indvar, 3 Index: test/ScopDetect/keep_going_expansion.ll =================================================================== --- test/ScopDetect/keep_going_expansion.ll +++ test/ScopDetect/keep_going_expansion.ll @@ -42,4 +42,4 @@ ret i32 %add } -; CHECK: Valid Region for Scop: for.body => for.cond2.preheader +; CHECK: Valid Region for Scop: for.body => for.body4 Index: test/ScopDetect/multidim_indirect_access.ll =================================================================== --- test/ScopDetect/multidim_indirect_access.ll +++ test/ScopDetect/multidim_indirect_access.ll @@ -10,9 +10,6 @@ ; treated as a multidimensional access with dimension size x. This test will ; check that we correctly invalidate the region and do not detect a outer SCoP. ; -; FIXME: -; We should detect the inner region but the PHI node in the exit blocks that. -; ; void f(int *A, long N) { ; int j = 0; ; while (N > j) { @@ -25,8 +22,8 @@ ; } ; } ; -; INDEPENDENT-NOT: Valid -; NON_INDEPENDENT-NOT: Valid +; INDEPENDENT-NOT: Valid Region for Scop: bb0 => bb1 +; NON_INDEPENDENT-NOT: Valid Region for Scop: bb0 => bb1 ; target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" Index: test/ScopDetect/non-affine-loop-condition-dependent-access_2.ll =================================================================== --- test/ScopDetect/non-affine-loop-condition-dependent-access_2.ll +++ test/ScopDetect/non-affine-loop-condition-dependent-access_2.ll @@ -7,9 +7,9 @@ ; innermost loop as a SCoP of depth 1, we have to reject the loop nest if not ; both, non-affine loops as well as non-affine accesses are allowed. ; -; REJECTNONAFFINELOOPS: Valid Region for Scop: bb15 => bb26 +; REJECTNONAFFINELOOPS: Valid Region for Scop: bb15 => bb13 ; REJECTNONAFFINELOOPS-NOT: Valid -; ALLOWNONAFFINELOOPS: Valid Region for Scop: bb15 => bb26 +; ALLOWNONAFFINELOOPS: Valid Region for Scop: bb15 => bb13 ; ALLOWNONAFFINELOOPS-NOT: Valid ; ALLOWNONAFFINELOOPSANDACCESSES: Valid Region for Scop: bb11 => bb29 ; Index: test/ScopDetect/non-affine-loop-condition-dependent-access_3.ll =================================================================== --- test/ScopDetect/non-affine-loop-condition-dependent-access_3.ll +++ test/ScopDetect/non-affine-loop-condition-dependent-access_3.ll @@ -7,9 +7,9 @@ ; innermost loop as a SCoP of depth 1, we have to reject the loop nest if not ; both, non-affine loops as well as non-affine accesses are allowed. ; -; REJECTNONAFFINELOOPS: Valid Region for Scop: bb15 => bb26 +; REJECTNONAFFINELOOPS: Valid Region for Scop: bb15 => bb13 ; REJECTNONAFFINELOOPS-NOT: Valid -; ALLOWNONAFFINELOOPS: Valid Region for Scop: bb15 => bb26 +; ALLOWNONAFFINELOOPS: Valid Region for Scop: bb15 => bb13 ; ALLOWNONAFFINELOOPS-NOT: Valid ; ALLOWNONAFFINELOOPSANDACCESSES: Valid Region for Scop: bb11 => bb29 ; Index: test/ScopDetect/phi_with_multi_exiting_edges.ll =================================================================== --- test/ScopDetect/phi_with_multi_exiting_edges.ll +++ test/ScopDetect/phi_with_multi_exiting_edges.ll @@ -1,7 +1,5 @@ ; RUN: opt %loadPolly -polly-detect-unprofitable -polly-detect -analyze -S < %s | FileCheck %s ; -; XFAIL: * -; ; Region with an exit node that has a PHI node multiple incoming edges from ; inside the region. Motivation for supporting such cases in Polly. ; Index: test/ScopDetect/phi_with_multi_exiting_edges_2.ll =================================================================== --- /dev/null +++ test/ScopDetect/phi_with_multi_exiting_edges_2.ll @@ -0,0 +1,35 @@ +; RUN: opt %loadPolly -polly-detect-unprofitable -polly-detect -analyze -S < %s | FileCheck %s + +define float @foo(float* %A, i64 %param) { +entry: + br label %entry.split + +entry.split: + %branchcond = icmp slt i64 %param, 64 + br i1 %branchcond, label %loopA, label %loopB + +loopA: + %indvarA = phi i64 [0, %entry.split], [%indvar.nextA, %loopA] + %indvar.nextA = add i64 %indvarA, 1 + %valA = load float, float* %A + %sumA = fadd float %valA, %valA + store float %valA, float* %A + %cndA = icmp eq i64 %indvar.nextA, 100 + br i1 %cndA, label %next, label %loopA + +loopB: + %indvarB = phi i64 [0, %entry.split], [%indvar.nextB, %loopB] + %indvar.nextB = add i64 %indvarB, 1 + %valB = load float, float* %A + %sumB = fadd float %valB, %valB + store float %valB, float* %A + %cndB = icmp eq i64 %indvar.nextB, 100 + br i1 %cndB, label %next, label %loopB + +next: + %result = phi float [%sumA, %loopA], [%sumB, %loopB] + ret float %result + +} + +; CHECK: Valid Region for Scop: entry.split => next Index: test/ScopDetect/simple_non_single_entry.ll =================================================================== --- test/ScopDetect/simple_non_single_entry.ll +++ test/ScopDetect/simple_non_single_entry.ll @@ -65,4 +65,4 @@ ret void } -; CHECK: Valid Region for Scop: next => for.i.head1 +; CHECK: Valid Region for Scop: next => for.i Index: test/ScopInfo/20111108-Parameter-not-detected.ll =================================================================== --- test/ScopInfo/20111108-Parameter-not-detected.ll +++ test/ScopInfo/20111108-Parameter-not-detected.ll @@ -54,5 +54,5 @@ ; CHECK: p0: {0,+,1}<%for.cond> ; CHECK: Domain := -; CHECK: [p_0] -> { Stmt_if_then[i0] : i0 >= 0 and i0 <= 1022 and i0 >= 999 - p_0 }; +; CHECK: [p_0] -> { Stmt_if_then[i0] : i0 <= 1022 and i0 >= 0 and i0 >= 999 - p_0 }; Index: test/ScopInfo/NonAffine/non-affine-loop-condition-dependent-access_2.ll =================================================================== --- test/ScopInfo/NonAffine/non-affine-loop-condition-dependent-access_2.ll +++ test/ScopInfo/NonAffine/non-affine-loop-condition-dependent-access_2.ll @@ -9,7 +9,7 @@ ; access. ; ; INNERMOST: Function: f -; INNERMOST: Region: %bb15---%bb26 +; INNERMOST: Region: %bb15---%bb13 ; INNERMOST: Max Loop Depth: 1 ; INNERMOST: p0: {0,+,{0,+,-1}<%bb11>}<%bb13> ; INNERMOST: p1: {0,+,{0,+,1}<%bb11>}<%bb13> @@ -19,11 +19,11 @@ ; INNERMOST: Alias Groups (0): ; INNERMOST: n/a ; INNERMOST: Statements { -; INNERMOST: Stmt_bb16 +; INNERMOST: Stmt_bb16 ; INNERMOST: Domain := -; INNERMOST: [p_0, p_1, p_2, p_3, p_4] -> { Stmt_bb16[i0] : (i0 <= 1023 - p_1 and i0 >= 0 and i0 <= 1024 + p_0) or (i0 >= 0 and i0 >= 1025 - p_1 and i0 <= 1024 + p_0) }; +; INNERMOST: [p_0, p_1, p_2, p_3, p_4] -> { Stmt_bb16[i0] : (i0 >= 0 and i0 >= 1025 - p_1 and i0 <= 1024 + p_0) or (i0 <= 1023 - p_1 and i0 >= 0 and i0 <= 1024 + p_0) }; ; INNERMOST: Schedule := -; INNERMOST: [p_0, p_1, p_2, p_3, p_4] -> { Stmt_bb16[i0] -> [i0] : i0 >= 1025 - p_1 or i0 <= 1023 - p_1 }; +; INNERMOST: [p_0, p_1, p_2, p_3, p_4] -> { Stmt_bb16[i0] -> [0, i0] : i0 >= 1025 - p_1 or i0 <= 1023 - p_1 }; ; INNERMOST: ReadAccess := [Reduction Type: NONE] [Scalar: 0] ; INNERMOST: [p_0, p_1, p_2, p_3, p_4] -> { Stmt_bb16[i0] -> MemRef_A[o0] : 4o0 = p_2 }; ; INNERMOST: ReadAccess := [Reduction Type: NONE] [Scalar: 0] @@ -32,6 +32,15 @@ ; INNERMOST: [p_0, p_1, p_2, p_3, p_4] -> { Stmt_bb16[i0] -> MemRef_A[o0] : 4o0 = p_4 + 4i0 }; ; INNERMOST: MustWriteAccess := [Reduction Type: +] [Scalar: 0] ; INNERMOST: [p_0, p_1, p_2, p_3, p_4] -> { Stmt_bb16[i0] -> MemRef_A[o0] : 4o0 = p_4 + 4i0 }; +; INNERMOST: Stmt_bb26 +; INNERMOST: Domain := +; INNERMOST: [p_0, p_1, p_2, p_3, p_4] -> { Stmt_bb26[] }; +; INNERMOST: Schedule := +; INNERMOST: [p_0, p_1, p_2, p_3, p_4] -> { Stmt_bb26[] -> [1, 0] }; +; INNERMOST: MustWriteAccess := [Reduction Type: NONE] [Scalar: 1] +; INNERMOST: [p_0, p_1, p_2, p_3, p_4] -> { Stmt_bb26[] -> MemRef_indvars_iv_next6[] }; +; INNERMOST: MustWriteAccess := [Reduction Type: NONE] [Scalar: 1] +; INNERMOST: [p_0, p_1, p_2, p_3, p_4] -> { Stmt_bb26[] -> MemRef_indvars_iv_next4[] }; ; INNERMOST: } ; ; ALL: Function: f Index: test/ScopInfo/NonAffine/non-affine-loop-condition-dependent-access_3.ll =================================================================== --- test/ScopInfo/NonAffine/non-affine-loop-condition-dependent-access_3.ll +++ test/ScopInfo/NonAffine/non-affine-loop-condition-dependent-access_3.ll @@ -9,7 +9,7 @@ ; access. ; ; INNERMOST: Function: f -; INNERMOST: Region: %bb15---%bb26 +; INNERMOST: Region: %bb15---%bb13 ; INNERMOST: Max Loop Depth: 1 ; INNERMOST: Context: ; INNERMOST: [p_0, p_1, p_2] -> { : p_0 >= 0 and p_0 <= 2147483647 and p_1 >= 0 and p_1 <= 4096 and p_2 >= 0 and p_2 <= 4096 } @@ -25,7 +25,7 @@ ; INNERMOST: Domain := ; INNERMOST: [p_0, p_1, p_2] -> { Stmt_bb16[i0] : i0 >= 0 and i0 <= -1 + p_0 }; ; INNERMOST: Schedule := -; INNERMOST: [p_0, p_1, p_2] -> { Stmt_bb16[i0] -> [i0] }; +; INNERMOST: [p_0, p_1, p_2] -> { Stmt_bb16[i0] -> [0, i0] }; ; INNERMOST: ReadAccess := [Reduction Type: NONE] [Scalar: 0] ; INNERMOST: [p_0, p_1, p_2] -> { Stmt_bb16[i0] -> MemRef_A[o0] : 4o0 = p_1 }; ; INNERMOST: ReadAccess := [Reduction Type: NONE] [Scalar: 0]