Index: polly/trunk/lib/Transform/DeLICM.cpp =================================================================== --- polly/trunk/lib/Transform/DeLICM.cpp +++ polly/trunk/lib/Transform/DeLICM.cpp @@ -146,6 +146,12 @@ "lifetime analysis; 0=no limit"), cl::init(1000000), cl::cat(PollyCategory)); +cl::opt DelicmOverapproximateWrites( + "polly-delicm-overapproximate-writes", + cl::desc( + "Do more PHI writes than necessary in order to avoid partial accesses"), + cl::init(false), cl::Hidden, cl::cat(PollyCategory)); + STATISTIC(DeLICMAnalyzed, "Number of successfully analyzed SCoPs"); STATISTIC(DeLICMOutOfQuota, "Analyses aborted because max_operations was reached"); @@ -388,6 +394,26 @@ return nullptr; } +/// Try to find a 'natural' extension of a mapped to elements outside its +/// domain. +/// +/// @param Relevant The map with mapping that may not be modified. +/// @param Universe The domain to which @p Relevant needs to be extended. +/// +/// @return A map with that associates the domain elements of @p Relevant to the +/// same elements and in addition the elements of @p Universe to some +/// undefined elements. The function prefers to return simple maps. +IslPtr expandMapping(IslPtr Relevant, + IslPtr Universe) { + Relevant = give(isl_union_map_coalesce(Relevant.take())); + auto RelevantDomain = give(isl_union_map_domain(Relevant.copy())); + auto Simplified = + give(isl_union_map_gist_domain(Relevant.take(), RelevantDomain.take())); + Simplified = give(isl_union_map_coalesce(Simplified.take())); + return give( + isl_union_map_intersect_domain(Simplified.take(), Universe.take())); +} + /// Represent the knowledge of the contents of any array elements in any zone or /// the knowledge we would add when mapping a scalar to an array element. /// @@ -1299,17 +1325,24 @@ simplify(WritesTarget); // { DomainWrite[] } - auto ExpandedWritesDom = give(isl_union_map_domain(WritesTarget.copy())); auto UniverseWritesDom = give(isl_union_set_empty(ParamSpace.copy())); for (auto *MA : DefUse.getPHIIncomings(SAI)) UniverseWritesDom = give(isl_union_set_add_set(UniverseWritesDom.take(), getDomainFor(MA).take())); + auto RelevantWritesTarget = WritesTarget; + if (DelicmOverapproximateWrites) + WritesTarget = expandMapping(WritesTarget, UniverseWritesDom); + + auto ExpandedWritesDom = give(isl_union_map_domain(WritesTarget.copy())); if (!isl_union_set_is_subset(UniverseWritesDom.keep(), ExpandedWritesDom.keep())) { DEBUG(dbgs() << " Reject because did not find PHI write mapping for " "all instances\n"); + if (DelicmOverapproximateWrites) + DEBUG(dbgs() << " Relevant Mapping: " << RelevantWritesTarget + << '\n'); DEBUG(dbgs() << " Deduced Mapping: " << WritesTarget << '\n'); DEBUG(dbgs() << " Missing instances: " << give(isl_union_set_subtract(UniverseWritesDom.copy(), Index: polly/trunk/test/DeLICM/reduction_overapproximate.ll =================================================================== --- polly/trunk/test/DeLICM/reduction_overapproximate.ll +++ polly/trunk/test/DeLICM/reduction_overapproximate.ll @@ -0,0 +1,115 @@ +; RUN: opt %loadPolly -polly-flatten-schedule -polly-delicm-overapproximate-writes=true -polly-delicm -analyze < %s | FileCheck %s --check-prefix=APPROX +; RUN: opt %loadPolly -polly-flatten-schedule -polly-delicm-overapproximate-writes=false -polly-delicm -analyze < %s | FileCheck %s --check-prefix=EXACT +; +; void func(double *A { +; for (int j = -1; j < 3; j += 1) { /* outer */ +; double phi = 0.0; +; if (0 < j) +; for (int i = 0; i < j; i += 1) /* reduction */ +; phi += 4.2; +; A[j] = phi; +; } +; } +; +define void @func(double* noalias nonnull %A) { +entry: + br label %outer.preheader + +outer.preheader: + br label %outer.for + +outer.for: + %j = phi i32 [-1, %outer.preheader], [%j.inc, %outer.inc] + %j.cmp = icmp slt i32 %j, 3 + br i1 %j.cmp, label %reduction.checkloop, label %outer.exit + + + + reduction.checkloop: + %j2.cmp = icmp slt i32 0, %j + br i1 %j2.cmp, label %reduction.preheader, label %reduction.exit + + reduction.preheader: + br label %reduction.for + + reduction.for: + %i = phi i32 [0, %reduction.preheader], [%i.inc, %reduction.inc] + %phi = phi double [0.0, %reduction.preheader], [%add, %reduction.inc] + br label %body + + + + body: + %add = fadd double %phi, 4.2 + br label %reduction.inc + + + + reduction.inc: + %i.inc = add nuw nsw i32 %i, 1 + %i.cmp = icmp slt i32 %i.inc, %j + br i1 %i.cmp, label %reduction.for, label %reduction.exit + + reduction.exit: + %val = phi double [%add, %reduction.inc], [0.0, %reduction.checkloop] + %A_idx = getelementptr inbounds double, double* %A, i32 %j + store double %val, double* %A_idx + br label %outer.inc + + + +outer.inc: + %j.inc = add nuw nsw i32 %j, 1 + br label %outer.for + +outer.exit: + br label %return + +return: + ret void +} + +; TODO: MemRef_phi__phi[] should be mapped to A[j-1] as as well, but it is +; occupied by %add. It is actually the same value but DeLICM currently +; does not known without the "Known"-extension. + +; APPROX: After accesses { +; APPROX-NEXT: Stmt_reduction_checkloop +; APPROX-NEXT: MustWriteAccess := [Reduction Type: NONE] [Scalar: 1] +; APPROX-NEXT: { Stmt_reduction_checkloop[i0] -> MemRef_val__phi[] }; +; APPROX-NEXT: new: { Stmt_reduction_checkloop[i0] -> MemRef_A[-1 + i0] : 0 <= i0 <= 3 }; +; APPROX-NEXT: Stmt_reduction_preheader +; APPROX-NEXT: MustWriteAccess := [Reduction Type: NONE] [Scalar: 1] +; APPROX-NEXT: { Stmt_reduction_preheader[i0] -> MemRef_phi__phi[] }; +; APPROX-NEXT: Stmt_reduction_for +; APPROX-NEXT: ReadAccess := [Reduction Type: NONE] [Scalar: 1] +; APPROX-NEXT: { Stmt_reduction_for[i0, i1] -> MemRef_phi__phi[] }; +; APPROX-NEXT: MustWriteAccess := [Reduction Type: NONE] [Scalar: 1] +; APPROX-NEXT: { Stmt_reduction_for[i0, i1] -> MemRef_phi[] }; +; APPROX-NEXT: new: { Stmt_reduction_for[i0, i1] -> MemRef_A[-1 + i0] : i0 <= 3 and i1 >= 0 and 2 - 3i0 <= i1 <= 11 - 3i0 and i1 <= 2 and i1 <= -2 + i0 }; +; APPROX-NEXT: Stmt_body +; APPROX-NEXT: MustWriteAccess := [Reduction Type: NONE] [Scalar: 1] +; APPROX-NEXT: { Stmt_body[i0, i1] -> MemRef_add[] }; +; APPROX-NEXT: new: { Stmt_body[i0, i1] -> MemRef_A[-1 + i0] : i0 <= 3 and i1 >= 0 and 2 - 3i0 <= i1 <= 10 - 3i0 and i1 <= 1 and i1 <= -2 + i0 }; +; APPROX-NEXT: ReadAccess := [Reduction Type: NONE] [Scalar: 1] +; APPROX-NEXT: { Stmt_body[i0, i1] -> MemRef_phi[] }; +; APPROX-NEXT: new: { Stmt_body[i0, i1] -> MemRef_A[-1 + i0] : i0 <= 3 and 0 <= i1 <= -2 + i0 }; +; APPROX-NEXT: Stmt_reduction_inc +; APPROX-NEXT: ReadAccess := [Reduction Type: NONE] [Scalar: 1] +; APPROX-NEXT: { Stmt_reduction_inc[i0, i1] -> MemRef_add[] }; +; APPROX-NEXT: new: { Stmt_reduction_inc[i0, i1] -> MemRef_A[-1 + i0] : i0 <= 3 and 0 <= i1 <= -2 + i0 }; +; APPROX-NEXT: MustWriteAccess := [Reduction Type: NONE] [Scalar: 1] +; APPROX-NEXT: { Stmt_reduction_inc[i0, i1] -> MemRef_phi__phi[] }; +; APPROX-NEXT: MustWriteAccess := [Reduction Type: NONE] [Scalar: 1] +; APPROX-NEXT: { Stmt_reduction_inc[i0, i1] -> MemRef_val__phi[] }; +; APPROX-NEXT: new: { Stmt_reduction_inc[i0, i1] -> MemRef_A[-1 + i0] : 2 <= i0 <= 3 and 0 <= i1 <= -2 + i0 }; +; APPROX-NEXT: Stmt_reduction_exit +; APPROX-NEXT: ReadAccess := [Reduction Type: NONE] [Scalar: 1] +; APPROX-NEXT: { Stmt_reduction_exit[i0] -> MemRef_val__phi[] }; +; APPROX-NEXT: new: { Stmt_reduction_exit[i0] -> MemRef_A[-1 + i0] : 0 <= i0 <= 3 }; +; APPROX-NEXT: MustWriteAccess := [Reduction Type: NONE] [Scalar: 0] +; APPROX-NEXT: { Stmt_reduction_exit[i0] -> MemRef_A[-1 + i0] }; +; APPROX-NEXT: } + + +; EXACT: No modification has been made