Index: include/polly/ScopInfo.h
===================================================================
--- include/polly/ScopInfo.h
+++ include/polly/ScopInfo.h
@@ -911,6 +911,15 @@
     return It == InstructionToAccess.end() ? nullptr : It->getSecond()->front();
   }
 
+  /// @brief Return the __unique__ explicit access for @p Inst if any.
+  const MemoryAccess *lookupExplicitAccessFor(const Instruction *Inst) const {
+    if (const MemoryAccessList *MAL = lookupAccessesFor(Inst))
+      for (MemoryAccess *MA : *MAL)
+        if (MA->isExplicit())
+          return MA;
+    return nullptr;
+  }
+
   void setBasicBlock(BasicBlock *Block) {
     // TODO: Handle the case where the statement is a region statement, thus
     // the entry block was split and needs to be changed in the region R.
@@ -983,6 +992,9 @@
   /// @return The loop at a certain dimension.
   const Loop *getLoopForDimension(unsigned Dimension) const;
 
+  /// @brief Return the number of loops shared with @p Other.
+  unsigned getNumSharedLoops(const ScopStmt &Other) const;
+
   /// @brief Align the parameters in the statement to the scop context
   void realignParams();
 
@@ -1257,9 +1269,15 @@
   ///
   /// @param Stmt  The statement we want to recompute @p Insts in.
   /// @param Insts The instructions we need to recompute.
+  /// @param AdditionalAccesses Additionally needed accesses for @p Stmt are
+  ///                           added here if the return value is true.
+  /// @param AccList The location in which new MemoryAccess will be
+  ///                allocated.
   ///
   /// @returns True, if all instructions can be recomputed in @p Stmt.
-  bool canRecomputeInStmt(ScopStmt &Stmt, SmallPtrSet<Instruction *, 16> &Insts);
+  bool canRecomputeInStmt(ScopStmt &Stmt, SmallPtrSet<Instruction *, 16> &Insts,
+                          SmallVectorImpl<MemoryAccess *> &AdditionalAccesses,
+                          AccFuncSetType &AccList);
 
   /// @brief Simplify the scalar accesses in this SCoP.
 ///
Index: lib/Analysis/ScopInfo.cpp
===================================================================
--- lib/Analysis/ScopInfo.cpp
+++ lib/Analysis/ScopInfo.cpp
@@ -1331,6 +1331,14 @@
   return NestLoops[Dimension];
 }
 
+unsigned ScopStmt::getNumSharedLoops(const ScopStmt &Other) const {
+  unsigned u, e = std::min(getNumIterators(), Other.getNumIterators());
+  for (u = 0; u < e; u++)
+    if (getLoopForDimension(u) != Other.getLoopForDimension(u))
+      break;
+  return u;
+}
+
 isl_ctx *ScopStmt::getIslCtx() const { return Parent.getIslCtx(); }
 
 __isl_give isl_set *ScopStmt::getDomain() const { return isl_set_copy(Domain); }
@@ -2684,13 +2692,213 @@
                               compareInvariantAccesses);
 }
 
-bool Scop::canRecomputeInStmt(ScopStmt &Stmt,
-                              SmallPtrSet<Instruction *, 16> &Insts) {
+/// @brief Fix all elements of @p Maps in the first @p NumDims dimensions to
+/// a parametric value.
+static void fixParametricIteration(unsigned NumDims,
+                                   ArrayRef<isl_map **> Maps) {
+
+  isl_ctx *Ctx = isl_map_get_ctx(*Maps[0]);
+  unsigned NumParams = isl_map_n_param(*Maps[0]);
+
+  for (isl_map **MapPtr : Maps)
+    *MapPtr = isl_map_add_dims(*MapPtr, isl_dim_param, NumDims);
+
+  for (unsigned dim = 0; dim < NumDims; dim++) {
+
+    isl_id *Id = isl_id_alloc(Ctx, "p", nullptr);
+    for (isl_map **MapPtr : Maps) {
+      *MapPtr = isl_map_set_dim_id(*MapPtr, isl_dim_param, NumParams + dim,
+                                   isl_id_copy(Id));
+      *MapPtr = isl_map_equate(*MapPtr, isl_dim_param, NumParams + dim,
+                               isl_dim_in, dim);
+    }
+    isl_id_free(Id);
+  }
+}
+
+/// @brief Revert the fix of @p Map in the first @p NumDims dimensions.
+static void unfixParametricIteration(unsigned NumDims, unsigned NumParams,
+                                     isl_map *&Map) {
+  Map = isl_map_drop_constraints_involving_dims(Map, isl_dim_in, NumDims,
+                                                isl_map_n_in(Map) - NumDims);
+  for (unsigned dim = 0; dim < NumDims; dim++)
+    Map = isl_map_equate(Map, isl_dim_param, NumParams + dim, isl_dim_in, dim);
+  Map = isl_map_project_out(Map, isl_dim_param, NumParams, NumDims);
+}
+
+bool Scop::canRecomputeInStmt(
+    ScopStmt &Stmt, SmallPtrSet<Instruction *, 16> &Insts,
+    SmallVectorImpl<MemoryAccess *> &AdditionalAccesses,
+    AccFuncSetType &AccList) {
+
+  // Early exit for trivial cases.
   if (Insts.empty())
     return true;
 
-  // TODO: Check if we can actually move the instructions.
-  return false;
+  // Local version of the AdditionalAccesses vector as we do not yet know if
+  // the instructions can be moved and we do not want to modify the
+  // AdditionalAccesses if not.
+  SmallVector<MemoryAccess *, 4> LocalAdditionalAccesses;
+
+  // SCoP specific information.
+  auto *Schedule = isl_union_map_intersect_domain(getSchedule(), getDomains());
+  auto *Writes = getWrites();
+
+  // Invocation specific information.
+  auto *StmtSchedule =
+      isl_map_intersect_domain(Stmt.getSchedule(), Stmt.getDomain());
+  bool CanBeMoved = true;
+
+  for (Instruction *Inst : Insts) {
+
+    // Currently we do not try to recompute PHIs.
+    if (isa<PHINode>(Inst)) {
+      CanBeMoved = false;
+      break;
+    }
+
+    // For non-phi instructions we check for memory effects. If there are none,
+    // we can safely recompute this instruction. If there are memory effects we
+    // currently know that it is a load instruction. For those we check if the
+    // loaded location might be overwritten before the re-load in @p Stmt.
+    auto *DefStmt = getStmtForBasicBlock(Inst->getParent());
+    assert(DefStmt);
+
+    auto *InstMA = DefStmt->lookupExplicitAccessFor(Inst);
+    if (!InstMA)
+      continue;
+    assert(InstMA->isRead() && "Expected operands to only read memory");
+
+    auto *DefAccessRelation = isl_map_intersect_domain(
+        InstMA->getAccessRelation(), DefStmt->getDomain());
+
+    // Check the parent block of Inst to see if it can be overwritten there.
+    Instruction *NextInst = Inst->getNextNode();
+    while (CanBeMoved && !isa<TerminatorInst>(NextInst)) {
+
+      // If a following instruction does not write memory it cannot overwrite
+      // the value read by Inst.
+      auto *NextInstMA = DefStmt->lookupExplicitAccessFor(NextInst);
+      if (!NextInstMA || NextInstMA->isRead()) {
+        NextInst = NextInst->getNextNode();
+        continue;
+      }
+
+      // If a following instruction does access a different base pointer it
+      // cannot overwrite the value read by Inst.
+      if (NextInstMA->getScopArrayInfo() != InstMA->getScopArrayInfo()) {
+        NextInst = NextInst->getNextNode();
+        continue;
+      }
+
+      // If both Inst and an instruction following Inst in its parent block
+      // access the same base pointer and the latter is a write, we perform a
+      // simple and cheap version of the dependence analysis. To this end we
+      // compare the accessed locations for a fixed but arbitrary iteration.
+      auto *NextInstAccessRelation = isl_map_intersect_domain(
+          NextInstMA->getAccessRelation(), DefStmt->getDomain());
+      isl_map *Overwriting = isl_map_intersect(isl_map_copy(DefAccessRelation),
+                                               NextInstAccessRelation);
+      CanBeMoved = isl_map_is_empty(Overwriting);
+      isl_map_free(Overwriting);
+      NextInst = NextInst->getNextNode();
+    }
+
+    // Early exit, taken if Inst might get overwritten in its parent block.
+ if (!CanBeMoved) { + isl_map_free(DefAccessRelation); + break; + } + + auto *DefStmtSchedule = + isl_map_intersect_domain(DefStmt->getSchedule(), DefStmt->getDomain()); + auto *UseStmtSchedule = isl_map_copy(StmtSchedule); + + // We now check if the location read by Inst might get overwritten after + // the parent block of Inst (DefStmtSchedule) and before the beginning + // of the use block (UseStmtSchedule) where we want to recompute Inst. To + // this end we will first fix the iterations of the shared loops to an + // arbitrary/parametric value as we are not interested in loop-carried + // dependences. + unsigned SameDims = Stmt.getNumSharedLoops(*DefStmt); + fixParametricIteration( + SameDims, {&DefStmtSchedule, &UseStmtSchedule, &DefAccessRelation}); + + // In case the use and the definition share all loops (1) or the use is + // deeper nested (2) than the definition the fixed parametric iteration is + // enough to reason about one scalar def-use chain. However, if the + // definition is nested in at least one loop that is not shared with the + // use (3) we would check if any iteration of the definition is not + // overwritten before the use is executed. To this end we will additionally + // compute the last iteration of the definition after we fixed the shared + // loops. This is a no-op for cases (1) and (2) but allows to reason about + // the actual used value in case (3). + auto *LastDefStmtIteration = + isl_set_lexmax(isl_map_domain(isl_map_copy(DefStmtSchedule))); + DefStmtSchedule = isl_map_intersect_domain( + DefStmtSchedule, isl_set_copy(LastDefStmtIteration)); + DefAccessRelation = + isl_map_intersect_domain(DefAccessRelation, LastDefStmtIteration); + + // Now we compute the statement iterations that are in-between the actually + // used definition of the scalar and the use. 
+ auto *GTDefStmt = isl_union_map_domain(isl_union_map_lex_gt_union_map( + isl_union_map_copy(Schedule), isl_union_map_from_map(DefStmtSchedule))); + auto *LTUseStmt = isl_union_map_domain(isl_union_map_lex_lt_union_map( + isl_union_map_copy(Schedule), isl_union_map_from_map(UseStmtSchedule))); + auto *InbetweenStmts = isl_union_set_intersect(GTDefStmt, LTUseStmt); + + // Then we check if in-between an access occurred that could overwrite the + // definition. If so, we bail out as we cannot reload the value. + auto *InbetweenAccesses = + isl_union_set_apply(InbetweenStmts, isl_union_map_copy(Writes)); + auto *DefAccessLocation = isl_map_range(DefAccessRelation); + + auto *Overwriting = isl_union_set_intersect( + InbetweenAccesses, + isl_union_set_from_set(isl_set_copy(DefAccessLocation))); + CanBeMoved = isl_union_set_is_empty(Overwriting); + isl_union_set_free(Overwriting); + + if (!CanBeMoved) { + isl_set_free(DefAccessLocation); + break; + } + + // Record that we need to reload Inst in the statement. + // TODO: As we store memory accesses in the AccList we might create copies + // of accesses here that will not be useful but still exist till the + // SCoP is dropped. + MemoryAccess *InstMACopy = InstMA->copy(AccList, &Stmt); + LocalAdditionalAccesses.push_back(InstMACopy); + + // Finally, we have to adjust the access relation for case (3) described + // above if the use statement does not share the same loops as the + // definition statement. + if (SameDims == DefStmt->getNumIterators()) { + isl_set_free(DefAccessLocation); + continue; + } + + // To adjust the access relation we intersect the old access relation with + // the maximal accessed locations computed earlier. Then we revert the + // fixation of the parametric iteration for the accesses locations. 
+ auto *NewAccessRelation = isl_map_intersect_range( + InstMACopy->getAccessRelation(), DefAccessLocation); + unfixParametricIteration(SameDims, getNumParams(), NewAccessRelation); + InstMACopy->setNewAccessRelation(NewAccessRelation); + } + + // In case the access is movable we provide the needed additional accesses, + // otherwise they will not be referenced and deleted with the SCoP. + if (CanBeMoved) + AdditionalAccesses.append(LocalAdditionalAccesses.begin(), + LocalAdditionalAccesses.end()); + + isl_union_map_free(Schedule); + isl_union_map_free(Writes); + isl_map_free(StmtSchedule); + return CanBeMoved; } void Scop::simplifyScalarAccesses() { @@ -2757,8 +2965,10 @@ dbgs() << "\t\t" << *Op << "\n"; dbgs() << "\t}\n"; dbgs() << "\tOutsideOperands: {\n"; - for (auto *Op : OutsideOperands) - dbgs() << "\t\t" << *Op << "\n"; + for (const auto &OpPair : OutsideOperands) { + dbgs() << "\t\t" << *OpPair.first << "\n"; + dbgs() << "\t\tused by " << *OpPair.second << "\n"; + } dbgs() << "\t}\n"; }); } @@ -2797,7 +3007,8 @@ auto &NonTrivialOperands = NonTrivialOperandsMap[DefInst]; auto &SideEffectOperands = NonTrivialOperands.first; - if (!canRecomputeInStmt(Stmt, SideEffectOperands)) + if (!canRecomputeInStmt(Stmt, SideEffectOperands, AdditionalAccesses, + AccList)) continue; auto &OutsideOperands = NonTrivialOperands.second; Index: test/ScopInfo/eliminate-scalar-accesses-caused-by-load-1.ll =================================================================== --- /dev/null +++ test/ScopInfo/eliminate-scalar-accesses-caused-by-load-1.ll @@ -0,0 +1,67 @@ +; RUN: opt %loadPolly -polly-scops -analyze < %s | FileCheck %s +; RUN: opt %loadPolly -polly-codegen -analyze < %s +; +; void f(int *A) { +; for (int i = 0; i < 1000; i++) { +; int x = A[0]; +; for (int j = 1; j < 1000; j++) +; A[j] = x + 2; +; } +; A[0] = 0; +; } +; +; TODO: Remove read only statements. 
+; +; CHECK: Stmt_for_body +; CHECK: ReadAccess := [Reduction Type: NONE] [Scalar: 0] +; CHECK: { Stmt_for_body[i0] -> MemRef_A[0] }; +; CHECK: Stmt_for_body_3 +; CHECK: ReadAccess := [Reduction Type: NONE] [Scalar: 0] +; CHECK: { Stmt_for_body_3[i0, i1] -> MemRef_A[0] }; +; CHECK: MustWriteAccess := [Reduction Type: NONE] [Scalar: 0] +; CHECK: { Stmt_for_body_3[i0, i1] -> MemRef_A[1 + i1] }; +; +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" + +define void @f(i32* %A) { +entry: + br label %for.cond + +for.cond: ; preds = %for.inc.5, %entry + %indvars.iv = phi i64 [ %indvars.iv.next, %for.inc.5 ], [ 0, %entry ] + %exitcond1 = icmp ne i64 %indvars.iv, 1000 + br i1 %exitcond1, label %for.body, label %for.end.7 + +for.body: ; preds = %for.cond + %tmp = load i32, i32* %A, align 4 + br label %for.cond.1 + +for.cond.1: ; preds = %for.inc, %for.body + %j.0 = phi i32 [ 1, %for.body ], [ %inc, %for.inc ] + %exitcond = icmp ne i32 %j.0, 1000 + br i1 %exitcond, label %for.body.3, label %for.end + +for.body.3: ; preds = %for.cond.1 + %arrayidx4 = getelementptr inbounds i32, i32* %A, i32 %j.0 + %add = add i32 %tmp, 2 + store i32 %add, i32* %arrayidx4, align 4 + br label %for.inc + +for.inc: ; preds = %for.body.3 + %inc = add nuw nsw i32 %j.0, 1 + br label %for.cond.1 + +for.end: ; preds = %for.cond.1 + br label %for.inc.5 + +for.inc.5: ; preds = %for.end + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + br label %for.cond + +for.end.7: ; preds = %for.cond + store i32 0, i32* %A, align 4 + br label %exit + +exit: + ret void +} Index: test/ScopInfo/eliminate-scalar-accesses-caused-by-load-10.ll =================================================================== --- /dev/null +++ test/ScopInfo/eliminate-scalar-accesses-caused-by-load-10.ll @@ -0,0 +1,72 @@ +; RUN: opt %loadPolly -polly-scops -analyze < %s | FileCheck %s +; RUN: opt %loadPolly -polly-codegen -analyze < %s +; +; void f(int *A, int *B, int p) { +; for (int i = 0; i < 1000; i++) { +; int x = A[i]; +; 
A[p] = x; +; for (int j = 1; j < 1000; j++) { +; B[i] = x; +; } +; } +; } +; +; Check that we do not recompute x in the inner loop as it might be overwritten. +; +; CHECK: Stmt_for_body +; CHECK: ReadAccess := [Reduction Type: NONE] [Scalar: 0] +; CHECK: [p] -> { Stmt_for_body[i0] -> MemRef_A[i0] }; +; CHECK: MustWriteAccess := [Reduction Type: NONE] [Scalar: 1] +; CHECK: [p] -> { Stmt_for_body[i0] -> MemRef_tmp[] }; +; CHECK: MustWriteAccess := [Reduction Type: NONE] [Scalar: 0] +; CHECK: [p] -> { Stmt_for_body[i0] -> MemRef_A[p] }; +; +; CHECK: Stmt_for_body_5 +; CHECK: ReadAccess := [Reduction Type: NONE] [Scalar: 1] +; CHECK: [p] -> { Stmt_for_body_5[i0, i1] -> MemRef_tmp[] }; +; CHECK: MustWriteAccess := [Reduction Type: NONE] [Scalar: 0] +; CHECK: [p] -> { Stmt_for_body_5[i0, i1] -> MemRef_B[i0] }; +; +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" + +define void @f(i32* %A, i32* %B, i32 %p) { +entry: + br label %for.cond + +for.cond: ; preds = %for.inc.8, %entry + %indvars.iv = phi i64 [ %indvars.iv.next, %for.inc.8 ], [ 0, %entry ] + %exitcond1 = icmp ne i64 %indvars.iv, 1000 + br i1 %exitcond1, label %for.body, label %for.end.10 + +for.body: ; preds = %for.cond + %arrayidx = getelementptr inbounds i32, i32* %A, i64 %indvars.iv + %tmp = load i32, i32* %arrayidx, align 4 + %idxprom1 = sext i32 %p to i64 + %arrayidx2 = getelementptr inbounds i32, i32* %A, i64 %idxprom1 + store i32 %tmp, i32* %arrayidx2, align 4 + br label %for.cond.3 + +for.cond.3: ; preds = %for.inc, %for.body + %j.0 = phi i32 [ 1, %for.body ], [ %inc, %for.inc ] + %exitcond = icmp ne i32 %j.0, 1000 + br i1 %exitcond, label %for.body.5, label %for.end + +for.body.5: ; preds = %for.cond.3 + %arrayidx7 = getelementptr inbounds i32, i32* %B, i64 %indvars.iv + store i32 %tmp, i32* %arrayidx7, align 4 + br label %for.inc + +for.inc: ; preds = %for.body.5 + %inc = add nuw nsw i32 %j.0, 1 + br label %for.cond.3 + +for.end: ; preds = %for.cond.3 + br label %for.inc.8 + +for.inc.8: ; 
preds = %for.end + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + br label %for.cond + +for.end.10: ; preds = %for.cond + ret void +} Index: test/ScopInfo/eliminate-scalar-accesses-caused-by-load-2.ll =================================================================== --- /dev/null +++ test/ScopInfo/eliminate-scalar-accesses-caused-by-load-2.ll @@ -0,0 +1,62 @@ +; RUN: opt %loadPolly -polly-scops -analyze < %s | FileCheck %s +; RUN: opt %loadPolly -polly-codegen -analyze < %s +; +; void f(int *A) { +; for (int i = 0; i < 1000; i++) { +; int x = A[i]; +; for (int j = i + 1; j < 1000; j++) +; A[j] = x; +; } +; } +; +; TODO: Remove read only statements. +; +; CHECK: Stmt_for_body +; CHECK: ReadAccess := [Reduction Type: NONE] [Scalar: 0] +; CHECK: { Stmt_for_body[i0] -> MemRef_A[i0] }; +; CHECK: Stmt_for_body_3 +; CHECK: ReadAccess := [Reduction Type: NONE] [Scalar: 0] +; CHECK: { Stmt_for_body_3[i0, i1] -> MemRef_A[i0] }; +; CHECK: MustWriteAccess := [Reduction Type: NONE] [Scalar: 0] +; CHECK: { Stmt_for_body_3[i0, i1] -> MemRef_A[1 + i0 + i1] }; +; +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" + +define void @f(i32* %A) { +entry: + br label %for.cond + +for.cond: ; preds = %for.inc.5, %entry + %indvars.iv = phi i64 [ %indvars.iv.next, %for.inc.5 ], [ 0, %entry ] + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + %exitcond1 = icmp ne i64 %indvars.iv, 1000 + br i1 %exitcond1, label %for.body, label %for.end.7 + +for.body: ; preds = %for.cond + %arrayidx = getelementptr inbounds i32, i32* %A, i64 %indvars.iv + %tmp = load i32, i32* %arrayidx, align 4 + br label %for.cond.1 + +for.cond.1: ; preds = %for.inc, %for.body + %j.0 = phi i64 [ %indvars.iv.next, %for.body ], [ %inc, %for.inc ] + %exitcond = icmp ne i64 %j.0, 1000 + br i1 %exitcond, label %for.body.3, label %for.end + +for.body.3: ; preds = %for.cond.1 + %arrayidx4 = getelementptr inbounds i32, i32* %A, i64 %j.0 + store i32 %tmp, i32* %arrayidx4, align 4 + br label %for.inc + +for.inc: ; 
preds = %for.body.3 + %inc = add nuw nsw i64 %j.0, 1 + br label %for.cond.1 + +for.end: ; preds = %for.cond.1 + br label %for.inc.5 + +for.inc.5: ; preds = %for.end + br label %for.cond + +for.end.7: ; preds = %for.cond + ret void +} Index: test/ScopInfo/eliminate-scalar-accesses-caused-by-load-3.ll =================================================================== --- /dev/null +++ test/ScopInfo/eliminate-scalar-accesses-caused-by-load-3.ll @@ -0,0 +1,68 @@ +; RUN: opt %loadPolly -polly-scops -analyze < %s | FileCheck %s +; RUN: opt %loadPolly -polly-codegen -analyze < %s +; +; void f(int *A) { +; for (int i = 0; i < 1000; i++) { +; int x = A[0]; +; for (int j = 0; j < 1000; j++) +; A[j] = x + 2; +; } +; } +; +; Verify we do not move x to the inner loop as it is overwritten. +; +; CHECK: Stmt_for_body +; CHECK: ReadAccess := [Reduction Type: NONE] [Scalar: 0] +; CHECK: { Stmt_for_body[i0] -> MemRef_A[0] }; +; CHECK: MustWriteAccess := [Reduction Type: NONE] [Scalar: 1] +; CHECK: { Stmt_for_body[i0] -> MemRef_x[] }; +; CHECK: Stmt_for_body_3 +; CHECK-NOT: ReadAccess := [Reduction Type: NONE] [Scalar: 0] +; CHECK-NOT: { Stmt_for_body_3[i0, i1] -> MemRef_A[0] }; +; CHECK: ReadAccess := [Reduction Type: NONE] [Scalar: 1] +; CHECK: { Stmt_for_body_3[i0, i1] -> MemRef_x[] }; +; CHECK-NOT: ReadAccess := [Reduction Type: NONE] [Scalar: 0] +; CHECK-NOT: { Stmt_for_body_3[i0, i1] -> MemRef_A[i0] }; +; CHECK: MustWriteAccess := [Reduction Type: NONE] [Scalar: 0] +; CHECK: { Stmt_for_body_3[i0, i1] -> MemRef_A[i1] }; +; +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" + +define void @f(i32* %A) { +entry: + br label %for.cond + +for.cond: ; preds = %for.inc.5, %entry + %indvars.iv = phi i64 [ %indvars.iv.next, %for.inc.5 ], [ 0, %entry ] + %exitcond1 = icmp ne i64 %indvars.iv, 1000 + br i1 %exitcond1, label %for.body, label %for.end.7 + +for.body: ; preds = %for.cond + %x = load i32, i32* %A, align 4 + br label %for.cond.1 + +for.cond.1: ; preds = %for.inc, 
%for.body + %j.0 = phi i32 [ 0, %for.body ], [ %inc, %for.inc ] + %exitcond = icmp ne i32 %j.0, 1000 + br i1 %exitcond, label %for.body.3, label %for.end + +for.body.3: ; preds = %for.cond.1 + %arrayidx4 = getelementptr inbounds i32, i32* %A, i32 %j.0 + %add = add i32 %x, 2 + store i32 %add, i32* %arrayidx4, align 4 + br label %for.inc + +for.inc: ; preds = %for.body.3 + %inc = add nuw nsw i32 %j.0, 1 + br label %for.cond.1 + +for.end: ; preds = %for.cond.1 + br label %for.inc.5 + +for.inc.5: ; preds = %for.end + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + br label %for.cond + +for.end.7: ; preds = %for.cond + ret void +} Index: test/ScopInfo/eliminate-scalar-accesses-caused-by-load-4.ll =================================================================== --- /dev/null +++ test/ScopInfo/eliminate-scalar-accesses-caused-by-load-4.ll @@ -0,0 +1,69 @@ +; RUN: opt %loadPolly -polly-scops -analyze < %s | FileCheck %s +; RUN: opt %loadPolly -polly-codegen -analyze < %s +; +; void f(int *A) { +; for (int i = 0; i < 1000; i++) { +; int x = A[i]; +; for (int j = i; j < 1000; j++) +; A[j] = x + 2; +; } +; } +; +; Verify we do not move x to the inner loop as it is overwritten. 
+; +; CHECK: Stmt_for_body +; CHECK: ReadAccess := [Reduction Type: NONE] [Scalar: 0] +; CHECK: { Stmt_for_body[i0] -> MemRef_A[i0] }; +; CHECK: MustWriteAccess := [Reduction Type: NONE] [Scalar: 1] +; CHECK: { Stmt_for_body[i0] -> MemRef_x[] }; +; CHECK: Stmt_for_body_3 +; CHECK-NOT: ReadAccess := [Reduction Type: NONE] [Scalar: 0] +; CHECK-NOT: { Stmt_for_body_3[i0, i1] -> MemRef_A[0] }; +; CHECK: ReadAccess := [Reduction Type: NONE] [Scalar: 1] +; CHECK: { Stmt_for_body_3[i0, i1] -> MemRef_x[] }; +; CHECK-NOT: ReadAccess := [Reduction Type: NONE] [Scalar: 0] +; CHECK-NOT: { Stmt_for_body_3[i0, i1] -> MemRef_A[i0] }; +; CHECK: MustWriteAccess := [Reduction Type: NONE] [Scalar: 0] +; CHECK: { Stmt_for_body_3[i0, i1] -> MemRef_A[i0 + i1] }; +; +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" + +define void @f(i32* %A) { +entry: + br label %for.cond + +for.cond: ; preds = %for.inc.5, %entry + %indvars.iv = phi i64 [ %indvars.iv.next, %for.inc.5 ], [ 0, %entry ] + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + %exitcond1 = icmp ne i64 %indvars.iv, 1000 + br i1 %exitcond1, label %for.body, label %for.end.7 + +for.body: ; preds = %for.cond + %arrayidx = getelementptr inbounds i32, i32* %A, i64 %indvars.iv + %x = load i32, i32* %arrayidx, align 4 + br label %for.cond.1 + +for.cond.1: ; preds = %for.inc, %for.body + %j.0 = phi i64 [ %indvars.iv, %for.body ], [ %inc, %for.inc ] + %exitcond = icmp ne i64 %j.0, 1000 + br i1 %exitcond, label %for.body.3, label %for.end + +for.body.3: ; preds = %for.cond.1 + %arrayidx4 = getelementptr inbounds i32, i32* %A, i64 %j.0 + %add = add i32 %x, 2 + store i32 %add, i32* %arrayidx4, align 4 + br label %for.inc + +for.inc: ; preds = %for.body.3 + %inc = add nuw nsw i64 %j.0, 1 + br label %for.cond.1 + +for.end: ; preds = %for.cond.1 + br label %for.inc.5 + +for.inc.5: ; preds = %for.end + br label %for.cond + +for.end.7: ; preds = %for.cond + ret void +} Index: test/ScopInfo/eliminate-scalar-accesses-caused-by-load-5.ll 
=================================================================== --- /dev/null +++ test/ScopInfo/eliminate-scalar-accesses-caused-by-load-5.ll @@ -0,0 +1,64 @@ +; RUN: opt %loadPolly -polly-scops -analyze < %s | FileCheck %s +; RUN: opt %loadPolly -polly-codegen -analyze < %s +; +; void f(int *A) { +; int x = 0; +; int i = 0; +; do { +; x = A[i]; +; } while (i++ < 1000); +; +; for (int j = 0; j < 1000; j++) { +; A[j] = x; +; } +; } +; +; TODO: Remove read only statements. +; +; CHECK: Stmt_do_body +; CHECK: ReadAccess := [Reduction Type: NONE] [Scalar: 0] +; CHECK: { Stmt_do_body[i0] -> MemRef_A[i0] }; +; CHECK: Stmt_for_body +; CHECK: ReadAccess := [Reduction Type: NONE] [Scalar: 0] +; CHECK: { Stmt_for_body[i0] -> MemRef_A[i0] }; +; CHECK: new: { Stmt_for_body[i0] -> MemRef_A[1000] }; +; CHECK: MustWriteAccess := [Reduction Type: NONE] [Scalar: 0] +; CHECK: { Stmt_for_body[i0] -> MemRef_A[i0] }; +; +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" + +define void @f(i32* %A) { +entry: + br label %do.body + +do.body: ; preds = %do.cond, %entry + %indvars.iv1 = phi i64 [ %indvars.iv.next2, %do.cond ], [ 0, %entry ] + %arrayidx = getelementptr inbounds i32, i32* %A, i64 %indvars.iv1 + %x = load i32, i32* %arrayidx, align 4 + br label %do.cond + +do.cond: ; preds = %do.body + %indvars.iv.next2 = add nuw nsw i64 %indvars.iv1, 1 + %exitcond3 = icmp ne i64 %indvars.iv.next2, 1001 + br i1 %exitcond3, label %do.body, label %do.end + +do.end: ; preds = %do.cond + br label %for.cond + +for.cond: ; preds = %for.inc, %do.end + %indvars.iv = phi i64 [ %indvars.iv.next, %for.inc ], [ 0, %do.end ] + %exitcond = icmp ne i64 %indvars.iv, 1000 + br i1 %exitcond, label %for.body, label %for.end + +for.body: ; preds = %for.cond + %arrayidx3 = getelementptr inbounds i32, i32* %A, i64 %indvars.iv + store i32 %x, i32* %arrayidx3, align 4 + br label %for.inc + +for.inc: ; preds = %for.body + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + br label %for.cond + +for.end: ; 
preds = %for.cond + ret void +} Index: test/ScopInfo/eliminate-scalar-accesses-caused-by-load-5b.ll =================================================================== --- /dev/null +++ test/ScopInfo/eliminate-scalar-accesses-caused-by-load-5b.ll @@ -0,0 +1,82 @@ +; RUN: opt %loadPolly -polly-scops -analyze < %s | FileCheck %s +; RUN: opt %loadPolly -polly-codegen -analyze < %s +; +; void f(int *A) { +; for (int k = 0; k < 128; k++) { +; int x = 0; +; int i = 0; +; do { +; x = A[i]; +; } while (i++ < 1000); +; +; for (int j = 0; j < 1000; j++) { +; A[j] = x; +; } +; } +; } +; +; TODO: Remove read only statements. +; +; CHECK: Stmt_do_body +; CHECK: ReadAccess := [Reduction Type: NONE] [Scalar: 0] +; CHECK: { Stmt_do_body[i0, i1] -> MemRef_A[i1] }; +; CHECK: Stmt_for_body_4 +; CHECK: ReadAccess := [Reduction Type: NONE] [Scalar: 0] +; CHECK: { Stmt_for_body_4[i0, i1] -> MemRef_A[i1] }; +; CHECK: new: { Stmt_for_body_4[i0, i1] -> MemRef_A[1000] +; CHECK: MustWriteAccess := [Reduction Type: NONE] [Scalar: 0] +; CHECK: { Stmt_for_body_4[i0, i1] -> MemRef_A[i1] }; +; +; +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" + +define void @f(i32* %A) { +entry: + br label %for.cond + +for.cond: ; preds = %for.inc.8, %entry + %k.0 = phi i32 [ 0, %entry ], [ %inc9, %for.inc.8 ] + %exitcond4 = icmp ne i32 %k.0, 128 + br i1 %exitcond4, label %for.body, label %for.end.10 + +for.body: ; preds = %for.cond + br label %do.body + +do.body: ; preds = %do.cond, %for.body + %indvars.iv = phi i64 [ %indvars.iv.next, %do.cond ], [ 0, %for.body ] + %arrayidx = getelementptr inbounds i32, i32* %A, i64 %indvars.iv + %tmp = load i32, i32* %arrayidx, align 4 + br label %do.cond + +do.cond: ; preds = %do.body + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + %exitcond = icmp ne i64 %indvars.iv.next, 1001 + br i1 %exitcond, label %do.body, label %do.end + +do.end: ; preds = %do.cond + br label %for.cond.2 + +for.cond.2: ; preds = %for.inc, %do.end + %indvars.iv1 = phi i64 [ 
%indvars.iv.next2, %for.inc ], [ 0, %do.end ] + %exitcond3 = icmp ne i64 %indvars.iv1, 1000 + br i1 %exitcond3, label %for.body.4, label %for.end + +for.body.4: ; preds = %for.cond.2 + %arrayidx6 = getelementptr inbounds i32, i32* %A, i64 %indvars.iv1 + store i32 %tmp, i32* %arrayidx6, align 4 + br label %for.inc + +for.inc: ; preds = %for.body.4 + %indvars.iv.next2 = add nuw nsw i64 %indvars.iv1, 1 + br label %for.cond.2 + +for.end: ; preds = %for.cond.2 + br label %for.inc.8 + +for.inc.8: ; preds = %for.end + %inc9 = add nuw nsw i32 %k.0, 1 + br label %for.cond + +for.end.10: ; preds = %for.cond + ret void +} Index: test/ScopInfo/eliminate-scalar-accesses-caused-by-load-6.ll =================================================================== --- /dev/null +++ test/ScopInfo/eliminate-scalar-accesses-caused-by-load-6.ll @@ -0,0 +1,65 @@ +; RUN: opt %loadPolly -polly-scops -analyze < %s | FileCheck %s +; RUN: opt %loadPolly -polly-codegen -analyze < %s +; +; void f(int *A) { +; for (int i = 0; i < 1000; i++) { +; int x = A[2 * i]; +; for (int j = 0; j < 1000; j++) +; A[2 * j + 1] = x; +; } +; } +; +; TODO: Remove read only statements. 
+; +; CHECK: Stmt_for_body +; CHECK: ReadAccess := [Reduction Type: NONE] [Scalar: 0] +; CHECK: { Stmt_for_body[i0] -> MemRef_A[2i0] }; +; CHECK: Stmt_for_body_3 +; CHECK: ReadAccess := [Reduction Type: NONE] [Scalar: 0] +; CHECK: { Stmt_for_body_3[i0, i1] -> MemRef_A[2i0] }; +; CHECK: MustWriteAccess := [Reduction Type: NONE] [Scalar: 0] +; CHECK: { Stmt_for_body_3[i0, i1] -> MemRef_A[1 + 2i1] }; +; +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" + +define void @f(i32* %A) { +entry: + br label %for.cond + +for.cond: ; preds = %for.inc.7, %entry + %indvars.iv3 = phi i64 [ %indvars.iv.next4, %for.inc.7 ], [ 0, %entry ] + %exitcond6 = icmp ne i64 %indvars.iv3, 1000 + br i1 %exitcond6, label %for.body, label %for.end.9 + +for.body: ; preds = %for.cond + %tmp = shl nsw i64 %indvars.iv3, 1 + %arrayidx = getelementptr inbounds i32, i32* %A, i64 %tmp + %tmp7 = load i32, i32* %arrayidx, align 4 + br label %for.cond.1 + +for.cond.1: ; preds = %for.inc, %for.body + %indvars.iv = phi i64 [ %indvars.iv.next, %for.inc ], [ 0, %for.body ] + %exitcond = icmp ne i64 %indvars.iv, 1000 + br i1 %exitcond, label %for.body.3, label %for.end + +for.body.3: ; preds = %for.cond.1 + %tmp8 = shl nsw i64 %indvars.iv, 1 + %tmp9 = or i64 %tmp8, 1 + %arrayidx6 = getelementptr inbounds i32, i32* %A, i64 %tmp9 + store i32 %tmp7, i32* %arrayidx6, align 4 + br label %for.inc + +for.inc: ; preds = %for.body.3 + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + br label %for.cond.1 + +for.end: ; preds = %for.cond.1 + br label %for.inc.7 + +for.inc.7: ; preds = %for.end + %indvars.iv.next4 = add nuw nsw i64 %indvars.iv3, 1 + br label %for.cond + +for.end.9: ; preds = %for.cond + ret void +} Index: test/ScopInfo/eliminate-scalar-accesses-caused-by-load-7.ll =================================================================== --- /dev/null +++ test/ScopInfo/eliminate-scalar-accesses-caused-by-load-7.ll @@ -0,0 +1,140 @@ +; RUN: opt %loadPolly -polly-scops -analyze < %s | FileCheck %s +; 
RUN: opt %loadPolly -polly-codegen -analyze < %s +; +; void f(int *A) { +; for (int i = 0; i < 1000; i++) { +; int x = A[0]; +; for (int j = 1; j < 1000; j++) +; A[j] += x; +; for (int j = 1; j < 1000; j++) +; A[j] += x; +; for (int j = 1; j < 1000; j++) +; A[j] += x; +; } +; for (int i = 0; i < 1000; i++) +; A[i] = 0; +; } +; +; TODO: Remove read only statements. +; +; CHECK: Stmt_for_body +; CHECK: ReadAccess := [Reduction Type: NONE] [Scalar: 0] +; CHECK: { Stmt_for_body[i0] -> MemRef_A[0] }; +; CHECK: Stmt_for_body_3 +; CHECK: ReadAccess := [Reduction Type: NONE] [Scalar: 0] +; CHECK: { Stmt_for_body_3[i0, i1] -> MemRef_A[0] }; +; CHECK: ReadAccess := [Reduction Type: +] [Scalar: 0] +; CHECK: { Stmt_for_body_3[i0, i1] -> MemRef_A[1 + i1] }; +; CHECK: MustWriteAccess := [Reduction Type: +] [Scalar: 0] +; CHECK: { Stmt_for_body_3[i0, i1] -> MemRef_A[1 + i1] }; +; CHECK: Stmt_for_body_8 +; CHECK: ReadAccess := [Reduction Type: NONE] [Scalar: 0] +; CHECK: { Stmt_for_body_8[i0, i1] -> MemRef_A[0] }; +; CHECK: ReadAccess := [Reduction Type: +] [Scalar: 0] +; CHECK: { Stmt_for_body_8[i0, i1] -> MemRef_A[1 + i1] }; +; CHECK: MustWriteAccess := [Reduction Type: +] [Scalar: 0] +; CHECK: { Stmt_for_body_8[i0, i1] -> MemRef_A[1 + i1] }; +; CHECK: Stmt_for_body_18 +; CHECK: ReadAccess := [Reduction Type: NONE] [Scalar: 0] +; CHECK: { Stmt_for_body_18[i0, i1] -> MemRef_A[0] }; +; CHECK: ReadAccess := [Reduction Type: +] [Scalar: 0] +; CHECK: { Stmt_for_body_18[i0, i1] -> MemRef_A[1 + i1] }; +; CHECK: MustWriteAccess := [Reduction Type: +] [Scalar: 0] +; CHECK: { Stmt_for_body_18[i0, i1] -> MemRef_A[1 + i1] }; +; +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" + +define void @f(i32* %A) { +entry: + br label %for.cond + +for.cond: ; preds = %for.inc.25, %entry + %indvars.iv4 = phi i64 [ %indvars.iv.next5, %for.inc.25 ], [ 0, %entry ] + %exitcond6 = icmp ne i64 %indvars.iv4, 1000 + br i1 %exitcond6, label %for.body, label %for.end.27 + +for.body: ; preds = 
%for.cond + %tmp = load i32, i32* %A, align 4 + br label %for.cond.1 + +for.cond.1: ; preds = %for.inc, %for.body + %j.0 = phi i32 [ 1, %for.body ], [ %inc, %for.inc ] + %exitcond1 = icmp ne i32 %j.0, 1000 + br i1 %exitcond1, label %for.body.3, label %for.end + +for.body.3: ; preds = %for.cond.1 + %arrayidx4 = getelementptr inbounds i32, i32* %A, i32 %j.0 + %tmp7 = load i32, i32* %arrayidx4, align 4 + %add = add nsw i32 %tmp7, %tmp + store i32 %add, i32* %arrayidx4, align 4 + br label %for.inc + +for.inc: ; preds = %for.body.3 + %inc = add nuw nsw i32 %j.0, 1 + br label %for.cond.1 + +for.end: ; preds = %for.cond.1 + br label %for.cond.6 + +for.cond.6: ; preds = %for.inc.12, %for.end + %j5.0 = phi i32 [ 1, %for.end ], [ %inc13, %for.inc.12 ] + %exitcond2 = icmp ne i32 %j5.0, 1000 + br i1 %exitcond2, label %for.body.8, label %for.end.14 + +for.body.8: ; preds = %for.cond.6 + %arrayidx10 = getelementptr inbounds i32, i32* %A, i32 %j5.0 + %tmp8 = load i32, i32* %arrayidx10, align 4 + %add11 = add nsw i32 %tmp8, %tmp + store i32 %add11, i32* %arrayidx10, align 4 + br label %for.inc.12 + +for.inc.12: ; preds = %for.body.8 + %inc13 = add nuw nsw i32 %j5.0, 1 + br label %for.cond.6 + +for.end.14: ; preds = %for.cond.6 + br label %for.cond.16 + +for.cond.16: ; preds = %for.inc.22, %for.end.14 + %j15.0 = phi i32 [ 1, %for.end.14 ], [ %inc23, %for.inc.22 ] + %exitcond3 = icmp ne i32 %j15.0, 1000 + br i1 %exitcond3, label %for.body.18, label %for.end.24 + +for.body.18: ; preds = %for.cond.16 + %arrayidx20 = getelementptr inbounds i32, i32* %A, i32 %j15.0 + %tmp9 = load i32, i32* %arrayidx20, align 4 + %add21 = add nsw i32 %tmp9, %tmp + store i32 %add21, i32* %arrayidx20, align 4 + br label %for.inc.22 + +for.inc.22: ; preds = %for.body.18 + %inc23 = add nuw nsw i32 %j15.0, 1 + br label %for.cond.16 + +for.end.24: ; preds = %for.cond.16 + br label %for.inc.25 + +for.inc.25: ; preds = %for.end.24 + %indvars.iv.next5 = add nuw nsw i64 %indvars.iv4, 1 + br label %for.cond + 
+for.end.27: ; preds = %for.cond + br label %for.cond.29 + +for.cond.29: ; preds = %for.inc.34, %for.end.27 + %indvars.iv = phi i64 [ %indvars.iv.next, %for.inc.34 ], [ 0, %for.end.27 ] + %exitcond = icmp ne i64 %indvars.iv, 1000 + br i1 %exitcond, label %for.body.31, label %for.end.36 + +for.body.31: ; preds = %for.cond.29 + %arrayidx33 = getelementptr inbounds i32, i32* %A, i64 %indvars.iv + store i32 0, i32* %arrayidx33, align 4 + br label %for.inc.34 + +for.inc.34: ; preds = %for.body.31 + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + br label %for.cond.29 + +for.end.36: ; preds = %for.cond.29 + ret void +} Index: test/ScopInfo/eliminate-scalar-accesses-caused-by-load-8.ll =================================================================== --- /dev/null +++ test/ScopInfo/eliminate-scalar-accesses-caused-by-load-8.ll @@ -0,0 +1,72 @@ +; RUN: opt %loadPolly -polly-scops -analyze < %s | FileCheck %s +; RUN: opt %loadPolly -polly-codegen -analyze < %s +; +; void f(int *A, int *B) { +; for (int i = 0; i < 1000; i++) { +; int x = A[i]; +; A[i - 1] = x; +; for (int j = 1; j < 1000; j++) { +; B[i] = x; +; } +; } +; } +; +; CHECK: Stmt_for_body +; CHECK-NOT: [Scalar: 1] +; CHECK: ReadAccess := [Reduction Type: NONE] [Scalar: 0] +; CHECK-NEXT: { Stmt_for_body[i0] -> MemRef_A[i0] }; +; CHECK-NOT: [Scalar: 1] +; CHECK: MustWriteAccess := [Reduction Type: NONE] [Scalar: 0] +; CHECK-NEXT: { Stmt_for_body[i0] -> MemRef_A[-1 + i0] }; +; CHECK: Stmt_for_body_5 +; CHECK-NOT: [Scalar: 1] +; CHECK: ReadAccess := [Reduction Type: NONE] [Scalar: 0] +; CHECK-NEXT: { Stmt_for_body_5[i0, i1] -> MemRef_A[i0] }; +; CHECK-NOT: [Scalar: 1] +; CHECK: MustWriteAccess := [Reduction Type: NONE] [Scalar: 0] +; CHECK-NEXT: { Stmt_for_body_5[i0, i1] -> MemRef_B[i0] }; +; CHECK-NOT: [Scalar: 1] +; +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" + +define void @f(i32* %A, i32* %B) { +entry: + br label %for.cond + +for.cond: ; preds = %for.inc.8, %entry + %indvars.iv = phi i64 [ 
%indvars.iv.next, %for.inc.8 ], [ 0, %entry ] + %exitcond2 = icmp ne i64 %indvars.iv, 1000 + br i1 %exitcond2, label %for.body, label %for.end.10 + +for.body: ; preds = %for.cond + %arrayidx = getelementptr inbounds i32, i32* %A, i64 %indvars.iv + %tmp = load i32, i32* %arrayidx, align 4 + %tmp3 = add nsw i64 %indvars.iv, -1 + %arrayidx2 = getelementptr inbounds i32, i32* %A, i64 %tmp3 + store i32 %tmp, i32* %arrayidx2, align 4 + br label %for.cond.3 + +for.cond.3: ; preds = %for.inc, %for.body + %j.0 = phi i32 [ 1, %for.body ], [ %inc, %for.inc ] + %exitcond = icmp ne i32 %j.0, 1000 + br i1 %exitcond, label %for.body.5, label %for.end + +for.body.5: ; preds = %for.cond.3 + %arrayidx7 = getelementptr inbounds i32, i32* %B, i64 %indvars.iv + store i32 %tmp, i32* %arrayidx7, align 4 + br label %for.inc + +for.inc: ; preds = %for.body.5 + %inc = add nuw nsw i32 %j.0, 1 + br label %for.cond.3 + +for.end: ; preds = %for.cond.3 + br label %for.inc.8 + +for.inc.8: ; preds = %for.end + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + br label %for.cond + +for.end.10: ; preds = %for.cond + ret void +} Index: test/ScopInfo/eliminate-scalar-accesses-caused-by-load-9.ll =================================================================== --- /dev/null +++ test/ScopInfo/eliminate-scalar-accesses-caused-by-load-9.ll @@ -0,0 +1,72 @@ +; RUN: opt %loadPolly -polly-scops -analyze < %s | FileCheck %s +; RUN: opt %loadPolly -polly-codegen -analyze < %s +; +; void f(int *A, int *B) { +; for (int i = 0; i < 1000; i++) { +; int x = A[i]; +; A[i] = x; +; for (int j = 1; j < 1000; j++) { +; B[i] = x; +; } +; } +; } +; +; Check that we do not recompute x in the inner loop as it is overwritten in +; it definition block. 
+; +; CHECK: Stmt_for_body +; CHECK: ReadAccess := [Reduction Type: NONE] [Scalar: 0] +; CHECK: { Stmt_for_body[i0] -> MemRef_A[i0] }; +; CHECK: MustWriteAccess := [Reduction Type: NONE] [Scalar: 1] +; CHECK: { Stmt_for_body[i0] -> MemRef_tmp[] }; +; CHECK: MustWriteAccess := [Reduction Type: NONE] [Scalar: 0] +; CHECK: { Stmt_for_body[i0] -> MemRef_A[i0] }; +; +; CHECK: Stmt_for_body_5 +; CHECK: ReadAccess := [Reduction Type: NONE] [Scalar: 1] +; CHECK: { Stmt_for_body_5[i0, i1] -> MemRef_tmp[] }; +; CHECK: MustWriteAccess := [Reduction Type: NONE] [Scalar: 0] +; CHECK: { Stmt_for_body_5[i0, i1] -> MemRef_B[i0] }; +; +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" + +define void @f(i32* %A, i32* %B) { +entry: + br label %for.cond + +for.cond: ; preds = %for.inc.8, %entry + %indvars.iv = phi i64 [ %indvars.iv.next, %for.inc.8 ], [ 0, %entry ] + %exitcond1 = icmp ne i64 %indvars.iv, 1000 + br i1 %exitcond1, label %for.body, label %for.end.10 + +for.body: ; preds = %for.cond + %arrayidx = getelementptr inbounds i32, i32* %A, i64 %indvars.iv + %tmp = load i32, i32* %arrayidx, align 4 + %arrayidx2 = getelementptr inbounds i32, i32* %A, i64 %indvars.iv + store i32 %tmp, i32* %arrayidx2, align 4 + br label %for.cond.3 + +for.cond.3: ; preds = %for.inc, %for.body + %j.0 = phi i32 [ 1, %for.body ], [ %inc, %for.inc ] + %exitcond = icmp ne i32 %j.0, 1000 + br i1 %exitcond, label %for.body.5, label %for.end + +for.body.5: ; preds = %for.cond.3 + %arrayidx7 = getelementptr inbounds i32, i32* %B, i64 %indvars.iv + store i32 %tmp, i32* %arrayidx7, align 4 + br label %for.inc + +for.inc: ; preds = %for.body.5 + %inc = add nuw nsw i32 %j.0, 1 + br label %for.cond.3 + +for.end: ; preds = %for.cond.3 + br label %for.inc.8 + +for.inc.8: ; preds = %for.end + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + br label %for.cond + +for.end.10: ; preds = %for.cond + ret void +} Index: test/ScopInfo/eliminate-scalar-caused-by-load-reduction-2.ll 
=================================================================== --- test/ScopInfo/eliminate-scalar-caused-by-load-reduction-2.ll +++ test/ScopInfo/eliminate-scalar-caused-by-load-reduction-2.ll @@ -1,15 +1,15 @@ ; RUN: opt %loadPolly -polly-scops -analyze < %s | FileCheck %s ; RUN: opt %loadPolly -polly-codegen -S < %s | FileCheck %s --check-prefix=CODEGEN ; -; This is a negative test. We should move the load to the split block -; and remove all scalar accesses, however at the moment we only move -; instructions that are trivially safe to move. All three checks should -; be negated at some point. This also checks that we currently not try to -; move a part of the scalar operand chain, i.e., the %add instruction. +; TODO: We could detect that it is actually a reduction in the future. ; -; CHECK: Scalar: 1 -; CHECK: Scalar: 1 -; CHECK-NOT: Reduction: + +; CHECK-NOT: Scalar: 1 +; +; CHECK: Stmt_for_body_split +; CHECK: ReadAccess := [Reduction Type: NONE] [Scalar: 0] +; CHECK: { Stmt_for_body_split[i0] -> MemRef_A[i0] }; +; CHECK: MustWriteAccess := [Reduction Type: NONE] [Scalar: 0] +; CHECK: { Stmt_for_body_split[i0] -> MemRef_A[i0] }; ; ; These checks should stay as they verify we did not modify the original region: ; Index: test/ScopInfo/eliminate-scalar-caused-by-load-reduction.ll =================================================================== --- test/ScopInfo/eliminate-scalar-caused-by-load-reduction.ll +++ test/ScopInfo/eliminate-scalar-caused-by-load-reduction.ll @@ -1,13 +1,14 @@ ; RUN: opt %loadPolly -polly-scops -analyze < %s | FileCheck %s ; -; This is a negative test. We should move the load to the split block -; and remove all scalar accesses, however at the moment we only move -; instructions that are trivially safe to move. All three checks should -; be negated at some point. +; TODO: We could detect that it is actually a reduction in the future. 
; -; CHECK: Scalar: 1 -; CHECK: Scalar: 1 -; CHECK-NOT: Reduction: + +; CHECK-NOT: Scalar: 1 +; +; CHECK: Stmt_for_body_split +; CHECK: ReadAccess := [Reduction Type: NONE] [Scalar: 0] +; CHECK: { Stmt_for_body_split[i0] -> MemRef_A[i0] }; +; CHECK: MustWriteAccess := [Reduction Type: NONE] [Scalar: 0] +; CHECK: { Stmt_for_body_split[i0] -> MemRef_A[i0] }; ; ; void f(int *A) { ; for (int i = 0; i < 1000; i++) { Index: test/ScopInfo/non_affine_region_4.ll =================================================================== --- test/ScopInfo/non_affine_region_4.ll +++ test/ScopInfo/non_affine_region_4.ll @@ -1,7 +1,7 @@ ; RUN: opt %loadPolly -polly-scops -analyze < %s | FileCheck %s ; -; Verify that both scalars (x and y) are properly written in the non-affine -; region and read afterwards. +; Verify that only y is properly written in the non-affine +; region and read afterwards. The value of x will be reloaded. ; ; void f(int *A, int b) { ; for (int i = 0; i < 1024; i++) { @@ -37,8 +37,6 @@ ; CHECK: { Stmt_bb2__TO__bb7[i0] -> [i0, 0] }; ; CHECK: ReadAccess := [Reduction Type: NONE] [Scalar: 0] ; CHECK: { Stmt_bb2__TO__bb7[i0] -> MemRef_A[i0] }; -; CHECK: MustWriteAccess := [Reduction Type: NONE] [Scalar: 1] -; CHECK: { Stmt_bb2__TO__bb7[i0] -> MemRef_x[] }; ; CHECK: MayWriteAccess := [Reduction Type: NONE] [Scalar: 1] ; CHECK: { Stmt_bb2__TO__bb7[i0] -> MemRef_y__phi[] }; ; CHECK: MustWriteAccess := [Reduction Type: NONE] [Scalar: 1] @@ -52,8 +50,8 @@ ; CHECK: }; ; CHECK: Schedule := ; CHECK: { Stmt_bb7[i0] -> [i0, 1] }; -; CHECK: ReadAccess := [Reduction Type: NONE] [Scalar: 1] -; CHECK: { Stmt_bb7[i0] -> MemRef_x[] }; +; CHECK: ReadAccess := [Reduction Type: NONE] [Scalar: 0] +; CHECK: { Stmt_bb7[i0] -> MemRef_A[i0] }; ; CHECK: ReadAccess := [Reduction Type: NONE] [Scalar: 1] ; CHECK: { Stmt_bb7[i0] -> MemRef_y__phi[] }; ; CHECK: MustWriteAccess := [Reduction Type: NONE] [Scalar: 0] Index: test/ScopInfo/scalar.ll 
=================================================================== --- test/ScopInfo/scalar.ll +++ test/ScopInfo/scalar.ll @@ -46,14 +46,14 @@ ; CHECK: [N] -> { Stmt_S1[i0] -> [i0, 0] }; ; CHECK: ReadAccess := ; CHECK: [N] -> { Stmt_S1[i0] -> MemRef_a[i0] }; -; CHECK: MustWriteAccess := -; CHECK: [N] -> { Stmt_S1[i0] -> MemRef_val[] }; +; CHECK-NOT: MustWriteAccess := ; CHECK: Stmt_S2 ; CHECK: Domain := ; CHECK: [N] -> { Stmt_S2[i0] : i0 >= 0 and i0 <= -1 + N }; ; CHECK: Schedule := ; CHECK: [N] -> { Stmt_S2[i0] -> [i0, 1] }; +; CHECK-NOT: MemRef_val ; CHECK: ReadAccess := -; CHECK: [N] -> { Stmt_S2[i0] -> MemRef_val[] }; +; CHECK: [N] -> { Stmt_S2[i0] -> MemRef_a[i0] }; ; CHECK: MustWriteAccess := ; CHECK: [N] -> { Stmt_S2[i0] -> MemRef_a[i0] }; Index: test/ScopInfo/scalar_to_array.ll =================================================================== --- test/ScopInfo/scalar_to_array.ll +++ test/ScopInfo/scalar_to_array.ll @@ -81,8 +81,7 @@ ; CHECK: Stmt_for_body_a ; CHECK: ReadAccess := [Reduction Type: NONE] [Scalar: 0] ; CHECK-NEXT: { Stmt_for_body_a[i0] -> MemRef_A[i0] }; -; CHECK: MustWriteAccess := [Reduction Type: NONE] [Scalar: 1] -; CHECK-NEXT: { Stmt_for_body_a[i0] -> MemRef_scalar[] }; +; CHECK-NOT: MustWriteAccess := [Reduction Type: NONE] [Scalar: 1] for.body.b: ; preds = %for.body.a %arrayidx2 = getelementptr [1024 x float], [1024 x float]* @A, i64 0, i64 %indvar @@ -91,8 +90,9 @@ store float %sum, float* %arrayidx2 br label %for.inc ; CHECK: Stmt_for_body_b -; CHECK: ReadAccess := [Reduction Type: NONE] [Scalar: 1] -; CHECK-NEXT: { Stmt_for_body_b[i0] -> MemRef_scalar[] }; +; CHECK: ReadAccess := [Reduction Type: NONE] [Scalar: 0] +; CHECK-NEXT: { Stmt_for_body_b[i0] -> MemRef_A[i0] }; +; CHECK-NOT: ReadAccess := [Reduction Type: NONE] [Scalar: 1] ; CHECK: MustWriteAccess := [Reduction Type: NONE] [Scalar: 0] ; CHECK-NEXT: { Stmt_for_body_b[i0] -> MemRef_A[i0] };