Index: include/polly/ScopInfo.h =================================================================== --- include/polly/ScopInfo.h +++ include/polly/ScopInfo.h @@ -94,6 +94,8 @@ RT_BOR, ///< Bitwise Or RT_BXOR, ///< Bitwise XOr RT_BAND, ///< Bitwise And + + RT_BOTTOM, ///< Pseudo type for the data flow analysis }; private: @@ -113,27 +115,11 @@ /// @brief Reduction type for reduction like accesses, RT_NONE otherwise /// /// An access is reduction like if it is part of a load-store chain in which - /// both access the same memory location (use the same LLVM-IR value - /// as pointer reference). Furthermore, between the load and the store there - /// is exactly one binary operator which is known to be associative and - /// commutative. - /// - /// TODO: - /// - /// We can later lift the constraint that the same LLVM-IR value defines the - /// memory location to handle scops such as the following: - /// - /// for i - /// for j - /// sum[i+j] = sum[i] + 3; - /// - /// Here not all iterations access the same memory location, but iterations - /// for which j = 0 holds do. After lifing the equality check in ScopInfo, - /// subsequent transformations do not only need check if a statement is - /// reduction like, but they also need to verify that that the reduction - /// property is only exploited for statement instances that load from and - /// store to the same data location. Doing so at dependence analysis time - /// could allow us to handle the above example. + /// both access the same memory location. Furthermore, between the load and + /// the store there is only one kind of associative and commutative binary + /// operator. Also the load is only allowed to flow into the store once and + /// neither the load nor the binary operators can be used by any instruction + /// not part of the reduction chain. ReductionType RedType = RT_NONE; /// @brief The access instruction of this memory access. @@ -262,6 +248,12 @@ /// accesses. /// At the moment every statement represents a single basic block of LLVM-IR. class ScopStmt { +public: + typedef SmallVector MemoryAccessVec; + typedef MemoryAccessVec::iterator iterator; + typedef MemoryAccessVec::const_iterator const_iterator; + +private: //===-------------------------------------------------------------------===// ScopStmt(const ScopStmt &) LLVM_DELETED_FUNCTION; const ScopStmt &operator=(const ScopStmt &) LLVM_DELETED_FUNCTION; @@ -326,7 +318,6 @@ /// The memory accesses of this statement. /// /// The only side effects of a statement are its memory accesses. - typedef SmallVector MemoryAccessVec; MemoryAccessVec MemAccs; std::map InstructionToAccess; @@ -357,11 +348,6 @@ /// @brief Detect and mark reductions in the ScopStmt void checkForReductions(); - - /// @brief Collect loads which might form a reduction chain with @p StoreMA - void - collectCandiateReductionLoads(MemoryAccess *StoreMA, - llvm::SmallVectorImpl &Loads); //@} /// Create the ScopStmt from a BasicBlock. @@ -423,9 +409,6 @@ void setBasicBlock(BasicBlock *Block) { BB = Block; } - typedef MemoryAccessVec::iterator iterator; - typedef MemoryAccessVec::const_iterator const_iterator; - iterator begin() { return MemAccs.begin(); } iterator end() { return MemAccs.end(); } const_iterator begin() const { return MemAccs.begin(); } Index: lib/Analysis/ScopInfo.cpp =================================================================== --- lib/Analysis/ScopInfo.cpp +++ lib/Analysis/ScopInfo.cpp @@ -278,6 +278,8 @@ case MemoryAccess::RT_NONE: llvm_unreachable("Requested a reduction operator string for a memory " "access which isn't a reduction"); + case MemoryAccess::RT_BOTTOM: + llvm_unreachable("Internal reduction operator used!"); case MemoryAccess::RT_ADD: return "+"; case MemoryAccess::RT_MUL: @@ -294,8 +296,8 @@ } /// @brief Return the reduction type for a given binary operator -static MemoryAccess::ReductionType getReductionType(const BinaryOperator *BinOp, - const Instruction *Load) { +static MemoryAccess::ReductionType +getReductionType(const BinaryOperator *BinOp) { if (!BinOp) return MemoryAccess::RT_NONE; switch (BinOp->getOpcode()) { @@ -323,6 +325,100 @@ return MemoryAccess::RT_NONE; } } + +/// @brief Combine two reduction types +static MemoryAccess::ReductionType +combineReductionType(MemoryAccess::ReductionType RT0, + MemoryAccess::ReductionType RT1) { + if (RT0 == MemoryAccess::RT_BOTTOM) + return RT1; + if (RT0 == RT1) + return RT1; + return MemoryAccess::RT_NONE; +} + +/// @brief True if @p AllAccs intersects with @p MemAccs execpt @p ExlcludeMAX +static bool hasIntersectingAccesses(__isl_take isl_set *AllAccs, + MemoryAccess *ExcludeMA0, + MemoryAccess *ExcludeMA1, + __isl_take isl_set *Domain, + ScopStmt::MemoryAccessVec &MemAccs) { + bool HasIntersectingAccs = false; + isl_set *AllAccsNoParams = isl_set_project_out( + isl_set_copy(AllAccs), isl_dim_param, 0, isl_set_n_param(AllAccs)); + + for (MemoryAccess *MA : MemAccs) { + if (MA == ExcludeMA0 || MA == ExcludeMA1) + continue; + + isl_map *AccRel = + isl_map_intersect_domain(MA->getAccessRelation(), isl_set_copy(Domain)); + isl_set *Accs = isl_map_range(AccRel); + isl_set *AccsNoParams = isl_set_project_out( + isl_set_copy(Accs), isl_dim_param, 0, isl_set_n_param(Accs)); + + bool CompatibleSpace = + isl_set_has_equal_space(AllAccsNoParams, AccsNoParams); + isl_set_free(AccsNoParams); + + if (CompatibleSpace || isl_set_free(Accs)) { + isl_set *OverlapAccs = isl_set_intersect(Accs, isl_set_copy(AllAccs)); + HasIntersectingAccs = !isl_set_is_empty(OverlapAccs); + isl_set_free(OverlapAccs); + if (HasIntersectingAccs) + break; + } + } + + isl_set_free(Domain); + isl_set_free(AllAccs); + isl_set_free(AllAccsNoParams); + return HasIntersectingAccs; +} + +/// @brief Test if the accesses of @p LoadMA and @p StoreMA can form a reduction +static bool checkCandidatePairAccesses(MemoryAccess *LoadMA, + MemoryAccess *StoreMA, + __isl_take isl_set *Domain, + ScopStmt::MemoryAccessVec &MemAccs) { + // First check if the base value is the same. + isl_map *LoadAccs = LoadMA->getAccessRelation(); + isl_map *StoreAccs = StoreMA->getAccessRelation(); + bool Valid = isl_map_has_equal_space(LoadAccs, StoreAccs); + DEBUG(dbgs() << " == The accessed space is " << (Valid ? "" : "not ") + << "equal!\n"); + + if (Valid) { + // Then check if they actually access the same memory. + isl_map *InterAccs = + isl_map_intersect(isl_map_copy(LoadAccs), isl_map_copy(StoreAccs)); + Valid = !isl_map_is_empty(InterAccs); + isl_map_free(InterAccs); + DEBUG(dbgs() << " == The accessed memory is " << (Valid ? "" : "not ") + << "overlapping!\n"); + } + + if (Valid) { + // Finally, check if they are no other instructions accessing this memory + isl_map *AllAccsRel = isl_map_union(LoadAccs, StoreAccs); + LoadAccs = StoreAccs = nullptr; + + AllAccsRel = isl_map_intersect_domain(AllAccsRel, isl_set_copy(Domain)); + isl_set *AllAccs = isl_map_range(AllAccsRel); + + Valid = !hasIntersectingAccesses(AllAccs, LoadMA, StoreMA, Domain, MemAccs); + Domain = nullptr; + + DEBUG(dbgs() << " == The accessed memory is " << (Valid ? "not " : "") + << "accessed by other instructions!\n"); + } + + isl_map_free(LoadAccs); + isl_map_free(StoreAccs); + isl_set_free(Domain); + return Valid; +} + //===----------------------------------------------------------------------===// MemoryAccess::~MemoryAccess() { @@ -792,132 +888,160 @@ checkForReductions(); } -/// @brief Collect loads which might form a reduction chain with @p StoreMA -/// -/// Check if the stored value for @p StoreMA is a binary operator with one or -/// two loads as operands. If the binary operand is commutative & associative, -/// used only once (by @p StoreMA) and its load operands are also used only -/// once, we have found a possible reduction chain. It starts at an operand -/// load and includes the binary operator and @p StoreMA. +/// Perform a data flow analysis on the current basic block to propagate the +/// uses of loaded values. Then check and mark the memory accesses which are +/// part of reduction like chains. /// -/// Note: We allow only one use to ensure the load and binary operator cannot -/// escape this block or into any other store except @p StoreMA. -void ScopStmt::collectCandiateReductionLoads( - MemoryAccess *StoreMA, SmallVectorImpl &Loads) { - auto *Store = dyn_cast(StoreMA->getAccessInstruction()); - if (!Store) - return; - - // Skip if there is not one binary operator between the load and the store - auto *BinOp = dyn_cast(Store->getValueOperand()); - if (!BinOp) - return; - - // Skip if the binary operators has multiple uses - if (BinOp->getNumUses() != 1) - return; - - // Skip if the opcode of the binary operator is not commutative/associative - if (!BinOp->isCommutative() || !BinOp->isAssociative()) - return; - - // Skip if the binary operator is outside the current SCoP - if (BinOp->getParent() != Store->getParent()) - return; - - // Skip if it is a multiplicative reduction and we disabled them - if (DisableMultiplicativeReductions && - (BinOp->getOpcode() == Instruction::Mul || - BinOp->getOpcode() == Instruction::FMul)) - return; - - // Check the binary operator operands for a candidate load - auto *PossibleLoad0 = dyn_cast(BinOp->getOperand(0)); - auto *PossibleLoad1 = dyn_cast(BinOp->getOperand(1)); - if (!PossibleLoad0 && !PossibleLoad1) - return; - - // A load is only a candidate if it cannot escape (thus has only this use) - if (PossibleLoad0 && PossibleLoad0->getNumUses() == 1) - if (PossibleLoad0->getParent() == Store->getParent()) - Loads.push_back(lookupAccessFor(PossibleLoad0)); - if (PossibleLoad1 && PossibleLoad1->getNumUses() == 1) - if (PossibleLoad1->getParent() == Store->getParent()) - Loads.push_back(lookupAccessFor(PossibleLoad1)); -} - -/// @brief Check for reductions in this ScopStmt -/// -/// Iterate over all store memory accesses and check for valid binary reduction -/// like chains. For all candidates we check if they have the same base address -/// and there are no other accesses which overlap with them. The base address -/// check rules out impossible reductions candidates early. The overlap check, -/// together with the "only one user" check in collectCandiateReductionLoads, -/// guarantees that none of the intermediate results will escape during -/// execution of the loop nest. We basically check here that no other memory -/// access can access the same memory as the potential reduction. +/// NOTE: This assumes independent blocks and breaks otherwise. void ScopStmt::checkForReductions() { - SmallVector Loads; - SmallVector, 4> Candidates; + // During the data flow anaylis we use the State variable to keep track of + // the used "load-instructions" for each instruction in the basic block. + // This includes the LLVM-IR of the load and the "number of uses" (or the + // number of paths in the operand tree which end in this load). + using StatePairTy = std::pair; + using FlowInSetTy = DenseMap; + using StateTy = DenseMap; + StateTy State; + + // Invalid loads are loads which: + // o do not form a reduction when they flow into a memory location: + // (e.g., A[i] = B[i] * 3 and A[i] = A[i] * A[i] + A[i]) + // o are used by a non binary operator or one which is not community + // and associative (e.g., A[i] = A[i] % 3) + // o might change the control flow (e.g., if (A[i])) + // o are used in indirect memory accesses (e.g., A[B[i]]) + SmallPtrSet InvalidLoads; + + // Run the data flow analysis for all values in the basic block + for (Instruction &Inst : *BB) { + + // Treat loads and stores special + if (auto *Load = dyn_cast(&Inst)) { + // Invalidate all loads used for an indirect access + if (auto *Ptr = dyn_cast(Load->getPointerOperand())) { + const auto &It = State.find(Ptr); + if (It != State.end()) + for (const auto &FlowInSetElem : It->second) + InvalidLoads.insert(FlowInSetElem.first); + } - // First collect candidate load-store reduction chains by iterating over all - // stores and collecting possible reduction loads. - for (MemoryAccess *StoreMA : MemAccs) { - if (StoreMA->isRead()) + // And indicate that this load uses itself once but without specifying + // any reduction operator. + State[Load].insert( + std::make_pair(Load, std::make_pair(1, MemoryAccess::RT_BOTTOM))); continue; + } - Loads.clear(); - collectCandiateReductionLoads(StoreMA, Loads); - for (MemoryAccess *LoadMA : Loads) - Candidates.push_back(std::make_pair(LoadMA, StoreMA)); - } - - // Then check each possible candidate pair. - for (const auto &CandidatePair : Candidates) { - bool Valid = true; - isl_map *LoadAccs = CandidatePair.first->getAccessRelation(); - isl_map *StoreAccs = CandidatePair.second->getAccessRelation(); + if (auto *Store = dyn_cast(&Inst)) { + // Invalidate all loads used for an indirect access + if (const Instruction *Ptr = + dyn_cast(Store->getPointerOperand())) { + const auto &It = State.find(Ptr); + if (It != State.end()) + for (const auto &FlowInSetElem : It->second) + InvalidLoads.insert(FlowInSetElem.first); + } - // Skip those with obviously unequal base addresses. - if (!isl_map_has_equal_space(LoadAccs, StoreAccs)) { - isl_map_free(LoadAccs); - isl_map_free(StoreAccs); + // Propagate the uses of the value operand to the store + if (auto *ValueInst = dyn_cast(Store->getValueOperand())) + State.insert(std::make_pair(Store, State[ValueInst])); continue; } - // And check if the remaining for overlap with other memory accesses. - isl_map *AllAccsRel = isl_map_union(LoadAccs, StoreAccs); - AllAccsRel = isl_map_intersect_domain(AllAccsRel, getDomain()); - isl_set *AllAccs = isl_map_range(AllAccsRel); + // Non load and store instructions are either binary operators or they will + // invalidate all used loads. + auto *BinOp = dyn_cast(&Inst); + auto CurRedType = getReductionType(BinOp); + DEBUG(dbgs() << "CurInst: " << Inst << " RT: " << CurRedType << "\n"); + + // Iterate over all operands and propagate the uses of the operand. + FlowInSetTy &InstInFlowSet = State[&Inst]; + for (Use &Op : Inst.operands()) { + auto *OpInst = dyn_cast(Op); + if (!OpInst) + continue; - for (MemoryAccess *MA : MemAccs) { - if (MA == CandidatePair.first || MA == CandidatePair.second) + DEBUG(dbgs().indent(4) << "Op Inst: " << *OpInst << "\n"); + const StateTy::iterator &OpInFlowSetIt = State.find(OpInst); + if (OpInFlowSetIt == State.end()) continue; - isl_map *AccRel = - isl_map_intersect_domain(MA->getAccessRelation(), getDomain()); - isl_set *Accs = isl_map_range(AccRel); + // Iterate over all the uses of the operand and combine them in + // the current instruction. + FlowInSetTy &OpInFlowSet = OpInFlowSetIt->second; + for (auto &OpInFlowPair : OpInFlowSet) { + unsigned OpFlowIn = OpInFlowPair.second.first; + unsigned InstFlowIn = InstInFlowSet[OpInFlowPair.first].first; + + auto OpRedType = OpInFlowPair.second.second; + auto InstRedType = InstInFlowSet[OpInFlowPair.first].second; - if (isl_set_has_equal_space(AllAccs, Accs) || isl_set_free(Accs)) { - isl_set *OverlapAccs = isl_set_intersect(Accs, isl_set_copy(AllAccs)); - Valid = Valid && isl_set_is_empty(OverlapAccs); - isl_set_free(OverlapAccs); + auto NewRedType = combineReductionType(OpRedType, CurRedType); + if (InstFlowIn) + NewRedType = combineReductionType(NewRedType, InstRedType); + + DEBUG(dbgs().indent(8) << "OpRedType: " << OpRedType << "\n"); + DEBUG(dbgs().indent(8) << "NewRedType: " << NewRedType << "\n"); + InstInFlowSet[OpInFlowPair.first] = + std::make_pair(OpFlowIn + InstFlowIn, NewRedType); } } + } - isl_set_free(AllAccs); - if (!Valid) + // All used loads are propagated through the whole basic block; now try to + // find valid reduction like candidate pairs. These load-store pairs fulfill + // all reduction like properties with regards to only this load-store chain. + // We later have to check if the loaded value was invalidated by an + // instruction not in that chain. + using MemAccPair = std::pair; + DenseMap ValidCandidates; + + // Iterate over all write memory accesses and check the loads flowing into + // it for reduction candidate pairs. + for (MemoryAccess *WriteMA : MemAccs) { + if (WriteMA->isRead()) continue; - const LoadInst *Load = - dyn_cast(CandidatePair.first->getAccessInstruction()); - MemoryAccess::ReductionType RT = - getReductionType(dyn_cast(Load->user_back()), Load); + FlowInSetTy &MaInFlowSet = State[WriteMA->getAccessInstruction()]; + for (auto &MaInFlowSetElem : MaInFlowSet) { + MemoryAccess *ReadMA = lookupAccessFor(MaInFlowSetElem.first); + assert(ReadMA && "Couldn't finde memory access for incoming load!"); + + DEBUG(dbgs() << "'" << *ReadMA->getAccessInstruction() + << "'\n\tflows into\n'" << *WriteMA->getAccessInstruction() + << "'\n\t #" << MaInFlowSetElem.second.first + << " times & RT: " << MaInFlowSetElem.second.second << "\n"); + + // We allow the load to flow in exactly once. + bool Valid = (MaInFlowSetElem.second.first == 1); + + // Check if we saw a valid chain of binary operators. + MemoryAccess::ReductionType RT = MaInFlowSetElem.second.second; + Valid = Valid && RT != MemoryAccess::RT_BOTTOM; + Valid = Valid && RT != MemoryAccess::RT_NONE; + + // Then check if the memory accesses allow a reduction. + Valid = Valid && + checkCandidatePairAccesses(ReadMA, WriteMA, getDomain(), MemAccs); + + // Finally, mark the pair as a canidate or the load as a invalid one. + if (Valid) + ValidCandidates[std::make_pair(ReadMA, WriteMA)] = RT; + else + InvalidLoads.insert(ReadMA->getAccessInstruction()); + } + } + + // In the last step mark the memory accesses of candidate pairs as reduction + // like if the load wasn't marked invalid in the last step. + for (auto &CandidatePair : ValidCandidates) { + MemoryAccess *LoadMA = CandidatePair.first.first; + if (InvalidLoads.count(LoadMA->getAccessInstruction())) + continue; - // If no overlapping access was found we mark the load and store as - // reduction like. - CandidatePair.first->markAsReductionLike(RT); - CandidatePair.second->markAsReductionLike(RT); + MemoryAccess::ReductionType RT = CandidatePair.second; + CandidatePair.first.first->markAsReductionLike(RT); + CandidatePair.first.second->markAsReductionLike(RT); } } Index: test/Dependences/reduction_indirect_access.ll =================================================================== --- /dev/null +++ test/Dependences/reduction_indirect_access.ll @@ -0,0 +1,44 @@ +; RUN: opt %loadPolly -basicaa -polly-dependences -analyze -polly-allow-nonaffine < %s | FileCheck %s +; +; CHECK: RAW dependences: +; CHECK: [N] -> { } +; CHECK: WAR dependences: +; CHECK: [N] -> { } +; CHECK: WAW dependences: +; CHECK: [N] -> { } +; CHECK: Reduction dependences: +; CHECK: [N] -> { Stmt_for_body[i0] -> Stmt_for_body[1 + i0] : i0 >= 0 and i0 <= -2 + N } +; +; void f(double *restrict A, int *restrict INDICES, int N) { +; for (int i = 0; i < N; i++) +; A[INDICES[i]] += N; +; } +; +target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-n32-S64" + +define void @f(double* noalias %A, i32* noalias %INDICES, i32 %N) { +entry: + br label %for.cond + +for.cond: ; preds = %for.inc, %entry + %i.0 = phi i32 [ 0, %entry ], [ %inc, %for.inc ] + %cmp = icmp slt i32 %i.0, %N + br i1 %cmp, label %for.body, label %for.end + +for.body: ; preds = %for.cond + %conv = sitofp i32 %N to double + %arrayidx = getelementptr inbounds i32* %INDICES, i32 %i.0 + %tmp = load i32* %arrayidx, align 4 + %arrayidx1 = getelementptr inbounds double* %A, i32 %tmp + %tmp1 = load double* %arrayidx1, align 8 + %add = fadd fast double %tmp1, %conv + store double %add, double* %arrayidx1, align 8 + br label %for.inc + +for.inc: ; preds = %for.body + %inc = add nsw i32 %i.0, 1 + br label %for.cond + +for.end: ; preds = %for.cond + ret void +} Index: test/ScopInfo/reduction_escaping_intermediate_3.ll =================================================================== --- /dev/null +++ test/ScopInfo/reduction_escaping_intermediate_3.ll @@ -0,0 +1,43 @@ +; RUN: opt %loadPolly -basicaa -polly-scops -analyze < %s | FileCheck %s +; +; void f(int N, int * restrict sums, int * restrict escape) { +; int i, j; +; for (i = 0; i < 1024; i++) { +; sums[i] += 5; +; escape[i] = sums[i]; +; } +; } +; +; CHECK: Reduction Type: NONE +; CHECK: sums +; CHECK: Reduction Type: NONE +; CHECK: sums +; CHECK: Reduction Type: NONE +; CHECK: escape +target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-n32-S64" + +define void @f(i32 %N, i32* noalias %sums, i32* noalias %escape) { +entry: + br label %for.cond + +for.cond: ; preds = %for.inc, %entry + %i.0 = phi i32 [ 0, %entry ], [ %inc8, %for.inc ] + %exitcond1 = icmp ne i32 %i.0, 1024 + br i1 %exitcond1, label %for.body, label %for.end + +for.body: ; preds = %for.cond + %arrayidx = getelementptr inbounds i32* %sums, i32 0 + %tmp = load i32* %arrayidx, align 4 + %add = add nsw i32 %tmp, 5 + store i32 %add, i32* %arrayidx, align 4 + %arrayidx6 = getelementptr inbounds i32* %escape, i32 %i.0 + store i32 %add, i32* %arrayidx6, align 4 + br label %for.inc + +for.inc: ; preds = %for.body + %inc8 = add nsw i32 %i.0, 1 + br label %for.cond + +for.end: ; preds = %for.cond + ret void +} Index: test/ScopInfo/reduction_indirect_access.ll =================================================================== --- /dev/null +++ test/ScopInfo/reduction_indirect_access.ll @@ -0,0 +1,42 @@ +; RUN: opt %loadPolly -basicaa -polly-scops -analyze -polly-allow-nonaffine < %s | FileCheck %s +; +; CHECK: Reduction Type: NONE +; CHECK: MemRef_INDICES[i0] +; CHECK: Reduction Type: + +; CHECK: MemRef_A[o0] +; CHECK: Reduction Type: + +; CHECK: MemRef_A[o0] +; +; void f(double *restrict A, int *restrict INDICES, int N) { +; for (int i = 0; i < N; i++) +; A[INDICES[i]] += N; +; } +; +target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-n32-S64" + +define void @f(double* noalias %A, i32* noalias %INDICES, i32 %N) { +entry: + br label %for.cond + +for.cond: ; preds = %for.inc, %entry + %i.0 = phi i32 [ 0, %entry ], [ %inc, %for.inc ] + %cmp = icmp slt i32 %i.0, %N + br i1 %cmp, label %for.body, label %for.end + +for.body: ; preds = %for.cond + %conv = sitofp i32 %N to double + %arrayidx = getelementptr inbounds i32* %INDICES, i32 %i.0 + %tmp = load i32* %arrayidx, align 4 + %arrayidx1 = getelementptr inbounds double* %A, i32 %tmp + %tmp1 = load double* %arrayidx1, align 8 + %add = fadd fast double %tmp1, %conv + store double %add, double* %arrayidx1, align 8 + br label %for.inc + +for.inc: ; preds = %for.body + %inc = add nsw i32 %i.0, 1 + br label %for.cond + +for.end: ; preds = %for.cond + ret void +} Index: test/ScopInfo/reduction_indirect_access_2.ll =================================================================== --- /dev/null +++ test/ScopInfo/reduction_indirect_access_2.ll @@ -0,0 +1,50 @@ +; RUN: opt %loadPolly -basicaa -polly-scops -analyze -polly-allow-nonaffine < %s | FileCheck %s +; +; Validate that the accesses to INDICES[i] is not part of a reduction. +; +; CHECK: Reduction Type: NONE +; CHECK: MemRef_INDICES[i0] +; CHECK: Reduction Type: + +; CHECK: MemRef_A[o0] +; CHECK: Reduction Type: + +; CHECK: MemRef_A[o0] +; CHECK: Reduction Type: NONE +; CHECK: MemRef_INDICES[i0] +; +; void f(double *restrict A, int *restrict INDICES, int N) { +; for (int i = 0; i < N; i++) { +; A[INDICES[i]] += N; +; INDICES[i] += N; +; } +; } +; +target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-n32-S64" + +define void @f(double* noalias %A, i32* noalias %INDICES, i32 %N) { +entry: + br label %for.cond + +for.cond: ; preds = %for.inc, %entry + %i.0 = phi i32 [ 0, %entry ], [ %inc, %for.inc ] + %cmp = icmp slt i32 %i.0, %N + br i1 %cmp, label %for.body, label %for.end + +for.body: ; preds = %for.cond + %conv = sitofp i32 %N to double + %arrayidx = getelementptr inbounds i32* %INDICES, i32 %i.0 + %tmp = load i32* %arrayidx, align 4 + %arrayidx1 = getelementptr inbounds double* %A, i32 %tmp + %tmp1 = load double* %arrayidx1, align 8 + %add = fadd fast double %tmp1, %conv + store double %add, double* %arrayidx1, align 8 + %add3 = add nsw i32 %tmp, %N + store i32 %add3, i32* %arrayidx, align 4 + br label %for.inc + +for.inc: ; preds = %for.body + %inc = add nsw i32 %i.0, 1 + br label %for.cond + +for.end: ; preds = %for.cond + ret void +} Index: test/ScopInfo/reduction_long_reduction_chain.ll =================================================================== --- /dev/null +++ test/ScopInfo/reduction_long_reduction_chain.ll @@ -0,0 +1,61 @@ +; RUN: opt %loadPolly -analyze -basicaa -polly-scops < %s | FileCheck %s +; +; CHECK: Reduction Type: + +; CHECK: MemRef_sum +; CHECK: Reduction Type: NONE +; CHECK: MemRef_A +; CHECK: Reduction Type: + +; CHECK: MemRef_sum +; CHECK-NOT: MemRef_A +; +; void f(int *restrict sum, int *restrict A) { +; for (int i = 0; i < 1024; i++) +; *sum = (A[i + 3] * (i - 14)) + ((A[i] + *sum + A[0]) + A[1023]) + +; (A[i + 2] * A[i - 1]); +; } +; +target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-n32-S64" + +define void @f(i32* noalias %sum, i32* noalias %A) { +entry: + br label %for.cond + +for.cond: ; preds = %for.inc, %entry + %i.0 = phi i32 [ 0, %entry ], [ %inc, %for.inc ] + %exitcond = icmp ne i32 %i.0, 1024 + br i1 %exitcond, label %for.body, label %for.end + +for.body: ; preds = %for.cond + %add = add nsw i32 %i.0, 3 + %arrayidx = getelementptr inbounds i32* %A, i32 %add + %tmp = load i32* %arrayidx, align 4 + %sub = add nsw i32 %i.0, -14 + %mul = mul nsw i32 %tmp, %sub + %arrayidx1 = getelementptr inbounds i32* %A, i32 %i.0 + %tmp1 = load i32* %arrayidx1, align 4 + %tmp2 = load i32* %sum, align 4 + %add2 = add nsw i32 %tmp1, %tmp2 + %tmp3 = load i32* %A, align 4 + %add4 = add nsw i32 %add2, %tmp3 + %arrayidx5 = getelementptr inbounds i32* %A, i32 1023 + %tmp4 = load i32* %arrayidx5, align 4 + %add6 = add nsw i32 %add4, %tmp4 + %add7 = add nsw i32 %mul, %add6 + %add8 = add nsw i32 %i.0, 2 + %arrayidx9 = getelementptr inbounds i32* %A, i32 %add8 + %tmp5 = load i32* %arrayidx9, align 4 + %sub10 = add nsw i32 %i.0, -1 + %arrayidx11 = getelementptr inbounds i32* %A, i32 %sub10 + %tmp6 = load i32* %arrayidx11, align 4 + %mul12 = mul nsw i32 %tmp5, %tmp6 + %add13 = add nsw i32 %add7, %mul12 + store i32 %add13, i32* %sum, align 4 + br label %for.inc + +for.inc: ; preds = %for.body + %inc = add nsw i32 %i.0, 1 + br label %for.cond + +for.end: ; preds = %for.cond + ret void +} Index: test/ScopInfo/reduction_long_reduction_chain_double_use.ll =================================================================== --- /dev/null +++ test/ScopInfo/reduction_long_reduction_chain_double_use.ll @@ -0,0 +1,57 @@ +; RUN: opt %loadPolly -analyze -basicaa -polly-scops < %s | FileCheck %s +; +; CHECK-NOT: Reduction like: 1 +; +; void f(int *restrict sum, int *restrict A) { +; for (int i = 0; i < 1024; i++) +; *sum = (A[i + 3] * (i - 14)) + ((A[i] + *sum + A[0]) + A[1023]) + +; (A[i + 2] * A[i - 1]) + *sum; +; } +; +target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-n32-S64" + +define void @f(i32* noalias %sum, i32* noalias %A) { +entry: + br label %for.cond + +for.cond: ; preds = %for.inc, %entry + %i.0 = phi i32 [ 0, %entry ], [ %inc, %for.inc ] + %exitcond = icmp ne i32 %i.0, 1024 + br i1 %exitcond, label %for.body, label %for.end + +for.body: ; preds = %for.cond + %add = add nsw i32 %i.0, 3 + %arrayidx = getelementptr inbounds i32* %A, i32 %add + %tmp = load i32* %arrayidx, align 4 + %sub = add nsw i32 %i.0, -14 + %mul = mul nsw i32 %tmp, %sub + %arrayidx1 = getelementptr inbounds i32* %A, i32 %i.0 + %tmp1 = load i32* %arrayidx1, align 4 + %tmp2 = load i32* %sum, align 4 + %add2 = add nsw i32 %tmp1, %tmp2 + %tmp3 = load i32* %A, align 4 + %add4 = add nsw i32 %add2, %tmp3 + %arrayidx5 = getelementptr inbounds i32* %A, i32 1023 + %tmp4 = load i32* %arrayidx5, align 4 + %add6 = add nsw i32 %add4, %tmp4 + %add7 = add nsw i32 %mul, %add6 + %add8 = add nsw i32 %i.0, 2 + %arrayidx9 = getelementptr inbounds i32* %A, i32 %add8 + %tmp5 = load i32* %arrayidx9, align 4 + %sub10 = add nsw i32 %i.0, -1 + %arrayidx11 = getelementptr inbounds i32* %A, i32 %sub10 + %tmp6 = load i32* %arrayidx11, align 4 + %mul12 = mul nsw i32 %tmp5, %tmp6 + %add13 = add nsw i32 %add7, %mul12 + %tmp7 = load i32* %sum, align 4 + %add14 = add nsw i32 %add13, %tmp7 + store i32 %add14, i32* %sum, align 4 + br label %for.inc + +for.inc: ; preds = %for.body + %inc = add nsw i32 %i.0, 1 + br label %for.cond + +for.end: ; preds = %for.cond + ret void +} Index: test/ScopInfo/reduction_multiple_different_operators.ll =================================================================== --- test/ScopInfo/reduction_multiple_different_operators.ll +++ test/ScopInfo/reduction_multiple_different_operators.ll @@ -1,29 +1,29 @@ ; RUN: opt %loadPolly -polly-scops -analyze < %s | FileCheck %s ; -; CHECK: Reduction Type: + +; CHECK-NOT: Reduction like: 1 +; +; void f(int *restrict sum) { +; for (int i = 0; i < 1024; i++) { +; *sum = (*sum * 5) + 25; +; } +; } ; -; void f(int *sum) { -; for (int i = 0; i < 100; i++) -; sum[i] = sum[99-i] + i; -; } target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-n32-S64" -define void @f(i32* %sum) { +define void @f(i32* noalias %sum) { entry: br label %for.cond for.cond: ; preds = %for.inc, %entry %i.0 = phi i32 [ 0, %entry ], [ %inc, %for.inc ] - %exitcond = icmp ne i32 %i.0, 100 + %exitcond = icmp ne i32 %i.0, 1024 br i1 %exitcond, label %for.body, label %for.end for.body: ; preds = %for.cond - %sub = sub nsw i32 99, %i.0 - %arrayidx = getelementptr inbounds i32* %sum, i32 %sub - %tmp = load i32* %arrayidx, align 4 - %add = add nsw i32 %tmp, %i.0 - %arrayidx1 = getelementptr inbounds i32* %sum, i32 %i.0 - store i32 %add, i32* %arrayidx1, align 4 + %tmp = load i32* %sum, align 4 + %tmp1 = mul i32 %tmp, 5 + %mul = add i32 %tmp1, 25 + store i32 %mul, i32* %sum, align 4 br label %for.inc for.inc: ; preds = %for.body Index: test/ScopInfo/reduction_only_reduction_like_access.ll =================================================================== --- test/ScopInfo/reduction_only_reduction_like_access.ll +++ test/ScopInfo/reduction_only_reduction_like_access.ll @@ -4,7 +4,7 @@ ; ; void f(int *sum) { ; for (int i = 0; i < 100; i++) -; sum[i] = sum[99-i] + i; +; sum[i] = sum[100-i] + i; ; } target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-n32-S64" @@ -18,7 +18,7 @@ br i1 %exitcond, label %for.body, label %for.end for.body: ; preds = %for.cond - %sub = sub nsw i32 99, %i.0 + %sub = sub nsw i32 100, %i.0 %arrayidx = getelementptr inbounds i32* %sum, i32 %sub %tmp = load i32* %arrayidx, align 4 %add = add nsw i32 %tmp, %i.0