Index: lib/Transform/MaximalStaticExpansion.cpp =================================================================== --- lib/Transform/MaximalStaticExpansion.cpp +++ lib/Transform/MaximalStaticExpansion.cpp @@ -50,7 +50,7 @@ void getAnalysisUsage(AnalysisUsage &AU) const override; private: - /// OptimizationRemarkEmitter object for displaying diagnostic remarks + /// OptimizationRemarkEmitter object for displaying diagnostic remarks. OptimizationRemarkEmitter *ORE; /// Emit remark @@ -68,20 +68,11 @@ SmallPtrSetImpl &Reads, Scop &S, const isl::union_map &Dependences); - /// Expand a write memory access. + /// Expand the MemoryAccess according to its domain. /// /// @param S The SCop in which the memory access appears in. /// @param MA The memory access that need to be expanded. - ScopArrayInfo *expandWrite(Scop &S, MemoryAccess *MA); - - /// Expand the read memory access. - /// - /// @param S The SCop in which the memory access appears in. - /// @param MA The memory access that need to be expanded. - /// @param Dependences The RAW dependences of the SCop. - /// @param ExpandedSAI The expanded SAI created during write expansion. - void expandRead(Scop &S, MemoryAccess *MA, const isl::union_map &Dependences, - ScopArrayInfo *ExpandedSAI); + ScopArrayInfo *expandAccess(Scop &S, MemoryAccess *MA); /// Filter the dependences to have only one related to current memory access. /// @@ -91,6 +82,27 @@ isl::union_map filterDependences(Scop &S, const isl::union_map &MapDependences, MemoryAccess *MA); + + /// Expand the MemoryAccess according to Dependences and already expanded + /// MemoryAccesses. + /// + /// @param The SCop in which the memory access appears in. + /// @param The memory access that need to be expanded. + /// @param Dependences The RAW dependences of the SCop. + /// @param ExpandedSAI The expanded SAI created during write expansion. + /// @param Reverse if true, the Dependences union_map is reversed before + /// intersection. + void mapAccess(Scop &S, SmallPtrSetImpl &Accesses, + const isl::union_map &Dependences, ScopArrayInfo *ExpandedSAI, + bool Reverse); + + /// Expand PHI memory accesses. + /// + /// @param The SCop in which the memory access appears in. + /// @param The ScopArrayInfo representing the PHI accesses to expand. + /// @param Dependences The RAW dependences of the SCop. + void expandPhi(Scop &S, const ScopArrayInfo *SAI, + const isl::union_map &Dependences); }; } // namespace @@ -165,8 +177,8 @@ isl::union_map MapDependences = isl::union_map::empty(S.getParamSpace()); - Dependences.reverse().foreach_map([&MapDependences, &AccessDomainId, - &SAI](isl::map Map) -> isl::stat { + Dependences.foreach_map([&MapDependences, &AccessDomainId, + &SAI](isl::map Map) -> isl::stat { // Filter out Statement to Statement dependences. if (!Map.can_curry()) @@ -203,6 +215,46 @@ SmallPtrSetImpl &Reads, Scop &S, const isl::union_map &Dependences) { + if (SAI->isValueKind()) { + Writes.insert(S.getValueDef(SAI)); + for (auto MA : S.getValueUses(SAI)) + Reads.insert(MA); + return true; + } else if (SAI->isPHIKind()) { + + auto Read = S.getPHIRead(SAI); + + auto StmtDomain = isl::union_set(Read->getStatement()->getDomain()); + + auto Writes = S.getPHIIncomings(SAI); + + // Get the domain where all the writes are writing to. + auto WriteDomain = isl::union_set::empty(S.getParamSpace()); + + for (auto Write : Writes) { + auto MapDeps = filterDependences(S, Dependences, Write); + MapDeps.foreach_map( + [&StmtDomain, &WriteDomain](isl::map Map) -> isl::stat { + WriteDomain = WriteDomain.add_set(Map.range()); + return isl::stat::ok; + }); + } + + // For now, read from original scalar is not possible. + if (!StmtDomain.is_equal(WriteDomain)) { + emitRemark(SAI->getName() + " read from its original value.", + Read->getAccessInstruction()); + return false; + } + + return true; + } else if (SAI->isExitPHIKind()) { + // For now, we are not able to expand ExitPhi. + emitRemark(SAI->getName() + " is a ExitPhi node.", + S.getEnteringBlock()->getFirstNonPHI()); + return false; + } + int NumberWrites = 0; for (ScopStmt &Stmt : S) { auto StmtReads = isl::union_map::empty(S.getParamSpace()); @@ -214,13 +266,6 @@ if (SAI != MA->getLatestScopArrayInfo()) continue; - // For now, we are not able to expand Scalar. - if (MA->isLatestScalarKind()) { - emitRemark(SAI->getName() + " is a Scalar access.", - MA->getAccessInstruction()); - return false; - } - // For now, we are not able to expand array where read come after write // (to the same location) in a same statement. auto AccRel = isl::union_map(MA->getAccessRelation()); @@ -271,7 +316,7 @@ auto ReadDomain = isl::union_set(ReadDomainSet); // Get the dependences relevant for this MA - auto MapDependences = filterDependences(S, Dependences, MA); + auto MapDependences = filterDependences(S, Dependences.reverse(), MA); auto DepsDomain = MapDependences.domain(); unsigned NumberElementMap = isl_union_map_n_map(MapDependences.get()); @@ -309,38 +354,48 @@ return true; } -void MaximalStaticExpander::expandRead(Scop &S, MemoryAccess *MA, - const isl::union_map &Dependences, - ScopArrayInfo *ExpandedSAI) { +void MaximalStaticExpander::mapAccess(Scop &S, + SmallPtrSetImpl &Accesses, + const isl::union_map &Dependences, + ScopArrayInfo *ExpandedSAI, + bool Reverse) { - // Get the current AM. - auto CurrentAccessMap = MA->getAccessRelation(); + for (auto MA : Accesses) { - // Get RAW dependences for the current WA. - auto WriteDomainSet = MA->getAccessRelation().domain(); - auto WriteDomain = isl::union_set(WriteDomainSet); + // Get the current AM. + auto CurrentAccessMap = MA->getAccessRelation(); - // Get the dependences relevant for this MA - auto MapDependences = filterDependences(S, Dependences, MA); + // Get RAW dependences for the current WA. + auto DomainSet = MA->getAccessRelation().domain(); + auto Domain = isl::union_set(DomainSet); - // If no dependences, no need to modify anything. - if (MapDependences.is_empty()) - return; + // Get the dependences relevant for this MA. + isl::union_map MapDependences; + if (Reverse) { + MapDependences = filterDependences(S, Dependences.reverse(), MA); + } else { + MapDependences = filterDependences(S, Dependences, MA); + } - assert(isl_union_map_n_map(MapDependences.get()) == 1 && - "There are more than one RAW dependencies in the union map."); - auto NewAccessMap = isl::map::from_union_map(MapDependences); + // If no dependences, no need to modify anything. + if (MapDependences.is_empty()) + return; - auto Id = ExpandedSAI->getBasePtrId(); + assert(isl_union_map_n_map(MapDependences.get()) == 1 && + "There are more than one RAW dependencies in the union map."); + auto NewAccessMap = isl::map::from_union_map(MapDependences); - // Replace the out tuple id with the one of the access array. - NewAccessMap = NewAccessMap.set_tuple_id(isl::dim::out, Id); + auto Id = ExpandedSAI->getBasePtrId(); - // Set the new access relation. - MA->setNewAccessRelation(NewAccessMap); + // Replace the out tuple id with the one of the access array. + NewAccessMap = NewAccessMap.set_tuple_id(isl::dim::out, Id); + + // Set the new access relation. + MA->setNewAccessRelation(NewAccessMap); + } } -ScopArrayInfo *MaximalStaticExpander::expandWrite(Scop &S, MemoryAccess *MA) { +ScopArrayInfo *MaximalStaticExpander::expandAccess(Scop &S, MemoryAccess *MA) { // Get the current AM. auto CurrentAccessMap = MA->getAccessRelation(); @@ -409,13 +464,23 @@ return ExpandedSAI; } +void MaximalStaticExpander::expandPhi(Scop &S, const ScopArrayInfo *SAI, + const isl::union_map &Dependences) { + SmallPtrSet Writes; + for (auto MA : S.getPHIIncomings(SAI)) + Writes.insert(MA); + auto Read = S.getPHIRead(SAI); + auto ExpandedSAI = expandAccess(S, Read); + + mapAccess(S, Writes, Dependences, ExpandedSAI, false); +} + void MaximalStaticExpander::emitRemark(StringRef Msg, Instruction *Inst) { ORE->emit(OptimizationRemarkAnalysis(DEBUG_TYPE, "ExpansionRejection", Inst) << Msg); } bool MaximalStaticExpander::runOnScop(Scop &S) { - // Get the ORE from OptimizationRemarkEmitterWrapperPass. ORE = &(getAnalysis().getORE()); @@ -433,13 +498,16 @@ if (!isExpandable(SAI, AllWrites, AllReads, S, Dependences)) continue; - assert(AllWrites.size() == 1); + if (SAI->isValueKind() || SAI->isArrayKind()) { + assert(AllWrites.size() == 1 || SAI->isValueKind()); - auto TheWrite = *(AllWrites.begin()); - ScopArrayInfo *ExpandedArray = expandWrite(S, TheWrite); + auto TheWrite = *(AllWrites.begin()); + ScopArrayInfo *ExpandedArray = expandAccess(S, TheWrite); - for (MemoryAccess *MA : AllReads) - expandRead(S, MA, Dependences, ExpandedArray); + mapAccess(S, AllReads, Dependences, ExpandedArray, true); + } else if (SAI->isPHIKind()) { + expandPhi(S, SAI, Dependences); + } } return false; Index: test/MaximalStaticExpansion/working_expansion.ll =================================================================== --- test/MaximalStaticExpansion/working_expansion.ll +++ test/MaximalStaticExpansion/working_expansion.ll @@ -1,6 +1,6 @@ ; RUN: opt %loadPolly -polly-mse -analyze < %s | FileCheck %s ; -; Verify that the accesses are correctly expanded +; Verify that the accesses are correctly expanded for MemoryKind::Array ; ; Original source code : ; Index: test/MaximalStaticExpansion/working_phi_expansion.ll =================================================================== --- /dev/null +++ test/MaximalStaticExpansion/working_phi_expansion.ll @@ -0,0 +1,77 @@ +; RUN: opt %loadPolly -polly-mse -analyze < %s | FileCheck %s +; RUN: opt %loadPolly -polly-mse -pass-remarks-analysis="polly-mse" -analyze < %s 2>&1 | FileCheck %s --check-prefix=MSE +; +; Verify that the accesses are correctly expanded for MemoryKind::PHI +; tmp_04 is not expanded because it need copy-in. +; +; Original source code : +; +; #define Ni 10000 +; #define Nj 10000 +; +; void mse(double A[Ni], double B[Nj]) { +; int i,j; +; double tmp = 6; +; for (i = 0; i < Ni; i++) { +; for (int j = 0; j MemRef_tmp_04__phi_Stmt_for_body_expanded[i0] }; +; CHECK: new: { Stmt_for_body[i0] -> MemRef_tmp_11__phi_Stmt_for_inc_expanded[i0, 0] }; +; CHECK: new: { Stmt_for_inc[i0, i1] -> MemRef_tmp_11__phi_Stmt_for_inc_expanded[i0, 1 + i1] : i1 <= 9998 }; +; CHECK: new: { Stmt_for_inc[i0, i1] -> MemRef_tmp_11__phi_Stmt_for_inc_expanded[i0, i1] }; +; CHECK: new: { Stmt_for_inc[i0, 9999] -> MemRef_add_lcssa__phi_Stmt_for_end_expanded[i0] }; +; CHECK-NOT: new: { Stmt_for_end[i0] -> MemRef_tmp_04__phi_Stmt_for_body_expanded[1 + i0] : i0 <= 9998 }; +; CHECK: new: { Stmt_for_end[i0] -> MemRef_add_lcssa__phi_Stmt_for_end_expanded[i0] }; +; CHECK: new: { Stmt_for_end[i0] -> MemRef_B_Stmt_for_end_expanded[i0] }; +; +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +define void @tmp(double* %A, double* %B) { +entry: + br label %entry.split + +entry.split: ; preds = %entry + br label %for.body + +for.body: ; preds = %entry.split, %for.end + %indvars.iv = phi i64 [ 0, %entry.split ], [ %indvars.iv.next, %for.end ] + %tmp.04 = phi double [ 6.000000e+00, %entry.split ], [ %add.lcssa, %for.end ] + br label %for.inc + +for.inc: ; preds = %for.body, %for.inc + %j1.02 = phi i32 [ 0, %for.body ], [ %inc, %for.inc ] + %tmp.11 = phi double [ %tmp.04, %for.body ], [ %add, %for.inc ] + %add = fadd double %tmp.11, 2.000000e+00 + %inc = add nuw nsw i32 %j1.02, 1 + %exitcond = icmp ne i32 %inc, 10000 + br i1 %exitcond, label %for.inc, label %for.end + +for.end: ; preds = %for.inc + %add.lcssa = phi double [ %add, %for.inc ] + %arrayidx = getelementptr inbounds double, double* %B, i64 %indvars.iv + store double %add.lcssa, double* %arrayidx, align 8 + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + %exitcond5 = icmp ne i64 %indvars.iv.next, 10000 + br i1 %exitcond5, label %for.body, label %for.end7 + +for.end7: ; preds = %for.end + ret void +} Index: test/MaximalStaticExpansion/working_value_expansion.ll =================================================================== --- /dev/null +++ test/MaximalStaticExpansion/working_value_expansion.ll @@ -0,0 +1,66 @@ +; RUN: opt %loadPolly -polly-mse -analyze < %s | FileCheck %s +; +; Verify that the accesses are correctly expanded for MemoryKind::Value +; +; Original source code : +; +; #define Ni 10000 +; #define Nj 10000 +; +; void mse(double A[Ni], double B[Nj]) { +; int i,j; +; double tmp = 6; +; for (i = 0; i < Ni; i++) { +; tmp = i; +; for (int j = 0; j MemRef_conv_Stmt_for_body_expanded[i0] }; +; CHECK: new: { Stmt_for_body5[i0, i1] -> MemRef_conv_Stmt_for_body_expanded[i0] }; +; CHECK: new: { Stmt_for_end[i0] -> MemRef_conv_Stmt_for_body_expanded[i0] }; +; +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +define void @mse(double* %A, double* %B) { +entry: + br label %entry.split + +entry.split: ; preds = %entry + br label %for.body + +for.body: ; preds = %entry.split, %for.end + %indvars.iv3 = phi i64 [ 0, %entry.split ], [ %indvars.iv.next4, %for.end ] + %0 = trunc i64 %indvars.iv3 to i32 + %conv = sitofp i32 %0 to double + br label %for.body5 + +for.body5: ; preds = %for.body, %for.body5 + %indvars.iv = phi i64 [ 0, %for.body ], [ %indvars.iv.next, %for.body5 ] + %add = fadd double %conv, 3.000000e+00 + %arrayidx = getelementptr inbounds double, double* %A, i64 %indvars.iv + store double %add, double* %arrayidx, align 8 + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + %exitcond = icmp ne i64 %indvars.iv.next, 10000 + br i1 %exitcond, label %for.body5, label %for.end + +for.end: ; preds = %for.body5 + %arrayidx7 = getelementptr inbounds double, double* %B, i64 %indvars.iv3 + store double %conv, double* %arrayidx7, align 8 + %indvars.iv.next4 = add nuw nsw i64 %indvars.iv3, 1 + %exitcond5 = icmp ne i64 %indvars.iv.next4, 10000 + br i1 %exitcond5, label %for.body, label %for.end10 + +for.end10: ; preds = %for.end + ret void +}