Index: polly/trunk/lib/Transform/MaximalStaticExpansion.cpp =================================================================== --- polly/trunk/lib/Transform/MaximalStaticExpansion.cpp +++ polly/trunk/lib/Transform/MaximalStaticExpansion.cpp @@ -50,7 +50,7 @@ void getAnalysisUsage(AnalysisUsage &AU) const override; private: - /// OptimizationRemarkEmitter object for displaying diagnostic remarks + /// OptimizationRemarkEmitter object for displaying diagnostic remarks. OptimizationRemarkEmitter *ORE; /// Emit remark @@ -68,20 +68,11 @@ SmallPtrSetImpl &Reads, Scop &S, const isl::union_map &Dependences); - /// Expand a write memory access. + /// Expand the MemoryAccess according to its domain. /// /// @param S The SCop in which the memory access appears in. /// @param MA The memory access that need to be expanded. - ScopArrayInfo *expandWrite(Scop &S, MemoryAccess *MA); - - /// Expand the read memory access. - /// - /// @param S The SCop in which the memory access appears in. - /// @param MA The memory access that need to be expanded. - /// @param Dependences The RAW dependences of the SCop. - /// @param ExpandedSAI The expanded SAI created during write expansion. - void expandRead(Scop &S, MemoryAccess *MA, const isl::union_map &Dependences, - ScopArrayInfo *ExpandedSAI); + ScopArrayInfo *expandAccess(Scop &S, MemoryAccess *MA); /// Filter the dependences to have only one related to current memory access. /// @@ -91,6 +82,27 @@ isl::union_map filterDependences(Scop &S, const isl::union_map &MapDependences, MemoryAccess *MA); + + /// Expand the MemoryAccess according to Dependences and already expanded + /// MemoryAccesses. + /// + /// @param S The SCop in which the memory accesses appear. + /// @param Accesses The memory accesses that need to be expanded. + /// @param Dependences The RAW dependences of the SCop. + /// @param ExpandedSAI The expanded SAI created during write expansion.
+ /// @param Reverse If true, the Dependences union_map is reversed before + /// intersection. + void mapAccess(Scop &S, SmallPtrSetImpl &Accesses, + const isl::union_map &Dependences, ScopArrayInfo *ExpandedSAI, + bool Reverse); + + /// Expand PHI memory accesses. + /// + /// @param S The SCop in which the PHI accesses appear. + /// @param SAI The ScopArrayInfo representing the PHI accesses to expand. + /// @param Dependences The RAW dependences of the SCop. + void expandPhi(Scop &S, const ScopArrayInfo *SAI, + const isl::union_map &Dependences); }; } // namespace @@ -167,8 +179,8 @@ isl::union_map MapDependences = isl::union_map::empty(S.getParamSpace()); - Dependences.reverse().foreach_map([&MapDependences, &AccessDomainId, - &SAI](isl::map Map) -> isl::stat { + Dependences.foreach_map([&MapDependences, &AccessDomainId, + &SAI](isl::map Map) -> isl::stat { // Filter out Statement to Statement dependences. if (!Map.can_curry()) @@ -205,6 +217,45 @@ SmallPtrSetImpl &Reads, Scop &S, const isl::union_map &Dependences) { + if (SAI->isValueKind()) { + Writes.insert(S.getValueDef(SAI)); + for (auto MA : S.getValueUses(SAI)) + Reads.insert(MA); + return true; + } else if (SAI->isPHIKind()) { + auto Read = S.getPHIRead(SAI); + + auto StmtDomain = isl::union_set(Read->getStatement()->getDomain()); + + auto Writes = S.getPHIIncomings(SAI); + + // Get the domain where all the writes are writing to. + auto WriteDomain = isl::union_set::empty(S.getParamSpace()); + + for (auto Write : Writes) { + auto MapDeps = filterDependences(S, Dependences, Write); + MapDeps.foreach_map( + [&StmtDomain, &WriteDomain](isl::map Map) -> isl::stat { + WriteDomain = WriteDomain.add_set(Map.range()); + return isl::stat::ok; + }); + } + + // For now, read from original scalar is not possible.
+ if (!StmtDomain.is_equal(WriteDomain)) { + emitRemark(SAI->getName() + " read from its original value.", + Read->getAccessInstruction()); + return false; + } + + return true; + } else if (SAI->isExitPHIKind()) { + // For now, we are not able to expand ExitPhi. + emitRemark(SAI->getName() + " is a ExitPhi node.", + S.getEnteringBlock()->getFirstNonPHI()); + return false; + } + int NumberWrites = 0; for (ScopStmt &Stmt : S) { auto StmtReads = isl::union_map::empty(S.getParamSpace()); @@ -216,13 +267,6 @@ if (SAI != MA->getLatestScopArrayInfo()) continue; - // For now, we are not able to expand Scalar. - if (MA->isLatestScalarKind()) { - emitRemark(SAI->getName() + " is a Scalar access.", - MA->getAccessInstruction()); - return false; - } - // For now, we are not able to expand array where read come after write // (to the same location) in a same statement. auto AccRel = isl::union_map(MA->getAccessRelation()); @@ -273,10 +317,18 @@ auto ReadDomain = isl::union_set(ReadDomainSet); // Get the dependences relevant for this MA - auto MapDependences = filterDependences(S, Dependences, MA); - auto DepsDomain = MapDependences.domain(); + auto MapDependences = filterDependences(S, Dependences.reverse(), MA); unsigned NumberElementMap = isl_union_map_n_map(MapDependences.get()); + if (NumberElementMap == 0) { + emitRemark("The expansion of " + SAI->getName() + + " would lead to a read from the original array.", + MA->getAccessInstruction()); + return false; + } + + auto DepsDomain = MapDependences.domain(); + // If there are multiple maps in the Deps, we cannot handle this case // for now. if (NumberElementMap != 1) { @@ -311,38 +363,48 @@ return true; } -void MaximalStaticExpander::expandRead(Scop &S, MemoryAccess *MA, - const isl::union_map &Dependences, - ScopArrayInfo *ExpandedSAI) { - - // Get the current AM. - auto CurrentAccessMap = MA->getAccessRelation(); - - // Get RAW dependences for the current WA. 
- auto WriteDomainSet = MA->getAccessRelation().domain(); - auto WriteDomain = isl::union_set(WriteDomainSet); - - // Get the dependences relevant for this MA - auto MapDependences = filterDependences(S, Dependences, MA); +void MaximalStaticExpander::mapAccess(Scop &S, + SmallPtrSetImpl &Accesses, + const isl::union_map &Dependences, + ScopArrayInfo *ExpandedSAI, + bool Reverse) { + + for (auto MA : Accesses) { + + // Get the current AM. + auto CurrentAccessMap = MA->getAccessRelation(); + + // Get RAW dependences for the current WA. + auto DomainSet = MA->getAccessRelation().domain(); + auto Domain = isl::union_set(DomainSet); + + // Get the dependences relevant for this MA. + isl::union_map MapDependences; + if (Reverse) { + MapDependences = filterDependences(S, Dependences.reverse(), MA); + } else { + MapDependences = filterDependences(S, Dependences, MA); + } - // If no dependences, no need to modify anything. - if (MapDependences.is_empty()) - return; + // If this access has no dependences, skip it and keep mapping the others. + if (MapDependences.is_empty()) + continue; - assert(isl_union_map_n_map(MapDependences.get()) == 1 && - "There are more than one RAW dependencies in the union map."); - auto NewAccessMap = isl::map::from_union_map(MapDependences); + assert(isl_union_map_n_map(MapDependences.get()) == 1 && + "There are more than one RAW dependencies in the union map."); + auto NewAccessMap = isl::map::from_union_map(MapDependences); - auto Id = ExpandedSAI->getBasePtrId(); + auto Id = ExpandedSAI->getBasePtrId(); - // Replace the out tuple id with the one of the access array. - NewAccessMap = NewAccessMap.set_tuple_id(isl::dim::out, Id); + // Replace the out tuple id with the one of the access array. - NewAccessMap = NewAccessMap.set_tuple_id(isl::dim::out, Id); + NewAccessMap = NewAccessMap.set_tuple_id(isl::dim::out, Id); - // Set the new access relation. - MA->setNewAccessRelation(NewAccessMap); + // Set the new access relation.
+ MA->setNewAccessRelation(NewAccessMap); + } } -ScopArrayInfo *MaximalStaticExpander::expandWrite(Scop &S, MemoryAccess *MA) { +ScopArrayInfo *MaximalStaticExpander::expandAccess(Scop &S, MemoryAccess *MA) { // Get the current AM. auto CurrentAccessMap = MA->getAccessRelation(); @@ -411,13 +473,23 @@ return ExpandedSAI; } +void MaximalStaticExpander::expandPhi(Scop &S, const ScopArrayInfo *SAI, + const isl::union_map &Dependences) { + SmallPtrSet Writes; + for (auto MA : S.getPHIIncomings(SAI)) + Writes.insert(MA); + auto Read = S.getPHIRead(SAI); + auto ExpandedSAI = expandAccess(S, Read); + + mapAccess(S, Writes, Dependences, ExpandedSAI, false); +} + void MaximalStaticExpander::emitRemark(StringRef Msg, Instruction *Inst) { ORE->emit(OptimizationRemarkAnalysis(DEBUG_TYPE, "ExpansionRejection", Inst) << Msg); } bool MaximalStaticExpander::runOnScop(Scop &S) { - // Get the ORE from OptimizationRemarkEmitterWrapperPass. ORE = &(getAnalysis().getORE()); @@ -435,13 +507,16 @@ if (!isExpandable(SAI, AllWrites, AllReads, S, Dependences)) continue; - assert(AllWrites.size() == 1); + if (SAI->isValueKind() || SAI->isArrayKind()) { + assert(AllWrites.size() == 1 || SAI->isValueKind()); - auto TheWrite = *(AllWrites.begin()); - ScopArrayInfo *ExpandedArray = expandWrite(S, TheWrite); + auto TheWrite = *(AllWrites.begin()); + ScopArrayInfo *ExpandedArray = expandAccess(S, TheWrite); - for (MemoryAccess *MA : AllReads) - expandRead(S, MA, Dependences, ExpandedArray); + mapAccess(S, AllReads, Dependences, ExpandedArray, true); + } else if (SAI->isPHIKind()) { + expandPhi(S, SAI, Dependences); + } } return false; Index: polly/trunk/test/MaximalStaticExpansion/working_deps_between_inners.ll =================================================================== --- polly/trunk/test/MaximalStaticExpansion/working_deps_between_inners.ll +++ polly/trunk/test/MaximalStaticExpansion/working_deps_between_inners.ll @@ -0,0 +1,97 @@ +; RUN: opt %loadPolly -polly-mse -analyze < %s | 
FileCheck %s +; +; Verify that the accesses are correctly expanded for MemoryKind::Array +; +; Original source code : +; +; #define Ni 2000 +; #define Nj 3000 +; +; void tmp3(double A[Ni], double B[Nj]) { +; int i,j; +; double tmp = 6; +; for (i = 0; i < Ni; i++) { +; +; for(int h = 0; h MemRef_B_Stmt_for_body3_expanded[i0, i1] }; +; CHECK: new: { Stmt_for_body11[i0, i1, i2] -> MemRef_B_Stmt_for_body3_expanded[i0, i2] }; +; CHECK: new: { Stmt_for_body11[i0, i1, i2] -> MemRef_A_Stmt_for_body11_expanded[i0, i1, i2] }; +; +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +define void @mse(double* %A, double* %B) { +entry: + br label %entry.split + +entry.split: ; preds = %entry + br label %for.body + +for.body: ; preds = %entry.split, %for.inc25 + %indvars.iv14 = phi i64 [ 0, %entry.split ], [ %indvars.iv.next15, %for.inc25 ] + br label %for.body3 + +for.body3: ; preds = %for.body, %for.body3 + %indvars.iv = phi i64 [ 0, %for.body ], [ %indvars.iv.next, %for.body3 ] + %0 = trunc i64 %indvars.iv to i32 + %conv = sitofp i32 %0 to double + %arrayidx = getelementptr inbounds double, double* %B, i64 %indvars.iv + store double %conv, double* %arrayidx, align 8 + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + %exitcond = icmp ne i64 %indvars.iv.next, 10000 + br i1 %exitcond, label %for.body3, label %for.end + +for.end: ; preds = %for.body3 + br label %for.body7 + +for.body7: ; preds = %for.end, %for.inc22 + %indvars.iv9 = phi i64 [ 0, %for.end ], [ %indvars.iv.next10, %for.inc22 ] + br label %for.body11 + +for.body11: ; preds = %for.body7, %for.body11 + %indvars.iv5 = phi i64 [ 0, %for.body7 ], [ %indvars.iv.next6, %for.body11 ] + %1 = add nuw nsw i64 %indvars.iv9, %indvars.iv14 + %2 = add nuw nsw i64 %1, %indvars.iv5 + %3 = trunc i64 %2 to i32 + %conv13 = sitofp i32 %3 to double + %arrayidx15 = getelementptr inbounds double, double* %B, i64 %indvars.iv5 + %4 = load double, double* %arrayidx15, align 8 + %mul = 
fmul double %4, %conv13 + %5 = add nuw nsw i64 %indvars.iv9, %indvars.iv14 + %arrayidx18 = getelementptr inbounds double, double* %A, i64 %5 + store double %mul, double* %arrayidx18, align 8 + %indvars.iv.next6 = add nuw nsw i64 %indvars.iv5, 1 + %exitcond8 = icmp ne i64 %indvars.iv.next6, 10000 + br i1 %exitcond8, label %for.body11, label %for.inc22 + +for.inc22: ; preds = %for.body11 + %indvars.iv.next10 = add nuw nsw i64 %indvars.iv9, 1 + %exitcond13 = icmp ne i64 %indvars.iv.next10, 10000 + br i1 %exitcond13, label %for.body7, label %for.inc25 + +for.inc25: ; preds = %for.inc22 + %indvars.iv.next15 = add nuw nsw i64 %indvars.iv14, 1 + %exitcond16 = icmp ne i64 %indvars.iv.next15, 10000 + br i1 %exitcond16, label %for.body, label %for.end27 + +for.end27: ; preds = %for.inc25 + ret void +} Index: polly/trunk/test/MaximalStaticExpansion/working_deps_between_inners_phi.ll =================================================================== --- polly/trunk/test/MaximalStaticExpansion/working_deps_between_inners_phi.ll +++ polly/trunk/test/MaximalStaticExpansion/working_deps_between_inners_phi.ll @@ -0,0 +1,131 @@ +; RUN: opt %loadPolly -polly-mse -analyze < %s | FileCheck %s +; RUN: opt %loadPolly -polly-mse -pass-remarks-analysis="polly-mse" -analyze < %s 2>&1| FileCheck %s --check-prefix=MSE +; +; Verify that the accesses are correctly expanded for MemoryKind::Array and MemoryKind::PHI. +; tmp_06_phi is not expanded because it needs copy-in.
+; +; Original source code : +; +; #define Ni 2000 +; #define Nj 3000 +; +; void tmp3(double A[Ni], double B[Nj]) { +; int i,j; +; double tmp = 6; +; for (i = 0; i < Ni; i++) { +; +; for(int h = 0; h MemRef_tmp_06__phi_Stmt_for_body_expanded[i0] }; +; CHECK-DAG: new: { Stmt_for_body[i0] -> MemRef_tmp_06_Stmt_for_body_expanded[i0] }; +; CHECK-DAG: new: { Stmt_for_body3[i0, i1] -> MemRef_B_Stmt_for_body3_expanded[i0, i1] }; +; CHECK-DAG: new: { Stmt_for_end[i0] -> MemRef_tmp_06_Stmt_for_body_expanded[i0] }; +; CHECK-DAG: new: { Stmt_for_end[i0] -> MemRef_tmp_14__phi_Stmt_for_body7_expanded[i0, 0] }; +; CHECK-DAG: new: { Stmt_for_body7[i0, i1] -> MemRef_tmp_14__phi_Stmt_for_body7_expanded[i0, i1] }; +; CHECK-DAG: new: { Stmt_for_body7[i0, i1] -> MemRef_tmp_22__phi_Stmt_for_body11_expanded[i0, i1, 0] }; +; CHECK-DAG: new: { Stmt_for_body11[i0, i1, i2] -> MemRef_tmp_22__phi_Stmt_for_body11_expanded[i0, i1, 1 + i2] : i2 <= 9998 }; +; CHECK-DAG: new: { Stmt_for_body11[i0, i1, i2] -> MemRef_tmp_22__phi_Stmt_for_body11_expanded[i0, i1, i2] }; +; CHECK-DAG: new: { Stmt_for_body11[i0, i1, i2] -> MemRef_B_Stmt_for_body3_expanded[i0, i2] }; +; CHECK-DAG: new: { Stmt_for_body11[i0, i1, i2] -> MemRef_A_Stmt_for_body11_expanded[i0, i1, i2] }; +; CHECK-DAG: new: { Stmt_for_body11[i0, i1, 9999] -> MemRef_add16_lcssa__phi_Stmt_for_inc25_expanded[i0, i1] }; +; CHECK-DAG: new: { Stmt_for_inc25[i0, i1] -> MemRef_tmp_14__phi_Stmt_for_body7_expanded[i0, 1 + i1] : i1 <= 9998 }; +; CHECK-DAG: new: { Stmt_for_inc25[i0, i1] -> MemRef_add16_lcssa__phi_Stmt_for_inc25_expanded[i0, i1] }; +; CHECK-DAG: new: { Stmt_for_inc25[i0, 9999] -> MemRef_add16_lcssa_lcssa__phi_Stmt_for_inc28_expanded[i0] }; +; CHECK-DAG: new: { Stmt_for_inc28[i0] -> MemRef_add16_lcssa_lcssa__phi_Stmt_for_inc28_expanded[i0] }; +; CHECK-NOT: new: { Stmt_for_inc28[i0] -> MemRef_tmp_06__phi_Stmt_for_body_expanded[1 + i0] : i0 <= 9998 }; +; +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target triple = 
"x86_64-unknown-linux-gnu" + +define void @mse(double* %A, double* %B) { +entry: + br label %entry.split + +entry.split: ; preds = %entry + br label %for.body + +for.body: ; preds = %entry.split, %for.inc28 + %indvars.iv15 = phi i64 [ 0, %entry.split ], [ %indvars.iv.next16, %for.inc28 ] + %tmp.06 = phi double [ 6.000000e+00, %entry.split ], [ %add16.lcssa.lcssa, %for.inc28 ] + br label %for.body3 + +for.body3: ; preds = %for.body, %for.body3 + %indvars.iv = phi i64 [ 0, %for.body ], [ %indvars.iv.next, %for.body3 ] + %0 = trunc i64 %indvars.iv to i32 + %conv = sitofp i32 %0 to double + %arrayidx = getelementptr inbounds double, double* %B, i64 %indvars.iv + store double %conv, double* %arrayidx, align 8 + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + %exitcond = icmp ne i64 %indvars.iv.next, 10000 + br i1 %exitcond, label %for.body3, label %for.end + +for.end: ; preds = %for.body3 + br label %for.body7 + +for.body7: ; preds = %for.end, %for.inc25 + %indvars.iv11 = phi i64 [ 0, %for.end ], [ %indvars.iv.next12, %for.inc25 ] + %tmp.14 = phi double [ %tmp.06, %for.end ], [ %add16.lcssa, %for.inc25 ] + br label %for.body11 + +for.body11: ; preds = %for.body7, %for.body11 + %indvars.iv8 = phi i64 [ 0, %for.body7 ], [ %indvars.iv.next9, %for.body11 ] + %tmp.22 = phi double [ %tmp.14, %for.body7 ], [ %add16, %for.body11 ] + %1 = trunc i64 %indvars.iv15 to i32 + %conv12 = sitofp i32 %1 to double + %add = fadd double %tmp.22, %conv12 + %2 = trunc i64 %indvars.iv8 to i32 + %conv13 = sitofp i32 %2 to double + %add14 = fadd double %add, %conv13 + %3 = trunc i64 %indvars.iv11 to i32 + %conv15 = sitofp i32 %3 to double + %add16 = fadd double %add14, %conv15 + %arrayidx18 = getelementptr inbounds double, double* %B, i64 %indvars.iv8 + %4 = load double, double* %arrayidx18, align 8 + %mul = fmul double %add16, %4 + %5 = add nuw nsw i64 %indvars.iv11, %indvars.iv15 + %arrayidx21 = getelementptr inbounds double, double* %A, i64 %5 + store double %mul, double* %arrayidx21, 
align 8 + %indvars.iv.next9 = add nuw nsw i64 %indvars.iv8, 1 + %exitcond10 = icmp ne i64 %indvars.iv.next9, 10000 + br i1 %exitcond10, label %for.body11, label %for.inc25 + +for.inc25: ; preds = %for.body11 + %add16.lcssa = phi double [ %add16, %for.body11 ] + %indvars.iv.next12 = add nuw nsw i64 %indvars.iv11, 1 + %exitcond14 = icmp ne i64 %indvars.iv.next12, 10000 + br i1 %exitcond14, label %for.body7, label %for.inc28 + +for.inc28: ; preds = %for.inc25 + %add16.lcssa.lcssa = phi double [ %add16.lcssa, %for.inc25 ] + %indvars.iv.next16 = add nuw nsw i64 %indvars.iv15, 1 + %exitcond17 = icmp ne i64 %indvars.iv.next16, 10000 + br i1 %exitcond17, label %for.body, label %for.end30 + +for.end30: ; preds = %for.inc28 + ret void +} Index: polly/trunk/test/MaximalStaticExpansion/working_expansion.ll =================================================================== --- polly/trunk/test/MaximalStaticExpansion/working_expansion.ll +++ polly/trunk/test/MaximalStaticExpansion/working_expansion.ll @@ -1,6 +1,6 @@ ; RUN: opt %loadPolly -polly-mse -analyze < %s | FileCheck %s ; -; Verify that the accesses are correctly expanded +; Verify that the accesses are correctly expanded for MemoryKind::Array ; ; Original source code : ; Index: polly/trunk/test/MaximalStaticExpansion/working_phi_expansion.ll =================================================================== --- polly/trunk/test/MaximalStaticExpansion/working_phi_expansion.ll +++ polly/trunk/test/MaximalStaticExpansion/working_phi_expansion.ll @@ -0,0 +1,77 @@ +; RUN: opt %loadPolly -polly-mse -analyze < %s | FileCheck %s +; RUN: opt %loadPolly -polly-mse -pass-remarks-analysis="polly-mse" -analyze < %s 2>&1 | FileCheck %s --check-prefix=MSE +; +; Verify that the accesses are correctly expanded for MemoryKind::PHI +; tmp_04 is not expanded because it needs copy-in.
+; +; Original source code : +; +; #define Ni 10000 +; #define Nj 10000 +; +; void mse(double A[Ni], double B[Nj]) { +; int i,j; +; double tmp = 6; +; for (i = 0; i < Ni; i++) { +; for (int j = 0; j MemRef_tmp_04__phi_Stmt_for_body_expanded[i0] }; +; CHECK: new: { Stmt_for_body[i0] -> MemRef_tmp_11__phi_Stmt_for_inc_expanded[i0, 0] }; +; CHECK: new: { Stmt_for_inc[i0, i1] -> MemRef_tmp_11__phi_Stmt_for_inc_expanded[i0, 1 + i1] : i1 <= 9998 }; +; CHECK: new: { Stmt_for_inc[i0, i1] -> MemRef_tmp_11__phi_Stmt_for_inc_expanded[i0, i1] }; +; CHECK: new: { Stmt_for_inc[i0, 9999] -> MemRef_add_lcssa__phi_Stmt_for_end_expanded[i0] }; +; CHECK-NOT: new: { Stmt_for_end[i0] -> MemRef_tmp_04__phi_Stmt_for_body_expanded[1 + i0] : i0 <= 9998 }; +; CHECK: new: { Stmt_for_end[i0] -> MemRef_add_lcssa__phi_Stmt_for_end_expanded[i0] }; +; CHECK: new: { Stmt_for_end[i0] -> MemRef_B_Stmt_for_end_expanded[i0] }; +; +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +define void @tmp(double* %A, double* %B) { +entry: + br label %entry.split + +entry.split: ; preds = %entry + br label %for.body + +for.body: ; preds = %entry.split, %for.end + %indvars.iv = phi i64 [ 0, %entry.split ], [ %indvars.iv.next, %for.end ] + %tmp.04 = phi double [ 6.000000e+00, %entry.split ], [ %add.lcssa, %for.end ] + br label %for.inc + +for.inc: ; preds = %for.body, %for.inc + %j1.02 = phi i32 [ 0, %for.body ], [ %inc, %for.inc ] + %tmp.11 = phi double [ %tmp.04, %for.body ], [ %add, %for.inc ] + %add = fadd double %tmp.11, 2.000000e+00 + %inc = add nuw nsw i32 %j1.02, 1 + %exitcond = icmp ne i32 %inc, 10000 + br i1 %exitcond, label %for.inc, label %for.end + +for.end: ; preds = %for.inc + %add.lcssa = phi double [ %add, %for.inc ] + %arrayidx = getelementptr inbounds double, double* %B, i64 %indvars.iv + store double %add.lcssa, double* %arrayidx, align 8 + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + %exitcond5 = icmp ne i64 %indvars.iv.next, 
10000 + br i1 %exitcond5, label %for.body, label %for.end7 + +for.end7: ; preds = %for.end + ret void +} Index: polly/trunk/test/MaximalStaticExpansion/working_phi_two_scalars.ll =================================================================== --- polly/trunk/test/MaximalStaticExpansion/working_phi_two_scalars.ll +++ polly/trunk/test/MaximalStaticExpansion/working_phi_two_scalars.ll @@ -0,0 +1,91 @@ +; RUN: opt %loadPolly -polly-mse -analyze < %s | FileCheck %s +; RUN: opt %loadPolly -polly-mse -pass-remarks-analysis="polly-mse" -analyze < %s 2>&1 | FileCheck %s --check-prefix=MSE +; +; Verify that the accesses are correctly expanded for MemoryKind::PHI +; tmp_05 and tmp2_06 are not expanded because they need copy-in. +; +; Original source code : +; +; #define Ni 10000 +; #define Nj 10000 +; +; void mse(double A[Ni], double B[Nj]) { +; int i,j; +; double tmp = 6; +; double tmp2 = 9; +; for (i = 0; i < Ni; i++) { +; for(j = 0; j < Nj; j++) { +; tmp = tmp + tmp2; +; tmp2 = i*j; +; } +; } +; } +; +; Check that the pass detects that tmp_05 and tmp2_06 read from their original values. +; +; MSE-DAG: MemRef_tmp_05__phi read from its original value. +; MSE-DAG: MemRef_tmp2_06__phi read from its original value. +; +; Check that the SAI are created except the expanded SAI of tmp_05 and tmp2_06. +; +; CHECK-DAG: double MemRef_add_lcssa__phi_Stmt_for_inc4_expanded[10000]; // Element size 8 +; CHECK-DAG: double MemRef_tmp2_13__phi_Stmt_for_inc_expanded[10000][10000]; // Element size +; CHECK-DAG: double MemRef_conv_lcssa__phi_Stmt_for_inc4_expanded[10000]; // Element size 8 +; CHECK-DAG: double MemRef_tmp_12__phi_Stmt_for_inc_expanded[10000][10000]; // Element size 8 +; CHECK-NOT: double MemRef_tmp_05__phi_Stmt_for_body_expanded[10000]; // Element size 8 +; CHECK-NOT: double MemRef_tmp2_06__phi_Stmt_for_body_expanded[10000]; // Element size 8 +; +; Check that the memory accesses are modified except those related to tmp_05 and tmp2_06.
+; +; CHECK-NOT: new: { Stmt_for_body[i0] -> MemRef_tmp2_06__phi_Stmt_for_body_expanded[i0] }; +; CHECK-NOT: new: { Stmt_for_body[i0] -> MemRef_tmp_05__phi_Stmt_for_body_expanded[i0] }; +; CHECK: new: { Stmt_for_body[i0] -> MemRef_tmp2_13__phi_Stmt_for_inc_expanded[i0, 0] }; +; CHECK: new: { Stmt_for_body[i0] -> MemRef_tmp_12__phi_Stmt_for_inc_expanded[i0, 0] }; +; CHECK: new: { Stmt_for_inc[i0, i1] -> MemRef_tmp2_13__phi_Stmt_for_inc_expanded[i0, 1 + i1] : i1 <= 9998 }; +; CHECK: new: { Stmt_for_inc[i0, i1] -> MemRef_tmp2_13__phi_Stmt_for_inc_expanded[i0, i1] }; +; CHECK: new: { Stmt_for_inc[i0, i1] -> MemRef_tmp_12__phi_Stmt_for_inc_expanded[i0, 1 + i1] : i1 <= 9998 }; +; CHECK: new: { Stmt_for_inc[i0, i1] -> MemRef_tmp_12__phi_Stmt_for_inc_expanded[i0, i1] }; +; CHECK: new: { Stmt_for_inc[i0, 9999] -> MemRef_conv_lcssa__phi_Stmt_for_inc4_expanded[i0] }; +; CHECK: new: { Stmt_for_inc[i0, 9999] -> MemRef_add_lcssa__phi_Stmt_for_inc4_expanded[i0] }; +; CHECK-NOT: new: { Stmt_for_inc4[i0] -> MemRef_tmp2_06__phi_Stmt_for_body_expanded[1 + i0] : i0 <= 9998 }; +; CHECK-NOT: new: { Stmt_for_inc4[i0] -> MemRef_tmp_05__phi_Stmt_for_body_expanded[1 + i0] : i0 <= 9998 }; +; CHECK: new: { Stmt_for_inc4[i0] -> MemRef_conv_lcssa__phi_Stmt_for_inc4_expanded[i0] }; +; CHECK: new: { Stmt_for_inc4[i0] -> MemRef_add_lcssa__phi_Stmt_for_inc4_expanded[i0] }; +; +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +define void @tmp(double* %A, double* %B) { +entry: + br label %entry.split + +entry.split: ; preds = %entry + br label %for.body + +for.body: ; preds = %entry.split, %for.inc4 + %tmp2.06 = phi double [ 9.000000e+00, %entry.split ], [ %conv.lcssa, %for.inc4 ] + %tmp.05 = phi double [ 6.000000e+00, %entry.split ], [ %add.lcssa, %for.inc4 ] + %i.04 = phi i32 [ 0, %entry.split ], [ %inc5, %for.inc4 ] + br label %for.inc + +for.inc: ; preds = %for.body, %for.inc + %tmp2.13 = phi double [ %tmp2.06, %for.body ], [ %conv, 
%for.inc ] + %tmp.12 = phi double [ %tmp.05, %for.body ], [ %add, %for.inc ] + %j.01 = phi i32 [ 0, %for.body ], [ %inc, %for.inc ] + %mul = mul nuw nsw i32 %j.01, %i.04 + %conv = sitofp i32 %mul to double + %add = fadd double %tmp.12, %tmp2.13 + %inc = add nuw nsw i32 %j.01, 1 + %exitcond = icmp ne i32 %inc, 10000 + br i1 %exitcond, label %for.inc, label %for.inc4 + +for.inc4: ; preds = %for.inc + %conv.lcssa = phi double [ %conv, %for.inc ] + %add.lcssa = phi double [ %add, %for.inc ] + %inc5 = add nuw nsw i32 %i.04, 1 + %exitcond7 = icmp ne i32 %inc5, 10000 + br i1 %exitcond7, label %for.body, label %for.end6 + +for.end6: ; preds = %for.inc4 + ret void +} Index: polly/trunk/test/MaximalStaticExpansion/working_value_expansion.ll =================================================================== --- polly/trunk/test/MaximalStaticExpansion/working_value_expansion.ll +++ polly/trunk/test/MaximalStaticExpansion/working_value_expansion.ll @@ -0,0 +1,66 @@ +; RUN: opt %loadPolly -polly-mse -analyze < %s | FileCheck %s +; +; Verify that the accesses are correctly expanded for MemoryKind::Value +; +; Original source code : +; +; #define Ni 10000 +; #define Nj 10000 +; +; void mse(double A[Ni], double B[Nj]) { +; int i,j; +; double tmp = 6; +; for (i = 0; i < Ni; i++) { +; tmp = i; +; for (int j = 0; j MemRef_conv_Stmt_for_body_expanded[i0] }; +; CHECK: new: { Stmt_for_body5[i0, i1] -> MemRef_conv_Stmt_for_body_expanded[i0] }; +; CHECK: new: { Stmt_for_end[i0] -> MemRef_conv_Stmt_for_body_expanded[i0] }; +; +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +define void @mse(double* %A, double* %B) { +entry: + br label %entry.split + +entry.split: ; preds = %entry + br label %for.body + +for.body: ; preds = %entry.split, %for.end + %indvars.iv3 = phi i64 [ 0, %entry.split ], [ %indvars.iv.next4, %for.end ] + %0 = trunc i64 %indvars.iv3 to i32 + %conv = sitofp i32 %0 to double + br label %for.body5 + +for.body5: ; 
preds = %for.body, %for.body5 + %indvars.iv = phi i64 [ 0, %for.body ], [ %indvars.iv.next, %for.body5 ] + %add = fadd double %conv, 3.000000e+00 + %arrayidx = getelementptr inbounds double, double* %A, i64 %indvars.iv + store double %add, double* %arrayidx, align 8 + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + %exitcond = icmp ne i64 %indvars.iv.next, 10000 + br i1 %exitcond, label %for.body5, label %for.end + +for.end: ; preds = %for.body5 + %arrayidx7 = getelementptr inbounds double, double* %B, i64 %indvars.iv3 + store double %conv, double* %arrayidx7, align 8 + %indvars.iv.next4 = add nuw nsw i64 %indvars.iv3, 1 + %exitcond5 = icmp ne i64 %indvars.iv.next4, 10000 + br i1 %exitcond5, label %for.body, label %for.end10 + +for.end10: ; preds = %for.end + ret void +}