Index: lib/Transform/Simplify.cpp =================================================================== --- lib/Transform/Simplify.cpp +++ lib/Transform/Simplify.cpp @@ -16,6 +16,7 @@ #include "polly/ScopPass.h" #include "polly/Support/GICHelper.h" #include "polly/Support/ISLOStream.h" +#include "polly/Support/ISLTools.h" #include "polly/Support/VirtualInstruction.h" #include "llvm/ADT/Statistic.h" #include "llvm/Support/Debug.h" @@ -34,6 +35,7 @@ STATISTIC(InBetweenStore, "Number of Load-Store pairs NOT removed because " "there is another store between them"); STATISTIC(TotalOverwritesRemoved, "Number of removed overwritten writes"); +STATISTIC(TotalWritesCoalesced, "Number of writes coalesced with another"); STATISTIC(TotalRedundantWritesRemoved, "Number of writes of same value removed in any SCoP"); STATISTIC(TotalEmptyPartialAccessesRemoved, @@ -96,6 +98,9 @@ /// Number of writes that are overwritten anyway. int OverwritesRemoved = 0; + /// Number of combined writes. + int WritesCoalesced = 0; + /// Number of redundant writes removed from this SCoP. int RedundantWritesRemoved = 0; @@ -113,9 +118,10 @@ /// Return whether at least one simplification has been applied. bool isModified() const { - return OverwritesRemoved > 0 || RedundantWritesRemoved > 0 || - EmptyPartialAccessesRemoved > 0 || DeadAccessesRemoved > 0 || - DeadInstructionsRemoved > 0 || StmtsRemoved > 0; + return OverwritesRemoved > 0 || WritesCoalesced > 0 || + RedundantWritesRemoved > 0 || EmptyPartialAccessesRemoved > 0 || + DeadAccessesRemoved > 0 || DeadInstructionsRemoved > 0 || + StmtsRemoved > 0; } MemoryAccess *getReadAccessForValue(ScopStmt *Stmt, llvm::Value *Val) { @@ -239,6 +245,173 @@ } } + /// Combine writes that write the same value if possible. + /// + /// This function is able to combine: + /// - Partial writes with disjoint domain. + /// - Writes that write to the same array element. + /// + /// In all cases, both writes must write the same values. + void coalesceWrites() { + for (auto &Stmt : *S) { + isl::set Domain = + give(Stmt.getDomain()).intersect_params(give(S->getContext())); + + // We let isl do the lookup for the same-value condition. For this, we + // wrap llvm::Value into an isl::set such that isl can do the lookup in + // its hashtable implementation. llvm::Values are only compared within a + // ScopStmt, so the map can be local to this scope. TODO: Refactor with + // ZoneAlgorithm::makeValueSet() + SmallDenseMap ValueSets; + auto makeValueSet = [&ValueSets, this](Value *V) -> isl::set { + assert(V); + isl::set &Result = ValueSets[V]; + if (Result.is_null()) { + isl_ctx *Ctx = S->getIslCtx(); + std::string Name = + getIslCompatibleName("Val", V, ValueSets.size() - 1, + std::string(), UseInstructionNames); + isl::id Id = give(isl_id_alloc(Ctx, Name.c_str(), V)); + Result = isl::set::universe( + isl::space(Ctx, 0, 0).set_tuple_id(isl::dim::set, Id)); + } + return Result; + }; + + // List of all eligible (for coalescing) writes of the future. + // { [Domain[] -> Element[]] -> [Value[] -> MemoryAccess[]] } + isl::union_map FutureWrites = + isl::union_map::empty(give(S->getParamSpace())); + + // Iterate over accesses from the last to the first. + SmallVector Accesses(getAccessesInOrder(Stmt)); + for (MemoryAccess *MA : reverse(Accesses)) { + // In region statements, the explicit accesses can be in blocks that can + // be executed in any order. We therefore process only the implicit + // writes and stop after that. + if (Stmt.isRegionStmt() && isExplicitAccess(MA)) + break; + + // { Domain[] -> Element[] } + isl::map AccRel = + MA->getLatestAccessRelation().intersect_domain(Domain); + + // { [Domain[] -> Element[]] } + isl::set AccRelWrapped = AccRel.wrap(); + + // { Value[] } + isl::set ValSet; + + if (MA->isMustWrite() && (MA->isOriginalScalarKind() || + isa(MA->getAccessInstruction()))) { + // Normally, tryGetValueStored() should be used to determine which + // element is written, but it can return nullptr; For PHI accesses, + // getAccessValue() returns the PHI instead of the PHI's incoming + // value. In this case, where we only compare values of a single + // statement, this is fine, because within a statement, a PHI in a + // successor block has always the same value as the incoming write. We + // still preferably use the incoming value directly so we also catch + // direct uses of that. + Value *StoredVal = MA->tryGetValueStored(); + if (!StoredVal) + StoredVal = MA->getAccessValue(); + ValSet = makeValueSet(StoredVal); + + // { Domain[] } + isl::set AccDomain = AccRel.domain(); + + // Parts of the statement's domain that is not written by this access. + isl::set UndefDomain = Domain.subtract(AccDomain); + + // { Element[] } + isl::set ElementUniverse = + isl::set::universe(AccRel.get_space().range()); + + // { Domain[] -> Element[] } + isl::map UndefAnything = + isl::map::from_domain_and_range(UndefDomain, ElementUniverse); + + // We are looking a compatible write access. The other write can + // access these elements... + isl::map AllowedAccesses = AccRel.unite(UndefAnything); + + // ... and must write the same value. + // { [Domain[] -> Element[]] -> Value[] } + isl::map Filter = + isl::map::from_domain_and_range(AllowedAccesses.wrap(), ValSet); + + // Lookup future write that fulfills these conditions. + // { [[Domain[] -> Element[]] -> Value[]] -> MemoryAccess[] } + isl::union_map Filtered = + FutureWrites.uncurry().intersect_domain(Filter.wrap()); + + // Iterate through the candidates. + Filtered.foreach_map([&, this](isl::map Map) -> isl::stat { + MemoryAccess *OtherMA = (MemoryAccess *)Map.get_space() + .get_tuple_id(isl::dim::out) + .get_user(); + + isl::map OtherAccRel = + OtherMA->getLatestAccessRelation().intersect_domain(Domain); + + // The filter only guaranteed that some of OtherMA's accessed + // elements are allowed. Verify that it only accesses allowed + // elements. Otherwise, continue with the next candidate. + if (!OtherAccRel.is_subset(AllowedAccesses).is_true()) + return isl::stat::ok; + + // The combined access relation. + // { Domain[] -> Element[] } + isl::map NewAccRel = AccRel.unite(OtherAccRel); + simplify(NewAccRel); + + // Carry out the coalescing. + Stmt.removeSingleMemoryAccess(MA); + OtherMA->setNewAccessRelation(NewAccRel.copy()); + + // We removed MA, OtherMA takes its role. + MA = OtherMA; + + TotalWritesCoalesced++; + WritesCoalesced++; + + // Don't look for more candidates. + return isl::stat::error; + }); + } + + // Two writes cannot be coalesced if there is another access (to some of + // the written elements) between them. Remove all visited write accesses + // from the list of eligible writes. Don't just remove the accessed + // elements, but any MemoryAccess that touches any of the invalidated + // elements. + // { MemoryAccess[] } + isl::union_set TouchedAccesses = + FutureWrites.intersect_domain(AccRelWrapped) + .range() + .unwrap() + .range(); + FutureWrites = + FutureWrites.uncurry().subtract_range(TouchedAccesses).curry(); + + if (MA->isMustWrite() && !ValSet.is_null()) { + // { MemoryAccess[] } + auto AccSet = + isl::set::universe(isl::space(S->getIslCtx(), 0, 0) + .set_tuple_id(isl::dim::set, MA->getId())); + + // { Val[] -> MemoryAccess[] } + isl::map ValAccSet = isl::map::from_domain_and_range(ValSet, AccSet); + + // { [Domain[] -> Element[]] -> [Value[] -> MemoryAccess[]] } + isl::map AccRelValAcc = + isl::map::from_domain_and_range(AccRelWrapped, ValAccSet.wrap()); + FutureWrites = FutureWrites.add_map(AccRelValAcc); + } + } + } + } + /// Remove writes that just write the same value already stored in the /// element. void removeRedundantWrites() { @@ -413,6 +586,8 @@ OS.indent(Indent) << "Statistics {\n"; OS.indent(Indent + 4) << "Overwrites removed: " << OverwritesRemoved << '\n'; + OS.indent(Indent + 4) << "Partial writes coalesced: " << WritesCoalesced + << "\n"; OS.indent(Indent + 4) << "Redundant writes removed: " << RedundantWritesRemoved << "\n"; OS.indent(Indent + 4) << "Accesses with empty domains removed: " @@ -461,6 +636,9 @@ DEBUG(dbgs() << "Removing overwrites...\n"); removeOverwrites(); + DEBUG(dbgs() << "Coalesce partial writes...\n"); + coalesceWrites(); + DEBUG(dbgs() << "Removing redundant writes...\n"); removeRedundantWrites(); @@ -495,6 +673,7 @@ S = nullptr; OverwritesRemoved = 0; + WritesCoalesced = 0; RedundantWritesRemoved = 0; EmptyPartialAccessesRemoved = 0; DeadAccessesRemoved = 0; Index: test/Simplify/coalesce_3partials.ll =================================================================== --- /dev/null +++ test/Simplify/coalesce_3partials.ll @@ -0,0 +1,48 @@ +; RUN: opt %loadPolly -polly-import-jscop -polly-import-jscop-postfix=transformed -polly-simplify -analyze < %s | FileCheck -match-full-lines %s +; +; Combine 3 partial accesses into one. +; +; for (int j = 0; j < n; j += 1) { +; A[0] = 42.0; +; A[0] = 42.0; +; A[0] = 42.0; +; } +; +define void @coalesce_3partials(i32 %n, double* noalias nonnull %A) { +entry: + br label %for + +for: + %j = phi i32 [0, %entry], [%j.inc, %inc] + %j.cmp = icmp slt i32 %j, %n + br i1 %j.cmp, label %body, label %exit + + body: + store double 42.0, double* %A + store double 42.0, double* %A + store double 42.0, double* %A + br label %inc + +inc: + %j.inc = add nuw nsw i32 %j, 1 + br label %for + +exit: + br label %return + +return: + ret void +} + + +; CHECK: Statistics { +; CHECK: Overwrites removed: 0 +; CHECK: Partial writes coalesced: 2 +; CHECK: } + +; CHECK: After accesses { +; CHECK-NEXT: Stmt_body +; CHECK-NEXT: MustWriteAccess := [Reduction Type: NONE] [Scalar: 0] +; CHECK-NEXT: [n] -> { Stmt_body[i0] -> MemRef_A[0] }; +; CHECK-NEXT: new: [n] -> { Stmt_body[i0] -> MemRef_A[0] : n <= 2147483647 }; +; CHECK-NEXT: } Index: test/Simplify/coalesce_3partials___%for---%return.jscop =================================================================== --- /dev/null +++ test/Simplify/coalesce_3partials___%for---%return.jscop @@ -0,0 +1,32 @@ +{ + "arrays" : [ + { + "name" : "MemRef_A", + "sizes" : [ "*" ], + "type" : "double" + } + ], + "context" : "[n] -> { : -2147483648 <= n <= 2147483647 }", + "name" : "%for---%return", + "statements" : [ + { + "accesses" : [ + { + "kind" : "write", + "relation" : "[n] -> { Stmt_body[i0] -> MemRef_A[0] }" + }, + { + "kind" : "write", + "relation" : "[n] -> { Stmt_body[i0] -> MemRef_A[0] }" + }, + { + "kind" : "write", + "relation" : "[n] -> { Stmt_body[i0] -> MemRef_A[0] }" + } + ], + "domain" : "[n] -> { Stmt_body[i0] : 0 <= i0 < n }", + "name" : "Stmt_body", + "schedule" : "[n] -> { Stmt_body[i0] -> [i0] }" + } + ] +} Index: test/Simplify/coalesce_3partials___%for---%return.jscop.transformed =================================================================== --- /dev/null +++ test/Simplify/coalesce_3partials___%for---%return.jscop.transformed @@ -0,0 +1,32 @@ +{ + "arrays" : [ + { + "name" : "MemRef_A", + "sizes" : [ "*" ], + "type" : "double" + } + ], + "context" : "[n] -> { : -2147483648 <= n <= 2147483647 }", + "name" : "%for---%return", + "statements" : [ + { + "accesses" : [ + { + "kind" : "write", + "relation" : "[n] -> { Stmt_body[i0] -> MemRef_A[0] : i0 < 8 }" + }, + { + "kind" : "write", + "relation" : "[n] -> { Stmt_body[i0] -> MemRef_A[0] : 8 <= i0 < 16 }" + }, + { + "kind" : "write", + "relation" : "[n] -> { Stmt_body[i0] -> MemRef_A[0] : 16 <= i0 }" + } + ], + "domain" : "[n] -> { Stmt_body[i0] : 0 <= i0 < n }", + "name" : "Stmt_body", + "schedule" : "[n] -> { Stmt_body[i0] -> [i0] }" + } + ] +} Index: test/Simplify/coalesce_disjointelements.ll =================================================================== --- /dev/null +++ test/Simplify/coalesce_disjointelements.ll @@ -0,0 +1,56 @@ +; RUN: opt %loadPolly -polly-import-jscop -polly-import-jscop-postfix=transformed -polly-simplify -analyze < %s | FileCheck -match-full-lines %s +; +; Combine four partial stores into two. +; The stores write to the same array, but never the same element. +; +; for (int j = 0; j < n; j += 1) { +; A[0] = 21.0; +; A[1] = 42.0; +; A[0] = 21.0; +; A[1] = 42.0; +; } +; +define void @coalesce_disjointelements(i32 %n, double* noalias nonnull %A) { +entry: + br label %for + +for: + %j = phi i32 [0, %entry], [%j.inc, %inc] + %j.cmp = icmp slt i32 %j, %n + br i1 %j.cmp, label %body, label %exit + + body: + %A_0 = getelementptr inbounds double, double* %A, i32 0 + %A_1 = getelementptr inbounds double, double* %A, i32 1 + store double 21.0, double* %A_0 + store double 42.0, double* %A_1 + store double 21.0, double* %A_0 + store double 42.0, double* %A_1 + br label %inc + +inc: + %j.inc = add nuw nsw i32 %j, 1 + br label %for + +exit: + br label %return + +return: + ret void +} + + +; CHECK: Statistics { +; CHECK: Overwrites removed: 0 +; CHECK: Partial writes coalesced: 2 +; CHECK: } + +; CHECK: After accesses { +; CHECK-NEXT: Stmt_body +; CHECK-NEXT: MustWriteAccess := [Reduction Type: NONE] [Scalar: 0] +; CHECK-NEXT: [n] -> { Stmt_body[i0] -> MemRef_A[0] }; +; CHECK-NEXT: new: [n] -> { Stmt_body[i0] -> MemRef_A[0] : n <= 2147483647 }; +; CHECK-NEXT: MustWriteAccess := [Reduction Type: NONE] [Scalar: 0] +; CHECK-NEXT: [n] -> { Stmt_body[i0] -> MemRef_A[1] }; +; CHECK-NEXT: new: [n] -> { Stmt_body[i0] -> MemRef_A[1] : n <= 2147483647 }; +; CHECK-NEXT: } Index: test/Simplify/coalesce_disjointelements___%for---%return.jscop =================================================================== --- /dev/null +++ test/Simplify/coalesce_disjointelements___%for---%return.jscop @@ -0,0 +1,36 @@ +{ + "arrays" : [ + { + "name" : "MemRef_A", + "sizes" : [ "*" ], + "type" : "double" + } + ], + "context" : "[n] -> { : -2147483648 <= n <= 2147483647 }", + "name" : "%for---%return", + "statements" : [ + { + "accesses" : [ + { + "kind" : "write", + "relation" : "[n] -> { Stmt_body[i0] -> MemRef_A[0] }" + }, + { + "kind" : "write", + "relation" : "[n] -> { Stmt_body[i0] -> MemRef_A[1] }" + }, + { + "kind" : "write", + "relation" : "[n] -> { Stmt_body[i0] -> MemRef_A[0] }" + }, + { + "kind" : "write", + "relation" : "[n] -> { Stmt_body[i0] -> MemRef_A[1] }" + } + ], + "domain" : "[n] -> { Stmt_body[i0] : 0 <= i0 < n }", + "name" : "Stmt_body", + "schedule" : "[n] -> { Stmt_body[i0] -> [i0] }" + } + ] +} Index: test/Simplify/coalesce_disjointelements___%for---%return.jscop.transformed =================================================================== --- /dev/null +++ test/Simplify/coalesce_disjointelements___%for---%return.jscop.transformed @@ -0,0 +1,36 @@ +{ + "arrays" : [ + { + "name" : "MemRef_A", + "sizes" : [ "*" ], + "type" : "double" + } + ], + "context" : "[n] -> { : -2147483648 <= n <= 2147483647 }", + "name" : "%for---%return", + "statements" : [ + { + "accesses" : [ + { + "kind" : "write", + "relation" : "[n] -> { Stmt_body[i0] -> MemRef_A[0] : i0 > 16 }" + }, + { + "kind" : "write", + "relation" : "[n] -> { Stmt_body[i0] -> MemRef_A[1] : i0 > 16 }" + }, + { + "kind" : "write", + "relation" : "[n] -> { Stmt_body[i0] -> MemRef_A[0] : i0 <= 16 }" + }, + { + "kind" : "write", + "relation" : "[n] -> { Stmt_body[i0] -> MemRef_A[1] : i0 <= 16 }" + } + ], + "domain" : "[n] -> { Stmt_body[i0] : 0 <= i0 < n }", + "name" : "Stmt_body", + "schedule" : "[n] -> { Stmt_body[i0] -> [i0] }" + } + ] +} Index: test/Simplify/coalesce_overlapping.ll =================================================================== --- /dev/null +++ test/Simplify/coalesce_overlapping.ll @@ -0,0 +1,46 @@ +; RUN: opt %loadPolly -polly-import-jscop -polly-import-jscop-postfix=transformed -polly-simplify -analyze < %s | FileCheck -match-full-lines %s +; +; Combine two partial stores (with overlapping domains) into one. +; +; for (int j = 0; j < n; j += 1) { +; A[0] = 42.0; +; A[0] = 42.0; +; } +; +define void @coalesce_overlapping(i32 %n, double* noalias nonnull %A) { +entry: + br label %for + +for: + %j = phi i32 [0, %entry], [%j.inc, %inc] + %j.cmp = icmp slt i32 %j, %n + br i1 %j.cmp, label %body, label %exit + + body: + store double 42.0, double* %A + store double 42.0, double* %A + br label %inc + +inc: + %j.inc = add nuw nsw i32 %j, 1 + br label %for + +exit: + br label %return + +return: + ret void +} + + +; CHECK: Statistics { +; CHECK: Overwrites removed: 0 +; CHECK: Partial writes coalesced: 1 +; CHECK: } + +; CHECK: After accesses { +; CHECK-NEXT: Stmt_body +; CHECK-NEXT: MustWriteAccess := [Reduction Type: NONE] [Scalar: 0] +; CHECK-NEXT: [n] -> { Stmt_body[i0] -> MemRef_A[0] }; +; CHECK-NEXT: new: [n] -> { Stmt_body[i0] -> MemRef_A[0] : n <= 2147483647 }; +; CHECK-NEXT: } Index: test/Simplify/coalesce_overlapping___%for---%return.jscop =================================================================== --- /dev/null +++ test/Simplify/coalesce_overlapping___%for---%return.jscop @@ -0,0 +1,28 @@ +{ + "arrays" : [ + { + "name" : "MemRef_A", + "sizes" : [ "*" ], + "type" : "double" + } + ], + "context" : "[n] -> { : -2147483648 <= n <= 2147483647 }", + "name" : "%for---%return", + "statements" : [ + { + "accesses" : [ + { + "kind" : "write", + "relation" : "[n] -> { Stmt_body[i0] -> MemRef_A[0] }" + }, + { + "kind" : "write", + "relation" : "[n] -> { Stmt_body[i0] -> MemRef_A[0] }" + } + ], + "domain" : "[n] -> { Stmt_body[i0] : 0 <= i0 < n }", + "name" : "Stmt_body", + "schedule" : "[n] -> { Stmt_body[i0] -> [i0] }" + } + ] +} Index: test/Simplify/coalesce_overlapping___%for---%return.jscop.transformed =================================================================== --- /dev/null +++ test/Simplify/coalesce_overlapping___%for---%return.jscop.transformed @@ -0,0 +1,28 @@ +{ + "arrays" : [ + { + "name" : "MemRef_A", + "sizes" : [ "*" ], + "type" : "double" + } + ], + "context" : "[n] -> { : -2147483648 <= n <= 2147483647 }", + "name" : "%for---%return", + "statements" : [ + { + "accesses" : [ + { + "kind" : "write", + "relation" : "[n] -> { Stmt_body[i0] -> MemRef_A[0] : i0 < 18 }" + }, + { + "kind" : "write", + "relation" : "[n] -> { Stmt_body[i0] -> MemRef_A[0] : i0 >= 8}" + } + ], + "domain" : "[n] -> { Stmt_body[i0] : 0 <= i0 < n }", + "name" : "Stmt_body", + "schedule" : "[n] -> { Stmt_body[i0] -> [i0] }" + } + ] +} Index: test/Simplify/coalesce_partial.ll =================================================================== --- /dev/null +++ test/Simplify/coalesce_partial.ll @@ -0,0 +1,46 @@ +; RUN: opt %loadPolly -polly-import-jscop -polly-import-jscop-postfix=transformed -polly-simplify -analyze < %s | FileCheck -match-full-lines %s +; +; Combine two partial stores (with disjoint domains) into one. +; +; for (int j = 0; j < n; j += 1) { +; A[0] = 42.0; +; A[0] = 42.0; +; } +; +define void @coalesce_partial(i32 %n, double* noalias nonnull %A) { +entry: + br label %for + +for: + %j = phi i32 [0, %entry], [%j.inc, %inc] + %j.cmp = icmp slt i32 %j, %n + br i1 %j.cmp, label %body, label %exit + + body: + store double 42.0, double* %A + store double 42.0, double* %A + br label %inc + +inc: + %j.inc = add nuw nsw i32 %j, 1 + br label %for + +exit: + br label %return + +return: + ret void +} + + +; CHECK: Statistics { +; CHECK: Overwrites removed: 0 +; CHECK: Partial writes coalesced: 1 +; CHECK: } + +; CHECK: After accesses { +; CHECK-NEXT: Stmt_body +; CHECK-NEXT: MustWriteAccess := [Reduction Type: NONE] [Scalar: 0] +; CHECK-NEXT: [n] -> { Stmt_body[i0] -> MemRef_A[0] }; +; CHECK-NEXT: new: [n] -> { Stmt_body[i0] -> MemRef_A[0] : n <= 2147483647 }; +; CHECK-NEXT: } Index: test/Simplify/coalesce_partial___%for---%return.jscop =================================================================== --- /dev/null +++ test/Simplify/coalesce_partial___%for---%return.jscop @@ -0,0 +1,28 @@ +{ + "arrays" : [ + { + "name" : "MemRef_A", + "sizes" : [ "*" ], + "type" : "double" + } + ], + "context" : "[n] -> { : -2147483648 <= n <= 2147483647 }", + "name" : "%for---%return", + "statements" : [ + { + "accesses" : [ + { + "kind" : "write", + "relation" : "[n] -> { Stmt_body[i0] -> MemRef_A[0] }" + }, + { + "kind" : "write", + "relation" : "[n] -> { Stmt_body[i0] -> MemRef_A[0] }" + } + ], + "domain" : "[n] -> { Stmt_body[i0] : 0 <= i0 < n }", + "name" : "Stmt_body", + "schedule" : "[n] -> { Stmt_body[i0] -> [i0] }" + } + ] +} Index: test/Simplify/coalesce_partial___%for---%return.jscop.transformed =================================================================== --- /dev/null +++ test/Simplify/coalesce_partial___%for---%return.jscop.transformed @@ -0,0 +1,28 @@ +{ + "arrays" : [ + { + "name" : "MemRef_A", + "sizes" : [ "*" ], + "type" : "double" + } + ], + "context" : "[n] -> { : -2147483648 <= n <= 2147483647 }", + "name" : "%for---%return", + "statements" : [ + { + "accesses" : [ + { + "kind" : "write", + "relation" : "[n] -> { Stmt_body[i0] -> MemRef_A[0] : i0 < 16 }" + }, + { + "kind" : "write", + "relation" : "[n] -> { Stmt_body[i0] -> MemRef_A[0] : i0 >= 16 }" + } + ], + "domain" : "[n] -> { Stmt_body[i0] : 0 <= i0 < n }", + "name" : "Stmt_body", + "schedule" : "[n] -> { Stmt_body[i0] -> [i0] }" + } + ] +} Index: test/Simplify/gemm.ll =================================================================== --- test/Simplify/gemm.ll +++ test/Simplify/gemm.ll @@ -13,16 +13,6 @@ ; } ; CHECK: After accesses { -; CHECK-NEXT: Stmt_bb10 -; CHECK-NEXT: ReadAccess := [Reduction Type: NONE] [Scalar: 1] -; CHECK-NEXT: { Stmt_bb10[i0, i1, i2] -> MemRef_tmp_0__phi[] }; -; CHECK-NEXT: new: { Stmt_bb10[i0, i1, i2] -> MemRef_C[i0, i1] }; -; CHECK-NEXT: MustWriteAccess := [Reduction Type: NONE] [Scalar: 1] -; CHECK-NEXT: { Stmt_bb10[i0, i1, i2] -> MemRef_tmp_0[] }; -; CHECK-NEXT: new: { Stmt_bb10[i0, i1, i2] -> MemRef_C[i0, i1] }; -; CHECK-NEXT: MustWriteAccess := [Reduction Type: NONE] [Scalar: 1] -; CHECK-NEXT: { Stmt_bb10[i0, i1, i2] -> MemRef_tmp_0_lcssa__phi[] }; -; CHECK-NEXT: new: { Stmt_bb10[i0, i1, 1024] -> MemRef_C[i0, i1] }; ; CHECK-NEXT: Stmt_bb13 ; CHECK-NEXT: MustWriteAccess := [Reduction Type: NONE] [Scalar: 1] ; CHECK-NEXT: { Stmt_bb13[i0, i1, i2] -> MemRef_tmp_0__phi[] }; Index: test/Simplify/nocoalesce_differentvalues.ll =================================================================== --- /dev/null +++ test/Simplify/nocoalesce_differentvalues.ll @@ -0,0 +1,41 @@ +; RUN: opt %loadPolly -polly-import-jscop -polly-import-jscop-postfix=transformed -polly-simplify -analyze < %s | FileCheck -match-full-lines %s +; +; Do not combine stores that write different values. +; +; for (int j = 0; j < n; j += 1) { +; A[0] = 21.0; +; A[0] = 42.0; +; } +; +define void @nocoalesce_differentvalues(i32 %n, double* noalias nonnull %A) { +entry: + br label %for + +for: + %j = phi i32 [0, %entry], [%j.inc, %inc] + %j.cmp = icmp slt i32 %j, %n + br i1 %j.cmp, label %body, label %exit + + body: + store double 21.0, double* %A + store double 42.0, double* %A + br label %inc + +inc: + %j.inc = add nuw nsw i32 %j, 1 + br label %for + +exit: + br label %return + +return: + ret void +} + + +; CHECK: Statistics { +; CHECK: Overwrites removed: 0 +; CHECK: Partial writes coalesced: 0 +; CHECK: } + +; CHECK: SCoP could not be simplified Index: test/Simplify/nocoalesce_differentvalues___%for---%return.jscop =================================================================== --- /dev/null +++ test/Simplify/nocoalesce_differentvalues___%for---%return.jscop @@ -0,0 +1,28 @@ +{ + "arrays" : [ + { + "name" : "MemRef_A", + "sizes" : [ "*" ], + "type" : "double" + } + ], + "context" : "[n] -> { : -2147483648 <= n <= 2147483647 }", + "name" : "%for---%return", + "statements" : [ + { + "accesses" : [ + { + "kind" : "write", + "relation" : "[n] -> { Stmt_body[i0] -> MemRef_A[0] }" + }, + { + "kind" : "write", + "relation" : "[n] -> { Stmt_body[i0] -> MemRef_A[0] }" + } + ], + "domain" : "[n] -> { Stmt_body[i0] : 0 <= i0 < n }", + "name" : "Stmt_body", + "schedule" : "[n] -> { Stmt_body[i0] -> [i0] }" + } + ] +} Index: test/Simplify/nocoalesce_differentvalues___%for---%return.jscop.transformed =================================================================== --- /dev/null +++ test/Simplify/nocoalesce_differentvalues___%for---%return.jscop.transformed @@ -0,0 +1,28 @@ +{ + "arrays" : [ + { + "name" : "MemRef_A", + "sizes" : [ "*" ], + "type" : "double" + } + ], + "context" : "[n] -> { : -2147483648 <= n <= 2147483647 }", + "name" : "%for---%return", + "statements" : [ + { + "accesses" : [ + { + "kind" : "write", + "relation" : "[n] -> { Stmt_body[i0] -> MemRef_A[0] : i0 >= 16}" + }, + { + "kind" : "write", + "relation" : "[n] -> { Stmt_body[i0] -> MemRef_A[0] : i0 < 16}" + } + ], + "domain" : "[n] -> { Stmt_body[i0] : 0 <= i0 < n }", + "name" : "Stmt_body", + "schedule" : "[n] -> { Stmt_body[i0] -> [i0] }" + } + ] +} Index: test/Simplify/nocoalesce_elementmismatch.ll =================================================================== --- /dev/null +++ test/Simplify/nocoalesce_elementmismatch.ll @@ -0,0 +1,39 @@ +; RUN: opt %loadPolly -polly-import-jscop -polly-import-jscop-postfix=transformed -polly-simplify -analyze < %s | FileCheck -match-full-lines %s +; +; Do not combine stores that do not write to different elements in the +; same instance. +; +; for (int j = 0; j < n; j += 1) { +; A[0] = 21.0; +; A[0] = 42.0; +; } +; +define void @nocoalesce_elementmismatch(i32 %n, double* noalias nonnull %A) { +entry: + br label %for + +for: + %j = phi i32 [0, %entry], [%j.inc, %inc] + %j.cmp = icmp slt i32 %j, %n + br i1 %j.cmp, label %body, label %exit + + body: + %A_0 = getelementptr inbounds double, double* %A, i32 0 + %A_1 = getelementptr inbounds double, double* %A, i32 1 + store double 42.0, double* %A_0 + store double 42.0, double* %A_1 + br label %inc + +inc: + %j.inc = add nuw nsw i32 %j, 1 + br label %for + +exit: + br label %return + +return: + ret void +} + + +; CHECK: SCoP could not be simplified Index: test/Simplify/nocoalesce_elementmismatch___%for---%return.jscop =================================================================== --- /dev/null +++ test/Simplify/nocoalesce_elementmismatch___%for---%return.jscop @@ -0,0 +1,28 @@ +{ + "arrays" : [ + { + "name" : "MemRef_A", + "sizes" : [ "*" ], + "type" : "double" + } + ], + "context" : "[n] -> { : -2147483648 <= n <= 2147483647 }", + "name" : "%for---%return", + "statements" : [ + { + "accesses" : [ + { + "kind" : "write", + "relation" : "[n] -> { Stmt_body[i0] -> MemRef_A[0] }" + }, + { + "kind" : "write", + "relation" : "[n] -> { Stmt_body[i0] -> MemRef_A[1] }" + } + ], + "domain" : "[n] -> { Stmt_body[i0] : 0 <= i0 < n }", + "name" : "Stmt_body", + "schedule" : "[n] -> { Stmt_body[i0] -> [i0] }" + } + ] +} Index: test/Simplify/nocoalesce_elementmismatch___%for---%return.jscop.transformed =================================================================== --- /dev/null +++ test/Simplify/nocoalesce_elementmismatch___%for---%return.jscop.transformed @@ -0,0 +1,28 @@ +{ + "arrays" : [ + { + "name" : "MemRef_A", + "sizes" : [ "*" ], + "type" : "double" + } + ], + "context" : "[n] -> { : -2147483648 <= n <= 2147483647 }", + "name" : "%for---%return", + "statements" : [ + { + "accesses" : [ + { + "kind" : "write", + "relation" : "[n] -> { Stmt_body[i0] -> MemRef_A[0] : i0 >= 16 }" + }, + { + "kind" : "write", + "relation" : "[n] -> { Stmt_body[i0] -> MemRef_A[1] : i0 <= 16}" + } + ], + "domain" : "[n] -> { Stmt_body[i0] : 0 <= i0 < n }", + "name" : "Stmt_body", + "schedule" : "[n] -> { Stmt_body[i0] -> [i0] }" + } + ] +} Index: test/Simplify/nocoalesce_readbetween.ll =================================================================== --- /dev/null +++ test/Simplify/nocoalesce_readbetween.ll @@ -0,0 +1,53 @@ +; RUN: opt %loadPolly -polly-import-jscop -polly-import-jscop-postfix=transformed -polly-simplify -analyze < %s | FileCheck -match-full-lines %s +; +; Do not combine stores if there is a read between them. +; Note: The read between is unused, so will be removed by markAndSweep. +; However, searches for coalesces takes place before. +; +; for (int j = 0; j < n; j += 1) { +; A[0] = 42.0; +; tmp = A[0]; +; A[0] = 42.0; +; } +; +define void @nocoalesce_readbetween(i32 %n, double* noalias nonnull %A) { +entry: + br label %for + +for: + %j = phi i32 [0, %entry], [%j.inc, %inc] + %j.cmp = icmp slt i32 %j, %n + br i1 %j.cmp, label %body, label %exit + + body: + store double 42.0, double* %A + %tmp = load double, double* %A + store double 42.0, double* %A + br label %inc + +inc: + %j.inc = add nuw nsw i32 %j, 1 + br label %for + +exit: + br label %return + +return: + ret void +} + + +; CHECK: Statistics { +; CHECK: Overwrites removed: 0 +; CHECK: Partial writes coalesced: 0 +; CHECK: } + +; CHECK: After accesses { +; CHECK-NEXT: Stmt_body +; CHECK-NEXT: MustWriteAccess := [Reduction Type: NONE] [Scalar: 0] +; CHECK-NEXT: [n] -> { Stmt_body[i0] -> MemRef_A[0] }; +; CHECK-NEXT: new: [n] -> { Stmt_body[i0] -> MemRef_A[0] : i0 >= 17 }; +; CHECK-NEXT: MustWriteAccess := [Reduction Type: NONE] [Scalar: 0] +; CHECK-NEXT: [n] -> { Stmt_body[i0] -> MemRef_A[0] }; +; CHECK-NEXT: new: [n] -> { Stmt_body[i0] -> MemRef_A[0] : i0 <= 16 }; +; CHECK-NEXT: } Index: test/Simplify/nocoalesce_readbetween___%for---%return.jscop =================================================================== --- /dev/null +++ test/Simplify/nocoalesce_readbetween___%for---%return.jscop @@ -0,0 +1,32 @@ +{ + "arrays" : [ + { + "name" : "MemRef_A", + "sizes" : [ "*" ], + "type" : "double" + } + ], + "context" : "[n] -> { : -2147483648 <= n <= 2147483647 }", + "name" : "%for---%return", + "statements" : [ + { + "accesses" : [ + { + "kind" : "write", + "relation" : "[n] -> { Stmt_body[i0] -> MemRef_A[0] }" + }, + { + "kind" : "read", + "relation" : "[n] -> { Stmt_body[i0] -> MemRef_A[0] }" + }, + { + "kind" : "write", + "relation" : "[n] -> { Stmt_body[i0] -> MemRef_A[0] }" + } + ], + "domain" : "[n] -> { Stmt_body[i0] : 0 <= i0 < n }", + "name" : "Stmt_body", + "schedule" : "[n] -> { Stmt_body[i0] -> [i0] }" + } + ] +} Index: test/Simplify/nocoalesce_readbetween___%for---%return.jscop.transformed =================================================================== --- /dev/null +++ test/Simplify/nocoalesce_readbetween___%for---%return.jscop.transformed @@ -0,0 +1,32 @@ +{ + "arrays" : [ + { + "name" : "MemRef_A", + "sizes" : [ "*" ], + "type" : "double" + } + ], + "context" : "[n] -> { : -2147483648 <= n <= 2147483647 }", + "name" : "%for---%return", + "statements" : [ + { + "accesses" : [ + { + "kind" : "write", + "relation" : "[n] -> { Stmt_body[i0] -> MemRef_A[0] : i0 > 16 }" + }, + { + "kind" : "read", + "relation" : "[n] -> { Stmt_body[i0] -> MemRef_A[0] }" + }, + { + "kind" : "write", + "relation" : "[n] -> { Stmt_body[i0] -> MemRef_A[0] : i0 <= 16 }" + } + ], + "domain" : "[n] -> { Stmt_body[i0] : 0 <= i0 < n }", + "name" : "Stmt_body", + "schedule" : "[n] -> { Stmt_body[i0] -> [i0] }" + } + ] +} Index: test/Simplify/nocoalesce_writebetween.ll =================================================================== --- /dev/null +++ test/Simplify/nocoalesce_writebetween.ll @@ -0,0 +1,38 @@ +; RUN: opt %loadPolly -polly-import-jscop -polly-import-jscop-postfix=transformed -polly-simplify -analyze < %s | FileCheck -match-full-lines %s +; +; Do not combine stores if there is a write between them. +; +; for (int j = 0; j < n; j += 1) { +; A[0] = 42.0; +; A[0] = 21.0; +; A[0] = 42.0; +; } +; +define void @nocoalesce_writebetween(i32 %n, double* noalias nonnull %A) { +entry: + br label %for + +for: + %j = phi i32 [0, %entry], [%j.inc, %inc] + %j.cmp = icmp slt i32 %j, %n + br i1 %j.cmp, label %body, label %exit + + body: + store double 42.0, double* %A + store double 21.0, double* %A + store double 42.0, double* %A + br label %inc + +inc: + %j.inc = add nuw nsw i32 %j, 1 + br label %for + +exit: + br label %return + +return: + ret void +} + + +; CHECK: SCoP could not be simplified Index: test/Simplify/nocoalesce_writebetween___%for---%return.jscop =================================================================== --- /dev/null +++ test/Simplify/nocoalesce_writebetween___%for---%return.jscop @@ -0,0 +1,32 @@ +{ + "arrays" : [ + { + "name" : "MemRef_A", + "sizes" : [ "*" ], + "type" : "double" + } + ], + "context" : "[n] -> { : -2147483648 <= n <= 2147483647 }", + "name" : "%for---%return", + "statements" : [ + { + "accesses" : [ + { + "kind" : "write", + "relation" : "[n] -> { Stmt_body[i0] -> MemRef_A[0] }" + }, + { + "kind" : "write", + "relation" : "[n] -> { Stmt_body[i0] -> MemRef_A[0] }" + }, + { + "kind" : "write", + "relation" : "[n] -> { Stmt_body[i0] -> MemRef_A[0] }" + } + ], + "domain" : "[n] -> { Stmt_body[i0] : 0 <= i0 < n }", + "name" : "Stmt_body", + "schedule" : "[n] -> { Stmt_body[i0] -> [i0] }" + } + ] +} Index: test/Simplify/nocoalesce_writebetween___%for---%return.jscop.transformed =================================================================== --- /dev/null +++ test/Simplify/nocoalesce_writebetween___%for---%return.jscop.transformed @@ -0,0 +1,32 @@ +{ + "arrays" : [ + { + "name" : "MemRef_A", + "sizes" : [ "*" ], + "type" : "double" + } + ], + "context" : "[n] -> { : -2147483648 <= n <= 2147483647 }", + "name" : "%for---%return", + "statements" : [ + { + "accesses" : [ + { + "kind" : "write", + "relation" : "[n] -> { Stmt_body[i0] -> MemRef_A[0] : i0 < 16 }" + }, + { + "kind" : "write", + "relation" : "[n] -> { Stmt_body[i0] -> MemRef_A[0] : 8 <= i0 < 24}" + }, + { + "kind" : "write", + "relation" : "[n] -> { Stmt_body[i0] -> MemRef_A[0] : i0 >= 16 }" + } + ], + "domain" : "[n] -> { Stmt_body[i0] : 0 <= i0 < n }", + "name" : "Stmt_body", + "schedule" : "[n] -> { Stmt_body[i0] -> [i0] }" + } + ] +}