Index: polly/trunk/lib/Transform/Simplify.cpp =================================================================== --- polly/trunk/lib/Transform/Simplify.cpp +++ polly/trunk/lib/Transform/Simplify.cpp @@ -33,6 +33,7 @@ "there is another store between them"); STATISTIC(TotalIdenticalWritesRemoved, "Number of double writes removed in any SCoP"); +STATISTIC(TotalOverwritesRemoved, "Number of removed overwritten writes"); STATISTIC(TotalRedundantWritesRemoved, "Number of writes of same value removed in any SCoP"); STATISTIC(TotalStmtsRemoved, "Number of statements removed in any SCoP"); @@ -63,6 +64,43 @@ return WA->getAccessInstruction(); } +static bool isImplicitRead(MemoryAccess *MA) { + return MA->isRead() && MA->isOriginalScalarKind(); +} + +static bool isExplicitAccess(MemoryAccess *MA) { + return MA->isOriginalArrayKind(); +} + +static bool isImplicitWrite(MemoryAccess *MA) { + return MA->isWrite() && MA->isOriginalScalarKind(); +} + +/// Return an iterator range that iterates over MemoryAccesses in the order in +/// which they are executed. +/// +/// The order is: +/// - Implicit reads (BlockGenerator::generateScalarLoads) +/// - Explicit reads and writes (BlockGenerator::generateArrayLoad, +/// BlockGenerator::generateArrayStore) +/// - In block statements, the accesses are in order in which their +/// instructions are executed. +/// - In region statements, that order of execution is not predictable at +/// compile-time. +/// - Implicit writes (BlockGenerator::generateScalarStores) +/// The order in which implicit writes are executed relative to each other is +/// undefined. +static auto accessesInOrder(ScopStmt *Stmt) -> decltype(concat( + make_filter_range(make_range(Stmt->begin(), Stmt->end()), isImplicitRead), + make_filter_range(make_range(Stmt->begin(), Stmt->end()), isExplicitAccess), + make_filter_range(make_range(Stmt->begin(), Stmt->end()), + isImplicitWrite))) { + auto AllRange = make_range(Stmt->begin(), Stmt->end()); + return concat(make_filter_range(AllRange, isImplicitRead), + make_filter_range(AllRange, isExplicitAccess), + make_filter_range(AllRange, isImplicitWrite)); +} + class Simplify : public ScopPass { private: /// The last/current SCoP that is/has been processed. @@ -71,6 +109,9 @@ /// Number of double writes removed from this SCoP. int IdenticalWritesRemoved = 0; + /// Number of writes that are overwritten anyway. + int OverwritesRemoved = 0; + /// Number of redundant writes removed from this SCoP. int RedundantWritesRemoved = 0; @@ -79,8 +120,8 @@ /// Return whether at least one simplification has been applied. bool isModified() const { - return IdenticalWritesRemoved > 0 || RedundantWritesRemoved > 0 || - StmtsRemoved > 0; + return IdenticalWritesRemoved > 0 || OverwritesRemoved > 0 || + RedundantWritesRemoved > 0 || StmtsRemoved > 0; } MemoryAccess *getReadAccessForValue(ScopStmt *Stmt, llvm::Value *Val) { @@ -223,6 +264,61 @@ } } + /// Remove writes that are overwritten unconditionally later in the same + /// statement. + /// + /// There must be no read of the same value between the write (that is to be + /// removed) and the overwrite. + void removeOverwrites() { + for (auto &Stmt : *S) { + auto Domain = give(Stmt.getDomain()); + isl::union_map WillBeOverwritten = + isl::union_map::empty(give(S->getParamSpace())); + + auto AccRange = accessesInOrder(&Stmt); + SmallVector Accesses{AccRange.begin(), + AccRange.end()}; + + // Iterate in reverse order, so the overwrite comes before the write that + // is to be removed. + for (auto *MA : reverse(Accesses)) { + + // In region statements, the explicit accesses can be in blocks that are + // can be executed in any order. We therefore process only the implicit + // writes and stop after that. + if (Stmt.isRegionStmt() && isExplicitAccess(MA)) + break; + + auto AccRel = give(MA->getAccessRelation()); + AccRel = AccRel.intersect_domain(Domain); + AccRel = AccRel.intersect_params(give(S->getContext())); + + // If a value is read in-between, do not consider it as overwritten. + if (MA->isRead()) { + WillBeOverwritten = WillBeOverwritten.subtract(AccRel); + continue; + } + + // If all of a write's elements are overwritten, remove it. + isl::union_map AccRelUnion = AccRel; + if (isl_union_map_is_subset(AccRelUnion.keep(), + WillBeOverwritten.keep()) == + isl_bool_true) { + DEBUG(dbgs() << "Removing " << MA + << " which will be overwritten anyway\n"); + + Stmt.removeSingleMemoryAccess(MA); + OverwritesRemoved++; + TotalOverwritesRemoved++; + } + + // Unconditional writes overwrite other values. + if (MA->isMustWrite()) + WillBeOverwritten = WillBeOverwritten.add_map(AccRel); + } + } + } + /// Remove writes that just write the same value already stored in the /// element. void removeRedundantWrites() { @@ -314,6 +410,8 @@ OS.indent(Indent) << "Statistics {\n"; OS.indent(Indent + 4) << "Identical writes removed: " << IdenticalWritesRemoved << '\n'; + OS.indent(Indent + 4) << "Overwrites removed: " << OverwritesRemoved + << '\n'; OS.indent(Indent + 4) << "Redundant writes removed: " << RedundantWritesRemoved << "\n"; OS.indent(Indent + 4) << "Stmts removed: " << StmtsRemoved << "\n"; @@ -351,6 +449,9 @@ DEBUG(dbgs() << "Removing identical writes...\n"); removeIdenticalWrites(); + DEBUG(dbgs() << "Removing overwrites...\n"); + removeOverwrites(); + DEBUG(dbgs() << "Removing redundant writes...\n"); removeRedundantWrites(); @@ -381,6 +482,7 @@ S = nullptr; IdenticalWritesRemoved = 0; + OverwritesRemoved = 0; RedundantWritesRemoved = 0; StmtsRemoved = 0; } Index: polly/trunk/test/Simplify/overwritten.ll =================================================================== --- polly/trunk/test/Simplify/overwritten.ll +++ polly/trunk/test/Simplify/overwritten.ll @@ -0,0 +1,44 @@ +; RUN: opt %loadPolly -polly-simplify -analyze < %s | FileCheck -match-full-lines %s +; +; Remove a store that is overwritten by another store in the same statement. +; +; for (int j = 0; j < n; j += 1) { +; A[0] = 21.0; +; A[0] = 42.0; +; } +; +define void @overwritten(i32 %n, double* noalias nonnull %A) { +entry: + br label %for + +for: + %j = phi i32 [0, %entry], [%j.inc, %inc] + %j.cmp = icmp slt i32 %j, %n + br i1 %j.cmp, label %body, label %exit + + body: + store double 21.0, double* %A + store double 42.0, double* %A + br label %inc + +inc: + %j.inc = add nuw nsw i32 %j, 1 + br label %for + +exit: + br label %return + +return: + ret void +} + + +; CHECK: Statistics { +; CHECK: Overwrites removed: 1 +; CHECK: } + +; CHECK: After accesses { +; CHECK-NEXT: Stmt_body +; CHECK-NEXT: MustWriteAccess := [Reduction Type: NONE] [Scalar: 0] +; CHECK-NEXT: [n] -> { Stmt_body[i0] -> MemRef_A[0] }; +; CHECK-NEXT: } Index: polly/trunk/test/Simplify/overwritten_3store.ll =================================================================== --- polly/trunk/test/Simplify/overwritten_3store.ll +++ polly/trunk/test/Simplify/overwritten_3store.ll @@ -0,0 +1,47 @@ +; RUN: opt %loadPolly -polly-simplify -analyze < %s | FileCheck -match-full-lines %s +; +; Remove a store that is overwritten by another store in the same statement. +; Check that even multiple stores are removed. +; +; for (int j = 0; j < n; j += 1) { +; A[0] = 10.5; +; A[0] = 21.0; +; A[0] = 42.0; +; } +; +define void @overwritten_3store(i32 %n, double* noalias nonnull %A) { +entry: + br label %for + +for: + %j = phi i32 [0, %entry], [%j.inc, %inc] + %j.cmp = icmp slt i32 %j, %n + br i1 %j.cmp, label %body, label %exit + + body: + store double 10.5, double* %A + store double 21.0, double* %A + store double 42.0, double* %A + br label %inc + +inc: + %j.inc = add nuw nsw i32 %j, 1 + br label %for + +exit: + br label %return + +return: + ret void +} + + +; CHECK: Statistics { +; CHECK: Overwrites removed: 2 +; CHECK: } + +; CHECK: After accesses { +; CHECK-NEXT: Stmt_body +; CHECK-NEXT: MustWriteAccess := [Reduction Type: NONE] [Scalar: 0] +; CHECK-NEXT: [n] -> { Stmt_body[i0] -> MemRef_A[0] }; +; CHECK-NEXT: } Index: polly/trunk/test/Simplify/overwritten_implicit_and_explicit.ll =================================================================== --- polly/trunk/test/Simplify/overwritten_implicit_and_explicit.ll +++ polly/trunk/test/Simplify/overwritten_implicit_and_explicit.ll @@ -0,0 +1,53 @@ +; RUN: opt %loadPolly -polly-import-jscop -polly-import-jscop-dir=%S -polly-import-jscop-postfix=transformed -polly-simplify -analyze < %s | FileCheck -match-full-lines %s +; +; Remove a store that is overwritten by another store in the same statement. +; Check that this works even if one of the writes is a scalar MemoryKind. +; +; for (int j = 0; j < n; j += 1) { +; body: +; val = 21.0 + 21.0; +; A[1] = val; +; +; user: +; A[0] = val; +; } +; +define void @overwritten_implicit_and_explicit(i32 %n, double* noalias nonnull %A, double* noalias nonnull %C) { +entry: + br label %for + +for: + %j = phi i32 [0, %entry], [%j.inc, %inc] + %j.cmp = icmp slt i32 %j, %n + br i1 %j.cmp, label %body, label %exit + + body: + %val = fadd double 21.0, 21.0 + store double %val, double* %A + br label %user + + user: + store double %val, double* %C + br label %inc + +inc: + %j.inc = add nuw nsw i32 %j, 1 + br label %for + +exit: + br label %return + +return: + ret void +} + + +; CHECK: Statistics { +; CHECK: Overwrites removed: 1 +; CHECK: } + +; CHECK: Stmt_body +; CHECK-NEXT: MustWriteAccess := [Reduction Type: NONE] [Scalar: 1] +; CHECK-NEXT: [n] -> { Stmt_body[i0] -> MemRef_val[] }; +; CHECK-NEXT: new: [n] -> { Stmt_body[i0] -> MemRef_A[0] }; +; CHECK-NEXT: Stmt_user Index: polly/trunk/test/Simplify/overwritten_implicit_and_explicit___%for---%return.jscop =================================================================== --- polly/trunk/test/Simplify/overwritten_implicit_and_explicit___%for---%return.jscop +++ polly/trunk/test/Simplify/overwritten_implicit_and_explicit___%for---%return.jscop @@ -0,0 +1,48 @@ +{ + "arrays" : [ + { + "name" : "MemRef_A", + "sizes" : [ "*" ], + "type" : "double" + }, + { + "name" : "MemRef_C", + "sizes" : [ "*" ], + "type" : "double" + } + ], + "context" : "[n] -> { : -2147483648 <= n <= 2147483647 }", + "name" : "%for---%return", + "statements" : [ + { + "accesses" : [ + { + "kind" : "write", + "relation" : "[n] -> { Stmt_body[i0] -> MemRef_A[0] }" + }, + { + "kind" : "write", + "relation" : "[n] -> { Stmt_body[i0] -> MemRef_val[] }" + } + ], + "domain" : "[n] -> { Stmt_body[i0] : 0 <= i0 < n }", + "name" : "Stmt_body", + "schedule" : "[n] -> { Stmt_body[i0] -> [i0, 0] }" + }, + { + "accesses" : [ + { + "kind" : "write", + "relation" : "[n] -> { Stmt_user[i0] -> MemRef_C[0] }" + }, + { + "kind" : "read", + "relation" : "[n] -> { Stmt_user[i0] -> MemRef_val[] }" + } + ], + "domain" : "[n] -> { Stmt_user[i0] : 0 <= i0 < n }", + "name" : "Stmt_user", + "schedule" : "[n] -> { Stmt_user[i0] -> [i0, 1] }" + } + ] +} Index: polly/trunk/test/Simplify/overwritten_implicit_and_explicit___%for---%return.jscop.transformed =================================================================== --- polly/trunk/test/Simplify/overwritten_implicit_and_explicit___%for---%return.jscop.transformed +++ polly/trunk/test/Simplify/overwritten_implicit_and_explicit___%for---%return.jscop.transformed @@ -0,0 +1,48 @@ +{ + "arrays" : [ + { + "name" : "MemRef_A", + "sizes" : [ "*" ], + "type" : "double" + }, + { + "name" : "MemRef_C", + "sizes" : [ "*" ], + "type" : "double" + } + ], + "context" : "[n] -> { : -2147483648 <= n <= 2147483647 }", + "name" : "%for---%return", + "statements" : [ + { + "accesses" : [ + { + "kind" : "write", + "relation" : "[n] -> { Stmt_body[i0] -> MemRef_A[0] }" + }, + { + "kind" : "write", + "relation" : "[n] -> { Stmt_body[i0] -> MemRef_A[0] }" + } + ], + "domain" : "[n] -> { Stmt_body[i0] : 0 <= i0 < n }", + "name" : "Stmt_body", + "schedule" : "[n] -> { Stmt_body[i0] -> [i0, 0] }" + }, + { + "accesses" : [ + { + "kind" : "write", + "relation" : "[n] -> { Stmt_user[i0] -> MemRef_C[0] }" + }, + { + "kind" : "read", + "relation" : "[n] -> { Stmt_user[i0] -> MemRef_val[] }" + } + ], + "domain" : "[n] -> { Stmt_user[i0] : 0 <= i0 < n }", + "name" : "Stmt_user", + "schedule" : "[n] -> { Stmt_user[i0] -> [i0, 1] }" + } + ] +} Index: polly/trunk/test/Simplify/overwritten_loadbetween.ll =================================================================== --- polly/trunk/test/Simplify/overwritten_loadbetween.ll +++ polly/trunk/test/Simplify/overwritten_loadbetween.ll @@ -0,0 +1,46 @@ +; RUN: opt %loadPolly -polly-simplify -analyze < %s | FileCheck -match-full-lines %s +; +; Do not remove overwrites when the value is read before. +; +; for (int j = 0; j < n; j += 1) { +;body: +; A[0] = 21.0; +; val = A[0]; +; A[0] = 42.0; +; +;user: +; B[0] = val; +; } +; +define void @overwritten(i32 %n, double* noalias nonnull %A, double* noalias nonnull %B) { +entry: + br label %for + +for: + %j = phi i32 [0, %entry], [%j.inc, %inc] + %j.cmp = icmp slt i32 %j, %n + br i1 %j.cmp, label %body, label %exit + + body: + store double 21.0, double* %A + %val = load double, double* %A + store double 42.0, double* %A + br label %user + + user: + store double %val, double* %B + br label %inc + +inc: + %j.inc = add nuw nsw i32 %j, 1 + br label %for + +exit: + br label %return + +return: + ret void +} + + +; CHECK: SCoP could not be simplified