Index: polly/trunk/lib/Transform/ScheduleOptimizer.cpp =================================================================== --- polly/trunk/lib/Transform/ScheduleOptimizer.cpp +++ polly/trunk/lib/Transform/ScheduleOptimizer.cpp @@ -688,9 +688,9 @@ /// false, otherwise. static bool isMatMulNonScalarReadAccess(MemoryAccess *MemAccess, MatMulInfoTy &MMI) { - if (!MemAccess->isArrayKind() || !MemAccess->isRead()) + if (!MemAccess->isLatestArrayKind() || !MemAccess->isRead()) return false; - isl_map *AccMap = MemAccess->getAccessRelation(); + isl_map *AccMap = MemAccess->getLatestAccessRelation(); if (isMatMulOperandAcc(AccMap, MMI.i, MMI.j) && !MMI.ReadFromC && isl_map_n_basic_map(AccMap) == 1) { MMI.ReadFromC = MemAccess; @@ -743,7 +743,7 @@ MMI.k, OutDimNum - 1); for (auto *MemA = Stmt->begin(); MemA != Stmt->end() - 1; MemA++) { auto *MemAccessPtr = *MemA; - if (MemAccessPtr->isArrayKind() && MemAccessPtr != MMI.WriteToC && + if (MemAccessPtr->isLatestArrayKind() && MemAccessPtr != MMI.WriteToC && !isMatMulNonScalarReadAccess(MemAccessPtr, MMI) && !(MemAccessPtr->isStrideZero(isl_map_copy(MapI)) && MemAccessPtr->isStrideZero(isl_map_copy(MapJ)) && @@ -835,11 +835,11 @@ return false; for (auto *MemA = Stmt->end() - 1; MemA != Stmt->begin(); MemA--) { auto *MemAccessPtr = *MemA; - if (!MemAccessPtr->isArrayKind()) + if (!MemAccessPtr->isLatestArrayKind()) continue; if (!MemAccessPtr->isWrite()) return false; - auto *AccMap = MemAccessPtr->getAccessRelation(); + auto *AccMap = MemAccessPtr->getLatestAccessRelation(); if (isl_map_n_basic_map(AccMap) != 1 || !isMatMulOperandAcc(AccMap, MMI.i, MMI.j)) { isl_map_free(AccMap); @@ -1132,7 +1132,7 @@ MMI.B->getElementType(), "Packed_B", {FirstDimSize, SecondDimSize, ThirdDimSize}); AccRel = isl_map_set_tuple_id(AccRel, isl_dim_out, SAI->getBasePtrId()); - auto *OldAcc = MMI.B->getAccessRelation(); + auto *OldAcc = MMI.B->getLatestAccessRelation(); MMI.B->setNewAccessRelation(AccRel); auto *ExtMap = isl_map_project_out(isl_map_copy(MapOldIndVar), isl_dim_out, 2, @@ -1145,7 +1145,7 @@ // originating statement is executed. auto *DomainId = isl_set_get_tuple_id(Domain); auto *NewStmt = Stmt->getParent()->addScopStmt( - OldAcc, MMI.B->getAccessRelation(), isl_set_copy(Domain)); + OldAcc, MMI.B->getLatestAccessRelation(), isl_set_copy(Domain)); ExtMap = isl_map_set_tuple_id(ExtMap, isl_dim_out, isl_id_copy(DomainId)); ExtMap = isl_map_intersect_range(ExtMap, isl_set_copy(Domain)); ExtMap = isl_map_set_tuple_id(ExtMap, isl_dim_out, NewStmt->getDomainId()); @@ -1161,14 +1161,14 @@ MMI.A->getElementType(), "Packed_A", {FirstDimSize, SecondDimSize, ThirdDimSize}); AccRel = isl_map_set_tuple_id(AccRel, isl_dim_out, SAI->getBasePtrId()); - OldAcc = MMI.A->getAccessRelation(); + OldAcc = MMI.A->getLatestAccessRelation(); MMI.A->setNewAccessRelation(AccRel); ExtMap = isl_map_project_out(MapOldIndVar, isl_dim_out, 3, isl_map_dim(MapOldIndVar, isl_dim_out) - 3); ExtMap = isl_map_reverse(ExtMap); ExtMap = isl_map_fix_si(ExtMap, isl_dim_out, MMI.j, 0); - NewStmt = Stmt->getParent()->addScopStmt(OldAcc, MMI.A->getAccessRelation(), - isl_set_copy(Domain)); + NewStmt = Stmt->getParent()->addScopStmt( + OldAcc, MMI.A->getLatestAccessRelation(), isl_set_copy(Domain)); // Restrict the domains of the copy statements to only execute when also its // originating statement is executed. Index: polly/trunk/test/ScheduleOptimizer/kernel_gemm___%for.cond1.preheader---%for.end18.jscop.transformed =================================================================== --- polly/trunk/test/ScheduleOptimizer/kernel_gemm___%for.cond1.preheader---%for.end18.jscop.transformed +++ polly/trunk/test/ScheduleOptimizer/kernel_gemm___%for.cond1.preheader---%for.end18.jscop.transformed @@ -0,0 +1,46 @@ +{ + "arrays" : [ + { + "name" : "MemRef_B", + "sizes" : [ "*", "1024" ], + "type" : "double" + }, + { + "name" : "MemRef_C", + "sizes" : [ "*", "1024" ], + "type" : "double" + }, + { + "name" : "New_MemRef_A", + "sizes" : [ "1024", "1024" ], + "type" : "double" + } + ], + "context" : "{ : }", + "name" : "%for.cond1.preheader---%for.end18", + "statements" : [ + { + "accesses" : [ + { + "kind" : "read", + "relation" : "{ Stmt_for_body6[i0, i1, i2] -> MemRef_B[i2, i1] }" + }, + { + "kind" : "read", + "relation" : "{ Stmt_for_body6[i0, i1, i2] -> New_MemRef_A[i0, i2] }" + }, + { + "kind" : "read", + "relation" : "{ Stmt_for_body6[i0, i1, i2] -> MemRef_C[i0, i1] }" + }, + { + "kind" : "write", + "relation" : "{ Stmt_for_body6[i0, i1, i2] -> MemRef_C[i0, i1] }" + } + ], + "domain" : "{ Stmt_for_body6[i0, i1, i2] : 0 <= i0 <= 1023 and 0 <= i1 <= 1023 and 0 <= i2 <= 1023 }", + "name" : "Stmt_for_body6", + "schedule" : "{ Stmt_for_body6[i0, i1, i2] -> [i0, i1, i2] }" + } + ] +} Index: polly/trunk/test/ScheduleOptimizer/pattern-matching-based-opts_11.ll =================================================================== --- polly/trunk/test/ScheduleOptimizer/pattern-matching-based-opts_11.ll +++ polly/trunk/test/ScheduleOptimizer/pattern-matching-based-opts_11.ll @@ -0,0 +1,52 @@ +; RUN: opt %loadPolly -polly-import-jscop \ +; RUN: -polly-import-jscop-postfix=transformed -polly -polly-delicm \ +; RUN: -polly-delicm-overapproximate-writes -polly-pattern-matching-based-opts \ +; RUN: -polly-opt-isl -debug < %s 2>&1 | FileCheck %s +; +; Check that the pattern matching detects the matrix multiplication pattern +; in case scalar memory accesses were replaced by accesses to newly created +; arrays. +; +; CHECK: The matrix multiplication pattern was detected +; +define void @kernel_gemm(i32 %ni, i32 %nj, i32 %nk, double %A, [1024 x double]* %B, [1024 x double]* %C) { +entry: + br label %entry.split + +entry.split: ; preds = %entry + br label %for.cond1.preheader + +for.cond1.preheader: ; preds = %for.inc16, %entry.split + %indvars.iv35 = phi i64 [ 0, %entry.split ], [ %indvars.iv.next36, %for.inc16 ] + br label %for.cond4.preheader + +for.cond4.preheader: ; preds = %for.inc13, %for.cond1.preheader + %indvars.iv32 = phi i64 [ 0, %for.cond1.preheader ], [ %indvars.iv.next33, %for.inc13 ] + br label %for.body6 + +for.body6: ; preds = %for.body6, %for.cond4.preheader + %indvars.iv = phi i64 [ 0, %for.cond4.preheader ], [ %indvars.iv.next, %for.body6 ] + %arrayidx8 = getelementptr inbounds [1024 x double], [1024 x double]* %B, i64 %indvars.iv, i64 %indvars.iv32 + %tmp = load double, double* %arrayidx8, align 8 + %mul = fmul double %tmp, %A + %arrayidx12 = getelementptr inbounds [1024 x double], [1024 x double]* %C, i64 %indvars.iv35, i64 %indvars.iv32 + %tmp1 = load double, double* %arrayidx12, align 8 + %add = fadd double %tmp1, %mul + store double %add, double* %arrayidx12, align 8 + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + %exitcond = icmp ne i64 %indvars.iv.next, 1024 + br i1 %exitcond, label %for.body6, label %for.inc13 + +for.inc13: ; preds = %for.body6 + %indvars.iv.next33 = add nuw nsw i64 %indvars.iv32, 1 + %exitcond34 = icmp ne i64 %indvars.iv.next33, 1024 + br i1 %exitcond34, label %for.cond4.preheader, label %for.inc16 + +for.inc16: ; preds = %for.inc13 + %indvars.iv.next36 = add nuw nsw i64 %indvars.iv35, 1 + %exitcond37 = icmp ne i64 %indvars.iv.next36, 1024 + br i1 %exitcond37, label %for.cond1.preheader, label %for.end18 + +for.end18: ; preds = %for.inc16 + ret void +}