Index: polly/trunk/include/polly/ScheduleOptimizer.h =================================================================== --- polly/trunk/include/polly/ScheduleOptimizer.h +++ polly/trunk/include/polly/ScheduleOptimizer.h @@ -179,7 +179,7 @@ /// the outer loop). /// 4. all memory accesses of the statement except from the last one, are /// read memory access and the last one is write memory access. - /// 5. all subscripts of the last memory access of the statement don’t + /// 5. all subscripts of the last memory access of the statement don't /// contain the variable used in the inner loop. /// If this is the case, we could try to use an approach that is similar to /// the one used to get close-to-peak performance of matrix multiplications. Index: polly/trunk/lib/Transform/ScheduleOptimizer.cpp =================================================================== --- polly/trunk/lib/Transform/ScheduleOptimizer.cpp +++ polly/trunk/lib/Transform/ScheduleOptimizer.cpp @@ -233,7 +233,7 @@ /// 2. Extend it to a set, which has exactly VectorWidth iterations for /// any prefix from the set that was built on the previous step. /// 3. Subtract loop domain from it, project out the vector loop dimension and -/// get a set of prefixes, which don’t have exactly VectorWidth iterations. +/// get a set of prefixes, which don't have exactly VectorWidth iterations. /// 4. Subtract it from all prefixes of the vector loop and get the desired /// set. /// @@ -431,7 +431,7 @@ /// loop to the outer loop). /// 2. all memory accesses of the statement except from the last one, are /// read memory access and the last one is write memory access. -/// 3. all subscripts of the last memory access of the statement don’t contain +/// 3. all subscripts of the last memory access of the statement don't contain /// the variable used in the inner loop. 
/// /// @param PartialSchedule The PartialSchedule that contains a SCoP statement @@ -445,14 +445,14 @@ auto MemA = ScpStmt->begin(); for (unsigned i = 0; i < ScpStmt->size() - 2 && MemA != ScpStmt->end(); i++, MemA++) - if (!(*MemA)->isRead() or - ((*MemA)->isArrayKind() and - !((*MemA)->isStrideOne(isl_map_copy(PartialSchedule)) or + if (!(*MemA)->isRead() || + ((*MemA)->isArrayKind() && + !((*MemA)->isStrideOne(isl_map_copy(PartialSchedule)) || (*MemA)->isStrideZero(isl_map_copy(PartialSchedule))))) return false; MemA++; - if (!(*MemA)->isWrite() or !(*MemA)->isArrayKind() or - !((*MemA)->isStrideOne(isl_map_copy(PartialSchedule)) or + if (!(*MemA)->isWrite() || !(*MemA)->isArrayKind() || + !((*MemA)->isStrideOne(isl_map_copy(PartialSchedule)) || (*MemA)->isStrideZero(isl_map_copy(PartialSchedule)))) return false; auto DimNum = isl_map_dim(PartialSchedule, isl_dim_in); Index: polly/trunk/test/ScheduleOptimizer/pattern-matching-based-opts.ll =================================================================== --- polly/trunk/test/ScheduleOptimizer/pattern-matching-based-opts.ll +++ polly/trunk/test/ScheduleOptimizer/pattern-matching-based-opts.ll @@ -1,8 +1,20 @@ ; RUN: opt %loadPolly -polly-opt-isl -debug < %s 2>&1| FileCheck %s ; RUN: opt %loadPolly -polly-opt-isl -polly-pattern-matching-based-opts=true -debug < %s 2>&1| FileCheck %s --check-prefix=PATTERN-MATCHING-OPTS ; REQUIRES: asserts +; +; /* C := alpha*A*B + beta*C */ +; for (i = 0; i < _PB_NI; i++) +; for (j = 0; j < _PB_NJ; j++) +; { +; C[i][j] *= beta; +; for (k = 0; k < _PB_NK; ++k) +; C[i][j] += alpha * A[i][k] * B[k][j]; +; } +; ; CHECK-NOT: The matrix multiplication pattern was detected ; PATTERN-MATCHING-OPTS: The matrix multiplication pattern was detected +; +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" define internal void @kernel_gemm(i32 %arg, i32 %arg1, i32 %arg2, double %arg3, double %arg4, [1056 x double]* %arg5, [1024 x double]* %arg6, [1056 x double]* %arg7) { bb: Index: 
polly/trunk/test/ScheduleOptimizer/pattern-matching-based-opts_2.ll =================================================================== --- polly/trunk/test/ScheduleOptimizer/pattern-matching-based-opts_2.ll +++ polly/trunk/test/ScheduleOptimizer/pattern-matching-based-opts_2.ll @@ -1,6 +1,22 @@ ; RUN: opt %loadPolly -polly-opt-isl -polly-pattern-matching-based-opts=true -debug < %s 2>&1 | FileCheck %s ; REQUIRES: asserts +; +; /* C := alpha*A*B + beta*C */ +; for (i = 0; i < _PB_NI; i++) +; for (j = 0; j < _PB_NJ; j += 2) +; { +; C[i][j] *= beta; +; for (k = 0; k < _PB_NK; ++k) +; C[i][j] += alpha * A[i][k] * B[k][j]; +; } +; +; Check that we won't detect the matrix multiplication pattern, +; if, for example, there are memory accesses that have stride 2 +; after the interchanging of loops. +; ; CHECK-NOT: The matrix multiplication pattern was detected +; +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" define internal void @kernel_gemm(i32 %arg, i32 %arg1, i32 %arg2, double %arg3, double %arg4, [1056 x double]* %arg5, [1024 x double]* %arg6, [1056 x double]* %arg7) { bb: