Index: lib/Transform/ScheduleOptimizer.cpp =================================================================== --- lib/Transform/ScheduleOptimizer.cpp +++ lib/Transform/ScheduleOptimizer.cpp @@ -619,8 +619,8 @@ .equate(isl::dim::in, SecondDims[i], isl::dim::out, 1); AccMap = AccMap.intersect_domain(Domain).intersect_params(Context); - PossibleMatMul = PossibleMatMul.intersect_domain(Domain).intersect_params(Context); - + PossibleMatMul = + PossibleMatMul.intersect_domain(Domain).intersect_params(Context); // If AccMap spans entire domain (Non-partial write), // compute FirstPos and SecondPos. @@ -1156,7 +1156,6 @@ ExtMap = ExtMap.intersect_range(Domain); ExtMap = ExtMap.set_tuple_id(isl::dim::out, NewStmt->getDomainId()); Node = createExtensionNode(Node, ExtMap); - return Node.child(0).child(0).child(0).child(0).child(0); } Index: test/ScheduleOptimizer/pattern-matching-based-opts_15.ll =================================================================== --- /dev/null +++ test/ScheduleOptimizer/pattern-matching-based-opts_15.ll @@ -0,0 +1,126 @@ +; RUN: opt %loadPolly -polly-opt-isl -polly-pattern-matching-based-opts=false \ +; RUN: -debug < %s 2>&1| FileCheck %s +; RUN: opt %loadPolly -polly-opt-isl -polly-pattern-matching-based-opts=true -debug < %s 2>&1| FileCheck %s --check-prefix=PATTERN-MATCHING-OPTS +; RUN: opt %loadPolly -polly-opt-isl -polly-pattern-matching-based-opts=true -stats -disable-output < %s 2>&1| FileCheck %s --check-prefix=STATS -match-full-lines +; REQUIRES: asserts +; +; /* C := alpha*A*B + beta*C */ +; void kernel_gemm(int _PB_NI, int _PB_NJ, int _PB_NK, double alpha, double beta, +; double *A, double *B, double *C) { +; for (int i = 0; i < _PB_NI; i++) +; for (int j = 0; j < _PB_NJ; j++) { +; C[i * _PB_NJ + j] *= beta; +; for (int k = 0; k < _PB_NK; ++k) +; C[i * _PB_NJ + j] += alpha * A[i * _PB_NK + k] * B[k * _PB_NJ + j]; +; } +; } +; +; CHECK-NOT: The matrix multiplication pattern was detected +; PATTERN-MATCHING-OPTS: The matrix multiplication pattern was detected +; STATS: 1 polly-opt-isl - Number of matrix multiplication patterns detected and optimized +; +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-unknown" + +define dso_local void @kernel_gemm(i32 %_PB_NI, i32 %_PB_NJ, i32 %_PB_NK, double %alpha, double %beta, double* %A, double* %B, double* %C) { +entry: + %tmp = sext i32 %_PB_NK to i64 + %tmp14 = sext i32 %_PB_NJ to i64 + %tmp15 = sext i32 %_PB_NJ to i64 + %tmp16 = sext i32 %_PB_NI to i64 + %tmp17 = sext i32 %_PB_NJ to i64 + %tmp18 = sext i32 %_PB_NK to i64 + %tmp19 = sext i32 %_PB_NJ to i64 + br label %for.cond + +for.cond: ; preds = %for.inc28, %entry + %indvars.iv8 = phi i64 [ %indvars.iv.next9, %for.inc28 ], [ 0, %entry ] + %cmp = icmp slt i64 %indvars.iv8, %tmp16 + br i1 %cmp, label %for.body, label %for.cond.cleanup + +for.cond.cleanup: ; preds = %for.cond + br label %for.end30 + +for.body: ; preds = %for.cond + br label %for.cond1 + +for.cond1: ; preds = %for.inc25, %for.body + %indvars.iv4 = phi i64 [ %indvars.iv.next5, %for.inc25 ], [ 0, %for.body ] + %cmp2 = icmp slt i64 %indvars.iv4, %tmp15 + br i1 %cmp2, label %for.body4, label %for.cond.cleanup3 + +for.cond.cleanup3: ; preds = %for.cond1 + br label %for.end27 + +for.body4: ; preds = %for.cond1 + %tmp20 = mul nsw i64 %indvars.iv8, %tmp17 + %tmp21 = add nsw i64 %tmp20, %indvars.iv4 + %arrayidx = getelementptr inbounds double, double* %C, i64 %tmp21 + %tmp22 = load double, double* %arrayidx, align 8, !tbaa !2 + %mul5 = fmul double %tmp22, %beta + store double %mul5, double* %arrayidx, align 8, !tbaa !2 + br label %for.cond6 + +for.cond6: ; preds = %for.inc, %for.body4 + %indvars.iv = phi i64 [ %indvars.iv.next, %for.inc ], [ 0, %for.body4 ] + %cmp7 = icmp slt i64 %indvars.iv, %tmp + br i1 %cmp7, label %for.body9, label %for.cond.cleanup8 + +for.cond.cleanup8: ; preds = %for.cond6 + br label %for.end + +for.body9: ; preds = %for.cond6 + %tmp23 = mul nsw i64 %indvars.iv8, %tmp18 + %tmp24 = add nsw i64 %tmp23, %indvars.iv + %arrayidx13 = getelementptr inbounds double, double* %A, i64 %tmp24 + %tmp25 = load double, double* %arrayidx13, align 8, !tbaa !2 + %mul14 = fmul double %tmp25, %alpha + %tmp26 = mul nsw i64 %indvars.iv, %tmp14 + %tmp27 = add nsw i64 %tmp26, %indvars.iv4 + %arrayidx18 = getelementptr inbounds double, double* %B, i64 %tmp27 + %tmp28 = load double, double* %arrayidx18, align 8, !tbaa !2 + %mul19 = fmul double %mul14, %tmp28 + %tmp29 = mul nsw i64 %indvars.iv8, %tmp19 + %tmp30 = add nsw i64 %tmp29, %indvars.iv4 + %arrayidx23 = getelementptr inbounds double, double* %C, i64 %tmp30 + %tmp31 = load double, double* %arrayidx23, align 8, !tbaa !2 + %add24 = fadd double %tmp31, %mul19 + store double %add24, double* %arrayidx23, align 8, !tbaa !2 + br label %for.inc + +for.inc: ; preds = %for.body9 + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + br label %for.cond6 + +for.end: ; preds = %for.cond.cleanup8 + br label %for.inc25 + +for.inc25: ; preds = %for.end + %indvars.iv.next5 = add nuw nsw i64 %indvars.iv4, 1 + br label %for.cond1 + +for.end27: ; preds = %for.cond.cleanup3 + br label %for.inc28 + +for.inc28: ; preds = %for.end27 + %indvars.iv.next9 = add nuw nsw i64 %indvars.iv8, 1 + br label %for.cond + +for.end30: ; preds = %for.cond.cleanup + ret void +} + +declare void @llvm.lifetime.start.p0i8(i64, i8* nocapture) + +declare void @llvm.lifetime.end.p0i8(i64, i8* nocapture) + + +!llvm.module.flags = !{!0} +!llvm.ident = !{!1} + +!0 = !{i32 1, !"wchar_size", i32 4} +!1 = !{!"clang version 7.0.0 (git@github.com:llvm-mirror/clang.git e845f691846783e8e30d3131c8f8a08311c406e8) (git@github.com:llvm-mirror/llvm.git 1eba8752d75ed6cc7461e49ca3bda0b9ac4d66e3)"} +!2 = !{!3, !3, i64 0} +!3 = !{!"double", !4, i64 0} +!4 = !{!"omnipotent char", !5, i64 0} +!5 = !{!"Simple C/C++ TBAA"}