Index: lib/Transform/ScheduleOptimizer.cpp =================================================================== --- lib/Transform/ScheduleOptimizer.cpp +++ lib/Transform/ScheduleOptimizer.cpp @@ -671,12 +671,19 @@ static bool isMatMulOperandAcc(__isl_keep isl_map *AccMap, int &FirstPos, int &SecondPos) { int DimInPos[] = {FirstPos, SecondPos}; - if (isl_map_foreach_basic_map(AccMap, isMatMulOperandBasicMap, + if (isl_map_dim(AccMap, isl_dim_out) != 2) + return false; + auto *NonPartialAccMap = isl_map_drop_constraints_not_involving_dims( + isl_map_copy(AccMap), isl_dim_out, 0, 2); + if (isl_map_foreach_basic_map(NonPartialAccMap, isMatMulOperandBasicMap, static_cast<void *>(DimInPos)) != isl_stat_ok || - DimInPos[0] < 0 || DimInPos[1] < 0) + DimInPos[0] < 0 || DimInPos[1] < 0) { + isl_map_free(NonPartialAccMap); return false; + } FirstPos = DimInPos[0]; SecondPos = DimInPos[1]; + isl_map_free(NonPartialAccMap); return true; } @@ -693,7 +700,7 @@ /// false, otherwise. static bool isMatMulNonScalarReadAccess(MemoryAccess *MemAccess, MatMulInfoTy &MMI) { - if (!MemAccess->isArrayKind() || !MemAccess->isRead()) + if (!MemAccess->isLatestArrayKind() || !MemAccess->isRead()) return false; isl_map *AccMap = MemAccess->getAccessRelation(); if (isMatMulOperandAcc(AccMap, MMI.i, MMI.j) && !MMI.ReadFromC && @@ -748,7 +755,7 @@ MMI.k, OutDimNum - 1); for (auto *MemA = Stmt->begin(); MemA != Stmt->end() - 1; MemA++) { auto *MemAccessPtr = *MemA; - if (MemAccessPtr->isArrayKind() && MemAccessPtr != MMI.WriteToC && + if (MemAccessPtr->isLatestArrayKind() && MemAccessPtr != MMI.WriteToC && !isMatMulNonScalarReadAccess(MemAccessPtr, MMI) && !(MemAccessPtr->isStrideZero(isl_map_copy(MapI)) && MemAccessPtr->isStrideZero(isl_map_copy(MapJ)) && @@ -840,7 +847,7 @@ return false; for (auto *MemA = Stmt->end() - 1; MemA != Stmt->begin(); MemA--) { auto *MemAccessPtr = *MemA; - if (!MemAccessPtr->isArrayKind()) + if (!MemAccessPtr->isLatestArrayKind()) continue; if (!MemAccessPtr->isWrite()) return 
false; Index: test/ScheduleOptimizer/pattern-matching-based-opts_11.ll =================================================================== --- /dev/null +++ test/ScheduleOptimizer/pattern-matching-based-opts_11.ll @@ -0,0 +1,184 @@ +; RUN: opt %loadPolly -polly-invariant-load-hoisting \ +; RUN: -polly-delicm-overapproximate-writes -polly-opt-isl \ +; RUN: -polly-pattern-matching-based-opts=true \ +; RUN: -debug < %s 2>&1| FileCheck %s +; +; Check that the matrix multiplication is detected in a Boost uBLAS gemm kernel. +; +; CHECK: The matrix multiplication pattern was detected +; +%"class.boost::numeric::ublas::matrix" = type { i64, i64, %"class.boost::numeric::ublas::unbounded_array" } +%"class.boost::numeric::ublas::unbounded_array" = type { %"class.std::allocator", i64, float* } +%"class.std::allocator" = type { i8 } + +declare i32 @__gxx_personality_v0(...) + +declare void @_ZSt17__throw_bad_allocv() + +declare noalias nonnull i8* @_Znwm(i64) + +define void @_Z4gemmRN5boost7numeric5ublas6matrixIfNS1_15basic_row_majorImlEENS1_15unbounded_arrayIfSaIfEEEEES9_S9_(%"class.boost::numeric::ublas::matrix"* dereferenceable(40) %C, %"class.boost::numeric::ublas::matrix"* dereferenceable(40) %A, %"class.boost::numeric::ublas::matrix"* dereferenceable(40) %B) personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) { +entry: + %temporary.i = alloca %"class.boost::numeric::ublas::matrix", align 8 + %tmp2 = bitcast %"class.boost::numeric::ublas::matrix"* %temporary.i to i8* + %size1_.i.i = getelementptr inbounds %"class.boost::numeric::ublas::matrix", %"class.boost::numeric::ublas::matrix"* %temporary.i, i64 0, i32 0 + %size1_.i.i.i.i.i = getelementptr inbounds %"class.boost::numeric::ublas::matrix", %"class.boost::numeric::ublas::matrix"* %A, i64 0, i32 0 + %Asize1 = load i64, i64* %size1_.i.i.i.i.i, align 8 + store i64 %Asize1, i64* %size1_.i.i, align 8 + %size2_.i.i.i.i.i = getelementptr inbounds %"class.boost::numeric::ublas::matrix", %"class.boost::numeric::ublas::matrix"* %B, i64 0, i32 1 + %Bsize2 = load i64, 
i64* %size2_.i.i.i.i.i, align 8 + %size2_.i.i = getelementptr inbounds %"class.boost::numeric::ublas::matrix", %"class.boost::numeric::ublas::matrix"* %temporary.i, i64 0, i32 1 + store i64 %Bsize2, i64* %size2_.i.i, align 8 + %mul.i.i.i = mul i64 %Bsize2, %Asize1 + %size_.i21.i.i = getelementptr inbounds %"class.boost::numeric::ublas::matrix", %"class.boost::numeric::ublas::matrix"* %temporary.i, i64 0, i32 2, i32 1 + store i64 %mul.i.i.i, i64* %size_.i21.i.i, align 8 + %tobool.i22.i.i = icmp eq i64 %mul.i.i.i, 0 + br i1 %tobool.i22.i.i, label %_ZN5boost7numeric5ublas15unbounded_arrayIfSaIfEEC2EmRKS3_.exit.i.i, label %if.then.i23.i.i + +if.then.i23.i.i: ; preds = %entry + %cmp.i.i.i.i = icmp ugt i64 %mul.i.i.i, 4611686018427387903 + br i1 %cmp.i.i.i.i, label %if.then.i.i.i.i, label %if.end.i.i.i.i + +if.then.i.i.i.i: ; preds = %if.then.i23.i.i + invoke void @_ZSt17__throw_bad_allocv() + to label %.noexc.i.i.i unwind label %lpad.i.i.i + +.noexc.i.i.i: ; preds = %if.then.i.i.i.i + unreachable + +if.end.i.i.i.i: ; preds = %if.then.i23.i.i + %mul.i.i.i.i = shl i64 %mul.i.i.i, 2 + %call2.i8.i.i.i = invoke i8* @_Znwm(i64 %mul.i.i.i.i) + to label %_ZN9__gnu_cxx13new_allocatorIfE8allocateEmPKv.exit.i.i.i unwind label %lpad.i.i.i + +_ZN9__gnu_cxx13new_allocatorIfE8allocateEmPKv.exit.i.i.i: ; preds = %if.end.i.i.i.i + %tmp7 = bitcast i8* %call2.i8.i.i.i to float* + br label %_ZN5boost7numeric5ublas15unbounded_arrayIfSaIfEEC2EmRKS3_.exit.i.i + +lpad.i.i.i: ; preds = %if.end.i.i.i.i, %if.then.i.i.i.i + %tmp8 = landingpad { i8*, i32 } + cleanup + resume { i8*, i32 } %tmp8 + +_ZN5boost7numeric5ublas15unbounded_arrayIfSaIfEEC2EmRKS3_.exit.i.i: ; preds = %_ZN9__gnu_cxx13new_allocatorIfE8allocateEmPKv.exit.i.i.i, %entry + %.sink.i.i.i = phi float* [ null, %entry ], [ %tmp7, %_ZN9__gnu_cxx13new_allocatorIfE8allocateEmPKv.exit.i.i.i ] + %data_5.i.i.i = getelementptr inbounds %"class.boost::numeric::ublas::matrix", %"class.boost::numeric::ublas::matrix"* %temporary.i, i64 0, i32 2, 
i32 2 + store float* %.sink.i.i.i, float** %data_5.i.i.i, align 8 + %cmp26.i.i.i.i.i = icmp eq i64 %Asize1, 0 + br i1 %cmp26.i.i.i.i.i, label %_ZN5boost7numeric5ublas6matrixIfNS1_15basic_row_majorImlEENS1_15unbounded_arrayIfSaIfEEEEC2INS1_20matrix_matrix_binaryIS8_S8_NS1_18matrix_matrix_prodIS8_S8_fEEEEEERKNS1_17matrix_expressionIT_EE.exit.i, label %for.cond2.preheader.i.i.i.i.i.preheader + +for.cond2.preheader.i.i.i.i.i.preheader: ; preds = %_ZN5boost7numeric5ublas15unbounded_arrayIfSaIfEEC2EmRKS3_.exit.i.i + br label %for.cond2.preheader.i.i.i.i.i + +for.cond2.preheader.i.i.i.i.i: ; preds = %for.cond.cleanup4.i.i.i.i.i, %for.cond2.preheader.i.i.i.i.i.preheader + %i.027.i.i.i.i.i = phi i64 [ %inc10.i.i.i.i.i, %for.cond.cleanup4.i.i.i.i.i ], [ 0, %for.cond2.preheader.i.i.i.i.i.preheader ] + %cmp324.i.i.i.i.i = icmp eq i64 %Bsize2, 0 + br i1 %cmp324.i.i.i.i.i, label %for.cond.cleanup4.i.i.i.i.i, label %for.body5.i.i.i.i.i.preheader + +for.body5.i.i.i.i.i.preheader: ; preds = %for.cond2.preheader.i.i.i.i.i + br label %for.body5.i.i.i.i.i + +for.cond.cleanup4.i.i.i.i.i.loopexit: ; preds = %ExitStmt + br label %for.cond.cleanup4.i.i.i.i.i + +for.cond.cleanup4.i.i.i.i.i: ; preds = %for.cond.cleanup4.i.i.i.i.i.loopexit, %for.cond2.preheader.i.i.i.i.i + %inc10.i.i.i.i.i = add nuw i64 %i.027.i.i.i.i.i, 1 + %exitcond5 = icmp ne i64 %inc10.i.i.i.i.i, %Asize1 + br i1 %exitcond5, label %for.cond2.preheader.i.i.i.i.i, label %_ZN5boost7numeric5ublas6matrixIfNS1_15basic_row_majorImlEENS1_15unbounded_arrayIfSaIfEEEEC2INS1_20matrix_matrix_binaryIS8_S8_NS1_18matrix_matrix_prodIS8_S8_fEEEEEERKNS1_17matrix_expressionIT_EE.exit.i.loopexit + +for.body5.i.i.i.i.i: ; preds = %ExitStmt, %for.body5.i.i.i.i.i.preheader + %j.025.i.i.i.i.i = phi i64 [ %inc.i.i.i.i.i, %ExitStmt ], [ 0, %for.body5.i.i.i.i.i.preheader ] + %tmp11 = load i64, i64* %size2_.i.i, align 8 + %mul.i.i.i.i.i.i.i.i = mul i64 %tmp11, %i.027.i.i.i.i.i + %add.i.i.i.i.i.i.i.i = add i64 %mul.i.i.i.i.i.i.i.i, %j.025.i.i.i.i.i + 
%tmp12 = load float*, float** %data_5.i.i.i, align 8 + %arrayidx.i.i.i.i.i.i.i.i = getelementptr inbounds float, float* %tmp12, i64 %add.i.i.i.i.i.i.i.i + %size2_.i.i24.i.i.i.i.i.i.i = getelementptr inbounds %"class.boost::numeric::ublas::matrix", %"class.boost::numeric::ublas::matrix"* %A, i64 0, i32 1 + %Asize2 = load i64, i64* %size2_.i.i24.i.i.i.i.i.i.i, align 8 + %cmp14.i.i.i.i.i.i.i = icmp eq i64 %Asize2, 0 + br i1 %cmp14.i.i.i.i.i.i.i, label %ExitStmt, label %KernelStmt.preheader + +KernelStmt.preheader: ; preds = %for.body5.i.i.i.i.i + %data_.i4.i.i22.i.i.i.i.i.i.i.phi.trans.insert = getelementptr inbounds %"class.boost::numeric::ublas::matrix", %"class.boost::numeric::ublas::matrix"* %A, i64 0, i32 2, i32 2 + %tmp15.pre = load float*, float** %data_.i4.i.i22.i.i.i.i.i.i.i.phi.trans.insert, align 8 + %tmp18.pre = load i64, i64* %size2_.i.i.i.i.i, align 8 + %data_.i4.i.i.i.i.i.i.i.i.i.phi.trans.insert = getelementptr inbounds %"class.boost::numeric::ublas::matrix", %"class.boost::numeric::ublas::matrix"* %B, i64 0, i32 2, i32 2 + %tmp19.pre = load float*, float** %data_.i4.i.i.i.i.i.i.i.i.i.phi.trans.insert, align 8 + br label %KernelStmt + +KernelStmt: ; preds = %KernelStmt, %KernelStmt.preheader + %k.016.i.i.i.i.i.i.i = phi i64 [ %inc.i.i.i.i.i.i.i, %KernelStmt ], [ 0, %KernelStmt.preheader ] + %t.015.i.i.i.i.i.i.i = phi float [ %add.i.i.i.i.i.i.i, %KernelStmt ], [ 0.000000e+00, %KernelStmt.preheader ] + %mul.i.i.i20.i.i.i.i.i.i.i = mul i64 %Asize2, %i.027.i.i.i.i.i + %add.i.i.i21.i.i.i.i.i.i.i = add i64 %mul.i.i.i20.i.i.i.i.i.i.i, %k.016.i.i.i.i.i.i.i + %arrayidx.i.i.i23.i.i.i.i.i.i.i = getelementptr inbounds float, float* %tmp15.pre, i64 %add.i.i.i21.i.i.i.i.i.i.i + %tmp16 = load float, float* %arrayidx.i.i.i23.i.i.i.i.i.i.i, align 4 + %mul.i.i.i.i.i.i.i.i.i.i = mul i64 %tmp18.pre, %k.016.i.i.i.i.i.i.i + %add.i.i.i.i.i.i.i.i.i.i = add i64 %mul.i.i.i.i.i.i.i.i.i.i, %j.025.i.i.i.i.i + %arrayidx.i.i.i.i.i.i.i.i.i.i = getelementptr inbounds float, float* 
%tmp19.pre, i64 %add.i.i.i.i.i.i.i.i.i.i + %tmp20 = load float, float* %arrayidx.i.i.i.i.i.i.i.i.i.i, align 4 + %mul.i.i.i.i.i.i.i = fmul float %tmp16, %tmp20 + %add.i.i.i.i.i.i.i = fadd float %t.015.i.i.i.i.i.i.i, %mul.i.i.i.i.i.i.i + %inc.i.i.i.i.i.i.i = add nuw i64 %k.016.i.i.i.i.i.i.i, 1 + %exitcond = icmp ne i64 %inc.i.i.i.i.i.i.i, %Asize2 + br i1 %exitcond, label %KernelStmt, label %LoopexitStmt + +LoopexitStmt: ; preds = %KernelStmt + br label %ExitStmt + +ExitStmt: ; preds = %LoopexitStmt, %for.body5.i.i.i.i.i + %t.0.lcssa.i.i.i.i.i.i.i = phi float [ 0.000000e+00, %for.body5.i.i.i.i.i ], [ %add.i.i.i.i.i.i.i, %LoopexitStmt ] + store float %t.0.lcssa.i.i.i.i.i.i.i, float* %arrayidx.i.i.i.i.i.i.i.i, align 4 + %inc.i.i.i.i.i = add nuw i64 %j.025.i.i.i.i.i, 1 + %exitcond4 = icmp ne i64 %inc.i.i.i.i.i, %Bsize2 + br i1 %exitcond4, label %for.body5.i.i.i.i.i, label %for.cond.cleanup4.i.i.i.i.i.loopexit + +_ZN5boost7numeric5ublas6matrixIfNS1_15basic_row_majorImlEENS1_15unbounded_arrayIfSaIfEEEEC2INS1_20matrix_matrix_binaryIS8_S8_NS1_18matrix_matrix_prodIS8_S8_fEEEEEERKNS1_17matrix_expressionIT_EE.exit.i.loopexit: ; preds = %for.cond.cleanup4.i.i.i.i.i + br label %_ZN5boost7numeric5ublas6matrixIfNS1_15basic_row_majorImlEENS1_15unbounded_arrayIfSaIfEEEEC2INS1_20matrix_matrix_binaryIS8_S8_NS1_18matrix_matrix_prodIS8_S8_fEEEEEERKNS1_17matrix_expressionIT_EE.exit.i + +_ZN5boost7numeric5ublas6matrixIfNS1_15basic_row_majorImlEENS1_15unbounded_arrayIfSaIfEEEEC2INS1_20matrix_matrix_binaryIS8_S8_NS1_18matrix_matrix_prodIS8_S8_fEEEEEERKNS1_17matrix_expressionIT_EE.exit.i: ; preds = %_ZN5boost7numeric5ublas6matrixIfNS1_15basic_row_majorImlEENS1_15unbounded_arrayIfSaIfEEEEC2INS1_20matrix_matrix_binaryIS8_S8_NS1_18matrix_matrix_prodIS8_S8_fEEEEEERKNS1_17matrix_expressionIT_EE.exit.i.loopexit, %_ZN5boost7numeric5ublas15unbounded_arrayIfSaIfEEC2EmRKS3_.exit.i.i + %cmp.i.i.i = icmp eq %"class.boost::numeric::ublas::matrix"* %temporary.i, %C + br i1 %cmp.i.i.i, label 
%_ZN5boost7numeric5ublas6matrixIfNS1_15basic_row_majorImlEENS1_15unbounded_arrayIfSaIfEEEEC2INS1_20matrix_matrix_binaryIS8_S8_NS1_18matrix_matrix_prodIS8_S8_fEEEEEERKNS1_17matrix_expressionIT_EE.exit.i.invoke.cont.i_crit_edge, label %if.then.i.i.i + +_ZN5boost7numeric5ublas6matrixIfNS1_15basic_row_majorImlEENS1_15unbounded_arrayIfSaIfEEEEC2INS1_20matrix_matrix_binaryIS8_S8_NS1_18matrix_matrix_prodIS8_S8_fEEEEEERKNS1_17matrix_expressionIT_EE.exit.i.invoke.cont.i_crit_edge: ; preds = %_ZN5boost7numeric5ublas6matrixIfNS1_15basic_row_majorImlEENS1_15unbounded_arrayIfSaIfEEEEC2INS1_20matrix_matrix_binaryIS8_S8_NS1_18matrix_matrix_prodIS8_S8_fEEEEEERKNS1_17matrix_expressionIT_EE.exit.i + %Asize13.pre = load i64, i64* %size_.i21.i.i, align 8 + br label %invoke.cont.i + +if.then.i.i.i: ; preds = %_ZN5boost7numeric5ublas6matrixIfNS1_15basic_row_majorImlEENS1_15unbounded_arrayIfSaIfEEEEC2INS1_20matrix_matrix_binaryIS8_S8_NS1_18matrix_matrix_prodIS8_S8_fEEEEEERKNS1_17matrix_expressionIT_EE.exit.i + %size1_.i.i.i = getelementptr inbounds %"class.boost::numeric::ublas::matrix", %"class.boost::numeric::ublas::matrix"* %C, i64 0, i32 0 + %tmp21 = load i64, i64* %size1_.i.i.i, align 8 + %tmp22 = load i64, i64* %size1_.i.i, align 8 + store i64 %tmp22, i64* %size1_.i.i.i, align 8 + store i64 %tmp21, i64* %size1_.i.i, align 8 + %size2_.i.i.i = getelementptr inbounds %"class.boost::numeric::ublas::matrix", %"class.boost::numeric::ublas::matrix"* %C, i64 0, i32 1 + %tmp23 = load i64, i64* %size2_.i.i.i, align 8 + %tmp24 = load i64, i64* %size2_.i.i, align 8 + store i64 %tmp24, i64* %size2_.i.i.i, align 8 + store i64 %tmp23, i64* %size2_.i.i, align 8 + %size_.i.i.i.i = getelementptr inbounds %"class.boost::numeric::ublas::matrix", %"class.boost::numeric::ublas::matrix"* %C, i64 0, i32 2, i32 1 + %tmp25 = load i64, i64* %size_.i.i.i.i, align 8 + %tmp26 = load i64, i64* %size_.i21.i.i, align 8 + store i64 %tmp26, i64* %size_.i.i.i.i, align 8 + store i64 %tmp25, i64* %size_.i21.i.i, align 
8 + %data_.i.i.i.i = getelementptr inbounds %"class.boost::numeric::ublas::matrix", %"class.boost::numeric::ublas::matrix"* %C, i64 0, i32 2, i32 2 + %tmp27 = bitcast float** %data_.i.i.i.i to i64* + %tmp28 = load i64, i64* %tmp27, align 8 + %tmp29 = bitcast float** %data_5.i.i.i to i64* + %Asize10 = load i64, i64* %tmp29, align 8 + store i64 %Asize10, i64* %tmp27, align 8 + store i64 %tmp28, i64* %tmp29, align 8 + br label %invoke.cont.i + +invoke.cont.i: ; preds = %_ZN5boost7numeric5ublas6matrixIfNS1_15basic_row_majorImlEENS1_15unbounded_arrayIfSaIfEEEEC2INS1_20matrix_matrix_binaryIS8_S8_NS1_18matrix_matrix_prodIS8_S8_fEEEEEERKNS1_17matrix_expressionIT_EE.exit.i.invoke.cont.i_crit_edge, %if.then.i.i.i + %Asize13 = phi i64 [ %Asize13.pre, %_ZN5boost7numeric5ublas6matrixIfNS1_15basic_row_majorImlEENS1_15unbounded_arrayIfSaIfEEEEC2INS1_20matrix_matrix_binaryIS8_S8_NS1_18matrix_matrix_prodIS8_S8_fEEEEEERKNS1_17matrix_expressionIT_EE.exit.i.invoke.cont.i_crit_edge ], [ %tmp25, %if.then.i.i.i ] + %tobool.i.i.i = icmp eq i64 %Asize13, 0 + br i1 %tobool.i.i.i, label %_ZN5boost7numeric5ublas6matrixIfNS1_15basic_row_majorImlEENS1_15unbounded_arrayIfSaIfEEEEaSINS1_20matrix_matrix_binaryIS8_S8_NS1_18matrix_matrix_prodIS8_S8_fEEEEEERS8_RKNS1_17matrix_expressionIT_EE.exit, label %if.then.i.i4.i + +if.then.i.i4.i: ; preds = %invoke.cont.i + %Asize14 = bitcast float** %data_5.i.i.i to i8** + %Asize15 = load i8*, i8** %Asize14, align 8 + br label %_ZN5boost7numeric5ublas6matrixIfNS1_15basic_row_majorImlEENS1_15unbounded_arrayIfSaIfEEEEaSINS1_20matrix_matrix_binaryIS8_S8_NS1_18matrix_matrix_prodIS8_S8_fEEEEEERS8_RKNS1_17matrix_expressionIT_EE.exit + +_ZN5boost7numeric5ublas6matrixIfNS1_15basic_row_majorImlEENS1_15unbounded_arrayIfSaIfEEEEaSINS1_20matrix_matrix_binaryIS8_S8_NS1_18matrix_matrix_prodIS8_S8_fEEEEEERS8_RKNS1_17matrix_expressionIT_EE.exit: ; preds = %if.then.i.i4.i, %invoke.cont.i + ret void +}