Differential D137037 Diff 471821 polly/test/ScheduleOptimizer/pattern-matching-based-opts-after-delicm.ll
Changeset View
Changeset View
Standalone View
Standalone View
polly/test/ScheduleOptimizer/pattern-matching-based-opts-after-delicm.ll
; RUN: opt %loadPolly \ | ; RUN: opt %loadPolly \ | ||||
; RUN: -polly-pattern-matching-based-opts=true \ | ; RUN: -polly-pattern-matching-based-opts=true \ | ||||
; RUN: -polly-optree -polly-delicm -polly-simplify \ | ; RUN: -polly-optree -polly-delicm -polly-simplify \ | ||||
; RUN: -polly-opt-isl -polly-tc-opt=true -debug -disable-output < %s 2>&1 \ | ; RUN: -polly-opt-isl -polly-tc-opt=true -polly-ast-detect-parallel \ | ||||
; RUN: -polly-print-ast -disable-output < %s 2>&1 \ | |||||
; RUN: | FileCheck %s | ; RUN: | FileCheck %s | ||||
; REQUIRES: asserts | ; REQUIRES: asserts | ||||
; Check that the pattern matching detects the matrix multiplication pattern | ; Check that the pattern matching detects the matrix multiplication pattern | ||||
; after a full run of -polly-optree and -polly-delicm, where the write access | ; after a full run of -polly-optree and -polly-delicm, where the write access | ||||
; is not through the original memory access, but through a PHI node that was | ; is not through the original memory access, but through a PHI node that was | ||||
; delicmed. This test covers the polybench 2mm and 3mm cases. | ; delicmed. This test covers the polybench 2mm and 3mm cases. | ||||
; | ; | ||||
Show All 21 Lines | |||||
; - filter: "{ Stmt_for_body8[i0, i1, i2] }" | ; - filter: "{ Stmt_for_body8[i0, i1, i2] }" | ||||
; child: | ; child: | ||||
; schedule: "[{ Stmt_for_body8[i0, i1, i2] -> [(i0)] }, | ; schedule: "[{ Stmt_for_body8[i0, i1, i2] -> [(i0)] }, | ||||
; { Stmt_for_body8[i0, i1, i2] -> [(i1)] }, | ; { Stmt_for_body8[i0, i1, i2] -> [(i1)] }, | ||||
; { Stmt_for_body8[i0, i1, i2] -> [(i2)] }]" | ; { Stmt_for_body8[i0, i1, i2] -> [(i2)] }]" | ||||
; permutable: 1 | ; permutable: 1 | ||||
; coincident: [ 1, 1, 0 ] | ; coincident: [ 1, 1, 0 ] | ||||
; | ; | ||||
; CHECK: The tensor contraction pattern was detected | ; CHECK: { | ||||
; CHECK: The matrix multiplication pattern was detected | ; CHECK-NEXT: // 1st level tiling - Tiles | ||||
; CHECK-NEXT: for (int c0 = 0; c0 <= 49; c0 += 1) | |||||
; CHECK-NEXT: for (int c1 = 0; c1 <= 56; c1 += 1) { | |||||
; CHECK-NEXT: // 1st level tiling - Points | |||||
; CHECK-NEXT: for (int c2 = 0; c2 <= 31; c2 += 1) | |||||
; CHECK-NEXT: for (int c3 = 0; c3 <= min(31, -32 * c1 + 1799); c3 += 1) | |||||
; CHECK-NEXT: Stmt_for_body3(32 * c0 + c2, 32 * c1 + c3); | |||||
; CHECK-NEXT: } | |||||
; CHECK-NEXT: // 1st level tiling - Tiles | |||||
; CHECK-NEXT: for (int c0 = 0; c0 <= 49; c0 += 1) | |||||
; CHECK-NEXT: for (int c1 = 0; c1 <= 56; c1 += 1) { | |||||
; CHECK-NEXT: // 1st level tiling - Points | |||||
; CHECK-NEXT: for (int c2 = 0; c2 <= 31; c2 += 1) | |||||
; CHECK-NEXT: for (int c3 = 0; c3 <= min(31, -32 * c1 + 1799); c3 += 1) | |||||
; CHECK-NEXT: Stmt_for_body3_last(32 * c0 + c2, 32 * c1 + c3); | |||||
; CHECK-NEXT: } | |||||
; CHECK-NEXT: // 1st level tiling - Tiles | |||||
; CHECK-NEXT: for (int c1 = 0; c1 <= 8; c1 += 1) { | |||||
; CHECK-NEXT: for (int c4 = 256 * c1; c4 <= min(2199, 256 * c1 + 255); c4 += 1) | |||||
; CHECK-NEXT: for (int c5 = 0; c5 <= 1799; c5 += 1) | |||||
; CHECK-NEXT: CopyStmt_0(0, c1, c4, c5); | |||||
; CHECK-NEXT: for (int c2 = 0; c2 <= 16; c2 += 1) { | |||||
; CHECK-NEXT: for (int c6 = 96 * c2; c6 <= min(1599, 96 * c2 + 95); c6 += 1) | |||||
; CHECK-NEXT: for (int c7 = 256 * c1; c7 <= min(2199, 256 * c1 + 255); c7 += 1) | |||||
; CHECK-NEXT: CopyStmt_1(0, c1, c2, c6, c7); | |||||
; CHECK-NEXT: // 1st level tiling - Points | |||||
; CHECK-NEXT: // Register tiling - Tiles | |||||
; CHECK-NEXT: for (int c3 = 0; c3 <= 224; c3 += 1) | |||||
; CHECK-NEXT: for (int c4 = 0; c4 <= min(23, -24 * c2 + 399); c4 += 1) | |||||
; CHECK-NEXT: for (int c5 = 0; c5 <= min(255, -256 * c1 + 2199); c5 += 1) { | |||||
; CHECK-NEXT: // Loop Vectorizer Disabled | |||||
; CHECK-NEXT: // Register tiling - Points | |||||
; CHECK-NEXT: { | |||||
; CHECK-NEXT: Stmt_for_body8(96 * c2 + 4 * c4, 8 * c3, 256 * c1 + c5); | |||||
; CHECK-NEXT: Stmt_for_body8(96 * c2 + 4 * c4, 8 * c3 + 1, 256 * c1 + c5); | |||||
; CHECK-NEXT: Stmt_for_body8(96 * c2 + 4 * c4, 8 * c3 + 2, 256 * c1 + c5); | |||||
; CHECK-NEXT: Stmt_for_body8(96 * c2 + 4 * c4, 8 * c3 + 3, 256 * c1 + c5); | |||||
; CHECK-NEXT: Stmt_for_body8(96 * c2 + 4 * c4, 8 * c3 + 4, 256 * c1 + c5); | |||||
; CHECK-NEXT: Stmt_for_body8(96 * c2 + 4 * c4, 8 * c3 + 5, 256 * c1 + c5); | |||||
; CHECK-NEXT: Stmt_for_body8(96 * c2 + 4 * c4, 8 * c3 + 6, 256 * c1 + c5); | |||||
; CHECK-NEXT: Stmt_for_body8(96 * c2 + 4 * c4, 8 * c3 + 7, 256 * c1 + c5); | |||||
; CHECK-NEXT: Stmt_for_body8(96 * c2 + 4 * c4 + 1, 8 * c3, 256 * c1 + c5); | |||||
; CHECK-NEXT: Stmt_for_body8(96 * c2 + 4 * c4 + 1, 8 * c3 + 1, 256 * c1 + c5); | |||||
; CHECK-NEXT: Stmt_for_body8(96 * c2 + 4 * c4 + 1, 8 * c3 + 2, 256 * c1 + c5); | |||||
; CHECK-NEXT: Stmt_for_body8(96 * c2 + 4 * c4 + 1, 8 * c3 + 3, 256 * c1 + c5); | |||||
; CHECK-NEXT: Stmt_for_body8(96 * c2 + 4 * c4 + 1, 8 * c3 + 4, 256 * c1 + c5); | |||||
; CHECK-NEXT: Stmt_for_body8(96 * c2 + 4 * c4 + 1, 8 * c3 + 5, 256 * c1 + c5); | |||||
; CHECK-NEXT: Stmt_for_body8(96 * c2 + 4 * c4 + 1, 8 * c3 + 6, 256 * c1 + c5); | |||||
; CHECK-NEXT: Stmt_for_body8(96 * c2 + 4 * c4 + 1, 8 * c3 + 7, 256 * c1 + c5); | |||||
; CHECK-NEXT: Stmt_for_body8(96 * c2 + 4 * c4 + 2, 8 * c3, 256 * c1 + c5); | |||||
; CHECK-NEXT: Stmt_for_body8(96 * c2 + 4 * c4 + 2, 8 * c3 + 1, 256 * c1 + c5); | |||||
; CHECK-NEXT: Stmt_for_body8(96 * c2 + 4 * c4 + 2, 8 * c3 + 2, 256 * c1 + c5); | |||||
; CHECK-NEXT: Stmt_for_body8(96 * c2 + 4 * c4 + 2, 8 * c3 + 3, 256 * c1 + c5); | |||||
; CHECK-NEXT: Stmt_for_body8(96 * c2 + 4 * c4 + 2, 8 * c3 + 4, 256 * c1 + c5); | |||||
; CHECK-NEXT: Stmt_for_body8(96 * c2 + 4 * c4 + 2, 8 * c3 + 5, 256 * c1 + c5); | |||||
; CHECK-NEXT: Stmt_for_body8(96 * c2 + 4 * c4 + 2, 8 * c3 + 6, 256 * c1 + c5); | |||||
; CHECK-NEXT: Stmt_for_body8(96 * c2 + 4 * c4 + 2, 8 * c3 + 7, 256 * c1 + c5); | |||||
; CHECK-NEXT: Stmt_for_body8(96 * c2 + 4 * c4 + 3, 8 * c3, 256 * c1 + c5); | |||||
; CHECK-NEXT: Stmt_for_body8(96 * c2 + 4 * c4 + 3, 8 * c3 + 1, 256 * c1 + c5); | |||||
; CHECK-NEXT: Stmt_for_body8(96 * c2 + 4 * c4 + 3, 8 * c3 + 2, 256 * c1 + c5); | |||||
; CHECK-NEXT: Stmt_for_body8(96 * c2 + 4 * c4 + 3, 8 * c3 + 3, 256 * c1 + c5); | |||||
; CHECK-NEXT: Stmt_for_body8(96 * c2 + 4 * c4 + 3, 8 * c3 + 4, 256 * c1 + c5); | |||||
; CHECK-NEXT: Stmt_for_body8(96 * c2 + 4 * c4 + 3, 8 * c3 + 5, 256 * c1 + c5); | |||||
; CHECK-NEXT: Stmt_for_body8(96 * c2 + 4 * c4 + 3, 8 * c3 + 6, 256 * c1 + c5); | |||||
; CHECK-NEXT: Stmt_for_body8(96 * c2 + 4 * c4 + 3, 8 * c3 + 7, 256 * c1 + c5); | |||||
; CHECK-NEXT: } | |||||
; CHECK-NEXT: } | |||||
; CHECK-NEXT: } | |||||
; CHECK-NEXT: } | |||||
; CHECK-NEXT: } | |||||
; | ; | ||||
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" | target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" | ||||
target triple = "x86_64-unknown-linux-gnu" | target triple = "x86_64-unknown-linux-gnu" | ||||
; Function Attrs: norecurse nounwind uwtable | ; Function Attrs: norecurse nounwind uwtable | ||||
define void @kernel_2mm(i32 %ni, i32 %nj, i32 %nk, i32 %nl, double %alpha, double %beta, [1800 x double]* nocapture %tmp, [2200 x double]* nocapture readonly %A, [1800 x double]* nocapture readonly %B, [2400 x double]* nocapture readnone %C, [2400 x double]* nocapture readnone %D) local_unnamed_addr #0 { | define void @kernel_2mm(i32 %ni, i32 %nj, i32 %nk, i32 %nl, double %alpha, double %beta, [1800 x double]* nocapture %tmp, [2200 x double]* nocapture readonly %A, [1800 x double]* nocapture readonly %B, [2400 x double]* nocapture readnone %C, [2400 x double]* nocapture readnone %D) local_unnamed_addr #0 { | ||||
entry: | entry: | ||||
▲ Show 20 Lines • Show All 55 Lines • Show Last 20 Lines |