Index: polly/trunk/lib/Transform/ScheduleOptimizer.cpp =================================================================== --- polly/trunk/lib/Transform/ScheduleOptimizer.cpp +++ polly/trunk/lib/Transform/ScheduleOptimizer.cpp @@ -185,6 +185,12 @@ " --polly-register-tile-sizes)"), cl::Hidden, cl::init(2), cl::ZeroOrMore, cl::cat(PollyCategory)); +static cl::opt<int> PollyPatternMatchingNcQuotient( + "polly-pattern-matching-nc-quotient", + cl::desc("Quotient that is obtained by dividing Nc, the parameter of the " + "macro-kernel, by Nr, the parameter of the micro-kernel"), + cl::Hidden, cl::init(256), cl::ZeroOrMore, cl::cat(PollyCategory)); + static cl::list<int> RegisterTileSizes("polly-register-tile-sizes", cl::desc("A tile size for each loop dimension, filled " @@ -610,6 +616,9 @@ CacheLevelSizes[0] > 0 && CacheLevelSizes[1] > 0 && CacheLevelAssociativity[0] > 2 && CacheLevelAssociativity[1] > 2)) return {1, 1, 1}; + // The quotient should be greater than zero. + if (PollyPatternMatchingNcQuotient <= 0) + return {1, 1, 1}; int Car = floor( (CacheLevelAssociativity[0] - 1) / (1 + static_cast<double>(MicroKernelParams.Nr) / MicroKernelParams.Mr)); @@ -618,7 +627,7 @@ double Cac = static_cast<double>(Kc * 8 * CacheLevelAssociativity[1]) / CacheLevelSizes[1]; int Mc = floor((CacheLevelAssociativity[1] - 2) / Cac); - int Nc = floor(1 / Cac); + int Nc = PollyPatternMatchingNcQuotient * MicroKernelParams.Nr; return {Mc, Nc, Kc}; } Index: polly/trunk/test/ScheduleOptimizer/mat_mul_pattern_data_layout.ll =================================================================== --- polly/trunk/test/ScheduleOptimizer/mat_mul_pattern_data_layout.ll +++ polly/trunk/test/ScheduleOptimizer/mat_mul_pattern_data_layout.ll @@ -9,14 +9,14 @@ ; C[i][j] += alpha * A[i][k] * B[k][j]; ; } ; -; CHECK: double Packed_B[ { [] -> [(2)] } ][ { [] -> [(256)] } ][ { [] -> [(8)] } ]; // Element size 8 +; CHECK: double Packed_B[ { [] -> [(256)] } ][ { [] -> [(256)] } ][ { [] -> [(8)] } ]; ; CHECK-NEXT: double Packed_A[ { [] -> 
[(24)] } ][ { [] -> [(256)] } ][ { [] -> [(4)] } ]; // Element size 8 ; ; CHECK: { Stmt_Copy_0[i0, i1, i2] -> MemRef_arg6[i0, i2] }; ; CHECK-NEXT: new: { Stmt_Copy_0[i0, i1, i2] -> Packed_A[o0, o1, o2] : 256*floor((-i2 + o1)/256) = -i2 + o1 and 4*floor((-i0 + o2)/4) = -i0 + o2 and 0 <= o1 <= 255 and 0 <= o2 <= 3 and -3 + i0 - 4o0 <= 96*floor((i0)/96) <= i0 - 4o0 }; ; ; CHECK: { Stmt_Copy_0[i0, i1, i2] -> MemRef_arg7[i2, i1] }; -; CHECK-NEXT: new: { Stmt_Copy_0[i0, i1, i2] -> Packed_B[o0, o1, o2] : 256*floor((-i2 + o1)/256) = -i2 + o1 and 8*floor((-i1 + o2)/8) = -i1 + o2 and 0 <= o1 <= 255 and 0 <= o2 <= 7 and -7 + i1 - 8o0 <= 16*floor((i1)/16) <= i1 - 8o0 }; +; CHECK-NEXT: new: { Stmt_Copy_0[i0, i1, i2] -> Packed_B[o0, o1, o2] : 256*floor((-i2 + o1)/256) = -i2 + o1 and 8*floor((-i1 + o2)/8) = -i1 + o2 and 0 <= o1 <= 255 and 0 <= o2 <= 7 and -7 + i1 - 8o0 <= 2048*floor((i1)/2048) <= i1 - 8o0 }; ; ; CHECK: CopyStmt_0 ; CHECK-NEXT: Domain := @@ -25,7 +25,7 @@ ; CHECK-NEXT: ; ; CHECK-NEXT: MustWriteAccess := [Reduction Type: NONE] [Scalar: 0] ; CHECK-NEXT: null; -; CHECK-NEXT: new: { CopyStmt_0[i0, i1, i2] -> Packed_B[o0, o1, o2] : 256*floor((-i2 + o1)/256) = -i2 + o1 and 8*floor((-i1 + o2)/8) = -i1 + o2 and 0 <= o1 <= 255 and 0 <= o2 <= 7 and -7 + i1 - 8o0 <= 16*floor((i1)/16) <= i1 - 8o0 }; +; CHECK-NEXT: new: { CopyStmt_0[i0, i1, i2] -> Packed_B[o0, o1, o2] : 256*floor((-i2 + o1)/256) = -i2 + o1 and 8*floor((-i1 + o2)/8) = -i1 + o2 and 0 <= o1 <= 255 and 0 <= o2 <= 7 and -7 + i1 - 8o0 <= 2048*floor((i1)/2048) <= i1 - 8o0 }; ; CHECK-NEXT: ReadAccess := [Reduction Type: NONE] [Scalar: 0] ; CHECK-NEXT: null; ; CHECK-NEXT: new: { CopyStmt_0[i0, i1, i2] -> MemRef_arg7[i2, i1] }; Index: polly/trunk/test/ScheduleOptimizer/mat_mul_pattern_data_layout_2.ll =================================================================== --- polly/trunk/test/ScheduleOptimizer/mat_mul_pattern_data_layout_2.ll +++ polly/trunk/test/ScheduleOptimizer/mat_mul_pattern_data_layout_2.ll @@ -20,60 
+20,59 @@ ; CHECK-NEXT: Stmt_bb9(32 * c0 + c2, 32 * c1 + c3); ; CHECK-NEXT: } ; CHECK-NEXT: // 1st level tiling - Tiles -; CHECK-NEXT: for (int c0 = 0; c0 <= 65; c0 += 1) -; CHECK-NEXT: for (int c1 = 0; c1 <= 3; c1 += 1) { -; CHECK-NEXT: for (int c3 = 16 * c0; c3 <= 16 * c0 + 15; c3 += 1) -; CHECK-NEXT: for (int c4 = 256 * c1; c4 <= min(1022, 256 * c1 + 255); c4 += 1) -; CHECK-NEXT: CopyStmt_0(0, c3, c4); -; CHECK-NEXT: for (int c2 = 0; c2 <= 10; c2 += 1) { -; CHECK-NEXT: for (int c3 = 96 * c2; c3 <= 96 * c2 + 95; c3 += 1) -; CHECK-NEXT: for (int c5 = 256 * c1; c5 <= min(1022, 256 * c1 + 255); c5 += 1) -; CHECK-NEXT: CopyStmt_1(c3, 0, c5); -; CHECK-NEXT: // 1st level tiling - Points -; CHECK-NEXT: // Register tiling - Tiles -; CHECK-NEXT: for (int c3 = 0; c3 <= 1; c3 += 1) -; CHECK-NEXT: for (int c4 = 0; c4 <= 23; c4 += 1) -; CHECK-NEXT: for (int c5 = 0; c5 <= min(255, -256 * c1 + 1022); c5 += 1) { -; CHECK-NEXT: // Register tiling - Points -; CHECK-NEXT: // 1st level tiling - Tiles -; CHECK-NEXT: // 1st level tiling - Points -; CHECK-NEXT: { -; CHECK-NEXT: Stmt_Copy_0(96 * c2 + 4 * c4, 16 * c0 + 8 * c3, 256 * c1 + c5); -; CHECK-NEXT: Stmt_Copy_0(96 * c2 + 4 * c4, 16 * c0 + 8 * c3 + 1, 256 * c1 + c5); -; CHECK-NEXT: Stmt_Copy_0(96 * c2 + 4 * c4, 16 * c0 + 8 * c3 + 2, 256 * c1 + c5); -; CHECK-NEXT: Stmt_Copy_0(96 * c2 + 4 * c4, 16 * c0 + 8 * c3 + 3, 256 * c1 + c5); -; CHECK-NEXT: Stmt_Copy_0(96 * c2 + 4 * c4, 16 * c0 + 8 * c3 + 4, 256 * c1 + c5); -; CHECK-NEXT: Stmt_Copy_0(96 * c2 + 4 * c4, 16 * c0 + 8 * c3 + 5, 256 * c1 + c5); -; CHECK-NEXT: Stmt_Copy_0(96 * c2 + 4 * c4, 16 * c0 + 8 * c3 + 6, 256 * c1 + c5); -; CHECK-NEXT: Stmt_Copy_0(96 * c2 + 4 * c4, 16 * c0 + 8 * c3 + 7, 256 * c1 + c5); -; CHECK-NEXT: Stmt_Copy_0(96 * c2 + 4 * c4 + 1, 16 * c0 + 8 * c3, 256 * c1 + c5); -; CHECK-NEXT: Stmt_Copy_0(96 * c2 + 4 * c4 + 1, 16 * c0 + 8 * c3 + 1, 256 * c1 + c5); -; CHECK-NEXT: Stmt_Copy_0(96 * c2 + 4 * c4 + 1, 16 * c0 + 8 * c3 + 2, 256 * c1 + c5); -; CHECK-NEXT: 
Stmt_Copy_0(96 * c2 + 4 * c4 + 1, 16 * c0 + 8 * c3 + 3, 256 * c1 + c5); -; CHECK-NEXT: Stmt_Copy_0(96 * c2 + 4 * c4 + 1, 16 * c0 + 8 * c3 + 4, 256 * c1 + c5); -; CHECK-NEXT: Stmt_Copy_0(96 * c2 + 4 * c4 + 1, 16 * c0 + 8 * c3 + 5, 256 * c1 + c5); -; CHECK-NEXT: Stmt_Copy_0(96 * c2 + 4 * c4 + 1, 16 * c0 + 8 * c3 + 6, 256 * c1 + c5); -; CHECK-NEXT: Stmt_Copy_0(96 * c2 + 4 * c4 + 1, 16 * c0 + 8 * c3 + 7, 256 * c1 + c5); -; CHECK-NEXT: Stmt_Copy_0(96 * c2 + 4 * c4 + 2, 16 * c0 + 8 * c3, 256 * c1 + c5); -; CHECK-NEXT: Stmt_Copy_0(96 * c2 + 4 * c4 + 2, 16 * c0 + 8 * c3 + 1, 256 * c1 + c5); -; CHECK-NEXT: Stmt_Copy_0(96 * c2 + 4 * c4 + 2, 16 * c0 + 8 * c3 + 2, 256 * c1 + c5); -; CHECK-NEXT: Stmt_Copy_0(96 * c2 + 4 * c4 + 2, 16 * c0 + 8 * c3 + 3, 256 * c1 + c5); -; CHECK-NEXT: Stmt_Copy_0(96 * c2 + 4 * c4 + 2, 16 * c0 + 8 * c3 + 4, 256 * c1 + c5); -; CHECK-NEXT: Stmt_Copy_0(96 * c2 + 4 * c4 + 2, 16 * c0 + 8 * c3 + 5, 256 * c1 + c5); -; CHECK-NEXT: Stmt_Copy_0(96 * c2 + 4 * c4 + 2, 16 * c0 + 8 * c3 + 6, 256 * c1 + c5); -; CHECK-NEXT: Stmt_Copy_0(96 * c2 + 4 * c4 + 2, 16 * c0 + 8 * c3 + 7, 256 * c1 + c5); -; CHECK-NEXT: Stmt_Copy_0(96 * c2 + 4 * c4 + 3, 16 * c0 + 8 * c3, 256 * c1 + c5); -; CHECK-NEXT: Stmt_Copy_0(96 * c2 + 4 * c4 + 3, 16 * c0 + 8 * c3 + 1, 256 * c1 + c5); -; CHECK-NEXT: Stmt_Copy_0(96 * c2 + 4 * c4 + 3, 16 * c0 + 8 * c3 + 2, 256 * c1 + c5); -; CHECK-NEXT: Stmt_Copy_0(96 * c2 + 4 * c4 + 3, 16 * c0 + 8 * c3 + 3, 256 * c1 + c5); -; CHECK-NEXT: Stmt_Copy_0(96 * c2 + 4 * c4 + 3, 16 * c0 + 8 * c3 + 4, 256 * c1 + c5); -; CHECK-NEXT: Stmt_Copy_0(96 * c2 + 4 * c4 + 3, 16 * c0 + 8 * c3 + 5, 256 * c1 + c5); -; CHECK-NEXT: Stmt_Copy_0(96 * c2 + 4 * c4 + 3, 16 * c0 + 8 * c3 + 6, 256 * c1 + c5); -; CHECK-NEXT: Stmt_Copy_0(96 * c2 + 4 * c4 + 3, 16 * c0 + 8 * c3 + 7, 256 * c1 + c5); -; CHECK-NEXT: } +; CHECK-NEXT: for (int c1 = 0; c1 <= 3; c1 += 1) { +; CHECK-NEXT: for (int c3 = 0; c3 <= 1055; c3 += 1) +; CHECK-NEXT: for (int c4 = 256 * c1; c4 <= min(1022, 256 * c1 + 255); 
c4 += 1) +; CHECK-NEXT: CopyStmt_0(0, c3, c4); +; CHECK-NEXT: for (int c2 = 0; c2 <= 10; c2 += 1) { +; CHECK-NEXT: for (int c3 = 96 * c2; c3 <= 96 * c2 + 95; c3 += 1) +; CHECK-NEXT: for (int c5 = 256 * c1; c5 <= min(1022, 256 * c1 + 255); c5 += 1) +; CHECK-NEXT: CopyStmt_1(c3, 0, c5); +; CHECK-NEXT: // 1st level tiling - Points +; CHECK-NEXT: // Register tiling - Tiles +; CHECK-NEXT: for (int c3 = 0; c3 <= 131; c3 += 1) +; CHECK-NEXT: for (int c4 = 0; c4 <= 23; c4 += 1) +; CHECK-NEXT: for (int c5 = 0; c5 <= min(255, -256 * c1 + 1022); c5 += 1) { +; CHECK-NEXT: // Register tiling - Points +; CHECK-NEXT: // 1st level tiling - Tiles +; CHECK-NEXT: // 1st level tiling - Points +; CHECK-NEXT: { +; CHECK-NEXT: Stmt_Copy_0(96 * c2 + 4 * c4, 8 * c3, 256 * c1 + c5); +; CHECK-NEXT: Stmt_Copy_0(96 * c2 + 4 * c4, 8 * c3 + 1, 256 * c1 + c5); +; CHECK-NEXT: Stmt_Copy_0(96 * c2 + 4 * c4, 8 * c3 + 2, 256 * c1 + c5); +; CHECK-NEXT: Stmt_Copy_0(96 * c2 + 4 * c4, 8 * c3 + 3, 256 * c1 + c5); +; CHECK-NEXT: Stmt_Copy_0(96 * c2 + 4 * c4, 8 * c3 + 4, 256 * c1 + c5); +; CHECK-NEXT: Stmt_Copy_0(96 * c2 + 4 * c4, 8 * c3 + 5, 256 * c1 + c5); +; CHECK-NEXT: Stmt_Copy_0(96 * c2 + 4 * c4, 8 * c3 + 6, 256 * c1 + c5); +; CHECK-NEXT: Stmt_Copy_0(96 * c2 + 4 * c4, 8 * c3 + 7, 256 * c1 + c5); +; CHECK-NEXT: Stmt_Copy_0(96 * c2 + 4 * c4 + 1, 8 * c3, 256 * c1 + c5); +; CHECK-NEXT: Stmt_Copy_0(96 * c2 + 4 * c4 + 1, 8 * c3 + 1, 256 * c1 + c5); +; CHECK-NEXT: Stmt_Copy_0(96 * c2 + 4 * c4 + 1, 8 * c3 + 2, 256 * c1 + c5); +; CHECK-NEXT: Stmt_Copy_0(96 * c2 + 4 * c4 + 1, 8 * c3 + 3, 256 * c1 + c5); +; CHECK-NEXT: Stmt_Copy_0(96 * c2 + 4 * c4 + 1, 8 * c3 + 4, 256 * c1 + c5); +; CHECK-NEXT: Stmt_Copy_0(96 * c2 + 4 * c4 + 1, 8 * c3 + 5, 256 * c1 + c5); +; CHECK-NEXT: Stmt_Copy_0(96 * c2 + 4 * c4 + 1, 8 * c3 + 6, 256 * c1 + c5); +; CHECK-NEXT: Stmt_Copy_0(96 * c2 + 4 * c4 + 1, 8 * c3 + 7, 256 * c1 + c5); +; CHECK-NEXT: Stmt_Copy_0(96 * c2 + 4 * c4 + 2, 8 * c3, 256 * c1 + c5); +; CHECK-NEXT: Stmt_Copy_0(96 * c2 
+ 4 * c4 + 2, 8 * c3 + 1, 256 * c1 + c5); +; CHECK-NEXT: Stmt_Copy_0(96 * c2 + 4 * c4 + 2, 8 * c3 + 2, 256 * c1 + c5); +; CHECK-NEXT: Stmt_Copy_0(96 * c2 + 4 * c4 + 2, 8 * c3 + 3, 256 * c1 + c5); +; CHECK-NEXT: Stmt_Copy_0(96 * c2 + 4 * c4 + 2, 8 * c3 + 4, 256 * c1 + c5); +; CHECK-NEXT: Stmt_Copy_0(96 * c2 + 4 * c4 + 2, 8 * c3 + 5, 256 * c1 + c5); +; CHECK-NEXT: Stmt_Copy_0(96 * c2 + 4 * c4 + 2, 8 * c3 + 6, 256 * c1 + c5); +; CHECK-NEXT: Stmt_Copy_0(96 * c2 + 4 * c4 + 2, 8 * c3 + 7, 256 * c1 + c5); +; CHECK-NEXT: Stmt_Copy_0(96 * c2 + 4 * c4 + 3, 8 * c3, 256 * c1 + c5); +; CHECK-NEXT: Stmt_Copy_0(96 * c2 + 4 * c4 + 3, 8 * c3 + 1, 256 * c1 + c5); +; CHECK-NEXT: Stmt_Copy_0(96 * c2 + 4 * c4 + 3, 8 * c3 + 2, 256 * c1 + c5); +; CHECK-NEXT: Stmt_Copy_0(96 * c2 + 4 * c4 + 3, 8 * c3 + 3, 256 * c1 + c5); +; CHECK-NEXT: Stmt_Copy_0(96 * c2 + 4 * c4 + 3, 8 * c3 + 4, 256 * c1 + c5); +; CHECK-NEXT: Stmt_Copy_0(96 * c2 + 4 * c4 + 3, 8 * c3 + 5, 256 * c1 + c5); +; CHECK-NEXT: Stmt_Copy_0(96 * c2 + 4 * c4 + 3, 8 * c3 + 6, 256 * c1 + c5); +; CHECK-NEXT: Stmt_Copy_0(96 * c2 + 4 * c4 + 3, 8 * c3 + 7, 256 * c1 + c5); ; CHECK-NEXT: } -; CHECK-NEXT: } +; CHECK-NEXT: } ; CHECK-NEXT: } +; CHECK-NEXT: } ; CHECK-NEXT: } ; target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" Index: polly/trunk/test/ScheduleOptimizer/pattern-matching-based-opts_3.ll =================================================================== --- polly/trunk/test/ScheduleOptimizer/pattern-matching-based-opts_3.ll +++ polly/trunk/test/ScheduleOptimizer/pattern-matching-based-opts_3.ll @@ -73,53 +73,52 @@ ; EXTRACTION-OF-MACRO-KERNEL-NEXT: Stmt_bb14(32 * c0 + c2, 32 * c1 + c3); ; EXTRACTION-OF-MACRO-KERNEL-NEXT: } ; EXTRACTION-OF-MACRO-KERNEL-NEXT: // 1st level tiling - Tiles -; EXTRACTION-OF-MACRO-KERNEL-NEXT: for (int c0 = 0; c0 <= 65; c0 += 1) -; EXTRACTION-OF-MACRO-KERNEL-NEXT: for (int c1 = 0; c1 <= 3; c1 += 1) -; EXTRACTION-OF-MACRO-KERNEL-NEXT: for (int c2 = 0; c2 <= 10; c2 += 1) { -; 
EXTRACTION-OF-MACRO-KERNEL-NEXT: // 1st level tiling - Points -; EXTRACTION-OF-MACRO-KERNEL-NEXT: // Register tiling - Tiles -; EXTRACTION-OF-MACRO-KERNEL-NEXT: for (int c3 = 0; c3 <= 1; c3 += 1) -; EXTRACTION-OF-MACRO-KERNEL-NEXT: for (int c4 = 0; c4 <= 23; c4 += 1) -; EXTRACTION-OF-MACRO-KERNEL-NEXT: for (int c5 = 0; c5 <= 255; c5 += 1) { -; EXTRACTION-OF-MACRO-KERNEL-NEXT: // Register tiling - Points -; EXTRACTION-OF-MACRO-KERNEL-NEXT: // 1st level tiling - Tiles -; EXTRACTION-OF-MACRO-KERNEL-NEXT: // 1st level tiling - Points -; EXTRACTION-OF-MACRO-KERNEL-NEXT: { -; EXTRACTION-OF-MACRO-KERNEL-NEXT: Stmt_bb24(96 * c2 + 4 * c4, 16 * c0 + 8 * c3, 256 * c1 + c5); -; EXTRACTION-OF-MACRO-KERNEL-NEXT: Stmt_bb24(96 * c2 + 4 * c4, 16 * c0 + 8 * c3 + 1, 256 * c1 + c5); -; EXTRACTION-OF-MACRO-KERNEL-NEXT: Stmt_bb24(96 * c2 + 4 * c4, 16 * c0 + 8 * c3 + 2, 256 * c1 + c5); -; EXTRACTION-OF-MACRO-KERNEL-NEXT: Stmt_bb24(96 * c2 + 4 * c4, 16 * c0 + 8 * c3 + 3, 256 * c1 + c5); -; EXTRACTION-OF-MACRO-KERNEL-NEXT: Stmt_bb24(96 * c2 + 4 * c4, 16 * c0 + 8 * c3 + 4, 256 * c1 + c5); -; EXTRACTION-OF-MACRO-KERNEL-NEXT: Stmt_bb24(96 * c2 + 4 * c4, 16 * c0 + 8 * c3 + 5, 256 * c1 + c5); -; EXTRACTION-OF-MACRO-KERNEL-NEXT: Stmt_bb24(96 * c2 + 4 * c4, 16 * c0 + 8 * c3 + 6, 256 * c1 + c5); -; EXTRACTION-OF-MACRO-KERNEL-NEXT: Stmt_bb24(96 * c2 + 4 * c4, 16 * c0 + 8 * c3 + 7, 256 * c1 + c5); -; EXTRACTION-OF-MACRO-KERNEL-NEXT: Stmt_bb24(96 * c2 + 4 * c4 + 1, 16 * c0 + 8 * c3, 256 * c1 + c5); -; EXTRACTION-OF-MACRO-KERNEL-NEXT: Stmt_bb24(96 * c2 + 4 * c4 + 1, 16 * c0 + 8 * c3 + 1, 256 * c1 + c5); -; EXTRACTION-OF-MACRO-KERNEL-NEXT: Stmt_bb24(96 * c2 + 4 * c4 + 1, 16 * c0 + 8 * c3 + 2, 256 * c1 + c5); -; EXTRACTION-OF-MACRO-KERNEL-NEXT: Stmt_bb24(96 * c2 + 4 * c4 + 1, 16 * c0 + 8 * c3 + 3, 256 * c1 + c5); -; EXTRACTION-OF-MACRO-KERNEL-NEXT: Stmt_bb24(96 * c2 + 4 * c4 + 1, 16 * c0 + 8 * c3 + 4, 256 * c1 + c5); -; EXTRACTION-OF-MACRO-KERNEL-NEXT: Stmt_bb24(96 * c2 + 4 * c4 + 1, 16 * c0 + 8 * c3 + 
5, 256 * c1 + c5); -; EXTRACTION-OF-MACRO-KERNEL-NEXT: Stmt_bb24(96 * c2 + 4 * c4 + 1, 16 * c0 + 8 * c3 + 6, 256 * c1 + c5); -; EXTRACTION-OF-MACRO-KERNEL-NEXT: Stmt_bb24(96 * c2 + 4 * c4 + 1, 16 * c0 + 8 * c3 + 7, 256 * c1 + c5); -; EXTRACTION-OF-MACRO-KERNEL-NEXT: Stmt_bb24(96 * c2 + 4 * c4 + 2, 16 * c0 + 8 * c3, 256 * c1 + c5); -; EXTRACTION-OF-MACRO-KERNEL-NEXT: Stmt_bb24(96 * c2 + 4 * c4 + 2, 16 * c0 + 8 * c3 + 1, 256 * c1 + c5); -; EXTRACTION-OF-MACRO-KERNEL-NEXT: Stmt_bb24(96 * c2 + 4 * c4 + 2, 16 * c0 + 8 * c3 + 2, 256 * c1 + c5); -; EXTRACTION-OF-MACRO-KERNEL-NEXT: Stmt_bb24(96 * c2 + 4 * c4 + 2, 16 * c0 + 8 * c3 + 3, 256 * c1 + c5); -; EXTRACTION-OF-MACRO-KERNEL-NEXT: Stmt_bb24(96 * c2 + 4 * c4 + 2, 16 * c0 + 8 * c3 + 4, 256 * c1 + c5); -; EXTRACTION-OF-MACRO-KERNEL-NEXT: Stmt_bb24(96 * c2 + 4 * c4 + 2, 16 * c0 + 8 * c3 + 5, 256 * c1 + c5); -; EXTRACTION-OF-MACRO-KERNEL-NEXT: Stmt_bb24(96 * c2 + 4 * c4 + 2, 16 * c0 + 8 * c3 + 6, 256 * c1 + c5); -; EXTRACTION-OF-MACRO-KERNEL-NEXT: Stmt_bb24(96 * c2 + 4 * c4 + 2, 16 * c0 + 8 * c3 + 7, 256 * c1 + c5); -; EXTRACTION-OF-MACRO-KERNEL-NEXT: Stmt_bb24(96 * c2 + 4 * c4 + 3, 16 * c0 + 8 * c3, 256 * c1 + c5); -; EXTRACTION-OF-MACRO-KERNEL-NEXT: Stmt_bb24(96 * c2 + 4 * c4 + 3, 16 * c0 + 8 * c3 + 1, 256 * c1 + c5); -; EXTRACTION-OF-MACRO-KERNEL-NEXT: Stmt_bb24(96 * c2 + 4 * c4 + 3, 16 * c0 + 8 * c3 + 2, 256 * c1 + c5); -; EXTRACTION-OF-MACRO-KERNEL-NEXT: Stmt_bb24(96 * c2 + 4 * c4 + 3, 16 * c0 + 8 * c3 + 3, 256 * c1 + c5); -; EXTRACTION-OF-MACRO-KERNEL-NEXT: Stmt_bb24(96 * c2 + 4 * c4 + 3, 16 * c0 + 8 * c3 + 4, 256 * c1 + c5); -; EXTRACTION-OF-MACRO-KERNEL-NEXT: Stmt_bb24(96 * c2 + 4 * c4 + 3, 16 * c0 + 8 * c3 + 5, 256 * c1 + c5); -; EXTRACTION-OF-MACRO-KERNEL-NEXT: Stmt_bb24(96 * c2 + 4 * c4 + 3, 16 * c0 + 8 * c3 + 6, 256 * c1 + c5); -; EXTRACTION-OF-MACRO-KERNEL-NEXT: Stmt_bb24(96 * c2 + 4 * c4 + 3, 16 * c0 + 8 * c3 + 7, 256 * c1 + c5); -; EXTRACTION-OF-MACRO-KERNEL-NEXT: } +; EXTRACTION-OF-MACRO-KERNEL-NEXT: for 
(int c1 = 0; c1 <= 3; c1 += 1) +; EXTRACTION-OF-MACRO-KERNEL-NEXT: for (int c2 = 0; c2 <= 10; c2 += 1) { +; EXTRACTION-OF-MACRO-KERNEL-NEXT: // 1st level tiling - Points +; EXTRACTION-OF-MACRO-KERNEL-NEXT: // Register tiling - Tiles +; EXTRACTION-OF-MACRO-KERNEL-NEXT: for (int c3 = 0; c3 <= 131; c3 += 1) +; EXTRACTION-OF-MACRO-KERNEL-NEXT: for (int c4 = 0; c4 <= 23; c4 += 1) +; EXTRACTION-OF-MACRO-KERNEL-NEXT: for (int c5 = 0; c5 <= 255; c5 += 1) { +; EXTRACTION-OF-MACRO-KERNEL-NEXT: // Register tiling - Points +; EXTRACTION-OF-MACRO-KERNEL-NEXT: // 1st level tiling - Tiles +; EXTRACTION-OF-MACRO-KERNEL-NEXT: // 1st level tiling - Points +; EXTRACTION-OF-MACRO-KERNEL-NEXT: { +; EXTRACTION-OF-MACRO-KERNEL-NEXT: Stmt_bb24(96 * c2 + 4 * c4, 8 * c3, 256 * c1 + c5); +; EXTRACTION-OF-MACRO-KERNEL-NEXT: Stmt_bb24(96 * c2 + 4 * c4, 8 * c3 + 1, 256 * c1 + c5); +; EXTRACTION-OF-MACRO-KERNEL-NEXT: Stmt_bb24(96 * c2 + 4 * c4, 8 * c3 + 2, 256 * c1 + c5); +; EXTRACTION-OF-MACRO-KERNEL-NEXT: Stmt_bb24(96 * c2 + 4 * c4, 8 * c3 + 3, 256 * c1 + c5); +; EXTRACTION-OF-MACRO-KERNEL-NEXT: Stmt_bb24(96 * c2 + 4 * c4, 8 * c3 + 4, 256 * c1 + c5); +; EXTRACTION-OF-MACRO-KERNEL-NEXT: Stmt_bb24(96 * c2 + 4 * c4, 8 * c3 + 5, 256 * c1 + c5); +; EXTRACTION-OF-MACRO-KERNEL-NEXT: Stmt_bb24(96 * c2 + 4 * c4, 8 * c3 + 6, 256 * c1 + c5); +; EXTRACTION-OF-MACRO-KERNEL-NEXT: Stmt_bb24(96 * c2 + 4 * c4, 8 * c3 + 7, 256 * c1 + c5); +; EXTRACTION-OF-MACRO-KERNEL-NEXT: Stmt_bb24(96 * c2 + 4 * c4 + 1, 8 * c3, 256 * c1 + c5); +; EXTRACTION-OF-MACRO-KERNEL-NEXT: Stmt_bb24(96 * c2 + 4 * c4 + 1, 8 * c3 + 1, 256 * c1 + c5); +; EXTRACTION-OF-MACRO-KERNEL-NEXT: Stmt_bb24(96 * c2 + 4 * c4 + 1, 8 * c3 + 2, 256 * c1 + c5); +; EXTRACTION-OF-MACRO-KERNEL-NEXT: Stmt_bb24(96 * c2 + 4 * c4 + 1, 8 * c3 + 3, 256 * c1 + c5); +; EXTRACTION-OF-MACRO-KERNEL-NEXT: Stmt_bb24(96 * c2 + 4 * c4 + 1, 8 * c3 + 4, 256 * c1 + c5); +; EXTRACTION-OF-MACRO-KERNEL-NEXT: Stmt_bb24(96 * c2 + 4 * c4 + 1, 8 * c3 + 5, 256 * c1 + c5); +; 
EXTRACTION-OF-MACRO-KERNEL-NEXT: Stmt_bb24(96 * c2 + 4 * c4 + 1, 8 * c3 + 6, 256 * c1 + c5); +; EXTRACTION-OF-MACRO-KERNEL-NEXT: Stmt_bb24(96 * c2 + 4 * c4 + 1, 8 * c3 + 7, 256 * c1 + c5); +; EXTRACTION-OF-MACRO-KERNEL-NEXT: Stmt_bb24(96 * c2 + 4 * c4 + 2, 8 * c3, 256 * c1 + c5); +; EXTRACTION-OF-MACRO-KERNEL-NEXT: Stmt_bb24(96 * c2 + 4 * c4 + 2, 8 * c3 + 1, 256 * c1 + c5); +; EXTRACTION-OF-MACRO-KERNEL-NEXT: Stmt_bb24(96 * c2 + 4 * c4 + 2, 8 * c3 + 2, 256 * c1 + c5); +; EXTRACTION-OF-MACRO-KERNEL-NEXT: Stmt_bb24(96 * c2 + 4 * c4 + 2, 8 * c3 + 3, 256 * c1 + c5); +; EXTRACTION-OF-MACRO-KERNEL-NEXT: Stmt_bb24(96 * c2 + 4 * c4 + 2, 8 * c3 + 4, 256 * c1 + c5); +; EXTRACTION-OF-MACRO-KERNEL-NEXT: Stmt_bb24(96 * c2 + 4 * c4 + 2, 8 * c3 + 5, 256 * c1 + c5); +; EXTRACTION-OF-MACRO-KERNEL-NEXT: Stmt_bb24(96 * c2 + 4 * c4 + 2, 8 * c3 + 6, 256 * c1 + c5); +; EXTRACTION-OF-MACRO-KERNEL-NEXT: Stmt_bb24(96 * c2 + 4 * c4 + 2, 8 * c3 + 7, 256 * c1 + c5); +; EXTRACTION-OF-MACRO-KERNEL-NEXT: Stmt_bb24(96 * c2 + 4 * c4 + 3, 8 * c3, 256 * c1 + c5); +; EXTRACTION-OF-MACRO-KERNEL-NEXT: Stmt_bb24(96 * c2 + 4 * c4 + 3, 8 * c3 + 1, 256 * c1 + c5); +; EXTRACTION-OF-MACRO-KERNEL-NEXT: Stmt_bb24(96 * c2 + 4 * c4 + 3, 8 * c3 + 2, 256 * c1 + c5); +; EXTRACTION-OF-MACRO-KERNEL-NEXT: Stmt_bb24(96 * c2 + 4 * c4 + 3, 8 * c3 + 3, 256 * c1 + c5); +; EXTRACTION-OF-MACRO-KERNEL-NEXT: Stmt_bb24(96 * c2 + 4 * c4 + 3, 8 * c3 + 4, 256 * c1 + c5); +; EXTRACTION-OF-MACRO-KERNEL-NEXT: Stmt_bb24(96 * c2 + 4 * c4 + 3, 8 * c3 + 5, 256 * c1 + c5); +; EXTRACTION-OF-MACRO-KERNEL-NEXT: Stmt_bb24(96 * c2 + 4 * c4 + 3, 8 * c3 + 6, 256 * c1 + c5); +; EXTRACTION-OF-MACRO-KERNEL-NEXT: Stmt_bb24(96 * c2 + 4 * c4 + 3, 8 * c3 + 7, 256 * c1 + c5); ; EXTRACTION-OF-MACRO-KERNEL-NEXT: } -; EXTRACTION-OF-MACRO-KERNEL-NEXT: } +; EXTRACTION-OF-MACRO-KERNEL-NEXT: } +; EXTRACTION-OF-MACRO-KERNEL-NEXT: } ; EXTRACTION-OF-MACRO-KERNEL-NEXT: } ; target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"