Index: lib/Transform/ScheduleOptimizer.cpp =================================================================== --- lib/Transform/ScheduleOptimizer.cpp +++ lib/Transform/ScheduleOptimizer.cpp @@ -323,19 +323,20 @@ return isl::union_set(IsolateOption); } -/// Create an isl::union_set, which describes the atomic option for the +namespace { +/// Create an isl::union_set, which describes the specified option for the /// dimension of the current node. /// -/// It may help to reduce the size of generated code. -/// -/// @param Ctx An isl::ctx, which is used to create the isl::union_set. -static isl::union_set getAtomicOptions(isl::ctx Ctx) { +/// @param Ctx An isl::ctx, which is used to create the isl::union_set. +/// @param Option The name of the option. +isl::union_set getDimOptions(isl::ctx Ctx, const char *Option) { isl::space Space(Ctx, 0, 1); - isl::set AtomicOption = isl::set::universe(Space); - isl::id Id = isl::id::alloc(Ctx, "atomic", nullptr); - AtomicOption = AtomicOption.set_tuple_id(Id); - return isl::union_set(AtomicOption); + auto DimOption = isl::set::universe(Space); + auto Id = isl::id::alloc(Ctx, Option, nullptr); + DimOption = DimOption.set_tuple_id(Id); + return isl::union_set(DimOption); } +} // namespace /// Create an isl::union_set, which describes the option of the form /// [isolate[] -> unroll[x]]. @@ -391,7 +392,7 @@ isl::map ScheduleRelation = isl::map::from_union_map(SchedRelUMap); isl::set ScheduleRange = ScheduleRelation.range(); isl::set IsolateDomain = getPartialTilePrefixes(ScheduleRange, VectorWidth); - isl::union_set AtomicOption = getAtomicOptions(IsolateDomain.get_ctx()); + auto AtomicOption = getDimOptions(IsolateDomain.get_ctx(), "atomic"); isl::union_set IsolateOption = getIsolateOptions(IsolateDomain, 1); Node = Node.parent().parent(); isl::union_set Options = IsolateOption.unite(AtomicOption); @@ -1207,13 +1208,12 @@ isl::union_set IsolateOption = getIsolateOptions(Prefix.add_dims(isl::dim::set, 3), 3); isl::ctx Ctx = Node.get_ctx(); - isl::union_set AtomicOption = getAtomicOptions(Ctx); - isl::union_set Options = IsolateOption.unite(AtomicOption); + auto Options = IsolateOption.unite(getDimOptions(Ctx, "unroll")); Options = Options.unite(getUnrollIsolatedSetOptions(Ctx)); Node = Node.band_set_ast_build_options(Options); Node = Node.parent().parent().parent(); IsolateOption = getIsolateOptions(Prefix, 3); - Options = IsolateOption.unite(AtomicOption); + Options = IsolateOption.unite(getDimOptions(Ctx, "separate")); Node = Node.band_set_ast_build_options(Options); Node = Node.child(0).child(0).child(0); return Node; Index: test/ScheduleOptimizer/pattern-matching-based-opts_12.ll =================================================================== --- test/ScheduleOptimizer/pattern-matching-based-opts_12.ll +++ test/ScheduleOptimizer/pattern-matching-based-opts_12.ll @@ -287,26 +287,378 @@ ; CHECK-NEXT: Stmt_for_body6(384 * c2 + 8 * c4 + 7, 32 * c3 + 31, 512 * c1 + c5); ; CHECK-NEXT: } ; CHECK-NEXT: } -; CHECK-NEXT: if (c2 == 2) -; CHECK-NEXT: for (int c5 = 0; c5 <= min(511, -512 * c1 + 1019); c5 += 1) { -; CHECK-NEXT: // Loop Vectorizer Disabled -; CHECK-NEXT: // Register tiling - Points -; CHECK-NEXT: for (int c6 = 0; c6 <= 3; c6 += 1) -; CHECK-NEXT: for (int c7 = 0; c7 <= 31; c7 += 1) -; CHECK-NEXT: Stmt_for_body6(c6 + 1016, 32 * c3 + c7, 512 * c1 + c5); +; CHECK-NEXT: if (c2 == 2) +; CHECK-NEXT: for (int c5 = 0; c5 <= min(511, -512 * c1 + 1019); c5 += 1) { +; CHECK-NEXT: // Loop Vectorizer Disabled +; CHECK-NEXT: // Register tiling - Points +; CHECK-NEXT: { +; CHECK-NEXT: Stmt_for_body6(1016, 32 * c3, 512 * c1 + c5); +; CHECK-NEXT: Stmt_for_body6(1016, 32 * c3 + 1, 512 * c1 + c5); +; CHECK-NEXT: Stmt_for_body6(1016, 32 * c3 + 2, 512 * c1 + c5); +; CHECK-NEXT: Stmt_for_body6(1016, 32 * c3 + 3, 512 * c1 + c5); +; CHECK-NEXT: Stmt_for_body6(1016, 32 * c3 + 4, 512 * c1 + c5); +; CHECK-NEXT: Stmt_for_body6(1016, 32 * c3 + 5, 512 * c1 + c5); +; CHECK-NEXT: Stmt_for_body6(1016, 32 * c3 + 6, 512 * c1 + c5); +; CHECK-NEXT: Stmt_for_body6(1016, 32 * c3 + 7, 512 * c1 + c5); +; CHECK-NEXT: Stmt_for_body6(1016, 32 * c3 + 8, 512 * c1 + c5); +; CHECK-NEXT: Stmt_for_body6(1016, 32 * c3 + 9, 512 * c1 + c5); +; CHECK-NEXT: Stmt_for_body6(1016, 32 * c3 + 10, 512 * c1 + c5); +; CHECK-NEXT: Stmt_for_body6(1016, 32 * c3 + 11, 512 * c1 + c5); +; CHECK-NEXT: Stmt_for_body6(1016, 32 * c3 + 12, 512 * c1 + c5); +; CHECK-NEXT: Stmt_for_body6(1016, 32 * c3 + 13, 512 * c1 + c5); +; CHECK-NEXT: Stmt_for_body6(1016, 32 * c3 + 14, 512 * c1 + c5); +; CHECK-NEXT: Stmt_for_body6(1016, 32 * c3 + 15, 512 * c1 + c5); +; CHECK-NEXT: Stmt_for_body6(1016, 32 * c3 + 16, 512 * c1 + c5); +; CHECK-NEXT: Stmt_for_body6(1016, 32 * c3 + 17, 512 * c1 + c5); +; CHECK-NEXT: Stmt_for_body6(1016, 32 * c3 + 18, 512 * c1 + c5); +; CHECK-NEXT: Stmt_for_body6(1016, 32 * c3 + 19, 512 * c1 + c5); +; CHECK-NEXT: Stmt_for_body6(1016, 32 * c3 + 20, 512 * c1 + c5); +; CHECK-NEXT: Stmt_for_body6(1016, 32 * c3 + 21, 512 * c1 + c5); +; CHECK-NEXT: Stmt_for_body6(1016, 32 * c3 + 22, 512 * c1 + c5); +; CHECK-NEXT: Stmt_for_body6(1016, 32 * c3 + 23, 512 * c1 + c5); +; CHECK-NEXT: Stmt_for_body6(1016, 32 * c3 + 24, 512 * c1 + c5); +; CHECK-NEXT: Stmt_for_body6(1016, 32 * c3 + 25, 512 * c1 + c5); +; CHECK-NEXT: Stmt_for_body6(1016, 32 * c3 + 26, 512 * c1 + c5); +; CHECK-NEXT: Stmt_for_body6(1016, 32 * c3 + 27, 512 * c1 + c5); +; CHECK-NEXT: Stmt_for_body6(1016, 32 * c3 + 28, 512 * c1 + c5); +; CHECK-NEXT: Stmt_for_body6(1016, 32 * c3 + 29, 512 * c1 + c5); +; CHECK-NEXT: Stmt_for_body6(1016, 32 * c3 + 30, 512 * c1 + c5); +; CHECK-NEXT: Stmt_for_body6(1016, 32 * c3 + 31, 512 * c1 + c5); +; CHECK-NEXT: Stmt_for_body6(1017, 32 * c3, 512 * c1 + c5); +; CHECK-NEXT: Stmt_for_body6(1017, 32 * c3 + 1, 512 * c1 + c5); +; CHECK-NEXT: Stmt_for_body6(1017, 32 * c3 + 2, 512 * c1 + c5); +; CHECK-NEXT: Stmt_for_body6(1017, 32 * c3 + 3, 512 * c1 + c5); +; CHECK-NEXT: Stmt_for_body6(1017, 32 * c3 + 4, 512 * c1 + c5); +; CHECK-NEXT: Stmt_for_body6(1017, 32 * c3 + 5, 512 * c1 + c5); +; CHECK-NEXT: Stmt_for_body6(1017, 32 * c3 + 6, 512 * c1 + c5); +; CHECK-NEXT: Stmt_for_body6(1017, 32 * c3 + 7, 512 * c1 + c5); +; CHECK-NEXT: Stmt_for_body6(1017, 32 * c3 + 8, 512 * c1 + c5); +; CHECK-NEXT: Stmt_for_body6(1017, 32 * c3 + 9, 512 * c1 + c5); +; CHECK-NEXT: Stmt_for_body6(1017, 32 * c3 + 10, 512 * c1 + c5); +; CHECK-NEXT: Stmt_for_body6(1017, 32 * c3 + 11, 512 * c1 + c5); +; CHECK-NEXT: Stmt_for_body6(1017, 32 * c3 + 12, 512 * c1 + c5); +; CHECK-NEXT: Stmt_for_body6(1017, 32 * c3 + 13, 512 * c1 + c5); +; CHECK-NEXT: Stmt_for_body6(1017, 32 * c3 + 14, 512 * c1 + c5); +; CHECK-NEXT: Stmt_for_body6(1017, 32 * c3 + 15, 512 * c1 + c5); +; CHECK-NEXT: Stmt_for_body6(1017, 32 * c3 + 16, 512 * c1 + c5); +; CHECK-NEXT: Stmt_for_body6(1017, 32 * c3 + 17, 512 * c1 + c5); +; CHECK-NEXT: Stmt_for_body6(1017, 32 * c3 + 18, 512 * c1 + c5); +; CHECK-NEXT: Stmt_for_body6(1017, 32 * c3 + 19, 512 * c1 + c5); +; CHECK-NEXT: Stmt_for_body6(1017, 32 * c3 + 20, 512 * c1 + c5); +; CHECK-NEXT: Stmt_for_body6(1017, 32 * c3 + 21, 512 * c1 + c5); +; CHECK-NEXT: Stmt_for_body6(1017, 32 * c3 + 22, 512 * c1 + c5); +; CHECK-NEXT: Stmt_for_body6(1017, 32 * c3 + 23, 512 * c1 + c5); +; CHECK-NEXT: Stmt_for_body6(1017, 32 * c3 + 24, 512 * c1 + c5); +; CHECK-NEXT: Stmt_for_body6(1017, 32 * c3 + 25, 512 * c1 + c5); +; CHECK-NEXT: Stmt_for_body6(1017, 32 * c3 + 26, 512 * c1 + c5); +; CHECK-NEXT: Stmt_for_body6(1017, 32 * c3 + 27, 512 * c1 + c5); +; CHECK-NEXT: Stmt_for_body6(1017, 32 * c3 + 28, 512 * c1 + c5); +; CHECK-NEXT: Stmt_for_body6(1017, 32 * c3 + 29, 512 * c1 + c5); +; CHECK-NEXT: Stmt_for_body6(1017, 32 * c3 + 30, 512 * c1 + c5); +; CHECK-NEXT: Stmt_for_body6(1017, 32 * c3 + 31, 512 * c1 + c5); +; CHECK-NEXT: Stmt_for_body6(1018, 32 * c3, 512 * c1 + c5); +; CHECK-NEXT: Stmt_for_body6(1018, 32 * c3 + 1, 512 * c1 + c5); +; CHECK-NEXT: Stmt_for_body6(1018, 32 * c3 + 2, 512 * c1 + c5); +; CHECK-NEXT: Stmt_for_body6(1018, 32 * c3 + 3, 512 * c1 + c5); +; CHECK-NEXT: Stmt_for_body6(1018, 32 * c3 + 4, 512 * c1 + c5); +; CHECK-NEXT: Stmt_for_body6(1018, 32 * c3 + 5, 512 * c1 + c5); +; CHECK-NEXT: Stmt_for_body6(1018, 32 * c3 + 6, 512 * c1 + c5); +; CHECK-NEXT: Stmt_for_body6(1018, 32 * c3 + 7, 512 * c1 + c5); +; CHECK-NEXT: Stmt_for_body6(1018, 32 * c3 + 8, 512 * c1 + c5); +; CHECK-NEXT: Stmt_for_body6(1018, 32 * c3 + 9, 512 * c1 + c5); +; CHECK-NEXT: Stmt_for_body6(1018, 32 * c3 + 10, 512 * c1 + c5); +; CHECK-NEXT: Stmt_for_body6(1018, 32 * c3 + 11, 512 * c1 + c5); +; CHECK-NEXT: Stmt_for_body6(1018, 32 * c3 + 12, 512 * c1 + c5); +; CHECK-NEXT: Stmt_for_body6(1018, 32 * c3 + 13, 512 * c1 + c5); +; CHECK-NEXT: Stmt_for_body6(1018, 32 * c3 + 14, 512 * c1 + c5); +; CHECK-NEXT: Stmt_for_body6(1018, 32 * c3 + 15, 512 * c1 + c5); +; CHECK-NEXT: Stmt_for_body6(1018, 32 * c3 + 16, 512 * c1 + c5); +; CHECK-NEXT: Stmt_for_body6(1018, 32 * c3 + 17, 512 * c1 + c5); +; CHECK-NEXT: Stmt_for_body6(1018, 32 * c3 + 18, 512 * c1 + c5); +; CHECK-NEXT: Stmt_for_body6(1018, 32 * c3 + 19, 512 * c1 + c5); +; CHECK-NEXT: Stmt_for_body6(1018, 32 * c3 + 20, 512 * c1 + c5); +; CHECK-NEXT: Stmt_for_body6(1018, 32 * c3 + 21, 512 * c1 + c5); +; CHECK-NEXT: Stmt_for_body6(1018, 32 * c3 + 22, 512 * c1 + c5); +; CHECK-NEXT: Stmt_for_body6(1018, 32 * c3 + 23, 512 * c1 + c5); +; CHECK-NEXT: Stmt_for_body6(1018, 32 * c3 + 24, 512 * c1 + c5); +; CHECK-NEXT: Stmt_for_body6(1018, 32 * c3 + 25, 512 * c1 + c5); +; CHECK-NEXT: Stmt_for_body6(1018, 32 * c3 + 26, 512 * c1 + c5); +; CHECK-NEXT: Stmt_for_body6(1018, 32 * c3 + 27, 512 * c1 + c5); +; CHECK-NEXT: Stmt_for_body6(1018, 32 * c3 + 28, 512 * c1 + c5); +; CHECK-NEXT: Stmt_for_body6(1018, 32 * c3 + 29, 512 * c1 + c5); +; CHECK-NEXT: Stmt_for_body6(1018, 32 * c3 + 30, 512 * c1 + c5); +; CHECK-NEXT: Stmt_for_body6(1018, 32 * c3 + 31, 512 * c1 + c5); +; CHECK-NEXT: Stmt_for_body6(1019, 32 * c3, 512 * c1 + c5); +; CHECK-NEXT: Stmt_for_body6(1019, 32 * c3 + 1, 512 * c1 + c5); +; CHECK-NEXT: Stmt_for_body6(1019, 32 * c3 + 2, 512 * c1 + c5); +; CHECK-NEXT: Stmt_for_body6(1019, 32 * c3 + 3, 512 * c1 + c5); +; CHECK-NEXT: Stmt_for_body6(1019, 32 * c3 + 4, 512 * c1 + c5); +; CHECK-NEXT: Stmt_for_body6(1019, 32 * c3 + 5, 512 * c1 + c5); +; CHECK-NEXT: Stmt_for_body6(1019, 32 * c3 + 6, 512 * c1 + c5); +; CHECK-NEXT: Stmt_for_body6(1019, 32 * c3 + 7, 512 * c1 + c5); +; CHECK-NEXT: Stmt_for_body6(1019, 32 * c3 + 8, 512 * c1 + c5); +; CHECK-NEXT: Stmt_for_body6(1019, 32 * c3 + 9, 512 * c1 + c5); +; CHECK-NEXT: Stmt_for_body6(1019, 32 * c3 + 10, 512 * c1 + c5); +; CHECK-NEXT: Stmt_for_body6(1019, 32 * c3 + 11, 512 * c1 + c5); +; CHECK-NEXT: Stmt_for_body6(1019, 32 * c3 + 12, 512 * c1 + c5); +; CHECK-NEXT: Stmt_for_body6(1019, 32 * c3 + 13, 512 * c1 + c5); +; CHECK-NEXT: Stmt_for_body6(1019, 32 * c3 + 14, 512 * c1 + c5); +; CHECK-NEXT: Stmt_for_body6(1019, 32 * c3 + 15, 512 * c1 + c5); +; CHECK-NEXT: Stmt_for_body6(1019, 32 * c3 + 16, 512 * c1 + c5); +; CHECK-NEXT: Stmt_for_body6(1019, 32 * c3 + 17, 512 * c1 + c5); +; CHECK-NEXT: Stmt_for_body6(1019, 32 * c3 + 18, 512 * c1 + c5); +; CHECK-NEXT: Stmt_for_body6(1019, 32 * c3 + 19, 512 * c1 + c5); +; CHECK-NEXT: Stmt_for_body6(1019, 32 * c3 + 20, 512 * c1 + c5); +; CHECK-NEXT: Stmt_for_body6(1019, 32 * c3 + 21, 512 * c1 + c5); +; CHECK-NEXT: Stmt_for_body6(1019, 32 * c3 + 22, 512 * c1 + c5); +; CHECK-NEXT: Stmt_for_body6(1019, 32 * c3 + 23, 512 * c1 + c5); +; CHECK-NEXT: Stmt_for_body6(1019, 32 * c3 + 24, 512 * c1 + c5); +; CHECK-NEXT: Stmt_for_body6(1019, 32 * c3 + 25, 512 * c1 + c5); +; CHECK-NEXT: Stmt_for_body6(1019, 32 * c3 + 26, 512 * c1 + c5); +; CHECK-NEXT: Stmt_for_body6(1019, 32 * c3 + 27, 512 * c1 + c5); +; CHECK-NEXT: Stmt_for_body6(1019, 32 * c3 + 28, 512 * c1 + c5); +; CHECK-NEXT: Stmt_for_body6(1019, 32 * c3 + 29, 512 * c1 + c5); +; CHECK-NEXT: Stmt_for_body6(1019, 32 * c3 + 30, 512 * c1 + c5); +; CHECK-NEXT: Stmt_for_body6(1019, 32 * c3 + 31, 512 * c1 + c5); +; CHECK-NEXT: } +; CHECK-NEXT: } ; CHECK-NEXT: } -; CHECK-NEXT: } -; CHECK-NEXT: for (int c4 = 0; c4 <= min(47, -48 * c2 + 127); c4 += 1) -; CHECK-NEXT: for (int c5 = 0; c5 <= min(511, -512 * c1 + 1019); c5 += 1) { -; CHECK-NEXT: // Loop Vectorizer Disabled -; CHECK-NEXT: // Register tiling - Points -; CHECK-NEXT: for (int c6 = 0; c6 <= min(7, -384 * c2 - 8 * c4 + 1019); c6 += 1) -; CHECK-NEXT: for (int c7 = 0; c7 <= 27; c7 += 1) -; CHECK-NEXT: Stmt_for_body6(384 * c2 + 8 * c4 + c6, c7 + 992, 512 * c1 + c5); +; CHECK-NEXT: for (int c4 = 0; c4 <= min(47, -48 * c2 + 127); c4 += 1) +; CHECK-NEXT: for (int c5 = 0; c5 <= min(511, -512 * c1 + 1019); c5 += 1) { +; CHECK-NEXT: // Loop Vectorizer Disabled +; CHECK-NEXT: // Register tiling - Points +; CHECK-NEXT: { +; CHECK-NEXT: Stmt_for_body6(384 * c2 + 8 * c4, 992, 512 * c1 + c5); +; CHECK-NEXT: Stmt_for_body6(384 * c2 + 8 * c4, 993, 512 * c1 + c5); +; CHECK-NEXT: Stmt_for_body6(384 * c2 + 8 * c4, 994, 512 * c1 + c5); +; CHECK-NEXT: Stmt_for_body6(384 * c2 + 8 * c4, 995, 512 * c1 + c5); +; CHECK-NEXT: Stmt_for_body6(384 * c2 + 8 * c4, 996, 512 * c1 + c5); +; CHECK-NEXT: Stmt_for_body6(384 * c2 + 8 * c4, 997, 512 * c1 + c5); +; CHECK-NEXT: Stmt_for_body6(384 * c2 + 8 * c4, 998, 512 * c1 + c5); +; CHECK-NEXT: Stmt_for_body6(384 * c2 + 8 * c4, 999, 512 * c1 + c5); +; CHECK-NEXT: Stmt_for_body6(384 * c2 + 8 * c4, 1000, 512 * c1 + c5); +; CHECK-NEXT: Stmt_for_body6(384 * c2 + 8 * c4, 1001, 512 * c1 + c5); +; CHECK-NEXT: Stmt_for_body6(384 * c2 + 8 * c4, 1002, 512 * c1 + c5); +; CHECK-NEXT: Stmt_for_body6(384 * c2 + 8 * c4, 1003, 512 * c1 + c5); +; CHECK-NEXT: Stmt_for_body6(384 * c2 + 8 * c4, 1004, 512 * c1 + c5); +; CHECK-NEXT: Stmt_for_body6(384 * c2 + 8 * c4, 1005, 512 * c1 + c5); +; CHECK-NEXT: Stmt_for_body6(384 * c2 + 8 * c4, 1006, 512 * c1 + c5); +; CHECK-NEXT: Stmt_for_body6(384 * c2 + 8 * c4, 1007, 512 * c1 + c5); +; CHECK-NEXT: Stmt_for_body6(384 * c2 + 8 * c4, 1008, 512 * c1 + c5); +; CHECK-NEXT: Stmt_for_body6(384 * c2 + 8 * c4, 1009, 512 * c1 + c5); +; CHECK-NEXT: Stmt_for_body6(384 * c2 + 8 * c4, 1010, 512 * c1 + c5); +; CHECK-NEXT: Stmt_for_body6(384 * c2 + 8 * c4, 1011, 512 * c1 + c5); +; CHECK-NEXT: Stmt_for_body6(384 * c2 + 8 * c4, 1012, 512 * c1 + c5); +; CHECK-NEXT: Stmt_for_body6(384 * c2 + 8 * c4, 1013, 512 * c1 + c5); +; CHECK-NEXT: Stmt_for_body6(384 * c2 + 8 * c4, 1014, 512 * c1 + c5); +; CHECK-NEXT: Stmt_for_body6(384 * c2 + 8 * c4, 1015, 512 * c1 + c5); +; CHECK-NEXT: Stmt_for_body6(384 * c2 + 8 * c4, 1016, 512 * c1 + c5); +; CHECK-NEXT: Stmt_for_body6(384 * c2 + 8 * c4, 1017, 512 * c1 + c5); +; CHECK-NEXT: Stmt_for_body6(384 * c2 + 8 * c4, 1018, 512 * c1 + c5); +; CHECK-NEXT: Stmt_for_body6(384 * c2 + 8 * c4, 1019, 512 * c1 + c5); +; CHECK-NEXT: Stmt_for_body6(384 * c2 + 8 * c4 + 1, 992, 512 * c1 + c5); +; CHECK-NEXT: Stmt_for_body6(384 * c2 + 8 * c4 + 1, 993, 512 * c1 + c5); +; CHECK-NEXT: Stmt_for_body6(384 * c2 + 8 * c4 + 1, 994, 512 * c1 + c5); +; CHECK-NEXT: Stmt_for_body6(384 * c2 + 8 * c4 + 1, 995, 512 * c1 + c5); +; CHECK-NEXT: Stmt_for_body6(384 * c2 + 8 * c4 + 1, 996, 512 * c1 + c5); +; CHECK-NEXT: Stmt_for_body6(384 * c2 + 8 * c4 + 1, 997, 512 * c1 + c5); +; CHECK-NEXT: Stmt_for_body6(384 * c2 + 8 * c4 + 1, 998, 512 * c1 + c5); +; CHECK-NEXT: Stmt_for_body6(384 * c2 + 8 * c4 + 1, 999, 512 * c1 + c5); +; CHECK-NEXT: Stmt_for_body6(384 * c2 + 8 * c4 + 1, 1000, 512 * c1 + c5); +; CHECK-NEXT: Stmt_for_body6(384 * c2 + 8 * c4 + 1, 1001, 512 * c1 + c5); +; CHECK-NEXT: Stmt_for_body6(384 * c2 + 8 * c4 + 1, 1002, 512 * c1 + c5); +; CHECK-NEXT: Stmt_for_body6(384 * c2 + 8 * c4 + 1, 1003, 512 * c1 + c5); +; CHECK-NEXT: Stmt_for_body6(384 * c2 + 8 * c4 + 1, 1004, 512 * c1 + c5); +; CHECK-NEXT: Stmt_for_body6(384 * c2 + 8 * c4 + 1, 1005, 512 * c1 + c5); +; CHECK-NEXT: Stmt_for_body6(384 * c2 + 8 * c4 + 1, 1006, 512 * c1 + c5); +; CHECK-NEXT: Stmt_for_body6(384 * c2 + 8 * c4 + 1, 1007, 512 * c1 + c5); +; CHECK-NEXT: Stmt_for_body6(384 * c2 + 8 * c4 + 1, 1008, 512 * c1 + c5); +; CHECK-NEXT: Stmt_for_body6(384 * c2 + 8 * c4 + 1, 1009, 512 * c1 + c5); +; CHECK-NEXT: Stmt_for_body6(384 * c2 + 8 * c4 + 1, 1010, 512 * c1 + c5); +; CHECK-NEXT: Stmt_for_body6(384 * c2 + 8 * c4 + 1, 1011, 512 * c1 + c5); +; CHECK-NEXT: Stmt_for_body6(384 * c2 + 8 * c4 + 1, 1012, 512 * c1 + c5); +; CHECK-NEXT: Stmt_for_body6(384 * c2 + 8 * c4 + 1, 1013, 512 * c1 + c5); +; CHECK-NEXT: Stmt_for_body6(384 * c2 + 8 * c4 + 1, 1014, 512 * c1 + c5); +; CHECK-NEXT: Stmt_for_body6(384 * c2 + 8 * c4 + 1, 1015, 512 * c1 + c5); +; CHECK-NEXT: Stmt_for_body6(384 * c2 + 8 * c4 + 1, 1016, 512 * c1 + c5); +; CHECK-NEXT: Stmt_for_body6(384 * c2 + 8 * c4 + 1, 1017, 512 * c1 + c5); +; CHECK-NEXT: Stmt_for_body6(384 * c2 + 8 * c4 + 1, 1018, 512 * c1 + c5); +; CHECK-NEXT: Stmt_for_body6(384 * c2 + 8 * c4 + 1, 1019, 512 * c1 + c5); +; CHECK-NEXT: Stmt_for_body6(384 * c2 + 8 * c4 + 2, 992, 512 * c1 + c5); +; CHECK-NEXT: Stmt_for_body6(384 * c2 + 8 * c4 + 2, 993, 512 * c1 + c5); +; CHECK-NEXT: Stmt_for_body6(384 * c2 + 8 * c4 + 2, 994, 512 * c1 + c5); +; CHECK-NEXT: Stmt_for_body6(384 * c2 + 8 * c4 + 2, 995, 512 * c1 + c5); +; CHECK-NEXT: Stmt_for_body6(384 * c2 + 8 * c4 + 2, 996, 512 * c1 + c5); +; CHECK-NEXT: Stmt_for_body6(384 * c2 + 8 * c4 + 2, 997, 512 * c1 + c5); +; CHECK-NEXT: Stmt_for_body6(384 * c2 + 8 * c4 + 2, 998, 512 * c1 + c5); +; CHECK-NEXT: Stmt_for_body6(384 * c2 + 8 * c4 + 2, 999, 512 * c1 + c5); +; CHECK-NEXT: Stmt_for_body6(384 * c2 + 8 * c4 + 2, 1000, 512 * c1 + c5); +; CHECK-NEXT: Stmt_for_body6(384 * c2 + 8 * c4 + 2, 1001, 512 * c1 + c5); +; CHECK-NEXT: Stmt_for_body6(384 * c2 + 8 * c4 + 2, 1002, 512 * c1 + c5); +; CHECK-NEXT: Stmt_for_body6(384 * c2 + 8 * c4 + 2, 1003, 512 * c1 + c5); +; CHECK-NEXT: Stmt_for_body6(384 * c2 + 8 * c4 + 2, 1004, 512 * c1 + c5); +; CHECK-NEXT: Stmt_for_body6(384 * c2 + 8 * c4 + 2, 1005, 512 * c1 + c5); +; CHECK-NEXT: Stmt_for_body6(384 * c2 + 8 * c4 + 2, 1006, 512 * c1 + c5); +; CHECK-NEXT: Stmt_for_body6(384 * c2 + 8 * c4 + 2, 1007, 512 * c1 + c5); +; CHECK-NEXT: Stmt_for_body6(384 * c2 + 8 * c4 + 2, 1008, 512 * c1 + c5); +; CHECK-NEXT: Stmt_for_body6(384 * c2 + 8 * c4 + 2, 1009, 512 * c1 + c5); +; CHECK-NEXT: Stmt_for_body6(384 * c2 + 8 * c4 + 2, 1010, 512 * c1 + c5); +; CHECK-NEXT: Stmt_for_body6(384 * c2 + 8 * c4 + 2, 1011, 512 * c1 + c5); +; CHECK-NEXT: Stmt_for_body6(384 * c2 + 8 * c4 + 2, 1012, 512 * c1 + c5); +; CHECK-NEXT: Stmt_for_body6(384 * c2 + 8 * c4 + 2, 1013, 512 * c1 + c5); +; CHECK-NEXT: Stmt_for_body6(384 * c2 + 8 * c4 + 2, 1014, 512 * c1 + c5); +; CHECK-NEXT: Stmt_for_body6(384 * c2 + 8 * c4 + 2, 1015, 512 * c1 + c5); +; CHECK-NEXT: Stmt_for_body6(384 * c2 + 8 * c4 + 2, 1016, 512 * c1 + c5); +; CHECK-NEXT: Stmt_for_body6(384 * c2 + 8 * c4 + 2, 1017, 512 * c1 + c5); +; CHECK-NEXT: Stmt_for_body6(384 * c2 + 8 * c4 + 2, 1018, 512 * c1 + c5); +; CHECK-NEXT: Stmt_for_body6(384 * c2 + 8 * c4 + 2, 1019, 512 * c1 + c5); +; CHECK-NEXT: Stmt_for_body6(384 * c2 + 8 * c4 + 3, 992, 512 * c1 + c5); +; CHECK-NEXT: Stmt_for_body6(384 * c2 + 8 * c4 + 3, 993, 512 * c1 + c5); +; CHECK-NEXT: Stmt_for_body6(384 * c2 + 8 * c4 + 3, 994, 512 * c1 + c5); +; CHECK-NEXT: Stmt_for_body6(384 * c2 + 8 * c4 + 3, 995, 512 * c1 + c5); +; CHECK-NEXT: Stmt_for_body6(384 * c2 + 8 * c4 + 3, 996, 512 * c1 + c5); +; CHECK-NEXT: Stmt_for_body6(384 * c2 + 8 * c4 + 3, 997, 512 * c1 + c5); +; CHECK-NEXT: Stmt_for_body6(384 * c2 + 8 * c4 + 3, 998, 512 * c1 + c5); +; CHECK-NEXT: Stmt_for_body6(384 * c2 + 8 * c4 + 3, 999, 512 * c1 + c5); +; CHECK-NEXT: Stmt_for_body6(384 * c2 + 8 * c4 + 3, 1000, 512 * c1 + c5); +; CHECK-NEXT: Stmt_for_body6(384 * c2 + 8 * c4 + 3, 1001, 512 * c1 + c5); +; CHECK-NEXT: Stmt_for_body6(384 * c2 + 8 * c4 + 3, 1002, 512 * c1 + c5); +; CHECK-NEXT: Stmt_for_body6(384 * c2 + 8 * c4 + 3, 1003, 512 * c1 + c5); +; CHECK-NEXT: Stmt_for_body6(384 * c2 + 8 * c4 + 3, 1004, 512 * c1 + c5); +; CHECK-NEXT: Stmt_for_body6(384 * c2 + 8 * c4 + 3, 1005, 512 * c1 + c5); +; CHECK-NEXT: Stmt_for_body6(384 * c2 + 8 * c4 + 3, 1006, 512 * c1 + c5); +; CHECK-NEXT: Stmt_for_body6(384 * c2 + 8 * c4 + 3, 1007, 512 * c1 + c5); +; CHECK-NEXT: Stmt_for_body6(384 * c2 + 8 * c4 + 3, 1008, 512 * c1 + c5); +; CHECK-NEXT: Stmt_for_body6(384 * c2 + 8 * c4 + 3, 1009, 512 * c1 + c5); +; CHECK-NEXT: Stmt_for_body6(384 * c2 + 8 * c4 + 3, 1010, 512 * c1 + c5); +; CHECK-NEXT: Stmt_for_body6(384 * c2 + 8 * c4 + 3, 1011, 512 * c1 + c5); +; CHECK-NEXT: Stmt_for_body6(384 * c2 + 8 * c4 + 3, 1012, 512 * c1 + c5); +; CHECK-NEXT: Stmt_for_body6(384 * c2 + 8 * c4 + 3, 1013, 512 * c1 + c5); +; CHECK-NEXT: Stmt_for_body6(384 * c2 + 8 * c4 + 3, 1014, 512 * c1 + c5); +; CHECK-NEXT: Stmt_for_body6(384 * c2 + 8 * c4 + 3, 1015, 512 * c1 + c5); +; CHECK-NEXT: Stmt_for_body6(384 * c2 + 8 * c4 + 3, 1016, 512 * c1 + c5); +; CHECK-NEXT: Stmt_for_body6(384 * c2 + 8 * c4 + 3, 1017, 512 * c1 + c5); +; CHECK-NEXT: Stmt_for_body6(384 * c2 + 8 * c4 + 3, 1018, 512 * c1 + c5); +; CHECK-NEXT: Stmt_for_body6(384 * c2 + 8 * c4 + 3, 1019, 512 * c1 + c5); +; CHECK-NEXT: if (48 * c2 + c4 <= 126) { +; CHECK-NEXT: Stmt_for_body6(384 * c2 + 8 * c4 + 4, 992, 512 * c1 + c5); +; CHECK-NEXT: Stmt_for_body6(384 * c2 + 8 * c4 + 4, 993, 512 * c1 + c5); +; CHECK-NEXT: Stmt_for_body6(384 * c2 + 8 * c4 + 4, 994, 512 * c1 + c5); +; CHECK-NEXT: Stmt_for_body6(384 * c2 + 8 * c4 + 4, 995, 512 * c1 + c5); +; CHECK-NEXT: Stmt_for_body6(384 * c2 + 8 * c4 + 4, 996, 512 * c1 + c5); +; CHECK-NEXT: Stmt_for_body6(384 * c2 + 8 * c4 + 4, 997, 512 * c1 + c5); +; CHECK-NEXT: Stmt_for_body6(384 * c2 + 8 * c4 + 4, 998, 512 * c1 + c5); +; CHECK-NEXT: Stmt_for_body6(384 * c2 + 8 * c4 + 4, 999, 512 * c1 + c5); +; CHECK-NEXT: Stmt_for_body6(384 * c2 + 8 * c4 + 4, 1000, 512 * c1 + c5); +; CHECK-NEXT: Stmt_for_body6(384 * c2 + 8 * c4 + 4, 1001, 512 * c1 + c5); +; CHECK-NEXT: Stmt_for_body6(384 * c2 + 8 * c4 + 4, 1002, 512 * c1 + c5); +; CHECK-NEXT: Stmt_for_body6(384 * c2 + 8 * c4 + 4, 1003, 512 * c1 + c5); +; CHECK-NEXT: Stmt_for_body6(384 * c2 + 8 * c4 + 4, 1004, 512 * c1 + c5); +; CHECK-NEXT: Stmt_for_body6(384 * c2 + 8 * c4 + 4, 1005, 512 * c1 + c5); +; CHECK-NEXT: Stmt_for_body6(384 * c2 + 8 * c4 + 4, 1006, 512 * c1 + c5); +; CHECK-NEXT: Stmt_for_body6(384 * c2 + 8 * c4 + 4, 1007, 512 * c1 + c5); +; CHECK-NEXT: Stmt_for_body6(384 * c2 + 8 * c4 + 4, 1008, 512 * c1 + c5); +; CHECK-NEXT: Stmt_for_body6(384 * c2 + 8 * c4 + 4, 1009, 512 * c1 + c5); +; CHECK-NEXT: Stmt_for_body6(384 * c2 + 8 * c4 + 4, 1010, 512 * c1 + c5); +; CHECK-NEXT: Stmt_for_body6(384 * c2 + 8 * c4 + 4, 1011, 512 * c1 + c5); +; CHECK-NEXT: Stmt_for_body6(384 * c2 + 8 * c4 + 4, 1012, 512 * c1 + c5); +; CHECK-NEXT: Stmt_for_body6(384 * c2 + 8 * c4 + 4, 1013, 512 * c1 + c5); +; CHECK-NEXT: Stmt_for_body6(384 * c2 + 8 * c4 + 4, 1014, 512 * c1 + c5); +; CHECK-NEXT: Stmt_for_body6(384 * c2 + 8 * c4 + 4, 1015, 512 * c1 + c5); +; CHECK-NEXT: Stmt_for_body6(384 * c2 + 8 * c4 + 4, 1016, 512 * c1 + c5); +; CHECK-NEXT: Stmt_for_body6(384 * c2 + 8 * c4 + 4, 1017, 512 * c1 + c5); +; CHECK-NEXT: Stmt_for_body6(384 * c2 + 8 * c4 + 4, 1018, 512 * c1 + c5); +; CHECK-NEXT: Stmt_for_body6(384 * c2 + 8 * c4 + 4, 1019, 512 * c1 + c5); +; CHECK-NEXT: Stmt_for_body6(384 * c2 + 8 * c4 + 5, 992, 512 * c1 + c5); +; CHECK-NEXT: Stmt_for_body6(384 * c2 + 8 * c4 + 5, 993, 512 * c1 + c5); +; CHECK-NEXT: Stmt_for_body6(384 * c2 + 8 * c4 + 5, 994, 512 * c1 + c5); +; CHECK-NEXT: Stmt_for_body6(384 * c2 + 8 * c4 + 5, 995, 512 * c1 + c5); +; CHECK-NEXT: Stmt_for_body6(384 * c2 + 8 * c4 + 5, 996, 512 * c1 + c5); +; CHECK-NEXT: Stmt_for_body6(384 * c2 + 8 * c4 + 5, 997, 512 * c1 + c5); +; CHECK-NEXT: Stmt_for_body6(384 * c2 + 8 * c4 + 5, 998, 512 * c1 + c5); +; CHECK-NEXT: Stmt_for_body6(384 * c2 + 8 * c4 + 5, 999, 512 * c1 + c5); +; CHECK-NEXT: Stmt_for_body6(384 * c2 + 8 * c4 + 5, 1000, 512 * c1 + c5); +; CHECK-NEXT: Stmt_for_body6(384 * c2 + 8 * c4 + 5, 1001, 512 * c1 + c5); +; CHECK-NEXT: Stmt_for_body6(384 * c2 + 8 * c4 + 5, 1002, 512 * c1 + c5); +; CHECK-NEXT: Stmt_for_body6(384 * c2 + 8 * c4 + 5, 1003, 512 * c1 + c5); +; CHECK-NEXT: Stmt_for_body6(384 * c2 + 8 * c4 + 5, 1004, 512 * c1 + c5); +; CHECK-NEXT: Stmt_for_body6(384 * c2 + 8 * c4 + 5, 1005, 512 * c1 + c5); +; CHECK-NEXT: Stmt_for_body6(384 * c2 + 8 * c4 + 5, 1006, 512 * c1 + c5); +; CHECK-NEXT: Stmt_for_body6(384 * c2 + 8 * c4 + 5, 1007, 512 * c1 + c5); +; CHECK-NEXT: Stmt_for_body6(384 * c2 + 8 * c4 + 5, 1008, 512 * c1 + c5); +; CHECK-NEXT: Stmt_for_body6(384 * c2 + 8 * c4 + 5, 1009, 512 * c1 + c5); +; CHECK-NEXT: Stmt_for_body6(384 * c2 + 8 * c4 + 5, 1010, 512 * c1 + c5); +; CHECK-NEXT: Stmt_for_body6(384 * c2 + 8 * c4 + 5, 1011, 512 * c1 + c5); +; CHECK-NEXT: Stmt_for_body6(384 * c2 + 8 * c4 + 5, 1012, 512 * c1 + c5); +; CHECK-NEXT: Stmt_for_body6(384 * c2 + 8 * c4 + 5, 1013, 512 * c1 + c5); +; CHECK-NEXT: Stmt_for_body6(384 * c2 + 8 * c4 + 5, 1014, 512 * c1 + c5); +; CHECK-NEXT: Stmt_for_body6(384 * c2 + 8 * c4 + 5, 1015, 512 * c1 + c5); +; CHECK-NEXT: Stmt_for_body6(384 * c2 + 8 * c4 + 5, 1016, 512 * c1 + c5); +; CHECK-NEXT: Stmt_for_body6(384 * c2 + 8 * c4 + 5, 1017, 512 * c1 + c5); +; CHECK-NEXT: Stmt_for_body6(384 * c2 + 8 * c4 + 5, 1018, 512 * c1 + c5); +; CHECK-NEXT: Stmt_for_body6(384 * c2 + 8 * c4 + 5, 1019, 512 * c1 + c5); +; CHECK-NEXT: Stmt_for_body6(384 * c2 + 8 * c4 + 6, 992, 512 * c1 + c5); +; CHECK-NEXT: Stmt_for_body6(384 * c2 + 8 * c4 + 6, 993, 512 * c1 + c5); +; CHECK-NEXT: Stmt_for_body6(384 * c2 + 8 * c4 + 6, 994, 512 * c1 + c5); +; CHECK-NEXT: Stmt_for_body6(384 * c2 + 8 * c4 + 6, 995, 512 * c1 + c5); +; CHECK-NEXT: Stmt_for_body6(384 * c2 + 8 * c4 + 6, 996, 512 * c1 + c5); +; CHECK-NEXT: Stmt_for_body6(384 * c2 + 8 * c4 + 6, 997, 512 * c1 + c5); +; CHECK-NEXT: Stmt_for_body6(384 * c2 + 8 * c4 + 6, 998, 512 * c1 + c5); +; CHECK-NEXT: Stmt_for_body6(384 * c2 + 8 * c4 + 6, 999, 512 * c1 + c5); +; CHECK-NEXT: Stmt_for_body6(384 * c2 + 8 * c4 + 6, 1000, 512 * c1 + c5); +; CHECK-NEXT: Stmt_for_body6(384 * c2 + 8 * c4 + 6, 1001, 512 * c1 + c5); +; CHECK-NEXT: Stmt_for_body6(384 * c2 + 8 * c4 + 6, 1002, 512 * c1 + c5); +; CHECK-NEXT: Stmt_for_body6(384 * c2 + 8 * c4 + 6, 1003, 512 * c1 + c5); +; CHECK-NEXT: Stmt_for_body6(384 * c2 + 8 * c4 + 6, 1004, 512 * c1 + c5); +; CHECK-NEXT: Stmt_for_body6(384 * c2 + 8 * c4 + 6, 1005, 512 * c1 + c5); +; CHECK-NEXT: Stmt_for_body6(384 * c2 + 8 * c4 + 6, 1006, 512 * c1 + c5); +; CHECK-NEXT: Stmt_for_body6(384 * c2 + 8 * c4 + 6, 1007, 512 * c1 + c5); +; CHECK-NEXT: Stmt_for_body6(384 * c2 + 8 * c4 + 6, 1008, 512 * c1 + c5); +; CHECK-NEXT: Stmt_for_body6(384 * c2 + 8 * c4 + 6, 1009, 512 * c1 + c5); +; CHECK-NEXT: Stmt_for_body6(384 * c2 + 8 * c4 + 6, 1010, 512 * c1 + c5); +; CHECK-NEXT: Stmt_for_body6(384 * c2 + 8 * c4 + 6, 1011, 512 * c1 + c5); +; CHECK-NEXT: Stmt_for_body6(384 * c2 + 8 * c4 + 6, 1012, 512 * c1 + c5); +; CHECK-NEXT: Stmt_for_body6(384 * c2 + 8 * c4 + 6, 1013, 512 * c1 + c5); +; CHECK-NEXT: Stmt_for_body6(384 * c2 + 8 * c4 + 6, 1014, 512 * c1 + c5); +; CHECK-NEXT: Stmt_for_body6(384 * c2 + 8 * c4 + 6, 1015, 512 * c1 + c5); +; CHECK-NEXT: Stmt_for_body6(384 * c2 + 8 * c4 + 6, 1016, 512 * c1 + c5); +; CHECK-NEXT: Stmt_for_body6(384 * c2 + 8 * c4 + 6, 1017, 512 * c1 + c5); +; CHECK-NEXT: Stmt_for_body6(384 * c2 + 8 * c4 + 6, 1018, 512 * c1 + c5); +; CHECK-NEXT: Stmt_for_body6(384 * c2 + 8 * c4 + 6, 1019, 512 * c1 + c5); +; CHECK-NEXT: Stmt_for_body6(384 * c2 + 8 * c4 + 7, 992, 512 * c1 + c5); +; CHECK-NEXT: Stmt_for_body6(384 * c2 + 8 * c4 + 7, 993, 512 * c1 + c5); +; CHECK-NEXT: Stmt_for_body6(384 * c2 + 8 * c4 + 7, 994, 512 * c1 + c5); +; CHECK-NEXT: Stmt_for_body6(384 * c2 + 8 * c4 + 7, 995, 512 * c1 + c5); +; CHECK-NEXT: Stmt_for_body6(384 * c2 + 8 * c4 + 7, 996, 512 * c1 + c5); +; CHECK-NEXT: Stmt_for_body6(384 * c2 + 8 * c4 + 7, 997, 512 * c1 + c5); +; CHECK-NEXT: Stmt_for_body6(384 * c2 + 8 * c4 + 7, 998, 512 * c1 + c5); +; CHECK-NEXT: Stmt_for_body6(384 * c2 + 8 * c4 + 7, 999, 512 * c1 + c5); +; CHECK-NEXT: Stmt_for_body6(384 * c2 + 8 * c4 + 7, 1000, 512 * c1 + c5); +; CHECK-NEXT: Stmt_for_body6(384 * c2 + 8 * c4 + 7, 1001, 512 * c1 + c5); +; CHECK-NEXT: Stmt_for_body6(384 * c2 + 8 * c4 + 7, 1002, 512 * c1 + c5); +; CHECK-NEXT: Stmt_for_body6(384 * c2 + 8 * c4 + 7, 1003, 512 * c1 + c5); +; CHECK-NEXT: Stmt_for_body6(384 * c2 + 8 * c4 + 7, 1004, 512 * c1 + c5); +; CHECK-NEXT: Stmt_for_body6(384 * c2 + 8 * c4 + 7, 1005, 512 * c1 + c5); +; CHECK-NEXT: Stmt_for_body6(384 * c2 + 8 * c4 + 7, 1006, 512 * c1 + c5); +; CHECK-NEXT: Stmt_for_body6(384 * c2 + 8 * c4 + 7, 1007, 512 * c1 + c5); +; CHECK-NEXT: Stmt_for_body6(384 * c2 + 8 * c4 + 7, 1008, 512 * c1 + c5); +; CHECK-NEXT: Stmt_for_body6(384 * c2 + 8 * c4 + 7, 1009, 512 * c1 + c5); +; CHECK-NEXT: Stmt_for_body6(384 * c2 + 8 * c4 + 7, 1010, 512 * c1 + c5); +; CHECK-NEXT: Stmt_for_body6(384 * c2 + 8 * c4 + 7, 1011, 512 * c1 + c5); +; CHECK-NEXT: Stmt_for_body6(384 * c2 + 8 * c4 + 7, 1012, 512 * c1 + c5); +; CHECK-NEXT: Stmt_for_body6(384 * c2 + 8 * c4 + 7, 1013, 512 * c1 + c5); +; CHECK-NEXT: Stmt_for_body6(384 * c2 + 8 * c4 + 7, 1014, 512 * c1 + c5); +; CHECK-NEXT: Stmt_for_body6(384 * c2 + 8 * c4 + 7, 1015, 512 * c1 + c5); +; CHECK-NEXT: Stmt_for_body6(384 * c2 + 8 * c4 + 7, 1016, 512 * c1 + c5); +; CHECK-NEXT: Stmt_for_body6(384 * c2 + 8 * c4 + 7, 1017, 512 * c1 + c5); +; CHECK-NEXT: Stmt_for_body6(384 * c2 + 8 * c4 + 7, 1018, 512 * c1 + c5); +; CHECK-NEXT: Stmt_for_body6(384 * c2 + 8 * c4 + 7, 1019, 512 * c1 + c5); +; CHECK-NEXT: } +; CHECK-NEXT: } +; CHECK-NEXT: } ; CHECK-NEXT: } +; CHECK-NEXT: } ; CHECK-NEXT: } -; CHECK-NEXT: } -; CHECK-NEXT: } ; target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" target triple = "x86_64-unknown-linux-gnu" Index: test/ScheduleOptimizer/pattern-matching-based-opts_13.ll =================================================================== --- test/ScheduleOptimizer/pattern-matching-based-opts_13.ll +++ test/ScheduleOptimizer/pattern-matching-based-opts_13.ll @@ -11,70 +11,79 @@ ; Test whether isolation works as expected. ; ; CHECK: // Inter iteration alias-free -; CHECK-NEXT: // 1st level tiling - Tiles -; CHECK-NEXT: for (int c0 = 0; c0 <= 1; c0 += 1) -; CHECK-NEXT: for (int c1 = 0; c1 <= 6; c1 += 1) { -; CHECK-NEXT: for (int c3 = 1536 * c0; c3 <= min(1999, 1536 * c0 + 1535); c3 += 1) -; CHECK-NEXT: for (int c4 = 307 * c1; c4 <= min(1999, 307 * c1 + 306); c4 += 1) -; CHECK-NEXT: CopyStmt_0(0, c3, c4); -; CHECK-NEXT: for (int c2 = 0; c2 <= 24; c2 += 1) { -; CHECK-NEXT: if (c0 == 0) -; CHECK-NEXT: for (int c3 = 80 * c2; c3 <= 80 * c2 + 79; c3 += 1) -; CHECK-NEXT: for (int c5 = 307 * c1; c5 <= min(1999, 307 * c1 + 306); c5 += 1) -; CHECK-NEXT: CopyStmt_1(c3, 0, c5); -; CHECK-NEXT: // 1st level tiling - Points -; CHECK-NEXT: // Register tiling - Tiles -; CHECK-NEXT: { -; CHECK-NEXT: for (int c3 = 0; c3 <= min(255, -256 * c0 + 332); c3 += 1) -; CHECK-NEXT: for (int c4 = 0; c4 <= 15; c4 += 1) -; CHECK-NEXT: for (int c5 = 0; c5 <= min(306, -307 * c1 + 1999); c5 += 1) { -; CHECK-NEXT: // Loop Vectorizer Disabled -; CHECK-NEXT: // Register tiling - Points -; CHECK-NEXT: { -; CHECK-NEXT: Stmt_for_body6(80 * c2 + 5 * c4, 1536 * c0 + 6 * c3, 307 * c1 + c5); -; CHECK-NEXT: Stmt_for_body6(80 * c2 + 5 * c4, 1536 * c0 + 6 * c3 + 1, 307 * c1 + c5); -; CHECK-NEXT: Stmt_for_body6(80 * c2 + 5 * c4, 1536 * c0 + 6 * c3 + 2, 307 * c1 + c5); -; CHECK-NEXT: Stmt_for_body6(80 * c2 + 5 * c4, 1536 * c0 + 6 * c3 + 3, 307 * c1 + c5); -; CHECK-NEXT: Stmt_for_body6(80 * c2 + 5 * c4, 1536 * c0 + 6 * c3 + 4, 307 * c1 + c5); -; CHECK-NEXT: Stmt_for_body6(80 * c2 + 5 * c4, 1536 * c0 + 6 * c3 + 5, 307 * c1 + c5); -; CHECK-NEXT: Stmt_for_body6(80 * c2 + 5 * c4 + 1, 1536 * c0 + 6 * c3, 307 * c1 + c5); -; CHECK-NEXT: Stmt_for_body6(80 * c2 + 5 * c4 + 1, 1536 * c0 + 6 * c3 + 1, 307 * c1 + c5); -; CHECK-NEXT: Stmt_for_body6(80 * c2 + 5 * c4 + 1, 1536 * c0 + 6 * c3 + 2, 307 * c1 + c5); -; CHECK-NEXT: Stmt_for_body6(80 * c2 + 5 * c4 + 1, 1536 * c0 + 6 * c3 + 3, 307 * c1 + c5); -; CHECK-NEXT: Stmt_for_body6(80 * c2 + 5 * c4 + 1, 1536 * c0 + 6 * c3 + 4, 307 * c1 + c5); -; CHECK-NEXT: Stmt_for_body6(80 * c2 + 5 * c4 + 1, 1536 * c0 + 6 * c3 + 5, 307 * c1 + c5); -; CHECK-NEXT: Stmt_for_body6(80 * c2 + 5 * c4 + 2, 1536 * c0 + 6 * c3, 307 * c1 + c5); -; CHECK-NEXT: Stmt_for_body6(80 * c2 + 5 * c4 + 2, 1536 * c0 + 6 * c3 + 1, 307 * c1 + c5); -; CHECK-NEXT: Stmt_for_body6(80 * c2 + 5 * c4 + 2, 1536 * c0 + 6 * c3 + 2, 307 * c1 + c5); -; CHECK-NEXT: Stmt_for_body6(80 * c2 + 5 * c4 + 2, 1536 * c0 + 6 * c3 + 3, 307 * c1 + c5); -; CHECK-NEXT: Stmt_for_body6(80 * c2 + 5 * c4 + 2, 1536 * c0 + 6 * c3 + 4, 307 * c1 + c5); -; CHECK-NEXT: Stmt_for_body6(80 * c2 + 5 * c4 + 2, 1536 * c0 + 6 * c3 + 5, 307 * c1 + c5); -; CHECK-NEXT: Stmt_for_body6(80 * c2 + 5 * c4 + 3, 1536 * c0 + 6 * c3, 307 * c1 + c5); -; CHECK-NEXT: Stmt_for_body6(80 * c2 + 5 * c4 + 3, 1536 * c0 + 6 * c3 + 1, 307 * c1 + c5); -; CHECK-NEXT: Stmt_for_body6(80 * c2 + 5 * c4 + 3, 1536 * c0 + 6 * c3 + 2, 307 * c1 + c5); -; CHECK-NEXT: Stmt_for_body6(80 * c2 + 5 * c4 + 3, 1536 * c0 + 6 * c3 + 3, 307 * c1 + c5); -; CHECK-NEXT: Stmt_for_body6(80 * c2 + 5 * c4 + 3, 1536 * c0 + 6 * c3 + 4, 307 * c1 + c5); -; CHECK-NEXT: Stmt_for_body6(80 * c2 + 5 * c4 + 3, 1536 * c0 + 6 * c3 + 5, 307 * c1 + c5); -; CHECK-NEXT: Stmt_for_body6(80 * c2 + 5 * c4 + 4, 1536 * c0 + 6 * c3, 307 * c1 + c5); -; CHECK-NEXT: Stmt_for_body6(80 * c2 + 5 * c4 + 4, 1536 * c0 + 6 * c3 + 1, 307 * c1 + c5); -; CHECK-NEXT: Stmt_for_body6(80 * c2 + 5 * c4 + 4, 1536 * c0 + 6 * c3 + 2, 307 * c1 + c5); -; CHECK-NEXT: Stmt_for_body6(80 * c2 + 5 * c4 + 4, 1536 * c0 + 6 * c3 + 3, 307 * c1 + c5); -; CHECK-NEXT: Stmt_for_body6(80 * c2 + 5 * c4 + 4, 1536 * c0 + 6 * c3 + 4, 307 * c1 + c5); -; CHECK-NEXT: Stmt_for_body6(80 * c2 + 5 * c4 + 4, 1536 * c0 + 6 * c3 + 5, 307 * c1 + c5); -; CHECK-NEXT: } +; CHECK-NEXT: // 1st level tiling - Tiles +; CHECK-NEXT: for (int c0 = 0; c0 <= 1; c0 += 1) +; CHECK-NEXT: for (int c1 = 0; c1 <= 6; c1 += 1) { +; CHECK-NEXT: for (int c3 = 1536 * c0; c3 <= min(1999, 1536 * c0 + 1535); c3 += 1) +; CHECK-NEXT: for (int c4 = 307 * c1; c4 <= min(1999, 307 * c1 + 306); c4 += 1) +; CHECK-NEXT: CopyStmt_0(0, c3, c4); +; CHECK-NEXT: for (int c2 = 0; c2 <= 24; c2 += 1) { +; CHECK-NEXT: if (c0 == 0) +; CHECK-NEXT: for (int c3 = 80 * c2; c3 <= 80 * c2 + 79; c3 += 1) +; CHECK-NEXT: for (int c5 = 307 * c1; c5 <= min(1999, 307 * c1 + 306); c5 += 1) +; CHECK-NEXT: CopyStmt_1(c3, 0, c5); +; CHECK-NEXT: // 1st level tiling - Points +; CHECK-NEXT: // Register tiling - Tiles +; CHECK-NEXT: { +; CHECK-NEXT: for (int c3 = 0; c3 <= min(255, -256 * c0 + 332); c3 += 1) +; CHECK-NEXT: for (int c4 = 0; c4 <= 15; c4 += 1) +; CHECK-NEXT: for (int c5 = 0; c5 <= min(306, -307 * c1 + 1999); c5 += 1) { +; CHECK-NEXT: // Loop Vectorizer Disabled +; CHECK-NEXT: // Register tiling - Points +; CHECK-NEXT: { +; CHECK-NEXT: Stmt_for_body6(80 * c2 + 5 * c4, 1536 * c0 + 6 * c3, 307 * c1 + c5); +; CHECK-NEXT: Stmt_for_body6(80 * c2 + 5 * c4, 1536 * c0 + 6 * c3 + 1, 307 * c1 + c5); +; CHECK-NEXT: Stmt_for_body6(80 * c2 + 5 * c4, 1536 * c0 + 6 * c3 + 2, 307 * c1 + c5); +; CHECK-NEXT: Stmt_for_body6(80 * c2 + 5 * c4, 1536 * c0 + 6 * c3 + 3, 307 * c1 + c5); +; CHECK-NEXT: Stmt_for_body6(80 * c2 + 5 * c4, 1536 * c0 + 6 * c3 + 4, 307 * c1 + c5); +; CHECK-NEXT: Stmt_for_body6(80 * c2 + 5 * c4, 1536 * c0 + 6 * c3 + 5, 307 * c1 + c5); +; CHECK-NEXT: Stmt_for_body6(80 * c2 + 5 * c4 + 1, 1536 * c0 + 6 * c3, 307 * c1 + c5); +; CHECK-NEXT: Stmt_for_body6(80 * c2 + 5 * c4 + 1, 1536 * c0 + 6 * c3 + 1, 307 * c1 + c5); +; CHECK-NEXT: Stmt_for_body6(80 * c2 + 5 * c4 + 1, 1536 * c0 + 6 * c3 + 2, 307 * c1 + c5); +; CHECK-NEXT: Stmt_for_body6(80 * c2 + 5 * c4 + 1, 1536 * c0 + 6 * c3 + 3, 307 * c1 + c5); +; CHECK-NEXT: Stmt_for_body6(80 * c2 + 5 * c4 + 1, 1536 * c0 + 6 * c3 + 4, 307 * c1 + c5); +; CHECK-NEXT: Stmt_for_body6(80 * c2 + 5 * c4 + 1, 1536 * c0 + 6 * c3 + 5, 307 * c1 + c5); +; CHECK-NEXT: Stmt_for_body6(80 * c2 + 5 * c4 + 2, 1536 * c0 + 6 * c3, 307 * c1 + c5); +; CHECK-NEXT: Stmt_for_body6(80 * c2 + 5 * c4 + 2, 1536 * c0 + 6 * c3 + 1, 307 * c1 + c5); +; CHECK-NEXT: Stmt_for_body6(80 * c2 + 5 * c4 + 2, 1536 * c0 + 6 * c3 + 2, 307 * c1 + c5); +; CHECK-NEXT: Stmt_for_body6(80 * c2 + 5 * c4 + 2, 1536 * c0 + 6 * c3 + 3, 307 * c1 + c5); +; CHECK-NEXT: Stmt_for_body6(80 * c2 + 5 * c4 + 2, 1536 * c0 + 6 * c3 + 4, 307 * c1 + c5); +; CHECK-NEXT: Stmt_for_body6(80 * c2 + 5 * c4 + 2, 1536 * c0 + 6 * c3 + 5, 307 * c1 + c5); +; CHECK-NEXT: Stmt_for_body6(80 * c2 + 5 * c4 + 3, 1536 * c0 + 6 * c3, 307 * c1 + c5); +; CHECK-NEXT: Stmt_for_body6(80 * c2 + 5 * c4 + 3, 1536 * c0 + 6 * c3 + 1, 307 * c1 + c5); +; CHECK-NEXT: Stmt_for_body6(80 * c2 + 5 * c4 + 3, 1536 * c0 + 6 * c3 + 2, 307 * c1 + c5); +; CHECK-NEXT: Stmt_for_body6(80 * c2 + 5 * c4 + 3, 1536 * c0 + 6 * c3 + 3, 307 * c1 + c5); +; CHECK-NEXT: Stmt_for_body6(80 * c2 + 5 * c4 + 3, 1536 * c0 + 6 * c3 + 4, 307 * c1 + c5); +; CHECK-NEXT: Stmt_for_body6(80 * c2 + 5 * c4 + 3, 1536 * c0 + 6 * c3 + 5, 307 * c1 + c5); +; CHECK-NEXT: Stmt_for_body6(80 * c2 + 5 * c4 + 4, 1536 * c0 + 6 * c3, 307 * c1 + c5); +; CHECK-NEXT: Stmt_for_body6(80 * c2 + 5 * c4 + 4, 1536 * c0 + 6 * c3 + 1, 307 * c1 + c5); +; CHECK-NEXT: Stmt_for_body6(80 * c2 + 5 * c4 + 4, 1536 * c0 + 6 * c3 + 2, 307 * c1 + c5); +; CHECK-NEXT: Stmt_for_body6(80 * c2 + 5 * c4 + 4, 1536 * c0 + 6 * c3 + 3, 307 * c1 + c5); +; CHECK-NEXT: Stmt_for_body6(80 * c2 + 5 * c4 + 4, 1536 * c0 + 6 * c3 + 4, 307 * c1 + c5); +; CHECK-NEXT: Stmt_for_body6(80 * c2 + 5 * c4 + 4, 1536 * c0 + 6 * c3 + 5, 307 * c1 + c5); +; CHECK-NEXT: } +; CHECK-NEXT: } +; CHECK-NEXT: if (c0 == 1) +; CHECK-NEXT: for (int c4 = 0; c4 <= 15; c4 += 1) +; CHECK-NEXT: for (int c5 = 0; c5 <= min(306, -307 * c1 + 1999); c5 += 1) { +; CHECK-NEXT: // Loop Vectorizer Disabled +; CHECK-NEXT: // Register tiling - Points +; CHECK-NEXT: { +; CHECK-NEXT: Stmt_for_body6(80 * c2 + 5 * c4, 1998, 307 * c1 + c5); +; CHECK-NEXT: Stmt_for_body6(80 * c2 + 5 * c4, 1999, 307 * c1 + c5); +; CHECK-NEXT: Stmt_for_body6(80 * c2 + 5 * c4 + 1, 1998, 307 * c1 + c5); +; CHECK-NEXT: Stmt_for_body6(80 * c2 + 5 * c4 + 1, 1999, 307 * c1 + c5); +; CHECK-NEXT: Stmt_for_body6(80 * c2 + 5 * c4 + 2, 1998, 307 * c1 + c5); +; CHECK-NEXT: Stmt_for_body6(80 * c2 + 5 * c4 + 2, 1999, 307 * c1 + c5); +; CHECK-NEXT: Stmt_for_body6(80 * c2 + 5 * c4 + 3, 1998, 307 * c1 + c5); +; CHECK-NEXT: Stmt_for_body6(80 * c2 + 5 * c4 + 3, 1999, 307 * c1 + c5); +; CHECK-NEXT: Stmt_for_body6(80 * c2 + 5 * c4 + 4, 1998, 307 * c1 + c5); +; CHECK-NEXT: Stmt_for_body6(80 * c2 + 5 * c4 + 4, 1999, 307 * c1 + c5); +; CHECK-NEXT: } +; CHECK-NEXT: } ; CHECK-NEXT: } -; CHECK-NEXT: if (c0 == 1) -; CHECK-NEXT: for (int c4 = 0; c4 <= 15; c4 += 1) -; CHECK-NEXT: for (int c5 = 0; c5 <= min(306, -307 * c1 + 1999); c5 += 1) { -; CHECK-NEXT: // Loop Vectorizer Disabled -; CHECK-NEXT: // Register tiling - Points -; CHECK-NEXT: for (int c6 = 0; c6 <= 4; c6 += 1) -; CHECK-NEXT: for (int c7 = 0; c7 <= 1; c7 += 1) -; CHECK-NEXT: Stmt_for_body6(80 * c2 + 5 * c4 + c6, c7 + 1998, 307 * c1 + c5); -; CHECK-NEXT: } -; CHECK-NEXT: } -; CHECK-NEXT: } -; CHECK-NEXT: } +; CHECK-NEXT: } +; CHECK-NEXT: } ; target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" target triple = "x86_64-unknown-linux-gnu" Index: test/ScheduleOptimizer/pattern-matching-based-opts_5.ll =================================================================== --- test/ScheduleOptimizer/pattern-matching-based-opts_5.ll +++ test/ScheduleOptimizer/pattern-matching-based-opts_5.ll @@ -36,88 +36,267 @@ ; for (k = 0; k < _PB_NK; ++k) ; C[i][j] += A[i][k] * B[k][j]; ; -; CHECK: if (ni >= 1) { -; CHECK-NEXT: // Inter iteration alias-free -; CHECK-NEXT: // 1st level tiling - Tiles -; CHECK-NEXT: for (int c0 = 0; c0 <= floord(nj - 1, 2048); c0 += 1) -; CHECK-NEXT: for (int c1 = 0; c1 <= floord(nk - 1, 256); c1 += 1) { -; CHECK-NEXT: for (int c3 = 2048 * c0; c3 <= min(nj - 1, 2048 * c0 + 2047); c3 += 1) -; CHECK-NEXT: for (int c4 = 256 * c1; c4 <= min(nk - 1, 256 * c1 + 255); c4 += 1) -; CHECK-NEXT: CopyStmt_0(0, c3, c4); -; CHECK-NEXT: for (int c2 = 0; c2 <= floord(ni - 1, 96); c2 += 1) { -; CHECK-NEXT: if (c0 == 0) -; CHECK-NEXT: for (int c3 = 96 * c2; c3 <= min(ni - 1, 96 * c2 + 95); c3 += 1) -; CHECK-NEXT: for (int c5 = 256 * c1; c5 <= min(nk - 1, 256 * c1 + 255); c5 += 1) -; CHECK-NEXT: CopyStmt_1(c3, 0, c5); -; CHECK-NEXT: // 1st level tiling - Points -; CHECK-NEXT: // Register tiling - Tiles -; CHECK-NEXT: { -; CHECK-NEXT: if (ni >= 96 * c2 + 4) -; CHECK-NEXT: for (int c3 = 0; c3 <= min(255, -256 * c0 + nj / 8 - 1); c3 += 1) { -; CHECK-NEXT: for (int c4 = 0; c4 <= min(23, -24 * c2 + ni / 4 - 1); c4 += 1) -; CHECK-NEXT: for (int c5 = 0; c5 <= min(255, nk - 256 * c1 - 1); c5 += 1) { -; CHECK-NEXT: // Loop Vectorizer Disabled -; CHECK-NEXT: // Register tiling - Points -; CHECK-NEXT: { -; CHECK-NEXT: Stmt_for_body6(96 * c2 + 4 * c4, 2048 * c0 + 8 * c3, 256 * c1 + c5); -; CHECK-NEXT: Stmt_for_body6(96 * c2 + 4 * c4, 2048 * c0 + 8 * c3 + 1, 256 * c1 + c5); -; CHECK-NEXT: Stmt_for_body6(96 * c2 + 4 * c4, 2048 * c0 + 8 * c3 + 2, 256 * c1 + c5); -; CHECK-NEXT: Stmt_for_body6(96 * c2 + 4 * c4, 2048 * c0 + 8 * c3 + 3, 256 * c1 + c5); -; CHECK-NEXT: Stmt_for_body6(96 * c2 + 4 * c4, 2048 * c0 + 8 * c3 + 4, 256 * c1 + c5); -; CHECK-NEXT: Stmt_for_body6(96 * c2 + 4 * c4, 2048 * c0 + 8 * c3 + 5, 256 * c1 + c5); -; CHECK-NEXT: Stmt_for_body6(96 * c2 + 4 * c4, 2048 * c0 + 8 * c3 + 6, 256 * c1 + c5); -; CHECK-NEXT: Stmt_for_body6(96 * c2 + 4 * c4, 2048 * c0 + 8 * c3 + 7, 256 * c1 + c5); -; CHECK-NEXT: Stmt_for_body6(96 * c2 + 4 * c4 + 1, 2048 * c0 + 8 * c3, 256 * c1 + c5); -; CHECK-NEXT: Stmt_for_body6(96 * c2 + 4 * c4 + 1, 2048 * c0 + 8 * c3 + 1, 256 * c1 + c5); -; CHECK-NEXT: Stmt_for_body6(96 * c2 + 4 * c4 + 1, 2048 * c0 + 8 * c3 + 2, 256 * c1 + c5); -; CHECK-NEXT: Stmt_for_body6(96 * c2 + 4 * c4 + 1, 2048 * c0 + 8 * c3 + 3, 256 * c1 + c5); -; CHECK-NEXT: Stmt_for_body6(96 * c2 + 4 * c4 + 1, 2048 * c0 + 8 * c3 + 4, 256 * c1 + c5); -; CHECK-NEXT: Stmt_for_body6(96 * c2 + 4 * c4 + 1, 2048 * c0 + 8 * c3 + 5, 256 * c1 + c5); -; CHECK-NEXT: Stmt_for_body6(96 * c2 + 4 * c4 + 1, 2048 * c0 + 8 * c3 + 6, 256 * c1 + c5); -; CHECK-NEXT: Stmt_for_body6(96 * c2 + 4 * c4 + 1, 2048 * c0 + 8 * c3 + 7, 256 * c1 + c5); -; CHECK-NEXT: Stmt_for_body6(96 * c2 + 4 * c4 + 2, 2048 * c0 + 8 * c3, 256 * c1 + c5); -; CHECK-NEXT: Stmt_for_body6(96 * c2 + 4 * c4 + 2, 2048 * c0 + 8 * c3 + 1, 256 * c1 + c5); -; CHECK-NEXT: Stmt_for_body6(96 * c2 + 4 * c4 + 2, 2048 * c0 + 8 * c3 + 2, 256 * c1 + c5); -; CHECK-NEXT: Stmt_for_body6(96 * c2 + 4 * c4 + 2, 2048 * c0 + 8 * c3 + 3, 256 * c1 + c5); -; CHECK-NEXT: Stmt_for_body6(96 * c2 + 4 * c4 + 2, 2048 * c0 + 8 * c3 + 4, 256 * c1 + c5); -; CHECK-NEXT: Stmt_for_body6(96 * c2 + 4 * c4 + 2, 2048 * c0 + 8 * c3 + 5, 256 * c1 + c5); -; CHECK-NEXT: Stmt_for_body6(96 * c2 + 4 * c4 + 2, 2048 * c0 + 8 * c3 + 6, 256 * c1 + c5); -; CHECK-NEXT: Stmt_for_body6(96 * c2 + 4 * c4 + 2, 2048 * c0 + 8 * c3 + 7, 256 * c1 + c5); -; CHECK-NEXT: Stmt_for_body6(96 * c2 + 4 * c4 + 3, 2048 * c0 + 8 * c3, 256 * c1 + c5); -; CHECK-NEXT: Stmt_for_body6(96 * c2 + 4 * c4 + 3, 2048 * c0 + 8 * c3 + 1, 256 * c1 + c5); -; CHECK-NEXT: Stmt_for_body6(96 * c2 + 4 * c4 + 3, 2048 * c0 + 8 * c3 + 2, 256 * c1 + c5); -; CHECK-NEXT: Stmt_for_body6(96 * c2 + 4 * c4 + 3, 2048 * c0 + 8 * c3 + 3, 256 * c1 + c5); -; CHECK-NEXT: Stmt_for_body6(96 * c2 + 4 * c4 + 3, 2048 * c0 + 8 * c3 + 4, 256 * c1 + c5); -; CHECK-NEXT: Stmt_for_body6(96 * c2 + 4 * c4 + 3, 2048 * c0 + 8 * c3 + 5, 256 * c1 + c5); -; CHECK-NEXT: Stmt_for_body6(96 * c2 + 4 * c4 + 3, 2048 * c0 + 8 * c3 + 6, 256 * c1 + c5); -; CHECK-NEXT: Stmt_for_body6(96 * c2 + 4 * c4 + 3, 2048 * c0 + 8 * c3 + 7, 256 * c1 + c5); +; CHECK: if (ni >= 1) { +; CHECK-NEXT: // Inter iteration alias-free +; CHECK-NEXT: // 1st level tiling - Tiles +; CHECK-NEXT: for (int c0 = 0; c0 <= floord(nj - 1, 2048); c0 += 1) +; CHECK-NEXT: for (int c1 = 0; c1 <= floord(nk - 1, 256); c1 += 1) { +; CHECK-NEXT: for (int c3 = 2048 * c0; c3 <= min(nj - 1, 2048 * c0 + 2047); c3 += 1) +; CHECK-NEXT: for (int c4 = 256 * c1; c4 <= min(nk - 1, 256 * c1 + 255); c4 += 1) +; CHECK-NEXT: CopyStmt_0(0, c3, c4); +; CHECK-NEXT: for (int c2 = 0; c2 <= floord(ni - 1, 96); c2 += 1) { +; CHECK-NEXT: if (c0 == 0) +; CHECK-NEXT: for (int c3 = 96 * c2; c3 <= min(ni - 1, 96 * c2 + 95); c3 += 1) +; CHECK-NEXT: for (int c5 = 256 * c1; c5 <= min(nk - 1, 256 * c1 + 255); c5 += 1) +; CHECK-NEXT: CopyStmt_1(c3, 0, c5); +; CHECK-NEXT: // 1st level tiling - Points +; CHECK-NEXT: // Register tiling - Tiles +; CHECK-NEXT: { +; CHECK-NEXT: if (ni >= 96 * c2 + 4) +; CHECK-NEXT: for (int c3 = 0; c3 <= min(255, -256 * c0 + nj / 8 - 1); c3 += 1) { +; CHECK-NEXT: for (int c4 = 0; c4 <= min(23, -24 * c2 + ni / 4 - 1); c4 += 1) +; CHECK-NEXT: for (int c5 = 0; c5 <= min(255, nk - 256 * c1 - 1); c5 += 1) { +; CHECK-NEXT: // Loop Vectorizer Disabled +; CHECK-NEXT: // Register tiling - Points +; CHECK-NEXT: { +; CHECK-NEXT: Stmt_for_body6(96 * c2 + 4 * c4, 2048 * c0 + 8 * c3, 256 * c1 + c5); +; CHECK-NEXT: Stmt_for_body6(96 * c2 + 4 * c4, 2048 * c0 + 8 * c3 + 1, 256 * c1 + c5); +; CHECK-NEXT: Stmt_for_body6(96 * c2 + 4 * c4, 2048 * c0 + 8 * c3 + 2, 256 * c1 + c5); +; CHECK-NEXT: Stmt_for_body6(96 * c2 + 4 * c4, 2048 * c0 + 8 * c3 + 3, 256 * c1 + c5); +; CHECK-NEXT: Stmt_for_body6(96 * c2 + 4 * c4, 2048 * c0 + 8 * c3 + 4, 256 * c1 + c5); +; CHECK-NEXT: Stmt_for_body6(96 * c2 + 4 * c4, 2048 * c0 + 8 * c3 + 5, 256 * c1 + c5); +; CHECK-NEXT: Stmt_for_body6(96 * c2 + 4 * c4, 2048 * c0 + 8 * c3 + 6, 256 * c1 + c5); +; CHECK-NEXT: Stmt_for_body6(96 * c2 + 4 * c4, 2048 * c0 + 8 * c3 + 7, 256 * c1 + c5); +; CHECK-NEXT: Stmt_for_body6(96 * c2 + 4 * c4 + 1, 2048 * c0 + 8 * c3, 256 * c1 + c5); +; CHECK-NEXT: Stmt_for_body6(96 * c2 + 4 * c4 + 1, 2048 * c0 + 8 * c3 + 1, 256 * c1 + c5); +; CHECK-NEXT: Stmt_for_body6(96 * c2 + 4 * c4 + 1, 2048 * c0 + 8 * c3 + 2, 256 * c1 + c5); +; CHECK-NEXT: Stmt_for_body6(96 * c2 + 4 * c4 + 1, 2048 * c0 + 8 * c3 + 3, 256 * c1 + c5); +; CHECK-NEXT: Stmt_for_body6(96 * c2 + 4 * c4 + 1, 2048 * c0 + 8 * c3 + 4, 256 * c1 + c5); +; CHECK-NEXT: Stmt_for_body6(96 * c2 + 4 * c4 + 1, 2048 * c0 + 8 * c3 + 5, 256 * c1 + c5); +; CHECK-NEXT: Stmt_for_body6(96 * c2 + 4 * c4 + 1, 2048 * c0 + 8 * c3 + 6, 256 * c1 + c5); +; CHECK-NEXT: Stmt_for_body6(96 * c2 + 4 * c4 + 1, 2048 * c0 + 8 * c3 + 7, 256 * c1 + c5); +; CHECK-NEXT: Stmt_for_body6(96 * c2 + 4 * c4 + 2, 2048 * c0 + 8 * c3, 256 * c1 + c5); +; CHECK-NEXT: Stmt_for_body6(96 * c2 + 4 * c4 + 2, 2048 * c0 + 8 * c3 + 1, 256 * c1 + c5); +; CHECK-NEXT: Stmt_for_body6(96 * c2 + 4 * c4 + 2, 2048 * c0 + 8 * c3 + 2, 256 * c1 + c5); +; CHECK-NEXT: Stmt_for_body6(96 * c2 + 4 * c4 + 2, 2048 * c0 + 8 * c3 + 3, 256 * c1 + c5); +; CHECK-NEXT: Stmt_for_body6(96 * c2 + 4 * c4 + 2, 2048 * c0 + 8 * c3 + 4, 256 * c1 + c5); +; CHECK-NEXT: Stmt_for_body6(96 * c2 + 4 * c4 + 2, 2048 * c0 + 8 * c3 + 5, 256 * c1 + c5); +; CHECK-NEXT: Stmt_for_body6(96 * c2 + 4 * c4 + 2, 2048 * c0 + 8 * c3 + 6, 256 * c1 + c5); +; CHECK-NEXT: Stmt_for_body6(96 * c2 + 4 * c4 + 2, 2048 * c0 + 8 * c3 + 7, 256 * c1 + c5); +; CHECK-NEXT: Stmt_for_body6(96 * c2 + 4 * c4 + 3, 2048 * c0 + 8 * c3, 256 * c1 + c5); +; CHECK-NEXT: Stmt_for_body6(96 * c2 + 4 * c4 + 3, 2048 * c0 + 8 * c3 + 1, 256 * c1 + c5); +; CHECK-NEXT: Stmt_for_body6(96 * c2 + 4 * c4 + 3, 2048 * c0 + 8 * c3 + 2, 256 * c1 + c5); +; CHECK-NEXT: Stmt_for_body6(96 * c2 + 4 * c4 + 3, 2048 * c0 + 8 * c3 + 3, 256 * c1 + c5); +; CHECK-NEXT: Stmt_for_body6(96 * c2 + 4 * c4 + 3, 2048 * c0 + 8 * c3 + 4, 256 * c1 + c5); +; CHECK-NEXT: Stmt_for_body6(96 * c2 + 4 * c4 + 3, 2048 * c0 + 8 * c3 + 5, 256 * c1 + c5); +; CHECK-NEXT: Stmt_for_body6(96 * c2 + 4 * c4 + 3, 2048 * c0 + 8 * c3 + 6, 256 * c1 + c5); +; CHECK-NEXT: Stmt_for_body6(96 * c2 + 4 * c4 + 3, 2048 * c0 + 8 * c3 + 7, 256 * c1 + c5); +; CHECK-NEXT: } +; CHECK-NEXT: } +; CHECK-NEXT: if (96 * c2 + 95 >= ni && ni % 4 >= 1) +; CHECK-NEXT: for (int c5 = 0; c5 <= min(255, nk - 256 * c1 - 1); c5 += 1) { +; CHECK-NEXT: // Loop Vectorizer Disabled +; CHECK-NEXT: // Register tiling - Points +; CHECK-NEXT: { +; CHECK-NEXT: Stmt_for_body6(-((ni + 4) % 4) + ni, 2048 * c0 + 8 * c3, 256 * c1 + c5); +; CHECK-NEXT: Stmt_for_body6(-((ni + 4) % 4) + ni, 2048 * c0 + 8 * c3 + 1, 256 * c1 + c5); +; CHECK-NEXT: Stmt_for_body6(-((ni + 4) % 4) + ni, 2048 * c0 + 8 * c3 + 2, 256 * c1 + c5); +; CHECK-NEXT: Stmt_for_body6(-((ni + 4) % 4) + ni, 2048 * c0 + 8 * c3 + 3, 256 * c1 + c5); +; CHECK-NEXT: Stmt_for_body6(-((ni + 4) % 4) + ni, 2048 * c0 + 8 * c3 + 4, 256 * c1 + c5); +; CHECK-NEXT: Stmt_for_body6(-((ni + 4) % 4) + ni, 2048 * c0 + 8 * c3 + 5, 256 * c1 + c5); +; CHECK-NEXT: Stmt_for_body6(-((ni + 4) % 4) + ni, 2048 * c0 + 8 * c3 + 6, 256 * c1 + c5); +; CHECK-NEXT: Stmt_for_body6(-((ni + 4) % 4) + ni, 2048 * c0 + 8 * c3 + 7, 256 * c1 + c5); +; CHECK-NEXT: if (ni % 4 >= 2) { +; CHECK-NEXT: Stmt_for_body6(-((ni + 4) % 4) + ni + 1, 2048 * c0 + 8 * c3, 256 * c1 + c5); +; CHECK-NEXT: Stmt_for_body6(-((ni + 4) % 4) + ni + 1, 2048 * c0 + 8 * c3 + 1, 256 * c1 + c5); +; CHECK-NEXT: Stmt_for_body6(-((ni + 4) % 4) + ni + 1, 2048 * c0 + 8 * c3 + 2, 256 * c1 + c5); +; CHECK-NEXT: Stmt_for_body6(-((ni + 4) % 4) + ni + 1, 2048 * c0 + 8 * c3 + 3, 256 * c1 + c5); +; CHECK-NEXT: Stmt_for_body6(-((ni + 4) % 4) + ni + 1, 2048 * c0 + 8 * c3 + 4, 256 * c1 + c5); +; CHECK-NEXT: Stmt_for_body6(-((ni + 4) % 4) + ni + 1, 2048 * c0 + 8 * c3 + 5, 256 * c1 + c5); +; CHECK-NEXT: Stmt_for_body6(-((ni + 4) % 4) + ni + 1, 2048 * c0 + 8 * c3 + 6, 256 * c1 + c5); +; CHECK-NEXT: Stmt_for_body6(-((ni + 4) % 4) + ni + 1, 2048 * c0 + 8 * c3 + 7, 256 * c1 + c5); +; CHECK-NEXT: if ((ni + 1) % 4 == 0) { +; CHECK-NEXT: Stmt_for_body6(ni - 1, 2048 * c0 + 8 * c3, 256 * c1 + c5); +; CHECK-NEXT: Stmt_for_body6(ni - 1, 2048 * c0 + 8 * c3 + 1, 256 * c1 + c5); +; CHECK-NEXT: Stmt_for_body6(ni - 1, 2048 * c0 + 8 * c3 + 2, 256 * c1 + c5); +; CHECK-NEXT: Stmt_for_body6(ni - 1, 2048 * c0 + 8 * c3 + 3, 256 * c1 + c5); +; CHECK-NEXT: Stmt_for_body6(ni - 1, 2048 * c0 + 8 * c3 + 4, 256 * c1 + c5); +; CHECK-NEXT: Stmt_for_body6(ni - 1, 2048 * c0 + 8 * c3 + 5, 256 * c1 + c5); +; CHECK-NEXT: Stmt_for_body6(ni - 1, 2048 * c0 + 8 * c3 + 6, 256 * c1 + c5); +; CHECK-NEXT: Stmt_for_body6(ni - 1, 2048 * c0 + 8 * c3 + 7, 256 * c1 + c5); +; CHECK-NEXT: } +; CHECK-NEXT: } +; CHECK-NEXT: } +; CHECK-NEXT: } +; CHECK-NEXT: } +; CHECK-NEXT: if (96 * c2 + 3 >= ni || (2048 * c0 + 2047 >= nj && nj % 8 >= 1)) { +; CHECK-NEXT: if (96 * c2 + 3 >= ni) { +; CHECK-NEXT: for (int c3 = 0; c3 <= min(255, -256 * c0 + (nj - 1) / 8); c3 += 1) +; CHECK-NEXT: for (int c5 = 0; c5 <= min(255, nk - 256 * c1 - 1); c5 += 1) { +; CHECK-NEXT: // Loop Vectorizer Disabled +; CHECK-NEXT: // Register tiling - Points +; CHECK-NEXT: { +; CHECK-NEXT: Stmt_for_body6(96 * c2, 2048 * c0 + 8 * c3, 256 * c1 + c5); +; CHECK-NEXT: if (nj >= 2048 * c0 + 8 * c3 + 2) { +; CHECK-NEXT: Stmt_for_body6(96 * c2, 2048 * c0 + 8 * c3 + 1, 256 * c1 + c5); +; CHECK-NEXT: if (nj >= 2048 * c0 + 8 * c3 + 3) { +; CHECK-NEXT: Stmt_for_body6(96 * c2, 2048 * c0 + 8 * c3 + 2, 256 * c1 + c5); +; CHECK-NEXT: if (nj >= 2048 * c0 + 8 * c3 + 4) { +; CHECK-NEXT: Stmt_for_body6(96 * c2, 2048 * c0 + 8 * c3 + 3, 256 * c1 + c5); +; CHECK-NEXT: if (nj >= 2048 * c0 + 8 * c3 + 5) { +; CHECK-NEXT: Stmt_for_body6(96 * c2, 2048 * c0 + 8 * c3 + 4, 256 * c1 + c5); +; CHECK-NEXT: if (nj >= 2048 * c0 + 8 * c3 + 6) { +; CHECK-NEXT: Stmt_for_body6(96 * c2, 2048 * c0 + 8 * c3 + 5, 256 * c1 + c5); +; CHECK-NEXT: if (nj >= 2048 * c0 + 8 * c3 + 7) { +; CHECK-NEXT: Stmt_for_body6(96 * c2, 2048 * c0 + 8 * c3 + 6, 256 * c1 + c5); +; CHECK-NEXT: if (nj >= 2048 * c0 + 8 * c3 + 8) +; CHECK-NEXT: Stmt_for_body6(96 * c2, 2048 * c0 + 8 * c3 + 7, 256 * c1 + c5); +; CHECK-NEXT: } +; CHECK-NEXT: } +; CHECK-NEXT: } +; CHECK-NEXT: } +; CHECK-NEXT: } +; CHECK-NEXT: } +; CHECK-NEXT: if (ni >= 96 * c2 + 2) { +; CHECK-NEXT: Stmt_for_body6(96 * c2 + 1, 2048 * c0 + 8 * c3, 256 * c1 + c5); +; CHECK-NEXT: if (nj >= 2048 * c0 + 8 * c3 + 2) { +; CHECK-NEXT: Stmt_for_body6(96 * c2 + 1, 2048 * c0 + 8 * c3 + 1, 256 * c1 + c5); +; CHECK-NEXT: if (nj >= 2048 * c0 + 8 * c3 + 3) { +; CHECK-NEXT: Stmt_for_body6(96 * c2 + 1, 2048 * c0 + 8 * c3 + 2, 256 * c1 + c5); +; CHECK-NEXT: if (nj >= 2048 * c0 + 8 * c3 + 4) { +; CHECK-NEXT: Stmt_for_body6(96 * c2 + 1, 2048 * c0 + 8 * c3 + 3, 256 * c1 + c5); +; CHECK-NEXT: if (nj >= 2048 * c0 + 8 * c3 + 5) { +; CHECK-NEXT: Stmt_for_body6(96 * c2 + 1, 2048 * c0 + 8 * c3 + 4, 256 * c1 + c5); +; CHECK-NEXT: if (nj >= 2048 * c0 + 8 * c3 + 6) { +; CHECK-NEXT: Stmt_for_body6(96 * c2 + 1, 2048 * c0 + 8 * c3 + 5, 256 * c1 + c5); +; CHECK-NEXT: if (nj >= 2048 * c0 + 8 * c3 + 7) { +; CHECK-NEXT: Stmt_for_body6(96 * c2 + 1, 2048 * c0 + 8 * c3 + 6, 256 * c1 + c5); +; CHECK-NEXT: if (nj >= 2048 * c0 + 8 * c3 + 8) +; CHECK-NEXT: Stmt_for_body6(96 * c2 + 1, 2048 * c0 + 8 * c3 + 7, 256 * c1 + c5); +; CHECK-NEXT: } +; CHECK-NEXT: } +; CHECK-NEXT: } +; CHECK-NEXT: } +; CHECK-NEXT: } +; CHECK-NEXT: } +; CHECK-NEXT: if (96 * c2 + 3 == ni) { +; CHECK-NEXT: Stmt_for_body6(ni - 1, 2048 * c0 + 8 * c3, 256 * c1 + c5); +; CHECK-NEXT: if (nj >= 2048 * c0 + 8 * c3 + 2) { +; CHECK-NEXT: Stmt_for_body6(ni - 1, 2048 * c0 + 8 * c3 + 1, 256 * c1 + c5); +; CHECK-NEXT: if (nj >= 2048 * c0 + 8 * c3 + 3) { +; CHECK-NEXT: Stmt_for_body6(ni - 1, 2048 * c0 + 8 * c3 + 2, 256 * c1 + c5); +; CHECK-NEXT: if (nj >= 2048 * c0 + 8 * c3 + 4) { +; CHECK-NEXT: Stmt_for_body6(ni - 1, 2048 * c0 + 8 * c3 + 3, 256 * c1 + c5); +; CHECK-NEXT: if (nj >= 2048 * c0 + 8 * c3 + 5) { +; CHECK-NEXT: Stmt_for_body6(ni - 1, 2048 * c0 + 8 * c3 + 4, 256 * c1 + c5); +; CHECK-NEXT: if (nj >= 2048 * c0 + 8 * c3 + 6) { +; CHECK-NEXT: Stmt_for_body6(ni - 1, 2048 * c0 + 8 * c3 + 5, 256 * c1 + c5); +; CHECK-NEXT: if (nj >= 2048 * c0 + 8 * c3 + 7) { +; CHECK-NEXT: Stmt_for_body6(ni - 1, 2048 * c0 + 8 * c3 + 6, 256 * c1 + c5); +; CHECK-NEXT: if (nj >= 2048 * c0 + 8 * c3 + 8) +; CHECK-NEXT: Stmt_for_body6(ni - 1, 2048 * c0 + 8 * c3 + 7, 256 * c1 + c5); +; CHECK-NEXT: } +; CHECK-NEXT: } +; CHECK-NEXT: } +; CHECK-NEXT: } +; CHECK-NEXT: } +; CHECK-NEXT: } +; CHECK-NEXT: } +; CHECK-NEXT: } +; CHECK-NEXT: } +; CHECK-NEXT: } +; CHECK-NEXT: } else { +; CHECK-NEXT: for (int c4 = 0; c4 <= min(23, -24 * c2 + (ni - 1) / 4); c4 += 1) +; CHECK-NEXT: for (int c5 = 0; c5 <= min(255, nk - 256 * c1 - 1); c5 += 1) { +; CHECK-NEXT: // Loop Vectorizer Disabled +; CHECK-NEXT: // Register tiling - Points +; CHECK-NEXT: { +; CHECK-NEXT: Stmt_for_body6(96 * c2 + 4 * c4, -(nj % 8) + nj, 256 * c1 + c5); +; CHECK-NEXT: if (nj % 8 >= 2) { +; CHECK-NEXT: Stmt_for_body6(96 * c2 + 4 * c4, -(nj % 8) + nj + 1, 256 * c1 + c5); +; CHECK-NEXT: if (nj % 8 >= 3) { +; CHECK-NEXT: Stmt_for_body6(96 * c2 + 4 * c4, -(nj % 8) + nj + 2, 256 * c1 + c5); +; CHECK-NEXT: if (nj % 8 >= 4) { +; CHECK-NEXT: Stmt_for_body6(96 * c2 + 4 * c4, -(nj % 8) + nj + 3, 256 * c1 + c5); +; CHECK-NEXT: if (nj % 8 >= 5) { +; CHECK-NEXT: Stmt_for_body6(96 * c2 + 4 * c4, -(nj % 8) + nj + 4, 256 * c1 + c5); +; CHECK-NEXT: if (nj % 8 >= 6) { +; CHECK-NEXT: Stmt_for_body6(96 * c2 + 4 * c4, -(nj % 8) + nj + 5, 256 * c1 + c5); +; CHECK-NEXT: if ((nj + 1) % 8 == 0) +; CHECK-NEXT: Stmt_for_body6(96 * c2 + 4 * c4, nj - 1, 256 * c1 + c5); +; CHECK-NEXT: } +; CHECK-NEXT: } +; CHECK-NEXT: } +; CHECK-NEXT: } +; CHECK-NEXT: } +; CHECK-NEXT: if (ni >= 96 * c2 + 4 * c4 + 2) { +; CHECK-NEXT: Stmt_for_body6(96 * c2 + 4 * c4 + 1, -(nj % 8) + nj, 256 * c1 + c5); +; CHECK-NEXT: if (nj % 8 >= 2) { +; CHECK-NEXT: Stmt_for_body6(96 * c2 + 4 * c4 + 1, -(nj % 8) + nj + 1, 256 * c1 + c5); +; CHECK-NEXT: if (nj % 8 >= 3) { +; CHECK-NEXT: Stmt_for_body6(96 * c2 + 4 * c4 + 1, -(nj % 8) + nj + 2, 256 * c1 + c5); +; CHECK-NEXT: if (nj % 8 >= 4) { +; CHECK-NEXT: Stmt_for_body6(96 * c2 + 4 * c4 + 1, -(nj % 8) + nj + 3, 256 * c1 + c5); +; CHECK-NEXT: if (nj % 8 >= 5) { +; CHECK-NEXT: Stmt_for_body6(96 * c2 + 4 * c4 + 1, -(nj % 8) + nj + 4, 256 * c1 + c5); +; CHECK-NEXT: if (nj % 8 >= 6) { +; CHECK-NEXT: Stmt_for_body6(96 * c2 + 4 * c4 + 1, -(nj % 8) + nj + 5, 256 * c1 + c5); +; CHECK-NEXT: if ((nj + 1) % 8 == 0) +; CHECK-NEXT: Stmt_for_body6(96 * c2 + 4 * c4 + 1, nj - 1, 256 * c1 + c5); +; CHECK-NEXT: } +; CHECK-NEXT: } +; CHECK-NEXT: } +; CHECK-NEXT: } +; CHECK-NEXT: } +; CHECK-NEXT: if (ni >= 96 * c2 + 4 * c4 + 3) { +; CHECK-NEXT: Stmt_for_body6(96 * c2 + 4 * c4 + 2, -(nj % 8) + nj, 256 * c1 + c5); +; CHECK-NEXT: if (nj % 8 >= 2) { +; CHECK-NEXT: Stmt_for_body6(96 * c2 + 4 * c4 + 2, -(nj % 8) + nj + 1, 256 * c1 + c5); +; CHECK-NEXT: if (nj % 8 >= 3) { +; CHECK-NEXT: Stmt_for_body6(96 * c2 + 4 * c4 + 2, -(nj % 8) + nj + 2, 256 * c1 + c5); +; CHECK-NEXT: if (nj % 8 >= 4) { +; CHECK-NEXT: Stmt_for_body6(96 * c2 + 4 * c4 + 2, -(nj % 8) + nj + 3, 256 * c1 + c5); +; CHECK-NEXT: if (nj % 8 >= 5) { +; CHECK-NEXT: Stmt_for_body6(96 * c2 + 4 * c4 + 2, -(nj % 8) + nj + 4, 256 * c1 + c5); +; CHECK-NEXT: if (nj % 8 >= 6) { +; CHECK-NEXT: Stmt_for_body6(96 * c2 + 4 * c4 + 2, -(nj % 8) + nj + 5, 256 * c1 + c5); +; CHECK-NEXT: if ((nj + 1) % 8 == 0) +; CHECK-NEXT: Stmt_for_body6(96 * c2 + 4 * c4 + 2, nj - 1, 256 * c1 + c5); +; CHECK-NEXT: } +; CHECK-NEXT: } +; CHECK-NEXT: } +; CHECK-NEXT: } +; CHECK-NEXT: } +; CHECK-NEXT: if (ni >= 96 * c2 + 4 * c4 + 4) { +; CHECK-NEXT: Stmt_for_body6(96 * c2 + 4 * c4 + 3, -(nj % 8) + nj, 256 * c1 + c5); +; CHECK-NEXT: if (nj % 8 >= 2) { +; CHECK-NEXT: Stmt_for_body6(96 * c2 + 4 * c4 + 3, -(nj % 8) + nj + 1, 256 * c1 + c5); +; CHECK-NEXT: if (nj % 8 >= 3) { +; CHECK-NEXT: Stmt_for_body6(96 * c2 + 4 * c4 + 3, -(nj % 8) + nj + 2, 256 * c1 + c5); +; CHECK-NEXT: if (nj % 8 >= 4) { +; CHECK-NEXT: Stmt_for_body6(96 * c2 + 4 * c4 + 3, -(nj % 8) + nj + 3, 256 * c1 + c5); +; CHECK-NEXT: if (nj % 8 >= 5) { +; CHECK-NEXT: Stmt_for_body6(96 * c2 + 4 * c4 + 3, -(nj % 8) + nj + 4, 256 * c1 + c5); +; CHECK-NEXT: if (nj % 8 >= 6) { +; CHECK-NEXT: Stmt_for_body6(96 * c2 + 4 * c4 + 3, -(nj % 8) + nj + 5, 256 * c1 + c5); +; CHECK-NEXT: if ((nj + 1) % 8 == 0) +; CHECK-NEXT: Stmt_for_body6(96 * c2 + 4 * c4 + 3, nj - 1, 256 * c1 + c5); +; CHECK-NEXT: } +; CHECK-NEXT: } +; CHECK-NEXT: } +; CHECK-NEXT: } +; CHECK-NEXT: } +; CHECK-NEXT: } +; CHECK-NEXT: } +; CHECK-NEXT: } +; CHECK-NEXT: } +; CHECK-NEXT: } ; CHECK-NEXT: } ; CHECK-NEXT: } -; CHECK-NEXT: if (96 * c2 + 95 >= ni) -; CHECK-NEXT: for (int c5 = 0; c5 <= min(255, nk - 256 * c1 - 1); c5 += 1) { -; CHECK-NEXT: // Loop Vectorizer Disabled -; CHECK-NEXT: // Register tiling - Points -; CHECK-NEXT: for (int c6 = 0; c6 < ni % 4; c6 += 1) -; CHECK-NEXT: for (int c7 = 0; c7 <= 7; c7 += 1) -; CHECK-NEXT: Stmt_for_body6(-((ni + 4) % 4) + ni + c6, 2048 * c0 + 8 * c3 + c7, 256 * c1 + c5); -; CHECK-NEXT: } +; CHECK-NEXT: } ; CHECK-NEXT: } -; CHECK-NEXT: if (96 * c2 + 3 >= ni || (2048 * c0 + 2047 >= nj && nj % 8 >= 1)) -; CHECK-NEXT: for (int c3 = 0; c3 <= min(255, -256 * c0 + (nj - 1) / 8); c3 += 1) -; CHECK-NEXT: if (96 * c2 + 3 >= ni || 2048 * c0 + 8 * c3 + 7 >= nj) -; CHECK-NEXT: for (int c4 = 0; c4 <= min(23, -24 * c2 + (ni - 1) / 4); c4 += 1) -; CHECK-NEXT: if ((ni >= 96 * c2 + 4 && 2048 * c0 + 8 * c3 + 7 >= nj) || 1) -; CHECK-NEXT: for (int c5 = 0; c5 <= min(255, nk - 256 * c1 - 1); c5 += 1) { -; CHECK-NEXT: // Loop Vectorizer Disabled -; CHECK-NEXT: // Register tiling - Points -; CHECK-NEXT: for (int c6 = 0; c6 <= min(3, ni - 96 * c2 - 4 * c4 - 1); c6 += 1) -; CHECK-NEXT: for (int c7 = 0; c7 <= min(7, nj - 2048 * c0 - 8 * c3 - 1); c7 += 1) -; CHECK-NEXT: Stmt_for_body6(96 * c2 + 4 * c4 + c6, 2048 * c0 + 8 * c3 + c7, 256 * c1 + c5); -; CHECK-NEXT: } -; CHECK-NEXT: } +; CHECK-NEXT: } ; CHECK-NEXT: } -; CHECK-NEXT: } -; CHECK-NEXT: } ; ; AUTO-VECTORIZATION: fmul <4 x double> Index: test/ScheduleOptimizer/pattern-matching-based-opts_6.ll =================================================================== --- test/ScheduleOptimizer/pattern-matching-based-opts_6.ll +++ test/ScheduleOptimizer/pattern-matching-based-opts_6.ll @@ -90,17 +90,32 @@ ; CHECK-NEXT: Stmt_for_body6(96 * c2 + 4 * c4 + 3, 8 * c3 + 7, 256 * c1 + c5); ; CHECK-NEXT: } ; CHECK-NEXT: } -; CHECK-NEXT: for (int c4 = 0; c4 <= min(23, -24 * c2 + 254); c4 += 1) -; CHECK-NEXT: for (int c5 = 0; c5 <= min(255, -256 * c1 + 1019); c5 += 1) { -; CHECK-NEXT: // Loop Vectorizer Disabled -; CHECK-NEXT: // Register tiling - Points -; CHECK-NEXT: for (int c6 = 0; c6 <= 3; c6 += 1) -; CHECK-NEXT: for (int c7 = 0; c7 <= 3; c7 += 1) -; CHECK-NEXT: Stmt_for_body6(96 * c2 + 4 * c4 + c6, c7 + 1016, 256 * c1 + c5); +; CHECK-NEXT: for (int c4 = 0; c4 <= min(23, -24 * c2 + 254); c4 += 1) +; CHECK-NEXT: for (int c5 = 0; c5 <= min(255, -256 * c1 + 1019); c5 += 1) { +; CHECK-NEXT: // Loop Vectorizer Disabled +; CHECK-NEXT: // Register tiling - Points +; CHECK-NEXT: { +; CHECK-NEXT: Stmt_for_body6(96 * c2 + 4 * c4, 1016, 256 * c1 + c5); +; CHECK-NEXT: Stmt_for_body6(96 * c2 + 4 * c4, 1017, 256 * c1 + c5); +; CHECK-NEXT: Stmt_for_body6(96 * c2 + 4 * c4, 1018, 256 * c1 + c5); +; CHECK-NEXT: Stmt_for_body6(96 * c2 + 4 * c4, 1019, 256 * c1 + c5); +; CHECK-NEXT: Stmt_for_body6(96 * c2 + 4 * c4 + 1, 1016, 256 * c1 + c5); +; CHECK-NEXT: Stmt_for_body6(96 * c2 + 4 * c4 + 1, 1017, 256 * c1 + c5); +; CHECK-NEXT: Stmt_for_body6(96 * c2 + 4 * c4 + 1, 1018, 256 * c1 + c5); +; CHECK-NEXT: Stmt_for_body6(96 * c2 + 4 * c4 + 1, 1019, 256 * c1 + c5); +; CHECK-NEXT: Stmt_for_body6(96 * c2 + 4 * c4 + 2, 1016, 256 * c1 + c5); +; CHECK-NEXT: Stmt_for_body6(96 * c2 + 4 * c4 + 2, 1017, 256 * c1 + c5); +; CHECK-NEXT: Stmt_for_body6(96 * c2 + 4 * c4 + 2, 1018, 256 * c1 + c5); +; CHECK-NEXT: Stmt_for_body6(96 * c2 + 4 * c4 + 2, 1019, 256 * c1 + c5); +; CHECK-NEXT: Stmt_for_body6(96 * c2 + 4 * c4 + 3, 1016, 256 * c1 + c5); +; CHECK-NEXT: Stmt_for_body6(96 * c2 + 4 * c4 + 3, 1017, 256 * c1 + c5); +; CHECK-NEXT: Stmt_for_body6(96 * c2 + 4 * c4 + 3, 1018, 256 * c1 + c5); +; CHECK-NEXT: Stmt_for_body6(96 * c2 + 4 * c4 + 3, 1019, 256 * c1 + c5); +; CHECK-NEXT: } +; CHECK-NEXT: } ; CHECK-NEXT: } +; CHECK-NEXT: } ; CHECK-NEXT: } -; CHECK-NEXT: } -; CHECK-NEXT: } ; ; AUTO-VECTORIZATION: fmul <4 x double> ; AUTO-VECTORIZATION: fadd <4 x double>