Index: polly/trunk/include/polly/CodeGen/IRBuilder.h =================================================================== --- polly/trunk/include/polly/CodeGen/IRBuilder.h +++ polly/trunk/include/polly/CodeGen/IRBuilder.h @@ -59,8 +59,8 @@ void annotate(llvm::Instruction *I); /// Annotate the loop latch @p B wrt. @p L. - void annotateLoopLatch(llvm::BranchInst *B, llvm::Loop *L, - bool IsParallel) const; + void annotateLoopLatch(llvm::BranchInst *B, llvm::Loop *L, bool IsParallel, + bool IsLoopVectorizerDisabled) const; /// Add alternative alias based pointers /// Index: polly/trunk/include/polly/CodeGen/LoopGenerators.h =================================================================== --- polly/trunk/include/polly/CodeGen/LoopGenerators.h +++ polly/trunk/include/polly/CodeGen/LoopGenerators.h @@ -31,29 +31,36 @@ /// Create a scalar do/for-style loop. /// -/// @param LowerBound The starting value of the induction variable. -/// @param UpperBound The upper bound of the induction variable. -/// @param Stride The value by which the induction variable is incremented. -/// -/// @param Builder The builder used to create the loop. -/// @param P A pointer to the pass that uses this function. It is used -/// to update analysis information. -/// @param LI The loop info for the current function -/// @param DT The dominator tree we need to update -/// @param ExitBlock The block the loop will exit to. -/// @param Predicate The predicate used to generate the upper loop bound. -/// @param Annotator This function can (optionally) take a ScopAnnotator which -/// annotates loops and alias information in the SCoP. -/// @param Parallel If this loop should be marked parallel in the Annotator. -/// @param UseGuard Create a guard in front of the header to check if the -/// loop is executed at least once, otherwise just assume it. +/// @param LowerBound The starting value of the induction variable. +/// @param UpperBound The upper bound of the induction variable. 
+/// @param Stride The value by which the induction variable +/// is incremented. +/// +/// @param Builder The builder used to create the loop. +/// @param P A pointer to the pass that uses this function. +/// It is used to update analysis information. +/// @param LI The loop info for the current function +/// @param DT The dominator tree we need to update +/// @param ExitBlock The block the loop will exit to. +/// @param Predicate The predicate used to generate the upper loop +/// bound. +/// @param Annotator This function can (optionally) take +/// a ScopAnnotator which +/// annotates loops and alias information in the SCoP. +/// @param Parallel If this loop should be marked parallel in +/// the Annotator. +/// @param UseGuard Create a guard in front of the header to check if +/// the loop is executed at least once, otherwise just +/// assume it. +/// @param LoopVectDisabled If the Loop vectorizer should be disabled for this +/// loop. /// /// @return Value* The newly created induction variable for this loop. 
Value *createLoop(Value *LowerBound, Value *UpperBound, Value *Stride, PollyIRBuilder &Builder, LoopInfo &LI, DominatorTree &DT, BasicBlock *&ExitBlock, ICmpInst::Predicate Predicate, ScopAnnotator *Annotator = NULL, bool Parallel = false, - bool UseGuard = true); + bool UseGuard = true, bool LoopVectDisabled = false); /// The ParallelLoopGenerator allows to create parallelized loops /// Index: polly/trunk/lib/CodeGen/IRBuilder.cpp =================================================================== --- polly/trunk/lib/CodeGen/IRBuilder.cpp +++ polly/trunk/lib/CodeGen/IRBuilder.cpp @@ -114,15 +114,27 @@ ParallelLoops.pop_back(); } -void ScopAnnotator::annotateLoopLatch(BranchInst *B, Loop *L, - bool IsParallel) const { - if (!IsParallel) - return; +void ScopAnnotator::annotateLoopLatch(BranchInst *B, Loop *L, bool IsParallel, + bool IsLoopVectorizerDisabled) const { + MDNode *MData = nullptr; + + if (IsLoopVectorizerDisabled) { + SmallVector<Metadata *, 3> Args; + LLVMContext &Ctx = SE->getContext(); + Args.push_back(MDString::get(Ctx, "llvm.loop.vectorize.enable")); + auto *FalseValue = ConstantInt::get(Type::getInt1Ty(Ctx), 0); + Args.push_back(ValueAsMetadata::get(FalseValue)); + MData = MDNode::concatenate(MData, getID(Ctx, MDNode::get(Ctx, Args))); + } + + if (IsParallel) { + assert(!ParallelLoops.empty() && "Expected a parallel loop to annotate"); + MDNode *Ids = ParallelLoops.back(); + MDNode *Id = cast<MDNode>(Ids->getOperand(Ids->getNumOperands() - 1)); + MData = MDNode::concatenate(MData, Id); + } - assert(!ParallelLoops.empty() && "Expected a parallel loop to annotate"); - MDNode *Ids = ParallelLoops.back(); - MDNode *Id = cast<MDNode>(Ids->getOperand(Ids->getNumOperands() - 1)); - B->setMetadata("llvm.loop", Id); + B->setMetadata("llvm.loop", MData); } /// Get the pointer operand Index: polly/trunk/lib/CodeGen/IslNodeBuilder.cpp =================================================================== --- polly/trunk/lib/CodeGen/IslNodeBuilder.cpp +++ 
polly/trunk/lib/CodeGen/IslNodeBuilder.cpp @@ -482,6 +482,27 @@ isl_ast_expr_free(Iterator); } +/// Check whether the loop vectorizer is disabled for a for-node. +/// +/// The loop vectorizer is considered disabled if the body of the for-node is +/// a mark node whose id is named "Loop Vectorizer Disabled". +/// +/// @param Node The for-node to inspect. +/// @return True if the body of @p Node is such a mark node, false +/// otherwise. +namespace { +bool IsLoopVectorizerDisabled(isl::ast_node Node) { + assert(isl_ast_node_get_type(Node.keep()) == isl_ast_node_for); + auto Body = Node.for_get_body(); + if (isl_ast_node_get_type(Body.keep()) != isl_ast_node_mark) + return false; + auto Id = Body.mark_get_id(); + if (!strcmp(Id.get_name().c_str(), "Loop Vectorizer Disabled")) + return true; + return false; +} +} // namespace + void IslNodeBuilder::createForSequential(__isl_take isl_ast_node *For, bool KnownParallel) { isl_ast_node *Body; @@ -497,6 +518,9 @@ Parallel = KnownParallel || (IslAstInfo::isParallel(For) && !IslAstInfo::isReductionParallel(For)); + bool LoopVectorizerDisabled = + IsLoopVectorizerDisabled(isl::manage(isl_ast_node_copy(For))); + Body = isl_ast_node_for_get_body(For); // isl_ast_node_for_is_degenerate(For) @@ -532,7 +556,8 @@ bool UseGuardBB = !SE.isKnownPredicate(Predicate, SE.getSCEV(ValueLB), SE.getSCEV(ValueUB)); IV = createLoop(ValueLB, ValueUB, ValueInc, Builder, LI, DT, ExitBlock, - Predicate, &Annotator, Parallel, UseGuardBB); + Predicate, &Annotator, Parallel, UseGuardBB, + LoopVectorizerDisabled); IDToValue[IteratorID] = IV; create(Body); Index: polly/trunk/lib/CodeGen/LoopGenerators.cpp =================================================================== --- polly/trunk/lib/CodeGen/LoopGenerators.cpp +++ polly/trunk/lib/CodeGen/LoopGenerators.cpp @@ -56,8 +56,8 @@ PollyIRBuilder &Builder, LoopInfo &LI, DominatorTree &DT, BasicBlock *&ExitBB, ICmpInst::Predicate Predicate, - ScopAnnotator *Annotator, bool Parallel, - bool UseGuard) { + ScopAnnotator 
*Annotator, bool Parallel, bool UseGuard, + bool LoopVectDisabled) { Function *F = Builder.GetInsertBlock()->getParent(); LLVMContext &Context = F->getContext(); @@ -132,7 +132,7 @@ // Create the loop latch and annotate it as such. BranchInst *B = Builder.CreateCondBr(LoopCondition, HeaderBB, ExitBB); if (Annotator) - Annotator->annotateLoopLatch(B, NewLoop, Parallel); + Annotator->annotateLoopLatch(B, NewLoop, Parallel, LoopVectDisabled); IV->addIncoming(IncrementedIV, HeaderBB); if (GuardBB) Index: polly/trunk/lib/Transform/ScheduleOptimizer.cpp =================================================================== --- polly/trunk/lib/Transform/ScheduleOptimizer.cpp +++ polly/trunk/lib/Transform/ScheduleOptimizer.cpp @@ -993,7 +993,7 @@ // Create a copy statement that corresponds to the memory access to the // matrix B, the second operand of the matrix multiplication. - Node = Node.parent().parent().parent().parent().parent(); + Node = Node.parent().parent().parent().parent().parent().parent(); Node = isl::manage(isl_schedule_node_band_split(Node.release(), 2)).child(0); auto AccRel = getMatMulAccRel(isl::manage(MapOldIndVar.copy()), 3, 7); unsigned FirstDimSize = MacroParams.Nc / MicroParams.Nr; @@ -1046,7 +1046,7 @@ ExtMap = ExtMap.intersect_range(Domain); ExtMap = ExtMap.set_tuple_id(isl::dim::out, NewStmt->getDomainId()); Node = createExtensionNode(Node, ExtMap); - return Node.child(0).child(0).child(0).child(0); + return Node.child(0).child(0).child(0).child(0).child(0); } /// Get a relation mapping induction variables produced by schedule @@ -1106,11 +1106,11 @@ isl::union_set Options = IsolateOption.unite(AtomicOption); Options = Options.unite(getUnrollIsolatedSetOptions(Ctx)); Node = Node.band_set_ast_build_options(Options); - Node = Node.parent().parent(); + Node = Node.parent().parent().parent(); IsolateOption = getIsolateOptions(Prefix, 3); Options = IsolateOption.unite(AtomicOption); Node = Node.band_set_ast_build_options(Options); - Node = 
Node.child(0).child(0); + Node = Node.child(0).child(0).child(0); return Node; } @@ -1129,6 +1129,15 @@ return Node.insert_mark(Id).child(0); } +/// Insert "Loop Vectorizer Disabled" mark node. +/// +/// @param Node The child of the mark node to be inserted. +/// @return The modified isl_schedule_node. +static isl::schedule_node markLoopVectorizerDisabled(isl::schedule_node Node) { + auto Id = isl::id::alloc(Node.get_ctx(), "Loop Vectorizer Disabled", nullptr); + return Node.insert_mark(Id).child(0); +} + /// Restore the initial ordering of dimensions of the band node /// /// In case the band node represents all the dimensions of the iteration @@ -1187,6 +1196,7 @@ MacroKernelParams); if (!MapOldIndVar) return Node; + Node = markLoopVectorizerDisabled(Node.parent()).child(0); Node = isolateAndUnrollMatMulInnerLoops(Node, MicroKernelParams); return optimizeDataLayoutMatrMulPattern(Node, MapOldIndVar, MicroKernelParams, MacroKernelParams, MMI); Index: polly/trunk/test/ScheduleOptimizer/mat_mul_pattern_data_layout_2.ll =================================================================== --- polly/trunk/test/ScheduleOptimizer/mat_mul_pattern_data_layout_2.ll +++ polly/trunk/test/ScheduleOptimizer/mat_mul_pattern_data_layout_2.ll @@ -42,6 +42,7 @@ ; CHECK-NEXT: for (int c3 = 0; c3 <= 131; c3 += 1) ; CHECK-NEXT: for (int c4 = 0; c4 <= 23; c4 += 1) ; CHECK-NEXT: for (int c5 = 0; c5 <= min(255, -256 * c1 + 1022); c5 += 1) { +; CHECK-NEXT: // Loop Vectorizer Disabled ; CHECK-NEXT: // Register tiling - Points ; CHECK-NEXT: { ; CHECK-NEXT: Stmt_Copy_0(96 * c2 + 4 * c4, 8 * c3, 256 * c1 + c5); Index: polly/trunk/test/ScheduleOptimizer/pattern-matching-based-opts_12.ll =================================================================== --- polly/trunk/test/ScheduleOptimizer/pattern-matching-based-opts_12.ll +++ polly/trunk/test/ScheduleOptimizer/pattern-matching-based-opts_12.ll @@ -26,6 +26,7 @@ ; CHECK-NEXT: for (int c3 = 0; c3 <= 30; c3 += 1) { ; CHECK-NEXT: for (int c4 = 0; 
c4 <= min(47, -48 * c2 + 126); c4 += 1) ; CHECK-NEXT: for (int c5 = 0; c5 <= min(511, -512 * c1 + 1019); c5 += 1) { +; CHECK-NEXT: // Loop Vectorizer Disabled ; CHECK-NEXT: // Register tiling - Points ; CHECK-NEXT: { ; CHECK-NEXT: Stmt_for_body6(384 * c2 + 8 * c4, 32 * c3, 512 * c1 + c5); @@ -288,6 +289,7 @@ ; CHECK-NEXT: } ; CHECK-NEXT: if (c2 == 2) ; CHECK-NEXT: for (int c5 = 0; c5 <= min(511, -512 * c1 + 1019); c5 += 1) { +; CHECK-NEXT: // Loop Vectorizer Disabled ; CHECK-NEXT: // Register tiling - Points ; CHECK-NEXT: for (int c6 = 0; c6 <= 3; c6 += 1) ; CHECK-NEXT: for (int c7 = 0; c7 <= 31; c7 += 1) @@ -296,6 +298,7 @@ ; CHECK-NEXT: } ; CHECK-NEXT: for (int c4 = 0; c4 <= min(47, -48 * c2 + 127); c4 += 1) ; CHECK-NEXT: for (int c5 = 0; c5 <= min(511, -512 * c1 + 1019); c5 += 1) { +; CHECK-NEXT: // Loop Vectorizer Disabled ; CHECK-NEXT: // Register tiling - Points ; CHECK-NEXT: for (int c6 = 0; c6 <= min(7, -384 * c2 - 8 * c4 + 1019); c6 += 1) ; CHECK-NEXT: for (int c7 = 0; c7 <= 27; c7 += 1) Index: polly/trunk/test/ScheduleOptimizer/pattern-matching-based-opts_13.ll =================================================================== --- polly/trunk/test/ScheduleOptimizer/pattern-matching-based-opts_13.ll +++ polly/trunk/test/ScheduleOptimizer/pattern-matching-based-opts_13.ll @@ -28,6 +28,7 @@ ; CHECK-NEXT: for (int c3 = 0; c3 <= min(255, -256 * c0 + 332); c3 += 1) ; CHECK-NEXT: for (int c4 = 0; c4 <= 15; c4 += 1) ; CHECK-NEXT: for (int c5 = 0; c5 <= min(306, -307 * c1 + 1999); c5 += 1) { +; CHECK-NEXT: // Loop Vectorizer Disabled ; CHECK-NEXT: // Register tiling - Points ; CHECK-NEXT: { ; CHECK-NEXT: Stmt_for_body6(80 * c2 + 5 * c4, 1536 * c0 + 6 * c3, 307 * c1 + c5); @@ -65,6 +66,7 @@ ; CHECK-NEXT: if (c0 == 1) ; CHECK-NEXT: for (int c4 = 0; c4 <= 15; c4 += 1) ; CHECK-NEXT: for (int c5 = 0; c5 <= min(306, -307 * c1 + 1999); c5 += 1) { +; CHECK-NEXT: // Loop Vectorizer Disabled ; CHECK-NEXT: // Register tiling - Points ; CHECK-NEXT: for (int c6 = 0; c6 <= 4; 
c6 += 1) ; CHECK-NEXT: for (int c7 = 0; c7 <= 1; c7 += 1) Index: polly/trunk/test/ScheduleOptimizer/pattern-matching-based-opts_14.ll =================================================================== --- polly/trunk/test/ScheduleOptimizer/pattern-matching-based-opts_14.ll +++ polly/trunk/test/ScheduleOptimizer/pattern-matching-based-opts_14.ll @@ -12,7 +12,10 @@ ; Check that we do not create different alias sets for locations represented by ; different raw pointers. ; +; Also check that we disable the Loop Vectorizer. +; ; CHECK-NOT: !76 = distinct !{!76, !5, !"second level alias metadata"} +; CHECK: !{!"llvm.loop.vectorize.enable", i1 false} ; target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" target triple = "x86_64-unknown-unknown" Index: polly/trunk/test/ScheduleOptimizer/pattern-matching-based-opts_3.ll =================================================================== --- polly/trunk/test/ScheduleOptimizer/pattern-matching-based-opts_3.ll +++ polly/trunk/test/ScheduleOptimizer/pattern-matching-based-opts_3.ll @@ -100,6 +100,7 @@ ; EXTRACTION-OF-MACRO-KERNEL-NEXT: for (int c3 = 0; c3 <= 131; c3 += 1) ; EXTRACTION-OF-MACRO-KERNEL-NEXT: for (int c4 = 0; c4 <= 23; c4 += 1) ; EXTRACTION-OF-MACRO-KERNEL-NEXT: for (int c5 = 0; c5 <= 255; c5 += 1) { +; EXTRACTION-OF-MACRO-KERNEL-NEXT: // Loop Vectorizer Disabled ; EXTRACTION-OF-MACRO-KERNEL-NEXT: // Register tiling - Points ; EXTRACTION-OF-MACRO-KERNEL-NEXT: { ; EXTRACTION-OF-MACRO-KERNEL-NEXT: Stmt_Copy_0(96 * c2 + 4 * c4, 8 * c3, 256 * c1 + c5); Index: polly/trunk/test/ScheduleOptimizer/pattern-matching-based-opts_4.ll =================================================================== --- polly/trunk/test/ScheduleOptimizer/pattern-matching-based-opts_4.ll +++ polly/trunk/test/ScheduleOptimizer/pattern-matching-based-opts_4.ll @@ -36,6 +36,7 @@ ; PATTERN-MATCHING-OPTS-NEXT: for (int c3 = 0; c3 <= 127; c3 += 1) ; PATTERN-MATCHING-OPTS-NEXT: for (int c4 = 0; c4 <= min(23, -24 * c2 + 255); c4 += 1) ; 
PATTERN-MATCHING-OPTS-NEXT: for (int c5 = 0; c5 <= 255; c5 += 1) { +; PATTERN-MATCHING-OPTS-NEXT: // Loop Vectorizer Disabled ; PATTERN-MATCHING-OPTS-NEXT: // Register tiling - Points ; PATTERN-MATCHING-OPTS-NEXT: { ; PATTERN-MATCHING-OPTS-NEXT: Stmt_for_body6(96 * c2 + 4 * c4, 256 * c1 + c5, 8 * c3); Index: polly/trunk/test/ScheduleOptimizer/pattern-matching-based-opts_5.ll =================================================================== --- polly/trunk/test/ScheduleOptimizer/pattern-matching-based-opts_5.ll +++ polly/trunk/test/ScheduleOptimizer/pattern-matching-based-opts_5.ll @@ -56,6 +56,7 @@ ; CHECK-NEXT: for (int c3 = 0; c3 <= min(255, -256 * c0 + nj / 8 - 1); c3 += 1) { ; CHECK-NEXT: for (int c4 = 0; c4 <= min(23, -24 * c2 + ni / 4 - 1); c4 += 1) ; CHECK-NEXT: for (int c5 = 0; c5 <= min(255, nk - 256 * c1 - 1); c5 += 1) { +; CHECK-NEXT: // Loop Vectorizer Disabled ; CHECK-NEXT: // Register tiling - Points ; CHECK-NEXT: { ; CHECK-NEXT: Stmt_for_body6(96 * c2 + 4 * c4, 2048 * c0 + 8 * c3, 256 * c1 + c5); @@ -94,6 +95,7 @@ ; CHECK-NEXT: } ; CHECK-NEXT: if (96 * c2 + 95 >= ni) ; CHECK-NEXT: for (int c5 = 0; c5 <= min(255, nk - 256 * c1 - 1); c5 += 1) { +; CHECK-NEXT: // Loop Vectorizer Disabled ; CHECK-NEXT: // Register tiling - Points ; CHECK-NEXT: for (int c6 = 0; c6 < ni % 4; c6 += 1) ; CHECK-NEXT: for (int c7 = 0; c7 <= 7; c7 += 1) @@ -106,6 +108,7 @@ ; CHECK-NEXT: for (int c4 = 0; c4 <= min(23, -24 * c2 + (ni - 1) / 4); c4 += 1) ; CHECK-NEXT: if ((ni >= 96 * c2 + 4 && 2048 * c0 + 8 * c3 + 7 >= nj) || 1) ; CHECK-NEXT: for (int c5 = 0; c5 <= min(255, nk - 256 * c1 - 1); c5 += 1) { +; CHECK-NEXT: // Loop Vectorizer Disabled ; CHECK-NEXT: // Register tiling - Points ; CHECK-NEXT: for (int c6 = 0; c6 <= min(3, ni - 96 * c2 - 4 * c4 - 1); c6 += 1) ; CHECK-NEXT: for (int c7 = 0; c7 <= min(7, nj - 2048 * c0 - 8 * c3 - 1); c7 += 1) Index: polly/trunk/test/ScheduleOptimizer/pattern-matching-based-opts_6.ll 
=================================================================== --- polly/trunk/test/ScheduleOptimizer/pattern-matching-based-opts_6.ll +++ polly/trunk/test/ScheduleOptimizer/pattern-matching-based-opts_6.ll @@ -53,6 +53,7 @@ ; CHECK-NEXT: for (int c3 = 0; c3 <= 126; c3 += 1) ; CHECK-NEXT: for (int c4 = 0; c4 <= min(23, -24 * c2 + 254); c4 += 1) ; CHECK-NEXT: for (int c5 = 0; c5 <= min(255, -256 * c1 + 1019); c5 += 1) { +; CHECK-NEXT: // Loop Vectorizer Disabled ; CHECK-NEXT: // Register tiling - Points ; CHECK-NEXT: { ; CHECK-NEXT: Stmt_for_body6(96 * c2 + 4 * c4, 8 * c3, 256 * c1 + c5); @@ -91,6 +92,7 @@ ; CHECK-NEXT: } ; CHECK-NEXT: for (int c4 = 0; c4 <= min(23, -24 * c2 + 254); c4 += 1) ; CHECK-NEXT: for (int c5 = 0; c5 <= min(255, -256 * c1 + 1019); c5 += 1) { +; CHECK-NEXT: // Loop Vectorizer Disabled ; CHECK-NEXT: // Register tiling - Points ; CHECK-NEXT: for (int c6 = 0; c6 <= 3; c6 += 1) ; CHECK-NEXT: for (int c7 = 0; c7 <= 3; c7 += 1) Index: polly/trunk/test/ScheduleOptimizer/pattern-matching-based-opts_7.ll =================================================================== --- polly/trunk/test/ScheduleOptimizer/pattern-matching-based-opts_7.ll +++ polly/trunk/test/ScheduleOptimizer/pattern-matching-based-opts_7.ll @@ -36,6 +36,7 @@ ; CHECK-NEXT: for (int c3 = 0; c3 <= 127; c3 += 1) ; CHECK-NEXT: for (int c4 = 0; c4 <= 15; c4 += 1) ; CHECK-NEXT: for (int c5 = 0; c5 <= min(383, -384 * c1 + 1023); c5 += 1) { +; CHECK-NEXT: // Loop Vectorizer Disabled ; CHECK-NEXT: // Register tiling - Points ; CHECK-NEXT: { ; CHECK-NEXT: Stmt_for_body6(128 * c2 + 8 * c4, 8 * c3, 384 * c1 + c5); Index: polly/trunk/test/ScheduleOptimizer/pattern-matching-based-opts_8.ll =================================================================== --- polly/trunk/test/ScheduleOptimizer/pattern-matching-based-opts_8.ll +++ polly/trunk/test/ScheduleOptimizer/pattern-matching-based-opts_8.ll @@ -37,6 +37,7 @@ ; CHECK-NEXT: for (int c3 = 0; c3 <= 127; c3 += 1) ; CHECK-NEXT: for (int 
c4 = 0; c4 <= min(23, -24 * c2 + 255); c4 += 1) ; CHECK-NEXT: for (int c5 = 0; c5 <= 255; c5 += 1) { +; CHECK-NEXT: // Loop Vectorizer Disabled ; CHECK-NEXT: // Register tiling - Points ; CHECK-NEXT: { ; CHECK-NEXT: Stmt_for_body6(96 * c2 + 4 * c4, 8 * c3, 256 * c1 + c5); Index: polly/trunk/test/ScheduleOptimizer/pattern-matching-based-opts_9.ll =================================================================== --- polly/trunk/test/ScheduleOptimizer/pattern-matching-based-opts_9.ll +++ polly/trunk/test/ScheduleOptimizer/pattern-matching-based-opts_9.ll @@ -43,6 +43,7 @@ ; CHECK-NEXT: for (int c3 = 0; c3 <= 31; c3 += 1) ; CHECK-NEXT: for (int c4 = 0; c4 <= min(47, -48 * c2 + 127); c4 += 1) ; CHECK-NEXT: for (int c5 = 0; c5 <= 511; c5 += 1) { +; CHECK-NEXT: // Loop Vectorizer Disabled ; CHECK-NEXT: // Register tiling - Points ; CHECK-NEXT: { ; CHECK-NEXT: Stmt_for_body6(384 * c2 + 8 * c4, 32 * c3, 512 * c1 + c5);