Index: include/polly/CodeGen/IslAst.h =================================================================== --- include/polly/CodeGen/IslAst.h +++ include/polly/CodeGen/IslAst.h @@ -24,6 +24,7 @@ #include "polly/Config/config.h" #include "polly/ScopPass.h" +#include "llvm/ADT/SetVector.h" #include "isl/ast.h" @@ -46,6 +47,11 @@ public: using MemoryAccessSet = SmallPtrSet; + // Use a SetVector for the loop set to ensure deterministic output when + // iterating over the loops in the set. + using LoopSet = SetVector, + SmallPtrSet >; + /// @brief Payload information used to annotate an AST node. struct IslAstUserPayload { /// @brief Construct and initialize the payload. @@ -74,6 +80,9 @@ /// @brief Set of accesses which break reduction dependences. MemoryAccessSet BrokenReductions; + + /// @brief Set of loops associated with the node. + LoopSet Loops; }; private: @@ -129,6 +138,19 @@ /// @brief Get the nodes broken reductions or a nullptr if not available. static MemoryAccessSet *getBrokenReductions(__isl_keep isl_ast_node *Node); + /// @brief Get the original loops associated with the new node or a nullptr + /// if not available. + /// + /// This routine attempts to map Loop*'s to isl_ast nodes. The mapping + /// is done based on the dimension that moves in the innermost isl for loop. + /// Each moving dimension in the new innermost loop is mapped back to a (set + /// of) llvm loops that correspond to that original dimension. + static LoopSet *getLoops(__isl_keep isl_ast_node *Node); + + /// @brief If getLoops() would return exactly one loop, return that loop. + /// Otherwise return nullptr. + static const Loop *getUniqueLoop(__isl_keep isl_ast_node *Node); + /// @brief Get the nodes build context or a nullptr if not available. 
static __isl_give isl_ast_build *getBuild(__isl_keep isl_ast_node *Node); Index: lib/CodeGen/IslAst.cpp =================================================================== --- lib/CodeGen/IslAst.cpp +++ lib/CodeGen/IslAst.cpp @@ -25,7 +25,9 @@ #include "polly/LinkAllPasses.h" #include "polly/Options.h" #include "polly/ScopInfo.h" +#include "llvm/ADT/SmallString.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" #include "isl/union_map.h" #include "isl/list.h" @@ -136,6 +138,18 @@ return str; } +/// @brief Return a pragma string for annotating an isl ast with an LLVM loop. +static std::string getLLVMLoopPragmaStr(const Loop *L) { + SmallString<128> Buf; + raw_svector_ostream OS(Buf); + OS << "#llvm loop(" << L->getHeader()->getName(); + if (BasicBlock *Latch = L->getLoopLatch()) + OS << ", " << Latch->getName(); + OS << ")"; + OS << " depth(" << L->getLoopDepth() << ")"; + return OS.str(); +} + /// @brief Callback executed for each for node in the ast in order to print it. static isl_printer *cbPrintFor(__isl_take isl_printer *Printer, __isl_take isl_ast_print_options *Options, @@ -145,6 +159,10 @@ const std::string SimdPragmaStr = "#pragma simd"; const std::string OmpPragmaStr = "#pragma omp parallel for"; + if (IslAstInfo::LoopSet *Loops = IslAstInfo::getLoops(Node)) + for (const Loop *L : *Loops) + Printer = printLine(Printer, getLLVMLoopPragmaStr(L)); + if (IslAstInfo::isInnermostParallel(Node)) Printer = printLine(Printer, SimdPragmaStr + BrokenReductionsStr); @@ -197,6 +215,83 @@ return true; } +/// @brief Collect loop annotations from the original loops surrounding @p Body. +/// +/// We will look at all statements in @p Body and all loops formerly surrounding +/// those statements and aggregate their loop annotations if they are involved +/// in the __new__ innermost dimension. 
+static void collectLoopAnnotations(__isl_take isl_ast_node *Body, + IslAstUserPayload *Payload) { + // Recurse for block and conditional statements but extract the annotations + // once a user ast node was found. + switch (isl_ast_node_get_type(Body)) { + case isl_ast_node_block: { + isl_ast_node_list *List = isl_ast_node_block_get_children(Body); + for (int i = 0; i < isl_ast_node_list_n_ast_node(List); ++i) + collectLoopAnnotations(isl_ast_node_list_get_ast_node(List, i), Payload); + isl_ast_node_list_free(List); + break; + } + case isl_ast_node_if: { + collectLoopAnnotations(isl_ast_node_if_get_then(Body), Payload); + if (isl_ast_node_if_has_else(Body)) + collectLoopAnnotations(isl_ast_node_if_get_else(Body), Payload); + break; + } + case isl_ast_node_user: { + isl_ast_expr *Expr, *UserExpr; + isl_pw_multi_aff *ScatPMA; + isl_pw_aff *ScatPA; + isl_id *Id; + + UserExpr = isl_ast_node_user_get_expr(Body); + Expr = isl_ast_expr_get_op_arg(UserExpr, 0); + Id = isl_ast_expr_get_id(Expr); + + ScopStmt *Stmt = (ScopStmt *)isl_id_get_user(Id); + assert(Stmt->getNumIterators() && "Unexpected scattering found"); + + // Find the highest/innermost dimension which is not constant. + ScatPMA = isl_pw_multi_aff_from_map(Stmt->getScattering()); + unsigned pos = isl_pw_multi_aff_dim(ScatPMA, isl_dim_out); + assert(pos && "Unexpected scattering found"); + + ScatPA = nullptr; + do { + isl_pw_aff_free(ScatPA); + ScatPA = isl_pw_multi_aff_get_pw_aff(ScatPMA, --pos); + } while (pos && isl_pw_aff_is_cst(ScatPA)); + + // If a non constant dimension was found check for loops. + if (!isl_pw_aff_is_cst(ScatPA)) { + + // Get rid of the constraints caused by the domain. + ScatPA = isl_pw_aff_gist(ScatPA, Stmt->getDomain()); + + // For each input dimension we check if it is actually used in the + // innermost + // (now only) dimension. If so we can get the corresponding Loop and check + // for annotations. 
+ for (unsigned u = 0, e = Stmt->getNumIterators(); u != e; u++) + if (isl_pw_aff_involves_dims(ScatPA, isl_dim_in, u, 1)) + if (const Loop *L = Stmt->getLoopForDimension(u)) + Payload->Loops.insert(L); + } + + isl_pw_multi_aff_free(ScatPMA); + isl_ast_expr_free(UserExpr); + isl_ast_expr_free(Expr); + isl_pw_aff_free(ScatPA); + isl_id_free(Id); + break; + } + default: + llvm_unreachable("Loop body was unexpected"); + } + + isl_ast_node_free(Body); +} + // This method is executed before the construction of a for node. It creates // an isl_id that is used to annotate the subsequently generated ast for nodes. // @@ -253,6 +348,11 @@ if (Payload->IsOutermostParallel) BuildInfo->InParallelFor = false; + // For innermost loops collect all loop annotations from the original loop(s) + // involved in this new innermost dimension. + if (Payload->IsInnermost) + collectLoopAnnotations(isl_ast_node_for_get_body(Node), Payload); + isl_id_free(Id); return Node; } @@ -414,6 +514,18 @@ return Payload ? &Payload->BrokenReductions : nullptr; } +IslAstInfo::LoopSet *IslAstInfo::getLoops(__isl_keep isl_ast_node *Node) { + IslAstUserPayload *Payload = getNodePayload(Node); + return Payload ? &Payload->Loops : nullptr; +} + +const Loop *IslAstInfo::getUniqueLoop(__isl_keep isl_ast_node *Node) { + if (LoopSet *Loops = getLoops(Node)) + if (Loops->size() == 1) + return *Loops->begin(); + return nullptr; +} + isl_ast_build *IslAstInfo::getBuild(__isl_keep isl_ast_node *Node) { IslAstUserPayload *Payload = getNodePayload(Node); return Payload ? 
Payload->Build : nullptr; Index: test/Isl/Ast/llvm_loop_to_isl_ast_node_for_1d.ll =================================================================== --- /dev/null +++ test/Isl/Ast/llvm_loop_to_isl_ast_node_for_1d.ll @@ -0,0 +1,38 @@ +; RUN: opt %loadPolly -basicaa -polly-opt-isl -polly-ast -polly-no-tiling -polly-vectorizer=polly -analyze < %s | FileCheck %s +; +; CHECK: for (int c0 = 0; c0 <= 9; c0 += 4) +; CHECK: #llvm loop(for.cond, for.inc) depth(1) +; CHECK: for (int c1 = c0; c1 <= min(9, c0 + 3); c1 += 1) +; CHECK: Stmt_for_body(c1); +; +; void test_1d(int *A) { +; for (int i = 0; i < 10; ++i) { +; A[i] += 1; +; } +; } +; +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" + +define void @test_1d(i32* %A) { +entry: + br label %for.cond + +for.cond: ; preds = %for.inc, %entry + %indvars.iv = phi i64 [ %indvars.iv.next, %for.inc ], [ 0, %entry ] + %exitcond = icmp ne i64 %indvars.iv, 10 + br i1 %exitcond, label %for.body, label %for.end + +for.body: ; preds = %for.cond + %arrayidx = getelementptr inbounds i32* %A, i64 %indvars.iv + %tmp = load i32* %arrayidx, align 4 + %add = add nsw i32 %tmp, 1 + store i32 %add, i32* %arrayidx, align 4 + br label %for.inc + +for.inc: ; preds = %for.body + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + br label %for.cond + +for.end: ; preds = %for.cond + ret void +} Index: test/Isl/Ast/llvm_loop_to_isl_ast_node_for_1d_fusion.ll =================================================================== --- /dev/null +++ test/Isl/Ast/llvm_loop_to_isl_ast_node_for_1d_fusion.ll @@ -0,0 +1,60 @@ +; RUN: opt %loadPolly -basicaa -polly-opt-isl -polly-ast -polly-opt-fusion=max -polly-no-tiling -polly-vectorizer=polly -analyze < %s | FileCheck %s +; +; CHECK: #llvm loop(for.cond, for.inc) depth(1) +; CHECK: #llvm loop(for.cond2, for.inc8) depth(1) +; CHECK: for (int c0 = 0; c0 <= 9; c0 += 1) { +; CHECK: Stmt_for_body(c0); +; CHECK: Stmt_for_body4(c0); +; CHECK: } +; +; void test_1d_fusion(int *A) { +; for (int i = 0; i < 10; 
++i) +; A[i] += 1; +; for (int i = 0; i < 10; ++i) +; A[i] += 2; +; } +; +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" + +define void @test_1d_fusion(i32* %A) { +entry: + br label %for.cond + +for.cond: ; preds = %for.inc, %entry + %indvars.iv1 = phi i64 [ %indvars.iv.next2, %for.inc ], [ 0, %entry ] + %exitcond3 = icmp ne i64 %indvars.iv1, 10 + br i1 %exitcond3, label %for.body, label %for.end + +for.body: ; preds = %for.cond + %arrayidx = getelementptr inbounds i32* %A, i64 %indvars.iv1 + %tmp = load i32* %arrayidx, align 4 + %add = add nsw i32 %tmp, 1 + store i32 %add, i32* %arrayidx, align 4 + br label %for.inc + +for.inc: ; preds = %for.body + %indvars.iv.next2 = add nuw nsw i64 %indvars.iv1, 1 + br label %for.cond + +for.end: ; preds = %for.cond + br label %for.cond2 + +for.cond2: ; preds = %for.inc8, %for.end + %indvars.iv = phi i64 [ %indvars.iv.next, %for.inc8 ], [ 0, %for.end ] + %exitcond = icmp ne i64 %indvars.iv, 10 + br i1 %exitcond, label %for.body4, label %for.end10 + +for.body4: ; preds = %for.cond2 + %arrayidx6 = getelementptr inbounds i32* %A, i64 %indvars.iv + %tmp4 = load i32* %arrayidx6, align 4 + %add7 = add nsw i32 %tmp4, 2 + store i32 %add7, i32* %arrayidx6, align 4 + br label %for.inc8 + +for.inc8: ; preds = %for.body4 + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + br label %for.cond2 + +for.end10: ; preds = %for.cond2 + ret void +} Index: test/Isl/Ast/llvm_loop_to_isl_ast_node_for_2d.ll =================================================================== --- /dev/null +++ test/Isl/Ast/llvm_loop_to_isl_ast_node_for_2d.ll @@ -0,0 +1,57 @@ +; RUN: opt %loadPolly -basicaa -polly-opt-isl -polly-ast -polly-no-tiling -polly-vectorizer=polly -analyze < %s | FileCheck %s +; +; CHECK: for (int c0 = 0; c0 <= 99; c0 += 1) +; CHECK: for (int c1 = 0; c1 <= 15; c1 += 4) +; CHECK: #llvm loop(for.cond1, for.inc) depth(2) +; CHECK: for (int c2 = c1; c2 <= c1 + 3; c2 += 1) +; CHECK: Stmt_for_body3(c0, c2); +; +; int A2[100][100]; +; 
void test_2d() { +; for (int i = 0; i < 100; ++i) +; for (int j = 0; j < 16; ++j) +; A2[i][j] += 1; +; } +; +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" + +@A2 = common global [100 x [100 x i32]] zeroinitializer, align 16 + +define void @test_2d() { +entry: + br label %for.cond + +for.cond: ; preds = %for.inc6, %entry + %indvars.iv1 = phi i64 [ %indvars.iv.next2, %for.inc6 ], [ 0, %entry ] + %exitcond3 = icmp ne i64 %indvars.iv1, 100 + br i1 %exitcond3, label %for.body, label %for.end8 + +for.body: ; preds = %for.cond + br label %for.cond1 + +for.cond1: ; preds = %for.inc, %for.body + %indvars.iv = phi i64 [ %indvars.iv.next, %for.inc ], [ 0, %for.body ] + %exitcond = icmp ne i64 %indvars.iv, 16 + br i1 %exitcond, label %for.body3, label %for.end + +for.body3: ; preds = %for.cond1 + %arrayidx5 = getelementptr inbounds [100 x [100 x i32]]* @A2, i64 0, i64 %indvars.iv1, i64 %indvars.iv + %tmp = load i32* %arrayidx5, align 4 + %add = add nsw i32 %tmp, 1 + store i32 %add, i32* %arrayidx5, align 4 + br label %for.inc + +for.inc: ; preds = %for.body3 + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + br label %for.cond1 + +for.end: ; preds = %for.cond1 + br label %for.inc6 + +for.inc6: ; preds = %for.end + %indvars.iv.next2 = add nuw nsw i64 %indvars.iv1, 1 + br label %for.cond + +for.end8: ; preds = %for.cond + ret void +} Index: test/Isl/Ast/llvm_loop_to_isl_ast_node_for_2d_flatten.ll =================================================================== --- /dev/null +++ test/Isl/Ast/llvm_loop_to_isl_ast_node_for_2d_flatten.ll @@ -0,0 +1,61 @@ +; RUN: opt %loadPolly -basicaa -polly-opt-isl -polly-ast -polly-no-tiling -polly-vectorizer=polly -analyze < %s | FileCheck %s +; +; XFAIL: * +; FIXME: Should this be marked with both loops, or just the outer loop? 
+; CHECK: #llvm loop(for.cond, for.inc12) depth(1) +; CHECK: #llvm loop(for.cond1, for.inc) depth(2) +; CHECK: for (int c0 = 0; c0 <= 1599; c0 += 1) +; CHECK: Stmt_for_body3(c0 / 100, c0 % 100); +; +; int A2[100][100]; +; void test_2d_flatten() { +; for (int i = 0; i < 16; ++i) +; for (int j = 0; j < 100; ++j) +; A2[i + 1][j + 1] += A2[i][j]; +; } +; +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" + +@A2 = common global [100 x [100 x i32]] zeroinitializer, align 16 + +define void @test_2d_flatten() { +entry: + br label %for.cond + +for.cond: ; preds = %for.inc12, %entry + %indvars.iv1 = phi i64 [ %indvars.iv.next2, %for.inc12 ], [ 0, %entry ] + %exitcond4 = icmp ne i64 %indvars.iv1, 16 + br i1 %exitcond4, label %for.body, label %for.end14 + +for.body: ; preds = %for.cond + br label %for.cond1 + +for.cond1: ; preds = %for.inc, %for.body + %indvars.iv = phi i64 [ %indvars.iv.next, %for.inc ], [ 0, %for.body ] + %exitcond = icmp ne i64 %indvars.iv, 100 + br i1 %exitcond, label %for.body3, label %for.end + +for.body3: ; preds = %for.cond1 + %arrayidx5 = getelementptr inbounds [100 x [100 x i32]]* @A2, i64 0, i64 %indvars.iv1, i64 %indvars.iv + %tmp = load i32* %arrayidx5, align 4 + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + %tmp5 = add nsw i64 %indvars.iv1, 1 + %arrayidx10 = getelementptr inbounds [100 x [100 x i32]]* @A2, i64 0, i64 %tmp5, i64 %indvars.iv.next + %tmp6 = load i32* %arrayidx10, align 4 + %add11 = add nsw i32 %tmp6, %tmp + store i32 %add11, i32* %arrayidx10, align 4 + br label %for.inc + +for.inc: ; preds = %for.body3 + br label %for.cond1 + +for.end: ; preds = %for.cond1 + br label %for.inc12 + +for.inc12: ; preds = %for.end + %indvars.iv.next2 = add nuw nsw i64 %indvars.iv1, 1 + br label %for.cond + +for.end14: ; preds = %for.cond + ret void +} Index: test/Isl/Ast/llvm_loop_to_isl_ast_node_for_2d_inner.ll =================================================================== --- /dev/null +++ 
test/Isl/Ast/llvm_loop_to_isl_ast_node_for_2d_inner.ll @@ -0,0 +1,60 @@ +; RUN: opt %loadPolly -basicaa -polly-opt-isl -polly-ast -polly-no-tiling -polly-vectorizer=polly -analyze < %s | FileCheck %s +; +; CHECK: for (int c0 = 0; c0 <= 99; c0 += 4) +; CHECK: for (int c1 = 0; c1 <= 15; c1 += 1) +; CHECK: #llvm loop(for.cond1, for.inc) depth(2) +; CHECK: for (int c2 = c0; c2 <= c0 + 3; c2 += 1) +; CHECK: Stmt_for_body3(c1, c2); +; +; int A2[100][100]; +; void test_2d_inner() { +; for (int i = 0; i < 16; ++i) +; for (int j = 0; j < 100; ++j) +; A2[j][i + 1] += A2[j][i]; +; } +; +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" + +@A2 = common global [100 x [100 x i32]] zeroinitializer, align 16 + +define void @test_2d_inner() { +entry: + br label %for.cond + +for.cond: ; preds = %for.inc11, %entry + %indvars.iv1 = phi i64 [ %indvars.iv.next2, %for.inc11 ], [ 0, %entry ] + %exitcond4 = icmp ne i64 %indvars.iv1, 16 + br i1 %exitcond4, label %for.body, label %for.end13 + +for.body: ; preds = %for.cond + br label %for.cond1 + +for.cond1: ; preds = %for.inc, %for.body + %indvars.iv = phi i64 [ %indvars.iv.next, %for.inc ], [ 0, %for.body ] + %exitcond = icmp ne i64 %indvars.iv, 100 + br i1 %exitcond, label %for.body3, label %for.end + +for.body3: ; preds = %for.cond1 + %arrayidx5 = getelementptr inbounds [100 x [100 x i32]]* @A2, i64 0, i64 %indvars.iv, i64 %indvars.iv1 + %tmp = load i32* %arrayidx5, align 4 + %tmp5 = add nsw i64 %indvars.iv1, 1 + %arrayidx9 = getelementptr inbounds [100 x [100 x i32]]* @A2, i64 0, i64 %indvars.iv, i64 %tmp5 + %tmp6 = load i32* %arrayidx9, align 4 + %add10 = add nsw i32 %tmp6, %tmp + store i32 %add10, i32* %arrayidx9, align 4 + br label %for.inc + +for.inc: ; preds = %for.body3 + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + br label %for.cond1 + +for.end: ; preds = %for.cond1 + br label %for.inc11 + +for.inc11: ; preds = %for.end + %indvars.iv.next2 = add nuw nsw i64 %indvars.iv1, 1 + br label %for.cond + +for.end13: ; 
preds = %for.cond + ret void +} Index: test/Isl/Ast/llvm_loop_to_isl_ast_node_for_2d_outer.ll =================================================================== --- /dev/null +++ test/Isl/Ast/llvm_loop_to_isl_ast_node_for_2d_outer.ll @@ -0,0 +1,59 @@ +; RUN: opt %loadPolly -basicaa -polly-opt-isl -polly-ast -polly-no-tiling -polly-vectorizer=polly -analyze < %s | FileCheck %s +; +; CHECK: for (int c0 = 0; c0 <= 15; c0 += 4) +; CHECK: for (int c1 = 0; c1 <= 99; c1 += 1) +; CHECK: #llvm loop(for.cond, for.inc11) depth(1) +; CHECK: for (int c2 = c0; c2 <= c0 + 3; c2 += 1) +; CHECK: Stmt_for_body3(c2, c1); +; +; int A2[100][100]; +; void test_2d_outer() { +; for (int i = 0; i < 16; ++i) +; for (int j = 0; j < 100; ++j) +; A2[j + 1][i] += A2[j][i]; +; } +; +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" + +@A2 = common global [100 x [100 x i32]] zeroinitializer, align 16 + +define void @test_2d_outer() { +entry: + br label %for.cond + +for.cond: ; preds = %for.inc11, %entry + %indvars.iv1 = phi i64 [ %indvars.iv.next2, %for.inc11 ], [ 0, %entry ] + %exitcond3 = icmp ne i64 %indvars.iv1, 16 + br i1 %exitcond3, label %for.body, label %for.end13 + +for.body: ; preds = %for.cond + br label %for.cond1 + +for.cond1: ; preds = %for.inc, %for.body + %indvars.iv = phi i64 [ %indvars.iv.next, %for.inc ], [ 0, %for.body ] + %exitcond = icmp ne i64 %indvars.iv, 100 + br i1 %exitcond, label %for.body3, label %for.end + +for.body3: ; preds = %for.cond1 + %arrayidx5 = getelementptr inbounds [100 x [100 x i32]]* @A2, i64 0, i64 %indvars.iv, i64 %indvars.iv1 + %tmp = load i32* %arrayidx5, align 4 + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + %arrayidx9 = getelementptr inbounds [100 x [100 x i32]]* @A2, i64 0, i64 %indvars.iv.next, i64 %indvars.iv1 + %tmp4 = load i32* %arrayidx9, align 4 + %add10 = add nsw i32 %tmp4, %tmp + store i32 %add10, i32* %arrayidx9, align 4 + br label %for.inc + +for.inc: ; preds = %for.body3 + br label %for.cond1 + +for.end: ; preds = 
%for.cond1 + br label %for.inc11 + +for.inc11: ; preds = %for.end + %indvars.iv.next2 = add nuw nsw i64 %indvars.iv1, 1 + br label %for.cond + +for.end13: ; preds = %for.cond + ret void +} Index: test/Isl/Ast/llvm_loop_to_isl_ast_node_for_2d_skew.ll =================================================================== --- /dev/null +++ test/Isl/Ast/llvm_loop_to_isl_ast_node_for_2d_skew.ll @@ -0,0 +1,60 @@ +; RUN: opt %loadPolly -basicaa -polly-opt-isl -polly-ast -polly-no-tiling -polly-vectorizer=polly -analyze < %s | FileCheck %s +; +; CHECK: for (int c0 = 0; c0 <= 15; c0 += 1) +; CHECK: #llvm loop(for.cond, for.inc12) depth(1) +; CHECK: #llvm loop(for.cond1, for.inc) depth(2) +; CHECK: for (int c1 = c0; c1 <= c0 + 99; c1 += 1) +; CHECK: Stmt_for_body3(c0, -c0 + c1); +; +; int A2[100][100]; +; void test_2d_skew() { +; for (int i = 0; i < 16; ++i) +; for (int j = 0; j < 100; ++j) +; A2[i + 1][j] += A2[i][j + 1]; +; } +; +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" + +@A2 = common global [100 x [100 x i32]] zeroinitializer, align 16 + +define void @test_2d_skew() { +entry: + br label %for.cond + +for.cond: ; preds = %for.inc12, %entry + %indvars.iv1 = phi i64 [ %indvars.iv.next2, %for.inc12 ], [ 0, %entry ] + %exitcond4 = icmp ne i64 %indvars.iv1, 16 + br i1 %exitcond4, label %for.body, label %for.end14 + +for.body: ; preds = %for.cond + br label %for.cond1 + +for.cond1: ; preds = %for.inc, %for.body + %indvars.iv = phi i64 [ %indvars.iv.next, %for.inc ], [ 0, %for.body ] + %exitcond = icmp ne i64 %indvars.iv, 100 + br i1 %exitcond, label %for.body3, label %for.end + +for.body3: ; preds = %for.cond1 + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + %arrayidx5 = getelementptr inbounds [100 x [100 x i32]]* @A2, i64 0, i64 %indvars.iv1, i64 %indvars.iv.next + %tmp = load i32* %arrayidx5, align 4 + %tmp5 = add nsw i64 %indvars.iv1, 1 + %arrayidx10 = getelementptr inbounds [100 x [100 x i32]]* @A2, i64 0, i64 %tmp5, i64 %indvars.iv + %tmp6 = load i32* 
%arrayidx10, align 4 + %add11 = add nsw i32 %tmp6, %tmp + store i32 %add11, i32* %arrayidx10, align 4 + br label %for.inc + +for.inc: ; preds = %for.body3 + br label %for.cond1 + +for.end: ; preds = %for.cond1 + br label %for.inc12 + +for.inc12: ; preds = %for.end + %indvars.iv.next2 = add nuw nsw i64 %indvars.iv1, 1 + br label %for.cond + +for.end14: ; preds = %for.cond + ret void +}