Index: include/polly/ScopInfo.h =================================================================== --- include/polly/ScopInfo.h +++ include/polly/ScopInfo.h @@ -1198,7 +1198,15 @@ std::vector Instructions, int Count); /// Create an overapproximating ScopStmt for the region @p R. - ScopStmt(Scop &parent, Region &R, Loop *SurroundingLoop); + /// + /// @param EntryBlockInstructions The list of instructions that belong to the + /// entry block of the region statement. + /// Instructions are only tracked for entry + /// blocks for now. We currently do not allow + /// modifying the instructions of blocks later + /// in the region statement. + ScopStmt(Scop &parent, Region &R, Loop *SurroundingLoop, + std::vector EntryBlockInstructions); /// Create a copy statement. /// @@ -2194,9 +2202,13 @@ /// A new statement for @p R will be created and added to the statement vector /// and map. /// - /// @param R The region we build the statement for. - /// @param SurroundingLoop The loop the created statement is contained in. - void addScopStmt(Region *R, Loop *SurroundingLoop); + /// @param R The region we build the statement for. + /// @param SurroundingLoop The loop the created statement is contained + /// in. + /// @param EntryBlockInstructions The (interesting) instructions in the + /// entry block of the region statement. + void addScopStmt(Region *R, Loop *SurroundingLoop, + std::vector EntryBlockInstructions); /// Update access dimensionalities. 
/// Index: lib/Analysis/ScopBuilder.cpp =================================================================== --- lib/Analysis/ScopBuilder.cpp +++ lib/Analysis/ScopBuilder.cpp @@ -661,9 +661,14 @@ void ScopBuilder::buildStmts(Region &SR) { if (scop->isNonAffineSubRegion(&SR)) { + std::vector Instructions; Loop *SurroundingLoop = getFirstNonBoxedLoopFor(SR.getEntry(), LI, scop->getBoxedLoops()); - scop->addScopStmt(&SR, SurroundingLoop); + for (Instruction &Inst : *SR.getEntry()) + if (!isa(&Inst) && !isIgnoredIntrinsic(&Inst) && + !canSynthesize(&Inst, *scop, &SE, SurroundingLoop)) + Instructions.push_back(&Inst); + scop->addScopStmt(&SR, SurroundingLoop, Instructions); return; } Index: lib/Analysis/ScopInfo.cpp =================================================================== --- lib/Analysis/ScopInfo.cpp +++ lib/Analysis/ScopInfo.cpp @@ -1676,9 +1676,13 @@ ConditionSets); } -ScopStmt::ScopStmt(Scop &parent, Region &R, Loop *SurroundingLoop) +ScopStmt::ScopStmt(Scop &parent, Region &R, Loop *SurroundingLoop, + std::vector EntryBlockInstructions) : Parent(parent), InvalidDomain(nullptr), Domain(nullptr), R(&R), - Build(nullptr), SurroundingLoop(SurroundingLoop) { + Build(nullptr), SurroundingLoop(SurroundingLoop), + Instructions(EntryBlockInstructions) + +{ BaseName = getIslCompatibleName( "Stmt", R.getNameStr(), parent.getNextStmtIdx(), "", UseInstructionNames); } @@ -1779,7 +1783,7 @@ for (MemoryAccess *Access : MemAccs) Access->print(OS); - if (PrintInstructions && isBlockStmt()) + if (PrintInstructions) printInstructions(OS.indent(12)); } @@ -3588,13 +3592,17 @@ } void Scop::removeFromStmtMap(ScopStmt &Stmt) { - if (Stmt.isRegionStmt()) + if (Stmt.isRegionStmt()) { + for (Instruction *Inst : Stmt.getInstructions()) + InstStmtMap.erase(Inst); for (BasicBlock *BB : Stmt.getRegion()->blocks()) { StmtMap.erase(BB); + if (BB == Stmt.getEntryBlock()) + continue; for (Instruction &Inst : *BB) InstStmtMap.erase(&Inst); } - else { + } else { 
StmtMap.erase(Stmt.getBasicBlock()); for (Instruction *Inst : Stmt.getInstructions()) InstStmtMap.erase(Inst); @@ -4681,9 +4689,10 @@ } } -void Scop::addScopStmt(Region *R, Loop *SurroundingLoop) { +void Scop::addScopStmt(Region *R, Loop *SurroundingLoop, + std::vector Instructions) { assert(R && "Unexpected nullptr!"); - Stmts.emplace_back(*this, *R, SurroundingLoop); + Stmts.emplace_back(*this, *R, SurroundingLoop, Instructions); auto *Stmt = &Stmts.back(); for (BasicBlock *BB : R->blocks()) { StmtMap[BB].push_back(Stmt); Index: lib/CodeGen/BlockGenerators.cpp =================================================================== --- lib/CodeGen/BlockGenerators.cpp +++ lib/CodeGen/BlockGenerators.cpp @@ -450,7 +450,12 @@ isl_id_to_ast_expr *NewAccesses) { EntryBB = &CopyBB->getParent()->getEntryBlock(); - if (Stmt.isBlockStmt()) + // Block statements and the entry blocks of region statements are code + // generated from instruction lists. This allows us to optimize the + // instructions that belong to a certain scop statement. As the code + // structure of region statements might be arbitrarily complex, optimizing the + // instruction list is not yet supported. 
+ if (Stmt.isBlockStmt() || (Stmt.isRegionStmt() && Stmt.getEntryBlock() == BB)) for (Instruction *Inst : Stmt.getInstructions()) copyInstruction(Stmt, Inst, BBMap, LTS, NewAccesses); else Index: test/Isl/CodeGen/partial_write_in_region_with_loop.ll =================================================================== --- test/Isl/CodeGen/partial_write_in_region_with_loop.ll +++ test/Isl/CodeGen/partial_write_in_region_with_loop.ll @@ -9,8 +9,15 @@ ; CHECK:polly.stmt.bb3: ; CHECK-NEXT: %polly.subregion.iv = phi i32 [ %polly.subregion.iv.inc, %polly.stmt.bb5.cont ], [ 0, %polly.stmt.bb3.entry ] ; CHECK-NEXT: %polly.j.0 = phi i64 [ %j.0.phiops.reload, %polly.stmt.bb3.entry ], [ %p_tmp10, %polly.stmt.bb5.cont ] -; CHECK-NEXT: %p_tmp = mul nsw i64 %polly.indvar, %polly.indvar -; CHECK-NEXT: %p_tmp4 = icmp slt i64 %polly.j.0, %p_tmp +; CHECK-NEXT: %8 = zext i64 %polly.indvar to i65 +; CHECK-NEXT: %9 = add i64 %polly.indvar, -1 +; CHECK-NEXT: %10 = zext i64 %9 to i65 +; CHECK-NEXT: %11 = mul i65 %8, %10 +; CHECK-NEXT: %12 = lshr i65 %11, 1 +; CHECK-NEXT: %13 = trunc i65 %12 to i64 +; CHECK-NEXT: %14 = shl i64 %13, 1 +; CHECK-NEXT: %15 = add i64 %polly.indvar, %14 +; CHECK-NEXT: %p_tmp4 = icmp slt i64 %polly.j.0, %15 ; CHECK-NEXT: %polly.subregion.iv.inc = add i32 %polly.subregion.iv, 1 ; CHECK-NEXT: br i1 %p_tmp4, label %polly.stmt.bb5, label %polly.stmt.bb11.exit @@ -18,8 +25,8 @@ ; CHECK-NEXT: %p_tmp6 = getelementptr inbounds float, float* %B, i64 42 ; CHECK-NEXT: %tmp7_p_scalar_ = load float, float* %p_tmp6 ; CHECK-NEXT: %p_tmp8 = fadd float %tmp7_p_scalar_, 1.000000e+00 -; CHECK-NEXT: %8 = icmp sle i64 %polly.indvar, 9 -; CHECK-NEXT: %polly.Stmt_bb3__TO__bb11_MayWrite2.cond = icmp ne i1 %8, false +; CHECK-NEXT: %16 = icmp sle i64 %polly.indvar, 9 +; CHECK-NEXT: %polly.Stmt_bb3__TO__bb11_MayWrite2.cond = icmp ne i1 %16, false ; CHECK-NEXT: br i1 %polly.Stmt_bb3__TO__bb11_MayWrite2.cond, label %polly.stmt.bb5.Stmt_bb3__TO__bb11_MayWrite2.partial, label 
%polly.stmt.bb5.cont ; CHECK:polly.stmt.bb5.Stmt_bb3__TO__bb11_MayWrite2.partial: ; preds = %polly.stmt.bb5 Index: test/Isl/CodeGen/region-with-instructions.ll =================================================================== --- /dev/null +++ test/Isl/CodeGen/region-with-instructions.ll @@ -0,0 +1,47 @@ +; RUN: opt %loadPolly -polly-codegen -S < %s | FileCheck %s + +; CHECK-LABEL: polly.stmt.bb48: +; CHECK-NEXT: %scevgep = getelementptr i64, i64* %A, i64 %polly.indvar +; CHECK-NEXT: %tmp51_p_scalar_ = load i64, i64* %scevgep, +; CHECK-NEXT: %p_tmp52 = and i64 %tmp51_p_scalar_, %tmp26 +; CHECK-NEXT: %p_tmp53 = icmp eq i64 %p_tmp52, %tmp26 +; CHECK-NEXT: store i64 42, i64* %scevgep, align 8 +; CHECK-NEXT: br i1 %p_tmp53, label %polly.stmt.bb54, label %polly.stmt.bb56.exit + +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" + +define void @quux(i32 %arg, i32 %arg1, i64* %A, i64 %tmp9, i64 %tmp24, i64 %tmp14, i64 %tmp22, i64 %tmp44) { +bb: + %tmp26 = or i64 %tmp22, %tmp24 + br label %bb39 + +bb39: ; preds = %bb39, %bb38 + %tmp45 = icmp eq i64 %tmp44, %tmp9 + br i1 %tmp45, label %bb46, label %bb81 + +bb46: ; preds = %bb39 + %tmp47 = or i64 1, %tmp14 + br label %bb48 + +bb48: ; preds = %bb56, %bb46 + %tmp49 = phi i64 [ 0, %bb46 ], [ %tmp57, %bb56 ] + %tmp50 = getelementptr inbounds i64, i64* %A, i64 %tmp49 + %tmp51 = load i64, i64* %tmp50, align 8 + %tmp52 = and i64 %tmp51, %tmp26 + %tmp53 = icmp eq i64 %tmp52, %tmp26 + store i64 42, i64* %tmp50, align 8 + br i1 %tmp53, label %bb54, label %bb56 + +bb54: ; preds = %bb48 + %tmp55 = xor i64 %tmp51, %tmp47 + store i64 %tmp55, i64* %tmp50, align 8 + br label %bb56 + +bb56: ; preds = %bb54, %bb48 + %tmp57 = add nuw nsw i64 %tmp49, 1 + %tmp58 = icmp eq i64 %tmp57, %tmp9 + br i1 %tmp58, label %bb81, label %bb48 + +bb81: ; preds = %bb74, %bb56 + ret void +} Index: test/ScopInfo/region-with-instructions.ll =================================================================== --- /dev/null +++ 
test/ScopInfo/region-with-instructions.ll @@ -0,0 +1,72 @@ +; RUN: opt %loadPolly -polly-scops -analyze -polly-print-instructions \ +; RUN: < %s | FileCheck %s + +; CHECK: Statements { +; CHECK: Stmt_bb46 +; CHECK: Domain := +; CHECK: [tmp44, tmp9] -> { Stmt_bb46[] : tmp9 = tmp44 }; +; CHECK: Schedule := +; CHECK: [tmp44, tmp9] -> { Stmt_bb46[] -> [0, 0] }; +; CHECK: MustWriteAccess := [Reduction Type: NONE] [Scalar: 1] +; CHECK: [tmp44, tmp9] -> { Stmt_bb46[] -> MemRef_tmp47[] }; +; CHECK: Instructions { +; CHECK: %tmp47 = or i64 1, %tmp14 +; CHECK: } +; CHECK: Stmt_bb48__TO__bb56 +; CHECK: Domain := +; CHECK: [tmp44, tmp9] -> { Stmt_bb48__TO__bb56[i0] : tmp9 = tmp44 and 0 <= i0 < tmp44 }; +; CHECK: Schedule := +; CHECK: [tmp44, tmp9] -> { Stmt_bb48__TO__bb56[i0] -> [1, i0] }; +; CHECK: ReadAccess := [Reduction Type: NONE] [Scalar: 0] +; CHECK: [tmp44, tmp9] -> { Stmt_bb48__TO__bb56[i0] -> MemRef_A[i0] }; +; CHECK: MustWriteAccess := [Reduction Type: NONE] [Scalar: 0] +; CHECK: [tmp44, tmp9] -> { Stmt_bb48__TO__bb56[i0] -> MemRef_A[i0] }; +; CHECK: ReadAccess := [Reduction Type: NONE] [Scalar: 1] +; CHECK: [tmp44, tmp9] -> { Stmt_bb48__TO__bb56[i0] -> MemRef_tmp47[] }; +; CHECK: MayWriteAccess := [Reduction Type: NONE] [Scalar: 0] +; CHECK: [tmp44, tmp9] -> { Stmt_bb48__TO__bb56[i0] -> MemRef_A[i0] }; +; CHECK: Instructions { +; CHECK: %tmp51 = load i64, i64* %tmp50, align 8 +; CHECK: %tmp52 = and i64 %tmp51, %tmp26 +; CHECK: %tmp53 = icmp eq i64 %tmp52, %tmp26 +; CHECK: store i64 42, i64* %tmp50, align 8 +; CHECK: } +; CHECK: } + +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" + +define void @quux(i32 %arg, i32 %arg1, i64* %A, i64 %tmp9, i64 %tmp24, i64 %tmp14, i64 %tmp22, i64 %tmp44) { +bb: + %tmp26 = or i64 %tmp22, %tmp24 + br label %bb39 + +bb39: ; preds = %bb39, %bb38 + %tmp45 = icmp eq i64 %tmp44, %tmp9 + br i1 %tmp45, label %bb46, label %bb81 + +bb46: ; preds = %bb39 + %tmp47 = or i64 1, %tmp14 + br label %bb48 + +bb48: ; preds = %bb56, %bb46 + 
%tmp49 = phi i64 [ 0, %bb46 ], [ %tmp57, %bb56 ] + %tmp50 = getelementptr inbounds i64, i64* %A, i64 %tmp49 + %tmp51 = load i64, i64* %tmp50, align 8 + %tmp52 = and i64 %tmp51, %tmp26 + %tmp53 = icmp eq i64 %tmp52, %tmp26 + store i64 42, i64* %tmp50, align 8 + br i1 %tmp53, label %bb54, label %bb56 + +bb54: ; preds = %bb48 + %tmp55 = xor i64 %tmp51, %tmp47 + store i64 %tmp55, i64* %tmp50, align 8 + br label %bb56 + +bb56: ; preds = %bb54, %bb48 + %tmp57 = add nuw nsw i64 %tmp49, 1 + %tmp58 = icmp eq i64 %tmp57, %tmp9 + br i1 %tmp58, label %bb81, label %bb48 + +bb81: ; preds = %bb74, %bb56 + ret void +} Index: test/ScopInfo/stmt_split_exit_of_region_stmt.ll =================================================================== --- test/ScopInfo/stmt_split_exit_of_region_stmt.ll +++ test/ScopInfo/stmt_split_exit_of_region_stmt.ll @@ -8,6 +8,9 @@ ; CHECK-NEXT: { Stmt_Region__TO__Stmt[i0] -> [i0, 0] }; ; CHECK-NEXT: MayWriteAccess := [Reduction Type: NONE] [Scalar: 0] ; CHECK-NEXT: { Stmt_Region__TO__Stmt[i0] -> MemRef_C[0] }; +; CHECK-NEXT: Instructions { +; CHECK-NEXT: %cond = fcmp oeq double 2.100000e+01, 2.100000e+01 +; CHECK-NEXT: } ; CHECK-NEXT: Stmt_Stmt ; CHECK-NEXT: Domain := ; CHECK-NEXT: { Stmt_Stmt[i0] : 0 <= i0 <= 1023 };