Index: include/polly/CodeGen/BlockGenerators.h =================================================================== --- include/polly/CodeGen/BlockGenerators.h +++ include/polly/CodeGen/BlockGenerators.h @@ -100,6 +100,9 @@ /// @brief The dominator tree of this function. DominatorTree &DT; + /// @brief Split @p BB to create a new one we can use to clone @p BB in. + BasicBlock *splitBB(BasicBlock *BB); + /// @brief Copy the given basic block. /// /// @param Stmt The statement to code generate. @@ -115,6 +118,20 @@ BasicBlock *copyBB(ScopStmt &Stmt, BasicBlock *BB, ValueMapT &BBMap, ValueMapT &GlobalMap, LoopToScevMapT <S); + /// @brief Copy the given basic block. + /// + /// @param Stmt The statement to code generate. + /// @param BB The basic block to code generate. + /// @param BBCopy The new basic block to generate code in. + /// @param BBMap A mapping from old values to their new values in this + /// block. + /// @param GlobalMap A mapping from old values to their new values + /// (for values recalculated in the new ScoP, but not + /// within this basic block). + /// @param LTS A map from old loops to new induction variables as SCEVs. + void copyBB(ScopStmt &Stmt, BasicBlock *BB, BasicBlock *BBCopy, + ValueMapT &BBMap, ValueMapT &GlobalMap, LoopToScevMapT <S); + /// @brief Get the new version of a value. /// /// Given an old value, we first check if a new version of this value is @@ -360,6 +377,13 @@ /// within this basic block). /// @param LTS A map from old loops to new induction variables as SCEVs. void copyStmt(ScopStmt &Stmt, ValueMapT &GlobalMap, LoopToScevMapT <S); + +private: + /// @brief Repair the dominance tree after we created a copy block for @p BB. + /// + /// @returns The immediate dominator in the DT for @p BBCopy if in the region. + BasicBlock *repairDominance(BasicBlock *BB, BasicBlock *BBCopy, + DenseMap &BlockMap); }; } #endif Index: lib/CodeGen/BlockGenerators.cpp =================================================================== --- lib/CodeGen/BlockGenerators.cpp +++ lib/CodeGen/BlockGenerators.cpp @@ -111,7 +111,6 @@ SCEVExpander Expander(SE, "polly"); Value *Expanded = Expander.expandCodeFor(NewScev, Old->getType(), Builder.GetInsertPoint()); - BBMap[Old] = Expanded; return Expanded; } @@ -295,18 +294,27 @@ copyBB(Stmt, BB, BBMap, GlobalMap, LTS); } -BasicBlock *BlockGenerator::copyBB(ScopStmt &Stmt, BasicBlock *BB, - ValueMapT &BBMap, ValueMapT &GlobalMap, - LoopToScevMapT <S) { +BasicBlock *BlockGenerator::splitBB(BasicBlock *BB) { BasicBlock *CopyBB = SplitBlock(Builder.GetInsertBlock(), Builder.GetInsertPoint(), &DT, &LI); CopyBB->setName("polly.stmt." + BB->getName()); - Builder.SetInsertPoint(CopyBB->begin()); + return CopyBB; +} + +BasicBlock *BlockGenerator::copyBB(ScopStmt &Stmt, BasicBlock *BB, + ValueMapT &BBMap, ValueMapT &GlobalMap, + LoopToScevMapT <S) { + BasicBlock *CopyBB = splitBB(BB); + copyBB(Stmt, BB, CopyBB, BBMap, GlobalMap, LTS); + return CopyBB; +} +void BlockGenerator::copyBB(ScopStmt &Stmt, BasicBlock *BB, BasicBlock *CopyBB, + ValueMapT &BBMap, ValueMapT &GlobalMap, + LoopToScevMapT <S) { + Builder.SetInsertPoint(CopyBB->begin()); for (Instruction &Inst : *BB) copyInstruction(Stmt, &Inst, BBMap, GlobalMap, LTS); - - return CopyBB; } VectorBlockGenerator::VectorBlockGenerator(BlockGenerator &BlockGen, @@ -662,6 +670,19 @@ copyInstruction(Stmt, &Inst, VectorBlockMap, ScalarBlockMap); } +BasicBlock *RegionGenerator::repairDominance( + BasicBlock *BB, BasicBlock *BBCopy, + DenseMap &BlockMap) { + + BasicBlock *BBIDom = DT.getNode(BB)->getIDom()->getBlock(); + BasicBlock *BBCopyIDom = BlockMap.lookup(BBIDom); + + if (BBCopyIDom) + DT.changeImmediateDominator(BBCopy, BBCopyIDom); + + return BBCopyIDom; +} + void RegionGenerator::copyStmt(ScopStmt &Stmt, ValueMapT &GlobalMap, LoopToScevMapT <S) { assert(Stmt.isRegionStmt() && @@ -670,8 +691,11 @@ // The region represented by the statement. Region *R = Stmt.getRegion(); - // The "BBMap" for the whole region. - ValueMapT RegionMap; + // The "BBMaps" for the whole region. + DenseMap RegionMaps; + + // A map from old to new blocks in the region + DenseMap BlockMap; // Iterate over all blocks in the region in a breadth-first search. std::deque Blocks; @@ -683,8 +707,17 @@ BasicBlock *BB = Blocks.front(); Blocks.pop_front(); + // First split the block and update dominance information. + BasicBlock *BBCopy = splitBB(BB); + BasicBlock *BBCopyIDom = repairDominance(BB, BBCopy, BlockMap); + + // Get the mapping for this block and initialize it with the mapping + // available at its immediate dominator (in the new region). + ValueMapT &RegionMap = RegionMaps[BBCopy]; + RegionMap = RegionMaps[BBCopyIDom]; + // Copy the block with the BlockGenerator. - BasicBlock *BBCopy = copyBB(Stmt, BB, RegionMap, GlobalMap, LTS); + copyBB(Stmt, BB, BBCopy, RegionMap, GlobalMap, LTS); // And continue with new successors inside the region. for (auto SI = succ_begin(BB), SE = succ_end(BB); SI != SE; SI++) @@ -692,14 +725,16 @@ Blocks.push_back(*SI); // In order to remap PHI nodes we store also basic block mappings. - RegionMap[BB] = BBCopy; + BlockMap[BB] = BBCopy; } // Now create a new dedicated region exit block and add it to the region map. - BasicBlock *RegionExit = + BasicBlock *ExitBBCopy = SplitBlock(Builder.GetInsertBlock(), Builder.GetInsertPoint(), &DT, &LI); - RegionExit->setName("polly.stmt." + R->getExit()->getName() + ".pre"); - RegionMap[R->getExit()] = RegionExit; + ExitBBCopy->setName("polly.stmt." + R->getExit()->getName() + ".as.exit"); + BlockMap[R->getExit()] = ExitBBCopy; + + repairDominance(R->getExit(), ExitBBCopy, BlockMap); // As the block generator doesn't handle control flow we need to add the // region control flow by hand after all blocks have been copied. @@ -707,14 +742,17 @@ BranchInst *BI = cast(BB->getTerminator()); - BasicBlock *BBCopy = cast(RegionMap[BB]); + BasicBlock *BBCopy = BlockMap[BB]; Instruction *BICopy = BBCopy->getTerminator(); + ValueMapT &RegionMap = RegionMaps[BBCopy]; + RegionMap.insert(BlockMap.begin(), BlockMap.end()); + Builder.SetInsertPoint(BBCopy); copyInstScalar(Stmt, BI, RegionMap, GlobalMap, LTS); BICopy->eraseFromParent(); } // Reset the old insert point for the build. - Builder.SetInsertPoint(RegionExit->begin()); + Builder.SetInsertPoint(ExitBBCopy->begin()); } Index: test/Isl/CodeGen/non-affine-subregion-dominance-reuse.ll =================================================================== --- /dev/null +++ test/Isl/CodeGen/non-affine-subregion-dominance-reuse.ll @@ -0,0 +1,74 @@ +; RUN: opt %loadPolly -polly-codegen-isl -S -verify-dom-info < %s | FileCheck %s +; +; Check that we do not reuse the B[i-1] GEP created in block S again in +; block Q. Hence, we create two GEPs for B[i-1]: +; +; CHECK: %scevgep{{.}} = getelementptr i32* %B, i64 -1 +; CHECK: %scevgep{{.}} = getelementptr i32* %B, i64 -1 +; +; void f(int *A, int *B) { +; int x = 0; +; for (int i = 0; i < 1024; i++) { +; if (A[i]) { +; if (i > 512) +; S: A[i] = B[i - 1]; +; Q: A[i] += B[i - 1]; +; } +; } +; } +; +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" + +define void @f(i32* %A, i32* %B) { +bb: + br label %bb1 + +bb1: ; preds = %bb22, %bb + %indvars.iv = phi i64 [ %indvars.iv.next, %bb22 ], [ 0, %bb ] + %exitcond = icmp ne i64 %indvars.iv, 1024 + br i1 %exitcond, label %bb2, label %bb23 + +bb2: ; preds = %bb1 + %tmp = getelementptr inbounds i32* %A, i64 %indvars.iv + %tmp3 = load i32* %tmp, align 4 + %tmp4 = icmp eq i32 %tmp3, 0 + br i1 %tmp4, label %bb21, label %bb5 + +bb5: ; preds = %bb2 + %tmp6 = icmp sgt i64 %indvars.iv, 512 + br i1 %tmp6, label %bb7, label %bb13 + +bb7: ; preds = %bb5 + br label %bb8 + +bb8: ; preds = %bb7 + %tmp9 = add nsw i64 %indvars.iv, -1 + %tmp10 = getelementptr inbounds i32* %B, i64 %tmp9 + %tmp11 = load i32* %tmp10, align 4 + %tmp12 = getelementptr inbounds i32* %A, i64 %indvars.iv + store i32 %tmp11, i32* %tmp12, align 4 + br label %bb13 + +bb13: ; preds = %bb8, %bb5 + br label %bb14 + +bb14: ; preds = %bb13 + %tmp15 = add nsw i64 %indvars.iv, -1 + %tmp16 = getelementptr inbounds i32* %B, i64 %tmp15 + %tmp17 = load i32* %tmp16, align 4 + %tmp18 = getelementptr inbounds i32* %A, i64 %indvars.iv + %tmp19 = load i32* %tmp18, align 4 + %tmp20 = add nsw i32 %tmp19, %tmp17 + store i32 %tmp20, i32* %tmp18, align 4 + br label %bb21 + +bb21: ; preds = %bb2, %bb14 + br label %bb22 + +bb22: ; preds = %bb21 + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + br label %bb1 + +bb23: ; preds = %bb1 + ret void +} Index: test/Isl/CodeGen/non_affine_float_compare.ll =================================================================== --- test/Isl/CodeGen/non_affine_float_compare.ll +++ test/Isl/CodeGen/non_affine_float_compare.ll @@ -1,4 +1,4 @@ -; RUN: opt %loadPolly -polly-codegen-isl -polly-no-early-exit -polly-allow-nonaffine-branches -S < %s | FileCheck %s +; RUN: opt %loadPolly -polly-codegen-isl -polly-no-early-exit -polly-allow-nonaffine-branches -S -verify-dom-info < %s | FileCheck %s ; ; void f(float *A) { ; for (int i = 0; i < 1024; i++) @@ -14,16 +14,16 @@ ; CHECK: %scevgep[[R2:[0-9]*]] = getelementptr float* %scevgep{{[0-9]*}}, i64 %polly.indvar ; CHECK: %tmp6_p_scalar_ = load float* %scevgep[[R2]], align 4, !alias.scope !0, !noalias !2 ; CHECK: %p_tmp7 = fcmp oeq float %tmp3_p_scalar_, %tmp6_p_scalar_ -; CHECK: br i1 %p_tmp7, label %polly.stmt.bb8, label %polly.stmt.bb12.pre +; CHECK: br i1 %p_tmp7, label %polly.stmt.bb8, label %polly.stmt.bb12.[[R:[a-zA-Z_.0-9]*]] ; CHECK: polly.stmt.bb8: ; CHECK: %scevgep[[R3:[0-9]*]] = getelementptr float* %A, i64 %polly.indvar ; CHECK: %tmp10_p_scalar_ = load float* %scevgep[[R3]], align 4, !alias.scope !0, !noalias !2 ; CHECK: %p_tmp11 = fadd float %tmp10_p_scalar_, 1.000000e+00 ; CHECK: store float %p_tmp11, float* %scevgep[[R3]], align 4, !alias.scope !0, !noalias !2 -; CHECK: br label %polly.stmt.bb12.pre +; CHECK: br label %polly.stmt.bb12.[[R]] -; CHECK: polly.stmt.bb12.pre: +; CHECK: polly.stmt.bb12.[[R]]: ; CHECK: br label %polly.stmt.bb12 ; CHECK: polly.stmt.bb12: