Index: include/polly/CodeGen/BlockGenerators.h =================================================================== --- include/polly/CodeGen/BlockGenerators.h +++ include/polly/CodeGen/BlockGenerators.h @@ -84,12 +84,12 @@ /// This copies the entire basic block and updates references to old values /// with references to new values, as defined by GlobalMap. /// - /// @param Stmt The statement to code generate. + /// @param Stmt The block statement to code generate. /// @param GlobalMap A mapping from old values to their new values /// (for values recalculated in the new ScoP, but not /// within this basic block). /// @param LTS A map from old loops to new induction variables as SCEVs. - void copyBB(ScopStmt &Stmt, ValueMapT &GlobalMap, LoopToScevMapT <S); + void copyStmt(ScopStmt &Stmt, ValueMapT &GlobalMap, LoopToScevMapT <S); protected: PollyIRBuilder &Builder; @@ -100,6 +100,21 @@ /// @brief The dominator tree of this function. DominatorTree &DT; + /// @brief Copy the given basic block. + /// + /// @param Stmt The statement to code generate. + /// @param BB The basic block to code generate. + /// @param BBMap A mapping from old values to their new values in this + /// block. + /// @param GlobalMap A mapping from old values to their new values + /// (for values recalculated in the new ScoP, but not + /// within this basic block). + /// @param LTS A map from old loops to new induction variables as SCEVs. + /// + /// @returns The copy of the basic block. + BasicBlock *copyBB(ScopStmt &Stmt, BasicBlock *BB, ValueMapT &BBMap, + ValueMapT &GlobalMap, LoopToScevMapT <S); + /// @brief Get the new version of a value. /// /// Given an old value, we first check if a new version of this value is @@ -209,7 +224,7 @@ std::vector &VLTS, __isl_keep isl_map *Schedule) { VectorBlockGenerator Generator(BlockGen, GlobalMaps, VLTS, Schedule); - Generator.copyBB(Stmt); + Generator.copyStmt(Stmt); } private: @@ -322,7 +337,29 @@ void copyInstruction(ScopStmt &Stmt, const Instruction *Inst, ValueMapT &VectorMap, VectorValueMapT &ScalarMaps); - void copyBB(ScopStmt &Stmt); + void copyStmt(ScopStmt &Stmt); +}; + +/// @brief Generator for new versions of polyhedral region statements. +class RegionGenerator : public BlockGenerator { +public: + /// @brief Create a generator for regions. + /// + /// @param BlockGen A generator for basic blocks. + RegionGenerator(BlockGenerator &BlockGen) : BlockGenerator(BlockGen) {} + + /// @brief Copy the region statement @p Stmt. + /// + /// This copies the entire region represented by @p Stmt and updates + /// references to old values with references to new values, as defined by + /// GlobalMap. + /// + /// @param Stmt The statement to code generate. + /// @param GlobalMap A mapping from old values to their new values + /// (for values recalculated in the new ScoP, but not + /// within this basic block). + /// @param LTS A map from old loops to new induction variables as SCEVs. + void copyStmt(ScopStmt &Stmt, ValueMapT &GlobalMap, LoopToScevMapT <S); }; } #endif Index: lib/CodeGen/BlockGenerators.cpp =================================================================== --- lib/CodeGen/BlockGenerators.cpp +++ lib/CodeGen/BlockGenerators.cpp @@ -284,18 +284,29 @@ copyInstScalar(Stmt, Inst, BBMap, GlobalMap, LTS); } -void BlockGenerator::copyBB(ScopStmt &Stmt, ValueMapT &GlobalMap, - LoopToScevMapT <S) { +void BlockGenerator::copyStmt(ScopStmt &Stmt, ValueMapT &GlobalMap, + LoopToScevMapT <S) { + assert(Stmt.isBlockStmt() && + "Only block statements can be copied by the block generator"); + + ValueMapT BBMap; + BasicBlock *BB = Stmt.getBasicBlock(); + copyBB(Stmt, BB, BBMap, GlobalMap, LTS); +} + +BasicBlock *BlockGenerator::copyBB(ScopStmt &Stmt, BasicBlock *BB, + ValueMapT &BBMap, ValueMapT &GlobalMap, + LoopToScevMapT <S) { BasicBlock *CopyBB = SplitBlock(Builder.GetInsertBlock(), Builder.GetInsertPoint(), &DT, &LI); CopyBB->setName("polly.stmt." + BB->getName()); Builder.SetInsertPoint(CopyBB->begin()); - ValueMapT BBMap; - for (Instruction &Inst : *BB) copyInstruction(Stmt, &Inst, BBMap, GlobalMap, LTS); + + return CopyBB; } VectorBlockGenerator::VectorBlockGenerator(BlockGenerator &BlockGen, @@ -620,7 +631,10 @@ copyInstScalarized(Stmt, Inst, VectorMap, ScalarMaps); } -void VectorBlockGenerator::copyBB(ScopStmt &Stmt) { +void VectorBlockGenerator::copyStmt(ScopStmt &Stmt) { + assert(Stmt.isBlockStmt() && "TODO: Only block statements can be copied by " + "the vector block generator"); + BasicBlock *BB = Stmt.getBasicBlock(); BasicBlock *CopyBB = SplitBlock(Builder.GetInsertBlock(), Builder.GetInsertPoint(), &DT, &LI); @@ -647,3 +661,60 @@ for (Instruction &Inst : *BB) copyInstruction(Stmt, &Inst, VectorBlockMap, ScalarBlockMap); } + +void RegionGenerator::copyStmt(ScopStmt &Stmt, ValueMapT &GlobalMap, + LoopToScevMapT <S) { + assert(Stmt.isRegionStmt() && + "Only block statements can be copied by the block generator"); + + // The region represented by the statement. + Region *R = Stmt.getRegion(); + + // The "BBMap" for the whole region. + ValueMapT RegionMap; + + // Iterate over all blocks in the region in a breadth-first search. + std::deque Blocks; + SmallPtrSet SeenBlocks; + Blocks.push_back(R->getEntry()); + SeenBlocks.insert(R->getEntry()); + + while (!Blocks.empty()) { + BasicBlock *BB = Blocks.front(); + Blocks.pop_front(); + + // Copy the block with the BlockGenerator. + BasicBlock *BBCopy = copyBB(Stmt, BB, RegionMap, GlobalMap, LTS); + + // And continue with new successors inside the region. + for (auto SI = succ_begin(BB), SE = succ_end(BB); SI != SE; SI++) + if (R->contains(*SI) && SeenBlocks.insert(*SI).second) + Blocks.push_back(*SI); + + // In order to remap PHI nodes we store also basic block mappings. + RegionMap[BB] = BBCopy; + } + + // Now create a new dedicated region exit block and add it to the region map. + BasicBlock *RegionExit = + SplitBlock(Builder.GetInsertBlock(), Builder.GetInsertPoint(), &DT, &LI); + RegionExit->setName("polly.stmt." + R->getExit()->getName() + ".pre"); + RegionMap[R->getExit()] = RegionExit; + + // As the block generator doesn't handle control flow we need to add the + // region control flow by hand after all blocks have been copied. + for (BasicBlock *BB : SeenBlocks) { + + BranchInst *BI = cast(BB->getTerminator()); + + BasicBlock *BBCopy = cast(RegionMap[BB]); + Instruction *BICopy = BBCopy->getTerminator(); + + Builder.SetInsertPoint(BBCopy); + copyInstScalar(Stmt, BI, RegionMap, GlobalMap, LTS); + BICopy->eraseFromParent(); + } + + // Reset the old insert point for the build. + Builder.SetInsertPoint(RegionExit->begin()); +} Index: lib/CodeGen/IslCodeGeneration.cpp =================================================================== --- lib/CodeGen/IslCodeGeneration.cpp +++ lib/CodeGen/IslCodeGeneration.cpp @@ -65,8 +65,8 @@ : S(S), Builder(Builder), Annotator(Annotator), Rewriter(new SCEVExpander(SE, "polly")), ExprBuilder(Builder, IDToValue, *Rewriter), - BlockGen(Builder, LI, SE, DT, &ExprBuilder), P(P), DL(DL), LI(LI), - SE(SE), DT(DT) {} + BlockGen(Builder, LI, SE, DT, &ExprBuilder), RegionGen(BlockGen), P(P), + DL(DL), LI(LI), SE(SE), DT(DT) {} ~IslNodeBuilder() { delete Rewriter; } @@ -84,6 +84,10 @@ IslExprBuilder ExprBuilder; BlockGenerator BlockGen; + + /// @brief Generator for region statements. + RegionGenerator RegionGen; + Pass *P; const DataLayout &DL; LoopInfo &LI; @@ -800,7 +804,10 @@ Stmt->setAstBuild(IslAstInfo::getBuild(User)); createSubstitutions(Expr, Stmt, VMap, LTS); - BlockGen.copyBB(*Stmt, VMap, LTS); + if (Stmt->isBlockStmt()) + BlockGen.copyStmt(*Stmt, VMap, LTS); + else + RegionGen.copyStmt(*Stmt, VMap, LTS); isl_ast_node_free(User); isl_id_free(Id); Index: test/Isl/CodeGen/non_affine_float_compare.ll =================================================================== --- /dev/null +++ test/Isl/CodeGen/non_affine_float_compare.ll @@ -0,0 +1,78 @@ +; RUN: opt %loadPolly -polly-codegen-isl -polly-no-early-exit -polly-allow-nonaffine-branches -S < %s | FileCheck %s +; +; void f(float *A) { +; for (int i = 0; i < 1024; i++) +; if (A[i] == A[i - 1]) +; A[i]++; +; A[i]++; +; } +; +; +; CHECK: polly.stmt.bb2: +; CHECK: %scevgep[[R0:[0-9]*]] = getelementptr float* %A, i64 %polly.indvar +; CHECK: %tmp3_p_scalar_ = load float* %scevgep[[R0]], align 4, !alias.scope !0, !noalias !2 +; CHECK: %scevgep[[R2:[0-9]*]] = getelementptr float* %scevgep{{[0-9]*}}, i64 %polly.indvar +; CHECK: %tmp6_p_scalar_ = load float* %scevgep[[R2]], align 4, !alias.scope !0, !noalias !2 +; CHECK: %p_tmp7 = fcmp oeq float %tmp3_p_scalar_, %tmp6_p_scalar_ +; CHECK: br i1 %p_tmp7, label %polly.stmt.bb8, label %polly.stmt.bb12.pre + +; CHECK: polly.stmt.bb8: +; CHECK: %scevgep[[R3:[0-9]*]] = getelementptr float* %A, i64 %polly.indvar +; CHECK: %tmp10_p_scalar_ = load float* %scevgep[[R3]], align 4, !alias.scope !0, !noalias !2 +; CHECK: %p_tmp11 = fadd float %tmp10_p_scalar_, 1.000000e+00 +; CHECK: store float %p_tmp11, float* %scevgep[[R3]], align 4, !alias.scope !0, !noalias !2 +; CHECK: br label %polly.stmt.bb12.pre + +; CHECK: polly.stmt.bb12.pre: +; CHECK: br label %polly.stmt.bb12 + +; CHECK: polly.stmt.bb12: +; CHECK: %scevgep[[R4:[0-9]*]] = getelementptr float* %A, i64 %polly.indvar +; CHECK: %tmp10b_p_scalar_ = load float* %scevgep[[R4]], align 4, !alias.scope !0, !noalias !2 +; CHECK: %p_tmp11b = fadd float %tmp10b_p_scalar_, 1.000000e+00 +; CHECK: store float %p_tmp11b, float* %scevgep[[R4]], align 4, !alias.scope !0, !noalias !2 +; CHECK: %polly.indvar_next = add nsw i64 %polly.indvar, 1 +; CHECK: %polly.loop_cond = icmp sle i64 %polly.indvar, 1022 +; CHECK: br i1 %polly.loop_cond, label %polly.loop_header, label %polly.loop_exit + +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" + +define void @f(float* %A) { +bb: + br label %bb1 + +bb1: ; preds = %bb13, %bb + %indvars.iv = phi i64 [ %indvars.iv.next, %bb13 ], [ 0, %bb ] + %exitcond = icmp ne i64 %indvars.iv, 1024 + br i1 %exitcond, label %bb2, label %bb14 + +bb2: ; preds = %bb1 + %tmp = getelementptr inbounds float* %A, i64 %indvars.iv + %tmp3 = load float* %tmp, align 4 + %tmp4 = add nsw i64 %indvars.iv, -1 + %tmp5 = getelementptr inbounds float* %A, i64 %tmp4 + %tmp6 = load float* %tmp5, align 4 + %tmp7 = fcmp oeq float %tmp3, %tmp6 + br i1 %tmp7, label %bb8, label %bb12 + +bb8: ; preds = %bb2 + %tmp9 = getelementptr inbounds float* %A, i64 %indvars.iv + %tmp10 = load float* %tmp9, align 4 + %tmp11 = fadd float %tmp10, 1.000000e+00 + store float %tmp11, float* %tmp9, align 4 + br label %bb12 + +bb12: ; preds = %bb8, %bb2 + %tmp9b = getelementptr inbounds float* %A, i64 %indvars.iv + %tmp10b = load float* %tmp9b, align 4 + %tmp11b = fadd float %tmp10b, 1.000000e+00 + store float %tmp11b, float* %tmp9b, align 4 + br label %bb13 + +bb13: ; preds = %bb12 + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + br label %bb1 + +bb14: ; preds = %bb1 + ret void +}