Index: include/polly/CodeGen/BlockGenerators.h =================================================================== --- include/polly/CodeGen/BlockGenerators.h +++ include/polly/CodeGen/BlockGenerators.h @@ -341,13 +341,11 @@ Value *getOrCreateAlloca(Value *ScalarBase, ScalarAllocaMapTy &Map, const char *NameExt); - /// @brief Generate reload of scalars demoted to memory and needed by @p Inst. + /// @brief Generate reload of scalars demoted to memory and needed by @p Stmt. /// /// @param Stmt The statement we generate code for. - /// @param Inst The instruction that might need reloaded values. /// @param BBMap A mapping from old values to their new values in this block. - virtual void generateScalarLoads(ScopStmt &Stmt, const Instruction *Inst, - ValueMapT &BBMap); + void generateScalarLoads(ScopStmt &Stmt, ValueMapT &BBMap); /// @brief Generate the scalar stores for the given statement. /// @@ -356,14 +354,13 @@ /// be demoted to memory. /// /// @param Stmt The statement we generate code for. - /// @param BB The basic block we generate code for. /// @param LTS A mapping from loops virtual canonical induction /// variable to their new values /// (for values recalculated in the new ScoP, but not /// within this basic block) /// @param BBMap A mapping from old values to their new values in this block. - virtual void generateScalarStores(ScopStmt &Stmt, BasicBlock *BB, - LoopToScevMapT <S, ValueMapT &BBMap); + virtual void generateScalarStores(ScopStmt &Stmt, LoopToScevMapT <S, + ValueMapT &BBMap); /// @brief Handle users of @p Inst outside the SCoP. /// @@ -746,14 +743,6 @@ void addOperandToPHI(ScopStmt &Stmt, const PHINode *PHI, PHINode *PHICopy, BasicBlock *IncomingBB, LoopToScevMapT <S); - /// @brief Generate reload of scalars demoted to memory and needed by @p Inst. - /// - /// @param Stmt The statement we generate code for. - /// @param Inst The instruction that might need reloaded values. - /// @param BBMap A mapping from old values to their new values in this block. - virtual void generateScalarLoads(ScopStmt &Stmt, const Instruction *Inst, - ValueMapT &BBMap) override; - /// @brief Generate the scalar stores for the given statement. /// /// After the statement @p Stmt was copied all inner-SCoP scalar dependences @@ -761,13 +750,11 @@ /// be demoted to memory. /// /// @param Stmt The statement we generate code for. - /// @param BB The basic block we generate code for. /// @param LTS A mapping from loops virtual canonical induction variable to /// their new values (for values recalculated in the new ScoP, /// but not within this basic block) /// @param BBMap A mapping from old values to their new values in this block. - virtual void generateScalarStores(ScopStmt &Stmt, BasicBlock *BB, - LoopToScevMapT <S, + virtual void generateScalarStores(ScopStmt &Stmt, LoopToScevMapT <S, ValueMapT &BBMAp) override; /// @brief Copy a single PHI instruction. Index: include/polly/ScopInfo.h =================================================================== --- include/polly/ScopInfo.h +++ include/polly/ScopInfo.h @@ -451,9 +451,6 @@ bool isAffine() const { return IsAffine; } - /// @brief Is this MemoryAccess modeling special PHI node accesses? - bool isPHI() const { return Origin == PHI; } - __isl_give isl_basic_map *createBasicAccessMap(ScopStmt *Statement); void assumeNoOutOfBound(); @@ -631,6 +628,9 @@ /// nodes. bool isImplicit() const { return !isExplicit(); } + /// @brief Is this MemoryAccess modeling special PHI node accesses? + bool isPHI() const { return Origin == PHI; } + /// @brief Get the statement that contains this memory access. ScopStmt *getStatement() const { return Statement; } Index: lib/CodeGen/BlockGenerators.cpp =================================================================== --- lib/CodeGen/BlockGenerators.cpp +++ lib/CodeGen/BlockGenerators.cpp @@ -231,10 +231,6 @@ void BlockGenerator::copyInstruction(ScopStmt &Stmt, Instruction *Inst, ValueMapT &BBMap, LoopToScevMapT <S, isl_id_to_ast_expr *NewAccesses) { - - // First check for possible scalar dependences for this instruction. - generateScalarLoads(Stmt, Inst, BBMap); - // Terminator instructions control the control flow. They are explicitly // expressed in the clast and do not need to be copied. if (Inst->isTerminator()) @@ -295,23 +291,24 @@ ValueMapT &BBMap, LoopToScevMapT <S, isl_id_to_ast_expr *NewAccesses) { BasicBlock *CopyBB = splitBB(BB); + Builder.SetInsertPoint(CopyBB->begin()); + generateScalarLoads(Stmt, BBMap); + copyBB(Stmt, BB, CopyBB, BBMap, LTS, NewAccesses); + + // After a basic block was copied store all scalars that escape this block in + // their alloca. + generateScalarStores(Stmt, LTS, BBMap); return CopyBB; } void BlockGenerator::copyBB(ScopStmt &Stmt, BasicBlock *BB, BasicBlock *CopyBB, ValueMapT &BBMap, LoopToScevMapT <S, isl_id_to_ast_expr *NewAccesses) { - Builder.SetInsertPoint(CopyBB->begin()); EntryBB = &CopyBB->getParent()->getEntryBlock(); for (Instruction &Inst : *BB) copyInstruction(Stmt, &Inst, BBMap, LTS, NewAccesses); - - // After a basic block was copied store all scalars that escape this block - // in their alloca. First the scalars that have dependences inside the SCoP, - // then the ones that might escape the SCoP. - generateScalarStores(Stmt, BB, LTS, BBMap); } Value *BlockGenerator::getOrCreateAlloca(Value *ScalarBase, @@ -382,16 +379,9 @@ EscapeMap[Inst] = std::make_pair(ScalarAddr, std::move(EscapeUsers)); } -void BlockGenerator::generateScalarLoads(ScopStmt &Stmt, - const Instruction *Inst, - ValueMapT &BBMap) { - auto *MAL = Stmt.lookupAccessesFor(Inst); - - if (!MAL) - return; - - for (MemoryAccess *MA : *MAL) { - if (MA->isExplicit() || !MA->isRead()) +void BlockGenerator::generateScalarLoads(ScopStmt &Stmt, ValueMapT &BBMap) { + for (MemoryAccess *MA : Stmt) { + if (MA->isExplicit() || MA->isWrite()) continue; auto *Address = getOrCreateAlloca(*MA); @@ -447,14 +437,13 @@ return ScalarValue; } -void BlockGenerator::generateScalarStores(ScopStmt &Stmt, BasicBlock *BB, - LoopToScevMapT <S, +void BlockGenerator::generateScalarStores(ScopStmt &Stmt, LoopToScevMapT <S, ValueMapT &BBMap) { const Region &R = Stmt.getParent()->getRegion(); - assert(Stmt.isBlockStmt() && BB == Stmt.getBasicBlock() && - "Region statements need to use the generateScalarStores() " - "function in the RegionGenerator"); + assert(Stmt.isBlockStmt() && "Region statements need to use the " + "generateScalarStores() function in the " + "RegionGenerator"); for (MemoryAccess *MA : Stmt) { if (MA->isExplicit() || MA->isRead()) @@ -990,6 +979,9 @@ RegionMaps.clear(); IncompletePHINodeMap.clear(); + // Collection of all values related to this subregion. + ValueMapT ValueMap; + // The region represented by the statement. Region *R = Stmt.getRegion(); @@ -1001,6 +993,8 @@ EntryBBCopy->setName("polly.stmt." + EntryBB->getName() + ".entry"); Builder.SetInsertPoint(EntryBBCopy->begin()); + generateScalarLoads(Stmt, RegionMaps[EntryBBCopy]); + for (auto PI = pred_begin(EntryBB), PE = pred_end(EntryBB); PI != PE; ++PI) if (!R->contains(*PI)) BlockMap[*PI] = EntryBBCopy; @@ -1025,9 +1019,11 @@ // Get the mapping for this block and initialize it with the mapping // available at its immediate dominator (in the new region). ValueMapT &RegionMap = RegionMaps[BBCopy]; - RegionMap = RegionMaps[BBCopyIDom]; + if (BBCopy != EntryBBCopy) + RegionMap = RegionMaps[BBCopyIDom]; // Copy the block with the BlockGenerator. + Builder.SetInsertPoint(BBCopy->begin()); copyBB(Stmt, BB, BBCopy, RegionMap, LTS, IdToAstExp); // In order to remap PHI nodes we store also basic block mappings. @@ -1042,6 +1038,9 @@ for (auto SI = succ_begin(BB), SE = succ_end(BB); SI != SE; SI++) if (R->contains(*SI) && SeenBlocks.insert(*SI).second) Blocks.push_back(*SI); + + // Remember value in case it is visible after this subregion. + ValueMap.insert(RegionMap.begin(), RegionMap.end()); } // Now create a new dedicated region exit block and add it to the region map. @@ -1107,26 +1106,14 @@ LTS[L] = SE.getUnknown(LoopPHI); } - // Reset the old insert point for the build. - Builder.SetInsertPoint(ExitBBCopy->begin()); -} - -void RegionGenerator::generateScalarLoads(ScopStmt &Stmt, - const Instruction *Inst, - ValueMapT &BBMap) { - - // Inside a non-affine region PHI nodes are copied not demoted. Once the - // phi is copied it will reload all inputs from outside the region, hence - // we do not need to generate code for the read access of the operands of a - // PHI. - if (isa(Inst)) - return; + // Continue generating code in the exit block. + Builder.SetInsertPoint(ExitBBCopy->getFirstInsertionPt()); - return BlockGenerator::generateScalarLoads(Stmt, Inst, BBMap); + // Write values visible to other statements. + generateScalarStores(Stmt, LTS, ValueMap); } -void RegionGenerator::generateScalarStores(ScopStmt &Stmt, BasicBlock *BB, - LoopToScevMapT <S, +void RegionGenerator::generateScalarStores(ScopStmt &Stmt, LoopToScevMapT <S, ValueMapT &BBMap) { const Region &R = Stmt.getParent()->getRegion(); @@ -1135,22 +1122,31 @@ "function in the BlockGenerator"); for (MemoryAccess *MA : Stmt) { - if (MA->isExplicit() || MA->isRead()) continue; Instruction *ScalarInst = MA->getAccessInstruction(); + Value *Val = MA->getAccessValue(); - // Only generate accesses that belong to this basic block. - if (ScalarInst->getParent() != BB) - continue; + // In case we add the store into an exiting block, we need to restore the + // position for stores in the exit node. + auto SavedInsertionPoint = Builder.GetInsertPoint(); - Value *Val = MA->getAccessValue(); + // Implicit writes induced by PHIs must be written in the incoming blocks. + if (MA->isPHI()) { + BasicBlock *ExitingBB = ScalarInst->getParent(); + BasicBlock *ExitingBBCopy = BlockMap[ExitingBB]; + Builder.SetInsertPoint(ExitingBBCopy->getTerminator()); + } auto Address = getOrCreateAlloca(*MA); Val = getNewScalarValue(Val, R, Stmt, LTS, BBMap); Builder.CreateStore(Val, Address); + + // Restore the insertion point if necessary. + if (MA->isPHI()) + Builder.SetInsertPoint(SavedInsertionPoint); } } Index: test/Isl/CodeGen/non-affine-phi-node-expansion-2.ll =================================================================== --- test/Isl/CodeGen/non-affine-phi-node-expansion-2.ll +++ test/Isl/CodeGen/non-affine-phi-node-expansion-2.ll @@ -4,7 +4,7 @@ ; CHECK: polly.stmt.bb3: ; preds = %polly.stmt.bb3.entry -; CHECK: %tmp6_p_scalar_ = load double, double* %arg11, !alias.scope !0, !noalias !2 +; CHECK: %tmp6_p_scalar_ = load double, double* %arg1{{[0-9]*}}, !alias.scope !0, !noalias !2 ; CHECK: %p_tmp7 = fadd double 1.000000e+00, %tmp6_p_scalar_ ; CHECK: %p_tmp8 = fcmp olt double 1.400000e+01, %p_tmp7 ; CHECK: br i1 %p_tmp8, label %polly.stmt.bb9, label %polly.stmt.bb10 Index: test/Isl/CodeGen/non-affine-phi-node-expansion-3.ll =================================================================== --- test/Isl/CodeGen/non-affine-phi-node-expansion-3.ll +++ test/Isl/CodeGen/non-affine-phi-node-expansion-3.ll @@ -17,8 +17,8 @@ ; CHECK-NEXT: %p_val0 = fadd float 1.000000e+00, 2.000000e+00 ; CHECK-NEXT: %p_val1 = fadd float 1.000000e+00, 2.000000e+00 ; CHECK-NEXT: %p_val2 = fadd float 1.000000e+00, 2.000000e+00 -; CHECK-NEXT: store float %p_val0, float* %merge.phiops ; CHECK-NEXT: %polly.subregion.iv.inc = add i32 %polly.subregion.iv, 1 +; CHECK-NEXT: store float %p_val0, float* %merge.phiops ; CHECK-NEXT: br i1 branch1: Index: test/Isl/CodeGen/non-affine-phi-node-expansion-4.ll =================================================================== --- test/Isl/CodeGen/non-affine-phi-node-expansion-4.ll +++ test/Isl/CodeGen/non-affine-phi-node-expansion-4.ll @@ -15,8 +15,8 @@ ; CHECK-NEXT: %polly.subregion.iv = phi i32 [ 0, %polly.stmt.loop.entry ] ; CHECK-NEXT: %p_val0 = fadd float 1.000000e+00, 2.000000e+00 ; CHECK-NEXT: %p_val1 = fadd float 1.000000e+00, 2.000000e+00 -; CHECK-NEXT: store float %p_val0, float* %merge.phiops ; CHECK-NEXT: %polly.subregion.iv.inc = add i32 %polly.subregion.iv, 1 +; CHECK-NEXT: store float %p_val0, float* %merge.phiops ; CHECK-NEXT: br i1 ; The interesting instruction here is %val2, which does not dominate the exit of Index: test/Isl/CodeGen/phi_loop_carried_float.ll =================================================================== --- test/Isl/CodeGen/phi_loop_carried_float.ll +++ test/Isl/CodeGen/phi_loop_carried_float.ll @@ -28,8 +28,8 @@ ; CHECK: store float %tmp.0.phiops.reload[[R1]], float* %tmp.0.s2a ; CHECK-LABEL: polly.stmt.bb4: -; CHECK: %tmp[[R5:[0-9]*]]_p_scalar_ = load float, float* %scevgep, align 4, !alias.scope !0, !noalias !2 ; CHECK: %tmp.0.s2a.reload[[R3:[0-9]*]] = load float, float* %tmp.0.s2a +; CHECK: %tmp[[R5:[0-9]*]]_p_scalar_ = load float, float* %scevgep, align 4, !alias.scope !0, !noalias !2 ; CHECK: %p_tmp[[R4:[0-9]*]] = fadd float %tmp.0.s2a.reload[[R3]], %tmp[[R5]]_p_scalar_ ; CHECK: store float %p_tmp[[R4]], float* %tmp.0.phiops Index: test/Isl/CodeGen/phi_loop_carried_float_escape.ll =================================================================== --- test/Isl/CodeGen/phi_loop_carried_float_escape.ll +++ test/Isl/CodeGen/phi_loop_carried_float_escape.ll @@ -28,8 +28,8 @@ ; CHECK-: store float %tmp.0.phiops.reload[[R1]], float* %tmp.0.s2a ; CHECK-LABEL: polly.stmt.bb4: -; CHECK: %tmp[[R5:[0-9]*]]_p_scalar_ = load float, float* %scevgep, align 4, !alias.scope !0, !noalias !2 ; CHECK: %tmp.0.s2a.reload[[R3:[0-9]*]] = load float, float* %tmp.0.s2a +; CHECK: %tmp[[R5:[0-9]*]]_p_scalar_ = load float, float* %scevgep, align 4, !alias.scope !0, !noalias !2 ; CHECK: %p_tmp[[R4:[0-9]*]] = fadd float %tmp.0.s2a.reload[[R3]], %tmp[[R5]]_p_scalar_ ; CHECK: store float %p_tmp[[R4]], float* %tmp.0.phiops Index: test/Isl/CodeGen/read-only-scalars.ll =================================================================== --- test/Isl/CodeGen/read-only-scalars.ll +++ test/Isl/CodeGen/read-only-scalars.ll @@ -14,8 +14,8 @@ ; SCALAR-NEXT: store float %scalar, float* %scalar.s2a ; SCALAR-LABEL: polly.stmt.stmt1: -; SCALAR-NEXT: %val_p_scalar_ = load float, float* %A, ; SCALAR-NEXT: %scalar.s2a.reload = load float, float* %scalar.s2a +; SCALAR-NEXT: %val_p_scalar_ = load float, float* %A, ; SCALAR-NEXT: %p_sum = fadd float %val_p_scalar_, %scalar.s2a.reload define void @foo(float* noalias %A, float %scalar) {