Index: include/polly/CodeGen/BlockGenerators.h =================================================================== --- include/polly/CodeGen/BlockGenerators.h +++ include/polly/CodeGen/BlockGenerators.h @@ -363,13 +363,11 @@ Value *getOrCreateAlloca(Value *ScalarBase, ScalarAllocaMapTy &Map, const char *NameExt); - /// @brief Generate reload of scalars demoted to memory and needed by @p Inst. + /// @brief Generate reload of scalars demoted to memory and needed by @p Stmt. /// /// @param Stmt The statement we generate code for. - /// @param Inst The instruction that might need reloaded values. /// @param BBMap A mapping from old values to their new values in this block. - virtual void generateScalarLoads(ScopStmt &Stmt, const Instruction *Inst, - ValueMapT &BBMap); + virtual void generateScalarLoads(ScopStmt &Stmt, ValueMapT &BBMap); /// @brief Generate the scalar stores for the given statement. /// @@ -378,14 +376,13 @@ /// be demoted to memory. /// /// @param Stmt The statement we generate code for. - /// @param BB The basic block we generate code for. /// @param LTS A mapping from loops virtual canonical induction /// variable to their new values /// (for values recalculated in the new ScoP, but not /// within this basic block) /// @param BBMap A mapping from old values to their new values in this block. - virtual void generateScalarStores(ScopStmt &Stmt, BasicBlock *BB, - LoopToScevMapT <S, ValueMapT &BBMap); + virtual void generateScalarStores(ScopStmt &Stmt, LoopToScevMapT <S, + ValueMapT &BBMap); /// @brief Handle users of @p Inst outside the SCoP. /// @@ -737,8 +734,8 @@ /// @brief A map from old to new blocks in the region. DenseMap BlockMap; - /// @brief The "BBMaps" for the whole region (one for each block). - DenseMap RegionMaps; + /// @brief The BBMaps for the whole region. + ValueMapT ValueMap; /// @brief Mapping to remember PHI nodes that still need incoming values. using PHINodePairTy = std::pair; @@ -760,13 +757,11 @@ void addOperandToPHI(ScopStmt &Stmt, const PHINode *PHI, PHINode *PHICopy, BasicBlock *IncomingBB, LoopToScevMapT <S); - /// @brief Generate reload of scalars demoted to memory and needed by @p Inst. + /// @brief Generate reload of scalars demoted to memory and needed by @p Stmt. /// /// @param Stmt The statement we generate code for. - /// @param Inst The instruction that might need reloaded values. /// @param BBMap A mapping from old values to their new values in this block. - virtual void generateScalarLoads(ScopStmt &Stmt, const Instruction *Inst, - ValueMapT &BBMap) override; + virtual void generateScalarLoads(ScopStmt &Stmt, ValueMapT &BBMap) override; /// @brief Generate the scalar stores for the given statement. /// @@ -775,13 +770,11 @@ /// be demoted to memory. /// /// @param Stmt The statement we generate code for. - /// @param BB The basic block we generate code for. /// @param LTS A mapping from loops virtual canonical induction variable to /// their new values (for values recalculated in the new ScoP, /// but not within this basic block) /// @param BBMap A mapping from old values to their new values in this block. - virtual void generateScalarStores(ScopStmt &Stmt, BasicBlock *BB, - LoopToScevMapT <S, + virtual void generateScalarStores(ScopStmt &Stmt, LoopToScevMapT <S, ValueMapT &BBMAp) override; /// @brief Copy a single PHI instruction. Index: include/polly/ScopInfo.h =================================================================== --- include/polly/ScopInfo.h +++ include/polly/ScopInfo.h @@ -451,9 +451,6 @@ bool isAffine() const { return IsAffine; } - /// @brief Is this MemoryAccess modeling special PHI node accesses? - bool isPHI() const { return Origin == PHI; } - __isl_give isl_basic_map *createBasicAccessMap(ScopStmt *Statement); void assumeNoOutOfBound(); @@ -631,6 +628,9 @@ /// nodes. bool isImplicit() const { return !isExplicit(); } + /// @brief Is this MemoryAccess modeling special PHI node accesses? + bool isPHI() const { return Origin == PHI; } + /// @brief Get the statement that contains this memory access. ScopStmt *getStatement() const { return Statement; } Index: lib/CodeGen/BlockGenerators.cpp =================================================================== --- lib/CodeGen/BlockGenerators.cpp +++ lib/CodeGen/BlockGenerators.cpp @@ -271,10 +271,6 @@ void BlockGenerator::copyInstruction(ScopStmt &Stmt, Instruction *Inst, ValueMapT &BBMap, LoopToScevMapT <S, isl_id_to_ast_expr *NewAccesses) { - - // First check for possible scalar dependences for this instruction. - generateScalarLoads(Stmt, Inst, BBMap); - // Terminator instructions control the control flow. They are explicitly // expressed in the clast and do not need to be copied. if (Inst->isTerminator()) @@ -335,24 +331,25 @@ ValueMapT &BBMap, LoopToScevMapT <S, isl_id_to_ast_expr *NewAccesses) { BasicBlock *CopyBB = splitBB(BB); + Builder.SetInsertPoint(CopyBB->begin()); + generateScalarLoads(Stmt, BBMap); + copyBB(Stmt, BB, CopyBB, BBMap, LTS, NewAccesses); + + // After a basic block was copied store all scalars that escape this block in + // their alloca. + generateScalarStores(Stmt, LTS, BBMap); return CopyBB; } void BlockGenerator::copyBB(ScopStmt &Stmt, BasicBlock *BB, BasicBlock *CopyBB, ValueMapT &BBMap, LoopToScevMapT <S, isl_id_to_ast_expr *NewAccesses) { - Builder.SetInsertPoint(CopyBB->begin()); EntryBB = &CopyBB->getParent()->getEntryBlock(); for (Instruction &Inst : *BB) copyInstruction(Stmt, &Inst, BBMap, LTS, NewAccesses); - // After a basic block was copied store all scalars that escape this block - // in their alloca. First the scalars that have dependences inside the SCoP, - // then the ones that might escape the SCoP. - generateScalarStores(Stmt, BB, LTS, BBMap); - const Region &R = Stmt.getParent()->getRegion(); for (Instruction &Inst : *BB) handleOutsideUsers(R, &Inst, BBMap[&Inst]); @@ -426,16 +423,9 @@ EscapeMap[Inst] = std::make_pair(ScalarAddr, std::move(EscapeUsers)); } -void BlockGenerator::generateScalarLoads(ScopStmt &Stmt, - const Instruction *Inst, - ValueMapT &BBMap) { - auto *MAL = Stmt.lookupAccessesFor(Inst); - - if (!MAL) - return; - - for (MemoryAccess *MA : *MAL) { - if (MA->isExplicit() || !MA->isRead()) +void BlockGenerator::generateScalarLoads(ScopStmt &Stmt, ValueMapT &BBMap) { + for (MemoryAccess *MA : Stmt) { + if (MA->isExplicit() || MA->isWrite()) continue; auto *Address = getOrCreateAlloca(*MA); @@ -491,14 +481,13 @@ return ScalarValue; } -void BlockGenerator::generateScalarStores(ScopStmt &Stmt, BasicBlock *BB, - LoopToScevMapT <S, +void BlockGenerator::generateScalarStores(ScopStmt &Stmt, LoopToScevMapT <S, ValueMapT &BBMap) { const Region &R = Stmt.getParent()->getRegion(); - assert(Stmt.isBlockStmt() && BB == Stmt.getBasicBlock() && - "Region statements need to use the generateScalarStores() " - "function in the RegionGenerator"); + assert(Stmt.isBlockStmt() && "Region statements need to use the " + "generateScalarStores() function in the " + "RegionGenerator"); for (MemoryAccess *MA : Stmt) { if (MA->isExplicit() || MA->isRead()) @@ -1007,7 +996,7 @@ // Forget all old mappings. BlockMap.clear(); - RegionMaps.clear(); + ValueMap.clear(); IncompletePHINodeMap.clear(); // The region represented by the statement. @@ -1021,6 +1010,8 @@ EntryBBCopy->setName("polly.stmt." + EntryBB->getName() + ".entry"); Builder.SetInsertPoint(EntryBBCopy->begin()); + generateScalarLoads(Stmt, ValueMap); + for (auto PI = pred_begin(EntryBB), PE = pred_end(EntryBB); PI != PE; ++PI) if (!R->contains(*PI)) BlockMap[*PI] = EntryBBCopy; @@ -1041,17 +1032,11 @@ // In order to remap PHI nodes we store also basic block mappings. BlockMap[BB] = BBCopy; - - // Get the mapping for this block and initialize it with the mapping - // available at its immediate dominator (in the new region). - ValueMapT &RegionMap = RegionMaps[BBCopy]; - RegionMap = RegionMaps[BBCopyIDom]; + ValueMap[BB] = BBCopy; // Copy the block with the BlockGenerator. - copyBB(Stmt, BB, BBCopy, RegionMap, LTS, IdToAstExp); - - // In order to remap PHI nodes we store also basic block mappings. - BlockMap[BB] = BBCopy; + Builder.SetInsertPoint(BBCopy->begin()); + copyBB(Stmt, BB, BBCopy, ValueMap, LTS, IdToAstExp); // Add values to incomplete PHI nodes waiting for this block to be copied. for (const PHINodePairTy &PHINodePair : IncompletePHINodeMap[BB]) @@ -1065,12 +1050,13 @@ } // Now create a new dedicated region exit block and add it to the region map. + BasicBlock *ExitBB = R->getExit(); BasicBlock *ExitBBCopy = SplitBlock(Builder.GetInsertBlock(), Builder.GetInsertPoint(), &DT, &LI); - ExitBBCopy->setName("polly.stmt." + R->getExit()->getName() + ".exit"); - BlockMap[R->getExit()] = ExitBBCopy; - - repairDominance(R->getExit(), ExitBBCopy); + ExitBBCopy->setName("polly.stmt." + ExitBB->getName() + ".exit"); + BlockMap[ExitBB] = ExitBBCopy; + ValueMap[ExitBB] = ExitBBCopy; + repairDominance(ExitBB, ExitBBCopy); // As the block generator doesn't handle control flow we need to add the // region control flow by hand after all blocks have been copied. @@ -1087,11 +1073,8 @@ Instruction *BICopy = BBCopy->getTerminator(); - ValueMapT &RegionMap = RegionMaps[BBCopy]; - RegionMap.insert(BlockMap.begin(), BlockMap.end()); - Builder.SetInsertPoint(BICopy); - copyInstScalar(Stmt, TI, RegionMap, LTS); + copyInstScalar(Stmt, TI, ValueMap, LTS); BICopy->eraseFromParent(); } @@ -1127,50 +1110,98 @@ LTS[L] = SE.getUnknown(LoopPHI); } - // Reset the old insert point for the build. - Builder.SetInsertPoint(ExitBBCopy->begin()); + // Continue generating code in the exit block. + Builder.SetInsertPoint(ExitBBCopy->getFirstInsertionPt()); + generateScalarStores(Stmt, LTS, ValueMap); } -void RegionGenerator::generateScalarLoads(ScopStmt &Stmt, - const Instruction *Inst, - ValueMapT &BBMap) { +void RegionGenerator::generateScalarLoads(ScopStmt &Stmt, ValueMapT &BBMap) { + Region &R = Stmt.getParent()->getRegion(); - // Inside a non-affine region PHI nodes are copied not demoted. Once the - // phi is copied it will reload all inputs from outside the region, hence - // we do not need to generate code for the read access of the operands of a - // PHI. - if (isa(Inst)) - return; + for (MemoryAccess *MA : Stmt) { + if (MA->isExplicit() || MA->isWrite()) + continue; - return BlockGenerator::generateScalarLoads(Stmt, Inst, BBMap); + // Only generate loads for PHIs in the entry block. Intra-ScopStmt PHIs are + // copied directly. + Instruction *AccInst = MA->getAccessInstruction(); + if (MA->isPHI() && AccInst->getParent() != R.getEntry()) + continue; + + Value *Address = getOrCreateAlloca(*MA); + BBMap[MA->getBaseAddr()] = + Builder.CreateLoad(Address, Address->getName() + ".reload"); + } } -void RegionGenerator::generateScalarStores(ScopStmt &Stmt, BasicBlock *BB, - LoopToScevMapT <S, +void RegionGenerator::generateScalarStores(ScopStmt &Stmt, LoopToScevMapT <S, ValueMapT &BBMap) { + const Region &R = Stmt.getParent()->getRegion(); assert(Stmt.getRegion() && "Block statements need to use the generateScalarStores() " "function in the BlockGenerator"); - for (MemoryAccess *MA : Stmt) { + // This statement's region exit block, not the scop's exit. + BasicBlock *StmtExitBB = Stmt.getRegion()->getExit(); + for (MemoryAccess *MA : Stmt) { if (MA->isExplicit() || MA->isRead()) continue; Instruction *ScalarInst = MA->getAccessInstruction(); - - // Only generate accesses that belong to this basic block. - if (ScalarInst->getParent() != BB) + Value *Val = MA->getAccessValue(); + Value *BaseAddr = MA->getBaseAddr(); + + // Determine whether this store belongs to the write of a PHI's incoming + // value. These must be written in any case because the PHI receiving the + // value is outside this non-affine region. There are two cases for such PHI + // writes: + // - A regular PHI is in the stmt's region exit block, depending on the edge + // we are exiting it. + // - A value escaping the scop via a PHI in the scop's exit block. Such an + // access directly writes the scalar's demoted memory instead of the PHI's + // (.s2a instead of .phiops). Such escaping writes are connected to + // terminator instruction which in any other case do not write scalars. + // + // In both cases the write takes place on the terminator of a stmt's region + // exiting block. + bool IsPHIWrite = + (MA->isPHI() && cast(BaseAddr)->getParent() == StmtExitBB) || + (!MA->isPHI() && isa(ScalarInst)); + + // Only generate writes for PHIs in exiting blocks. Intra-ScopStmt PHIs are + // copied directly. + if (MA->isPHI() && !IsPHIWrite) continue; - Value *Val = MA->getAccessValue(); + // In case we add the store into an exiting block, we need to restore the + // position for stores in the exit node. + auto SavedInsertionPoint = Builder.GetInsertPoint(); + + // For PHI writes, change the insertion point. For scalar writes, first see + // whether there could be any use of it outside of this non-affine region; + // i.e. if the defintion is not dominating the exit and therefore any use + // that could follow, there can be no such use-def chain in the original + // program because it would be illegal. + if (IsPHIWrite) { + // Determine the position for PHI writes. + BasicBlock *ExitingBB = ScalarInst->getParent(); + BasicBlock *ExitingBBCopy = BlockMap[ExitingBB]; + Builder.SetInsertPoint(ExitingBBCopy->getTerminator()); + } else if (isa(Val) && + !DT.dominates(cast(Val)->getParent(), StmtExitBB)) + continue; auto Address = getOrCreateAlloca(*MA); Val = getNewScalarValue(Val, R, Stmt, LTS, BBMap); Builder.CreateStore(Val, Address); + + // Restore the insertion point if necessary. + if (IsPHIWrite) + Builder.SetInsertPoint(SavedInsertionPoint); } } @@ -1191,12 +1222,8 @@ Value *OpCopy = nullptr; if (StmtR->contains(IncomingBB)) { - assert(RegionMaps.count(BBCopy) && - "Incoming PHI block did not have a BBMap"); - ValueMapT &BBCopyMap = RegionMaps[BBCopy]; - Value *Op = PHI->getIncomingValueForBlock(IncomingBB); - OpCopy = getNewValue(Stmt, Op, BBCopyMap, LTS, getLoopForInst(PHI)); + OpCopy = getNewValue(Stmt, Op, ValueMap, LTS, getLoopForInst(PHI)); } else { if (PHICopy->getBasicBlockIndex(BBCopy) >= 0) Index: test/Isl/CodeGen/non-affine-phi-node-expansion-2.ll =================================================================== --- test/Isl/CodeGen/non-affine-phi-node-expansion-2.ll +++ test/Isl/CodeGen/non-affine-phi-node-expansion-2.ll @@ -4,7 +4,7 @@ ; CHECK: polly.stmt.bb3: ; preds = %polly.stmt.bb3.entry -; CHECK: %tmp6_p_scalar_ = load double, double* %arg11, !alias.scope !0, !noalias !2 +; CHECK: %tmp6_p_scalar_ = load double, double* %arg12, !alias.scope !0, !noalias !2 ; CHECK: %p_tmp7 = fadd double 1.000000e+00, %tmp6_p_scalar_ ; CHECK: %p_tmp8 = fcmp olt double 1.400000e+01, %p_tmp7 ; CHECK: br i1 %p_tmp8, label %polly.stmt.bb9, label %polly.stmt.bb10 Index: test/Isl/CodeGen/non-affine-phi-node-expansion-3.ll =================================================================== --- test/Isl/CodeGen/non-affine-phi-node-expansion-3.ll +++ test/Isl/CodeGen/non-affine-phi-node-expansion-3.ll @@ -17,13 +17,7 @@ ; CHECK-NEXT: %p_val0 = fadd float 1.000000e+00, 2.000000e+00 ; CHECK-NEXT: %p_val1 = fadd float 1.000000e+00, 2.000000e+00 ; CHECK-NEXT: %p_val2 = fadd float 1.000000e+00, 2.000000e+00 -; CHECK-NEXT: store float %p_val0, float* %merge.phiops -; CHECK-NEXT: store float %p_val1, float* %val1.s2a -; CHECK-NEXT: store float %p_val2, float* %val2.s2a - -; FIXME -> The last two writes are not really needed and can be dropped if the -; incoming block of the PHI and the value that is used share the same -; non-affine region. +; CHECK: store float %p_val0, float* %merge.phiops branch1: br i1 %cond1, label %branch2, label %backedge Index: test/Isl/CodeGen/phi_loop_carried_float.ll =================================================================== --- test/Isl/CodeGen/phi_loop_carried_float.ll +++ test/Isl/CodeGen/phi_loop_carried_float.ll @@ -28,8 +28,8 @@ ; CHECK: store float %tmp.0.phiops.reload[[R1]], float* %tmp.0.s2a ; CHECK-LABEL: polly.stmt.bb4: -; CHECK: %tmp[[R5:[0-9]*]]_p_scalar_ = load float, float* %scevgep, align 4, !alias.scope !0, !noalias !2 ; CHECK: %tmp.0.s2a.reload[[R3:[0-9]*]] = load float, float* %tmp.0.s2a +; CHECK: %tmp[[R5:[0-9]*]]_p_scalar_ = load float, float* %scevgep, align 4, !alias.scope !0, !noalias !2 ; CHECK: %p_tmp[[R4:[0-9]*]] = fadd float %tmp.0.s2a.reload[[R3]], %tmp[[R5]]_p_scalar_ ; CHECK: store float %p_tmp[[R4]], float* %tmp.0.phiops Index: test/Isl/CodeGen/phi_loop_carried_float_escape.ll =================================================================== --- test/Isl/CodeGen/phi_loop_carried_float_escape.ll +++ test/Isl/CodeGen/phi_loop_carried_float_escape.ll @@ -28,8 +28,8 @@ ; CHECK-: store float %tmp.0.phiops.reload[[R1]], float* %tmp.0.s2a ; CHECK-LABEL: polly.stmt.bb4: -; CHECK: %tmp[[R5:[0-9]*]]_p_scalar_ = load float, float* %scevgep, align 4, !alias.scope !0, !noalias !2 ; CHECK: %tmp.0.s2a.reload[[R3:[0-9]*]] = load float, float* %tmp.0.s2a +; CHECK: %tmp[[R5:[0-9]*]]_p_scalar_ = load float, float* %scevgep, align 4, !alias.scope !0, !noalias !2 ; CHECK: %p_tmp[[R4:[0-9]*]] = fadd float %tmp.0.s2a.reload[[R3]], %tmp[[R5]]_p_scalar_ ; CHECK: store float %p_tmp[[R4]], float* %tmp.0.phiops Index: test/Isl/CodeGen/read-only-scalars.ll =================================================================== --- test/Isl/CodeGen/read-only-scalars.ll +++ test/Isl/CodeGen/read-only-scalars.ll @@ -14,8 +14,8 @@ ; SCALAR-NEXT: store float %scalar, float* %scalar.s2a ; SCALAR-LABEL: polly.stmt.stmt1: -; SCALAR-NEXT: %val_p_scalar_ = load float, float* %A, ; SCALAR-NEXT: %scalar.s2a.reload = load float, float* %scalar.s2a +; SCALAR-NEXT: %val_p_scalar_ = load float, float* %A, ; SCALAR-NEXT: %p_sum = fadd float %val_p_scalar_, %scalar.s2a.reload define void @foo(float* noalias %A, float %scalar) {