Index: include/polly/CodeGen/BlockGenerators.h =================================================================== --- include/polly/CodeGen/BlockGenerators.h +++ include/polly/CodeGen/BlockGenerators.h @@ -46,9 +46,7 @@ /// Map types to resolve scalar dependences. /// ///@{ - - /// @see The ScalarMap and PHIOpMap member. - using ScalarAllocaMapTy = DenseMap, AssertingVH>; + using AllocaMapTy = DenseMap>; /// Simple vector of instructions to store escape users. using EscapeUserVectorTy = SmallVector; @@ -71,7 +69,6 @@ /// @param SE The scalar evolution info for the current function /// @param DT The dominator tree of this function. /// @param ScalarMap Map from scalars to their demoted location. - /// @param PHIOpMap Map from PHIs to their demoted operand location. /// @param EscapeMap Map from scalars to their escape users and locations. /// @param GlobalMap A mapping from llvm::Values used in the original scop /// region to a new set of llvm::Values. Each reference to @@ -80,10 +77,9 @@ /// @param ExprBuilder An expression builder to generate new access functions. /// @param StartBlock The first basic block after the RTC. BlockGenerator(PollyIRBuilder &Builder, LoopInfo &LI, ScalarEvolution &SE, - DominatorTree &DT, ScalarAllocaMapTy &ScalarMap, - ScalarAllocaMapTy &PHIOpMap, EscapeUsersAllocaMapTy &EscapeMap, - ValueMapT &GlobalMap, IslExprBuilder *ExprBuilder, - BasicBlock *StartBlock); + DominatorTree &DT, AllocaMapTy &ScalarMap, + EscapeUsersAllocaMapTy &EscapeMap, ValueMapT &GlobalMap, + IslExprBuilder *ExprBuilder, BasicBlock *StartBlock); /// Copy the basic block. /// @@ -99,21 +95,7 @@ void copyStmt(ScopStmt &Stmt, LoopToScevMapT <S, isl_id_to_ast_expr *NewAccesses); - /// Return the scalar alloca for @p ScalarBase. - /// - /// If no alloca was mapped to @p ScalarBase a new one is created. - /// - /// @param ScalarBase The demoted scalar value. 
- /// @param GlobalMap A mapping from Allocas to other memory locations that - /// can be used to replace the original alloca locations - /// with new memory locations, e.g. when passing values to - /// subfunctions while offloading parallel sections. - /// - /// @returns The alloca for @p ScalarBase or a replacement value taken from - /// GlobalMap. - Value *getOrCreateScalarAlloca(Value *ScalarBase); - - /// Remove a Value's allocation from the ScalarMap. + /// Remove a ScopArrayInfo's allocation from the ScalarMap. /// /// This function allows to remove values from the ScalarMap. This is useful /// if the corresponding alloca instruction will be deleted (or moved into @@ -121,18 +103,8 @@ /// AssertingVH will trigger due to us still keeping reference to this /// scalar. /// - /// @param ScalarBase The value to remove. - void freeScalarAlloc(Value *ScalarBase) { ScalarMap.erase(ScalarBase); } - - /// Return the PHi-node alloca for @p ScalarBase. - /// - /// If no alloca was mapped to @p ScalarBase a new one is created. - /// - /// @param ScalarBase The demoted scalar value. - /// - /// @returns The alloca for @p ScalarBase or a replacement value taken from - /// GlobalMap. - Value *getOrCreatePHIAlloca(Value *ScalarBase); + /// @param Array The array for which the alloca was generated. + void freeScalarAlloc(ScopArrayInfo *Array) { ScalarMap.erase(Array); } /// Return the alloca for @p Access. /// @@ -180,41 +152,38 @@ /// The entry block of the current function. BasicBlock *EntryBB; - /// Maps to resolve scalar dependences for PHI operands and scalars. + /// Map to resolve scalar dependences for PHI operands and scalars. /// /// When translating code that contains scalar dependences as they result from - /// inter-block scalar dependences (including the use of data carrying - /// PHI nodes), we do not directly regenerate in-register SSA code, but - /// instead allocate some stack memory through which these scalar values are - /// passed. 
Only a later pass of -mem2reg will then (re)introduce in-register + /// inter-block scalar dependences (including the use of data carrying PHI + /// nodes), we do not directly regenerate in-register SSA code, but instead + /// allocate some stack memory through which these scalar values are passed. + /// Only a later pass of -mem2reg will then (re)introduce in-register /// computations. /// /// To keep track of the memory location(s) used to store the data computed by - /// a given SSA instruction, we use the maps 'ScalarMap' and 'PHIOpMap'. Each - /// maps a given scalar value to a junk of stack allocated memory. - /// - /// 'ScalarMap' is used for normal scalar dependences that go from a scalar - /// definition to its use. Such dependences are lowered by directly writing - /// the value an instruction computes into the corresponding chunk of memory - /// and reading it back from this chunk of memory right before every use of - /// this original scalar value. The memory locations in 'ScalarMap' end with - /// '.s2a'. - /// - /// 'PHIOpMap' is used to model PHI nodes. For each PHI nodes we introduce, - /// besides the memory in 'ScalarMap', a second chunk of memory into which we - /// write at the end of each basic block preceeding the PHI instruction the - /// value passed through this basic block. At the place where the PHI node is - /// executed, we replace the PHI node with a load from the corresponding - /// memory location in the 'PHIOpMap' table. The memory locations in - /// 'PHIOpMap' end with '.phiops'. - /// - /// The ScopArrayInfo objects of accesses that belong to a PHI node may have - /// identical base pointers, even though they refer to two different memory - /// locations, the normal '.s2a' locations and the special '.phiops' - /// locations. For historic reasons we keep such accesses in two maps - /// 'ScalarMap' and 'PHIOpMap', index by the BasePointer. 
An alternative - /// implemenation, could use a single map that uses the ScopArrayInfo object - /// as index. + /// a given SSA instruction, we use the map 'ScalarMap'. ScalarMap maps a + /// given ScopArrayInfo to the chunk of stack allocated memory that is + /// used for code generation. + /// + /// Up to two different ScopArrayInfo objects are associated with each + /// llvm::Value: + /// + /// MemoryType::Value objects are used for normal scalar dependences that go + /// from a scalar definition to its use. Such dependences are lowered by + /// directly writing the value an instruction computes into the corresponding + /// chunk of memory and reading it back from this chunk of memory right before + /// every use of this original scalar value. The memory allocations for + /// MemoryType::Value objects end with '.s2a'. + /// + /// MemoryType::PHI (and MemoryType::ExitPHI) objects are used to model PHI + /// nodes. For each PHI node we introduce, besides the Array of type + /// MemoryType::Value, a second chunk of memory into which we write at the end + /// of each basic block preceding the PHI instruction the value passed + /// through this basic block. At the place where the PHI node is executed, we + /// replace the PHI node with a load from the corresponding MemoryType::PHI + /// memory location. The memory allocations for MemoryType::PHI end with + /// '.phiops'. /// /// Example: /// @@ -259,9 +228,8 @@ /// add = add.s2a /// ... = add ... = add /// - /// ScalarMap = { x1 -> x1.s2a, x2 -> x2.s2a, add -> add.s2a } - /// PHIOpMap = { x2 -> x2.phiops } - /// + /// ScalarMap = { x1:Value -> x1.s2a, x2:Value -> x2.s2a, + /// add:Value -> add.s2a, x2:PHI -> x2.phiops } /// /// ??? Why does a PHI-node require two memory chunks ??? /// @@ -300,14 +268,8 @@ /// PHI node, has been run and has overwritten the PHI's old value. Hence, a /// single memory location is not enough to code-generate a PHI node. 
/// - ///{ - /// /// Memory locations used for the special PHI node modeling. - ScalarAllocaMapTy &PHIOpMap; - - /// Memory locations used to model scalar dependences. - ScalarAllocaMapTy &ScalarMap; - ///} + AllocaMapTy &ScalarMap; /// Map from instructions to their escape users as well as the alloca. EscapeUsersAllocaMapTy &EscapeMap; @@ -355,19 +317,6 @@ ValueMapT &BBMap, LoopToScevMapT <S, isl_id_to_ast_expr *NewAccesses); - /// Return the alloca for @p ScalarBase in @p Map. - /// - /// If no alloca was mapped to @p ScalarBase in @p Map a new one is created - /// and named after @p ScalarBase with the suffix @p NameExt. - /// - /// @param ScalarBase The demoted scalar value. - /// @param Map The map we should look for a mapped alloca value. - /// @param NameExt The suffix we add to the name of a new created alloca. - /// - /// @returns The alloca for @p ScalarBase. - Value *getOrCreateAlloca(Value *ScalarBase, ScalarAllocaMapTy &Map, - const char *NameExt); - /// Generate reload of scalars demoted to memory and needed by @p Stmt. /// /// @param Stmt The statement we generate code for. @@ -396,11 +345,11 @@ ValueMapT &BBMap, __isl_keep isl_id_to_ast_expr *NewAccesses); - /// Handle users of @p Inst outside the SCoP. + /// Handle users of @p Array outside the SCoP. /// /// @param S The current SCoP. - /// @param Inst The current instruction we check. - void handleOutsideUsers(const Scop &S, Instruction *Inst); + /// @param Array The ScopArrayInfo to handle. + void handleOutsideUsers(const Scop &S, ScopArrayInfo *Array); /// Find scalar statements that have outside users. 
/// Index: include/polly/CodeGen/IslNodeBuilder.h =================================================================== --- include/polly/CodeGen/IslNodeBuilder.h +++ include/polly/CodeGen/IslNodeBuilder.h @@ -66,7 +66,7 @@ : S(S), Builder(Builder), Annotator(Annotator), ExprBuilder(S, Builder, IDToValue, ValueMap, DL, SE, DT, LI, StartBlock), - BlockGen(Builder, LI, SE, DT, ScalarMap, PHIOpMap, EscapeMap, ValueMap, + BlockGen(Builder, LI, SE, DT, ScalarMap, EscapeMap, ValueMap, &ExprBuilder, StartBlock), RegionGen(BlockGen), P(P), DL(DL), LI(LI), SE(SE), DT(DT), StartBlock(StartBlock) {} @@ -125,10 +125,7 @@ ///@{ /// See BlockGenerator::ScalarMap. - BlockGenerator::ScalarAllocaMapTy ScalarMap; - - /// See BlockGenerator::PhiOpMap. - BlockGenerator::ScalarAllocaMapTy PHIOpMap; + BlockGenerator::AllocaMapTy ScalarMap; /// See BlockGenerator::EscapeMap. BlockGenerator::EscapeUsersAllocaMapTy EscapeMap; Index: lib/CodeGen/BlockGenerators.cpp =================================================================== --- lib/CodeGen/BlockGenerators.cpp +++ lib/CodeGen/BlockGenerators.cpp @@ -50,12 +50,11 @@ BlockGenerator::BlockGenerator( PollyIRBuilder &B, LoopInfo &LI, ScalarEvolution &SE, DominatorTree &DT, - ScalarAllocaMapTy &ScalarMap, ScalarAllocaMapTy &PHIOpMap, - EscapeUsersAllocaMapTy &EscapeMap, ValueMapT &GlobalMap, - IslExprBuilder *ExprBuilder, BasicBlock *StartBlock) + AllocaMapTy &ScalarMap, EscapeUsersAllocaMapTy &EscapeMap, + ValueMapT &GlobalMap, IslExprBuilder *ExprBuilder, BasicBlock *StartBlock) : Builder(B), LI(LI), SE(SE), ExprBuilder(ExprBuilder), DT(DT), - EntryBB(nullptr), PHIOpMap(PHIOpMap), ScalarMap(ScalarMap), - EscapeMap(EscapeMap), GlobalMap(GlobalMap), StartBlock(StartBlock) {} + EntryBB(nullptr), ScalarMap(ScalarMap), EscapeMap(EscapeMap), + GlobalMap(GlobalMap), StartBlock(StartBlock) {} Value *BlockGenerator::trySynthesizeNewValue(ScopStmt &Stmt, Value *Old, ValueMapT &BBMap, @@ -214,13 +213,7 @@ LTS, NewAccesses, Access.getId(), 
Access.getAccessValue()->getType()); - if (Access.isLatestValueKind() || Access.isLatestExitPHIKind()) - return getOrCreateScalarAlloca(Access.getBaseAddr()); - - if (Access.isLatestPHIKind()) - return getOrCreatePHIAlloca(Access.getBaseAddr()); - - llvm_unreachable("Unknown access type"); + return getOrCreateAlloca(Access); } Loop *BlockGenerator::getLoopForStmt(const ScopStmt &Stmt) const { @@ -366,53 +359,57 @@ copyInstruction(Stmt, &Inst, BBMap, LTS, NewAccesses); } -Value *BlockGenerator::getOrCreateAlloca(Value *ScalarBase, - ScalarAllocaMapTy &Map, - const char *NameExt) { - // If no alloca was found create one and insert it in the entry block. - if (!Map.count(ScalarBase)) { - auto *Ty = ScalarBase->getType(); - auto NewAddr = new AllocaInst(Ty, ScalarBase->getName() + NameExt); - EntryBB = &Builder.GetInsertBlock()->getParent()->getEntryBlock(); - NewAddr->insertBefore(&*EntryBB->getFirstInsertionPt()); - Map[ScalarBase] = NewAddr; - } - - auto Addr = Map[ScalarBase]; - - if (auto NewAddr = GlobalMap.lookup(Addr)) - return NewAddr; - - return Addr; -} - Value *BlockGenerator::getOrCreateAlloca(const MemoryAccess &Access) { - assert(!Access.isArrayKind() && "Trying to get alloca for array kind"); + assert(!Access.isLatestArrayKind() && "Trying to get alloca for array kind"); - if (Access.isPHIKind()) - return getOrCreatePHIAlloca(Access.getBaseAddr()); - else - return getOrCreateScalarAlloca(Access.getBaseAddr()); + return getOrCreateAlloca(Access.getLatestScopArrayInfo()); } Value *BlockGenerator::getOrCreateAlloca(const ScopArrayInfo *Array) { assert(!Array->isArrayKind() && "Trying to get alloca for array kind"); + auto &Addr = ScalarMap[Array]; + + if (Addr) { + // Allow allocas to be (temporarily) redirected once by adding a new + // old-alloca-addr to new-addr mapping to GlobalMap. 
This functionality + // is used for example by the OpenMP code generation where a first use + // of a scalar while still in the host code allocates a normal alloca with + // getOrCreateAlloca. When the values of this scalar are accessed during + // the generation of the parallel subfunction, these values are copied over + // to the parallel subfunction and each request for a scalar alloca slot + // must be forwarded to the temporary in-subfunction slot. This mapping is + // removed when the subfunction has been generated and again normal host + // code is generated. As GlobalMap may be changed multiple times (for + // each parallel loop), is commonly only known after the initial alloca + // has been generated, and the original alloca value must be restored at + // the end, it is not possible to perform the GlobalMap lookup right after + // creating the alloca below, but instead we need to check GlobalMap at + // each call to getOrCreateAlloca. + if (Value *NewAddr = GlobalMap.lookup(Addr)) + return NewAddr; + return Addr; + } + + Type *Ty = Array->getElementType(); + Value *ScalarBase = Array->getBasePtr(); + std::string NameExt; if (Array->isPHIKind()) - return getOrCreatePHIAlloca(Array->getBasePtr()); + NameExt = ".phiops"; else - return getOrCreateScalarAlloca(Array->getBasePtr()); -} + NameExt = ".s2a"; -Value *BlockGenerator::getOrCreateScalarAlloca(Value *ScalarBase) { - return getOrCreateAlloca(ScalarBase, ScalarMap, ".s2a"); -} + auto *NewAddr = new AllocaInst(Ty, ScalarBase->getName() + NameExt); + EntryBB = &Builder.GetInsertBlock()->getParent()->getEntryBlock(); + NewAddr->insertBefore(&*EntryBB->getFirstInsertionPt()); + Addr = NewAddr; -Value *BlockGenerator::getOrCreatePHIAlloca(Value *ScalarBase) { - return getOrCreateAlloca(ScalarBase, PHIOpMap, ".phiops"); + return Addr; } -void BlockGenerator::handleOutsideUsers(const Scop &S, Instruction *Inst) { +void BlockGenerator::handleOutsideUsers(const Scop &S, ScopArrayInfo *Array) { + Instruction *Inst = 
cast(Array->getBasePtr()); + // If there are escape users we get the alloca for this instruction and put it // in the EscapeMap for later finalization. Lastly, if the instruction was // copied multiple times we already did this and can exit. @@ -438,7 +435,7 @@ return; // Get or create an escape alloca for this instruction. - auto *ScalarAddr = getOrCreateScalarAlloca(Inst); + auto *ScalarAddr = getOrCreateAlloca(Array); // Remember that this instruction has escape uses and the escape alloca. EscapeMap[Inst] = std::make_pair(ScalarAddr, std::move(EscapeUsers)); @@ -548,7 +545,7 @@ Value *ScalarValue = PHI->getIncomingValue(Idx); - Builder.CreateStore(ScalarValue, getOrCreatePHIAlloca(PHI)); + Builder.CreateStore(ScalarValue, getOrCreateAlloca(Array)); continue; } @@ -565,8 +562,7 @@ if (!S.hasSingleExitEdge() && PHI->getBasicBlockIndex(ExitBB) >= 0) continue; - Builder.CreateStore(Array->getBasePtr(), - getOrCreateScalarAlloca(Array->getBasePtr())); + Builder.CreateStore(Array->getBasePtr(), getOrCreateAlloca(Array)); } } @@ -636,7 +632,7 @@ if (!S.contains(Inst)) continue; - handleOutsideUsers(S, Inst); + handleOutsideUsers(S, Array); } } @@ -671,7 +667,7 @@ continue; std::string Name = PHI->getName(); - Value *ScalarAddr = getOrCreateScalarAlloca(PHI); + Value *ScalarAddr = getOrCreateAlloca(SAI); Value *Reload = Builder.CreateLoad(ScalarAddr, Name + ".ph.final_reload"); Reload = Builder.CreateBitOrPointerCast(Reload, PHI->getType()); Value *OriginalValue = PHI->getIncomingValueForBlock(MergeBB); Index: lib/CodeGen/IslNodeBuilder.cpp =================================================================== --- lib/CodeGen/IslNodeBuilder.cpp +++ lib/CodeGen/IslNodeBuilder.cpp @@ -1220,12 +1220,8 @@ } // For scalar derived SAIs we remap the alloca used for the derived value. 
- if (BasePtr == MA->getAccessInstruction()) { - if (DerivedSAI->isPHIKind()) - PHIOpMap[BasePtr] = Alloca; - else - ScalarMap[BasePtr] = Alloca; - } + if (BasePtr == MA->getAccessInstruction()) + ScalarMap[DerivedSAI] = Alloca; } } Index: lib/CodeGen/PPCGCodeGeneration.cpp =================================================================== --- lib/CodeGen/PPCGCodeGeneration.cpp +++ lib/CodeGen/PPCGCodeGeneration.cpp @@ -1206,10 +1206,8 @@ Instruction &HostInsertPoint = *Builder.GetInsertPoint(); IslExprBuilder::IDToValueTy HostIDs = IDToValue; ValueMapT HostValueMap = ValueMap; - BlockGenerator::ScalarAllocaMapTy HostScalarMap = ScalarMap; - BlockGenerator::ScalarAllocaMapTy HostPHIOpMap = PHIOpMap; + BlockGenerator::AllocaMapTy HostScalarMap = ScalarMap; ScalarMap.clear(); - PHIOpMap.clear(); SetVector Loops; @@ -1240,7 +1238,6 @@ ValueMap = std::move(HostValueMap); ScalarMap = std::move(HostScalarMap); - PHIOpMap = std::move(HostPHIOpMap); EscapeMap.clear(); IDToSAI.clear(); Annotator.resetAlternativeAliasBases();