Index: include/polly/CodeGen/BlockGenerators.h =================================================================== --- include/polly/CodeGen/BlockGenerators.h +++ include/polly/CodeGen/BlockGenerators.h @@ -77,7 +77,7 @@ /// /// @see The EscapeMap member. using EscapeUsersAllocaMapTy = - DenseMap>; + DenseMap>; ///@} @@ -110,6 +110,21 @@ /// @param LTS A map from old loops to new induction variables as SCEVs. void copyStmt(ScopStmt &Stmt, ValueMapT &GlobalMap, LoopToScevMapT <S); + /// @brief Return (and create) the allocations needed for a given ScopStmt + /// + /// In some cases, e.g. the generation of OpenMP parallel code, it is + /// sometimes necessary to know ahead of time which memory locations will be + /// used when code-generating scalar dependences of PHI nodes. + /// This function returns a vector containing such memory locations and + /// introduces them if they have not yet been allocated. + /// + /// @param Stmt The statement we look at + /// @param ScalarMap Map from scalars to their demoted location. + /// @param PHIOpMap Map from PHIs to their demoted operand location. + std::vector getScalarAllocationsForStmt(const ScopStmt &Stmt, + ScalarAllocaMapTy &ScalarMap, + ScalarAllocaMapTy &PHIOpMap); + /// @brief Finalize the code generation for the SCoP @p S. /// /// This will initialize and finalize the scalar variables we demoted during @@ -305,21 +320,29 @@ /// /// @param ScalarBase The demoted scalar instruction. /// @param Map The map we should look for a mapped alloca instruction. + /// @param GlobalMap A map that contains possible lobal rewrites for the + /// scalar or global map. /// @param NameExt The suffix we add to the name of a new created alloca. /// @param IsNew If set it will hold true iff the alloca was created. /// /// @returns The alloca for @p ScalarBase in @p Map. - AllocaInst *getOrCreateAlloca(Instruction *ScalarBase, ScalarAllocaMapTy &Map, - const char *NameExt = ".s2a", - bool *IsNew = nullptr); + Value *getOrCreateAlloca(Instruction *ScalarBase, ScalarAllocaMapTy &Map, + ValueMapT &GlobalMap, const char *NameExt = ".s2a", + bool *IsNew = nullptr); /// @brief Generate reload of scalars demoted to memory and needed by @p Inst. /// - /// @param Stmt The statement we generate code for. - /// @param Inst The instruction that might need reloaded values. - /// @param BBMap A mapping from old values to their new values in this block. - virtual void generateScalarLoads(ScopStmt &Stmt, const Instruction *Inst, - ValueMapT &BBMap); + /// @param Stmt The statement we generate code for. + /// @param Inst The instruction that might need reloaded values. + /// @param BBMap A mapping from old values to their new values in this + /// block. + /// @param GlobalMap A mapping from old values to their new values + /// (for values recalculated in the new ScoP, but not + /// within this basic block). + virtual void generateScalarLoads(const ScopStmt &Stmt, + const Instruction *Inst, ValueMapT &BBMap, + ValueMapT &GlobalMap, + std::vector *Addresses = nullptr); /// @brief Generate the scalar stores for the given statement. /// @@ -327,19 +350,29 @@ /// starting in @p Stmt (hence all scalar write accesses in @p Stmt) need to /// be demoted to memory. /// - /// @param Stmt The statement we generate code for. - /// @param BB The basic block we generate code for. - /// @param BBMap A mapping from old values to their new values in this block. - /// @param GlobalMap A mapping for globally replaced values. - virtual void generateScalarStores(ScopStmt &Stmt, BasicBlock *BB, - ValueMapT &BBMAp, ValueMapT &GlobalMap); + /// @param Stmt The statement we generate code for. + /// @param BB The basic block we generate code for. + /// @param BBMap A mapping from old values to their new values in this + /// block. + /// @param GlobalMap A mapping from old values to their new values + /// (for values recalculated in the new ScoP, but not + /// within this basic block). + virtual void generateScalarStores(const ScopStmt &Stmt, BasicBlock *BB, + ValueMapT &BBMAp, ValueMapT &GlobalMap, + std::vector *Addresses = nullptr); /// @brief Handle users of @p Inst outside the SCoP. /// - /// @param R The current SCoP region. - /// @param Inst The current instruction we check. - /// @param InstCopy The copy of the instruction @p Inst in the optimized SCoP. - void handleOutsideUsers(const Region &R, Instruction *Inst, Value *InstCopy); + /// @param R The current SCoP region. + /// @param Inst The current instruction we check. + /// @param InstCopy The copy of the instruction @p Inst in the optimized + /// SCoP. + /// @param GlobalMap A map that contains possible lobal rewrites for the + /// scalar or global map. + void handleOutsideUsers(const Region &R, Instruction *Inst, Value *InstCopy, + ValueMapT &GlobalMap, + + std::vector *Addresses = nullptr); /// @brief Initialize the memory of demoted scalars. /// @@ -665,11 +698,17 @@ /// @brief Generate reload of scalars demoted to memory and needed by @p Inst. /// - /// @param Stmt The statement we generate code for. - /// @param Inst The instruction that might need reloaded values. - /// @param BBMap A mapping from old values to their new values in this block. - virtual void generateScalarLoads(ScopStmt &Stmt, const Instruction *Inst, - ValueMapT &BBMap) override; + /// @param Stmt The statement we generate code for. + /// @param Inst The instruction that might need reloaded values. + /// @param BBMap A mapping from old values to their new values in this + /// block. + /// @param GlobalMap A mapping from old values to their new values + /// (for values recalculated in the new ScoP, but not + /// within this basic block). + virtual void + generateScalarLoads(const ScopStmt &Stmt, const Instruction *Inst, + ValueMapT &BBMap, ValueMapT &GlobalMap, + std::vector *Addresses = nullptr) override; /// @brief Generate the scalar stores for the given statement. /// @@ -683,9 +722,10 @@ /// @param GlobalMap A mapping from old values to their new values /// (for values recalculated in the new ScoP, but not /// within this basic block). - virtual void generateScalarStores(ScopStmt &Stmt, BasicBlock *BB, - ValueMapT &BBMAp, - ValueMapT &GlobalMap) override; + virtual void + generateScalarStores(const ScopStmt &Stmt, BasicBlock *BB, ValueMapT &BBMAp, + ValueMapT &GlobalMap, + std::vector *Addresses = nullptr) override; /// @brief Copy a single PHI instruction. /// Index: include/polly/CodeGen/IslNodeBuilder.h =================================================================== --- include/polly/CodeGen/IslNodeBuilder.h +++ include/polly/CodeGen/IslNodeBuilder.h @@ -48,6 +48,9 @@ IslExprBuilder &getExprBuilder() { return ExprBuilder; } + static int findValuesInBlock(struct FindValuesUser &User, + const ScopStmt *Stmt, const BasicBlock *BB); + private: Scop &S; PollyIRBuilder &Builder; Index: lib/CodeGen/BlockGenerators.cpp =================================================================== --- lib/CodeGen/BlockGenerators.cpp +++ lib/CodeGen/BlockGenerators.cpp @@ -249,7 +249,7 @@ LoopToScevMapT <S) { // First check for possible scalar dependences for this instruction. - generateScalarLoads(Stmt, Inst, BBMap); + generateScalarLoads(Stmt, Inst, BBMap, GlobalMap); // Terminator instructions control the control flow. They are explicitly // expressed in the clast and do not need to be copied. @@ -354,13 +354,13 @@ const Region &R = Stmt.getParent()->getRegion(); for (Instruction &Inst : *BB) - handleOutsideUsers(R, &Inst, BBMap[&Inst]); + handleOutsideUsers(R, &Inst, BBMap[&Inst], GlobalMap); } -AllocaInst *BlockGenerator::getOrCreateAlloca(Instruction *ScalarBase, - ScalarAllocaMapTy &Map, - const char *NameExt, - bool *IsNew) { +Value *BlockGenerator::getOrCreateAlloca(Instruction *ScalarBase, + ScalarAllocaMapTy &Map, + ValueMapT &GlobalMap, + const char *NameExt, bool *IsNew) { // Check if an alloca was cached for the base instruction. AllocaInst *&Addr = Map[ScalarBase]; @@ -376,11 +376,38 @@ Addr->insertBefore(EntryBB->getFirstInsertionPt()); } - return Addr; + Value *UpdatedAddr = Addr; + + if (Value *New = GlobalMap.lookup(UpdatedAddr)) + UpdatedAddr = New; + + return UpdatedAddr; +} + +std::vector +BlockGenerator::getScalarAllocationsForStmt(const ScopStmt &Stmt, + ScalarAllocaMapTy &ScalarMap, + ScalarAllocaMapTy &PHIOpMap) { + ValueMapT GlobalMap; + ValueMapT BBMap; + std::vector Allocations; + BasicBlock *BB = Stmt.getBasicBlock(); + + for (auto &Inst : *BB) + generateScalarLoads(Stmt, &Inst, BBMap, GlobalMap, &Allocations); + + generateScalarStores(Stmt, BB, BBMap, GlobalMap, &Allocations); + + auto &R = Stmt.getParent()->getRegion(); + for (auto &Inst : *BB) + handleOutsideUsers(R, &Inst, nullptr, GlobalMap, &Allocations); + + return Allocations; } void BlockGenerator::handleOutsideUsers(const Region &R, Instruction *Inst, - Value *InstCopy) { + Value *InstCopy, ValueMapT &GlobalMap, + std::vector *Addresses) { BasicBlock *ExitBB = R.getExit(); EscapeUserVectorTy EscapeUsers; @@ -411,8 +438,13 @@ // Get or create an escape alloca for this instruction. bool IsNew; - AllocaInst *ScalarAddr = - getOrCreateAlloca(Inst, ScalarMap, ".escape", &IsNew); + Value *ScalarAddr = + getOrCreateAlloca(Inst, ScalarMap, GlobalMap, ".escape", &IsNew); + + if (Addresses) { + Addresses->push_back(ScalarAddr); + return; + } // Remember that this instruction has escape uses and the escape alloca. EscapeMap[Inst] = std::make_pair(ScalarAddr, std::move(EscapeUsers)); @@ -425,27 +457,32 @@ } } -void BlockGenerator::generateScalarLoads(ScopStmt &Stmt, +void BlockGenerator::generateScalarLoads(const ScopStmt &Stmt, const Instruction *Inst, - ValueMapT &BBMap) { + ValueMapT &BBMap, ValueMapT &GlobalMap, + std::vector *Addresses) { auto *MAL = Stmt.lookupAccessesFor(Inst); if (!MAL) return; for (MemoryAccess &MA : *MAL) { - AllocaInst *Address; + Value *Address; if (!MA.isScalar() || !MA.isRead()) continue; auto Base = cast(MA.getBaseAddr()); if (MA.getScopArrayInfo()->isPHI()) - Address = getOrCreateAlloca(Base, PHIOpMap, ".phiops"); + Address = getOrCreateAlloca(Base, PHIOpMap, GlobalMap, ".phiops"); else - Address = getOrCreateAlloca(Base, ScalarMap, ".s2a"); + Address = getOrCreateAlloca(Base, ScalarMap, GlobalMap, ".s2a"); - BBMap[Base] = Builder.CreateLoad(Address, Address->getName() + ".reload"); + if (Addresses) { + Addresses->push_back(Address); + } else { + BBMap[Base] = Builder.CreateLoad(Address, Address->getName() + ".reload"); + } } } @@ -490,9 +527,10 @@ return ScalarValue; } -void BlockGenerator::generateScalarStores(ScopStmt &Stmt, BasicBlock *BB, +void BlockGenerator::generateScalarStores(const ScopStmt &Stmt, BasicBlock *BB, ValueMapT &BBMap, - ValueMapT &GlobalMap) { + ValueMapT &GlobalMap, + std::vector *Addresses) { const Region &R = Stmt.getParent()->getRegion(); assert(Stmt.isBlockStmt() && BB == Stmt.getBasicBlock() && @@ -507,19 +545,24 @@ Instruction *Inst = MA->getAccessInstruction(); Value *Val = nullptr; - AllocaInst *Address = nullptr; + Value *Address = nullptr; if (MA->getScopArrayInfo()->isPHI()) { PHINode *BasePHI = dyn_cast(Base); int PHIIdx = BasePHI->getBasicBlockIndex(BB); - Address = getOrCreateAlloca(Base, PHIOpMap, ".phiops"); + Address = getOrCreateAlloca(Base, PHIOpMap, GlobalMap, ".phiops"); Val = BasePHI->getIncomingValue(PHIIdx); } else { - Address = getOrCreateAlloca(Base, ScalarMap, ".s2a"); + Address = getOrCreateAlloca(Base, ScalarMap, GlobalMap, ".s2a"); Val = Inst; } - Val = getNewScalarValue(Val, R, ScalarMap, BBMap, GlobalMap); - Builder.CreateStore(Val, Address); + + if (Addresses) { + Addresses->push_back(Address); + } else { + Val = getNewScalarValue(Val, R, ScalarMap, BBMap, GlobalMap); + Builder.CreateStore(Val, Address); + } } } @@ -576,7 +619,7 @@ Instruction *EscapeInst = EscapeMapping.getFirst(); const auto &EscapeMappingValue = EscapeMapping.getSecond(); const EscapeUserVectorTy &EscapeUsers = EscapeMappingValue.second; - AllocaInst *ScalarAddr = EscapeMappingValue.first; + Value *ScalarAddr = EscapeMappingValue.first; // Reload the demoted instruction in the optimized version of the SCoP. Instruction *EscapeInstReload = @@ -1102,9 +1145,11 @@ Builder.SetInsertPoint(ExitBBCopy->begin()); } -void RegionGenerator::generateScalarLoads(ScopStmt &Stmt, +void RegionGenerator::generateScalarLoads(const ScopStmt &Stmt, const Instruction *Inst, - ValueMapT &BBMap) { + ValueMapT &BBMap, + ValueMapT &GlobalMap, + std::vector *Addresses) { // Inside a non-affine region PHI nodes are copied not demoted. Once the // phi is copied it will reload all inputs from outside the region, hence @@ -1113,12 +1158,14 @@ if (isa(Inst)) return; - return BlockGenerator::generateScalarLoads(Stmt, Inst, BBMap); + return BlockGenerator::generateScalarLoads(Stmt, Inst, BBMap, GlobalMap, + Addresses); } -void RegionGenerator::generateScalarStores(ScopStmt &Stmt, BasicBlock *BB, +void RegionGenerator::generateScalarStores(const ScopStmt &Stmt, BasicBlock *BB, ValueMapT &BBMap, - ValueMapT &GlobalMap) { + ValueMapT &GlobalMap, + std::vector *Addresses) { const Region &R = Stmt.getParent()->getRegion(); Region *StmtR = Stmt.getRegion(); @@ -1135,19 +1182,24 @@ PHINode *ScalarBasePHI = dyn_cast(ScalarBase); Value *Val = nullptr; - AllocaInst *ScalarAddr = nullptr; + Value *ScalarAddr = nullptr; if (MA->getScopArrayInfo()->isPHI()) { int PHIIdx = ScalarBasePHI->getBasicBlockIndex(BB); - ScalarAddr = getOrCreateAlloca(ScalarBase, PHIOpMap, ".phiops"); + ScalarAddr = + getOrCreateAlloca(ScalarBase, PHIOpMap, GlobalMap, ".phiops"); Val = ScalarBasePHI->getIncomingValue(PHIIdx); } else { - ScalarAddr = getOrCreateAlloca(ScalarBase, ScalarMap, ".s2a"); + ScalarAddr = getOrCreateAlloca(ScalarBase, ScalarMap, GlobalMap, ".s2a"); Val = ScalarInst; } - Val = getNewScalarValue(Val, R, ScalarMap, BBMap, GlobalMap); - Builder.CreateStore(Val, ScalarAddr); + if (Addresses) { + Addresses->push_back(ScalarAddr); + } else { + Val = getNewScalarValue(Val, R, ScalarMap, BBMap, GlobalMap); + Builder.CreateStore(Val, ScalarAddr); + } } } @@ -1181,8 +1233,8 @@ if (PHICopy->getBasicBlockIndex(BBCopy) >= 0) return; - AllocaInst *PHIOpAddr = - getOrCreateAlloca(const_cast(PHI), PHIOpMap, ".phiops"); + Value *PHIOpAddr = getOrCreateAlloca(const_cast(PHI), PHIOpMap, + GlobalMap, ".phiops"); OpCopy = new LoadInst(PHIOpAddr, PHIOpAddr->getName() + ".reload", BlockMap[IncomingBB]->getTerminator()); } Index: lib/CodeGen/IslNodeBuilder.cpp =================================================================== --- lib/CodeGen/IslNodeBuilder.cpp +++ lib/CodeGen/IslNodeBuilder.cpp @@ -155,11 +155,13 @@ Region &R; SetVector &Values; SetVector &SCEVs; + IslNodeBuilder *NodeBuilder; }; /// @brief Extract the values and SCEVs needed to generate code for a block. -static int findValuesInBlock(struct FindValuesUser &User, const ScopStmt *Stmt, - const BasicBlock *BB) { +int IslNodeBuilder::findValuesInBlock(struct FindValuesUser &User, + const ScopStmt *Stmt, + const BasicBlock *BB) { // Check all the operands of instructions in the basic block. for (const Instruction &Inst : *BB) { for (Value *SrcVal : Inst.operands()) { @@ -177,6 +179,11 @@ User.Values.insert(SrcVal); } } + + auto ScalarLocations = User.NodeBuilder->BlockGen.getScalarAllocationsForStmt( + *Stmt, User.NodeBuilder->ScalarMap, User.NodeBuilder->PHIOpMap); + + User.Values.insert(ScalarLocations.begin(), ScalarLocations.end()); return 0; } @@ -191,12 +198,12 @@ const ScopStmt *Stmt = static_cast(isl_id_get_user(Id)); if (Stmt->isBlockStmt()) - findValuesInBlock(User, Stmt, Stmt->getBasicBlock()); + IslNodeBuilder::findValuesInBlock(User, Stmt, Stmt->getBasicBlock()); else { assert(Stmt->isRegionStmt() && "Stmt was neither block nor region statement"); for (const BasicBlock *BB : Stmt->getRegion()->blocks()) - findValuesInBlock(User, Stmt, BB); + IslNodeBuilder::findValuesInBlock(User, Stmt, BB); } isl_id_free(Id); @@ -209,7 +216,8 @@ SetVector &Loops) { SetVector SCEVs; - struct FindValuesUser FindValues = {LI, SE, S.getRegion(), Values, SCEVs}; + struct FindValuesUser FindValues = {LI, SE, S.getRegion(), + Values, SCEVs, this}; for (const auto &I : IDToValue) Values.insert(I.second); Index: test/Isl/CodeGen/OpenMP/scalars-in-offloaded-section.ll =================================================================== --- /dev/null +++ test/Isl/CodeGen/OpenMP/scalars-in-offloaded-section.ll @@ -0,0 +1,45 @@ +; RUN: opt %loadPolly -polly-ignore-aliasing -polly-ast -analyze -polly-parallel -polly-parallel-force -polly-allow-nonaffine < %s | FileCheck %s -check-prefix=AST +; RUN: opt %loadPolly -polly-ignore-aliasing -polly-codegen -polly-parallel -polly-parallel-force -polly-allow-nonaffine -S < %s | FileCheck %s -check-prefix=IR +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" + +; AST: { +; AST: Stmt_bb3(); +; AST: #pragma simd +; AST: #pragma omp parallel for +; AST: for (int c0 = 0; c0 <= p_0; c0 += 1) +; AST: Stmt_bb6(c0); +; AST: } + +; Check that we correctly pass scalar dependences (here trough %tmp4) from the +; host code to the device code. +; +; IR: %7 = getelementptr inbounds { i64, double*, double*, i64* }, { i64, double*, double*, i64* }* %polly.par.userContext, i32 0, i32 3 +; IR: store i64* %tmp4.s2a, i64** %7 + + +define void @hoge(double* noalias %arg, double* noalias %arg1, double* noalias dereferenceable(8) %arg2) { +bb: + br label %bb3 + +bb3: ; preds = %bb + %tmp = bitcast double* %arg2 to i64* + %tmp4 = load i64, i64* %tmp, align 8 + br i1 false, label %bb12, label %bb5 + +bb5: ; preds = %bb3 + br label %bb6 + +bb6: ; preds = %bb6, %bb5 + %tmp7 = phi double* [ %arg, %bb5 ], [ %tmp9, %bb6 ] + %tmp8 = bitcast double* %tmp7 to i64* + store i64 %tmp4, i64* %tmp8, align 8 + %tmp9 = getelementptr inbounds double, double* %tmp7, i64 1 + %tmp10 = icmp eq double* %tmp9, %arg1 + br i1 %tmp10, label %bb11, label %bb6 + +bb11: ; preds = %bb6 + br label %bb12 + +bb12: ; preds = %bb11, %bb3 + ret void +}