Index: include/polly/CodeGen/IRBuilder.h =================================================================== --- include/polly/CodeGen/IRBuilder.h +++ include/polly/CodeGen/IRBuilder.h @@ -165,5 +165,6 @@ Builder.SetInsertPoint(BB->getTerminator()); return Builder; } + } // namespace polly #endif Index: include/polly/CodeGen/Utils.h =================================================================== --- include/polly/CodeGen/Utils.h +++ include/polly/CodeGen/Utils.h @@ -70,5 +70,25 @@ executeScopConditionally(Scop &S, llvm::Value *RTC, llvm::DominatorTree &DT, llvm::RegionInfo &RI, llvm::LoopInfo &LI); +/// Alternative to llvm::SplitCriticalEdge. +/// +/// Creates a new block which branches to Succ. The edge to split is redirected +/// to the new block. +/// +/// The issue with llvm::SplitCriticalEdge is that it does nothing if the edge +/// is not critical. The issue with llvm::SplitEdge is that it does not always +/// create the middle block, but reuses Prev/Succ if it can. We always want a +/// new middle block. +/// +/// @param Prev Edge Source BB +/// @param Succ Edge Destination BB +/// @param Suffix Name to append +/// @param DT DominatorTree +/// @param LI LoopInfo +/// @param RI RegionInfo +llvm::BasicBlock *splitEdge(llvm::BasicBlock *Prev, llvm::BasicBlock *Succ, + const char *Suffix, llvm::DominatorTree *DT, + llvm::LoopInfo *LI, llvm::RegionInfo *RI); + } // namespace polly #endif Index: lib/CodeGen/BlockGenerators.cpp =================================================================== --- lib/CodeGen/BlockGenerators.cpp +++ lib/CodeGen/BlockGenerators.cpp @@ -886,8 +886,12 @@ void BlockGenerator::finalizeSCoP(Scop &S) { findOutsideUsers(S); createScalarInitialization(S); - createExitPHINodeMerges(S); - createScalarFinalization(S); + + if (!S.getRegion().isTopLevelRegion()) { + createExitPHINodeMerges(S); + createScalarFinalization(S); + } + invalidateScalarEvolution(S); } Index: lib/CodeGen/CodeGeneration.cpp =================================================================== --- lib/CodeGen/CodeGeneration.cpp +++ lib/CodeGen/CodeGeneration.cpp @@ -153,16 +153,182 @@ } } +static BasicBlock *unifyFunctionExitNodes(Function &F) { + // Loop over all of the blocks in a function, tracking all of the blocks that + // return. + // + SmallVector ReturningBlocks; + BasicBlock *ReturnBlock = nullptr; + for (BasicBlock &I : F) + if (isa(I.getTerminator())) + ReturningBlocks.push_back(&I); + + // Now handle return blocks. + if (ReturningBlocks.empty()) { + llvm_unreachable("Full function Scop does not return"); + } else if (ReturningBlocks.size() == 1) { + ReturnBlock = ReturningBlocks.front(); // Already has a single return block + return ReturnBlock; + } + + // Otherwise, we need to insert a new basic block into the function, add a PHI + // nodes (if the function returns values), and convert all of the return + // instructions into unconditional branches. + // + BasicBlock *NewRetBlock = BasicBlock::Create(F.getContext(), "polly.ret", &F); + + PHINode *PN = nullptr; + if (F.getReturnType()->isVoidTy()) { + ReturnInst::Create(F.getContext(), nullptr, NewRetBlock); + } else { + // If the function doesn't return void... add a PHI node to the block... + PN = PHINode::Create(F.getReturnType(), ReturningBlocks.size(), + "UnifiedRetVal"); + + NewRetBlock->getInstList().push_back(PN); + ReturnInst::Create(F.getContext(), PN, NewRetBlock); + } + + // Loop over all of the blocks, replacing the return instruction with an + // unconditional branch. + for (BasicBlock *BB : ReturningBlocks) { + // Add an incoming element to the PHI node for every return instruction that + // is merging into this new block... + if (PN) + PN->addIncoming(BB->getTerminator()->getOperand(0), BB); + + BB->getTerminator()->eraseFromParent(); + BranchInst::Create(NewRetBlock, BB); + } + return NewRetBlock; +} + +static bool FullFunctionCodeGen(Scop &S, IslAstInfo &AI, LoopInfo &LI, + DominatorTree &DT, ScalarEvolution &SE, + RegionInfo &RI) { + isl_ast_node *AstRoot = AI.getAst(); + if (!AstRoot) + return false; + Region *R = &S.getRegion(); + Function *F = R->getEntry()->getParent(); + + splitEntryBlockForAlloca(R->getEntry(), &DT, &LI, &RI); + + BasicBlock *SingleExitBlock = unifyFunctionExitNodes(*F); + + BasicBlock *ExitPred = SingleExitBlock->getSinglePredecessor(); + assert(ExitPred && "Exit block has multiple predessors"); + + ScopAnnotator Annotator; + PollyIRBuilder Builder = createPollyIRBuilder(R->getEntry(), Annotator); + + BasicBlock *SplitBlock = + BasicBlock::Create(F->getContext(), "polly.split", F); + + BasicBlock *JoinBlock = + splitEdge(ExitPred, SingleExitBlock, "polly.merge", &DT, &LI, &RI); + + BasicBlock *StartBlock = + BasicBlock::Create(F->getContext(), "polly.start", F); + + BasicBlock *ExitingBlock = + BasicBlock::Create(F->getContext(), "polly.exiting", F); + + Builder.SetInsertPoint(SplitBlock); + Builder.CreateBr(StartBlock); + + Builder.SetInsertPoint(StartBlock); + Builder.CreateBr(ExitingBlock); + + Builder.SetInsertPoint(ExitingBlock); + Builder.CreateBr(JoinBlock); + + BasicBlock *EntrySucc = R->getEntry()->getSingleSuccessor(); + assert(EntrySucc); + R->getEntry()->getTerminator()->eraseFromParent(); + Builder.SetInsertPoint(R->getEntry()); + Builder.CreateCondBr(Builder.getTrue(), EntrySucc, SplitBlock); + + DominatorTree FreeTree; + FreeTree.setNewRoot(SplitBlock); + FreeTree.addNewBlock(StartBlock, SplitBlock); + FreeTree.addNewBlock(ExitingBlock, StartBlock); + FreeTree.addNewBlock(JoinBlock, ExitingBlock); + + auto &DL = S.getFunction().getParent()->getDataLayout(); + IslNodeBuilder NodeBuilder(Builder, Annotator, DL, LI, SE, FreeTree, S, + R->getEntry()); + NodeBuilder.allocateNewArrays({StartBlock, ExitingBlock}); + Annotator.buildAliasScopes(S); + + // First generate code for the hoisted invariant loads and transitively the + // parameters they reference. Afterwards, for the remaining parameters that + // might reference the hoisted loads. Finally, build the runtime check + // that might reference both hoisted loads as well as parameters. + // If the hoisting fails we have to bail and execute the original code. + Builder.SetInsertPoint(R->getEntry()->getTerminator()); + if (!NodeBuilder.preloadInvariantLoads()) { + + // Patch the introduced branch condition to ensure that we always execute + // the original SCoP. + auto *FalseI1 = Builder.getFalse(); + auto *SplitBBTerm = Builder.GetInsertBlock()->getTerminator(); + SplitBBTerm->setOperand(0, FalseI1); + + // Since the other branch is hence ignored we mark it as unreachable and + // adjust the dominator tree accordingly. + auto *ExitingBlock = StartBlock->getUniqueSuccessor(); + assert(ExitingBlock); + auto *MergeBlock = ExitingBlock->getUniqueSuccessor(); + assert(MergeBlock); + markBlockUnreachable(*StartBlock, Builder); + markBlockUnreachable(*ExitingBlock, Builder); + auto *ExitingBB = S.getExitingBlock(); + assert(ExitingBB); + DT.changeImmediateDominator(MergeBlock, ExitingBB); + DT.eraseNode(ExitingBlock); + + } else { + NodeBuilder.addParameters(S.getContext().release()); + + Value *RTC = NodeBuilder.createRTC(AI.getRunCondition()); + assert(RTC); + R->getEntry()->getTerminator()->setOperand(0, RTC); + + // Explicitly set the insert point to the end of the block to avoid that a + // split at the builder's current + // insert position would move the malloc calls to the wrong BasicBlock. + // Ideally we would just split the block during allocation of the new + // arrays, but this would break the assumption that there are no blocks + // between polly.start and polly.exiting (at this point). + Builder.SetInsertPoint(StartBlock->getTerminator()); + NodeBuilder.create(AstRoot); + NodeBuilder.finalize(); + fixRegionInfo(*F, *R->getParent(), RI); + } + + verifyGeneratedFunction(S, *F, AI); + for (auto *SubF : NodeBuilder.getParallelSubfunctions()) + verifyGeneratedFunction(S, *SubF, AI); + + // Mark the function such that we run additional cleanup passes on this + // function (e.g. mem2reg to rediscover phi nodes). + F->addFnAttr("polly-optimized"); + return true; +} + static bool CodeGen(Scop &S, IslAstInfo &AI, LoopInfo &LI, DominatorTree &DT, ScalarEvolution &SE, RegionInfo &RI) { + Region *R = &S.getRegion(); + if (R->isTopLevelRegion()) { + return FullFunctionCodeGen(S, AI, LI, DT, SE, RI); + } // Check if we created an isl_ast root node, otherwise exit. isl_ast_node *AstRoot = AI.getAst(); if (!AstRoot) return false; auto &DL = S.getFunction().getParent()->getDataLayout(); - Region *R = &S.getRegion(); - assert(!R->isTopLevelRegion() && "Top level regions are not supported"); ScopAnnotator Annotator; Index: lib/CodeGen/Utils.cpp =================================================================== --- lib/CodeGen/Utils.cpp +++ lib/CodeGen/Utils.cpp @@ -21,16 +21,7 @@ using namespace llvm; -// Alternative to llvm::SplitCriticalEdge. -// -// Creates a new block which branches to Succ. The edge to split is redirected -// to the new block. -// -// The issue with llvm::SplitCriticalEdge is that it does nothing if the edge is -// not critical. -// The issue with llvm::SplitEdge is that it does not always create the middle -// block, but reuses Prev/Succ if it can. We always want a new middle block. -static BasicBlock *splitEdge(BasicBlock *Prev, BasicBlock *Succ, +BasicBlock *polly::splitEdge(BasicBlock *Prev, BasicBlock *Succ, const char *Suffix, DominatorTree *DT, LoopInfo *LI, RegionInfo *RI) { assert(Prev && Succ); Index: lib/Support/ScopHelper.cpp =================================================================== --- lib/Support/ScopHelper.cpp +++ lib/Support/ScopHelper.cpp @@ -291,7 +291,9 @@ else if (Inst && RTCBB->getParent() == Inst->getFunction()) IP = RTCBB->getTerminator(); else - IP = RTCBB->getParent()->getEntryBlock().getTerminator(); + IP = R.isTopLevelRegion() + ? R.getEntry()->getTerminator() + : RTCBB->getParent()->getEntryBlock().getTerminator(); if (!Inst || (Inst->getOpcode() != Instruction::SRem && Inst->getOpcode() != Instruction::SDiv)) Index: test/Isl/CodeGen/full-function.ll =================================================================== --- /dev/null +++ test/Isl/CodeGen/full-function.ll @@ -0,0 +1,29 @@ +; RUN: opt %loadPolly -polly-codegen -polly-detect-full-functions -S \ +; RUN: < %s | FileCheck %s +; CHECK: polly.split: +; CHECK: polly.start: +; CHECK: polly.stmt.bb2: + +@A = common global [1024 x i32] zeroinitializer, align 16 ; <[1024 x i32]*> [#uses=3] + +define void @bar(i64 %n) nounwind { +bb: + br label %bb1 + +bb1: ; preds = %bb3, %bb + %i.0 = phi i64 [ 0, %bb ], [ %tmp, %bb3 ] ; [#uses=3] + %scevgep = getelementptr [1024 x i32], [1024 x i32]* @A, i64 0, i64 %i.0 ; [#uses=1] + %exitcond = icmp ne i64 %i.0, %n ; [#uses=1] + br i1 %exitcond, label %bb2, label %bb4 + +bb2: ; preds = %bb1 + store i32 1, i32* %scevgep + br label %bb3 + +bb3: ; preds = %bb2 + %tmp = add nsw i64 %i.0, 1 ; [#uses=1] + br label %bb1 + +bb4: ; preds = %bb1 + ret void +}