Index: include/polly/CodeGen/IRBuilder.h =================================================================== --- include/polly/CodeGen/IRBuilder.h +++ include/polly/CodeGen/IRBuilder.h @@ -16,54 +16,35 @@ #define POLLY_CODEGEN_IRBUILDER_H #include "llvm/IR/IRBuilder.h" +#include "llvm/Analysis/LoopInfo.h" + namespace polly { -/// @brief Keeps information about generated loops. -class PollyLoopInfo { +/// @brief Helper class to annotate newly generated loops with metadata. +/// +/// This stack-like structure will keep track of all loops, and annotate +/// memory instructions and loop headers according to all parallel loops. +class LoopAnnotator { public: - PollyLoopInfo(llvm::BasicBlock *Header) - : LoopID(0), Header(Header), Parallel(false) {} + /// @brief Add a new loop @p L which is parallel if @p IsParallel is true. + void pushLoop(llvm::Loop *L, bool IsParallel); - /// @brief Get the loop id metadata node. - /// - /// Each loop is identified by a self referencing metadata node of the form: - /// - /// '!n = metadata !{metadata !n}' - /// - /// This functions creates such metadata on demand if not yet available. - /// - /// @return The loop id metadata node. - llvm::MDNode *GetLoopID() const; + /// @brief Remove the last added loop. + void popLoop(bool isParallel); - /// @brief Get the head basic block of this loop. - llvm::BasicBlock *GetHeader() const { return Header; } + /// @brief Annotate the new instruction @p I for all parallel loops. + void annotate(llvm::Instruction *I); - /// @brief Check if the loop is parallel. - /// - /// @return True, if the loop is parallel. - bool IsParallel() const { return Parallel; } - - /// @brief Set a loop as parallel. - /// - /// @IsParallel True, if the loop is to be marked as parallel. False, if the - // loop should be marked sequential. - void SetParallel(bool IsParallel = true) { Parallel = IsParallel; } + /// @brief Annotate the loop latch @p B wrt. parallel loops. 
+ void annotateLoopLatch(llvm::BranchInst *B, llvm::Loop *L, + bool IsParallel) const; private: - mutable llvm::MDNode *LoopID; - llvm::BasicBlock *Header; - bool Parallel; -}; + /// @brief All loops currently under construction. + llvm::SmallVector ActiveLoops; -class LoopAnnotator { -public: - void Begin(llvm::BasicBlock *Header); - void SetCurrentParallel(); - void End(); - void Annotate(llvm::Instruction *I); - -private: - std::vector Active; + /// @brief Metadata pointing to parallel loops currently under construction. + llvm::SmallVector ParallelLoops; }; /// @brief Add Polly specifics when running IRBuilder. @@ -84,7 +65,7 @@ llvm::IRBuilderDefaultInserter::InsertHelper(I, Name, BB, InsertPt); if (Annotator) - Annotator->Annotate(I); + Annotator->annotate(I); } private: Index: lib/CodeGen/IRBuilder.cpp =================================================================== --- lib/CodeGen/IRBuilder.cpp +++ lib/CodeGen/IRBuilder.cpp @@ -13,48 +13,66 @@ //===----------------------------------------------------------------------===// #include "polly/CodeGen/IRBuilder.h" -#include "llvm/Analysis/LoopInfo.h" + #include "llvm/IR/Metadata.h" #include "llvm/Support/Debug.h" using namespace llvm; using namespace polly; -llvm::MDNode *polly::PollyLoopInfo::GetLoopID() const { - if (LoopID) - return LoopID; - - llvm::Value *Args[] = {0}; - LoopID = llvm::MDNode::get(Header->getContext(), Args); +/// @brief Get the loop id metadata node. +/// +/// Each loop is identified by a self referencing metadata node of the form: +/// +/// '!n = metadata !{metadata !n}' +/// +/// This function creates a fresh metadata node of this form on every call. +/// +/// @return The loop id metadata node. 
+static MDNode *getLoopID(Loop *L) { + Value *Args[] = {0}; + MDNode *LoopID = MDNode::get(L->getHeader()->getContext(), Args); LoopID->replaceOperandWith(0, LoopID); return LoopID; } -void polly::LoopAnnotator::Begin(llvm::BasicBlock *Header) { - Active.push_back(PollyLoopInfo(Header)); +void polly::LoopAnnotator::pushLoop(Loop *L, bool IsParallel) { + ActiveLoops.push_back(L); + if (!IsParallel) + return; + + BasicBlock *Header = L->getHeader(); + MDNode *Id = getLoopID(L); + Value *Args[] = {Id}; + MDNode *Ids = ParallelLoops.empty() + ? MDNode::get(Header->getContext(), Args) + : MDNode::concatenate(ParallelLoops.back(), Id); + ParallelLoops.push_back(Ids); } -void polly::LoopAnnotator::End() { Active.pop_back(); } +void polly::LoopAnnotator::popLoop(bool IsParallel) { + ActiveLoops.pop_back(); + if (!IsParallel) + return; -void polly::LoopAnnotator::SetCurrentParallel() { - Active.back().SetParallel(true); + assert(!ParallelLoops.empty() && "Expected a parallel loop to pop"); + ParallelLoops.pop_back(); } -void polly::LoopAnnotator::Annotate(llvm::Instruction *Inst) { - if (Active.empty()) +void polly::LoopAnnotator::annotateLoopLatch(BranchInst *B, Loop *L, + bool IsParallel) const { + if (!IsParallel) return; - const PollyLoopInfo &L = Active.back(); - if (!L.IsParallel()) + assert(!ParallelLoops.empty() && "Expected a parallel loop to annotate"); + MDNode *Ids = ParallelLoops.back(); + MDNode *Id = cast(Ids->getOperand(Ids->getNumOperands() - 1)); + B->setMetadata("llvm.loop", Id); +} + +void polly::LoopAnnotator::annotate(Instruction *Inst) { + if (!Inst->mayReadOrWriteMemory() || ParallelLoops.empty()) return; - if (TerminatorInst *TI = dyn_cast(Inst)) { - for (unsigned i = 0, ie = TI->getNumSuccessors(); i != ie; ++i) - if (TI->getSuccessor(i) == L.GetHeader()) { - TI->setMetadata("llvm.loop", L.GetLoopID()); - break; - } - } else if (Inst->mayReadOrWriteMemory()) { - Inst->setMetadata("llvm.mem.parallel_loop_access", L.GetLoopID()); - } + 
Inst->setMetadata("llvm.mem.parallel_loop_access", ParallelLoops.back()); } Index: lib/CodeGen/IslCodeGeneration.cpp =================================================================== --- lib/CodeGen/IslCodeGeneration.cpp +++ lib/CodeGen/IslCodeGeneration.cpp @@ -319,8 +319,8 @@ CmpInst::Predicate Predicate; bool Parallel; - Parallel = IslAstInfo::isInnermostParallel(For) && - !IslAstInfo::isReductionParallel(For); + Parallel = + IslAstInfo::isParallel(For) && !IslAstInfo::isReductionParallel(For); Body = isl_ast_node_for_get_body(For); @@ -362,7 +362,7 @@ create(Body); - Annotator.End(); + Annotator.popLoop(Parallel); IDToValue.erase(IteratorID); Index: lib/CodeGen/LoopGenerators.cpp =================================================================== --- lib/CodeGen/LoopGenerators.cpp +++ lib/CodeGen/LoopGenerators.cpp @@ -64,12 +64,6 @@ BasicBlock *PreHeaderBB = BasicBlock::Create(Context, "polly.loop_preheader", F); - if (Annotator) { - Annotator->Begin(HeaderBB); - if (Parallel) - Annotator->SetCurrentParallel(); - } - // Update LoopInfo Loop *OuterLoop = LI.getLoopFor(BeforeBB); Loop *NewLoop = new Loop(); @@ -86,6 +80,11 @@ NewLoop->addBasicBlockToLoop(HeaderBB, LI.getBase()); + // Notify the annotator (if present) that we have a new loop, but only + // after the header block is set. + if (Annotator) + Annotator->pushLoop(NewLoop, Parallel); + // ExitBB ExitBB = SplitBlock(BeforeBB, Builder.GetInsertPoint()++, P); ExitBB->setName("polly.loop_exit"); @@ -122,7 +121,12 @@ UB = Builder.CreateSub(UB, Stride, "polly.adjust_ub"); LoopCondition = Builder.CreateICmp(Predicate, IV, UB); LoopCondition->setName("polly.loop_cond"); - Builder.CreateCondBr(LoopCondition, HeaderBB, ExitBB); + + // Create the loop latch and annotate it as such. 
+ BranchInst *B = Builder.CreateCondBr(LoopCondition, HeaderBB, ExitBB); + if (Annotator) + Annotator->annotateLoopLatch(B, NewLoop, Parallel); + IV->addIncoming(IncrementedIV, HeaderBB); if (GuardBB) DT.changeImmediateDominator(ExitBB, GuardBB); Index: test/Isl/CodeGen/LoopParallelMD/loop_nest_param_parallel.ll =================================================================== --- /dev/null +++ test/Isl/CodeGen/LoopParallelMD/loop_nest_param_parallel.ll @@ -0,0 +1,64 @@ +; RUN: opt %loadPolly -polly-codegen-isl -polly-ast-detect-parallel -S < %s | FileCheck %s +; +; Check that we mark multiple parallel loops correctly including the memory instructions. +; +; CHECK-DAG: %polly.loop_cond[[COuter:[0-9]*]] = icmp sle i64 %polly.indvar{{[0-9]*}}, 1022 +; CHECK-DAG: br i1 %polly.loop_cond[[COuter]], label %polly.loop_header{{[0-9]*}}, label %polly.loop_exit{{[0-9]*}}, !llvm.loop !0 +; +; CHECK-DAG: %polly.loop_cond[[CInner:[0-9]*]] = icmp sle i64 %polly.indvar{{[0-9]*}}, 510 +; CHECK-DAG: br i1 %polly.loop_cond[[CInner]], label %polly.loop_header{{[0-9]*}}, label %polly.loop_exit{{[0-9]*}}, !llvm.loop !2 +; +; CHECK-DAG: store i32 %p_tmp{{[0-9]*}}, i32* %p_arrayidx{{[0-9]*}}, !llvm.mem.parallel_loop_access !1 +; +; CHECK: !0 = metadata !{metadata !0} +; CHECK: !1 = metadata !{metadata !0, metadata !2} +; CHECK: !2 = metadata !{metadata !2} +; +; void jd(int *A) { +; for (int i = 0; i < 1024; i++) +; for (int j = 0; j < 512; j++) +; A[i * 512 + j] = i + j; +; } +; +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" + +define void @jd(i32* %A) { +entry: + br label %for.cond + +for.cond: ; preds = %for.inc5, %entry + %indvars.iv3 = phi i64 [ %indvars.iv.next4, %for.inc5 ], [ 0, %entry ] + %exitcond6 = icmp ne i64 %indvars.iv3, 1024 + br i1 %exitcond6, label %for.body, label %for.end7 + +for.body: ; preds = %for.cond + br label %for.cond1 + +for.cond1: ; preds = %for.inc, %for.body + %indvars.iv = phi i64 [ %indvars.iv.next, %for.inc ], [ 0, %for.body ] + 
%exitcond = icmp ne i64 %indvars.iv, 512 + br i1 %exitcond, label %for.body3, label %for.end + +for.body3: ; preds = %for.cond1 + %tmp = add nsw i64 %indvars.iv3, %indvars.iv + %tmp7 = shl nsw i64 %indvars.iv3, 9 + %tmp8 = add nsw i64 %tmp7, %indvars.iv + %arrayidx = getelementptr inbounds i32* %A, i64 %tmp8 + %tmp9 = trunc i64 %tmp to i32 + store i32 %tmp9, i32* %arrayidx, align 4 + br label %for.inc + +for.inc: ; preds = %for.body3 + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + br label %for.cond1 + +for.end: ; preds = %for.cond1 + br label %for.inc5 + +for.inc5: ; preds = %for.end + %indvars.iv.next4 = add nuw nsw i64 %indvars.iv3, 1 + br label %for.cond + +for.end7: ; preds = %for.cond + ret void +}