Index: include/polly/CodeGen/CodeGeneration.h =================================================================== --- include/polly/CodeGen/CodeGeneration.h +++ include/polly/CodeGen/CodeGeneration.h @@ -36,6 +36,9 @@ }; extern CodeGenChoice PollyCodeGenChoice; +/// @brief Flag to turn on/off annotation of alias scopes. +extern bool PollyAnnotateAliasScopes; + static inline int getNumberOfIterations(__isl_take isl_set *Domain) { int Dim = isl_set_dim(Domain, isl_dim_set); Index: include/polly/CodeGen/IRBuilder.h =================================================================== --- include/polly/CodeGen/IRBuilder.h +++ include/polly/CodeGen/IRBuilder.h @@ -18,14 +18,34 @@ #include "llvm/IR/IRBuilder.h" #include "llvm/Analysis/LoopInfo.h" +namespace llvm { +class ScalarEvolution; +} + namespace polly { +class Scop; -/// @brief Helper class to annotate newly generated loops with metadata. +/// @brief Helper class to annotate newly generated SCoPs with metadata. /// -/// This stack-like structure will keep track of all loops, and annotate -/// memory instructions and loop headers according to all parallel loops. -class LoopAnnotator { +/// The annotations are twofold: +/// 1) Loops are stored in a stack-like structure in the order they are +/// constructed and the LoopID metadata node is added to the backedge. +/// Contained memory instructions and loop headers are annotated according +/// to all parallel surrounding loops. +/// 2) The new SCoP is assumed alias free (either due to the result of +/// AliasAnalysis queries or runtime alias checks). We annotate therefore +/// all memory instruction with alias scopes to indicate that fact to +/// later optimizations. +/// These alias scopes live in a new alias domain only used in this SCoP. +/// Each base pointer has its own alias scope and is annotated to not +/// alias with any access to different base pointers. +class ScopAnnotator { public: + ScopAnnotator(); + + /// @brief Build all alias scopes for the given SCoP. + void buildAliasScopes(Scop &S); + /// @brief Add a new loop @p L which is parallel if @p IsParallel is true. void pushLoop(llvm::Loop *L, bool IsParallel); @@ -40,11 +60,23 @@ bool IsParallel) const; private: + /// @brief The ScalarEvolution analysis we use to find base pointers. + llvm::ScalarEvolution *SE; + /// @brief All loops currently under construction. llvm::SmallVector ActiveLoops; /// @brief Metadata pointing to parallel loops currently under construction. llvm::SmallVector ParallelLoops; + + /// @brief The alias scope domain for the current SCoP. + llvm::MDNode *AliasScopeDomain; + + /// @brief A map from base pointers to its alias scope. + llvm::DenseMap AliasScopeMap; + + /// @brief A map from base pointers to an alias scope list of other pointers. + llvm::DenseMap OtherAliasScopeListMap; }; /// @brief Add Polly specifics when running IRBuilder. @@ -56,7 +88,7 @@ : protected llvm::IRBuilderDefaultInserter { public: PollyBuilderInserter() : Annotator(0) {} - PollyBuilderInserter(class LoopAnnotator &A) : Annotator(&A) {} + PollyBuilderInserter(class ScopAnnotator &A) : Annotator(&A) {} protected: void InsertHelper(llvm::Instruction *I, const llvm::Twine &Name, @@ -69,7 +101,7 @@ } private: - class LoopAnnotator *Annotator; + class ScopAnnotator *Annotator; }; // TODO: We should not name instructions in NDEBUG builds. @@ -81,7 +113,7 @@ /// @brief Return an IR builder pointed before the @p BB terminator. static inline PollyIRBuilder createPollyIRBuilder(llvm::BasicBlock *BB, - LoopAnnotator &LA) { + ScopAnnotator &LA) { PollyIRBuilder Builder(BB->getContext(), llvm::ConstantFolder(), polly::IRInserter(LA)); Builder.SetInsertPoint(BB->getTerminator()); Index: include/polly/CodeGen/LoopGenerators.h =================================================================== --- include/polly/CodeGen/LoopGenerators.h +++ include/polly/CodeGen/LoopGenerators.h @@ -40,8 +40,8 @@ /// @param DT The dominator tree we need to update /// @param ExitBlock The block the loop will exit to. /// @param Predicate The predicate used to generate the upper loop bound. -/// @param Annotator This function can (optionally) take a LoopAnnotator which -/// tracks the loop structure. +/// @param Annotator This function can (optionally) take a ScopAnnotator which +/// annotates loops and alias information in the SCoP. /// @param Parallel If this loop should be marked parallel in the Annotator. /// @param UseGuard Create a guard in front of the header to check if the /// loop is executed at least once, otherwise just assume it. @@ -51,7 +51,7 @@ PollyIRBuilder &Builder, Pass *P, LoopInfo &LI, DominatorTree &DT, BasicBlock *&ExitBlock, ICmpInst::Predicate Predicate, - LoopAnnotator *Annotator = NULL, bool Parallel = false, + ScopAnnotator *Annotator = NULL, bool Parallel = false, bool UseGuard = true); class OMPGenerator { Index: lib/CodeGen/IRBuilder.cpp =================================================================== --- lib/CodeGen/IRBuilder.cpp +++ lib/CodeGen/IRBuilder.cpp @@ -14,35 +14,83 @@ #include "polly/CodeGen/IRBuilder.h" +#include "polly/ScopInfo.h" +#include "polly/Support/ScopHelper.h" + #include "llvm/IR/Metadata.h" #include "llvm/Support/Debug.h" using namespace llvm; using namespace polly; -/// @brief Get the loop id metadata node. -/// -/// Each loop is identified by a self referencing metadata node of the form: +/// @brief Get a self referencing id metadata node. /// -/// '!n = metadata !{metadata !n}' +/// The MDNode looks like this (if arg0/arg1 are not null): /// -/// This functions creates such metadata on demand if not yet available. +/// '!n = metadata !{metadata !n, arg0, arg1}' /// -/// @return The loop id metadata node. -static MDNode *getLoopID(Loop *L) { - Value *Args[] = {0}; - MDNode *LoopID = MDNode::get(L->getHeader()->getContext(), Args); - LoopID->replaceOperandWith(0, LoopID); - return LoopID; +/// @return The self referencing id metadata node. +static MDNode *getID(LLVMContext &Ctx, Value *arg0 = nullptr, + Value *arg1 = nullptr) { + MDNode *ID; + SmallVector Args; + Args.push_back(nullptr); + + if (arg0) + Args.push_back(arg0); + if (arg1) + Args.push_back(arg1); + + ID = MDNode::get(Ctx, Args); + ID->replaceOperandWith(0, ID); + return ID; } -void polly::LoopAnnotator::pushLoop(Loop *L, bool IsParallel) { +ScopAnnotator::ScopAnnotator() : SE(nullptr), AliasScopeDomain(nullptr) {} + +void ScopAnnotator::buildAliasScopes(Scop &S) { + SE = S.getSE(); + + LLVMContext &Ctx = SE->getContext(); + AliasScopeDomain = getID(Ctx, MDString::get(Ctx, "polly.alias.scope.domain")); + + AliasScopeMap.clear(); + OtherAliasScopeListMap.clear(); + + SetVector BasePtrs; + for (ScopStmt *Stmt : S) + for (MemoryAccess *MA : *Stmt) + BasePtrs.insert(MA->getBaseAddr()); + + std::string AliasScopeStr = "polly.alias.scope."; + for (Value *BasePtr : BasePtrs) + AliasScopeMap[BasePtr] = getID( + Ctx, AliasScopeDomain, + MDString::get(Ctx, (AliasScopeStr + BasePtr->getName()).str().c_str())); + + for (Value *BasePtr : BasePtrs) { + MDNode *AliasScopeList = MDNode::get(Ctx, {}); + for (const auto &AliasScopePair : AliasScopeMap) { + if (BasePtr == AliasScopePair.first) + continue; + + Value *Args = {AliasScopePair.second}; + AliasScopeList = + MDNode::concatenate(AliasScopeList, MDNode::get(Ctx, Args)); + } + + OtherAliasScopeListMap[BasePtr] = AliasScopeList; + } +} + +void ScopAnnotator::pushLoop(Loop *L, bool IsParallel) { + ActiveLoops.push_back(L); if (!IsParallel) return; BasicBlock *Header = L->getHeader(); - MDNode *Id = getLoopID(L); + MDNode *Id = getID(Header->getContext()); Value *Args[] = {Id}; MDNode *Ids = ParallelLoops.empty() ? MDNode::get(Header->getContext(), Args) @@ -50,7 +98,7 @@ ParallelLoops.push_back(Ids); } -void polly::LoopAnnotator::popLoop(bool IsParallel) { +void ScopAnnotator::popLoop(bool IsParallel) { ActiveLoops.pop_back(); if (!IsParallel) return; @@ -59,8 +107,8 @@ ParallelLoops.pop_back(); } -void polly::LoopAnnotator::annotateLoopLatch(BranchInst *B, Loop *L, - bool IsParallel) const { +void ScopAnnotator::annotateLoopLatch(BranchInst *B, Loop *L, + bool IsParallel) const { if (!IsParallel) return; @@ -70,8 +118,27 @@ B->setMetadata("llvm.loop", Id); } -void polly::LoopAnnotator::annotate(Instruction *Inst) { - if (!Inst->mayReadOrWriteMemory() || ParallelLoops.empty()) +void ScopAnnotator::annotate(Instruction *Inst) { + if (!Inst->mayReadOrWriteMemory()) + return; + + // TODO: Use the ScopArrayInfo once available here. + if (AliasScopeDomain) { + Value *BasePtr = nullptr; + if (isa(Inst) || isa(Inst)) { + const SCEV *PtrSCEV = SE->getSCEV(getPointerOperand(*Inst)); + const SCEV *BaseSCEV = SE->getPointerBase(PtrSCEV); + if (const SCEVUnknown *SU = dyn_cast(BaseSCEV)) + BasePtr = SU->getValue(); + } + + if (BasePtr) { + Inst->setMetadata("alias.scope", AliasScopeMap[BasePtr]); + Inst->setMetadata("noalias", OtherAliasScopeListMap[BasePtr]); + } + } + + if (ParallelLoops.empty()) return; Inst->setMetadata("llvm.mem.parallel_loop_access", ParallelLoops.back()); Index: lib/CodeGen/IslCodeGeneration.cpp =================================================================== --- lib/CodeGen/IslCodeGeneration.cpp +++ lib/CodeGen/IslCodeGeneration.cpp @@ -56,7 +56,7 @@ class IslNodeBuilder { public: - IslNodeBuilder(PollyIRBuilder &Builder, LoopAnnotator &Annotator, Pass *P, + IslNodeBuilder(PollyIRBuilder &Builder, ScopAnnotator &Annotator, Pass *P, LoopInfo &LI, ScalarEvolution &SE, DominatorTree &DT) : Builder(Builder), Annotator(Annotator), ExprBuilder(Builder, IDToValue), P(P), LI(LI), SE(SE), DT(DT) {} @@ -69,7 +69,7 @@ private: PollyIRBuilder &Builder; - LoopAnnotator &Annotator; + ScopAnnotator &Annotator; IslExprBuilder ExprBuilder; Pass *P; LoopInfo &LI; @@ -580,7 +580,7 @@ ///} /// @brief The loop annotator to generate llvm.loop metadata. - LoopAnnotator Annotator; + ScopAnnotator Annotator; /// @brief Build the runtime condition. /// @@ -605,6 +605,10 @@ assert(!S.getRegion().isTopLevelRegion() && "Top level regions are not supported"); + // Build the alias scopes for annotations first. + if (PollyAnnotateAliasScopes) + Annotator.buildAliasScopes(S); + BasicBlock *EnteringBB = simplifyRegion(&S, this); PollyIRBuilder Builder = createPollyIRBuilder(EnteringBB, Annotator); Index: lib/CodeGen/LoopGenerators.cpp =================================================================== --- lib/CodeGen/LoopGenerators.cpp +++ lib/CodeGen/LoopGenerators.cpp @@ -48,7 +48,7 @@ PollyIRBuilder &Builder, Pass *P, LoopInfo &LI, DominatorTree &DT, BasicBlock *&ExitBB, ICmpInst::Predicate Predicate, - LoopAnnotator *Annotator, bool Parallel, + ScopAnnotator *Annotator, bool Parallel, bool UseGuard) { Function *F = Builder.GetInsertBlock()->getParent(); LLVMContext &Context = F->getContext(); Index: lib/Support/RegisterPasses.cpp =================================================================== --- lib/Support/RegisterPasses.cpp +++ lib/Support/RegisterPasses.cpp @@ -141,6 +141,13 @@ cl::desc("Show the Polly CFG right after code generation"), cl::Hidden, cl::init(false), cl::cat(PollyCategory)); +bool polly::PollyAnnotateAliasScopes; +static cl::opt XPollyAnnotateAliasScopes( + "polly-annotate-alias-scopes", + cl::desc("Annotate memory instructions with alias scopes"), + cl::location(PollyAnnotateAliasScopes), cl::init(true), cl::ZeroOrMore, + cl::cat(PollyCategory)); + namespace polly { void initializePollyPasses(PassRegistry &Registry) { #ifdef CLOOG_FOUND Index: test/Isl/CodeGen/LoopParallelMD/loop_nest_param_parallel.ll =================================================================== --- test/Isl/CodeGen/LoopParallelMD/loop_nest_param_parallel.ll +++ test/Isl/CodeGen/LoopParallelMD/loop_nest_param_parallel.ll @@ -3,16 +3,16 @@ ; Check that we mark multiple parallel loops correctly including the memory instructions. ; ; CHECK-DAG: %polly.loop_cond[[COuter:[0-9]*]] = icmp sle i64 %polly.indvar{{[0-9]*}}, 1022 -; CHECK-DAG: br i1 %polly.loop_cond[[COuter]], label %polly.loop_header{{[0-9]*}}, label %polly.loop_exit{{[0-9]*}}, !llvm.loop !0 +; CHECK-DAG: br i1 %polly.loop_cond[[COuter]], label %polly.loop_header{{[0-9]*}}, label %polly.loop_exit{{[0-9]*}}, !llvm.loop ![[IDOuter:[0-9]*]] ; ; CHECK-DAG: %polly.loop_cond[[CInner:[0-9]*]] = icmp sle i64 %polly.indvar{{[0-9]*}}, 510 -; CHECK-DAG: br i1 %polly.loop_cond[[CInner]], label %polly.loop_header{{[0-9]*}}, label %polly.loop_exit{{[0-9]*}}, !llvm.loop !2 +; CHECK-DAG: br i1 %polly.loop_cond[[CInner]], label %polly.loop_header{{[0-9]*}}, label %polly.loop_exit{{[0-9]*}}, !llvm.loop ![[IDInner:[0-9]*]] ; -; CHECK-DAG: store i32 %p_tmp{{[0-9]*}}, i32* %p_arrayidx{{[0-9]*}}, !llvm.mem.parallel_loop_access !1 +; CHECK-DAG: store i32 %p_tmp{{[0-9]*}}, i32* %p_arrayidx{{[0-9]*}}, {{[ ._!,a-zA-Z0-9]*}}, !llvm.mem.parallel_loop_access !4 ; -; CHECK: !0 = metadata !{metadata !0} -; CHECK: !1 = metadata !{metadata !0, metadata !2} -; CHECK: !2 = metadata !{metadata !2} +; CHECK-DAG: ![[IDOuter]] = metadata !{metadata ![[IDOuter]]} +; CHECK-DAG: ![[IDInner]] = metadata !{metadata ![[IDInner]]} +; CHECK-DAG: !4 = metadata !{metadata ![[IDOuter]], metadata ![[IDInner]]} ; ; void jd(int *A) { ; for (int i = 0; i < 1024; i++) Index: test/Isl/CodeGen/LoopParallelMD/single_loop_param_parallel.ll =================================================================== --- test/Isl/CodeGen/LoopParallelMD/single_loop_param_parallel.ll +++ test/Isl/CodeGen/LoopParallelMD/single_loop_param_parallel.ll @@ -38,17 +38,17 @@ ; SEQUENTIAL: @test-one ; SEQUENTIAL-NOT: !llvm.mem.parallel_loop_access -; SEQUENTIAL-NOT: !llvm.loop !0 +; SEQUENTIAL-NOT: !llvm.loop ; SEQUENTIAL-SCEV: @test-one ; SEQUENTIAL-SCEV-NOT: !llvm.mem.parallel_loop_access ; SEQUENTIAL-SCEV-NOT: !llvm.loop ; PARALLEL: @test-one -; PARALLEL: store i32 1, i32* %p_scevgep, !llvm.mem.parallel_loop_access !0 -; PARALLEL: br i1 %polly.loop_cond, label %polly.loop_header, label %polly.loop_exit, !llvm.loop !0 +; PARALLEL: store i32 1, i32* %p_scevgep, {{[ ._!,a-zA-Z0-9]*}}, !llvm.mem.parallel_loop_access ![[LoopID:[0-9]*]] +; PARALLEL: br i1 %polly.loop_cond, label %polly.loop_header, label %polly.loop_exit, !llvm.loop ![[LoopID]] ; PARALLEL-SCEV: @test-one -; PARALLEL-SCEV: store i32 1, i32* %scevgep1, !llvm.mem.parallel_loop_access !0 -; PARALLEL-SCEV: br i1 %polly.loop_cond, label %polly.loop_header, label %polly.loop_exit, !llvm.loop !0 +; PARALLEL-SCEV: store i32 1, i32* %scevgep1, {{[ ._!,a-zA-Z0-9]*}}, !llvm.mem.parallel_loop_access ![[LoopID:[0-9]*]] +; PARALLEL-SCEV: br i1 %polly.loop_cond, label %polly.loop_header, label %polly.loop_exit, !llvm.loop ![[LoopID]] ; This loop has memory dependences that require at least a simple dependence ; analysis to detect the parallelism. @@ -87,16 +87,16 @@ ; SEQUENTIAL: @test-two ; SEQUENTIAL-NOT: !llvm.mem.parallel_loop_access -; SEQUENTIAL-NOT: !llvm.loop !0 +; SEQUENTIAL-NOT: !llvm.loop ; SEQUENTIAL-SCEV: @test-two ; SEQUENTIAL-SCEV-NOT: !llvm.mem.parallel_loop_access ; SEQUENTIAL-SCEV-NOT: !llvm.loop ; PARALLEL: @test-two -; PARALLEL: %val_p_scalar_ = load i32* %p_scevgepload, !llvm.mem.parallel_loop_access !1 -; PARALLEL: store i32 %val_p_scalar_, i32* %p_scevgepstore, !llvm.mem.parallel_loop_access !1 -; PARALLEL: br i1 %polly.loop_cond, label %polly.loop_header, label %polly.loop_exit, !llvm.loop !1 +; PARALLEL: %val_p_scalar_ = load i32* %p_scevgepload, {{[ ._!,a-zA-Z0-9]*}}, !llvm.mem.parallel_loop_access ![[LoopID:[0-9]*]] +; PARALLEL: store i32 %val_p_scalar_, i32* %p_scevgepstore, {{[ ._!,a-zA-Z0-9]*}}, !llvm.mem.parallel_loop_access ![[LoopID]] +; PARALLEL: br i1 %polly.loop_cond, label %polly.loop_header, label %polly.loop_exit, !llvm.loop ![[LoopID]] ; PARALLEL-SCEV: @test-two -; PARALLEL-SCEV: %val_p_scalar_ = load i32* %scevgep, !llvm.mem.parallel_loop_access !1 -; PARALLEL-SCEV: store i32 %val_p_scalar_, i32* %scevgep1, !llvm.mem.parallel_loop_access !1 -; PARALLEL-SCEV: br i1 %polly.loop_cond, label %polly.loop_header, label %polly.loop_exit, !llvm.loop !1 +; PARALLEL-SCEV: %val_p_scalar_ = load i32* %scevgep, {{[ ._!,a-zA-Z0-9]*}}, !llvm.mem.parallel_loop_access ![[LoopID:[0-9]*]] +; PARALLEL-SCEV: store i32 %val_p_scalar_, i32* %scevgep1, {{[ ._!,a-zA-Z0-9]*}}, !llvm.mem.parallel_loop_access ![[LoopID]] +; PARALLEL-SCEV: br i1 %polly.loop_cond, label %polly.loop_header, label %polly.loop_exit, !llvm.loop ![[LoopID]] Index: test/Isl/CodeGen/annotated_alias_scopes.ll =================================================================== --- /dev/null +++ test/Isl/CodeGen/annotated_alias_scopes.ll @@ -0,0 +1,62 @@ +; RUN: opt %loadPolly -polly-code-generator=isl -polly-codegen-isl -polly-annotate-alias-scopes -S < %s | FileCheck %s +; +; Check that we create alias scopes that indicate the accesses to A, B and C cannot alias in any way. +; +; CHECK: %[[BIdx:[._a-zA-Z0-9]*]] = getelementptr inbounds i32* %B, i64 %polly.indvar +; CHECK: load i32* %[[BIdx]], !alias.scope ![[AliasScopeB:[0-9]*]], !noalias ![[NoAliasB:[0-9]*]] +; CHECK: %[[CIdx:[._a-zA-Z0-9]*]] = getelementptr inbounds float* %C, i64 %polly.indvar +; CHECK: load float* %[[CIdx]], !alias.scope ![[AliasScopeC:[0-9]*]], !noalias ![[NoAliasC:[0-9]*]] +; CHECK: %[[AIdx:[._a-zA-Z0-9]*]] = getelementptr inbounds i32* %A, i64 %polly.indvar +; CHECK: store i32 %{{[._a-zA-Z0-9]*}}, i32* %[[AIdx]], !alias.scope ![[AliasScopeA:[0-9]*]], !noalias ![[NoAliasA:[0-9]*]] +; +; CHECK-DAG: ![[AliasScopeB]] = metadata !{metadata ![[AliasScopeB]], metadata !{{[0-9]*}}, metadata !"polly.alias.scope.B"} +; CHECK: ![[NoAliasB]] = metadata !{ +; CHECK-DAG: metadata ![[AliasScopeA]] +; CHECK-DAG: metadata ![[AliasScopeC]] +; CHECK: } +; CHECK-DAG: ![[AliasScopeA]] = metadata !{metadata ![[AliasScopeA]], metadata !{{[0-9]*}}, metadata !"polly.alias.scope.A"} +; CHECK-DAG: ![[AliasScopeC]] = metadata !{metadata ![[AliasScopeC]], metadata !{{[0-9]*}}, metadata !"polly.alias.scope.C"} +; CHECK: ![[NoAliasC]] = metadata !{ +; CHECK-DAG: metadata ![[AliasScopeA]] +; CHECK-DAG: metadata ![[AliasScopeB]] +; CHECK: } +; CHECK: ![[NoAliasA]] = metadata !{ +; CHECK-DAG: metadata ![[AliasScopeB]] +; CHECK-DAG: metadata ![[AliasScopeC]] +; CHECK: } +; +; void jd(int *A, int *B, float *C) { +; for (int i = 0; i < 1024; i++) +; A[i] = B[i] + C[i]; +; } +; +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" + +define void @jd(i32* %A, i32* %B, float* %C) { +entry: + br label %for.cond + +for.cond: ; preds = %for.inc, %entry + %indvars.iv = phi i64 [ %indvars.iv.next, %for.inc ], [ 0, %entry ] + %exitcond = icmp ne i64 %indvars.iv, 1024 + br i1 %exitcond, label %for.body, label %for.end + +for.body: ; preds = %for.cond + %arrayidx = getelementptr inbounds i32* %B, i64 %indvars.iv + %tmp = load i32* %arrayidx, align 4 + %conv = sitofp i32 %tmp to float + %arrayidx2 = getelementptr inbounds float* %C, i64 %indvars.iv + %tmp1 = load float* %arrayidx2, align 4 + %add = fadd fast float %conv, %tmp1 + %conv3 = fptosi float %add to i32 + %arrayidx5 = getelementptr inbounds i32* %A, i64 %indvars.iv + store i32 %conv3, i32* %arrayidx5, align 4 + br label %for.inc + +for.inc: ; preds = %for.body + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + br label %for.cond + +for.end: ; preds = %for.cond + ret void +}