Index: include/llvm/Transforms/Utils/LoopVersioning.h =================================================================== --- include/llvm/Transforms/Utils/LoopVersioning.h +++ include/llvm/Transforms/Utils/LoopVersioning.h @@ -80,8 +80,22 @@ /// \brief Annotate memory instructions in the versioned loop with no-alias /// metadata based on the memchecks issued. + /// + /// This is just a wrapper that calls prepareNoAliasMetadata and + /// annotateInstWithNoAlias on the instructions of the versioned loop. void annotateLoopWithNoAlias(); + /// \brief Set up the aliasing scopes based on the memchecks. This needs to + /// be called before the first call to annotateInstWithNoAlias. + void prepareNoAliasMetadata(); + + /// \brief Add the noalias annotations to \p VersionedInst. + /// + /// \p OrigInst is the instruction corresponding to \p VersionedInst in the + /// original loop. Initialize the aliasing scopes with + /// prepareNoAliasMetadata once before this can be called. + void annotateInstWithNoAlias(Instruction *VersionedInst, + const Instruction *OrigInst); private: /// \brief Adds the necessary PHI nodes for the versioned loops based on the /// loop-defined values used outside of the loop. @@ -90,13 +104,11 @@ /// that are used outside the loop. void addPHINodes(const SmallVectorImpl &DefsUsedOutside); - /// \brief Set up the aliasing scopes based on the memchecks. This needs to - /// be called before the first call to annotateInstWithNoAlias. - void prepareNoAliasMetadata(); - /// \brief Add the noalias annotations to \p I. Initialize the aliasing /// scopes with prepareNoAliasMetadata once before this can be called. - void annotateInstWithNoAlias(Instruction *I); + void annotateInstWithNoAlias(Instruction *I) { + annotateInstWithNoAlias(I, I); + } /// \brief The original loop. This becomes the "versioned" one. I.e., /// control flows here if pointers in the loop don't alias. 
Index: lib/Transforms/Utils/LoopVersioning.cpp =================================================================== --- lib/Transforms/Utils/LoopVersioning.cpp +++ lib/Transforms/Utils/LoopVersioning.cpp @@ -209,25 +209,27 @@ } } -void LoopVersioning::annotateInstWithNoAlias(Instruction *I) { +void LoopVersioning::annotateInstWithNoAlias(Instruction *VersionedInst, + const Instruction *OrigInst) { LLVMContext &Context = VersionedLoop->getHeader()->getContext(); - Value *Ptr = isa(I) ? cast(I)->getPointerOperand() - : cast(I)->getPointerOperand(); + const Value *Ptr = isa(OrigInst) ? cast(OrigInst)->getPointerOperand() + : cast(OrigInst)->getPointerOperand(); // Find the group for the pointer and then add the scope metadata. auto Group = PtrToGroup.find(Ptr); if (Group != PtrToGroup.end()) { - I->setMetadata( + VersionedInst->setMetadata( LLVMContext::MD_alias_scope, - MDNode::concatenate(I->getMetadata(LLVMContext::MD_alias_scope), - MDNode::get(Context, GroupToScope[Group->second]))); + MDNode::concatenate(VersionedInst->getMetadata(LLVMContext::MD_alias_scope), + MDNode::get(Context, + GroupToScope[Group->second]))); // Add the no-alias metadata. 
auto NonAliasingScopeList = GroupToNonAliasingScopeList.find(Group->second); if (NonAliasingScopeList != GroupToNonAliasingScopeList.end()) - I->setMetadata( + VersionedInst->setMetadata( LLVMContext::MD_noalias, - MDNode::concatenate(I->getMetadata(LLVMContext::MD_noalias), + MDNode::concatenate(VersionedInst->getMetadata(LLVMContext::MD_noalias), NonAliasingScopeList->second)); } } Index: lib/Transforms/Vectorize/LoopVectorize.cpp =================================================================== --- lib/Transforms/Vectorize/LoopVectorize.cpp +++ lib/Transforms/Vectorize/LoopVectorize.cpp @@ -98,6 +98,7 @@ #include "llvm/Transforms/Scalar.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/Local.h" +#include "llvm/Transforms/Utils/LoopVersioning.h" #include "llvm/Analysis/VectorUtils.h" #include "llvm/Transforms/Utils/LoopUtils.h" #include @@ -445,6 +446,12 @@ /// Emit bypass checks to check any memory assumptions we may have made. void emitMemRuntimeChecks(Loop *L, BasicBlock *Bypass); + /// \brief Propagate known metadata from one instruction to another. + void propagateMetadata(Instruction *To, const Instruction *From); + + /// \brief Propagate known metadata from one instruction to a vector of others. + void propagateMetadata(SmallVectorImpl &To, const Instruction *From); + /// This is a helper class that holds the vectorizer state. It maps scalar /// instructions to vector instructions. When the code is 'unrolled' then /// then a single scalar value is mapped to multiple vector parts. The parts @@ -502,6 +509,13 @@ /// Target Transform Info. const TargetTransformInfo *TTI; + /// \brief LoopVersioning. It's only set up (non-null) if memchecks were + /// used. + /// + /// This is currently only used to add no-alias metadata based on the + /// memchecks. The actual versioning is performed manually. + std::unique_ptr LVer; + /// The vectorization SIMD factor to use. Each vector will have this many /// vector elements. 
unsigned VF; @@ -619,7 +633,8 @@ #endif /// \brief Propagate known metadata from one instruction to another. -static void propagateMetadata(Instruction *To, const Instruction *From) { +void InnerLoopVectorizer::propagateMetadata(Instruction *To, + const Instruction *From) { SmallVector, 4> Metadata; From->getAllMetadataOtherThanDebugLoc(Metadata); @@ -640,11 +655,16 @@ To->setMetadata(Kind, M.second); } + + // If the loop was versioned with memchecks, add the corresponding no-alias + // metadata. + if (LVer && (isa(From) || isa(From))) + LVer->annotateInstWithNoAlias(To, From); } /// \brief Propagate known metadata from one instruction to a vector of others. -static void propagateMetadata(SmallVectorImpl &To, - const Instruction *From) { +void InnerLoopVectorizer::propagateMetadata(SmallVectorImpl &To, + const Instruction *From) { for (Value *V : To) if (Instruction *I = dyn_cast(V)) propagateMetadata(I, From); @@ -2812,6 +2832,11 @@ BranchInst::Create(Bypass, NewBB, MemRuntimeCheck)); LoopBypassBlocks.push_back(BB); AddedSafetyChecks = true; + + // We currently don't use LoopVersioning for the actual loop cloning but we + // still use it to add the noalias metadata. + LVer = llvm::make_unique(*Legal->getLAI(), OrigLoop, LI, DT, PSE.getSE()); + LVer->prepareNoAliasMetadata(); } Index: test/Transforms/LoopVectorize/noalias-md.ll =================================================================== --- /dev/null +++ test/Transforms/LoopVectorize/noalias-md.ll @@ -0,0 +1,76 @@ +; RUN: opt -basicaa -loop-vectorize -force-vector-width=2 \ +; RUN: -S < %s | FileCheck %s -check-prefix=BOTH -check-prefix=LV +; RUN: opt -basicaa -scoped-noalias -loop-vectorize -dse -force-vector-width=2 \ +; RUN: -S < %s | FileCheck %s -check-prefix=BOTH -check-prefix=DSE + +target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128" + +; This loop needs to be versioned with memchecks between {A, B} x {C} before +; it can be vectorized. 
+; +; for (i = 0; i < n; i++) { +; C[i] = A[i] + 2; +; C[i] += B[i]; +; } +; +; Check that the corresponding noalias metadata is added to the vector loop +; but not to the scalar loop. +; +; Since in the versioned vector loop C and B can no longer alias, the first +; store to C[i] can be DSE'd. + + +define void @f(i32* %a, i32* %b, i32* %c) { +entry: + br label %for.body + +; BOTH: vector.memcheck: +; BOTH: vector.body: +for.body: ; preds = %for.body, %entry + %ind = phi i64 [ 0, %entry ], [ %inc, %for.body ] + + %arrayidxA = getelementptr inbounds i32, i32* %a, i64 %ind +; Scope 1 +; LV: = load {{.*}} !alias.scope !0 + %loadA = load i32, i32* %arrayidxA, align 4 + + %add = add nuw i32 %loadA, 2 + + %arrayidxC = getelementptr inbounds i32, i32* %c, i64 %ind +; Noalias with scope 1 and 6 +; LV: store {{.*}} !alias.scope !3, !noalias !5 +; DSE-NOT: store + store i32 %add, i32* %arrayidxC, align 4 + + %arrayidxB = getelementptr inbounds i32, i32* %b, i64 %ind +; Scope 6 +; LV: = load {{.*}} !alias.scope !7 + %loadB = load i32, i32* %arrayidxB, align 4 + + %add2 = add nuw i32 %add, %loadB + +; Noalias with scope 1 and 6 +; LV: store {{.*}} !alias.scope !3, !noalias !5 +; DSE: store + store i32 %add2, i32* %arrayidxC, align 4 + + %inc = add nuw nsw i64 %ind, 1 + %exitcond = icmp eq i64 %inc, 20 + br i1 %exitcond, label %for.end, label %for.body + +; BOTH: for.body: +; BOTH-NOT: !alias.scope +; BOTH-NOT: !noalias + +for.end: ; preds = %for.body + ret void +} + +; LV: !0 = !{!1} +; LV: !1 = distinct !{!1, !2} +; LV: !2 = distinct !{!2, !"LVerDomain"} +; LV: !3 = !{!4} +; LV: !4 = distinct !{!4, !2} +; LV: !5 = !{!1, !6} +; LV: !6 = distinct !{!6, !2} +; LV: !7 = !{!6}