Index: include/llvm/Transforms/Utils/LoopVersioning.h
===================================================================
--- include/llvm/Transforms/Utils/LoopVersioning.h
+++ include/llvm/Transforms/Utils/LoopVersioning.h
@@ -78,6 +78,10 @@
   /// \brief Sets the runtime SCEV checks for versioning the loop.
   void setSCEVChecks(SCEVUnionPredicate Check);
 
+  /// \brief Annotate memory instructions in the versioned loop with no-alias
+  /// metadata based on the memchecks issued.
+  void annotateLoopWithNoAlias();
+
 private:
   /// \brief Adds the necessary PHI nodes for the versioned loops based on the
   /// loop-defined values used outside of the loop.
@@ -86,6 +90,14 @@
   /// that are used outside the loop.
   void addPHINodes(const SmallVectorImpl<Instruction *> &DefsUsedOutside);
 
+  /// \brief Set up the aliasing scopes based on the memchecks. This needs to
+  /// be called before the first call to annotateInstWithNoAlias.
+  void prepareNoAliasMetadata();
+
+  /// \brief Add the noalias annotations to \p I. Initialize the aliasing
+  /// scopes with prepareNoAliasMetadata once before this can be called.
+  void annotateInstWithNoAlias(Instruction *I);
+
   /// \brief The original loop. This becomes the "versioned" one. I.e.,
   /// control flows here if pointers in the loop don't alias.
   Loop *VersionedLoop;
@@ -103,6 +115,19 @@
   /// \brief The set of SCEV checks that we are versioning for.
   SCEVUnionPredicate Preds;
 
+  /// \brief Maps a pointer to the pointer checking group that the pointer
+  /// belongs to.
+  DenseMap<const Value *, const RuntimePointerChecking::CheckingPtrGroup *>
+      PtrToGroup;
+
+  /// \brief The alias scope corresponding to a pointer checking group.
+  DenseMap<const RuntimePointerChecking::CheckingPtrGroup *, MDNode *>
+      GroupToScope;
+
+  /// \brief The list of alias scopes that a pointer checking group can't alias.
+  DenseMap<const RuntimePointerChecking::CheckingPtrGroup *, MDNode *>
+      GroupToNonAliasingScopeList;
+
   /// \brief Analyses used.
   const LoopAccessInfo &LAI;
   LoopInfo *LI;
Index: lib/Transforms/Scalar/LoopDistribute.cpp
===================================================================
--- lib/Transforms/Scalar/LoopDistribute.cpp
+++ lib/Transforms/Scalar/LoopDistribute.cpp
@@ -792,6 +792,7 @@
       LVer.setAliasChecks(std::move(Checks));
       LVer.setSCEVChecks(LAI.PSE.getUnionPredicate());
       LVer.versionLoop(DefsUsedOutside);
+      LVer.annotateLoopWithNoAlias();
     }
 
     // Create identical copies of the original loop for each partition and hook
Index: lib/Transforms/Utils/LoopVersioning.cpp
===================================================================
--- lib/Transforms/Utils/LoopVersioning.cpp
+++ lib/Transforms/Utils/LoopVersioning.cpp
@@ -18,11 +18,18 @@
 #include "llvm/Analysis/LoopInfo.h"
 #include "llvm/Analysis/ScalarEvolutionExpander.h"
 #include "llvm/IR/Dominators.h"
+#include "llvm/IR/MDBuilder.h"
 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
 #include "llvm/Transforms/Utils/Cloning.h"
 
 using namespace llvm;
 
+static cl::opt<bool>
+    AnnotateNoAlias("loop-version-annotate-no-alias", cl::init(true),
+                    cl::Hidden,
+                    cl::desc("Add no-alias annotation for instructions that "
+                             "are disambiguated by memchecks"));
+
 LoopVersioning::LoopVersioning(const LoopAccessInfo &LAI, Loop *L, LoopInfo *LI,
                                DominatorTree *DT, ScalarEvolution *SE,
                                bool UseLAIChecks)
@@ -146,6 +153,85 @@
   }
 }
 
+void LoopVersioning::prepareNoAliasMetadata() {
+  // We need to turn the no-alias relation between pointer checking groups into
+  // no-aliasing annotations between instructions.
+  //
+  // We accomplish this by mapping each pointer checking group (a set of
+  // pointers memchecked together) to an alias scope and then also mapping each
+  // group to the list of scopes it can't alias.
+
+  const RuntimePointerChecking *RtPtrChecking = LAI.getRuntimePointerChecking();
+  LLVMContext &Context = VersionedLoop->getHeader()->getContext();
+
+  // First allocate an aliasing scope for each pointer checking group.
+  //
+  // While traversing through the checking groups in the loop, also create a
+  // reverse map from pointers to the pointer checking group they were assigned
+  // to.
+  MDBuilder MDB(Context);
+  MDNode *Domain = MDB.createAnonymousAliasScopeDomain("LVerDomain");
+
+  for (const auto &Group : RtPtrChecking->CheckingGroups) {
+    GroupToScope[&Group] = MDB.createAnonymousAliasScope(Domain);
+
+    for (unsigned PtrIdx : Group.Members)
+      PtrToGroup[RtPtrChecking->getPointerInfo(PtrIdx).PointerValue] = &Group;
+  }
+
+  // Go through the checks and for each pointer group, collect the scopes for
+  // each non-aliasing pointer group.
+  DenseMap<const RuntimePointerChecking::CheckingPtrGroup *,
+           SmallVector<Metadata *, 4>>
+      GroupToNonAliasingScopes;
+
+  for (const auto &Check : AliasChecks)
+    GroupToNonAliasingScopes[Check.first].push_back(GroupToScope[Check.second]);
+
+  // Finally, transform the above to actually map to scope list which is what
+  // the metadata uses.
+
+  for (const auto &Pair : GroupToNonAliasingScopes)
+    GroupToNonAliasingScopeList[Pair.first] =
+        MDNode::get(Context, Pair.second);
+}
+
+void LoopVersioning::annotateLoopWithNoAlias() {
+  if (!AnnotateNoAlias)
+    return;
+
+  // First prepare the maps.
+  prepareNoAliasMetadata();
+
+  // Add the scope and no-alias metadata to the instructions.
+  for (Instruction *I : LAI.getDepChecker().getMemoryInstructions()) {
+    annotateInstWithNoAlias(I);
+  }
+}
+
+void LoopVersioning::annotateInstWithNoAlias(Instruction *I) {
+  LLVMContext &Context = VersionedLoop->getHeader()->getContext();
+  Value *Ptr = isa<LoadInst>(I) ? cast<LoadInst>(I)->getPointerOperand()
+                                : cast<StoreInst>(I)->getPointerOperand();
+
+  // Find the group for the pointer and then add the scope metadata.
+  auto Group = PtrToGroup.find(Ptr);
+  if (Group != PtrToGroup.end()) {
+    I->setMetadata(
+        LLVMContext::MD_alias_scope,
+        MDNode::concatenate(I->getMetadata(LLVMContext::MD_alias_scope),
+                            MDNode::get(Context, GroupToScope[Group->second])));
+
+    // Add the no-alias metadata.
+    auto NonAliasingScopeList = GroupToNonAliasingScopeList.find(Group->second);
+    if (NonAliasingScopeList != GroupToNonAliasingScopeList.end())
+      I->setMetadata(
+          LLVMContext::MD_noalias,
+          MDNode::concatenate(I->getMetadata(LLVMContext::MD_noalias),
+                              NonAliasingScopeList->second));
+  }
+}
+
 namespace {
 /// \brief Also expose this is a pass. Currently this is only used for
 /// unit-testing. It adds all memchecks necessary to remove all may-aliasing
@@ -181,6 +267,7 @@
           !LAI.PSE.getUnionPredicate().isAlwaysTrue()) {
         LoopVersioning LVer(LAI, L, LI, DT, SE);
         LVer.versionLoop();
+        LVer.annotateLoopWithNoAlias();
         Changed = true;
       }
     }
Index: test/Transforms/LoopVersioning/noalias-version-twice.ll
===================================================================
--- /dev/null
+++ test/Transforms/LoopVersioning/noalias-version-twice.ll
@@ -0,0 +1,106 @@
+; RUN: opt -basicaa -loop-distribute -scoped-noalias -loop-versioning -S < %s | FileCheck %s
+
+; Test the metadata generated when versioning an already versioned loop. Here
+; we invoke loop distribution to perform the first round of versioning. It
+; adds memchecks for accesses that can alias across the distribution boundary.
+; Then we further version the distributed loops to fully disambiguate accesses
+; within each.
+;
+; So as an example, we add noalias between C and A during the versioning
+; within loop distribution and then add noalias between C and D during the
+; second explicit versioning step:
+;
+;   for (i = 0; i < n; i++) {
+;     A[i + 1] = A[i] * B[i];
+; -------------------------------
+;     C[i] = D[i] * E[i];
+;   }
+
+; To make it easier to see what's going on, I expanded every noalias/scope
+; metadata reference below in a comment. For a scope I use the format
+; scope(domain), e.g. scope 17 in domain 15 is written as 17(15).
+
+target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
+
+@B = common global i32* null, align 8
+@A = common global i32* null, align 8
+@C = common global i32* null, align 8
+@D = common global i32* null, align 8
+@E = common global i32* null, align 8
+
+define void @f() {
+entry:
+  %a = load i32*, i32** @A, align 8
+  %b = load i32*, i32** @B, align 8
+  %c = load i32*, i32** @C, align 8
+  %d = load i32*, i32** @D, align 8
+  %e = load i32*, i32** @E, align 8
+  br label %for.body
+
+for.body:                                         ; preds = %for.body, %entry
+  %ind = phi i64 [ 0, %entry ], [ %add, %for.body ]
+
+  %arrayidxA = getelementptr inbounds i32, i32* %a, i64 %ind
+
+; CHECK: %loadA.ldist1 = {{.*}} !noalias !25
+; A noalias C: !25 -> { 17(15), 18(15), 19(15), 26(24) }
+;                       ^^^^^^
+  %loadA = load i32, i32* %arrayidxA, align 4
+
+  %arrayidxB = getelementptr inbounds i32, i32* %b, i64 %ind
+  %loadB = load i32, i32* %arrayidxB, align 4
+
+  %mulA = mul i32 %loadB, %loadA
+
+  %add = add nuw nsw i64 %ind, 1
+  %arrayidxA_plus_4 = getelementptr inbounds i32, i32* %a, i64 %add
+  store i32 %mulA, i32* %arrayidxA_plus_4, align 4
+
+; CHECK: for.body:
+
+  %arrayidxD = getelementptr inbounds i32, i32* %d, i64 %ind
+
+; CHECK: %loadD = {{.*}} !alias.scope !31
+; D's scope: !31 -> { 18(15), 32(33) }
+;                             ^^^^^^
+  %loadD = load i32, i32* %arrayidxD, align 4
+
+  %arrayidxE = getelementptr inbounds i32, i32* %e, i64 %ind
+
+; CHECK: %loadE = {{.*}} !alias.scope !34
+; E's scope: !34 -> { 19(15), 35(33) }
+;                             ^^^^^^
+  %loadE = load i32, i32* %arrayidxE, align 4
+
+  %mulC = mul i32 %loadD, %loadE
+
+  %arrayidxC = getelementptr inbounds i32, i32* %c, i64 %ind
+
+; CHECK: store i32 %mulC, {{.*}} !alias.scope !36, !noalias !38
+; C's scope: !36 -> { 17(15), 37(33) }
+;                     ^^^^^^
+; C noalias D and E: !38 -> { 21(15), 32(33), 35(33) }
+;                                     ^^^^^^  ^^^^^^
+  store i32 %mulC, i32* %arrayidxC, align 4
+
+  %exitcond = icmp eq i64 %add, 20
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:                                          ; preds = %for.body
+  ret void
+}
+
+; Domain for the second loop versioning for the top loop after
+; distribution.
+; CHECK: !15 = distinct !{!15, !"LVerDomain"}
+; CHECK: !17 = distinct !{!17, !15}
+; CHECK: !25 = !{!17, !18, !19, !26}
+; CHECK: !31 = !{!18, !32}
+; CHECK: !32 = distinct !{!32, !33}
+; Domain for the second loop versioning for the bottom loop after
+; distribution.
+; CHECK: !33 = distinct !{!33, !"LVerDomain"}
+; CHECK: !34 = !{!19, !35}
+; CHECK: !35 = distinct !{!35, !33}
+; CHECK: !36 = !{!17, !37}
+; CHECK: !38 = !{!21, !32, !35}
Index: test/Transforms/LoopVersioning/noalias.ll
===================================================================
--- /dev/null
+++ test/Transforms/LoopVersioning/noalias.ll
@@ -0,0 +1,54 @@
+; RUN: opt -basicaa -loop-versioning -S < %s | FileCheck %s
+
+; A very simple case. After versioning the %loadA and %loadB can't alias with
+; the store.
+;
+; To make it easier to see what's going on, I expanded every noalias/scope
+; metadata reference below in a comment. For a scope I use the format
+; scope(domain), e.g. scope 17 in domain 15 is written as 17(15).
+
+; CHECK-LABEL: @f(
+
+target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
+
+define void @f(i32* %a, i32* %b, i32* %c) {
+entry:
+  br label %for.body
+
+; CHECK: for.body.lver.orig:
+; CHECK: for.body:
+for.body:                                         ; preds = %for.body, %entry
+  %ind = phi i64 [ 0, %entry ], [ %add, %for.body ]
+
+  %arrayidxA = getelementptr inbounds i32, i32* %a, i64 %ind
+; CHECK: %loadA = {{.*}} !alias.scope !0
+; A's scope: !0 -> { 1(2) }
+  %loadA = load i32, i32* %arrayidxA, align 4
+
+  %arrayidxB = getelementptr inbounds i32, i32* %b, i64 %ind
+; CHECK: %loadB = {{.*}} !alias.scope !3
+; B's scope: !3 -> { 4(2) }
+  %loadB = load i32, i32* %arrayidxB, align 4
+
+  %mulC = mul i32 %loadA, %loadB
+
+  %arrayidxC = getelementptr inbounds i32, i32* %c, i64 %ind
+; CHECK: store {{.*}} !alias.scope !5, !noalias !7
+; C noalias A and B: !7 -> { 1(2), 4(2) }
+  store i32 %mulC, i32* %arrayidxC, align 4
+
+  %add = add nuw nsw i64 %ind, 1
+  %exitcond = icmp eq i64 %add, 20
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:                                          ; preds = %for.body
+  ret void
+}
+; CHECK: !0 = !{!1}
+; CHECK: !1 = distinct !{!1, !2}
+; CHECK: !2 = distinct !{!2, !"LVerDomain"}
+; CHECK: !3 = !{!4}
+; CHECK: !4 = distinct !{!4, !2}
+; CHECK: !5 = !{!6}
+; CHECK: !6 = distinct !{!6, !2}
+; CHECK: !7 = !{!1, !4}