Index: include/llvm/Transforms/Utils/LoopVersioning.h
===================================================================
--- include/llvm/Transforms/Utils/LoopVersioning.h
+++ include/llvm/Transforms/Utils/LoopVersioning.h
@@ -78,6 +78,10 @@
   /// \brief Sets the runtime SCEV checks for versioning the loop.
   void setSCEVChecks(SCEVUnionPredicate Check);
 
+  /// \brief Annotate memory instructions in the versioned loop with non-alias
+  /// metadata.
+  void annotateLoopWithNoAlias();
+
 private:
   /// \brief Adds the necessary PHI nodes for the versioned loops based on the
   /// loop-defined values used outside of the loop.
Index: lib/Transforms/Scalar/LoopDistribute.cpp
===================================================================
--- lib/Transforms/Scalar/LoopDistribute.cpp
+++ lib/Transforms/Scalar/LoopDistribute.cpp
@@ -792,6 +792,7 @@
       LVer.setAliasChecks(std::move(Checks));
       LVer.setSCEVChecks(LAI.PSE.getUnionPredicate());
       LVer.versionLoop(DefsUsedOutside);
+      LVer.annotateLoopWithNoAlias();
     }
 
     // Create identical copies of the original loop for each partition and hook
Index: lib/Transforms/Utils/LoopVersioning.cpp
===================================================================
--- lib/Transforms/Utils/LoopVersioning.cpp
+++ lib/Transforms/Utils/LoopVersioning.cpp
@@ -18,11 +18,18 @@
 #include "llvm/Analysis/LoopInfo.h"
 #include "llvm/Analysis/ScalarEvolutionExpander.h"
 #include "llvm/IR/Dominators.h"
+#include "llvm/IR/MDBuilder.h"
 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
 #include "llvm/Transforms/Utils/Cloning.h"
 
 using namespace llvm;
 
+static cl::opt<bool>
+    AnnotateNoAlias("loop-version-annotate-no-alias", cl::init(true),
+                    cl::Hidden,
+                    cl::desc("Add no-alias annotation for instructions that "
+                             "are disambiguated by memchecks"));
+
 LoopVersioning::LoopVersioning(const LoopAccessInfo &LAI, Loop *L, LoopInfo *LI,
                                DominatorTree *DT, ScalarEvolution *SE,
                                bool UseLAIChecks)
@@ -146,6 +153,86 @@
   }
 }
 
+void LoopVersioning::annotateLoopWithNoAlias() {
+  if (!AnnotateNoAlias)
+    return;
+
+  // Here we need to turn the relation between pointer checking groups into
+  // non-aliasing annotations.
+  //
+  // 1. mark each memory instruction whose pointer is memchecked with a scope
+  // that is unique to its containing pointer checking group
+  //
+  // 2. mark each memory instruction with non-alias annotation with the scope
+  // list of pointer groups that its pointer is checked against.
+
+  const RuntimePointerChecking *RtPtrChecking = LAI.getRuntimePointerChecking();
+
+  // First map each pointer checking group to an aliasing scope.
+  DenseMap<const RuntimePointerChecking::CheckingPtrGroup *, MDNode *>
+      GroupToScope;
+
+  // While we're traversing through the checking groups in the loop below, also
+  // create a reverse map from pointers to the pointer checking group they were
+  // assigned to.
+  DenseMap<const Value *, const RuntimePointerChecking::CheckingPtrGroup *>
+      PtrToGroup;
+
+  LLVMContext &Context = VersionedLoop->getHeader()->getContext();
+  MDBuilder MDB(Context);
+  MDNode *Domain = MDB.createAnonymousAliasScopeDomain("LVerDomain");
+
+  for (const auto &Group : RtPtrChecking->CheckingGroups) {
+    GroupToScope[&Group] = MDB.createAnonymousAliasScope(Domain);
+
+    for (unsigned PtrIdx : Group.Members)
+      PtrToGroup[RtPtrChecking->getPointerInfo(PtrIdx).PointerValue] = &Group;
+  }
+
+  // Go through the checks and for each pointer group, collect the scopes for
+  // each non-aliasing pointer group.
+  DenseMap<const RuntimePointerChecking::CheckingPtrGroup *,
+           SmallVector<Metadata *, 4>>
+      GroupToNonAliasingScopes;
+
+  for (const auto &Check : AliasChecks)
+    GroupToNonAliasingScopes[Check.first].push_back(GroupToScope[Check.second]);
+
+  // In the last step transform the above to actually map to scope list which is
+  // what the metadata uses.
+  DenseMap<const RuntimePointerChecking::CheckingPtrGroup *, MDNode *>
+      GroupToNonAliasingScopeList;
+
+  for (const auto &Pair : GroupToNonAliasingScopes)
+    GroupToNonAliasingScopeList[Pair.first] =
+        MDNode::get(Context, Pair.second);
+
+  // Finally, add the scope and no-alias metadata to the instructions.
+  for (Instruction *I : LAI.getDepChecker().getMemoryInstructions()) {
+    Value *Ptr = isa<LoadInst>(I) ? cast<LoadInst>(I)->getPointerOperand()
+                                  : cast<StoreInst>(I)->getPointerOperand();
+
+    // Add the scope metadata.
+    auto Group = PtrToGroup.find(Ptr);
+    if (Group != PtrToGroup.end()) {
+      I->setMetadata(
+          LLVMContext::MD_alias_scope,
+          MDNode::concatenate(I->getMetadata(LLVMContext::MD_alias_scope),
+                              MDNode::get(Context,
+                                          GroupToScope[Group->second])));
+
+      // Add the non-alias metadata.
+      auto NonAliasingScopeList =
+          GroupToNonAliasingScopeList.find(Group->second);
+      if (NonAliasingScopeList != GroupToNonAliasingScopeList.end())
+        I->setMetadata(
+            LLVMContext::MD_noalias,
+            MDNode::concatenate(I->getMetadata(LLVMContext::MD_noalias),
+                                NonAliasingScopeList->second));
+    }
+  }
+}
+
 namespace {
 /// \brief Also expose this is a pass. Currently this is only used for
 /// unit-testing. It adds all memchecks necessary to remove all may-aliasing
@@ -180,6 +267,7 @@
       if (LAI.getNumRuntimePointerChecks()) {
         LoopVersioning LVer(LAI, L, LI, DT, SE);
         LVer.versionLoop();
+        LVer.annotateLoopWithNoAlias();
         Changed = true;
       }
     }
Index: lib/Transforms/Vectorize/LoopVectorize.cpp
===================================================================
--- lib/Transforms/Vectorize/LoopVectorize.cpp
+++ lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -98,6 +98,7 @@
 #include "llvm/Transforms/Scalar.h"
 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
 #include "llvm/Transforms/Utils/Local.h"
+#include "llvm/Transforms/Utils/LoopVersioning.h"
 #include "llvm/Analysis/VectorUtils.h"
 #include "llvm/Transforms/Utils/LoopUtils.h"
 #include <algorithm>
@@ -2796,6 +2797,11 @@
                       BranchInst::Create(Bypass, NewBB, MemRuntimeCheck));
   LoopBypassBlocks.push_back(BB);
   AddedSafetyChecks = true;
+
+  // We currently don't use LoopVersioning for the actual loop cloning but we
+  // still use it to add the noalias metadata.
+  LoopVersioning LVer(*Legal->getLAI(), L, LI, DT, PSE.getSE(), true);
+  LVer.annotateLoopWithNoAlias();
 }
 
 
Index: test/Transforms/LoopVersioning/noalias-version-twice.ll
===================================================================
--- /dev/null
+++ test/Transforms/LoopVersioning/noalias-version-twice.ll
@@ -0,0 +1,106 @@
+; RUN: opt -basicaa -loop-distribute -scoped-noalias -loop-versioning -S < %s | FileCheck %s
+
+; Test the metadata generated when versioning an already versioned loop. Here
+; we invoke loop distribution to perform the first round of versioning. It
+; adds memchecks for accesses that can alias across the distribution boundary.
+; Then we further version the distributed loops to fully disambiguate accesses
+; within each.
+;
+; So as an example, we add noalias between C and A during the versioning
+; within loop distribution and then add noalias between C and D during the
+; second explicit versioning step:
+;
+;   for (i = 0; i < n; i++) {
+;     A[i + 1] = A[i] * B[i];
+; -------------------------------
+;     C[i] = D[i] * E[i];
+;   }
+
+; To make it easier to see what's going on, I expanded every noalias/scope
+; metadata reference below in a comment. For a scope I use the format
+; scope(domain), e.g. scope 17 in domain 15 is written as 17(15).
+
+target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
+
+@B = common global i32* null, align 8
+@A = common global i32* null, align 8
+@C = common global i32* null, align 8
+@D = common global i32* null, align 8
+@E = common global i32* null, align 8
+
+define void @f() {
+entry:
+  %a = load i32*, i32** @A, align 8
+  %b = load i32*, i32** @B, align 8
+  %c = load i32*, i32** @C, align 8
+  %d = load i32*, i32** @D, align 8
+  %e = load i32*, i32** @E, align 8
+  br label %for.body
+
+for.body:                                         ; preds = %for.body, %entry
+  %ind = phi i64 [ 0, %entry ], [ %add, %for.body ]
+
+  %arrayidxA = getelementptr inbounds i32, i32* %a, i64 %ind
+
+; CHECK: %loadA.ldist1 = {{.*}} !noalias !25
+; A noalias C: !25 -> { 17(15), 18(15), 19(15), 26(24) }
+;                       ^^^^^^
+  %loadA = load i32, i32* %arrayidxA, align 4
+
+  %arrayidxB = getelementptr inbounds i32, i32* %b, i64 %ind
+  %loadB = load i32, i32* %arrayidxB, align 4
+
+  %mulA = mul i32 %loadB, %loadA
+
+  %add = add nuw nsw i64 %ind, 1
+  %arrayidxA_plus_4 = getelementptr inbounds i32, i32* %a, i64 %add
+  store i32 %mulA, i32* %arrayidxA_plus_4, align 4
+
+; CHECK: for.body:
+
+  %arrayidxD = getelementptr inbounds i32, i32* %d, i64 %ind
+
+; CHECK: %loadD = {{.*}} !alias.scope !31
+; D's scope: !31 -> { 18(15), 32(33) }
+;                             ^^^^^^
+  %loadD = load i32, i32* %arrayidxD, align 4
+
+  %arrayidxE = getelementptr inbounds i32, i32* %e, i64 %ind
+
+; CHECK: %loadE = {{.*}} !alias.scope !34
+; E's scope: !34 -> { 19(15), 35(33) }
+;                             ^^^^^^
+  %loadE = load i32, i32* %arrayidxE, align 4
+
+  %mulC = mul i32 %loadD, %loadE
+
+  %arrayidxC = getelementptr inbounds i32, i32* %c, i64 %ind
+
+; CHECK: store i32 %mulC, {{.*}} !alias.scope !36, !noalias !38
+; C's scope: !36 -> { 17(15), 37(33) }
+;                     ^^^^^^
+; C noalias D and E: !38 -> { 21(15), 32(33), 35(33) }
+;                                     ^^^^^^  ^^^^^^
+  store i32 %mulC, i32* %arrayidxC, align 4
+
+  %exitcond = icmp eq i64 %add, 20
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:                                          ; preds = %for.body
+  ret void
+}
+
+; Domain for the second loop versioning for the top loop after
+; distribution.
+; CHECK: !15 = distinct !{!15, !"LVerDomain"}
+; CHECK: !17 = distinct !{!17, !15}
+; CHECK: !25 = !{!17, !18, !19, !26}
+; CHECK: !31 = !{!18, !32}
+; CHECK: !32 = distinct !{!32, !33}
+; Domain for the second loop versioning for the bottom loop after
+; distribution.
+; CHECK: !33 = distinct !{!33, !"LVerDomain"}
+; CHECK: !34 = !{!19, !35}
+; CHECK: !35 = distinct !{!35, !33}
+; CHECK: !36 = !{!17, !37}
+; CHECK: !38 = !{!21, !32, !35}
Index: test/Transforms/LoopVersioning/noalias.ll
===================================================================
--- /dev/null
+++ test/Transforms/LoopVersioning/noalias.ll
@@ -0,0 +1,54 @@
+; RUN: opt -basicaa -loop-versioning -S < %s | FileCheck %s
+
+; A very simple case. After versioning the %loadA and %loadB can't alias with
+; the store.
+;
+; To make it easier to see what's going on, I expanded every noalias/scope
+; metadata reference below in a comment. For a scope I use the format
+; scope(domain), e.g. scope 17 in domain 15 is written as 17(15).
+
+; CHECK-LABEL: @f(
+
+target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
+
+define void @f(i32* %a, i32* %b, i32* %c) {
+entry:
+  br label %for.body
+
+; CHECK: for.body.lver.orig:
+; CHECK: for.body:
+for.body:                                         ; preds = %for.body, %entry
+  %ind = phi i64 [ 0, %entry ], [ %add, %for.body ]
+
+  %arrayidxA = getelementptr inbounds i32, i32* %a, i64 %ind
+; CHECK: %loadA = {{.*}} !alias.scope !0
+; A's scope: !0 -> { 1(2) }
+  %loadA = load i32, i32* %arrayidxA, align 4
+
+  %arrayidxB = getelementptr inbounds i32, i32* %b, i64 %ind
+; CHECK: %loadB = {{.*}} !alias.scope !3
+; B's scope: !3 -> { 4(2) }
+  %loadB = load i32, i32* %arrayidxB, align 4
+
+  %mulC = mul i32 %loadA, %loadB
+
+  %arrayidxC = getelementptr inbounds i32, i32* %c, i64 %ind
+; CHECK: store {{.*}} !alias.scope !5, !noalias !7
+; C noalias A and B: !7 -> { 1(2), 4(2) }
+  store i32 %mulC, i32* %arrayidxC, align 4
+
+  %add = add nuw nsw i64 %ind, 1
+  %exitcond = icmp eq i64 %add, 20
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:                                          ; preds = %for.body
+  ret void
+}
+; CHECK: !0 = !{!1}
+; CHECK: !1 = distinct !{!1, !2}
+; CHECK: !2 = distinct !{!2, !"LVerDomain"}
+; CHECK: !3 = !{!4}
+; CHECK: !4 = distinct !{!4, !2}
+; CHECK: !5 = !{!6}
+; CHECK: !6 = distinct !{!6, !2}
+; CHECK: !7 = !{!1, !4}