diff --git a/llvm/include/llvm/IR/Metadata.h b/llvm/include/llvm/IR/Metadata.h --- a/llvm/include/llvm/IR/Metadata.h +++ b/llvm/include/llvm/IR/Metadata.h @@ -1208,6 +1208,12 @@ return nullptr; return dyn_cast_or_null(Node->getOperand(1)); } + StringRef getName() const { + if (Node->getNumOperands() > 2) + if (MDString *N = dyn_cast_or_null(Node->getOperand(2))) + return N->getString(); + return StringRef(); + } }; /// Typed iterator through MDNode operands. diff --git a/llvm/include/llvm/Transforms/Utils/Cloning.h b/llvm/include/llvm/Transforms/Utils/Cloning.h --- a/llvm/include/llvm/Transforms/Utils/Cloning.h +++ b/llvm/include/llvm/Transforms/Utils/Cloning.h @@ -268,6 +268,40 @@ Function *Callee, int64_t entryDelta, const ValueMap *VMap = nullptr); +/// Find the 'llvm.experimental.noalias.scope.decl' intrinsics in the specified +/// basic blocks and extract their scope. These are candidates for duplication +/// when cloning. +void identifyNoAliasScopesToClone( + ArrayRef BBs, + SmallVectorImpl &NoAliasDeclScopes); + +/// Duplicate the specified list of noalias decl scopes. +/// The 'Ext' string is added as an extension to the name. +/// Afterwards, the ClonedMVScopes contains a mapping of the original MV onto +/// the cloned version. +/// The ClonedScopes contains the mapping of the original scope MDNode onto the +/// cloned scope. +/// Be aware that the cloned scopes are still part of the original scope domain. +void cloneNoAliasScopes( + ArrayRef NoAliasDeclScopes, + DenseMap &ClonedScopes, + DenseMap &ClonedMVScopes, + StringRef Ext, LLVMContext &Context); + +/// Adapt the metadata for the specified instruction according to the +/// provided mapping. This is normally used after cloning an instruction, when +/// some noalias scopes needed to be cloned. +void adaptNoAliasScopes( + llvm::Instruction *I, const DenseMap &ClonedScopes, + const DenseMap &ClonedMVScopes, + LLVMContext &Context); + +/// Clone the specified noalias decl scopes. 
Then adapt all instructions in the +/// NewBlocks basic blocks to the cloned versions. +/// 'Ext' will be added to the duplicate scope names. +void cloneAndAdaptNoAliasScopes(ArrayRef NoAliasDeclScopes, + ArrayRef NewBlocks, + LLVMContext &Context, StringRef Ext); } // end namespace llvm #endif // LLVM_TRANSFORMS_UTILS_CLONING_H diff --git a/llvm/lib/Transforms/Utils/CloneFunction.cpp b/llvm/lib/Transforms/Utils/CloneFunction.cpp --- a/llvm/lib/Transforms/Utils/CloneFunction.cpp +++ b/llvm/lib/Transforms/Utils/CloneFunction.cpp @@ -27,6 +27,7 @@ #include "llvm/IR/Instructions.h" #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/LLVMContext.h" +#include "llvm/IR/MDBuilder.h" #include "llvm/IR/Metadata.h" #include "llvm/IR/Module.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" @@ -36,6 +37,8 @@ #include using namespace llvm; +#define DEBUG_TYPE "clone-function" + /// See comments in Cloning.h. BasicBlock *llvm::CloneBasicBlock(const BasicBlock *BB, ValueToValueMapTy &VMap, const Twine &NameSuffix, Function *F, @@ -881,3 +884,96 @@ return NewBB; } + +void llvm::cloneNoAliasScopes( + ArrayRef NoAliasDeclScopes, + DenseMap &ClonedScopes, + DenseMap &ClonedMVScopes, + StringRef Ext, LLVMContext &Context) { + MDBuilder MDB(Context); + + for (auto *MV : NoAliasDeclScopes) { + SmallVector ScopeList; + for (auto &MDOperand : cast(MV->getMetadata())->operands()) { + if (MDNode *MD = dyn_cast(MDOperand)) { + AliasScopeNode SNANode(MD); + + std::string Name; + auto ScopeName = SNANode.getName(); + if (!ScopeName.empty()) + Name = (Twine(ScopeName) + ":" + Ext).str(); + else + Name = std::string(Ext); + + MDNode *NewScope = MDB.createAnonymousAliasScope( + const_cast(SNANode.getDomain()), Name); + ClonedScopes.insert(std::make_pair(MD, NewScope)); + ScopeList.push_back(NewScope); + } + } + MDNode *NewScopeList = MDNode::get(Context, ScopeList); + ClonedMVScopes.insert( + std::make_pair(MV, MetadataAsValue::get(Context, NewScopeList))); + } +} + +void 
llvm::adaptNoAliasScopes( + Instruction *I, const DenseMap &ClonedScopes, + const DenseMap &ClonedMVScopes, + LLVMContext &Context) { + // Replace every MetadataAsValue operand that has a cloned counterpart. + for (Use &U : I->operands()) + if (MetadataAsValue *MV = dyn_cast(U)) + if (auto *NewMV = ClonedMVScopes.lookup(MV)) + U.set(NewMV); + + auto replaceWhenNeeded = [&](unsigned MD_ID) { + if (const MDNode *CSNoAlias = I->getMetadata(MD_ID)) { + bool NeedsReplacement = false; + SmallVector NewScopeList; + for (auto &MDOp : CSNoAlias->operands()) { + if (MDNode *MD = dyn_cast(MDOp)) { + if (auto *NewMD = ClonedScopes.lookup(MD)) { + NewScopeList.push_back(NewMD); + NeedsReplacement = true; + continue; + } + NewScopeList.push_back(MD); + } + } + if (NeedsReplacement) + I->setMetadata(MD_ID, MDNode::get(Context, NewScopeList)); + } + }; + replaceWhenNeeded(LLVMContext::MD_noalias); + replaceWhenNeeded(LLVMContext::MD_alias_scope); +} + +void llvm::cloneAndAdaptNoAliasScopes( + ArrayRef NoAliasDeclScopes, + ArrayRef NewBlocks, LLVMContext &Context, StringRef Ext) { + if (NoAliasDeclScopes.empty()) + return; + + DenseMap ClonedScopes; + DenseMap ClonedMVScopes; + LLVM_DEBUG(dbgs() << "cloneAndAdaptNoAliasScopes: cloning " + << NoAliasDeclScopes.size() << " node(s)\n"); + + cloneNoAliasScopes(NoAliasDeclScopes, ClonedScopes, ClonedMVScopes, Ext, + Context); + // Adapt every instruction in the new blocks to the cloned scopes. + for (BasicBlock *NewBlock : NewBlocks) + for (Instruction &I : *NewBlock) + adaptNoAliasScopes(&I, ClonedScopes, ClonedMVScopes, Context); +} + +void llvm::identifyNoAliasScopesToClone( + ArrayRef BBs, + SmallVectorImpl &NoAliasDeclScopes) { + for (BasicBlock *BB : BBs) + for (Instruction &I : *BB) + if (auto *Decl = dyn_cast(&I)) + NoAliasDeclScopes.push_back(cast( + Decl->getOperand(Intrinsic::NoAliasScopeDeclScopeArg))); +} diff --git a/llvm/lib/Transforms/Utils/LoopUnroll.cpp b/llvm/lib/Transforms/Utils/LoopUnroll.cpp --- a/llvm/lib/Transforms/Utils/LoopUnroll.cpp +++ 
b/llvm/lib/Transforms/Utils/LoopUnroll.cpp @@ -590,6 +590,11 @@ << DIL->getFilename() << " Line: " << DIL->getLine()); } + // Identify what noalias metadata is inside the loop: if it is inside the + // loop, the associated metadata must be cloned for each iteration. + SmallVector LoopLocalNoAliasDeclScopes; + identifyNoAliasScopesToClone(L->getBlocks(), LoopLocalNoAliasDeclScopes); + for (unsigned It = 1; It != ULO.Count; ++It) { SmallVector NewBlocks; SmallDenseMap NewLoops; @@ -683,6 +688,15 @@ AC->registerAssumption(II); } } + + { + // Identify what other metadata depends on the cloned version. After + // cloning, replace the metadata with the corrected version for both + // memory instructions and noalias intrinsics. + std::string ext = (Twine("It") + Twine(It)).str(); + cloneAndAdaptNoAliasScopes(LoopLocalNoAliasDeclScopes, NewBlocks, + Header->getContext(), ext); + } } // Loop over the PHI nodes in the original block, setting incoming values. diff --git a/llvm/test/Transforms/LoopUnroll/noalias.ll b/llvm/test/Transforms/LoopUnroll/noalias.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Transforms/LoopUnroll/noalias.ll @@ -0,0 +1,103 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt -S -loop-unroll -unroll-count=4 < %s | FileCheck %s + +define void @test_inside(i32* %addr1, i32* %addr2) { +; CHECK-LABEL: @test_inside( +; CHECK-NEXT: start: +; CHECK-NEXT: br label [[BODY:%.*]] +; CHECK: body: +; CHECK-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata !0) +; CHECK-NEXT: [[X:%.*]] = load i32, i32* [[ADDR1:%.*]], align 4, !alias.scope !0 +; CHECK-NEXT: store i32 [[X]], i32* [[ADDR2:%.*]], align 4, !noalias !0 +; CHECK-NEXT: [[ADDR1I_1:%.*]] = getelementptr inbounds i32, i32* [[ADDR1]], i32 1 +; CHECK-NEXT: [[ADDR2I_1:%.*]] = getelementptr inbounds i32, i32* [[ADDR2]], i32 1 +; CHECK-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata !3) +; CHECK-NEXT: [[X_1:%.*]] = load i32, i32* [[ADDR1I_1]], 
align 4, !alias.scope !3 +; CHECK-NEXT: store i32 [[X_1]], i32* [[ADDR2I_1]], align 4, !noalias !3 +; CHECK-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata !5) +; CHECK-NEXT: [[X_2:%.*]] = load i32, i32* [[ADDR1]], align 4, !alias.scope !5 +; CHECK-NEXT: store i32 [[X_2]], i32* [[ADDR2]], align 4, !noalias !5 +; CHECK-NEXT: [[ADDR1I_3:%.*]] = getelementptr inbounds i32, i32* [[ADDR1]], i32 1 +; CHECK-NEXT: [[ADDR2I_3:%.*]] = getelementptr inbounds i32, i32* [[ADDR2]], i32 1 +; CHECK-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata !7) +; CHECK-NEXT: [[X_3:%.*]] = load i32, i32* [[ADDR1I_3]], align 4, !alias.scope !7 +; CHECK-NEXT: store i32 [[X_3]], i32* [[ADDR2I_3]], align 4, !noalias !7 +; CHECK-NEXT: ret void +; +start: + br label %body + +body: + %i = phi i32 [ 0, %start ], [ %i2, %body ] + %j = and i32 %i, 1 + %addr1i = getelementptr inbounds i32, i32* %addr1, i32 %j + %addr2i = getelementptr inbounds i32, i32* %addr2, i32 %j + + call void @llvm.experimental.noalias.scope.decl(metadata !2) + %x = load i32, i32* %addr1i, !alias.scope !2 + store i32 %x, i32* %addr2i, !noalias !2 + + %i2 = add i32 %i, 1 + %cmp = icmp slt i32 %i2, 4 + br i1 %cmp, label %body, label %end + +end: + ret void +} + +define void @test_outside(i32* %addr1, i32* %addr2) { +; CHECK-LABEL: @test_outside( +; CHECK-NEXT: start: +; CHECK-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata !0) +; CHECK-NEXT: br label [[BODY:%.*]] +; CHECK: body: +; CHECK-NEXT: [[X:%.*]] = load i32, i32* [[ADDR1:%.*]], align 4, !alias.scope !0 +; CHECK-NEXT: store i32 [[X]], i32* [[ADDR2:%.*]], align 4, !noalias !0 +; CHECK-NEXT: [[ADDR1I_1:%.*]] = getelementptr inbounds i32, i32* [[ADDR1]], i32 1 +; CHECK-NEXT: [[ADDR2I_1:%.*]] = getelementptr inbounds i32, i32* [[ADDR2]], i32 1 +; CHECK-NEXT: [[X_1:%.*]] = load i32, i32* [[ADDR1I_1]], align 4, !alias.scope !0 +; CHECK-NEXT: store i32 [[X_1]], i32* [[ADDR2I_1]], align 4, !noalias !0 +; CHECK-NEXT: [[X_2:%.*]] = load i32, 
i32* [[ADDR1]], align 4, !alias.scope !0 +; CHECK-NEXT: store i32 [[X_2]], i32* [[ADDR2]], align 4, !noalias !0 +; CHECK-NEXT: [[ADDR1I_3:%.*]] = getelementptr inbounds i32, i32* [[ADDR1]], i32 1 +; CHECK-NEXT: [[ADDR2I_3:%.*]] = getelementptr inbounds i32, i32* [[ADDR2]], i32 1 +; CHECK-NEXT: [[X_3:%.*]] = load i32, i32* [[ADDR1I_3]], align 4, !alias.scope !0 +; CHECK-NEXT: store i32 [[X_3]], i32* [[ADDR2I_3]], align 4, !noalias !0 +; CHECK-NEXT: ret void +; +start: + call void @llvm.experimental.noalias.scope.decl(metadata !2) + br label %body + +body: + %i = phi i32 [ 0, %start ], [ %i2, %body ] + %j = and i32 %i, 1 + %addr1i = getelementptr inbounds i32, i32* %addr1, i32 %j + %addr2i = getelementptr inbounds i32, i32* %addr2, i32 %j + + %x = load i32, i32* %addr1i, !alias.scope !2 + store i32 %x, i32* %addr2i, !noalias !2 + + %i2 = add i32 %i, 1 + %cmp = icmp slt i32 %i2, 4 + br i1 %cmp, label %body, label %end + +end: + ret void +} + +declare void @llvm.experimental.noalias.scope.decl(metadata) + +!0 = distinct !{!0} +!1 = distinct !{!1, !0} +!2 = !{!1} + +; CHECK: !0 = !{!1} +; CHECK: !1 = distinct !{!1, !2} +; CHECK: !2 = distinct !{!2} +; CHECK: !3 = !{!4} +; CHECK: !4 = distinct !{!4, !2, !"It1"} +; CHECK: !5 = !{!6} +; CHECK: !6 = distinct !{!6, !2, !"It2"} +; CHECK: !7 = !{!8} +; CHECK: !8 = distinct !{!8, !2, !"It3"} diff --git a/llvm/test/Transforms/PhaseOrdering/pr39282.ll b/llvm/test/Transforms/PhaseOrdering/pr39282.ll --- a/llvm/test/Transforms/PhaseOrdering/pr39282.ll +++ b/llvm/test/Transforms/PhaseOrdering/pr39282.ll @@ -15,24 +15,27 @@ ; Consider that %addr1 = %addr2 + 1, in which case %addr2i and %addr1i are ; noalias within one iteration, but may alias across iterations. -; TODO: This is a micompile. 
define void @pr39282(i32* %addr1, i32* %addr2) { ; CHECK-LABEL: @pr39282( ; CHECK-NEXT: start: -; CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl([[META0:metadata !.*]]) -; CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl([[META3:metadata !.*]]) +; CHECK-NEXT: call void @llvm.experimental.noalias.scope.decl +; CHECK-NEXT: call void @llvm.experimental.noalias.scope.decl ; CHECK-NEXT: [[X_I:%.*]] = load i32, i32* [[ADDR1:%.*]], align 4, !alias.scope !3, !noalias !0 +; CHECK-NEXT: store i32 [[X_I]], i32* [[ADDR2:%.*]], align 4, !alias.scope !0, !noalias !3 ; CHECK-NEXT: [[ADDR1I_1:%.*]] = getelementptr inbounds i32, i32* [[ADDR1]], i64 1 -; CHECK-NEXT: [[ADDR2I_1:%.*]] = getelementptr inbounds i32, i32* [[ADDR2:%.*]], i64 1 -; CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl([[META0]]) -; CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl([[META3]]) -; CHECK-NEXT: [[X_I_1:%.*]] = load i32, i32* [[ADDR1I_1]], align 4, !alias.scope !3, !noalias !0 -; CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl([[META0]]) -; CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl([[META3]]) -; CHECK-NEXT: store i32 [[X_I]], i32* [[ADDR2]], align 4, !alias.scope !0, !noalias !3 -; CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl([[META0]]) -; CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl([[META3]]) -; CHECK-NEXT: store i32 [[X_I_1]], i32* [[ADDR2I_1]], align 4, !alias.scope !0, !noalias !3 +; CHECK-NEXT: [[ADDR2I_1:%.*]] = getelementptr inbounds i32, i32* [[ADDR2]], i64 1 +; CHECK-NEXT: call void @llvm.experimental.noalias.scope.decl +; CHECK-NEXT: call void @llvm.experimental.noalias.scope.decl +; CHECK-NEXT: [[X_I_1:%.*]] = load i32, i32* [[ADDR1I_1]], align 4, !alias.scope !7, !noalias !5 +; CHECK-NEXT: store i32 [[X_I_1]], i32* [[ADDR2I_1]], align 4, !alias.scope !5, !noalias !7 +; CHECK-NEXT: call void @llvm.experimental.noalias.scope.decl +; CHECK-NEXT: 
call void @llvm.experimental.noalias.scope.decl +; CHECK-NEXT: [[X_I_2:%.*]] = load i32, i32* [[ADDR1]], align 4, !alias.scope !11, !noalias !9 +; CHECK-NEXT: store i32 [[X_I_2]], i32* [[ADDR2]], align 4, !alias.scope !9, !noalias !11 +; CHECK-NEXT: call void @llvm.experimental.noalias.scope.decl +; CHECK-NEXT: call void @llvm.experimental.noalias.scope.decl +; CHECK-NEXT: [[X_I_3:%.*]] = load i32, i32* [[ADDR1I_1]], align 4, !alias.scope !15, !noalias !13 +; CHECK-NEXT: store i32 [[X_I_3]], i32* [[ADDR2I_1]], align 4, !alias.scope !13, !noalias !15 ; CHECK-NEXT: ret void ; start: