Index: llvm/include/llvm/IR/Metadata.h
===================================================================
--- llvm/include/llvm/IR/Metadata.h
+++ llvm/include/llvm/IR/Metadata.h
@@ -1208,6 +1208,14 @@
       return nullptr;
     return dyn_cast_or_null<MDNode>(Node->getOperand(1));
   }
+  /// Return the name stored as the third operand of the scope node, or an
+  /// empty StringRef when that operand is absent or is not an MDString.
+  StringRef getName() const {
+    if (Node->getNumOperands() > 2)
+      if (MDString *N = dyn_cast_or_null<MDString>(Node->getOperand(2)))
+        return N->getString();
+    return StringRef();
+  }
 };
 
 /// Typed iterator through MDNode operands.
Index: llvm/include/llvm/Transforms/Utils/Cloning.h
===================================================================
--- llvm/include/llvm/Transforms/Utils/Cloning.h
+++ llvm/include/llvm/Transforms/Utils/Cloning.h
@@ -268,6 +268,40 @@
     Function *Callee, int64_t entryDelta,
     const ValueMap<const Value *, WeakTrackingVH> *VMap = nullptr);
 
+/// Find the 'llvm.experimental.noalias.scope.decl' intrinsics in the specified
+/// basic blocks and collect their scope list operands. These are candidates
+/// for duplication when cloning.
+void identifyNoAliasScopesToClone(
+    ArrayRef<BasicBlock *> BBs,
+    SmallVectorImpl<MetadataAsValue *> &NoAliasDeclScopes);
+
+/// Duplicate the specified list of noalias decl scopes.
+/// The 'Ext' string is added as an extension to the name.
+/// Afterwards, ClonedMVScopes maps each original MetadataAsValue (MV) onto its
+/// cloned version, and ClonedScopes maps each original scope MDNode onto its
+/// cloned scope. A scope that already has an entry in ClonedScopes is not
+/// cloned again; its existing clone is reused.
+/// Be aware that the cloned scopes are still part of the original scope domain.
+void cloneNoAliasScopes(
+    ArrayRef<MetadataAsValue *> NoAliasDeclScopes,
+    DenseMap<MDNode *, MDNode *> &ClonedScopes,
+    DenseMap<MetadataAsValue *, MetadataAsValue *> &ClonedMVScopes,
+    StringRef Ext, LLVMContext &Context);
+
+/// Adapt the metadata for the specified instruction according to the
+/// provided mapping. This is normally used after cloning an instruction, when
+/// some noalias scopes needed to be cloned.
+void adaptNoAliasScopes(
+    Instruction *I, const DenseMap<MDNode *, MDNode *> &ClonedScopes,
+    const DenseMap<MetadataAsValue *, MetadataAsValue *> &ClonedMVScopes,
+    LLVMContext &Context);
+
+/// Clone the specified noalias decl scopes. Then adapt all instructions in the
+/// NewBlocks basic blocks to the cloned versions.
+/// 'Ext' will be added to the duplicate scope names.
+void cloneAndAdaptNoAliasScopes(ArrayRef<MetadataAsValue *> NoAliasDeclScopes,
+                                ArrayRef<BasicBlock *> NewBlocks,
+                                LLVMContext &Context, StringRef Ext);
 } // end namespace llvm
 
 #endif // LLVM_TRANSFORMS_UTILS_CLONING_H
Index: llvm/lib/Transforms/Utils/CloneFunction.cpp
===================================================================
--- llvm/lib/Transforms/Utils/CloneFunction.cpp
+++ llvm/lib/Transforms/Utils/CloneFunction.cpp
@@ -27,6 +27,7 @@
 #include "llvm/IR/Instructions.h"
 #include "llvm/IR/IntrinsicInst.h"
 #include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/MDBuilder.h"
 #include "llvm/IR/Metadata.h"
 #include "llvm/IR/Module.h"
 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
@@ -36,6 +37,8 @@
 #include <map>
 using namespace llvm;
 
+#define DEBUG_TYPE "clone-function"
+
 /// See comments in Cloning.h.
 BasicBlock *llvm::CloneBasicBlock(const BasicBlock *BB, ValueToValueMapTy &VMap,
                                   const Twine &NameSuffix, Function *F,
@@ -883,3 +886,110 @@
 
   return NewBB;
 }
+
+void llvm::cloneNoAliasScopes(
+    ArrayRef<MetadataAsValue *> NoAliasDeclScopes,
+    DenseMap<MDNode *, MDNode *> &ClonedScopes,
+    DenseMap<MetadataAsValue *, MetadataAsValue *> &ClonedMVScopes,
+    StringRef Ext, LLVMContext &Context) {
+  MDBuilder MDB(Context);
+
+  for (auto *MV : NoAliasDeclScopes) {
+    SmallVector<Metadata *, 8> ScopeList;
+    for (auto &MDOperand : cast<MDNode>(MV->getMetadata())->operands()) {
+      if (MDNode *MD = dyn_cast<MDNode>(MDOperand)) {
+        // The same scope can be visited more than once (the list of
+        // declarations may contain duplicates). Clone it only once, so that
+        // every user ends up referring to the same cloned scope.
+        MDNode *&NewScope = ClonedScopes[MD];
+        if (!NewScope) {
+          AliasScopeNode SNANode(MD);
+
+          std::string Name;
+          auto ScopeName = SNANode.getName();
+          if (!ScopeName.empty())
+            Name = (Twine(ScopeName) + ":" + Ext).str();
+          else
+            Name = std::string(Ext);
+
+          NewScope = MDB.createAnonymousAliasScope(
+              const_cast<MDNode *>(SNANode.getDomain()), Name);
+        }
+        ScopeList.push_back(NewScope);
+      }
+    }
+    MDNode *NewScopeList = MDNode::get(Context, ScopeList);
+    ClonedMVScopes.insert(
+        std::make_pair(MV, MetadataAsValue::get(Context, NewScopeList)));
+  }
+}
+
+void llvm::adaptNoAliasScopes(
+    Instruction *I, const DenseMap<MDNode *, MDNode *> &ClonedScopes,
+    const DenseMap<MetadataAsValue *, MetadataAsValue *> &ClonedMVScopes,
+    LLVMContext &Context) {
+  // Redirect each MetadataAsValue operand that has a cloned counterpart.
+  for (unsigned OpI = 0, OpIEnd = I->getNumOperands(); OpI != OpIEnd; ++OpI) {
+    if (auto *MV = dyn_cast<MetadataAsValue>(I->getOperand(OpI))) {
+      auto MvIt = ClonedMVScopes.find(MV);
+      if (MvIt != ClonedMVScopes.end())
+        I->setOperand(OpI, MvIt->second);
+    }
+  }
+
+  auto ReplaceWhenNeeded = [&](unsigned MD_ID) {
+    const MDNode *CSNoAlias = I->getMetadata(MD_ID);
+    if (!CSNoAlias)
+      return;
+
+    bool NeedsReplacement = false;
+    SmallVector<Metadata *, 8> NewScopeList;
+    for (auto &MDOp : CSNoAlias->operands()) {
+      if (MDNode *MD = dyn_cast<MDNode>(MDOp)) {
+        auto MdIt = ClonedScopes.find(MD);
+        if (MdIt != ClonedScopes.end()) {
+          NewScopeList.push_back(MdIt->second);
+          NeedsReplacement = true;
+          continue;
+        }
+        NewScopeList.push_back(MD);
+      }
+    }
+    // Only rewrite the attachment when at least one scope was cloned.
+    if (NeedsReplacement)
+      I->setMetadata(MD_ID, MDNode::get(Context, NewScopeList));
+  };
+  ReplaceWhenNeeded(LLVMContext::MD_noalias);
+  ReplaceWhenNeeded(LLVMContext::MD_alias_scope);
+}
+
+void llvm::cloneAndAdaptNoAliasScopes(
+    ArrayRef<MetadataAsValue *> NoAliasDeclScopes,
+    ArrayRef<BasicBlock *> NewBlocks, LLVMContext &Context, StringRef Ext) {
+  if (NoAliasDeclScopes.empty())
+    return;
+
+  DenseMap<MDNode *, MDNode *> ClonedScopes;
+  DenseMap<MetadataAsValue *, MetadataAsValue *> ClonedMVScopes;
+  LLVM_DEBUG(llvm::dbgs() << "cloneAndAdaptNoAliasScopes: cloning "
+                          << NoAliasDeclScopes.size() << " node(s)\n");
+
+  cloneNoAliasScopes(NoAliasDeclScopes, ClonedScopes, ClonedMVScopes, Ext,
+                     Context);
+  // Identify instructions using metadata that needs adaptation
+  for (BasicBlock *NewBlock : NewBlocks)
+    for (Instruction &I : *NewBlock)
+      adaptNoAliasScopes(&I, ClonedScopes, ClonedMVScopes, Context);
+}
+
+void llvm::identifyNoAliasScopesToClone(
+    ArrayRef<BasicBlock *> BBs,
+    SmallVectorImpl<MetadataAsValue *> &NoAliasDeclScopes) {
+  for (BasicBlock *BB : BBs)
+    for (Instruction &I : *BB)
+      if (auto *II = dyn_cast<IntrinsicInst>(&I))
+        if (II->getIntrinsicID() ==
+            Intrinsic::experimental_noalias_scope_decl)
+          NoAliasDeclScopes.push_back(cast<MetadataAsValue>(
+              II->getOperand(Intrinsic::NoAliasScopeDeclScopeArg)));
+}
Index: llvm/lib/Transforms/Utils/LoopUnroll.cpp
===================================================================
--- llvm/lib/Transforms/Utils/LoopUnroll.cpp
+++ llvm/lib/Transforms/Utils/LoopUnroll.cpp
@@ -590,6 +590,11 @@
                         << DIL->getFilename() << " Line: " << DIL->getLine());
   }
 
+  // Phase1: Identify what noalias metadata is inside the loop: if it is inside
+  // the loop, the associated metadata must be cloned for each iteration.
+  SmallVector<MetadataAsValue *, 6> LoopLocalNoAliasDeclScopes;
+  identifyNoAliasScopesToClone(L->getBlocks(), LoopLocalNoAliasDeclScopes);
+
   for (unsigned It = 1; It != ULO.Count; ++It) {
     SmallVector<BasicBlock *, 8> NewBlocks;
     SmallDenseMap<const Loop *, Loop *, 4> NewLoops;
@@ -683,6 +688,15 @@
           AC->registerAssumption(II);
       }
     }
+
+    {
+      // Identify what other metadata depends on the cloned version. After
+      // cloning, replace the metadata with the corrected version for both
+      // memory instructions and noalias intrinsics.
+      std::string Ext = (Twine("It") + Twine(It)).str();
+      cloneAndAdaptNoAliasScopes(LoopLocalNoAliasDeclScopes, NewBlocks,
+                                 Header->getContext(), Ext);
+    }
   }
 
   // Loop over the PHI nodes in the original block, setting incoming values.
Index: llvm/test/Transforms/LoopUnroll/noalias.ll
===================================================================
--- /dev/null
+++ llvm/test/Transforms/LoopUnroll/noalias.ll
@@ -0,0 +1,62 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -S -loop-unroll -unroll-count=4 < %s | FileCheck %s
+; Each unrolled iteration must get its own clone of the loop-local noalias scope.
+define void @pr39282(i32* %addr1, i32* %addr2) {
+; CHECK-LABEL: @pr39282(
+; CHECK-NEXT: start:
+; CHECK-NEXT: br label [[BODY:%.*]]
+; CHECK: body:
+; CHECK-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata !0)
+; CHECK-NEXT: [[X:%.*]] = load i32, i32* [[ADDR1:%.*]], align 4, !alias.scope !0
+; CHECK-NEXT: store i32 [[X]], i32* [[ADDR2:%.*]], align 4, !noalias !0
+; CHECK-NEXT: [[ADDR1I_1:%.*]] = getelementptr inbounds i32, i32* [[ADDR1]], i32 1
+; CHECK-NEXT: [[ADDR2I_1:%.*]] = getelementptr inbounds i32, i32* [[ADDR2]], i32 1
+; CHECK-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata !3)
+; CHECK-NEXT: [[X_1:%.*]] = load i32, i32* [[ADDR1I_1]], align 4, !alias.scope !3
+; CHECK-NEXT: store i32 [[X_1]], i32* [[ADDR2I_1]], align 4, !noalias !3
+; CHECK-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata !5)
+; CHECK-NEXT: [[X_2:%.*]] = load i32, i32* [[ADDR1]], align 4, !alias.scope !5
+; CHECK-NEXT: store i32 [[X_2]], i32* [[ADDR2]], align 4, !noalias !5
+; CHECK-NEXT: [[ADDR1I_3:%.*]] = getelementptr inbounds i32, i32* [[ADDR1]], i32 1
+; CHECK-NEXT: [[ADDR2I_3:%.*]] = getelementptr inbounds i32, i32* [[ADDR2]], i32 1
+; CHECK-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata !7)
+; CHECK-NEXT: [[X_3:%.*]] = load i32, i32* [[ADDR1I_3]], align 4, !alias.scope !7
+; CHECK-NEXT: store i32 [[X_3]], i32* [[ADDR2I_3]], align 4, !noalias !7
+; CHECK-NEXT: ret void
+;
+start:
+  br label %body
+
+body:
+  %i = phi i32 [ 0, %start ], [ %i2, %body ]
+  %j = and i32 %i, 1
+  %addr1i = getelementptr inbounds i32, i32* %addr1, i32 %j
+  %addr2i = getelementptr inbounds i32, i32* %addr2, i32 %j
+
+  call void @llvm.experimental.noalias.scope.decl(metadata !2)
+  %x = load i32, i32* %addr1i, !alias.scope !2
+  store i32 %x, i32* %addr2i, !noalias !2
+
+  %i2 = add i32 %i, 1
+  %cmp = icmp slt i32 %i2, 4
+  br i1 %cmp, label %body, label %end
+
+end:
+  ret void
+}
+
+declare void @llvm.experimental.noalias.scope.decl(metadata)
+
+!0 = distinct !{!0}
+!1 = distinct !{!1, !0}
+!2 = !{!1}
+
+; CHECK: !0 = !{!1}
+; CHECK: !1 = distinct !{!1, !2}
+; CHECK: !2 = distinct !{!2}
+; CHECK: !3 = !{!4}
+; CHECK: !4 = distinct !{!4, !2, !"It1"}
+; CHECK: !5 = !{!6}
+; CHECK: !6 = distinct !{!6, !2, !"It2"}
+; CHECK: !7 = !{!8}
+; CHECK: !8 = distinct !{!8, !2, !"It3"}
Index: llvm/test/Transforms/PhaseOrdering/pr39282.ll
===================================================================
--- llvm/test/Transforms/PhaseOrdering/pr39282.ll
+++ llvm/test/Transforms/PhaseOrdering/pr39282.ll
@@ -19,20 +19,24 @@
 define void @pr39282(i32* %addr1, i32* %addr2) {
 ; CHECK-LABEL: @pr39282(
 ; CHECK-NEXT: start:
-; CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl([[META0:metadata !.*]])
-; CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl([[META3:metadata !.*]])
+; CHECK-NEXT: call void @llvm.experimental.noalias.scope.decl
+; CHECK-NEXT: call void @llvm.experimental.noalias.scope.decl
 ; CHECK-NEXT: [[X_I:%.*]] = load i32, i32* [[ADDR1:%.*]], align 4, !alias.scope !3, !noalias !0
+; CHECK-NEXT: store i32 [[X_I]], i32* [[ADDR2:%.*]], align 4, !alias.scope !0, !noalias !3
 ; CHECK-NEXT: [[ADDR1I_1:%.*]] = getelementptr inbounds i32, i32* [[ADDR1]], i64 1
-; CHECK-NEXT: [[ADDR2I_1:%.*]] = getelementptr inbounds i32, i32* [[ADDR2:%.*]], i64 1
-; CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl([[META0]])
-; CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl([[META3]])
-; CHECK-NEXT: [[X_I_1:%.*]] = load i32, i32* [[ADDR1I_1]], align 4, !alias.scope !3, !noalias !0
-; CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl([[META0]])
-; CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl([[META3]])
-; CHECK-NEXT: store i32 [[X_I]], i32* [[ADDR2]], align 4, !alias.scope !0, !noalias !3
-; CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl([[META0]])
-; CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl([[META3]])
-; CHECK-NEXT: store i32 [[X_I_1]], i32* [[ADDR2I_1]], align 4, !alias.scope !0, !noalias !3
+; CHECK-NEXT: [[ADDR2I_1:%.*]] = getelementptr inbounds i32, i32* [[ADDR2]], i64 1
+; CHECK-NEXT: call void @llvm.experimental.noalias.scope.decl
+; CHECK-NEXT: call void @llvm.experimental.noalias.scope.decl
+; CHECK-NEXT: [[X_I_1:%.*]] = load i32, i32* [[ADDR1I_1]], align 4, !alias.scope !7, !noalias !5
+; CHECK-NEXT: store i32 [[X_I_1]], i32* [[ADDR2I_1]], align 4, !alias.scope !5, !noalias !7
+; CHECK-NEXT: call void @llvm.experimental.noalias.scope.decl
+; CHECK-NEXT: call void @llvm.experimental.noalias.scope.decl
+; CHECK-NEXT: [[X_I_2:%.*]] = load i32, i32* [[ADDR1]], align 4, !alias.scope !11, !noalias !9
+; CHECK-NEXT: store i32 [[X_I_2]], i32* [[ADDR2]], align 4, !alias.scope !9, !noalias !11
+; CHECK-NEXT: call void @llvm.experimental.noalias.scope.decl
+; CHECK-NEXT: call void @llvm.experimental.noalias.scope.decl
+; CHECK-NEXT: [[X_I_3:%.*]] = load i32, i32* [[ADDR1I_1]], align 4, !alias.scope !15, !noalias !13
+; CHECK-NEXT: store i32 [[X_I_3]], i32* [[ADDR2I_1]], align 4, !alias.scope !13, !noalias !15
 ; CHECK-NEXT: ret void
 ;
 start: