Index: clang/test/OpenMP/target_codegen.cpp =================================================================== --- clang/test/OpenMP/target_codegen.cpp +++ clang/test/OpenMP/target_codegen.cpp @@ -397,7 +397,7 @@ // CHECK-DAG: [[RET:%.+]] = call i32 @__tgt_target_nowait_mapper(%struct.ident_t* @{{.+}}, i64 [[DEVICE:%.+]], i8* @{{[^,]+}}, i32 2, i8** [[BPR:%[^,]+]], i8** [[PR:%[^,]+]], i64* [[SIZE:%.+]], i64* getelementptr inbounds ([2 x i64], [2 x i64]* [[MAPT]], i32 0, i32 0), i8** null, i8** null) // CHECK-DAG: [[DEVICE]] = sext i32 [[DEV:%.+]] to i64 // CHECK-DAG: [[DEV]] = load i32, i32* [[DEVADDR:%.+]], align -// CHECK-DAG: [[DEVADDR]] = getelementptr inbounds [[ANON_T]], [[ANON_T]]* %12, i32 0, i32 2 +// CHECK-DAG: [[DEVADDR]] = getelementptr inbounds [[ANON_T]], [[ANON_T]]* {{%.+}}, i32 0, i32 2 // CHECK-DAG: [[BPR]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[BPRADDR:%.+]], i[[SZ]] 0, i[[SZ]] 0 // CHECK-DAG: [[PR]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[PRADDR:%.+]], i[[SZ]] 0, i[[SZ]] 0 // CHECK-DAG: [[SIZE]] = getelementptr inbounds [2 x i64], [2 x i64]* [[SIZEADDR:%.+]], i[[SZ]] 0, i[[SZ]] 0 Index: llvm/include/llvm/Analysis/TargetTransformInfoImpl.h =================================================================== --- llvm/include/llvm/Analysis/TargetTransformInfoImpl.h +++ llvm/include/llvm/Analysis/TargetTransformInfoImpl.h @@ -537,6 +537,7 @@ case Intrinsic::is_constant: case Intrinsic::lifetime_start: case Intrinsic::lifetime_end: + case Intrinsic::noalias_decl: case Intrinsic::objectsize: case Intrinsic::ptr_annotation: case Intrinsic::var_annotation: Index: llvm/include/llvm/IR/IRBuilder.h =================================================================== --- llvm/include/llvm/IR/IRBuilder.h +++ llvm/include/llvm/IR/IRBuilder.h @@ -809,6 +809,21 @@ CallInst *CreateAssumption(Value *Cond, ArrayRef OpBundles = llvm::None); + /// Create a llvm.noalias.decl intrinsic call. + Instruction *CreateNoAliasDeclaration(Value *AllocaPtr, Value *ObjId, + Value *Scope); + Instruction *CreateNoAliasDeclaration(Value *AllocaPtr, uint64_t ObjId, + Value *Scope) { + return CreateNoAliasDeclaration( + AllocaPtr, + ConstantInt::get(IntegerType::getInt64Ty(getContext()), ObjId), Scope); + } + Instruction *CreateNoAliasDeclaration(Value *AllocaPtr, MDNode *ScopeTag) { + uint64_t Zero = 0; + return CreateNoAliasDeclaration(AllocaPtr, Zero, + MetadataAsValue::get(Context, ScopeTag)); + } + /// Create a call to the experimental.gc.statepoint intrinsic to /// start a new statepoint sequence. CallInst *CreateGCStatepointCall(uint64_t ID, uint32_t NumPatchBytes, Index: llvm/include/llvm/IR/Intrinsics.h =================================================================== --- llvm/include/llvm/IR/Intrinsics.h +++ llvm/include/llvm/IR/Intrinsics.h @@ -34,6 +34,11 @@ /// function known by LLVM. The enum values are returned by /// Function::getIntrinsicID(). namespace Intrinsic { + // Abstraction for the arguments of the noalias intrinsics + static const int NoAliasDeclAllocaArg = 0; + static const int NoAliasDeclObjIdArg = 1; + static const int NoAliasDeclScopeArg = 2; + // Intrinsic ID type. This is an opaque typedef to facilitate splitting up // the enum into target-specific enums. 
 typedef unsigned ID;
Index: llvm/include/llvm/IR/Intrinsics.td
===================================================================
--- llvm/include/llvm/IR/Intrinsics.td
+++ llvm/include/llvm/IR/Intrinsics.td
@@ -536,6 +536,24 @@
 def int_assume : DefaultAttrsIntrinsic<[], [llvm_i1_ty], [IntrWillReturn, NoUndef<ArgIndex<0>>]>;
 
+// 'llvm.noalias.decl' intrinsic: Inserted at the location of a restrict
+// pointer declaration. It makes it possible to identify that a restrict
+// scope is only valid inside the body of a loop.
+//
+// Purpose of the different arguments:
+// - arg0: p.alloca: associates the restrict pointer declaration with an
+//   alloca (can be 'null' if the alloca is optimized away). The alloca can
+//   be associated with multiple restrict pointers.
+// - arg1: p.objId: identifies different objects associated with the same
+//   variable declaration. Needed to track the splitting of allocas in SROA.
+// - arg2: p.scope: metadata representing the variable declaration.
+// - returns: a dummy i8 pointer used to track dependencies, so that CSE does
+//   not migrate llvm.provenance.noalias calls across declarations.
+def int_noalias_decl
+    : DefaultAttrsIntrinsic<[llvm_anyptr_ty],
+                            [llvm_anyptr_ty, llvm_anyint_ty, llvm_metadata_ty],
+                            [IntrArgMemOnly]>; // ArgMemOnly: blocks LICM and some more
+
 // Stack Protector Intrinsic - The stackprotector intrinsic writes the stack
 // guard to the correct place on the stack frame.
 def int_stackprotector : DefaultAttrsIntrinsic<[], [llvm_ptr_ty, llvm_ptrptr_ty], []>;
Index: llvm/include/llvm/IR/Metadata.h
===================================================================
--- llvm/include/llvm/IR/Metadata.h
+++ llvm/include/llvm/IR/Metadata.h
@@ -1209,6 +1209,12 @@
       return nullptr;
     return dyn_cast_or_null<MDNode>(Node->getOperand(1));
   }
+  StringRef getName() const {
+    if (Node->getNumOperands() > 2)
+      if (MDString *N = dyn_cast_or_null<MDString>(Node->getOperand(2)))
+        return N->getString();
+    return StringRef();
+  }
 };
 
 /// Typed iterator through MDNode operands.
Index: llvm/include/llvm/Transforms/Utils/Cloning.h
===================================================================
--- llvm/include/llvm/Transforms/Utils/Cloning.h
+++ llvm/include/llvm/Transforms/Utils/Cloning.h
@@ -17,6 +17,7 @@
 #ifndef LLVM_TRANSFORMS_UTILS_CLONING_H
 #define LLVM_TRANSFORMS_UTILS_CLONING_H
 
+#include "llvm/ADT/SetVector.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/Twine.h"
 #include "llvm/Analysis/AssumptionCache.h"
@@ -268,6 +269,61 @@
                          Function *Callee, int64_t entryDelta,
                          const ValueMap<const Value *, WeakTrackingVH> *VMap = nullptr);
 
+/// Utility for cloning !noalias and !alias.scope metadata. When a code region
+/// using scoped alias metadata is cloned, the aliasing relationships may not
+/// hold between the two clones, in which case it is necessary to clone the
+/// metadata using this utility. This comes up with inlining and unrolling.
+class ScopedAliasMetadataCloner {
+  using MetadataMap = DenseMap<const MDNode *, TrackingMDNodeRef>;
+  SetVector<const MDNode *> MD;
+  MetadataMap Map;
+  void addRecursiveMetadataUses();
+
+public:
+  ScopedAliasMetadataCloner(ArrayRef<BasicBlock *> Blocks);
+  ScopedAliasMetadataCloner(const Function *F);
+
+  /// Create a new clone of the scoped alias metadata, which will be used by
+  /// subsequent remap() calls.
+  void clone();
+
+  /// Remap instructions in the given VMap from the original to the cloned
+  /// metadata.
+  void remap(ValueToValueMapTy &VMap);
+};
+
+/// Find the 'llvm.noalias.decl' intrinsics in the specified basic blocks and
+/// extract their scope. These are candidates for duplication when cloning.
+void identifyNoAliasScopesToClone(
+    ArrayRef<BasicBlock *> BBs,
+    SmallVectorImpl<MetadataAsValue *> &out_NoAliasDeclScopes);
+
+/// Duplicate the specified list of noalias decl scopes. The 'Ext' string is
+/// appended to the scope names. Afterwards, out_ClonedMVScopes maps each
+/// original MetadataAsValue onto its cloned version, and out_ClonedScopes
+/// maps each original scope MDNode onto its cloned scope.
+void cloneNoAliasScopes(
+    ArrayRef<MetadataAsValue *> NoAliasDeclScopes,
+    DenseMap<MDNode *, MDNode *> &out_ClonedScopes,
+    DenseMap<MetadataAsValue *, MetadataAsValue *> &out_ClonedMVScopes,
+    StringRef Ext, LLVMContext &Context);
+
+/// Adapt the metadata for the specified instruction according to the
+/// provided mapping. This is normally used after cloning an instruction, when
+/// some of its noalias scopes had to be cloned.
+void adaptNoAliasScopes(
+    Instruction *I, DenseMap<MDNode *, MDNode *> &ClonedScopes,
+    DenseMap<MetadataAsValue *, MetadataAsValue *> &ClonedMVScopes,
+    LLVMContext &Context);
+
+/// Clone the specified noalias decl scopes, then adapt all instructions in
+/// the NewBlocks basic blocks to the cloned versions. 'Ext' is appended to
+/// the names of the duplicated scopes.
+void cloneAndAdaptNoAliasScopes(ArrayRef<MetadataAsValue *> NoAliasDeclScopes,
+                                ArrayRef<BasicBlock *> NewBlocks,
+                                LLVMContext &Context, StringRef Ext);
 
 } // end namespace llvm
 
 #endif // LLVM_TRANSFORMS_UTILS_CLONING_H
Index: llvm/lib/Analysis/MemorySSA.cpp
===================================================================
--- llvm/lib/Analysis/MemorySSA.cpp
+++ llvm/lib/Analysis/MemorySSA.cpp
@@ -285,6 +285,7 @@
   case Intrinsic::invariant_start:
   case Intrinsic::invariant_end:
   case Intrinsic::assume:
+  case Intrinsic::noalias_decl:
     return {false, NoAlias};
   case Intrinsic::dbg_addr:
   case Intrinsic::dbg_declare:
@@ -1767,9 +1768,15 @@
   // dependencies here.
   // FIXME: Replace this special casing with a more accurate modelling of
   // assume's control dependency.
-  if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I))
-    if (II->getIntrinsicID() == Intrinsic::assume)
+  if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
+    switch (II->getIntrinsicID()) {
+    default:
+      break;
+    case Intrinsic::assume:
+    case Intrinsic::noalias_decl:
       return nullptr;
+    }
+  }
 
   // Using a nonstandard AA pipelines might leave us with unexpected modref
   // results for I, so add a check to not model instructions that may not read
Index: llvm/lib/Analysis/ValueTracking.cpp
===================================================================
--- llvm/lib/Analysis/ValueTracking.cpp
+++ llvm/lib/Analysis/ValueTracking.cpp
@@ -536,6 +536,7 @@
     case Intrinsic::invariant_end:
     case Intrinsic::lifetime_start:
     case Intrinsic::lifetime_end:
+    case Intrinsic::noalias_decl:
    case Intrinsic::objectsize:
    case Intrinsic::ptr_annotation:
    case Intrinsic::var_annotation:
Index: llvm/lib/CodeGen/IntrinsicLowering.cpp
===================================================================
--- llvm/lib/CodeGen/IntrinsicLowering.cpp
+++ llvm/lib/CodeGen/IntrinsicLowering.cpp
@@ -332,6 +332,11 @@
   case Intrinsic::var_annotation:
     break;   // Strip out these intrinsics
 
+  case Intrinsic::noalias_decl:
+    // Just forward the value
+    CI->replaceAllUsesWith(CI->getOperand(0));
+    break;
+
   case Intrinsic::memcpy: {
     Type *IntPtr = DL.getIntPtrType(Context);
     Value *Size = Builder.CreateIntCast(CI->getArgOperand(2), IntPtr,
Index: llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
===================================================================
--- llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -6417,6 +6417,12 @@
     // Drop the intrinsic, but forward the value
     setValue(&I, getValue(I.getOperand(0)));
     return;
+  case Intrinsic::noalias_decl:
+    // Generate a dummy value - it will never be used and should get optimized
+    // away
+    setValue(&I, DAG.getUNDEF(TLI.getPointerTy(DAG.getDataLayout())));
+    return;
+
   case Intrinsic::assume:
   case Intrinsic::var_annotation:
   case Intrinsic::sideeffect:
Index: llvm/lib/IR/IRBuilder.cpp
===================================================================
--- llvm/lib/IR/IRBuilder.cpp
+++ llvm/lib/IR/IRBuilder.cpp
@@ -453,6 +453,21 @@
   return createCallHelper(FnAssume, Ops, this, "", nullptr, OpBundles);
 }
 
+Instruction *IRBuilderBase::CreateNoAliasDeclaration(Value *AllocaPtr,
+                                                     Value *ObjId,
+                                                     Value *Scope) {
+  assert(AllocaPtr);
+
+  SmallVector<Type *, 3> Types = {Type::getInt8PtrTy(getContext()),
+                                  AllocaPtr->getType(), ObjId->getType()};
+  SmallVector<Value *, 3> Ops = {AllocaPtr, ObjId, Scope};
+
+  Module *M = BB->getModule();
+  auto *FnIntrinsic =
+      Intrinsic::getDeclaration(M, Intrinsic::noalias_decl, Types);
+  return createCallHelper(FnIntrinsic, Ops, this);
+}
+
 /// Create a call to a Masked Load intrinsic.
/// \p Ptr - base pointer for the load /// \p Alignment - alignment of the source location Index: llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp =================================================================== --- llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp +++ llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp @@ -1652,6 +1652,8 @@ } break; } + case Intrinsic::noalias_decl: + break; default: { // Handle target specific intrinsics Optional V = targetInstCombineIntrinsic(*II); Index: llvm/lib/Transforms/InstCombine/InstructionCombining.cpp =================================================================== --- llvm/lib/Transforms/InstCombine/InstructionCombining.cpp +++ llvm/lib/Transforms/InstCombine/InstructionCombining.cpp @@ -2551,6 +2551,7 @@ case Intrinsic::lifetime_start: case Intrinsic::lifetime_end: case Intrinsic::objectsize: + case Intrinsic::noalias_decl: Users.emplace_back(I); continue; } Index: llvm/lib/Transforms/Scalar/EarlyCSE.cpp =================================================================== --- llvm/lib/Transforms/Scalar/EarlyCSE.cpp +++ llvm/lib/Transforms/Scalar/EarlyCSE.cpp @@ -1226,6 +1226,13 @@ continue; } + // Likewise, noalias intrinsics don't actually write. + if (match(&Inst, m_Intrinsic())) { + LLVM_DEBUG(dbgs() << "EarlyCSE skipping noalias intrinsic: " << Inst + << '\n'); + continue; + } + // Skip sideeffect intrinsics, for the same reason as assume intrinsics. if (match(&Inst, m_Intrinsic())) { LLVM_DEBUG(dbgs() << "EarlyCSE skipping sideeffect: " << Inst << '\n'); Index: llvm/lib/Transforms/Utils/CloneFunction.cpp =================================================================== --- llvm/lib/Transforms/Utils/CloneFunction.cpp +++ llvm/lib/Transforms/Utils/CloneFunction.cpp @@ -27,6 +27,7 @@ #include "llvm/IR/Instructions.h" #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/LLVMContext.h" +#include "llvm/IR/MDBuilder.h" #include "llvm/IR/Metadata.h" #include "llvm/IR/Module.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" @@ -36,6 +37,8 @@ #include using namespace llvm; +#define DEBUG_TYPE "clone-function" + /// See comments in Cloning.h. BasicBlock *llvm::CloneBasicBlock(const BasicBlock *BB, ValueToValueMapTy &VMap, const Twine &NameSuffix, Function *F, @@ -883,3 +886,208 @@ return NewBB; } + +ScopedAliasMetadataCloner::ScopedAliasMetadataCloner( + ArrayRef Blocks) { + for (BasicBlock *BB : Blocks) { + for (const Instruction &I : *BB) { + if (const MDNode *M = I.getMetadata(LLVMContext::MD_alias_scope)) + MD.insert(M); + if (const MDNode *M = I.getMetadata(LLVMContext::MD_noalias)) + MD.insert(M); + } + } + addRecursiveMetadataUses(); +} + +ScopedAliasMetadataCloner::ScopedAliasMetadataCloner(const Function *F) { + for (const BasicBlock &BB : *F) { + for (const Instruction &I : BB) { + if (const MDNode *M = I.getMetadata(LLVMContext::MD_alias_scope)) + MD.insert(M); + if (const MDNode *M = I.getMetadata(LLVMContext::MD_noalias)) + MD.insert(M); + + // We also need to clone the metadata in noalias intrinsics. 
+ if (const auto *II = dyn_cast(&I)) + if (II->getIntrinsicID() == Intrinsic::noalias_decl) + if (const auto *M = dyn_cast( + cast( + II->getOperand(Intrinsic::NoAliasDeclScopeArg)) + ->getMetadata())) + MD.insert(M); + } + } + addRecursiveMetadataUses(); +} + +void ScopedAliasMetadataCloner::addRecursiveMetadataUses() { + SmallVector Queue(MD.begin(), MD.end()); + while (!Queue.empty()) { + const MDNode *M = cast(Queue.pop_back_val()); + for (const Metadata *Op : M->operands()) + if (const MDNode *OpMD = dyn_cast(Op)) + if (MD.insert(OpMD)) + Queue.push_back(OpMD); + } +} + +void ScopedAliasMetadataCloner::clone() { + // Discard a previous clone that may exist. + Map.clear(); + + SmallVector DummyNodes; + for (const MDNode *I : MD) { + DummyNodes.push_back(MDTuple::getTemporary(I->getContext(), None)); + Map[I].reset(DummyNodes.back().get()); + } + + // Create new metadata nodes to replace the dummy nodes, replacing old + // metadata references with either a dummy node or an already-created new + // node. + SmallVector NewOps; + for (const MDNode *I : MD) { + for (const Metadata *Op : I->operands()) { + if (const MDNode *M = dyn_cast(Op)) + NewOps.push_back(Map[M]); + else + NewOps.push_back(const_cast(Op)); + } + + MDNode *NewM = MDNode::get(I->getContext(), NewOps); + MDTuple *TempM = cast(Map[I]); + assert(TempM->isTemporary() && "Expected temporary node"); + + TempM->replaceAllUsesWith(NewM); + NewOps.clear(); + } +} + +void ScopedAliasMetadataCloner::remap(ValueToValueMapTy &VMap) { + if (Map.empty()) + return; // Nothing to do. + + for (auto Entry : VMap) { + // Check that key is an instruction, to skip the Argument mapping, which + // points to an instruction in the original function, not the inlined one. + if (!Entry->second || !isa(Entry->first)) + continue; + + Instruction *I = dyn_cast(Entry->second); + if (!I) + continue; + + if (MDNode *M = I->getMetadata(LLVMContext::MD_alias_scope)) + I->setMetadata(LLVMContext::MD_alias_scope, Map[M]); + + if (MDNode *M = I->getMetadata(LLVMContext::MD_noalias)) + I->setMetadata(LLVMContext::MD_noalias, Map[M]); + } +} + +void llvm::cloneNoAliasScopes( + ArrayRef NoAliasDeclScopes, + DenseMap &out_ClonedScopes, + DenseMap &out_ClonedMVScopes, + StringRef Ext, LLVMContext &Context) { + MDBuilder MDB(Context); + + for (auto *MV : NoAliasDeclScopes) { + SmallVector ScopeList; + for (auto &MDOperand : cast(MV->getMetadata())->operands()) { + if (MDNode *MD = dyn_cast(MDOperand)) { + llvm::AliasScopeNode SNANode(MD); + + std::string Name; + auto ScopeName = SNANode.getName(); + if (!ScopeName.empty()) { + Name = (Twine(ScopeName) + ":" + Ext).str(); + } else { + Name = std::string(Ext); + } + + MDNode *NewScope = MDB.createAnonymousAliasScope( + const_cast(SNANode.getDomain()), Name); + out_ClonedScopes.insert(std::make_pair(MD, NewScope)); + ScopeList.push_back(NewScope); + } + } + MDNode *NewScopeList = MDNode::get(Context, ScopeList); + out_ClonedMVScopes.insert( + std::make_pair(MV, MetadataAsValue::get(Context, NewScopeList))); + } +} + +void llvm::adaptNoAliasScopes( + Instruction *I, DenseMap &ClonedScopes, + DenseMap &ClonedMVScopes, + LLVMContext &Context) { + // MetadataAsValue will always be replaced ! 
+ for (int opI = 0, opIEnd = I->getNumOperands(); opI < opIEnd; ++opI) { + if (MetadataAsValue *MV = dyn_cast(I->getOperand(opI))) { + auto MvIt = ClonedMVScopes.find(MV); + if (MvIt != ClonedMVScopes.end()) { + I->setOperand(opI, MvIt->second); + } + } + } + + auto replaceWhenNeeded = [&](unsigned MD_ID) { + if (const MDNode *CSNoAlias = I->getMetadata(MD_ID)) { + bool needsReplacement = false; + SmallVector NewScopeList; + for (auto &MDOp : CSNoAlias->operands()) { + if (MDNode *MD = dyn_cast_or_null(MDOp)) { + auto MdIt = ClonedScopes.find(MD); + if (MdIt != ClonedScopes.end()) { + NewScopeList.push_back(MdIt->second); + needsReplacement = true; + continue; + } + NewScopeList.push_back(MD); + } + } + if (needsReplacement) { + I->setMetadata(MD_ID, MDNode::get(Context, NewScopeList)); + } + } + }; + replaceWhenNeeded(LLVMContext::MD_noalias); + replaceWhenNeeded(LLVMContext::MD_alias_scope); +} + +void llvm::cloneAndAdaptNoAliasScopes( + ArrayRef NoAliasDeclScopes, + ArrayRef NewBlocks, LLVMContext &Context, StringRef Ext) { + if (NoAliasDeclScopes.empty()) + return; + + DenseMap ClonedScopes; + DenseMap ClonedMVScopes; + LLVM_DEBUG(llvm::dbgs() << "cloneAndAdaptNoAliasScopes: cloning " + << NoAliasDeclScopes.size() << " node(s)\n"); + + cloneNoAliasScopes(NoAliasDeclScopes, ClonedScopes, ClonedMVScopes, Ext, + Context); + // Identify instructions using metadata that needs adaptation + for (BasicBlock *NewBlock : NewBlocks) { + for (Instruction &I : *NewBlock) { + adaptNoAliasScopes(&I, ClonedScopes, ClonedMVScopes, Context); + } + } +} + +void llvm::identifyNoAliasScopesToClone( + ArrayRef BBs, + SmallVectorImpl &out_NoAliasDeclScopes) { + for (auto BB : BBs) { + for (Instruction &I : *BB) { + if (auto II = dyn_cast(&I)) { + if (II->getIntrinsicID() == Intrinsic::noalias_decl) { + out_NoAliasDeclScopes.push_back(cast( + II->getOperand(Intrinsic::NoAliasDeclScopeArg))); + } + } + } + } +} Index: llvm/lib/Transforms/Utils/InlineFunction.cpp =================================================================== --- llvm/lib/Transforms/Utils/InlineFunction.cpp +++ llvm/lib/Transforms/Utils/InlineFunction.cpp @@ -821,94 +821,6 @@ } } -/// When inlining a function that contains noalias scope metadata, -/// this metadata needs to be cloned so that the inlined blocks -/// have different "unique scopes" at every call site. Were this not done, then -/// aliasing scopes from a function inlined into a caller multiple times could -/// not be differentiated (and this would lead to miscompiles because the -/// non-aliasing property communicated by the metadata could have -/// call-site-specific control dependencies). -static void CloneAliasScopeMetadata(CallBase &CB, ValueToValueMapTy &VMap) { - const Function *CalledFunc = CB.getCalledFunction(); - SetVector MD; - - // Note: We could only clone the metadata if it is already used in the - // caller. I'm omitting that check here because it might confuse - // inter-procedural alias analysis passes. We can revisit this if it becomes - // an efficiency or overhead problem. - - for (const BasicBlock &I : *CalledFunc) - for (const Instruction &J : I) { - if (const MDNode *M = J.getMetadata(LLVMContext::MD_alias_scope)) - MD.insert(M); - if (const MDNode *M = J.getMetadata(LLVMContext::MD_noalias)) - MD.insert(M); - } - - if (MD.empty()) - return; - - // Walk the existing metadata, adding the complete (perhaps cyclic) chain to - // the set. 
- SmallVector Queue(MD.begin(), MD.end()); - while (!Queue.empty()) { - const MDNode *M = cast(Queue.pop_back_val()); - for (unsigned i = 0, ie = M->getNumOperands(); i != ie; ++i) - if (const MDNode *M1 = dyn_cast(M->getOperand(i))) - if (MD.insert(M1)) - Queue.push_back(M1); - } - - // Now we have a complete set of all metadata in the chains used to specify - // the noalias scopes and the lists of those scopes. - SmallVector DummyNodes; - DenseMap MDMap; - for (const MDNode *I : MD) { - DummyNodes.push_back(MDTuple::getTemporary(CalledFunc->getContext(), None)); - MDMap[I].reset(DummyNodes.back().get()); - } - - // Create new metadata nodes to replace the dummy nodes, replacing old - // metadata references with either a dummy node or an already-created new - // node. - for (const MDNode *I : MD) { - SmallVector NewOps; - for (unsigned i = 0, ie = I->getNumOperands(); i != ie; ++i) { - const Metadata *V = I->getOperand(i); - if (const MDNode *M = dyn_cast(V)) - NewOps.push_back(MDMap[M]); - else - NewOps.push_back(const_cast(V)); - } - - MDNode *NewM = MDNode::get(CalledFunc->getContext(), NewOps); - MDTuple *TempM = cast(MDMap[I]); - assert(TempM->isTemporary() && "Expected temporary node"); - - TempM->replaceAllUsesWith(NewM); - } - - // Now replace the metadata in the new inlined instructions with the - // repacements from the map. - for (ValueToValueMapTy::iterator VMI = VMap.begin(), VMIE = VMap.end(); - VMI != VMIE; ++VMI) { - // Check that key is an instruction, to skip the Argument mapping, which - // points to an instruction in the original function, not the inlined one. - if (!VMI->second || !isa(VMI->first)) - continue; - - Instruction *NI = dyn_cast(VMI->second); - if (!NI) - continue; - - if (MDNode *M = NI->getMetadata(LLVMContext::MD_alias_scope)) - NI->setMetadata(LLVMContext::MD_alias_scope, MDMap[M]); - - if (MDNode *M = NI->getMetadata(LLVMContext::MD_noalias)) - NI->setMetadata(LLVMContext::MD_noalias, MDMap[M]); - } -} - /// If the inlined function has noalias arguments, /// then add new alias scopes for each noalias argument, tag the mapped noalias /// parameters with noalias metadata specifying the new scope, and tag all @@ -962,6 +874,19 @@ // property of the callee, but also all control dependencies in the caller. MDNode *NewScope = MDB.createAnonymousAliasScope(NewDomain, Name); NewScopes.insert(std::make_pair(A, NewScope)); + + // Introduce a llvm.noalias.decl for the noalias argument. + MDNode *AScopeList = MDNode::get(CalledFunc->getContext(), NewScope); + + // The alloca was optimized away -> use a nullptr + Value *MappedA = VMap[A]; + auto *IdentifyPAlloca = + ConstantPointerNull::get(MappedA->getType()->getPointerTo()); + auto *NoAliasDecl = + IRBuilder<>(&CB).CreateNoAliasDeclaration(IdentifyPAlloca, AScopeList); + // Ignore the result for now. The result will be used when the llvm.noalias + // intrinsic is introduced. + (void)NoAliasDecl; } // Iterate over all new instructions in the map; for all memory-access @@ -1760,6 +1685,14 @@ // Keep a list of pair (dst, src) to emit byval initializations. SmallVector, 4> ByValInit; + // When inlining a function that contains noalias scope metadata, + // this metadata needs to be cloned so that the inlined blocks + // have different "unique scopes" at every call site. + // Track the metadata that must be cloned. Do this before other changes to + // the function, so that we do not get in trouble when inlining caller == + // callee. 
+ ScopedAliasMetadataCloner SAMetadataCloner(CB.getCalledFunction()); + auto &DL = Caller->getParent()->getDataLayout(); // Calculate the vector of arguments to pass into the function cloner, which @@ -1880,8 +1813,15 @@ fixupLineNumbers(Caller, FirstNewBlock, &CB, CalledFunc->getSubprogram() != nullptr); - // Clone existing noalias metadata if necessary. - CloneAliasScopeMetadata(CB, VMap); + // When inlining a function that contains noalias scope metadata, + // this metadata needs to be cloned so that the inlined blocks + // have different "unique scopes" at every call site. Were this not done, + // then aliasing scopes from a function inlined into a caller multiple times + // could not be differentiated (and this would lead to miscompiles because + // the non-aliasing property communicated by the metadata could have + // call-site-specific control dependencies). + SAMetadataCloner.clone(); + SAMetadataCloner.remap(VMap); // Add noalias metadata if necessary. AddAliasScopeMetadata(CB, VMap, DL, CalleeAAR); Index: llvm/lib/Transforms/Utils/LoopUnroll.cpp =================================================================== --- llvm/lib/Transforms/Utils/LoopUnroll.cpp +++ llvm/lib/Transforms/Utils/LoopUnroll.cpp @@ -585,6 +585,12 @@ << DIL->getFilename() << " Line: " << DIL->getLine()); } + // Phase1: Identify what noalias metadata is inside the loop: if it is inside + // the loop, the associated metadata must be cloned for each iteration. + SmallVector LoopLocalNoAliasDeclScopes; + llvm::identifyNoAliasScopesToClone(L->getBlocks(), + LoopLocalNoAliasDeclScopes); + for (unsigned It = 1; It != ULO.Count; ++It) { SmallVector NewBlocks; SmallDenseMap NewLoops; @@ -678,6 +684,15 @@ AC->registerAssumption(II); } } + + { + // Phase2: identify what other metadata depends on the cloned version + // Phase3: after cloning, replace the metadata with the corrected version + // for both memory instructions and noalias intrinsics + std::string ext = (Twine("It") + Twine(It)).str(); + cloneAndAdaptNoAliasScopes(LoopLocalNoAliasDeclScopes, NewBlocks, + Header->getContext(), ext); + } } // Loop over the PHI nodes in the original block, setting incoming values. 
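Note on intended usage (illustrative, not part of the diff): the LoopUnroll.cpp change above shows the pattern the new Cloning.h helpers are meant for — collect the scopes declared by llvm.noalias.decl before a region is duplicated, then clone those scopes and re-attach them to each copy so that every copy keeps its own unique scope. A minimal sketch of the same pattern for a generic block-duplicating transform follows; copyRegion and duplicateRegionWithNoAliasScopes are hypothetical names standing in for the transform's existing cloning logic, while identifyNoAliasScopesToClone and cloneAndAdaptNoAliasScopes are the helpers added by this patch.

  // Sketch only: assumes the declarations added to
  // llvm/Transforms/Utils/Cloning.h by this patch.
  #include "llvm/IR/BasicBlock.h"
  #include "llvm/IR/Metadata.h"
  #include "llvm/Transforms/Utils/Cloning.h"
  using namespace llvm;

  // Hypothetical helper: clones 'Blocks' and returns the new blocks.
  SmallVector<BasicBlock *, 8> copyRegion(ArrayRef<BasicBlock *> Blocks);

  static void duplicateRegionWithNoAliasScopes(ArrayRef<BasicBlock *> Blocks,
                                               LLVMContext &Ctx,
                                               StringRef Suffix) {
    // Phase 1: find the scopes declared by llvm.noalias.decl inside the
    // region; only these are region-local and need a per-copy clone.
    SmallVector<MetadataAsValue *, 8> RegionScopes;
    identifyNoAliasScopesToClone(Blocks, RegionScopes);

    // Duplicate the region itself (transform-specific, hypothetical).
    SmallVector<BasicBlock *, 8> NewBlocks = copyRegion(Blocks);

    // Phases 2+3: clone the collected scopes and rewrite !alias.scope,
    // !noalias and the scope operands of llvm.noalias.decl in the copies.
    cloneAndAdaptNoAliasScopes(RegionScopes, NewBlocks, Ctx, Suffix);
  }
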
Index: llvm/test/Transforms/Coroutines/ArgAddr.ll =================================================================== --- llvm/test/Transforms/Coroutines/ArgAddr.ll +++ llvm/test/Transforms/Coroutines/ArgAddr.ll @@ -54,10 +54,12 @@ ; CHECK-NEXT: bitcast i8* %index.addr12.i to i1* ; CHECK-NEXT: store i1 false ; CHECK-NEXT: store i32 3 +; CHECK-NEXT: call i8* @llvm.noalias.decl ; CHECK-NEXT: store i32 3 ; CHECK-NEXT: call void @print(i32 3) ; CHECK-NEXT: store i1 false ; CHECK-NEXT: store i32 2 +; CHECK-NEXT: call i8* @llvm.noalias.decl ; CHECK-NEXT: store i32 2 ; CHECK-NEXT: call void @print(i32 2) ; CHECK: ret i32 0 Index: llvm/test/Transforms/Coroutines/coro-retcon-resume-values.ll =================================================================== --- llvm/test/Transforms/Coroutines/coro-retcon-resume-values.ll +++ llvm/test/Transforms/Coroutines/coro-retcon-resume-values.ll @@ -67,7 +67,10 @@ ; CHECK-NEXT: entry: ; CHECK: [[BUFFER:%.*]] = alloca [8 x i8], align 4 ; CHECK: [[SLOT:%.*]] = bitcast [8 x i8]* [[BUFFER]] to i32* +; CHECK-NEXT: call i8* @llvm.noalias.decl +; CHECK-NEXT: call i8* @llvm.noalias.decl ; CHECK-NEXT: store i32 7, i32* [[SLOT]], align 4 +; CHECK-NEXT: call i8* @llvm.noalias.decl ; CHECK-NEXT: call void @print(i32 7) ; CHECK-NEXT: ret i32 0 Index: llvm/test/Transforms/Coroutines/coro-retcon.ll =================================================================== --- llvm/test/Transforms/Coroutines/coro-retcon.ll +++ llvm/test/Transforms/Coroutines/coro-retcon.ll @@ -69,14 +69,17 @@ ; CHECK: [[SLOT:%.*]] = bitcast [8 x i8]* [[BUFFER]] to i32* ; CHECK-NEXT: store i32 4, i32* [[SLOT]], align 4 ; CHECK-NEXT: call void @print(i32 4) +; CHECK-NEXT: call i8* @llvm.noalias.decl ; CHECK-NEXT: [[LOAD:%.*]] = load i32, i32* [[SLOT]], align 4 ; CHECK-NEXT: [[INC:%.*]] = add i32 [[LOAD]], 1 ; CHECK-NEXT: store i32 [[INC]], i32* [[SLOT]], align 4 ; CHECK-NEXT: call void @print(i32 [[INC]]) +; CHECK-NEXT: call i8* @llvm.noalias.decl ; CHECK-NEXT: [[LOAD:%.*]] = load i32, i32* [[SLOT]], align 4 ; CHECK-NEXT: [[INC:%.*]] = add i32 [[LOAD]], 1 ; NPM-NEXT: store i32 [[INC]], i32* [[SLOT]], align 4 ; CHECK-NEXT: call void @print(i32 [[INC]]) +; CHECK-NEXT: call i8* @llvm.noalias.decl ; CHECK-NEXT: ret i32 0 define hidden { i8*, i8* } @g(i8* %buffer, i16* %ptr) { Index: llvm/test/Transforms/Coroutines/ex2.ll =================================================================== --- llvm/test/Transforms/Coroutines/ex2.ll +++ llvm/test/Transforms/Coroutines/ex2.ll @@ -49,7 +49,9 @@ ret i32 0 ; CHECK-NOT: call i8* @CustomAlloc ; CHECK: call void @print(i32 4) +; CHECK-NEXT: call i8* @llvm.noalias.decl ; CHECK-NEXT: call void @print(i32 5) +; CHECK-NEXT: call i8* @llvm.noalias.decl ; CHECK-NEXT: call void @print(i32 6) ; CHECK-NEXT: ret i32 0 } Index: llvm/test/Transforms/Coroutines/ex3.ll =================================================================== --- llvm/test/Transforms/Coroutines/ex3.ll +++ llvm/test/Transforms/Coroutines/ex3.ll @@ -53,7 +53,9 @@ ret i32 0 ; CHECK-NOT: i8* @malloc ; CHECK: call void @print(i32 4) +; CHECK-NEXT: call i8* @llvm.noalias.decl ; CHECK-NEXT: call void @print(i32 -5) +; CHECK-NEXT: call i8* @llvm.noalias.decl ; CHECK-NEXT: call void @print(i32 5) ; CHECK: ret i32 0 } Index: llvm/test/Transforms/Coroutines/ex4.ll =================================================================== --- llvm/test/Transforms/Coroutines/ex4.ll +++ llvm/test/Transforms/Coroutines/ex4.ll @@ -50,7 +50,9 @@ call void @llvm.coro.destroy(i8* %hdl) ret i32 0 ; CHECK: call void @print(i32 
4) +; CHECK-NEXT: call i8* @llvm.noalias.decl ; CHECK-NEXT: call void @print(i32 5) +; CHECK-NEXT: call i8* @llvm.noalias.decl ; CHECK-NEXT: call void @print(i32 6) ; CHECK: ret i32 0 } Index: llvm/test/Transforms/Inline/launder.invariant.group.ll =================================================================== --- llvm/test/Transforms/Inline/launder.invariant.group.ll +++ llvm/test/Transforms/Inline/launder.invariant.group.ll @@ -23,7 +23,7 @@ ; CHECK-LABEL: define i32 @foo(%struct.A* noalias define i32 @foo(%struct.A* noalias) { ; CHECK-NOT: call i32 @bar( - ; CHECK-NOT: noalias + ; CHECK-NOT: !noalias %2 = tail call i32 @bar(%struct.A* %0) ret i32 %2 } Index: llvm/test/Transforms/Inline/noalias-calls-always.ll =================================================================== --- llvm/test/Transforms/Inline/noalias-calls-always.ll +++ llvm/test/Transforms/Inline/noalias-calls-always.ll @@ -1,3 +1,4 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py ; RUN: opt -aa-pipeline=basic-aa -passes=always-inline -enable-noalias-to-md-conversion -S < %s | FileCheck %s target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" target triple = "x86_64-unknown-linux-gnu" @@ -6,6 +7,16 @@ declare void @hey() #0 define void @hello(i8* noalias nocapture %a, i8* noalias nocapture readonly %c, i8* nocapture %b) #1 { +; CHECK-LABEL: @hello( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[L:%.*]] = alloca i8, i32 512, align 1 +; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[A:%.*]], i8* align 16 [[B:%.*]], i64 16, i1 false) +; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[B]], i8* align 16 [[C:%.*]], i64 16, i1 false) +; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[A]], i8* align 16 [[C]], i64 16, i1 false) +; CHECK-NEXT: call void @hey() +; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[L]], i8* align 16 [[C]], i64 16, i1 false) +; CHECK-NEXT: ret void +; entry: %l = alloca i8, i32 512, align 1 call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 %a, i8* align 16 %b, i64 16, i1 0) @@ -17,12 +28,36 @@ } define void @foo(i8* nocapture %a, i8* nocapture readonly %c, i8* nocapture %b) #2 { +; CHECK-LABEL: @foo( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[L_I:%.*]] = alloca i8, i32 512, align 1 +; CHECK-NEXT: [[TMP0:%.*]] = call i8* @llvm.noalias.decl.p0i8.p0p0i8.i64(i8** null, i64 0, [[META0:metadata !.*]]) +; CHECK-NEXT: [[TMP1:%.*]] = call i8* @llvm.noalias.decl.p0i8.p0p0i8.i64(i8** null, i64 0, [[META3:metadata !.*]]) +; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 512, i8* [[L_I]]) +; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[A:%.*]], i8* align 16 [[B:%.*]], i64 16, i1 false) [[ATTR4:#.*]], !noalias !3 +; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[B]], i8* align 16 [[C:%.*]], i64 16, i1 false) [[ATTR4]], !noalias !0 +; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[A]], i8* align 16 [[C]], i64 16, i1 false) [[ATTR4]], !alias.scope !5 +; CHECK-NEXT: call void @hey() [[ATTR4]], !noalias !5 +; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[L_I]], i8* align 16 [[C]], i64 16, i1 false) [[ATTR4]], !noalias !0 +; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 512, i8* [[L_I]]) +; CHECK-NEXT: ret void +; entry: tail call void @hello(i8* %a, i8* %c, i8* %b) ret void } define void @hello_cs(i8* nocapture %a, i8* nocapture readonly %c, i8* 
nocapture %b) #1 { +; CHECK-LABEL: @hello_cs( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[L:%.*]] = alloca i8, i32 512, align 1 +; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[A:%.*]], i8* align 16 [[B:%.*]], i64 16, i1 false) +; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[B]], i8* align 16 [[C:%.*]], i64 16, i1 false) +; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[A]], i8* align 16 [[C]], i64 16, i1 false) +; CHECK-NEXT: call void @hey() +; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[L]], i8* align 16 [[C]], i64 16, i1 false) +; CHECK-NEXT: ret void +; entry: %l = alloca i8, i32 512, align 1 call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 %a, i8* align 16 %b, i64 16, i1 0) @@ -34,46 +69,39 @@ } define void @foo_cs(i8* nocapture %a, i8* nocapture readonly %c, i8* nocapture %b) #2 { +; CHECK-LABEL: @foo_cs( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[L_I:%.*]] = alloca i8, i32 512, align 1 +; CHECK-NEXT: [[TMP0:%.*]] = call i8* @llvm.noalias.decl.p0i8.p0p0i8.i64(i8** null, i64 0, [[META6:metadata !.*]]) +; CHECK-NEXT: [[TMP1:%.*]] = call i8* @llvm.noalias.decl.p0i8.p0p0i8.i64(i8** null, i64 0, [[META9:metadata !.*]]) +; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 512, i8* [[L_I]]) +; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[A:%.*]], i8* align 16 [[B:%.*]], i64 16, i1 false) [[ATTR4]], !noalias !9 +; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[B]], i8* align 16 [[C:%.*]], i64 16, i1 false) [[ATTR4]], !noalias !6 +; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[A]], i8* align 16 [[C]], i64 16, i1 false) [[ATTR4]], !alias.scope !11 +; CHECK-NEXT: call void @hey() [[ATTR4]], !noalias !11 +; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[L_I]], i8* align 16 [[C]], i64 16, i1 false) [[ATTR4]], !noalias !6 +; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 512, i8* [[L_I]]) +; CHECK-NEXT: ret void +; entry: tail call void @hello_cs(i8* noalias %a, i8* noalias %c, i8* %b) ret void } -; CHECK: define void @foo(i8* nocapture %a, i8* nocapture readonly %c, i8* nocapture %b) -; CHECK: entry: -; CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 %a, i8* align 16 %b, i64 16, i1 false) #4, !noalias !0 -; CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 %b, i8* align 16 %c, i64 16, i1 false) #4, !noalias !3 -; CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 %a, i8* align 16 %c, i64 16, i1 false) #4, !alias.scope !5 -; CHECK: call void @hey() #4, !noalias !5 -; CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 %{{.*}}, i8* align 16 %c, i64 16, i1 false) #4, !noalias !3 -; CHECK: ret void -; CHECK: } - -; CHECK: define void @foo_cs(i8* nocapture %a, i8* nocapture readonly %c, i8* nocapture %b) -; CHECK: entry: -; CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 %a, i8* align 16 %b, i64 16, i1 false) #4, !noalias !6 -; CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 %b, i8* align 16 %c, i64 16, i1 false) #4, !noalias !9 -; CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 %a, i8* align 16 %c, i64 16, i1 false) #4, !alias.scope !11 -; CHECK: call void @hey() #4, !noalias !11 -; CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 %{{.*}}, i8* align 16 %c, i64 16, i1 false) #4, !noalias !9 -; CHECK: ret void -; CHECK: } - attributes #0 = { nounwind argmemonly willreturn } attributes #1 = { nounwind alwaysinline } attributes #2 = { nounwind uwtable } ; CHECK: !0 = !{!1} -; CHECK: !1 = distinct !{!1, 
!2, !"hello: %c"} +; CHECK: !1 = distinct !{!1, !2, !"hello: %a"} ; CHECK: !2 = distinct !{!2, !"hello"} ; CHECK: !3 = !{!4} -; CHECK: !4 = distinct !{!4, !2, !"hello: %a"} -; CHECK: !5 = !{!4, !1} +; CHECK: !4 = distinct !{!4, !2, !"hello: %c"} +; CHECK: !5 = !{!1, !4} ; CHECK: !6 = !{!7} -; CHECK: !7 = distinct !{!7, !8, !"hello_cs: %c"} +; CHECK: !7 = distinct !{!7, !8, !"hello_cs: %a"} ; CHECK: !8 = distinct !{!8, !"hello_cs"} ; CHECK: !9 = !{!10} -; CHECK: !10 = distinct !{!10, !8, !"hello_cs: %a"} -; CHECK: !11 = !{!10, !7} - +; CHECK: !10 = distinct !{!10, !8, !"hello_cs: %c"} +; CHECK: !11 = !{!7, !10} Index: llvm/test/Transforms/Inline/noalias-calls.ll =================================================================== --- llvm/test/Transforms/Inline/noalias-calls.ll +++ llvm/test/Transforms/Inline/noalias-calls.ll @@ -8,6 +8,17 @@ declare void @hey() #0 define void @hello(i8* noalias nocapture %a, i8* noalias nocapture readonly %c, i8* nocapture %b) #1 { +; CHECK-LABEL: define {{[^@]+}}@hello +; CHECK-SAME: (i8* noalias nocapture [[A:%.*]], i8* noalias nocapture readonly [[C:%.*]], i8* nocapture [[B:%.*]]) [[ATTR1:#.*]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[L:%.*]] = alloca i8, i32 512, align 1 +; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[A]], i8* align 16 [[B]], i64 16, i1 false) +; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[B]], i8* align 16 [[C]], i64 16, i1 false) +; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[A]], i8* align 16 [[C]], i64 16, i1 false) +; CHECK-NEXT: call void @hey() +; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[L]], i8* align 16 [[C]], i64 16, i1 false) +; CHECK-NEXT: ret void +; entry: %l = alloca i8, i32 512, align 1 call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 %a, i8* align 16 %b, i64 16, i1 0) @@ -19,12 +30,38 @@ } define void @foo(i8* nocapture %a, i8* nocapture readonly %c, i8* nocapture %b) #2 { +; CHECK-LABEL: define {{[^@]+}}@foo +; CHECK-SAME: (i8* nocapture [[A:%.*]], i8* nocapture readonly [[C:%.*]], i8* nocapture [[B:%.*]]) [[ATTR2:#.*]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[L_I:%.*]] = alloca i8, i32 512, align 1 +; CHECK-NEXT: [[TMP0:%.*]] = call i8* @llvm.noalias.decl.p0i8.p0p0i8.i64(i8** null, i64 0, [[META0:metadata !.*]]) +; CHECK-NEXT: [[TMP1:%.*]] = call i8* @llvm.noalias.decl.p0i8.p0p0i8.i64(i8** null, i64 0, [[META3:metadata !.*]]) +; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 512, i8* [[L_I]]) +; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[A]], i8* align 16 [[B]], i64 16, i1 false) [[ATTR2]], !noalias !3 +; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[B]], i8* align 16 [[C]], i64 16, i1 false) [[ATTR2]], !noalias !0 +; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[A]], i8* align 16 [[C]], i64 16, i1 false) [[ATTR2]], !alias.scope !5 +; CHECK-NEXT: call void @hey() [[ATTR2]], !noalias !5 +; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[L_I]], i8* align 16 [[C]], i64 16, i1 false) [[ATTR2]], !noalias !0 +; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 512, i8* [[L_I]]) +; CHECK-NEXT: ret void +; entry: tail call void @hello(i8* %a, i8* %c, i8* %b) ret void } define void @hello_cs(i8* nocapture %a, i8* nocapture readonly %c, i8* nocapture %b) #1 { +; CHECK-LABEL: define {{[^@]+}}@hello_cs +; CHECK-SAME: (i8* nocapture [[A:%.*]], i8* nocapture readonly [[C:%.*]], i8* nocapture [[B:%.*]]) [[ATTR1]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[L:%.*]] = alloca i8, 
i32 512, align 1 +; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[A]], i8* align 16 [[B]], i64 16, i1 false) +; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[B]], i8* align 16 [[C]], i64 16, i1 false) +; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[A]], i8* align 16 [[C]], i64 16, i1 false) +; CHECK-NEXT: call void @hey() +; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[L]], i8* align 16 [[C]], i64 16, i1 false) +; CHECK-NEXT: ret void +; entry: %l = alloca i8, i32 512, align 1 call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 %a, i8* align 16 %b, i64 16, i1 0) @@ -36,46 +73,41 @@ } define void @foo_cs(i8* nocapture %a, i8* nocapture readonly %c, i8* nocapture %b) #2 { +; CHECK-LABEL: define {{[^@]+}}@foo_cs +; CHECK-SAME: (i8* nocapture [[A:%.*]], i8* nocapture readonly [[C:%.*]], i8* nocapture [[B:%.*]]) [[ATTR2]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[L_I:%.*]] = alloca i8, i32 512, align 1 +; CHECK-NEXT: [[TMP0:%.*]] = call i8* @llvm.noalias.decl.p0i8.p0p0i8.i64(i8** null, i64 0, [[META6:metadata !.*]]) +; CHECK-NEXT: [[TMP1:%.*]] = call i8* @llvm.noalias.decl.p0i8.p0p0i8.i64(i8** null, i64 0, [[META9:metadata !.*]]) +; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 512, i8* [[L_I]]) +; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[A]], i8* align 16 [[B]], i64 16, i1 false) [[ATTR2]], !noalias !9 +; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[B]], i8* align 16 [[C]], i64 16, i1 false) [[ATTR2]], !noalias !6 +; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[A]], i8* align 16 [[C]], i64 16, i1 false) [[ATTR2]], !alias.scope !11 +; CHECK-NEXT: call void @hey() [[ATTR2]], !noalias !11 +; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[L_I]], i8* align 16 [[C]], i64 16, i1 false) [[ATTR2]], !noalias !6 +; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 512, i8* [[L_I]]) +; CHECK-NEXT: ret void +; entry: tail call void @hello_cs(i8* noalias %a, i8* noalias %c, i8* %b) ret void } -; CHECK: define void @foo(i8* nocapture %a, i8* nocapture readonly %c, i8* nocapture %b) #2 { -; CHECK: entry: -; CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 %a, i8* align 16 %b, i64 16, i1 false) #2, !noalias !0 -; CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 %b, i8* align 16 %c, i64 16, i1 false) #2, !noalias !3 -; CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 %a, i8* align 16 %c, i64 16, i1 false) #2, !alias.scope !5 -; CHECK: call void @hey() #2, !noalias !5 -; CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 %{{.*}}, i8* align 16 %c, i64 16, i1 false) #2, !noalias !3 -; CHECK: ret void -; CHECK: } - -; CHECK: define void @foo_cs(i8* nocapture %a, i8* nocapture readonly %c, i8* nocapture %b) #2 { -; CHECK: entry: -; CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 %a, i8* align 16 %b, i64 16, i1 false) #2, !noalias !6 -; CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 %b, i8* align 16 %c, i64 16, i1 false) #2, !noalias !9 -; CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 %a, i8* align 16 %c, i64 16, i1 false) #2, !alias.scope !11 -; CHECK: call void @hey() #2, !noalias !11 -; CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 %{{.*}}, i8* align 16 %c, i64 16, i1 false) #2, !noalias !9 -; CHECK: ret void -; CHECK: } - attributes #0 = { argmemonly nofree nosync nounwind willreturn } attributes #1 = { argmemonly nounwind willreturn } attributes #2 = { nounwind } attributes #3 = { nounwind 
uwtable } ; CHECK: !0 = !{!1} -; CHECK: !1 = distinct !{!1, !2, !"hello: %c"} +; CHECK: !1 = distinct !{!1, !2, !"hello: %a"} ; CHECK: !2 = distinct !{!2, !"hello"} ; CHECK: !3 = !{!4} -; CHECK: !4 = distinct !{!4, !2, !"hello: %a"} -; CHECK: !5 = !{!4, !1} +; CHECK: !4 = distinct !{!4, !2, !"hello: %c"} +; CHECK: !5 = !{!1, !4} ; CHECK: !6 = !{!7} -; CHECK: !7 = distinct !{!7, !8, !"hello_cs: %c"} +; CHECK: !7 = distinct !{!7, !8, !"hello_cs: %a"} ; CHECK: !8 = distinct !{!8, !"hello_cs"} ; CHECK: !9 = !{!10} -; CHECK: !10 = distinct !{!10, !8, !"hello_cs: %a"} -; CHECK: !11 = !{!10, !7} +; CHECK: !10 = distinct !{!10, !8, !"hello_cs: %c"} +; CHECK: !11 = !{!7, !10} Index: llvm/test/Transforms/Inline/noalias.ll =================================================================== --- llvm/test/Transforms/Inline/noalias.ll +++ llvm/test/Transforms/Inline/noalias.ll @@ -19,14 +19,15 @@ ret void } -; CHECK: define void @foo(float* nocapture %a, float* nocapture readonly %c) #0 { +; CHECK-LABEL: define void @foo(float* nocapture %a, float* nocapture readonly %c) #0 { ; CHECK: entry: -; CHECK: %0 = load float, float* %c, align 4, !noalias !0 +; CHECK: call i8* @llvm.noalias.decl +; CHECK: [[TMP0:%.+]] = load float, float* %c, align 4, !noalias !0 ; CHECK: %arrayidx.i = getelementptr inbounds float, float* %a, i64 5 -; CHECK: store float %0, float* %arrayidx.i, align 4, !alias.scope !0 -; CHECK: %1 = load float, float* %c, align 4 +; CHECK: store float [[TMP0]], float* %arrayidx.i, align 4, !alias.scope !0 +; CHECK: [[TMP1:%.+]] = load float, float* %c, align 4 ; CHECK: %arrayidx = getelementptr inbounds float, float* %a, i64 7 -; CHECK: store float %1, float* %arrayidx, align 4 +; CHECK: store float [[TMP1]], float* %arrayidx, align 4 ; CHECK: ret void ; CHECK: } @@ -49,16 +50,18 @@ ret void } -; CHECK: define void @foo2(float* nocapture %a, float* nocapture %b, float* nocapture readonly %c) #0 { +; CHECK-LABEL: define void @foo2(float* nocapture %a, float* nocapture %b, float* nocapture readonly %c) #0 { ; CHECK: entry: -; CHECK: %0 = load float, float* %c, align 4, !noalias !3 +; CHECK: call i8* @llvm.noalias.decl.p0i8.p0p0f32.i64(float** null, i64 0, metadata !3) +; CHECK: call i8* @llvm.noalias.decl.p0i8.p0p0f32.i64(float** null, i64 0, metadata !6) +; CHECK: [[TMP0:%.+]] = load float, float* %c, align 4, !noalias !8 ; CHECK: %arrayidx.i = getelementptr inbounds float, float* %a, i64 5 -; CHECK: store float %0, float* %arrayidx.i, align 4, !alias.scope !7, !noalias !8 +; CHECK: store float [[TMP0]], float* %arrayidx.i, align 4, !alias.scope !3, !noalias !6 ; CHECK: %arrayidx1.i = getelementptr inbounds float, float* %b, i64 8 -; CHECK: store float %0, float* %arrayidx1.i, align 4, !alias.scope !8, !noalias !7 -; CHECK: %1 = load float, float* %c, align 4 +; CHECK: store float [[TMP0]], float* %arrayidx1.i, align 4, !alias.scope !6, !noalias !3 +; CHECK: [[TMP1:%.+]] = load float, float* %c, align 4 ; CHECK: %arrayidx = getelementptr inbounds float, float* %a, i64 7 -; CHECK: store float %1, float* %arrayidx, align 4 +; CHECK: store float [[TMP1]], float* %arrayidx, align 4 ; CHECK: ret void ; CHECK: } @@ -67,10 +70,9 @@ ; CHECK: !0 = !{!1} ; CHECK: !1 = distinct !{!1, !2, !"hello: %a"} ; CHECK: !2 = distinct !{!2, !"hello"} -; CHECK: !3 = !{!4, !6} +; CHECK: !3 = !{!4} ; CHECK: !4 = distinct !{!4, !5, !"hello2: %a"} ; CHECK: !5 = distinct !{!5, !"hello2"} -; CHECK: !6 = distinct !{!6, !5, !"hello2: %b"} -; CHECK: !7 = !{!4} -; CHECK: !8 = !{!6} - +; CHECK: !6 = !{!7} +; CHECK: !7 = distinct 
!{!7, !5, !"hello2: %b"} +; CHECK: !8 = !{!4, !7} Index: llvm/test/Transforms/Inline/noalias2.ll =================================================================== --- llvm/test/Transforms/Inline/noalias2.ll +++ llvm/test/Transforms/Inline/noalias2.ll @@ -7,7 +7,7 @@ define void @hello(float* noalias nocapture %a, float* noalias nocapture readonly %c) #0 { ; CHECK-LABEL: define {{[^@]+}}@hello -; CHECK-SAME: (float* noalias nocapture [[A:%.*]], float* noalias nocapture readonly [[C:%.*]]) #0 +; CHECK-SAME: (float* noalias nocapture [[A:%.*]], float* noalias nocapture readonly [[C:%.*]]) [[ATTR0:#.*]] { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = load float, float* [[C]], align 4 ; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[A]], i64 5 @@ -29,14 +29,16 @@ define void @foo(float* noalias nocapture %a, float* noalias nocapture readonly %c) #0 { ; CHECK-LABEL: define {{[^@]+}}@foo -; CHECK-SAME: (float* noalias nocapture [[A:%.*]], float* noalias nocapture readonly [[C:%.*]]) #0 +; CHECK-SAME: (float* noalias nocapture [[A:%.*]], float* noalias nocapture readonly [[C:%.*]]) [[ATTR0]] { ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = load float, float* [[C]], align 4, !alias.scope !0, !noalias !3 +; CHECK-NEXT: [[TMP0:%.*]] = call i8* @llvm.noalias.decl.p0i8.p0p0f32.i64(float** null, i64 0, [[META0:metadata !.*]]) +; CHECK-NEXT: [[TMP1:%.*]] = call i8* @llvm.noalias.decl.p0i8.p0p0f32.i64(float** null, i64 0, [[META3:metadata !.*]]) +; CHECK-NEXT: [[TMP2:%.*]] = load float, float* [[C]], align 4, !alias.scope !3, !noalias !0 ; CHECK-NEXT: [[ARRAYIDX_I:%.*]] = getelementptr inbounds float, float* [[A]], i64 5 -; CHECK-NEXT: store float [[TMP0]], float* [[ARRAYIDX_I]], align 4, !alias.scope !3, !noalias !0 -; CHECK-NEXT: [[TMP1:%.*]] = load float, float* [[C]], align 4 +; CHECK-NEXT: store float [[TMP2]], float* [[ARRAYIDX_I]], align 4, !alias.scope !0, !noalias !3 +; CHECK-NEXT: [[TMP3:%.*]] = load float, float* [[C]], align 4 ; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[A]], i64 7 -; CHECK-NEXT: store float [[TMP1]], float* [[ARRAYIDX]], align 4 +; CHECK-NEXT: store float [[TMP3]], float* [[ARRAYIDX]], align 4 ; CHECK-NEXT: ret void ; ; ASSUME-LABEL: @foo( @@ -59,7 +61,7 @@ define void @hello2(float* noalias nocapture %a, float* noalias nocapture %b, float* nocapture readonly %c) #0 { ; CHECK-LABEL: define {{[^@]+}}@hello2 -; CHECK-SAME: (float* noalias nocapture [[A:%.*]], float* noalias nocapture [[B:%.*]], float* nocapture readonly [[C:%.*]]) #0 +; CHECK-SAME: (float* noalias nocapture [[A:%.*]], float* noalias nocapture [[B:%.*]], float* nocapture readonly [[C:%.*]]) [[ATTR0]] { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = load float, float* [[C]], align 4 ; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[A]], i64 6 @@ -81,22 +83,28 @@ ; foo2(), the noalias scopes are properly concatenated. 
define void @foo2(float* nocapture %a, float* nocapture %b, float* nocapture readonly %c) #0 { ; CHECK-LABEL: define {{[^@]+}}@foo2 -; CHECK-SAME: (float* nocapture [[A:%.*]], float* nocapture [[B:%.*]], float* nocapture readonly [[C:%.*]]) #0 +; CHECK-SAME: (float* nocapture [[A:%.*]], float* nocapture [[B:%.*]], float* nocapture readonly [[C:%.*]]) [[ATTR0]] { ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = load float, float* [[C]], align 4, !alias.scope !5, !noalias !10 +; CHECK-NEXT: [[TMP0:%.*]] = call i8* @llvm.noalias.decl.p0i8.p0p0f32.i64(float** null, i64 0, [[META5:metadata !.*]]) +; CHECK-NEXT: [[TMP1:%.*]] = call i8* @llvm.noalias.decl.p0i8.p0p0f32.i64(float** null, i64 0, [[META8:metadata !.*]]) +; CHECK-NEXT: [[TMP2:%.*]] = call i8* @llvm.noalias.decl.p0i8.p0p0f32.i64(float** null, i64 0, [[META0]]) [[ATTR2:#.*]], !noalias !10 +; CHECK-NEXT: [[TMP3:%.*]] = call i8* @llvm.noalias.decl.p0i8.p0p0f32.i64(float** null, i64 0, [[META3]]) [[ATTR2]], !noalias !10 +; CHECK-NEXT: [[TMP4:%.*]] = load float, float* [[C]], align 4, !alias.scope !11, !noalias !14 ; CHECK-NEXT: [[ARRAYIDX_I_I:%.*]] = getelementptr inbounds float, float* [[A]], i64 5 -; CHECK-NEXT: store float [[TMP0]], float* [[ARRAYIDX_I_I]], align 4, !alias.scope !10, !noalias !5 -; CHECK-NEXT: [[TMP1:%.*]] = load float, float* [[C]], align 4, !alias.scope !13, !noalias !14 +; CHECK-NEXT: store float [[TMP4]], float* [[ARRAYIDX_I_I]], align 4, !alias.scope !14, !noalias !11 +; CHECK-NEXT: [[TMP5:%.*]] = load float, float* [[C]], align 4, !alias.scope !8, !noalias !5 ; CHECK-NEXT: [[ARRAYIDX_I:%.*]] = getelementptr inbounds float, float* [[A]], i64 7 -; CHECK-NEXT: store float [[TMP1]], float* [[ARRAYIDX_I]], align 4, !alias.scope !14, !noalias !13 -; CHECK-NEXT: [[TMP2:%.*]] = load float, float* [[C]], align 4, !noalias !15 +; CHECK-NEXT: store float [[TMP5]], float* [[ARRAYIDX_I]], align 4, !alias.scope !5, !noalias !8 +; CHECK-NEXT: [[TMP6:%.*]] = call i8* @llvm.noalias.decl.p0i8.p0p0f32.i64(float** null, i64 0, [[META16:metadata !.*]]) +; CHECK-NEXT: [[TMP7:%.*]] = call i8* @llvm.noalias.decl.p0i8.p0p0f32.i64(float** null, i64 0, [[META19:metadata !.*]]) +; CHECK-NEXT: [[TMP8:%.*]] = load float, float* [[C]], align 4, !noalias !21 ; CHECK-NEXT: [[ARRAYIDX_I1:%.*]] = getelementptr inbounds float, float* [[A]], i64 6 -; CHECK-NEXT: store float [[TMP2]], float* [[ARRAYIDX_I1]], align 4, !alias.scope !19, !noalias !20 +; CHECK-NEXT: store float [[TMP8]], float* [[ARRAYIDX_I1]], align 4, !alias.scope !16, !noalias !19 ; CHECK-NEXT: [[ARRAYIDX1_I:%.*]] = getelementptr inbounds float, float* [[B]], i64 8 -; CHECK-NEXT: store float [[TMP2]], float* [[ARRAYIDX1_I]], align 4, !alias.scope !20, !noalias !19 -; CHECK-NEXT: [[TMP3:%.*]] = load float, float* [[C]], align 4 +; CHECK-NEXT: store float [[TMP8]], float* [[ARRAYIDX1_I]], align 4, !alias.scope !19, !noalias !16 +; CHECK-NEXT: [[TMP9:%.*]] = load float, float* [[C]], align 4 ; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[A]], i64 7 -; CHECK-NEXT: store float [[TMP3]], float* [[ARRAYIDX]], align 4 +; CHECK-NEXT: store float [[TMP9]], float* [[ARRAYIDX]], align 4 ; CHECK-NEXT: ret void ; entry: @@ -109,26 +117,27 @@ } ; NO_ASSUME: !0 = !{!1} -; NO_ASSUME: !1 = distinct !{!1, !2, !"hello: %c"} +; NO_ASSUME: !1 = distinct !{!1, !2, !"hello: %a"} ; NO_ASSUME: !2 = distinct !{!2, !"hello"} ; NO_ASSUME: !3 = !{!4} -; NO_ASSUME: !4 = distinct !{!4, !2, !"hello: %a"} -; NO_ASSUME: !5 = !{!6, !8} -; NO_ASSUME: !6 = distinct !{!6, !7, !"hello: %c"} -; 
NO_ASSUME: !7 = distinct !{!7, !"hello"} -; NO_ASSUME: !8 = distinct !{!8, !9, !"foo: %c"} -; NO_ASSUME: !9 = distinct !{!9, !"foo"} -; NO_ASSUME: !10 = !{!11, !12} -; NO_ASSUME: !11 = distinct !{!11, !7, !"hello: %a"} -; NO_ASSUME: !12 = distinct !{!12, !9, !"foo: %a"} -; NO_ASSUME: !13 = !{!8} -; NO_ASSUME: !14 = !{!12} -; NO_ASSUME: !15 = !{!16, !18} -; NO_ASSUME: !16 = distinct !{!16, !17, !"hello2: %a"} -; NO_ASSUME: !17 = distinct !{!17, !"hello2"} -; NO_ASSUME: !18 = distinct !{!18, !17, !"hello2: %b"} -; NO_ASSUME: !19 = !{!16} -; NO_ASSUME: !20 = !{!18} +; NO_ASSUME: !4 = distinct !{!4, !2, !"hello: %c"} +; NO_ASSUME: !5 = !{!6} +; NO_ASSUME: !6 = distinct !{!6, !7, !"foo: %a"} +; NO_ASSUME: !7 = distinct !{!7, !"foo"} +; NO_ASSUME: !8 = !{!9} +; NO_ASSUME: !9 = distinct !{!9, !7, !"foo: %c"} +; NO_ASSUME: !10 = !{!6, !9} +; NO_ASSUME: !11 = !{!12, !9} +; NO_ASSUME: !12 = distinct !{!12, !13, !"hello: %c"} +; NO_ASSUME: !13 = distinct !{!13, !"hello"} +; NO_ASSUME: !14 = !{!15, !6} +; NO_ASSUME: !15 = distinct !{!15, !13, !"hello: %a"} +; NO_ASSUME: !16 = !{!17} +; NO_ASSUME: !17 = distinct !{!17, !18, !"hello2: %a"} +; NO_ASSUME: !18 = distinct !{!18, !"hello2"} +; NO_ASSUME: !19 = !{!20} +; NO_ASSUME: !20 = distinct !{!20, !18, !"hello2: %b"} +; NO_ASSUME: !21 = !{!17, !20} attributes #0 = { nounwind uwtable } Index: llvm/test/Transforms/LoopUnroll/noalias.ll =================================================================== --- /dev/null +++ llvm/test/Transforms/LoopUnroll/noalias.ll @@ -0,0 +1,62 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt -S -loop-unroll -unroll-count=4 < %s | FileCheck %s + +define void @pr39282(i32* %addr1, i32* %addr2) { +; CHECK-LABEL: @pr39282( +; CHECK-NEXT: start: +; CHECK-NEXT: br label [[BODY:%.*]] +; CHECK: body: +; CHECK-NEXT: call i8* @llvm.noalias.decl.p0i8.p0p0i8.i64(i8** null, i64 0, metadata !0) +; CHECK-NEXT: [[X:%.*]] = load i32, i32* [[ADDR1:%.*]], align 4, !alias.scope !0 +; CHECK-NEXT: store i32 [[X]], i32* [[ADDR2:%.*]], align 4, !noalias !0 +; CHECK-NEXT: [[ADDR1I_1:%.*]] = getelementptr inbounds i32, i32* [[ADDR1]], i32 1 +; CHECK-NEXT: [[ADDR2I_1:%.*]] = getelementptr inbounds i32, i32* [[ADDR2]], i32 1 +; CHECK-NEXT: call i8* @llvm.noalias.decl.p0i8.p0p0i8.i64(i8** null, i64 0, metadata !3) +; CHECK-NEXT: [[X_1:%.*]] = load i32, i32* [[ADDR1I_1]], align 4, !alias.scope !3 +; CHECK-NEXT: store i32 [[X_1]], i32* [[ADDR2I_1]], align 4, !noalias !3 +; CHECK-NEXT: call i8* @llvm.noalias.decl.p0i8.p0p0i8.i64(i8** null, i64 0, metadata !5) +; CHECK-NEXT: [[X_2:%.*]] = load i32, i32* [[ADDR1]], align 4, !alias.scope !5 +; CHECK-NEXT: store i32 [[X_2]], i32* [[ADDR2]], align 4, !noalias !5 +; CHECK-NEXT: [[ADDR1I_3:%.*]] = getelementptr inbounds i32, i32* [[ADDR1]], i32 1 +; CHECK-NEXT: [[ADDR2I_3:%.*]] = getelementptr inbounds i32, i32* [[ADDR2]], i32 1 +; CHECK-NEXT: call i8* @llvm.noalias.decl.p0i8.p0p0i8.i64(i8** null, i64 0, metadata !7) +; CHECK-NEXT: [[X_3:%.*]] = load i32, i32* [[ADDR1I_3]], align 4, !alias.scope !7 +; CHECK-NEXT: store i32 [[X_3]], i32* [[ADDR2I_3]], align 4, !noalias !7 +; CHECK-NEXT: ret void +; +start: + br label %body + +body: + %i = phi i32 [ 0, %start ], [ %i2, %body ] + %j = and i32 %i, 1 + %addr1i = getelementptr inbounds i32, i32* %addr1, i32 %j + %addr2i = getelementptr inbounds i32, i32* %addr2, i32 %j + + %nd = call i8* @llvm.noalias.decl.p0i8.p0p0i8.i64(i8** null, i64 0, metadata !2) + %x = load i32, i32* %addr1i, !alias.scope !2 + store i32 %x, i32* 
%addr2i, !noalias !2 + + %i2 = add i32 %i, 1 + %cmp = icmp slt i32 %i2, 4 + br i1 %cmp, label %body, label %end + +end: + ret void +} + +declare i8* @llvm.noalias.decl.p0i8.p0p0i8.i64(i8**, i64, metadata) + +!0 = distinct !{!0} +!1 = distinct !{!1, !0} +!2 = !{!1} + +; CHECK: !0 = !{!1} +; CHECK: !1 = distinct !{!1, !2} +; CHECK: !2 = distinct !{!2} +; CHECK: !3 = !{!4} +; CHECK: !4 = distinct !{!4, !2, !"It1"} +; CHECK: !5 = !{!6} +; CHECK: !6 = distinct !{!6, !2, !"It2"} +; CHECK: !7 = !{!8} +; CHECK: !8 = distinct !{!8, !2, !"It3"} Index: llvm/test/Transforms/PhaseOrdering/inlining-alignment-assumptions.ll =================================================================== --- llvm/test/Transforms/PhaseOrdering/inlining-alignment-assumptions.ll +++ llvm/test/Transforms/PhaseOrdering/inlining-alignment-assumptions.ll @@ -95,6 +95,7 @@ define amdgpu_kernel void @caller2() { ; CHECK-LABEL: @caller2( +; CHECK-NEXT: [[TMP1:%.*]] = tail call i8* @llvm.noalias.decl.p0i8.p0p0i64.i64(i64** null, i64 0, [[META0:metadata !.*]]) ; CHECK-NEXT: ret void ; %alloca = alloca i64, align 8, addrspace(5) Index: llvm/test/Transforms/PhaseOrdering/instcombine-sroa-inttoptr.ll =================================================================== --- llvm/test/Transforms/PhaseOrdering/instcombine-sroa-inttoptr.ll +++ llvm/test/Transforms/PhaseOrdering/instcombine-sroa-inttoptr.ll @@ -70,6 +70,7 @@ ; CHECK-NEXT: [[I2:%.*]] = alloca [[TMP0:%.*]], align 8 ; CHECK-NEXT: [[I1_SROA_0_0_I5_SROA_IDX:%.*]] = getelementptr inbounds [[TMP0]], %0* [[ARG:%.*]], i64 0, i32 0 ; CHECK-NEXT: [[I1_SROA_0_0_COPYLOAD:%.*]] = load i32*, i32** [[I1_SROA_0_0_I5_SROA_IDX]], align 8 +; CHECK-NEXT: [[TMP0]] = tail call i8* @llvm.noalias.decl.p0i8.p0p0s_s.i64(%0** null, i64 0, [[META0:metadata !.*]]) ; CHECK-NEXT: [[I_SROA_0_0_I6_SROA_IDX:%.*]] = getelementptr inbounds [[TMP0]], %0* [[I2]], i64 0, i32 0 ; CHECK-NEXT: store i32* [[I1_SROA_0_0_COPYLOAD]], i32** [[I_SROA_0_0_I6_SROA_IDX]], align 8 ; CHECK-NEXT: tail call void @_Z7escape01S(%0* nonnull byval(%0) align 8 [[I2]]) @@ -109,6 +110,7 @@ ; CHECK-NEXT: bb: ; CHECK-NEXT: [[I1_SROA_0_0_I4_SROA_IDX:%.*]] = getelementptr inbounds [[TMP0:%.*]], %0* [[ARG:%.*]], i64 0, i32 0 ; CHECK-NEXT: [[I1_SROA_0_0_COPYLOAD:%.*]] = load i32*, i32** [[I1_SROA_0_0_I4_SROA_IDX]], align 8 +; CHECK-NEXT: [[TMP0]] = tail call i8* @llvm.noalias.decl.p0i8.p0p0s_s.i64(%0** null, i64 0, [[META3:metadata !.*]]) ; CHECK-NEXT: [[I5:%.*]] = tail call i32 @_Z4condv() ; CHECK-NEXT: [[I6_NOT:%.*]] = icmp eq i32 [[I5]], 0 ; CHECK-NEXT: br i1 [[I6_NOT]], label [[BB10:%.*]], label [[BB7:%.*]] Index: llvm/test/Transforms/PhaseOrdering/pr39282.ll =================================================================== --- llvm/test/Transforms/PhaseOrdering/pr39282.ll +++ llvm/test/Transforms/PhaseOrdering/pr39282.ll @@ -19,12 +19,24 @@ define void @pr39282(i32* %addr1, i32* %addr2) { ; CHECK-LABEL: @pr39282( ; CHECK-NEXT: start: -; CHECK-NEXT: [[X_I:%.*]] = load i32, i32* [[ADDR1:%.*]], align 4, !alias.scope !0, !noalias !3 +; CHECK-NEXT: call i8* @llvm.noalias.decl +; CHECK-NEXT: call i8* @llvm.noalias.decl +; CHECK-NEXT: [[X_I:%.*]] = load i32, i32* [[ADDR1:%.*]], align 4, !alias.scope !3, !noalias !0 +; CHECK-NEXT: store i32 [[X_I]], i32* [[ADDR2:%.*]], align 4, !alias.scope !0, !noalias !3 ; CHECK-NEXT: [[ADDR1I_1:%.*]] = getelementptr inbounds i32, i32* [[ADDR1]], i64 1 -; CHECK-NEXT: [[ADDR2I_1:%.*]] = getelementptr inbounds i32, i32* [[ADDR2:%.*]], i64 1 -; CHECK-NEXT: [[X_I_1:%.*]] = load i32, i32* [[ADDR1I_1]], align 4, 
!alias.scope !0, !noalias !3 -; CHECK-NEXT: store i32 [[X_I]], i32* [[ADDR2]], align 4, !alias.scope !3, !noalias !0 -; CHECK-NEXT: store i32 [[X_I_1]], i32* [[ADDR2I_1]], align 4, !alias.scope !3, !noalias !0 +; CHECK-NEXT: [[ADDR2I_1:%.*]] = getelementptr inbounds i32, i32* [[ADDR2]], i64 1 +; CHECK-NEXT: call i8* @llvm.noalias.decl +; CHECK-NEXT: call i8* @llvm.noalias.decl +; CHECK-NEXT: [[X_I_1:%.*]] = load i32, i32* [[ADDR1I_1]], align 4, !alias.scope !7, !noalias !5 +; CHECK-NEXT: store i32 [[X_I_1]], i32* [[ADDR2I_1]], align 4, !alias.scope !5, !noalias !7 +; CHECK-NEXT: call i8* @llvm.noalias.decl +; CHECK-NEXT: call i8* @llvm.noalias.decl +; CHECK-NEXT: [[X_I_2:%.*]] = load i32, i32* [[ADDR1]], align 4, !alias.scope !11, !noalias !9 +; CHECK-NEXT: store i32 [[X_I_2]], i32* [[ADDR2]], align 4, !alias.scope !9, !noalias !11 +; CHECK-NEXT: call i8* @llvm.noalias.decl +; CHECK-NEXT: call i8* @llvm.noalias.decl +; CHECK-NEXT: [[X_I_3:%.*]] = load i32, i32* [[ADDR1I_1]], align 4, !alias.scope !15, !noalias !13 +; CHECK-NEXT: store i32 [[X_I_3]], i32* [[ADDR2I_1]], align 4, !alias.scope !13, !noalias !15 ; CHECK-NEXT: ret void ; start: