Index: llvm/include/llvm/Transforms/Utils/NoAliasUtils.h =================================================================== --- /dev/null +++ llvm/include/llvm/Transforms/Utils/NoAliasUtils.h @@ -0,0 +1,25 @@ +//===- llvm/Transforms/Utils/NoAliasUtils.h - NoAlias utilities -*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file defines utilities for noalias metadata and intrinsics. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_TRANSFORMS_UTILS_NOALIASUTILS_H +#define LLVM_TRANSFORMS_UTILS_NOALIASUTILS_H + +namespace llvm { +class Function; + +/// Connect llvm.noalias.decl to noalias/provenance.noalias intrinsics that are +/// associated with the unknown function scope and based on the same alloca. +/// At the same time, propagate the p.addr, p.objId and p.scope. 
+bool propagateAndConnectNoAliasDecl(Function *F); +} // end namespace llvm + +#endif // LLVM_TRANSFORMS_UTILS_NOALIASUTILS_H Index: llvm/lib/Transforms/Utils/CMakeLists.txt =================================================================== --- llvm/lib/Transforms/Utils/CMakeLists.txt +++ llvm/lib/Transforms/Utils/CMakeLists.txt @@ -52,6 +52,7 @@ MisExpect.cpp ModuleUtils.cpp NameAnonGlobals.cpp + NoAliasUtils.cpp PredicateInfo.cpp PromoteMemoryToRegister.cpp ScalarEvolutionExpander.cpp Index: llvm/lib/Transforms/Utils/CloneFunction.cpp =================================================================== --- llvm/lib/Transforms/Utils/CloneFunction.cpp +++ llvm/lib/Transforms/Utils/CloneFunction.cpp @@ -36,6 +36,21 @@ #include using namespace llvm; +static void PropagateNoAliasProvenanceInfo(Instruction *To, + const Instruction *From) { + // The ptr_provenance is not automatically copied over in a 'clone()' + // Let's do it here. + if (auto *LI = dyn_cast(From)) { + if (LI->hasNoaliasProvenanceOperand()) + cast(To)->setNoaliasProvenanceOperand( + LI->getNoaliasProvenanceOperand()); + } else if (auto SI = dyn_cast(From)) { + if (SI->hasNoaliasProvenanceOperand()) + cast(To)->setNoaliasProvenanceOperand( + SI->getNoaliasProvenanceOperand()); + } +} + /// See comments in Cloning.h. BasicBlock *llvm::CloneBasicBlock(const BasicBlock *BB, ValueToValueMapTy &VMap, const Twine &NameSuffix, Function *F, @@ -55,6 +70,8 @@ DIFinder->processInstruction(*TheModule, I); Instruction *NewInst = I.clone(); + PropagateNoAliasProvenanceInfo(NewInst, &I); + if (I.hasName()) NewInst->setName(I.getName() + NameSuffix); NewBB->getInstList().push_back(NewInst); @@ -338,6 +355,7 @@ II != IE; ++II) { Instruction *NewInst = II->clone(); + PropagateNoAliasProvenanceInfo(NewInst, &*II); // Eagerly remap operands to the newly cloned instruction, except for PHI // nodes for which we defer processing until we update the CFG. 
@@ -869,6 +887,7 @@ // terminator gets replaced and StopAt == BB's terminator. for (; StopAt != &*BI && BB->getTerminator() != &*BI; ++BI) { Instruction *New = BI->clone(); + PropagateNoAliasProvenanceInfo(New, &*BI); New->setName(BI->getName()); New->insertBefore(NewTerm); ValueMapping[&*BI] = New; Index: llvm/lib/Transforms/Utils/InlineFunction.cpp =================================================================== --- llvm/lib/Transforms/Utils/InlineFunction.cpp +++ llvm/lib/Transforms/Utils/InlineFunction.cpp @@ -28,7 +28,6 @@ #include "llvm/Analysis/EHPersonalities.h" #include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Analysis/ProfileSummaryInfo.h" -#include "llvm/Transforms/Utils/Local.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/Analysis/VectorUtils.h" #include "llvm/IR/Argument.h" @@ -44,6 +43,7 @@ #include "llvm/IR/Dominators.h" #include "llvm/IR/Function.h" #include "llvm/IR/IRBuilder.h" +#include "llvm/IR/InstIterator.h" #include "llvm/IR/InstrTypes.h" #include "llvm/IR/Instruction.h" #include "llvm/IR/Instructions.h" @@ -61,6 +61,8 @@ #include "llvm/Support/ErrorHandling.h" #include "llvm/Transforms/Utils/AssumeBundleBuilder.h" #include "llvm/Transforms/Utils/Cloning.h" +#include "llvm/Transforms/Utils/Local.h" +#include "llvm/Transforms/Utils/NoAliasUtils.h" #include "llvm/Transforms/Utils/ValueMapper.h" #include #include @@ -79,6 +81,11 @@ cl::Hidden, cl::desc("Convert noalias attributes to metadata during inlining.")); +static cl::opt UseNoAliasIntrinsic( + "use-noalias-intrinsic-during-inlining", cl::Hidden, cl::ZeroOrMore, + cl::init(false), + cl::desc("Use the llvm.noalias intrinsic during inlining.")); + // Disabled by default, because the added alignment assumptions may increase // compile-time and block optimizations. This option is not suitable for use // with frontends that emit comprehensive parameter alignment annotations. 
@@ -808,6 +815,51 @@ } } +/// When inlining a function that contains noalias scope metadata, +/// this metadata needs to be cloned so that the inlined blocks +/// have different "unique scopes" at every call site. +/// Track the metadata that must be cloned. Do this before other changes to the +/// function, so that we do not get in trouble when inlining caller == callee. +static void TrackAliasScopeMetaData(CallBase &CB, + SetVector &MD) { + const Function *CalledFunc = CB.getCalledFunction(); + for (const BasicBlock &I : *CalledFunc) + for (const Instruction &J : I) { + if (const MDNode *M = J.getMetadata(LLVMContext::MD_alias_scope)) + MD.insert(M); + if (const MDNode *M = J.getMetadata(LLVMContext::MD_noalias)) + MD.insert(M); + + // We also need to clone the metadata in noalias intrinsics. + if (const auto *II = dyn_cast(&J)) { + if (II->getIntrinsicID() == Intrinsic::noalias) + if (const auto *M = dyn_cast( + cast( + II->getOperand(Intrinsic::NoAliasScopeArg)) + ->getMetadata())) + MD.insert(M); + if (II->getIntrinsicID() == Intrinsic::provenance_noalias) + if (const auto *M = dyn_cast( + cast( + II->getOperand(Intrinsic::ProvenanceNoAliasScopeArg)) + ->getMetadata())) + MD.insert(M); + if (II->getIntrinsicID() == Intrinsic::noalias_decl) + if (const auto *M = dyn_cast( + cast( + II->getOperand(Intrinsic::NoAliasDeclScopeArg)) + ->getMetadata())) + MD.insert(M); + if (II->getIntrinsicID() == Intrinsic::noalias_copy_guard) + if (const auto *M = dyn_cast( + cast( + II->getOperand(Intrinsic::NoAliasCopyGuardScopeArg)) + ->getMetadata())) + MD.insert(M); + } + } +} + /// When inlining a function that contains noalias scope metadata, /// this metadata needs to be cloned so that the inlined blocks /// have different "unique scopes" at every call site. 
Were this not done, then @@ -815,25 +867,44 @@ /// not be differentiated (and this would lead to miscompiles because the /// non-aliasing property communicated by the metadata could have /// call-site-specific control dependencies). -static void CloneAliasScopeMetadata(CallBase &CB, ValueToValueMapTy &VMap) { +static void CloneAliasScopeMetadata(CallBase &CB, ValueToValueMapTy &VMap, + Function *Caller, + SetVector &MD) { const Function *CalledFunc = CB.getCalledFunction(); - SetVector MD; // Note: We could only clone the metadata if it is already used in the // caller. I'm omitting that check here because it might confuse // inter-procedural alias analysis passes. We can revisit this if it becomes // an efficiency or overhead problem. - for (const BasicBlock &I : *CalledFunc) - for (const Instruction &J : I) { - if (const MDNode *M = J.getMetadata(LLVMContext::MD_alias_scope)) - MD.insert(M); - if (const MDNode *M = J.getMetadata(LLVMContext::MD_noalias)) - MD.insert(M); + // Track function level !noalias metadata ('unknown function' scope). 
This + // should be merged with the data from the callee + MDNode *CallerNoAlias = Caller->getMetadata("noalias"); + MDNode *CalleeNoAlias = CalledFunc->getMetadata("noalias"); + llvm::MDNode *NewUnknownScope = nullptr; + + if ((CalleeNoAlias != nullptr) && (CallerNoAlias == nullptr)) { + // - NOTE: keep in sync with (clang) CGExpr: EmitLoadOfScalar + // - NOTE: keep in sync with (clang) CGDecl: EmitAutoVarNoAlias + // - EmitNoAliasDecl + // - NOTE: keep in sync with (llvm) InlineFunction: CloneAliasScopeMetadata + llvm::MDBuilder MDB(Caller->getContext()); + std::string Name(Caller->getName()); + auto NoAliasDomain = MDB.createAnonymousAliasScopeDomain(Name); + Name += ": unknown scope"; + + llvm::MDNode *UnknownScope = + MDB.createAnonymousAliasScope(NoAliasDomain, Name); + + { + SmallVector ScopeListEntries(1, UnknownScope); + CallerNoAlias = llvm::MDNode::get(Caller->getContext(), ScopeListEntries); + Caller->setMetadata("noalias", CallerNoAlias); } + NewUnknownScope = UnknownScope; + } - if (MD.empty()) - return; + // No early exit: make sure all memory instructions are annotated // Walk the existing metadata, adding the complete (perhaps cyclic) chain to // the set. @@ -850,6 +921,13 @@ // the noalias scopes and the lists of those scopes. SmallVector DummyNodes; DenseMap MDMap; + if ((CalleeNoAlias != nullptr) && (CalleeNoAlias != CallerNoAlias)) { + // Map CalleeNoAlias onto CallerNoAlias + MD.remove(CalleeNoAlias); + DummyNodes.push_back(MDTuple::getTemporary(CalledFunc->getContext(), None)); + MDMap[CalleeNoAlias].reset(DummyNodes.back().get()); + cast(MDMap[CalleeNoAlias])->replaceAllUsesWith(CallerNoAlias); + } for (const MDNode *I : MD) { DummyNodes.push_back(MDTuple::getTemporary(CalledFunc->getContext(), None)); MDMap[I].reset(DummyNodes.back().get()); @@ -877,6 +955,7 @@ // Now replace the metadata in the new inlined instructions with the // repacements from the map. 
+ SmallPtrSet HandledInstructions; for (ValueToValueMapTy::iterator VMI = VMap.begin(), VMIE = VMap.end(); VMI != VMIE; ++VMI) { if (!VMI->second) @@ -886,31 +965,183 @@ if (!NI) continue; + // Check if we already adapted this instruction + if (!HandledInstructions.insert(NI).second) + continue; + if (MDNode *M = NI->getMetadata(LLVMContext::MD_alias_scope)) { - MDNode *NewMD = MDMap[M]; - // If the call site also had alias scope metadata (a list of scopes to - // which instructions inside it might belong), propagate those scopes to - // the inlined instructions. - if (MDNode *CSM = CB.getMetadata(LLVMContext::MD_alias_scope)) - NewMD = MDNode::concatenate(NewMD, CSM); - NI->setMetadata(LLVMContext::MD_alias_scope, NewMD); + auto MI = MDMap.find(M); + if (MI != MDMap.end()) { + MDNode *NewMD = MI->second; + // If the call site also had alias scope metadata (a list of scopes to + // which instructions inside it might belong), propagate those scopes to + // the inlined instructions. + if (MDNode *CSM = CB.getMetadata(LLVMContext::MD_alias_scope)) + NewMD = MDNode::concatenate(NewMD, CSM); + NI->setMetadata(LLVMContext::MD_alias_scope, NewMD); + } } else if (NI->mayReadOrWriteMemory()) { if (MDNode *M = CB.getMetadata(LLVMContext::MD_alias_scope)) NI->setMetadata(LLVMContext::MD_alias_scope, M); } if (MDNode *M = NI->getMetadata(LLVMContext::MD_noalias)) { - MDNode *NewMD = MDMap[M]; - // If the call site also had noalias metadata (a list of scopes with - // which instructions inside it don't alias), propagate those scopes to - // the inlined instructions. - if (MDNode *CSM = CB.getMetadata(LLVMContext::MD_noalias)) - NewMD = MDNode::concatenate(NewMD, CSM); - NI->setMetadata(LLVMContext::MD_noalias, NewMD); + auto MI = MDMap.find(M); + if (MI != MDMap.end()) { + MDNode *NewMD = MI->second; + // If the call site also had noalias metadata (a list of scopes with + // which instructions inside it don't alias), propagate those scopes to + // the inlined instructions. 
+ if (MDNode *CSM = CB.getMetadata(LLVMContext::MD_noalias)) + NewMD = MDNode::concatenate(NewMD, CSM); + NI->setMetadata(LLVMContext::MD_noalias, NewMD); + } } else if (NI->mayReadOrWriteMemory()) { if (MDNode *M = CB.getMetadata(LLVMContext::MD_noalias)) NI->setMetadata(LLVMContext::MD_noalias, M); } + + // Update the metadata referenced by a noalias intrinsic + if (auto *II = dyn_cast(NI)) { + auto ID = II->getIntrinsicID(); + if (ID == Intrinsic::noalias || ID == Intrinsic::provenance_noalias || + ID == Intrinsic::noalias_decl || + ID == Intrinsic::noalias_copy_guard) { + int NoAliasScope = 0; + if (ID == Intrinsic::noalias) + NoAliasScope = Intrinsic::NoAliasScopeArg; + if (ID == Intrinsic::provenance_noalias) + NoAliasScope = Intrinsic::ProvenanceNoAliasScopeArg; + if (ID == Intrinsic::noalias_decl) + NoAliasScope = Intrinsic::NoAliasDeclScopeArg; + if (ID == Intrinsic::noalias_copy_guard) + NoAliasScope = Intrinsic::NoAliasCopyGuardScopeArg; + + if (auto *M = dyn_cast( + cast(II->getOperand(NoAliasScope)) + ->getMetadata())) { + // If the metadata is not in the map, it could be a new intrinsic + // that was just added. + auto MI = MDMap.find(M); + if (MI != MDMap.end()) + II->setOperand( + NoAliasScope, + MetadataAsValue::get(CalledFunc->getContext(), MI->second)); + } + } + } + } + + if (NewUnknownScope) { + // We now need to add the out-of-function scope to _all_ instructions with + // noalias data in the 'caller' + // Note: following strange choice of variables names is similar to how it is + // done later + // FIXME: hmm this might be less than fast :( + // hmm it is also needed to do this _after_ the metadata cloning, otherwise + // we seem to lose information ! 
+ for (BasicBlock &I : *Caller) { + for (Instruction &J : I) { + if (const MDNode *M = J.getMetadata(LLVMContext::MD_noalias)) { + SmallVector NewScopeList; + for (auto &MDOp : M->operands()) { + NewScopeList.push_back(MDOp); + } + NewScopeList.push_back(NewUnknownScope); + J.setMetadata(LLVMContext::MD_noalias, + MDNode::get(Caller->getContext(), NewScopeList)); + } else if (J.mayReadOrWriteMemory()) { + // no Noalias, but we need to add the (new) 'unknown scope' ! + J.setMetadata(LLVMContext::MD_noalias, CallerNoAlias); + } + } + } + } +} + +/// If the inlined function has noalias arguments, +/// then add a new alias scope to instructions that might access memory, and +/// noalias intrinsics corresponding to the noalias arguments. +static void AddNoAliasIntrinsics(CallBase &CB, ValueToValueMapTy &VMap, + MDNode *&NewScopeList) { + if (!EnableNoAliasConversion || !UseNoAliasIntrinsic) + return; + + const Function *CalledFunc = CB.getCalledFunction(); + SmallVector NoAliasArgs; + + for (const auto &Arg : CalledFunc->args()) { + if (Arg.hasNoAliasAttr() && !Arg.use_empty()) + NoAliasArgs.push_back(&Arg); + } + + if (NoAliasArgs.empty()) + return; + + MDBuilder MDB(CalledFunc->getContext()); + // Create a new scope domain for this function. + MDNode *NewDomain = + MDB.createAnonymousAliasScopeDomain(CalledFunc->getName()); + + // Create a new scope for each noalias argument. + SmallVector Scopes; + + // For each noalias argument, add a noalias intrinsic call, and update the + // value map to refer to the new result of the noalias call. 
+ for (const Argument *A : NoAliasArgs) { + Value *MappedA = VMap[A]; + if (isa(MappedA)) { + // Skip generating restrict intrinsics for known 'null' pointers + continue; + } + + std::string Name(CalledFunc->getName()); + if (A->hasName()) { + Name += ": %"; + Name += A->getName(); + } else { + Name += ": argument "; + Name += utostr(A->getArgNo()); + } + + MDNode *AScope = MDB.createAnonymousAliasScope(NewDomain, Name); + Scopes.push_back(AScope); + + MDNode *AScopeList = MDNode::get(CalledFunc->getContext(), AScope); + + // The alloca was optimized away -> use a nullptr + auto *IdentifyPAlloca = + ConstantPointerNull::get(MappedA->getType()->getPointerTo()); + auto *NoAliasDecl = + IRBuilder<>(&CB).CreateNoAliasDeclaration(IdentifyPAlloca, AScopeList); + Value *NA = IRBuilder<>(&CB).CreateNoAliasPointer( + MappedA, NoAliasDecl, IdentifyPAlloca, AScopeList); + VMap[A] = NA; + } + + NewScopeList = MDNode::get(CalledFunc->getContext(), Scopes); +} + +static void AddNoAliasIntrinsicsScope(CallBase &CB, ValueToValueMapTy &VMap, + MDNode *NewScopeList) { + if (!EnableNoAliasConversion || !UseNoAliasIntrinsic || !NewScopeList) + return; + + // Iterate over all new instructions in the map; for all memory-access + // instructions, add the alias scope metadata. + for (ValueToValueMapTy::iterator VMI = VMap.begin(), VMIE = VMap.end(); + VMI != VMIE; ++VMI) { + if (!isa(VMI->first) || !VMI->second) + continue; + + auto *NI = dyn_cast(VMI->second); + if (!NI || !NI->mayReadOrWriteMemory()) + continue; + + NI->setMetadata( + LLVMContext::MD_noalias, + MDNode::concatenate(NI->getMetadata(LLVMContext::MD_noalias), + NewScopeList)); } } @@ -920,7 +1151,7 @@ /// non-derived loads, stores and memory intrinsics with the new alias scopes. 
static void AddAliasScopeMetadata(CallBase &CB, ValueToValueMapTy &VMap, const DataLayout &DL, AAResults *CalleeAAR) { - if (!EnableNoAliasConversion) + if (!EnableNoAliasConversion || UseNoAliasIntrinsic) return; const Function *CalledFunc = CB.getCalledFunction(); @@ -1764,6 +1995,12 @@ ValueToValueMapTy VMap; // Keep a list of pair (dst, src) to emit byval initializations. SmallVector, 4> ByValInit; + MDNode *NAScopeList = nullptr; + SetVector MDToClone; + + // Track the Metadata that must be cloned. Do this before modifying + // the function, avoiding issues when caller == callee. + TrackAliasScopeMetaData(CB, MDToClone); auto &DL = Caller->getParent()->getDataLayout(); @@ -1801,6 +2038,9 @@ /// Preserve all attributes on of the call and its parameters. salvageKnowledge(&CB, AC); + // Add noalias intrinsics corresponding to noalias function arguments. + AddNoAliasIntrinsics(CB, VMap, NAScopeList); + // We want the inliner to prune the code as it copies. We would LOVE to // have no dead or constant instructions leftover after inlining occurs // (which can happen, e.g., because an argument was constant), but we'll be @@ -1886,7 +2126,7 @@ CalledFunc->getSubprogram() != nullptr); // Clone existing noalias metadata if necessary. - CloneAliasScopeMetadata(CB, VMap); + CloneAliasScopeMetadata(CB, VMap, Caller, MDToClone); // Add noalias metadata if necessary. AddAliasScopeMetadata(CB, VMap, DL, CalleeAAR); @@ -1898,6 +2138,9 @@ // Propagate llvm.mem.parallel_loop_access if necessary. PropagateParallelLoopAccessMetadata(CB, VMap); + // Add scopes to memory accesses corresponding to added noalias intrinsics. + AddNoAliasIntrinsicsScope(CB, VMap, NAScopeList); + // Register any cloned assumptions. if (IFI.GetAssumptionCache) for (BasicBlock &NewBlock : @@ -2332,6 +2575,9 @@ // Since we are now done with the return instruction, delete it also. 
Returns[0]->eraseFromParent(); + // Already try to connect llvm.noalias.decl where possible + propagateAndConnectNoAliasDecl(Caller); + // We are now done with the inlining. return InlineResult::success(); } @@ -2482,6 +2728,9 @@ // Now we can remove the CalleeEntry block, which is now empty. Caller->getBasicBlockList().erase(CalleeEntry); + // Already try to connect llvm.noalias.decl where possible + propagateAndConnectNoAliasDecl(Caller); + // If we inserted a phi node, check to see if it has a single value (e.g. all // the entries are the same or undef). If so, remove the PHI so it doesn't // block other optimizations. Index: llvm/lib/Transforms/Utils/NoAliasUtils.cpp =================================================================== --- /dev/null +++ llvm/lib/Transforms/Utils/NoAliasUtils.cpp @@ -0,0 +1,160 @@ +//===-- NoAliasUtils.cpp - NoAlias Utility functions ----------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file defines common noalias metadata and intrinsic utility functions. 
+// +//===----------------------------------------------------------------------===// + +#include "llvm/Transforms/Utils/NoAliasUtils.h" +#include "llvm/ADT/MapVector.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/Analysis/ValueTracking.h" +#include "llvm/IR/InstIterator.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/MDBuilder.h" +#include "llvm/IR/Metadata.h" +#include "llvm/IR/Module.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" + +using namespace llvm; + +#define DEBUG_TYPE "noalias-utils" + +bool llvm::propagateAndConnectNoAliasDecl(Function *F) { + auto *UnknownFunctionScope = F->getMetadata("noalias"); + if (UnknownFunctionScope == nullptr) + return false; + + SmallVector InterestingNoalias; + SmallMapVector KnownAllocaNoAliasDecl; + + auto TrackIfIsUnknownFunctionScope = [&](IntrinsicInst *I, unsigned Index) { + auto V = I->getOperand(Index); + if (cast(V)->getMetadata() == UnknownFunctionScope) { + InterestingNoalias.push_back(I); + } + }; + + for (Instruction &I : llvm::instructions(*F)) { + if (IntrinsicInst *II = dyn_cast(&I)) { + switch (II->getIntrinsicID()) { + case Intrinsic::noalias: { + TrackIfIsUnknownFunctionScope(II, Intrinsic::NoAliasScopeArg); + break; + } + case Intrinsic::provenance_noalias: { + TrackIfIsUnknownFunctionScope(II, Intrinsic::ProvenanceNoAliasScopeArg); + break; + } + case Intrinsic::noalias_copy_guard: { + TrackIfIsUnknownFunctionScope(II, Intrinsic::NoAliasCopyGuardScopeArg); + break; + } + case Intrinsic::noalias_decl: { + auto *depAlloca = dyn_cast(II->getOperand(0)); + if (depAlloca) { + KnownAllocaNoAliasDecl[depAlloca] = II; + } + break; + } + default: + break; + } + } + } + + if (KnownAllocaNoAliasDecl.empty() || InterestingNoalias.empty()) + return false; + + bool Changed = false; + for (auto *II : InterestingNoalias) { + SmallVector UO; + unsigned Index = + (II->getIntrinsicID() == Intrinsic::noalias + ? 
0 + : (II->getIntrinsicID() == Intrinsic::provenance_noalias ? 1 : 2)); + const int IdentifyPArg[] = {Intrinsic::NoAliasIdentifyPArg, + Intrinsic::ProvenanceNoAliasIdentifyPArg, + Intrinsic::NoAliasCopyGuardIdentifyPBaseObject}; + const int ScopeArg[] = {Intrinsic::NoAliasScopeArg, + Intrinsic::ProvenanceNoAliasScopeArg, + Intrinsic::NoAliasCopyGuardScopeArg}; + const int NoAliasDeclArg[] = {Intrinsic::NoAliasNoAliasDeclArg, + Intrinsic::ProvenanceNoAliasNoAliasDeclArg, + Intrinsic::NoAliasCopyGuardNoAliasDeclArg}; + const int ObjIdArg[] = {Intrinsic::NoAliasIdentifyPObjIdArg, + Intrinsic::ProvenanceNoAliasIdentifyPObjIdArg, -1}; + + llvm::getUnderlyingObjects(II->getOperand(IdentifyPArg[Index]), UO); + if (UO.size() != 1) { + // Multiple objects possible - It would be nice to propagate, but we do + // not do it yet. That is ok as the unknown function scope assumes more + // aliasing. + LLVM_DEBUG(llvm::dbgs() + << "WARNING: no llvm.noalias.decl reconnect accross " + "PHI/select - YET (" + << UO.size() << " underlying objects)\n"); + continue; + } + + if (auto *UA = dyn_cast(UO[0])) { + auto it = KnownAllocaNoAliasDecl.find(UA); + if (it != KnownAllocaNoAliasDecl.end()) { + Instruction *Decl = it->second; + // found a simple matching declaration - propagate + II->setOperand(ScopeArg[Index], + Decl->getOperand(Intrinsic::NoAliasDeclScopeArg)); + II->setOperand(NoAliasDeclArg[Index], Decl); + + auto ObjIdIndex = ObjIdArg[Index]; + if (ObjIdIndex != -1) { + II->setOperand(ObjIdIndex, + Decl->getOperand(Intrinsic::NoAliasDeclObjIdArg)); + } + Changed = true; + } else if (UnknownFunctionScope && isa(UA)) { + if (cast(II->getOperand(ScopeArg[Index])) + ->getMetadata() == UnknownFunctionScope) { + // we have an alloca, but no llvm.noalias.decl and we have unknown + // function scope This is an indication of a temporary that (through a + // pointer or reference to a restrict pointer) introduces restrict. 
+ // - the unknown scope is too broad for these cases + // - conceptually, the scope should be the lifetime of the local, but + // we don't have that information + // - the real restrictness should have been brought in through the + // 'depends on' relationship + // -> so we fall back on the 'depends on' and remove the restrictness + // information at this level. + LLVM_DEBUG( + llvm::dbgs() + << "- Temporary noalias object (without llvm.noalias.decl) " + "detected. Ignore restrictness: " + << *II << "\n"); + II->replaceAllUsesWith(II->getOperand(0)); + II->eraseFromParent(); + Changed = true; + } + } + } else { +#if !defined(NDEBUG) + if (isa(UO[0]) || isa(UO[0])) { + // Multiple objects possible - It would be nice to propagate, but we do + // not do it yet. That is ok as the unknown function scope assumes more + // aliasing. + LLVM_DEBUG(llvm::dbgs() + << "WARNING: no llvm.noalias.decl reconnect accross " + "PHI/select - YET: " + << *UO[0] << "\n"); + } +#endif + } + } + return Changed; +} Index: llvm/test/Transforms/Inline/noalias-calls.ll =================================================================== --- llvm/test/Transforms/Inline/noalias-calls.ll +++ llvm/test/Transforms/Inline/noalias-calls.ll @@ -1,9 +1,10 @@ -; RUN: opt -basic-aa -inline -enable-noalias-to-md-conversion -S < %s | FileCheck %s +; RUN: opt -basic-aa -inline -enable-noalias-to-md-conversion -use-noalias-intrinsic-during-inlining=0 -S < %s | FileCheck %s -check-prefix=MD-SCOPE +; RUN: opt -basic-aa -inline -enable-noalias-to-md-conversion -use-noalias-intrinsic-during-inlining=1 -S < %s | FileCheck %s -check-prefix=INTR-SCOPE target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" target triple = "x86_64-unknown-linux-gnu" declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture readonly, i64, i1) #0 -declare void @hey() #0 +declare void @hey() #1 define void 
@hello(i8* noalias nocapture %a, i8* noalias nocapture readonly %c, i8* nocapture %b) #1 { entry: @@ -16,12 +17,39 @@ ret void } -define void @foo(i8* nocapture %a, i8* nocapture readonly %c, i8* nocapture %b) #2 { +define void @foo(i8* nocapture %a, i8* nocapture readonly %c, i8* nocapture %b) #1 { entry: tail call void @hello(i8* %a, i8* %c, i8* %b) ret void } +; MD-SCOPE: define void @foo(i8* nocapture %a, i8* nocapture readonly %c, i8* nocapture %b) #1 { +; MD-SCOPE: entry: +; MD-SCOPE: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 %a, i8* align 16 %b, i64 16, i1 false) #1, !noalias !0 +; MD-SCOPE: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 %b, i8* align 16 %c, i64 16, i1 false) #1, !noalias !3 +; MD-SCOPE: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 %a, i8* align 16 %c, i64 16, i1 false) #1, !alias.scope !5 +; MD-SCOPE: call void @hey() #1, !noalias !5 +; MD-SCOPE: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 %{{.*}}, i8* align 16 %c, i64 16, i1 false) #1, !noalias !3 +; MD-SCOPE: ret void +; MD-SCOPE: } + +; INTR-SCOPE: define void @foo(i8* nocapture %a, i8* nocapture readonly %c, i8* nocapture %b) #1 { +; INTR-SCOPE: entry: +; INTR-SCOPE: %l.i = alloca i8, i32 512, align 1 +; INTR-SCOPE: %0 = call i8* @llvm.noalias.decl.p0i8.p0p0i8.i64(i8** null, i64 0, metadata !0) +; INTR-SCOPE: %1 = call i8* @llvm.noalias.p0i8.p0i8.p0p0i8.i64(i8* %a, i8* %0, i8** null, i64 0, metadata !0) +; INTR-SCOPE: %2 = call i8* @llvm.noalias.decl.p0i8.p0p0i8.i64(i8** null, i64 0, metadata !3) +; INTR-SCOPE: %3 = call i8* @llvm.noalias.p0i8.p0i8.p0p0i8.i64(i8* %c, i8* %2, i8** null, i64 0, metadata !3) +; INTR-SCOPE: call void @llvm.lifetime.start.p0i8(i64 512, i8* %l.i) +; INTR-SCOPE: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 %1, i8* align 16 %b, i64 16, i1 false) #1, !noalias !5 +; INTR-SCOPE: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 %b, i8* align 16 %3, i64 16, i1 false) #1, !noalias !5 +; INTR-SCOPE: call void @llvm.memcpy.p0i8.p0i8.i64(i8* 
align 16 %1, i8* align 16 %3, i64 16, i1 false) #1, !noalias !5 +; INTR-SCOPE: call void @hey() #1, !noalias !5 +; INTR-SCOPE: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 %l.i, i8* align 16 %3, i64 16, i1 false) #1, !noalias !5 +; INTR-SCOPE: call void @llvm.lifetime.end.p0i8(i64 512, i8* %l.i) +; INTR-SCOPE: ret void +; INTR-SCOPE: } + define void @hello_cs(i8* nocapture %a, i8* nocapture readonly %c, i8* nocapture %b) #1 { entry: %l = alloca i8, i32 512, align 1 @@ -39,41 +67,46 @@ ret void } -; CHECK: define void @foo(i8* nocapture %a, i8* nocapture readonly %c, i8* nocapture %b) #2 { -; CHECK: entry: -; CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 %a, i8* align 16 %b, i64 16, i1 false) #1, !noalias !0 -; CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 %b, i8* align 16 %c, i64 16, i1 false) #1, !noalias !3 -; CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 %a, i8* align 16 %c, i64 16, i1 false) #1, !alias.scope !5 -; CHECK: call void @hey() #1, !noalias !5 -; CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 %{{.*}}, i8* align 16 %c, i64 16, i1 false) #1, !noalias !3 -; CHECK: ret void -; CHECK: } +; MD-SCOPE: define void @foo_cs(i8* nocapture %a, i8* nocapture readonly %c, i8* nocapture %b) { +; MD-SCOPE: entry: +; MD-SCOPE: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 %a, i8* align 16 %b, i64 16, i1 false) #1, !noalias !6 +; MD-SCOPE: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 %b, i8* align 16 %c, i64 16, i1 false) #1, !noalias !9 +; MD-SCOPE: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 %a, i8* align 16 %c, i64 16, i1 false) #1, !alias.scope !11 +; MD-SCOPE: call void @hey() #1, !noalias !11 +; MD-SCOPE: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 %l.i, i8* align 16 %c, i64 16, i1 false) #1, !noalias !9 +; MD-SCOPE: ret void +; MD-SCOPE: } -; CHECK: define void @foo_cs(i8* nocapture %a, i8* nocapture readonly %c, i8* nocapture %b) #2 { -; CHECK: entry: -; CHECK: call void 
@llvm.memcpy.p0i8.p0i8.i64(i8* align 16 %a, i8* align 16 %b, i64 16, i1 false) #1, !noalias !6 -; CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 %b, i8* align 16 %c, i64 16, i1 false) #1, !noalias !9 -; CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 %a, i8* align 16 %c, i64 16, i1 false) #1, !alias.scope !11 -; CHECK: call void @hey() #1, !noalias !11 -; CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 %{{.*}}, i8* align 16 %c, i64 16, i1 false) #1, !noalias !9 -; CHECK: ret void -; CHECK: } +; INTR-SCOPE: define void @foo_cs(i8* nocapture %a, i8* nocapture readonly %c, i8* nocapture %b) { +; INTR-SCOPE: entry: +; INTR-SCOPE: %l.i = alloca i8, i32 512, align 1 +; INTR-SCOPE: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 %a, i8* align 16 %b, i64 16, i1 false) #1 +; INTR-SCOPE: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 %b, i8* align 16 %c, i64 16, i1 false) #1 +; INTR-SCOPE: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 %a, i8* align 16 %c, i64 16, i1 false) #1 +; INTR-SCOPE: call void @hey() #1 +; INTR-SCOPE: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 %l.i, i8* align 16 %c, i64 16, i1 false) #1 +; INTR-SCOPE: ret void +; INTR-SCOPE: } attributes #0 = { nounwind argmemonly willreturn } attributes #1 = { nounwind } -attributes #2 = { nounwind uwtable } - -; CHECK: !0 = !{!1} -; CHECK: !1 = distinct !{!1, !2, !"hello: %c"} -; CHECK: !2 = distinct !{!2, !"hello"} -; CHECK: !3 = !{!4} -; CHECK: !4 = distinct !{!4, !2, !"hello: %a"} -; CHECK: !5 = !{!4, !1} -; CHECK: !6 = !{!7} -; CHECK: !7 = distinct !{!7, !8, !"hello_cs: %c"} -; CHECK: !8 = distinct !{!8, !"hello_cs"} -; CHECK: !9 = !{!10} -; CHECK: !10 = distinct !{!10, !8, !"hello_cs: %a"} -; CHECK: !11 = !{!10, !7} +; MD-SCOPE:!0 = !{!1} +; MD-SCOPE:!1 = distinct !{!1, !2, !"hello: %c"} +; MD-SCOPE:!2 = distinct !{!2, !"hello"} +; MD-SCOPE:!3 = !{!4} +; MD-SCOPE:!4 = distinct !{!4, !2, !"hello: %a"} +; MD-SCOPE:!5 = !{!4, !1} +; MD-SCOPE:!6 = !{!7} +; MD-SCOPE:!7 = 
distinct !{!7, !8, !"hello_cs: %c"} +; MD-SCOPE:!8 = distinct !{!8, !"hello_cs"} +; MD-SCOPE:!9 = !{!10} +; MD-SCOPE:!10 = distinct !{!10, !8, !"hello_cs: %a"} +; MD-SCOPE:!11 = !{!10, !7} +; INTR-SCOPE: !0 = !{!1} +; INTR-SCOPE: !1 = distinct !{!1, !2, !"hello: %a"} +; INTR-SCOPE: !2 = distinct !{!2, !"hello"} +; INTR-SCOPE: !3 = !{!4} +; INTR-SCOPE: !4 = distinct !{!4, !2, !"hello: %c"} +; INTR-SCOPE: !5 = !{!1, !4} Index: llvm/test/Transforms/Inline/noalias-scopes.ll =================================================================== --- /dev/null +++ llvm/test/Transforms/Inline/noalias-scopes.ll @@ -0,0 +1,204 @@ +; RUN: opt -inline -enable-noalias-to-md-conversion -use-noalias-intrinsic-during-inlining=1 -S < %s | FileCheck %s -check-prefix=INTR-SCOPE +; verify that inlining results in scope duplication +; verify that llvm.noalias.decl is introduced at the location of the inlining + +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +; Function Attrs: nofree norecurse nounwind +define dso_local void @copy_npnp(i32* noalias nocapture %dst, i32* noalias nocapture readonly %src) local_unnamed_addr #0 { +entry: + %0 = load i32, i32* %src, ptr_provenance i32* undef, align 4, !tbaa !2 + store i32 %0, i32* %dst, ptr_provenance i32* undef, align 4, !tbaa !2 + ret void +} + +; Function Attrs: nounwind +define dso_local void @copy_rprp(i32* nocapture %dst, i32* nocapture readonly %src) local_unnamed_addr #1 { +entry: + %0 = tail call i8* @llvm.noalias.decl.p0i8.p0p0i32.i64(i32** null, i64 0, metadata !6) + %1 = tail call i8* @llvm.noalias.decl.p0i8.p0p0i32.i64(i32** null, i64 0, metadata !9) + %2 = tail call i32* @llvm.provenance.noalias.p0i32.p0i8.p0p0i32.p0p0i32.i64(i32* %src, i8* %1, i32** null, i32** undef, i64 0, metadata !9), !tbaa !11, !noalias !13 + %3 = load i32, i32* %src, ptr_provenance i32* %2, 
align 4, !tbaa !2, !noalias !13 + %4 = tail call i32* @llvm.provenance.noalias.p0i32.p0i8.p0p0i32.p0p0i32.i64(i32* %dst, i8* %0, i32** null, i32** undef, i64 0, metadata !6), !tbaa !11, !noalias !13 + store i32 %3, i32* %dst, ptr_provenance i32* %4, align 4, !tbaa !2, !noalias !13 + ret void +} + +; Function Attrs: argmemonly nounwind +declare i8* @llvm.noalias.decl.p0i8.p0p0i32.i64(i32**, i64, metadata) #2 + +; Function Attrs: nofree norecurse nounwind +define dso_local void @test_npnp(i32* nocapture %dst, i32* nocapture readonly %src, i32 %n) local_unnamed_addr #0 { +entry: + tail call void @copy_npnp(i32* %dst, i32* %src) + br label %do.body + +do.body: ; preds = %do.body, %entry + %n.addr.0 = phi i32 [ %n, %entry ], [ %dec, %do.body ] + tail call void @copy_npnp(i32* %dst, i32* %src) + tail call void @copy_npnp(i32* %dst, i32* %src) + %dec = add nsw i32 %n.addr.0, -1 + %tobool = icmp eq i32 %n.addr.0, 0 + br i1 %tobool, label %do.end, label %do.body + +do.end: ; preds = %do.body + ret void +} + +; INTR-SCOPE: define dso_local void @test_npnp(i32* nocapture %dst, i32* nocapture readonly %src, i32 %n) local_unnamed_addr #0 { +; INTR-SCOPE: entry: +; INTR-SCOPE: %0 = call i8* @llvm.noalias.decl.p0i8.p0p0i32.i64(i32** null, i64 0, metadata !14) +; INTR-SCOPE: %1 = call i32* @llvm.noalias.p0i32.p0i8.p0p0i32.i64(i32* %dst, i8* %0, i32** null, i64 0, metadata !14) +; INTR-SCOPE: %2 = call i8* @llvm.noalias.decl.p0i8.p0p0i32.i64(i32** null, i64 0, metadata !17) +; INTR-SCOPE: %3 = call i32* @llvm.noalias.p0i32.p0i8.p0p0i32.i64(i32* %src, i8* %2, i32** null, i64 0, metadata !17) +; INTR-SCOPE: %4 = load i32, i32* %3, ptr_provenance i32* undef, align 4, !tbaa !2, !noalias !19 +; INTR-SCOPE: store i32 %4, i32* %1, ptr_provenance i32* undef, align 4, !tbaa !2, !noalias !19 +; INTR-SCOPE: br label %do.body +; INTR-SCOPE: do.body: ; preds = %do.body, %entry +; INTR-SCOPE: %n.addr.0 = phi i32 [ %n, %entry ], [ %dec, %do.body ] +; INTR-SCOPE: %5 = call i8* 
@llvm.noalias.decl.p0i8.p0p0i32.i64(i32** null, i64 0, metadata !20) +; INTR-SCOPE: %6 = call i32* @llvm.noalias.p0i32.p0i8.p0p0i32.i64(i32* %dst, i8* %5, i32** null, i64 0, metadata !20) +; INTR-SCOPE: %7 = call i8* @llvm.noalias.decl.p0i8.p0p0i32.i64(i32** null, i64 0, metadata !23) +; INTR-SCOPE: %8 = call i32* @llvm.noalias.p0i32.p0i8.p0p0i32.i64(i32* %src, i8* %7, i32** null, i64 0, metadata !23) +; INTR-SCOPE: %9 = load i32, i32* %8, ptr_provenance i32* undef, align 4, !tbaa !2, !noalias !25 +; INTR-SCOPE: store i32 %9, i32* %6, ptr_provenance i32* undef, align 4, !tbaa !2, !noalias !25 +; INTR-SCOPE: %10 = call i8* @llvm.noalias.decl.p0i8.p0p0i32.i64(i32** null, i64 0, metadata !26) +; INTR-SCOPE: %11 = call i32* @llvm.noalias.p0i32.p0i8.p0p0i32.i64(i32* %dst, i8* %10, i32** null, i64 0, metadata !26) +; INTR-SCOPE: %12 = call i8* @llvm.noalias.decl.p0i8.p0p0i32.i64(i32** null, i64 0, metadata !29) +; INTR-SCOPE: %13 = call i32* @llvm.noalias.p0i32.p0i8.p0p0i32.i64(i32* %src, i8* %12, i32** null, i64 0, metadata !29) +; INTR-SCOPE: %14 = load i32, i32* %13, ptr_provenance i32* undef, align 4, !tbaa !2, !noalias !31 +; INTR-SCOPE: store i32 %14, i32* %11, ptr_provenance i32* undef, align 4, !tbaa !2, !noalias !31 +; INTR-SCOPE: %dec = add nsw i32 %n.addr.0, -1 +; INTR-SCOPE: %tobool = icmp eq i32 %n.addr.0, 0 +; INTR-SCOPE: br i1 %tobool, label %do.end, label %do.body +; INTR-SCOPE: do.end: ; preds = %do.body +; INTR-SCOPE: ret void +; INTR-SCOPE: } + + +; Function Attrs: nounwind +define dso_local void @test_rprp(i32* nocapture %dst, i32* nocapture readonly %src, i32 %n) local_unnamed_addr #1 { +entry: + tail call void @copy_rprp(i32* %dst, i32* %src) + br label %do.body + +do.body: ; preds = %do.body, %entry + %n.addr.0 = phi i32 [ %n, %entry ], [ %dec, %do.body ] + tail call void @copy_rprp(i32* %dst, i32* %src) + tail call void @copy_rprp(i32* %dst, i32* %src) + %dec = add nsw i32 %n.addr.0, -1 + %tobool = icmp eq i32 %n.addr.0, 0 + br i1 %tobool, label 
%do.end, label %do.body + +do.end: ; preds = %do.body + ret void +} + +; INTR-SCOPE: define dso_local void @test_rprp(i32* nocapture %dst, i32* nocapture readonly %src, i32 %n) local_unnamed_addr #1 { +; INTR-SCOPE: entry: +; INTR-SCOPE: %0 = tail call i8* @llvm.noalias.decl.p0i8.p0p0i32.i64(i32** null, i64 0, metadata !32) #5 +; INTR-SCOPE: %1 = tail call i8* @llvm.noalias.decl.p0i8.p0p0i32.i64(i32** null, i64 0, metadata !35) #5 +; INTR-SCOPE: %2 = tail call i32* @llvm.provenance.noalias.p0i32.p0i8.p0p0i32.p0p0i32.i64(i32* %src, i8* %1, i32** null, i32** undef, i64 0, metadata !35) #5, !tbaa !11, !noalias !37 +; INTR-SCOPE: %3 = load i32, i32* %src, ptr_provenance i32* %2, align 4, !tbaa !2, !noalias !37 +; INTR-SCOPE: %4 = tail call i32* @llvm.provenance.noalias.p0i32.p0i8.p0p0i32.p0p0i32.i64(i32* %dst, i8* %0, i32** null, i32** undef, i64 0, metadata !32) #5, !tbaa !11, !noalias !37 +; INTR-SCOPE: store i32 %3, i32* %dst, ptr_provenance i32* %4, align 4, !tbaa !2, !noalias !37 +; INTR-SCOPE: br label %do.body +; INTR-SCOPE: do.body: ; preds = %do.body, %entry +; INTR-SCOPE: %n.addr.0 = phi i32 [ %n, %entry ], [ %dec, %do.body ] +; INTR-SCOPE: %5 = tail call i8* @llvm.noalias.decl.p0i8.p0p0i32.i64(i32** null, i64 0, metadata !38) #5 +; INTR-SCOPE: %6 = tail call i8* @llvm.noalias.decl.p0i8.p0p0i32.i64(i32** null, i64 0, metadata !41) #5 +; INTR-SCOPE: %7 = tail call i32* @llvm.provenance.noalias.p0i32.p0i8.p0p0i32.p0p0i32.i64(i32* %src, i8* %6, i32** null, i32** undef, i64 0, metadata !41) #5, !tbaa !11, !noalias !43 +; INTR-SCOPE: %8 = load i32, i32* %src, ptr_provenance i32* %7, align 4, !tbaa !2, !noalias !43 +; INTR-SCOPE: %9 = tail call i32* @llvm.provenance.noalias.p0i32.p0i8.p0p0i32.p0p0i32.i64(i32* %dst, i8* %5, i32** null, i32** undef, i64 0, metadata !38) #5, !tbaa !11, !noalias !43 +; INTR-SCOPE: store i32 %8, i32* %dst, ptr_provenance i32* %9, align 4, !tbaa !2, !noalias !43 +; INTR-SCOPE: %10 = tail call i8* @llvm.noalias.decl.p0i8.p0p0i32.i64(i32** 
null, i64 0, metadata !44) #5 +; INTR-SCOPE: %11 = tail call i8* @llvm.noalias.decl.p0i8.p0p0i32.i64(i32** null, i64 0, metadata !47) #5 +; INTR-SCOPE: %12 = tail call i32* @llvm.provenance.noalias.p0i32.p0i8.p0p0i32.p0p0i32.i64(i32* %src, i8* %11, i32** null, i32** undef, i64 0, metadata !47) #5, !tbaa !11, !noalias !49 +; INTR-SCOPE: %13 = load i32, i32* %src, ptr_provenance i32* %12, align 4, !tbaa !2, !noalias !49 +; INTR-SCOPE: %14 = tail call i32* @llvm.provenance.noalias.p0i32.p0i8.p0p0i32.p0p0i32.i64(i32* %dst, i8* %10, i32** null, i32** undef, i64 0, metadata !44) #5, !tbaa !11, !noalias !49 +; INTR-SCOPE: store i32 %13, i32* %dst, ptr_provenance i32* %14, align 4, !tbaa !2, !noalias !49 +; INTR-SCOPE: %dec = add nsw i32 %n.addr.0, -1 +; INTR-SCOPE: %tobool = icmp eq i32 %n.addr.0, 0 +; INTR-SCOPE: br i1 %tobool, label %do.end, label %do.body +; INTR-SCOPE: do.end: ; preds = %do.body +; INTR-SCOPE: ret void +; INTR-SCOPE: } + + +; Function Attrs: nounwind readnone speculatable +declare i32* @llvm.provenance.noalias.p0i32.p0i8.p0p0i32.p0p0i32.i64(i32*, i8*, i32**, i32**, i64, metadata) #3 + +attributes #0 = { nofree norecurse nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #1 = { nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #2 = { argmemonly nounwind } 
+attributes #3 = { nounwind readnone speculatable } + +!llvm.module.flags = !{!0} +!llvm.ident = !{!1} + +!0 = !{i32 1, !"wchar_size", i32 4} +!1 = !{!"clang"} +!2 = !{!3, !3, i64 0, i64 4} +!3 = !{!4, i64 4, !"int"} +!4 = !{!5, i64 1, !"omnipotent char"} +!5 = !{!"Simple C/C++ TBAA"} +!6 = !{!7} +!7 = distinct !{!7, !8, !"copy_rprp: rdst"} +!8 = distinct !{!8, !"copy_rprp"} +!9 = !{!10} +!10 = distinct !{!10, !8, !"copy_rprp: rsrc"} +!11 = !{!12, !12, i64 0, i64 4} +!12 = !{!4, i64 4, !"any pointer"} +!13 = !{!7, !10} + +; INTR-SCOPE: !0 = !{i32 1, !"wchar_size", i32 4} +; INTR-SCOPE: !1 = !{!"clang"} +; INTR-SCOPE: !2 = !{!3, !3, i64 0, i64 4} +; INTR-SCOPE: !3 = !{!4, i64 4, !"int"} +; INTR-SCOPE: !4 = !{!5, i64 1, !"omnipotent char"} +; INTR-SCOPE: !5 = !{!"Simple C/C++ TBAA"} +; INTR-SCOPE: !6 = !{!7} +; INTR-SCOPE: !7 = distinct !{!7, !8, !"copy_rprp: rdst"} +; INTR-SCOPE: !8 = distinct !{!8, !"copy_rprp"} +; INTR-SCOPE: !9 = !{!10} +; INTR-SCOPE: !10 = distinct !{!10, !8, !"copy_rprp: rsrc"} +; INTR-SCOPE: !11 = !{!12, !12, i64 0, i64 4} +; INTR-SCOPE: !12 = !{!4, i64 4, !"any pointer"} +; INTR-SCOPE: !13 = !{!7, !10} +; INTR-SCOPE: !14 = !{!15} +; INTR-SCOPE: !15 = distinct !{!15, !16, !"copy_npnp: %dst"} +; INTR-SCOPE: !16 = distinct !{!16, !"copy_npnp"} +; INTR-SCOPE: !17 = !{!18} +; INTR-SCOPE: !18 = distinct !{!18, !16, !"copy_npnp: %src"} +; INTR-SCOPE: !19 = !{!15, !18} +; INTR-SCOPE: !20 = !{!21} +; INTR-SCOPE: !21 = distinct !{!21, !22, !"copy_npnp: %dst"} +; INTR-SCOPE: !22 = distinct !{!22, !"copy_npnp"} +; INTR-SCOPE: !23 = !{!24} +; INTR-SCOPE: !24 = distinct !{!24, !22, !"copy_npnp: %src"} +; INTR-SCOPE: !25 = !{!21, !24} +; INTR-SCOPE: !26 = !{!27} +; INTR-SCOPE: !27 = distinct !{!27, !28, !"copy_npnp: %dst"} +; INTR-SCOPE: !28 = distinct !{!28, !"copy_npnp"} +; INTR-SCOPE: !29 = !{!30} +; INTR-SCOPE: !30 = distinct !{!30, !28, !"copy_npnp: %src"} +; INTR-SCOPE: !31 = !{!27, !30} +; INTR-SCOPE: !32 = !{!33} +; INTR-SCOPE: !33 = distinct !{!33, 
!34, !"copy_rprp: rdst"} +; INTR-SCOPE: !34 = distinct !{!34, !"copy_rprp"} +; INTR-SCOPE: !35 = !{!36} +; INTR-SCOPE: !36 = distinct !{!36, !34, !"copy_rprp: rsrc"} +; INTR-SCOPE: !37 = !{!33, !36} +; INTR-SCOPE: !38 = !{!39} +; INTR-SCOPE: !39 = distinct !{!39, !40, !"copy_rprp: rdst"} +; INTR-SCOPE: !40 = distinct !{!40, !"copy_rprp"} +; INTR-SCOPE: !41 = !{!42} +; INTR-SCOPE: !42 = distinct !{!42, !40, !"copy_rprp: rsrc"} +; INTR-SCOPE: !43 = !{!39, !42} +; INTR-SCOPE: !44 = !{!45} +; INTR-SCOPE: !45 = distinct !{!45, !46, !"copy_rprp: rdst"} +; INTR-SCOPE: !46 = distinct !{!46, !"copy_rprp"} +; INTR-SCOPE: !47 = !{!48} +; INTR-SCOPE: !48 = distinct !{!48, !46, !"copy_rprp: rsrc"} +; INTR-SCOPE: !49 = !{!45, !48} Index: llvm/test/Transforms/Inline/noalias.ll =================================================================== --- llvm/test/Transforms/Inline/noalias.ll +++ llvm/test/Transforms/Inline/noalias.ll @@ -1,4 +1,5 @@ -; RUN: opt -inline -enable-noalias-to-md-conversion -S < %s | FileCheck %s +; RUN: opt -inline -enable-noalias-to-md-conversion -use-noalias-intrinsic-during-inlining=0 -S < %s | FileCheck %s -check-prefix=MD-SCOPE +; RUN: opt -inline -enable-noalias-to-md-conversion -use-noalias-intrinsic-during-inlining=1 -S < %s | FileCheck %s -check-prefix=INTR-SCOPE target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" target triple = "x86_64-unknown-linux-gnu" @@ -19,16 +20,29 @@ ret void } -; CHECK: define void @foo(float* nocapture %a, float* nocapture readonly %c) #0 { -; CHECK: entry: -; CHECK: %0 = load float, float* %c, align 4, !noalias !0 -; CHECK: %arrayidx.i = getelementptr inbounds float, float* %a, i64 5 -; CHECK: store float %0, float* %arrayidx.i, align 4, !alias.scope !0 -; CHECK: %1 = load float, float* %c, align 4 -; CHECK: %arrayidx = getelementptr inbounds float, float* %a, i64 7 -; CHECK: store float %1, float* 
%arrayidx, align 4 -; CHECK: ret void -; CHECK: } +; MD-SCOPE: define void @foo(float* nocapture %a, float* nocapture readonly %c) #0 { +; MD-SCOPE: entry: +; MD-SCOPE: %0 = load float, float* %c, align 4, !noalias !0 +; MD-SCOPE: %arrayidx.i = getelementptr inbounds float, float* %a, i64 5 +; MD-SCOPE: store float %0, float* %arrayidx.i, align 4, !alias.scope !0 +; MD-SCOPE: %1 = load float, float* %c, align 4 +; MD-SCOPE: %arrayidx = getelementptr inbounds float, float* %a, i64 7 +; MD-SCOPE: store float %1, float* %arrayidx, align 4 +; MD-SCOPE: ret void +; MD-SCOPE: } + +; INTR-SCOPE: define void @foo(float* nocapture %a, float* nocapture readonly %c) #0 { +; INTR-SCOPE: entry: +; INTR-SCOPE: %0 = call i8* @llvm.noalias.decl.p0i8.p0p0f32.i64(float** null, i64 0, metadata !0) +; INTR-SCOPE: %1 = call float* @llvm.noalias.p0f32.p0i8.p0p0f32.i64(float* %a, i8* %0, float** null, i64 0, metadata !0) +; INTR-SCOPE: %2 = load float, float* %c, align 4, !noalias !0 +; INTR-SCOPE: %arrayidx.i = getelementptr inbounds float, float* %1, i64 5 +; INTR-SCOPE: store float %2, float* %arrayidx.i, align 4, !noalias !0 +; INTR-SCOPE: %3 = load float, float* %c, align 4 +; INTR-SCOPE: %arrayidx = getelementptr inbounds float, float* %a, i64 7 +; INTR-SCOPE: store float %3, float* %arrayidx, align 4 +; INTR-SCOPE: ret void +; INTR-SCOPE: } define void @hello2(float* noalias nocapture %a, float* noalias nocapture %b, float* nocapture readonly %c) #0 { entry: @@ -49,28 +63,55 @@ ret void } -; CHECK: define void @foo2(float* nocapture %a, float* nocapture %b, float* nocapture readonly %c) #0 { -; CHECK: entry: -; CHECK: %0 = load float, float* %c, align 4, !noalias !3 -; CHECK: %arrayidx.i = getelementptr inbounds float, float* %a, i64 5 -; CHECK: store float %0, float* %arrayidx.i, align 4, !alias.scope !7, !noalias !8 -; CHECK: %arrayidx1.i = getelementptr inbounds float, float* %b, i64 8 -; CHECK: store float %0, float* %arrayidx1.i, align 4, !alias.scope !8, !noalias !7 -; 
CHECK: %1 = load float, float* %c, align 4 -; CHECK: %arrayidx = getelementptr inbounds float, float* %a, i64 7 -; CHECK: store float %1, float* %arrayidx, align 4 -; CHECK: ret void -; CHECK: } +; MD-SCOPE: define void @foo2(float* nocapture %a, float* nocapture %b, float* nocapture readonly %c) #0 { +; MD-SCOPE: entry: +; MD-SCOPE: %0 = load float, float* %c, align 4, !noalias !3 +; MD-SCOPE: %arrayidx.i = getelementptr inbounds float, float* %a, i64 5 +; MD-SCOPE: store float %0, float* %arrayidx.i, align 4, !alias.scope !7, !noalias !8 +; MD-SCOPE: %arrayidx1.i = getelementptr inbounds float, float* %b, i64 8 +; MD-SCOPE: store float %0, float* %arrayidx1.i, align 4, !alias.scope !8, !noalias !7 +; MD-SCOPE: %1 = load float, float* %c, align 4 +; MD-SCOPE: %arrayidx = getelementptr inbounds float, float* %a, i64 7 +; MD-SCOPE: store float %1, float* %arrayidx, align 4 +; MD-SCOPE: ret void +; MD-SCOPE: } + +; INTR-SCOPE: define void @foo2(float* nocapture %a, float* nocapture %b, float* nocapture readonly %c) #0 { +; INTR-SCOPE: entry: +; INTR-SCOPE: %0 = call i8* @llvm.noalias.decl.p0i8.p0p0f32.i64(float** null, i64 0, metadata !3) +; INTR-SCOPE: %1 = call float* @llvm.noalias.p0f32.p0i8.p0p0f32.i64(float* %a, i8* %0, float** null, i64 0, metadata !3) +; INTR-SCOPE: %2 = call i8* @llvm.noalias.decl.p0i8.p0p0f32.i64(float** null, i64 0, metadata !6) +; INTR-SCOPE: %3 = call float* @llvm.noalias.p0f32.p0i8.p0p0f32.i64(float* %b, i8* %2, float** null, i64 0, metadata !6) +; INTR-SCOPE: %4 = load float, float* %c, align 4, !noalias !8 +; INTR-SCOPE: %arrayidx.i = getelementptr inbounds float, float* %1, i64 5 +; INTR-SCOPE: store float %4, float* %arrayidx.i, align 4, !noalias !8 +; INTR-SCOPE: %arrayidx1.i = getelementptr inbounds float, float* %3, i64 8 +; INTR-SCOPE: store float %4, float* %arrayidx1.i, align 4, !noalias !8 +; INTR-SCOPE: %5 = load float, float* %c, align 4 +; INTR-SCOPE: %arrayidx = getelementptr inbounds float, float* %a, i64 7 +; INTR-SCOPE: 
store float %5, float* %arrayidx, align 4 +; INTR-SCOPE: ret void +; INTR-SCOPE: } attributes #0 = { nounwind uwtable } +attributes #1 = { argmemonly nounwind } -; CHECK: !0 = !{!1} -; CHECK: !1 = distinct !{!1, !2, !"hello: %a"} -; CHECK: !2 = distinct !{!2, !"hello"} -; CHECK: !3 = !{!4, !6} -; CHECK: !4 = distinct !{!4, !5, !"hello2: %a"} -; CHECK: !5 = distinct !{!5, !"hello2"} -; CHECK: !6 = distinct !{!6, !5, !"hello2: %b"} -; CHECK: !7 = !{!4} -; CHECK: !8 = !{!6} +; MD-SCOPE: !0 = !{!1} +; MD-SCOPE: !1 = distinct !{!1, !2, !"hello: %a"} +; MD-SCOPE: !2 = distinct !{!2, !"hello"} +; MD-SCOPE: !3 = !{!4, !6} +; MD-SCOPE: !4 = distinct !{!4, !5, !"hello2: %a"} +; MD-SCOPE: !5 = distinct !{!5, !"hello2"} +; MD-SCOPE: !6 = distinct !{!6, !5, !"hello2: %b"} +; MD-SCOPE: !7 = !{!4} +; MD-SCOPE: !8 = !{!6} +; INTR-SCOPE: !0 = !{!1} +; INTR-SCOPE: !1 = distinct !{!1, !2, !"hello: %a"} +; INTR-SCOPE: !2 = distinct !{!2, !"hello"} +; INTR-SCOPE: !3 = !{!4} +; INTR-SCOPE: !4 = distinct !{!4, !5, !"hello2: %a"} +; INTR-SCOPE: !5 = distinct !{!5, !"hello2"} +; INTR-SCOPE: !6 = !{!7} +; INTR-SCOPE: !7 = distinct !{!7, !5, !"hello2: %b"} +; INTR-SCOPE: !8 = !{!4, !7} Index: llvm/test/Transforms/Inline/noalias2.ll =================================================================== --- llvm/test/Transforms/Inline/noalias2.ll +++ llvm/test/Transforms/Inline/noalias2.ll @@ -1,6 +1,8 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature -; RUN: opt -inline -enable-noalias-to-md-conversion -S < %s | FileCheck %s --check-prefixes=CHECK,NO_ASSUME -; RUN: opt -inline -enable-noalias-to-md-conversion --enable-knowledge-retention -S < %s | FileCheck %s --check-prefixes=CHECK,USE_ASSUME +; RUN: opt -inline -enable-noalias-to-md-conversion -use-noalias-intrinsic-during-inlining=0 -S < %s | FileCheck %s --check-prefixes=CHECK,NO_ASSUME +; RUN: opt -inline -enable-noalias-to-md-conversion -use-noalias-intrinsic-during-inlining=0 
--enable-knowledge-retention -S < %s | FileCheck %s --check-prefixes=CHECK,USE_ASSUME +; RUN: opt -inline -enable-noalias-to-md-conversion -use-noalias-intrinsic-during-inlining=1 -S < %s | FileCheck %s -check-prefixes=INTR-SCOPE +; RUN: opt -inline -enable-noalias-to-md-conversion -use-noalias-intrinsic-during-inlining=1 --enable-knowledge-retention -S < %s | FileCheck %s --check-prefixes=INTR-SCOPE,INTR-SCOPE-USE_ASSUME target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" target triple = "x86_64-unknown-linux-gnu" @@ -14,12 +16,14 @@ ; CHECK-NEXT: store float [[TMP0]], float* [[ARRAYIDX]], align 4 ; CHECK-NEXT: ret void ; -; ASSUME-LABEL: @hello( -; ASSUME-NEXT: entry: -; ASSUME-NEXT: [[TMP0:%.*]] = load float, float* [[C:%.*]], align 4 -; ASSUME-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[A:%.*]], i64 5 -; ASSUME-NEXT: store float [[TMP0]], float* [[ARRAYIDX]], align 4 -; ASSUME-NEXT: ret void +; INTR-SCOPE-LABEL: define {{[^@]+}}@hello +; INTR-SCOPE-SAME: (float* noalias nocapture [[A:%.*]], float* noalias nocapture readonly [[C:%.*]]) #0 +; INTR-SCOPE-NEXT: entry: +; INTR-SCOPE-NEXT: [[TMP0:%.*]] = load float, float* [[C]], align 4 +; INTR-SCOPE-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[A]], i64 5 +; INTR-SCOPE-NEXT: store float [[TMP0]], float* [[ARRAYIDX]], align 4 +; INTR-SCOPE-NEXT: ret void +; entry: %0 = load float, float* %c, align 4 %arrayidx = getelementptr inbounds float, float* %a, i64 5 @@ -39,16 +43,21 @@ ; CHECK-NEXT: store float [[TMP1]], float* [[ARRAYIDX]], align 4 ; CHECK-NEXT: ret void ; -; ASSUME-LABEL: @foo( -; ASSUME-NEXT: entry: -; ASSUME-NEXT: call void @llvm.assume(i1 true) [ "noalias"(float* [[A:%.*]]), "noalias"(float* [[C:%.*]]) ] -; ASSUME-NEXT: [[TMP0:%.*]] = load float, float* [[C]], align 4, !alias.scope !0, !noalias !3 -; ASSUME-NEXT: [[ARRAYIDX_I:%.*]] = getelementptr 
inbounds float, float* [[A]], i64 5 -; ASSUME-NEXT: store float [[TMP0]], float* [[ARRAYIDX_I]], align 4, !alias.scope !3, !noalias !0 -; ASSUME-NEXT: [[TMP1:%.*]] = load float, float* [[C]], align 4 -; ASSUME-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[A]], i64 7 -; ASSUME-NEXT: store float [[TMP1]], float* [[ARRAYIDX]], align 4 -; ASSUME-NEXT: ret void +; INTR-SCOPE-LABEL: define {{[^@]+}}@foo +; INTR-SCOPE-SAME: (float* noalias nocapture [[A:%.*]], float* noalias nocapture readonly [[C:%.*]]) #0 +; INTR-SCOPE-NEXT: entry: +; INTR-SCOPE-NEXT: [[TMP0:%.*]] = call i8* @llvm.noalias.decl.p0i8.p0p0f32.i64(float** null, i64 0, metadata !0) +; INTR-SCOPE-NEXT: [[TMP1:%.*]] = call float* @llvm.noalias.p0f32.p0i8.p0p0f32.i64(float* [[A]], i8* [[TMP0]], float** null, i64 0, metadata !0) +; INTR-SCOPE-NEXT: [[TMP2:%.*]] = call i8* @llvm.noalias.decl.p0i8.p0p0f32.i64(float** null, i64 0, metadata !3) +; INTR-SCOPE-NEXT: [[TMP3:%.*]] = call float* @llvm.noalias.p0f32.p0i8.p0p0f32.i64(float* [[C]], i8* [[TMP2]], float** null, i64 0, metadata !3) +; INTR-SCOPE-NEXT: [[TMP4:%.*]] = load float, float* [[TMP3]], align 4, !noalias !5 +; INTR-SCOPE-NEXT: [[ARRAYIDX_I:%.*]] = getelementptr inbounds float, float* [[TMP1]], i64 5 +; INTR-SCOPE-NEXT: store float [[TMP4]], float* [[ARRAYIDX_I]], align 4, !noalias !5 +; INTR-SCOPE-NEXT: [[TMP5:%.*]] = load float, float* [[C]], align 4 +; INTR-SCOPE-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[A]], i64 7 +; INTR-SCOPE-NEXT: store float [[TMP5]], float* [[ARRAYIDX]], align 4 +; INTR-SCOPE-NEXT: ret void +; entry: tail call void @hello(float* %a, float* %c) %0 = load float, float* %c, align 4 @@ -68,6 +77,16 @@ ; CHECK-NEXT: store float [[TMP0]], float* [[ARRAYIDX1]], align 4 ; CHECK-NEXT: ret void ; +; INTR-SCOPE-LABEL: define {{[^@]+}}@hello2 +; INTR-SCOPE-SAME: (float* noalias nocapture [[A:%.*]], float* noalias nocapture [[B:%.*]], float* nocapture readonly [[C:%.*]]) #0 +; INTR-SCOPE-NEXT: entry: 
+; INTR-SCOPE-NEXT: [[TMP0:%.*]] = load float, float* [[C]], align 4 +; INTR-SCOPE-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[A]], i64 6 +; INTR-SCOPE-NEXT: store float [[TMP0]], float* [[ARRAYIDX]], align 4 +; INTR-SCOPE-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds float, float* [[B]], i64 8 +; INTR-SCOPE-NEXT: store float [[TMP0]], float* [[ARRAYIDX1]], align 4 +; INTR-SCOPE-NEXT: ret void +; entry: %0 = load float, float* %c, align 4 %arrayidx = getelementptr inbounds float, float* %a, i64 6 @@ -99,6 +118,37 @@ ; CHECK-NEXT: store float [[TMP3]], float* [[ARRAYIDX]], align 4 ; CHECK-NEXT: ret void ; +; INTR-SCOPE-LABEL: define {{[^@]+}}@foo2 +; INTR-SCOPE-SAME: (float* nocapture [[A:%.*]], float* nocapture [[B:%.*]], float* nocapture readonly [[C:%.*]]) #0 +; INTR-SCOPE-NEXT: entry: +; INTR-SCOPE-NEXT: [[TMP0:%.*]] = call i8* @llvm.noalias.decl.p0i8.p0p0f32.i64(float** null, i64 0, metadata !6) +; INTR-SCOPE-NEXT: [[TMP1:%.*]] = call float* @llvm.noalias.p0f32.p0i8.p0p0f32.i64(float* [[A]], i8* [[TMP0]], float** null, i64 0, metadata !6) +; INTR-SCOPE-NEXT: [[TMP2:%.*]] = call i8* @llvm.noalias.decl.p0i8.p0p0f32.i64(float** null, i64 0, metadata !9) +; INTR-SCOPE-NEXT: [[TMP3:%.*]] = call float* @llvm.noalias.p0f32.p0i8.p0p0f32.i64(float* [[C]], i8* [[TMP2]], float** null, i64 0, metadata !9) +; INTR-SCOPE-NEXT: [[TMP4:%.*]] = call i8* @llvm.noalias.decl.p0i8.p0p0f32.i64(float** null, i64 0, metadata !11) #3, !noalias !14 +; INTR-SCOPE-NEXT: [[TMP5:%.*]] = call float* @llvm.noalias.p0f32.p0i8.p0p0f32.i64(float* [[TMP1]], i8* [[TMP4]], float** null, i64 0, metadata !11) #3, !noalias !14 +; INTR-SCOPE-NEXT: [[TMP6:%.*]] = call i8* @llvm.noalias.decl.p0i8.p0p0f32.i64(float** null, i64 0, metadata !15) #3, !noalias !14 +; INTR-SCOPE-NEXT: [[TMP7:%.*]] = call float* @llvm.noalias.p0f32.p0i8.p0p0f32.i64(float* [[TMP3]], i8* [[TMP6]], float** null, i64 0, metadata !15) #3, !noalias !14 +; INTR-SCOPE-NEXT: [[TMP8:%.*]] = load float, float* 
[[TMP7]], align 4, !noalias !17 +; INTR-SCOPE-NEXT: [[ARRAYIDX_I_I:%.*]] = getelementptr inbounds float, float* [[TMP5]], i64 5 +; INTR-SCOPE-NEXT: store float [[TMP8]], float* [[ARRAYIDX_I_I]], align 4, !noalias !17 +; INTR-SCOPE-NEXT: [[TMP9:%.*]] = load float, float* [[TMP3]], align 4, !noalias !14 +; INTR-SCOPE-NEXT: [[ARRAYIDX_I:%.*]] = getelementptr inbounds float, float* [[TMP1]], i64 7 +; INTR-SCOPE-NEXT: store float [[TMP9]], float* [[ARRAYIDX_I]], align 4, !noalias !14 +; INTR-SCOPE-NEXT: [[TMP10:%.*]] = call i8* @llvm.noalias.decl.p0i8.p0p0f32.i64(float** null, i64 0, metadata !18) +; INTR-SCOPE-NEXT: [[TMP11:%.*]] = call float* @llvm.noalias.p0f32.p0i8.p0p0f32.i64(float* [[A]], i8* [[TMP10]], float** null, i64 0, metadata !18) +; INTR-SCOPE-NEXT: [[TMP12:%.*]] = call i8* @llvm.noalias.decl.p0i8.p0p0f32.i64(float** null, i64 0, metadata !21) +; INTR-SCOPE-NEXT: [[TMP13:%.*]] = call float* @llvm.noalias.p0f32.p0i8.p0p0f32.i64(float* [[B]], i8* [[TMP12]], float** null, i64 0, metadata !21) +; INTR-SCOPE-NEXT: [[TMP14:%.*]] = load float, float* [[C]], align 4, !noalias !23 +; INTR-SCOPE-NEXT: [[ARRAYIDX_I1:%.*]] = getelementptr inbounds float, float* [[TMP11]], i64 6 +; INTR-SCOPE-NEXT: store float [[TMP14]], float* [[ARRAYIDX_I1]], align 4, !noalias !23 +; INTR-SCOPE-NEXT: [[ARRAYIDX1_I:%.*]] = getelementptr inbounds float, float* [[TMP13]], i64 8 +; INTR-SCOPE-NEXT: store float [[TMP14]], float* [[ARRAYIDX1_I]], align 4, !noalias !23 +; INTR-SCOPE-NEXT: [[TMP15:%.*]] = load float, float* [[C]], align 4 +; INTR-SCOPE-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[A]], i64 7 +; INTR-SCOPE-NEXT: store float [[TMP15]], float* [[ARRAYIDX]], align 4 +; INTR-SCOPE-NEXT: ret void +; entry: tail call void @foo(float* %a, float* %c) tail call void @hello2(float* %a, float* %b, float* %c) @@ -108,6 +158,8 @@ ret void } +; FIXME: NO_ASSUME and CHECK produce the same metadata at this moment ? 
+ ; NO_ASSUME: !0 = !{!1} ; NO_ASSUME: !1 = distinct !{!1, !2, !"hello: %c"} ; NO_ASSUME: !2 = distinct !{!2, !"hello"} @@ -130,5 +182,30 @@ ; NO_ASSUME: !19 = !{!16} ; NO_ASSUME: !20 = !{!18} -attributes #0 = { nounwind uwtable } +; INTR-SCOPE: !0 = !{!1} +; INTR-SCOPE: !1 = distinct !{!1, !2, !"hello: %a"} +; INTR-SCOPE: !2 = distinct !{!2, !"hello"} +; INTR-SCOPE: !3 = !{!4} +; INTR-SCOPE: !4 = distinct !{!4, !2, !"hello: %c"} +; INTR-SCOPE: !5 = !{!1, !4} +; INTR-SCOPE: !6 = !{!7} +; INTR-SCOPE: !7 = distinct !{!7, !8, !"foo: %a"} +; INTR-SCOPE: !8 = distinct !{!8, !"foo"} +; INTR-SCOPE: !9 = !{!10} +; INTR-SCOPE: !10 = distinct !{!10, !8, !"foo: %c"} +; INTR-SCOPE: !11 = !{!12} +; INTR-SCOPE: !12 = distinct !{!12, !13, !"hello: %a"} +; INTR-SCOPE: !13 = distinct !{!13, !"hello"} +; INTR-SCOPE: !14 = !{!7, !10} +; INTR-SCOPE: !15 = !{!16} +; INTR-SCOPE: !16 = distinct !{!16, !13, !"hello: %c"} +; INTR-SCOPE: !17 = !{!12, !16, !7, !10} +; INTR-SCOPE: !18 = !{!19} +; INTR-SCOPE: !19 = distinct !{!19, !20, !"hello2: %a"} +; INTR-SCOPE: !20 = distinct !{!20, !"hello2"} +; INTR-SCOPE: !21 = !{!22} +; INTR-SCOPE: !22 = distinct !{!22, !20, !"hello2: %b"} +; INTR-SCOPE: !23 = !{!19, !22} + +attributes #0 = { nounwind uwtable } Index: llvm/test/Transforms/Inline/noalias_recursive.ll =================================================================== --- /dev/null +++ llvm/test/Transforms/Inline/noalias_recursive.ll @@ -0,0 +1,113 @@ +; RUN: opt -inline -enable-noalias-to-md-conversion -use-noalias-intrinsic-during-inlining=1 -S < %s | FileCheck %s + +%class.ah = type { [8 x i8] } + +; Test for self recursion: + +; Function Attrs: nounwind uwtable +define void @Test01(%class.ah* noalias sret align 8 %agg.result, i32 %n) local_unnamed_addr #0 !noalias !1 { +entry: + %switch = icmp eq i32 %n, 0 + br i1 %switch, label %sw.bb, label %sw.bb1 + +sw.bb: ; preds = %entry + %0 = getelementptr inbounds %class.ah, %class.ah* %agg.result, i64 0, i32 0, i64 0 + store i8 42, i8* %0, 
!noalias !1 + ret void + +sw.bb1: ; preds = %entry + call void @Test01(%class.ah* nonnull sret align 8 %agg.result, i32 0), !noalias !1 + ret void +} + +; CHECK-LABEL: define void @Test01(%class.ah* noalias sret align 8 %agg.result, i32 %n) local_unnamed_addr #0 !noalias !1 { +; CHECK: sw.bb1: +; CHECK-NEXT: %1 = call i8* @llvm.noalias.decl.p0i8.p0p0s_class.ahs.i64(%class.ah** null, i64 0, metadata !4) +; CHECK-NEXT: %2 = call %class.ah* @llvm.noalias.p0s_class.ahs.p0i8.p0p0s_class.ahs.i64(%class.ah* %agg.result, i8* %1, %class.ah** null, i64 0, metadata !4), !noalias !1 +; CHECK-NEXT: %3 = getelementptr inbounds %class.ah, %class.ah* %2, i64 0, i32 0, i64 0 +; CHECK-NEXT: store i8 42, i8* %3, align 1, !noalias !7 +; CHECK-NEXT: ret void + + +; An equivalent version, but without the self-recursion: + +; Function Attrs: nounwind uwtable +declare void @Test02c(%class.ah* noalias sret align 8 %agg.result, i32 %n) local_unnamed_addr #0 + +; Function Attrs: nounwind uwtable +define void @Test02b(%class.ah* noalias sret align 8 %agg.result, i32 %n) local_unnamed_addr #0 !noalias !4 { +entry: + %switch = icmp eq i32 %n, 0 + br i1 %switch, label %sw.bb, label %sw.bb1 + +sw.bb: ; preds = %entry + %0 = getelementptr inbounds %class.ah, %class.ah* %agg.result, i64 0, i32 0, i64 0 + store i8 42, i8* %0, !noalias !4 + ret void + +sw.bb1: ; preds = %entry + call void @Test02c(%class.ah* nonnull sret align 8 %agg.result, i32 0), !noalias !4 + ret void +} + +; CHECK-LABEL: define void @Test02b(%class.ah* noalias sret align 8 %agg.result, i32 %n) local_unnamed_addr #0 !noalias !10 + +; Function Attrs: nounwind uwtable +define void @Test02a(%class.ah* noalias sret align 8 %agg.result, i32 %n) local_unnamed_addr #0 !noalias !7 { +entry: + %switch = icmp eq i32 %n, 0 + br i1 %switch, label %sw.bb, label %sw.bb1 + +sw.bb: ; preds = %entry + %0 = getelementptr inbounds %class.ah, %class.ah* %agg.result, i64 0, i32 0, i64 0 + store i8 42, i8* %0, !noalias !7 + ret void + +sw.bb1: ; preds 
= %entry + call void @Test02b(%class.ah* nonnull sret align 8 %agg.result, i32 0), !noalias !7 + ret void +} + +; CHECK-LABEL: define void @Test02a(%class.ah* noalias sret align 8 %agg.result, i32 %n) local_unnamed_addr #0 !noalias !13 +; CHECK: sw.bb1: +; CHECK-NEXT: %1 = call i8* @llvm.noalias.decl.p0i8.p0p0s_class.ahs.i64(%class.ah** null, i64 0, metadata !16) +; CHECK-NEXT: %2 = call %class.ah* @llvm.noalias.p0s_class.ahs.p0i8.p0p0s_class.ahs.i64(%class.ah* %agg.result, i8* %1, %class.ah** null, i64 0, metadata !16), !noalias !13 +; CHECK-NEXT: %3 = getelementptr inbounds %class.ah, %class.ah* %2, i64 0, i32 0, i64 0 +; CHECK-NEXT: store i8 42, i8* %3, align 1, !noalias !19 +; CHECK-NEXT: ret void + +attributes #0 = { nounwind uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="none" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" "unsafe-fp-math"="false" "use-soft-float"="false" } + +!llvm.ident = !{!0} + +!0 = !{!"clang"} +!1 = !{!2} +!2 = distinct !{!2, !3, !"Test01: unknown function scope"} +!3 = distinct !{!3, !"Test01"} +!4 = !{!5} +!5 = distinct !{!5, !6, !"Test02b: unknown function scope"} +!6 = distinct !{!6, !"Test02b"} +!7 = !{!8} +!8 = distinct !{!8, !9, !"Test02a: unknown function scope"} +!9 = distinct !{!9, !"Test02a"} + +; CHECK: !0 = !{!"clang"} +; CHECK-NEXT: !1 = !{!2} +; CHECK-NEXT: !2 = distinct !{!2, !3, !"Test01: unknown function scope"} +; CHECK-NEXT: !3 = distinct !{!3, !"Test01"} +; CHECK-NEXT: !4 = !{!5} +; CHECK-NEXT: !5 = distinct !{!5, !6, !"Test01: %agg.result"} +; CHECK-NEXT: !6 = distinct !{!6, !"Test01"} +; CHECK-NEXT: !7 = !{!8, !2, !5} +; CHECK-NEXT: !8 = distinct !{!8, !9, !"Test01: unknown function 
scope"} +; CHECK-NEXT: !9 = distinct !{!9, !"Test01"} +; CHECK-NEXT: !10 = !{!11} +; CHECK-NEXT: !11 = distinct !{!11, !12, !"Test02b: unknown function scope"} +; CHECK-NEXT: !12 = distinct !{!12, !"Test02b"} +; CHECK-NEXT: !13 = !{!14} +; CHECK-NEXT: !14 = distinct !{!14, !15, !"Test02a: unknown function scope"} +; CHECK-NEXT: !15 = distinct !{!15, !"Test02a"} +; CHECK-NEXT: !16 = !{!17} +; CHECK-NEXT: !17 = distinct !{!17, !18, !"Test02b: %agg.result"} +; CHECK-NEXT: !18 = distinct !{!18, !"Test02b"} +; CHECK-NEXT: !19 = !{!14, !17}