Index: llvm/include/llvm/Transforms/Utils/NoAliasUtils.h =================================================================== --- /dev/null +++ llvm/include/llvm/Transforms/Utils/NoAliasUtils.h @@ -0,0 +1,25 @@ +//===- llvm/Transforms/Utils/NoAliasUtils.h - NoAlias utilities -*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file defines utilities for noalias metadata and intrinsics. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_TRANSFORMS_UTILS_NOALIASUTILS_H +#define LLVM_TRANSFORMS_UTILS_NOALIASUTILS_H + +namespace llvm { +class Function; + +/// Connect llvm.noalias.decl to noalias/side.noalias intrinsics that are +/// associated with the unknown function scope and based on the same alloca. +/// At the same time, propagate the p.addr, p.objId and p.scope. 
+bool propagateAndConnectNoAliasDecl(Function *F); +} // end namespace llvm + +#endif // LLVM_TRANSFORMS_UTILS_NOALIASUTILS_H Index: llvm/lib/Transforms/Utils/CMakeLists.txt =================================================================== --- llvm/lib/Transforms/Utils/CMakeLists.txt +++ llvm/lib/Transforms/Utils/CMakeLists.txt @@ -43,6 +43,7 @@ MisExpect.cpp ModuleUtils.cpp NameAnonGlobals.cpp + NoAliasUtils.cpp PredicateInfo.cpp PromoteMemoryToRegister.cpp StripGCRelocates.cpp Index: llvm/lib/Transforms/Utils/CloneFunction.cpp =================================================================== --- llvm/lib/Transforms/Utils/CloneFunction.cpp +++ llvm/lib/Transforms/Utils/CloneFunction.cpp @@ -36,6 +36,21 @@ #include using namespace llvm; +static void PropagateNoAliasSideChannelInfo(Instruction *To, + const Instruction *From) { + // The noalias_sidechannel is not automatically copied over in a 'clone()' + // Let's do it here. + if (auto *LI = dyn_cast(From)) { + if (LI->hasNoaliasSideChannelOperand()) + cast(To)->setNoaliasSideChannelOperand( + LI->getNoaliasSideChannelOperand()); + } else if (auto SI = dyn_cast(From)) { + if (SI->hasNoaliasSideChannelOperand()) + cast(To)->setNoaliasSideChannelOperand( + SI->getNoaliasSideChannelOperand()); + } +} + /// See comments in Cloning.h. BasicBlock *llvm::CloneBasicBlock(const BasicBlock *BB, ValueToValueMapTy &VMap, const Twine &NameSuffix, Function *F, @@ -55,6 +70,8 @@ DIFinder->processInstruction(*TheModule, I); Instruction *NewInst = I.clone(); + PropagateNoAliasSideChannelInfo(NewInst, &I); + if (I.hasName()) NewInst->setName(I.getName() + NameSuffix); NewBB->getInstList().push_back(NewInst); @@ -334,6 +351,7 @@ II != IE; ++II) { Instruction *NewInst = II->clone(); + PropagateNoAliasSideChannelInfo(NewInst, &*II); // Eagerly remap operands to the newly cloned instruction, except for PHI // nodes for which we defer processing until we update the CFG. 
@@ -861,6 +879,7 @@ // terminator gets replaced and StopAt == BB's terminator. for (; StopAt != &*BI && BB->getTerminator() != &*BI; ++BI) { Instruction *New = BI->clone(); + PropagateNoAliasSideChannelInfo(New, &*BI); New->setName(BI->getName()); New->insertBefore(NewTerm); ValueMapping[&*BI] = New; Index: llvm/lib/Transforms/Utils/InlineFunction.cpp =================================================================== --- llvm/lib/Transforms/Utils/InlineFunction.cpp +++ llvm/lib/Transforms/Utils/InlineFunction.cpp @@ -28,7 +28,6 @@ #include "llvm/Analysis/EHPersonalities.h" #include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Analysis/ProfileSummaryInfo.h" -#include "llvm/Transforms/Utils/Local.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/Analysis/VectorUtils.h" #include "llvm/IR/Argument.h" @@ -45,6 +44,7 @@ #include "llvm/IR/Dominators.h" #include "llvm/IR/Function.h" #include "llvm/IR/IRBuilder.h" +#include "llvm/IR/InstIterator.h" #include "llvm/IR/InstrTypes.h" #include "llvm/IR/Instruction.h" #include "llvm/IR/Instructions.h" @@ -61,6 +61,8 @@ #include "llvm/Support/CommandLine.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Transforms/Utils/Cloning.h" +#include "llvm/Transforms/Utils/Local.h" +#include "llvm/Transforms/Utils/NoAliasUtils.h" #include "llvm/Transforms/Utils/ValueMapper.h" #include #include @@ -79,10 +81,10 @@ cl::Hidden, cl::desc("Convert noalias attributes to metadata during inlining.")); -static cl::opt -UseNoAliasIntrinsic("use-noalias-intrinsic-during-inlining", cl::Hidden, - cl::init(true), - cl::desc("Use the llvm.noalias intrinsic during inlining.")); +static cl::opt UseNoAliasIntrinsic( + "use-noalias-intrinsic-during-inlining", cl::Hidden, cl::ZeroOrMore, + cl::init(false), + cl::desc("Use the llvm.noalias intrinsic during inlining.")); static cl::opt PreserveAlignmentAssumptions("preserve-alignment-assumptions-during-inlining", @@ -815,7 +817,8 @@ /// not be differentiated (and this would lead to 
miscompiles because the /// non-aliasing property communicated by the metadata could have /// call-site-specific control dependencies). -static void CloneAliasScopeMetadata(CallSite CS, ValueToValueMapTy &VMap) { +static void CloneAliasScopeMetadata(CallSite CS, ValueToValueMapTy &VMap, + Function *Caller) { const Function *CalledFunc = CS.getCalledFunction(); SetVector MD; @@ -824,6 +827,32 @@ // inter-procedural alias analysis passes. We can revisit this if it becomes // an efficiency or overhead problem. + // Track function level !noalias metadata ('unknown function' scope). This + // should be merged with the data from the callee + MDNode *CallerNoAlias = Caller->getMetadata("noalias"); + MDNode *CalleeNoAlias = CalledFunc->getMetadata("noalias"); + llvm::MDNode *NewUnknownScope = nullptr; + + if ((CalleeNoAlias != nullptr) && (CallerNoAlias == nullptr)) { + // NOTE: keep in sync with (clang) CGExpr: EmitLoadOfScalar + // NOTE: keep in sync with (clang) CGDecl: EmitAutoVarNoAlias/EmitNoAliasDecl + // NOTE: keep in sync with (llvm) InlineFunction: CloneAliasScopeMetadata + llvm::MDBuilder MDB(Caller->getContext()); + std::string Name = Caller->getName(); + auto NoAliasDomain = MDB.createAnonymousAliasScopeDomain(Name); + Name += ": unknown scope"; + + llvm::MDNode *UnknownScope = + MDB.createAnonymousAliasScope(NoAliasDomain, Name); + + { + SmallVector ScopeListEntries(1, UnknownScope); + CallerNoAlias = llvm::MDNode::get(Caller->getContext(), ScopeListEntries); + Caller->setMetadata("noalias", CallerNoAlias); + } + NewUnknownScope = UnknownScope; + } + for (const BasicBlock &I : *CalledFunc) for (const Instruction &J : I) { if (const MDNode *M = J.getMetadata(LLVMContext::MD_alias_scope)) @@ -832,15 +861,35 @@ MD.insert(M); // We also need to clone the metadata in noalias intrinsics. 
- if (const auto *II = dyn_cast(&J)) + if (const auto *II = dyn_cast(&J)) { if (II->getIntrinsicID() == Intrinsic::noalias) - if (const auto *M = dyn_cast(cast( - II->getOperand(1))->getMetadata())) + if (const auto *M = dyn_cast( + cast( + II->getOperand(Intrinsic::NoAliasScopeArg)) + ->getMetadata())) + MD.insert(M); + if (II->getIntrinsicID() == Intrinsic::side_noalias) + if (const auto *M = dyn_cast( + cast( + II->getOperand(Intrinsic::SideNoAliasScopeArg)) + ->getMetadata())) + MD.insert(M); + if (II->getIntrinsicID() == Intrinsic::noalias_decl) + if (const auto *M = dyn_cast( + cast( + II->getOperand(Intrinsic::NoAliasDeclScopeArg)) + ->getMetadata())) MD.insert(M); + if (II->getIntrinsicID() == Intrinsic::noalias_copy_guard) + if (const auto *M = dyn_cast( + cast( + II->getOperand(Intrinsic::NoAliasCopyGuardScopeArg)) + ->getMetadata())) + MD.insert(M); + } } - if (MD.empty()) - return; + // No early exit: make sure all memory instructions are annotated // Walk the existing metadata, adding the complete (perhaps cyclic) chain to // the set. @@ -857,6 +906,13 @@ // the noalias scopes and the lists of those scopes. SmallVector DummyNodes; DenseMap MDMap; + if (CalleeNoAlias != nullptr) { + // Map CalleeNoAlias onto CallerNoAlias + MD.remove(CalleeNoAlias); + DummyNodes.push_back(MDTuple::getTemporary(CalledFunc->getContext(), None)); + MDMap[CalleeNoAlias].reset(DummyNodes.back().get()); + cast(MDMap[CalleeNoAlias])->replaceAllUsesWith(CallerNoAlias); + } for (const MDNode *I : MD) { DummyNodes.push_back(MDTuple::getTemporary(CalledFunc->getContext(), None)); MDMap[I].reset(DummyNodes.back().get()); @@ -884,6 +940,7 @@ // Now replace the metadata in the new inlined instructions with the // repacements from the map. 
+ SmallPtrSet HandledInstructions; for (ValueToValueMapTy::iterator VMI = VMap.begin(), VMIE = VMap.end(); VMI != VMIE; ++VMI) { if (!VMI->second) @@ -893,6 +950,10 @@ if (!NI) continue; + // Check if we already adapted this instruction + if (!HandledInstructions.insert(NI).second) + continue; + if (MDNode *M = NI->getMetadata(LLVMContext::MD_alias_scope)) { MDNode *NewMD = MDMap[M]; // If the call site also had alias scope metadata (a list of scopes to @@ -923,17 +984,60 @@ } // Update the metadata referenced by a noalias intrinsic - if (auto *II = dyn_cast(NI)) - if (II->getIntrinsicID() == Intrinsic::noalias) - if (auto *M = dyn_cast(cast( - II->getOperand(1))->getMetadata())) { + if (auto *II = dyn_cast(NI)) { + auto ID = II->getIntrinsicID(); + if (ID == Intrinsic::noalias || ID == Intrinsic::side_noalias || + ID == Intrinsic::noalias_decl || + ID == Intrinsic::noalias_copy_guard) { + int NoAliasScope = 0; + if (ID == Intrinsic::noalias) + NoAliasScope = Intrinsic::NoAliasScopeArg; + if (ID == Intrinsic::side_noalias) + NoAliasScope = Intrinsic::SideNoAliasScopeArg; + if (ID == Intrinsic::noalias_decl) + NoAliasScope = Intrinsic::NoAliasDeclScopeArg; + if (ID == Intrinsic::noalias_copy_guard) + NoAliasScope = Intrinsic::NoAliasCopyGuardScopeArg; + + if (auto *M = dyn_cast( + cast(II->getOperand(NoAliasScope)) + ->getMetadata())) { // If the metadata is not in the map, it could be a new intrinsic // that was just added. 
auto MI = MDMap.find(M); if (MI != MDMap.end()) - II->setOperand(1, MetadataAsValue::get(CalledFunc->getContext(), - MI->second)); + II->setOperand( + NoAliasScope, + MetadataAsValue::get(CalledFunc->getContext(), MI->second)); } + } + } + } + + if (NewUnknownScope) { + // We now need to add the out-of-function scope to _all_ instructions with + // noalias data in the 'caller' + // Note: following strange choice of variable names is similar to how it is + // done later + // FIXME: hmm this might be less than fast :( + // hmm it is also needed to do this _after_ the metadata cloning, otherwise + // we seem to lose information ! + for (BasicBlock &I : *Caller) { + for (Instruction &J : I) { + if (const MDNode *M = J.getMetadata(LLVMContext::MD_noalias)) { + SmallVector NewScopeList; + for (auto &MDOp : M->operands()) { + NewScopeList.push_back(MDOp); + } + NewScopeList.push_back(NewUnknownScope); + J.setMetadata(LLVMContext::MD_noalias, + MDNode::get(Caller->getContext(), NewScopeList)); + } else if (J.mayReadOrWriteMemory()) { + // no Noalias, but we need to add the (new) 'unknown scope' ! + J.setMetadata(LLVMContext::MD_noalias, CallerNoAlias); + } + } + } } } @@ -967,6 +1071,12 @@ // For each noalias argument, add a noalias intrinsic call, and update the // value map to refer to the new result of the noalias call. 
for (const Argument *A : NoAliasArgs) { + Value *MappedA = VMap[A]; + if (isa(MappedA)) { + // Skip generating restrict intrinsics for known 'null' pointers + continue; + } + std::string Name = CalledFunc->getName(); if (A->hasName()) { Name += ": %"; @@ -976,16 +1086,20 @@ Name += utostr(A->getArgNo()); } - MDNode *AScope = - MDB.createAnonymousAliasScope(NewDomain, Name); + MDNode *AScope = MDB.createAnonymousAliasScope(NewDomain, Name); Scopes.push_back(AScope); - MDNode *AScopeList = - MDNode::get(CalledFunc->getContext(), AScope); - - Value *NA = - IRBuilder<>(CS.getInstruction()).CreateNoAliasPointer(VMap[A], - AScopeList); + MDNode *AScopeList = MDNode::get(CalledFunc->getContext(), AScope); + + // The alloca was optimized away -> use a nullptr + auto *IdentifyPAlloca = + ConstantPointerNull::get(MappedA->getType()->getPointerTo()); + auto *NoAliasDecl = + IRBuilder<>(CS.getInstruction()) + .CreateNoAliasDeclaration(IdentifyPAlloca, AScopeList); + Value *NA = IRBuilder<>(CS.getInstruction()) + .CreateNoAliasPointer(MappedA, NoAliasDecl, IdentifyPAlloca, + AScopeList); VMap[A] = NA; } @@ -1903,7 +2017,7 @@ CalledFunc->getSubprogram() != nullptr); // Clone existing noalias metadata if necessary. - CloneAliasScopeMetadata(CS, VMap); + CloneAliasScopeMetadata(CS, VMap, Caller); // Add noalias metadata if necessary. AddAliasScopeMetadata(CS, VMap, DL, CalleeAAR); @@ -2361,6 +2475,9 @@ // Since we are now done with the return instruction, delete it also. Returns[0]->eraseFromParent(); + // Already try to connect llvm.noalias.decl where possible + propagateAndConnectNoAliasDecl(Caller); + // We are now done with the inlining. return true; } @@ -2511,6 +2628,9 @@ // Now we can remove the CalleeEntry block, which is now empty. Caller->getBasicBlockList().erase(CalleeEntry); + // Already try to connect llvm.noalias.decl where possible + propagateAndConnectNoAliasDecl(Caller); + // If we inserted a phi node, check to see if it has a single value (e.g. 
all // the entries are the same or undef). If so, remove the PHI so it doesn't // block other optimizations. Index: llvm/lib/Transforms/Utils/NoAliasUtils.cpp =================================================================== --- /dev/null +++ llvm/lib/Transforms/Utils/NoAliasUtils.cpp @@ -0,0 +1,161 @@ +//===-- NoAliasUtils.cpp - NoAlias Utility functions ----------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file defines common noalias metadata and intrinsic utility functions. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Transforms/Utils/NoAliasUtils.h" +#include "llvm/ADT/MapVector.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/Analysis/ValueTracking.h" +#include "llvm/IR/InstIterator.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/MDBuilder.h" +#include "llvm/IR/Metadata.h" +#include "llvm/IR/Module.h" +#include "llvm/Support/Debug.h" + +using namespace llvm; + +#define DEBUG_TYPE "noalias-utils" + +bool llvm::propagateAndConnectNoAliasDecl(Function *F) { + auto *UnknownFunctionScope = F->getMetadata("noalias"); + if (UnknownFunctionScope == nullptr) + return false; + + SmallVector InterestingNoalias; + SmallMapVector KnownAllocaNoAliasDecl; + + auto TrackIfIsUnknownFunctionScope = [&](IntrinsicInst *I, unsigned Index) { + auto V = I->getOperand(Index); + if (cast(V)->getMetadata() == UnknownFunctionScope) { + InterestingNoalias.push_back(I); + } + }; + + for (Instruction &I : llvm::instructions(*F)) { + if (IntrinsicInst *II = dyn_cast(&I)) { + switch (II->getIntrinsicID()) { + case Intrinsic::noalias: { + TrackIfIsUnknownFunctionScope(II, Intrinsic::NoAliasScopeArg); + 
break; + } + case Intrinsic::side_noalias: { + TrackIfIsUnknownFunctionScope(II, Intrinsic::SideNoAliasScopeArg); + break; + } + case Intrinsic::noalias_copy_guard: { + TrackIfIsUnknownFunctionScope(II, Intrinsic::NoAliasCopyGuardScopeArg); + break; + } + case Intrinsic::noalias_decl: { + auto *depAlloca = dyn_cast(II->getOperand(0)); + if (depAlloca) { + KnownAllocaNoAliasDecl[depAlloca] = II; + } + break; + } + default: + break; + } + } + } + + if (KnownAllocaNoAliasDecl.empty() || InterestingNoalias.empty()) + return false; + + bool Changed = false; + for (auto *II : InterestingNoalias) { + SmallVector UO; + unsigned Index = + (II->getIntrinsicID() == Intrinsic::noalias + ? 0 + : (II->getIntrinsicID() == Intrinsic::side_noalias ? 1 : 2)); + const int IdentifyPArg[] = {Intrinsic::NoAliasIdentifyPArg, + Intrinsic::SideNoAliasIdentifyPArg, + Intrinsic::NoAliasCopyGuardIdentifyPBaseObject}; + const int ScopeArg[] = {Intrinsic::NoAliasScopeArg, + Intrinsic::SideNoAliasScopeArg, + Intrinsic::NoAliasCopyGuardScopeArg}; + const int NoAliasDeclArg[] = {Intrinsic::NoAliasNoAliasDeclArg, + Intrinsic::SideNoAliasNoAliasDeclArg, + Intrinsic::NoAliasCopyGuardNoAliasDeclArg}; + const int ObjIdArg[] = {Intrinsic::NoAliasIdentifyPObjIdArg, + Intrinsic::SideNoAliasIdentifyPObjIdArg, -1}; + + llvm::GetUnderlyingObjects(II->getOperand(IdentifyPArg[Index]), UO, + F->getParent()->getDataLayout()); + if (UO.size() != 1) { + // Multiple objects possible - It would be nice to propagate, but we do + // not do it yet. That is ok as the unknown function scope assumes more + // aliasing. 
+ LLVM_DEBUG(llvm::dbgs() + << "WARNING: no llvm.noalias.decl reconnect accross " + "PHI/select - YET (" + << UO.size() << " underlying objects)\n"); + continue; + } + + auto UA = dyn_cast(UO[0]); + if (UA) { + auto it = KnownAllocaNoAliasDecl.find(UA); + if (it != KnownAllocaNoAliasDecl.end()) { + Instruction *Decl = it->second; + // found a simple matching declaration - propagate + II->setOperand(ScopeArg[Index], + Decl->getOperand(Intrinsic::NoAliasDeclScopeArg)); + II->setOperand(NoAliasDeclArg[Index], Decl); + + auto ObjIdIndex = ObjIdArg[Index]; + if (ObjIdIndex != -1) { + II->setOperand(ObjIdIndex, + Decl->getOperand(Intrinsic::NoAliasDeclObjIdArg)); + } + Changed = true; + } else if (UnknownFunctionScope && isa(UA)) { + if (cast(II->getOperand(ScopeArg[Index])) + ->getMetadata() == UnknownFunctionScope) { + // we have an alloca, but no llvm.noalias.decl and we have unknown + // function scope. This is an indication of a temporary that (through a + // pointer or reference to a restrict pointer) introduces restrict. + // - the unknown scope is too broad for these cases + // - conceptually, the scope should be the lifetime of the local, but + // we don't have that information + // - the real restrictness should have been brought in through the + // 'depends on' relationship + // -> so we fall back on the 'depends on' and remove the restrictness + // information at this level. + LLVM_DEBUG( + llvm::dbgs() + << "- Temporary noalias object (without llvm.noalias.decl) " + "detected. Ignore restrictness: " + << *II << "\n"); + II->replaceAllUsesWith(II->getOperand(0)); + II->eraseFromParent(); + Changed = true; + } + } + } else { +#if !defined(NDEBUG) + if (isa(UO[0]) || isa(UO[0])) { + // Multiple objects possible - It would be nice to propagate, but we do + // not do it yet. That is ok as the unknown function scope assumes more + // aliasing. 
+ LLVM_DEBUG(llvm::dbgs() + << "WARNING: no llvm.noalias.decl reconnect accross " + "PHI/select - YET: " + << *UO[0] << "\n"); + } +#endif + } + } + return Changed; +} Index: llvm/test/Transforms/Inline/noalias-calls.ll =================================================================== --- llvm/test/Transforms/Inline/noalias-calls.ll +++ llvm/test/Transforms/Inline/noalias-calls.ll @@ -1,5 +1,5 @@ ; RUN: opt -basicaa -inline -enable-noalias-to-md-conversion -use-noalias-intrinsic-during-inlining=0 -S < %s | FileCheck %s -check-prefix=MD-SCOPE -; RUN: opt -inline -enable-noalias-to-md-conversion -S < %s | FileCheck %s -check-prefix=INTR-SCOPE +; RUN: opt -inline -enable-noalias-to-md-conversion -use-noalias-intrinsic-during-inlining=1 -S < %s | FileCheck %s -check-prefix=INTR-SCOPE target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" target triple = "x86_64-unknown-linux-gnu" @@ -22,27 +22,30 @@ tail call void @hello(i8* %a, i8* %c, i8* %b) ret void } - ; MD-SCOPE: define void @foo(i8* nocapture %a, i8* nocapture readonly %c, i8* nocapture %b) #1 { ; MD-SCOPE: entry: -; MD-SCOPE: call void @llvm.memcpy.p0i8.p0i8.i64(i8* %a, i8* %b, i64 16, i32 16, i1 false) #1, !noalias !0 -; MD-SCOPE: call void @llvm.memcpy.p0i8.p0i8.i64(i8* %b, i8* %c, i64 16, i32 16, i1 false) #1, !noalias !3 -; MD-SCOPE: call void @llvm.memcpy.p0i8.p0i8.i64(i8* %a, i8* %c, i64 16, i32 16, i1 false) #1, !alias.scope !5 +; MD-SCOPE: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 %a, i8* align 16 %b, i64 16, i1 false) #1, !noalias !0 +; MD-SCOPE: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 %b, i8* align 16 %c, i64 16, i1 false) #1, !noalias !3 +; MD-SCOPE: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 %a, i8* align 16 %c, i64 16, i1 false) #1, !alias.scope !5 ; MD-SCOPE: call void @hey() #1, !noalias !5 -; MD-SCOPE: call void @llvm.memcpy.p0i8.p0i8.i64(i8* %{{.*}}, i8* 
%c, i64 16, i32 16, i1 false) #1, !noalias !3 +; MD-SCOPE: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 %{{.*}}, i8* align 16 %c, i64 16, i1 false) #1, !noalias !3 ; MD-SCOPE: ret void ; MD-SCOPE: } ; INTR-SCOPE: define void @foo(i8* nocapture %a, i8* nocapture readonly %c, i8* nocapture %b) #1 { ; INTR-SCOPE: entry: ; INTR-SCOPE: %l.i = alloca i8, i32 512, align 1 -; INTR-SCOPE: %0 = call i8* @llvm.noalias.p0i8(i8* %a, metadata !0) -; INTR-SCOPE: %1 = call i8* @llvm.noalias.p0i8(i8* %c, metadata !3) -; INTR-SCOPE: call void @llvm.memcpy.p0i8.p0i8.i64(i8* %0, i8* %b, i64 16, i32 16, i1 false) #1, !noalias !5 -; INTR-SCOPE: call void @llvm.memcpy.p0i8.p0i8.i64(i8* %b, i8* %1, i64 16, i32 16, i1 false) #1, !noalias !5 -; INTR-SCOPE: call void @llvm.memcpy.p0i8.p0i8.i64(i8* %0, i8* %1, i64 16, i32 16, i1 false) #1, !noalias !5 +; INTR-SCOPE: %0 = call i8* @llvm.noalias.decl.p0i8.p0p0i8.i64(i8** null, i64 0, metadata !0) +; INTR-SCOPE: %1 = call i8* @llvm.noalias.p0i8.p0i8.p0p0i8.i64(i8* %a, i8* %0, i8** null, i64 0, metadata !0) +; INTR-SCOPE: %2 = call i8* @llvm.noalias.decl.p0i8.p0p0i8.i64(i8** null, i64 0, metadata !3) +; INTR-SCOPE: %3 = call i8* @llvm.noalias.p0i8.p0i8.p0p0i8.i64(i8* %c, i8* %2, i8** null, i64 0, metadata !3) +; INTR-SCOPE: call void @llvm.lifetime.start.p0i8(i64 512, i8* %l.i) +; INTR-SCOPE: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 %1, i8* align 16 %b, i64 16, i1 false) #1, !noalias !5 +; INTR-SCOPE: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 %b, i8* align 16 %3, i64 16, i1 false) #1, !noalias !5 +; INTR-SCOPE: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 %1, i8* align 16 %3, i64 16, i1 false) #1, !noalias !5 ; INTR-SCOPE: call void @hey() #1, !noalias !5 -; INTR-SCOPE: call void @llvm.memcpy.p0i8.p0i8.i64(i8* %l.i, i8* %1, i64 16, i32 16, i1 false) #1, !noalias !5 +; INTR-SCOPE: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 %l.i, i8* align 16 %3, i64 16, i1 false) #1, !noalias !5 +; INTR-SCOPE: call void 
@llvm.lifetime.end.p0i8(i64 512, i8* %l.i) ; INTR-SCOPE: ret void ; INTR-SCOPE: } @@ -55,7 +58,7 @@ ; MD-SCOPE: !3 = !{!4} ; MD-SCOPE: !4 = distinct !{!4, !2, !"hello: %a"} ; MD-SCOPE: !5 = !{!4, !1} - + ; INTR-SCOPE: !0 = !{!1} ; INTR-SCOPE: !1 = distinct !{!1, !2, !"hello: %a"} ; INTR-SCOPE: !2 = distinct !{!2, !"hello"} Index: llvm/test/Transforms/Inline/noalias-scopes.ll =================================================================== --- /dev/null +++ llvm/test/Transforms/Inline/noalias-scopes.ll @@ -0,0 +1,204 @@ +; RUN: opt -inline -enable-noalias-to-md-conversion -use-noalias-intrinsic-during-inlining=1 -S < %s | FileCheck %s -check-prefix=INTR-SCOPE +; verify that inlining results in scope duplication +; verify that llvm.noalias.decl is introduced at the location of the inlining + +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +; Function Attrs: nofree norecurse nounwind +define dso_local void @copy_npnp(i32* noalias nocapture %dst, i32* noalias nocapture readonly %src) local_unnamed_addr #0 { +entry: + %0 = load i32, i32* %src, noalias_sidechannel i32* undef, align 4, !tbaa !2 + store i32 %0, i32* %dst, noalias_sidechannel i32* undef, align 4, !tbaa !2 + ret void +} + +; Function Attrs: nounwind +define dso_local void @copy_rprp(i32* nocapture %dst, i32* nocapture readonly %src) local_unnamed_addr #1 { +entry: + %0 = tail call i8* @llvm.noalias.decl.p0i8.p0p0i32.i64(i32** null, i64 0, metadata !6) + %1 = tail call i8* @llvm.noalias.decl.p0i8.p0p0i32.i64(i32** null, i64 0, metadata !9) + %2 = tail call i32* @llvm.side.noalias.p0i32.p0i8.p0p0i32.p0p0i32.i64(i32* %src, i8* %1, i32** null, i32** undef, i64 0, metadata !9), !tbaa !11, !noalias !13 + %3 = load i32, i32* %src, noalias_sidechannel i32* %2, align 4, !tbaa !2, !noalias !13 + %4 = tail call i32* 
@llvm.side.noalias.p0i32.p0i8.p0p0i32.p0p0i32.i64(i32* %dst, i8* %0, i32** null, i32** undef, i64 0, metadata !6), !tbaa !11, !noalias !13 + store i32 %3, i32* %dst, noalias_sidechannel i32* %4, align 4, !tbaa !2, !noalias !13 + ret void +} + +; Function Attrs: argmemonly nounwind +declare i8* @llvm.noalias.decl.p0i8.p0p0i32.i64(i32**, i64, metadata) #2 + +; Function Attrs: nofree norecurse nounwind +define dso_local void @test_npnp(i32* nocapture %dst, i32* nocapture readonly %src, i32 %n) local_unnamed_addr #0 { +entry: + tail call void @copy_npnp(i32* %dst, i32* %src) + br label %do.body + +do.body: ; preds = %do.body, %entry + %n.addr.0 = phi i32 [ %n, %entry ], [ %dec, %do.body ] + tail call void @copy_npnp(i32* %dst, i32* %src) + tail call void @copy_npnp(i32* %dst, i32* %src) + %dec = add nsw i32 %n.addr.0, -1 + %tobool = icmp eq i32 %n.addr.0, 0 + br i1 %tobool, label %do.end, label %do.body + +do.end: ; preds = %do.body + ret void +} + +; INTR-SCOPE: define dso_local void @test_npnp(i32* nocapture %dst, i32* nocapture readonly %src, i32 %n) local_unnamed_addr #0 { +; INTR-SCOPE: entry: +; INTR-SCOPE: %0 = call i8* @llvm.noalias.decl.p0i8.p0p0i32.i64(i32** null, i64 0, metadata !14) +; INTR-SCOPE: %1 = call i32* @llvm.noalias.p0i32.p0i8.p0p0i32.i64(i32* %dst, i8* %0, i32** null, i64 0, metadata !14) +; INTR-SCOPE: %2 = call i8* @llvm.noalias.decl.p0i8.p0p0i32.i64(i32** null, i64 0, metadata !17) +; INTR-SCOPE: %3 = call i32* @llvm.noalias.p0i32.p0i8.p0p0i32.i64(i32* %src, i8* %2, i32** null, i64 0, metadata !17) +; INTR-SCOPE: %4 = load i32, i32* %3, noalias_sidechannel i32* undef, align 4, !tbaa !2, !noalias !19 +; INTR-SCOPE: store i32 %4, i32* %1, noalias_sidechannel i32* undef, align 4, !tbaa !2, !noalias !19 +; INTR-SCOPE: br label %do.body +; INTR-SCOPE: do.body: ; preds = %do.body, %entry +; INTR-SCOPE: %n.addr.0 = phi i32 [ %n, %entry ], [ %dec, %do.body ] +; INTR-SCOPE: %5 = call i8* @llvm.noalias.decl.p0i8.p0p0i32.i64(i32** null, i64 0, metadata 
!20) +; INTR-SCOPE: %6 = call i32* @llvm.noalias.p0i32.p0i8.p0p0i32.i64(i32* %dst, i8* %5, i32** null, i64 0, metadata !20) +; INTR-SCOPE: %7 = call i8* @llvm.noalias.decl.p0i8.p0p0i32.i64(i32** null, i64 0, metadata !23) +; INTR-SCOPE: %8 = call i32* @llvm.noalias.p0i32.p0i8.p0p0i32.i64(i32* %src, i8* %7, i32** null, i64 0, metadata !23) +; INTR-SCOPE: %9 = load i32, i32* %8, noalias_sidechannel i32* undef, align 4, !tbaa !2, !noalias !25 +; INTR-SCOPE: store i32 %9, i32* %6, noalias_sidechannel i32* undef, align 4, !tbaa !2, !noalias !25 +; INTR-SCOPE: %10 = call i8* @llvm.noalias.decl.p0i8.p0p0i32.i64(i32** null, i64 0, metadata !26) +; INTR-SCOPE: %11 = call i32* @llvm.noalias.p0i32.p0i8.p0p0i32.i64(i32* %dst, i8* %10, i32** null, i64 0, metadata !26) +; INTR-SCOPE: %12 = call i8* @llvm.noalias.decl.p0i8.p0p0i32.i64(i32** null, i64 0, metadata !29) +; INTR-SCOPE: %13 = call i32* @llvm.noalias.p0i32.p0i8.p0p0i32.i64(i32* %src, i8* %12, i32** null, i64 0, metadata !29) +; INTR-SCOPE: %14 = load i32, i32* %13, noalias_sidechannel i32* undef, align 4, !tbaa !2, !noalias !31 +; INTR-SCOPE: store i32 %14, i32* %11, noalias_sidechannel i32* undef, align 4, !tbaa !2, !noalias !31 +; INTR-SCOPE: %dec = add nsw i32 %n.addr.0, -1 +; INTR-SCOPE: %tobool = icmp eq i32 %n.addr.0, 0 +; INTR-SCOPE: br i1 %tobool, label %do.end, label %do.body +; INTR-SCOPE: do.end: ; preds = %do.body +; INTR-SCOPE: ret void +; INTR-SCOPE: } + + +; Function Attrs: nounwind +define dso_local void @test_rprp(i32* nocapture %dst, i32* nocapture readonly %src, i32 %n) local_unnamed_addr #1 { +entry: + tail call void @copy_rprp(i32* %dst, i32* %src) + br label %do.body + +do.body: ; preds = %do.body, %entry + %n.addr.0 = phi i32 [ %n, %entry ], [ %dec, %do.body ] + tail call void @copy_rprp(i32* %dst, i32* %src) + tail call void @copy_rprp(i32* %dst, i32* %src) + %dec = add nsw i32 %n.addr.0, -1 + %tobool = icmp eq i32 %n.addr.0, 0 + br i1 %tobool, label %do.end, label %do.body + +do.end: ; preds = 
%do.body + ret void +} + +; INTR-SCOPE: define dso_local void @test_rprp(i32* nocapture %dst, i32* nocapture readonly %src, i32 %n) local_unnamed_addr #1 { +; INTR-SCOPE: entry: +; INTR-SCOPE: %0 = tail call i8* @llvm.noalias.decl.p0i8.p0p0i32.i64(i32** null, i64 0, metadata !32) #5 +; INTR-SCOPE: %1 = tail call i8* @llvm.noalias.decl.p0i8.p0p0i32.i64(i32** null, i64 0, metadata !35) #5 +; INTR-SCOPE: %2 = tail call i32* @llvm.side.noalias.p0i32.p0i8.p0p0i32.p0p0i32.i64(i32* %src, i8* %1, i32** null, i32** undef, i64 0, metadata !35) #5, !tbaa !11, !noalias !37 +; INTR-SCOPE: %3 = load i32, i32* %src, noalias_sidechannel i32* %2, align 4, !tbaa !2, !noalias !37 +; INTR-SCOPE: %4 = tail call i32* @llvm.side.noalias.p0i32.p0i8.p0p0i32.p0p0i32.i64(i32* %dst, i8* %0, i32** null, i32** undef, i64 0, metadata !32) #5, !tbaa !11, !noalias !37 +; INTR-SCOPE: store i32 %3, i32* %dst, noalias_sidechannel i32* %4, align 4, !tbaa !2, !noalias !37 +; INTR-SCOPE: br label %do.body +; INTR-SCOPE: do.body: ; preds = %do.body, %entry +; INTR-SCOPE: %n.addr.0 = phi i32 [ %n, %entry ], [ %dec, %do.body ] +; INTR-SCOPE: %5 = tail call i8* @llvm.noalias.decl.p0i8.p0p0i32.i64(i32** null, i64 0, metadata !38) #5 +; INTR-SCOPE: %6 = tail call i8* @llvm.noalias.decl.p0i8.p0p0i32.i64(i32** null, i64 0, metadata !41) #5 +; INTR-SCOPE: %7 = tail call i32* @llvm.side.noalias.p0i32.p0i8.p0p0i32.p0p0i32.i64(i32* %src, i8* %6, i32** null, i32** undef, i64 0, metadata !41) #5, !tbaa !11, !noalias !43 +; INTR-SCOPE: %8 = load i32, i32* %src, noalias_sidechannel i32* %7, align 4, !tbaa !2, !noalias !43 +; INTR-SCOPE: %9 = tail call i32* @llvm.side.noalias.p0i32.p0i8.p0p0i32.p0p0i32.i64(i32* %dst, i8* %5, i32** null, i32** undef, i64 0, metadata !38) #5, !tbaa !11, !noalias !43 +; INTR-SCOPE: store i32 %8, i32* %dst, noalias_sidechannel i32* %9, align 4, !tbaa !2, !noalias !43 +; INTR-SCOPE: %10 = tail call i8* @llvm.noalias.decl.p0i8.p0p0i32.i64(i32** null, i64 0, metadata !44) #5 +; INTR-SCOPE: %11 
= tail call i8* @llvm.noalias.decl.p0i8.p0p0i32.i64(i32** null, i64 0, metadata !47) #5 +; INTR-SCOPE: %12 = tail call i32* @llvm.side.noalias.p0i32.p0i8.p0p0i32.p0p0i32.i64(i32* %src, i8* %11, i32** null, i32** undef, i64 0, metadata !47) #5, !tbaa !11, !noalias !49 +; INTR-SCOPE: %13 = load i32, i32* %src, noalias_sidechannel i32* %12, align 4, !tbaa !2, !noalias !49 +; INTR-SCOPE: %14 = tail call i32* @llvm.side.noalias.p0i32.p0i8.p0p0i32.p0p0i32.i64(i32* %dst, i8* %10, i32** null, i32** undef, i64 0, metadata !44) #5, !tbaa !11, !noalias !49 +; INTR-SCOPE: store i32 %13, i32* %dst, noalias_sidechannel i32* %14, align 4, !tbaa !2, !noalias !49 +; INTR-SCOPE: %dec = add nsw i32 %n.addr.0, -1 +; INTR-SCOPE: %tobool = icmp eq i32 %n.addr.0, 0 +; INTR-SCOPE: br i1 %tobool, label %do.end, label %do.body +; INTR-SCOPE: do.end: ; preds = %do.body +; INTR-SCOPE: ret void +; INTR-SCOPE: } + + +; Function Attrs: nounwind readnone speculatable +declare i32* @llvm.side.noalias.p0i32.p0i8.p0p0i32.p0p0i32.i64(i32*, i8*, i32**, i32**, i64, metadata) #3 + +attributes #0 = { nofree norecurse nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #1 = { nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #2 = { argmemonly nounwind } +attributes #3 = { nounwind readnone speculatable } + 
+!llvm.module.flags = !{!0} +!llvm.ident = !{!1} + +!0 = !{i32 1, !"wchar_size", i32 4} +!1 = !{!"clang"} +!2 = !{!3, !3, i64 0, i64 4} +!3 = !{!4, i64 4, !"int"} +!4 = !{!5, i64 1, !"omnipotent char"} +!5 = !{!"Simple C/C++ TBAA"} +!6 = !{!7} +!7 = distinct !{!7, !8, !"copy_rprp: rdst"} +!8 = distinct !{!8, !"copy_rprp"} +!9 = !{!10} +!10 = distinct !{!10, !8, !"copy_rprp: rsrc"} +!11 = !{!12, !12, i64 0, i64 4} +!12 = !{!4, i64 4, !"any pointer"} +!13 = !{!7, !10} + +; INTR-SCOPE: !0 = !{i32 1, !"wchar_size", i32 4} +; INTR-SCOPE: !1 = !{!"clang"} +; INTR-SCOPE: !2 = !{!3, !3, i64 0, i64 4} +; INTR-SCOPE: !3 = !{!4, i64 4, !"int"} +; INTR-SCOPE: !4 = !{!5, i64 1, !"omnipotent char"} +; INTR-SCOPE: !5 = !{!"Simple C/C++ TBAA"} +; INTR-SCOPE: !6 = !{!7} +; INTR-SCOPE: !7 = distinct !{!7, !8, !"copy_rprp: rdst"} +; INTR-SCOPE: !8 = distinct !{!8, !"copy_rprp"} +; INTR-SCOPE: !9 = !{!10} +; INTR-SCOPE: !10 = distinct !{!10, !8, !"copy_rprp: rsrc"} +; INTR-SCOPE: !11 = !{!12, !12, i64 0, i64 4} +; INTR-SCOPE: !12 = !{!4, i64 4, !"any pointer"} +; INTR-SCOPE: !13 = !{!7, !10} +; INTR-SCOPE: !14 = !{!15} +; INTR-SCOPE: !15 = distinct !{!15, !16, !"copy_npnp: %dst"} +; INTR-SCOPE: !16 = distinct !{!16, !"copy_npnp"} +; INTR-SCOPE: !17 = !{!18} +; INTR-SCOPE: !18 = distinct !{!18, !16, !"copy_npnp: %src"} +; INTR-SCOPE: !19 = !{!15, !18} +; INTR-SCOPE: !20 = !{!21} +; INTR-SCOPE: !21 = distinct !{!21, !22, !"copy_npnp: %dst"} +; INTR-SCOPE: !22 = distinct !{!22, !"copy_npnp"} +; INTR-SCOPE: !23 = !{!24} +; INTR-SCOPE: !24 = distinct !{!24, !22, !"copy_npnp: %src"} +; INTR-SCOPE: !25 = !{!21, !24} +; INTR-SCOPE: !26 = !{!27} +; INTR-SCOPE: !27 = distinct !{!27, !28, !"copy_npnp: %dst"} +; INTR-SCOPE: !28 = distinct !{!28, !"copy_npnp"} +; INTR-SCOPE: !29 = !{!30} +; INTR-SCOPE: !30 = distinct !{!30, !28, !"copy_npnp: %src"} +; INTR-SCOPE: !31 = !{!27, !30} +; INTR-SCOPE: !32 = !{!33} +; INTR-SCOPE: !33 = distinct !{!33, !34, !"copy_rprp: rdst"} +; INTR-SCOPE: !34 = 
distinct !{!34, !"copy_rprp"} +; INTR-SCOPE: !35 = !{!36} +; INTR-SCOPE: !36 = distinct !{!36, !34, !"copy_rprp: rsrc"} +; INTR-SCOPE: !37 = !{!33, !36} +; INTR-SCOPE: !38 = !{!39} +; INTR-SCOPE: !39 = distinct !{!39, !40, !"copy_rprp: rdst"} +; INTR-SCOPE: !40 = distinct !{!40, !"copy_rprp"} +; INTR-SCOPE: !41 = !{!42} +; INTR-SCOPE: !42 = distinct !{!42, !40, !"copy_rprp: rsrc"} +; INTR-SCOPE: !43 = !{!39, !42} +; INTR-SCOPE: !44 = !{!45} +; INTR-SCOPE: !45 = distinct !{!45, !46, !"copy_rprp: rdst"} +; INTR-SCOPE: !46 = distinct !{!46, !"copy_rprp"} +; INTR-SCOPE: !47 = !{!48} +; INTR-SCOPE: !48 = distinct !{!48, !46, !"copy_rprp: rsrc"} +; INTR-SCOPE: !49 = !{!45, !48} Index: llvm/test/Transforms/Inline/noalias.ll =================================================================== --- llvm/test/Transforms/Inline/noalias.ll +++ llvm/test/Transforms/Inline/noalias.ll @@ -1,5 +1,5 @@ ; RUN: opt -inline -enable-noalias-to-md-conversion -use-noalias-intrinsic-during-inlining=0 -S < %s | FileCheck %s -check-prefix=MD-SCOPE -; RUN: opt -inline -enable-noalias-to-md-conversion -S < %s | FileCheck %s -check-prefix=INTR-SCOPE +; RUN: opt -inline -enable-noalias-to-md-conversion -use-noalias-intrinsic-during-inlining=1 -S < %s | FileCheck %s -check-prefix=INTR-SCOPE target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" target triple = "x86_64-unknown-linux-gnu" @@ -33,13 +33,14 @@ ; INTR-SCOPE: define void @foo(float* nocapture %a, float* nocapture readonly %c) #0 { ; INTR-SCOPE: entry: -; INTR-SCOPE: %0 = call float* @llvm.noalias.p0f32(float* %a, metadata !0) -; INTR-SCOPE: %1 = load float, float* %c, align 4, !noalias !0 -; INTR-SCOPE: %arrayidx.i = getelementptr inbounds float, float* %0, i64 5 -; INTR-SCOPE: store float %1, float* %arrayidx.i, align 4, !noalias !0 -; INTR-SCOPE: %2 = load float, float* %c, align 4 +; INTR-SCOPE: %0 = call i8* 
@llvm.noalias.decl.p0i8.p0p0f32.i64(float** null, i64 0, metadata !0) +; INTR-SCOPE: %1 = call float* @llvm.noalias.p0f32.p0i8.p0p0f32.i64(float* %a, i8* %0, float** null, i64 0, metadata !0) +; INTR-SCOPE: %2 = load float, float* %c, align 4, !noalias !0 +; INTR-SCOPE: %arrayidx.i = getelementptr inbounds float, float* %1, i64 5 +; INTR-SCOPE: store float %2, float* %arrayidx.i, align 4, !noalias !0 +; INTR-SCOPE: %3 = load float, float* %c, align 4 ; INTR-SCOPE: %arrayidx = getelementptr inbounds float, float* %a, i64 7 -; INTR-SCOPE: store float %2, float* %arrayidx, align 4 +; INTR-SCOPE: store float %3, float* %arrayidx, align 4 ; INTR-SCOPE: ret void ; INTR-SCOPE: } @@ -77,16 +78,18 @@ ; INTR-SCOPE: define void @foo2(float* nocapture %a, float* nocapture %b, float* nocapture readonly %c) #0 { ; INTR-SCOPE: entry: -; INTR-SCOPE: %0 = call float* @llvm.noalias.p0f32(float* %a, metadata !3) -; INTR-SCOPE: %1 = call float* @llvm.noalias.p0f32(float* %b, metadata !6) -; INTR-SCOPE: %2 = load float, float* %c, align 4, !noalias !8 -; INTR-SCOPE: %arrayidx.i = getelementptr inbounds float, float* %0, i64 5 -; INTR-SCOPE: store float %2, float* %arrayidx.i, align 4, !noalias !8 -; INTR-SCOPE: %arrayidx1.i = getelementptr inbounds float, float* %1, i64 8 -; INTR-SCOPE: store float %2, float* %arrayidx1.i, align 4, !noalias !8 -; INTR-SCOPE: %3 = load float, float* %c, align 4 +; INTR-SCOPE: %0 = call i8* @llvm.noalias.decl.p0i8.p0p0f32.i64(float** null, i64 0, metadata !3) +; INTR-SCOPE: %1 = call float* @llvm.noalias.p0f32.p0i8.p0p0f32.i64(float* %a, i8* %0, float** null, i64 0, metadata !3) +; INTR-SCOPE: %2 = call i8* @llvm.noalias.decl.p0i8.p0p0f32.i64(float** null, i64 0, metadata !6) +; INTR-SCOPE: %3 = call float* @llvm.noalias.p0f32.p0i8.p0p0f32.i64(float* %b, i8* %2, float** null, i64 0, metadata !6) +; INTR-SCOPE: %4 = load float, float* %c, align 4, !noalias !8 +; INTR-SCOPE: %arrayidx.i = getelementptr inbounds float, float* %1, i64 5 +; INTR-SCOPE: store 
float %4, float* %arrayidx.i, align 4, !noalias !8 +; INTR-SCOPE: %arrayidx1.i = getelementptr inbounds float, float* %3, i64 8 +; INTR-SCOPE: store float %4, float* %arrayidx1.i, align 4, !noalias !8 +; INTR-SCOPE: %5 = load float, float* %c, align 4 ; INTR-SCOPE: %arrayidx = getelementptr inbounds float, float* %a, i64 7 -; INTR-SCOPE: store float %3, float* %arrayidx, align 4 +; INTR-SCOPE: store float %5, float* %arrayidx, align 4 ; INTR-SCOPE: ret void ; INTR-SCOPE: } @@ -112,4 +115,3 @@ ; INTR-SCOPE: !6 = !{!7} ; INTR-SCOPE: !7 = distinct !{!7, !5, !"hello2: %b"} ; INTR-SCOPE: !8 = !{!4, !7} - Index: llvm/test/Transforms/Inline/noalias2.ll =================================================================== --- llvm/test/Transforms/Inline/noalias2.ll +++ llvm/test/Transforms/Inline/noalias2.ll @@ -1,5 +1,5 @@ ; RUN: opt -inline -enable-noalias-to-md-conversion -use-noalias-intrinsic-during-inlining=0 -S < %s | FileCheck %s -check-prefix=MD-SCOPE -; RUN: opt -inline -enable-noalias-to-md-conversion -S < %s | FileCheck %s -check-prefix=INTR-SCOPE +; RUN: opt -inline -enable-noalias-to-md-conversion -use-noalias-intrinsic-during-inlining=1 -S < %s | FileCheck %s -check-prefix=INTR-SCOPE target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" target triple = "x86_64-unknown-linux-gnu" @@ -34,14 +34,16 @@ ; INTR-SCOPE: define void @foo(float* noalias nocapture %a, float* noalias nocapture readonly %c) #0 { ; INTR-SCOPE: entry: -; INTR-SCOPE: %0 = call float* @llvm.noalias.p0f32(float* %a, metadata !0) -; INTR-SCOPE: %1 = call float* @llvm.noalias.p0f32(float* %c, metadata !3) -; INTR-SCOPE: %2 = load float, float* %1, align 4, !noalias !5 -; INTR-SCOPE: %arrayidx.i = getelementptr inbounds float, float* %0, i64 5 -; INTR-SCOPE: store float %2, float* %arrayidx.i, align 4, !noalias !5 -; INTR-SCOPE: %3 = load float, float* %c, align 4 +; INTR-SCOPE: %0 = 
call i8* @llvm.noalias.decl.p0i8.p0p0f32.i64(float** null, i64 0, metadata !0) +; INTR-SCOPE: %1 = call float* @llvm.noalias.p0f32.p0i8.p0p0f32.i64(float* %a, i8* %0, float** null, i64 0, metadata !0) +; INTR-SCOPE: %2 = call i8* @llvm.noalias.decl.p0i8.p0p0f32.i64(float** null, i64 0, metadata !3) +; INTR-SCOPE: %3 = call float* @llvm.noalias.p0f32.p0i8.p0p0f32.i64(float* %c, i8* %2, float** null, i64 0, metadata !3) +; INTR-SCOPE: %4 = load float, float* %3, align 4, !noalias !5 +; INTR-SCOPE: %arrayidx.i = getelementptr inbounds float, float* %1, i64 5 +; INTR-SCOPE: store float %4, float* %arrayidx.i, align 4, !noalias !5 +; INTR-SCOPE: %5 = load float, float* %c, align 4 ; INTR-SCOPE: %arrayidx = getelementptr inbounds float, float* %a, i64 7 -; INTR-SCOPE: store float %3, float* %arrayidx, align 4 +; INTR-SCOPE: store float %5, float* %arrayidx, align 4 ; INTR-SCOPE: ret void ; INTR-SCOPE: } @@ -88,26 +90,32 @@ ; INTR-SCOPE: define void @foo2(float* nocapture %a, float* nocapture %b, float* nocapture readonly %c) #0 { ; INTR-SCOPE: entry: -; INTR-SCOPE: %0 = call float* @llvm.noalias.p0f32(float* %a, metadata !6) -; INTR-SCOPE: %1 = call float* @llvm.noalias.p0f32(float* %c, metadata !9) -; INTR-SCOPE: %2 = call float* @llvm.noalias.p0f32(float* %0, metadata !11) #2, !noalias !14 -; INTR-SCOPE: %3 = call float* @llvm.noalias.p0f32(float* %1, metadata !15) #2, !noalias !14 -; INTR-SCOPE: %4 = load float, float* %3, align 4, !noalias !17 -; INTR-SCOPE: %arrayidx.i.i = getelementptr inbounds float, float* %2, i64 5 -; INTR-SCOPE: store float %4, float* %arrayidx.i.i, align 4, !noalias !17 -; INTR-SCOPE: %5 = load float, float* %1, align 4, !noalias !14 -; INTR-SCOPE: %arrayidx.i = getelementptr inbounds float, float* %0, i64 7 -; INTR-SCOPE: store float %5, float* %arrayidx.i, align 4, !noalias !14 -; INTR-SCOPE: %6 = call float* @llvm.noalias.p0f32(float* %a, metadata !18) -; INTR-SCOPE: %7 = call float* @llvm.noalias.p0f32(float* %b, metadata !21) -; 
INTR-SCOPE: %8 = load float, float* %c, align 4, !noalias !23 -; INTR-SCOPE: %arrayidx.i1 = getelementptr inbounds float, float* %6, i64 6 -; INTR-SCOPE: store float %8, float* %arrayidx.i1, align 4, !noalias !23 -; INTR-SCOPE: %arrayidx1.i = getelementptr inbounds float, float* %7, i64 8 -; INTR-SCOPE: store float %8, float* %arrayidx1.i, align 4, !noalias !23 -; INTR-SCOPE: %9 = load float, float* %c, align 4 +; INTR-SCOPE: %0 = call i8* @llvm.noalias.decl.p0i8.p0p0f32.i64(float** null, i64 0, metadata !6) +; INTR-SCOPE: %1 = call float* @llvm.noalias.p0f32.p0i8.p0p0f32.i64(float* %a, i8* %0, float** null, i64 0, metadata !6) +; INTR-SCOPE: %2 = call i8* @llvm.noalias.decl.p0i8.p0p0f32.i64(float** null, i64 0, metadata !9) +; INTR-SCOPE: %3 = call float* @llvm.noalias.p0f32.p0i8.p0p0f32.i64(float* %c, i8* %2, float** null, i64 0, metadata !9) +; INTR-SCOPE: %4 = call i8* @llvm.noalias.decl.p0i8.p0p0f32.i64(float** null, i64 0, metadata !11) #3, !noalias !14 +; INTR-SCOPE: %5 = call float* @llvm.noalias.p0f32.p0i8.p0p0f32.i64(float* %1, i8* %4, float** null, i64 0, metadata !11) #3, !noalias !14 +; INTR-SCOPE: %6 = call i8* @llvm.noalias.decl.p0i8.p0p0f32.i64(float** null, i64 0, metadata !15) #3, !noalias !14 +; INTR-SCOPE: %7 = call float* @llvm.noalias.p0f32.p0i8.p0p0f32.i64(float* %3, i8* %6, float** null, i64 0, metadata !15) #3, !noalias !14 +; INTR-SCOPE: %8 = load float, float* %7, align 4, !noalias !17 +; INTR-SCOPE: %arrayidx.i.i = getelementptr inbounds float, float* %5, i64 5 +; INTR-SCOPE: store float %8, float* %arrayidx.i.i, align 4, !noalias !17 +; INTR-SCOPE: %9 = load float, float* %3, align 4, !noalias !14 +; INTR-SCOPE: %arrayidx.i = getelementptr inbounds float, float* %1, i64 7 +; INTR-SCOPE: store float %9, float* %arrayidx.i, align 4, !noalias !14 +; INTR-SCOPE: %10 = call i8* @llvm.noalias.decl.p0i8.p0p0f32.i64(float** null, i64 0, metadata !18) +; INTR-SCOPE: %11 = call float* @llvm.noalias.p0f32.p0i8.p0p0f32.i64(float* %a, i8* %10, 
float** null, i64 0, metadata !18) +; INTR-SCOPE: %12 = call i8* @llvm.noalias.decl.p0i8.p0p0f32.i64(float** null, i64 0, metadata !21) +; INTR-SCOPE: %13 = call float* @llvm.noalias.p0f32.p0i8.p0p0f32.i64(float* %b, i8* %12, float** null, i64 0, metadata !21) +; INTR-SCOPE: %14 = load float, float* %c, align 4, !noalias !23 +; INTR-SCOPE: %arrayidx.i1 = getelementptr inbounds float, float* %11, i64 6 +; INTR-SCOPE: store float %14, float* %arrayidx.i1, align 4, !noalias !23 +; INTR-SCOPE: %arrayidx1.i = getelementptr inbounds float, float* %13, i64 8 +; INTR-SCOPE: store float %14, float* %arrayidx1.i, align 4, !noalias !23 +; INTR-SCOPE: %15 = load float, float* %c, align 4 ; INTR-SCOPE: %arrayidx = getelementptr inbounds float, float* %a, i64 7 -; INTR-SCOPE: store float %9, float* %arrayidx, align 4 +; INTR-SCOPE: store float %15, float* %arrayidx, align 4 ; INTR-SCOPE: ret void ; INTR-SCOPE: } @@ -159,4 +167,3 @@ ; INTR-SCOPE: !23 = !{!19, !22} attributes #0 = { nounwind uwtable } -