Index: clang/include/clang/AST/Type.h =================================================================== --- clang/include/clang/AST/Type.h +++ clang/include/clang/AST/Type.h @@ -817,6 +817,19 @@ /// Returns true if it is not a class or if the class might not be dynamic. bool mayBeNotDynamicClass() const; + /// Returns true if this is a restrict pointer or contains a restrict pointer. + /// NOTE: A pointer to a restrict pointer does not count. + bool isRestrictOrContainsRestrictMembers() const; + + /// Get the encoded indices, describing where in memory restrict pointers are + /// located. + std::vector getRestrictIndices() const; + +private: + void getRestrictIndices(std::vector &OutEncodedIndices, + SmallVector &InCurrentIndices) const; + +public: // Don't promise in the API that anything besides 'const' can be // easily added. Index: clang/include/clang/Basic/CodeGenOptions.def =================================================================== --- clang/include/clang/Basic/CodeGenOptions.def +++ clang/include/clang/Basic/CodeGenOptions.def @@ -193,6 +193,8 @@ CODEGENOPT(RelaxedAliasing , 1, 0) ///< Set when -fno-strict-aliasing is enabled. CODEGENOPT(StructPathTBAA , 1, 0) ///< Whether or not to use struct-path TBAA. CODEGENOPT(NewStructPathTBAA , 1, 0) ///< Whether or not to use enhanced struct-path TBAA. +CODEGENOPT(FullRestrict , 1, 1) ///< Set when -ffull-restrict is enabled. +CODEGENOPT(NoNoAliasArgAttr , 1, 0) ///< Set when -fno-noalias-arguments is specified. CODEGENOPT(SaveTempLabels , 1, 0) ///< Save temporary labels. CODEGENOPT(SanitizeAddressUseAfterScope , 1, 0) ///< Enable use-after-scope detection ///< in AddressSanitizer Index: clang/include/clang/Driver/Options.td =================================================================== --- clang/include/clang/Driver/Options.td +++ clang/include/clang/Driver/Options.td @@ -975,6 +975,15 @@ defm fixed_point : OptInFFlag<"fixed-point", "Enable", "Disable", " fixed point types">; defm cxx_static_destructors : OptOutFFlag<"c++-static-destructors", "", "Disable C++ static destructor registration">; +def ffull_restrict : Flag<["-"], "ffull-restrict">, Group, + Flags<[CoreOption, CC1Option]>, + HelpText<"Enable full restrict support">; +def fno_full_restrict : Flag<["-"], "fno-full-restrict">, Group, + Flags<[CoreOption, CC1Option]>, + HelpText<"Disable full restrict support, falling back to the legacy support">; +def fno_noalias_arguments : Flag<["-"], "fno-noalias-arguments">, + Group, Flags<[CoreOption, CC1Option]>, + HelpText<"Do not map restrict arguments onto noalias attribute">; def fsymbol_partition_EQ : Joined<["-"], "fsymbol-partition=">, Group, Flags<[CC1Option]>; Index: clang/lib/AST/Type.cpp =================================================================== --- clang/lib/AST/Type.cpp +++ clang/lib/AST/Type.cpp @@ -100,6 +100,65 @@ return !ClassDecl || ClassDecl->mayBeNonDynamicClass(); } +bool QualType::isRestrictOrContainsRestrictMembers() const { + if (isRestrictQualified()) { + return true; + } + + const Type *BaseElementType = getCanonicalType()->getBaseElementTypeUnsafe(); + assert(!isa(BaseElementType)); + + if (const RecordType *RecTy = dyn_cast(BaseElementType)) { + RecordDecl *RD = RecTy->getDecl(); + for (FieldDecl *FD : RD->fields()) { + if (FD->getType().isRestrictOrContainsRestrictMembers()) { + return true; + } + } + } + + return false; +} + +std::vector QualType::getRestrictIndices() const { + // -1 represents an unbounded array + std::vector EncodedIndices; + SmallVector BaseIndices = {-1}; + + 
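+  // Editorial note, not part of the patch: judging from the recursion below,
+  // the encoding appears to be "number of indices, then the index path", with
+  // -1 meaning "any index". For example, assuming a type such as
+  //   struct FOO { int *restrict rp0; int *restrict rp1; };
+  // the result would be {2, -1, 0, 2, -1, 1}: one entry per restrict member,
+  // each starting from the leading -1 pushed above and ending with the field
+  // index of that member.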
+  getRestrictIndices(EncodedIndices, BaseIndices);
+
+  return EncodedIndices;
+}
+
+void QualType::getRestrictIndices(
+    std::vector &OutEncodedIndices,
+    SmallVector &InCurrentIndices) const {
+  if (isRestrictQualified()) {
+    OutEncodedIndices.push_back(InCurrentIndices.size());
+    OutEncodedIndices.insert(OutEncodedIndices.end(), InCurrentIndices.begin(),
+                             InCurrentIndices.end());
+
+    return;
+  }
+
+  QualType CannonTy = getCanonicalType();
+  if (const auto *RecTy = CannonTy->getAs<RecordType>()) {
+    RecordDecl *RD = RecTy->getDecl();
+    InCurrentIndices.push_back(0);
+    for (FieldDecl *FD : RD->fields()) {
+      FD->getType().getRestrictIndices(OutEncodedIndices, InCurrentIndices);
+      InCurrentIndices.back()++;
+    }
+    InCurrentIndices.pop_back();
+  } else if (const auto *ArrayTy = dyn_cast<ArrayType>(CannonTy)) {
+    // Note: we use -1 for bounded and unbounded arrays
+    InCurrentIndices.push_back(-1); // -1 indicates that any index is fine
+    ArrayTy->getElementType().getRestrictIndices(OutEncodedIndices,
+                                                 InCurrentIndices);
+    InCurrentIndices.pop_back();
+  }
+}
+
 bool QualType::isConstant(QualType T, const ASTContext &Ctx) {
   if (T.isConstQualified())
     return true;
Index: clang/lib/CodeGen/Address.h
===================================================================
--- clang/lib/CodeGen/Address.h
+++ clang/lib/CodeGen/Address.h
@@ -39,6 +39,13 @@
     return Pointer;
   }
 
+  /// Replace the current pointer of the address with a new pointer.
+  void adaptPointer(llvm::Value *newPointer) {
+    assert(Pointer->getType() == newPointer->getType() &&
+           "Address: changing the pointer must not change the type");
+    Pointer = newPointer;
+  }
+
   /// Return the type of the pointer value.
   llvm::PointerType *getType() const {
     return llvm::cast<llvm::PointerType>(getPointer()->getType());
Index: clang/lib/CodeGen/CGCall.cpp
===================================================================
--- clang/lib/CodeGen/CGCall.cpp
+++ clang/lib/CodeGen/CGCall.cpp
@@ -2546,7 +2546,10 @@
   }
 
   // Set 'noalias' if an argument type has the `restrict` qualifier.
-  if (Arg->getType().isRestrictQualified())
+  // For accurate full restrict support, we should not annotate arguments
+  // with noalias. The noalias attribute is too strong.
+  if (Arg->getType().isRestrictQualified() &&
+      (!CGM.getCodeGenOpts().NoNoAliasArgAttr))
     AI->addAttr(llvm::Attribute::NoAlias);
 }
@@ -4514,12 +4517,28 @@
       ArgInfo.getDirectOffset() == 0) {
     assert(NumIRArgs == 1);
     llvm::Value *V;
-    if (!I->isAggregate())
+    if (!I->isAggregate()) {
       V = I->getKnownRValue().getScalarVal();
-    else
-      V = Builder.CreateLoad(
-          I->hasLValue() ? I->getKnownLValue().getAddress(*this)
-                         : I->getKnownRValue().getAggregateAddress());
+    } else {
+      Address Addr = I->hasLValue()
+                         ? I->getKnownLValue().getAddress(*this)
+                         : I->getKnownRValue().getAggregateAddress();
+      if (I->getType().isRestrictOrContainsRestrictMembers() &&
+          CGM.getCodeGenOpts().FullRestrict) {
+        // Protect a load of an aggregate with restrict member pointers with
+        // an llvm.noalias.copy.guard.
+        // NOTE: also see CodeGenFunction::EmitAggregateCopy();
+        auto NoAliasScopeMD =
+            getExistingOrUnknownNoAliasScope(Addr.getPointer());
+        auto NoAliasDecl = getExistingNoAliasDeclOrNullptr(NoAliasScopeMD);
+        Addr.adaptPointer(Builder.CreateNoAliasCopyGuard(
+            Addr.getPointer(), NoAliasDecl,
+            I->getType().getRestrictIndices(), NoAliasScopeMD));
+      }
+      llvm::LoadInst *LD =
+          Builder.CreateLoad(Addr, I->getType().isVolatileQualified());
+      V = LD;
+    }
 
       // Implement swifterror by copying into a new swifterror argument.
       // We'll write back in the normal path out of the call.
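      // Editorial note, not part of the patch: the guarded load added above is
      // meant for aggregates that are passed by value and contain restrict
      // members, roughly as in this illustrative (hypothetical) snippet:
      //
      //   struct FOO { int *restrict rp0; int *restrict rp1; };
      //   void callee(struct FOO f);
      //   void caller(struct FOO *p) { callee(*p); }
      //
      // The llvm.noalias.copy.guard on the loaded address keeps the
      // restrictness of the copied member pointers visible to the optimizer;
      // the struct.c test further down covers similar cases.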
@@ -4601,6 +4620,22 @@ } else { // In the simple case, just pass the coerced loaded value. assert(NumIRArgs == 1); + if (Src.getElementType() == ArgInfo.getCoerceToType()) { + QualType Ty = I->getType(); + if (Ty.isRestrictOrContainsRestrictMembers() && + CGM.getCodeGenOpts().FullRestrict) { + // Protect a load of an aggregate with restrict member pointers with + // an llvm.noalias.copy.guard + // NOTE: also see CodeGenFunction::EmitAggregateCopy(); + auto NoAliasScopeMD = + getExistingOrUnknownNoAliasScope(Src.getPointer()); + auto NoAliasDecl = getExistingNoAliasDeclOrNullptr(NoAliasScopeMD); + Src.adaptPointer(Builder.CreateNoAliasCopyGuard( + Src.getPointer(), NoAliasDecl, Ty.getRestrictIndices(), + NoAliasScopeMD)); + } + } + llvm::Value *Load = CreateCoercedLoad(Src, ArgInfo.getCoerceToType(), *this); Index: clang/lib/CodeGen/CGDecl.cpp =================================================================== --- clang/lib/CodeGen/CGDecl.cpp +++ clang/lib/CodeGen/CGDecl.cpp @@ -36,6 +36,7 @@ #include "llvm/IR/DataLayout.h" #include "llvm/IR/GlobalVariable.h" #include "llvm/IR/Intrinsics.h" +#include "llvm/IR/MDBuilder.h" #include "llvm/IR/Type.h" using namespace clang; @@ -1282,6 +1283,7 @@ /// These turn into simple stack objects, or GlobalValues depending on target. void CodeGenFunction::EmitAutoVarDecl(const VarDecl &D) { AutoVarEmission emission = EmitAutoVarAlloca(D); + EmitAutoVarNoAlias(emission); EmitAutoVarInit(emission); EmitAutoVarCleanups(emission); } @@ -1873,6 +1875,126 @@ type.isVolatileQualified(), Builder, constant); } +// For all local restrict-qualified local variables, we create a noalias +// metadata scope. This scope is used to identify each restrict-qualified +// variable and the other memory accesses within the scope where its aliasing +// assumptions apply. The scope metadata is stored in the NoAliasAddrMap map +// where the pointer to the local variable is the key in the map. +// One variable can contain multiple restrict pointers. All of them are +// represented by a single scope. +void CodeGenFunction::EmitNoAliasDecl(const VarDecl &D, Address Loc) { + // Don't emit noalias intrinsics unless we're optimizing. + if (CGM.getCodeGenOpts().OptimizationLevel == 0) + return; + + if (!CGM.getCodeGenOpts().FullRestrict) + return; + + QualType type = D.getType(); + + // Emit a noalias intrinsic for restrict-qualified variables. + if (!type.isRestrictOrContainsRestrictMembers()) + return; + + // Only emit a llvm.noalias.decl if the address is an alloca. 
+ if (!isa(Loc.getPointer()->stripPointerCasts())) + return; + + // NOTE: keep in sync with (clang) CGDecl: getExistingOrUnknownNoAliasScope + // NOTE: keep in sync with (clang) CGDecl: EmitAutoVarNoAlias/EmitNoAliasDecl + // NOTE: keep in sync with (llvm) InlineFunction: CloneAliasScopeMetadata + llvm::MDBuilder MDB(CurFn->getContext()); + if (!NoAliasDomain) + NoAliasDomain = MDB.createAnonymousAliasScopeDomain(CurFn->getName()); + + std::string Name = (llvm::Twine(CurFn->getName()) + ": " + D.getName()).str(); + + llvm::MDNode *Scope = MDB.createAnonymousAliasScope(NoAliasDomain, Name); + addNoAliasScope(Scope); + + SmallVector ScopeListEntries(1, Scope); + llvm::MDNode *ScopeList = + llvm::MDNode::get(CurFn->getContext(), ScopeListEntries); + + NoAliasAddrMap[Loc.getPointer()] = ScopeList; + + if (HaveInsertPoint()) { + NoAliasDeclMap[ScopeList] = + Builder.CreateNoAliasDeclaration(Loc.getPointer(), ScopeList); + } +} + +llvm::MDNode * +CodeGenFunction::getExistingOrUnknownNoAliasScope(llvm::Value *Ptr) { + auto NAI = NoAliasAddrMap.find( + Ptr->stripInBoundsOffsets()); // make sure to find the base object + + if (NAI != NoAliasAddrMap.end()) { + return NAI->second; + } + if (!NoAliasUnknownScope) { + // NOTE: keep in sync with (clang) CGDecl:getExistingOrUnknownNoAliasScope + // NOTE: keep in sync with (clang) CGDecl:EmitAutoVarNoAlias/EmitNoAliasDecl + // NOTE: keep in sync with (llvm) InlineFunction:CloneAliasScopeMetadata + + // The unknown scope is used when we cannot yet pinpoint the exact scope. + llvm::MDBuilder MDB(CurFn->getContext()); + if (!NoAliasDomain) + NoAliasDomain = MDB.createAnonymousAliasScopeDomain(CurFn->getName()); + std::string Name = + (llvm::Twine(CurFn->getName()) + ": unknown scope").str(); + + llvm::MDNode *Scope = MDB.createAnonymousAliasScope(NoAliasDomain, Name); + FnNoAliasInfo.addNoAliasScope(Scope); // keep this at function level + + SmallVector ScopeListEntries(1, Scope); + NoAliasUnknownScope = + llvm::MDNode::get(CurFn->getContext(), ScopeListEntries); + CurFn->setMetadata("noalias", NoAliasUnknownScope); + } + + return NoAliasUnknownScope; +} + +llvm::Value * +CodeGenFunction::getExistingNoAliasDeclOrNullptr(llvm::MDNode *NoAliasScopeMD) { + llvm::Value *&NoAliasDecl = NoAliasDeclMap[NoAliasScopeMD]; + if (NoAliasDecl == nullptr) { + NoAliasDecl = llvm::ConstantPointerNull::get( + llvm::Type::getInt8PtrTy(getLLVMContext())); + } + return NoAliasDecl; +} + +void CodeGenFunction::EmitAutoVarNoAlias(const AutoVarEmission &emission) { + assert(emission.Variable && "emission was not valid!"); + + // Don't emit noalias intrinsics unless we're optimizing. + if (CGM.getCodeGenOpts().OptimizationLevel == 0) + return; + + if (!CGM.getCodeGenOpts().FullRestrict) + return; + + const VarDecl &D = *emission.Variable; + + // Early exit: emission.getObjectAddress(..) can introduce extra code. + // Only do it if it is needed + if (!D.getType().isRestrictOrContainsRestrictMembers()) + return; + + // Check whether this is a byref variable that's potentially + // captured and moved by its own initializer. If so, we'll need to + // emit the initializer first, then copy into the variable. + const Expr *Init = D.getInit(); + bool capturedByInit = + Init && emission.IsEscapingByRef && isCapturedBy(D, Init); + + Address Loc = + capturedByInit ? emission.Addr : emission.getObjectAddress(*this); + EmitNoAliasDecl(D, Loc); +} + /// Emit an expression as an initializer for an object (variable, field, etc.) /// at the given location. 
The expression is not necessarily the normal /// initializer for the object, and the address is not necessarily @@ -2445,6 +2567,9 @@ llvm::Value *ArgVal = (DoStore ? Arg.getDirectValue() : nullptr); LValue lv = MakeAddrLValue(DeclPtr, Ty); + + EmitNoAliasDecl(D, DeclPtr); + if (IsScalar) { Qualifiers qs = Ty.getQualifiers(); if (Qualifiers::ObjCLifetime lt = qs.getObjCLifetime()) { Index: clang/lib/CodeGen/CGExpr.cpp =================================================================== --- clang/lib/CodeGen/CGExpr.cpp +++ clang/lib/CodeGen/CGExpr.cpp @@ -30,10 +30,12 @@ #include "clang/Basic/SourceManager.h" #include "llvm/ADT/Hashing.h" #include "llvm/ADT/StringExtras.h" +#include "llvm/IR/Constants.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/Intrinsics.h" #include "llvm/IR/LLVMContext.h" #include "llvm/IR/MDBuilder.h" +#include "llvm/IR/Metadata.h" #include "llvm/Support/ConvertUTF.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/Path.h" @@ -1722,6 +1724,24 @@ if (llvm::MDNode *RangeInfo = getRangeForLoadFromType(Ty)) Load->setMetadata(llvm::LLVMContext::MD_range, RangeInfo); + // If this is a load from a restrict-qualified variable, then we have pointer + // aliasing assumptions that can be applied to the pointer value being loaded. + if (Ty.isRestrictQualified() && CGM.getCodeGenOpts().FullRestrict) { + auto NoAliasScopeMD = getExistingOrUnknownNoAliasScope(Addr.getPointer()); + auto NoAliasDecl = getExistingNoAliasDeclOrNullptr(NoAliasScopeMD); + auto *NoAliasLoad = Builder.CreateNoAliasPointer( + Load, NoAliasDecl, Addr.getPointer(), NoAliasScopeMD); + + // The llvm.noalias intrinsic can make use of the available alias info + llvm::AAMDNodes AAMetadata; + Load->getAAMetadata(AAMetadata); + NoAliasLoad->setAAMetadata(AAMetadata); + + // ..as wel as the local restrict scope + // In both cases, this is about the 'P.addr'(getOperand(2) of llvm.noalias) + recordMemoryInstruction(NoAliasLoad); + return EmitFromMemory(NoAliasLoad, Ty); + } return EmitFromMemory(Load, Ty); } Index: clang/lib/CodeGen/CGExprAgg.cpp =================================================================== --- clang/lib/CodeGen/CGExprAgg.cpp +++ clang/lib/CodeGen/CGExprAgg.cpp @@ -1948,6 +1948,17 @@ } } + // Guard copies of structs containing restrict pointers + if (Ty.isRestrictOrContainsRestrictMembers() && + CGM.getCodeGenOpts().FullRestrict) { + // NOTE: also see CodeGenFunction::EmitCall() + auto NoAliasScopeMD = getExistingOrUnknownNoAliasScope(SrcPtr.getPointer()); + auto NoAliasDecl = getExistingNoAliasDeclOrNullptr(NoAliasScopeMD); + SrcPtr.adaptPointer(Builder.CreateNoAliasCopyGuard( + SrcPtr.getPointer(), NoAliasDecl, Ty.getRestrictIndices(), + NoAliasScopeMD)); + } + if (getLangOpts().CUDAIsDevice) { if (Ty->isCUDADeviceBuiltinSurfaceType()) { if (getTargetHooks().emitCUDADeviceBuiltinSurfaceDeviceCopy(*this, Dest, @@ -2036,6 +2047,8 @@ } auto Inst = Builder.CreateMemCpy(DestPtr, SrcPtr, SizeVal, isVolatile); + // track noalias scope for memcpy + recordMemoryInstruction(Inst); // Determine the metadata to describe the position of any padding in this // memcpy, as well as the TBAA tags for the members of the struct, in case Index: clang/lib/CodeGen/CGStmt.cpp =================================================================== --- clang/lib/CodeGen/CGStmt.cpp +++ clang/lib/CodeGen/CGStmt.cpp @@ -390,6 +390,28 @@ return true; } +bool CodeGenFunction::hasLocalRestrictVars(const CompoundStmt &S, + FunctionArgList *Args) { + // We may have restrict-qualified variables, but if we're not 
optimizing, we + // don't do anything special with them. + if (CGM.getCodeGenOpts().OptimizationLevel == 0) + return false; + + if (Args) + for (const auto *VD : *Args) + if (VD->getType().isRestrictOrContainsRestrictMembers()) + return true; + + for (const auto *C : S.body()) + if (const auto *DS = dyn_cast(C)) + for (const auto *I : DS->decls()) + if (const auto *VD = dyn_cast(I)) + if (VD->getType().isRestrictOrContainsRestrictMembers()) + return true; + + return false; +} + /// EmitCompoundStmt - Emit a compound statement {..} node. If GetLast is true, /// this captures the expression result of the last sub-statement and returns it /// (for use by the statement expression extension). @@ -399,7 +421,7 @@ "LLVM IR generation of compound statement ('{}')"); // Keep track of the current cleanup stack depth, including debug scopes. - LexicalScope Scope(*this, S.getSourceRange()); + LexicalScope Scope(*this, S.getSourceRange(), hasLocalRestrictVars(S)); return EmitCompoundStmtWithoutScope(S, GetLast, AggSlot); } @@ -603,6 +625,24 @@ } } +// For all of the instructions generated for this lexical scope that access +// memory, add the noalias metadata associated with any block-local +// restrict-qualified pointers from this scope. +void CodeGenFunction::LexicalNoAliasInfo::addNoAliasMD() { + if (MemoryInsts.empty() || NoAliasScopes.empty()) + return; + + llvm::MDNode *NewScopeList = llvm::MDNode::get( + MemoryInsts[0]->getParent()->getContext(), NoAliasScopes); + + for (auto &I : MemoryInsts) + I->setMetadata( + llvm::LLVMContext::MD_noalias, + llvm::MDNode::concatenate(I->getMetadata(llvm::LLVMContext::MD_noalias), + NewScopeList)); + + MemoryInsts.clear(); +} void CodeGenFunction::EmitLabelStmt(const LabelStmt &S) { EmitLabel(S.getDecl()); Index: clang/lib/CodeGen/CodeGenFunction.h =================================================================== --- clang/lib/CodeGen/CodeGenFunction.h +++ clang/lib/CodeGen/CodeGenFunction.h @@ -37,6 +37,7 @@ #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/MapVector.h" #include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/TinyPtrVector.h" #include "llvm/Frontend/OpenMP/OMPIRBuilder.h" #include "llvm/IR/ValueHandle.h" #include "llvm/Support/Debug.h" @@ -298,8 +299,7 @@ /// CGBuilder insert helper. This function is called after an /// instruction is created using Builder. void InsertHelper(llvm::Instruction *I, const llvm::Twine &Name, - llvm::BasicBlock *BB, - llvm::BasicBlock::iterator InsertPt) const; + llvm::BasicBlock *BB, llvm::BasicBlock::iterator InsertPt); /// CurFuncDecl - Holds the Decl for the current outermost /// non-closure context. @@ -813,11 +813,54 @@ } }; + // Check if any local variable or argument contains a (direct) restrict + // pointer. + bool hasLocalRestrictVars(const CompoundStmt &S, + FunctionArgList *Args = nullptr); + + // Get the associated NoAliasScope for the Ptr (cast and geps will be + // stripped). If there is no associated scope returns the 'unknown function' + // scope. (which is created on demand) + llvm::MDNode *getExistingOrUnknownNoAliasScope(llvm::Value *Ptr); + + // Get the associated declaration + llvm::Value *getExistingNoAliasDeclOrNullptr(llvm::MDNode *NoAliasScopeMD); + + // The noalias scopes used to tag pointer values assigned to block-local + // restrict-qualified variables, and the memory-accessing instructions within + // this lexical scope to which the associated pointer-aliasing assumptions + // might apply. One of these will exist for each lexical scope. 
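  // Editorial note, not part of the patch: as far as I can tell, the pieces
  // fit together as follows. EmitNoAliasDecl() creates an anonymous alias
  // scope (plus an llvm.noalias.decl) for each restrict-containing local and
  // registers it through addNoAliasScope(); InsertHelper() routes every
  // memory-accessing instruction into recordMemoryInstruction(); and when the
  // lexical scope (or the function body) is closed, addNoAliasMD() appends the
  // collected scopes to the !noalias metadata of the recorded instructions.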
+  struct LexicalNoAliasInfo {
+    bool RecordMemoryInsts;
+    llvm::TinyPtrVector MemoryInsts;
+    llvm::TinyPtrVector NoAliasScopes;
+
+    LexicalNoAliasInfo(bool RecordMemoryInsts = false)
+        : RecordMemoryInsts(RecordMemoryInsts) {}
+
+    void recordMemoryInsts() { RecordMemoryInsts = true; }
+
+    void recordMemoryInstruction(llvm::Instruction *I) {
+      if (RecordMemoryInsts)
+        MemoryInsts.push_back(I);
+    }
+
+    void addNoAliasScope(llvm::MDNode *Scope) {
+      assert(RecordMemoryInsts &&
+             "Adding noalias scope but not recording memory accesses!");
+      NoAliasScopes.push_back(Scope);
+    }
+
+    void addNoAliasMD();
+  };
+
+  LexicalNoAliasInfo FnNoAliasInfo;
+
   // Cleanup stack depth of the RunCleanupsScope that was pushed most recently.
   EHScopeStack::stable_iterator CurrentCleanupScopeDepth =
       EHScopeStack::stable_end();
 
-  class LexicalScope : public RunCleanupsScope {
+  class LexicalScope : public RunCleanupsScope, public LexicalNoAliasInfo {
     SourceRange Range;
     SmallVector Labels;
     LexicalScope *ParentScope;
@@ -827,8 +870,10 @@
   public:
     /// Enter a new cleanup scope.
-    explicit LexicalScope(CodeGenFunction &CGF, SourceRange Range)
-        : RunCleanupsScope(CGF), Range(Range), ParentScope(CGF.CurLexicalScope) {
+    explicit LexicalScope(CodeGenFunction &CGF, SourceRange Range,
+                          bool RecordMemoryInsts = false)
+        : RunCleanupsScope(CGF), LexicalNoAliasInfo(RecordMemoryInsts),
+          Range(Range), ParentScope(CGF.CurLexicalScope) {
       CGF.CurLexicalScope = this;
       if (CGDebugInfo *DI = CGF.getDebugInfo())
         DI->EmitLexicalBlockStart(CGF.Builder, Range.getBegin());
@@ -839,6 +884,19 @@
       Labels.push_back(label);
     }
 
+    // If we have block-local restrict-qualified pointers, we need to keep
+    // track of the memory-accessing instructions in the blocks where such
+    // pointers are declared (including lexical scopes that are children of
+    // those blocks) so that we can later add the appropriate metadata. Record
+    // this instruction and do the same in any parent scopes.
+    void recordMemoryInstruction(llvm::Instruction *I) {
+      LexicalNoAliasInfo::recordMemoryInstruction(I);
+      if (ParentScope)
+        ParentScope->recordMemoryInstruction(I);
+      else
+        CGF.FnNoAliasInfo.recordMemoryInstruction(I);
+    }
+
     /// Exit this cleanup scope, emitting any accumulated
     /// cleanups.
     ~LexicalScope() {
@@ -856,6 +914,8 @@
     /// Force the emission of cleanups now, instead of waiting
     /// until this object is destroyed.
     void ForceCleanup() {
+      addNoAliasMD();
+
       CGF.CurLexicalScope = ParentScope;
       RunCleanupsScope::ForceCleanup();
@@ -872,6 +932,23 @@
   typedef llvm::DenseMap DeclMapTy;
 
+  // Record this instruction for the purpose of later adding noalias metadata,
+  // if applicable, in order to support block-local restrict-qualified
+  // pointers.
+  void recordMemoryInstruction(llvm::Instruction *I) {
+    if (CurLexicalScope)
+      CurLexicalScope->recordMemoryInstruction(I);
+    else
+      FnNoAliasInfo.recordMemoryInstruction(I);
+  }
+
+  void addNoAliasScope(llvm::MDNode *Scope) {
+    if (CurLexicalScope)
+      CurLexicalScope->addNoAliasScope(Scope);
+    else
+      FnNoAliasInfo.addNoAliasScope(Scope);
+  }
+
   /// The class used to assign some variables some temporarily addresses.
   class OMPMapVars {
     DeclMapTy SavedLocals;
@@ -1834,6 +1911,19 @@
   void EmitOpenCLKernelMetadata(const FunctionDecl *FD,
                                 llvm::Function *Fn);
 
+  /// The noalias domain metadata for this function.
+  llvm::MDNode *NoAliasDomain = nullptr;
+
+  /// A map between the addresses of local restrict-qualified variables and
+  /// their noalias scope.
+ llvm::DenseMap NoAliasAddrMap; + + /// A map between the noalias scope and its declaration + llvm::DenseMap NoAliasDeclMap; + + /// The node representing 'out-of-function' scope + llvm::MDNode *NoAliasUnknownScope = nullptr; + public: CodeGenFunction(CodeGenModule &cgm, bool suppressNewContext=false); ~CodeGenFunction(); @@ -2981,6 +3071,9 @@ void emitAutoVarTypeCleanup(const AutoVarEmission &emission, QualType::DestructionKind dtorKind); + void EmitNoAliasDecl(const VarDecl &D, Address Loc); + void EmitAutoVarNoAlias(const AutoVarEmission &emission); + /// Emits the alloca and debug information for the size expressions for each /// dimension of an array. It registers the association of its (1-dimensional) /// QualTypes and size expression's debug node, so that CGDebugInfo can Index: clang/lib/CodeGen/CodeGenFunction.cpp =================================================================== --- clang/lib/CodeGen/CodeGenFunction.cpp +++ clang/lib/CodeGen/CodeGenFunction.cpp @@ -690,6 +690,11 @@ CurFn = Fn; CurFnInfo = &FnInfo; assert(CurFn->isDeclaration() && "Function already has body?"); + NoAliasUnknownScope = nullptr; // make sure we start without a function scope + + // Always track memory instructions. When a restrict usage is encountered, + // they will be annotated with the necessary scopes. + FnNoAliasInfo.recordMemoryInsts(); // If this function has been blacklisted for any of the enabled sanitizers, // disable the sanitizer for the function. @@ -1143,10 +1148,15 @@ void CodeGenFunction::EmitFunctionBody(const Stmt *Body) { incrementProfileCounter(Body); - if (const CompoundStmt *S = dyn_cast(Body)) + if (const CompoundStmt *S = dyn_cast(Body)) { EmitCompoundStmtWithoutScope(*S); - else + + // Now that we're done with the block, add noalias metadata if we had any + // block-local restrict-qualified pointers. + FnNoAliasInfo.addNoAliasMD(); + } else { EmitStmt(Body); + } } /// When instrumenting to collect profile data, the counts for some blocks @@ -2262,10 +2272,18 @@ void CodeGenFunction::InsertHelper(llvm::Instruction *I, const llvm::Twine &Name, llvm::BasicBlock *BB, - llvm::BasicBlock::iterator InsertPt) const { + llvm::BasicBlock::iterator InsertPt) { LoopStack.InsertHelper(I); if (IsSanitizerScope) CGM.getSanitizerMetadata()->disableSanitizerForInstruction(I); + + // When we have block-local restrict-qualified pointers, we need to record + // all memory-accessing instructions (i.e. any kind of instruction for which + // AA::getModRefInfo might return something other than NoModRef) so that they + // can be tagged with noalias metadata with noalias scopes corresponding to + // the applicable restrict-qualified pointers. 
+ if (I->mayReadOrWriteMemory()) + recordMemoryInstruction(I); } void CGBuilderInserter::InsertHelper( Index: clang/lib/Driver/ToolChains/Clang.cpp =================================================================== --- clang/lib/Driver/ToolChains/Clang.cpp +++ clang/lib/Driver/ToolChains/Clang.cpp @@ -4463,6 +4463,18 @@ options::OPT_fdelete_null_pointer_checks, false)) CmdArgs.push_back("-fno-delete-null-pointer-checks"); + if (Arg *FullRestrictArg = Args.getLastArg(options::OPT_ffull_restrict, + options::OPT_fno_full_restrict)) { + if (FullRestrictArg->getOption().matches(options::OPT_fno_full_restrict)) { + // Disable noalias intrinsic support for noalias attribute on arguments + CmdArgs.push_back("-fno-full-restrict"); + CmdArgs.push_back("-mllvm"); + CmdArgs.push_back("-use-noalias-intrinsic-during-inlining=0"); + } else { + CmdArgs.push_back("-ffull-restrict"); + } + } + // LLVM Code Generator Options. if (Args.hasArg(options::OPT_frewrite_map_file) || Index: clang/lib/Frontend/CompilerInvocation.cpp =================================================================== --- clang/lib/Frontend/CompilerInvocation.cpp +++ clang/lib/Frontend/CompilerInvocation.cpp @@ -838,6 +838,14 @@ Opts.StructPathTBAA = !Args.hasArg(OPT_no_struct_path_tbaa); Opts.NewStructPathTBAA = !Args.hasArg(OPT_no_struct_path_tbaa) && Args.hasArg(OPT_new_struct_path_tbaa); + if (Arg *FullRestrictArg = + Args.getLastArg(OPT_ffull_restrict, OPT_fno_full_restrict)) { + Opts.FullRestrict = + FullRestrictArg->getOption().matches(OPT_ffull_restrict); + } + // Keep this behind an option - the alias analysis still works better + // with the noalias attribute on arguments. + Opts.NoNoAliasArgAttr = Args.hasArg(OPT_fno_noalias_arguments); Opts.FineGrainedBitfieldAccesses = Args.hasFlag(OPT_ffine_grained_bitfield_accesses, OPT_fno_fine_grained_bitfield_accesses, false); Index: clang/test/CodeGen/arm_neon_intrinsics.c =================================================================== --- clang/test/CodeGen/arm_neon_intrinsics.c +++ clang/test/CodeGen/arm_neon_intrinsics.c @@ -2,6 +2,7 @@ // RUN: -target-cpu swift -fallow-half-arguments-and-returns \ // RUN: -target-feature +fullfp16 -ffreestanding \ // RUN: -flax-vector-conversions=none \ +// RUN: -mllvm --use-noalias-intrinsic-during-inlining=0 \ // RUN: -disable-O0-optnone -emit-llvm -o - %s \ // RUN: | opt -S -mem2reg | FileCheck %s Index: clang/test/CodeGen/restrict/arg_reuse.c =================================================================== --- /dev/null +++ clang/test/CodeGen/restrict/arg_reuse.c @@ -0,0 +1,36 @@ +// RUN: %clang_cc1 -triple x86_64-apple-darwin -O2 -ffull-restrict -fno-noalias-arguments %s -emit-llvm -o - | FileCheck %s + +// NOTE: use -no-noalias-arguments to block mapping restrict arguments on the 'noalias +// attribute which is too strong for restrict + +// A number of testcases from our wiki (2018/6/7_llvm_restrict_examples +// As llvm/clang treat __restrict differently in following cases: +int test_arg_restrict_vs_local_restrict_01(int *__restrict pA, int *pB, int *pC) { + int *tmp = pA; + *tmp = 42; + pA = pB; + *pA = 43; + *pC = 99; + return *tmp; // fail: needs a load !!! (either 42 or 43) +} + +// CHECK: @test_arg_restrict_vs_local_restrict_01 +// CHECK-NOT: ret i32 42 +// CHECK-NOT: ret i32 43 +// CHECK-NOT: ret i32 99 + +int test_arg_restrict_vs_local_restrict_02(int *pA_, int *pB, int *pC) { + int *__restrict pA; + pA = pA_; + int *tmp = pA; + *tmp = 42; + pA = pB; + *pA = 43; + *pC = 99; + return *tmp; // needs a load !!! 
(either 42 or 43) +} + +// CHECK: @test_arg_restrict_vs_local_restrict_02 +// CHECK-NOT: ret i32 42 +// CHECK-NOT: ret i32 43 +// CHECK-NOT: ret i32 99 Index: clang/test/CodeGen/restrict/array.c =================================================================== --- /dev/null +++ clang/test/CodeGen/restrict/array.c @@ -0,0 +1,53 @@ +// RUN: %clang_cc1 -triple x86_64-apple-darwin -O1 -disable-llvm-optzns -ffull-restrict %s -emit-llvm -o - | FileCheck %s + +int r; +void ex1(int *); + +void test_FOO_local(int *pA, int *pB, int *pC) { + int *restrict tmp[3] = {pA, pB, pC}; + *tmp[0] = 42; + *tmp[1] = 43; +} + +// CHECK-LABEL: void @test_FOO_local( +// CHECK: [[tmp:%.*]] = alloca [3 x i32*], align 16 +// CHECK: [[TMP1:%.*]] = call i8* @llvm.noalias.decl.p0i8.p0a3p0i32.i64([3 x i32*]* [[tmp]], i64 0, metadata [[TAG_6:!.*]]) +// CHECK: [[arrayidx_begin:%.*]] = getelementptr inbounds [3 x i32*], [3 x i32*]* [[tmp]], i64 0, i64 0 +// CHECK: [[arrayidx:%.*]] = getelementptr inbounds [3 x i32*], [3 x i32*]* [[tmp]], i64 0, i64 0 +// CHECK: [[TMP5:%.*]] = load i32*, i32** [[arrayidx]], align 16, !tbaa [[TAG_2:!.*]], !noalias [[TAG_6]] +// CHECK: [[TMP6:%.*]] = call i32* @llvm.noalias.p0i32.p0i8.p0p0i32.i64(i32* [[TMP5]], i8* [[TMP1]], i32** [[arrayidx]], i64 0, metadata [[TAG_6]]), !tbaa [[TAG_2]], !noalias [[TAG_6]] +// CHECK: store i32 42, i32* [[TMP6]], align 4, !tbaa [[TAG_9:!.*]], !noalias [[TAG_6]] +// CHECK: [[arrayidx2:%.*]] = getelementptr inbounds [3 x i32*], [3 x i32*]* [[tmp]], i64 0, i64 1 +// CHECK: [[TMP7:%.*]] = load i32*, i32** [[arrayidx2]], align 8, !tbaa [[TAG_2]], !noalias [[TAG_6]] +// CHECK: [[TMP8:%.*]] = call i32* @llvm.noalias.p0i32.p0i8.p0p0i32.i64(i32* [[TMP7]], i8* [[TMP1]], i32** [[arrayidx2]], i64 0, metadata [[TAG_6]]), !tbaa [[TAG_2]], !noalias [[TAG_6]] +// CHECK: store i32 43, i32* [[TMP8]], align 4, !tbaa [[TAG_9]], !noalias [[TAG_6]] +// CHECK: ret void + +void test_FOO_p(int *restrict p) { + *p = 42; +} + +// define void @test_FOO_p(i32* noalias %p) #0 { +// CHECK-LABEL: void @test_FOO_p( +// CHECK: [[p_addr:%.*]] = alloca i32*, align 8 +// CHECK-NEXT: [[TMP0:%.*]] = call i8* @llvm.noalias.decl.p0i8.p0p0i32.i64(i32** [[p_addr]], i64 0, metadata [[TAG_11:!.*]]) +// CHECK-NEXT: store i32* [[p:%.*]], i32** [[p_addr]], align 8, !tbaa [[TAG_2:!.*]], !noalias [[TAG_11]] +// CHECK-NEXT: [[TMP1:%.*]] = load i32*, i32** [[p_addr]], align 8, !tbaa [[TAG_2]], !noalias [[TAG_11]] +// CHECK-NEXT: [[TMP2:%.*]] = call i32* @llvm.noalias.p0i32.p0i8.p0p0i32.i64(i32* [[TMP1]], i8* [[TMP0]], i32** [[p_addr]], i64 0, metadata [[TAG_11]]), !tbaa [[TAG_2]], !noalias [[TAG_11]] +// CHECK-NEXT: store i32 42, i32* [[TMP2]], align 4, !tbaa [[TAG_9:!.*]], !noalias [[TAG_11]] +// CHECK-NEXT: ret void + +void test_FOO_pp(int *restrict *p) { + *p[0] = 42; +} + +// define void @test_FOO_pp(i32** %p) #0 !noalias !14 { +// CHECK: void @test_FOO_pp(i32** [[p:%.*]]) #0 !noalias [[TAG_14:!.*]] { +// CHECK: [[p_addr:%.*]] = alloca i32**, align 8 +// CHECK-NEXT: store i32** [[p]], i32*** [[p_addr]], align 8, !tbaa [[TAG_2:!.*]], !noalias [[TAG_14:!.*]] +// CHECK-NEXT: [[TMP0:%.*]] = load i32**, i32*** [[p_addr]], align 8, !tbaa [[TAG_2]], !noalias [[TAG_14]] +// CHECK-NEXT: [[arrayidx:%.*]] = getelementptr inbounds i32*, i32** [[TMP0]], i64 0 +// CHECK-NEXT: [[TMP1:%.*]] = load i32*, i32** [[arrayidx]], align 8, !tbaa [[TAG_2]], !noalias [[TAG_14]] +// CHECK-NEXT: [[TMP2:%.*]] = call i32* @llvm.noalias.p0i32.p0i8.p0p0i32.i64(i32* [[TMP1]], i8* null, i32** [[arrayidx]], i64 0, metadata [[TAG_14]]), 
!tbaa [[TAG_2]], !noalias [[TAG_14]] +// CHECK-NEXT: store i32 42, i32* [[TMP2]], align 4, !tbaa [[TAG_9:!.*]], !noalias [[TAG_14]] +// CHECK-NEXT: ret void Index: clang/test/CodeGen/restrict/basic.c =================================================================== --- /dev/null +++ clang/test/CodeGen/restrict/basic.c @@ -0,0 +1,129 @@ +// RUN: %clang_cc1 -triple x86_64-apple-darwin -O1 -disable-llvm-optzns -ffull-restrict %s -emit-llvm -o - | FileCheck %s + +int r; +void ex1(int *); + +int *a; +int *foo() { + int *restrict x = a; + return x; + + // CHECK-LABEL: i32* @foo( + // CHECK: [[x:%.*]] = alloca i32*, align 8 + // CHECK: [[TMP1:%.*]] = call i8* @llvm.noalias.decl.p0i8.p0p0i32.i64(i32** [[x]], i64 0, metadata [[TAG_2:!.*]]) + // CHECK: [[TMP3:%.*]] = load i32*, i32** [[x]], align 8, !tbaa !5, !noalias [[TAG_2]] + // CHECK: [[TMP4:%.*]] = call i32* @llvm.noalias.p0i32.p0i8.p0p0i32.i64(i32* [[TMP3]], i8* [[TMP1]], i32** [[x]], i64 0, metadata !2), !tbaa !5, !noalias [[TAG_2]] + // CHECK: ret i32* [[TMP4]] +} + +int *a2; +int *foo1(int b) { + int *restrict x; + + // CHECK-LABEL: define i32* @foo1(i32 %b) + // CHECK: [[b_addr:%.*]] = alloca i32, align 4 + // CHECK: [[x:%.*]] = alloca i32*, align 8 + // CHECK: [[x2:%.*]] = alloca i32*, align 8 + // CHECK: [[TMP1:%.*]] = call i8* @llvm.noalias.decl.p0i8.p0p0i32.i64(i32** [[x]], i64 0, metadata [[TAG_x:!.*]]) + + if (b) { + x = a; + r += *x; + ex1(x); + + // CHECK: [[TMP3:%.*]] = load i32*, i32** @a, align 8, !tbaa [[TAG_5:!.*]], !noalias [[TAG_x_x2:!.*]] + // CHECK: store i32* [[TMP3]], i32** [[x]], align 8, !tbaa [[TAG_5]], !noalias [[TAG_x_x2]] + + // CHECK: [[TMP4:%.*]] = load i32*, i32** [[x]], align 8, !tbaa [[TAG_5]], !noalias [[TAG_x_x2]] + // CHECK: [[TMP5:%.*]] = call i32* @llvm.noalias.p0i32.p0i8.p0p0i32.i64(i32* [[TMP4]], i8* [[TMP1]], i32** [[x]], i64 0, metadata [[TAG_x]]), !tbaa [[TAG_5]], !noalias [[TAG_x_x2]] + // CHECK: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4, !tbaa [[TAG_9:!.*]], !noalias [[TAG_x_x2]] + // CHECK: [[TMP7:%.*]] = load i32, i32* @r, align 4, !tbaa [[TAG_9]], !noalias [[TAG_x_x2]] + // CHECK: [[add:%.*]] = add nsw i32 [[TMP7]], [[TMP6]] + // CHECK: store i32 [[add]], i32* @r, align 4, !tbaa [[TAG_9]], !noalias [[TAG_x_x2]] + + // CHECK: [[TMP8:%.*]] = load i32*, i32** [[x]], align 8, !tbaa [[TAG_5]], !noalias [[TAG_x_x2]] + // CHECK: [[TMP9:%.*]] = call i32* @llvm.noalias.p0i32.p0i8.p0p0i32.i64(i32* [[TMP8]], i8* [[TMP1]], i32** [[x]], i64 0, metadata [[TAG_x]]), !tbaa [[TAG_5]], !noalias [[TAG_x_x2]] + // CHECK: call void @ex1(i32* [[TMP9]]), !noalias [[TAG_x_x2]] + + ++x; + *x = r; + ex1(x); + + // CHECK: [[TMP10:%.*]] = load i32*, i32** [[x]], align 8, !tbaa [[TAG_5]], !noalias [[TAG_x_x2]] + // CHECK: [[TMP11:%.*]] = call i32* @llvm.noalias.p0i32.p0i8.p0p0i32.i64(i32* [[TMP10]], i8* [[TMP1]], i32** [[x]], i64 0, metadata [[TAG_x]]), !tbaa [[TAG_5]], !noalias [[TAG_x_x2]] + // CHECK: [[incdec_ptr:%.*]] = getelementptr inbounds i32, i32* [[TMP11]], i32 1 + // CHECK: store i32* [[incdec_ptr]], i32** [[x]], align 8, !tbaa [[TAG_5]], !noalias [[TAG_x_x2]] + + // CHECK: [[TMP12:%.*]] = load i32, i32* @r, align 4, !tbaa [[TAG_9]], !noalias [[TAG_x_x2]] + // CHECK: [[TMP13:%.*]] = load i32*, i32** [[x]], align 8, !tbaa [[TAG_5]], !noalias [[TAG_x_x2]] + // CHECK: [[TMP14:%.*]] = call i32* @llvm.noalias.p0i32.p0i8.p0p0i32.i64(i32* [[TMP13]], i8* [[TMP1]], i32** [[x]], i64 0, metadata [[TAG_x]]), !tbaa [[TAG_5]], !noalias [[TAG_x_x2]] + // CHECK: store i32 [[TMP12]], i32* [[TMP14]], align 4, !tbaa 
[[TAG_9]], !noalias [[TAG_x_x2]] + + // CHECK: [[TMP15:%.*]] = load i32*, i32** [[x]], align 8, !tbaa [[TAG_5]], !noalias [[TAG_x_x2]] + // CHECK: [[TMP16:%.*]] = call i32* @llvm.noalias.p0i32.p0i8.p0p0i32.i64(i32* [[TMP15]], i8* [[TMP1]], i32** [[x]], i64 0, metadata [[TAG_x]]), !tbaa [[TAG_5]], !noalias [[TAG_x_x2]] + // CHECK: call void @ex1(i32* [[TMP16]]), !noalias [[TAG_x_x2]] + + x += b; + *x = r; + ex1(x); + + // CHECK: [[TMP17:%.*]] = load i32, i32* [[b_addr]], align 4, !tbaa [[TAG_9]], !noalias [[TAG_x_x2]] + // CHECK: [[TMP18:%.*]] = load i32*, i32** [[x]], align 8, !tbaa [[TAG_5]], !noalias [[TAG_x_x2]] + // CHECK: [[TMP19:%.*]] = call i32* @llvm.noalias.p0i32.p0i8.p0p0i32.i64(i32* [[TMP18]], i8* [[TMP1]], i32** [[x]], i64 0, metadata [[TAG_x]]), !tbaa [[TAG_5]], !noalias [[TAG_x_x2]] + // CHECK: [[idx_ext:%.*]] = sext i32 [[TMP17]] to i64 + // CHECK: [[add_ptr:%.*]] = getelementptr inbounds i32, i32* [[TMP19]], i64 [[idx_ext]] + // CHECK: store i32* [[add_ptr]], i32** [[x]], align 8, !tbaa [[TAG_5]], !noalias [[TAG_x_x2]] + + // CHECK: [[TMP20:%.*]] = load i32, i32* @r, align 4, !tbaa [[TAG_9]], !noalias [[TAG_x_x2]] + // CHECK: [[TMP21:%.*]] = load i32*, i32** [[x]], align 8, !tbaa [[TAG_5]], !noalias [[TAG_x_x2]] + // CHECK: [[TMP22:%.*]] = call i32* @llvm.noalias.p0i32.p0i8.p0p0i32.i64(i32* [[TMP21]], i8* [[TMP1]], i32** [[x]], i64 0, metadata [[TAG_x]]), !tbaa [[TAG_5]], !noalias [[TAG_x_x2]] + // CHECK: store i32 [[TMP20]], i32* [[TMP22]], align 4, !tbaa [[TAG_9]], !noalias [[TAG_x_x2]] + + // CHECK: [[TMP23:%.*]] = load i32*, i32** [[x]], align 8, !tbaa [[TAG_5]], !noalias [[TAG_x_x2]] + // CHECK: [[TMP24:%.*]] = call i32* @llvm.noalias.p0i32.p0i8.p0p0i32.i64(i32* [[TMP23]], i8* [[TMP1]], i32** [[x]], i64 0, metadata [[TAG_x]]), !tbaa [[TAG_5]], !noalias [[TAG_x_x2]] + // CHECK: call void @ex1(i32* [[TMP24]]), !noalias [[TAG_x_x2]] + + int *restrict x2 = a2; + *x2 = r; + ex1(x2); + + // CHECK: [[TMP26:%.*]] = call i8* @llvm.noalias.decl.p0i8.p0p0i32.i64(i32** [[x2]], i64 0, metadata [[TAG_x2:!.*]]) + // CHECK: [[TMP27:%.*]] = load i32*, i32** @a2, align 8, !tbaa [[TAG_5]], !noalias [[TAG_x_x2]] + // CHECK: store i32* [[TMP27]], i32** [[x2]], align 8, !tbaa [[TAG_5]], !noalias [[TAG_x_x2]] + + // CHECK: [[TMP28:%.*]] = load i32, i32* @r, align 4, !tbaa [[TAG_9]], !noalias [[TAG_x_x2]] + // CHECK: [[TMP29:%.*]] = load i32*, i32** [[x2]], align 8, !tbaa [[TAG_5]], !noalias [[TAG_x_x2]] + // CHECK: [[TMP30:%.*]] = call i32* @llvm.noalias.p0i32.p0i8.p0p0i32.i64(i32* [[TMP29]], i8* [[TMP26]], i32** [[x2]], i64 0, metadata [[TAG_x2]]), !tbaa [[TAG_5]], !noalias [[TAG_x_x2]] + // CHECK: store i32 [[TMP28]], i32* [[TMP30]], align 4, !tbaa [[TAG_9]], !noalias [[TAG_x_x2]] + + // CHECK: [[TMP31:%.*]] = load i32*, i32** [[x2]], align 8, !tbaa [[TAG_5]], !noalias [[TAG_x_x2]] + // CHECK: [[TMP32:%.*]] = call i32* @llvm.noalias.p0i32.p0i8.p0p0i32.i64(i32* [[TMP31]], i8* [[TMP26]], i32** [[x2]], i64 0, metadata [[TAG_x2]]), !tbaa [[TAG_5]], !noalias [[TAG_x_x2]] + // CHECK: call void @ex1(i32* [[TMP32]]), !noalias [[TAG_x_x2]] + } else { + x = a2; + r += *x; + // CHECK: [[TMP34:%.*]] = load i32*, i32** @a2, align 8, !tbaa [[TAG_5]], !noalias [[TAG_x]] + // CHECK: store i32* [[TMP34]], i32** [[x]], align 8, !tbaa [[TAG_5]], !noalias [[TAG_x]] + + // CHECK: [[TMP35:%.*]] = load i32*, i32** [[x]], align 8, !tbaa [[TAG_5]], !noalias [[TAG_x]] + // CHECK: [[TMP36:%.*]] = call i32* @llvm.noalias.p0i32.p0i8.p0p0i32.i64(i32* [[TMP35]], i8* [[TMP1]], i32** [[x]], i64 0, metadata [[TAG_x]]), 
!tbaa [[TAG_5]], !noalias [[TAG_x]] + // CHECK: [[TMP37:%.*]] = load i32, i32* [[TMP36]], align 4, !tbaa [[TAG_9]], !noalias [[TAG_x]] + // CHECK: [[TMP38:%.*]] = load i32, i32* @r, align 4, !tbaa [[TAG_9]], !noalias [[TAG_x]] + // CHECK: [[add1:%.*]] = add nsw i32 [[TMP38]], [[TMP37]] + // CHECK: store i32 [[add1]], i32* @r, align 4, !tbaa [[TAG_9]], !noalias [[TAG_x]] + } + + return x; + // CHECK: [[TMP39:%.*]] = load i32*, i32** [[x]], align 8, !tbaa [[TAG_5]], !noalias [[TAG_x]] + // CHECK: [[TMP40:%.*]] = call i32* @llvm.noalias.p0i32.p0i8.p0p0i32.i64(i32* [[TMP39]], i8* [[TMP1]], i32** [[x]], i64 0, metadata [[TAG_x]]), !tbaa [[TAG_5]], !noalias [[TAG_x]] + // CHECK: ret i32* [[TMP40]] +} + +int *bar() { + int *x = a; + return x; + + // CHECK-LABEL: define i32* @bar() + // CHECK-NOT: noalias + // CHECK: ret i32* +} Index: clang/test/CodeGen/restrict/basic_opt_01.c =================================================================== --- /dev/null +++ clang/test/CodeGen/restrict/basic_opt_01.c @@ -0,0 +1,137 @@ +// RUN: %clang_cc1 -triple x86_64-apple-darwin -O2 -ffull-restrict %s -emit-llvm -o - | FileCheck %s + +// Rough test to verify basic functionality of the 'restrict member pointer' rewrite + +// CHECK: @test01A +// CHECK: ret i32 42 +int test01A(int *pA, int *pB) { + int *__restrict prA; + prA = pA; + + *prA = 42; + *pB = 43; + return *prA; +} + +// CHECK: @test01B +// CHECK: ret i32 42 +int test01B(int *__restrict prA, int *pB) { + *prA = 42; + *pB = 43; + return *prA; +} + +// CHECK: @test02A +// CHECK: ret i32 42 +int test02A(int b, int *pA, char *pB, int *pC) { + int *__restrict prA; + prA = pA; + char *__restrict prB; + prB = pB; + char *lp = b ? (char *)prA : (char *)prB; + + *lp = 42; + *pC = 43; + return *lp; +} + +// CHECK: @test02B +// CHECK: ret i32 42 +int test02B(int b, int *__restrict prA, char *__restrict prB, int *pC) { + char *lp = b ? 
(char *)prA : (char *)prB; + + *lp = 42; + *pC = 43; + return *lp; +} + +// CHECK: @test03 +// CHECK: ret i32 42 +int test03(int n, int *pA, char *pB, int *pC) { + do { + int *__restrict prA; + prA = pA; + + *prA = 42; + *pC = 43; + } while (n--); + return *pA; +} + +// CHECK: @test04A0 +// CHECK: ret i32 42 +int test04A0(int n, int *pA, char *pB, int *pC) { + int *__restrict prA; + do { + prA = pA; + + *prA = 42; + *pC = 43; + } while (n--); + return *prA; +} + +// CHECK: @test04A1 +// CHECK: ret i32 42 +int test04A1(int n, int *pA, char *pB, int *pC) { + int *__restrict prA; + prA = pA; + do { + prA = pA; + + *prA = 42; + *pC = 43; + } while (n--); + return *prA; +} + +// CHECK: @test04B0 +// CHECK: ret i32 42 +int test04B0(int n, int *__restrict prA, char *pB, int *pC) { + do { + prA = prA; + + *prA = 42; + *pC = 43; + } while (n--); + return *prA; +} + +// CHECK: @test04B1 +// CHECK: ret i32 42 +int test04B1(int n, int *__restrict prA, char *pB, int *pC) { + prA = prA; + do { + prA = prA; + + *prA = 42; + *pC = 43; + } while (n--); + return *prA; +} + +// CHECK: @test05A +// CHECK: ret i32 42 +int test05A(int n, int *pA, char *pB, int *pC) { + int *__restrict prA; + prA = pA; + do { + prA++; + + *prA = 42; + *pC = 43; + } while (n--); + return *prA; +} + +// CHECK: @test05B +// CHECK: ret i32 42 +int test05B(int n, int *__restrict prA, char *pB, int *pC) { + do { + prA++; + + *prA = 42; + *pC = 43; + } while (n--); + return *prA; +} Index: clang/test/CodeGen/restrict/basic_opt_02.c =================================================================== --- /dev/null +++ clang/test/CodeGen/restrict/basic_opt_02.c @@ -0,0 +1,52 @@ +// RUN: %clang_cc1 -triple x86_64-apple-darwin -O2 -ffull-restrict %s -emit-llvm -o - | FileCheck %s + +int foo1(int *a, int *restrict b, int c) { + *a = 0; + *b = c; + return *a; // OK: returns 0 +} +// CHECK: @foo1 +// CHECK: ret i32 0 + +int foo2(int *a, int *restrict b, int c) { + int *bc = b + c; + *a = 0; + *bc = c; // OK: bc keeps the restrictness + return *a; // returns 0 +} +// CHECK: @foo2 +// CHECK: ret i32 0 + +static int *copy(int *b) { return b; } + +int foo3(int *a, int *restrict b, int c) { + int *bc = copy(b); // a fix to support this is in the works + *a = 0; + *bc = c; + return *a; +} +// CHECK: @foo3 +// CHECK: ret i32 0 + +// Finally: +inline void update(int *p, int c) { *p = c; } + +int foo6(int *a, int *b, int c) { + int *restrict bc = b; // local restrict + *a = 0; + update(bc, c); // Oops: inlining loses local restrict annotation + return *a; +} + +// CHECK: @foo6 +// CHECK: ret i32 0 + +// Notice the difference with: +int foo7(int *a, int *restrict b, int c) { + *a = 0; + update(b, c); // restrict argument preserved after inlining. + return *a; // returns 0 +} + +// CHECK: @foo7 +// CHECK: ret i32 0 Index: clang/test/CodeGen/restrict/basic_opt_03.c =================================================================== --- /dev/null +++ clang/test/CodeGen/restrict/basic_opt_03.c @@ -0,0 +1,73 @@ +// RUN: %clang_cc1 -triple x86_64-apple-darwin -O2 -ffull-restrict %s -emit-llvm -o - | FileCheck %s +// XFAIL: * + +// NOTE: SROA needs to be able to see through llvm.noalias. This is introduced in case of returning of larger structs. 
+struct A { + int a, b, c, d, e, f, g, h; +}; +struct A constructIt(int a) { + struct A tmp = {a, a, a, a, a, a, a, a}; + return tmp; +} +int test_sroa01a(unsigned c) { + int tmp = 0; + for (int i = 0; i < c; ++i) { + struct A a = constructIt(i); + tmp = tmp + a.e; + } + return tmp; +} + +// CHECK: @test_sroa01a +// CHECK: FIXME + +int test_sroa01b(unsigned c) { + int tmp = 0; + for (int i = 0; i < c; ++i) { + struct A a = {i, i, i, i, i, i, i, i}; + tmp = tmp + a.e; + } + return tmp; +} + +// CHECK: @test_sroa01b +// CHECK: FIXME + +int test_sroa01c(unsigned c) { + int tmp = 0; + for (int i = 0; i < c; ++i) { + int *__restrict dummy; // should not influence optimizations ! + struct A a = {i, i, i, i, i, i, i, i}; + tmp = tmp + a.e; + } + return tmp; +} + +// CHECK: @test_sroa01b +// CHECK: FIXME + +int test_sroa02a(unsigned c) { + int tmp = 0; + struct A a; + for (int i = 0; i < c; ++i) { + a = constructIt(i); + tmp = tmp + a.e; + } + return tmp; +} + +// CHECK: @test_sroa02a +// CHECK: FIXME + +int test_sroa02b(unsigned c) { + struct A a; + int tmp = 0; + for (int i = 0; i < c; ++i) { + a = (struct A){i, i, i, i, i, i, i, i}; + tmp = tmp + a.e; + } + return tmp; +} + +// CHECK: @test_sroa02b +// CHECK: FIXME Index: clang/test/CodeGen/restrict/basic_opt_04.c =================================================================== --- /dev/null +++ clang/test/CodeGen/restrict/basic_opt_04.c @@ -0,0 +1,296 @@ +// RUN: %clang_cc1 -triple x86_64-apple-darwin -O2 -ffull-restrict %s -emit-llvm -o - | FileCheck %s + +// verify effect of restrict on optimizations +void dummy_restrict01_n(int *p) { + if (0) { + *p = 0xdeadbeef; + } +} + +void dummy_restrict01_a(int *__restrict p) { + if (0) { + *p = 0xdeadbeef; + } +} + +void dummy_restrict01_r(int *p_) { + int *__restrict p = p_; + if (0) { + *p = 0xdeadbeef; + } +} + +void dummy_restrict01_R(int *p_) { + int *__restrict p; + p = p_; + if (0) { + *p = 0xdeadbeef; + } +} + +void dummy_restrict02_n(int *p) { + p++; + if (0) { + *p = 0xdeadbeef; + } +} + +void dummy_restrict02_a(int *__restrict p) { + p++; + if (0) { + *p = 0xdeadbeef; + } +} + +void dummy_restrict02_r(int *p_) { + int *__restrict p = p_; + p++; + if (0) { + *p = 0xdeadbeef; + } +} + +void dummy_restrict02_R(int *p_) { + int *__restrict p; + p = p_; + p++; + if (0) { + *p = 0xdeadbeef; + } +} + +// --------------------------------- + +int test01_n(int *pA, int c) { + if (0) { + *pA = 0xdeadbeef; + } + return c; +} + +// CHECK: @test01_n +// CHECK-NOT: .noalias +// CHECK: ret i32 + +int test01_a(int *__restrict pA, int c) { + if (0) { + *pA = 0xdeadbeef; + } + return c; +} + +// CHECK: @test01_a +// CHECK-NOT: .noalias +// CHECK: ret i32 + +int test01_r(int *pA_, int c) { + int *__restrict pA = pA_; + if (0) { + *pA = 0xdeadbeef; + } + return c; +} +// CHECK: @test01_r +// CHECK-NOT: .noalias +// CHECK: ret i32 + +int test01_R(int *pA_, int c) { + int *__restrict pA; + pA = pA_; + if (0) { + *pA = 0xdeadbeef; + } + return c; +} +// CHECK: @test01_R +// CHECK-NOT: .noalias +// CHECK: ret i32 + +int test01_nn(int *pA, int c) { + dummy_restrict01_n(pA); + return c; +} +// CHECK: @test01_nn +// CHECK-NOT: .noalias +// CHECK: ret i32 + +int test01_na(int *pA, int c) { + dummy_restrict01_a(pA); + return c; +} +// CHECK: @test01_na +// CHECK-NOT: .noalias +// CHECK: ret i32 + +int test01_nr(int *pA, int c) { + dummy_restrict01_r(pA); + return c; +} +// CHECK: @test01_nr +// CHECK-NOT: .noalias +// CHECK: ret i32 + +int test01_nR(int *pA, int c) { + dummy_restrict01_R(pA); + return c; +} +// 
CHECK: @test01_nR +// CHECK-NOT: .noalias +// CHECK: ret i32 + +// ---------------------------------- +int test02_n(int *pA, int c) { + pA++; + if (0) { + *pA = 0xdeadbeef; + } + return c; +} +// CHECK: @test02_n +// CHECK-NOT: .noalias +// CHECK: ret i32 + +int test02_a(int *__restrict pA, int c) { + pA++; + if (0) { + *pA = 0xdeadbeef; + } + return c; +} +// CHECK: @test02_a +// CHECK-NOT: .noalias +// CHECK: ret i32 + +int test02_r(int *pA_, int c) { + int *__restrict pA = pA_; + pA++; + if (0) { + *pA = 0xdeadbeef; + } + return c; +} +// CHECK: @test02_r +// CHECK-NOT: .noalias +// CHECK: ret i32 + +int test02_R(int *pA_, int c) { + int *__restrict pA; + pA = pA_; + pA++; + if (0) { + *pA = 0xdeadbeef; + } + return c; +} +// CHECK: @test02_R +// CHECK-NOT: .noalias +// CHECK: ret i32 + +int test02_nn(int *pA, int c) { + dummy_restrict02_n(pA); + return c; +} +// CHECK: @test02_nn +// CHECK-NOT: .noalias +// CHECK: ret i32 + +int test02_na(int *pA, int c) { + dummy_restrict02_a(pA); + return c; +} +// CHECK: @test02_na +// CHECK-NOT: .noalias +// CHECK: ret i32 + +int test02_nr(int *pA, int c) { + dummy_restrict02_r(pA); + return c; +} +// CHECK: @test02_nr +// CHECK-NOT: .noalias +// CHECK: ret i32 + +int test02_nR(int *pA, int c) { + dummy_restrict02_R(pA); + return c; +} +// CHECK: @test02_nR +// CHECK-NOT: .noalias +// CHECK: ret i32 + +int test11_n(int *pA) { + unsigned total = 0; + for (int i = 0; i < 10; ++i) { + int *p = pA; + total = total + 1; + } + return total; +} +// CHECK: @test11_n +// CHECK-NOT: .noalias +// CHECK: ret i32 + +int test11_lr(int *pA) { + unsigned total = 0; + for (int i = 0; i < 10; ++i) { + int *__restrict p = pA; + total = total + 1; + } + return total; +} +// CHECK: @test11_lr +// CHECK-NOT: .noalias +// CHECK: ret i32 + +int test11_lR(int *pA) { + unsigned total = 0; + for (int i = 0; i < 10; ++i) { + int *__restrict p; + p = pA; + total = total + 1; + } + return total; +} +// CHECK: @test11_lR +// CHECK-NOT: .noalias +// CHECK: ret i32 + +int test12_n(int *pA) { + unsigned total = 0; + for (int i = 0; i < 10; ++i) { + int *p = pA; + p++; + total = total + 1; + } + return total; +} +// CHECK: @test12_n +// CHECK-NOT: .noalias +// CHECK: ret i32 + +int test12_lr(int *pA) { + unsigned total = 0; + for (int i = 0; i < 10; ++i) { + int *__restrict p = pA; + p++; + total = total + 1; + } + return total; +} +// CHECK: @test12_lr +// CHECK-NOT: .noalias +// CHECK: ret i32 + +int test12_lR(int *pA) { + unsigned total = 0; + for (int i = 0; i < 10; ++i) { + int *__restrict p; + p = pA; + p++; + total = total + 1; + } + return total; +} +// CHECK: @test12_lR +// CHECK-NOT: .noalias +// CHECK: ret i32 Index: clang/test/CodeGen/restrict/escape_through_volatile.c =================================================================== --- /dev/null +++ clang/test/CodeGen/restrict/escape_through_volatile.c @@ -0,0 +1,31 @@ +// RUN: %clang_cc1 -triple x86_64-apple-darwin -O2 -ffull-restrict %s -emit-llvm -o - | FileCheck %s + +// a volatile pointer can confuse llvm: (p2 depends on p0) +int test_escape_through_volatile_01(int *a_p0) { + int *__restrict p0; + p0 = a_p0; + int *volatile p1 = p0; + int *p2 = p1; + *p0 = 42; + *p2 = 99; + + return *p0; // 42 or 99 +} + +// CHECK: @test_escape_through_volatile_01 +// either a reload or 99, but must never be 42 +// CHECK-NOT: ret i32 42 + +// but not in: +int test_escape_through_volatile_02(int *__restrict p0) { + int *volatile p1 = p0; + int *p2 = p1; + *p0 = 42; + *p2 = 99; + + return *p0; // 42 or 99 +} + +// CHECK: 
@test_escape_through_volatile_02 +// either a reload or 99, but must never be 42 +// CHECK-NOT: ret i32 42 Index: clang/test/CodeGen/restrict/inlining_01.c =================================================================== --- /dev/null +++ clang/test/CodeGen/restrict/inlining_01.c @@ -0,0 +1,156 @@ +// sfg-check: check resulting chains +// RUN: %clang_cc1 -triple x86_64-apple-darwin -O2 -ffull-restrict %s -emit-llvm -o - | FileCheck %s + +// check how restrict propagation wrt inlining works + +//#define INLINE __attribute__((alwaysinline)) +//#define INLINE inline +#define INLINE + +// Variations: +// - n : no restrict +// - a : argument restrict +// - R : local restrict + +#define ARGRESTRICT_n +#define ARGRESTRICT_a __restrict +#define ARGRESTRICT_R + +#define LOCALRESTRICT_n(T, A, B) T A = B +#define LOCALRESTRICT_a(T, A, B) T A = B +#define LOCALRESTRICT_R(T, A, B) T __restrict A = B + +#define CREATE_ALL(CS) \ + CS(n, n) \ + CS(a, n) \ + CS(n, a) \ + CS(a, a) \ + CS(R, n) \ + CS(n, R) \ + CS(R, R) + +#define CREATE_SET(A, B) \ + INLINE int set_##A##B(int *ARGRESTRICT_##A pA, int *ARGRESTRICT_##B pB) { \ + LOCALRESTRICT_##A(int *, lpA, pA); \ + LOCALRESTRICT_##B(int *, lpB, pB); \ + *lpA = 42; \ + *lpB = 99; \ + return *lpA; \ + } + +#define CREATE_CALL_SET1(A, B) \ + int test01_nn_call_set_##A##B(int *pA, int *pB) { \ + set_##A##B(pA, pB); \ + return *pA; \ + } + +#define CREATE_CALL_SET2(A, B) \ + int test02_##A##B##_call_set_##A##B(int *ARGRESTRICT_##A pA, int *ARGRESTRICT_##B pB) { \ + LOCALRESTRICT_##A(int *, lpA, pA); \ + LOCALRESTRICT_##B(int *, lpB, pB); \ + set_##A##B(lpA, lpB); \ + return *lpA; \ + } + +#define CREATE_CALL_SET3(A, B) \ + int test03_##A##B##_call_set_nn(int *ARGRESTRICT_##A pA, int *ARGRESTRICT_##B pB) { \ + LOCALRESTRICT_##A(int *, lpA, pA); \ + LOCALRESTRICT_##B(int *, lpB, pB); \ + set_nn(lpA, lpB); \ + return *lpA; \ + } + +CREATE_ALL(CREATE_SET) +CREATE_ALL(CREATE_CALL_SET1) +CREATE_ALL(CREATE_CALL_SET2) +CREATE_ALL(CREATE_CALL_SET3) + +// CHECK-LABEL: @set_nn( +// CHECK-NOT: ret i32 42 + +// CHECK-LABEL: @set_an( +// CHECK: ret i32 42 + +// CHECK-LABEL: @set_na( +// CHECK: ret i32 42 + +// CHECK_LABEL: @set_aa( +// CHECK: ret i32 42 + +// CHECK-LABEL: @set_Rn( +// CHECK: ret i32 42 + +// CHECK-LABEL: @set_nR( +// CHECK: ret i32 42 + +// CHECK-LABEL: @set_RR( +// CHECK: ret i32 42 + +// CHECK-LABEL: @test01_nn_call_set_nn( +// CHECK-NOT: ret i32 42 + +//@ NOTE: missed store-load propagation +// CHECK-LABEL: @test01_nn_call_set_an( +// CHECK-NOT: ret i32 42 + +//@ NOTE: missed store-load propagation +// CHECK-LABEL: @test01_nn_call_set_na( +// CHECK-NOT: ret i32 42 + +//@ NOTE: missed store-load propagation +// CHECK-LABEL: @test01_nn_call_set_aa( +// CHECK-NOT: ret i32 42 + +//@ NOTE: missed store-load propagation +// CHECK-LABEL: @test01_nn_call_set_Rn( +// CHECK-NOT: ret i32 42 + +//@ NOTE: missed store-load propagation +// CHECK-LABEL: @test01_nn_call_set_nR( +// CHECK-NOT: ret i32 42 + +//@ NOTE: missed store-load propagation +// CHECK-LABEL: @test01_nn_call_set_RR( +// CHECK-NOT: ret i32 42 + +// CHECK-LABEL: @test02_nn_call_set_nn( +// CHECK-NOT: ret i32 42 + +// CHECK-LABEL: @test02_an_call_set_an( +// CHECK: ret i32 42 + +// CHECK-LABEL: @test02_na_call_set_na( +// CHECK: ret i32 42 + +// CHECK-LABEL: @test02_aa_call_set_aa( +// CHECK: ret i32 42 + +// CHECK-LABEL: @test02_Rn_call_set_Rn( +// CHECK: ret i32 42 + +// CHECK-LABEL: @test02_nR_call_set_nR( +// CHECK: ret i32 42 + +// CHECK-LABEL: @test02_RR_call_set_RR( +// CHECK: ret i32 42 + +// 
CHECK-LABEL: @test03_nn_call_set_nn( +// CHECK-NOT: ret i32 42 + +// CHECK-LABEL: @test03_an_call_set_nn( +// CHECK: ret i32 42 + +// CHECK-LABEL: @test03_na_call_set_nn( +// CHECK: ret i32 42 + +// CHECK-LABEL: @test03_aa_call_set_nn( +// CHECK: ret i32 42 + +// CHECK-LABEL: @test03_Rn_call_set_nn( +// CHECK: ret i32 42 + +// CHECK-LABEL: @test03_nR_call_set_nn( +// CHECK: ret i32 42 + +// CHECK-LABEL: @test03_RR_call_set_nn( +// CHECK: ret i32 42 Index: clang/test/CodeGen/restrict/inlining_02.c =================================================================== --- /dev/null +++ clang/test/CodeGen/restrict/inlining_02.c @@ -0,0 +1,144 @@ +// sfg-check: check resulting chains +// RUN: %clang_cc1 -triple x86_64-apple-darwin -O2 -ffull-restrict %s -emit-llvm -o - | FileCheck %s + +// Base test for restrict propagation with inlining + +//#define INLINE __attribute__((alwaysinline)) +//#define INLINE inline +#define INLINE + +void set_nnn(int *pA, int *pB, int *dummy) { + int *lpA; + lpA = pA; + int *lpB; + lpB = pB; + int *ldummy; + ldummy = dummy; + *lpA = 42; + *lpB = 43; + + *ldummy = 99; +} +// CHECK-LABEL: @set_nnn( + +void set_nna(int *pA, int *pB, int *__restrict dummy) { + int *lpA; + lpA = pA; + int *lpB; + lpB = pB; + int *ldummy; + ldummy = dummy; + *lpA = 42; + *lpB = 43; + + *ldummy = 99; +} +// CHECK-LABEL: @set_nna( + +void set_nnr(int *pA, int *pB, int *dummy) { + int *lpA; + lpA = pA; + int *lpB; + lpB = pB; + int *__restrict ldummy; + ldummy = dummy; + + *lpA = 42; + *lpB = 43; + + *ldummy = 99; +} +// CHECK-LABEL: @set_nnr( + +int test_rr_nnn(int *pA, int *pB, int *dummy) { + int *__restrict lpA; + lpA = pA; + int *__restrict lpB; + lpB = pB; + + set_nnn(lpA, lpB, dummy); + return *lpA; +} + +// CHECK-LABEL: @test_rr_nnn( +// CHECK: ret i32 42 + +int test_rr_nna(int *pA, int *pB, int *dummy) { + int *__restrict lpA; + lpA = pA; + int *__restrict lpB; + lpB = pB; + + set_nna(lpA, lpB, dummy); + return *lpA; +} + +// CHECK-LABEL: @test_rr_nna( +// CHECK: ret i32 42 + +int test_rr_nnr(int *pA, int *pB, int *dummy) { + int *__restrict lpA; + lpA = pA; + int *__restrict lpB; + lpB = pB; + + set_nnr(lpA, lpB, dummy); + return *lpA; +} + +// CHECK-LABEL: @test_rr_nnr( +// CHECK: ret i32 42 + +// ----------------------------------------------------------- + +int test_rr_local_nnn(int *pA, int *pB, int *dummy) { + int *__restrict lpA; + lpA = pA; + int *__restrict lpB; + lpB = pB; + int *ldummy; + ldummy = dummy; + + *lpA = 10; + { + int *l2pA; + l2pA = lpA; + int *l2pB; + l2pB = lpB; + int *l2dummy; + l2dummy = ldummy; + *l2pA = 42; + *l2pB = 43; + + *l2dummy = 99; + } + return *lpA; +} +// CHECK-LABEL: @test_rr_local_nnn( +// CHECK: ret i32 42 + +int test_rr_local_nnr(int *pA, int *pB, int *dummy) { + int *__restrict lpA; + lpA = pA; + int *__restrict lpB; + lpB = pB; + int *ldummy; + ldummy = dummy; + + *lpA = 10; + { + int *l2pA; + l2pA = lpA; + int *l2pB; + l2pB = lpB; + int *__restrict l2dummy; + l2dummy = ldummy; + *l2pA = 42; + *l2pB = 43; + + *l2dummy = 99; + } + return *lpA; +} +// CHECK-LABEL: @test_rr_local_nnr( +// CHECK: ret i32 42 Index: clang/test/CodeGen/restrict/provenance.noalias_reduction_01.c =================================================================== --- /dev/null +++ clang/test/CodeGen/restrict/provenance.noalias_reduction_01.c @@ -0,0 +1,197 @@ +// RUN: %clang_cc1 -triple x86_64-apple-darwin -O2 -ffull-restrict %s -emit-llvm -o - | FileCheck %s + +// Check that unnecessary llvm.provenance.noalias calls are collapsed + +int *test01(int *p, int n) { + 
int *__restrict rp; + rp = p; + for (int i = 0; i < n; ++i) { + *rp = 10; + rp++; + rp++; + rp++; + rp++; + } + return rp; +} + +// CHECK-LABEL: @test01( +// CHECK: = tail call i8* @llvm.noalias.decl.p0i8.p0p0i32.i64 +// CHECK: = tail call i32* @llvm.provenance.noalias.p0i32.p0i8.p0p0i32.p0p0i32.i64 +// CHECK: = tail call i32* @llvm.provenance.noalias.p0i32.p0i8.p0p0i32.p0p0i32.i64 +// CHECK: = tail call i32* @llvm.provenance.noalias.p0i32.p0i8.p0p0i32.p0p0i32.i64 +// CHECK-NOT: llvm.provenance.noalias + +int *test02(int *p, int n) { + int *__restrict rp; + rp = p; + rp++; + rp++; + rp++; + rp++; + for (int i = 0; i < n; ++i) { + *rp = 10; + rp++; + rp++; + rp++; + rp++; + } + return rp; +} + +// CHECK-LABEL: @test02( +// CHECK: = tail call i8* @llvm.noalias.decl.p0i8.p0p0i32.i64 +// CHECK: = tail call i32* @llvm.provenance.noalias.p0i32.p0i8.p0p0i32.p0p0i32.i64 +// CHECK-NOT: llvm.provenance.noalias + +int *test03(int *p, int n) { + int *__restrict rp; + rp = p; + rp++; + rp++; + rp++; + rp++; + for (int i = 0; i < n; ++i) { + *rp = 10; + rp++; + rp++; + if (*rp == 42) { + rp++; + rp++; + } + rp++; + rp++; + } + return rp; +} + +// CHECK-LABEL: @test03( +// CHECK: = tail call i8* @llvm.noalias.decl.p0i8.p0p0i32.i64 +// CHECK: = tail call i32* @llvm.provenance.noalias.p0i32.p0i8.p0p0i32.p0p0i32.i64 +// CHECK-NOT: llvm.provenance.noalias + +int *test04(int *p, int n) { + int *__restrict rp; + rp = p; + rp++; + rp++; + rp++; + rp++; + for (int i = 0; i < n; ++i) { + *rp = 10; + rp++; + rp++; + switch (*rp) { + default: + rp++; + case 10: + rp++; + case 20: + rp++; + case 30: + rp++; + break; + } + if (*rp == 42) { + rp++; + rp++; + } + rp++; + rp++; + } + return rp; +} + +// CHECK-LABEL: @test04( +// CHECK: = tail call i8* @llvm.noalias.decl.p0i8.p0p0i32.i64 +// CHECK: = tail call i32* @llvm.provenance.noalias.p0i32.p0i8.p0p0i32.p0p0i32.i64 +// CHECK-NOT: llvm.provenance.noalias + +int *test05(int *p, int n) { + int *__restrict rp; + rp = p; + rp++; + rp++; + rp++; + rp++; + for (int i = 0; i < n; ++i) { + *rp = 10; + rp++; + rp++; + switch (*rp) { + default: + rp++; + case 10: + rp++; + case 20: + rp++; + case 30: + rp++; + break; + } + if (*rp == 42) { + rp++; + rp++; + } + rp++; + rp++; + } + return rp; +} + +// CHECK-LABEL: @test05( +// CHECK: = tail call i8* @llvm.noalias.decl.p0i8.p0p0i32.i64 +// CHECK: = tail call i32* @llvm.provenance.noalias.p0i32.p0i8.p0p0i32.p0p0i32.i64 +// CHECK-NOT: llvm.provenance.noalias + +int *test06(int *p, int n) { + int *__restrict rp1; + rp1 = p; + // llvm.provenance.noalias rp1 (p) + + { + int *__restrict rp; + rp = p; + // llvm.provenance.noalias rp (p) + // llvm.provenance.noalias rp (rp1) + rp++; + rp++; + rp++; + rp++; + for (int i = 0; i < n; ++i) { + *rp = 10; + rp = rp1; + rp++; + rp++; + + switch (*rp) { + default: + rp++; + case 10: + rp++; + case 20: + rp++; + case 30: + rp++; + break; + } + if (*rp == 42) { + rp++; + rp++; + } + rp++; + rp++; + } + return rp; + } +} + +// CHECK-LABEL: @test06( +// CHECK: = tail call i8* @llvm.noalias.decl.p0i8.p0p0i32.i64 +// CHECK: = tail call i8* @llvm.noalias.decl.p0i8.p0p0i32.i64 +// CHECK: = tail call i32* @llvm.provenance.noalias.p0i32.p0i8.p0p0i32.p0p0i32.i64 +// CHECK: = tail call i32* @llvm.provenance.noalias.p0i32.p0i8.p0p0i32.p0p0i32.i64 +// CHECK: = tail call i32* @llvm.provenance.noalias.p0i32.p0i8.p0p0i32.p0p0i32.i64 +// CHECK-NOT: llvm.provenance.noalias + +// CHECK: declare i32* @llvm.provenance.noalias.p0i32.p0i8.p0p0i32.p0p0i32.i64 Index: clang/test/CodeGen/restrict/struct.c 
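A short reading aid for the struct.c test below (an illustration, not part of the patch): the metadata checked at the end of that file shows how the llvm.noalias.copy.guard index lists describe where the restrict members sit inside the aggregates. Restating those CHECK lines in source terms, and assuming the leading -1 merely covers the dereference of the copied object (any array index is acceptable):

  struct FOO { int *restrict rp0; int *restrict rp1; int *restrict rp2; };
  /* guard indices {-1, 0}, {-1, 1}, {-1, 2}: one list per restrict member,
     the trailing value being that member's field number */

  struct FUM { struct FOO m; };
  /* guard indices {-1, 0, 0}, {-1, 0, 1}, {-1, 0, 2}: the extra 0 selects
     field 'm' before descending into FOO */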
=================================================================== --- /dev/null +++ clang/test/CodeGen/restrict/struct.c @@ -0,0 +1,138 @@ +// RUN: %clang_cc1 -triple x86_64-apple-darwin -O1 -disable-llvm-optzns -ffull-restrict %s -emit-llvm -o - | FileCheck %s + +int r; +void ex1(int *); + +struct FOO { + int *restrict rp0; + int *restrict rp1; + int *restrict rp2; +}; + +void test_FOO_local(int *pA, int *pB, int *pC) { + struct FOO tmp = {pA, pB, pC}; + *tmp.rp0 = 42; + *tmp.rp1 = 43; +} +// CHECK-LABEL: void @test_FOO_local( +// CHECK: [[tmp:%.*]] = alloca %struct.FOO, align 8 +// CHECK: [[TMP1:%.*]] = call i8* @llvm.noalias.decl.p0i8.p0s_struct.FOOs.i64(%struct.FOO* [[tmp]], i64 0, metadata [[TAG_6:!.*]]) +// CHECK: [[rp0:%.*]] = getelementptr inbounds %struct.FOO, %struct.FOO* [[tmp]], i32 0, i32 0 +// CHECK: [[rp01:%.*]] = getelementptr inbounds %struct.FOO, %struct.FOO* [[tmp]], i32 0, i32 0 +// CHECK: [[TMP5:%.*]] = load i32*, i32** [[rp01]], align 8, !tbaa [[TAG_9:!.*]], !noalias [[TAG_6]] +// CHECK: [[TMP6:%.*]] = call i32* @llvm.noalias.p0i32.p0i8.p0p0i32.i64(i32* [[TMP5]], i8* [[TMP1]], i32** [[rp01]], i64 0, metadata [[TAG_6]]), !tbaa [[TAG_9]], !noalias [[TAG_6]] +// CHECK: store i32 42, i32* [[TMP6]], align 4, !tbaa [[TAG_13:!.*]], !noalias [[TAG_6]] +// CHECK: [[rp12:%.*]] = getelementptr inbounds %struct.FOO, %struct.FOO* [[tmp]], i32 0, i32 1 +// CHECK: [[TMP7:%.*]] = load i32*, i32** [[rp12]], align 8, !tbaa [[TAG_11:!.*]], !noalias [[TAG_6]] +// CHECK: [[TMP8:%.*]] = call i32* @llvm.noalias.p0i32.p0i8.p0p0i32.i64(i32* [[TMP7]], i8* [[TMP1]], i32** [[rp12]], i64 0, metadata [[TAG_6]]), !tbaa [[TAG_11]], !noalias [[TAG_6]] +// CHECK: store i32 43, i32* [[TMP8]], align 4, !tbaa [[TAG_13]], !noalias [[TAG_6]] +// CHECK: ret void + +void test_FOO_arg_pointer(struct FOO *p) { + *p->rp0 = 42; + *p->rp1 = 43; +} + +// define void @test_FOO_arg_pointer(%struct.FOO* %p) #0 !noalias !15 { +// CHECK: void @test_FOO_arg_pointer(%struct.FOO* [[p:%.*]]) #0 !noalias [[TAG_15:!.*]] { +// CHECK: [[p_addr:%.*]] = alloca %struct.FOO*, align 8 +// CHECK-NEXT: store %struct.FOO* [[p]], %struct.FOO** [[p_addr]], align 8, !tbaa [[TAG_2:!.*]], !noalias [[TAG_15]] +// CHECK-NEXT: [[TMP0:%.*]] = load %struct.FOO*, %struct.FOO** [[p_addr]], align 8, !tbaa [[TAG_2]], !noalias [[TAG_15]] +// CHECK-NEXT: [[rp0:%.*]] = getelementptr inbounds %struct.FOO, %struct.FOO* [[TMP0]], i32 0, i32 0 +// CHECK-NEXT: [[TMP1:%.*]] = load i32*, i32** [[rp0]], align 8, !tbaa [[TAG_9:!.*]], !noalias [[TAG_15]] +// CHECK-NEXT: [[TMP2:%.*]] = call i32* @llvm.noalias.p0i32.p0i8.p0p0i32.i64(i32* [[TMP1]], i8* null, i32** [[rp0]], i64 0, metadata [[TAG_15]]), !tbaa [[TAG_9]], !noalias [[TAG_15]] +// CHECK-NEXT: store i32 42, i32* [[TMP2]], align 4, !tbaa [[TAG_13:!.*]], !noalias [[TAG_15]] +// CHECK-NEXT: [[TMP3:%.*]] = load %struct.FOO*, %struct.FOO** [[p_addr]], align 8, !tbaa [[TAG_2]], !noalias [[TAG_15]] +// CHECK-NEXT: [[rp1:%.*]] = getelementptr inbounds %struct.FOO, %struct.FOO* [[TMP3]], i32 0, i32 1 +// CHECK-NEXT: [[TMP4:%.*]] = load i32*, i32** [[rp1]], align 8, !tbaa [[TAG_11:!.*]], !noalias [[TAG_15]] +// CHECK-NEXT: [[TMP5:%.*]] = call i32* @llvm.noalias.p0i32.p0i8.p0p0i32.i64(i32* [[TMP4]], i8* null, i32** [[rp1]], i64 0, metadata [[TAG_15]]), !tbaa [[TAG_11]], !noalias [[TAG_15]] +// CHECK-NEXT: store i32 43, i32* [[TMP5]], align 4, !tbaa [[TAG_13]], !noalias [[TAG_15]] +// CHECK-NEXT: ret void + +void test_FOO_arg_value(struct FOO p) { + *p.rp0 = 42; + *p.rp1 = 43; +} +// NOTE: the struct is mapped 
'byval', the scope will be introduced after inlining. + +// define void @test_FOO_arg_value(%struct.FOO* byval(%struct.FOO) align 8 %p) #0 !noalias !18 { +// CHECK: void @test_FOO_arg_value(%struct.FOO* byval(%struct.FOO) align 8 %p) #0 !noalias [[TAG_18:!.*]] { +// CHECK: [[rp0:%.*]] = getelementptr inbounds %struct.FOO, %struct.FOO* [[p:%.*]], i32 0, i32 0 +// CHECK-NEXT: [[TMP0:%.*]] = load i32*, i32** [[rp0]], align 8, !tbaa [[TAG_9:!.*]], !noalias [[TAG_18]] +// CHECK-NEXT: [[TMP1:%.*]] = call i32* @llvm.noalias.p0i32.p0i8.p0p0i32.i64(i32* [[TMP0]], i8* null, i32** [[rp0]], i64 0, metadata [[TAG_18]]), !tbaa [[TAG_9]], !noalias [[TAG_18]] +// CHECK-NEXT: store i32 42, i32* [[TMP1]], align 4, !tbaa [[TAG_13:!.*]], !noalias [[TAG_18]] +// CHECK-NEXT: [[rp1:%.*]] = getelementptr inbounds %struct.FOO, %struct.FOO* [[p]], i32 0, i32 1 +// CHECK-NEXT: [[TMP2:%.*]] = load i32*, i32** [[rp1]], align 8, !tbaa [[TAG_11:!.*]], !noalias [[TAG_18]] +// CHECK-NEXT: [[TMP3:%.*]] = call i32* @llvm.noalias.p0i32.p0i8.p0p0i32.i64(i32* [[TMP2]], i8* null, i32** [[rp1]], i64 0, metadata [[TAG_18]]), !tbaa [[TAG_11]], !noalias [[TAG_18]] +// CHECK-NEXT: store i32 43, i32* [[TMP3]], align 4, !tbaa [[TAG_13]], !noalias [[TAG_18]] +// CHECK-NEXT: ret void + +struct FOO test_FOO_pass(struct FOO p) { + return p; +} + +// define void @test_FOO_pass(%struct.FOO* noalias sret align 8 %agg.result, %struct.FOO* byval(%struct.FOO) align 8 %p) #0 !noalias !21 { +// CHECK: void @test_FOO_pass(%struct.FOO* noalias sret align 8 %agg.result, %struct.FOO* byval(%struct.FOO) align 8 %p) #0 !noalias [[TAG_21:!.*]] { +// CHECK: [[TMP0:%.*]] = call %struct.FOO* @llvm.noalias.copy.guard.p0s_struct.FOOs.p0i8(%struct.FOO* [[p:%.*]], i8* null, metadata [[TAG_24:!.*]], metadata [[TAG_21]]) +// CHECK-NEXT: [[TMP1:%.*]] = bitcast %struct.FOO* [[agg_result:%.*]] to i8* +// CHECK-NEXT: [[TMP2:%.*]] = bitcast %struct.FOO* [[TMP0]] to i8* +// CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i64 24, i1 false), !tbaa.struct [[TAG_28:!.*]], !noalias [[TAG_21]] +// CHECK-NEXT: ret void + +struct FUM { + struct FOO m; +}; + +void test_FUM_local(int *pA, int *pB, int *pC) { + struct FUM tmp = {{pA, pB, pC}}; + *tmp.m.rp0 = 42; + *tmp.m.rp1 = 43; +} + +// CHECK-LABEL: void @test_FUM_local( +// CHECK: [[TMP1:%.*]] = call i8* @llvm.noalias.decl.p0i8.p0s_struct.FUMs.i64(%struct.FUM* [[tmp]], i64 0, metadata [[TAG_29:!.*]]) +// CHECK: [[TMP6:%.*]] = call i32* @llvm.noalias.p0i32.p0i8.p0p0i32.i64(i32* [[TMP5:%.*]], i8* [[TMP1]], i32** [[rp02:%.*]], i64 0, metadata [[TAG_29]]), !tbaa [[TAG_32:!.*]], !noalias [[TAG_29]] +// CHECK: [[TMP8:%.*]] = call i32* @llvm.noalias.p0i32.p0i8.p0p0i32.i64(i32* [[TMP7:%.*]], i8* [[TMP1]], i32** [[rp14:%.*]], i64 0, metadata [[TAG_29]]), !tbaa [[TAG_34:!.*]], !noalias [[TAG_29]] + +void test_FUM_arg_pointer(struct FUM *p) { + *p->m.rp0 = 42; + *p->m.rp1 = 43; +} +// define void @test_FUM_arg_pointer(%struct.FUM* %p) #0 !noalias !35 { +// CHECK: void @test_FUM_arg_pointer(%struct.FUM* [[p:%.*]]) #0 !noalias [[TAG_35:!.*]] { +// CHECK: [[TMP2:%.*]] = call i32* @llvm.noalias.p0i32.p0i8.p0p0i32.i64(i32* [[TMP1:%.*]], i8* null, i32** [[rp0:%.*]], i64 0, metadata [[TAG_35]]), !tbaa [[TAG_32:!.*]], !noalias [[TAG_35]] +// CHECK: [[TMP5:%.*]] = call i32* @llvm.noalias.p0i32.p0i8.p0p0i32.i64(i32* [[TMP4:%.*]], i8* null, i32** [[rp1:%.*]], i64 0, metadata [[TAG_35]]), !tbaa [[TAG_34:!.*]], !noalias [[TAG_35]] + +void test_FUM_arg_value(struct FUM p) { + *p.m.rp0 = 42; + *p.m.rp1 = 
43; +} + +// define void @test_FUM_arg_value(%struct.FUM* byval(%struct.FUM) align 8 %p) #0 !noalias !38 { +// CHECK: void @test_FUM_arg_value(%struct.FUM* byval(%struct.FUM) align 8 %p) #0 !noalias [[TAG_38:!.*]] { +// CHECK: [[TMP1:%.*]] = call i32* @llvm.noalias.p0i32.p0i8.p0p0i32.i64(i32* [[TMP0:%.*]], i8* null, i32** [[rp0:%.*]], i64 0, metadata [[TAG_38]]), !tbaa [[TAG_32:!.*]], !noalias [[TAG_38]] +// CHECK: [[TMP3:%.*]] = call i32* @llvm.noalias.p0i32.p0i8.p0p0i32.i64(i32* [[TMP2:%.*]], i8* null, i32** [[rp1:%.*]], i64 0, metadata [[TAG_38]]), !tbaa [[TAG_34:!.*]], !noalias [[TAG_38]] + +struct FUM test_FUM_pass(struct FUM p) { + return p; +} + +// define void @test_FUM_pass(%struct.FUM* noalias sret align 8 %agg.result, %struct.FUM* byval(%struct.FUM) align 8 %p) #0 !noalias !41 { +// CHECK: void @test_FUM_pass(%struct.FUM* noalias sret align 8 %agg.result, %struct.FUM* byval(%struct.FUM) align 8 %p) #0 !noalias [[TAG_41:!.*]] { +// CHECK: [[TMP0:%.*]] = call %struct.FUM* @llvm.noalias.copy.guard.p0s_struct.FUMs.p0i8(%struct.FUM* [[p:%.*]], i8* null, metadata [[TAG_44:!.*]], metadata [[TAG_41]]) +// CHECK-NEXT: [[TMP1:%.*]] = bitcast %struct.FUM* [[agg_result:%.*]] to i8* +// CHECK-NEXT: [[TMP2:%.*]] = bitcast %struct.FUM* [[TMP0]] to i8* +// CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i64 24, i1 false), !tbaa.struct [[TAG_28:!.*]], !noalias [[TAG_41]] +// CHECK-NEXT: ret void + +// indices for llvm.noalias.copy.guard + +// CHECK: [[TAG_24]] = !{[[TAG_25:!.*]], [[TAG_26:!.*]], [[TAG_27:!.*]]} +// CHECK: [[TAG_25]] = !{i32 -1, i32 0} +// CHECK: [[TAG_26]] = !{i32 -1, i32 1} +// CHECK: [[TAG_27]] = !{i32 -1, i32 2} + +// CHECK: [[TAG_44]] = !{[[TAG_45:!.*]], [[TAG_46:!.*]], [[TAG_47:!.*]]} +// CHECK: [[TAG_45]] = !{i32 -1, i32 0, i32 0} +// CHECK: [[TAG_46]] = !{i32 -1, i32 0, i32 1} +// CHECK: [[TAG_47]] = !{i32 -1, i32 0, i32 2} Index: clang/test/CodeGen/restrict/struct_member_01.c =================================================================== --- /dev/null +++ clang/test/CodeGen/restrict/struct_member_01.c @@ -0,0 +1,73 @@ +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -O2 -ffull-restrict %s -emit-llvm -o - | FileCheck %s +// RUN: %clang_cc1 -triple i386-unknown-linux-gnu -O2 -ffull-restrict %s -emit-llvm -o - | FileCheck %s + +struct FOO { + int *__restrict pA; + int *__restrict pB; +}; + +int test00a(int *pA, int *pB) { + int *__restrict rpA; + int *__restrict rpB; + rpA = pA; + rpB = pB; + + *rpA = 42; + *rpB = 43; + return *rpA; +} + +// CHECK-LABEL: @test00a( +// CHECK: ret i32 42 + +int test00b(int *pA, int *pB) { + int *__restrict rp[2]; + rp[0] = pA; + rp[1] = pB; + + *rp[0] = 42; + *rp[1] = 43; + return *rp[0]; +} + +// CHECK-LABEL: @test00b( +// CHECK: ret i32 42 + +int test01(struct FOO *p0, struct FOO *p1) { + *p0->pA = 42; + *p1->pA = 43; + + return *p0->pA; // 42 or 43 +} +// CHECK-LABEL: @test01( +// CHECK-NOT: ret i32 42 + +int test11(struct FOO *p0, struct FOO *p1) { + *p0->pA = 42; + *p1->pB = 43; + + return *p0->pA; // 42 +} + +// CHECK-LABEL: @test11( +// CHECK: ret i32 42 + +int test21(struct FOO p0, struct FOO p1) { + *p0.pA = 42; + *p1.pB = 43; + + return *p0.pA; // 42 +} + +// CHECK-LABEL: @test21( +// CHECK: ret i32 42 + +int test31(struct FOO *p0, struct FOO *__restrict p1) { + *p0->pA = 42; + *p1->pA = 43; + + return *p0->pA; // 42 +} + +// CHECK-LABEL: @test31( +// CHECK: ret i32 42 Index: clang/test/CodeGen/restrict/struct_member_02.c 
=================================================================== --- /dev/null +++ clang/test/CodeGen/restrict/struct_member_02.c @@ -0,0 +1,110 @@ +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -O2 -ffull-restrict -disable-llvm-passes %s -emit-llvm -o - | FileCheck %s +// RUN: %clang_cc1 -triple i386-unknown-linux-gnu -O2 -ffull-restrict -disable-llvm-passes %s -emit-llvm -o - | FileCheck %s + +//#define __restrict volatile + +// this test checks for what variables a restrict scope is created + +struct FOO_ipp { + int **p; +}; + +struct FOO_irpp { + int *__restrict *p; +}; + +struct FOO_iprp { + int dummy; + int **__restrict p; +}; + +struct FOO_irprp { + int *__restrict *__restrict p; +}; + +struct FOO_NESTED { + struct FOO_iprp m; +}; + +struct FOO_NESTED_A { + struct FOO_iprp m[2][3][4]; +}; + +typedef struct FOO_NESTED FUM; +typedef int *__restrict t_irp; + +int foo(int **p) { + struct FOO_ipp m1; // no + struct FOO_irpp m2; // no + struct FOO_iprp m3; // yes + struct FOO_irprp m4; // yes + struct FOO_NESTED m5; // yes + struct FOO_NESTED m6[2][4][5]; // yes + struct FOO_NESTED_A m7[2][4][5]; // yes + t_irp p0; // yes + FUM m8[3]; // yes + int *a1[2]; // no + int **a2[2]; // no + int *__restrict *a3[2]; // no + int **__restrict a4[2]; // yes + int **__restrict a5[2][3][4]; // yes + int *__restrict *a6; // no + m1.p = p; + m2.p = p; + m3.p = p; + m4.p = p; + a1[0] = *p; + a1[1] = *p; + + return **m1.p + **m2.p + **m3.p + **m4.p + *a1[0] + *a1[1]; +} + +// check the scopes of various variables +// CHECK-LABEL: @foo( + +// the local variables: +// CHECK: alloca +// CHECK: alloca +// CHECK: alloca +// CHECK: alloca +// CHECK: alloca +// CHECK: alloca +// CHECK: alloca +// CHECK: alloca +// CHECK: alloca +// CHECK: alloca +// CHECK: alloca +// CHECK: alloca +// CHECK: alloca +// CHECK: alloca +// CHECK: alloca +// CHECK: alloca +// CHECK-NOT: alloca + +// the local variables that have a restrict scope: +// CHECK: llvm.noalias.decl +// CHECK: llvm.noalias.decl +// CHECK: llvm.noalias.decl +// CHECK: llvm.noalias.decl +// CHECK: llvm.noalias.decl +// CHECK: llvm.noalias.decl +// CHECK: llvm.noalias.decl +// CHECK: llvm.noalias.decl +// CHECK: llvm.noalias.decl +// CHECK-NOT: llvm.noalias.decl +// CHECK: ret i32 + +// the restrict related metadata +// CHECK: foo: unknown scope +// CHECK-NEXT: foo +// CHECK-NOT: foo: +// CHECK: foo: m3 +// CHECK-NEXT: foo: m4 +// CHECK-NEXT: foo: m5 +// CHECK-NEXT: foo: m6 +// CHECK-NEXT: foo: m7 +// CHECK-NEXT: foo: p0 +// CHECK-NEXT: foo: m8 +// CHECK-NEXT: foo: a4 +// CHECK-NEXT: foo: a5 +// CHECK-NOT: foo: Index: clang/test/CodeGen/restrict/struct_member_03.c =================================================================== --- /dev/null +++ clang/test/CodeGen/restrict/struct_member_03.c @@ -0,0 +1,100 @@ +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -O2 -ffull-restrict %s -emit-llvm -o - | FileCheck %s +// RUN: %clang_cc1 -triple i386-unknown-linux-gnu -O2 -ffull-restrict %s -emit-llvm -o - | FileCheck %s + +struct FOO { + int *__restrict pA; + int *__restrict pB; +}; + +int test10(int *pA, int *pB) { + struct FOO rp; + rp.pA = pA; + rp.pB = pB; + + *rp.pA = 42; + *rp.pB = 99; + + return *rp.pA; //42 +} +// CHECK-LABEL: @test10( +// CHECK: ret i32 42 + +int test11(struct FOO rp) { + *rp.pA = 42; + *rp.pB = 99; + + return *rp.pA; //42 +} +// CHECK-LABEL: @test11( +// CHECK: ret i32 42 + +int test12(struct FOO *rp) { + *rp->pA = 42; + *rp->pB = 99; + + return *rp->pA; //42 +} +// CHECK-LABEL: @test12( +// CHECK: ret i32 42 + +int 
test20(int *pA, int *pB) { + struct FOO rp0; + struct FOO rp1; + rp0.pB = pA; + rp1.pB = pB; + + *rp0.pB = 42; + *rp1.pB = 99; + + return *rp0.pB; //42 +} +// CHECK-LABEL: @test20( +// CHECK: ret i32 42 + +int test21(struct FOO rp0, struct FOO rp1) { + *rp0.pB = 42; + *rp1.pB = 99; + + return *rp0.pB; //42 +} +// CHECK-LABEL: @test21( +// CHECK: ret i32 42 + +int test22(struct FOO *rp0, struct FOO *rp1) { + *rp0->pB = 42; + *rp1->pB = 99; + + return *rp0->pB; // needs load +} +// CHECK-LABEL: @test22( +// CHECK-NOT: ret i32 42 +// CHECK-NOT: ret i32 99 + +int test23(struct FOO *rp0, struct FOO *rp1) { + *rp0->pA = 42; + *rp1->pB = 99; + + return *rp0->pA; // 42, rp0->pA and rp1->pB are not overlapping +} +// CHECK-LABEL: @test23( +// CHECK: ret i32 42 + +int test24(struct FOO *__restrict rp0, struct FOO *rp1) { + *rp0->pB = 42; + *rp1->pB = 99; + + return *rp0->pB; // 42 +} +// CHECK-LABEL: @test24( +// CHECK: ret i32 42 + +int test25(struct FOO *p0, struct FOO *rp1) { + struct FOO *__restrict rp0; + rp0 = p0; + *rp0->pB = 42; + *rp1->pB = 99; + + return *rp0->pB; // 42 +} +// CHECK-LABEL: @test25( +// CHECK: ret i32 42 Index: clang/test/CodeGen/restrict/struct_member_04.c =================================================================== --- /dev/null +++ clang/test/CodeGen/restrict/struct_member_04.c @@ -0,0 +1,168 @@ +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -O2 -ffull-restrict %s -emit-llvm -o - | FileCheck %s +// RUN: %clang_cc1 -triple i386-unknown-linux-gnu -O2 -ffull-restrict %s -emit-llvm -o - | FileCheck %s + +// restrict member pointers and inlining - basic functionality test + +struct FOO { + int *__restrict pA; + int *__restrict pB; +}; + +void setFOO(struct FOO *p) { + *p->pA = 42; + *p->pB = 43; +} + +int test10(int *pA, int *pB, int *pC) { + + *pA = 40; + { + struct FOO rp; + rp.pA = pA; + rp.pB = pB; + + setFOO(&rp); + + *pC = 99; + return *rp.pA; // 42 + } +} + +// CHECK-LABEL: @test10( +// CHECK: ret i32 42 + +int test11(int *pA, int *pB, int *pC) { + + *pA = 40; + { + struct FOO rp; + rp.pA = pA; + rp.pB = pB; + + setFOO(&rp); + + *pC = 99; + } + return *pA; // 42 // should be, but llvm does not see it +} + +// CHECK-LABEL: @test11( +// CHECK-NOT: ret i32 40 +// CHECK-NOT: ret i32 42 +// CHECK-NOT: ret i32 43 +// CHECK-NOT: ret i32 99 + +int test12(int *pA, int *pB, int *pC) { + + *pA = 40; + { + struct FOO rp; + rp.pA = pA; + rp.pB = pB; + + setFOO(&rp); + } + + *pC = 99; + return *pA; // 42 or 99 +} + +// CHECK-LABEL: @test12( +// CHECK-NOT: ret i32 40 +// CHECK-NOT: ret i32 42 +// CHECK-NOT: ret i32 43 +// CHECK-NOT: ret i32 99 + +// out-of-function scope +int getFOO(struct FOO *p) { + return *p->pA; +} + +// fully defined +int test20(int *pA, int *pC) { + *pA = 42; + { + struct FOO rp; + rp.pA = pA; + *pC = 99; + return getFOO(&rp); + } +} +// CHECK-LABEL: @test20( +// CHECK: ret i32 42 + +// fully defined +int test21(int *pA, int *pC) { + *pA = 42; + *pC = 99; + { + struct FOO rp; + rp.pA = pA; + return getFOO(&rp); + } +} + +// CHECK-LABEL: @test21( +// CHECK: ret i32 % + +// mixed defined +int test22(int *pA, struct FOO *pB0, int b0, int *pC) { + *pA = 42; + { + struct FOO rp; + rp.pA = pA; + *pC = 99; + return getFOO(b0 ? &rp : pB0); + } +} +// CHECK-LABEL: @test22( +// CHECK: ret i32 % + +// mixed-mixed defined +int test23(int *pA, struct FOO *pB0, int b0, struct FOO *pB1, int b1, int *pC) { + *pA = 41; + { + struct FOO rp; + rp.pA = pA; + *pC = 98; + return test22(pA, b1 ? 
&rp : pB0, b0, pC); + } +} +// CHECK-LABEL: @test23( +// CHECK: ret i32 % + +// fully defined +int test24(int *pA, int *pB0, int b0, int *pB1, int b1, int *pC) { + *pA = 40; + { + struct FOO fb0; + fb0.pA = pB0; + { + struct FOO fb1; + fb1.pA = pB1; + + return test23(pA, &fb0, b0, &fb1, b1, pC); + } + } +} + +// CHECK-LABEL: @test24( +// CHECK: ret i32 % + +int test25(int *pA, int b0, int b1, int *pC) { + *pA = 40; + { + struct FOO fb0; + fb0.pA = pA; + { + struct FOO fb1; + fb1.pA = pA; + + return test23(pA, &fb0, b0, &fb1, b1, pC); + } + } +} + +// CHECK-LABEL: @test25( +// FIXME: should be: ret i32 42 +// CHECK: ret i32 % Index: clang/test/CodeGen/restrict/struct_member_05.c =================================================================== --- /dev/null +++ clang/test/CodeGen/restrict/struct_member_05.c @@ -0,0 +1,107 @@ +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -O2 -ffull-restrict %s -emit-llvm -o - | FileCheck %s +// RUN: %clang_cc1 -triple i386-unknown-linux-gnu -O2 -ffull-restrict %s -emit-llvm -o - | FileCheck %s + +// restrict member pointers and inlining - basic functionality test + +struct FOO { + int *__restrict pA; + int *__restrict pB; +}; + +struct FOO_plain { + int *pA; + int *pB; +}; + +int test10(int *pA, int *pB, int *pC) { + + *pA = 40; + { + struct FOO rp; + rp.pA = pA; + rp.pB = pB; + + { + struct FOO *p = &rp; + *p->pA = 42; + *p->pB = 43; + } + + *pC = 99; + return *rp.pA; // 42 + } +} + +// CHECK-LABEL: @test10( +// CHECK: ret i32 42 + +int test11(int *pA, int *pB, int *pC) { + + *pA = 40; + { + struct FOO rp; + rp.pA = pA; + rp.pB = pB; + + { + *rp.pA = 42; + *rp.pB = 43; + } + + *pC = 99; + return *rp.pA; // 42 + } +} + +// CHECK-LABEL: @test11( +// CHECK: ret i32 42 + +int test12a(int *pA, int *pB, int *pC, struct FOO *pF) { + + *pA = 40; + { + struct FOO rp; + rp.pA = pA; + rp.pB = pB; + + { + struct FOO *p = pF ? pF : &rp; + *p->pA = 42; + *p->pB = 43; + } + + *pC = 99; + return *rp.pA; // 42 or 40 + } +} + +// CHECK-LABEL: @test12a( +// CHECK-NOT: ret i32 40 +// CHECK-NOT: ret i32 42 +// CHECK-NOT: ret i32 43 +// CHECK-NOT: ret i32 99 + +int test12b(int *pA, int *pB, int *pC, struct FOO_plain *pF) { + + *pA = 40; + { + struct FOO_plain rp; + rp.pA = pA; + rp.pB = pB; + + { + struct FOO_plain *p = pF ? pF : &rp; + *p->pA = 42; + *p->pB = 43; + } + + *pC = 99; + return *rp.pA; // 42 or 40 or 99 or ... + } +} + +// CHECK-LABEL: @test12b( +// CHECK-NOT: ret i32 40 +// CHECK-NOT: ret i32 42 +// CHECK-NOT: ret i32 43 +// CHECK-NOT: ret i32 99 Index: clang/test/CodeGen/restrict/struct_member_06.c =================================================================== --- /dev/null +++ clang/test/CodeGen/restrict/struct_member_06.c @@ -0,0 +1,108 @@ +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -O2 -ffull-restrict %s -emit-llvm -o - | FileCheck %s +// RUN: %clang_cc1 -triple i386-unknown-linux-gnu -O2 -ffull-restrict %s -emit-llvm -o - | FileCheck %s + +// restrict member pointers and inlining - basic functionality test + +struct FOO { + int *__restrict p; +}; + +struct FOO_plain { + int *pA; + int *pB; +}; + +int test01_p_pp(int c, int *pA, int *pB) { + int *__restrict rpA; + rpA = pA; + int *__restrict rpB; + rpB = pB; + + int *p = c ? 
rpA : rpB; + + return *p; +} +// CHECK-LABEL: @test01_p_pp( +// CHECK: @llvm.noalias.decl +// CHECK-NEXT: @llvm.noalias.decl +// CHECK-NEXT: icmp +// CHECK-NEXT: @llvm.provenance.noalias +// CHECK-NEXT: @llvm.provenance.noalias +// CHECK-NEXT: select +// CHECK-NEXT: select +// CHECK-NEXT: load +// CHECK-NEXT: ret i32 + +int test01_p_ss(int c, int *pA, int *pB) { + struct FOO spA; + spA.p = pA; + struct FOO spB; + spB.p = pB; + + int *p = c ? spA.p : spB.p; + + return *p; +} +// CHECK-LABEL: @test01_p_ss( +// CHECK: @llvm.noalias.decl +// CHECK-NEXT: @llvm.noalias.decl +// CHECK-NEXT: icmp +// CHECK-NEXT: @llvm.provenance.noalias +// CHECK-NEXT: @llvm.provenance.noalias +// CHECK-NEXT: select +// CHECK-NEXT: select +// CHECK-NEXT: load +// CHECK-NEXT: ret i32 + +int test01_s_ss(int c, int *pA, int *pB) { + struct FOO spA; + spA.p = pA; + struct FOO spB; + spB.p = pB; + + { + struct FOO p = c ? spA : spB; + + return *p.p; + } +} +// CHECK-LABEL: @test01_s_ss( +// CHECK: @llvm.noalias.decl +// CHECK-NEXT: @llvm.noalias.decl +// CHECK-NEXT: @llvm.noalias.decl +// CHECK-NEXT: icmp +// CHECK-NEXT: @llvm.provenance.noalias +// CHECK-NEXT: @llvm.provenance.noalias +// CHECK-NEXT: select +// CHECK-NEXT: select +// CHECK-NEXT: @llvm.provenance.noalias +// CHECK-NEXT: load +// CHECK-NEXT: ret i32 + +// FIXME: this one currently results in bad code :( +int test01_ps_ss(int c, int *pA, int *pB) { + struct FOO spA; + spA.p = pA; + struct FOO spB; + spB.p = pB; + + struct FOO *p = c ? &spA : &spB; + + return *p->p; +} +// CHECK-LABEL: @test01_ps_ss( +// CHECK: ret i32 + +int test01_ps_psps(int c, struct FOO *ppA, struct FOO *ppB) { + struct FOO *p = c ? ppA : ppB; + + return *p->p; +} +// CHECK-LABEL: @test01_ps_psps( +// CHECK: icmp +// CHECK-NEXT: select +// CHECK-NEXT: getelementptr +// CHECK-NEXT: load +// CHECK-NEXT: llvm.provenance.noalias +// CHECK-NEXT: load +// CHECK-NEXT: ret i32 Index: clang/test/CodeGen/restrict/struct_member_07.c =================================================================== --- /dev/null +++ clang/test/CodeGen/restrict/struct_member_07.c @@ -0,0 +1,60 @@ +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -O2 -ffull-restrict %s -emit-llvm -o - | FileCheck %s +// RUN: %clang_cc1 -triple i386-unknown-linux-gnu -O2 -ffull-restrict %s -emit-llvm -o - | FileCheck %s + +struct FOO { + int *__restrict pA; +}; + +struct BAR { + int *pA; +}; + +static void adaptFOO(struct FOO *p) { + *p->pA = 99; +}; + +static void adaptBAR(struct BAR *p) { + *p->pA = 99; +}; + +static void adaptInt(int *p) { + *p = 99; +}; + +// has 'unknown scope': caller: no, callee: yes +int test10(int *pA, struct FOO *pB) { + *pA = 42; + adaptFOO(pB); + return *pA; +} +// CHECK-LABEL: @test10( +// CHECK: ret i32 42 + +// has 'unknown scope': caller: yes, callee: no +int test11(int *pA, struct FOO *pB) { + *pB->pA = 42; + adaptInt(pA); + return *pB->pA; +} +// CHECK-LABEL: @test11( +// CHECK: ret i32 42 + +// has 'unknown scope': caller: no, callee: no +int test12(int *pA, struct BAR *pB) { + *pA = 42; + adaptBAR(pB); + return *pA; +} +// CHECK-LABEL: @test12( +// CHECK-NOT: ret i32 42 + +// has 'unknown scope': caller: yes, callee: yes +int test13(int *pA, struct FOO *pB) { + *pB->pA = 41; // introduce 'unknown scope' + *pA = 42; + adaptFOO(pB); + return *pA; +} + +// CHECK-LABEL: @test13( +// CHECK: ret i32 42 Index: clang/test/CodeGen/restrict/struct_member_08.cpp =================================================================== --- /dev/null +++ clang/test/CodeGen/restrict/struct_member_08.cpp @@ 
-0,0 +1,162 @@ +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -O2 -ffull-restrict %s -emit-llvm -o - | FileCheck %s --check-prefixes=CHECK,CHECK64 +// RUN: %clang_cc1 -triple i386-unknown-linux-gnu -O2 -ffull-restrict %s -emit-llvm -o - | FileCheck %s --check-prefixes=CHECK,CHECK32 + +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -O2 -ffull-restrict %s -emit-llvm -o - -DDUMMY_BEFORE | FileCheck %s --check-prefixes=CHECK,CHECK64 +// RUN: %clang_cc1 -triple i386-unknown-linux-gnu -O2 -ffull-restrict %s -emit-llvm -o - -DDUMMY_BEFORE | FileCheck %s --check-prefixes=CHECK,CHECK32 + +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -O2 -ffull-restrict %s -emit-llvm -o - -DDUMMY_AFTER | FileCheck %s --check-prefixes=CHECK,CHECK64 +// RUN: %clang_cc1 -triple i386-unknown-linux-gnu -O2 -ffull-restrict %s -emit-llvm -o - -DDUMMY_AFTER | FileCheck %s --check-prefixes=CHECK,CHECK32 + +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -O2 -ffull-restrict %s -emit-llvm -o - -DDUMMY_BEFORE -DDUMMY_AFTER | FileCheck %s --check-prefixes=CHECK,CHECK64 +// RUN: %clang_cc1 -triple i386-unknown-linux-gnu -O2 -ffull-restrict %s -emit-llvm -o - -DDUMMY_BEFORE -DDUMMY_AFTER | FileCheck %s --check-prefixes=CHECK,CHECK32 + +// NOTE: this test in C++ mode + +struct Fum { + Fum(unsigned long long d) { + ptr1 = ((int *)(d & 0xffffffff)); + ptr2 = ((int *)((d >> 32) & 0xffffffff)); + } + Fum(const Fum &) = default; + +#ifdef DUMMY_BEFORE + int *dummyb0; + int *dummyb1; +#endif + int *__restrict ptr1; + int *__restrict ptr2; +#ifdef DUMMY_AFTER + int *dummya0; + int *dummya1; +#endif +}; + +static Fum pass(Fum d) { return d; } + +int test_Fum_01(unsigned long long data, int *p1) { + Fum tmp = {data}; + + int *p0 = tmp.ptr1; + + *p0 = 42; + *p1 = 99; + return *p0; +} +// CHECK-LABEL: @_Z11test_Fum_01yPi +// CHECK-NOT: alloca +// CHECK: ret i32 42 + +int test_Fum_02(unsigned long long data) { + Fum tmp = {data}; + + int *p0 = tmp.ptr1; + int *p1 = tmp.ptr2; + + *p0 = 42; + *p1 = 99; + return *p0; +} +// CHECK-LABEL: @_Z11test_Fum_02y +// CHECK-NOT: alloca +// CHECK: ret i32 42 + +int test_Fum_pass_01(unsigned long long data, int *p1) { + Fum tmp = {data}; + + int *p0 = pass(tmp).ptr1; + + *p0 = 42; + *p1 = 99; + return *p0; +} +// CHECK-LABEL: @_Z16test_Fum_pass_01yPi +// CHECK-NOT: alloca +// CHECK: ret i32 42 + +int test_Fum_pass_02(unsigned long long data) { + Fum tmp = {data}; + + int *p0 = pass(tmp).ptr1; + int *p1 = pass(tmp).ptr2; + + *p0 = 42; + *p1 = 99; + return *p0; +} +// CHECK-LABEL: @_Z16test_Fum_pass_02y +// CHECK-NOT: alloca +// CHECK: ret i32 42 + +int test_Fum_pass_03(unsigned long long data) { + Fum tmp = {data}; + + int *b0 = tmp.ptr1; + *b0 = 42; + + int *p0 = pass(tmp).ptr1; + + *p0 = 99; + return *b0; // 99 +} +// CHECK-LABEL: @_Z16test_Fum_pass_03y +// CHECK-NOT: alloca +// CHECK-NOT: ret i32 42 +// CHECK: ret i32 99 + +int test_Fum_pass_04(unsigned long long data, int *px) { + Fum tmp = {data}; + + int *b0 = tmp.ptr1; + *b0 = 42; + tmp.ptr1 = px; + + int *p0 = pass(tmp).ptr1; + + *p0 = 99; + return *b0; // 42 or 99 +} +// CHECK-LABEL: @_Z16test_Fum_pass_04yPi +// CHECK-NOT: alloca +// CHECK-NOT: ret i32 42 +// CHECK-NOT: ret i32 99 +// CHECK: ret i32 % + +class S { +public: + S(int *d) : data(d) {} + int *getData() { return data; } + +private: + int *__restrict__ data; +}; + +int test_S__01(int *pA, long N) { + int *__restrict__ x = pA; + + *x = 42; + { + S s(x + N); + *s.getData() = 99; + } + return *x; // N could be 0 +} + +// CHECK-LABEL: @_Z10test_S__01Pil +// CHECK-NOT: 
alloca +// CHECK-NOT: ret i32 42 +// CHECK: ret i32 % + +int test_S__02(int *pA, long N) { + int *__restrict__ x = pA; + + *x = 42; + { + S s(x + N); + *s.getData() = 99; + return *x; // restrict rules say that N cannot be 0 + } +} + +// CHECK-LABEL: @_Z10test_S__02Pil +// CHECK-NOT: alloca +// CHECK: ret i32 42 Index: clang/test/Driver/full-restrict.c =================================================================== --- /dev/null +++ clang/test/Driver/full-restrict.c @@ -0,0 +1,14 @@ +// RUN: %clang -### -ffull-restrict -S %s 2>&1 | FileCheck %s --check-prefix=CHECK_FULL +// RUN: %clang -### -fno-full-restrict -S %s 2>&1 | FileCheck %s --check-prefix=CHECK_NO_FULL +// RUN: %clang -### -S %s 2>&1 | FileCheck %s --check-prefix=CHECK_NOTHING + +// CHECK_FULL-NOT: -fno-full-restrict +// CHECK_FULL: -ffull-restrict +// CHECK_FULL-NOT: -fno-full-restrict + +// CHECK_NO_FULL-NOT: -ffull-restrict +// CHECK_NO_FULL: -fno-full-restrict +// CHECK_NO_FULL-NOT: -ffull-restrict + +// CHECK_NOTHING-NOT: -fno-full-restrict +// CHECK_NOTHING-NOT: -ffull-restrict Index: clang/test/OpenMP/distribute_parallel_for_reduction_task_codegen.cpp =================================================================== --- clang/test/OpenMP/distribute_parallel_for_reduction_task_codegen.cpp +++ clang/test/OpenMP/distribute_parallel_for_reduction_task_codegen.cpp @@ -116,15 +116,15 @@ // CHECK: define internal {{.*}}i32 [[TASK]](i32 {{.+}}, [[TASK_TY]]* {{.+}}) // CHECK-DAG: call i8* @__kmpc_task_reduction_get_th_data(i32 %{{.+}}, i8* [[TG:%.+]], i8* [[ARGC_REF:%.+]]) -// CHECK_DAG: [[TG]] = load i8*, i8** [[TG_ADDR:%.+]], +// CHECK_DAG: [[TG]] = load i8*, i8** [[TG_ADDR:%.+]], align // CHECK-DAG: [[ARGC_REF]] = bitcast i32* [[ARGC_ADDR:%.+]] to i8* -// CHECK-DAG: [[ARGC_ADDR]] = load i32*, i32** [[ARGC_ADDR_REF:%.+]], +// CHECK-DAG: [[ARGC_ADDR]] = load i32*, i32** [[ARGC_ADDR_REF:%.+]], align // CHECK-DAG: [[ARGC_ADDR_REF]] = getelementptr inbounds [[CAPS_TY:%.+]], %{{.+}}* [[CAP:%.+]], i32 0, i32 1 // CHECK-DAG: call i8* @__kmpc_task_reduction_get_th_data(i32 %{{.+}}, i8* [[TG:%.+]], i8* [[ARGV_REF:%.+]]) -// CHECK_DAG: [[TG]] = load i8*, i8** [[TG_ADDR]], -// CHECK-DAG: [[ARGV_REF]] = load i8*, i8** [[ARGV_ADDR:%.+]], -// CHECK-DAG: [[ARGV_ADDR]] = load i8**, i8*** [[ARGV_ADDR_REF:%.+]], -// CHECK-DAG: [[ARGV_ADDR_REF:%.+]] = load i8***, i8**** [[ARGV:%.+]], +// CHECK_DAG: [[TG]] = load i8*, i8** [[TG_ADDR]], align +// CHECK-DAG: [[ARGV_REF]] = load i8*, i8** [[ARGV_ADDR:%.+]], align +// CHECK-DAG: [[ARGV_ADDR]] = load i8**, i8*** [[ARGV_ADDR_REF:%.+]], align +// CHECK-DAG: [[ARGV_ADDR_REF:%.+]] = load i8***, i8**** [[ARGV:%.+]], align // CHECK-DAG: [[ARGV]] = getelementptr inbounds [[CAPS_TY]], [[CAPS_TY]]* [[CAP]], i32 0, i32 2 #endif Index: clang/test/OpenMP/for_reduction_task_codegen.cpp =================================================================== --- clang/test/OpenMP/for_reduction_task_codegen.cpp +++ clang/test/OpenMP/for_reduction_task_codegen.cpp @@ -116,15 +116,15 @@ // CHECK: define internal {{.*}}i32 [[TASK]](i32 {{.+}}, [[TASK_TY]]* {{.+}}) // CHECK-DAG: call i8* @__kmpc_task_reduction_get_th_data(i32 %{{.+}}, i8* [[TG:%.+]], i8* [[ARGC_REF:%.+]]) -// CHECK_DAG: [[TG]] = load i8*, i8** [[TG_ADDR:%.+]], +// CHECK_DAG: [[TG]] = load i8*, i8** [[TG_ADDR:%.+]], align // CHECK-DAG: [[ARGC_REF]] = bitcast i32* [[ARGC_ADDR:%.+]] to i8* -// CHECK-DAG: [[ARGC_ADDR]] = load i32*, i32** [[ARGC_ADDR_REF:%.+]], +// CHECK-DAG: [[ARGC_ADDR]] = load i32*, i32** [[ARGC_ADDR_REF:%.+]], align // CHECK-DAG: 
[[ARGC_ADDR_REF]] = getelementptr inbounds [[CAPS_TY:%.+]], %{{.+}}* [[CAP:%.+]], i32 0, i32 1 // CHECK-DAG: call i8* @__kmpc_task_reduction_get_th_data(i32 %{{.+}}, i8* [[TG:%.+]], i8* [[ARGV_REF:%.+]]) -// CHECK_DAG: [[TG]] = load i8*, i8** [[TG_ADDR]], -// CHECK-DAG: [[ARGV_REF]] = load i8*, i8** [[ARGV_ADDR:%.+]], -// CHECK-DAG: [[ARGV_ADDR]] = load i8**, i8*** [[ARGV_ADDR_REF:%.+]], -// CHECK-DAG: [[ARGV_ADDR_REF:%.+]] = load i8***, i8**** [[ARGV:%.+]], +// CHECK_DAG: [[TG]] = load i8*, i8** [[TG_ADDR]], align +// CHECK-DAG: [[ARGV_REF]] = load i8*, i8** [[ARGV_ADDR:%.+]], align +// CHECK-DAG: [[ARGV_ADDR]] = load i8**, i8*** [[ARGV_ADDR_REF:%.+]], align +// CHECK-DAG: [[ARGV_ADDR_REF:%.+]] = load i8***, i8**** [[ARGV:%.+]], align // CHECK-DAG: [[ARGV]] = getelementptr inbounds [[CAPS_TY]], [[CAPS_TY]]* [[CAP]], i32 0, i32 2 #endif Index: clang/test/OpenMP/master_taskloop_firstprivate_codegen.cpp =================================================================== --- clang/test/OpenMP/master_taskloop_firstprivate_codegen.cpp +++ clang/test/OpenMP/master_taskloop_firstprivate_codegen.cpp @@ -295,7 +295,8 @@ // CHECK: [[PRIV_S_ARR_ADDR:%.+]] = alloca [2 x [[S_DOUBLE_TY]]]*, // CHECK: [[PRIV_VEC_ADDR:%.+]] = alloca [2 x i32]*, // CHECK: [[PRIV_SIVAR_ADDR:%.+]] = alloca i32*, -// CHECK: store void (i8*, ...)* bitcast (void ([[PRIVATES_MAIN_TY]]*, [[S_DOUBLE_TY]]**, i32**, [2 x [[S_DOUBLE_TY]]]**, [2 x i32]**, i32**)* [[PRIVATES_MAP_FN]] to void (i8*, ...)*), void (i8*, ...)** [[MAP_FN_ADDR:%.+]], +// CHECK: [[PRIVATES_MAP_FN_NOALIAS:%.+]] = call void (i8*, ...)* @llvm.noalias.p0{{.*}}(void (i8*, ...)* bitcast (void ([[PRIVATES_MAIN_TY]]*, [[S_DOUBLE_TY]]**, i32**, [2 x [[S_DOUBLE_TY]]]**, [2 x i32]**, i32**)* [[PRIVATES_MAP_FN]] to void (i8*, ...)*), +// CHECK: store void (i8*, ...)* [[PRIVATES_MAP_FN_NOALIAS]], void (i8*, ...)** [[MAP_FN_ADDR:%.+]], // CHECK: [[MAP_FN:%.+]] = load void (i8*, ...)*, void (i8*, ...)** [[MAP_FN_ADDR]], // CHECK: call void (i8*, ...) [[MAP_FN]](i8* %{{.+}}, [[S_DOUBLE_TY]]** [[PRIV_VAR_ADDR]], i32** [[PRIV_T_VAR_ADDR]], [2 x [[S_DOUBLE_TY]]]** [[PRIV_S_ARR_ADDR]], [2 x i32]** [[PRIV_VEC_ADDR]], i32** [[PRIV_SIVAR_ADDR]]) @@ -445,7 +446,8 @@ // CHECK-DAG: [[PRIV_VEC_ADDR:%.+]] = alloca [2 x i32]*, // CHECK-DAG: [[PRIV_S_ARR_ADDR:%.+]] = alloca [2 x [[S_INT_TY]]]*, // CHECK-DAG: [[PRIV_VAR_ADDR:%.+]] = alloca [[S_INT_TY]]*, -// CHECK: store void (i8*, ...)* bitcast (void ([[PRIVATES_TMAIN_TY]]*, i32**, [2 x i32]**, [2 x [[S_INT_TY]]]**, [[S_INT_TY]]**)* [[PRIVATES_MAP_FN]] to void (i8*, ...)*), void (i8*, ...)** [[MAP_FN_ADDR:%.+]], +// CHECK: [[PRIVATES_MAP_FN_NOALIAS:%.+]] = call void (i8*, ...)* @llvm.noalias.p0{{.*}}(void (i8*, ...)* bitcast (void ([[PRIVATES_TMAIN_TY]]*, i32**, [2 x i32]**, [2 x [[S_INT_TY]]]**, [[S_INT_TY]]**)* [[PRIVATES_MAP_FN]] to void (i8*, ...)*), +// CHECK: store void (i8*, ...)* [[PRIVATES_MAP_FN_NOALIAS]], void (i8*, ...)** [[MAP_FN_ADDR:%.+]], // CHECK: [[MAP_FN:%.+]] = load void (i8*, ...)*, void (i8*, ...)** [[MAP_FN_ADDR]], // CHECK: call void (i8*, ...) 
[[MAP_FN]](i8* %{{.+}}, i32** [[PRIV_T_VAR_ADDR]], [2 x i32]** [[PRIV_VEC_ADDR]], [2 x [[S_INT_TY]]]** [[PRIV_S_ARR_ADDR]], [[S_INT_TY]]** [[PRIV_VAR_ADDR]]) // CHECK: [[PRIV_T_VAR:%.+]] = load i32*, i32** [[PRIV_T_VAR_ADDR]], Index: clang/test/OpenMP/master_taskloop_lastprivate_codegen.cpp =================================================================== --- clang/test/OpenMP/master_taskloop_lastprivate_codegen.cpp +++ clang/test/OpenMP/master_taskloop_lastprivate_codegen.cpp @@ -273,7 +273,8 @@ // CHECK: [[PRIV_S_ARR_ADDR:%.+]] = alloca [2 x [[S_DOUBLE_TY]]]*, // CHECK: [[PRIV_VEC_ADDR:%.+]] = alloca [2 x i32]*, // CHECK: [[PRIV_SIVAR_ADDR:%.+]] = alloca i32*, -// CHECK: store void (i8*, ...)* bitcast (void ([[PRIVATES_MAIN_TY]]*, [[S_DOUBLE_TY]]**, i32**, [2 x [[S_DOUBLE_TY]]]**, [2 x i32]**, i32**)* [[PRIVATES_MAP_FN]] to void (i8*, ...)*), void (i8*, ...)** [[MAP_FN_ADDR:%.+]], +// CHECK: [[PRIVATES_MAP_FN_NOALIAS:%.+]] = call void (i8*, ...)* @llvm.noalias.p0{{.*}}(void (i8*, ...)* bitcast (void ([[PRIVATES_MAIN_TY]]*, [[S_DOUBLE_TY]]**, i32**, [2 x [[S_DOUBLE_TY]]]**, [2 x i32]**, i32**)* [[PRIVATES_MAP_FN]] to void (i8*, ...)*), +// CHECK: store void (i8*, ...)* [[PRIVATES_MAP_FN_NOALIAS]], void (i8*, ...)** [[MAP_FN_ADDR:%.+]], // CHECK: [[MAP_FN:%.+]] = load void (i8*, ...)*, void (i8*, ...)** [[MAP_FN_ADDR]], // CHECK: call void (i8*, ...) [[MAP_FN]](i8* %{{.+}}, [[S_DOUBLE_TY]]** [[PRIV_VAR_ADDR]], i32** [[PRIV_T_VAR_ADDR]], [2 x [[S_DOUBLE_TY]]]** [[PRIV_S_ARR_ADDR]], [2 x i32]** [[PRIV_VEC_ADDR]], i32** [[PRIV_SIVAR_ADDR]]) @@ -441,7 +442,9 @@ // CHECK-DAG: [[PRIV_VEC_ADDR:%.+]] = alloca [2 x i32]*, // CHECK-DAG: [[PRIV_S_ARR_ADDR:%.+]] = alloca [2 x [[S_INT_TY]]]*, // CHECK-DAG: [[PRIV_VAR_ADDR:%.+]] = alloca [[S_INT_TY]]*, -// CHECK: store void (i8*, ...)* bitcast (void ([[PRIVATES_TMAIN_TY]]*, i32**, [2 x i32]**, [2 x [[S_INT_TY]]]**, [[S_INT_TY]]**)* [[PRIVATES_MAP_FN]] to void (i8*, ...)*), void (i8*, ...)** [[MAP_FN_ADDR:%.+]], +// CHECK: [[PRIVATES_MAP_FN_NOALIAS:%.+]] = call void (i8*, ...)* @llvm.noalias.p0{{.*}}(void (i8*, ...)* bitcast (void ([[PRIVATES_TMAIN_TY]]*, i32**, [2 x i32]**, [2 x [[S_INT_TY]]]**, [[S_INT_TY]]**)* [[PRIVATES_MAP_FN]] to void (i8*, ...)*), +// CHECK: store void (i8*, ...)* [[PRIVATES_MAP_FN_NOALIAS]], void (i8*, ...)** [[MAP_FN_ADDR:%.+]], + // CHECK: [[MAP_FN:%.+]] = load void (i8*, ...)*, void (i8*, ...)** [[MAP_FN_ADDR]], // CHECK: call void (i8*, ...) 
[[MAP_FN]](i8* %{{.+}}, i32** [[PRIV_T_VAR_ADDR]], [2 x i32]** [[PRIV_VEC_ADDR]], [2 x [[S_INT_TY]]]** [[PRIV_S_ARR_ADDR]], [[S_INT_TY]]** [[PRIV_VAR_ADDR]]) // CHECK: [[PRIV_T_VAR:%.+]] = load i32*, i32** [[PRIV_T_VAR_ADDR]], Index: clang/test/OpenMP/master_taskloop_private_codegen.cpp =================================================================== --- clang/test/OpenMP/master_taskloop_private_codegen.cpp +++ clang/test/OpenMP/master_taskloop_private_codegen.cpp @@ -235,7 +235,8 @@ // CHECK: [[PRIV_S_ARR_ADDR:%.+]] = alloca [2 x [[S_DOUBLE_TY]]]*, // CHECK: [[PRIV_VEC_ADDR:%.+]] = alloca [2 x i32]*, // CHECK: [[PRIV_SIVAR_ADDR:%.+]] = alloca i32*, -// CHECK: store void (i8*, ...)* bitcast (void ([[PRIVATES_MAIN_TY]]*, [[S_DOUBLE_TY]]**, i32**, [2 x [[S_DOUBLE_TY]]]**, [2 x i32]**, i32**)* [[PRIVATES_MAP_FN]] to void (i8*, ...)*), void (i8*, ...)** [[MAP_FN_ADDR:%.+]], +// CHECK: [[PRIVATES_MAP_FN_NOALIAS:%.+]] = call void (i8*, ...)* @llvm.noalias.p0{{.*}}(void (i8*, ...)* bitcast (void ([[PRIVATES_MAIN_TY]]*, [[S_DOUBLE_TY]]**, i32**, [2 x [[S_DOUBLE_TY]]]**, [2 x i32]**, i32**)* [[PRIVATES_MAP_FN]] to void (i8*, ...)*), +// CHECK: store void (i8*, ...)* [[PRIVATES_MAP_FN_NOALIAS]], void (i8*, ...)** [[MAP_FN_ADDR:%.+]], // CHECK: [[MAP_FN:%.+]] = load void (i8*, ...)*, void (i8*, ...)** [[MAP_FN_ADDR]], // CHECK: call void (i8*, ...) [[MAP_FN]](i8* %{{.+}}, [[S_DOUBLE_TY]]** [[PRIV_VAR_ADDR]], i32** [[PRIV_T_VAR_ADDR]], [2 x [[S_DOUBLE_TY]]]** [[PRIV_S_ARR_ADDR]], [2 x i32]** [[PRIV_VEC_ADDR]], i32** [[PRIV_SIVAR_ADDR]]) // CHECK: [[PRIV_VAR:%.+]] = load [[S_DOUBLE_TY]]*, [[S_DOUBLE_TY]]** [[PRIV_VAR_ADDR]], @@ -363,7 +364,8 @@ // CHECK-DAG: [[PRIV_VEC_ADDR:%.+]] = alloca [2 x i32]*, // CHECK-DAG: [[PRIV_S_ARR_ADDR:%.+]] = alloca [2 x [[S_INT_TY]]]*, // CHECK-DAG: [[PRIV_VAR_ADDR:%.+]] = alloca [[S_INT_TY]]*, -// CHECK: store void (i8*, ...)* bitcast (void ([[PRIVATES_TMAIN_TY]]*, i32**, [2 x i32]**, [2 x [[S_INT_TY]]]**, [[S_INT_TY]]**)* [[PRIVATES_MAP_FN]] to void (i8*, ...)*), void (i8*, ...)** [[MAP_FN_ADDR:%.+]], +// CHECK: [[PRIVATES_MAP_FN_NOALIAS:%.+]] = call void (i8*, ...)* @llvm.noalias.p0{{.*}}(void (i8*, ...)* bitcast (void ([[PRIVATES_TMAIN_TY]]*, i32**, [2 x i32]**, [2 x [[S_INT_TY]]]**, [[S_INT_TY]]**)* [[PRIVATES_MAP_FN]] to void (i8*, ...)*), +// CHECK: store void (i8*, ...)* [[PRIVATES_MAP_FN_NOALIAS]], void (i8*, ...)** [[MAP_FN_ADDR:%.+]], // CHECK: [[MAP_FN:%.+]] = load void (i8*, ...)*, void (i8*, ...)** [[MAP_FN_ADDR]], // CHECK: call void (i8*, ...) 
[[MAP_FN]](i8* %{{.+}}, i32** [[PRIV_T_VAR_ADDR]], [2 x i32]** [[PRIV_VEC_ADDR]], [2 x [[S_INT_TY]]]** [[PRIV_S_ARR_ADDR]], [[S_INT_TY]]** [[PRIV_VAR_ADDR]]) // CHECK: [[PRIV_T_VAR:%.+]] = load i32*, i32** [[PRIV_T_VAR_ADDR]], Index: clang/test/OpenMP/master_taskloop_simd_firstprivate_codegen.cpp =================================================================== --- clang/test/OpenMP/master_taskloop_simd_firstprivate_codegen.cpp +++ clang/test/OpenMP/master_taskloop_simd_firstprivate_codegen.cpp @@ -274,7 +274,8 @@ // CHECK: [[PRIV_S_ARR_ADDR:%.+]] = alloca [2 x [[S_DOUBLE_TY]]]*, // CHECK: [[PRIV_VEC_ADDR:%.+]] = alloca [2 x i32]*, // CHECK: [[PRIV_SIVAR_ADDR:%.+]] = alloca i32*, -// CHECK: store void (i8*, ...)* bitcast (void ([[PRIVATES_MAIN_TY]]*, [[S_DOUBLE_TY]]**, i32**, [2 x [[S_DOUBLE_TY]]]**, [2 x i32]**, i32**)* [[PRIVATES_MAP_FN]] to void (i8*, ...)*), void (i8*, ...)** [[MAP_FN_ADDR:%.+]], +// CHECK: [[PRIVATES_MAP_FN_NOALIAS:%.+]] = call void (i8*, ...)* @llvm.noalias.p0{{.*}}(void (i8*, ...)* bitcast (void ([[PRIVATES_MAIN_TY]]*, [[S_DOUBLE_TY]]**, i32**, [2 x [[S_DOUBLE_TY]]]**, [2 x i32]**, i32**)* [[PRIVATES_MAP_FN]] to void (i8*, ...)*), +// CHECK: store void (i8*, ...)* [[PRIVATES_MAP_FN_NOALIAS]], void (i8*, ...)** [[MAP_FN_ADDR:%.+]], // CHECK: [[MAP_FN:%.+]] = load void (i8*, ...)*, void (i8*, ...)** [[MAP_FN_ADDR]], // CHECK: call void (i8*, ...) [[MAP_FN]](i8* %{{.+}}, [[S_DOUBLE_TY]]** [[PRIV_VAR_ADDR]], i32** [[PRIV_T_VAR_ADDR]], [2 x [[S_DOUBLE_TY]]]** [[PRIV_S_ARR_ADDR]], [2 x i32]** [[PRIV_VEC_ADDR]], i32** [[PRIV_SIVAR_ADDR]]) @@ -424,7 +425,8 @@ // CHECK-DAG: [[PRIV_VEC_ADDR:%.+]] = alloca [2 x i32]*, // CHECK-DAG: [[PRIV_S_ARR_ADDR:%.+]] = alloca [2 x [[S_INT_TY]]]*, // CHECK-DAG: [[PRIV_VAR_ADDR:%.+]] = alloca [[S_INT_TY]]*, -// CHECK: store void (i8*, ...)* bitcast (void ([[PRIVATES_TMAIN_TY]]*, i32**, [2 x i32]**, [2 x [[S_INT_TY]]]**, [[S_INT_TY]]**)* [[PRIVATES_MAP_FN]] to void (i8*, ...)*), void (i8*, ...)** [[MAP_FN_ADDR:%.+]], +// CHECK: [[PRIVATES_MAP_FN_NOALIAS:%.+]] = call void (i8*, ...)* @llvm.noalias.p0{{.*}}(void (i8*, ...)* bitcast (void ([[PRIVATES_TMAIN_TY]]*, i32**, [2 x i32]**, [2 x [[S_INT_TY]]]**, [[S_INT_TY]]**)* [[PRIVATES_MAP_FN]] to void (i8*, ...)*), +// CHECK: store void (i8*, ...)* [[PRIVATES_MAP_FN_NOALIAS]], void (i8*, ...)** [[MAP_FN_ADDR:%.+]], // CHECK: [[MAP_FN:%.+]] = load void (i8*, ...)*, void (i8*, ...)** [[MAP_FN_ADDR]], // CHECK: call void (i8*, ...) 
[[MAP_FN]](i8* %{{.+}}, i32** [[PRIV_T_VAR_ADDR]], [2 x i32]** [[PRIV_VEC_ADDR]], [2 x [[S_INT_TY]]]** [[PRIV_S_ARR_ADDR]], [[S_INT_TY]]** [[PRIV_VAR_ADDR]]) // CHECK: [[PRIV_T_VAR:%.+]] = load i32*, i32** [[PRIV_T_VAR_ADDR]], Index: clang/test/OpenMP/master_taskloop_simd_lastprivate_codegen.cpp =================================================================== --- clang/test/OpenMP/master_taskloop_simd_lastprivate_codegen.cpp +++ clang/test/OpenMP/master_taskloop_simd_lastprivate_codegen.cpp @@ -264,7 +264,8 @@ // CHECK: [[PRIV_S_ARR_ADDR:%.+]] = alloca [2 x [[S_DOUBLE_TY]]]*, // CHECK: [[PRIV_VEC_ADDR:%.+]] = alloca [2 x i32]*, // CHECK: [[PRIV_SIVAR_ADDR:%.+]] = alloca i32*, -// CHECK: store void (i8*, ...)* bitcast (void ([[PRIVATES_MAIN_TY]]*, [[S_DOUBLE_TY]]**, i32**, [2 x [[S_DOUBLE_TY]]]**, [2 x i32]**, i32**)* [[PRIVATES_MAP_FN]] to void (i8*, ...)*), void (i8*, ...)** [[MAP_FN_ADDR:%.+]], +// CHECK: [[PRIVATES_MAP_FN_NOALIAS:%.+]] = call void (i8*, ...)* @llvm.noalias.p0{{.*}}(void (i8*, ...)* bitcast (void ([[PRIVATES_MAIN_TY]]*, [[S_DOUBLE_TY]]**, i32**, [2 x [[S_DOUBLE_TY]]]**, [2 x i32]**, i32**)* [[PRIVATES_MAP_FN]] to void (i8*, ...)*), +// CHECK: store void (i8*, ...)* [[PRIVATES_MAP_FN_NOALIAS]], void (i8*, ...)** [[MAP_FN_ADDR:%.+]], // CHECK: [[MAP_FN:%.+]] = load void (i8*, ...)*, void (i8*, ...)** [[MAP_FN_ADDR]], // CHECK: call void (i8*, ...) [[MAP_FN]](i8* %{{.+}}, [[S_DOUBLE_TY]]** [[PRIV_VAR_ADDR]], i32** [[PRIV_T_VAR_ADDR]], [2 x [[S_DOUBLE_TY]]]** [[PRIV_S_ARR_ADDR]], [2 x i32]** [[PRIV_VEC_ADDR]], i32** [[PRIV_SIVAR_ADDR]]) @@ -432,7 +433,8 @@ // CHECK-DAG: [[PRIV_VEC_ADDR:%.+]] = alloca [2 x i32]*, // CHECK-DAG: [[PRIV_S_ARR_ADDR:%.+]] = alloca [2 x [[S_INT_TY]]]*, // CHECK-DAG: [[PRIV_VAR_ADDR:%.+]] = alloca [[S_INT_TY]]*, -// CHECK: store void (i8*, ...)* bitcast (void ([[PRIVATES_TMAIN_TY]]*, i32**, [2 x i32]**, [2 x [[S_INT_TY]]]**, [[S_INT_TY]]**)* [[PRIVATES_MAP_FN]] to void (i8*, ...)*), void (i8*, ...)** [[MAP_FN_ADDR:%.+]], +// CHECK: [[PRIVATES_MAP_FN_NOALIAS:%.+]] = call void (i8*, ...)* @llvm.noalias.p0{{.*}}(void (i8*, ...)* bitcast (void ([[PRIVATES_TMAIN_TY]]*, i32**, [2 x i32]**, [2 x [[S_INT_TY]]]**, [[S_INT_TY]]**)* [[PRIVATES_MAP_FN]] to void (i8*, ...)*), +// CHECK: store void (i8*, ...)* [[PRIVATES_MAP_FN_NOALIAS]], void (i8*, ...)** [[MAP_FN_ADDR:%.+]], // CHECK: [[MAP_FN:%.+]] = load void (i8*, ...)*, void (i8*, ...)** [[MAP_FN_ADDR]], // CHECK: call void (i8*, ...) 
[[MAP_FN]](i8* %{{.+}}, i32** [[PRIV_T_VAR_ADDR]], [2 x i32]** [[PRIV_VEC_ADDR]], [2 x [[S_INT_TY]]]** [[PRIV_S_ARR_ADDR]], [[S_INT_TY]]** [[PRIV_VAR_ADDR]]) // CHECK: [[PRIV_T_VAR:%.+]] = load i32*, i32** [[PRIV_T_VAR_ADDR]], Index: clang/test/OpenMP/master_taskloop_simd_private_codegen.cpp =================================================================== --- clang/test/OpenMP/master_taskloop_simd_private_codegen.cpp +++ clang/test/OpenMP/master_taskloop_simd_private_codegen.cpp @@ -228,7 +228,8 @@ // CHECK: [[PRIV_S_ARR_ADDR:%.+]] = alloca [2 x [[S_DOUBLE_TY]]]*, // CHECK: [[PRIV_VEC_ADDR:%.+]] = alloca [2 x i32]*, // CHECK: [[PRIV_SIVAR_ADDR:%.+]] = alloca i32*, -// CHECK: store void (i8*, ...)* bitcast (void ([[PRIVATES_MAIN_TY]]*, [[S_DOUBLE_TY]]**, i32**, [2 x [[S_DOUBLE_TY]]]**, [2 x i32]**, i32**)* [[PRIVATES_MAP_FN]] to void (i8*, ...)*), void (i8*, ...)** [[MAP_FN_ADDR:%.+]], +// CHECK: [[PRIVATES_MAP_FN_NOALIAS:%.+]] = call void (i8*, ...)* @llvm.noalias.p0{{.*}}(void (i8*, ...)* bitcast (void ([[PRIVATES_MAIN_TY]]*, [[S_DOUBLE_TY]]**, i32**, [2 x [[S_DOUBLE_TY]]]**, [2 x i32]**, i32**)* [[PRIVATES_MAP_FN]] to void (i8*, ...)*), +// CHECK: store void (i8*, ...)* [[PRIVATES_MAP_FN_NOALIAS]], void (i8*, ...)** [[MAP_FN_ADDR:%.+]], // CHECK: [[MAP_FN:%.+]] = load void (i8*, ...)*, void (i8*, ...)** [[MAP_FN_ADDR]], // CHECK: call void (i8*, ...) [[MAP_FN]](i8* %{{.+}}, [[S_DOUBLE_TY]]** [[PRIV_VAR_ADDR]], i32** [[PRIV_T_VAR_ADDR]], [2 x [[S_DOUBLE_TY]]]** [[PRIV_S_ARR_ADDR]], [2 x i32]** [[PRIV_VEC_ADDR]], i32** [[PRIV_SIVAR_ADDR]]) // CHECK: [[PRIV_VAR:%.+]] = load [[S_DOUBLE_TY]]*, [[S_DOUBLE_TY]]** [[PRIV_VAR_ADDR]], @@ -356,7 +357,8 @@ // CHECK-DAG: [[PRIV_VEC_ADDR:%.+]] = alloca [2 x i32]*, // CHECK-DAG: [[PRIV_S_ARR_ADDR:%.+]] = alloca [2 x [[S_INT_TY]]]*, // CHECK-DAG: [[PRIV_VAR_ADDR:%.+]] = alloca [[S_INT_TY]]*, -// CHECK: store void (i8*, ...)* bitcast (void ([[PRIVATES_TMAIN_TY]]*, i32**, [2 x i32]**, [2 x [[S_INT_TY]]]**, [[S_INT_TY]]**)* [[PRIVATES_MAP_FN]] to void (i8*, ...)*), void (i8*, ...)** [[MAP_FN_ADDR:%.+]], +// CHECK: [[PRIVATES_MAP_FN_NOALIAS:%.+]] = call void (i8*, ...)* @llvm.noalias.p0{{.*}}(void (i8*, ...)* bitcast (void ([[PRIVATES_TMAIN_TY]]*, i32**, [2 x i32]**, [2 x [[S_INT_TY]]]**, [[S_INT_TY]]**)* [[PRIVATES_MAP_FN]] to void (i8*, ...)*), +// CHECK: store void (i8*, ...)* [[PRIVATES_MAP_FN_NOALIAS]], void (i8*, ...)** [[MAP_FN_ADDR:%.+]], // CHECK: [[MAP_FN:%.+]] = load void (i8*, ...)*, void (i8*, ...)** [[MAP_FN_ADDR]], // CHECK: call void (i8*, ...) 
[[MAP_FN]](i8* %{{.+}}, i32** [[PRIV_T_VAR_ADDR]], [2 x i32]** [[PRIV_VEC_ADDR]], [2 x [[S_INT_TY]]]** [[PRIV_S_ARR_ADDR]], [[S_INT_TY]]** [[PRIV_VAR_ADDR]]) // CHECK: [[PRIV_T_VAR:%.+]] = load i32*, i32** [[PRIV_T_VAR_ADDR]], Index: clang/test/OpenMP/parallel_for_reduction_task_codegen.cpp =================================================================== --- clang/test/OpenMP/parallel_for_reduction_task_codegen.cpp +++ clang/test/OpenMP/parallel_for_reduction_task_codegen.cpp @@ -115,15 +115,15 @@ // CHECK: define internal {{.*}}i32 [[TASK]](i32 {{.+}}, [[TASK_TY]]* {{.+}}) // CHECK-DAG: call i8* @__kmpc_task_reduction_get_th_data(i32 %{{.+}}, i8* [[TG:%.+]], i8* [[ARGC_REF:%.+]]) -// CHECK_DAG: [[TG]] = load i8*, i8** [[TG_ADDR:%.+]], +// CHECK_DAG: [[TG]] = load i8*, i8** [[TG_ADDR:%.+]], align // CHECK-DAG: [[ARGC_REF]] = bitcast i32* [[ARGC_ADDR:%.+]] to i8* -// CHECK-DAG: [[ARGC_ADDR]] = load i32*, i32** [[ARGC_ADDR_REF:%.+]], +// CHECK-DAG: [[ARGC_ADDR]] = load i32*, i32** [[ARGC_ADDR_REF:%.+]], align // CHECK-DAG: [[ARGC_ADDR_REF]] = getelementptr inbounds [[CAPS_TY:%.+]], %{{.+}}* [[CAP:%.+]], i32 0, i32 1 // CHECK-DAG: call i8* @__kmpc_task_reduction_get_th_data(i32 %{{.+}}, i8* [[TG:%.+]], i8* [[ARGV_REF:%.+]]) -// CHECK_DAG: [[TG]] = load i8*, i8** [[TG_ADDR]], -// CHECK-DAG: [[ARGV_REF]] = load i8*, i8** [[ARGV_ADDR:%.+]], -// CHECK-DAG: [[ARGV_ADDR]] = load i8**, i8*** [[ARGV_ADDR_REF:%.+]], -// CHECK-DAG: [[ARGV_ADDR_REF:%.+]] = load i8***, i8**** [[ARGV:%.+]], +// CHECK_DAG: [[TG]] = load i8*, i8** [[TG_ADDR]], align +// CHECK-DAG: [[ARGV_REF]] = load i8*, i8** [[ARGV_ADDR:%.+]], align +// CHECK-DAG: [[ARGV_ADDR]] = load i8**, i8*** [[ARGV_ADDR_REF:%.+]], align +// CHECK-DAG: [[ARGV_ADDR_REF:%.+]] = load i8***, i8**** [[ARGV:%.+]], align // CHECK-DAG: [[ARGV]] = getelementptr inbounds [[CAPS_TY]], [[CAPS_TY]]* [[CAP]], i32 0, i32 2 #endif Index: clang/test/OpenMP/parallel_master_reduction_task_codegen.cpp =================================================================== --- clang/test/OpenMP/parallel_master_reduction_task_codegen.cpp +++ clang/test/OpenMP/parallel_master_reduction_task_codegen.cpp @@ -114,15 +114,15 @@ // CHECK: define internal {{.*}}i32 [[TASK]](i32 {{.+}}, [[TASK_TY]]* {{.+}}) // CHECK-DAG: call i8* @__kmpc_task_reduction_get_th_data(i32 %{{.+}}, i8* [[TG:%.+]], i8* [[ARGC_REF:%.+]]) -// CHECK_DAG: [[TG]] = load i8*, i8** [[TG_ADDR:%.+]], +// CHECK_DAG: [[TG]] = load i8*, i8** [[TG_ADDR:%.+]], align // CHECK-DAG: [[ARGC_REF]] = bitcast i32* [[ARGC_ADDR:%.+]] to i8* -// CHECK-DAG: [[ARGC_ADDR]] = load i32*, i32** [[ARGC_ADDR_REF:%.+]], +// CHECK-DAG: [[ARGC_ADDR]] = load i32*, i32** [[ARGC_ADDR_REF:%.+]], align // CHECK-DAG: [[ARGC_ADDR_REF]] = getelementptr inbounds [[CAPS_TY:%.+]], %{{.+}}* [[CAP:%.+]], i32 0, i32 1 // CHECK-DAG: call i8* @__kmpc_task_reduction_get_th_data(i32 %{{.+}}, i8* [[TG:%.+]], i8* [[ARGV_REF:%.+]]) -// CHECK_DAG: [[TG]] = load i8*, i8** [[TG_ADDR]], -// CHECK-DAG: [[ARGV_REF]] = load i8*, i8** [[ARGV_ADDR:%.+]], -// CHECK-DAG: [[ARGV_ADDR]] = load i8**, i8*** [[ARGV_ADDR_REF:%.+]], -// CHECK-DAG: [[ARGV_ADDR_REF:%.+]] = load i8***, i8**** [[ARGV:%.+]], +// CHECK_DAG: [[TG]] = load i8*, i8** [[TG_ADDR]], align +// CHECK-DAG: [[ARGV_REF]] = load i8*, i8** [[ARGV_ADDR:%.+]], align +// CHECK-DAG: [[ARGV_ADDR]] = load i8**, i8*** [[ARGV_ADDR_REF:%.+]], align +// CHECK-DAG: [[ARGV_ADDR_REF:%.+]] = load i8***, i8**** [[ARGV:%.+]], align // CHECK-DAG: [[ARGV]] = getelementptr inbounds [[CAPS_TY]], [[CAPS_TY]]* [[CAP]], i32 0, i32 2 
#endif Index: clang/test/OpenMP/parallel_master_taskloop_firstprivate_codegen.cpp =================================================================== --- clang/test/OpenMP/parallel_master_taskloop_firstprivate_codegen.cpp +++ clang/test/OpenMP/parallel_master_taskloop_firstprivate_codegen.cpp @@ -283,7 +283,8 @@ // CHECK: [[PRIV_S_ARR_ADDR:%.+]] = alloca [2 x [[S_DOUBLE_TY]]]*, // CHECK: [[PRIV_VEC_ADDR:%.+]] = alloca [2 x i32]*, // CHECK: [[PRIV_SIVAR_ADDR:%.+]] = alloca i32*, -// CHECK: store void (i8*, ...)* bitcast (void ([[PRIVATES_MAIN_TY]]*, [[S_DOUBLE_TY]]**, i32**, [2 x [[S_DOUBLE_TY]]]**, [2 x i32]**, i32**)* [[PRIVATES_MAP_FN]] to void (i8*, ...)*), void (i8*, ...)** [[MAP_FN_ADDR:%.+]], +// CHECK: [[PRIVATES_MAP_FN_NOALIAS:%.+]] = call void (i8*, ...)* @llvm.noalias.p0{{.*}}(void (i8*, ...)* bitcast (void ([[PRIVATES_MAIN_TY]]*, [[S_DOUBLE_TY]]**, i32**, [2 x [[S_DOUBLE_TY]]]**, [2 x i32]**, i32**)* [[PRIVATES_MAP_FN]] to void (i8*, ...)*), +// CHECK: store void (i8*, ...)* [[PRIVATES_MAP_FN_NOALIAS]], void (i8*, ...)** [[MAP_FN_ADDR:%.+]], // CHECK: [[MAP_FN:%.+]] = load void (i8*, ...)*, void (i8*, ...)** [[MAP_FN_ADDR]], // CHECK: call void (i8*, ...) [[MAP_FN]](i8* %{{.+}}, [[S_DOUBLE_TY]]** [[PRIV_VAR_ADDR]], i32** [[PRIV_T_VAR_ADDR]], [2 x [[S_DOUBLE_TY]]]** [[PRIV_S_ARR_ADDR]], [2 x i32]** [[PRIV_VEC_ADDR]], i32** [[PRIV_SIVAR_ADDR]]) @@ -422,7 +423,8 @@ // CHECK-DAG: [[PRIV_VEC_ADDR:%.+]] = alloca [2 x i32]*, // CHECK-DAG: [[PRIV_S_ARR_ADDR:%.+]] = alloca [2 x [[S_INT_TY]]]*, // CHECK-DAG: [[PRIV_VAR_ADDR:%.+]] = alloca [[S_INT_TY]]*, -// CHECK: store void (i8*, ...)* bitcast (void ([[PRIVATES_TMAIN_TY]]*, i32**, [2 x i32]**, [2 x [[S_INT_TY]]]**, [[S_INT_TY]]**)* [[PRIVATES_MAP_FN]] to void (i8*, ...)*), void (i8*, ...)** [[MAP_FN_ADDR:%.+]], +// CHECK: [[PRIVATES_MAP_FN_NOALIAS:%.+]] = call void (i8*, ...)* @llvm.noalias.p0{{.*}}(void (i8*, ...)* bitcast (void ([[PRIVATES_TMAIN_TY]]*, i32**, [2 x i32]**, [2 x [[S_INT_TY]]]**, [[S_INT_TY]]**)* [[PRIVATES_MAP_FN]] to void (i8*, ...)*), +// CHECK: store void (i8*, ...)* [[PRIVATES_MAP_FN_NOALIAS]], void (i8*, ...)** [[MAP_FN_ADDR:%.+]], // CHECK: [[MAP_FN:%.+]] = load void (i8*, ...)*, void (i8*, ...)** [[MAP_FN_ADDR]], // CHECK: call void (i8*, ...) 
[[MAP_FN]](i8* %{{.+}}, i32** [[PRIV_T_VAR_ADDR]], [2 x i32]** [[PRIV_VEC_ADDR]], [2 x [[S_INT_TY]]]** [[PRIV_S_ARR_ADDR]], [[S_INT_TY]]** [[PRIV_VAR_ADDR]]) // CHECK: [[PRIV_T_VAR:%.+]] = load i32*, i32** [[PRIV_T_VAR_ADDR]], Index: clang/test/OpenMP/parallel_master_taskloop_lastprivate_codegen.cpp =================================================================== --- clang/test/OpenMP/parallel_master_taskloop_lastprivate_codegen.cpp +++ clang/test/OpenMP/parallel_master_taskloop_lastprivate_codegen.cpp @@ -261,7 +261,8 @@ // CHECK: [[PRIV_S_ARR_ADDR:%.+]] = alloca [2 x [[S_DOUBLE_TY]]]*, // CHECK: [[PRIV_VEC_ADDR:%.+]] = alloca [2 x i32]*, // CHECK: [[PRIV_SIVAR_ADDR:%.+]] = alloca i32*, -// CHECK: store void (i8*, ...)* bitcast (void ([[PRIVATES_MAIN_TY]]*, [[S_DOUBLE_TY]]**, i32**, [2 x [[S_DOUBLE_TY]]]**, [2 x i32]**, i32**)* [[PRIVATES_MAP_FN]] to void (i8*, ...)*), void (i8*, ...)** [[MAP_FN_ADDR:%.+]], +// CHECK: [[PRIVATES_MAP_FN_NOALIAS:%.+]] = call void (i8*, ...)* @llvm.noalias.p0{{.*}}(void (i8*, ...)* bitcast (void ([[PRIVATES_MAIN_TY]]*, [[S_DOUBLE_TY]]**, i32**, [2 x [[S_DOUBLE_TY]]]**, [2 x i32]**, i32**)* [[PRIVATES_MAP_FN]] to void (i8*, ...)*), +// CHECK: store void (i8*, ...)* [[PRIVATES_MAP_FN_NOALIAS]], void (i8*, ...)** [[MAP_FN_ADDR:%.+]], // CHECK: [[MAP_FN:%.+]] = load void (i8*, ...)*, void (i8*, ...)** [[MAP_FN_ADDR]], // CHECK: call void (i8*, ...) [[MAP_FN]](i8* %{{.+}}, [[S_DOUBLE_TY]]** [[PRIV_VAR_ADDR]], i32** [[PRIV_T_VAR_ADDR]], [2 x [[S_DOUBLE_TY]]]** [[PRIV_S_ARR_ADDR]], [2 x i32]** [[PRIV_VEC_ADDR]], i32** [[PRIV_SIVAR_ADDR]]) @@ -423,7 +424,8 @@ // CHECK-DAG: [[PRIV_VEC_ADDR:%.+]] = alloca [2 x i32]*, // CHECK-DAG: [[PRIV_S_ARR_ADDR:%.+]] = alloca [2 x [[S_INT_TY]]]*, // CHECK-DAG: [[PRIV_VAR_ADDR:%.+]] = alloca [[S_INT_TY]]*, -// CHECK: store void (i8*, ...)* bitcast (void ([[PRIVATES_TMAIN_TY]]*, i32**, [2 x i32]**, [2 x [[S_INT_TY]]]**, [[S_INT_TY]]**)* [[PRIVATES_MAP_FN]] to void (i8*, ...)*), void (i8*, ...)** [[MAP_FN_ADDR:%.+]], +// CHECK: [[PRIVATES_MAP_FN_NOALIAS:%.+]] = call void (i8*, ...)* @llvm.noalias.p0{{.*}}(void (i8*, ...)* bitcast (void ([[PRIVATES_TMAIN_TY]]*, i32**, [2 x i32]**, [2 x [[S_INT_TY]]]**, [[S_INT_TY]]**)* [[PRIVATES_MAP_FN]] to void (i8*, ...)*), +// CHECK: store void (i8*, ...)* [[PRIVATES_MAP_FN_NOALIAS]], void (i8*, ...)** [[MAP_FN_ADDR:%.+]], // CHECK: [[MAP_FN:%.+]] = load void (i8*, ...)*, void (i8*, ...)** [[MAP_FN_ADDR]], // CHECK: call void (i8*, ...) 
[[MAP_FN]](i8* %{{.+}}, i32** [[PRIV_T_VAR_ADDR]], [2 x i32]** [[PRIV_VEC_ADDR]], [2 x [[S_INT_TY]]]** [[PRIV_S_ARR_ADDR]], [[S_INT_TY]]** [[PRIV_VAR_ADDR]]) // CHECK: [[PRIV_T_VAR:%.+]] = load i32*, i32** [[PRIV_T_VAR_ADDR]], Index: clang/test/OpenMP/parallel_master_taskloop_private_codegen.cpp =================================================================== --- clang/test/OpenMP/parallel_master_taskloop_private_codegen.cpp +++ clang/test/OpenMP/parallel_master_taskloop_private_codegen.cpp @@ -227,7 +227,8 @@ // CHECK: [[PRIV_S_ARR_ADDR:%.+]] = alloca [2 x [[S_DOUBLE_TY]]]*, // CHECK: [[PRIV_VEC_ADDR:%.+]] = alloca [2 x i32]*, // CHECK: [[PRIV_SIVAR_ADDR:%.+]] = alloca i32*, -// CHECK: store void (i8*, ...)* bitcast (void ([[PRIVATES_MAIN_TY]]*, [[S_DOUBLE_TY]]**, i32**, [2 x [[S_DOUBLE_TY]]]**, [2 x i32]**, i32**)* [[PRIVATES_MAP_FN]] to void (i8*, ...)*), void (i8*, ...)** [[MAP_FN_ADDR:%.+]], +// CHECK: [[PRIVATES_MAP_FN_NOALIAS:%.+]] = call void (i8*, ...)* @llvm.noalias.p0{{.*}}(void (i8*, ...)* bitcast (void ([[PRIVATES_MAIN_TY]]*, [[S_DOUBLE_TY]]**, i32**, [2 x [[S_DOUBLE_TY]]]**, [2 x i32]**, i32**)* [[PRIVATES_MAP_FN]] to void (i8*, ...)*), +// CHECK: store void (i8*, ...)* [[PRIVATES_MAP_FN_NOALIAS]], void (i8*, ...)** [[MAP_FN_ADDR:%.+]], // CHECK: [[MAP_FN:%.+]] = load void (i8*, ...)*, void (i8*, ...)** [[MAP_FN_ADDR]], // CHECK: call void (i8*, ...) [[MAP_FN]](i8* %{{.+}}, [[S_DOUBLE_TY]]** [[PRIV_VAR_ADDR]], i32** [[PRIV_T_VAR_ADDR]], [2 x [[S_DOUBLE_TY]]]** [[PRIV_S_ARR_ADDR]], [2 x i32]** [[PRIV_VEC_ADDR]], i32** [[PRIV_SIVAR_ADDR]]) // CHECK: [[PRIV_VAR:%.+]] = load [[S_DOUBLE_TY]]*, [[S_DOUBLE_TY]]** [[PRIV_VAR_ADDR]], @@ -349,7 +350,8 @@ // CHECK-DAG: [[PRIV_VEC_ADDR:%.+]] = alloca [2 x i32]*, // CHECK-DAG: [[PRIV_S_ARR_ADDR:%.+]] = alloca [2 x [[S_INT_TY]]]*, // CHECK-DAG: [[PRIV_VAR_ADDR:%.+]] = alloca [[S_INT_TY]]*, -// CHECK: store void (i8*, ...)* bitcast (void ([[PRIVATES_TMAIN_TY]]*, i32**, [2 x i32]**, [2 x [[S_INT_TY]]]**, [[S_INT_TY]]**)* [[PRIVATES_MAP_FN]] to void (i8*, ...)*), void (i8*, ...)** [[MAP_FN_ADDR:%.+]], +// CHECK: [[PRIVATES_MAP_FN_NOALIAS:%.+]] = call void (i8*, ...)* @llvm.noalias.p0{{.*}}(void (i8*, ...)* bitcast (void ([[PRIVATES_TMAIN_TY]]*, i32**, [2 x i32]**, [2 x [[S_INT_TY]]]**, [[S_INT_TY]]**)* [[PRIVATES_MAP_FN]] to void (i8*, ...)*), +// CHECK: store void (i8*, ...)* [[PRIVATES_MAP_FN_NOALIAS]], void (i8*, ...)** [[MAP_FN_ADDR:%.+]], // CHECK: [[MAP_FN:%.+]] = load void (i8*, ...)*, void (i8*, ...)** [[MAP_FN_ADDR]], // CHECK: call void (i8*, ...) 
[[MAP_FN]](i8* %{{.+}}, i32** [[PRIV_T_VAR_ADDR]], [2 x i32]** [[PRIV_VEC_ADDR]], [2 x [[S_INT_TY]]]** [[PRIV_S_ARR_ADDR]], [[S_INT_TY]]** [[PRIV_VAR_ADDR]]) // CHECK: [[PRIV_T_VAR:%.+]] = load i32*, i32** [[PRIV_T_VAR_ADDR]], Index: clang/test/OpenMP/parallel_master_taskloop_simd_firstprivate_codegen.cpp =================================================================== --- clang/test/OpenMP/parallel_master_taskloop_simd_firstprivate_codegen.cpp +++ clang/test/OpenMP/parallel_master_taskloop_simd_firstprivate_codegen.cpp @@ -283,7 +283,8 @@ // CHECK: [[PRIV_S_ARR_ADDR:%.+]] = alloca [2 x [[S_DOUBLE_TY]]]*, // CHECK: [[PRIV_VEC_ADDR:%.+]] = alloca [2 x i32]*, // CHECK: [[PRIV_SIVAR_ADDR:%.+]] = alloca i32*, -// CHECK: store void (i8*, ...)* bitcast (void ([[PRIVATES_MAIN_TY]]*, [[S_DOUBLE_TY]]**, i32**, [2 x [[S_DOUBLE_TY]]]**, [2 x i32]**, i32**)* [[PRIVATES_MAP_FN]] to void (i8*, ...)*), void (i8*, ...)** [[MAP_FN_ADDR:%.+]], +// CHECK: [[PRIVATES_MAP_FN_NOALIAS:%.+]] = call void (i8*, ...)* @llvm.noalias.p0{{.*}}(void (i8*, ...)* bitcast (void ([[PRIVATES_MAIN_TY]]*, [[S_DOUBLE_TY]]**, i32**, [2 x [[S_DOUBLE_TY]]]**, [2 x i32]**, i32**)* [[PRIVATES_MAP_FN]] to void (i8*, ...)*), +// CHECK: store void (i8*, ...)* [[PRIVATES_MAP_FN_NOALIAS]], void (i8*, ...)** [[MAP_FN_ADDR:%.+]], // CHECK: [[MAP_FN:%.+]] = load void (i8*, ...)*, void (i8*, ...)** [[MAP_FN_ADDR]], // CHECK: call void (i8*, ...) [[MAP_FN]](i8* %{{.+}}, [[S_DOUBLE_TY]]** [[PRIV_VAR_ADDR]], i32** [[PRIV_T_VAR_ADDR]], [2 x [[S_DOUBLE_TY]]]** [[PRIV_S_ARR_ADDR]], [2 x i32]** [[PRIV_VEC_ADDR]], i32** [[PRIV_SIVAR_ADDR]]) @@ -422,7 +423,8 @@ // CHECK-DAG: [[PRIV_VEC_ADDR:%.+]] = alloca [2 x i32]*, // CHECK-DAG: [[PRIV_S_ARR_ADDR:%.+]] = alloca [2 x [[S_INT_TY]]]*, // CHECK-DAG: [[PRIV_VAR_ADDR:%.+]] = alloca [[S_INT_TY]]*, -// CHECK: store void (i8*, ...)* bitcast (void ([[PRIVATES_TMAIN_TY]]*, i32**, [2 x i32]**, [2 x [[S_INT_TY]]]**, [[S_INT_TY]]**)* [[PRIVATES_MAP_FN]] to void (i8*, ...)*), void (i8*, ...)** [[MAP_FN_ADDR:%.+]], +// CHECK: [[PRIVATES_MAP_FN_NOALIAS:%.+]] = call void (i8*, ...)* @llvm.noalias.p0{{.*}}(void (i8*, ...)* bitcast (void ([[PRIVATES_TMAIN_TY]]*, i32**, [2 x i32]**, [2 x [[S_INT_TY]]]**, [[S_INT_TY]]**)* [[PRIVATES_MAP_FN]] to void (i8*, ...)*), +// CHECK: store void (i8*, ...)* [[PRIVATES_MAP_FN_NOALIAS]], void (i8*, ...)** [[MAP_FN_ADDR:%.+]], // CHECK: [[MAP_FN:%.+]] = load void (i8*, ...)*, void (i8*, ...)** [[MAP_FN_ADDR]], // CHECK: call void (i8*, ...) 
[[MAP_FN]](i8* %{{.+}}, i32** [[PRIV_T_VAR_ADDR]], [2 x i32]** [[PRIV_VEC_ADDR]], [2 x [[S_INT_TY]]]** [[PRIV_S_ARR_ADDR]], [[S_INT_TY]]** [[PRIV_VAR_ADDR]]) // CHECK: [[PRIV_T_VAR:%.+]] = load i32*, i32** [[PRIV_T_VAR_ADDR]], Index: clang/test/OpenMP/parallel_master_taskloop_simd_lastprivate_codegen.cpp =================================================================== --- clang/test/OpenMP/parallel_master_taskloop_simd_lastprivate_codegen.cpp +++ clang/test/OpenMP/parallel_master_taskloop_simd_lastprivate_codegen.cpp @@ -261,7 +261,8 @@ // CHECK: [[PRIV_S_ARR_ADDR:%.+]] = alloca [2 x [[S_DOUBLE_TY]]]*, // CHECK: [[PRIV_VEC_ADDR:%.+]] = alloca [2 x i32]*, // CHECK: [[PRIV_SIVAR_ADDR:%.+]] = alloca i32*, -// CHECK: store void (i8*, ...)* bitcast (void ([[PRIVATES_MAIN_TY]]*, [[S_DOUBLE_TY]]**, i32**, [2 x [[S_DOUBLE_TY]]]**, [2 x i32]**, i32**)* [[PRIVATES_MAP_FN]] to void (i8*, ...)*), void (i8*, ...)** [[MAP_FN_ADDR:%.+]], +// CHECK: [[PRIVATES_MAP_FN_NOALIAS:%.+]] = call void (i8*, ...)* @llvm.noalias.p0{{.*}}(void (i8*, ...)* bitcast (void ([[PRIVATES_MAIN_TY]]*, [[S_DOUBLE_TY]]**, i32**, [2 x [[S_DOUBLE_TY]]]**, [2 x i32]**, i32**)* [[PRIVATES_MAP_FN]] to void (i8*, ...)*), +// CHECK: store void (i8*, ...)* [[PRIVATES_MAP_FN_NOALIAS]], void (i8*, ...)** [[MAP_FN_ADDR:%.+]], // CHECK: [[MAP_FN:%.+]] = load void (i8*, ...)*, void (i8*, ...)** [[MAP_FN_ADDR]], // CHECK: call void (i8*, ...) [[MAP_FN]](i8* %{{.+}}, [[S_DOUBLE_TY]]** [[PRIV_VAR_ADDR]], i32** [[PRIV_T_VAR_ADDR]], [2 x [[S_DOUBLE_TY]]]** [[PRIV_S_ARR_ADDR]], [2 x i32]** [[PRIV_VEC_ADDR]], i32** [[PRIV_SIVAR_ADDR]]) @@ -423,7 +424,8 @@ // CHECK-DAG: [[PRIV_VEC_ADDR:%.+]] = alloca [2 x i32]*, // CHECK-DAG: [[PRIV_S_ARR_ADDR:%.+]] = alloca [2 x [[S_INT_TY]]]*, // CHECK-DAG: [[PRIV_VAR_ADDR:%.+]] = alloca [[S_INT_TY]]*, -// CHECK: store void (i8*, ...)* bitcast (void ([[PRIVATES_TMAIN_TY]]*, i32**, [2 x i32]**, [2 x [[S_INT_TY]]]**, [[S_INT_TY]]**)* [[PRIVATES_MAP_FN]] to void (i8*, ...)*), void (i8*, ...)** [[MAP_FN_ADDR:%.+]], +// CHECK: [[PRIVATES_MAP_FN_NOALIAS:%.+]] = call void (i8*, ...)* @llvm.noalias.p0f{{.*}}(void (i8*, ...)* bitcast (void ([[PRIVATES_TMAIN_TY]]*, i32**, [2 x i32]**, [2 x [[S_INT_TY]]]**, [[S_INT_TY]]**)* [[PRIVATES_MAP_FN]] to void (i8*, ...)*), +// CHECK: store void (i8*, ...)* [[PRIVATES_MAP_FN_NOALIAS]], void (i8*, ...)** [[MAP_FN_ADDR:%.+]], // CHECK: [[MAP_FN:%.+]] = load void (i8*, ...)*, void (i8*, ...)** [[MAP_FN_ADDR]], // CHECK: call void (i8*, ...) 
[[MAP_FN]](i8* %{{.+}}, i32** [[PRIV_T_VAR_ADDR]], [2 x i32]** [[PRIV_VEC_ADDR]], [2 x [[S_INT_TY]]]** [[PRIV_S_ARR_ADDR]], [[S_INT_TY]]** [[PRIV_VAR_ADDR]]) // CHECK: [[PRIV_T_VAR:%.+]] = load i32*, i32** [[PRIV_T_VAR_ADDR]], Index: clang/test/OpenMP/parallel_master_taskloop_simd_private_codegen.cpp =================================================================== --- clang/test/OpenMP/parallel_master_taskloop_simd_private_codegen.cpp +++ clang/test/OpenMP/parallel_master_taskloop_simd_private_codegen.cpp @@ -227,7 +227,8 @@ // CHECK: [[PRIV_S_ARR_ADDR:%.+]] = alloca [2 x [[S_DOUBLE_TY]]]*, // CHECK: [[PRIV_VEC_ADDR:%.+]] = alloca [2 x i32]*, // CHECK: [[PRIV_SIVAR_ADDR:%.+]] = alloca i32*, -// CHECK: store void (i8*, ...)* bitcast (void ([[PRIVATES_MAIN_TY]]*, [[S_DOUBLE_TY]]**, i32**, [2 x [[S_DOUBLE_TY]]]**, [2 x i32]**, i32**)* [[PRIVATES_MAP_FN]] to void (i8*, ...)*), void (i8*, ...)** [[MAP_FN_ADDR:%.+]], +// CHECK: [[PRIVATES_MAP_FN_NOALIAS:%.+]] = call void (i8*, ...)* @llvm.noalias.p0f{{.*}}(void (i8*, ...)* bitcast (void ([[PRIVATES_MAIN_TY]]*, [[S_DOUBLE_TY]]**, i32**, [2 x [[S_DOUBLE_TY]]]**, [2 x i32]**, i32**)* [[PRIVATES_MAP_FN]] to void (i8*, ...)*), +// CHECK: store void (i8*, ...)* [[PRIVATES_MAP_FN_NOALIAS]], void (i8*, ...)** [[MAP_FN_ADDR:%.+]], // CHECK: [[MAP_FN:%.+]] = load void (i8*, ...)*, void (i8*, ...)** [[MAP_FN_ADDR]], // CHECK: call void (i8*, ...) [[MAP_FN]](i8* %{{.+}}, [[S_DOUBLE_TY]]** [[PRIV_VAR_ADDR]], i32** [[PRIV_T_VAR_ADDR]], [2 x [[S_DOUBLE_TY]]]** [[PRIV_S_ARR_ADDR]], [2 x i32]** [[PRIV_VEC_ADDR]], i32** [[PRIV_SIVAR_ADDR]]) // CHECK: [[PRIV_VAR:%.+]] = load [[S_DOUBLE_TY]]*, [[S_DOUBLE_TY]]** [[PRIV_VAR_ADDR]], @@ -349,7 +350,8 @@ // CHECK-DAG: [[PRIV_VEC_ADDR:%.+]] = alloca [2 x i32]*, // CHECK-DAG: [[PRIV_S_ARR_ADDR:%.+]] = alloca [2 x [[S_INT_TY]]]*, // CHECK-DAG: [[PRIV_VAR_ADDR:%.+]] = alloca [[S_INT_TY]]*, -// CHECK: store void (i8*, ...)* bitcast (void ([[PRIVATES_TMAIN_TY]]*, i32**, [2 x i32]**, [2 x [[S_INT_TY]]]**, [[S_INT_TY]]**)* [[PRIVATES_MAP_FN]] to void (i8*, ...)*), void (i8*, ...)** [[MAP_FN_ADDR:%.+]], +// CHECK: [[PRIVATES_MAP_FN_NOALIAS:%.+]] = call void (i8*, ...)* @llvm.noalias.p0f{{.*}}(void (i8*, ...)* bitcast (void ([[PRIVATES_TMAIN_TY]]*, i32**, [2 x i32]**, [2 x [[S_INT_TY]]]**, [[S_INT_TY]]**)* [[PRIVATES_MAP_FN]] to void (i8*, ...)*), +// CHECK: store void (i8*, ...)* [[PRIVATES_MAP_FN_NOALIAS]], void (i8*, ...)** [[MAP_FN_ADDR:%.+]], // CHECK: [[MAP_FN:%.+]] = load void (i8*, ...)*, void (i8*, ...)** [[MAP_FN_ADDR]], // CHECK: call void (i8*, ...) 
[[MAP_FN]](i8* %{{.+}}, i32** [[PRIV_T_VAR_ADDR]], [2 x i32]** [[PRIV_VEC_ADDR]], [2 x [[S_INT_TY]]]** [[PRIV_S_ARR_ADDR]], [[S_INT_TY]]** [[PRIV_VAR_ADDR]]) // CHECK: [[PRIV_T_VAR:%.+]] = load i32*, i32** [[PRIV_T_VAR_ADDR]], Index: clang/test/OpenMP/parallel_reduction_task_codegen.cpp =================================================================== --- clang/test/OpenMP/parallel_reduction_task_codegen.cpp +++ clang/test/OpenMP/parallel_reduction_task_codegen.cpp @@ -114,15 +114,15 @@ // CHECK: define internal {{.*}}i32 [[TASK]](i32 {{.+}}, [[TASK_TY]]* {{.+}}) // CHECK-DAG: call i8* @__kmpc_task_reduction_get_th_data(i32 %{{.+}}, i8* [[TG:%.+]], i8* [[ARGC_REF:%.+]]) -// CHECK_DAG: [[TG]] = load i8*, i8** [[TG_ADDR:%.+]], +// CHECK_DAG: [[TG]] = load i8*, i8** [[TG_ADDR:%.+]], align // CHECK-DAG: [[ARGC_REF]] = bitcast i32* [[ARGC_ADDR:%.+]] to i8* -// CHECK-DAG: [[ARGC_ADDR]] = load i32*, i32** [[ARGC_ADDR_REF:%.+]], +// CHECK-DAG: [[ARGC_ADDR]] = load i32*, i32** [[ARGC_ADDR_REF:%.+]], align // CHECK-DAG: [[ARGC_ADDR_REF]] = getelementptr inbounds [[CAPS_TY:%.+]], %{{.+}}* [[CAP:%.+]], i32 0, i32 1 // CHECK-DAG: call i8* @__kmpc_task_reduction_get_th_data(i32 %{{.+}}, i8* [[TG:%.+]], i8* [[ARGV_REF:%.+]]) -// CHECK_DAG: [[TG]] = load i8*, i8** [[TG_ADDR]], -// CHECK-DAG: [[ARGV_REF]] = load i8*, i8** [[ARGV_ADDR:%.+]], -// CHECK-DAG: [[ARGV_ADDR]] = load i8**, i8*** [[ARGV_ADDR_REF:%.+]], -// CHECK-DAG: [[ARGV_ADDR_REF:%.+]] = load i8***, i8**** [[ARGV:%.+]], +// CHECK_DAG: [[TG]] = load i8*, i8** [[TG_ADDR]], align +// CHECK-DAG: [[ARGV_REF]] = load i8*, i8** [[ARGV_ADDR:%.+]], align +// CHECK-DAG: [[ARGV_ADDR]] = load i8**, i8*** [[ARGV_ADDR_REF:%.+]], align +// CHECK-DAG: [[ARGV_ADDR_REF:%.+]] = load i8***, i8**** [[ARGV:%.+]], align // CHECK-DAG: [[ARGV]] = getelementptr inbounds [[CAPS_TY]], [[CAPS_TY]]* [[CAP]], i32 0, i32 2 #endif Index: clang/test/OpenMP/parallel_sections_reduction_task_codegen.cpp =================================================================== --- clang/test/OpenMP/parallel_sections_reduction_task_codegen.cpp +++ clang/test/OpenMP/parallel_sections_reduction_task_codegen.cpp @@ -119,15 +119,15 @@ // CHECK: define internal {{.*}}i32 [[TASK]](i32 {{.+}}, [[TASK_TY]]* {{.+}}) // CHECK-DAG: call i8* @__kmpc_task_reduction_get_th_data(i32 %{{.+}}, i8* [[TG:%.+]], i8* [[ARGC_REF:%.+]]) -// CHECK_DAG: [[TG]] = load i8*, i8** [[TG_ADDR:%.+]], +// CHECK_DAG: [[TG]] = load i8*, i8** [[TG_ADDR:%.+]], align // CHECK-DAG: [[ARGC_REF]] = bitcast i32* [[ARGC_ADDR:%.+]] to i8* -// CHECK-DAG: [[ARGC_ADDR]] = load i32*, i32** [[ARGC_ADDR_REF:%.+]], +// CHECK-DAG: [[ARGC_ADDR]] = load i32*, i32** [[ARGC_ADDR_REF:%.+]], align // CHECK-DAG: [[ARGC_ADDR_REF]] = getelementptr inbounds [[CAPS_TY:%.+]], %{{.+}}* [[CAP:%.+]], i32 0, i32 1 // CHECK-DAG: call i8* @__kmpc_task_reduction_get_th_data(i32 %{{.+}}, i8* [[TG:%.+]], i8* [[ARGV_REF:%.+]]) -// CHECK_DAG: [[TG]] = load i8*, i8** [[TG_ADDR]], -// CHECK-DAG: [[ARGV_REF]] = load i8*, i8** [[ARGV_ADDR:%.+]], -// CHECK-DAG: [[ARGV_ADDR]] = load i8**, i8*** [[ARGV_ADDR_REF:%.+]], -// CHECK-DAG: [[ARGV_ADDR_REF:%.+]] = load i8***, i8**** [[ARGV:%.+]], +// CHECK_DAG: [[TG]] = load i8*, i8** [[TG_ADDR]], align +// CHECK-DAG: [[ARGV_REF]] = load i8*, i8** [[ARGV_ADDR:%.+]], align +// CHECK-DAG: [[ARGV_ADDR]] = load i8**, i8*** [[ARGV_ADDR_REF:%.+]], align +// CHECK-DAG: [[ARGV_ADDR_REF:%.+]] = load i8***, i8**** [[ARGV:%.+]], align // CHECK-DAG: [[ARGV]] = getelementptr inbounds [[CAPS_TY]], [[CAPS_TY]]* [[CAP]], i32 0, i32 2 #endif 
Index: clang/test/OpenMP/sections_reduction_task_codegen.cpp =================================================================== --- clang/test/OpenMP/sections_reduction_task_codegen.cpp +++ clang/test/OpenMP/sections_reduction_task_codegen.cpp @@ -120,15 +120,15 @@ // CHECK: define internal {{.*}}i32 [[TASK]](i32 {{.+}}, [[TASK_TY]]* {{.+}}) // CHECK-DAG: call i8* @__kmpc_task_reduction_get_th_data(i32 %{{.+}}, i8* [[TG:%.+]], i8* [[ARGC_REF:%.+]]) -// CHECK_DAG: [[TG]] = load i8*, i8** [[TG_ADDR:%.+]], +// CHECK_DAG: [[TG]] = load i8*, i8** [[TG_ADDR:%.+]], align // CHECK-DAG: [[ARGC_REF]] = bitcast i32* [[ARGC_ADDR:%.+]] to i8* -// CHECK-DAG: [[ARGC_ADDR]] = load i32*, i32** [[ARGC_ADDR_REF:%.+]], +// CHECK-DAG: [[ARGC_ADDR]] = load i32*, i32** [[ARGC_ADDR_REF:%.+]], align // CHECK-DAG: [[ARGC_ADDR_REF]] = getelementptr inbounds [[CAPS_TY:%.+]], %{{.+}}* [[CAP:%.+]], i32 0, i32 1 // CHECK-DAG: call i8* @__kmpc_task_reduction_get_th_data(i32 %{{.+}}, i8* [[TG:%.+]], i8* [[ARGV_REF:%.+]]) -// CHECK_DAG: [[TG]] = load i8*, i8** [[TG_ADDR]], -// CHECK-DAG: [[ARGV_REF]] = load i8*, i8** [[ARGV_ADDR:%.+]], -// CHECK-DAG: [[ARGV_ADDR]] = load i8**, i8*** [[ARGV_ADDR_REF:%.+]], -// CHECK-DAG: [[ARGV_ADDR_REF:%.+]] = load i8***, i8**** [[ARGV:%.+]], +// CHECK_DAG: [[TG]] = load i8*, i8** [[TG_ADDR]], align +// CHECK-DAG: [[ARGV_REF]] = load i8*, i8** [[ARGV_ADDR:%.+]], align +// CHECK-DAG: [[ARGV_ADDR]] = load i8**, i8*** [[ARGV_ADDR_REF:%.+]], align +// CHECK-DAG: [[ARGV_ADDR_REF:%.+]] = load i8***, i8**** [[ARGV:%.+]], align // CHECK-DAG: [[ARGV]] = getelementptr inbounds [[CAPS_TY]], [[CAPS_TY]]* [[CAP]], i32 0, i32 2 #endif Index: clang/test/OpenMP/target_parallel_for_reduction_task_codegen.cpp =================================================================== --- clang/test/OpenMP/target_parallel_for_reduction_task_codegen.cpp +++ clang/test/OpenMP/target_parallel_for_reduction_task_codegen.cpp @@ -115,15 +115,15 @@ // CHECK: define internal {{.*}}i32 [[TASK]](i32 {{.+}}, [[TASK_TY]]* {{.+}}) // CHECK-DAG: call i8* @__kmpc_task_reduction_get_th_data(i32 %{{.+}}, i8* [[TG:%.+]], i8* [[ARGC_REF:%.+]]) -// CHECK_DAG: [[TG]] = load i8*, i8** [[TG_ADDR:%.+]], +// CHECK_DAG: [[TG]] = load i8*, i8** [[TG_ADDR:%.+]], align // CHECK-DAG: [[ARGC_REF]] = bitcast i32* [[ARGC_ADDR:%.+]] to i8* -// CHECK-DAG: [[ARGC_ADDR]] = load i32*, i32** [[ARGC_ADDR_REF:%.+]], +// CHECK-DAG: [[ARGC_ADDR]] = load i32*, i32** [[ARGC_ADDR_REF:%.+]], align // CHECK-DAG: [[ARGC_ADDR_REF]] = getelementptr inbounds [[CAPS_TY:%.+]], %{{.+}}* [[CAP:%.+]], i32 0, i32 1 // CHECK-DAG: call i8* @__kmpc_task_reduction_get_th_data(i32 %{{.+}}, i8* [[TG:%.+]], i8* [[ARGV_REF:%.+]]) -// CHECK_DAG: [[TG]] = load i8*, i8** [[TG_ADDR]], -// CHECK-DAG: [[ARGV_REF]] = load i8*, i8** [[ARGV_ADDR:%.+]], -// CHECK-DAG: [[ARGV_ADDR]] = load i8**, i8*** [[ARGV_ADDR_REF:%.+]], -// CHECK-DAG: [[ARGV_ADDR_REF:%.+]] = load i8***, i8**** [[ARGV:%.+]], +// CHECK_DAG: [[TG]] = load i8*, i8** [[TG_ADDR]], align +// CHECK-DAG: [[ARGV_REF]] = load i8*, i8** [[ARGV_ADDR:%.+]], align +// CHECK-DAG: [[ARGV_ADDR]] = load i8**, i8*** [[ARGV_ADDR_REF:%.+]], align +// CHECK-DAG: [[ARGV_ADDR_REF:%.+]] = load i8***, i8**** [[ARGV:%.+]], align // CHECK-DAG: [[ARGV]] = getelementptr inbounds [[CAPS_TY]], [[CAPS_TY]]* [[CAP]], i32 0, i32 2 #endif Index: clang/test/OpenMP/target_parallel_reduction_task_codegen.cpp =================================================================== --- clang/test/OpenMP/target_parallel_reduction_task_codegen.cpp +++ 
clang/test/OpenMP/target_parallel_reduction_task_codegen.cpp @@ -114,15 +114,15 @@ // CHECK: define internal {{.*}}i32 [[TASK]](i32 {{.+}}, [[TASK_TY]]* {{.+}}) // CHECK-DAG: call i8* @__kmpc_task_reduction_get_th_data(i32 %{{.+}}, i8* [[TG:%.+]], i8* [[ARGC_REF:%.+]]) -// CHECK_DAG: [[TG]] = load i8*, i8** [[TG_ADDR:%.+]], +// CHECK_DAG: [[TG]] = load i8*, i8** [[TG_ADDR:%.+]], align // CHECK-DAG: [[ARGC_REF]] = bitcast i32* [[ARGC_ADDR:%.+]] to i8* -// CHECK-DAG: [[ARGC_ADDR]] = load i32*, i32** [[ARGC_ADDR_REF:%.+]], +// CHECK-DAG: [[ARGC_ADDR]] = load i32*, i32** [[ARGC_ADDR_REF:%.+]], align // CHECK-DAG: [[ARGC_ADDR_REF]] = getelementptr inbounds [[CAPS_TY:%.+]], %{{.+}}* [[CAP:%.+]], i32 0, i32 1 // CHECK-DAG: call i8* @__kmpc_task_reduction_get_th_data(i32 %{{.+}}, i8* [[TG:%.+]], i8* [[ARGV_REF:%.+]]) -// CHECK_DAG: [[TG]] = load i8*, i8** [[TG_ADDR]], -// CHECK-DAG: [[ARGV_REF]] = load i8*, i8** [[ARGV_ADDR:%.+]], -// CHECK-DAG: [[ARGV_ADDR]] = load i8**, i8*** [[ARGV_ADDR_REF:%.+]], -// CHECK-DAG: [[ARGV_ADDR_REF:%.+]] = load i8***, i8**** [[ARGV:%.+]], +// CHECK_DAG: [[TG]] = load i8*, i8** [[TG_ADDR]], align +// CHECK-DAG: [[ARGV_REF]] = load i8*, i8** [[ARGV_ADDR:%.+]], align +// CHECK-DAG: [[ARGV_ADDR]] = load i8**, i8*** [[ARGV_ADDR_REF:%.+]], align +// CHECK-DAG: [[ARGV_ADDR_REF:%.+]] = load i8***, i8**** [[ARGV:%.+]], align // CHECK-DAG: [[ARGV]] = getelementptr inbounds [[CAPS_TY]], [[CAPS_TY]]* [[CAP]], i32 0, i32 2 #endif Index: clang/test/OpenMP/target_teams_distribute_parallel_for_reduction_task_codegen.cpp =================================================================== --- clang/test/OpenMP/target_teams_distribute_parallel_for_reduction_task_codegen.cpp +++ clang/test/OpenMP/target_teams_distribute_parallel_for_reduction_task_codegen.cpp @@ -115,15 +115,15 @@ // CHECK: define internal {{.*}}i32 [[TASK]](i32 {{.+}}, [[TASK_TY]]* {{.+}}) // CHECK-DAG: call i8* @__kmpc_task_reduction_get_th_data(i32 %{{.+}}, i8* [[TG:%.+]], i8* [[ARGC_REF:%.+]]) -// CHECK_DAG: [[TG]] = load i8*, i8** [[TG_ADDR:%.+]], +// CHECK_DAG: [[TG]] = load i8*, i8** [[TG_ADDR:%.+]], align // CHECK-DAG: [[ARGC_REF]] = bitcast i32* [[ARGC_ADDR:%.+]] to i8* -// CHECK-DAG: [[ARGC_ADDR]] = load i32*, i32** [[ARGC_ADDR_REF:%.+]], +// CHECK-DAG: [[ARGC_ADDR]] = load i32*, i32** [[ARGC_ADDR_REF:%.+]], align // CHECK-DAG: [[ARGC_ADDR_REF]] = getelementptr inbounds [[CAPS_TY:%.+]], %{{.+}}* [[CAP:%.+]], i32 0, i32 1 // CHECK-DAG: call i8* @__kmpc_task_reduction_get_th_data(i32 %{{.+}}, i8* [[TG:%.+]], i8* [[ARGV_REF:%.+]]) -// CHECK_DAG: [[TG]] = load i8*, i8** [[TG_ADDR]], -// CHECK-DAG: [[ARGV_REF]] = load i8*, i8** [[ARGV_ADDR:%.+]], -// CHECK-DAG: [[ARGV_ADDR]] = load i8**, i8*** [[ARGV_ADDR_REF:%.+]], -// CHECK-DAG: [[ARGV_ADDR_REF:%.+]] = load i8***, i8**** [[ARGV:%.+]], +// CHECK_DAG: [[TG]] = load i8*, i8** [[TG_ADDR]], align +// CHECK-DAG: [[ARGV_REF]] = load i8*, i8** [[ARGV_ADDR:%.+]], align +// CHECK-DAG: [[ARGV_ADDR]] = load i8**, i8*** [[ARGV_ADDR_REF:%.+]], align +// CHECK-DAG: [[ARGV_ADDR_REF:%.+]] = load i8***, i8**** [[ARGV:%.+]], align // CHECK-DAG: [[ARGV]] = getelementptr inbounds [[CAPS_TY]], [[CAPS_TY]]* [[CAP]], i32 0, i32 2 #endif Index: clang/test/OpenMP/task_codegen.cpp =================================================================== --- clang/test/OpenMP/task_codegen.cpp +++ clang/test/OpenMP/task_codegen.cpp @@ -273,7 +273,7 @@ return a; } // CHECK: define internal i32 [[TASK_ENTRY1]](i32 %0, [[KMP_TASK_T]]{{.*}}* noalias %1) -// CHECK: store i32 15, i32* [[A_PTR:@.+]] 
+// CHECK: store i32 15, i32* [[A_PTR:@.+]], align 4 // CHECK: [[A_VAL:%.+]] = load i32, i32* [[A_PTR]] // CHECK: [[A_VAL_I8:%.+]] = trunc i32 [[A_VAL]] to i8 // CHECK: store i8 [[A_VAL_I8]], i8* %{{.+}} Index: clang/test/OpenMP/task_firstprivate_codegen.cpp =================================================================== --- clang/test/OpenMP/task_firstprivate_codegen.cpp +++ clang/test/OpenMP/task_firstprivate_codegen.cpp @@ -272,7 +272,8 @@ // CHECK: [[PRIV_VEC_ADDR:%.+]] = alloca [2 x i32]*, // CHECK: [[PRIV_SIVAR_ADDR:%.+]] = alloca i32*, // CHECK: [[PRIV_VAR_ADDR:%.+]] = alloca [[S_DOUBLE_TY]]*, -// CHECK: store void (i8*, ...)* bitcast (void ([[PRIVATES_MAIN_TY]]*, i32**, [2 x [[S_DOUBLE_TY]]]**, [2 x i32]**, i32**, [[S_DOUBLE_TY]]**)* [[PRIVATES_MAP_FN]] to void (i8*, ...)*), void (i8*, ...)** [[MAP_FN_ADDR:%.+]], +// CHECK: [[PRIVATES_MAP_FN_NOALIAS:%.*]] = call void (i8*, ...)* @llvm.noalias.p0{{.*}}(void (i8*, ...)* bitcast (void ([[PRIVATES_MAIN_TY]]*, i32**, [2 x [[S_DOUBLE_TY]]]**, [2 x i32]**, i32**, [[S_DOUBLE_TY]]**)* [[PRIVATES_MAP_FN]] to void (i8*, ...)*), +// CHECK: store void (i8*, ...)* [[PRIVATES_MAP_FN_NOALIAS]], void (i8*, ...)** [[MAP_FN_ADDR:%.+]], // CHECK: [[MAP_FN:%.+]] = load void (i8*, ...)*, void (i8*, ...)** [[MAP_FN_ADDR]], // CHECK: call void (i8*, ...) [[MAP_FN]](i8* %{{.+}}, i32** [[PRIV_T_VAR_ADDR]], [2 x [[S_DOUBLE_TY]]]** [[PRIV_S_ARR_ADDR]], [2 x i32]** [[PRIV_VEC_ADDR]], i32** [[PRIV_SIVAR_ADDR]], [[S_DOUBLE_TY]]** [[PRIV_VAR_ADDR]]) @@ -393,7 +394,8 @@ // CHECK-DAG: [[PRIV_VEC_ADDR:%.+]] = alloca [2 x i32]*, // CHECK-DAG: [[PRIV_S_ARR_ADDR:%.+]] = alloca [2 x [[S_INT_TY]]]*, // CHECK-DAG: [[PRIV_VAR_ADDR:%.+]] = alloca [[S_INT_TY]]*, -// CHECK: store void (i8*, ...)* bitcast (void ([[PRIVATES_TMAIN_TY]]*, i32**, [2 x i32]**, [2 x [[S_INT_TY]]]**, [[S_INT_TY]]**)* [[PRIVATES_MAP_FN]] to void (i8*, ...)*), void (i8*, ...)** [[MAP_FN_ADDR:%.+]], +// CHECK: [[PRIVATES_MAP_FN_NOALIAS:%.+]] = call void (i8*, ...)* @llvm.noalias.p0f{{.*}}(void (i8*, ...)* bitcast (void ([[PRIVATES_TMAIN_TY]]*, i32**, [2 x i32]**, [2 x [[S_INT_TY]]]**, [[S_INT_TY]]**)* [[PRIVATES_MAP_FN]] to void (i8*, ...)*), +// CHECK: store void (i8*, ...)* [[PRIVATES_MAP_FN_NOALIAS]], void (i8*, ...)** [[MAP_FN_ADDR:%.+]], // CHECK: [[MAP_FN:%.+]] = load void (i8*, ...)*, void (i8*, ...)** [[MAP_FN_ADDR]], // CHECK: call void (i8*, ...) 
[[MAP_FN]](i8* %{{.+}}, i32** [[PRIV_T_VAR_ADDR]], [2 x i32]** [[PRIV_VEC_ADDR]], [2 x [[S_INT_TY]]]** [[PRIV_S_ARR_ADDR]], [[S_INT_TY]]** [[PRIV_VAR_ADDR]]) // CHECK: [[PRIV_T_VAR:%.+]] = load i32*, i32** [[PRIV_T_VAR_ADDR]], Index: clang/test/OpenMP/task_private_codegen.cpp =================================================================== --- clang/test/OpenMP/task_private_codegen.cpp +++ clang/test/OpenMP/task_private_codegen.cpp @@ -228,7 +228,8 @@ // CHECK: [[PRIV_S_ARR_ADDR:%.+]] = alloca [2 x [[S_DOUBLE_TY]]]*, // CHECK: [[PRIV_VEC_ADDR:%.+]] = alloca [2 x i32]*, // CHECK: [[PRIV_SIVAR_ADDR:%.+]] = alloca i32*, -// CHECK: store void (i8*, ...)* bitcast (void ([[PRIVATES_MAIN_TY]]*, [[S_DOUBLE_TY]]**, i32**, [2 x [[S_DOUBLE_TY]]]**, [2 x i32]**, i32**)* [[PRIVATES_MAP_FN]] to void (i8*, ...)*), void (i8*, ...)** [[MAP_FN_ADDR:%.+]], +// CHECK: [[PRIVATES_MAP_FN_NOALIAS:%.*]] = call void (i8*, ...)* @llvm.noalias.p0{{.*}}(void (i8*, ...)* bitcast (void ([[PRIVATES_MAIN_TY]]*, [[S_DOUBLE_TY]]**, i32**, [2 x [[S_DOUBLE_TY]]]**, [2 x i32]**, i32**)* [[PRIVATES_MAP_FN]] to void (i8*, ...)*), +// CHECK: store void (i8*, ...)* [[PRIVATES_MAP_FN_NOALIAS]], void (i8*, ...)** [[MAP_FN_ADDR:%.+]], // CHECK: [[MAP_FN:%.+]] = load void (i8*, ...)*, void (i8*, ...)** [[MAP_FN_ADDR]], // CHECK: call void (i8*, ...) [[MAP_FN]](i8* %{{.+}}, [[S_DOUBLE_TY]]** [[PRIV_VAR_ADDR]], i32** [[PRIV_T_VAR_ADDR]], [2 x [[S_DOUBLE_TY]]]** [[PRIV_S_ARR_ADDR]], [2 x i32]** [[PRIV_VEC_ADDR]], i32** [[PRIV_SIVAR_ADDR]]) // CHECK: [[PRIV_VAR:%.+]] = load [[S_DOUBLE_TY]]*, [[S_DOUBLE_TY]]** [[PRIV_VAR_ADDR]], @@ -339,7 +340,8 @@ // CHECK-DAG: [[PRIV_VEC_ADDR:%.+]] = alloca [2 x i32]*, // CHECK-DAG: [[PRIV_S_ARR_ADDR:%.+]] = alloca [2 x [[S_INT_TY]]]*, // CHECK-DAG: [[PRIV_VAR_ADDR:%.+]] = alloca [[S_INT_TY]]*, -// CHECK: store void (i8*, ...)* bitcast (void ([[PRIVATES_TMAIN_TY]]*, i32**, [2 x i32]**, [2 x [[S_INT_TY]]]**, [[S_INT_TY]]**)* [[PRIVATES_MAP_FN]] to void (i8*, ...)*), void (i8*, ...)** [[MAP_FN_ADDR:%.+]], +// CHECK: [[PRIVATES_MAP_FN_NOALIAS:%.*]] = call void (i8*, ...)* @llvm.noalias.p0f{{.*}}(void (i8*, ...)* bitcast (void ([[PRIVATES_TMAIN_TY]]*, i32**, [2 x i32]**, [2 x [[S_INT_TY]]]**, [[S_INT_TY]]**)* [[PRIVATES_MAP_FN]] to void (i8*, ...)*), +// CHECK: store void (i8*, ...)* [[PRIVATES_MAP_FN_NOALIAS]], void (i8*, ...)** [[MAP_FN_ADDR:%.+]], // CHECK: [[MAP_FN:%.+]] = load void (i8*, ...)*, void (i8*, ...)** [[MAP_FN_ADDR]], // CHECK: call void (i8*, ...) 
[[MAP_FN]](i8* %{{.+}}, i32** [[PRIV_T_VAR_ADDR]], [2 x i32]** [[PRIV_VEC_ADDR]], [2 x [[S_INT_TY]]]** [[PRIV_S_ARR_ADDR]], [[S_INT_TY]]** [[PRIV_VAR_ADDR]]) // CHECK: [[PRIV_T_VAR:%.+]] = load i32*, i32** [[PRIV_T_VAR_ADDR]], Index: clang/test/OpenMP/taskloop_firstprivate_codegen.cpp =================================================================== --- clang/test/OpenMP/taskloop_firstprivate_codegen.cpp +++ clang/test/OpenMP/taskloop_firstprivate_codegen.cpp @@ -274,7 +274,8 @@ // CHECK: [[PRIV_S_ARR_ADDR:%.+]] = alloca [2 x [[S_DOUBLE_TY]]]*, // CHECK: [[PRIV_VEC_ADDR:%.+]] = alloca [2 x i32]*, // CHECK: [[PRIV_SIVAR_ADDR:%.+]] = alloca i32*, -// CHECK: store void (i8*, ...)* bitcast (void ([[PRIVATES_MAIN_TY]]*, [[S_DOUBLE_TY]]**, i32**, [2 x [[S_DOUBLE_TY]]]**, [2 x i32]**, i32**)* [[PRIVATES_MAP_FN]] to void (i8*, ...)*), void (i8*, ...)** [[MAP_FN_ADDR:%.+]], +// CHECK: [[PRIVATES_MAP_FN_NOALIAS:%.+]] = call void (i8*, ...)* @llvm.noalias.p0f{{.*}}(void (i8*, ...)* bitcast (void ([[PRIVATES_MAIN_TY]]*, [[S_DOUBLE_TY]]**, i32**, [2 x [[S_DOUBLE_TY]]]**, [2 x i32]**, i32**)* [[PRIVATES_MAP_FN]] to void (i8*, ...)*), +// CHECK: store void (i8*, ...)* [[PRIVATES_MAP_FN_NOALIAS]], void (i8*, ...)** [[MAP_FN_ADDR:%.+]], // CHECK: [[MAP_FN:%.+]] = load void (i8*, ...)*, void (i8*, ...)** [[MAP_FN_ADDR]], // CHECK: call void (i8*, ...) [[MAP_FN]](i8* %{{.+}}, [[S_DOUBLE_TY]]** [[PRIV_VAR_ADDR]], i32** [[PRIV_T_VAR_ADDR]], [2 x [[S_DOUBLE_TY]]]** [[PRIV_S_ARR_ADDR]], [2 x i32]** [[PRIV_VEC_ADDR]], i32** [[PRIV_SIVAR_ADDR]]) @@ -424,7 +425,8 @@ // CHECK-DAG: [[PRIV_VEC_ADDR:%.+]] = alloca [2 x i32]*, // CHECK-DAG: [[PRIV_S_ARR_ADDR:%.+]] = alloca [2 x [[S_INT_TY]]]*, // CHECK-DAG: [[PRIV_VAR_ADDR:%.+]] = alloca [[S_INT_TY]]*, -// CHECK: store void (i8*, ...)* bitcast (void ([[PRIVATES_TMAIN_TY]]*, i32**, [2 x i32]**, [2 x [[S_INT_TY]]]**, [[S_INT_TY]]**)* [[PRIVATES_MAP_FN]] to void (i8*, ...)*), void (i8*, ...)** [[MAP_FN_ADDR:%.+]], +// CHECK: [[PRIVATES_MAP_FN_NOALIAS:%.+]] = call void (i8*, ...)* @llvm.noalias.p0f{{.*}}(void (i8*, ...)* bitcast (void ([[PRIVATES_TMAIN_TY]]*, i32**, [2 x i32]**, [2 x [[S_INT_TY]]]**, [[S_INT_TY]]**)* [[PRIVATES_MAP_FN]] to void (i8*, ...)*), +// CHECK: store void (i8*, ...)* [[PRIVATES_MAP_FN_NOALIAS]], void (i8*, ...)** [[MAP_FN_ADDR:%.+]], // CHECK: [[MAP_FN:%.+]] = load void (i8*, ...)*, void (i8*, ...)** [[MAP_FN_ADDR]], // CHECK: call void (i8*, ...) 
[[MAP_FN]](i8* %{{.+}}, i32** [[PRIV_T_VAR_ADDR]], [2 x i32]** [[PRIV_VEC_ADDR]], [2 x [[S_INT_TY]]]** [[PRIV_S_ARR_ADDR]], [[S_INT_TY]]** [[PRIV_VAR_ADDR]]) // CHECK: [[PRIV_T_VAR:%.+]] = load i32*, i32** [[PRIV_T_VAR_ADDR]], Index: clang/test/OpenMP/taskloop_lastprivate_codegen.cpp =================================================================== --- clang/test/OpenMP/taskloop_lastprivate_codegen.cpp +++ clang/test/OpenMP/taskloop_lastprivate_codegen.cpp @@ -266,7 +266,8 @@ // CHECK: [[PRIV_S_ARR_ADDR:%.+]] = alloca [2 x [[S_DOUBLE_TY]]]*, // CHECK: [[PRIV_VEC_ADDR:%.+]] = alloca [2 x i32]*, // CHECK: [[PRIV_SIVAR_ADDR:%.+]] = alloca i32*, -// CHECK: store void (i8*, ...)* bitcast (void ([[PRIVATES_MAIN_TY]]*, [[S_DOUBLE_TY]]**, i32**, [2 x [[S_DOUBLE_TY]]]**, [2 x i32]**, i32**)* [[PRIVATES_MAP_FN]] to void (i8*, ...)*), void (i8*, ...)** [[MAP_FN_ADDR:%.+]], +// CHECK: [[PRIVATES_MAP_FN_NOALIAS:%.+]] = call void (i8*, ...)* @llvm.noalias.p0f{{.*}}(void (i8*, ...)* bitcast (void ([[PRIVATES_MAIN_TY]]*, [[S_DOUBLE_TY]]**, i32**, [2 x [[S_DOUBLE_TY]]]**, [2 x i32]**, i32**)* [[PRIVATES_MAP_FN]] to void (i8*, ...)*), +// CHECK: store void (i8*, ...)* [[PRIVATES_MAP_FN_NOALIAS]], void (i8*, ...)** [[MAP_FN_ADDR:%.+]], // CHECK: [[MAP_FN:%.+]] = load void (i8*, ...)*, void (i8*, ...)** [[MAP_FN_ADDR]], // CHECK: call void (i8*, ...) [[MAP_FN]](i8* %{{.+}}, [[S_DOUBLE_TY]]** [[PRIV_VAR_ADDR]], i32** [[PRIV_T_VAR_ADDR]], [2 x [[S_DOUBLE_TY]]]** [[PRIV_S_ARR_ADDR]], [2 x i32]** [[PRIV_VEC_ADDR]], i32** [[PRIV_SIVAR_ADDR]]) @@ -434,7 +435,8 @@ // CHECK-DAG: [[PRIV_VEC_ADDR:%.+]] = alloca [2 x i32]*, // CHECK-DAG: [[PRIV_S_ARR_ADDR:%.+]] = alloca [2 x [[S_INT_TY]]]*, // CHECK-DAG: [[PRIV_VAR_ADDR:%.+]] = alloca [[S_INT_TY]]*, -// CHECK: store void (i8*, ...)* bitcast (void ([[PRIVATES_TMAIN_TY]]*, i32**, [2 x i32]**, [2 x [[S_INT_TY]]]**, [[S_INT_TY]]**)* [[PRIVATES_MAP_FN]] to void (i8*, ...)*), void (i8*, ...)** [[MAP_FN_ADDR:%.+]], +// CHECK: [[PRIVATES_MAP_FN_NOALIAS:%.+]] = call void (i8*, ...)* @llvm.noalias.p0f{{.*}}(void (i8*, ...)* bitcast (void ([[PRIVATES_TMAIN_TY]]*, i32**, [2 x i32]**, [2 x [[S_INT_TY]]]**, [[S_INT_TY]]**)* [[PRIVATES_MAP_FN]] to void (i8*, ...)*), +// CHECK: store void (i8*, ...)* [[PRIVATES_MAP_FN_NOALIAS]], void (i8*, ...)** [[MAP_FN_ADDR:%.+]], // CHECK: [[MAP_FN:%.+]] = load void (i8*, ...)*, void (i8*, ...)** [[MAP_FN_ADDR]], // CHECK: call void (i8*, ...) 
[[MAP_FN]](i8* %{{.+}}, i32** [[PRIV_T_VAR_ADDR]], [2 x i32]** [[PRIV_VEC_ADDR]], [2 x [[S_INT_TY]]]** [[PRIV_S_ARR_ADDR]], [[S_INT_TY]]** [[PRIV_VAR_ADDR]]) // CHECK: [[PRIV_T_VAR:%.+]] = load i32*, i32** [[PRIV_T_VAR_ADDR]], Index: clang/test/OpenMP/taskloop_private_codegen.cpp =================================================================== --- clang/test/OpenMP/taskloop_private_codegen.cpp +++ clang/test/OpenMP/taskloop_private_codegen.cpp @@ -228,7 +228,8 @@ // CHECK: [[PRIV_S_ARR_ADDR:%.+]] = alloca [2 x [[S_DOUBLE_TY]]]*, // CHECK: [[PRIV_VEC_ADDR:%.+]] = alloca [2 x i32]*, // CHECK: [[PRIV_SIVAR_ADDR:%.+]] = alloca i32*, -// CHECK: store void (i8*, ...)* bitcast (void ([[PRIVATES_MAIN_TY]]*, [[S_DOUBLE_TY]]**, i32**, [2 x [[S_DOUBLE_TY]]]**, [2 x i32]**, i32**)* [[PRIVATES_MAP_FN]] to void (i8*, ...)*), void (i8*, ...)** [[MAP_FN_ADDR:%.+]], +// CHECK: [[PRIVATES_MAP_FN_NOALIAS:%.+]] = call void (i8*, ...)* @llvm.noalias.p0f{{.*}}(void (i8*, ...)* bitcast (void ([[PRIVATES_MAIN_TY]]*, [[S_DOUBLE_TY]]**, i32**, [2 x [[S_DOUBLE_TY]]]**, [2 x i32]**, i32**)* [[PRIVATES_MAP_FN]] to void (i8*, ...)*), +// CHECK: store void (i8*, ...)* [[PRIVATES_MAP_FN_NOALIAS]], void (i8*, ...)** [[MAP_FN_ADDR:%.+]], // CHECK: [[MAP_FN:%.+]] = load void (i8*, ...)*, void (i8*, ...)** [[MAP_FN_ADDR]], // CHECK: call void (i8*, ...) [[MAP_FN]](i8* %{{.+}}, [[S_DOUBLE_TY]]** [[PRIV_VAR_ADDR]], i32** [[PRIV_T_VAR_ADDR]], [2 x [[S_DOUBLE_TY]]]** [[PRIV_S_ARR_ADDR]], [2 x i32]** [[PRIV_VEC_ADDR]], i32** [[PRIV_SIVAR_ADDR]]) // CHECK: [[PRIV_VAR:%.+]] = load [[S_DOUBLE_TY]]*, [[S_DOUBLE_TY]]** [[PRIV_VAR_ADDR]], @@ -356,7 +357,8 @@ // CHECK-DAG: [[PRIV_VEC_ADDR:%.+]] = alloca [2 x i32]*, // CHECK-DAG: [[PRIV_S_ARR_ADDR:%.+]] = alloca [2 x [[S_INT_TY]]]*, // CHECK-DAG: [[PRIV_VAR_ADDR:%.+]] = alloca [[S_INT_TY]]*, -// CHECK: store void (i8*, ...)* bitcast (void ([[PRIVATES_TMAIN_TY]]*, i32**, [2 x i32]**, [2 x [[S_INT_TY]]]**, [[S_INT_TY]]**)* [[PRIVATES_MAP_FN]] to void (i8*, ...)*), void (i8*, ...)** [[MAP_FN_ADDR:%.+]], +// CHECK: [[PRIVATES_MAP_FN_NOALIAS:%.+]] = call void (i8*, ...)* @llvm.noalias.p0f{{.*}}(void (i8*, ...)* bitcast (void ([[PRIVATES_TMAIN_TY]]*, i32**, [2 x i32]**, [2 x [[S_INT_TY]]]**, [[S_INT_TY]]**)* [[PRIVATES_MAP_FN]] to void (i8*, ...)*), +// CHECK: store void (i8*, ...)* [[PRIVATES_MAP_FN_NOALIAS]], void (i8*, ...)** [[MAP_FN_ADDR:%.+]], // CHECK: [[MAP_FN:%.+]] = load void (i8*, ...)*, void (i8*, ...)** [[MAP_FN_ADDR]], // CHECK: call void (i8*, ...) 
[[MAP_FN]](i8* %{{.+}}, i32** [[PRIV_T_VAR_ADDR]], [2 x i32]** [[PRIV_VEC_ADDR]], [2 x [[S_INT_TY]]]** [[PRIV_S_ARR_ADDR]], [[S_INT_TY]]** [[PRIV_VAR_ADDR]]) // CHECK: [[PRIV_T_VAR:%.+]] = load i32*, i32** [[PRIV_T_VAR_ADDR]], Index: clang/test/OpenMP/taskloop_simd_firstprivate_codegen.cpp =================================================================== --- clang/test/OpenMP/taskloop_simd_firstprivate_codegen.cpp +++ clang/test/OpenMP/taskloop_simd_firstprivate_codegen.cpp @@ -274,7 +274,8 @@ // CHECK: [[PRIV_S_ARR_ADDR:%.+]] = alloca [2 x [[S_DOUBLE_TY]]]*, // CHECK: [[PRIV_VEC_ADDR:%.+]] = alloca [2 x i32]*, // CHECK: [[PRIV_SIVAR_ADDR:%.+]] = alloca i32*, -// CHECK: store void (i8*, ...)* bitcast (void ([[PRIVATES_MAIN_TY]]*, [[S_DOUBLE_TY]]**, i32**, [2 x [[S_DOUBLE_TY]]]**, [2 x i32]**, i32**)* [[PRIVATES_MAP_FN]] to void (i8*, ...)*), void (i8*, ...)** [[MAP_FN_ADDR:%.+]], +// CHECK: [[PRIVATES_MAP_FN_NOALIAS:%.+]] = call void (i8*, ...)* @llvm.noalias.p0f{{.*}}(void (i8*, ...)* bitcast (void ([[PRIVATES_MAIN_TY]]*, [[S_DOUBLE_TY]]**, i32**, [2 x [[S_DOUBLE_TY]]]**, [2 x i32]**, i32**)* [[PRIVATES_MAP_FN]] to void (i8*, ...)*), +// CHECK: store void (i8*, ...)* [[PRIVATES_MAP_FN_NOALIAS]], void (i8*, ...)** [[MAP_FN_ADDR:%.+]], // CHECK: [[MAP_FN:%.+]] = load void (i8*, ...)*, void (i8*, ...)** [[MAP_FN_ADDR]], // CHECK: call void (i8*, ...) [[MAP_FN]](i8* %{{.+}}, [[S_DOUBLE_TY]]** [[PRIV_VAR_ADDR]], i32** [[PRIV_T_VAR_ADDR]], [2 x [[S_DOUBLE_TY]]]** [[PRIV_S_ARR_ADDR]], [2 x i32]** [[PRIV_VEC_ADDR]], i32** [[PRIV_SIVAR_ADDR]]) @@ -424,7 +425,8 @@ // CHECK-DAG: [[PRIV_VEC_ADDR:%.+]] = alloca [2 x i32]*, // CHECK-DAG: [[PRIV_S_ARR_ADDR:%.+]] = alloca [2 x [[S_INT_TY]]]*, // CHECK-DAG: [[PRIV_VAR_ADDR:%.+]] = alloca [[S_INT_TY]]*, -// CHECK: store void (i8*, ...)* bitcast (void ([[PRIVATES_TMAIN_TY]]*, i32**, [2 x i32]**, [2 x [[S_INT_TY]]]**, [[S_INT_TY]]**)* [[PRIVATES_MAP_FN]] to void (i8*, ...)*), void (i8*, ...)** [[MAP_FN_ADDR:%.+]], +// CHECK: [[PRIVATES_MAP_FN_NOALIAS:%.+]] = call void (i8*, ...)* @llvm.noalias.p0f{{.*}}(void (i8*, ...)* bitcast (void ([[PRIVATES_TMAIN_TY]]*, i32**, [2 x i32]**, [2 x [[S_INT_TY]]]**, [[S_INT_TY]]**)* [[PRIVATES_MAP_FN]] to void (i8*, ...)*), +// CHECK: store void (i8*, ...)* [[PRIVATES_MAP_FN_NOALIAS]], void (i8*, ...)** [[MAP_FN_ADDR:%.+]], // CHECK: [[MAP_FN:%.+]] = load void (i8*, ...)*, void (i8*, ...)** [[MAP_FN_ADDR]], // CHECK: call void (i8*, ...) 
[[MAP_FN]](i8* %{{.+}}, i32** [[PRIV_T_VAR_ADDR]], [2 x i32]** [[PRIV_VEC_ADDR]], [2 x [[S_INT_TY]]]** [[PRIV_S_ARR_ADDR]], [[S_INT_TY]]** [[PRIV_VAR_ADDR]]) // CHECK: [[PRIV_T_VAR:%.+]] = load i32*, i32** [[PRIV_T_VAR_ADDR]], Index: clang/test/OpenMP/taskloop_simd_lastprivate_codegen.cpp =================================================================== --- clang/test/OpenMP/taskloop_simd_lastprivate_codegen.cpp +++ clang/test/OpenMP/taskloop_simd_lastprivate_codegen.cpp @@ -264,7 +264,8 @@ // CHECK: [[PRIV_S_ARR_ADDR:%.+]] = alloca [2 x [[S_DOUBLE_TY]]]*, // CHECK: [[PRIV_VEC_ADDR:%.+]] = alloca [2 x i32]*, // CHECK: [[PRIV_SIVAR_ADDR:%.+]] = alloca i32*, -// CHECK: store void (i8*, ...)* bitcast (void ([[PRIVATES_MAIN_TY]]*, [[S_DOUBLE_TY]]**, i32**, [2 x [[S_DOUBLE_TY]]]**, [2 x i32]**, i32**)* [[PRIVATES_MAP_FN]] to void (i8*, ...)*), void (i8*, ...)** [[MAP_FN_ADDR:%.+]], +// CHECK: [[PRIVATES_MAP_FN_NOALIAS:%.+]] = call void (i8*, ...)* @llvm.noalias.p0f{{.*}}(void (i8*, ...)* bitcast (void ([[PRIVATES_MAIN_TY]]*, [[S_DOUBLE_TY]]**, i32**, [2 x [[S_DOUBLE_TY]]]**, [2 x i32]**, i32**)* [[PRIVATES_MAP_FN]] to void (i8*, ...)*), +// CHECK: store void (i8*, ...)* [[PRIVATES_MAP_FN_NOALIAS]], void (i8*, ...)** [[MAP_FN_ADDR:%.+]], // CHECK: [[MAP_FN:%.+]] = load void (i8*, ...)*, void (i8*, ...)** [[MAP_FN_ADDR]], // CHECK: call void (i8*, ...) [[MAP_FN]](i8* %{{.+}}, [[S_DOUBLE_TY]]** [[PRIV_VAR_ADDR]], i32** [[PRIV_T_VAR_ADDR]], [2 x [[S_DOUBLE_TY]]]** [[PRIV_S_ARR_ADDR]], [2 x i32]** [[PRIV_VEC_ADDR]], i32** [[PRIV_SIVAR_ADDR]]) @@ -432,7 +433,8 @@ // CHECK-DAG: [[PRIV_VEC_ADDR:%.+]] = alloca [2 x i32]*, // CHECK-DAG: [[PRIV_S_ARR_ADDR:%.+]] = alloca [2 x [[S_INT_TY]]]*, // CHECK-DAG: [[PRIV_VAR_ADDR:%.+]] = alloca [[S_INT_TY]]*, -// CHECK: store void (i8*, ...)* bitcast (void ([[PRIVATES_TMAIN_TY]]*, i32**, [2 x i32]**, [2 x [[S_INT_TY]]]**, [[S_INT_TY]]**)* [[PRIVATES_MAP_FN]] to void (i8*, ...)*), void (i8*, ...)** [[MAP_FN_ADDR:%.+]], +// CHECK: [[PRIVATES_MAP_FN_NOALIAS:%.+]] = call void (i8*, ...)* @llvm.noalias.p0f{{.*}}(void (i8*, ...)* bitcast (void ([[PRIVATES_TMAIN_TY]]*, i32**, [2 x i32]**, [2 x [[S_INT_TY]]]**, [[S_INT_TY]]**)* [[PRIVATES_MAP_FN]] to void (i8*, ...)*), +// CHECK: store void (i8*, ...)* [[PRIVATES_MAP_FN_NOALIAS]], void (i8*, ...)** [[MAP_FN_ADDR:%.+]], // CHECK: [[MAP_FN:%.+]] = load void (i8*, ...)*, void (i8*, ...)** [[MAP_FN_ADDR]], // CHECK: call void (i8*, ...) 
[[MAP_FN]](i8* %{{.+}}, i32** [[PRIV_T_VAR_ADDR]], [2 x i32]** [[PRIV_VEC_ADDR]], [2 x [[S_INT_TY]]]** [[PRIV_S_ARR_ADDR]], [[S_INT_TY]]** [[PRIV_VAR_ADDR]]) // CHECK: [[PRIV_T_VAR:%.+]] = load i32*, i32** [[PRIV_T_VAR_ADDR]], Index: clang/test/OpenMP/taskloop_simd_private_codegen.cpp =================================================================== --- clang/test/OpenMP/taskloop_simd_private_codegen.cpp +++ clang/test/OpenMP/taskloop_simd_private_codegen.cpp @@ -228,7 +228,8 @@ // CHECK: [[PRIV_S_ARR_ADDR:%.+]] = alloca [2 x [[S_DOUBLE_TY]]]*, // CHECK: [[PRIV_VEC_ADDR:%.+]] = alloca [2 x i32]*, // CHECK: [[PRIV_SIVAR_ADDR:%.+]] = alloca i32*, -// CHECK: store void (i8*, ...)* bitcast (void ([[PRIVATES_MAIN_TY]]*, [[S_DOUBLE_TY]]**, i32**, [2 x [[S_DOUBLE_TY]]]**, [2 x i32]**, i32**)* [[PRIVATES_MAP_FN]] to void (i8*, ...)*), void (i8*, ...)** [[MAP_FN_ADDR:%.+]], +// CHECK: [[PRIVATES_MAP_FN_NOALIAS:%.+]] = call void (i8*, ...)* @llvm.noalias.p0f{{.*}}(void (i8*, ...)* bitcast (void ([[PRIVATES_MAIN_TY]]*, [[S_DOUBLE_TY]]**, i32**, [2 x [[S_DOUBLE_TY]]]**, [2 x i32]**, i32**)* [[PRIVATES_MAP_FN]] to void (i8*, ...)*), +// CHECK: store void (i8*, ...)* [[PRIVATES_MAP_FN_NOALIAS]], void (i8*, ...)** [[MAP_FN_ADDR:%.+]], // CHECK: [[MAP_FN:%.+]] = load void (i8*, ...)*, void (i8*, ...)** [[MAP_FN_ADDR]], // CHECK: call void (i8*, ...) [[MAP_FN]](i8* %{{.+}}, [[S_DOUBLE_TY]]** [[PRIV_VAR_ADDR]], i32** [[PRIV_T_VAR_ADDR]], [2 x [[S_DOUBLE_TY]]]** [[PRIV_S_ARR_ADDR]], [2 x i32]** [[PRIV_VEC_ADDR]], i32** [[PRIV_SIVAR_ADDR]]) // CHECK: [[PRIV_VAR:%.+]] = load [[S_DOUBLE_TY]]*, [[S_DOUBLE_TY]]** [[PRIV_VAR_ADDR]], @@ -356,7 +357,8 @@ // CHECK-DAG: [[PRIV_VEC_ADDR:%.+]] = alloca [2 x i32]*, // CHECK-DAG: [[PRIV_S_ARR_ADDR:%.+]] = alloca [2 x [[S_INT_TY]]]*, // CHECK-DAG: [[PRIV_VAR_ADDR:%.+]] = alloca [[S_INT_TY]]*, -// CHECK: store void (i8*, ...)* bitcast (void ([[PRIVATES_TMAIN_TY]]*, i32**, [2 x i32]**, [2 x [[S_INT_TY]]]**, [[S_INT_TY]]**)* [[PRIVATES_MAP_FN]] to void (i8*, ...)*), void (i8*, ...)** [[MAP_FN_ADDR:%.+]], +// CHECK: [[PRIVATES_MAP_FN_NOALIAS:%.+]] = call void (i8*, ...)* @llvm.noalias.p0f{{.*}}(void (i8*, ...)* bitcast (void ([[PRIVATES_TMAIN_TY]]*, i32**, [2 x i32]**, [2 x [[S_INT_TY]]]**, [[S_INT_TY]]**)* [[PRIVATES_MAP_FN]] to void (i8*, ...)*), +// CHECK: store void (i8*, ...)* [[PRIVATES_MAP_FN_NOALIAS]], void (i8*, ...)** [[MAP_FN_ADDR:%.+]], // CHECK: [[MAP_FN:%.+]] = load void (i8*, ...)*, void (i8*, ...)** [[MAP_FN_ADDR]], // CHECK: call void (i8*, ...) 
[[MAP_FN]](i8* %{{.+}}, i32** [[PRIV_T_VAR_ADDR]], [2 x i32]** [[PRIV_VEC_ADDR]], [2 x [[S_INT_TY]]]** [[PRIV_S_ARR_ADDR]], [[S_INT_TY]]** [[PRIV_VAR_ADDR]]) // CHECK: [[PRIV_T_VAR:%.+]] = load i32*, i32** [[PRIV_T_VAR_ADDR]], Index: clang/test/OpenMP/teams_distribute_parallel_for_reduction_task_codegen.cpp =================================================================== --- clang/test/OpenMP/teams_distribute_parallel_for_reduction_task_codegen.cpp +++ clang/test/OpenMP/teams_distribute_parallel_for_reduction_task_codegen.cpp @@ -116,15 +116,15 @@ // CHECK: define internal {{.*}}i32 [[TASK]](i32 {{.+}}, [[TASK_TY]]* {{.+}}) // CHECK-DAG: call i8* @__kmpc_task_reduction_get_th_data(i32 %{{.+}}, i8* [[TG:%.+]], i8* [[ARGC_REF:%.+]]) -// CHECK_DAG: [[TG]] = load i8*, i8** [[TG_ADDR:%.+]], +// CHECK_DAG: [[TG]] = load i8*, i8** [[TG_ADDR:%.+]], align // CHECK-DAG: [[ARGC_REF]] = bitcast i32* [[ARGC_ADDR:%.+]] to i8* -// CHECK-DAG: [[ARGC_ADDR]] = load i32*, i32** [[ARGC_ADDR_REF:%.+]], +// CHECK-DAG: [[ARGC_ADDR]] = load i32*, i32** [[ARGC_ADDR_REF:%.+]], align // CHECK-DAG: [[ARGC_ADDR_REF]] = getelementptr inbounds [[CAPS_TY:%.+]], %{{.+}}* [[CAP:%.+]], i32 0, i32 1 // CHECK-DAG: call i8* @__kmpc_task_reduction_get_th_data(i32 %{{.+}}, i8* [[TG:%.+]], i8* [[ARGV_REF:%.+]]) -// CHECK_DAG: [[TG]] = load i8*, i8** [[TG_ADDR]], -// CHECK-DAG: [[ARGV_REF]] = load i8*, i8** [[ARGV_ADDR:%.+]], -// CHECK-DAG: [[ARGV_ADDR]] = load i8**, i8*** [[ARGV_ADDR_REF:%.+]], -// CHECK-DAG: [[ARGV_ADDR_REF:%.+]] = load i8***, i8**** [[ARGV:%.+]], +// CHECK_DAG: [[TG]] = load i8*, i8** [[TG_ADDR]], align +// CHECK-DAG: [[ARGV_REF]] = load i8*, i8** [[ARGV_ADDR:%.+]], align +// CHECK-DAG: [[ARGV_ADDR]] = load i8**, i8*** [[ARGV_ADDR_REF:%.+]], align +// CHECK-DAG: [[ARGV_ADDR_REF:%.+]] = load i8***, i8**** [[ARGV:%.+]], align // CHECK-DAG: [[ARGV]] = getelementptr inbounds [[CAPS_TY]], [[CAPS_TY]]* [[CAP]], i32 0, i32 2 #endif Index: llvm/docs/LangRef.rst =================================================================== --- llvm/docs/LangRef.rst +++ llvm/docs/LangRef.rst @@ -5436,6 +5436,8 @@ 4 byte gap between the two fields. This gap represents padding which does not carry useful data and need not be preserved. +.. _noalias_and_aliasscope: + '``noalias``' and '``alias.scope``' Metadata ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ @@ -9100,8 +9102,8 @@ :: - = load [volatile] , * [, align ][, !nontemporal !][, !invariant.load !][, !invariant.group !][, !nonnull !][, !dereferenceable !][, !dereferenceable_or_null !][, !align !] - = load atomic [volatile] , * [syncscope("")] , align [, !invariant.group !] + = load [volatile] , * [, ptr_provenance * ][, align ][, !nontemporal !][, !invariant.load !][, !invariant.group !][, !nonnull !][, !dereferenceable !][, !dereferenceable_or_null !][, !align !] + = load atomic [volatile] , * [, ptr_provenance * ] [syncscope("")] , align [, !invariant.group !] ! = !{ i32 1 } ! = !{i64 } ! = !{ i64 } @@ -9132,6 +9134,13 @@ alignment is not set to a value which is at least the size in bytes of the pointee. ``!nontemporal`` does not have any defined semantics for atomic loads. +The optional ``ptr_provenance`` argument specifies the noalias chain of the +pointer operand. It has the same type as the pointer operand. Together with the +``!noalias`` metadata on the instruction, and the ``llvm.provenance.noalias``, +``llvm.noalias.arg.guard`` intrinsics in the chain, this is used to deduce if +two load/store instructions may or may not alias. 
(See `Scoped NoAlias Related +Intrinsics`_) + The optional constant ``align`` argument specifies the alignment of the operation (that is, the alignment of the memory address). A value of 0 or an omitted ``align`` argument means that the operation has the ABI @@ -9224,8 +9233,8 @@ :: - store [volatile] , * [, align ][, !nontemporal !][, !invariant.group !] ; yields void - store atomic [volatile] , * [syncscope("")] , align [, !invariant.group !] ; yields void + store [volatile] , * [, ptr_provenance * ][, align ][, !nontemporal !][, !invariant.group !] ; yields void + store atomic [volatile] , * [, ptr_provenance * ] [syncscope("")] , align [, !invariant.group !] ; yields void Overview: """"""""" @@ -9255,6 +9264,13 @@ the alignment is not set to a value which is at least the size in bytes of the pointee. ``!nontemporal`` does not have any defined semantics for atomic stores. +The optional ``ptr_provenance`` argument specifies the noalias chain of the +pointer operand. It has the same type as the pointer operand. Together with the +``!noalias`` metadata on the instruction, and the ``llvm.provenance.noalias``, +``llvm.noalias.arg.guard`` intrinsics in the chain, this is used to deduce if +two load/store instructions may or may not alias. (See `Scoped NoAlias Related +Intrinsics`_) + The optional constant ``align`` argument specifies the alignment of the operation (that is, the alignment of the memory address). A value of 0 or an omitted ``align`` argument means that the operation has the ABI @@ -18805,6 +18821,312 @@ This function returns the same values as the libm ``trunc`` functions would and handles error conditions in the same way. +Scoped NoAlias Related Intrinsics +--------------------------------- + +This set of intrinsics provide the basis for full C99 '``restrict``' support +(See: iso-C99-n1256, 6.7.3.1 'Formal definition of restrict'). + +In C99 restrict, accesses using a pointer based on a restrict pointer ``p`` +are assumed to not alias with other accesses that are not based on ``p``, as +long as both accesses are within the scope of the declaration of ``p``. + +The intrinsics work together with the '``!noalias``' +:ref:`metadata ` annotations on memory instructions and +the ``ptr_provenance`` of ``load`` and ``store`` instructions. + +The full set of intrinsics is: + +.. code-block:: llvm + + %p.decl = i8* @llvm.noalias.decl.XXX(i8** %p.alloc, i32 , metadata !p.scope) + %p.noalias = i8* @llvm.noalias.XXX(i8* %p, i8* %p.decl, i8** %p.addr, + i32 , metadata !p.scope) !noalias !VisibleScopes + %prov.p = i8* @llvm.provenance.noalias.XXX(i8* %prov.p, i8* %p.decl, + i8** p.addr, i8** %prov.p.addr, + i32 , metadata !p.scope) !noalias !VisibleScopes + %p.guard = i8* @llvm.noalias.arg.guard.XXX(i8* %p, i8* %prov.p) + %p.copy.guard = %struct.FOO* @llvm.noalias.copy.guard.XXX(%struct.FOO* %p.block, i8* %p.decl, + metadata !p.indices, metadata !p.scope) + + +A detailed description of these intrinsics and how the work is explained in +::doc::`Restrict and NoAlias Information in LLVM ` + +.. _int_noalias_decl: + +'``llvm.noalias.decl``' Intrinsic +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Syntax: +""""""" + +This is an overloaded intrinsic. The return type and argument types are encoded +in ``XXX``. + +:: + + declare i8* @llvm.noalias.decl.XXX(* %p.alloca, i32 , metadata !p.scope) + +Overview: +""""""""" + +``llvm.noalias.decl`` is inserted at the location of a restrict pointer +declaration. It makes it possible to identify that a restrict scope is only +valid inside the body of a loop. 
It also makes it possible to identify that a
+certain ``alloca`` is associated with an object that contains one or more
+restrict pointers.
+
+The handle it returns is always of type ``i8*`` and does
+not really represent a value. It is merely used to track a dependency on the
+declaration.
+
+Arguments:
+""""""""""
+
+The first argument ``%p.alloca`` points to the ``alloca`` that contains one
+or more restrict pointers. It can also be ``null`` if the ``alloca`` has been
+optimized away.
+
+The second argument ``p.objId`` is an integer representing an object id.
+
+The third argument ``!p.scope`` is metadata that is a list of ``noalias``
+metadata references. The format is identical to that required for ``noalias``
+metadata. This list must have exactly one element.
+
+Semantics:
+""""""""""
+
+The ``llvm.noalias.decl`` intrinsic is used to identify the exact location of
+a *restrict pointer declaration*. When this is done inside a loop body,
+care must be taken to duplicate and uniquify the scopes and intrinsics when
+the loop is unrolled. Otherwise the restrict scope could spill across
+iterations.
+
+It also associates specific restrict properties with an ``alloca`` and is used
+to propagate those properties to the ``llvm.noalias``, ``llvm.provenance.noalias``
+and ``llvm.noalias.copy.guard`` intrinsics when inlining and other optimizations
+make the relationship between those intrinsics and the actual variable
+declaration visible.
+
+A detailed description of these intrinsics and how they work is explained in
+:doc:`Restrict and NoAlias Information in LLVM <NoAliasInfo>`.
+
+.. _int_noalias:
+
+'``llvm.noalias``' Intrinsic
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Syntax:
+"""""""
+
+This is an overloaded intrinsic. The return type and argument types are encoded
+in ``XXX``.
+
+::
+
+      declare * @llvm.noalias.XXX(* %p, i8* %p.decl, ** %p.addr,
+                                  i32 , metadata !p.scope)
+
+Overview:
+"""""""""
+
+``llvm.noalias`` is inserted at the moment that restrict properties are
+introduced. This is typically done after loading a restrict pointer from
+memory. Its return value can be seen as *the pointer value with restrict
+properties*.
+
+Arguments:
+""""""""""
+
+The first argument ``%p`` is the pointer on which the aliasing assumption is
+being placed.
+
+The second argument ``%p.decl`` refers to the ``llvm.noalias.decl`` that is
+associated with the pointer declaration.
+
+The third argument ``%p.addr`` is the address in memory of this pointer.
+
+The fourth argument ``p.objId`` is an integer representing an object id.
+
+The fifth argument ``!p.scope`` is metadata that is a list of ``noalias``
+metadata references. The format is identical to that required for ``noalias``
+metadata. This list must have exactly one element.
+
+Semantics:
+""""""""""
+
+The ``llvm.noalias`` intrinsic adds alias assumptions to the pointer
+computation path. It also blocks optimizations on this computation path.
+
+It will be transformed into a ``llvm.provenance.noalias`` intrinsic and moved onto
+the ``ptr_provenance`` path, so that pointer optimizations can still be
+done and the restrict information is not lost.
+
+A detailed description of these intrinsics and how they work is explained in
+:doc:`Restrict and NoAlias Information in LLVM <NoAliasInfo>`.
+
+.. _int_provenance_noalias:
+
+'``llvm.provenance.noalias``' Intrinsic
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Syntax:
+"""""""
+
+This is an overloaded intrinsic. The return type and argument types are encoded
+in ``XXX``.
+
+::
+
+      declare * @llvm.provenance.noalias.XXX(* %p, i8* %p.decl,
+                                             ** %p.addr, ** %prov.p.addr,
+                                             i32 , metadata !p.scope)
+
+Overview:
+"""""""""
+
+``llvm.provenance.noalias`` is inserted at the moment that restrict properties are
+introduced on the ``ptr_provenance``. This is typically done when a
+``llvm.noalias`` is converted into the ``llvm.provenance.noalias``.
+
+The value it returns (``%prov.p``) represents the pointer ``%p`` on the
+``ptr_provenance``.
+
+Arguments:
+""""""""""
+
+The first argument ``%p`` is the pointer (or its ``ptr_provenance``) on
+which the aliasing assumption is being placed.
+
+The second argument ``%p.decl`` refers to the ``llvm.noalias.decl`` that is
+associated with the pointer declaration.
+
+The third argument ``%p.addr`` is the address in memory of this pointer.
+
+The fourth argument ``%prov.p.addr`` represents ``%p.addr`` on the
+``ptr_provenance``.
+
+The fifth argument ``p.objId`` is an integer representing an object id.
+
+The sixth argument ``!p.scope`` is metadata that is a list of ``noalias``
+metadata references. The format is identical to that required for ``noalias``
+metadata. This list must have exactly one element.
+
+Semantics:
+""""""""""
+
+The ``llvm.provenance.noalias`` intrinsic adds alias assumptions to the
+``ptr_provenance`` of the memory instructions that depend on it.
+
+For this purpose, the ``llvm.provenance.noalias`` itself is also considered to be
+a memory instruction and has its own ``ptr_provenance`` for the ``%p.addr``
+argument.
+
+A detailed description of these intrinsics and how they work is explained in
+:doc:`Restrict and NoAlias Information in LLVM <NoAliasInfo>`.
+
+.. _int_noalias_arg_guard:
+
+'``llvm.noalias.arg.guard``' Intrinsic
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Syntax:
+"""""""
+
+This is an overloaded intrinsic. The return type and argument types are encoded
+in ``XXX``.
+
+::
+
+      declare * @llvm.noalias.arg.guard.XXX(* %p, * %prov.p)
+
+Overview:
+"""""""""
+
+The ``llvm.noalias.arg.guard`` intrinsic brings alias assumption properties that
+are on the ``ptr_provenance`` path of a pointer back into the
+computation path of that pointer.
+
+Arguments:
+""""""""""
+
+The first argument ``%p`` represents the computation path of the pointer.
+
+The second argument ``%prov.p`` represents the ``ptr_provenance`` path
+of that pointer.
+
+Semantics:
+""""""""""
+
+The ``llvm.noalias.arg.guard`` is typically generated when a ``llvm.noalias``
+intrinsic is converted to a ``llvm.provenance.noalias``, but the pointer escapes
+because it is used as an argument to a function or it is returned.
+
+It can typically be optimized away after inlining:
+
+* When it is encountered on the computation path, it is assumed to return the
+  first argument ``%p``.
+* When it is encountered on a ``ptr_provenance`` path, it is assumed to
+  return the second argument ``%prov.p``.
+
+A detailed description of these intrinsics and how they work is explained in
+:doc:`Restrict and NoAlias Information in LLVM <NoAliasInfo>`.
+
+.. _int_noalias_copy_guard:
+
+'``llvm.noalias.copy.guard``' Intrinsic
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Syntax:
+"""""""
+
+This is an overloaded intrinsic. The return type and argument types are encoded
+in ``XXX``.
+
+::
+
+      declare * @llvm.noalias.copy.guard.XXX(* %p.block, i8* %p.decl,
+                                             metadata !p.indices, metadata !p.scope)
+
+Overview:
+"""""""""
+
+``llvm.noalias.copy.guard`` is inserted in the source pointer argument when a
+block of memory that will be copied (either using ``llvm.memcpy`` or a
+combination of ``load``/``store`` instructions) is associated with a variable
+that contains at least one restrict pointer. This could be a ``struct`` that
+contains one or more restrict member pointers, or an array of restrict pointers.
+
+The intrinsic returns the first argument.
+
+Arguments:
+""""""""""
+
+The first argument ``%p.block`` represents the block that will be copied.
+
+The second argument ``%p.decl`` refers to the ``llvm.noalias.decl`` that is
+associated with the block.
+
+The third argument ``!p.indices`` refers to a metadata list of metadata. Each
+entry refers to another metadata list of integers, describing the GEP path
+that contains a restrict pointer. A -1 value indicates that any index value
+is a match. See above for an example.
+
+Semantics:
+""""""""""
+
+The ``llvm.noalias.copy.guard`` provides extra restrict information about a
+block of memory that is copied over. When the memory copy is optimized away,
+this makes it possible to still match the pointer accesses to the correct
+restrict information.
+
+A detailed description of these intrinsics and how they work is explained in
+:doc:`Restrict and NoAlias Information in LLVM <NoAliasInfo>`.
+
 Floating Point Environment Manipulation intrinsics
 ---------------------------------------------------
Index: llvm/docs/NoAliasInfo.rst
===================================================================
--- /dev/null
+++ llvm/docs/NoAliasInfo.rst
@@ -0,0 +1,1399 @@
+========================================
+Restrict and NoAlias Information in LLVM
+========================================
+
+.. contents::
+   :local:
+   :depth: 2
+
+Introduction
+============
+
+LLVM provides a number of mechanisms to annotate that two accesses will not
+alias. This document describes the provenance annotations on pointers with the
+``noalias`` intrinsics, and the ``noalias`` metadata on memory instructions
+(load/store).
+
+Together, they provide a means to decide if two memory instructions **will not
+alias**, by looking at the pointer provenance and combining this with the active
+scopes (specified by the ``noalias`` metadata) of the memory instructions.
+
+All of C99 restrict can be mapped onto these annotations, resulting in a
+powerful mechanism to provide extra alias information.
+
+
+Relation with C99 ``restrict``
+==============================
+
+The noalias infrastructure can be used to fully support *C99 restrict* [#R1]_:
+restrict pointers as function arguments, as local variables, and as struct
+members. (See [#R2]_: iso-C99-n1256, 6.7.3.1 'Formal definition of restrict').
+
+Modeling ``restrict`` requires two pieces of information for a memory
+instruction:
+
+- the *depends on* relationship of the pointer path on a restrict
+  pointer (pointer provenance)
+- the *visibility* of that restrict pointer to the memory instruction
+
+Every variable that is defined in a function (function arguments and local
+variables) and that contains at least one restrict pointer will get a noalias
+scope that is associated with this variable declaration. The ``!noalias``
+metadata is used to annotate every memory instruction in the function with the
+restrict variables (noalias scopes) that are visible to that memory
+instruction.
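+
+As a small illustrative sketch (this example is not part of the patch; the
+names ``S``, ``bar``, ``p`` and ``q`` are made up for illustration), a local
+``struct`` with a restrict-qualified member is such a variable: the
+declaration of the ``struct`` object gets a noalias scope, and the member
+behaves like a restrict pointer declared at that point:
+
+.. code-block:: c
+
+   struct S {
+     int *restrict rp; /* restrict member */
+   };
+
+   void bar(int *p, int *q) {
+     struct S s = { p }; /* declaration of 's': the noalias scope of s.rp starts here */
+     *s.rp = 1;          /* based on the restrict member s.rp */
+     *q = 2;             /* not based on s.rp; may be assumed not to alias the store above */
+   }
+
+Copying such a ``struct`` as a whole is where the ``llvm.noalias.copy.guard``
+intrinsic comes into play (see below), since there is then no explicit load of
+the restrict member to which an ``llvm.noalias`` could be attached.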
+ +Each restrict variable also gets a ``@llvm.noalias.decl`` at the place in the +control flow where it is defined. This identifies if restrict scopes must be +duplicated or not when loops are transformed. + +Whenever a restrict pointer is read, a ``@llvm.noalias`` intrinsic is introduced +to indicate the dependency on a restrict pointer. This intrinsic depends on the +pointer value and on the address of the pointer. Different addresses represent +different restrict pointers. Different restrict pointers point to different sets +of objects. + +A struct can contain multiple restrict pointers. As such, a single variable +definition (single scope) can contain multiple restrict pointers. The addresses +of the pointers ensure that they can be differentiated. + +When a struct is copied using the internal ``@llvm.memcpy``, the ``@llvm.noalias`` +intrinsic cannot be used. The ``@llvm.noalias.copy.guard`` provides information +on what parts of the struct represent restrict pointers. This ensures that the +correct dependencies can be reconstructed when the ``@llvm.memcpy`` is optimized +away. + +When a pointer to a restrict pointer is dereferenced, there is no local scope +available. These restrict pointers are associated with an ``unknown function +scope``. It is sometimes possible to refine those scopes later to actual +variable definitions. + +.. _noaliasinfo_basic_mechanism: + +Basic Mechanism +=============== + +Describing the full set of intrinsics with all their arguments at once, might be +confusing and can make it difficult to understand. Therefore, we start with +explaining the basic technology. We then build upon it to add missing parts. + +Throughout the explanation, C99 and LLVM-IR code fragments are used to provide +examples. This does not mean that the provided infrastructure can only be used +for implementing C99 restrict. It just shows that at least C99 restrict can be +mapped onto it. Other provenance based alias annotations will likely map just as +well onto it, without (or with only small) adaptations to the provided +infrastructure. + + +Single ``noalias`` Scope +------------------------ + +Following code fragment: + +.. code-block:: C + + void foo(int *pA, int *pB, int i) { + int * restrict rpA = pA; + int * pX = &rpA[i]; + + *rpA = 42; // (1) based on rpA + *pB = 43; // (2) not based on rpA + *pX = 44; // (3) based on rpA + } + +contains one *restrict* pointer ``rpA``, one pointer ``pX`` depending on it, and +one pointer ``pB`` not depending on ``rpA``. Based on the C99 restrict +description, \*rpA and \*pX can alias with each other. They will not alias with +\*pB. + +In pseudo LLVM-IR code, this can be represented as: + +.. code-block:: llvm + + define void @foo(i32* %pA, i32* %pB, i64 %i) { + %rpA = tail call i32* @llvm.noalias(i32* %pA, metadata !2) + %arrayidx = getelementptr inbounds i32, i32* %pA, i64 %i + store i32 42, i32* %rpA, !noalias !2 ; (1) + store i32 43, i32* %pB, !noalias !2 ; (2) + store i32 44, i32* %arrayidx, !noalias !2 ; (3) + ret void + } + + ; MetaData + !2 = !{!3} ; contains a single scope: !3 + !3 = distinct !{!3, !4, !"foo: rpA"} ; this scope represents rpA + !4 = distinct !{!4, !"foo"} + +* Metadata !2 defines a list of a single scope ``!3`` that represents ``rpA`` +* The ``@llvm.noalias`` intrinsic is associated with the single scope ``!3`` in + ``metadata !2``. It indicates that accesses based on this pointer are depending + on this ``!3`` scope. 
They will not alias with accesses *not* depending on the + same ``!3`` scope, as long as the scope is visible to both accesses. +* For this example, the ``!3`` scope is visible to all three stores (``!noalias + !2`` annotation on the stores). Because of this: + + * ``(1)`` and ``(3)`` may alias to each other: ``%rpA`` and ``%arrayidx`` + depend on the same ``!3`` scope. + * ``(1)`` and ``(3)`` will not alias with ``(2)``: ``%pB`` does not depend on + the ``!3`` scope. + + +Multiple ``noalias`` Scopes +--------------------------- + +Let's extend the example: + +.. code-block:: C + + void foo(int *pA, int *pB, int *pC, int i) { + int * restrict rpA = pA; + int * pX = &rpA[i]; + + *rpA = 42; // (1) based on rpA + *pB = 43; // (2) not based on rpA + *pX = 44; // (3) based on rpA + { + int * restrict rpC = pC; + // rpA and rpC visible + + *rpA = 45; // (4) based on rpA + *pB = 46; // (5) not based on rpA nor rpC + *rpC = 47; // (6) based on rpC + } + } + +with following pseudo LLVM-IR code: + +.. code-block:: llvm + + define void @foo(i32* %pA, i32* %pB, i32* %pC, i64 %i) { + %rpA = tail call i32* @llvm.noalias(i32* %pA, metadata !2) ; rpA + %arrayidx = getelementptr inbounds i32, i32* %pA, i64 %i + %rpC = tail call i32* @llvm.noalias(i32* %pC, metadata !11) ; rpC + store i32 42, i32* %rpA, !noalias !2 ; (1) rpA + store i32 43, i32* %pB, !noalias !2 ; (2) rpA + store i32 44, i32* %arrayidx, !noalias !2 ; (3) rpA + store i32 45, i32* %rpA, !noalias !13 ; (4) rpA and rpC + store i32 46, i32* %pB, !noalias !13 ; (5) rpA and rpC + store i32 47, i32* %rpC, !noalias !13 ; (6) rpA and rpC + ret void + } + + ; MetaData + !2 = !{!3} ; single scope: rpA + !3 = distinct !{!3, !4, !"foo: rpA"} + !4 = distinct !{!4, !"foo"} + !11 = !{!12} ; single scope: rpC + !12 = distinct !{!12, !4, !"foo: rpC"} + !13 = !{!12, !3} ; scopes: rpA and rpC + +In this fragment: + +* ``%rpA`` is associated with scope ``!3`` +* ``%rpC`` is associated with scope ``!12`` +* ``(1)``, ``(2)`` and ``(3)`` only see ``rpA``. (scope ``!3``) +* ``(4)``, ``(5)`` and ``(6)`` see ``rpA`` and ``rpC`` (scopes ``!3`` and ``!12``) + +Following C99 restrict: + +* ``(4)``, ``(5)`` and ``(6)`` will not alias each other. +* ``(6)`` will not alias ``(3)``: + + * ``(6)`` is based on ``rpC``, which is visible to ``(6)``, but not to + ``(3)`` => no conclusion. + * ``(3)`` is based on ``rpA`` which is visible to both ``(6)`` and ``(3)`` => + will not alias + +* ``(6)`` might alias with ``(2)``: + + * ``rpC`` is visible to ``(6)``, but not to ``(2)``. + * There are no other dependencies for those accesses. + + +Location of the ``restrict`` Declaration +---------------------------------------- + +Some optimization passes need to know where a restrict variable has been +declared. Only when that information is known, they can perform the correct +transformations. + +One of those transformations is *loop unrolling*. When restrict is applicable +across iterations, the loop can be unrolled without extra changes. But when +restrict is only applicable inside a single iteration, care must be taken to +also duplicate the noalias scopes while duplicating the loop body. + +Following code example shows those two cases: + +.. code-block:: c + + void restrictInLoop(int *pA, int *pB, int *pC, long N) { + for (int i=0; i defines the ``object P``. + +Example A: + +.. code-block:: C + + int foo(int* pA, int* pB) { + int * restrict rpA=pA; + *rpA=42; + *pB=99; + return *rpA; + } + +And in pseudo LLVM-IR as how clang would produce it: + +.. 
code-block:: llvm + + define i32 @foo(i32* %pA, i32* %pB) { + %rpA.address = alloca i32* + %rpA.decl = call @llvm.noalias.decl %rpA.address, !metadata !10 ; declaration of a restrict pointer + store i32* %pA, i32** %rpA.address, !noalias !10 + %rpA = load i32*, i32** %rpA.address, !noalias !10 + %rpA.1 = i32* call @llvm.noalias %rpA, %rpA.decl, %rpA.address ; reading of a restrict pointer + store i32 42, i32* %rpA.1, !noalias !10 + store i32 99, i32* %pB, !noalias !10 + %1 = load i32, i32* %rpA.1, !noalias !10 + ret i32 %1 + } + +With this representation, we have enough information to decide whether two +load/stores are not aliasing, based on the ``noalias`` annotations. But, the +added intrinsics must block optimizations. Later on we will see how the +infrastructure is expanded to allow for optimizations. + +Summary: + +* ``%p.decl = @llvm.noalias.decl %p.alloc, metadata !Scope`` +* ``%p.val = @llvm.noalias %p, %p.decl, %p.addr`` + + +Pointer Provenance +------------------ + +In order to keep track of the dependency on the ``@llvm.noalias`` intrinsics, +but still allow most optimization passes to do their work, an extra optional +operand for ``load``/``store`` instruction is introduced: the ``ptr_provenance`` +operand. + +The idea is that the *pointer operand* is used for normal pointer +computations. The ``ptr_provenance`` operand is used to track ``noalias`` +related dependencies. Optimizations (like LSR) can modify the *pointer operand* +as they see fit. As long as the ``ptr_provenance operand`` is not touched, we +are still able to deduce the noalias related information. + +When an optimization introduces a ``load``/``store`` without keeping the +``ptr_provenance`` operand and the ``!noalias`` metadata, we fall back to the +fail-safe *worst case*. + +Although the actual pointer computations can be removed from the +``ptr_provenance``, it can still contain *PHI* nodes, *select* instructions and +*casts*. + +For clang, it is hard to track the usage of a pointer and it will not generate +the ``ptr_provenance`` operand. At LLVM-IR level, this is much easier. Because +of that the annotations exist in two states and a conversion pass is introduced: + +* Before *noalias propagation*: + + This state is produced by clang and sometimes by SROA. The ``@llvm.noalias`` + intrinsic is used in the computation path of the pointer. It is treated as a + mostly opaque intrinsic and blocks most optimizations. + + +* After *noalias propagation*: + + A *noalias propagation and conversion* pass is introduced: + + * ``@llvm.noalias`` intrinsics are converted into ``@llvm.provenance.noalias`` + intrinsics. + * their usage is removed from the main pointer computations of + ``load``/``store`` instructions and moved to the ``ptr_provenance`` operand. + * When a pointer depending on a ``@llvm.noalias`` intrinsic is passed as an + argument, returned from a function or stored into memory, a + ``@llvm.noalias.arg.guard`` is introduced. This combines the original + pointer computation with the provenance information. After inlining, it is + also used to propagate the noalias information to the ``load``/``store`` + instructions. + +So, we now have two extra intrinsics: + +* ``@llvm.provenance.noalias`` %prov.p, %p.decl, %p.addr + + * provides restrict information to a ``ptr_provenance`` operand + + * ``%prov.p``: tracks the provenance information associated with the pointer + value that was read. + * ``%p.decl`` refers to the ``@llvm.noalias.decl`` that is associated with the + restrict pointer. 
+ * ``%p.addr``: represents the address of ``object P``. + +* ``@llvm.noalias.arg.guard %p, %prov.p`` + + * combines pointer and ``ptr_provenance`` information when a pointer value + with ``noalias`` dependencies escapes. It is normally used for function + arguments, returns, or stores to memory. + * ``%p`` tracks the pointer computation + * ``%prov.p`` tracks the provenance of the pointer. + +After noalias propagation and conversion, example A becomes: + +.. code-block:: llvm + + define i32 @foo(i32* %pA, i32* %pB) { + %rpA.address = alloca i32* + %rpA.decl = i8* call @llvm.noalias.decl i32* %rpA.address, !metadata !10 ; declaration of a restrict pointer + store i32* %pA, i32** %rpA.address, !noalias !10 + %rpA = load i32*, i32** %rpA.address, !noalias !10 + ; reading of a restrict pointer: + %prov.rpA.1 = i32* call @llvm.provenance.noalias i32* %rpA, i8* %rpA.decl, i32* %rpA.address + store i32 42, i32* %rpA, ptr_provenance i32* %prov.rpA.1, !noalias !10 + store i32 99, i32* %pB, !noalias !10 + %1 = load i32, i32* %rpA.1, !noalias !10 + ret i32 %1 + } + +Summary: + +* ``%p.decl = @llvm.noalias.decl %p.alloc, metadata !Scope`` +* ``%p.noalias = @llvm.noalias %p, %p.decl, %p.addr`` +* ``%prov.p = @llvm.provenance.noalias %prov.p.2, %p.decl, %p.addr`` +* ``%p.guard = @llvm.noalias.arg.guard %p, %prov.p`` + + +.. _noalias_vs_provenance_noalias: + +``@llvm.noalias`` vs ``@llvm.provenance.noalias`` +------------------------------------------------- + +The ``@llvm.noalias`` intrinsic is a convenience shortcut for the combination of +``@llvm.provenance.noalias``, which can only reside on the ptr_provenance path, +and ``@llvm.noalias.arg.guard``, which combines the normal pointer with the +ptr_provenance path: + +* This results in less initial code to be generated by ``clang``. +* It also helps during SROA when introducing ``noalias`` information for pointers + inside a struct. +* The noalias propagation and conversion pass depends on the property of + ``@llvm.provenance.noalias`` to only reside on the ``ptr_provenance`` path to + reduce the amount of work. + +.. code-block:: llvm + + ; Following: + %rpA = load i32*, i32** %rpA.address, !noalias !10 + %rpA.1 = i32* call @llvm.noalias %rpA, %rpA.decl, %rpA.address + store i32 42, i32* %rpA.1, !noalias !10 + + ; is a shortcut for: + %rpA = load i32*, i32** %rpA.address, !noalias !10 + %rpA.prov = i32* call @llvm.provenance.noalias %rpA, %rpA.decl, %rpA.address + %rpA.guard = i32* call @llvm.noalias.arg.guard %rpA, %rpA.prov + store i32 42, i32* %rpA.guard, !noalias !10 + + ; and after noalias propagation and conversion, this becomes: + %rpA = load i32*, i32** %rpA.address, !noalias !10 + %prov.rpA = i32* call @llvm.provenance.noalias %rpA, %rpA.decl, %rpA.address + store i32 42, i32* %rpA, ptr_provenance i32* %prov.rpA, !noalias !10 + + + +SROA and Stack optimizations +---------------------------- + +When SROA eliminates a local variable, we do not have an address for ``object P`` +anymore (the alloca is removed and ``%p.addr`` becomes ``null``). At that moment +we can only depend on the ``!Scope`` metadata to differentiate restrict +objects. For convenience, we also add this information to the ``@llvm.noalias`` +and ``@llvm.provenance.noalias`` intrinsics. + +It is also possible that a single variable declaration contains multiple +restrict pointers (think of a struct containing multiple restrict pointers, or +an array of restrict pointers). For correctness, SROA must introduce new scopes +when splitting it up. 
But cloning and adapting scopes can be very +expensive. Because of that, we introduce an extra *object ID* (``objId``) +parameter for ``@llvm.noalias.decl``, ``@llvm.noalias`` and +``llvm.provenance.noalias``. This can be thought of as the *offset in the +variable*. This allows us to differentiate *noalias* dependencies coming from +the same variable, but representing different *noalias* pointers. + +Summary: + +* ``%p.decl = @llvm.noalias.decl %p.alloc, i64 objId, metadata !Scope`` +* ``%p.noalias = @llvm.noalias %p, %p.decl, %p.addr, i64 objId, metadata !Scope`` +* ``%prov.p = @llvm.provenance.noalias %prov.p.2, %p.decl, %p.addr, i64 objId, metadata !Scope`` +* ``%p.guard = @llvm.noalias.arg.guard %p, %prov.p`` + +For alias analysis, this means that two ``@llvm.provenance.noalias`` intrinsics represent a +different ``object P0`` and, ``object P1``, if: + +* ``%p0.addr`` and ``%p1.addr`` are different +* or, ``objId0`` and ``objId1`` are different +* or, ``!Scope0`` and ``!Scope1`` are different + + +Optimizing a restrict pointer pointing to a restrict pointer +------------------------------------------------------------ + +Example: + +.. code-block:: C + + int * restrict * restrict ppA = ...; + int * restrict * restrict ppB = ...; + + **ppA=42; + **ppB=99; + return **ppA; // according to C99, 6.7.3.1 paragraph 4, **ppA and **ppB are not aliasing + +In order to allow this optimization, we also need to track the ``!noalias`` scope +when the ``@llvm.noalias`` intrinsic is introduced. The ``%p.addr`` parameter in the +``@llvm.provenance.noalias`` version will also get a ``ptr_provenance`` operand, +through the ``%prov.p.addr`` argument. + +In short, the ``@llvm.noalias`` and ``@llvm.provenance.noalias`` intrinsics are +treated as if they are a memory operation. + +Summary: + +* ``%p.decl = @llvm.noalias.decl %p.alloc, i64 objId, metadata !Scope`` +* ``%p.noalias = @llvm.noalias %p, %p.decl, %p.addr, i64 objId, metadata !Scope, !noalias !VisibleScopes`` +* ``%prov.p = @llvm.provenance.noalias %prov.p.2, %p.decl, %p.addr, %prov.p.addr, i64 objId, metadata !Scope, !noalias !VisibleScopes`` +* ``%p.guard = @llvm.noalias.arg.guard %p, %prov.p`` + +For alias analysis, this means that two ``@llvm.provenance.noalias`` intrinsics represent a +different ``object P0`` and ``object P1`` if: + +* ``%p0.addr`` and ``%p1.addr`` are different +* or, ``objId0`` and ``objId1`` are different +* or, ``!Scope0`` and ``!Scope1`` are different +* or we can prove that { ``%p0.addr``, ``%prov.p0.addr``, ``!VisibleScopes0`` } and + { ``%p1.addr``, ``%prov.p1.addr``, ``!VisibleScopes1`` } do not alias for both + intrinsics. (As if we treat each of the two ``@llvm.provenance.noalias`` as a + **store to ``%p.addr``** and we must prove that the two stores do not alias; + also see [#R8]_, question 2) + + +``Unknown function`` Scope +-------------------------- + +When the declaration of a restrict pointer is not visible, *C99, 6.7.3.1 +paragraph 2*, says that the pointer is assumed to start living from ``main``. + +This case can be handled by the ``unknown function`` scope, which is annotated +to the function itself. This can be treated as saying: the scope of this restrict +pointer starts somewhere outside this function. In such case, the +``@llvm.noalias`` and ``@llvm.provenance.noalias`` will not be associated with a +``@llvm.noalias.decl``. It is possible that after inlining, the scopes can be +refined to a declaration which became visible. 
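+
+As an illustration only, the following C++ sketch shows how a ``@llvm.noalias``
+with no associated declaration might be emitted through the ``IRBuilder``
+helper added by this patch. The function ``emitUnknownScopeNoAlias`` and its
+parameters are invented for the example; only ``CreateNoAliasPointer`` and the
+function-level ``!noalias`` metadata come from the patch itself:
+
+.. code-block:: c++
+
+  #include "llvm/IR/Constants.h"
+  #include "llvm/IR/IRBuilder.h"
+  #include <cassert>
+
+  using namespace llvm;
+
+  // Sketch: annotate a restrict pointer whose declaration is not visible.
+  // There is no llvm.noalias.decl, so the declaration handle is null and the
+  // scope is the function-level 'unknown function scope'.
+  static Value *emitUnknownScopeNoAlias(IRBuilder<> &B, Function &F,
+                                        Value *LoadedPtr, Value *PtrAddr) {
+    MDNode *UnknownScope = F.getMetadata(LLVMContext::MD_noalias);
+    assert(UnknownScope && "expected an unknown function scope");
+    Value *NullDecl = ConstantPointerNull::get(B.getInt8PtrTy());
+    return B.CreateNoAliasPointer(LoadedPtr, NullDecl, PtrAddr, UnknownScope,
+                                  "unknown.noalias");
+  }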
+
+For convenience, each function can have its own ``unknown function`` scope,
+specified by a ``noalias !UnknownScope`` metadata attribute on the function
+itself.
+
+
+Aggregate Copies
+----------------
+
+Restrictness is introduced by *reading a restrict pointer*. It is not always
+possible to add the necessary ``@llvm.noalias`` annotation when this is done. An
+aggregate containing one or more restrict pointers can be copied with a single
+``load``/``store`` pair or a ``@llvm.memcpy``. This makes it hard to track when
+a restrict pointer is copied over. As long as this is treated as a memory
+escape, there is no issue. At the moment that the copy is optimized away, we
+must be able to reconstruct the ``noalias`` dependencies for correctness.
+
+For this, a final intrinsic is introduced: ``@llvm.noalias.copy.guard``:
+
+* ``@llvm.noalias.copy.guard %p.addr, %p.decl, metadata !Indices, metadata !Scope``
+
+  * Guards a ``%p.addr`` object that is copied as a single aggregate or through
+    ``@llvm.memcpy``.
+  * ``%p.addr``: the object to guard
+  * ``%p.decl``: (when available) the ``@llvm.noalias.decl`` associated with the object
+  * ``!Indices``: refers to a metadata list. Each element of the list
+    refers to a set of indices where a restrict pointer is located, similar to
+    the indices for a ``getelementptr``.
+  * ``!Scope``: the declaration scope of ``%p.decl``
+
+This information allows *SROA* to introduce the needed ``@llvm.noalias``
+intrinsics when a struct is split up.
+
+Summary:
+
+* potential ``!noalias !UnknownScope`` annotation at function level
+* ``%p.decl = @llvm.noalias.decl %p.alloc, i64 objId, metadata !Scope``
+* ``%p.noalias = @llvm.noalias %p, %p.decl, %p.addr, i64 objId, metadata !Scope, !noalias !VisibleScopes``
+* ``%prov.p = @llvm.provenance.noalias %prov.p.2, %p.decl, %p.addr, %prov.p.addr, i64 objId, metadata !Scope, !noalias !VisibleScopes``
+* ``%p.guard = @llvm.noalias.arg.guard %p, %prov.p``
+* ``%p.addr.guard = @llvm.noalias.copy.guard %p.addr, %p.decl, metadata !Indices, metadata !Scope, !noalias !VisibleScopes``
+
+Optimization passes
+-------------------
+
+For correctness, some optimization passes must be aware of the *noalias
+intrinsics*: inlining [#R7]_, unrolling [#R6]_, loop rotation, ... Whenever a
+body that contains a ``@llvm.noalias.decl`` is duplicated, it must be decided
+how that duplication is done. Sometimes new unique scopes must be introduced,
+sometimes not.
+
+Other optimization passes can perform better by knowing about the
+``ptr_provenance``: when new ``load``/``store`` instructions are introduced,
+adding ``ptr_provenance`` information can result in better alias analysis for
+those instructions.
+
+An optimization pass can also produce a wrong transformation by omitting the
+``ptr_provenance`` operand while keeping the ``!noalias`` information. This can
+happen when the ``!noalias`` metadata is copied directly, instead of using
+``AAMDNodes`` and ``getAAMetadata``/``setAAMetadata``:
+
+.. code-block:: c++
+
+  AAMDNodes AAMD;
+  OldLoad->getAAMetadata(AAMD);
+  NewLoad->setAAMetadata(AAMD);
+
+  // Only do this if it is safe to copy over the 'ptr_provenance' info.
+  // The !noalias info will then also be copied over.
+  NewLoad->setAAMetadataNoAliasProvenance(AAMD);
+
+Possible Future Enhancements
+----------------------------
+
+* C++ ``alias_set``
+
+With this framework in place, it should be easy to extend it to support the
+*alias_set* proposal [#R3]_.
This can be done by tracking a separate *universe +object*, instead of *object P*. + + +Detailed Description +==================== + +This section gives a detailed description of the various intrinsics and +metadata. + +``!noalias`` Scope Metadata +--------------------------- + +The ``!noalias`` metadata consists of a *list of scopes*. Each scope is also +associated to the function to which it belongs. + +.. code-block:: llvm + + ; MetaData + !2 = !{!3} ; single scope: rpA + !3 = distinct !{!3, !4, !"foo: rpA"} ; variable 'rpA' + !4 = distinct !{!4, !"foo"} ; function 'foo' + !5 = !{!6} + !6 = distinct !{!6, !7, !"foo: unknown scope"} + !7 = distinct !{!7, !"foo"} + !11 = !{!12} ; single scope: rpC + !12 = distinct !{!12, !4, !"foo: rpC"} ; variable 'rpC' + !13 = !{!12, !3} ; multiple scopes: rpA and rpC + +This structure is used in following places: + +* as a single scope: + + * used as *metavalue* argument by ``@llvm.noalias.decl``, ``@llvm.noalias``, + ``@llvm.provenance.noalias``, ``@llvm.noalias.copy.guard``. (``!2, !11``) to + describe the scope that is associated with the noalias intrinsic. + * used as ``!noalias`` metadata on a function to describe the ``unknown + function scope``. (``!5``) + +* as one or more scopes: + + * used as ``!noalias`` metadata describingthe *visible scopes* on memory + instructions (``load``/``store``) and ``@llvm.noalias`` and + ``@llvm.provenance.noalias`` intrinsics. + +.. note:: The ``Unknown Function Scope`` is a special scope that is attached + through ``!noalias`` metadata on a function defintion. It identifies + the scope that is used for *noalias* pointers for which the + declaration is not known. + + +``ptr_provenance`` path +----------------------- + +The ``ptr_provenance`` path is reserved for tracking *noalias* information that +is associated to pointers. Value computations should be omitted as much as +possible. + +For memory instructions, this means that the actual pointer value and the +provenance information can be separated. This allows optimization passes to +rewrite the pointer computation and still keep the correct provenance information. + +A ``ptr_provenance`` path normally starts: + +* with the ``ptr_provenance`` operand of a ``load``/``store`` instruction +* with the ``ptr_provenance`` operand of the ``@llvm.noalias.arg.guard`` + intrinsic +* with the ``ptr.provenance`` operand of the ``@llvm.provenance.noalias`` + intrinsic + +As the ``@llvm.provenance.noalias``, can only be part of a ``ptr_provenance`` +path, its ``%p`` operand is also part of the ``ptr_provenance`` path. + +Although all uses of a ``@llvm.provenance.noalias`` must be on a +``ptr_provenance`` path, following the *based on* path must end at a normal +pointer value. This can for example be the input argument of a +function. Optimizations like inlining can provide extra information for such a +pointer. + +Examples +-------- + +This section contains some examples that are used in the description of the +intrinsics. + +.. _noaliasinfo_local_restrict: + +Example A: local restrict +""""""""""""""""""""""""" + +.. _noaliasinfo_local_restrict_C: + +C99 code with local restrict variables: + +.. code-block:: C + + int foo(int * pA, int i, int *pC) { + int * restrict rpA = pA; + int * restrict rpB = pA+i; + + // The three accesses are promised to not alias each other + *rpA = 10; + *rpB = 20; + *pC = 30; + + return *rpA+*rpB+*pC; + } + +.. _noaliasinfo_local_restrict_llvm_0: + +LLVM-IR code as produced by clang: + +.. 
code-block:: llvm + + ; Function Attrs: nounwind + define dso_local i32 @foo(i32* %pA, i32 %i, i32* %pC) #0 { + entry: + %pA.addr = alloca i32*, align 4 + %i.addr = alloca i32, align 4 + %pC.addr = alloca i32*, align 4 + %rpA = alloca i32*, align 4 + %rpB = alloca i32*, align 4 + store i32* %pA, i32** %pA.addr, align 4, !tbaa !3, !noalias !7 + store i32 %i, i32* %i.addr, align 4, !tbaa !11, !noalias !7 + store i32* %pC, i32** %pC.addr, align 4, !tbaa !3, !noalias !7 + %0 = bitcast i32** %rpA to i8* + call void @llvm.lifetime.start.p0i8(i64 4, i8* %0) #4, !noalias !7 + %1 = call i8* @llvm.noalias.decl.p0i8.p0p0i32.i64(i32** %rpA, i64 0, metadata !13), !noalias !7 + %2 = load i32*, i32** %pA.addr, align 4, !tbaa !3, !noalias !7 + store i32* %2, i32** %rpA, align 4, !tbaa !3, !noalias !7 + %3 = bitcast i32** %rpB to i8* + call void @llvm.lifetime.start.p0i8(i64 4, i8* %3) #4, !noalias !7 + %4 = call i8* @llvm.noalias.decl.p0i8.p0p0i32.i64(i32** %rpB, i64 0, metadata !14), !noalias !7 + %5 = load i32*, i32** %pA.addr, align 4, !tbaa !3, !noalias !7 + %6 = load i32, i32* %i.addr, align 4, !tbaa !11, !noalias !7 + %add.ptr = getelementptr inbounds i32, i32* %5, i32 %6 + store i32* %add.ptr, i32** %rpB, align 4, !tbaa !3, !noalias !7 + %7 = load i32*, i32** %rpA, align 4, !tbaa !3, !noalias !7 + %8 = call i32* @llvm.noalias.p0i32.p0i8.p0p0i32.i64(i32* %7, i8* %1, i32** %rpA, i64 0, metadata !13), + !tbaa !3, !noalias !7 + store i32 10, i32* %8, align 4, !tbaa !11, !noalias !7 + %9 = load i32*, i32** %rpB, align 4, !tbaa !3, !noalias !7 + %10 = call i32* @llvm.noalias.p0i32.p0i8.p0p0i32.i64(i32* %9, i8* %4, i32** %rpB, i64 0, metadata !14), + !tbaa !3, !noalias !7 + store i32 20, i32* %10, align 4, !tbaa !11, !noalias !7 + %11 = load i32*, i32** %pC.addr, align 4, !tbaa !3, !noalias !7 + store i32 30, i32* %11, align 4, !tbaa !11, !noalias !7 + %12 = load i32*, i32** %rpA, align 4, !tbaa !3, !noalias !7 + %13 = call i32* @llvm.noalias.p0i32.p0i8.p0p0i32.i64(i32* %12, i8* %1, i32** %rpA, i64 0, metadata !13), + !tbaa !3, !noalias !7 + %14 = load i32, i32* %13, align 4, !tbaa !11, !noalias !7 + %15 = load i32*, i32** %rpB, align 4, !tbaa !3, !noalias !7 + %16 = call i32* @llvm.noalias.p0i32.p0i8.p0p0i32.i64(i32* %15, i8* %4, i32** %rpB, i64 0, metadata !14), + !tbaa !3, !noalias !7 + %17 = load i32, i32* %16, align 4, !tbaa !11, !noalias !7 + %add = add nsw i32 %14, %17 + %18 = load i32*, i32** %pC.addr, align 4, !tbaa !3, !noalias !7 + %19 = load i32, i32* %18, align 4, !tbaa !11, !noalias !7 + %add1 = add nsw i32 %add, %19 + %20 = bitcast i32** %rpB to i8* + call void @llvm.lifetime.end.p0i8(i64 4, i8* %20) #4 + %21 = bitcast i32** %rpA to i8* + call void @llvm.lifetime.end.p0i8(i64 4, i8* %21) #4 + ret i32 %add1 + } + + ; .... + + !7 = !{!15, !17} + !13 = !{!15} + !14 = !{!17} + !15 = distinct !{!15, !16, !"foo: rpA"} + !16 = distinct !{!16, !"foo"} + !17 = distinct !{!17, !16, !"foo: rpB"} + +.. _noaliasinfo_local_restrict_llvm_1: + +LLVM-IR code during optimization: stack objects have already been optimized +away, ``@llvm.noalias`` has been converted into ``@llvm.provenance.noalias`` and +propagated to the ``ptr_provenance`` path. + +.. 
code-block:: llvm + + ; Function Attrs: nounwind + define dso_local i32 @foo(i32* %pA, i32 %i, i32* %pC) #0 { + entry: + %0 = call i8* @llvm.noalias.decl.p0i8.p0p0i32.i64(i32** null, i64 0, metadata !3) + %1 = call i8* @llvm.noalias.decl.p0i8.p0p0i32.i64(i32** null, i64 0, metadata !6) + %add.ptr = getelementptr inbounds i32, i32* %pA, i32 %i + %2 = call i32* @llvm.provenance.noalias.p0i32.p0i8.p0p0i32.p0p0i32.i64(i32* %pA, i8* %0, + i32** null, i32** undef, i64 0, metadata !3), !tbaa !8, !noalias !12 + store i32 10, i32* %pA, ptr_provenance i32* %2, align 4, !tbaa !13, !noalias !12 + %3 = call i32* @llvm.provenance.noalias.p0i32.p0i8.p0p0i32.p0p0i32.i64(i32* %add.ptr, i8* %1, + i32** null, i32** undef, i64 0, metadata !6), !tbaa !8, !noalias !12 + store i32 20, i32* %add.ptr, ptr_provenance i32* %3, align 4, !tbaa !13, !noalias !12 + store i32 30, i32* %pC, align 4, !tbaa !13, !noalias !12 + %4 = load i32, i32* %pA, ptr_provenance i32* %2, align 4, !tbaa !13, !noalias !12 + %5 = load i32, i32* %add.ptr, ptr_provenance i32* %3, align 4, !tbaa !13, !noalias !12 + %add = add nsw i32 %4, %5 + %add1 = add nsw i32 %add, 30 + ret i32 %add1 + } + + ; ... + + !3 = !{!4} + !4 = distinct !{!4, !5, !"foo: rpA"} + !5 = distinct !{!5, !"foo"} + !6 = !{!7} + !7 = distinct !{!7, !5, !"foo: rpB"} + !8 = !{!9, !9, i64 0} + !12 = !{!4, !7} + +.. _noaliasinfo_local_restrict_llvm_2: + +And LLVM-IR code after optimizations: alias analysis found the the stores do not +alias to each other and the values have been propagated. + +.. code-block:: llvm + + ; Function Attrs: nounwind + define dso_local i32 @foo(i32* nocapture %pA, i32 %i, i32* nocapture %pC) local_unnamed_addr #0 { + entry: + %0 = tail call i8* @llvm.noalias.decl.p0i8.p0p0i32.i64(i32** null, i64 0, metadata !3) + %1 = tail call i8* @llvm.noalias.decl.p0i8.p0p0i32.i64(i32** null, i64 0, metadata !6) + %add.ptr = getelementptr inbounds i32, i32* %pA, i32 %i + %2 = tail call i32* @llvm.provenance.noalias.p0i32.p0i8.p0p0i32.p0p0i32.i64(i32* %pA, i8* %0, + i32** null, i32** undef, i64 0, metadata !3), !tbaa !8, !noalias !12 + store i32 10, i32* %pA, ptr_provenance i32* %2, align 4, !tbaa !13, !noalias !12 + %3 = tail call i32* @llvm.provenance.noalias.p0i32.p0i8.p0p0i32.p0p0i32.i64(i32* nonnull %add.ptr, i8* %1, + i32** null, i32** undef, i64 0, metadata !6), !tbaa !8, !noalias !12 + store i32 20, i32* %add.ptr, ptr_provenance i32* %3, align 4, !tbaa !13, !noalias !12 + store i32 30, i32* %pC, align 4, !tbaa !13, !noalias !12 + ret i32 60 + } + + ; .... + + !3 = !{!4} + !4 = distinct !{!4, !5, !"foo: rpA"} + !5 = distinct !{!5, !"foo"} + !6 = !{!7} + !7 = distinct !{!7, !5, !"foo: rpB"} + + !12 = !{!4, !7} + +.. _noaliasinfo_pass_restrict: + +Example B: pass a restrict pointer +"""""""""""""""""""""""""""""""""" + +.. _noaliasinfo_pass_restrict_C: + +C99 code with local restrict variables: + +.. code-block:: C + + int fum(int * p); + + int foo(int * pA) { + int * restrict rpA = pA; + *rpA = 10; + + return fum(rpA); + } + + +.. _noaliasinfo_pass_restrict_llvm_0: + +LLVM-IR code as produced by clang: + +.. 
code-block:: llvm + + ; Function Attrs: nounwind + define dso_local i32 @foo(i32* %pA) #0 { + entry: + %pA.addr = alloca i32*, align 4 + %rpA = alloca i32*, align 4 + store i32* %pA, i32** %pA.addr, align 4, !tbaa !3, !noalias !7 + %0 = bitcast i32** %rpA to i8* + call void @llvm.lifetime.start.p0i8(i64 4, i8* %0) #5, !noalias !7 + %1 = call i8* @llvm.noalias.decl.p0i8.p0p0i32.i64(i32** %rpA, i64 0, metadata !7), !noalias !7 + %2 = load i32*, i32** %pA.addr, align 4, !tbaa !3, !noalias !7 + store i32* %2, i32** %rpA, align 4, !tbaa !3, !noalias !7 + %3 = load i32*, i32** %rpA, align 4, !tbaa !3, !noalias !7 + %4 = call i32* @llvm.noalias.p0i32.p0i8.p0p0i32.i64(i32* %3, i8* %1, i32** %rpA, i64 0, metadata !7), + !tbaa !3, !noalias !7 + store i32 10, i32* %4, align 4, !tbaa !10, !noalias !7 + %5 = load i32*, i32** %rpA, align 4, !tbaa !3, !noalias !7 + %6 = call i32* @llvm.noalias.p0i32.p0i8.p0p0i32.i64(i32* %5, i8* %1, i32** %rpA, i64 0, metadata !7), + !tbaa !3, !noalias !7 + %call = call i32 @fum(i32* %6), !noalias !7 + %7 = bitcast i32** %rpA to i8* + call void @llvm.lifetime.end.p0i8(i64 4, i8* %7) #5 + ret i32 %call + } + + +.. _noaliasinfo_pass_restrict_llvm_1: + +And LLVM-IR code after optimizations: stack objects have been optimized +away; ``@llvm.noalias`` has been converted into ``@llvm.provenance.noalias`` and +propagated to the ``ptr_provenance`` path. A ``@llvm.noalias.arg.guard`` has +been introduced to combine the ``ptr_provenance`` and the pointer value before +passing it to ``@fum``. + +.. code-block:: llvm + + ; Function Attrs: nounwind + define dso_local i32 @foo(i32* %pA) local_unnamed_addr #0 { + entry: + %0 = tail call i8* @llvm.noalias.decl.p0i8.p0p0i32.i64(i32** null, i64 0, metadata !3) + %1 = tail call i32* @llvm.provenance.noalias.p0i32.p0i8.p0p0i32.p0p0i32.i64(i32* %pA, i8* %0, + i32** null, i32** undef, i64 0, metadata !3), !tbaa !6, !noalias !3 + store i32 10, i32* %pA, ptr_provenance i32* %1, align 4, !tbaa !10, !noalias !3 + %.guard.guard.guard.guard = call i32* @llvm.noalias.arg.guard.p0i32.p0i32(i32* nonnull %pA, i32* %1) + %call = tail call i32 @fum(i32* nonnull %.guard.guard.guard.guard) #4, !noalias !3 + ret i32 %call + } + +``@llvm.noalias.decl`` Intrinsic +-------------------------------- + +Syntax: +""""""" + +.. code-block:: llvm + + %p.decl = + i8* call @llvm.noalias.decl + T* %p.alloca, i64 objId, metadata !Scope + + +Overview: +""""""""" + +Identify where in the control flow a *noalias* declaration happened. + +Arguments: +"""""""""" + +* ``%p.alloca``: points to the ``alloca`` to which this declaration is + associated. Or ``null`` when the ``alloca`` was optimized away. +* ``objId``: an ID that is associated to this declaration. *SROA* treats this as + an offset wrt to the original ``alloca``. +* ``!Scope``: a single scope that is associated with this declaration. + +Semantics: +"""""""""" + +Identify where in the control flow a *noalias* declaration happened. When this +intrinsic is duplicated, care must be taken to decide if the associated +``!Scope`` metadata must be duplicated as well (in case of loop unrolling) or +not (in case of code hoisting over then/else paths). + +The function returns a handle to the *noalias* declaration. + +Examples: +========= +See :ref:`Example A: local restrict` and +:ref:`Example B: pass a restrict pointer`. + + +``@llvm.noalias`` Intrinsic +--------------------------- + +Syntax: +""""""" + +.. 
code-block:: llvm + + %p.noalias = + T* call @llvm.noalias + T* %p, i8* %p.decl, + T** %p.addr, i64 objId, metadata !Scope, + !noalias !VisibleScopes + +Overview: +""""""""" + +Adds *noalias* provenance information to a pointer. + +Arguments: +"""""""""" + +* ``%p``: the original value of the pointer. +* ``%p.decl``: the associated *noalias* declaration (or ``null`` if the + declaration is not available). +* ``%p.addr``: the address of the pointer. +* ``objId``: the ID that is associated to the noalisa declaration. *SROA* treats + this as an offset wrt to the original ``alloca``. +* ``!Scope``: a single scope that is associated with the noalias declaration. +* ``!VisibleScopes``: the scopes related to *noalias* declarations that are + visible to location in the control flow where the noalias pointer is read from + memory. + +Semantics: +"""""""""" + +Adds *noalias* provenance information so that all memory instructions that +depend on ``%p.noalias`` are known to be based on a pointer with extra *noalias* +info. This is a mostly opaque intrinsic for optimizations. In order to not block +optimizations, it will be converted into a ``@llvm.provenance.noalias`` and +moved to the ``ptr_provenance`` path of memory instructions. + +When a ``%p.decl`` is available, following arguments must match the ones in that +declaration: ``objId``, ``!Scope``. + +When ``!Scope`` points to the *unknown function scope*, ``%p.decl`` must be +``null``. + +.. note:: + ``@llvm.noalias`` can be seen as a shortcut for ``@llvm.provenance.noalias`` + and ``@llvm.noalias.arg.guard``. See + :ref:`@llvm.noalias vs @llvm.provenance.noalias`. + +Examples: +========= +See :ref:`Example A: local restrict` and +:ref:`Example B: pass a restrict pointer`. + + + +``@llvm.provenance.noalias`` Intrinsic +-------------------------------------- + +Syntax: +""""""" + +.. code-block:: llvm + + %prov.p = + T* call @llvm.provenance.noalias + T* %p, i8* %p.decl, + T** %p.addr, T** %prov.p.addr, i64 objId, metadata !Scope, + !noalias !VisibleScopes`` + +Overview: +""""""""" + +Adds *noalias* provenance information to a pointer. This version, which is +similar to ``@llvm.noalias``, must only be found on the ``ptr_provenance`` path. + +Arguments: +"""""""""" + +* ``%p``: the original value of the pointer, or a depending + ``@llvm.provenance.noalias``. +* ``%p.decl``: the associated *noalias* declaration (or ``null`` if the + declaration is not available). +* ``%p.addr``: the address of the pointer. +* ``%prov.p.addr``: the ``ptr_provenance`` associated to ``%p.addr``. If this is + ``Undef``, the original ``%p.addr`` must be followed. +* ``objId``: the ID that is associated to the noalisa declaration. *SROA* treats + this as an offset wrt to the original ``alloca``. +* ``!Scope``: a single scope that is associated with the noalias declaration. +* ``!VisibleScopes``: the scopes related to *noalias* declarations that are + visible to location in the control flow where the noalias pointer is read from + memory. + +Semantics: +"""""""""" + +Adds *noalias* provenance information to a pointer. This is similar to +``@llvm.noalias``, but this version must only be found on the ``ptr_provenance`` +path of memory instructions or of the ``@llvm.noalias.arg.guard`` intrinsic. + +It can also be found on the path of the ``%prov.p.addr`` and on the ``%p`` +arguments of another ``@llvm.provenance.noalias`` intrinsic. + +When a ``%p.decl`` is available, following arguments must match the ones in that +declaration: ``objId``, ``!Scope``. 
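+
+As a hedged illustration, the following C++ sketch shows roughly how the
+intrinsic can be materialized and attached to the ``ptr_provenance`` operand of
+a load. The helper ``emitGuardedLoad`` and its parameters are invented for the
+example; ``CreateProvenanceNoAliasPlain`` and ``setNoaliasProvenanceOperand``
+are the ``IRBuilder``/``LoadInst`` extensions added by this patch:
+
+.. code-block:: c++
+
+  #include "llvm/IR/IRBuilder.h"
+
+  using namespace llvm;
+
+  // Sketch: materialize a llvm.provenance.noalias and hang it off the
+  // ptr_provenance operand of a load.  The pointer operand itself stays
+  // untouched; only the provenance carries the noalias information.
+  static LoadInst *emitGuardedLoad(IRBuilder<> &B, Type *Ty, Value *Ptr,
+                                   Value *NoAliasDecl, Value *PtrAddr,
+                                   MDNode *DeclScope, MDNode *VisibleScopes) {
+    Instruction *Prov = B.CreateProvenanceNoAliasPlain(
+        Ptr, NoAliasDecl, PtrAddr,
+        UndefValue::get(PtrAddr->getType()), // %prov.p.addr: follow %p.addr
+        B.getInt64(0),                       // objId
+        DeclScope, "prov.p");
+    Prov->setMetadata(LLVMContext::MD_noalias, VisibleScopes);
+    LoadInst *LI = B.CreateLoad(Ty, Ptr);
+    LI->setNoaliasProvenanceOperand(Prov);
+    LI->setMetadata(LLVMContext::MD_noalias, VisibleScopes);
+    return LI;
+  }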
+ +When ``!Scope`` points to the *unknown function scope*, ``%p.decl`` must be +``null``. + +Examples: +========= +See :ref:`Example A: local restrict` and +:ref:`Example B: pass a restrict pointer`. + + +``@llvm.noalias.arg.guard`` Intrinsic +------------------------------------- + +Syntax: +""""""" + +.. code-block:: llvm + + %p.guard = + T* call @llvm.noalias.arg.guard + T* %p, T* %prov.p + +Overview: +""""""""" + +Combines the value of a pointer with its *noalias* provenance information. + +Arguments: +"""""""""" + +* ``%p``: the value of the pointer +* ``%prov.p``: the provenance information associated to ``%p`` + + +Semantics: +"""""""""" + +Combines the value of a pointer with its *noalias* provenance information. This +is normally introduced when converting ``@llvm.noalias`` into +``@llvm.provenance.noalias`` and the pointer is passed as a function +argument, returned from a function or stored to memory. This intrinsic ensures +that at a later time (after inlining and/or other optimizations), the provenance +information can be propagated to the memory instructions depending on the guard. + +Examples: +========= +See :ref:`Example B: pass a restrict pointer`. + + +``@llvm.noalias.copy.guard`` Intrinsic +-------------------------------------- + +Syntax: +""""""" + +.. code-block:: llvm + + %p.addr.guard = + T* call @llvm.noalias.copy.guard + T* %p.addr, i8* %p.decl, + metadata !Indices, + metadata !Scope, + !noalias !VisibleScopes + +Overview: +""""""""" + +Annotates that the memory block pointed to by ``%p.addr`` contains *noalias +annotated pointers* (restrict pointers). + +Arguments: +"""""""""" + +* ``%p.addr``: points to the block of memory that will be copied +* ``%p.decl``: the associated *noalias* declaration (or ``null`` if the + declaration is not available). +* ``!Indices``: the set of indices, describing on what locations a *noalias* + pointer can be found. +* ``!Scope``: a single scope that is associated with the noalias declaration. +* ``!VisibleScopes``: the scopes related to *noalias* declarations that are + visible to location in the control flow where the noalias pointer is read from + memory. + +Semantics: +"""""""""" + +Annotates that the memory block pointed to by ``%p.addr`` contains *noalias +annotated pointers* (restrict pointers). The ``!Indices`` indicate where in +memory the *noalias* pointers are located. + +When a block copy (aggregate load/store or ``@llvm.memcpy``) uses +``%p.addr.guard`` as a source, *SROA* is able to reconstruct the implied +``@llvm.noalias`` intrinsics. This ensure that the *noalias* information for +those pointers is tracked. + +When a ``%p.decl`` is available, the ``!Scope`` argument must match the one in +that declaration. + +When ``!Scope`` points to the *unknown function scope*, ``%p.decl`` must be +``null``. + + +``!Indices`` points to a list of metadata. Each entry in that list contains a +set of ``i32`` values, corresponding to the indices that would be past to +``getelementptr`` to retrieve a field in the struct. When the ``i32`` value is +**-1**, it indicates that any possible value should be checked (0, 1, 2, ...), +as long as the resulting address fits the size of the memory copy. + +Examples: +""""""""" + +Code example with a ``llvm.noalias.copy.guard``: + +* Note the **-1** to represent ``a[i]`` in the indices of ``!15``. +* After optimization, the ``alloca`` is gone. The ``llvm.memcpy`` is also gone, + but the remaining dependency on restrict pointers is kept in the + ``llvm.noalias.provenance``. 
Two are needed for this example: one related to + the declaration of ``struct B tmp``. One related to the ``unknown function + scope``. + +.. code-block:: C + + struct B { + int * restrict p; + struct A { + int m; + int * restrict p; + } a[5]; + }; + + + void FOO(struct B* b) { + struct B tmp = *b; + + *tmp.a[1].p=32; + } + +Results in following code: + +.. code-block:: llvm + + %struct.B = type { i32*, [5 x %struct.A] } + %struct.A = type { i32, i32* } + + ; Function Attrs: nounwind + define dso_local void @FOO(%struct.B* %b) #0 !noalias !3 { + entry: + %b.addr = alloca %struct.B*, align 4 + %tmp = alloca %struct.B, align 4 + store %struct.B* %b, %struct.B** %b.addr, align 4, !tbaa !6, !noalias !10 + %0 = bitcast %struct.B* %tmp to i8* + call void @llvm.lifetime.start.p0i8(i64 44, i8* %0) #5, !noalias !10 + %1 = call i8* @llvm.noalias.decl.p0i8.p0s_struct.Bs.i64(%struct.B* %tmp, i64 0, metadata !12), + !noalias !10 + %2 = load %struct.B*, %struct.B** %b.addr, align 4, !tbaa !6, !noalias !10 + %3 = call %struct.B* @llvm.noalias.copy.guard.p0s_struct.Bs.p0i8(%struct.B* %2, + i8* null, metadata !13, metadata !3) + %4 = bitcast %struct.B* %tmp to i8* + %5 = bitcast %struct.B* %3 to i8* + call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 %4, i8* align 4 %5, i32 44, i1 false), + !tbaa.struct !16, !noalias !10 + %a = getelementptr inbounds %struct.B, %struct.B* %tmp, i32 0, i32 1 + %arrayidx = getelementptr inbounds [5 x %struct.A], [5 x %struct.A]* %a, i32 0, i32 1 + %p = getelementptr inbounds %struct.A, %struct.A* %arrayidx, i32 0, i32 1 + %6 = load i32*, i32** %p, align 4, !tbaa !18, !noalias !10 + %7 = call i32* @llvm.noalias.p0i32.p0i8.p0p0i32.i64(i32* %6, + i8* %1, i32** %p, i64 0, metadata !12), !tbaa !18, !noalias !10 + store i32 32, i32* %7, align 4, !tbaa !21, !noalias !10 + %8 = bitcast %struct.B* %tmp to i8* + call void @llvm.lifetime.end.p0i8(i64 44, i8* %8) #5 + ret void + } + + ... + + !3 = !{!4} + !4 = distinct !{!4, !5, !"FOO: unknown scope"} + !5 = distinct !{!5, !"FOO"} + !10 = !{!11, !4} + !11 = distinct !{!11, !5, !"FOO: tmp"} + !12 = !{!11} + !13 = !{!14, !15} + !14 = !{i32 -1, i32 0} + !15 = !{i32 -1, i32 1, i32 -1, i32 1} + +And after optimizations: + +.. code-block:: llvm + + ; Function Attrs: nounwind + define dso_local void @FOO(%struct.B* nocapture %b) local_unnamed_addr #0 !noalias !3 { + entry: + %0 = tail call i8* @llvm.noalias.decl.p0i8.p0p0i32.i64(i32** null, i64 16, metadata !6) + %tmp.sroa.69.0..sroa_idx10 = getelementptr inbounds %struct.B, %struct.B* %b, i32 0, i32 1, i32 1, i32 1 + %tmp.sroa.69.0.copyload = load i32*, i32** %tmp.sroa.69.0..sroa_idx10, align 4, !tbaa.struct !8, !noalias !14 + %1 = tail call i32* @llvm.provenance.noalias.p0i32.p0i8.p0p0i32.p0p0i32.i64(i32* %tmp.sroa.69.0.copyload, + i8* null, i32** nonnull %tmp.sroa.69.0..sroa_idx10, i32** undef, i64 0, metadata !3) + %2 = tail call i32* @llvm.provenance.noalias.p0i32.p0i8.p0p0i32.p0p0i32.i64(i32* %1, + i8* %0, i32** null, i32** undef, i64 16, metadata !6), !tbaa !15, !noalias !14 + store i32 32, i32* %tmp.sroa.69.0.copyload, ptr_provenance i32* %2, align 4, !tbaa !18, !noalias !14 + ret void + } + + ... 
+ + !3 = !{!4} + !4 = distinct !{!4, !5, !"FOO: unknown scope"} + !5 = distinct !{!5, !"FOO"} + !6 = !{!7} + !7 = distinct !{!7, !5, !"FOO: tmp"} + + +Other usages of ``noalias`` inside LLVM +======================================= + + +``noalias`` attribute on parameters or function +----------------------------------------------- + +This indicates that memory locations accessed via pointer values +:ref:`based ` on the argument or return value are not also +accessed, during the execution of the function, via pointer values not +*based* on the argument or return value. + +See :ref:`noalias attribute` + +``noalias`` and ``alias.scope`` Metadata +---------------------------------------- + +``noalias`` and ``alias.scope`` metadata provide the ability to specify generic +noalias memory-access sets. + +See :ref:`noalias and alias.scope Metadata ` + +The usage of this construct is not recommended, as it can result in wrong code +when inlining and loop unrolling optimizations are applied. + + +References +========== + +.. rubric:: References + +.. [#R1] https://en.wikipedia.org/wiki/Restrict +.. [#R2] WG14 N1256: http://www.open-std.org/jtc1/sc22/wg14/www/docs/n1256.pdf (Chapter 6.7.3.1 Formal definition of restrict) +.. [#R3] WG21 N4150: http://www.open-std.org/jtc1/sc22/wg21/docs/papers/2014/n4150.pdf +.. [#R4] https://reviews.llvm.org/D9375 Hal Finkel's local restrict patches +.. [#R5] https://bugs.llvm.org/show_bug.cgi?id=39240 "clang/llvm looses restrictness, resulting in wrong code" +.. [#R6] https://bugs.llvm.org/show_bug.cgi?id=39282 "Loop unrolling incorrectly duplicates noalias metadata" +.. [#R7] https://www.godbolt.org/z/cUk6To "testcase showing that LLVM-IR is not able to differentiate if restrict is done inside or outside the loop" +.. [#R8] DR294: http://www.open-std.org/jtc1/sc22/wg14/www/docs/dr_294.htm +.. [#R9] WG14 N2250: http://www.open-std.org/jtc1/sc22/wg14/www/docs/n2260.pdf Clarifying the restrict Keyword v2 +.. [#R10] RFC: Full 'restrict' support in LLVM https://lists.llvm.org/pipermail/llvm-dev/2019-October/135672.html Index: llvm/docs/UserGuides.rst =================================================================== --- llvm/docs/UserGuides.rst +++ llvm/docs/UserGuides.rst @@ -43,6 +43,7 @@ MergeFunctions MCJITDesignAndImplementation NVPTXUsage + NoAliasInfo Phabricator Passes ReportingGuide @@ -121,6 +122,10 @@ Information on how to write a new alias analysis implementation or how to use existing analyses. +:doc:`NoAliasInfo` + Information on how provenance based alias analysis, used to implement C99 + restrict, works. + :doc:`MemorySSA` Information about the MemorySSA utility in LLVM, as well as how to use it. Index: llvm/include/llvm/Analysis/AliasSetTracker.h =================================================================== --- llvm/include/llvm/Analysis/AliasSetTracker.h +++ llvm/include/llvm/Analysis/AliasSetTracker.h @@ -133,6 +133,10 @@ } delete this; } + void updateNoAliasProvenanceIfMatching(Value *Old, Value *New) { + if (AAInfo.NoAliasProvenance == Old) + AAInfo.NoAliasProvenance = New; + } }; // Doubly linked list of nodes. 
@@ -349,6 +353,25 @@ // Map from pointers to their node PointerMapType PointerMap; + // FIXME: NOTE: when we get a callback, the resulting update might be _SLOW_ + class ASTProvenanceCallbackVH final : public CallbackVH { + AliasSetTracker *AST; + + void deleted() override; + void allUsesReplacedWith(Value *) override; + + public: + ASTProvenanceCallbackVH(Value *V, AliasSetTracker *AST = nullptr); + ASTProvenanceCallbackVH &operator=(Value *V); + }; + + /// Traits to tell DenseMap that tell us how to compare and hash the value + /// handle. + struct ASTProvenanceCallbackVHDenseMapInfo : public DenseMapInfo {}; + using ProvenanceSetType = + DenseSet; + ProvenanceSetType ProvenancePointers; + public: /// Create an empty collection of AliasSets, and use the specified alias /// analysis object to disambiguate load and store addresses. @@ -407,6 +430,19 @@ /// tracker already knows about a value, it will ignore the request. void copyValue(Value *From, Value *To); + /// This method is used to remove a ptr_provenance pointer value from the + /// AliasSetTracker entirely. It should be used when an instruction is deleted + /// from the program to update the AST. If you don't use this, you would have + /// dangling pointers to deleted instructions. + void deleteProvenanceValue(Value *PtrVal); + + /// This method should be used whenever a preexisting ptr_provenance value in + /// the program is copied or cloned, introducing a new value. Note that it is + /// ok for clients that use this method to introduce the same value multiple + /// times: if the tracker already knows about a value, it will ignore the + /// request. + void copyProvenanceValue(Value *From, Value *To); + using iterator = ilist::iterator; using const_iterator = ilist::const_iterator; Index: llvm/include/llvm/Analysis/BasicAliasAnalysis.h =================================================================== --- llvm/include/llvm/Analysis/BasicAliasAnalysis.h +++ llvm/include/llvm/Analysis/BasicAliasAnalysis.h @@ -61,6 +61,7 @@ DominatorTree *DT; LoopInfo *LI; PhiValues *PV; + unsigned RecurseLevel = 0; public: BasicAAResult(const DataLayout &DL, const Function &F, Index: llvm/include/llvm/Analysis/ScopedNoAliasAA.h =================================================================== --- llvm/include/llvm/Analysis/ScopedNoAliasAA.h +++ llvm/include/llvm/Analysis/ScopedNoAliasAA.h @@ -20,6 +20,7 @@ #include namespace llvm { +class DominatorTree; class Function; class MDNode; @@ -30,6 +31,8 @@ friend AAResultBase; public: + ScopedNoAliasAAResult(DominatorTree *DT) : AAResultBase(), DT(DT) {} + /// Handle invalidation events from the new pass manager. /// /// By definition, this result is stateless and so remains valid. @@ -45,8 +48,24 @@ ModRefInfo getModRefInfo(const CallBase *Call1, const CallBase *Call2, AAQueryInfo &AAQI); + // FIXME: This interface can be removed once the legacy-pass-manager support + // is removed. + void setDT(DominatorTree *DT) { this->DT = DT; } + private: bool mayAliasInScopes(const MDNode *Scopes, const MDNode *NoAlias) const; + + bool findCompatibleNoAlias(const Value *P, const MDNode *ANoAlias, + const MDNode *BNoAlias, const DataLayout &DL, + SmallPtrSetImpl &Visited, + SmallVectorImpl &CompatibleSet, + int Depth = 0); + bool noAliasByIntrinsic(const MDNode *ANoAlias, const Value *APtr, + const MDNode *BNoAlias, const Value *BPtr, + const CallBase *CallA, const CallBase *CallB, + AAQueryInfo &AAQI); + + DominatorTree *DT; }; /// Analysis pass providing a never-invalidated alias analysis result. 
@@ -70,12 +89,18 @@ ScopedNoAliasAAWrapperPass(); - ScopedNoAliasAAResult &getResult() { return *Result; } + ScopedNoAliasAAResult &getResult() { + setDT(); + return *Result; + } const ScopedNoAliasAAResult &getResult() const { return *Result; } bool doInitialization(Module &M) override; bool doFinalization(Module &M) override; void getAnalysisUsage(AnalysisUsage &AU) const override; + +private: + void setDT(); }; //===--------------------------------------------------------------------===// Index: llvm/include/llvm/Analysis/TargetTransformInfoImpl.h =================================================================== --- llvm/include/llvm/Analysis/TargetTransformInfoImpl.h +++ llvm/include/llvm/Analysis/TargetTransformInfoImpl.h @@ -505,6 +505,11 @@ case Intrinsic::is_constant: case Intrinsic::lifetime_start: case Intrinsic::lifetime_end: + case Intrinsic::noalias_decl: + case Intrinsic::noalias: + case Intrinsic::provenance_noalias: + case Intrinsic::noalias_arg_guard: + case Intrinsic::noalias_copy_guard: case Intrinsic::objectsize: case Intrinsic::ptr_annotation: case Intrinsic::var_annotation: Index: llvm/include/llvm/Analysis/ValueTracking.h =================================================================== --- llvm/include/llvm/Analysis/ValueTracking.h +++ llvm/include/llvm/Analysis/ValueTracking.h @@ -365,12 +365,16 @@ /// the specified value, returning the original object being addressed. Note /// that the returned value has pointer type if the specified value does. If /// the MaxLookup value is non-zero, it limits the number of instructions to - /// be stripped off. + /// be stripped off. If a NoAlias vector is provided, it is filled with any + /// llvm.noalias intrinsics looked through to find the underlying object. Value *GetUnderlyingObject(Value *V, const DataLayout &DL, - unsigned MaxLookup = 6); - inline const Value *GetUnderlyingObject(const Value *V, const DataLayout &DL, - unsigned MaxLookup = 6) { - return GetUnderlyingObject(const_cast(V), DL, MaxLookup); + unsigned MaxLookup = 6, + SmallVectorImpl *NoAlias = nullptr); + inline const Value * + GetUnderlyingObject(const Value *V, const DataLayout &DL, + unsigned MaxLookup = 6, + SmallVectorImpl *NoAlias = nullptr) { + return GetUnderlyingObject(const_cast(V), DL, MaxLookup, NoAlias); } /// This method is similar to GetUnderlyingObject except that it can @@ -400,11 +404,14 @@ /// /// Since A[i] and A[i-1] are independent pointers, getUnderlyingObjects /// should not assume that Curr and Prev share the same underlying object thus - /// it shouldn't look through the phi above. + /// it shouldn't look through the phi above. If a NoAlias vector is provided, + /// it is filled with any llvm.noalias intrinsics looked through to find the + /// underlying objects. void GetUnderlyingObjects(const Value *V, SmallVectorImpl &Objects, const DataLayout &DL, LoopInfo *LI = nullptr, - unsigned MaxLookup = 6); + unsigned MaxLookup = 6, + SmallVectorImpl *NoAlias = nullptr); /// This is a wrapper around GetUnderlyingObjects and adds support for basic /// ptrtoint+arithmetic+inttoptr sequences. Index: llvm/include/llvm/Analysis/VectorUtils.h =================================================================== --- llvm/include/llvm/Analysis/VectorUtils.h +++ llvm/include/llvm/Analysis/VectorUtils.h @@ -456,8 +456,11 @@ /// metadata value that covers all of the individual values), and set I's /// metadata for M equal to the intersection value. /// +/// When RemoveNoAlias is true, MD_noalias will always get a null value. 
+/// /// This function always sets a (possibly null) value for each K in Kinds. -Instruction *propagateMetadata(Instruction *I, ArrayRef VL); +Instruction *propagateMetadata(Instruction *I, ArrayRef VL, + bool RemoveNoAlias = true); /// Create a mask that filters the members of an interleave group where there /// are gaps. Index: llvm/include/llvm/IR/IRBuilder.h =================================================================== --- llvm/include/llvm/IR/IRBuilder.h +++ llvm/include/llvm/IR/IRBuilder.h @@ -788,6 +788,63 @@ CallInst *CreateAssumption(Value *Cond, ArrayRef OpBundles = llvm::None); + /// Create a llvm.noalias.decl intrinsic call. + Instruction *CreateNoAliasDeclaration(Value *AllocaPtr, Value *ObjId, + Value *Scope); + Instruction *CreateNoAliasDeclaration(Value *AllocaPtr, uint64_t ObjId, + Value *Scope) { + return CreateNoAliasDeclaration( + AllocaPtr, + ConstantInt::get(IntegerType::getInt64Ty(getContext()), ObjId), Scope); + } + Instruction *CreateNoAliasDeclaration(Value *AllocaPtr, MDNode *ScopeTag) { + uint64_t Zero = 0; + return CreateNoAliasDeclaration(AllocaPtr, Zero, + MetadataAsValue::get(Context, ScopeTag)); + } + + /// Create a llvm.noalias intrinsic call. + Instruction *CreateNoAliasPointer(Value *Ptr, Value *NoAliasDecl, + Value *AddrP, MDNode *ScopeTag, + const Twine &Name = "", + uint64_t ObjectId = 0) { + return CreateNoAliasPointer(Ptr, NoAliasDecl, AddrP, + MetadataAsValue::get(getContext(), ScopeTag), + Name, ObjectId); + } + Instruction *CreateNoAliasPointer(Value *Ptr, Value *NoAliasDecl, + Value *AddrP, Value *ScopeTag, + const Twine &Name = "", + uint64_t ObjectId = 0); + + /// Create a llvm.provenance.noalias intrinsic call. + Instruction *CreateProvenanceNoAliasPlain(Value *Ptr, Value *NoAliasDecl, + Value *AddrP, Value *AddrP_Provenance, + Value *ObjId, MDNode *ScopeTag, + const Twine &Name = ""); + Instruction *CreateProvenanceNoAliasPlain(Value *Ptr, Value *NoAliasDecl, + Value *AddrP, Value *AddrP_Provenance, + Value *ObjId, Value *ScopeValue, + const Twine &Name = ""); + + /// Create a llvm.noalias.arg.guard intrinsic call. + Instruction *CreateNoAliasArgGuard(Value *Ptr, Value *Provenance, + const Twine &Name = ""); + + /// Create a llvm.noalias_copy_guard intrinsic call. + Instruction *CreateNoAliasCopyGuard(Value *BasePtr, Value *NoAliasDel, + ArrayRef EncodedIndices, + MDNode *ScopeTag, const Twine &Name = ""); + +private: + /// Helper for creating noalias intrinsics + Instruction *CreateGenericNoAliasIntrinsic(Intrinsic::ID ID, Value *arg0, + ArrayRef args_opt, + ArrayRef MDNodes, + ArrayRef MDValues, + const Twine &Name = ""); + +public: /// Create a call to the experimental.gc.statepoint intrinsic to /// start a new statepoint sequence. 
CallInst *CreateGCStatepointCall(uint64_t ID, uint32_t NumPatchBytes, Index: llvm/include/llvm/IR/InstVisitor.h =================================================================== --- llvm/include/llvm/IR/InstVisitor.h +++ llvm/include/llvm/IR/InstVisitor.h @@ -167,7 +167,7 @@ RetTy visitICmpInst(ICmpInst &I) { DELEGATE(CmpInst);} RetTy visitFCmpInst(FCmpInst &I) { DELEGATE(CmpInst);} RetTy visitAllocaInst(AllocaInst &I) { DELEGATE(UnaryInstruction);} - RetTy visitLoadInst(LoadInst &I) { DELEGATE(UnaryInstruction);} + RetTy visitLoadInst(LoadInst &I) { DELEGATE(Instruction); } RetTy visitStoreInst(StoreInst &I) { DELEGATE(Instruction);} RetTy visitAtomicCmpXchgInst(AtomicCmpXchgInst &I) { DELEGATE(Instruction);} RetTy visitAtomicRMWInst(AtomicRMWInst &I) { DELEGATE(Instruction);} Index: llvm/include/llvm/IR/Instruction.h =================================================================== --- llvm/include/llvm/IR/Instruction.h +++ llvm/include/llvm/IR/Instruction.h @@ -344,8 +344,15 @@ /// @} /// Sets the metadata on this instruction from the AAMDNodes structure. + /// NOTE: The ptr_provenance must be copied over explicitely using + /// 'setAAMetadataNoAliasProvenance'. This must only be done if the dominator + /// relationship between the ptr_provenance and this instruction holds. void setAAMetadata(const AAMDNodes &N); + /// Sets (only) the ptr_provenance. Normally used in combination with + /// setAAMetadata. + void setAAMetadataNoAliasProvenance(const AAMDNodes &N); + /// Retrieve the raw weight values of a conditional branch or select. /// Returns true on success with profile weights filled in. /// Returns false if no metadata or invalid metadata was found. Index: llvm/include/llvm/IR/Instructions.h =================================================================== --- llvm/include/llvm/IR/Instructions.h +++ llvm/include/llvm/IR/Instructions.h @@ -170,7 +170,7 @@ /// An instruction for reading from memory. This uses the SubclassData field in /// Value to store whether or not the load is volatile. -class LoadInst : public UnaryInstruction { +class LoadInst : public Instruction { using VolatileField = BoolBitfieldElementT<0>; using AlignmentField = AlignmentBitfieldElementT; using OrderingField = AtomicOrderingBitfieldElementT; @@ -206,6 +206,16 @@ Align Align, AtomicOrdering Order, SyncScope::ID SSID, BasicBlock *InsertAtEnd); + ~LoadInst() { + // FIXME: needed by operator delete + setLoadInstNumOperands(2); + } + // allocate space for exactly two operands + void *operator new(size_t s) { return User::operator new(s, 2); } + + /// Transparently provide more efficient getOperand methods. + DECLARE_TRANSPARENT_OPERAND_ACCESSORS(Value); + /// Return true if this is a load from a volatile memory location. 
bool isVolatile() const { return getSubclassData(); } @@ -272,6 +282,23 @@ return getPointerOperandType()->getPointerAddressSpace(); } + bool hasNoaliasProvenanceOperand() const { return getNumOperands() == 2; } + Value *getNoaliasProvenanceOperand() const { + assert(hasNoaliasProvenanceOperand() && "we need a ptr_provenance"); + return getOperand(1); + } + static unsigned getNoaliasProvenanceOperandIndex() { return 1U; } + void setNoaliasProvenanceOperand(Value *Provenance) { + assert(Provenance && "Needs a provenance"); + setLoadInstNumOperands(2); + setOperand(1, Provenance); + } + void removeNoaliasProvenanceOperand() { + assert(hasNoaliasProvenanceOperand() && "nothing to remove"); + // make sure 'uses' are updated + setOperand(getNoaliasProvenanceOperandIndex(), nullptr); + setLoadInstNumOperands(1); + } // Methods for support type inquiry through isa, cast, and dyn_cast: static bool classof(const Instruction *I) { return I->getOpcode() == Instruction::Load; @@ -294,6 +321,11 @@ SyncScope::ID SSID; }; +template <> +struct OperandTraits : public OptionalOperandTraits {}; + +DEFINE_TRANSPARENT_OPERAND_ACCESSORS(LoadInst, Value) + //===----------------------------------------------------------------------===// // StoreInst Class //===----------------------------------------------------------------------===// @@ -330,10 +362,11 @@ StoreInst(Value *Val, Value *Ptr, bool isVolatile, Align Align, AtomicOrdering Order, SyncScope::ID SSID, BasicBlock *InsertAtEnd); - // allocate space for exactly two operands - void *operator new(size_t s) { - return User::operator new(s, 2); + ~StoreInst() { + // FIXME: needed by operator delete + setStoreInstNumOperands(3); } + void *operator new(size_t s) { return User::operator new(s, 3); } /// Return true if this is a store to a volatile memory location. bool isVolatile() const { return getSubclassData(); } @@ -407,6 +440,23 @@ return getPointerOperandType()->getPointerAddressSpace(); } + bool hasNoaliasProvenanceOperand() const { return getNumOperands() == 3; } + Value *getNoaliasProvenanceOperand() const { + assert(hasNoaliasProvenanceOperand() && "we need a ptr_provenance"); + return getOperand(2); + } + static unsigned getNoaliasProvenanceOperandIndex() { return 2U; } + void setNoaliasProvenanceOperand(Value *Provenance) { + assert(Provenance && "Needs a provenance"); + setStoreInstNumOperands(3); + setOperand(2, Provenance); + } + void removeNoaliasProvenanceOperand() { + assert(hasNoaliasProvenanceOperand() && "nothing to remove"); + // make sure 'uses' are updated + setOperand(getNoaliasProvenanceOperandIndex(), nullptr); + setStoreInstNumOperands(2); + } // Methods for support type inquiry through isa, cast, and dyn_cast: static bool classof(const Instruction *I) { return I->getOpcode() == Instruction::Store; @@ -430,8 +480,7 @@ }; template <> -struct OperandTraits : public FixedNumOperandTraits { -}; +struct OperandTraits : public OptionalOperandTraits {}; DEFINE_TRANSPARENT_OPERAND_ACCESSORS(StoreInst, Value) Index: llvm/include/llvm/IR/IntrinsicInst.h =================================================================== --- llvm/include/llvm/IR/IntrinsicInst.h +++ llvm/include/llvm/IR/IntrinsicInst.h @@ -937,6 +937,19 @@ } }; + /// Returns true when two llvm.provenance.noalias represent the same noalias info. 
+ inline bool areProvenanceNoAliasCompatible(const IntrinsicInst *lhs, + const IntrinsicInst *rhs) { + assert(lhs->getIntrinsicID() == Intrinsic::provenance_noalias && + rhs->getIntrinsicID() == Intrinsic::provenance_noalias && + "Can only check noalias compatibility of provenance.noalias"); + return (lhs->getOperand(Intrinsic::ProvenanceNoAliasScopeArg) == + rhs->getOperand(Intrinsic::ProvenanceNoAliasScopeArg)) && + (lhs->getOperand(Intrinsic::ProvenanceNoAliasIdentifyPObjIdArg) == + rhs->getOperand(Intrinsic::ProvenanceNoAliasIdentifyPObjIdArg)) && + (lhs->getOperand(Intrinsic::ProvenanceNoAliasIdentifyPArg) == + rhs->getOperand(Intrinsic::ProvenanceNoAliasIdentifyPArg)); + } } // end namespace llvm #endif // LLVM_IR_INTRINSICINST_H Index: llvm/include/llvm/IR/Intrinsics.h =================================================================== --- llvm/include/llvm/IR/Intrinsics.h +++ llvm/include/llvm/IR/Intrinsics.h @@ -34,6 +34,27 @@ /// function known by LLVM. The enum values are returned by /// Function::getIntrinsicID(). namespace Intrinsic { + // Abstraction for the arguments of the noalias intrinsics + static const int ProvenanceNoAliasNoAliasDeclArg = 1; + static const int ProvenanceNoAliasIdentifyPArg = 2; + static const int ProvenanceNoAliasIdentifyPProvenanceArg = 3; + static const int ProvenanceNoAliasIdentifyPObjIdArg = 4; + static const int ProvenanceNoAliasScopeArg = 5; + + static const int NoAliasNoAliasDeclArg = 1; + static const int NoAliasIdentifyPArg = 2; + static const int NoAliasIdentifyPObjIdArg = 3; + static const int NoAliasScopeArg = 4; + + static const int NoAliasDeclAllocaArg = 0; + static const int NoAliasDeclObjIdArg = 1; + static const int NoAliasDeclScopeArg = 2; + + static const int NoAliasCopyGuardIdentifyPBaseObject = 0; + static const int NoAliasCopyGuardNoAliasDeclArg = 1; + static const int NoAliasCopyGuardIndicesArg = 2; + static const int NoAliasCopyGuardScopeArg = 3; + // Intrinsic ID type. This is an opaque typedef to facilitate splitting up // the enum into target-specific enums. typedef unsigned ID; Index: llvm/include/llvm/IR/Intrinsics.td =================================================================== --- llvm/include/llvm/IR/Intrinsics.td +++ llvm/include/llvm/IR/Intrinsics.td @@ -507,6 +507,179 @@ // control dependencies will be maintained. def int_assume : Intrinsic<[], [llvm_i1_ty], [IntrWillReturn]>; +// The 'noalias intrinsics' allow to track dependencies so that the C99 restrict +// rules can be implemented. +// +// - llvm.noalias.decl +// - llvm.noalias +// - llvm.provenance.noalias +// - llvm.noalias.arg.guard +// - llvm.noalias.copy.guard +// +// +// Following arguments are typically used in the various intrinsics: +// - p: the value of the restrict pointer +// - p.addr: identifyP: the address of P: either a real object or a constant +// where the value is relative to 0: different 'identifyP' represent +// different restrict pointers, pointing to disjunct objects. +// - p.objId: when an alloca-object is split by SROA, multiple versions, +// representing the same variable declaration (=scope) can exist. +// This objId allows to differentiate between them. This is useful +// when later on, the alloca's are optimized away. +// - p.scope: metadata argument that refers to a list of alias.scope metadata +// entries that contains exactly one element. It represents the variable +// declaration that contains one or more restrict pointers. 
+// - p.decl: points to the @llvm.noalias.decl intrinsic associated with the +// declaration of a restrict variable. +// - p.alloca: points to the alloca associated with the declaration of a +// restrict variable +// - prov.p: the noalias pointer provenance associated with 'p'. +// - prov.p.addr: the noalias pointer provenance associated with 'p.addr'. +// - p.indices: metadata argument that refers to a list of metadata references. +// each reference points to a metadata array of indices. At the specified +// location, a restrict pointer is located. A '-1' indicates any index. +// (see llvm.noalias.copy.guard for an example) +// +// {p.addr, p.objId, p.scope} represent different ways of tracking the +// 'underlying P' object. A unique triplet represents a unique 'object P' +// +// NOTE: future enhancements might relax/enhance this notion for supporting +// an implementation of n4150 (alias sets). It is likely that an extra +// parameter should be introduced to differentiate p.addr from the p.universe. +// As far as C99 is concerned, p.addr == p.universe +// +// Also see: +// - LangRef.rst (Scoped NoAlias Related Intrinsics) +// - [llvm-dev] RFC: Full 'restrict' support in LLVM +// https://lists.llvm.org/pipermail/llvm-dev/2019-March/131127.html + +// 'llvm.noalias.decl' intrinsic: Inserted at the location of a restrict +// pointer declaration. Makes it possible to identify that a restrict scope is +// only valid inside the body of a loop. +// +// Purpose of the different arguments: +// - arg0: p.alloca: associates the restrict pointer declaration to an alloca. +// (can be 'null' if the alloca is optimized away). The alloca can be +// associated to multiple restrict pointers. +// - arg1: p.objId: identifies different objects, associated to the same +// variable declaration. Is needed to track splitting of alloca's in SROA. +// - arg2: p.scope: metadata representing the variable declaration. +// - returns: a dummy i8 pointer that is used to track dependencies, so that cse +// is not migrating llvm.provenance.noalias over declarations +def int_noalias_decl + : Intrinsic<[llvm_anyptr_ty], + [llvm_anyptr_ty, llvm_anyint_ty, llvm_metadata_ty], + [IntrArgMemOnly]>; // ArgMemOnly: blocks LICM and some more + +// 'llvm.noalias' intrinsic: Introduces noalias information in the pointer +// computation path, normally right after the loading of the pointer value. +// It also blocks a number of optimizations. Once it is transformed into +// a llvm.provenance.noalias version, it can track noalias information and support +// complex optimizations. +// +// Purpose of the different arguments: +// - arg0: p: the incoming pointer value. +// - arg1: p.decl: dependency on llvm.noalias.decl (if available). The +// dependency makes it easier to handle the loop-unrolling case. +// - arg2: p.addr: identifyP: the address of P: either a real object or a +// constant where the value is relative to 0: different 'identifyP' +// represent different restrict pointers. +// - arg3: p.objId: identifies different objects, associated to the same +// variable declaration. Is needed to track splitting of alloca's in +// SROA. +// - arg4: p.scope: metadata representing the variable declaration. 
+// - returns: returns arg0 (although this is hidden for most optimization +// passes) +def int_noalias + : Intrinsic<[llvm_anyptr_ty], + [LLVMMatchType<0>, + llvm_anyptr_ty, llvm_anyptr_ty, // p.decl, p.addr + llvm_anyint_ty, llvm_metadata_ty], // p.objId, p.scope + [IntrArgMemOnly, IntrSpeculatable]>; + +// 'llvm.provenance.noalias' intrinsic: Introduces the noalias information on the +// provenance path. This intrinsic originates from a transformed +// 'llvm.noalias' intrinsic. +// +// Purpose of the different arguments: +// - arg0: p: the incoming ptr value. +// - arg1: p.decl: dependency on llvm.noalias.decl (if available). The +// dependency makes it easier to handle the loop-unrolling case. +// - arg2: p.addr: identifyP: the address of P: either a real object or a +// constant where the value is relative to 0: different 'identifyP' +// represent different restrict pointers. +// - arg3: prov.p.addr: the ptr_provenance associated with the identifyP: +// this is needed to handle cases like: 'int* restrict* restrict rprpi;' +// - arg4: p.objId: identifies different objects, associated to the same +// variable declaration. Is needed to track splitting of alloca's in +// SROA. +// - arg5: p.scope: metadata representing the variable declaration. +// - returns: returns arg0 (although this is hidden for most optimization +// passes) +// +// NOTES: +// - prov.p.addr (together with the other metadata set on the intrinsic), can be +// used to check if there are other reasons why this 'underlying P' object is +// different from another 'underlying P' (using tbaa, noalias annotations, ...) +// - p.decl, p.scope: it is possible that there initially is no +// llvm.noalias.decl dependency. For 'unknown-scope' scopes, it is possible +// that a later analysis connects the right scope and llvm.noalias.decl +// dependency. +// NOTE: Returned<0> must not be used here, or some optimizations (INDVARS) +// will be too optimistic as they see through the annotations, resulting in +// wrong code. +def int_provenance_noalias + : Intrinsic<[llvm_anyptr_ty], + [LLVMMatchType<0>, + llvm_anyptr_ty, llvm_anyptr_ty, llvm_anyptr_ty, + llvm_anyint_ty, llvm_metadata_ty], + [IntrNoMem, IntrSpeculatable]>; + +// 'llvm.noalias.arg.guard' intrinsic: helps tracking the ptr_provenance. +// It guards pointers that escape through function arguments or are returned. +// After inlining, the noalias information can still be propagated +// +// Purpose of the different arguments: +// - arg0: p: the incoming ptr value. +// - arg1: prov.p: the ptr_provenance, associated with this pointer +// computation. +// - returns: arg0 (hidden for most optimization passes) +def int_noalias_arg_guard + : Intrinsic<[llvm_anyptr_ty], + [LLVMMatchType<0>, llvm_anyptr_ty], + [IntrNoMem, IntrSpeculatable]>; // NOTE: Returned<0> must not be used here + +// 'llvm.noalias.copy.guard' intrinsic: a guard that allows to track +// restrict pointers through a memcpy or a structured load/store. When such a +// memcpy or load/store pair is optimized away, the noalias dependency chain can +// be reconstructed. +// +// Purpose of the different arguments: +// %p.guard = +// llvm.noalias.copy.guard %p.alloca, %p.decl, !metadata !99, !metadata !98 +// - arg0: p.alloca: block of memory with potential restrict variables. +// - arg1: p.decl: associated llvm.noalias.decl instruction (if available) +// - arg2: p.indices: metadata list of list of indices, indicating where in the +// memory restrict pointers are located. 
+// - arg3: p.scope: metadata representing the variable declaration. +// - returns: arg0 (hidden for most optimization passes) +// +// Example of p.indices: +// struct FOO { +// int* restrict p1; +// int* p0; +// int* restrict p2; +// }; +// results in '!10' refering to: +// !10 = {!11, !12} +// !11 = { -1, 0 } +// !12 = { -1, 2 } +def int_noalias_copy_guard + : Intrinsic<[llvm_anyptr_ty], + [LLVMMatchType<0>, llvm_anyptr_ty, llvm_metadata_ty, + llvm_metadata_ty], + [IntrNoMem]>; + // Stack Protector Intrinsic - The stackprotector intrinsic writes the stack // guard to the correct place on the stack frame. def int_stackprotector : Intrinsic<[], [llvm_ptr_ty, llvm_ptrptr_ty], []>; Index: llvm/include/llvm/IR/Metadata.h =================================================================== --- llvm/include/llvm/IR/Metadata.h +++ llvm/include/llvm/IR/Metadata.h @@ -641,18 +641,25 @@ /// memory access used by the alias-analysis infrastructure. struct AAMDNodes { explicit AAMDNodes() = default; - explicit AAMDNodes(MDNode *T, MDNode *TS, MDNode *S, MDNode *N) - : TBAA(T), TBAAStruct(TS), Scope(S), NoAlias(N) {} + explicit AAMDNodes(MDNode *T, MDNode *TS, MDNode *S, MDNode *N, + Value *NSC = nullptr) + : TBAA(T), TBAAStruct(TS), Scope(S), NoAlias(N), NoAliasProvenance(NSC) {} bool operator==(const AAMDNodes &A) const { return TBAA == A.TBAA && TBAAStruct == A.TBAAStruct && Scope == A.Scope && - NoAlias == A.NoAlias; + NoAlias == A.NoAlias && NoAliasProvenance == A.NoAliasProvenance; } bool operator!=(const AAMDNodes &A) const { return !(*this == A); } explicit operator bool() const { - return TBAA || TBAAStruct || Scope || NoAlias; + return TBAA || TBAAStruct || Scope || NoAlias || NoAliasProvenance; + } + + void clearNoAliasInfo() { + Scope = nullptr; + NoAlias = nullptr; + NoAliasProvenance = nullptr; } /// The tag for type-based alias analysis. @@ -667,6 +674,9 @@ /// The tag specifying the noalias scope. MDNode *NoAlias = nullptr; + /// The NoAlias Provenance pointer path + Value *NoAliasProvenance = nullptr; + /// Given two sets of AAMDNodes that apply to the same pointer, /// give the best AAMDNodes that are compatible with both (i.e. a set of /// nodes whose allowable aliasing conclusions are a subset of those @@ -678,6 +688,9 @@ Result.TBAAStruct = Other.TBAAStruct == TBAAStruct ? TBAAStruct : nullptr; Result.Scope = Other.Scope == Scope ? Scope : nullptr; Result.NoAlias = Other.NoAlias == NoAlias ? NoAlias : nullptr; + Result.NoAliasProvenance = Other.NoAliasProvenance == NoAliasProvenance + ? NoAliasProvenance + : nullptr; return Result; } }; @@ -699,7 +712,8 @@ return DenseMapInfo::getHashValue(Val.TBAA) ^ DenseMapInfo::getHashValue(Val.TBAAStruct) ^ DenseMapInfo::getHashValue(Val.Scope) ^ - DenseMapInfo::getHashValue(Val.NoAlias); + DenseMapInfo::getHashValue(Val.NoAlias) ^ + DenseMapInfo::getHashValue(Val.NoAliasProvenance); } static bool isEqual(const AAMDNodes &LHS, const AAMDNodes &RHS) { @@ -1430,6 +1444,32 @@ // Create wrappers for C Binding types (see CBindingWrapping.h). DEFINE_ISA_CONVERSION_FUNCTIONS(NamedMDNode, LLVMNamedMDNodeRef) +/// This is a simple wrapper around an MDNode which provides a higher-level +/// interface by hiding the details of how alias analysis information is encoded +/// in its operands. +class AliasScopeNode { + const MDNode *Node = nullptr; + +public: + AliasScopeNode() = default; + explicit AliasScopeNode(const MDNode *N) : Node(N) {} + + /// Get the MDNode for this AliasScopeNode. 
+ const MDNode *getNode() const { return Node; } + + /// Get the MDNode for this AliasScopeNode's domain. + const MDNode *getDomain() const { + if (Node->getNumOperands() < 2) + return nullptr; + return dyn_cast_or_null(Node->getOperand(1)); + } + StringRef getName() const { + if (Node->getNumOperands() > 2) + if (MDString *N = dyn_cast_or_null(Node->getOperand(2))) + return N->getString(); + return StringRef(); + } +}; } // end namespace llvm #endif // LLVM_IR_METADATA_H Index: llvm/include/llvm/IR/User.h =================================================================== --- llvm/include/llvm/IR/User.h +++ llvm/include/llvm/IR/User.h @@ -167,12 +167,12 @@ } Value *getOperand(unsigned i) const { - assert(i < NumUserOperands && "getOperand() out of range!"); + assert(i < getNumOperands() && "getOperand() out of range!"); return getOperandList()[i]; } void setOperand(unsigned i, Value *Val) { - assert(i < NumUserOperands && "setOperand() out of range!"); + assert(i < getNumOperands() && "setOperand() out of range!"); assert((!isa((const Value*)this) || isa((const Value*)this)) && "Cannot mutate a constant with setOperand!"); @@ -180,15 +180,17 @@ } const Use &getOperandUse(unsigned i) const { - assert(i < NumUserOperands && "getOperandUse() out of range!"); + assert(i < getNumOperands() && "getOperandUse() out of range!"); return getOperandList()[i]; } Use &getOperandUse(unsigned i) { - assert(i < NumUserOperands && "getOperandUse() out of range!"); + assert(i < getNumOperands() && "getOperandUse() out of range!"); return getOperandList()[i]; } - unsigned getNumOperands() const { return NumUserOperands; } + unsigned getNumOperands() const { + return NumUserOperands - NumUserOperandsDelta; + } /// Returns the descriptor co-allocated with this User instance. ArrayRef getDescriptor() const; @@ -209,6 +211,24 @@ NumUserOperands = NumOps; } + /// FIXME: As that the number of operands is used to find the start of + /// the allocated memory in operator delete, we need to always think we have + /// 3 operand before delete. + void setStoreInstNumOperands(unsigned NumOps) { + assert((2 <= NumOps) && (NumOps <= 3) && + "StoreInst can only have 2 or 3 operands"); + NumUserOperandsDelta = 3 - NumOps; + } + + /// FIXME: As that the number of operands is used to find the start of + /// the allocated memory in operator delete, we need to always think we have + /// 2 operand before delete. + void setLoadInstNumOperands(unsigned NumOps) { + assert((1 <= NumOps) && (NumOps <= 2) && + "LoadInst can only have 1 or 2 operands"); + NumUserOperandsDelta = 2 - NumOps; + } + /// Subclasses with hung off uses need to manage the operand count /// themselves. In these instances, the operand count isn't used to find the /// OperandList, so there's no issue in having the operand count change. 
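To make the operand-delta mechanism concrete, here is a rough sketch of how the optional ptr_provenance operand behaves on a load (LI and Prov are placeholder names, not code from this patch; the operand count of a freshly built load is an assumption):

    static void sketch(llvm::LoadInst *LI, llvm::Value *Prov) {
      // Initially only the pointer operand is visible (assumed); the second
      // slot exists in the allocation but is hidden by NumUserOperandsDelta.
      assert(!LI->hasNoaliasProvenanceOperand());

      LI->setNoaliasProvenanceOperand(Prov); // the second slot becomes visible
      assert(LI->getNumOperands() == 2);
      assert(LI->getNoaliasProvenanceOperand() == Prov);

      LI->removeNoaliasProvenanceOperand();  // hide it again; the use is cleared
      assert(LI->getNumOperands() == 1);
      // operator new always reserves two slots, and ~LoadInst restores the
      // count so operator delete can find the start of the allocation.
    }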
@@ -234,10 +254,10 @@ op_iterator op_begin() { return getOperandList(); } const_op_iterator op_begin() const { return getOperandList(); } op_iterator op_end() { - return getOperandList() + NumUserOperands; + return getOperandList() + NumUserOperands - NumUserOperandsDelta; } const_op_iterator op_end() const { - return getOperandList() + NumUserOperands; + return getOperandList() + NumUserOperands - NumUserOperandsDelta; } op_range operands() { return op_range(op_begin(), op_end()); Index: llvm/include/llvm/IR/Value.h =================================================================== --- llvm/include/llvm/IR/Value.h +++ llvm/include/llvm/IR/Value.h @@ -110,7 +110,7 @@ /// /// Note, this should *NOT* be used directly by any class other than User. /// User uses this value to find the Use list. - enum : unsigned { NumUserOperandsBits = 28 }; + enum : unsigned { NumUserOperandsBits = 27 }; unsigned NumUserOperands : NumUserOperandsBits; // Use the same type as the bitfield above so that MSVC will pack them. @@ -118,6 +118,7 @@ unsigned HasName : 1; unsigned HasHungOffUses : 1; unsigned HasDescriptor : 1; + unsigned NumUserOperandsDelta : 1; private: template // UseT == 'Use' or 'const Use' @@ -318,6 +319,13 @@ /// values or constant users. void replaceUsesOutsideBlock(Value *V, BasicBlock *BB); + /// replaceUsesInsideBlock - Go through the uses list for this definition and + /// make each use point to "V" instead of "this" when the use is inside the + /// block. + /// Unlike replaceAllUsesWith this function does not support basic block + /// values or constant users. + void replaceUsesInsideBlock(Value *V, BasicBlock *BB); + //---------------------------------------------------------------------- // Methods for handling the chain of uses of this Value. // @@ -582,6 +590,18 @@ ->stripPointerCastsAndInvariantGroups()); } + /// Strip off pointer casts, all-zero GEPs, aliases, invariant group + /// info and noalias intrinsics. + /// + /// Returns the original uncasted value. If this is called on a non-pointer + /// value, it returns 'this'. + const Value *stripPointerCastsAndInvariantGroupsAndNoAliasIntr() const; + Value *stripPointerCastsAndInvariantGroupsAndNoAliasIntr() { + return const_cast( + static_cast(this) + ->stripPointerCastsAndInvariantGroupsAndNoAliasIntr()); + } + /// Strip off pointer casts and all-constant inbounds GEPs. /// /// Returns the original pointer value. 
If this is called on a non-pointer Index: llvm/include/llvm/InitializePasses.h =================================================================== --- llvm/include/llvm/InitializePasses.h +++ llvm/include/llvm/InitializePasses.h @@ -111,6 +111,7 @@ void initializeCallSiteSplittingLegacyPassPass(PassRegistry&); void initializeCalledValuePropagationLegacyPassPass(PassRegistry &); void initializeCodeGenPreparePass(PassRegistry&); +void initializeConnectNoAliasDeclLegacyPassPass(PassRegistry &); void initializeConstantHoistingLegacyPassPass(PassRegistry&); void initializeConstantMergeLegacyPassPass(PassRegistry&); void initializeConstantPropagationPass(PassRegistry&); @@ -353,6 +354,7 @@ void initializeProcessImplicitDefsPass(PassRegistry&); void initializeProfileSummaryInfoWrapperPassPass(PassRegistry&); void initializePromoteLegacyPassPass(PassRegistry&); +void initializePropagateAndConvertNoAliasLegacyPassPass(PassRegistry &); void initializePruneEHPass(PassRegistry&); void initializeRABasicPass(PassRegistry&); void initializeRAGreedyPass(PassRegistry&); Index: llvm/include/llvm/LinkAllPasses.h =================================================================== --- llvm/include/llvm/LinkAllPasses.h +++ llvm/include/llvm/LinkAllPasses.h @@ -50,6 +50,7 @@ #include "llvm/Transforms/Scalar.h" #include "llvm/Transforms/Scalar/GVN.h" #include "llvm/Transforms/Scalar/InstSimplifyPass.h" +#include "llvm/Transforms/Scalar/PropagateAndConvertNoAlias.h" #include "llvm/Transforms/Scalar/Scalarizer.h" #include "llvm/Transforms/Utils.h" #include "llvm/Transforms/Utils/SymbolRewriter.h" @@ -219,6 +220,7 @@ (void) llvm::createSeparateConstOffsetFromGEPPass(); (void) llvm::createSpeculativeExecutionPass(); (void) llvm::createSpeculativeExecutionIfHasBranchDivergencePass(); + (void) llvm::createPropagateAndConvertNoAliasPass(); (void) llvm::createRewriteSymbolsPass(); (void) llvm::createStraightLineStrengthReducePass(); (void) llvm::createMemDerefPrinter(); Index: llvm/include/llvm/Transforms/Scalar.h =================================================================== --- llvm/include/llvm/Transforms/Scalar.h +++ llvm/include/llvm/Transforms/Scalar.h @@ -29,6 +29,12 @@ // FunctionPass *createConstantPropagationPass(); +//===----------------------------------------------------------------------===// +// +// ConnectNoAliasDecl - Connects llvm.noalias.XX intrinsics to llvm.noalias.decl +// +FunctionPass *createConnectNoAliasDeclPass(); + //===----------------------------------------------------------------------===// // // AlignmentFromAssumptions - Use assume intrinsics to set load/store @@ -433,6 +439,10 @@ // TargetTransformInfo::hasBranchDivergence() is true. FunctionPass *createSpeculativeExecutionIfHasBranchDivergencePass(); +// PropagateAndConvertNoAlias: move noalias intrinsics into a provenance of +// load/store instructions +FunctionPass *createPropagateAndConvertNoAliasPass(); + //===----------------------------------------------------------------------===// // // StraightLineStrengthReduce - This pass strength-reduces some certain Index: llvm/include/llvm/Transforms/Scalar/ConnectNoAliasDecl.h =================================================================== --- /dev/null +++ llvm/include/llvm/Transforms/Scalar/ConnectNoAliasDecl.h @@ -0,0 +1,34 @@ +//===- ConnectNoAliasDecl.h -------------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// This pass connects provenance.noalias intrinsics to the corresponding +/// llvm.noalias.decl, based on the alloca of the pointer. +/// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_TRANSFORMS_SCALAR_CONNECTNOALIASDECL_H +#define LLVM_TRANSFORMS_SCALAR_CONNECTNOALIASDECL_H + +#include "llvm/IR/PassManager.h" + +namespace llvm { +class DominatorTree; + +class ConnectNoAliasDeclPass : public PassInfoMixin { +public: + ConnectNoAliasDeclPass(); + + PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM); + + // Glue for old PM + bool runImpl(Function &F); +}; +} // namespace llvm + +#endif // LLVM_TRANSFORMS_SCALAR_CONNECTNOALIASDECL_H Index: llvm/include/llvm/Transforms/Scalar/PropagateAndConvertNoAlias.h =================================================================== --- /dev/null +++ llvm/include/llvm/Transforms/Scalar/PropagateAndConvertNoAlias.h @@ -0,0 +1,39 @@ +//===- PropagateAndConvertNoAlias.h -----------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// This pass moves dependencies on llvm.noalias onto the ptr_provenance. +/// It also introduces and propagates provenance.noalias and noalias.arg.guard +/// intrinsics. +/// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_TRANSFORMS_SCALAR_PROPAGATEANDCONVERTNOALIAS_H +#define LLVM_TRANSFORMS_SCALAR_PROPAGATEANDCONVERTNOALIAS_H + +#include "llvm/IR/PassManager.h" + +namespace llvm { +class DominatorTree; + +class PropagateAndConvertNoAliasPass + : public PassInfoMixin { +public: + PropagateAndConvertNoAliasPass(); + + PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM); + + // Glue for old PM + bool runImpl(Function &F, llvm::DominatorTree &DT); + +private: + bool doit(Function &F, llvm::DominatorTree &DT); +}; +} // namespace llvm + +#endif // LLVM_TRANSFORMS_SCALAR_PROPAGATEANDCONVERTNOALIAS_H Index: llvm/include/llvm/Transforms/Utils/Cloning.h =================================================================== --- llvm/include/llvm/Transforms/Utils/Cloning.h +++ llvm/include/llvm/Transforms/Utils/Cloning.h @@ -268,6 +268,11 @@ Function *Callee, int64_t entryDelta, const ValueMap *VMap = nullptr); +/// Connects noalias, provenance.noalias, noalias.copy.guard intrinsics to the +/// corresponding llvm.noalias.decl, based on the alloca of the underlying +/// p.addr. +/// \returns true when the function was modified. +bool propagateAndConnectNoAliasDecl(Function *F); } // end namespace llvm #endif // LLVM_TRANSFORMS_UTILS_CLONING_H Index: llvm/include/llvm/Transforms/Utils/NoAliasUtils.h =================================================================== --- /dev/null +++ llvm/include/llvm/Transforms/Utils/NoAliasUtils.h @@ -0,0 +1,78 @@ +//===- llvm/Transforms/Utils/NoAliasUtils.h - NoAlias utilities -*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file defines utilities for noalias metadata and intrinsics. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_TRANSFORMS_UTILS_NOALIASUTILS_H +#define LLVM_TRANSFORMS_UTILS_NOALIASUTILS_H + +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/SetVector.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/IntrinsicInst.h" + +namespace llvm { +class Function; + +/// Connect llvm.noalias.decl to noalias/provenance.noalias intrinsics that are +/// associated with the unknown function scope and based on the same alloca. +/// At the same time, propagate the p.addr, p.objId and p.scope. +bool propagateAndConnectNoAliasDecl(Function *F); + +/// Find back the 'llvm.noalias.decl' intrinsics in the specified basic blocks +/// and extract their scope. This are candidates for duplication when cloning. +template +void identifyNoAliasScopesToClone(ArrayRef BBs, + C &out_NoAliasDeclScopes) { + for (auto BB : BBs) { + for (Instruction &I : *BB) { + if (auto II = dyn_cast(&I)) { + if (II->getIntrinsicID() == Intrinsic::noalias_decl) { + out_NoAliasDeclScopes.push_back(cast( + II->getOperand(Intrinsic::NoAliasDeclScopeArg))); + } + } + } + } +} + +/// Duplicate the specified list of noalias decl scopes. +/// The 'Ext' string is added as an extension to the name. +/// Afterwards, the out_ClonedMVScopes contains a mapping of the original MV +/// onto the cloned version. +/// The out_ClonedScopes contains the mapping of the original scope MDNode +/// onto the cloned scope. +void cloneNoAliasScopes( + ArrayRef NoAliasDeclScopes, + DenseMap &out_ClonedScopes, + DenseMap &out_ClonedMVScopes, + StringRef Ext, LLVMContext &Context); + +/// Adapt the metadata for the specified instruction according to the +/// provided mapping. This is normally used after cloning an instruction, when +/// some noalias scopes needed to be cloned. +void adaptNoAliasScopes( + Instruction *I, DenseMap &ClonedScopes, + DenseMap &ClonedMVScopes, + LLVMContext &Context); + +/// Clone the specified noalias decl scopes. Then adapt all instructions in the +/// NewBlocks basicblocks to the cloned versions. +/// 'Ext' will be added to the duplicate scope names +void cloneAndAdaptNoAliasScopes(ArrayRef NoAliasDeclScopes, + ArrayRef NewBlocks, + LLVMContext &Context, StringRef Ext); +} // end namespace llvm + +#endif // LLVM_TRANSFORMS_UTILS_NOALIASUTILS_H Index: llvm/lib/Analysis/AliasSetTracker.cpp =================================================================== --- llvm/lib/Analysis/AliasSetTracker.cpp +++ llvm/lib/Analysis/AliasSetTracker.cpp @@ -293,6 +293,7 @@ I->second->eraseFromList(); PointerMap.clear(); + ProvenancePointers.clear(); // The alias sets should all be clear now. AliasSets.clear(); @@ -358,6 +359,10 @@ AliasSet::PointerRec &Entry = getEntryFor(Pointer); + if (AAInfo.NoAliasProvenance) + ProvenancePointers.insert( + ASTProvenanceCallbackVH(AAInfo.NoAliasProvenance, this)); + if (AliasAnyAS) { // At this point, the AST is saturated, so we only have one active alias // set. 
That means we already know which alias set we want to return, and @@ -733,6 +738,65 @@ return *this = ASTCallbackVH(V, AST); } +//===----------------------------------------------------------------------===// +// ASTProvenanceCallbackVH Class Implementation +//===----------------------------------------------------------------------===// + +void AliasSetTracker::ASTProvenanceCallbackVH::deleted() { + assert(AST && "ASTCallbackVH called with a null AliasSetTracker!"); + AST->deleteProvenanceValue(getValPtr()); + // this now dangles! +} + +void AliasSetTracker::ASTProvenanceCallbackVH::allUsesReplacedWith(Value *V) { + AST->copyProvenanceValue(getValPtr(), V); +} + +AliasSetTracker::ASTProvenanceCallbackVH::ASTProvenanceCallbackVH( + Value *V, AliasSetTracker *ast) + : CallbackVH(V), AST(ast) {} + +AliasSetTracker::ASTProvenanceCallbackVH & +AliasSetTracker::ASTProvenanceCallbackVH::operator=(Value *V) { + return *this = ASTProvenanceCallbackVH(V, AST); +} + +void AliasSetTracker::deleteProvenanceValue(Value *PtrVal) { + // FIXME: slow algorithms ahead + ProvenanceSetType::iterator I = ProvenancePointers.find_as(PtrVal); + if (I == ProvenancePointers.end()) + return; // nope + + // iterate over all PointerRecords to update the AAMDNodes that contain this + // pointer + for (auto &AS : *this) { + for (auto &PR : AS) { + PR.updateNoAliasProvenanceIfMatching(PtrVal, nullptr); + } + } + + ProvenancePointers.erase(I); +} + +void AliasSetTracker::copyProvenanceValue(Value *From, Value *To) { + // FIXME: slow algorithms ahead + ProvenanceSetType::iterator I = ProvenancePointers.find_as(From); + if (I == ProvenancePointers.end()) + return; // nope + + // iterate over all PointerRecords to update the AAMDNodes that contain this + // pointer + for (auto &AS : *this) { + for (auto &PR : AS) { + PR.updateNoAliasProvenanceIfMatching(From, To); + } + } + + // Update the set + ProvenancePointers.erase(I); + ProvenancePointers.insert(ASTProvenanceCallbackVH(To, this)); +} + //===----------------------------------------------------------------------===// // AliasSetPrinter Pass //===----------------------------------------------------------------------===// Index: llvm/lib/Analysis/BasicAliasAnalysis.cpp =================================================================== --- llvm/lib/Analysis/BasicAliasAnalysis.cpp +++ llvm/lib/Analysis/BasicAliasAnalysis.cpp @@ -856,10 +856,15 @@ if (CacheIt != AAQI.AliasCache.end()) return CacheIt->second; + ++RecurseLevel; AliasResult Alias = aliasCheck(LocA.Ptr, LocA.Size, LocA.AATags, LocB.Ptr, LocB.Size, LocB.AATags, AAQI); - VisitedPhiBBs.clear(); + // protect cache against recursive calls from ScopedNoAliasAA: only clean up + // at the top level + if (--RecurseLevel == 0) { + VisitedPhiBBs.clear(); + } return Alias; } @@ -1751,14 +1756,27 @@ LocationSize V2Size, AAMDNodes V2AAInfo, AAQueryInfo &AAQI, const Value *O1, const Value *O2) { + auto hasValidNoAliasProvenance = [](AAMDNodes &AA) { + return AA.NoAliasProvenance && !isa(AA.NoAliasProvenance); + }; + // If either of the memory references is empty, it doesn't matter what the // pointer values are. if (V1Size.isZero() || V2Size.isZero()) return NoAlias; // Strip off any casts if they exist. 
- V1 = V1->stripPointerCastsAndInvariantGroups(); - V2 = V2->stripPointerCastsAndInvariantGroups(); + if (hasValidNoAliasProvenance(V1AAInfo) || + hasValidNoAliasProvenance(V2AAInfo)) { + V1 = V1->stripPointerCastsAndInvariantGroups(); + V2 = V2->stripPointerCastsAndInvariantGroups(); + } else { + // No noalias info - look through noalias intrinsics in a safe way + V1 = V1->stripPointerCastsAndInvariantGroupsAndNoAliasIntr(); + V2 = V2->stripPointerCastsAndInvariantGroupsAndNoAliasIntr(); + V1AAInfo.clearNoAliasInfo(); + V2AAInfo.clearNoAliasInfo(); + } // If V1 or V2 is undef, the result is NoAlias because we can always pick a // value for undef that aliases nothing in the program. Index: llvm/lib/Analysis/CaptureTracking.cpp =================================================================== --- llvm/lib/Analysis/CaptureTracking.cpp +++ llvm/lib/Analysis/CaptureTracking.cpp @@ -25,6 +25,7 @@ #include "llvm/IR/Dominators.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/Intrinsics.h" #include "llvm/Support/CommandLine.h" using namespace llvm; @@ -262,6 +263,15 @@ switch (I->getOpcode()) { case Instruction::Call: case Instruction::Invoke: { + if (auto *II = dyn_cast(I)) + if (II->getIntrinsicID() == Intrinsic::noalias || + II->getIntrinsicID() == Intrinsic::provenance_noalias || + II->getIntrinsicID() == Intrinsic::noalias_arg_guard || + II->getIntrinsicID() == Intrinsic::noalias_copy_guard) { + AddUses(I); + break; + } + auto *Call = cast(I); // Not captured if the callee is readonly, doesn't return a copy through // its return value and doesn't unwind (a readonly function can leak bits Index: llvm/lib/Analysis/InstructionSimplify.cpp =================================================================== --- llvm/lib/Analysis/InstructionSimplify.cpp +++ llvm/lib/Analysis/InstructionSimplify.cpp @@ -35,6 +35,7 @@ #include "llvm/IR/GlobalAlias.h" #include "llvm/IR/InstrTypes.h" #include "llvm/IR/Instructions.h" +#include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/Operator.h" #include "llvm/IR/PatternMatch.h" #include "llvm/IR/ValueHandle.h" @@ -5391,6 +5392,13 @@ // TODO: maxnum(nnan ninf x, -flt_max) -> x break; } + case Intrinsic::noalias_arg_guard: { + // Only follow the plain path if undefined. Do not propagate null, that + // would incorrectly omit noalias information. + if (isa(Op0)) + return Op0; + break; + } default: break; } @@ -5398,8 +5406,30 @@ return nullptr; } -static Value *simplifyIntrinsic(CallBase *Call, const SimplifyQuery &Q) { +static Value *simplifyProvenanceNoAlias(const Value *V) { + const IntrinsicInst *II = cast(V); + assert(II->getIntrinsicID() == Intrinsic::provenance_noalias); + + // Only follow the plain path if undefined. Do not propagate null, that + // would incorrectly omit noalias information. + Value *Op0 = II->getOperand(0); + if (isa(Op0)) { + return Op0; + } + // Check for compatibility: provenance.noalias(provenance.noalias) -> + // provenance.noalias + if (llvm::IntrinsicInst *DepII = dyn_cast(Op0)) { + if (DepII->getIntrinsicID() == Intrinsic::provenance_noalias) { + if (llvm::areProvenanceNoAliasCompatible(DepII, II)) { + return DepII; + } + } + } + return nullptr; +} + +static Value *simplifyIntrinsic(CallBase *Call, const SimplifyQuery &Q) { // Intrinsics with no operands have some kind of side effect. Don't simplify. 
unsigned NumOperands = Call->getNumArgOperands(); if (!NumOperands) @@ -5456,6 +5486,18 @@ return V; return nullptr; } + case Intrinsic::noalias: + case Intrinsic::noalias_copy_guard: { + // Only follow the plain path if undefined. Do not propagate null, that + // would incorrectly omit noalias information. + if (auto Op0 = dyn_cast(Call->getArgOperand(0))) { + return Op0; + } + return nullptr; + } + case Intrinsic::provenance_noalias: { + return simplifyProvenanceNoAlias(Call); + } default: return nullptr; } Index: llvm/lib/Analysis/MemorySSA.cpp =================================================================== --- llvm/lib/Analysis/MemorySSA.cpp +++ llvm/lib/Analysis/MemorySSA.cpp @@ -285,6 +285,11 @@ case Intrinsic::invariant_start: case Intrinsic::invariant_end: case Intrinsic::assume: + case Intrinsic::noalias_decl: + case Intrinsic::noalias: + case Intrinsic::provenance_noalias: + case Intrinsic::noalias_arg_guard: + case Intrinsic::noalias_copy_guard: return {false, NoAlias}; case Intrinsic::dbg_addr: case Intrinsic::dbg_declare: @@ -1739,9 +1744,19 @@ // dependencies here. // FIXME: Replace this special casing with a more accurate modelling of // assume's control dependency. - if (IntrinsicInst *II = dyn_cast(I)) - if (II->getIntrinsicID() == Intrinsic::assume) + if (IntrinsicInst *II = dyn_cast(I)) { + switch (II->getIntrinsicID()) { + default: + break; + case Intrinsic::assume: + case Intrinsic::noalias_decl: + case Intrinsic::noalias: + case Intrinsic::provenance_noalias: + case Intrinsic::noalias_arg_guard: + case Intrinsic::noalias_copy_guard: return nullptr; + } + } // Using a nonstandard AA pipelines might leave us with unexpected modref // results for I, so add a check to not model instructions that may not read Index: llvm/lib/Analysis/ScopedNoAliasAA.cpp =================================================================== --- llvm/lib/Analysis/ScopedNoAliasAA.cpp +++ llvm/lib/Analysis/ScopedNoAliasAA.cpp @@ -33,47 +33,46 @@ #include "llvm/Analysis/ScopedNoAliasAA.h" #include "llvm/ADT/SmallPtrSet.h" +#include "llvm/Analysis/CaptureTracking.h" #include "llvm/Analysis/MemoryLocation.h" +#include "llvm/Analysis/ValueTracking.h" +#include "llvm/IR/Dominators.h" #include "llvm/IR/Instruction.h" +#include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/LLVMContext.h" #include "llvm/IR/Metadata.h" #include "llvm/InitializePasses.h" #include "llvm/Pass.h" #include "llvm/Support/Casting.h" #include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" using namespace llvm; +#define DEBUG_TYPE "scoped-noalias" + // A handy option for disabling scoped no-alias functionality. The same effect // can also be achieved by stripping the associated metadata tags from IR, but // this option is sometimes more convenient. -static cl::opt EnableScopedNoAlias("enable-scoped-noalias", - cl::init(true), cl::Hidden); - -namespace { - -/// This is a simple wrapper around an MDNode which provides a higher-level -/// interface by hiding the details of how alias analysis information is encoded -/// in its operands. -class AliasScopeNode { - const MDNode *Node = nullptr; - -public: - AliasScopeNode() = default; - explicit AliasScopeNode(const MDNode *N) : Node(N) {} - - /// Get the MDNode for this AliasScopeNode. - const MDNode *getNode() const { return Node; } - - /// Get the MDNode for this AliasScopeNode's domain. 
- const MDNode *getDomain() const { - if (Node->getNumOperands() < 2) - return nullptr; - return dyn_cast_or_null(Node->getOperand(1)); - } -}; - -} // end anonymous namespace +static cl::opt + EnableScopedNoAlias("enable-scoped-noalias", cl::init(true), cl::Hidden, + cl::desc("Enable use of scoped-noalias metadata")); + +static cl::opt + MaxNoAliasDepth("scoped-noalias-max-depth", cl::init(12), cl::Hidden, + cl::desc("Maximum depth for noalias intrinsic search")); + +// A 'Undef'as 'NoAliasProvenance' means 'no known extra information' about +// the pointer provenance. In that case, we need to follow the real pointer +// as it might contain extrainformation provided through llvm.noalias.arg.guard. +// A absent (nullptr) 'NoAliasProvenance', indicates that this access does not +// contain noalias provenance info. +static const Value *selectMemoryProvenance(const MemoryLocation &Loc) { + if (!Loc.AATags.NoAliasProvenance || + isa(Loc.AATags.NoAliasProvenance)) + return Loc.Ptr; + return Loc.AATags.NoAliasProvenance; // can be 'nullptr' +} AliasResult ScopedNoAliasAAResult::alias(const MemoryLocation &LocA, const MemoryLocation &LocB, @@ -92,6 +91,15 @@ if (!mayAliasInScopes(BScopes, ANoAlias)) return NoAlias; + LLVM_DEBUG(llvm::dbgs() << "ScopedNoAliasAAResult::alias\n"); + if (noAliasByIntrinsic(ANoAlias, selectMemoryProvenance(LocA), BNoAlias, + selectMemoryProvenance(LocB), nullptr, nullptr, AAQI)) + return NoAlias; + + if (noAliasByIntrinsic(BNoAlias, selectMemoryProvenance(LocB), ANoAlias, + selectMemoryProvenance(LocA), nullptr, nullptr, AAQI)) + return NoAlias; + // If they may alias, chain to the next AliasAnalysis. return AAResultBase::alias(LocA, LocB, AAQI); } @@ -102,12 +110,21 @@ if (!EnableScopedNoAlias) return AAResultBase::getModRefInfo(Call, Loc, AAQI); - if (!mayAliasInScopes(Loc.AATags.Scope, - Call->getMetadata(LLVMContext::MD_noalias))) + const MDNode *CSNoAlias = Call->getMetadata(LLVMContext::MD_noalias); + if (!mayAliasInScopes(Loc.AATags.Scope, CSNoAlias)) + return ModRefInfo::NoModRef; + + const MDNode *CSScopes = Call->getMetadata(LLVMContext::MD_alias_scope); + if (!mayAliasInScopes(CSScopes, Loc.AATags.NoAlias)) + return ModRefInfo::NoModRef; + + LLVM_DEBUG(llvm::dbgs() << "ScopedNoAliasAAResult::getModRefInfo - 1\n"); + if (noAliasByIntrinsic(Loc.AATags.NoAlias, selectMemoryProvenance(Loc), + CSNoAlias, nullptr, nullptr, Call, AAQI)) return ModRefInfo::NoModRef; - if (!mayAliasInScopes(Call->getMetadata(LLVMContext::MD_alias_scope), - Loc.AATags.NoAlias)) + if (noAliasByIntrinsic(CSNoAlias, nullptr, Loc.AATags.NoAlias, + selectMemoryProvenance(Loc), Call, nullptr, AAQI)) return ModRefInfo::NoModRef; return AAResultBase::getModRefInfo(Call, Loc, AAQI); @@ -119,12 +136,22 @@ if (!EnableScopedNoAlias) return AAResultBase::getModRefInfo(Call1, Call2, AAQI); - if (!mayAliasInScopes(Call1->getMetadata(LLVMContext::MD_alias_scope), - Call2->getMetadata(LLVMContext::MD_noalias))) + const MDNode *CS1Scopes = Call1->getMetadata(LLVMContext::MD_alias_scope); + const MDNode *CS2Scopes = Call2->getMetadata(LLVMContext::MD_alias_scope); + const MDNode *CS1NoAlias = Call1->getMetadata(LLVMContext::MD_noalias); + const MDNode *CS2NoAlias = Call2->getMetadata(LLVMContext::MD_noalias); + if (!mayAliasInScopes(CS1Scopes, Call2->getMetadata(LLVMContext::MD_noalias))) return ModRefInfo::NoModRef; - if (!mayAliasInScopes(Call2->getMetadata(LLVMContext::MD_alias_scope), - Call1->getMetadata(LLVMContext::MD_noalias))) + if (!mayAliasInScopes(CS2Scopes, 
Call1->getMetadata(LLVMContext::MD_noalias))) + return ModRefInfo::NoModRef; + + if (noAliasByIntrinsic(CS1NoAlias, nullptr, CS2NoAlias, nullptr, Call1, Call2, + AAQI)) + return ModRefInfo::NoModRef; + + if (noAliasByIntrinsic(CS2NoAlias, nullptr, CS1NoAlias, nullptr, Call2, Call1, + AAQI)) return ModRefInfo::NoModRef; return AAResultBase::getModRefInfo(Call1, Call2, AAQI); @@ -176,11 +203,488 @@ return true; } +bool ScopedNoAliasAAResult::findCompatibleNoAlias( + const Value *P, const MDNode *ANoAlias, const MDNode *BNoAlias, + const DataLayout &DL, SmallPtrSetImpl &Visited, + SmallVectorImpl &CompatibleSet, int Depth) { + // When a pointer is derived from multiple noalias calls, there are two + // potential reasons: + // 1. The path of derivation is uncertain (because of a select, PHI, etc.). + // 2. Some noalias calls are derived from other noalias calls. + // Logically, we need to treat (1) as an "and" and (2) as an "or" when + // checking for scope compatibility. If we don't know from which noalias call + // a pointer is derived, then we need to require compatibility with all of + // them. If we're derived from a noalias call that is derived from another + // noalias call, then we need the ability to effectively ignore the inner one + // in favor of the outer one (thus, we only need compatibility with one or + // the other). + // + // Scope compatibility means that, as with the noalias metadata, within each + // domain, the set of noalias intrinsic scopes is a subset of the noalias + // scopes. + // + // Given this, we check compatibility of the relevant sets of noalias calls + // from which LocA.Ptr might derive with both LocA.AATags.NoAlias and + // LocB.AATags.NoAlias, and LocB.Ptr does not derive from any of the noalias + // calls in some set, then we can conclude NoAlias. + // + // So if we have: + // noalias1 noalias3 + // | | + // noalias2 noalias4 + // | | + // \ / + // \ / + // \ / + // \ / + // select + // | + // noalias5 + // | + // noalias6 + // | + // PtrA + // + // - If PtrA is compatible with noalias6, and PtrB is also compatible, + // but does not derive from noalias6, then NoAlias. + // - If PtrA is compatible with noalias5, and PtrB is also compatible, + // but does not derive from noalias5, then NoAlias. + // - If PtrA is compatible with noalias2 and noalias4, and PtrB is also + // compatible, but does not derive from either, then NoAlias. + // - If PtrA is compatible with noalias2 and noalias3, and PtrB is also + // compatible, but does not derive from either, then NoAlias. + // - If PtrA is compatible with noalias1 and noalias4, and PtrB is also + // compatible, but does not derive from either, then NoAlias. + // - If PtrA is compatible with noalias1 and noalias3, and PtrB is also + // compatible, but does not derive from either, then NoAlias. + // + // We don't need, or want, to explicitly build N! sets to check for scope + // compatibility. Instead, recurse through the tree of underlying objects. + + SmallVector NoAliasCalls; + P = GetUnderlyingObject(P, DL, 0, &NoAliasCalls); + + // If we've already visited this underlying value (likely because this is a + // PHI that depends on itself, directly or indirectly), we must not have + // returned false the first time, so don't do so this time either. + if (!Visited.insert(P).second) + return true; + + auto getNoAliasScopeMDNode = [](IntrinsicInst *II) { + return dyn_cast( + cast( + II->getOperand( + II->getIntrinsicID() == Intrinsic::provenance_noalias ? 
+ Intrinsic::ProvenanceNoAliasScopeArg : + Intrinsic::NoAliasScopeArg + ))->getMetadata()); + }; + + // Our pointer is derived from P, with NoAliasCalls along the way. + // Compatibility with any of them is fine. + auto NAI = find_if(NoAliasCalls, [&](Instruction *A) { + return !mayAliasInScopes(getNoAliasScopeMDNode(cast(A)), + ANoAlias) && + !mayAliasInScopes(getNoAliasScopeMDNode(cast(A)), + BNoAlias); + }); + if (NAI != NoAliasCalls.end()) { + CompatibleSet.push_back(*NAI); + return true; + } + + // We've not found a compatible noalias call, but we might be able to keep + // looking. If this underlying object is really a PHI or a select, we can + // check the incoming values. They all need to be compatible, and if so, we + // can take the union of all of the compatible noalias calls as the set to + // return for further validation. + SmallVector Children; + if (const auto *SI = dyn_cast(P)) { + Children.push_back(SI->getTrueValue()); + Children.push_back(SI->getFalseValue()); + } else if (const auto *PN = dyn_cast(P)) { + for (Value *IncommingValue : PN->incoming_values()) + Children.push_back(IncommingValue); + } + + if (Children.empty() || Depth == MaxNoAliasDepth) + return false; + + SmallPtrSet ChildVisited; + SmallVector ChildCompatSet; + for (auto &C : Children) { + ChildVisited.clear(); + ChildVisited.insert(Visited.begin(), Visited.end()); + ChildVisited.insert(P); + + ChildCompatSet.clear(); + if (!findCompatibleNoAlias(C, ANoAlias, BNoAlias, DL, ChildVisited, + ChildCompatSet, Depth + 1)) + return false; + + CompatibleSet.insert(CompatibleSet.end(), ChildCompatSet.begin(), + ChildCompatSet.end()); + } + + // All children were compatible, and we've added them to CompatibleSet. + return true; +} + +bool ScopedNoAliasAAResult::noAliasByIntrinsic( + const MDNode *ANoAlias, const Value *APtr, const MDNode *BNoAlias, + const Value *BPtr, const CallBase *CallA, const CallBase *CallB, + AAQueryInfo &AAQI) { + LLVM_DEBUG(llvm::dbgs() << ">ScopedNoAliasAAResult::noAliasByIntrinsic:{" + << (const void *)ANoAlias << "," << (const void *)APtr + << "},{" << (const void *)BNoAlias << "," + << (const void *)BPtr << "}\n"); + if (!ANoAlias || !BNoAlias) + return false; + + if (CallA) { + // We're querying a callsite against something else, where we want to know + // if the callsite (CallA) is derived from some noalias call(s) and the + // other thing is not derived from those noalias call(s). This can be + // determined only if CallA only accesses memory through its arguments. + FunctionModRefBehavior MRB = getModRefBehavior(CallA); + if (MRB != FMRB_OnlyAccessesArgumentPointees && + MRB != FMRB_OnlyReadsArgumentPointees) + return false; + + LLVM_DEBUG(dbgs() << "SNA: CSA: " << *CallA << "\n"); + // Since the memory-access behavior of CallA is determined only by its + // arguments, we can answer this query in the affirmative if we can prove a + // lack of aliasing for all pointer arguments. 
+ for (Value *Arg : CallA->args()) { + if (!Arg->getType()->isPointerTy()) + continue; + + if (!noAliasByIntrinsic(ANoAlias, Arg, BNoAlias, BPtr, nullptr, CallB, + AAQI)) { + LLVM_DEBUG(dbgs() << "SNA: CSA: noalias fail for arg: " << *Arg + << "\n"); + return false; + } + } + + return true; + } + + const auto *AInst = dyn_cast(APtr); + if (!AInst || !AInst->getParent()) + return false; + const DataLayout &DL = AInst->getParent()->getModule()->getDataLayout(); + + if (!CallB && !BPtr) + return false; + + LLVM_DEBUG(dbgs() << "SNA: A: " << *APtr << "\n"); + LLVM_DEBUG(dbgs() << "SNB: "; if (CallB) dbgs() << "CSB: " << *CallB; + else if (BPtr) dbgs() << "B: " << *BPtr; + else dbgs() << "B: nullptr"; dbgs() << "\n"); + + SmallPtrSet Visited; + SmallVector CompatibleSet; + if (!findCompatibleNoAlias(APtr, ANoAlias, BNoAlias, DL, Visited, + CompatibleSet)) + return false; + + assert(!CompatibleSet.empty() && + "Fould an empty set of compatible intrinsics?"); + + LLVM_DEBUG(dbgs() << "SNA: Found a compatible set!\n"); +#ifndef NDEBUG + for (auto &C : CompatibleSet) + LLVM_DEBUG(dbgs() << "\t" << *C << "\n"); + LLVM_DEBUG(dbgs() << "\n"); +#endif + + // We have a set of compatible noalias calls (compatible with the scopes from + // both LocA and LocB) from which LocA.Ptr potentially derives. We now need + // to make sure that LocB.Ptr does not derive from any in that set. For + // correctness, there cannot be a depth limit here (if a pointer is derived + // from a noalias call, we must know). + SmallVector BObjs; + SmallVector BNoAliasCalls; + if (CallB) { + for (Value *Arg : CallB->args()) + GetUnderlyingObjects(Arg, BObjs, DL, nullptr, 0, &BNoAliasCalls); + } else { + GetUnderlyingObjects(const_cast(BPtr), BObjs, DL, nullptr, 0, + &BNoAliasCalls); + } + + LLVM_DEBUG(dbgs() << "SNA: B/CSB noalias:\n"); +#ifndef NDEBUG + for (auto &B : BNoAliasCalls) + LLVM_DEBUG(dbgs() << "\t" << *B << "\n"); + LLVM_DEBUG(dbgs() << "\n"); +#endif + + // We need to check now if any compatible llvm.provenance.noalias call is + // potentially using the same 'P' object as one of the 'BNoAliasCalls'. + // If this is true for at least one entry, we must bail out and assume + // 'may_alias' + + { + MDNode *NoAliasUnknownScopeMD = + AInst->getParent()->getParent()->getMetadata("noalias"); + + const struct { + unsigned IdentifyPObjIdArg; + unsigned ScopeArg; + unsigned IdentifyPArg; + } ProvNoAlias[2] = { + { + Intrinsic::NoAliasIdentifyPObjIdArg, + Intrinsic::NoAliasScopeArg, + Intrinsic::NoAliasIdentifyPArg + }, { + Intrinsic::ProvenanceNoAliasIdentifyPObjIdArg, + Intrinsic::ProvenanceNoAliasScopeArg, + Intrinsic::ProvenanceNoAliasIdentifyPArg + } + }; + for (Instruction *CA : CompatibleSet) { + LLVM_DEBUG(llvm::dbgs() << "- CA:" << *CA << "\n"); + assert(isa(CA) && + (cast(CA)->getIntrinsicID() == + llvm::Intrinsic::provenance_noalias || + cast(CA)->getIntrinsicID() == + llvm::Intrinsic::noalias + )); + const int CA_IsProv = cast(CA)->getIntrinsicID() == + llvm::Intrinsic::provenance_noalias ? 1 : 0; + for (Instruction *CB : BNoAliasCalls) { + LLVM_DEBUG(llvm::dbgs() << "- CB:" << *CB << "\n"); + assert(isa(CB) && + (cast(CB)->getIntrinsicID() == + llvm::Intrinsic::provenance_noalias || + cast(CB)->getIntrinsicID() == + llvm::Intrinsic::noalias + )); + const int CB_IsProv = cast(CB)->getIntrinsicID() == + llvm::Intrinsic::provenance_noalias ? 
1 : 0; + + // With the llvm.provenance.noalias version, we have different parts + // that can represent a P: + // - the actual 'identifyP' address (or an offset vs an optimized away + // alloca) + // - the objectId (objectId's currently represent an offset to the + // original alloca of the object) + // - the scope (different scopes = different objects; with the exception + // of the 'unknown scope': an unknown scope can potentially be the same + // as a real variable scope). + // If any of these are different, the P will not alias => *P will also + // not alias. + + // Let's start with the fast checks first: + + // Same call ? + if (CA == CB) { + LLVM_DEBUG(llvm::dbgs() << "SNA == SNB\n"); + return false; + } + + // - different objectId ? + { + // check ObjId first: if the obj id's (aka, offset in the object) are + // different, they represent different objects + auto ObjIdA = + cast( + CA->getOperand(ProvNoAlias[CA_IsProv].IdentifyPObjIdArg)) + ->getZExtValue(); + auto ObjIdB = + cast( + CB->getOperand(ProvNoAlias[CB_IsProv].IdentifyPObjIdArg)) + ->getZExtValue(); + if (ObjIdA != ObjIdB) { + LLVM_DEBUG(llvm::dbgs() << "SNA.ObjId != SNB.ObjId\n"); + continue; + } + } + + // Different Scope ? (except for unknown scope) + { + bool isDifferentPByScope = true; + auto *CASnaScope = CA->getOperand(ProvNoAlias[CA_IsProv].ScopeArg); + auto *CBSnaScope = CB->getOperand(ProvNoAlias[CB_IsProv].ScopeArg); + if (CASnaScope == CBSnaScope) { + // compatibility check below will resolve + isDifferentPByScope = false; + } else { + if (NoAliasUnknownScopeMD) { + if ((cast(CASnaScope)->getMetadata() == + NoAliasUnknownScopeMD) || + (cast(CBSnaScope)->getMetadata() == + NoAliasUnknownScopeMD)) { + isDifferentPByScope = false; + } + } + } + if (isDifferentPByScope) { + LLVM_DEBUG(llvm::dbgs() + << "SNA.Scope != SNB.Scope (and not 'unknown scope')\n"); + continue; + } + } + + // Different 'P' ? + { + Value *P_A = CA->getOperand(ProvNoAlias[CA_IsProv].IdentifyPArg); + Value *P_B = CB->getOperand(ProvNoAlias[CB_IsProv].IdentifyPArg); + + if (P_A == P_B) { + LLVM_DEBUG(dbgs() << " SNA.Scope == SNB.Scope, SNA.P == SNB.P\n"); + return false; + } + + Constant *CP_A = dyn_cast(P_A); + if (CP_A) { + Constant *CP_B = dyn_cast(P_B); + if (CP_B) { + CP_B = ConstantExpr::getBitCast(CP_B, CP_A->getType()); + Constant *Cmp = + ConstantExpr::getCompare(CmpInst::ICMP_NE, CP_A, CP_B, true); + if (Cmp && Cmp->isNullValue()) { + LLVM_DEBUG(dbgs() << " SNA.Scope == SNB.Scope, !(SNA.P != " + "SNB.P) as constant\n"); + return false; + } + } + } + // Check if P_A.addr and P_B.addr alias. If they don't, they describe + // different pointers. + LLVM_DEBUG(llvm::dbgs() + << " SNA.P=" << *P_A << ", SNB.P=" << *P_B << "\n"); + AAMDNodes P_A_Metadata; + AAMDNodes P_B_Metadata; + CA->getAAMetadata(P_A_Metadata); + CB->getAAMetadata(P_B_Metadata); + + // The ptr_provenance is not handled in the + // Instruction::getAAMetadata for intrinsics + if (CA_IsProv) { + P_A_Metadata.NoAliasProvenance = CA->getOperand( + Intrinsic::ProvenanceNoAliasIdentifyPProvenanceArg); + } + if (CB_IsProv) { + P_B_Metadata.NoAliasProvenance = CB->getOperand( + Intrinsic::ProvenanceNoAliasIdentifyPProvenanceArg); + } + + // Check with 1 unit + MemoryLocation ML_P_A(P_A, 1ull, P_A_Metadata); + MemoryLocation ML_P_B(P_B, 1ull, P_B_Metadata); + if (getBestAAResults().alias(ML_P_A, ML_P_B, AAQI) != + AliasResult::NoAlias) { + LLVM_DEBUG(llvm::dbgs() << " P ... 
may alias\n"); + return false; + } + LLVM_DEBUG(llvm::dbgs() << " P is NoAlias\n"); + continue; + } + } + } + } + + // The noalias scope from the compatible intrinsics are really identified by + // their scope argument, and we need to make sure that LocB.Ptr is not only + // not derived from the calls currently in CompatibleSet, but also from any + // other intrinsic with the same scope. We can't just search the list of + // noalias intrinsics in BNoAliasCalls because we care not just about those + // direct dependence, but also dependence through capturing. Metadata + // do not have use lists, but MetadataAsValue objects do (and they are + // uniqued), so we can search their use list. As a result, however, + // correctness demands that the scope list has only one element (so that we + // can find all uses of that scope by noalias intrinsics by looking at the + // use list of the associated scope list). + SmallPtrSet CompatibleSetMembers(CompatibleSet.begin(), + CompatibleSet.end()); + SmallVector CompatibleSetMVs; + for (auto &C : CompatibleSet) { + CompatibleSetMVs.push_back(cast( + C->getOperand( + cast(C)->getIntrinsicID() == Intrinsic::provenance_noalias ? + Intrinsic::ProvenanceNoAliasScopeArg : + Intrinsic::NoAliasScopeArg))); + } + for (auto &MV : CompatibleSetMVs) + for (Use &U : MV->uses()) + if (auto *UI = dyn_cast(U.getUser())) { + // Skip noalias declarations + if (auto *CB = dyn_cast(UI)) + if (CB->getIntrinsicID() == Intrinsic::noalias_decl) + continue; + if (CompatibleSetMembers.insert(UI).second) { + CompatibleSet.push_back(UI); + LLVM_DEBUG(dbgs() << "SNA: Adding to compatible set based on MD use: " + << *UI << "\n"); + } + } + + LLVM_DEBUG(dbgs() << "SNA: B does not derive from the compatible set!\n"); + + // Note: This can be removed when legacy-pass-manager support is removed; + // BasicAA always has a DT available, and only under the hack where this is + // an immutable pass, not a function pass, might we not have one. + LLVM_DEBUG(dbgs() << "SNA: DT is " << (DT ? "available" : "unavailable") + << "\n"); + + // We now know that LocB.Ptr does not derive from any of the noalias calls in + // CompatibleSet directly. We do, however, need to make sure that it cannot + // derive from them by capture. + for (auto &V : BObjs) { + // If the underlying object is not an instruction, then it can't be + // capturing the output value of an instruction (specifically, the noalias + // intrinsic call), and we can ignore it. + auto *I = dyn_cast(V); + if (!I) + continue; + if (isIdentifiedFunctionLocal(I)) + continue; + + LLVM_DEBUG(dbgs() << "SNA: Capture check for B/CSB UO: " << *I << "\n"); + + // If the value from the noalias intrinsic has been captured prior to the + // instruction defining the underlying object, then LocB.Ptr might yet be + // derived from the return value of the noalias intrinsic, and we cannot + // conclude anything about the aliasing. 
+ for (auto &C : CompatibleSet) + if (PointerMayBeCapturedBefore(C, /* ReturnCaptures */ false, + /* StoreCaptures */ false, I, DT, + /* IncludeI */ false, 80)) { + LLVM_DEBUG(dbgs() << "SNA: Pointer " << *C << " might be captured!\n"); + return false; + } + } + + if (CallB) { + FunctionModRefBehavior MRB = getModRefBehavior(CallB); + if (MRB != FMRB_OnlyAccessesArgumentPointees && + MRB != FMRB_OnlyReadsArgumentPointees) { + // If we're querying against a callsite, and it might read from memory + // not based on its arguments, then we need to check whether or not the + // relevant noalias results have been captured prior to the callsite. + for (auto &C : CompatibleSet) + if (PointerMayBeCapturedBefore(C, /* ReturnCaptures */ false, + /* StoreCaptures */ false, CallB, DT)) { + LLVM_DEBUG(dbgs() + << "SNA: CSB: Pointer " << *C << " might be captured!\n"); + return false; + } + } + } + + LLVM_DEBUG(dbgs() << " SNA: noalias!\n"); + return true; +} + AnalysisKey ScopedNoAliasAA::Key; ScopedNoAliasAAResult ScopedNoAliasAA::run(Function &F, FunctionAnalysisManager &AM) { - return ScopedNoAliasAAResult(); + return ScopedNoAliasAAResult(&AM.getResult(F)); } char ScopedNoAliasAAWrapperPass::ID = 0; @@ -197,7 +701,7 @@ } bool ScopedNoAliasAAWrapperPass::doInitialization(Module &M) { - Result.reset(new ScopedNoAliasAAResult()); + Result.reset(new ScopedNoAliasAAResult(nullptr)); return false; } @@ -206,6 +710,13 @@ return false; } +void ScopedNoAliasAAWrapperPass::setDT() { + auto *DTWP = getAnalysisIfAvailable(); + if (DTWP) + Result->setDT(&DTWP->getDomTree()); +} + void ScopedNoAliasAAWrapperPass::getAnalysisUsage(AnalysisUsage &AU) const { AU.setPreservesAll(); + AU.addUsedIfAvailable(); } Index: llvm/lib/Analysis/TypeBasedAliasAnalysis.cpp =================================================================== --- llvm/lib/Analysis/TypeBasedAliasAnalysis.cpp +++ llvm/lib/Analysis/TypeBasedAliasAnalysis.cpp @@ -112,6 +112,7 @@ #include "llvm/IR/Constants.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/Instruction.h" +#include "llvm/IR/Instructions.h" #include "llvm/IR/LLVMContext.h" #include "llvm/IR/Metadata.h" #include "llvm/InitializePasses.h" @@ -521,6 +522,21 @@ } void Instruction::getAAMetadata(AAMDNodes &N, bool Merge) const { + auto mergeProvenance = [](AAMDNodes &N, Value *rhs) { + if (rhs && isa(rhs)) + rhs = nullptr; + if (N.NoAliasProvenance && isa(N.NoAliasProvenance)) + N.NoAliasProvenance = nullptr; + + if (N.NoAliasProvenance == rhs) + return; + + // ptr provenance differs - clean NoAlias scope information + N.NoAliasProvenance = nullptr; + N.NoAlias = nullptr; + N.Scope = nullptr; + }; + if (Merge) { N.TBAA = MDNode::getMostGenericTBAA(N.TBAA, getMetadata(LLVMContext::MD_tbaa)); @@ -535,6 +551,20 @@ N.Scope = getMetadata(LLVMContext::MD_alias_scope); N.NoAlias = getMetadata(LLVMContext::MD_noalias); } + + Value *NoAliasProvenance = nullptr; + if (const LoadInst *LI = dyn_cast(this)) { + if (LI->hasNoaliasProvenanceOperand()) + NoAliasProvenance = LI->getNoaliasProvenanceOperand(); + } else if (const StoreInst *SI = dyn_cast(this)) { + if (SI->hasNoaliasProvenanceOperand()) + NoAliasProvenance = SI->getNoaliasProvenanceOperand(); + } + if (Merge) { + mergeProvenance(N, NoAliasProvenance); + } else { + N.NoAliasProvenance = NoAliasProvenance; + } } static const MDNode *createAccessTag(const MDNode *AccessType) { Index: llvm/lib/Analysis/ValueTracking.cpp =================================================================== --- llvm/lib/Analysis/ValueTracking.cpp +++ 
llvm/lib/Analysis/ValueTracking.cpp @@ -608,6 +608,11 @@ case Intrinsic::invariant_end: case Intrinsic::lifetime_start: case Intrinsic::lifetime_end: + case Intrinsic::noalias_decl: + case Intrinsic::noalias: + case Intrinsic::provenance_noalias: + case Intrinsic::noalias_arg_guard: + case Intrinsic::noalias_copy_guard: case Intrinsic::objectsize: case Intrinsic::ptr_annotation: case Intrinsic::var_annotation: @@ -4096,6 +4101,8 @@ const CallBase *Call, bool MustPreserveNullness) { switch (Call->getIntrinsicID()) { case Intrinsic::launder_invariant_group: + case Intrinsic::noalias_arg_guard: + case Intrinsic::noalias_copy_guard: case Intrinsic::strip_invariant_group: case Intrinsic::aarch64_irg: case Intrinsic::aarch64_tagp: @@ -4135,7 +4142,8 @@ } Value *llvm::GetUnderlyingObject(Value *V, const DataLayout &DL, - unsigned MaxLookup) { + unsigned MaxLookup, + SmallVectorImpl *NoAlias) { if (!V->getType()->isPointerTy()) return V; for (unsigned Count = 0; MaxLookup == 0 || Count < MaxLookup; ++Count) { @@ -4158,6 +4166,29 @@ continue; } } else if (auto *Call = dyn_cast(V)) { + if (NoAlias) { + // We are gathering information for ScopedNoAliasAA - + // Look through noalias and provenance.noalias intrinsics + if (Call->getIntrinsicID() == Intrinsic::provenance_noalias || + Call->getIntrinsicID() == Intrinsic::noalias) { + NoAlias->push_back(Call); + V = Call->getArgOperand(0); + continue; + } + // Look through noalias.arg.guard, and follow the ptr_provenance + if (Call->getIntrinsicID() == Intrinsic::noalias_arg_guard) { + V = Call->getArgOperand(1); + continue; + } + } else { + // We are not gathering information for ScopedNoAliasAA - + // Look through noalias.arg.guard and follow the pointer path + if (Call->getIntrinsicID() == Intrinsic::noalias_arg_guard) { + V = Call->getArgOperand(0); + continue; + } + } + // CaptureTracking can know about special capturing properties of some // intrinsics like launder.invariant.group, that can't be expressed with // the attributes, but have properties like returning aliasing pointer. @@ -4183,13 +4214,14 @@ void llvm::GetUnderlyingObjects(const Value *V, SmallVectorImpl &Objects, const DataLayout &DL, LoopInfo *LI, - unsigned MaxLookup) { + unsigned MaxLookup, + SmallVectorImpl *NoAlias) { SmallPtrSet Visited; SmallVector Worklist; Worklist.push_back(V); do { const Value *P = Worklist.pop_back_val(); - P = GetUnderlyingObject(P, DL, MaxLookup); + P = GetUnderlyingObject(P, DL, MaxLookup, NoAlias); if (!Visited.insert(P).second) continue; Index: llvm/lib/Analysis/VectorUtils.cpp =================================================================== --- llvm/lib/Analysis/VectorUtils.cpp +++ llvm/lib/Analysis/VectorUtils.cpp @@ -119,7 +119,8 @@ if (isTriviallyVectorizable(ID) || ID == Intrinsic::lifetime_start || ID == Intrinsic::lifetime_end || ID == Intrinsic::assume || - ID == Intrinsic::sideeffect) + ID == Intrinsic::sideeffect || ID == Intrinsic::noalias || + ID == Intrinsic::provenance_noalias) return ID; return Intrinsic::not_intrinsic; } @@ -701,7 +702,8 @@ } /// \returns \p I after propagating metadata from \p VL. 
-Instruction *llvm::propagateMetadata(Instruction *Inst, ArrayRef VL) { +Instruction *llvm::propagateMetadata(Instruction *Inst, ArrayRef VL, + bool RemoveNoAlias) { Instruction *I0 = cast(VL[0]); SmallVector, 4> Metadata; I0->getAllMetadataOtherThanDebugLoc(Metadata); @@ -712,6 +714,8 @@ LLVMContext::MD_access_group}) { MDNode *MD = I0->getMetadata(Kind); + if (RemoveNoAlias && (Kind == LLVMContext::MD_noalias)) + MD = nullptr; for (int J = 1, E = VL.size(); MD && J != E; ++J) { const Instruction *IJ = cast(VL[J]); MDNode *IMD = IJ->getMetadata(Kind); @@ -737,7 +741,6 @@ llvm_unreachable("unhandled metadata"); } } - Inst->setMetadata(Kind, MD); } Index: llvm/lib/AsmParser/LLLexer.cpp =================================================================== --- llvm/lib/AsmParser/LLLexer.cpp +++ llvm/lib/AsmParser/LLLexer.cpp @@ -545,6 +545,7 @@ KEYWORD(deplibs); // FIXME: Remove in 4.0. KEYWORD(datalayout); KEYWORD(volatile); + KEYWORD(ptr_provenance); KEYWORD(atomic); KEYWORD(unordered); KEYWORD(monotonic); Index: llvm/lib/AsmParser/LLParser.h =================================================================== --- llvm/lib/AsmParser/LLParser.h +++ llvm/lib/AsmParser/LLParser.h @@ -281,6 +281,9 @@ bool ParseOrdering(AtomicOrdering &Ordering); bool ParseOptionalStackAlignment(unsigned &Alignment); bool ParseOptionalCommaAlign(MaybeAlign &Alignment, bool &AteExtraComma); + bool ParseOptionalCommaNoaliasProvenance(Value *&V, LocTy &Loc, + PerFunctionState &PFS, + bool &AteExtraComma); bool ParseOptionalCommaAddrSpace(unsigned &AddrSpace, LocTy &Loc, bool &AteExtraComma); bool ParseOptionalCommaInAlloca(bool &IsInAlloca); Index: llvm/lib/AsmParser/LLParser.cpp =================================================================== --- llvm/lib/AsmParser/LLParser.cpp +++ llvm/lib/AsmParser/LLParser.cpp @@ -2197,16 +2197,20 @@ /// end. bool LLParser::ParseOptionalCommaAlign(MaybeAlign &Alignment, bool &AteExtraComma) { - AteExtraComma = false; - while (EatIfPresent(lltok::comma)) { + while (AteExtraComma || EatIfPresent(lltok::comma)) { + AteExtraComma = false; // Metadata at the end is an early exit. if (Lex.getKind() == lltok::MetadataVar) { AteExtraComma = true; return false; } - if (Lex.getKind() != lltok::kw_align) + // if we get here, ptr_provenance must not be possible any more + if (Lex.getKind() == lltok::kw_ptr_provenance) return Error(Lex.getLoc(), "expected metadata or 'align'"); + if (Lex.getKind() != lltok::kw_align) + return Error(Lex.getLoc(), + "expected metadata or 'align' or 'ptr_provenance'"); if (ParseOptionalAlignment(Alignment)) return true; } @@ -2214,6 +2218,27 @@ return false; } +/// ParseOptionalCommandNoaliasProvenance +/// ::= +/// ::= ',' ptr_provenance int* %3 +/// +/// This returns with AteExtraComma set to true if it ate an excess comma at the +/// end. +bool LLParser::ParseOptionalCommaNoaliasProvenance( + Value *&V, LLParser::LocTy &Loc, LLParser::PerFunctionState &PFS, + bool &AteExtraComma) { + assert(AteExtraComma == false); + if (EatIfPresent(lltok::comma)) { + if (Lex.getKind() == lltok::kw_ptr_provenance) { + Lex.Lex(); + return ParseTypeAndValue(V, Loc, PFS); + } + + AteExtraComma = true; + } + return false; +} + /// ParseOptionalCommaAddrSpace /// ::= /// ::= ',' addrspace(1) @@ -7073,6 +7098,8 @@ /// 'singlethread'? AtomicOrdering (',' 'align' i32)? 
int LLParser::ParseLoad(Instruction *&Inst, PerFunctionState &PFS) { Value *Val; LocTy Loc; + Value *NoaliasProvenance = nullptr; + LocTy NoaliasProvenanceLoc; MaybeAlign Alignment; bool AteExtraComma = false; bool isAtomic = false; @@ -7095,6 +7122,8 @@ if (ParseType(Ty) || ParseToken(lltok::comma, "expected comma after load's type") || ParseTypeAndValue(Val, Loc, PFS) || + ParseOptionalCommaNoaliasProvenance( + NoaliasProvenance, NoaliasProvenanceLoc, PFS, AteExtraComma) || ParseScopeAndOrdering(isAtomic, SSID, Ordering) || ParseOptionalCommaAlign(Alignment, AteExtraComma)) return true; @@ -7115,7 +7144,10 @@ return Error(ExplicitTypeLoc, "loading unsized types is not allowed"); if (!Alignment) Alignment = M->getDataLayout().getABITypeAlign(Ty); - Inst = new LoadInst(Ty, Val, "", isVolatile, *Alignment, Ordering, SSID); + auto *LI = new LoadInst(Ty, Val, "", isVolatile, *Alignment, Ordering, SSID); + if (NoaliasProvenance) + LI->setNoaliasProvenanceOperand(NoaliasProvenance); + Inst = LI; return AteExtraComma ? InstExtraComma : InstNormal; } @@ -7126,6 +7158,8 @@ /// 'singlethread'? AtomicOrdering (',' 'align' i32)? int LLParser::ParseStore(Instruction *&Inst, PerFunctionState &PFS) { Value *Val, *Ptr; LocTy Loc, PtrLoc; + Value *NoaliasProvenance = nullptr; + LocTy NoaliasProvenanceLoc; MaybeAlign Alignment; bool AteExtraComma = false; bool isAtomic = false; @@ -7146,6 +7180,8 @@ if (ParseTypeAndValue(Val, Loc, PFS) || ParseToken(lltok::comma, "expected ',' after store operand") || ParseTypeAndValue(Ptr, PtrLoc, PFS) || + ParseOptionalCommaNoaliasProvenance( + NoaliasProvenance, NoaliasProvenanceLoc, PFS, AteExtraComma) || ParseScopeAndOrdering(isAtomic, SSID, Ordering) || ParseOptionalCommaAlign(Alignment, AteExtraComma)) return true; @@ -7167,7 +7203,11 @@ if (!Alignment) Alignment = M->getDataLayout().getABITypeAlign(Val->getType()); - Inst = new StoreInst(Val, Ptr, isVolatile, *Alignment, Ordering, SSID); + auto *SI = new StoreInst(Val, Ptr, isVolatile, *Alignment, Ordering, SSID); + if (NoaliasProvenance) + SI->setNoaliasProvenanceOperand(NoaliasProvenance); + Inst = SI; + return AteExtraComma ? 
InstExtraComma : InstNormal; } Index: llvm/lib/AsmParser/LLToken.h =================================================================== --- llvm/lib/AsmParser/LLToken.h +++ llvm/lib/AsmParser/LLToken.h @@ -90,6 +90,7 @@ kw_deplibs, // FIXME: Remove in 4.0 kw_datalayout, kw_volatile, + kw_ptr_provenance, kw_atomic, kw_unordered, kw_monotonic, Index: llvm/lib/CodeGen/IntrinsicLowering.cpp =================================================================== --- llvm/lib/CodeGen/IntrinsicLowering.cpp +++ llvm/lib/CodeGen/IntrinsicLowering.cpp @@ -332,6 +332,15 @@ case Intrinsic::var_annotation: break; // Strip out these intrinsics + case Intrinsic::noalias_decl: + case Intrinsic::noalias: + case Intrinsic::provenance_noalias: + case Intrinsic::noalias_arg_guard: + case Intrinsic::noalias_copy_guard: + // Just forward the value + CI->replaceAllUsesWith(CI->getOperand(0)); + break; + case Intrinsic::memcpy: { Type *IntPtr = DL.getIntPtrType(Context); Value *Size = Builder.CreateIntCast(CI->getArgOperand(2), IntPtr, Index: llvm/lib/CodeGen/MachineOperand.cpp =================================================================== --- llvm/lib/CodeGen/MachineOperand.cpp +++ llvm/lib/CodeGen/MachineOperand.cpp @@ -1167,6 +1167,10 @@ OS << ", !noalias "; AAInfo.NoAlias->printAsOperand(OS, MST); } + if (AAInfo.NoAliasProvenance) { + OS << ", ptr_provenance "; + AAInfo.NoAliasProvenance->printAsOperand(OS, true, MST); + } if (getRanges()) { OS << ", !range "; getRanges()->printAsOperand(OS, MST); Index: llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp =================================================================== --- llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -6464,11 +6464,21 @@ case Intrinsic::annotation: case Intrinsic::ptr_annotation: + case Intrinsic::noalias: case Intrinsic::launder_invariant_group: case Intrinsic::strip_invariant_group: + case Intrinsic::provenance_noalias: + case Intrinsic::noalias_arg_guard: + case Intrinsic::noalias_copy_guard: // Drop the intrinsic, but forward the value setValue(&I, getValue(I.getOperand(0))); return; + case Intrinsic::noalias_decl: + // Generate a dummy value - it will never be used and should get optimized + // away + setValue(&I, DAG.getUNDEF(TLI.getPointerTy(DAG.getDataLayout()))); + return; + case Intrinsic::assume: case Intrinsic::var_annotation: case Intrinsic::sideeffect: Index: llvm/lib/IR/AsmWriter.cpp =================================================================== --- llvm/lib/IR/AsmWriter.cpp +++ llvm/lib/IR/AsmWriter.cpp @@ -4139,15 +4139,30 @@ } Out << ", "; TypePrinter.print(I.getType(), Out); - } else if (Operand) { // Print the normal way. + } else if (const auto *LI = dyn_cast(&I)) { + Out << ' '; + TypePrinter.print(LI->getType(), Out); + Out << ", "; + writeOperand(I.getOperand(0), true); + if (LI->hasNoaliasProvenanceOperand()) { + Out << ", ptr_provenance "; + writeOperand(LI->getNoaliasProvenanceOperand(), true); + } + } else if (const auto *SI = dyn_cast(&I)) { + Out << ' '; + writeOperand(I.getOperand(0), true); + Out << ", "; + writeOperand(I.getOperand(1), true); + + if (SI->hasNoaliasProvenanceOperand()) { + Out << ", ptr_provenance "; + writeOperand(SI->getNoaliasProvenanceOperand(), true); + } + } else if (Operand) { // Print the normal way. 
if (const auto *GEP = dyn_cast(&I)) { Out << ' '; TypePrinter.print(GEP->getSourceElementType(), Out); Out << ','; - } else if (const auto *LI = dyn_cast(&I)) { - Out << ' '; - TypePrinter.print(LI->getType(), Out); - Out << ','; } // PrintAllTypes - Instructions who have operands of all the same type @@ -4157,8 +4172,7 @@ Type *TheType = Operand->getType(); // Select, Store and ShuffleVector always print all types. - if (isa(I) || isa(I) || isa(I) - || isa(I)) { + if (isa(I) || isa(I) || isa(I)) { PrintAllTypes = true; } else { for (unsigned i = 1, E = I.getNumOperands(); i != E; ++i) { Index: llvm/lib/IR/IRBuilder.cpp =================================================================== --- llvm/lib/IR/IRBuilder.cpp +++ llvm/lib/IR/IRBuilder.cpp @@ -462,6 +462,130 @@ return createCallHelper(FnAssume, Ops, this, "", nullptr, OpBundles); } +Instruction *IRBuilderBase::CreateNoAliasDeclaration(Value *AllocaPtr, + Value *ObjId, + Value *Scope) { + assert(AllocaPtr); + + SmallVector Types = {Type::getInt8PtrTy(getContext()), + AllocaPtr->getType(), ObjId->getType()}; + SmallVector Ops = {AllocaPtr, ObjId, Scope}; + + Module *M = BB->getModule(); + auto *FnIntrinsic = + Intrinsic::getDeclaration(M, Intrinsic::noalias_decl, Types); + return createCallHelper(FnIntrinsic, Ops, this); +} + +Instruction *IRBuilderBase::CreateNoAliasPointer(Value *Ptr, Value *NoAliasDecl, + Value *AddrP, Value *ScopeTag, + const Twine &Name, + uint64_t ObjectId) { + assert(Ptr && AddrP); + Value *I64_0 = + ConstantInt::get(IntegerType::getInt64Ty(getContext()), ObjectId); + return CreateGenericNoAliasIntrinsic(Intrinsic::noalias, Ptr, + {NoAliasDecl, AddrP, I64_0}, {}, + {ScopeTag}, Name); +} + +Instruction *IRBuilderBase::CreateProvenanceNoAliasPlain( + Value *Ptr, Value *NoAliasDecl, Value *AddrP, Value *AddrP_Provenance, + Value *ObjId, MDNode *ScopeTag, const Twine &Name) { + assert(Ptr && AddrP && AddrP_Provenance); + return CreateGenericNoAliasIntrinsic(Intrinsic::provenance_noalias, Ptr, + {NoAliasDecl, AddrP, AddrP_Provenance, ObjId}, + {ScopeTag}, {}, Name); +} + +Instruction *IRBuilderBase::CreateProvenanceNoAliasPlain( + Value *Ptr, Value *NoAliasDecl, Value *AddrP, Value *AddrP_Provenance, + Value *ObjId, Value *ScopeValue, const Twine &Name) { + assert(Ptr && AddrP && AddrP_Provenance); + return CreateGenericNoAliasIntrinsic(Intrinsic::provenance_noalias, Ptr, + {NoAliasDecl, AddrP, AddrP_Provenance, ObjId}, + {}, {ScopeValue}, Name); +} + +Instruction *IRBuilderBase::CreateNoAliasArgGuard(Value *Ptr, + Value *Provenance, + const Twine &Name) { + return CreateGenericNoAliasIntrinsic(Intrinsic::noalias_arg_guard, Ptr, + {Provenance}, {}, {}, Name); +} + +Instruction * +IRBuilderBase::CreateNoAliasCopyGuard(Value *BasePtr, Value *NoAliasDecl, + ArrayRef EncodedIndices, + MDNode *ScopeTag, const Twine &Name) { + SmallVector IndicesArray; + + assert(NoAliasDecl != nullptr); + assert(ScopeTag != nullptr); + assert(!EncodedIndices.empty() && + "NoAliasCopyGuard should have at least one set of indices"); + + for (unsigned i = 0, ci = EncodedIndices.size(); i < ci;) { + auto length = EncodedIndices[i++]; + assert(length > 0 && + "NoAliasCopyGuard: encoded indices problem: zero length seen"); + length += i; + assert(length <= ci && + "NoAliasCopyGuard: encoded indices problem: too few indices"); + + SmallVector Indices; + while (i < length) { + // Note: getElementPtr requires i32 numbers + Indices.push_back( + llvm::ConstantAsMetadata::get(getInt32(EncodedIndices[i]))); + ++i; + } + 
IndicesArray.push_back(llvm::MDNode::get(Context, Indices)); + } + + return CreateGenericNoAliasIntrinsic( + Intrinsic::noalias_copy_guard, BasePtr, {NoAliasDecl}, + {llvm::MDNode::get(Context, IndicesArray), ScopeTag}, {}, Name); +} + +Instruction *IRBuilderBase::CreateGenericNoAliasIntrinsic( + Intrinsic::ID ID, Value *Ptr, ArrayRef args_opt, + ArrayRef MDNodes, ArrayRef MDValues, const Twine &Name) { + // FIXME: We can currently mangle just about everything, but not literal + // structs (for those, bitcast to i8*). + // FIXME: as far as I see in 'getMangledTypeStr', this should be ok now. + Value *CPtr = Ptr; + if (auto *STyp = + dyn_cast(CPtr->getType()->getPointerElementType())) { + if (STyp->isLiteral()) + CPtr = getCastedInt8PtrValue(CPtr); + } + + SmallVector Types = {CPtr->getType()}; + SmallVector Ops = {CPtr}; + for (auto *A : args_opt) { + Types.push_back(A->getType()); + Ops.push_back(A); + } + // For the metadata info, types must not be added: + for (auto *MD : MDNodes) { + Ops.push_back(MetadataAsValue::get(Context, MD)); + } + Ops.insert(Ops.end(), MDValues.begin(), MDValues.end()); + Module *M = BB->getModule(); + auto *FnIntrinsic = Intrinsic::getDeclaration(M, ID, Types); + Instruction *Ret = createCallHelper(FnIntrinsic, Ops, this, Name); + + if (Ret->getType() != Ptr->getType()) { + BitCastInst *BCI = new BitCastInst(Ret, Ptr->getType(), Name + ".cast"); + BB->getInstList().insert(InsertPt, BCI); + SetInstDebugLocation(BCI); + Ret = BCI; + } + + return Ret; +} + /// Create a call to a Masked Load intrinsic. /// \p Ptr - base pointer for the load /// \p Alignment - alignment of the source location Index: llvm/lib/IR/Instructions.cpp =================================================================== --- llvm/lib/IR/Instructions.cpp +++ llvm/lib/IR/Instructions.cpp @@ -1381,8 +1381,11 @@ LoadInst::LoadInst(Type *Ty, Value *Ptr, const Twine &Name, bool isVolatile, Align Align, AtomicOrdering Order, SyncScope::ID SSID, Instruction *InsertBef) - : UnaryInstruction(Ty, Load, Ptr, InsertBef) { + : Instruction(Ty, Load, OperandTraits::op_begin(this), 2, + InsertBef) { assert(Ty == cast(Ptr->getType())->getElementType()); + Op<0>() = Ptr; + setLoadInstNumOperands(1); setVolatile(isVolatile); setAlignment(Align); setAtomic(Order, SSID); @@ -1393,7 +1396,10 @@ LoadInst::LoadInst(Type *Ty, Value *Ptr, const Twine &Name, bool isVolatile, Align Align, AtomicOrdering Order, SyncScope::ID SSID, BasicBlock *InsertAE) - : UnaryInstruction(Ty, Load, Ptr, InsertAE) { + : Instruction(Ty, Load, OperandTraits::op_begin(this), 2, + InsertAE) { + Op<0>() = Ptr; + setLoadInstNumOperands(1); assert(Ty == cast(Ptr->getType())->getElementType()); setVolatile(isVolatile); setAlignment(Align); @@ -1449,10 +1455,10 @@ AtomicOrdering Order, SyncScope::ID SSID, Instruction *InsertBefore) : Instruction(Type::getVoidTy(val->getContext()), Store, - OperandTraits::op_begin(this), - OperandTraits::operands(this), InsertBefore) { + OperandTraits::op_begin(this), 3, InsertBefore) { Op<0>() = val; Op<1>() = addr; + setStoreInstNumOperands(2); setVolatile(isVolatile); setAlignment(Align); setAtomic(Order, SSID); @@ -1463,10 +1469,10 @@ AtomicOrdering Order, SyncScope::ID SSID, BasicBlock *InsertAtEnd) : Instruction(Type::getVoidTy(val->getContext()), Store, - OperandTraits::op_begin(this), - OperandTraits::operands(this), InsertAtEnd) { + OperandTraits::op_begin(this), 3, InsertAtEnd) { Op<0>() = val; Op<1>() = addr; + setStoreInstNumOperands(2); setVolatile(isVolatile); setAlignment(Align); setAtomic(Order, 
SSID); @@ -4258,13 +4264,32 @@ } LoadInst *LoadInst::cloneImpl() const { - return new LoadInst(getType(), getOperand(0), Twine(), isVolatile(), - getAlign(), getOrdering(), getSyncScopeID()); + LoadInst *Result = + new LoadInst(getType(), getOperand(0), Twine(), isVolatile(), getAlign(), + getOrdering(), getSyncScopeID()); + // - we must keep the same number of arguments (for vector optimizations) + // - if we duplicate the provenance, we can get into problems with passes + // that don't know how to handle it (Like MergeLoadStoreMotion shows) + // - safe alternative: keep the argument, but map it to undef. + if (hasNoaliasProvenanceOperand()) + Result->setNoaliasProvenanceOperand( + UndefValue::get(getNoaliasProvenanceOperand()->getType())); + return Result; } StoreInst *StoreInst::cloneImpl() const { - return new StoreInst(getOperand(0), getOperand(1), isVolatile(), getAlign(), - getOrdering(), getSyncScopeID()); + StoreInst *Result = + new StoreInst(getOperand(0), getOperand(1), isVolatile(), getAlign(), + getOrdering(), getSyncScopeID()); + + // we must keep the same number of arguments (for vector optimizations) + // - if we duplicate the provenance, we can get into problems with passes + // that don't know how to handle it (Like MergeLoadStoreMotion shows) + // - safe alternative: keep the argument, but map it to undef. + if (hasNoaliasProvenanceOperand()) + Result->setNoaliasProvenanceOperand( + UndefValue::get(getNoaliasProvenanceOperand()->getType())); + return Result; } AtomicCmpXchgInst *AtomicCmpXchgInst::cloneImpl() const { Index: llvm/lib/IR/Metadata.cpp =================================================================== --- llvm/lib/IR/Metadata.cpp +++ llvm/lib/IR/Metadata.cpp @@ -10,6 +10,7 @@ // //===----------------------------------------------------------------------===// +#include "llvm/IR/Metadata.h" #include "LLVMContextImpl.h" #include "MetadataImpl.h" #include "SymbolTableListTraitsImpl.h" @@ -37,8 +38,8 @@ #include "llvm/IR/GlobalObject.h" #include "llvm/IR/GlobalVariable.h" #include "llvm/IR/Instruction.h" +#include "llvm/IR/Instructions.h" #include "llvm/IR/LLVMContext.h" -#include "llvm/IR/Metadata.h" #include "llvm/IR/Module.h" #include "llvm/IR/TrackingMDRef.h" #include "llvm/IR/Type.h" @@ -1264,7 +1265,23 @@ setMetadata(LLVMContext::MD_tbaa, N.TBAA); setMetadata(LLVMContext::MD_tbaa_struct, N.TBAAStruct); setMetadata(LLVMContext::MD_alias_scope, N.Scope); - setMetadata(LLVMContext::MD_noalias, N.NoAlias); + if (!N.NoAliasProvenance) + setMetadata(LLVMContext::MD_noalias, N.NoAlias); + // else postpone until setAAMetadataNoAliasProvenance +} + +void Instruction::setAAMetadataNoAliasProvenance(const AAMDNodes &N) { + // It is not correct to always propagate the ptr_provenance. + // 'setAAMetadata' must already have been called ! + if (N.NoAliasProvenance) { + // postponed from setAAMetadata + setMetadata(LLVMContext::MD_noalias, N.NoAlias); + if (LoadInst *LI = dyn_cast(this)) { + LI->setNoaliasProvenanceOperand(N.NoAliasProvenance); + } else if (StoreInst *SI = dyn_cast(this)) { + SI->setNoaliasProvenanceOperand(N.NoAliasProvenance); + } + } } MDNode *Instruction::getMetadataImpl(unsigned KindID) const { Index: llvm/lib/IR/User.cpp =================================================================== --- llvm/lib/IR/User.cpp +++ llvm/lib/IR/User.cpp @@ -177,20 +177,19 @@ Use **HungOffOperandList = static_cast(Usr) - 1; // drop the hung off uses. 
- Use::zap(*HungOffOperandList, *HungOffOperandList + Obj->NumUserOperands, + Use::zap(*HungOffOperandList, *HungOffOperandList + Obj->getNumOperands(), /* Delete */ true); ::operator delete(HungOffOperandList); } else if (Obj->HasDescriptor) { Use *UseBegin = static_cast(Usr) - Obj->NumUserOperands; - Use::zap(UseBegin, UseBegin + Obj->NumUserOperands, /* Delete */ false); + Use::zap(UseBegin, UseBegin + Obj->getNumOperands(), /* Delete */ false); auto *DI = reinterpret_cast(UseBegin) - 1; uint8_t *Storage = reinterpret_cast(DI) - DI->SizeInBytes; ::operator delete(Storage); } else { Use *Storage = static_cast(Usr) - Obj->NumUserOperands; - Use::zap(Storage, Storage + Obj->NumUserOperands, - /* Delete */ false); + Use::zap(Storage, Storage + Obj->getNumOperands(), /* Delete */ false); ::operator delete(Storage); } } Index: llvm/lib/IR/Value.cpp =================================================================== --- llvm/lib/IR/Value.cpp +++ llvm/lib/IR/Value.cpp @@ -51,9 +51,9 @@ } Value::Value(Type *ty, unsigned scid) - : VTy(checkType(ty)), UseList(nullptr), SubclassID(scid), - HasValueHandle(0), SubclassOptionalData(0), SubclassData(0), - NumUserOperands(0), IsUsedByMD(false), HasName(false) { + : VTy(checkType(ty)), UseList(nullptr), SubclassID(scid), HasValueHandle(0), + SubclassOptionalData(0), SubclassData(0), NumUserOperands(0), + IsUsedByMD(false), HasName(false), NumUserOperandsDelta(0) { static_assert(ConstantFirstVal == 0, "!(SubclassID < ConstantFirstVal)"); // FIXME: Why isn't this in the subclass gunk?? // Note, we cannot call isa before the CallInst has been @@ -508,6 +508,26 @@ }); } +// Like replaceAllUsesWith except it does not handle constants or basic blocks. +// This routine leaves uses outside BB. +void Value::replaceUsesInsideBlock(Value *New, BasicBlock *BB) { + assert(New && "Value::replaceUsesInsideBlock(, BB) is invalid!"); + assert(!contains(New, this) && + "this->replaceUsesInsideBlock(expr(this), BB) is NOT valid!"); + assert(New->getType() == getType() && + "replaceUses of value with new value of different type!"); + assert(BB && "Basic block that may contain a use of 'New' must be defined\n"); + + use_iterator UI = use_begin(), E = use_end(); + for (; UI != E;) { + Use &U = *UI; + ++UI; + auto *Usr = dyn_cast(U.getUser()); + if (Usr && Usr->getParent() == BB) + U.set(New); + } +} + namespace { // Various metrics for how much to strip off of pointers. 
enum PointerStripKind { @@ -515,6 +535,7 @@ PSK_ZeroIndicesAndAliases, PSK_ZeroIndicesSameRepresentation, PSK_ZeroIndicesAndInvariantGroups, + PSK_ZeroIndicesAndInvariantGroupsAndNoAliasIntr, PSK_InBoundsConstantIndices, PSK_InBounds }; @@ -541,6 +562,7 @@ case PSK_ZeroIndicesAndAliases: case PSK_ZeroIndicesSameRepresentation: case PSK_ZeroIndicesAndInvariantGroups: + case PSK_ZeroIndicesAndInvariantGroupsAndNoAliasIntr: if (!GEP->hasAllZeroIndices()) return V; break; @@ -580,6 +602,17 @@ V = Call->getArgOperand(0); continue; } + // Same as above, but also for noalias intrinsics + if (StripKind == PSK_ZeroIndicesAndInvariantGroupsAndNoAliasIntr && + (Call->getIntrinsicID() == Intrinsic::launder_invariant_group || + Call->getIntrinsicID() == Intrinsic::strip_invariant_group || + Call->getIntrinsicID() == Intrinsic::noalias || + Call->getIntrinsicID() == Intrinsic::provenance_noalias || + Call->getIntrinsicID() == Intrinsic::noalias_arg_guard || + Call->getIntrinsicID() == Intrinsic::noalias_copy_guard)) { + V = Call->getArgOperand(0); + continue; + } } return V; } @@ -610,6 +643,11 @@ return stripPointerCastsAndOffsets(this); } +const Value *Value::stripPointerCastsAndInvariantGroupsAndNoAliasIntr() const { + return stripPointerCastsAndOffsets< + PSK_ZeroIndicesAndInvariantGroupsAndNoAliasIntr>(this); +} + const Value *Value::stripAndAccumulateConstantOffsets( const DataLayout &DL, APInt &Offset, bool AllowNonInbounds, function_ref ExternalAnalysis) const { Index: llvm/lib/IR/Verifier.cpp =================================================================== --- llvm/lib/IR/Verifier.cpp +++ llvm/lib/IR/Verifier.cpp @@ -5049,6 +5049,81 @@ "result of a matrix operation does not fit in the returned vector"); break; } + case Intrinsic::noalias_decl: { + Assert( + (isa( + Call.getArgOperand(Intrinsic::NoAliasDeclAllocaArg)) || + isa(Call.getArgOperand(Intrinsic::NoAliasDeclAllocaArg))), + "llvm.noalias.decl must refer to a null-ptr or an alloca instruction", + Call); + break; + } + case Intrinsic::noalias: { + auto *DA = Call.getArgOperand(Intrinsic::NoAliasNoAliasDeclArg); + if (!isa(DA) && !isa(DA)) { + auto *DeclSite = dyn_cast(DA); + Assert(DeclSite, "llvm.noalias arg1 must refer to a llvm.noalias.decl", + Call); + Assert((DeclSite->getArgOperand(Intrinsic::NoAliasDeclObjIdArg) == + Call.getArgOperand(Intrinsic::NoAliasIdentifyPObjIdArg)), + "llvm.noalias objId arg must match with llvm.noalias.decl", Call); + Assert((DeclSite->getArgOperand(Intrinsic::NoAliasDeclScopeArg) == + Call.getArgOperand(Intrinsic::NoAliasScopeArg)), + "llvm.noalias scope arg must match with llvm.noalias.decl", Call); + } + break; + } + case Intrinsic::provenance_noalias: { + auto *DA = Call.getArgOperand(Intrinsic::ProvenanceNoAliasNoAliasDeclArg); + if (!isa(DA) && !isa(DA)) { + // JumpThreading can duplicate llvm.noalias.decl. + // I do not think we should prohibit that. 
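+ // Walk the declaration operand through PHI nodes (ignoring undef and null incoming values) and collect every llvm.noalias.decl it may refer to; each one is then checked below.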
+ SmallVector NoAliasDeclarations; + { + DenseSet Handled; + SmallVector Worklist = {DA}; + while (!Worklist.empty()) { + auto *V = Worklist.pop_back_val(); + if (Handled.insert(V).second) { + if (isa(V)) { + NoAliasDeclarations.push_back(cast(V)); + } else if (PHINode *PHI = dyn_cast(V)) { + auto OV = PHI->operand_values(); + Worklist.insert(Worklist.end(), OV.begin(), OV.end()); + } else if (isa(V)) { + // ignore undefs + } else if (isa(V)) { + // ignore nullptr + } else { + Assert(false, + "llvm.provenance.noalias depends on something that is not a " + "llvm.noalias.decl", + Call); + } + } + } + } + + Assert(!NoAliasDeclarations.empty(), + "llvm.provenance.noalias should depend on a llvm.noalias.decl", Call); + for (auto *CB : NoAliasDeclarations) { + Assert(CB, "llvm.provenance.noalias arg1 must refer to a llvm.noalias.decl", + Call); + Assert(CB->getIntrinsicID() == Intrinsic::noalias_decl, + "llvm.provenance.noalias arg1 must refer to a llvm.noalias.decl", + Call); + Assert((CB->getArgOperand(Intrinsic::NoAliasDeclObjIdArg) == + Call.getArgOperand(Intrinsic::ProvenanceNoAliasIdentifyPObjIdArg)), + "llvm.provenance.noalias objId arg must match with llvm.noalias.decl", + Call); + Assert((CB->getArgOperand(Intrinsic::NoAliasDeclScopeArg) == + Call.getArgOperand(Intrinsic::ProvenanceNoAliasScopeArg)), + "llvm.provenance.noalias scope arg must match with llvm.noalias.decl", + Call); + } + } + break; + } }; } Index: llvm/lib/Passes/PassBuilder.cpp =================================================================== --- llvm/lib/Passes/PassBuilder.cpp +++ llvm/lib/Passes/PassBuilder.cpp @@ -119,6 +119,7 @@ #include "llvm/Transforms/Scalar/AlignmentFromAssumptions.h" #include "llvm/Transforms/Scalar/BDCE.h" #include "llvm/Transforms/Scalar/CallSiteSplitting.h" +#include "llvm/Transforms/Scalar/ConnectNoAliasDecl.h" #include "llvm/Transforms/Scalar/ConstantHoisting.h" #include "llvm/Transforms/Scalar/CorrelatedValuePropagation.h" #include "llvm/Transforms/Scalar/DCE.h" @@ -163,6 +164,7 @@ #include "llvm/Transforms/Scalar/NaryReassociate.h" #include "llvm/Transforms/Scalar/NewGVN.h" #include "llvm/Transforms/Scalar/PartiallyInlineLibCalls.h" +#include "llvm/Transforms/Scalar/PropagateAndConvertNoAlias.h" #include "llvm/Transforms/Scalar/Reassociate.h" #include "llvm/Transforms/Scalar/RewriteStatepointsForGC.h" #include "llvm/Transforms/Scalar/SCCP.h" @@ -438,6 +440,9 @@ FunctionPassManager FPM(DebugLogging); + // Connect NoAlias Declarations. Do this before SROA + FPM.addPass(ConnectNoAliasDeclPass()); + // Form SSA out of local memory accesses after breaking apart aggregates into // scalars. FPM.addPass(SROA()); @@ -445,6 +450,10 @@ // Catch trivial redundancies FPM.addPass(EarlyCSEPass(true /* Enable mem-ssa. */)); + // Propagate and Convert noalias intrinsics as early as possible. + // But do it after SROA and EarlyCSE ! + FPM.addPass(PropagateAndConvertNoAliasPass()); + // Hoisting of scalars and load expressions. FPM.addPass(SimplifyCFGPass()); FPM.addPass(InstCombinePass()); @@ -515,9 +524,16 @@ FPM.addPass(createFunctionToLoopPassAdaptor( std::move(LPM2), /*UseMemorySSA=*/false, DebugLogging)); + // Connect NoAlias Declarations. Do this before SROA + FPM.addPass(ConnectNoAliasDeclPass()); + // Delete small array after loop unroll. FPM.addPass(SROA()); + // Propagate and Convert noalias intrinsics as early as possible. + // But do it after SROA and EarlyCSE ! + FPM.addPass(PropagateAndConvertNoAliasPass()); + // Specially optimize memory movement as it doesn't look like dataflow in SSA. 
FPM.addPass(MemCpyOptPass()); @@ -566,12 +582,20 @@ FunctionPassManager FPM(DebugLogging); + // Connect NoAlias Declarations. Do this before SROA + FPM.addPass(ConnectNoAliasDeclPass()); + // Form SSA out of local memory accesses after breaking apart aggregates into // scalars. FPM.addPass(SROA()); // Catch trivial redundancies FPM.addPass(EarlyCSEPass(true /* Enable mem-ssa. */)); + + // Propagate and Convert noalias intrinsics as early as possible. + // But do it after SROA and EarlyCSE ! + FPM.addPass(PropagateAndConvertNoAliasPass()); + if (EnableKnowledgeRetention) FPM.addPass(AssumeSimplifyPass()); @@ -673,9 +697,16 @@ FPM.addPass(createFunctionToLoopPassAdaptor( std::move(LPM2), /*UseMemorySSA=*/false, DebugLogging)); + // Connect NoAlias Declarations. Do this before SROA + FPM.addPass(ConnectNoAliasDeclPass()); + // Delete small array after loop unroll. FPM.addPass(SROA()); + // Propagate and Convert noalias intrinsics as early as possible. + // But do it after SROA and EarlyCSE ! + FPM.addPass(PropagateAndConvertNoAliasPass()); + // Eliminate redundancies. FPM.addPass(MergedLoadStoreMotionPass()); if (RunNewGVN) @@ -756,8 +787,16 @@ CGSCCPassManager &CGPipeline = MIWP.getPM(); FunctionPassManager FPM; + // Connect NoAlias Declarations. Do this before SROA + FPM.addPass(ConnectNoAliasDeclPass()); + FPM.addPass(SROA()); FPM.addPass(EarlyCSEPass()); // Catch trivial redundancies. + + // Propagate and Convert as early as possible. + // But do it after SROA and EarlyCSE ! + FPM.addPass(PropagateAndConvertNoAliasPass()); + FPM.addPass(SimplifyCFGPass()); // Merge & remove basic blocks. FPM.addPass(InstCombinePass()); // Combine silly sequences. invokePeepholeEPCallbacks(FPM, Level); @@ -930,8 +969,16 @@ // frontend. FunctionPassManager EarlyFPM(DebugLogging); EarlyFPM.addPass(SimplifyCFGPass()); + // Connect NoAlias Declarations. Do this before SROA + EarlyFPM.addPass(ConnectNoAliasDeclPass()); + EarlyFPM.addPass(SROA()); EarlyFPM.addPass(EarlyCSEPass()); + + // Propagate and Convert as early as possible. + // But do it after SROA and EarlyCSE ! + EarlyFPM.addPass(PropagateAndConvertNoAliasPass()); + EarlyFPM.addPass(LowerExpectIntrinsicPass()); if (PTO.Coroutines) EarlyFPM.addPass(CoroEarlyPass()); @@ -1484,9 +1531,16 @@ PGOOpt->ProfileRemappingFile); } + // Connect NoAlias Declarations. Do this before SROA + FPM.addPass(ConnectNoAliasDeclPass()); + // Break up allocas FPM.addPass(SROA()); + // Propagate and Convert as early as possible. + // But do it after SROA and EarlyCSE ! + FPM.addPass(PropagateAndConvertNoAliasPass()); + // LTO provides additional opportunities for tailcall elimination due to // link-time inlining, and visibility of nocapture attribute. 
FPM.addPass(TailCallElimPass()); Index: llvm/lib/Passes/PassRegistry.def =================================================================== --- llvm/lib/Passes/PassRegistry.def +++ llvm/lib/Passes/PassRegistry.def @@ -175,7 +175,9 @@ FUNCTION_PASS("bounds-checking", BoundsCheckingPass()) FUNCTION_PASS("break-crit-edges", BreakCriticalEdgesPass()) FUNCTION_PASS("callsite-splitting", CallSiteSplittingPass()) +FUNCTION_PASS("connect-noaliasdecl", ConnectNoAliasDeclPass()) FUNCTION_PASS("consthoist", ConstantHoistingPass()) +FUNCTION_PASS("convert-noalias", PropagateAndConvertNoAliasPass()) FUNCTION_PASS("chr", ControlHeightReductionPass()) FUNCTION_PASS("coro-early", CoroEarlyPass()) FUNCTION_PASS("coro-elide", CoroElidePass()) Index: llvm/lib/Transforms/IPO/ArgumentPromotion.cpp =================================================================== --- llvm/lib/Transforms/IPO/ArgumentPromotion.cpp +++ llvm/lib/Transforms/IPO/ArgumentPromotion.cpp @@ -167,24 +167,26 @@ for (User *U : I->users()) { Instruction *UI = cast(U); Type *SrcTy; - if (LoadInst *L = dyn_cast(UI)) + IndicesVector Indices; + LoadInst *L = dyn_cast(UI); + if (L) SrcTy = L->getType(); else SrcTy = cast(UI)->getSourceElementType(); - IndicesVector Indices; - Indices.reserve(UI->getNumOperands() - 1); - // Since loads will only have a single operand, and GEPs only a single - // non-index operand, this will record direct loads without any indices, - // and gep+loads with the GEP indices. - for (User::op_iterator II = UI->op_begin() + 1, IE = UI->op_end(); - II != IE; ++II) - Indices.push_back(cast(*II)->getSExtValue()); - // GEPs with a single 0 index can be merged with direct loads - if (Indices.size() == 1 && Indices.front() == 0) - Indices.clear(); + if (L == nullptr) { // not a direct load (loads can have 2 operands) + Indices.reserve(UI->getNumOperands() - 1); + // This will record direct loads without any indices, and gep+loads + // with the GEP indices. 
+ for (User::op_iterator II = UI->op_begin() + 1, IE = UI->op_end(); + II != IE; ++II) + Indices.push_back(cast(*II)->getSExtValue()); + // GEPs with a single 0 index can be merged with direct loads + if (Indices.size() == 1 && Indices.front() == 0) + Indices.clear(); + } ArgIndices.insert(std::make_pair(SrcTy, Indices)); LoadInst *OrigLoad; - if (LoadInst *L = dyn_cast(UI)) + if (L) OrigLoad = L; else // Take any load, we will use it only to update Alias Analysis @@ -308,6 +310,7 @@ AAMDNodes AAInfo; OrigLoad->getAAMetadata(AAInfo); newLoad->setAAMetadata(AAInfo); + newLoad->setAAMetadataNoAliasProvenance(AAInfo); Args.push_back(newLoad); ArgAttrVec.push_back(AttributeSet()); Index: llvm/lib/Transforms/IPO/FunctionAttrs.cpp =================================================================== --- llvm/lib/Transforms/IPO/FunctionAttrs.cpp +++ llvm/lib/Transforms/IPO/FunctionAttrs.cpp @@ -1012,6 +1012,12 @@ case Instruction::Call: case Instruction::Invoke: { CallBase &CB = cast(*RVI); + if (CB.getIntrinsicID() == Intrinsic::noalias_arg_guard || + CB.getIntrinsicID() == Intrinsic::noalias_copy_guard) { + // Look through a noalias arg/copy guard + FlowsToReturn.insert(RVI->getOperand(0)); + continue; + } Function *Callee = CB.getCalledFunction(); // A call to a node within the SCC is assumed to return null until // proven otherwise Index: llvm/lib/Transforms/IPO/GlobalOpt.cpp =================================================================== --- llvm/lib/Transforms/IPO/GlobalOpt.cpp +++ llvm/lib/Transforms/IPO/GlobalOpt.cpp @@ -216,6 +216,9 @@ Value *V = SI->getValueOperand(); if (isa(V)) { Changed = true; + // skip potential second use from ptr_provenance before the erase + if ((UI != E) && (*UI == SI)) + UI++; SI->eraseFromParent(); } else if (Instruction *I = dyn_cast(V)) { if (I->hasOneUse()) @@ -793,6 +796,9 @@ Changed |= OptimizeAwayTrappingUsesOfValue(LI, LV); // If we were able to delete all uses of the loads if (LI->use_empty()) { + // skip potential second use from ptr_provenance before the erase + if ((GUI != E) && (*GUI == LI)) + GUI++; LI->eraseFromParent(); Changed = true; } else { @@ -1280,6 +1286,9 @@ // users. for (auto UI = PN->user_begin(), E = PN->user_end(); UI != E;) { Instruction *User = cast(*UI++); + // skip potential second use from ptr_provenance + if ((UI != E) && (*UI == User)) + ++UI; RewriteHeapSROALoadUser(User, InsertedScalarizedValues, PHIsToRewrite); } } @@ -1292,6 +1301,8 @@ std::vector > &PHIsToRewrite) { for (auto UI = Load->user_begin(), E = Load->user_end(); UI != E;) { Instruction *User = cast(*UI++); + if ((UI != E) && (*UI == User)) + ++UI; RewriteHeapSROALoadUser(User, InsertedScalarizedValues, PHIsToRewrite); } @@ -1429,6 +1440,9 @@ // of the per-field globals instead. for (auto UI = GV->user_begin(), E = GV->user_end(); UI != E;) { Instruction *User = cast(*UI++); + // skip potential second use from ptr_provenance + if ((UI != E) && (*UI == User)) + ++UI; if (LoadInst *LI = dyn_cast(User)) { RewriteUsesOfLoadForHeapSRoA(LI, InsertedScalarizedValues, PHIsToRewrite); Index: llvm/lib/Transforms/IPO/PassManagerBuilder.cpp =================================================================== --- llvm/lib/Transforms/IPO/PassManagerBuilder.cpp +++ llvm/lib/Transforms/IPO/PassManagerBuilder.cpp @@ -299,8 +299,16 @@ addInitialAliasAnalysisPasses(FPM); FPM.add(createCFGSimplificationPass()); + FPM.add(createConnectNoAliasDeclPass()); FPM.add(createSROAPass()); FPM.add(createEarlyCSEPass()); + + // Propagate and Convert as early as possible. 
+ // But do it after SROA! + FPM.add(createPropagateAndConvertNoAliasPass()); + if (VerifyOutput) + FPM.add(createVerifierPass()); + FPM.add(createLowerExpectIntrinsicPass()); } @@ -329,8 +337,16 @@ IP.HintThreshold = 325; MPM.add(createFunctionInliningPass(IP)); + MPM.add(createConnectNoAliasDeclPass()); MPM.add(createSROAPass()); MPM.add(createEarlyCSEPass()); // Catch trivial redundancies + + // Propagate and Convert as early as possible. + // But do it after SROA and EarlyCSE ! + MPM.add(createPropagateAndConvertNoAliasPass()); + if (VerifyOutput) + MPM.add(createVerifierPass()); + MPM.add(createCFGSimplificationPass()); // Merge & remove BBs MPM.add(createInstructionCombiningPass()); // Combine silly seq's addExtensionsToPM(EP_Peephole, MPM); @@ -358,6 +374,7 @@ void PassManagerBuilder::addFunctionSimplificationPasses( legacy::PassManagerBase &MPM) { // Start of function pass. + MPM.add(createConnectNoAliasDeclPass()); // Break up aggregate allocas, using SSAUpdater. assert(OptLevel >= 1 && "Calling function optimizer with no optimization level!"); MPM.add(createSROAPass()); @@ -365,6 +382,12 @@ if (EnableKnowledgeRetention) MPM.add(createAssumeSimplifyPass()); + // Propagate and Convert as early as possible. + // But do it after SROA and EarlyCSE ! + MPM.add(createPropagateAndConvertNoAliasPass()); + if (VerifyOutput) + MPM.add(createVerifierPass()); + if (OptLevel > 1) { if (EnableGVNHoist) MPM.add(createGVNHoistPass()); @@ -453,6 +476,14 @@ // opened up by them. MPM.add(createInstructionCombiningPass()); addExtensionsToPM(EP_Peephole, MPM); + + MPM.add(createConnectNoAliasDeclPass()); + // Propagate and Convert as early as possible. + // But do it after SROA! + MPM.add(createPropagateAndConvertNoAliasPass()); + if (VerifyOutput) + MPM.add(createVerifierPass()); + if (OptLevel > 1) { MPM.add(createJumpThreadingPass()); // Thread jumps MPM.add(createCorrelatedValuePropagationPass()); @@ -610,6 +641,12 @@ MPM.add(Inliner); Inliner = nullptr; RunInliner = true; + + // Propagate and Convert as early as possible. + // But do it after SROA! + MPM.add(createPropagateAndConvertNoAliasPass()); + if (VerifyOutput) + MPM.add(createVerifierPass()); } // Infer attributes on declarations, call sites, arguments, etc. for an SCC. @@ -953,6 +990,12 @@ if (RunInliner) { PM.add(Inliner); Inliner = nullptr; + + // Propagate and Convert as early as possible. + // But do it after SROA! + PM.add(createPropagateAndConvertNoAliasPass()); + if (VerifyOutput) + PM.add(createVerifierPass()); } PM.add(createPruneEHPass()); // Remove dead EH info. @@ -986,6 +1029,12 @@ // Break up allocas PM.add(createSROAPass()); + // Propagate and Convert as early as possible. + // But do it after SROA! + PM.add(createPropagateAndConvertNoAliasPass()); + if (VerifyOutput) + PM.add(createVerifierPass()); + // LTO provides additional opportunities for tailcall elimination due to // link-time inlining, and visibility of nocapture attribute. if (OptLevel > 1) Index: llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp =================================================================== --- llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp +++ llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp @@ -4367,6 +4367,22 @@ } break; } + case Intrinsic::noalias_decl: { + Value *Op0 = II->getOperand(0); + Value *BaseOp0 = Op0->stripPointerCasts(); + if (Op0 != BaseOp0) { + auto *NewNoAlias = Builder.CreateNoAliasDeclaration( + BaseOp0, II->getOperand(1), II->getOperand(2)); + // bwaahh seems we need to do everything ourselves ? 
+ II->replaceAllUsesWith(NewNoAlias); + Worklist.remove(II); + II->eraseFromParent(); + MadeIRChange = true; + + return nullptr; + } + break; + } } return visitCallBase(*II); } Index: llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp =================================================================== --- llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp +++ llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp @@ -435,10 +435,20 @@ NewPtr->getType()->getPointerAddressSpace() == AS)) NewPtr = Builder.CreateBitCast(Ptr, NewTy->getPointerTo(AS)); + Value *NewProvenancePtr = nullptr; + if (LI.hasNoaliasProvenanceOperand()) { + NewProvenancePtr = Builder.CreateBitCast(LI.getNoaliasProvenanceOperand(), + NewTy->getPointerTo(AS)); + } + LoadInst *NewLoad = Builder.CreateAlignedLoad( NewTy, NewPtr, LI.getAlign(), LI.isVolatile(), LI.getName() + Suffix); NewLoad->setAtomic(LI.getOrdering(), LI.getSyncScopeID()); copyMetadataForLoad(*NewLoad, LI); + if (LI.hasNoaliasProvenanceOperand()) { + assert(NewProvenancePtr); + NewLoad->setNoaliasProvenanceOperand(NewProvenancePtr); + } return NewLoad; } @@ -454,6 +464,12 @@ SmallVector, 8> MD; SI.getAllMetadata(MD); + Value *NewProvenancePtr = nullptr; + if (SI.hasNoaliasProvenanceOperand()) { + NewProvenancePtr = IC.Builder.CreateBitCast( + SI.getNoaliasProvenanceOperand(), V->getType()->getPointerTo(AS)); + } + StoreInst *NewStore = IC.Builder.CreateAlignedStore( V, IC.Builder.CreateBitCast(Ptr, V->getType()->getPointerTo(AS)), SI.getAlign(), SI.isVolatile()); @@ -494,6 +510,11 @@ } } + if (SI.hasNoaliasProvenanceOperand()) { + assert(NewProvenancePtr); + NewStore->setNoaliasProvenanceOperand(NewProvenancePtr); + } + return NewStore; } @@ -576,6 +597,10 @@ // Replace all the stores with stores of the newly loaded value. for (auto UI = LI.user_begin(), UE = LI.user_end(); UI != UE;) { auto *SI = cast(*UI++); + // skip the ptr_provenance + if ((UI != UE) && (*UI == SI)) + ++UI; + IC.Builder.SetInsertPoint(SI); combineStoreToNewValue(IC, *SI, NewLoad); IC.eraseInstFromFunction(*SI); @@ -658,6 +683,7 @@ AAMDNodes AAMD; LI.getAAMetadata(AAMD); L->setAAMetadata(AAMD); + L->setAAMetadataNoAliasProvenance(AAMD); V = IC.Builder.CreateInsertValue(V, L, i); } @@ -707,6 +733,7 @@ AAMDNodes AAMD; LI.getAAMetadata(AAMD); L->setAAMetadata(AAMD); + L->setAAMetadataNoAliasProvenance(AAMD); V = IC.Builder.CreateInsertValue(V, L, i); Offset += EltSize; } @@ -919,6 +946,15 @@ Instruction *InstCombiner::visitLoadInst(LoadInst &LI) { Value *Op = LI.getOperand(0); + if (LI.hasNoaliasProvenanceOperand()) { + if (LI.getNoaliasProvenanceOperand() == LI.getPointerOperand() || + isa(LI.getNoaliasProvenanceOperand())) { + // degenerated ptr_provenance + LI.removeNoaliasProvenanceOperand(); + return &LI; + } + } + // Try to canonicalize the loaded type. 
if (Instruction *Res = combineLoadToOperationType(*this, LI)) return Res; @@ -1177,6 +1213,7 @@ AAMDNodes AAMD; SI.getAAMetadata(AAMD); NS->setAAMetadata(AAMD); + NS->setAAMetadataNoAliasProvenance(AAMD); } return true; @@ -1225,6 +1262,7 @@ AAMDNodes AAMD; SI.getAAMetadata(AAMD); NS->setAAMetadata(AAMD); + NS->setAAMetadataNoAliasProvenance(AAMD); Offset += EltSize; } @@ -1318,6 +1356,15 @@ Value *Val = SI.getOperand(0); Value *Ptr = SI.getOperand(1); + if (SI.hasNoaliasProvenanceOperand()) { + if (SI.getNoaliasProvenanceOperand() == SI.getPointerOperand() || + isa(SI.getNoaliasProvenanceOperand())) { + // degenerated ptr_provenance + SI.removeNoaliasProvenanceOperand(); + return &SI; + } + } + // Try to canonicalize the stored type. if (combineStoreToValueType(*this, SI)) return eraseInstFromFunction(SI); @@ -1567,6 +1614,7 @@ if (AATags) { OtherStore->getAAMetadata(AATags, /* Merge = */ true); NewSI->setAAMetadata(AATags); + NewSI->setAAMetadataNoAliasProvenance(AATags); } // Nuke the old stores. Index: llvm/lib/Transforms/InstCombine/InstCombinePHI.cpp =================================================================== --- llvm/lib/Transforms/InstCombine/InstCombinePHI.cpp +++ llvm/lib/Transforms/InstCombine/InstCombinePHI.cpp @@ -606,11 +606,8 @@ new LoadInst(FirstLI->getType(), NewPN, "", isVolatile, LoadAlignment); unsigned KnownIDs[] = { - LLVMContext::MD_tbaa, LLVMContext::MD_range, LLVMContext::MD_invariant_load, - LLVMContext::MD_alias_scope, - LLVMContext::MD_noalias, LLVMContext::MD_nonnull, LLVMContext::MD_align, LLVMContext::MD_dereferenceable, @@ -622,14 +619,19 @@ NewLI->setMetadata(ID, FirstLI->getMetadata(ID)); // Add all operands to the new PHI and combine TBAA metadata. + AAMDNodes AAInfo; + FirstLI->getAAMetadata(AAInfo); for (unsigned i = 1, e = PN.getNumIncomingValues(); i != e; ++i) { LoadInst *LI = cast(PN.getIncomingValue(i)); combineMetadata(NewLI, LI, KnownIDs, true); + LI->getAAMetadata(AAInfo, true); Value *NewInVal = LI->getOperand(0); if (NewInVal != InVal) InVal = nullptr; NewPN->addIncoming(NewInVal, PN.getIncomingBlock(i)); } + NewLI->setAAMetadata(AAInfo); + NewLI->setAAMetadataNoAliasProvenance(AAInfo); if (InVal) { // The new PHI unions all of the same values together. This is really Index: llvm/lib/Transforms/InstCombine/InstructionCombining.cpp =================================================================== --- llvm/lib/Transforms/InstCombine/InstructionCombining.cpp +++ llvm/lib/Transforms/InstCombine/InstructionCombining.cpp @@ -2486,6 +2486,7 @@ case Intrinsic::lifetime_start: case Intrinsic::lifetime_end: case Intrinsic::objectsize: + case Intrinsic::noalias_decl: Users.emplace_back(I); continue; } @@ -2932,9 +2933,21 @@ AAMDNodes Nodes; L->getAAMetadata(Nodes); NL->setAAMetadata(Nodes); + NL->setAAMetadataNoAliasProvenance(Nodes); // Returning the load directly will cause the main loop to insert it in - // the wrong spot, so use replaceInstUsesWith(). - return replaceInstUsesWith(EV, NL); + // the wrong spot, so use ReplaceInstUsesWith(). + { + for (auto mdtag : + {LLVMContext::MD_dbg, LLVMContext::MD_prof, LLVMContext::MD_fpmath, + LLVMContext::MD_tbaa_struct, LLVMContext::MD_invariant_load, + LLVMContext::MD_nontemporal, + LLVMContext::MD_mem_parallel_loop_access}) { + if (auto *MD = L->getMetadata(mdtag)) { + NL->setMetadata(mdtag, MD); + } + } + return replaceInstUsesWith(EV, NL); + } } // We could simplify extracts from other values. 
Note that nested extracts may
// already be simplified implicitly by the above: extract (extract (insert) )
Index: llvm/lib/Transforms/Scalar/CMakeLists.txt
===================================================================
--- llvm/lib/Transforms/Scalar/CMakeLists.txt
+++ llvm/lib/Transforms/Scalar/CMakeLists.txt
@@ -3,6 +3,7 @@
   AlignmentFromAssumptions.cpp
   BDCE.cpp
   CallSiteSplitting.cpp
+  ConnectNoAliasDecl.cpp
   ConstantHoisting.cpp
   ConstantProp.cpp
   CorrelatedValuePropagation.cpp
@@ -57,6 +58,7 @@
   NewGVN.cpp
   PartiallyInlineLibCalls.cpp
   PlaceSafepoints.cpp
+  PropagateAndConvertNoAlias.cpp
   Reassociate.cpp
   Reg2Mem.cpp
   RewriteStatepointsForGC.cpp
Index: llvm/lib/Transforms/Scalar/ConnectNoAliasDecl.cpp
===================================================================
--- /dev/null
+++ llvm/lib/Transforms/Scalar/ConnectNoAliasDecl.cpp
@@ -0,0 +1,114 @@
+//===- ConnectNoAliasDecl.cpp ---------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// This pass connects provenance.noalias intrinsics to the corresponding
+/// llvm.noalias.decl, based on the alloca of the pointer.
+///
+//===----------------------------------------------------------------------===//
+//
+// When the original restrict declaration is not directly available,
+// llvm.noalias, llvm.provenance.noalias and llvm.noalias.copy.guard can be
+// associated with an 'unknown' (out of function) noalias scope. After certain
+// optimizations, like SROA, inlining, ... it is possible that a
+// llvm.noalias.decl is associated with an alloca to which llvm.noalias,
+// llvm.provenance.noalias or llvm.noalias.copy.guard intrinsics are also
+// associated. When the latter intrinsics still refer to the 'unknown' scope,
+// we can now refine the information by associating the llvm.noalias.decl
+// and its information with the other noalias intrinsics that depend on the
+// same alloca.
+//
+// This pass will connect those llvm.noalias.decl to those
+// llvm.noalias, llvm.provenance.noalias and llvm.noalias.copy.guard. It will
+// also propagate the embedded information.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/Scalar/ConnectNoAliasDecl.h"
+#include "llvm/Analysis/CallGraph.h"
+#include "llvm/Analysis/GlobalsModRef.h"
+#include "llvm/IR/Dominators.h"
+#include "llvm/InitializePasses.h"
+#include "llvm/Pass.h"
+#include "llvm/Transforms/Utils/Cloning.h"
+
+using namespace llvm;
+
+namespace {
+class ConnectNoAliasDeclLegacyPass : public FunctionPass {
+public:
+  static char ID; // Pass identification, replacement for typeid
+  ConnectNoAliasDeclLegacyPass() : FunctionPass(ID) {
+    initializeConnectNoAliasDeclLegacyPassPass(
+        *PassRegistry::getPassRegistry());
+  }
+
+  bool runOnFunction(Function &F) override;
+
+  void getAnalysisUsage(AnalysisUsage &AU) const override {
+    // FIXME: is all of this valid?
+    AU.addPreserved();
+    AU.addPreserved(); // FIXME: not sure this is
+                       // valid.
It ensures the same pass + // order as if this pass was not + // there + AU.addPreserved(); + AU.addRequiredTransitive(); + AU.setPreservesCFG(); + } + +private: + ConnectNoAliasDeclPass Impl; +}; +} // namespace + +char ConnectNoAliasDeclLegacyPass::ID = 0; +INITIALIZE_PASS_BEGIN( + ConnectNoAliasDeclLegacyPass, "connect-noaliasdecl", + "Connect llvm.noalias.decl", false, + false) // to llvm.noalias/llvm.provenance.noalias/llvm.noalias.copy.guard + // intrinsics +INITIALIZE_PASS_END( + ConnectNoAliasDeclLegacyPass, "connect-noaliasdecl", + "Connect llvm.noalias.decl", false, + false) // to llvm.noalias/llvm.provenance.noalias/llvm.noalias.copy.guard + // intrinsics + +bool ConnectNoAliasDeclLegacyPass::runOnFunction(Function &F) { + if (skipFunction(F)) + return false; + + return Impl.runImpl(F); +} + +namespace llvm { + +bool ConnectNoAliasDeclPass::runImpl(Function &F) { + return llvm::propagateAndConnectNoAliasDecl(&F); +} + +ConnectNoAliasDeclPass::ConnectNoAliasDeclPass() {} + +PreservedAnalyses ConnectNoAliasDeclPass::run(Function &F, + FunctionAnalysisManager &AM) { + bool Changed = runImpl(F); + + if (!Changed) + return PreservedAnalyses::all(); + PreservedAnalyses PA; + PA.preserve(); + //?? PA.preserve(); // FIXME: not sure this is valid, + // see above + + return PA; +} + +FunctionPass *createConnectNoAliasDeclPass() { + return new ConnectNoAliasDeclLegacyPass(); +} +} // namespace llvm Index: llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp =================================================================== --- llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp +++ llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp @@ -1350,11 +1350,13 @@ C, Earlier->getPointerOperand(), false, Earlier->getAlign(), Earlier->getOrdering(), Earlier->getSyncScopeID(), DepWrite); - unsigned MDToKeep[] = {LLVMContext::MD_dbg, LLVMContext::MD_tbaa, - LLVMContext::MD_alias_scope, - LLVMContext::MD_noalias, + unsigned MDToKeep[] = {LLVMContext::MD_dbg, LLVMContext::MD_nontemporal}; SI->copyMetadata(*DepWrite, MDToKeep); + AAMDNodes AAInfo; + Earlier->getAAMetadata(AAInfo); + SI->setAAMetadata(AAInfo); + SI->setAAMetadataNoAliasProvenance(AAInfo); ++NumModifiedStores; // Delete the old stores and now-dead instructions that feed them. Index: llvm/lib/Transforms/Scalar/EarlyCSE.cpp =================================================================== --- llvm/lib/Transforms/Scalar/EarlyCSE.cpp +++ llvm/lib/Transforms/Scalar/EarlyCSE.cpp @@ -986,7 +986,17 @@ << '\n'); AvailableValues.insert(CondI, ConstantInt::getTrue(BB->getContext())); } else - LLVM_DEBUG(dbgs() << "EarlyCSE skipping assumption: " << Inst << '\n'); + LLVM_DEBUG(dbgs() << "EarlyCSE skipping intrinsic: " << Inst << '\n'); + continue; + } + + // Likewise, noalias intrinsics don't actually write. + if (match(&Inst, m_Intrinsic()) || + match(&Inst, m_Intrinsic()) || + match(&Inst, m_Intrinsic()) || + match(&Inst, m_Intrinsic())) { + LLVM_DEBUG(dbgs() << "EarlyCSE skipping noalias intrinsic: " << Inst + << '\n'); continue; } Index: llvm/lib/Transforms/Scalar/GVN.cpp =================================================================== --- llvm/lib/Transforms/Scalar/GVN.cpp +++ llvm/lib/Transforms/Scalar/GVN.cpp @@ -1276,6 +1276,8 @@ LI->getAAMetadata(Tags); if (Tags) NewLoad->setAAMetadata(Tags); + // FIXME: not sure if ptr_provenance propagation should be allowed + // here: dependend provenances should also migrate first ! 
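+      // A minimal sketch of the concern, under an assumed, simplified IR
+      // notation (not part of the actual change): if the original load was
+      //   %prov = call i32* @llvm.provenance.noalias(...)   ; in another block
+      //   %v = load i32, i32* %p, ptr_provenance i32* %prov
+      // then a load materialized at this point cannot simply reuse %prov;
+      // the provenance instructions it depends on would have to be moved or
+      // recreated first so that they still dominate the new load.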
if (auto *MD = LI->getMetadata(LLVMContext::MD_invariant_load)) NewLoad->setMetadata(LLVMContext::MD_invariant_load, MD); Index: llvm/lib/Transforms/Scalar/JumpThreading.cpp =================================================================== --- llvm/lib/Transforms/Scalar/JumpThreading.cpp +++ llvm/lib/Transforms/Scalar/JumpThreading.cpp @@ -1425,6 +1425,8 @@ NewVal->setDebugLoc(LoadI->getDebugLoc()); if (AATags) NewVal->setAAMetadata(AATags); + // FIXME: not sure if ptr_provenance propagation should be allowed + // here: dependend provenances should also migrate first ! AvailablePreds.emplace_back(UnavailablePred, NewVal); } Index: llvm/lib/Transforms/Scalar/LICM.cpp =================================================================== --- llvm/lib/Transforms/Scalar/LICM.cpp +++ llvm/lib/Transforms/Scalar/LICM.cpp @@ -1774,6 +1774,8 @@ NewSI->setDebugLoc(DL); if (AATags) NewSI->setAAMetadata(AATags); + // FIXME: not sure if ptr_provenance propagation should be allowed + // here: dependend provenances should also migrate first ! if (MSSAU) { MemoryAccess *MSSAInsertPoint = MSSAInsertPts[i]; @@ -1927,7 +1929,9 @@ if (SomePtr->getType() != ASIV->getType()) return false; - for (User *U : ASIV->users()) { + for (auto U_it = ASIV->user_begin(), U_it_end = ASIV->user_end(); + U_it != U_it_end; ++U_it) { + User *U = *U_it; // Ignore instructions that are outside the loop. Instruction *UI = dyn_cast(U); if (!UI || !CurLoop->contains(UI)) @@ -1936,6 +1940,9 @@ // If there is an non-load/store instruction in the loop, we can't promote // it. if (LoadInst *Load = dyn_cast(UI)) { + if (U_it.getUse().getOperandNo() == + Load->getNoaliasProvenanceOperandIndex()) + continue; if (!Load->isUnordered()) return false; @@ -1955,6 +1962,10 @@ Alignment = std::max(Alignment, InstAlignment); } } else if (const StoreInst *Store = dyn_cast(UI)) { + if (U_it.getUse().getOperandNo() == + Store->getNoaliasProvenanceOperandIndex()) + continue; + // Stores *of* the pointer are not interesting, only stores *to* the // pointer. if (UI->getOperand(1) != ASIV) @@ -1999,8 +2010,18 @@ Store->getPointerOperand(), Store->getValueOperand()->getType(), Store->getAlign(), MDL, Preheader->getTerminator(), DT); } - } else - return false; // Not a load or store. + } else { + // Not a load or store. + if (IntrinsicInst *II = dyn_cast(UI)) { + if (II->getIntrinsicID() == Intrinsic::provenance_noalias || + II->getIntrinsicID() == Intrinsic::noalias_arg_guard || + II->getIntrinsicID() == Intrinsic::noalias_copy_guard) { + // those must not block promotion. + continue; + } + } + return false; + } // Merge the AA tags. if (LoopUses.empty()) { @@ -2087,6 +2108,8 @@ PreheaderLoad->setDebugLoc(DebugLoc()); if (AATags) PreheaderLoad->setAAMetadata(AATags); + // FIXME: not sure if ptr_provenance propagation should be allowed + // here: dependend provenances should also migrate first ! SSA.AddAvailableValue(Preheader, PreheaderLoad); if (MSSAU) { Index: llvm/lib/Transforms/Scalar/PropagateAndConvertNoAlias.cpp =================================================================== --- /dev/null +++ llvm/lib/Transforms/Scalar/PropagateAndConvertNoAlias.cpp @@ -0,0 +1,1240 @@ +//===- PropagateAndConvertNoAlias.h -----------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This pass moves dependencies on llvm.noalias onto the ptr_provenance. +// It also introduces and propagates provenance.noalias and noalias.arg.guard +// intrinsics. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Transforms/Scalar/PropagateAndConvertNoAlias.h" +#include "llvm/ADT/SmallSet.h" +#include "llvm/Analysis/CallGraph.h" +#include "llvm/Analysis/GlobalsModRef.h" +#include "llvm/Analysis/ValueTracking.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/Dominators.h" +#include "llvm/IR/IRBuilder.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/Module.h" +#include "llvm/IR/Operator.h" +#include "llvm/InitializePasses.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" + +#include +#include + +using namespace llvm; + +#define DEBUG_TYPE "convert-noalias" + +namespace { + +class PropagateAndConvertNoAliasLegacyPass : public FunctionPass { +public: + static char ID; + explicit PropagateAndConvertNoAliasLegacyPass() : FunctionPass(ID), Impl() { + initializePropagateAndConvertNoAliasLegacyPassPass( + *PassRegistry::getPassRegistry()); + } + + void getAnalysisUsage(AnalysisUsage &AU) const override; + bool runOnFunction(Function &F) override; + + StringRef getPassName() const override { + return "Propagate and Convert Noalias intrinsics"; + } + +private: + PropagateAndConvertNoAliasPass Impl; +}; +} // namespace + +char PropagateAndConvertNoAliasLegacyPass::ID = 0; +INITIALIZE_PASS_BEGIN(PropagateAndConvertNoAliasLegacyPass, "convert-noalias", + "Propagate And Convert llvm.noalias intrinsics", false, + false) +INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) +INITIALIZE_PASS_END(PropagateAndConvertNoAliasLegacyPass, "convert-noalias", + "Propagate And Convert llvm.noalias intrinsics", false, + false) + +void PropagateAndConvertNoAliasLegacyPass::getAnalysisUsage( + AnalysisUsage &AU) const { + AU.addPreserved(); + // FIXME: not sure the CallGraphWrapperPass is needed. It ensures the same + // pass order is kept as if the PropagateAndConvertNoAlias pass was not there. + AU.addPreserved(); + AU.addPreserved(); + AU.addRequiredTransitive(); +} + +bool PropagateAndConvertNoAliasLegacyPass::runOnFunction(Function &F) { + if (skipFunction(F)) + return false; + + return Impl.runImpl(F, getAnalysis().getDomTree()); +} + +namespace llvm { + +bool PropagateAndConvertNoAliasPass::runImpl(Function &F, DominatorTree &DT) { + bool Changed = false; + + // find all 'llvm.noalias', 'llvm.noalias.gep' and 'llvm.noalias.arg.guard' + // intrinsics + Changed |= doit(F, DT); + + return Changed; +} + +FunctionPass *createPropagateAndConvertNoAliasPass() { + return new PropagateAndConvertNoAliasLegacyPass(); +} + +PropagateAndConvertNoAliasPass::PropagateAndConvertNoAliasPass() {} + +PreservedAnalyses +PropagateAndConvertNoAliasPass::run(Function &F, FunctionAnalysisManager &AM) { + bool Changed = runImpl(F, AM.getResult(F)); + + if (!Changed) + return PreservedAnalyses::all(); + PreservedAnalyses PA; + PA.preserve(); + // FIXME: not sure this is valid: + //?? 
PA.preserve(); // See above
+
+  return PA;
+}
+
+typedef SmallVector ProvenanceWorklist;
+typedef SmallVector DepsVector;
+typedef std::map I2Deps;
+typedef SmallPtrSet InstructionSet;
+typedef SmallPtrSet BasicBlockSet;
+
+// Analyse and propagate the instructions that need provenances:
+// - InstructionsForProvenance: instructions that need a provenance
+// representation
+// - at entry: (A)
+// -- llvm.noalias -> llvm.provenance.noalias
+// -- llvm.noalias.arg.guard a, prov_a -> prov_a
+//
+// - during propagation: (B)
+// -- select a, b, c -> select a, prov_b, prov_c
+// -- PHI a, b,... -> PHI prov_a, prov_b, ...
+//
+// - Handled: Instructions that have been investigated. The Deps side refers to
+// the provenance dependency. (C)
+// -- a nullptr indicates that the normal dependency must be used for that
+// operand
+// -- an I indicates that the provenance representation of I must be used for
+// that operand
+//
+// The algorithm:
+// - We start from the llvm.noalias and llvm.noalias.arg.guard instructions
+// - We go over their users, and check if they are special or not
+// -- special users need a provenance representation and are annotated as such
+// in 'Handled' (non-empty Dep)
+// -- normal instructions are a passthrough, and are annotated with an empty Dep
+// in 'Handled' (I->{})
+// -- some instructions stop the recursion:
+// --- ICmp
+// --- first arg of select
+// --- llvm.provenance.noalias, llvm.noalias
+//
+// After the analysis, 'Handled' contains an overview of all instructions that
+// depend on (A)
+// - those instructions that were seen, but otherwise ignored, have no
+// dependencies (I -> {} )
+// - instructions that refer to one or more provenances have explicit
+// dependencies. (I -> { op0, op1, op2, ... })
+// -- if opX == nullptr -> not a real ptr_provenance dependency
+// -- if opX == someI :
+// ---- if someI points to an instruction in Handled, it must be one of the
+// instructions that have a provenance representation
+// ---- otherwise, it points to a not-handled plain dependency (coming from a
+// noalias.arg.guard)
+static void propagateInstructionsForProvenance(
+    ProvenanceWorklist &InstructionsForProvenance, I2Deps &Handled,
+    ProvenanceWorklist &out_CreationList, InstructionSet &ProvenancePHIs,
+    BasicBlockSet &DeadBasicBlocks) {
+  auto updateMatchingOperands = [](Instruction *U, Instruction *I,
+                                   DepsVector &Deps, Instruction *I4SC) {
+    assert(U->getNumOperands() == Deps.size());
+    auto it = Deps.begin();
+    for (Value *UOp : U->operands()) {
+      if (UOp == I) {
+        assert(*it == nullptr || *it == I4SC);
+        *it = I4SC;
+      }
+      ++it;
+    }
+  };
+
+  while (!InstructionsForProvenance.empty()) {
+    Instruction *I4SC = InstructionsForProvenance.pop_back_val();
+    LLVM_DEBUG(llvm::dbgs()
+               << "-- Propagating provenance instruction: " << *I4SC << "\n");
+    if (DeadBasicBlocks.count(I4SC->getParent())) {
+      LLVM_DEBUG(llvm::dbgs() << "--- Skipped - dead basic block\n");
+      continue;
+    }
+    SmallVector WorkList = {I4SC};
+    if (auto *CB = dyn_cast(I4SC)) {
+      if (CB->getIntrinsicID() == Intrinsic::noalias_arg_guard) {
+        // llvm.noalias.arg.guard: delegate to ptr_provenance (operand 1)
+        Handled.insert(I2Deps::value_type(I4SC, {}));
+        // no need to add to out_CreationList
+
+        assert(!isa(I4SC->getOperand(0)) &&
+               !isa(I4SC->getOperand(1)) &&
+               "Degenerated case must have been resolved already");
+        assert(I4SC->getOperand(0) != I4SC->getOperand(1) &&
+               "Degenerated case must have been resolved already");
+
+        I4SC = dyn_cast(I4SC->getOperand(1));
+        if (I4SC == nullptr) {
// Provenance became a constant ? Then the arg guard is not needed + // any more and there is nothing to propagate + continue; + } + } + } + while (!WorkList.empty()) { + Instruction *I = WorkList.pop_back_val(); + LLVM_DEBUG(llvm::dbgs() << "-- checking:" << *I << "\n"); + if (DeadBasicBlocks.count(I->getParent())) { + LLVM_DEBUG(llvm::dbgs() << "--- skipped - dead basic block\n"); + continue; + } + bool isPtrToInt = isa(I); + for (auto &UOp : I->uses()) { + auto *U_ = UOp.getUser(); + LLVM_DEBUG(llvm::dbgs() << "--- used by:" << *U_ + << ", operand:" << UOp.getOperandNo() << "\n"); + Instruction *U = dyn_cast(U_); + if (U == nullptr) + continue; + + // Only see through a ptr2int if it used by a int2ptr + if (isPtrToInt && !isa(U)) + continue; + + if (isa(U)) { + // ======================================== select -> { lhs, rhs } + bool MatchesOp1 = (U->getOperand(1) == I); + bool MatchesOp2 = (U->getOperand(2) == I); + + if (MatchesOp1 || MatchesOp2) { + auto HI = Handled.insert(I2Deps::value_type(U, {nullptr, nullptr})); + if (HI.second) + out_CreationList.push_back(U); + if (MatchesOp1) { + HI.first->second[0] = I4SC; + } + if (MatchesOp2) { + HI.first->second[1] = I4SC; + } + if (HI.second) { + InstructionsForProvenance.push_back(U); + } + } + } else if (isa(U)) { + // ======================================== load -> { ptr } + if (UOp.getOperandNo() == + LoadInst::getNoaliasProvenanceOperandIndex()) + continue; // tracking on provenance -> ignore + + auto HI = Handled.insert(I2Deps::value_type(U, {I4SC})); + if (HI.second) + out_CreationList.push_back(U); + assert(U->getOperand(0) == I); + if (HI.second) { + // continue + } + } else if (isa(U)) { + // ======================================== store -> { val, ptr } + if (UOp.getOperandNo() == + StoreInst::getNoaliasProvenanceOperandIndex()) + continue; // tracking on provenance -> ignore + + // also track if we are storing a restrict annotated pointer value... + // This might provide useful information about 'escaping pointers' + bool MatchesOp0 = (U->getOperand(0) == I); + bool MatchesOp1 = (U->getOperand(1) == I); + + if (MatchesOp0 || MatchesOp1) { + auto HI = Handled.insert(I2Deps::value_type(U, {nullptr, nullptr})); + if (HI.second) + out_CreationList.push_back(U); + if (MatchesOp0) { + HI.first->second[0] = I4SC; + } + if (MatchesOp1) { + HI.first->second[1] = I4SC; + } + } + } else if (isa(U)) { + // ======================================== insertvalue -> { val } + // track for injecting llvm.noalias.arg.guard + assert(U->getOperand(1) == I); + // need to introduce a guard + auto HI = Handled.insert(I2Deps::value_type(U, {I4SC})); + if (HI.second) + out_CreationList.push_back(U); + } else if (isa(U)) { + // ======================================== ptr2int -> { val } + // track for injecting llvm.noalias.arg.guard + assert(U->getOperand(0) == I); + // need to introduce a guard + auto HI = Handled.insert(I2Deps::value_type(U, {I4SC})); + if (HI.second) + out_CreationList.push_back(U); + } else if (isa(U)) { + auto HI = Handled.insert(I2Deps::value_type(U, {I4SC})); + if (HI.second) + out_CreationList.push_back(U); + } else if (isa(U)) { + // ======================================== PHI -> { ..... 
} + PHINode *PU = cast(U); + auto HI = Handled.insert(I2Deps::value_type(U, {})); + if (HI.second) { + HI.first->second.resize(U->getNumOperands(), nullptr); + if (ProvenancePHIs.count(U) == 0) { + // This is a normal PHI, consider it for propagation + InstructionsForProvenance.push_back(U); + } + if (U->getNumOperands()) + out_CreationList.push_back(U); + } + updateMatchingOperands(PU, I, HI.first->second, I4SC); + } else if (auto *CS = dyn_cast(U)) { + // =============================== call/invoke/intrinsic -> { ...... } + + // NOTES: + // - we always block at a call... + // - the known intrinsics should not have any extra annotations + switch (CS->getIntrinsicID()) { + case Intrinsic::provenance_noalias: + case Intrinsic::noalias: { + bool MatchesOp0 = (U->getOperand(0) == I); + bool MatchesOpP = + (U->getOperand(Intrinsic::NoAliasIdentifyPArg) == I); + static_assert(Intrinsic::NoAliasIdentifyPArg == + Intrinsic::ProvenanceNoAliasIdentifyPArg, + "those must be identical"); + + if (MatchesOp0 || MatchesOpP) { + auto HI = + Handled.insert(I2Deps::value_type(U, {nullptr, nullptr})); + if (HI.second) + out_CreationList.push_back(U); + if (MatchesOp0) { + HI.first->second[0] = I4SC; + } + if (MatchesOpP) { + HI.first->second[1] = I4SC; + } + } + continue; + } + case Intrinsic::noalias_arg_guard: { + // ignore - should be handled by the outer loop ! + continue; + } + + default: + break; + } + // if we get here, we need to inject guards for certain arguments. + // Track which arguments will need one. + auto HI = Handled.insert(I2Deps::value_type(U, {})); + if (HI.second) { + HI.first->second.resize(U->getNumOperands(), nullptr); + if (U->getNumOperands()) { + out_CreationList.push_back(U); + } + } + updateMatchingOperands(U, I, HI.first->second, I4SC); + if (I == CS->getReturnedArgOperand()) { + // also see through call - this does not omit the need of + // introducing a noalias_arg_guard + WorkList.push_back(U); + } + } else { + // ======================================== other -> {} + // this is the generic case... not sure if we should have a elaborate + // check for 'all other instructions'. 
just acknowledge that we saw it + // and propagate to any users + // - NOTE: if we happen have already handled it, this might indicate + // something interesting that we should handle separately + + switch (U->getOpcode()) { + case Instruction::ICmp: + // restrict pointer used in comparison - do not propagate + // provenance + continue; + default: + break; + } + + auto HI = Handled.insert(I2Deps::value_type(U, {})); + // No need to add to out_CreationList + if (!HI.second) { + llvm::errs() + << "WARNING: found an instruction that was already handled:" + << *U << "\n"; + assert(!HI.second && + "We should not encounter a handled instruction ??"); + } + + if (HI.second) { + WorkList.push_back(U); + } + } + } + } + } +} + +typedef SmallDenseMap, Value *, 16> + ValueType2CastMap; +static Value *createBitOrPointerOrAddrSpaceCast(Value *V, Type *T, + ValueType2CastMap &VT2C) { + if (V->getType() == T) + return V; + + // Make sure we remember what casts we introduced + Value *&Entry = VT2C[std::make_pair(V, T)]; + if (Entry == nullptr) { + Instruction *InsertionPoint = cast(V); + if (auto *PHI = dyn_cast(V)) { + InsertionPoint = PHI->getParent()->getFirstNonPHI(); + } else { + InsertionPoint = InsertionPoint->getNextNode(); + } + + IRBuilder<> Builder(InsertionPoint); + Entry = Builder.CreateBitOrPointerCast(V, T); + } + return Entry; +} + +static bool isValidProvenanceNoAliasInsertionPlace(IntrinsicInst *SNA, + Value *InsertionPointV, + DominatorTree &DT) { + assert(SNA->getIntrinsicID() == Intrinsic::provenance_noalias && + "Expect a provenance.noalias"); + Instruction *InsertionPointI = dyn_cast(InsertionPointV); + if (InsertionPointI == nullptr) + return false; + + auto isDominatingOn = [&](Value *Arg) { + auto *ArgI = dyn_cast(Arg); + if (ArgI == nullptr) + return true; + return DT.dominates(ArgI, InsertionPointI); + }; + + for (auto Op : {Intrinsic::ProvenanceNoAliasNoAliasDeclArg, + Intrinsic::ProvenanceNoAliasIdentifyPArg, + Intrinsic::ProvenanceNoAliasIdentifyPProvenanceArg, + Intrinsic::ProvenanceNoAliasIdentifyPObjIdArg, + Intrinsic::ProvenanceNoAliasScopeArg}) { + if (!isDominatingOn(SNA->getOperand(Op))) + return false; + } + + return true; +} + +// combine llvm.provenance.noalias intrinsics as much as possible +void collapseProvenanceNoAlias( + ProvenanceWorklist &CollapseableProvenanceNoAliasIntrinsics, + DominatorTree &DT) { + if (CollapseableProvenanceNoAliasIntrinsics.empty()) + return; + + if (!CollapseableProvenanceNoAliasIntrinsics.empty()) { + // sweep from back to front, then from front to back etc... until no + // modifications are done + do { + LLVM_DEBUG(llvm::dbgs() + << "- Trying to collapse llvm.provenance.noalias\n"); + ProvenanceWorklist NextList; + bool Changed = false; + + // 1) provenance.noaliasA (provenance.noaliasB (....), ...) -> + // provenance.noaliasB(...) + while (!CollapseableProvenanceNoAliasIntrinsics.empty()) { + IntrinsicInst *I = + cast(CollapseableProvenanceNoAliasIntrinsics.back()); + assert(I->getIntrinsicID() == Intrinsic::provenance_noalias); + + CollapseableProvenanceNoAliasIntrinsics.pop_back(); + + // provenance.noalias (provenance.noalias(....), .... ) -> + // provenance.noalias(....) 
+ if (IntrinsicInst *DepI = dyn_cast(I->getOperand(0))) { + // Check if the depending intrinsic is compatible) + if (DepI->getIntrinsicID() == Intrinsic::provenance_noalias && + areProvenanceNoAliasCompatible(DepI, I)) { + // similar enough - look through + LLVM_DEBUG(llvm::dbgs() << "-- Collapsing(1):" << *I << "\n"); + I->replaceAllUsesWith(DepI); + I->eraseFromParent(); + Changed = true; + continue; + } + } + + if (PHINode *DepI = dyn_cast(I->getOperand(0))) { + //@ FIXME: TODO: make more general ? + // provenance.noalias(PHI (fum, self)) -> PHI(provenance.noalias(fum), + // phi self ref) + // - NOTE: only handle the 'simple' case for now ! At least that will + // be correct. + if ((DepI->getNumIncomingValues() == 2) && + (DepI->getNumUses() == 1)) { + LLVM_DEBUG(llvm::dbgs() + << "--- Investigating interesting PHI depenceny\n"); + bool SelfDep0 = (DepI->getOperand(0) == I); + bool SelfDep1 = (DepI->getOperand(1) == I); + if (SelfDep0 || SelfDep1) { + LLVM_DEBUG(llvm::dbgs() << "---- has self dependency\n"); + unsigned ChannelToFollow = SelfDep0 ? 1 : 0; + // Try to find a possible insertion point + if (isValidProvenanceNoAliasInsertionPlace( + I, DepI->getOperand(ChannelToFollow), DT)) { + // create a new provenance.noalias at the insertion point + // FIXME: if DepDepI is not an instruction, we could take the + // end of the BB as insertion location ?? + LLVM_DEBUG(llvm::dbgs() << "----- Migrating !\n"); + Instruction *DepDepI = + cast(DepI->getOperand(ChannelToFollow)); + auto DepDepIIt = DepDepI->getIterator(); + if (isa(DepDepI)) { + DepDepIIt = DepDepI->getParent()->getFirstInsertionPt(); + } else { + ++DepDepIIt; + } + IRBuilder<> builder(DepDepI->getParent(), DepDepIIt); + + auto *NewSNA = builder.CreateProvenanceNoAliasPlain( + DepDepI, + I->getOperand(Intrinsic::ProvenanceNoAliasNoAliasDeclArg), + I->getOperand(Intrinsic::ProvenanceNoAliasIdentifyPArg), + I->getOperand( + Intrinsic::ProvenanceNoAliasIdentifyPProvenanceArg), + I->getOperand( + Intrinsic::ProvenanceNoAliasIdentifyPObjIdArg), + I->getOperand(Intrinsic::ProvenanceNoAliasScopeArg)); + AAMDNodes Metadata; + I->getAAMetadata(Metadata); + NewSNA->setAAMetadata(Metadata); + I->replaceAllUsesWith(NewSNA); + I->eraseFromParent(); + Changed = true; + // And handle the new provenance.noalias for the next sweep + NextList.push_back(NewSNA); + continue; + } + } + } + } + + NextList.push_back(I); + } + + // 2) provenance.noaliasA (...), provenance.noaliasB(...) --> + // provenance.noaliasA(...) + { + for (Instruction *I : NextList) { + IntrinsicInst *II = cast(I); + Instruction *DominatingUse = II; + + ProvenanceWorklist similarProvenances; + for (User *U : II->getOperand(0)->users()) { + if (IntrinsicInst *UII = dyn_cast(U)) { + if (UII->getParent() && // still valid - ignore already removed + // instructions + UII->getIntrinsicID() == Intrinsic::provenance_noalias && + areProvenanceNoAliasCompatible(II, UII)) { + similarProvenances.push_back(UII); + if (DT.dominates(UII, DominatingUse)) + DominatingUse = UII; + } + } + } + + for (Instruction *SI : similarProvenances) { + if ((SI != DominatingUse) && DT.dominates(DominatingUse, SI)) { + LLVM_DEBUG(llvm::dbgs() << "-- Collapsing(2):" << *SI << "\n"); + Changed = true; + SI->replaceAllUsesWith(DominatingUse); + SI->removeFromParent(); // do not yet erase ! 
+ assert((std::find(NextList.begin(), NextList.end(), SI) != + NextList.end()) && + "Similar ptr_provenance must be on the NextList"); + } + } + } + + if (!Changed) + break; + + // Now eliminate all removed intrinsics + llvm::erase_if(NextList, [](Instruction *I) { + if (I->getParent()) { + return false; + } else { + I->deleteValue(); + return true; + } + }); + } + + CollapseableProvenanceNoAliasIntrinsics = NextList; + } while (CollapseableProvenanceNoAliasIntrinsics.size() > 1); + } +} + +// Look at users of llvm.provenance.noalias to find PHI nodes that are used for +// pointer provenance +static void +deduceProvenancePHIs(ProvenanceWorklist &ProvenanceNoAliasIntrinsics, + InstructionSet &out_ProvenancePHIs, + InstructionSet &out_NoAliasArgGuard, + BasicBlockSet &DeadBasicBlocks) { + LLVM_DEBUG(llvm::dbgs() << "-- Looking up ptr_provenance PHI nodes\n"); + for (Instruction *SNI : ProvenanceNoAliasIntrinsics) { + ProvenanceWorklist worklist = {SNI}; + while (!worklist.empty()) { + Instruction *worker = worklist.pop_back_val(); + LLVM_DEBUG(llvm::dbgs() << "worker" << *worker << "\n"); + if (DeadBasicBlocks.count(worker->getParent())) + continue; // Degenerated llvm-ir; Skip + for (auto *SNIUser_ : worker->users()) { + Instruction *SNIUser = dyn_cast(SNIUser_); + if (SNIUser == nullptr) + continue; + + if (isa(SNIUser)) { + // Identify as a ptr_provenance PHI + if (out_ProvenancePHIs.insert(cast(SNIUser)).second) { + LLVM_DEBUG(llvm::dbgs() << "--- " << *SNIUser << "\n"); + // and propagate + worklist.push_back(SNIUser); + } + } else if (isa(SNIUser) || isa(SNIUser) || + isa(SNIUser) || + isa(SNIUser)) { + assert(SNIUser != worker && "not in ssa form ?"); + // look through select/bitcast/addressspacecast + worklist.push_back(SNIUser); + } else { + // load/store/provenance.noalias/arg.guard -> stop looking + if (auto *CB = dyn_cast(SNIUser)) { + auto CBIID = CB->getIntrinsicID(); + if (CBIID == Intrinsic::noalias_arg_guard) { + assert(CB->getOperand(1) == worker && + "a noalias.arg.guard provenance should be linked to " + "operand 1"); + out_NoAliasArgGuard.insert(CB); + } else if (CBIID == Intrinsic::provenance_noalias) { + // ok + } else { + LLVM_DEBUG(llvm::dbgs() + << "ERROR: unexpected call/intrinsic depending on " + "llvm.provenance.noalias:" + << *CB << "\n"); + assert(false && + "Unexpected llvm.provenance.noalias dependency (1)"); + } + } else { + if (isa(SNIUser) || isa(SNIUser)) { + // ok + } else { + LLVM_DEBUG(llvm::dbgs() + << "ERROR: unexpected instruction depending on " + "llvm.provenance.noalias:" + << *SNIUser << "\n"); + assert(false && + "Unexpected llvm.provenance.noalias dependency (2)"); + } + } + } + } + } + } +} + +static void RetrieveDeadBasicBlocks(Function &F, + BasicBlockSet &out_DeadBasicBlocks) { + df_iterator_default_set Reachable; + + // Mark all reachable blocks. 
+ for (BasicBlock *BB : depth_first_ext(&F, Reachable)) + (void)BB /* Mark all reachable blocks */; + + for (auto &BB : F) { + if (!Reachable.count(&BB)) { + out_DeadBasicBlocks.insert(&BB); + LLVM_DEBUG(llvm::dbgs() << "- Unreachable BB:" << BB.getName() << "\n"); + } + } + + LLVM_DEBUG(llvm::dbgs() << "- There are " << out_DeadBasicBlocks.size() + << " unreachable BB on a total of " + << F.getBasicBlockList().size() << "\n"); +} + +void removeNoAliasIntrinsicsFromDeadBlocks(BasicBlockSet &DeadBlocks) { + LLVM_DEBUG(llvm::dbgs() << "- removing NoAlias intrinsics from " + << DeadBlocks.size() << " dead blocks\n"); + ProvenanceWorklist ToBeRemoved; + + for (auto *BB : DeadBlocks) { + for (auto &I : *BB) { + if (auto CB = dyn_cast(&I)) { + switch (CB->getIntrinsicID()) { + case Intrinsic::noalias: + case Intrinsic::noalias_decl: + case Intrinsic::provenance_noalias: + case Intrinsic::noalias_arg_guard: + case Intrinsic::noalias_copy_guard: + ToBeRemoved.push_back(&I); + break; + default: + break; + } + } + } + } + + LLVM_DEBUG(llvm::dbgs() << "-- Removing " << ToBeRemoved.size() + << " intrinsics\n"); + for (auto *I : ToBeRemoved) { + I->replaceAllUsesWith(UndefValue::get(I->getType())); + I->eraseFromParent(); + } +} + +bool PropagateAndConvertNoAliasPass::doit(Function &F, DominatorTree &DT) { + LLVM_DEBUG(llvm::dbgs() << "PropagateAndConvertNoAliasPass:\n"); + + // PHASE 0: find interesting instructions + // - Find all: + // -- Propagatable noalias intrinsics + // -- Load instructions + // -- Store instructions + ProvenanceWorklist InstructionsForProvenance; + ProvenanceWorklist LoadStoreIntrinsicInstructions; + ProvenanceWorklist LookThroughIntrinsics; + ProvenanceWorklist CollapseableProvenanceNoAliasIntrinsics; + ValueType2CastMap VT2C; + InstructionSet ProvenancePHIs; + ProvenanceWorklist DegeneratedNoAliasAndNoAliasArgGuards; + ProvenanceWorklist RemainingNoAliasArgGuards; + InstructionSet DecentNoAliasArgGuards; + + // Do not depend on simplifyCFG or eliminateDeadBlocks. Forcing any of them + // before the propagate can result in significant code degradations :( + // Live with the fact that we can observe degenerated llvm-ir. 
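+
+  // A minimal sketch of the overall rewrite performed below (assumed,
+  // simplified IR; the exact intrinsic signatures and the textual
+  // ptr_provenance notation are illustrative only):
+  //
+  //   %decl = call @llvm.noalias.decl(...)
+  //   %p.na = call i32* @llvm.noalias(i32* %p, %decl, ...)
+  //   %v    = load i32, i32* %p.na
+  //
+  // is turned into
+  //
+  //   %prov = call i32* @llvm.provenance.noalias(i32* %p, %decl, ...)
+  //   %v    = load i32, i32* %p, ptr_provenance i32* %prov
+  //
+  // so the noalias information no longer lives in the pointer value chain but
+  // travels along the separate ptr_provenance operand of the memory access.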
+ BasicBlockSet DeadBasicBlocks; + RetrieveDeadBasicBlocks(F, DeadBasicBlocks); + + LLVM_DEBUG(llvm::dbgs() << "- gathering intrinsics, stores, loads:\n"); + for (auto &BB : F) { + if (DeadBasicBlocks.count(&BB)) + continue; // Skip dead basic blocks + + for (auto &I : BB) { + if (auto CB = dyn_cast(&I)) { + auto ID = CB->getIntrinsicID(); + if (ID == Intrinsic::noalias) { + LLVM_DEBUG(llvm::dbgs() << "-- found intrinsic:" << I << "\n"); + auto Op0 = I.getOperand(0); + if (isa(Op0)) { + LLVM_DEBUG(llvm::dbgs() << "--- degenerated\n"); + DegeneratedNoAliasAndNoAliasArgGuards.push_back(&I); + } else { + InstructionsForProvenance.push_back(&I); + LoadStoreIntrinsicInstructions.push_back(&I); + LookThroughIntrinsics.push_back(&I); + } + } else if (ID == Intrinsic::noalias_arg_guard) { + LLVM_DEBUG(llvm::dbgs() << "-- found intrinsic:" << I << "\n"); + auto Op0 = I.getOperand(0); + auto Op1 = I.getOperand(1); + if (isa(Op0) || isa(Op1) || (Op0 == Op1)) { + LLVM_DEBUG(llvm::dbgs() << "--- degenerated\n"); + DegeneratedNoAliasAndNoAliasArgGuards.push_back(&I); + } else { + RemainingNoAliasArgGuards.push_back(&I); + } + } else if (ID == Intrinsic::provenance_noalias) { + CollapseableProvenanceNoAliasIntrinsics.push_back(&I); + } + } else if (auto LI = dyn_cast(&I)) { + LLVM_DEBUG(llvm::dbgs() << "-- found load:" << I << "\n"); + LoadStoreIntrinsicInstructions.push_back(LI); + } else if (auto SI = dyn_cast(&I)) { + LLVM_DEBUG(llvm::dbgs() << "-- found store:" << I << "\n"); + LoadStoreIntrinsicInstructions.push_back(SI); + } + } + } + + // When there are no noalias related intrinsics, don't do anything. + if (LookThroughIntrinsics.empty() && InstructionsForProvenance.empty() && + DegeneratedNoAliasAndNoAliasArgGuards.empty() && + CollapseableProvenanceNoAliasIntrinsics.empty() && + RemainingNoAliasArgGuards.empty()) { + LLVM_DEBUG(llvm::dbgs() << "- Nothing to do\n"); + return false; + } + + if (!DeadBasicBlocks.empty()) { + removeNoAliasIntrinsicsFromDeadBlocks(DeadBasicBlocks); + } + + LLVM_DEBUG( + llvm::dbgs() << "- Looking through degenerated llvm.noalias.arg.guard\n"); + for (Instruction *I : DegeneratedNoAliasAndNoAliasArgGuards) { + I->replaceAllUsesWith(I->getOperand(0)); + I->eraseFromParent(); + } + + LLVM_DEBUG(llvm::dbgs() << "- Retrieving ptr_provenance PHI nodes and decent " + "llvm.noalias.arg.guard\n"); + deduceProvenancePHIs(CollapseableProvenanceNoAliasIntrinsics, ProvenancePHIs, + DecentNoAliasArgGuards, DeadBasicBlocks); + + LLVM_DEBUG( + llvm::dbgs() << "- looking through remaining llvm.noalias.arg.guard"); + for (Instruction *I : RemainingNoAliasArgGuards) { + if (DecentNoAliasArgGuards.find(I) != DecentNoAliasArgGuards.end()) { + InstructionsForProvenance.push_back(I); + LoadStoreIntrinsicInstructions.push_back(I); + LookThroughIntrinsics.push_back(I); + } else { + I->replaceAllUsesWith(I->getOperand(0)); + I->eraseFromParent(); + } + } + + LLVM_DEBUG(llvm::dbgs() << "- Find out what to do:\n"); + + // PHASE 1: forward pass: + // - Start with all intrinsics + // -- Track all users + // -- Interesting users (noalias intrinsics, select, PHI, load/store) + // -- Do this recursively for users that we can look through + I2Deps Handled; // instruction -> { dependencies } + ProvenanceWorklist + CreationList; // Tracks all keys in Handled, but in a reproducable way + propagateInstructionsForProvenance(InstructionsForProvenance, Handled, + CreationList, ProvenancePHIs, + DeadBasicBlocks); + + // PHASE 2: add missing load/store/intrinsic instructions: + for (auto *I : 
LoadStoreIntrinsicInstructions) { + if (isa(I)) { + if (Handled.insert(I2Deps::value_type(I, {nullptr})).second) + CreationList.push_back(I); + } else { // Store or llvm.no_alias + if (Handled.insert(I2Deps::value_type(I, {nullptr, nullptr})).second) + CreationList.push_back(I); + } + } + +#if !defined(NDEBUG) + auto dumpit = [](I2Deps::value_type &H) { + auto &out = llvm::dbgs(); + out << *H.first << " -> {"; + bool comma = false; + for (auto D : H.second) { + if (comma) + out << ","; + comma = true; + if (D == nullptr) { + out << "nullptr"; + } else { + out << *D; + } + } + out << "}\n"; + }; +#endif + + // PHASE 3: reconstruct alternative tree + // - detected dependencies: replace them by new instructions + // - undetected dependencies: use the original dependency + // NOTE: See explanation in propagateInstructionsForProvenance for more + // information ! + LLVM_DEBUG(llvm::dbgs() << "- Reconstructing tree:\n"); + + ProvenanceWorklist UnresolvedPHI; + SmallDenseMap I2NewV; + SmallDenseMap I2ArgGuard; + + auto getNewIOrOperand = [&](Instruction *DepOp, Value *OrigOp) { + assert(((!DepOp) || I2NewV.count(DepOp)) && "DepOp should be known"); + return DepOp ? static_cast(I2NewV[DepOp]) : OrigOp; + }; + + // Helper lambda for inserting a new noalias.arg.guard + auto setNewNoaliasArgGuard = [&](Instruction *I, unsigned Index, + Instruction *DepOp) { + auto *ProvOp = cast(I2NewV[DepOp]); + // if we get here, the operand has to be an 'Instruction' + // (otherwise, DepOp would not be set). + auto *OpI = cast(I->getOperand(Index)); + auto &ArgGuard = I2ArgGuard[OpI]; + if (ArgGuard == nullptr) { + // create the instruction close to the origin, so that we don't introduce + // bad dependencies + auto InsertionPointIt = OpI->getIterator(); + ++InsertionPointIt; + if (isa(OpI)) { + auto End = OpI->getParent()->end(); + while (InsertionPointIt != End) { + if (!isa(*InsertionPointIt)) + break; + ++InsertionPointIt; + } + } + IRBuilder<> BuilderForArgs(OpI->getParent(), InsertionPointIt); + ArgGuard = BuilderForArgs.CreateNoAliasArgGuard( + OpI, createBitOrPointerOrAddrSpaceCast(ProvOp, OpI->getType(), VT2C), + OpI->getName() + ".guard"); + } + I->setOperand(Index, ArgGuard); + }; + + // Map known provenance.noalias that are not handle to themselves + for (auto SNI : CollapseableProvenanceNoAliasIntrinsics) + if (Handled.find(SNI) == Handled.end()) + I2NewV[SNI] = SNI; + + // We are doing a number of sweeps. This should always end. Normally the + // amount of sweeps is low. During initial development, a number of bugs where + // found by putting a hard limit on the the amount. 
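+  // One reconstruction step, sketched under an assumed, simplified IR: for
+  //   %sel = select i1 %c, i32* %a, i32* %b
+  // where only %a has a provenance representation %prov.a, a parallel
+  //   %prov.sel = select i1 %c, i32* %prov.a, i32* %b
+  // is created, and loads/stores reached through %sel later receive
+  // %prov.sel (suitably cast) as their ptr_provenance operand.
+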
+ unsigned Watchdog = 1000000; // Only used in assertions build + (void)Watchdog; + for (auto CloneableInst : CreationList) { + assert(Handled.count(CloneableInst) && + "Entries in CreationList must also be in Handled"); + assert(!Handled[CloneableInst].empty() && + "Only non-empty items should be added to the CreationList"); + + LLVM_DEBUG(llvm::dbgs() << "- "; dumpit(*Handled.find(CloneableInst))); + ProvenanceWorklist Worklist = {CloneableInst}; + + while (!Worklist.empty()) { + Instruction *I = Worklist.back(); + + if (I2NewV.count(I)) { + // already exists - skip + Worklist.pop_back(); + continue; + } + + LLVM_DEBUG(llvm::dbgs() << "-- Reconstructing:" << *I << "\n"); + + // Check if we have all the needed arguments + auto HandledIt = Handled.find(I); + if (HandledIt == Handled.end()) { + // This can happen after propagation of a llvm.noalias.arg.guard + Worklist.pop_back(); + I2NewV[I] = I; + LLVM_DEBUG(llvm::dbgs() << "--- Connected to an existing path!\n"); + continue; + } + + // If we are a PHI node, just create it + if (isa(I)) { + if (ProvenancePHIs.count(cast(I)) == 0) { + // But only if it is _not_ a ptr_provenance PHI node + // ======================================== PHI -> { ..... } + IRBuilder<> Builder(I); + I2NewV[I] = Builder.CreatePHI(I->getType(), I->getNumOperands(), + Twine("prov.") + I->getName()); + + UnresolvedPHI.push_back(I); + } else { + I2NewV[I] = I; // Map already existing Provenance PHI to itself + } + Worklist.pop_back(); + continue; + } + + LLVM_DEBUG(llvm::dbgs() << "--- "; dumpit(*HandledIt)); + auto &Deps = HandledIt->second; + assert((!Deps.empty()) && + "Any creatable instruction must have some dependent operands"); + bool canCreateInstruction = true; + for (auto *DepOp : Deps) { + if (DepOp != nullptr) { + if (I2NewV.count(DepOp) == 0) { + canCreateInstruction = false; + Worklist.push_back(DepOp); + } + } + } +#if !defined(NDEBUG) + if (--Watchdog == 0) { + llvm::errs() + << "PropagateAndConvertNoAlias: ERROR: WATCHDOG TRIGGERED !\n"; + assert(false && "PropagateAndConvertNoAlias: WATCHDOG TRIGGERED"); + } +#endif + if (canCreateInstruction) { + Worklist.pop_back(); + IRBuilder<> Builder(I); + + if (isa(I)) { + // ======================================== select -> { lhs, rhs } + I2NewV[I] = Builder.CreateSelect( + I->getOperand(0), + createBitOrPointerOrAddrSpaceCast( + getNewIOrOperand(Deps[0], I->getOperand(1)), I->getType(), + VT2C), + createBitOrPointerOrAddrSpaceCast( + getNewIOrOperand(Deps[1], I->getOperand(2)), I->getType(), + VT2C), + Twine("prov.") + I->getName()); + } else if (isa(I)) { + // ======================================== load -> { ptr } + LoadInst *LI = cast(I); + + if (Deps[0]) { + if (!LI->hasNoaliasProvenanceOperand() || + isa(LI->getNoaliasProvenanceOperand()) || + (LI->getPointerOperand() == + LI->getNoaliasProvenanceOperand())) { + LI->setNoaliasProvenanceOperand(createBitOrPointerOrAddrSpaceCast( + I2NewV[Deps[0]], LI->getPointerOperandType(), VT2C)); + } else { + // nothing to do - propagation should have happend through the + // provenance ! + // TODO: we might want to add an extra check that the load + // ptr_provenance was updated + } + } else { + // No extra dependency -> do nothing + // Note: originally we were adding a 'UndefValue' if there was no + // ptr_provenance. But that has the same effect as doing nothing. 
+ } + I2NewV[I] = I; + } else if (isa(I)) { + // ======================================== store -> { val, ptr } + StoreInst *SI = cast(I); + + if (Deps[0]) { + // We try to store a restrict pointer - restrictness + Instruction *DepOp = Deps[0]; + setNewNoaliasArgGuard(I, 0, DepOp); + } + if (Deps[1]) { + if (!SI->hasNoaliasProvenanceOperand() || + isa(SI->getNoaliasProvenanceOperand()) || + (SI->getPointerOperand() == + SI->getNoaliasProvenanceOperand())) { + SI->setNoaliasProvenanceOperand(createBitOrPointerOrAddrSpaceCast( + I2NewV[Deps[1]], SI->getPointerOperandType(), VT2C)); + } else { + // nothing to do - propagation should have happend through the + // provenance ! + // TODO: we might want to add an extra check that the store + // ptr_provenance was updated + } + } else { + // No extra dependency -> do nothing + // Note: originally we were adding a 'UndefValue' if there was no + // ptr_provenance. But that has the same effect as doing nothing. + } + I2NewV[I] = I; + } else if (isa(I)) { + // We try to insert a restrict pointer into a struct - track it. + // Track generated noalias_arg_guard also in I2NewI + assert(Deps.size() == 1 && + "InsertValue tracks exactly one dependency"); + Instruction *DepOp = Deps[0]; + setNewNoaliasArgGuard(I, 1, DepOp); + } else if (isa(I)) { + // We try to convert a restrict pointer to an integer - track it s + // SROA can produce this. + // Track generated noalias_arg_guard also in I2NewI + assert(Deps.size() == 1 && + "InsertValue tracks exactly one dependency"); + Instruction *DepOp = Deps[0]; + setNewNoaliasArgGuard(I, 0, DepOp); + } else { + // =============================== ret -> { ...... } + // =============================== call/invoke/intrinsic -> { ...... } + auto CB = dyn_cast(I); + if (CB) { + assert(CB && "If we get here, we should have a Call"); + switch (CB->getIntrinsicID()) { + case Intrinsic::noalias: { + // convert + assert(Deps.size() == 2); + Value *IdentifyPProvenance; + if (Deps[1]) { + // do the same as with the ptr_provenance in the load + // instruction + IdentifyPProvenance = createBitOrPointerOrAddrSpaceCast( + I2NewV[Deps[1]], + I->getOperand(Intrinsic::NoAliasIdentifyPArg)->getType(), + VT2C); + } else { + IdentifyPProvenance = UndefValue::get( + I->getOperand(Intrinsic::NoAliasIdentifyPArg)->getType()); + } + Instruction *NewI = Builder.CreateProvenanceNoAliasPlain( + getNewIOrOperand(Deps[0], I->getOperand(0)), + I->getOperand(Intrinsic::NoAliasNoAliasDeclArg), + I->getOperand(Intrinsic::NoAliasIdentifyPArg), + IdentifyPProvenance, + I->getOperand(Intrinsic::NoAliasIdentifyPObjIdArg), + I->getOperand(Intrinsic::NoAliasScopeArg)); + I2NewV[I] = NewI; + CollapseableProvenanceNoAliasIntrinsics.push_back(NewI); + + // Copy over metadata that is related to the 'getOperand(1)' (aka + // P) + AAMDNodes AAMetadata; + I->getAAMetadata(AAMetadata); + NewI->setAAMetadata(AAMetadata); + continue; + } + case Intrinsic::noalias_arg_guard: { + // no update needed - depending llvm.provenance.noalias/gep must + // have been updated + continue; + } + case Intrinsic::provenance_noalias: { + // update + assert((Deps[0] || Deps[1]) && + "provenance.noalias update needs a depending operand"); + if (Deps[0]) + I->setOperand(0, createBitOrPointerOrAddrSpaceCast( + I2NewV[Deps[0]], I->getType(), VT2C)); + if (Deps[1]) + I->setOperand( + Intrinsic::ProvenanceNoAliasIdentifyPProvenanceArg, + createBitOrPointerOrAddrSpaceCast( + I2NewV[Deps[1]], + I->getOperand(Intrinsic::ProvenanceNoAliasIdentifyPArg) + ->getType(), + VT2C)); + I2NewV[I] = I; + 
continue; + } + default: + break; + } + } else { + assert(isa(I)); + } + + // Introduce a noalias_arg_guard for every argument that is + // annotated + assert(I->getNumOperands() == Deps.size()); + for (unsigned i = 0, ci = I->getNumOperands(); i < ci; ++i) { + Instruction *DepOp = Deps[i]; + if (DepOp) { + setNewNoaliasArgGuard(I, i, DepOp); + } + } + I2NewV[I] = I; + } + } + } + } + + // Phase 4: resolve the generated PHI nodes + LLVM_DEBUG(llvm::dbgs() << "- Resolving " << UnresolvedPHI.size() + << " PHI nodes\n"); + for (auto *PHI_ : ProvenancePHIs) { + PHINode *PHI = cast(PHI_); + auto it = Handled.find(PHI); + if (it != Handled.end()) { + LLVM_DEBUG(llvm::dbgs() << "-- Orig PHI:" << *PHI << "\n"); + auto &Deps = it->second; + for (unsigned i = 0, ci = Deps.size(); i < ci; ++i) { + LLVM_DEBUG(if (Deps[i]) llvm::dbgs() + << "--- UPDATING:Deps:" << *Deps[i] << "\n"); + Value *IncomingValue = Deps[i] ? I2NewV[Deps[i]] : nullptr; + if (IncomingValue) { + if (IncomingValue->getType() != PHI->getType()) { + IncomingValue = createBitOrPointerOrAddrSpaceCast( + IncomingValue, PHI->getType(), VT2C); + } + LLVM_DEBUG(llvm::dbgs() + << "--- IncomingValue:" << *IncomingValue << "\n"); + PHI->setIncomingValue(i, IncomingValue); + } + } + LLVM_DEBUG(llvm::dbgs() << "-- Adapted PHI:" << *PHI << "\n"); + } + } + + for (auto &PHI : UnresolvedPHI) { + PHINode *BasePHI = cast(PHI); + PHINode *NewPHI = cast(I2NewV[PHI]); + auto &Deps = Handled[PHI]; + + LLVM_DEBUG(llvm::dbgs() << "-- Orig PHI:" << *BasePHI << "\n"); + LLVM_DEBUG(llvm::dbgs() << "-- New PHI:" << *NewPHI << "\n"); + LLVM_DEBUG(llvm::dbgs() << "-- Deps: " << Deps.size() << "\n"); + for (unsigned i = 0, ci = BasePHI->getNumOperands(); i < ci; ++i) { + auto *BB = BasePHI->getIncomingBlock(i); + Value *IncomingValue = + Deps[i] ? I2NewV[Deps[i]] : BasePHI->getIncomingValue(i); + if (IncomingValue == nullptr) { + LLVM_DEBUG(llvm::dbgs() + << "--- hmm.. operand " << i << " became undef\n"); + IncomingValue = UndefValue::get(NewPHI->getType()); + } + if (IncomingValue->getType() != NewPHI->getType()) { + IncomingValue = createBitOrPointerOrAddrSpaceCast( + IncomingValue, NewPHI->getType(), VT2C); + } + NewPHI->addIncoming(IncomingValue, BB); + } + } + + // Phase 5: Removing the llvm.noalias + LLVM_DEBUG(llvm::dbgs() << "- Looking through intrinsics:\n"); + for (Instruction *I : LookThroughIntrinsics) { + auto CB = dyn_cast(I); + if (CB->getIntrinsicID() == Intrinsic::noalias || + CB->getIntrinsicID() == Intrinsic::noalias_arg_guard) { + LLVM_DEBUG(llvm::dbgs() << "-- Eliminating: " << *I << "\n"); + I->replaceAllUsesWith(I->getOperand(0)); + I->eraseFromParent(); + } else { + llvm_unreachable("unhandled lookthrough intrinsic"); + } + } + + // Phase 6: Collapse llvm.provenance.noalias where possible... + // - hmm: should we do this as a complete separate pass ?? + collapseProvenanceNoAlias(CollapseableProvenanceNoAliasIntrinsics, DT); + + return true; +} + +} // namespace llvm Index: llvm/lib/Transforms/Scalar/SROA.cpp =================================================================== --- llvm/lib/Transforms/Scalar/SROA.cpp +++ llvm/lib/Transforms/Scalar/SROA.cpp @@ -117,6 +117,14 @@ namespace { +static llvm::IntrinsicInst *getBaseAsCopyGuardOrNull(llvm::Value *V) { + llvm::IntrinsicInst *II = dyn_cast(V->stripInBoundsOffsets()); + if (II && (II->getIntrinsicID() == Intrinsic::noalias_copy_guard)) + return II; + + return nullptr; +} + /// A custom IRBuilder inserter which prefixes all names, but only in /// Assert builds. 
class IRBuilderPrefixedInserter final : public IRBuilderDefaultInserter { @@ -649,12 +657,25 @@ /// Set to de-duplicate dead instructions found in the use walk. SmallPtrSet VisitedDeadInsts; + // llvm.noalias.copy.guard, offset + SmallVector, 4> PendingNoAliasCopyGuards; + public: SliceBuilder(const DataLayout &DL, AllocaInst &AI, AllocaSlices &AS) : PtrUseVisitor(DL), AllocSize(DL.getTypeAllocSize(AI.getAllocatedType()).getFixedSize()), AS(AS) {} + PtrInfo visitPtrAndNoAliasCopyGuards(Instruction &I) { + PtrInfo PI = visitPtr(I); + if (!PI.isAborted()) { + for (auto &CGAndOffset : PendingNoAliasCopyGuards) + visitPendingNoAliasCopyGuard(*CGAndOffset.first, CGAndOffset.second); + } + + return PI; + } + private: void markAsDead(Instruction &I) { if (VisitedDeadInsts.insert(&I).second) @@ -773,6 +794,13 @@ assert((!LI.isSimple() || LI.getType()->isSingleValueType()) && "All simple FCA loads should have been pre-split"); + if (U->getOperandNo() == LI.getNoaliasProvenanceOperandIndex()) { + // Skip provenance + assert(LI.hasNoaliasProvenanceOperand() && + LI.getNoaliasProvenanceOperand() == *U); + return; + } + if (!IsOffsetKnown) return PI.setAborted(&LI); @@ -785,6 +813,13 @@ } void visitStoreInst(StoreInst &SI) { + if (U->getOperandNo() == SI.getNoaliasProvenanceOperandIndex()) { + // Skip provenance + assert(SI.hasNoaliasProvenanceOperand() && + SI.getNoaliasProvenanceOperand() == *U); + return; + } + Value *ValOp = SI.getValueOperand(); if (ValOp == *U) return PI.setEscapedAndAborted(&SI); @@ -816,6 +851,14 @@ assert((!SI.isSimple() || ValOp->getType()->isSingleValueType()) && "All simple FCA stores should have been pre-split"); handleLoadOrStore(ValOp->getType(), SI, Offset, Size, SI.isVolatile()); + + if (auto *LI = dyn_cast(SI.getValueOperand())) { + // When we get here, the Store is based on an AllocaInst. + // Make sure to track any source dependencies on a llvm.noalias.copy.guard + // when the source was not based on the AllocaInst. + rememberIfBasedOnNoAliasCopyGuard(LI->getPointerOperand(), + Offset.getZExtValue()); + } } void visitMemSetInst(MemSetInst &II) { @@ -914,6 +957,142 @@ // Check that we ended up with a valid index in the map. assert(AS.Slices[PrevIdx].getUse()->getUser() == &II && "Map index doesn't point back to a slice with this user."); + + // When we get here, one of the operands is based on an AllocaInst. + // Make sure to track any source dependencies on a llvm.noalias.copy.guard + // when the source was not based on the AllocaInst. + rememberIfBasedOnNoAliasCopyGuard(II.getSource(), Offset.getZExtValue()); + } + + void gatherValidNoAliasPointerOffsets( + IRBuilderTy &IRB, IntrinsicInst &II, + SmallVectorImpl> &PtrOffsetSizes, + SmallVectorImpl &Indices, + SmallVectorImpl &MinusOneIndices, uint64_t BaseOffset, + unsigned Index = 0) { + if (Index == MinusOneIndices.size()) { + // Check if the indices are compatible with the aggregate + auto PointeeType = + cast(II.getType()->getScalarType())->getElementType(); + auto LeafType = GetElementPtrInst::getIndexedType(PointeeType, Indices); + + if (LeafType == nullptr || !isa(LeafType)) { + LLVM_DEBUG(llvm::dbgs() << "gatherValidNoAliasPointerOffsets: " + "incompatible struct ? 
Is it a union?\n" + << II << "\n"); + return; + } + + // don't know how to compute the offsets without creating a GEP + GetElementPtrInst *GEP = + cast(IRB.CreateGEP(&II, Indices)); + assert(isa(GEP->getType()->getPointerElementType()) && + "noalias pointer is not a pointer?"); + APInt GEPOffset(DL.getPointerSizeInBits(), 0); + if (GEP->accumulateConstantOffset(DL, GEPOffset)) { + auto Offset = GEPOffset.getZExtValue(); + if (Offset >= BaseOffset && Offset < AllocSize) { + PtrOffsetSizes.push_back( + std::make_pair((unsigned)(Offset - BaseOffset), + (unsigned)DL.getTypeAllocSize( + GEP->getType()->getPointerElementType()))); + } + } + GEP->eraseFromParent(); + return; + } + + while (true) { + // Increment + ConstantInt *CI = cast(Indices[MinusOneIndices[Index]]); + Indices[MinusOneIndices[Index]] = + ConstantInt::get(CI->getType(), CI->getSExtValue() + 1, true); + + unsigned CurrentSize = PtrOffsetSizes.size(); + gatherValidNoAliasPointerOffsets(IRB, II, PtrOffsetSizes, Indices, + MinusOneIndices, BaseOffset, Index + 1); + if (CurrentSize == PtrOffsetSizes.size()) { + // no new entries - recurse back; prepare for next iteration + Indices[MinusOneIndices[Index]] = + ConstantInt::get(CI->getType(), -1, true); + break; + } + } + } + + void rememberIfBasedOnNoAliasCopyGuard(Value *V, uint64_t TheOffset) { + auto II = getBaseAsCopyGuardOrNull(V); + if (II != nullptr) { + for (auto CGOff : PendingNoAliasCopyGuards) { + if (CGOff.first == II && CGOff.second == TheOffset) + return; + } + + PendingNoAliasCopyGuards.emplace_back(II, TheOffset); + } + } + + void visitNoAliasCopyGuard(IntrinsicInst &II) { + assert(II.getIntrinsicID() == Intrinsic::noalias_copy_guard && + "We need a llvm.noalias.copy.guard here"); + rememberIfBasedOnNoAliasCopyGuard(&II, Offset.getZExtValue()); + enqueueUsers(II); + } + + void visitPendingNoAliasCopyGuard(IntrinsicInst &II, uint64_t BaseOffset) { + LLVM_DEBUG( + llvm::dbgs() + << "AllocaSlices::SliceBuilder: handling llvm.noalias.copy.guard:" << II + << ":@" << BaseOffset << ":" << AllocSize << "\n"); + // Identify the usage, so that it can be split + if (II.use_empty()) + return markAsDead(II); + + SmallVector, 4> PtrOffsetSizes; + + // Provide as many slices as we have restrict pointers + MDNode *CopyGuardIndices = + cast(cast( + II.getOperand(Intrinsic::NoAliasCopyGuardIndicesArg)) + ->getMetadata()); + + IRBuilderTy IRB(II.getNextNode()); + SmallVector Indices; + SmallVector MinusOneIndices; + for (const MDOperand &MDOp : CopyGuardIndices->operands()) { + if (const MDNode *IMD = dyn_cast(MDOp)) { + Indices.clear(); + MinusOneIndices.clear(); + + unsigned CGIndex = 0; + + for (const MDOperand &MDIndex : IMD->operands()) { + ConstantInt *C = + cast(cast(MDIndex)->getValue()); + if (C->isMinusOne()) // accept any index at this place + MinusOneIndices.push_back(CGIndex); + Indices.push_back(C); + ++CGIndex; + } + gatherValidNoAliasPointerOffsets(IRB, II, PtrOffsetSizes, Indices, + MinusOneIndices, BaseOffset); + } + } + + LLVM_DEBUG(llvm::dbgs() << "noalias pointers are at:\n"; + for (auto &P + : PtrOffsetSizes) { + llvm::dbgs() + << " - [" << P.first << "," << P.first + P.second << ")\n"; + }); + + U = &II.getOperandUse(0); + unsigned AS = II.getOperand(0)->getType()->getPointerAddressSpace(); + APInt TheBaseOffset(DL.getIndexSizeInBits(AS), BaseOffset); + for (auto &P : PtrOffsetSizes) { + APInt TheOffset = TheBaseOffset + P.first; + insertUse(II, TheOffset, P.second, false); + } } // Disable SRoA for any intrinsics except for lifetime invariants. 
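// A minimal sketch of the indices metadata consumed by
// visitPendingNoAliasCopyGuard above (assumed encoding; the -1 entries mean
// "any index", as in the code): for a copied aggregate such as
//   struct S { int *__restrict p; int n; int *__restrict q; };
// the llvm.noalias.copy.guard would carry index lists along the lines of
//   !{ !{i64 -1, i32 0}, !{i64 -1, i32 2} }
// with one list per restrict member. On a typical 64-bit layout this resolves
// to the byte ranges [0, 8) and [16, 24) of each copied element, which is what
// gatherValidNoAliasPointerOffsets turns into slices.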
@@ -930,6 +1109,52 @@ insertUse(II, Offset, Size, true); return; } + // look through noalias intrinsics + if (II.getIntrinsicID() == Intrinsic::noalias_decl) { + insertUse(II, Offset, AllocSize, true); + // do not enqueue direct users (?) They should be handled through a + // dependency on the original alloca + return; + } + if (II.getIntrinsicID() == Intrinsic::noalias) { + if (U->getOperandNo() == Intrinsic::NoAliasIdentifyPArg) { + insertUse(II, Offset, + DL.getTypeStoreSize( + II.getOperand(Intrinsic::NoAliasIdentifyPArg)->getType()), + false); + return; + } + if (U->getOperandNo() == 0) { + assert(II.getOperand(0) == *U); + // _only_ look through the first argument + enqueueUsers(II); + } + return; + } + if (II.getIntrinsicID() == Intrinsic::provenance_noalias) { + if (U->getOperandNo() == Intrinsic::ProvenanceNoAliasIdentifyPArg) { + insertUse(II, Offset, + DL.getTypeStoreSize( + II.getOperand(Intrinsic::ProvenanceNoAliasIdentifyPArg) + ->getType()), + false); + return; + } + // hmmm - do not look through the first argument for a + // llvm.provenance.noalias + return; + } + if (II.getIntrinsicID() == Intrinsic::noalias_arg_guard) { + if (U->getOperandNo() == 0) { + // _only_ look through the first argument + enqueueUsers(II); + } + return; + } + if (II.getIntrinsicID() == Intrinsic::noalias_copy_guard) { + visitNoAliasCopyGuard(II); + return; + } Base::visitIntrinsicInst(II); } @@ -1047,7 +1272,7 @@ #endif PointerEscapingInstr(nullptr) { SliceBuilder PB(DL, AI, *this); - SliceBuilder::PtrInfo PtrI = PB.visitPtr(AI); + SliceBuilder::PtrInfo PtrI = PB.visitPtrAndNoAliasCopyGuards(AI); if (PtrI.isEscaped() || PtrI.isAborted()) { // FIXME: We should sink the escape vs. abort info into the caller nicely, // possibly by just storing the PtrInfo in the AllocaSlices. @@ -1107,6 +1332,18 @@ #endif // !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) +static IntrinsicInst *partitionRepresentsNoAliasPointer(Partition &P) { + // A partition that has a 'llvm.noalias.copy.guard' use, represents a + // noalias pointer + for (auto &I : P) { + Use *U = I.getUse(); + if (auto *II = dyn_cast(U->getUser())) + if (II->getIntrinsicID() == Intrinsic::noalias_copy_guard) + return II; + } + return nullptr; +} + /// Walk the range of a partitioning looking for a common type to cover this /// sequence of slices. static Type *findCommonType(AllocaSlices::const_iterator B, @@ -1248,7 +1485,14 @@ LLVM_DEBUG(dbgs() << " original: " << PN << "\n"); LoadInst *SomeLoad = cast(PN.user_back()); - Type *LoadTy = SomeLoad->getType(); + if (SomeLoad->getPointerOperand() != &PN) { + // this must be the provenance -> ignore the speculation for now + LLVM_DEBUG(llvm::dbgs() << " not speculating dependency on provenance: " + << *SomeLoad << "\n"); + return; + } + + Type *LoadTy = cast(PN.getType())->getElementType(); IRBuilderTy PHIBuilder(&PN); PHINode *NewPN = PHIBuilder.CreatePHI(LoadTy, PN.getNumIncomingValues(), PN.getName() + ".sroa.speculated"); @@ -2268,6 +2512,9 @@ const uint64_t NewAllocaBeginOffset, NewAllocaEndOffset; Type *NewAllocaTy; + IntrinsicInst *OldNoAliasDecl = nullptr; + IntrinsicInst *NewNoAliasDecl = nullptr; + // This is a convenience and flag variable that will be null unless the new // alloca's integer operations should be widened to this integer type due to // passing isIntegerWideningViable above. 
If it is non-null, the desired @@ -2299,6 +2546,7 @@ uint64_t SliceSize = 0; bool IsSplittable = false; bool IsSplit = false; + bool RepresentsNoAlias = false; Use *OldUse = nullptr; Instruction *OldPtr = nullptr; @@ -2317,7 +2565,8 @@ uint64_t NewAllocaEndOffset, bool IsIntegerPromotable, VectorType *PromotableVecTy, SmallSetVector &PHIUsers, - SmallSetVector &SelectUsers) + SmallSetVector &SelectUsers, + bool ReprNoAlias) : DL(DL), AS(AS), Pass(Pass), OldAI(OldAI), NewAI(NewAI), NewAllocaBeginOffset(NewAllocaBeginOffset), NewAllocaEndOffset(NewAllocaEndOffset), @@ -2332,14 +2581,15 @@ ElementTy(VecTy ? VecTy->getElementType() : nullptr), ElementSize(VecTy ? DL.getTypeSizeInBits(ElementTy).getFixedSize() / 8 : 0), - PHIUsers(PHIUsers), SelectUsers(SelectUsers), - IRB(NewAI.getContext(), ConstantFolder()) { + RepresentsNoAlias(ReprNoAlias), PHIUsers(PHIUsers), + SelectUsers(SelectUsers), IRB(NewAI.getContext(), ConstantFolder()) { if (VecTy) { assert((DL.getTypeSizeInBits(ElementTy).getFixedSize() % 8) == 0 && "Only multiple-of-8 sized vector elements are viable"); ++NumVectorized; } assert((!IntTy && !VecTy) || (IntTy && !VecTy) || (!IntTy && VecTy)); + prepareNoAliasDecl(); } bool visit(AllocaSlices::const_iterator I) { @@ -2362,7 +2612,7 @@ SliceSize = NewEndOffset - NewBeginOffset; OldUse = I->getUse(); - OldPtr = cast(OldUse->get()); + OldPtr = dyn_cast(OldUse->get()); Instruction *OldUserI = cast(OldUse->getUser()); IRB.SetInsertPoint(OldUserI); @@ -2878,6 +3128,77 @@ return !II.isVolatile(); } + Instruction *detachFromNoAliasCopyGuard(Instruction *DepPtr, + Instruction *NACG) { + assert(getBaseAsCopyGuardOrNull(DepPtr) == NACG && + "DepPtr must depend on NACG"); + + // Follow first arg until we hit the llvm.noalias.copy.guard + Instruction *Ptr = cast(DepPtr); + while (true) { + if (Ptr->getNumUses() != 1) + break; + Instruction *ParentPtr = cast(Ptr->getOperand(0)); + if (ParentPtr == NACG) { + // we got here with only single uses - just remove the dependency + Ptr->setOperand(0, NACG->getOperand(0)); + return DepPtr; + } + Ptr = ParentPtr; + } + + // Multiple use found, we should + assert(false && "Multiple use found - we should duplicate the chain"); + return DepPtr; + } + + Instruction *maybeIntroduceNoAlias(LoadInst *Load, Value *PtrOperand) { + if (!RepresentsNoAlias) + return Load; + + auto NACG = getBaseAsCopyGuardOrNull(PtrOperand); + if (NACG == nullptr) { + // strange, but could happen + LLVM_DEBUG(llvm::dbgs() + << "maybeIntroduceNoAlias: RepresentsNoAlias is true," + "but no copy.guard seen\n"); + return Load; + } + + Value *NoAliasDecl = + NACG->getOperand(Intrinsic::NoAliasCopyGuardNoAliasDeclArg); + + auto ScopeArg = NACG->getOperand(Intrinsic::NoAliasCopyGuardScopeArg); + + auto NewDepPtr = Load->getPointerOperand(); + if (auto NACG2 = getBaseAsCopyGuardOrNull(NewDepPtr)) { + assert(NACG2 == NACG && "llvm.noalias.copy.guard dep must be identical"); + NewDepPtr = detachFromNoAliasCopyGuard( + cast(Load->getPointerOperand()), NACG); + Load->setOperand(Load->getPointerOperandIndex(), NewDepPtr); + } + + if (Load->getType()->isPointerTy()) { + auto NoAlias = IRB.CreateNoAliasPointer( + Load, NoAliasDecl, NewDepPtr, ScopeArg, Load->getName() + ".noalias"); + if (NoAliasDecl == OldNoAliasDecl) { + assert(NewNoAliasDecl->getOperand(Intrinsic::NoAliasDeclScopeArg) == + ScopeArg && + "New llvm.noalias.decl must have same scope"); + NoAlias->setOperand( + Intrinsic::NoAliasIdentifyPObjIdArg, + NewNoAliasDecl->getOperand(Intrinsic::NoAliasDeclObjIdArg)); + } + + 
LLVM_DEBUG(llvm::dbgs() << " introduce: " << *NoAlias << "\n"); + + return NoAlias; + } + + assert(false && "Need PtrToInt ?"); + return Load; + } + bool visitMemTransferInst(MemTransferInst &II) { // Rewriting of memory transfer instructions can be a bit tricky. We break // them into two categories: split intrinsics and unsplit intrinsics. @@ -3035,12 +3356,14 @@ Value *Src; if (VecTy && !IsWholeAlloca && !IsDest) { + // FIXME: should we handle noalias annotations here ? Src = IRB.CreateAlignedLoad(NewAI.getAllocatedType(), &NewAI, NewAI.getAlign(), "load"); Src = extractVector(IRB, Src, BeginIndex, EndIndex, "vec"); } else if (IntTy && !IsWholeAlloca && !IsDest) { - Src = IRB.CreateAlignedLoad(NewAI.getAllocatedType(), &NewAI, - NewAI.getAlign(), "load"); + LoadInst *Load = IRB.CreateAlignedLoad(NewAI.getAllocatedType(), &NewAI, + NewAI.getAlign(), "load"); + Src = maybeIntroduceNoAlias(Load, II.getSource()); Src = convertValue(DL, IRB, Src, IntTy); uint64_t Offset = NewBeginOffset - NewAllocaBeginOffset; Src = extractInteger(DL, IRB, Src, SubIntTy, Offset, "extract"); @@ -3049,7 +3372,7 @@ II.isVolatile(), "copyload"); if (AATags) Load->setAAMetadata(AATags); - Src = Load; + Src = maybeIntroduceNoAlias(Load, II.getSource()); } if (VecTy && !IsWholeAlloca && IsDest) { @@ -3069,11 +3392,129 @@ IRB.CreateAlignedStore(Src, DstPtr, DstAlign, II.isVolatile())); if (AATags) Store->setAAMetadata(AATags); - LLVM_DEBUG(dbgs() << " to: " << *Store << "\n"); + LLVM_DEBUG(dbgs() << "(3) to: " << *Store << "\n"); return !II.isVolatile(); } - bool visitIntrinsicInst(IntrinsicInst &II) { + void prepareNoAliasDecl() { + OldNoAliasDecl = nullptr; + NewNoAliasDecl = nullptr; + for (auto U : OldAI.users()) { + IntrinsicInst *II = dyn_cast(U); + if (II && II->getIntrinsicID() == Intrinsic::noalias_decl) { + if (OldNoAliasDecl) { + // We alreay found a llvm.noalias.decl - leave it up to the visiter to + // propagate + OldNoAliasDecl = nullptr; + NewNoAliasDecl = nullptr; + break; + } + IRB.SetInsertPoint(II); + IRB.SetCurrentDebugLocation(II->getDebugLoc()); + IRB.getInserter().SetNamePrefix(Twine(NewAI.getName()) + + ".noalias.decl."); + + OldNoAliasDecl = II; + LLVM_DEBUG(dbgs() << "Found llvm.noalias.decl: " << *II << "\n"); + ConstantInt *OldId = cast( + II->getArgOperand(Intrinsic::NoAliasDeclObjIdArg)); + NewNoAliasDecl = cast(IRB.CreateNoAliasDeclaration( + &NewAI, NewAllocaBeginOffset + OldId->getZExtValue(), + II->getArgOperand(2))); + LLVM_DEBUG(dbgs() << "New llvm.noalias.decl: " << *NewNoAliasDecl + << "\n"); + // continue - it is possible we see multiple llvm.noalias.decl! + } + } + } + + bool visitNoAliasDeclIntrinsicInst(IntrinsicInst &II) { + assert(II.getIntrinsicID() == Intrinsic::noalias_decl); + LLVM_DEBUG(dbgs() << " original: " << II << "\n"); + Value *New; + if (OldNoAliasDecl) { + assert(OldNoAliasDecl == &II); + assert(NewNoAliasDecl != nullptr); + New = NewNoAliasDecl; + } else { + assert(NewNoAliasDecl == nullptr); + ConstantInt *OldId = + cast(II.getArgOperand(Intrinsic::NoAliasDeclObjIdArg)); + New = cast(IRB.CreateNoAliasDeclaration( + &NewAI, NewAllocaBeginOffset + OldId->getZExtValue(), + II.getArgOperand(2))); + } + (void)New; + LLVM_DEBUG(dbgs() << " to: " << *New << "\n"); + + // Record this instruction for deletion. 
+ Pass.DeadInsts.insert(&II); + + // nothing else to do - preparation was already done + return true; + } + + bool visitProvenanceNoAliasIntrinsicInst(IntrinsicInst &II) { + assert(II.getIntrinsicID() == Intrinsic::provenance_noalias); + assert(II.getArgOperand(Intrinsic::ProvenanceNoAliasIdentifyPArg) == + OldPtr); + LLVM_DEBUG(dbgs() << " original: " << II << "\n"); + if (II.getArgOperand(Intrinsic::ProvenanceNoAliasNoAliasDeclArg) == + OldNoAliasDecl) { + assert(OldNoAliasDecl && NewNoAliasDecl && + "If we get here, we must have an old and a new llvm.noalias.decl"); + II.setArgOperand(Intrinsic::ProvenanceNoAliasNoAliasDeclArg, + NewNoAliasDecl); + } + II.setArgOperand( + Intrinsic::ProvenanceNoAliasIdentifyPArg, + getNewAllocaSlicePtr( + IRB, II.getArgOperand(Intrinsic::ProvenanceNoAliasIdentifyPArg) + ->getType())); + if (NewAllocaBeginOffset > 0) { + Value *OldObjIdV = + II.getArgOperand(Intrinsic::ProvenanceNoAliasIdentifyPObjIdArg); + auto NewObjId = ConstantInt::get( + OldObjIdV->getType(), + cast(OldObjIdV)->getZExtValue() + NewAllocaBeginOffset); + II.setArgOperand(Intrinsic::ProvenanceNoAliasIdentifyPObjIdArg, NewObjId); + } + LLVM_DEBUG(dbgs() << " to: " << II << "\n"); + deleteIfTriviallyDead(OldPtr); + return true; + } + + bool visitNoAliasIntrinsicInst(IntrinsicInst &II) { + assert(II.getIntrinsicID() == Intrinsic::noalias); + assert(II.getArgOperand(Intrinsic::NoAliasIdentifyPArg) == OldPtr); + LLVM_DEBUG(dbgs() << " original: " << II << "\n"); + if (II.getArgOperand(Intrinsic::NoAliasNoAliasDeclArg) == OldNoAliasDecl) { + assert(OldNoAliasDecl && NewNoAliasDecl && + "If we get here, we must have an old and a new llvm.noalias.decl"); + II.setArgOperand(Intrinsic::NoAliasNoAliasDeclArg, NewNoAliasDecl); + } + II.setArgOperand( + Intrinsic::NoAliasIdentifyPArg, + getNewAllocaSlicePtr( + IRB, II.getArgOperand(Intrinsic::NoAliasIdentifyPArg)->getType())); + if (NewAllocaBeginOffset > 0) { + Value *OldObjIdV = II.getArgOperand(Intrinsic::NoAliasIdentifyPObjIdArg); + auto NewObjId = ConstantInt::get( + OldObjIdV->getType(), + cast(OldObjIdV)->getZExtValue() + NewAllocaBeginOffset); + II.setArgOperand(Intrinsic::NoAliasIdentifyPObjIdArg, NewObjId); + } + LLVM_DEBUG(dbgs() << " to: " << II << "\n"); + deleteIfTriviallyDead(OldPtr); + return true; + } + + bool visitNoAliasCopyGuardIntrinsicInst(IntrinsicInst &II) { + assert(II.getIntrinsicID() == Intrinsic::noalias_copy_guard); + return true; + } + + bool visitLifetimeIntrinsicInst(IntrinsicInst &II) { assert(II.isLifetimeStartOrEnd()); LLVM_DEBUG(dbgs() << " original: " << II << "\n"); assert(II.getArgOperand(1) == OldPtr); @@ -3111,6 +3552,25 @@ return true; } + bool visitIntrinsicInst(IntrinsicInst &II) { + switch (II.getIntrinsicID()) { + case Intrinsic::lifetime_start: + case Intrinsic::lifetime_end: + return visitLifetimeIntrinsicInst(II); + case Intrinsic::noalias_decl: + return visitNoAliasDeclIntrinsicInst(II); + case Intrinsic::noalias: + return visitNoAliasIntrinsicInst(II); + case Intrinsic::provenance_noalias: + return visitProvenanceNoAliasIntrinsicInst(II); + case Intrinsic::noalias_copy_guard: + return visitNoAliasCopyGuardIntrinsicInst(II); + default: + assert(false && "SROA: SliceRewriter: unhandled intrinsic"); + return false; + } + } + void fixLoadStoreAlign(Instruction &Root) { // This algorithm implements the same visitor loop as // hasUnsafePHIOrSelectUse, and fixes the alignment of each load @@ -3203,6 +3663,264 @@ namespace { +// Returns true if the indicices of the provided GEP are compatible with +// the 
indices in the llvm.noalias.copy.guard. +// - treatMissingIndicesAsZero: if the number of indices from the GEP +// is smaller, treat the missing indices as zero +// Note: A nullptr GEP can be combined with treatMissingIndicesAsZero=true +static bool +areGepIndicesCompatibleWithCopyGuard(GetElementPtrInst *GEP, + llvm::Instruction *CopyGuardII, + bool treatMissingIndicesAsZero = false) { + assert(CopyGuardII && "We need a llvm.noalias.copy.guard"); + + MDNode *CopyGuardIndices = cast( + cast( + CopyGuardII->getOperand(Intrinsic::NoAliasCopyGuardIndicesArg)) + ->getMetadata()); + for (const MDOperand &MDOp : CopyGuardIndices->operands()) { + if (const MDNode *IMD = dyn_cast(MDOp)) { + unsigned CGIndex = 0; + bool IndicesAreCompatible = true; + if (GEP) { + if (IMD->getNumOperands() < GEP->getNumIndices()) + continue; + for (Value *Index : GEP->indices()) { + const MDOperand &MDIndex = IMD->getOperand(CGIndex); + ++CGIndex; + ConstantInt *C_lhs = + cast(cast(MDIndex)->getValue()); + if (C_lhs->isMinusOne()) + continue; // accept any index at this place + ConstantInt *C_rhs = dyn_cast(Index); + if ((C_rhs == nullptr) || + (C_lhs->getSExtValue() != + C_rhs->getSExtValue())) { // compare int64 - the ConstantInt can + // have different types + IndicesAreCompatible = false; + break; + } + } + if (!IndicesAreCompatible) + continue; + } + // If there are more indices, check that they are zero + unsigned CGMaxIndex = IMD->getNumOperands(); + if (treatMissingIndicesAsZero) { + for (; CGIndex < CGMaxIndex; ++CGIndex) { + const MDOperand &MDIndex = IMD->getOperand(CGIndex); + ConstantInt *C_lhs = + cast(cast(MDIndex)->getValue()); + if (C_lhs->isZero() || C_lhs->isMinusOne()) + continue; // accept 0 or any index at this place + // otherwise, we do not have all-zero indices + IndicesAreCompatible = false; + break; + } + } else { + IndicesAreCompatible = (CGIndex == CGMaxIndex); + } + if (IndicesAreCompatible) { + return true; + } + } + } + return false; +} + +static Type *GetZeroIndexLeafType(Type *TypeToLoad) { + while (true) { + if (StructType *ST = dyn_cast(TypeToLoad)) { + TypeToLoad = ST->getElementType(0); + continue; + } + if (ArrayType *AT = dyn_cast(TypeToLoad)) { + TypeToLoad = AT->getElementType(); + continue; + } + if (VectorType *VT = dyn_cast(TypeToLoad)) { + TypeToLoad = VT->getElementType(); + continue; + } + break; + } + assert(TypeToLoad->isPointerTy() && "Only pointers can have noalias info"); + + return TypeToLoad; +} + +static void detachIfSingleUse(Value *What, Instruction *NACG) { + // Be on the safe side + Instruction *I = dyn_cast(What); + if (I && I->getNumUses() == 1 && I->getOperand(0) == NACG) { + I->setOperand(0, NACG->getOperand(0)); + } +} + +// Check if the load corresponds to a restrict pointer, as specified in the +// CopyGuard information. +// If so, add and return 'llvm.noalias' before the load. If the original load +// needs to be replaced, due to bitcasts, it is returned through the 'Load' +// argument. 
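The compatibility rule implemented by areGepIndicesCompatibleWithCopyGuard above reduces to a per-position comparison of the GEP indices against one encoded restrict-member path: a -1 in the encoded path matches any index, and with treatMissingIndicesAsZero set, any trailing encoded indices not covered by the GEP must be 0 or -1. The snippet below is a simplified, standalone model of that rule over plain integers; matchesEncodedPath and its test data are made up for illustration and ignore the ConstantInt type handling and the iteration over multiple candidate paths done by the real helper.

#include <cstdint>
#include <cstdio>
#include <vector>

// Returns true when a concrete GEP-style index path matches one encoded
// restrict-member path. -1 in Encoded matches any index; when
// TreatMissingAsZero is set, trailing Encoded entries beyond the concrete
// path must be 0 (or -1).
static bool matchesEncodedPath(const std::vector<int64_t> &Concrete,
                               const std::vector<int64_t> &Encoded,
                               bool TreatMissingAsZero) {
  if (Encoded.size() < Concrete.size())
    return false;
  size_t I = 0;
  for (; I < Concrete.size(); ++I)
    if (Encoded[I] != -1 && Encoded[I] != Concrete[I])
      return false;
  if (!TreatMissingAsZero)
    return I == Encoded.size();
  for (; I < Encoded.size(); ++I)
    if (Encoded[I] != 0 && Encoded[I] != -1)
      return false;
  return true;
}

int main() {
  std::vector<int64_t> Encoded = {-1, 1}; // any array element, field 1
  std::printf("%d\n", matchesEncodedPath({2, 1}, Encoded, false)); // 1
  std::printf("%d\n", matchesEncodedPath({2, 0}, Encoded, false)); // 0
  std::printf("%d\n", matchesEncodedPath({3}, Encoded, true));     // 0
  return 0;
}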
+static llvm::Instruction *introduceNoAliasWhenCopyGuardIndicesAreCompatible( + llvm::LoadInst *Load, llvm::Instruction *CopyGuardII, const DataLayout &DL, + SmallVector *TrackSliceUses = nullptr) { + Value *PtrOp = Load->getPointerOperand(); + + if (TrackSliceUses) + TrackSliceUses->push_back( + &Load->getOperandUse(Load->getPointerOperandIndex())); + + if (CopyGuardII == nullptr) + return Load; + + // Possible cases: + // 1) load ( gep ( CopyGuard) ) + if (GetElementPtrInst *GEP = dyn_cast(PtrOp)) { + if (areGepIndicesCompatibleWithCopyGuard(GEP, CopyGuardII)) { + IRBuilderTy IRB(Load->getNextNode()); + + detachIfSingleUse(GEP, CopyGuardII); + + // A compatible set of indices was found - introduce a noalias intrinsic + // FIXME: what AAMetadata should we put on the llvm.noalias ? + auto NoAlias = IRB.CreateNoAliasPointer( + Load, + CopyGuardII->getOperand(Intrinsic::NoAliasCopyGuardNoAliasDeclArg), + GEP, CopyGuardII->getOperand(Intrinsic::NoAliasCopyGuardScopeArg), + Load->getName() + ".noalias"); + // juggle around + Load->replaceAllUsesWith(NoAlias); + NoAlias->setOperand(0, Load); + LLVM_DEBUG(llvm::dbgs() + << " - compatible, introduced:" << *NoAlias << "\n"); + + if (TrackSliceUses) + TrackSliceUses->push_back( + &NoAlias->getOperandUse(Intrinsic::NoAliasIdentifyPArg)); + + return NoAlias; + } + + return Load; + } + + if (BitCastInst *BCI = dyn_cast(PtrOp)) { + // We want to pass it as an integer type or pointer + if (!(Load->getType()->isIntegerTy() || Load->getType()->isPointerTy())) { + LLVM_DEBUG(llvm::dbgs() + << " ** copy.guard: ignoring non integer or pointer:" << *Load + << "\n"); + return Load; + } + + bool IsLoadOfInteger = Load->getType()->isIntegerTy(); + + // 2) load iXX (bitcast (gep (CopyGuard))) + // load pXX (bitcast (gep (CopyGuard))) + // 3) load iXX (bitcast (CopyGuard)) + // load pXX (bitcast (CopyGuard)) + Value *BCIOp = BCI->getOperand(0); + GetElementPtrInst *GEP = dyn_cast(BCI->getOperand(0)); + if (GEP || (BCIOp == CopyGuardII)) { + Type *TypeToLoad = BCIOp->getType()->getPointerElementType(); + + // Also handles a null GEP + if (!areGepIndicesCompatibleWithCopyGuard(GEP, CopyGuardII, true)) { + return Load; + } + + TypeToLoad = GetZeroIndexLeafType(TypeToLoad); + + // Sizes must be identical + if (DL.getTypeStoreSizeInBits(TypeToLoad) != + DL.getTypeStoreSizeInBits(Load->getType())) { + LLVM_DEBUG(llvm::dbgs() << " ** copy.guard: type sizes do not match\n"); + return Load; + } + + IRBuilderTy IRB(Load->getNextNode()); + + if (GEP) { + detachIfSingleUse(GEP, CopyGuardII); + } else { + // Look through BCIop == CopyGuardII + BCIOp = CopyGuardII->getOperand(0); + } + + LLVM_DEBUG( + llvm::dbgs() + << (GEP ? " - compatible bitcast(gep(copy.guard)), introduced:\n" + : " - compatible bitcast(copy.guard), introduced:\n")); + + LoadInst *NewLoad = Load; + Value *NewPtr = BCIOp; + if (IsLoadOfInteger) { + NewPtr = IRB.CreatePointerBitCastOrAddrSpaceCast( + BCIOp, TypeToLoad->getPointerTo()); + + if (NewPtr != BCIOp) { + LLVM_DEBUG(llvm::dbgs() << " -- " << *NewPtr << "\n"); + } + + NewLoad = IRB.CreateAlignedLoad(TypeToLoad, NewPtr, Load->getAlign(), + Load->getName() + ".sroa_as_ptr"); + AAMDNodes AATags; + Load->getAAMetadata(AATags); + NewLoad->setAAMetadata(AATags); + LLVM_DEBUG(llvm::dbgs() << " -- " << *NewLoad << "\n"); + } + + // A compatible set of indices was found - introduce a noalias intrinsic + // FIXME: what AAMetadata should we put on the llvm.noalias ? 
+ auto NoAlias = IRB.CreateNoAliasPointer( + NewLoad, + CopyGuardII->getOperand(Intrinsic::NoAliasCopyGuardNoAliasDeclArg), + NewPtr, CopyGuardII->getOperand(Intrinsic::NoAliasCopyGuardScopeArg), + NewLoad->getName() + ".noalias"); + + LLVM_DEBUG(llvm::dbgs() << " -- " << *NoAlias << "\n"); + Value *RetVal; + if (IsLoadOfInteger) { + auto PtrCast = IRB.CreatePtrToInt(NoAlias, Load->getType(), + Load->getName() + ".sroa_as_int"); + + LLVM_DEBUG(llvm::dbgs() << " -- " << *PtrCast << "\n"); + + // juggle around + Load->replaceAllUsesWith(PtrCast); + Load->eraseFromParent(); + + RetVal = PtrCast; + } else { + Load->replaceAllUsesWith(NoAlias); + + RetVal = NoAlias; + } + + if (BCI->use_empty()) + BCI->eraseFromParent(); + + if (TrackSliceUses) { + TrackSliceUses->back() = + &NewLoad->getOperandUse(NewLoad->getPointerOperandIndex()); + TrackSliceUses->push_back( + &NoAlias->getOperandUse(Intrinsic::NoAliasIdentifyPArg)); + } + + return cast(RetVal); + } + + LLVM_DEBUG(llvm::dbgs() + << " ** copy.guard: unhandled bitcast:" << BCI << "\n"); + return Load; + } + + LLVM_DEBUG(llvm::dbgs() << "copy.guard: unhandled:" << Load << "\n"); + // unhandled other situation + return Load; +} + /// Visitor to rewrite aggregate loads and stores as scalar. /// /// This pass aggressively rewrites all aggregate loads and stores on @@ -3211,6 +3929,7 @@ class AggLoadStoreRewriter : public InstVisitor { // Befriend the base class so it can delegate to private visit methods. friend class InstVisitor; + typedef InstVisitor Base; /// Queue of pointer uses to analyze and potentially rewrite. SmallVector Queue; @@ -3345,40 +4064,69 @@ struct LoadOpSplitter : public OpSplitter { AAMDNodes AATags; + Instruction *CopyGuardII = nullptr; + unsigned CGIIndex = 0; LoadOpSplitter(Instruction *InsertionPoint, Value *Ptr, Type *BaseTy, - AAMDNodes AATags, Align BaseAlign, const DataLayout &DL) + AAMDNodes AATags, Align BaseAlign, const DataLayout &DL, + Instruction *CopyGuardII_) : OpSplitter(InsertionPoint, Ptr, BaseTy, BaseAlign, DL), - AATags(AATags) {} + AATags(AATags), CopyGuardII(CopyGuardII_) {} /// Emit a leaf load of a single value. This is called at the leaves of the /// recursive emission to actually load values. void emitFunc(Type *Ty, Value *&Agg, Align Alignment, const Twine &Name) { assert(Ty->isSingleValueType()); // Load the single value and insert it using the indices. 
+ auto Ptr = this->Ptr; // Make sure _NOT_ to overwrite the Ptr member + if (CopyGuardII) { + assert(CopyGuardII == Ptr && "Ptr != CopyGuardII ???"); + Ptr = CopyGuardII->getOperand(0); // look through noalias.copy.guard + } Value *GEP = IRB.CreateInBoundsGEP(BaseTy, Ptr, GEPIndices, Name + ".gep"); - LoadInst *Load = + Instruction *PValue; + LoadInst *PLoad = IRB.CreateAlignedLoad(Ty, GEP, Alignment, Name + ".load"); if (AATags) - Load->setAAMetadata(AATags); - Agg = IRB.CreateInsertValue(Agg, Load, Indices, Name + ".insert"); - LLVM_DEBUG(dbgs() << " to: " << *Load << "\n"); + PLoad->setAAMetadata(AATags); + PValue = introduceNoAliasWhenCopyGuardIndicesAreCompatible( + PLoad, CopyGuardII, DL); + + Agg = IRB.CreateInsertValue(Agg, PValue, Indices, Name + ".insert"); + LLVM_DEBUG(dbgs() << " to: " << *PValue << "\n"); } }; bool visitLoadInst(LoadInst &LI) { - assert(LI.getPointerOperand() == *U); - if (!LI.isSimple() || LI.getType()->isSingleValueType()) + if (U->getOperandNo() == LI.getNoaliasProvenanceOperandIndex()) { + // Skip provenance + assert(LI.hasNoaliasProvenanceOperand() && + LI.getNoaliasProvenanceOperand() == *U); return false; + } + assert(LI.getPointerOperand() == *U); + Instruction *CopyGuardII = getBaseAsCopyGuardOrNull(LI.getPointerOperand()); + if (CopyGuardII) { + LLVM_DEBUG(llvm::dbgs() << " Replacing Load:" << LI + << "\n" + " Depends on:" + << *CopyGuardII << "\n"); + } + if (!LI.isSimple() || LI.getType()->isSingleValueType()) { + LoadInst *PLI = &LI; + auto Load = introduceNoAliasWhenCopyGuardIndicesAreCompatible( + PLI, CopyGuardII, DL); + return (Load != PLI); + } // We have an aggregate being loaded, split it apart. LLVM_DEBUG(dbgs() << " original: " << LI << "\n"); AAMDNodes AATags; LI.getAAMetadata(AATags); LoadOpSplitter Splitter(&LI, *U, LI.getType(), AATags, - getAdjustedAlignment(&LI, 0), DL); + getAdjustedAlignment(&LI, 0), DL, CopyGuardII); Value *V = UndefValue::get(LI.getType()); Splitter.emitSplitOps(LI.getType(), V, LI.getName() + ".fca"); Visited.erase(&LI); @@ -3423,6 +4171,16 @@ // We have an aggregate being stored, split it apart. LLVM_DEBUG(dbgs() << " original: " << SI << "\n"); + + if (auto *LI = dyn_cast(SI.getValueOperand())) { + // Try to split up the depending load, helpful for tracking noalias info + if (Visited.insert(LI).second) { + LLVM_DEBUG(llvm::dbgs() + << " - Forcing split of of StoreInst value operand\n"); + Queue.push_back(&LI->getOperandUse(LI->getPointerOperandIndex())); + } + } + AAMDNodes AATags; SI.getAAMetadata(AATags); StoreOpSplitter Splitter(&SI, *U, V->getType(), AATags, @@ -3438,6 +4196,30 @@ return false; } + // Look through noalias intrinsics + bool visitIntrinsicInst(IntrinsicInst &II) { + if (II.getIntrinsicID() == Intrinsic::noalias) { + if (II.getOperand(0) == *U) { + enqueueUsers(II); + } + return false; + } + if (II.getIntrinsicID() == Intrinsic::provenance_noalias || + II.getIntrinsicID() == Intrinsic::noalias_decl) { + return false; + } + if (II.getIntrinsicID() == Intrinsic::noalias_copy_guard) { + LLVM_DEBUG(llvm::dbgs() + << "AggLoadStoreRewriter: handling llvm.noalias.copy.guard:" + << (II.getOperand(0) == *U) << ":" << II << "\n"); + if (II.getOperand(0) == *U) + enqueueUsers(II); + return false; + } + + return Base::visitIntrinsicInst(II); + } + bool visitAddrSpaceCastInst(AddrSpaceCastInst &ASC) { enqueueUsers(ASC); return false; @@ -3946,8 +4728,8 @@ // First, we rewrite all of the split loads, and just accumulate each split // load in a parallel structure. 
We also build the slices for them and append // them to the alloca slices. - SmallDenseMap, 1> SplitLoadsMap; - std::vector SplitLoads; + SmallDenseMap, 1> SplitLoadsMap; + std::vector SplitLoads; const DataLayout &DL = AI.getModule()->getDataLayout(); for (LoadInst *LI : Loads) { SplitLoads.clear(); @@ -3966,6 +4748,8 @@ Instruction *BasePtr = cast(LI->getPointerOperand()); IRB.SetInsertPoint(LI); + Instruction *CopyGuardII = getBaseAsCopyGuardOrNull(BasePtr); + LLVM_DEBUG(dbgs() << " Splitting load: " << *LI << "\n"); uint64_t PartOffset = 0, PartSize = Offsets.Splits.front(); @@ -3984,18 +4768,23 @@ PLoad->copyMetadata(*LI, {LLVMContext::MD_mem_parallel_loop_access, LLVMContext::MD_access_group}); + SmallVector UsesToTrack; + auto *PValue = introduceNoAliasWhenCopyGuardIndicesAreCompatible( + PLoad, CopyGuardII, DL, &UsesToTrack); + // Append this load onto the list of split loads so we can find it later // to rewrite the stores. - SplitLoads.push_back(PLoad); + SplitLoads.push_back(PValue); // Now build a new slice for the alloca. - NewSlices.push_back( - Slice(BaseOffset + PartOffset, BaseOffset + PartOffset + PartSize, - &PLoad->getOperandUse(PLoad->getPointerOperandIndex()), - /*IsSplittable*/ false)); - LLVM_DEBUG(dbgs() << " new slice [" << NewSlices.back().beginOffset() - << ", " << NewSlices.back().endOffset() - << "): " << *PLoad << "\n"); + for (Use *PUse : UsesToTrack) { + NewSlices.push_back(Slice(BaseOffset + PartOffset, + BaseOffset + PartOffset + PartSize, PUse, + /*IsSplittable*/ false)); + LLVM_DEBUG(dbgs() << " new slice [" << NewSlices.back().beginOffset() + << ", " << NewSlices.back().endOffset() + << "): " << *PUse->getUser() << "\n"); + } // See if we've handled all the splits. if (Idx >= Size) @@ -4026,7 +4815,7 @@ LLVM_DEBUG(dbgs() << " Splitting store of load: " << *SI << "\n"); for (int Idx = 0, Size = SplitLoads.size(); Idx < Size; ++Idx) { - LoadInst *PLoad = SplitLoads[Idx]; + auto *PLoad = SplitLoads[Idx]; uint64_t PartOffset = Idx == 0 ? 0 : Offsets.Splits[Idx - 1]; auto *PartPtrTy = PLoad->getType()->getPointerTo(SI->getPointerAddressSpace()); @@ -4088,13 +4877,14 @@ "Cannot represent alloca access size using 64-bit integers!"); Value *LoadBasePtr = LI->getPointerOperand(); + Instruction *CopyGuardII = getBaseAsCopyGuardOrNull(LoadBasePtr); Instruction *StoreBasePtr = cast(SI->getPointerOperand()); LLVM_DEBUG(dbgs() << " Splitting store: " << *SI << "\n"); // Check whether we have an already split load. auto SplitLoadsMapI = SplitLoadsMap.find(LI); - std::vector *SplitLoads = nullptr; + std::vector *SplitLoads = nullptr; if (SplitLoadsMapI != SplitLoadsMap.end()) { SplitLoads = &SplitLoadsMapI->second; assert(SplitLoads->size() == Offsets.Splits.size() + 1 && @@ -4111,19 +4901,21 @@ auto *StorePartPtrTy = PartTy->getPointerTo(SI->getPointerAddressSpace()); // Either lookup a split load or create one. - LoadInst *PLoad; + Instruction *PLoad; if (SplitLoads) { PLoad = (*SplitLoads)[Idx]; } else { IRB.SetInsertPoint(LI); auto AS = LI->getPointerAddressSpace(); - PLoad = IRB.CreateAlignedLoad( + LoadInst *NewPLoad = IRB.CreateAlignedLoad( PartTy, getAdjustedPtr(IRB, DL, LoadBasePtr, APInt(DL.getIndexSizeInBits(AS), PartOffset), LoadPartPtrTy, LoadBasePtr->getName() + "."), getAdjustedAlignment(LI, PartOffset), /*IsVolatile*/ false, LI->getName()); + PLoad = introduceNoAliasWhenCopyGuardIndicesAreCompatible( + NewPLoad, CopyGuardII, DL); } // And store this partition. @@ -4235,6 +5027,7 @@ // or an i8 array of an appropriate size. 
Type *SliceTy = nullptr; const DataLayout &DL = AI.getModule()->getDataLayout(); + auto RepresentsNoAlias = (partitionRepresentsNoAliasPointer(P) != nullptr); if (Type *CommonUseTy = findCommonType(P.begin(), P.end(), P.endOffset())) if (DL.getTypeAllocSize(CommonUseTy).getFixedSize() >= P.size()) SliceTy = CommonUseTy; @@ -4257,6 +5050,14 @@ if (VecTy) SliceTy = VecTy; + if (RepresentsNoAlias && !SliceTy->isPointerTy()) { + if (DL.getTypeStoreSizeInBits(SliceTy) == + DL.getTypeStoreSizeInBits(SliceTy->getPointerTo())) { + // a restrict pointer must be a pointer + SliceTy = SliceTy->getPointerTo(); + } + } + // Check for the case where we're going to rewrite to a new alloca of the // exact same type as the original, and with the same access offsets. In that // case, re-use the existing alloca, but still run through the rewriter to @@ -4298,7 +5099,7 @@ AllocaSliceRewriter Rewriter(DL, AS, *this, AI, *NewAI, P.beginOffset(), P.endOffset(), IsIntegerPromotable, VecTy, - PHIUsers, SelectUsers); + PHIUsers, SelectUsers, RepresentsNoAlias); bool Promotable = true; for (Slice *S : P.splitSliceTails()) { Promotable &= Rewriter.visit(S); @@ -4549,6 +5350,7 @@ /// rewritten as needed. bool SROA::runOnAlloca(AllocaInst &AI) { LLVM_DEBUG(dbgs() << "SROA alloca: " << AI << "\n"); + LLVM_DEBUG(AI.getParent()->getParent()->dump()); ++NumAllocasAnalyzed; // Special case dead allocas, as they're trivial. Index: llvm/lib/Transforms/Scalar/Scalar.cpp =================================================================== --- llvm/lib/Transforms/Scalar/Scalar.cpp +++ llvm/lib/Transforms/Scalar/Scalar.cpp @@ -37,6 +37,7 @@ initializeBDCELegacyPassPass(Registry); initializeAlignmentFromAssumptionsPass(Registry); initializeCallSiteSplittingLegacyPassPass(Registry); + initializeConnectNoAliasDeclLegacyPassPass(Registry); initializeConstantHoistingLegacyPassPass(Registry); initializeConstantPropagationPass(Registry); initializeCorrelatedValuePropagationPass(Registry); @@ -89,6 +90,7 @@ initializeMergedLoadStoreMotionLegacyPassPass(Registry); initializeNaryReassociateLegacyPassPass(Registry); initializePartiallyInlineLibCallsLegacyPassPass(Registry); + initializePropagateAndConvertNoAliasLegacyPassPass(Registry); initializeReassociateLegacyPassPass(Registry); initializeRedundantDbgInstEliminationPass(Registry); initializeRegToMemPass(Registry); Index: llvm/lib/Transforms/Utils/CMakeLists.txt =================================================================== --- llvm/lib/Transforms/Utils/CMakeLists.txt +++ llvm/lib/Transforms/Utils/CMakeLists.txt @@ -51,6 +51,7 @@ MisExpect.cpp ModuleUtils.cpp NameAnonGlobals.cpp + NoAliasUtils.cpp PredicateInfo.cpp PromoteMemoryToRegister.cpp ScalarEvolutionExpander.cpp Index: llvm/lib/Transforms/Utils/CloneFunction.cpp =================================================================== --- llvm/lib/Transforms/Utils/CloneFunction.cpp +++ llvm/lib/Transforms/Utils/CloneFunction.cpp @@ -36,6 +36,21 @@ #include using namespace llvm; +static void PropagateNoAliasProvenanceInfo(Instruction *To, + const Instruction *From) { + // The ptr_provenance is not automatically copied over in a 'clone()' + // Let's do it here. + if (auto *LI = dyn_cast(From)) { + if (LI->hasNoaliasProvenanceOperand()) + cast(To)->setNoaliasProvenanceOperand( + LI->getNoaliasProvenanceOperand()); + } else if (auto SI = dyn_cast(From)) { + if (SI->hasNoaliasProvenanceOperand()) + cast(To)->setNoaliasProvenanceOperand( + SI->getNoaliasProvenanceOperand()); + } +} + /// See comments in Cloning.h. 
BasicBlock *llvm::CloneBasicBlock(const BasicBlock *BB, ValueToValueMapTy &VMap, const Twine &NameSuffix, Function *F, @@ -55,6 +70,8 @@ DIFinder->processInstruction(*TheModule, I); Instruction *NewInst = I.clone(); + PropagateNoAliasProvenanceInfo(NewInst, &I); + if (I.hasName()) NewInst->setName(I.getName() + NameSuffix); NewBB->getInstList().push_back(NewInst); @@ -331,6 +348,7 @@ II != IE; ++II) { Instruction *NewInst = II->clone(); + PropagateNoAliasProvenanceInfo(NewInst, &*II); // Eagerly remap operands to the newly cloned instruction, except for PHI // nodes for which we defer processing until we update the CFG. @@ -862,6 +880,7 @@ // terminator gets replaced and StopAt == BB's terminator. for (; StopAt != &*BI && BB->getTerminator() != &*BI; ++BI) { Instruction *New = BI->clone(); + PropagateNoAliasProvenanceInfo(New, &*BI); New->setName(BI->getName()); New->insertBefore(NewTerm); ValueMapping[&*BI] = New; Index: llvm/lib/Transforms/Utils/InlineFunction.cpp =================================================================== --- llvm/lib/Transforms/Utils/InlineFunction.cpp +++ llvm/lib/Transforms/Utils/InlineFunction.cpp @@ -28,7 +28,6 @@ #include "llvm/Analysis/EHPersonalities.h" #include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Analysis/ProfileSummaryInfo.h" -#include "llvm/Transforms/Utils/Local.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/Analysis/VectorUtils.h" #include "llvm/IR/Argument.h" @@ -44,6 +43,7 @@ #include "llvm/IR/Dominators.h" #include "llvm/IR/Function.h" #include "llvm/IR/IRBuilder.h" +#include "llvm/IR/InstIterator.h" #include "llvm/IR/InstrTypes.h" #include "llvm/IR/Instruction.h" #include "llvm/IR/Instructions.h" @@ -61,6 +61,8 @@ #include "llvm/Support/ErrorHandling.h" #include "llvm/Transforms/Utils/AssumeBundleBuilder.h" #include "llvm/Transforms/Utils/Cloning.h" +#include "llvm/Transforms/Utils/Local.h" +#include "llvm/Transforms/Utils/NoAliasUtils.h" #include "llvm/Transforms/Utils/ValueMapper.h" #include #include @@ -79,6 +81,11 @@ cl::Hidden, cl::desc("Convert noalias attributes to metadata during inlining.")); +static cl::opt UseNoAliasIntrinsic( + "use-noalias-intrinsic-during-inlining", cl::Hidden, cl::ZeroOrMore, + cl::init(true), + cl::desc("Use the llvm.noalias intrinsic during inlining.")); + // Disabled by default, because the added alignment assumptions may increase // compile-time and block optimizations. This option is not suitable for use // with frontends that emit comprehensive parameter alignment annotations. @@ -815,7 +822,8 @@ /// not be differentiated (and this would lead to miscompiles because the /// non-aliasing property communicated by the metadata could have /// call-site-specific control dependencies). -static void CloneAliasScopeMetadata(CallBase &CB, ValueToValueMapTy &VMap) { +static void CloneAliasScopeMetadata(CallBase &CB, ValueToValueMapTy &VMap, + Function *Caller) { const Function *CalledFunc = CB.getCalledFunction(); SetVector MD; @@ -824,16 +832,70 @@ // inter-procedural alias analysis passes. We can revisit this if it becomes // an efficiency or overhead problem. + // Track function level !noalias metadata ('unknown function' scope). 
This + // should be merged with the data from the callee + MDNode *CallerNoAlias = Caller->getMetadata("noalias"); + MDNode *CalleeNoAlias = CalledFunc->getMetadata("noalias"); + llvm::MDNode *NewUnknownScope = nullptr; + + if ((CalleeNoAlias != nullptr) && (CallerNoAlias == nullptr)) { + // - NOTE: keep in sync with (clang) CGExpr: EmitLoadOfScalar + // - NOTE: keep in sync with (clang) CGDecl: EmitAutoVarNoAlias + // - EmitNoAliasDecl + // - NOTE: keep in sync with (llvm) InlineFunction: CloneAliasScopeMetadata + llvm::MDBuilder MDB(Caller->getContext()); + std::string Name(Caller->getName()); + auto NoAliasDomain = MDB.createAnonymousAliasScopeDomain(Name); + Name += ": unknown scope"; + + llvm::MDNode *UnknownScope = + MDB.createAnonymousAliasScope(NoAliasDomain, Name); + + { + SmallVector ScopeListEntries(1, UnknownScope); + CallerNoAlias = llvm::MDNode::get(Caller->getContext(), ScopeListEntries); + Caller->setMetadata("noalias", CallerNoAlias); + } + NewUnknownScope = UnknownScope; + } + for (const BasicBlock &I : *CalledFunc) for (const Instruction &J : I) { if (const MDNode *M = J.getMetadata(LLVMContext::MD_alias_scope)) MD.insert(M); if (const MDNode *M = J.getMetadata(LLVMContext::MD_noalias)) MD.insert(M); + + // We also need to clone the metadata in noalias intrinsics. + if (const auto *II = dyn_cast(&J)) { + if (II->getIntrinsicID() == Intrinsic::noalias) + if (const auto *M = dyn_cast( + cast( + II->getOperand(Intrinsic::NoAliasScopeArg)) + ->getMetadata())) + MD.insert(M); + if (II->getIntrinsicID() == Intrinsic::provenance_noalias) + if (const auto *M = dyn_cast( + cast( + II->getOperand(Intrinsic::ProvenanceNoAliasScopeArg)) + ->getMetadata())) + MD.insert(M); + if (II->getIntrinsicID() == Intrinsic::noalias_decl) + if (const auto *M = dyn_cast( + cast( + II->getOperand(Intrinsic::NoAliasDeclScopeArg)) + ->getMetadata())) + MD.insert(M); + if (II->getIntrinsicID() == Intrinsic::noalias_copy_guard) + if (const auto *M = dyn_cast( + cast( + II->getOperand(Intrinsic::NoAliasCopyGuardScopeArg)) + ->getMetadata())) + MD.insert(M); + } } - if (MD.empty()) - return; + // No early exit: make sure all memory instructions are annotated // Walk the existing metadata, adding the complete (perhaps cyclic) chain to // the set. @@ -850,6 +912,13 @@ // the noalias scopes and the lists of those scopes. SmallVector DummyNodes; DenseMap MDMap; + if (CalleeNoAlias != nullptr) { + // Map CalleeNoAlias onto CallerNoAlias + MD.remove(CalleeNoAlias); + DummyNodes.push_back(MDTuple::getTemporary(CalledFunc->getContext(), None)); + MDMap[CalleeNoAlias].reset(DummyNodes.back().get()); + cast(MDMap[CalleeNoAlias])->replaceAllUsesWith(CallerNoAlias); + } for (const MDNode *I : MD) { DummyNodes.push_back(MDTuple::getTemporary(CalledFunc->getContext(), None)); MDMap[I].reset(DummyNodes.back().get()); @@ -877,6 +946,7 @@ // Now replace the metadata in the new inlined instructions with the // repacements from the map. + SmallPtrSet HandledInstructions; for (ValueToValueMapTy::iterator VMI = VMap.begin(), VMIE = VMap.end(); VMI != VMIE; ++VMI) { if (!VMI->second) @@ -886,31 +956,183 @@ if (!NI) continue; + // Check if we already adapted this instruction + if (!HandledInstructions.insert(NI).second) + continue; + if (MDNode *M = NI->getMetadata(LLVMContext::MD_alias_scope)) { - MDNode *NewMD = MDMap[M]; - // If the call site also had alias scope metadata (a list of scopes to - // which instructions inside it might belong), propagate those scopes to - // the inlined instructions. 
- if (MDNode *CSM = CB.getMetadata(LLVMContext::MD_alias_scope)) - NewMD = MDNode::concatenate(NewMD, CSM); - NI->setMetadata(LLVMContext::MD_alias_scope, NewMD); + auto MI = MDMap.find(M); + if (MI != MDMap.end()) { + MDNode *NewMD = MI->second; + // If the call site also had alias scope metadata (a list of scopes to + // which instructions inside it might belong), propagate those scopes to + // the inlined instructions. + if (MDNode *CSM = CB.getMetadata(LLVMContext::MD_alias_scope)) + NewMD = MDNode::concatenate(NewMD, CSM); + NI->setMetadata(LLVMContext::MD_alias_scope, NewMD); + } } else if (NI->mayReadOrWriteMemory()) { if (MDNode *M = CB.getMetadata(LLVMContext::MD_alias_scope)) NI->setMetadata(LLVMContext::MD_alias_scope, M); } if (MDNode *M = NI->getMetadata(LLVMContext::MD_noalias)) { - MDNode *NewMD = MDMap[M]; - // If the call site also had noalias metadata (a list of scopes with - // which instructions inside it don't alias), propagate those scopes to - // the inlined instructions. - if (MDNode *CSM = CB.getMetadata(LLVMContext::MD_noalias)) - NewMD = MDNode::concatenate(NewMD, CSM); - NI->setMetadata(LLVMContext::MD_noalias, NewMD); + auto MI = MDMap.find(M); + if (MI != MDMap.end()) { + MDNode *NewMD = MI->second; + // If the call site also had noalias metadata (a list of scopes with + // which instructions inside it don't alias), propagate those scopes to + // the inlined instructions. + if (MDNode *CSM = CB.getMetadata(LLVMContext::MD_noalias)) + NewMD = MDNode::concatenate(NewMD, CSM); + NI->setMetadata(LLVMContext::MD_noalias, NewMD); + } } else if (NI->mayReadOrWriteMemory()) { if (MDNode *M = CB.getMetadata(LLVMContext::MD_noalias)) NI->setMetadata(LLVMContext::MD_noalias, M); } + + // Update the metadata referenced by a noalias intrinsic + if (auto *II = dyn_cast(NI)) { + auto ID = II->getIntrinsicID(); + if (ID == Intrinsic::noalias || ID == Intrinsic::provenance_noalias || + ID == Intrinsic::noalias_decl || + ID == Intrinsic::noalias_copy_guard) { + int NoAliasScope = 0; + if (ID == Intrinsic::noalias) + NoAliasScope = Intrinsic::NoAliasScopeArg; + if (ID == Intrinsic::provenance_noalias) + NoAliasScope = Intrinsic::ProvenanceNoAliasScopeArg; + if (ID == Intrinsic::noalias_decl) + NoAliasScope = Intrinsic::NoAliasDeclScopeArg; + if (ID == Intrinsic::noalias_copy_guard) + NoAliasScope = Intrinsic::NoAliasCopyGuardScopeArg; + + if (auto *M = dyn_cast( + cast(II->getOperand(NoAliasScope)) + ->getMetadata())) { + // If the metadata is not in the map, it could be a new intrinsic + // that was just added. + auto MI = MDMap.find(M); + if (MI != MDMap.end()) + II->setOperand( + NoAliasScope, + MetadataAsValue::get(CalledFunc->getContext(), MI->second)); + } + } + } + } + + if (NewUnknownScope) { + // We now need to add the out-of-function scope to _all_ instructions with + // noalias data in the 'caller' + // Note: following strange choice of variables names is similar to how it is + // done later + // FIXME: hmm this might be less than fast :( + // hmm it is also needed to do this _after_ the metadata cloning, otherwise + // we seem to lose information ! 
+ for (BasicBlock &I : *Caller) { + for (Instruction &J : I) { + if (const MDNode *M = J.getMetadata(LLVMContext::MD_noalias)) { + SmallVector NewScopeList; + for (auto &MDOp : M->operands()) { + NewScopeList.push_back(MDOp); + } + NewScopeList.push_back(NewUnknownScope); + J.setMetadata(LLVMContext::MD_noalias, + MDNode::get(Caller->getContext(), NewScopeList)); + } else if (J.mayReadOrWriteMemory()) { + // no Noalias, but we need to add the (new) 'unknown scope' ! + J.setMetadata(LLVMContext::MD_noalias, CallerNoAlias); + } + } + } + } +} + +/// If the inlined function has noalias arguments, +/// then add a new alias scope to instructions that might access memory, and +/// noalias intrinsics corresponding to the noalias arguments. +static void AddNoAliasIntrinsics(CallBase &CB, ValueToValueMapTy &VMap, + MDNode *&NewScopeList) { + if (!EnableNoAliasConversion || !UseNoAliasIntrinsic) + return; + + const Function *CalledFunc = CB.getCalledFunction(); + SmallVector NoAliasArgs; + + for (const auto &Arg : CalledFunc->args()) { + if (Arg.hasNoAliasAttr() && !Arg.use_empty()) + NoAliasArgs.push_back(&Arg); + } + + if (NoAliasArgs.empty()) + return; + + MDBuilder MDB(CalledFunc->getContext()); + // Create a new scope domain for this function. + MDNode *NewDomain = + MDB.createAnonymousAliasScopeDomain(CalledFunc->getName()); + + // Create a new scope for each noalias argument. + SmallVector Scopes; + + // For each noalias argument, add a noalias intrinsic call, and update the + // value map to refer to the new result of the noalias call. + for (const Argument *A : NoAliasArgs) { + Value *MappedA = VMap[A]; + if (isa(MappedA)) { + // Skip generating restrict intrinsics for known 'null' pointers + continue; + } + + std::string Name(CalledFunc->getName()); + if (A->hasName()) { + Name += ": %"; + Name += A->getName(); + } else { + Name += ": argument "; + Name += utostr(A->getArgNo()); + } + + MDNode *AScope = MDB.createAnonymousAliasScope(NewDomain, Name); + Scopes.push_back(AScope); + + MDNode *AScopeList = MDNode::get(CalledFunc->getContext(), AScope); + + // The alloca was optimized away -> use a nullptr + auto *IdentifyPAlloca = + ConstantPointerNull::get(MappedA->getType()->getPointerTo()); + auto *NoAliasDecl = + IRBuilder<>(&CB).CreateNoAliasDeclaration(IdentifyPAlloca, AScopeList); + Value *NA = IRBuilder<>(&CB).CreateNoAliasPointer( + MappedA, NoAliasDecl, IdentifyPAlloca, AScopeList); + VMap[A] = NA; + } + + NewScopeList = MDNode::get(CalledFunc->getContext(), Scopes); +} + +static void AddNoAliasIntrinsicsScope(CallBase &CB, ValueToValueMapTy &VMap, + MDNode *NewScopeList) { + if (!EnableNoAliasConversion || !UseNoAliasIntrinsic || !NewScopeList) + return; + + // Iterate over all new instructions in the map; for all memory-access + // instructions, add the alias scope metadata. + for (ValueToValueMapTy::iterator VMI = VMap.begin(), VMIE = VMap.end(); + VMI != VMIE; ++VMI) { + if (!isa(VMI->first) || !VMI->second) + continue; + + auto *NI = dyn_cast(VMI->second); + if (!NI || !NI->mayReadOrWriteMemory()) + continue; + + NI->setMetadata( + LLVMContext::MD_noalias, + MDNode::concatenate(NI->getMetadata(LLVMContext::MD_noalias), + NewScopeList)); } } @@ -920,7 +1142,7 @@ /// non-derived loads, stores and memory intrinsics with the new alias scopes. 
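As a source-level illustration of what AddNoAliasIntrinsics and AddNoAliasIntrinsicsScope above achieve (the snippet is hypothetical, not taken from the tests): once copy() is inlined, its restrict parameters no longer exist as arguments, so the inliner re-expresses the pairwise guarantee by emitting one llvm.noalias.decl plus llvm.noalias call per noalias argument and tagging the inlined memory accesses with the freshly created scopes, rather than relying on the metadata-only path in AddAliasScopeMetadata below.

// Hypothetical example: 'copy' and 'caller' are illustrative names.
static void copy(int *__restrict Dst, const int *__restrict Src, int N) {
  for (int I = 0; I < N; ++I)
    Dst[I] = Src[I];
}

void caller(int *A, int *B, int N) {
  // After inlining, the Dst/Src no-alias pairing survives through the
  // llvm.noalias intrinsics and the scope metadata added on the copies.
  copy(A, B, N);
}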
static void AddAliasScopeMetadata(CallBase &CB, ValueToValueMapTy &VMap, const DataLayout &DL, AAResults *CalleeAAR) { - if (!EnableNoAliasConversion) + if (!EnableNoAliasConversion || UseNoAliasIntrinsic) return; const Function *CalledFunc = CB.getCalledFunction(); @@ -1764,6 +1986,7 @@ ValueToValueMapTy VMap; // Keep a list of pair (dst, src) to emit byval initializations. SmallVector, 4> ByValInit; + MDNode *NAScopeList = nullptr; auto &DL = Caller->getParent()->getDataLayout(); @@ -1801,6 +2024,9 @@ /// Preserve all attributes on of the call and its parameters. salvageKnowledge(&CB, AC); + // Add noalias intrinsics corresponding to noalias function arguments. + AddNoAliasIntrinsics(CB, VMap, NAScopeList); + // We want the inliner to prune the code as it copies. We would LOVE to // have no dead or constant instructions leftover after inlining occurs // (which can happen, e.g., because an argument was constant), but we'll be @@ -1886,7 +2112,7 @@ CalledFunc->getSubprogram() != nullptr); // Clone existing noalias metadata if necessary. - CloneAliasScopeMetadata(CB, VMap); + CloneAliasScopeMetadata(CB, VMap, Caller); // Add noalias metadata if necessary. AddAliasScopeMetadata(CB, VMap, DL, CalleeAAR); @@ -1898,6 +2124,9 @@ // Propagate llvm.mem.parallel_loop_access if necessary. PropagateParallelLoopAccessMetadata(CB, VMap); + // Add scopes to memory accesses corresponding to added noalias intrinsics. + AddNoAliasIntrinsicsScope(CB, VMap, NAScopeList); + // Register any cloned assumptions. if (IFI.GetAssumptionCache) for (BasicBlock &NewBlock : @@ -2332,6 +2561,9 @@ // Since we are now done with the return instruction, delete it also. Returns[0]->eraseFromParent(); + // Already try to connect llvm.noalias.decl where possible + propagateAndConnectNoAliasDecl(Caller); + // We are now done with the inlining. return InlineResult::success(); } @@ -2482,6 +2714,9 @@ // Now we can remove the CalleeEntry block, which is now empty. Caller->getBasicBlockList().erase(CalleeEntry); + // Already try to connect llvm.noalias.decl where possible + propagateAndConnectNoAliasDecl(Caller); + // If we inserted a phi node, check to see if it has a single value (e.g. all // the entries are the same or undef). If so, remove the PHI so it doesn't // block other optimizations. Index: llvm/lib/Transforms/Utils/Local.cpp =================================================================== --- llvm/lib/Transforms/Utils/Local.cpp +++ llvm/lib/Transforms/Utils/Local.cpp @@ -432,6 +432,29 @@ return false; } + + // noalias intrinsics are dead if they have no uses (they're tagged as + // writing, but that is only to maintain control dependencies, not because + // they actually write anything). + if (II->getIntrinsicID() == Intrinsic::noalias || + II->getIntrinsicID() == Intrinsic::provenance_noalias || + II->getIntrinsicID() == Intrinsic::noalias_copy_guard || + II->getIntrinsicID() == Intrinsic::noalias_arg_guard) + return II->use_empty(); + + // llvm.noalias.decl which operand only has a single use are trivially dead. + // NOTE: this assumes that noalias intrinsics nodes are never combined ! 
+ if (II->getIntrinsicID() == Intrinsic::noalias_decl) { + auto *DAA = II->getOperand(Intrinsic::NoAliasDeclAllocaArg); + if (isa(DAA)) { + // no associated alloca -> if there are no uses, it is trivially dead + return II->getNumUses() == 0; + } else { + return (DAA->getNumUses() == 1) && + (II->getOperand(Intrinsic::NoAliasDeclScopeArg)->getNumUses() == + 1); + } + } } if (isAllocLikeFn(I, TLI)) Index: llvm/lib/Transforms/Utils/LoopRotationUtils.cpp =================================================================== --- llvm/lib/Transforms/Utils/LoopRotationUtils.cpp +++ llvm/lib/Transforms/Utils/LoopRotationUtils.cpp @@ -38,6 +38,7 @@ #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/Local.h" #include "llvm/Transforms/Utils/LoopUtils.h" +#include "llvm/Transforms/Utils/NoAliasUtils.h" #include "llvm/Transforms/Utils/SSAUpdater.h" #include "llvm/Transforms/Utils/ValueMapper.h" using namespace llvm; @@ -391,6 +392,85 @@ break; } + // check if there are local restrict declarations and how they are used. + // - avoid breaking up mixed usage: + // -- either all usages must be in the OrigHeader, or no usages must be in + // the OrigHeader. + // -- when usages are outside the function, and we decide to continue, break + // the connection with the llvm.noalias.decl, as it will have no impact + // any more. + SmallVector NoAliasDeclInstructions; + { + SmallVector ProvenanceNoAliasOrNoAliasToDisconnect; + for (Instruction &I : *OrigHeader) { + if (IntrinsicInst *II = dyn_cast(&I)) { + if (II->getIntrinsicID() == Intrinsic::noalias_decl) { + // Check usage validity: + bool UsedInsideHeader = false; + bool UsedOutsideHeaderInsideLoop = false; + for (User *U : II->users()) { + Instruction *UI = cast(U); + if (UI->getParent() == OrigHeader) { + UsedInsideHeader = true; + } else if (L->contains(UI)) { + UsedOutsideHeaderInsideLoop = true; + } else { + if (PHINode *PUI = dyn_cast(UI)) { + if (PUI->getNumIncomingValues() > 1) { + LLVM_DEBUG( + llvm::dbgs() + << "LoopRotation: NOT rotating - " << *II + << "\n used in PHI node " << *PUI + << ",\n in exit block with multiple entries.\n"); + return false; + } + } + ProvenanceNoAliasOrNoAliasToDisconnect.push_back(UI); + } + } + + if (UsedInsideHeader && UsedOutsideHeaderInsideLoop) { + LLVM_DEBUG(llvm::dbgs() + << "LoopRotation: NOT rotating - " << *II + << " used in header and other parts of the loop.\n" + " Rotation would reduced the llvm.noalias quality " + "too much.\n"); + return false; + } + + NoAliasDeclInstructions.push_back(II); + } + } + } + + // If we get here, we will do the rotate. + // First break the link between any llvm.noalias.decl and its outside-loop + // usage + for (Instruction *I : ProvenanceNoAliasOrNoAliasToDisconnect) { + unsigned OpN; + if (PHINode *PI = dyn_cast(I)) { + // can only happen when in the exit block -> single predecessor ! 
+ assert(PI->getNumIncomingValues() == 1 && + "PHI node should have a single value"); + (void)PI; // Silence not-used warning in Release builds + OpN = 0; + } else { + IntrinsicInst *II = cast(I); + if (II->getIntrinsicID() == Intrinsic::provenance_noalias) { + OpN = Intrinsic::ProvenanceNoAliasNoAliasDeclArg; + } else if (II->getIntrinsicID() == Intrinsic::noalias) { + OpN = Intrinsic::NoAliasNoAliasDeclArg; + } else { + assert(II->getIntrinsicID() == Intrinsic::noalias_copy_guard); + OpN = Intrinsic::NoAliasCopyGuardNoAliasDeclArg; + } + } + + auto *PT = cast(I->getOperand(OpN)->getType()); + I->setOperand(OpN, ConstantPointerNull::get(PT)); + } + } + while (I != E) { Instruction *Inst = &*I++; @@ -451,6 +531,75 @@ } } + if (!NoAliasDeclInstructions.empty()) { + // There are local restrict declarations: + // (general): + // Original: OrigPre { OrigHeader NewHeader ... Latch } + // after: (OrigPre+OrigHeader') { NewHeader ... Latch OrigHeader } + // + // with D: llvm.noalias.decl, U: provenance.noalias, depending on D + // ... { D U } can transform into: + // (0) : ... { D U } // no relevant rotation for this part + // (1) : ... D' { U D } + // (2) : ... D' U' { D U } + // + // We now want to transform: + // (1) -> : ... D' { D U D'' } + // (2) -> : ... D' U' { D D'' U'' } + // D: original llvm.noalias.decl + // D', U': duplicate with replaced scopes + // D'', U'': different duplicate with replaced scopes + // This ensures a safe fallback to 'may_alias' introduced by the rotate, + // as U'' and U' scopes will not be compatible wrt to the local restrict + + // Clone the NoAliasDecl again, insert right after the original, move the + // original to the NewHeader. + + Instruction *NewHeaderInsertionPoint = &(*NewHeader->getFirstNonPHI()); + for (Instruction *NAD : NoAliasDeclInstructions) { + LLVM_DEBUG(llvm::dbgs() + << " Cloning llvm.noalias.decl:" << *NAD << "\n"); + Instruction *NewNAD = NAD->clone(); + NewNAD->insertBefore(NAD); + + // remap dependencies in the OrigHeader block to NewNAD + NAD->replaceUsesInsideBlock(NewNAD, OrigHeader); + + // And move the original NAD to the NewHeader + NAD->moveBefore(NewHeaderInsertionPoint); + + // Now forget about the original NAD mapping + auto tmp = + ValueMap[NAD]; // use a local copy to avoid undefined behavior + ValueMap[NewNAD] = tmp; // this could trigger a reallocation. + ValueMap.erase(NAD); + } + + // Scopes must now be duplicated, once for OrigHeader and once for + // OrigPreHeader' + { + auto &Context = NewHeader->getContext(); + + SmallVector NoAliasDeclScopes; + for (Instruction *NAD : NoAliasDeclInstructions) + NoAliasDeclScopes.push_back(cast( + NAD->getOperand(Intrinsic::NoAliasDeclScopeArg))); + + LLVM_DEBUG(llvm::dbgs() << " Updating OrigHeader scopes\n"); + llvm::cloneAndAdaptNoAliasScopes(NoAliasDeclScopes, {OrigHeader}, + Context, "h.rot1"); + LLVM_DEBUG(OrigHeader->dump()); + + LLVM_DEBUG(llvm::dbgs() << " Updating OrigPreheader scopes\n"); + llvm::cloneAndAdaptNoAliasScopes(NoAliasDeclScopes, {OrigPreheader}, + Context, "pre.rot1"); + LLVM_DEBUG(OrigPreheader->dump()); + + LLVM_DEBUG(llvm::dbgs() << " Updated NewHeader:\n"); + LLVM_DEBUG(NewHeader->dump()); + } + } + // Along with all the other instructions, we just cloned OrigHeader's // terminator into OrigPreHeader. Fix up the PHI nodes in each of OrigHeader's // successors by duplicating their incoming values for OrigHeader. 
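For reference, a minimal source-level shape of the case the rotation logic above protects (illustrative only, not taken from the test suite): a restrict-qualified local inside the loop produces a block-local llvm.noalias.decl. When the header holding that declaration is cloned into the pre-header, the declaration and its scope are duplicated so the two copies keep distinct restrict scopes, and usage that mixes header and non-header blocks of the loop makes the rotation bail out instead.

// Illustrative only: the __restrict local is what gives rise to a
// block-local llvm.noalias.decl that loop rotation has to duplicate.
void scale(float *A, const float *B, int N) {
  for (int I = 0; I < N; ++I) {
    float *__restrict P = A + I; // local restrict declaration
    *P = B[I] * 2.0f;            // access carries the declaration's scope
  }
}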
Index: llvm/lib/Transforms/Utils/LoopUnroll.cpp =================================================================== --- llvm/lib/Transforms/Utils/LoopUnroll.cpp +++ llvm/lib/Transforms/Utils/LoopUnroll.cpp @@ -61,6 +61,7 @@ #include "llvm/Transforms/Utils/Local.h" #include "llvm/Transforms/Utils/LoopSimplify.h" #include "llvm/Transforms/Utils/LoopUtils.h" +#include "llvm/Transforms/Utils/NoAliasUtils.h" #include "llvm/Transforms/Utils/SimplifyIndVar.h" #include "llvm/Transforms/Utils/UnrollLoop.h" #include "llvm/Transforms/Utils/ValueMapper.h" @@ -583,6 +584,51 @@ << DIL->getFilename() << " Line: " << DIL->getLine()); } + // Phase1: Identify what noalias metadata is inside the loop: if it is inside + // the loop, the associated metadata must be cloned for each iteration. + SmallVector LoopLocalNoAliasDeclScopes; + llvm::identifyNoAliasScopesToClone(L->getBlocks(), + LoopLocalNoAliasDeclScopes); + + if (!LoopLocalNoAliasDeclScopes.empty() && (ULO.Count > 1)) { + // We have loop local llvm.noalias.decl. If they are used by code that is + // considered to be outside the loop, the must go through the latch block. + // We duplicate the llvm.noalias.decl to the latch block, so that migrated + // code can still locally benefit from the noalias information. But it will + // get disconnected from the information inside the loop body itself. + SmallVector UniqueExitBlocks; + L->getUniqueExitBlocks(UniqueExitBlocks); + for (auto *EB : UniqueExitBlocks) { + SmallVector, 6> ClonedNoAlias; + + for (auto &PN : EB->phis()) { + Value *V = PN.getIncomingValue(0); + while (true) { + if (IntrinsicInst *II = dyn_cast(V)) { + if (II->getIntrinsicID() == Intrinsic::noalias_decl) { + ClonedNoAlias.emplace_back(II->clone(), &PN); + } + } else if (PHINode *PN2 = dyn_cast(V)) { + if (PN2->getNumIncomingValues() == 1) { + // look through phi ( phi (.. ) ) in case of nested loops + V = PN2->getIncomingValue(0); + continue; + } + } + break; + } + } + + auto IP = EB->getFirstInsertionPt(); + for (auto P : ClonedNoAlias) { + EB->getInstList().insert(IP, P.first); + } + for (auto P : ClonedNoAlias) { + P.second->replaceAllUsesWith(P.first); + } + } + } + for (unsigned It = 1; It != ULO.Count; ++It) { SmallVector NewBlocks; SmallDenseMap NewLoops; @@ -676,6 +722,26 @@ AC->registerAssumption(II); } } + + { + // Phase2: identify what other metadata depends on the cloned version + // Phase3: after cloning, replace the metadata with the corrected version + // for both memory instructions and noalias intrinsics + std::string ext = "It"; + ext += utostr(It); + cloneAndAdaptNoAliasScopes(LoopLocalNoAliasDeclScopes, NewBlocks, + Header->getContext(), ext); + } + } + + if (!LoopLocalNoAliasDeclScopes.empty() && (ULO.Count > 1)) { + // Also adapt the scopes of the first iteration to avoid any + // conflicts with instructions outside the loop using the + // scopes. Those have already been taken care of by + // duplicating the llvm.noalias.decl. + std::vector OldBlocks(BlockBegin, BlockEnd); + cloneAndAdaptNoAliasScopes(LoopLocalNoAliasDeclScopes, OldBlocks, + Header->getContext(), "It0"); } // Loop over the PHI nodes in the original block, setting incoming values. Index: llvm/lib/Transforms/Utils/NoAliasUtils.cpp =================================================================== --- /dev/null +++ llvm/lib/Transforms/Utils/NoAliasUtils.cpp @@ -0,0 +1,252 @@ +//===-- NoAliasUtils.cpp - NoAlias Utility functions ----------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 
+// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file defines common noalias metadatt and intrinsic utility functions. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Transforms/Utils/NoAliasUtils.h" +#include "llvm/ADT/MapVector.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/Analysis/ValueTracking.h" +#include "llvm/IR/InstIterator.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/MDBuilder.h" +#include "llvm/IR/Metadata.h" +#include "llvm/IR/Module.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" + +using namespace llvm; + +#define DEBUG_TYPE "noalias-utils" + +bool llvm::propagateAndConnectNoAliasDecl(Function *F) { + auto *UnknownFunctionScope = F->getMetadata("noalias"); + if (UnknownFunctionScope == nullptr) + return false; + + SmallVector InterestingNoalias; + SmallMapVector KnownAllocaNoAliasDecl; + + auto TrackIfIsUnknownFunctionScope = [&](IntrinsicInst *I, unsigned Index) { + auto V = I->getOperand(Index); + if (cast(V)->getMetadata() == UnknownFunctionScope) { + InterestingNoalias.push_back(I); + } + }; + + for (Instruction &I : llvm::instructions(*F)) { + if (IntrinsicInst *II = dyn_cast(&I)) { + switch (II->getIntrinsicID()) { + case Intrinsic::noalias: { + TrackIfIsUnknownFunctionScope(II, Intrinsic::NoAliasScopeArg); + break; + } + case Intrinsic::provenance_noalias: { + TrackIfIsUnknownFunctionScope(II, Intrinsic::ProvenanceNoAliasScopeArg); + break; + } + case Intrinsic::noalias_copy_guard: { + TrackIfIsUnknownFunctionScope(II, Intrinsic::NoAliasCopyGuardScopeArg); + break; + } + case Intrinsic::noalias_decl: { + auto *depAlloca = dyn_cast(II->getOperand(0)); + if (depAlloca) { + KnownAllocaNoAliasDecl[depAlloca] = II; + } + break; + } + default: + break; + } + } + } + + if (KnownAllocaNoAliasDecl.empty() || InterestingNoalias.empty()) + return false; + + bool Changed = false; + for (auto *II : InterestingNoalias) { + SmallVector UO; + unsigned Index = + (II->getIntrinsicID() == Intrinsic::noalias + ? 0 + : (II->getIntrinsicID() == Intrinsic::provenance_noalias ? 1 : 2)); + const int IdentifyPArg[] = {Intrinsic::NoAliasIdentifyPArg, + Intrinsic::ProvenanceNoAliasIdentifyPArg, + Intrinsic::NoAliasCopyGuardIdentifyPBaseObject}; + const int ScopeArg[] = {Intrinsic::NoAliasScopeArg, + Intrinsic::ProvenanceNoAliasScopeArg, + Intrinsic::NoAliasCopyGuardScopeArg}; + const int NoAliasDeclArg[] = {Intrinsic::NoAliasNoAliasDeclArg, + Intrinsic::ProvenanceNoAliasNoAliasDeclArg, + Intrinsic::NoAliasCopyGuardNoAliasDeclArg}; + const int ObjIdArg[] = {Intrinsic::NoAliasIdentifyPObjIdArg, + Intrinsic::ProvenanceNoAliasIdentifyPObjIdArg, -1}; + + llvm::GetUnderlyingObjects(II->getOperand(IdentifyPArg[Index]), UO, + F->getParent()->getDataLayout()); + if (UO.size() != 1) { + // Multiple objects possible - It would be nice to propagate, but we do + // not do it yet. That is ok as the unknown function scope assumes more + // aliasing. 
+ LLVM_DEBUG(llvm::dbgs() + << "WARNING: no llvm.noalias.decl reconnect accross " + "PHI/select - YET (" + << UO.size() << " underlying objects)\n"); + continue; + } + + auto UA = dyn_cast(UO[0]); + if (UA) { + auto it = KnownAllocaNoAliasDecl.find(UA); + if (it != KnownAllocaNoAliasDecl.end()) { + Instruction *Decl = it->second; + // found a simple matching declaration - propagate + II->setOperand(ScopeArg[Index], + Decl->getOperand(Intrinsic::NoAliasDeclScopeArg)); + II->setOperand(NoAliasDeclArg[Index], Decl); + + auto ObjIdIndex = ObjIdArg[Index]; + if (ObjIdIndex != -1) { + II->setOperand(ObjIdIndex, + Decl->getOperand(Intrinsic::NoAliasDeclObjIdArg)); + } + Changed = true; + } else if (UnknownFunctionScope && isa(UA)) { + if (cast(II->getOperand(ScopeArg[Index])) + ->getMetadata() == UnknownFunctionScope) { + // we have an alloca, but no llvm.noalias.decl and we have unknown + // function scope This is an indication of a temporary that (through a + // pointer or reference to a restrict pointer) introduces restrict. + // - the unknown scope is too broad for these cases + // - conceptually, the scope should be the lifetime of the local, but + // we don't have that information + // - the real restrictness should have been brought in through the + // 'depends on' relationship + // -> so we fall back on the 'depends on' and remove the restrictness + // information at this level. + LLVM_DEBUG( + llvm::dbgs() + << "- Temporary noalias object (without llvm.noalias.decl) " + "detected. Ignore restrictness: " + << *II << "\n"); + II->replaceAllUsesWith(II->getOperand(0)); + II->eraseFromParent(); + Changed = true; + } + } + } else { +#if !defined(NDEBUG) + if (isa(UO[0]) || isa(UO[0])) { + // Multiple objects possible - It would be nice to propagate, but we do + // not do it yet. That is ok as the unknown function scope assumes more + // aliasing. + LLVM_DEBUG(llvm::dbgs() + << "WARNING: no llvm.noalias.decl reconnect accross " + "PHI/select - YET: " + << *UO[0] << "\n"); + } +#endif + } + } + return Changed; +} + +void llvm::cloneNoAliasScopes( + ArrayRef NoAliasDeclScopes, + DenseMap &out_ClonedScopes, + DenseMap &out_ClonedMVScopes, + StringRef Ext, LLVMContext &Context) { + MDBuilder MDB(Context); + + for (auto *MV : NoAliasDeclScopes) { + SmallVector ScopeList; + for (auto &MDOperand : cast(MV->getMetadata())->operands()) { + if (MDNode *MD = dyn_cast(MDOperand)) { + llvm::AliasScopeNode SNANode(MD); + + std::string Name; + auto ScopeName = SNANode.getName(); + if (!ScopeName.empty()) { + Name = std::string(ScopeName); + Name += ":"; + Name += Ext; + } else { + Name = std::string(Ext); + } + + MDNode *NewScope = MDB.createAnonymousAliasScope( + const_cast(SNANode.getDomain()), Name); + out_ClonedScopes.insert(std::make_pair(MD, NewScope)); + ScopeList.push_back(NewScope); + } + } + MDNode *NewScopeList = MDNode::get(Context, ScopeList); + out_ClonedMVScopes.insert( + std::make_pair(MV, MetadataAsValue::get(Context, NewScopeList))); + } +} + +void llvm::adaptNoAliasScopes( + Instruction *I, DenseMap &ClonedScopes, + DenseMap &ClonedMVScopes, + LLVMContext &Context) { + // MetadataAsValue will always be replaced ! 
+ for (int opI = 0, opIEnd = I->getNumOperands(); opI < opIEnd; ++opI) { + if (MetadataAsValue *MV = dyn_cast(I->getOperand(opI))) { + auto MvIt = ClonedMVScopes.find(MV); + if (MvIt != ClonedMVScopes.end()) { + I->setOperand(opI, MvIt->second); + } + } + } + if (const MDNode *CSNoAlias = I->getMetadata(LLVMContext::MD_noalias)) { + bool needsReplacement = false; + SmallVector NewScopeList; + for (auto &MDOp : CSNoAlias->operands()) { + if (MDNode *MD = dyn_cast_or_null(MDOp)) { + auto MdIt = ClonedScopes.find(MD); + if (MdIt != ClonedScopes.end()) { + NewScopeList.push_back(MdIt->second); + needsReplacement = true; + continue; + } + NewScopeList.push_back(MD); + } + } + if (needsReplacement) { + I->setMetadata(LLVMContext::MD_noalias, + MDNode::get(Context, NewScopeList)); + } + } +} + +void llvm::cloneAndAdaptNoAliasScopes( + ArrayRef NoAliasDeclScopes, + ArrayRef NewBlocks, LLVMContext &Context, StringRef Ext) { + if (NoAliasDeclScopes.empty()) + return; + + DenseMap ClonedScopes; + DenseMap ClonedMVScopes; + LLVM_DEBUG(llvm::dbgs() << "cloneAndAdaptNoAliasScopes: cloning " + << NoAliasDeclScopes.size() << " node(s)\n"); + + cloneNoAliasScopes(NoAliasDeclScopes, ClonedScopes, ClonedMVScopes, Ext, + Context); + // Identify instructions using metadata that needs adaptation + for (BasicBlock *NewBlock : NewBlocks) { + for (Instruction &I : *NewBlock) { + adaptNoAliasScopes(&I, ClonedScopes, ClonedMVScopes, Context); + } + } +} Index: llvm/lib/Transforms/Utils/PromoteMemoryToRegister.cpp =================================================================== --- llvm/lib/Transforms/Utils/PromoteMemoryToRegister.cpp +++ llvm/lib/Transforms/Utils/PromoteMemoryToRegister.cpp @@ -25,7 +25,6 @@ #include "llvm/Analysis/AssumptionCache.h" #include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Analysis/IteratedDominanceFrontier.h" -#include "llvm/Transforms/Utils/Local.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/IR/BasicBlock.h" #include "llvm/IR/CFG.h" @@ -35,6 +34,7 @@ #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/Dominators.h" #include "llvm/IR/Function.h" +#include "llvm/IR/IRBuilder.h" #include "llvm/IR/InstrTypes.h" #include "llvm/IR/Instruction.h" #include "llvm/IR/Instructions.h" @@ -45,6 +45,7 @@ #include "llvm/IR/Type.h" #include "llvm/IR/User.h" #include "llvm/Support/Casting.h" +#include "llvm/Transforms/Utils/Local.h" #include "llvm/Transforms/Utils/PromoteMemToReg.h" #include #include @@ -61,40 +62,190 @@ STATISTIC(NumDeadAlloca, "Number of dead alloca's removed"); STATISTIC(NumPHIInsert, "Number of PHI nodes inserted"); -bool llvm::isAllocaPromotable(const AllocaInst *AI) { +class PromotableChecker { +public: + bool check(bool c) { return c; } + void trackRemovable(const Instruction *I) {} + void trackOperandToZero(const Instruction *I, int operand) {} + void trackNoAliasDecl(const IntrinsicInst *II) {} +}; + +class PromotableTracker { +public: + bool check(bool c) { + assert(!c && "PromotableTracker::check failed"); + return false; + } + void trackRemovable(Instruction *I) { + // FIXME: Performance Warning: linear search - might become slow (?) 
+ if (std::find(mRemovables.begin(), mRemovables.end(), I) == + mRemovables.end()) + mRemovables.push_back(I); + } + void trackOperandToZero(Instruction *I, int operand) { + mZeroOperands.emplace_back(I, operand); + } + void trackNoAliasDecl(IntrinsicInst *II) { mNoAliasDecls.push_back(II); } + +public: + SmallVector mRemovables; + SmallVector, 4> mZeroOperands; + SmallVector mNoAliasDecls; +}; + +// Return true if the only usage of this pointer is as identifyP argument for +// llvm.noalias or llvm.provenance.noalias (either direct or recursive) +// Look through bitcast, getelementptr, llvm.noalias, llvm.provenance.noalias + +template +bool onlyUsedByNoaliasOrProvenanceNoAliasIdentifyPArg(Value *V, PT &pt); + +template +bool isAndOnlyUsedByNoaliasOrProvenanceNoAliasIdentifyPArg(IntrinsicInst *II, + unsigned OpNo, + PT &pt) { + if (II->getIntrinsicID() == Intrinsic::provenance_noalias) { + if (OpNo == 0) { + if (!onlyUsedByNoaliasOrProvenanceNoAliasIdentifyPArg(II, pt)) + return false; + pt.trackRemovable(II); + } else if (OpNo == Intrinsic::ProvenanceNoAliasIdentifyPArg) { + pt.trackOperandToZero(II, OpNo); + } else if (OpNo == Intrinsic::ProvenanceNoAliasIdentifyPProvenanceArg) { + pt.trackOperandToZero(II, OpNo); + } else { + assert(false && "Unexpected llvm.provenance.noalias dependency"); + } + return true; + } else if (II->getIntrinsicID() == Intrinsic::noalias) { + if (OpNo == 0) { + if (!onlyUsedByNoaliasOrProvenanceNoAliasIdentifyPArg(II, pt)) + return false; + pt.trackRemovable(II); + } else if (OpNo == Intrinsic::NoAliasIdentifyPArg) { + pt.trackOperandToZero(II, OpNo); + } else { + assert(false && "Unexpected llvm.provenance.noalias dependency"); + } + return true; + } + + return false; +} + +template +bool onlyUsedByNoaliasOrProvenanceNoAliasIdentifyPArg(Value *V, PT &pt) { + for (Use &U_ : V->uses()) { + unsigned OpNo = U_.getOperandNo(); + User *U = U_.getUser(); + if (IntrinsicInst *II = dyn_cast(U)) { + if (isAndOnlyUsedByNoaliasOrProvenanceNoAliasIdentifyPArg(II, OpNo, pt)) + continue; + return false; + } else if (BitCastInst *BCI = dyn_cast(U)) { + if (!onlyUsedByNoaliasOrProvenanceNoAliasIdentifyPArg(BCI, pt)) + return false; + pt.trackRemovable(BCI); + } else if (GetElementPtrInst *GEPI = dyn_cast(U)) { + if (!onlyUsedByNoaliasOrProvenanceNoAliasIdentifyPArg(GEPI, pt)) + return false; + pt.trackRemovable(GEPI); + } else if (AddrSpaceCastInst *ASCI = dyn_cast(U)) { + if (!onlyUsedByNoaliasOrProvenanceNoAliasIdentifyPArg(ASCI, pt)) + return false; + pt.trackRemovable(ASCI); + } else { + return false; + } + } + return true; +} + +template bool trackAllocaPromotable(AllocaInst *AI, PT &pt) { // FIXME: If the memory unit is of pointer or integer type, we can permit // assignments to subsections of the memory unit. unsigned AS = AI->getType()->getAddressSpace(); // Only allow direct and non-volatile loads and stores... - for (const User *U : AI->users()) { - if (const LoadInst *LI = dyn_cast(U)) { + for (Use &U_ : AI->uses()) { + unsigned OpNo = U_.getOperandNo(); + User *U = U_.getUser(); + + if (LoadInst *LI = dyn_cast(U)) { // Note that atomic loads can be transformed; atomic semantics do // not have any meaning for a local alloca. - if (LI->isVolatile()) + if (pt.check(LI->isVolatile())) return false; - } else if (const StoreInst *SI = dyn_cast(U)) { - if (SI->getOperand(0) == AI) + if (OpNo == LI->getNoaliasProvenanceOperandIndex()) { + // Load will be removed. 
Disconnect provenance.noalias dependency + pt.trackOperandToZero(LI, OpNo); + } + } else if (StoreInst *SI = dyn_cast(U)) { + if (pt.check(OpNo == 0)) return false; // Don't allow a store OF the AI, only INTO the AI. // Note that atomic stores can be transformed; atomic semantics do // not have any meaning for a local alloca. - if (SI->isVolatile()) + if (pt.check(SI->isVolatile())) return false; - } else if (const IntrinsicInst *II = dyn_cast(U)) { - if (!II->isLifetimeStartOrEnd()) + if (OpNo == SI->getNoaliasProvenanceOperandIndex()) { + // Store will be removed. Disconnect provenance.noalias dependency + pt.trackOperandToZero(SI, OpNo); + } + } else if (IntrinsicInst *II = dyn_cast(U)) { + switch (II->getIntrinsicID()) { + case Intrinsic::lifetime_start: + case Intrinsic::lifetime_end: + pt.trackRemovable(II); + break; + case Intrinsic::noalias_decl: + pt.trackNoAliasDecl(II); + break; + case Intrinsic::noalias: + case Intrinsic::provenance_noalias: + if (!isAndOnlyUsedByNoaliasOrProvenanceNoAliasIdentifyPArg(II, OpNo, + pt)) + return false; + break; + default: return false; - } else if (const BitCastInst *BCI = dyn_cast(U)) { - if (BCI->getType() != Type::getInt8PtrTy(U->getContext(), AS)) + } + } else if (BitCastInst *BCI = dyn_cast(U)) { + if (onlyUsedByNoaliasOrProvenanceNoAliasIdentifyPArg(BCI, pt)) { + pt.trackRemovable(BCI); + continue; + } + if (pt.check(BCI->getType() != Type::getInt8PtrTy(U->getContext(), AS))) return false; - if (!onlyUsedByLifetimeMarkers(BCI)) + if (pt.check(!onlyUsedByLifetimeMarkers(BCI))) return false; - } else if (const GetElementPtrInst *GEPI = dyn_cast(U)) { - if (GEPI->getType() != Type::getInt8PtrTy(U->getContext(), AS)) + + for (auto *U2 : BCI->users()) { + pt.trackRemovable(cast(U2)); + } + pt.trackRemovable(BCI); + } else if (GetElementPtrInst *GEPI = dyn_cast(U)) { + if (onlyUsedByNoaliasOrProvenanceNoAliasIdentifyPArg(GEPI, pt)) { + pt.trackRemovable(GEPI); + continue; + } + if (pt.check(GEPI->getType() != Type::getInt8PtrTy(U->getContext(), AS))) return false; - if (!GEPI->hasAllZeroIndices()) + if (pt.check(!GEPI->hasAllZeroIndices())) return false; - if (!onlyUsedByLifetimeMarkers(GEPI)) + if (pt.check(!onlyUsedByLifetimeMarkers(GEPI))) return false; + + for (auto *U2 : GEPI->users()) { + pt.trackRemovable(cast(U2)); + } + pt.trackRemovable(GEPI); + } else if (AddrSpaceCastInst *ASCI = dyn_cast(U)) { + if (onlyUsedByNoaliasOrProvenanceNoAliasIdentifyPArg(ASCI, pt)) { + pt.trackRemovable(ASCI); + continue; + } + + return false; } else { return false; } @@ -103,6 +254,11 @@ return true; } +bool llvm::isAllocaPromotable(const AllocaInst *AI) { + PromotableChecker pc; + return trackAllocaPromotable(const_cast(AI), pc); +} + namespace { struct AllocaInfo { @@ -312,26 +468,192 @@ AC->registerAssumption(CI); } -static void removeLifetimeIntrinsicUsers(AllocaInst *AI) { - // Knowing that this alloca is promotable, we know that it's safe to kill all - // instructions except for load and store. +static void removeIntrinsicUsers(AllocaInst *AI) { + // The AI is going to be deleted. Remove lifeftime intrinsic users. + // Also disconnect and remove noalias/provenance.noalias/noalias_decl + // intrinsic users. - for (auto UI = AI->user_begin(), UE = AI->user_end(); UI != UE;) { - Instruction *I = cast(*UI); - ++UI; - if (isa(I) || isa(I)) - continue; + // Track the possible intrinsics. If we do not have a noalias.decl or we do + // not have an unknown function scope, no extra modificiations are needed. 
If + // both are there, we need to propagate the MetadataValue from the declaration + // to those intrinsics that are using the unknown scope. + PromotableTracker pt; - if (!I->getType()->isVoidTy()) { - // The only users of this bitcast/GEP instruction are lifetime intrinsics. - // Follow the use/def chain to erase them now instead of leaving it for - // dead code elimination later. - for (auto UUI = I->user_begin(), UUE = I->user_end(); UUI != UUE;) { - Instruction *Inst = cast(*UUI); - ++UUI; - Inst->eraseFromParent(); + if (!trackAllocaPromotable(AI, pt)) { + assert(false && "trackAllocaPromotable not consistent"); + } + + // Propagate NoaliasDecl + MDNode *NoAliasUnknownScopeMD = + AI->getParent()->getParent()->getMetadata("noalias"); + Instruction *NoAliasDecl = nullptr; + if (pt.mNoAliasDecls.size() == 1) + NoAliasDecl = pt.mNoAliasDecls[0]; + + if (NoAliasUnknownScopeMD) { + if (NoAliasDecl) { + LLVM_DEBUG(llvm::dbgs() + << "- Propagating " << *NoAliasDecl << " scope to:\n"); + auto NoAliasDeclScope = + NoAliasDecl->getOperand(Intrinsic::NoAliasDeclScopeArg); + for (auto PairIO : pt.mZeroOperands) { + Instruction *I = PairIO.first; + auto OpNo = PairIO.second; + (void)OpNo; // Silence not used warning in Release builds. + if (IntrinsicInst *II = dyn_cast(I)) { + auto ID = II->getIntrinsicID(); + if (ID == Intrinsic::noalias || ID == Intrinsic::provenance_noalias) { + // If we get here, we can assume the identifyP or its provenance + // are dependencies + assert( + (ID == Intrinsic::noalias) + ? (OpNo == Intrinsic::NoAliasIdentifyPArg) + : (OpNo == Intrinsic::ProvenanceNoAliasIdentifyPArg || + OpNo == + Intrinsic::ProvenanceNoAliasIdentifyPProvenanceArg)); + unsigned ScopeArg = (ID == Intrinsic::noalias + ? Intrinsic::NoAliasScopeArg + : Intrinsic::ProvenanceNoAliasScopeArg); + unsigned DeclArg = + (ID == Intrinsic::noalias + ? Intrinsic::NoAliasNoAliasDeclArg + : Intrinsic::ProvenanceNoAliasNoAliasDeclArg); + MetadataAsValue *MV = + cast(I->getOperand(ScopeArg)); + if (NoAliasUnknownScopeMD == MV->getMetadata()) { + // Propagate the declaration scope + // Note: splitting already took care of updating the ObjId + LLVM_DEBUG(llvm::dbgs() << "-- " << *I << "\n"); + II->setOperand(ScopeArg, NoAliasDeclScope); + + // also update the noalias declaration + II->setOperand(DeclArg, NoAliasDecl); + } + } + } + } + } else if (pt.mNoAliasDecls.empty()) { + for (auto PairIO : pt.mZeroOperands) { + Instruction *I = PairIO.first; + auto OpNo = PairIO.second; + (void)OpNo; // Silence not used warning in Release builds. + if (IntrinsicInst *II = dyn_cast(I)) { + auto ID = II->getIntrinsicID(); + if (ID == Intrinsic::noalias || ID == Intrinsic::provenance_noalias) { + // If we get here, we can assume the identifyP or its provenance + // are dependencies + assert( + (ID == Intrinsic::noalias) + ? (OpNo == Intrinsic::NoAliasIdentifyPArg) + : (OpNo == Intrinsic::ProvenanceNoAliasIdentifyPArg || + OpNo == + Intrinsic::ProvenanceNoAliasIdentifyPProvenanceArg)); + unsigned ScopeArg = (ID == Intrinsic::noalias + ? 
Intrinsic::NoAliasScopeArg + : Intrinsic::ProvenanceNoAliasScopeArg); + MetadataAsValue *MV = + cast(I->getOperand(ScopeArg)); + if (NoAliasUnknownScopeMD == MV->getMetadata()) { + // Propagate a more or less unique id + LLVM_DEBUG(llvm::dbgs() + << "-- No llvm.noalias.decl, looking through: " << *I + << "\n"); + II->replaceAllUsesWith(II->getOperand(0)); + } + } + } } } + } + + if (NoAliasDecl) { + // Check if we need to split up llvm.noalias.decl with unique ObjId's + // This is needed to differentiate restrict pointers, once the alloca is + // removed. NOTE: we might as well have depended on 'constant propagation of + // null' and work with a 'constant pointer' + // for IdentifyP. Not sure what mechanism would be the best. + const DataLayout &DL = AI->getParent()->getModule()->getDataLayout(); + std::map ObjId2NoAliasDecl; + + auto BaseObjId = cast(NoAliasDecl->getOperand( + Intrinsic::NoAliasDeclObjIdArg)) + ->getZExtValue(); + ObjId2NoAliasDecl[BaseObjId] = NoAliasDecl; + + for (auto PairIO : pt.mZeroOperands) { + IntrinsicInst *II = dyn_cast(PairIO.first); + if (II && ((II->getIntrinsicID() == Intrinsic::noalias) || + (II->getIntrinsicID() == Intrinsic::provenance_noalias))) { + auto OpNo = PairIO.second; + unsigned IdentifyPArg = (II->getIntrinsicID() == Intrinsic::noalias) + ? Intrinsic::NoAliasIdentifyPArg + : Intrinsic::ProvenanceNoAliasIdentifyPArg; + unsigned ObjIdArg = (II->getIntrinsicID() == Intrinsic::noalias) + ? Intrinsic::NoAliasIdentifyPObjIdArg + : Intrinsic::ProvenanceNoAliasIdentifyPObjIdArg; + unsigned NoAliasDeclArg = + (II->getIntrinsicID() == Intrinsic::noalias) + ? Intrinsic::NoAliasNoAliasDeclArg + : Intrinsic::ProvenanceNoAliasNoAliasDeclArg; + + if ((unsigned)OpNo != IdentifyPArg) + continue; + + auto CurrentObjId = + cast(II->getOperand(ObjIdArg))->getZExtValue(); + + assert(CurrentObjId == BaseObjId && + "Initial object id difference detected."); + + APInt PPointerOffset(DL.getPointerSizeInBits(), 0ull); + assert(AI == II->getOperand(IdentifyPArg) + ->stripAndAccumulateInBoundsConstantOffsets( + DL, PPointerOffset) && + "hmm.. 
expected stripped P to map to alloca"); + if (!PPointerOffset.isNullValue()) { + CurrentObjId += PPointerOffset.getZExtValue(); + auto &NewNoAliasDecl = ObjId2NoAliasDecl[CurrentObjId]; + if (NewNoAliasDecl == nullptr) { + LLVM_DEBUG(llvm::dbgs() + << "Creating llvm.noalias.decl for IdentifyPObjId " + << CurrentObjId << "\n"); + IRBuilder NoAliasDeclBuilder(NoAliasDecl); + NewNoAliasDecl = NoAliasDeclBuilder.CreateNoAliasDeclaration( + ConstantPointerNull::get(cast(AI->getType())), + CurrentObjId, + NoAliasDecl->getOperand(Intrinsic::NoAliasDeclScopeArg)); + LLVM_DEBUG(llvm::dbgs() << "- " << *NewNoAliasDecl << "\n"); + } + II->setOperand(NoAliasDeclArg, NewNoAliasDecl); + II->setOperand(ObjIdArg, + ConstantInt::get(II->getOperand(ObjIdArg)->getType(), + CurrentObjId)); + LLVM_DEBUG(llvm::dbgs() + << "Remapping noalias.decl dependency: " << *II << "\n"); + } + } + } + } + + // set args to zero + for (auto II : pt.mNoAliasDecls) { + LLVM_DEBUG(llvm::dbgs() << "Zeoring noalias.decl dep: " << *II << "\n"); + assert(II->getIntrinsicID() == Intrinsic::noalias_decl); + II->setOperand(Intrinsic::NoAliasDeclAllocaArg, + ConstantPointerNull::get(cast(AI->getType()))); + } + for (auto PairIO : pt.mZeroOperands) { + Instruction *I = PairIO.first; + auto OpNo = PairIO.second; + LLVM_DEBUG(llvm::dbgs() + << "Zeroing operand " << OpNo << " of " << *I << "\n"); + I->setOperand(OpNo, ConstantPointerNull::get( + cast(I->getOperand(OpNo)->getType()))); + } + + // remove + for (auto I : pt.mRemovables) { + LLVM_DEBUG(llvm::dbgs() << "Removing " << *I << "\n"); I->eraseFromParent(); } } @@ -357,6 +679,9 @@ for (auto UI = AI->user_begin(), E = AI->user_end(); UI != E;) { Instruction *UserInst = cast(*UI++); + // load/store can have a provenance + if ((UI != E) && (*UI == UserInst)) + ++UI; if (UserInst == OnlyStore) continue; LoadInst *LI = cast(UserInst); @@ -467,6 +792,9 @@ // store above them, if any. for (auto UI = AI->user_begin(), E = AI->user_end(); UI != E;) { LoadInst *LI = dyn_cast(*UI++); + // load/store can have a provenance + if ((UI != E) && (*UI == LI)) + ++UI; if (!LI) continue; @@ -544,7 +872,7 @@ assert(AI->getParent()->getParent() == &F && "All allocas should be in the same function, which is same as DF!"); - removeLifetimeIntrinsicUsers(AI); + removeIntrinsicUsers(AI); if (AI->use_empty()) { // If there are no uses of the alloca, just delete it now. Index: llvm/lib/Transforms/Utils/SimplifyCFG.cpp =================================================================== --- llvm/lib/Transforms/Utils/SimplifyCFG.cpp +++ llvm/lib/Transforms/Utils/SimplifyCFG.cpp @@ -1342,6 +1342,7 @@ LLVMContext::MD_dereferenceable, LLVMContext::MD_dereferenceable_or_null, LLVMContext::MD_mem_parallel_loop_access, + LLVMContext::MD_noalias, LLVMContext::MD_access_group, LLVMContext::MD_preserve_access_index}; combineMetadata(I1, I2, KnownIDs, true); @@ -3145,8 +3146,9 @@ StoreInst *SI = cast(QB.CreateStore(QPHI, Address)); AAMDNodes AAMD; PStore->getAAMetadata(AAMD, /*Merge=*/false); - PStore->getAAMetadata(AAMD, /*Merge=*/true); + QStore->getAAMetadata(AAMD, /*Merge=*/true); SI->setAAMetadata(AAMD); + SI->setAAMetadataNoAliasProvenance(AAMD); // Choose the minimum alignment. If we could prove both stores execute, we // could use biggest one. In this case, though, we only know that one of the // stores executes. 
And we don't know it's safe to take the alignment from a Index: llvm/lib/Transforms/Vectorize/LoadStoreVectorizer.cpp =================================================================== --- llvm/lib/Transforms/Vectorize/LoadStoreVectorizer.cpp +++ llvm/lib/Transforms/Vectorize/LoadStoreVectorizer.cpp @@ -264,9 +264,10 @@ // The real propagateMetadata expects a SmallVector, but we deal in // vectors of Instructions. -static void propagateMetadata(Instruction *I, ArrayRef IL) { +static void propagateMetadata(Instruction *I, ArrayRef IL, + bool RemoveNoAlias) { SmallVector VL(IL.begin(), IL.end()); - propagateMetadata(I, VL); + propagateMetadata(I, VL, RemoveNoAlias); } // Vectorizer Implementation @@ -1115,11 +1116,15 @@ } } + bool HasProvenance = + std::any_of(Chain.begin(), Chain.end(), [](const auto &I) { + return cast(I)->hasNoaliasProvenanceOperand(); + }); StoreInst *SI = Builder.CreateAlignedStore( Vec, Builder.CreateBitCast(S0->getPointerOperand(), VecTy->getPointerTo(AS)), Alignment); - propagateMetadata(SI, Chain); + propagateMetadata(SI, Chain, HasProvenance); eraseInstructions(Chain); ++NumVectorInstructions; @@ -1237,11 +1242,15 @@ std::tie(First, Last) = getBoundaryInstrs(Chain); Builder.SetInsertPoint(&*First); + bool HasProvenance = + std::any_of(Chain.begin(), Chain.end(), [](const auto &I) { + return cast(I)->hasNoaliasProvenanceOperand(); + }); Value *Bitcast = Builder.CreateBitCast(L0->getPointerOperand(), VecTy->getPointerTo(AS)); LoadInst *LI = Builder.CreateAlignedLoad(VecTy, Bitcast, MaybeAlign(Alignment)); - propagateMetadata(LI, Chain); + propagateMetadata(LI, Chain, HasProvenance); if (VecLoadTy) { SmallVector InstrsToErase; Index: llvm/lib/Transforms/Vectorize/LoopVectorize.cpp =================================================================== --- llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -921,7 +921,13 @@ void InnerLoopVectorizer::addMetadata(Instruction *To, Instruction *From) { - propagateMetadata(To, From); + bool HasProvenance = true; + if (auto *SI = dyn_cast(From)) { + HasProvenance = SI->hasNoaliasProvenanceOperand(); + } else if (auto *LI = dyn_cast(From)) { + HasProvenance = LI->hasNoaliasProvenanceOperand(); + } + propagateMetadata(To, From, HasProvenance); addNewMetadata(To, From); } @@ -3302,11 +3308,6 @@ if (VF == 1) return ScalarCallCost; - // Compute corresponding vector type for return value and arguments. - Type *RetTy = ToVectorTy(ScalarRetTy, VF); - for (Type *ScalarTy : ScalarTys) - Tys.push_back(ToVectorTy(ScalarTy, VF)); - // Compute costs of unpacking argument values for the scalar calls and // packing the return values to a vector. unsigned ScalarizationCost = getScalarizationOverhead(CI, VF); @@ -3322,6 +3323,11 @@ if (!TLI || CI->isNoBuiltin() || !VecFunc) return Cost; + // Compute corresponding vector type for return value and arguments. + Type *RetTy = ToVectorTy(ScalarRetTy, VF); + for (Type *ScalarTy : ScalarTys) + Tys.push_back(ToVectorTy(ScalarTy, VF)); + // If the corresponding vector cost is cheaper, return its cost. 
unsigned VectorCallCost = TTI.getCallInstrCost(nullptr, RetTy, Tys, TTI::TCK_RecipThroughput); @@ -6970,7 +6976,8 @@ Intrinsic::ID ID = getVectorIntrinsicIDForCall(CI, TLI); if (ID && (ID == Intrinsic::assume || ID == Intrinsic::lifetime_end || - ID == Intrinsic::lifetime_start || ID == Intrinsic::sideeffect)) + ID == Intrinsic::lifetime_start || ID == Intrinsic::sideeffect || + ID == Intrinsic::noalias || ID == Intrinsic::provenance_noalias)) return nullptr; auto willWiden = [&](unsigned VF) -> bool { Index: llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp =================================================================== --- llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -1521,6 +1521,53 @@ } } + /// Set the operands of this bundle of load or store instructions in their + /// original order. + void setLoadStoreOperandsInOrder() { + assert(Operands.empty() && "Already initialized?"); + auto *I0 = cast(Scalars[0]); + assert((isa(I0) || isa(I0)) && + "Expect a load or store instruction"); + unsigned NumOperands = isa(I0) ? 1 : 2; + + // Check if any instruction has a ptr_provenance + bool HasProvenance = + std::any_of(Scalars.begin(), Scalars.end(), [&](auto *V) { + return cast(V)->getNumOperands() != NumOperands; + }); + + Operands.resize(NumOperands + HasProvenance); + unsigned NumLanes = Scalars.size(); + for (unsigned OpIdx = 0; OpIdx != NumOperands; ++OpIdx) { + Operands[OpIdx].resize(NumLanes); + for (unsigned Lane = 0; Lane != NumLanes; ++Lane) { + auto *I = cast(Scalars[Lane]); + assert(((I->getNumOperands() == NumOperands) || + (I->getNumOperands() == NumOperands + 1)) && + "Expected same number of operands (ignoring the " + "ptr_provenance"); + Operands[OpIdx][Lane] = I->getOperand(OpIdx); + } + } + + if (HasProvenance) { + // At least one instruction has a ptr_provenance. + // Keep track of the dependencies brought in by it. Later on we will + // omit the noalias information. + unsigned OpIdx = NumOperands; + Operands[OpIdx].resize(NumLanes); + for (unsigned Lane = 0; Lane != NumLanes; ++Lane) { + auto *I = cast(Scalars[Lane]); + if (I->getNumOperands() != NumOperands) { + Operands[OpIdx][Lane] = I->getOperand(OpIdx); + } else { + Operands[OpIdx][Lane] = + UndefValue::get(I->getOperand(OpIdx - 1)->getType()); + } + } + } + } + /// \returns the \p OpIdx operand of this TreeEntry. ValueList &getOperand(unsigned OpIdx) { assert(OpIdx < Operands.size() && "Off bounds"); @@ -2040,6 +2087,7 @@ auto *In = TE->getMainOp(); assert(In && (isa(In) || isa(In) || + isa(In) || isa(In) || In->getNumOperands() == TE->getNumOperands()) && "Missed TreeEntry operands?"); (void)In; // fake use to avoid build failure when assertions disabled @@ -2735,7 +2783,7 @@ ++NumOpsWantToKeepOriginalOrder; TreeEntry *TE = newTreeEntry(VL, Bundle /*vectorized*/, S, UserTreeIdx, ReuseShuffleIndicies); - TE->setOperandsInOrder(); + TE->setLoadStoreOperandsInOrder(); LLVM_DEBUG(dbgs() << "SLP: added a vector of loads.\n"); } else { // Need to reorder. 
@@ -2744,7 +2792,7 @@ TreeEntry *TE = newTreeEntry(VL, Bundle /*vectorized*/, S, UserTreeIdx, ReuseShuffleIndicies, I->getFirst()); - TE->setOperandsInOrder(); + TE->setLoadStoreOperandsInOrder(); LLVM_DEBUG(dbgs() << "SLP: added a vector of jumbled loads.\n"); } return; @@ -2997,7 +3045,7 @@ ++NumOpsWantToKeepOriginalOrder; TreeEntry *TE = newTreeEntry(VL, Bundle /*vectorized*/, S, UserTreeIdx, ReuseShuffleIndicies); - TE->setOperandsInOrder(); + TE->setLoadStoreOperandsInOrder(); buildTree_rec(Operands, Depth + 1, {TE, 0}); LLVM_DEBUG(dbgs() << "SLP: added a vector of stores.\n"); } else { @@ -3007,7 +3055,7 @@ TreeEntry *TE = newTreeEntry(VL, Bundle /*vectorized*/, S, UserTreeIdx, ReuseShuffleIndicies, I->getFirst()); - TE->setOperandsInOrder(); + TE->setLoadStoreOperandsInOrder(); buildTree_rec(Operands, Depth + 1, {TE, 0}); LLVM_DEBUG(dbgs() << "SLP: added a vector of jumbled stores.\n"); } @@ -4228,7 +4276,7 @@ PointerType::get(VecTy, LI->getPointerAddressSpace()); Value *Ptr = Builder.CreateBitCast(LI->getOperand(0), PtrTy); LoadInst *V = Builder.CreateAlignedLoad(VecTy, Ptr, LI->getAlign()); - Value *NewV = propagateMetadata(V, E->Scalars); + Value *NewV = propagateMetadata(V, E->Scalars, true); if (!E->ReorderIndices.empty()) { SmallVector Mask; inversePermutation(E->ReorderIndices, Mask); @@ -4409,7 +4457,7 @@ ExternalUses.push_back(ExternalUser(PO, cast(VecPtr), 0)); LI = Builder.CreateAlignedLoad(VecTy, VecPtr, LI->getAlign()); - Value *V = propagateMetadata(LI, E->Scalars); + Value *V = propagateMetadata(LI, E->Scalars, (E->getNumOperands() == 2)); if (IsReorder) { SmallVector Mask; inversePermutation(E->ReorderIndices, Mask); @@ -4453,7 +4501,7 @@ if (getTreeEntry(ScalarPtr)) ExternalUses.push_back(ExternalUser(ScalarPtr, cast(VecPtr), 0)); - Value *V = propagateMetadata(ST, E->Scalars); + Value *V = propagateMetadata(ST, E->Scalars, (E->getNumOperands() == 3)); if (NeedToShuffleReuses) { V = Builder.CreateShuffleVector(V, UndefValue::get(VecTy), E->ReuseShuffleIndices, "shuffle"); Index: llvm/test/Analysis/BasicAA/noalias-intr.ll =================================================================== --- /dev/null +++ llvm/test/Analysis/BasicAA/noalias-intr.ll @@ -0,0 +1,143 @@ +; RUN: opt < %s -basic-aa -aa-eval -print-all-alias-modref-info -disable-output 2>&1 | FileCheck %s + +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" + +; Function Attrs: nounwind +define void @test01() #0 { +entry: + %_pA = alloca i32, align 4 + %_pB = alloca i32, align 4 + %t1 = call i8* @llvm.noalias.decl.p0i8.p0p0i32.i32(i32** null, i32 0, metadata !2) + %t3 = call i8* @llvm.noalias.decl.p0i8.p0p0i32.i32(i32** null, i32 0, metadata !5) + %t5 = call i32* @llvm.noalias.p0i32.p0i8.p0p0i32.i32(i32* %_pA, i8* %t1, i32** null, i32 0, metadata !2), !tbaa !7, !noalias !11 + store i32 42, i32* %t5, align 4, !tbaa !12, !noalias !11 + %t7 = call i32* @llvm.noalias.p0i32.p0i8.p0p0i32.i32(i32* %_pB, i8* %t3, i32** null, i32 0, metadata !5), !tbaa !7, !noalias !11 + store i32 43, i32* %t7, align 4, !tbaa !12, !noalias !11 + ret void +} +; CHECK-LABEL: Function: test01: +; CHECK: NoAlias: i32* %t5, i32* %t7 + +; Function Attrs: nounwind +define void @test02() #0 { +entry: + %_pA = alloca i32, align 4 + %_pB = alloca i32, align 4 + %t1 = call i8* @llvm.noalias.decl.p0i8.p0p0i32.i32(i32** null, i32 0, metadata !2) + %t3 = call i8* @llvm.noalias.decl.p0i8.p0p0i32.i32(i32** null, i32 0, metadata !5) + %t5 = call i32* @llvm.noalias.p0i32.p0i8.p0p0i32.i32(i32* %_pA, i8* %t1, i32** null, i32 0, metadata !2), 
!tbaa !7, !noalias !11 + store i32 42, i32* %t5, align 4, !tbaa !12, !noalias !11 + %t7 = call i32* @llvm.noalias.p0i32.p0i8.p0p0i32.i32(i32* %_pA, i8* %t3, i32** null, i32 0, metadata !5), !tbaa !7, !noalias !11 + store i32 43, i32* %t7, align 4, !tbaa !12, !noalias !11 + ret void +} +; CHECK-LABEL: Function: test02: +; CHECK: MustAlias: i32* %t5, i32* %t7 + + +; Function Attrs: nounwind +define void @test11() #0 { +entry: + %_pA = alloca i32, align 4 + %_pB = alloca i32, align 4 + %t1 = call i8* @llvm.noalias.decl.p0i8.p0p0i32.i32(i32** null, i32 0, metadata !2) + %t3 = call i8* @llvm.noalias.decl.p0i8.p0p0i32.i32(i32** null, i32 0, metadata !5) + %t5 = tail call i32* @llvm.provenance.noalias.p0i32.p0i8.p0p0i32.p0p0i32.i32(i32* %_pA, i8* %t1, i32** null, i32** undef, i32 0, metadata !2), !tbaa !7, !noalias !11 + %.guard1 = call i32* @llvm.noalias.arg.guard.p0i32.p0i32(i32* %_pA, i32* %t5) + store i32 42, i32* %.guard1, align 4, !tbaa !12, !noalias !11 + %.guard2 = call i32* @llvm.noalias.arg.guard.p0i32.p0i32(i32* %_pB, i32* %t5) + store i32 43, i32* %.guard2, align 4, !tbaa !12, !noalias !11 + ret void +} +; CHECK-LABEL: Function: test11: +; CHECK: NoAlias: i32* %.guard1, i32* %.guard2 + +; Function Attrs: nounwind +define void @test12() #0 { +entry: + %_pA = alloca i32, align 4 + %_pB = alloca i32, align 4 + %t1 = call i8* @llvm.noalias.decl.p0i8.p0p0i32.i32(i32** null, i32 0, metadata !2) + %t3 = call i8* @llvm.noalias.decl.p0i8.p0p0i32.i32(i32** null, i32 0, metadata !5) + %t5 = tail call i32* @llvm.provenance.noalias.p0i32.p0i8.p0p0i32.p0p0i32.i32(i32* %_pA, i8* %t1, i32** null, i32** undef, i32 0, metadata !2), !tbaa !7, !noalias !11 + %.guard1 = call i32* @llvm.noalias.arg.guard.p0i32.p0i32(i32* %_pA, i32* %t5) + store i32 42, i32* %.guard1, align 4, !tbaa !12, !noalias !11 + %.guard2 = call i32* @llvm.noalias.arg.guard.p0i32.p0i32(i32* %_pA, i32* %t5) + store i32 43, i32* %.guard2, align 4, !tbaa !12, !noalias !11 + ret void +} +; CHECK-LABEL: Function: test12: +; CHECK: MustAlias: i32* %.guard1, i32* %.guard2 + +; Function Attrs: nounwind +define void @test21() #0 { +entry: + %_pA = alloca i32, align 4 + %_pB = alloca i32, align 4 + %t1 = call i8* @llvm.noalias.decl.p0i8.p0p0i32.i32(i32** null, i32 0, metadata !2) + %t3 = call i8* @llvm.noalias.decl.p0i8.p0p0i32.i32(i32** null, i32 0, metadata !5) + %.guard1 = call i32* @llvm.noalias.copy.guard.p0i32.p0i8(i32* %_pA, i8* %t1, metadata !14, metadata !2) + store i32 42, i32* %.guard1, align 4, !tbaa !12, !noalias !11 + %.guard2 = call i32* @llvm.noalias.copy.guard.p0i32.p0i8(i32* %_pB, i8* %t3, metadata !14, metadata !5) + store i32 43, i32* %.guard2, align 4, !tbaa !12, !noalias !11 + ret void +} +; CHECK-LABEL: Function: test21: +; CHECK: NoAlias: i32* %.guard1, i32* %.guard2 + +; Function Attrs: nounwind +define void @test22() #0 { +entry: + %_pA = alloca i32, align 4 + %_pB = alloca i32, align 4 + %t1 = call i8* @llvm.noalias.decl.p0i8.p0p0i32.i32(i32** null, i32 0, metadata !2) + %t3 = call i8* @llvm.noalias.decl.p0i8.p0p0i32.i32(i32** null, i32 0, metadata !5) + %.guard1 = call i32* @llvm.noalias.copy.guard.p0i32.p0i8(i32* %_pA, i8* %t1, metadata !14, metadata !2) + store i32 42, i32* %.guard1, align 4, !tbaa !12, !noalias !11 + %.guard2 = call i32* @llvm.noalias.copy.guard.p0i32.p0i8(i32* %_pA, i8* %t3, metadata !14, metadata !5) + store i32 43, i32* %.guard2, align 4, !tbaa !12, !noalias !11 + ret void +} +; CHECK-LABEL: Function: test22: +; CHECK: MustAlias: i32* %.guard1, i32* %.guard2 + +; Function Attrs: argmemonly 
nounwind +declare i8* @llvm.noalias.decl.p0i8.p0p0i32.i32(i32**, i32, metadata) #1 + +; Function Attrs: argmemonly nounwind speculatable +declare i32* @llvm.noalias.p0i32.p0i8.p0p0i32.i32(i32*, i8*, i32**, i32, metadata) #2 + +; Function Attrs: nounwind readnone speculatable +declare i32* @llvm.provenance.noalias.p0i32.p0i8.p0p0i32.p0p0i32.i32(i32*, i8*, i32**, i32**, i32, metadata) #3 + +; Function Attrs: nounwind readnone +declare i32* @llvm.noalias.arg.guard.p0i32.p0i32(i32*, i32*) #4 + +; Function Attrs: nounwind readnone +declare i32* @llvm.noalias.copy.guard.p0i32.p0i8(i32*, i8*, metadata, metadata) #4 + +attributes #0 = { nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #1 = { argmemonly nounwind } +attributes #2 = { argmemonly nounwind speculatable } +attributes #3 = { nounwind readnone speculatable } +attributes #4 = { nounwind readnone } + +!llvm.module.flags = !{!0} +!llvm.ident = !{!1} + +!0 = !{i32 1, !"wchar_size", i32 4} +!1 = !{!"clang"} +!2 = !{!3} +!3 = distinct !{!3, !4, !"test01: _pA"} +!4 = distinct !{!4, !"test01"} +!5 = !{!6} +!6 = distinct !{!6, !4, !"test01: _pB"} +!7 = !{!8, !8, i64 0, i64 4} +!8 = !{!9, i64 4, !"any pointer"} +!9 = !{!10, i64 1, !"omnipotent char"} +!10 = !{!"Simple C/C++ TBAA"} +!11 = !{!3, !6} +!12 = !{!13, !13, i64 0, i64 4} +!13 = !{!9, i64 4, !"int"} +!14 = !{!15} +!15 = !{i64 -1, i64 0} Index: llvm/test/Analysis/ScopedNoAliasAA/basic-domains.ll =================================================================== --- llvm/test/Analysis/ScopedNoAliasAA/basic-domains.ll +++ llvm/test/Analysis/ScopedNoAliasAA/basic-domains.ll @@ -52,4 +52,3 @@ ; CHECK: NoAlias: store float %1, float* %arrayidx.i2, align 4, !noalias !6 <-> store float %0, float* %arrayidx.i, align 4, !noalias !6 ; CHECK: NoAlias: store float %2, float* %arrayidx.i3, align 4, !noalias !7 <-> store float %0, float* %arrayidx.i, align 4, !noalias !6 ; CHECK: NoAlias: store float %2, float* %arrayidx.i3, align 4, !noalias !7 <-> store float %1, float* %arrayidx.i2, align 4, !noalias !6 - Index: llvm/test/Analysis/ScopedNoAliasAA/basic2.ll =================================================================== --- llvm/test/Analysis/ScopedNoAliasAA/basic2.ll +++ llvm/test/Analysis/ScopedNoAliasAA/basic2.ll @@ -38,4 +38,3 @@ !3 = !{!3, !2, !"some other scope"} !4 = !{!1} !5 = !{!3} - Index: llvm/test/Analysis/ScopedNoAliasAA/noalias-calls.ll =================================================================== --- /dev/null +++ llvm/test/Analysis/ScopedNoAliasAA/noalias-calls.ll @@ -0,0 +1,62 @@ +; RUN: opt < %s -basic-aa -scoped-noalias -aa-eval -evaluate-aa-metadata -print-all-alias-modref-info -disable-output 2>&1 | FileCheck %s +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +; Function Attrs: nounwind +declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture writeonly, i8* nocapture readonly, i64, i1) #1 + +; Function Attrs: nounwind +declare void @hey() #1 + +; Function Attrs: nounwind uwtable +define void @foo(i8* nocapture %a, i8* 
nocapture readonly %c, i8* nocapture %b) #2 { +entry: + %l.i = alloca i8, i32 512, align 1 + %prov.a = call i8* @llvm.provenance.noalias.p0i8.p0i8.p0p0i8.p0p0i8.i32(i8* %a, i8* null, i8** null, i8** null, i32 0, metadata !0) #0 + %prov.b = call i8* @llvm.provenance.noalias.p0i8.p0i8.p0p0i8.p0p0i8.i32(i8* %c, i8* null, i8** null, i8** null, i32 0, metadata !3) #0 + %a.guard = call i8* @llvm.noalias.arg.guard.p0i8.p0i8(i8* %a, i8* %prov.a) + %c.guard = call i8* @llvm.noalias.arg.guard.p0i8.p0i8(i8* %c, i8* %prov.b) + call void @llvm.memcpy.p0i8.p0i8.i64(i8* %a.guard, i8* %b, i64 16, i1 false) #1, !noalias !5 + call void @llvm.memcpy.p0i8.p0i8.i64(i8* %b, i8* %c.guard, i64 16, i1 false) #1, !noalias !5 + call void @llvm.memcpy.p0i8.p0i8.i64(i8* %a.guard, i8* %c.guard, i64 16, i1 false) #1, !noalias !5 + call void @hey() #1, !noalias !5 + call void @llvm.memcpy.p0i8.p0i8.i64(i8* %l.i, i8* %c.guard, i64 16, i1 false) #1, !noalias !5 + ret void +} + +; CHECK-LABEL: Function: foo: +; CHECK: Just Ref: call void @llvm.memcpy.p0i8.p0i8.i64(i8* %a.guard, i8* %b, i64 16, i1 false) #1, !noalias !5 <-> call void @llvm.memcpy.p0i8.p0i8.i64(i8* %b, i8* %c.guard, i64 16, i1 false) #1, !noalias !5 +; CHECK: Just Mod: call void @llvm.memcpy.p0i8.p0i8.i64(i8* %a.guard, i8* %b, i64 16, i1 false) #1, !noalias !5 <-> call void @llvm.memcpy.p0i8.p0i8.i64(i8* %a.guard, i8* %c.guard, i64 16, i1 false) #1, !noalias !5 +; CHECK: Just Ref: call void @llvm.memcpy.p0i8.p0i8.i64(i8* %a.guard, i8* %b, i64 16, i1 false) #1, !noalias !5 <-> call void @hey() #1, !noalias !5 +; CHECK: NoModRef: call void @llvm.memcpy.p0i8.p0i8.i64(i8* %a.guard, i8* %b, i64 16, i1 false) #1, !noalias !5 <-> call void @llvm.memcpy.p0i8.p0i8.i64(i8* %l.i, i8* %c.guard, i64 16, i1 false) #1, !noalias !5 +; CHECK: Just Mod: call void @llvm.memcpy.p0i8.p0i8.i64(i8* %b, i8* %c.guard, i64 16, i1 false) #1, !noalias !5 <-> call void @llvm.memcpy.p0i8.p0i8.i64(i8* %a.guard, i8* %b, i64 16, i1 false) #1, !noalias !5 +; CHECK: NoModRef: call void @llvm.memcpy.p0i8.p0i8.i64(i8* %b, i8* %c.guard, i64 16, i1 false) #1, !noalias !5 <-> call void @llvm.memcpy.p0i8.p0i8.i64(i8* %a.guard, i8* %c.guard, i64 16, i1 false) #1, !noalias !5 +; CHECK: Just Mod: call void @llvm.memcpy.p0i8.p0i8.i64(i8* %b, i8* %c.guard, i64 16, i1 false) #1, !noalias !5 <-> call void @hey() #1, !noalias !5 +; CHECK: NoModRef: call void @llvm.memcpy.p0i8.p0i8.i64(i8* %b, i8* %c.guard, i64 16, i1 false) #1, !noalias !5 <-> call void @llvm.memcpy.p0i8.p0i8.i64(i8* %l.i, i8* %c.guard, i64 16, i1 false) #1, !noalias !5 +; CHECK: Just Mod: call void @llvm.memcpy.p0i8.p0i8.i64(i8* %a.guard, i8* %c.guard, i64 16, i1 false) #1, !noalias !5 <-> call void @llvm.memcpy.p0i8.p0i8.i64(i8* %a.guard, i8* %b, i64 16, i1 false) #1, !noalias !5 +; CHECK: NoModRef: call void @llvm.memcpy.p0i8.p0i8.i64(i8* %a.guard, i8* %c.guard, i64 16, i1 false) #1, !noalias !5 <-> call void @llvm.memcpy.p0i8.p0i8.i64(i8* %b, i8* %c.guard, i64 16, i1 false) #1, !noalias !5 +; CHECK: NoModRef: call void @llvm.memcpy.p0i8.p0i8.i64(i8* %a.guard, i8* %c.guard, i64 16, i1 false) #1, !noalias !5 <-> call void @hey() #1, !noalias !5 +; CHECK: NoModRef: call void @llvm.memcpy.p0i8.p0i8.i64(i8* %a.guard, i8* %c.guard, i64 16, i1 false) #1, !noalias !5 <-> call void @llvm.memcpy.p0i8.p0i8.i64(i8* %l.i, i8* %c.guard, i64 16, i1 false) #1, !noalias !5 +; CHECK: Just Mod: call void @hey() #1, !noalias !5 <-> call void @llvm.memcpy.p0i8.p0i8.i64(i8* %a.guard, i8* %b, i64 16, i1 false) #1, !noalias !5 +; CHECK: Both ModRef: call void 
@hey() #1, !noalias !5 <-> call void @llvm.memcpy.p0i8.p0i8.i64(i8* %b, i8* %c.guard, i64 16, i1 false) #1, !noalias !5 +; CHECK: NoModRef: call void @hey() #1, !noalias !5 <-> call void @llvm.memcpy.p0i8.p0i8.i64(i8* %a.guard, i8* %c.guard, i64 16, i1 false) #1, !noalias !5 +; CHECK: NoModRef: call void @hey() #1, !noalias !5 <-> call void @llvm.memcpy.p0i8.p0i8.i64(i8* %l.i, i8* %c.guard, i64 16, i1 false) #1, !noalias !5 +; CHECK: NoModRef: call void @llvm.memcpy.p0i8.p0i8.i64(i8* %l.i, i8* %c.guard, i64 16, i1 false) #1, !noalias !5 <-> call void @llvm.memcpy.p0i8.p0i8.i64(i8* %a.guard, i8* %b, i64 16, i1 false) #1, !noalias !5 +; CHECK: NoModRef: call void @llvm.memcpy.p0i8.p0i8.i64(i8* %l.i, i8* %c.guard, i64 16, i1 false) #1, !noalias !5 <-> call void @llvm.memcpy.p0i8.p0i8.i64(i8* %b, i8* %c.guard, i64 16, i1 false) #1, !noalias !5 +; CHECK: NoModRef: call void @llvm.memcpy.p0i8.p0i8.i64(i8* %l.i, i8* %c.guard, i64 16, i1 false) #1, !noalias !5 <-> call void @llvm.memcpy.p0i8.p0i8.i64(i8* %a.guard, i8* %c.guard, i64 16, i1 false) #1, !noalias !5 +; CHECK: NoModRef: call void @llvm.memcpy.p0i8.p0i8.i64(i8* %l.i, i8* %c.guard, i64 16, i1 false) #1, !noalias !5 <-> call void @hey() #1, !noalias !5 + +declare i8* @llvm.provenance.noalias.p0i8.p0i8.p0p0i8.p0p0i8.i32(i8*, i8*, i8**, i8**, i32, metadata) nounwind readnone speculatable +declare i8* @llvm.noalias.arg.guard.p0i8.p0i8(i8*, i8*) nounwind readnone + +attributes #0 = { argmemonly nounwind } +attributes #1 = { nounwind } +attributes #2 = { nounwind uwtable } + + +!0 = !{!1} +!1 = distinct !{!1, !2, !"hello: %a"} +!2 = distinct !{!2, !"hello"} +!3 = !{!4} +!4 = distinct !{!4, !2, !"hello: %c"} +!5 = !{!1, !4} Index: llvm/test/Analysis/ScopedNoAliasAA/noalias-dup-scope.ll =================================================================== --- /dev/null +++ llvm/test/Analysis/ScopedNoAliasAA/noalias-dup-scope.ll @@ -0,0 +1,161 @@ +; RUN: opt < %s -basic-aa -domtree -scoped-noalias -aa-eval -loops -evaluate-aa-metadata -print-all-alias-modref-info -disable-output 2>&1 | FileCheck %s -check-prefix=WITHDT +; Note: The -loops above can be anything that requires the domtree, and is +; necessary to work around a pass-manager bug. 
+ +target datalayout = "E-m:e-i64:64-n32:64" +target triple = "powerpc64-unknown-linux-gnu" + +@a = common global i32* null, align 8 +@r = common global i32 0, align 4 +@a2 = common global i32* null, align 8 + +; Function Attrs: nounwind +define i32* @foo() #0 { +entry: + %0 = load i32*, i32** @a, align 8, !tbaa !1, !noalias !5 + %1 = call i32* @llvm.provenance.noalias.p0i32.p0i8.p0p0i32.p0p0i32.i32(i32* %0, i8* null, i32** @a, i32** null, i32 0, metadata !5) #0 + %2 = tail call i32* @llvm.noalias.arg.guard.p0i32.p0i32(i32* %0, i32* %1) #0 + ret i32* %1 +} + +; Function Attrs: nounwind +declare i32* @llvm.provenance.noalias.p0i32.p0i8.p0p0i32.p0p0i32.i32(i32*, i8*, i32**, i32**, i32, metadata) #0 + +; Function Attrs: nounwind +declare i32* @llvm.noalias.arg.guard.p0i32.p0i32(i32*, i32*) #0 + +; Function Attrs: nounwind +define i32* @foo1(i32 signext %b) #0 { +entry: + %tobool = icmp eq i32 %b, 0 + br i1 %tobool, label %if.else, label %if.then + +if.then: ; preds = %entry + %0 = load i32*, i32** @a, ptr_provenance i32** null, align 8, !tbaa !1, !noalias !8 + %1 = tail call i32* @llvm.provenance.noalias.p0i32.p0i8.p0p0i32.p0p0i32.i32(i32* %0, i8* null, i32** @a, i32** null, i32 0, metadata !12) #0 + %2 = load i32, i32* %0, ptr_provenance i32* %1, align 4, !tbaa !13, !noalias !8 + %3 = load i32, i32* @r, ptr_provenance i32* null, align 4, !tbaa !13, !noalias !8 + %add = add nsw i32 %3, %2 + store i32 %add, i32* @r, ptr_provenance i32* null, align 4, !tbaa !13, !noalias !8 + %guard.1 = tail call i32* @llvm.noalias.arg.guard.p0i32.p0i32(i32* %0, i32* %1) #0 + tail call void @ex1(i32* %guard.1) #0, !noalias !8 + %incdec.ptr = getelementptr inbounds i32, i32* %0, i64 1 + %4 = tail call i32* @llvm.provenance.noalias.p0i32.p0i8.p0p0i32.p0p0i32.i32(i32* %0, i8* null, i32** @a, i32** null, i32 0, metadata !12) #0 + %5 = load i32, i32* @r, ptr_provenance i32* null, align 4, !tbaa !13, !noalias !8 + store i32 %5, i32* %incdec.ptr, ptr_provenance i32* %4, align 4, !tbaa !13, !noalias !8 + %guard.5 = tail call i32* @llvm.noalias.arg.guard.p0i32.p0i32(i32* %incdec.ptr, i32* %4) #0 + tail call void @ex1(i32* %guard.5) #0, !noalias !8 + %idx.ext = sext i32 %b to i64 + %add.ptr = getelementptr inbounds i32, i32* %incdec.ptr, i64 %idx.ext + %6 = tail call i32* @llvm.provenance.noalias.p0i32.p0i8.p0p0i32.p0p0i32.i32(i32* %0, i8* null, i32** @a, i32** null, i32 0, metadata !12) #0 + %7 = load i32, i32* @r, ptr_provenance i32* null, align 4, !tbaa !13, !noalias !8 + store i32 %7, i32* %add.ptr, ptr_provenance i32* %6, align 4, !tbaa !13, !noalias !8 + %guard.6 = tail call i32* @llvm.noalias.arg.guard.p0i32.p0i32(i32* %add.ptr, i32* %6) #0 + tail call void @ex1(i32* %guard.6) #0, !noalias !8 + %8 = load i32*, i32** @a2, ptr_provenance i32** null, align 8, !tbaa !1, !noalias !8 + %9 = tail call i32* @llvm.provenance.noalias.p0i32.p0i8.p0p0i32.p0p0i32.i32(i32* %8, i8* null, i32** @a2, i32** null, i32 0, metadata !15) #0 + %10 = load i32, i32* @r, ptr_provenance i32* null, align 4, !tbaa !13, !noalias !8 + store i32 %10, i32* %8, ptr_provenance i32* %9, align 4, !tbaa !13, !noalias !8 + %guard.9 = tail call i32* @llvm.noalias.arg.guard.p0i32.p0i32(i32* %8, i32* %9) #0 + tail call void @ex1(i32* %guard.9) #0, !noalias !8 + br label %if.end + +if.else: ; preds = %entry + %11 = load i32*, i32** @a2, ptr_provenance i32** null, align 8, !tbaa !1, !noalias !12 + %12 = tail call i32* @llvm.provenance.noalias.p0i32.p0i8.p0p0i32.p0p0i32.i32(i32* %11, i8* null, i32** @a2, i32** null, i32 0, metadata !12) #0 + %13 = load i32, 
i32* %11, ptr_provenance i32* %12, align 4, !tbaa !13, !noalias !12 + %14 = load i32, i32* @r, ptr_provenance i32* null, align 4, !tbaa !13, !noalias !12 + %add1 = add nsw i32 %14, %13 + store i32 %add1, i32* @r, ptr_provenance i32* null, align 4, !tbaa !13, !noalias !12 + br label %if.end + +if.end: ; preds = %if.else, %if.then + %x.0 = phi i32* [ %0, %if.then ], [ %11, %if.else ] + %prov.x.0 = phi i32* [ %6, %if.then ], [ %12, %if.else ] + %x.0.guard = tail call i32* @llvm.noalias.arg.guard.p0i32.p0i32(i32* %x.0, i32* %prov.x.0) #0 + ret i32* %x.0.guard +} + +; WITHDT: NoAlias: %0 = load i32*, i32** @a, ptr_provenance i32** null, align 8, !tbaa !1, !noalias !5 <-> store i32 %add, i32* @r, ptr_provenance i32* null, align 4, !tbaa !10, !noalias !5 +; WITHDT: NoAlias: %0 = load i32*, i32** @a, ptr_provenance i32** null, align 8, !tbaa !1, !noalias !5 <-> store i32 %5, i32* %incdec.ptr, ptr_provenance i32* %4, align 4, !tbaa !10, !noalias !5 +; WITHDT: NoAlias: %0 = load i32*, i32** @a, ptr_provenance i32** null, align 8, !tbaa !1, !noalias !5 <-> store i32 %7, i32* %add.ptr, ptr_provenance i32* %6, align 4, !tbaa !10, !noalias !5 +; WITHDT: NoAlias: %0 = load i32*, i32** @a, ptr_provenance i32** null, align 8, !tbaa !1, !noalias !5 <-> store i32 %10, i32* %8, ptr_provenance i32* %9, align 4, !tbaa !10, !noalias !5 +; WITHDT: NoAlias: %0 = load i32*, i32** @a, ptr_provenance i32** null, align 8, !tbaa !1, !noalias !5 <-> store i32 %add1, i32* @r, ptr_provenance i32* null, align 4, !tbaa !10, !noalias !9 +; WITHDT: NoAlias: %2 = load i32, i32* %0, ptr_provenance i32* %1, align 4, !tbaa !10, !noalias !5 <-> store i32 %add, i32* @r, ptr_provenance i32* null, align 4, !tbaa !10, !noalias !5 +; WITHDT: NoAlias: %2 = load i32, i32* %0, ptr_provenance i32* %1, align 4, !tbaa !10, !noalias !5 <-> store i32 %5, i32* %incdec.ptr, ptr_provenance i32* %4, align 4, !tbaa !10, !noalias !5 +; WITHDT: MayAlias: %2 = load i32, i32* %0, ptr_provenance i32* %1, align 4, !tbaa !10, !noalias !5 <-> store i32 %7, i32* %add.ptr, ptr_provenance i32* %6, align 4, !tbaa !10, !noalias !5 +; WITHDT: NoAlias: %2 = load i32, i32* %0, ptr_provenance i32* %1, align 4, !tbaa !10, !noalias !5 <-> store i32 %10, i32* %8, ptr_provenance i32* %9, align 4, !tbaa !10, !noalias !5 +; WITHDT: NoAlias: %2 = load i32, i32* %0, ptr_provenance i32* %1, align 4, !tbaa !10, !noalias !5 <-> store i32 %add1, i32* @r, ptr_provenance i32* null, align 4, !tbaa !10, !noalias !9 +; WITHDT: MustAlias: %3 = load i32, i32* @r, ptr_provenance i32* null, align 4, !tbaa !10, !noalias !5 <-> store i32 %add, i32* @r, ptr_provenance i32* null, align 4, !tbaa !10, !noalias !5 +; WITHDT: NoAlias: %3 = load i32, i32* @r, ptr_provenance i32* null, align 4, !tbaa !10, !noalias !5 <-> store i32 %5, i32* %incdec.ptr, ptr_provenance i32* %4, align 4, !tbaa !10, !noalias !5 +; WITHDT: NoAlias: %3 = load i32, i32* @r, ptr_provenance i32* null, align 4, !tbaa !10, !noalias !5 <-> store i32 %7, i32* %add.ptr, ptr_provenance i32* %6, align 4, !tbaa !10, !noalias !5 +; WITHDT: NoAlias: %3 = load i32, i32* @r, ptr_provenance i32* null, align 4, !tbaa !10, !noalias !5 <-> store i32 %10, i32* %8, ptr_provenance i32* %9, align 4, !tbaa !10, !noalias !5 +; WITHDT: MustAlias: %3 = load i32, i32* @r, ptr_provenance i32* null, align 4, !tbaa !10, !noalias !5 <-> store i32 %add1, i32* @r, ptr_provenance i32* null, align 4, !tbaa !10, !noalias !9 +; WITHDT: MustAlias: %5 = load i32, i32* @r, ptr_provenance i32* null, align 4, !tbaa !10, !noalias !5 <-> store i32 %add, i32* @r, 
ptr_provenance i32* null, align 4, !tbaa !10, !noalias !5 +; WITHDT: NoAlias: %5 = load i32, i32* @r, ptr_provenance i32* null, align 4, !tbaa !10, !noalias !5 <-> store i32 %5, i32* %incdec.ptr, ptr_provenance i32* %4, align 4, !tbaa !10, !noalias !5 +; WITHDT: NoAlias: %5 = load i32, i32* @r, ptr_provenance i32* null, align 4, !tbaa !10, !noalias !5 <-> store i32 %7, i32* %add.ptr, ptr_provenance i32* %6, align 4, !tbaa !10, !noalias !5 +; WITHDT: NoAlias: %5 = load i32, i32* @r, ptr_provenance i32* null, align 4, !tbaa !10, !noalias !5 <-> store i32 %10, i32* %8, ptr_provenance i32* %9, align 4, !tbaa !10, !noalias !5 +; WITHDT: MustAlias: %5 = load i32, i32* @r, ptr_provenance i32* null, align 4, !tbaa !10, !noalias !5 <-> store i32 %add1, i32* @r, ptr_provenance i32* null, align 4, !tbaa !10, !noalias !9 +; WITHDT: MustAlias: %7 = load i32, i32* @r, ptr_provenance i32* null, align 4, !tbaa !10, !noalias !5 <-> store i32 %add, i32* @r, ptr_provenance i32* null, align 4, !tbaa !10, !noalias !5 +; WITHDT: NoAlias: %7 = load i32, i32* @r, ptr_provenance i32* null, align 4, !tbaa !10, !noalias !5 <-> store i32 %5, i32* %incdec.ptr, ptr_provenance i32* %4, align 4, !tbaa !10, !noalias !5 +; WITHDT: NoAlias: %7 = load i32, i32* @r, ptr_provenance i32* null, align 4, !tbaa !10, !noalias !5 <-> store i32 %7, i32* %add.ptr, ptr_provenance i32* %6, align 4, !tbaa !10, !noalias !5 +; WITHDT: NoAlias: %7 = load i32, i32* @r, ptr_provenance i32* null, align 4, !tbaa !10, !noalias !5 <-> store i32 %10, i32* %8, ptr_provenance i32* %9, align 4, !tbaa !10, !noalias !5 +; WITHDT: MustAlias: %7 = load i32, i32* @r, ptr_provenance i32* null, align 4, !tbaa !10, !noalias !5 <-> store i32 %add1, i32* @r, ptr_provenance i32* null, align 4, !tbaa !10, !noalias !9 +; WITHDT: NoAlias: %8 = load i32*, i32** @a2, ptr_provenance i32** null, align 8, !tbaa !1, !noalias !5 <-> store i32 %add, i32* @r, ptr_provenance i32* null, align 4, !tbaa !10, !noalias !5 +; WITHDT: NoAlias: %8 = load i32*, i32** @a2, ptr_provenance i32** null, align 8, !tbaa !1, !noalias !5 <-> store i32 %5, i32* %incdec.ptr, ptr_provenance i32* %4, align 4, !tbaa !10, !noalias !5 +; WITHDT: NoAlias: %8 = load i32*, i32** @a2, ptr_provenance i32** null, align 8, !tbaa !1, !noalias !5 <-> store i32 %7, i32* %add.ptr, ptr_provenance i32* %6, align 4, !tbaa !10, !noalias !5 +; WITHDT: NoAlias: %8 = load i32*, i32** @a2, ptr_provenance i32** null, align 8, !tbaa !1, !noalias !5 <-> store i32 %10, i32* %8, ptr_provenance i32* %9, align 4, !tbaa !10, !noalias !5 +; WITHDT: NoAlias: %8 = load i32*, i32** @a2, ptr_provenance i32** null, align 8, !tbaa !1, !noalias !5 <-> store i32 %add1, i32* @r, ptr_provenance i32* null, align 4, !tbaa !10, !noalias !9 +; WITHDT: MustAlias: %10 = load i32, i32* @r, ptr_provenance i32* null, align 4, !tbaa !10, !noalias !5 <-> store i32 %add, i32* @r, ptr_provenance i32* null, align 4, !tbaa !10, !noalias !5 +; WITHDT: NoAlias: %10 = load i32, i32* @r, ptr_provenance i32* null, align 4, !tbaa !10, !noalias !5 <-> store i32 %5, i32* %incdec.ptr, ptr_provenance i32* %4, align 4, !tbaa !10, !noalias !5 +; WITHDT: NoAlias: %10 = load i32, i32* @r, ptr_provenance i32* null, align 4, !tbaa !10, !noalias !5 <-> store i32 %7, i32* %add.ptr, ptr_provenance i32* %6, align 4, !tbaa !10, !noalias !5 +; WITHDT: NoAlias: %10 = load i32, i32* @r, ptr_provenance i32* null, align 4, !tbaa !10, !noalias !5 <-> store i32 %10, i32* %8, ptr_provenance i32* %9, align 4, !tbaa !10, !noalias !5 +; WITHDT: MustAlias: %10 = load i32, i32* @r, 
ptr_provenance i32* null, align 4, !tbaa !10, !noalias !5 <-> store i32 %add1, i32* @r, ptr_provenance i32* null, align 4, !tbaa !10, !noalias !9 +; WITHDT: NoAlias: %11 = load i32*, i32** @a2, ptr_provenance i32** null, align 8, !tbaa !1, !noalias !9 <-> store i32 %add, i32* @r, ptr_provenance i32* null, align 4, !tbaa !10, !noalias !5 +; WITHDT: NoAlias: %11 = load i32*, i32** @a2, ptr_provenance i32** null, align 8, !tbaa !1, !noalias !9 <-> store i32 %5, i32* %incdec.ptr, ptr_provenance i32* %4, align 4, !tbaa !10, !noalias !5 +; WITHDT: NoAlias: %11 = load i32*, i32** @a2, ptr_provenance i32** null, align 8, !tbaa !1, !noalias !9 <-> store i32 %7, i32* %add.ptr, ptr_provenance i32* %6, align 4, !tbaa !10, !noalias !5 +; WITHDT: MayAlias: %11 = load i32*, i32** @a2, ptr_provenance i32** null, align 8, !tbaa !1, !noalias !9 <-> store i32 %10, i32* %8, ptr_provenance i32* %9, align 4, !tbaa !10, !noalias !5 +; WITHDT: NoAlias: %11 = load i32*, i32** @a2, ptr_provenance i32** null, align 8, !tbaa !1, !noalias !9 <-> store i32 %add1, i32* @r, ptr_provenance i32* null, align 4, !tbaa !10, !noalias !9 +; WITHDT: NoAlias: %13 = load i32, i32* %11, ptr_provenance i32* %12, align 4, !tbaa !10, !noalias !9 <-> store i32 %add, i32* @r, ptr_provenance i32* null, align 4, !tbaa !10, !noalias !5 +; WITHDT: NoAlias: %13 = load i32, i32* %11, ptr_provenance i32* %12, align 4, !tbaa !10, !noalias !9 <-> store i32 %5, i32* %incdec.ptr, ptr_provenance i32* %4, align 4, !tbaa !10, !noalias !5 +; WITHDT: NoAlias: %13 = load i32, i32* %11, ptr_provenance i32* %12, align 4, !tbaa !10, !noalias !9 <-> store i32 %7, i32* %add.ptr, ptr_provenance i32* %6, align 4, !tbaa !10, !noalias !5 +; WITHDT: MayAlias: %13 = load i32, i32* %11, ptr_provenance i32* %12, align 4, !tbaa !10, !noalias !9 <-> store i32 %10, i32* %8, ptr_provenance i32* %9, align 4, !tbaa !10, !noalias !5 +; WITHDT: NoAlias: %13 = load i32, i32* %11, ptr_provenance i32* %12, align 4, !tbaa !10, !noalias !9 <-> store i32 %add1, i32* @r, ptr_provenance i32* null, align 4, !tbaa !10, !noalias !9 +; WITHDT: MustAlias: %14 = load i32, i32* @r, ptr_provenance i32* null, align 4, !tbaa !10, !noalias !9 <-> store i32 %add, i32* @r, ptr_provenance i32* null, align 4, !tbaa !10, !noalias !5 +; WITHDT: NoAlias: %14 = load i32, i32* @r, ptr_provenance i32* null, align 4, !tbaa !10, !noalias !9 <-> store i32 %5, i32* %incdec.ptr, ptr_provenance i32* %4, align 4, !tbaa !10, !noalias !5 +; WITHDT: NoAlias: %14 = load i32, i32* @r, ptr_provenance i32* null, align 4, !tbaa !10, !noalias !9 <-> store i32 %7, i32* %add.ptr, ptr_provenance i32* %6, align 4, !tbaa !10, !noalias !5 +; WITHDT: MayAlias: %14 = load i32, i32* @r, ptr_provenance i32* null, align 4, !tbaa !10, !noalias !9 <-> store i32 %10, i32* %8, ptr_provenance i32* %9, align 4, !tbaa !10, !noalias !5 +; WITHDT: MustAlias: %14 = load i32, i32* @r, ptr_provenance i32* null, align 4, !tbaa !10, !noalias !9 <-> store i32 %add1, i32* @r, ptr_provenance i32* null, align 4, !tbaa !10, !noalias !9 +; WITHDT: NoAlias: store i32 %5, i32* %incdec.ptr, ptr_provenance i32* %4, align 4, !tbaa !10, !noalias !5 <-> store i32 %add, i32* @r, ptr_provenance i32* null, align 4, !tbaa !10, !noalias !5 +; WITHDT: NoAlias: store i32 %7, i32* %add.ptr, ptr_provenance i32* %6, align 4, !tbaa !10, !noalias !5 <-> store i32 %add, i32* @r, ptr_provenance i32* null, align 4, !tbaa !10, !noalias !5 +; WITHDT: MayAlias: store i32 %7, i32* %add.ptr, ptr_provenance i32* %6, align 4, !tbaa !10, !noalias !5 <-> store i32 %5, i32* 
%incdec.ptr, ptr_provenance i32* %4, align 4, !tbaa !10, !noalias !5 +; WITHDT: NoAlias: store i32 %10, i32* %8, ptr_provenance i32* %9, align 4, !tbaa !10, !noalias !5 <-> store i32 %add, i32* @r, ptr_provenance i32* null, align 4, !tbaa !10, !noalias !5 +; WITHDT: NoAlias: store i32 %10, i32* %8, ptr_provenance i32* %9, align 4, !tbaa !10, !noalias !5 <-> store i32 %5, i32* %incdec.ptr, ptr_provenance i32* %4, align 4, !tbaa !10, !noalias !5 +; WITHDT: NoAlias: store i32 %10, i32* %8, ptr_provenance i32* %9, align 4, !tbaa !10, !noalias !5 <-> store i32 %7, i32* %add.ptr, ptr_provenance i32* %6, align 4, !tbaa !10, !noalias !5 +; WITHDT: MustAlias: store i32 %add1, i32* @r, ptr_provenance i32* null, align 4, !tbaa !10, !noalias !9 <-> store i32 %add, i32* @r, ptr_provenance i32* null, align 4, !tbaa !10, !noalias !5 +; WITHDT: NoAlias: store i32 %add1, i32* @r, ptr_provenance i32* null, align 4, !tbaa !10, !noalias !9 <-> store i32 %5, i32* %incdec.ptr, ptr_provenance i32* %4, align 4, !tbaa !10, !noalias !5 +; WITHDT: NoAlias: store i32 %add1, i32* @r, ptr_provenance i32* null, align 4, !tbaa !10, !noalias !9 <-> store i32 %7, i32* %add.ptr, ptr_provenance i32* %6, align 4, !tbaa !10, !noalias !5 +; WITHDT: MayAlias: store i32 %add1, i32* @r, ptr_provenance i32* null, align 4, !tbaa !10, !noalias !9 <-> store i32 %10, i32* %8, ptr_provenance i32* %9, align 4, !tbaa !10, !noalias !5 + +declare void @ex1(i32*) + +attributes #0 = { nounwind } + +!llvm.ident = !{!0} + +!0 = !{!"clang"} +!1 = !{!2, !2, i64 0} +!2 = !{!"any pointer", !3, i64 0} +!3 = !{!"omnipotent char", !4, i64 0} +!4 = !{!"Simple C/C++ TBAA"} +!5 = !{!6} +!6 = distinct !{!6, !7, !"foo: x"} +!7 = distinct !{!7, !"foo"} +!8 = !{!9, !11} +!9 = distinct !{!9, !10, !"foo1: x2"} +!10 = distinct !{!10, !"foo1"} +!11 = distinct !{!11, !10, !"foo1: x"} +!12 = !{!11} +!13 = !{!14, !14, i64 0} +!14 = !{!"int", !3, i64 0} +!15 = !{!9} Index: llvm/test/Analysis/ScopedNoAliasAA/noalias.ll =================================================================== --- /dev/null +++ llvm/test/Analysis/ScopedNoAliasAA/noalias.ll @@ -0,0 +1,66 @@ +; RUN: opt < %s -basic-aa -scoped-noalias -aa-eval -evaluate-aa-metadata -print-all-alias-modref-info -disable-output 2>&1 | FileCheck %s +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +; Function Attrs: nounwind uwtable +define void @foo(float* nocapture %a, float* nocapture readonly %c, i64 %i0, i64 %i1) #0 { +entry: + %prov.a = call float* @llvm.provenance.noalias.p0f32.p0i8.p0p0f32.p0p0f32.i32(float* %a, i8* null, float** null, float** null, i32 0, metadata !0) #1 + %0 = load float, float* %c, ptr_provenance float* null, align 4, !noalias !0 + %arrayidx.i = getelementptr inbounds float, float* %a, i64 %i0 + store float %0, float* %arrayidx.i, ptr_provenance float* %prov.a, align 4, !noalias !0 + %1 = load float, float* %c, align 4 + %arrayidx = getelementptr inbounds float, float* %a, i64 %i1 + store float %1, float* %arrayidx, ptr_provenance float* null, align 4, !noalias !0 + ret void +} + +; CHECK-LABEL: Function: foo: +; CHECK: NoAlias: %0 = load float, float* %c, ptr_provenance float* null, align 4, !noalias !0 <-> store float %0, float* %arrayidx.i, ptr_provenance float* %prov.a, align 4, !noalias !0 +; CHECK: MayAlias: %0 = load float, float* %c, ptr_provenance float* null, align 4, !noalias !0 <-> store float %1, float* 
%arrayidx, ptr_provenance float* null, align 4, !noalias !0 +; CHECK: MayAlias: %1 = load float, float* %c, align 4 <-> store float %0, float* %arrayidx.i, ptr_provenance float* %prov.a, align 4, !noalias !0 +; CHECK: MayAlias: %1 = load float, float* %c, align 4 <-> store float %1, float* %arrayidx, ptr_provenance float* null, align 4, !noalias !0 +; CHECK: NoAlias: store float %1, float* %arrayidx, ptr_provenance float* null, align 4, !noalias !0 <-> store float %0, float* %arrayidx.i, ptr_provenance float* %prov.a, align 4, !noalias !0 + + +; Function Attrs: nounwind uwtable +define void @foo2(float* nocapture %a, float* nocapture %b, float* nocapture readonly %c, i64 %i0, i64 %i1) #0 { +entry: + %0 = call float* @llvm.provenance.noalias.p0f32.p0i8.p0p0f32.p0p0f32.i32(float* %a, i8* null, float** null, float** null, i32 0, metadata !3) #1 + %1 = call float* @llvm.provenance.noalias.p0f32.p0i8.p0p0f32.p0p0f32.i32(float* %b, i8* null, float** null, float** null, i32 0, metadata !6) #1 + %2 = load float, float* %c, ptr_provenance float* null, align 4, !noalias !8 + %arrayidx.i = getelementptr inbounds float, float* %a, i64 5 + store float %2, float* %arrayidx.i, ptr_provenance float* %0, align 4, !noalias !8 + %arrayidx1.i = getelementptr inbounds float, float* %b, i64 %i0 + store float %2, float* %arrayidx1.i, ptr_provenance float* %1, align 4, !noalias !8 + %3 = load float, float* %c, align 4 + %arrayidx = getelementptr inbounds float, float* %a, i64 %i1 + store float %3, float* %arrayidx, ptr_provenance float* null, align 4, !noalias !8 + ret void +} + +; CHECK-LABEL: Function: foo2: +; CHECK: NoAlias: %2 = load float, float* %c, ptr_provenance float* null, align 4, !noalias !5 <-> store float %2, float* %arrayidx.i, ptr_provenance float* %0, align 4, !noalias !5 +; CHECK: NoAlias: %2 = load float, float* %c, ptr_provenance float* null, align 4, !noalias !5 <-> store float %2, float* %arrayidx1.i, ptr_provenance float* %1, align 4, !noalias !5 +; CHECK: MayAlias: %2 = load float, float* %c, ptr_provenance float* null, align 4, !noalias !5 <-> store float %3, float* %arrayidx, ptr_provenance float* null, align 4, !noalias !5 +; CHECK: MayAlias: %3 = load float, float* %c, align 4 <-> store float %2, float* %arrayidx.i, ptr_provenance float* %0, align 4, !noalias !5 +; CHECK: MayAlias: %3 = load float, float* %c, align 4 <-> store float %2, float* %arrayidx1.i, ptr_provenance float* %1, align 4, !noalias !5 +; CHECK: MayAlias: %3 = load float, float* %c, align 4 <-> store float %3, float* %arrayidx, ptr_provenance float* null, align 4, !noalias !5 +; CHECK: NoAlias: store float %2, float* %arrayidx1.i, ptr_provenance float* %1, align 4, !noalias !5 <-> store float %2, float* %arrayidx.i, ptr_provenance float* %0, align 4, !noalias !5 +; CHECK: NoAlias: store float %3, float* %arrayidx, ptr_provenance float* null, align 4, !noalias !5 <-> store float %2, float* %arrayidx.i, ptr_provenance float* %0, align 4, !noalias !5 +; CHECK: NoAlias: store float %3, float* %arrayidx, ptr_provenance float* null, align 4, !noalias !5 <-> store float %2, float* %arrayidx1.i, ptr_provenance float* %1, align 4, !noalias !5 + +declare float* @llvm.provenance.noalias.p0f32.p0i8.p0p0f32.p0p0f32.i32(float*, i8*, float**, float**, i32, metadata ) nounwind + +attributes #0 = { nounwind uwtable } +attributes #1 = { nounwind } + +!0 = !{!1} +!1 = distinct !{!1, !2, !"hello: %a"} +!2 = distinct !{!2, !"hello"} +!3 = !{!4} +!4 = distinct !{!4, !5, !"hello2: %a"} +!5 = distinct !{!5, !"hello2"} +!6 = !{!7} +!7 = 
distinct !{!7, !5, !"hello2: %b"} +!8 = !{!4, !7} Index: llvm/test/Analysis/ScopedNoAliasAA/noalias2.ll =================================================================== --- /dev/null +++ llvm/test/Analysis/ScopedNoAliasAA/noalias2.ll @@ -0,0 +1,113 @@ +; RUN: opt < %s -basic-aa -scoped-noalias -aa-eval -evaluate-aa-metadata -print-all-alias-modref-info -disable-output 2>&1 | FileCheck %s +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +; Function Attrs: nounwind uwtable +define void @foo(float* noalias nocapture %a, float* noalias nocapture readonly %c) #0 { +entry: + %0 = call float* @llvm.provenance.noalias.p0f32.p0i8.p0p0f32.p0p0f32.i32(float* %a, i8* null, float** null, float** null, i32 0, metadata !0) #1 + %1 = call float* @llvm.provenance.noalias.p0f32.p0i8.p0p0f32.p0p0f32.i32(float* %c, i8* null, float** null, float** null, i32 0, metadata !3) #1 + %2 = load float, float* %c, ptr_provenance float* %1, align 4, !noalias !5 + %arrayidx.i = getelementptr inbounds float, float* %a, i64 5 + store float %2, float* %arrayidx.i, ptr_provenance float* %0, align 4, !noalias !5 + %3 = load float, float* %c, ptr_provenance float* null, align 4, !noalias !5 + %arrayidx = getelementptr inbounds float, float* %a, i64 7 + store float %3, float* %arrayidx, ptr_provenance float* null, align 4, !noalias !5 + ret void +} + +; CHECK-LABEL: Function: foo: +; CHECK: NoAlias: %2 = load float, float* %c, ptr_provenance float* %1, align 4, !noalias !5 <-> store float %2, float* %arrayidx.i, ptr_provenance float* %0, align 4, !noalias !5 +; CHECK: NoAlias: %2 = load float, float* %c, ptr_provenance float* %1, align 4, !noalias !5 <-> store float %3, float* %arrayidx, ptr_provenance float* null, align 4, !noalias !5 +; CHECK: NoAlias: %3 = load float, float* %c, ptr_provenance float* null, align 4, !noalias !5 <-> store float %2, float* %arrayidx.i, ptr_provenance float* %0, align 4, !noalias !5 +; CHECK: NoAlias: %3 = load float, float* %c, ptr_provenance float* null, align 4, !noalias !5 <-> store float %3, float* %arrayidx, ptr_provenance float* null, align 4, !noalias !5 +; CHECK: NoAlias: store float %3, float* %arrayidx, ptr_provenance float* null, align 4, !noalias !5 <-> store float %2, float* %arrayidx.i, ptr_provenance float* %0, align 4, !noalias !5 + +; Function Attrs: nounwind uwtable +define void @foo2(float* nocapture %a, float* nocapture %b, float* nocapture readonly %c) #0 { +entry: + %0 = call float* @llvm.provenance.noalias.p0f32.p0i8.p0p0f32.p0p0f32.i32(float* %a, i8* null, float** null, float** null, i32 0, metadata !6) #1 + %1 = call float* @llvm.provenance.noalias.p0f32.p0i8.p0p0f32.p0p0f32.i32(float* %c, i8* null, float** null, float** null, i32 0, metadata !9) #1 + %2 = call float* @llvm.provenance.noalias.p0f32.p0i8.p0p0f32.p0p0f32.i32(float* %0, i8* null, float** null, float** null, i32 0, metadata !11) #1, !noalias !14 + %3 = call float* @llvm.provenance.noalias.p0f32.p0i8.p0p0f32.p0p0f32.i32(float* %1, i8* null, float** null, float** null, i32 0, metadata !15) #1, !noalias !14 + %4 = load float, float* %c, ptr_provenance float* %3, align 4, !noalias !17 + %arrayidx.i.i = getelementptr inbounds float, float* %a, i64 5 + store float %4, float* %arrayidx.i.i, ptr_provenance float* %2, align 4, !noalias !17 + %5 = load float, float* %c, ptr_provenance float* %1, align 4, !noalias !14 + %arrayidx.i = getelementptr 
inbounds float, float* %a, i64 7 + store float %5, float* %arrayidx.i, ptr_provenance float* %0, align 4, !noalias !14 + %6 = call float* @llvm.provenance.noalias.p0f32.p0i8.p0p0f32.p0p0f32.i32(float* %a, i8* null, float** null, float** null, i32 0, metadata !18) #1 + %7 = call float* @llvm.provenance.noalias.p0f32.p0i8.p0p0f32.p0p0f32.i32(float* %b, i8* null, float** null, float** null, i32 0, metadata !21) #1 + %8 = load float, float* %c, ptr_provenance float* null, align 4, !noalias !23 + %arrayidx.i1 = getelementptr inbounds float, float* %a, i64 6 + store float %8, float* %arrayidx.i1, ptr_provenance float* %6, align 4, !noalias !23 + %arrayidx1.i = getelementptr inbounds float, float* %b, i64 8 + store float %8, float* %arrayidx1.i, ptr_provenance float* %7, align 4, !noalias !23 + ; %9 = load float, float* %c, ptr_provenance float* null, align 4, !noalias !23 + %9 = load float, float* %c, align 4 + %arrayidx = getelementptr inbounds float, float* %a, i64 7 + store float %9, float* %arrayidx, ptr_provenance float* null, align 4, !noalias !23 + ret void +} + +; CHECK-LABEL: Function: foo2: +; CHECK: NoAlias: %4 = load float, float* %c, ptr_provenance float* %3, align 4, !noalias !11 <-> store float %4, float* %arrayidx.i.i, ptr_provenance float* %2, align 4, !noalias !11 +; CHECK: NoAlias: %4 = load float, float* %c, ptr_provenance float* %3, align 4, !noalias !11 <-> store float %5, float* %arrayidx.i, ptr_provenance float* %0, align 4, !noalias !8 +; CHECK: MayAlias: %4 = load float, float* %c, ptr_provenance float* %3, align 4, !noalias !11 <-> store float %8, float* %arrayidx.i1, ptr_provenance float* %6, align 4, !noalias !17 +; CHECK: MayAlias: %4 = load float, float* %c, ptr_provenance float* %3, align 4, !noalias !11 <-> store float %8, float* %arrayidx1.i, ptr_provenance float* %7, align 4, !noalias !17 +; CHECK: MayAlias: %4 = load float, float* %c, ptr_provenance float* %3, align 4, !noalias !11 <-> store float %9, float* %arrayidx, ptr_provenance float* null, align 4, !noalias !17 +; CHECK: NoAlias: %5 = load float, float* %c, ptr_provenance float* %1, align 4, !noalias !8 <-> store float %4, float* %arrayidx.i.i, ptr_provenance float* %2, align 4, !noalias !11 +; CHECK: NoAlias: %5 = load float, float* %c, ptr_provenance float* %1, align 4, !noalias !8 <-> store float %5, float* %arrayidx.i, ptr_provenance float* %0, align 4, !noalias !8 +; CHECK: MayAlias: %5 = load float, float* %c, ptr_provenance float* %1, align 4, !noalias !8 <-> store float %8, float* %arrayidx.i1, ptr_provenance float* %6, align 4, !noalias !17 +; CHECK: MayAlias: %5 = load float, float* %c, ptr_provenance float* %1, align 4, !noalias !8 <-> store float %8, float* %arrayidx1.i, ptr_provenance float* %7, align 4, !noalias !17 +; CHECK: MayAlias: %5 = load float, float* %c, ptr_provenance float* %1, align 4, !noalias !8 <-> store float %9, float* %arrayidx, ptr_provenance float* null, align 4, !noalias !17 +; CHECK: MayAlias: %8 = load float, float* %c, ptr_provenance float* null, align 4, !noalias !17 <-> store float %4, float* %arrayidx.i.i, ptr_provenance float* %2, align 4, !noalias !11 +; CHECK: MayAlias: %8 = load float, float* %c, ptr_provenance float* null, align 4, !noalias !17 <-> store float %5, float* %arrayidx.i, ptr_provenance float* %0, align 4, !noalias !8 +; CHECK: NoAlias: %8 = load float, float* %c, ptr_provenance float* null, align 4, !noalias !17 <-> store float %8, float* %arrayidx.i1, ptr_provenance float* %6, align 4, !noalias !17 +; CHECK: NoAlias: %8 = load float, float* %c, 
ptr_provenance float* null, align 4, !noalias !17 <-> store float %8, float* %arrayidx1.i, ptr_provenance float* %7, align 4, !noalias !17 +; CHECK: MayAlias: %8 = load float, float* %c, ptr_provenance float* null, align 4, !noalias !17 <-> store float %9, float* %arrayidx, ptr_provenance float* null, align 4, !noalias !17 +; CHECK: MayAlias: %9 = load float, float* %c, align 4 <-> store float %4, float* %arrayidx.i.i, ptr_provenance float* %2, align 4, !noalias !11 +; CHECK: MayAlias: %9 = load float, float* %c, align 4 <-> store float %5, float* %arrayidx.i, ptr_provenance float* %0, align 4, !noalias !8 +; CHECK: MayAlias: %9 = load float, float* %c, align 4 <-> store float %8, float* %arrayidx.i1, ptr_provenance float* %6, align 4, !noalias !17 +; CHECK: MayAlias: %9 = load float, float* %c, align 4 <-> store float %8, float* %arrayidx1.i, ptr_provenance float* %7, align 4, !noalias !17 +; CHECK: MayAlias: %9 = load float, float* %c, align 4 <-> store float %9, float* %arrayidx, ptr_provenance float* null, align 4, !noalias !17 +; CHECK: NoAlias: store float %5, float* %arrayidx.i, ptr_provenance float* %0, align 4, !noalias !8 <-> store float %4, float* %arrayidx.i.i, ptr_provenance float* %2, align 4, !noalias !11 +; CHECK: NoAlias: store float %8, float* %arrayidx.i1, ptr_provenance float* %6, align 4, !noalias !17 <-> store float %4, float* %arrayidx.i.i, ptr_provenance float* %2, align 4, !noalias !11 +; CHECK: NoAlias: store float %8, float* %arrayidx.i1, ptr_provenance float* %6, align 4, !noalias !17 <-> store float %5, float* %arrayidx.i, ptr_provenance float* %0, align 4, !noalias !8 +; CHECK: MayAlias: store float %8, float* %arrayidx1.i, ptr_provenance float* %7, align 4, !noalias !17 <-> store float %4, float* %arrayidx.i.i, ptr_provenance float* %2, align 4, !noalias !11 +; CHECK: MayAlias: store float %8, float* %arrayidx1.i, ptr_provenance float* %7, align 4, !noalias !17 <-> store float %5, float* %arrayidx.i, ptr_provenance float* %0, align 4, !noalias !8 +; CHECK: NoAlias: store float %8, float* %arrayidx1.i, ptr_provenance float* %7, align 4, !noalias !17 <-> store float %8, float* %arrayidx.i1, ptr_provenance float* %6, align 4, !noalias !17 +; CHECK: NoAlias: store float %9, float* %arrayidx, ptr_provenance float* null, align 4, !noalias !17 <-> store float %4, float* %arrayidx.i.i, ptr_provenance float* %2, align 4, !noalias !11 +; CHECK: MustAlias: store float %9, float* %arrayidx, ptr_provenance float* null, align 4, !noalias !17 <-> store float %5, float* %arrayidx.i, ptr_provenance float* %0, align 4, !noalias !8 +; CHECK: NoAlias: store float %9, float* %arrayidx, ptr_provenance float* null, align 4, !noalias !17 <-> store float %8, float* %arrayidx.i1, ptr_provenance float* %6, align 4, !noalias !17 +; CHECK: NoAlias: store float %9, float* %arrayidx, ptr_provenance float* null, align 4, !noalias !17 <-> store float %8, float* %arrayidx1.i, ptr_provenance float* %7, align 4, !noalias !17 + +declare float* @llvm.provenance.noalias.p0f32.p0i8.p0p0f32.p0p0f32.i32(float*, i8*, float**, float**, i32, metadata ) nounwind + +attributes #0 = { nounwind uwtable } +attributes #1 = { nounwind } + +!0 = !{!1} +!1 = distinct !{!1, !2, !"hello: %a"} +!2 = distinct !{!2, !"hello"} +!3 = !{!4} +!4 = distinct !{!4, !2, !"hello: %c"} +!5 = !{!1, !4} +!6 = !{!7} +!7 = distinct !{!7, !8, !"foo: %a"} +!8 = distinct !{!8, !"foo"} +!9 = !{!10} +!10 = distinct !{!10, !8, !"foo: %c"} +!11 = !{!12} +!12 = distinct !{!12, !13, !"hello: %a"} +!13 = distinct !{!13, !"hello"} +!14 = 
!{!7, !10} +!15 = !{!16} +!16 = distinct !{!16, !13, !"hello: %c"} +!17 = !{!12, !16, !7, !10} +!18 = !{!19} +!19 = distinct !{!19, !20, !"hello2: %a"} +!20 = distinct !{!20, !"hello2"} +!21 = !{!22} +!22 = distinct !{!22, !20, !"hello2: %b"} +!23 = !{!19, !22} Index: llvm/test/Analysis/ScopedNoAliasAA/noalias_basics.ll =================================================================== --- /dev/null +++ llvm/test/Analysis/ScopedNoAliasAA/noalias_basics.ll @@ -0,0 +1,142 @@ +; RUN: opt < %s -basic-aa -scoped-noalias -aa-eval -evaluate-aa-metadata -print-all-alias-modref-info -disable-output 2>&1 | FileCheck %s +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +; Function Attrs: nofree norecurse nounwind uwtable writeonly +define dso_local void @test_p_p(i32* nocapture %_pA, i32* nocapture %_pB) local_unnamed_addr #0 { +entry: + store i32 42, i32* %_pA, align 4, !tbaa !2 + store i32 99, i32* %_pB, align 4, !tbaa !2 + ret void +} +; CHECK-LABEL: Function: test_p_p: +; CHECK: MayAlias: store i32 99, i32* %_pB, align 4, !tbaa !2 <-> store i32 42, i32* %_pA, align 4, !tbaa !2 + +; Function Attrs: nounwind uwtable +define dso_local void @test_rp_p(i32* nocapture %_pA, i32* nocapture %_pB) local_unnamed_addr #1 { +entry: + %0 = tail call i8* @llvm.noalias.decl.p0i8.p0p0i32.i32(i32** null, i32 0, metadata !6) + %1 = tail call i32* @llvm.provenance.noalias.p0i32.p0i8.p0p0i32.p0p0i32.i32(i32* %_pA, i8* %0, i32** null, i32** undef, i32 0, metadata !6), !tbaa !9, !noalias !6 + store i32 42, i32* %_pA, ptr_provenance i32* %1, align 4, !tbaa !2, !noalias !6 + store i32 99, i32* %_pB, ptr_provenance i32* undef, align 4, !tbaa !2, !noalias !6 + ret void +} +; CHECK-LABEL: Function: test_rp_p: +; CHECK: NoAlias: store i32 99, i32* %_pB, ptr_provenance i32* undef, align 4, !tbaa !9, !noalias !2 <-> store i32 42, i32* %_pA, ptr_provenance i32* %1, align 4, !tbaa !9, !noalias !2 + +; Function Attrs: argmemonly nounwind +declare i8* @llvm.noalias.decl.p0i8.p0p0i32.i32(i32**, i32, metadata) #2 + +; Function Attrs: nounwind uwtable +define dso_local void @test_rp_rp_00(i32* nocapture %_pA, i32* nocapture %_pB) local_unnamed_addr #1 { +entry: + %0 = tail call i8* @llvm.noalias.decl.p0i8.p0p0i32.i32(i32** null, i32 0, metadata !11) + %1 = tail call i8* @llvm.noalias.decl.p0i8.p0p0i32.i32(i32** null, i32 0, metadata !14) + %2 = tail call i32* @llvm.provenance.noalias.p0i32.p0i8.p0p0i32.p0p0i32.i32(i32* %_pA, i8* %0, i32** null, i32** undef, i32 0, metadata !11), !tbaa !9, !noalias !16 + store i32 42, i32* %_pA, ptr_provenance i32* %2, align 4, !tbaa !2, !noalias !16 + %3 = tail call i32* @llvm.provenance.noalias.p0i32.p0i8.p0p0i32.p0p0i32.i32(i32* %_pB, i8* %1, i32** null, i32** undef, i32 0, metadata !14), !tbaa !9, !noalias !16 + store i32 99, i32* %_pB, ptr_provenance i32* %3, align 4, !tbaa !2, !noalias !16 + ret void +} +; CHECK-LABEL: Function: test_rp_rp_00: +; CHECK: NoAlias: store i32 99, i32* %_pB, ptr_provenance i32* %3, align 4, !tbaa !12, !noalias !11 <-> store i32 42, i32* %_pA, ptr_provenance i32* %2, align 4, !tbaa !12, !noalias !11 + +; Now test variants: {objectP, objectID, Scope } +; NOTE: in the following tests, the Scope information is recycled from previous tests + +; Same info -> MayAlias +; Function Attrs: nounwind uwtable +define dso_local void @test_rp_rp_01(i32* nocapture %_pA, i32* nocapture %_pB) local_unnamed_addr #1 { +entry: + %a.p = alloca i32*, align 8 + %b.p = alloca i32*, align 8 + %0 = tail call i8* 
@llvm.noalias.decl.p0i8.p0p0i32.i32(i32** %a.p, i32 0, metadata !11) + %1 = tail call i8* @llvm.noalias.decl.p0i8.p0p0i32.i32(i32** %b.p, i32 0, metadata !11) + %2 = tail call i32* @llvm.provenance.noalias.p0i32.p0i8.p0p0i32.p0p0i32.i32(i32* %_pA, i8* null, i32** %a.p, i32** undef, i32 0, metadata !11), !tbaa !9, !noalias !16 + store i32 42, i32* %_pA, ptr_provenance i32* %2, align 4, !tbaa !2, !noalias !16 + %3 = tail call i32* @llvm.provenance.noalias.p0i32.p0i8.p0p0i32.p0p0i32.i32(i32* %_pB, i8* null, i32** %a.p, i32** undef, i32 0, metadata !11), !tbaa !9, !noalias !16 + store i32 99, i32* %_pB, ptr_provenance i32* %3, align 4, !tbaa !2, !noalias !16 + ret void +} +; CHECK-LABEL: Function: test_rp_rp_01: +; CHECK: MayAlias: store i32 99, i32* %_pB, ptr_provenance i32* %3, align 4, !tbaa !11, !noalias !9 <-> store i32 42, i32* %_pA, ptr_provenance i32* %2, align 4, !tbaa !11, !noalias !9 + +; Variants with different info -> NoAlias + +; Function Attrs: nounwind uwtable +define dso_local void @test_rp_rp_02(i32* nocapture %_pA, i32* nocapture %_pB) local_unnamed_addr #1 { +entry: + %a.p = alloca i32*, align 8 + %b.p = alloca i32*, align 8 + %0 = tail call i8* @llvm.noalias.decl.p0i8.p0p0i32.i32(i32** %a.p, i32 0, metadata !11) + %1 = tail call i8* @llvm.noalias.decl.p0i8.p0p0i32.i32(i32** %b.p, i32 0, metadata !11) + %2 = tail call i32* @llvm.provenance.noalias.p0i32.p0i8.p0p0i32.p0p0i32.i32(i32* %_pA, i8* null, i32** %a.p, i32** undef, i32 0, metadata !11), !tbaa !9, !noalias !16 + store i32 42, i32* %_pA, ptr_provenance i32* %2, align 4, !tbaa !2, !noalias !16 + %3 = tail call i32* @llvm.provenance.noalias.p0i32.p0i8.p0p0i32.p0p0i32.i32(i32* %_pB, i8* null, i32** %b.p, i32** undef, i32 0, metadata !11), !tbaa !9, !noalias !16 + store i32 99, i32* %_pB, ptr_provenance i32* %3, align 4, !tbaa !2, !noalias !16 + ret void +} +; CHECK-LABEL: Function: test_rp_rp_02: +; CHECK: NoAlias: store i32 99, i32* %_pB, ptr_provenance i32* %3, align 4, !tbaa !11, !noalias !9 <-> store i32 42, i32* %_pA, ptr_provenance i32* %2, align 4, !tbaa !11, !noalias !9 + +; Function Attrs: nounwind uwtable +define dso_local void @test_rp_rp_03(i32* nocapture %_pA, i32* nocapture %_pB) local_unnamed_addr #1 { +entry: + %a.p = alloca i32*, align 8 + %b.p = alloca i32*, align 8 + %0 = tail call i8* @llvm.noalias.decl.p0i8.p0p0i32.i32(i32** %a.p, i32 0, metadata !11) + %1 = tail call i8* @llvm.noalias.decl.p0i8.p0p0i32.i32(i32** %b.p, i32 0, metadata !11) + %2 = tail call i32* @llvm.provenance.noalias.p0i32.p0i8.p0p0i32.p0p0i32.i32(i32* %_pA, i8* null, i32** %a.p, i32** undef, i32 0, metadata !11), !tbaa !9, !noalias !16 + store i32 42, i32* %_pA, ptr_provenance i32* %2, align 4, !tbaa !2, !noalias !16 + %3 = tail call i32* @llvm.provenance.noalias.p0i32.p0i8.p0p0i32.p0p0i32.i32(i32* %_pB, i8* null, i32** %a.p, i32** undef, i32 1, metadata !11), !tbaa !9, !noalias !16 + store i32 99, i32* %_pB, ptr_provenance i32* %3, align 4, !tbaa !2, !noalias !16 + ret void +} +; CHECK-LABEL: Function: test_rp_rp_03: +; CHECK: NoAlias: store i32 99, i32* %_pB, ptr_provenance i32* %3, align 4, !tbaa !11, !noalias !9 <-> store i32 42, i32* %_pA, ptr_provenance i32* %2, align 4, !tbaa !11, !noalias !9 + +; Function Attrs: nounwind uwtable +define dso_local void @test_rp_rp_04(i32* nocapture %_pA, i32* nocapture %_pB) local_unnamed_addr #1 { +entry: + %a.p = alloca i32*, align 8 + %b.p = alloca i32*, align 8 + %0 = tail call i8* @llvm.noalias.decl.p0i8.p0p0i32.i32(i32** %a.p, i32 0, metadata !11) + %1 = tail call i8* 
@llvm.noalias.decl.p0i8.p0p0i32.i32(i32** %b.p, i32 0, metadata !11) + %2 = tail call i32* @llvm.provenance.noalias.p0i32.p0i8.p0p0i32.p0p0i32.i32(i32* %_pA, i8* null, i32** %a.p, i32** undef, i32 0, metadata !11), !tbaa !9, !noalias !16 + store i32 42, i32* %_pA, ptr_provenance i32* %2, align 4, !tbaa !2, !noalias !16 + %3 = tail call i32* @llvm.provenance.noalias.p0i32.p0i8.p0p0i32.p0p0i32.i32(i32* %_pB, i8* null, i32** %a.p, i32** undef, i32 0, metadata !14), !tbaa !9, !noalias !16 + store i32 99, i32* %_pB, ptr_provenance i32* %3, align 4, !tbaa !2, !noalias !16 + ret void +} +; CHECK-LABEL: Function: test_rp_rp_04: +; CHECK: NoAlias: store i32 99, i32* %_pB, ptr_provenance i32* %3, align 4, !tbaa !11, !noalias !9 <-> store i32 42, i32* %_pA, ptr_provenance i32* %2, align 4, !tbaa !11, !noalias !9 + + +; Function Attrs: nounwind readnone speculatable +declare i32* @llvm.provenance.noalias.p0i32.p0i8.p0p0i32.p0p0i32.i32(i32*, i8*, i32**, i32**, i32, metadata) #3 + +attributes #0 = { nofree norecurse nounwind uwtable writeonly "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #1 = { nounwind uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #2 = { argmemonly nounwind } +attributes #3 = { nounwind readnone speculatable } + +!llvm.module.flags = !{!0} +!llvm.ident = !{!1} + +!0 = !{i32 1, !"wchar_size", i32 4} +!1 = !{!"clang"} +!2 = !{!3, !3, i64 0} +!3 = !{!"int", !4, i64 0} +!4 = !{!"omnipotent char", !5, i64 0} +!5 = !{!"Simple C/C++ TBAA"} +!6 = !{!7} +!7 = distinct !{!7, !8, !"test_rp_p: pA"} +!8 = distinct !{!8, !"test_rp_p"} +!9 = !{!10, !10, i64 0} +!10 = !{!"any pointer", !4, i64 0} +!11 = !{!12} +!12 = distinct !{!12, !13, !"test_rp_rp: pA"} +!13 = distinct !{!13, !"test_rp_rp"} +!14 = !{!15} +!15 = distinct !{!15, !13, !"test_rp_rp: pB"} +!16 = !{!12, !15} Index: llvm/test/Analysis/ScopedNoAliasAA/noalias_member.ll =================================================================== --- /dev/null +++ llvm/test/Analysis/ScopedNoAliasAA/noalias_member.ll @@ -0,0 +1,64 @@ +; RUN: opt < %s -basic-aa -scoped-noalias -aa-eval -evaluate-aa-metadata -print-all-alias-modref-info -disable-output 2>&1 | FileCheck %s +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +%struct.FOO = type { i32*, i32* } + +; Function Attrs: nofree nounwind +define dso_local void @test_prp0_prp1(i32** nocapture %_pA) local_unnamed_addr #0 !noalias !2 { +entry: + %0 = load i32*, i32** %_pA, ptr_provenance i32** undef, align 4, !tbaa !5, !noalias !2 + %1 = tail call i32* @llvm.provenance.noalias.p0i32.p0i8.p0p0i32.p0p0i32.i32(i32* %0, i8* null, i32** %_pA, i32** undef, i32 0, metadata !2), !tbaa !5, !noalias !2 
+ %arrayidx1 = getelementptr inbounds i32*, i32** %_pA, i32 1 + %2 = load i32*, i32** %arrayidx1, ptr_provenance i32** undef, align 4, !tbaa !5, !noalias !2 + %3 = tail call i32* @llvm.provenance.noalias.p0i32.p0i8.p0p0i32.p0p0i32.i32(i32* %2, i8* null, i32** nonnull %arrayidx1, i32** undef, i32 0, metadata !2), !tbaa !5, !noalias !2 + store i32 42, i32* %0, ptr_provenance i32* %1, align 4, !tbaa !9, !noalias !2 + store i32 99, i32* %2, ptr_provenance i32* %3, align 4, !tbaa !9, !noalias !2 + ret void +} +; CHECK-LABEL: Function: test_prp0_prp1: +; CHECK: NoAlias: store i32 99, i32* %2, ptr_provenance i32* %3, align 4, !tbaa !9, !noalias !2 <-> store i32 42, i32* %0, ptr_provenance i32* %1, align 4, !tbaa !9, !noalias !2 + +; Function Attrs: nofree nounwind +define dso_local void @test_prS0_prS1(%struct.FOO* nocapture %_pS) local_unnamed_addr #0 !noalias !11 { +entry: + %mpA = getelementptr inbounds %struct.FOO, %struct.FOO* %_pS, i32 0, i32 0 + %0 = load i32*, i32** %mpA, ptr_provenance i32** undef, align 4, !tbaa !14, !noalias !11 + %1 = tail call i32* @llvm.provenance.noalias.p0i32.p0i8.p0p0i32.p0p0i32.i32(i32* %0, i8* null, i32** %mpA, i32** undef, i32 0, metadata !11), !tbaa !14, !noalias !11 + %mpB = getelementptr inbounds %struct.FOO, %struct.FOO* %_pS, i32 0, i32 1 + %2 = load i32*, i32** %mpB, ptr_provenance i32** undef, align 4, !tbaa !16, !noalias !11 + %3 = tail call i32* @llvm.provenance.noalias.p0i32.p0i8.p0p0i32.p0p0i32.i32(i32* %2, i8* null, i32** nonnull %mpB, i32** undef, i32 0, metadata !11), !tbaa !16, !noalias !11 + store i32 42, i32* %0, ptr_provenance i32* %1, align 4, !tbaa !9, !noalias !11 + store i32 99, i32* %2, ptr_provenance i32* %3, align 4, !tbaa !9, !noalias !11 + ret void +} +; CHECK-LABEL: Function: test_prS0_prS1: +; CHECK: NoAlias: store i32 99, i32* %2, ptr_provenance i32* %3, align 4, !tbaa !11, !noalias !2 <-> store i32 42, i32* %0, ptr_provenance i32* %1, align 4, !tbaa !11, !noalias !2 + + +; Function Attrs: nounwind readnone speculatable +declare i32* @llvm.provenance.noalias.p0i32.p0i8.p0p0i32.p0p0i32.i32(i32*, i8*, i32**, i32**, i32, metadata) #1 + +attributes #0 = { nofree nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #1 = { nounwind readnone speculatable } + +!llvm.module.flags = !{!0} +!llvm.ident = !{!1} + +!0 = !{i32 1, !"wchar_size", i32 4} +!1 = !{!"clang"} +!2 = !{!3} +!3 = distinct !{!3, !4, !"test_prp0_prp1: unknown scope"} +!4 = distinct !{!4, !"test_prp0_prp1"} +!5 = !{!6, !6, i64 0, i64 4} +!6 = !{!7, i64 4, !"any pointer"} +!7 = !{!8, i64 1, !"omnipotent char"} +!8 = !{!"Simple C/C++ TBAA"} +!9 = !{!10, !10, i64 0, i64 4} +!10 = !{!7, i64 4, !"int"} +!11 = !{!12} +!12 = distinct !{!12, !13, !"test_prS0_prS1: unknown scope"} +!13 = distinct !{!13, !"test_prS0_prS1"} +!14 = !{!15, !6, i64 0, i64 4} +!15 = !{!7, i64 8, !"FOO", !6, i64 0, i64 4, !6, i64 4, i64 4} +!16 = !{!15, !6, i64 4, i64 4} Index: llvm/test/Analysis/ScopedNoAliasAA/noalias_phi.ll =================================================================== --- /dev/null +++ llvm/test/Analysis/ScopedNoAliasAA/noalias_phi.ll @@ -0,0 +1,230 @@ +; RUN: opt < %s -basic-aa -scoped-noalias 
-aa-eval -evaluate-aa-metadata -print-all-alias-modref-info -disable-output 2>&1 | FileCheck %s +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +; Function Attrs: nofree norecurse nounwind writeonly +define dso_local void @test_phi_p_p_p(i32* nocapture %_pA, i32* nocapture %_pB, i32* nocapture %_pC, i32 %c) local_unnamed_addr #0 { +entry: + %tobool = icmp eq i32 %c, 0 + br i1 %tobool, label %cond.false, label %cond.true + +cond.true: ; preds = %entry + br label %cond.end + +cond.false: ; preds = %entry + br label %cond.end + +cond.end: ; preds = %cond.false, %cond.true + %cond = phi i32* [ %_pA, %cond.true ], [ %_pB, %cond.false ] + store i32 42, i32* %cond, ptr_provenance i32* undef, align 4, !tbaa !2 + store i32 99, i32* %_pC, ptr_provenance i32* undef, align 4, !tbaa !2 + ret void +} +; CHECK-LABEL: Function: test_phi_p_p_p: +; CHECK: MayAlias: store i32 99, i32* %_pC, ptr_provenance i32* undef, align 4, !tbaa !2 <-> store i32 42, i32* %cond, ptr_provenance i32* undef, align 4, !tbaa !2 + +; Function Attrs: nounwind +define dso_local void @test_phi_rp_p_p(i32* nocapture %_pA, i32* nocapture %_pB, i32* nocapture %_pC, i32 %c) local_unnamed_addr #1 { +entry: + %0 = tail call i8* @llvm.noalias.decl.p0i8.p0p0i32.i32(i32** null, i32 0, metadata !6) + %tobool = icmp ne i32 %c, 0 + %1 = tail call i32* @llvm.provenance.noalias.p0i32.p0i8.p0p0i32.p0p0i32.i32(i32* %_pA, i8* %0, i32** null, i32** undef, i32 0, metadata !6) + br i1 %tobool, label %cond.false, label %cond.true + +cond.true: ; preds = %entry + br label %cond.end + +cond.false: ; preds = %entry + br label %cond.end + +cond.end: ; preds = %cond.false, %cond.true + %cond = phi i32* [ %_pA, %cond.true ], [ %_pB, %cond.false ] + %prov.cond = phi i32* [ %1, %cond.true ], [ %_pB, %cond.false ] + store i32 42, i32* %cond, ptr_provenance i32* %prov.cond, align 4, !tbaa !2, !noalias !6 + store i32 99, i32* %_pC, ptr_provenance i32* undef, align 4, !tbaa !2, !noalias !6 + ret void +} +; CHECK-LABEL: Function: test_phi_rp_p_p: +; CHECK: MayAlias: store i32 99, i32* %_pC, ptr_provenance i32* undef, align 4, !tbaa !5, !noalias !2 <-> store i32 42, i32* %cond, ptr_provenance i32* %prov.cond, align 4, !tbaa !5, !noalias !2 + + +; Function Attrs: argmemonly nounwind +declare i8* @llvm.noalias.decl.p0i8.p0p0i32.i32(i32**, i32, metadata) #2 + +; Function Attrs: nounwind +define dso_local void @test_phi_p_rp_p(i32* nocapture %_pA, i32* nocapture %_pB, i32* nocapture %_pC, i32 %c) local_unnamed_addr #1 { +entry: + %0 = tail call i8* @llvm.noalias.decl.p0i8.p0p0i32.i32(i32** null, i32 0, metadata !9) + %tobool = icmp ne i32 %c, 0 + %1 = tail call i32* @llvm.provenance.noalias.p0i32.p0i8.p0p0i32.p0p0i32.i32(i32* %_pB, i8* %0, i32** null, i32** undef, i32 0, metadata !9) + br i1 %tobool, label %cond.false, label %cond.true + +cond.true: ; preds = %entry + br label %cond.end + +cond.false: ; preds = %entry + br label %cond.end + +cond.end: ; preds = %cond.false, %cond.true + %cond = phi i32* [ %_pA, %cond.true ], [ %_pB, %cond.false ] + %prov.cond = phi i32* [ %_pA, %cond.true ], [ %1, %cond.false ] + store i32 42, i32* %cond, ptr_provenance i32* %prov.cond, align 4, !tbaa !2, !noalias !9 + store i32 99, i32* %_pC, ptr_provenance i32* undef, align 4, !tbaa !2, !noalias !9 + ret void +} +; CHECK-LABEL: Function: test_phi_p_rp_p: +; CHECK: MayAlias: store i32 99, i32* %_pC, ptr_provenance i32* undef, align 4, !tbaa !5, !noalias !2 <-> store i32 42, i32* %cond, ptr_provenance i32* %prov.cond, align 4, 
!tbaa !5, !noalias !2 + +; Function Attrs: nounwind +define dso_local void @test_phi_rp_rp_p(i32* nocapture %_pA, i32* nocapture %_pB, i32* nocapture %_pC, i32 %c) local_unnamed_addr #1 { +entry: + %0 = tail call i8* @llvm.noalias.decl.p0i8.p0p0i32.i32(i32** null, i32 0, metadata !12) + %1 = tail call i8* @llvm.noalias.decl.p0i8.p0p0i32.i32(i32** null, i32 0, metadata !15) + %tobool = icmp ne i32 %c, 0 + %2 = tail call i32* @llvm.provenance.noalias.p0i32.p0i8.p0p0i32.p0p0i32.i32(i32* %_pA, i8* %0, i32** null, i32** undef, i32 0, metadata !12) + %3 = tail call i32* @llvm.provenance.noalias.p0i32.p0i8.p0p0i32.p0p0i32.i32(i32* %_pB, i8* %1, i32** null, i32** undef, i32 0, metadata !15) + br i1 %tobool, label %cond.false, label %cond.true + +cond.true: ; preds = %entry + br label %cond.end + +cond.false: ; preds = %entry + br label %cond.end + +cond.end: ; preds = %cond.false, %cond.true + %cond = phi i32* [ %_pA, %cond.true ], [ %_pB, %cond.false ] + %prov.cond = phi i32* [ %2, %cond.true ], [ %3, %cond.false ] + store i32 42, i32* %cond, ptr_provenance i32* %prov.cond, align 4, !tbaa !2, !noalias !17 + store i32 99, i32* %_pC, ptr_provenance i32* undef, align 4, !tbaa !2, !noalias !17 + ret void +} +; CHECK-LABEL: Function: test_phi_rp_rp_p: +; CHECK: NoAlias: store i32 99, i32* %_pC, ptr_provenance i32* undef, align 4, !tbaa !7, !noalias !11 <-> store i32 42, i32* %cond, ptr_provenance i32* %prov.cond, align 4, !tbaa !7, !noalias !11 + +; Function Attrs: nounwind +define dso_local void @test_phi_p_p_rp(i32* nocapture %_pA, i32* nocapture %_pB, i32* nocapture %_pC, i32 %c) local_unnamed_addr #1 { +entry: + %0 = tail call i8* @llvm.noalias.decl.p0i8.p0p0i32.i32(i32** null, i32 0, metadata !18) + %tobool = icmp eq i32 %c, 0 + br i1 %tobool, label %cond.false, label %cond.true + +cond.true: ; preds = %entry + br label %cond.end + +cond.false: ; preds = %entry + br label %cond.end + +cond.end: ; preds = %cond.false, %cond.true + %cond = phi i32* [ %_pA, %cond.true ], [ %_pB, %cond.false ] + store i32 42, i32* %cond, ptr_provenance i32* undef, align 4, !tbaa !2, !noalias !18 + %1 = tail call i32* @llvm.provenance.noalias.p0i32.p0i8.p0p0i32.p0p0i32.i32(i32* %_pC, i8* %0, i32** null, i32** undef, i32 0, metadata !18), !tbaa !21, !noalias !18 + store i32 99, i32* %_pC, ptr_provenance i32* %1, align 4, !tbaa !2, !noalias !18 + ret void +} +; CHECK-LABEL: Function: test_phi_p_p_rp: +; CHECK: NoAlias: store i32 99, i32* %_pC, ptr_provenance i32* %1, align 4, !tbaa !5, !noalias !2 <-> store i32 42, i32* %cond, ptr_provenance i32* undef, align 4, !tbaa !5, !noalias !2 + +; Function Attrs: nounwind +define dso_local void @test_phi_rp_rp_rp_01(i32* nocapture %_pA, i32* nocapture %_pB, i32* nocapture %_pC, i32 %c) local_unnamed_addr #1 { +entry: + %0 = tail call i8* @llvm.noalias.decl.p0i8.p0p0i32.i32(i32** null, i32 0, metadata !23) + %1 = tail call i8* @llvm.noalias.decl.p0i8.p0p0i32.i32(i32** null, i32 0, metadata !26) + %2 = tail call i8* @llvm.noalias.decl.p0i8.p0p0i32.i32(i32** null, i32 0, metadata !28) + %tobool = icmp ne i32 %c, 0 + %3 = tail call i32* @llvm.provenance.noalias.p0i32.p0i8.p0p0i32.p0p0i32.i32(i32* %_pA, i8* %0, i32** null, i32** undef, i32 0, metadata !23) + %4 = tail call i32* @llvm.provenance.noalias.p0i32.p0i8.p0p0i32.p0p0i32.i32(i32* %_pB, i8* %1, i32** null, i32** undef, i32 0, metadata !26) + br i1 %tobool, label %cond.false, label %cond.true + +cond.true: ; preds = %entry + br label %cond.end + +cond.false: ; preds = %entry + br label %cond.end + +cond.end: ; preds = 
%cond.false, %cond.true + %cond = phi i32* [ %_pA, %cond.true ], [ %_pB, %cond.false ] + %prov.cond = phi i32* [ %3, %cond.true ], [ %4, %cond.false ] + store i32 42, i32* %cond, ptr_provenance i32* %prov.cond, align 4, !tbaa !2, !noalias !30 + %5 = tail call i32* @llvm.provenance.noalias.p0i32.p0i8.p0p0i32.p0p0i32.i32(i32* %_pC, i8* %2, i32** null, i32** undef, i32 0, metadata !28), !tbaa !21, !noalias !30 + store i32 99, i32* %_pC, ptr_provenance i32* %5, align 4, !tbaa !2, !noalias !30 + ret void +} + +; CHECK-LABEL: Function: test_phi_rp_rp_rp_01: +; CHECK: NoAlias: store i32 99, i32* %_pC, ptr_provenance i32* %5, align 4, !tbaa !9, !noalias !13 <-> store i32 42, i32* %cond, ptr_provenance i32* %prov.cond, align 4, !tbaa !9, !noalias !13 + +; Function Attrs: nounwind +define dso_local void @test_phi_rp_rp_rp_02(i32* nocapture %_pA, i32* nocapture readnone %_pB, i32* nocapture %_pC, i32 %c) local_unnamed_addr #1 { +entry: + %0 = tail call i8* @llvm.noalias.decl.p0i8.p0p0i32.i32(i32** null, i32 0, metadata !31) + %1 = tail call i8* @llvm.noalias.decl.p0i8.p0p0i32.i32(i32** null, i32 0, metadata !34) + %tobool = icmp ne i32 %c, 0 + %2 = tail call i32* @llvm.provenance.noalias.p0i32.p0i8.p0p0i32.p0p0i32.i32(i32* %_pA, i8* %0, i32** null, i32** undef, i32 0, metadata !31) + %3 = tail call i32* @llvm.provenance.noalias.p0i32.p0i8.p0p0i32.p0p0i32.i32(i32* %_pC, i8* %1, i32** null, i32** undef, i32 0, metadata !34) + br i1 %tobool, label %cond.false, label %cond.true + +cond.true: ; preds = %entry + br label %cond.end + +cond.false: ; preds = %entry + br label %cond.end + +cond.end: ; preds = %cond.false, %cond.true + %cond = phi i32* [ %_pA, %cond.true ], [ %_pC, %cond.false ] + %prov.cond = phi i32* [ %2, %cond.true ], [ %3, %cond.false ] + store i32 42, i32* %cond, ptr_provenance i32* %prov.cond, align 4, !tbaa !2, !noalias !36 + store i32 99, i32* %_pC, ptr_provenance i32* %3, align 4, !tbaa !2, !noalias !36 + ret void +} +; CHECK-LABEL: Function: test_phi_rp_rp_rp_02: +; CHECK: MayAlias: store i32 99, i32* %_pC, ptr_provenance i32* %3, align 4, !tbaa !7, !noalias !11 <-> store i32 42, i32* %cond, ptr_provenance i32* %prov.cond, align 4, !tbaa !7, !noalias !11 + +; Function Attrs: nounwind readnone speculatable +declare i32* @llvm.provenance.noalias.p0i32.p0i8.p0p0i32.p0p0i32.i32(i32*, i8*, i32**, i32**, i32, metadata) #3 + +attributes #0 = { nofree norecurse nounwind writeonly "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #1 = { nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #2 = { argmemonly nounwind } +attributes #3 = { nounwind readnone speculatable } + +!llvm.module.flags = !{!0} +!llvm.ident = !{!1} + +!0 = !{i32 1, !"wchar_size", i32 4} +!1 = !{!"clang"} +!2 = !{!3, !3, i64 0, i64 4} +!3 = !{!4, i64 4, !"int"} +!4 = !{!5, i64 1, !"omnipotent char"} +!5 = !{!"Simple C/C++ TBAA"} +!6 = !{!7} +!7 
= distinct !{!7, !8, !"test_phi_rp_p_p: pA"} +!8 = distinct !{!8, !"test_phi_rp_p_p"} +!9 = !{!10} +!10 = distinct !{!10, !11, !"test_phi_p_rp_p: pB"} +!11 = distinct !{!11, !"test_phi_p_rp_p"} +!12 = !{!13} +!13 = distinct !{!13, !14, !"test_phi_rp_rp_p: pA"} +!14 = distinct !{!14, !"test_phi_rp_rp_p"} +!15 = !{!16} +!16 = distinct !{!16, !14, !"test_phi_rp_rp_p: pB"} +!17 = !{!13, !16} +!18 = !{!19} +!19 = distinct !{!19, !20, !"test_phi_p_p_rp: pC"} +!20 = distinct !{!20, !"test_phi_p_p_rp"} +!21 = !{!22, !22, i64 0, i64 4} +!22 = !{!4, i64 4, !"any pointer"} +!23 = !{!24} +!24 = distinct !{!24, !25, !"test_phi_rp_rp_rp_01: pA"} +!25 = distinct !{!25, !"test_phi_rp_rp_rp_01"} +!26 = !{!27} +!27 = distinct !{!27, !25, !"test_phi_rp_rp_rp_01: pB"} +!28 = !{!29} +!29 = distinct !{!29, !25, !"test_phi_rp_rp_rp_01: pC"} +!30 = !{!24, !27, !29} +!31 = !{!32} +!32 = distinct !{!32, !33, !"test_phi_rp_rp_rp_02: pA"} +!33 = distinct !{!33, !"test_phi_rp_rp_rp_02"} +!34 = !{!35} +!35 = distinct !{!35, !33, !"test_phi_rp_rp_rp_02: pC"} +!36 = !{!32, !37, !35} +!37 = distinct !{!37, !33, !"test_phi_rp_rp_rp_02: pB"} Index: llvm/test/Analysis/ScopedNoAliasAA/noalias_phi_in_loop.ll =================================================================== --- /dev/null +++ llvm/test/Analysis/ScopedNoAliasAA/noalias_phi_in_loop.ll @@ -0,0 +1,154 @@ +; RUN: opt < %s -basic-aa -scoped-noalias -aa-eval -evaluate-aa-metadata -print-all-alias-modref-info -disable-output 2>&1 | FileCheck %s +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +; Function Attrs: nounwind +define dso_local void @test_complex_phi_01(i32* nocapture %_pA, i32** nocapture readonly %_pB, i32 %n) local_unnamed_addr #0 { +entry: + %0 = tail call i8* @llvm.noalias.decl.p0i8.p0p0i32.i32(i32** null, i32 0, metadata !2) + %1 = tail call i32* @llvm.provenance.noalias.p0i32.p0i8.p0p0i32.p0p0i32.i32(i32* %_pA, i8* %0, i32** null, i32** undef, i32 0, metadata !2), !tbaa !5, !noalias !2 + %2 = load i32*, i32** %_pB, ptr_provenance i32** undef, align 4, !tbaa !5, !noalias !2 + %arrayidx1 = getelementptr inbounds i32*, i32** %_pB, i32 1 + %3 = load i32*, i32** %arrayidx1, ptr_provenance i32** undef, align 4, !tbaa !5, !noalias !2 + %arrayidx2 = getelementptr inbounds i32*, i32** %_pB, i32 2 + %4 = load i32*, i32** %arrayidx2, ptr_provenance i32** undef, align 4, !tbaa !5, !noalias !2 + %arrayidx3 = getelementptr inbounds i32*, i32** %_pB, i32 3 + %5 = load i32*, i32** %arrayidx3, ptr_provenance i32** undef, align 4, !tbaa !5, !noalias !2 + %arrayidx4 = getelementptr inbounds i32*, i32** %_pB, i32 4 + %6 = load i32*, i32** %arrayidx4, ptr_provenance i32** undef, align 4, !tbaa !5, !noalias !2 + %arrayidx5 = getelementptr inbounds i32*, i32** %_pB, i32 5 + %7 = load i32*, i32** %arrayidx5, ptr_provenance i32** undef, align 4, !tbaa !5, !noalias !2 + %arrayidx6 = getelementptr inbounds i32*, i32** %_pB, i32 6 + %8 = load i32*, i32** %arrayidx6, ptr_provenance i32** undef, align 4, !tbaa !5, !noalias !2 + %arrayidx7 = getelementptr inbounds i32*, i32** %_pB, i32 7 + %9 = load i32*, i32** %arrayidx7, ptr_provenance i32** undef, align 4, !tbaa !5, !noalias !2 + %arrayidx8 = getelementptr inbounds i32*, i32** %_pB, i32 8 + %10 = load i32*, i32** %arrayidx8, ptr_provenance i32** undef, align 4, !tbaa !5, !noalias !2 + %arrayidx9 = getelementptr inbounds i32*, i32** %_pB, i32 9 + %11 = load i32*, i32** %arrayidx9, ptr_provenance i32** undef, align 4, !tbaa !5, !noalias !2 + br label %for.cond + +for.cond: 
; preds = %for.cond, %entry + %prov.pTmp00.0 = phi i32* [ %2, %entry ], [ %prov.pTmp01.0, %for.cond ] + %pTmp00.0 = phi i32* [ %2, %entry ], [ %pTmp01.0, %for.cond ] + %prov.pTmp01.0 = phi i32* [ %3, %entry ], [ %prov.pTmp02.0, %for.cond ] + %pTmp01.0 = phi i32* [ %3, %entry ], [ %pTmp02.0, %for.cond ] + %prov.pTmp02.0 = phi i32* [ %4, %entry ], [ %prov.pTmp03.0, %for.cond ] + %pTmp02.0 = phi i32* [ %4, %entry ], [ %pTmp03.0, %for.cond ] + %prov.pTmp03.0 = phi i32* [ %5, %entry ], [ %prov.pTmp04.0, %for.cond ] + %pTmp03.0 = phi i32* [ %5, %entry ], [ %pTmp04.0, %for.cond ] + %prov.pTmp04.0 = phi i32* [ %6, %entry ], [ %prov.pTmp05.0, %for.cond ] + %pTmp04.0 = phi i32* [ %6, %entry ], [ %pTmp05.0, %for.cond ] + %prov.pTmp05.0 = phi i32* [ %7, %entry ], [ %prov.pTmp06.0, %for.cond ] + %pTmp05.0 = phi i32* [ %7, %entry ], [ %pTmp06.0, %for.cond ] + %prov.pTmp06.0 = phi i32* [ %8, %entry ], [ %prov.pTmp07.0, %for.cond ] + %pTmp06.0 = phi i32* [ %8, %entry ], [ %pTmp07.0, %for.cond ] + %prov.pTmp07.0 = phi i32* [ %9, %entry ], [ %prov.pTmp08.0, %for.cond ] + %pTmp07.0 = phi i32* [ %9, %entry ], [ %pTmp08.0, %for.cond ] + %prov.pTmp08.0 = phi i32* [ %10, %entry ], [ %prov.pTmp09.0, %for.cond ] + %pTmp08.0 = phi i32* [ %10, %entry ], [ %pTmp09.0, %for.cond ] + %prov.pTmp09.0 = phi i32* [ %11, %entry ], [ %1, %for.cond ] + %pTmp09.0 = phi i32* [ %11, %entry ], [ %_pA, %for.cond ] + %i.0 = phi i32 [ 0, %entry ], [ %inc, %for.cond ] + %cmp = icmp slt i32 %i.0, %n + %inc = add nuw nsw i32 %i.0, 1 + br i1 %cmp, label %for.cond, label %for.cond.cleanup + +for.cond.cleanup: ; preds = %for.cond + store i32 99, i32* %_pA, ptr_provenance i32* %1, align 4, !tbaa !9, !noalias !2 + store i32 42, i32* %pTmp00.0, ptr_provenance i32* %prov.pTmp00.0, align 4, !tbaa !9, !noalias !2 + ret void +} + +; CHECK-LABEL: Function: test_complex_phi_01: +; CHECK: MayAlias: store i32 42, i32* %pTmp00.0, ptr_provenance i32* %prov.pTmp00.0, align 4, !tbaa !9, !noalias !2 <-> store i32 99, i32* %_pA, ptr_provenance i32* %1, align 4, !tbaa !9, !noalias !2 + + +; Adapted version where the ptr_provenance chains don't collapse +; Function Attrs: nounwind +define dso_local void @test_complex_phi_02(i32* nocapture %_pA, i32** nocapture readonly %_pB, i32 %n) local_unnamed_addr #0 { +entry: + %0 = tail call i8* @llvm.noalias.decl.p0i8.p0p0i32.i32(i32** null, i32 0, metadata !2) + %decl2 = tail call i8* @llvm.noalias.decl.p0i8.p0p0i32.i32(i32** null, i32 1, metadata !2) + %1 = tail call i32* @llvm.provenance.noalias.p0i32.p0i8.p0p0i32.p0p0i32.i32(i32* %_pA, i8* %0, i32** null, i32** undef, i32 0, metadata !2), !tbaa !5, !noalias !2 + %extra_ptr = tail call i32* @llvm.provenance.noalias.p0i32.p0i8.p0p0i32.p0p0i32.i32(i32* %_pA, i8* %decl2, i32** null, i32** undef, i32 1, metadata !2), !tbaa !5, !noalias !2 + %2 = load i32*, i32** %_pB, ptr_provenance i32** undef, align 4, !tbaa !5, !noalias !2 + %arrayidx1 = getelementptr inbounds i32*, i32** %_pB, i32 1 + %3 = load i32*, i32** %arrayidx1, ptr_provenance i32** undef, align 4, !tbaa !5, !noalias !2 + %arrayidx2 = getelementptr inbounds i32*, i32** %_pB, i32 2 + %4 = load i32*, i32** %arrayidx2, ptr_provenance i32** undef, align 4, !tbaa !5, !noalias !2 + %arrayidx3 = getelementptr inbounds i32*, i32** %_pB, i32 3 + %5 = load i32*, i32** %arrayidx3, ptr_provenance i32** undef, align 4, !tbaa !5, !noalias !2 + %arrayidx4 = getelementptr inbounds i32*, i32** %_pB, i32 4 + %6 = load i32*, i32** %arrayidx4, ptr_provenance i32** undef, align 4, !tbaa !5, !noalias !2 + %arrayidx5 = 
getelementptr inbounds i32*, i32** %_pB, i32 5 + %7 = load i32*, i32** %arrayidx5, ptr_provenance i32** undef, align 4, !tbaa !5, !noalias !2 + %arrayidx6 = getelementptr inbounds i32*, i32** %_pB, i32 6 + %8 = load i32*, i32** %arrayidx6, ptr_provenance i32** undef, align 4, !tbaa !5, !noalias !2 + %arrayidx7 = getelementptr inbounds i32*, i32** %_pB, i32 7 + %9 = load i32*, i32** %arrayidx7, ptr_provenance i32** undef, align 4, !tbaa !5, !noalias !2 + %arrayidx8 = getelementptr inbounds i32*, i32** %_pB, i32 8 + %10 = load i32*, i32** %arrayidx8, ptr_provenance i32** undef, align 4, !tbaa !5, !noalias !2 + %arrayidx9 = getelementptr inbounds i32*, i32** %_pB, i32 9 + %11 = load i32*, i32** %arrayidx9, ptr_provenance i32** undef, align 4, !tbaa !5, !noalias !2 + br label %for.cond + +for.cond: ; preds = %for.cond, %entry + %prov.pTmp00.0 = phi i32* [ %2, %entry ], [ %prov.pTmp01.0, %for.cond ] + %pTmp00.0 = phi i32* [ %2, %entry ], [ %pTmp01.0, %for.cond ] + %prov.pTmp01.0 = phi i32* [ %3, %entry ], [ %prov.pTmp02.0, %for.cond ] + %pTmp01.0 = phi i32* [ %3, %entry ], [ %pTmp02.0, %for.cond ] + %prov.pTmp02.0 = phi i32* [ %4, %entry ], [ %prov.pTmp03.0, %for.cond ] + %pTmp02.0 = phi i32* [ %4, %entry ], [ %pTmp03.0, %for.cond ] + %prov.pTmp03.0 = phi i32* [ %5, %entry ], [ %prov.pTmp04.0, %for.cond ] + %pTmp03.0 = phi i32* [ %5, %entry ], [ %pTmp04.0, %for.cond ] + %prov.pTmp04.0 = phi i32* [ %6, %entry ], [ %prov.pTmp05.0, %for.cond ] + %pTmp04.0 = phi i32* [ %6, %entry ], [ %pTmp05.0, %for.cond ] + %prov.pTmp05.0 = phi i32* [ %7, %entry ], [ %prov.pTmp06.0, %for.cond ] + %pTmp05.0 = phi i32* [ %7, %entry ], [ %pTmp06.0, %for.cond ] + %prov.pTmp06.0 = phi i32* [ %8, %entry ], [ %prov.pTmp07.0, %for.cond ] + %pTmp06.0 = phi i32* [ %8, %entry ], [ %pTmp07.0, %for.cond ] + %prov.pTmp07.0 = phi i32* [ %9, %entry ], [ %prov.pTmp08.0, %for.cond ] + %pTmp07.0 = phi i32* [ %9, %entry ], [ %pTmp08.0, %for.cond ] + %prov.pTmp08.0 = phi i32* [ %10, %entry ], [ %prov.pTmp09.0, %for.cond ] + %pTmp08.0 = phi i32* [ %10, %entry ], [ %pTmp09.0, %for.cond ] + %prov.pTmp09.0 = phi i32* [ %11, %entry ], [ %extra_ptr, %for.cond ] + %pTmp09.0 = phi i32* [ %11, %entry ], [ %_pA, %for.cond ] + %i.0 = phi i32 [ 0, %entry ], [ %inc, %for.cond ] + %cmp = icmp slt i32 %i.0, %n + %inc = add nuw nsw i32 %i.0, 1 + br i1 %cmp, label %for.cond, label %for.cond.cleanup + +for.cond.cleanup: ; preds = %for.cond + store i32 99, i32* %_pA, ptr_provenance i32* %1, align 4, !tbaa !9, !noalias !2 + store i32 42, i32* %pTmp00.0, ptr_provenance i32* %prov.pTmp00.0, align 4, !tbaa !9, !noalias !2 + ret void +} + +; CHECK-LABEL: Function: test_complex_phi_02: +; CHECK: NoAlias: store i32 42, i32* %pTmp00.0, ptr_provenance i32* %prov.pTmp00.0, align 4, !tbaa !9, !noalias !2 <-> store i32 99, i32* %_pA, ptr_provenance i32* %1, align 4, !tbaa !9, !noalias !2 + +; Function Attrs: argmemonly nounwind +declare i8* @llvm.noalias.decl.p0i8.p0p0i32.i32(i32**, i32, metadata) #1 + +; Function Attrs: nounwind readnone speculatable +declare i32* @llvm.provenance.noalias.p0i32.p0i8.p0p0i32.p0p0i32.i32(i32*, i8*, i32**, i32**, i32, metadata) #2 + +attributes #0 = { nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" 
} +attributes #1 = { argmemonly nounwind } +attributes #2 = { nounwind readnone speculatable } + +!llvm.module.flags = !{!0} +!llvm.ident = !{!1} + +!0 = !{i32 1, !"wchar_size", i32 4} +!1 = !{!"clang"} +!2 = !{!3} +!3 = distinct !{!3, !4, !"test_complex_phi: rpTmp"} +!4 = distinct !{!4, !"test_complex_phi"} +!5 = !{!6, !6, i64 0, i64 4} +!6 = !{!7, i64 4, !"any pointer"} +!7 = !{!8, i64 1, !"omnipotent char"} +!8 = !{!"Simple C/C++ TBAA"} +!9 = !{!10, !10, i64 0, i64 4} +!10 = !{!7, i64 4, !"int"} Index: llvm/test/Analysis/ScopedNoAliasAA/noalias_recursive.ll =================================================================== --- /dev/null +++ llvm/test/Analysis/ScopedNoAliasAA/noalias_recursive.ll @@ -0,0 +1,127 @@ +; RUN: opt < %s -basic-aa -scoped-noalias -aa-eval -evaluate-aa-metadata -print-all-alias-modref-info -disable-output 2>&1 | FileCheck %s +; RUN: opt < %s -aa-pipeline=basic-aa,scoped-noalias-aa -passes=aa-eval -evaluate-aa-metadata -print-all-alias-modref-info -disable-output 2>&1 | FileCheck %s +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +; Function Attrs: nounwind +define dso_local void @test_prp_prp(i32** nocapture readonly %_pA, i32** nocapture readonly %_pB) local_unnamed_addr #0 { +entry: + %0 = tail call i8* @llvm.noalias.decl.p0i8.p0p0p0i32.i64(i32*** null, i64 0, metadata !2) + %1 = tail call i8* @llvm.noalias.decl.p0i8.p0p0p0i32.i64(i32*** null, i64 0, metadata !5) + %2 = tail call i32** @llvm.provenance.noalias.p0p0i32.p0i8.p0p0p0i32.p0p0p0i32.i64(i32** %_pA, i8* %0, i32*** null, i32*** undef, i64 0, metadata !2), !tbaa !7, !noalias !11 + %3 = load i32*, i32** %_pA, ptr_provenance i32** %2, align 4, !tbaa !7, !noalias !11 + store i32 42, i32* %3, ptr_provenance i32* undef, align 4, !tbaa !12, !noalias !11 + %4 = tail call i32** @llvm.provenance.noalias.p0p0i32.p0i8.p0p0p0i32.p0p0p0i32.i64(i32** %_pB, i8* %1, i32*** null, i32*** undef, i64 0, metadata !5), !tbaa !7, !noalias !11 + %5 = load i32*, i32** %_pB, ptr_provenance i32** %4, align 4, !tbaa !7, !noalias !11 + store i32 99, i32* %5, ptr_provenance i32* undef, align 4, !tbaa !12, !noalias !11 + ret void +} +; CHECK-LABEL: Function: test_prp_prp: +; CHECK: MayAlias: store i32 99, i32* %5, ptr_provenance i32* undef, align 4, !tbaa !12, !noalias !11 <-> store i32 42, i32* %3, ptr_provenance i32* undef, align 4, !tbaa !12, !noalias !11 + +; Function Attrs: argmemonly nounwind +declare i8* @llvm.noalias.decl.p0i8.p0p0p0i32.i64(i32***, i64, metadata) #1 + +; Function Attrs: nofree nounwind +define dso_local void @test_rpp_rpp(i32** nocapture %_pA, i32** nocapture %_pB) local_unnamed_addr #2 !noalias !14 { +entry: + %0 = load i32*, i32** %_pA, ptr_provenance i32** undef, align 4, !tbaa !7, !noalias !14 + %1 = tail call i32* @llvm.provenance.noalias.p0i32.p0i8.p0p0i32.p0p0i32.i64(i32* %0, i8* null, i32** %_pA, i32** undef, i64 0, metadata !14), !tbaa !7, !noalias !14 + store i32 42, i32* %0, ptr_provenance i32* %1, align 4, !tbaa !12, !noalias !14 + %2 = load i32*, i32** %_pB, ptr_provenance i32** undef, align 4, !tbaa !7, !noalias !14 + %3 = tail call i32* @llvm.provenance.noalias.p0i32.p0i8.p0p0i32.p0p0i32.i64(i32* %2, i8* null, i32** %_pB, i32** undef, i64 0, metadata !14), !tbaa !7, !noalias !14 + store i32 99, i32* %2, ptr_provenance i32* %3, align 4, !tbaa !12, !noalias !14 + ret void +} +; CHECK-LABEL: Function: test_rpp_rpp: +; CHECK: MayAlias: store i32 99, i32* %2, ptr_provenance i32* %3, align 4, !tbaa !9, !noalias !2 <-> store i32 42, i32* %0, 
ptr_provenance i32* %1, align 4, !tbaa !9, !noalias !2 + +; Function Attrs: nounwind +define dso_local void @test_rprp_rprp(i32** nocapture %_pA, i32** nocapture %_pB) local_unnamed_addr #0 !noalias !17 { +entry: + %0 = tail call i8* @llvm.noalias.decl.p0i8.p0p0p0i32.i64(i32*** null, i64 0, metadata !20) + %1 = tail call i8* @llvm.noalias.decl.p0i8.p0p0p0i32.i64(i32*** null, i64 0, metadata !22) + %2 = tail call i32** @llvm.provenance.noalias.p0p0i32.p0i8.p0p0p0i32.p0p0p0i32.i64(i32** %_pA, i8* %0, i32*** null, i32*** undef, i64 0, metadata !20), !tbaa !7, !noalias !24 + %3 = load i32*, i32** %_pA, ptr_provenance i32** %2, align 4, !tbaa !7, !noalias !24 + %4 = tail call i32* @llvm.provenance.noalias.p0i32.p0i8.p0p0i32.p0p0i32.i64(i32* %3, i8* null, i32** %_pA, i32** %2, i64 0, metadata !17), !tbaa !7, !noalias !24 + store i32 42, i32* %3, ptr_provenance i32* %4, align 4, !tbaa !12, !noalias !24 + %5 = tail call i32** @llvm.provenance.noalias.p0p0i32.p0i8.p0p0p0i32.p0p0p0i32.i64(i32** %_pB, i8* %1, i32*** null, i32*** undef, i64 0, metadata !22), !tbaa !7, !noalias !24 + %6 = load i32*, i32** %_pB, ptr_provenance i32** %5, align 4, !tbaa !7, !noalias !24 + %7 = tail call i32* @llvm.provenance.noalias.p0i32.p0i8.p0p0i32.p0p0i32.i64(i32* %6, i8* null, i32** %_pB, i32** %5, i64 0, metadata !17), !tbaa !7, !noalias !24 + store i32 99, i32* %6, ptr_provenance i32* %7, align 4, !tbaa !12, !noalias !24 + ret void +} + +; CHECK-LABEL: Function: test_rprp_rprp: +; CHECK: NoAlias: store i32 99, i32* %6, ptr_provenance i32* %7, align 4, !tbaa !14, !noalias !13 <-> store i32 42, i32* %3, ptr_provenance i32* %4, align 4, !tbaa !14, !noalias !13 + +; Function Attrs: nounwind +define dso_local void @test_prp_01(i32** nocapture %pA) local_unnamed_addr #0 !noalias !17 { +entry: + %0 = load i32*, i32** %pA, ptr_provenance i32** undef, align 8, !tbaa !7, !noalias !17 + %1 = call i32* @llvm.provenance.noalias.p0i32.p0i8.p0p0i32.p0p0i32.i64(i32* %0, i8* null, i32** %pA, i32** undef, i64 0, metadata !17), !tbaa !7, !noalias !17 + store i32 42, i32* %0, ptr_provenance i32* %1, align 4, !tbaa !12, !noalias !17 + %arrayidx1 = getelementptr inbounds i32*, i32** %pA, i64 0 + %2 = load i32*, i32** %arrayidx1, ptr_provenance i32** undef, align 8, !tbaa !7, !noalias !17 + %3 = call i32* @llvm.provenance.noalias.p0i32.p0i8.p0p0i32.p0p0i32.i64(i32* %2, i8* null, i32** %arrayidx1, i32** undef, i64 0, metadata !17), !tbaa !7, !noalias !17 + store i32 43, i32* %2, ptr_provenance i32* %3, align 4, !tbaa !12, !noalias !17 + ret void +} + +; CHECK-LABEL: Function: test_prp_01: +; CHECK: MayAlias: store i32 43, i32* %2, ptr_provenance i32* %3, align 4, !tbaa !9, !noalias !2 <-> store i32 42, i32* %0, ptr_provenance i32* %1, align 4, !tbaa !9, !noalias !2 +; Function Attrs: nounwind +define dso_local void @test_prp_02(i32** nocapture %pA) local_unnamed_addr #0 !noalias !17 { +entry: + %0 = load i32*, i32** %pA, ptr_provenance i32** undef, align 8, !tbaa !7, !noalias !17 + %1 = call i32* @llvm.provenance.noalias.p0i32.p0i8.p0p0i32.p0p0i32.i64(i32* %0, i8* null, i32** %pA, i32** undef, i64 0, metadata !17), !tbaa !7, !noalias !17 + store i32 42, i32* %0, ptr_provenance i32* %1, align 4, !tbaa !12, !noalias !17 + %arrayidx1 = getelementptr inbounds i32*, i32** %pA, i64 1 + %2 = load i32*, i32** %arrayidx1, ptr_provenance i32** undef, align 8, !tbaa !7, !noalias !17 + %3 = call i32* @llvm.provenance.noalias.p0i32.p0i8.p0p0i32.p0p0i32.i64(i32* %2, i8* null, i32** %arrayidx1, i32** undef, i64 0, metadata !17), !tbaa !7, !noalias !17 + 
store i32 43, i32* %2, ptr_provenance i32* %3, align 4, !tbaa !12, !noalias !17 + ret void +} + +; CHECK-LABEL: Function: test_prp_02: +; CHECK: NoAlias: store i32 43, i32* %2, ptr_provenance i32* %3, align 4, !tbaa !9, !noalias !2 <-> store i32 42, i32* %0, ptr_provenance i32* %1, align 4, !tbaa !9, !noalias !2 + +; Function Attrs: nounwind readnone speculatable +declare i32** @llvm.provenance.noalias.p0p0i32.p0i8.p0p0p0i32.p0p0p0i32.i64(i32**, i8*, i32***, i32***, i64, metadata) #3 + +; Function Attrs: nounwind readnone speculatable +declare i32* @llvm.provenance.noalias.p0i32.p0i8.p0p0i32.p0p0i32.i64(i32*, i8*, i32**, i32**, i64, metadata) #3 + +attributes #0 = { nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #1 = { argmemonly nounwind } +attributes #2 = { nofree nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #3 = { nounwind readnone speculatable } + +!llvm.module.flags = !{!0} +!llvm.ident = !{!1} + +!0 = !{i32 1, !"wchar_size", i32 4} +!1 = !{!"clang"} +!2 = !{!3} +!3 = distinct !{!3, !4, !"test_prp_prp: pA"} +!4 = distinct !{!4, !"test_prp_prp"} +!5 = !{!6} +!6 = distinct !{!6, !4, !"test_prp_prp: pB"} +!7 = !{!8, !8, i64 0, i64 4} +!8 = !{!9, i64 4, !"any pointer"} +!9 = !{!10, i64 1, !"omnipotent char"} +!10 = !{!"Simple C/C++ TBAA"} +!11 = !{!3, !6} +!12 = !{!13, !13, i64 0, i64 4} +!13 = !{!9, i64 4, !"int"} +!14 = !{!15} +!15 = distinct !{!15, !16, !"test_rpp_rpp: unknown scope"} +!16 = distinct !{!16, !"test_rpp_rpp"} +!17 = !{!18} +!18 = distinct !{!18, !19, !"test_rprp_rprp: unknown scope"} +!19 = distinct !{!19, !"test_rprp_rprp"} +!20 = !{!21} +!21 = distinct !{!21, !19, !"test_rprp_rprp: pA"} +!22 = !{!23} +!23 = distinct !{!23, !19, !"test_rprp_rprp: pB"} +!24 = !{!21, !23, !18} Index: llvm/test/Analysis/ScopedNoAliasAA/noalias_select.ll =================================================================== --- /dev/null +++ llvm/test/Analysis/ScopedNoAliasAA/noalias_select.ll @@ -0,0 +1,167 @@ +; RUN: opt < %s -basic-aa -scoped-noalias -aa-eval -evaluate-aa-metadata -print-all-alias-modref-info -disable-output 2>&1 | FileCheck %s +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +; Function Attrs: nofree norecurse nounwind writeonly +define dso_local void @test_select_p_p_p(i32* nocapture %_pA, i32* nocapture %_pB, i32* nocapture %_pC, i32 %c) local_unnamed_addr #0 { +entry: + %tobool = icmp eq i32 %c, 0 + %cond = select i1 %tobool, i32* %_pB, i32* %_pA + store i32 42, i32* %cond, ptr_provenance i32* undef, align 4, !tbaa !2 + store i32 99, i32* %_pC, ptr_provenance i32* undef, align 4, !tbaa !2 + ret void +} +; CHECK-LABEL: Function: test_select_p_p_p: +; CHECK: MayAlias: store i32 99, i32* %_pC, ptr_provenance i32* undef, align 4, !tbaa 
!2 <-> store i32 42, i32* %cond, ptr_provenance i32* undef, align 4, !tbaa !2 + +; Function Attrs: nounwind +define dso_local void @test_select_rp_p_p(i32* nocapture %_pA, i32* nocapture %_pB, i32* nocapture %_pC, i32 %c) local_unnamed_addr #1 { +entry: + %0 = tail call i8* @llvm.noalias.decl.p0i8.p0p0i32.i32(i32** null, i32 0, metadata !6) + %tobool = icmp ne i32 %c, 0 + %1 = tail call i32* @llvm.provenance.noalias.p0i32.p0i8.p0p0i32.p0p0i32.i32(i32* %_pA, i8* %0, i32** null, i32** undef, i32 0, metadata !6) + %prov.cond = select i1 %tobool, i32* %1, i32* %_pB + %cond = select i1 %tobool, i32* %_pA, i32* %_pB + store i32 42, i32* %cond, ptr_provenance i32* %prov.cond, align 4, !tbaa !2, !noalias !6 + store i32 99, i32* %_pC, ptr_provenance i32* undef, align 4, !tbaa !2, !noalias !6 + ret void +} +; CHECK-LABEL: Function: test_select_rp_p_p: +; CHECK: MayAlias: store i32 99, i32* %_pC, ptr_provenance i32* undef, align 4, !tbaa !5, !noalias !2 <-> store i32 42, i32* %cond, ptr_provenance i32* %prov.cond, align 4, !tbaa !5, !noalias !2 + + +; Function Attrs: argmemonly nounwind +declare i8* @llvm.noalias.decl.p0i8.p0p0i32.i32(i32**, i32, metadata) #2 + +; Function Attrs: nounwind +define dso_local void @test_select_p_rp_p(i32* nocapture %_pA, i32* nocapture %_pB, i32* nocapture %_pC, i32 %c) local_unnamed_addr #1 { +entry: + %0 = tail call i8* @llvm.noalias.decl.p0i8.p0p0i32.i32(i32** null, i32 0, metadata !9) + %tobool = icmp ne i32 %c, 0 + %1 = tail call i32* @llvm.provenance.noalias.p0i32.p0i8.p0p0i32.p0p0i32.i32(i32* %_pB, i8* %0, i32** null, i32** undef, i32 0, metadata !9) + %prov.cond = select i1 %tobool, i32* %_pA, i32* %1 + %cond = select i1 %tobool, i32* %_pA, i32* %_pB + store i32 42, i32* %cond, ptr_provenance i32* %prov.cond, align 4, !tbaa !2, !noalias !9 + store i32 99, i32* %_pC, ptr_provenance i32* undef, align 4, !tbaa !2, !noalias !9 + ret void +} +; CHECK-LABEL: Function: test_select_p_rp_p: +; CHECK: MayAlias: store i32 99, i32* %_pC, ptr_provenance i32* undef, align 4, !tbaa !5, !noalias !2 <-> store i32 42, i32* %cond, ptr_provenance i32* %prov.cond, align 4, !tbaa !5, !noalias !2 + +; Function Attrs: nounwind +define dso_local void @test_select_rp_rp_p(i32* nocapture %_pA, i32* nocapture %_pB, i32* nocapture %_pC, i32 %c) local_unnamed_addr #1 { +entry: + %0 = tail call i8* @llvm.noalias.decl.p0i8.p0p0i32.i32(i32** null, i32 0, metadata !12) + %1 = tail call i8* @llvm.noalias.decl.p0i8.p0p0i32.i32(i32** null, i32 0, metadata !15) + %tobool = icmp ne i32 %c, 0 + %2 = tail call i32* @llvm.provenance.noalias.p0i32.p0i8.p0p0i32.p0p0i32.i32(i32* %_pA, i8* %0, i32** null, i32** undef, i32 0, metadata !12) + %3 = tail call i32* @llvm.provenance.noalias.p0i32.p0i8.p0p0i32.p0p0i32.i32(i32* %_pB, i8* %1, i32** null, i32** undef, i32 0, metadata !15) + %prov.cond = select i1 %tobool, i32* %2, i32* %3 + %cond = select i1 %tobool, i32* %_pA, i32* %_pB + store i32 42, i32* %cond, ptr_provenance i32* %prov.cond, align 4, !tbaa !2, !noalias !17 + store i32 99, i32* %_pC, ptr_provenance i32* undef, align 4, !tbaa !2, !noalias !17 + ret void +} +; CHECK-LABEL: Function: test_select_rp_rp_p: +; CHECK: NoAlias: store i32 99, i32* %_pC, ptr_provenance i32* undef, align 4, !tbaa !7, !noalias !11 <-> store i32 42, i32* %cond, ptr_provenance i32* %prov.cond, align 4, !tbaa !7, !noalias !11 + +; Function Attrs: nounwind +define dso_local void @test_select_p_p_rp(i32* nocapture %_pA, i32* nocapture %_pB, i32* nocapture %_pC, i32 %c) local_unnamed_addr #1 { +entry: + %0 = tail call i8* 
@llvm.noalias.decl.p0i8.p0p0i32.i32(i32** null, i32 0, metadata !18) + %tobool = icmp eq i32 %c, 0 + %cond = select i1 %tobool, i32* %_pB, i32* %_pA + store i32 42, i32* %cond, ptr_provenance i32* undef, align 4, !tbaa !2, !noalias !18 + %1 = tail call i32* @llvm.provenance.noalias.p0i32.p0i8.p0p0i32.p0p0i32.i32(i32* %_pC, i8* %0, i32** null, i32** undef, i32 0, metadata !18), !tbaa !21, !noalias !18 + store i32 99, i32* %_pC, ptr_provenance i32* %1, align 4, !tbaa !2, !noalias !18 + ret void +} +; CHECK-LABEL: Function: test_select_p_p_rp: +; CHECK: NoAlias: store i32 99, i32* %_pC, ptr_provenance i32* %1, align 4, !tbaa !5, !noalias !2 <-> store i32 42, i32* %cond, ptr_provenance i32* undef, align 4, !tbaa !5, !noalias !2 + +; Function Attrs: nounwind +define dso_local void @test_select_rp_rp_rp_01(i32* nocapture %_pA, i32* nocapture %_pB, i32* nocapture %_pC, i32 %c) local_unnamed_addr #1 { +entry: + %0 = tail call i8* @llvm.noalias.decl.p0i8.p0p0i32.i32(i32** null, i32 0, metadata !23) + %1 = tail call i8* @llvm.noalias.decl.p0i8.p0p0i32.i32(i32** null, i32 0, metadata !26) + %2 = tail call i8* @llvm.noalias.decl.p0i8.p0p0i32.i32(i32** null, i32 0, metadata !28) + %tobool = icmp ne i32 %c, 0 + %3 = tail call i32* @llvm.provenance.noalias.p0i32.p0i8.p0p0i32.p0p0i32.i32(i32* %_pA, i8* %0, i32** null, i32** undef, i32 0, metadata !23) + %4 = tail call i32* @llvm.provenance.noalias.p0i32.p0i8.p0p0i32.p0p0i32.i32(i32* %_pB, i8* %1, i32** null, i32** undef, i32 0, metadata !26) + %prov.cond = select i1 %tobool, i32* %3, i32* %4 + %cond = select i1 %tobool, i32* %_pA, i32* %_pB + store i32 42, i32* %cond, ptr_provenance i32* %prov.cond, align 4, !tbaa !2, !noalias !30 + %5 = tail call i32* @llvm.provenance.noalias.p0i32.p0i8.p0p0i32.p0p0i32.i32(i32* %_pC, i8* %2, i32** null, i32** undef, i32 0, metadata !28), !tbaa !21, !noalias !30 + store i32 99, i32* %_pC, ptr_provenance i32* %5, align 4, !tbaa !2, !noalias !30 + ret void +} + +; CHECK-LABEL: Function: test_select_rp_rp_rp_01: +; CHECK: NoAlias: store i32 99, i32* %_pC, ptr_provenance i32* %5, align 4, !tbaa !9, !noalias !13 <-> store i32 42, i32* %cond, ptr_provenance i32* %prov.cond, align 4, !tbaa !9, !noalias !13 + +; Function Attrs: nounwind +define dso_local void @test_select_rp_rp_rp_02(i32* nocapture %_pA, i32* nocapture readnone %_pB, i32* nocapture %_pC, i32 %c) local_unnamed_addr #1 { +entry: + %0 = tail call i8* @llvm.noalias.decl.p0i8.p0p0i32.i32(i32** null, i32 0, metadata !31) + %1 = tail call i8* @llvm.noalias.decl.p0i8.p0p0i32.i32(i32** null, i32 0, metadata !34) + %tobool = icmp ne i32 %c, 0 + %2 = tail call i32* @llvm.provenance.noalias.p0i32.p0i8.p0p0i32.p0p0i32.i32(i32* %_pA, i8* %0, i32** null, i32** undef, i32 0, metadata !31) + %3 = tail call i32* @llvm.provenance.noalias.p0i32.p0i8.p0p0i32.p0p0i32.i32(i32* %_pC, i8* %1, i32** null, i32** undef, i32 0, metadata !34) + %prov.cond = select i1 %tobool, i32* %2, i32* %3 + %cond = select i1 %tobool, i32* %_pA, i32* %_pC + store i32 42, i32* %cond, ptr_provenance i32* %prov.cond, align 4, !tbaa !2, !noalias !36 + store i32 99, i32* %_pC, ptr_provenance i32* %3, align 4, !tbaa !2, !noalias !36 + ret void +} +; CHECK-LABEL: Function: test_select_rp_rp_rp_02: +; CHECK: MayAlias: store i32 99, i32* %_pC, ptr_provenance i32* %3, align 4, !tbaa !7, !noalias !11 <-> store i32 42, i32* %cond, ptr_provenance i32* %prov.cond, align 4, !tbaa !7, !noalias !11 + +; Function Attrs: nounwind readnone speculatable +declare i32* 
@llvm.provenance.noalias.p0i32.p0i8.p0p0i32.p0p0i32.i32(i32*, i8*, i32**, i32**, i32, metadata) #3 + +attributes #0 = { nofree norecurse nounwind writeonly "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #1 = { nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #2 = { argmemonly nounwind } +attributes #3 = { nounwind readnone speculatable } + +!llvm.module.flags = !{!0} +!llvm.ident = !{!1} + +!0 = !{i32 1, !"wchar_size", i32 4} +!1 = !{!"clang"} +!2 = !{!3, !3, i64 0, i64 4} +!3 = !{!4, i64 4, !"int"} +!4 = !{!5, i64 1, !"omnipotent char"} +!5 = !{!"Simple C/C++ TBAA"} +!6 = !{!7} +!7 = distinct !{!7, !8, !"test_select_rp_p_p: pA"} +!8 = distinct !{!8, !"test_select_rp_p_p"} +!9 = !{!10} +!10 = distinct !{!10, !11, !"test_select_p_rp_p: pB"} +!11 = distinct !{!11, !"test_select_p_rp_p"} +!12 = !{!13} +!13 = distinct !{!13, !14, !"test_select_rp_rp_p: pA"} +!14 = distinct !{!14, !"test_select_rp_rp_p"} +!15 = !{!16} +!16 = distinct !{!16, !14, !"test_select_rp_rp_p: pB"} +!17 = !{!13, !16} +!18 = !{!19} +!19 = distinct !{!19, !20, !"test_select_p_p_rp: pC"} +!20 = distinct !{!20, !"test_select_p_p_rp"} +!21 = !{!22, !22, i64 0, i64 4} +!22 = !{!4, i64 4, !"any pointer"} +!23 = !{!24} +!24 = distinct !{!24, !25, !"test_select_rp_rp_rp_01: pA"} +!25 = distinct !{!25, !"test_select_rp_rp_rp_01"} +!26 = !{!27} +!27 = distinct !{!27, !25, !"test_select_rp_rp_rp_01: pB"} +!28 = !{!29} +!29 = distinct !{!29, !25, !"test_select_rp_rp_rp_01: pC"} +!30 = !{!24, !27, !29} +!31 = !{!32} +!32 = distinct !{!32, !33, !"test_select_rp_rp_rp_02: pA"} +!33 = distinct !{!33, !"test_select_rp_rp_rp_02"} +!34 = !{!35} +!35 = distinct !{!35, !33, !"test_select_rp_rp_rp_02: pC"} +!36 = !{!32, !37, !35} +!37 = distinct !{!37, !33, !"test_select_rp_rp_rp_02: pB"} Index: llvm/test/Bitcode/loadstore_ptr_provenance.ll =================================================================== --- /dev/null +++ llvm/test/Bitcode/loadstore_ptr_provenance.ll @@ -0,0 +1,17 @@ +; RUN: opt --verify -S < %s | FileCheck %s +; FIXME: fix the bitcode support. 
R U N: llvm-as < %s | llvm-dis | FileCheck %s +; JDO_FIXME_NOW: R U N: verify-uselistorder < %s + +define i32 @f(i32* %p, i32* %q) { + store i32 42, i32* %p, ptr_provenance i32* %p + store i32 43, i32* %q, ptr_provenance i32* %q + %r = load i32, i32* %p, ptr_provenance i32* %p + ret i32 %r +} + +; CHECK: define i32 @f(i32* %p, i32* %q) { +; CHECK-NEXT: store i32 42, i32* %p, ptr_provenance i32* %p +; CHECK-NEXT: store i32 43, i32* %q, ptr_provenance i32* %q +; CHECK-NEXT: %r = load i32, i32* %p, ptr_provenance i32* %p +; CHECK-NEXT: ret i32 %r +; CHECK-NEXT: } Index: llvm/test/CodeGen/AMDGPU/opt-pipeline.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/opt-pipeline.ll +++ llvm/test/CodeGen/AMDGPU/opt-pipeline.ll @@ -44,8 +44,10 @@ ; GCN-O1-NEXT: Instrument function entry/exit with calls to e.g. mcount() (pre inlining) ; GCN-O1-NEXT: Simplify the CFG ; GCN-O1-NEXT: Dominator Tree Construction +; GCN-O1-NEXT: Connect llvm.noalias.decl ; GCN-O1-NEXT: SROA ; GCN-O1-NEXT: Early CSE +; GCN-O1-NEXT: Propagate and Convert Noalias intrinsics ; GCN-O1-NEXT: Lower 'expect' Intrinsics ; GCN-O1-NEXT: Pass Arguments: @@ -96,6 +98,9 @@ ; GCN-O1-NEXT: Call Graph SCC Pass Manager ; GCN-O1-NEXT: Remove unused exception handling info ; GCN-O1-NEXT: AMDGPU Function Integration/Inlining +; GCN-O1-NEXT: FunctionPass Manager +; GCN-O1-NEXT: Dominator Tree Construction +; GCN-O1-NEXT: Propagate and Convert Noalias intrinsics ; GCN-O1-NEXT: Deduce function attributes ; GCN-O1-NEXT: FunctionPass Manager ; GCN-O1-NEXT: Infer address spaces @@ -103,11 +108,13 @@ ; GCN-O1-NEXT: FunctionPass Manager ; GCN-O1-NEXT: AMDGPU Promote Alloca to vector ; GCN-O1-NEXT: Dominator Tree Construction +; GCN-O1-NEXT: Connect llvm.noalias.decl ; GCN-O1-NEXT: SROA ; GCN-O1-NEXT: Basic Alias Analysis (stateless AA impl) ; GCN-O1-NEXT: Function Alias Analysis Results ; GCN-O1-NEXT: Memory SSA ; GCN-O1-NEXT: Early CSE w/ MemorySSA +; GCN-O1-NEXT: Propagate and Convert Noalias intrinsics ; GCN-O1-NEXT: Simplify the CFG ; GCN-O1-NEXT: Dominator Tree Construction ; GCN-O1-NEXT: Basic Alias Analysis (stateless AA impl) @@ -174,6 +181,8 @@ ; GCN-O1-NEXT: Lazy Block Frequency Analysis ; GCN-O1-NEXT: Optimization Remark Emitter ; GCN-O1-NEXT: Combine redundant instructions +; GCN-O1-NEXT: Connect llvm.noalias.decl +; GCN-O1-NEXT: Propagate and Convert Noalias intrinsics ; GCN-O1-NEXT: Post-Dominator Tree Construction ; GCN-O1-NEXT: Aggressive Dead Code Elimination ; GCN-O1-NEXT: Simplify the CFG @@ -349,8 +358,10 @@ ; GCN-O2-NEXT: Instrument function entry/exit with calls to e.g. 
mcount() (pre inlining) ; GCN-O2-NEXT: Simplify the CFG ; GCN-O2-NEXT: Dominator Tree Construction +; GCN-O2-NEXT: Connect llvm.noalias.decl ; GCN-O2-NEXT: SROA ; GCN-O2-NEXT: Early CSE +; GCN-O2-NEXT: Propagate and Convert Noalias intrinsics ; GCN-O2-NEXT: Lower 'expect' Intrinsics ; GCN-O2-NEXT: Pass Arguments: @@ -401,6 +412,9 @@ ; GCN-O2-NEXT: Call Graph SCC Pass Manager ; GCN-O2-NEXT: Remove unused exception handling info ; GCN-O2-NEXT: AMDGPU Function Integration/Inlining +; GCN-O2-NEXT: FunctionPass Manager +; GCN-O2-NEXT: Dominator Tree Construction +; GCN-O2-NEXT: Propagate and Convert Noalias intrinsics ; GCN-O2-NEXT: OpenMP specific optimizations ; GCN-O2-NEXT: Deduce function attributes ; GCN-O2-NEXT: FunctionPass Manager @@ -409,12 +423,15 @@ ; GCN-O2-NEXT: FunctionPass Manager ; GCN-O2-NEXT: AMDGPU Promote Alloca to vector ; GCN-O2-NEXT: Dominator Tree Construction +; GCN-O2-NEXT: Connect llvm.noalias.decl ; GCN-O2-NEXT: SROA ; GCN-O2-NEXT: Basic Alias Analysis (stateless AA impl) ; GCN-O2-NEXT: Function Alias Analysis Results ; GCN-O2-NEXT: Memory SSA ; GCN-O2-NEXT: Early CSE w/ MemorySSA +; GCN-O2-NEXT: Propagate and Convert Noalias intrinsics ; GCN-O2-NEXT: Speculatively execute instructions if target has divergent branches +; GCN-O2-NEXT: Basic Alias Analysis (stateless AA impl) ; GCN-O2-NEXT: Function Alias Analysis Results ; GCN-O2-NEXT: Lazy Value Information Analysis ; GCN-O2-NEXT: Jump Threading @@ -502,6 +519,10 @@ ; GCN-O2-NEXT: Lazy Block Frequency Analysis ; GCN-O2-NEXT: Optimization Remark Emitter ; GCN-O2-NEXT: Combine redundant instructions +; GCN-O2-NEXT: Connect llvm.noalias.decl +; GCN-O2-NEXT: Propagate and Convert Noalias intrinsics +; GCN-O2-NEXT: Basic Alias Analysis (stateless AA impl) +; GCN-O2-NEXT: Function Alias Analysis Results ; GCN-O2-NEXT: Lazy Value Information Analysis ; GCN-O2-NEXT: Jump Threading ; GCN-O2-NEXT: Value Propagation @@ -702,8 +723,10 @@ ; GCN-O3-NEXT: Instrument function entry/exit with calls to e.g. 
mcount() (pre inlining) ; GCN-O3-NEXT: Simplify the CFG ; GCN-O3-NEXT: Dominator Tree Construction +; GCN-O3-NEXT: Connect llvm.noalias.decl ; GCN-O3-NEXT: SROA ; GCN-O3-NEXT: Early CSE +; GCN-O3-NEXT: Propagate and Convert Noalias intrinsics ; GCN-O3-NEXT: Lower 'expect' Intrinsics ; GCN-O3-NEXT: Pass Arguments: @@ -757,6 +780,9 @@ ; GCN-O3-NEXT: Call Graph SCC Pass Manager ; GCN-O3-NEXT: Remove unused exception handling info ; GCN-O3-NEXT: AMDGPU Function Integration/Inlining +; GCN-O3-NEXT: FunctionPass Manager +; GCN-O3-NEXT: Dominator Tree Construction +; GCN-O3-NEXT: Propagate and Convert Noalias intrinsics ; GCN-O3-NEXT: OpenMP specific optimizations ; GCN-O3-NEXT: Deduce function attributes ; GCN-O3-NEXT: Promote 'by reference' arguments to scalars @@ -766,12 +792,15 @@ ; GCN-O3-NEXT: FunctionPass Manager ; GCN-O3-NEXT: AMDGPU Promote Alloca to vector ; GCN-O3-NEXT: Dominator Tree Construction +; GCN-O3-NEXT: Connect llvm.noalias.decl ; GCN-O3-NEXT: SROA ; GCN-O3-NEXT: Basic Alias Analysis (stateless AA impl) ; GCN-O3-NEXT: Function Alias Analysis Results ; GCN-O3-NEXT: Memory SSA ; GCN-O3-NEXT: Early CSE w/ MemorySSA +; GCN-O3-NEXT: Propagate and Convert Noalias intrinsics ; GCN-O3-NEXT: Speculatively execute instructions if target has divergent branches +; GCN-O3-NEXT: Basic Alias Analysis (stateless AA impl) ; GCN-O3-NEXT: Function Alias Analysis Results ; GCN-O3-NEXT: Lazy Value Information Analysis ; GCN-O3-NEXT: Jump Threading @@ -860,6 +889,10 @@ ; GCN-O3-NEXT: Lazy Block Frequency Analysis ; GCN-O3-NEXT: Optimization Remark Emitter ; GCN-O3-NEXT: Combine redundant instructions +; GCN-O3-NEXT: Connect llvm.noalias.decl +; GCN-O3-NEXT: Propagate and Convert Noalias intrinsics +; GCN-O3-NEXT: Basic Alias Analysis (stateless AA impl) +; GCN-O3-NEXT: Function Alias Analysis Results ; GCN-O3-NEXT: Lazy Value Information Analysis ; GCN-O3-NEXT: Jump Threading ; GCN-O3-NEXT: Value Propagation Index: llvm/test/CodeGen/Generic/noalias.ll =================================================================== --- /dev/null +++ llvm/test/CodeGen/Generic/noalias.ll @@ -0,0 +1,13 @@ +; RUN: llc < %s + +define i32* @test(i32* %p) { + %p.decl = tail call i8* @llvm.noalias.decl.p0i8.p0p0i32.i32(i32** null, i32 0, metadata !0) + %v = call i32* @llvm.noalias.p0i32.p0i8.p0p0i32.i32(i32* %p, i8* %p.decl, i32** null, i32 0, metadata !0) + ret i32* %v +} + +declare i8* @llvm.noalias.decl.p0i8.p0p0i32.i32(i32**, i32, metadata) argmemonly nounwind +declare i32* @llvm.noalias.p0i32.p0i8.p0p0i32.i32(i32*, i8*, i32**, i32, metadata) argmemonly nounwind speculatable + +!0 = !{!0, !"some domain"} +!1 = !{!1, !0, !"some scope"} Index: llvm/test/CodeGen/Generic/provenance.noalias.ll =================================================================== --- /dev/null +++ llvm/test/CodeGen/Generic/provenance.noalias.ll @@ -0,0 +1,15 @@ +; RUN: llc < %s + +define i32* @test(i32* %p) { + %p.decl = tail call i8* @llvm.noalias.decl.p0i8.p0p0i32.i32(i32** null, i32 0, metadata !0) + %p.provenance = tail call i32* @llvm.provenance.noalias.p0i32.p0i8.p0p0i32.p0p0i32.i32(i32* %p, i8* %p.decl, i32** null, i32** undef, i32 0, metadata !0) + %p.guard = call i32* @llvm.noalias.arg.guard.p0i32.p0i32(i32* %p, i32* %p.provenance) + ret i32* %p.guard +} + +declare i8* @llvm.noalias.decl.p0i8.p0p0i32.i32(i32**, i32, metadata) argmemonly nounwind +declare i32* @llvm.provenance.noalias.p0i32.p0i8.p0p0i32.p0p0i32.i32(i32*, i8*, i32**, i32**, i32, metadata) argmemonly nounwind speculatable +declare i32* 
@llvm.noalias.arg.guard.p0i32.p0i32(i32*, i32*) nounwind readnone + +!0 = !{!0, !"some domain"} +!1 = !{!1, !0, !"some scope"} Index: llvm/test/Other/new-pm-defaults.ll =================================================================== --- llvm/test/Other/new-pm-defaults.ll +++ llvm/test/Other/new-pm-defaults.ll @@ -106,10 +106,12 @@ ; CHECK-O-NEXT: Running pass: SimplifyCFGPass ; CHECK-O-NEXT: Running analysis: TargetIRAnalysis ; CHECK-O-NEXT: Running analysis: AssumptionAnalysis +; CHECK-O-NEXT: Running pass: ConnectNoAliasDeclPass ; CHECK-O-NEXT: Running pass: SROA ; CHECK-O-NEXT: Running analysis: DominatorTreeAnalysis ; CHECK-O-NEXT: Running pass: EarlyCSEPass ; CHECK-O-NEXT: Running analysis: TargetLibraryAnalysis +; CHECK-O-NEXT: Running pass: PropagateAndConvertNoAliasPass ; CHECK-O-NEXT: Running pass: LowerExpectIntrinsicPass ; CHECK-O3-NEXT: Running pass: CallSiteSplittingPass ; CHECK-O-NEXT: Finished llvm::Function pass manager run. @@ -149,9 +151,11 @@ ; CHECK-O3-NEXT: Running pass: OpenMPOptPass on (foo) ; CHECK-O-NEXT: Running pass: CGSCCToFunctionPassAdaptor<{{.*}}PassManager{{.*}}> ; CHECK-O-NEXT: Starting llvm::Function pass manager run. +; CHECK-O-NEXT: Running pass: ConnectNoAliasDeclPass ; CHECK-O-NEXT: Running pass: SROA ; CHECK-O-NEXT: Running pass: EarlyCSEPass ; CHECK-O-NEXT: Running analysis: MemorySSAAnalysis +; CHECK-O-NEXT: Running pass: PropagateAndConvertNoAliasPass ; CHECK-O23SZ-NEXT: Running pass: SpeculativeExecutionPass ; CHECK-O23SZ-NEXT: Running pass: JumpThreadingPass ; CHECK-O23SZ-NEXT: Running analysis: LazyValueAnalysis @@ -198,7 +202,9 @@ ; CHECK-O-NEXT: Running pass: LoopFullUnrollPass ; CHECK-EP-LOOP-END-NEXT: Running pass: NoOpLoopPass ; CHECK-O-NEXT: Finished Loop pass manager run. +; CHECK-O-NEXT: Running pass: ConnectNoAliasDeclPass ; CHECK-O-NEXT: Running pass: SROA on foo +; CHECK-O-NEXT: Running pass: PropagateAndConvertNoAliasPass ; CHECK-O23SZ-NEXT: Running pass: MergedLoadStoreMotionPass ; CHECK-O23SZ-NEXT: Running pass: GVN ; CHECK-O23SZ-NEXT: Running analysis: MemoryDependenceAnalysis Index: llvm/test/Other/new-pm-lto-defaults.ll =================================================================== --- llvm/test/Other/new-pm-lto-defaults.ll +++ llvm/test/Other/new-pm-lto-defaults.ll @@ -89,7 +89,9 @@ ; CHECK-EP-Peephole-NEXT: Running pass: NoOpFunctionPass ; CHECK-O2-NEXT: Running pass: JumpThreadingPass ; CHECK-O2-NEXT: Running analysis: LazyValueAnalysis +; CHECK-O2-NEXT: Running pass: ConnectNoAliasDeclPass ; CHECK-O2-NEXT: Running pass: SROA on foo +; CHECK-O2-NEXT: Running pass: PropagateAndConvertNoAliasPass ; CHECK-O2-NEXT: Running pass: TailCallElimPass on foo ; CHECK-O2-NEXT: Finished llvm::Function pass manager run. 
; CHECK-O2-NEXT: Running pass: ModuleToPostOrderCGSCCPassAdaptor<{{.*}}PostOrderFunctionAttrsPass> Index: llvm/test/Other/new-pm-thinlto-defaults.ll =================================================================== --- llvm/test/Other/new-pm-thinlto-defaults.ll +++ llvm/test/Other/new-pm-thinlto-defaults.ll @@ -72,10 +72,12 @@ ; CHECK-O-NEXT: Running pass: SimplifyCFGPass ; CHECK-O-NEXT: Running analysis: TargetIRAnalysis ; CHECK-O-NEXT: Running analysis: AssumptionAnalysis +; CHECK-O-NEXT: Running pass: ConnectNoAliasDeclPass ; CHECK-O-NEXT: Running pass: SROA ; CHECK-O-NEXT: Running analysis: DominatorTreeAnalysis ; CHECK-O-NEXT: Running pass: EarlyCSEPass ; CHECK-O-NEXT: Running analysis: TargetLibraryAnalysis +; CHECK-O-NEXT: Running pass: PropagateAndConvertNoAliasPass ; CHECK-O-NEXT: Running pass: LowerExpectIntrinsicPass ; CHECK-O3-NEXT: Running pass: CallSiteSplittingPass ; CHECK-O-NEXT: Finished llvm::Function pass manager run. @@ -114,9 +116,11 @@ ; CHECK-O3-NEXT: Running pass: OpenMPOptPass on (foo) ; CHECK-O-NEXT: Running pass: CGSCCToFunctionPassAdaptor<{{.*}}PassManager{{.*}}> ; CHECK-O-NEXT: Starting llvm::Function pass manager run. +; CHECK-O-NEXT: Running pass: ConnectNoAliasDeclPass ; CHECK-O-NEXT: Running pass: SROA ; CHECK-O-NEXT: Running pass: EarlyCSEPass ; CHECK-O-NEXT: Running analysis: MemorySSAAnalysis +; CHECK-O-NEXT: Running pass: PropagateAndConvertNoAliasPass ; CHECK-O23SZ-NEXT: Running pass: SpeculativeExecutionPass ; CHECK-O23SZ-NEXT: Running pass: JumpThreadingPass ; CHECK-O23SZ-NEXT: Running analysis: LazyValueAnalysis @@ -160,7 +164,9 @@ ; CHECK-O-NEXT: Running pass: LoopDeletionPass ; CHECK-O-NEXT: Running pass: LoopFullUnrollPass ; CHECK-O-NEXT: Finished Loop pass manager run. +; CHECK-O-NEXT: Running pass: ConnectNoAliasDeclPass ; CHECK-O-NEXT: Running pass: SROA on foo +; CHECK-O-NEXT: Running pass: PropagateAndConvertNoAliasPass ; CHECK-Os-NEXT: Running pass: MergedLoadStoreMotionPass ; CHECK-Os-NEXT: Running pass: GVN ; CHECK-Os-NEXT: Running analysis: MemoryDependenceAnalysis Index: llvm/test/Other/new-pm-thinlto-postlink-pgo-defaults.ll =================================================================== --- llvm/test/Other/new-pm-thinlto-postlink-pgo-defaults.ll +++ llvm/test/Other/new-pm-thinlto-postlink-pgo-defaults.ll @@ -41,10 +41,12 @@ ; CHECK-O-NEXT: Running pass: SimplifyCFGPass ; CHECK-O-NEXT: Running analysis: TargetIRAnalysis ; CHECK-O-NEXT: Running analysis: AssumptionAnalysis +; CHECK-O-NEXT: Running pass: ConnectNoAliasDeclPass ; CHECK-O-NEXT: Running pass: SROA ; CHECK-O-NEXT: Running analysis: DominatorTreeAnalysis ; CHECK-O-NEXT: Running pass: EarlyCSEPass ; CHECK-O-NEXT: Running analysis: TargetLibraryAnalysis +; CHECK-O-NEXT: Running pass: PropagateAndConvertNoAliasPass ; CHECK-O-NEXT: Running pass: LowerExpectIntrinsicPass ; CHECK-O3-NEXT: Running pass: CallSiteSplittingPass ; CHECK-O-NEXT: Finished {{.*}}Function pass manager run. @@ -87,9 +89,11 @@ ; CHECK-O3-NEXT: Running pass: OpenMPOptPass ; CHECK-O-NEXT: Running pass: CGSCCToFunctionPassAdaptor<{{.*}}PassManager{{.*}}> ; CHECK-O-NEXT: Starting {{.*}}Function pass manager run. 
+; CHECK-O-NEXT: Running pass: ConnectNoAliasDeclPass ; CHECK-O-NEXT: Running pass: SROA ; CHECK-O-NEXT: Running pass: EarlyCSEPass ; CHECK-O-NEXT: Running analysis: MemorySSAAnalysis +; CHECK-O-NEXT: Running pass: PropagateAndConvertNoAliasPass ; CHECK-O23SZ-NEXT: Running pass: SpeculativeExecutionPass ; CHECK-O23SZ-NEXT: Running pass: JumpThreadingPass ; CHECK-O23SZ-NEXT: Running analysis: LazyValueAnalysis @@ -132,7 +136,9 @@ ; CHECK-O-NEXT: Running pass: LoopDeletionPass ; CHECK-O-NEXT: Running pass: LoopFullUnrollPass ; CHECK-O-NEXT: Finished Loop pass manager run. +; CHECK-O-NEXT: Running pass: ConnectNoAliasDeclPass ; CHECK-O-NEXT: Running pass: SROA on foo +; CHECK-O-NEXT: Running pass: PropagateAndConvertNoAliasPass ; CHECK-Os-NEXT: Running pass: MergedLoadStoreMotionPass ; CHECK-Os-NEXT: Running pass: GVN ; CHECK-Os-NEXT: Running analysis: MemoryDependenceAnalysis Index: llvm/test/Other/new-pm-thinlto-postlink-samplepgo-defaults.ll =================================================================== --- llvm/test/Other/new-pm-thinlto-postlink-samplepgo-defaults.ll +++ llvm/test/Other/new-pm-thinlto-postlink-samplepgo-defaults.ll @@ -43,10 +43,12 @@ ; CHECK-O-NEXT: Running pass: SimplifyCFGPass ; CHECK-O-NEXT: Running analysis: TargetIRAnalysis ; CHECK-O-NEXT: Running analysis: AssumptionAnalysis +; CHECK-O-NEXT: Running pass: ConnectNoAliasDeclPass ; CHECK-O-NEXT: Running pass: SROA ; CHECK-O-NEXT: Running analysis: DominatorTreeAnalysis ; CHECK-O-NEXT: Running pass: EarlyCSEPass ; CHECK-O-NEXT: Running analysis: TargetLibraryAnalysis +; CHECK-O-NEXT: Running pass: PropagateAndConvertNoAliasPass ; CHECK-O-NEXT: Running pass: LowerExpectIntrinsicPass ; CHECK-O3-NEXT: Running pass: CallSiteSplittingPass ; CHECK-O-NEXT: Running pass: InstCombinePass on foo @@ -95,9 +97,11 @@ ; CHECK-O3-NEXT: Running pass: OpenMPOptPass ; CHECK-O-NEXT: Running pass: CGSCCToFunctionPassAdaptor<{{.*}}PassManager{{.*}}> ; CHECK-O-NEXT: Starting {{.*}}Function pass manager run. +; CHECK-O-NEXT: Running pass: ConnectNoAliasDeclPass ; CHECK-O-NEXT: Running pass: SROA ; CHECK-O-NEXT: Running pass: EarlyCSEPass ; CHECK-O-NEXT: Running analysis: MemorySSAAnalysis +; CHECK-O-NEXT: Running pass: PropagateAndConvertNoAliasPass ; CHECK-O23SZ-NEXT: Running pass: SpeculativeExecutionPass ; CHECK-O23SZ-NEXT: Running pass: JumpThreadingPass ; CHECK-O23SZ-NEXT: Running analysis: LazyValueAnalysis @@ -140,7 +144,9 @@ ; CHECK-O-NEXT: Running pass: LoopDeletionPass ; CHECK-O-NEXT: Running pass: LoopFullUnrollPass ; CHECK-O-NEXT: Finished Loop pass manager run. 
+; CHECK-O-NEXT: Running pass: ConnectNoAliasDeclPass ; CHECK-O-NEXT: Running pass: SROA on foo +; CHECK-O-NEXT: Running pass: PropagateAndConvertNoAliasPass ; CHECK-Os-NEXT: Running pass: MergedLoadStoreMotionPass ; CHECK-Os-NEXT: Running pass: GVN ; CHECK-Os-NEXT: Running analysis: MemoryDependenceAnalysis Index: llvm/test/Other/new-pm-thinlto-prelink-pgo-defaults.ll =================================================================== --- llvm/test/Other/new-pm-thinlto-prelink-pgo-defaults.ll +++ llvm/test/Other/new-pm-thinlto-prelink-pgo-defaults.ll @@ -45,10 +45,12 @@ ; CHECK-O-NEXT: Running pass: SimplifyCFGPass ; CHECK-O-NEXT: Running analysis: TargetIRAnalysis ; CHECK-O-NEXT: Running analysis: AssumptionAnalysis +; CHECK-O-NEXT: Running pass: ConnectNoAliasDeclPass ; CHECK-O-NEXT: Running pass: SROA ; CHECK-O-NEXT: Running analysis: DominatorTreeAnalysis ; CHECK-O-NEXT: Running pass: EarlyCSEPass ; CHECK-O-NEXT: Running analysis: TargetLibraryAnalysis +; CHECK-O-NEXT: Running pass: PropagateAndConvertNoAliasPass ; CHECK-O-NEXT: Running pass: LowerExpectIntrinsicPass ; CHECK-O3-NEXT: Running pass: CallSiteSplittingPass ; CHECK-O-NEXT: Finished {{.*}}Function pass manager run. @@ -122,6 +124,7 @@ ; CHECK-O3-NEXT: Running pass: OpenMPOptPass ; CHECK-O-NEXT: Running pass: CGSCCToFunctionPassAdaptor<{{.*}}PassManager{{.*}}> ; CHECK-O-NEXT: Starting {{.*}}Function pass manager run. +; CHECK-O-NEXT: Running pass: ConnectNoAliasDeclPass ; CHECK-O-NEXT: Running pass: SROA ; These next two can appear in any order since they are accessed as parameters ; on the same call to SROA::runImpl @@ -133,6 +136,7 @@ ; CHECK-Os-NEXT: Running analysis: TargetIRAnalysis on foo ; CHECK-Oz-NEXT: Running analysis: TargetIRAnalysis on foo ; CHECK-O-NEXT: Running analysis: MemorySSAAnalysis +; CHECK-O-NEXT: Running pass: PropagateAndConvertNoAliasPass ; CHECK-O23SZ-NEXT: Running pass: SpeculativeExecutionPass ; CHECK-O23SZ-NEXT: Running pass: JumpThreadingPass ; CHECK-O23SZ-NEXT: Running analysis: LazyValueAnalysis @@ -184,7 +188,9 @@ ; CHECK-O-NEXT: Running pass: LoopDeletionPass ; CHECK-O-NEXT: Running pass: LoopFullUnrollPass ; CHECK-O-NEXT: Finished Loop pass manager run. 
+; CHECK-O-NEXT: Running pass: ConnectNoAliasDeclPass ; CHECK-O-NEXT: Running pass: SROA on foo +; CHECK-O-NEXT: Running pass: PropagateAndConvertNoAliasPass ; CHECK-Os-NEXT: Running pass: MergedLoadStoreMotionPass ; CHECK-Os-NEXT: Running pass: GVN ; CHECK-Os-NEXT: Running analysis: MemoryDependenceAnalysis Index: llvm/test/Other/new-pm-thinlto-prelink-samplepgo-defaults.ll =================================================================== --- llvm/test/Other/new-pm-thinlto-prelink-samplepgo-defaults.ll +++ llvm/test/Other/new-pm-thinlto-prelink-samplepgo-defaults.ll @@ -44,10 +44,12 @@ ; CHECK-O-NEXT: Running pass: SimplifyCFGPass ; CHECK-O-NEXT: Running analysis: TargetIRAnalysis ; CHECK-O-NEXT: Running analysis: AssumptionAnalysis +; CHECK-O-NEXT: Running pass: ConnectNoAliasDeclPass ; CHECK-O-NEXT: Running pass: SROA ; CHECK-O-NEXT: Running analysis: DominatorTreeAnalysis ; CHECK-O-NEXT: Running pass: EarlyCSEPass ; CHECK-O-NEXT: Running analysis: TargetLibraryAnalysis +; CHECK-O-NEXT: Running pass: PropagateAndConvertNoAliasPass ; CHECK-O-NEXT: Running pass: LowerExpectIntrinsicPass ; CHECK-O3-NEXT: Running pass: CallSiteSplittingPass ; CHECK-O-NEXT: Running pass: InstCombinePass on foo @@ -95,9 +97,11 @@ ; CHECK-O3-NEXT: Running pass: OpenMPOptPass ; CHECK-O-NEXT: Running pass: CGSCCToFunctionPassAdaptor<{{.*}}PassManager{{.*}}> ; CHECK-O-NEXT: Starting {{.*}}Function pass manager run. +; CHECK-O-NEXT: Running pass: ConnectNoAliasDeclPass ; CHECK-O-NEXT: Running pass: SROA ; CHECK-O-NEXT: Running pass: EarlyCSEPass ; CHECK-O-NEXT: Running analysis: MemorySSAAnalysis +; CHECK-O-NEXT: Running pass: PropagateAndConvertNoAliasPass ; CHECK-O23SZ-NEXT: Running pass: SpeculativeExecutionPass ; CHECK-O23SZ-NEXT: Running pass: JumpThreadingPass ; CHECK-O23SZ-NEXT: Running analysis: LazyValueAnalysis @@ -139,7 +143,9 @@ ; CHECK-O-NEXT: Running pass: LoopIdiomRecognizePass ; CHECK-O-NEXT: Running pass: LoopDeletionPass ; CHECK-O-NEXT: Finished Loop pass manager run. +; CHECK-O-NEXT: Running pass: ConnectNoAliasDeclPass ; CHECK-O-NEXT: Running pass: SROA on foo +; CHECK-O-NEXT: Running pass: PropagateAndConvertNoAliasPass ; CHECK-Os-NEXT: Running pass: MergedLoadStoreMotionPass ; CHECK-Os-NEXT: Running pass: GVN ; CHECK-Os-NEXT: Running analysis: MemoryDependenceAnalysis Index: llvm/test/Other/opt-O2-pipeline.ll =================================================================== --- llvm/test/Other/opt-O2-pipeline.ll +++ llvm/test/Other/opt-O2-pipeline.ll @@ -15,8 +15,10 @@ ; CHECK-NEXT: Instrument function entry/exit with calls to e.g. 
mcount() (pre inlining) ; CHECK-NEXT: Simplify the CFG ; CHECK-NEXT: Dominator Tree Construction +; CHECK-NEXT: Connect llvm.noalias.decl ; CHECK-NEXT: SROA ; CHECK-NEXT: Early CSE +; CHECK-NEXT: Propagate and Convert Noalias intrinsics ; CHECK-NEXT: Lower 'expect' Intrinsics ; CHECK-NEXT: Pass Arguments: ; CHECK-NEXT: Target Library Information @@ -59,16 +61,22 @@ ; CHECK-NEXT: Call Graph SCC Pass Manager ; CHECK-NEXT: Remove unused exception handling info ; CHECK-NEXT: Function Integration/Inlining +; CHECK-NEXT: FunctionPass Manager +; CHECK-NEXT: Dominator Tree Construction +; CHECK-NEXT: Propagate and Convert Noalias intrinsics ; CHECK-NEXT: OpenMP specific optimizations ; CHECK-NEXT: Deduce function attributes ; CHECK-NEXT: FunctionPass Manager ; CHECK-NEXT: Dominator Tree Construction +; CHECK-NEXT: Connect llvm.noalias.decl ; CHECK-NEXT: SROA ; CHECK-NEXT: Basic Alias Analysis (stateless AA impl) ; CHECK-NEXT: Function Alias Analysis Results ; CHECK-NEXT: Memory SSA ; CHECK-NEXT: Early CSE w/ MemorySSA +; CHECK-NEXT: Propagate and Convert Noalias intrinsics ; CHECK-NEXT: Speculatively execute instructions if target has divergent branches +; CHECK-NEXT: Basic Alias Analysis (stateless AA impl) ; CHECK-NEXT: Function Alias Analysis Results ; CHECK-NEXT: Lazy Value Information Analysis ; CHECK-NEXT: Jump Threading @@ -153,6 +161,10 @@ ; CHECK-NEXT: Lazy Block Frequency Analysis ; CHECK-NEXT: Optimization Remark Emitter ; CHECK-NEXT: Combine redundant instructions +; CHECK-NEXT: Connect llvm.noalias.decl +; CHECK-NEXT: Propagate and Convert Noalias intrinsics +; CHECK-NEXT: Basic Alias Analysis (stateless AA impl) +; CHECK-NEXT: Function Alias Analysis Results ; CHECK-NEXT: Lazy Value Information Analysis ; CHECK-NEXT: Jump Threading ; CHECK-NEXT: Value Propagation Index: llvm/test/Other/opt-O3-pipeline.ll =================================================================== --- llvm/test/Other/opt-O3-pipeline.ll +++ llvm/test/Other/opt-O3-pipeline.ll @@ -15,8 +15,10 @@ ; CHECK-NEXT: Instrument function entry/exit with calls to e.g. 
mcount() (pre inlining) ; CHECK-NEXT: Simplify the CFG ; CHECK-NEXT: Dominator Tree Construction +; CHECK-NEXT: Connect llvm.noalias.decl ; CHECK-NEXT: SROA ; CHECK-NEXT: Early CSE +; CHECK-NEXT: Propagate and Convert Noalias intrinsics ; CHECK-NEXT: Lower 'expect' Intrinsics ; CHECK-NEXT: Pass Arguments: ; CHECK-NEXT: Target Library Information @@ -62,17 +64,23 @@ ; CHECK-NEXT: Call Graph SCC Pass Manager ; CHECK-NEXT: Remove unused exception handling info ; CHECK-NEXT: Function Integration/Inlining +; CHECK-NEXT: FunctionPass Manager +; CHECK-NEXT: Dominator Tree Construction +; CHECK-NEXT: Propagate and Convert Noalias intrinsics ; CHECK-NEXT: OpenMP specific optimizations ; CHECK-NEXT: Deduce function attributes ; CHECK-NEXT: Promote 'by reference' arguments to scalars ; CHECK-NEXT: FunctionPass Manager ; CHECK-NEXT: Dominator Tree Construction +; CHECK-NEXT: Connect llvm.noalias.decl ; CHECK-NEXT: SROA ; CHECK-NEXT: Basic Alias Analysis (stateless AA impl) ; CHECK-NEXT: Function Alias Analysis Results ; CHECK-NEXT: Memory SSA ; CHECK-NEXT: Early CSE w/ MemorySSA +; CHECK-NEXT: Propagate and Convert Noalias intrinsics ; CHECK-NEXT: Speculatively execute instructions if target has divergent branches +; CHECK-NEXT: Basic Alias Analysis (stateless AA impl) ; CHECK-NEXT: Function Alias Analysis Results ; CHECK-NEXT: Lazy Value Information Analysis ; CHECK-NEXT: Jump Threading @@ -158,6 +166,10 @@ ; CHECK-NEXT: Lazy Block Frequency Analysis ; CHECK-NEXT: Optimization Remark Emitter ; CHECK-NEXT: Combine redundant instructions +; CHECK-NEXT: Connect llvm.noalias.decl +; CHECK-NEXT: Propagate and Convert Noalias intrinsics +; CHECK-NEXT: Basic Alias Analysis (stateless AA impl) +; CHECK-NEXT: Function Alias Analysis Results ; CHECK-NEXT: Lazy Value Information Analysis ; CHECK-NEXT: Jump Threading ; CHECK-NEXT: Value Propagation Index: llvm/test/Other/opt-Os-pipeline.ll =================================================================== --- llvm/test/Other/opt-Os-pipeline.ll +++ llvm/test/Other/opt-Os-pipeline.ll @@ -15,8 +15,10 @@ ; CHECK-NEXT: Instrument function entry/exit with calls to e.g. 
mcount() (pre inlining) ; CHECK-NEXT: Simplify the CFG ; CHECK-NEXT: Dominator Tree Construction +; CHECK-NEXT: Connect llvm.noalias.decl ; CHECK-NEXT: SROA ; CHECK-NEXT: Early CSE +; CHECK-NEXT: Propagate and Convert Noalias intrinsics ; CHECK-NEXT: Lower 'expect' Intrinsics ; CHECK-NEXT: Pass Arguments: ; CHECK-NEXT: Target Library Information @@ -59,16 +61,22 @@ ; CHECK-NEXT: Call Graph SCC Pass Manager ; CHECK-NEXT: Remove unused exception handling info ; CHECK-NEXT: Function Integration/Inlining +; CHECK-NEXT: FunctionPass Manager +; CHECK-NEXT: Dominator Tree Construction +; CHECK-NEXT: Propagate and Convert Noalias intrinsics ; CHECK-NEXT: OpenMP specific optimizations ; CHECK-NEXT: Deduce function attributes ; CHECK-NEXT: FunctionPass Manager ; CHECK-NEXT: Dominator Tree Construction +; CHECK-NEXT: Connect llvm.noalias.decl ; CHECK-NEXT: SROA ; CHECK-NEXT: Basic Alias Analysis (stateless AA impl) ; CHECK-NEXT: Function Alias Analysis Results ; CHECK-NEXT: Memory SSA ; CHECK-NEXT: Early CSE w/ MemorySSA +; CHECK-NEXT: Propagate and Convert Noalias intrinsics ; CHECK-NEXT: Speculatively execute instructions if target has divergent branches +; CHECK-NEXT: Basic Alias Analysis (stateless AA impl) ; CHECK-NEXT: Function Alias Analysis Results ; CHECK-NEXT: Lazy Value Information Analysis ; CHECK-NEXT: Jump Threading @@ -139,6 +147,10 @@ ; CHECK-NEXT: Lazy Block Frequency Analysis ; CHECK-NEXT: Optimization Remark Emitter ; CHECK-NEXT: Combine redundant instructions +; CHECK-NEXT: Connect llvm.noalias.decl +; CHECK-NEXT: Propagate and Convert Noalias intrinsics +; CHECK-NEXT: Basic Alias Analysis (stateless AA impl) +; CHECK-NEXT: Function Alias Analysis Results ; CHECK-NEXT: Lazy Value Information Analysis ; CHECK-NEXT: Jump Threading ; CHECK-NEXT: Value Propagation Index: llvm/test/Other/pass-pipelines.ll =================================================================== --- llvm/test/Other/pass-pipelines.ll +++ llvm/test/Other/pass-pipelines.ll @@ -46,6 +46,9 @@ ; CHECK-O2-NEXT: Call Graph SCC Pass Manager ; CHECK-O2-NEXT: Remove unused exception handling info ; CHECK-O2-NEXT: Function Integration/Inlining +; CHECK-O2-NEXT: FunctionPass Manager +; CHECK-O2-NEXT: Dominator Tree Construction +; CHECK-O2-NEXT: Propagate and Convert Noalias intrinsics ; CHECK-O2-NEXT: OpenMP specific optimizations ; CHECK-O2-NEXT: Deduce function attributes ; Next up is the main function pass pipeline. 
It shouldn't be split up and Index: llvm/test/Transforms/ConnectNoAliasDecl/basictest.ll =================================================================== --- /dev/null +++ llvm/test/Transforms/ConnectNoAliasDecl/basictest.ll @@ -0,0 +1,179 @@ +; RUN: opt < %s -connect-noaliasdecl -verify -S | FileCheck %s +; RUN: opt < %s -passes=connect-noaliasdecl,verify -S | FileCheck %s + +target datalayout = "e-p:64:64:64-p1:16:16:16-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-n8:16:32:64" + + +%struct.FOO = type { i32*, i32*, i32* } + +; Function Attrs: nounwind +define dso_local void @test_01_before(i32** %_p, i32 %c) #0 !noalias !2 { +entry: + %rp = alloca [2 x i32*], align 4 + %other = alloca i32*, align 4 + %local_tmp = alloca i32*, align 4 + %tmp.0 = bitcast [2 x i32*]* %rp to i8* + %.fca.0.gep = getelementptr inbounds [2 x i32*], [2 x i32*]* %rp, i32 0, i32 0 + %.fca.1.gep = getelementptr inbounds [2 x i32*], [2 x i32*]* %rp, i32 0, i32 1 + call void @llvm.lifetime.start.p0i8(i64 8, i8* %tmp.0) #5, !noalias !5 + %tmp.1 = call i8* @llvm.noalias.decl.p0i8.p0a2p0i32.i32([2 x i32*]* %rp, i32 0, metadata !7) + %tmp.2 = load i32*, i32** %_p, align 4, !tbaa !8, !noalias !5 + store i32* %tmp.2, i32** %.fca.0.gep, align 4, !tbaa !8, !noalias !5 + %arrayinit.element = getelementptr inbounds i32*, i32** %.fca.0.gep, i32 1 + %arrayidx1 = getelementptr inbounds i32*, i32** %_p, i32 1 + %tmp.3 = load i32*, i32** %arrayidx1, align 4, !tbaa !8, !noalias !5 + store i32* %tmp.3, i32** %arrayinit.element, align 4, !tbaa !8, !noalias !5 + %tmp.4 = bitcast i32** %other to i8* + call void @llvm.lifetime.start.p0i8(i64 4, i8* %tmp.4) #5, !noalias !5 + %arrayidx2 = getelementptr inbounds i32*, i32** %_p, i32 2 + %tmp.5 = load i32*, i32** %arrayidx2, align 4, !tbaa !8, !noalias !5 + store i32* %tmp.5, i32** %other, align 4, !tbaa !8, !noalias !5 + %tobool = icmp ne i32 %c, 0 + %cond = select i1 %tobool, i32** %.fca.0.gep, i32** %other + %tmp.6 = load i32*, i32** %arrayinit.element, align 4, !tbaa !8, !noalias !5 + %through_local_tmp = call i32* @llvm.noalias.p0i32.p0i8.p0p0i32.i32(i32* %tmp.6, i8* null, i32** %arrayinit.element, i32 0, metadata !2), !tbaa !8, !noalias !5 + %tmp.7 = call i32* @llvm.noalias.p0i32.p0i8.p0p0i32.i32(i32* %through_local_tmp, i8* null, i32** %local_tmp, i32 0, metadata !2), !tbaa !8, !noalias !5 + %tmp.8 = load i32, i32* %tmp.7, align 4, !tbaa !12, !noalias !5 + %tmp.9 = load i32*, i32** %.fca.0.gep, align 4, !tbaa !8, !noalias !5 + %tmp.10 = call i32* @llvm.noalias.p0i32.p0i8.p0p0i32.i32(i32* %tmp.9, i8* null, i32** %.fca.0.gep, i32 0, metadata !2), !tbaa !8, !noalias !5 + store i32 %tmp.8, i32* %tmp.10, align 4, !tbaa !12, !noalias !5 + %tmp.11 = load i32*, i32** %cond, align 4, !tbaa !8, !noalias !5 + %tmp.12 = call i32* @llvm.noalias.p0i32.p0i8.p0p0i32.i32(i32* %tmp.11, i8* null, i32** %cond, i32 0, metadata !2), !tbaa !8, !noalias !5 + store i32 42, i32* %tmp.12, align 4, !tbaa !12, !noalias !5 + call void @llvm.lifetime.end.p0i8(i64 4, i8* %tmp.4) #5 + call void @llvm.lifetime.end.p0i8(i64 8, i8* %tmp.0) #5 + ret void +} + +; CHECK-LABEL: @test_01_before( +; CHECK: %tmp.1 = call i8* @llvm.noalias.decl.p0i8.p0a2p0i32.i32([2 x i32*]* %rp, i32 0, metadata !7) +; CHECK: %through_local_tmp = call i32* @llvm.noalias.p0i32.p0i8.p0p0i32.i32(i32* %tmp.6, i8* %tmp.1, i32** %arrayinit.element, i32 0, metadata !7), !tbaa !8, !noalias !5 +; CHECK: %tmp.10 = call i32* @llvm.noalias.p0i32.p0i8.p0p0i32.i32(i32* %tmp.9, i8* %tmp.1, i32** %.fca.0.gep, 
i32 0, metadata !7), !tbaa !8, !noalias !5 +; CHECK: %tmp.12 = call i32* @llvm.noalias.p0i32.p0i8.p0p0i32.i32(i32* %tmp.11, i8* null, i32** %cond, i32 0, metadata !2), !tbaa !8, !noalias !5 +; CHECK-NOT: llvm.noalias + +; Function Attrs: nounwind +define dso_local void @test_02(i32** %_p, i32 %c) #0 !noalias !14 { +entry: + %foo = alloca %struct.FOO, align 4 + %tmp = alloca %struct.FOO, align 4 + %tmp.1 = call i8* @llvm.noalias.decl.p0i8.p0s_struct.FOOs.i32(%struct.FOO* %foo, i32 0, metadata !17) + %tmp.10 = call i8* @llvm.noalias.decl.p0i8.p0s_struct.FOOs.i32(%struct.FOO* %tmp, i32 0, metadata !19) + %tmp.12 = call %struct.FOO* @llvm.noalias.copy.guard.p0s_struct.FOOs.p0i8(%struct.FOO* %foo, i8* null, metadata !21, metadata !14) + %tmp.13 = load %struct.FOO, %struct.FOO* %tmp.12, align 4, !noalias !25 + store %struct.FOO %tmp.13, %struct.FOO* %tmp, !noalias !25 + ret void +} + +; CHECK-LABEL: @test_02( +; CHECK: %tmp.1 = call i8* @llvm.noalias.decl.p0i8.p0s_struct.FOOs.i32(%struct.FOO* %foo, i32 0, metadata !17) +; CHECK: %tmp.10 = call i8* @llvm.noalias.decl.p0i8.p0s_struct.FOOs.i32(%struct.FOO* %tmp, i32 0, metadata !19) +; CHECK: %tmp.12 = call %struct.FOO* @llvm.noalias.copy.guard.p0s_struct.FOOs.p0i8(%struct.FOO* %foo, i8* %tmp.1, metadata !21, metadata !17) +; CHECK-NOT: llvm.noalias + +; Function Attrs: nounwind +define dso_local void @test_01_after(i32** %_p, i32 %c) #0 !noalias !2 { +entry: + %rp = alloca [2 x i32*], align 4 + %local_tmp = alloca i32*, align 4 + %other = alloca i32*, align 4 + %.fca.0.gep = getelementptr inbounds [2 x i32*], [2 x i32*]* %rp, i32 0, i32 0 + %.fca.1.gep = getelementptr inbounds [2 x i32*], [2 x i32*]* %rp, i32 0, i32 1 + %tmp.0 = bitcast [2 x i32*]* %rp to i8* + call void @llvm.lifetime.start.p0i8(i64 8, i8* %tmp.0) #5, !noalias !5 + %tmp.1 = call i8* @llvm.noalias.decl.p0i8.p0a2p0i32.i32([2 x i32*]* %rp, i32 0, metadata !7) + %tmp.2 = load i32*, i32** %_p, ptr_provenance i32** undef, align 4, !tbaa !8, !noalias !5 + store i32* %tmp.2, i32** %.fca.0.gep, ptr_provenance i32** undef, align 4, !tbaa !8, !noalias !5 + %arrayinit.element = getelementptr inbounds i32*, i32** %.fca.0.gep, i32 1 + %arrayidx1 = getelementptr inbounds i32*, i32** %_p, i32 1 + %tmp.3 = load i32*, i32** %arrayidx1, ptr_provenance i32** undef, align 4, !tbaa !8, !noalias !5 + store i32* %tmp.3, i32** %arrayinit.element, ptr_provenance i32** undef, align 4, !tbaa !8, !noalias !5 + %tmp.4 = bitcast i32** %other to i8* + call void @llvm.lifetime.start.p0i8(i64 4, i8* %tmp.4) #5, !noalias !5 + %arrayidx2 = getelementptr inbounds i32*, i32** %_p, i32 2 + %tmp.5 = load i32*, i32** %arrayidx2, ptr_provenance i32** undef, align 4, !tbaa !8, !noalias !5 + store i32* %tmp.5, i32** %other, ptr_provenance i32** undef, align 4, !tbaa !8, !noalias !5 + %tobool = icmp ne i32 %c, 0 + %cond = select i1 %tobool, i32** %.fca.0.gep, i32** %other + %tmp.6 = load i32*, i32** %arrayinit.element, ptr_provenance i32** undef, align 4, !tbaa !8, !noalias !5 + %through_local_tmp = call i32* @llvm.provenance.noalias.p0i32.p0i8.p0p0i32.p0p0i32.i32(i32* %tmp.6, i8* null, i32** %arrayinit.element, i32** undef, i32 0, metadata !2), !tbaa !8, !noalias !5 + %tmp.7 = call i32* @llvm.provenance.noalias.p0i32.p0i8.p0p0i32.p0p0i32.i32(i32* %through_local_tmp, i8* null, i32** %local_tmp, i32** undef, i32 0, metadata !2), !tbaa !8, !noalias !5 + %tmp.8 = load i32, i32* %tmp.6, ptr_provenance i32* %tmp.7, align 4, !tbaa !12, !noalias !5 + %tmp.9 = load i32*, i32** %.fca.0.gep, ptr_provenance i32** undef, align 4, !tbaa 
!8, !noalias !5 + %tmp.10 = call i32* @llvm.provenance.noalias.p0i32.p0i8.p0p0i32.p0p0i32.i32(i32* %tmp.9, i8* null, i32** %.fca.0.gep, i32** undef, i32 0, metadata !2), !tbaa !8, !noalias !5 + store i32 %tmp.8, i32* %tmp.9, ptr_provenance i32* %tmp.10, align 4, !tbaa !12, !noalias !5 + %tmp.11 = load i32*, i32** %cond, ptr_provenance i32** undef, align 4, !tbaa !8, !noalias !5 + %tmp.12 = call i32* @llvm.provenance.noalias.p0i32.p0i8.p0p0i32.p0p0i32.i32(i32* %tmp.11, i8* null, i32** %cond, i32** undef, i32 0, metadata !2), !tbaa !8, !noalias !5 + store i32 42, i32* %tmp.11, ptr_provenance i32* %tmp.12, align 4, !tbaa !12, !noalias !5 + call void @llvm.lifetime.end.p0i8(i64 4, i8* %tmp.4) #5 + call void @llvm.lifetime.end.p0i8(i64 8, i8* %tmp.0) #5 + ret void +} + +; CHECK-LABEL: @test_01_after( +; CHECK: %tmp.1 = call i8* @llvm.noalias.decl.p0i8.p0a2p0i32.i32([2 x i32*]* %rp, i32 0, metadata !7) +; CHECK: %through_local_tmp = call i32* @llvm.provenance.noalias.p0i32.p0i8.p0p0i32.p0p0i32.i32(i32* %tmp.6, i8* %tmp.1, i32** %arrayinit.element, i32** undef, i32 0, metadata !7), !tbaa !8, !noalias !5 +; CHECK: %tmp.10 = call i32* @llvm.provenance.noalias.p0i32.p0i8.p0p0i32.p0p0i32.i32(i32* %tmp.9, i8* %tmp.1, i32** %.fca.0.gep, i32** undef, i32 0, metadata !7), !tbaa !8, !noalias !5 +; CHECK: %tmp.12 = call i32* @llvm.provenance.noalias.p0i32.p0i8.p0p0i32.p0p0i32.i32(i32* %tmp.11, i8* null, i32** %cond, i32** undef, i32 0, metadata !2), !tbaa !8, !noalias !5 +; CHECK-NOT: llvm.noalias + +; CHECK: declare + +; Function Attrs: argmemonly nounwind +declare void @llvm.lifetime.start.p0i8(i64 immarg, i8* nocapture) #1 + +; Function Attrs: argmemonly nounwind +declare i8* @llvm.noalias.decl.p0i8.p0a2p0i32.i32([2 x i32*]*, i32, metadata) #1 + +; Function Attrs: argmemonly nounwind speculatable +declare i32* @llvm.noalias.p0i32.p0i8.p0p0i32.i32(i32*, i8*, i32**, i32, metadata) #2 + +; Function Attrs: argmemonly nounwind +declare void @llvm.lifetime.end.p0i8(i64 immarg, i8* nocapture) #1 + +; Function Attrs: argmemonly nounwind +declare i8* @llvm.noalias.decl.p0i8.p0s_struct.FOOs.i32(%struct.FOO*, i32, metadata) #1 + +; Function Attrs: nounwind readnone +declare %struct.FOO* @llvm.noalias.copy.guard.p0s_struct.FOOs.p0i8(%struct.FOO*, i8*, metadata, metadata) #3 + +; Function Attrs: nounwind readnone speculatable +declare i32* @llvm.provenance.noalias.p0i32.p0i8.p0p0i32.p0p0i32.i32(i32*, i8*, i32**, i32**, i32, metadata) #4 + + +attributes #0 = { nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #1 = { argmemonly nounwind } +attributes #2 = { argmemonly nounwind speculatable } +attributes #3 = { nounwind readnone } +attributes #4 = { nounwind readnone speculatable } +attributes #5 = { nounwind } + +!llvm.module.flags = !{!0} +!llvm.ident = !{!1} + +!0 = !{i32 1, !"wchar_size", i32 4} +!1 = !{!"clang version 10.0.0 (sgasip@krachtcs10:/u/sgasip/ipd/repositories/llvm_ipd 8efa7b085788c7ef2e29af74da59051ee86c2fd1)"} +!2 = !{!3} +!3 = distinct !{!3, !4, !"test_01: unknown scope"} +!4 = distinct !{!4, !"test_01"} +!5 = !{!6, !3} +!6 = distinct !{!6, !4, !"test_01: rp"} +!7 = !{!6} +!8 = !{!9, !9, i64 0, i64 4} +!9 = !{!10, i64 4, !"any pointer"} +!10 = !{!11, i64 
1, !"omnipotent char"} +!11 = !{!"Simple C/C++ TBAA"} +!12 = !{!13, !13, i64 0, i64 4} +!13 = !{!10, i64 4, !"int"} +!14 = !{!15} +!15 = distinct !{!15, !16, !"test_02: unknown scope"} +!16 = distinct !{!16, !"test_02"} +!17 = !{!18} +!18 = distinct !{!18, !16, !"test_02: foo"} +!19 = !{!20} +!20 = distinct !{!20, !16, !"test_02: tmp"} +!21 = !{!22, !23, !24} +!22 = !{i64 -1, i64 0} +!23 = !{i64 -1, i64 1} +!24 = !{i64 -1, i64 2} +!25 = !{!20, !18, !15} Index: llvm/test/Transforms/Coroutines/ArgAddr.ll =================================================================== --- llvm/test/Transforms/Coroutines/ArgAddr.ll +++ llvm/test/Transforms/Coroutines/ArgAddr.ll @@ -47,6 +47,7 @@ ret i32 0 ; CHECK: call void @ctor ; CHECK-NEXT: call void @print(i32 4) +; CHECK-NEXT: bitcast i8* ; CHECK-NEXT: call void @print(i32 3) ; CHECK-NEXT: call void @print(i32 2) ; CHECK: ret i32 0 Index: llvm/test/Transforms/Coroutines/coro-retcon-resume-values.ll =================================================================== --- llvm/test/Transforms/Coroutines/coro-retcon-resume-values.ll +++ llvm/test/Transforms/Coroutines/coro-retcon-resume-values.ll @@ -65,8 +65,12 @@ ; CHECK-LABEL: define i32 @main ; CHECK-NEXT: entry: ; CHECK: [[BUFFER:%.*]] = alloca [8 x i8], align 4 +; CHECK: [[SUB:%.*]] = getelementptr inbounds [8 x i8], [8 x i8]* [[BUFFER]], i64 0, i64 0 ; CHECK: [[SLOT:%.*]] = bitcast [8 x i8]* [[BUFFER]] to i32* -; CHECK-NEXT: store i32 7, i32* [[SLOT]], align 4 +; CHECK-NEXT: [[SUB_0_DECL:%.*]] = call i8* @llvm.noalias.decl.p0i8.p0p0i8.i64(i8** null, i64 0, metadata [[SUB_0_META:!.*]]) +; CHECK-NEXT: [[SUB_0_PROVENANCE:%.*]] = call i8* @llvm.provenance.noalias.p0i8.p0i8.p0p0i8.p0p0i8.i64(i8* nonnull [[SUB]], i8* [[SUB_0_DECL]], i8** null, i8** undef, i64 0, metadata [[SUB_0_META]]) +; CHECK-NEXT: [[SUB_0_PROVENANCE_I32:%.*]] = bitcast i8* [[SUB_0_PROVENANCE]] to i32* +; CHECK-NEXT: store i32 7, i32* [[SLOT]], ptr_provenance i32* [[SUB_0_PROVENANCE_I32]], align 4, !noalias [[SUB_0_META]] ; CHECK-NEXT: call void @print(i32 7) ; CHECK-NEXT: ret i32 0 Index: llvm/test/Transforms/Coroutines/coro-retcon-value.ll =================================================================== --- llvm/test/Transforms/Coroutines/coro-retcon-value.ll +++ llvm/test/Transforms/Coroutines/coro-retcon-value.ll @@ -75,16 +75,23 @@ ; CHECK-LABEL: define i32 @main ; CHECK-NEXT: entry: ; CHECK: [[BUFFER:%.*]] = alloca [8 x i8], align 4 +; CHECK: [[SUB:%.*]] = getelementptr inbounds [8 x i8], [8 x i8]* [[BUFFER]], i64 0, i64 0 ; CHECK: [[SLOT:%.*]] = bitcast [8 x i8]* [[BUFFER]] to i32* ; CHECK-NEXT: store i32 4, i32* [[SLOT]], align 4 ; CHECK-NEXT: call void @print(i32 4) -; CHECK-NEXT: [[LOAD:%.*]] = load i32, i32* [[SLOT]], align 4 +; CHECK-NEXT: [[SUB_0_DECL:%.*]] = call i8* @llvm.noalias.decl.p0i8.p0p0i8.i64(i8** null, i64 0, metadata [[SUB_0_META:!.*]]) +; CHECK-NEXT: [[SUB_0_PROVENANCE:%.*]] = call i8* @llvm.provenance.noalias.p0i8.p0i8.p0p0i8.p0p0i8.i64(i8* nonnull [[SUB]], i8* [[SUB_0_DECL]], i8** null, i8** undef, i64 0, metadata [[SUB_0_META]]) +; CHECK-NEXT: [[SUB_0_PROVENANCE_I32:%.*]] = bitcast i8* [[SUB_0_PROVENANCE]] to i32* +; CHECK-NEXT: [[LOAD:%.*]] = load i32, i32* [[SLOT]], ptr_provenance i32* [[SUB_0_PROVENANCE_I32]], align 4, !noalias [[SUB_0_META]] ; CHECK-NEXT: [[INC:%.*]] = add i32 [[LOAD]], 1 -; CHECK-NEXT: store i32 [[INC]], i32* [[SLOT]], align 4 +; CHECK-NEXT: store i32 [[INC]], i32* [[SLOT]], ptr_provenance i32* [[SUB_0_PROVENANCE_I32]], align 4, !noalias [[SUB_0_META]] ; CHECK-NEXT: call void 
@print(i32 [[INC]]) -; CHECK-NEXT: [[LOAD:%.*]] = load i32, i32* [[SLOT]], align 4 +; CHECK-NEXT: [[SUB_1_DECL:%.*]] = call i8* @llvm.noalias.decl.p0i8.p0p0i8.i64(i8** null, i64 0, metadata [[SUB_1_META:!.*]]) +; CHECK-NEXT: [[SUB_1_PROVENANCE:%.*]] = call i8* @llvm.provenance.noalias.p0i8.p0i8.p0p0i8.p0p0i8.i64(i8* nonnull [[SUB]], i8* [[SUB_1_DECL]], i8** null, i8** undef, i64 0, metadata [[SUB_1_META]]) +; CHECK-NEXT: [[SUB_1_PROVENANCE_I32:%.*]] = bitcast i8* [[SUB_1_PROVENANCE]] to i32* +; CHECK-NEXT: [[LOAD:%.*]] = load i32, i32* [[SLOT]], ptr_provenance i32* [[SUB_1_PROVENANCE_I32]], align 4, !noalias [[SUB_1_META]] ; CHECK-NEXT: [[INC:%.*]] = add i32 [[LOAD]], 1 -; CHECK-NEXT: store i32 [[INC]], i32* [[SLOT]], align 4 +; CHECK-NEXT: store i32 [[INC]], i32* [[SLOT]], ptr_provenance i32* [[SUB_1_PROVENANCE_I32]], align 4, !noalias [[SUB_1_META]] ; CHECK-NEXT: call void @print(i32 [[INC]]) ; CHECK-NEXT: ret i32 0 Index: llvm/test/Transforms/Coroutines/coro-retcon.ll =================================================================== --- llvm/test/Transforms/Coroutines/coro-retcon.ll +++ llvm/test/Transforms/Coroutines/coro-retcon.ll @@ -65,16 +65,23 @@ ; CHECK-LABEL: define i32 @main ; CHECK-NEXT: entry: ; CHECK: [[BUFFER:%.*]] = alloca [8 x i8], align 4 +; CHECK: [[SUB:%.*]] = getelementptr inbounds [8 x i8], [8 x i8]* [[BUFFER]], i64 0, i64 0 ; CHECK: [[SLOT:%.*]] = bitcast [8 x i8]* [[BUFFER]] to i32* ; CHECK-NEXT: store i32 4, i32* [[SLOT]], align 4 ; CHECK-NEXT: call void @print(i32 4) -; CHECK-NEXT: [[LOAD:%.*]] = load i32, i32* [[SLOT]], align 4 +; CHECK-NEXT: [[SUB_0_DECL:%.*]] = call i8* @llvm.noalias.decl.p0i8.p0p0i8.i64(i8** null, i64 0, metadata [[SUB_0_META:!.*]]) +; CHECK-NEXT: [[SUB_0_PROVENANCE:%.*]] = call i8* @llvm.provenance.noalias.p0i8.p0i8.p0p0i8.p0p0i8.i64(i8* nonnull [[SUB]], i8* [[SUB_0_DECL]], i8** null, i8** undef, i64 0, metadata [[SUB_0_META]]) +; CHECK-NEXT: [[SUB_0_PROVENANCE_I32:%.*]] = bitcast i8* [[SUB_0_PROVENANCE]] to i32* +; CHECK-NEXT: [[LOAD:%.*]] = load i32, i32* [[SLOT]], ptr_provenance i32* [[SUB_0_PROVENANCE_I32]], align 4, !noalias [[SUB_0_META]] ; CHECK-NEXT: [[INC:%.*]] = add i32 [[LOAD]], 1 -; CHECK-NEXT: store i32 [[INC]], i32* [[SLOT]], align 4 +; CHECK-NEXT: store i32 [[INC]], i32* [[SLOT]], ptr_provenance i32* [[SUB_0_PROVENANCE_I32]], align 4, !noalias [[SUB_0_META]] ; CHECK-NEXT: call void @print(i32 [[INC]]) -; CHECK-NEXT: [[LOAD:%.*]] = load i32, i32* [[SLOT]], align 4 +; CHECK-NEXT: [[SUB_1_DECL:%.*]] = call i8* @llvm.noalias.decl.p0i8.p0p0i8.i64(i8** null, i64 0, metadata [[SUB_1_META:!.*]]) +; CHECK-NEXT: [[SUB_1_PROVENANCE:%.*]] = call i8* @llvm.provenance.noalias.p0i8.p0i8.p0p0i8.p0p0i8.i64(i8* nonnull [[SUB]], i8* [[SUB_1_DECL]], i8** null, i8** undef, i64 0, metadata [[SUB_1_META]]) +; CHECK-NEXT: [[SUB_1_PROVENANCE_I32:%.*]] = bitcast i8* [[SUB_1_PROVENANCE]] to i32* +; CHECK-NEXT: [[LOAD:%.*]] = load i32, i32* [[SLOT]], ptr_provenance i32* [[SUB_1_PROVENANCE_I32]], align 4, !noalias [[SUB_1_META]] ; CHECK-NEXT: [[INC:%.*]] = add i32 [[LOAD]], 1 -; CHECK-NEXT: store i32 [[INC]], i32* [[SLOT]], align 4 +; CHECK-NEXT: store i32 [[INC]], i32* [[SLOT]], ptr_provenance i32* [[SUB_1_PROVENANCE_I32]], align 4, !noalias [[SUB_1_META]] ; CHECK-NEXT: call void @print(i32 [[INC]]) ; CHECK-NEXT: ret i32 0 Index: llvm/test/Transforms/EarlyCSE/basic.ll =================================================================== --- llvm/test/Transforms/EarlyCSE/basic.ll +++ llvm/test/Transforms/EarlyCSE/basic.ll @@ -54,6 +54,26 @@ ; CHECK: ret 
i32 0 } +; CHECK-LABEL: @test2b( +define i32 @test2b(i32 *%P, i1 %b) { + %V1 = load i32, i32* %P + call i8* @llvm.noalias.p0i8.p0i8.p0p0i8.i32(i8* undef, i8* null, i8** null, i32 0, metadata !1) + %V2 = load i32, i32* %P + %Diff = sub i32 %V1, %V2 + ret i32 %Diff + ; CHECK: ret i32 0 +} + +; CHECK-LABEL: @test2c( +define i32 @test2c(i32 *%P, i1 %b) { + %V1 = load i32, i32* %P + call i8* @llvm.provenance.noalias.p0i8.p0i8.p0p0i8.p0p0i8.i32(i8* undef, i8* null, i8** null, i8** null, i32 0, metadata !1) + %V2 = load i32, i32* %P + %Diff = sub i32 %V1, %V2 + ret i32 %Diff + ; CHECK: ret i32 0 +} + ;; Cross block load value numbering. ; CHECK-LABEL: @test3( define i32 @test3(i32 *%P, i1 %Cond) { @@ -134,6 +154,24 @@ ; CHECK: ret i32 42 } +; CHECK-LABEL: @test6b( +define i32 @test6b(i32 *%P, i1 %b) { + store i32 42, i32* %P + call i8* @llvm.noalias.p0i8.p0i8.p0p0i8.i32(i8* undef, i8* null, i8** null, i32 0, metadata !1) + %V1 = load i32, i32* %P + ret i32 %V1 + ; CHECK: ret i32 42 +} + +; CHECK-LABEL: @test6c( +define i32 @test6c(i32 *%P, i1 %b) { + store i32 42, i32* %P + call i8* @llvm.provenance.noalias.p0i8.p0i8.p0p0i8.p0p0i8.i32(i8* undef, i8* null, i8** null, i8** null, i32 0, metadata !1) + %V1 = load i32, i32* %P + ret i32 %V1 + ; CHECK: ret i32 42 +} + ;; Trivial dead store elimination. ; CHECK-LABEL: @test7( define void @test7(i32 *%P) { @@ -302,3 +340,9 @@ %and = and i1 %b, %c ret i1 %and } + +declare i8* @llvm.noalias.p0i8.p0i8.p0p0i8.i32(i8*, i8*, i8**, i32, metadata ) nounwind +declare i8* @llvm.provenance.noalias.p0i8.p0i8.p0p0i8.p0p0i8.i32(i8*, i8*, i8**, i8**, i32, metadata ) nounwind + +!0 = !{!0, !"some domain"} +!1 = !{!1, !0, !"some scope"} Index: llvm/test/Transforms/FunctionAttrs/nocapture.ll =================================================================== --- llvm/test/Transforms/FunctionAttrs/nocapture.ll +++ llvm/test/Transforms/FunctionAttrs/nocapture.ll @@ -106,6 +106,40 @@ ret i32 %val } +; FNATTR: define i32 @nc1a(i32* %q, i32* nocapture %p, i1 %b) +define i32 @nc1a(i32* %q, i32* %p, i1 %b) { +e: + %pa = call i32* @llvm.noalias.p0i32.p0i8.p0p0i32.i32(i32* %p, i8* null, i32** null, i32 0, metadata !1) + br label %l +l: + %x = phi i32* [ %pa, %e ] + %y = phi i32* [ %q, %e ] + %tmp = bitcast i32* %x to i32* ; [#uses=2] + %tmp2 = select i1 %b, i32* %tmp, i32* %y + %val = load i32, i32* %tmp2 ; [#uses=1] + store i32 0, i32* %tmp + store i32* %y, i32** @g + ret i32 %val +} + +; FNATTR: define i32 @nc1b(i32* %q, i32* nocapture %p, i1 %b) +define i32 @nc1b(i32* %q, i32* %p, i1 %b) { +e: + %prov.p = call i32* @llvm.provenance.noalias.p0i32.p0i8.p0p0i32.p0p0i32.i32(i32* %p, i8* null, i32** null, i32** null, i32 0, metadata !1) + br label %l +l: + %x = phi i32* [ %p, %e ] + %prov.x = phi i32* [ %prov.p, %e ] + %y = phi i32* [ %q, %e ] + %tmp = bitcast i32* %x to i32* ; [#uses=2] + %tmp2 = select i1 %b, i32* %tmp, i32* %y + %prov.tmp2 = select i1 %b, i32* %prov.x, i32* %y + %val = load i32, i32* %tmp2 ; [#uses=1] + store i32 0, i32* %tmp, ptr_provenance i32* %prov.tmp2 + store i32* %y, i32** @g + ret i32 %val +} + ; FNATTR: define i32 @nc1_addrspace(i32* %q, i32 addrspace(1)* nocapture %p, i1 %b) define i32 @nc1_addrspace(i32* %q, i32 addrspace(1)* %p, i1 %b) { e: @@ -319,3 +353,9 @@ declare i8* @llvm.launder.invariant.group.p0i8(i8*) declare i8* @llvm.strip.invariant.group.p0i8(i8*) + +declare i32* @llvm.noalias.p0i32.p0i8.p0p0i32.i32(i32*, i8*, i32**, i32, metadata ) nounwind +declare i32* @llvm.provenance.noalias.p0i32.p0i8.p0p0i32.p0p0i32.i32(i32*, i8*, i32**, i32**, 
i32, metadata ) nounwind + +!0 = !{!0, !"some domain"} +!1 = !{!1, !0, !"some scope"} Index: llvm/test/Transforms/FunctionAttrs/nonnull.ll =================================================================== --- llvm/test/Transforms/FunctionAttrs/nonnull.ll +++ llvm/test/Transforms/FunctionAttrs/nonnull.ll @@ -770,5 +770,15 @@ br i1 %11, label %7, label %8 } +; Return a pointer trivially nonnull (argument attribute) through an llvm.noalias.arg.guard +; FNATTR: define nonnull i8* @test_noalias_arg_guard +define i8* @test_noalias_arg_guard(i8* nonnull %p) { + %ret = tail call i8* @llvm.noalias.arg.guard.p0i8.p0i8(i8* %p, i8* %p) + ret i8* %ret +} + +; Function Attrs: nounwind readnone +declare i8* @llvm.noalias.arg.guard.p0i8.p0i8(i8*, i8*) nounwind readnone + attributes #0 = { null_pointer_is_valid } attributes #1 = { nounwind willreturn} Index: llvm/test/Transforms/Inline/launder.invariant.group.ll =================================================================== --- llvm/test/Transforms/Inline/launder.invariant.group.ll +++ llvm/test/Transforms/Inline/launder.invariant.group.ll @@ -1,6 +1,10 @@ -; RUN: opt -S -inline < %s | FileCheck %s -; RUN: opt -S -O3 < %s | FileCheck %s -; RUN: opt -S -inline -inline-threshold=1 < %s | FileCheck %s +; RUN: opt -S -inline --use-noalias-intrinsic-during-inlining=0 < %s | FileCheck %s --check-prefixes=CHECK,CHECK_SCOPED +; RUN: opt -S -O3 --use-noalias-intrinsic-during-inlining=0 < %s | FileCheck %s --check-prefixes=CHECK,CHECK_SCOPED,CHECK_OPT +; RUN: opt -S -inline -inline-threshold=1 --use-noalias-intrinsic-during-inlining=0 < %s | FileCheck %s --check-prefixes=CHECK,CHECK_SCOPED + +; RUN: opt -S -inline --use-noalias-intrinsic-during-inlining=1 < %s | FileCheck %s --check-prefixes=CHECK,CHECK_NOALIAS +; RUN: opt -S -O3 --use-noalias-intrinsic-during-inlining=1 < %s | FileCheck %s --check-prefixes=CHECK,CHECK_PROVENANCE,CHECK_OPT +; RUN: opt -S -inline -inline-threshold=1 --use-noalias-intrinsic-during-inlining=1 < %s | FileCheck %s --check-prefixes=CHECK,CHECK_NOALIAS %struct.A = type <{ i32 (...)**, i32, [4 x i8] }> @@ -9,7 +13,7 @@ ; sometimes it would be considered noalias. 
; CHECK-LABEL: define i32 @bar(%struct.A* noalias define i32 @bar(%struct.A* noalias) { -; CHECK-NOT: noalias +; CHECK_SCOPED-NOT: noalias %2 = bitcast %struct.A* %0 to i8* %3 = call i8* @llvm.launder.invariant.group.p0i8(i8* %2) %4 = getelementptr inbounds i8, i8* %3, i64 8 @@ -22,10 +26,24 @@ ; CHECK-LABEL: define i32 @foo(%struct.A* noalias define i32 @foo(%struct.A* noalias) { - ; CHECK-NOT: call i32 @bar( - ; CHECK-NOT: noalias + ; CHECK_SCOPED-NOT: call i32 @bar( + ; CHECK_SCOPED-NOT: noalias + + ; CHECK_NOALIAS-NOT: call i32 @bar( + ; CHECK_NOALIAS: @llvm.noalias.decl.p0 + ; CHECK_NOALIAS-NEXT: @llvm.noalias.p0 + ; CHECK_NOALIAS-NOT: call i32 @bar( + + ; CHECK_PROVENANCE-NOT: call i32 @bar( + ; CHECK_PROVENANCE: @llvm.noalias.decl.p0 + ; CHECK_PROVENANCE-NEXT: @llvm.provenance.noalias.p0 + ; CHECK_PROVENANCE-NOT: call i32 @bar( + ; CHECK_PROVENANCE: @llvm.noalias.arg.guard.p0 + ; CHECK_PROVENANCE-NOT: call i32 @bar( %2 = tail call i32 @bar(%struct.A* %0) ret i32 %2 + + ; CHECK_OPT: ret i32 42 } Index: llvm/test/Transforms/Inline/noalias-calls.ll =================================================================== --- llvm/test/Transforms/Inline/noalias-calls.ll +++ llvm/test/Transforms/Inline/noalias-calls.ll @@ -1,9 +1,10 @@ -; RUN: opt -basic-aa -inline -enable-noalias-to-md-conversion -S < %s | FileCheck %s +; RUN: opt -basic-aa -inline -enable-noalias-to-md-conversion -use-noalias-intrinsic-during-inlining=0 -S < %s | FileCheck %s -check-prefix=MD-SCOPE +; RUN: opt -basic-aa -inline -enable-noalias-to-md-conversion -use-noalias-intrinsic-during-inlining=1 -S < %s | FileCheck %s -check-prefix=INTR-SCOPE target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" target triple = "x86_64-unknown-linux-gnu" declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture readonly, i64, i1) #0 -declare void @hey() #0 +declare void @hey() #1 define void @hello(i8* noalias nocapture %a, i8* noalias nocapture readonly %c, i8* nocapture %b) #1 { entry: @@ -16,12 +17,39 @@ ret void } -define void @foo(i8* nocapture %a, i8* nocapture readonly %c, i8* nocapture %b) #2 { +define void @foo(i8* nocapture %a, i8* nocapture readonly %c, i8* nocapture %b) #1 { entry: tail call void @hello(i8* %a, i8* %c, i8* %b) ret void } +; MD-SCOPE: define void @foo(i8* nocapture %a, i8* nocapture readonly %c, i8* nocapture %b) #1 { +; MD-SCOPE: entry: +; MD-SCOPE: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 %a, i8* align 16 %b, i64 16, i1 false) #1, !noalias !0 +; MD-SCOPE: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 %b, i8* align 16 %c, i64 16, i1 false) #1, !noalias !3 +; MD-SCOPE: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 %a, i8* align 16 %c, i64 16, i1 false) #1, !alias.scope !5 +; MD-SCOPE: call void @hey() #1, !noalias !5 +; MD-SCOPE: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 %{{.*}}, i8* align 16 %c, i64 16, i1 false) #1, !noalias !3 +; MD-SCOPE: ret void +; MD-SCOPE: } + +; INTR-SCOPE: define void @foo(i8* nocapture %a, i8* nocapture readonly %c, i8* nocapture %b) #1 { +; INTR-SCOPE: entry: +; INTR-SCOPE: %l.i = alloca i8, i32 512, align 1 +; INTR-SCOPE: %0 = call i8* @llvm.noalias.decl.p0i8.p0p0i8.i64(i8** null, i64 0, metadata !0) +; INTR-SCOPE: %1 = call i8* @llvm.noalias.p0i8.p0i8.p0p0i8.i64(i8* %a, i8* %0, i8** null, i64 0, metadata !0) +; INTR-SCOPE: %2 = call i8* @llvm.noalias.decl.p0i8.p0p0i8.i64(i8** null, i64 0, metadata !3) +; INTR-SCOPE: %3 = call i8* 
@llvm.noalias.p0i8.p0i8.p0p0i8.i64(i8* %c, i8* %2, i8** null, i64 0, metadata !3) +; INTR-SCOPE: call void @llvm.lifetime.start.p0i8(i64 512, i8* %l.i) +; INTR-SCOPE: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 %1, i8* align 16 %b, i64 16, i1 false) #1, !noalias !5 +; INTR-SCOPE: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 %b, i8* align 16 %3, i64 16, i1 false) #1, !noalias !5 +; INTR-SCOPE: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 %1, i8* align 16 %3, i64 16, i1 false) #1, !noalias !5 +; INTR-SCOPE: call void @hey() #1, !noalias !5 +; INTR-SCOPE: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 %l.i, i8* align 16 %3, i64 16, i1 false) #1, !noalias !5 +; INTR-SCOPE: call void @llvm.lifetime.end.p0i8(i64 512, i8* %l.i) +; INTR-SCOPE: ret void +; INTR-SCOPE: } + define void @hello_cs(i8* nocapture %a, i8* nocapture readonly %c, i8* nocapture %b) #1 { entry: %l = alloca i8, i32 512, align 1 @@ -39,41 +67,46 @@ ret void } -; CHECK: define void @foo(i8* nocapture %a, i8* nocapture readonly %c, i8* nocapture %b) #2 { -; CHECK: entry: -; CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 %a, i8* align 16 %b, i64 16, i1 false) #1, !noalias !0 -; CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 %b, i8* align 16 %c, i64 16, i1 false) #1, !noalias !3 -; CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 %a, i8* align 16 %c, i64 16, i1 false) #1, !alias.scope !5 -; CHECK: call void @hey() #1, !noalias !5 -; CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 %{{.*}}, i8* align 16 %c, i64 16, i1 false) #1, !noalias !3 -; CHECK: ret void -; CHECK: } +; MD-SCOPE: define void @foo_cs(i8* nocapture %a, i8* nocapture readonly %c, i8* nocapture %b) { +; MD-SCOPE: entry: +; MD-SCOPE: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 %a, i8* align 16 %b, i64 16, i1 false) #1, !noalias !6 +; MD-SCOPE: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 %b, i8* align 16 %c, i64 16, i1 false) #1, !noalias !9 +; MD-SCOPE: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 %a, i8* align 16 %c, i64 16, i1 false) #1, !alias.scope !11 +; MD-SCOPE: call void @hey() #1, !noalias !11 +; MD-SCOPE: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 %l.i, i8* align 16 %c, i64 16, i1 false) #1, !noalias !9 +; MD-SCOPE: ret void +; MD-SCOPE: } -; CHECK: define void @foo_cs(i8* nocapture %a, i8* nocapture readonly %c, i8* nocapture %b) #2 { -; CHECK: entry: -; CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 %a, i8* align 16 %b, i64 16, i1 false) #1, !noalias !6 -; CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 %b, i8* align 16 %c, i64 16, i1 false) #1, !noalias !9 -; CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 %a, i8* align 16 %c, i64 16, i1 false) #1, !alias.scope !11 -; CHECK: call void @hey() #1, !noalias !11 -; CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 %{{.*}}, i8* align 16 %c, i64 16, i1 false) #1, !noalias !9 -; CHECK: ret void -; CHECK: } +; INTR-SCOPE: define void @foo_cs(i8* nocapture %a, i8* nocapture readonly %c, i8* nocapture %b) { +; INTR-SCOPE: entry: +; INTR-SCOPE: %l.i = alloca i8, i32 512, align 1 +; INTR-SCOPE: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 %a, i8* align 16 %b, i64 16, i1 false) #1 +; INTR-SCOPE: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 %b, i8* align 16 %c, i64 16, i1 false) #1 +; INTR-SCOPE: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 %a, i8* align 16 %c, i64 16, i1 false) #1 +; INTR-SCOPE: call void @hey() #1 +; INTR-SCOPE: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 %l.i, i8* 
align 16 %c, i64 16, i1 false) #1 +; INTR-SCOPE: ret void +; INTR-SCOPE: } attributes #0 = { nounwind argmemonly willreturn } attributes #1 = { nounwind } -attributes #2 = { nounwind uwtable } - -; CHECK: !0 = !{!1} -; CHECK: !1 = distinct !{!1, !2, !"hello: %c"} -; CHECK: !2 = distinct !{!2, !"hello"} -; CHECK: !3 = !{!4} -; CHECK: !4 = distinct !{!4, !2, !"hello: %a"} -; CHECK: !5 = !{!4, !1} -; CHECK: !6 = !{!7} -; CHECK: !7 = distinct !{!7, !8, !"hello_cs: %c"} -; CHECK: !8 = distinct !{!8, !"hello_cs"} -; CHECK: !9 = !{!10} -; CHECK: !10 = distinct !{!10, !8, !"hello_cs: %a"} -; CHECK: !11 = !{!10, !7} +; MD-SCOPE:!0 = !{!1} +; MD-SCOPE:!1 = distinct !{!1, !2, !"hello: %c"} +; MD-SCOPE:!2 = distinct !{!2, !"hello"} +; MD-SCOPE:!3 = !{!4} +; MD-SCOPE:!4 = distinct !{!4, !2, !"hello: %a"} +; MD-SCOPE:!5 = !{!4, !1} +; MD-SCOPE:!6 = !{!7} +; MD-SCOPE:!7 = distinct !{!7, !8, !"hello_cs: %c"} +; MD-SCOPE:!8 = distinct !{!8, !"hello_cs"} +; MD-SCOPE:!9 = !{!10} +; MD-SCOPE:!10 = distinct !{!10, !8, !"hello_cs: %a"} +; MD-SCOPE:!11 = !{!10, !7} +; INTR-SCOPE: !0 = !{!1} +; INTR-SCOPE: !1 = distinct !{!1, !2, !"hello: %a"} +; INTR-SCOPE: !2 = distinct !{!2, !"hello"} +; INTR-SCOPE: !3 = !{!4} +; INTR-SCOPE: !4 = distinct !{!4, !2, !"hello: %c"} +; INTR-SCOPE: !5 = !{!1, !4} Index: llvm/test/Transforms/Inline/noalias-scopes.ll =================================================================== --- /dev/null +++ llvm/test/Transforms/Inline/noalias-scopes.ll @@ -0,0 +1,204 @@ +; RUN: opt -inline -enable-noalias-to-md-conversion -use-noalias-intrinsic-during-inlining=1 -S < %s | FileCheck %s -check-prefix=INTR-SCOPE +; verify that inlining results in scope duplication +; verify that llvm.noalias.decl is introduced at the location of the inlining + +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +; Function Attrs: nofree norecurse nounwind +define dso_local void @copy_npnp(i32* noalias nocapture %dst, i32* noalias nocapture readonly %src) local_unnamed_addr #0 { +entry: + %0 = load i32, i32* %src, ptr_provenance i32* undef, align 4, !tbaa !2 + store i32 %0, i32* %dst, ptr_provenance i32* undef, align 4, !tbaa !2 + ret void +} + +; Function Attrs: nounwind +define dso_local void @copy_rprp(i32* nocapture %dst, i32* nocapture readonly %src) local_unnamed_addr #1 { +entry: + %0 = tail call i8* @llvm.noalias.decl.p0i8.p0p0i32.i64(i32** null, i64 0, metadata !6) + %1 = tail call i8* @llvm.noalias.decl.p0i8.p0p0i32.i64(i32** null, i64 0, metadata !9) + %2 = tail call i32* @llvm.provenance.noalias.p0i32.p0i8.p0p0i32.p0p0i32.i64(i32* %src, i8* %1, i32** null, i32** undef, i64 0, metadata !9), !tbaa !11, !noalias !13 + %3 = load i32, i32* %src, ptr_provenance i32* %2, align 4, !tbaa !2, !noalias !13 + %4 = tail call i32* @llvm.provenance.noalias.p0i32.p0i8.p0p0i32.p0p0i32.i64(i32* %dst, i8* %0, i32** null, i32** undef, i64 0, metadata !6), !tbaa !11, !noalias !13 + store i32 %3, i32* %dst, ptr_provenance i32* %4, align 4, !tbaa !2, !noalias !13 + ret void +} + +; Function Attrs: argmemonly nounwind +declare i8* @llvm.noalias.decl.p0i8.p0p0i32.i64(i32**, i64, metadata) #2 + +; Function Attrs: nofree norecurse nounwind +define dso_local void @test_npnp(i32* nocapture %dst, i32* nocapture readonly %src, i32 %n) local_unnamed_addr #0 { +entry: + tail call void @copy_npnp(i32* %dst, i32* %src) + br label %do.body + +do.body: ; preds = %do.body, 
%entry + %n.addr.0 = phi i32 [ %n, %entry ], [ %dec, %do.body ] + tail call void @copy_npnp(i32* %dst, i32* %src) + tail call void @copy_npnp(i32* %dst, i32* %src) + %dec = add nsw i32 %n.addr.0, -1 + %tobool = icmp eq i32 %n.addr.0, 0 + br i1 %tobool, label %do.end, label %do.body + +do.end: ; preds = %do.body + ret void +} + +; INTR-SCOPE: define dso_local void @test_npnp(i32* nocapture %dst, i32* nocapture readonly %src, i32 %n) local_unnamed_addr #0 { +; INTR-SCOPE: entry: +; INTR-SCOPE: %0 = call i8* @llvm.noalias.decl.p0i8.p0p0i32.i64(i32** null, i64 0, metadata !14) +; INTR-SCOPE: %1 = call i32* @llvm.noalias.p0i32.p0i8.p0p0i32.i64(i32* %dst, i8* %0, i32** null, i64 0, metadata !14) +; INTR-SCOPE: %2 = call i8* @llvm.noalias.decl.p0i8.p0p0i32.i64(i32** null, i64 0, metadata !17) +; INTR-SCOPE: %3 = call i32* @llvm.noalias.p0i32.p0i8.p0p0i32.i64(i32* %src, i8* %2, i32** null, i64 0, metadata !17) +; INTR-SCOPE: %4 = load i32, i32* %3, ptr_provenance i32* undef, align 4, !tbaa !2, !noalias !19 +; INTR-SCOPE: store i32 %4, i32* %1, ptr_provenance i32* undef, align 4, !tbaa !2, !noalias !19 +; INTR-SCOPE: br label %do.body +; INTR-SCOPE: do.body: ; preds = %do.body, %entry +; INTR-SCOPE: %n.addr.0 = phi i32 [ %n, %entry ], [ %dec, %do.body ] +; INTR-SCOPE: %5 = call i8* @llvm.noalias.decl.p0i8.p0p0i32.i64(i32** null, i64 0, metadata !20) +; INTR-SCOPE: %6 = call i32* @llvm.noalias.p0i32.p0i8.p0p0i32.i64(i32* %dst, i8* %5, i32** null, i64 0, metadata !20) +; INTR-SCOPE: %7 = call i8* @llvm.noalias.decl.p0i8.p0p0i32.i64(i32** null, i64 0, metadata !23) +; INTR-SCOPE: %8 = call i32* @llvm.noalias.p0i32.p0i8.p0p0i32.i64(i32* %src, i8* %7, i32** null, i64 0, metadata !23) +; INTR-SCOPE: %9 = load i32, i32* %8, ptr_provenance i32* undef, align 4, !tbaa !2, !noalias !25 +; INTR-SCOPE: store i32 %9, i32* %6, ptr_provenance i32* undef, align 4, !tbaa !2, !noalias !25 +; INTR-SCOPE: %10 = call i8* @llvm.noalias.decl.p0i8.p0p0i32.i64(i32** null, i64 0, metadata !26) +; INTR-SCOPE: %11 = call i32* @llvm.noalias.p0i32.p0i8.p0p0i32.i64(i32* %dst, i8* %10, i32** null, i64 0, metadata !26) +; INTR-SCOPE: %12 = call i8* @llvm.noalias.decl.p0i8.p0p0i32.i64(i32** null, i64 0, metadata !29) +; INTR-SCOPE: %13 = call i32* @llvm.noalias.p0i32.p0i8.p0p0i32.i64(i32* %src, i8* %12, i32** null, i64 0, metadata !29) +; INTR-SCOPE: %14 = load i32, i32* %13, ptr_provenance i32* undef, align 4, !tbaa !2, !noalias !31 +; INTR-SCOPE: store i32 %14, i32* %11, ptr_provenance i32* undef, align 4, !tbaa !2, !noalias !31 +; INTR-SCOPE: %dec = add nsw i32 %n.addr.0, -1 +; INTR-SCOPE: %tobool = icmp eq i32 %n.addr.0, 0 +; INTR-SCOPE: br i1 %tobool, label %do.end, label %do.body +; INTR-SCOPE: do.end: ; preds = %do.body +; INTR-SCOPE: ret void +; INTR-SCOPE: } + + +; Function Attrs: nounwind +define dso_local void @test_rprp(i32* nocapture %dst, i32* nocapture readonly %src, i32 %n) local_unnamed_addr #1 { +entry: + tail call void @copy_rprp(i32* %dst, i32* %src) + br label %do.body + +do.body: ; preds = %do.body, %entry + %n.addr.0 = phi i32 [ %n, %entry ], [ %dec, %do.body ] + tail call void @copy_rprp(i32* %dst, i32* %src) + tail call void @copy_rprp(i32* %dst, i32* %src) + %dec = add nsw i32 %n.addr.0, -1 + %tobool = icmp eq i32 %n.addr.0, 0 + br i1 %tobool, label %do.end, label %do.body + +do.end: ; preds = %do.body + ret void +} + +; INTR-SCOPE: define dso_local void @test_rprp(i32* nocapture %dst, i32* nocapture readonly %src, i32 %n) local_unnamed_addr #1 { +; INTR-SCOPE: entry: +; INTR-SCOPE: %0 = tail call i8* 
@llvm.noalias.decl.p0i8.p0p0i32.i64(i32** null, i64 0, metadata !32) #5 +; INTR-SCOPE: %1 = tail call i8* @llvm.noalias.decl.p0i8.p0p0i32.i64(i32** null, i64 0, metadata !35) #5 +; INTR-SCOPE: %2 = tail call i32* @llvm.provenance.noalias.p0i32.p0i8.p0p0i32.p0p0i32.i64(i32* %src, i8* %1, i32** null, i32** undef, i64 0, metadata !35) #5, !tbaa !11, !noalias !37 +; INTR-SCOPE: %3 = load i32, i32* %src, ptr_provenance i32* %2, align 4, !tbaa !2, !noalias !37 +; INTR-SCOPE: %4 = tail call i32* @llvm.provenance.noalias.p0i32.p0i8.p0p0i32.p0p0i32.i64(i32* %dst, i8* %0, i32** null, i32** undef, i64 0, metadata !32) #5, !tbaa !11, !noalias !37 +; INTR-SCOPE: store i32 %3, i32* %dst, ptr_provenance i32* %4, align 4, !tbaa !2, !noalias !37 +; INTR-SCOPE: br label %do.body +; INTR-SCOPE: do.body: ; preds = %do.body, %entry +; INTR-SCOPE: %n.addr.0 = phi i32 [ %n, %entry ], [ %dec, %do.body ] +; INTR-SCOPE: %5 = tail call i8* @llvm.noalias.decl.p0i8.p0p0i32.i64(i32** null, i64 0, metadata !38) #5 +; INTR-SCOPE: %6 = tail call i8* @llvm.noalias.decl.p0i8.p0p0i32.i64(i32** null, i64 0, metadata !41) #5 +; INTR-SCOPE: %7 = tail call i32* @llvm.provenance.noalias.p0i32.p0i8.p0p0i32.p0p0i32.i64(i32* %src, i8* %6, i32** null, i32** undef, i64 0, metadata !41) #5, !tbaa !11, !noalias !43 +; INTR-SCOPE: %8 = load i32, i32* %src, ptr_provenance i32* %7, align 4, !tbaa !2, !noalias !43 +; INTR-SCOPE: %9 = tail call i32* @llvm.provenance.noalias.p0i32.p0i8.p0p0i32.p0p0i32.i64(i32* %dst, i8* %5, i32** null, i32** undef, i64 0, metadata !38) #5, !tbaa !11, !noalias !43 +; INTR-SCOPE: store i32 %8, i32* %dst, ptr_provenance i32* %9, align 4, !tbaa !2, !noalias !43 +; INTR-SCOPE: %10 = tail call i8* @llvm.noalias.decl.p0i8.p0p0i32.i64(i32** null, i64 0, metadata !44) #5 +; INTR-SCOPE: %11 = tail call i8* @llvm.noalias.decl.p0i8.p0p0i32.i64(i32** null, i64 0, metadata !47) #5 +; INTR-SCOPE: %12 = tail call i32* @llvm.provenance.noalias.p0i32.p0i8.p0p0i32.p0p0i32.i64(i32* %src, i8* %11, i32** null, i32** undef, i64 0, metadata !47) #5, !tbaa !11, !noalias !49 +; INTR-SCOPE: %13 = load i32, i32* %src, ptr_provenance i32* %12, align 4, !tbaa !2, !noalias !49 +; INTR-SCOPE: %14 = tail call i32* @llvm.provenance.noalias.p0i32.p0i8.p0p0i32.p0p0i32.i64(i32* %dst, i8* %10, i32** null, i32** undef, i64 0, metadata !44) #5, !tbaa !11, !noalias !49 +; INTR-SCOPE: store i32 %13, i32* %dst, ptr_provenance i32* %14, align 4, !tbaa !2, !noalias !49 +; INTR-SCOPE: %dec = add nsw i32 %n.addr.0, -1 +; INTR-SCOPE: %tobool = icmp eq i32 %n.addr.0, 0 +; INTR-SCOPE: br i1 %tobool, label %do.end, label %do.body +; INTR-SCOPE: do.end: ; preds = %do.body +; INTR-SCOPE: ret void +; INTR-SCOPE: } + + +; Function Attrs: nounwind readnone speculatable +declare i32* @llvm.provenance.noalias.p0i32.p0i8.p0p0i32.p0p0i32.i64(i32*, i8*, i32**, i32**, i64, metadata) #3 + +attributes #0 = { nofree norecurse nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #1 = { nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" 
"no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #2 = { argmemonly nounwind } +attributes #3 = { nounwind readnone speculatable } + +!llvm.module.flags = !{!0} +!llvm.ident = !{!1} + +!0 = !{i32 1, !"wchar_size", i32 4} +!1 = !{!"clang"} +!2 = !{!3, !3, i64 0, i64 4} +!3 = !{!4, i64 4, !"int"} +!4 = !{!5, i64 1, !"omnipotent char"} +!5 = !{!"Simple C/C++ TBAA"} +!6 = !{!7} +!7 = distinct !{!7, !8, !"copy_rprp: rdst"} +!8 = distinct !{!8, !"copy_rprp"} +!9 = !{!10} +!10 = distinct !{!10, !8, !"copy_rprp: rsrc"} +!11 = !{!12, !12, i64 0, i64 4} +!12 = !{!4, i64 4, !"any pointer"} +!13 = !{!7, !10} + +; INTR-SCOPE: !0 = !{i32 1, !"wchar_size", i32 4} +; INTR-SCOPE: !1 = !{!"clang"} +; INTR-SCOPE: !2 = !{!3, !3, i64 0, i64 4} +; INTR-SCOPE: !3 = !{!4, i64 4, !"int"} +; INTR-SCOPE: !4 = !{!5, i64 1, !"omnipotent char"} +; INTR-SCOPE: !5 = !{!"Simple C/C++ TBAA"} +; INTR-SCOPE: !6 = !{!7} +; INTR-SCOPE: !7 = distinct !{!7, !8, !"copy_rprp: rdst"} +; INTR-SCOPE: !8 = distinct !{!8, !"copy_rprp"} +; INTR-SCOPE: !9 = !{!10} +; INTR-SCOPE: !10 = distinct !{!10, !8, !"copy_rprp: rsrc"} +; INTR-SCOPE: !11 = !{!12, !12, i64 0, i64 4} +; INTR-SCOPE: !12 = !{!4, i64 4, !"any pointer"} +; INTR-SCOPE: !13 = !{!7, !10} +; INTR-SCOPE: !14 = !{!15} +; INTR-SCOPE: !15 = distinct !{!15, !16, !"copy_npnp: %dst"} +; INTR-SCOPE: !16 = distinct !{!16, !"copy_npnp"} +; INTR-SCOPE: !17 = !{!18} +; INTR-SCOPE: !18 = distinct !{!18, !16, !"copy_npnp: %src"} +; INTR-SCOPE: !19 = !{!15, !18} +; INTR-SCOPE: !20 = !{!21} +; INTR-SCOPE: !21 = distinct !{!21, !22, !"copy_npnp: %dst"} +; INTR-SCOPE: !22 = distinct !{!22, !"copy_npnp"} +; INTR-SCOPE: !23 = !{!24} +; INTR-SCOPE: !24 = distinct !{!24, !22, !"copy_npnp: %src"} +; INTR-SCOPE: !25 = !{!21, !24} +; INTR-SCOPE: !26 = !{!27} +; INTR-SCOPE: !27 = distinct !{!27, !28, !"copy_npnp: %dst"} +; INTR-SCOPE: !28 = distinct !{!28, !"copy_npnp"} +; INTR-SCOPE: !29 = !{!30} +; INTR-SCOPE: !30 = distinct !{!30, !28, !"copy_npnp: %src"} +; INTR-SCOPE: !31 = !{!27, !30} +; INTR-SCOPE: !32 = !{!33} +; INTR-SCOPE: !33 = distinct !{!33, !34, !"copy_rprp: rdst"} +; INTR-SCOPE: !34 = distinct !{!34, !"copy_rprp"} +; INTR-SCOPE: !35 = !{!36} +; INTR-SCOPE: !36 = distinct !{!36, !34, !"copy_rprp: rsrc"} +; INTR-SCOPE: !37 = !{!33, !36} +; INTR-SCOPE: !38 = !{!39} +; INTR-SCOPE: !39 = distinct !{!39, !40, !"copy_rprp: rdst"} +; INTR-SCOPE: !40 = distinct !{!40, !"copy_rprp"} +; INTR-SCOPE: !41 = !{!42} +; INTR-SCOPE: !42 = distinct !{!42, !40, !"copy_rprp: rsrc"} +; INTR-SCOPE: !43 = !{!39, !42} +; INTR-SCOPE: !44 = !{!45} +; INTR-SCOPE: !45 = distinct !{!45, !46, !"copy_rprp: rdst"} +; INTR-SCOPE: !46 = distinct !{!46, !"copy_rprp"} +; INTR-SCOPE: !47 = !{!48} +; INTR-SCOPE: !48 = distinct !{!48, !46, !"copy_rprp: rsrc"} +; INTR-SCOPE: !49 = !{!45, !48} Index: llvm/test/Transforms/Inline/noalias.ll =================================================================== --- llvm/test/Transforms/Inline/noalias.ll +++ llvm/test/Transforms/Inline/noalias.ll @@ -1,4 +1,5 @@ -; RUN: opt -inline -enable-noalias-to-md-conversion -S < %s | FileCheck %s +; RUN: opt -inline -enable-noalias-to-md-conversion -use-noalias-intrinsic-during-inlining=0 -S < %s | FileCheck %s -check-prefix=MD-SCOPE +; RUN: opt -inline -enable-noalias-to-md-conversion -use-noalias-intrinsic-during-inlining=1 -S < %s | FileCheck %s -check-prefix=INTR-SCOPE target datalayout = 
"e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" target triple = "x86_64-unknown-linux-gnu" @@ -19,16 +20,29 @@ ret void } -; CHECK: define void @foo(float* nocapture %a, float* nocapture readonly %c) #0 { -; CHECK: entry: -; CHECK: %0 = load float, float* %c, align 4, !noalias !0 -; CHECK: %arrayidx.i = getelementptr inbounds float, float* %a, i64 5 -; CHECK: store float %0, float* %arrayidx.i, align 4, !alias.scope !0 -; CHECK: %1 = load float, float* %c, align 4 -; CHECK: %arrayidx = getelementptr inbounds float, float* %a, i64 7 -; CHECK: store float %1, float* %arrayidx, align 4 -; CHECK: ret void -; CHECK: } +; MD-SCOPE: define void @foo(float* nocapture %a, float* nocapture readonly %c) #0 { +; MD-SCOPE: entry: +; MD-SCOPE: %0 = load float, float* %c, align 4, !noalias !0 +; MD-SCOPE: %arrayidx.i = getelementptr inbounds float, float* %a, i64 5 +; MD-SCOPE: store float %0, float* %arrayidx.i, align 4, !alias.scope !0 +; MD-SCOPE: %1 = load float, float* %c, align 4 +; MD-SCOPE: %arrayidx = getelementptr inbounds float, float* %a, i64 7 +; MD-SCOPE: store float %1, float* %arrayidx, align 4 +; MD-SCOPE: ret void +; MD-SCOPE: } + +; INTR-SCOPE: define void @foo(float* nocapture %a, float* nocapture readonly %c) #0 { +; INTR-SCOPE: entry: +; INTR-SCOPE: %0 = call i8* @llvm.noalias.decl.p0i8.p0p0f32.i64(float** null, i64 0, metadata !0) +; INTR-SCOPE: %1 = call float* @llvm.noalias.p0f32.p0i8.p0p0f32.i64(float* %a, i8* %0, float** null, i64 0, metadata !0) +; INTR-SCOPE: %2 = load float, float* %c, align 4, !noalias !0 +; INTR-SCOPE: %arrayidx.i = getelementptr inbounds float, float* %1, i64 5 +; INTR-SCOPE: store float %2, float* %arrayidx.i, align 4, !noalias !0 +; INTR-SCOPE: %3 = load float, float* %c, align 4 +; INTR-SCOPE: %arrayidx = getelementptr inbounds float, float* %a, i64 7 +; INTR-SCOPE: store float %3, float* %arrayidx, align 4 +; INTR-SCOPE: ret void +; INTR-SCOPE: } define void @hello2(float* noalias nocapture %a, float* noalias nocapture %b, float* nocapture readonly %c) #0 { entry: @@ -49,28 +63,55 @@ ret void } -; CHECK: define void @foo2(float* nocapture %a, float* nocapture %b, float* nocapture readonly %c) #0 { -; CHECK: entry: -; CHECK: %0 = load float, float* %c, align 4, !noalias !3 -; CHECK: %arrayidx.i = getelementptr inbounds float, float* %a, i64 5 -; CHECK: store float %0, float* %arrayidx.i, align 4, !alias.scope !7, !noalias !8 -; CHECK: %arrayidx1.i = getelementptr inbounds float, float* %b, i64 8 -; CHECK: store float %0, float* %arrayidx1.i, align 4, !alias.scope !8, !noalias !7 -; CHECK: %1 = load float, float* %c, align 4 -; CHECK: %arrayidx = getelementptr inbounds float, float* %a, i64 7 -; CHECK: store float %1, float* %arrayidx, align 4 -; CHECK: ret void -; CHECK: } +; MD-SCOPE: define void @foo2(float* nocapture %a, float* nocapture %b, float* nocapture readonly %c) #0 { +; MD-SCOPE: entry: +; MD-SCOPE: %0 = load float, float* %c, align 4, !noalias !3 +; MD-SCOPE: %arrayidx.i = getelementptr inbounds float, float* %a, i64 5 +; MD-SCOPE: store float %0, float* %arrayidx.i, align 4, !alias.scope !7, !noalias !8 +; MD-SCOPE: %arrayidx1.i = getelementptr inbounds float, float* %b, i64 8 +; MD-SCOPE: store float %0, float* %arrayidx1.i, align 4, !alias.scope !8, !noalias !7 +; MD-SCOPE: %1 = load float, float* %c, align 4 +; MD-SCOPE: %arrayidx = getelementptr inbounds float, float* %a, i64 7 +; MD-SCOPE: store float %1, float* %arrayidx, align 4 +; 
MD-SCOPE: ret void +; MD-SCOPE: } + +; INTR-SCOPE: define void @foo2(float* nocapture %a, float* nocapture %b, float* nocapture readonly %c) #0 { +; INTR-SCOPE: entry: +; INTR-SCOPE: %0 = call i8* @llvm.noalias.decl.p0i8.p0p0f32.i64(float** null, i64 0, metadata !3) +; INTR-SCOPE: %1 = call float* @llvm.noalias.p0f32.p0i8.p0p0f32.i64(float* %a, i8* %0, float** null, i64 0, metadata !3) +; INTR-SCOPE: %2 = call i8* @llvm.noalias.decl.p0i8.p0p0f32.i64(float** null, i64 0, metadata !6) +; INTR-SCOPE: %3 = call float* @llvm.noalias.p0f32.p0i8.p0p0f32.i64(float* %b, i8* %2, float** null, i64 0, metadata !6) +; INTR-SCOPE: %4 = load float, float* %c, align 4, !noalias !8 +; INTR-SCOPE: %arrayidx.i = getelementptr inbounds float, float* %1, i64 5 +; INTR-SCOPE: store float %4, float* %arrayidx.i, align 4, !noalias !8 +; INTR-SCOPE: %arrayidx1.i = getelementptr inbounds float, float* %3, i64 8 +; INTR-SCOPE: store float %4, float* %arrayidx1.i, align 4, !noalias !8 +; INTR-SCOPE: %5 = load float, float* %c, align 4 +; INTR-SCOPE: %arrayidx = getelementptr inbounds float, float* %a, i64 7 +; INTR-SCOPE: store float %5, float* %arrayidx, align 4 +; INTR-SCOPE: ret void +; INTR-SCOPE: } attributes #0 = { nounwind uwtable } +attributes #1 = { argmemonly nounwind } -; CHECK: !0 = !{!1} -; CHECK: !1 = distinct !{!1, !2, !"hello: %a"} -; CHECK: !2 = distinct !{!2, !"hello"} -; CHECK: !3 = !{!4, !6} -; CHECK: !4 = distinct !{!4, !5, !"hello2: %a"} -; CHECK: !5 = distinct !{!5, !"hello2"} -; CHECK: !6 = distinct !{!6, !5, !"hello2: %b"} -; CHECK: !7 = !{!4} -; CHECK: !8 = !{!6} +; MD-SCOPE: !0 = !{!1} +; MD-SCOPE: !1 = distinct !{!1, !2, !"hello: %a"} +; MD-SCOPE: !2 = distinct !{!2, !"hello"} +; MD-SCOPE: !3 = !{!4, !6} +; MD-SCOPE: !4 = distinct !{!4, !5, !"hello2: %a"} +; MD-SCOPE: !5 = distinct !{!5, !"hello2"} +; MD-SCOPE: !6 = distinct !{!6, !5, !"hello2: %b"} +; MD-SCOPE: !7 = !{!4} +; MD-SCOPE: !8 = !{!6} +; INTR-SCOPE: !0 = !{!1} +; INTR-SCOPE: !1 = distinct !{!1, !2, !"hello: %a"} +; INTR-SCOPE: !2 = distinct !{!2, !"hello"} +; INTR-SCOPE: !3 = !{!4} +; INTR-SCOPE: !4 = distinct !{!4, !5, !"hello2: %a"} +; INTR-SCOPE: !5 = distinct !{!5, !"hello2"} +; INTR-SCOPE: !6 = !{!7} +; INTR-SCOPE: !7 = distinct !{!7, !5, !"hello2: %b"} +; INTR-SCOPE: !8 = !{!4, !7} Index: llvm/test/Transforms/Inline/noalias2.ll =================================================================== --- llvm/test/Transforms/Inline/noalias2.ll +++ llvm/test/Transforms/Inline/noalias2.ll @@ -1,6 +1,8 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature -; RUN: opt -inline -enable-noalias-to-md-conversion -S < %s | FileCheck %s --check-prefixes=CHECK,NO_ASSUME -; RUN: opt -inline -enable-noalias-to-md-conversion --enable-knowledge-retention -S < %s | FileCheck %s --check-prefixes=CHECK,USE_ASSUME +; RUN: opt -inline -enable-noalias-to-md-conversion -use-noalias-intrinsic-during-inlining=0 -S < %s | FileCheck %s --check-prefixes=CHECK,NO_ASSUME +; RUN: opt -inline -enable-noalias-to-md-conversion -use-noalias-intrinsic-during-inlining=0 --enable-knowledge-retention -S < %s | FileCheck %s --check-prefixes=CHECK,USE_ASSUME +; RUN: opt -inline -enable-noalias-to-md-conversion -use-noalias-intrinsic-during-inlining=1 -S < %s | FileCheck %s -check-prefixes=INTR-SCOPE +; RUN: opt -inline -enable-noalias-to-md-conversion -use-noalias-intrinsic-during-inlining=1 --enable-knowledge-retention -S < %s | FileCheck %s --check-prefixes=INTR-SCOPE,INTR-SCOPE-USE_ASSUME target datalayout = 
"e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" target triple = "x86_64-unknown-linux-gnu" @@ -14,12 +16,14 @@ ; CHECK-NEXT: store float [[TMP0]], float* [[ARRAYIDX]], align 4 ; CHECK-NEXT: ret void ; -; ASSUME-LABEL: @hello( -; ASSUME-NEXT: entry: -; ASSUME-NEXT: [[TMP0:%.*]] = load float, float* [[C:%.*]], align 4 -; ASSUME-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[A:%.*]], i64 5 -; ASSUME-NEXT: store float [[TMP0]], float* [[ARRAYIDX]], align 4 -; ASSUME-NEXT: ret void +; INTR-SCOPE-LABEL: define {{[^@]+}}@hello +; INTR-SCOPE-SAME: (float* noalias nocapture [[A:%.*]], float* noalias nocapture readonly [[C:%.*]]) #0 +; INTR-SCOPE-NEXT: entry: +; INTR-SCOPE-NEXT: [[TMP0:%.*]] = load float, float* [[C]], align 4 +; INTR-SCOPE-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[A]], i64 5 +; INTR-SCOPE-NEXT: store float [[TMP0]], float* [[ARRAYIDX]], align 4 +; INTR-SCOPE-NEXT: ret void +; entry: %0 = load float, float* %c, align 4 %arrayidx = getelementptr inbounds float, float* %a, i64 5 @@ -39,16 +43,21 @@ ; CHECK-NEXT: store float [[TMP1]], float* [[ARRAYIDX]], align 4 ; CHECK-NEXT: ret void ; -; ASSUME-LABEL: @foo( -; ASSUME-NEXT: entry: -; ASSUME-NEXT: call void @llvm.assume(i1 true) [ "noalias"(float* [[A:%.*]]), "noalias"(float* [[C:%.*]]) ] -; ASSUME-NEXT: [[TMP0:%.*]] = load float, float* [[C]], align 4, !alias.scope !0, !noalias !3 -; ASSUME-NEXT: [[ARRAYIDX_I:%.*]] = getelementptr inbounds float, float* [[A]], i64 5 -; ASSUME-NEXT: store float [[TMP0]], float* [[ARRAYIDX_I]], align 4, !alias.scope !3, !noalias !0 -; ASSUME-NEXT: [[TMP1:%.*]] = load float, float* [[C]], align 4 -; ASSUME-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[A]], i64 7 -; ASSUME-NEXT: store float [[TMP1]], float* [[ARRAYIDX]], align 4 -; ASSUME-NEXT: ret void +; INTR-SCOPE-LABEL: define {{[^@]+}}@foo +; INTR-SCOPE-SAME: (float* noalias nocapture [[A:%.*]], float* noalias nocapture readonly [[C:%.*]]) #0 +; INTR-SCOPE-NEXT: entry: +; INTR-SCOPE-NEXT: [[TMP0:%.*]] = call i8* @llvm.noalias.decl.p0i8.p0p0f32.i64(float** null, i64 0, metadata !0) +; INTR-SCOPE-NEXT: [[TMP1:%.*]] = call float* @llvm.noalias.p0f32.p0i8.p0p0f32.i64(float* [[A]], i8* [[TMP0]], float** null, i64 0, metadata !0) +; INTR-SCOPE-NEXT: [[TMP2:%.*]] = call i8* @llvm.noalias.decl.p0i8.p0p0f32.i64(float** null, i64 0, metadata !3) +; INTR-SCOPE-NEXT: [[TMP3:%.*]] = call float* @llvm.noalias.p0f32.p0i8.p0p0f32.i64(float* [[C]], i8* [[TMP2]], float** null, i64 0, metadata !3) +; INTR-SCOPE-NEXT: [[TMP4:%.*]] = load float, float* [[TMP3]], align 4, !noalias !5 +; INTR-SCOPE-NEXT: [[ARRAYIDX_I:%.*]] = getelementptr inbounds float, float* [[TMP1]], i64 5 +; INTR-SCOPE-NEXT: store float [[TMP4]], float* [[ARRAYIDX_I]], align 4, !noalias !5 +; INTR-SCOPE-NEXT: [[TMP5:%.*]] = load float, float* [[C]], align 4 +; INTR-SCOPE-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[A]], i64 7 +; INTR-SCOPE-NEXT: store float [[TMP5]], float* [[ARRAYIDX]], align 4 +; INTR-SCOPE-NEXT: ret void +; entry: tail call void @hello(float* %a, float* %c) %0 = load float, float* %c, align 4 @@ -68,6 +77,16 @@ ; CHECK-NEXT: store float [[TMP0]], float* [[ARRAYIDX1]], align 4 ; CHECK-NEXT: ret void ; +; INTR-SCOPE-LABEL: define {{[^@]+}}@hello2 +; INTR-SCOPE-SAME: (float* noalias nocapture [[A:%.*]], float* noalias nocapture [[B:%.*]], float* nocapture readonly [[C:%.*]]) #0 +; INTR-SCOPE-NEXT: entry: +; 
INTR-SCOPE-NEXT: [[TMP0:%.*]] = load float, float* [[C]], align 4 +; INTR-SCOPE-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[A]], i64 6 +; INTR-SCOPE-NEXT: store float [[TMP0]], float* [[ARRAYIDX]], align 4 +; INTR-SCOPE-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds float, float* [[B]], i64 8 +; INTR-SCOPE-NEXT: store float [[TMP0]], float* [[ARRAYIDX1]], align 4 +; INTR-SCOPE-NEXT: ret void +; entry: %0 = load float, float* %c, align 4 %arrayidx = getelementptr inbounds float, float* %a, i64 6 @@ -99,6 +118,37 @@ ; CHECK-NEXT: store float [[TMP3]], float* [[ARRAYIDX]], align 4 ; CHECK-NEXT: ret void ; +; INTR-SCOPE-LABEL: define {{[^@]+}}@foo2 +; INTR-SCOPE-SAME: (float* nocapture [[A:%.*]], float* nocapture [[B:%.*]], float* nocapture readonly [[C:%.*]]) #0 +; INTR-SCOPE-NEXT: entry: +; INTR-SCOPE-NEXT: [[TMP0:%.*]] = call i8* @llvm.noalias.decl.p0i8.p0p0f32.i64(float** null, i64 0, metadata !6) +; INTR-SCOPE-NEXT: [[TMP1:%.*]] = call float* @llvm.noalias.p0f32.p0i8.p0p0f32.i64(float* [[A]], i8* [[TMP0]], float** null, i64 0, metadata !6) +; INTR-SCOPE-NEXT: [[TMP2:%.*]] = call i8* @llvm.noalias.decl.p0i8.p0p0f32.i64(float** null, i64 0, metadata !9) +; INTR-SCOPE-NEXT: [[TMP3:%.*]] = call float* @llvm.noalias.p0f32.p0i8.p0p0f32.i64(float* [[C]], i8* [[TMP2]], float** null, i64 0, metadata !9) +; INTR-SCOPE-NEXT: [[TMP4:%.*]] = call i8* @llvm.noalias.decl.p0i8.p0p0f32.i64(float** null, i64 0, metadata !11) #3, !noalias !14 +; INTR-SCOPE-NEXT: [[TMP5:%.*]] = call float* @llvm.noalias.p0f32.p0i8.p0p0f32.i64(float* [[TMP1]], i8* [[TMP4]], float** null, i64 0, metadata !11) #3, !noalias !14 +; INTR-SCOPE-NEXT: [[TMP6:%.*]] = call i8* @llvm.noalias.decl.p0i8.p0p0f32.i64(float** null, i64 0, metadata !15) #3, !noalias !14 +; INTR-SCOPE-NEXT: [[TMP7:%.*]] = call float* @llvm.noalias.p0f32.p0i8.p0p0f32.i64(float* [[TMP3]], i8* [[TMP6]], float** null, i64 0, metadata !15) #3, !noalias !14 +; INTR-SCOPE-NEXT: [[TMP8:%.*]] = load float, float* [[TMP7]], align 4, !noalias !17 +; INTR-SCOPE-NEXT: [[ARRAYIDX_I_I:%.*]] = getelementptr inbounds float, float* [[TMP5]], i64 5 +; INTR-SCOPE-NEXT: store float [[TMP8]], float* [[ARRAYIDX_I_I]], align 4, !noalias !17 +; INTR-SCOPE-NEXT: [[TMP9:%.*]] = load float, float* [[TMP3]], align 4, !noalias !14 +; INTR-SCOPE-NEXT: [[ARRAYIDX_I:%.*]] = getelementptr inbounds float, float* [[TMP1]], i64 7 +; INTR-SCOPE-NEXT: store float [[TMP9]], float* [[ARRAYIDX_I]], align 4, !noalias !14 +; INTR-SCOPE-NEXT: [[TMP10:%.*]] = call i8* @llvm.noalias.decl.p0i8.p0p0f32.i64(float** null, i64 0, metadata !18) +; INTR-SCOPE-NEXT: [[TMP11:%.*]] = call float* @llvm.noalias.p0f32.p0i8.p0p0f32.i64(float* [[A]], i8* [[TMP10]], float** null, i64 0, metadata !18) +; INTR-SCOPE-NEXT: [[TMP12:%.*]] = call i8* @llvm.noalias.decl.p0i8.p0p0f32.i64(float** null, i64 0, metadata !21) +; INTR-SCOPE-NEXT: [[TMP13:%.*]] = call float* @llvm.noalias.p0f32.p0i8.p0p0f32.i64(float* [[B]], i8* [[TMP12]], float** null, i64 0, metadata !21) +; INTR-SCOPE-NEXT: [[TMP14:%.*]] = load float, float* [[C]], align 4, !noalias !23 +; INTR-SCOPE-NEXT: [[ARRAYIDX_I1:%.*]] = getelementptr inbounds float, float* [[TMP11]], i64 6 +; INTR-SCOPE-NEXT: store float [[TMP14]], float* [[ARRAYIDX_I1]], align 4, !noalias !23 +; INTR-SCOPE-NEXT: [[ARRAYIDX1_I:%.*]] = getelementptr inbounds float, float* [[TMP13]], i64 8 +; INTR-SCOPE-NEXT: store float [[TMP14]], float* [[ARRAYIDX1_I]], align 4, !noalias !23 +; INTR-SCOPE-NEXT: [[TMP15:%.*]] = load float, float* [[C]], align 4 +; INTR-SCOPE-NEXT: 
[[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[A]], i64 7 +; INTR-SCOPE-NEXT: store float [[TMP15]], float* [[ARRAYIDX]], align 4 +; INTR-SCOPE-NEXT: ret void +; entry: tail call void @foo(float* %a, float* %c) tail call void @hello2(float* %a, float* %b, float* %c) @@ -108,6 +158,8 @@ ret void } +; FIXME: NO_ASSUME and CHECK produce the same metadata at this moment ? + ; NO_ASSUME: !0 = !{!1} ; NO_ASSUME: !1 = distinct !{!1, !2, !"hello: %c"} ; NO_ASSUME: !2 = distinct !{!2, !"hello"} @@ -130,5 +182,30 @@ ; NO_ASSUME: !19 = !{!16} ; NO_ASSUME: !20 = !{!18} -attributes #0 = { nounwind uwtable } +; INTR-SCOPE: !0 = !{!1} +; INTR-SCOPE: !1 = distinct !{!1, !2, !"hello: %a"} +; INTR-SCOPE: !2 = distinct !{!2, !"hello"} +; INTR-SCOPE: !3 = !{!4} +; INTR-SCOPE: !4 = distinct !{!4, !2, !"hello: %c"} +; INTR-SCOPE: !5 = !{!1, !4} +; INTR-SCOPE: !6 = !{!7} +; INTR-SCOPE: !7 = distinct !{!7, !8, !"foo: %a"} +; INTR-SCOPE: !8 = distinct !{!8, !"foo"} +; INTR-SCOPE: !9 = !{!10} +; INTR-SCOPE: !10 = distinct !{!10, !8, !"foo: %c"} +; INTR-SCOPE: !11 = !{!12} +; INTR-SCOPE: !12 = distinct !{!12, !13, !"hello: %a"} +; INTR-SCOPE: !13 = distinct !{!13, !"hello"} +; INTR-SCOPE: !14 = !{!7, !10} +; INTR-SCOPE: !15 = !{!16} +; INTR-SCOPE: !16 = distinct !{!16, !13, !"hello: %c"} +; INTR-SCOPE: !17 = !{!12, !16, !7, !10} +; INTR-SCOPE: !18 = !{!19} +; INTR-SCOPE: !19 = distinct !{!19, !20, !"hello2: %a"} +; INTR-SCOPE: !20 = distinct !{!20, !"hello2"} +; INTR-SCOPE: !21 = !{!22} +; INTR-SCOPE: !22 = distinct !{!22, !20, !"hello2: %b"} +; INTR-SCOPE: !23 = !{!19, !22} + +attributes #0 = { nounwind uwtable } Index: llvm/test/Transforms/Inline/parallel-loop-md-merge.ll =================================================================== --- llvm/test/Transforms/Inline/parallel-loop-md-merge.ll +++ llvm/test/Transforms/Inline/parallel-loop-md-merge.ll @@ -1,4 +1,5 @@ -; RUN: opt -always-inline -globalopt -S < %s | FileCheck %s +; RUN: opt -always-inline -globalopt --use-noalias-intrinsic-during-inlining=0 -S < %s | FileCheck %s --check-prefixes=CHECK,CHECK_SCOPE +; RUN: opt -always-inline -globalopt --use-noalias-intrinsic-during-inlining=1 -S < %s | FileCheck %s --check-prefixes=CHECK,CHECK_NOALIAS ; ; static void __attribute__((always_inline)) callee(long n, double A[static const restrict n], long i) { ; for (long j = 0; j < n; j += 1) @@ -64,14 +65,21 @@ !11 = distinct !{!11, !12} ; LoopID !12 = !{!"llvm.loop.parallel_accesses", !10} +; There is a small reordering when noalias intrinsics are introduced ; CHECK: store double 4.200000e+01, {{.*}} !llvm.access.group ![[ACCESS_GROUP_LIST_3:[0-9]+]] ; CHECK: br label %for.cond.i, !llvm.loop ![[LOOP_INNER:[0-9]+]] ; CHECK: br label %for.cond, !llvm.loop ![[LOOP_OUTER:[0-9]+]] -; CHECK: ![[ACCESS_GROUP_LIST_3]] = !{![[ACCESS_GROUP_INNER:[0-9]+]], ![[ACCESS_GROUP_OUTER:[0-9]+]]} -; CHECK: ![[ACCESS_GROUP_INNER]] = distinct !{} -; CHECK: ![[ACCESS_GROUP_OUTER]] = distinct !{} + +; CHECK_SCOPE: ![[ACCESS_GROUP_LIST_3]] = !{![[ACCESS_GROUP_INNER:[0-9]+]], ![[ACCESS_GROUP_OUTER:[0-9]+]]} +; CHECK_SCOPE: ![[ACCESS_GROUP_INNER]] = distinct !{} +; CHECK_SCOPE: ![[ACCESS_GROUP_OUTER]] = distinct !{} + +; CHECK_NOALIAS: ![[ACCESS_GROUP_OUTER:[0-9]+]] = distinct !{} +; CHECK_NOALIAS: ![[ACCESS_GROUP_LIST_3]] = !{![[ACCESS_GROUP_INNER:[0-9]+]], ![[ACCESS_GROUP_OUTER]]} +; CHECK_NOALIAS: ![[ACCESS_GROUP_INNER]] = distinct !{} + ; CHECK: ![[LOOP_INNER]] = distinct !{![[LOOP_INNER]], ![[ACCESSES_INNER:[0-9]+]]} ; CHECK: ![[ACCESSES_INNER]] = 
!{!"llvm.loop.parallel_accesses", ![[ACCESS_GROUP_INNER]]} ; CHECK: ![[LOOP_OUTER]] = distinct !{![[LOOP_OUTER]], ![[ACCESSES_OUTER:[0-9]+]]} Index: llvm/test/Transforms/InstCombine/fold-phi-load-metadata.ll =================================================================== --- llvm/test/Transforms/InstCombine/fold-phi-load-metadata.ll +++ llvm/test/Transforms/InstCombine/fold-phi-load-metadata.ll @@ -40,10 +40,10 @@ ; CHECK: ![[TBAA]] = !{![[TAG1:[0-9]+]], ![[TAG1]], i64 0} ; CHECK: ![[TAG1]] = !{!"int", !{{[0-9]+}}, i64 0} ; CHECK: ![[RANGE]] = !{i32 10, i32 25} -; CHECK: ![[ALIAS_SCOPE]] = !{![[SCOPE0:[0-9]+]], ![[SCOPE2:[0-9]+]], ![[SCOPE1:[0-9]+]]} +; CHECK: ![[ALIAS_SCOPE]] = !{![[SCOPE0:[0-9]+]], ![[SCOPE1:[0-9]+]], ![[SCOPE2:[0-9]+]]} ; CHECK: ![[SCOPE0]] = distinct !{![[SCOPE0]], !{{[0-9]+}}, !"scope0"} -; CHECK: ![[SCOPE2]] = distinct !{![[SCOPE2]], !{{[0-9]+}}, !"scope2"} ; CHECK: ![[SCOPE1]] = distinct !{![[SCOPE1]], !{{[0-9]+}}, !"scope1"} +; CHECK: ![[SCOPE2]] = distinct !{![[SCOPE2]], !{{[0-9]+}}, !"scope2"} ; CHECK: ![[NOALIAS]] = !{![[SCOPE3:[0-9]+]]} ; CHECK: ![[SCOPE3]] = distinct !{![[SCOPE3]], !{{[0-9]+}}, !"scope3"} Index: llvm/test/Transforms/InstCombine/noalias.ll =================================================================== --- /dev/null +++ llvm/test/Transforms/InstCombine/noalias.ll @@ -0,0 +1,49 @@ +; RUN: opt < %s -instcombine -S | FileCheck %s + +target datalayout = "e-i8:8:8-i16:16:16-i32:32:32-i64:32:32-f32:32:32-f64:32:32-p:32:32:32:32:8-s0:32:32-a0:8:8-S32-n32-v128:32:32-P0-p0:32:32:32:32:8-p10:32:32:32:32:8-p20:32:32:32:32:8" + +@__const.f1.i = private unnamed_addr constant { i8*, [4 x i8] } { i8* null, [4 x i8] undef }, align 4 + +declare void @do_something(i8*) + +; Function Attrs: nounwind optsize +define dso_local i64 @test01() local_unnamed_addr #0 { +entry: + %i.sroa.6 = alloca [2 x i8], align 2 + %i.sroa.6.0..sroa_idx6 = getelementptr inbounds [2 x i8], [2 x i8]* %i.sroa.6, i32 0, i32 0 + call void @llvm.lifetime.start.p0i8(i64 2, i8* %i.sroa.6.0..sroa_idx6) + %0 = call i8* @llvm.noalias.decl.p0i8.p0a2i8.i32([2 x i8]* %i.sroa.6, i32 2, metadata !2) + call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 2 %i.sroa.6.0..sroa_idx6, i8* align 2 getelementptr inbounds (i8, i8* bitcast ({ i8*, [4 x i8] }* @__const.f1.i to i8*), i32 2), i32 2, i1 false) + %i.sroa.6.cast = bitcast [2 x i8]* %i.sroa.6 to i8* + call void @do_something(i8* %i.sroa.6.cast) + call void @llvm.lifetime.end.p0i8(i64 2, i8* %i.sroa.6.0..sroa_idx6) + ret i64 undef +} + +; CHECK-LABEL: @test01( +; CHECK: %i.sroa.6 = alloca i16, align 2 +; CHECK: %0 = call i8* @llvm.noalias.decl.p0i8.p0i16.i32(i16* nonnull %i.sroa.6, i32 2, metadata !2) + +; Function Attrs: argmemonly nounwind +declare void @llvm.lifetime.start.p0i8(i64 immarg, i8* nocapture) #1 + +; Function Attrs: argmemonly nounwind +declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture writeonly, i8* nocapture readonly, i32, i1 immarg) #1 + +; Function Attrs: argmemonly nounwind +declare void @llvm.lifetime.end.p0i8(i64 immarg, i8* nocapture) #1 + +; Function Attrs: argmemonly nounwind +declare i8* @llvm.noalias.decl.p0i8.p0a2i8.i32([2 x i8]*, i32, metadata) #1 + +attributes #0 = { nounwind optsize "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" 
"stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #1 = { argmemonly nounwind } + +!llvm.module.flags = !{!0} +!llvm.ident = !{!1} + +!0 = !{i32 1, !"wchar_size", i32 4} +!1 = !{!"clang"} +!2 = !{!3} +!3 = distinct !{!3, !4, !"f1: i"} +!4 = distinct !{!4, !"f1"} Index: llvm/test/Transforms/InstSimplify/noalias.ll =================================================================== --- /dev/null +++ llvm/test/Transforms/InstSimplify/noalias.ll @@ -0,0 +1,172 @@ +; RUN: opt -instsimplify -S < %s | FileCheck %s + +define void @test01(i8* %ptr) { + call i8* @llvm.noalias.p0i8.p0i8.p0p0i8.i32(i8* %ptr, i8* null, i8** null, i32 0, metadata !1) + ret void + +; CHECK-LABEL: @test01 +; CHECK-NOT: llvm.noalias.p0i8 +; CHECK: ret void +} + +define i8* @test02() { + %v = call i8* @llvm.noalias.p0i8.p0i8.p0p0i8.i32(i8* null, i8* null, i8** null, i32 0, metadata !1) + ret i8* %v + +; CHECK-LABEL: @test02 +; CHECK: llvm.noalias.p0i8 +; CHECK: ret i8* %v +} + +define i8* @test03() { + %v = call i8* @llvm.noalias.p0i8.p0i8.p0p0i8.i32(i8* undef, i8* null, i8** null, i32 0, metadata !1) + ret i8* %v + +; CHECK-LABEL: @test03 +; CHECK-NOT: llvm.noalias.p0i8 +; CHECK: ret i8* undef +} + +declare i8* @llvm.noalias.p0i8.p0i8.p0p0i8.i32(i8*, i8*, i8**, i32, metadata ) nounwind + +define void @test11(i8* %ptr) { + call i8* @llvm.provenance.noalias.p0i8.p0i8.p0p0i8.p0p0i8.i32(i8* %ptr, i8* null, i8** null, i8** null, i32 0, metadata !1) + ret void + +; CHECK-LABEL: @test11 +; CHECK-NOT: llvm.provenance.noalias.p0i8 +; CHECK: ret void +} + +define i8* @test12() { + %v = call i8* @llvm.provenance.noalias.p0i8.p0i8.p0p0i8.p0p0i8.i32(i8* null, i8* null, i8** null, i8** null, i32 0, metadata !1) + ret i8* %v + +; CHECK-LABEL: @test12 +; CHECK: llvm.provenance.noalias.p0i8 +; CHECK: ret i8* %v +} + +define i8* @test13() { + %v = call i8* @llvm.provenance.noalias.p0i8.p0i8.p0p0i8.p0p0i8.i32(i8* undef, i8* null, i8** null, i8** null, i32 0, metadata !1) + ret i8* %v + +; CHECK-LABEL: @test13 +; CHECK-NOT: llvm.provenance.noalias.p0i8 +; CHECK: ret i8* undef +} + +define i8* @test14() { + %u = call i8* @llvm.provenance.noalias.p0i8.p0i8.p0p0i8.p0p0i8.i32(i8* null, i8* null, i8** null, i8** null, i32 0, metadata !1) + %v = call i8* @llvm.provenance.noalias.p0i8.p0i8.p0p0i8.p0p0i8.i32(i8* %u, i8* null, i8** null, i8** null, i32 0, metadata !1) + ret i8* %v + +; CHECK-LABEL: @test14 +; CHECK: llvm.provenance.noalias.p0i8 +; CHECK-NOT: llvm.provenance.noalias.p0i8 +; CHECK: ret i8* %u +} + +define i8* @test15() { + %u = call i8* @llvm.provenance.noalias.p0i8.p0i8.p0p0i8.p0p0i8.i32(i8* null, i8* null, i8** null, i8** null, i32 1, metadata !1) + %v = call i8* @llvm.provenance.noalias.p0i8.p0i8.p0p0i8.p0p0i8.i32(i8* %u, i8* null, i8** null, i8** null, i32 0, metadata !1) + ret i8* %v + +; CHECK-LABEL: @test15 +; CHECK: llvm.provenance.noalias.p0i8 +; CHECK: llvm.provenance.noalias.p0i8 +; CHECK: ret i8* %v +} + +define i8* @test20() { + %u = call i8* @llvm.provenance.noalias.p0i8.p0i8.p0p0i8.p0p0i8.i32(i8* null, i8* null, i8** null, i8** null, i32 0, metadata !1) + %v = call i8* @llvm.noalias.arg.guard.p0i8.p0i8(i8* null, i8* %u) + ret i8* %v + +; CHECK-LABEL: @test20 +; CHECK: llvm.provenance.noalias.p0i8 +; CHECK: llvm.noalias.arg.guard.p0i8 +; CHECK: ret i8* %v +} + +define i8* @test21() { + %u = call i8* @llvm.provenance.noalias.p0i8.p0i8.p0p0i8.p0p0i8.i32(i8* null, i8* null, i8** null, i8** null, i32 0, metadata !1) + %v = call i8* @llvm.noalias.arg.guard.p0i8.p0i8(i8* undef, i8* 
%u)
+ ret i8* %v
+
+; CHECK-LABEL: @test21
+; CHECK-NOT: llvm.provenance.noalias.p0i8
+; CHECK-NOT: llvm.noalias.arg.guard.p0i8
+; CHECK: ret i8* undef
+}
+
+define i8* @test30() {
+ %v = call i8* @llvm.noalias.copy.guard.p0i8.p0i8(i8* null, i8* null, metadata !2, metadata !1)
+ ret i8* %v
+
+; CHECK-LABEL: @test30
+; CHECK: llvm.noalias.copy.guard.p0i8
+; CHECK: ret i8* %v
+}
+
+define void @test31() {
+ %v = call i8* @llvm.noalias.copy.guard.p0i8.p0i8(i8* null, i8* null, metadata !2, metadata !1)
+ ret void
+
+; CHECK-LABEL: @test31
+; CHECK-NOT: llvm.noalias.copy.guard.p0i8
+; CHECK: ret void
+}
+
+define i8* @test32() {
+ %v = call i8* @llvm.noalias.copy.guard.p0i8.p0i8(i8* undef, i8* null, metadata !2, metadata !1)
+ ret i8* %v
+
+; CHECK-LABEL: @test32
+; CHECK-NOT: llvm.noalias.copy.guard.p0i8
+; CHECK: ret i8* undef
+}
+
+define void @test40() {
+ %v = call i8* @llvm.noalias.decl.p0i8.p0p0i32.i32(i8** null, i32 0, metadata !1)
+ ret void
+
+; CHECK-LABEL: @test40
+; CHECK-NOT: llvm.noalias.decl.p0i8
+; CHECK: ret void
+}
+
+define void @test41() {
+ %u = alloca i8*
+ %v = call i8* @llvm.noalias.decl.p0i8.p0p0i32.i32(i8** %u, i32 0, metadata !4)
+ ret void
+
+; CHECK-LABEL: @test41
+; CHECK-NOT: alloca
+; CHECK-NOT: llvm.noalias.decl.p0i8
+; CHECK: ret void
+}
+
+define i8** @test42() {
+ %u = alloca i8*
+ %v = call i8* @llvm.noalias.decl.p0i8.p0p0i32.i32(i8** %u, i32 0, metadata !0)
+ ret i8** %u
+
+; CHECK-LABEL: @test42
+; CHECK: alloca
+; CHECK: llvm.noalias.decl.p0i8
+; CHECK: ret i8** %u
+}
+
+
+declare i8* @llvm.provenance.noalias.p0i8.p0i8.p0p0i8.p0p0i8.i32(i8*, i8*, i8**, i8**, i32, metadata ) nounwind
+declare i8* @llvm.noalias.arg.guard.p0i8.p0i8(i8*, i8*) nounwind readnone
+declare i8* @llvm.noalias.copy.guard.p0i8.p0i8(i8*, i8*, metadata, metadata)
+declare i8* @llvm.noalias.decl.p0i8.p0p0i32.i32(i8**, i32, metadata) argmemonly nounwind
+
+!0 = !{!0, !"some domain"}
+!1 = !{!1, !0, !"some scope"}
+!2 = !{!3}
+!3 = !{ i64 -1, i64 0 }
+!4 = !{!4, !"some other domain"}
+!5 = !{!5, !4, !"some other scope"}
Index: llvm/test/Transforms/LICM/noalias.ll
===================================================================
--- /dev/null
+++ llvm/test/Transforms/LICM/noalias.ll
@@ -0,0 +1,87 @@
+; RUN: opt -S -basic-aa -licm -enable-mssa-loop-dependency=false %s | FileCheck -check-prefixes=CHECK,AST %s
+; RUN: opt -S -basic-aa -licm -enable-mssa-loop-dependency=true %s | FileCheck -check-prefixes=CHECK,MSSA %s
+; RUN: opt -aa-pipeline=basic-aa -passes='require<aa>,require<targetir>,require<scalar-evolution>,require<opt-remark-emit>,loop(licm)' < %s -S | FileCheck -check-prefixes=CHECK,AST %s
+
+
+; Function Attrs: nounwind
+define dso_local void @test01(i32* %_p, i32 %n) #0 {
+entry:
+ %0 = call i8* @llvm.noalias.decl.p0i8.p0p0i32.i32(i32** null, i32 0, metadata !2)
+ br label %do.body
+
+do.body: ; preds = %do.body, %entry
+ %n.addr.0 = phi i32 [ %n, %entry ], [ %dec, %do.body ]
+ %1 = call i32* @llvm.provenance.noalias.p0i32.p0i8.p0p0i32.p0p0i32.i32(i32* %_p, i8* %0, i32** null, i32** undef, i32 0, metadata !2), !tbaa !5, !noalias !2
+ store i32 42, i32* %_p, ptr_provenance i32* %1, align 4, !tbaa !9, !noalias !2
+ %dec = add nsw i32 %n.addr.0, -1
+ %cmp = icmp ne i32 %dec, 0
+ br i1 %cmp, label %do.body, label %do.end
+
+do.end: ; preds = %do.body
+ ret void
+}
+
+; CHECK-LABEL: @test01(
+; CHECK-LABEL: entry:
+; CHECK: %0 = call i8* @llvm.noalias.decl.p0i8.p0p0i32.i32(i32** null, i32 0, metadata !2)
+; CHECK: %1 = call i32* @llvm.provenance.noalias.p0i32.p0i8.p0p0i32.p0p0i32.i32(i32* %_p, i8* %0, i32** null, i32** undef, i32 0, metadata !2), !tbaa !5, !noalias !2
+; MSSA: store i32 42, i32* %_p, ptr_provenance i32* %1, align 4, !tbaa !9, !noalias !2
+; CHECK-LABEL: do.body:
+; CHECK-LABEL: do.end:
+; AST: store i32 42, i32* %_p, align 4, !tbaa !9
+; CHECK: ret void
+
+; Function Attrs: nounwind
+define dso_local void @test02(i32* %_p, i32 %n) #0 {
+entry:
+ br label %do.body
+
+do.body: ; preds = %do.body, %entry
+ %n.addr.0 = phi i32 [ %n, %entry ], [ %dec, %do.body ]
+ %0 = call i8* @llvm.noalias.decl.p0i8.p0p0i32.i32(i32** null, i32 0, metadata !11)
+ %1 = call i32* @llvm.provenance.noalias.p0i32.p0i8.p0p0i32.p0p0i32.i32(i32* %_p, i8* %0, i32** null, i32** undef, i32 0, metadata !11), !tbaa !5, !noalias !11
+ store i32 42, i32* %_p, ptr_provenance i32* %1, align 4, !tbaa !9, !noalias !11
+ %dec = add nsw i32 %n.addr.0, -1
+ %cmp = icmp ne i32 %dec, 0
+ br i1 %cmp, label %do.body, label %do.end
+
+do.end: ; preds = %do.body
+ ret void
+}
+
+; CHECK-LABEL: @test02(
+; CHECK-LABEL: entry:
+; CHECK-LABEL: do.body:
+; CHECK: %0 = call i8* @llvm.noalias.decl.p0i8.p0p0i32.i32(i32** null, i32 0, metadata !11)
+; CHECK: %1 = call i32* @llvm.provenance.noalias.p0i32.p0i8.p0p0i32.p0p0i32.i32(i32* %_p, i8* %0, i32** null, i32** undef, i32 0, metadata !11), !tbaa !5, !noalias !11
+; CHECK-LABEL: do.end:
+; CHECK: store i32 42, i32* %_p, align 4, !tbaa !9
+; CHECK: ret void
+
+; Function Attrs: argmemonly nounwind
+declare i8* @llvm.noalias.decl.p0i8.p0p0i32.i32(i32**, i32, metadata) #1
+
+; Function Attrs: nounwind readnone speculatable
+declare i32* @llvm.provenance.noalias.p0i32.p0i8.p0p0i32.p0p0i32.i32(i32*, i8*, i32**, i32**, i32, metadata) #2
+
+attributes #0 = { nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #1 = { argmemonly nounwind }
+attributes #2 = { nounwind readnone speculatable }
+
+!llvm.module.flags = !{!0}
+!llvm.ident = !{!1}
+
+!0 = !{i32 1, !"wchar_size", i32 4}
+!1 = !{!"clang"}
+!2 = !{!3}
+!3 = distinct !{!3, !4, !"test01: rp"}
+!4 = distinct !{!4, !"test01"}
+!5 = !{!6, !6, i64 0, i64 4}
+!6 = !{!7, i64 4, !"any pointer"}
+!7 = !{!8, i64 1, !"omnipotent char"}
+!8 = !{!"Simple C/C++ TBAA"}
+!9 = !{!10, !10, i64 0, i64 4}
+!10 = !{!7, i64 4, !"int"}
+!11 = !{!12}
+!12 = distinct !{!12, !13, !"test02: rp"}
+!13 = distinct !{!13, !"test02"}
Index: llvm/test/Transforms/LoopRotate/noalias.ll
===================================================================
--- /dev/null
+++ llvm/test/Transforms/LoopRotate/noalias.ll
@@ -0,0 +1,186 @@
+; RUN: opt -S -loop-rotate < %s | FileCheck %s
+; RUN: opt -S -loop-rotate -enable-mssa-loop-dependency=true -verify-memoryssa < %s | FileCheck %s
+; RUN: opt -S -passes='require<targetir>,require<assumptions>,loop(rotate)' < %s | FileCheck %s
+; RUN: opt -S -passes='require<targetir>,require<assumptions>,loop(rotate)' -enable-mssa-loop-dependency=true -verify-memoryssa < %s | FileCheck %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+declare void @g(i32*)
+
+define void @test_02(i32* nocapture %_pA) nounwind ssp {
+entry:
+ %array = alloca [20 x i32],
align 16 + br label %for.cond + +for.cond: ; preds = %for.body, %entry + %i.0 = phi i32 [ 0, %entry ], [ %inc, %for.body ] + %p.decl = tail call i8* @llvm.noalias.decl.p0i8.p0p0i32.i32(i32** null, i32 0, metadata !2) + %prov.p = tail call i32* @llvm.provenance.noalias.p0i32.p0i8.p0p0i32.p0p0i32.i32(i32* %_pA, i8* %p.decl, i32** null, i32** undef, i32 0, metadata !2), !tbaa !5, !noalias !2 + store i32 42, i32* %_pA, ptr_provenance i32* %prov.p, align 16 + %cmp = icmp slt i32 %i.0, 100 + %arrayidx = getelementptr inbounds [20 x i32], [20 x i32]* %array, i64 0, i64 0 + br i1 %cmp, label %for.body, label %for.end + +for.body: ; preds = %for.cond + store i32 0, i32* %arrayidx, align 16 + %inc = add nsw i32 %i.0, 1 + br label %for.cond + +for.end: ; preds = %for.cond + %arrayidx.lcssa = phi i32* [ %arrayidx, %for.cond ] + call void @g(i32* %arrayidx.lcssa) nounwind + ret void +} + +; CHECK-LABEL: @test_02( +; CHECK: entry: +; CHECK: %p.decl1 = tail call i8* @llvm.noalias.decl.p0i8.p0p0i32.i32(i32** null, i32 0, metadata !2) +; CHECK: %prov.p2 = tail call i32* @llvm.provenance.noalias.p0i32.p0i8.p0p0i32.p0p0i32.i32(i32* %_pA, i8* %p.decl1, i32** null, i32** undef, i32 0, metadata !2), !tbaa !5, !noalias !2 +; CHECK: store i32 42, i32* %_pA, ptr_provenance i32* undef, align 16 +; CHECK: for.body: +; CHECK: %p.decl = tail call i8* @llvm.noalias.decl.p0i8.p0p0i32.i32(i32** null, i32 0, metadata !9) +; CHECK: %0 = tail call i8* @llvm.noalias.decl.p0i8.p0p0i32.i32(i32** null, i32 0, metadata !11) +; CHECK: %prov.p = tail call i32* @llvm.provenance.noalias.p0i32.p0i8.p0p0i32.p0p0i32.i32(i32* %_pA, i8* %0, i32** null, i32** undef, i32 0, metadata !11), !tbaa !5, !noalias !11 +; CHECK: store i32 42, i32* %_pA, ptr_provenance i32* %prov.p, align 16 +; CHECK: for.end: + + +define void @test_03(i32* nocapture %_pA) nounwind ssp { +entry: + %array = alloca [20 x i32], align 16 + br label %for.cond + +for.cond: ; preds = %for.body, %entry + %i.0 = phi i32 [ 0, %entry ], [ %inc, %for.body ] + %cmp = icmp slt i32 %i.0, 100 + %arrayidx = getelementptr inbounds [20 x i32], [20 x i32]* %array, i64 0, i64 0 + br i1 %cmp, label %for.body, label %for.end + +for.body: ; preds = %for.cond + %p.decl = tail call i8* @llvm.noalias.decl.p0i8.p0p0i32.i32(i32** null, i32 0, metadata !2) + %prov.p = tail call i32* @llvm.provenance.noalias.p0i32.p0i8.p0p0i32.p0p0i32.i32(i32* %_pA, i8* %p.decl, i32** null, i32** undef, i32 0, metadata !2), !tbaa !5, !noalias !2 + store i32 42, i32* %_pA, ptr_provenance i32* %prov.p, align 16 + store i32 0, i32* %arrayidx, align 16 + %inc = add nsw i32 %i.0, 1 + br label %for.cond + +for.end: ; preds = %for.cond + %arrayidx.lcssa = phi i32* [ %arrayidx, %for.cond ] + call void @g(i32* %arrayidx.lcssa) nounwind + ret void +} +; CHECK-LABEL: @test_03( +; CHECK: entry: +; CHECK: for.body: +; CHECK: %p.decl = tail call i8* @llvm.noalias.decl.p0i8.p0p0i32.i32(i32** null, i32 0, metadata !9) +; CHECK: %prov.p = tail call i32* @llvm.provenance.noalias.p0i32.p0i8.p0p0i32.p0p0i32.i32(i32* %_pA, i8* %p.decl, i32** null, i32** undef, i32 0, metadata !9), !tbaa !5, !noalias !9 +; CHECK: store i32 42, i32* %_pA, ptr_provenance i32* %prov.p, align 16 +; CHECK: for.end: + +define void @test_04(i32* nocapture %_pA) nounwind ssp { +entry: + %array = alloca [20 x i32], align 16 + br label %for.cond + +for.cond: ; preds = %for.body, %entry + %i.0 = phi i32 [ 0, %entry ], [ %inc, %for.body ] + %p.decl = tail call i8* @llvm.noalias.decl.p0i8.p0p0i32.i32(i32** null, i32 0, metadata !2) + %prov.p = tail call i32* 
@llvm.provenance.noalias.p0i32.p0i8.p0p0i32.p0p0i32.i32(i32* %_pA, i8* %p.decl, i32** null, i32** undef, i32 0, metadata !2), !tbaa !5, !noalias !2 + store i32 42, i32* %_pA, ptr_provenance i32* %prov.p, align 16 + %cmp = icmp slt i32 %i.0, 100 + %arrayidx = getelementptr inbounds [20 x i32], [20 x i32]* %array, i64 0, i64 0 + br i1 %cmp, label %for.body, label %for.end + +for.body: ; preds = %for.cond + store i32 0, i32* %arrayidx, align 16 + store i32 43, i32* %_pA, ptr_provenance i32* %prov.p, align 16 + %inc = add nsw i32 %i.0, 1 + br label %for.cond + +for.end: ; preds = %for.cond + %arrayidx.lcssa = phi i32* [ %arrayidx, %for.cond ] + call void @g(i32* %arrayidx.lcssa) nounwind + ret void +} +; CHECK-LABEL: @test_04( +; CHECK: entry: +; CHECK: %p.decl1 = tail call i8* @llvm.noalias.decl.p0i8.p0p0i32.i32(i32** null, i32 0, metadata !13) +; CHECK: %prov.p2 = tail call i32* @llvm.provenance.noalias.p0i32.p0i8.p0p0i32.p0p0i32.i32(i32* %_pA, i8* %p.decl1, i32** null, i32** undef, i32 0, metadata !13), !tbaa !5, !noalias !13 +; CHECK: store i32 42, i32* %_pA, ptr_provenance i32* undef, align 16 +; CHECK: for.body: +; CHECK: %prov.p4 = phi i32* [ %prov.p2, %entry ], [ %prov.p, %for.body ] +; CHECK: %p.decl = tail call i8* @llvm.noalias.decl.p0i8.p0p0i32.i32(i32** null, i32 0, metadata !9) +; CHECK: store i32 43, i32* %_pA, ptr_provenance i32* %prov.p4, align 16 +; CHECK: %0 = tail call i8* @llvm.noalias.decl.p0i8.p0p0i32.i32(i32** null, i32 0, metadata !15) +; CHECK: %prov.p = tail call i32* @llvm.provenance.noalias.p0i32.p0i8.p0p0i32.p0p0i32.i32(i32* %_pA, i8* %0, i32** null, i32** undef, i32 0, metadata !15), !tbaa !5, !noalias !15 +; CHECK: store i32 42, i32* %_pA, ptr_provenance i32* %prov.p, align 16 +; CHECK: for.end: + +define void @test_05(i32* nocapture %_pA) nounwind ssp { +entry: + %array = alloca [20 x i32], align 16 + br label %for.cond + +for.cond: ; preds = %for.body, %entry + %i.0 = phi i32 [ 0, %entry ], [ %inc, %for.body ] + %p.decl = tail call i8* @llvm.noalias.decl.p0i8.p0p0i32.i32(i32** null, i32 0, metadata !2) + %prov.p = tail call i32* @llvm.provenance.noalias.p0i32.p0i8.p0p0i32.p0p0i32.i32(i32* %_pA, i8* %p.decl, i32** null, i32** undef, i32 0, metadata !2), !tbaa !5, !noalias !2 + store i32 42, i32* %_pA, ptr_provenance i32* %prov.p, align 16 + %cmp = icmp slt i32 %i.0, 100 + %arrayidx = getelementptr inbounds [20 x i32], [20 x i32]* %array, i64 0, i64 0 + br i1 %cmp, label %for.body, label %for.end + +for.body: ; preds = %for.cond + store i32 0, i32* %arrayidx, align 16 + store i32 43, i32* %_pA, ptr_provenance i32* %prov.p, align 16 + %inc = add nsw i32 %i.0, 1 + br label %for.cond + +for.end: ; preds = %for.cond + %arrayidx.lcssa = phi i32* [ %arrayidx, %for.cond ] + store i32 44, i32* %_pA, ptr_provenance i32* %prov.p, align 16 + call void @g(i32* %arrayidx.lcssa) nounwind + ret void +} +; CHECK-LABEL: @test_05( +; CHECK: entry: +; CHECK: %p.decl1 = tail call i8* @llvm.noalias.decl.p0i8.p0p0i32.i32(i32** null, i32 0, metadata !17) +; CHECK: %prov.p2 = tail call i32* @llvm.provenance.noalias.p0i32.p0i8.p0p0i32.p0p0i32.i32(i32* %_pA, i8* %p.decl1, i32** null, i32** undef, i32 0, metadata !17), !tbaa !5, !noalias !17 +; CHECK: store i32 42, i32* %_pA, ptr_provenance i32* undef, align 16 +; CHECK: for.body: +; CHECK: %prov.p4 = phi i32* [ %prov.p2, %entry ], [ %prov.p, %for.body ] +; CHECK: %p.decl = tail call i8* @llvm.noalias.decl.p0i8.p0p0i32.i32(i32** null, i32 0, metadata !9) +; CHECK: store i32 43, i32* %_pA, ptr_provenance i32* %prov.p4, align 16 +; CHECK: 
%0 = tail call i8* @llvm.noalias.decl.p0i8.p0p0i32.i32(i32** null, i32 0, metadata !19) +; CHECK: %prov.p = tail call i32* @llvm.provenance.noalias.p0i32.p0i8.p0p0i32.p0p0i32.i32(i32* %_pA, i8* %0, i32** null, i32** undef, i32 0, metadata !19), !tbaa !5, !noalias !19 +; CHECK: store i32 42, i32* %_pA, ptr_provenance i32* %prov.p, align 16 +; CHECK: for.end: +; CHECK: %prov.p.lcssa = phi i32* [ %prov.p, %for.body ] +; CHECK: store i32 44, i32* %_pA, ptr_provenance i32* %prov.p.lcssa, align 16 + + +; Function Attrs: argmemonly nounwind +declare i8* @llvm.noalias.decl.p0i8.p0p0i32.i32(i32**, i32, metadata) #1 + +; Function Attrs: nounwind readnone speculatable +declare i32* @llvm.provenance.noalias.p0i32.p0i8.p0p0i32.p0p0i32.i32(i32*, i8*, i32**, i32**, i32, metadata) #2 + +attributes #0 = { nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #1 = { argmemonly nounwind } +attributes #2 = { nounwind readnone speculatable } + +!llvm.module.flags = !{!0} +!llvm.ident = !{!1} + +!0 = !{i32 1, !"wchar_size", i32 4} +!1 = !{!"clang"} +!2 = !{!3} +!3 = distinct !{!3, !4, !"test_loop_rotate_XX: pA"} +!4 = distinct !{!4, !"test_loop_rotate_XX"} +!5 = !{!6, !6, i64 0, i64 4} +!6 = !{!7, i64 4, !"any pointer"} +!7 = !{!8, i64 1, !"omnipotent char"} +!8 = !{!"Simple C/C++ TBAA"} +!9 = !{!10, !10, i64 0, i64 4} +!10 = !{!7, i64 4, !"int"} Index: llvm/test/Transforms/LoopUnroll/noalias.ll =================================================================== --- /dev/null +++ llvm/test/Transforms/LoopUnroll/noalias.ll @@ -0,0 +1,200 @@ +; RUN: opt < %s -S -unroll-runtime -unroll-count=2 -loop-unroll | FileCheck %s + +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +; Function Attrs: nounwind +define dso_local void @test_loop_unroll_01(i32* nocapture %_pA) local_unnamed_addr #0 { +entry: + %0 = tail call i8* @llvm.noalias.decl.p0i8.p0p0i32.i32(i32** null, i32 0, metadata !2) + %1 = tail call i32* @llvm.provenance.noalias.p0i32.p0i8.p0p0i32.p0p0i32.i32(i32* %_pA, i8* %0, i32** null, i32** undef, i32 0, metadata !2), !tbaa !5, !noalias !2 + br label %for.body + +for.cond.cleanup: ; preds = %for.body + ret void + +for.body: ; preds = %for.body, %entry + %i.06 = phi i32 [ 0, %entry ], [ %inc, %for.body ] + %arrayidx = getelementptr inbounds i32, i32* %_pA, i32 %i.06 + store i32 %i.06, i32* %arrayidx, ptr_provenance i32* %1, align 4, !tbaa !9, !noalias !2 + %inc = add nuw nsw i32 %i.06, 1 + %exitcond = icmp eq i32 %inc, 4 + br i1 %exitcond, label %for.cond.cleanup, label %for.body +} + +; CHECK: define dso_local void @test_loop_unroll_01(i32* nocapture %_pA) local_unnamed_addr #0 { +; CHECK: entry: +; CHECK: %0 = tail call i8* @llvm.noalias.decl.p0i8.p0p0i32.i32(i32** null, i32 0, metadata !2) +; CHECK: %1 = tail call i32* @llvm.provenance.noalias.p0i32.p0i8.p0p0i32.p0p0i32.i32(i32* %_pA, i8* %0, i32** null, i32** undef, i32 0, metadata !2), !tbaa !5, !noalias !2 +; CHECK: br label %for.body +; CHECK: for.cond.cleanup: ; preds = %for.body +; CHECK: ret void +; CHECK: for.body: ; preds = %for.body, %entry +; 
CHECK: %i.06 = phi i32 [ 0, %entry ], [ %inc.1, %for.body ] +; CHECK: %arrayidx = getelementptr inbounds i32, i32* %_pA, i32 %i.06 +; CHECK: store i32 %i.06, i32* %arrayidx, ptr_provenance i32* %1, align 4, !tbaa !9, !noalias !2 +; CHECK: %inc = add nuw nsw i32 %i.06, 1 +; CHECK: %arrayidx.1 = getelementptr inbounds i32, i32* %_pA, i32 %inc +; CHECK: store i32 %inc, i32* %arrayidx.1, ptr_provenance i32* %1, align 4, !tbaa !9, !noalias !2 +; CHECK: %inc.1 = add nuw nsw i32 %inc, 1 +; CHECK: %exitcond.1 = icmp eq i32 %inc.1, 4 +; CHECK: br i1 %exitcond.1, label %for.cond.cleanup, label %for.body, !llvm.loop !11 +; CHECK: } + + +; Function Attrs: argmemonly nounwind +declare i8* @llvm.noalias.decl.p0i8.p0p0i32.i32(i32**, i32, metadata) #1 + +; Function Attrs: nounwind +define dso_local void @test_loop_unroll_02(i32* nocapture %_pA) local_unnamed_addr #0 { +entry: + br label %for.body + +for.cond.cleanup: ; preds = %for.body + ret void + +for.body: ; preds = %for.body, %entry + %i.06 = phi i32 [ 0, %entry ], [ %inc, %for.body ] + %0 = tail call i8* @llvm.noalias.decl.p0i8.p0p0i32.i32(i32** null, i32 0, metadata !11) + %1 = tail call i32* @llvm.provenance.noalias.p0i32.p0i8.p0p0i32.p0p0i32.i32(i32* %_pA, i8* %0, i32** null, i32** undef, i32 0, metadata !11), !tbaa !5, !noalias !11 + %arrayidx = getelementptr inbounds i32, i32* %_pA, i32 %i.06 + store i32 %i.06, i32* %arrayidx, ptr_provenance i32* %1, align 4, !tbaa !9, !noalias !11 + %inc = add nuw nsw i32 %i.06, 1 + %exitcond = icmp eq i32 %inc, 4 + br i1 %exitcond, label %for.cond.cleanup, label %for.body +} + +; CHECK: define dso_local void @test_loop_unroll_02(i32* nocapture %_pA) local_unnamed_addr #0 { +; CHECK: entry: +; CHECK: br label %for.body +; CHECK: for.cond.cleanup: ; preds = %for.body +; CHECK: ret void +; CHECK: for.body: ; preds = %for.body, %entry +; CHECK: %i.06 = phi i32 [ 0, %entry ], [ %inc.1, %for.body ] +; CHECK: %0 = tail call i8* @llvm.noalias.decl.p0i8.p0p0i32.i32(i32** null, i32 0, metadata !13) +; CHECK: %1 = tail call i32* @llvm.provenance.noalias.p0i32.p0i8.p0p0i32.p0p0i32.i32(i32* %_pA, i8* %0, i32** null, i32** undef, i32 0, metadata !13), !tbaa !5, !noalias !13 +; CHECK: %arrayidx = getelementptr inbounds i32, i32* %_pA, i32 %i.06 +; CHECK: store i32 %i.06, i32* %arrayidx, ptr_provenance i32* %1, align 4, !tbaa !9, !noalias !13 +; CHECK: %inc = add nuw nsw i32 %i.06, 1 +; CHECK: %2 = tail call i8* @llvm.noalias.decl.p0i8.p0p0i32.i32(i32** null, i32 0, metadata !16) +; CHECK: %3 = tail call i32* @llvm.provenance.noalias.p0i32.p0i8.p0p0i32.p0p0i32.i32(i32* %_pA, i8* %2, i32** null, i32** undef, i32 0, metadata !16), !tbaa !5, !noalias !16 +; CHECK: %arrayidx.1 = getelementptr inbounds i32, i32* %_pA, i32 %inc +; CHECK: store i32 %inc, i32* %arrayidx.1, ptr_provenance i32* %3, align 4, !tbaa !9, !noalias !16 +; CHECK: %inc.1 = add nuw nsw i32 %inc, 1 +; CHECK: %exitcond.1 = icmp eq i32 %inc.1, 4 +; CHECK: br i1 %exitcond.1, label %for.cond.cleanup, label %for.body, !llvm.loop !18 +; CHECK: } + +; Function Attrs: nounwind +define dso_local void @test_loop_unroll_03(i32* nocapture %_pA, i1 %c) local_unnamed_addr #0 { +entry: + br label %for.body + +for.cond.cleanup: ; preds = %for.body + ret void + +for.body: ; preds = %for.body, %entry + %i.06 = phi i32 [ 0, %entry ], [ %inc, %for.body1 ] + %0 = tail call i8* @llvm.noalias.decl.p0i8.p0p0i32.i32(i32** null, i32 0, metadata !14) + %arrayidx = getelementptr inbounds i32, i32* %_pA, i32 %i.06 + br i1 %c, label %alt.exit, label %for.body1 + +for.body1: + store i32 1, 
i32* %arrayidx, align 4, !tbaa !9, !noalias !14 + %inc = add nuw nsw i32 %i.06, 1 + %exitcond = icmp eq i32 %inc, 4 + br i1 %exitcond, label %for.cond.cleanup, label %for.body + +alt.exit: + %s = tail call i32* @llvm.provenance.noalias.p0i32.p0i8.p0p0i32.p0p0i32.i32(i32* %_pA, i8* %0, i32** null, i32** undef, i32 0, metadata !14), !tbaa !5, !noalias !14 + store i32 %i.06, i32* %arrayidx, ptr_provenance i32* %s, align 4, !tbaa !9, !noalias !14 + ret void +} + +; CHECK: define dso_local void @test_loop_unroll_03(i32* nocapture %_pA, i1 %c) local_unnamed_addr #0 { +; CHECK: entry: +; CHECK: br label %for.body +; CHECK: for.cond.cleanup: ; preds = %for.body1.1 +; CHECK: ret void +; CHECK: for.body: ; preds = %for.body1.1, %entry +; CHECK: %i.06 = phi i32 [ 0, %entry ], [ %inc.1, %for.body1.1 ] +; CHECK: %0 = tail call i8* @llvm.noalias.decl.p0i8.p0p0i32.i32(i32** null, i32 0, metadata !19) +; CHECK: %arrayidx = getelementptr inbounds i32, i32* %_pA, i32 %i.06 +; CHECK: br i1 %c, label %alt.exit, label %for.body1 +; CHECK: for.body1: ; preds = %for.body +; CHECK: store i32 1, i32* %arrayidx, align 4, !tbaa !9, !noalias !19 +; CHECK: %inc = add nuw nsw i32 %i.06, 1 +; CHECK: %1 = tail call i8* @llvm.noalias.decl.p0i8.p0p0i32.i32(i32** null, i32 0, metadata !22) +; CHECK: %arrayidx.1 = getelementptr inbounds i32, i32* %_pA, i32 %inc +; CHECK: br i1 %c, label %alt.exit, label %for.body1.1 +; CHECK: alt.exit: ; preds = %for.body1, %for.body +; CHECK: %i.06.lcssa = phi i32 [ %i.06, %for.body ], [ %inc, %for.body1 ] +; CHECK: %.lcssa = phi i8* [ %0, %for.body ], [ %1, %for.body1 ] +; CHECK: %arrayidx.lcssa = phi i32* [ %arrayidx, %for.body ], [ %arrayidx.1, %for.body1 ] +; CHECK: %2 = tail call i8* @llvm.noalias.decl.p0i8.p0p0i32.i32(i32** null, i32 0, metadata !24) +; CHECK: %s = tail call i32* @llvm.provenance.noalias.p0i32.p0i8.p0p0i32.p0p0i32.i32(i32* %_pA, i8* %2, i32** null, i32** undef, i32 0, metadata !24), !tbaa !5, !noalias !24 +; CHECK: store i32 %i.06.lcssa, i32* %arrayidx.lcssa, ptr_provenance i32* %s, align 4, !tbaa !9, !noalias !24 +; CHECK: ret void +; CHECK: for.body1.1: ; preds = %for.body1 +; CHECK: store i32 1, i32* %arrayidx.1, align 4, !tbaa !9, !noalias !22 +; CHECK: %inc.1 = add nuw nsw i32 %inc, 1 +; CHECK: %exitcond.1 = icmp eq i32 %inc.1, 4 +; CHECK: br i1 %exitcond.1, label %for.cond.cleanup, label %for.body, !llvm.loop !26 +; CHECK: } + + +; Function Attrs: nounwind readnone speculatable +declare i32* @llvm.provenance.noalias.p0i32.p0i8.p0p0i32.p0p0i32.i32(i32*, i8*, i32**, i32**, i32, metadata) #2 + +attributes #0 = { nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #1 = { argmemonly nounwind } +attributes #2 = { nounwind readnone speculatable } + +!llvm.module.flags = !{!0} +!llvm.ident = !{!1} + +!0 = !{i32 1, !"wchar_size", i32 4} +!1 = !{!"clang"} +!2 = !{!3} +!3 = distinct !{!3, !4, !"test_loop_unroll_01: pA"} +!4 = distinct !{!4, !"test_loop_unroll_01"} +!5 = !{!6, !6, i64 0, i64 4} +!6 = !{!7, i64 4, !"any pointer"} +!7 = !{!8, i64 1, !"omnipotent char"} +!8 = !{!"Simple C/C++ TBAA"} +!9 = !{!10, !10, i64 0, i64 4} +!10 = !{!7, i64 4, !"int"} +!11 = !{!12} +!12 = distinct !{!12, !13, !"test_loop_unroll_02: pA"} +!13 = 
distinct !{!13, !"test_loop_unroll_02"} +!14 = !{!15} +!15 = distinct !{!15, !16, !"test_loop_unroll_03: pA"} +!16 = distinct !{!16, !"test_loop_unroll_03"} + +; CHECK: !0 = !{i32 1, !"wchar_size", i32 4} +; CHECK: !1 = !{!"clang"} +; CHECK: !2 = !{!3} +; CHECK: !3 = distinct !{!3, !4, !"test_loop_unroll_01: pA"} +; CHECK: !4 = distinct !{!4, !"test_loop_unroll_01"} +; CHECK: !5 = !{!6, !6, i64 0, i64 4} +; CHECK: !6 = !{!7, i64 4, !"any pointer"} +; CHECK: !7 = !{!8, i64 1, !"omnipotent char"} +; CHECK: !8 = !{!"Simple C/C++ TBAA"} +; CHECK: !9 = !{!10, !10, i64 0, i64 4} +; CHECK: !10 = !{!7, i64 4, !"int"} +; CHECK: !11 = distinct !{!11, !12} +; CHECK: !12 = !{!"llvm.loop.unroll.disable"} +; CHECK: !13 = !{!14} +; CHECK: !14 = distinct !{!14, !15, !"test_loop_unroll_02: pA:It0"} +; CHECK: !15 = distinct !{!15, !"test_loop_unroll_02"} +; CHECK: !16 = !{!17} +; CHECK: !17 = distinct !{!17, !15, !"test_loop_unroll_02: pA:It1"} +; CHECK: !18 = distinct !{!18, !12} +; CHECK: !19 = !{!20} +; CHECK: !20 = distinct !{!20, !21, !"test_loop_unroll_03: pA:It0"} +; CHECK: !21 = distinct !{!21, !"test_loop_unroll_03"} +; CHECK: !22 = !{!23} +; CHECK: !23 = distinct !{!23, !21, !"test_loop_unroll_03: pA:It1"} +; CHECK: !24 = !{!25} +; CHECK: !25 = distinct !{!25, !21, !"test_loop_unroll_03: pA"} +; CHECK: !26 = distinct !{!26, !12} Index: llvm/test/Transforms/LoopVectorize/noalias.ll =================================================================== --- /dev/null +++ llvm/test/Transforms/LoopVectorize/noalias.ll @@ -0,0 +1,90 @@ +; RUN: opt -S -loop-vectorize -force-vector-width=2 -force-vector-interleave=1 < %s | FileCheck %s + +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" + +; Make sure we can vectorize loops which contain noalias intrinsics + + +; A not-used llvm.noalias should not interfere +; CHECK-LABEL: @test_noalias_not_connected( +; CHECK: @llvm.noalias.p0i32 +; CHECK: store <2 x i32> +; CHECK: ret +define void @test_noalias_not_connected(i32 *%d) { +entry: + br label %for.body + +for.body: + %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] + %d2 = call i32* @llvm.noalias.p0i32.p0i8.p0p0i32.i32(i32* %d, i8* null, i32** null, i32 0, metadata !1) + %arrayidx = getelementptr inbounds i32, i32* %d, i64 %indvars.iv + %v1 = load i32, i32* %arrayidx, align 8 + store i32 100, i32* %arrayidx, align 8 + %indvars.iv.next = add i64 %indvars.iv, 1 + %lftr.wideiv = trunc i64 %indvars.iv.next to i32 + %exitcond = icmp ne i32 %lftr.wideiv, 128 + br i1 %exitcond, label %for.body, label %for.end + +for.end: + ret void +} + +; A used llvm.noalias should block vectorization. 
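+; Here the loop accesses are based on %d2, the result of the llvm.noalias call
+; inside the loop, so the accesses must not be widened; the checks below verify
+; that no <2 x i32> store is introduced.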
+; CHECK-LABEL: @test_noalias_connected(
+; CHECK: @llvm.noalias.p0i32
+; CHECK-NOT: store <2 x i32>
+; CHECK: ret
+define void @test_noalias_connected(i32 *%d) {
+entry:
+ br label %for.body
+
+for.body:
+ %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+ %d2 = call i32* @llvm.noalias.p0i32.p0i8.p0p0i32.i32(i32* %d, i8* null, i32** null, i32 0, metadata !1)
+ %arrayidx = getelementptr inbounds i32, i32* %d2, i64 %indvars.iv
+ %v1 = load i32, i32* %arrayidx, align 8
+ store i32 100, i32* %arrayidx, align 8
+ %indvars.iv.next = add i64 %indvars.iv, 1
+ %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+ %exitcond = icmp ne i32 %lftr.wideiv, 128
+ br i1 %exitcond, label %for.body, label %for.end
+
+for.end:
+ ret void
+}
+
+; A used llvm.provenance.noalias should NOT block vectorization.
+; CHECK-LABEL: @test_provenance.noalias(
+; CHECK: @llvm.provenance.noalias.p0i32
+; NOTE: the ptr_provenance is omitted
+; CHECK: store <2 x i32> <i32 100, i32 100>, <2 x i32>* {{%[0-9.a-zA-Z]*}}, align 8
+; CHECK: ret
+
+define void @test_provenance.noalias(i32 *%d) {
+entry:
+ br label %for.body
+
+for.body:
+ %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+ %prov.d = call i32* @llvm.provenance.noalias.p0i32.p0i8.p0p0i32.p0p0i32.i32(i32* %d, i8* null, i32** null, i32** null, i32 0, metadata !1)
+ %arrayidx = getelementptr inbounds i32, i32* %d, i64 %indvars.iv
+ %v1 = load i32, i32* %arrayidx, ptr_provenance i32* %prov.d, align 8
+ store i32 100, i32* %arrayidx, ptr_provenance i32* %prov.d, align 8
+ %indvars.iv.next = add i64 %indvars.iv, 1
+ %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+ %exitcond = icmp ne i32 %lftr.wideiv, 128
+ br i1 %exitcond, label %for.body, label %for.end
+
+for.end:
+ ret void
+}
+
+declare i8* @llvm.noalias.decl.p0i8.p0p0i32.i32(i8**, i32, metadata) argmemonly nounwind
+declare i32* @llvm.noalias.p0i32.p0i8.p0p0i32.i32(i32*, i8*, i32**, i32, metadata ) nounwind
+declare i32* @llvm.provenance.noalias.p0i32.p0i8.p0p0i32.p0p0i32.i32(i32*, i8*, i32**, i32**, i32, metadata ) nounwind
+
+
+;declare i32* @llvm.noalias.p0i32(i32*, metadata) nounwind argmemonly
+
+!0 = !{!0, !"some domain"}
+!1 = !{!1, !0, !"some scope"}
Index: llvm/test/Transforms/PropagateAndConvertNoAlias/basictest.ll
===================================================================
--- /dev/null
+++ llvm/test/Transforms/PropagateAndConvertNoAlias/basictest.ll
@@ -0,0 +1,80 @@
+; RUN: opt < %s -convert-noalias -verify -S | FileCheck %s
+; RUN: opt < %s -passes=convert-noalias,verify -S | FileCheck %s
+
+@gpA = common dso_local global i32* null, align 4
+
+; Function Attrs: nounwind
+define dso_local void @test01(i32* %_pA) #0 {
+entry:
+ %pA.decl = call i8* @llvm.noalias.decl.p0i8.p0p0i32.i32(i32** null, i32 0, metadata !2)
+ %pA = call i32* @llvm.noalias.p0i32.p0i8.p0p0i32.i32(i32* %_pA, i8* %pA.decl, i32** null, i32 0, metadata !2), !tbaa !5, !noalias !2
+ %arrayidx = getelementptr inbounds i32, i32* %pA, i32 10
+ store i32 42, i32* %arrayidx, align 4, !tbaa !9, !noalias !2
+ %pA.2 = call i32* @llvm.noalias.p0i32.p0i8.p0p0i32.i32(i32* %_pA, i8* %pA.decl, i32** null, i32 0, metadata !2), !tbaa !5, !noalias !2
+ %add.ptr = getelementptr inbounds i32, i32* %pA.2, i32 1
+ %pA.3 = call i32* @llvm.noalias.p0i32.p0i8.p0p0i32.i32(i32* %add.ptr, i8* %pA.decl, i32** null, i32 0, metadata !2), !tbaa !5, !noalias !2
+ %arrayidx1 = getelementptr inbounds i32, i32* %pA.3, i32 11
+ store i32 43, i32* %arrayidx1, align 4, !tbaa !9, !noalias !2
+ ret void
+}
+
+; CHECK-LABEL: @test01(
+; CHECK-NEXT: 
entry: +; CHECK-NEXT: %pA.decl = call i8* @llvm.noalias.decl.p0i8.p0p0i32.i32(i32** null, i32 0, metadata !2) +; CHECK-NEXT: %0 = call i32* @llvm.provenance.noalias.p0i32.p0i8.p0p0i32.p0p0i32.i32(i32* %_pA, i8* %pA.decl, i32** null, i32** undef, i32 0, metadata !2), !tbaa !5, !noalias !2 +; CHECK-NEXT: %arrayidx = getelementptr inbounds i32, i32* %_pA, i32 10 +; CHECK-NEXT: store i32 42, i32* %arrayidx, ptr_provenance i32* %0, align 4, !tbaa !9, !noalias !2 +; CHECK-NEXT: %add.ptr = getelementptr inbounds i32, i32* %_pA, i32 1 +; CHECK-NEXT: %arrayidx1 = getelementptr inbounds i32, i32* %add.ptr, i32 11 +; CHECK-NEXT: store i32 43, i32* %arrayidx1, ptr_provenance i32* %0, align 4, !tbaa !9, !noalias !2 +; CHECK-NEXT: ret void +; CHECK-NEXT: } + +; Function Attrs: nounwind +define dso_local i32* @test02(i32* %_pA) #0 { +entry: + %pA.decl = call i8* @llvm.noalias.decl.p0i8.p0p0i32.i32(i32** null, i32 0, metadata !11) + %pA.1 = call i32* @llvm.noalias.p0i32.p0i8.p0p0i32.i32(i32* %_pA, i8* %pA.decl, i32** null, i32 0, metadata !11), !tbaa !5, !noalias !11 + store i32* %pA.1, i32** @gpA, align 4, !tbaa !5, !noalias !11 + %pA.2 = call i32* @llvm.noalias.p0i32.p0i8.p0p0i32.i32(i32* %_pA, i8* %pA.decl, i32** null, i32 0, metadata !11), !tbaa !5, !noalias !11 + ret i32* %pA.2 +} + +; CHECK-LABEL: @test02( +; CHECK-NEXT: entry: +; CHECK-NEXT: %pA.decl = call i8* @llvm.noalias.decl.p0i8.p0p0i32.i32(i32** null, i32 0, metadata !11) +; CHECK-NEXT: %0 = call i32* @llvm.provenance.noalias.p0i32.p0i8.p0p0i32.p0p0i32.i32(i32* %_pA, i8* %pA.decl, i32** null, i32** undef, i32 0, metadata !11), !tbaa !5, !noalias !11 +; CHECK-NEXT: %pA.1.guard = call i32* @llvm.noalias.arg.guard.p0i32.p0i32(i32* %_pA, i32* %0) +; CHECK-NEXT: store i32* %pA.1.guard, i32** @gpA, align 4, !tbaa !5, !noalias !11 +; CHECK-NEXT: %pA.2.guard = call i32* @llvm.noalias.arg.guard.p0i32.p0i32(i32* %_pA, i32* %0) +; CHECK-NEXT: ret i32* %pA.2.guard +; CHECK-NEXT: } + + +; Function Attrs: argmemonly nounwind +declare i8* @llvm.noalias.decl.p0i8.p0p0i32.i32(i32**, i32, metadata) #1 + +; Function Attrs: argmemonly nounwind speculatable +declare i32* @llvm.noalias.p0i32.p0i8.p0p0i32.i32(i32*, i8*, i32**, i32, metadata) #2 + +attributes #0 = { nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #1 = { argmemonly nounwind } +attributes #2 = { argmemonly nounwind speculatable } + +!llvm.module.flags = !{!0} +!llvm.ident = !{!1} + +!0 = !{i32 1, !"wchar_size", i32 4} +!1 = !{!"clang"} +!2 = !{!3} +!3 = distinct !{!3, !4, !"test01: pA"} +!4 = distinct !{!4, !"test01"} +!5 = !{!6, !6, i64 0, i64 4} +!6 = !{!7, i64 4, !"any pointer"} +!7 = !{!8, i64 1, !"omnipotent char"} +!8 = !{!"Simple C/C++ TBAA"} +!9 = !{!10, !10, i64 0, i64 4} +!10 = !{!7, i64 4, !"int"} +!11 = !{!12} +!12 = distinct !{!12, !13, !"test02: pA"} +!13 = distinct !{!13, !"test02"} Index: llvm/test/Transforms/PropagateAndConvertNoAlias/call.ll =================================================================== --- /dev/null +++ llvm/test/Transforms/PropagateAndConvertNoAlias/call.ll @@ -0,0 +1,112 @@ +; RUN: opt < %s -convert-noalias -verify -S | FileCheck %s +; RUN: opt < %s -passes=convert-noalias,verify -S | FileCheck %s + +target 
datalayout = "e-p:64:64:64-p1:16:16:16-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-n8:16:32:64" + +; Function Attrs: nounwind +define dso_local i32* @passP(i32* %_pA) #0 { +entry: + %0 = call i8* @llvm.noalias.decl.p0i8.p0p0i32.i32(i32** null, i32 0, metadata !2) + %1 = call i32* @llvm.noalias.p0i32.p0i8.p0p0i32.i32(i32* %_pA, i8* %0, i32** null, i32 0, metadata !2), !tbaa !5, !noalias !2 + store i32 42, i32* %1, align 4, !tbaa !9, !noalias !2 + %2 = call i32* @llvm.noalias.p0i32.p0i8.p0p0i32.i32(i32* %_pA, i8* %0, i32** null, i32 0, metadata !2), !tbaa !5, !noalias !2 + ret i32* %2 +} + +; CHECK-LABEL: @passP( +; CHECK-NEXT: entry: +; CHECK-NEXT: %0 = call i8* @llvm.noalias.decl.p0i8.p0p0i32.i32(i32** null, i32 0, metadata !2) +; CHECK-NEXT: %1 = call i32* @llvm.provenance.noalias.p0i32.p0i8.p0p0i32.p0p0i32.i32(i32* %_pA, i8* %0, i32** null, i32** undef, i32 0, metadata !2), !tbaa !5, !noalias !2 +; CHECK-NEXT: store i32 42, i32* %_pA, ptr_provenance i32* %1, align 4, !tbaa !9, !noalias !2 +; CHECK-NEXT: %.guard = call i32* @llvm.noalias.arg.guard.p0i32.p0i32(i32* %_pA, i32* %1) +; CHECK-NEXT: ret i32* %.guard +; CHECK-NEXT: } + +; Function Attrs: nounwind +define dso_local void @test01(i32* %_pA) #0 { +entry: + %0 = call i8* @llvm.noalias.decl.p0i8.p0p0i32.i32(i32** null, i32 0, metadata !11) + %1 = call i32* @llvm.noalias.p0i32.p0i8.p0p0i32.i32(i32* %_pA, i8* %0, i32** null, i32 0, metadata !11), !tbaa !5, !noalias !11 + store i32 41, i32* %1, align 4, !tbaa !9, !noalias !11 + %2 = call i32* @llvm.noalias.p0i32.p0i8.p0p0i32.i32(i32* %_pA, i8* %0, i32** null, i32 0, metadata !11), !tbaa !5, !noalias !11 + %call = call i32* @passP(i32* %2), !noalias !11 + store i32 43, i32* %call, align 4, !tbaa !9, !noalias !11 + ret void +} + +; CHECK-LABEL: @test01( +; CHECK-NEXT: entry: +; CHECK-NEXT: %0 = call i8* @llvm.noalias.decl.p0i8.p0p0i32.i32(i32** null, i32 0, metadata !11) +; CHECK-NEXT: %1 = call i32* @llvm.provenance.noalias.p0i32.p0i8.p0p0i32.p0p0i32.i32(i32* %_pA, i8* %0, i32** null, i32** undef, i32 0, metadata !11), !tbaa !5, !noalias !11 +; CHECK-NEXT: store i32 41, i32* %_pA, ptr_provenance i32* %1, align 4, !tbaa !9, !noalias !11 +; CHECK-NEXT: %.guard = call i32* @llvm.noalias.arg.guard.p0i32.p0i32(i32* %_pA, i32* %1) +; CHECK-NEXT: %call = call i32* @passP(i32* %.guard), !noalias !11 +; CHECK-NEXT: store i32 43, i32* %call, align 4, !tbaa !9, !noalias !11 +; CHECK-NEXT: ret void +; CHECK-NEXT: } + +; Function Attrs: nounwind +define dso_local void @test02(i32* %_pA) #0 { +entry: + %0 = call i8* @llvm.noalias.decl.p0i8.p0p0i32.i32(i32** null, i32 0, metadata !11) + %1 = call i32* @llvm.noalias.p0i32.p0i8.p0p0i32.i32(i32* %_pA, i8* %0, i32** null, i32 0, metadata !11), !tbaa !5, !noalias !11 + store i32 41, i32* %1, align 4, !tbaa !9, !noalias !11 + %2 = call i32* @llvm.noalias.p0i32.p0i8.p0p0i32.i32(i32* %_pA, i8* %0, i32** null, i32 0, metadata !11), !tbaa !5, !noalias !11 + br label %block + +block: + %tmp0 = phi i32* [ %2, %entry ] + %tmp1 = phi i32* [ %1, %entry ] + %call = call i32* @passP(i32* %tmp0), !noalias !11 + store i32 43, i32* %call, align 4, !tbaa !9, !noalias !11 + ret void +} + + +; CHECK-LABEL: @test02( +; CHECK-NEXT: entry: +; CHECK-NEXT: %0 = call i8* @llvm.noalias.decl.p0i8.p0p0i32.i32(i32** null, i32 0, metadata !11) +; CHECK-NEXT: %1 = call i32* @llvm.provenance.noalias.p0i32.p0i8.p0p0i32.p0p0i32.i32(i32* %_pA, i8* %0, i32** null, i32** undef, i32 0, metadata !11), !tbaa !5, !noalias !11 +; CHECK-NEXT: store 
i32 41, i32* %_pA, ptr_provenance i32* %1, align 4, !tbaa !9, !noalias !11 +; CHECK-NEXT: br label %block +; CHECK: block: +; CHECK-NEXT: %prov.tmp0 = phi i32* [ %1, %entry ] +; CHECK-NEXT: %tmp0 = phi i32* [ %_pA, %entry ] +; CHECK-NEXT: %prov.tmp1 = phi i32* [ %1, %entry ] +; CHECK-NEXT: %tmp1 = phi i32* [ %_pA, %entry ] +; CHECK-NEXT: %tmp0.guard = call i32* @llvm.noalias.arg.guard.p0i32.p0i32(i32* %tmp0, i32* %prov.tmp0) +; CHECK-NEXT: %call = call i32* @passP(i32* %tmp0.guard), !noalias !11 +; CHECK-NEXT: store i32 43, i32* %call, align 4, !tbaa !9, !noalias !11 +; CHECK-NEXT: ret void +; CHECK-NEXT: } + +; Function Attrs: argmemonly nounwind +declare i8* @llvm.noalias.decl.p0i8.p0p0i32.i32(i32**, i32, metadata) #2 + +; Function Attrs: argmemonly nounwind speculatable +declare i32* @llvm.noalias.p0i32.p0i8.p0p0i32.i32(i32*, i8*, i32**, i32, metadata) #3 + +attributes #0 = { nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #1 = { argmemonly nounwind willreturn } +attributes #2 = { argmemonly nounwind } +attributes #3 = { argmemonly nounwind speculatable } +attributes #4 = { nounwind readnone speculatable } +attributes #5 = { nounwind readnone } + +!llvm.module.flags = !{!0} +!llvm.ident = !{!1} + +!0 = !{i32 1, !"wchar_size", i32 4} +!1 = !{!"clang"} +!2 = !{!3} +!3 = distinct !{!3, !4, !"passP: pA"} +!4 = distinct !{!4, !"passP"} +!5 = !{!6, !6, i64 0, i64 4} +!6 = !{!7, i64 4, !"any pointer"} +!7 = !{!8, i64 1, !"omnipotent char"} +!8 = !{!"Simple C/C++ TBAA"} +!9 = !{!10, !10, i64 0, i64 4} +!10 = !{!7, i64 4, !"int"} +!11 = !{!12} +!12 = distinct !{!12, !13, !"test01: p1"} +!13 = distinct !{!13, !"test01"} Index: llvm/test/Transforms/PropagateAndConvertNoAlias/degenerated.ll =================================================================== --- /dev/null +++ llvm/test/Transforms/PropagateAndConvertNoAlias/degenerated.ll @@ -0,0 +1,126 @@ +; RUN: opt < %s -convert-noalias -verify -S | FileCheck %s +; RUN: opt < %s -passes=convert-noalias,verify -S | FileCheck %s + +target datalayout = "e-p:64:64:64-p1:16:16:16-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-n8:16:32:64" + +; Function Attrs: nounwind +define dso_local void @test01(i32* %_pA) local_unnamed_addr #0 { +entry: + %0 = call i8* @llvm.noalias.decl.p0i8.p0p0i32.i32(i32** null, i32 0, metadata !2) + %1 = call i32* @llvm.provenance.noalias.p0i32.p0i8.p0p0i32.p0p0i32.i32(i32* %_pA, i8* %0, i32** null, i32** undef, i32 0, metadata !2), !tbaa !5, !noalias !2 + store i32 41, i32* %_pA, ptr_provenance i32* %1, align 4, !tbaa !9, !noalias !2 + %.guard = call i32* @llvm.noalias.arg.guard.p0i32.p0i32(i32* %_pA, i32* %1) + %2 = call i8* @llvm.noalias.decl.p0i8.p0p0i32.i32(i32** null, i32 0, metadata !11) #5, !noalias !2 + %3 = call i32* @llvm.provenance.noalias.p0i32.p0i8.p0p0i32.p0p0i32.i32(i32* %.guard, i8* %2, i32** null, i32** undef, i32 0, metadata !11) #3, !tbaa !5, !noalias !14 + store i32 42, i32* %.guard, ptr_provenance i32* %3, align 4, !tbaa !9, !noalias !14 + %.guard.guard.guard.guard.i = call i32* @llvm.noalias.arg.guard.p0i32.p0i32(i32* %.guard, i32* %3) #1 + store i32 43, i32* %.guard.guard.guard.guard.i, ptr_provenance i32* 
undef, align 4, !tbaa !9, !noalias !2 + ret void +} + +; CHECK-LABEL: @test01( +; CHECK-NEXT: entry: +; CHECK-NEXT: %0 = call i8* @llvm.noalias.decl.p0i8.p0p0i32.i32(i32** null, i32 0, metadata !2) +; CHECK-NEXT: %1 = call i32* @llvm.provenance.noalias.p0i32.p0i8.p0p0i32.p0p0i32.i32(i32* %_pA, i8* %0, i32** null, i32** undef, i32 0, metadata !2), !tbaa !5, !noalias !2 +; CHECK-NEXT: store i32 41, i32* %_pA, ptr_provenance i32* %1, align 4, !tbaa !9, !noalias !2 +; CHECK-NEXT: %2 = call i8* @llvm.noalias.decl.p0i8.p0p0i32.i32(i32** null, i32 0, metadata !11) #{{[0-9]+}}, !noalias !2 +; CHECK-NEXT: %3 = call i32* @llvm.provenance.noalias.p0i32.p0i8.p0p0i32.p0p0i32.i32(i32* %1, i8* %2, i32** null, i32** undef, i32 0, metadata !11) #{{[0-9]+}}, !tbaa !5, !noalias !14 +; CHECK-NEXT: store i32 42, i32* %_pA, ptr_provenance i32* %3, align 4, !tbaa !9, !noalias !14 +; CHECK-NEXT: store i32 43, i32* %_pA, ptr_provenance i32* %3, align 4, !tbaa !9, !noalias !2 +; CHECK-NEXT: ret void +; CHECK-NEXT: } + +; Function Attrs: nounwind +define dso_local void @test02(i32* %_pA) local_unnamed_addr #0 { +entry: + %0 = call i8* @llvm.noalias.decl.p0i8.p0p0i32.i32(i32** null, i32 0, metadata !2) + %1 = call i32* @llvm.provenance.noalias.p0i32.p0i8.p0p0i32.p0p0i32.i32(i32* %_pA, i8* %0, i32** null, i32** undef, i32 0, metadata !2), !tbaa !5, !noalias !2 + store i32 41, i32* %_pA, ptr_provenance i32* %1, align 4, !tbaa !9, !noalias !2 + %.guard = call i32* @llvm.noalias.arg.guard.p0i32.p0i32(i32* %_pA, i32* %1) + %2 = call i8* @llvm.noalias.decl.p0i8.p0p0i32.i32(i32** null, i32 0, metadata !11) #5, !noalias !2 + %3 = call i32* @llvm.provenance.noalias.p0i32.p0i8.p0p0i32.p0p0i32.i32(i32* %.guard, i8* %2, i32** null, i32** undef, i32 0, metadata !11) #3, !tbaa !5, !noalias !14 + store i32 42, i32* %.guard, ptr_provenance i32* %3, align 4, !tbaa !9, !noalias !14 + %deg01 = call i32* @llvm.noalias.arg.guard.p0i32.p0i32(i32* %.guard, i32* %.guard) #1 + %deg02 = call i32* @llvm.noalias.arg.guard.p0i32.p0i32(i32* %.guard, i32* undef) #1 + call void @foo(i32* %deg01), !noalias !14 + call void @foo(i32* %deg01), !noalias !14 + ret void +} + +; CHECK-LABEL: @test02( +; CHECK: ret void + +%class.e = type { %class.a } +%class.a = type { i32 } + +@g = global %class.e zeroinitializer, align 4 + +; Function Attrs: nounwind +define internal fastcc void @test03() unnamed_addr #0 { +entry: + %0 = tail call i8* @llvm.noalias.decl.p0i8.p0p0s_class.es.i64(%class.e** null, i64 0, metadata !15) + %1 = tail call %class.e* @llvm.provenance.noalias.p0s_class.es.p0i8.p0p0s_class.es.p0p0s_class.es.i64(%class.e* nonnull @g, i8* %0, %class.e** null, %class.e** undef, i64 0, metadata !15) + %2 = getelementptr inbounds %class.e, %class.e* %1, i32 0, i32 0 + %.guard.guard = tail call %class.a* @llvm.noalias.arg.guard.p0s_class.as.p0s_class.as(%class.a* getelementptr inbounds (%class.e, %class.e* @g, i32 0, i32 0), %class.a* %2) + call void @foobar03(%class.a* %.guard.guard, i32 5), !noalias !15 + ret void +} + +; CHECK-LABEL: @test03( +; CHECK: call %class.a* @llvm.noalias.arg.guard.p0s_class.as.p0s_class.as +; CHECK: ret void + +; Function Attrs: nounwind +declare void @foobar03(%class.a*, i32) local_unnamed_addr #0 + +; Function Attrs: argmemonly nounwind +declare i8* @llvm.noalias.decl.p0i8.p0p0i32.i32(i32**, i32, metadata) #1 + +; Function Attrs: argmemonly nounwind speculatable +declare i32* @llvm.noalias.p0i32.p0i8.p0p0i32.i32(i32*, i8*, i32**, i32, metadata) #2 + +; Function Attrs: nounwind readnone speculatable +declare i32* 
@llvm.provenance.noalias.p0i32.p0i8.p0p0i32.p0p0i32.i32(i32*, i8*, i32**, i32**, i32, metadata) #3 + +; Function Attrs: nounwind readnone +declare i32* @llvm.noalias.arg.guard.p0i32.p0i32(i32*, i32*) #4 + +; Function Attrs: argmemonly nounwind +declare i8* @llvm.noalias.decl.p0i8.p0p0s_class.es.i64(%class.e**, i64, metadata) #1 + +; Function Attrs: nounwind readnone speculatable +declare %class.e* @llvm.provenance.noalias.p0s_class.es.p0i8.p0p0s_class.es.p0p0s_class.es.i64(%class.e*, i8*, %class.e**, %class.e**, i64, metadata) #3 + +; Function Attrs: nounwind readnone +declare %class.a* @llvm.noalias.arg.guard.p0s_class.as.p0s_class.as(%class.a*, %class.a*) #4 + +declare void @foo(i32*) + +attributes #0 = { nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #1 = { argmemonly nounwind } +attributes #2 = { argmemonly nounwind speculatable } +attributes #3 = { nounwind readnone speculatable } +attributes #4 = { nounwind readnone } +attributes #5 = { nounwind } + +!llvm.module.flags = !{!0} +!llvm.ident = !{!1} + +!0 = !{i32 1, !"wchar_size", i32 4} +!1 = !{!"clang"} +!2 = !{!3} +!3 = distinct !{!3, !4, !"test01: p1"} +!4 = distinct !{!4, !"test01"} +!5 = !{!6, !6, i64 0, i64 4} +!6 = !{!7, i64 4, !"any pointer"} +!7 = !{!8, i64 1, !"omnipotent char"} +!8 = !{!"Simple C/C++ TBAA"} +!9 = !{!10, !10, i64 0, i64 4} +!10 = !{!7, i64 4, !"int"} +!11 = !{!12} +!12 = distinct !{!12, !13, !"passP: pA"} +!13 = distinct !{!13, !"passP"} +!14 = !{!12, !3} +!15 = !{!16} +!16 = distinct !{!16, !17, !"test03: %agg.result"} +!17 = distinct !{!17, !"test03"} Index: llvm/test/Transforms/PropagateAndConvertNoAlias/double_noalias.ll =================================================================== --- /dev/null +++ llvm/test/Transforms/PropagateAndConvertNoAlias/double_noalias.ll @@ -0,0 +1,69 @@ +; RUN: opt < %s -convert-noalias -verify -S | FileCheck %s +; RUN: opt < %s -passes=convert-noalias,verify -S | FileCheck %s + +target datalayout = "e-i8:8:8-i16:16:16-i32:32:32-i64:32:32-f16:16:16-f32:32:32-f64:32:32-p:32:32:32:32:8-s0:32:32-a0:0:32-S32-n16:32-v128:32:32-P0-p0:32:32:32:32:8" + +; Function Attrs: nounwind +define dso_local void @test_rr(i32** %_p) #0 !noalias !2 { +entry: + %0 = call i8* @llvm.noalias.decl.p0i8.p0p0p0i32.i32(i32*** null, i32 0, metadata !5) + %1 = call i32** @llvm.noalias.p0p0i32.p0i8.p0p0p0i32.i32(i32** %_p, i8* %0, i32*** null, i32 0, metadata !5), !tbaa !7, !noalias !11 + %arrayidx = getelementptr inbounds i32*, i32** %1, i32 1 + %2 = load i32*, i32** %arrayidx, align 4, !tbaa !7, !noalias !11 + %3 = call i32* @llvm.noalias.p0i32.p0i8.p0p0i32.i32(i32* %2, i8* null, i32** %arrayidx, i32 0, metadata !2), !tbaa !7, !noalias !11 + %arrayidx1 = getelementptr inbounds i32, i32* %3, i32 2 + %4 = load i32, i32* %arrayidx1, align 4, !tbaa !12, !noalias !11 + %add = add nsw i32 %4, 1 + %5 = call i32** @llvm.noalias.p0p0i32.p0i8.p0p0p0i32.i32(i32** %_p, i8* %0, i32*** null, i32 0, metadata !5), !tbaa !7, !noalias !11 + %6 = load i32*, i32** %5, align 4, !tbaa !7, !noalias !11 + %7 = call i32* @llvm.noalias.p0i32.p0i8.p0p0i32.i32(i32* %6, i8* null, i32** %5, i32 0, metadata !2), !tbaa !7, !noalias !11 + store i32 %add, i32* %7, align 4, !tbaa !12, 
!noalias !11 + ret void +} + +; CHECK-LABEL: @test_rr( +; CHECK-NEXT: entry: +; CHECK-NEXT: %0 = call i8* @llvm.noalias.decl.p0i8.p0p0p0i32.i32(i32*** null, i32 0, metadata !5) +; CHECK-NEXT: %1 = call i32** @llvm.provenance.noalias.p0p0i32.p0i8.p0p0p0i32.p0p0p0i32.i32(i32** %_p, i8* %0, i32*** null, i32*** undef, i32 0, metadata !5), !tbaa !7, !noalias !11 +; CHECK-NEXT: %arrayidx = getelementptr inbounds i32*, i32** %_p, i32 1 +; CHECK-NEXT: %2 = load i32*, i32** %arrayidx, ptr_provenance i32** %1, align 4, !tbaa !7, !noalias !11 +; CHECK-NEXT: %3 = call i32* @llvm.provenance.noalias.p0i32.p0i8.p0p0i32.p0p0i32.i32(i32* %2, i8* null, i32** %arrayidx, i32** %1, i32 0, metadata !2), !tbaa !7, !noalias !11 +; CHECK-NEXT: %arrayidx1 = getelementptr inbounds i32, i32* %2, i32 2 +; CHECK-NEXT: %4 = load i32, i32* %arrayidx1, ptr_provenance i32* %3, align 4, !tbaa !12, !noalias !11 +; CHECK-NEXT: %add = add nsw i32 %4, 1 +; CHECK-NEXT: %5 = load i32*, i32** %_p, ptr_provenance i32** %1, align 4, !tbaa !7, !noalias !11 +; CHECK-NEXT: %6 = call i32* @llvm.provenance.noalias.p0i32.p0i8.p0p0i32.p0p0i32.i32(i32* %5, i8* null, i32** %_p, i32** %1, i32 0, metadata !2), !tbaa !7, !noalias !11 +; CHECK-NEXT: store i32 %add, i32* %5, ptr_provenance i32* %6, align 4, !tbaa !12, !noalias !11 +; CHECK-NEXT: ret void +; CHECK-NEXT: } + +; Function Attrs: argmemonly nounwind speculatable +declare i32* @llvm.noalias.p0i32.p0i8.p0p0i32.i32(i32*, i8*, i32**, i32, metadata) #1 + +; Function Attrs: argmemonly nounwind +declare i8* @llvm.noalias.decl.p0i8.p0p0p0i32.i32(i32***, i32, metadata) #2 + +; Function Attrs: argmemonly nounwind speculatable +declare i32** @llvm.noalias.p0p0i32.p0i8.p0p0p0i32.i32(i32**, i8*, i32***, i32, metadata) #1 + +attributes #0 = { nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #1 = { argmemonly nounwind speculatable } +attributes #2 = { argmemonly nounwind } + +!llvm.module.flags = !{!0} +!llvm.ident = !{!1} + +!0 = !{i32 1, !"wchar_size", i32 4} +!1 = !{!"clang"} +!2 = !{!3} +!3 = distinct !{!3, !4, !"test_rr: unknown scope"} +!4 = distinct !{!4, !"test_rr"} +!5 = !{!6} +!6 = distinct !{!6, !4, !"test_rr: rprp"} +!7 = !{!8, !8, i64 0, i64 4} +!8 = !{!9, i64 4, !"any pointer"} +!9 = !{!10, i64 1, !"omnipotent char"} +!10 = !{!"Simple C/C++ TBAA"} +!11 = !{!6, !3} +!12 = !{!13, !13, i64 0, i64 4} +!13 = !{!9, i64 4, !"int"} Index: llvm/test/Transforms/PropagateAndConvertNoAlias/inlined.ll =================================================================== --- /dev/null +++ llvm/test/Transforms/PropagateAndConvertNoAlias/inlined.ll @@ -0,0 +1,69 @@ +; RUN: opt < %s -convert-noalias -verify -S | FileCheck %s +; RUN: opt < %s -passes=convert-noalias,verify -S | FileCheck %s + +target datalayout = "e-p:64:64:64-p1:16:16:16-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-n8:16:32:64" + +; Function Attrs: nounwind +define dso_local void @test01(i32* %_pA) local_unnamed_addr #0 { +entry: + %0 = call i8* @llvm.noalias.decl.p0i8.p0p0i32.i32(i32** null, i32 0, metadata !2) + %1 = call i32* @llvm.provenance.noalias.p0i32.p0i8.p0p0i32.p0p0i32.i32(i32* %_pA, i8* %0, i32** null, i32** undef, i32 0, metadata 
!2), !tbaa !5, !noalias !2 + store i32 41, i32* %_pA, ptr_provenance i32* %1, align 4, !tbaa !9, !noalias !2 + %.guard = call i32* @llvm.noalias.arg.guard.p0i32.p0i32(i32* %_pA, i32* %1) + %2 = call i8* @llvm.noalias.decl.p0i8.p0p0i32.i32(i32** null, i32 0, metadata !11) #5, !noalias !2 + %3 = call i32* @llvm.provenance.noalias.p0i32.p0i8.p0p0i32.p0p0i32.i32(i32* %.guard, i8* %2, i32** null, i32** undef, i32 0, metadata !11) #3, !tbaa !5, !noalias !14 + store i32 42, i32* %.guard, ptr_provenance i32* %3, align 4, !tbaa !9, !noalias !14 + %.guard.guard.guard.guard.i = call i32* @llvm.noalias.arg.guard.p0i32.p0i32(i32* %.guard, i32* %3) #1 + store i32 43, i32* %.guard.guard.guard.guard.i, ptr_provenance i32* undef, align 4, !tbaa !9, !noalias !2 + ret void +} + +; CHECK-LABEL: @test01( +; CHECK-NEXT: entry: +; CHECK-NEXT: %0 = call i8* @llvm.noalias.decl.p0i8.p0p0i32.i32(i32** null, i32 0, metadata !2) +; CHECK-NEXT: %1 = call i32* @llvm.provenance.noalias.p0i32.p0i8.p0p0i32.p0p0i32.i32(i32* %_pA, i8* %0, i32** null, i32** undef, i32 0, metadata !2), !tbaa !5, !noalias !2 +; CHECK-NEXT: store i32 41, i32* %_pA, ptr_provenance i32* %1, align 4, !tbaa !9, !noalias !2 +; CHECK-NEXT: %2 = call i8* @llvm.noalias.decl.p0i8.p0p0i32.i32(i32** null, i32 0, metadata !11) #{{[0-9]+}}, !noalias !2 +; CHECK-NEXT: %3 = call i32* @llvm.provenance.noalias.p0i32.p0i8.p0p0i32.p0p0i32.i32(i32* %1, i8* %2, i32** null, i32** undef, i32 0, metadata !11) #{{[0-9]+}}, !tbaa !5, !noalias !14 +; CHECK-NEXT: store i32 42, i32* %_pA, ptr_provenance i32* %3, align 4, !tbaa !9, !noalias !14 +; CHECK-NEXT: store i32 43, i32* %_pA, ptr_provenance i32* %3, align 4, !tbaa !9, !noalias !2 +; CHECK-NEXT: ret void +; CHECK-NEXT: } + +; Function Attrs: argmemonly nounwind +declare i8* @llvm.noalias.decl.p0i8.p0p0i32.i32(i32**, i32, metadata) #1 + +; Function Attrs: argmemonly nounwind speculatable +declare i32* @llvm.noalias.p0i32.p0i8.p0p0i32.i32(i32*, i8*, i32**, i32, metadata) #2 + +; Function Attrs: nounwind readnone speculatable +declare i32* @llvm.provenance.noalias.p0i32.p0i8.p0p0i32.p0p0i32.i32(i32*, i8*, i32**, i32**, i32, metadata) #3 + +; Function Attrs: nounwind readnone +declare i32* @llvm.noalias.arg.guard.p0i32.p0i32(i32*, i32*) #4 + +attributes #0 = { nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #1 = { argmemonly nounwind } +attributes #2 = { argmemonly nounwind speculatable } +attributes #3 = { nounwind readnone speculatable } +attributes #4 = { nounwind readnone } +attributes #5 = { nounwind } + +!llvm.module.flags = !{!0} +!llvm.ident = !{!1} + +!0 = !{i32 1, !"wchar_size", i32 4} +!1 = !{!"clang"} +!2 = !{!3} +!3 = distinct !{!3, !4, !"test01: p1"} +!4 = distinct !{!4, !"test01"} +!5 = !{!6, !6, i64 0, i64 4} +!6 = !{!7, i64 4, !"any pointer"} +!7 = !{!8, i64 1, !"omnipotent char"} +!8 = !{!"Simple C/C++ TBAA"} +!9 = !{!10, !10, i64 0, i64 4} +!10 = !{!7, i64 4, !"int"} +!11 = !{!12} +!12 = distinct !{!12, !13, !"passP: pA"} +!13 = distinct !{!13, !"passP"} +!14 = !{!12, !3} Index: llvm/test/Transforms/PropagateAndConvertNoAlias/noalias_cleanup.ll =================================================================== --- /dev/null +++ 
llvm/test/Transforms/PropagateAndConvertNoAlias/noalias_cleanup.ll @@ -0,0 +1,47 @@ +; RUN: opt < %s -convert-noalias -verify -S | FileCheck %s + +target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +define dso_local void @foo() local_unnamed_addr #0 { +entry: + br label %for.cond + +for.cond: ; preds = %if.then, %entry + %prov.bar.0 = phi i32* [ undef, %entry ], [ %prov.bar.0, %if.then ] + br i1 undef, label %for.cond3thread-pre-split, label %if.then + +if.then: ; preds = %for.cond + %0 = tail call i32* @llvm.provenance.noalias.p0i32.p0i8.p0p0i32.p0p0i32.i64(i32* %prov.bar.0, i8* undef, i32** null, i32** undef, i64 0, metadata !1) + br i1 undef, label %for.cond, label %for.body + +for.body: ; preds = %for.body, %if.then + %prov.bar.116 = phi i32* [ %1, %for.body ], [ %prov.bar.0, %if.then ] + %1 = tail call i32* @llvm.provenance.noalias.p0i32.p0i8.p0p0i32.p0p0i32.i64(i32* %prov.bar.116, i8* undef, i32** null, i32** undef, i64 0, metadata !1) + br label %for.body + +for.cond3thread-pre-split: ; preds = %for.cond + br label %for.body5 + +for.body5: ; preds = %for.body5, %for.cond3thread-pre-split + %prov.bar.220 = phi i32* [ %2, %for.body5 ], [ %prov.bar.0, %for.cond3thread-pre-split ] + %2 = tail call i32* @llvm.provenance.noalias.p0i32.p0i8.p0p0i32.p0p0i32.i64(i32* %prov.bar.220, i8* undef, i32** null, i32** undef, i64 0, metadata !1) + br label %for.body5 +} + +; CHECK-LABEL: @foo +; CHECK: call i32* @llvm.provenance.noalias +; CHECK-NOT: call i32* @llvm.provenance.noalias + +; Function Attrs: nounwind readnone speculatable +declare i32* @llvm.provenance.noalias.p0i32.p0i8.p0p0i32.p0p0i32.i64(i32*, i8*, i32**, i32**, i64, metadata) #1 + +attributes #0 = { "use-soft-float"="false" } +attributes #1 = { nounwind readnone speculatable } + +!llvm.ident = !{!0} + +!0 = !{!"clang)"} +!1 = !{!2} +!2 = distinct !{!2, !3, !"foo: bar"} +!3 = distinct !{!3, !"foo"} Index: llvm/test/Transforms/PropagateAndConvertNoAlias/reduced01.ll =================================================================== --- /dev/null +++ llvm/test/Transforms/PropagateAndConvertNoAlias/reduced01.ll @@ -0,0 +1,74 @@ +; RUN: opt < %s -convert-noalias -verify -S | FileCheck %s +; RUN: opt < %s -passes=convert-noalias,verify -S | FileCheck %s +; RUN: opt < %s -convert-noalias -verify -convert-noalias -verify -S | FileCheck %s + +target datalayout = "e-i8:8:8-i16:16:16-i32:32:32-i64:32:32-f16:16:16-f32:32:32-f64:32:32-p:32:32:32:32:8-s0:32:32-a0:0:32-S32-n16:32-v128:32:32-P0-p0:32:32:32:32:8" + +%struct.a = type { i8 } + +; Function Attrs: noreturn +define dso_local void @_Z3fooPii(i32* %_f, i32 %g) local_unnamed_addr #0 { +entry: + %0 = call i8* @llvm.noalias.decl.p0i8.p0p0i32.i32(i32** null, i32 0, metadata !2) + %1 = call i32* @llvm.provenance.noalias.p0i32.p0i8.p0p0i32.p0p0i32.i32(i32* %_f, i8* %0, i32** null, i32** undef, i32 0, metadata !2), !tbaa !5, !noalias !2 + %2 = bitcast i32* %1 to %struct.a* + %3 = bitcast i32* %_f to %struct.a* + br label %for.cond + +for.cond: ; preds = %for.cond, %entry + %prov.h.0 = phi %struct.a* [ %2, %entry ], [ %h.0.guard, %for.cond ] + %h.0 = phi %struct.a* [ %3, %entry ], [ %h.0.guard, %for.cond ] + %h.0.guard = call %struct.a* @llvm.noalias.arg.guard.p0s_struct.as.p0s_struct.as(%struct.a* %h.0, %struct.a* %prov.h.0) + %4 = getelementptr inbounds %struct.a, %struct.a* %h.0, i32 0, i32 0 + %.unpack = load i8, i8* %4, ptr_provenance %struct.a* %prov.h.0, align 1, !noalias !2 + %5 = insertvalue %struct.a 
undef, i8 %.unpack, 0 + %call = call i32 @_Z1b1a(%struct.a %5), !noalias !2 + br label %for.cond +} +; CHECK: define dso_local void @_Z3fooPii(i32* %_f, i32 %g) local_unnamed_addr #0 { +; CHECK-NEXT: entry: +; CHECK-NEXT: %0 = call i8* @llvm.noalias.decl.p0i8.p0p0i32.i32(i32** null, i32 0, metadata !2) +; CHECK-NEXT: %1 = call i32* @llvm.provenance.noalias.p0i32.p0i8.p0p0i32.p0p0i32.i32(i32* %_f, i8* %0, i32** null, i32** undef, i32 0, metadata !2), !tbaa !5, !noalias !2 +; CHECK-NEXT: %2 = bitcast i32* %1 to %struct.a* +; CHECK-NEXT: %3 = bitcast i32* %_f to %struct.a* +; CHECK-NEXT: br label %for.cond +; CHECK: for.cond: ; preds = %for.cond, %entry +; CHECK-NEXT: %prov.h.0 = phi %struct.a* [ %2, %entry ], [ %prov.h.0, %for.cond ] +; CHECK-NEXT: %prov.h.01 = phi %struct.a* [ %3, %entry ], [ %prov.h.0, %for.cond ] +; CHECK-NEXT: %h.0 = phi %struct.a* [ %3, %entry ], [ %h.0, %for.cond ] +; CHECK-NEXT: %4 = getelementptr inbounds %struct.a, %struct.a* %h.0, i32 0, i32 0 +; CHECK-NEXT: %.unpack = load i8, i8* %4, ptr_provenance %struct.a* %prov.h.0, align 1, !noalias !2 +; CHECK-NEXT: %5 = insertvalue %struct.a undef, i8 %.unpack, 0 +; CHECK-NEXT: %call = call i32 @_Z1b1a(%struct.a %5), !noalias !2 +; CHECK-NEXT: br label %for.cond + + +; Function Attrs: argmemonly nounwind +declare i8* @llvm.noalias.decl.p0i8.p0p0i32.i32(i32**, i32, metadata) #1 + +declare dso_local i32 @_Z1b1a(%struct.a) local_unnamed_addr #2 + +; Function Attrs: nounwind readnone +declare %struct.a* @llvm.noalias.arg.guard.p0s_struct.as.p0s_struct.as(%struct.a*, %struct.a*) #3 + +; Function Attrs: nounwind readnone speculatable +declare i32* @llvm.provenance.noalias.p0i32.p0i8.p0p0i32.p0p0i32.i32(i32*, i8*, i32**, i32**, i32, metadata) #4 + +attributes #0 = { noreturn "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #1 = { argmemonly nounwind } +attributes #2 = { "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #3 = { nounwind readnone } +attributes #4 = { nounwind readnone speculatable } + +!llvm.module.flags = !{!0} +!llvm.ident = !{!1} + +!0 = !{i32 1, !"wchar_size", i32 4} +!1 = !{!"clang version 7.0.0 "} +!2 = !{!3} +!3 = distinct !{!3, !4, !"_Z3fooPii: f"} +!4 = distinct !{!4, !"_Z3fooPii"} +!5 = !{!6, !6, i64 0} +!6 = !{!"any pointer", !7, i64 0} +!7 = !{!"omnipotent char", !8, i64 0} +!8 = !{!"Simple C++ TBAA"} Index: llvm/test/Transforms/PropagateAndConvertNoAlias/select_and_phi.ll =================================================================== --- /dev/null +++ llvm/test/Transforms/PropagateAndConvertNoAlias/select_and_phi.ll @@ -0,0 +1,208 @@ +; RUN: opt < %s -convert-noalias -verify -S | FileCheck %s +; RUN: opt < %s -passes=convert-noalias,verify -S | FileCheck %s +; RUN: opt < %s -convert-noalias -verify -convert-noalias -verify -S | FileCheck %s + +; Function Attrs: nounwind +define dso_local void @test_phi01(i32* %_pA, i32** 
%_pB, i32 %n) #0 { +entry: + %0 = call i8* @llvm.noalias.decl.p0i8.p0p0i32.i32(i32** null, i32 0, metadata !2) + %1 = call i8* @llvm.noalias.decl.p0i8.p0p0i32.i32(i32** null, i32 0, metadata !5) + %arrayidx = getelementptr inbounds i32*, i32** %_pB, i32 2 + %2 = load i32*, i32** %arrayidx, align 4, !tbaa !7, !noalias !11 + %3 = call i32* @llvm.noalias.p0i32.p0i8.p0p0i32.i32(i32* %_pA, i8* %0, i32** null, i32 0, metadata !2), !tbaa !7, !noalias !11 + %4 = load i32*, i32** %_pB, align 4, !tbaa !7, !noalias !11 + %arrayidx2 = getelementptr inbounds i32*, i32** %_pB, i32 1 + %5 = load i32*, i32** %arrayidx2, align 4, !tbaa !7, !noalias !11 + br label %for.cond + +for.cond: ; preds = %for.body, %entry + %pTmp00.0 = phi i32* [ %4, %entry ], [ %pTmp01.0, %for.body ] + %pTmp01.0 = phi i32* [ %5, %entry ], [ %3, %for.body ] + %i.0 = phi i32 [ 0, %entry ], [ %inc, %for.body ] + %cmp = icmp slt i32 %i.0, %n + br i1 %cmp, label %for.body, label %for.cond.cleanup + +for.cond.cleanup: ; preds = %for.cond + %6 = call i32* @llvm.noalias.p0i32.p0i8.p0p0i32.i32(i32* %_pA, i8* %0, i32** null, i32 0, metadata !2), !tbaa !7, !noalias !11 + store i32 99, i32* %6, align 4, !tbaa !12, !noalias !11 + store i32 42, i32* %pTmp00.0, align 4, !tbaa !12, !noalias !11 + %cmp5 = icmp sgt i32 %n, 5 + %7 = call i32* @llvm.noalias.p0i32.p0i8.p0p0i32.i32(i32* %2, i8* %1, i32** null, i32 0, metadata !5) + %cond = select i1 %cmp5, i32* %pTmp00.0, i32* %7 + store i32 43, i32* %cond, align 4, !tbaa !12, !noalias !11 + ret void + +for.body: ; preds = %for.cond + %arrayidx3 = getelementptr inbounds i32, i32* %pTmp01.0, i32 1 + %8 = load i32, i32* %arrayidx3, align 4, !tbaa !12, !noalias !11 + %arrayidx4 = getelementptr inbounds i32, i32* %pTmp00.0, i32 1 + store i32 %8, i32* %arrayidx4, align 4, !tbaa !12, !noalias !11 + %inc = add nsw i32 %i.0, 1 + br label %for.cond +} + +; CHECK-LABEL: @test_phi01( +; CHECK-NEXT: entry: +; CHECK-NEXT: %0 = call i8* @llvm.noalias.decl.p0i8.p0p0i32.i32(i32** null, i32 0, metadata !2) +; CHECK-NEXT: %1 = call i8* @llvm.noalias.decl.p0i8.p0p0i32.i32(i32** null, i32 0, metadata !5) +; CHECK-NEXT: %arrayidx = getelementptr inbounds i32*, i32** %_pB, i32 2 +; CHECK-NEXT: %2 = load i32*, i32** %arrayidx, align 4, !tbaa !7, !noalias !11 +; CHECK-NEXT: %3 = call i32* @llvm.provenance.noalias.p0i32.p0i8.p0p0i32.p0p0i32.i32(i32* %_pA, i8* %0, i32** null, i32** undef, i32 0, metadata !2), !tbaa !7, !noalias !11 +; CHECK-NEXT: %4 = load i32*, i32** %_pB, align 4, !tbaa !7, !noalias !11 +; CHECK-NEXT: %arrayidx2 = getelementptr inbounds i32*, i32** %_pB, i32 1 +; CHECK-NEXT: %5 = load i32*, i32** %arrayidx2, align 4, !tbaa !7, !noalias !11 +; CHECK-NEXT: br label %for.cond +; CHECK: for.cond: +; CHECK-NEXT: %prov.pTmp00.0 = phi i32* [ %4, %entry ], [ %prov.pTmp01.0, %for.body ] +; CHECK-NEXT: %pTmp00.0 = phi i32* [ %4, %entry ], [ %pTmp01.0, %for.body ] +; CHECK-NEXT: %prov.pTmp01.0 = phi i32* [ %5, %entry ], [ %3, %for.body ] +; CHECK-NEXT: %pTmp01.0 = phi i32* [ %5, %entry ], [ %_pA, %for.body ] +; CHECK-NEXT: %i.0 = phi i32 [ 0, %entry ], [ %inc, %for.body ] +; CHECK-NEXT: %cmp = icmp slt i32 %i.0, %n +; CHECK-NEXT: br i1 %cmp, label %for.body, label %for.cond.cleanup +; CHECK: for.cond.cleanup: +; CHECK-NEXT: store i32 99, i32* %_pA, ptr_provenance i32* %3, align 4, !tbaa !12, !noalias !11 +; CHECK-NEXT: store i32 42, i32* %pTmp00.0, ptr_provenance i32* %prov.pTmp00.0, align 4, !tbaa !12, !noalias !11 +; CHECK-NEXT: %cmp5 = icmp sgt i32 %n, 5 +; CHECK-NEXT: %6 = call i32* 
@llvm.provenance.noalias.p0i32.p0i8.p0p0i32.p0p0i32.i32(i32* %2, i8* %1, i32** null, i32** undef, i32 0, metadata !5) +; CHECK-NEXT: %prov.cond = select i1 %cmp5, i32* %prov.pTmp00.0, i32* %6 +; CHECK-NEXT: %cond = select i1 %cmp5, i32* %pTmp00.0, i32* %2 +; CHECK-NEXT: store i32 43, i32* %cond, ptr_provenance i32* %prov.cond, align 4, !tbaa !12, !noalias !11 +; CHECK-NEXT: ret void +; CHECK: for.body: +; CHECK-NEXT: %arrayidx3 = getelementptr inbounds i32, i32* %pTmp01.0, i32 1 +; CHECK-NEXT: %7 = load i32, i32* %arrayidx3, ptr_provenance i32* %prov.pTmp01.0, align 4, !tbaa !12, !noalias !11 +; CHECK-NEXT: %arrayidx4 = getelementptr inbounds i32, i32* %pTmp00.0, i32 1 +; CHECK-NEXT: store i32 %7, i32* %arrayidx4, ptr_provenance i32* %prov.pTmp00.0, align 4, !tbaa !12, !noalias !11 +; CHECK-NEXT: %inc = add nsw i32 %i.0, 1 +; CHECK-NEXT: br label %for.cond +; CHECK-NEXT: } + + +; Function Attrs: nounwind +define dso_local void @test_phi02(i32* %_pA, i32** %_pB, i32 %n) #0 { +entry: + %0 = call i8* @llvm.noalias.decl.p0i8.p0p0i32.i32(i32** null, i32 0, metadata !14) + %1 = call i8* @llvm.noalias.decl.p0i8.p0p0i32.i32(i32** null, i32 0, metadata !17) + %arrayidx = getelementptr inbounds i32*, i32** %_pB, i32 2 + %2 = load i32*, i32** %arrayidx, align 4, !tbaa !7, !noalias !19 + %3 = call i32* @llvm.noalias.p0i32.p0i8.p0p0i32.i32(i32* %_pA, i8* %0, i32** null, i32 0, metadata !14), !tbaa !7, !noalias !19 + %4 = load i32*, i32** %_pB, align 4, !tbaa !7, !noalias !19 + %arrayidx2 = getelementptr inbounds i32*, i32** %_pB, i32 1 + %5 = load i32*, i32** %arrayidx2, align 4, !tbaa !7, !noalias !19 + br label %for.cond + +for.cond: ; preds = %for.body, %entry + %pTmp00.0 = phi i32* [ %4, %entry ], [ %pTmp01.0, %for.body ] + %pTmp01.0 = phi i32* [ %5, %entry ], [ %3, %for.body ] + %i.0 = phi i32 [ 0, %entry ], [ %inc, %for.body ] + %cmp = icmp slt i32 %i.0, %n + br i1 %cmp, label %for.body, label %for.cond.cleanup + +for.cond.cleanup: ; preds = %for.cond + %6 = call i32* @llvm.noalias.p0i32.p0i8.p0p0i32.i32(i32* %_pA, i8* %0, i32** null, i32 0, metadata !14), !tbaa !7, !noalias !19 + %7 = bitcast i32* %6 to i16* + store i16 99, i16* %7, align 2, !tbaa !20, !noalias !19 + %8 = bitcast i32* %pTmp00.0 to i16* + store i16 42, i16* %8, align 2, !tbaa !20, !noalias !19 + %cmp5 = icmp sgt i32 %n, 5 + %9 = call i32* @llvm.noalias.p0i32.p0i8.p0p0i32.i32(i32* %2, i8* %1, i32** null, i32 0, metadata !17) + %cond = select i1 %cmp5, i32* %pTmp00.0, i32* %9 + %10 = bitcast i32* %cond to i16* + store i16 43, i16* %10, align 2, !tbaa !20, !noalias !19 + ret void + +for.body: ; preds = %for.cond + %11 = bitcast i32* %pTmp01.0 to i16* + %arrayidx3 = getelementptr inbounds i16, i16* %11, i32 1 + %12 = load i16, i16* %arrayidx3, align 2, !tbaa !20, !noalias !19 + %13 = bitcast i32* %pTmp00.0 to i16* + %arrayidx4 = getelementptr inbounds i16, i16* %13, i32 1 + store i16 %12, i16* %arrayidx4, align 2, !tbaa !20, !noalias !19 + %inc = add nsw i32 %i.0, 1 + br label %for.cond +} + +; CHECK-LABEL: @test_phi02( +; CHECK-NEXT: entry: +; CHECK-NEXT: %0 = call i8* @llvm.noalias.decl.p0i8.p0p0i32.i32(i32** null, i32 0, metadata !14) +; CHECK-NEXT: %1 = call i8* @llvm.noalias.decl.p0i8.p0p0i32.i32(i32** null, i32 0, metadata !17) +; CHECK-NEXT: %arrayidx = getelementptr inbounds i32*, i32** %_pB, i32 2 +; CHECK-NEXT: %2 = load i32*, i32** %arrayidx, align 4, !tbaa !7, !noalias !19 +; CHECK-NEXT: %3 = call i32* @llvm.provenance.noalias.p0i32.p0i8.p0p0i32.p0p0i32.i32(i32* %_pA, i8* %0, i32** null, i32** undef, i32 0, metadata !14), 
!tbaa !7, !noalias !19 +; CHECK-NEXT: %4 = load i32*, i32** %_pB, align 4, !tbaa !7, !noalias !19 +; CHECK-NEXT: %arrayidx2 = getelementptr inbounds i32*, i32** %_pB, i32 1 +; CHECK-NEXT: %5 = load i32*, i32** %arrayidx2, align 4, !tbaa !7, !noalias !19 +; CHECK-NEXT: br label %for.cond +; CHECK: for.cond: +; CHECK-NEXT: %prov.pTmp00.0 = phi i32* [ %4, %entry ], [ %prov.pTmp01.0, %for.body ] +; CHECK-NEXT: %pTmp00.0 = phi i32* [ %4, %entry ], [ %pTmp01.0, %for.body ] +; CHECK-NEXT: %prov.pTmp01.0 = phi i32* [ %5, %entry ], [ %3, %for.body ] +; CHECK-NEXT: %pTmp01.0 = phi i32* [ %5, %entry ], [ %_pA, %for.body ] +; CHECK-NEXT: %i.0 = phi i32 [ 0, %entry ], [ %inc, %for.body ] +; CHECK-NEXT: %6 = bitcast i32* %prov.pTmp00.0 to i16* +; CHECK-NEXT: %7 = bitcast i32* %prov.pTmp01.0 to i16* +; CHECK-NEXT: %cmp = icmp slt i32 %i.0, %n +; CHECK-NEXT: br i1 %cmp, label %for.body, label %for.cond.cleanup +; CHECK: for.cond.cleanup: +; CHECK-NEXT: %8 = bitcast i32* %3 to i16* +; CHECK-NEXT: %9 = bitcast i32* %_pA to i16* +; CHECK-NEXT: store i16 99, i16* %9, ptr_provenance i16* %8, align 2, !tbaa !20, !noalias !19 +; CHECK-NEXT: %10 = bitcast i32* %pTmp00.0 to i16* +; CHECK-NEXT: store i16 42, i16* %10, ptr_provenance i16* %6, align 2, !tbaa !20, !noalias !19 +; CHECK-NEXT: %cmp5 = icmp sgt i32 %n, 5 +; CHECK-NEXT: %11 = call i32* @llvm.provenance.noalias.p0i32.p0i8.p0p0i32.p0p0i32.i32(i32* %2, i8* %1, i32** null, i32** undef, i32 0, metadata !17) +; CHECK-NEXT: %prov.cond = select i1 %cmp5, i32* %prov.pTmp00.0, i32* %11 +; CHECK-NEXT: %12 = bitcast i32* %prov.cond to i16* +; CHECK-NEXT: %cond = select i1 %cmp5, i32* %pTmp00.0, i32* %2 +; CHECK-NEXT: %13 = bitcast i32* %cond to i16* +; CHECK-NEXT: store i16 43, i16* %13, ptr_provenance i16* %12, align 2, !tbaa !20, !noalias !19 +; CHECK-NEXT: ret void +; CHECK: for.body: +; CHECK-NEXT: %14 = bitcast i32* %pTmp01.0 to i16* +; CHECK-NEXT: %arrayidx3 = getelementptr inbounds i16, i16* %14, i32 1 +; CHECK-NEXT: %15 = load i16, i16* %arrayidx3, ptr_provenance i16* %7, align 2, !tbaa !20, !noalias !19 +; CHECK-NEXT: %16 = bitcast i32* %pTmp00.0 to i16* +; CHECK-NEXT: %arrayidx4 = getelementptr inbounds i16, i16* %16, i32 1 +; CHECK-NEXT: store i16 %15, i16* %arrayidx4, ptr_provenance i16* %6, align 2, !tbaa !20, !noalias !19 +; CHECK-NEXT: %inc = add nsw i32 %i.0, 1 +; CHECK-NEXT: br label %for.cond +; CHECK-NEXT: } + + + +; Function Attrs: argmemonly nounwind +declare i8* @llvm.noalias.decl.p0i8.p0p0i32.i32(i32**, i32, metadata) #1 + +; Function Attrs: argmemonly nounwind speculatable +declare i32* @llvm.noalias.p0i32.p0i8.p0p0i32.i32(i32*, i8*, i32**, i32, metadata) #2 + +attributes #0 = { nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #1 = { argmemonly nounwind } +attributes #2 = { argmemonly nounwind speculatable } + +!llvm.module.flags = !{!0} +!llvm.ident = !{!1} + +!0 = !{i32 1, !"wchar_size", i32 4} +!1 = !{!"clang version 10.0.0 (sgasip@krachtcs10:/u/sgasip/ipd/repositories/llvm_ipd 6068136b7c40a5bb7a7291dd621185a209673e09)"} +!2 = !{!3} +!3 = distinct !{!3, !4, !"test_phi01: rpTmp"} +!4 = distinct !{!4, !"test_phi01"} +!5 = !{!6} +!6 = distinct !{!6, !4, !"test_phi01: rp2"} +!7 = !{!8, !8, i64 0, 
i64 4} +!8 = !{!9, i64 4, !"any pointer"} +!9 = !{!10, i64 1, !"omnipotent char"} +!10 = !{!"Simple C/C++ TBAA"} +!11 = !{!3, !6} +!12 = !{!13, !13, i64 0, i64 4} +!13 = !{!9, i64 4, !"int"} +!14 = !{!15} +!15 = distinct !{!15, !16, !"test_phi02: rpTmp"} +!16 = distinct !{!16, !"test_phi02"} +!17 = !{!18} +!18 = distinct !{!18, !16, !"test_phi02: rp2"} +!19 = !{!15, !18} +!20 = !{!21, !21, i64 0, i64 2} +!21 = !{!9, i64 2, !"short"} Index: llvm/test/Transforms/PropagateAndConvertNoAlias/struct.ll =================================================================== --- /dev/null +++ llvm/test/Transforms/PropagateAndConvertNoAlias/struct.ll @@ -0,0 +1,117 @@ +; RUN: opt < %s -convert-noalias -verify -S | FileCheck %s +; RUN: opt < %s -passes=convert-noalias,verify -S | FileCheck %s +target datalayout = "e-i8:8:8-i16:16:16-i32:32:32-i64:32:32-f16:16:16-f32:32:32-f64:32:32-p:32:32:32:32:8-s0:32:32-a0:0:32-S32-n16:32-v128:32:32-P0-p0:32:32:32:32:8" + +%struct.FOO = type { i32*, i32*, i32* } + +; Function Attrs: nounwind +define dso_local void @test_rFOO(%struct.FOO* %_pFOO, i32* %_pA) #0 !noalias !2 { +entry: + %tmp = alloca %struct.FOO, align 4 + %0 = call i8* @llvm.noalias.decl.p0i8.p0p0s_struct.FOOs.i32(%struct.FOO** null, i32 0, metadata !5) + %1 = call i8* @llvm.noalias.decl.p0i8.p0p0i32.i32(i32** null, i32 0, metadata !7) + %2 = call i8* @llvm.noalias.decl.p0i8.p0p0i32.i32(i32** null, i32 4, metadata !7) + %3 = call i8* @llvm.noalias.decl.p0i8.p0p0i32.i32(i32** null, i32 8, metadata !7) + %4 = call %struct.FOO* @llvm.noalias.p0s_struct.FOOs.p0i8.p0p0s_struct.FOOs.i32(%struct.FOO* %_pFOO, i8* %0, %struct.FOO** null, i32 0, metadata !5), !tbaa !9, !noalias !13 + %5 = call %struct.FOO* @llvm.noalias.copy.guard.p0s_struct.FOOs.p0i8(%struct.FOO* %4, i8* null, metadata !14, metadata !2) + %6 = load %struct.FOO, %struct.FOO* %5, align 4, !noalias !13 + %.fca.0.extract = extractvalue %struct.FOO %6, 0 + %.fca.1.extract = extractvalue %struct.FOO %6, 1 + %.fca.2.extract = extractvalue %struct.FOO %6, 2 + %7 = call i32* @llvm.noalias.p0i32.p0i8.p0p0i32.i32(i32* %.fca.0.extract, i8* %1, i32** null, i32 0, metadata !7), !tbaa !18, !noalias !13 + %8 = call i32* @llvm.noalias.p0i32.p0i8.p0p0i32.i32(i32* %.fca.1.extract, i8* %2, i32** null, i32 4, metadata !7), !tbaa !20, !noalias !13 + %9 = load i32, i32* %8, align 4, !tbaa !21, !noalias !13 + store i32 %9, i32* %7, align 4, !tbaa !21, !noalias !13 + %10 = call i32* @llvm.noalias.p0i32.p0i8.p0p0i32.i32(i32* %_pA, i8* %1, i32** null, i32 0, metadata !7), !tbaa !18, !noalias !13 + store i32 42, i32* %10, align 4, !tbaa !21, !noalias !13 + %.fca.0.load.noalias = call i32* @llvm.noalias.p0i32.p0i8.p0p0i32.i32(i32* %_pA, i8* %1, i32** null, i32 0, metadata !7) + %.fca.0.insert = insertvalue %struct.FOO undef, i32* %.fca.0.load.noalias, 0 + %.fca.1.load.noalias = call i32* @llvm.noalias.p0i32.p0i8.p0p0i32.i32(i32* %.fca.1.extract, i8* %2, i32** null, i32 4, metadata !7) + %.fca.1.insert = insertvalue %struct.FOO %.fca.0.insert, i32* %.fca.1.load.noalias, 1 + %.fca.2.load.noalias = call i32* @llvm.noalias.p0i32.p0i8.p0p0i32.i32(i32* %.fca.2.extract, i8* %3, i32** null, i32 8, metadata !7) + %.fca.2.insert = insertvalue %struct.FOO %.fca.1.insert, i32* %.fca.2.load.noalias, 2 + call void @fum(%struct.FOO %.fca.2.insert), !noalias !13 + ret void +} + +; CHECK-LABEL: @test_rFOO( +; CHECK-NEXT: entry: +; CHECK-NEXT: %tmp = alloca %struct.FOO, align 4 +; CHECK-NEXT: %0 = call i8* @llvm.noalias.decl.p0i8.p0p0s_struct.FOOs.i32(%struct.FOO** null, i32 0, metadata !5) 
+; CHECK-NEXT: %1 = call i8* @llvm.noalias.decl.p0i8.p0p0i32.i32(i32** null, i32 0, metadata !7) +; CHECK-NEXT: %2 = call i8* @llvm.noalias.decl.p0i8.p0p0i32.i32(i32** null, i32 4, metadata !7) +; CHECK-NEXT: %3 = call i8* @llvm.noalias.decl.p0i8.p0p0i32.i32(i32** null, i32 8, metadata !7) +; CHECK-NEXT: %4 = call %struct.FOO* @llvm.provenance.noalias.p0s_struct.FOOs.p0i8.p0p0s_struct.FOOs.p0p0s_struct.FOOs.i32(%struct.FOO* %_pFOO, i8* %0, %struct.FOO** null, %struct.FOO** undef, i32 0, metadata !5), !tbaa !9, !noalias !13 +; CHECK-NEXT: %.guard = call %struct.FOO* @llvm.noalias.arg.guard.p0s_struct.FOOs.p0s_struct.FOOs(%struct.FOO* %_pFOO, %struct.FOO* %4) +; CHECK-NEXT: %5 = call %struct.FOO* @llvm.noalias.copy.guard.p0s_struct.FOOs.p0i8(%struct.FOO* %.guard, i8* null, metadata !14, metadata !2) +; CHECK-NEXT: %6 = load %struct.FOO, %struct.FOO* %5, align 4, !noalias !13 +; CHECK-NEXT: %.fca.0.extract = extractvalue %struct.FOO %6, 0 +; CHECK-NEXT: %.fca.1.extract = extractvalue %struct.FOO %6, 1 +; CHECK-NEXT: %.fca.2.extract = extractvalue %struct.FOO %6, 2 +; CHECK-NEXT: %7 = call i32* @llvm.provenance.noalias.p0i32.p0i8.p0p0i32.p0p0i32.i32(i32* %.fca.0.extract, i8* %1, i32** null, i32** undef, i32 0, metadata !7), !tbaa !18, !noalias !13 +; CHECK-NEXT: %8 = call i32* @llvm.provenance.noalias.p0i32.p0i8.p0p0i32.p0p0i32.i32(i32* %.fca.1.extract, i8* %2, i32** null, i32** undef, i32 4, metadata !7), !tbaa !20, !noalias !13 +; CHECK-NEXT: %9 = load i32, i32* %.fca.1.extract, ptr_provenance i32* %8, align 4, !tbaa !21, !noalias !13 +; CHECK-NEXT: store i32 %9, i32* %.fca.0.extract, ptr_provenance i32* %7, align 4, !tbaa !21, !noalias !13 +; CHECK-NEXT: %10 = call i32* @llvm.provenance.noalias.p0i32.p0i8.p0p0i32.p0p0i32.i32(i32* %_pA, i8* %1, i32** null, i32** undef, i32 0, metadata !7), !tbaa !18, !noalias !13 +; CHECK-NEXT: store i32 42, i32* %_pA, ptr_provenance i32* %10, align 4, !tbaa !21, !noalias !13 +; CHECK-NEXT: %.fca.0.load.noalias.guard = call i32* @llvm.noalias.arg.guard.p0i32.p0i32(i32* %_pA, i32* %10) +; CHECK-NEXT: %.fca.0.insert = insertvalue %struct.FOO undef, i32* %.fca.0.load.noalias.guard, 0 +; CHECK-NEXT: %.fca.1.load.noalias.guard = call i32* @llvm.noalias.arg.guard.p0i32.p0i32(i32* %.fca.1.extract, i32* %8) +; CHECK-NEXT: %.fca.1.insert = insertvalue %struct.FOO %.fca.0.insert, i32* %.fca.1.load.noalias.guard, 1 +; CHECK-NEXT: %11 = call i32* @llvm.provenance.noalias.p0i32.p0i8.p0p0i32.p0p0i32.i32(i32* %.fca.2.extract, i8* %3, i32** null, i32** undef, i32 8, metadata !7) +; CHECK-NEXT: %.fca.2.load.noalias.guard = call i32* @llvm.noalias.arg.guard.p0i32.p0i32(i32* %.fca.2.extract, i32* %11) +; CHECK-NEXT: %.fca.2.insert = insertvalue %struct.FOO %.fca.1.insert, i32* %.fca.2.load.noalias.guard, 2 +; CHECK-NEXT: call void @fum(%struct.FOO %.fca.2.insert), !noalias !13 +; CHECK-NEXT: ret void +; CHECK-NEXT: } + + +; Function Attrs: argmemonly nounwind speculatable +declare %struct.FOO* @llvm.noalias.p0s_struct.FOOs.p0i8.p0p0s_struct.FOOs.i32(%struct.FOO*, i8*, %struct.FOO**, i32, metadata) #1 + +; Function Attrs: nounwind readnone +declare %struct.FOO* @llvm.noalias.copy.guard.p0s_struct.FOOs.p0i8(%struct.FOO*, i8*, metadata, metadata) #2 + +; Function Attrs: argmemonly nounwind speculatable +declare i32* @llvm.noalias.p0i32.p0i8.p0p0i32.i32(i32*, i8*, i32**, i32, metadata) #1 + +; Function Attrs: argmemonly nounwind +declare i8* @llvm.noalias.decl.p0i8.p0p0i32.i32(i32**, i32, metadata) #3 + +; Function Attrs: argmemonly nounwind +declare i8* 
@llvm.noalias.decl.p0i8.p0p0s_struct.FOOs.i32(%struct.FOO**, i32, metadata) #3 + +declare dso_local void @fum(%struct.FOO) #4 + +attributes #0 = { nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #1 = { argmemonly nounwind speculatable } +attributes #2 = { nounwind readnone } +attributes #3 = { argmemonly nounwind } +attributes #4 = { "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } + +!llvm.module.flags = !{!0} +!llvm.ident = !{!1} + +!0 = !{i32 1, !"wchar_size", i32 4} +!1 = !{!"clang"} +!2 = !{!3} +!3 = distinct !{!3, !4, !"test_rFOO: unknown scope"} +!4 = distinct !{!4, !"test_rFOO"} +!5 = !{!6} +!6 = distinct !{!6, !4, !"test_rFOO: rpFOO"} +!7 = !{!8} +!8 = distinct !{!8, !4, !"test_rFOO: tmp"} +!9 = !{!10, !10, i64 0, i64 4} +!10 = !{!11, i64 4, !"any pointer"} +!11 = !{!12, i64 1, !"omnipotent char"} +!12 = !{!"Simple C/C++ TBAA"} +!13 = !{!6, !8, !3} +!14 = !{!15, !16, !17} +!15 = !{i64 -1, i64 0} +!16 = !{i64 -1, i64 1} +!17 = !{i64 -1, i64 2} +!18 = !{!19, !10, i64 0, i64 4} +!19 = !{!11, i64 12, !"FOO", !10, i64 0, i64 4, !10, i64 4, i64 4, !10, i64 8, i64 4} +!20 = !{!19, !10, i64 4, i64 4} +!21 = !{!22, !22, i64 0, i64 4} +!22 = !{!11, i64 4, !"int"} Index: llvm/test/Transforms/SLPVectorizer/noalias.ll =================================================================== --- /dev/null +++ llvm/test/Transforms/SLPVectorizer/noalias.ll @@ -0,0 +1,55 @@ +; RUN: opt -S < %s -slp-vectorizer -slp-max-reg-size=128 -slp-min-reg-size=128 | FileCheck %s + +; SLP vectorization across a @llvm.provenance.noalias and provenance + +; Function Attrs: inaccessiblememonly nounwind willreturn +declare void @llvm.sideeffect() #0 + +define void @test(float* %p) { +; CHECK-LABEL: @test( +; CHECK-NEXT: [[P1_DECL:%.*]] = tail call i8* @llvm.noalias.decl.p0i8.p0p0f32.i32(float** null, i32 0, metadata !0) +; CHECK-NEXT: [[P0:%.*]] = getelementptr float, float* [[P:%.*]], i64 0 +; CHECK-NEXT: [[P1:%.*]] = getelementptr float, float* [[P]], i64 1 +; CHECK-NEXT: [[PROVENANCE_P1:%.*]] = tail call float* @llvm.provenance.noalias.p0f32.p0i8.p0p0f32.p0p0f32.i32(float* [[P1]], i8* [[P1_DECL]], float** null, float** undef, i32 0, metadata !0), !noalias !0 +; CHECK-NEXT: [[P2:%.*]] = getelementptr float, float* [[P]], i64 2 +; CHECK-NEXT: [[P3:%.*]] = getelementptr float, float* [[P]], i64 3 +; CHECK-NEXT: call void @llvm.sideeffect() +; CHECK-NEXT: [[TMP1:%.*]] = bitcast float* [[P0]] to <4 x float>* +; CHECK-NEXT: [[TMP2:%.*]] = load <4 x float>, <4 x float>* [[TMP1]], align 4 +; CHECK-NEXT: call void @llvm.sideeffect() +; CHECK-NEXT: [[TMP3:%.*]] = bitcast float* [[P0]] to <4 x float>* +; CHECK-NEXT: store <4 x float> [[TMP2]], <4 x float>* [[TMP3]], align 4 +; CHECK-NEXT: ret void +; + %p1.decl = tail call i8* @llvm.noalias.decl.p0i8.p0p0f32.i32(float** null, i32 0, metadata !0) + %p0 = getelementptr float, float* %p, i64 0 + %p1 = getelementptr float, float* %p, i64 1 + %prov.p1 = tail call 
float* @llvm.provenance.noalias.p0f32.p0i8.p0p0f32.p0p0f32.i32(float* %p1, i8* %p1.decl, float** null, float** undef, i32 0, metadata !0), !noalias !0 + %p2 = getelementptr float, float* %p, i64 2 + %p3 = getelementptr float, float* %p, i64 3 + %l0 = load float, float* %p0, !noalias !0 + %l1 = load float, float* %p1, ptr_provenance float* %prov.p1, !noalias !0 + %l2 = load float, float* %p2, !noalias !0 + call void @llvm.sideeffect() + %l3 = load float, float* %p3, !noalias !0 + store float %l0, float* %p0, !noalias !0 + call void @llvm.sideeffect() + store float %l1, float* %p1, ptr_provenance float* %prov.p1, !noalias !0 + store float %l2, float* %p2, !noalias !0 + store float %l3, float* %p3, !noalias !0 + ret void +} + +; Function Attrs: argmemonly nounwind +declare i8* @llvm.noalias.decl.p0i8.p0p0f32.i32(float**, i32, metadata) #1 + +; Function Attrs: nounwind readnone speculatable +declare float* @llvm.provenance.noalias.p0f32.p0i8.p0p0f32.p0p0f32.i32(float*, i8*, float**, float**, i32, metadata) #2 + +attributes #0 = { inaccessiblememonly nounwind willreturn } +attributes #1 = { argmemonly nounwind } +attributes #2 = { nounwind readnone speculatable } + +!0 = !{!1} +!1 = distinct !{!1, !2, !"test_f: p"} +!2 = distinct !{!2, !"test_f"} Index: llvm/test/Transforms/SROA/noalias.ll =================================================================== --- /dev/null +++ llvm/test/Transforms/SROA/noalias.ll @@ -0,0 +1,302 @@ +; RUN: opt < %s -sroa -S | FileCheck %s +; RUN: opt < %s -passes=sroa -S | FileCheck %s + +target datalayout = "e-p:64:64:64-p1:16:16:16-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-n8:16:32:64" + + +%struct.FOO = type { i32*, i32*, i32* } + +; Function Attrs: nounwind +define dso_local void @test_ri(i32* %_p) #0 { +entry: + %_p.addr = alloca i32*, align 4 + %rp = alloca i32*, align 4 + store i32* %_p, i32** %_p.addr, align 4, !tbaa !2, !noalias !6 + store i32* undef, i32** %rp, align 4, !noalias !6 + %0 = bitcast i32** %rp to i8* + call void @llvm.lifetime.start.p0i8(i64 4, i8* %0) #4, !noalias !6 + %1 = call i8* @llvm.noalias.decl.p0i8.p0p0i32.i64(i32** %rp, i64 0, metadata !6) + %2 = load i32*, i32** %_p.addr, align 4, !tbaa !2, !noalias !6 + store i32* %2, i32** %rp, align 4, !tbaa !2, !noalias !6 + %3 = load i32*, i32** %rp, align 4, !tbaa !2, !noalias !6 + %4 = call i32* @llvm.noalias.p0i32.p0i8.p0p0i32.i64(i32* %3, i8* %1, i32** %rp, i64 0, metadata !6), !tbaa !2, !noalias !6 + store i32 42, i32* %4, align 4, !tbaa !9, !noalias !6 + %5 = bitcast i32** %rp to i8* + call void @llvm.lifetime.end.p0i8(i64 4, i8* %5) #4 + ret void +} + +; CHECK-LABEL: @test_ri( +; CHECK-NOT: alloca +; CHECK: %0 = call i8* @llvm.noalias.decl.p0i8.p0p0i32.i64(i32** null, i64 0, metadata !2) +; CHECK: %1 = call i32* @llvm.noalias.p0i32.p0i8.p0p0i32.i64(i32* %_p, i8* %0, i32** null, i64 0, metadata !2), !tbaa !5, !noalias !2 + +; Function Attrs: nounwind +define dso_local void @test_ra(i32* %_p) #0 { +entry: + %_p.addr = alloca i32*, align 4 + %rp = alloca [3 x i32*], align 4 + store i32* %_p, i32** %_p.addr, align 4, !tbaa !2, !noalias !11 + store [3 x i32*] undef, [3 x i32*]* %rp, align 4, !noalias !11 + %0 = bitcast [3 x i32*]* %rp to i8* + call void @llvm.lifetime.start.p0i8(i64 12, i8* %0) #4, !noalias !11 + %1 = call i8* @llvm.noalias.decl.p0i8.p0a3p0i32.i64([3 x i32*]* %rp, i64 0, metadata !11) + %arrayinit.begin = getelementptr inbounds [3 x i32*], [3 x i32*]* %rp, i32 0, i32 0 + %2 = load i32*, i32** %_p.addr, align 4, !tbaa !2, 
!noalias !11 + store i32* %2, i32** %arrayinit.begin, align 4, !tbaa !2, !noalias !11 + %arrayinit.element = getelementptr inbounds i32*, i32** %arrayinit.begin, i32 1 + %3 = load i32*, i32** %_p.addr, align 4, !tbaa !2, !noalias !11 + %add.ptr = getelementptr inbounds i32, i32* %3, i32 1 + store i32* %add.ptr, i32** %arrayinit.element, align 4, !tbaa !2, !noalias !11 + %arrayinit.element1 = getelementptr inbounds i32*, i32** %arrayinit.element, i32 1 + %4 = load i32*, i32** %_p.addr, align 4, !tbaa !2, !noalias !11 + %add.ptr2 = getelementptr inbounds i32, i32* %4, i32 2 + store i32* %add.ptr2, i32** %arrayinit.element1, align 4, !tbaa !2, !noalias !11 + %arrayidx = getelementptr inbounds [3 x i32*], [3 x i32*]* %rp, i32 0, i32 0 + %5 = load i32*, i32** %arrayidx, align 4, !tbaa !2, !noalias !11 + %6 = call i32* @llvm.noalias.p0i32.p0i8.p0p0i32.i64(i32* %5, i8* %1, i32** %arrayidx, i64 0, metadata !11), !tbaa !2, !noalias !11 + store i32 42, i32* %6, align 4, !tbaa !9, !noalias !11 + %arrayidx3 = getelementptr inbounds [3 x i32*], [3 x i32*]* %rp, i32 0, i32 1 + %7 = load i32*, i32** %arrayidx3, align 4, !tbaa !2, !noalias !11 + %8 = call i32* @llvm.noalias.p0i32.p0i8.p0p0i32.i64(i32* %7, i8* %1, i32** %arrayidx3, i64 0, metadata !11), !tbaa !2, !noalias !11 + store i32 43, i32* %8, align 4, !tbaa !9, !noalias !11 + %arrayidx4 = getelementptr inbounds [3 x i32*], [3 x i32*]* %rp, i32 0, i32 2 + %9 = load i32*, i32** %arrayidx4, align 4, !tbaa !2, !noalias !11 + %10 = call i32* @llvm.noalias.p0i32.p0i8.p0p0i32.i64(i32* %9, i8* %1, i32** %arrayidx4, i64 0, metadata !11), !tbaa !2, !noalias !11 + store i32 44, i32* %10, align 4, !tbaa !9, !noalias !11 + %11 = bitcast [3 x i32*]* %rp to i8* + call void @llvm.lifetime.end.p0i8(i64 12, i8* %11) #4 + ret void +} + +; CHECK-LABEL: @test_ra( +; CHECK-NOT: alloca +; CHECK: %0 = call i8* @llvm.noalias.decl.p0i8.p0p0i32.i64(i32** null, i64 0, metadata !11) +; CHECK: %1 = call i8* @llvm.noalias.decl.p0i8.p0p0i32.i64(i32** null, i64 8, metadata !11) +; CHECK: %2 = call i8* @llvm.noalias.decl.p0i8.p0p0i32.i64(i32** null, i64 16, metadata !11) +; CHECK: %3 = call i32* @llvm.noalias.p0i32.p0i8.p0p0i32.i64(i32* %_p, i8* %0, i32** null, i64 0, metadata !11), !tbaa !5, !noalias !11 +; CHECK: %4 = call i32* @llvm.noalias.p0i32.p0i8.p0p0i32.i64(i32* %add.ptr, i8* %1, i32** null, i64 8, metadata !11), !tbaa !5, !noalias !11 +; CHECK: %5 = call i32* @llvm.noalias.p0i32.p0i8.p0p0i32.i64(i32* %add.ptr2, i8* %2, i32** null, i64 16, metadata !11), !tbaa !5, !noalias !11 + +; Function Attrs: argmemonly nounwind +declare i8* @llvm.noalias.decl.p0i8.p0a3p0i32.i64([3 x i32*]*, i64, metadata) #1 + +; Function Attrs: nounwind +define dso_local void @test_rs(i32* %_p) #0 { +entry: + %_p.addr = alloca i32*, align 4 + %foo = alloca %struct.FOO, align 4 + store i32* %_p, i32** %_p.addr, align 4, !tbaa !2, !noalias !14 + store %struct.FOO undef, %struct.FOO* %foo, align 4, !noalias !14 + %0 = bitcast %struct.FOO* %foo to i8* + call void @llvm.lifetime.start.p0i8(i64 12, i8* %0) #4, !noalias !14 + %1 = call i8* @llvm.noalias.decl.p0i8.p0s_struct.FOOs.i64(%struct.FOO* %foo, i64 0, metadata !14) + %mP0 = getelementptr inbounds %struct.FOO, %struct.FOO* %foo, i32 0, i32 0 + %2 = load i32*, i32** %_p.addr, align 4, !tbaa !2, !noalias !14 + store i32* %2, i32** %mP0, align 4, !tbaa !17, !noalias !14 + %mP1 = getelementptr inbounds %struct.FOO, %struct.FOO* %foo, i32 0, i32 1 + %3 = load i32*, i32** %_p.addr, align 4, !tbaa !2, !noalias !14 + %add.ptr = getelementptr inbounds i32, 
i32* %3, i32 1 + store i32* %add.ptr, i32** %mP1, align 4, !tbaa !19, !noalias !14 + %mP2 = getelementptr inbounds %struct.FOO, %struct.FOO* %foo, i32 0, i32 2 + %4 = load i32*, i32** %_p.addr, align 4, !tbaa !2, !noalias !14 + %add.ptr1 = getelementptr inbounds i32, i32* %4, i32 2 + store i32* %add.ptr1, i32** %mP2, align 4, !tbaa !20, !noalias !14 + %mP02 = getelementptr inbounds %struct.FOO, %struct.FOO* %foo, i32 0, i32 0 + %5 = load i32*, i32** %mP02, align 4, !tbaa !17, !noalias !14 + %6 = call i32* @llvm.noalias.p0i32.p0i8.p0p0i32.i64(i32* %5, i8* %1, i32** %mP02, i64 0, metadata !14), !tbaa !17, !noalias !14 + store i32 42, i32* %6, align 4, !tbaa !9, !noalias !14 + %mP13 = getelementptr inbounds %struct.FOO, %struct.FOO* %foo, i32 0, i32 1 + %7 = load i32*, i32** %mP13, align 4, !tbaa !19, !noalias !14 + %8 = call i32* @llvm.noalias.p0i32.p0i8.p0p0i32.i64(i32* %7, i8* %1, i32** %mP13, i64 0, metadata !14), !tbaa !19, !noalias !14 + store i32 43, i32* %8, align 4, !tbaa !9, !noalias !14 + %mP24 = getelementptr inbounds %struct.FOO, %struct.FOO* %foo, i32 0, i32 2 + %9 = load i32*, i32** %mP24, align 4, !tbaa !20, !noalias !14 + %10 = call i32* @llvm.noalias.p0i32.p0i8.p0p0i32.i64(i32* %9, i8* %1, i32** %mP24, i64 0, metadata !14), !tbaa !20, !noalias !14 + store i32 44, i32* %10, align 4, !tbaa !9, !noalias !14 + %11 = bitcast %struct.FOO* %foo to i8* + call void @llvm.lifetime.end.p0i8(i64 12, i8* %11) #4 + ret void +} + +; CHECK-LABEL: @test_rs( +; CHECK-NOT: alloca +; CHECK: %0 = call i8* @llvm.noalias.decl.p0i8.p0p0i32.i64(i32** null, i64 0, metadata !14) +; CHECK: %1 = call i8* @llvm.noalias.decl.p0i8.p0p0i32.i64(i32** null, i64 8, metadata !14) +; CHECK: %2 = call i8* @llvm.noalias.decl.p0i8.p0p0i32.i64(i32** null, i64 16, metadata !14) +; CHECK: %3 = call i32* @llvm.noalias.p0i32.p0i8.p0p0i32.i64(i32* %_p, i8* %0, i32** null, i64 0, metadata !14), !tbaa !17, !noalias !14 +; CHECK: %4 = call i32* @llvm.noalias.p0i32.p0i8.p0p0i32.i64(i32* %add.ptr, i8* %1, i32** null, i64 8, metadata !14), !tbaa !19, !noalias !14 +; CHECK: %5 = call i32* @llvm.noalias.p0i32.p0i8.p0p0i32.i64(i32* %add.ptr1, i8* %2, i32** null, i64 16, metadata !14), !tbaa !20, !noalias !14 + +; Function Attrs: argmemonly nounwind speculatable +define dso_local void @test_ri_inlined(i32* %_p) local_unnamed_addr #2 !noalias !21 { +entry: + %rp = alloca i32*, align 4 + %0 = bitcast i32** %rp to i8* + call void @llvm.lifetime.start.p0i8(i64 4, i8* nonnull %0) #4, !noalias !24 + %1 = call i8* @llvm.noalias.decl.p0i8.p0p0i32.i64(i32** nonnull %rp, i64 0, metadata !27), !noalias !21 + store i32* %_p, i32** %rp, ptr_provenance i32** undef, align 4, !noalias !24 + %2 = load i32*, i32** %rp, ptr_provenance i32** undef, align 4, !noalias !28 + %3 = call i32* @llvm.provenance.noalias.p0i32.p0i8.p0p0i32.p0p0i32.i64(i32* %2, i8* %1, i32** %rp, i32** undef, i64 0, metadata !27) #4, !noalias !28 + store i32 42, i32* %2, ptr_provenance i32* %3, align 4, !noalias !28 + call void @llvm.lifetime.end.p0i8(i64 4, i8* nonnull %0) #4, !noalias !21 + ret void +} + +; CHECK-LABEL: @test_ri_inlined( +; CHECK-NOT: alloca +; CHECK: %0 = call i8* @llvm.noalias.decl.p0i8.p0p0i32.i64(i32** null, i64 0, metadata !24) +; CHECK: %1 = call i32* @llvm.provenance.noalias.p0i32.p0i8.p0p0i32.p0p0i32.i64(i32* %_p, i8* %0, i32** null, i32** undef, i64 0, metadata !24){{.*}}, !noalias !27 + +; Function Attrs: argmemonly nounwind speculatable +define dso_local void @test_ra_inlined(i32* %_p) local_unnamed_addr #2 !noalias !29 { +entry: + %rp = alloca [3 
x i32*], align 4 + %.fca.0.gep = getelementptr inbounds [3 x i32*], [3 x i32*]* %rp, i32 0, i32 0 + %0 = bitcast [3 x i32*]* %rp to i8* + call void @llvm.lifetime.start.p0i8(i64 12, i8* nonnull %0) #4, !noalias !32 + %1 = call i8* @llvm.noalias.decl.p0i8.p0a3p0i32.i64([3 x i32*]* nonnull %rp, i64 0, metadata !35), !noalias !29 + store i32* %_p, i32** %.fca.0.gep, ptr_provenance i32** undef, align 4, !noalias !32 + %arrayinit.element = getelementptr inbounds [3 x i32*], [3 x i32*]* %rp, i32 0, i32 1 + %add.ptr = getelementptr inbounds i32, i32* %_p, i32 1 + store i32* %add.ptr, i32** %arrayinit.element, ptr_provenance i32** undef, align 4, !noalias !32 + %arrayinit.element1 = getelementptr inbounds [3 x i32*], [3 x i32*]* %rp, i32 0, i32 2 + %add.ptr2 = getelementptr inbounds i32, i32* %_p, i32 2 + store i32* %add.ptr2, i32** %arrayinit.element1, ptr_provenance i32** undef, align 4, !noalias !32 + %2 = load i32*, i32** %.fca.0.gep, ptr_provenance i32** undef, align 4, !noalias !36 + %3 = call i32* @llvm.provenance.noalias.p0i32.p0i8.p0p0i32.p0p0i32.i64(i32* %2, i8* %1, i32** %.fca.0.gep, i32** undef, i64 0, metadata !35) #4, !noalias !36 + store i32 42, i32* %2, ptr_provenance i32* %3, align 4, !noalias !36 + %arrayidx1.i = getelementptr inbounds i32*, i32** %.fca.0.gep, i32 1 + %4 = load i32*, i32** %arrayidx1.i, ptr_provenance i32** undef, align 4, !noalias !36 + %5 = call i32* @llvm.provenance.noalias.p0i32.p0i8.p0p0i32.p0p0i32.i64(i32* %4, i8* %1, i32** nonnull %arrayidx1.i, i32** undef, i64 0, metadata !35) #4, !noalias !36 + store i32 43, i32* %4, ptr_provenance i32* %5, align 4, !noalias !36 + %arrayidx2.i = getelementptr inbounds i32*, i32** %.fca.0.gep, i32 2 + %6 = load i32*, i32** %arrayidx2.i, ptr_provenance i32** undef, align 4, !noalias !36 + %7 = call i32* @llvm.provenance.noalias.p0i32.p0i8.p0p0i32.p0p0i32.i64(i32* %6, i8* %1, i32** nonnull %arrayidx2.i, i32** undef, i64 0, metadata !35) #4, !noalias !36 + store i32 44, i32* %6, ptr_provenance i32* %7, align 4, !noalias !36 + call void @llvm.lifetime.end.p0i8(i64 12, i8* nonnull %0) #4, !noalias !29 + ret void +} + +; CHECK-LABEL: @test_ra_inlined( +; CHECK-NOT: alloca +; CHECK: %0 = call i8* @llvm.noalias.decl.p0i8.p0p0i32.i64(i32** null, i64 0, metadata !31) +; CHECK: %1 = call i8* @llvm.noalias.decl.p0i8.p0p0i32.i64(i32** null, i64 8, metadata !31) +; CHECK: %2 = call i8* @llvm.noalias.decl.p0i8.p0p0i32.i64(i32** null, i64 16, metadata !31) +; CHECK: %3 = call i32* @llvm.provenance.noalias.p0i32.p0i8.p0p0i32.p0p0i32.i64(i32* %_p, i8* %0, i32** null, i32** undef, i64 0, metadata !31){{.*}}, !noalias !34 +; CHECK: %4 = call i32* @llvm.provenance.noalias.p0i32.p0i8.p0p0i32.p0p0i32.i64(i32* %add.ptr, i8* %1, i32** nonnull null, i32** undef, i64 8, metadata !31){{.*}}, !noalias !34 +; CHECK: %5 = call i32* @llvm.provenance.noalias.p0i32.p0i8.p0p0i32.p0p0i32.i64(i32* %add.ptr2, i8* %2, i32** nonnull null, i32** undef, i64 16, metadata !31){{.*}}, !noalias !34 + + +; Function Attrs: argmemonly nounwind speculatable +define dso_local void @test_rs_inlined(i32* %_p) local_unnamed_addr #2 !noalias !37 { +entry: + %foo = alloca %struct.FOO, align 4 + %.fca.0.gep = getelementptr inbounds %struct.FOO, %struct.FOO* %foo, i32 0, i32 0 + %.fca.1.gep = getelementptr inbounds %struct.FOO, %struct.FOO* %foo, i32 0, i32 1 + %.fca.2.gep = getelementptr inbounds %struct.FOO, %struct.FOO* %foo, i32 0, i32 2 + %0 = bitcast %struct.FOO* %foo to i8* + call void @llvm.lifetime.start.p0i8(i64 12, i8* nonnull %0) #4, !noalias !40 + %1 = call i8* 
@llvm.noalias.decl.p0i8.p0s_struct.FOOs.i64(%struct.FOO* nonnull %foo, i64 0, metadata !43), !noalias !37 + store i32* %_p, i32** %.fca.0.gep, ptr_provenance i32** undef, align 4, !noalias !40 + %add.ptr = getelementptr inbounds i32, i32* %_p, i32 1 + store i32* %add.ptr, i32** %.fca.1.gep, ptr_provenance i32** undef, align 4, !noalias !40 + %add.ptr1 = getelementptr inbounds i32, i32* %_p, i32 2 + store i32* %add.ptr1, i32** %.fca.2.gep, ptr_provenance i32** undef, align 4, !noalias !40 + %mP0.i = getelementptr inbounds %struct.FOO, %struct.FOO* %foo, i32 0, i32 0 + %2 = load i32*, i32** %mP0.i, ptr_provenance i32** undef, align 4, !noalias !44 + %3 = call i32* @llvm.provenance.noalias.p0i32.p0i8.p0p0i32.p0p0i32.i64(i32* %2, i8* %1, i32** %mP0.i, i32** undef, i64 0, metadata !43) #4, !noalias !44 + store i32 42, i32* %2, ptr_provenance i32* %3, align 4, !noalias !44 + %mP1.i = getelementptr inbounds %struct.FOO, %struct.FOO* %foo, i32 0, i32 1 + %4 = load i32*, i32** %mP1.i, ptr_provenance i32** undef, align 4, !noalias !44 + %5 = call i32* @llvm.provenance.noalias.p0i32.p0i8.p0p0i32.p0p0i32.i64(i32* %4, i8* %1, i32** nonnull %mP1.i, i32** undef, i64 0, metadata !43) #4, !noalias !44 + store i32 43, i32* %4, ptr_provenance i32* %5, align 4, !noalias !44 + %mP2.i = getelementptr inbounds %struct.FOO, %struct.FOO* %foo, i32 0, i32 2 + %6 = load i32*, i32** %mP2.i, ptr_provenance i32** undef, align 4, !noalias !44 + %7 = call i32* @llvm.provenance.noalias.p0i32.p0i8.p0p0i32.p0p0i32.i64(i32* %6, i8* %1, i32** nonnull %mP2.i, i32** undef, i64 0, metadata !43) #4, !noalias !44 + store i32 44, i32* %6, ptr_provenance i32* %7, align 4, !noalias !44 + call void @llvm.lifetime.end.p0i8(i64 12, i8* nonnull %0) #4, !noalias !37 + ret void +} + +; CHECK-LABEL: @test_rs_inlined( +; CHECK-NOT: alloca +; CHECK: %0 = call i8* @llvm.noalias.decl.p0i8.p0p0i32.i64(i32** null, i64 0, metadata !38) +; CHECK: %1 = call i8* @llvm.noalias.decl.p0i8.p0p0i32.i64(i32** null, i64 8, metadata !38) +; CHECK: %2 = call i8* @llvm.noalias.decl.p0i8.p0p0i32.i64(i32** null, i64 16, metadata !38) +; CHECK: %3 = call i32* @llvm.provenance.noalias.p0i32.p0i8.p0p0i32.p0p0i32.i64(i32* %_p, i8* %0, i32** null, i32** undef, i64 0, metadata !38){{.*}}, !noalias !41 +; CHECK: %4 = call i32* @llvm.provenance.noalias.p0i32.p0i8.p0p0i32.p0p0i32.i64(i32* %add.ptr, i8* %1, i32** nonnull null, i32** undef, i64 8, metadata !38){{.*}}, !noalias !41 +; CHECK: %5 = call i32* @llvm.provenance.noalias.p0i32.p0i8.p0p0i32.p0p0i32.i64(i32* %add.ptr1, i8* %2, i32** nonnull null, i32** undef, i64 16, metadata !38){{.*}}, !noalias !41 + +; Function Attrs: argmemonly nounwind +declare i8* @llvm.noalias.decl.p0i8.p0s_struct.FOOs.i64(%struct.FOO*, i64, metadata) #1 + +; Function Attrs: argmemonly nounwind +declare void @llvm.lifetime.start.p0i8(i64 immarg, i8* nocapture) #1 + +; Function Attrs: argmemonly nounwind +declare i8* @llvm.noalias.decl.p0i8.p0p0i32.i64(i32**, i64, metadata) #1 + +; Function Attrs: argmemonly nounwind speculatable +declare i32* @llvm.noalias.p0i32.p0i8.p0p0i32.i64(i32*, i8*, i32**, i64, metadata) #2 + +; Function Attrs: argmemonly nounwind +declare void @llvm.lifetime.end.p0i8(i64 immarg, i8* nocapture) #1 + +; Function Attrs: nounwind readnone speculatable +declare i32* @llvm.provenance.noalias.p0i32.p0i8.p0p0i32.p0p0i32.i64(i32*, i8*, i32**, i32**, i64, metadata) #3 + +attributes #0 = { nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" 
"min-legal-vector-width"="0" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #1 = { argmemonly nounwind } +attributes #2 = { argmemonly nounwind speculatable } +attributes #3 = { nounwind readnone speculatable } +attributes #4 = { nounwind } + +!llvm.module.flags = !{!0} +!llvm.ident = !{!1} + +!0 = !{i32 1, !"wchar_size", i32 4} +!1 = !{!"clang"} +!2 = !{!3, !3, i64 0, i64 4} +!3 = !{!4, i64 4, !"any pointer"} +!4 = !{!5, i64 1, !"omnipotent char"} +!5 = !{!"Simple C/C++ TBAA"} +!6 = !{!7} +!7 = distinct !{!7, !8, !"test_ri: rp"} +!8 = distinct !{!8, !"test_ri"} +!9 = !{!10, !10, i64 0, i64 4} +!10 = !{!4, i64 4, !"int"} +!11 = !{!12} +!12 = distinct !{!12, !13, !"test_ra: rp"} +!13 = distinct !{!13, !"test_ra"} +!14 = !{!15} +!15 = distinct !{!15, !16, !"test_rs: foo"} +!16 = distinct !{!16, !"test_rs"} +!17 = !{!18, !3, i64 0, i64 4} +!18 = !{!4, i64 12, !"FOO", !3, i64 0, i64 4, !3, i64 4, i64 4, !3, i64 8, i64 4} +!19 = !{!18, !3, i64 4, i64 4} +!20 = !{!18, !3, i64 8, i64 4} +!21 = !{!22} +!22 = distinct !{!22, !23, !"test_ri_inlined: unknown scope"} +!23 = distinct !{!23, !"test_ri_inlined"} +!24 = !{!25, !22} +!25 = distinct !{!25, !26, !"test_ri_inlined: rp"} +!26 = distinct !{!26, !"test_ri_inlined"} +!27 = !{!25} +!28 = !{!22, !25, !22} +!29 = !{!30} +!30 = distinct !{!30, !31, !"test_ra_inlined: unknown scope"} +!31 = distinct !{!31, !"test_ra_inlined"} +!32 = !{!33, !30} +!33 = distinct !{!33, !34, !"test_ra_inlined: rp"} +!34 = distinct !{!34, !"test_ra_inlined"} +!35 = !{!33} +!36 = !{!30, !33, !30} +!37 = !{!38} +!38 = distinct !{!38, !39, !"test_rs_inlined: unknown scope"} +!39 = distinct !{!39, !"test_rs_inlined"} +!40 = !{!41, !38} +!41 = distinct !{!41, !42, !"test_rs_inlined: foo"} +!42 = distinct !{!42, !"test_rs_inlined"} +!43 = !{!41} +!44 = !{!38, !41, !38} Index: llvm/test/Transforms/SROA/noalias2.ll =================================================================== --- /dev/null +++ llvm/test/Transforms/SROA/noalias2.ll @@ -0,0 +1,459 @@ +; RUN: opt < %s -sroa -S | FileCheck %s --check-prefixes=CHECK,CHECK_U +; RUN: opt < %s -passes=sroa -S | FileCheck %s --check-prefixes=CHECK,CHECK_U + + +; Share the dependency on llvm.noalias.copy.guard: +; RUN: sed < %s -e 's/tmp18,/tmp10,/' -e 's/cp3,/cp2,/' | opt -sroa -S | FileCheck %s --check-prefixes=CHECK,CHECK_S +; RUN: sed < %s -e 's/tmp18,/tmp10,/' -e 's/cp3,/cp2,/' | opt -passes=sroa -S | FileCheck %s --check-prefixes=CHECK,CHECK_S + +; Validate that SROA correctly deduces noalias pointers when removing: +; - llvm.mempcy +; - aggregate load/store +; - copying the struct through i64 + +; General form of each function is based on: +; ------ +; struct FOO { +; int* __restrict p; +; }; +; +; struct FUM { +; int* __restrict p0; +; struct FOO m1; +; }; +; +; void testXXXXXX(struct FUM* a_fum) // Scope A +; { +; struct FUM l_fum = *a_fum; // Scope B +; { +; struct FUM l2_fum = l_fum; // Scope C1 +; +; *l2_fum.p0 = 42; +; } +; { +; struct FUM l3_fum = l_fum; // Scope C2 +; +; *l3_fum.m1.p = 43; +; } +; } +; ---- +; After SROA, we expect to see following llvm.noalias dependencies: +; store 42 -> C1 -> B -> A +; store 43 -> C2 -> B -> A + + +; ModuleID = 'test.c' +source_filename = "test.c" +target datalayout = 
"e-m:e-p:32:32-p270:32:32-p271:32:32-p272:64:64-f64:32:64-f80:32-n8:16:32-S128" +target triple = "i386-unknown-linux-gnu" + +%struct.FUM = type { i32*, %struct.FOO } +%struct.FOO = type { i32* } + +; Function Attrs: nounwind +define dso_local void @test01_memcpy(%struct.FUM* %a_fum) #0 !noalias !3 { +entry: + %a_fum.addr = alloca %struct.FUM*, align 4 + %l_fum = alloca %struct.FUM, align 4 + %l2_fum = alloca %struct.FUM, align 4 + %l3_fum = alloca %struct.FUM, align 4 + store %struct.FUM* %a_fum, %struct.FUM** %a_fum.addr, align 4, !tbaa !6, !noalias !10 + %tmp0 = bitcast %struct.FUM* %l_fum to i8* + call void @llvm.lifetime.start.p0i8(i64 8, i8* %tmp0) #5, !noalias !10 + %tmp1 = call i8* @llvm.noalias.decl.p0i8.p0s_struct.FUMs.i64(%struct.FUM* %l_fum, i64 0, metadata !12), !noalias !10 + %tmp2 = load %struct.FUM*, %struct.FUM** %a_fum.addr, align 4, !tbaa !6, !noalias !10 + %tmp3 = call %struct.FUM* @llvm.noalias.copy.guard.p0s_struct.FUMs.p0i8(%struct.FUM* %tmp2, i8* null, metadata !13, metadata !3) + %tmp4 = bitcast %struct.FUM* %l_fum to i8* + %tmp5 = bitcast %struct.FUM* %tmp3 to i8* + call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 %tmp4, i8* align 4 %tmp5, i32 8, i1 false), !tbaa.struct !16, !noalias !10 + %tmp6 = bitcast %struct.FUM* %l2_fum to i8* + call void @llvm.lifetime.start.p0i8(i64 8, i8* %tmp6) #5, !noalias !17 + %tmp7 = call i8* @llvm.noalias.decl.p0i8.p0s_struct.FUMs.i64(%struct.FUM* %l2_fum, i64 0, metadata !19), !noalias !17 + %tmp8 = call %struct.FUM* @llvm.noalias.copy.guard.p0s_struct.FUMs.p0i8(%struct.FUM* %l_fum, i8* %tmp1, metadata !13, metadata !12) + %tmp9 = bitcast %struct.FUM* %l2_fum to i8* + %tmp10 = bitcast %struct.FUM* %tmp8 to i8* + call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 %tmp9, i8* align 4 %tmp10, i32 8, i1 false), !tbaa.struct !16, !noalias !17 + %p0 = getelementptr inbounds %struct.FUM, %struct.FUM* %l2_fum, i32 0, i32 0 + %tmp11 = load i32*, i32** %p0, align 4, !tbaa !20, !noalias !17 + %tmp12 = call i32* @llvm.noalias.p0i32.p0i8.p0p0i32.i64(i32* %tmp11, i8* %tmp7, i32** %p0, i64 0, metadata !19), !tbaa !20, !noalias !17 + store i32 42, i32* %tmp12, align 4, !tbaa !23, !noalias !17 + %tmp13 = bitcast %struct.FUM* %l2_fum to i8* + call void @llvm.lifetime.end.p0i8(i64 8, i8* %tmp13) #5, !noalias !10 + %tmp14 = bitcast %struct.FUM* %l3_fum to i8* + call void @llvm.lifetime.start.p0i8(i64 8, i8* %tmp14) #5, !noalias !25 + %tmp15 = call i8* @llvm.noalias.decl.p0i8.p0s_struct.FUMs.i64(%struct.FUM* %l3_fum, i64 0, metadata !27), !noalias !25 + %tmp16 = call %struct.FUM* @llvm.noalias.copy.guard.p0s_struct.FUMs.p0i8(%struct.FUM* %l_fum, i8* %tmp1, metadata !13, metadata !12) + %tmp17 = bitcast %struct.FUM* %l3_fum to i8* + %tmp18 = bitcast %struct.FUM* %tmp16 to i8* + call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 %tmp17, i8* align 4 %tmp18, i32 8, i1 false), !tbaa.struct !16, !noalias !25 + %m1 = getelementptr inbounds %struct.FUM, %struct.FUM* %l3_fum, i32 0, i32 1 + %p = getelementptr inbounds %struct.FOO, %struct.FOO* %m1, i32 0, i32 0 + %tmp19 = load i32*, i32** %p, align 4, !tbaa !28, !noalias !25 + %tmp20 = call i32* @llvm.noalias.p0i32.p0i8.p0p0i32.i64(i32* %tmp19, i8* %tmp15, i32** %p, i64 0, metadata !27), !tbaa !28, !noalias !25 + store i32 43, i32* %tmp20, align 4, !tbaa !23, !noalias !25 + %tmp21 = bitcast %struct.FUM* %l3_fum to i8* + call void @llvm.lifetime.end.p0i8(i64 8, i8* %tmp21) #5, !noalias !10 + %tmp22 = bitcast %struct.FUM* %l_fum to i8* + call void @llvm.lifetime.end.p0i8(i64 8, i8* %tmp22) #5 + ret void +} + +; 
CHECK-LABEL: @test01_memcpy +; CHECK-NOT: alloca +; CHECK: %0 = call i8* @llvm.noalias.decl.p0i8.p0p0i32.i64(i32** null, i64 0, metadata !6) +; CHECK: %1 = call i8* @llvm.noalias.decl.p0i8.p0p0i32.i64(i32** null, i64 4, metadata !6) +; CHECK-NOT: llvm.noalias.copy.guard +; CHECK-NOT: llvm.noalias.p +; CHECK-NOT: llvm.noalias.decl +; CHECK: %l_fum.sroa.0.0.l_fum.sroa.0.0.copyload.noalias = call i32* @llvm.noalias.p0i32.p0i8.p0p0i32.i64(i32* %l_fum.sroa.0.0.copyload, i8* null, i32** %l_fum.sroa.0.0.tmp5.sroa_idx, i64 0, metadata !3) +; CHECK: %l_fum.sroa.[[SROA:[0-9]+]].0.l_fum.sroa.[[SROA]].0.copyload.noalias = call i32* @llvm.noalias.p0i32.p0i8.p0p0i32.i64(i32* %l_fum.sroa.[[SROA]].0.copyload, i8* null, i32** %l_fum.sroa.[[SROA]].0.tmp5.sroa_idx3, i64 0, metadata !3) +; CHECK-NOT: llvm.noalias.copy.guard +; CHECK-NOT: llvm.noalias.p +; CHECK-NOT: llvm.noalias.decl +; CHECK: %2 = call i8* @llvm.noalias.decl.p0i8.p0p0i32.i64(i32** null, i64 0, metadata !14) +; CHECK: %3 = call i8* @llvm.noalias.decl.p0i8.p0p0i32.i64(i32** null, i64 4, metadata !14) +; CHECK-NOT: llvm.noalias.copy.guard +; CHECK-NOT: llvm.noalias.p +; CHECK-NOT: llvm.noalias.decl +; CHECK: %l2_fum.sroa.0.0.l2_fum.sroa.0.0.copyload.noalias = call i32* @llvm.noalias.p0i32.p0i8.p0p0i32.i64(i32* %l_fum.sroa.0.0.l_fum.sroa.0.0.copyload.noalias, i8* %0, i32** null, i64 0, metadata !6) +; CHECK: %l2_fum.sroa.7.0.l2_fum.sroa.7.0.copyload.noalias = call i32* @llvm.noalias.p0i32.p0i8.p0p0i32.i64(i32* %l_fum.sroa.[[SROA]].0.l_fum.sroa.[[SROA]].0.copyload.noalias, i8* %1, i32** null, i64 4, metadata !6) +; CHECK-NOT: llvm.noalias.copy.guard +; CHECK-NOT: llvm.noalias.p +; CHECK-NOT: llvm.noalias.decl +; CHECK: %tmp12 = call i32* @llvm.noalias.p0i32.p0i8.p0p0i32.i64(i32* %l2_fum.sroa.0.0.l2_fum.sroa.0.0.copyload.noalias, i8* %2, i32** null, i64 0, metadata !14), !tbaa !16, !noalias !19 +; CHECK-NOT: llvm.noalias.copy.guard +; CHECK-NOT: llvm.noalias.p +; CHECK-NOT: llvm.noalias.decl +; CHECK: %4 = call i8* @llvm.noalias.decl.p0i8.p0p0i32.i64(i32** null, i64 0, metadata !22) +; CHECK: %5 = call i8* @llvm.noalias.decl.p0i8.p0p0i32.i64(i32** null, i64 4, metadata !22) +; CHECK-NOT: llvm.noalias.copy.guard +; CHECK-NOT: llvm.noalias.p +; CHECK-NOT: llvm.noalias.decl +; CHECK: %l3_fum.sroa.0.0.l3_fum.sroa.0.0.copyload.noalias = call i32* @llvm.noalias.p0i32.p0i8.p0p0i32.i64(i32* %l_fum.sroa.0.0.l_fum.sroa.0.0.copyload.noalias, i8* %0, i32** null, i64 0, metadata !6) +; CHECK: %l3_fum.sroa.5.0.l3_fum.sroa.5.0.copyload.noalias = call i32* @llvm.noalias.p0i32.p0i8.p0p0i32.i64(i32* %l_fum.sroa.[[SROA]].0.l_fum.sroa.[[SROA]].0.copyload.noalias, i8* %1, i32** null, i64 4, metadata !6) +; CHECK-NOT: llvm.noalias.copy.guard +; CHECK-NOT: llvm.noalias.p +; CHECK-NOT: llvm.noalias.decl +; CHECK: %tmp20 = call i32* @llvm.noalias.p0i32.p0i8.p0p0i32.i64(i32* %l3_fum.sroa.5.0.l3_fum.sroa.5.0.copyload.noalias, i8* %5, i32** null, i64 4, metadata !22), !tbaa !24, !noalias !25 +; CHECK-NOT: llvm.noalias.copy.guard +; CHECK-NOT: llvm.noalias.p +; CHECK-NOT: llvm.noalias.decl +; CHECK: ret void + +; Function Attrs: argmemonly nounwind willreturn +declare void @llvm.lifetime.start.p0i8(i64 immarg %0, i8* nocapture %1) #1 + +; Function Attrs: argmemonly nounwind +declare i8* @llvm.noalias.decl.p0i8.p0s_struct.FUMs.i64(%struct.FUM* %0, i64 %1, metadata %2) #2 + +; Function Attrs: nounwind readnone +declare %struct.FUM* @llvm.noalias.copy.guard.p0s_struct.FUMs.p0i8(%struct.FUM* %0, i8* %1, metadata %2, metadata %3) #3 + +; Function Attrs: argmemonly nounwind 
willreturn +declare void @llvm.memcpy.p0i8.p0i8.i32(i8* noalias nocapture writeonly %0, i8* noalias nocapture readonly %1, i32 %2, i1 immarg %3) #1 + +; Function Attrs: argmemonly nounwind speculatable +declare i32* @llvm.noalias.p0i32.p0i8.p0p0i32.i64(i32* %0, i8* %1, i32** %2, i64 %3, metadata %4) #4 + +; Function Attrs: argmemonly nounwind willreturn +declare void @llvm.lifetime.end.p0i8(i64 immarg %0, i8* nocapture %1) #1 + +; Function Attrs: nounwind +define dso_local void @test02_aggloadstore(%struct.FUM* %a_fum) #0 !noalias !29 { +entry: + %a_fum.addr = alloca %struct.FUM*, align 4 + %l_fum = alloca %struct.FUM, align 4 + %l2_fum = alloca %struct.FUM, align 4 + %l3_fum = alloca %struct.FUM, align 4 + store %struct.FUM* %a_fum, %struct.FUM** %a_fum.addr, align 4, !tbaa !6, !noalias !32 + %tmp0 = bitcast %struct.FUM* %l_fum to i8* + call void @llvm.lifetime.start.p0i8(i64 8, i8* %tmp0) #5, !noalias !32 + %tmp1 = call i8* @llvm.noalias.decl.p0i8.p0s_struct.FUMs.i64(%struct.FUM* %l_fum, i64 0, metadata !34), !noalias !32 + %tmp2 = load %struct.FUM*, %struct.FUM** %a_fum.addr, align 4, !tbaa !6, !noalias !32 + %tmp3 = call %struct.FUM* @llvm.noalias.copy.guard.p0s_struct.FUMs.p0i8(%struct.FUM* %tmp2, i8* null, metadata !13, metadata !29) + %cp1 = load %struct.FUM, %struct.FUM* %tmp3, align 4 + store %struct.FUM %cp1, %struct.FUM* %l_fum, align 4 + %tmp6 = bitcast %struct.FUM* %l2_fum to i8* + call void @llvm.lifetime.start.p0i8(i64 8, i8* %tmp6) #5, !noalias !35 + %tmp7 = call i8* @llvm.noalias.decl.p0i8.p0s_struct.FUMs.i64(%struct.FUM* %l2_fum, i64 0, metadata !37), !noalias !35 + %tmp8 = call %struct.FUM* @llvm.noalias.copy.guard.p0s_struct.FUMs.p0i8(%struct.FUM* %l_fum, i8* %tmp1, metadata !13, metadata !34) + %cp2 = load %struct.FUM, %struct.FUM* %tmp8, align 4 + store %struct.FUM %cp2, %struct.FUM* %l2_fum, align 4 + %p0 = getelementptr inbounds %struct.FUM, %struct.FUM* %l2_fum, i32 0, i32 0 + %tmp11 = load i32*, i32** %p0, align 4, !tbaa !20, !noalias !35 + %tmp12 = call i32* @llvm.noalias.p0i32.p0i8.p0p0i32.i64(i32* %tmp11, i8* %tmp7, i32** %p0, i64 0, metadata !37), !tbaa !20, !noalias !35 + store i32 42, i32* %tmp12, align 4, !tbaa !23, !noalias !35 + %tmp13 = bitcast %struct.FUM* %l2_fum to i8* + call void @llvm.lifetime.end.p0i8(i64 8, i8* %tmp13) #5, !noalias !32 + %tmp14 = bitcast %struct.FUM* %l3_fum to i8* + call void @llvm.lifetime.start.p0i8(i64 8, i8* %tmp14) #5, !noalias !38 + %tmp15 = call i8* @llvm.noalias.decl.p0i8.p0s_struct.FUMs.i64(%struct.FUM* %l3_fum, i64 0, metadata !40), !noalias !38 + %tmp16 = call %struct.FUM* @llvm.noalias.copy.guard.p0s_struct.FUMs.p0i8(%struct.FUM* %l_fum, i8* %tmp1, metadata !13, metadata !34) + %cp3 = load %struct.FUM, %struct.FUM* %tmp16, align 4 + store %struct.FUM %cp3, %struct.FUM* %l3_fum, align 4 + %m1 = getelementptr inbounds %struct.FUM, %struct.FUM* %l3_fum, i32 0, i32 1 + %p = getelementptr inbounds %struct.FOO, %struct.FOO* %m1, i32 0, i32 0 + %tmp19 = load i32*, i32** %p, align 4, !tbaa !28, !noalias !38 + %tmp20 = call i32* @llvm.noalias.p0i32.p0i8.p0p0i32.i64(i32* %tmp19, i8* %tmp15, i32** %p, i64 0, metadata !40), !tbaa !28, !noalias !38 + store i32 43, i32* %tmp20, align 4, !tbaa !23, !noalias !38 + %tmp21 = bitcast %struct.FUM* %l3_fum to i8* + call void @llvm.lifetime.end.p0i8(i64 8, i8* %tmp21) #5, !noalias !32 + %tmp22 = bitcast %struct.FUM* %l_fum to i8* + call void @llvm.lifetime.end.p0i8(i64 8, i8* %tmp22) #5 + ret void +} + +; CHECK-LABEL: @test02_aggloadstore +; CHECK-NOT: alloca +; CHECK: %0 = call i8* 
@llvm.noalias.decl.p0i8.p0p0i32.i64(i32** null, i64 0, metadata !29) +; CHECK: %1 = call i8* @llvm.noalias.decl.p0i8.p0p0i32.i64(i32** null, i64 4, metadata !29) +; CHECK: %tmp3 = call %struct.FUM* @llvm.noalias.copy.guard.p0s_struct.FUMs.p0i8(%struct.FUM* %a_fum, i8* null, metadata !31, metadata !26) +; CHECK-NOT: llvm.noalias.copy.guard +; CHECK-NOT: llvm.noalias.p +; CHECK-NOT: llvm.noalias.decl +; CHECK: %cp1.fca.0.load.noalias = call i32* @llvm.noalias.p0i32.p0i8.p0p0i32.i64(i32* %cp1.fca.0.load, i8* null, i32** %cp1.fca.0.{{gep5|gep3}}, i64 0, metadata !26) +; CHECK: %cp1.fca.1.0.load.noalias = call i32* @llvm.noalias.p0i32.p0i8.p0p0i32.i64(i32* %cp1.fca.1.0.load, i8* null, i32** %cp1.fca.1.0.{{gep6|gep4}}, i64 0, metadata !26) +; CHECK-NOT: llvm.noalias.copy.guard +; CHECK-NOT: llvm.noalias.p +; CHECK-NOT: llvm.noalias.decl +; CHECK: %2 = call i8* @llvm.noalias.decl.p0i8.p0p0i32.i64(i32** null, i64 0, metadata !34) +; CHECK: %3 = call i8* @llvm.noalias.decl.p0i8.p0p0i32.i64(i32** null, i64 4, metadata !34) +; CHECK-NOT: llvm.noalias.copy.guard +; CHECK-NOT: llvm.noalias.p +; CHECK-NOT: llvm.noalias.decl +; CHECK: %cp2.fca.0.load.noalias = call i32* @llvm.noalias.p0i32.p0i8.p0p0i32.i64(i32* %cp1.fca.0.extract, i8* %0, i32** null, i64 0, metadata !29) +; CHECK: %cp2.fca.1.0.load.noalias = call i32* @llvm.noalias.p0i32.p0i8.p0p0i32.i64(i32* %cp1.fca.1.0.extract, i8* %1, i32** null, i64 4, metadata !29) +; CHECK-NOT: llvm.noalias.copy.guard +; CHECK-NOT: llvm.noalias.p +; CHECK-NOT: llvm.noalias.decl +; CHECK: %tmp12 = call i32* @llvm.noalias.p0i32.p0i8.p0p0i32.i64(i32* %cp2.fca.{{.*}}.extract, i8* %2, i32** null, i64 0, metadata !34), !tbaa !16, !noalias !36 +; CHECK-NOT: llvm.noalias.copy.guard +; CHECK-NOT: llvm.noalias.p +; CHECK-NOT: llvm.noalias.decl +; CHECK: %4 = call i8* @llvm.noalias.decl.p0i8.p0p0i32.i64(i32** null, i64 0, metadata !37) +; CHECK: %5 = call i8* @llvm.noalias.decl.p0i8.p0p0i32.i64(i32** null, i64 4, metadata !37) +; CHECK-NOT: llvm.noalias.copy.guard +; CHECK-NOT: llvm.noalias.p +; CHECK-NOT: llvm.noalias.decl +; CHECK: %cp3.fca.0.load.noalias = call i32* @llvm.noalias.p0i32.p0i8.p0p0i32.i64(i32* %cp1.fca.0.extract, i8* %0, i32** null, i64 0, metadata !29) +; CHECK: %cp3.fca.1.0.load.noalias = call i32* @llvm.noalias.p0i32.p0i8.p0p0i32.i64(i32* %cp1.fca.1.0.extract, i8* %1, i32** null, i64 4, metadata !29) +; CHECK-NOT: llvm.noalias.copy.guard +; CHECK-NOT: llvm.noalias.p +; CHECK-NOT: llvm.noalias.decl +; CHECK: %tmp20 = call i32* @llvm.noalias.p0i32.p0i8.p0p0i32.i64(i32* %{{cp3|cp2}}.fca.1.0.extract, i8* %5, i32** null, i64 4, metadata !37), !tbaa !24, !noalias !39 +; CHECK-NOT: llvm.noalias.copy.guard +; CHECK-NOT: llvm.noalias.p +; CHECK-NOT: llvm.noalias.decl +; CHECK: ret void + +; Function Attrs: nounwind +define dso_local void @test03_i64loadstore(%struct.FUM* %a_fum) #0 !noalias !41 { +entry: + %a_fum.addr = alloca %struct.FUM*, align 4 + %l_fum = alloca %struct.FUM, align 4 + %l2_fum = alloca %struct.FUM, align 4 + %l3_fum = alloca %struct.FUM, align 4 + store %struct.FUM* %a_fum, %struct.FUM** %a_fum.addr, align 4, !tbaa !6, !noalias !44 + %tmp0 = bitcast %struct.FUM* %l_fum to i8* + call void @llvm.lifetime.start.p0i8(i64 8, i8* %tmp0) #5, !noalias !44 + %tmp1 = call i8* @llvm.noalias.decl.p0i8.p0s_struct.FUMs.i64(%struct.FUM* %l_fum, i64 0, metadata !46), !noalias !44 + %tmp2 = load %struct.FUM*, %struct.FUM** %a_fum.addr, align 4, !tbaa !6, !noalias !44 + %tmp3 = call %struct.FUM* @llvm.noalias.copy.guard.p0s_struct.FUMs.p0i8(%struct.FUM* %tmp2, 
i8* null, metadata !13, metadata !41) + %tmp4 = bitcast %struct.FUM* %l_fum to i64* + %tmp5 = bitcast %struct.FUM* %tmp3 to i64* + %cp1 = load i64, i64* %tmp5, align 4 + store i64 %cp1, i64* %tmp4, align 4 + %tmp6 = bitcast %struct.FUM* %l2_fum to i8* + call void @llvm.lifetime.start.p0i8(i64 8, i8* %tmp6) #5, !noalias !47 + %tmp7 = call i8* @llvm.noalias.decl.p0i8.p0s_struct.FUMs.i64(%struct.FUM* %l2_fum, i64 0, metadata !49), !noalias !47 + %tmp8 = call %struct.FUM* @llvm.noalias.copy.guard.p0s_struct.FUMs.p0i8(%struct.FUM* %l_fum, i8* %tmp1, metadata !13, metadata !46) + %tmp9 = bitcast %struct.FUM* %l2_fum to i64* + %tmp10 = bitcast %struct.FUM* %tmp8 to i64* + %cp2 = load i64, i64* %tmp10, align 4 + store i64 %cp2, i64* %tmp9, align 4 + %p0 = getelementptr inbounds %struct.FUM, %struct.FUM* %l2_fum, i32 0, i32 0 + %tmp11 = load i32*, i32** %p0, align 4, !tbaa !20, !noalias !47 + %tmp12 = call i32* @llvm.noalias.p0i32.p0i8.p0p0i32.i64(i32* %tmp11, i8* %tmp7, i32** %p0, i64 0, metadata !49), !tbaa !20, !noalias !47 + store i32 42, i32* %tmp12, align 4, !tbaa !23, !noalias !47 + %tmp13 = bitcast %struct.FUM* %l2_fum to i8* + call void @llvm.lifetime.end.p0i8(i64 8, i8* %tmp13) #5, !noalias !44 + %tmp14 = bitcast %struct.FUM* %l3_fum to i8* + call void @llvm.lifetime.start.p0i8(i64 8, i8* %tmp14) #5, !noalias !50 + %tmp15 = call i8* @llvm.noalias.decl.p0i8.p0s_struct.FUMs.i64(%struct.FUM* %l3_fum, i64 0, metadata !52), !noalias !50 + %tmp16 = call %struct.FUM* @llvm.noalias.copy.guard.p0s_struct.FUMs.p0i8(%struct.FUM* %l_fum, i8* %tmp1, metadata !13, metadata !46) + %tmp17 = bitcast %struct.FUM* %l3_fum to i64* + %tmp18 = bitcast %struct.FUM* %tmp16 to i64* + %cp3 = load i64, i64* %tmp18, align 4 + store i64 %cp3, i64* %tmp17, align 4 + %m1 = getelementptr inbounds %struct.FUM, %struct.FUM* %l3_fum, i32 0, i32 1 + %p = getelementptr inbounds %struct.FOO, %struct.FOO* %m1, i32 0, i32 0 + %tmp19 = load i32*, i32** %p, align 4, !tbaa !28, !noalias !50 + %tmp20 = call i32* @llvm.noalias.p0i32.p0i8.p0p0i32.i64(i32* %tmp19, i8* %tmp15, i32** %p, i64 0, metadata !52), !tbaa !28, !noalias !50 + store i32 43, i32* %tmp20, align 4, !tbaa !23, !noalias !50 + %tmp21 = bitcast %struct.FUM* %l3_fum to i8* + call void @llvm.lifetime.end.p0i8(i64 8, i8* %tmp21) #5, !noalias !44 + %tmp22 = bitcast %struct.FUM* %l_fum to i8* + call void @llvm.lifetime.end.p0i8(i64 8, i8* %tmp22) #5 + ret void +} + +; CHECK-LABEL: @test03_i64loadstore +; CHECK-NOT: alloca +; CHECK: %0 = call i8* @llvm.noalias.decl.p0i8.p0p0i32.i64(i32** null, i64 0, metadata !43) +; CHECK: %1 = call i8* @llvm.noalias.decl.p0i8.p0p0i32.i64(i32** null, i64 4, metadata !43) +; CHECK-NOT: llvm.noalias.copy.guard +; CHECK-NOT: llvm.noalias.p +; CHECK-NOT: llvm.noalias.decl +; CHECK: %{{cp1[0-9]*}}.sroa_as_ptr.noalias = call i32* @llvm.noalias.p0i32.p0i8.p0p0i32.i64(i32* %{{cp1[0-9]*}}.sroa_as_ptr, i8* null, i32** %2, i64 0, metadata !40) +; CHECK: %{{cp1[0-9]*}}.sroa_as_ptr.noalias = call i32* @llvm.noalias.p0i32.p0i8.p0p0i32.i64(i32* %{{cp1[0-9]*}}.sroa_as_ptr, i8* null, i32** %3, i64 0, metadata !40) +; CHECK-NOT: llvm.noalias.copy.guard +; CHECK-NOT: llvm.noalias.p +; CHECK-NOT: llvm.noalias.decl +; CHECK: %6 = call i8* @llvm.noalias.decl.p0i8.p0p0i32.i64(i32** null, i64 0, metadata !45) +; CHECK: %7 = call i8* @llvm.noalias.decl.p0i8.p0p0i32.i64(i32** null, i64 4, metadata !45) +; CHECK-NOT: llvm.noalias.copy.guard +; CHECK-NOT: llvm.noalias.p +; CHECK-NOT: llvm.noalias.decl +; CHECK_S: %cp21.sroa_as_ptr.noalias = call i32* 
@llvm.noalias.p0i32.p0i8.p0p0i32.i64(i32* %4, i8* %0, i32** null, i64 0, metadata !43) +; CHECK_S: %cp22.sroa_as_ptr.noalias = call i32* @llvm.noalias.p0i32.p0i8.p0p0i32.i64(i32* %5, i8* %1, i32** null, i64 4, metadata !43) +; CHECK: %cp24.sroa_as_ptr.noalias = call i32* @llvm.noalias.p0i32.p0i8.p0p0i32.i64(i32* %4, i8* %0, i32** null, i64 0, metadata !43) +; CHECK: %{{cp26|cp25}}.sroa_as_ptr.noalias = call i32* @llvm.noalias.p0i32.p0i8.p0p0i32.i64(i32* %5, i8* %1, i32** null, i64 4, metadata !43) +; CHECK-NOT: llvm.noalias.copy.guard +; CHECK-NOT: llvm.noalias.p +; CHECK-NOT: llvm.noalias.decl +; CHECK: %tmp12 = call i32* @llvm.noalias.p0i32.p0i8.p0p0i32.i64(i32* %8, i8* %6, i32** null, i64 0, metadata !45), !tbaa !16, !noalias !47 +; CHECK-NOT: llvm.noalias.copy.guard +; CHECK-NOT: llvm.noalias.p +; CHECK-NOT: llvm.noalias.decl +; CHECK: %10 = call i8* @llvm.noalias.decl.p0i8.p0p0i32.i64(i32** null, i64 0, metadata !48) +; CHECK: %11 = call i8* @llvm.noalias.decl.p0i8.p0p0i32.i64(i32** null, i64 4, metadata !48) +; CHECK-NOT: llvm.noalias.copy.guard +; CHECK-NOT: llvm.noalias.p +; CHECK-NOT: llvm.noalias.decl +; CHECK: %{{cp3[0-9]*}}.sroa_as_ptr.noalias = call i32* @llvm.noalias.p0i32.p0i8.p0p0i32.i64(i32* %4, i8* %0, i32** null, i64 0, metadata !43) +; CHECK: %{{cp3[0-9]*}}.sroa_as_ptr.noalias = call i32* @llvm.noalias.p0i32.p0i8.p0p0i32.i64(i32* %5, i8* %1, i32** null, i64 4, metadata !43) +; CHECK-NOT: llvm.noalias.copy.guard +; CHECK-NOT: llvm.noalias.p +; CHECK-NOT: llvm.noalias.decl +; CHECK: %tmp20 = call i32* @llvm.noalias.p0i32.p0i8.p0p0i32.i64(i32* %13, i8* %11, i32** null, i64 4, metadata !48), !tbaa !24, !noalias !50 +; CHECK-NOT: llvm.noalias.copy.guard +; CHECK-NOT: llvm.noalias.p +; CHECK-NOT: llvm.noalias.decl +; CHECK: ret void + + +attributes #0 = { nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="none" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="pentium4" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #1 = { argmemonly nounwind willreturn } +attributes #2 = { argmemonly nounwind } +attributes #3 = { nounwind readnone } +attributes #4 = { argmemonly nounwind speculatable } +attributes #5 = { nounwind } + +!llvm.module.flags = !{!0, !1} +!llvm.ident = !{!2} + +!0 = !{i32 1, !"NumRegisterParameters", i32 0} +!1 = !{i32 1, !"wchar_size", i32 4} +!2 = !{!"clang version"} +!3 = !{!4} +!4 = distinct !{!4, !5, !"test01_memcpy: unknown scope"} +!5 = distinct !{!5, !"test01_memcpy"} +!6 = !{!7, !7, i64 0} +!7 = !{!"any pointer", !8, i64 0} +!8 = !{!"omnipotent char", !9, i64 0} +!9 = !{!"Simple C/C++ TBAA"} +!10 = !{!11, !4} +!11 = distinct !{!11, !5, !"test01_memcpy: l_fum"} +!12 = !{!11} +!13 = !{!14, !15} +!14 = !{i32 -1, i32 0} +!15 = !{i32 -1, i32 1, i32 0} +!16 = !{i64 0, i64 4, !6, i64 4, i64 4, !6} +!17 = !{!18, !11, !4} +!18 = distinct !{!18, !5, !"test01_memcpy: l2_fum"} +!19 = !{!18} +!20 = !{!21, !7, i64 0} +!21 = !{!"FUM", !7, i64 0, !22, i64 4} +!22 = !{!"FOO", !7, i64 0} +!23 = !{!24, !24, i64 0} +!24 = !{!"int", !8, i64 0} +!25 = !{!26, !11, !4} +!26 = distinct !{!26, !5, !"test01_memcpy: l3_fum"} +!27 = !{!26} +!28 = !{!21, !7, i64 4} +!29 = !{!30} +!30 = distinct !{!30, !31, !"test02_aggloadstore: unknown scope"} +!31 = distinct !{!31, 
!"test02_aggloadstore"} +!32 = !{!33, !30} +!33 = distinct !{!33, !31, !"test02_aggloadstore: l_fum"} +!34 = !{!33} +!35 = !{!36, !33, !30} +!36 = distinct !{!36, !31, !"test02_aggloadstore: l2_fum"} +!37 = !{!36} +!38 = !{!39, !33, !30} +!39 = distinct !{!39, !31, !"test02_aggloadstore: l3_fum"} +!40 = !{!39} +!41 = !{!42} +!42 = distinct !{!42, !43, !"test03_i64loadstore: unknown scope"} +!43 = distinct !{!43, !"test03_i64loadstore"} +!44 = !{!45, !42} +!45 = distinct !{!45, !43, !"test03_i64loadstore: l_fum"} +!46 = !{!45} +!47 = !{!48, !45, !42} +!48 = distinct !{!48, !43, !"test03_i64loadstore: l2_fum"} +!49 = !{!48} +!50 = !{!51, !45, !42} +!51 = distinct !{!51, !43, !"test03_i64loadstore: l3_fum"} +!52 = !{!51} + +; CHECK: !0 = !{i32 1, !"NumRegisterParameters", i32 0} +; CHECK: !1 = !{i32 1, !"wchar_size", i32 4} +; CHECK: !2 = !{!"clang version"} +; CHECK: !3 = !{!4} +; CHECK: !4 = distinct !{!4, !5, !"test01_memcpy: unknown scope"} +; CHECK: !5 = distinct !{!5, !"test01_memcpy"} +; CHECK: !6 = !{!7} +; CHECK: !7 = distinct !{!7, !5, !"test01_memcpy: l_fum"} +; CHECK: !8 = !{i64 0, i64 4, !9, i64 4, i64 4, !9} +; CHECK: !9 = !{!10, !10, i64 0} +; CHECK: !10 = !{!"any pointer", !11, i64 0} +; CHECK: !11 = !{!"omnipotent char", !12, i64 0} +; CHECK: !12 = !{!"Simple C/C++ TBAA"} +; CHECK: !13 = !{!7, !4} +; CHECK: !14 = !{!15} +; CHECK: !15 = distinct !{!15, !5, !"test01_memcpy: l2_fum"} +; CHECK: !16 = !{!17, !10, i64 0} +; CHECK: !17 = !{!"FUM", !10, i64 0, !18, i64 4} +; CHECK: !18 = !{!"FOO", !10, i64 0} +; CHECK: !19 = !{!15, !7, !4} +; CHECK: !20 = !{!21, !21, i64 0} +; CHECK: !21 = !{!"int", !11, i64 0} +; CHECK: !22 = !{!23} +; CHECK: !23 = distinct !{!23, !5, !"test01_memcpy: l3_fum"} +; CHECK: !24 = !{!17, !10, i64 4} +; CHECK: !25 = !{!23, !7, !4} +; CHECK: !26 = !{!27} +; CHECK: !27 = distinct !{!27, !28, !"test02_aggloadstore: unknown scope"} +; CHECK: !28 = distinct !{!28, !"test02_aggloadstore"} +; CHECK: !29 = !{!30} +; CHECK: !30 = distinct !{!30, !28, !"test02_aggloadstore: l_fum"} +; CHECK: !31 = !{!32, !33} +; CHECK: !32 = !{i32 -1, i32 0} +; CHECK: !33 = !{i32 -1, i32 1, i32 0} +; CHECK: !34 = !{!35} +; CHECK: !35 = distinct !{!35, !28, !"test02_aggloadstore: l2_fum"} +; CHECK: !36 = !{!35, !30, !27} +; CHECK: !37 = !{!38} +; CHECK: !38 = distinct !{!38, !28, !"test02_aggloadstore: l3_fum"} +; CHECK: !39 = !{!38, !30, !27} +; CHECK: !40 = !{!41} +; CHECK: !41 = distinct !{!41, !42, !"test03_i64loadstore: unknown scope"} +; CHECK: !42 = distinct !{!42, !"test03_i64loadstore"} +; CHECK: !43 = !{!44} +; CHECK: !44 = distinct !{!44, !42, !"test03_i64loadstore: l_fum"} +; CHECK: !45 = !{!46} +; CHECK: !46 = distinct !{!46, !42, !"test03_i64loadstore: l2_fum"} +; CHECK: !47 = !{!46, !44, !41} +; CHECK: !48 = !{!49} +; CHECK: !49 = distinct !{!49, !42, !"test03_i64loadstore: l3_fum"} +; CHECK: !50 = !{!49, !44, !41} Index: llvm/test/Transforms/SROA/noalias_copy_guard.ll =================================================================== --- /dev/null +++ llvm/test/Transforms/SROA/noalias_copy_guard.ll @@ -0,0 +1,357 @@ +; using memcpy: +; RUN: sed < %s -e 's,;V1 , ,' | opt -sroa -S | FileCheck %s --check-prefixes=CHECK,CHECK_V1 +; RUN: sed < %s -e 's,;V1 , ,' | opt -passes=sroa -S | FileCheck %s --check-prefixes=CHECK,CHECK_V1 + +; using aggregate load/store: +; RUN: sed < %s -e 's,;V2 , ,' | opt -sroa -S | FileCheck %s --check-prefixes=CHECK,CHECK_V2 +; RUN: sed < %s -e 's,;V2 , ,' | opt -passes=sroa -S | FileCheck %s --check-prefixes=CHECK,CHECK_V2 + +; using i64 
load/store:
+; RUN: sed < %s -e 's,;V3 , ,' | opt -sroa -S | FileCheck %s --check-prefixes=CHECK,CHECK_V3
+; RUN: sed < %s -e 's,;V3 , ,' | opt -passes=sroa -S | FileCheck %s --check-prefixes=CHECK,CHECK_V3
+
+
+; Validate that SROA correctly deduces noalias pointers when removing:
+; - llvm.memcpy
+; - aggregate load/store
+; - copying the struct through i64
+
+; General form of each function is based on:
+; ------
+; struct FOO {
+;   int* __restrict p;
+; };
+;
+; struct FUM {
+;   int* __restrict p0;
+;   struct FOO m1;
+; };
+;
+; void test01(struct FUM* a_fum)
+; {
+;   struct FUM l_fum = *a_fum;
+;   *l_fum.p0 = 42;
+; }
+;
+; void test02(struct FUM* a_fum)
+; {
+;   struct FUM l_fum = *a_fum;
+;   *l_fum.m1.p = 43;
+; }
+;
+; void test03(struct FUM* a_fum)
+; {
+;   struct FUM l_fum = *a_fum;
+;   *l_fum.p0 = 42;
+;   *l_fum.m1.p = 43;
+; }
+
+
+; ModuleID = 'test3.c'
+source_filename = "test3.c"
+target datalayout = "e-m:e-p:32:32-p270:32:32-p271:32:32-p272:64:64-f64:32:64-f80:32-n8:16:32-S128"
+target triple = "i386-unknown-linux-gnu"
+
+%struct.FUM = type { i32*, %struct.FOO }
+%struct.FOO = type { i32* }
+
+; Function Attrs: nounwind
+define dso_local void @test01(%struct.FUM* %a_fum) #0 !noalias !3 {
+entry:
+  %a_fum.addr = alloca %struct.FUM*, align 4
+  %l_fum = alloca %struct.FUM, align 4
+  store %struct.FUM* %a_fum, %struct.FUM** %a_fum.addr, align 4, !tbaa !6, !noalias !10
+  %tmp0 = bitcast %struct.FUM* %l_fum to i8*
+  call void @llvm.lifetime.start.p0i8(i64 8, i8* %tmp0) #5, !noalias !10
+  %tmp1 = call i8* @llvm.noalias.decl.p0i8.p0s_struct.FUMs.i64(%struct.FUM* %l_fum, i64 0, metadata !12), !noalias !10
+  %tmp2 = load %struct.FUM*, %struct.FUM** %a_fum.addr, align 4, !tbaa !6, !noalias !10
+  %tmp3 = call %struct.FUM* @llvm.noalias.copy.guard.p0s_struct.FUMs.p0i8(%struct.FUM* %tmp2, i8* null, metadata !13, metadata !3)
+
+;V1   %tmp4 = bitcast %struct.FUM* %l_fum to i8*
+;V1   %tmp5 = bitcast %struct.FUM* %tmp3 to i8*
+;V1   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 %tmp4, i8* align 4 %tmp5, i32 8, i1 false), !tbaa.struct !16, !noalias !10
+
+;V2   %cp1 = load %struct.FUM, %struct.FUM* %tmp3, align 4
+;V2   store %struct.FUM %cp1, %struct.FUM* %l_fum, align 4
+
+;V3   %tmp4 = bitcast %struct.FUM* %l_fum to i64*
+;V3   %tmp5 = bitcast %struct.FUM* %tmp3 to i64*
+;V3   %cp1 = load i64, i64* %tmp5, align 4
+;V3   store i64 %cp1, i64* %tmp4, align 4
+
+  %p0 = getelementptr inbounds %struct.FUM, %struct.FUM* %l_fum, i32 0, i32 0
+  %tmp6 = load i32*, i32** %p0, align 4, !tbaa !17, !noalias !10
+  %tmp7 = call i32* @llvm.noalias.p0i32.p0i8.p0p0i32.i64(i32* %tmp6, i8* %tmp1, i32** %p0, i64 0, metadata !12), !tbaa !17, !noalias !10
+  store i32 42, i32* %tmp7, align 4, !tbaa !20, !noalias !10
+  %tmp8 = bitcast %struct.FUM* %l_fum to i8*
+  call void @llvm.lifetime.end.p0i8(i64 8, i8* %tmp8) #5
+  ret void
+}
+
+; CHECK-LABEL: test01
+; CHECK: %0 = call i8* @llvm.noalias.decl.p0i8.p0p0i32.i64(i32** null, i64 0, metadata !6)
+; CHECK: %1 = call i8* @llvm.noalias.decl.p0i8.p0p0i32.i64(i32** null, i64 4, metadata !6)
+; CHECK-NOT: llvm.noalias.copy.guard
+; CHECK_V2: %tmp3 = call %struct.FUM* @llvm.noalias.copy.guard.p0s_struct.FUMs.p0i8(%struct.FUM* %a_fum, i8* null, metadata !8, metadata !3)
+; CHECK-NOT: llvm.noalias.copy.guard
+; CHECK: %{{.*}} = call i32* @llvm.noalias.p0i32.p0i8.p0p0i32.i64(i32* %{{.*}}, i8* null, i32** %{{.*}}, i64 0, metadata !3)
+; CHECK: %{{.*}} = call i32* @llvm.noalias.p0i32.p0i8.p0p0i32.i64(i32* %{{.*}}, i8* null, i32** %{{.*}}, i64 0, metadata !3)
+; CHECK: %tmp7 = call i32*
@llvm.noalias.p0i32.p0i8.p0p0i32.i64(i32* %{{.*}}, i8* %0, i32** null, i64 0, metadata !6) +; CHECK: ret void + +; Function Attrs: argmemonly nounwind willreturn +declare void @llvm.lifetime.start.p0i8(i64 immarg %tmp0, i8* nocapture %tmp1) #1 + +; Function Attrs: argmemonly nounwind +declare i8* @llvm.noalias.decl.p0i8.p0s_struct.FUMs.i64(%struct.FUM* %tmp0, i64 %tmp1, metadata %tmp2) #2 + +; Function Attrs: nounwind readnone +declare %struct.FUM* @llvm.noalias.copy.guard.p0s_struct.FUMs.p0i8(%struct.FUM* %tmp0, i8* %tmp1, metadata %tmp2, metadata %tmp3) #3 + +; Function Attrs: argmemonly nounwind willreturn +declare void @llvm.memcpy.p0i8.p0i8.i32(i8* noalias nocapture writeonly %tmp0, i8* noalias nocapture readonly %tmp1, i32 %tmp2, i1 immarg %tmp3) #1 + +; Function Attrs: argmemonly nounwind speculatable +declare i32* @llvm.noalias.p0i32.p0i8.p0p0i32.i64(i32* %tmp0, i8* %tmp1, i32** %tmp2, i64 %tmp3, metadata %tmp4) #4 + +; Function Attrs: argmemonly nounwind willreturn +declare void @llvm.lifetime.end.p0i8(i64 immarg %tmp0, i8* nocapture %tmp1) #1 + +; Function Attrs: nounwind +define dso_local void @test02(%struct.FUM* %a_fum) #0 !noalias !22 { +entry: + %a_fum.addr = alloca %struct.FUM*, align 4 + %l_fum = alloca %struct.FUM, align 4 + store %struct.FUM* %a_fum, %struct.FUM** %a_fum.addr, align 4, !tbaa !6, !noalias !25 + %tmp0 = bitcast %struct.FUM* %l_fum to i8* + call void @llvm.lifetime.start.p0i8(i64 8, i8* %tmp0) #5, !noalias !25 + %tmp1 = call i8* @llvm.noalias.decl.p0i8.p0s_struct.FUMs.i64(%struct.FUM* %l_fum, i64 0, metadata !27), !noalias !25 + %tmp2 = load %struct.FUM*, %struct.FUM** %a_fum.addr, align 4, !tbaa !6, !noalias !25 + %tmp3 = call %struct.FUM* @llvm.noalias.copy.guard.p0s_struct.FUMs.p0i8(%struct.FUM* %tmp2, i8* null, metadata !13, metadata !22) + +;V1 %tmp4 = bitcast %struct.FUM* %l_fum to i8* +;V1 %tmp5 = bitcast %struct.FUM* %tmp3 to i8* +;V1 call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 %tmp4, i8* align 4 %tmp5, i32 8, i1 false), !tbaa.struct !16, !noalias !25 + +;V2 %cp1 = load %struct.FUM, %struct.FUM* %tmp3, align 4 +;V2 store %struct.FUM %cp1, %struct.FUM* %l_fum, align 4 + +;V3 %tmp4 = bitcast %struct.FUM* %l_fum to i64* +;V3 %tmp5 = bitcast %struct.FUM* %tmp3 to i64* +;V3 %cp1 = load i64, i64* %tmp5, align 4 +;V3 store i64 %cp1, i64* %tmp4, align 4 + + %m1 = getelementptr inbounds %struct.FUM, %struct.FUM* %l_fum, i32 0, i32 1 + %p = getelementptr inbounds %struct.FOO, %struct.FOO* %m1, i32 0, i32 0 + %tmp6 = load i32*, i32** %p, align 4, !tbaa !28, !noalias !25 + %tmp7 = call i32* @llvm.noalias.p0i32.p0i8.p0p0i32.i64(i32* %tmp6, i8* %tmp1, i32** %p, i64 0, metadata !27), !tbaa !28, !noalias !25 + store i32 43, i32* %tmp7, align 4, !tbaa !20, !noalias !25 + %tmp8 = bitcast %struct.FUM* %l_fum to i8* + call void @llvm.lifetime.end.p0i8(i64 8, i8* %tmp8) #5 + ret void +} + +; CHECK-LABEL: test02 +; CHECK: %0 = call i8* @llvm.noalias.decl.p0i8.p0p0i32.i64(i32** null, i64 0, metadata ![[SCOPE2:[0-9]+]]) +; CHECK: %1 = call i8* @llvm.noalias.decl.p0i8.p0p0i32.i64(i32** null, i64 4, metadata ![[SCOPE2]]) +; CHECK-NOT: llvm.noalias.copy.guard +; CHECK_V2: %tmp3 = call %struct.FUM* @llvm.noalias.copy.guard.p0s_struct.FUMs.p0i8(%struct.FUM* %a_fum, i8* null, metadata !{{[0-9]+}}, metadata !{{[0-9]+}}) +; CHECK-NOT: llvm.noalias.copy.guard +; CHECK: %{{.*}} = call i32* @llvm.noalias.p0i32.p0i8.p0p0i32.i64(i32* %{{.*}}, i8* null, i32** %{{.*}}, i64 0, metadata ![[SCOPE2_OUT:[0-9]+]]) +; CHECK: %{{.*}} = call i32* @llvm.noalias.p0i32.p0i8.p0p0i32.i64(i32* 
%{{.*}}, i8* null, i32** %{{.*}}, i64 0, metadata ![[SCOPE2_OUT]]) +; CHECK: %tmp7 = call i32* @llvm.noalias.p0i32.p0i8.p0p0i32.i64(i32* %{{.*}}, i8* %1, i32** null, i64 4, metadata ![[SCOPE2]]) +; CHECK: ret void + +; Function Attrs: nounwind +define dso_local void @test03(%struct.FUM* %a_fum) #0 !noalias !29 { +entry: + %a_fum.addr = alloca %struct.FUM*, align 4 + %l_fum = alloca %struct.FUM, align 4 + store %struct.FUM* %a_fum, %struct.FUM** %a_fum.addr, align 4, !tbaa !6, !noalias !32 + %tmp0 = bitcast %struct.FUM* %l_fum to i8* + call void @llvm.lifetime.start.p0i8(i64 8, i8* %tmp0) #5, !noalias !32 + %tmp1 = call i8* @llvm.noalias.decl.p0i8.p0s_struct.FUMs.i64(%struct.FUM* %l_fum, i64 0, metadata !34), !noalias !32 + %tmp2 = load %struct.FUM*, %struct.FUM** %a_fum.addr, align 4, !tbaa !6, !noalias !32 + %tmp3 = call %struct.FUM* @llvm.noalias.copy.guard.p0s_struct.FUMs.p0i8(%struct.FUM* %tmp2, i8* null, metadata !13, metadata !29) + +;V1 %tmp4 = bitcast %struct.FUM* %l_fum to i8* +;V1 %tmp5 = bitcast %struct.FUM* %tmp3 to i8* +;V1 call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 %tmp4, i8* align 4 %tmp5, i32 8, i1 false), !tbaa.struct !16, !noalias !32 + +;V2 %cp1 = load %struct.FUM, %struct.FUM* %tmp3, align 4 +;V2 store %struct.FUM %cp1, %struct.FUM* %l_fum, align 4 + +;V3 %tmp4 = bitcast %struct.FUM* %l_fum to i64* +;V3 %tmp5 = bitcast %struct.FUM* %tmp3 to i64* +;V3 %cp1 = load i64, i64* %tmp5, align 4 +;V3 store i64 %cp1, i64* %tmp4, align 4 + + %p0 = getelementptr inbounds %struct.FUM, %struct.FUM* %l_fum, i32 0, i32 0 + %tmp6 = load i32*, i32** %p0, align 4, !tbaa !17, !noalias !32 + %tmp7 = call i32* @llvm.noalias.p0i32.p0i8.p0p0i32.i64(i32* %tmp6, i8* %tmp1, i32** %p0, i64 0, metadata !34), !tbaa !17, !noalias !32 + store i32 42, i32* %tmp7, align 4, !tbaa !20, !noalias !32 + %m1 = getelementptr inbounds %struct.FUM, %struct.FUM* %l_fum, i32 0, i32 1 + %p = getelementptr inbounds %struct.FOO, %struct.FOO* %m1, i32 0, i32 0 + %tmp8 = load i32*, i32** %p, align 4, !tbaa !28, !noalias !32 + %tmp9 = call i32* @llvm.noalias.p0i32.p0i8.p0p0i32.i64(i32* %tmp8, i8* %tmp1, i32** %p, i64 0, metadata !34), !tbaa !28, !noalias !32 + store i32 43, i32* %tmp9, align 4, !tbaa !20, !noalias !32 + %tmp10 = bitcast %struct.FUM* %l_fum to i8* + call void @llvm.lifetime.end.p0i8(i64 8, i8* %tmp10) #5 + ret void +} + +; CHECK-LABEL: test03 +; CHECK: %0 = call i8* @llvm.noalias.decl.p0i8.p0p0i32.i64(i32** null, i64 0, metadata ![[SCOPE3:[0-9]+]]) +; CHECK: %1 = call i8* @llvm.noalias.decl.p0i8.p0p0i32.i64(i32** null, i64 4, metadata ![[SCOPE3]]) +; CHECK-NOT: llvm.noalias.copy.guard +; CHECK_V2: %tmp3 = call %struct.FUM* @llvm.noalias.copy.guard.p0s_struct.FUMs.p0i8(%struct.FUM* %a_fum, i8* null, metadata !8, metadata !{{[0-9]+}}) +; CHECK-NOT: llvm.noalias.copy.guard +; CHECK: %{{.*}} = call i32* @llvm.noalias.p0i32.p0i8.p0p0i32.i64(i32* %{{.*}}, i8* null, i32** %{{.*}}, i64 0, metadata ![[SCOPE3_OUT:[0-9]+]]) +; CHECK: %{{.*}} = call i32* @llvm.noalias.p0i32.p0i8.p0p0i32.i64(i32* %{{.*}}, i8* null, i32** %{{.*}}, i64 0, metadata ![[SCOPE3_OUT]]) +; CHECK: %tmp7 = call i32* @llvm.noalias.p0i32.p0i8.p0p0i32.i64(i32* %{{.*}}, i8* %0, i32** null, i64 0, metadata ![[SCOPE3]]) +; CHECK: %tmp9 = call i32* @llvm.noalias.p0i32.p0i8.p0p0i32.i64(i32* %{{.*}}, i8* %1, i32** null, i64 4, metadata ![[SCOPE3]]) +; CHECK: ret void + +attributes #0 = { nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="none" "less-precise-fpmad"="false" 
"min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="pentium4" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #1 = { argmemonly nounwind willreturn } +attributes #2 = { argmemonly nounwind } +attributes #3 = { nounwind readnone } +attributes #4 = { argmemonly nounwind speculatable } +attributes #5 = { nounwind } + +!llvm.module.flags = !{!0, !1} +!llvm.ident = !{!2} + +!0 = !{i32 1, !"NumRegisterParameters", i32 0} +!1 = !{i32 1, !"wchar_size", i32 4} +!2 = !{!"clang version"} +!3 = !{!4} +!4 = distinct !{!4, !5, !"test01: unknown scope"} +!5 = distinct !{!5, !"test01"} +!6 = !{!7, !7, i64 0} +!7 = !{!"any pointer", !8, i64 0} +!8 = !{!"omnipotent char", !9, i64 0} +!9 = !{!"Simple C/C++ TBAA"} +!10 = !{!11, !4} +!11 = distinct !{!11, !5, !"test01: l_fum"} +!12 = !{!11} +!13 = !{!14, !15} +!14 = !{i32 -1, i32 0} +!15 = !{i32 -1, i32 1, i32 0} +!16 = !{i64 0, i64 4, !6, i64 4, i64 4, !6} +!17 = !{!18, !7, i64 0} +!18 = !{!"FUM", !7, i64 0, !19, i64 4} +!19 = !{!"FOO", !7, i64 0} +!20 = !{!21, !21, i64 0} +!21 = !{!"int", !8, i64 0} +!22 = !{!23} +!23 = distinct !{!23, !24, !"test02: unknown scope"} +!24 = distinct !{!24, !"test02"} +!25 = !{!26, !23} +!26 = distinct !{!26, !24, !"test02: l_fum"} +!27 = !{!26} +!28 = !{!18, !7, i64 4} +!29 = !{!30} +!30 = distinct !{!30, !31, !"test03: unknown scope"} +!31 = distinct !{!31, !"test03"} +!32 = !{!33, !30} +!33 = distinct !{!33, !31, !"test03: l_fum"} +!34 = !{!33} + +; CHECK_V1: !0 = !{i32 1, !"NumRegisterParameters", i32 0} +; CHECK_V1: !1 = !{i32 1, !"wchar_size", i32 4} +; CHECK_V1: !2 = !{!"clang version"} +; CHECK_V1: !3 = !{!4} +; CHECK_V1: !4 = distinct !{!4, !5, !"test01: unknown scope"} +; CHECK_V1: !5 = distinct !{!5, !"test01"} +; CHECK_V1: !6 = !{!7} +; CHECK_V1: !7 = distinct !{!7, !5, !"test01: l_fum"} +; CHECK_V1: !8 = !{i64 0, i64 4, !9, i64 4, i64 4, !9} +; CHECK_V1: !9 = !{!10, !10, i64 0} +; CHECK_V1: !10 = !{!"any pointer", !11, i64 0} +; CHECK_V1: !11 = !{!"omnipotent char", !12, i64 0} +; CHECK_V1: !12 = !{!"Simple C/C++ TBAA"} +; CHECK_V1: !13 = !{!7, !4} +; CHECK_V1: !14 = !{!15, !10, i64 0} +; CHECK_V1: !15 = !{!"FUM", !10, i64 0, !16, i64 4} +; CHECK_V1: !16 = !{!"FOO", !10, i64 0} +; CHECK_V1: !17 = !{!18, !18, i64 0} +; CHECK_V1: !18 = !{!"int", !11, i64 0} +; CHECK_V1: !19 = !{!20} +; CHECK_V1: !20 = distinct !{!20, !21, !"test02: unknown scope"} +; CHECK_V1: !21 = distinct !{!21, !"test02"} +; CHECK_V1: !22 = !{!23} +; CHECK_V1: !23 = distinct !{!23, !21, !"test02: l_fum"} +; CHECK_V1: !24 = !{!23, !20} +; CHECK_V1: !25 = !{!15, !10, i64 4} +; CHECK_V1: !26 = !{!27} +; CHECK_V1: !27 = distinct !{!27, !28, !"test03: unknown scope"} +; CHECK_V1: !28 = distinct !{!28, !"test03"} +; CHECK_V1: !29 = !{!30} +; CHECK_V1: !30 = distinct !{!30, !28, !"test03: l_fum"} +; CHECK_V1: !31 = !{!30, !27} + +; CHECK_V2: !0 = !{i32 1, !"NumRegisterParameters", i32 0} +; CHECK_V2: !1 = !{i32 1, !"wchar_size", i32 4} +; CHECK_V2: !2 = !{!"clang version"} +; CHECK_V2: !3 = !{!4} +; CHECK_V2: !4 = distinct !{!4, !5, !"test01: unknown scope"} +; CHECK_V2: !5 = distinct !{!5, !"test01"} +; CHECK_V2: !6 = !{!7} +; CHECK_V2: !7 = distinct !{!7, !5, !"test01: l_fum"} +; CHECK_V2: !8 = !{!9, !10} +; CHECK_V2: !9 = !{i32 -1, i32 0} +; CHECK_V2: !10 = !{i32 -1, i32 1, i32 0} +; CHECK_V2: !11 = !{!12, !13, i64 0} +; 
CHECK_V2: !12 = !{!"FUM", !13, i64 0, !16, i64 4} +; CHECK_V2: !13 = !{!"any pointer", !14, i64 0} +; CHECK_V2: !14 = !{!"omnipotent char", !15, i64 0} +; CHECK_V2: !15 = !{!"Simple C/C++ TBAA"} +; CHECK_V2: !16 = !{!"FOO", !13, i64 0} +; CHECK_V2: !17 = !{!7, !4} +; CHECK_V2: !18 = !{!19, !19, i64 0} +; CHECK_V2: !19 = !{!"int", !14, i64 0} +; CHECK_V2: !20 = !{!21} +; CHECK_V2: !21 = distinct !{!21, !22, !"test02: unknown scope"} +; CHECK_V2: !22 = distinct !{!22, !"test02"} +; CHECK_V2: !23 = !{!24} +; CHECK_V2: !24 = distinct !{!24, !22, !"test02: l_fum"} +; CHECK_V2: !25 = !{!12, !13, i64 4} +; CHECK_V2: !26 = !{!24, !21} +; CHECK_V2: !27 = !{!28} +; CHECK_V2: !28 = distinct !{!28, !29, !"test03: unknown scope"} +; CHECK_V2: !29 = distinct !{!29, !"test03"} +; CHECK_V2: !30 = !{!31} +; CHECK_V2: !31 = distinct !{!31, !29, !"test03: l_fum"} +; CHECK_V2: !32 = !{!31, !28} + +; CHECK_V3: !0 = !{i32 1, !"NumRegisterParameters", i32 0} +; CHECK_V3: !1 = !{i32 1, !"wchar_size", i32 4} +; CHECK_V3: !2 = !{!"clang version"} +; CHECK_V3: !3 = !{!4} +; CHECK_V3: !4 = distinct !{!4, !5, !"test01: unknown scope"} +; CHECK_V3: !5 = distinct !{!5, !"test01"} +; CHECK_V3: !6 = !{!7} +; CHECK_V3: !7 = distinct !{!7, !5, !"test01: l_fum"} +; CHECK_V3: !8 = !{!9, !10, i64 0} +; CHECK_V3: !9 = !{!"FUM", !10, i64 0, !13, i64 4} +; CHECK_V3: !10 = !{!"any pointer", !11, i64 0} +; CHECK_V3: !11 = !{!"omnipotent char", !12, i64 0} +; CHECK_V3: !12 = !{!"Simple C/C++ TBAA"} +; CHECK_V3: !13 = !{!"FOO", !10, i64 0} +; CHECK_V3: !14 = !{!7, !4} +; CHECK_V3: !15 = !{!16, !16, i64 0} +; CHECK_V3: !16 = !{!"int", !11, i64 0} +; CHECK_V3: !17 = !{!18} +; CHECK_V3: !18 = distinct !{!18, !19, !"test02: unknown scope"} +; CHECK_V3: !19 = distinct !{!19, !"test02"} +; CHECK_V3: !20 = !{!21} +; CHECK_V3: !21 = distinct !{!21, !19, !"test02: l_fum"} +; CHECK_V3: !22 = !{!9, !10, i64 4} +; CHECK_V3: !23 = !{!21, !18} +; CHECK_V3: !24 = !{!25} +; CHECK_V3: !25 = distinct !{!25, !26, !"test03: unknown scope"} +; CHECK_V3: !26 = distinct !{!26, !"test03"} +; CHECK_V3: !27 = !{!28} +; CHECK_V3: !28 = distinct !{!28, !26, !"test03: l_fum"} +; CHECK_V3: !29 = !{!28, !25} Index: llvm/unittests/IR/IRBuilderTest.cpp =================================================================== --- llvm/unittests/IR/IRBuilderTest.cpp +++ llvm/unittests/IR/IRBuilderTest.cpp @@ -13,6 +13,7 @@ #include "llvm/IR/Function.h" #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/IntrinsicsAArch64.h" +#include "llvm/IR/Intrinsics.h" #include "llvm/IR/LLVMContext.h" #include "llvm/IR/MDBuilder.h" #include "llvm/IR/Module.h" @@ -332,6 +333,202 @@ EXPECT_EQ(fp::ebStrict, CII->getExceptionBehavior()); } +TEST_F(IRBuilderTest, NoAlias) { + IRBuilder<> Builder(BB); + AllocaInst *Var1 = Builder.CreateAlloca(Builder.getInt8Ty()); + + AllocaInst *VarP = Builder.CreateAlloca(Builder.getInt8PtrTy()); + Builder.CreateStore(Var1, VarP); + + MDBuilder MDB(BB->getContext()); + MDNode *NewDomain = + MDB.createAnonymousAliasScopeDomain("Test Domain"); + MDNode *AScope = + MDB.createAnonymousAliasScope(NewDomain, "Test Scope"); + + CallInst *NAD = + cast(Builder.CreateNoAliasDeclaration(VarP, AScope)); + Value *Pointer = Builder.CreateLoad(VarP); + CallInst *NAP = + cast(Builder.CreateNoAliasPointer(Pointer, NAD, VarP, AScope)); + + // llvm.noalias.decl + EXPECT_EQ(NAD->getArgOperand(Intrinsic::NoAliasDeclAllocaArg), VarP); + EXPECT_EQ( + cast(NAD->getArgOperand(Intrinsic::NoAliasDeclObjIdArg)) + ->getZExtValue(), + 0ull); + EXPECT_EQ( + 
cast<MetadataAsValue>(NAD->getArgOperand(Intrinsic::NoAliasDeclScopeArg))
+          ->getMetadata(),
+      AScope);
+
+  IntrinsicInst *II_NAD = dyn_cast<IntrinsicInst>(NAD);
+  ASSERT_TRUE(II_NAD != nullptr);
+  EXPECT_EQ(II_NAD->getIntrinsicID(), Intrinsic::noalias_decl);
+
+  EXPECT_TRUE(NAD->onlyAccessesArgMemory());
+  EXPECT_TRUE(NAD->doesNotThrow());
+
+  // llvm.noalias
+  EXPECT_EQ(NAP->getArgOperand(0), Pointer);
+  EXPECT_EQ(NAP->getArgOperand(Intrinsic::NoAliasNoAliasDeclArg), NAD);
+  EXPECT_EQ(NAP->getArgOperand(Intrinsic::NoAliasIdentifyPArg), VarP);
+  EXPECT_EQ(
+      cast<MetadataAsValue>(NAP->getArgOperand(Intrinsic::NoAliasScopeArg))
+          ->getMetadata(),
+      AScope);
+  EXPECT_EQ(
+      cast<ConstantInt>(NAP->getArgOperand(Intrinsic::NoAliasIdentifyPObjIdArg))
+          ->getZExtValue(),
+      0ull); // default object id is 0
+
+  IntrinsicInst *II_NAP = dyn_cast<IntrinsicInst>(NAP);
+  ASSERT_TRUE(II_NAP != nullptr);
+  EXPECT_EQ(II_NAP->getIntrinsicID(), Intrinsic::noalias);
+
+  EXPECT_TRUE(NAP->onlyAccessesArgMemory());
+  EXPECT_TRUE(NAP->doesNotThrow());
+}
+
+TEST_F(IRBuilderTest, ProvenanceNoAliasAndArgGuard) {
+  IRBuilder<> Builder(BB);
+  AllocaInst *Var1 = Builder.CreateAlloca(Builder.getInt8Ty());
+
+  AllocaInst *VarP = Builder.CreateAlloca(Builder.getInt8PtrTy());
+  Builder.CreateStore(Var1, VarP);
+
+  MDBuilder MDB(BB->getContext());
+  MDNode *NewDomain = MDB.createAnonymousAliasScopeDomain("Test Domain");
+  MDNode *AScope = MDB.createAnonymousAliasScope(NewDomain, "Test Scope");
+
+  CallInst *NAD =
+      cast<CallInst>(Builder.CreateNoAliasDeclaration(VarP, AScope));
+  Value *Pointer = Builder.CreateLoad(VarP);
+  CallInst *SNAP = cast<CallInst>(Builder.CreateProvenanceNoAliasPlain(
+      Pointer, NAD, VarP, llvm::ConstantPointerNull::get(VarP->getType()),
+      NAD->getArgOperand(Intrinsic::NoAliasDeclObjIdArg), AScope));
+
+  CallInst *NAG = cast<CallInst>(Builder.CreateNoAliasArgGuard(Pointer, SNAP));
+
+  // create load on Pointer, with SNAP provenance
+
+  // llvm.provenance.noalias
+  EXPECT_EQ(SNAP->getArgOperand(0), Pointer);
+  EXPECT_EQ(SNAP->getArgOperand(Intrinsic::ProvenanceNoAliasNoAliasDeclArg), NAD);
+  EXPECT_EQ(SNAP->getArgOperand(Intrinsic::ProvenanceNoAliasIdentifyPArg), VarP);
+  EXPECT_EQ(
+      cast<MetadataAsValue>(SNAP->getArgOperand(Intrinsic::ProvenanceNoAliasScopeArg))
+          ->getMetadata(),
+      AScope);
+  ASSERT_TRUE(cast<ConstantPointerNull>(
+      SNAP->getArgOperand(Intrinsic::ProvenanceNoAliasIdentifyPProvenanceArg)));
+  EXPECT_EQ(cast<ConstantInt>(
+                SNAP->getArgOperand(Intrinsic::ProvenanceNoAliasIdentifyPObjIdArg))
+                ->getZExtValue(),
+            0ull); // default object id is 0
+
+  IntrinsicInst *II_SNAP = dyn_cast<IntrinsicInst>(SNAP);
+  ASSERT_TRUE(II_SNAP != nullptr);
+  EXPECT_EQ(II_SNAP->getIntrinsicID(), Intrinsic::provenance_noalias);
+
+  EXPECT_TRUE(SNAP->doesNotAccessMemory());
+  EXPECT_TRUE(SNAP->doesNotThrow());
+
+  // llvm.noalias.arg.guard
+  EXPECT_EQ(NAG->getArgOperand(0), Pointer);
+  EXPECT_EQ(NAG->getArgOperand(1), SNAP);
+
+  EXPECT_TRUE(NAG->doesNotAccessMemory());
+  EXPECT_TRUE(NAG->doesNotThrow());
+}
+
+TEST_F(IRBuilderTest, NoAliasCopyGuard) {
+  // Roughly equivalent C code:
+  // struct FOO { char* __restrict p; };
+  // char copy_restrict(struct FOO* p) {
+  //   struct FOO local = *p; // Introduces llvm.noalias.copy.guard
+  //   return *local.p;       // NOTE: this part is omitted
+  // }
+
+  IRBuilder<> Builder(BB);
+  StructType *ST =
+      StructType::create(BB->getContext(), {Builder.getInt8PtrTy()});
+
+  // pointer to incoming struct
+  AllocaInst *VarIn = Builder.CreateAlloca(ST->getPointerTo());
+
+  // local struct
+  AllocaInst *Var1 = Builder.CreateAlloca(ST);
+  // pointer to local struct
+  AllocaInst *VarP = Builder.CreateAlloca(ST->getPointerTo());
+  
Builder.CreateStore(Var1, VarP);
+
+  MDBuilder MDB(BB->getContext());
+  MDNode *NewDomain = MDB.createAnonymousAliasScopeDomain("Test Domain");
+  MDNode *AScope = MDB.createAnonymousAliasScope(NewDomain, "Test Scope");
+
+  CallInst *NAD =
+      cast<CallInst>(Builder.CreateNoAliasDeclaration(VarP, AScope));
+  (void)NAD; // not used as we don't do the 'return *local.p' here
+
+  auto *SrcPointer = Builder.CreateLoad(VarIn);
+  // one entry of length 2, containing {-1, 0}:
+  const int64_t Indices[] = {2, -1, 0};
+  auto Int8PtrNull = ConstantPointerNull::get(Builder.getInt8PtrTy());
+  CallInst *GP = cast<CallInst>(Builder.CreateNoAliasCopyGuard(
+      SrcPointer,
+      Int8PtrNull, // out-of-function __restrict declaration
+      Indices, // For struct FOO* f, all f[X].p (X ~ -1, .p ~ 0) are restrict
+      AScope));
+  auto *Copy = Builder.CreateMemCpy(
+      Var1, Align(4), GP, Align(4),
+      BB->getParent()->getParent()->getDataLayout().getTypeStoreSize(ST));
+  (void)Copy;
+
+  // llvm.noalias.copy.guard
+  EXPECT_EQ(GP->getArgOperand(Intrinsic::NoAliasCopyGuardIdentifyPBaseObject),
+            SrcPointer);
+  EXPECT_EQ(GP->getArgOperand(Intrinsic::NoAliasCopyGuardNoAliasDeclArg),
+            Int8PtrNull);
+
+  // TODO: find a cleaner way to check that the indices metadata encodes { { -1, 0 } }.
+  EXPECT_TRUE(cast<MetadataAsValue>(GP->getArgOperand(
+                  Intrinsic::NoAliasCopyGuardIndicesArg)) != nullptr);
+  EXPECT_EQ(cast<MetadataAsValue>(
+                GP->getArgOperand(Intrinsic::NoAliasCopyGuardScopeArg))
+                ->getMetadata(),
+            AScope);
+
+  EXPECT_TRUE(GP->doesNotAccessMemory());
+  EXPECT_TRUE(GP->doesNotThrow());
+}
+
+TEST_F(IRBuilderTest, Provenance) {
+  IRBuilder<> Builder(BB);
+
+  auto *L = Builder.CreateLoad(GV->getValueType(), GV);
+  EXPECT_TRUE(!L->hasNoaliasProvenanceOperand());
+
+  auto *S = Builder.CreateStore(L, GV);
+  EXPECT_TRUE(!S->hasNoaliasProvenanceOperand());
+
+  L->setNoaliasProvenanceOperand(GV);
+  EXPECT_TRUE(L->hasNoaliasProvenanceOperand());
+
+  S->setNoaliasProvenanceOperand(GV);
+  EXPECT_TRUE(S->hasNoaliasProvenanceOperand());
+
+  EXPECT_EQ(L->getNoaliasProvenanceOperand(), GV);
+  EXPECT_EQ(S->getNoaliasProvenanceOperand(), GV);
+
+  L->removeNoaliasProvenanceOperand();
+  EXPECT_TRUE(!L->hasNoaliasProvenanceOperand());
+
+  S->removeNoaliasProvenanceOperand();
+  EXPECT_TRUE(!S->hasNoaliasProvenanceOperand());
+}
+
 TEST_F(IRBuilderTest, Lifetime) {
   IRBuilder<> Builder(BB);
   AllocaInst *Var1 = Builder.CreateAlloca(Builder.getInt8Ty());