Index: clang/include/clang/AST/Type.h =================================================================== --- clang/include/clang/AST/Type.h +++ clang/include/clang/AST/Type.h @@ -805,6 +805,19 @@ /// Returns true if it is not a class or if the class might not be dynamic. bool mayBeNotDynamicClass() const; + /// Returns true if this is a restrict pointer or contains a restrict pointer. + /// NOTE: A pointer to a restrict pointer does not count. + bool isRestrictOrContainsRestrictMembers() const; + + /// Get the encoded indices, describing where in memory restrict pointers are + /// located. + std::vector getRestrictIndices() const; + +private: + void getRestrictIndices(std::vector &OutEncodedIndices, + SmallVector &InCurrentIndices) const; + +public: // Don't promise in the API that anything besides 'const' can be // easily added. Index: clang/include/clang/Basic/CodeGenOptions.def =================================================================== --- clang/include/clang/Basic/CodeGenOptions.def +++ clang/include/clang/Basic/CodeGenOptions.def @@ -178,6 +178,8 @@ CODEGENOPT(RelaxedAliasing , 1, 0) ///< Set when -fno-strict-aliasing is enabled. CODEGENOPT(StructPathTBAA , 1, 0) ///< Whether or not to use struct-path TBAA. CODEGENOPT(NewStructPathTBAA , 1, 0) ///< Whether or not to use enhanced struct-path TBAA. +CODEGENOPT(FullRestrict , 1, 1) ///< Set when -ffull-restrict is enabled. +CODEGENOPT(NoNoAliasArgAttr , 1, 0) ///< Set when -fno-noalias-arguments is specified. CODEGENOPT(SaveTempLabels , 1, 0) ///< Save temporary labels. CODEGENOPT(SanitizeAddressUseAfterScope , 1, 0) ///< Enable use-after-scope detection ///< in AddressSanitizer Index: clang/include/clang/Driver/Options.td =================================================================== --- clang/include/clang/Driver/Options.td +++ clang/include/clang/Driver/Options.td @@ -952,6 +952,15 @@ Flags<[CC1Option]>, HelpText<"Enable fixed point types">; def fno_fixed_point : Flag<["-"], "fno-fixed-point">, Group, HelpText<"Disable fixed point types">; +def ffull_restrict : Flag<["-"], "ffull-restrict">, Group, + Flags<[CoreOption, CC1Option]>, + HelpText<"Enable full restrict support">; +def fno_full_restrict : Flag<["-"], "fno-full-restrict">, Group, + Flags<[CoreOption, CC1Option]>, + HelpText<"Disable full restrict support, falling back to the legacy support">; +def fno_noalias_arguments : Flag<["-"], "fno-noalias-arguments">, + Group, Flags<[CoreOption, CC1Option]>, + HelpText<"Do not map restrict arguments onto noalias attribute">; def fcxx_static_destructors : Flag<["-"], "fc++-static-destructors">, Group, HelpText<"Enable C++ static destructor registration (the default)">; Index: clang/lib/AST/Type.cpp =================================================================== --- clang/lib/AST/Type.cpp +++ clang/lib/AST/Type.cpp @@ -99,6 +99,65 @@ return !ClassDecl || ClassDecl->mayBeNonDynamicClass(); } +bool QualType::isRestrictOrContainsRestrictMembers() const { + if (isRestrictQualified()) { + return true; + } + + const Type* BaseElementType = getCanonicalType()->getBaseElementTypeUnsafe(); + assert (!isa(BaseElementType)); + + if (const RecordType *RecTy = dyn_cast(BaseElementType)) { + RecordDecl *RD = RecTy->getDecl(); + for (FieldDecl *FD : RD->fields()) { + if (FD->getType().isRestrictOrContainsRestrictMembers()) { + return true; + } + } + } + + return false; +} + +std::vector QualType::getRestrictIndices() const { + // -1 represents an unbounded array + std::vector EncodedIndices; + SmallVector BaseIndices = {-1}; + + getRestrictIndices(EncodedIndices, BaseIndices); + + return EncodedIndices; +} + +void QualType::getRestrictIndices( + std::vector &OutEncodedIndices, + SmallVector &InCurrentIndices) const { + if (isRestrictQualified()) { + OutEncodedIndices.push_back(InCurrentIndices.size()); + OutEncodedIndices.insert(OutEncodedIndices.end(), InCurrentIndices.begin(), + InCurrentIndices.end()); + + return; + } + + QualType CannonTy = getCanonicalType(); + if (const auto *RecTy = CannonTy->getAs()) { + RecordDecl *RD = RecTy->getDecl(); + InCurrentIndices.push_back(0); + for (FieldDecl *FD : RD->fields()) { + FD->getType().getRestrictIndices(OutEncodedIndices, InCurrentIndices); + InCurrentIndices.back()++; + } + InCurrentIndices.pop_back(); + } else if (const auto *ArrayTy = dyn_cast(CannonTy)) { + // Note: we use -1 for bounded and unbounded arrays + InCurrentIndices.push_back(-1); // -1 indicates that any index is fine + ArrayTy->getElementType().getRestrictIndices(OutEncodedIndices, + InCurrentIndices); + InCurrentIndices.pop_back(); + } +} + bool QualType::isConstant(QualType T, const ASTContext &Ctx) { if (T.isConstQualified()) return true; Index: clang/lib/CodeGen/Address.h =================================================================== --- clang/lib/CodeGen/Address.h +++ clang/lib/CodeGen/Address.h @@ -39,6 +39,13 @@ return Pointer; } + /// Replace the current pointer of the addres with a new pointer. + void adaptPointer(llvm::Value *newPointer) { + assert(Pointer->getType() == newPointer->getType() && + "Address: changing the pointer must not change the type"); + Pointer = newPointer; + } + /// Return the type of the pointer value. llvm::PointerType *getType() const { return llvm::cast(getPointer()->getType()); Index: clang/lib/CodeGen/CGCall.cpp =================================================================== --- clang/lib/CodeGen/CGCall.cpp +++ clang/lib/CodeGen/CGCall.cpp @@ -2412,7 +2412,10 @@ } } - if (Arg->getType().isRestrictQualified()) + // for accurate full restrict support, we should not annotate arguments + // with noalias. The noalias atttribute is too strong. + if (Arg->getType().isRestrictQualified() && + (!CGM.getCodeGenOpts().NoNoAliasArgAttr)) AI->addAttr(llvm::Attribute::NoAlias); // LLVM expects swifterror parameters to be used in very restricted @@ -4035,12 +4038,28 @@ ArgInfo.getDirectOffset() == 0) { assert(NumIRArgs == 1); llvm::Value *V; - if (!I->isAggregate()) + if (!I->isAggregate()) { V = I->getKnownRValue().getScalarVal(); - else - V = Builder.CreateLoad( - I->hasLValue() ? I->getKnownLValue().getAddress() - : I->getKnownRValue().getAggregateAddress()); + } else { + Address Addr = I->hasLValue() + ? I->getKnownLValue().getAddress() + : I->getKnownRValue().getAggregateAddress(); + if (I->getType().isRestrictOrContainsRestrictMembers() && + CGM.getCodeGenOpts().FullRestrict) { + // protect a load of an aggregate with restrict member pointers with + // an llvm.noalias.copy.guard. + // NOTE: also see CodeGenFunction::EmitAggregateCopy(); + auto NoAliasScopeMD = + getExistingOrUnknownNoAliasScope(Addr.getPointer()); + auto NoAliasDecl = getExistingNoAliasDeclOrNullptr(NoAliasScopeMD); + Addr.adaptPointer(Builder.CreateNoAliasCopyGuard( + Addr.getPointer(), NoAliasDecl, + I->getType().getRestrictIndices(), NoAliasScopeMD)); + } + llvm::LoadInst *LD = + Builder.CreateLoad(Addr, I->getType().isVolatileQualified()); + V = LD; + } // Implement swifterror by copying into a new swifterror argument. // We'll write back in the normal path out of the call. @@ -4122,6 +4141,22 @@ } else { // In the simple case, just pass the coerced loaded value. assert(NumIRArgs == 1); + + if (Src.getElementType() == ArgInfo.getCoerceToType()) { + QualType Ty = I->getType(); + if (Ty.isRestrictOrContainsRestrictMembers() && + CGM.getCodeGenOpts().FullRestrict) { + // Protect a load of an aggregate with restrict member pointers with + // an llvm.noalias.copy.guard + // NOTE: also see CodeGenFunction::EmitAggregateCopy(); + auto NoAliasScopeMD = + getExistingOrUnknownNoAliasScope(Src.getPointer()); + auto NoAliasDecl = getExistingNoAliasDeclOrNullptr(NoAliasScopeMD); + Src.adaptPointer(Builder.CreateNoAliasCopyGuard( + Src.getPointer(), NoAliasDecl, Ty.getRestrictIndices(), + NoAliasScopeMD)); + } + } IRCallArgs[FirstIRArg] = CreateCoercedLoad(Src, ArgInfo.getCoerceToType(), *this); } Index: clang/lib/CodeGen/CGDecl.cpp =================================================================== --- clang/lib/CodeGen/CGDecl.cpp +++ clang/lib/CodeGen/CGDecl.cpp @@ -34,6 +34,7 @@ #include "llvm/IR/DataLayout.h" #include "llvm/IR/GlobalVariable.h" #include "llvm/IR/Intrinsics.h" +#include "llvm/IR/MDBuilder.h" #include "llvm/IR/Type.h" using namespace clang; @@ -1276,6 +1277,7 @@ /// These turn into simple stack objects, or GlobalValues depending on target. void CodeGenFunction::EmitAutoVarDecl(const VarDecl &D) { AutoVarEmission emission = EmitAutoVarAlloca(D); + EmitAutoVarNoAlias(emission); EmitAutoVarInit(emission); EmitAutoVarCleanups(emission); } @@ -1852,6 +1854,127 @@ type.isVolatileQualified(), Builder, constant); } +// For all local restrict-qualified local variables, we create a noalias +// metadata scope. This scope is used to identify each restrict-qualified +// variable and the other memory accesses within the scope where its aliasing +// assumptions apply. The scope metadata is stored in the NoAliasAddrMap map +// where the pointer to the local variable is the key in the map. +// One variable can contain multiple restrict pointers. All of them are +// represented by a single scope. +void CodeGenFunction::EmitNoAliasDecl(const VarDecl &D, Address Loc) { + // Don't emit noalias intrinsics unless we're optimizing. + if (CGM.getCodeGenOpts().OptimizationLevel == 0) + return; + + if (!CGM.getCodeGenOpts().FullRestrict) + return; + + QualType type = D.getType(); + + // Emit a noalias intrinsic for restrict-qualified variables. + if (!type.isRestrictOrContainsRestrictMembers()) + return; + + // Only emit a llvm.noalias.decl if the address is an alloca. + if (!isa(Loc.getPointer()->stripPointerCasts())) + return; + + // NOTE: keep in sync with (clang) CGDecl: getExistingOrUnknownNoAliasScope + // NOTE: keep in sync with (clang) CGDecl: EmitAutoVarNoAlias/EmitNoAliasDecl + // NOTE: keep in sync with (llvm) InlineFunction: CloneAliasScopeMetadata + llvm::MDBuilder MDB(CurFn->getContext()); + if (!NoAliasDomain) + NoAliasDomain = MDB.createAnonymousAliasScopeDomain(CurFn->getName()); + + std::string Name = (llvm::Twine(CurFn->getName()) + ": " + D.getName()).str(); + + llvm::MDNode *Scope = + MDB.createAnonymousAliasScope(NoAliasDomain, Name); + addNoAliasScope(Scope); + + SmallVector ScopeListEntries(1, Scope); + llvm::MDNode *ScopeList = + llvm::MDNode::get(CurFn->getContext(), ScopeListEntries); + + NoAliasAddrMap[Loc.getPointer()] = ScopeList; + + if (HaveInsertPoint()) { + NoAliasDeclMap[ScopeList] = + Builder.CreateNoAliasDeclaration(Loc.getPointer(), ScopeList); + } +} + +llvm::MDNode * +CodeGenFunction::getExistingOrUnknownNoAliasScope(llvm::Value *Ptr) { + auto NAI = NoAliasAddrMap.find( + Ptr->stripInBoundsOffsets()); // make sure to find the base object + + if (NAI != NoAliasAddrMap.end()) { + return NAI->second; + } + if (!NoAliasUnknownScope) { + // NOTE: keep in sync with (clang) CGDecl:getExistingOrUnknownNoAliasScope + // NOTE: keep in sync with (clang) CGDecl:EmitAutoVarNoAlias/EmitNoAliasDecl + // NOTE: keep in sync with (llvm) InlineFunction:CloneAliasScopeMetadata + + // The unknown scope is used when we cannot yet pinpoint the exact scope. + llvm::MDBuilder MDB(CurFn->getContext()); + if (!NoAliasDomain) + NoAliasDomain = MDB.createAnonymousAliasScopeDomain(CurFn->getName()); + std::string Name = + (llvm::Twine(CurFn->getName()) + ": unknown scope").str(); + + llvm::MDNode *Scope = MDB.createAnonymousAliasScope(NoAliasDomain, Name); + FnNoAliasInfo.addNoAliasScope(Scope); // keep this at function level + + SmallVector ScopeListEntries(1, Scope); + NoAliasUnknownScope = + llvm::MDNode::get(CurFn->getContext(), ScopeListEntries); + CurFn->setMetadata("noalias", NoAliasUnknownScope); + } + + return NoAliasUnknownScope; +} + +llvm::Value * +CodeGenFunction::getExistingNoAliasDeclOrNullptr(llvm::MDNode *NoAliasScopeMD) { + llvm::Value *&NoAliasDecl = NoAliasDeclMap[NoAliasScopeMD]; + if (NoAliasDecl == nullptr) { + NoAliasDecl = llvm::ConstantPointerNull::get( + llvm::Type::getInt8PtrTy(getLLVMContext())); + } + return NoAliasDecl; +} + +void CodeGenFunction::EmitAutoVarNoAlias(const AutoVarEmission &emission) { + assert(emission.Variable && "emission was not valid!"); + + // Don't emit noalias intrinsics unless we're optimizing. + if (CGM.getCodeGenOpts().OptimizationLevel == 0) + return; + + if (!CGM.getCodeGenOpts().FullRestrict) + return; + + const VarDecl &D = *emission.Variable; + + // Early exit: emission.getObjectAddress(..) can introduce extra code. + // Only do it if it is needed + if (!D.getType().isRestrictOrContainsRestrictMembers()) + return; + + // Check whether this is a byref variable that's potentially + // captured and moved by its own initializer. If so, we'll need to + // emit the initializer first, then copy into the variable. + const Expr *Init = D.getInit(); + bool capturedByInit = + Init && emission.IsEscapingByRef && isCapturedBy(D, Init); + + Address Loc = + capturedByInit ? emission.Addr : emission.getObjectAddress(*this); + EmitNoAliasDecl(D, Loc); +} + /// Emit an expression as an initializer for an object (variable, field, etc.) /// at the given location. The expression is not necessarily the normal /// initializer for the object, and the address is not necessarily @@ -2427,6 +2550,9 @@ llvm::Value *ArgVal = (DoStore ? Arg.getDirectValue() : nullptr); LValue lv = MakeAddrLValue(DeclPtr, Ty); + + EmitNoAliasDecl(D, DeclPtr); + if (IsScalar) { Qualifiers qs = Ty.getQualifiers(); if (Qualifiers::ObjCLifetime lt = qs.getObjCLifetime()) { Index: clang/lib/CodeGen/CGExpr.cpp =================================================================== --- clang/lib/CodeGen/CGExpr.cpp +++ clang/lib/CodeGen/CGExpr.cpp @@ -29,10 +29,12 @@ #include "clang/Basic/CodeGenOptions.h" #include "llvm/ADT/Hashing.h" #include "llvm/ADT/StringExtras.h" +#include "llvm/IR/Constants.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/Intrinsics.h" #include "llvm/IR/LLVMContext.h" #include "llvm/IR/MDBuilder.h" +#include "llvm/IR/Metadata.h" #include "llvm/Support/ConvertUTF.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/Path.h" @@ -1685,6 +1687,24 @@ if (llvm::MDNode *RangeInfo = getRangeForLoadFromType(Ty)) Load->setMetadata(llvm::LLVMContext::MD_range, RangeInfo); + // If this is a load from a restrict-qualified variable, then we have pointer + // aliasing assumptions that can be applied to the pointer value being loaded. + if (Ty.isRestrictQualified() && CGM.getCodeGenOpts().FullRestrict) { + auto NoAliasScopeMD = getExistingOrUnknownNoAliasScope(Addr.getPointer()); + auto NoAliasDecl = getExistingNoAliasDeclOrNullptr(NoAliasScopeMD); + auto *NoAliasLoad = Builder.CreateNoAliasPointer( + Load, NoAliasDecl, Addr.getPointer(), NoAliasScopeMD); + + // The llvm.noalias intrinsic can make use of the available alias info + llvm::AAMDNodes AAMetadata; + Load->getAAMetadata(AAMetadata); + NoAliasLoad->setAAMetadata(AAMetadata); + + // ..as wel as the local restrict scope + // In both cases, this is about the 'P.addr'(getOperand(2) of llvm.noalias) + recordMemoryInstruction(NoAliasLoad); + return EmitFromMemory(NoAliasLoad, Ty); + } return EmitFromMemory(Load, Ty); } Index: clang/lib/CodeGen/CGExprAgg.cpp =================================================================== --- clang/lib/CodeGen/CGExprAgg.cpp +++ clang/lib/CodeGen/CGExprAgg.cpp @@ -1935,6 +1935,16 @@ } } + // Guard copies of structs containing restrict pointers + if (Ty.isRestrictOrContainsRestrictMembers()) { + // NOTE: also see CodeGenFunction::EmitCall() + auto NoAliasScopeMD = getExistingOrUnknownNoAliasScope(SrcPtr.getPointer()); + auto NoAliasDecl = getExistingNoAliasDeclOrNullptr(NoAliasScopeMD); + SrcPtr.adaptPointer(Builder.CreateNoAliasCopyGuard( + SrcPtr.getPointer(), NoAliasDecl, Ty.getRestrictIndices(), + NoAliasScopeMD)); + } + // Aggregate assignment turns into llvm.memcpy. This is almost valid per // C99 6.5.16.1p3, which states "If the value being stored in an object is // read from another object that overlaps in anyway the storage of the first @@ -2011,6 +2021,8 @@ } auto Inst = Builder.CreateMemCpy(DestPtr, SrcPtr, SizeVal, isVolatile); + // track noalias scope for memcpy + recordMemoryInstruction(Inst); // Determine the metadata to describe the position of any padding in this // memcpy, as well as the TBAA tags for the members of the struct, in case Index: clang/lib/CodeGen/CGStmt.cpp =================================================================== --- clang/lib/CodeGen/CGStmt.cpp +++ clang/lib/CodeGen/CGStmt.cpp @@ -373,6 +373,28 @@ return true; } +bool CodeGenFunction::hasLocalRestrictVars(const CompoundStmt &S, + FunctionArgList *Args) { + // We may have restrict-qualified variables, but if we're not optimizing, we + // don't do anything special with them. + if (CGM.getCodeGenOpts().OptimizationLevel == 0) + return false; + + if (Args) + for (const auto *VD : *Args) + if (VD->getType().isRestrictOrContainsRestrictMembers()) + return true; + + for (const auto *C : S.body()) + if (const auto *DS = dyn_cast(C)) + for (const auto *I : DS->decls()) + if (const auto *VD = dyn_cast(I)) + if (VD->getType().isRestrictOrContainsRestrictMembers()) + return true; + + return false; +} + /// EmitCompoundStmt - Emit a compound statement {..} node. If GetLast is true, /// this captures the expression result of the last sub-statement and returns it /// (for use by the statement expression extension). @@ -382,7 +404,7 @@ "LLVM IR generation of compound statement ('{}')"); // Keep track of the current cleanup stack depth, including debug scopes. - LexicalScope Scope(*this, S.getSourceRange()); + LexicalScope Scope(*this, S.getSourceRange(), hasLocalRestrictVars(S)); return EmitCompoundStmtWithoutScope(S, GetLast, AggSlot); } @@ -587,6 +609,26 @@ } } +// For all of the instructions generated for this lexical scope that access +// memory, add the noalias metadata associated with any block-local +// restrict-qualified pointers from this scope. +void CodeGenFunction::LexicalNoAliasInfo::addNoAliasMD() { + if (MemoryInsts.empty() || NoAliasScopes.empty()) + return; + + llvm::MDNode *NewScopeList = + llvm::MDNode::get(MemoryInsts[0]->getParent()->getContext(), + NoAliasScopes); + + for (auto &I : MemoryInsts) + I->setMetadata( + llvm::LLVMContext::MD_noalias, + llvm::MDNode::concatenate(I->getMetadata( + llvm::LLVMContext::MD_noalias), + NewScopeList)); + + MemoryInsts.clear(); +} void CodeGenFunction::EmitLabelStmt(const LabelStmt &S) { EmitLabel(S.getDecl()); Index: clang/lib/CodeGen/CodeGenFunction.h =================================================================== --- clang/lib/CodeGen/CodeGenFunction.h +++ clang/lib/CodeGen/CodeGenFunction.h @@ -36,6 +36,7 @@ #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/MapVector.h" #include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/TinyPtrVector.h" #include "llvm/IR/ValueHandle.h" #include "llvm/Support/Debug.h" #include "llvm/Transforms/Utils/SanitizerStats.h" @@ -288,7 +289,7 @@ /// instruction is created using Builder. void InsertHelper(llvm::Instruction *I, const llvm::Twine &Name, llvm::BasicBlock *BB, - llvm::BasicBlock::iterator InsertPt) const; + llvm::BasicBlock::iterator InsertPt); /// CurFuncDecl - Holds the Decl for the current outermost /// non-closure context. @@ -760,11 +761,56 @@ } }; + // Check if any local variable or argument contains a (direct) restrict + // pointer. + bool hasLocalRestrictVars(const CompoundStmt &S, + FunctionArgList *Args = nullptr); + + // Get the associated NoAliasScope for the Ptr (cast and geps will be + // stripped). If there is no associated scope returns the 'unknown function' + // scope. (which is created on demand) + llvm::MDNode *getExistingOrUnknownNoAliasScope(llvm::Value *Ptr); + + // Get the associated declaration + llvm::Value *getExistingNoAliasDeclOrNullptr(llvm::MDNode *NoAliasScopeMD); + + // The noalias scopes used to tag pointer values assigned to block-local + // restrict-qualified variables, and the memory-accessing instructions within + // this lexical scope to which the associated pointer-aliasing assumptions + // might apply. One of these will exist for each lexical scope. + struct LexicalNoAliasInfo { + bool RecordMemoryInsts; + llvm::TinyPtrVector MemoryInsts; + llvm::TinyPtrVector NoAliasScopes; + + LexicalNoAliasInfo(bool RecordMemoryInsts = false) + : RecordMemoryInsts(RecordMemoryInsts) {} + + void recordMemoryInsts() { + RecordMemoryInsts = true; + } + + void recordMemoryInstruction(llvm::Instruction *I) { + if (RecordMemoryInsts) + MemoryInsts.push_back(I); + } + + void addNoAliasScope(llvm::MDNode *Scope) { + assert(RecordMemoryInsts && + "Adding noalias scope but not recording memory accesses!"); + NoAliasScopes.push_back(Scope); + } + + void addNoAliasMD(); + }; + + LexicalNoAliasInfo FnNoAliasInfo; + // Cleanup stack depth of the RunCleanupsScope that was pushed most recently. EHScopeStack::stable_iterator CurrentCleanupScopeDepth = EHScopeStack::stable_end(); - class LexicalScope : public RunCleanupsScope { + class LexicalScope : public RunCleanupsScope, public LexicalNoAliasInfo { SourceRange Range; SmallVector Labels; LexicalScope *ParentScope; @@ -774,8 +820,10 @@ public: /// Enter a new cleanup scope. - explicit LexicalScope(CodeGenFunction &CGF, SourceRange Range) - : RunCleanupsScope(CGF), Range(Range), ParentScope(CGF.CurLexicalScope) { + explicit LexicalScope(CodeGenFunction &CGF, SourceRange Range, + bool RecordMemoryInsts = false) + : RunCleanupsScope(CGF), LexicalNoAliasInfo(RecordMemoryInsts), + Range(Range), ParentScope(CGF.CurLexicalScope) { CGF.CurLexicalScope = this; if (CGDebugInfo *DI = CGF.getDebugInfo()) DI->EmitLexicalBlockStart(CGF.Builder, Range.getBegin()); @@ -786,6 +834,19 @@ Labels.push_back(label); } + // If we have block-local restrict-qualified pointers, we need to keep + // track of the memory-accessing instructions in the blocks where such + // pointers are declared (including lexical scopes that are children of + // those blocks) so that we can later add the appropriate metadata. Record + // this instruction and so the same in any parent scopes. + void recordMemoryInstruction(llvm::Instruction *I) { + LexicalNoAliasInfo::recordMemoryInstruction(I); + if (ParentScope) + ParentScope->recordMemoryInstruction(I); + else + CGF.FnNoAliasInfo.recordMemoryInstruction(I); + } + /// Exit this cleanup scope, emitting any accumulated /// cleanups. ~LexicalScope() { @@ -803,6 +864,8 @@ /// Force the emission of cleanups now, instead of waiting /// until this object is destroyed. void ForceCleanup() { + addNoAliasMD(); + CGF.CurLexicalScope = ParentScope; RunCleanupsScope::ForceCleanup(); @@ -819,6 +882,23 @@ typedef llvm::DenseMap DeclMapTy; + // Record this instruction for the purpose of later adding noalias metadata, + // is applicible, in order to support block-local restrict-qualified + // pointers. + void recordMemoryInstruction(llvm::Instruction *I) { + if (CurLexicalScope) + CurLexicalScope->recordMemoryInstruction(I); + else + FnNoAliasInfo.recordMemoryInstruction(I); + } + + void addNoAliasScope(llvm::MDNode *Scope) { + if (CurLexicalScope) + CurLexicalScope->addNoAliasScope(Scope); + else + FnNoAliasInfo.addNoAliasScope(Scope); + } + /// The class used to assign some variables some temporarily addresses. class OMPMapVars { DeclMapTy SavedLocals; @@ -1609,6 +1689,19 @@ void EmitOpenCLKernelMetadata(const FunctionDecl *FD, llvm::Function *Fn); + /// The noalias domain metadata for this function. + llvm::MDNode *NoAliasDomain = nullptr; + + /// A map between the addresses of local restrict-qualified variables and + /// their noalias scope. + llvm::DenseMap NoAliasAddrMap; + + /// A map between the noalias scope and its declaration + llvm::DenseMap NoAliasDeclMap; + + /// The node representing 'out-of-function' scope + llvm::MDNode *NoAliasUnknownScope = nullptr; + public: CodeGenFunction(CodeGenModule &cgm, bool suppressNewContext=false); ~CodeGenFunction(); @@ -2762,6 +2855,9 @@ void emitAutoVarTypeCleanup(const AutoVarEmission &emission, QualType::DestructionKind dtorKind); + void EmitNoAliasDecl(const VarDecl &D, Address Loc); + void EmitAutoVarNoAlias(const AutoVarEmission &emission); + /// Emits the alloca and debug information for the size expressions for each /// dimension of an array. It registers the association of its (1-dimensional) /// QualTypes and size expression's debug node, so that CGDebugInfo can Index: clang/lib/CodeGen/CodeGenFunction.cpp =================================================================== --- clang/lib/CodeGen/CodeGenFunction.cpp +++ clang/lib/CodeGen/CodeGenFunction.cpp @@ -657,6 +657,11 @@ CurFn = Fn; CurFnInfo = &FnInfo; assert(CurFn->isDeclaration() && "Function already has body?"); + NoAliasUnknownScope = nullptr; // make sure we start without a function scope + + // Always track memory instructions. When a restrict usage is encountered, + // they will be annotated with the necessary scopes. + FnNoAliasInfo.recordMemoryInsts(); // If this function has been blacklisted for any of the enabled sanitizers, // disable the sanitizer for the function. @@ -1038,10 +1043,15 @@ void CodeGenFunction::EmitFunctionBody(const Stmt *Body) { incrementProfileCounter(Body); - if (const CompoundStmt *S = dyn_cast(Body)) + if (const CompoundStmt *S = dyn_cast(Body)) { EmitCompoundStmtWithoutScope(*S); - else + + // Now that we're done with the block, add noalias metadata if we had any + // block-local restrict-qualified pointers. + FnNoAliasInfo.addNoAliasMD(); + } else { EmitStmt(Body); + } } /// When instrumenting to collect profile data, the counts for some blocks @@ -2129,10 +2139,18 @@ void CodeGenFunction::InsertHelper(llvm::Instruction *I, const llvm::Twine &Name, llvm::BasicBlock *BB, - llvm::BasicBlock::iterator InsertPt) const { + llvm::BasicBlock::iterator InsertPt) { LoopStack.InsertHelper(I); if (IsSanitizerScope) CGM.getSanitizerMetadata()->disableSanitizerForInstruction(I); + + // When we have block-local restrict-qualified pointers, we need to record + // all memory-accessing instructions (i.e. any kind of instruction for which + // AA::getModRefInfo might return something other than NoModRef) so that they + // can be tagged with noalias metadata with noalias scopes corresponding to + // the applicable restrict-qualified pointers. + if (I->mayReadOrWriteMemory()) + recordMemoryInstruction(I); } void CGBuilderInserter::InsertHelper( Index: clang/lib/Driver/ToolChains/Clang.cpp =================================================================== --- clang/lib/Driver/ToolChains/Clang.cpp +++ clang/lib/Driver/ToolChains/Clang.cpp @@ -3886,6 +3886,15 @@ options::OPT_fdelete_null_pointer_checks, false)) CmdArgs.push_back("-fno-delete-null-pointer-checks"); + if (Arg *FullRestrictArg = Args.getLastArg(options::OPT_ffull_restrict, + options::OPT_fno_full_restrict)) { + if (FullRestrictArg->getOption().matches(options::OPT_fno_full_restrict)) { + // Disable noalias intrinsic support for noalias attribute on arguments + CmdArgs.push_back("-mllvm"); + CmdArgs.push_back("-use-noalias-intrinsic-during-inlining=0"); + } + } + // LLVM Code Generator Options. if (Args.hasArg(options::OPT_frewrite_map_file) || Index: clang/lib/Frontend/CompilerInvocation.cpp =================================================================== --- clang/lib/Frontend/CompilerInvocation.cpp +++ clang/lib/Frontend/CompilerInvocation.cpp @@ -799,6 +799,14 @@ Opts.StructPathTBAA = !Args.hasArg(OPT_no_struct_path_tbaa); Opts.NewStructPathTBAA = !Args.hasArg(OPT_no_struct_path_tbaa) && Args.hasArg(OPT_new_struct_path_tbaa); + if (Arg *FullRestrictArg = + Args.getLastArg(OPT_ffull_restrict, OPT_fno_full_restrict)) { + Opts.FullRestrict = + FullRestrictArg->getOption().matches(OPT_ffull_restrict); + } + // Keep this behind an option - the alias analysis still works better + // with the noalias attribute on arguments. + Opts.NoNoAliasArgAttr = Args.hasArg(OPT_fno_noalias_arguments); Opts.FineGrainedBitfieldAccesses = Args.hasFlag(OPT_ffine_grained_bitfield_accesses, OPT_fno_fine_grained_bitfield_accesses, false); Index: clang/test/CodeGen/arm_neon_intrinsics.c =================================================================== --- clang/test/CodeGen/arm_neon_intrinsics.c +++ clang/test/CodeGen/arm_neon_intrinsics.c @@ -2,6 +2,7 @@ // RUN: -target-cpu swift -fallow-half-arguments-and-returns \ // RUN: -target-feature +fullfp16 -ffreestanding \ // RUN: -flax-vector-conversions=none \ +// RUN: -mllvm --use-noalias-intrinsic-during-inlining=0 \ // RUN: -disable-O0-optnone -emit-llvm -o - %s \ // RUN: | opt -S -mem2reg | FileCheck %s Index: clang/test/CodeGen/restrict/arg_reuse.c =================================================================== --- /dev/null +++ clang/test/CodeGen/restrict/arg_reuse.c @@ -0,0 +1,35 @@ +// RUN: %clang_cc1 -triple x86_64-apple-darwin -O2 -ffull-restrict -fno-noalias-arguments %s -emit-llvm -o - | FileCheck %s + +// NOTE: use -no-noalias-arguments to block mapping restrict arguments on the 'noalias +// attribute which is too strong for restrict + +// A number of testcases from our wiki (2018/6/7_llvm_restrict_examples +// As llvm/clang treat __restrict differently in following cases: +int test_arg_restrict_vs_local_restrict_01(int* __restrict pA, int* pB, int* pC) { + int * tmp=pA; + *tmp=42; + pA=pB; + *pA=43; + *pC=99; + return *tmp; // fail: needs a load !!! (either 42 or 43) +} + +// CHECK: @test_arg_restrict_vs_local_restrict_01 +// CHECK-NOT: ret i32 42 +// CHECK-NOT: ret i32 43 +// CHECK-NOT: ret i32 99 + +int test_arg_restrict_vs_local_restrict_02(int* pA_, int* pB, int* pC) { + int *__restrict pA; pA=pA_; + int * tmp=pA; + *tmp=42; + pA=pB; + *pA=43; + *pC=99; + return *tmp; // needs a load !!! (either 42 or 43) +} + +// CHECK: @test_arg_restrict_vs_local_restrict_02 +// CHECK-NOT: ret i32 42 +// CHECK-NOT: ret i32 43 +// CHECK-NOT: ret i32 99 Index: clang/test/CodeGen/restrict/array.c =================================================================== --- /dev/null +++ clang/test/CodeGen/restrict/array.c @@ -0,0 +1,55 @@ +// RUN: %clang_cc1 -triple x86_64-apple-darwin -O1 -disable-llvm-optzns -ffull-restrict %s -emit-llvm -o - | FileCheck %s + +int r; +void ex1(int *); + +void test_FOO_local(int* pA, int* pB, int* pC) { + int* restrict tmp[3] = { pA, pB, pC }; + *tmp[0]=42; + *tmp[1]=43; +} + +// CHECK-LABEL: void @test_FOO_local( +// CHECK: [[tmp:%.*]] = alloca [3 x i32*], align 16 +// CHECK: [[TMP1:%.*]] = call i8* @llvm.noalias.decl.p0i8.p0a3p0i32.i64([3 x i32*]* [[tmp]], i64 0, metadata [[TAG_6:!.*]]) +// CHECK: [[arrayidx_begin:%.*]] = getelementptr inbounds [3 x i32*], [3 x i32*]* [[tmp]], i64 0, i64 0 +// CHECK: [[arrayidx:%.*]] = getelementptr inbounds [3 x i32*], [3 x i32*]* [[tmp]], i64 0, i64 0 +// CHECK: [[TMP5:%.*]] = load i32*, i32** [[arrayidx]], align 16, !tbaa [[TAG_2:!.*]], !noalias [[TAG_6]] +// CHECK: [[TMP6:%.*]] = call i32* @llvm.noalias.p0i32.p0i8.p0p0i32.i64(i32* [[TMP5]], i8* [[TMP1]], i32** [[arrayidx]], i64 0, metadata [[TAG_6]]), !tbaa [[TAG_2]], !noalias [[TAG_6]] +// CHECK: store i32 42, i32* [[TMP6]], align 4, !tbaa [[TAG_9:!.*]], !noalias [[TAG_6]] +// CHECK: [[arrayidx2:%.*]] = getelementptr inbounds [3 x i32*], [3 x i32*]* [[tmp]], i64 0, i64 1 +// CHECK: [[TMP7:%.*]] = load i32*, i32** [[arrayidx2]], align 8, !tbaa [[TAG_2]], !noalias [[TAG_6]] +// CHECK: [[TMP8:%.*]] = call i32* @llvm.noalias.p0i32.p0i8.p0p0i32.i64(i32* [[TMP7]], i8* [[TMP1]], i32** [[arrayidx2]], i64 0, metadata [[TAG_6]]), !tbaa [[TAG_2]], !noalias [[TAG_6]] +// CHECK: store i32 43, i32* [[TMP8]], align 4, !tbaa [[TAG_9]], !noalias [[TAG_6]] +// CHECK: ret void + + +void test_FOO_p(int* restrict p) { + *p=42; +} + +// define void @test_FOO_p(i32* noalias %p) #0 { +// CHECK-LABEL: void @test_FOO_p( +// CHECK: [[p_addr:%.*]] = alloca i32*, align 8 +// CHECK-NEXT: [[TMP0:%.*]] = call i8* @llvm.noalias.decl.p0i8.p0p0i32.i64(i32** [[p_addr]], i64 0, metadata [[TAG_11:!.*]]) +// CHECK-NEXT: store i32* [[p:%.*]], i32** [[p_addr]], align 8, !tbaa [[TAG_2:!.*]], !noalias [[TAG_11]] +// CHECK-NEXT: [[TMP1:%.*]] = load i32*, i32** [[p_addr]], align 8, !tbaa [[TAG_2]], !noalias [[TAG_11]] +// CHECK-NEXT: [[TMP2:%.*]] = call i32* @llvm.noalias.p0i32.p0i8.p0p0i32.i64(i32* [[TMP1]], i8* [[TMP0]], i32** [[p_addr]], i64 0, metadata [[TAG_11]]), !tbaa [[TAG_2]], !noalias [[TAG_11]] +// CHECK-NEXT: store i32 42, i32* [[TMP2]], align 4, !tbaa [[TAG_9:!.*]], !noalias [[TAG_11]] +// CHECK-NEXT: ret void + + +void test_FOO_pp(int* restrict* p) { + *p[0]=42; +} + +// define void @test_FOO_pp(i32** %p) #0 !noalias !14 { +// CHECK: void @test_FOO_pp(i32** [[p:%.*]]) #0 !noalias [[TAG_14:!.*]] { +// CHECK: [[p_addr:%.*]] = alloca i32**, align 8 +// CHECK-NEXT: store i32** [[p]], i32*** [[p_addr]], align 8, !tbaa [[TAG_2:!.*]], !noalias [[TAG_14:!.*]] +// CHECK-NEXT: [[TMP0:%.*]] = load i32**, i32*** [[p_addr]], align 8, !tbaa [[TAG_2]], !noalias [[TAG_14]] +// CHECK-NEXT: [[arrayidx:%.*]] = getelementptr inbounds i32*, i32** [[TMP0]], i64 0 +// CHECK-NEXT: [[TMP1:%.*]] = load i32*, i32** [[arrayidx]], align 8, !tbaa [[TAG_2]], !noalias [[TAG_14]] +// CHECK-NEXT: [[TMP2:%.*]] = call i32* @llvm.noalias.p0i32.p0i8.p0p0i32.i64(i32* [[TMP1]], i8* null, i32** [[arrayidx]], i64 0, metadata [[TAG_14]]), !tbaa [[TAG_2]], !noalias [[TAG_14]] +// CHECK-NEXT: store i32 42, i32* [[TMP2]], align 4, !tbaa [[TAG_9:!.*]], !noalias [[TAG_14]] +// CHECK-NEXT: ret void Index: clang/test/CodeGen/restrict/basic.c =================================================================== --- /dev/null +++ clang/test/CodeGen/restrict/basic.c @@ -0,0 +1,130 @@ +// RUN: %clang_cc1 -triple x86_64-apple-darwin -O1 -disable-llvm-optzns -ffull-restrict %s -emit-llvm -o - | FileCheck %s + +int r; +void ex1(int *); + +int *a; +int *foo() { + int * restrict x = a; + return x; + +// CHECK-LABEL: i32* @foo( +// CHECK: [[x:%.*]] = alloca i32*, align 8 +// CHECK: [[TMP1:%.*]] = call i8* @llvm.noalias.decl.p0i8.p0p0i32.i64(i32** [[x]], i64 0, metadata [[TAG_2:!.*]]) +// CHECK: [[TMP3:%.*]] = load i32*, i32** [[x]], align 8, !tbaa !5, !noalias [[TAG_2]] +// CHECK: [[TMP4:%.*]] = call i32* @llvm.noalias.p0i32.p0i8.p0p0i32.i64(i32* [[TMP3]], i8* [[TMP1]], i32** [[x]], i64 0, metadata !2), !tbaa !5, !noalias [[TAG_2]] +// CHECK: ret i32* [[TMP4]] +} + +int *a2; +int *foo1(int b) { + int * restrict x; + +// CHECK-LABEL: define i32* @foo1(i32 %b) +// CHECK: [[b_addr:%.*]] = alloca i32, align 4 +// CHECK: [[x:%.*]] = alloca i32*, align 8 +// CHECK: [[x2:%.*]] = alloca i32*, align 8 +// CHECK: [[TMP1:%.*]] = call i8* @llvm.noalias.decl.p0i8.p0p0i32.i64(i32** [[x]], i64 0, metadata [[TAG_x:!.*]]) + + if (b) { + x = a; + r += *x; + ex1(x); + +// CHECK: [[TMP3:%.*]] = load i32*, i32** @a, align 8, !tbaa [[TAG_5:!.*]], !noalias [[TAG_x_x2:!.*]] +// CHECK: store i32* [[TMP3]], i32** [[x]], align 8, !tbaa [[TAG_5]], !noalias [[TAG_x_x2]] + +// CHECK: [[TMP4:%.*]] = load i32*, i32** [[x]], align 8, !tbaa [[TAG_5]], !noalias [[TAG_x_x2]] +// CHECK: [[TMP5:%.*]] = call i32* @llvm.noalias.p0i32.p0i8.p0p0i32.i64(i32* [[TMP4]], i8* [[TMP1]], i32** [[x]], i64 0, metadata [[TAG_x]]), !tbaa [[TAG_5]], !noalias [[TAG_x_x2]] +// CHECK: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4, !tbaa [[TAG_9:!.*]], !noalias [[TAG_x_x2]] +// CHECK: [[TMP7:%.*]] = load i32, i32* @r, align 4, !tbaa [[TAG_9]], !noalias [[TAG_x_x2]] +// CHECK: [[add:%.*]] = add nsw i32 [[TMP7]], [[TMP6]] +// CHECK: store i32 [[add]], i32* @r, align 4, !tbaa [[TAG_9]], !noalias [[TAG_x_x2]] + +// CHECK: [[TMP8:%.*]] = load i32*, i32** [[x]], align 8, !tbaa [[TAG_5]], !noalias [[TAG_x_x2]] +// CHECK: [[TMP9:%.*]] = call i32* @llvm.noalias.p0i32.p0i8.p0p0i32.i64(i32* [[TMP8]], i8* [[TMP1]], i32** [[x]], i64 0, metadata [[TAG_x]]), !tbaa [[TAG_5]], !noalias [[TAG_x_x2]] +// CHECK: call void @ex1(i32* [[TMP9]]), !noalias [[TAG_x_x2]] + + ++x; + *x = r; + ex1(x); + +// CHECK: [[TMP10:%.*]] = load i32*, i32** [[x]], align 8, !tbaa [[TAG_5]], !noalias [[TAG_x_x2]] +// CHECK: [[TMP11:%.*]] = call i32* @llvm.noalias.p0i32.p0i8.p0p0i32.i64(i32* [[TMP10]], i8* [[TMP1]], i32** [[x]], i64 0, metadata [[TAG_x]]), !tbaa [[TAG_5]], !noalias [[TAG_x_x2]] +// CHECK: [[incdec_ptr:%.*]] = getelementptr inbounds i32, i32* [[TMP11]], i32 1 +// CHECK: store i32* [[incdec_ptr]], i32** [[x]], align 8, !tbaa [[TAG_5]], !noalias [[TAG_x_x2]] + +// CHECK: [[TMP12:%.*]] = load i32, i32* @r, align 4, !tbaa [[TAG_9]], !noalias [[TAG_x_x2]] +// CHECK: [[TMP13:%.*]] = load i32*, i32** [[x]], align 8, !tbaa [[TAG_5]], !noalias [[TAG_x_x2]] +// CHECK: [[TMP14:%.*]] = call i32* @llvm.noalias.p0i32.p0i8.p0p0i32.i64(i32* [[TMP13]], i8* [[TMP1]], i32** [[x]], i64 0, metadata [[TAG_x]]), !tbaa [[TAG_5]], !noalias [[TAG_x_x2]] +// CHECK: store i32 [[TMP12]], i32* [[TMP14]], align 4, !tbaa [[TAG_9]], !noalias [[TAG_x_x2]] + +// CHECK: [[TMP15:%.*]] = load i32*, i32** [[x]], align 8, !tbaa [[TAG_5]], !noalias [[TAG_x_x2]] +// CHECK: [[TMP16:%.*]] = call i32* @llvm.noalias.p0i32.p0i8.p0p0i32.i64(i32* [[TMP15]], i8* [[TMP1]], i32** [[x]], i64 0, metadata [[TAG_x]]), !tbaa [[TAG_5]], !noalias [[TAG_x_x2]] +// CHECK: call void @ex1(i32* [[TMP16]]), !noalias [[TAG_x_x2]] + + x += b; + *x = r; + ex1(x); + +// CHECK: [[TMP17:%.*]] = load i32, i32* [[b_addr]], align 4, !tbaa [[TAG_9]], !noalias [[TAG_x_x2]] +// CHECK: [[TMP18:%.*]] = load i32*, i32** [[x]], align 8, !tbaa [[TAG_5]], !noalias [[TAG_x_x2]] +// CHECK: [[TMP19:%.*]] = call i32* @llvm.noalias.p0i32.p0i8.p0p0i32.i64(i32* [[TMP18]], i8* [[TMP1]], i32** [[x]], i64 0, metadata [[TAG_x]]), !tbaa [[TAG_5]], !noalias [[TAG_x_x2]] +// CHECK: [[idx_ext:%.*]] = sext i32 [[TMP17]] to i64 +// CHECK: [[add_ptr:%.*]] = getelementptr inbounds i32, i32* [[TMP19]], i64 [[idx_ext]] +// CHECK: store i32* [[add_ptr]], i32** [[x]], align 8, !tbaa [[TAG_5]], !noalias [[TAG_x_x2]] + +// CHECK: [[TMP20:%.*]] = load i32, i32* @r, align 4, !tbaa [[TAG_9]], !noalias [[TAG_x_x2]] +// CHECK: [[TMP21:%.*]] = load i32*, i32** [[x]], align 8, !tbaa [[TAG_5]], !noalias [[TAG_x_x2]] +// CHECK: [[TMP22:%.*]] = call i32* @llvm.noalias.p0i32.p0i8.p0p0i32.i64(i32* [[TMP21]], i8* [[TMP1]], i32** [[x]], i64 0, metadata [[TAG_x]]), !tbaa [[TAG_5]], !noalias [[TAG_x_x2]] +// CHECK: store i32 [[TMP20]], i32* [[TMP22]], align 4, !tbaa [[TAG_9]], !noalias [[TAG_x_x2]] + +// CHECK: [[TMP23:%.*]] = load i32*, i32** [[x]], align 8, !tbaa [[TAG_5]], !noalias [[TAG_x_x2]] +// CHECK: [[TMP24:%.*]] = call i32* @llvm.noalias.p0i32.p0i8.p0p0i32.i64(i32* [[TMP23]], i8* [[TMP1]], i32** [[x]], i64 0, metadata [[TAG_x]]), !tbaa [[TAG_5]], !noalias [[TAG_x_x2]] +// CHECK: call void @ex1(i32* [[TMP24]]), !noalias [[TAG_x_x2]] + + + int * restrict x2 = a2; + *x2 = r; + ex1(x2); + +// CHECK: [[TMP26:%.*]] = call i8* @llvm.noalias.decl.p0i8.p0p0i32.i64(i32** [[x2]], i64 0, metadata [[TAG_x2:!.*]]) +// CHECK: [[TMP27:%.*]] = load i32*, i32** @a2, align 8, !tbaa [[TAG_5]], !noalias [[TAG_x_x2]] +// CHECK: store i32* [[TMP27]], i32** [[x2]], align 8, !tbaa [[TAG_5]], !noalias [[TAG_x_x2]] + +// CHECK: [[TMP28:%.*]] = load i32, i32* @r, align 4, !tbaa [[TAG_9]], !noalias [[TAG_x_x2]] +// CHECK: [[TMP29:%.*]] = load i32*, i32** [[x2]], align 8, !tbaa [[TAG_5]], !noalias [[TAG_x_x2]] +// CHECK: [[TMP30:%.*]] = call i32* @llvm.noalias.p0i32.p0i8.p0p0i32.i64(i32* [[TMP29]], i8* [[TMP26]], i32** [[x2]], i64 0, metadata [[TAG_x2]]), !tbaa [[TAG_5]], !noalias [[TAG_x_x2]] +// CHECK: store i32 [[TMP28]], i32* [[TMP30]], align 4, !tbaa [[TAG_9]], !noalias [[TAG_x_x2]] + +// CHECK: [[TMP31:%.*]] = load i32*, i32** [[x2]], align 8, !tbaa [[TAG_5]], !noalias [[TAG_x_x2]] +// CHECK: [[TMP32:%.*]] = call i32* @llvm.noalias.p0i32.p0i8.p0p0i32.i64(i32* [[TMP31]], i8* [[TMP26]], i32** [[x2]], i64 0, metadata [[TAG_x2]]), !tbaa [[TAG_5]], !noalias [[TAG_x_x2]] +// CHECK: call void @ex1(i32* [[TMP32]]), !noalias [[TAG_x_x2]] + } else { + x = a2; + r += *x; +// CHECK: [[TMP34:%.*]] = load i32*, i32** @a2, align 8, !tbaa [[TAG_5]], !noalias [[TAG_x]] +// CHECK: store i32* [[TMP34]], i32** [[x]], align 8, !tbaa [[TAG_5]], !noalias [[TAG_x]] + +// CHECK: [[TMP35:%.*]] = load i32*, i32** [[x]], align 8, !tbaa [[TAG_5]], !noalias [[TAG_x]] +// CHECK: [[TMP36:%.*]] = call i32* @llvm.noalias.p0i32.p0i8.p0p0i32.i64(i32* [[TMP35]], i8* [[TMP1]], i32** [[x]], i64 0, metadata [[TAG_x]]), !tbaa [[TAG_5]], !noalias [[TAG_x]] +// CHECK: [[TMP37:%.*]] = load i32, i32* [[TMP36]], align 4, !tbaa [[TAG_9]], !noalias [[TAG_x]] +// CHECK: [[TMP38:%.*]] = load i32, i32* @r, align 4, !tbaa [[TAG_9]], !noalias [[TAG_x]] +// CHECK: [[add1:%.*]] = add nsw i32 [[TMP38]], [[TMP37]] +// CHECK: store i32 [[add1]], i32* @r, align 4, !tbaa [[TAG_9]], !noalias [[TAG_x]] + } + + return x; +// CHECK: [[TMP39:%.*]] = load i32*, i32** [[x]], align 8, !tbaa [[TAG_5]], !noalias [[TAG_x]] +// CHECK: [[TMP40:%.*]] = call i32* @llvm.noalias.p0i32.p0i8.p0p0i32.i64(i32* [[TMP39]], i8* [[TMP1]], i32** [[x]], i64 0, metadata [[TAG_x]]), !tbaa [[TAG_5]], !noalias [[TAG_x]] +// CHECK: ret i32* [[TMP40]] +} + +int *bar() { + int * x = a; + return x; + +// CHECK-LABEL: define i32* @bar() +// CHECK-NOT: noalias +// CHECK: ret i32* +} Index: clang/test/CodeGen/restrict/basic_opt_01.c =================================================================== --- /dev/null +++ clang/test/CodeGen/restrict/basic_opt_01.c @@ -0,0 +1,135 @@ +// RUN: %clang_cc1 -triple x86_64-apple-darwin -O2 -ffull-restrict %s -emit-llvm -o - | FileCheck %s + +// Rough test to verify basic functionality of the 'restrict member pointer' rewrite + +// CHECK: @test01A +// CHECK: ret i32 42 +int test01A(int* pA, int* pB) { + int* __restrict prA; prA=pA; + + *prA=42; + *pB=43; + return *prA; +} + +// CHECK: @test01B +// CHECK: ret i32 42 +int test01B(int* __restrict prA, int* pB) { + *prA=42; + *pB=43; + return *prA; +} + +// CHECK: @test02A +// CHECK: ret i32 42 +int test02A(int b, int* pA, char* pB, int* pC) { + int* __restrict prA; prA=pA; + char* __restrict prB; prB=pB; + char* lp= b? (char*)prA : (char*)prB; + + *lp=42; + *pC=43; + return *lp; +} + +// CHECK: @test02B +// CHECK: ret i32 42 +int test02B(int b, int* __restrict prA, char* __restrict prB, int* pC) { + char* lp= b? (char*)prA : (char*)prB; + + *lp=42; + *pC=43; + return *lp; +} + +// CHECK: @test03 +// CHECK: ret i32 42 +int test03(int n, int* pA, char* pB, int* pC) { + do { + int* __restrict prA; prA=pA; + + *prA=42; + *pC=43; + } while (n--); + return *pA; +} + +// CHECK: @test04A0 +// CHECK: ret i32 42 +int test04A0(int n, int* pA, char* pB, int* pC) { + int* __restrict prA; + do { + prA=pA; + + *prA=42; + *pC=43; + } while (n--); + return *prA; +} + +// CHECK: @test04A1 +// CHECK: ret i32 42 +int test04A1(int n, int* pA, char* pB, int* pC) { + int* __restrict prA; + prA=pA; + do { + prA=pA; + + *prA=42; + *pC=43; + } while (n--); + return *prA; +} + +// CHECK: @test04B0 +// CHECK: ret i32 42 +int test04B0(int n, int* __restrict prA, char* pB, int* pC) { + do { + prA=prA; + + *prA=42; + *pC=43; + } while (n--); + return *prA; +} + +// CHECK: @test04B1 +// CHECK: ret i32 42 +int test04B1(int n, int* __restrict prA, char* pB, int* pC) { + prA=prA; + do { + prA=prA; + + *prA=42; + *pC=43; + } while (n--); + return *prA; +} + + + +// CHECK: @test05A +// CHECK: ret i32 42 +int test05A(int n, int* pA, char* pB, int* pC) { + int* __restrict prA; + prA=pA; + do { + prA++; + + *prA=42; + *pC=43; + } while (n--); + return *prA; +} + +// CHECK: @test05B +// CHECK: ret i32 42 +int test05B(int n, int* __restrict prA, char* pB, int* pC) { + do { + prA++; + + *prA=42; + *pC=43; + } while (n--); + return *prA; +} Index: clang/test/CodeGen/restrict/basic_opt_02.c =================================================================== --- /dev/null +++ clang/test/CodeGen/restrict/basic_opt_02.c @@ -0,0 +1,53 @@ +// RUN: %clang_cc1 -triple x86_64-apple-darwin -O2 -ffull-restrict %s -emit-llvm -o - | FileCheck %s + +int foo1(int* a, int* restrict b, int c) { + *a = 0; + *b = c; + return *a; // OK: returns 0 +} +// CHECK: @foo1 +// CHECK: ret i32 0 + +int foo2(int* a, int* restrict b, int c) { + int* bc = b + c; + *a = 0; + *bc = c; // OK: bc keeps the restrictness + return *a; // returns 0 +} +// CHECK: @foo2 +// CHECK: ret i32 0 + +static int* copy(int *b) { return b; } + +int foo3(int* a, int* restrict b, int c) { + int* bc = copy(b); // a fix to support this is in the works + *a = 0; + *bc = c; + return *a; +} +// CHECK: @foo3 +// CHECK: ret i32 0 + +// Finally: +inline void update(int* p, int c) { *p = c; } + +int foo6(int* a, int* b, int c) { + int* restrict bc = b; // local restrict + *a = 0; + update(bc,c); // Oops: inlining loses local restrict annotation + return *a; +} + +// CHECK: @foo6 +// CHECK: ret i32 0 + + +// Notice the difference with: +int foo7(int* a, int* restrict b, int c) { + *a = 0; + update(b,c); // restrict argument preserved after inlining. + return *a; // returns 0 +} + +// CHECK: @foo7 +// CHECK: ret i32 0 Index: clang/test/CodeGen/restrict/basic_opt_03.c =================================================================== --- /dev/null +++ clang/test/CodeGen/restrict/basic_opt_03.c @@ -0,0 +1,69 @@ +// RUN: %clang_cc1 -triple x86_64-apple-darwin -O2 -ffull-restrict %s -emit-llvm -o - | FileCheck %s +// XFAIL: * + +// NOTE: SROA needs to be able to see through llvm.noalias. This is introduced in case of returning of larger structs. +struct A { int a,b,c,d,e,f,g,h; }; +struct A constructIt(int a) { struct A tmp = { a,a,a,a,a,a,a,a }; return tmp; } +int test_sroa01a(unsigned c) { + int tmp=0; + for(int i=0; ivolatile pointer can confuse llvm: (p2 depends on p0) +int test_escape_through_volatile_01(int* a_p0) { + int* __restrict p0; + p0=a_p0; + int* volatile p1=p0; + int* p2=p1; + *p0=42; + *p2=99; + + return *p0; // 42 or 99 +} + +// CHECK: @test_escape_through_volatile_01 +// either a reload or 99, but must never be 42 +// CHECK-NOT: ret i32 42 + +// but not in: +int test_escape_through_volatile_02(int* __restrict p0) { + int* volatile p1=p0; + int* p2=p1; + *p0=42; + *p2=99; + + return *p0; // 42 or 99 +} + +// CHECK: @test_escape_through_volatile_02 +// either a reload or 99, but must never be 42 +// CHECK-NOT: ret i32 42 Index: clang/test/CodeGen/restrict/inlining_01.c =================================================================== --- /dev/null +++ clang/test/CodeGen/restrict/inlining_01.c @@ -0,0 +1,157 @@ +// sfg-check: check resulting chains +// RUN: %clang_cc1 -triple x86_64-apple-darwin -O2 -ffull-restrict %s -emit-llvm -o - | FileCheck %s + +// check how restrict propagation wrt inlining works + +//#define INLINE __attribute__((alwaysinline)) +//#define INLINE inline +#define INLINE + +// Variations: +// - n : no restrict +// - a : argument restrict +// - R : local restrict + +#define ARGRESTRICT_n +#define ARGRESTRICT_a __restrict +#define ARGRESTRICT_R + +#define LOCALRESTRICT_n(T,A,B) T A=B +#define LOCALRESTRICT_a(T,A,B) T A=B +#define LOCALRESTRICT_R(T,A,B) T __restrict A=B + +#define CREATE_ALL(CS) \ +CS(n,n) \ +CS(a,n) \ +CS(n,a) \ +CS(a,a) \ +CS(R,n) \ +CS(n,R) \ +CS(R,R) + +#define CREATE_SET(A,B) \ +INLINE int set_ ## A ## B(int* ARGRESTRICT_ ## A pA, int* ARGRESTRICT_ ## B pB) { \ + LOCALRESTRICT_ ## A(int*, lpA, pA); \ + LOCALRESTRICT_ ## B(int*, lpB, pB); \ + *lpA=42; \ + *lpB=99; \ + return *lpA; \ +} + +#define CREATE_CALL_SET1(A,B) \ +int test01_nn_call_set_ ## A ## B(int* pA, int* pB) { \ + set_ ## A ## B (pA, pB); \ + return *pA; \ +} + +#define CREATE_CALL_SET2(A,B) \ +int test02_ ## A ## B ## _call_set_ ## A ## B(int* ARGRESTRICT_ ## A pA, int* ARGRESTRICT_ ## B pB) { \ + LOCALRESTRICT_ ## A(int*, lpA, pA); \ + LOCALRESTRICT_ ## B(int*, lpB, pB); \ + set_ ## A ## B (lpA, lpB); \ + return *lpA; \ +} + +#define CREATE_CALL_SET3(A,B) \ +int test03_ ## A ## B ## _call_set_nn(int* ARGRESTRICT_ ## A pA, int* ARGRESTRICT_ ## B pB) { \ + LOCALRESTRICT_ ## A(int*, lpA, pA); \ + LOCALRESTRICT_ ## B(int*, lpB, pB); \ + set_nn (lpA, lpB); \ + return *lpA; \ +} + + +CREATE_ALL(CREATE_SET) +CREATE_ALL(CREATE_CALL_SET1) +CREATE_ALL(CREATE_CALL_SET2) +CREATE_ALL(CREATE_CALL_SET3) + +// CHECK-LABEL: @set_nn( +// CHECK-NOT: ret i32 42 + +// CHECK-LABEL: @set_an( +// CHECK: ret i32 42 + +// CHECK-LABEL: @set_na( +// CHECK: ret i32 42 + +// CHECK_LABEL: @set_aa( +// CHECK: ret i32 42 + +// CHECK-LABEL: @set_Rn( +// CHECK: ret i32 42 + +// CHECK-LABEL: @set_nR( +// CHECK: ret i32 42 + +// CHECK-LABEL: @set_RR( +// CHECK: ret i32 42 + +// CHECK-LABEL: @test01_nn_call_set_nn( +// CHECK-NOT: ret i32 42 + +//@ NOTE: missed store-load propagation +// CHECK-LABEL: @test01_nn_call_set_an( +// CHECK-NOT: ret i32 42 + +//@ NOTE: missed store-load propagation +// CHECK-LABEL: @test01_nn_call_set_na( +// CHECK-NOT: ret i32 42 + +//@ NOTE: missed store-load propagation +// CHECK-LABEL: @test01_nn_call_set_aa( +// CHECK-NOT: ret i32 42 + +//@ NOTE: missed store-load propagation +// CHECK-LABEL: @test01_nn_call_set_Rn( +// CHECK-NOT: ret i32 42 + +//@ NOTE: missed store-load propagation +// CHECK-LABEL: @test01_nn_call_set_nR( +// CHECK-NOT: ret i32 42 + +//@ NOTE: missed store-load propagation +// CHECK-LABEL: @test01_nn_call_set_RR( +// CHECK-NOT: ret i32 42 + +// CHECK-LABEL: @test02_nn_call_set_nn( +// CHECK-NOT: ret i32 42 + +// CHECK-LABEL: @test02_an_call_set_an( +// CHECK: ret i32 42 + +// CHECK-LABEL: @test02_na_call_set_na( +// CHECK: ret i32 42 + +// CHECK-LABEL: @test02_aa_call_set_aa( +// CHECK: ret i32 42 + +// CHECK-LABEL: @test02_Rn_call_set_Rn( +// CHECK: ret i32 42 + +// CHECK-LABEL: @test02_nR_call_set_nR( +// CHECK: ret i32 42 + +// CHECK-LABEL: @test02_RR_call_set_RR( +// CHECK: ret i32 42 + +// CHECK-LABEL: @test03_nn_call_set_nn( +// CHECK-NOT: ret i32 42 + +// CHECK-LABEL: @test03_an_call_set_nn( +// CHECK: ret i32 42 + +// CHECK-LABEL: @test03_na_call_set_nn( +// CHECK: ret i32 42 + +// CHECK-LABEL: @test03_aa_call_set_nn( +// CHECK: ret i32 42 + +// CHECK-LABEL: @test03_Rn_call_set_nn( +// CHECK: ret i32 42 + +// CHECK-LABEL: @test03_nR_call_set_nn( +// CHECK: ret i32 42 + +// CHECK-LABEL: @test03_RR_call_set_nn( +// CHECK: ret i32 42 Index: clang/test/CodeGen/restrict/inlining_02.c =================================================================== --- /dev/null +++ clang/test/CodeGen/restrict/inlining_02.c @@ -0,0 +1,121 @@ +// sfg-check: check resulting chains +// RUN: %clang_cc1 -triple x86_64-apple-darwin -O2 -ffull-restrict %s -emit-llvm -o - | FileCheck %s + +// Base test for restrict propagation with inlining + +//#define INLINE __attribute__((alwaysinline)) +//#define INLINE inline +#define INLINE + + +void set_nnn(int* pA, int* pB, int* dummy) { + int* lpA; lpA=pA; + int* lpB; lpB=pB; + int* ldummy; ldummy=dummy; + *lpA=42; + *lpB=43; + + *ldummy=99; +} +// CHECK-LABEL: @set_nnn( + + +void set_nna(int* pA, int* pB, int* __restrict dummy) { + int* lpA; lpA=pA; + int* lpB; lpB=pB; + int* ldummy; ldummy=dummy; + *lpA=42; + *lpB=43; + + *ldummy=99; +} +// CHECK-LABEL: @set_nna( + +void set_nnr(int* pA, int* pB, int* dummy) { + int* lpA; lpA=pA; + int* lpB; lpB=pB; + int* __restrict ldummy; ldummy=dummy; + + *lpA=42; + *lpB=43; + + *ldummy=99; +} +// CHECK-LABEL: @set_nnr( + +int test_rr_nnn(int* pA, int* pB, int* dummy) { + int* __restrict lpA; lpA=pA; + int* __restrict lpB; lpB=pB; + + set_nnn(lpA,lpB,dummy); + return *lpA; +} + +// CHECK-LABEL: @test_rr_nnn( +// CHECK: ret i32 42 + +int test_rr_nna(int* pA, int* pB, int* dummy) { + int* __restrict lpA; lpA=pA; + int* __restrict lpB; lpB=pB; + + set_nna(lpA,lpB,dummy); + return *lpA; +} + +// CHECK-LABEL: @test_rr_nna( +// CHECK: ret i32 42 + + +int test_rr_nnr(int* pA, int* pB, int* dummy) { + int* __restrict lpA; lpA=pA; + int* __restrict lpB; lpB=pB; + + set_nnr(lpA,lpB,dummy); + return *lpA; +} + +// CHECK-LABEL: @test_rr_nnr( +// CHECK: ret i32 42 + +// ----------------------------------------------------------- + +int test_rr_local_nnn(int* pA, int* pB, int* dummy) { + int* __restrict lpA; lpA=pA; + int* __restrict lpB; lpB=pB; + int* ldummy; ldummy=dummy; + + *lpA=10; + { + int* l2pA; l2pA=lpA; + int* l2pB; l2pB=lpB; + int* l2dummy; l2dummy=ldummy; + *l2pA=42; + *l2pB=43; + + *l2dummy=99; + } + return *lpA; +} +// CHECK-LABEL: @test_rr_local_nnn( +// CHECK: ret i32 42 + + +int test_rr_local_nnr(int* pA, int* pB, int* dummy) { + int* __restrict lpA; lpA=pA; + int* __restrict lpB; lpB=pB; + int* ldummy; ldummy=dummy; + + *lpA=10; + { + int* l2pA; l2pA=lpA; + int* l2pB; l2pB=lpB; + int* __restrict l2dummy; l2dummy=ldummy; + *l2pA=42; + *l2pB=43; + + *l2dummy=99; + } + return *lpA; +} +// CHECK-LABEL: @test_rr_local_nnr( +// CHECK: ret i32 42 Index: clang/test/CodeGen/restrict/side_noalias_reduction_01.c =================================================================== --- /dev/null +++ clang/test/CodeGen/restrict/side_noalias_reduction_01.c @@ -0,0 +1,188 @@ +// RUN: %clang_cc1 -triple x86_64-apple-darwin -O2 -ffull-restrict %s -emit-llvm -o - | FileCheck %s + +// Check that unnecessary llvm.side.noalias calls are collapsed + +int* test01(int* p, int n) { + int* __restrict rp; rp=p; + for (int i=0; irp0=42; + *p->rp1=43; +} + +// define void @test_FOO_arg_pointer(%struct.FOO* %p) #0 !noalias !15 { +// CHECK: void @test_FOO_arg_pointer(%struct.FOO* [[p:%.*]]) #0 !noalias [[TAG_15:!.*]] { +// CHECK: [[p_addr:%.*]] = alloca %struct.FOO*, align 8 +// CHECK-NEXT: store %struct.FOO* [[p]], %struct.FOO** [[p_addr]], align 8, !tbaa [[TAG_2:!.*]], !noalias [[TAG_15]] +// CHECK-NEXT: [[TMP0:%.*]] = load %struct.FOO*, %struct.FOO** [[p_addr]], align 8, !tbaa [[TAG_2]], !noalias [[TAG_15]] +// CHECK-NEXT: [[rp0:%.*]] = getelementptr inbounds %struct.FOO, %struct.FOO* [[TMP0]], i32 0, i32 0 +// CHECK-NEXT: [[TMP1:%.*]] = load i32*, i32** [[rp0]], align 8, !tbaa [[TAG_9:!.*]], !noalias [[TAG_15]] +// CHECK-NEXT: [[TMP2:%.*]] = call i32* @llvm.noalias.p0i32.p0i8.p0p0i32.i64(i32* [[TMP1]], i8* null, i32** [[rp0]], i64 0, metadata [[TAG_15]]), !tbaa [[TAG_9]], !noalias [[TAG_15]] +// CHECK-NEXT: store i32 42, i32* [[TMP2]], align 4, !tbaa [[TAG_13:!.*]], !noalias [[TAG_15]] +// CHECK-NEXT: [[TMP3:%.*]] = load %struct.FOO*, %struct.FOO** [[p_addr]], align 8, !tbaa [[TAG_2]], !noalias [[TAG_15]] +// CHECK-NEXT: [[rp1:%.*]] = getelementptr inbounds %struct.FOO, %struct.FOO* [[TMP3]], i32 0, i32 1 +// CHECK-NEXT: [[TMP4:%.*]] = load i32*, i32** [[rp1]], align 8, !tbaa [[TAG_11:!.*]], !noalias [[TAG_15]] +// CHECK-NEXT: [[TMP5:%.*]] = call i32* @llvm.noalias.p0i32.p0i8.p0p0i32.i64(i32* [[TMP4]], i8* null, i32** [[rp1]], i64 0, metadata [[TAG_15]]), !tbaa [[TAG_11]], !noalias [[TAG_15]] +// CHECK-NEXT: store i32 43, i32* [[TMP5]], align 4, !tbaa [[TAG_13]], !noalias [[TAG_15]] +// CHECK-NEXT: ret void + + +void test_FOO_arg_value(struct FOO p) { + *p.rp0=42; + *p.rp1=43; +} +// NOTE: the struct is mapped 'byval', the scope will be introduced after inlining. + +// define void @test_FOO_arg_value(%struct.FOO* byval(%struct.FOO) align 8 %p) #0 !noalias !18 { +// CHECK: void @test_FOO_arg_value(%struct.FOO* byval(%struct.FOO) align 8 %p) #0 !noalias [[TAG_18:!.*]] { +// CHECK: [[rp0:%.*]] = getelementptr inbounds %struct.FOO, %struct.FOO* [[p:%.*]], i32 0, i32 0 +// CHECK-NEXT: [[TMP0:%.*]] = load i32*, i32** [[rp0]], align 8, !tbaa [[TAG_9:!.*]], !noalias [[TAG_18]] +// CHECK-NEXT: [[TMP1:%.*]] = call i32* @llvm.noalias.p0i32.p0i8.p0p0i32.i64(i32* [[TMP0]], i8* null, i32** [[rp0]], i64 0, metadata [[TAG_18]]), !tbaa [[TAG_9]], !noalias [[TAG_18]] +// CHECK-NEXT: store i32 42, i32* [[TMP1]], align 4, !tbaa [[TAG_13:!.*]], !noalias [[TAG_18]] +// CHECK-NEXT: [[rp1:%.*]] = getelementptr inbounds %struct.FOO, %struct.FOO* [[p]], i32 0, i32 1 +// CHECK-NEXT: [[TMP2:%.*]] = load i32*, i32** [[rp1]], align 8, !tbaa [[TAG_11:!.*]], !noalias [[TAG_18]] +// CHECK-NEXT: [[TMP3:%.*]] = call i32* @llvm.noalias.p0i32.p0i8.p0p0i32.i64(i32* [[TMP2]], i8* null, i32** [[rp1]], i64 0, metadata [[TAG_18]]), !tbaa [[TAG_11]], !noalias [[TAG_18]] +// CHECK-NEXT: store i32 43, i32* [[TMP3]], align 4, !tbaa [[TAG_13]], !noalias [[TAG_18]] +// CHECK-NEXT: ret void + +struct FOO test_FOO_pass(struct FOO p) { + return p; +} + +// define void @test_FOO_pass(%struct.FOO* noalias sret %agg.result, %struct.FOO* byval(%struct.FOO) align 8 %p) #0 !noalias !21 { +// CHECK: void @test_FOO_pass(%struct.FOO* noalias sret %agg.result, %struct.FOO* byval(%struct.FOO) align 8 %p) #0 !noalias [[TAG_21:!.*]] { +// CHECK: [[TMP0:%.*]] = call %struct.FOO* @llvm.noalias.copy.guard.p0s_struct.FOOs.p0i8(%struct.FOO* [[p:%.*]], i8* null, metadata [[TAG_24:!.*]], metadata [[TAG_21]]) +// CHECK-NEXT: [[TMP1:%.*]] = bitcast %struct.FOO* [[agg_result:%.*]] to i8* +// CHECK-NEXT: [[TMP2:%.*]] = bitcast %struct.FOO* [[TMP0]] to i8* +// CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i64 24, i1 false), !tbaa.struct [[TAG_28:!.*]], !noalias [[TAG_21]] +// CHECK-NEXT: ret void + + +struct FUM { + struct FOO m; +}; + +void test_FUM_local(int* pA, int* pB, int* pC) { + struct FUM tmp = { { pA, pB, pC } }; + *tmp.m.rp0=42; + *tmp.m.rp1=43; +} + +// CHECK-LABEL: void @test_FUM_local( +// CHECK: [[TMP1:%.*]] = call i8* @llvm.noalias.decl.p0i8.p0s_struct.FUMs.i64(%struct.FUM* [[tmp]], i64 0, metadata [[TAG_29:!.*]]) +// CHECK: [[TMP6:%.*]] = call i32* @llvm.noalias.p0i32.p0i8.p0p0i32.i64(i32* [[TMP5:%.*]], i8* [[TMP1]], i32** [[rp02:%.*]], i64 0, metadata [[TAG_29]]), !tbaa [[TAG_32:!.*]], !noalias [[TAG_29]] +// CHECK: [[TMP8:%.*]] = call i32* @llvm.noalias.p0i32.p0i8.p0p0i32.i64(i32* [[TMP7:%.*]], i8* [[TMP1]], i32** [[rp14:%.*]], i64 0, metadata [[TAG_29]]), !tbaa [[TAG_34:!.*]], !noalias [[TAG_29]] + +void test_FUM_arg_pointer(struct FUM* p) { + *p->m.rp0=42; + *p->m.rp1=43; +} +// define void @test_FUM_arg_pointer(%struct.FUM* %p) #0 !noalias !35 { +// CHECK: void @test_FUM_arg_pointer(%struct.FUM* [[p:%.*]]) #0 !noalias [[TAG_35:!.*]] { +// CHECK: [[TMP2:%.*]] = call i32* @llvm.noalias.p0i32.p0i8.p0p0i32.i64(i32* [[TMP1:%.*]], i8* null, i32** [[rp0:%.*]], i64 0, metadata [[TAG_35]]), !tbaa [[TAG_32:!.*]], !noalias [[TAG_35]] +// CHECK: [[TMP5:%.*]] = call i32* @llvm.noalias.p0i32.p0i8.p0p0i32.i64(i32* [[TMP4:%.*]], i8* null, i32** [[rp1:%.*]], i64 0, metadata [[TAG_35]]), !tbaa [[TAG_34:!.*]], !noalias [[TAG_35]] + +void test_FUM_arg_value(struct FUM p) { + *p.m.rp0=42; + *p.m.rp1=43; +} + +// define void @test_FUM_arg_value(%struct.FUM* byval(%struct.FUM) align 8 %p) #0 !noalias !38 { +// CHECK: void @test_FUM_arg_value(%struct.FUM* byval(%struct.FUM) align 8 %p) #0 !noalias [[TAG_38:!.*]] { +// CHECK: [[TMP1:%.*]] = call i32* @llvm.noalias.p0i32.p0i8.p0p0i32.i64(i32* [[TMP0:%.*]], i8* null, i32** [[rp0:%.*]], i64 0, metadata [[TAG_38]]), !tbaa [[TAG_32:!.*]], !noalias [[TAG_38]] +// CHECK: [[TMP3:%.*]] = call i32* @llvm.noalias.p0i32.p0i8.p0p0i32.i64(i32* [[TMP2:%.*]], i8* null, i32** [[rp1:%.*]], i64 0, metadata [[TAG_38]]), !tbaa [[TAG_34:!.*]], !noalias [[TAG_38]] + +struct FUM test_FUM_pass(struct FUM p) { + return p; +} + +// define void @test_FUM_pass(%struct.FUM* noalias sret %agg.result, %struct.FUM* byval(%struct.FUM) align 8 %p) #0 !noalias !41 { +// CHECK: void @test_FUM_pass(%struct.FUM* noalias sret %agg.result, %struct.FUM* byval(%struct.FUM) align 8 %p) #0 !noalias [[TAG_41:!.*]] { +// CHECK: [[TMP0:%.*]] = call %struct.FUM* @llvm.noalias.copy.guard.p0s_struct.FUMs.p0i8(%struct.FUM* [[p:%.*]], i8* null, metadata [[TAG_44:!.*]], metadata [[TAG_41]]) +// CHECK-NEXT: [[TMP1:%.*]] = bitcast %struct.FUM* [[agg_result:%.*]] to i8* +// CHECK-NEXT: [[TMP2:%.*]] = bitcast %struct.FUM* [[TMP0]] to i8* +// CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i64 24, i1 false), !tbaa.struct [[TAG_28:!.*]], !noalias [[TAG_41]] +// CHECK-NEXT: ret void + + +// indices for llvm.noalias.copy.guard + +// CHECK: [[TAG_24]] = !{[[TAG_25:!.*]], [[TAG_26:!.*]], [[TAG_27:!.*]]} +// CHECK: [[TAG_25]] = !{i64 -1, i64 0} +// CHECK: [[TAG_26]] = !{i64 -1, i64 1} +// CHECK: [[TAG_27]] = !{i64 -1, i64 2} + +// CHECK: [[TAG_44]] = !{[[TAG_45:!.*]], [[TAG_46:!.*]], [[TAG_47:!.*]]} +// CHECK: [[TAG_45]] = !{i64 -1, i64 0, i64 0} +// CHECK: [[TAG_46]] = !{i64 -1, i64 0, i64 1} +// CHECK: [[TAG_47]] = !{i64 -1, i64 0, i64 2} Index: clang/test/CodeGen/restrict/struct_member_01.c =================================================================== --- /dev/null +++ clang/test/CodeGen/restrict/struct_member_01.c @@ -0,0 +1,80 @@ +// RUN: %clang_cc1 -triple x86_64-apple-darwin -O2 -ffull-restrict %s -emit-llvm -o - | FileCheck %s + +struct FOO { + int* __restrict pA; + int* __restrict pB; +}; + + +int test00a(int* pA, int* pB) +{ + int * __restrict rpA; + int * __restrict rpB; + rpA=pA; + rpB=pB; + + *rpA=42; + *rpB=43; + return *rpA; +} + +// CHECK-LABEL: @test00a( +// CHECK: ret i32 42 + +int test00b(int* pA, int* pB) +{ + int * __restrict rp[2]; + rp[0]=pA; + rp[1]=pB; + + *rp[0]=42; + *rp[1]=43; + return *rp[0]; +} + +// CHECK-LABEL: @test00b( +// CHECK: ret i32 42 + + +int test01(struct FOO* p0, struct FOO* p1) +{ + *p0->pA=42; + *p1->pA=43; + + return *p0->pA; // 42 or 43 +} +// CHECK-LABEL: @test01( +// CHECK-NOT: ret i32 42 + +int test11(struct FOO* p0, struct FOO* p1) +{ + *p0->pA=42; + *p1->pB=43; + + return *p0->pA; // 42 +} + +// CHECK-LABEL: @test11( +// CHECK: ret i32 42 + +int test21(struct FOO p0, struct FOO p1) +{ + *p0.pA=42; + *p1.pB=43; + + return *p0.pA; // 42 +} + +// CHECK-LABEL: @test21( +// CHECK: ret i32 42 + +int test31(struct FOO* p0, struct FOO* __restrict p1) +{ + *p0->pA=42; + *p1->pA=43; + + return *p0->pA; // 42 +} + +// CHECK-LABEL: @test31( +// CHECK: ret i32 42 Index: clang/test/CodeGen/restrict/struct_member_02.c =================================================================== --- /dev/null +++ clang/test/CodeGen/restrict/struct_member_02.c @@ -0,0 +1,110 @@ +// RUN: %clang_cc1 -triple x86_64-apple-darwin -O2 -ffull-restrict -disable-llvm-passes %s -emit-llvm -o - | FileCheck %s + +//#define __restrict volatile + +// this test checks for what variables a restrict scope is created + +struct FOO_ipp { + int** p; +}; + +struct FOO_irpp { + int* __restrict * p; +}; + +struct FOO_iprp { + int dummy; + int** __restrict p; +}; + +struct FOO_irprp { + int* __restrict * __restrict p; +}; + +struct FOO_NESTED { + struct FOO_iprp m; +}; + +struct FOO_NESTED_A { + struct FOO_iprp m[2][3][4]; +}; + +typedef struct FOO_NESTED FUM; +typedef int* __restrict t_irp; + +int foo(int** p) +{ + struct FOO_ipp m1; // no + struct FOO_irpp m2; // no + struct FOO_iprp m3; // yes + struct FOO_irprp m4; // yes + struct FOO_NESTED m5; // yes + struct FOO_NESTED m6[2][4][5]; // yes + struct FOO_NESTED_A m7[2][4][5]; // yes + t_irp p0; // yes + FUM m8[3]; // yes + int *a1[2]; // no + int **a2[2]; // no + int *__restrict *a3[2]; // no + int ** __restrict a4[2]; // yes + int ** __restrict a5[2][3][4]; // yes + int *__restrict *a6; // no + m1.p=p; + m2.p=p; + m3.p=p; + m4.p=p; + a1[0]=*p; + a1[1]=*p; + + return **m1.p+**m2.p+**m3.p+**m4.p+*a1[0]+*a1[1]; +} + +// check the scopes of various variables +// CHECK-LABEL: @foo( + +// the local variables: +// CHECK: alloca +// CHECK: alloca +// CHECK: alloca +// CHECK: alloca +// CHECK: alloca +// CHECK: alloca +// CHECK: alloca +// CHECK: alloca +// CHECK: alloca +// CHECK: alloca +// CHECK: alloca +// CHECK: alloca +// CHECK: alloca +// CHECK: alloca +// CHECK: alloca +// CHECK: alloca +// CHECK-NOT: alloca + +// the local variables that have a restrict scope: +// CHECK: llvm.noalias.decl +// CHECK: llvm.noalias.decl +// CHECK: llvm.noalias.decl +// CHECK: llvm.noalias.decl +// CHECK: llvm.noalias.decl +// CHECK: llvm.noalias.decl +// CHECK: llvm.noalias.decl +// CHECK: llvm.noalias.decl +// CHECK: llvm.noalias.decl +// CHECK-NOT: llvm.noalias.decl +// CHECK: ret i32 + +// the restrict related metadata +// CHECK: foo: unknown scope +// CHECK-NEXT: foo +// CHECK-NOT: foo: +// CHECK: foo: m3 +// CHECK-NEXT: foo: m4 +// CHECK-NEXT: foo: m5 +// CHECK-NEXT: foo: m6 +// CHECK-NEXT: foo: m7 +// CHECK-NEXT: foo: p0 +// CHECK-NEXT: foo: m8 +// CHECK-NEXT: foo: a4 +// CHECK-NEXT: foo: a5 +// CHECK-NOT: foo: Index: clang/test/CodeGen/restrict/struct_member_03.c =================================================================== --- /dev/null +++ clang/test/CodeGen/restrict/struct_member_03.c @@ -0,0 +1,105 @@ +// RUN: %clang_cc1 -triple x86_64-apple-darwin -O2 -ffull-restrict %s -emit-llvm -o - | FileCheck %s + +struct FOO { + int* __restrict pA; + int* __restrict pB; +}; + + +int test10(int* pA, int* pB) { + struct FOO rp; + rp.pA=pA; + rp.pB=pB; + + *rp.pA=42; + *rp.pB=99; + + return *rp.pA; //42 +} +// CHECK-LABEL: @test10( +// CHECK: ret i32 42 + +int test11(struct FOO rp) { + *rp.pA=42; + *rp.pB=99; + + return *rp.pA; //42 +} +// CHECK-LABEL: @test11( +// CHECK: ret i32 42 + + +int test12(struct FOO* rp) { + *rp->pA=42; + *rp->pB=99; + + return *rp->pA; //42 +} +// CHECK-LABEL: @test12( +// CHECK: ret i32 42 + + + +int test20(int* pA, int* pB) { + struct FOO rp0; + struct FOO rp1; + rp0.pB=pA; + rp1.pB=pB; + + *rp0.pB=42; + *rp1.pB=99; + + return *rp0.pB; //42 +} +// CHECK-LABEL: @test20( +// CHECK: ret i32 42 + + +int test21(struct FOO rp0, struct FOO rp1) { + *rp0.pB=42; + *rp1.pB=99; + + return *rp0.pB; //42 +} +// CHECK-LABEL: @test21( +// CHECK: ret i32 42 + +int test22(struct FOO* rp0, struct FOO* rp1) { + *rp0->pB=42; + *rp1->pB=99; + + return *rp0->pB; // needs load +} +// CHECK-LABEL: @test22( +// CHECK-NOT: ret i32 42 +// CHECK-NOT: ret i32 99 + + +int test23(struct FOO* rp0, struct FOO* rp1) { + *rp0->pA=42; + *rp1->pB=99; + + return *rp0->pA; // 42, rp0->pA and rp1->pB are not overlapping +} +// CHECK-LABEL: @test23( +// CHECK: ret i32 42 + +int test24(struct FOO* __restrict rp0, struct FOO* rp1) { + *rp0->pB=42; + *rp1->pB=99; + + return *rp0->pB; // 42 +} +// CHECK-LABEL: @test24( +// CHECK: ret i32 42 + + +int test25(struct FOO* p0, struct FOO* rp1) { + struct FOO* __restrict rp0; rp0=p0; + *rp0->pB=42; + *rp1->pB=99; + + return *rp0->pB; // 42 +} +// CHECK-LABEL: @test25( +// CHECK: ret i32 42 Index: clang/test/CodeGen/restrict/struct_member_04.c =================================================================== --- /dev/null +++ clang/test/CodeGen/restrict/struct_member_04.c @@ -0,0 +1,170 @@ +// RUN: %clang_cc1 -triple x86_64-apple-darwin -O2 -ffull-restrict %s -emit-llvm -o - | FileCheck %s + +// restrict member pointers and inlining - basic functionality test + +struct FOO { + int* __restrict pA; + int* __restrict pB; +}; + + +void setFOO(struct FOO* p) +{ + *p->pA=42; + *p->pB=43; +} + +int test10(int *pA, int* pB, int* pC) { + + *pA=40; + { + struct FOO rp; + rp.pA=pA; + rp.pB=pB; + + setFOO(&rp); + + *pC=99; + return *rp.pA; // 42 + } +} + +// CHECK-LABEL: @test10( +// CHECK: ret i32 42 + +int test11(int *pA, int* pB, int* pC) { + + *pA=40; + { + struct FOO rp; + rp.pA=pA; + rp.pB=pB; + + setFOO(&rp); + + *pC=99; + } + return *pA; // 42 // should be, but llvm does not see it +} + +// CHECK-LABEL: @test11( +// CHECK-NOT: ret i32 40 +// CHECK-NOT: ret i32 42 +// CHECK-NOT: ret i32 43 +// CHECK-NOT: ret i32 99 + +int test12(int *pA, int* pB, int* pC) { + + *pA=40; + { + struct FOO rp; + rp.pA=pA; + rp.pB=pB; + + setFOO(&rp); + } + + *pC=99; + return *pA; // 42 or 99 +} + +// CHECK-LABEL: @test12( +// CHECK-NOT: ret i32 40 +// CHECK-NOT: ret i32 42 +// CHECK-NOT: ret i32 43 +// CHECK-NOT: ret i32 99 + + +// out-of-function scope +int getFOO(struct FOO* p) { + return *p->pA; +} + +// fully defined +int test20(int* pA, int* pC) { + *pA=42; + { + struct FOO rp; + rp.pA=pA; + *pC=99; + return getFOO(&rp); + } +} +// CHECK-LABEL: @test20( +// CHECK: ret i32 42 + +// fully defined +int test21(int* pA, int* pC) { + *pA=42; + *pC=99; + { + struct FOO rp; + rp.pA=pA; + return getFOO(&rp); + } +} + +// CHECK-LABEL: @test21( +// CHECK: ret i32 % + +// mixed defined +int test22(int* pA, struct FOO* pB0, int b0, int* pC) { + *pA=42; + { + struct FOO rp; + rp.pA=pA; + *pC=99; + return getFOO(b0 ? &rp : pB0); + } +} +// CHECK-LABEL: @test22( +// CHECK: ret i32 % + +// mixed-mixed defined +int test23(int* pA, struct FOO* pB0, int b0, struct FOO* pB1, int b1, int* pC) { + *pA=41; + { + struct FOO rp; + rp.pA=pA; + *pC=98; + return test22(pA, b1 ? &rp : pB0, b0, pC); + } +} +// CHECK-LABEL: @test23( +// CHECK: ret i32 % + +// fully defined +int test24(int* pA, int* pB0, int b0, int* pB1, int b1, int* pC) { + *pA=40; + { + struct FOO fb0; + fb0.pA=pB0; + { + struct FOO fb1; + fb1.pA=pB1; + + return test23(pA, &fb0, b0, &fb1, b1, pC); + } + } +} + +// CHECK-LABEL: @test24( +// CHECK: ret i32 % + +int test25(int* pA, int b0, int b1, int* pC) { + *pA=40; + { + struct FOO fb0; + fb0.pA=pA; + { + struct FOO fb1; + fb1.pA=pA; + + return test23(pA, &fb0, b0, &fb1, b1, pC); + } + } +} + +// CHECK-LABEL: @test25( +// FIXME: should be: ret i32 42 +// CHECK: ret i32 % Index: clang/test/CodeGen/restrict/struct_member_05.c =================================================================== --- /dev/null +++ clang/test/CodeGen/restrict/struct_member_05.c @@ -0,0 +1,109 @@ +// RUN: %clang_cc1 -triple x86_64-apple-darwin -O2 -ffull-restrict %s -emit-llvm -o - | FileCheck %s + + +// restrict member pointers and inlining - basic functionality test + +struct FOO { + int* __restrict pA; + int* __restrict pB; +}; + +struct FOO_plain { + int* pA; + int* pB; +}; + + +int test10(int *pA, int* pB, int* pC) { + + *pA=40; + { + struct FOO rp; + rp.pA=pA; + rp.pB=pB; + + { + struct FOO* p=&rp; + *p->pA=42; + *p->pB=43; + } + + *pC=99; + return *rp.pA; // 42 + } +} + +// CHECK-LABEL: @test10( +// CHECK: ret i32 42 + + +int test11(int *pA, int* pB, int* pC) { + + *pA=40; + { + struct FOO rp; + rp.pA=pA; + rp.pB=pB; + + { + *rp.pA=42; + *rp.pB=43; + } + + *pC=99; + return *rp.pA; // 42 + } +} + +// CHECK-LABEL: @test11( +// CHECK: ret i32 42 + +int test12a(int *pA, int* pB, int* pC, struct FOO* pF) { + + *pA=40; + { + struct FOO rp; + rp.pA=pA; + rp.pB=pB; + + { + struct FOO* p= pF ? pF : &rp; + *p->pA=42; + *p->pB=43; + } + + *pC=99; + return *rp.pA; // 42 or 40 + } +} + +// CHECK-LABEL: @test12a( +// CHECK-NOT: ret i32 40 +// CHECK-NOT: ret i32 42 +// CHECK-NOT: ret i32 43 +// CHECK-NOT: ret i32 99 + +int test12b(int *pA, int* pB, int* pC, struct FOO_plain* pF) { + + *pA=40; + { + struct FOO_plain rp; + rp.pA=pA; + rp.pB=pB; + + { + struct FOO_plain* p= pF ? pF : &rp; + *p->pA=42; + *p->pB=43; + } + + *pC=99; + return *rp.pA; // 42 or 40 or 99 or ... + } +} + +// CHECK-LABEL: @test12b( +// CHECK-NOT: ret i32 40 +// CHECK-NOT: ret i32 42 +// CHECK-NOT: ret i32 43 +// CHECK-NOT: ret i32 99 Index: clang/test/CodeGen/restrict/struct_member_06.c =================================================================== --- /dev/null +++ clang/test/CodeGen/restrict/struct_member_06.c @@ -0,0 +1,105 @@ +// RUN: %clang_cc1 -triple x86_64-apple-darwin -O2 -ffull-restrict %s -emit-llvm -o - | FileCheck %s + +// restrict member pointers and inlining - basic functionality test + +struct FOO { + int* __restrict p; +}; + +struct FOO_plain { + int* pA; + int* pB; +}; + +int test01_p_pp(int c, int* pA, int* pB) +{ + int* __restrict rpA; rpA=pA; + int* __restrict rpB; rpB=pB; + + int* p = c ? rpA : rpB; + + return *p; +} +// CHECK-LABEL: @test01_p_pp( +// CHECK: @llvm.noalias.decl +// CHECK-NEXT: @llvm.noalias.decl +// CHECK-NEXT: icmp +// CHECK-NEXT: @llvm.side.noalias +// CHECK-NEXT: @llvm.side.noalias +// CHECK-NEXT: select +// CHECK-NEXT: select +// CHECK-NEXT: load +// CHECK-NEXT: ret i32 + +int test01_p_ss(int c, int* pA, int* pB) +{ + struct FOO spA; spA.p=pA; + struct FOO spB; spB.p=pB; + + int* p = c ? spA.p : spB.p; + + return *p; +} +// CHECK-LABEL: @test01_p_ss( +// CHECK: @llvm.noalias.decl +// CHECK-NEXT: @llvm.noalias.decl +// CHECK-NEXT: icmp +// CHECK-NEXT: @llvm.side.noalias +// CHECK-NEXT: @llvm.side.noalias +// CHECK-NEXT: select +// CHECK-NEXT: select +// CHECK-NEXT: load +// CHECK-NEXT: ret i32 + +int test01_s_ss(int c, int* pA, int* pB) +{ + struct FOO spA; spA.p=pA; + struct FOO spB; spB.p=pB; + + { + struct FOO p = c ? spA : spB; + + return *p.p; + } +} +// CHECK-LABEL: @test01_s_ss( +// CHECK: @llvm.noalias.decl +// CHECK-NEXT: @llvm.noalias.decl +// CHECK-NEXT: @llvm.noalias.decl +// CHECK-NEXT: icmp +// CHECK-NEXT: @llvm.side.noalias +// CHECK-NEXT: @llvm.side.noalias +// CHECK-NEXT: select +// CHECK-NEXT: select +// CHECK-NEXT: @llvm.side.noalias +// CHECK-NEXT: load +// CHECK-NEXT: ret i32 + + +// FIXME: this one currently results in bad code :( +int test01_ps_ss(int c, int* pA, int* pB) +{ + struct FOO spA; spA.p=pA; + struct FOO spB; spB.p=pB; + + struct FOO* p = c ? &spA : &spB; + + return *p->p; +} +// CHECK-LABEL: @test01_ps_ss( +// CHECK: ret i32 + +int test01_ps_psps(int c, struct FOO* ppA, struct FOO* ppB) +{ + struct FOO* p = c ? ppA : ppB; + + return *p->p; +} +// CHECK-LABEL: @test01_ps_psps( +// CHECK: icmp +// CHECK-NEXT: select +// CHECK-NEXT: getelementptr +// CHECK-NEXT: load +// CHECK-NEXT: llvm.side.noalias +// CHECK-NEXT: load +// CHECK-NEXT: ret i32 Index: clang/test/CodeGen/restrict/struct_member_07.c =================================================================== --- /dev/null +++ clang/test/CodeGen/restrict/struct_member_07.c @@ -0,0 +1,62 @@ +// RUN: %clang_cc1 -triple x86_64-apple-darwin -O2 -ffull-restrict %s -emit-llvm -o - | FileCheck %s + + +struct FOO { + int* __restrict pA; +}; + +struct BAR { + int* pA; +}; + + +static void adaptFOO(struct FOO* p) { + *p->pA=99; +}; + +static void adaptBAR(struct BAR* p) { + *p->pA=99; +}; + +static void adaptInt(int* p) { + *p=99; +}; + + +// has 'unknown scope': caller: no, callee: yes +int test10(int* pA, struct FOO* pB) { + *pA=42; + adaptFOO(pB); + return *pA; +} +// CHECK-LABEL: @test10( +// CHECK: ret i32 42 + +// has 'unknown scope': caller: yes, callee: no +int test11(int* pA, struct FOO* pB) { + *pB->pA=42; + adaptInt(pA); + return *pB->pA; +} +// CHECK-LABEL: @test11( +// CHECK: ret i32 42 + +// has 'unknown scope': caller: no, callee: no +int test12(int* pA, struct BAR* pB) { + *pA=42; + adaptBAR(pB); + return *pA; +} +// CHECK-LABEL: @test12( +// CHECK-NOT: ret i32 42 + +// has 'unknown scope': caller: yes, callee: yes +int test13(int* pA, struct FOO* pB) { + *pB->pA=41; // introduce 'unknown scope' + *pA=42; + adaptFOO(pB); + return *pA; +} + +// CHECK-LABEL: @test13( +// CHECK: ret i32 42 Index: clang/test/CodeGen/restrict/struct_member_08.cpp =================================================================== --- /dev/null +++ clang/test/CodeGen/restrict/struct_member_08.cpp @@ -0,0 +1,67 @@ +// RUN: %clang_cc1 -triple x86_64-apple-darwin -O2 -ffull-restrict %s -emit-llvm -o - | FileCheck %s + +// NOTE: this test in C++ mode + +struct Fum { + Fum(unsigned long long d) { + ptr1=((int*)(d & 0xffffffff)); + ptr2=((int*)((d >> 32)& 0xffffffff)); + } + Fum(const Fum&) = default; + + int* __restrict ptr1; + int* __restrict ptr2; +}; + +static Fum pass(Fum d) { return d; } + +int test_Fum_01(unsigned long long data, int* p1) { + Fum tmp={data}; + + int* p0=tmp.ptr1; + + *p0=42; + *p1=99; + return *p0; +} +// CHECK: @_Z11test_Fum_01yPi +// CHECK: ret i32 42 + +int test_Fum_02(unsigned long long data) { + Fum tmp={data}; + + int* p0=tmp.ptr1; + int* p1=tmp.ptr2; + + *p0=42; + *p1=99; + return *p0; +} +// CHECK: @_Z11test_Fum_02y +// CHECK: ret i32 42 + +int test_Fum_pass_01(unsigned long long data, int* p1) { + Fum tmp={data}; + + int* p0=pass(tmp).ptr1; + + *p0=42; + *p1=99; + return *p0; +} +// CHECK: @_Z16test_Fum_pass_01yPi +// CHECK: ret i32 42 + + +int test_Fum_pass_02(unsigned long long data) { + Fum tmp={data}; + + int* p0=pass(tmp).ptr1; + int* p1=pass(tmp).ptr2; + + *p0=42; + *p1=99; + return *p0; +} +// CHECK: @_Z16test_Fum_pass_02y +// CHECK: ret i32 42 Index: clang/test/OpenMP/master_taskloop_firstprivate_codegen.cpp =================================================================== --- clang/test/OpenMP/master_taskloop_firstprivate_codegen.cpp +++ clang/test/OpenMP/master_taskloop_firstprivate_codegen.cpp @@ -312,7 +312,8 @@ // CHECK: [[PRIV_S_ARR_ADDR:%.+]] = alloca [2 x [[S_DOUBLE_TY]]]*, // CHECK: [[PRIV_VEC_ADDR:%.+]] = alloca [2 x i32]*, // CHECK: [[PRIV_SIVAR_ADDR:%.+]] = alloca i32*, -// CHECK: store void (i8*, ...)* bitcast (void ([[PRIVATES_MAIN_TY]]*, [[S_DOUBLE_TY]]**, i32**, [2 x [[S_DOUBLE_TY]]]**, [2 x i32]**, i32**)* [[PRIVATES_MAP_FN]] to void (i8*, ...)*), void (i8*, ...)** [[MAP_FN_ADDR:%.+]], +// CHECK: [[PRIVATES_MAP_FN_NOALIAS:%.+]] = call void (i8*, ...)* @llvm.noalias.p0{{.*}}(void (i8*, ...)* bitcast (void ([[PRIVATES_MAIN_TY]]*, [[S_DOUBLE_TY]]**, i32**, [2 x [[S_DOUBLE_TY]]]**, [2 x i32]**, i32**)* [[PRIVATES_MAP_FN]] to void (i8*, ...)*), +// CHECK: store void (i8*, ...)* [[PRIVATES_MAP_FN_NOALIAS]], void (i8*, ...)** [[MAP_FN_ADDR:%.+]], // CHECK: [[MAP_FN:%.+]] = load void (i8*, ...)*, void (i8*, ...)** [[MAP_FN_ADDR]], // CHECK: call void (i8*, ...) [[MAP_FN]](i8* %{{.+}}, [[S_DOUBLE_TY]]** [[PRIV_VAR_ADDR]], i32** [[PRIV_T_VAR_ADDR]], [2 x [[S_DOUBLE_TY]]]** [[PRIV_S_ARR_ADDR]], [2 x i32]** [[PRIV_VEC_ADDR]], i32** [[PRIV_SIVAR_ADDR]]) @@ -469,7 +470,8 @@ // CHECK-DAG: [[PRIV_VEC_ADDR:%.+]] = alloca [2 x i32]*, // CHECK-DAG: [[PRIV_S_ARR_ADDR:%.+]] = alloca [2 x [[S_INT_TY]]]*, // CHECK-DAG: [[PRIV_VAR_ADDR:%.+]] = alloca [[S_INT_TY]]*, -// CHECK: store void (i8*, ...)* bitcast (void ([[PRIVATES_TMAIN_TY]]*, i32**, [2 x i32]**, [2 x [[S_INT_TY]]]**, [[S_INT_TY]]**)* [[PRIVATES_MAP_FN]] to void (i8*, ...)*), void (i8*, ...)** [[MAP_FN_ADDR:%.+]], +// CHECK: [[PRIVATES_MAP_FN_NOALIAS:%.+]] = call void (i8*, ...)* @llvm.noalias.p0{{.*}}(void (i8*, ...)* bitcast (void ([[PRIVATES_TMAIN_TY]]*, i32**, [2 x i32]**, [2 x [[S_INT_TY]]]**, [[S_INT_TY]]**)* [[PRIVATES_MAP_FN]] to void (i8*, ...)*), +// CHECK: store void (i8*, ...)* [[PRIVATES_MAP_FN_NOALIAS]], void (i8*, ...)** [[MAP_FN_ADDR:%.+]], // CHECK: [[MAP_FN:%.+]] = load void (i8*, ...)*, void (i8*, ...)** [[MAP_FN_ADDR]], // CHECK: call void (i8*, ...) [[MAP_FN]](i8* %{{.+}}, i32** [[PRIV_T_VAR_ADDR]], [2 x i32]** [[PRIV_VEC_ADDR]], [2 x [[S_INT_TY]]]** [[PRIV_S_ARR_ADDR]], [[S_INT_TY]]** [[PRIV_VAR_ADDR]]) // CHECK: [[PRIV_T_VAR:%.+]] = load i32*, i32** [[PRIV_T_VAR_ADDR]], Index: clang/test/OpenMP/master_taskloop_lastprivate_codegen.cpp =================================================================== --- clang/test/OpenMP/master_taskloop_lastprivate_codegen.cpp +++ clang/test/OpenMP/master_taskloop_lastprivate_codegen.cpp @@ -273,7 +273,8 @@ // CHECK: [[PRIV_S_ARR_ADDR:%.+]] = alloca [2 x [[S_DOUBLE_TY]]]*, // CHECK: [[PRIV_VEC_ADDR:%.+]] = alloca [2 x i32]*, // CHECK: [[PRIV_SIVAR_ADDR:%.+]] = alloca i32*, -// CHECK: store void (i8*, ...)* bitcast (void ([[PRIVATES_MAIN_TY]]*, [[S_DOUBLE_TY]]**, i32**, [2 x [[S_DOUBLE_TY]]]**, [2 x i32]**, i32**)* [[PRIVATES_MAP_FN]] to void (i8*, ...)*), void (i8*, ...)** [[MAP_FN_ADDR:%.+]], +// CHECK: [[PRIVATES_MAP_FN_NOALIAS:%.+]] = call void (i8*, ...)* @llvm.noalias.p0{{.*}}(void (i8*, ...)* bitcast (void ([[PRIVATES_MAIN_TY]]*, [[S_DOUBLE_TY]]**, i32**, [2 x [[S_DOUBLE_TY]]]**, [2 x i32]**, i32**)* [[PRIVATES_MAP_FN]] to void (i8*, ...)*), +// CHECK: store void (i8*, ...)* [[PRIVATES_MAP_FN_NOALIAS]], void (i8*, ...)** [[MAP_FN_ADDR:%.+]], // CHECK: [[MAP_FN:%.+]] = load void (i8*, ...)*, void (i8*, ...)** [[MAP_FN_ADDR]], // CHECK: call void (i8*, ...) [[MAP_FN]](i8* %{{.+}}, [[S_DOUBLE_TY]]** [[PRIV_VAR_ADDR]], i32** [[PRIV_T_VAR_ADDR]], [2 x [[S_DOUBLE_TY]]]** [[PRIV_S_ARR_ADDR]], [2 x i32]** [[PRIV_VEC_ADDR]], i32** [[PRIV_SIVAR_ADDR]]) @@ -441,7 +442,9 @@ // CHECK-DAG: [[PRIV_VEC_ADDR:%.+]] = alloca [2 x i32]*, // CHECK-DAG: [[PRIV_S_ARR_ADDR:%.+]] = alloca [2 x [[S_INT_TY]]]*, // CHECK-DAG: [[PRIV_VAR_ADDR:%.+]] = alloca [[S_INT_TY]]*, -// CHECK: store void (i8*, ...)* bitcast (void ([[PRIVATES_TMAIN_TY]]*, i32**, [2 x i32]**, [2 x [[S_INT_TY]]]**, [[S_INT_TY]]**)* [[PRIVATES_MAP_FN]] to void (i8*, ...)*), void (i8*, ...)** [[MAP_FN_ADDR:%.+]], +// CHECK: [[PRIVATES_MAP_FN_NOALIAS:%.+]] = call void (i8*, ...)* @llvm.noalias.p0{{.*}}(void (i8*, ...)* bitcast (void ([[PRIVATES_TMAIN_TY]]*, i32**, [2 x i32]**, [2 x [[S_INT_TY]]]**, [[S_INT_TY]]**)* [[PRIVATES_MAP_FN]] to void (i8*, ...)*), +// CHECK: store void (i8*, ...)* [[PRIVATES_MAP_FN_NOALIAS]], void (i8*, ...)** [[MAP_FN_ADDR:%.+]], + // CHECK: [[MAP_FN:%.+]] = load void (i8*, ...)*, void (i8*, ...)** [[MAP_FN_ADDR]], // CHECK: call void (i8*, ...) [[MAP_FN]](i8* %{{.+}}, i32** [[PRIV_T_VAR_ADDR]], [2 x i32]** [[PRIV_VEC_ADDR]], [2 x [[S_INT_TY]]]** [[PRIV_S_ARR_ADDR]], [[S_INT_TY]]** [[PRIV_VAR_ADDR]]) // CHECK: [[PRIV_T_VAR:%.+]] = load i32*, i32** [[PRIV_T_VAR_ADDR]], Index: clang/test/OpenMP/master_taskloop_private_codegen.cpp =================================================================== --- clang/test/OpenMP/master_taskloop_private_codegen.cpp +++ clang/test/OpenMP/master_taskloop_private_codegen.cpp @@ -235,7 +235,8 @@ // CHECK: [[PRIV_S_ARR_ADDR:%.+]] = alloca [2 x [[S_DOUBLE_TY]]]*, // CHECK: [[PRIV_VEC_ADDR:%.+]] = alloca [2 x i32]*, // CHECK: [[PRIV_SIVAR_ADDR:%.+]] = alloca i32*, -// CHECK: store void (i8*, ...)* bitcast (void ([[PRIVATES_MAIN_TY]]*, [[S_DOUBLE_TY]]**, i32**, [2 x [[S_DOUBLE_TY]]]**, [2 x i32]**, i32**)* [[PRIVATES_MAP_FN]] to void (i8*, ...)*), void (i8*, ...)** [[MAP_FN_ADDR:%.+]], +// CHECK: [[PRIVATES_MAP_FN_NOALIAS:%.+]] = call void (i8*, ...)* @llvm.noalias.p0{{.*}}(void (i8*, ...)* bitcast (void ([[PRIVATES_MAIN_TY]]*, [[S_DOUBLE_TY]]**, i32**, [2 x [[S_DOUBLE_TY]]]**, [2 x i32]**, i32**)* [[PRIVATES_MAP_FN]] to void (i8*, ...)*), +// CHECK: store void (i8*, ...)* [[PRIVATES_MAP_FN_NOALIAS]], void (i8*, ...)** [[MAP_FN_ADDR:%.+]], // CHECK: [[MAP_FN:%.+]] = load void (i8*, ...)*, void (i8*, ...)** [[MAP_FN_ADDR]], // CHECK: call void (i8*, ...) [[MAP_FN]](i8* %{{.+}}, [[S_DOUBLE_TY]]** [[PRIV_VAR_ADDR]], i32** [[PRIV_T_VAR_ADDR]], [2 x [[S_DOUBLE_TY]]]** [[PRIV_S_ARR_ADDR]], [2 x i32]** [[PRIV_VEC_ADDR]], i32** [[PRIV_SIVAR_ADDR]]) // CHECK: [[PRIV_VAR:%.+]] = load [[S_DOUBLE_TY]]*, [[S_DOUBLE_TY]]** [[PRIV_VAR_ADDR]], @@ -363,7 +364,8 @@ // CHECK-DAG: [[PRIV_VEC_ADDR:%.+]] = alloca [2 x i32]*, // CHECK-DAG: [[PRIV_S_ARR_ADDR:%.+]] = alloca [2 x [[S_INT_TY]]]*, // CHECK-DAG: [[PRIV_VAR_ADDR:%.+]] = alloca [[S_INT_TY]]*, -// CHECK: store void (i8*, ...)* bitcast (void ([[PRIVATES_TMAIN_TY]]*, i32**, [2 x i32]**, [2 x [[S_INT_TY]]]**, [[S_INT_TY]]**)* [[PRIVATES_MAP_FN]] to void (i8*, ...)*), void (i8*, ...)** [[MAP_FN_ADDR:%.+]], +// CHECK: [[PRIVATES_MAP_FN_NOALIAS:%.+]] = call void (i8*, ...)* @llvm.noalias.p0{{.*}}(void (i8*, ...)* bitcast (void ([[PRIVATES_TMAIN_TY]]*, i32**, [2 x i32]**, [2 x [[S_INT_TY]]]**, [[S_INT_TY]]**)* [[PRIVATES_MAP_FN]] to void (i8*, ...)*), +// CHECK: store void (i8*, ...)* [[PRIVATES_MAP_FN_NOALIAS]], void (i8*, ...)** [[MAP_FN_ADDR:%.+]], // CHECK: [[MAP_FN:%.+]] = load void (i8*, ...)*, void (i8*, ...)** [[MAP_FN_ADDR]], // CHECK: call void (i8*, ...) [[MAP_FN]](i8* %{{.+}}, i32** [[PRIV_T_VAR_ADDR]], [2 x i32]** [[PRIV_VEC_ADDR]], [2 x [[S_INT_TY]]]** [[PRIV_S_ARR_ADDR]], [[S_INT_TY]]** [[PRIV_VAR_ADDR]]) // CHECK: [[PRIV_T_VAR:%.+]] = load i32*, i32** [[PRIV_T_VAR_ADDR]], Index: clang/test/OpenMP/master_taskloop_simd_firstprivate_codegen.cpp =================================================================== --- clang/test/OpenMP/master_taskloop_simd_firstprivate_codegen.cpp +++ clang/test/OpenMP/master_taskloop_simd_firstprivate_codegen.cpp @@ -291,7 +291,8 @@ // CHECK: [[PRIV_S_ARR_ADDR:%.+]] = alloca [2 x [[S_DOUBLE_TY]]]*, // CHECK: [[PRIV_VEC_ADDR:%.+]] = alloca [2 x i32]*, // CHECK: [[PRIV_SIVAR_ADDR:%.+]] = alloca i32*, -// CHECK: store void (i8*, ...)* bitcast (void ([[PRIVATES_MAIN_TY]]*, [[S_DOUBLE_TY]]**, i32**, [2 x [[S_DOUBLE_TY]]]**, [2 x i32]**, i32**)* [[PRIVATES_MAP_FN]] to void (i8*, ...)*), void (i8*, ...)** [[MAP_FN_ADDR:%.+]], +// CHECK: [[PRIVATES_MAP_FN_NOALIAS:%.+]] = call void (i8*, ...)* @llvm.noalias.p0{{.*}}(void (i8*, ...)* bitcast (void ([[PRIVATES_MAIN_TY]]*, [[S_DOUBLE_TY]]**, i32**, [2 x [[S_DOUBLE_TY]]]**, [2 x i32]**, i32**)* [[PRIVATES_MAP_FN]] to void (i8*, ...)*), +// CHECK: store void (i8*, ...)* [[PRIVATES_MAP_FN_NOALIAS]], void (i8*, ...)** [[MAP_FN_ADDR:%.+]], // CHECK: [[MAP_FN:%.+]] = load void (i8*, ...)*, void (i8*, ...)** [[MAP_FN_ADDR]], // CHECK: call void (i8*, ...) [[MAP_FN]](i8* %{{.+}}, [[S_DOUBLE_TY]]** [[PRIV_VAR_ADDR]], i32** [[PRIV_T_VAR_ADDR]], [2 x [[S_DOUBLE_TY]]]** [[PRIV_S_ARR_ADDR]], [2 x i32]** [[PRIV_VEC_ADDR]], i32** [[PRIV_SIVAR_ADDR]]) @@ -448,7 +449,8 @@ // CHECK-DAG: [[PRIV_VEC_ADDR:%.+]] = alloca [2 x i32]*, // CHECK-DAG: [[PRIV_S_ARR_ADDR:%.+]] = alloca [2 x [[S_INT_TY]]]*, // CHECK-DAG: [[PRIV_VAR_ADDR:%.+]] = alloca [[S_INT_TY]]*, -// CHECK: store void (i8*, ...)* bitcast (void ([[PRIVATES_TMAIN_TY]]*, i32**, [2 x i32]**, [2 x [[S_INT_TY]]]**, [[S_INT_TY]]**)* [[PRIVATES_MAP_FN]] to void (i8*, ...)*), void (i8*, ...)** [[MAP_FN_ADDR:%.+]], +// CHECK: [[PRIVATES_MAP_FN_NOALIAS:%.+]] = call void (i8*, ...)* @llvm.noalias.p0{{.*}}(void (i8*, ...)* bitcast (void ([[PRIVATES_TMAIN_TY]]*, i32**, [2 x i32]**, [2 x [[S_INT_TY]]]**, [[S_INT_TY]]**)* [[PRIVATES_MAP_FN]] to void (i8*, ...)*), +// CHECK: store void (i8*, ...)* [[PRIVATES_MAP_FN_NOALIAS]], void (i8*, ...)** [[MAP_FN_ADDR:%.+]], // CHECK: [[MAP_FN:%.+]] = load void (i8*, ...)*, void (i8*, ...)** [[MAP_FN_ADDR]], // CHECK: call void (i8*, ...) [[MAP_FN]](i8* %{{.+}}, i32** [[PRIV_T_VAR_ADDR]], [2 x i32]** [[PRIV_VEC_ADDR]], [2 x [[S_INT_TY]]]** [[PRIV_S_ARR_ADDR]], [[S_INT_TY]]** [[PRIV_VAR_ADDR]]) // CHECK: [[PRIV_T_VAR:%.+]] = load i32*, i32** [[PRIV_T_VAR_ADDR]], Index: clang/test/OpenMP/master_taskloop_simd_lastprivate_codegen.cpp =================================================================== --- clang/test/OpenMP/master_taskloop_simd_lastprivate_codegen.cpp +++ clang/test/OpenMP/master_taskloop_simd_lastprivate_codegen.cpp @@ -264,7 +264,8 @@ // CHECK: [[PRIV_S_ARR_ADDR:%.+]] = alloca [2 x [[S_DOUBLE_TY]]]*, // CHECK: [[PRIV_VEC_ADDR:%.+]] = alloca [2 x i32]*, // CHECK: [[PRIV_SIVAR_ADDR:%.+]] = alloca i32*, -// CHECK: store void (i8*, ...)* bitcast (void ([[PRIVATES_MAIN_TY]]*, [[S_DOUBLE_TY]]**, i32**, [2 x [[S_DOUBLE_TY]]]**, [2 x i32]**, i32**)* [[PRIVATES_MAP_FN]] to void (i8*, ...)*), void (i8*, ...)** [[MAP_FN_ADDR:%.+]], +// CHECK: [[PRIVATES_MAP_FN_NOALIAS:%.+]] = call void (i8*, ...)* @llvm.noalias.p0{{.*}}(void (i8*, ...)* bitcast (void ([[PRIVATES_MAIN_TY]]*, [[S_DOUBLE_TY]]**, i32**, [2 x [[S_DOUBLE_TY]]]**, [2 x i32]**, i32**)* [[PRIVATES_MAP_FN]] to void (i8*, ...)*), +// CHECK: store void (i8*, ...)* [[PRIVATES_MAP_FN_NOALIAS]], void (i8*, ...)** [[MAP_FN_ADDR:%.+]], // CHECK: [[MAP_FN:%.+]] = load void (i8*, ...)*, void (i8*, ...)** [[MAP_FN_ADDR]], // CHECK: call void (i8*, ...) [[MAP_FN]](i8* %{{.+}}, [[S_DOUBLE_TY]]** [[PRIV_VAR_ADDR]], i32** [[PRIV_T_VAR_ADDR]], [2 x [[S_DOUBLE_TY]]]** [[PRIV_S_ARR_ADDR]], [2 x i32]** [[PRIV_VEC_ADDR]], i32** [[PRIV_SIVAR_ADDR]]) @@ -432,7 +433,8 @@ // CHECK-DAG: [[PRIV_VEC_ADDR:%.+]] = alloca [2 x i32]*, // CHECK-DAG: [[PRIV_S_ARR_ADDR:%.+]] = alloca [2 x [[S_INT_TY]]]*, // CHECK-DAG: [[PRIV_VAR_ADDR:%.+]] = alloca [[S_INT_TY]]*, -// CHECK: store void (i8*, ...)* bitcast (void ([[PRIVATES_TMAIN_TY]]*, i32**, [2 x i32]**, [2 x [[S_INT_TY]]]**, [[S_INT_TY]]**)* [[PRIVATES_MAP_FN]] to void (i8*, ...)*), void (i8*, ...)** [[MAP_FN_ADDR:%.+]], +// CHECK: [[PRIVATES_MAP_FN_NOALIAS:%.+]] = call void (i8*, ...)* @llvm.noalias.p0{{.*}}(void (i8*, ...)* bitcast (void ([[PRIVATES_TMAIN_TY]]*, i32**, [2 x i32]**, [2 x [[S_INT_TY]]]**, [[S_INT_TY]]**)* [[PRIVATES_MAP_FN]] to void (i8*, ...)*), +// CHECK: store void (i8*, ...)* [[PRIVATES_MAP_FN_NOALIAS]], void (i8*, ...)** [[MAP_FN_ADDR:%.+]], // CHECK: [[MAP_FN:%.+]] = load void (i8*, ...)*, void (i8*, ...)** [[MAP_FN_ADDR]], // CHECK: call void (i8*, ...) [[MAP_FN]](i8* %{{.+}}, i32** [[PRIV_T_VAR_ADDR]], [2 x i32]** [[PRIV_VEC_ADDR]], [2 x [[S_INT_TY]]]** [[PRIV_S_ARR_ADDR]], [[S_INT_TY]]** [[PRIV_VAR_ADDR]]) // CHECK: [[PRIV_T_VAR:%.+]] = load i32*, i32** [[PRIV_T_VAR_ADDR]], Index: clang/test/OpenMP/master_taskloop_simd_private_codegen.cpp =================================================================== --- clang/test/OpenMP/master_taskloop_simd_private_codegen.cpp +++ clang/test/OpenMP/master_taskloop_simd_private_codegen.cpp @@ -228,7 +228,8 @@ // CHECK: [[PRIV_S_ARR_ADDR:%.+]] = alloca [2 x [[S_DOUBLE_TY]]]*, // CHECK: [[PRIV_VEC_ADDR:%.+]] = alloca [2 x i32]*, // CHECK: [[PRIV_SIVAR_ADDR:%.+]] = alloca i32*, -// CHECK: store void (i8*, ...)* bitcast (void ([[PRIVATES_MAIN_TY]]*, [[S_DOUBLE_TY]]**, i32**, [2 x [[S_DOUBLE_TY]]]**, [2 x i32]**, i32**)* [[PRIVATES_MAP_FN]] to void (i8*, ...)*), void (i8*, ...)** [[MAP_FN_ADDR:%.+]], +// CHECK: [[PRIVATES_MAP_FN_NOALIAS:%.+]] = call void (i8*, ...)* @llvm.noalias.p0{{.*}}(void (i8*, ...)* bitcast (void ([[PRIVATES_MAIN_TY]]*, [[S_DOUBLE_TY]]**, i32**, [2 x [[S_DOUBLE_TY]]]**, [2 x i32]**, i32**)* [[PRIVATES_MAP_FN]] to void (i8*, ...)*), +// CHECK: store void (i8*, ...)* [[PRIVATES_MAP_FN_NOALIAS]], void (i8*, ...)** [[MAP_FN_ADDR:%.+]], // CHECK: [[MAP_FN:%.+]] = load void (i8*, ...)*, void (i8*, ...)** [[MAP_FN_ADDR]], // CHECK: call void (i8*, ...) [[MAP_FN]](i8* %{{.+}}, [[S_DOUBLE_TY]]** [[PRIV_VAR_ADDR]], i32** [[PRIV_T_VAR_ADDR]], [2 x [[S_DOUBLE_TY]]]** [[PRIV_S_ARR_ADDR]], [2 x i32]** [[PRIV_VEC_ADDR]], i32** [[PRIV_SIVAR_ADDR]]) // CHECK: [[PRIV_VAR:%.+]] = load [[S_DOUBLE_TY]]*, [[S_DOUBLE_TY]]** [[PRIV_VAR_ADDR]], @@ -356,7 +357,8 @@ // CHECK-DAG: [[PRIV_VEC_ADDR:%.+]] = alloca [2 x i32]*, // CHECK-DAG: [[PRIV_S_ARR_ADDR:%.+]] = alloca [2 x [[S_INT_TY]]]*, // CHECK-DAG: [[PRIV_VAR_ADDR:%.+]] = alloca [[S_INT_TY]]*, -// CHECK: store void (i8*, ...)* bitcast (void ([[PRIVATES_TMAIN_TY]]*, i32**, [2 x i32]**, [2 x [[S_INT_TY]]]**, [[S_INT_TY]]**)* [[PRIVATES_MAP_FN]] to void (i8*, ...)*), void (i8*, ...)** [[MAP_FN_ADDR:%.+]], +// CHECK: [[PRIVATES_MAP_FN_NOALIAS:%.+]] = call void (i8*, ...)* @llvm.noalias.p0{{.*}}(void (i8*, ...)* bitcast (void ([[PRIVATES_TMAIN_TY]]*, i32**, [2 x i32]**, [2 x [[S_INT_TY]]]**, [[S_INT_TY]]**)* [[PRIVATES_MAP_FN]] to void (i8*, ...)*), +// CHECK: store void (i8*, ...)* [[PRIVATES_MAP_FN_NOALIAS]], void (i8*, ...)** [[MAP_FN_ADDR:%.+]], // CHECK: [[MAP_FN:%.+]] = load void (i8*, ...)*, void (i8*, ...)** [[MAP_FN_ADDR]], // CHECK: call void (i8*, ...) [[MAP_FN]](i8* %{{.+}}, i32** [[PRIV_T_VAR_ADDR]], [2 x i32]** [[PRIV_VEC_ADDR]], [2 x [[S_INT_TY]]]** [[PRIV_S_ARR_ADDR]], [[S_INT_TY]]** [[PRIV_VAR_ADDR]]) // CHECK: [[PRIV_T_VAR:%.+]] = load i32*, i32** [[PRIV_T_VAR_ADDR]], Index: clang/test/OpenMP/parallel_master_taskloop_firstprivate_codegen.cpp =================================================================== --- clang/test/OpenMP/parallel_master_taskloop_firstprivate_codegen.cpp +++ clang/test/OpenMP/parallel_master_taskloop_firstprivate_codegen.cpp @@ -300,7 +300,8 @@ // CHECK: [[PRIV_S_ARR_ADDR:%.+]] = alloca [2 x [[S_DOUBLE_TY]]]*, // CHECK: [[PRIV_VEC_ADDR:%.+]] = alloca [2 x i32]*, // CHECK: [[PRIV_SIVAR_ADDR:%.+]] = alloca i32*, -// CHECK: store void (i8*, ...)* bitcast (void ([[PRIVATES_MAIN_TY]]*, [[S_DOUBLE_TY]]**, i32**, [2 x [[S_DOUBLE_TY]]]**, [2 x i32]**, i32**)* [[PRIVATES_MAP_FN]] to void (i8*, ...)*), void (i8*, ...)** [[MAP_FN_ADDR:%.+]], +// CHECK: [[PRIVATES_MAP_FN_NOALIAS:%.+]] = call void (i8*, ...)* @llvm.noalias.p0{{.*}}(void (i8*, ...)* bitcast (void ([[PRIVATES_MAIN_TY]]*, [[S_DOUBLE_TY]]**, i32**, [2 x [[S_DOUBLE_TY]]]**, [2 x i32]**, i32**)* [[PRIVATES_MAP_FN]] to void (i8*, ...)*), +// CHECK: store void (i8*, ...)* [[PRIVATES_MAP_FN_NOALIAS]], void (i8*, ...)** [[MAP_FN_ADDR:%.+]], // CHECK: [[MAP_FN:%.+]] = load void (i8*, ...)*, void (i8*, ...)** [[MAP_FN_ADDR]], // CHECK: call void (i8*, ...) [[MAP_FN]](i8* %{{.+}}, [[S_DOUBLE_TY]]** [[PRIV_VAR_ADDR]], i32** [[PRIV_T_VAR_ADDR]], [2 x [[S_DOUBLE_TY]]]** [[PRIV_S_ARR_ADDR]], [2 x i32]** [[PRIV_VEC_ADDR]], i32** [[PRIV_SIVAR_ADDR]]) @@ -446,7 +447,8 @@ // CHECK-DAG: [[PRIV_VEC_ADDR:%.+]] = alloca [2 x i32]*, // CHECK-DAG: [[PRIV_S_ARR_ADDR:%.+]] = alloca [2 x [[S_INT_TY]]]*, // CHECK-DAG: [[PRIV_VAR_ADDR:%.+]] = alloca [[S_INT_TY]]*, -// CHECK: store void (i8*, ...)* bitcast (void ([[PRIVATES_TMAIN_TY]]*, i32**, [2 x i32]**, [2 x [[S_INT_TY]]]**, [[S_INT_TY]]**)* [[PRIVATES_MAP_FN]] to void (i8*, ...)*), void (i8*, ...)** [[MAP_FN_ADDR:%.+]], +// CHECK: [[PRIVATES_MAP_FN_NOALIAS:%.+]] = call void (i8*, ...)* @llvm.noalias.p0{{.*}}(void (i8*, ...)* bitcast (void ([[PRIVATES_TMAIN_TY]]*, i32**, [2 x i32]**, [2 x [[S_INT_TY]]]**, [[S_INT_TY]]**)* [[PRIVATES_MAP_FN]] to void (i8*, ...)*), +// CHECK: store void (i8*, ...)* [[PRIVATES_MAP_FN_NOALIAS]], void (i8*, ...)** [[MAP_FN_ADDR:%.+]], // CHECK: [[MAP_FN:%.+]] = load void (i8*, ...)*, void (i8*, ...)** [[MAP_FN_ADDR]], // CHECK: call void (i8*, ...) [[MAP_FN]](i8* %{{.+}}, i32** [[PRIV_T_VAR_ADDR]], [2 x i32]** [[PRIV_VEC_ADDR]], [2 x [[S_INT_TY]]]** [[PRIV_S_ARR_ADDR]], [[S_INT_TY]]** [[PRIV_VAR_ADDR]]) // CHECK: [[PRIV_T_VAR:%.+]] = load i32*, i32** [[PRIV_T_VAR_ADDR]], Index: clang/test/OpenMP/parallel_master_taskloop_lastprivate_codegen.cpp =================================================================== --- clang/test/OpenMP/parallel_master_taskloop_lastprivate_codegen.cpp +++ clang/test/OpenMP/parallel_master_taskloop_lastprivate_codegen.cpp @@ -261,7 +261,8 @@ // CHECK: [[PRIV_S_ARR_ADDR:%.+]] = alloca [2 x [[S_DOUBLE_TY]]]*, // CHECK: [[PRIV_VEC_ADDR:%.+]] = alloca [2 x i32]*, // CHECK: [[PRIV_SIVAR_ADDR:%.+]] = alloca i32*, -// CHECK: store void (i8*, ...)* bitcast (void ([[PRIVATES_MAIN_TY]]*, [[S_DOUBLE_TY]]**, i32**, [2 x [[S_DOUBLE_TY]]]**, [2 x i32]**, i32**)* [[PRIVATES_MAP_FN]] to void (i8*, ...)*), void (i8*, ...)** [[MAP_FN_ADDR:%.+]], +// CHECK: [[PRIVATES_MAP_FN_NOALIAS:%.+]] = call void (i8*, ...)* @llvm.noalias.p0{{.*}}(void (i8*, ...)* bitcast (void ([[PRIVATES_MAIN_TY]]*, [[S_DOUBLE_TY]]**, i32**, [2 x [[S_DOUBLE_TY]]]**, [2 x i32]**, i32**)* [[PRIVATES_MAP_FN]] to void (i8*, ...)*), +// CHECK: store void (i8*, ...)* [[PRIVATES_MAP_FN_NOALIAS]], void (i8*, ...)** [[MAP_FN_ADDR:%.+]], // CHECK: [[MAP_FN:%.+]] = load void (i8*, ...)*, void (i8*, ...)** [[MAP_FN_ADDR]], // CHECK: call void (i8*, ...) [[MAP_FN]](i8* %{{.+}}, [[S_DOUBLE_TY]]** [[PRIV_VAR_ADDR]], i32** [[PRIV_T_VAR_ADDR]], [2 x [[S_DOUBLE_TY]]]** [[PRIV_S_ARR_ADDR]], [2 x i32]** [[PRIV_VEC_ADDR]], i32** [[PRIV_SIVAR_ADDR]]) @@ -423,7 +424,8 @@ // CHECK-DAG: [[PRIV_VEC_ADDR:%.+]] = alloca [2 x i32]*, // CHECK-DAG: [[PRIV_S_ARR_ADDR:%.+]] = alloca [2 x [[S_INT_TY]]]*, // CHECK-DAG: [[PRIV_VAR_ADDR:%.+]] = alloca [[S_INT_TY]]*, -// CHECK: store void (i8*, ...)* bitcast (void ([[PRIVATES_TMAIN_TY]]*, i32**, [2 x i32]**, [2 x [[S_INT_TY]]]**, [[S_INT_TY]]**)* [[PRIVATES_MAP_FN]] to void (i8*, ...)*), void (i8*, ...)** [[MAP_FN_ADDR:%.+]], +// CHECK: [[PRIVATES_MAP_FN_NOALIAS:%.+]] = call void (i8*, ...)* @llvm.noalias.p0{{.*}}(void (i8*, ...)* bitcast (void ([[PRIVATES_TMAIN_TY]]*, i32**, [2 x i32]**, [2 x [[S_INT_TY]]]**, [[S_INT_TY]]**)* [[PRIVATES_MAP_FN]] to void (i8*, ...)*), +// CHECK: store void (i8*, ...)* [[PRIVATES_MAP_FN_NOALIAS]], void (i8*, ...)** [[MAP_FN_ADDR:%.+]], // CHECK: [[MAP_FN:%.+]] = load void (i8*, ...)*, void (i8*, ...)** [[MAP_FN_ADDR]], // CHECK: call void (i8*, ...) [[MAP_FN]](i8* %{{.+}}, i32** [[PRIV_T_VAR_ADDR]], [2 x i32]** [[PRIV_VEC_ADDR]], [2 x [[S_INT_TY]]]** [[PRIV_S_ARR_ADDR]], [[S_INT_TY]]** [[PRIV_VAR_ADDR]]) // CHECK: [[PRIV_T_VAR:%.+]] = load i32*, i32** [[PRIV_T_VAR_ADDR]], Index: clang/test/OpenMP/parallel_master_taskloop_private_codegen.cpp =================================================================== --- clang/test/OpenMP/parallel_master_taskloop_private_codegen.cpp +++ clang/test/OpenMP/parallel_master_taskloop_private_codegen.cpp @@ -227,7 +227,8 @@ // CHECK: [[PRIV_S_ARR_ADDR:%.+]] = alloca [2 x [[S_DOUBLE_TY]]]*, // CHECK: [[PRIV_VEC_ADDR:%.+]] = alloca [2 x i32]*, // CHECK: [[PRIV_SIVAR_ADDR:%.+]] = alloca i32*, -// CHECK: store void (i8*, ...)* bitcast (void ([[PRIVATES_MAIN_TY]]*, [[S_DOUBLE_TY]]**, i32**, [2 x [[S_DOUBLE_TY]]]**, [2 x i32]**, i32**)* [[PRIVATES_MAP_FN]] to void (i8*, ...)*), void (i8*, ...)** [[MAP_FN_ADDR:%.+]], +// CHECK: [[PRIVATES_MAP_FN_NOALIAS:%.+]] = call void (i8*, ...)* @llvm.noalias.p0{{.*}}(void (i8*, ...)* bitcast (void ([[PRIVATES_MAIN_TY]]*, [[S_DOUBLE_TY]]**, i32**, [2 x [[S_DOUBLE_TY]]]**, [2 x i32]**, i32**)* [[PRIVATES_MAP_FN]] to void (i8*, ...)*), +// CHECK: store void (i8*, ...)* [[PRIVATES_MAP_FN_NOALIAS]], void (i8*, ...)** [[MAP_FN_ADDR:%.+]], // CHECK: [[MAP_FN:%.+]] = load void (i8*, ...)*, void (i8*, ...)** [[MAP_FN_ADDR]], // CHECK: call void (i8*, ...) [[MAP_FN]](i8* %{{.+}}, [[S_DOUBLE_TY]]** [[PRIV_VAR_ADDR]], i32** [[PRIV_T_VAR_ADDR]], [2 x [[S_DOUBLE_TY]]]** [[PRIV_S_ARR_ADDR]], [2 x i32]** [[PRIV_VEC_ADDR]], i32** [[PRIV_SIVAR_ADDR]]) // CHECK: [[PRIV_VAR:%.+]] = load [[S_DOUBLE_TY]]*, [[S_DOUBLE_TY]]** [[PRIV_VAR_ADDR]], @@ -349,7 +350,8 @@ // CHECK-DAG: [[PRIV_VEC_ADDR:%.+]] = alloca [2 x i32]*, // CHECK-DAG: [[PRIV_S_ARR_ADDR:%.+]] = alloca [2 x [[S_INT_TY]]]*, // CHECK-DAG: [[PRIV_VAR_ADDR:%.+]] = alloca [[S_INT_TY]]*, -// CHECK: store void (i8*, ...)* bitcast (void ([[PRIVATES_TMAIN_TY]]*, i32**, [2 x i32]**, [2 x [[S_INT_TY]]]**, [[S_INT_TY]]**)* [[PRIVATES_MAP_FN]] to void (i8*, ...)*), void (i8*, ...)** [[MAP_FN_ADDR:%.+]], +// CHECK: [[PRIVATES_MAP_FN_NOALIAS:%.+]] = call void (i8*, ...)* @llvm.noalias.p0{{.*}}(void (i8*, ...)* bitcast (void ([[PRIVATES_TMAIN_TY]]*, i32**, [2 x i32]**, [2 x [[S_INT_TY]]]**, [[S_INT_TY]]**)* [[PRIVATES_MAP_FN]] to void (i8*, ...)*), +// CHECK: store void (i8*, ...)* [[PRIVATES_MAP_FN_NOALIAS]], void (i8*, ...)** [[MAP_FN_ADDR:%.+]], // CHECK: [[MAP_FN:%.+]] = load void (i8*, ...)*, void (i8*, ...)** [[MAP_FN_ADDR]], // CHECK: call void (i8*, ...) [[MAP_FN]](i8* %{{.+}}, i32** [[PRIV_T_VAR_ADDR]], [2 x i32]** [[PRIV_VEC_ADDR]], [2 x [[S_INT_TY]]]** [[PRIV_S_ARR_ADDR]], [[S_INT_TY]]** [[PRIV_VAR_ADDR]]) // CHECK: [[PRIV_T_VAR:%.+]] = load i32*, i32** [[PRIV_T_VAR_ADDR]], Index: clang/test/OpenMP/task_codegen.cpp =================================================================== --- clang/test/OpenMP/task_codegen.cpp +++ clang/test/OpenMP/task_codegen.cpp @@ -275,7 +275,7 @@ return a; } // CHECK: define internal i32 [[TASK_ENTRY1]](i32 %0, [[KMP_TASK_T]]{{.*}}* noalias %1) -// CHECK: store i32 15, i32* [[A_PTR:@.+]] +// CHECK: store i32 15, i32* [[A_PTR:@.+]], align 4 // CHECK: [[A_VAL:%.+]] = load i32, i32* [[A_PTR]] // CHECK: [[A_VAL_I8:%.+]] = trunc i32 [[A_VAL]] to i8 // CHECK: store i8 [[A_VAL_I8]], i8* %{{.+}} Index: clang/test/OpenMP/task_firstprivate_codegen.cpp =================================================================== --- clang/test/OpenMP/task_firstprivate_codegen.cpp +++ clang/test/OpenMP/task_firstprivate_codegen.cpp @@ -292,7 +292,8 @@ // CHECK: [[PRIV_VEC_ADDR:%.+]] = alloca [2 x i32]*, // CHECK: [[PRIV_SIVAR_ADDR:%.+]] = alloca i32*, // CHECK: [[PRIV_VAR_ADDR:%.+]] = alloca [[S_DOUBLE_TY]]*, -// CHECK: store void (i8*, ...)* bitcast (void ([[PRIVATES_MAIN_TY]]*, i32**, [2 x [[S_DOUBLE_TY]]]**, [2 x i32]**, i32**, [[S_DOUBLE_TY]]**)* [[PRIVATES_MAP_FN]] to void (i8*, ...)*), void (i8*, ...)** [[MAP_FN_ADDR:%.+]], +// CHECK: [[PRIVATES_MAP_FN_NOALIAS:%.*]] = call void (i8*, ...)* @llvm.noalias.p0{{.*}}(void (i8*, ...)* bitcast (void ([[PRIVATES_MAIN_TY]]*, i32**, [2 x [[S_DOUBLE_TY]]]**, [2 x i32]**, i32**, [[S_DOUBLE_TY]]**)* [[PRIVATES_MAP_FN]] to void (i8*, ...)*), +// CHECK: store void (i8*, ...)* [[PRIVATES_MAP_FN_NOALIAS]], void (i8*, ...)** [[MAP_FN_ADDR:%.+]], // CHECK: [[MAP_FN:%.+]] = load void (i8*, ...)*, void (i8*, ...)** [[MAP_FN_ADDR]], // CHECK: call void (i8*, ...) [[MAP_FN]](i8* %{{.+}}, i32** [[PRIV_T_VAR_ADDR]], [2 x [[S_DOUBLE_TY]]]** [[PRIV_S_ARR_ADDR]], [2 x i32]** [[PRIV_VEC_ADDR]], i32** [[PRIV_SIVAR_ADDR]], [[S_DOUBLE_TY]]** [[PRIV_VAR_ADDR]]) @@ -432,7 +433,8 @@ // CHECK-DAG: [[PRIV_VEC_ADDR:%.+]] = alloca [2 x i32]*, // CHECK-DAG: [[PRIV_S_ARR_ADDR:%.+]] = alloca [2 x [[S_INT_TY]]]*, // CHECK-DAG: [[PRIV_VAR_ADDR:%.+]] = alloca [[S_INT_TY]]*, -// CHECK: store void (i8*, ...)* bitcast (void ([[PRIVATES_TMAIN_TY]]*, i32**, [2 x i32]**, [2 x [[S_INT_TY]]]**, [[S_INT_TY]]**)* [[PRIVATES_MAP_FN]] to void (i8*, ...)*), void (i8*, ...)** [[MAP_FN_ADDR:%.+]], +// CHECK: [[PRIVATES_MAP_FN_NOALIAS:%.+]] = call void (i8*, ...)* @llvm.noalias.p0f{{.*}}(void (i8*, ...)* bitcast (void ([[PRIVATES_TMAIN_TY]]*, i32**, [2 x i32]**, [2 x [[S_INT_TY]]]**, [[S_INT_TY]]**)* [[PRIVATES_MAP_FN]] to void (i8*, ...)*), +// CHECK: store void (i8*, ...)* [[PRIVATES_MAP_FN_NOALIAS]], void (i8*, ...)** [[MAP_FN_ADDR:%.+]], // CHECK: [[MAP_FN:%.+]] = load void (i8*, ...)*, void (i8*, ...)** [[MAP_FN_ADDR]], // CHECK: call void (i8*, ...) [[MAP_FN]](i8* %{{.+}}, i32** [[PRIV_T_VAR_ADDR]], [2 x i32]** [[PRIV_VEC_ADDR]], [2 x [[S_INT_TY]]]** [[PRIV_S_ARR_ADDR]], [[S_INT_TY]]** [[PRIV_VAR_ADDR]]) // CHECK: [[PRIV_T_VAR:%.+]] = load i32*, i32** [[PRIV_T_VAR_ADDR]], Index: clang/test/OpenMP/task_private_codegen.cpp =================================================================== --- clang/test/OpenMP/task_private_codegen.cpp +++ clang/test/OpenMP/task_private_codegen.cpp @@ -228,7 +228,8 @@ // CHECK: [[PRIV_S_ARR_ADDR:%.+]] = alloca [2 x [[S_DOUBLE_TY]]]*, // CHECK: [[PRIV_VEC_ADDR:%.+]] = alloca [2 x i32]*, // CHECK: [[PRIV_SIVAR_ADDR:%.+]] = alloca i32*, -// CHECK: store void (i8*, ...)* bitcast (void ([[PRIVATES_MAIN_TY]]*, [[S_DOUBLE_TY]]**, i32**, [2 x [[S_DOUBLE_TY]]]**, [2 x i32]**, i32**)* [[PRIVATES_MAP_FN]] to void (i8*, ...)*), void (i8*, ...)** [[MAP_FN_ADDR:%.+]], +// CHECK: [[PRIVATES_MAP_FN_NOALIAS:%.*]] = call void (i8*, ...)* @llvm.noalias.p0{{.*}}(void (i8*, ...)* bitcast (void ([[PRIVATES_MAIN_TY]]*, [[S_DOUBLE_TY]]**, i32**, [2 x [[S_DOUBLE_TY]]]**, [2 x i32]**, i32**)* [[PRIVATES_MAP_FN]] to void (i8*, ...)*), +// CHECK: store void (i8*, ...)* [[PRIVATES_MAP_FN_NOALIAS]], void (i8*, ...)** [[MAP_FN_ADDR:%.+]], // CHECK: [[MAP_FN:%.+]] = load void (i8*, ...)*, void (i8*, ...)** [[MAP_FN_ADDR]], // CHECK: call void (i8*, ...) [[MAP_FN]](i8* %{{.+}}, [[S_DOUBLE_TY]]** [[PRIV_VAR_ADDR]], i32** [[PRIV_T_VAR_ADDR]], [2 x [[S_DOUBLE_TY]]]** [[PRIV_S_ARR_ADDR]], [2 x i32]** [[PRIV_VEC_ADDR]], i32** [[PRIV_SIVAR_ADDR]]) // CHECK: [[PRIV_VAR:%.+]] = load [[S_DOUBLE_TY]]*, [[S_DOUBLE_TY]]** [[PRIV_VAR_ADDR]], @@ -339,7 +340,8 @@ // CHECK-DAG: [[PRIV_VEC_ADDR:%.+]] = alloca [2 x i32]*, // CHECK-DAG: [[PRIV_S_ARR_ADDR:%.+]] = alloca [2 x [[S_INT_TY]]]*, // CHECK-DAG: [[PRIV_VAR_ADDR:%.+]] = alloca [[S_INT_TY]]*, -// CHECK: store void (i8*, ...)* bitcast (void ([[PRIVATES_TMAIN_TY]]*, i32**, [2 x i32]**, [2 x [[S_INT_TY]]]**, [[S_INT_TY]]**)* [[PRIVATES_MAP_FN]] to void (i8*, ...)*), void (i8*, ...)** [[MAP_FN_ADDR:%.+]], +// CHECK: [[PRIVATES_MAP_FN_NOALIAS:%.*]] = call void (i8*, ...)* @llvm.noalias.p0f{{.*}}(void (i8*, ...)* bitcast (void ([[PRIVATES_TMAIN_TY]]*, i32**, [2 x i32]**, [2 x [[S_INT_TY]]]**, [[S_INT_TY]]**)* [[PRIVATES_MAP_FN]] to void (i8*, ...)*), +// CHECK: store void (i8*, ...)* [[PRIVATES_MAP_FN_NOALIAS]], void (i8*, ...)** [[MAP_FN_ADDR:%.+]], // CHECK: [[MAP_FN:%.+]] = load void (i8*, ...)*, void (i8*, ...)** [[MAP_FN_ADDR]], // CHECK: call void (i8*, ...) [[MAP_FN]](i8* %{{.+}}, i32** [[PRIV_T_VAR_ADDR]], [2 x i32]** [[PRIV_VEC_ADDR]], [2 x [[S_INT_TY]]]** [[PRIV_S_ARR_ADDR]], [[S_INT_TY]]** [[PRIV_VAR_ADDR]]) // CHECK: [[PRIV_T_VAR:%.+]] = load i32*, i32** [[PRIV_T_VAR_ADDR]], Index: clang/test/OpenMP/taskloop_firstprivate_codegen.cpp =================================================================== --- clang/test/OpenMP/taskloop_firstprivate_codegen.cpp +++ clang/test/OpenMP/taskloop_firstprivate_codegen.cpp @@ -291,7 +291,8 @@ // CHECK: [[PRIV_S_ARR_ADDR:%.+]] = alloca [2 x [[S_DOUBLE_TY]]]*, // CHECK: [[PRIV_VEC_ADDR:%.+]] = alloca [2 x i32]*, // CHECK: [[PRIV_SIVAR_ADDR:%.+]] = alloca i32*, -// CHECK: store void (i8*, ...)* bitcast (void ([[PRIVATES_MAIN_TY]]*, [[S_DOUBLE_TY]]**, i32**, [2 x [[S_DOUBLE_TY]]]**, [2 x i32]**, i32**)* [[PRIVATES_MAP_FN]] to void (i8*, ...)*), void (i8*, ...)** [[MAP_FN_ADDR:%.+]], +// CHECK: [[PRIVATES_MAP_FN_NOALIAS:%.+]] = call void (i8*, ...)* @llvm.noalias.p0f{{.*}}(void (i8*, ...)* bitcast (void ([[PRIVATES_MAIN_TY]]*, [[S_DOUBLE_TY]]**, i32**, [2 x [[S_DOUBLE_TY]]]**, [2 x i32]**, i32**)* [[PRIVATES_MAP_FN]] to void (i8*, ...)*), +// CHECK: store void (i8*, ...)* [[PRIVATES_MAP_FN_NOALIAS]], void (i8*, ...)** [[MAP_FN_ADDR:%.+]], // CHECK: [[MAP_FN:%.+]] = load void (i8*, ...)*, void (i8*, ...)** [[MAP_FN_ADDR]], // CHECK: call void (i8*, ...) [[MAP_FN]](i8* %{{.+}}, [[S_DOUBLE_TY]]** [[PRIV_VAR_ADDR]], i32** [[PRIV_T_VAR_ADDR]], [2 x [[S_DOUBLE_TY]]]** [[PRIV_S_ARR_ADDR]], [2 x i32]** [[PRIV_VEC_ADDR]], i32** [[PRIV_SIVAR_ADDR]]) @@ -448,7 +449,8 @@ // CHECK-DAG: [[PRIV_VEC_ADDR:%.+]] = alloca [2 x i32]*, // CHECK-DAG: [[PRIV_S_ARR_ADDR:%.+]] = alloca [2 x [[S_INT_TY]]]*, // CHECK-DAG: [[PRIV_VAR_ADDR:%.+]] = alloca [[S_INT_TY]]*, -// CHECK: store void (i8*, ...)* bitcast (void ([[PRIVATES_TMAIN_TY]]*, i32**, [2 x i32]**, [2 x [[S_INT_TY]]]**, [[S_INT_TY]]**)* [[PRIVATES_MAP_FN]] to void (i8*, ...)*), void (i8*, ...)** [[MAP_FN_ADDR:%.+]], +// CHECK: [[PRIVATES_MAP_FN_NOALIAS:%.+]] = call void (i8*, ...)* @llvm.noalias.p0f{{.*}}(void (i8*, ...)* bitcast (void ([[PRIVATES_TMAIN_TY]]*, i32**, [2 x i32]**, [2 x [[S_INT_TY]]]**, [[S_INT_TY]]**)* [[PRIVATES_MAP_FN]] to void (i8*, ...)*), +// CHECK: store void (i8*, ...)* [[PRIVATES_MAP_FN_NOALIAS]], void (i8*, ...)** [[MAP_FN_ADDR:%.+]], // CHECK: [[MAP_FN:%.+]] = load void (i8*, ...)*, void (i8*, ...)** [[MAP_FN_ADDR]], // CHECK: call void (i8*, ...) [[MAP_FN]](i8* %{{.+}}, i32** [[PRIV_T_VAR_ADDR]], [2 x i32]** [[PRIV_VEC_ADDR]], [2 x [[S_INT_TY]]]** [[PRIV_S_ARR_ADDR]], [[S_INT_TY]]** [[PRIV_VAR_ADDR]]) // CHECK: [[PRIV_T_VAR:%.+]] = load i32*, i32** [[PRIV_T_VAR_ADDR]], Index: clang/test/OpenMP/taskloop_lastprivate_codegen.cpp =================================================================== --- clang/test/OpenMP/taskloop_lastprivate_codegen.cpp +++ clang/test/OpenMP/taskloop_lastprivate_codegen.cpp @@ -266,7 +266,8 @@ // CHECK: [[PRIV_S_ARR_ADDR:%.+]] = alloca [2 x [[S_DOUBLE_TY]]]*, // CHECK: [[PRIV_VEC_ADDR:%.+]] = alloca [2 x i32]*, // CHECK: [[PRIV_SIVAR_ADDR:%.+]] = alloca i32*, -// CHECK: store void (i8*, ...)* bitcast (void ([[PRIVATES_MAIN_TY]]*, [[S_DOUBLE_TY]]**, i32**, [2 x [[S_DOUBLE_TY]]]**, [2 x i32]**, i32**)* [[PRIVATES_MAP_FN]] to void (i8*, ...)*), void (i8*, ...)** [[MAP_FN_ADDR:%.+]], +// CHECK: [[PRIVATES_MAP_FN_NOALIAS:%.+]] = call void (i8*, ...)* @llvm.noalias.p0f{{.*}}(void (i8*, ...)* bitcast (void ([[PRIVATES_MAIN_TY]]*, [[S_DOUBLE_TY]]**, i32**, [2 x [[S_DOUBLE_TY]]]**, [2 x i32]**, i32**)* [[PRIVATES_MAP_FN]] to void (i8*, ...)*), +// CHECK: store void (i8*, ...)* [[PRIVATES_MAP_FN_NOALIAS]], void (i8*, ...)** [[MAP_FN_ADDR:%.+]], // CHECK: [[MAP_FN:%.+]] = load void (i8*, ...)*, void (i8*, ...)** [[MAP_FN_ADDR]], // CHECK: call void (i8*, ...) [[MAP_FN]](i8* %{{.+}}, [[S_DOUBLE_TY]]** [[PRIV_VAR_ADDR]], i32** [[PRIV_T_VAR_ADDR]], [2 x [[S_DOUBLE_TY]]]** [[PRIV_S_ARR_ADDR]], [2 x i32]** [[PRIV_VEC_ADDR]], i32** [[PRIV_SIVAR_ADDR]]) @@ -434,7 +435,8 @@ // CHECK-DAG: [[PRIV_VEC_ADDR:%.+]] = alloca [2 x i32]*, // CHECK-DAG: [[PRIV_S_ARR_ADDR:%.+]] = alloca [2 x [[S_INT_TY]]]*, // CHECK-DAG: [[PRIV_VAR_ADDR:%.+]] = alloca [[S_INT_TY]]*, -// CHECK: store void (i8*, ...)* bitcast (void ([[PRIVATES_TMAIN_TY]]*, i32**, [2 x i32]**, [2 x [[S_INT_TY]]]**, [[S_INT_TY]]**)* [[PRIVATES_MAP_FN]] to void (i8*, ...)*), void (i8*, ...)** [[MAP_FN_ADDR:%.+]], +// CHECK: [[PRIVATES_MAP_FN_NOALIAS:%.+]] = call void (i8*, ...)* @llvm.noalias.p0f{{.*}}(void (i8*, ...)* bitcast (void ([[PRIVATES_TMAIN_TY]]*, i32**, [2 x i32]**, [2 x [[S_INT_TY]]]**, [[S_INT_TY]]**)* [[PRIVATES_MAP_FN]] to void (i8*, ...)*), +// CHECK: store void (i8*, ...)* [[PRIVATES_MAP_FN_NOALIAS]], void (i8*, ...)** [[MAP_FN_ADDR:%.+]], // CHECK: [[MAP_FN:%.+]] = load void (i8*, ...)*, void (i8*, ...)** [[MAP_FN_ADDR]], // CHECK: call void (i8*, ...) [[MAP_FN]](i8* %{{.+}}, i32** [[PRIV_T_VAR_ADDR]], [2 x i32]** [[PRIV_VEC_ADDR]], [2 x [[S_INT_TY]]]** [[PRIV_S_ARR_ADDR]], [[S_INT_TY]]** [[PRIV_VAR_ADDR]]) // CHECK: [[PRIV_T_VAR:%.+]] = load i32*, i32** [[PRIV_T_VAR_ADDR]], Index: clang/test/OpenMP/taskloop_private_codegen.cpp =================================================================== --- clang/test/OpenMP/taskloop_private_codegen.cpp +++ clang/test/OpenMP/taskloop_private_codegen.cpp @@ -228,7 +228,8 @@ // CHECK: [[PRIV_S_ARR_ADDR:%.+]] = alloca [2 x [[S_DOUBLE_TY]]]*, // CHECK: [[PRIV_VEC_ADDR:%.+]] = alloca [2 x i32]*, // CHECK: [[PRIV_SIVAR_ADDR:%.+]] = alloca i32*, -// CHECK: store void (i8*, ...)* bitcast (void ([[PRIVATES_MAIN_TY]]*, [[S_DOUBLE_TY]]**, i32**, [2 x [[S_DOUBLE_TY]]]**, [2 x i32]**, i32**)* [[PRIVATES_MAP_FN]] to void (i8*, ...)*), void (i8*, ...)** [[MAP_FN_ADDR:%.+]], +// CHECK: [[PRIVATES_MAP_FN_NOALIAS:%.+]] = call void (i8*, ...)* @llvm.noalias.p0f{{.*}}(void (i8*, ...)* bitcast (void ([[PRIVATES_MAIN_TY]]*, [[S_DOUBLE_TY]]**, i32**, [2 x [[S_DOUBLE_TY]]]**, [2 x i32]**, i32**)* [[PRIVATES_MAP_FN]] to void (i8*, ...)*), +// CHECK: store void (i8*, ...)* [[PRIVATES_MAP_FN_NOALIAS]], void (i8*, ...)** [[MAP_FN_ADDR:%.+]], // CHECK: [[MAP_FN:%.+]] = load void (i8*, ...)*, void (i8*, ...)** [[MAP_FN_ADDR]], // CHECK: call void (i8*, ...) [[MAP_FN]](i8* %{{.+}}, [[S_DOUBLE_TY]]** [[PRIV_VAR_ADDR]], i32** [[PRIV_T_VAR_ADDR]], [2 x [[S_DOUBLE_TY]]]** [[PRIV_S_ARR_ADDR]], [2 x i32]** [[PRIV_VEC_ADDR]], i32** [[PRIV_SIVAR_ADDR]]) // CHECK: [[PRIV_VAR:%.+]] = load [[S_DOUBLE_TY]]*, [[S_DOUBLE_TY]]** [[PRIV_VAR_ADDR]], @@ -356,7 +357,8 @@ // CHECK-DAG: [[PRIV_VEC_ADDR:%.+]] = alloca [2 x i32]*, // CHECK-DAG: [[PRIV_S_ARR_ADDR:%.+]] = alloca [2 x [[S_INT_TY]]]*, // CHECK-DAG: [[PRIV_VAR_ADDR:%.+]] = alloca [[S_INT_TY]]*, -// CHECK: store void (i8*, ...)* bitcast (void ([[PRIVATES_TMAIN_TY]]*, i32**, [2 x i32]**, [2 x [[S_INT_TY]]]**, [[S_INT_TY]]**)* [[PRIVATES_MAP_FN]] to void (i8*, ...)*), void (i8*, ...)** [[MAP_FN_ADDR:%.+]], +// CHECK: [[PRIVATES_MAP_FN_NOALIAS:%.+]] = call void (i8*, ...)* @llvm.noalias.p0f{{.*}}(void (i8*, ...)* bitcast (void ([[PRIVATES_TMAIN_TY]]*, i32**, [2 x i32]**, [2 x [[S_INT_TY]]]**, [[S_INT_TY]]**)* [[PRIVATES_MAP_FN]] to void (i8*, ...)*), +// CHECK: store void (i8*, ...)* [[PRIVATES_MAP_FN_NOALIAS]], void (i8*, ...)** [[MAP_FN_ADDR:%.+]], // CHECK: [[MAP_FN:%.+]] = load void (i8*, ...)*, void (i8*, ...)** [[MAP_FN_ADDR]], // CHECK: call void (i8*, ...) [[MAP_FN]](i8* %{{.+}}, i32** [[PRIV_T_VAR_ADDR]], [2 x i32]** [[PRIV_VEC_ADDR]], [2 x [[S_INT_TY]]]** [[PRIV_S_ARR_ADDR]], [[S_INT_TY]]** [[PRIV_VAR_ADDR]]) // CHECK: [[PRIV_T_VAR:%.+]] = load i32*, i32** [[PRIV_T_VAR_ADDR]], Index: clang/test/OpenMP/taskloop_simd_firstprivate_codegen.cpp =================================================================== --- clang/test/OpenMP/taskloop_simd_firstprivate_codegen.cpp +++ clang/test/OpenMP/taskloop_simd_firstprivate_codegen.cpp @@ -291,7 +291,8 @@ // CHECK: [[PRIV_S_ARR_ADDR:%.+]] = alloca [2 x [[S_DOUBLE_TY]]]*, // CHECK: [[PRIV_VEC_ADDR:%.+]] = alloca [2 x i32]*, // CHECK: [[PRIV_SIVAR_ADDR:%.+]] = alloca i32*, -// CHECK: store void (i8*, ...)* bitcast (void ([[PRIVATES_MAIN_TY]]*, [[S_DOUBLE_TY]]**, i32**, [2 x [[S_DOUBLE_TY]]]**, [2 x i32]**, i32**)* [[PRIVATES_MAP_FN]] to void (i8*, ...)*), void (i8*, ...)** [[MAP_FN_ADDR:%.+]], +// CHECK: [[PRIVATES_MAP_FN_NOALIAS:%.+]] = call void (i8*, ...)* @llvm.noalias.p0f{{.*}}(void (i8*, ...)* bitcast (void ([[PRIVATES_MAIN_TY]]*, [[S_DOUBLE_TY]]**, i32**, [2 x [[S_DOUBLE_TY]]]**, [2 x i32]**, i32**)* [[PRIVATES_MAP_FN]] to void (i8*, ...)*), +// CHECK: store void (i8*, ...)* [[PRIVATES_MAP_FN_NOALIAS]], void (i8*, ...)** [[MAP_FN_ADDR:%.+]], // CHECK: [[MAP_FN:%.+]] = load void (i8*, ...)*, void (i8*, ...)** [[MAP_FN_ADDR]], // CHECK: call void (i8*, ...) [[MAP_FN]](i8* %{{.+}}, [[S_DOUBLE_TY]]** [[PRIV_VAR_ADDR]], i32** [[PRIV_T_VAR_ADDR]], [2 x [[S_DOUBLE_TY]]]** [[PRIV_S_ARR_ADDR]], [2 x i32]** [[PRIV_VEC_ADDR]], i32** [[PRIV_SIVAR_ADDR]]) @@ -448,7 +449,8 @@ // CHECK-DAG: [[PRIV_VEC_ADDR:%.+]] = alloca [2 x i32]*, // CHECK-DAG: [[PRIV_S_ARR_ADDR:%.+]] = alloca [2 x [[S_INT_TY]]]*, // CHECK-DAG: [[PRIV_VAR_ADDR:%.+]] = alloca [[S_INT_TY]]*, -// CHECK: store void (i8*, ...)* bitcast (void ([[PRIVATES_TMAIN_TY]]*, i32**, [2 x i32]**, [2 x [[S_INT_TY]]]**, [[S_INT_TY]]**)* [[PRIVATES_MAP_FN]] to void (i8*, ...)*), void (i8*, ...)** [[MAP_FN_ADDR:%.+]], +// CHECK: [[PRIVATES_MAP_FN_NOALIAS:%.+]] = call void (i8*, ...)* @llvm.noalias.p0f{{.*}}(void (i8*, ...)* bitcast (void ([[PRIVATES_TMAIN_TY]]*, i32**, [2 x i32]**, [2 x [[S_INT_TY]]]**, [[S_INT_TY]]**)* [[PRIVATES_MAP_FN]] to void (i8*, ...)*), +// CHECK: store void (i8*, ...)* [[PRIVATES_MAP_FN_NOALIAS]], void (i8*, ...)** [[MAP_FN_ADDR:%.+]], // CHECK: [[MAP_FN:%.+]] = load void (i8*, ...)*, void (i8*, ...)** [[MAP_FN_ADDR]], // CHECK: call void (i8*, ...) [[MAP_FN]](i8* %{{.+}}, i32** [[PRIV_T_VAR_ADDR]], [2 x i32]** [[PRIV_VEC_ADDR]], [2 x [[S_INT_TY]]]** [[PRIV_S_ARR_ADDR]], [[S_INT_TY]]** [[PRIV_VAR_ADDR]]) // CHECK: [[PRIV_T_VAR:%.+]] = load i32*, i32** [[PRIV_T_VAR_ADDR]], Index: clang/test/OpenMP/taskloop_simd_lastprivate_codegen.cpp =================================================================== --- clang/test/OpenMP/taskloop_simd_lastprivate_codegen.cpp +++ clang/test/OpenMP/taskloop_simd_lastprivate_codegen.cpp @@ -264,7 +264,8 @@ // CHECK: [[PRIV_S_ARR_ADDR:%.+]] = alloca [2 x [[S_DOUBLE_TY]]]*, // CHECK: [[PRIV_VEC_ADDR:%.+]] = alloca [2 x i32]*, // CHECK: [[PRIV_SIVAR_ADDR:%.+]] = alloca i32*, -// CHECK: store void (i8*, ...)* bitcast (void ([[PRIVATES_MAIN_TY]]*, [[S_DOUBLE_TY]]**, i32**, [2 x [[S_DOUBLE_TY]]]**, [2 x i32]**, i32**)* [[PRIVATES_MAP_FN]] to void (i8*, ...)*), void (i8*, ...)** [[MAP_FN_ADDR:%.+]], +// CHECK: [[PRIVATES_MAP_FN_NOALIAS:%.+]] = call void (i8*, ...)* @llvm.noalias.p0f{{.*}}(void (i8*, ...)* bitcast (void ([[PRIVATES_MAIN_TY]]*, [[S_DOUBLE_TY]]**, i32**, [2 x [[S_DOUBLE_TY]]]**, [2 x i32]**, i32**)* [[PRIVATES_MAP_FN]] to void (i8*, ...)*), +// CHECK: store void (i8*, ...)* [[PRIVATES_MAP_FN_NOALIAS]], void (i8*, ...)** [[MAP_FN_ADDR:%.+]], // CHECK: [[MAP_FN:%.+]] = load void (i8*, ...)*, void (i8*, ...)** [[MAP_FN_ADDR]], // CHECK: call void (i8*, ...) [[MAP_FN]](i8* %{{.+}}, [[S_DOUBLE_TY]]** [[PRIV_VAR_ADDR]], i32** [[PRIV_T_VAR_ADDR]], [2 x [[S_DOUBLE_TY]]]** [[PRIV_S_ARR_ADDR]], [2 x i32]** [[PRIV_VEC_ADDR]], i32** [[PRIV_SIVAR_ADDR]]) @@ -432,7 +433,8 @@ // CHECK-DAG: [[PRIV_VEC_ADDR:%.+]] = alloca [2 x i32]*, // CHECK-DAG: [[PRIV_S_ARR_ADDR:%.+]] = alloca [2 x [[S_INT_TY]]]*, // CHECK-DAG: [[PRIV_VAR_ADDR:%.+]] = alloca [[S_INT_TY]]*, -// CHECK: store void (i8*, ...)* bitcast (void ([[PRIVATES_TMAIN_TY]]*, i32**, [2 x i32]**, [2 x [[S_INT_TY]]]**, [[S_INT_TY]]**)* [[PRIVATES_MAP_FN]] to void (i8*, ...)*), void (i8*, ...)** [[MAP_FN_ADDR:%.+]], +// CHECK: [[PRIVATES_MAP_FN_NOALIAS:%.+]] = call void (i8*, ...)* @llvm.noalias.p0f{{.*}}(void (i8*, ...)* bitcast (void ([[PRIVATES_TMAIN_TY]]*, i32**, [2 x i32]**, [2 x [[S_INT_TY]]]**, [[S_INT_TY]]**)* [[PRIVATES_MAP_FN]] to void (i8*, ...)*), +// CHECK: store void (i8*, ...)* [[PRIVATES_MAP_FN_NOALIAS]], void (i8*, ...)** [[MAP_FN_ADDR:%.+]], // CHECK: [[MAP_FN:%.+]] = load void (i8*, ...)*, void (i8*, ...)** [[MAP_FN_ADDR]], // CHECK: call void (i8*, ...) [[MAP_FN]](i8* %{{.+}}, i32** [[PRIV_T_VAR_ADDR]], [2 x i32]** [[PRIV_VEC_ADDR]], [2 x [[S_INT_TY]]]** [[PRIV_S_ARR_ADDR]], [[S_INT_TY]]** [[PRIV_VAR_ADDR]]) // CHECK: [[PRIV_T_VAR:%.+]] = load i32*, i32** [[PRIV_T_VAR_ADDR]], Index: clang/test/OpenMP/taskloop_simd_private_codegen.cpp =================================================================== --- clang/test/OpenMP/taskloop_simd_private_codegen.cpp +++ clang/test/OpenMP/taskloop_simd_private_codegen.cpp @@ -228,7 +228,8 @@ // CHECK: [[PRIV_S_ARR_ADDR:%.+]] = alloca [2 x [[S_DOUBLE_TY]]]*, // CHECK: [[PRIV_VEC_ADDR:%.+]] = alloca [2 x i32]*, // CHECK: [[PRIV_SIVAR_ADDR:%.+]] = alloca i32*, -// CHECK: store void (i8*, ...)* bitcast (void ([[PRIVATES_MAIN_TY]]*, [[S_DOUBLE_TY]]**, i32**, [2 x [[S_DOUBLE_TY]]]**, [2 x i32]**, i32**)* [[PRIVATES_MAP_FN]] to void (i8*, ...)*), void (i8*, ...)** [[MAP_FN_ADDR:%.+]], +// CHECK: [[PRIVATES_MAP_FN_NOALIAS:%.+]] = call void (i8*, ...)* @llvm.noalias.p0f{{.*}}(void (i8*, ...)* bitcast (void ([[PRIVATES_MAIN_TY]]*, [[S_DOUBLE_TY]]**, i32**, [2 x [[S_DOUBLE_TY]]]**, [2 x i32]**, i32**)* [[PRIVATES_MAP_FN]] to void (i8*, ...)*), +// CHECK: store void (i8*, ...)* [[PRIVATES_MAP_FN_NOALIAS]], void (i8*, ...)** [[MAP_FN_ADDR:%.+]], // CHECK: [[MAP_FN:%.+]] = load void (i8*, ...)*, void (i8*, ...)** [[MAP_FN_ADDR]], // CHECK: call void (i8*, ...) [[MAP_FN]](i8* %{{.+}}, [[S_DOUBLE_TY]]** [[PRIV_VAR_ADDR]], i32** [[PRIV_T_VAR_ADDR]], [2 x [[S_DOUBLE_TY]]]** [[PRIV_S_ARR_ADDR]], [2 x i32]** [[PRIV_VEC_ADDR]], i32** [[PRIV_SIVAR_ADDR]]) // CHECK: [[PRIV_VAR:%.+]] = load [[S_DOUBLE_TY]]*, [[S_DOUBLE_TY]]** [[PRIV_VAR_ADDR]], @@ -356,7 +357,8 @@ // CHECK-DAG: [[PRIV_VEC_ADDR:%.+]] = alloca [2 x i32]*, // CHECK-DAG: [[PRIV_S_ARR_ADDR:%.+]] = alloca [2 x [[S_INT_TY]]]*, // CHECK-DAG: [[PRIV_VAR_ADDR:%.+]] = alloca [[S_INT_TY]]*, -// CHECK: store void (i8*, ...)* bitcast (void ([[PRIVATES_TMAIN_TY]]*, i32**, [2 x i32]**, [2 x [[S_INT_TY]]]**, [[S_INT_TY]]**)* [[PRIVATES_MAP_FN]] to void (i8*, ...)*), void (i8*, ...)** [[MAP_FN_ADDR:%.+]], +// CHECK: [[PRIVATES_MAP_FN_NOALIAS:%.+]] = call void (i8*, ...)* @llvm.noalias.p0f{{.*}}(void (i8*, ...)* bitcast (void ([[PRIVATES_TMAIN_TY]]*, i32**, [2 x i32]**, [2 x [[S_INT_TY]]]**, [[S_INT_TY]]**)* [[PRIVATES_MAP_FN]] to void (i8*, ...)*), +// CHECK: store void (i8*, ...)* [[PRIVATES_MAP_FN_NOALIAS]], void (i8*, ...)** [[MAP_FN_ADDR:%.+]], // CHECK: [[MAP_FN:%.+]] = load void (i8*, ...)*, void (i8*, ...)** [[MAP_FN_ADDR]], // CHECK: call void (i8*, ...) [[MAP_FN]](i8* %{{.+}}, i32** [[PRIV_T_VAR_ADDR]], [2 x i32]** [[PRIV_VEC_ADDR]], [2 x [[S_INT_TY]]]** [[PRIV_S_ARR_ADDR]], [[S_INT_TY]]** [[PRIV_VAR_ADDR]]) // CHECK: [[PRIV_T_VAR:%.+]] = load i32*, i32** [[PRIV_T_VAR_ADDR]], Index: llvm/docs/LangRef.rst =================================================================== --- llvm/docs/LangRef.rst +++ llvm/docs/LangRef.rst @@ -5114,6 +5114,8 @@ 4 byte gap between the two fields. This gap represents padding which does not carry useful data and need not be preserved. +.. _noalias_and_aliasscope: + '``noalias``' and '``alias.scope``' Metadata ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ @@ -8663,8 +8665,8 @@ :: - = load [volatile] , * [, align ][, !nontemporal !][, !invariant.load !][, !invariant.group !][, !nonnull !][, !dereferenceable !][, !dereferenceable_or_null !][, !align !] - = load atomic [volatile] , * [syncscope("")] , align [, !invariant.group !] + = load [volatile] , * [, noalias_sidechannel * ][, align ][, !nontemporal !][, !invariant.load !][, !invariant.group !][, !nonnull !][, !dereferenceable !][, !dereferenceable_or_null !][, !align !] + = load atomic [volatile] , * [, noalias_sidechannel * ] [syncscope("")] , align [, !invariant.group !] ! = !{ i32 1 } ! = !{i64 } ! = !{ i64 } @@ -8695,6 +8697,13 @@ alignment is not set to a value which is at least the size in bytes of the pointee. ``!nontemporal`` does not have any defined semantics for atomic loads. +The optional ``noalias_sidechannel`` argument specifies the noalias chain of the +pointer operand. It has the same type as the pointer operand. Together with the +``!noalias`` metadata on the instruction, and the ``llvm.side.noalias``, +``llvm.noalias.arg.guard`` intrinsics in the chain, this is used to deduce if +two load/store instructions may or may not alias. (See `Scoped NoAlias Related +Intrinsics`_) + The optional constant ``align`` argument specifies the alignment of the operation (that is, the alignment of the memory address). A value of 0 or an omitted ``align`` argument means that the operation has the ABI @@ -8787,8 +8796,8 @@ :: - store [volatile] , * [, align ][, !nontemporal !][, !invariant.group !] ; yields void - store atomic [volatile] , * [syncscope("")] , align [, !invariant.group !] ; yields void + store [volatile] , * [, noalias_sidechannel * ][, align ][, !nontemporal !][, !invariant.group !] ; yields void + store atomic [volatile] , * [, noalias_sidechannel * ] [syncscope("")] , align [, !invariant.group !] ; yields void Overview: """"""""" @@ -8818,6 +8827,13 @@ the alignment is not set to a value which is at least the size in bytes of the pointee. ``!nontemporal`` does not have any defined semantics for atomic stores. +The optional ``noalias_sidechannel`` argument specifies the noalias chain of the +pointer operand. It has the same type as the pointer operand. Together with the +``!noalias`` metadata on the instruction, and the ``llvm.side.noalias``, +``llvm.noalias.arg.guard`` intrinsics in the chain, this is used to deduce if +two load/store instructions may or may not alias. (See `Scoped NoAlias Related +Intrinsics`_) + The optional constant ``align`` argument specifies the alignment of the operation (that is, the alignment of the memory address). A value of 0 or an omitted ``align`` argument means that the operation has the ABI @@ -16405,6 +16421,520 @@ This function returns the same values as the libm ``trunc`` functions would and handles error conditions in the same way. +Scoped NoAlias Related Intrinsics +--------------------------------- + +This set of intrinsics provide the basis for full C99 '``restrict``' support +(See: iso-C99-n1256, 6.7.3.1 'Formal definition of restrict'). + +In C99 restrict, accesses using a pointer based on a restrict pointer ``p`` +are assumed to not alias with other accesses that are not based on ``p``, as +long as both accesses are within the scope of the declaration of ``p``. + +The intrinsics work together with the '``!noalias``' +:ref:`metadata ` annotations on memory instructions and +the ``noalias_sidechannel`` of ``load`` and ``store`` instructions. The +documentation below explains how it works, with ``restrict`` in mind. +The intrinsics can also be used to specify alias assumptions that are +not restrict based. + +The full set of intrinsics is: + +.. code-block:: llvm + + %p.decl = i8* @llvm.noalias.decl.XXX(i8** %p.alloc, i32 , metadata !p.scope) + %p.noalias = i8* @llvm.noalias.XXX(i8* %p, i8* %p.decl, i8** %p.addr, + i32 , metadata !p.scope) !noalias !VisibleScopes + %side.p = i8* @llvm.side.noalias.XXX(i8* %side.p, i8* %p.decl, + i8** p.addr, i8** %side.p.addr, + i32 , metadata !p.scope) !noalias !VisibleScopes + %p.guard = i8* @llvm.noalias.arg.guard.XXX(i8* %p, i8* %side.p) + %p.copy.guard = %struct.FOO* @llvm.noalias.copy.guard.XXX(%struct.FOO* %p.block, i8* %p.decl, + metadata !p.indices, metadata !p.scope) + + +Note: ``XXX`` is the encoding of the return type and the types of the arguments. + +Overview +^^^^^^^^ + +Modeling ``restrict`` requires two pieces of information for a memory +instruction: + +- the *depends on* relationship of the pointer path on a restrict pointer +- the *visibility* of that restrict pointer to the memory instruction + +The ``!noalias`` metadata specifies which noalias scopes are visible for the +specified memory instruction. + +Unlike ``alias.scope`` metadata, which applies only to the access to which it +is attached, the ``llvm.noalias`` and ``llvm.side.noalias`` intrinsics apply +to all pointers based on the return value. They are used to specify the +*depends on* relationship and the associated noalias scope for the pointer. + +The ``llvm.noalias`` intrinsic introduces alias assumptions in the normal +computation path of a pointer and it will be opaque for most optimizations. The +``PropagateAndConvertNoAlias`` pass converts ``llvm.noalias`` intrinsics into +``llvm.side.noalias`` intrinsics. At the same time, it splits the pointer path +in a computation path (without ``llvm.noalias`` intrinsics) and a +``noalias_sidechannel`` path (with ``llvm.side.noalias`` intrinsics). This +allows optimization passes to adapt the normal pointer path, without impacting +the knowledge about the *depends on* relationship. + +The ``llvm.noalias.decl`` intrinsic is used to identify the exact location of +a *restrict pointer declaration*. When this is done inside the loop body, +care must be taken to duplicate and uniquify the scopes and intrinsics when +the loop is unrolled. Otherwise the restrict scope could spill across +iterations. + +The ``llvm.noalias.arg.guard`` intrinsic is used to track the computaton path +and the noalias_sidechannel path of a pointer this forwarded to a function, +or returned from a function. After inlining, the correct dependencies can still +be propagated. + +The ``llvm.noalias.copy.guard`` intrinsic is used to annotate that the returned +pointer points to a blob of memory that contains restrict pointers. This allows +to track the *based on* dependency when copying such blocks of memory. + +Arguments +^^^^^^^^^ + +Following arguments are typically used in the various intrinscis: + +- ``%p``: the value of the restrict pointer +- ``%p.addr``: the address of ``%p`` (*object P* in the C99 standard). + Either a real object, a constant where the value is relative to 0 or ``null``. + Different ``%p.addr`` represent different restrict pointers, pointing to + disjunct objects. +- ``p.objId``: a number that can be used to differentiate different *object P* + when ``%p.addr`` is optimized away. +- ``!p.scope``: metadata argument that refers to a list of alias.scope metadata + entries that contains exactly one element. It represents the variable + declaration that contains one or more restrict pointers. +- ``%p.decl``: points to the ``@llvm.noalias.decl`` intrinsic associated with + the declaration of a restrict variable. +- ``%p.alloca``: points to the alloca associated with the declaration of a + restrict variable +- ``%side.p``: the noalias_sidechannel associated with ``%p``. +- ``%side.p.addr``: the noalias_sidechannel associated with ``%p.addr``. +- ``%p.block``: the address of a block in memory or on the stack is associated + with a variable that contains at least one restrict pointer. +- ``!p.indices``: metadata argument that refers to a list of metadata + references. Each reference points to a metadata array of indices. At the + specified location, a restrict pointer is located. A '-1' indicates any index. + +Unknown Function Scope +^^^^^^^^^^^^^^^^^^^^^^ + +A special ``unknown function scope`` can be used to identify restrict +properties when the actual variable declaration is not known. In that case, +the ``%p.decl`` argument will be ``null`` and the ``!p.scope`` refers to the +``unknown function scope``. + +The ``unknown function scope`` is annotated as ``!noalias`` metadata on the +function itself. + +Restrictness +^^^^^^^^^^^^ + +The restrictness of a restrict pointer ``%p``, as given by the +``llvm.side.noalias`` intrinsic, can be expressed by following arguments: + +- ``%p.addr``: its address (*object P* in the C99 standard) +- ``p.objId``: an extra number +- ``!p.scope``: a declaration scope, related to the variable declaration. + +The { ``%p.addr``, ``p.objId``, ``!p.scope`` } tuple represents a +unique restrict pointer, which means that it refers to its own set of objects. + +- Let *MemA* and *MemB* be two memory accesses. +- Let *A* be the set of ``llvm.side.noalias`` intrinsics on which *MemA* + depends and for which the scopes are visible to both *MemA* and *MemB* +- Let *B* be the set of ``llvm.side.noalias`` intrinsics on which *MemB* + depends and for which the scopes are visible to both *MemA* and *MemB* +- When the intersection of both sets, comparing the + { ``%p.addr``, ``p.objId``, ``!p.scope`` } tuples, is empty, *MemA* and *MemB* + can be treated as not-aliasing accesses. +- A tuple related to the ``unknown function scope`` is handled as if it is + identical to any other tuple, unless it can be proven that their ``%p.addr`` + are disjunct. + +Examples +^^^^^^^^ + +Using a restrict pointer and passing it to a function: + +.. code-block:: c + + void foo(int*); + void setP(int* restrict p) { + p[3]=42; + foo(&p[3]); + } + +Before optimizations, there is the declaration of the restrict pointer and +``llvm.noalias`` is used whenever the value of the restrict pointer is read. + +.. code-block:: llvm + + ; Function Attrs: nounwind uwtable + define dso_local void @setP(i32* %p) #0 { + entry: + %p.addr = alloca i32*, align 8 + %0 = call i8* @llvm.noalias.decl.p0i8.p0p0i32.i32(i32** %p.addr, i32 0, metadata !2) + store i32* %p, i32** %p.addr, align 8, !tbaa !5, !noalias !2 + %1 = load i32*, i32** %p.addr, align 8, !tbaa !5, !noalias !2 + %2 = call i32* @llvm.noalias.p0i32.p0i8.p0p0i32.i32(i32* %1, i8* %0, + i32** %p.addr, i32 0, metadata !2), !tbaa !5, !noalias !2 + %arrayidx = getelementptr inbounds i32, i32* %2, i64 3 + store i32 42, i32* %arrayidx, align 4, !tbaa !9, !noalias !2 + %3 = load i32*, i32** %p.addr, align 8, !tbaa !5, !noalias !2 + %4 = call i32* @llvm.noalias.p0i32.p0i8.p0p0i32.i32(i32* %3, i8* %0, + i32** %p.addr, i32 0, metadata !2), !tbaa !5, !noalias !2 + %arrayidx1 = getelementptr inbounds i32, i32* %4, i64 3 + call void @foo(i32* %arrayidx1), !noalias !2 + ret void + } + +During optimizations, the ``llvm.noalias`` intrinsic is moved onto the +``noalias_sidechannel`` and converted into a ``llvm.side.noalias``. The argument +to ``foo`` has been guarded to keep track of the restrict information. Notice +that the pointer computation is not done on the ``noalias_sidechannel`` path. + +.. code-block:: llvm + + ; Function Attrs: nounwind uwtable + define dso_local void @setP(i32* %p) local_unnamed_addr #0 { + entry: + %0 = tail call i8* @llvm.noalias.decl.p0i8.p0p0i32.i32(i32** null, i32 0, metadata !2) + %1 = tail call i32* @llvm.side.noalias.p0i32.p0i8.p0p0i32.p0p0i32.i32(i32* %p, i8* %0, + i32** null, i32** undef, i32 0, metadata !2), !tbaa !5, !noalias !2 + %arrayidx = getelementptr inbounds i32, i32* %p, i64 3 + store i32 42, i32* %arrayidx, noalias_sidechannel i32* %1, align 4, !tbaa !9, !noalias !2 + %arrayidx1.guard = + call i32* @llvm.noalias.arg.guard.p0i32.p0i32(i32* nonnull %arrayidx, i32* %1) + tail call void @foo(i32* nonnull %arrayidx1.guard) #6, !noalias !2 + ret void + } + + [...] + + !2 = !{!3} + !3 = distinct !{!3, !4, !"setP: p"} + !4 = distinct !{!4, !"setP"} + !5 = !{!6, !6, i64 0} + !6 = !{!"any pointer", !7, i64 0} + !7 = !{!"omnipotent char", !8, i64 0} + !8 = !{!"Simple C/C++ TBAA"} + !9 = !{!10, !10, i64 0} + !10 = !{!"int", !7, i64 0} + + +Copying a struct that contains restrict member pointers: + +.. code-block:: c + + struct FOO { + int* restrict mP0; + int* mP1; + int* restrict mP2; + }; + void copyFOO(struct FOO* dst, struct FOO* src) { + *dst = *src; + } + +The ``llvm.noalias.copy.guard`` tracks the location of those restrict pointer +members through metadata !14. Also notice the usage of the +``unknown function scope`` !11, as the function refers to an object whose +declaration must live outside the function. + +.. code-block:: llvm + + ; Function Attrs: nounwind uwtable + define dso_local void @copyFOO(%struct.FOO* nocapture %dst, + %struct.FOO* nocapture readonly %src) local_unnamed_addr #0 !noalias !11 { + entry: + %0 = tail call %struct.FOO* @llvm.noalias.copy.guard.p0s_struct.FOOs.p0i8(%struct.FOO* %src, + i8* null, metadata !14, metadata !11) + %1 = bitcast %struct.FOO* %dst to i8* + %2 = bitcast %struct.FOO* %0 to i8* + tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* nonnull align 8 dereferenceable(24) %1, + i8* nonnull align 8 dereferenceable(24) %2, i64 24, i1 false), !tbaa.struct !17, !noalias !11 + ret void + } + + [...] + + !11 = !{!12} + !12 = distinct !{!12, !13, !"copyFOO: unknown scope"} + !13 = distinct !{!13, !"copyFOO"} + !14 = !{!15, !16} + !15 = !{i64 -1, i64 0} + !16 = !{i64 -1, i64 2} + !17 = !{i64 0, i64 8, !5, i64 8, i64 8, !5, i64 16, i64 8, !5} + + +.. _int_noalias_decl: + +'``llvm.noalias.decl``' Intrinsic +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Syntax: +""""""" + +This is an overloaded intrinsic. The return type and argument types are encoded +in ``XXX``. + +:: + + declare i8* @llvm.noalias.decl.XXX(* %p.alloca, i32 , metadata !p.scope) + +Overview: +""""""""" + +``llvm.noalias.decl`` is inserted at the location of a restrict pointer +declaration. It makes it possible to identify that a restrict scope is only +valid inside the body of a loop. It also makes it possible to identify that a +certain ``alloca`` is associated to an object that contains one or more +restrict pointers. + +The handle it returns is always of type ``i8*`` and does +not really represent a value. It is merely used to track a dependency on the +declaration. + +Arguments: +"""""""""" + +The first argument ``%p.alloca`` points to the ``alloca`` that contains one +or more restrict pointers. It can also be ``null`` if the ``alloca`` has been +optimized away. + +The second argument ``p.objId`` is an integer representing an object id. + +The third argument ``!p.scope`` is metadata that is a list of ``noalias`` +metadata references. The format is identical to that required for ``noalias`` +metadata. This list must have exactly one element. + +Semantics: +"""""""""" + +The ``llvm.noalias.decl`` intrinsic is used to identify the exact location of +a *restrict pointer declaration*. When this is done inside the loop body, +care must be taken to duplicate and uniquify the scopes and intrinsics when +the loop is unrolled. Otherwise the restrict scope could spill across +iterations. + +It also associates specific restrict properties to an ``alloca`` and is used +to propagate those properties to ``llvm.noalias``, ``llvm.side.noalias`` and +``llvm.noalias.copy.guard`` intrinsics when inlining and optimizations make +the relationship between those intrinsics and the actual variable declaration +visible. + + +.. _int_noalias: + +'``llvm.noalias``' Intrinsic +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Syntax: +""""""" + +This is an overloaded intrinsic. The return type and argument types are encoded +in ``XXX``. + +:: + + declare * @llvm.noalias.XXX(* %p, i8* %p.decl, ** %p.addr, + i32 , metadata !p.scope) + +Overview: +""""""""" + +``llvm.noalias`` is inserted at the moment that restrict properties are +introduced. This is typically done after loading a restrict pointer from +memory. Its return value can be seen as *the pointer value with restrict +properties* + +Arguments: +"""""""""" + +The first argument ``%p`` is the pointer on which the aliasing assumption is +being placed. + +The second argument ``%p.decl`` refers to the ``llvm.noalias.decl`` that is +associated with the pointer declaration. + +The third argument ``%p.addr`` is the address in memory of this pointer. + +The fourth argument ``p.objId`` is an integer representing an object id. + +The fifth argument ``!p.scope`` is metadata that is a list of ``noalias`` +metadata references. The format is identical to that required for ``noalias`` +metadata. This list must have exactly one element. + +Semantics: +"""""""""" + +The ``llvm.noalias`` intrinsic adds alias assumptions to the pointer +computation path. It also blocks optimizations on this computation path. + +It will be transformed into a ``llvm.side.noalias`` intrinsic and moved onto +the ``noalias_sidechannel`` path, so that pointer optimizations can still be +done and the restrict information is not lost. + + +.. _int_side_noalias: + +'``llvm.side.noalias``' Intrinsic +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Syntax: +""""""" + +This is an overloaded intrinsic. The return type and argument types are encoded +in ``XXX``. + +:: + + declare * @llvm.side.noalias.XXX(* %p, i8* %p.decl, + ** %p.addr, ** %side.p.addr, + i32 , metadata !p.scope) + +Overview: +""""""""" + +``llvm.side.noalias`` is inserted at the moment that restrict properties are +introduced on the ``noalias_sidechannel``. This is typically done when a +``llvm.noalias`` is converted into the ``llvm.side.noalias`` + +The value it returns (``%side.p``) is representing the pointer ``%p`` on the +``noalias_sidechannel``. + +Arguments: +"""""""""" + +The first argument ``%p`` is the pointer (or its ``noalias_sidechannel``) on +which the aliasing assumption is being placed. + +The second argument ``%p.decl`` refers to the ``llvm.noalias.decl`` that is +associated with the pointer declaration. + +The third argument ``%p.addr`` is the address in memory of this pointer. + +The fourth argument ``%side.p.addr`` represents ``%p.addr`` on the +``noalias_sidechannel``. + +The fifth argument ``p.objId`` is an integer representing an object id. + +The sixth argument ``!p.scope`` is metadata that is a list of ``noalias`` +metadata references. The format is identical to that required for ``noalias`` +metadata. This list must have exactly one element. + + +Semantics: +"""""""""" + +The ``llvm.side.noalias`` intrinsic adds alias assumptions to the +``noalias_sidechannel`` of the memory instructions that depends on it. + +For this purpose, the ``llvm.side.noalias`` itself is also considered to be +a memory instruction and has its own ``noalias_sidechannel`` for the ``%p.addr`` +argument. + + +.. _int_noalias_arg_guard: + +'``llvm.noalias.arg.guard``' Intrinsic +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Syntax: +""""""" + +This is an overloaded intrinsic. The return type and argument types are encoded +in ``XXX``. + +:: + + declare * @llvm.noalias.arg.guard.XXX(* %p, * %side.p) + +Overview: +""""""""" + +The ``llvm.noalias.arg.guard`` intrinsic brings alias assumption properties that +are on the ``noalias_sidechannel`` path of a pointer back into the +computation path of that pointer. + +Arguments: +"""""""""" + +The first argument ``%p`` represents the computation path of the pointer. + +The second argument ``%side.p`` represents the ``noalias_sidechannel`` path +of that pointer. + +Semantics: +"""""""""" + +The ``llvm.noalias.arg.guard`` is typically generated when a ``llvm.noalias`` +intrinsic is converted to a ``llvm.side.noalias``, but the pointer escapes +because it is used as an argument to a function or it is returned. + +It can typically be optimized away after inlining: +* When it is encountered on the computation path, it is assumed to return the +first argument ``%p``. + +When it is encountered on a ``noalias_sidechannel`` path, it is assumed to +return the second argument ``%side.p``. + + +.. _int_noalias.copy.guard: + +'``llvm.noalias.copy.guard``' Intrinsic +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Syntax: +""""""" + +This is an overloaded intrinsic. The return type and argument types are encoded +in ``XXX``. + +:: + + declare * @llvm.noalias.copy.guard.XXX(* %p.block, i8* %p.decl, + metadata !p.indices, metadata !p.scope) + +Overview: +""""""""" + +``llvm.noalias.copy.guard`` is inserted in the source pointer argument when a +block of memory that will be copied (either using ``llvm.memcpy`` or a +combination of ``load``/``store`` instructions) is associated to a variable +that contains at least one restrict pointer. This could be a ``struct`` that +contains one or more restrict member pointers, or an array of restrict pointers. + +The intrinsic returns the first argument. + +Arguments: +"""""""""" + +The first argument ``%p.block`` represents the block that will be copied. + +The second argument ``%p.decl`` refers to the ``llvm.noalias.decl`` that is +associated with the block. + +The third argument ``!p.indices`` refers to a metadata list of metadata. Each +entry refers to another metadata list of integers, describing the GEP path +that contains a restrict pointer. A -1 value indicates that any index value +is a match. See above for an example. + +Semantics: +"""""""""" + +The ``llvm.noalias.copy.guard`` provides extra restrict information about a +block of memory that is copied over. When the memory copy is optimized away, +they will still be able to match the pointer access to the correct restrict +information. + General Intrinsics ------------------ Index: llvm/include/llvm/Analysis/AliasSetTracker.h =================================================================== --- llvm/include/llvm/Analysis/AliasSetTracker.h +++ llvm/include/llvm/Analysis/AliasSetTracker.h @@ -138,6 +138,10 @@ } delete this; } + void updateNoAliasSideChannelIfMatching(Value *Old, Value *New) { + if (AAInfo.NoAliasSideChannel == Old) + AAInfo.NoAliasSideChannel = New; + } }; // Doubly linked list of nodes. @@ -354,6 +358,25 @@ // Map from pointers to their node PointerMapType PointerMap; + // FIXME: NOTE: when we get a callback, the resulting update might be _SLOW_ + class ASTSideChannelCallbackVH final : public CallbackVH { + AliasSetTracker *AST; + + void deleted() override; + void allUsesReplacedWith(Value *) override; + + public: + ASTSideChannelCallbackVH(Value *V, AliasSetTracker *AST = nullptr); + ASTSideChannelCallbackVH &operator=(Value *V); + }; + + /// Traits to tell DenseMap that tell us how to compare and hash the value + /// handle. + struct ASTSideChannelCallbackVHDenseMapInfo : public DenseMapInfo {}; + using SideChannelSetType = + DenseSet; + SideChannelSetType SideChannelPointers; + public: /// Create an empty collection of AliasSets, and use the specified alias /// analysis object to disambiguate load and store addresses. @@ -412,6 +435,19 @@ /// tracker already knows about a value, it will ignore the request. void copyValue(Value *From, Value *To); + /// This method is used to remove a side_channel pointer value from the + /// AliasSetTracker entirely. It should be used when an instruction is deleted + /// from the program to update the AST. If you don't use this, you would have + /// dangling pointers to deleted instructions. + void deleteSideChannelValue(Value *PtrVal); + + /// This method should be used whenever a preexisting side_channel value in + /// the program is copied or cloned, introducing a new value. Note that it is + /// ok for clients that use this method to introduce the same value multiple + /// times: if the tracker already knows about a value, it will ignore the + /// request. + void copySideChannelValue(Value *From, Value *To); + using iterator = ilist::iterator; using const_iterator = ilist::const_iterator; Index: llvm/include/llvm/Analysis/BasicAliasAnalysis.h =================================================================== --- llvm/include/llvm/Analysis/BasicAliasAnalysis.h +++ llvm/include/llvm/Analysis/BasicAliasAnalysis.h @@ -61,6 +61,7 @@ DominatorTree *DT; LoopInfo *LI; PhiValues *PV; + unsigned RecurseLevel = 0; public: BasicAAResult(const DataLayout &DL, const Function &F, Index: llvm/include/llvm/Analysis/ScopedNoAliasAA.h =================================================================== --- llvm/include/llvm/Analysis/ScopedNoAliasAA.h +++ llvm/include/llvm/Analysis/ScopedNoAliasAA.h @@ -21,6 +21,7 @@ #include namespace llvm { +class DominatorTree; class Function; class MDNode; @@ -31,6 +32,8 @@ friend AAResultBase; public: + ScopedNoAliasAAResult(DominatorTree *DT) : AAResultBase(), DT(DT) {} + /// Handle invalidation events from the new pass manager. /// /// By definition, this result is stateless and so remains valid. @@ -46,8 +49,27 @@ ModRefInfo getModRefInfo(const CallBase *Call1, const CallBase *Call2, AAQueryInfo &AAQI); + // FIXME: This interface can be removed once the legacy-pass-manager support + // is removed. + void setDT(DominatorTree *DT) { + this->DT = DT; + } + private: bool mayAliasInScopes(const MDNode *Scopes, const MDNode *NoAlias) const; + + bool findCompatibleNoAlias(const Value *P, + const MDNode *ANoAlias, const MDNode *BNoAlias, + const DataLayout &DL, + SmallPtrSetImpl &Visited, + SmallVectorImpl &CompatibleSet, + int Depth = 0); + bool noAliasByIntrinsic(const MDNode *ANoAlias, const Value *APtr, + const MDNode *BNoAlias, const Value *BPtr, + const CallBase *CallA, const CallBase *CallB, + AAQueryInfo &AAQI); + + DominatorTree *DT; }; /// Analysis pass providing a never-invalidated alias analysis result. @@ -71,12 +93,18 @@ ScopedNoAliasAAWrapperPass(); - ScopedNoAliasAAResult &getResult() { return *Result; } + ScopedNoAliasAAResult &getResult() { + setDT(); + return *Result; + } const ScopedNoAliasAAResult &getResult() const { return *Result; } bool doInitialization(Module &M) override; bool doFinalization(Module &M) override; void getAnalysisUsage(AnalysisUsage &AU) const override; + +private: + void setDT(); }; //===--------------------------------------------------------------------===// Index: llvm/include/llvm/Analysis/TargetTransformInfoImpl.h =================================================================== --- llvm/include/llvm/Analysis/TargetTransformInfoImpl.h +++ llvm/include/llvm/Analysis/TargetTransformInfoImpl.h @@ -811,6 +811,11 @@ case Intrinsic::is_constant: case Intrinsic::lifetime_start: case Intrinsic::lifetime_end: + case Intrinsic::noalias_decl: + case Intrinsic::noalias: + case Intrinsic::side_noalias: + case Intrinsic::noalias_arg_guard: + case Intrinsic::noalias_copy_guard: case Intrinsic::objectsize: case Intrinsic::ptr_annotation: case Intrinsic::var_annotation: Index: llvm/include/llvm/Analysis/ValueTracking.h =================================================================== --- llvm/include/llvm/Analysis/ValueTracking.h +++ llvm/include/llvm/Analysis/ValueTracking.h @@ -334,12 +334,15 @@ /// the specified value, returning the original object being addressed. Note /// that the returned value has pointer type if the specified value does. If /// the MaxLookup value is non-zero, it limits the number of instructions to - /// be stripped off. + /// be stripped off. If a NoAlias vector is provided, it is filled with any + /// llvm.noalias intrinsics looked through to find the underlying object. Value *GetUnderlyingObject(Value *V, const DataLayout &DL, - unsigned MaxLookup = 6); - inline const Value *GetUnderlyingObject(const Value *V, const DataLayout &DL, - unsigned MaxLookup = 6) { - return GetUnderlyingObject(const_cast(V), DL, MaxLookup); + unsigned MaxLookup = 6, + SmallVectorImpl *NoAlias = nullptr); + inline const Value *GetUnderlyingObject(const Value *V, + const DataLayout &DL, unsigned MaxLookup = 6, + SmallVectorImpl *NoAlias = nullptr) { + return GetUnderlyingObject(const_cast(V), DL, MaxLookup, NoAlias); } /// This method is similar to GetUnderlyingObject except that it can @@ -369,11 +372,14 @@ /// /// Since A[i] and A[i-1] are independent pointers, getUnderlyingObjects /// should not assume that Curr and Prev share the same underlying object thus - /// it shouldn't look through the phi above. + /// it shouldn't look through the phi above. If a NoAlias vector is provided, + /// it is filled with any llvm.noalias intrinsics looked through to find the + /// underlying objects. void GetUnderlyingObjects(const Value *V, SmallVectorImpl &Objects, const DataLayout &DL, LoopInfo *LI = nullptr, - unsigned MaxLookup = 6); + unsigned MaxLookup = 6, + SmallVectorImpl *NoAlias = nullptr); /// This is a wrapper around GetUnderlyingObjects and adds support for basic /// ptrtoint+arithmetic+inttoptr sequences. Index: llvm/include/llvm/Analysis/VectorUtils.h =================================================================== --- llvm/include/llvm/Analysis/VectorUtils.h +++ llvm/include/llvm/Analysis/VectorUtils.h @@ -255,8 +255,11 @@ /// metadata value that covers all of the individual values), and set I's /// metadata for M equal to the intersection value. /// +/// When RemoveNoAlias is true, MD_noalias will always get a null value. +/// /// This function always sets a (possibly null) value for each K in Kinds. -Instruction *propagateMetadata(Instruction *I, ArrayRef VL); +Instruction *propagateMetadata(Instruction *I, ArrayRef VL, + bool RemoveNoAlias = true); /// Create a mask that filters the members of an interleave group where there /// are gaps. Index: llvm/include/llvm/IR/IRBuilder.h =================================================================== --- llvm/include/llvm/IR/IRBuilder.h +++ llvm/include/llvm/IR/IRBuilder.h @@ -656,6 +656,63 @@ /// assume that the provided condition will be true. CallInst *CreateAssumption(Value *Cond); + /// Create a llvm.noalias.decl intrinsic call. + Instruction *CreateNoAliasDeclaration(Value *AllocaPtr, Value *ObjId, + Value *Scope); + Instruction *CreateNoAliasDeclaration(Value *AllocaPtr, uint64_t ObjId, + Value *Scope) { + return CreateNoAliasDeclaration( + AllocaPtr, + ConstantInt::get(IntegerType::getInt64Ty(getContext()), ObjId), Scope); + } + Instruction *CreateNoAliasDeclaration(Value *AllocaPtr, MDNode *ScopeTag) { + uint64_t Zero = 0; + return CreateNoAliasDeclaration(AllocaPtr, Zero, + MetadataAsValue::get(Context, ScopeTag)); + } + + /// Create a llvm.noalias intrinsic call. + Instruction *CreateNoAliasPointer(Value *Ptr, Value *NoAliasDecl, + Value *AddrP, MDNode *ScopeTag, + const Twine &Name = "", + uint64_t ObjectId = 0) { + return CreateNoAliasPointer(Ptr, NoAliasDecl, AddrP, + MetadataAsValue::get(getContext(), ScopeTag), + Name, ObjectId); + } + Instruction *CreateNoAliasPointer(Value *Ptr, Value *NoAliasDecl, + Value *AddrP, Value *ScopeTag, + const Twine &Name = "", + uint64_t ObjectId = 0); + + /// Create a llvm.side.noalias intrinsic call. + Instruction *CreateSideNoAliasPlain(Value *Ptr, Value *NoAliasDecl, + Value *AddrP, Value *AddrP_Side, + Value *ObjId, MDNode *ScopeTag, + const Twine &Name = ""); + Instruction *CreateSideNoAliasPlain(Value *Ptr, Value *NoAliasDecl, + Value *AddrP, Value *AddrP_Side, + Value *ObjId, Value *ScopeValue, + const Twine &Name = ""); + + /// Create a llvm.noalias.arg.guard intrinsic call. + Instruction *CreateNoAliasArgGuard(Value *Ptr, Value *SideChannel, + const Twine &Name = ""); + + /// Create a llvm.noalias_copy_guard intrinsic call. + Instruction *CreateNoAliasCopyGuard(Value *BasePtr, Value *NoAliasDel, + ArrayRef EncodedIndices, + MDNode *ScopeTag, const Twine &Name = ""); + +private: + /// Helper for creating noalias intrinsics + Instruction *CreateGenericNoAliasIntrinsic(Intrinsic::ID ID, Value *arg0, + ArrayRef args_opt, + ArrayRef MDNodes, + ArrayRef MDValues, + const Twine &Name = ""); + +public: /// Create a call to the experimental.gc.statepoint intrinsic to /// start a new statepoint sequence. CallInst *CreateGCStatepointCall(uint64_t ID, uint32_t NumPatchBytes, Index: llvm/include/llvm/IR/InstVisitor.h =================================================================== --- llvm/include/llvm/IR/InstVisitor.h +++ llvm/include/llvm/IR/InstVisitor.h @@ -168,7 +168,7 @@ RetTy visitICmpInst(ICmpInst &I) { DELEGATE(CmpInst);} RetTy visitFCmpInst(FCmpInst &I) { DELEGATE(CmpInst);} RetTy visitAllocaInst(AllocaInst &I) { DELEGATE(UnaryInstruction);} - RetTy visitLoadInst(LoadInst &I) { DELEGATE(UnaryInstruction);} + RetTy visitLoadInst(LoadInst &I) { DELEGATE(Instruction); } RetTy visitStoreInst(StoreInst &I) { DELEGATE(Instruction);} RetTy visitAtomicCmpXchgInst(AtomicCmpXchgInst &I) { DELEGATE(Instruction);} RetTy visitAtomicRMWInst(AtomicRMWInst &I) { DELEGATE(Instruction);} Index: llvm/include/llvm/IR/Instruction.h =================================================================== --- llvm/include/llvm/IR/Instruction.h +++ llvm/include/llvm/IR/Instruction.h @@ -309,8 +309,15 @@ /// @} /// Sets the metadata on this instruction from the AAMDNodes structure. + /// NOTE: The noalias_sidechannel must be copied over explicitely using + /// 'setAAMetadataNoAliasSideChannel'. This must only be done if the dominator + /// relationship between the noalias_sidechannel and this instruction holds. void setAAMetadata(const AAMDNodes &N); + /// Sets (only) the noalias_sidechannel. Normally used in combination with + /// setAAMetadata. + void setAAMetadataNoAliasSideChannel(const AAMDNodes &N); + /// Retrieve the raw weight values of a conditional branch or select. /// Returns true on success with profile weights filled in. /// Returns false if no metadata or invalid metadata was found. Index: llvm/include/llvm/IR/Instructions.h =================================================================== --- llvm/include/llvm/IR/Instructions.h +++ llvm/include/llvm/IR/Instructions.h @@ -166,7 +166,7 @@ /// An instruction for reading from memory. This uses the SubclassData field in /// Value to store whether or not the load is volatile. -class LoadInst : public UnaryInstruction { +class LoadInst : public Instruction { void AssertOK(); protected: @@ -229,6 +229,16 @@ : LoadInst(Ptr->getType()->getPointerElementType(), Ptr, NameStr, isVolatile, Align, Order, SSID, InsertAtEnd) {} + ~LoadInst() { + // FIXME: needed by operator delete + setLoadInstNumOperands(2); + } + // allocate space for exactly two operands + void *operator new(size_t s) { return User::operator new(s, 2); } + + /// Transparently provide more efficient getOperand methods. + DECLARE_TRANSPARENT_OPERAND_ACCESSORS(Value); + /// Return true if this is a load from a volatile memory location. bool isVolatile() const { return getSubclassDataFromInstruction() & 1; } @@ -296,6 +306,23 @@ return getPointerOperandType()->getPointerAddressSpace(); } + bool hasNoaliasSideChannelOperand() const { return getNumOperands() == 2; } + Value *getNoaliasSideChannelOperand() const { + assert(hasNoaliasSideChannelOperand() && "we need a noalias_sidechannel"); + return getOperand(1); + } + static unsigned getNoaliasSideChannelOperandIndex() { return 1U; } + void setNoaliasSideChannelOperand(Value *SideChannel) { + assert(SideChannel && "Needs a side channel"); + setLoadInstNumOperands(2); + setOperand(1, SideChannel); + } + void removeNoaliasSideChannelOperand() { + assert(hasNoaliasSideChannelOperand() && "nothing to remove"); + // make sure 'uses' are updated + setOperand(getNoaliasSideChannelOperandIndex(), nullptr); + setLoadInstNumOperands(1); + } // Methods for support type inquiry through isa, cast, and dyn_cast: static bool classof(const Instruction *I) { return I->getOpcode() == Instruction::Load; @@ -317,6 +344,11 @@ SyncScope::ID SSID; }; +template <> +struct OperandTraits : public OptionalOperandTraits {}; + +DEFINE_TRANSPARENT_OPERAND_ACCESSORS(LoadInst, Value) + //===----------------------------------------------------------------------===// // StoreInst Class //===----------------------------------------------------------------------===// @@ -347,10 +379,11 @@ StoreInst(Value *Val, Value *Ptr, bool isVolatile, MaybeAlign Align, AtomicOrdering Order, SyncScope::ID SSID, BasicBlock *InsertAtEnd); - // allocate space for exactly two operands - void *operator new(size_t s) { - return User::operator new(s, 2); + ~StoreInst() { + // FIXME: needed by operator delete + setStoreInstNumOperands(3); } + void *operator new(size_t s) { return User::operator new(s, 3); } /// Return true if this is a store to a volatile memory location. bool isVolatile() const { return getSubclassDataFromInstruction() & 1; } @@ -425,6 +458,23 @@ return getPointerOperandType()->getPointerAddressSpace(); } + bool hasNoaliasSideChannelOperand() const { return getNumOperands() == 3; } + Value *getNoaliasSideChannelOperand() const { + assert(hasNoaliasSideChannelOperand() && "we need a noalias_sidechannel"); + return getOperand(2); + } + static unsigned getNoaliasSideChannelOperandIndex() { return 2U; } + void setNoaliasSideChannelOperand(Value *SideChannel) { + assert(SideChannel && "Needs a side channel"); + setStoreInstNumOperands(3); + setOperand(2, SideChannel); + } + void removeNoaliasSideChannelOperand() { + assert(hasNoaliasSideChannelOperand() && "nothing to remove"); + // make sure 'uses' are updated + setOperand(getNoaliasSideChannelOperandIndex(), nullptr); + setStoreInstNumOperands(2); + } // Methods for support type inquiry through isa, cast, and dyn_cast: static bool classof(const Instruction *I) { return I->getOpcode() == Instruction::Store; @@ -447,8 +497,7 @@ }; template <> -struct OperandTraits : public FixedNumOperandTraits { -}; +struct OperandTraits : public OptionalOperandTraits {}; DEFINE_TRANSPARENT_OPERAND_ACCESSORS(StoreInst, Value) Index: llvm/include/llvm/IR/IntrinsicInst.h =================================================================== --- llvm/include/llvm/IR/IntrinsicInst.h +++ llvm/include/llvm/IR/IntrinsicInst.h @@ -888,6 +888,19 @@ } }; + /// Returns true when two llvm.side.noalias represent the same noalias info. + inline bool areSideNoaliasCompatible(const IntrinsicInst *lhs, + const IntrinsicInst *rhs) { + assert(lhs->getIntrinsicID() == Intrinsic::side_noalias && + rhs->getIntrinsicID() == Intrinsic::side_noalias && + "Can only check noalias compatibility of side_noalias"); + return (lhs->getOperand(Intrinsic::SideNoAliasScopeArg) == + rhs->getOperand(Intrinsic::SideNoAliasScopeArg)) && + (lhs->getOperand(Intrinsic::SideNoAliasIdentifyPObjIdArg) == + rhs->getOperand(Intrinsic::SideNoAliasIdentifyPObjIdArg)) && + (lhs->getOperand(Intrinsic::SideNoAliasIdentifyPArg) == + rhs->getOperand(Intrinsic::SideNoAliasIdentifyPArg)); + } } // end namespace llvm #endif // LLVM_IR_INTRINSICINST_H Index: llvm/include/llvm/IR/Intrinsics.h =================================================================== --- llvm/include/llvm/IR/Intrinsics.h +++ llvm/include/llvm/IR/Intrinsics.h @@ -33,6 +33,27 @@ /// function known by LLVM. The enum values are returned by /// Function::getIntrinsicID(). namespace Intrinsic { + // Abstraction for the arguments of the noalias intrinsics + static const int SideNoAliasNoAliasDeclArg = 1; + static const int SideNoAliasIdentifyPArg = 2; + static const int SideNoAliasIdentifyPSideChannelArg = 3; + static const int SideNoAliasIdentifyPObjIdArg = 4; + static const int SideNoAliasScopeArg = 5; + + static const int NoAliasNoAliasDeclArg = 1; + static const int NoAliasIdentifyPArg = 2; + static const int NoAliasIdentifyPObjIdArg = 3; + static const int NoAliasScopeArg = 4; + + static const int NoAliasDeclAllocaArg = 0; + static const int NoAliasDeclObjIdArg = 1; + static const int NoAliasDeclScopeArg = 2; + + static const int NoAliasCopyGuardIdentifyPBaseObject = 0; + static const int NoAliasCopyGuardNoAliasDeclArg = 1; + static const int NoAliasCopyGuardIndicesArg = 2; + static const int NoAliasCopyGuardScopeArg = 3; + enum ID : unsigned { not_intrinsic = 0, // Must be zero Index: llvm/include/llvm/IR/Intrinsics.td =================================================================== --- llvm/include/llvm/IR/Intrinsics.td +++ llvm/include/llvm/IR/Intrinsics.td @@ -478,6 +478,179 @@ // control dependencies will be maintained. def int_assume : Intrinsic<[], [llvm_i1_ty], [IntrWillReturn]>; +// The 'noalias intrinsics' allow to track dependencies so that the C99 restrict +// rules can be implemented. +// +// - llvm.noalias.decl +// - llvm.noalias +// - llvm.side.noalias +// - llvm.noalias.arg.guard +// - llvm.noalias.copy.guard +// +// +// Following arguments are typically used in the various intrinsics: +// - p: the value of the restrict pointer +// - p.addr: identifyP: the address of P: either a real object or a constant +// where the value is relative to 0: different 'identifyP' represent +// different restrict pointers, pointing to disjunct objects. +// - p.objId: when an alloca-object is split by SROA, multiple versions, +// representing the same variable declaration (=scope) can exist. +// This objId allows to differentiate between them. This is useful +// when later on, the alloca's are optimized away. +// - p.scope: metadata argument that refers to a list of alias.scope metadata +// entries that contains exactly one element. It represents the variable +// declaration that contains one or more restrict pointers. +// - p.decl: points to the @llvm.noalias.decl intrinsic associated with the +// declaration of a restrict variable. +// - p.alloca: points to the alloca associated with the declaration of a +// restrict variable +// - side.p: the noalias side channel associated with 'p'. +// - side.p.addr: the noalias side channel associated with 'p.addr'. +// - p.indices: metadata argument that refers to a list of metadata references. +// each reference points to a metadata array of indices. At the specified +// location, a restrict pointer is located. A '-1' indicates any index. +// (see llvm.noalias.copy.guard for an example) +// +// {p.addr, p.objId, p.scope} represent different ways of tracking the +// 'underlying P' object. A unique triplet represents a unique 'object P' +// +// NOTE: future enhancements might relax/enhance this notion for supporting +// an implementation of n4150 (alias sets). It is likely that an extra +// parameter should be introduced to differentiate p.addr from the p.universe. +// As far as C99 is concerned, p.addr == p.universe +// +// Also see: +// - LangRef.rst (Scoped NoAlias Related Intrinsics) +// - [llvm-dev] RFC: Full 'restrict' support in LLVM +// https://lists.llvm.org/pipermail/llvm-dev/2019-March/131127.html + +// 'llvm.noalias.decl' intrinsic: Inserted at the location of a restrict +// pointer declaration. Makes it possible to identify that a restrict scope is +// only valid inside the body of a loop. +// +// Purpose of the different arguments: +// - arg0: p.alloca: associates the restrict pointer declaration to an alloca. +// (can be 'null' if the alloca is optimized away). The alloca can be +// associated to multiple restrict pointers. +// - arg1: p.objId: identifies different objects, associated to the same +// variable declaration. Is needed to track splitting of alloca's in SROA. +// - arg2: p.scope: metadata representing the variable declaration. +// - returns: a dummy i8 pointer that is used to track dependencies, so that cse +// is not migrating llvm.side.noalias over declarations +def int_noalias_decl + : Intrinsic<[llvm_anyptr_ty], + [llvm_anyptr_ty, llvm_anyint_ty, llvm_metadata_ty], + [IntrArgMemOnly]>; // ArgMemOnly: blocks LICM and some more + +// 'llvm.noalias' intrinsic: Introduces noalias information in the pointer +// computation path, normally right after the loading of the pointer value. +// It also blocks a number of optimizations. Once it is transformed into +// a llvm.side.noalias version, it can track noalias information and support +// complex optimizations. +// +// Purpose of the different arguments: +// - arg0: p: the incoming pointer value. +// - arg1: p.decl: dependency on llvm.noalias.decl (if available). The +// dependency makes it easier to handle the loop-unrolling case. +// - arg2: p.addr: identifyP: the address of P: either a real object or a +// constant where the value is relative to 0: different 'identifyP' +// represent different restrict pointers. +// - arg3: p.objId: identifies different objects, associated to the same +// variable declaration. Is needed to track splitting of alloca's in +// SROA. +// - arg4: p.scope: metadata representing the variable declaration. +// - returns: returns arg0 (although this is hidden for most optimization +// passes) +def int_noalias + : Intrinsic<[llvm_anyptr_ty], + [LLVMMatchType<0>, + llvm_anyptr_ty, llvm_anyptr_ty, // p.decl, p.addr + llvm_anyint_ty, llvm_metadata_ty], // p.objId, p.scope + [IntrArgMemOnly, IntrSpeculatable]>; + +// 'llvm.side.noalias' intrinsic: Introduces the noalias information on the +// side channel path. This intrinsic originates from a transformed +// 'llvm.noalias' intrinsic. +// +// Purpose of the different arguments: +// - arg0: p: the incoming ptr value. +// - arg1: p.decl: dependency on llvm.noalias.decl (if available). The +// dependency makes it easier to handle the loop-unrolling case. +// - arg2: p.addr: identifyP: the address of P: either a real object or a +// constant where the value is relative to 0: different 'identifyP' +// represent different restrict pointers. +// - arg3: side.p.addr: the noalias_sidechannel associated with the identifyP: +// this is needed to handle cases like: 'int* restrict* restrict rprpi;' +// - arg4: p.objId: identifies different objects, associated to the same +// variable declaration. Is needed to track splitting of alloca's in +// SROA. +// - arg5: p.scope: metadata representing the variable declaration. +// - returns: returns arg0 (although this is hidden for most optimization +// passes) +// +// NOTES: +// - side.p.addr (together with the other metadata set on the intrinsic), can be +// used to check if there are other reasons why this 'underlying P' object is +// different from another 'underlying P' (using tbaa, noalias annotations, ...) +// - p.decl, p.scope: it is possible that there initially is no +// llvm.noalias.decl dependency. For 'unknown-scope' scopes, it is possible +// that a later analysis connects the right scope and llvm.noalias.decl +// dependency. +// NOTE: Returned<0> must not be used here, or some optimizations (INDVARS) +// will be too optimistic as they see through the annotations, resulting in +// wrong code. +def int_side_noalias + : Intrinsic<[llvm_anyptr_ty], + [LLVMMatchType<0>, + llvm_anyptr_ty, llvm_anyptr_ty, llvm_anyptr_ty, + llvm_anyint_ty, llvm_metadata_ty], + [IntrNoMem, IntrSpeculatable]>; + +// 'llvm.noalias.arg.guard' intrinsic: helps tracking the noalias_sidechannel. +// It guards pointers that escape through function arguments or are returned. +// After inlining, the noalias information can still be propagated +// +// Purpose of the different arguments: +// - arg0: p: the incoming ptr value. +// - arg1: side.p: the noalias_sidechannel, associated with this pointer +// computation. +// - returns: arg0 (hidden for most optimization passes) +def int_noalias_arg_guard + : Intrinsic<[llvm_anyptr_ty], + [LLVMMatchType<0>, llvm_anyptr_ty], + [IntrNoMem]>; // NOTE: Returned<0> must not be used here + +// 'llvm.noalias.copy.guard' intrinsic: another guard that allows to track +// restrict pointers through a memcpy or a structured load/store. When such a +// memcpy or load/store pair is optimized away, the noalias dependency chain can +// be reconstructed. +// +// Purpose of the different arguments: +// %p.guard = +// llvm.noalias.copy.guard %p.alloca, %p.decl, !metadata !99, !metadata !98 +// - arg0: p.alloca: block of memory with potential restrict variables. +// - arg1: p.decl: associated llvm.noalias.decl instruction (if available) +// - arg2: p.indices: metadata list of list of indices, indicating where in the +// memory restrict pointers are located. +// - arg3: p.scope: metadata representing the variable declaration. +// - returns: arg0 (hidden for most optimization passes) +// +// Example of p.indices: +// struct FOO { +// int* restrict p1; +// int* p0; +// int* restrict p2; +// }; +// results in '!10' refering to: +// !10 = {!11, !12} +// !11 = { -1, 0 } +// !12 = { -1, 2 } +def int_noalias_copy_guard + : Intrinsic<[llvm_anyptr_ty], + [LLVMMatchType<0>, llvm_anyptr_ty, llvm_metadata_ty, + llvm_metadata_ty], + [IntrNoMem]>; + // Stack Protector Intrinsic - The stackprotector intrinsic writes the stack // guard to the correct place on the stack frame. def int_stackprotector : Intrinsic<[], [llvm_ptr_ty, llvm_ptrptr_ty], []>; Index: llvm/include/llvm/IR/Metadata.h =================================================================== --- llvm/include/llvm/IR/Metadata.h +++ llvm/include/llvm/IR/Metadata.h @@ -642,16 +642,19 @@ /// memory access used by the alias-analysis infrastructure. struct AAMDNodes { explicit AAMDNodes(MDNode *T = nullptr, MDNode *S = nullptr, - MDNode *N = nullptr) - : TBAA(T), Scope(S), NoAlias(N) {} + MDNode *N = nullptr, Value *NSC = nullptr) + : TBAA(T), Scope(S), NoAlias(N), NoAliasSideChannel(NSC) {} bool operator==(const AAMDNodes &A) const { - return TBAA == A.TBAA && Scope == A.Scope && NoAlias == A.NoAlias; + return TBAA == A.TBAA && Scope == A.Scope && NoAlias == A.NoAlias && + NoAliasSideChannel == A.NoAliasSideChannel; } bool operator!=(const AAMDNodes &A) const { return !(*this == A); } - explicit operator bool() const { return TBAA || Scope || NoAlias; } + explicit operator bool() const { + return TBAA || Scope || NoAlias || NoAliasSideChannel; + } /// The tag for type-based alias analysis. MDNode *TBAA; @@ -662,6 +665,9 @@ /// The tag specifying the noalias scope. MDNode *NoAlias; + /// The NoAlias SideChannel pointer path + Value *NoAliasSideChannel; + /// Given two sets of AAMDNodes that apply to the same pointer, /// give the best AAMDNodes that are compatible with both (i.e. a set of /// nodes whose allowable aliasing conclusions are a subset of those @@ -672,6 +678,9 @@ Result.TBAA = Other.TBAA == TBAA ? TBAA : nullptr; Result.Scope = Other.Scope == Scope ? Scope : nullptr; Result.NoAlias = Other.NoAlias == NoAlias ? NoAlias : nullptr; + Result.NoAliasSideChannel = Other.NoAliasSideChannel == NoAliasSideChannel + ? NoAliasSideChannel + : nullptr; return Result; } }; @@ -692,7 +701,8 @@ static unsigned getHashValue(const AAMDNodes &Val) { return DenseMapInfo::getHashValue(Val.TBAA) ^ DenseMapInfo::getHashValue(Val.Scope) ^ - DenseMapInfo::getHashValue(Val.NoAlias); + DenseMapInfo::getHashValue(Val.NoAlias) ^ + DenseMapInfo::getHashValue(Val.NoAliasSideChannel); } static bool isEqual(const AAMDNodes &LHS, const AAMDNodes &RHS) { @@ -1425,6 +1435,32 @@ // Create wrappers for C Binding types (see CBindingWrapping.h). DEFINE_ISA_CONVERSION_FUNCTIONS(NamedMDNode, LLVMNamedMDNodeRef) +/// This is a simple wrapper around an MDNode which provides a higher-level +/// interface by hiding the details of how alias analysis information is encoded +/// in its operands. +class AliasScopeNode { + const MDNode *Node = nullptr; + +public: + AliasScopeNode() = default; + explicit AliasScopeNode(const MDNode *N) : Node(N) {} + + /// Get the MDNode for this AliasScopeNode. + const MDNode *getNode() const { return Node; } + + /// Get the MDNode for this AliasScopeNode's domain. + const MDNode *getDomain() const { + if (Node->getNumOperands() < 2) + return nullptr; + return dyn_cast_or_null(Node->getOperand(1)); + } + StringRef getName() const { + if (Node->getNumOperands() > 2) + if (MDString *N = dyn_cast_or_null(Node->getOperand(2))) + return N->getString(); + return StringRef(); + } +}; } // end namespace llvm #endif // LLVM_IR_METADATA_H Index: llvm/include/llvm/IR/User.h =================================================================== --- llvm/include/llvm/IR/User.h +++ llvm/include/llvm/IR/User.h @@ -167,12 +167,12 @@ } Value *getOperand(unsigned i) const { - assert(i < NumUserOperands && "getOperand() out of range!"); + assert(i < getNumOperands() && "getOperand() out of range!"); return getOperandList()[i]; } void setOperand(unsigned i, Value *Val) { - assert(i < NumUserOperands && "setOperand() out of range!"); + assert(i < getNumOperands() && "setOperand() out of range!"); assert((!isa((const Value*)this) || isa((const Value*)this)) && "Cannot mutate a constant with setOperand!"); @@ -180,15 +180,17 @@ } const Use &getOperandUse(unsigned i) const { - assert(i < NumUserOperands && "getOperandUse() out of range!"); + assert(i < getNumOperands() && "getOperandUse() out of range!"); return getOperandList()[i]; } Use &getOperandUse(unsigned i) { - assert(i < NumUserOperands && "getOperandUse() out of range!"); + assert(i < getNumOperands() && "getOperandUse() out of range!"); return getOperandList()[i]; } - unsigned getNumOperands() const { return NumUserOperands; } + unsigned getNumOperands() const { + return NumUserOperands - NumUserOperandsDelta; + } /// Returns the descriptor co-allocated with this User instance. ArrayRef getDescriptor() const; @@ -209,6 +211,24 @@ NumUserOperands = NumOps; } + /// FIXME: As that the number of operands is used to find the start of + /// the allocated memory in operator delete, we need to always think we have + /// 3 operand before delete. + void setStoreInstNumOperands(unsigned NumOps) { + assert((2 <= NumOps) && (NumOps <= 3) && + "StoreInst can only have 2 or 3 operands"); + NumUserOperandsDelta = 3 - NumOps; + } + + /// FIXME: As that the number of operands is used to find the start of + /// the allocated memory in operator delete, we need to always think we have + /// 2 operand before delete. + void setLoadInstNumOperands(unsigned NumOps) { + assert((1 <= NumOps) && (NumOps <= 2) && + "LoadInst can only have 1 or 2 operands"); + NumUserOperandsDelta = 2 - NumOps; + } + /// Subclasses with hung off uses need to manage the operand count /// themselves. In these instances, the operand count isn't used to find the /// OperandList, so there's no issue in having the operand count change. @@ -229,10 +249,10 @@ op_iterator op_begin() { return getOperandList(); } const_op_iterator op_begin() const { return getOperandList(); } op_iterator op_end() { - return getOperandList() + NumUserOperands; + return getOperandList() + NumUserOperands - NumUserOperandsDelta; } const_op_iterator op_end() const { - return getOperandList() + NumUserOperands; + return getOperandList() + NumUserOperands - NumUserOperandsDelta; } op_range operands() { return op_range(op_begin(), op_end()); Index: llvm/include/llvm/IR/Value.h =================================================================== --- llvm/include/llvm/IR/Value.h +++ llvm/include/llvm/IR/Value.h @@ -112,7 +112,7 @@ /// /// Note, this should *NOT* be used directly by any class other than User. /// User uses this value to find the Use list. - enum : unsigned { NumUserOperandsBits = 28 }; + enum : unsigned { NumUserOperandsBits = 27 }; unsigned NumUserOperands : NumUserOperandsBits; // Use the same type as the bitfield above so that MSVC will pack them. @@ -120,6 +120,7 @@ unsigned HasName : 1; unsigned HasHungOffUses : 1; unsigned HasDescriptor : 1; + unsigned NumUserOperandsDelta : 1; private: template // UseT == 'Use' or 'const Use' @@ -320,6 +321,13 @@ /// values or constant users. void replaceUsesOutsideBlock(Value *V, BasicBlock *BB); + /// replaceUsesInsideBlock - Go through the uses list for this definition and + /// make each use point to "V" instead of "this" when the use is inside the + /// block. + /// Unlike replaceAllUsesWith this function does not support basic block + /// values or constant users. + void replaceUsesInsideBlock(Value *V, BasicBlock *BB); + //---------------------------------------------------------------------- // Methods for handling the chain of uses of this Value. // @@ -556,6 +564,18 @@ ->stripPointerCastsAndInvariantGroups()); } + /// Strip off pointer casts, all-zero GEPs, aliases, invariant group + /// info and noalias intrinsics. + /// + /// Returns the original uncasted value. If this is called on a non-pointer + /// value, it returns 'this'. + const Value *stripPointerCastsAndInvariantGroupsAndNoAliasIntr() const; + Value *stripPointerCastsAndInvariantGroupsAndNoAliasIntr() { + return const_cast( + static_cast(this) + ->stripPointerCastsAndInvariantGroupsAndNoAliasIntr()); + } + /// Strip off pointer casts and all-constant inbounds GEPs. /// /// Returns the original pointer value. If this is called on a non-pointer Index: llvm/include/llvm/InitializePasses.h =================================================================== --- llvm/include/llvm/InitializePasses.h +++ llvm/include/llvm/InitializePasses.h @@ -104,6 +104,7 @@ void initializeCallSiteSplittingLegacyPassPass(PassRegistry&); void initializeCalledValuePropagationLegacyPassPass(PassRegistry &); void initializeCodeGenPreparePass(PassRegistry&); +void initializeConnectNoAliasDeclLegacyPassPass(PassRegistry &); void initializeConstantHoistingLegacyPassPass(PassRegistry&); void initializeConstantMergeLegacyPassPass(PassRegistry&); void initializeConstantPropagationPass(PassRegistry&); @@ -340,6 +341,7 @@ void initializeProcessImplicitDefsPass(PassRegistry&); void initializeProfileSummaryInfoWrapperPassPass(PassRegistry&); void initializePromoteLegacyPassPass(PassRegistry&); +void initializePropagateAndConvertNoAliasLegacyPassPass(PassRegistry &); void initializePruneEHPass(PassRegistry&); void initializeRABasicPass(PassRegistry&); void initializeRAGreedyPass(PassRegistry&); Index: llvm/include/llvm/LinkAllPasses.h =================================================================== --- llvm/include/llvm/LinkAllPasses.h +++ llvm/include/llvm/LinkAllPasses.h @@ -50,6 +50,7 @@ #include "llvm/Transforms/Scalar.h" #include "llvm/Transforms/Scalar/GVN.h" #include "llvm/Transforms/Scalar/InstSimplifyPass.h" +#include "llvm/Transforms/Scalar/PropagateAndConvertNoAlias.h" #include "llvm/Transforms/Scalar/Scalarizer.h" #include "llvm/Transforms/Utils.h" #include "llvm/Transforms/Utils/SymbolRewriter.h" @@ -216,6 +217,7 @@ (void) llvm::createSeparateConstOffsetFromGEPPass(); (void) llvm::createSpeculativeExecutionPass(); (void) llvm::createSpeculativeExecutionIfHasBranchDivergencePass(); + (void) llvm::createPropagateAndConvertNoAliasPass(); (void) llvm::createRewriteSymbolsPass(); (void) llvm::createStraightLineStrengthReducePass(); (void) llvm::createMemDerefPrinter(); Index: llvm/include/llvm/Transforms/Scalar.h =================================================================== --- llvm/include/llvm/Transforms/Scalar.h +++ llvm/include/llvm/Transforms/Scalar.h @@ -34,6 +34,12 @@ // FunctionPass *createConstantPropagationPass(); +//===----------------------------------------------------------------------===// +// +// ConnectNoAliasDecl - Connects llvm.noalias.XX intrinsics to llvm.noalias.decl +// +FunctionPass *createConnectNoAliasDeclPass(); + //===----------------------------------------------------------------------===// // // AlignmentFromAssumptions - Use assume intrinsics to set load/store @@ -426,6 +432,10 @@ // TargetTransformInfo::hasBranchDivergence() is true. FunctionPass *createSpeculativeExecutionIfHasBranchDivergencePass(); +// PropagateAndConvertNoAlias: move noalias intrinsics into a side channel of +// load/store instructions +FunctionPass *createPropagateAndConvertNoAliasPass(); + //===----------------------------------------------------------------------===// // // StraightLineStrengthReduce - This pass strength-reduces some certain Index: llvm/include/llvm/Transforms/Scalar/ConnectNoAliasDecl.h =================================================================== --- /dev/null +++ llvm/include/llvm/Transforms/Scalar/ConnectNoAliasDecl.h @@ -0,0 +1,34 @@ +//===- ConnectNoAliasDecl.h -------------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// This pass connects side.noalias intrinsics to the corresponding +/// llvm.noalias.decl, based on the alloca of the pointer. +/// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_TRANSFORMS_SCALAR_CONNECTNOALIASDECL_H +#define LLVM_TRANSFORMS_SCALAR_CONNECTNOALIASDECL_H + +#include "llvm/IR/PassManager.h" + +namespace llvm { +class DominatorTree; + +class ConnectNoAliasDeclPass : public PassInfoMixin { +public: + ConnectNoAliasDeclPass(); + + PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM); + + // Glue for old PM + bool runImpl(Function &F); +}; +} // namespace llvm + +#endif // LLVM_TRANSFORMS_SCALAR_CONNECTNOALIASDECL_H Index: llvm/include/llvm/Transforms/Scalar/PropagateAndConvertNoAlias.h =================================================================== --- /dev/null +++ llvm/include/llvm/Transforms/Scalar/PropagateAndConvertNoAlias.h @@ -0,0 +1,39 @@ +//===- PropagateAndConvertNoAlias.h -----------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// This pass moves dependencies on llvm.noalias onto the noalias_sidechannel. +/// It also introduces and propagates side.noalias and noalias.arg.guard +/// intrinsics. +/// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_TRANSFORMS_SCALAR_PROPAGATEANDCONVERTNOALIAS_H +#define LLVM_TRANSFORMS_SCALAR_PROPAGATEANDCONVERTNOALIAS_H + +#include "llvm/IR/PassManager.h" + +namespace llvm { +class DominatorTree; + +class PropagateAndConvertNoAliasPass + : public PassInfoMixin { +public: + PropagateAndConvertNoAliasPass(); + + PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM); + + // Glue for old PM + bool runImpl(Function &F, llvm::DominatorTree &DT); + +private: + bool doit(Function &F, llvm::DominatorTree &DT); +}; +} // namespace llvm + +#endif // LLVM_TRANSFORMS_SCALAR_PROPAGATEANDCONVERTNOALIAS_H Index: llvm/include/llvm/Transforms/Utils/Cloning.h =================================================================== --- llvm/include/llvm/Transforms/Utils/Cloning.h +++ llvm/include/llvm/Transforms/Utils/Cloning.h @@ -272,6 +272,11 @@ Function *Callee, int64_t entryDelta, const ValueMap *VMap = nullptr); +/// Connects noalias, side.noalias, noalias.copy.guard intrinsics to the +/// corresponding llvm.noalias.decl, based on the alloca of the underlying +/// p.addr. +/// \returns true when the function was modified. +bool propagateAndConnectNoAliasDecl(Function *F); } // end namespace llvm #endif // LLVM_TRANSFORMS_UTILS_CLONING_H Index: llvm/include/llvm/Transforms/Utils/NoAliasUtils.h =================================================================== --- /dev/null +++ llvm/include/llvm/Transforms/Utils/NoAliasUtils.h @@ -0,0 +1,78 @@ +//===- llvm/Transforms/Utils/NoAliasUtils.h - NoAlias utilities -*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file defines utilities for noalias metadata and intrinsics. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_TRANSFORMS_UTILS_NOALIASUTILS_H +#define LLVM_TRANSFORMS_UTILS_NOALIASUTILS_H + +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/SetVector.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/IntrinsicInst.h" + +namespace llvm { +class Function; + +/// Connect llvm.noalias.decl to noalias/side.noalias intrinsics that are +/// associated with the unknown function scope and based on the same alloca. +/// At the same time, propagate the p.addr, p.objId and p.scope. +bool propagateAndConnectNoAliasDecl(Function *F); + +/// Find back the 'llvm.noalias.decl' intrinsics in the specified basic blocks +/// and extract their scope. This are candidates for duplication when cloning. +template +void identifyNoAliasScopesToClone(ArrayRef BBs, + C &out_NoAliasDeclScopes) { + for (auto BB : BBs) { + for (Instruction &I : *BB) { + if (auto II = dyn_cast(&I)) { + if (II->getIntrinsicID() == Intrinsic::noalias_decl) { + out_NoAliasDeclScopes.push_back(cast( + II->getOperand(Intrinsic::NoAliasDeclScopeArg))); + } + } + } + } +} + +/// Duplicate the specified list of noalias decl scopes. +/// The 'Ext' string is added as an extension to the name. +/// Afterwards, the out_ClonedMVScopes contains a mapping of the original MV +/// onto the cloned version. +/// The out_ClonedScopes contains the mapping of the original scope MDNode +/// onto the cloned scope. +void cloneNoAliasScopes( + ArrayRef NoAliasDeclScopes, + DenseMap &out_ClonedScopes, + DenseMap &out_ClonedMVScopes, + StringRef Ext, LLVMContext &Context); + +/// Adapt the metadata for the specified instruction according to the +/// provided mapping. This is normally used after cloning an instruction, when +/// some noalias scopes needed to be cloned. +void adaptNoAliasScopes( + Instruction *I, DenseMap &ClonedScopes, + DenseMap &ClonedMVScopes, + LLVMContext &Context); + +/// Clone the specified noalias decl scopes. Then adapt all instructions in the +/// NewBlocks basicblocks to the cloned versions. +/// 'Ext' will be added to the duplicate scope names +void cloneAndAdaptNoAliasScopes(ArrayRef NoAliasDeclScopes, + ArrayRef NewBlocks, + LLVMContext &Context, StringRef Ext); +} // end namespace llvm + +#endif // LLVM_TRANSFORMS_UTILS_NOALIASUTILS_H Index: llvm/lib/Analysis/AliasSetTracker.cpp =================================================================== --- llvm/lib/Analysis/AliasSetTracker.cpp +++ llvm/lib/Analysis/AliasSetTracker.cpp @@ -292,6 +292,7 @@ I->second->eraseFromList(); PointerMap.clear(); + SideChannelPointers.clear(); // The alias sets should all be clear now. AliasSets.clear(); @@ -357,6 +358,10 @@ AliasSet::PointerRec &Entry = getEntryFor(Pointer); + if (AAInfo.NoAliasSideChannel) + SideChannelPointers.insert( + ASTSideChannelCallbackVH(AAInfo.NoAliasSideChannel, this)); + if (AliasAnyAS) { // At this point, the AST is saturated, so we only have one active alias // set. That means we already know which alias set we want to return, and @@ -732,6 +737,65 @@ return *this = ASTCallbackVH(V, AST); } +//===----------------------------------------------------------------------===// +// ASTSideChannelCallbackVH Class Implementation +//===----------------------------------------------------------------------===// + +void AliasSetTracker::ASTSideChannelCallbackVH::deleted() { + assert(AST && "ASTCallbackVH called with a null AliasSetTracker!"); + AST->deleteSideChannelValue(getValPtr()); + // this now dangles! +} + +void AliasSetTracker::ASTSideChannelCallbackVH::allUsesReplacedWith(Value *V) { + AST->copySideChannelValue(getValPtr(), V); +} + +AliasSetTracker::ASTSideChannelCallbackVH::ASTSideChannelCallbackVH( + Value *V, AliasSetTracker *ast) + : CallbackVH(V), AST(ast) {} + +AliasSetTracker::ASTSideChannelCallbackVH & +AliasSetTracker::ASTSideChannelCallbackVH::operator=(Value *V) { + return *this = ASTSideChannelCallbackVH(V, AST); +} + +void AliasSetTracker::deleteSideChannelValue(Value *PtrVal) { + // FIXME: slow algorithms ahead + SideChannelSetType::iterator I = SideChannelPointers.find_as(PtrVal); + if (I == SideChannelPointers.end()) + return; // nope + + // iterate over all PointerRecords to update the AAMDNodes that contain this + // pointer + for (auto &AS : *this) { + for (auto &PR : AS) { + PR.updateNoAliasSideChannelIfMatching(PtrVal, nullptr); + } + } + + SideChannelPointers.erase(I); +} + +void AliasSetTracker::copySideChannelValue(Value *From, Value *To) { + // FIXME: slow algorithms ahead + SideChannelSetType::iterator I = SideChannelPointers.find_as(From); + if (I == SideChannelPointers.end()) + return; // nope + + // iterate over all PointerRecords to update the AAMDNodes that contain this + // pointer + for (auto &AS : *this) { + for (auto &PR : AS) { + PR.updateNoAliasSideChannelIfMatching(From, To); + } + } + + // Update the set + SideChannelPointers.erase(I); + SideChannelPointers.insert(ASTSideChannelCallbackVH(To, this)); +} + //===----------------------------------------------------------------------===// // AliasSetPrinter Pass //===----------------------------------------------------------------------===// Index: llvm/lib/Analysis/BasicAliasAnalysis.cpp =================================================================== --- llvm/lib/Analysis/BasicAliasAnalysis.cpp +++ llvm/lib/Analysis/BasicAliasAnalysis.cpp @@ -853,10 +853,15 @@ if (CacheIt != AAQI.AliasCache.end()) return CacheIt->second; + ++RecurseLevel; AliasResult Alias = aliasCheck(LocA.Ptr, LocA.Size, LocA.AATags, LocB.Ptr, LocB.Size, LocB.AATags, AAQI); - VisitedPhiBBs.clear(); + // protect cache against recursive calls from ScopedNoAliasAA: only clean up + // at the top level + if (--RecurseLevel == 0) { + VisitedPhiBBs.clear(); + } return Alias; } @@ -1740,8 +1745,8 @@ return NoAlias; // Strip off any casts if they exist. - V1 = V1->stripPointerCastsAndInvariantGroups(); - V2 = V2->stripPointerCastsAndInvariantGroups(); + V1 = V1->stripPointerCastsAndInvariantGroupsAndNoAliasIntr(); + V2 = V2->stripPointerCastsAndInvariantGroupsAndNoAliasIntr(); // If V1 or V2 is undef, the result is NoAlias because we can always pick a // value for undef that aliases nothing in the program. Index: llvm/lib/Analysis/CaptureTracking.cpp =================================================================== --- llvm/lib/Analysis/CaptureTracking.cpp +++ llvm/lib/Analysis/CaptureTracking.cpp @@ -26,6 +26,7 @@ #include "llvm/IR/Dominators.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/Intrinsics.h" using namespace llvm; @@ -254,6 +255,15 @@ switch (I->getOpcode()) { case Instruction::Call: case Instruction::Invoke: { + if (auto *II = dyn_cast(I)) + if (II->getIntrinsicID() == Intrinsic::noalias || + II->getIntrinsicID() == Intrinsic::side_noalias || + II->getIntrinsicID() == Intrinsic::noalias_arg_guard || + II->getIntrinsicID() == Intrinsic::noalias_copy_guard) { + AddUses(I); + break; + } + auto *Call = cast(I); // Not captured if the callee is readonly, doesn't return a copy through // its return value and doesn't unwind (a readonly function can leak bits Index: llvm/lib/Analysis/InstructionSimplify.cpp =================================================================== --- llvm/lib/Analysis/InstructionSimplify.cpp +++ llvm/lib/Analysis/InstructionSimplify.cpp @@ -35,6 +35,7 @@ #include "llvm/IR/GlobalAlias.h" #include "llvm/IR/InstrTypes.h" #include "llvm/IR/Instructions.h" +#include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/Operator.h" #include "llvm/IR/PatternMatch.h" #include "llvm/IR/ValueHandle.h" @@ -5130,6 +5131,13 @@ // TODO: maxnum(nnan ninf x, -flt_max) -> x break; } + case Intrinsic::noalias_arg_guard: { + // Only follow the plain path if undefined. Do not propagate null, that + // would incorrectly omit noalias information. + if (isa(Op0)) + return Op0; + break; + } default: break; } @@ -5137,8 +5145,29 @@ return nullptr; } -static Value *simplifyIntrinsic(CallBase *Call, const SimplifyQuery &Q) { +static Value *simplifySideNoalias(const Value *V) { + const IntrinsicInst *II = cast(V); + assert(II->getIntrinsicID() == Intrinsic::side_noalias); + + // Only follow the plain path if undefined. Do not propagate null, that + // would incorrectly omit noalias information. + Value *Op0 = II->getOperand(0); + if (isa(Op0)) { + return Op0; + } + // Check for compatibility: side_noalias(side_noalias) -> side_noalias + if (llvm::IntrinsicInst *DepII = dyn_cast(Op0)) { + if (DepII->getIntrinsicID() == Intrinsic::side_noalias) { + if (llvm::areSideNoaliasCompatible(DepII, II)) { + return DepII; + } + } + } + return nullptr; +} + +static Value *simplifyIntrinsic(CallBase *Call, const SimplifyQuery &Q) { // Intrinsics with no operands have some kind of side effect. Don't simplify. unsigned NumOperands = Call->getNumArgOperands(); if (!NumOperands) @@ -5195,6 +5224,18 @@ return V; return nullptr; } + case Intrinsic::noalias: + case Intrinsic::noalias_copy_guard: { + // Only follow the plain path if undefined. Do not propagate null, that + // would incorrectly omit noalias information. + if (auto Op0 = dyn_cast(Call->getArgOperand(0))) { + return Op0; + } + return nullptr; + } + case Intrinsic::side_noalias: { + return simplifySideNoalias(Call); + } default: return nullptr; } Index: llvm/lib/Analysis/MemorySSA.cpp =================================================================== --- llvm/lib/Analysis/MemorySSA.cpp +++ llvm/lib/Analysis/MemorySSA.cpp @@ -284,6 +284,11 @@ case Intrinsic::invariant_start: case Intrinsic::invariant_end: case Intrinsic::assume: + case Intrinsic::noalias_decl: + case Intrinsic::noalias: + case Intrinsic::side_noalias: + case Intrinsic::noalias_arg_guard: + case Intrinsic::noalias_copy_guard: return {false, NoAlias}; case Intrinsic::dbg_addr: case Intrinsic::dbg_declare: @@ -1736,9 +1741,18 @@ // dependencies here. // FIXME: Replace this special casing with a more accurate modelling of // assume's control dependency. - if (IntrinsicInst *II = dyn_cast(I)) - if (II->getIntrinsicID() == Intrinsic::assume) + if (IntrinsicInst *II = dyn_cast(I)) { + switch(II->getIntrinsicID()) { + default: break; + case Intrinsic::assume: + case Intrinsic::noalias_decl: + case Intrinsic::noalias: + case Intrinsic::side_noalias: + case Intrinsic::noalias_arg_guard: + case Intrinsic::noalias_copy_guard: return nullptr; + } + } // Using a nonstandard AA pipelines might leave us with unexpected modref // results for I, so add a check to not model instructions that may not read Index: llvm/lib/Analysis/ScopedNoAliasAA.cpp =================================================================== --- llvm/lib/Analysis/ScopedNoAliasAA.cpp +++ llvm/lib/Analysis/ScopedNoAliasAA.cpp @@ -33,46 +33,33 @@ #include "llvm/Analysis/ScopedNoAliasAA.h" #include "llvm/ADT/SmallPtrSet.h" +#include "llvm/Analysis/CaptureTracking.h" #include "llvm/Analysis/MemoryLocation.h" +#include "llvm/Analysis/ValueTracking.h" +#include "llvm/IR/Dominators.h" #include "llvm/IR/Instruction.h" +#include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/LLVMContext.h" #include "llvm/IR/Metadata.h" #include "llvm/Pass.h" #include "llvm/Support/Casting.h" #include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" using namespace llvm; +#define DEBUG_TYPE "scoped-noalias" + // A handy option for disabling scoped no-alias functionality. The same effect // can also be achieved by stripping the associated metadata tags from IR, but // this option is sometimes more convenient. static cl::opt EnableScopedNoAlias("enable-scoped-noalias", - cl::init(true), cl::Hidden); - -namespace { - -/// This is a simple wrapper around an MDNode which provides a higher-level -/// interface by hiding the details of how alias analysis information is encoded -/// in its operands. -class AliasScopeNode { - const MDNode *Node = nullptr; - -public: - AliasScopeNode() = default; - explicit AliasScopeNode(const MDNode *N) : Node(N) {} + cl::init(true), cl::Hidden, + cl::desc("Enable use of scoped-noalias metadata")); - /// Get the MDNode for this AliasScopeNode. - const MDNode *getNode() const { return Node; } - - /// Get the MDNode for this AliasScopeNode's domain. - const MDNode *getDomain() const { - if (Node->getNumOperands() < 2) - return nullptr; - return dyn_cast_or_null(Node->getOperand(1)); - } -}; - -} // end anonymous namespace +static cl::opt +MaxNoAliasDepth("scoped-noalias-max-depth", cl::init(12), cl::Hidden, + cl::desc("Maximum depth for noalias intrinsic search")); AliasResult ScopedNoAliasAAResult::alias(const MemoryLocation &LocA, const MemoryLocation &LocB, @@ -91,6 +78,18 @@ if (!mayAliasInScopes(BScopes, ANoAlias)) return NoAlias; + if (LocA.AATags.NoAliasSideChannel && LocB.AATags.NoAliasSideChannel) { + if (noAliasByIntrinsic(ANoAlias, LocA.AATags.NoAliasSideChannel, BNoAlias, + LocB.AATags.NoAliasSideChannel, nullptr, nullptr, + AAQI)) + return NoAlias; + + if (noAliasByIntrinsic(BNoAlias, LocB.AATags.NoAliasSideChannel, ANoAlias, + LocA.AATags.NoAliasSideChannel, nullptr, nullptr, + AAQI)) + return NoAlias; + } + // If they may alias, chain to the next AliasAnalysis. return AAResultBase::alias(LocA, LocB, AAQI); } @@ -101,14 +100,26 @@ if (!EnableScopedNoAlias) return AAResultBase::getModRefInfo(Call, Loc, AAQI); - if (!mayAliasInScopes(Loc.AATags.Scope, - Call->getMetadata(LLVMContext::MD_noalias))) + const MDNode *CSNoAlias = + Call->getMetadata(LLVMContext::MD_noalias); + if (!mayAliasInScopes(Loc.AATags.Scope, CSNoAlias)) return ModRefInfo::NoModRef; - if (!mayAliasInScopes(Call->getMetadata(LLVMContext::MD_alias_scope), - Loc.AATags.NoAlias)) + const MDNode *CSScopes = + Call->getMetadata(LLVMContext::MD_alias_scope); + if (!mayAliasInScopes(CSScopes, Loc.AATags.NoAlias)) return ModRefInfo::NoModRef; + if (Loc.AATags.NoAliasSideChannel) { + if (noAliasByIntrinsic(Loc.AATags.NoAlias, Loc.AATags.NoAliasSideChannel, + CSNoAlias, nullptr, nullptr, Call, AAQI)) + return ModRefInfo::NoModRef; + + if (noAliasByIntrinsic(CSNoAlias, nullptr, Loc.AATags.NoAlias, + Loc.AATags.NoAliasSideChannel, Call, nullptr, AAQI)) + return ModRefInfo::NoModRef; + } + return AAResultBase::getModRefInfo(Call, Loc, AAQI); } @@ -118,12 +129,26 @@ if (!EnableScopedNoAlias) return AAResultBase::getModRefInfo(Call1, Call2, AAQI); - if (!mayAliasInScopes(Call1->getMetadata(LLVMContext::MD_alias_scope), - Call2->getMetadata(LLVMContext::MD_noalias))) + const MDNode *CS1Scopes = + Call1->getMetadata(LLVMContext::MD_alias_scope); + const MDNode *CS2Scopes = + Call2->getMetadata(LLVMContext::MD_alias_scope); + const MDNode *CS1NoAlias = + Call1->getMetadata(LLVMContext::MD_noalias); + const MDNode *CS2NoAlias = + Call2->getMetadata(LLVMContext::MD_noalias); + if (!mayAliasInScopes(CS1Scopes, Call2->getMetadata(LLVMContext::MD_noalias))) + return ModRefInfo::NoModRef; + + if (!mayAliasInScopes(CS2Scopes, Call1->getMetadata(LLVMContext::MD_noalias))) return ModRefInfo::NoModRef; - if (!mayAliasInScopes(Call2->getMetadata(LLVMContext::MD_alias_scope), - Call1->getMetadata(LLVMContext::MD_noalias))) + if (noAliasByIntrinsic(CS1NoAlias, nullptr, CS2NoAlias, nullptr, Call1, Call2, + AAQI)) + return ModRefInfo::NoModRef; + + if (noAliasByIntrinsic(CS2NoAlias, nullptr, CS1NoAlias, nullptr, Call2, Call1, + AAQI)) return ModRefInfo::NoModRef; return AAResultBase::getModRefInfo(Call1, Call2, AAQI); @@ -175,11 +200,452 @@ return true; } +bool ScopedNoAliasAAResult::findCompatibleNoAlias(const Value *P, + const MDNode *ANoAlias, const MDNode *BNoAlias, const DataLayout &DL, + SmallPtrSetImpl &Visited, + SmallVectorImpl &CompatibleSet, int Depth) { + // When a pointer is derived from multiple noalias calls, there are two + // potential reasons: + // 1. The path of derivation is uncertain (because of a select, PHI, etc.). + // 2. Some noalias calls are derived from other noalias calls. + // Logically, we need to treat (1) as an "and" and (2) as an "or" when + // checking for scope compatibility. If we don't know from which noalias call + // a pointer is derived, then we need to require compatibility with all of + // them. If we're derived from a noalias call that is derived from another + // noalias call, then we need the ability to effectively ignore the inner one + // in favor of the outer one (thus, we only need compatibility with one or + // the other). + // + // Scope compatibility means that, as with the noalias metadata, within each + // domain, the set of noalias intrinsic scopes is a subset of the noalias + // scopes. + // + // Given this, we check compatibility of the relevant sets of noalias calls + // from which LocA.Ptr might derive with both LocA.AATags.NoAlias and + // LocB.AATags.NoAlias, and LocB.Ptr does not derive from any of the noalias + // calls in some set, then we can conclude NoAlias. + // + // So if we have: + // noalias1 noalias3 + // | | + // noalias2 noalias4 + // | | + // \ / + // \ / + // \ / + // \ / + // select + // | + // noalias5 + // | + // noalias6 + // | + // PtrA + // + // - If PtrA is compatible with noalias6, and PtrB is also compatible, + // but does not derive from noalias6, then NoAlias. + // - If PtrA is compatible with noalias5, and PtrB is also compatible, + // but does not derive from noalias5, then NoAlias. + // - If PtrA is compatible with noalias2 and noalias4, and PtrB is also + // compatible, but does not derive from either, then NoAlias. + // - If PtrA is compatible with noalias2 and noalias3, and PtrB is also + // compatible, but does not derive from either, then NoAlias. + // - If PtrA is compatible with noalias1 and noalias4, and PtrB is also + // compatible, but does not derive from either, then NoAlias. + // - If PtrA is compatible with noalias1 and noalias3, and PtrB is also + // compatible, but does not derive from either, then NoAlias. + // + // We don't need, or want, to explicitly build N! sets to check for scope + // compatibility. Instead, recurse through the tree of underlying objects. + + SmallVector NoAliasCalls; + P = GetUnderlyingObject(P, DL, 0, &NoAliasCalls); + + // If we've already visited this underlying value (likely because this is a + // PHI that depends on itself, directly or indirectly), we must not have + // returned false the first time, so don't do so this time either. + if (!Visited.insert(P).second) + return true; + + // Our pointer is derived from P, with NoAliasCalls along the way. + // Compatibility with any of them is fine. + auto NAI = find_if(NoAliasCalls, [&](Instruction *A) { + return !mayAliasInScopes( + dyn_cast( + cast( + A->getOperand(Intrinsic::SideNoAliasScopeArg)) + ->getMetadata()), + ANoAlias) && + !mayAliasInScopes( + dyn_cast( + cast( + A->getOperand(Intrinsic::SideNoAliasScopeArg)) + ->getMetadata()), + BNoAlias); + }); + if (NAI != NoAliasCalls.end()) { + CompatibleSet.push_back(*NAI); + return true; + } + + // We've not found a compatible noalias call, but we might be able to keep + // looking. If this underlying object is really a PHI or a select, we can + // check the incoming values. They all need to be compatible, and if so, we + // can take the union of all of the compatible noalias calls as the set to + // return for further validation. + SmallVector Children; + if (const auto *SI = dyn_cast(P)) { + Children.push_back(SI->getTrueValue()); + Children.push_back(SI->getFalseValue()); + } else if (const auto *PN = dyn_cast(P)) { + for (Value *IncommingValue : PN->incoming_values()) + Children.push_back(IncommingValue); + } + + if (Children.empty() || Depth == MaxNoAliasDepth) + return false; + + SmallPtrSet ChildVisited; + SmallVector ChildCompatSet; + for (auto &C : Children) { + ChildVisited.clear(); + ChildVisited.insert(Visited.begin(), Visited.end()); + ChildVisited.insert(P); + + ChildCompatSet.clear(); + if (!findCompatibleNoAlias(C, ANoAlias, BNoAlias, DL, ChildVisited, + ChildCompatSet, Depth+1)) + return false; + + CompatibleSet.insert(CompatibleSet.end(), + ChildCompatSet.begin(), ChildCompatSet.end()); + } + + // All children were compatible, and we've added them to CompatibleSet. + return true; +} + +bool ScopedNoAliasAAResult::noAliasByIntrinsic( + const MDNode *ANoAlias, const Value *APtr, const MDNode *BNoAlias, + const Value *BPtr, const CallBase *CallA, const CallBase *CallB, + AAQueryInfo &AAQI) { + LLVM_DEBUG(llvm::dbgs() << ">ScopedNoAliasAAResult::noAliasByIntrinsic:{" + << (const void *)ANoAlias << "," << (const void *)APtr + << "},{" << (const void *)BNoAlias << "," + << (const void *)BPtr << "}\n"); + if (!ANoAlias || !BNoAlias) + return false; + + if (CallA) { + // We're querying a callsite against something else, where we want to know + // if the callsite (CallA) is derived from some noalias call(s) and the other + // thing is not derived from those noalias call(s). This can be determined + // only if CallA only accesses memory through its arguments. + FunctionModRefBehavior MRB = getModRefBehavior(CallA); + if (MRB != FMRB_OnlyAccessesArgumentPointees && + MRB != FMRB_OnlyReadsArgumentPointees) + return false; + + LLVM_DEBUG(dbgs() << "SNA: CSA: " << *CallA << "\n"); + // Since the memory-access behavior of CallA is determined only by its + // arguments, we can answer this query in the affirmative if we can prove a + // lack of aliasing for all pointer arguments. + for (Value *Arg : CallA->args()) { + if (!Arg->getType()->isPointerTy()) + continue; + + if (!noAliasByIntrinsic(ANoAlias, Arg, BNoAlias, BPtr, nullptr, CallB, + AAQI)) { + LLVM_DEBUG(dbgs() << "SNA: CSA: noalias fail for arg: " << *Arg << "\n"); + return false; + } + } + + return true; + } + + const auto *AInst = dyn_cast(APtr); + if (!AInst || !AInst->getParent()) + return false; + const DataLayout &DL = AInst->getParent()->getModule()->getDataLayout(); + + if (!CallB && !BPtr) + return false; + + LLVM_DEBUG(dbgs() << "SNA: A: " << *APtr << "\n"); + LLVM_DEBUG(dbgs() << "SNB: "; if (CallB) dbgs() << "CSB: " << *CallB; + else if (BPtr) dbgs() << "B: " << *BPtr; + else dbgs() << "B: nullptr"; dbgs() << "\n"); + + SmallPtrSet Visited; + SmallVector CompatibleSet; + if (!findCompatibleNoAlias(APtr, ANoAlias, BNoAlias, DL, Visited, + CompatibleSet)) + return false; + + assert(!CompatibleSet.empty() && + "Fould an empty set of compatible intrinsics?"); + + LLVM_DEBUG(dbgs() << "SNA: Found a compatible set!\n"); +#ifndef NDEBUG + for (auto &C : CompatibleSet) + LLVM_DEBUG(dbgs() << "\t" << *C << "\n"); + LLVM_DEBUG(dbgs() << "\n"); +#endif + + // We have a set of compatible noalias calls (compatible with the scopes from + // both LocA and LocB) from which LocA.Ptr potentially derives. We now need + // to make sure that LocB.Ptr does not derive from any in that set. For + // correctness, there cannot be a depth limit here (if a pointer is derived + // from a noalias call, we must know). + SmallVector BObjs; + SmallVector BNoAliasCalls; + if (CallB) { + for (Value *Arg : CallB->args()) + GetUnderlyingObjects(Arg, BObjs, DL, nullptr, 0, &BNoAliasCalls); + } else { + GetUnderlyingObjects(const_cast(BPtr), BObjs, DL, nullptr, 0, + &BNoAliasCalls); + } + + LLVM_DEBUG(dbgs() << "SNA: B/CSB noalias:\n"); +#ifndef NDEBUG + for (auto &B : BNoAliasCalls) + LLVM_DEBUG(dbgs() << "\t" << *B << "\n"); + LLVM_DEBUG(dbgs() << "\n"); +#endif + + // We need to check now if any compatible llvm.side.noalias call is + // potentially using the same 'P' object as one of the 'BNoAliasCalls'. + // If this is true for at least one entry, we must bail out and assume + // 'may_alias' + + { + MDNode *NoAliasUnknownScopeMD = + AInst->getParent()->getParent()->getMetadata("noalias"); + + for (Instruction *CA : CompatibleSet) { + LLVM_DEBUG(llvm::dbgs() << "- CA:" << *CA << "\n"); + assert(isa(CA) && + (cast(CA)->getIntrinsicID() == + llvm::Intrinsic::side_noalias)); + for (Instruction *CB : BNoAliasCalls) { + LLVM_DEBUG(llvm::dbgs() << "- CB:" << *CB << "\n"); + assert(isa(CB) && + (cast(CB)->getIntrinsicID() == + llvm::Intrinsic::side_noalias)); + + // With the llvm.side.noalias version, we have different parts that can + // represent a P: + // - the actual 'identifyP' address (or an offset vs an optimized away + // alloca) + // - the objectId (objectId's currently represent an offset to the + // original alloca of the object) + // - the scope (different scopes = different objects; with the exception + // of the 'unknown scope' an unknown scope can potentially be the same + // as a real variable scope. + // If any of these are different, the P will not alias => *P will also + // not alias + + // Let's start with the fast checks first; + + // Same call ? + if (CA == CB) { + LLVM_DEBUG(llvm::dbgs() << "SNA == SNB\n"); + return false; + } + + // - different objectId ? + { + // check ObjId first: if the obj id's (aka, offset in the object) are + // different, they represent different objects + auto ObjIdA = + cast( + CA->getOperand(Intrinsic::SideNoAliasIdentifyPObjIdArg)) + ->getZExtValue(); + auto ObjIdB = + cast( + CB->getOperand(Intrinsic::SideNoAliasIdentifyPObjIdArg)) + ->getZExtValue(); + if (ObjIdA != ObjIdB) { + LLVM_DEBUG(llvm::dbgs() << "SNA.ObjId != SNB.ObjId\n"); + continue; + } + } + + // Different Scope ? (except for unknown scope) + { + bool isDifferentPByScope = true; + auto *CASnaScope = CA->getOperand(Intrinsic::SideNoAliasScopeArg); + auto *CBSnaScope = CB->getOperand(Intrinsic::SideNoAliasScopeArg); + if (CASnaScope == CBSnaScope) { + // compatibility check below will resolve + isDifferentPByScope = false; + } else { + if (NoAliasUnknownScopeMD) { + if ((cast(CASnaScope)->getMetadata() == + NoAliasUnknownScopeMD) || + (cast(CBSnaScope)->getMetadata() == + NoAliasUnknownScopeMD)) { + isDifferentPByScope = false; + } + } + } + if (isDifferentPByScope) { + LLVM_DEBUG(llvm::dbgs() + << "SNA.Scope != SNB.Scope (and not 'unknown scope')\n"); + continue; + } + } + + // Different 'P' ? + { + Value *P_A = CA->getOperand(Intrinsic::SideNoAliasIdentifyPArg); + Value *P_B = CB->getOperand(Intrinsic::SideNoAliasIdentifyPArg); + + if (P_A == P_B) { + LLVM_DEBUG(dbgs() << " SNA.Scope == SNB.Scope, SNA.P == SNB.P\n"); + return false; + } + + Constant *CP_A = dyn_cast(P_A); + if (CP_A) { + Constant *CP_B = dyn_cast(P_B); + if (CP_B) { + CP_B = ConstantExpr::getBitCast(CP_B, CP_A->getType()); + Constant *Cmp = + ConstantExpr::getCompare(CmpInst::ICMP_NE, CP_A, CP_B, true); + if (Cmp && Cmp->isNullValue()) { + LLVM_DEBUG(dbgs() << " SNA.Scope == SNB.Scope, !(SNA.P != " + "SNB.P) as constant\n"); + return false; + } + } + } + // Check if P_A.addr and P_B.addr alias. If they don't, they describe + // different pointers. + LLVM_DEBUG(llvm::dbgs() + << " SNA.P=" << *P_A << ", SNB.P=" << *P_B << "\n"); + AAMDNodes P_A_Metadata; + AAMDNodes P_B_Metadata; + CA->getAAMetadata(P_A_Metadata); + CB->getAAMetadata(P_B_Metadata); + + // The noalias_sidechannel is not handled in the + // Instruction::getAAMetadata for intrinsics + P_A_Metadata.NoAliasSideChannel = + CA->getOperand(Intrinsic::SideNoAliasIdentifyPSideChannelArg); + P_B_Metadata.NoAliasSideChannel = + CB->getOperand(Intrinsic::SideNoAliasIdentifyPSideChannelArg); + + // Check with 1 unit + MemoryLocation ML_P_A(P_A, 1ull, P_A_Metadata); + MemoryLocation ML_P_B(P_B, 1ull, P_B_Metadata); + if (getBestAAResults().alias(ML_P_A, ML_P_B, AAQI) != + AliasResult::NoAlias) { + LLVM_DEBUG(llvm::dbgs() << " P ... may alias\n"); + return false; + } + LLVM_DEBUG(llvm::dbgs() << " P is NoAlias\n"); + continue; + } + } + } + } + + // The noalias scope from the compatible intrinsics are really identified by + // their scope argument, and we need to make sure that LocB.Ptr is not only + // not derived from the calls currently in CompatibleSet, but also from any + // other intrinsic with the same scope. We can't just search the list of + // noalias intrinsics in BNoAliasCalls because we care not just about those + // direct dependence, but also dependence through capturing. Metadata + // do not have use lists, but MetadataAsValue objects do (and they are + // uniqued), so we can search their use list. As a result, however, + // correctness demands that the scope list has only one element (so that we + // can find all uses of that scope by noalias intrinsics by looking at the + // use list of the associated scope list). + SmallPtrSet CompatibleSetMembers(CompatibleSet.begin(), + CompatibleSet.end()); + SmallVector CompatibleSetMVs; + for (auto &C : CompatibleSet) + CompatibleSetMVs.push_back( + cast(C->getOperand(Intrinsic::SideNoAliasScopeArg))); + for (auto &MV : CompatibleSetMVs) + for (Use &U : MV->uses()) + if (auto *UI = dyn_cast(U.getUser())) { + // Skip noalias declarations + if (auto *CB = dyn_cast(UI)) + if (CB->getIntrinsicID() == Intrinsic::noalias_decl) + continue; + if (CompatibleSetMembers.insert(UI).second) { + CompatibleSet.push_back(UI); + LLVM_DEBUG(dbgs() << "SNA: Adding to compatible set based on MD use: " << + *UI << "\n"); + } + } + + LLVM_DEBUG(dbgs() << "SNA: B does not derive from the compatible set!\n"); + + // Note: This can be removed when legacy-pass-manager support is removed; + // BasicAA always has a DT available, and only under the hack where this is + // an immutable pass, not a function pass, might we not have one. + LLVM_DEBUG(dbgs() << "SNA: DT is " << (DT ? "available" : "unavailable") << "\n"); + + // We now know that LocB.Ptr does not derive from any of the noalias calls in + // CompatibleSet directly. We do, however, need to make sure that it cannot + // derive from them by capture. + for (auto &V : BObjs) { + // If the underlying object is not an instruction, then it can't be + // capturing the output value of an instruction (specifically, the noalias + // intrinsic call), and we can ignore it. + auto *I = dyn_cast(V); + if (!I) + continue; + if (isIdentifiedFunctionLocal(I)) + continue; + + LLVM_DEBUG(dbgs() << "SNA: Capture check for B/CSB UO: " << *I << "\n"); + + // If the value from the noalias intrinsic has been captured prior to the + // instruction defining the underlying object, then LocB.Ptr might yet be + // derived from the return value of the noalias intrinsic, and we cannot + // conclude anything about the aliasing. + for (auto &C : CompatibleSet) + if (PointerMayBeCapturedBefore(C, /* ReturnCaptures */ false, + /* StoreCaptures */ false, I, DT, + /* IncludeI */ false, nullptr, 80)) { + LLVM_DEBUG(dbgs() << "SNA: Pointer " << *C << " might be captured!\n"); + return false; + } + } + + if (CallB) { + FunctionModRefBehavior MRB = getModRefBehavior(CallB); + if (MRB != FMRB_OnlyAccessesArgumentPointees && + MRB != FMRB_OnlyReadsArgumentPointees) { + // If we're querying against a callsite, and it might read from memory + // not based on its arguments, then we need to check whether or not the + // relevant noalias results have been captured prior to the callsite. + for (auto &C : CompatibleSet) + if (PointerMayBeCapturedBefore(C, /* ReturnCaptures */false, + /* StoreCaptures */false, + CallB, DT)) { + LLVM_DEBUG(dbgs() << "SNA: CSB: Pointer " << *C << + " might be captured!\n"); + return false; + } + } + } + + LLVM_DEBUG(dbgs() << " SNA: noalias!\n"); + return true; +} + AnalysisKey ScopedNoAliasAA::Key; ScopedNoAliasAAResult ScopedNoAliasAA::run(Function &F, FunctionAnalysisManager &AM) { - return ScopedNoAliasAAResult(); + return ScopedNoAliasAAResult(&AM.getResult(F)); } char ScopedNoAliasAAWrapperPass::ID = 0; @@ -196,7 +662,7 @@ } bool ScopedNoAliasAAWrapperPass::doInitialization(Module &M) { - Result.reset(new ScopedNoAliasAAResult()); + Result.reset(new ScopedNoAliasAAResult(nullptr)); return false; } @@ -205,6 +671,13 @@ return false; } +void ScopedNoAliasAAWrapperPass::setDT() { + auto *DTWP = getAnalysisIfAvailable(); + if (DTWP) + Result->setDT(&DTWP->getDomTree()); +} + void ScopedNoAliasAAWrapperPass::getAnalysisUsage(AnalysisUsage &AU) const { AU.setPreservesAll(); + AU.addUsedIfAvailable(); } Index: llvm/lib/Analysis/TypeBasedAliasAnalysis.cpp =================================================================== --- llvm/lib/Analysis/TypeBasedAliasAnalysis.cpp +++ llvm/lib/Analysis/TypeBasedAliasAnalysis.cpp @@ -112,6 +112,7 @@ #include "llvm/IR/Constants.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/Instruction.h" +#include "llvm/IR/Instructions.h" #include "llvm/IR/LLVMContext.h" #include "llvm/IR/Metadata.h" #include "llvm/Pass.h" @@ -519,6 +520,14 @@ return Ret; } +static Value *MergeSideChannel(Value *lhs, Value *rhs) { + // Similar to what we do for N.NoAlias: only identical side_channels can be + // merged + if (lhs == rhs) + return lhs; + return nullptr; +} + void Instruction::getAAMetadata(AAMDNodes &N, bool Merge) const { if (Merge) N.TBAA = @@ -537,6 +546,25 @@ MDNode::intersect(N.NoAlias, getMetadata(LLVMContext::MD_noalias)); else N.NoAlias = getMetadata(LLVMContext::MD_noalias); + if (const LoadInst *LI = dyn_cast(this)) { + if (LI->hasNoaliasSideChannelOperand()) { + if (Merge) { + N.NoAliasSideChannel = MergeSideChannel( + N.NoAliasSideChannel, LI->getNoaliasSideChannelOperand()); + } else { + N.NoAliasSideChannel = LI->getNoaliasSideChannelOperand(); + } + } + } else if (const StoreInst *SI = dyn_cast(this)) { + if (SI->hasNoaliasSideChannelOperand()) { + if (Merge) { + N.NoAliasSideChannel = MergeSideChannel( + N.NoAliasSideChannel, SI->getNoaliasSideChannelOperand()); + } else { + N.NoAliasSideChannel = SI->getNoaliasSideChannelOperand(); + } + } + } } static const MDNode *createAccessTag(const MDNode *AccessType) { Index: llvm/lib/Analysis/ValueTracking.cpp =================================================================== --- llvm/lib/Analysis/ValueTracking.cpp +++ llvm/lib/Analysis/ValueTracking.cpp @@ -513,6 +513,11 @@ case Intrinsic::invariant_end: case Intrinsic::lifetime_start: case Intrinsic::lifetime_end: + case Intrinsic::noalias_decl: + case Intrinsic::noalias: + case Intrinsic::side_noalias: + case Intrinsic::noalias_arg_guard: + case Intrinsic::noalias_copy_guard: case Intrinsic::objectsize: case Intrinsic::ptr_annotation: case Intrinsic::var_annotation: @@ -3678,6 +3683,8 @@ const CallBase *Call, bool MustPreserveNullness) { return Call->getIntrinsicID() == Intrinsic::launder_invariant_group || Call->getIntrinsicID() == Intrinsic::strip_invariant_group || + Call->getIntrinsicID() == Intrinsic::noalias_arg_guard || + Call->getIntrinsicID() == Intrinsic::noalias_copy_guard || Call->getIntrinsicID() == Intrinsic::aarch64_irg || Call->getIntrinsicID() == Intrinsic::aarch64_tagp || (!MustPreserveNullness && @@ -3712,7 +3719,8 @@ } Value *llvm::GetUnderlyingObject(Value *V, const DataLayout &DL, - unsigned MaxLookup) { + unsigned MaxLookup, + SmallVectorImpl *NoAlias) { if (!V->getType()->isPointerTy()) return V; for (unsigned Count = 0; MaxLookup == 0 || Count < MaxLookup; ++Count) { @@ -3730,6 +3738,28 @@ return V; } else { if (auto *Call = dyn_cast(V)) { + if (NoAlias) { + // We are gathering information for ScopedAANoAlias - + // Look through side.noalias intrinsic + if (Call->getIntrinsicID() == Intrinsic::side_noalias) { + NoAlias->push_back(Call); + V = Call->getArgOperand(0); + continue; + } + // Look trough noalias.arg.guard, and follow the sidechannel + if (Call->getIntrinsicID() == Intrinsic::noalias_arg_guard) { + V = Call->getArgOperand(1); + continue; + } + } else { + // We are not gathering information for SCopedAANoAlias - + // Look through noalias.arg.guard and follow the pointer path + if (Call->getIntrinsicID() == Intrinsic::noalias_arg_guard) { + V = Call->getArgOperand(0); + continue; + } + } + // CaptureTracking can know about special capturing properties of some // intrinsics like launder.invariant.group, that can't be expressed with // the attributes, but have properties like returning aliasing pointer. @@ -3763,13 +3793,14 @@ void llvm::GetUnderlyingObjects(const Value *V, SmallVectorImpl &Objects, const DataLayout &DL, LoopInfo *LI, - unsigned MaxLookup) { + unsigned MaxLookup, + SmallVectorImpl *NoAlias) { SmallPtrSet Visited; SmallVector Worklist; Worklist.push_back(V); do { const Value *P = Worklist.pop_back_val(); - P = GetUnderlyingObject(P, DL, MaxLookup); + P = GetUnderlyingObject(P, DL, MaxLookup, NoAlias); if (!Visited.insert(P).second) continue; Index: llvm/lib/Analysis/VectorUtils.cpp =================================================================== --- llvm/lib/Analysis/VectorUtils.cpp +++ llvm/lib/Analysis/VectorUtils.cpp @@ -117,7 +117,8 @@ if (isTriviallyVectorizable(ID) || ID == Intrinsic::lifetime_start || ID == Intrinsic::lifetime_end || ID == Intrinsic::assume || - ID == Intrinsic::sideeffect) + ID == Intrinsic::sideeffect || ID == Intrinsic::noalias || + ID == Intrinsic::side_noalias) return ID; return Intrinsic::not_intrinsic; } @@ -591,7 +592,8 @@ } /// \returns \p I after propagating metadata from \p VL. -Instruction *llvm::propagateMetadata(Instruction *Inst, ArrayRef VL) { +Instruction *llvm::propagateMetadata(Instruction *Inst, ArrayRef VL, + bool RemoveNoAlias) { Instruction *I0 = cast(VL[0]); SmallVector, 4> Metadata; I0->getAllMetadataOtherThanDebugLoc(Metadata); @@ -602,6 +604,8 @@ LLVMContext::MD_access_group}) { MDNode *MD = I0->getMetadata(Kind); + if (RemoveNoAlias && (Kind == LLVMContext::MD_noalias)) + MD = nullptr; for (int J = 1, E = VL.size(); MD && J != E; ++J) { const Instruction *IJ = cast(VL[J]); MDNode *IMD = IJ->getMetadata(Kind); @@ -627,7 +631,6 @@ llvm_unreachable("unhandled metadata"); } } - Inst->setMetadata(Kind, MD); } Index: llvm/lib/AsmParser/LLLexer.cpp =================================================================== --- llvm/lib/AsmParser/LLLexer.cpp +++ llvm/lib/AsmParser/LLLexer.cpp @@ -545,6 +545,7 @@ KEYWORD(deplibs); // FIXME: Remove in 4.0. KEYWORD(datalayout); KEYWORD(volatile); + KEYWORD(noalias_sidechannel); KEYWORD(atomic); KEYWORD(unordered); KEYWORD(monotonic); Index: llvm/lib/AsmParser/LLParser.h =================================================================== --- llvm/lib/AsmParser/LLParser.h +++ llvm/lib/AsmParser/LLParser.h @@ -289,6 +289,9 @@ bool ParseOrdering(AtomicOrdering &Ordering); bool ParseOptionalStackAlignment(unsigned &Alignment); bool ParseOptionalCommaAlign(MaybeAlign &Alignment, bool &AteExtraComma); + bool ParseOptionalCommaNoaliasSideChannel(Value *&V, LocTy &Loc, + PerFunctionState &PFS, + bool &AteExtraComma); bool ParseOptionalCommaAddrSpace(unsigned &AddrSpace, LocTy &Loc, bool &AteExtraComma); bool ParseOptionalCommaInAlloca(bool &IsInAlloca); Index: llvm/lib/AsmParser/LLParser.cpp =================================================================== --- llvm/lib/AsmParser/LLParser.cpp +++ llvm/lib/AsmParser/LLParser.cpp @@ -2123,16 +2123,20 @@ /// end. bool LLParser::ParseOptionalCommaAlign(MaybeAlign &Alignment, bool &AteExtraComma) { - AteExtraComma = false; - while (EatIfPresent(lltok::comma)) { + while (AteExtraComma || EatIfPresent(lltok::comma)) { + AteExtraComma = false; // Metadata at the end is an early exit. if (Lex.getKind() == lltok::MetadataVar) { AteExtraComma = true; return false; } - if (Lex.getKind() != lltok::kw_align) + // if we get here, noalias_sidechannel must not be possible any more + if (Lex.getKind() == lltok::kw_noalias_sidechannel) return Error(Lex.getLoc(), "expected metadata or 'align'"); + if (Lex.getKind() != lltok::kw_align) + return Error(Lex.getLoc(), + "expected metadata or 'align' or 'noalias_sidechannel'"); if (ParseOptionalAlignment(Alignment)) return true; } @@ -2140,6 +2144,27 @@ return false; } +/// ParseOptionalCommandNoaliasSideChannel +/// ::= +/// ::= ',' noalias_sidechannel int* %3 +/// +/// This returns with AteExtraComma set to true if it ate an excess comma at the +/// end. +bool LLParser::ParseOptionalCommaNoaliasSideChannel( + Value *&V, LLParser::LocTy &Loc, LLParser::PerFunctionState &PFS, + bool &AteExtraComma) { + assert(AteExtraComma == false); + if (EatIfPresent(lltok::comma)) { + if (Lex.getKind() == lltok::kw_noalias_sidechannel) { + Lex.Lex(); + return ParseTypeAndValue(V, Loc, PFS); + } + + AteExtraComma = true; + } + return false; +} + /// ParseOptionalCommaAddrSpace /// ::= /// ::= ',' addrspace(1) @@ -6932,6 +6957,8 @@ /// 'singlethread'? AtomicOrdering (',' 'align' i32)? int LLParser::ParseLoad(Instruction *&Inst, PerFunctionState &PFS) { Value *Val; LocTy Loc; + Value *NoaliasSideChannel = nullptr; + LocTy NoaliasSideChannelLoc; MaybeAlign Alignment; bool AteExtraComma = false; bool isAtomic = false; @@ -6954,6 +6981,8 @@ if (ParseType(Ty) || ParseToken(lltok::comma, "expected comma after load's type") || ParseTypeAndValue(Val, Loc, PFS) || + ParseOptionalCommaNoaliasSideChannel( + NoaliasSideChannel, NoaliasSideChannelLoc, PFS, AteExtraComma) || ParseScopeAndOrdering(isAtomic, SSID, Ordering) || ParseOptionalCommaAlign(Alignment, AteExtraComma)) return true; @@ -6970,7 +6999,10 @@ return Error(ExplicitTypeLoc, "explicit pointee type doesn't match operand's pointee type"); - Inst = new LoadInst(Ty, Val, "", isVolatile, Alignment, Ordering, SSID); + auto *LI = new LoadInst(Ty, Val, "", isVolatile, Alignment, Ordering, SSID); + if (NoaliasSideChannel) + LI->setNoaliasSideChannelOperand(NoaliasSideChannel); + Inst = LI; return AteExtraComma ? InstExtraComma : InstNormal; } @@ -6981,6 +7013,8 @@ /// 'singlethread'? AtomicOrdering (',' 'align' i32)? int LLParser::ParseStore(Instruction *&Inst, PerFunctionState &PFS) { Value *Val, *Ptr; LocTy Loc, PtrLoc; + Value *NoaliasSideChannel = nullptr; + LocTy NoaliasSideChannelLoc; MaybeAlign Alignment; bool AteExtraComma = false; bool isAtomic = false; @@ -7001,6 +7035,8 @@ if (ParseTypeAndValue(Val, Loc, PFS) || ParseToken(lltok::comma, "expected ',' after store operand") || ParseTypeAndValue(Ptr, PtrLoc, PFS) || + ParseOptionalCommaNoaliasSideChannel( + NoaliasSideChannel, NoaliasSideChannelLoc, PFS, AteExtraComma) || ParseScopeAndOrdering(isAtomic, SSID, Ordering) || ParseOptionalCommaAlign(Alignment, AteExtraComma)) return true; @@ -7017,7 +7053,11 @@ Ordering == AtomicOrdering::AcquireRelease) return Error(Loc, "atomic store cannot use Acquire ordering"); - Inst = new StoreInst(Val, Ptr, isVolatile, Alignment, Ordering, SSID); + auto *SI = new StoreInst(Val, Ptr, isVolatile, Alignment, Ordering, SSID); + if (NoaliasSideChannel) + SI->setNoaliasSideChannelOperand(NoaliasSideChannel); + Inst = SI; + return AteExtraComma ? InstExtraComma : InstNormal; } Index: llvm/lib/AsmParser/LLToken.h =================================================================== --- llvm/lib/AsmParser/LLToken.h +++ llvm/lib/AsmParser/LLToken.h @@ -90,6 +90,7 @@ kw_deplibs, // FIXME: Remove in 4.0 kw_datalayout, kw_volatile, + kw_noalias_sidechannel, kw_atomic, kw_unordered, kw_monotonic, Index: llvm/lib/CodeGen/IntrinsicLowering.cpp =================================================================== --- llvm/lib/CodeGen/IntrinsicLowering.cpp +++ llvm/lib/CodeGen/IntrinsicLowering.cpp @@ -371,6 +371,15 @@ case Intrinsic::var_annotation: break; // Strip out these intrinsics + case Intrinsic::noalias_decl: + case Intrinsic::noalias: + case Intrinsic::side_noalias: + case Intrinsic::noalias_arg_guard: + case Intrinsic::noalias_copy_guard: + // Just forward the value + CI->replaceAllUsesWith(CI->getOperand(0)); + break; + case Intrinsic::memcpy: { Type *IntPtr = DL.getIntPtrType(Context); Value *Size = Builder.CreateIntCast(CI->getArgOperand(2), IntPtr, Index: llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp =================================================================== --- llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -6418,11 +6418,21 @@ case Intrinsic::annotation: case Intrinsic::ptr_annotation: + case Intrinsic::noalias: case Intrinsic::launder_invariant_group: case Intrinsic::strip_invariant_group: + case Intrinsic::side_noalias: + case Intrinsic::noalias_arg_guard: + case Intrinsic::noalias_copy_guard: // Drop the intrinsic, but forward the value setValue(&I, getValue(I.getOperand(0))); return; + case Intrinsic::noalias_decl: + // Generate a dummy value - it will never be used and should get optimized + // away + setValue(&I, DAG.getUNDEF(TLI.getPointerTy(DAG.getDataLayout()))); + return; + case Intrinsic::assume: case Intrinsic::var_annotation: case Intrinsic::sideeffect: Index: llvm/lib/IR/AsmWriter.cpp =================================================================== --- llvm/lib/IR/AsmWriter.cpp +++ llvm/lib/IR/AsmWriter.cpp @@ -4008,15 +4008,30 @@ } Out << ", "; TypePrinter.print(I.getType(), Out); - } else if (Operand) { // Print the normal way. + } else if (const auto *LI = dyn_cast(&I)) { + Out << ' '; + TypePrinter.print(LI->getType(), Out); + Out << ", "; + writeOperand(I.getOperand(0), true); + if (LI->hasNoaliasSideChannelOperand()) { + Out << ", noalias_sidechannel "; + writeOperand(LI->getNoaliasSideChannelOperand(), true); + } + } else if (const auto *SI = dyn_cast(&I)) { + Out << ' '; + writeOperand(I.getOperand(0), true); + Out << ", "; + writeOperand(I.getOperand(1), true); + + if (SI->hasNoaliasSideChannelOperand()) { + Out << ", noalias_sidechannel "; + writeOperand(SI->getNoaliasSideChannelOperand(), true); + } + } else if (Operand) { // Print the normal way. if (const auto *GEP = dyn_cast(&I)) { Out << ' '; TypePrinter.print(GEP->getSourceElementType(), Out); Out << ','; - } else if (const auto *LI = dyn_cast(&I)) { - Out << ' '; - TypePrinter.print(LI->getType(), Out); - Out << ','; } // PrintAllTypes - Instructions who have operands of all the same type @@ -4026,8 +4041,7 @@ Type *TheType = Operand->getType(); // Select, Store and ShuffleVector always print all types. - if (isa(I) || isa(I) || isa(I) - || isa(I)) { + if (isa(I) || isa(I) || isa(I)) { PrintAllTypes = true; } else { for (unsigned i = 1, E = I.getNumOperands(); i != E; ++i) { Index: llvm/lib/IR/IRBuilder.cpp =================================================================== --- llvm/lib/IR/IRBuilder.cpp +++ llvm/lib/IR/IRBuilder.cpp @@ -459,6 +459,129 @@ return createCallHelper(FnAssume, Ops, this); } +Instruction *IRBuilderBase::CreateNoAliasDeclaration(Value *AllocaPtr, + Value *ObjId, + Value *Scope) { + assert(AllocaPtr); + + SmallVector Types = {Type::getInt8PtrTy(getContext()), + AllocaPtr->getType(), ObjId->getType()}; + SmallVector Ops = {AllocaPtr, ObjId, Scope}; + + Module *M = BB->getModule(); + auto *FnIntrinsic = + Intrinsic::getDeclaration(M, Intrinsic::noalias_decl, Types); + return createCallHelper(FnIntrinsic, Ops, this); +} + +Instruction *IRBuilderBase::CreateNoAliasPointer(Value *Ptr, Value *NoAliasDecl, + Value *AddrP, Value *ScopeTag, + const Twine &Name, + uint64_t ObjectId) { + assert(Ptr && AddrP); + Value *I64_0 = + ConstantInt::get(IntegerType::getInt64Ty(getContext()), ObjectId); + return CreateGenericNoAliasIntrinsic(Intrinsic::noalias, Ptr, + {NoAliasDecl, AddrP, I64_0}, {}, + {ScopeTag}, Name); +} + +Instruction *IRBuilderBase::CreateSideNoAliasPlain( + Value *Ptr, Value *NoAliasDecl, Value *AddrP, Value *AddrP_Side, + Value *ObjId, MDNode *ScopeTag, const Twine &Name) { + assert(Ptr && AddrP && AddrP_Side); + return CreateGenericNoAliasIntrinsic(Intrinsic::side_noalias, Ptr, + {NoAliasDecl, AddrP, AddrP_Side, ObjId}, + {ScopeTag}, {}, Name); +} + +Instruction *IRBuilderBase::CreateSideNoAliasPlain( + Value *Ptr, Value *NoAliasDecl, Value *AddrP, Value *AddrP_Side, + Value *ObjId, Value *ScopeValue, const Twine &Name) { + assert(Ptr && AddrP && AddrP_Side); + return CreateGenericNoAliasIntrinsic(Intrinsic::side_noalias, Ptr, + {NoAliasDecl, AddrP, AddrP_Side, ObjId}, + {}, {ScopeValue}, Name); +} + +Instruction *IRBuilderBase::CreateNoAliasArgGuard(Value *Ptr, + Value *SideChannel, + const Twine &Name) { + return CreateGenericNoAliasIntrinsic(Intrinsic::noalias_arg_guard, Ptr, + {SideChannel}, {}, {}, Name); +} + +Instruction * +IRBuilderBase::CreateNoAliasCopyGuard(Value *BasePtr, Value *NoAliasDecl, + ArrayRef EncodedIndices, + MDNode *ScopeTag, const Twine &Name) { + SmallVector IndicesArray; + + assert(NoAliasDecl != nullptr); + assert(ScopeTag != nullptr); + assert(!EncodedIndices.empty() && + "NoAliasCopyGuard should have at least one set of indices"); + + for (unsigned i = 0, ci = EncodedIndices.size(); i < ci;) { + auto length = EncodedIndices[i++]; + assert(length > 0 && + "NoAliasCopyGuard: encoded indices problem: zero length seen"); + length += i; + assert(length <= ci && + "NoAliasCopyGuard: encoded indices problem: too few indices"); + + SmallVector Indices; + while (i < length) { + Indices.push_back( + llvm::ConstantAsMetadata::get(getInt64(EncodedIndices[i]))); + ++i; + } + IndicesArray.push_back(llvm::MDNode::get(Context, Indices)); + } + + return CreateGenericNoAliasIntrinsic( + Intrinsic::noalias_copy_guard, BasePtr, {NoAliasDecl}, + {llvm::MDNode::get(Context, IndicesArray), ScopeTag}, {}, Name); +} + +Instruction *IRBuilderBase::CreateGenericNoAliasIntrinsic( + Intrinsic::ID ID, Value *Ptr, ArrayRef args_opt, + ArrayRef MDNodes, ArrayRef MDValues, const Twine &Name) { + // FIXME: We can currently mangle just about everything, but not literal + // structs (for those, bitcast to i8*). + // FIXME: as far as I see in 'getMangledTypeStr', this should be ok now. + Value *CPtr = Ptr; + if (auto *STyp = + dyn_cast(CPtr->getType()->getPointerElementType())) { + if (STyp->isLiteral()) + CPtr = getCastedInt8PtrValue(CPtr); + } + + SmallVector Types = {CPtr->getType()}; + SmallVector Ops = {CPtr}; + for (auto *A : args_opt) { + Types.push_back(A->getType()); + Ops.push_back(A); + } + // For the metadata info, types must not be added: + for (auto *MD : MDNodes) { + Ops.push_back(MetadataAsValue::get(Context, MD)); + } + Ops.insert(Ops.end(), MDValues.begin(), MDValues.end()); + Module *M = BB->getModule(); + auto *FnIntrinsic = Intrinsic::getDeclaration(M, ID, Types); + Instruction *Ret = createCallHelper(FnIntrinsic, Ops, this, Name); + + if (Ret->getType() != Ptr->getType()) { + BitCastInst *BCI = new BitCastInst(Ret, Ptr->getType(), Name + ".cast"); + BB->getInstList().insert(InsertPt, BCI); + SetInstDebugLocation(BCI); + Ret = BCI; + } + + return Ret; +} + /// Create a call to a Masked Load intrinsic. /// \p Ptr - base pointer for the load /// \p Align - alignment of the source location Index: llvm/lib/IR/Instructions.cpp =================================================================== --- llvm/lib/IR/Instructions.cpp +++ llvm/lib/IR/Instructions.cpp @@ -1316,8 +1316,11 @@ LoadInst::LoadInst(Type *Ty, Value *Ptr, const Twine &Name, bool isVolatile, MaybeAlign Align, AtomicOrdering Order, SyncScope::ID SSID, Instruction *InsertBef) - : UnaryInstruction(Ty, Load, Ptr, InsertBef) { + : Instruction(Ty, Load, OperandTraits::op_begin(this), 2, + InsertBef) { assert(Ty == cast(Ptr->getType())->getElementType()); + Op<0>() = Ptr; + setLoadInstNumOperands(1); setVolatile(isVolatile); setAlignment(MaybeAlign(Align)); setAtomic(Order, SSID); @@ -1328,7 +1331,10 @@ LoadInst::LoadInst(Type *Ty, Value *Ptr, const Twine &Name, bool isVolatile, MaybeAlign Align, AtomicOrdering Order, SyncScope::ID SSID, BasicBlock *InsertAE) - : UnaryInstruction(Ty, Load, Ptr, InsertAE) { + : Instruction(Ty, Load, OperandTraits::op_begin(this), 2, + InsertAE) { + Op<0>() = Ptr; + setLoadInstNumOperands(1); assert(Ty == cast(Ptr->getType())->getElementType()); setVolatile(isVolatile); setAlignment(Align); @@ -1392,10 +1398,10 @@ AtomicOrdering Order, SyncScope::ID SSID, Instruction *InsertBefore) : Instruction(Type::getVoidTy(val->getContext()), Store, - OperandTraits::op_begin(this), - OperandTraits::operands(this), InsertBefore) { + OperandTraits::op_begin(this), 3, InsertBefore) { Op<0>() = val; Op<1>() = addr; + setStoreInstNumOperands(2); setVolatile(isVolatile); setAlignment(Align); setAtomic(Order, SSID); @@ -1406,10 +1412,10 @@ AtomicOrdering Order, SyncScope::ID SSID, BasicBlock *InsertAtEnd) : Instruction(Type::getVoidTy(val->getContext()), Store, - OperandTraits::op_begin(this), - OperandTraits::operands(this), InsertAtEnd) { + OperandTraits::op_begin(this), 3, InsertAtEnd) { Op<0>() = val; Op<1>() = addr; + setStoreInstNumOperands(2); setVolatile(isVolatile); setAlignment(Align); setAtomic(Order, SSID); @@ -4136,15 +4142,32 @@ } LoadInst *LoadInst::cloneImpl() const { - return new LoadInst(getType(), getOperand(0), Twine(), isVolatile(), - MaybeAlign(getAlignment()), getOrdering(), - getSyncScopeID()); + LoadInst *Result = + new LoadInst(getType(), getOperand(0), Twine(), isVolatile(), + MaybeAlign(getAlignment()), getOrdering(), getSyncScopeID()); + // - we must keep the same number of arguments (for vector optimizations) + // - if we duplicate the side channel, we can get into problems with passes + // that don't know how to handle it (Like MergeLoadStoreMotion shows) + // - safe alternative: keep the argument, but map it to undef. + if (hasNoaliasSideChannelOperand()) + Result->setNoaliasSideChannelOperand( + UndefValue::get(getNoaliasSideChannelOperand()->getType())); + return Result; } StoreInst *StoreInst::cloneImpl() const { - return new StoreInst(getOperand(0), getOperand(1), isVolatile(), - MaybeAlign(getAlignment()), getOrdering(), - getSyncScopeID()); + StoreInst *Result = new StoreInst(getOperand(0), getOperand(1), isVolatile(), + MaybeAlign(getAlignment()), getOrdering(), + getSyncScopeID()); + + // we must keep the same number of arguments (for vector optimizations) + // - if we duplicate the side channel, we can get into problems with passes + // that don't know how to handle it (Like MergeLoadStoreMotion shows) + // - safe alternative: keep the argument, but map it to undef. + if (hasNoaliasSideChannelOperand()) + Result->setNoaliasSideChannelOperand( + UndefValue::get(getNoaliasSideChannelOperand()->getType())); + return Result; } AtomicCmpXchgInst *AtomicCmpXchgInst::cloneImpl() const { Index: llvm/lib/IR/Metadata.cpp =================================================================== --- llvm/lib/IR/Metadata.cpp +++ llvm/lib/IR/Metadata.cpp @@ -10,6 +10,7 @@ // //===----------------------------------------------------------------------===// +#include "llvm/IR/Metadata.h" #include "LLVMContextImpl.h" #include "MetadataImpl.h" #include "SymbolTableListTraitsImpl.h" @@ -37,8 +38,8 @@ #include "llvm/IR/GlobalObject.h" #include "llvm/IR/GlobalVariable.h" #include "llvm/IR/Instruction.h" +#include "llvm/IR/Instructions.h" #include "llvm/IR/LLVMContext.h" -#include "llvm/IR/Metadata.h" #include "llvm/IR/Module.h" #include "llvm/IR/TrackingMDRef.h" #include "llvm/IR/Type.h" @@ -1261,7 +1262,23 @@ void Instruction::setAAMetadata(const AAMDNodes &N) { setMetadata(LLVMContext::MD_tbaa, N.TBAA); setMetadata(LLVMContext::MD_alias_scope, N.Scope); - setMetadata(LLVMContext::MD_noalias, N.NoAlias); + if (!N.NoAliasSideChannel) + setMetadata(LLVMContext::MD_noalias, N.NoAlias); + // else postpone until setAAMetadataNoAliasSideChannel +} + +void Instruction::setAAMetadataNoAliasSideChannel(const AAMDNodes &N) { + // It is not correct to always propagate the noalias_sidechannel. + // 'setAAMetadata' must already have been called ! + if (N.NoAliasSideChannel) { + // postponed from setAAMetadata + setMetadata(LLVMContext::MD_noalias, N.NoAlias); + if (LoadInst *LI = dyn_cast(this)) { + LI->setNoaliasSideChannelOperand(N.NoAliasSideChannel); + } else if (StoreInst *SI = dyn_cast(this)) { + SI->setNoaliasSideChannelOperand(N.NoAliasSideChannel); + } + } } MDNode *Instruction::getMetadataImpl(unsigned KindID) const { Index: llvm/lib/IR/User.cpp =================================================================== --- llvm/lib/IR/User.cpp +++ llvm/lib/IR/User.cpp @@ -171,20 +171,19 @@ Use **HungOffOperandList = static_cast(Usr) - 1; // drop the hung off uses. - Use::zap(*HungOffOperandList, *HungOffOperandList + Obj->NumUserOperands, + Use::zap(*HungOffOperandList, *HungOffOperandList + Obj->getNumOperands(), /* Delete */ true); ::operator delete(HungOffOperandList); } else if (Obj->HasDescriptor) { Use *UseBegin = static_cast(Usr) - Obj->NumUserOperands; - Use::zap(UseBegin, UseBegin + Obj->NumUserOperands, /* Delete */ false); + Use::zap(UseBegin, UseBegin + Obj->getNumOperands(), /* Delete */ false); auto *DI = reinterpret_cast(UseBegin) - 1; uint8_t *Storage = reinterpret_cast(DI) - DI->SizeInBytes; ::operator delete(Storage); } else { Use *Storage = static_cast(Usr) - Obj->NumUserOperands; - Use::zap(Storage, Storage + Obj->NumUserOperands, - /* Delete */ false); + Use::zap(Storage, Storage + Obj->getNumOperands(), /* Delete */ false); ::operator delete(Storage); } } Index: llvm/lib/IR/Value.cpp =================================================================== --- llvm/lib/IR/Value.cpp +++ llvm/lib/IR/Value.cpp @@ -50,9 +50,9 @@ } Value::Value(Type *ty, unsigned scid) - : VTy(checkType(ty)), UseList(nullptr), SubclassID(scid), - HasValueHandle(0), SubclassOptionalData(0), SubclassData(0), - NumUserOperands(0), IsUsedByMD(false), HasName(false) { + : VTy(checkType(ty)), UseList(nullptr), SubclassID(scid), HasValueHandle(0), + SubclassOptionalData(0), SubclassData(0), NumUserOperands(0), + IsUsedByMD(false), HasName(false), NumUserOperandsDelta(0) { static_assert(ConstantFirstVal == 0, "!(SubclassID < ConstantFirstVal)"); // FIXME: Why isn't this in the subclass gunk?? // Note, we cannot call isa before the CallInst has been @@ -451,6 +451,26 @@ }); } +// Like replaceAllUsesWith except it does not handle constants or basic blocks. +// This routine leaves uses outside BB. +void Value::replaceUsesInsideBlock(Value *New, BasicBlock *BB) { + assert(New && "Value::replaceUsesInsideBlock(, BB) is invalid!"); + assert(!contains(New, this) && + "this->replaceUsesInsideBlock(expr(this), BB) is NOT valid!"); + assert(New->getType() == getType() && + "replaceUses of value with new value of different type!"); + assert(BB && "Basic block that may contain a use of 'New' must be defined\n"); + + use_iterator UI = use_begin(), E = use_end(); + for (; UI != E;) { + Use &U = *UI; + ++UI; + auto *Usr = dyn_cast(U.getUser()); + if (Usr && Usr->getParent() == BB) + U.set(New); + } +} + namespace { // Various metrics for how much to strip off of pointers. enum PointerStripKind { @@ -458,6 +478,7 @@ PSK_ZeroIndicesAndAliases, PSK_ZeroIndicesSameRepresentation, PSK_ZeroIndicesAndInvariantGroups, + PSK_ZeroIndicesAndInvariantGroupsAndNoAliasIntr, PSK_InBoundsConstantIndices, PSK_InBounds }; @@ -479,6 +500,7 @@ case PSK_ZeroIndicesAndAliases: case PSK_ZeroIndicesSameRepresentation: case PSK_ZeroIndicesAndInvariantGroups: + case PSK_ZeroIndicesAndInvariantGroupsAndNoAliasIntr: if (!GEP->hasAllZeroIndices()) return V; break; @@ -516,6 +538,18 @@ V = Call->getArgOperand(0); continue; } + // Same as above, but also for noalias intrinsics + if (StripKind == + PSK_ZeroIndicesAndInvariantGroupsAndNoAliasIntr && + (Call->getIntrinsicID() == Intrinsic::launder_invariant_group || + Call->getIntrinsicID() == Intrinsic::strip_invariant_group || + Call->getIntrinsicID() == Intrinsic::noalias || + Call->getIntrinsicID() == Intrinsic::side_noalias || + Call->getIntrinsicID() == Intrinsic::noalias_arg_guard || + Call->getIntrinsicID() == Intrinsic::noalias_copy_guard)) { + V = Call->getArgOperand(0); + continue; + } } return V; } @@ -546,6 +580,11 @@ return stripPointerCastsAndOffsets(this); } +const Value *Value::stripPointerCastsAndInvariantGroupsAndNoAliasIntr() const { + return stripPointerCastsAndOffsets< + PSK_ZeroIndicesAndInvariantGroupsAndNoAliasIntr>(this); +} + const Value * Value::stripAndAccumulateConstantOffsets(const DataLayout &DL, APInt &Offset, bool AllowNonInbounds) const { Index: llvm/lib/IR/Verifier.cpp =================================================================== --- llvm/lib/IR/Verifier.cpp +++ llvm/lib/IR/Verifier.cpp @@ -4732,6 +4732,81 @@ "Intrinsic does not support vectors", &Call); break; } + case Intrinsic::noalias_decl: { + Assert( + (isa( + Call.getArgOperand(Intrinsic::NoAliasDeclAllocaArg)) || + isa(Call.getArgOperand(Intrinsic::NoAliasDeclAllocaArg))), + "llvm.noalias.decl must refer to a null-ptr or an alloca instruction", + Call); + break; + } + case Intrinsic::noalias: { + auto *DA = Call.getArgOperand(Intrinsic::NoAliasNoAliasDeclArg); + if (!isa(DA) && !isa(DA)) { + auto *DeclSite = dyn_cast(DA); + Assert(DeclSite, "llvm.noalias arg1 must refer to a llvm.noalias.decl", + Call); + Assert((DeclSite->getArgOperand(Intrinsic::NoAliasDeclObjIdArg) == + Call.getArgOperand(Intrinsic::NoAliasIdentifyPObjIdArg)), + "llvm.noalias objId arg must match with llvm.noalias.decl", Call); + Assert((DeclSite->getArgOperand(Intrinsic::NoAliasDeclScopeArg) == + Call.getArgOperand(Intrinsic::NoAliasScopeArg)), + "llvm.noalias scope arg must match with llvm.noalias.decl", Call); + } + break; + } + case Intrinsic::side_noalias: { + auto *DA = Call.getArgOperand(Intrinsic::SideNoAliasNoAliasDeclArg); + if (!isa(DA) && !isa(DA)) { + // JumpThreading can duplicate llvm.noalias.decl. + // I do not think we should prohibit that. + SmallVector NoAliasDeclarations; + { + DenseSet Handled; + SmallVector Worklist = {DA}; + while (!Worklist.empty()) { + auto *V = Worklist.pop_back_val(); + if (Handled.insert(V).second) { + if (isa(V)) { + NoAliasDeclarations.push_back(cast(V)); + } else if (PHINode *PHI = cast(V)) { + auto OV = PHI->operand_values(); + Worklist.insert(Worklist.end(), OV.begin(), OV.end()); + } else if (isa(V)) { + // ignore undefs + } else if (isa(V)) { + // ignore nullptr + } else { + Assert(false, + "llvm.side.noalias depends on something that is not a " + "llvm.noalias.decl", + Call); + } + } + } + } + + Assert(!NoAliasDeclarations.empty(), + "llvm.side.noalias should depend on a llvm.noalias.decl", Call); + for (auto *CB : NoAliasDeclarations) { + Assert(CB, "llvm.side.noalias arg1 must refer to a llvm.noalias.decl", + Call); + Assert(CB->getIntrinsicID() == Intrinsic::noalias_decl, + "llvm.side.noalias arg1 must refer to a llvm.noalias.decl", + Call); + Assert((CB->getArgOperand(Intrinsic::NoAliasDeclObjIdArg) == + Call.getArgOperand(Intrinsic::SideNoAliasIdentifyPObjIdArg)), + "llvm.side.noalias objId arg must match with llvm.noalias.decl", + Call); + Assert((CB->getArgOperand(Intrinsic::NoAliasDeclScopeArg) == + Call.getArgOperand(Intrinsic::SideNoAliasScopeArg)), + "llvm.side.noalias scope arg must match with llvm.noalias.decl", + Call); + } + } + break; + } }; } Index: llvm/lib/Passes/PassBuilder.cpp =================================================================== --- llvm/lib/Passes/PassBuilder.cpp +++ llvm/lib/Passes/PassBuilder.cpp @@ -110,6 +110,7 @@ #include "llvm/Transforms/Scalar/AlignmentFromAssumptions.h" #include "llvm/Transforms/Scalar/BDCE.h" #include "llvm/Transforms/Scalar/CallSiteSplitting.h" +#include "llvm/Transforms/Scalar/ConnectNoAliasDecl.h" #include "llvm/Transforms/Scalar/ConstantHoisting.h" #include "llvm/Transforms/Scalar/CorrelatedValuePropagation.h" #include "llvm/Transforms/Scalar/DCE.h" @@ -153,6 +154,7 @@ #include "llvm/Transforms/Scalar/NaryReassociate.h" #include "llvm/Transforms/Scalar/NewGVN.h" #include "llvm/Transforms/Scalar/PartiallyInlineLibCalls.h" +#include "llvm/Transforms/Scalar/PropagateAndConvertNoAlias.h" #include "llvm/Transforms/Scalar/Reassociate.h" #include "llvm/Transforms/Scalar/RewriteStatepointsForGC.h" #include "llvm/Transforms/Scalar/SCCP.h" Index: llvm/lib/Passes/PassRegistry.def =================================================================== --- llvm/lib/Passes/PassRegistry.def +++ llvm/lib/Passes/PassRegistry.def @@ -164,7 +164,9 @@ FUNCTION_PASS("bounds-checking", BoundsCheckingPass()) FUNCTION_PASS("break-crit-edges", BreakCriticalEdgesPass()) FUNCTION_PASS("callsite-splitting", CallSiteSplittingPass()) +FUNCTION_PASS("connect-noaliasdecl", ConnectNoAliasDeclPass()) FUNCTION_PASS("consthoist", ConstantHoistingPass()) +FUNCTION_PASS("convert-noalias", PropagateAndConvertNoAliasPass()) FUNCTION_PASS("chr", ControlHeightReductionPass()) FUNCTION_PASS("correlated-propagation", CorrelatedValuePropagationPass()) FUNCTION_PASS("dce", DCEPass()) Index: llvm/lib/Transforms/IPO/ArgumentPromotion.cpp =================================================================== --- llvm/lib/Transforms/IPO/ArgumentPromotion.cpp +++ llvm/lib/Transforms/IPO/ArgumentPromotion.cpp @@ -167,24 +167,26 @@ for (User *U : I->users()) { Instruction *UI = cast(U); Type *SrcTy; - if (LoadInst *L = dyn_cast(UI)) + IndicesVector Indices; + LoadInst *L = dyn_cast(UI); + if (L) SrcTy = L->getType(); else SrcTy = cast(UI)->getSourceElementType(); - IndicesVector Indices; - Indices.reserve(UI->getNumOperands() - 1); - // Since loads will only have a single operand, and GEPs only a single - // non-index operand, this will record direct loads without any indices, - // and gep+loads with the GEP indices. - for (User::op_iterator II = UI->op_begin() + 1, IE = UI->op_end(); - II != IE; ++II) - Indices.push_back(cast(*II)->getSExtValue()); - // GEPs with a single 0 index can be merged with direct loads - if (Indices.size() == 1 && Indices.front() == 0) - Indices.clear(); + if (L == nullptr) { // not a direct load (loads can have 2 operands) + Indices.reserve(UI->getNumOperands() - 1); + // This will record direct loads without any indices, and gep+loads + // with the GEP indices. + for (User::op_iterator II = UI->op_begin() + 1, IE = UI->op_end(); + II != IE; ++II) + Indices.push_back(cast(*II)->getSExtValue()); + // GEPs with a single 0 index can be merged with direct loads + if (Indices.size() == 1 && Indices.front() == 0) + Indices.clear(); + } ArgIndices.insert(std::make_pair(SrcTy, Indices)); LoadInst *OrigLoad; - if (LoadInst *L = dyn_cast(UI)) + if (L) OrigLoad = L; else // Take any load, we will use it only to update Alias Analysis @@ -309,6 +311,7 @@ AAMDNodes AAInfo; OrigLoad->getAAMetadata(AAInfo); newLoad->setAAMetadata(AAInfo); + newLoad->setAAMetadataNoAliasSideChannel(AAInfo); Args.push_back(newLoad); ArgAttrVec.push_back(AttributeSet()); Index: llvm/lib/Transforms/IPO/FunctionAttrs.cpp =================================================================== --- llvm/lib/Transforms/IPO/FunctionAttrs.cpp +++ llvm/lib/Transforms/IPO/FunctionAttrs.cpp @@ -1013,6 +1013,12 @@ case Instruction::Call: case Instruction::Invoke: { CallSite CS(RVI); + if (CS.getIntrinsicID() == Intrinsic::noalias_arg_guard || + CS.getIntrinsicID() == Intrinsic::noalias_copy_guard) { + // Look through a noalias arg/copy guard + FlowsToReturn.insert(RVI->getOperand(0)); + continue; + } Function *Callee = CS.getCalledFunction(); // A call to a node within the SCC is assumed to return null until // proven otherwise Index: llvm/lib/Transforms/IPO/GlobalOpt.cpp =================================================================== --- llvm/lib/Transforms/IPO/GlobalOpt.cpp +++ llvm/lib/Transforms/IPO/GlobalOpt.cpp @@ -211,6 +211,9 @@ Value *V = SI->getValueOperand(); if (isa(V)) { Changed = true; + // skip potential second use from noalias_sidechannel before the erase + if ((UI != E) && (*UI == SI)) + UI++; SI->eraseFromParent(); } else if (Instruction *I = dyn_cast(V)) { if (I->hasOneUse()) @@ -763,6 +766,9 @@ Changed |= OptimizeAwayTrappingUsesOfValue(LI, LV); // If we were able to delete all uses of the loads if (LI->use_empty()) { + // skip potential second use from noalias_sidechannel before the erase + if ((GUI != E) && (*GUI == LI)) + GUI++; LI->eraseFromParent(); Changed = true; } else { @@ -1250,6 +1256,9 @@ // users. for (auto UI = PN->user_begin(), E = PN->user_end(); UI != E;) { Instruction *User = cast(*UI++); + // skip potential second use from noalias_sidechannel + if ((UI != E) && (*UI == User)) + ++UI; RewriteHeapSROALoadUser(User, InsertedScalarizedValues, PHIsToRewrite); } } @@ -1262,6 +1271,8 @@ std::vector > &PHIsToRewrite) { for (auto UI = Load->user_begin(), E = Load->user_end(); UI != E;) { Instruction *User = cast(*UI++); + if ((UI != E) && (*UI == User)) + ++UI; RewriteHeapSROALoadUser(User, InsertedScalarizedValues, PHIsToRewrite); } @@ -1399,6 +1410,9 @@ // of the per-field globals instead. for (auto UI = GV->user_begin(), E = GV->user_end(); UI != E;) { Instruction *User = cast(*UI++); + // skip potential second use from noalias_sidechannel + if ((UI != E) && (*UI == User)) + ++UI; if (LoadInst *LI = dyn_cast(User)) { RewriteUsesOfLoadForHeapSRoA(LI, InsertedScalarizedValues, PHIsToRewrite); Index: llvm/lib/Transforms/IPO/PassManagerBuilder.cpp =================================================================== --- llvm/lib/Transforms/IPO/PassManagerBuilder.cpp +++ llvm/lib/Transforms/IPO/PassManagerBuilder.cpp @@ -260,8 +260,16 @@ addInitialAliasAnalysisPasses(FPM); FPM.add(createCFGSimplificationPass()); + FPM.add(createConnectNoAliasDeclPass()); FPM.add(createSROAPass()); FPM.add(createEarlyCSEPass()); + + // Propagate and Convert as early as possible. + // But do it after SROA! + FPM.add(createPropagateAndConvertNoAliasPass()); + if (VerifyOutput) + FPM.add(createVerifierPass()); + FPM.add(createLowerExpectIntrinsicPass()); } @@ -290,8 +298,16 @@ IP.HintThreshold = 325; MPM.add(createFunctionInliningPass(IP)); + MPM.add(createConnectNoAliasDeclPass()); MPM.add(createSROAPass()); MPM.add(createEarlyCSEPass()); // Catch trivial redundancies + + // Propagate and Convert as early as possible. + // But do it after SROA and EarlyCSE ! + MPM.add(createPropagateAndConvertNoAliasPass()); + if (VerifyOutput) + MPM.add(createVerifierPass()); + MPM.add(createCFGSimplificationPass()); // Merge & remove BBs MPM.add(createInstructionCombiningPass()); // Combine silly seq's addExtensionsToPM(EP_Peephole, MPM); @@ -319,9 +335,17 @@ void PassManagerBuilder::addFunctionSimplificationPasses( legacy::PassManagerBase &MPM) { // Start of function pass. + MPM.add(createConnectNoAliasDeclPass()); // Break up aggregate allocas, using SSAUpdater. MPM.add(createSROAPass()); MPM.add(createEarlyCSEPass(true /* Enable mem-ssa. */)); // Catch trivial redundancies + + // Propagate and Convert as early as possible. + // But do it after SROA and EarlyCSE ! + MPM.add(createPropagateAndConvertNoAliasPass()); + if (VerifyOutput) + MPM.add(createVerifierPass()); + if (EnableGVNHoist) MPM.add(createGVNHoistPass()); if (EnableGVNSink) { @@ -402,6 +426,14 @@ // opened up by them. addInstructionCombiningPass(MPM); addExtensionsToPM(EP_Peephole, MPM); + + MPM.add(createConnectNoAliasDeclPass()); + // Propagate and Convert as early as possible. + // But do it after SROA! + MPM.add(createPropagateAndConvertNoAliasPass()); + if (VerifyOutput) + MPM.add(createVerifierPass()); + MPM.add(createJumpThreadingPass()); // Thread jumps MPM.add(createCorrelatedValuePropagationPass()); MPM.add(createDeadStoreEliminationPass()); // Delete dead stores @@ -555,6 +587,12 @@ MPM.add(Inliner); Inliner = nullptr; RunInliner = true; + + // Propagate and Convert as early as possible. + // But do it after SROA! + MPM.add(createPropagateAndConvertNoAliasPass()); + if (VerifyOutput) + MPM.add(createVerifierPass()); } MPM.add(createPostOrderFunctionAttrsLegacyPass()); @@ -873,6 +911,12 @@ if (RunInliner) { PM.add(Inliner); Inliner = nullptr; + + // Propagate and Convert as early as possible. + // But do it after SROA! + PM.add(createPropagateAndConvertNoAliasPass()); + if (VerifyOutput) + PM.add(createVerifierPass()); } PM.add(createPruneEHPass()); // Remove dead EH info. @@ -897,6 +941,12 @@ // Break up allocas PM.add(createSROAPass()); + // Propagate and Convert as early as possible. + // But do it after SROA! + PM.add(createPropagateAndConvertNoAliasPass()); + if (VerifyOutput) + PM.add(createVerifierPass()); + // LTO provides additional opportunities for tailcall elimination due to // link-time inlining, and visibility of nocapture attribute. PM.add(createTailCallEliminationPass()); Index: llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp =================================================================== --- llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp +++ llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp @@ -4060,6 +4060,22 @@ } break; } + case Intrinsic::noalias_decl: { + Value *Op0 = II->getOperand(0); + Value *BaseOp0 = Op0->stripPointerCasts(); + if (Op0 != BaseOp0) { + auto *NewNoAlias = Builder.CreateNoAliasDeclaration( + BaseOp0, II->getOperand(1), II->getOperand(2)); + // bwaahh seems we need to do everything ourselves ? + II->replaceAllUsesWith(NewNoAlias); + Worklist.Remove(II); + II->eraseFromParent(); + MadeIRChange = true; + + return nullptr; + } + break; + } } return visitCallBase(*II); } Index: llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp =================================================================== --- llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp +++ llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp @@ -462,10 +462,20 @@ NewPtr->getType()->getPointerAddressSpace() == AS)) NewPtr = IC.Builder.CreateBitCast(Ptr, NewTy->getPointerTo(AS)); + Value *NewSidePtr = nullptr; + if (LI.hasNoaliasSideChannelOperand()) { + NewSidePtr = IC.Builder.CreateBitCast(LI.getNoaliasSideChannelOperand(), + NewTy->getPointerTo(AS)); + } + LoadInst *NewLoad = IC.Builder.CreateAlignedLoad( NewTy, NewPtr, LI.getAlignment(), LI.isVolatile(), LI.getName() + Suffix); NewLoad->setAtomic(LI.getOrdering(), LI.getSyncScopeID()); copyMetadataForLoad(*NewLoad, LI); + if (LI.hasNoaliasSideChannelOperand()) { + assert(NewSidePtr); + NewLoad->setNoaliasSideChannelOperand(NewSidePtr); + } return NewLoad; } @@ -481,6 +491,12 @@ SmallVector, 8> MD; SI.getAllMetadata(MD); + Value *NewSidePtr = nullptr; + if (SI.hasNoaliasSideChannelOperand()) { + NewSidePtr = IC.Builder.CreateBitCast(SI.getNoaliasSideChannelOperand(), + V->getType()->getPointerTo(AS)); + } + StoreInst *NewStore = IC.Builder.CreateAlignedStore( V, IC.Builder.CreateBitCast(Ptr, V->getType()->getPointerTo(AS)), SI.getAlignment(), SI.isVolatile()); @@ -521,6 +537,11 @@ } } + if (SI.hasNoaliasSideChannelOperand()) { + assert(NewSidePtr); + NewStore->setNoaliasSideChannelOperand(NewSidePtr); + } + return NewStore; } @@ -602,6 +623,10 @@ // Replace all the stores with stores of the newly loaded value. for (auto UI = LI.user_begin(), UE = LI.user_end(); UI != UE;) { auto *SI = cast(*UI++); + // skip the noalias_sidechannel + if ((UI != UE) && (*UI == SI)) + ++UI; + IC.Builder.SetInsertPoint(SI); combineStoreToNewValue(IC, *SI, NewLoad); IC.eraseInstFromFunction(*SI); @@ -687,6 +712,7 @@ AAMDNodes AAMD; LI.getAAMetadata(AAMD); L->setAAMetadata(AAMD); + L->setAAMetadataNoAliasSideChannel(AAMD); V = IC.Builder.CreateInsertValue(V, L, i); } @@ -737,6 +763,7 @@ AAMDNodes AAMD; LI.getAAMetadata(AAMD); L->setAAMetadata(AAMD); + L->setAAMetadataNoAliasSideChannel(AAMD); V = IC.Builder.CreateInsertValue(V, L, i); Offset += EltSize; } @@ -949,6 +976,13 @@ Instruction *InstCombiner::visitLoadInst(LoadInst &LI) { Value *Op = LI.getOperand(0); + if (LI.hasNoaliasSideChannelOperand() && + (LI.getNoaliasSideChannelOperand() == LI.getPointerOperand())) { + // degenerated side-channel + LI.removeNoaliasSideChannelOperand(); + return &LI; + } + // Try to canonicalize the loaded type. if (Instruction *Res = combineLoadToOperationType(*this, LI)) return Res; @@ -1219,6 +1253,7 @@ AAMDNodes AAMD; SI.getAAMetadata(AAMD); NS->setAAMetadata(AAMD); + NS->setAAMetadataNoAliasSideChannel(AAMD); } return true; @@ -1269,6 +1304,7 @@ AAMDNodes AAMD; SI.getAAMetadata(AAMD); NS->setAAMetadata(AAMD); + NS->setAAMetadataNoAliasSideChannel(AAMD); Offset += EltSize; } @@ -1351,6 +1387,13 @@ Value *Val = SI.getOperand(0); Value *Ptr = SI.getOperand(1); + if (SI.hasNoaliasSideChannelOperand() && + (SI.getNoaliasSideChannelOperand() == SI.getPointerOperand())) { + // degenerated side-channel + SI.removeNoaliasSideChannelOperand(); + return &SI; + } + // Try to canonicalize the stored type. if (combineStoreToValueType(*this, SI)) return eraseInstFromFunction(SI); @@ -1591,6 +1634,7 @@ if (AATags) { OtherStore->getAAMetadata(AATags, /* Merge = */ true); NewSI->setAAMetadata(AATags); + NewSI->setAAMetadataNoAliasSideChannel(AATags); } // Nuke the old stores. Index: llvm/lib/Transforms/InstCombine/InstructionCombining.cpp =================================================================== --- llvm/lib/Transforms/InstCombine/InstructionCombining.cpp +++ llvm/lib/Transforms/InstCombine/InstructionCombining.cpp @@ -2752,9 +2752,27 @@ AAMDNodes Nodes; L->getAAMetadata(Nodes); NL->setAAMetadata(Nodes); + NL->setAAMetadataNoAliasSideChannel(Nodes); // Returning the load directly will cause the main loop to insert it in - // the wrong spot, so use replaceInstUsesWith(). - return replaceInstUsesWith(EV, NL); + // the wrong spot, so use ReplaceInstUsesWith(). + { + for (auto mdtag : { + LLVMContext::MD_dbg, + LLVMContext::MD_tbaa, + LLVMContext::MD_prof, + LLVMContext::MD_fpmath, + LLVMContext::MD_tbaa_struct, + LLVMContext::MD_invariant_load, + LLVMContext::MD_alias_scope, + LLVMContext::MD_noalias, + LLVMContext::MD_nontemporal, + LLVMContext::MD_mem_parallel_loop_access }) { + if(auto *MD = L->getMetadata(mdtag)) { + NL->setMetadata(mdtag, MD); + } + } + return replaceInstUsesWith(EV, NL); + } } // We could simplify extracts from other values. Note that nested extracts may // already be simplified implicitly by the above: extract (extract (insert) ) Index: llvm/lib/Transforms/Scalar/CMakeLists.txt =================================================================== --- llvm/lib/Transforms/Scalar/CMakeLists.txt +++ llvm/lib/Transforms/Scalar/CMakeLists.txt @@ -3,6 +3,7 @@ AlignmentFromAssumptions.cpp BDCE.cpp CallSiteSplitting.cpp + ConnectNoAliasDecl.cpp ConstantHoisting.cpp ConstantProp.cpp CorrelatedValuePropagation.cpp @@ -56,6 +57,7 @@ NewGVN.cpp PartiallyInlineLibCalls.cpp PlaceSafepoints.cpp + PropagateAndConvertNoAlias.cpp Reassociate.cpp Reg2Mem.cpp RewriteStatepointsForGC.cpp Index: llvm/lib/Transforms/Scalar/ConnectNoAliasDecl.cpp =================================================================== --- /dev/null +++ llvm/lib/Transforms/Scalar/ConnectNoAliasDecl.cpp @@ -0,0 +1,112 @@ +//===- ConnectNoAliasDecl.h -------------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// This pass connects side.noalias intrinsics to the corresponding +/// llvm.noalias.decl, based on the alloca of the pointer. +/// +//===----------------------------------------------------------------------===// +// +// When the original restrict declaration is not directly available, +// llvm.noalias, llvm.side.noalias and llvm.noalias.copy.guard can be associated +// with an 'unknown' (out of function) noalias scope. After certain +// optimizations, like SROA, inlining, ... it is possible that a +// llvm.noalias.decl is associated to an alloca, to which a llvm.noalias, +// llvm.side.noalias orllvm.noalias.copy.guard intrinsics is also associated. +// When the latter intrinsics are still refering to the 'unknown' scope, we can +// now refine the information by associating the llvm.noalias.decl and its +// information to the other noalias intrinsics that are depending on the same +// alloca. +// +// This pass will connect those llvm.noalias.decl to those +// llvm.noalias,llvm.side.noalias and llvm.noalias.copy.guard. It will also +// propagate the embedded information. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Transforms/Scalar/ConnectNoAliasDecl.h" +#include "llvm/Analysis/GlobalsModRef.h" +#include "llvm/IR/Dominators.h" +#include "llvm/Pass.h" +#include "llvm/Transforms/Utils/Cloning.h" + +using namespace llvm; + +namespace { +class ConnectNoAliasDeclLegacyPass : public FunctionPass { +public: + static char ID; // Pass identification, replacement for typeid + ConnectNoAliasDeclLegacyPass() : FunctionPass(ID) { + initializeConnectNoAliasDeclLegacyPassPass( + *PassRegistry::getPassRegistry()); + } + + bool runOnFunction(Function &F) override; + + void getAnalysisUsage(AnalysisUsage &AU) const override { + // FIXME: is all of this valid ? + AU.addPreserved(); + AU.addPreserved(); // FIXME: not sure this is + // valid. It ensures the same pass + // order as if this pass was not + // there + AU.addPreserved(); + AU.addRequiredTransitive(); + AU.setPreservesCFG(); + } + +private: + ConnectNoAliasDeclPass Impl; +}; +} // namespace + +char ConnectNoAliasDeclLegacyPass::ID = 0; +INITIALIZE_PASS_BEGIN( + ConnectNoAliasDeclLegacyPass, "connect-noaliasdecl", + "Connect llvm.noalias.decl", false, + false) // to llvm.noalias/llvm.side.noalias/llvm.noalias.copy.guard + // intrinsics +INITIALIZE_PASS_END( + ConnectNoAliasDeclLegacyPass, "connect-noaliasdecl", + "Connect llvm.noalias.decl", false, + false) // to llvm.noalias/llvm.side.noalias/llvm.noalias.copy.guard + // intrinsics + +bool ConnectNoAliasDeclLegacyPass::runOnFunction(Function &F) { + if (skipFunction(F)) + return false; + + return Impl.runImpl(F); +} + +namespace llvm { + +bool ConnectNoAliasDeclPass::runImpl(Function &F) { + return llvm::propagateAndConnectNoAliasDecl(&F); +} + +ConnectNoAliasDeclPass::ConnectNoAliasDeclPass() {} + +PreservedAnalyses ConnectNoAliasDeclPass::run(Function &F, + FunctionAnalysisManager &AM) { + bool Changed = runImpl(F); + + if (!Changed) + return PreservedAnalyses::all(); + PreservedAnalyses PA; + PA.preserve(); + //?? PA.preserve(); // FIXME: not sure this is valid, + // see above + + return PA; +} + +FunctionPass *createConnectNoAliasDeclPass() { + return new ConnectNoAliasDeclLegacyPass(); +} +} // namespace llvm Index: llvm/lib/Transforms/Scalar/EarlyCSE.cpp =================================================================== --- llvm/lib/Transforms/Scalar/EarlyCSE.cpp +++ llvm/lib/Transforms/Scalar/EarlyCSE.cpp @@ -927,7 +927,17 @@ << '\n'); AvailableValues.insert(CondI, ConstantInt::getTrue(BB->getContext())); } else - LLVM_DEBUG(dbgs() << "EarlyCSE skipping assumption: " << *Inst << '\n'); + LLVM_DEBUG(dbgs() << "EarlyCSE skipping intrinsic: " << *Inst << '\n'); + continue; + } + + // Likewise, noalias intrinsics don't actually write. + if (match(Inst, m_Intrinsic()) || + match(Inst, m_Intrinsic()) || + match(Inst, m_Intrinsic()) || + match(Inst, m_Intrinsic())) { + LLVM_DEBUG(dbgs() << "EarlyCSE skipping noalias intrinsic: " << *Inst + << '\n'); continue; } Index: llvm/lib/Transforms/Scalar/GVN.cpp =================================================================== --- llvm/lib/Transforms/Scalar/GVN.cpp +++ llvm/lib/Transforms/Scalar/GVN.cpp @@ -1252,6 +1252,8 @@ LI->getAAMetadata(Tags); if (Tags) NewLoad->setAAMetadata(Tags); + // FIXME: not sure if noalias_sidechannel propagation should be allowed + // here: dependend side channels should also migrate first ! if (auto *MD = LI->getMetadata(LLVMContext::MD_invariant_load)) NewLoad->setMetadata(LLVMContext::MD_invariant_load, MD); Index: llvm/lib/Transforms/Scalar/JumpThreading.cpp =================================================================== --- llvm/lib/Transforms/Scalar/JumpThreading.cpp +++ llvm/lib/Transforms/Scalar/JumpThreading.cpp @@ -1477,6 +1477,8 @@ NewVal->setDebugLoc(LoadI->getDebugLoc()); if (AATags) NewVal->setAAMetadata(AATags); + // FIXME: not sure if noalias_sidechannel propagation should be allowed + // here: dependend side channels should also migrate first ! AvailablePreds.push_back(std::make_pair(UnavailablePred, NewVal)); } Index: llvm/lib/Transforms/Scalar/LICM.cpp =================================================================== --- llvm/lib/Transforms/Scalar/LICM.cpp +++ llvm/lib/Transforms/Scalar/LICM.cpp @@ -1804,6 +1804,8 @@ NewSI->setDebugLoc(DL); if (AATags) NewSI->setAAMetadata(AATags); + // FIXME: not sure if noalias_sidechannel propagation should be allowed + // here: dependend side channels should also migrate first ! if (MSSAU) { MemoryAccess *MSSAInsertPoint = MSSAInsertPts[i]; @@ -1957,7 +1959,9 @@ if (SomePtr->getType() != ASIV->getType()) return false; - for (User *U : ASIV->users()) { + for (auto U_it = ASIV->user_begin(), U_it_end = ASIV->user_end(); + U_it != U_it_end; ++U_it) { + User *U = *U_it; // Ignore instructions that are outside the loop. Instruction *UI = dyn_cast(U); if (!UI || !CurLoop->contains(UI)) @@ -1966,6 +1970,9 @@ // If there is an non-load/store instruction in the loop, we can't promote // it. if (LoadInst *Load = dyn_cast(UI)) { + if (U_it.getUse().getOperandNo() == + Load->getNoaliasSideChannelOperandIndex()) + continue; if (!Load->isUnordered()) return false; @@ -1988,6 +1995,10 @@ Alignment = std::max(Alignment, InstAlignment); } } else if (const StoreInst *Store = dyn_cast(UI)) { + if (U_it.getUse().getOperandNo() == + Store->getNoaliasSideChannelOperandIndex()) + continue; + // Stores *of* the pointer are not interesting, only stores *to* the // pointer. if (UI->getOperand(1) != ASIV) @@ -2036,8 +2047,18 @@ MaybeAlign(Store->getAlignment()), MDL, Preheader->getTerminator(), DT); } - } else - return false; // Not a load or store. + } else { + // Not a load or store. + if (IntrinsicInst *II = dyn_cast(UI)) { + if (II->getIntrinsicID() == Intrinsic::side_noalias || + II->getIntrinsicID() == Intrinsic::noalias_arg_guard || + II->getIntrinsicID() == Intrinsic::noalias_copy_guard) { + // those must not block promotion. + continue; + } + } + return false; + } // Merge the AA tags. if (LoopUses.empty()) { @@ -2123,6 +2144,8 @@ PreheaderLoad->setDebugLoc(DL); if (AATags) PreheaderLoad->setAAMetadata(AATags); + // FIXME: not sure if noalias_sidechannel propagation should be allowed + // here: dependend side channels should also migrate first ! SSA.AddAvailableValue(Preheader, PreheaderLoad); if (MSSAU) { Index: llvm/lib/Transforms/Scalar/PropagateAndConvertNoAlias.cpp =================================================================== --- /dev/null +++ llvm/lib/Transforms/Scalar/PropagateAndConvertNoAlias.cpp @@ -0,0 +1,1019 @@ +//===- PropagateAndConvertNoAlias.h -----------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This pass moves dependencies on llvm.noalias onto the noalias_sidechannel. +// It also introduces and propagates side.noalias and noalias.arg.guard +// intrinsics. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Transforms/Scalar/PropagateAndConvertNoAlias.h" +#include "llvm/ADT/SmallSet.h" +#include "llvm/Analysis/GlobalsModRef.h" +#include "llvm/Analysis/ValueTracking.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/Dominators.h" +#include "llvm/IR/IRBuilder.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/Module.h" +#include "llvm/IR/Operator.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" + +#include +#include + +using namespace llvm; + +#define DEBUG_TYPE "convert-noalias" + +namespace { + +class PropagateAndConvertNoAliasLegacyPass : public FunctionPass { +public: + static char ID; + explicit PropagateAndConvertNoAliasLegacyPass() : FunctionPass(ID), Impl() { + initializePropagateAndConvertNoAliasLegacyPassPass( + *PassRegistry::getPassRegistry()); + } + + void getAnalysisUsage(AnalysisUsage &AU) const override; + bool runOnFunction(Function &F) override; + + StringRef getPassName() const override { + return "Propagate and Convert Noalias intrinsics"; + } + +private: + PropagateAndConvertNoAliasPass Impl; +}; +} // namespace + +char PropagateAndConvertNoAliasLegacyPass::ID = 0; +INITIALIZE_PASS_BEGIN(PropagateAndConvertNoAliasLegacyPass, "convert-noalias", + "Propagate And Convert llvm.noalias intrinsics", false, + false) +INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) +INITIALIZE_PASS_END(PropagateAndConvertNoAliasLegacyPass, "convert-noalias", + "Propagate And Convert llvm.noalias intrinsics", false, + false) + +void PropagateAndConvertNoAliasLegacyPass::getAnalysisUsage( + AnalysisUsage &AU) const { + AU.addPreserved(); + // FIXME: not sure the CallGraphWrapperPass is needed. It ensures the same + // pass order is kept as if the PropagateAndConvertNoAlias pass was not there. + AU.addPreserved(); + AU.addPreserved(); + AU.addRequiredTransitive(); +} + +bool PropagateAndConvertNoAliasLegacyPass::runOnFunction(Function &F) { + if (skipFunction(F)) + return false; + + return Impl.runImpl(F, getAnalysis().getDomTree()); +} + +namespace llvm { + +bool PropagateAndConvertNoAliasPass::runImpl(Function &F, DominatorTree &DT) { + bool Changed = false; + + // find all 'llvm.noalias', 'llvm.noalias.gep' and 'llvm.noalias.arg.guard' + // intrinsics + Changed |= doit(F, DT); + + return Changed; +} + +FunctionPass *createPropagateAndConvertNoAliasPass() { + return new PropagateAndConvertNoAliasLegacyPass(); +} + +PropagateAndConvertNoAliasPass::PropagateAndConvertNoAliasPass() {} + +PreservedAnalyses +PropagateAndConvertNoAliasPass::run(Function &F, FunctionAnalysisManager &AM) { + bool Changed = runImpl(F, AM.getResult(F)); + + if (!Changed) + return PreservedAnalyses::all(); + PreservedAnalyses PA; + PA.preserve(); + // FIXME: not sure this is valid: + //?? PA.preserve(); // See above + + return PA; +} + +typedef SmallVector SideChannelWorklist; +typedef SmallVector DepsVector; +typedef std::map I2Deps; +typedef SmallSet InstructionSet; + +// Analyse and propagate the instructions that need side channels: +// - InstructionsForSideChannel: instructions that need a side channel +// representation +// - at entry: (A) +// -- llvm.noalias -> llvm.side.noalias +// -- llvm.noalias.arg.guard a, side_a -> side_a +// +// - during propagation: (B) +// -- select a, b, c -> select a, side_b, side_c +// -- PHI a, b,... -> PHI side_a, side_b, ... +// +// - Handled: Instructions that have been investigated. The Deps side refers to +// the side channel dependency. (C) +// -- a nullptr indicates that the normal dependency must be used for that +// operand +// -- an I indictates that the side channel representation of I must be used for +// that operand +// +// The algorithm: +// - We start from the llvm.noalias and llvm.noalias.arg.guard instructions +// - We go over their users, and check if they are special or not +// -- special users need a side channel representation and are annotated as such +// in 'Handled' (non-empty Dep) +// -- normal instructions are a passthrough, and are annotated with an empty Dep +// in 'Handled' (I->{}) +// -- some instructions stop the recursion: +// --- ICmp +// --- first arg of select +// --- llvm.noalias.side.channel, llvm.noalias +// +// After the analysis, 'Handled' contains an overview of all instructions that +// depend on (A) +// - those instructions that were seen, but ignored otherwise have no +// dependencies (I -> {} ) +// - instructions that refer to one ore more side channels have explicit +// dependencies. (I -> { op0, op1, op2, ... }) +// -- if opX == nullptr -> not a real side_channel dependency +// -- if opX == someI : +// ---- if someI points to an instruction in Handled, it must be one of the +// instructions that have a side channel representation +// ---- otherwise, it points to a not-handle plain dependency (coming from a +// noalias.arg.guard) +static void propagateInstructionsForSideChannel( + SideChannelWorklist &InstructionsForSideChannel, I2Deps &Handled, + SideChannelWorklist &out_CreationList, InstructionSet &SideChannelPHIs) { + auto updateMatchingOperands = [](Instruction *U, Instruction *I, + DepsVector &Deps, Instruction *I4SC) { + assert(U->getNumOperands() == Deps.size()); + auto it = Deps.begin(); + for (Value *UOp : U->operands()) { + if (UOp == I) { + assert(*it == nullptr || *it == I4SC); + *it = I4SC; + } + ++it; + } + }; + + while (!InstructionsForSideChannel.empty()) { + Instruction *I4SC = InstructionsForSideChannel.pop_back_val(); + LLVM_DEBUG(llvm::dbgs() + << "-- Propagating side channel instruction: " << *I4SC << "\n"); + SmallVector WorkList = {I4SC}; + if (auto *CB = dyn_cast(I4SC)) { + if (CB->getIntrinsicID() == Intrinsic::noalias_arg_guard) { + // llvm.noalias.arg.guard: delegate to side_channel (operand 1) + Handled.insert(I2Deps::value_type(I4SC, {})); + // no need to add to out_CreationList + + assert(!isa(I4SC->getOperand(0)) && + !isa(I4SC->getOperand(1)) && + "Degenerated case must have been resolved already"); + assert(I4SC->getOperand(0) != I4SC->getOperand(1) && + "Degenerated case must have been resolved already"); + + I4SC = dyn_cast(I4SC->getOperand(1)); + if (I4SC == nullptr) { + // Sidechannel became a constant ? Then the arg guard is not needed + // any more and there is nothing to propagate + continue; + } + } + } + while (!WorkList.empty()) { + Instruction *I = WorkList.pop_back_val(); + LLVM_DEBUG(llvm::dbgs() << "-- checking:" << *I << "\n"); + bool isPtrToInt = isa(I); + for (auto &UOp : I->uses()) { + auto *U_ = UOp.getUser(); + LLVM_DEBUG(llvm::dbgs() << "--- used by:" << *U_ + << ", operand:" << UOp.getOperandNo() << "\n"); + Instruction *U = dyn_cast(U_); + if (U == nullptr) + continue; + + // Only see through a ptr2int if it used by a int2ptr + if (isPtrToInt && !isa(U)) + continue; + + if (isa(U)) { + // ======================================== select -> { lhs, rhs } + bool MatchesOp1 = (U->getOperand(1) == I); + bool MatchesOp2 = (U->getOperand(2) == I); + + if (MatchesOp1 || MatchesOp2) { + auto HI = Handled.insert(I2Deps::value_type(U, {nullptr, nullptr})); + if (HI.second) + out_CreationList.push_back(U); + if (MatchesOp1) { + HI.first->second[0] = I4SC; + } + if (MatchesOp2) { + HI.first->second[1] = I4SC; + } + if (HI.second) { + InstructionsForSideChannel.push_back(U); + } + } + } else if (isa(U)) { + // ======================================== load -> { ptr } + if (UOp.getOperandNo() == + LoadInst::getNoaliasSideChannelOperandIndex()) + continue; // tracking on side channel -> ignore + + auto HI = Handled.insert(I2Deps::value_type(U, {I4SC})); + if (HI.second) + out_CreationList.push_back(U); + assert(U->getOperand(0) == I); + if (HI.second) { + // continue + } + } else if (isa(U)) { + // ======================================== store -> { val, ptr } + if (UOp.getOperandNo() == + StoreInst::getNoaliasSideChannelOperandIndex()) + continue; // tracking on side channel -> ignore + + // also track if we are storing a restrict annotated pointer value... + // This might provide useful information about 'escaping pointers' + bool MatchesOp0 = (U->getOperand(0) == I); + bool MatchesOp1 = (U->getOperand(1) == I); + + if (MatchesOp0 || MatchesOp1) { + auto HI = Handled.insert(I2Deps::value_type(U, {nullptr, nullptr})); + if (HI.second) + out_CreationList.push_back(U); + if (MatchesOp0) { + HI.first->second[0] = I4SC; + } + if (MatchesOp1) { + HI.first->second[1] = I4SC; + } + } + } else if (isa(U)) { + // ======================================== insertvalue -> { val } + // track for injecting llvm.noalias.arg.guard + assert(U->getOperand(1) == I); + // need to introduce a guard + auto HI = Handled.insert(I2Deps::value_type(U, {I4SC})); + if (HI.second) + out_CreationList.push_back(U); + } else if (isa(U)) { + auto HI = Handled.insert(I2Deps::value_type(U, {I4SC})); + if (HI.second) + out_CreationList.push_back(U); + } else if (isa(U)) { + // ======================================== PHI -> { ..... } + PHINode *PU = cast(U); + auto HI = Handled.insert(I2Deps::value_type(U, {})); + if (HI.second) { + HI.first->second.resize(U->getNumOperands(), nullptr); + if (SideChannelPHIs.count(U) == 0) { + // This is a normal PHI, consider it for propagation + InstructionsForSideChannel.push_back(U); + } + if (U->getNumOperands()) + out_CreationList.push_back(U); + } + updateMatchingOperands(PU, I, HI.first->second, I4SC); + } else if (auto *CS = dyn_cast(U)) { + // =============================== call/invoke/intrinsic -> { ...... } + + // NOTES: + // - we always block at a call... + // - the known intrinsics should not have any extra annotations + switch (CS->getIntrinsicID()) { + case Intrinsic::side_noalias: + case Intrinsic::noalias: { + bool MatchesOp0 = (U->getOperand(0) == I); + bool MatchesOpP = + (U->getOperand(Intrinsic::NoAliasIdentifyPArg) == I); + static_assert(Intrinsic::NoAliasIdentifyPArg == + Intrinsic::SideNoAliasIdentifyPArg, + "those must be identical"); + + if (MatchesOp0 || MatchesOpP) { + auto HI = + Handled.insert(I2Deps::value_type(U, {nullptr, nullptr})); + if (HI.second) + out_CreationList.push_back(U); + if (MatchesOp0) { + HI.first->second[0] = I4SC; + } + if (MatchesOpP) { + HI.first->second[1] = I4SC; + } + } + continue; + } + case Intrinsic::noalias_arg_guard: { + // ignore - should be handled by the outer loop ! + continue; + } + + default: + break; + } + // if we get here, we need to inject guards for certain arguments. + // Track which arguments will need one. + auto HI = Handled.insert(I2Deps::value_type(U, {})); + if (HI.second) { + HI.first->second.resize(U->getNumOperands(), nullptr); + if (U->getNumOperands()) { + out_CreationList.push_back(U); + } + } + updateMatchingOperands(U, I, HI.first->second, I4SC); + if (I == CS->getReturnedArgOperand()) { + // also see through call - this does not omit the need of + // introducing a noalias_arg_guard + WorkList.push_back(U); + } + } else { + // ======================================== other -> {} + // this is the generic case... not sure if we should have a elaborate + // check for 'all other instructions'. just acknowledge that we saw it + // and propagate to any users NOTE: if we happen have already handled + // it, this might indicate something interesting that we should handle + // separately + + switch (U->getOpcode()) { + case Instruction::ICmp: + // restrict pointer used in comparison - do not propagate + // sidechannel + continue; + default: + break; + } + + auto HI = Handled.insert(I2Deps::value_type(U, {})); + // No need to add to out_CreationList + if (!HI.second) { + llvm::errs() + << "WARNING: found an instruction that was already handled:" + << *U << "\n"; + assert(!HI.second && + "We should not encounter a handled instruction ??"); + } + + if (HI.second) { + WorkList.push_back(U); + } + } + } + } + } +} + +typedef SmallDenseMap, Value *, 16> + ValueType2CastMap; +static Value *createBitOrPointerOrAddrSpaceCast(Value *V, Type *T, + ValueType2CastMap &VT2C) { + if (V->getType() == T) + return V; + + // Make sure we remember what casts we introduced + Value *&Entry = VT2C[std::make_pair(V, T)]; + if (Entry == nullptr) { + Instruction *InsertionPoint = cast(V); + if (auto *PHI = dyn_cast(V)) { + InsertionPoint = PHI->getParent()->getFirstNonPHI(); + } else { + InsertionPoint = InsertionPoint->getNextNode(); + } + + IRBuilder<> Builder(InsertionPoint); + Entry = Builder.CreateBitOrPointerCast(V, T); + } + return Entry; +} + +// combine llvm.side.noalias intrinsics as much as possible +void collapseSideNoalias(SideChannelWorklist &CollapseableSideNoaliasIntrinsics, + DominatorTree &DT) { + if (CollapseableSideNoaliasIntrinsics.empty()) + return; + + if (!CollapseableSideNoaliasIntrinsics.empty()) { + // sweep from back to front, then from front to back etc... until no + // modifications are done + do { + LLVM_DEBUG(llvm::dbgs() << "- Trying to collapse llvm.side.noalias\n"); + SideChannelWorklist NextList; + bool Changed = false; + + // 1) side_noaliasA (side_noaliasB (....), ...) -> side_noaliasB(...) + while (!CollapseableSideNoaliasIntrinsics.empty()) { + IntrinsicInst *I = + cast(CollapseableSideNoaliasIntrinsics.back()); + assert(I->getIntrinsicID() == Intrinsic::side_noalias); + + CollapseableSideNoaliasIntrinsics.pop_back(); + + // side_noalias (side_noalias(....), .... ) -> side_noalias(....) + if (IntrinsicInst *DepI = dyn_cast(I->getOperand(0))) { + // Check if the depending intrinsic is compatible) + if (DepI->getIntrinsicID() == Intrinsic::side_noalias && + areSideNoaliasCompatible(DepI, I)) { + // similar enough - look through + LLVM_DEBUG(llvm::dbgs() << "-- Collapsing(1):" << *I << "\n"); + I->replaceAllUsesWith(DepI); + I->eraseFromParent(); + Changed = true; + continue; + } + } + //@ FIXME: TODO: + // side_noalias(PHI (fum, self)) -> PHI(side_noalias(fum), phiselfref) + NextList.push_back(I); + } + + // 2) side_noaliasA (...), side_noaliasB(...) --> side_noaliasA(...) + { + for (Instruction *I : NextList) { + IntrinsicInst *II = cast(I); + Instruction *DominatingUse = II; + + SideChannelWorklist similarSideChannels; + for (User *U : II->getOperand(0)->users()) { + if (IntrinsicInst *UII = dyn_cast(U)) { + if (UII->getParent() && // still valid - ignore already removed + // instructions + UII->getIntrinsicID() == Intrinsic::side_noalias && + areSideNoaliasCompatible(II, UII)) { + similarSideChannels.push_back(UII); + if (DT.dominates(UII, DominatingUse)) + DominatingUse = UII; + } + } + } + + for (Instruction *SI : similarSideChannels) { + if ((SI != DominatingUse) && DT.dominates(DominatingUse, SI)) { + LLVM_DEBUG(llvm::dbgs() << "-- Collapsing(2):" << *SI << "\n"); + Changed = true; + SI->replaceAllUsesWith(DominatingUse); + SI->removeFromParent(); // do not yet erase ! + } + } + } + + if (!Changed) + break; + + // Now eliminate all removed intrinsics + llvm::erase_if(NextList, [](Instruction *I) { + if (I->getParent()) { + return false; + } else { + I->deleteValue(); + return true; + } + }); + } + + CollapseableSideNoaliasIntrinsics = NextList; + } while (CollapseableSideNoaliasIntrinsics.size() > 1); + } +} + + +// Look at users of llvm.side.noalias to find PHI nodes that are used as a side +// channel +static void deduceSideChannelPHIs(SideChannelWorklist &SideNoAliasIntrinsics, + InstructionSet &out_SideChannelPHIs) { + LLVM_DEBUG(llvm::dbgs() << "-- Looking up up sidechannel PHI nodes\n"); + for (Instruction *SNI : SideNoAliasIntrinsics) { + SideChannelWorklist worklist = {SNI}; + while (!worklist.empty()) { + Instruction *worker = worklist.pop_back_val(); + for (auto *SNIUser_ : worker->users()) { + Instruction *SNIUser = dyn_cast(SNIUser_); + if (SNIUser == nullptr) + continue; + + if (isa(SNIUser)) { + // Identify as a sidechannel PHI + if (out_SideChannelPHIs.insert(cast(SNIUser)).second) { + LLVM_DEBUG(llvm::dbgs() << "--- " << *SNIUser << "\n"); + // and propagate + worklist.push_back(SNIUser); + } + } else if (isa(SNIUser) || isa(SNIUser) || + isa(SNIUser)) { + // look through select/bitcast/addressspacecast + worklist.push_back(SNIUser); + } else { + // load/store/side.noalias/arg.guard -> stop looking + } + } + } + } +} + +bool PropagateAndConvertNoAliasPass::doit(Function &F, DominatorTree &DT) { + LLVM_DEBUG(llvm::dbgs() << "PropagateAndConvertNoAliasPass:\n"); + LLVM_DEBUG(llvm::dbgs() << "- gathering intrinsics, stores, loads:\n"); + + // PHASE 0: find interesting instructions + // - Find all: + // -- Propagatable noalias intrinsics + // -- Load instructions + // -- Store instructions + SideChannelWorklist InstructionsForSideChannel; + SideChannelWorklist LoadStoreIntrinsicInstructions; + SideChannelWorklist LookThroughIntrinsics; + SideChannelWorklist CollapseableSideNoaliasIntrinsics; + ValueType2CastMap VT2C; + InstructionSet SideChannelPHIs; + SideChannelWorklist DegeneratedNoAliasAndNoAliasArgGuards; + + for (auto &BB : F) { + for (auto &I : BB) { + if (auto CB = dyn_cast(&I)) { + auto ID = CB->getIntrinsicID(); + if (ID == Intrinsic::noalias || ID == Intrinsic::noalias_arg_guard) { + LLVM_DEBUG(llvm::dbgs() << "-- found intrinsic:" << I << "\n"); + auto Op0 = I.getOperand(0); + auto Op1 = I.getOperand(1); + if (isa(Op0) || + ((ID == Intrinsic::noalias_arg_guard) && + ((Op0 == Op1) || isa(Op1)))) { + LLVM_DEBUG(llvm::dbgs() << "--- degenerated\n"); + DegeneratedNoAliasAndNoAliasArgGuards.push_back(&I); + } else { + InstructionsForSideChannel.push_back(&I); + LoadStoreIntrinsicInstructions.push_back(&I); + LookThroughIntrinsics.push_back(&I); + } + } else if (ID == Intrinsic::side_noalias) { + CollapseableSideNoaliasIntrinsics.push_back(&I); + } + } else if (auto LI = dyn_cast(&I)) { + LLVM_DEBUG(llvm::dbgs() << "-- found load:" << I << "\n"); + LoadStoreIntrinsicInstructions.push_back(LI); + } else if (auto SI = dyn_cast(&I)) { + LLVM_DEBUG(llvm::dbgs() << "-- found store:" << I << "\n"); + LoadStoreIntrinsicInstructions.push_back(SI); + } + } + } + + // When there are no noalias related intrinsics, don't do anything. + if (DegeneratedNoAliasAndNoAliasArgGuards.empty() && + InstructionsForSideChannel.empty() && + CollapseableSideNoaliasIntrinsics.empty()) { + LLVM_DEBUG(llvm::dbgs() << "- Nothing to do\n"); + return false; + } + + LLVM_DEBUG( + llvm::dbgs() << "- Looking through degenerated llvm.noalias.arg.guard\n"); + for (Instruction *I : DegeneratedNoAliasAndNoAliasArgGuards) { + I->replaceAllUsesWith(I->getOperand(0)); + I->eraseFromParent(); + } + LLVM_DEBUG(llvm::dbgs() << "- Find out what to do:\n"); + + deduceSideChannelPHIs(CollapseableSideNoaliasIntrinsics, SideChannelPHIs); + + // PHASE 1: forward pass: + // - Start with all intrinsics + // -- Track all users + // -- Interesting users (noalias intrinsics, select, PHI, load/store) + // -- Do this recursively for users that we can look through + I2Deps Handled; // instruction -> { dependencies } + SideChannelWorklist + CreationList; // Tracks all keys in Handled, but in a reproducable way + propagateInstructionsForSideChannel(InstructionsForSideChannel, Handled, + CreationList, SideChannelPHIs); + + // PHASE 2: add missing load/store/intrinsic instructions: + for (auto *I : LoadStoreIntrinsicInstructions) { + if (isa(I)) { + if (Handled.insert(I2Deps::value_type(I, {nullptr})).second) + CreationList.push_back(I); + } else { // Store or llvm.no_alias + if (Handled.insert(I2Deps::value_type(I, {nullptr, nullptr})).second) + CreationList.push_back(I); + } + } + +#if !defined(NDEBUG) + auto dumpit = [](I2Deps::value_type &H) { + auto &out = llvm::dbgs(); + out << *H.first << " -> {"; + bool comma = false; + for (auto D : H.second) { + if (comma) + out << ","; + comma = true; + if (D == nullptr) { + out << "nullptr"; + } else { + out << *D; + } + } + out << "}\n"; + }; +#endif + + // PHASE 3: reconstruct alternative tree + // - detected dependencies: replace them by new instructions + // - undetected dependencies: use the original dependency + // NOTE: See explanation in propagateInstructionsForSideChannel for more + // information ! + LLVM_DEBUG(llvm::dbgs() << "- Reconstructing tree:\n"); + + SideChannelWorklist UnresolvedPHI; + SmallDenseMap I2NewV; + SmallDenseMap I2ArgGuard; + + auto getNewIOrOperand = [&](Instruction *DepOp, Value *OrigOp) { + assert(((!DepOp) || I2NewV.count(DepOp)) && + "DepOp should be known"); + return DepOp ? static_cast(I2NewV[DepOp]) : OrigOp; + }; + + // We are doing a number of sweeps. This should always end. Normally the + // amount of sweeps is low. During initial development, a number of bugs where + // found by putting a hard limit on the the amount. + unsigned Watchdog = 1000000; // Only used in assertions build + (void)Watchdog; + for (auto CloneableInst : CreationList) { + assert(Handled.count(CloneableInst) && + "Entries in CreationList must also be in Handled"); + assert(!Handled[CloneableInst].empty() && + "Only non-empty items should be added to the CreationList"); + + LLVM_DEBUG(llvm::dbgs() << "- "; dumpit(*Handled.find(CloneableInst))); + SideChannelWorklist Worklist = {CloneableInst}; + + while (!Worklist.empty()) { + Instruction *I = Worklist.back(); + + if (I2NewV.count(I)) { + // already exists - skip + Worklist.pop_back(); + continue; + } + + LLVM_DEBUG(llvm::dbgs() << "-- Reconstructing:" << *I << "\n"); + + // Check if we have all the needed arguments + auto HandledIt = Handled.find(I); + if (HandledIt == Handled.end()) { + // This can happen after propagation of a llvm.noalias.arg.guard + Worklist.pop_back(); + I2NewV[I] = I; + LLVM_DEBUG(llvm::dbgs() << "--- Connected to an existing path!\n"); + continue; + } + + // If we are a PHI node, just create it + if (isa(I)) { + if (SideChannelPHIs.count(cast(I)) == 0) { + // But only if it is _not_ a sidechannel PHI node + // ======================================== PHI -> { ..... } + IRBuilder<> Builder(I); + I2NewV[I] = Builder.CreatePHI(I->getType(), I->getNumOperands(), + Twine("side.") + I->getName()); + + UnresolvedPHI.push_back(I); + } else { + I2NewV[I] = I; // Map already existing SideChannel PHI to itself + } + Worklist.pop_back(); + continue; + } + + LLVM_DEBUG(llvm::dbgs() << "--- "; dumpit(*HandledIt)); + auto &Deps = HandledIt->second; + assert((!Deps.empty()) && + "Any creatable instruction must have some dependent operands"); + bool canCreateInstruction = true; + for (auto *DepOp : Deps) { + if (DepOp != nullptr) { + if (I2NewV.count(DepOp) == 0) { + canCreateInstruction = false; + Worklist.push_back(DepOp); + } + } + } +#if !defined(NDEBUG) + if (--Watchdog == 0) { + llvm::errs() + << "PropagateAndConvertNoAlias: ERROR: WATCHDOG TRIGGERED !\n"; + assert(false && "PropagateAndConvertNoAlias: WATCHDOG TRIGGERED"); + } +#endif + if (canCreateInstruction) { + Worklist.pop_back(); + IRBuilder<> Builder(I); + + if (isa(I)) { + // ======================================== select -> { lhs, rhs } + I2NewV[I] = Builder.CreateSelect( + I->getOperand(0), + createBitOrPointerOrAddrSpaceCast( + getNewIOrOperand(Deps[0], I->getOperand(1)), I->getType(), + VT2C), + createBitOrPointerOrAddrSpaceCast( + getNewIOrOperand(Deps[1], I->getOperand(2)), I->getType(), + VT2C), + Twine("side.") + I->getName()); + } else if (isa(I)) { + // ======================================== load -> { ptr } + LoadInst *LI = cast(I); + + if (Deps[0]) { + if (!LI->hasNoaliasSideChannelOperand() || + isa(LI->getNoaliasSideChannelOperand()) || + (LI->getPointerOperand() == + LI->getNoaliasSideChannelOperand())) { + LI->setNoaliasSideChannelOperand( + createBitOrPointerOrAddrSpaceCast( + I2NewV[Deps[0]], LI->getPointerOperandType(), VT2C)); + } else { + // nothing to do - propagation should have happend through the + // side channel ! + // TODO: we might want to add an extra check that the load + // sidechannel was updated + } + } else { + if (!LI->hasNoaliasSideChannelOperand()) { + LI->setNoaliasSideChannelOperand( + UndefValue::get(LI->getPointerOperandType())); + } else { + // has alrady a side channel - nothing to merge + } + } + I2NewV[I] = I; + } else if (isa(I)) { + // ======================================== store -> { val, ptr } + StoreInst *SI = cast(I); + + if (Deps[0]) { + // We try to store a restrict pointer - restrictness + // FIXME: introduce a llvm.noalias.arg.guard + // although escape analysis is already handling it + } + if (Deps[1]) { + if (!SI->hasNoaliasSideChannelOperand() || + isa(SI->getNoaliasSideChannelOperand()) || + (SI->getPointerOperand() == + SI->getNoaliasSideChannelOperand())) { + SI->setNoaliasSideChannelOperand( + createBitOrPointerOrAddrSpaceCast( + I2NewV[Deps[1]], SI->getPointerOperandType(), VT2C)); + } else { + // nothing to do - propagation should have happend through the + // side channel ! + // TODO: we might want to add an extra check that the store + // sidechannel was updated + } + } else { + if (!SI->hasNoaliasSideChannelOperand()) { + SI->setNoaliasSideChannelOperand( + UndefValue::get(SI->getPointerOperandType())); + } else { + // has already a side channel - nothing to merge + } + } + I2NewV[I] = I; + } else if (isa(I)) { + // We try to insert a restrict pointer into a struct - track it. + // Track generated noalias_arg_guard also in I2NewI + assert(Deps.size() == 1 && + "InsertValue tracks exactly one dependency"); + Instruction *DepOp = Deps[0]; + auto *SideOp = cast(I2NewV[DepOp]); + // if we get here, the operand has to be an 'Instruction' (otherwise, + // DepOp would not be set). + auto *OpI = cast(I->getOperand(1)); + auto &ArgGuard = I2ArgGuard[OpI]; + if (ArgGuard == nullptr) { + // create the instruction close to the origin, so that we don't + // introduce bad dependencies + auto InsertionPointIt = OpI->getIterator(); + ++InsertionPointIt; + if (isa(OpI)) { + auto End = OpI->getParent()->end(); + while (InsertionPointIt != End) { + if (!isa(*InsertionPointIt)) + break; + ++InsertionPointIt; + } + } + IRBuilder<> BuilderForArgs(OpI->getParent(), InsertionPointIt); + ArgGuard = BuilderForArgs.CreateNoAliasArgGuard( + OpI, + createBitOrPointerOrAddrSpaceCast(SideOp, OpI->getType(), VT2C), + OpI->getName() + ".guard"); + } + I->setOperand(1, ArgGuard); + } else { + // =============================== ret -> { ...... } + // =============================== call/invoke/intrinsic -> { ...... } + auto CB = dyn_cast(I); + if (CB) { + assert(CB && "If we get here, we should have a Call"); + switch (CB->getIntrinsicID()) { + case Intrinsic::noalias: { + // convert + assert(Deps.size() == 2); + Value *IdentifyPSideChannel; + if (Deps[1]) { + // do the same as with the noalias_sidechannel in the load + // instruction + IdentifyPSideChannel = createBitOrPointerOrAddrSpaceCast( + I2NewV[Deps[1]], + I->getOperand(Intrinsic::NoAliasIdentifyPArg)->getType(), + VT2C); + } else { + IdentifyPSideChannel = UndefValue::get( + I->getOperand(Intrinsic::NoAliasIdentifyPArg)->getType()); + } + Instruction *NewI = Builder.CreateSideNoAliasPlain( + getNewIOrOperand(Deps[0], I->getOperand(0)), + I->getOperand(Intrinsic::NoAliasNoAliasDeclArg), + I->getOperand(Intrinsic::NoAliasIdentifyPArg), + IdentifyPSideChannel, + I->getOperand(Intrinsic::NoAliasIdentifyPObjIdArg), + I->getOperand(Intrinsic::NoAliasScopeArg)); + I2NewV[I] = NewI; + CollapseableSideNoaliasIntrinsics.push_back(NewI); + + // Copy over metadata that is related to the 'getOperand(1)' (aka + // P) + AAMDNodes AAMetadata; + I->getAAMetadata(AAMetadata); + NewI->setAAMetadata(AAMetadata); + continue; + } + case Intrinsic::noalias_arg_guard: { + // no update needed - depending llvm.side_noalias/gep must have + // been updated + continue; + } + case Intrinsic::side_noalias: { + // update + assert((Deps[0] || Deps[1]) && + "side_noalias update needs a depending operand"); + if (Deps[0]) + I->setOperand(0, createBitOrPointerOrAddrSpaceCast( + I2NewV[Deps[0]], I->getType(), VT2C)); + if (Deps[1]) + I->setOperand( + Intrinsic::SideNoAliasIdentifyPSideChannelArg, + createBitOrPointerOrAddrSpaceCast( + I2NewV[Deps[1]], + I->getOperand(Intrinsic::SideNoAliasIdentifyPArg) + ->getType(), + VT2C)); + I2NewV[I] = I; + continue; + } + default: + break; + } + } else { + assert(isa(I)); + } + + // Introduce a noalias_arg_guard for every argument that is + // annotated + assert(I->getNumOperands() == Deps.size()); + for (unsigned i = 0, ci = I->getNumOperands(); i < ci; ++i) { + Instruction *DepOp = Deps[i]; + if (DepOp) { + // Track generated noalias_arg_guard also in I2NewI + auto *SideOp = cast(I2NewV[DepOp]); + // if we get here, the operand has to be an 'Instruction' + // (otherwise, DepOp would not be set). + auto *OpI = cast(I->getOperand(i)); + auto &ArgGuard = I2ArgGuard[OpI]; + if (ArgGuard == nullptr) { + // create the instruction close to the origin, so that we don't + // introduce bad dependencies + auto InsertionPointIt = OpI->getIterator(); + ++InsertionPointIt; + if (isa(OpI)) { + auto End = OpI->getParent()->end(); + while (InsertionPointIt != End) { + if (!isa(*InsertionPointIt)) + break; + ++InsertionPointIt; + } + } + IRBuilder<> BuilderForArgs(OpI->getParent(), InsertionPointIt); + ArgGuard = BuilderForArgs.CreateNoAliasArgGuard( + OpI, + createBitOrPointerOrAddrSpaceCast(SideOp, OpI->getType(), + VT2C), + OpI->getName() + ".guard"); + } + I->setOperand(i, ArgGuard); + } + } + I2NewV[I] = I; + } + } + } + } + + // Phase 4: resolve the generated PHI nodes + LLVM_DEBUG(llvm::dbgs() << "- Resolving " << UnresolvedPHI.size() + << " PHI nodes\n"); + for (auto *PHI_ : SideChannelPHIs) { + PHINode *PHI = cast(PHI_); + auto it = Handled.find(PHI); + if (it != Handled.end()) { + LLVM_DEBUG(llvm::dbgs() << "-- Orig PHI:" << *PHI << "\n"); + auto &Deps = it->second; + for (unsigned i = 0, ci = Deps.size(); i < ci; ++i) { + LLVM_DEBUG(if (Deps[i]) llvm::dbgs() + << "--- UPDATING:Deps:" << *Deps[i] << "\n"); + Value *IncomingValue = Deps[i] ? I2NewV[Deps[i]] : nullptr; + if (IncomingValue) { + if (IncomingValue->getType() != PHI->getType()) { + IncomingValue = createBitOrPointerOrAddrSpaceCast( + IncomingValue, PHI->getType(), VT2C); + } + LLVM_DEBUG(llvm::dbgs() + << "--- IncomingValue:" << *IncomingValue << "\n"); + PHI->setIncomingValue(i, IncomingValue); + } + } + LLVM_DEBUG(llvm::dbgs() << "-- Adapted PHI:" << *PHI << "\n"); + } + } + + for (auto &PHI : UnresolvedPHI) { + PHINode *BasePHI = cast(PHI); + PHINode *NewPHI = cast(I2NewV[PHI]); + auto &Deps = Handled[PHI]; + + LLVM_DEBUG(llvm::dbgs() << "-- Orig PHI:" << *BasePHI << "\n"); + LLVM_DEBUG(llvm::dbgs() << "-- New PHI:" << *NewPHI << "\n"); + LLVM_DEBUG(llvm::dbgs() << "-- Deps: " << Deps.size() << "\n"); + for (unsigned i = 0, ci = BasePHI->getNumOperands(); i < ci; ++i) { + auto *BB = BasePHI->getIncomingBlock(i); + Value *IncomingValue = + Deps[i] ? I2NewV[Deps[i]] : BasePHI->getIncomingValue(i); + if (IncomingValue == nullptr) { + LLVM_DEBUG(llvm::dbgs() + << "--- hmm.. operand " << i << " became undef\n"); + IncomingValue = UndefValue::get(NewPHI->getType()); + } + if (IncomingValue->getType() != NewPHI->getType()) { + IncomingValue = createBitOrPointerOrAddrSpaceCast( + IncomingValue, NewPHI->getType(), VT2C); + } + NewPHI->addIncoming(IncomingValue, BB); + } + } + + // Phase 5: Removing the llvm.noalias + LLVM_DEBUG(llvm::dbgs() << "- Looking through intrinsics:\n"); + for (Instruction *I : LookThroughIntrinsics) { + auto CB = dyn_cast(I); + if (CB->getIntrinsicID() == Intrinsic::noalias || + CB->getIntrinsicID() == Intrinsic::noalias_arg_guard) { + LLVM_DEBUG(llvm::dbgs() << "-- Eliminating: " << *I << "\n"); + I->replaceAllUsesWith(I->getOperand(0)); + I->eraseFromParent(); + } else { + llvm_unreachable("unhandled lookthrough intrinsic"); + } + } + + // Phase 6: Collapse llvm.side.noalias where possible... + // - hmm: should we do this as a complete separate pass ?? + collapseSideNoalias(CollapseableSideNoaliasIntrinsics, DT); + + return true; +} + +} // namespace llvm Index: llvm/lib/Transforms/Scalar/SROA.cpp =================================================================== --- llvm/lib/Transforms/Scalar/SROA.cpp +++ llvm/lib/Transforms/Scalar/SROA.cpp @@ -780,6 +780,13 @@ assert((!LI.isSimple() || LI.getType()->isSingleValueType()) && "All simple FCA loads should have been pre-split"); + if (U->getOperandNo() == LI.getNoaliasSideChannelOperandIndex()) { + // Skip side channel + assert(LI.hasNoaliasSideChannelOperand() && + LI.getNoaliasSideChannelOperand() == *U); + return; + } + if (!IsOffsetKnown) return PI.setAborted(&LI); @@ -792,6 +799,13 @@ } void visitStoreInst(StoreInst &SI) { + if (U->getOperandNo() == SI.getNoaliasSideChannelOperandIndex()) { + // Skip side channel + assert(SI.hasNoaliasSideChannelOperand() && + SI.getNoaliasSideChannelOperand() == *U); + return; + } + Value *ValOp = SI.getValueOperand(); if (ValOp == *U) return PI.setEscapedAndAborted(&SI); @@ -937,6 +951,57 @@ insertUse(II, Offset, Size, true); return; } + // look through noalias intrinsics + if (II.getIntrinsicID() == Intrinsic::noalias_decl) { + insertUse(II, Offset, AllocSize, true); + // do not enqueue direct users (?) They should be handled through a + // dependency on the original alloca + return; + } + if (II.getIntrinsicID() == Intrinsic::noalias) { + if (U->getOperandNo() == Intrinsic::NoAliasIdentifyPArg) { + insertUse(II, Offset, + DL.getTypeStoreSize( + II.getOperand(Intrinsic::NoAliasIdentifyPArg)->getType()), + false); + return; + } + if (U->getOperandNo() == 0) { + assert(II.getOperand(0) == *U); + // _only_ look through the first argument + enqueueUsers(II); + } + return; + } + if (II.getIntrinsicID() == Intrinsic::side_noalias) { + if (U->getOperandNo() == Intrinsic::SideNoAliasIdentifyPArg) { + insertUse( + II, Offset, + DL.getTypeStoreSize( + II.getOperand(Intrinsic::SideNoAliasIdentifyPArg)->getType()), + false); + return; + } + // hmmm - do not look through the first argument for a llvm.side.noalias + return; + } + if (II.getIntrinsicID() == Intrinsic::noalias_arg_guard) { + if (U->getOperandNo() == 0) { + // _only_ look through the first argument + enqueueUsers(II); + } + return; + } + if (II.getIntrinsicID() == Intrinsic::noalias_copy_guard) { + LLVM_DEBUG( + llvm::dbgs() + << "AllocaSlices::SliceBuilder: handling llvm.noalias.copy.guard:" + << (U->getOperandNo() == 0) << ":" << II << "\n"); + // Identify the usage, so that it can be split + if (U->getOperandNo() == 0) + enqueueUsers(II); + return; + } Base::visitIntrinsicInst(II); } @@ -1263,7 +1328,15 @@ LLVM_DEBUG(dbgs() << " original: " << PN << "\n"); LoadInst *SomeLoad = cast(PN.user_back()); - Type *LoadTy = SomeLoad->getType(); + if (SomeLoad->getPointerOperand() != &PN) { + // this must be the side channel -> ignore the speculation for now + LLVM_DEBUG(llvm::dbgs() + << " not speculating dependency on side channel: " + << *SomeLoad << "\n"); + return; + } + + Type *LoadTy = cast(PN.getType())->getElementType(); IRBuilderTy PHIBuilder(&PN); PHINode *NewPN = PHIBuilder.CreatePHI(LoadTy, PN.getNumIncomingValues(), PN.getName() + ".sroa.speculated"); @@ -2274,6 +2347,9 @@ const uint64_t NewAllocaBeginOffset, NewAllocaEndOffset; Type *NewAllocaTy; + IntrinsicInst *OldNoAliasDecl = nullptr; + IntrinsicInst *NewNoAliasDecl = nullptr; + // This is a convenience and flag variable that will be null unless the new // alloca's integer operations should be widened to this integer type due to // passing isIntegerWideningViable above. If it is non-null, the desired @@ -2344,6 +2420,7 @@ ++NumVectorized; } assert((!IntTy && !VecTy) || (IntTy && !VecTy) || (!IntTy && VecTy)); + prepareNoAliasDecl(); } bool visit(AllocaSlices::const_iterator I) { @@ -3063,11 +3140,126 @@ IRB.CreateAlignedStore(Src, DstPtr, DstAlign, II.isVolatile())); if (AATags) Store->setAAMetadata(AATags); - LLVM_DEBUG(dbgs() << " to: " << *Store << "\n"); + LLVM_DEBUG(dbgs() << "(3) to: " << *Store << "\n"); return !II.isVolatile(); } - bool visitIntrinsicInst(IntrinsicInst &II) { + void prepareNoAliasDecl() { + OldNoAliasDecl = nullptr; + NewNoAliasDecl = nullptr; + for (auto U : OldAI.users()) { + IntrinsicInst *II = dyn_cast(U); + if (II && II->getIntrinsicID() == Intrinsic::noalias_decl) { + if (OldNoAliasDecl) { + // We alreay found a llvm.noalias.decl - leave it up to the visiter to + // proapgate + OldNoAliasDecl = nullptr; + NewNoAliasDecl = nullptr; + break; + } + IRB.SetInsertPoint(II); + IRB.SetCurrentDebugLocation(II->getDebugLoc()); + IRB.SetNamePrefix(Twine(NewAI.getName()) + ".noalias.decl."); + + OldNoAliasDecl = II; + LLVM_DEBUG(dbgs() << "Found llvm.noalias.decl: " << *II << "\n"); + ConstantInt *OldId = cast( + II->getArgOperand(Intrinsic::NoAliasDeclObjIdArg)); + NewNoAliasDecl = cast(IRB.CreateNoAliasDeclaration( + &NewAI, NewAllocaBeginOffset + OldId->getZExtValue(), + II->getArgOperand(2))); + LLVM_DEBUG(dbgs() << "New llvm.noalias.decl: " << *NewNoAliasDecl + << "\n"); + // continue - it is possible we see multiple llvm.noalias.decl! + } + } + } + + bool visitNoAliasDeclIntrinsicInst(IntrinsicInst &II) { + assert(II.getIntrinsicID() == Intrinsic::noalias_decl); + LLVM_DEBUG(dbgs() << " original: " << II << "\n"); + Value *New; + if (OldNoAliasDecl) { + assert(OldNoAliasDecl == &II); + assert(NewNoAliasDecl != nullptr); + New = NewNoAliasDecl; + } else { + assert(NewNoAliasDecl == nullptr); + ConstantInt *OldId = + cast(II.getArgOperand(Intrinsic::NoAliasDeclObjIdArg)); + New = cast(IRB.CreateNoAliasDeclaration( + &NewAI, NewAllocaBeginOffset + OldId->getZExtValue(), + II.getArgOperand(2))); + } + (void)New; + LLVM_DEBUG(dbgs() << " to: " << *New << "\n"); + + // Record this instruction for deletion. + Pass.DeadInsts.insert(&II); + + // nothing else to do - preparation was already done + return true; + } + + bool visitSideNoAliasIntrinsicInst(IntrinsicInst &II) { + assert(II.getIntrinsicID() == Intrinsic::side_noalias); + assert(II.getArgOperand(Intrinsic::SideNoAliasIdentifyPArg) == OldPtr); + LLVM_DEBUG(dbgs() << " original: " << II << "\n"); + if (II.getArgOperand(Intrinsic::SideNoAliasNoAliasDeclArg) == + OldNoAliasDecl) { + assert(OldNoAliasDecl && NewNoAliasDecl && + "If we get here, we must have an old and a new llvm.noalias.decl"); + II.setArgOperand(Intrinsic::SideNoAliasNoAliasDeclArg, NewNoAliasDecl); + } + II.setArgOperand( + Intrinsic::SideNoAliasIdentifyPArg, + getNewAllocaSlicePtr( + IRB, + II.getArgOperand(Intrinsic::SideNoAliasIdentifyPArg)->getType())); + if (NewAllocaBeginOffset > 0) { + Value *OldObjIdV = + II.getArgOperand(Intrinsic::SideNoAliasIdentifyPObjIdArg); + auto NewObjId = ConstantInt::get( + OldObjIdV->getType(), + cast(OldObjIdV)->getZExtValue() + NewAllocaBeginOffset); + II.setArgOperand(Intrinsic::SideNoAliasIdentifyPObjIdArg, NewObjId); + } + LLVM_DEBUG(dbgs() << " to: " << II << "\n"); + deleteIfTriviallyDead(OldPtr); + return true; + } + + bool visitNoAliasIntrinsicInst(IntrinsicInst &II) { + assert(II.getIntrinsicID() == Intrinsic::noalias); + assert(II.getArgOperand(Intrinsic::NoAliasIdentifyPArg) == OldPtr); + LLVM_DEBUG(dbgs() << " original: " << II << "\n"); + if (II.getArgOperand(Intrinsic::NoAliasNoAliasDeclArg) == OldNoAliasDecl) { + assert(OldNoAliasDecl && NewNoAliasDecl && + "If we get here, we must have an old and a new llvm.noalias.decl"); + II.setArgOperand(Intrinsic::NoAliasNoAliasDeclArg, NewNoAliasDecl); + } + II.setArgOperand( + Intrinsic::NoAliasIdentifyPArg, + getNewAllocaSlicePtr( + IRB, II.getArgOperand(Intrinsic::NoAliasIdentifyPArg)->getType())); + if (NewAllocaBeginOffset > 0) { + Value *OldObjIdV = II.getArgOperand(Intrinsic::NoAliasIdentifyPObjIdArg); + auto NewObjId = ConstantInt::get( + OldObjIdV->getType(), + cast(OldObjIdV)->getZExtValue() + NewAllocaBeginOffset); + II.setArgOperand(Intrinsic::NoAliasIdentifyPObjIdArg, NewObjId); + } + LLVM_DEBUG(dbgs() << " to: " << II << "\n"); + deleteIfTriviallyDead(OldPtr); + return true; + } + + bool visitNoAliasCopyGuardIntrinsicInst(IntrinsicInst &II) { + assert(II.getIntrinsicID() == Intrinsic::noalias_copy_guard); + return true; + } + + bool visitLifetimeIntrinsicInst(IntrinsicInst &II) { assert(II.isLifetimeStartOrEnd()); LLVM_DEBUG(dbgs() << " original: " << II << "\n"); assert(II.getArgOperand(1) == OldPtr); @@ -3105,6 +3297,25 @@ return true; } + bool visitIntrinsicInst(IntrinsicInst &II) { + switch (II.getIntrinsicID()) { + case Intrinsic::lifetime_start: + case Intrinsic::lifetime_end: + return visitLifetimeIntrinsicInst(II); + case Intrinsic::noalias_decl: + return visitNoAliasDeclIntrinsicInst(II); + case Intrinsic::noalias: + return visitNoAliasIntrinsicInst(II); + case Intrinsic::side_noalias: + return visitSideNoAliasIntrinsicInst(II); + case Intrinsic::noalias_copy_guard: + return visitNoAliasCopyGuardIntrinsicInst(II); + default: + assert(false && "SROA: SliceRewriter: unhandled intrinsic"); + return false; + } + } + void fixLoadStoreAlign(Instruction &Root) { // This algorithm implements the same visitor loop as // hasUnsafePHIOrSelectUse, and fixes the alignment of each load @@ -3204,6 +3415,66 @@ }; namespace { +static llvm::Instruction *introduceNoAliasWhenCopyGuardIndicesAreCompatible( + llvm::LoadInst *Load, llvm::Instruction *CopyGuardII) { + assert(CopyGuardII); + GetElementPtrInst *GEP = + dyn_cast(Load->getPointerOperand()); + assert(GEP && "load of llvm.noalias.copy.guard without GEP ??"); + + bool indicesAreCompatible = false; + MDNode *CopyGuardIndices = cast( + cast( + CopyGuardII->getOperand(Intrinsic::NoAliasCopyGuardIndicesArg)) + ->getMetadata()); + for (const MDOperand &MDOp : CopyGuardIndices->operands()) { + if (const MDNode *IMD = dyn_cast(MDOp)) { + if (IMD->getNumOperands() != GEP->getNumIndices()) { + indicesAreCompatible = false; + } else { + unsigned index = 0; + indicesAreCompatible = true; + for (Value *Index : GEP->indices()) { + const MDOperand &MDIndex = IMD->getOperand(index); + ++index; + ConstantInt *C_lhs = + cast(cast(MDIndex)->getValue()); + if (C_lhs->isMinusOne()) + continue; // accept any index at this place + ConstantInt *C_rhs = dyn_cast(Index); + if ((C_rhs == nullptr) || + (C_lhs->getSExtValue() != + C_rhs->getSExtValue())) { // compare int64 - the ConstantInt can + // have different types + indicesAreCompatible = false; + break; + } + } + } + if (indicesAreCompatible) { + break; + } + } + } + if (indicesAreCompatible) { + IRBuilderTy IRB(Load->getNextNode()); + // A compatible set of indices was found - introduce a noalias intrinsic + // FIXME: what AAMetadata should we put on the llvm.noalias ? + auto NoAlias = IRB.CreateNoAliasPointer( + Load, + CopyGuardII->getOperand(Intrinsic::NoAliasCopyGuardNoAliasDeclArg), GEP, + CopyGuardII->getOperand(Intrinsic::NoAliasCopyGuardScopeArg), + Load->getName() + ".noalias"); + // juggle around + Load->replaceAllUsesWith(NoAlias); + NoAlias->setOperand(0, Load); + LLVM_DEBUG(llvm::dbgs() + << " - compatible, introduced:" << *NoAlias << "\n"); + return NoAlias; + } + + return Load; +} /// Visitor to rewrite aggregate loads and stores as scalar. /// @@ -3213,6 +3484,7 @@ class AggLoadStoreRewriter : public InstVisitor { // Befriend the base class so it can delegate to private visit methods. friend class InstVisitor; + typedef InstVisitor Base; /// Queue of pointer uses to analyze and potentially rewrite. SmallVector Queue; @@ -3347,38 +3619,76 @@ struct LoadOpSplitter : public OpSplitter { AAMDNodes AATags; + Instruction *CopyGuardII = nullptr; + unsigned CGIIndex = 0; LoadOpSplitter(Instruction *InsertionPoint, Value *Ptr, Type *BaseTy, - AAMDNodes AATags, unsigned BaseAlign, const DataLayout &DL) + AAMDNodes AATags, unsigned BaseAlign, const DataLayout &DL, + Instruction *CopyGuardII_) : OpSplitter(InsertionPoint, Ptr, BaseTy, BaseAlign, - DL), AATags(AATags) {} + DL), + AATags(AATags), CopyGuardII(CopyGuardII_) {} /// Emit a leaf load of a single value. This is called at the leaves of the /// recursive emission to actually load values. void emitFunc(Type *Ty, Value *&Agg, unsigned Align, const Twine &Name) { assert(Ty->isSingleValueType()); // Load the single value and insert it using the indices. + auto Ptr = this->Ptr; // Make sure _NOT_ to overwrite the Ptr member + if (CopyGuardII) { + assert(CopyGuardII == Ptr && "Ptr != CopyGuardII ???"); + Ptr = CopyGuardII->getOperand(0); // look through noalias.copy.guard + } Value *GEP = IRB.CreateInBoundsGEP(BaseTy, Ptr, GEPIndices, Name + ".gep"); - LoadInst *Load = IRB.CreateAlignedLoad(Ty, GEP, Align, Name + ".load"); + Instruction *Load = IRB.CreateAlignedLoad(Ty, GEP, Align, Name + ".load"); if (AATags) Load->setAAMetadata(AATags); + if (CopyGuardII && Ty->isPointerTy()) { + Load = introduceNoAliasWhenCopyGuardIndicesAreCompatible( + cast(Load), CopyGuardII); + } Agg = IRB.CreateInsertValue(Agg, Load, Indices, Name + ".insert"); LLVM_DEBUG(dbgs() << " to: " << *Load << "\n"); } }; bool visitLoadInst(LoadInst &LI) { + if (U->getOperandNo() == LI.getNoaliasSideChannelOperandIndex()) { + // Skip side channel + assert(LI.hasNoaliasSideChannelOperand() && + LI.getNoaliasSideChannelOperand() == *U); + return false; + } assert(LI.getPointerOperand() == *U); - if (!LI.isSimple() || LI.getType()->isSingleValueType()) + Instruction *CopyGuardII = nullptr; + { + Value *BasePtr = LI.getPointerOperand()->stripInBoundsOffsets(); + if (IntrinsicInst *BaseIntr = dyn_cast(BasePtr)) { + if (BaseIntr->getIntrinsicID() == Intrinsic::noalias_copy_guard) { + CopyGuardII = BaseIntr; + LLVM_DEBUG(llvm::dbgs() << " Replacing Load:" << LI + << "\n" + " Depends on:" + << *CopyGuardII << "\n"); + } + } + } + if (!LI.isSimple() || LI.getType()->isSingleValueType()) { + if (CopyGuardII) { + auto Load = + introduceNoAliasWhenCopyGuardIndicesAreCompatible(&LI, CopyGuardII); + return (Load != &LI); + } return false; + } // We have an aggregate being loaded, split it apart. LLVM_DEBUG(dbgs() << " original: " << LI << "\n"); AAMDNodes AATags; LI.getAAMetadata(AATags); LoadOpSplitter Splitter(&LI, *U, LI.getType(), AATags, - getAdjustedAlignment(&LI, 0, DL), DL); + getAdjustedAlignment(&LI, 0, DL), DL, CopyGuardII); Value *V = UndefValue::get(LI.getType()); Splitter.emitSplitOps(LI.getType(), V, LI.getName() + ".fca"); LI.replaceAllUsesWith(V); @@ -3436,6 +3746,30 @@ return false; } + // Look through noalias intrinsics + bool visitIntrinsicInst(IntrinsicInst &II) { + if (II.getIntrinsicID() == Intrinsic::noalias) { + if (II.getOperand(0) == *U) { + enqueueUsers(II); + } + return false; + } + if (II.getIntrinsicID() == Intrinsic::side_noalias || + II.getIntrinsicID() == Intrinsic::noalias_decl) { + return false; + } + if (II.getIntrinsicID() == Intrinsic::noalias_copy_guard) { + LLVM_DEBUG(llvm::dbgs() + << "AggLoadStoreRewriter: handling llvm.noalias.copy.guard:" + << (II.getOperand(0) == *U) << ":" << II << "\n"); + if (II.getOperand(0) == *U) + enqueueUsers(II); + return false; + } + + return Base::visitIntrinsicInst(II); + } + bool visitAddrSpaceCastInst(AddrSpaceCastInst &ASC) { enqueueUsers(ASC); return false; Index: llvm/lib/Transforms/Scalar/Scalar.cpp =================================================================== --- llvm/lib/Transforms/Scalar/Scalar.cpp +++ llvm/lib/Transforms/Scalar/Scalar.cpp @@ -37,6 +37,7 @@ initializeBDCELegacyPassPass(Registry); initializeAlignmentFromAssumptionsPass(Registry); initializeCallSiteSplittingLegacyPassPass(Registry); + initializeConnectNoAliasDeclLegacyPassPass(Registry); initializeConstantHoistingLegacyPassPass(Registry); initializeConstantPropagationPass(Registry); initializeCorrelatedValuePropagationPass(Registry); @@ -88,6 +89,7 @@ initializeMergedLoadStoreMotionLegacyPassPass(Registry); initializeNaryReassociateLegacyPassPass(Registry); initializePartiallyInlineLibCallsLegacyPassPass(Registry); + initializePropagateAndConvertNoAliasLegacyPassPass(Registry); initializeReassociateLegacyPassPass(Registry); initializeRegToMemPass(Registry); initializeRewriteStatepointsForGCLegacyPassPass(Registry); Index: llvm/lib/Transforms/Utils/CMakeLists.txt =================================================================== --- llvm/lib/Transforms/Utils/CMakeLists.txt +++ llvm/lib/Transforms/Utils/CMakeLists.txt @@ -43,6 +43,7 @@ MisExpect.cpp ModuleUtils.cpp NameAnonGlobals.cpp + NoAliasUtils.cpp PredicateInfo.cpp PromoteMemoryToRegister.cpp StripGCRelocates.cpp Index: llvm/lib/Transforms/Utils/CloneFunction.cpp =================================================================== --- llvm/lib/Transforms/Utils/CloneFunction.cpp +++ llvm/lib/Transforms/Utils/CloneFunction.cpp @@ -36,6 +36,21 @@ #include using namespace llvm; +static void PropagateNoAliasSideChannelInfo(Instruction *To, + const Instruction *From) { + // The noalias_sidechannel is not automatically copied over in a 'clone()' + // Let's do it here. + if (auto *LI = dyn_cast(From)) { + if (LI->hasNoaliasSideChannelOperand()) + cast(To)->setNoaliasSideChannelOperand( + LI->getNoaliasSideChannelOperand()); + } else if (auto SI = dyn_cast(From)) { + if (SI->hasNoaliasSideChannelOperand()) + cast(To)->setNoaliasSideChannelOperand( + SI->getNoaliasSideChannelOperand()); + } +} + /// See comments in Cloning.h. BasicBlock *llvm::CloneBasicBlock(const BasicBlock *BB, ValueToValueMapTy &VMap, const Twine &NameSuffix, Function *F, @@ -55,6 +70,8 @@ DIFinder->processInstruction(*TheModule, I); Instruction *NewInst = I.clone(); + PropagateNoAliasSideChannelInfo(NewInst, &I); + if (I.hasName()) NewInst->setName(I.getName() + NameSuffix); NewBB->getInstList().push_back(NewInst); @@ -334,6 +351,7 @@ II != IE; ++II) { Instruction *NewInst = II->clone(); + PropagateNoAliasSideChannelInfo(NewInst, &*II); // Eagerly remap operands to the newly cloned instruction, except for PHI // nodes for which we defer processing until we update the CFG. @@ -861,6 +879,7 @@ // terminator gets replaced and StopAt == BB's terminator. for (; StopAt != &*BI && BB->getTerminator() != &*BI; ++BI) { Instruction *New = BI->clone(); + PropagateNoAliasSideChannelInfo(New, &*BI); New->setName(BI->getName()); New->insertBefore(NewTerm); ValueMapping[&*BI] = New; Index: llvm/lib/Transforms/Utils/InlineFunction.cpp =================================================================== --- llvm/lib/Transforms/Utils/InlineFunction.cpp +++ llvm/lib/Transforms/Utils/InlineFunction.cpp @@ -28,7 +28,6 @@ #include "llvm/Analysis/EHPersonalities.h" #include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Analysis/ProfileSummaryInfo.h" -#include "llvm/Transforms/Utils/Local.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/Analysis/VectorUtils.h" #include "llvm/IR/Argument.h" @@ -45,6 +44,7 @@ #include "llvm/IR/Dominators.h" #include "llvm/IR/Function.h" #include "llvm/IR/IRBuilder.h" +#include "llvm/IR/InstIterator.h" #include "llvm/IR/InstrTypes.h" #include "llvm/IR/Instruction.h" #include "llvm/IR/Instructions.h" @@ -61,6 +61,8 @@ #include "llvm/Support/CommandLine.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Transforms/Utils/Cloning.h" +#include "llvm/Transforms/Utils/Local.h" +#include "llvm/Transforms/Utils/NoAliasUtils.h" #include "llvm/Transforms/Utils/ValueMapper.h" #include #include @@ -79,6 +81,11 @@ cl::Hidden, cl::desc("Convert noalias attributes to metadata during inlining.")); +static cl::opt UseNoAliasIntrinsic( + "use-noalias-intrinsic-during-inlining", cl::Hidden, cl::ZeroOrMore, + cl::init(true), + cl::desc("Use the llvm.noalias intrinsic during inlining.")); + static cl::opt PreserveAlignmentAssumptions("preserve-alignment-assumptions-during-inlining", cl::init(true), cl::Hidden, @@ -810,7 +817,8 @@ /// not be differentiated (and this would lead to miscompiles because the /// non-aliasing property communicated by the metadata could have /// call-site-specific control dependencies). -static void CloneAliasScopeMetadata(CallSite CS, ValueToValueMapTy &VMap) { +static void CloneAliasScopeMetadata(CallSite CS, ValueToValueMapTy &VMap, + Function *Caller) { const Function *CalledFunc = CS.getCalledFunction(); SetVector MD; @@ -819,16 +827,69 @@ // inter-procedural alias analysis passes. We can revisit this if it becomes // an efficiency or overhead problem. + // Track function level !noalias metadata ('unknown function' scope). This + // should be merged with the data from the callee + MDNode *CallerNoAlias = Caller->getMetadata("noalias"); + MDNode *CalleeNoAlias = CalledFunc->getMetadata("noalias"); + llvm::MDNode *NewUnknownScope = nullptr; + + if ((CalleeNoAlias != nullptr) && (CallerNoAlias == nullptr)) { + // NOTE: keep in sync with (clang) CGExpr: EmitLoadOfScalar + // NOTE: keep in sync with (clang) CGDecl: EmitAutoVarNoAlias/EmitNoAliasDecl + // NOTE: keep in sync with (llvm) InlineFunction: CloneAliasScopeMetadata + llvm::MDBuilder MDB(Caller->getContext()); + std::string Name = Caller->getName(); + auto NoAliasDomain = MDB.createAnonymousAliasScopeDomain(Name); + Name += ": unknown scope"; + + llvm::MDNode *UnknownScope = + MDB.createAnonymousAliasScope(NoAliasDomain, Name); + + { + SmallVector ScopeListEntries(1, UnknownScope); + CallerNoAlias = llvm::MDNode::get(Caller->getContext(), ScopeListEntries); + Caller->setMetadata("noalias", CallerNoAlias); + } + NewUnknownScope = UnknownScope; + } + for (const BasicBlock &I : *CalledFunc) for (const Instruction &J : I) { if (const MDNode *M = J.getMetadata(LLVMContext::MD_alias_scope)) MD.insert(M); if (const MDNode *M = J.getMetadata(LLVMContext::MD_noalias)) MD.insert(M); + + // We also need to clone the metadata in noalias intrinsics. + if (const auto *II = dyn_cast(&J)) { + if (II->getIntrinsicID() == Intrinsic::noalias) + if (const auto *M = dyn_cast( + cast( + II->getOperand(Intrinsic::NoAliasScopeArg)) + ->getMetadata())) + MD.insert(M); + if (II->getIntrinsicID() == Intrinsic::side_noalias) + if (const auto *M = dyn_cast( + cast( + II->getOperand(Intrinsic::SideNoAliasScopeArg)) + ->getMetadata())) + MD.insert(M); + if (II->getIntrinsicID() == Intrinsic::noalias_decl) + if (const auto *M = dyn_cast( + cast( + II->getOperand(Intrinsic::NoAliasDeclScopeArg)) + ->getMetadata())) + MD.insert(M); + if (II->getIntrinsicID() == Intrinsic::noalias_copy_guard) + if (const auto *M = dyn_cast( + cast( + II->getOperand(Intrinsic::NoAliasCopyGuardScopeArg)) + ->getMetadata())) + MD.insert(M); + } } - if (MD.empty()) - return; + // No early exit: make sure all memory instructions are annotated // Walk the existing metadata, adding the complete (perhaps cyclic) chain to // the set. @@ -845,6 +906,13 @@ // the noalias scopes and the lists of those scopes. SmallVector DummyNodes; DenseMap MDMap; + if (CalleeNoAlias != nullptr) { + // Map CalleeNoAlias onto CallerNoAlias + MD.remove(CalleeNoAlias); + DummyNodes.push_back(MDTuple::getTemporary(CalledFunc->getContext(), None)); + MDMap[CalleeNoAlias].reset(DummyNodes.back().get()); + cast(MDMap[CalleeNoAlias])->replaceAllUsesWith(CallerNoAlias); + } for (const MDNode *I : MD) { DummyNodes.push_back(MDTuple::getTemporary(CalledFunc->getContext(), None)); MDMap[I].reset(DummyNodes.back().get()); @@ -872,6 +940,7 @@ // Now replace the metadata in the new inlined instructions with the // repacements from the map. + SmallPtrSet HandledInstructions; for (ValueToValueMapTy::iterator VMI = VMap.begin(), VMIE = VMap.end(); VMI != VMIE; ++VMI) { if (!VMI->second) @@ -881,6 +950,10 @@ if (!NI) continue; + // Check if we already adapted this instruction + if (!HandledInstructions.insert(NI).second) + continue; + if (MDNode *M = NI->getMetadata(LLVMContext::MD_alias_scope)) { MDNode *NewMD = MDMap[M]; // If the call site also had alias scope metadata (a list of scopes to @@ -909,6 +982,150 @@ if (MDNode *M = CS.getInstruction()->getMetadata(LLVMContext::MD_noalias)) NI->setMetadata(LLVMContext::MD_noalias, M); } + + // Update the metadata referenced by a noalias intrinsic + if (auto *II = dyn_cast(NI)) { + auto ID = II->getIntrinsicID(); + if (ID == Intrinsic::noalias || ID == Intrinsic::side_noalias || + ID == Intrinsic::noalias_decl || + ID == Intrinsic::noalias_copy_guard) { + int NoAliasScope = 0; + if (ID == Intrinsic::noalias) + NoAliasScope = Intrinsic::NoAliasScopeArg; + if (ID == Intrinsic::side_noalias) + NoAliasScope = Intrinsic::SideNoAliasScopeArg; + if (ID == Intrinsic::noalias_decl) + NoAliasScope = Intrinsic::NoAliasDeclScopeArg; + if (ID == Intrinsic::noalias_copy_guard) + NoAliasScope = Intrinsic::NoAliasCopyGuardScopeArg; + + if (auto *M = dyn_cast( + cast(II->getOperand(NoAliasScope)) + ->getMetadata())) { + // If the metadata is not in the map, it could be a new intrinsic + // that was just added. + auto MI = MDMap.find(M); + if (MI != MDMap.end()) + II->setOperand( + NoAliasScope, + MetadataAsValue::get(CalledFunc->getContext(), MI->second)); + } + } + } + } + + if (NewUnknownScope) { + // We now need to add the out-of-function scope to _all_ instructions with + // noalias data in the 'caller' + // Note: following strange choice of variables names is similar to how it is + // done later + // FIXME: hmm this might be less than fast :( + // hmm it is also needed to do this _after_ the metadata cloning, otherwise + // we seem to lose information ! + for (BasicBlock &I : *Caller) { + for (Instruction &J : I) { + if (const MDNode *M = J.getMetadata(LLVMContext::MD_noalias)) { + SmallVector NewScopeList; + for (auto &MDOp : M->operands()) { + NewScopeList.push_back(MDOp); + } + NewScopeList.push_back(NewUnknownScope); + J.setMetadata(LLVMContext::MD_noalias, + MDNode::get(Caller->getContext(), NewScopeList)); + } else if (J.mayReadOrWriteMemory()) { + // no Noalias, but we need to add the (new) 'unknown scope' ! + J.setMetadata(LLVMContext::MD_noalias, CallerNoAlias); + } + } + } + } +} + +/// If the inlined function has noalias arguments, +/// then add a new alias scope to instructions that might access memory, and +/// noalias intrinsics corresponding to the noalias arguments. +static void AddNoAliasIntrinsics(CallSite CS, ValueToValueMapTy &VMap, + MDNode *&NewScopeList) { + if (!EnableNoAliasConversion || !UseNoAliasIntrinsic) + return; + + const Function *CalledFunc = CS.getCalledFunction(); + SmallVector NoAliasArgs; + + for (const auto &Arg : CalledFunc->args()) { + if (Arg.hasNoAliasAttr() && !Arg.use_empty()) + NoAliasArgs.push_back(&Arg); + } + + if (NoAliasArgs.empty()) + return; + + MDBuilder MDB(CalledFunc->getContext()); + // Create a new scope domain for this function. + MDNode *NewDomain = + MDB.createAnonymousAliasScopeDomain(CalledFunc->getName()); + + // Create a new scope for each noalias argument. + SmallVector Scopes; + + // For each noalias argument, add a noalias intrinsic call, and update the + // value map to refer to the new result of the noalias call. + for (const Argument *A : NoAliasArgs) { + Value *MappedA = VMap[A]; + if (isa(MappedA)) { + // Skip generating restrict intrinsics for known 'null' pointers + continue; + } + + std::string Name = CalledFunc->getName(); + if (A->hasName()) { + Name += ": %"; + Name += A->getName(); + } else { + Name += ": argument "; + Name += utostr(A->getArgNo()); + } + + MDNode *AScope = MDB.createAnonymousAliasScope(NewDomain, Name); + Scopes.push_back(AScope); + + MDNode *AScopeList = MDNode::get(CalledFunc->getContext(), AScope); + + // The alloca was optimized away -> use a nullptr + auto *IdentifyPAlloca = + ConstantPointerNull::get(MappedA->getType()->getPointerTo()); + auto *NoAliasDecl = + IRBuilder<>(CS.getInstruction()) + .CreateNoAliasDeclaration(IdentifyPAlloca, AScopeList); + Value *NA = IRBuilder<>(CS.getInstruction()) + .CreateNoAliasPointer(MappedA, NoAliasDecl, IdentifyPAlloca, + AScopeList); + VMap[A] = NA; + } + + NewScopeList = MDNode::get(CalledFunc->getContext(), Scopes); +} + +static void AddNoAliasIntrinsicsScope(CallSite CS, ValueToValueMapTy &VMap, + MDNode *NewScopeList) { + if (!EnableNoAliasConversion || !UseNoAliasIntrinsic || !NewScopeList) + return; + + // Iterate over all new instructions in the map; for all memory-access + // instructions, add the alias scope metadata. + for (ValueToValueMapTy::iterator VMI = VMap.begin(), VMIE = VMap.end(); + VMI != VMIE; ++VMI) { + if (!isa(VMI->first) || !VMI->second) + continue; + + auto *NI = dyn_cast(VMI->second); + if (!NI || !NI->mayReadOrWriteMemory()) + continue; + + NI->setMetadata( + LLVMContext::MD_noalias, + MDNode::concatenate(NI->getMetadata(LLVMContext::MD_noalias), + NewScopeList)); } } @@ -918,7 +1135,7 @@ /// non-derived loads, stores and memory intrinsics with the new alias scopes. static void AddAliasScopeMetadata(CallSite CS, ValueToValueMapTy &VMap, const DataLayout &DL, AAResults *CalleeAAR) { - if (!EnableNoAliasConversion) + if (!EnableNoAliasConversion || UseNoAliasIntrinsic) return; const Function *CalledFunc = CS.getCalledFunction(); @@ -1676,6 +1893,7 @@ ValueToValueMapTy VMap; // Keep a list of pair (dst, src) to emit byval initializations. SmallVector, 4> ByValInit; + MDNode *NAScopeList = nullptr; auto &DL = Caller->getParent()->getDataLayout(); @@ -1706,6 +1924,9 @@ // check what will be known at the start of the inlined code. AddAlignmentAssumptions(CS, IFI); + // Add noalias intrinsics corresponding to noalias function arguments. + AddNoAliasIntrinsics(CS, VMap, NAScopeList); + // We want the inliner to prune the code as it copies. We would LOVE to // have no dead or constant instructions leftover after inlining occurs // (which can happen, e.g., because an argument was constant), but we'll be @@ -1796,7 +2017,7 @@ CalledFunc->getSubprogram() != nullptr); // Clone existing noalias metadata if necessary. - CloneAliasScopeMetadata(CS, VMap); + CloneAliasScopeMetadata(CS, VMap, Caller); // Add noalias metadata if necessary. AddAliasScopeMetadata(CS, VMap, DL, CalleeAAR); @@ -1804,6 +2025,9 @@ // Propagate llvm.mem.parallel_loop_access if necessary. PropagateParallelLoopAccessMetadata(CS, VMap); + // Add scopes to memory accesses corresponding to added noalias intrinsics. + AddNoAliasIntrinsicsScope(CS, VMap, NAScopeList); + // Register any cloned assumptions. if (IFI.GetAssumptionCache) for (BasicBlock &NewBlock : @@ -2251,6 +2475,9 @@ // Since we are now done with the return instruction, delete it also. Returns[0]->eraseFromParent(); + // Already try to connect llvm.noalias.decl where possible + propagateAndConnectNoAliasDecl(Caller); + // We are now done with the inlining. return true; } @@ -2401,6 +2628,9 @@ // Now we can remove the CalleeEntry block, which is now empty. Caller->getBasicBlockList().erase(CalleeEntry); + // Already try to connect llvm.noalias.decl where possible + propagateAndConnectNoAliasDecl(Caller); + // If we inserted a phi node, check to see if it has a single value (e.g. all // the entries are the same or undef). If so, remove the PHI so it doesn't // block other optimizations. Index: llvm/lib/Transforms/Utils/Local.cpp =================================================================== --- llvm/lib/Transforms/Utils/Local.cpp +++ llvm/lib/Transforms/Utils/Local.cpp @@ -417,6 +417,29 @@ return false; } + + // noalias intrinsics are dead if they have no uses (they're tagged as + // writing, but that is only to maintain control dependencies, not because + // they actually write anything). + if (II->getIntrinsicID() == Intrinsic::noalias || + II->getIntrinsicID() == Intrinsic::side_noalias || + II->getIntrinsicID() == Intrinsic::noalias_copy_guard || + II->getIntrinsicID() == Intrinsic::noalias_arg_guard) + return II->use_empty(); + + // llvm.noalias.decl which operand only has a single use are trivially dead. + // NOTE: this assumes that noalias intrinsics nodes are never combined ! + if (II->getIntrinsicID() == Intrinsic::noalias_decl) { + auto *DAA = II->getOperand(Intrinsic::NoAliasDeclAllocaArg); + if (isa(DAA)) { + // no associated alloca -> if there are no uses, it is trivially dead + return II->getNumUses() == 0; + } else { + return (DAA->getNumUses() == 1) && + (II->getOperand(Intrinsic::NoAliasDeclScopeArg)->getNumUses() == + 1); + } + } } if (isAllocLikeFn(I, TLI)) Index: llvm/lib/Transforms/Utils/LoopRotationUtils.cpp =================================================================== --- llvm/lib/Transforms/Utils/LoopRotationUtils.cpp +++ llvm/lib/Transforms/Utils/LoopRotationUtils.cpp @@ -38,6 +38,7 @@ #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/Local.h" #include "llvm/Transforms/Utils/LoopUtils.h" +#include "llvm/Transforms/Utils/NoAliasUtils.h" #include "llvm/Transforms/Utils/SSAUpdater.h" #include "llvm/Transforms/Utils/ValueMapper.h" using namespace llvm; @@ -255,6 +256,81 @@ return false; } + // check if there are local restrict declarations and how they are used. + // - avoid breaking up mixed usage: + // -- either all usages must be in the OrigHeader, or no usages must be in the + // OrigHeader + // -- when usages are outside the function, and we decide to continue, break + // the connection with the llvm.noalias.decl, as it will have no impact any + // more. + SmallVector NoAliasDeclInstructions; + { + SmallVector SideNoAliasOrNoAliasToDisconnect; + for (Instruction &I : *OrigHeader) { + if (IntrinsicInst *II = dyn_cast(&I)) { + if (II->getIntrinsicID() == Intrinsic::noalias_decl) { + // Check usage validity: + bool UsedInsideHeader = false; + bool UsedOutsideHeaderInsideLoop = false; + for (User *U : II->users()) { + Instruction *UI=cast(U); + if (UI->getParent() == OrigHeader) { + UsedInsideHeader = true; + } else if (L->contains(UI)) { + UsedOutsideHeaderInsideLoop = true; + } else { + if (PHINode *PUI = dyn_cast(UI)) { + if (PUI->getNumIncomingValues() > 1) { + LLVM_DEBUG(llvm::dbgs() << "LoopRotation: NOT rotating - " << + *II << "\n used in PHI node " << *PUI << + ",\n in exit block with multiple entries.\n"); + return false; + } + } + SideNoAliasOrNoAliasToDisconnect.push_back(UI); + } + } + + if (UsedInsideHeader && UsedOutsideHeaderInsideLoop) { + LLVM_DEBUG(llvm::dbgs() << "LoopRotation: NOT rotating - " << + *II << " used in header and other parts of the loop.\n" + " Rotation would reduced the llvm.noalias quality too much.\n"); + return false; + } + + NoAliasDeclInstructions.push_back(II); + } + } + } + + // If we get here, we will do the rotate. + // First break the link between any llvm.noalias.decl and its outside-loop + // usage + for (Instruction *I : SideNoAliasOrNoAliasToDisconnect) { + unsigned OpN; + if (PHINode* PI = dyn_cast(I)) { + // can only happen when in the exit block -> single predecessor ! + assert(PI->getNumIncomingValues() == 1 && + "PHI node should have a single value"); + (void)PI; // Silence not-used warning in Release builds + OpN=0; + } else { + IntrinsicInst *II = cast(I); + if (II->getIntrinsicID() == Intrinsic::side_noalias) { + OpN = Intrinsic::SideNoAliasNoAliasDeclArg; + } else if (II->getIntrinsicID() == Intrinsic::noalias) { + OpN = Intrinsic::NoAliasNoAliasDeclArg; + } else { + assert(II->getIntrinsicID() == Intrinsic::noalias_copy_guard); + OpN = Intrinsic::NoAliasCopyGuardNoAliasDeclArg; + } + } + + auto *PT = cast(I->getOperand(OpN)->getType()); + I->setOperand(OpN, ConstantPointerNull::get(PT)); + } + } + // Now, this loop is suitable for rotation. BasicBlock *OrigPreheader = L->getLoopPreheader(); @@ -380,6 +456,74 @@ } } + if (!NoAliasDeclInstructions.empty()) { + // There are local restrict declarations: + // (general): + // Original: OrigPre { OrigHeader NewHeader ... Latch } + // after: (OrigPre+OrigHeader') { NewHeader ... Latch OrigHeader } + // + // with D: llvm.noalias.decl, U: side_noalias, depending on D + // ... { D U } can transform into: + // (0) : ... { D U } // no relevant rotation for this part + // (1) : ... D' { U D } + // (2) : ... D' U' { D U } + // + // We now want to transform: + // (1) -> : ... D' { D U D'' } + // (2) -> : ... D' U' { D D'' U'' } + // D: original llvm.noalias.decl + // D', U': duplicate with replaced scopes + // D'', U'': different duplicate with replaced scopes + // This ensures a safe fallback to 'may_alias' introduced by the rotate, as + // U'' and U' + // scopes will not be compatible wrt to the local restrict + + // Clone the NoAliasDecl again, insert right after the original, move the + // original to the NewHeader. + + Instruction* NewHeaderInsertionPoint = &(*NewHeader->begin()); + for (Instruction* NAD : NoAliasDeclInstructions) { + LLVM_DEBUG(llvm::dbgs() << " Cloning llvm.noalias.decl:" << *NAD << "\n"); + Instruction* NewNAD=NAD->clone(); + NewNAD->insertBefore(NAD); + + // remap dependencies in the OrigHeader block to NewNAD + NAD->replaceUsesInsideBlock(NewNAD, OrigHeader); + + // And move the original NAD to the NewHeader + NAD->moveBefore(NewHeaderInsertionPoint); + + // Now forget about the original NAD mapping + auto tmp = ValueMap[NAD]; // use a local copy to avoid undefined behavior + ValueMap[NewNAD] = tmp; // this could trigger a reallocation. + ValueMap.erase(NAD); + } + + // Scopes must now be duplicated, once for OrigHeader and once for + // OrigPreHeader' + { + auto& Context = NewHeader->getContext(); + + SmallVector NoAliasDeclScopes; + for (Instruction* NAD : NoAliasDeclInstructions) + NoAliasDeclScopes.push_back(cast( + NAD->getOperand(Intrinsic::NoAliasDeclScopeArg))); + + LLVM_DEBUG(llvm::dbgs() << " Updating OrigHeader scopes\n"); + llvm::cloneAndAdaptNoAliasScopes(NoAliasDeclScopes, { OrigHeader }, + Context, "h.rot1"); + LLVM_DEBUG(OrigHeader->dump()); + + LLVM_DEBUG(llvm::dbgs() << " Updating OrigPreheader scopes\n"); + llvm::cloneAndAdaptNoAliasScopes(NoAliasDeclScopes, { OrigPreheader }, + Context, "pre.rot1"); + LLVM_DEBUG(OrigPreheader->dump()); + + LLVM_DEBUG(llvm::dbgs() << " Updated NewHeader:\n"); + LLVM_DEBUG(NewHeader->dump()); + } + } + // Along with all the other instructions, we just cloned OrigHeader's // terminator into OrigPreHeader. Fix up the PHI nodes in each of OrigHeader's // successors by duplicating their incoming values for OrigHeader. Index: llvm/lib/Transforms/Utils/LoopUnroll.cpp =================================================================== --- llvm/lib/Transforms/Utils/LoopUnroll.cpp +++ llvm/lib/Transforms/Utils/LoopUnroll.cpp @@ -22,7 +22,6 @@ #include "llvm/Analysis/LoopIterator.h" #include "llvm/Analysis/OptimizationRemarkEmitter.h" #include "llvm/Analysis/ScalarEvolution.h" -#include "llvm/Transforms/Utils/Local.h" #include "llvm/IR/BasicBlock.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/DebugInfoMetadata.h" @@ -33,8 +32,10 @@ #include "llvm/Support/raw_ostream.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/Cloning.h" +#include "llvm/Transforms/Utils/Local.h" #include "llvm/Transforms/Utils/LoopSimplify.h" #include "llvm/Transforms/Utils/LoopUtils.h" +#include "llvm/Transforms/Utils/NoAliasUtils.h" #include "llvm/Transforms/Utils/SimplifyIndVar.h" #include "llvm/Transforms/Utils/UnrollLoop.h" using namespace llvm; @@ -598,6 +599,12 @@ << DIL->getFilename() << " Line: " << DIL->getLine()); } + // Phase1: Identify what noalias metadata is inside the loop: if it is inside + // the loop, the associated metadata must be clone for each iteration. + SmallVector LoopLocalNoAliasDeclScopes; + llvm::identifyNoAliasScopesToClone(L->getBlocks(), + LoopLocalNoAliasDeclScopes); + for (unsigned It = 1; It != ULO.Count; ++It) { std::vector NewBlocks; SmallDenseMap NewLoops; @@ -689,6 +696,16 @@ AC->registerAssumption(II); } } + + { + // Phase2: identify what other metadata depends on the cloned version + // Phase3: after cloning, replace the metadata with the corrected version + // for both memory instructions and noalias intrinsics + std::string ext = "It"; + ext += utostr(It); + cloneAndAdaptNoAliasScopes(LoopLocalNoAliasDeclScopes, NewBlocks, + Header->getContext(), ext); + } } // Loop over the PHI nodes in the original block, setting incoming values. Index: llvm/lib/Transforms/Utils/NoAliasUtils.cpp =================================================================== --- /dev/null +++ llvm/lib/Transforms/Utils/NoAliasUtils.cpp @@ -0,0 +1,251 @@ +//===-- NoAliasUtils.cpp - NoAlias Utility functions ----------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file defines common noalias metadatt and intrinsic utility functions. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Transforms/Utils/NoAliasUtils.h" +#include "llvm/ADT/MapVector.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/Analysis/ValueTracking.h" +#include "llvm/IR/InstIterator.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/MDBuilder.h" +#include "llvm/IR/Metadata.h" +#include "llvm/IR/Module.h" +#include "llvm/Support/Debug.h" + +using namespace llvm; + +#define DEBUG_TYPE "noalias-utils" + +bool llvm::propagateAndConnectNoAliasDecl(Function *F) { + auto *UnknownFunctionScope = F->getMetadata("noalias"); + if (UnknownFunctionScope == nullptr) + return false; + + SmallVector InterestingNoalias; + SmallMapVector KnownAllocaNoAliasDecl; + + auto TrackIfIsUnknownFunctionScope = [&](IntrinsicInst *I, unsigned Index) { + auto V = I->getOperand(Index); + if (cast(V)->getMetadata() == UnknownFunctionScope) { + InterestingNoalias.push_back(I); + } + }; + + for (Instruction &I : llvm::instructions(*F)) { + if (IntrinsicInst *II = dyn_cast(&I)) { + switch (II->getIntrinsicID()) { + case Intrinsic::noalias: { + TrackIfIsUnknownFunctionScope(II, Intrinsic::NoAliasScopeArg); + break; + } + case Intrinsic::side_noalias: { + TrackIfIsUnknownFunctionScope(II, Intrinsic::SideNoAliasScopeArg); + break; + } + case Intrinsic::noalias_copy_guard: { + TrackIfIsUnknownFunctionScope(II, Intrinsic::NoAliasCopyGuardScopeArg); + break; + } + case Intrinsic::noalias_decl: { + auto *depAlloca = dyn_cast(II->getOperand(0)); + if (depAlloca) { + KnownAllocaNoAliasDecl[depAlloca] = II; + } + break; + } + default: + break; + } + } + } + + if (KnownAllocaNoAliasDecl.empty() || InterestingNoalias.empty()) + return false; + + bool Changed = false; + for (auto *II : InterestingNoalias) { + SmallVector UO; + unsigned Index = + (II->getIntrinsicID() == Intrinsic::noalias + ? 0 + : (II->getIntrinsicID() == Intrinsic::side_noalias ? 1 : 2)); + const int IdentifyPArg[] = {Intrinsic::NoAliasIdentifyPArg, + Intrinsic::SideNoAliasIdentifyPArg, + Intrinsic::NoAliasCopyGuardIdentifyPBaseObject}; + const int ScopeArg[] = {Intrinsic::NoAliasScopeArg, + Intrinsic::SideNoAliasScopeArg, + Intrinsic::NoAliasCopyGuardScopeArg}; + const int NoAliasDeclArg[] = {Intrinsic::NoAliasNoAliasDeclArg, + Intrinsic::SideNoAliasNoAliasDeclArg, + Intrinsic::NoAliasCopyGuardNoAliasDeclArg}; + const int ObjIdArg[] = {Intrinsic::NoAliasIdentifyPObjIdArg, + Intrinsic::SideNoAliasIdentifyPObjIdArg, -1}; + + llvm::GetUnderlyingObjects(II->getOperand(IdentifyPArg[Index]), UO, + F->getParent()->getDataLayout()); + if (UO.size() != 1) { + // Multiple objects possible - It would be nice to propagate, but we do + // not do it yet. That is ok as the unknown function scope assumes more + // aliasing. + LLVM_DEBUG(llvm::dbgs() + << "WARNING: no llvm.noalias.decl reconnect accross " + "PHI/select - YET (" + << UO.size() << " underlying objects)\n"); + continue; + } + + auto UA = dyn_cast(UO[0]); + if (UA) { + auto it = KnownAllocaNoAliasDecl.find(UA); + if (it != KnownAllocaNoAliasDecl.end()) { + Instruction *Decl = it->second; + // found a simple matching declaration - propagate + II->setOperand(ScopeArg[Index], + Decl->getOperand(Intrinsic::NoAliasDeclScopeArg)); + II->setOperand(NoAliasDeclArg[Index], Decl); + + auto ObjIdIndex = ObjIdArg[Index]; + if (ObjIdIndex != -1) { + II->setOperand(ObjIdIndex, + Decl->getOperand(Intrinsic::NoAliasDeclObjIdArg)); + } + Changed = true; + } else if (UnknownFunctionScope && isa(UA)) { + if (cast(II->getOperand(ScopeArg[Index])) + ->getMetadata() == UnknownFunctionScope) { + // we have an alloca, but no llvm.noalias.decl and we have unknown + // function scope This is an indication of a temporary that (through a + // pointer or reference to a restrict pointer) introduces restrict. + // - the unknown scope is too broad for these cases + // - conceptually, the scope should be the lifetime of the local, but + // we don't have that information + // - the real restrictness should have been brought in through the + // 'depends on' relationship + // -> so we fall back on the 'depends on' and remove the restrictness + // information at this level. + LLVM_DEBUG( + llvm::dbgs() + << "- Temporary noalias object (without llvm.noalias.decl) " + "detected. Ignore restrictness: " + << *II << "\n"); + II->replaceAllUsesWith(II->getOperand(0)); + II->eraseFromParent(); + Changed = true; + } + } + } else { +#if !defined(NDEBUG) + if (isa(UO[0]) || isa(UO[0])) { + // Multiple objects possible - It would be nice to propagate, but we do + // not do it yet. That is ok as the unknown function scope assumes more + // aliasing. + LLVM_DEBUG(llvm::dbgs() + << "WARNING: no llvm.noalias.decl reconnect accross " + "PHI/select - YET: " + << *UO[0] << "\n"); + } +#endif + } + } + return Changed; +} + +void llvm::cloneNoAliasScopes( + ArrayRef NoAliasDeclScopes, + DenseMap &out_ClonedScopes, + DenseMap &out_ClonedMVScopes, + StringRef Ext, LLVMContext &Context) { + MDBuilder MDB(Context); + + for (auto *MV : NoAliasDeclScopes) { + SmallVector ScopeList; + for (auto &MDOperand : cast(MV->getMetadata())->operands()) { + if (MDNode *MD = dyn_cast(MDOperand)) { + llvm::AliasScopeNode SNAN(MD); + + std::string Name; + auto ScopeName = SNAN.getName(); + if (!ScopeName.empty()) { + Name = ScopeName; + Name += ":"; + Name += Ext; + } else { + Name = Ext; + } + + MDNode *NewScope = MDB.createAnonymousAliasScope( + const_cast(SNAN.getDomain()), Name); + out_ClonedScopes.insert(std::make_pair(MD, NewScope)); + ScopeList.push_back(NewScope); + } + } + MDNode *NewScopeList = MDNode::get(Context, ScopeList); + out_ClonedMVScopes.insert( + std::make_pair(MV, MetadataAsValue::get(Context, NewScopeList))); + } +} + +void llvm::adaptNoAliasScopes( + Instruction *I, DenseMap &ClonedScopes, + DenseMap &ClonedMVScopes, + LLVMContext &Context) { + // MetadataAsValue will always be replaced ! + for (int opI = 0, opIEnd = I->getNumOperands(); opI < opIEnd; ++opI) { + if (MetadataAsValue *MV = dyn_cast(I->getOperand(opI))) { + auto MvIt = ClonedMVScopes.find(MV); + if (MvIt != ClonedMVScopes.end()) { + I->setOperand(opI, MvIt->second); + } + } + } + if (const MDNode *CSNoAlias = I->getMetadata(LLVMContext::MD_noalias)) { + bool needsReplacement = false; + SmallVector NewScopeList; + for (auto &MDOp : CSNoAlias->operands()) { + if (MDNode *MD = dyn_cast_or_null(MDOp)) { + auto MdIt = ClonedScopes.find(MD); + if (MdIt != ClonedScopes.end()) { + NewScopeList.push_back(MdIt->second); + needsReplacement = true; + continue; + } + NewScopeList.push_back(MD); + } + } + if (needsReplacement) { + I->setMetadata(LLVMContext::MD_noalias, + MDNode::get(Context, NewScopeList)); + } + } +} + +void llvm::cloneAndAdaptNoAliasScopes( + ArrayRef NoAliasDeclScopes, + ArrayRef NewBlocks, LLVMContext &Context, StringRef Ext) { + if (NoAliasDeclScopes.empty()) + return; + + DenseMap ClonedScopes; + DenseMap ClonedMVScopes; + LLVM_DEBUG(llvm::dbgs() << "cloneAndAdaptNoAliasScopes: cloning " + << NoAliasDeclScopes.size() << " node(s)\n"); + + cloneNoAliasScopes(NoAliasDeclScopes, ClonedScopes, ClonedMVScopes, Ext, + Context); + // Identify instructions using metadata that needs adaptation + for (BasicBlock *NewBlock : NewBlocks) { + for (Instruction &I : *NewBlock) { + adaptNoAliasScopes(&I, ClonedScopes, ClonedMVScopes, Context); + } + } +} Index: llvm/lib/Transforms/Utils/PromoteMemoryToRegister.cpp =================================================================== --- llvm/lib/Transforms/Utils/PromoteMemoryToRegister.cpp +++ llvm/lib/Transforms/Utils/PromoteMemoryToRegister.cpp @@ -25,7 +25,6 @@ #include "llvm/Analysis/AssumptionCache.h" #include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Analysis/IteratedDominanceFrontier.h" -#include "llvm/Transforms/Utils/Local.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/IR/BasicBlock.h" #include "llvm/IR/CFG.h" @@ -35,6 +34,7 @@ #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/Dominators.h" #include "llvm/IR/Function.h" +#include "llvm/IR/IRBuilder.h" #include "llvm/IR/InstrTypes.h" #include "llvm/IR/Instruction.h" #include "llvm/IR/Instructions.h" @@ -45,6 +45,7 @@ #include "llvm/IR/Type.h" #include "llvm/IR/User.h" #include "llvm/Support/Casting.h" +#include "llvm/Transforms/Utils/Local.h" #include "llvm/Transforms/Utils/PromoteMemToReg.h" #include #include @@ -61,40 +62,181 @@ STATISTIC(NumDeadAlloca, "Number of dead alloca's removed"); STATISTIC(NumPHIInsert, "Number of PHI nodes inserted"); -bool llvm::isAllocaPromotable(const AllocaInst *AI) { +class PromotableChecker { +public: + bool check(bool c) { return c; } + void trackRemovable(const Instruction *I) {} + void trackOperandToZero(const Instruction *I, int operand) {} + void trackNoAliasDecl(const IntrinsicInst *II) {} +}; + +class PromotableTracker { +public: + bool check(bool c) { + assert(!c && "PromotableTracke::check failed"); + return false; + } + void trackRemovable(Instruction *I) { + // FIXME: Performance Warning: linear search - might become slow (?) + if (std::find(mRemovables.begin(), mRemovables.end(), I) == + mRemovables.end()) + mRemovables.push_back(I); + } + void trackOperandToZero(Instruction *I, int operand) { + mZeroOperands.emplace_back(I, operand); + } + void trackNoAliasDecl(IntrinsicInst *II) { mNoAliasDecls.push_back(II); } + +public: + SmallVector mRemovables; + SmallVector, 4> mZeroOperands; + SmallVector mNoAliasDecls; +}; + +// Return true if the only usage of this pointer is as identifyP argument for +// llvm.noalias or llvm.side.noalias (either direct or recursive) +// Look through bitcast, getelementptr, llvm.noalias, llvm.side.noalias + +template +bool onlyUsedByNoaliasOrSideNoaliasIdentifyPArg(Value *V, PT &pt); + +template +bool isAndOnlyUsedByNoaliasOrSideNoaliasIdentifyPArg(IntrinsicInst *II, + unsigned OpNo, PT &pt) { + if (II->getIntrinsicID() == Intrinsic::side_noalias) { + if (OpNo == 0) { + if (!onlyUsedByNoaliasOrSideNoaliasIdentifyPArg(II, pt)) + return false; + pt.trackRemovable(II); + } else if (OpNo == Intrinsic::SideNoAliasIdentifyPArg) { + pt.trackOperandToZero(II, OpNo); + } else if (OpNo == Intrinsic::SideNoAliasIdentifyPSideChannelArg) { + pt.trackOperandToZero(II, OpNo); + } else { + assert(false && "Unexpected llvm.side.noalias dependency"); + } + return true; + } else if (II->getIntrinsicID() == Intrinsic::noalias) { + if (OpNo == 0) { + if (!onlyUsedByNoaliasOrSideNoaliasIdentifyPArg(II, pt)) + return false; + pt.trackRemovable(II); + } else if (OpNo == Intrinsic::NoAliasIdentifyPArg) { + pt.trackOperandToZero(II, OpNo); + } else { + assert(false && "Unexpected llvm.side.noalias dependency"); + } + return true; + } + + return false; +} + +template +bool onlyUsedByNoaliasOrSideNoaliasIdentifyPArg(Value *V, PT &pt) { + for (Use &U_ : V->uses()) { + unsigned OpNo = U_.getOperandNo(); + User *U = U_.getUser(); + if (IntrinsicInst *II = dyn_cast(U)) { + if (isAndOnlyUsedByNoaliasOrSideNoaliasIdentifyPArg(II, OpNo, pt)) + continue; + return false; + } else if (BitCastInst *BCI = dyn_cast(U)) { + if (!onlyUsedByNoaliasOrSideNoaliasIdentifyPArg(BCI, pt)) + return false; + pt.trackRemovable(BCI); + } else if (GetElementPtrInst *GEPI = dyn_cast(U)) { + if (!onlyUsedByNoaliasOrSideNoaliasIdentifyPArg(GEPI, pt)) + return false; + pt.trackRemovable(GEPI); + } else if (AddrSpaceCastInst *ASCI = dyn_cast(U)) { + if (!onlyUsedByNoaliasOrSideNoaliasIdentifyPArg(ASCI, pt)) + return false; + pt.trackRemovable(ASCI); + } else { + return false; + } + } + return true; +} + +template bool trackAllocaPromotable(AllocaInst *AI, PT &pt) { // FIXME: If the memory unit is of pointer or integer type, we can permit // assignments to subsections of the memory unit. unsigned AS = AI->getType()->getAddressSpace(); // Only allow direct and non-volatile loads and stores... - for (const User *U : AI->users()) { - if (const LoadInst *LI = dyn_cast(U)) { + for (Use &U_ : AI->uses()) { + unsigned OpNo = U_.getOperandNo(); + User *U = U_.getUser(); + + if (LoadInst *LI = dyn_cast(U)) { // Note that atomic loads can be transformed; atomic semantics do // not have any meaning for a local alloca. - if (LI->isVolatile()) + if (pt.check(LI->isVolatile())) return false; - } else if (const StoreInst *SI = dyn_cast(U)) { - if (SI->getOperand(0) == AI) + if (OpNo == LI->getNoaliasSideChannelOperandIndex()) { + // Load will be removed. Disconnect side_noalias dependency + pt.trackOperandToZero(LI, OpNo); + } + } else if (StoreInst *SI = dyn_cast(U)) { + if (pt.check(OpNo == 0)) return false; // Don't allow a store OF the AI, only INTO the AI. // Note that atomic stores can be transformed; atomic semantics do // not have any meaning for a local alloca. - if (SI->isVolatile()) + if (pt.check(SI->isVolatile())) return false; - } else if (const IntrinsicInst *II = dyn_cast(U)) { - if (!II->isLifetimeStartOrEnd()) + if (OpNo == SI->getNoaliasSideChannelOperandIndex()) { + // Store will be removed. Disconnect side_noalias dependency + pt.trackOperandToZero(SI, OpNo); + } + } else if (IntrinsicInst *II = dyn_cast(U)) { + switch (II->getIntrinsicID()) { + case Intrinsic::lifetime_start: + case Intrinsic::lifetime_end: + pt.trackRemovable(II); + break; + case Intrinsic::noalias_decl: + pt.trackNoAliasDecl(II); + break; + case Intrinsic::noalias: + case Intrinsic::side_noalias: + if (!isAndOnlyUsedByNoaliasOrSideNoaliasIdentifyPArg(II, OpNo, pt)) + return false; + break; + default: return false; - } else if (const BitCastInst *BCI = dyn_cast(U)) { - if (BCI->getType() != Type::getInt8PtrTy(U->getContext(), AS)) + } + } else if (BitCastInst *BCI = dyn_cast(U)) { + if (onlyUsedByNoaliasOrSideNoaliasIdentifyPArg(BCI, pt)) { + pt.trackRemovable(BCI); + continue; + } + if (pt.check(BCI->getType() != Type::getInt8PtrTy(U->getContext(), AS))) return false; - if (!onlyUsedByLifetimeMarkers(BCI)) + if (pt.check(!onlyUsedByLifetimeMarkers(BCI))) return false; - } else if (const GetElementPtrInst *GEPI = dyn_cast(U)) { - if (GEPI->getType() != Type::getInt8PtrTy(U->getContext(), AS)) + + for (auto *U2 : BCI->users()) { + pt.trackRemovable(cast(U2)); + } + pt.trackRemovable(BCI); + } else if (GetElementPtrInst *GEPI = dyn_cast(U)) { + if (onlyUsedByNoaliasOrSideNoaliasIdentifyPArg(GEPI, pt)) { + pt.trackRemovable(GEPI); + continue; + } + if (pt.check(GEPI->getType() != Type::getInt8PtrTy(U->getContext(), AS))) return false; - if (!GEPI->hasAllZeroIndices()) + if (pt.check(!GEPI->hasAllZeroIndices())) return false; - if (!onlyUsedByLifetimeMarkers(GEPI)) + if (pt.check(!onlyUsedByLifetimeMarkers(GEPI))) return false; + + for (auto *U2 : GEPI->users()) { + pt.trackRemovable(cast(U2)); + } + pt.trackRemovable(GEPI); } else { return false; } @@ -103,6 +245,11 @@ return true; } +bool llvm::isAllocaPromotable(const AllocaInst *AI) { + PromotableChecker pc; + return trackAllocaPromotable(const_cast(AI), pc); +} + namespace { struct AllocaInfo { @@ -312,26 +459,188 @@ AC->registerAssumption(CI); } -static void removeLifetimeIntrinsicUsers(AllocaInst *AI) { - // Knowing that this alloca is promotable, we know that it's safe to kill all - // instructions except for load and store. +static void removeIntrinsicUsers(AllocaInst *AI) { + // The AI is going to be deleted. Remove lifeftime intrinsic users. + // Also disconnect and remove noalias/side.noalias/noalias_decl intrinsic + // users. - for (auto UI = AI->user_begin(), UE = AI->user_end(); UI != UE;) { - Instruction *I = cast(*UI); - ++UI; - if (isa(I) || isa(I)) - continue; + // Track the possible intrinsics. If we do not have a noalias.decl or we do + // not have an unknown function scope, no extra modificiations are needed. If + // both are there, we need to propagate the MetadataValue from the declaration + // to those intrinsics that are using the unknown scope. + PromotableTracker pt; + + if (!trackAllocaPromotable(AI, pt)) { + assert(false && "trackAllocaPromotable not consistent"); + } + + // Propagate NoaliasDecl + MDNode *NoAliasUnknownScopeMD = + AI->getParent()->getParent()->getMetadata("noalias"); + Instruction *NoAliasDecl = nullptr; + if (pt.mNoAliasDecls.size() == 1) + NoAliasDecl = pt.mNoAliasDecls[0]; + + if (NoAliasUnknownScopeMD) { + if (NoAliasDecl) { + LLVM_DEBUG(llvm::dbgs() + << "- Propagating " << *NoAliasDecl << " scope to:\n"); + auto NoAliasDeclScope = + NoAliasDecl->getOperand(Intrinsic::NoAliasDeclScopeArg); + for (auto PairIO : pt.mZeroOperands) { + Instruction *I = PairIO.first; + auto OpNo = PairIO.second; + (void)OpNo; // Silence not used warning in Release builds. + if (IntrinsicInst *II = dyn_cast(I)) { + auto ID = II->getIntrinsicID(); + if (ID == Intrinsic::noalias || ID == Intrinsic::side_noalias) { + // If we get here, we can assume the identifyP or its sidechannel + // are dependencies + assert( + (ID == Intrinsic::noalias) + ? (OpNo == Intrinsic::NoAliasIdentifyPArg) + : (OpNo == Intrinsic::SideNoAliasIdentifyPArg || + OpNo == Intrinsic::SideNoAliasIdentifyPSideChannelArg)); + unsigned ScopeArg = + (ID == Intrinsic::noalias ? Intrinsic::NoAliasScopeArg + : Intrinsic::SideNoAliasScopeArg); + unsigned DeclArg = (ID == Intrinsic::noalias + ? Intrinsic::NoAliasNoAliasDeclArg + : Intrinsic::SideNoAliasNoAliasDeclArg); + MetadataAsValue *MV = + cast(I->getOperand(ScopeArg)); + if (NoAliasUnknownScopeMD == MV->getMetadata()) { + // Propagate the declaration scope + // Note: splitting already took care of updating the ObjId + LLVM_DEBUG(llvm::dbgs() << "-- " << *I << "\n"); + II->setOperand(ScopeArg, NoAliasDeclScope); + + // also update the noalias declaration + II->setOperand(DeclArg, NoAliasDecl); + } + } + } + } + } else if (pt.mNoAliasDecls.empty()) { + for (auto PairIO : pt.mZeroOperands) { + Instruction *I = PairIO.first; + auto OpNo = PairIO.second; + (void)OpNo; // Silence not used warning in Release builds. + if (IntrinsicInst *II = dyn_cast(I)) { + auto ID = II->getIntrinsicID(); + if (ID == Intrinsic::noalias || ID == Intrinsic::side_noalias) { + // If we get here, we can assume the identifyP or its sidechannel + // are dependencies + assert( + (ID == Intrinsic::noalias) + ? (OpNo == Intrinsic::NoAliasIdentifyPArg) + : (OpNo == Intrinsic::SideNoAliasIdentifyPArg || + OpNo == Intrinsic::SideNoAliasIdentifyPSideChannelArg)); + unsigned ScopeArg = + (ID == Intrinsic::noalias ? Intrinsic::NoAliasScopeArg + : Intrinsic::SideNoAliasScopeArg); + MetadataAsValue *MV = + cast(I->getOperand(ScopeArg)); + if (NoAliasUnknownScopeMD == MV->getMetadata()) { + // Propagate a more or less unique id + LLVM_DEBUG(llvm::dbgs() + << "-- No llvm.noalias.decl, looking through: " << *I + << "\n"); + II->replaceAllUsesWith(II->getOperand(0)); + } + } + } + } + } + } + + if (NoAliasDecl) { + // Check if we need to split up llvm.noalias.decl with unique ObjId's + // This is needed to differentiate restrict pointers, once the alloca is + // removed. NOTE: we might as well have depended on 'constant propagation of + // null' and work with a 'constant pointer' + // for IdentifyP. Not sure what mechanism would be the best. + const DataLayout &DL = AI->getParent()->getModule()->getDataLayout(); + std::map ObjId2NoAliasDecl; + + auto BaseObjId = cast(NoAliasDecl->getOperand( + Intrinsic::NoAliasDeclObjIdArg)) + ->getZExtValue(); + ObjId2NoAliasDecl[BaseObjId] = NoAliasDecl; + + for (auto PairIO : pt.mZeroOperands) { + IntrinsicInst *II = dyn_cast(PairIO.first); + if (II && ((II->getIntrinsicID() == Intrinsic::noalias) || + (II->getIntrinsicID() == Intrinsic::side_noalias))) { + auto OpNo = PairIO.second; + unsigned IdentifyPArg = (II->getIntrinsicID() == Intrinsic::noalias) + ? Intrinsic::NoAliasIdentifyPArg + : Intrinsic::SideNoAliasIdentifyPArg; + unsigned ObjIdArg = (II->getIntrinsicID() == Intrinsic::noalias) + ? Intrinsic::NoAliasIdentifyPObjIdArg + : Intrinsic::SideNoAliasIdentifyPObjIdArg; + unsigned NoAliasDeclArg = (II->getIntrinsicID() == Intrinsic::noalias) + ? Intrinsic::NoAliasNoAliasDeclArg + : Intrinsic::SideNoAliasNoAliasDeclArg; + + if ((unsigned)OpNo != IdentifyPArg) + continue; - if (!I->getType()->isVoidTy()) { - // The only users of this bitcast/GEP instruction are lifetime intrinsics. - // Follow the use/def chain to erase them now instead of leaving it for - // dead code elimination later. - for (auto UUI = I->user_begin(), UUE = I->user_end(); UUI != UUE;) { - Instruction *Inst = cast(*UUI); - ++UUI; - Inst->eraseFromParent(); + auto CurrentObjId = + cast(II->getOperand(ObjIdArg))->getZExtValue(); + + assert(CurrentObjId == BaseObjId && + "Initial object id difference detected."); + + APInt PPointerOffset(DL.getPointerSizeInBits(), 0ull); + assert(AI == II->getOperand(IdentifyPArg) + ->stripAndAccumulateInBoundsConstantOffsets( + DL, PPointerOffset) && + "hmm.. expected stripped P to map to alloca"); + if (!PPointerOffset.isNullValue()) { + CurrentObjId += PPointerOffset.getZExtValue(); + auto &NewNoAliasDecl = ObjId2NoAliasDecl[CurrentObjId]; + if (NewNoAliasDecl == nullptr) { + LLVM_DEBUG(llvm::dbgs() + << "Creating llvm.noalias.decl for IdentifyPObjId " + << CurrentObjId << "\n"); + IRBuilder NoAliasDeclBuilder(NoAliasDecl); + NewNoAliasDecl = NoAliasDeclBuilder.CreateNoAliasDeclaration( + ConstantPointerNull::get(cast(AI->getType())), + CurrentObjId, + NoAliasDecl->getOperand(Intrinsic::NoAliasDeclScopeArg)); + LLVM_DEBUG(llvm::dbgs() << "- " << *NewNoAliasDecl << "\n"); + } + II->setOperand(NoAliasDeclArg, NewNoAliasDecl); + II->setOperand(ObjIdArg, + ConstantInt::get(II->getOperand(ObjIdArg)->getType(), + CurrentObjId)); + LLVM_DEBUG(llvm::dbgs() + << "Remapping noalias.decl dependency: " << *II << "\n"); + } } } + } + + // set args to zero + for (auto II : pt.mNoAliasDecls) { + LLVM_DEBUG(llvm::dbgs() << "Zeoring noalias.decl dep: " << *II << "\n"); + assert(II->getIntrinsicID() == Intrinsic::noalias_decl); + II->setOperand(Intrinsic::NoAliasDeclAllocaArg, + ConstantPointerNull::get(cast(AI->getType()))); + } + for (auto PairIO : pt.mZeroOperands) { + Instruction *I = PairIO.first; + auto OpNo = PairIO.second; + LLVM_DEBUG(llvm::dbgs() + << "Zeroing operand " << OpNo << " of " << *I << "\n"); + I->setOperand(OpNo, ConstantPointerNull::get( + cast(I->getOperand(OpNo)->getType()))); + } + + // remove + for (auto I : pt.mRemovables) { + LLVM_DEBUG(llvm::dbgs() << "Removing " << *I << "\n"); I->eraseFromParent(); } } @@ -357,6 +666,9 @@ for (auto UI = AI->user_begin(), E = AI->user_end(); UI != E;) { Instruction *UserInst = cast(*UI++); + // load/store can have a side channel + if ((UI != E) && (*UI == UserInst)) + ++UI; if (UserInst == OnlyStore) continue; LoadInst *LI = cast(UserInst); @@ -467,6 +779,9 @@ // store above them, if any. for (auto UI = AI->user_begin(), E = AI->user_end(); UI != E;) { LoadInst *LI = dyn_cast(*UI++); + // load/store can have a side channel + if ((UI != E) && (*UI == LI)) + ++UI; if (!LI) continue; @@ -544,7 +859,7 @@ assert(AI->getParent()->getParent() == &F && "All allocas should be in the same function, which is same as DF!"); - removeLifetimeIntrinsicUsers(AI); + removeIntrinsicUsers(AI); if (AI->use_empty()) { // If there are no uses of the alloca, just delete it now. Index: llvm/lib/Transforms/Utils/SimplifyCFG.cpp =================================================================== --- llvm/lib/Transforms/Utils/SimplifyCFG.cpp +++ llvm/lib/Transforms/Utils/SimplifyCFG.cpp @@ -1315,6 +1315,7 @@ LLVMContext::MD_dereferenceable, LLVMContext::MD_dereferenceable_or_null, LLVMContext::MD_mem_parallel_loop_access, + LLVMContext::MD_noalias, LLVMContext::MD_access_group, LLVMContext::MD_preserve_access_index}; combineMetadata(I1, I2, KnownIDs, true); @@ -3072,8 +3073,9 @@ StoreInst *SI = cast(QB.CreateStore(QPHI, Address)); AAMDNodes AAMD; PStore->getAAMetadata(AAMD, /*Merge=*/false); - PStore->getAAMetadata(AAMD, /*Merge=*/true); + QStore->getAAMetadata(AAMD, /*Merge=*/true); SI->setAAMetadata(AAMD); + SI->setAAMetadataNoAliasSideChannel(AAMD); unsigned PAlignment = PStore->getAlignment(); unsigned QAlignment = QStore->getAlignment(); unsigned TypeAlignment = Index: llvm/lib/Transforms/Vectorize/LoadStoreVectorizer.cpp =================================================================== --- llvm/lib/Transforms/Vectorize/LoadStoreVectorizer.cpp +++ llvm/lib/Transforms/Vectorize/LoadStoreVectorizer.cpp @@ -280,9 +280,10 @@ // The real propagateMetadata expects a SmallVector, but we deal in // vectors of Instructions. -static void propagateMetadata(Instruction *I, ArrayRef IL) { +static void propagateMetadata(Instruction *I, ArrayRef IL, + bool RemoveNoAlias) { SmallVector VL(IL.begin(), IL.end()); - propagateMetadata(I, VL); + propagateMetadata(I, VL, RemoveNoAlias); } // Vectorizer Implementation @@ -1074,11 +1075,15 @@ } } + bool HasSideChannel = + std::any_of(Chain.begin(), Chain.end(), [](const auto &I) { + return cast(I)->hasNoaliasSideChannelOperand(); + }); StoreInst *SI = Builder.CreateAlignedStore( Vec, Builder.CreateBitCast(S0->getPointerOperand(), VecTy->getPointerTo(AS)), Alignment); - propagateMetadata(SI, Chain); + propagateMetadata(SI, Chain, HasSideChannel); eraseInstructions(Chain); ++NumVectorInstructions; @@ -1191,10 +1196,14 @@ std::tie(First, Last) = getBoundaryInstrs(Chain); Builder.SetInsertPoint(&*First); + bool HasSideChannel = + std::any_of(Chain.begin(), Chain.end(), [](const auto &I) { + return cast(I)->hasNoaliasSideChannelOperand(); + }); Value *Bitcast = Builder.CreateBitCast(L0->getPointerOperand(), VecTy->getPointerTo(AS)); LoadInst *LI = Builder.CreateAlignedLoad(VecTy, Bitcast, Alignment); - propagateMetadata(LI, Chain); + propagateMetadata(LI, Chain, HasSideChannel); if (VecLoadTy) { SmallVector InstrsToErase; Index: llvm/lib/Transforms/Vectorize/LoopVectorize.cpp =================================================================== --- llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -907,7 +907,13 @@ void InnerLoopVectorizer::addMetadata(Instruction *To, Instruction *From) { - propagateMetadata(To, From); + bool HasSideChannel = true; + if (auto *SI = dyn_cast(From)) { + HasSideChannel = SI->hasNoaliasSideChannelOperand(); + } else if (auto *LI = dyn_cast(From)) { + HasSideChannel = LI->hasNoaliasSideChannelOperand(); + } + propagateMetadata(To, From, HasSideChannel); addNewMetadata(To, From); } @@ -3235,11 +3241,6 @@ if (VF == 1) return ScalarCallCost; - // Compute corresponding vector type for return value and arguments. - Type *RetTy = ToVectorTy(ScalarRetTy, VF); - for (Type *ScalarTy : ScalarTys) - Tys.push_back(ToVectorTy(ScalarTy, VF)); - // Compute costs of unpacking argument values for the scalar calls and // packing the return values to a vector. unsigned ScalarizationCost = getScalarizationOverhead(CI, VF); @@ -3252,6 +3253,11 @@ if (!TLI || !TLI->isFunctionVectorizable(FnName, VF) || CI->isNoBuiltin()) return Cost; + // Compute corresponding vector type for return value and arguments. + Type *RetTy = ToVectorTy(ScalarRetTy, VF); + for (Type *ScalarTy : ScalarTys) + Tys.push_back(ToVectorTy(ScalarTy, VF)); + // If the corresponding vector cost is cheaper, return its cost. unsigned VectorCallCost = TTI.getCallInstrCost(nullptr, RetTy, Tys); if (VectorCallCost < Cost) { @@ -6891,7 +6897,8 @@ if (CallInst *CI = dyn_cast(I)) { Intrinsic::ID ID = getVectorIntrinsicIDForCall(CI, TLI); if (ID && (ID == Intrinsic::assume || ID == Intrinsic::lifetime_end || - ID == Intrinsic::lifetime_start || ID == Intrinsic::sideeffect)) + ID == Intrinsic::lifetime_start || ID == Intrinsic::sideeffect || + ID == Intrinsic::noalias || ID == Intrinsic::side_noalias)) return false; } Index: llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp =================================================================== --- llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -1273,6 +1273,53 @@ } } + /// Set the operands of this bundle of load or store instructions in their + /// original order. + void setLoadStoreOperandsInOrder() { + assert(Operands.empty() && "Already initialized?"); + auto *I0 = cast(Scalars[0]); + assert((isa(I0) || isa(I0)) && + "Expect a load or store instruction"); + unsigned NumOperands = isa(I0) ? 1 : 2; + + // Check if any instruction has a noalias_sidechannel + bool HasSideChannel = + std::any_of(Scalars.begin(), Scalars.end(), [&](auto *V) { + return cast(V)->getNumOperands() != NumOperands; + }); + + Operands.resize(NumOperands + HasSideChannel); + unsigned NumLanes = Scalars.size(); + for (unsigned OpIdx = 0; OpIdx != NumOperands; ++OpIdx) { + Operands[OpIdx].resize(NumLanes); + for (unsigned Lane = 0; Lane != NumLanes; ++Lane) { + auto *I = cast(Scalars[Lane]); + assert(((I->getNumOperands() == NumOperands) || + (I->getNumOperands() == NumOperands + 1)) && + "Expected same number of operands (ignoring the " + "noalias_sidechannel"); + Operands[OpIdx][Lane] = I->getOperand(OpIdx); + } + } + + if (HasSideChannel) { + // At least one instruction has a noalias_sidechannel. + // Keep track of the dependencies brought in by it. Later on we will + // omit the noalias information. + unsigned OpIdx = NumOperands; + Operands[OpIdx].resize(NumLanes); + for (unsigned Lane = 0; Lane != NumLanes; ++Lane) { + auto *I = cast(Scalars[Lane]); + if (I->getNumOperands() != NumOperands) { + Operands[OpIdx][Lane] = I->getOperand(OpIdx); + } else { + Operands[OpIdx][Lane] = + UndefValue::get(I->getOperand(OpIdx - 1)->getType()); + } + } + } + } + /// \returns the \p OpIdx operand of this TreeEntry. ValueList &getOperand(unsigned OpIdx) { assert(OpIdx < Operands.size() && "Off bounds"); @@ -2454,7 +2501,7 @@ ++NumOpsWantToKeepOriginalOrder; TreeEntry *TE = newTreeEntry(VL, Bundle /*vectorized*/, S, UserTreeIdx, ReuseShuffleIndicies); - TE->setOperandsInOrder(); + TE->setLoadStoreOperandsInOrder(); LLVM_DEBUG(dbgs() << "SLP: added a vector of loads.\n"); } else { // Need to reorder. @@ -2463,7 +2510,7 @@ TreeEntry *TE = newTreeEntry(VL, Bundle /*vectorized*/, S, UserTreeIdx, ReuseShuffleIndicies, I->getFirst()); - TE->setOperandsInOrder(); + TE->setLoadStoreOperandsInOrder(); LLVM_DEBUG(dbgs() << "SLP: added a vector of jumbled loads.\n"); } return; @@ -2681,7 +2728,7 @@ ValueList Operands; for (Value *V : VL) Operands.push_back(cast(V)->getOperand(0)); - TE->setOperandsInOrder(); + TE->setLoadStoreOperandsInOrder(); buildTree_rec(Operands, Depth + 1, {TE, 0}); return; } @@ -3829,7 +3876,7 @@ PointerType *PtrTy = PointerType::get(VecTy, LI->getPointerAddressSpace()); Value *Ptr = Builder.CreateBitCast(LI->getOperand(0), PtrTy); LoadInst *V = Builder.CreateAlignedLoad(VecTy, Ptr, LI->getAlignment()); - Value *NewV = propagateMetadata(V, E->Scalars); + Value *NewV = propagateMetadata(V, E->Scalars, true); if (!E->ReorderIndices.empty()) { OrdersType Mask; inversePermutation(E->ReorderIndices, Mask); @@ -4033,7 +4080,7 @@ if (!Alignment) Alignment = MaybeAlign(DL->getABITypeAlignment(ScalarLoadTy)); LI->setAlignment(Alignment); - Value *V = propagateMetadata(LI, E->Scalars); + Value *V = propagateMetadata(LI, E->Scalars, (E->getNumOperands() == 2)); if (IsReorder) { OrdersType Mask; inversePermutation(E->ReorderIndices, Mask); @@ -4071,7 +4118,7 @@ Alignment = DL->getABITypeAlignment(SI->getValueOperand()->getType()); ST->setAlignment(Align(Alignment)); - Value *V = propagateMetadata(ST, E->Scalars); + Value *V = propagateMetadata(ST, E->Scalars, (E->getNumOperands() == 3)); if (NeedToShuffleReuses) { V = Builder.CreateShuffleVector(V, UndefValue::get(VecTy), E->ReuseShuffleIndices, "shuffle"); Index: llvm/test/Analysis/BasicAA/noalias-intr.ll =================================================================== --- /dev/null +++ llvm/test/Analysis/BasicAA/noalias-intr.ll @@ -0,0 +1,143 @@ +; RUN: opt < %s -basicaa -aa-eval -print-all-alias-modref-info -disable-output 2>&1 | FileCheck %s + +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" + +; Function Attrs: nounwind +define void @test01() #0 { +entry: + %_pA = alloca i32, align 4 + %_pB = alloca i32, align 4 + %t1 = call i8* @llvm.noalias.decl.p0i8.p0p0i32.i32(i32** null, i32 0, metadata !2) + %t3 = call i8* @llvm.noalias.decl.p0i8.p0p0i32.i32(i32** null, i32 0, metadata !5) + %t5 = call i32* @llvm.noalias.p0i32.p0i8.p0p0i32.i32(i32* %_pA, i8* %t1, i32** null, i32 0, metadata !2), !tbaa !7, !noalias !11 + store i32 42, i32* %t5, align 4, !tbaa !12, !noalias !11 + %t7 = call i32* @llvm.noalias.p0i32.p0i8.p0p0i32.i32(i32* %_pB, i8* %t3, i32** null, i32 0, metadata !5), !tbaa !7, !noalias !11 + store i32 43, i32* %t7, align 4, !tbaa !12, !noalias !11 + ret void +} +; CHECK-LABEL: Function: test01: +; CHECK: NoAlias: i32* %t5, i32* %t7 + +; Function Attrs: nounwind +define void @test02() #0 { +entry: + %_pA = alloca i32, align 4 + %_pB = alloca i32, align 4 + %t1 = call i8* @llvm.noalias.decl.p0i8.p0p0i32.i32(i32** null, i32 0, metadata !2) + %t3 = call i8* @llvm.noalias.decl.p0i8.p0p0i32.i32(i32** null, i32 0, metadata !5) + %t5 = call i32* @llvm.noalias.p0i32.p0i8.p0p0i32.i32(i32* %_pA, i8* %t1, i32** null, i32 0, metadata !2), !tbaa !7, !noalias !11 + store i32 42, i32* %t5, align 4, !tbaa !12, !noalias !11 + %t7 = call i32* @llvm.noalias.p0i32.p0i8.p0p0i32.i32(i32* %_pA, i8* %t3, i32** null, i32 0, metadata !5), !tbaa !7, !noalias !11 + store i32 43, i32* %t7, align 4, !tbaa !12, !noalias !11 + ret void +} +; CHECK-LABEL: Function: test02: +; CHECK: MustAlias: i32* %t5, i32* %t7 + + +; Function Attrs: nounwind +define void @test11() #0 { +entry: + %_pA = alloca i32, align 4 + %_pB = alloca i32, align 4 + %t1 = call i8* @llvm.noalias.decl.p0i8.p0p0i32.i32(i32** null, i32 0, metadata !2) + %t3 = call i8* @llvm.noalias.decl.p0i8.p0p0i32.i32(i32** null, i32 0, metadata !5) + %t5 = tail call i32* @llvm.side.noalias.p0i32.p0i8.p0p0i32.p0p0i32.i32(i32* %_pA, i8* %t1, i32** null, i32** undef, i32 0, metadata !2), !tbaa !7, !noalias !11 + %.guard1 = call i32* @llvm.noalias.arg.guard.p0i32.p0i32(i32* %_pA, i32* %t5) + store i32 42, i32* %.guard1, align 4, !tbaa !12, !noalias !11 + %.guard2 = call i32* @llvm.noalias.arg.guard.p0i32.p0i32(i32* %_pB, i32* %t5) + store i32 43, i32* %.guard2, align 4, !tbaa !12, !noalias !11 + ret void +} +; CHECK-LABEL: Function: test11: +; CHECK: NoAlias: i32* %.guard1, i32* %.guard2 + +; Function Attrs: nounwind +define void @test12() #0 { +entry: + %_pA = alloca i32, align 4 + %_pB = alloca i32, align 4 + %t1 = call i8* @llvm.noalias.decl.p0i8.p0p0i32.i32(i32** null, i32 0, metadata !2) + %t3 = call i8* @llvm.noalias.decl.p0i8.p0p0i32.i32(i32** null, i32 0, metadata !5) + %t5 = tail call i32* @llvm.side.noalias.p0i32.p0i8.p0p0i32.p0p0i32.i32(i32* %_pA, i8* %t1, i32** null, i32** undef, i32 0, metadata !2), !tbaa !7, !noalias !11 + %.guard1 = call i32* @llvm.noalias.arg.guard.p0i32.p0i32(i32* %_pA, i32* %t5) + store i32 42, i32* %.guard1, align 4, !tbaa !12, !noalias !11 + %.guard2 = call i32* @llvm.noalias.arg.guard.p0i32.p0i32(i32* %_pA, i32* %t5) + store i32 43, i32* %.guard2, align 4, !tbaa !12, !noalias !11 + ret void +} +; CHECK-LABEL: Function: test12: +; CHECK: MustAlias: i32* %.guard1, i32* %.guard2 + +; Function Attrs: nounwind +define void @test21() #0 { +entry: + %_pA = alloca i32, align 4 + %_pB = alloca i32, align 4 + %t1 = call i8* @llvm.noalias.decl.p0i8.p0p0i32.i32(i32** null, i32 0, metadata !2) + %t3 = call i8* @llvm.noalias.decl.p0i8.p0p0i32.i32(i32** null, i32 0, metadata !5) + %.guard1 = call i32* @llvm.noalias.copy.guard.p0i32.p0i8(i32* %_pA, i8* %t1, metadata !14, metadata !2) + store i32 42, i32* %.guard1, align 4, !tbaa !12, !noalias !11 + %.guard2 = call i32* @llvm.noalias.copy.guard.p0i32.p0i8(i32* %_pB, i8* %t3, metadata !14, metadata !5) + store i32 43, i32* %.guard2, align 4, !tbaa !12, !noalias !11 + ret void +} +; CHECK-LABEL: Function: test21: +; CHECK: NoAlias: i32* %.guard1, i32* %.guard2 + +; Function Attrs: nounwind +define void @test22() #0 { +entry: + %_pA = alloca i32, align 4 + %_pB = alloca i32, align 4 + %t1 = call i8* @llvm.noalias.decl.p0i8.p0p0i32.i32(i32** null, i32 0, metadata !2) + %t3 = call i8* @llvm.noalias.decl.p0i8.p0p0i32.i32(i32** null, i32 0, metadata !5) + %.guard1 = call i32* @llvm.noalias.copy.guard.p0i32.p0i8(i32* %_pA, i8* %t1, metadata !14, metadata !2) + store i32 42, i32* %.guard1, align 4, !tbaa !12, !noalias !11 + %.guard2 = call i32* @llvm.noalias.copy.guard.p0i32.p0i8(i32* %_pA, i8* %t3, metadata !14, metadata !5) + store i32 43, i32* %.guard2, align 4, !tbaa !12, !noalias !11 + ret void +} +; CHECK-LABEL: Function: test22: +; CHECK: MustAlias: i32* %.guard1, i32* %.guard2 + +; Function Attrs: argmemonly nounwind +declare i8* @llvm.noalias.decl.p0i8.p0p0i32.i32(i32**, i32, metadata) #1 + +; Function Attrs: argmemonly nounwind speculatable +declare i32* @llvm.noalias.p0i32.p0i8.p0p0i32.i32(i32*, i8*, i32**, i32, metadata) #2 + +; Function Attrs: nounwind readnone speculatable +declare i32* @llvm.side.noalias.p0i32.p0i8.p0p0i32.p0p0i32.i32(i32*, i8*, i32**, i32**, i32, metadata) #3 + +; Function Attrs: nounwind readnone +declare i32* @llvm.noalias.arg.guard.p0i32.p0i32(i32*, i32*) #4 + +; Function Attrs: nounwind readnone +declare i32* @llvm.noalias.copy.guard.p0i32.p0i8(i32*, i8*, metadata, metadata) #4 + +attributes #0 = { nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #1 = { argmemonly nounwind } +attributes #2 = { argmemonly nounwind speculatable } +attributes #3 = { nounwind readnone speculatable } +attributes #4 = { nounwind readnone } + +!llvm.module.flags = !{!0} +!llvm.ident = !{!1} + +!0 = !{i32 1, !"wchar_size", i32 4} +!1 = !{!"clang"} +!2 = !{!3} +!3 = distinct !{!3, !4, !"test01: _pA"} +!4 = distinct !{!4, !"test01"} +!5 = !{!6} +!6 = distinct !{!6, !4, !"test01: _pB"} +!7 = !{!8, !8, i64 0, i64 4} +!8 = !{!9, i64 4, !"any pointer"} +!9 = !{!10, i64 1, !"omnipotent char"} +!10 = !{!"Simple C/C++ TBAA"} +!11 = !{!3, !6} +!12 = !{!13, !13, i64 0, i64 4} +!13 = !{!9, i64 4, !"int"} +!14 = !{!15} +!15 = !{i64 -1, i64 0} Index: llvm/test/Analysis/ScopedNoAliasAA/basic-domains.ll =================================================================== --- llvm/test/Analysis/ScopedNoAliasAA/basic-domains.ll +++ llvm/test/Analysis/ScopedNoAliasAA/basic-domains.ll @@ -52,4 +52,3 @@ ; CHECK: NoAlias: store float %1, float* %arrayidx.i2, align 4, !noalias !6 <-> store float %0, float* %arrayidx.i, align 4, !noalias !6 ; CHECK: NoAlias: store float %2, float* %arrayidx.i3, align 4, !noalias !7 <-> store float %0, float* %arrayidx.i, align 4, !noalias !6 ; CHECK: NoAlias: store float %2, float* %arrayidx.i3, align 4, !noalias !7 <-> store float %1, float* %arrayidx.i2, align 4, !noalias !6 - Index: llvm/test/Analysis/ScopedNoAliasAA/basic2.ll =================================================================== --- llvm/test/Analysis/ScopedNoAliasAA/basic2.ll +++ llvm/test/Analysis/ScopedNoAliasAA/basic2.ll @@ -38,4 +38,3 @@ !3 = !{!3, !2, !"some other scope"} !4 = !{!1} !5 = !{!3} - Index: llvm/test/Analysis/ScopedNoAliasAA/noalias-calls.ll =================================================================== --- /dev/null +++ llvm/test/Analysis/ScopedNoAliasAA/noalias-calls.ll @@ -0,0 +1,85 @@ +; RUN: opt < %s -basicaa -scoped-noalias -aa-eval -evaluate-aa-metadata -print-all-alias-modref-info -disable-output 2>&1 | FileCheck %s +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +; Function Attrs: nounwind +declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture writeonly, i8* nocapture readonly, i64, i1) #1 + +; Function Attrs: nounwind +declare void @hey() #1 + +; Function Attrs: nounwind uwtable +define void @foo(i8* nocapture %a, i8* nocapture readonly %c, i8* nocapture %b) #2 { +entry: + %l.i = alloca i8, i32 512, align 1 + %side.a = call i8* @llvm.side.noalias.p0i8.p0i8.p0p0i8.p0p0i8.i32(i8* %a, i8* null, i8** null, i8** null, i32 0, metadata !0) #0 + %side.b = call i8* @llvm.side.noalias.p0i8.p0i8.p0p0i8.p0p0i8.i32(i8* %c, i8* null, i8** null, i8** null, i32 0, metadata !3) #0 + %a.guard = call i8* @llvm.noalias.arg.guard.p0i8.p0i8(i8* %a, i8* %side.a) + %b.guard = call i8* @llvm.noalias.arg.guard.p0i8.p0i8(i8* %b, i8* %side.b) + call void @llvm.memcpy.p0i8.p0i8.i64(i8* %a.guard, i8* %b, i64 16, i1 false) #1, !noalias !5 + call void @llvm.memcpy.p0i8.p0i8.i64(i8* %b, i8* %b.guard, i64 16, i1 false) #1, !noalias !5 + call void @llvm.memcpy.p0i8.p0i8.i64(i8* %a.guard, i8* %b.guard, i64 16, i1 false) #1, !noalias !5 + call void @hey() #1, !noalias !5 + call void @llvm.memcpy.p0i8.p0i8.i64(i8* %l.i, i8* %b.guard, i64 16, i1 false) #1, !noalias !5 + ret void +} + +; CHECK-LABEL: Function: foo: +;@ JDO_FIXME_NOW: not sure where to look for this, but there is a discripancy between the wanted vs observed +; WANTED: Just Ref: call void @llvm.memcpy.p0i8.p0i8.i64(i8* %0, i8* %b, i64 16, i1 false) #1, !noalias !5 <-> call void @llvm.memcpy.p0i8.p0i8.i64(i8* %b, i8* %1, i64 16, i1 false) #1, !noalias !5 +; WANTED: Just Mod: call void @llvm.memcpy.p0i8.p0i8.i64(i8* %0, i8* %b, i64 16, i1 false) #1, !noalias !5 <-> call void @llvm.memcpy.p0i8.p0i8.i64(i8* %0, i8* %1, i64 16, i1 false) #1, !noalias !5 +; WANTED: Just Ref: call void @llvm.memcpy.p0i8.p0i8.i64(i8* %0, i8* %b, i64 16, i1 false) #1, !noalias !5 <-> call void @hey() #1, !noalias !5 +; WANTED: NoModRef: call void @llvm.memcpy.p0i8.p0i8.i64(i8* %0, i8* %b, i64 16, i1 false) #1, !noalias !5 <-> call void @llvm.memcpy.p0i8.p0i8.i64(i8* %l.i, i8* %1, i64 16, i1 false) #1, !noalias !5 +; WANTED: Just Mod: call void @llvm.memcpy.p0i8.p0i8.i64(i8* %b, i8* %1, i64 16, i1 false) #1, !noalias !5 <-> call void @llvm.memcpy.p0i8.p0i8.i64(i8* %0, i8* %b, i64 16, i1 false) #1, !noalias !5 +; WANTED: NoModRef: call void @llvm.memcpy.p0i8.p0i8.i64(i8* %b, i8* %1, i64 16, i1 false) #1, !noalias !5 <-> call void @llvm.memcpy.p0i8.p0i8.i64(i8* %0, i8* %1, i64 16, i1 false) #1, !noalias !5 +; WANTED: Just Mod: call void @llvm.memcpy.p0i8.p0i8.i64(i8* %b, i8* %1, i64 16, i1 false) #1, !noalias !5 <-> call void @hey() #1, !noalias !5 +; WANTED: NoModRef: call void @llvm.memcpy.p0i8.p0i8.i64(i8* %b, i8* %1, i64 16, i1 false) #1, !noalias !5 <-> call void @llvm.memcpy.p0i8.p0i8.i64(i8* %l.i, i8* %1, i64 16, i1 false) #1, !noalias !5 +; WANTED: Just Mod: call void @llvm.memcpy.p0i8.p0i8.i64(i8* %0, i8* %1, i64 16, i1 false) #1, !noalias !5 <-> call void @llvm.memcpy.p0i8.p0i8.i64(i8* %0, i8* %b, i64 16, i1 false) #1, !noalias !5 +; WANTED: NoModRef: call void @llvm.memcpy.p0i8.p0i8.i64(i8* %0, i8* %1, i64 16, i1 false) #1, !noalias !5 <-> call void @llvm.memcpy.p0i8.p0i8.i64(i8* %b, i8* %1, i64 16, i1 false) #1, !noalias !5 +; WANTED: NoModRef: call void @llvm.memcpy.p0i8.p0i8.i64(i8* %0, i8* %1, i64 16, i1 false) #1, !noalias !5 <-> call void @hey() #1, !noalias !5 +; WANTED: NoModRef: call void @llvm.memcpy.p0i8.p0i8.i64(i8* %0, i8* %1, i64 16, i1 false) #1, !noalias !5 <-> call void @llvm.memcpy.p0i8.p0i8.i64(i8* %l.i, i8* %1, i64 16, i1 false) #1, !noalias !5 +; WANTED: Just Mod: call void @hey() #1, !noalias !5 <-> call void @llvm.memcpy.p0i8.p0i8.i64(i8* %0, i8* %b, i64 16, i1 false) #1, !noalias !5 +; WANTED: Both ModRef: call void @hey() #1, !noalias !5 <-> call void @llvm.memcpy.p0i8.p0i8.i64(i8* %b, i8* %1, i64 16, i1 false) #1, !noalias !5 +; WANTED: NoModRef: call void @hey() #1, !noalias !5 <-> call void @llvm.memcpy.p0i8.p0i8.i64(i8* %0, i8* %1, i64 16, i1 false) #1, !noalias !5 +; WANTED: NoModRef: call void @hey() #1, !noalias !5 <-> call void @llvm.memcpy.p0i8.p0i8.i64(i8* %l.i, i8* %1, i64 16, i1 false) #1, !noalias !5 +; WANTED: NoModRef: call void @llvm.memcpy.p0i8.p0i8.i64(i8* %l.i, i8* %1, i64 16, i1 false) #1, !noalias !5 <-> call void @llvm.memcpy.p0i8.p0i8.i64(i8* %0, i8* %b, i64 16, i1 false) #1, !noalias !5 +; WANTED: NoModRef: call void @llvm.memcpy.p0i8.p0i8.i64(i8* %l.i, i8* %1, i64 16, i1 false) #1, !noalias !5 <-> call void @llvm.memcpy.p0i8.p0i8.i64(i8* %b, i8* %1, i64 16, i1 false) #1, !noalias !5 +; WANTED: NoModRef: call void @llvm.memcpy.p0i8.p0i8.i64(i8* %l.i, i8* %1, i64 16, i1 false) #1, !noalias !5 <-> call void @llvm.memcpy.p0i8.p0i8.i64(i8* %0, i8* %1, i64 16, i1 false) #1, !noalias !5 +; WANTED: NoModRef: call void @llvm.memcpy.p0i8.p0i8.i64(i8* %l.i, i8* %1, i64 16, i1 false) #1, !noalias !5 <-> call void @hey() #1, !noalias !5 + +; NOTE: OBSERVED: +; CHECK: Just Ref: call void @llvm.memcpy.p0i8.p0i8.i64(i8* %a.guard, i8* %b, i64 16, i1 false) #1, !noalias !5 <-> call void @llvm.memcpy.p0i8.p0i8.i64(i8* %b, i8* %b.guard, i64 16, i1 false) #1, !noalias !5 +; CHECK: Just Mod: call void @llvm.memcpy.p0i8.p0i8.i64(i8* %a.guard, i8* %b, i64 16, i1 false) #1, !noalias !5 <-> call void @llvm.memcpy.p0i8.p0i8.i64(i8* %a.guard, i8* %b.guard, i64 16, i1 false) #1, !noalias !5 +; CHECK: Both ModRef: call void @llvm.memcpy.p0i8.p0i8.i64(i8* %a.guard, i8* %b, i64 16, i1 false) #1, !noalias !5 <-> call void @hey() #1, !noalias !5 +; CHECK: NoModRef: call void @llvm.memcpy.p0i8.p0i8.i64(i8* %a.guard, i8* %b, i64 16, i1 false) #1, !noalias !5 <-> call void @llvm.memcpy.p0i8.p0i8.i64(i8* %l.i, i8* %b.guard, i64 16, i1 false) #1, !noalias !5 +; CHECK: Both ModRef: call void @llvm.memcpy.p0i8.p0i8.i64(i8* %b, i8* %b.guard, i64 16, i1 false) #1, !noalias !5 <-> call void @llvm.memcpy.p0i8.p0i8.i64(i8* %a.guard, i8* %b, i64 16, i1 false) #1, !noalias !5 +; CHECK: Both ModRef: call void @llvm.memcpy.p0i8.p0i8.i64(i8* %b, i8* %b.guard, i64 16, i1 false) #1, !noalias !5 <-> call void @llvm.memcpy.p0i8.p0i8.i64(i8* %a.guard, i8* %b.guard, i64 16, i1 false) #1, !noalias !5 +; CHECK: Both ModRef: call void @llvm.memcpy.p0i8.p0i8.i64(i8* %b, i8* %b.guard, i64 16, i1 false) #1, !noalias !5 <-> call void @hey() #1, !noalias !5 +; CHECK: NoModRef: call void @llvm.memcpy.p0i8.p0i8.i64(i8* %b, i8* %b.guard, i64 16, i1 false) #1, !noalias !5 <-> call void @llvm.memcpy.p0i8.p0i8.i64(i8* %l.i, i8* %b.guard, i64 16, i1 false) #1, !noalias !5 +; CHECK: Just Mod: call void @llvm.memcpy.p0i8.p0i8.i64(i8* %a.guard, i8* %b.guard, i64 16, i1 false) #1, !noalias !5 <-> call void @llvm.memcpy.p0i8.p0i8.i64(i8* %a.guard, i8* %b, i64 16, i1 false) #1, !noalias !5 +; CHECK: Just Ref: call void @llvm.memcpy.p0i8.p0i8.i64(i8* %a.guard, i8* %b.guard, i64 16, i1 false) #1, !noalias !5 <-> call void @llvm.memcpy.p0i8.p0i8.i64(i8* %b, i8* %b.guard, i64 16, i1 false) #1, !noalias !5 +; CHECK: Both ModRef: call void @llvm.memcpy.p0i8.p0i8.i64(i8* %a.guard, i8* %b.guard, i64 16, i1 false) #1, !noalias !5 <-> call void @hey() #1, !noalias !5 +; CHECK: NoModRef: call void @llvm.memcpy.p0i8.p0i8.i64(i8* %a.guard, i8* %b.guard, i64 16, i1 false) #1, !noalias !5 <-> call void @llvm.memcpy.p0i8.p0i8.i64(i8* %l.i, i8* %b.guard, i64 16, i1 false) #1, !noalias !5 +; CHECK: Both ModRef: call void @hey() #1, !noalias !5 <-> call void @llvm.memcpy.p0i8.p0i8.i64(i8* %a.guard, i8* %b, i64 16, i1 false) #1, !noalias !5 +; CHECK: Both ModRef: call void @hey() #1, !noalias !5 <-> call void @llvm.memcpy.p0i8.p0i8.i64(i8* %b, i8* %b.guard, i64 16, i1 false) #1, !noalias !5 +; CHECK: Both ModRef: call void @hey() #1, !noalias !5 <-> call void @llvm.memcpy.p0i8.p0i8.i64(i8* %a.guard, i8* %b.guard, i64 16, i1 false) #1, !noalias !5 +; CHECK: Just Mod: call void @hey() #1, !noalias !5 <-> call void @llvm.memcpy.p0i8.p0i8.i64(i8* %l.i, i8* %b.guard, i64 16, i1 false) #1, !noalias !5 +; CHECK: Just Ref: call void @llvm.memcpy.p0i8.p0i8.i64(i8* %l.i, i8* %b.guard, i64 16, i1 false) #1, !noalias !5 <-> call void @llvm.memcpy.p0i8.p0i8.i64(i8* %a.guard, i8* %b, i64 16, i1 false) #1, !noalias !5 +; CHECK: Just Ref: call void @llvm.memcpy.p0i8.p0i8.i64(i8* %l.i, i8* %b.guard, i64 16, i1 false) #1, !noalias !5 <-> call void @llvm.memcpy.p0i8.p0i8.i64(i8* %b, i8* %b.guard, i64 16, i1 false) #1, !noalias !5 +; CHECK: Just Ref: call void @llvm.memcpy.p0i8.p0i8.i64(i8* %l.i, i8* %b.guard, i64 16, i1 false) #1, !noalias !5 <-> call void @llvm.memcpy.p0i8.p0i8.i64(i8* %a.guard, i8* %b.guard, i64 16, i1 false) #1, !noalias !5 +; CHECK: Just Ref: call void @llvm.memcpy.p0i8.p0i8.i64(i8* %l.i, i8* %b.guard, i64 16, i1 false) #1, !noalias !5 <-> call void @hey() #1, !noalias !5 + +declare i8* @llvm.side.noalias.p0i8.p0i8.p0p0i8.p0p0i8.i32(i8*, i8*, i8**, i8**, i32, metadata) nounwind readnone speculatable +declare i8* @llvm.noalias.arg.guard.p0i8.p0i8(i8*, i8*) nounwind readnone + +attributes #0 = { argmemonly nounwind } +attributes #1 = { nounwind } +attributes #2 = { nounwind uwtable } + + +!0 = !{!1} +!1 = distinct !{!1, !2, !"hello: %a"} +!2 = distinct !{!2, !"hello"} +!3 = !{!4} +!4 = distinct !{!4, !2, !"hello: %c"} +!5 = !{!1, !4} Index: llvm/test/Analysis/ScopedNoAliasAA/noalias-dup-scope.ll =================================================================== --- /dev/null +++ llvm/test/Analysis/ScopedNoAliasAA/noalias-dup-scope.ll @@ -0,0 +1,161 @@ +; RUN: opt < %s -basicaa -domtree -scoped-noalias -aa-eval -loops -evaluate-aa-metadata -print-all-alias-modref-info -disable-output 2>&1 | FileCheck %s -check-prefix=WITHDT +; Note: The -loops above can be anything that requires the domtree, and is +; necessary to work around a pass-manager bug. + +target datalayout = "E-m:e-i64:64-n32:64" +target triple = "powerpc64-unknown-linux-gnu" + +@a = common global i32* null, align 8 +@r = common global i32 0, align 4 +@a2 = common global i32* null, align 8 + +; Function Attrs: nounwind +define i32* @foo() #0 { +entry: + %0 = load i32*, i32** @a, align 8, !tbaa !1, !noalias !5 + %1 = call i32* @llvm.side.noalias.p0i32.p0i8.p0p0i32.p0p0i32.i32(i32* %0, i8* null, i32** @a, i32** null, i32 0, metadata !5) #0 + %2 = tail call i32* @llvm.noalias.arg.guard.p0i32.p0i32(i32* %0, i32* %1) #0 + ret i32* %1 +} + +; Function Attrs: nounwind +declare i32* @llvm.side.noalias.p0i32.p0i8.p0p0i32.p0p0i32.i32(i32*, i8*, i32**, i32**, i32, metadata) #0 + +; Function Attrs: nounwind +declare i32* @llvm.noalias.arg.guard.p0i32.p0i32(i32*, i32*) #0 + +; Function Attrs: nounwind +define i32* @foo1(i32 signext %b) #0 { +entry: + %tobool = icmp eq i32 %b, 0 + br i1 %tobool, label %if.else, label %if.then + +if.then: ; preds = %entry + %0 = load i32*, i32** @a, noalias_sidechannel i32** null, align 8, !tbaa !1, !noalias !8 + %1 = tail call i32* @llvm.side.noalias.p0i32.p0i8.p0p0i32.p0p0i32.i32(i32* %0, i8* null, i32** @a, i32** null, i32 0, metadata !12) #0 + %2 = load i32, i32* %0, noalias_sidechannel i32* %1, align 4, !tbaa !13, !noalias !8 + %3 = load i32, i32* @r, noalias_sidechannel i32* null, align 4, !tbaa !13, !noalias !8 + %add = add nsw i32 %3, %2 + store i32 %add, i32* @r, noalias_sidechannel i32* null, align 4, !tbaa !13, !noalias !8 + %guard.1 = tail call i32* @llvm.noalias.arg.guard.p0i32.p0i32(i32* %0, i32* %1) #0 + tail call void @ex1(i32* %guard.1) #0, !noalias !8 + %incdec.ptr = getelementptr inbounds i32, i32* %0, i64 1 + %4 = tail call i32* @llvm.side.noalias.p0i32.p0i8.p0p0i32.p0p0i32.i32(i32* %0, i8* null, i32** @a, i32** null, i32 0, metadata !12) #0 + %5 = load i32, i32* @r, noalias_sidechannel i32* null, align 4, !tbaa !13, !noalias !8 + store i32 %5, i32* %incdec.ptr, noalias_sidechannel i32* %4, align 4, !tbaa !13, !noalias !8 + %guard.5 = tail call i32* @llvm.noalias.arg.guard.p0i32.p0i32(i32* %incdec.ptr, i32* %4) #0 + tail call void @ex1(i32* %guard.5) #0, !noalias !8 + %idx.ext = sext i32 %b to i64 + %add.ptr = getelementptr inbounds i32, i32* %incdec.ptr, i64 %idx.ext + %6 = tail call i32* @llvm.side.noalias.p0i32.p0i8.p0p0i32.p0p0i32.i32(i32* %0, i8* null, i32** @a, i32** null, i32 0, metadata !12) #0 + %7 = load i32, i32* @r, noalias_sidechannel i32* null, align 4, !tbaa !13, !noalias !8 + store i32 %7, i32* %add.ptr, noalias_sidechannel i32* %6, align 4, !tbaa !13, !noalias !8 + %guard.6 = tail call i32* @llvm.noalias.arg.guard.p0i32.p0i32(i32* %add.ptr, i32* %6) #0 + tail call void @ex1(i32* %guard.6) #0, !noalias !8 + %8 = load i32*, i32** @a2, noalias_sidechannel i32** null, align 8, !tbaa !1, !noalias !8 + %9 = tail call i32* @llvm.side.noalias.p0i32.p0i8.p0p0i32.p0p0i32.i32(i32* %8, i8* null, i32** @a2, i32** null, i32 0, metadata !15) #0 + %10 = load i32, i32* @r, noalias_sidechannel i32* null, align 4, !tbaa !13, !noalias !8 + store i32 %10, i32* %8, noalias_sidechannel i32* %9, align 4, !tbaa !13, !noalias !8 + %guard.9 = tail call i32* @llvm.noalias.arg.guard.p0i32.p0i32(i32* %8, i32* %9) #0 + tail call void @ex1(i32* %guard.9) #0, !noalias !8 + br label %if.end + +if.else: ; preds = %entry + %11 = load i32*, i32** @a2, noalias_sidechannel i32** null, align 8, !tbaa !1, !noalias !12 + %12 = tail call i32* @llvm.side.noalias.p0i32.p0i8.p0p0i32.p0p0i32.i32(i32* %11, i8* null, i32** @a2, i32** null, i32 0, metadata !12) #0 + %13 = load i32, i32* %11, noalias_sidechannel i32* %12, align 4, !tbaa !13, !noalias !12 + %14 = load i32, i32* @r, noalias_sidechannel i32* null, align 4, !tbaa !13, !noalias !12 + %add1 = add nsw i32 %14, %13 + store i32 %add1, i32* @r, noalias_sidechannel i32* null, align 4, !tbaa !13, !noalias !12 + br label %if.end + +if.end: ; preds = %if.else, %if.then + %x.0 = phi i32* [ %0, %if.then ], [ %11, %if.else ] + %side.x.0 = phi i32* [ %6, %if.then ], [ %12, %if.else ] + %x.0.guard = tail call i32* @llvm.noalias.arg.guard.p0i32.p0i32(i32* %x.0, i32* %side.x.0) #0 + ret i32* %x.0.guard +} + +; WITHDT: NoAlias: %0 = load i32*, i32** @a, noalias_sidechannel i32** null, align 8, !tbaa !1, !noalias !5 <-> store i32 %add, i32* @r, noalias_sidechannel i32* null, align 4, !tbaa !10, !noalias !5 +; WITHDT: NoAlias: %0 = load i32*, i32** @a, noalias_sidechannel i32** null, align 8, !tbaa !1, !noalias !5 <-> store i32 %5, i32* %incdec.ptr, noalias_sidechannel i32* %4, align 4, !tbaa !10, !noalias !5 +; WITHDT: NoAlias: %0 = load i32*, i32** @a, noalias_sidechannel i32** null, align 8, !tbaa !1, !noalias !5 <-> store i32 %7, i32* %add.ptr, noalias_sidechannel i32* %6, align 4, !tbaa !10, !noalias !5 +; WITHDT: NoAlias: %0 = load i32*, i32** @a, noalias_sidechannel i32** null, align 8, !tbaa !1, !noalias !5 <-> store i32 %10, i32* %8, noalias_sidechannel i32* %9, align 4, !tbaa !10, !noalias !5 +; WITHDT: NoAlias: %0 = load i32*, i32** @a, noalias_sidechannel i32** null, align 8, !tbaa !1, !noalias !5 <-> store i32 %add1, i32* @r, noalias_sidechannel i32* null, align 4, !tbaa !10, !noalias !9 +; WITHDT: NoAlias: %2 = load i32, i32* %0, noalias_sidechannel i32* %1, align 4, !tbaa !10, !noalias !5 <-> store i32 %add, i32* @r, noalias_sidechannel i32* null, align 4, !tbaa !10, !noalias !5 +; WITHDT: NoAlias: %2 = load i32, i32* %0, noalias_sidechannel i32* %1, align 4, !tbaa !10, !noalias !5 <-> store i32 %5, i32* %incdec.ptr, noalias_sidechannel i32* %4, align 4, !tbaa !10, !noalias !5 +; WITHDT: MayAlias: %2 = load i32, i32* %0, noalias_sidechannel i32* %1, align 4, !tbaa !10, !noalias !5 <-> store i32 %7, i32* %add.ptr, noalias_sidechannel i32* %6, align 4, !tbaa !10, !noalias !5 +; WITHDT: NoAlias: %2 = load i32, i32* %0, noalias_sidechannel i32* %1, align 4, !tbaa !10, !noalias !5 <-> store i32 %10, i32* %8, noalias_sidechannel i32* %9, align 4, !tbaa !10, !noalias !5 +; WITHDT: NoAlias: %2 = load i32, i32* %0, noalias_sidechannel i32* %1, align 4, !tbaa !10, !noalias !5 <-> store i32 %add1, i32* @r, noalias_sidechannel i32* null, align 4, !tbaa !10, !noalias !9 +; WITHDT: MustAlias: %3 = load i32, i32* @r, noalias_sidechannel i32* null, align 4, !tbaa !10, !noalias !5 <-> store i32 %add, i32* @r, noalias_sidechannel i32* null, align 4, !tbaa !10, !noalias !5 +; WITHDT: NoAlias: %3 = load i32, i32* @r, noalias_sidechannel i32* null, align 4, !tbaa !10, !noalias !5 <-> store i32 %5, i32* %incdec.ptr, noalias_sidechannel i32* %4, align 4, !tbaa !10, !noalias !5 +; WITHDT: NoAlias: %3 = load i32, i32* @r, noalias_sidechannel i32* null, align 4, !tbaa !10, !noalias !5 <-> store i32 %7, i32* %add.ptr, noalias_sidechannel i32* %6, align 4, !tbaa !10, !noalias !5 +; WITHDT: NoAlias: %3 = load i32, i32* @r, noalias_sidechannel i32* null, align 4, !tbaa !10, !noalias !5 <-> store i32 %10, i32* %8, noalias_sidechannel i32* %9, align 4, !tbaa !10, !noalias !5 +; WITHDT: MustAlias: %3 = load i32, i32* @r, noalias_sidechannel i32* null, align 4, !tbaa !10, !noalias !5 <-> store i32 %add1, i32* @r, noalias_sidechannel i32* null, align 4, !tbaa !10, !noalias !9 +; WITHDT: MustAlias: %5 = load i32, i32* @r, noalias_sidechannel i32* null, align 4, !tbaa !10, !noalias !5 <-> store i32 %add, i32* @r, noalias_sidechannel i32* null, align 4, !tbaa !10, !noalias !5 +; WITHDT: NoAlias: %5 = load i32, i32* @r, noalias_sidechannel i32* null, align 4, !tbaa !10, !noalias !5 <-> store i32 %5, i32* %incdec.ptr, noalias_sidechannel i32* %4, align 4, !tbaa !10, !noalias !5 +; WITHDT: NoAlias: %5 = load i32, i32* @r, noalias_sidechannel i32* null, align 4, !tbaa !10, !noalias !5 <-> store i32 %7, i32* %add.ptr, noalias_sidechannel i32* %6, align 4, !tbaa !10, !noalias !5 +; WITHDT: NoAlias: %5 = load i32, i32* @r, noalias_sidechannel i32* null, align 4, !tbaa !10, !noalias !5 <-> store i32 %10, i32* %8, noalias_sidechannel i32* %9, align 4, !tbaa !10, !noalias !5 +; WITHDT: MustAlias: %5 = load i32, i32* @r, noalias_sidechannel i32* null, align 4, !tbaa !10, !noalias !5 <-> store i32 %add1, i32* @r, noalias_sidechannel i32* null, align 4, !tbaa !10, !noalias !9 +; WITHDT: MustAlias: %7 = load i32, i32* @r, noalias_sidechannel i32* null, align 4, !tbaa !10, !noalias !5 <-> store i32 %add, i32* @r, noalias_sidechannel i32* null, align 4, !tbaa !10, !noalias !5 +; WITHDT: NoAlias: %7 = load i32, i32* @r, noalias_sidechannel i32* null, align 4, !tbaa !10, !noalias !5 <-> store i32 %5, i32* %incdec.ptr, noalias_sidechannel i32* %4, align 4, !tbaa !10, !noalias !5 +; WITHDT: NoAlias: %7 = load i32, i32* @r, noalias_sidechannel i32* null, align 4, !tbaa !10, !noalias !5 <-> store i32 %7, i32* %add.ptr, noalias_sidechannel i32* %6, align 4, !tbaa !10, !noalias !5 +; WITHDT: NoAlias: %7 = load i32, i32* @r, noalias_sidechannel i32* null, align 4, !tbaa !10, !noalias !5 <-> store i32 %10, i32* %8, noalias_sidechannel i32* %9, align 4, !tbaa !10, !noalias !5 +; WITHDT: MustAlias: %7 = load i32, i32* @r, noalias_sidechannel i32* null, align 4, !tbaa !10, !noalias !5 <-> store i32 %add1, i32* @r, noalias_sidechannel i32* null, align 4, !tbaa !10, !noalias !9 +; WITHDT: NoAlias: %8 = load i32*, i32** @a2, noalias_sidechannel i32** null, align 8, !tbaa !1, !noalias !5 <-> store i32 %add, i32* @r, noalias_sidechannel i32* null, align 4, !tbaa !10, !noalias !5 +; WITHDT: NoAlias: %8 = load i32*, i32** @a2, noalias_sidechannel i32** null, align 8, !tbaa !1, !noalias !5 <-> store i32 %5, i32* %incdec.ptr, noalias_sidechannel i32* %4, align 4, !tbaa !10, !noalias !5 +; WITHDT: NoAlias: %8 = load i32*, i32** @a2, noalias_sidechannel i32** null, align 8, !tbaa !1, !noalias !5 <-> store i32 %7, i32* %add.ptr, noalias_sidechannel i32* %6, align 4, !tbaa !10, !noalias !5 +; WITHDT: NoAlias: %8 = load i32*, i32** @a2, noalias_sidechannel i32** null, align 8, !tbaa !1, !noalias !5 <-> store i32 %10, i32* %8, noalias_sidechannel i32* %9, align 4, !tbaa !10, !noalias !5 +; WITHDT: NoAlias: %8 = load i32*, i32** @a2, noalias_sidechannel i32** null, align 8, !tbaa !1, !noalias !5 <-> store i32 %add1, i32* @r, noalias_sidechannel i32* null, align 4, !tbaa !10, !noalias !9 +; WITHDT: MustAlias: %10 = load i32, i32* @r, noalias_sidechannel i32* null, align 4, !tbaa !10, !noalias !5 <-> store i32 %add, i32* @r, noalias_sidechannel i32* null, align 4, !tbaa !10, !noalias !5 +; WITHDT: NoAlias: %10 = load i32, i32* @r, noalias_sidechannel i32* null, align 4, !tbaa !10, !noalias !5 <-> store i32 %5, i32* %incdec.ptr, noalias_sidechannel i32* %4, align 4, !tbaa !10, !noalias !5 +; WITHDT: NoAlias: %10 = load i32, i32* @r, noalias_sidechannel i32* null, align 4, !tbaa !10, !noalias !5 <-> store i32 %7, i32* %add.ptr, noalias_sidechannel i32* %6, align 4, !tbaa !10, !noalias !5 +; WITHDT: NoAlias: %10 = load i32, i32* @r, noalias_sidechannel i32* null, align 4, !tbaa !10, !noalias !5 <-> store i32 %10, i32* %8, noalias_sidechannel i32* %9, align 4, !tbaa !10, !noalias !5 +; WITHDT: MustAlias: %10 = load i32, i32* @r, noalias_sidechannel i32* null, align 4, !tbaa !10, !noalias !5 <-> store i32 %add1, i32* @r, noalias_sidechannel i32* null, align 4, !tbaa !10, !noalias !9 +; WITHDT: NoAlias: %11 = load i32*, i32** @a2, noalias_sidechannel i32** null, align 8, !tbaa !1, !noalias !9 <-> store i32 %add, i32* @r, noalias_sidechannel i32* null, align 4, !tbaa !10, !noalias !5 +; WITHDT: NoAlias: %11 = load i32*, i32** @a2, noalias_sidechannel i32** null, align 8, !tbaa !1, !noalias !9 <-> store i32 %5, i32* %incdec.ptr, noalias_sidechannel i32* %4, align 4, !tbaa !10, !noalias !5 +; WITHDT: NoAlias: %11 = load i32*, i32** @a2, noalias_sidechannel i32** null, align 8, !tbaa !1, !noalias !9 <-> store i32 %7, i32* %add.ptr, noalias_sidechannel i32* %6, align 4, !tbaa !10, !noalias !5 +; WITHDT: MayAlias: %11 = load i32*, i32** @a2, noalias_sidechannel i32** null, align 8, !tbaa !1, !noalias !9 <-> store i32 %10, i32* %8, noalias_sidechannel i32* %9, align 4, !tbaa !10, !noalias !5 +; WITHDT: NoAlias: %11 = load i32*, i32** @a2, noalias_sidechannel i32** null, align 8, !tbaa !1, !noalias !9 <-> store i32 %add1, i32* @r, noalias_sidechannel i32* null, align 4, !tbaa !10, !noalias !9 +; WITHDT: NoAlias: %13 = load i32, i32* %11, noalias_sidechannel i32* %12, align 4, !tbaa !10, !noalias !9 <-> store i32 %add, i32* @r, noalias_sidechannel i32* null, align 4, !tbaa !10, !noalias !5 +; WITHDT: NoAlias: %13 = load i32, i32* %11, noalias_sidechannel i32* %12, align 4, !tbaa !10, !noalias !9 <-> store i32 %5, i32* %incdec.ptr, noalias_sidechannel i32* %4, align 4, !tbaa !10, !noalias !5 +; WITHDT: NoAlias: %13 = load i32, i32* %11, noalias_sidechannel i32* %12, align 4, !tbaa !10, !noalias !9 <-> store i32 %7, i32* %add.ptr, noalias_sidechannel i32* %6, align 4, !tbaa !10, !noalias !5 +; WITHDT: MayAlias: %13 = load i32, i32* %11, noalias_sidechannel i32* %12, align 4, !tbaa !10, !noalias !9 <-> store i32 %10, i32* %8, noalias_sidechannel i32* %9, align 4, !tbaa !10, !noalias !5 +; WITHDT: NoAlias: %13 = load i32, i32* %11, noalias_sidechannel i32* %12, align 4, !tbaa !10, !noalias !9 <-> store i32 %add1, i32* @r, noalias_sidechannel i32* null, align 4, !tbaa !10, !noalias !9 +; WITHDT: MustAlias: %14 = load i32, i32* @r, noalias_sidechannel i32* null, align 4, !tbaa !10, !noalias !9 <-> store i32 %add, i32* @r, noalias_sidechannel i32* null, align 4, !tbaa !10, !noalias !5 +; WITHDT: NoAlias: %14 = load i32, i32* @r, noalias_sidechannel i32* null, align 4, !tbaa !10, !noalias !9 <-> store i32 %5, i32* %incdec.ptr, noalias_sidechannel i32* %4, align 4, !tbaa !10, !noalias !5 +; WITHDT: NoAlias: %14 = load i32, i32* @r, noalias_sidechannel i32* null, align 4, !tbaa !10, !noalias !9 <-> store i32 %7, i32* %add.ptr, noalias_sidechannel i32* %6, align 4, !tbaa !10, !noalias !5 +; WITHDT: MayAlias: %14 = load i32, i32* @r, noalias_sidechannel i32* null, align 4, !tbaa !10, !noalias !9 <-> store i32 %10, i32* %8, noalias_sidechannel i32* %9, align 4, !tbaa !10, !noalias !5 +; WITHDT: MustAlias: %14 = load i32, i32* @r, noalias_sidechannel i32* null, align 4, !tbaa !10, !noalias !9 <-> store i32 %add1, i32* @r, noalias_sidechannel i32* null, align 4, !tbaa !10, !noalias !9 +; WITHDT: NoAlias: store i32 %5, i32* %incdec.ptr, noalias_sidechannel i32* %4, align 4, !tbaa !10, !noalias !5 <-> store i32 %add, i32* @r, noalias_sidechannel i32* null, align 4, !tbaa !10, !noalias !5 +; WITHDT: NoAlias: store i32 %7, i32* %add.ptr, noalias_sidechannel i32* %6, align 4, !tbaa !10, !noalias !5 <-> store i32 %add, i32* @r, noalias_sidechannel i32* null, align 4, !tbaa !10, !noalias !5 +; WITHDT: MayAlias: store i32 %7, i32* %add.ptr, noalias_sidechannel i32* %6, align 4, !tbaa !10, !noalias !5 <-> store i32 %5, i32* %incdec.ptr, noalias_sidechannel i32* %4, align 4, !tbaa !10, !noalias !5 +; WITHDT: NoAlias: store i32 %10, i32* %8, noalias_sidechannel i32* %9, align 4, !tbaa !10, !noalias !5 <-> store i32 %add, i32* @r, noalias_sidechannel i32* null, align 4, !tbaa !10, !noalias !5 +; WITHDT: NoAlias: store i32 %10, i32* %8, noalias_sidechannel i32* %9, align 4, !tbaa !10, !noalias !5 <-> store i32 %5, i32* %incdec.ptr, noalias_sidechannel i32* %4, align 4, !tbaa !10, !noalias !5 +; WITHDT: NoAlias: store i32 %10, i32* %8, noalias_sidechannel i32* %9, align 4, !tbaa !10, !noalias !5 <-> store i32 %7, i32* %add.ptr, noalias_sidechannel i32* %6, align 4, !tbaa !10, !noalias !5 +; WITHDT: MustAlias: store i32 %add1, i32* @r, noalias_sidechannel i32* null, align 4, !tbaa !10, !noalias !9 <-> store i32 %add, i32* @r, noalias_sidechannel i32* null, align 4, !tbaa !10, !noalias !5 +; WITHDT: NoAlias: store i32 %add1, i32* @r, noalias_sidechannel i32* null, align 4, !tbaa !10, !noalias !9 <-> store i32 %5, i32* %incdec.ptr, noalias_sidechannel i32* %4, align 4, !tbaa !10, !noalias !5 +; WITHDT: NoAlias: store i32 %add1, i32* @r, noalias_sidechannel i32* null, align 4, !tbaa !10, !noalias !9 <-> store i32 %7, i32* %add.ptr, noalias_sidechannel i32* %6, align 4, !tbaa !10, !noalias !5 +; WITHDT: MayAlias: store i32 %add1, i32* @r, noalias_sidechannel i32* null, align 4, !tbaa !10, !noalias !9 <-> store i32 %10, i32* %8, noalias_sidechannel i32* %9, align 4, !tbaa !10, !noalias !5 + +declare void @ex1(i32*) + +attributes #0 = { nounwind } + +!llvm.ident = !{!0} + +!0 = !{!"clang"} +!1 = !{!2, !2, i64 0} +!2 = !{!"any pointer", !3, i64 0} +!3 = !{!"omnipotent char", !4, i64 0} +!4 = !{!"Simple C/C++ TBAA"} +!5 = !{!6} +!6 = distinct !{!6, !7, !"foo: x"} +!7 = distinct !{!7, !"foo"} +!8 = !{!9, !11} +!9 = distinct !{!9, !10, !"foo1: x2"} +!10 = distinct !{!10, !"foo1"} +!11 = distinct !{!11, !10, !"foo1: x"} +!12 = !{!11} +!13 = !{!14, !14, i64 0} +!14 = !{!"int", !3, i64 0} +!15 = !{!9} Index: llvm/test/Analysis/ScopedNoAliasAA/noalias.ll =================================================================== --- /dev/null +++ llvm/test/Analysis/ScopedNoAliasAA/noalias.ll @@ -0,0 +1,66 @@ +; RUN: opt < %s -basicaa -scoped-noalias -aa-eval -evaluate-aa-metadata -print-all-alias-modref-info -disable-output 2>&1 | FileCheck %s +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +; Function Attrs: nounwind uwtable +define void @foo(float* nocapture %a, float* nocapture readonly %c, i64 %i0, i64 %i1) #0 { +entry: + %side.a = call float* @llvm.side.noalias.p0f32.p0i8.p0p0f32.p0p0f32.i32(float* %a, i8* null, float** null, float** null, i32 0, metadata !0) #1 + %0 = load float, float* %c, noalias_sidechannel float* null, align 4, !noalias !0 + %arrayidx.i = getelementptr inbounds float, float* %a, i64 %i0 + store float %0, float* %arrayidx.i, noalias_sidechannel float* %side.a, align 4, !noalias !0 + %1 = load float, float* %c, align 4 + %arrayidx = getelementptr inbounds float, float* %a, i64 %i1 + store float %1, float* %arrayidx, noalias_sidechannel float* null, align 4, !noalias !0 + ret void +} + +; CHECK-LABEL: Function: foo: +; CHECK: NoAlias: %0 = load float, float* %c, noalias_sidechannel float* null, align 4, !noalias !0 <-> store float %0, float* %arrayidx.i, noalias_sidechannel float* %side.a, align 4, !noalias !0 +; CHECK: MayAlias: %0 = load float, float* %c, noalias_sidechannel float* null, align 4, !noalias !0 <-> store float %1, float* %arrayidx, noalias_sidechannel float* null, align 4, !noalias !0 +; CHECK: MayAlias: %1 = load float, float* %c, align 4 <-> store float %0, float* %arrayidx.i, noalias_sidechannel float* %side.a, align 4, !noalias !0 +; CHECK: MayAlias: %1 = load float, float* %c, align 4 <-> store float %1, float* %arrayidx, noalias_sidechannel float* null, align 4, !noalias !0 +; CHECK: NoAlias: store float %1, float* %arrayidx, noalias_sidechannel float* null, align 4, !noalias !0 <-> store float %0, float* %arrayidx.i, noalias_sidechannel float* %side.a, align 4, !noalias !0 + + +; Function Attrs: nounwind uwtable +define void @foo2(float* nocapture %a, float* nocapture %b, float* nocapture readonly %c, i64 %i0, i64 %i1) #0 { +entry: + %0 = call float* @llvm.side.noalias.p0f32.p0i8.p0p0f32.p0p0f32.i32(float* %a, i8* null, float** null, float** null, i32 0, metadata !3) #1 + %1 = call float* @llvm.side.noalias.p0f32.p0i8.p0p0f32.p0p0f32.i32(float* %b, i8* null, float** null, float** null, i32 0, metadata !6) #1 + %2 = load float, float* %c, noalias_sidechannel float* null, align 4, !noalias !8 + %arrayidx.i = getelementptr inbounds float, float* %a, i64 5 + store float %2, float* %arrayidx.i, noalias_sidechannel float* %0, align 4, !noalias !8 + %arrayidx1.i = getelementptr inbounds float, float* %b, i64 %i0 + store float %2, float* %arrayidx1.i, noalias_sidechannel float* %1, align 4, !noalias !8 + %3 = load float, float* %c, align 4 + %arrayidx = getelementptr inbounds float, float* %a, i64 %i1 + store float %3, float* %arrayidx, noalias_sidechannel float* null, align 4, !noalias !8 + ret void +} + +; CHECK-LABEL: Function: foo2: +; CHECK: NoAlias: %2 = load float, float* %c, noalias_sidechannel float* null, align 4, !noalias !5 <-> store float %2, float* %arrayidx.i, noalias_sidechannel float* %0, align 4, !noalias !5 +; CHECK: NoAlias: %2 = load float, float* %c, noalias_sidechannel float* null, align 4, !noalias !5 <-> store float %2, float* %arrayidx1.i, noalias_sidechannel float* %1, align 4, !noalias !5 +; CHECK: MayAlias: %2 = load float, float* %c, noalias_sidechannel float* null, align 4, !noalias !5 <-> store float %3, float* %arrayidx, noalias_sidechannel float* null, align 4, !noalias !5 +; CHECK: MayAlias: %3 = load float, float* %c, align 4 <-> store float %2, float* %arrayidx.i, noalias_sidechannel float* %0, align 4, !noalias !5 +; CHECK: MayAlias: %3 = load float, float* %c, align 4 <-> store float %2, float* %arrayidx1.i, noalias_sidechannel float* %1, align 4, !noalias !5 +; CHECK: MayAlias: %3 = load float, float* %c, align 4 <-> store float %3, float* %arrayidx, noalias_sidechannel float* null, align 4, !noalias !5 +; CHECK: NoAlias: store float %2, float* %arrayidx1.i, noalias_sidechannel float* %1, align 4, !noalias !5 <-> store float %2, float* %arrayidx.i, noalias_sidechannel float* %0, align 4, !noalias !5 +; CHECK: NoAlias: store float %3, float* %arrayidx, noalias_sidechannel float* null, align 4, !noalias !5 <-> store float %2, float* %arrayidx.i, noalias_sidechannel float* %0, align 4, !noalias !5 +; CHECK: NoAlias: store float %3, float* %arrayidx, noalias_sidechannel float* null, align 4, !noalias !5 <-> store float %2, float* %arrayidx1.i, noalias_sidechannel float* %1, align 4, !noalias !5 + +declare float* @llvm.side.noalias.p0f32.p0i8.p0p0f32.p0p0f32.i32(float*, i8*, float**, float**, i32, metadata ) nounwind + +attributes #0 = { nounwind uwtable } +attributes #1 = { nounwind } + +!0 = !{!1} +!1 = distinct !{!1, !2, !"hello: %a"} +!2 = distinct !{!2, !"hello"} +!3 = !{!4} +!4 = distinct !{!4, !5, !"hello2: %a"} +!5 = distinct !{!5, !"hello2"} +!6 = !{!7} +!7 = distinct !{!7, !5, !"hello2: %b"} +!8 = !{!4, !7} Index: llvm/test/Analysis/ScopedNoAliasAA/noalias2.ll =================================================================== --- /dev/null +++ llvm/test/Analysis/ScopedNoAliasAA/noalias2.ll @@ -0,0 +1,113 @@ +; RUN: opt < %s -basicaa -scoped-noalias -aa-eval -evaluate-aa-metadata -print-all-alias-modref-info -disable-output 2>&1 | FileCheck %s +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +; Function Attrs: nounwind uwtable +define void @foo(float* noalias nocapture %a, float* noalias nocapture readonly %c) #0 { +entry: + %0 = call float* @llvm.side.noalias.p0f32.p0i8.p0p0f32.p0p0f32.i32(float* %a, i8* null, float** null, float** null, i32 0, metadata !0) #1 + %1 = call float* @llvm.side.noalias.p0f32.p0i8.p0p0f32.p0p0f32.i32(float* %c, i8* null, float** null, float** null, i32 0, metadata !3) #1 + %2 = load float, float* %c, noalias_sidechannel float* %1, align 4, !noalias !5 + %arrayidx.i = getelementptr inbounds float, float* %a, i64 5 + store float %2, float* %arrayidx.i, noalias_sidechannel float* %0, align 4, !noalias !5 + %3 = load float, float* %c, noalias_sidechannel float* null, align 4, !noalias !5 + %arrayidx = getelementptr inbounds float, float* %a, i64 7 + store float %3, float* %arrayidx, noalias_sidechannel float* null, align 4, !noalias !5 + ret void +} + +; CHECK-LABEL: Function: foo: +; CHECK: NoAlias: %2 = load float, float* %c, noalias_sidechannel float* %1, align 4, !noalias !5 <-> store float %2, float* %arrayidx.i, noalias_sidechannel float* %0, align 4, !noalias !5 +; CHECK: NoAlias: %2 = load float, float* %c, noalias_sidechannel float* %1, align 4, !noalias !5 <-> store float %3, float* %arrayidx, noalias_sidechannel float* null, align 4, !noalias !5 +; CHECK: NoAlias: %3 = load float, float* %c, noalias_sidechannel float* null, align 4, !noalias !5 <-> store float %2, float* %arrayidx.i, noalias_sidechannel float* %0, align 4, !noalias !5 +; CHECK: NoAlias: %3 = load float, float* %c, noalias_sidechannel float* null, align 4, !noalias !5 <-> store float %3, float* %arrayidx, noalias_sidechannel float* null, align 4, !noalias !5 +; CHECK: NoAlias: store float %3, float* %arrayidx, noalias_sidechannel float* null, align 4, !noalias !5 <-> store float %2, float* %arrayidx.i, noalias_sidechannel float* %0, align 4, !noalias !5 + +; Function Attrs: nounwind uwtable +define void @foo2(float* nocapture %a, float* nocapture %b, float* nocapture readonly %c) #0 { +entry: + %0 = call float* @llvm.side.noalias.p0f32.p0i8.p0p0f32.p0p0f32.i32(float* %a, i8* null, float** null, float** null, i32 0, metadata !6) #1 + %1 = call float* @llvm.side.noalias.p0f32.p0i8.p0p0f32.p0p0f32.i32(float* %c, i8* null, float** null, float** null, i32 0, metadata !9) #1 + %2 = call float* @llvm.side.noalias.p0f32.p0i8.p0p0f32.p0p0f32.i32(float* %0, i8* null, float** null, float** null, i32 0, metadata !11) #1, !noalias !14 + %3 = call float* @llvm.side.noalias.p0f32.p0i8.p0p0f32.p0p0f32.i32(float* %1, i8* null, float** null, float** null, i32 0, metadata !15) #1, !noalias !14 + %4 = load float, float* %c, noalias_sidechannel float* %3, align 4, !noalias !17 + %arrayidx.i.i = getelementptr inbounds float, float* %a, i64 5 + store float %4, float* %arrayidx.i.i, noalias_sidechannel float* %2, align 4, !noalias !17 + %5 = load float, float* %c, noalias_sidechannel float* %1, align 4, !noalias !14 + %arrayidx.i = getelementptr inbounds float, float* %a, i64 7 + store float %5, float* %arrayidx.i, noalias_sidechannel float* %0, align 4, !noalias !14 + %6 = call float* @llvm.side.noalias.p0f32.p0i8.p0p0f32.p0p0f32.i32(float* %a, i8* null, float** null, float** null, i32 0, metadata !18) #1 + %7 = call float* @llvm.side.noalias.p0f32.p0i8.p0p0f32.p0p0f32.i32(float* %b, i8* null, float** null, float** null, i32 0, metadata !21) #1 + %8 = load float, float* %c, noalias_sidechannel float* null, align 4, !noalias !23 + %arrayidx.i1 = getelementptr inbounds float, float* %a, i64 6 + store float %8, float* %arrayidx.i1, noalias_sidechannel float* %6, align 4, !noalias !23 + %arrayidx1.i = getelementptr inbounds float, float* %b, i64 8 + store float %8, float* %arrayidx1.i, noalias_sidechannel float* %7, align 4, !noalias !23 + ; %9 = load float, float* %c, noalias_sidechannel float* null, align 4, !noalias !23 + %9 = load float, float* %c, align 4 + %arrayidx = getelementptr inbounds float, float* %a, i64 7 + store float %9, float* %arrayidx, noalias_sidechannel float* null, align 4, !noalias !23 + ret void +} + +; CHECK-LABEL: Function: foo2: +; CHECK: NoAlias: %4 = load float, float* %c, noalias_sidechannel float* %3, align 4, !noalias !11 <-> store float %4, float* %arrayidx.i.i, noalias_sidechannel float* %2, align 4, !noalias !11 +; CHECK: NoAlias: %4 = load float, float* %c, noalias_sidechannel float* %3, align 4, !noalias !11 <-> store float %5, float* %arrayidx.i, noalias_sidechannel float* %0, align 4, !noalias !8 +; CHECK: MayAlias: %4 = load float, float* %c, noalias_sidechannel float* %3, align 4, !noalias !11 <-> store float %8, float* %arrayidx.i1, noalias_sidechannel float* %6, align 4, !noalias !17 +; CHECK: MayAlias: %4 = load float, float* %c, noalias_sidechannel float* %3, align 4, !noalias !11 <-> store float %8, float* %arrayidx1.i, noalias_sidechannel float* %7, align 4, !noalias !17 +; CHECK: MayAlias: %4 = load float, float* %c, noalias_sidechannel float* %3, align 4, !noalias !11 <-> store float %9, float* %arrayidx, noalias_sidechannel float* null, align 4, !noalias !17 +; CHECK: NoAlias: %5 = load float, float* %c, noalias_sidechannel float* %1, align 4, !noalias !8 <-> store float %4, float* %arrayidx.i.i, noalias_sidechannel float* %2, align 4, !noalias !11 +; CHECK: NoAlias: %5 = load float, float* %c, noalias_sidechannel float* %1, align 4, !noalias !8 <-> store float %5, float* %arrayidx.i, noalias_sidechannel float* %0, align 4, !noalias !8 +; CHECK: MayAlias: %5 = load float, float* %c, noalias_sidechannel float* %1, align 4, !noalias !8 <-> store float %8, float* %arrayidx.i1, noalias_sidechannel float* %6, align 4, !noalias !17 +; CHECK: MayAlias: %5 = load float, float* %c, noalias_sidechannel float* %1, align 4, !noalias !8 <-> store float %8, float* %arrayidx1.i, noalias_sidechannel float* %7, align 4, !noalias !17 +; CHECK: MayAlias: %5 = load float, float* %c, noalias_sidechannel float* %1, align 4, !noalias !8 <-> store float %9, float* %arrayidx, noalias_sidechannel float* null, align 4, !noalias !17 +; CHECK: MayAlias: %8 = load float, float* %c, noalias_sidechannel float* null, align 4, !noalias !17 <-> store float %4, float* %arrayidx.i.i, noalias_sidechannel float* %2, align 4, !noalias !11 +; CHECK: MayAlias: %8 = load float, float* %c, noalias_sidechannel float* null, align 4, !noalias !17 <-> store float %5, float* %arrayidx.i, noalias_sidechannel float* %0, align 4, !noalias !8 +; CHECK: NoAlias: %8 = load float, float* %c, noalias_sidechannel float* null, align 4, !noalias !17 <-> store float %8, float* %arrayidx.i1, noalias_sidechannel float* %6, align 4, !noalias !17 +; CHECK: NoAlias: %8 = load float, float* %c, noalias_sidechannel float* null, align 4, !noalias !17 <-> store float %8, float* %arrayidx1.i, noalias_sidechannel float* %7, align 4, !noalias !17 +; CHECK: MayAlias: %8 = load float, float* %c, noalias_sidechannel float* null, align 4, !noalias !17 <-> store float %9, float* %arrayidx, noalias_sidechannel float* null, align 4, !noalias !17 +; CHECK: MayAlias: %9 = load float, float* %c, align 4 <-> store float %4, float* %arrayidx.i.i, noalias_sidechannel float* %2, align 4, !noalias !11 +; CHECK: MayAlias: %9 = load float, float* %c, align 4 <-> store float %5, float* %arrayidx.i, noalias_sidechannel float* %0, align 4, !noalias !8 +; CHECK: MayAlias: %9 = load float, float* %c, align 4 <-> store float %8, float* %arrayidx.i1, noalias_sidechannel float* %6, align 4, !noalias !17 +; CHECK: MayAlias: %9 = load float, float* %c, align 4 <-> store float %8, float* %arrayidx1.i, noalias_sidechannel float* %7, align 4, !noalias !17 +; CHECK: MayAlias: %9 = load float, float* %c, align 4 <-> store float %9, float* %arrayidx, noalias_sidechannel float* null, align 4, !noalias !17 +; CHECK: NoAlias: store float %5, float* %arrayidx.i, noalias_sidechannel float* %0, align 4, !noalias !8 <-> store float %4, float* %arrayidx.i.i, noalias_sidechannel float* %2, align 4, !noalias !11 +; CHECK: NoAlias: store float %8, float* %arrayidx.i1, noalias_sidechannel float* %6, align 4, !noalias !17 <-> store float %4, float* %arrayidx.i.i, noalias_sidechannel float* %2, align 4, !noalias !11 +; CHECK: NoAlias: store float %8, float* %arrayidx.i1, noalias_sidechannel float* %6, align 4, !noalias !17 <-> store float %5, float* %arrayidx.i, noalias_sidechannel float* %0, align 4, !noalias !8 +; CHECK: MayAlias: store float %8, float* %arrayidx1.i, noalias_sidechannel float* %7, align 4, !noalias !17 <-> store float %4, float* %arrayidx.i.i, noalias_sidechannel float* %2, align 4, !noalias !11 +; CHECK: MayAlias: store float %8, float* %arrayidx1.i, noalias_sidechannel float* %7, align 4, !noalias !17 <-> store float %5, float* %arrayidx.i, noalias_sidechannel float* %0, align 4, !noalias !8 +; CHECK: NoAlias: store float %8, float* %arrayidx1.i, noalias_sidechannel float* %7, align 4, !noalias !17 <-> store float %8, float* %arrayidx.i1, noalias_sidechannel float* %6, align 4, !noalias !17 +; CHECK: NoAlias: store float %9, float* %arrayidx, noalias_sidechannel float* null, align 4, !noalias !17 <-> store float %4, float* %arrayidx.i.i, noalias_sidechannel float* %2, align 4, !noalias !11 +; CHECK: MustAlias: store float %9, float* %arrayidx, noalias_sidechannel float* null, align 4, !noalias !17 <-> store float %5, float* %arrayidx.i, noalias_sidechannel float* %0, align 4, !noalias !8 +; CHECK: NoAlias: store float %9, float* %arrayidx, noalias_sidechannel float* null, align 4, !noalias !17 <-> store float %8, float* %arrayidx.i1, noalias_sidechannel float* %6, align 4, !noalias !17 +; CHECK: NoAlias: store float %9, float* %arrayidx, noalias_sidechannel float* null, align 4, !noalias !17 <-> store float %8, float* %arrayidx1.i, noalias_sidechannel float* %7, align 4, !noalias !17 + +declare float* @llvm.side.noalias.p0f32.p0i8.p0p0f32.p0p0f32.i32(float*, i8*, float**, float**, i32, metadata ) nounwind + +attributes #0 = { nounwind uwtable } +attributes #1 = { nounwind } + +!0 = !{!1} +!1 = distinct !{!1, !2, !"hello: %a"} +!2 = distinct !{!2, !"hello"} +!3 = !{!4} +!4 = distinct !{!4, !2, !"hello: %c"} +!5 = !{!1, !4} +!6 = !{!7} +!7 = distinct !{!7, !8, !"foo: %a"} +!8 = distinct !{!8, !"foo"} +!9 = !{!10} +!10 = distinct !{!10, !8, !"foo: %c"} +!11 = !{!12} +!12 = distinct !{!12, !13, !"hello: %a"} +!13 = distinct !{!13, !"hello"} +!14 = !{!7, !10} +!15 = !{!16} +!16 = distinct !{!16, !13, !"hello: %c"} +!17 = !{!12, !16, !7, !10} +!18 = !{!19} +!19 = distinct !{!19, !20, !"hello2: %a"} +!20 = distinct !{!20, !"hello2"} +!21 = !{!22} +!22 = distinct !{!22, !20, !"hello2: %b"} +!23 = !{!19, !22} Index: llvm/test/Analysis/ScopedNoAliasAA/noalias_basics.ll =================================================================== --- /dev/null +++ llvm/test/Analysis/ScopedNoAliasAA/noalias_basics.ll @@ -0,0 +1,142 @@ +; RUN: opt < %s -basicaa -scoped-noalias -aa-eval -evaluate-aa-metadata -print-all-alias-modref-info -disable-output 2>&1 | FileCheck %s +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +; Function Attrs: nofree norecurse nounwind uwtable writeonly +define dso_local void @test_p_p(i32* nocapture %_pA, i32* nocapture %_pB) local_unnamed_addr #0 { +entry: + store i32 42, i32* %_pA, align 4, !tbaa !2 + store i32 99, i32* %_pB, align 4, !tbaa !2 + ret void +} +; CHECK-LABEL: Function: test_p_p: +; CHECK: MayAlias: store i32 99, i32* %_pB, align 4, !tbaa !2 <-> store i32 42, i32* %_pA, align 4, !tbaa !2 + +; Function Attrs: nounwind uwtable +define dso_local void @test_rp_p(i32* nocapture %_pA, i32* nocapture %_pB) local_unnamed_addr #1 { +entry: + %0 = tail call i8* @llvm.noalias.decl.p0i8.p0p0i32.i32(i32** null, i32 0, metadata !6) + %1 = tail call i32* @llvm.side.noalias.p0i32.p0i8.p0p0i32.p0p0i32.i32(i32* %_pA, i8* %0, i32** null, i32** undef, i32 0, metadata !6), !tbaa !9, !noalias !6 + store i32 42, i32* %_pA, noalias_sidechannel i32* %1, align 4, !tbaa !2, !noalias !6 + store i32 99, i32* %_pB, noalias_sidechannel i32* undef, align 4, !tbaa !2, !noalias !6 + ret void +} +; CHECK-LABEL: Function: test_rp_p: +; CHECK: NoAlias: store i32 99, i32* %_pB, noalias_sidechannel i32* undef, align 4, !tbaa !9, !noalias !2 <-> store i32 42, i32* %_pA, noalias_sidechannel i32* %1, align 4, !tbaa !9, !noalias !2 + +; Function Attrs: argmemonly nounwind +declare i8* @llvm.noalias.decl.p0i8.p0p0i32.i32(i32**, i32, metadata) #2 + +; Function Attrs: nounwind uwtable +define dso_local void @test_rp_rp_00(i32* nocapture %_pA, i32* nocapture %_pB) local_unnamed_addr #1 { +entry: + %0 = tail call i8* @llvm.noalias.decl.p0i8.p0p0i32.i32(i32** null, i32 0, metadata !11) + %1 = tail call i8* @llvm.noalias.decl.p0i8.p0p0i32.i32(i32** null, i32 0, metadata !14) + %2 = tail call i32* @llvm.side.noalias.p0i32.p0i8.p0p0i32.p0p0i32.i32(i32* %_pA, i8* %0, i32** null, i32** undef, i32 0, metadata !11), !tbaa !9, !noalias !16 + store i32 42, i32* %_pA, noalias_sidechannel i32* %2, align 4, !tbaa !2, !noalias !16 + %3 = tail call i32* @llvm.side.noalias.p0i32.p0i8.p0p0i32.p0p0i32.i32(i32* %_pB, i8* %1, i32** null, i32** undef, i32 0, metadata !14), !tbaa !9, !noalias !16 + store i32 99, i32* %_pB, noalias_sidechannel i32* %3, align 4, !tbaa !2, !noalias !16 + ret void +} +; CHECK-LABEL: Function: test_rp_rp_00: +; CHECK: NoAlias: store i32 99, i32* %_pB, noalias_sidechannel i32* %3, align 4, !tbaa !12, !noalias !11 <-> store i32 42, i32* %_pA, noalias_sidechannel i32* %2, align 4, !tbaa !12, !noalias !11 + +; Now test variants: {objectP, objectID, Scope } +; NOTE: in the following tests, the Scope information is recycled from previous tests + +; Same info -> MayAlias +; Function Attrs: nounwind uwtable +define dso_local void @test_rp_rp_01(i32* nocapture %_pA, i32* nocapture %_pB) local_unnamed_addr #1 { +entry: + %a.p = alloca i32*, align 8 + %b.p = alloca i32*, align 8 + %0 = tail call i8* @llvm.noalias.decl.p0i8.p0p0i32.i32(i32** %a.p, i32 0, metadata !11) + %1 = tail call i8* @llvm.noalias.decl.p0i8.p0p0i32.i32(i32** %b.p, i32 0, metadata !11) + %2 = tail call i32* @llvm.side.noalias.p0i32.p0i8.p0p0i32.p0p0i32.i32(i32* %_pA, i8* null, i32** %a.p, i32** undef, i32 0, metadata !11), !tbaa !9, !noalias !16 + store i32 42, i32* %_pA, noalias_sidechannel i32* %2, align 4, !tbaa !2, !noalias !16 + %3 = tail call i32* @llvm.side.noalias.p0i32.p0i8.p0p0i32.p0p0i32.i32(i32* %_pB, i8* null, i32** %a.p, i32** undef, i32 0, metadata !11), !tbaa !9, !noalias !16 + store i32 99, i32* %_pB, noalias_sidechannel i32* %3, align 4, !tbaa !2, !noalias !16 + ret void +} +; CHECK-LABEL: Function: test_rp_rp_01: +; CHECK: MayAlias: store i32 99, i32* %_pB, noalias_sidechannel i32* %3, align 4, !tbaa !11, !noalias !9 <-> store i32 42, i32* %_pA, noalias_sidechannel i32* %2, align 4, !tbaa !11, !noalias !9 + +; Variants with different info -> NoAlias + +; Function Attrs: nounwind uwtable +define dso_local void @test_rp_rp_02(i32* nocapture %_pA, i32* nocapture %_pB) local_unnamed_addr #1 { +entry: + %a.p = alloca i32*, align 8 + %b.p = alloca i32*, align 8 + %0 = tail call i8* @llvm.noalias.decl.p0i8.p0p0i32.i32(i32** %a.p, i32 0, metadata !11) + %1 = tail call i8* @llvm.noalias.decl.p0i8.p0p0i32.i32(i32** %b.p, i32 0, metadata !11) + %2 = tail call i32* @llvm.side.noalias.p0i32.p0i8.p0p0i32.p0p0i32.i32(i32* %_pA, i8* null, i32** %a.p, i32** undef, i32 0, metadata !11), !tbaa !9, !noalias !16 + store i32 42, i32* %_pA, noalias_sidechannel i32* %2, align 4, !tbaa !2, !noalias !16 + %3 = tail call i32* @llvm.side.noalias.p0i32.p0i8.p0p0i32.p0p0i32.i32(i32* %_pB, i8* null, i32** %b.p, i32** undef, i32 0, metadata !11), !tbaa !9, !noalias !16 + store i32 99, i32* %_pB, noalias_sidechannel i32* %3, align 4, !tbaa !2, !noalias !16 + ret void +} +; CHECK-LABEL: Function: test_rp_rp_02: +; CHECK: NoAlias: store i32 99, i32* %_pB, noalias_sidechannel i32* %3, align 4, !tbaa !11, !noalias !9 <-> store i32 42, i32* %_pA, noalias_sidechannel i32* %2, align 4, !tbaa !11, !noalias !9 + +; Function Attrs: nounwind uwtable +define dso_local void @test_rp_rp_03(i32* nocapture %_pA, i32* nocapture %_pB) local_unnamed_addr #1 { +entry: + %a.p = alloca i32*, align 8 + %b.p = alloca i32*, align 8 + %0 = tail call i8* @llvm.noalias.decl.p0i8.p0p0i32.i32(i32** %a.p, i32 0, metadata !11) + %1 = tail call i8* @llvm.noalias.decl.p0i8.p0p0i32.i32(i32** %b.p, i32 0, metadata !11) + %2 = tail call i32* @llvm.side.noalias.p0i32.p0i8.p0p0i32.p0p0i32.i32(i32* %_pA, i8* null, i32** %a.p, i32** undef, i32 0, metadata !11), !tbaa !9, !noalias !16 + store i32 42, i32* %_pA, noalias_sidechannel i32* %2, align 4, !tbaa !2, !noalias !16 + %3 = tail call i32* @llvm.side.noalias.p0i32.p0i8.p0p0i32.p0p0i32.i32(i32* %_pB, i8* null, i32** %a.p, i32** undef, i32 1, metadata !11), !tbaa !9, !noalias !16 + store i32 99, i32* %_pB, noalias_sidechannel i32* %3, align 4, !tbaa !2, !noalias !16 + ret void +} +; CHECK-LABEL: Function: test_rp_rp_03: +; CHECK: NoAlias: store i32 99, i32* %_pB, noalias_sidechannel i32* %3, align 4, !tbaa !11, !noalias !9 <-> store i32 42, i32* %_pA, noalias_sidechannel i32* %2, align 4, !tbaa !11, !noalias !9 + +; Function Attrs: nounwind uwtable +define dso_local void @test_rp_rp_04(i32* nocapture %_pA, i32* nocapture %_pB) local_unnamed_addr #1 { +entry: + %a.p = alloca i32*, align 8 + %b.p = alloca i32*, align 8 + %0 = tail call i8* @llvm.noalias.decl.p0i8.p0p0i32.i32(i32** %a.p, i32 0, metadata !11) + %1 = tail call i8* @llvm.noalias.decl.p0i8.p0p0i32.i32(i32** %b.p, i32 0, metadata !11) + %2 = tail call i32* @llvm.side.noalias.p0i32.p0i8.p0p0i32.p0p0i32.i32(i32* %_pA, i8* null, i32** %a.p, i32** undef, i32 0, metadata !11), !tbaa !9, !noalias !16 + store i32 42, i32* %_pA, noalias_sidechannel i32* %2, align 4, !tbaa !2, !noalias !16 + %3 = tail call i32* @llvm.side.noalias.p0i32.p0i8.p0p0i32.p0p0i32.i32(i32* %_pB, i8* null, i32** %a.p, i32** undef, i32 0, metadata !14), !tbaa !9, !noalias !16 + store i32 99, i32* %_pB, noalias_sidechannel i32* %3, align 4, !tbaa !2, !noalias !16 + ret void +} +; CHECK-LABEL: Function: test_rp_rp_04: +; CHECK: NoAlias: store i32 99, i32* %_pB, noalias_sidechannel i32* %3, align 4, !tbaa !11, !noalias !9 <-> store i32 42, i32* %_pA, noalias_sidechannel i32* %2, align 4, !tbaa !11, !noalias !9 + + +; Function Attrs: nounwind readnone speculatable +declare i32* @llvm.side.noalias.p0i32.p0i8.p0p0i32.p0p0i32.i32(i32*, i8*, i32**, i32**, i32, metadata) #3 + +attributes #0 = { nofree norecurse nounwind uwtable writeonly "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #1 = { nounwind uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #2 = { argmemonly nounwind } +attributes #3 = { nounwind readnone speculatable } + +!llvm.module.flags = !{!0} +!llvm.ident = !{!1} + +!0 = !{i32 1, !"wchar_size", i32 4} +!1 = !{!"clang"} +!2 = !{!3, !3, i64 0} +!3 = !{!"int", !4, i64 0} +!4 = !{!"omnipotent char", !5, i64 0} +!5 = !{!"Simple C/C++ TBAA"} +!6 = !{!7} +!7 = distinct !{!7, !8, !"test_rp_p: pA"} +!8 = distinct !{!8, !"test_rp_p"} +!9 = !{!10, !10, i64 0} +!10 = !{!"any pointer", !4, i64 0} +!11 = !{!12} +!12 = distinct !{!12, !13, !"test_rp_rp: pA"} +!13 = distinct !{!13, !"test_rp_rp"} +!14 = !{!15} +!15 = distinct !{!15, !13, !"test_rp_rp: pB"} +!16 = !{!12, !15} Index: llvm/test/Analysis/ScopedNoAliasAA/noalias_member.ll =================================================================== --- /dev/null +++ llvm/test/Analysis/ScopedNoAliasAA/noalias_member.ll @@ -0,0 +1,64 @@ +; RUN: opt < %s -basicaa -scoped-noalias -aa-eval -evaluate-aa-metadata -print-all-alias-modref-info -disable-output 2>&1 | FileCheck %s +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +%struct.FOO = type { i32*, i32* } + +; Function Attrs: nofree nounwind +define dso_local void @test_prp0_prp1(i32** nocapture %_pA) local_unnamed_addr #0 !noalias !2 { +entry: + %0 = load i32*, i32** %_pA, noalias_sidechannel i32** undef, align 4, !tbaa !5, !noalias !2 + %1 = tail call i32* @llvm.side.noalias.p0i32.p0i8.p0p0i32.p0p0i32.i32(i32* %0, i8* null, i32** %_pA, i32** undef, i32 0, metadata !2), !tbaa !5, !noalias !2 + %arrayidx1 = getelementptr inbounds i32*, i32** %_pA, i32 1 + %2 = load i32*, i32** %arrayidx1, noalias_sidechannel i32** undef, align 4, !tbaa !5, !noalias !2 + %3 = tail call i32* @llvm.side.noalias.p0i32.p0i8.p0p0i32.p0p0i32.i32(i32* %2, i8* null, i32** nonnull %arrayidx1, i32** undef, i32 0, metadata !2), !tbaa !5, !noalias !2 + store i32 42, i32* %0, noalias_sidechannel i32* %1, align 4, !tbaa !9, !noalias !2 + store i32 99, i32* %2, noalias_sidechannel i32* %3, align 4, !tbaa !9, !noalias !2 + ret void +} +; CHECK-LABEL: Function: test_prp0_prp1: +; CHECK: NoAlias: store i32 99, i32* %2, noalias_sidechannel i32* %3, align 4, !tbaa !9, !noalias !2 <-> store i32 42, i32* %0, noalias_sidechannel i32* %1, align 4, !tbaa !9, !noalias !2 + +; Function Attrs: nofree nounwind +define dso_local void @test_prS0_prS1(%struct.FOO* nocapture %_pS) local_unnamed_addr #0 !noalias !11 { +entry: + %mpA = getelementptr inbounds %struct.FOO, %struct.FOO* %_pS, i32 0, i32 0 + %0 = load i32*, i32** %mpA, noalias_sidechannel i32** undef, align 4, !tbaa !14, !noalias !11 + %1 = tail call i32* @llvm.side.noalias.p0i32.p0i8.p0p0i32.p0p0i32.i32(i32* %0, i8* null, i32** %mpA, i32** undef, i32 0, metadata !11), !tbaa !14, !noalias !11 + %mpB = getelementptr inbounds %struct.FOO, %struct.FOO* %_pS, i32 0, i32 1 + %2 = load i32*, i32** %mpB, noalias_sidechannel i32** undef, align 4, !tbaa !16, !noalias !11 + %3 = tail call i32* @llvm.side.noalias.p0i32.p0i8.p0p0i32.p0p0i32.i32(i32* %2, i8* null, i32** nonnull %mpB, i32** undef, i32 0, metadata !11), !tbaa !16, !noalias !11 + store i32 42, i32* %0, noalias_sidechannel i32* %1, align 4, !tbaa !9, !noalias !11 + store i32 99, i32* %2, noalias_sidechannel i32* %3, align 4, !tbaa !9, !noalias !11 + ret void +} +; CHECK-LABEL: Function: test_prS0_prS1: +; CHECK: NoAlias: store i32 99, i32* %2, noalias_sidechannel i32* %3, align 4, !tbaa !11, !noalias !2 <-> store i32 42, i32* %0, noalias_sidechannel i32* %1, align 4, !tbaa !11, !noalias !2 + + +; Function Attrs: nounwind readnone speculatable +declare i32* @llvm.side.noalias.p0i32.p0i8.p0p0i32.p0p0i32.i32(i32*, i8*, i32**, i32**, i32, metadata) #1 + +attributes #0 = { nofree nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #1 = { nounwind readnone speculatable } + +!llvm.module.flags = !{!0} +!llvm.ident = !{!1} + +!0 = !{i32 1, !"wchar_size", i32 4} +!1 = !{!"clang"} +!2 = !{!3} +!3 = distinct !{!3, !4, !"test_prp0_prp1: unknown scope"} +!4 = distinct !{!4, !"test_prp0_prp1"} +!5 = !{!6, !6, i64 0, i64 4} +!6 = !{!7, i64 4, !"any pointer"} +!7 = !{!8, i64 1, !"omnipotent char"} +!8 = !{!"Simple C/C++ TBAA"} +!9 = !{!10, !10, i64 0, i64 4} +!10 = !{!7, i64 4, !"int"} +!11 = !{!12} +!12 = distinct !{!12, !13, !"test_prS0_prS1: unknown scope"} +!13 = distinct !{!13, !"test_prS0_prS1"} +!14 = !{!15, !6, i64 0, i64 4} +!15 = !{!7, i64 8, !"FOO", !6, i64 0, i64 4, !6, i64 4, i64 4} +!16 = !{!15, !6, i64 4, i64 4} Index: llvm/test/Analysis/ScopedNoAliasAA/noalias_phi.ll =================================================================== --- /dev/null +++ llvm/test/Analysis/ScopedNoAliasAA/noalias_phi.ll @@ -0,0 +1,230 @@ +; RUN: opt < %s -basicaa -scoped-noalias -aa-eval -evaluate-aa-metadata -print-all-alias-modref-info -disable-output 2>&1 | FileCheck %s +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +; Function Attrs: nofree norecurse nounwind writeonly +define dso_local void @test_phi_p_p_p(i32* nocapture %_pA, i32* nocapture %_pB, i32* nocapture %_pC, i32 %c) local_unnamed_addr #0 { +entry: + %tobool = icmp eq i32 %c, 0 + br i1 %tobool, label %cond.false, label %cond.true + +cond.true: ; preds = %entry + br label %cond.end + +cond.false: ; preds = %entry + br label %cond.end + +cond.end: ; preds = %cond.false, %cond.true + %cond = phi i32* [ %_pA, %cond.true ], [ %_pB, %cond.false ] + store i32 42, i32* %cond, noalias_sidechannel i32* undef, align 4, !tbaa !2 + store i32 99, i32* %_pC, noalias_sidechannel i32* undef, align 4, !tbaa !2 + ret void +} +; CHECK-LABEL: Function: test_phi_p_p_p: +; CHECK: MayAlias: store i32 99, i32* %_pC, noalias_sidechannel i32* undef, align 4, !tbaa !2 <-> store i32 42, i32* %cond, noalias_sidechannel i32* undef, align 4, !tbaa !2 + +; Function Attrs: nounwind +define dso_local void @test_phi_rp_p_p(i32* nocapture %_pA, i32* nocapture %_pB, i32* nocapture %_pC, i32 %c) local_unnamed_addr #1 { +entry: + %0 = tail call i8* @llvm.noalias.decl.p0i8.p0p0i32.i32(i32** null, i32 0, metadata !6) + %tobool = icmp ne i32 %c, 0 + %1 = tail call i32* @llvm.side.noalias.p0i32.p0i8.p0p0i32.p0p0i32.i32(i32* %_pA, i8* %0, i32** null, i32** undef, i32 0, metadata !6) + br i1 %tobool, label %cond.false, label %cond.true + +cond.true: ; preds = %entry + br label %cond.end + +cond.false: ; preds = %entry + br label %cond.end + +cond.end: ; preds = %cond.false, %cond.true + %cond = phi i32* [ %_pA, %cond.true ], [ %_pB, %cond.false ] + %side.cond = phi i32* [ %1, %cond.true ], [ %_pB, %cond.false ] + store i32 42, i32* %cond, noalias_sidechannel i32* %side.cond, align 4, !tbaa !2, !noalias !6 + store i32 99, i32* %_pC, noalias_sidechannel i32* undef, align 4, !tbaa !2, !noalias !6 + ret void +} +; CHECK-LABEL: Function: test_phi_rp_p_p: +; CHECK: MayAlias: store i32 99, i32* %_pC, noalias_sidechannel i32* undef, align 4, !tbaa !5, !noalias !2 <-> store i32 42, i32* %cond, noalias_sidechannel i32* %side.cond, align 4, !tbaa !5, !noalias !2 + + +; Function Attrs: argmemonly nounwind +declare i8* @llvm.noalias.decl.p0i8.p0p0i32.i32(i32**, i32, metadata) #2 + +; Function Attrs: nounwind +define dso_local void @test_phi_p_rp_p(i32* nocapture %_pA, i32* nocapture %_pB, i32* nocapture %_pC, i32 %c) local_unnamed_addr #1 { +entry: + %0 = tail call i8* @llvm.noalias.decl.p0i8.p0p0i32.i32(i32** null, i32 0, metadata !9) + %tobool = icmp ne i32 %c, 0 + %1 = tail call i32* @llvm.side.noalias.p0i32.p0i8.p0p0i32.p0p0i32.i32(i32* %_pB, i8* %0, i32** null, i32** undef, i32 0, metadata !9) + br i1 %tobool, label %cond.false, label %cond.true + +cond.true: ; preds = %entry + br label %cond.end + +cond.false: ; preds = %entry + br label %cond.end + +cond.end: ; preds = %cond.false, %cond.true + %cond = phi i32* [ %_pA, %cond.true ], [ %_pB, %cond.false ] + %side.cond = phi i32* [ %_pA, %cond.true ], [ %1, %cond.false ] + store i32 42, i32* %cond, noalias_sidechannel i32* %side.cond, align 4, !tbaa !2, !noalias !9 + store i32 99, i32* %_pC, noalias_sidechannel i32* undef, align 4, !tbaa !2, !noalias !9 + ret void +} +; CHECK-LABEL: Function: test_phi_p_rp_p: +; CHECK: MayAlias: store i32 99, i32* %_pC, noalias_sidechannel i32* undef, align 4, !tbaa !5, !noalias !2 <-> store i32 42, i32* %cond, noalias_sidechannel i32* %side.cond, align 4, !tbaa !5, !noalias !2 + +; Function Attrs: nounwind +define dso_local void @test_phi_rp_rp_p(i32* nocapture %_pA, i32* nocapture %_pB, i32* nocapture %_pC, i32 %c) local_unnamed_addr #1 { +entry: + %0 = tail call i8* @llvm.noalias.decl.p0i8.p0p0i32.i32(i32** null, i32 0, metadata !12) + %1 = tail call i8* @llvm.noalias.decl.p0i8.p0p0i32.i32(i32** null, i32 0, metadata !15) + %tobool = icmp ne i32 %c, 0 + %2 = tail call i32* @llvm.side.noalias.p0i32.p0i8.p0p0i32.p0p0i32.i32(i32* %_pA, i8* %0, i32** null, i32** undef, i32 0, metadata !12) + %3 = tail call i32* @llvm.side.noalias.p0i32.p0i8.p0p0i32.p0p0i32.i32(i32* %_pB, i8* %1, i32** null, i32** undef, i32 0, metadata !15) + br i1 %tobool, label %cond.false, label %cond.true + +cond.true: ; preds = %entry + br label %cond.end + +cond.false: ; preds = %entry + br label %cond.end + +cond.end: ; preds = %cond.false, %cond.true + %cond = phi i32* [ %_pA, %cond.true ], [ %_pB, %cond.false ] + %side.cond = phi i32* [ %2, %cond.true ], [ %3, %cond.false ] + store i32 42, i32* %cond, noalias_sidechannel i32* %side.cond, align 4, !tbaa !2, !noalias !17 + store i32 99, i32* %_pC, noalias_sidechannel i32* undef, align 4, !tbaa !2, !noalias !17 + ret void +} +; CHECK-LABEL: Function: test_phi_rp_rp_p: +; CHECK: NoAlias: store i32 99, i32* %_pC, noalias_sidechannel i32* undef, align 4, !tbaa !7, !noalias !11 <-> store i32 42, i32* %cond, noalias_sidechannel i32* %side.cond, align 4, !tbaa !7, !noalias !11 + +; Function Attrs: nounwind +define dso_local void @test_phi_p_p_rp(i32* nocapture %_pA, i32* nocapture %_pB, i32* nocapture %_pC, i32 %c) local_unnamed_addr #1 { +entry: + %0 = tail call i8* @llvm.noalias.decl.p0i8.p0p0i32.i32(i32** null, i32 0, metadata !18) + %tobool = icmp eq i32 %c, 0 + br i1 %tobool, label %cond.false, label %cond.true + +cond.true: ; preds = %entry + br label %cond.end + +cond.false: ; preds = %entry + br label %cond.end + +cond.end: ; preds = %cond.false, %cond.true + %cond = phi i32* [ %_pA, %cond.true ], [ %_pB, %cond.false ] + store i32 42, i32* %cond, noalias_sidechannel i32* undef, align 4, !tbaa !2, !noalias !18 + %1 = tail call i32* @llvm.side.noalias.p0i32.p0i8.p0p0i32.p0p0i32.i32(i32* %_pC, i8* %0, i32** null, i32** undef, i32 0, metadata !18), !tbaa !21, !noalias !18 + store i32 99, i32* %_pC, noalias_sidechannel i32* %1, align 4, !tbaa !2, !noalias !18 + ret void +} +; CHECK-LABEL: Function: test_phi_p_p_rp: +; CHECK: NoAlias: store i32 99, i32* %_pC, noalias_sidechannel i32* %1, align 4, !tbaa !5, !noalias !2 <-> store i32 42, i32* %cond, noalias_sidechannel i32* undef, align 4, !tbaa !5, !noalias !2 + +; Function Attrs: nounwind +define dso_local void @test_phi_rp_rp_rp_01(i32* nocapture %_pA, i32* nocapture %_pB, i32* nocapture %_pC, i32 %c) local_unnamed_addr #1 { +entry: + %0 = tail call i8* @llvm.noalias.decl.p0i8.p0p0i32.i32(i32** null, i32 0, metadata !23) + %1 = tail call i8* @llvm.noalias.decl.p0i8.p0p0i32.i32(i32** null, i32 0, metadata !26) + %2 = tail call i8* @llvm.noalias.decl.p0i8.p0p0i32.i32(i32** null, i32 0, metadata !28) + %tobool = icmp ne i32 %c, 0 + %3 = tail call i32* @llvm.side.noalias.p0i32.p0i8.p0p0i32.p0p0i32.i32(i32* %_pA, i8* %0, i32** null, i32** undef, i32 0, metadata !23) + %4 = tail call i32* @llvm.side.noalias.p0i32.p0i8.p0p0i32.p0p0i32.i32(i32* %_pB, i8* %1, i32** null, i32** undef, i32 0, metadata !26) + br i1 %tobool, label %cond.false, label %cond.true + +cond.true: ; preds = %entry + br label %cond.end + +cond.false: ; preds = %entry + br label %cond.end + +cond.end: ; preds = %cond.false, %cond.true + %cond = phi i32* [ %_pA, %cond.true ], [ %_pB, %cond.false ] + %side.cond = phi i32* [ %3, %cond.true ], [ %4, %cond.false ] + store i32 42, i32* %cond, noalias_sidechannel i32* %side.cond, align 4, !tbaa !2, !noalias !30 + %5 = tail call i32* @llvm.side.noalias.p0i32.p0i8.p0p0i32.p0p0i32.i32(i32* %_pC, i8* %2, i32** null, i32** undef, i32 0, metadata !28), !tbaa !21, !noalias !30 + store i32 99, i32* %_pC, noalias_sidechannel i32* %5, align 4, !tbaa !2, !noalias !30 + ret void +} + +; CHECK-LABEL: Function: test_phi_rp_rp_rp_01: +; CHECK: NoAlias: store i32 99, i32* %_pC, noalias_sidechannel i32* %5, align 4, !tbaa !9, !noalias !13 <-> store i32 42, i32* %cond, noalias_sidechannel i32* %side.cond, align 4, !tbaa !9, !noalias !13 + +; Function Attrs: nounwind +define dso_local void @test_phi_rp_rp_rp_02(i32* nocapture %_pA, i32* nocapture readnone %_pB, i32* nocapture %_pC, i32 %c) local_unnamed_addr #1 { +entry: + %0 = tail call i8* @llvm.noalias.decl.p0i8.p0p0i32.i32(i32** null, i32 0, metadata !31) + %1 = tail call i8* @llvm.noalias.decl.p0i8.p0p0i32.i32(i32** null, i32 0, metadata !34) + %tobool = icmp ne i32 %c, 0 + %2 = tail call i32* @llvm.side.noalias.p0i32.p0i8.p0p0i32.p0p0i32.i32(i32* %_pA, i8* %0, i32** null, i32** undef, i32 0, metadata !31) + %3 = tail call i32* @llvm.side.noalias.p0i32.p0i8.p0p0i32.p0p0i32.i32(i32* %_pC, i8* %1, i32** null, i32** undef, i32 0, metadata !34) + br i1 %tobool, label %cond.false, label %cond.true + +cond.true: ; preds = %entry + br label %cond.end + +cond.false: ; preds = %entry + br label %cond.end + +cond.end: ; preds = %cond.false, %cond.true + %cond = phi i32* [ %_pA, %cond.true ], [ %_pC, %cond.false ] + %side.cond = phi i32* [ %2, %cond.true ], [ %3, %cond.false ] + store i32 42, i32* %cond, noalias_sidechannel i32* %side.cond, align 4, !tbaa !2, !noalias !36 + store i32 99, i32* %_pC, noalias_sidechannel i32* %3, align 4, !tbaa !2, !noalias !36 + ret void +} +; CHECK-LABEL: Function: test_phi_rp_rp_rp_02: +; CHECK: MayAlias: store i32 99, i32* %_pC, noalias_sidechannel i32* %3, align 4, !tbaa !7, !noalias !11 <-> store i32 42, i32* %cond, noalias_sidechannel i32* %side.cond, align 4, !tbaa !7, !noalias !11 + +; Function Attrs: nounwind readnone speculatable +declare i32* @llvm.side.noalias.p0i32.p0i8.p0p0i32.p0p0i32.i32(i32*, i8*, i32**, i32**, i32, metadata) #3 + +attributes #0 = { nofree norecurse nounwind writeonly "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #1 = { nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #2 = { argmemonly nounwind } +attributes #3 = { nounwind readnone speculatable } + +!llvm.module.flags = !{!0} +!llvm.ident = !{!1} + +!0 = !{i32 1, !"wchar_size", i32 4} +!1 = !{!"clang"} +!2 = !{!3, !3, i64 0, i64 4} +!3 = !{!4, i64 4, !"int"} +!4 = !{!5, i64 1, !"omnipotent char"} +!5 = !{!"Simple C/C++ TBAA"} +!6 = !{!7} +!7 = distinct !{!7, !8, !"test_phi_rp_p_p: pA"} +!8 = distinct !{!8, !"test_phi_rp_p_p"} +!9 = !{!10} +!10 = distinct !{!10, !11, !"test_phi_p_rp_p: pB"} +!11 = distinct !{!11, !"test_phi_p_rp_p"} +!12 = !{!13} +!13 = distinct !{!13, !14, !"test_phi_rp_rp_p: pA"} +!14 = distinct !{!14, !"test_phi_rp_rp_p"} +!15 = !{!16} +!16 = distinct !{!16, !14, !"test_phi_rp_rp_p: pB"} +!17 = !{!13, !16} +!18 = !{!19} +!19 = distinct !{!19, !20, !"test_phi_p_p_rp: pC"} +!20 = distinct !{!20, !"test_phi_p_p_rp"} +!21 = !{!22, !22, i64 0, i64 4} +!22 = !{!4, i64 4, !"any pointer"} +!23 = !{!24} +!24 = distinct !{!24, !25, !"test_phi_rp_rp_rp_01: pA"} +!25 = distinct !{!25, !"test_phi_rp_rp_rp_01"} +!26 = !{!27} +!27 = distinct !{!27, !25, !"test_phi_rp_rp_rp_01: pB"} +!28 = !{!29} +!29 = distinct !{!29, !25, !"test_phi_rp_rp_rp_01: pC"} +!30 = !{!24, !27, !29} +!31 = !{!32} +!32 = distinct !{!32, !33, !"test_phi_rp_rp_rp_02: pA"} +!33 = distinct !{!33, !"test_phi_rp_rp_rp_02"} +!34 = !{!35} +!35 = distinct !{!35, !33, !"test_phi_rp_rp_rp_02: pC"} +!36 = !{!32, !37, !35} +!37 = distinct !{!37, !33, !"test_phi_rp_rp_rp_02: pB"} Index: llvm/test/Analysis/ScopedNoAliasAA/noalias_phi_in_loop.ll =================================================================== --- /dev/null +++ llvm/test/Analysis/ScopedNoAliasAA/noalias_phi_in_loop.ll @@ -0,0 +1,154 @@ +; RUN: opt < %s -basicaa -scoped-noalias -aa-eval -evaluate-aa-metadata -print-all-alias-modref-info -disable-output 2>&1 | FileCheck %s +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +; Function Attrs: nounwind +define dso_local void @test_complex_phi_01(i32* nocapture %_pA, i32** nocapture readonly %_pB, i32 %n) local_unnamed_addr #0 { +entry: + %0 = tail call i8* @llvm.noalias.decl.p0i8.p0p0i32.i32(i32** null, i32 0, metadata !2) + %1 = tail call i32* @llvm.side.noalias.p0i32.p0i8.p0p0i32.p0p0i32.i32(i32* %_pA, i8* %0, i32** null, i32** undef, i32 0, metadata !2), !tbaa !5, !noalias !2 + %2 = load i32*, i32** %_pB, noalias_sidechannel i32** undef, align 4, !tbaa !5, !noalias !2 + %arrayidx1 = getelementptr inbounds i32*, i32** %_pB, i32 1 + %3 = load i32*, i32** %arrayidx1, noalias_sidechannel i32** undef, align 4, !tbaa !5, !noalias !2 + %arrayidx2 = getelementptr inbounds i32*, i32** %_pB, i32 2 + %4 = load i32*, i32** %arrayidx2, noalias_sidechannel i32** undef, align 4, !tbaa !5, !noalias !2 + %arrayidx3 = getelementptr inbounds i32*, i32** %_pB, i32 3 + %5 = load i32*, i32** %arrayidx3, noalias_sidechannel i32** undef, align 4, !tbaa !5, !noalias !2 + %arrayidx4 = getelementptr inbounds i32*, i32** %_pB, i32 4 + %6 = load i32*, i32** %arrayidx4, noalias_sidechannel i32** undef, align 4, !tbaa !5, !noalias !2 + %arrayidx5 = getelementptr inbounds i32*, i32** %_pB, i32 5 + %7 = load i32*, i32** %arrayidx5, noalias_sidechannel i32** undef, align 4, !tbaa !5, !noalias !2 + %arrayidx6 = getelementptr inbounds i32*, i32** %_pB, i32 6 + %8 = load i32*, i32** %arrayidx6, noalias_sidechannel i32** undef, align 4, !tbaa !5, !noalias !2 + %arrayidx7 = getelementptr inbounds i32*, i32** %_pB, i32 7 + %9 = load i32*, i32** %arrayidx7, noalias_sidechannel i32** undef, align 4, !tbaa !5, !noalias !2 + %arrayidx8 = getelementptr inbounds i32*, i32** %_pB, i32 8 + %10 = load i32*, i32** %arrayidx8, noalias_sidechannel i32** undef, align 4, !tbaa !5, !noalias !2 + %arrayidx9 = getelementptr inbounds i32*, i32** %_pB, i32 9 + %11 = load i32*, i32** %arrayidx9, noalias_sidechannel i32** undef, align 4, !tbaa !5, !noalias !2 + br label %for.cond + +for.cond: ; preds = %for.cond, %entry + %side.pTmp00.0 = phi i32* [ %2, %entry ], [ %side.pTmp01.0, %for.cond ] + %pTmp00.0 = phi i32* [ %2, %entry ], [ %pTmp01.0, %for.cond ] + %side.pTmp01.0 = phi i32* [ %3, %entry ], [ %side.pTmp02.0, %for.cond ] + %pTmp01.0 = phi i32* [ %3, %entry ], [ %pTmp02.0, %for.cond ] + %side.pTmp02.0 = phi i32* [ %4, %entry ], [ %side.pTmp03.0, %for.cond ] + %pTmp02.0 = phi i32* [ %4, %entry ], [ %pTmp03.0, %for.cond ] + %side.pTmp03.0 = phi i32* [ %5, %entry ], [ %side.pTmp04.0, %for.cond ] + %pTmp03.0 = phi i32* [ %5, %entry ], [ %pTmp04.0, %for.cond ] + %side.pTmp04.0 = phi i32* [ %6, %entry ], [ %side.pTmp05.0, %for.cond ] + %pTmp04.0 = phi i32* [ %6, %entry ], [ %pTmp05.0, %for.cond ] + %side.pTmp05.0 = phi i32* [ %7, %entry ], [ %side.pTmp06.0, %for.cond ] + %pTmp05.0 = phi i32* [ %7, %entry ], [ %pTmp06.0, %for.cond ] + %side.pTmp06.0 = phi i32* [ %8, %entry ], [ %side.pTmp07.0, %for.cond ] + %pTmp06.0 = phi i32* [ %8, %entry ], [ %pTmp07.0, %for.cond ] + %side.pTmp07.0 = phi i32* [ %9, %entry ], [ %side.pTmp08.0, %for.cond ] + %pTmp07.0 = phi i32* [ %9, %entry ], [ %pTmp08.0, %for.cond ] + %side.pTmp08.0 = phi i32* [ %10, %entry ], [ %side.pTmp09.0, %for.cond ] + %pTmp08.0 = phi i32* [ %10, %entry ], [ %pTmp09.0, %for.cond ] + %side.pTmp09.0 = phi i32* [ %11, %entry ], [ %1, %for.cond ] + %pTmp09.0 = phi i32* [ %11, %entry ], [ %_pA, %for.cond ] + %i.0 = phi i32 [ 0, %entry ], [ %inc, %for.cond ] + %cmp = icmp slt i32 %i.0, %n + %inc = add nuw nsw i32 %i.0, 1 + br i1 %cmp, label %for.cond, label %for.cond.cleanup + +for.cond.cleanup: ; preds = %for.cond + store i32 99, i32* %_pA, noalias_sidechannel i32* %1, align 4, !tbaa !9, !noalias !2 + store i32 42, i32* %pTmp00.0, noalias_sidechannel i32* %side.pTmp00.0, align 4, !tbaa !9, !noalias !2 + ret void +} + +; CHECK-LABEL: Function: test_complex_phi_01: +; CHECK: MayAlias: store i32 42, i32* %pTmp00.0, noalias_sidechannel i32* %side.pTmp00.0, align 4, !tbaa !9, !noalias !2 <-> store i32 99, i32* %_pA, noalias_sidechannel i32* %1, align 4, !tbaa !9, !noalias !2 + + +; Adapted version where the sidechannel chains don't collapse +; Function Attrs: nounwind +define dso_local void @test_complex_phi_02(i32* nocapture %_pA, i32** nocapture readonly %_pB, i32 %n) local_unnamed_addr #0 { +entry: + %0 = tail call i8* @llvm.noalias.decl.p0i8.p0p0i32.i32(i32** null, i32 0, metadata !2) + %decl2 = tail call i8* @llvm.noalias.decl.p0i8.p0p0i32.i32(i32** null, i32 1, metadata !2) + %1 = tail call i32* @llvm.side.noalias.p0i32.p0i8.p0p0i32.p0p0i32.i32(i32* %_pA, i8* %0, i32** null, i32** undef, i32 0, metadata !2), !tbaa !5, !noalias !2 + %extra_side = tail call i32* @llvm.side.noalias.p0i32.p0i8.p0p0i32.p0p0i32.i32(i32* %_pA, i8* %decl2, i32** null, i32** undef, i32 1, metadata !2), !tbaa !5, !noalias !2 + %2 = load i32*, i32** %_pB, noalias_sidechannel i32** undef, align 4, !tbaa !5, !noalias !2 + %arrayidx1 = getelementptr inbounds i32*, i32** %_pB, i32 1 + %3 = load i32*, i32** %arrayidx1, noalias_sidechannel i32** undef, align 4, !tbaa !5, !noalias !2 + %arrayidx2 = getelementptr inbounds i32*, i32** %_pB, i32 2 + %4 = load i32*, i32** %arrayidx2, noalias_sidechannel i32** undef, align 4, !tbaa !5, !noalias !2 + %arrayidx3 = getelementptr inbounds i32*, i32** %_pB, i32 3 + %5 = load i32*, i32** %arrayidx3, noalias_sidechannel i32** undef, align 4, !tbaa !5, !noalias !2 + %arrayidx4 = getelementptr inbounds i32*, i32** %_pB, i32 4 + %6 = load i32*, i32** %arrayidx4, noalias_sidechannel i32** undef, align 4, !tbaa !5, !noalias !2 + %arrayidx5 = getelementptr inbounds i32*, i32** %_pB, i32 5 + %7 = load i32*, i32** %arrayidx5, noalias_sidechannel i32** undef, align 4, !tbaa !5, !noalias !2 + %arrayidx6 = getelementptr inbounds i32*, i32** %_pB, i32 6 + %8 = load i32*, i32** %arrayidx6, noalias_sidechannel i32** undef, align 4, !tbaa !5, !noalias !2 + %arrayidx7 = getelementptr inbounds i32*, i32** %_pB, i32 7 + %9 = load i32*, i32** %arrayidx7, noalias_sidechannel i32** undef, align 4, !tbaa !5, !noalias !2 + %arrayidx8 = getelementptr inbounds i32*, i32** %_pB, i32 8 + %10 = load i32*, i32** %arrayidx8, noalias_sidechannel i32** undef, align 4, !tbaa !5, !noalias !2 + %arrayidx9 = getelementptr inbounds i32*, i32** %_pB, i32 9 + %11 = load i32*, i32** %arrayidx9, noalias_sidechannel i32** undef, align 4, !tbaa !5, !noalias !2 + br label %for.cond + +for.cond: ; preds = %for.cond, %entry + %side.pTmp00.0 = phi i32* [ %2, %entry ], [ %side.pTmp01.0, %for.cond ] + %pTmp00.0 = phi i32* [ %2, %entry ], [ %pTmp01.0, %for.cond ] + %side.pTmp01.0 = phi i32* [ %3, %entry ], [ %side.pTmp02.0, %for.cond ] + %pTmp01.0 = phi i32* [ %3, %entry ], [ %pTmp02.0, %for.cond ] + %side.pTmp02.0 = phi i32* [ %4, %entry ], [ %side.pTmp03.0, %for.cond ] + %pTmp02.0 = phi i32* [ %4, %entry ], [ %pTmp03.0, %for.cond ] + %side.pTmp03.0 = phi i32* [ %5, %entry ], [ %side.pTmp04.0, %for.cond ] + %pTmp03.0 = phi i32* [ %5, %entry ], [ %pTmp04.0, %for.cond ] + %side.pTmp04.0 = phi i32* [ %6, %entry ], [ %side.pTmp05.0, %for.cond ] + %pTmp04.0 = phi i32* [ %6, %entry ], [ %pTmp05.0, %for.cond ] + %side.pTmp05.0 = phi i32* [ %7, %entry ], [ %side.pTmp06.0, %for.cond ] + %pTmp05.0 = phi i32* [ %7, %entry ], [ %pTmp06.0, %for.cond ] + %side.pTmp06.0 = phi i32* [ %8, %entry ], [ %side.pTmp07.0, %for.cond ] + %pTmp06.0 = phi i32* [ %8, %entry ], [ %pTmp07.0, %for.cond ] + %side.pTmp07.0 = phi i32* [ %9, %entry ], [ %side.pTmp08.0, %for.cond ] + %pTmp07.0 = phi i32* [ %9, %entry ], [ %pTmp08.0, %for.cond ] + %side.pTmp08.0 = phi i32* [ %10, %entry ], [ %side.pTmp09.0, %for.cond ] + %pTmp08.0 = phi i32* [ %10, %entry ], [ %pTmp09.0, %for.cond ] + %side.pTmp09.0 = phi i32* [ %11, %entry ], [ %extra_side, %for.cond ] + %pTmp09.0 = phi i32* [ %11, %entry ], [ %_pA, %for.cond ] + %i.0 = phi i32 [ 0, %entry ], [ %inc, %for.cond ] + %cmp = icmp slt i32 %i.0, %n + %inc = add nuw nsw i32 %i.0, 1 + br i1 %cmp, label %for.cond, label %for.cond.cleanup + +for.cond.cleanup: ; preds = %for.cond + store i32 99, i32* %_pA, noalias_sidechannel i32* %1, align 4, !tbaa !9, !noalias !2 + store i32 42, i32* %pTmp00.0, noalias_sidechannel i32* %side.pTmp00.0, align 4, !tbaa !9, !noalias !2 + ret void +} + +; CHECK-LABEL: Function: test_complex_phi_02: +; CHECK: NoAlias: store i32 42, i32* %pTmp00.0, noalias_sidechannel i32* %side.pTmp00.0, align 4, !tbaa !9, !noalias !2 <-> store i32 99, i32* %_pA, noalias_sidechannel i32* %1, align 4, !tbaa !9, !noalias !2 + +; Function Attrs: argmemonly nounwind +declare i8* @llvm.noalias.decl.p0i8.p0p0i32.i32(i32**, i32, metadata) #1 + +; Function Attrs: nounwind readnone speculatable +declare i32* @llvm.side.noalias.p0i32.p0i8.p0p0i32.p0p0i32.i32(i32*, i8*, i32**, i32**, i32, metadata) #2 + +attributes #0 = { nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #1 = { argmemonly nounwind } +attributes #2 = { nounwind readnone speculatable } + +!llvm.module.flags = !{!0} +!llvm.ident = !{!1} + +!0 = !{i32 1, !"wchar_size", i32 4} +!1 = !{!"clang"} +!2 = !{!3} +!3 = distinct !{!3, !4, !"test_complex_phi: rpTmp"} +!4 = distinct !{!4, !"test_complex_phi"} +!5 = !{!6, !6, i64 0, i64 4} +!6 = !{!7, i64 4, !"any pointer"} +!7 = !{!8, i64 1, !"omnipotent char"} +!8 = !{!"Simple C/C++ TBAA"} +!9 = !{!10, !10, i64 0, i64 4} +!10 = !{!7, i64 4, !"int"} Index: llvm/test/Analysis/ScopedNoAliasAA/noalias_recursive.ll =================================================================== --- /dev/null +++ llvm/test/Analysis/ScopedNoAliasAA/noalias_recursive.ll @@ -0,0 +1,127 @@ +; RUN: opt < %s -basicaa -scoped-noalias -aa-eval -evaluate-aa-metadata -print-all-alias-modref-info -disable-output 2>&1 | FileCheck %s +; RUN: opt < %s -aa-pipeline=basic-aa,scoped-noalias-aa -passes=aa-eval -evaluate-aa-metadata -print-all-alias-modref-info -disable-output 2>&1 | FileCheck %s +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +; Function Attrs: nounwind +define dso_local void @test_prp_prp(i32** nocapture readonly %_pA, i32** nocapture readonly %_pB) local_unnamed_addr #0 { +entry: + %0 = tail call i8* @llvm.noalias.decl.p0i8.p0p0p0i32.i64(i32*** null, i64 0, metadata !2) + %1 = tail call i8* @llvm.noalias.decl.p0i8.p0p0p0i32.i64(i32*** null, i64 0, metadata !5) + %2 = tail call i32** @llvm.side.noalias.p0p0i32.p0i8.p0p0p0i32.p0p0p0i32.i64(i32** %_pA, i8* %0, i32*** null, i32*** undef, i64 0, metadata !2), !tbaa !7, !noalias !11 + %3 = load i32*, i32** %_pA, noalias_sidechannel i32** %2, align 4, !tbaa !7, !noalias !11 + store i32 42, i32* %3, noalias_sidechannel i32* undef, align 4, !tbaa !12, !noalias !11 + %4 = tail call i32** @llvm.side.noalias.p0p0i32.p0i8.p0p0p0i32.p0p0p0i32.i64(i32** %_pB, i8* %1, i32*** null, i32*** undef, i64 0, metadata !5), !tbaa !7, !noalias !11 + %5 = load i32*, i32** %_pB, noalias_sidechannel i32** %4, align 4, !tbaa !7, !noalias !11 + store i32 99, i32* %5, noalias_sidechannel i32* undef, align 4, !tbaa !12, !noalias !11 + ret void +} +; CHECK-LABEL: Function: test_prp_prp: +; CHECK: MayAlias: store i32 99, i32* %5, noalias_sidechannel i32* undef, align 4, !tbaa !12, !noalias !11 <-> store i32 42, i32* %3, noalias_sidechannel i32* undef, align 4, !tbaa !12, !noalias !11 + +; Function Attrs: argmemonly nounwind +declare i8* @llvm.noalias.decl.p0i8.p0p0p0i32.i64(i32***, i64, metadata) #1 + +; Function Attrs: nofree nounwind +define dso_local void @test_rpp_rpp(i32** nocapture %_pA, i32** nocapture %_pB) local_unnamed_addr #2 !noalias !14 { +entry: + %0 = load i32*, i32** %_pA, noalias_sidechannel i32** undef, align 4, !tbaa !7, !noalias !14 + %1 = tail call i32* @llvm.side.noalias.p0i32.p0i8.p0p0i32.p0p0i32.i64(i32* %0, i8* null, i32** %_pA, i32** undef, i64 0, metadata !14), !tbaa !7, !noalias !14 + store i32 42, i32* %0, noalias_sidechannel i32* %1, align 4, !tbaa !12, !noalias !14 + %2 = load i32*, i32** %_pB, noalias_sidechannel i32** undef, align 4, !tbaa !7, !noalias !14 + %3 = tail call i32* @llvm.side.noalias.p0i32.p0i8.p0p0i32.p0p0i32.i64(i32* %2, i8* null, i32** %_pB, i32** undef, i64 0, metadata !14), !tbaa !7, !noalias !14 + store i32 99, i32* %2, noalias_sidechannel i32* %3, align 4, !tbaa !12, !noalias !14 + ret void +} +; CHECK-LABEL: Function: test_rpp_rpp: +; CHECK: MayAlias: store i32 99, i32* %2, noalias_sidechannel i32* %3, align 4, !tbaa !9, !noalias !2 <-> store i32 42, i32* %0, noalias_sidechannel i32* %1, align 4, !tbaa !9, !noalias !2 + +; Function Attrs: nounwind +define dso_local void @test_rprp_rprp(i32** nocapture %_pA, i32** nocapture %_pB) local_unnamed_addr #0 !noalias !17 { +entry: + %0 = tail call i8* @llvm.noalias.decl.p0i8.p0p0p0i32.i64(i32*** null, i64 0, metadata !20) + %1 = tail call i8* @llvm.noalias.decl.p0i8.p0p0p0i32.i64(i32*** null, i64 0, metadata !22) + %2 = tail call i32** @llvm.side.noalias.p0p0i32.p0i8.p0p0p0i32.p0p0p0i32.i64(i32** %_pA, i8* %0, i32*** null, i32*** undef, i64 0, metadata !20), !tbaa !7, !noalias !24 + %3 = load i32*, i32** %_pA, noalias_sidechannel i32** %2, align 4, !tbaa !7, !noalias !24 + %4 = tail call i32* @llvm.side.noalias.p0i32.p0i8.p0p0i32.p0p0i32.i64(i32* %3, i8* null, i32** %_pA, i32** %2, i64 0, metadata !17), !tbaa !7, !noalias !24 + store i32 42, i32* %3, noalias_sidechannel i32* %4, align 4, !tbaa !12, !noalias !24 + %5 = tail call i32** @llvm.side.noalias.p0p0i32.p0i8.p0p0p0i32.p0p0p0i32.i64(i32** %_pB, i8* %1, i32*** null, i32*** undef, i64 0, metadata !22), !tbaa !7, !noalias !24 + %6 = load i32*, i32** %_pB, noalias_sidechannel i32** %5, align 4, !tbaa !7, !noalias !24 + %7 = tail call i32* @llvm.side.noalias.p0i32.p0i8.p0p0i32.p0p0i32.i64(i32* %6, i8* null, i32** %_pB, i32** %5, i64 0, metadata !17), !tbaa !7, !noalias !24 + store i32 99, i32* %6, noalias_sidechannel i32* %7, align 4, !tbaa !12, !noalias !24 + ret void +} + +; CHECK-LABEL: Function: test_rprp_rprp: +; CHECK: NoAlias: store i32 99, i32* %6, noalias_sidechannel i32* %7, align 4, !tbaa !14, !noalias !13 <-> store i32 42, i32* %3, noalias_sidechannel i32* %4, align 4, !tbaa !14, !noalias !13 + +; Function Attrs: nounwind +define dso_local void @test_prp_01(i32** nocapture %pA) local_unnamed_addr #0 !noalias !17 { +entry: + %0 = load i32*, i32** %pA, noalias_sidechannel i32** undef, align 8, !tbaa !7, !noalias !17 + %1 = call i32* @llvm.side.noalias.p0i32.p0i8.p0p0i32.p0p0i32.i64(i32* %0, i8* null, i32** %pA, i32** undef, i64 0, metadata !17), !tbaa !7, !noalias !17 + store i32 42, i32* %0, noalias_sidechannel i32* %1, align 4, !tbaa !12, !noalias !17 + %arrayidx1 = getelementptr inbounds i32*, i32** %pA, i64 0 + %2 = load i32*, i32** %arrayidx1, noalias_sidechannel i32** undef, align 8, !tbaa !7, !noalias !17 + %3 = call i32* @llvm.side.noalias.p0i32.p0i8.p0p0i32.p0p0i32.i64(i32* %2, i8* null, i32** %arrayidx1, i32** undef, i64 0, metadata !17), !tbaa !7, !noalias !17 + store i32 43, i32* %2, noalias_sidechannel i32* %3, align 4, !tbaa !12, !noalias !17 + ret void +} + +; CHECK-LABEL: Function: test_prp_01: +; CHECK: MayAlias: store i32 43, i32* %2, noalias_sidechannel i32* %3, align 4, !tbaa !9, !noalias !2 <-> store i32 42, i32* %0, noalias_sidechannel i32* %1, align 4, !tbaa !9, !noalias !2 +; Function Attrs: nounwind +define dso_local void @test_prp_02(i32** nocapture %pA) local_unnamed_addr #0 !noalias !17 { +entry: + %0 = load i32*, i32** %pA, noalias_sidechannel i32** undef, align 8, !tbaa !7, !noalias !17 + %1 = call i32* @llvm.side.noalias.p0i32.p0i8.p0p0i32.p0p0i32.i64(i32* %0, i8* null, i32** %pA, i32** undef, i64 0, metadata !17), !tbaa !7, !noalias !17 + store i32 42, i32* %0, noalias_sidechannel i32* %1, align 4, !tbaa !12, !noalias !17 + %arrayidx1 = getelementptr inbounds i32*, i32** %pA, i64 1 + %2 = load i32*, i32** %arrayidx1, noalias_sidechannel i32** undef, align 8, !tbaa !7, !noalias !17 + %3 = call i32* @llvm.side.noalias.p0i32.p0i8.p0p0i32.p0p0i32.i64(i32* %2, i8* null, i32** %arrayidx1, i32** undef, i64 0, metadata !17), !tbaa !7, !noalias !17 + store i32 43, i32* %2, noalias_sidechannel i32* %3, align 4, !tbaa !12, !noalias !17 + ret void +} + +; CHECK-LABEL: Function: test_prp_02: +; CHECK: NoAlias: store i32 43, i32* %2, noalias_sidechannel i32* %3, align 4, !tbaa !9, !noalias !2 <-> store i32 42, i32* %0, noalias_sidechannel i32* %1, align 4, !tbaa !9, !noalias !2 + +; Function Attrs: nounwind readnone speculatable +declare i32** @llvm.side.noalias.p0p0i32.p0i8.p0p0p0i32.p0p0p0i32.i64(i32**, i8*, i32***, i32***, i64, metadata) #3 + +; Function Attrs: nounwind readnone speculatable +declare i32* @llvm.side.noalias.p0i32.p0i8.p0p0i32.p0p0i32.i64(i32*, i8*, i32**, i32**, i64, metadata) #3 + +attributes #0 = { nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #1 = { argmemonly nounwind } +attributes #2 = { nofree nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #3 = { nounwind readnone speculatable } + +!llvm.module.flags = !{!0} +!llvm.ident = !{!1} + +!0 = !{i32 1, !"wchar_size", i32 4} +!1 = !{!"clang"} +!2 = !{!3} +!3 = distinct !{!3, !4, !"test_prp_prp: pA"} +!4 = distinct !{!4, !"test_prp_prp"} +!5 = !{!6} +!6 = distinct !{!6, !4, !"test_prp_prp: pB"} +!7 = !{!8, !8, i64 0, i64 4} +!8 = !{!9, i64 4, !"any pointer"} +!9 = !{!10, i64 1, !"omnipotent char"} +!10 = !{!"Simple C/C++ TBAA"} +!11 = !{!3, !6} +!12 = !{!13, !13, i64 0, i64 4} +!13 = !{!9, i64 4, !"int"} +!14 = !{!15} +!15 = distinct !{!15, !16, !"test_rpp_rpp: unknown scope"} +!16 = distinct !{!16, !"test_rpp_rpp"} +!17 = !{!18} +!18 = distinct !{!18, !19, !"test_rprp_rprp: unknown scope"} +!19 = distinct !{!19, !"test_rprp_rprp"} +!20 = !{!21} +!21 = distinct !{!21, !19, !"test_rprp_rprp: pA"} +!22 = !{!23} +!23 = distinct !{!23, !19, !"test_rprp_rprp: pB"} +!24 = !{!21, !23, !18} Index: llvm/test/Analysis/ScopedNoAliasAA/noalias_select.ll =================================================================== --- /dev/null +++ llvm/test/Analysis/ScopedNoAliasAA/noalias_select.ll @@ -0,0 +1,167 @@ +; RUN: opt < %s -basicaa -scoped-noalias -aa-eval -evaluate-aa-metadata -print-all-alias-modref-info -disable-output 2>&1 | FileCheck %s +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +; Function Attrs: nofree norecurse nounwind writeonly +define dso_local void @test_select_p_p_p(i32* nocapture %_pA, i32* nocapture %_pB, i32* nocapture %_pC, i32 %c) local_unnamed_addr #0 { +entry: + %tobool = icmp eq i32 %c, 0 + %cond = select i1 %tobool, i32* %_pB, i32* %_pA + store i32 42, i32* %cond, noalias_sidechannel i32* undef, align 4, !tbaa !2 + store i32 99, i32* %_pC, noalias_sidechannel i32* undef, align 4, !tbaa !2 + ret void +} +; CHECK-LABEL: Function: test_select_p_p_p: +; CHECK: MayAlias: store i32 99, i32* %_pC, noalias_sidechannel i32* undef, align 4, !tbaa !2 <-> store i32 42, i32* %cond, noalias_sidechannel i32* undef, align 4, !tbaa !2 + +; Function Attrs: nounwind +define dso_local void @test_select_rp_p_p(i32* nocapture %_pA, i32* nocapture %_pB, i32* nocapture %_pC, i32 %c) local_unnamed_addr #1 { +entry: + %0 = tail call i8* @llvm.noalias.decl.p0i8.p0p0i32.i32(i32** null, i32 0, metadata !6) + %tobool = icmp ne i32 %c, 0 + %1 = tail call i32* @llvm.side.noalias.p0i32.p0i8.p0p0i32.p0p0i32.i32(i32* %_pA, i8* %0, i32** null, i32** undef, i32 0, metadata !6) + %side.cond = select i1 %tobool, i32* %1, i32* %_pB + %cond = select i1 %tobool, i32* %_pA, i32* %_pB + store i32 42, i32* %cond, noalias_sidechannel i32* %side.cond, align 4, !tbaa !2, !noalias !6 + store i32 99, i32* %_pC, noalias_sidechannel i32* undef, align 4, !tbaa !2, !noalias !6 + ret void +} +; CHECK-LABEL: Function: test_select_rp_p_p: +; CHECK: MayAlias: store i32 99, i32* %_pC, noalias_sidechannel i32* undef, align 4, !tbaa !5, !noalias !2 <-> store i32 42, i32* %cond, noalias_sidechannel i32* %side.cond, align 4, !tbaa !5, !noalias !2 + + +; Function Attrs: argmemonly nounwind +declare i8* @llvm.noalias.decl.p0i8.p0p0i32.i32(i32**, i32, metadata) #2 + +; Function Attrs: nounwind +define dso_local void @test_select_p_rp_p(i32* nocapture %_pA, i32* nocapture %_pB, i32* nocapture %_pC, i32 %c) local_unnamed_addr #1 { +entry: + %0 = tail call i8* @llvm.noalias.decl.p0i8.p0p0i32.i32(i32** null, i32 0, metadata !9) + %tobool = icmp ne i32 %c, 0 + %1 = tail call i32* @llvm.side.noalias.p0i32.p0i8.p0p0i32.p0p0i32.i32(i32* %_pB, i8* %0, i32** null, i32** undef, i32 0, metadata !9) + %side.cond = select i1 %tobool, i32* %_pA, i32* %1 + %cond = select i1 %tobool, i32* %_pA, i32* %_pB + store i32 42, i32* %cond, noalias_sidechannel i32* %side.cond, align 4, !tbaa !2, !noalias !9 + store i32 99, i32* %_pC, noalias_sidechannel i32* undef, align 4, !tbaa !2, !noalias !9 + ret void +} +; CHECK-LABEL: Function: test_select_p_rp_p: +; CHECK: MayAlias: store i32 99, i32* %_pC, noalias_sidechannel i32* undef, align 4, !tbaa !5, !noalias !2 <-> store i32 42, i32* %cond, noalias_sidechannel i32* %side.cond, align 4, !tbaa !5, !noalias !2 + +; Function Attrs: nounwind +define dso_local void @test_select_rp_rp_p(i32* nocapture %_pA, i32* nocapture %_pB, i32* nocapture %_pC, i32 %c) local_unnamed_addr #1 { +entry: + %0 = tail call i8* @llvm.noalias.decl.p0i8.p0p0i32.i32(i32** null, i32 0, metadata !12) + %1 = tail call i8* @llvm.noalias.decl.p0i8.p0p0i32.i32(i32** null, i32 0, metadata !15) + %tobool = icmp ne i32 %c, 0 + %2 = tail call i32* @llvm.side.noalias.p0i32.p0i8.p0p0i32.p0p0i32.i32(i32* %_pA, i8* %0, i32** null, i32** undef, i32 0, metadata !12) + %3 = tail call i32* @llvm.side.noalias.p0i32.p0i8.p0p0i32.p0p0i32.i32(i32* %_pB, i8* %1, i32** null, i32** undef, i32 0, metadata !15) + %side.cond = select i1 %tobool, i32* %2, i32* %3 + %cond = select i1 %tobool, i32* %_pA, i32* %_pB + store i32 42, i32* %cond, noalias_sidechannel i32* %side.cond, align 4, !tbaa !2, !noalias !17 + store i32 99, i32* %_pC, noalias_sidechannel i32* undef, align 4, !tbaa !2, !noalias !17 + ret void +} +; CHECK-LABEL: Function: test_select_rp_rp_p: +; CHECK: NoAlias: store i32 99, i32* %_pC, noalias_sidechannel i32* undef, align 4, !tbaa !7, !noalias !11 <-> store i32 42, i32* %cond, noalias_sidechannel i32* %side.cond, align 4, !tbaa !7, !noalias !11 + +; Function Attrs: nounwind +define dso_local void @test_select_p_p_rp(i32* nocapture %_pA, i32* nocapture %_pB, i32* nocapture %_pC, i32 %c) local_unnamed_addr #1 { +entry: + %0 = tail call i8* @llvm.noalias.decl.p0i8.p0p0i32.i32(i32** null, i32 0, metadata !18) + %tobool = icmp eq i32 %c, 0 + %cond = select i1 %tobool, i32* %_pB, i32* %_pA + store i32 42, i32* %cond, noalias_sidechannel i32* undef, align 4, !tbaa !2, !noalias !18 + %1 = tail call i32* @llvm.side.noalias.p0i32.p0i8.p0p0i32.p0p0i32.i32(i32* %_pC, i8* %0, i32** null, i32** undef, i32 0, metadata !18), !tbaa !21, !noalias !18 + store i32 99, i32* %_pC, noalias_sidechannel i32* %1, align 4, !tbaa !2, !noalias !18 + ret void +} +; CHECK-LABEL: Function: test_select_p_p_rp: +; CHECK: NoAlias: store i32 99, i32* %_pC, noalias_sidechannel i32* %1, align 4, !tbaa !5, !noalias !2 <-> store i32 42, i32* %cond, noalias_sidechannel i32* undef, align 4, !tbaa !5, !noalias !2 + +; Function Attrs: nounwind +define dso_local void @test_select_rp_rp_rp_01(i32* nocapture %_pA, i32* nocapture %_pB, i32* nocapture %_pC, i32 %c) local_unnamed_addr #1 { +entry: + %0 = tail call i8* @llvm.noalias.decl.p0i8.p0p0i32.i32(i32** null, i32 0, metadata !23) + %1 = tail call i8* @llvm.noalias.decl.p0i8.p0p0i32.i32(i32** null, i32 0, metadata !26) + %2 = tail call i8* @llvm.noalias.decl.p0i8.p0p0i32.i32(i32** null, i32 0, metadata !28) + %tobool = icmp ne i32 %c, 0 + %3 = tail call i32* @llvm.side.noalias.p0i32.p0i8.p0p0i32.p0p0i32.i32(i32* %_pA, i8* %0, i32** null, i32** undef, i32 0, metadata !23) + %4 = tail call i32* @llvm.side.noalias.p0i32.p0i8.p0p0i32.p0p0i32.i32(i32* %_pB, i8* %1, i32** null, i32** undef, i32 0, metadata !26) + %side.cond = select i1 %tobool, i32* %3, i32* %4 + %cond = select i1 %tobool, i32* %_pA, i32* %_pB + store i32 42, i32* %cond, noalias_sidechannel i32* %side.cond, align 4, !tbaa !2, !noalias !30 + %5 = tail call i32* @llvm.side.noalias.p0i32.p0i8.p0p0i32.p0p0i32.i32(i32* %_pC, i8* %2, i32** null, i32** undef, i32 0, metadata !28), !tbaa !21, !noalias !30 + store i32 99, i32* %_pC, noalias_sidechannel i32* %5, align 4, !tbaa !2, !noalias !30 + ret void +} + +; CHECK-LABEL: Function: test_select_rp_rp_rp_01: +; CHECK: NoAlias: store i32 99, i32* %_pC, noalias_sidechannel i32* %5, align 4, !tbaa !9, !noalias !13 <-> store i32 42, i32* %cond, noalias_sidechannel i32* %side.cond, align 4, !tbaa !9, !noalias !13 + +; Function Attrs: nounwind +define dso_local void @test_select_rp_rp_rp_02(i32* nocapture %_pA, i32* nocapture readnone %_pB, i32* nocapture %_pC, i32 %c) local_unnamed_addr #1 { +entry: + %0 = tail call i8* @llvm.noalias.decl.p0i8.p0p0i32.i32(i32** null, i32 0, metadata !31) + %1 = tail call i8* @llvm.noalias.decl.p0i8.p0p0i32.i32(i32** null, i32 0, metadata !34) + %tobool = icmp ne i32 %c, 0 + %2 = tail call i32* @llvm.side.noalias.p0i32.p0i8.p0p0i32.p0p0i32.i32(i32* %_pA, i8* %0, i32** null, i32** undef, i32 0, metadata !31) + %3 = tail call i32* @llvm.side.noalias.p0i32.p0i8.p0p0i32.p0p0i32.i32(i32* %_pC, i8* %1, i32** null, i32** undef, i32 0, metadata !34) + %side.cond = select i1 %tobool, i32* %2, i32* %3 + %cond = select i1 %tobool, i32* %_pA, i32* %_pC + store i32 42, i32* %cond, noalias_sidechannel i32* %side.cond, align 4, !tbaa !2, !noalias !36 + store i32 99, i32* %_pC, noalias_sidechannel i32* %3, align 4, !tbaa !2, !noalias !36 + ret void +} +; CHECK-LABEL: Function: test_select_rp_rp_rp_02: +; CHECK: MayAlias: store i32 99, i32* %_pC, noalias_sidechannel i32* %3, align 4, !tbaa !7, !noalias !11 <-> store i32 42, i32* %cond, noalias_sidechannel i32* %side.cond, align 4, !tbaa !7, !noalias !11 + +; Function Attrs: nounwind readnone speculatable +declare i32* @llvm.side.noalias.p0i32.p0i8.p0p0i32.p0p0i32.i32(i32*, i8*, i32**, i32**, i32, metadata) #3 + +attributes #0 = { nofree norecurse nounwind writeonly "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #1 = { nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #2 = { argmemonly nounwind } +attributes #3 = { nounwind readnone speculatable } + +!llvm.module.flags = !{!0} +!llvm.ident = !{!1} + +!0 = !{i32 1, !"wchar_size", i32 4} +!1 = !{!"clang"} +!2 = !{!3, !3, i64 0, i64 4} +!3 = !{!4, i64 4, !"int"} +!4 = !{!5, i64 1, !"omnipotent char"} +!5 = !{!"Simple C/C++ TBAA"} +!6 = !{!7} +!7 = distinct !{!7, !8, !"test_select_rp_p_p: pA"} +!8 = distinct !{!8, !"test_select_rp_p_p"} +!9 = !{!10} +!10 = distinct !{!10, !11, !"test_select_p_rp_p: pB"} +!11 = distinct !{!11, !"test_select_p_rp_p"} +!12 = !{!13} +!13 = distinct !{!13, !14, !"test_select_rp_rp_p: pA"} +!14 = distinct !{!14, !"test_select_rp_rp_p"} +!15 = !{!16} +!16 = distinct !{!16, !14, !"test_select_rp_rp_p: pB"} +!17 = !{!13, !16} +!18 = !{!19} +!19 = distinct !{!19, !20, !"test_select_p_p_rp: pC"} +!20 = distinct !{!20, !"test_select_p_p_rp"} +!21 = !{!22, !22, i64 0, i64 4} +!22 = !{!4, i64 4, !"any pointer"} +!23 = !{!24} +!24 = distinct !{!24, !25, !"test_select_rp_rp_rp_01: pA"} +!25 = distinct !{!25, !"test_select_rp_rp_rp_01"} +!26 = !{!27} +!27 = distinct !{!27, !25, !"test_select_rp_rp_rp_01: pB"} +!28 = !{!29} +!29 = distinct !{!29, !25, !"test_select_rp_rp_rp_01: pC"} +!30 = !{!24, !27, !29} +!31 = !{!32} +!32 = distinct !{!32, !33, !"test_select_rp_rp_rp_02: pA"} +!33 = distinct !{!33, !"test_select_rp_rp_rp_02"} +!34 = !{!35} +!35 = distinct !{!35, !33, !"test_select_rp_rp_rp_02: pC"} +!36 = !{!32, !37, !35} +!37 = distinct !{!37, !33, !"test_select_rp_rp_rp_02: pB"} Index: llvm/test/Bitcode/loadstore_sidechannel.ll =================================================================== --- /dev/null +++ llvm/test/Bitcode/loadstore_sidechannel.ll @@ -0,0 +1,17 @@ +; RUN: opt --verify -S < %s | FileCheck %s +; JDO_FIXME_NOW: fix the bicode support.. R U N: llvm-as < %s | llvm-dis | FileCheck %s +; JDO_FIXME_NOW: R U N: verify-uselistorder < %s + +define i32 @f(i32* %p, i32* %q) { + store i32 42, i32* %p, noalias_sidechannel i32* %p + store i32 43, i32* %q, noalias_sidechannel i32* %q + %r = load i32, i32* %p, noalias_sidechannel i32* %p + ret i32 %r +} + +; CHECK: define i32 @f(i32* %p, i32* %q) { +; CHECK-NEXT: store i32 42, i32* %p, noalias_sidechannel i32* %p +; CHECK-NEXT: store i32 43, i32* %q, noalias_sidechannel i32* %q +; CHECK-NEXT: %r = load i32, i32* %p, noalias_sidechannel i32* %p +; CHECK-NEXT: ret i32 %r +; CHECK-NEXT: } Index: llvm/test/CodeGen/Generic/noalias.ll =================================================================== --- /dev/null +++ llvm/test/CodeGen/Generic/noalias.ll @@ -0,0 +1,13 @@ +; RUN: llc < %s + +define i32* @test(i32* %p) { + %p.decl = tail call i8* @llvm.noalias.decl.p0i8.p0p0i32.i32(i32** null, i32 0, metadata !0) + %v = call i32* @llvm.noalias.p0i32.p0i8.p0p0i32.i32(i32* %p, i8* %p.decl, i32** null, i32 0, metadata !0) + ret i32* %v +} + +declare i8* @llvm.noalias.decl.p0i8.p0p0i32.i32(i32**, i32, metadata) argmemonly nounwind +declare i32* @llvm.noalias.p0i32.p0i8.p0p0i32.i32(i32*, i8*, i32**, i32, metadata) argmemonly nounwind speculatable + +!0 = !{!0, !"some domain"} +!1 = !{!1, !0, !"some scope"} Index: llvm/test/CodeGen/Generic/side_noalias.ll =================================================================== --- /dev/null +++ llvm/test/CodeGen/Generic/side_noalias.ll @@ -0,0 +1,15 @@ +; RUN: llc < %s + +define i32* @test(i32* %p) { + %p.decl = tail call i8* @llvm.noalias.decl.p0i8.p0p0i32.i32(i32** null, i32 0, metadata !0) + %p.side = tail call i32* @llvm.side.noalias.p0i32.p0i8.p0p0i32.p0p0i32.i32(i32* %p, i8* %p.decl, i32** null, i32** undef, i32 0, metadata !0) + %p.guard = call i32* @llvm.noalias.arg.guard.p0i32.p0i32(i32* %p, i32* %p.side) + ret i32* %p.guard +} + +declare i8* @llvm.noalias.decl.p0i8.p0p0i32.i32(i32**, i32, metadata) argmemonly nounwind +declare i32* @llvm.side.noalias.p0i32.p0i8.p0p0i32.p0p0i32.i32(i32*, i8*, i32**, i32**, i32, metadata) argmemonly nounwind speculatable +declare i32* @llvm.noalias.arg.guard.p0i32.p0i32(i32*, i32*) nounwind readnone + +!0 = !{!0, !"some domain"} +!1 = !{!1, !0, !"some scope"} Index: llvm/test/Other/opt-O2-pipeline.ll =================================================================== --- llvm/test/Other/opt-O2-pipeline.ll +++ llvm/test/Other/opt-O2-pipeline.ll @@ -13,8 +13,10 @@ ; CHECK-NEXT: Instrument function entry/exit with calls to e.g. mcount() (pre inlining) ; CHECK-NEXT: Simplify the CFG ; CHECK-NEXT: Dominator Tree Construction +; CHECK-NEXT: Connect llvm.noalias.decl ; CHECK-NEXT: SROA ; CHECK-NEXT: Early CSE +; CHECK-NEXT: Propagate and Convert Noalias intrinsics ; CHECK-NEXT: Lower 'expect' Intrinsics ; CHECK-NEXT: Pass Arguments: ; CHECK-NEXT: Target Library Information @@ -52,14 +54,19 @@ ; CHECK-NEXT: Call Graph SCC Pass Manager ; CHECK-NEXT: Remove unused exception handling info ; CHECK-NEXT: Function Integration/Inlining +; CHECK-NEXT: FunctionPass Manager +; CHECK-NEXT: Dominator Tree Construction +; CHECK-NEXT: Propagate and Convert Noalias intrinsics ; CHECK-NEXT: Deduce function attributes ; CHECK-NEXT: FunctionPass Manager ; CHECK-NEXT: Dominator Tree Construction +; CHECK-NEXT: Connect llvm.noalias.decl ; CHECK-NEXT: SROA ; CHECK-NEXT: Basic Alias Analysis (stateless AA impl) ; CHECK-NEXT: Function Alias Analysis Results ; CHECK-NEXT: Memory SSA ; CHECK-NEXT: Early CSE w/ MemorySSA +; CHECK-NEXT: Propagate and Convert Noalias intrinsics ; CHECK-NEXT: Speculatively execute instructions if target has divergent branches ; CHECK-NEXT: Basic Alias Analysis (stateless AA impl) ; CHECK-NEXT: Function Alias Analysis Results @@ -146,6 +153,10 @@ ; CHECK-NEXT: Lazy Block Frequency Analysis ; CHECK-NEXT: Optimization Remark Emitter ; CHECK-NEXT: Combine redundant instructions +; CHECK-NEXT: Connect llvm.noalias.decl +; CHECK-NEXT: Propagate and Convert Noalias intrinsics +; CHECK-NEXT: Basic Alias Analysis (stateless AA impl) +; CHECK-NEXT: Function Alias Analysis Results ; CHECK-NEXT: Lazy Value Information Analysis ; CHECK-NEXT: Jump Threading ; CHECK-NEXT: Value Propagation Index: llvm/test/Other/opt-O3-pipeline.ll =================================================================== --- llvm/test/Other/opt-O3-pipeline.ll +++ llvm/test/Other/opt-O3-pipeline.ll @@ -13,8 +13,10 @@ ; CHECK-NEXT: Instrument function entry/exit with calls to e.g. mcount() (pre inlining) ; CHECK-NEXT: Simplify the CFG ; CHECK-NEXT: Dominator Tree Construction +; CHECK-NEXT: Connect llvm.noalias.decl ; CHECK-NEXT: SROA ; CHECK-NEXT: Early CSE +; CHECK-NEXT: Propagate and Convert Noalias intrinsics ; CHECK-NEXT: Lower 'expect' Intrinsics ; CHECK-NEXT: Pass Arguments: ; CHECK-NEXT: Target Library Information @@ -55,15 +57,20 @@ ; CHECK-NEXT: Call Graph SCC Pass Manager ; CHECK-NEXT: Remove unused exception handling info ; CHECK-NEXT: Function Integration/Inlining +; CHECK-NEXT: FunctionPass Manager +; CHECK-NEXT: Dominator Tree Construction +; CHECK-NEXT: Propagate and Convert Noalias intrinsics ; CHECK-NEXT: Deduce function attributes ; CHECK-NEXT: Promote 'by reference' arguments to scalars ; CHECK-NEXT: FunctionPass Manager ; CHECK-NEXT: Dominator Tree Construction +; CHECK-NEXT: Connect llvm.noalias.decl ; CHECK-NEXT: SROA ; CHECK-NEXT: Basic Alias Analysis (stateless AA impl) ; CHECK-NEXT: Function Alias Analysis Results ; CHECK-NEXT: Memory SSA ; CHECK-NEXT: Early CSE w/ MemorySSA +; CHECK-NEXT: Propagate and Convert Noalias intrinsics ; CHECK-NEXT: Speculatively execute instructions if target has divergent branches ; CHECK-NEXT: Basic Alias Analysis (stateless AA impl) ; CHECK-NEXT: Function Alias Analysis Results @@ -151,6 +158,10 @@ ; CHECK-NEXT: Lazy Block Frequency Analysis ; CHECK-NEXT: Optimization Remark Emitter ; CHECK-NEXT: Combine redundant instructions +; CHECK-NEXT: Connect llvm.noalias.decl +; CHECK-NEXT: Propagate and Convert Noalias intrinsics +; CHECK-NEXT: Basic Alias Analysis (stateless AA impl) +; CHECK-NEXT: Function Alias Analysis Results ; CHECK-NEXT: Lazy Value Information Analysis ; CHECK-NEXT: Jump Threading ; CHECK-NEXT: Value Propagation Index: llvm/test/Other/opt-Os-pipeline.ll =================================================================== --- llvm/test/Other/opt-Os-pipeline.ll +++ llvm/test/Other/opt-Os-pipeline.ll @@ -13,8 +13,10 @@ ; CHECK-NEXT: Instrument function entry/exit with calls to e.g. mcount() (pre inlining) ; CHECK-NEXT: Simplify the CFG ; CHECK-NEXT: Dominator Tree Construction +; CHECK-NEXT: Connect llvm.noalias.decl ; CHECK-NEXT: SROA ; CHECK-NEXT: Early CSE +; CHECK-NEXT: Propagate and Convert Noalias intrinsics ; CHECK-NEXT: Lower 'expect' Intrinsics ; CHECK-NEXT: Pass Arguments: ; CHECK-NEXT: Target Library Information @@ -52,14 +54,19 @@ ; CHECK-NEXT: Call Graph SCC Pass Manager ; CHECK-NEXT: Remove unused exception handling info ; CHECK-NEXT: Function Integration/Inlining +; CHECK-NEXT: FunctionPass Manager +; CHECK-NEXT: Dominator Tree Construction +; CHECK-NEXT: Propagate and Convert Noalias intrinsics ; CHECK-NEXT: Deduce function attributes ; CHECK-NEXT: FunctionPass Manager ; CHECK-NEXT: Dominator Tree Construction +; CHECK-NEXT: Connect llvm.noalias.decl ; CHECK-NEXT: SROA ; CHECK-NEXT: Basic Alias Analysis (stateless AA impl) ; CHECK-NEXT: Function Alias Analysis Results ; CHECK-NEXT: Memory SSA ; CHECK-NEXT: Early CSE w/ MemorySSA +; CHECK-NEXT: Propagate and Convert Noalias intrinsics ; CHECK-NEXT: Speculatively execute instructions if target has divergent branches ; CHECK-NEXT: Basic Alias Analysis (stateless AA impl) ; CHECK-NEXT: Function Alias Analysis Results @@ -133,6 +140,10 @@ ; CHECK-NEXT: Lazy Block Frequency Analysis ; CHECK-NEXT: Optimization Remark Emitter ; CHECK-NEXT: Combine redundant instructions +; CHECK-NEXT: Connect llvm.noalias.decl +; CHECK-NEXT: Propagate and Convert Noalias intrinsics +; CHECK-NEXT: Basic Alias Analysis (stateless AA impl) +; CHECK-NEXT: Function Alias Analysis Results ; CHECK-NEXT: Lazy Value Information Analysis ; CHECK-NEXT: Jump Threading ; CHECK-NEXT: Value Propagation Index: llvm/test/Other/pass-pipelines.ll =================================================================== --- llvm/test/Other/pass-pipelines.ll +++ llvm/test/Other/pass-pipelines.ll @@ -46,6 +46,9 @@ ; CHECK-O2-NEXT: Call Graph SCC Pass Manager ; CHECK-O2-NEXT: Remove unused exception handling info ; CHECK-O2-NEXT: Function Integration/Inlining +; CHECK-O2-NEXT: FunctionPass Manager +; CHECK-O2-NEXT: Dominator Tree Construction +; CHECK-O2-NEXT: Propagate and Convert Noalias intrinsics ; CHECK-O2-NEXT: Deduce function attributes ; Next up is the main function pass pipeline. It shouldn't be split up and ; should contain the main loop pass pipeline as well. Index: llvm/test/Transforms/ConnectNoAliasDecl/basictest.ll =================================================================== --- /dev/null +++ llvm/test/Transforms/ConnectNoAliasDecl/basictest.ll @@ -0,0 +1,179 @@ +; RUN: opt < %s -connect-noaliasdecl -verify -S | FileCheck %s +; RUN: opt < %s -passes=connect-noaliasdecl,verify -S | FileCheck %s + +target datalayout = "e-p:64:64:64-p1:16:16:16-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-n8:16:32:64" + + +%struct.FOO = type { i32*, i32*, i32* } + +; Function Attrs: nounwind +define dso_local void @test_01_before(i32** %_p, i32 %c) #0 !noalias !2 { +entry: + %rp = alloca [2 x i32*], align 4 + %other = alloca i32*, align 4 + %local_tmp = alloca i32*, align 4 + %tmp.0 = bitcast [2 x i32*]* %rp to i8* + %.fca.0.gep = getelementptr inbounds [2 x i32*], [2 x i32*]* %rp, i32 0, i32 0 + %.fca.1.gep = getelementptr inbounds [2 x i32*], [2 x i32*]* %rp, i32 0, i32 1 + call void @llvm.lifetime.start.p0i8(i64 8, i8* %tmp.0) #5, !noalias !5 + %tmp.1 = call i8* @llvm.noalias.decl.p0i8.p0a2p0i32.i32([2 x i32*]* %rp, i32 0, metadata !7) + %tmp.2 = load i32*, i32** %_p, align 4, !tbaa !8, !noalias !5 + store i32* %tmp.2, i32** %.fca.0.gep, align 4, !tbaa !8, !noalias !5 + %arrayinit.element = getelementptr inbounds i32*, i32** %.fca.0.gep, i32 1 + %arrayidx1 = getelementptr inbounds i32*, i32** %_p, i32 1 + %tmp.3 = load i32*, i32** %arrayidx1, align 4, !tbaa !8, !noalias !5 + store i32* %tmp.3, i32** %arrayinit.element, align 4, !tbaa !8, !noalias !5 + %tmp.4 = bitcast i32** %other to i8* + call void @llvm.lifetime.start.p0i8(i64 4, i8* %tmp.4) #5, !noalias !5 + %arrayidx2 = getelementptr inbounds i32*, i32** %_p, i32 2 + %tmp.5 = load i32*, i32** %arrayidx2, align 4, !tbaa !8, !noalias !5 + store i32* %tmp.5, i32** %other, align 4, !tbaa !8, !noalias !5 + %tobool = icmp ne i32 %c, 0 + %cond = select i1 %tobool, i32** %.fca.0.gep, i32** %other + %tmp.6 = load i32*, i32** %arrayinit.element, align 4, !tbaa !8, !noalias !5 + %through_local_tmp = call i32* @llvm.noalias.p0i32.p0i8.p0p0i32.i32(i32* %tmp.6, i8* null, i32** %arrayinit.element, i32 0, metadata !2), !tbaa !8, !noalias !5 + %tmp.7 = call i32* @llvm.noalias.p0i32.p0i8.p0p0i32.i32(i32* %through_local_tmp, i8* null, i32** %local_tmp, i32 0, metadata !2), !tbaa !8, !noalias !5 + %tmp.8 = load i32, i32* %tmp.7, align 4, !tbaa !12, !noalias !5 + %tmp.9 = load i32*, i32** %.fca.0.gep, align 4, !tbaa !8, !noalias !5 + %tmp.10 = call i32* @llvm.noalias.p0i32.p0i8.p0p0i32.i32(i32* %tmp.9, i8* null, i32** %.fca.0.gep, i32 0, metadata !2), !tbaa !8, !noalias !5 + store i32 %tmp.8, i32* %tmp.10, align 4, !tbaa !12, !noalias !5 + %tmp.11 = load i32*, i32** %cond, align 4, !tbaa !8, !noalias !5 + %tmp.12 = call i32* @llvm.noalias.p0i32.p0i8.p0p0i32.i32(i32* %tmp.11, i8* null, i32** %cond, i32 0, metadata !2), !tbaa !8, !noalias !5 + store i32 42, i32* %tmp.12, align 4, !tbaa !12, !noalias !5 + call void @llvm.lifetime.end.p0i8(i64 4, i8* %tmp.4) #5 + call void @llvm.lifetime.end.p0i8(i64 8, i8* %tmp.0) #5 + ret void +} + +; CHECK-LABEL: @test_01_before( +; CHECK: %tmp.1 = call i8* @llvm.noalias.decl.p0i8.p0a2p0i32.i32([2 x i32*]* %rp, i32 0, metadata !7) +; CHECK: %through_local_tmp = call i32* @llvm.noalias.p0i32.p0i8.p0p0i32.i32(i32* %tmp.6, i8* %tmp.1, i32** %arrayinit.element, i32 0, metadata !7), !tbaa !8, !noalias !5 +; CHECK: %tmp.10 = call i32* @llvm.noalias.p0i32.p0i8.p0p0i32.i32(i32* %tmp.9, i8* %tmp.1, i32** %.fca.0.gep, i32 0, metadata !7), !tbaa !8, !noalias !5 +; CHECK: %tmp.12 = call i32* @llvm.noalias.p0i32.p0i8.p0p0i32.i32(i32* %tmp.11, i8* null, i32** %cond, i32 0, metadata !2), !tbaa !8, !noalias !5 +; CHECK-NOT: llvm.noalias + +; Function Attrs: nounwind +define dso_local void @test_02(i32** %_p, i32 %c) #0 !noalias !14 { +entry: + %foo = alloca %struct.FOO, align 4 + %tmp = alloca %struct.FOO, align 4 + %tmp.1 = call i8* @llvm.noalias.decl.p0i8.p0s_struct.FOOs.i32(%struct.FOO* %foo, i32 0, metadata !17) + %tmp.10 = call i8* @llvm.noalias.decl.p0i8.p0s_struct.FOOs.i32(%struct.FOO* %tmp, i32 0, metadata !19) + %tmp.12 = call %struct.FOO* @llvm.noalias.copy.guard.p0s_struct.FOOs.p0i8(%struct.FOO* %foo, i8* null, metadata !21, metadata !14) + %tmp.13 = load %struct.FOO, %struct.FOO* %tmp.12, align 4, !noalias !25 + store %struct.FOO %tmp.13, %struct.FOO* %tmp, !noalias !25 + ret void +} + +; CHECK-LABEL: @test_02( +; CHECK: %tmp.1 = call i8* @llvm.noalias.decl.p0i8.p0s_struct.FOOs.i32(%struct.FOO* %foo, i32 0, metadata !17) +; CHECK: %tmp.10 = call i8* @llvm.noalias.decl.p0i8.p0s_struct.FOOs.i32(%struct.FOO* %tmp, i32 0, metadata !19) +; CHECK: %tmp.12 = call %struct.FOO* @llvm.noalias.copy.guard.p0s_struct.FOOs.p0i8(%struct.FOO* %foo, i8* %tmp.1, metadata !21, metadata !17) +; CHECK-NOT: llvm.noalias + +; Function Attrs: nounwind +define dso_local void @test_01_after(i32** %_p, i32 %c) #0 !noalias !2 { +entry: + %rp = alloca [2 x i32*], align 4 + %local_tmp = alloca i32*, align 4 + %other = alloca i32*, align 4 + %.fca.0.gep = getelementptr inbounds [2 x i32*], [2 x i32*]* %rp, i32 0, i32 0 + %.fca.1.gep = getelementptr inbounds [2 x i32*], [2 x i32*]* %rp, i32 0, i32 1 + %tmp.0 = bitcast [2 x i32*]* %rp to i8* + call void @llvm.lifetime.start.p0i8(i64 8, i8* %tmp.0) #5, !noalias !5 + %tmp.1 = call i8* @llvm.noalias.decl.p0i8.p0a2p0i32.i32([2 x i32*]* %rp, i32 0, metadata !7) + %tmp.2 = load i32*, i32** %_p, noalias_sidechannel i32** undef, align 4, !tbaa !8, !noalias !5 + store i32* %tmp.2, i32** %.fca.0.gep, noalias_sidechannel i32** undef, align 4, !tbaa !8, !noalias !5 + %arrayinit.element = getelementptr inbounds i32*, i32** %.fca.0.gep, i32 1 + %arrayidx1 = getelementptr inbounds i32*, i32** %_p, i32 1 + %tmp.3 = load i32*, i32** %arrayidx1, noalias_sidechannel i32** undef, align 4, !tbaa !8, !noalias !5 + store i32* %tmp.3, i32** %arrayinit.element, noalias_sidechannel i32** undef, align 4, !tbaa !8, !noalias !5 + %tmp.4 = bitcast i32** %other to i8* + call void @llvm.lifetime.start.p0i8(i64 4, i8* %tmp.4) #5, !noalias !5 + %arrayidx2 = getelementptr inbounds i32*, i32** %_p, i32 2 + %tmp.5 = load i32*, i32** %arrayidx2, noalias_sidechannel i32** undef, align 4, !tbaa !8, !noalias !5 + store i32* %tmp.5, i32** %other, noalias_sidechannel i32** undef, align 4, !tbaa !8, !noalias !5 + %tobool = icmp ne i32 %c, 0 + %cond = select i1 %tobool, i32** %.fca.0.gep, i32** %other + %tmp.6 = load i32*, i32** %arrayinit.element, noalias_sidechannel i32** undef, align 4, !tbaa !8, !noalias !5 + %through_local_tmp = call i32* @llvm.side.noalias.p0i32.p0i8.p0p0i32.p0p0i32.i32(i32* %tmp.6, i8* null, i32** %arrayinit.element, i32** undef, i32 0, metadata !2), !tbaa !8, !noalias !5 + %tmp.7 = call i32* @llvm.side.noalias.p0i32.p0i8.p0p0i32.p0p0i32.i32(i32* %through_local_tmp, i8* null, i32** %local_tmp, i32** undef, i32 0, metadata !2), !tbaa !8, !noalias !5 + %tmp.8 = load i32, i32* %tmp.6, noalias_sidechannel i32* %tmp.7, align 4, !tbaa !12, !noalias !5 + %tmp.9 = load i32*, i32** %.fca.0.gep, noalias_sidechannel i32** undef, align 4, !tbaa !8, !noalias !5 + %tmp.10 = call i32* @llvm.side.noalias.p0i32.p0i8.p0p0i32.p0p0i32.i32(i32* %tmp.9, i8* null, i32** %.fca.0.gep, i32** undef, i32 0, metadata !2), !tbaa !8, !noalias !5 + store i32 %tmp.8, i32* %tmp.9, noalias_sidechannel i32* %tmp.10, align 4, !tbaa !12, !noalias !5 + %tmp.11 = load i32*, i32** %cond, noalias_sidechannel i32** undef, align 4, !tbaa !8, !noalias !5 + %tmp.12 = call i32* @llvm.side.noalias.p0i32.p0i8.p0p0i32.p0p0i32.i32(i32* %tmp.11, i8* null, i32** %cond, i32** undef, i32 0, metadata !2), !tbaa !8, !noalias !5 + store i32 42, i32* %tmp.11, noalias_sidechannel i32* %tmp.12, align 4, !tbaa !12, !noalias !5 + call void @llvm.lifetime.end.p0i8(i64 4, i8* %tmp.4) #5 + call void @llvm.lifetime.end.p0i8(i64 8, i8* %tmp.0) #5 + ret void +} + +; CHECK-LABEL: @test_01_after( +; CHECK: %tmp.1 = call i8* @llvm.noalias.decl.p0i8.p0a2p0i32.i32([2 x i32*]* %rp, i32 0, metadata !7) +; CHECK: %through_local_tmp = call i32* @llvm.side.noalias.p0i32.p0i8.p0p0i32.p0p0i32.i32(i32* %tmp.6, i8* %tmp.1, i32** %arrayinit.element, i32** undef, i32 0, metadata !7), !tbaa !8, !noalias !5 +; CHECK: %tmp.10 = call i32* @llvm.side.noalias.p0i32.p0i8.p0p0i32.p0p0i32.i32(i32* %tmp.9, i8* %tmp.1, i32** %.fca.0.gep, i32** undef, i32 0, metadata !7), !tbaa !8, !noalias !5 +; CHECK: %tmp.12 = call i32* @llvm.side.noalias.p0i32.p0i8.p0p0i32.p0p0i32.i32(i32* %tmp.11, i8* null, i32** %cond, i32** undef, i32 0, metadata !2), !tbaa !8, !noalias !5 +; CHECK-NOT: llvm.noalias + +; CHECK: declare + +; Function Attrs: argmemonly nounwind +declare void @llvm.lifetime.start.p0i8(i64 immarg, i8* nocapture) #1 + +; Function Attrs: argmemonly nounwind +declare i8* @llvm.noalias.decl.p0i8.p0a2p0i32.i32([2 x i32*]*, i32, metadata) #1 + +; Function Attrs: argmemonly nounwind speculatable +declare i32* @llvm.noalias.p0i32.p0i8.p0p0i32.i32(i32*, i8*, i32**, i32, metadata) #2 + +; Function Attrs: argmemonly nounwind +declare void @llvm.lifetime.end.p0i8(i64 immarg, i8* nocapture) #1 + +; Function Attrs: argmemonly nounwind +declare i8* @llvm.noalias.decl.p0i8.p0s_struct.FOOs.i32(%struct.FOO*, i32, metadata) #1 + +; Function Attrs: nounwind readnone +declare %struct.FOO* @llvm.noalias.copy.guard.p0s_struct.FOOs.p0i8(%struct.FOO*, i8*, metadata, metadata) #3 + +; Function Attrs: nounwind readnone speculatable +declare i32* @llvm.side.noalias.p0i32.p0i8.p0p0i32.p0p0i32.i32(i32*, i8*, i32**, i32**, i32, metadata) #4 + + +attributes #0 = { nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #1 = { argmemonly nounwind } +attributes #2 = { argmemonly nounwind speculatable } +attributes #3 = { nounwind readnone } +attributes #4 = { nounwind readnone speculatable } +attributes #5 = { nounwind } + +!llvm.module.flags = !{!0} +!llvm.ident = !{!1} + +!0 = !{i32 1, !"wchar_size", i32 4} +!1 = !{!"clang version 10.0.0 (sgasip@krachtcs10:/u/sgasip/ipd/repositories/llvm_ipd 8efa7b085788c7ef2e29af74da59051ee86c2fd1)"} +!2 = !{!3} +!3 = distinct !{!3, !4, !"test_01: unknown scope"} +!4 = distinct !{!4, !"test_01"} +!5 = !{!6, !3} +!6 = distinct !{!6, !4, !"test_01: rp"} +!7 = !{!6} +!8 = !{!9, !9, i64 0, i64 4} +!9 = !{!10, i64 4, !"any pointer"} +!10 = !{!11, i64 1, !"omnipotent char"} +!11 = !{!"Simple C/C++ TBAA"} +!12 = !{!13, !13, i64 0, i64 4} +!13 = !{!10, i64 4, !"int"} +!14 = !{!15} +!15 = distinct !{!15, !16, !"test_02: unknown scope"} +!16 = distinct !{!16, !"test_02"} +!17 = !{!18} +!18 = distinct !{!18, !16, !"test_02: foo"} +!19 = !{!20} +!20 = distinct !{!20, !16, !"test_02: tmp"} +!21 = !{!22, !23, !24} +!22 = !{i64 -1, i64 0} +!23 = !{i64 -1, i64 1} +!24 = !{i64 -1, i64 2} +!25 = !{!20, !18, !15} Index: llvm/test/Transforms/Coroutines/ArgAddr.ll =================================================================== --- llvm/test/Transforms/Coroutines/ArgAddr.ll +++ llvm/test/Transforms/Coroutines/ArgAddr.ll @@ -46,6 +46,7 @@ ret i32 0 ; CHECK: call void @ctor ; CHECK-NEXT: call void @print(i32 4) +; CHECK-NEXT: bitcast i8* ; CHECK-NEXT: call void @print(i32 3) ; CHECK-NEXT: call void @print(i32 2) ; CHECK: ret i32 0 Index: llvm/test/Transforms/Coroutines/coro-retcon-resume-values.ll =================================================================== --- llvm/test/Transforms/Coroutines/coro-retcon-resume-values.ll +++ llvm/test/Transforms/Coroutines/coro-retcon-resume-values.ll @@ -65,8 +65,12 @@ ; CHECK-LABEL: define i32 @main ; CHECK-NEXT: entry: ; CHECK: [[BUFFER:%.*]] = alloca [8 x i8], align 4 +; CHECK: [[SUB:%.*]] = getelementptr inbounds [8 x i8], [8 x i8]* [[BUFFER]], i64 0, i64 0 ; CHECK: [[SLOT:%.*]] = bitcast [8 x i8]* [[BUFFER]] to i32* -; CHECK-NEXT: store i32 7, i32* [[SLOT]], align 4 +; CHECK-NEXT: [[SUB_0_DECL:%.*]] = call i8* @llvm.noalias.decl.p0i8.p0p0i8.i64(i8** null, i64 0, metadata [[SUB_0_META:!.*]]) +; CHECK-NEXT: [[SUB_0_SIDE:%.*]] = call i8* @llvm.side.noalias.p0i8.p0i8.p0p0i8.p0p0i8.i64(i8* nonnull [[SUB]], i8* [[SUB_0_DECL]], i8** null, i8** undef, i64 0, metadata [[SUB_0_META]]) +; CHECK-NEXT: [[SUB_0_SIDE_I32:%.*]] = bitcast i8* [[SUB_0_SIDE]] to i32* +; CHECK-NEXT: store i32 7, i32* [[SLOT]], noalias_sidechannel i32* [[SUB_0_SIDE_I32]], align 4, !noalias [[SUB_0_META]] ; CHECK-NEXT: call void @print(i32 7) ; CHECK-NEXT: ret i32 0 Index: llvm/test/Transforms/Coroutines/coro-retcon-value.ll =================================================================== --- llvm/test/Transforms/Coroutines/coro-retcon-value.ll +++ llvm/test/Transforms/Coroutines/coro-retcon-value.ll @@ -75,16 +75,23 @@ ; CHECK-LABEL: define i32 @main ; CHECK-NEXT: entry: ; CHECK: [[BUFFER:%.*]] = alloca [8 x i8], align 4 +; CHECK: [[SUB:%.*]] = getelementptr inbounds [8 x i8], [8 x i8]* [[BUFFER]], i64 0, i64 0 ; CHECK: [[SLOT:%.*]] = bitcast [8 x i8]* [[BUFFER]] to i32* -; CHECK-NEXT: store i32 4, i32* [[SLOT]], align 4 +; CHECK-NEXT: store i32 4, i32* [[SLOT]], noalias_sidechannel i32* undef, align 4 ; CHECK-NEXT: call void @print(i32 4) -; CHECK-NEXT: [[LOAD:%.*]] = load i32, i32* [[SLOT]], align 4 +; CHECK-NEXT: [[SUB_0_DECL:%.*]] = call i8* @llvm.noalias.decl.p0i8.p0p0i8.i64(i8** null, i64 0, metadata [[SUB_0_META:!.*]]) +; CHECK-NEXT: [[SUB_0_SIDE:%.*]] = call i8* @llvm.side.noalias.p0i8.p0i8.p0p0i8.p0p0i8.i64(i8* nonnull [[SUB]], i8* [[SUB_0_DECL]], i8** null, i8** undef, i64 0, metadata [[SUB_0_META]]) +; CHECK-NEXT: [[SUB_0_SIDE_I32:%.*]] = bitcast i8* [[SUB_0_SIDE]] to i32* +; CHECK-NEXT: [[LOAD:%.*]] = load i32, i32* [[SLOT]], noalias_sidechannel i32* [[SUB_0_SIDE_I32]], align 4, !noalias [[SUB_0_META]] ; CHECK-NEXT: [[INC:%.*]] = add i32 [[LOAD]], 1 -; CHECK-NEXT: store i32 [[INC]], i32* [[SLOT]], align 4 +; CHECK-NEXT: store i32 [[INC]], i32* [[SLOT]], noalias_sidechannel i32* [[SUB_0_SIDE_I32]], align 4, !noalias [[SUB_0_META]] ; CHECK-NEXT: call void @print(i32 [[INC]]) -; CHECK-NEXT: [[LOAD:%.*]] = load i32, i32* [[SLOT]], align 4 +; CHECK-NEXT: [[SUB_1_DECL:%.*]] = call i8* @llvm.noalias.decl.p0i8.p0p0i8.i64(i8** null, i64 0, metadata [[SUB_1_META:!.*]]) +; CHECK-NEXT: [[SUB_1_SIDE:%.*]] = call i8* @llvm.side.noalias.p0i8.p0i8.p0p0i8.p0p0i8.i64(i8* nonnull [[SUB]], i8* [[SUB_1_DECL]], i8** null, i8** undef, i64 0, metadata [[SUB_1_META]]) +; CHECK-NEXT: [[SUB_1_SIDE_I32:%.*]] = bitcast i8* [[SUB_1_SIDE]] to i32* +; CHECK-NEXT: [[LOAD:%.*]] = load i32, i32* [[SLOT]], noalias_sidechannel i32* [[SUB_1_SIDE_I32]], align 4, !noalias [[SUB_1_META]] ; CHECK-NEXT: [[INC:%.*]] = add i32 [[LOAD]], 1 -; CHECK-NEXT: store i32 [[INC]], i32* [[SLOT]], align 4 +; CHECK-NEXT: store i32 [[INC]], i32* [[SLOT]], noalias_sidechannel i32* [[SUB_1_SIDE_I32]], align 4, !noalias [[SUB_1_META]] ; CHECK-NEXT: call void @print(i32 [[INC]]) ; CHECK-NEXT: ret i32 0 Index: llvm/test/Transforms/Coroutines/coro-retcon.ll =================================================================== --- llvm/test/Transforms/Coroutines/coro-retcon.ll +++ llvm/test/Transforms/Coroutines/coro-retcon.ll @@ -65,16 +65,23 @@ ; CHECK-LABEL: define i32 @main ; CHECK-NEXT: entry: ; CHECK: [[BUFFER:%.*]] = alloca [8 x i8], align 4 +; CHECK: [[SUB:%.*]] = getelementptr inbounds [8 x i8], [8 x i8]* [[BUFFER]], i64 0, i64 0 ; CHECK: [[SLOT:%.*]] = bitcast [8 x i8]* [[BUFFER]] to i32* -; CHECK-NEXT: store i32 4, i32* [[SLOT]], align 4 +; CHECK-NEXT: store i32 4, i32* [[SLOT]], noalias_sidechannel i32* undef, align 4 ; CHECK-NEXT: call void @print(i32 4) -; CHECK-NEXT: [[LOAD:%.*]] = load i32, i32* [[SLOT]], align 4 +; CHECK-NEXT: [[SUB_0_DECL:%.*]] = call i8* @llvm.noalias.decl.p0i8.p0p0i8.i64(i8** null, i64 0, metadata [[SUB_0_META:!.*]]) +; CHECK-NEXT: [[SUB_0_SIDE:%.*]] = call i8* @llvm.side.noalias.p0i8.p0i8.p0p0i8.p0p0i8.i64(i8* nonnull [[SUB]], i8* [[SUB_0_DECL]], i8** null, i8** undef, i64 0, metadata [[SUB_0_META]]) +; CHECK-NEXT: [[SUB_0_SIDE_I32:%.*]] = bitcast i8* [[SUB_0_SIDE]] to i32* +; CHECK-NEXT: [[LOAD:%.*]] = load i32, i32* [[SLOT]], noalias_sidechannel i32* [[SUB_0_SIDE_I32]], align 4, !noalias [[SUB_0_META]] ; CHECK-NEXT: [[INC:%.*]] = add i32 [[LOAD]], 1 -; CHECK-NEXT: store i32 [[INC]], i32* [[SLOT]], align 4 +; CHECK-NEXT: store i32 [[INC]], i32* [[SLOT]], noalias_sidechannel i32* [[SUB_0_SIDE_I32]], align 4, !noalias [[SUB_0_META]] ; CHECK-NEXT: call void @print(i32 [[INC]]) -; CHECK-NEXT: [[LOAD:%.*]] = load i32, i32* [[SLOT]], align 4 +; CHECK-NEXT: [[SUB_1_DECL:%.*]] = call i8* @llvm.noalias.decl.p0i8.p0p0i8.i64(i8** null, i64 0, metadata [[SUB_1_META:!.*]]) +; CHECK-NEXT: [[SUB_1_SIDE:%.*]] = call i8* @llvm.side.noalias.p0i8.p0i8.p0p0i8.p0p0i8.i64(i8* nonnull [[SUB]], i8* [[SUB_1_DECL]], i8** null, i8** undef, i64 0, metadata [[SUB_1_META]]) +; CHECK-NEXT: [[SUB_1_SIDE_I32:%.*]] = bitcast i8* [[SUB_1_SIDE]] to i32* +; CHECK-NEXT: [[LOAD:%.*]] = load i32, i32* [[SLOT]], noalias_sidechannel i32* [[SUB_1_SIDE_I32]], align 4, !noalias [[SUB_1_META]] ; CHECK-NEXT: [[INC:%.*]] = add i32 [[LOAD]], 1 -; CHECK-NEXT: store i32 [[INC]], i32* [[SLOT]], align 4 +; CHECK-NEXT: store i32 [[INC]], i32* [[SLOT]], noalias_sidechannel i32* [[SUB_1_SIDE_I32]], align 4, !noalias [[SUB_1_META]] ; CHECK-NEXT: call void @print(i32 [[INC]]) ; CHECK-NEXT: ret i32 0 Index: llvm/test/Transforms/Coroutines/ex3.ll =================================================================== --- llvm/test/Transforms/Coroutines/ex3.ll +++ llvm/test/Transforms/Coroutines/ex3.ll @@ -39,6 +39,7 @@ call void @llvm.coro.destroy(i8* %hdl) ret i32 0 ; CHECK: call void @print(i32 4) +; CHECK-NEXT: bitcast i8* ; CHECK-NEXT: call void @print(i32 -5) ; CHECK-NEXT: call void @print(i32 5) ; CHECK: ret i32 0 Index: llvm/test/Transforms/EarlyCSE/basic.ll =================================================================== --- llvm/test/Transforms/EarlyCSE/basic.ll +++ llvm/test/Transforms/EarlyCSE/basic.ll @@ -54,6 +54,26 @@ ; CHECK: ret i32 0 } +; CHECK-LABEL: @test2b( +define i32 @test2b(i32 *%P, i1 %b) { + %V1 = load i32, i32* %P + call i8* @llvm.noalias.p0i8.p0i8.p0p0i8.i32(i8* undef, i8* null, i8** null, i32 0, metadata !1) + %V2 = load i32, i32* %P + %Diff = sub i32 %V1, %V2 + ret i32 %Diff + ; CHECK: ret i32 0 +} + +; CHECK-LABEL: @test2c( +define i32 @test2c(i32 *%P, i1 %b) { + %V1 = load i32, i32* %P + call i8* @llvm.side.noalias.p0i8.p0i8.p0p0i8.p0p0i8.i32(i8* undef, i8* null, i8** null, i8** null, i32 0, metadata !1) + %V2 = load i32, i32* %P + %Diff = sub i32 %V1, %V2 + ret i32 %Diff + ; CHECK: ret i32 0 +} + ;; Cross block load value numbering. ; CHECK-LABEL: @test3( define i32 @test3(i32 *%P, i1 %Cond) { @@ -134,6 +154,24 @@ ; CHECK: ret i32 42 } +; CHECK-LABEL: @test6b( +define i32 @test6b(i32 *%P, i1 %b) { + store i32 42, i32* %P + call i8* @llvm.noalias.p0i8.p0i8.p0p0i8.i32(i8* undef, i8* null, i8** null, i32 0, metadata !1) + %V1 = load i32, i32* %P + ret i32 %V1 + ; CHECK: ret i32 42 +} + +; CHECK-LABEL: @test6c( +define i32 @test6c(i32 *%P, i1 %b) { + store i32 42, i32* %P + call i8* @llvm.side.noalias.p0i8.p0i8.p0p0i8.p0p0i8.i32(i8* undef, i8* null, i8** null, i8** null, i32 0, metadata !1) + %V1 = load i32, i32* %P + ret i32 %V1 + ; CHECK: ret i32 42 +} + ;; Trivial dead store elimination. ; CHECK-LABEL: @test7( define void @test7(i32 *%P) { @@ -291,3 +329,9 @@ store i32 2, i32* @c, align 4 ret void } + +declare i8* @llvm.noalias.p0i8.p0i8.p0p0i8.i32(i8*, i8*, i8**, i32, metadata ) nounwind +declare i8* @llvm.side.noalias.p0i8.p0i8.p0p0i8.p0p0i8.i32(i8*, i8*, i8**, i8**, i32, metadata ) nounwind + +!0 = !{!0, !"some domain"} +!1 = !{!1, !0, !"some scope"} Index: llvm/test/Transforms/FunctionAttrs/nocapture.ll =================================================================== --- llvm/test/Transforms/FunctionAttrs/nocapture.ll +++ llvm/test/Transforms/FunctionAttrs/nocapture.ll @@ -114,6 +114,40 @@ ret i32 %val } +; EITHER: define i32 @nc1a(i32* %q, i32* nocapture %p, i1 %b) +define i32 @nc1a(i32* %q, i32* %p, i1 %b) { +e: + %pa = call i32* @llvm.noalias.p0i32.p0i8.p0p0i32.i32(i32* %p, i8* null, i32** null, i32 0, metadata !1) + br label %l +l: + %x = phi i32* [ %pa, %e ] + %y = phi i32* [ %q, %e ] + %tmp = bitcast i32* %x to i32* ; [#uses=2] + %tmp2 = select i1 %b, i32* %tmp, i32* %y + %val = load i32, i32* %tmp2 ; [#uses=1] + store i32 0, i32* %tmp + store i32* %y, i32** @g + ret i32 %val +} + +; EITHER: define i32 @nc1b(i32* %q, i32* nocapture %p, i1 %b) +define i32 @nc1b(i32* %q, i32* %p, i1 %b) { +e: + %side.p = call i32* @llvm.side.noalias.p0i32.p0i8.p0p0i32.p0p0i32.i32(i32* %p, i8* null, i32** null, i32** null, i32 0, metadata !1) + br label %l +l: + %x = phi i32* [ %p, %e ] + %side.x = phi i32* [ %side.p, %e ] + %y = phi i32* [ %q, %e ] + %tmp = bitcast i32* %x to i32* ; [#uses=2] + %tmp2 = select i1 %b, i32* %tmp, i32* %y + %side.tmp2 = select i1 %b, i32* %side.x, i32* %y + %val = load i32, i32* %tmp2 ; [#uses=1] + store i32 0, i32* %tmp, noalias_sidechannel i32* %side.tmp2 + store i32* %y, i32** @g + ret i32 %val +} + ; EITHER: define i32 @nc1_addrspace(i32* %q, i32 addrspace(1)* nocapture %p, i1 %b) define i32 @nc1_addrspace(i32* %q, i32 addrspace(1)* %p, i1 %b) { e: @@ -362,3 +396,9 @@ declare i8* @llvm.launder.invariant.group.p0i8(i8*) declare i8* @llvm.strip.invariant.group.p0i8(i8*) + +declare i32* @llvm.noalias.p0i32.p0i8.p0p0i32.i32(i32*, i8*, i32**, i32, metadata ) nounwind +declare i32* @llvm.side.noalias.p0i32.p0i8.p0p0i32.p0p0i32.i32(i32*, i8*, i32**, i32**, i32, metadata ) nounwind + +!0 = !{!0, !"some domain"} +!1 = !{!1, !0, !"some scope"} Index: llvm/test/Transforms/FunctionAttrs/nonnull.ll =================================================================== --- llvm/test/Transforms/FunctionAttrs/nonnull.ll +++ llvm/test/Transforms/FunctionAttrs/nonnull.ll @@ -579,5 +579,15 @@ ret void } +; Return a pointer trivially nonnull (argument attribute) through a llvm.noalias.arg.guard +; BOTH: define nonnull i8* @test_noalias_arg_guard +define i8* @test_noalias_arg_guard(i8* nonnull %p) { + %ret = tail call i8* @llvm.noalias.arg.guard.p0i8.p0i8(i8* %p, i8* %p) + ret i8* %p +} + +; Function Attrs: nounwind readnone +declare i8* @llvm.noalias.arg.guard.p0i8.p0i8(i8*, i8*) nounwind readnone + attributes #0 = { "null-pointer-is-valid"="true" } attributes #1 = { nounwind willreturn} Index: llvm/test/Transforms/Inline/launder.invariant.group.ll =================================================================== --- llvm/test/Transforms/Inline/launder.invariant.group.ll +++ llvm/test/Transforms/Inline/launder.invariant.group.ll @@ -1,6 +1,10 @@ -; RUN: opt -S -inline < %s | FileCheck %s -; RUN: opt -S -O3 < %s | FileCheck %s -; RUN: opt -S -inline -inline-threshold=1 < %s | FileCheck %s +; RUN: opt -S -inline --use-noalias-intrinsic-during-inlining=0 < %s | FileCheck %s --check-prefixes=CHECK,CHECK_SCOPED +; RUN: opt -S -O3 --use-noalias-intrinsic-during-inlining=0 < %s | FileCheck %s --check-prefixes=CHECK,CHECK_SCOPED,CHECK_OPT +; RUN: opt -S -inline -inline-threshold=1 --use-noalias-intrinsic-during-inlining=0 < %s | FileCheck %s --check-prefixes=CHECK,CHECK_SCOPED + +; RUN: opt -S -inline --use-noalias-intrinsic-during-inlining=1 < %s | FileCheck %s --check-prefixes=CHECK,CHECK_NOALIAS +; RUN: opt -S -O3 --use-noalias-intrinsic-during-inlining=1 < %s | FileCheck %s --check-prefixes=CHECK,CHECK_SIDE,CHECK_OPT +; RUN: opt -S -inline -inline-threshold=1 --use-noalias-intrinsic-during-inlining=1 < %s | FileCheck %s --check-prefixes=CHECK,CHECK_NOALIAS %struct.A = type <{ i32 (...)**, i32, [4 x i8] }> @@ -9,7 +13,7 @@ ; sometimes it would be considered noalias. ; CHECK-LABEL: define i32 @bar(%struct.A* noalias define i32 @bar(%struct.A* noalias) { -; CHECK-NOT: noalias +; CHECK_SCOPED-NOT: noalias %2 = bitcast %struct.A* %0 to i8* %3 = call i8* @llvm.launder.invariant.group.p0i8(i8* %2) %4 = getelementptr inbounds i8, i8* %3, i64 8 @@ -22,10 +26,24 @@ ; CHECK-LABEL: define i32 @foo(%struct.A* noalias define i32 @foo(%struct.A* noalias) { - ; CHECK-NOT: call i32 @bar( - ; CHECK-NOT: noalias + ; CHECK_SCOPED-NOT: call i32 @bar( + ; CHECK_SCOPED-NOT: noalias + + ; CHECK_NOALIAS-NOT: call i32 @bar( + ; CHECK_NOALIAS: @llvm.noalias.decl.p0 + ; CHECK_NOALIAS-NEXT: @llvm.noalias.p0 + ; CHECK_NOALIAS-NOT: call i32 @bar( + + ; CHECK_SIDE-NOT: call i32 @bar( + ; CHECK_SIDE: @llvm.noalias.decl.p0 + ; CHECK_SIDE-NEXT: @llvm.side.noalias.p0 + ; CHECK_SIDE-NOT: call i32 @bar( + ; CHECK_SIDE: @llvm.noalias.arg.guard.p0 + ; CHECK_SIDE-NOT: call i32 @bar( %2 = tail call i32 @bar(%struct.A* %0) ret i32 %2 + + ; CHECK_OPT: ret i32 42 } Index: llvm/test/Transforms/Inline/noalias-calls.ll =================================================================== --- llvm/test/Transforms/Inline/noalias-calls.ll +++ llvm/test/Transforms/Inline/noalias-calls.ll @@ -1,9 +1,10 @@ -; RUN: opt -basicaa -inline -enable-noalias-to-md-conversion -S < %s | FileCheck %s +; RUN: opt -basicaa -inline -enable-noalias-to-md-conversion -use-noalias-intrinsic-during-inlining=0 -S < %s | FileCheck %s -check-prefix=MD-SCOPE +; RUN: opt -inline -enable-noalias-to-md-conversion -use-noalias-intrinsic-during-inlining=1 -S < %s | FileCheck %s -check-prefix=INTR-SCOPE target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" target triple = "x86_64-unknown-linux-gnu" declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture readonly, i64, i1) #0 -declare void @hey() #0 +declare void @hey() #1 define void @hello(i8* noalias nocapture %a, i8* noalias nocapture readonly %c, i8* nocapture %b) #1 { entry: @@ -16,30 +17,51 @@ ret void } -define void @foo(i8* nocapture %a, i8* nocapture readonly %c, i8* nocapture %b) #2 { +define void @foo(i8* nocapture %a, i8* nocapture readonly %c, i8* nocapture %b) #1 { entry: tail call void @hello(i8* %a, i8* %c, i8* %b) ret void } +; MD-SCOPE: define void @foo(i8* nocapture %a, i8* nocapture readonly %c, i8* nocapture %b) #1 { +; MD-SCOPE: entry: +; MD-SCOPE: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 %a, i8* align 16 %b, i64 16, i1 false) #1, !noalias !0 +; MD-SCOPE: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 %b, i8* align 16 %c, i64 16, i1 false) #1, !noalias !3 +; MD-SCOPE: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 %a, i8* align 16 %c, i64 16, i1 false) #1, !alias.scope !5 +; MD-SCOPE: call void @hey() #1, !noalias !5 +; MD-SCOPE: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 %{{.*}}, i8* align 16 %c, i64 16, i1 false) #1, !noalias !3 +; MD-SCOPE: ret void +; MD-SCOPE: } -; CHECK: define void @foo(i8* nocapture %a, i8* nocapture readonly %c, i8* nocapture %b) #2 { -; CHECK: entry: -; CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 %a, i8* align 16 %b, i64 16, i1 false) #1, !noalias !0 -; CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 %b, i8* align 16 %c, i64 16, i1 false) #1, !noalias !3 -; CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 %a, i8* align 16 %c, i64 16, i1 false) #1, !alias.scope !5 -; CHECK: call void @hey() #1, !noalias !5 -; CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 %{{.*}}, i8* align 16 %c, i64 16, i1 false) #1, !noalias !3 -; CHECK: ret void -; CHECK: } +; INTR-SCOPE: define void @foo(i8* nocapture %a, i8* nocapture readonly %c, i8* nocapture %b) #1 { +; INTR-SCOPE: entry: +; INTR-SCOPE: %l.i = alloca i8, i32 512, align 1 +; INTR-SCOPE: %0 = call i8* @llvm.noalias.decl.p0i8.p0p0i8.i64(i8** null, i64 0, metadata !0) +; INTR-SCOPE: %1 = call i8* @llvm.noalias.p0i8.p0i8.p0p0i8.i64(i8* %a, i8* %0, i8** null, i64 0, metadata !0) +; INTR-SCOPE: %2 = call i8* @llvm.noalias.decl.p0i8.p0p0i8.i64(i8** null, i64 0, metadata !3) +; INTR-SCOPE: %3 = call i8* @llvm.noalias.p0i8.p0i8.p0p0i8.i64(i8* %c, i8* %2, i8** null, i64 0, metadata !3) +; INTR-SCOPE: call void @llvm.lifetime.start.p0i8(i64 512, i8* %l.i) +; INTR-SCOPE: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 %1, i8* align 16 %b, i64 16, i1 false) #1, !noalias !5 +; INTR-SCOPE: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 %b, i8* align 16 %3, i64 16, i1 false) #1, !noalias !5 +; INTR-SCOPE: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 %1, i8* align 16 %3, i64 16, i1 false) #1, !noalias !5 +; INTR-SCOPE: call void @hey() #1, !noalias !5 +; INTR-SCOPE: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 %l.i, i8* align 16 %3, i64 16, i1 false) #1, !noalias !5 +; INTR-SCOPE: call void @llvm.lifetime.end.p0i8(i64 512, i8* %l.i) +; INTR-SCOPE: ret void +; INTR-SCOPE: } attributes #0 = { nounwind argmemonly willreturn } attributes #1 = { nounwind } -attributes #2 = { nounwind uwtable } -; CHECK: !0 = !{!1} -; CHECK: !1 = distinct !{!1, !2, !"hello: %c"} -; CHECK: !2 = distinct !{!2, !"hello"} -; CHECK: !3 = !{!4} -; CHECK: !4 = distinct !{!4, !2, !"hello: %a"} -; CHECK: !5 = !{!4, !1} +; MD-SCOPE: !0 = !{!1} +; MD-SCOPE: !1 = distinct !{!1, !2, !"hello: %c"} +; MD-SCOPE: !2 = distinct !{!2, !"hello"} +; MD-SCOPE: !3 = !{!4} +; MD-SCOPE: !4 = distinct !{!4, !2, !"hello: %a"} +; MD-SCOPE: !5 = !{!4, !1} +; INTR-SCOPE: !0 = !{!1} +; INTR-SCOPE: !1 = distinct !{!1, !2, !"hello: %a"} +; INTR-SCOPE: !2 = distinct !{!2, !"hello"} +; INTR-SCOPE: !3 = !{!4} +; INTR-SCOPE: !4 = distinct !{!4, !2, !"hello: %c"} +; INTR-SCOPE: !5 = !{!1, !4} Index: llvm/test/Transforms/Inline/noalias-scopes.ll =================================================================== --- /dev/null +++ llvm/test/Transforms/Inline/noalias-scopes.ll @@ -0,0 +1,204 @@ +; RUN: opt -inline -enable-noalias-to-md-conversion -use-noalias-intrinsic-during-inlining=1 -S < %s | FileCheck %s -check-prefix=INTR-SCOPE +; verify that inlining result in scope duplication +; verify that llvm.noalias.decl is introduced at the location of the inlining + +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +; Function Attrs: nofree norecurse nounwind +define dso_local void @copy_npnp(i32* noalias nocapture %dst, i32* noalias nocapture readonly %src) local_unnamed_addr #0 { +entry: + %0 = load i32, i32* %src, noalias_sidechannel i32* undef, align 4, !tbaa !2 + store i32 %0, i32* %dst, noalias_sidechannel i32* undef, align 4, !tbaa !2 + ret void +} + +; Function Attrs: nounwind +define dso_local void @copy_rprp(i32* nocapture %dst, i32* nocapture readonly %src) local_unnamed_addr #1 { +entry: + %0 = tail call i8* @llvm.noalias.decl.p0i8.p0p0i32.i64(i32** null, i64 0, metadata !6) + %1 = tail call i8* @llvm.noalias.decl.p0i8.p0p0i32.i64(i32** null, i64 0, metadata !9) + %2 = tail call i32* @llvm.side.noalias.p0i32.p0i8.p0p0i32.p0p0i32.i64(i32* %src, i8* %1, i32** null, i32** undef, i64 0, metadata !9), !tbaa !11, !noalias !13 + %3 = load i32, i32* %src, noalias_sidechannel i32* %2, align 4, !tbaa !2, !noalias !13 + %4 = tail call i32* @llvm.side.noalias.p0i32.p0i8.p0p0i32.p0p0i32.i64(i32* %dst, i8* %0, i32** null, i32** undef, i64 0, metadata !6), !tbaa !11, !noalias !13 + store i32 %3, i32* %dst, noalias_sidechannel i32* %4, align 4, !tbaa !2, !noalias !13 + ret void +} + +; Function Attrs: argmemonly nounwind +declare i8* @llvm.noalias.decl.p0i8.p0p0i32.i64(i32**, i64, metadata) #2 + +; Function Attrs: nofree norecurse nounwind +define dso_local void @test_npnp(i32* nocapture %dst, i32* nocapture readonly %src, i32 %n) local_unnamed_addr #0 { +entry: + tail call void @copy_npnp(i32* %dst, i32* %src) + br label %do.body + +do.body: ; preds = %do.body, %entry + %n.addr.0 = phi i32 [ %n, %entry ], [ %dec, %do.body ] + tail call void @copy_npnp(i32* %dst, i32* %src) + tail call void @copy_npnp(i32* %dst, i32* %src) + %dec = add nsw i32 %n.addr.0, -1 + %tobool = icmp eq i32 %n.addr.0, 0 + br i1 %tobool, label %do.end, label %do.body + +do.end: ; preds = %do.body + ret void +} + +; INTR-SCOPE: define dso_local void @test_npnp(i32* nocapture %dst, i32* nocapture readonly %src, i32 %n) local_unnamed_addr #0 { +; INTR-SCOPE: entry: +; INTR-SCOPE: %0 = call i8* @llvm.noalias.decl.p0i8.p0p0i32.i64(i32** null, i64 0, metadata !14) +; INTR-SCOPE: %1 = call i32* @llvm.noalias.p0i32.p0i8.p0p0i32.i64(i32* %dst, i8* %0, i32** null, i64 0, metadata !14) +; INTR-SCOPE: %2 = call i8* @llvm.noalias.decl.p0i8.p0p0i32.i64(i32** null, i64 0, metadata !17) +; INTR-SCOPE: %3 = call i32* @llvm.noalias.p0i32.p0i8.p0p0i32.i64(i32* %src, i8* %2, i32** null, i64 0, metadata !17) +; INTR-SCOPE: %4 = load i32, i32* %3, noalias_sidechannel i32* undef, align 4, !tbaa !2, !noalias !19 +; INTR-SCOPE: store i32 %4, i32* %1, noalias_sidechannel i32* undef, align 4, !tbaa !2, !noalias !19 +; INTR-SCOPE: br label %do.body +; INTR-SCOPE: do.body: ; preds = %do.body, %entry +; INTR-SCOPE: %n.addr.0 = phi i32 [ %n, %entry ], [ %dec, %do.body ] +; INTR-SCOPE: %5 = call i8* @llvm.noalias.decl.p0i8.p0p0i32.i64(i32** null, i64 0, metadata !20) +; INTR-SCOPE: %6 = call i32* @llvm.noalias.p0i32.p0i8.p0p0i32.i64(i32* %dst, i8* %5, i32** null, i64 0, metadata !20) +; INTR-SCOPE: %7 = call i8* @llvm.noalias.decl.p0i8.p0p0i32.i64(i32** null, i64 0, metadata !23) +; INTR-SCOPE: %8 = call i32* @llvm.noalias.p0i32.p0i8.p0p0i32.i64(i32* %src, i8* %7, i32** null, i64 0, metadata !23) +; INTR-SCOPE: %9 = load i32, i32* %8, noalias_sidechannel i32* undef, align 4, !tbaa !2, !noalias !25 +; INTR-SCOPE: store i32 %9, i32* %6, noalias_sidechannel i32* undef, align 4, !tbaa !2, !noalias !25 +; INTR-SCOPE: %10 = call i8* @llvm.noalias.decl.p0i8.p0p0i32.i64(i32** null, i64 0, metadata !26) +; INTR-SCOPE: %11 = call i32* @llvm.noalias.p0i32.p0i8.p0p0i32.i64(i32* %dst, i8* %10, i32** null, i64 0, metadata !26) +; INTR-SCOPE: %12 = call i8* @llvm.noalias.decl.p0i8.p0p0i32.i64(i32** null, i64 0, metadata !29) +; INTR-SCOPE: %13 = call i32* @llvm.noalias.p0i32.p0i8.p0p0i32.i64(i32* %src, i8* %12, i32** null, i64 0, metadata !29) +; INTR-SCOPE: %14 = load i32, i32* %13, noalias_sidechannel i32* undef, align 4, !tbaa !2, !noalias !31 +; INTR-SCOPE: store i32 %14, i32* %11, noalias_sidechannel i32* undef, align 4, !tbaa !2, !noalias !31 +; INTR-SCOPE: %dec = add nsw i32 %n.addr.0, -1 +; INTR-SCOPE: %tobool = icmp eq i32 %n.addr.0, 0 +; INTR-SCOPE: br i1 %tobool, label %do.end, label %do.body +; INTR-SCOPE: do.end: ; preds = %do.body +; INTR-SCOPE: ret void +; INTR-SCOPE: } + + +; Function Attrs: nounwind +define dso_local void @test_rprp(i32* nocapture %dst, i32* nocapture readonly %src, i32 %n) local_unnamed_addr #1 { +entry: + tail call void @copy_rprp(i32* %dst, i32* %src) + br label %do.body + +do.body: ; preds = %do.body, %entry + %n.addr.0 = phi i32 [ %n, %entry ], [ %dec, %do.body ] + tail call void @copy_rprp(i32* %dst, i32* %src) + tail call void @copy_rprp(i32* %dst, i32* %src) + %dec = add nsw i32 %n.addr.0, -1 + %tobool = icmp eq i32 %n.addr.0, 0 + br i1 %tobool, label %do.end, label %do.body + +do.end: ; preds = %do.body + ret void +} + +; INTR-SCOPE: define dso_local void @test_rprp(i32* nocapture %dst, i32* nocapture readonly %src, i32 %n) local_unnamed_addr #1 { +; INTR-SCOPE: entry: +; INTR-SCOPE: %0 = tail call i8* @llvm.noalias.decl.p0i8.p0p0i32.i64(i32** null, i64 0, metadata !32) #5 +; INTR-SCOPE: %1 = tail call i8* @llvm.noalias.decl.p0i8.p0p0i32.i64(i32** null, i64 0, metadata !35) #5 +; INTR-SCOPE: %2 = tail call i32* @llvm.side.noalias.p0i32.p0i8.p0p0i32.p0p0i32.i64(i32* %src, i8* %1, i32** null, i32** undef, i64 0, metadata !35) #5, !tbaa !11, !noalias !37 +; INTR-SCOPE: %3 = load i32, i32* %src, noalias_sidechannel i32* %2, align 4, !tbaa !2, !noalias !37 +; INTR-SCOPE: %4 = tail call i32* @llvm.side.noalias.p0i32.p0i8.p0p0i32.p0p0i32.i64(i32* %dst, i8* %0, i32** null, i32** undef, i64 0, metadata !32) #5, !tbaa !11, !noalias !37 +; INTR-SCOPE: store i32 %3, i32* %dst, noalias_sidechannel i32* %4, align 4, !tbaa !2, !noalias !37 +; INTR-SCOPE: br label %do.body +; INTR-SCOPE: do.body: ; preds = %do.body, %entry +; INTR-SCOPE: %n.addr.0 = phi i32 [ %n, %entry ], [ %dec, %do.body ] +; INTR-SCOPE: %5 = tail call i8* @llvm.noalias.decl.p0i8.p0p0i32.i64(i32** null, i64 0, metadata !38) #5 +; INTR-SCOPE: %6 = tail call i8* @llvm.noalias.decl.p0i8.p0p0i32.i64(i32** null, i64 0, metadata !41) #5 +; INTR-SCOPE: %7 = tail call i32* @llvm.side.noalias.p0i32.p0i8.p0p0i32.p0p0i32.i64(i32* %src, i8* %6, i32** null, i32** undef, i64 0, metadata !41) #5, !tbaa !11, !noalias !43 +; INTR-SCOPE: %8 = load i32, i32* %src, noalias_sidechannel i32* %7, align 4, !tbaa !2, !noalias !43 +; INTR-SCOPE: %9 = tail call i32* @llvm.side.noalias.p0i32.p0i8.p0p0i32.p0p0i32.i64(i32* %dst, i8* %5, i32** null, i32** undef, i64 0, metadata !38) #5, !tbaa !11, !noalias !43 +; INTR-SCOPE: store i32 %8, i32* %dst, noalias_sidechannel i32* %9, align 4, !tbaa !2, !noalias !43 +; INTR-SCOPE: %10 = tail call i8* @llvm.noalias.decl.p0i8.p0p0i32.i64(i32** null, i64 0, metadata !44) #5 +; INTR-SCOPE: %11 = tail call i8* @llvm.noalias.decl.p0i8.p0p0i32.i64(i32** null, i64 0, metadata !47) #5 +; INTR-SCOPE: %12 = tail call i32* @llvm.side.noalias.p0i32.p0i8.p0p0i32.p0p0i32.i64(i32* %src, i8* %11, i32** null, i32** undef, i64 0, metadata !47) #5, !tbaa !11, !noalias !49 +; INTR-SCOPE: %13 = load i32, i32* %src, noalias_sidechannel i32* %12, align 4, !tbaa !2, !noalias !49 +; INTR-SCOPE: %14 = tail call i32* @llvm.side.noalias.p0i32.p0i8.p0p0i32.p0p0i32.i64(i32* %dst, i8* %10, i32** null, i32** undef, i64 0, metadata !44) #5, !tbaa !11, !noalias !49 +; INTR-SCOPE: store i32 %13, i32* %dst, noalias_sidechannel i32* %14, align 4, !tbaa !2, !noalias !49 +; INTR-SCOPE: %dec = add nsw i32 %n.addr.0, -1 +; INTR-SCOPE: %tobool = icmp eq i32 %n.addr.0, 0 +; INTR-SCOPE: br i1 %tobool, label %do.end, label %do.body +; INTR-SCOPE: do.end: ; preds = %do.body +; INTR-SCOPE: ret void +; INTR-SCOPE: } + + +; Function Attrs: nounwind readnone speculatable +declare i32* @llvm.side.noalias.p0i32.p0i8.p0p0i32.p0p0i32.i64(i32*, i8*, i32**, i32**, i64, metadata) #3 + +attributes #0 = { nofree norecurse nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #1 = { nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #2 = { argmemonly nounwind } +attributes #3 = { nounwind readnone speculatable } + +!llvm.module.flags = !{!0} +!llvm.ident = !{!1} + +!0 = !{i32 1, !"wchar_size", i32 4} +!1 = !{!"clang"} +!2 = !{!3, !3, i64 0, i64 4} +!3 = !{!4, i64 4, !"int"} +!4 = !{!5, i64 1, !"omnipotent char"} +!5 = !{!"Simple C/C++ TBAA"} +!6 = !{!7} +!7 = distinct !{!7, !8, !"copy_rprp: rdst"} +!8 = distinct !{!8, !"copy_rprp"} +!9 = !{!10} +!10 = distinct !{!10, !8, !"copy_rprp: rsrc"} +!11 = !{!12, !12, i64 0, i64 4} +!12 = !{!4, i64 4, !"any pointer"} +!13 = !{!7, !10} + +; INTR-SCOPE: !0 = !{i32 1, !"wchar_size", i32 4} +; INTR-SCOPE: !1 = !{!"clang"} +; INTR-SCOPE: !2 = !{!3, !3, i64 0, i64 4} +; INTR-SCOPE: !3 = !{!4, i64 4, !"int"} +; INTR-SCOPE: !4 = !{!5, i64 1, !"omnipotent char"} +; INTR-SCOPE: !5 = !{!"Simple C/C++ TBAA"} +; INTR-SCOPE: !6 = !{!7} +; INTR-SCOPE: !7 = distinct !{!7, !8, !"copy_rprp: rdst"} +; INTR-SCOPE: !8 = distinct !{!8, !"copy_rprp"} +; INTR-SCOPE: !9 = !{!10} +; INTR-SCOPE: !10 = distinct !{!10, !8, !"copy_rprp: rsrc"} +; INTR-SCOPE: !11 = !{!12, !12, i64 0, i64 4} +; INTR-SCOPE: !12 = !{!4, i64 4, !"any pointer"} +; INTR-SCOPE: !13 = !{!7, !10} +; INTR-SCOPE: !14 = !{!15} +; INTR-SCOPE: !15 = distinct !{!15, !16, !"copy_npnp: %dst"} +; INTR-SCOPE: !16 = distinct !{!16, !"copy_npnp"} +; INTR-SCOPE: !17 = !{!18} +; INTR-SCOPE: !18 = distinct !{!18, !16, !"copy_npnp: %src"} +; INTR-SCOPE: !19 = !{!15, !18} +; INTR-SCOPE: !20 = !{!21} +; INTR-SCOPE: !21 = distinct !{!21, !22, !"copy_npnp: %dst"} +; INTR-SCOPE: !22 = distinct !{!22, !"copy_npnp"} +; INTR-SCOPE: !23 = !{!24} +; INTR-SCOPE: !24 = distinct !{!24, !22, !"copy_npnp: %src"} +; INTR-SCOPE: !25 = !{!21, !24} +; INTR-SCOPE: !26 = !{!27} +; INTR-SCOPE: !27 = distinct !{!27, !28, !"copy_npnp: %dst"} +; INTR-SCOPE: !28 = distinct !{!28, !"copy_npnp"} +; INTR-SCOPE: !29 = !{!30} +; INTR-SCOPE: !30 = distinct !{!30, !28, !"copy_npnp: %src"} +; INTR-SCOPE: !31 = !{!27, !30} +; INTR-SCOPE: !32 = !{!33} +; INTR-SCOPE: !33 = distinct !{!33, !34, !"copy_rprp: rdst"} +; INTR-SCOPE: !34 = distinct !{!34, !"copy_rprp"} +; INTR-SCOPE: !35 = !{!36} +; INTR-SCOPE: !36 = distinct !{!36, !34, !"copy_rprp: rsrc"} +; INTR-SCOPE: !37 = !{!33, !36} +; INTR-SCOPE: !38 = !{!39} +; INTR-SCOPE: !39 = distinct !{!39, !40, !"copy_rprp: rdst"} +; INTR-SCOPE: !40 = distinct !{!40, !"copy_rprp"} +; INTR-SCOPE: !41 = !{!42} +; INTR-SCOPE: !42 = distinct !{!42, !40, !"copy_rprp: rsrc"} +; INTR-SCOPE: !43 = !{!39, !42} +; INTR-SCOPE: !44 = !{!45} +; INTR-SCOPE: !45 = distinct !{!45, !46, !"copy_rprp: rdst"} +; INTR-SCOPE: !46 = distinct !{!46, !"copy_rprp"} +; INTR-SCOPE: !47 = !{!48} +; INTR-SCOPE: !48 = distinct !{!48, !46, !"copy_rprp: rsrc"} +; INTR-SCOPE: !49 = !{!45, !48} Index: llvm/test/Transforms/Inline/noalias.ll =================================================================== --- llvm/test/Transforms/Inline/noalias.ll +++ llvm/test/Transforms/Inline/noalias.ll @@ -1,4 +1,5 @@ -; RUN: opt -inline -enable-noalias-to-md-conversion -S < %s | FileCheck %s +; RUN: opt -inline -enable-noalias-to-md-conversion -use-noalias-intrinsic-during-inlining=0 -S < %s | FileCheck %s -check-prefix=MD-SCOPE +; RUN: opt -inline -enable-noalias-to-md-conversion -use-noalias-intrinsic-during-inlining=1 -S < %s | FileCheck %s -check-prefix=INTR-SCOPE target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" target triple = "x86_64-unknown-linux-gnu" @@ -19,16 +20,29 @@ ret void } -; CHECK: define void @foo(float* nocapture %a, float* nocapture readonly %c) #0 { -; CHECK: entry: -; CHECK: %0 = load float, float* %c, align 4, !noalias !0 -; CHECK: %arrayidx.i = getelementptr inbounds float, float* %a, i64 5 -; CHECK: store float %0, float* %arrayidx.i, align 4, !alias.scope !0 -; CHECK: %1 = load float, float* %c, align 4 -; CHECK: %arrayidx = getelementptr inbounds float, float* %a, i64 7 -; CHECK: store float %1, float* %arrayidx, align 4 -; CHECK: ret void -; CHECK: } +; MD-SCOPE: define void @foo(float* nocapture %a, float* nocapture readonly %c) #0 { +; MD-SCOPE: entry: +; MD-SCOPE: %0 = load float, float* %c, align 4, !noalias !0 +; MD-SCOPE: %arrayidx.i = getelementptr inbounds float, float* %a, i64 5 +; MD-SCOPE: store float %0, float* %arrayidx.i, align 4, !alias.scope !0 +; MD-SCOPE: %1 = load float, float* %c, align 4 +; MD-SCOPE: %arrayidx = getelementptr inbounds float, float* %a, i64 7 +; MD-SCOPE: store float %1, float* %arrayidx, align 4 +; MD-SCOPE: ret void +; MD-SCOPE: } + +; INTR-SCOPE: define void @foo(float* nocapture %a, float* nocapture readonly %c) #0 { +; INTR-SCOPE: entry: +; INTR-SCOPE: %0 = call i8* @llvm.noalias.decl.p0i8.p0p0f32.i64(float** null, i64 0, metadata !0) +; INTR-SCOPE: %1 = call float* @llvm.noalias.p0f32.p0i8.p0p0f32.i64(float* %a, i8* %0, float** null, i64 0, metadata !0) +; INTR-SCOPE: %2 = load float, float* %c, align 4, !noalias !0 +; INTR-SCOPE: %arrayidx.i = getelementptr inbounds float, float* %1, i64 5 +; INTR-SCOPE: store float %2, float* %arrayidx.i, align 4, !noalias !0 +; INTR-SCOPE: %3 = load float, float* %c, align 4 +; INTR-SCOPE: %arrayidx = getelementptr inbounds float, float* %a, i64 7 +; INTR-SCOPE: store float %3, float* %arrayidx, align 4 +; INTR-SCOPE: ret void +; INTR-SCOPE: } define void @hello2(float* noalias nocapture %a, float* noalias nocapture %b, float* nocapture readonly %c) #0 { entry: @@ -49,28 +63,55 @@ ret void } -; CHECK: define void @foo2(float* nocapture %a, float* nocapture %b, float* nocapture readonly %c) #0 { -; CHECK: entry: -; CHECK: %0 = load float, float* %c, align 4, !noalias !3 -; CHECK: %arrayidx.i = getelementptr inbounds float, float* %a, i64 5 -; CHECK: store float %0, float* %arrayidx.i, align 4, !alias.scope !7, !noalias !8 -; CHECK: %arrayidx1.i = getelementptr inbounds float, float* %b, i64 8 -; CHECK: store float %0, float* %arrayidx1.i, align 4, !alias.scope !8, !noalias !7 -; CHECK: %1 = load float, float* %c, align 4 -; CHECK: %arrayidx = getelementptr inbounds float, float* %a, i64 7 -; CHECK: store float %1, float* %arrayidx, align 4 -; CHECK: ret void -; CHECK: } +; MD-SCOPE: define void @foo2(float* nocapture %a, float* nocapture %b, float* nocapture readonly %c) #0 { +; MD-SCOPE: entry: +; MD-SCOPE: %0 = load float, float* %c, align 4, !noalias !3 +; MD-SCOPE: %arrayidx.i = getelementptr inbounds float, float* %a, i64 5 +; MD-SCOPE: store float %0, float* %arrayidx.i, align 4, !alias.scope !7, !noalias !8 +; MD-SCOPE: %arrayidx1.i = getelementptr inbounds float, float* %b, i64 8 +; MD-SCOPE: store float %0, float* %arrayidx1.i, align 4, !alias.scope !8, !noalias !7 +; MD-SCOPE: %1 = load float, float* %c, align 4 +; MD-SCOPE: %arrayidx = getelementptr inbounds float, float* %a, i64 7 +; MD-SCOPE: store float %1, float* %arrayidx, align 4 +; MD-SCOPE: ret void +; MD-SCOPE: } + +; INTR-SCOPE: define void @foo2(float* nocapture %a, float* nocapture %b, float* nocapture readonly %c) #0 { +; INTR-SCOPE: entry: +; INTR-SCOPE: %0 = call i8* @llvm.noalias.decl.p0i8.p0p0f32.i64(float** null, i64 0, metadata !3) +; INTR-SCOPE: %1 = call float* @llvm.noalias.p0f32.p0i8.p0p0f32.i64(float* %a, i8* %0, float** null, i64 0, metadata !3) +; INTR-SCOPE: %2 = call i8* @llvm.noalias.decl.p0i8.p0p0f32.i64(float** null, i64 0, metadata !6) +; INTR-SCOPE: %3 = call float* @llvm.noalias.p0f32.p0i8.p0p0f32.i64(float* %b, i8* %2, float** null, i64 0, metadata !6) +; INTR-SCOPE: %4 = load float, float* %c, align 4, !noalias !8 +; INTR-SCOPE: %arrayidx.i = getelementptr inbounds float, float* %1, i64 5 +; INTR-SCOPE: store float %4, float* %arrayidx.i, align 4, !noalias !8 +; INTR-SCOPE: %arrayidx1.i = getelementptr inbounds float, float* %3, i64 8 +; INTR-SCOPE: store float %4, float* %arrayidx1.i, align 4, !noalias !8 +; INTR-SCOPE: %5 = load float, float* %c, align 4 +; INTR-SCOPE: %arrayidx = getelementptr inbounds float, float* %a, i64 7 +; INTR-SCOPE: store float %5, float* %arrayidx, align 4 +; INTR-SCOPE: ret void +; INTR-SCOPE: } attributes #0 = { nounwind uwtable } +attributes #1 = { argmemonly nounwind } -; CHECK: !0 = !{!1} -; CHECK: !1 = distinct !{!1, !2, !"hello: %a"} -; CHECK: !2 = distinct !{!2, !"hello"} -; CHECK: !3 = !{!4, !6} -; CHECK: !4 = distinct !{!4, !5, !"hello2: %a"} -; CHECK: !5 = distinct !{!5, !"hello2"} -; CHECK: !6 = distinct !{!6, !5, !"hello2: %b"} -; CHECK: !7 = !{!4} -; CHECK: !8 = !{!6} +; MD-SCOPE: !0 = !{!1} +; MD-SCOPE: !1 = distinct !{!1, !2, !"hello: %a"} +; MD-SCOPE: !2 = distinct !{!2, !"hello"} +; MD-SCOPE: !3 = !{!4, !6} +; MD-SCOPE: !4 = distinct !{!4, !5, !"hello2: %a"} +; MD-SCOPE: !5 = distinct !{!5, !"hello2"} +; MD-SCOPE: !6 = distinct !{!6, !5, !"hello2: %b"} +; MD-SCOPE: !7 = !{!4} +; MD-SCOPE: !8 = !{!6} +; INTR-SCOPE: !0 = !{!1} +; INTR-SCOPE: !1 = distinct !{!1, !2, !"hello: %a"} +; INTR-SCOPE: !2 = distinct !{!2, !"hello"} +; INTR-SCOPE: !3 = !{!4} +; INTR-SCOPE: !4 = distinct !{!4, !5, !"hello2: %a"} +; INTR-SCOPE: !5 = distinct !{!5, !"hello2"} +; INTR-SCOPE: !6 = !{!7} +; INTR-SCOPE: !7 = distinct !{!7, !5, !"hello2: %b"} +; INTR-SCOPE: !8 = !{!4, !7} Index: llvm/test/Transforms/Inline/noalias2.ll =================================================================== --- llvm/test/Transforms/Inline/noalias2.ll +++ llvm/test/Transforms/Inline/noalias2.ll @@ -1,4 +1,6 @@ -; RUN: opt -inline -enable-noalias-to-md-conversion -S < %s | FileCheck %s +; RUN: opt -inline -enable-noalias-to-md-conversion -use-noalias-intrinsic-during-inlining=0 -S < %s | FileCheck %s -check-prefix=MD-SCOPE +; RUN: opt -inline -enable-noalias-to-md-conversion -use-noalias-intrinsic-during-inlining=1 -S < %s | FileCheck %s -check-prefix=INTR-SCOPE + target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" target triple = "x86_64-unknown-linux-gnu" @@ -19,16 +21,31 @@ ret void } -; CHECK: define void @foo(float* noalias nocapture %a, float* noalias nocapture readonly %c) #0 { -; CHECK: entry: -; CHECK: %0 = load float, float* %c, align 4, !alias.scope !0, !noalias !3 -; CHECK: %arrayidx.i = getelementptr inbounds float, float* %a, i64 5 -; CHECK: store float %0, float* %arrayidx.i, align 4, !alias.scope !3, !noalias !0 -; CHECK: %1 = load float, float* %c, align 4 -; CHECK: %arrayidx = getelementptr inbounds float, float* %a, i64 7 -; CHECK: store float %1, float* %arrayidx, align 4 -; CHECK: ret void -; CHECK: } +; MD-SCOPE: define void @foo(float* noalias nocapture %a, float* noalias nocapture readonly %c) #0 { +; MD-SCOPE: entry: +; MD-SCOPE: %0 = load float, float* %c, align 4, !alias.scope !0, !noalias !3 +; MD-SCOPE: %arrayidx.i = getelementptr inbounds float, float* %a, i64 5 +; MD-SCOPE: store float %0, float* %arrayidx.i, align 4, !alias.scope !3, !noalias !0 +; MD-SCOPE: %1 = load float, float* %c, align 4 +; MD-SCOPE: %arrayidx = getelementptr inbounds float, float* %a, i64 7 +; MD-SCOPE: store float %1, float* %arrayidx, align 4 +; MD-SCOPE: ret void +; MD-SCOPE: } + +; INTR-SCOPE: define void @foo(float* noalias nocapture %a, float* noalias nocapture readonly %c) #0 { +; INTR-SCOPE: entry: +; INTR-SCOPE: %0 = call i8* @llvm.noalias.decl.p0i8.p0p0f32.i64(float** null, i64 0, metadata !0) +; INTR-SCOPE: %1 = call float* @llvm.noalias.p0f32.p0i8.p0p0f32.i64(float* %a, i8* %0, float** null, i64 0, metadata !0) +; INTR-SCOPE: %2 = call i8* @llvm.noalias.decl.p0i8.p0p0f32.i64(float** null, i64 0, metadata !3) +; INTR-SCOPE: %3 = call float* @llvm.noalias.p0f32.p0i8.p0p0f32.i64(float* %c, i8* %2, float** null, i64 0, metadata !3) +; INTR-SCOPE: %4 = load float, float* %3, align 4, !noalias !5 +; INTR-SCOPE: %arrayidx.i = getelementptr inbounds float, float* %1, i64 5 +; INTR-SCOPE: store float %4, float* %arrayidx.i, align 4, !noalias !5 +; INTR-SCOPE: %5 = load float, float* %c, align 4 +; INTR-SCOPE: %arrayidx = getelementptr inbounds float, float* %a, i64 7 +; INTR-SCOPE: store float %5, float* %arrayidx, align 4 +; INTR-SCOPE: ret void +; INTR-SCOPE: } define void @hello2(float* noalias nocapture %a, float* noalias nocapture %b, float* nocapture readonly %c) #0 { entry: @@ -52,46 +69,101 @@ ret void } -; CHECK: define void @foo2(float* nocapture %a, float* nocapture %b, float* nocapture readonly %c) #0 { -; CHECK: entry: -; CHECK: %0 = load float, float* %c, align 4, !alias.scope !5, !noalias !10 -; CHECK: %arrayidx.i.i = getelementptr inbounds float, float* %a, i64 5 -; CHECK: store float %0, float* %arrayidx.i.i, align 4, !alias.scope !10, !noalias !5 -; CHECK: %1 = load float, float* %c, align 4, !alias.scope !13, !noalias !14 -; CHECK: %arrayidx.i = getelementptr inbounds float, float* %a, i64 7 -; CHECK: store float %1, float* %arrayidx.i, align 4, !alias.scope !14, !noalias !13 -; CHECK: %2 = load float, float* %c, align 4, !noalias !15 -; CHECK: %arrayidx.i1 = getelementptr inbounds float, float* %a, i64 6 -; CHECK: store float %2, float* %arrayidx.i1, align 4, !alias.scope !19, !noalias !20 -; CHECK: %arrayidx1.i = getelementptr inbounds float, float* %b, i64 8 -; CHECK: store float %2, float* %arrayidx1.i, align 4, !alias.scope !20, !noalias !19 -; CHECK: %3 = load float, float* %c, align 4 -; CHECK: %arrayidx = getelementptr inbounds float, float* %a, i64 7 -; CHECK: store float %3, float* %arrayidx, align 4 -; CHECK: ret void -; CHECK: } +; MD-SCOPE: define void @foo2(float* nocapture %a, float* nocapture %b, float* nocapture readonly %c) #0 { +; MD-SCOPE: entry: +; MD-SCOPE: %0 = load float, float* %c, align 4, !alias.scope !5, !noalias !10 +; MD-SCOPE: %arrayidx.i.i = getelementptr inbounds float, float* %a, i64 5 +; MD-SCOPE: store float %0, float* %arrayidx.i.i, align 4, !alias.scope !10, !noalias !5 +; MD-SCOPE: %1 = load float, float* %c, align 4, !alias.scope !13, !noalias !14 +; MD-SCOPE: %arrayidx.i = getelementptr inbounds float, float* %a, i64 7 +; MD-SCOPE: store float %1, float* %arrayidx.i, align 4, !alias.scope !14, !noalias !13 +; MD-SCOPE: %2 = load float, float* %c, align 4, !noalias !15 +; MD-SCOPE: %arrayidx.i1 = getelementptr inbounds float, float* %a, i64 6 +; MD-SCOPE: store float %2, float* %arrayidx.i1, align 4, !alias.scope !19, !noalias !20 +; MD-SCOPE: %arrayidx1.i = getelementptr inbounds float, float* %b, i64 8 +; MD-SCOPE: store float %2, float* %arrayidx1.i, align 4, !alias.scope !20, !noalias !19 +; MD-SCOPE: %3 = load float, float* %c, align 4 +; MD-SCOPE: %arrayidx = getelementptr inbounds float, float* %a, i64 7 +; MD-SCOPE: store float %3, float* %arrayidx, align 4 +; MD-SCOPE: ret void +; MD-SCOPE: } -; CHECK: !0 = !{!1} -; CHECK: !1 = distinct !{!1, !2, !"hello: %c"} -; CHECK: !2 = distinct !{!2, !"hello"} -; CHECK: !3 = !{!4} -; CHECK: !4 = distinct !{!4, !2, !"hello: %a"} -; CHECK: !5 = !{!6, !8} -; CHECK: !6 = distinct !{!6, !7, !"hello: %c"} -; CHECK: !7 = distinct !{!7, !"hello"} -; CHECK: !8 = distinct !{!8, !9, !"foo: %c"} -; CHECK: !9 = distinct !{!9, !"foo"} -; CHECK: !10 = !{!11, !12} -; CHECK: !11 = distinct !{!11, !7, !"hello: %a"} -; CHECK: !12 = distinct !{!12, !9, !"foo: %a"} -; CHECK: !13 = !{!8} -; CHECK: !14 = !{!12} -; CHECK: !15 = !{!16, !18} -; CHECK: !16 = distinct !{!16, !17, !"hello2: %a"} -; CHECK: !17 = distinct !{!17, !"hello2"} -; CHECK: !18 = distinct !{!18, !17, !"hello2: %b"} -; CHECK: !19 = !{!16} -; CHECK: !20 = !{!18} +; INTR-SCOPE: define void @foo2(float* nocapture %a, float* nocapture %b, float* nocapture readonly %c) #0 { +; INTR-SCOPE: entry: +; INTR-SCOPE: %0 = call i8* @llvm.noalias.decl.p0i8.p0p0f32.i64(float** null, i64 0, metadata !6) +; INTR-SCOPE: %1 = call float* @llvm.noalias.p0f32.p0i8.p0p0f32.i64(float* %a, i8* %0, float** null, i64 0, metadata !6) +; INTR-SCOPE: %2 = call i8* @llvm.noalias.decl.p0i8.p0p0f32.i64(float** null, i64 0, metadata !9) +; INTR-SCOPE: %3 = call float* @llvm.noalias.p0f32.p0i8.p0p0f32.i64(float* %c, i8* %2, float** null, i64 0, metadata !9) +; INTR-SCOPE: %4 = call i8* @llvm.noalias.decl.p0i8.p0p0f32.i64(float** null, i64 0, metadata !11) #3, !noalias !14 +; INTR-SCOPE: %5 = call float* @llvm.noalias.p0f32.p0i8.p0p0f32.i64(float* %1, i8* %4, float** null, i64 0, metadata !11) #3, !noalias !14 +; INTR-SCOPE: %6 = call i8* @llvm.noalias.decl.p0i8.p0p0f32.i64(float** null, i64 0, metadata !15) #3, !noalias !14 +; INTR-SCOPE: %7 = call float* @llvm.noalias.p0f32.p0i8.p0p0f32.i64(float* %3, i8* %6, float** null, i64 0, metadata !15) #3, !noalias !14 +; INTR-SCOPE: %8 = load float, float* %7, align 4, !noalias !17 +; INTR-SCOPE: %arrayidx.i.i = getelementptr inbounds float, float* %5, i64 5 +; INTR-SCOPE: store float %8, float* %arrayidx.i.i, align 4, !noalias !17 +; INTR-SCOPE: %9 = load float, float* %3, align 4, !noalias !14 +; INTR-SCOPE: %arrayidx.i = getelementptr inbounds float, float* %1, i64 7 +; INTR-SCOPE: store float %9, float* %arrayidx.i, align 4, !noalias !14 +; INTR-SCOPE: %10 = call i8* @llvm.noalias.decl.p0i8.p0p0f32.i64(float** null, i64 0, metadata !18) +; INTR-SCOPE: %11 = call float* @llvm.noalias.p0f32.p0i8.p0p0f32.i64(float* %a, i8* %10, float** null, i64 0, metadata !18) +; INTR-SCOPE: %12 = call i8* @llvm.noalias.decl.p0i8.p0p0f32.i64(float** null, i64 0, metadata !21) +; INTR-SCOPE: %13 = call float* @llvm.noalias.p0f32.p0i8.p0p0f32.i64(float* %b, i8* %12, float** null, i64 0, metadata !21) +; INTR-SCOPE: %14 = load float, float* %c, align 4, !noalias !23 +; INTR-SCOPE: %arrayidx.i1 = getelementptr inbounds float, float* %11, i64 6 +; INTR-SCOPE: store float %14, float* %arrayidx.i1, align 4, !noalias !23 +; INTR-SCOPE: %arrayidx1.i = getelementptr inbounds float, float* %13, i64 8 +; INTR-SCOPE: store float %14, float* %arrayidx1.i, align 4, !noalias !23 +; INTR-SCOPE: %15 = load float, float* %c, align 4 +; INTR-SCOPE: %arrayidx = getelementptr inbounds float, float* %a, i64 7 +; INTR-SCOPE: store float %15, float* %arrayidx, align 4 +; INTR-SCOPE: ret void +; INTR-SCOPE: } -attributes #0 = { nounwind uwtable } +; MD-SCOPE: !0 = !{!1} +; MD-SCOPE: !1 = distinct !{!1, !2, !"hello: %c"} +; MD-SCOPE: !2 = distinct !{!2, !"hello"} +; MD-SCOPE: !3 = !{!4} +; MD-SCOPE: !4 = distinct !{!4, !2, !"hello: %a"} +; MD-SCOPE: !5 = !{!6, !8} +; MD-SCOPE: !6 = distinct !{!6, !7, !"hello: %c"} +; MD-SCOPE: !7 = distinct !{!7, !"hello"} +; MD-SCOPE: !8 = distinct !{!8, !9, !"foo: %c"} +; MD-SCOPE: !9 = distinct !{!9, !"foo"} +; MD-SCOPE: !10 = !{!11, !12} +; MD-SCOPE: !11 = distinct !{!11, !7, !"hello: %a"} +; MD-SCOPE: !12 = distinct !{!12, !9, !"foo: %a"} +; MD-SCOPE: !13 = !{!8} +; MD-SCOPE: !14 = !{!12} +; MD-SCOPE: !15 = !{!16, !18} +; MD-SCOPE: !16 = distinct !{!16, !17, !"hello2: %a"} +; MD-SCOPE: !17 = distinct !{!17, !"hello2"} +; MD-SCOPE: !18 = distinct !{!18, !17, !"hello2: %b"} +; MD-SCOPE: !19 = !{!16} +; MD-SCOPE: !20 = !{!18} +; INTR-SCOPE: !0 = !{!1} +; INTR-SCOPE: !1 = distinct !{!1, !2, !"hello: %a"} +; INTR-SCOPE: !2 = distinct !{!2, !"hello"} +; INTR-SCOPE: !3 = !{!4} +; INTR-SCOPE: !4 = distinct !{!4, !2, !"hello: %c"} +; INTR-SCOPE: !5 = !{!1, !4} +; INTR-SCOPE: !6 = !{!7} +; INTR-SCOPE: !7 = distinct !{!7, !8, !"foo: %a"} +; INTR-SCOPE: !8 = distinct !{!8, !"foo"} +; INTR-SCOPE: !9 = !{!10} +; INTR-SCOPE: !10 = distinct !{!10, !8, !"foo: %c"} +; INTR-SCOPE: !11 = !{!12} +; INTR-SCOPE: !12 = distinct !{!12, !13, !"hello: %a"} +; INTR-SCOPE: !13 = distinct !{!13, !"hello"} +; INTR-SCOPE: !14 = !{!7, !10} +; INTR-SCOPE: !15 = !{!16} +; INTR-SCOPE: !16 = distinct !{!16, !13, !"hello: %c"} +; INTR-SCOPE: !17 = !{!12, !16, !7, !10} +; INTR-SCOPE: !18 = !{!19} +; INTR-SCOPE: !19 = distinct !{!19, !20, !"hello2: %a"} +; INTR-SCOPE: !20 = distinct !{!20, !"hello2"} +; INTR-SCOPE: !21 = !{!22} +; INTR-SCOPE: !22 = distinct !{!22, !20, !"hello2: %b"} +; INTR-SCOPE: !23 = !{!19, !22} + +attributes #0 = { nounwind uwtable } Index: llvm/test/Transforms/Inline/parallel-loop-md-merge.ll =================================================================== --- llvm/test/Transforms/Inline/parallel-loop-md-merge.ll +++ llvm/test/Transforms/Inline/parallel-loop-md-merge.ll @@ -1,4 +1,5 @@ -; RUN: opt -always-inline -globalopt -S < %s | FileCheck %s +; RUN: opt -always-inline -globalopt --use-noalias-intrinsic-during-inlining=0 -S < %s | FileCheck %s --check-prefixes=CHECK,CHECK_SCOPE +; RUN: opt -always-inline -globalopt --use-noalias-intrinsic-during-inlining=1 -S < %s | FileCheck %s --check-prefixes=CHECK,CHECK_NOALIAS ; ; static void __attribute__((always_inline)) callee(long n, double A[static const restrict n], long i) { ; for (long j = 0; j < n; j += 1) @@ -64,14 +65,21 @@ !11 = distinct !{!11, !12} ; LoopID !12 = !{!"llvm.loop.parallel_accesses", !10} +; There is a small reordering when noalias intrinsics are introduced ; CHECK: store double 4.200000e+01, {{.*}} !llvm.access.group ![[ACCESS_GROUP_LIST_3:[0-9]+]] ; CHECK: br label %for.cond.i, !llvm.loop ![[LOOP_INNER:[0-9]+]] ; CHECK: br label %for.cond, !llvm.loop ![[LOOP_OUTER:[0-9]+]] -; CHECK: ![[ACCESS_GROUP_LIST_3]] = !{![[ACCESS_GROUP_INNER:[0-9]+]], ![[ACCESS_GROUP_OUTER:[0-9]+]]} -; CHECK: ![[ACCESS_GROUP_INNER]] = distinct !{} -; CHECK: ![[ACCESS_GROUP_OUTER]] = distinct !{} + +; CHECK_SCOPE: ![[ACCESS_GROUP_LIST_3]] = !{![[ACCESS_GROUP_INNER:[0-9]+]], ![[ACCESS_GROUP_OUTER:[0-9]+]]} +; CHECK_SCOPE: ![[ACCESS_GROUP_INNER]] = distinct !{} +; CHECK_SCOPE: ![[ACCESS_GROUP_OUTER]] = distinct !{} + +; CHECK_NOALIAS: ![[ACCESS_GROUP_OUTER:[0-9]+]] = distinct !{} +; CHECK_NOALIAS: ![[ACCESS_GROUP_LIST_3]] = !{![[ACCESS_GROUP_INNER:[0-9]+]], ![[ACCESS_GROUP_OUTER]]} +; CHECK_NOALIAS: ![[ACCESS_GROUP_INNER]] = distinct !{} + ; CHECK: ![[LOOP_INNER]] = distinct !{![[LOOP_INNER]], ![[ACCESSES_INNER:[0-9]+]]} ; CHECK: ![[ACCESSES_INNER]] = !{!"llvm.loop.parallel_accesses", ![[ACCESS_GROUP_INNER]]} ; CHECK: ![[LOOP_OUTER]] = distinct !{![[LOOP_OUTER]], ![[ACCESSES_OUTER:[0-9]+]]} Index: llvm/test/Transforms/InstCombine/assume.ll =================================================================== --- llvm/test/Transforms/InstCombine/assume.ll +++ llvm/test/Transforms/InstCombine/assume.ll @@ -351,6 +351,8 @@ !7 = !{i32 1, !"wchar_size", i32 4} !8 = !{i32 7, !"PIC Level", i32 2} !9 = !DILocation(line: 0, column: 0, scope: !2) +!10 = !{!10, !"some domain"} +!11 = !{!11, !10, !"some scope"} attributes #0 = { nounwind uwtable } Index: llvm/test/Transforms/InstCombine/noalias.ll =================================================================== --- /dev/null +++ llvm/test/Transforms/InstCombine/noalias.ll @@ -0,0 +1,45 @@ +; RUN: opt < %s -instcombine -S | FileCheck %s + +target datalayout = "e-i8:8:8-i16:16:16-i32:32:32-i64:32:32-f32:32:32-f64:32:32-p:32:32:32:32:8-s0:32:32-a0:8:8-S32-n32-v128:32:32-P0-p0:32:32:32:32:8-p10:32:32:32:32:8-p20:32:32:32:32:8" + +@__const.f1.i = private unnamed_addr constant { i8*, [4 x i8] } { i8* null, [4 x i8] undef }, align 4 + +; Function Attrs: nounwind optsize +define dso_local i64 @test01() local_unnamed_addr #0 { +entry: + %i.sroa.6 = alloca [2 x i8], align 2 + %i.sroa.6.0..sroa_idx6 = getelementptr inbounds [2 x i8], [2 x i8]* %i.sroa.6, i32 0, i32 0 + call void @llvm.lifetime.start.p0i8(i64 2, i8* %i.sroa.6.0..sroa_idx6) + %0 = call i8* @llvm.noalias.decl.p0i8.p0a2i8.i32([2 x i8]* %i.sroa.6, i32 2, metadata !2) + call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 2 %i.sroa.6.0..sroa_idx6, i8* align 2 getelementptr inbounds (i8, i8* bitcast ({ i8*, [4 x i8] }* @__const.f1.i to i8*), i32 2), i32 2, i1 false) + call void @llvm.lifetime.end.p0i8(i64 2, i8* %i.sroa.6.0..sroa_idx6) + ret i64 undef +} + +; CHECK-LABEL: @test01( +; CHECK: %i.sroa.6 = alloca i16, align 2 +; CHECK: %0 = call i8* @llvm.noalias.decl.p0i8.p0i16.i32(i16* nonnull %i.sroa.6, i32 2, metadata !2) + +; Function Attrs: argmemonly nounwind +declare void @llvm.lifetime.start.p0i8(i64 immarg, i8* nocapture) #1 + +; Function Attrs: argmemonly nounwind +declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture writeonly, i8* nocapture readonly, i32, i1 immarg) #1 + +; Function Attrs: argmemonly nounwind +declare void @llvm.lifetime.end.p0i8(i64 immarg, i8* nocapture) #1 + +; Function Attrs: argmemonly nounwind +declare i8* @llvm.noalias.decl.p0i8.p0a2i8.i32([2 x i8]*, i32, metadata) #1 + +attributes #0 = { nounwind optsize "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #1 = { argmemonly nounwind } + +!llvm.module.flags = !{!0} +!llvm.ident = !{!1} + +!0 = !{i32 1, !"wchar_size", i32 4} +!1 = !{!"clang"} +!2 = !{!3} +!3 = distinct !{!3, !4, !"f1: i"} +!4 = distinct !{!4, !"f1"} Index: llvm/test/Transforms/InstSimplify/noalias.ll =================================================================== --- /dev/null +++ llvm/test/Transforms/InstSimplify/noalias.ll @@ -0,0 +1,172 @@ +; RUN: opt -instsimplify -S < %s | FileCheck %s + +define void @test01(i8* %ptr) { + call i8* @llvm.noalias.p0i8.p0i8.p0p0i8.i32(i8* %ptr, i8* null, i8** null, i32 0, metadata !1) + ret void + +; CHECK-LABEL: @test01 +; CHECK-NOT: llvm.noalias.p0i8 +; CHECK: ret void +} + +define i8* @test02() { + %v = call i8* @llvm.noalias.p0i8.p0i8.p0p0i8.i32(i8* null, i8* null, i8** null, i32 0, metadata !1) + ret i8* %v + +; CHECK-LABEL: @test02 +; CHECK: llvm.noalias.p0i8 +; CHECK: ret i8* %v +} + +define i8* @test03() { + %v = call i8* @llvm.noalias.p0i8.p0i8.p0p0i8.i32(i8* undef, i8* null, i8** null, i32 0, metadata !1) + ret i8* %v + +; CHECK-LABEL: @test03 +; CHECK-NOT: llvm.noalias.p0i8 +; CHECK: ret i8* undef +} + +declare i8* @llvm.noalias.p0i8.p0i8.p0p0i8.i32(i8*, i8*, i8**, i32, metadata ) nounwind + +define void @test11(i8* %ptr) { + call i8* @llvm.side.noalias.p0i8.p0i8.p0p0i8.p0p0i8.i32(i8* %ptr, i8* null, i8** null, i8** null, i32 0, metadata !1) + ret void + +; CHECK-LABEL: @test11 +; CHECK-NOT: llvm.side.noalias.p0i8 +; CHECK: ret void +} + +define i8* @test12() { + %v = call i8* @llvm.side.noalias.p0i8.p0i8.p0p0i8.p0p0i8.i32(i8* null, i8* null, i8** null, i8** null, i32 0, metadata !1) + ret i8* %v + +; CHECK-LABEL: @test12 +; CHECK: llvm.side.noalias.p0i8 +; CHECK: ret i8* %v +} + +define i8* @test13() { + %v = call i8* @llvm.side.noalias.p0i8.p0i8.p0p0i8.p0p0i8.i32(i8* undef, i8* null, i8** null, i8** null, i32 0, metadata !1) + ret i8* %v + +; CHECK-LABEL: @test13 +; CHECK-NOT: llvm.side.noalias.p0i8 +; CHECK: ret i8* undef +} + +define i8* @test14() { + %u = call i8* @llvm.side.noalias.p0i8.p0i8.p0p0i8.p0p0i8.i32(i8* null, i8* null, i8** null, i8** null, i32 0, metadata !1) + %v = call i8* @llvm.side.noalias.p0i8.p0i8.p0p0i8.p0p0i8.i32(i8* %u, i8* null, i8** null, i8** null, i32 0, metadata !1) + ret i8* %v + +; CHECK-LABEL: @test14 +; CHECK: llvm.side.noalias.p0i8 +; CHECK-NOT: llvm.side.noalias.p0i8 +; CHECK: ret i8* %u +} + +define i8* @test15() { + %u = call i8* @llvm.side.noalias.p0i8.p0i8.p0p0i8.p0p0i8.i32(i8* null, i8* null, i8** null, i8** null, i32 1, metadata !1) + %v = call i8* @llvm.side.noalias.p0i8.p0i8.p0p0i8.p0p0i8.i32(i8* %u, i8* null, i8** null, i8** null, i32 0, metadata !1) + ret i8* %v + +; CHECK-LABEL: @test15 +; CHECK: llvm.side.noalias.p0i8 +; CHECK: llvm.side.noalias.p0i8 +; CHECK: ret i8* %v +} + +define i8* @test20() { + %u = call i8* @llvm.side.noalias.p0i8.p0i8.p0p0i8.p0p0i8.i32(i8* null, i8* null, i8** null, i8** null, i32 0, metadata !1) + %v = call i8* @llvm.noalias.arg.guard.p0i8.p0i8(i8* null, i8* %u) + ret i8* %v + +; CHECK-LABEL: @test20 +; CHECK: llvm.side.noalias.p0i8 +; CHECK: llvm.noalias.arg.guard.p0i8 +; CHECK: ret i8* %v +} + +define i8* @test21() { + %u = call i8* @llvm.side.noalias.p0i8.p0i8.p0p0i8.p0p0i8.i32(i8* null, i8* null, i8** null, i8** null, i32 0, metadata !1) + %v = call i8* @llvm.noalias.arg.guard.p0i8.p0i8(i8* undef, i8* %u) + ret i8* %v + +; CHECK-LABEL: @test21 +; CHECK-NOT: llvm.side.noalias.p0i8 +; CHECK-NOT: llvm.noalias.arg.guard.p0i8 +; CHECK: ret i8* undef +} + +define i8* @test30() { + %v = call i8* @llvm.noalias.copy.guard.p0i8.p0i8(i8* null, i8* null, metadata !2, metadata !1) + ret i8* %v + +; CHECK-LABEL: @test30 +; CHECK: llvm.noalias.copy.guard.p0i8 +; CHECK: ret i8* %v +} + +define void @test31() { + %v = call i8* @llvm.noalias.copy.guard.p0i8.p0i8(i8* null, i8* null, metadata !2, metadata !1) + ret void + +; CHECK-LABEL: @test31 +; CHECK-NOT: llvm.noalias.copy.guard.p0i8 +; CHECK: ret void +} + +define i8* @test32() { + %v = call i8* @llvm.noalias.copy.guard.p0i8.p0i8(i8* undef, i8* null, metadata !2, metadata !1) + ret i8* %v + +; CHECK-LABEL: @test32 +; CHECK-NOT: llvm.noalias.copy.guard.p0i8 +; CHECK: ret i8* undef +} + +define void @test40() { + %v = call i8* @llvm.noalias.decl.p0i8.p0p0i32.i32(i8** null, i32 0, metadata !1) + ret void + +; CHECK-LABEL: @test40 +; CHECK-NOT: llvm.noalias.decl.p0i8 +; CHECK: ret void +} + +define void @test41() { + %u = alloca i8* + %v = call i8* @llvm.noalias.decl.p0i8.p0p0i32.i32(i8** %u, i32 0, metadata !4) + ret void + +; CHECK-LABEL: @test41 +; CHECK-NOT: alloca +; CHECK-NOT: llvm.noalias.decl.p0i8 +; CHECK: ret void +} + +define i8** @test42() { + %u = alloca i8* + %v = call i8* @llvm.noalias.decl.p0i8.p0p0i32.i32(i8** %u, i32 0, metadata !0) + ret i8** %u + +; CHECK-LABEL: @test42 +; CHECK: alloca +; CHECK: llvm.noalias.decl.p0i8 +; CHECK: ret i8** %u +} + + +declare i8* @llvm.side.noalias.p0i8.p0i8.p0p0i8.p0p0i8.i32(i8*, i8*, i8**, i8**, i32, metadata ) nounwind +declare i8* @llvm.noalias.arg.guard.p0i8.p0i8(i8*, i8*) nounwind readnone +declare i8* @llvm.noalias.copy.guard.p0i8.p0i8(i8*, i8*, metadata, metadata) +declare i8* @llvm.noalias.decl.p0i8.p0p0i32.i32(i8**, i32, metadata) argmemonly nounwind + +!0 = !{!0, !"some domain"} +!1 = !{!1, !0, !"some scope"} +!2 = !{!3} +!3 = !{ i64 -1, i64 0 } +!4 = !{!4, !"some other domain"} +!5 = !{!5, !4, !"some other scope"} Index: llvm/test/Transforms/LICM/noalias.ll =================================================================== --- /dev/null +++ llvm/test/Transforms/LICM/noalias.ll @@ -0,0 +1,87 @@ +; RUN: opt -S -basicaa -licm -enable-mssa-loop-dependency=false %s | FileCheck -check-prefixes=CHECK,AST %s +; RUN: opt -S -basicaa -licm -enable-mssa-loop-dependency=true %s | FileCheck -check-prefixes=CHECK,MSSA %s +; RUN: opt -aa-pipeline=basic-aa -passes='require,require,require,require,loop(licm)' < %s -S | FileCheck -check-prefixes=CHECK,AST %s + + +; Function Attrs: nounwind +define dso_local void @test01(i32* %_p, i32 %n) #0 { +entry: + %0 = call i8* @llvm.noalias.decl.p0i8.p0p0i32.i32(i32** null, i32 0, metadata !2) + br label %do.body + +do.body: ; preds = %do.body, %entry + %n.addr.0 = phi i32 [ %n, %entry ], [ %dec, %do.body ] + %1 = call i32* @llvm.side.noalias.p0i32.p0i8.p0p0i32.p0p0i32.i32(i32* %_p, i8* %0, i32** null, i32** undef, i32 0, metadata !2), !tbaa !5, !noalias !2 + store i32 42, i32* %_p, noalias_sidechannel i32* %1, align 4, !tbaa !9, !noalias !2 + %dec = add nsw i32 %n.addr.0, -1 + %cmp = icmp ne i32 %dec, 0 + br i1 %cmp, label %do.body, label %do.end + +do.end: ; preds = %do.body + ret void +} + +; CHECK-LABEL: @test01( +; CHECK-LABEL: entry: +; CHECK: %0 = call i8* @llvm.noalias.decl.p0i8.p0p0i32.i32(i32** null, i32 0, metadata !2) +; CHECK: %1 = call i32* @llvm.side.noalias.p0i32.p0i8.p0p0i32.p0p0i32.i32(i32* %_p, i8* %0, i32** null, i32** undef, i32 0, metadata !2), !tbaa !5, !noalias !2 +; MSSA: store i32 42, i32* %_p, noalias_sidechannel i32* %1, align 4, !tbaa !9, !noalias !2 +; CHECK-LABEL: do.body: +; CHECK-LABEL: do.end: +; AST: store i32 42, i32* %_p, align 4, !tbaa !9 +; CHECK: ret void + +; Function Attrs: nounwind +define dso_local void @test02(i32* %_p, i32 %n) #0 { +entry: + br label %do.body + +do.body: ; preds = %do.body, %entry + %n.addr.0 = phi i32 [ %n, %entry ], [ %dec, %do.body ] + %0 = call i8* @llvm.noalias.decl.p0i8.p0p0i32.i32(i32** null, i32 0, metadata !11) + %1 = call i32* @llvm.side.noalias.p0i32.p0i8.p0p0i32.p0p0i32.i32(i32* %_p, i8* %0, i32** null, i32** undef, i32 0, metadata !11), !tbaa !5, !noalias !11 + store i32 42, i32* %_p, noalias_sidechannel i32* %1, align 4, !tbaa !9, !noalias !11 + %dec = add nsw i32 %n.addr.0, -1 + %cmp = icmp ne i32 %dec, 0 + br i1 %cmp, label %do.body, label %do.end + +do.end: ; preds = %do.body + ret void +} + +; CHECK-LABEL: @test02( +; CHECK-LABEL: entry: +; CHECK-LABEL: do.body: +; CHECK: %0 = call i8* @llvm.noalias.decl.p0i8.p0p0i32.i32(i32** null, i32 0, metadata !11) +; CHECK: %1 = call i32* @llvm.side.noalias.p0i32.p0i8.p0p0i32.p0p0i32.i32(i32* %_p, i8* %0, i32** null, i32** undef, i32 0, metadata !11), !tbaa !5, !noalias !11 +; CHECK-LABEL: do.end: +; CHECK: store i32 42, i32* %_p, align 4, !tbaa !9 +; CHECK: ret void + +; Function Attrs: argmemonly nounwind +declare i8* @llvm.noalias.decl.p0i8.p0p0i32.i32(i32**, i32, metadata) #1 + +; Function Attrs: nounwind readnone speculatable +declare i32* @llvm.side.noalias.p0i32.p0i8.p0p0i32.p0p0i32.i32(i32*, i8*, i32**, i32**, i32, metadata) #2 + +attributes #0 = { nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #1 = { argmemonly nounwind } +attributes #2 = { nounwind readnone speculatable } + +!llvm.module.flags = !{!0} +!llvm.ident = !{!1} + +!0 = !{i32 1, !"wchar_size", i32 4} +!1 = !{!"clang"} +!2 = !{!3} +!3 = distinct !{!3, !4, !"test01: rp"} +!4 = distinct !{!4, !"test01"} +!5 = !{!6, !6, i64 0, i64 4} +!6 = !{!7, i64 4, !"any pointer"} +!7 = !{!8, i64 1, !"omnipotent char"} +!8 = !{!"Simple C/C++ TBAA"} +!9 = !{!10, !10, i64 0, i64 4} +!10 = !{!7, i64 4, !"int"} +!11 = !{!12} +!12 = distinct !{!12, !13, !"test02: rp"} +!13 = distinct !{!13, !"test02"} Index: llvm/test/Transforms/LoopRotate/noalias.ll =================================================================== --- /dev/null +++ llvm/test/Transforms/LoopRotate/noalias.ll @@ -0,0 +1,186 @@ +; RUN: opt -S -loop-rotate < %s | FileCheck %s +; RUN: opt -S -loop-rotate -enable-mssa-loop-dependency=true -verify-memoryssa < %s | FileCheck %s +; RUN: opt -S -passes='require,require,loop(rotate)' < %s | FileCheck %s +; RUN: opt -S -passes='require,require,loop(rotate)' -enable-mssa-loop-dependency=true -verify-memoryssa < %s | FileCheck %s + +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +declare void @g(i32*) + +define void @test_02(i32* nocapture %_pA) nounwind ssp { +entry: + %array = alloca [20 x i32], align 16 + br label %for.cond + +for.cond: ; preds = %for.body, %entry + %i.0 = phi i32 [ 0, %entry ], [ %inc, %for.body ] + %p.decl = tail call i8* @llvm.noalias.decl.p0i8.p0p0i32.i32(i32** null, i32 0, metadata !2) + %side.p = tail call i32* @llvm.side.noalias.p0i32.p0i8.p0p0i32.p0p0i32.i32(i32* %_pA, i8* %p.decl, i32** null, i32** undef, i32 0, metadata !2), !tbaa !5, !noalias !2 + store i32 42, i32* %_pA, noalias_sidechannel i32* %side.p, align 16 + %cmp = icmp slt i32 %i.0, 100 + %arrayidx = getelementptr inbounds [20 x i32], [20 x i32]* %array, i64 0, i64 0 + br i1 %cmp, label %for.body, label %for.end + +for.body: ; preds = %for.cond + store i32 0, i32* %arrayidx, align 16 + %inc = add nsw i32 %i.0, 1 + br label %for.cond + +for.end: ; preds = %for.cond + %arrayidx.lcssa = phi i32* [ %arrayidx, %for.cond ] + call void @g(i32* %arrayidx.lcssa) nounwind + ret void +} + +; CHECK-LABEL: @test_02( +; CHECK: entry: +; CHECK: %p.decl1 = tail call i8* @llvm.noalias.decl.p0i8.p0p0i32.i32(i32** null, i32 0, metadata !2) +; CHECK: %side.p2 = tail call i32* @llvm.side.noalias.p0i32.p0i8.p0p0i32.p0p0i32.i32(i32* %_pA, i8* %p.decl1, i32** null, i32** undef, i32 0, metadata !2), !tbaa !5, !noalias !2 +; CHECK: store i32 42, i32* %_pA, noalias_sidechannel i32* undef, align 16 +; CHECK: for.body: +; CHECK: %p.decl = tail call i8* @llvm.noalias.decl.p0i8.p0p0i32.i32(i32** null, i32 0, metadata !9) +; CHECK: %0 = tail call i8* @llvm.noalias.decl.p0i8.p0p0i32.i32(i32** null, i32 0, metadata !11) +; CHECK: %side.p = tail call i32* @llvm.side.noalias.p0i32.p0i8.p0p0i32.p0p0i32.i32(i32* %_pA, i8* %0, i32** null, i32** undef, i32 0, metadata !11), !tbaa !5, !noalias !11 +; CHECK: store i32 42, i32* %_pA, noalias_sidechannel i32* %side.p, align 16 +; CHECK: for.end: + + +define void @test_03(i32* nocapture %_pA) nounwind ssp { +entry: + %array = alloca [20 x i32], align 16 + br label %for.cond + +for.cond: ; preds = %for.body, %entry + %i.0 = phi i32 [ 0, %entry ], [ %inc, %for.body ] + %cmp = icmp slt i32 %i.0, 100 + %arrayidx = getelementptr inbounds [20 x i32], [20 x i32]* %array, i64 0, i64 0 + br i1 %cmp, label %for.body, label %for.end + +for.body: ; preds = %for.cond + %p.decl = tail call i8* @llvm.noalias.decl.p0i8.p0p0i32.i32(i32** null, i32 0, metadata !2) + %side.p = tail call i32* @llvm.side.noalias.p0i32.p0i8.p0p0i32.p0p0i32.i32(i32* %_pA, i8* %p.decl, i32** null, i32** undef, i32 0, metadata !2), !tbaa !5, !noalias !2 + store i32 42, i32* %_pA, noalias_sidechannel i32* %side.p, align 16 + store i32 0, i32* %arrayidx, align 16 + %inc = add nsw i32 %i.0, 1 + br label %for.cond + +for.end: ; preds = %for.cond + %arrayidx.lcssa = phi i32* [ %arrayidx, %for.cond ] + call void @g(i32* %arrayidx.lcssa) nounwind + ret void +} +; CHECK-LABEL: @test_03( +; CHECK: entry: +; CHECK: for.body: +; CHECK: %p.decl = tail call i8* @llvm.noalias.decl.p0i8.p0p0i32.i32(i32** null, i32 0, metadata !9) +; CHECK: %side.p = tail call i32* @llvm.side.noalias.p0i32.p0i8.p0p0i32.p0p0i32.i32(i32* %_pA, i8* %p.decl, i32** null, i32** undef, i32 0, metadata !9), !tbaa !5, !noalias !9 +; CHECK: store i32 42, i32* %_pA, noalias_sidechannel i32* %side.p, align 16 +; CHECK: for.end: + +define void @test_04(i32* nocapture %_pA) nounwind ssp { +entry: + %array = alloca [20 x i32], align 16 + br label %for.cond + +for.cond: ; preds = %for.body, %entry + %i.0 = phi i32 [ 0, %entry ], [ %inc, %for.body ] + %p.decl = tail call i8* @llvm.noalias.decl.p0i8.p0p0i32.i32(i32** null, i32 0, metadata !2) + %side.p = tail call i32* @llvm.side.noalias.p0i32.p0i8.p0p0i32.p0p0i32.i32(i32* %_pA, i8* %p.decl, i32** null, i32** undef, i32 0, metadata !2), !tbaa !5, !noalias !2 + store i32 42, i32* %_pA, noalias_sidechannel i32* %side.p, align 16 + %cmp = icmp slt i32 %i.0, 100 + %arrayidx = getelementptr inbounds [20 x i32], [20 x i32]* %array, i64 0, i64 0 + br i1 %cmp, label %for.body, label %for.end + +for.body: ; preds = %for.cond + store i32 0, i32* %arrayidx, align 16 + store i32 43, i32* %_pA, noalias_sidechannel i32* %side.p, align 16 + %inc = add nsw i32 %i.0, 1 + br label %for.cond + +for.end: ; preds = %for.cond + %arrayidx.lcssa = phi i32* [ %arrayidx, %for.cond ] + call void @g(i32* %arrayidx.lcssa) nounwind + ret void +} +; CHECK-LABEL: @test_04( +; CHECK: entry: +; CHECK: %p.decl1 = tail call i8* @llvm.noalias.decl.p0i8.p0p0i32.i32(i32** null, i32 0, metadata !13) +; CHECK: %side.p2 = tail call i32* @llvm.side.noalias.p0i32.p0i8.p0p0i32.p0p0i32.i32(i32* %_pA, i8* %p.decl1, i32** null, i32** undef, i32 0, metadata !13), !tbaa !5, !noalias !13 +; CHECK: store i32 42, i32* %_pA, noalias_sidechannel i32* undef, align 16 +; CHECK: for.body: +; CHECK: %side.p4 = phi i32* [ %side.p2, %entry ], [ %side.p, %for.body ] +; CHECK: %p.decl = tail call i8* @llvm.noalias.decl.p0i8.p0p0i32.i32(i32** null, i32 0, metadata !9) +; CHECK: store i32 43, i32* %_pA, noalias_sidechannel i32* %side.p4, align 16 +; CHECK: %0 = tail call i8* @llvm.noalias.decl.p0i8.p0p0i32.i32(i32** null, i32 0, metadata !15) +; CHECK: %side.p = tail call i32* @llvm.side.noalias.p0i32.p0i8.p0p0i32.p0p0i32.i32(i32* %_pA, i8* %0, i32** null, i32** undef, i32 0, metadata !15), !tbaa !5, !noalias !15 +; CHECK: store i32 42, i32* %_pA, noalias_sidechannel i32* %side.p, align 16 +; CHECK: for.end: + +define void @test_05(i32* nocapture %_pA) nounwind ssp { +entry: + %array = alloca [20 x i32], align 16 + br label %for.cond + +for.cond: ; preds = %for.body, %entry + %i.0 = phi i32 [ 0, %entry ], [ %inc, %for.body ] + %p.decl = tail call i8* @llvm.noalias.decl.p0i8.p0p0i32.i32(i32** null, i32 0, metadata !2) + %side.p = tail call i32* @llvm.side.noalias.p0i32.p0i8.p0p0i32.p0p0i32.i32(i32* %_pA, i8* %p.decl, i32** null, i32** undef, i32 0, metadata !2), !tbaa !5, !noalias !2 + store i32 42, i32* %_pA, noalias_sidechannel i32* %side.p, align 16 + %cmp = icmp slt i32 %i.0, 100 + %arrayidx = getelementptr inbounds [20 x i32], [20 x i32]* %array, i64 0, i64 0 + br i1 %cmp, label %for.body, label %for.end + +for.body: ; preds = %for.cond + store i32 0, i32* %arrayidx, align 16 + store i32 43, i32* %_pA, noalias_sidechannel i32* %side.p, align 16 + %inc = add nsw i32 %i.0, 1 + br label %for.cond + +for.end: ; preds = %for.cond + %arrayidx.lcssa = phi i32* [ %arrayidx, %for.cond ] + store i32 44, i32* %_pA, noalias_sidechannel i32* %side.p, align 16 + call void @g(i32* %arrayidx.lcssa) nounwind + ret void +} +; CHECK-LABEL: @test_05( +; CHECK: entry: +; CHECK: %p.decl1 = tail call i8* @llvm.noalias.decl.p0i8.p0p0i32.i32(i32** null, i32 0, metadata !17) +; CHECK: %side.p2 = tail call i32* @llvm.side.noalias.p0i32.p0i8.p0p0i32.p0p0i32.i32(i32* %_pA, i8* %p.decl1, i32** null, i32** undef, i32 0, metadata !17), !tbaa !5, !noalias !17 +; CHECK: store i32 42, i32* %_pA, noalias_sidechannel i32* undef, align 16 +; CHECK: for.body: +; CHECK: %side.p4 = phi i32* [ %side.p2, %entry ], [ %side.p, %for.body ] +; CHECK: %p.decl = tail call i8* @llvm.noalias.decl.p0i8.p0p0i32.i32(i32** null, i32 0, metadata !9) +; CHECK: store i32 43, i32* %_pA, noalias_sidechannel i32* %side.p4, align 16 +; CHECK: %0 = tail call i8* @llvm.noalias.decl.p0i8.p0p0i32.i32(i32** null, i32 0, metadata !19) +; CHECK: %side.p = tail call i32* @llvm.side.noalias.p0i32.p0i8.p0p0i32.p0p0i32.i32(i32* %_pA, i8* %0, i32** null, i32** undef, i32 0, metadata !19), !tbaa !5, !noalias !19 +; CHECK: store i32 42, i32* %_pA, noalias_sidechannel i32* %side.p, align 16 +; CHECK: for.end: +; CHECK: %side.p.lcssa = phi i32* [ %side.p, %for.body ] +; CHECK: store i32 44, i32* %_pA, noalias_sidechannel i32* %side.p.lcssa, align 16 + + +; Function Attrs: argmemonly nounwind +declare i8* @llvm.noalias.decl.p0i8.p0p0i32.i32(i32**, i32, metadata) #1 + +; Function Attrs: nounwind readnone speculatable +declare i32* @llvm.side.noalias.p0i32.p0i8.p0p0i32.p0p0i32.i32(i32*, i8*, i32**, i32**, i32, metadata) #2 + +attributes #0 = { nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #1 = { argmemonly nounwind } +attributes #2 = { nounwind readnone speculatable } + +!llvm.module.flags = !{!0} +!llvm.ident = !{!1} + +!0 = !{i32 1, !"wchar_size", i32 4} +!1 = !{!"clang"} +!2 = !{!3} +!3 = distinct !{!3, !4, !"test_loop_rotate_XX: pA"} +!4 = distinct !{!4, !"test_loop_rotate_XX"} +!5 = !{!6, !6, i64 0, i64 4} +!6 = !{!7, i64 4, !"any pointer"} +!7 = !{!8, i64 1, !"omnipotent char"} +!8 = !{!"Simple C/C++ TBAA"} +!9 = !{!10, !10, i64 0, i64 4} +!10 = !{!7, i64 4, !"int"} Index: llvm/test/Transforms/LoopUnroll/noalias.ll =================================================================== --- /dev/null +++ llvm/test/Transforms/LoopUnroll/noalias.ll @@ -0,0 +1,132 @@ +; RUN: opt < %s -S -unroll-runtime -unroll-count=2 -loop-unroll | FileCheck %s + +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +; Function Attrs: nounwind +define dso_local void @test_loop_unroll_01(i32* nocapture %_pA) local_unnamed_addr #0 { +entry: + %0 = tail call i8* @llvm.noalias.decl.p0i8.p0p0i32.i32(i32** null, i32 0, metadata !2) + %1 = tail call i32* @llvm.side.noalias.p0i32.p0i8.p0p0i32.p0p0i32.i32(i32* %_pA, i8* %0, i32** null, i32** undef, i32 0, metadata !2), !tbaa !5, !noalias !2 + br label %for.body + +for.cond.cleanup: ; preds = %for.body + ret void + +for.body: ; preds = %for.body, %entry + %i.06 = phi i32 [ 0, %entry ], [ %inc, %for.body ] + %arrayidx = getelementptr inbounds i32, i32* %_pA, i32 %i.06 + store i32 %i.06, i32* %arrayidx, noalias_sidechannel i32* %1, align 4, !tbaa !9, !noalias !2 + %inc = add nuw nsw i32 %i.06, 1 + %exitcond = icmp eq i32 %inc, 4 + br i1 %exitcond, label %for.cond.cleanup, label %for.body +} + +; CHECK: define dso_local void @test_loop_unroll_01(i32* nocapture %_pA) local_unnamed_addr #0 { +; CHECK: entry: +; CHECK: %0 = tail call i8* @llvm.noalias.decl.p0i8.p0p0i32.i32(i32** null, i32 0, metadata !2) +; CHECK: %1 = tail call i32* @llvm.side.noalias.p0i32.p0i8.p0p0i32.p0p0i32.i32(i32* %_pA, i8* %0, i32** null, i32** undef, i32 0, metadata !2), !tbaa !5, !noalias !2 +; CHECK: br label %for.body +; CHECK: for.cond.cleanup: ; preds = %for.body +; CHECK: ret void +; CHECK: for.body: ; preds = %for.body, %entry +; CHECK: %i.06 = phi i32 [ 0, %entry ], [ %inc.1, %for.body ] +; CHECK: %arrayidx = getelementptr inbounds i32, i32* %_pA, i32 %i.06 +; CHECK: store i32 %i.06, i32* %arrayidx, noalias_sidechannel i32* %1, align 4, !tbaa !9, !noalias !2 +; CHECK: %inc = add nuw nsw i32 %i.06, 1 +; CHECK: %arrayidx.1 = getelementptr inbounds i32, i32* %_pA, i32 %inc +; CHECK: store i32 %inc, i32* %arrayidx.1, noalias_sidechannel i32* %1, align 4, !tbaa !9, !noalias !2 +; CHECK: %inc.1 = add nuw nsw i32 %inc, 1 +; CHECK: %exitcond.1 = icmp eq i32 %inc.1, 4 +; CHECK: br i1 %exitcond.1, label %for.cond.cleanup, label %for.body, !llvm.loop !11 +; CHECK: } + + +; Function Attrs: argmemonly nounwind +declare i8* @llvm.noalias.decl.p0i8.p0p0i32.i32(i32**, i32, metadata) #1 + +; Function Attrs: nounwind +define dso_local void @test_loop_unroll_02(i32* nocapture %_pA) local_unnamed_addr #0 { +entry: + br label %for.body + +for.cond.cleanup: ; preds = %for.body + ret void + +for.body: ; preds = %for.body, %entry + %i.06 = phi i32 [ 0, %entry ], [ %inc, %for.body ] + %0 = tail call i8* @llvm.noalias.decl.p0i8.p0p0i32.i32(i32** null, i32 0, metadata !11) + %1 = tail call i32* @llvm.side.noalias.p0i32.p0i8.p0p0i32.p0p0i32.i32(i32* %_pA, i8* %0, i32** null, i32** undef, i32 0, metadata !11), !tbaa !5, !noalias !11 + %arrayidx = getelementptr inbounds i32, i32* %_pA, i32 %i.06 + store i32 %i.06, i32* %arrayidx, noalias_sidechannel i32* %1, align 4, !tbaa !9, !noalias !11 + %inc = add nuw nsw i32 %i.06, 1 + %exitcond = icmp eq i32 %inc, 4 + br i1 %exitcond, label %for.cond.cleanup, label %for.body +} + +; CHECK: define dso_local void @test_loop_unroll_02(i32* nocapture %_pA) local_unnamed_addr #0 { +; CHECK: entry: +; CHECK: br label %for.body +; CHECK: for.cond.cleanup: ; preds = %for.body +; CHECK: ret void +; CHECK: for.body: ; preds = %for.body, %entry +; CHECK: %i.06 = phi i32 [ 0, %entry ], [ %inc.1, %for.body ] +; CHECK: %0 = tail call i8* @llvm.noalias.decl.p0i8.p0p0i32.i32(i32** null, i32 0, metadata !13) +; CHECK: %1 = tail call i32* @llvm.side.noalias.p0i32.p0i8.p0p0i32.p0p0i32.i32(i32* %_pA, i8* %0, i32** null, i32** undef, i32 0, metadata !13), !tbaa !5, !noalias !13 +; CHECK: %arrayidx = getelementptr inbounds i32, i32* %_pA, i32 %i.06 +; CHECK: store i32 %i.06, i32* %arrayidx, noalias_sidechannel i32* %1, align 4, !tbaa !9, !noalias !13 +; CHECK: %inc = add nuw nsw i32 %i.06, 1 +; CHECK: %2 = tail call i8* @llvm.noalias.decl.p0i8.p0p0i32.i32(i32** null, i32 0, metadata !16) +; CHECK: %3 = tail call i32* @llvm.side.noalias.p0i32.p0i8.p0p0i32.p0p0i32.i32(i32* %_pA, i8* %2, i32** null, i32** undef, i32 0, metadata !16), !tbaa !5, !noalias !16 +; CHECK: %arrayidx.1 = getelementptr inbounds i32, i32* %_pA, i32 %inc +; CHECK: store i32 %inc, i32* %arrayidx.1, noalias_sidechannel i32* %3, align 4, !tbaa !9, !noalias !16 +; CHECK: %inc.1 = add nuw nsw i32 %inc, 1 +; CHECK: %exitcond.1 = icmp eq i32 %inc.1, 4 +; CHECK: br i1 %exitcond.1, label %for.cond.cleanup, label %for.body, !llvm.loop !18 +; CHECK: } + + +; Function Attrs: nounwind readnone speculatable +declare i32* @llvm.side.noalias.p0i32.p0i8.p0p0i32.p0p0i32.i32(i32*, i8*, i32**, i32**, i32, metadata) #2 + +attributes #0 = { nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #1 = { argmemonly nounwind } +attributes #2 = { nounwind readnone speculatable } + +!llvm.module.flags = !{!0} +!llvm.ident = !{!1} + +!0 = !{i32 1, !"wchar_size", i32 4} +!1 = !{!"clang"} +!2 = !{!3} +!3 = distinct !{!3, !4, !"test_loop_unroll_01: pA"} +!4 = distinct !{!4, !"test_loop_unroll_01"} +!5 = !{!6, !6, i64 0, i64 4} +!6 = !{!7, i64 4, !"any pointer"} +!7 = !{!8, i64 1, !"omnipotent char"} +!8 = !{!"Simple C/C++ TBAA"} +!9 = !{!10, !10, i64 0, i64 4} +!10 = !{!7, i64 4, !"int"} +!11 = !{!12} +!12 = distinct !{!12, !13, !"test_loop_unroll_02: pA"} +!13 = distinct !{!13, !"test_loop_unroll_02"} + +; CHECK: !0 = !{i32 1, !"wchar_size", i32 4} +; CHECK: !1 = !{!"clang"} +; CHECK: !2 = !{!3} +; CHECK: !3 = distinct !{!3, !4, !"test_loop_unroll_01: pA"} +; CHECK: !4 = distinct !{!4, !"test_loop_unroll_01"} +; CHECK: !5 = !{!6, !6, i64 0, i64 4} +; CHECK: !6 = !{!7, i64 4, !"any pointer"} +; CHECK: !7 = !{!8, i64 1, !"omnipotent char"} +; CHECK: !8 = !{!"Simple C/C++ TBAA"} +; CHECK: !9 = !{!10, !10, i64 0, i64 4} +; CHECK: !10 = !{!7, i64 4, !"int"} +; CHECK: !11 = distinct !{!11, !12} +; CHECK: !12 = !{!"llvm.loop.unroll.disable"} +; CHECK: !13 = !{!14} +; CHECK: !14 = distinct !{!14, !15, !"test_loop_unroll_02: pA"} +; CHECK: !15 = distinct !{!15, !"test_loop_unroll_02"} +; CHECK: !16 = !{!17} +; CHECK: !17 = distinct !{!17, !15, !"test_loop_unroll_02: pA:It1"} +; CHECK: !18 = distinct !{!18, !12} Index: llvm/test/Transforms/LoopVectorize/noalias.ll =================================================================== --- /dev/null +++ llvm/test/Transforms/LoopVectorize/noalias.ll @@ -0,0 +1,90 @@ +; RUN: opt -S -loop-vectorize -force-vector-width=2 -force-vector-interleave=1 < %s | FileCheck %s + +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" + +; Make sure we can vectorize loops which contain noalias intrinsics + + +; A not-used llvm.noalias should not interfere +; CHECK-LABEL: @test_noalias_not_connected( +; CHECK: @llvm.noalias.p0i32 +; CHECK: store <2 x i32> +; CHECK: ret +define void @test_noalias_not_connected(i32 *%d) { +entry: + br label %for.body + +for.body: + %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] + %d2 = call i32* @llvm.noalias.p0i32.p0i8.p0p0i32.i32(i32* %d, i8* null, i32** null, i32 0, metadata !1) + %arrayidx = getelementptr inbounds i32, i32* %d, i64 %indvars.iv + %v1 = load i32, i32* %arrayidx, align 8 + store i32 100, i32* %arrayidx, align 8 + %indvars.iv.next = add i64 %indvars.iv, 1 + %lftr.wideiv = trunc i64 %indvars.iv.next to i32 + %exitcond = icmp ne i32 %lftr.wideiv, 128 + br i1 %exitcond, label %for.body, label %for.end + +for.end: + ret void +} + +; A used llvm.noalias should block vectorization. +; CHECK-LABEL: @test_noalias_connected( +; CHECK: @llvm.noalias.p0i32 +; CHECK-NOT: store <2 x i32> +; CHECK: ret +define void @test_noalias_connected(i32 *%d) { +entry: + br label %for.body + +for.body: + %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] + %d2 = call i32* @llvm.noalias.p0i32.p0i8.p0p0i32.i32(i32* %d, i8* null, i32** null, i32 0, metadata !1) + %arrayidx = getelementptr inbounds i32, i32* %d2, i64 %indvars.iv + %v1 = load i32, i32* %arrayidx, align 8 + store i32 100, i32* %arrayidx, align 8 + %indvars.iv.next = add i64 %indvars.iv, 1 + %lftr.wideiv = trunc i64 %indvars.iv.next to i32 + %exitcond = icmp ne i32 %lftr.wideiv, 128 + br i1 %exitcond, label %for.body, label %for.end + +for.end: + ret void +} + +; A used llvm.side.noalias should NOT block vectorization. +; CHECK-LABEL: @test_side_noalias( +; CHECK: @llvm.side.noalias.p0i32 +; NOTE: the noalias_sidechannel is omitted +; CHECK: store <2 x i32> , <2 x i32>* {{%[0-9.a-zA-Z]*}}, align 8 +; CHECK: ret + +define void @test_side_noalias(i32 *%d) { +entry: + br label %for.body + +for.body: + %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] + %side.d = call i32* @llvm.side.noalias.p0i32.p0i8.p0p0i32.p0p0i32.i32(i32* %d, i8* null, i32** null, i32** null, i32 0, metadata !1) + %arrayidx = getelementptr inbounds i32, i32* %d, i64 %indvars.iv + %v1 = load i32, i32* %arrayidx, noalias_sidechannel i32* %side.d, align 8 + store i32 100, i32* %arrayidx, noalias_sidechannel i32* %side.d, align 8 + %indvars.iv.next = add i64 %indvars.iv, 1 + %lftr.wideiv = trunc i64 %indvars.iv.next to i32 + %exitcond = icmp ne i32 %lftr.wideiv, 128 + br i1 %exitcond, label %for.body, label %for.end + +for.end: + ret void +} + +declare i8* @llvm.noalias.decl.p0i8.p0p0i32.i32(i8**, i32, metadata) argmemonly nounwind +declare i32* @llvm.noalias.p0i32.p0i8.p0p0i32.i32(i32*, i8*, i32**, i32, metadata ) nounwind +declare i32* @llvm.side.noalias.p0i32.p0i8.p0p0i32.p0p0i32.i32(i32*, i8*, i32**, i32**, i32, metadata ) nounwind + + +;declare i32* @llvm.noalias.p0i32(i32*, metadata) nounwind argmemonly + +!0 = !{!0, !"some domain"} +!1 = !{!1, !0, !"some scope"} Index: llvm/test/Transforms/PropagateAndConvertNoAlias/basictest.ll =================================================================== --- /dev/null +++ llvm/test/Transforms/PropagateAndConvertNoAlias/basictest.ll @@ -0,0 +1,79 @@ +; RUN: opt < %s -convert-noalias -verify -S | FileCheck %s +; RUN: opt < %s -passes=convert-noalias,verify -S | FileCheck %s + +@gpA = common dso_local global i32* null, align 4 + +; Function Attrs: nounwind +define dso_local void @test01(i32* %_pA) #0 { +entry: + %pA.decl = call i8* @llvm.noalias.decl.p0i8.p0p0i32.i32(i32** null, i32 0, metadata !2) + %pA = call i32* @llvm.noalias.p0i32.p0i8.p0p0i32.i32(i32* %_pA, i8* %pA.decl, i32** null, i32 0, metadata !2), !tbaa !5, !noalias !2 + %arrayidx = getelementptr inbounds i32, i32* %pA, i32 10 + store i32 42, i32* %arrayidx, align 4, !tbaa !9, !noalias !2 + %pA.2 = call i32* @llvm.noalias.p0i32.p0i8.p0p0i32.i32(i32* %_pA, i8* %pA.decl, i32** null, i32 0, metadata !2), !tbaa !5, !noalias !2 + %add.ptr = getelementptr inbounds i32, i32* %pA.2, i32 1 + %pA.3 = call i32* @llvm.noalias.p0i32.p0i8.p0p0i32.i32(i32* %add.ptr, i8* %pA.decl, i32** null, i32 0, metadata !2), !tbaa !5, !noalias !2 + %arrayidx1 = getelementptr inbounds i32, i32* %pA.3, i32 11 + store i32 43, i32* %arrayidx1, align 4, !tbaa !9, !noalias !2 + ret void +} + +; CHECK-LABEL: @test01( +; CHECK-NEXT: entry: +; CHECK-NEXT: %pA.decl = call i8* @llvm.noalias.decl.p0i8.p0p0i32.i32(i32** null, i32 0, metadata !2) +; CHECK-NEXT: %0 = call i32* @llvm.side.noalias.p0i32.p0i8.p0p0i32.p0p0i32.i32(i32* %_pA, i8* %pA.decl, i32** null, i32** undef, i32 0, metadata !2), !tbaa !5, !noalias !2 +; CHECK-NEXT: %arrayidx = getelementptr inbounds i32, i32* %_pA, i32 10 +; CHECK-NEXT: store i32 42, i32* %arrayidx, noalias_sidechannel i32* %0, align 4, !tbaa !9, !noalias !2 +; CHECK-NEXT: %add.ptr = getelementptr inbounds i32, i32* %_pA, i32 1 +; CHECK-NEXT: %arrayidx1 = getelementptr inbounds i32, i32* %add.ptr, i32 11 +; CHECK-NEXT: store i32 43, i32* %arrayidx1, noalias_sidechannel i32* %0, align 4, !tbaa !9, !noalias !2 +; CHECK-NEXT: ret void +; CHECK-NEXT: } + +; Function Attrs: nounwind +define dso_local i32* @test02(i32* %_pA) #0 { +entry: + %pA.decl = call i8* @llvm.noalias.decl.p0i8.p0p0i32.i32(i32** null, i32 0, metadata !11) + %pA.1 = call i32* @llvm.noalias.p0i32.p0i8.p0p0i32.i32(i32* %_pA, i8* %pA.decl, i32** null, i32 0, metadata !11), !tbaa !5, !noalias !11 + store i32* %pA.1, i32** @gpA, align 4, !tbaa !5, !noalias !11 + %pA.2 = call i32* @llvm.noalias.p0i32.p0i8.p0p0i32.i32(i32* %_pA, i8* %pA.decl, i32** null, i32 0, metadata !11), !tbaa !5, !noalias !11 + ret i32* %pA.2 +} + +; CHECK-LABEL: @test02( +; CHECK-NEXT: entry: +; CHECK-NEXT: %pA.decl = call i8* @llvm.noalias.decl.p0i8.p0p0i32.i32(i32** null, i32 0, metadata !11) +; CHECK-NEXT: %0 = call i32* @llvm.side.noalias.p0i32.p0i8.p0p0i32.p0p0i32.i32(i32* %_pA, i8* %pA.decl, i32** null, i32** undef, i32 0, metadata !11), !tbaa !5, !noalias !11 +; CHECK-NEXT: store i32* %_pA, i32** @gpA, noalias_sidechannel i32** undef, align 4, !tbaa !5, !noalias !11 +; CHECK-NEXT: %pA.2.guard = call i32* @llvm.noalias.arg.guard.p0i32.p0i32(i32* %_pA, i32* %0) +; CHECK-NEXT: ret i32* %pA.2.guard +; CHECK-NEXT: } + + +; Function Attrs: argmemonly nounwind +declare i8* @llvm.noalias.decl.p0i8.p0p0i32.i32(i32**, i32, metadata) #1 + +; Function Attrs: argmemonly nounwind speculatable +declare i32* @llvm.noalias.p0i32.p0i8.p0p0i32.i32(i32*, i8*, i32**, i32, metadata) #2 + +attributes #0 = { nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #1 = { argmemonly nounwind } +attributes #2 = { argmemonly nounwind speculatable } + +!llvm.module.flags = !{!0} +!llvm.ident = !{!1} + +!0 = !{i32 1, !"wchar_size", i32 4} +!1 = !{!"clang"} +!2 = !{!3} +!3 = distinct !{!3, !4, !"test01: pA"} +!4 = distinct !{!4, !"test01"} +!5 = !{!6, !6, i64 0, i64 4} +!6 = !{!7, i64 4, !"any pointer"} +!7 = !{!8, i64 1, !"omnipotent char"} +!8 = !{!"Simple C/C++ TBAA"} +!9 = !{!10, !10, i64 0, i64 4} +!10 = !{!7, i64 4, !"int"} +!11 = !{!12} +!12 = distinct !{!12, !13, !"test02: pA"} +!13 = distinct !{!13, !"test02"} Index: llvm/test/Transforms/PropagateAndConvertNoAlias/call.ll =================================================================== --- /dev/null +++ llvm/test/Transforms/PropagateAndConvertNoAlias/call.ll @@ -0,0 +1,112 @@ +; RUN: opt < %s -convert-noalias -verify -S | FileCheck %s +; RUN: opt < %s -passes=convert-noalias,verify -S | FileCheck %s + +target datalayout = "e-p:64:64:64-p1:16:16:16-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-n8:16:32:64" + +; Function Attrs: nounwind +define dso_local i32* @passP(i32* %_pA) #0 { +entry: + %0 = call i8* @llvm.noalias.decl.p0i8.p0p0i32.i32(i32** null, i32 0, metadata !2) + %1 = call i32* @llvm.noalias.p0i32.p0i8.p0p0i32.i32(i32* %_pA, i8* %0, i32** null, i32 0, metadata !2), !tbaa !5, !noalias !2 + store i32 42, i32* %1, align 4, !tbaa !9, !noalias !2 + %2 = call i32* @llvm.noalias.p0i32.p0i8.p0p0i32.i32(i32* %_pA, i8* %0, i32** null, i32 0, metadata !2), !tbaa !5, !noalias !2 + ret i32* %2 +} + +; CHECK-LABEL: @passP( +; CHECK-NEXT: entry: +; CHECK-NEXT: %0 = call i8* @llvm.noalias.decl.p0i8.p0p0i32.i32(i32** null, i32 0, metadata !2) +; CHECK-NEXT: %1 = call i32* @llvm.side.noalias.p0i32.p0i8.p0p0i32.p0p0i32.i32(i32* %_pA, i8* %0, i32** null, i32** undef, i32 0, metadata !2), !tbaa !5, !noalias !2 +; CHECK-NEXT: store i32 42, i32* %_pA, noalias_sidechannel i32* %1, align 4, !tbaa !9, !noalias !2 +; CHECK-NEXT: %.guard = call i32* @llvm.noalias.arg.guard.p0i32.p0i32(i32* %_pA, i32* %1) +; CHECK-NEXT: ret i32* %.guard +; CHECK-NEXT: } + +; Function Attrs: nounwind +define dso_local void @test01(i32* %_pA) #0 { +entry: + %0 = call i8* @llvm.noalias.decl.p0i8.p0p0i32.i32(i32** null, i32 0, metadata !11) + %1 = call i32* @llvm.noalias.p0i32.p0i8.p0p0i32.i32(i32* %_pA, i8* %0, i32** null, i32 0, metadata !11), !tbaa !5, !noalias !11 + store i32 41, i32* %1, align 4, !tbaa !9, !noalias !11 + %2 = call i32* @llvm.noalias.p0i32.p0i8.p0p0i32.i32(i32* %_pA, i8* %0, i32** null, i32 0, metadata !11), !tbaa !5, !noalias !11 + %call = call i32* @passP(i32* %2), !noalias !11 + store i32 43, i32* %call, align 4, !tbaa !9, !noalias !11 + ret void +} + +; CHECK-LABEL: @test01( +; CHECK-NEXT: entry: +; CHECK-NEXT: %0 = call i8* @llvm.noalias.decl.p0i8.p0p0i32.i32(i32** null, i32 0, metadata !11) +; CHECK-NEXT: %1 = call i32* @llvm.side.noalias.p0i32.p0i8.p0p0i32.p0p0i32.i32(i32* %_pA, i8* %0, i32** null, i32** undef, i32 0, metadata !11), !tbaa !5, !noalias !11 +; CHECK-NEXT: store i32 41, i32* %_pA, noalias_sidechannel i32* %1, align 4, !tbaa !9, !noalias !11 +; CHECK-NEXT: %.guard = call i32* @llvm.noalias.arg.guard.p0i32.p0i32(i32* %_pA, i32* %1) +; CHECK-NEXT: %call = call i32* @passP(i32* %.guard), !noalias !11 +; CHECK-NEXT: store i32 43, i32* %call, noalias_sidechannel i32* undef, align 4, !tbaa !9, !noalias !11 +; CHECK-NEXT: ret void +; CHECK-NEXT: } + +; Function Attrs: nounwind +define dso_local void @test02(i32* %_pA) #0 { +entry: + %0 = call i8* @llvm.noalias.decl.p0i8.p0p0i32.i32(i32** null, i32 0, metadata !11) + %1 = call i32* @llvm.noalias.p0i32.p0i8.p0p0i32.i32(i32* %_pA, i8* %0, i32** null, i32 0, metadata !11), !tbaa !5, !noalias !11 + store i32 41, i32* %1, align 4, !tbaa !9, !noalias !11 + %2 = call i32* @llvm.noalias.p0i32.p0i8.p0p0i32.i32(i32* %_pA, i8* %0, i32** null, i32 0, metadata !11), !tbaa !5, !noalias !11 + br label %block + +block: + %tmp0 = phi i32* [ %2, %entry ] + %tmp1 = phi i32* [ %1, %entry ] + %call = call i32* @passP(i32* %tmp0), !noalias !11 + store i32 43, i32* %call, align 4, !tbaa !9, !noalias !11 + ret void +} + + +; CHECK-LABEL: @test02( +; CHECK-NEXT: entry: +; CHECK-NEXT: %0 = call i8* @llvm.noalias.decl.p0i8.p0p0i32.i32(i32** null, i32 0, metadata !11) +; CHECK-NEXT: %1 = call i32* @llvm.side.noalias.p0i32.p0i8.p0p0i32.p0p0i32.i32(i32* %_pA, i8* %0, i32** null, i32** undef, i32 0, metadata !11), !tbaa !5, !noalias !11 +; CHECK-NEXT: store i32 41, i32* %_pA, noalias_sidechannel i32* %1, align 4, !tbaa !9, !noalias !11 +; CHECK-NEXT: br label %block +; CHECK: block: +; CHECK-NEXT: %side.tmp0 = phi i32* [ %1, %entry ] +; CHECK-NEXT: %tmp0 = phi i32* [ %_pA, %entry ] +; CHECK-NEXT: %side.tmp1 = phi i32* [ %1, %entry ] +; CHECK-NEXT: %tmp1 = phi i32* [ %_pA, %entry ] +; CHECK-NEXT: %tmp0.guard = call i32* @llvm.noalias.arg.guard.p0i32.p0i32(i32* %tmp0, i32* %side.tmp0) +; CHECK-NEXT: %call = call i32* @passP(i32* %tmp0.guard), !noalias !11 +; CHECK-NEXT: store i32 43, i32* %call, noalias_sidechannel i32* undef, align 4, !tbaa !9, !noalias !11 +; CHECK-NEXT: ret void +; CHECK-NEXT: } + +; Function Attrs: argmemonly nounwind +declare i8* @llvm.noalias.decl.p0i8.p0p0i32.i32(i32**, i32, metadata) #2 + +; Function Attrs: argmemonly nounwind speculatable +declare i32* @llvm.noalias.p0i32.p0i8.p0p0i32.i32(i32*, i8*, i32**, i32, metadata) #3 + +attributes #0 = { nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #1 = { argmemonly nounwind willreturn } +attributes #2 = { argmemonly nounwind } +attributes #3 = { argmemonly nounwind speculatable } +attributes #4 = { nounwind readnone speculatable } +attributes #5 = { nounwind readnone } + +!llvm.module.flags = !{!0} +!llvm.ident = !{!1} + +!0 = !{i32 1, !"wchar_size", i32 4} +!1 = !{!"clang"} +!2 = !{!3} +!3 = distinct !{!3, !4, !"passP: pA"} +!4 = distinct !{!4, !"passP"} +!5 = !{!6, !6, i64 0, i64 4} +!6 = !{!7, i64 4, !"any pointer"} +!7 = !{!8, i64 1, !"omnipotent char"} +!8 = !{!"Simple C/C++ TBAA"} +!9 = !{!10, !10, i64 0, i64 4} +!10 = !{!7, i64 4, !"int"} +!11 = !{!12} +!12 = distinct !{!12, !13, !"test01: p1"} +!13 = distinct !{!13, !"test01"} Index: llvm/test/Transforms/PropagateAndConvertNoAlias/degenerated.ll =================================================================== --- /dev/null +++ llvm/test/Transforms/PropagateAndConvertNoAlias/degenerated.ll @@ -0,0 +1,89 @@ +; RUN: opt < %s -convert-noalias -verify -S | FileCheck %s +; RUN: opt < %s -passes=convert-noalias,verify -S | FileCheck %s + +target datalayout = "e-p:64:64:64-p1:16:16:16-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-n8:16:32:64" + +; Function Attrs: nounwind +define dso_local void @test01(i32* %_pA) local_unnamed_addr #0 { +entry: + %0 = call i8* @llvm.noalias.decl.p0i8.p0p0i32.i32(i32** null, i32 0, metadata !2) + %1 = call i32* @llvm.side.noalias.p0i32.p0i8.p0p0i32.p0p0i32.i32(i32* %_pA, i8* %0, i32** null, i32** undef, i32 0, metadata !2), !tbaa !5, !noalias !2 + store i32 41, i32* %_pA, noalias_sidechannel i32* %1, align 4, !tbaa !9, !noalias !2 + %.guard = call i32* @llvm.noalias.arg.guard.p0i32.p0i32(i32* %_pA, i32* %1) + %2 = call i8* @llvm.noalias.decl.p0i8.p0p0i32.i32(i32** null, i32 0, metadata !11) #5, !noalias !2 + %3 = call i32* @llvm.side.noalias.p0i32.p0i8.p0p0i32.p0p0i32.i32(i32* %.guard, i8* %2, i32** null, i32** undef, i32 0, metadata !11) #3, !tbaa !5, !noalias !14 + store i32 42, i32* %.guard, noalias_sidechannel i32* %3, align 4, !tbaa !9, !noalias !14 + %.guard.guard.guard.guard.i = call i32* @llvm.noalias.arg.guard.p0i32.p0i32(i32* %.guard, i32* %3) #1 + store i32 43, i32* %.guard.guard.guard.guard.i, noalias_sidechannel i32* undef, align 4, !tbaa !9, !noalias !2 + ret void +} + +; CHECK-LABEL: @test01( +; CHECK-NEXT: entry: +; CHECK-NEXT: %0 = call i8* @llvm.noalias.decl.p0i8.p0p0i32.i32(i32** null, i32 0, metadata !2) +; CHECK-NEXT: %1 = call i32* @llvm.side.noalias.p0i32.p0i8.p0p0i32.p0p0i32.i32(i32* %_pA, i8* %0, i32** null, i32** undef, i32 0, metadata !2), !tbaa !5, !noalias !2 +; CHECK-NEXT: store i32 41, i32* %_pA, noalias_sidechannel i32* %1, align 4, !tbaa !9, !noalias !2 +; CHECK-NEXT: %2 = call i8* @llvm.noalias.decl.p0i8.p0p0i32.i32(i32** null, i32 0, metadata !11) #5, !noalias !2 +; CHECK-NEXT: %3 = call i32* @llvm.side.noalias.p0i32.p0i8.p0p0i32.p0p0i32.i32(i32* %1, i8* %2, i32** null, i32** undef, i32 0, metadata !11) #3, !tbaa !5, !noalias !14 +; CHECK-NEXT: store i32 42, i32* %_pA, noalias_sidechannel i32* %3, align 4, !tbaa !9, !noalias !14 +; CHECK-NEXT: store i32 43, i32* %_pA, noalias_sidechannel i32* %3, align 4, !tbaa !9, !noalias !2 +; CHECK-NEXT: ret void +; CHECK-NEXT: } + +; Function Attrs: nounwind +define dso_local void @test02(i32* %_pA) local_unnamed_addr #0 { +entry: + %0 = call i8* @llvm.noalias.decl.p0i8.p0p0i32.i32(i32** null, i32 0, metadata !2) + %1 = call i32* @llvm.side.noalias.p0i32.p0i8.p0p0i32.p0p0i32.i32(i32* %_pA, i8* %0, i32** null, i32** undef, i32 0, metadata !2), !tbaa !5, !noalias !2 + store i32 41, i32* %_pA, noalias_sidechannel i32* %1, align 4, !tbaa !9, !noalias !2 + %.guard = call i32* @llvm.noalias.arg.guard.p0i32.p0i32(i32* %_pA, i32* %1) + %2 = call i8* @llvm.noalias.decl.p0i8.p0p0i32.i32(i32** null, i32 0, metadata !11) #5, !noalias !2 + %3 = call i32* @llvm.side.noalias.p0i32.p0i8.p0p0i32.p0p0i32.i32(i32* %.guard, i8* %2, i32** null, i32** undef, i32 0, metadata !11) #3, !tbaa !5, !noalias !14 + store i32 42, i32* %.guard, noalias_sidechannel i32* %3, align 4, !tbaa !9, !noalias !14 + %deg01 = call i32* @llvm.noalias.arg.guard.p0i32.p0i32(i32* %.guard, i32* %.guard) #1 + %deg02 = call i32* @llvm.noalias.arg.guard.p0i32.p0i32(i32* %.guard, i32* undef) #1 + call void @foo(i32* %deg01), !noalias !14 + call void @foo(i32* %deg01), !noalias !14 + ret void +} + + +; Function Attrs: argmemonly nounwind +declare i8* @llvm.noalias.decl.p0i8.p0p0i32.i32(i32**, i32, metadata) #1 + +; Function Attrs: argmemonly nounwind speculatable +declare i32* @llvm.noalias.p0i32.p0i8.p0p0i32.i32(i32*, i8*, i32**, i32, metadata) #2 + +; Function Attrs: nounwind readnone speculatable +declare i32* @llvm.side.noalias.p0i32.p0i8.p0p0i32.p0p0i32.i32(i32*, i8*, i32**, i32**, i32, metadata) #3 + +; Function Attrs: nounwind readnone +declare i32* @llvm.noalias.arg.guard.p0i32.p0i32(i32*, i32*) #4 + +declare void @foo(i32*) + +attributes #0 = { nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #1 = { argmemonly nounwind } +attributes #2 = { argmemonly nounwind speculatable } +attributes #3 = { nounwind readnone speculatable } +attributes #4 = { nounwind readnone } +attributes #5 = { nounwind } + +!llvm.module.flags = !{!0} +!llvm.ident = !{!1} + +!0 = !{i32 1, !"wchar_size", i32 4} +!1 = !{!"clang"} +!2 = !{!3} +!3 = distinct !{!3, !4, !"test01: p1"} +!4 = distinct !{!4, !"test01"} +!5 = !{!6, !6, i64 0, i64 4} +!6 = !{!7, i64 4, !"any pointer"} +!7 = !{!8, i64 1, !"omnipotent char"} +!8 = !{!"Simple C/C++ TBAA"} +!9 = !{!10, !10, i64 0, i64 4} +!10 = !{!7, i64 4, !"int"} +!11 = !{!12} +!12 = distinct !{!12, !13, !"passP: pA"} +!13 = distinct !{!13, !"passP"} +!14 = !{!12, !3} Index: llvm/test/Transforms/PropagateAndConvertNoAlias/double_noalias.ll =================================================================== --- /dev/null +++ llvm/test/Transforms/PropagateAndConvertNoAlias/double_noalias.ll @@ -0,0 +1,69 @@ +; RUN: opt < %s -convert-noalias -verify -S | FileCheck %s +; RUN: opt < %s -passes=convert-noalias,verify -S | FileCheck %s + +target datalayout = "e-i8:8:8-i16:16:16-i32:32:32-i64:32:32-f16:16:16-f32:32:32-f64:32:32-p:32:32:32:32:8-s0:32:32-a0:0:32-S32-n16:32-v128:32:32-P0-p0:32:32:32:32:8" + +; Function Attrs: nounwind +define dso_local void @test_rr(i32** %_p) #0 !noalias !2 { +entry: + %0 = call i8* @llvm.noalias.decl.p0i8.p0p0p0i32.i32(i32*** null, i32 0, metadata !5) + %1 = call i32** @llvm.noalias.p0p0i32.p0i8.p0p0p0i32.i32(i32** %_p, i8* %0, i32*** null, i32 0, metadata !5), !tbaa !7, !noalias !11 + %arrayidx = getelementptr inbounds i32*, i32** %1, i32 1 + %2 = load i32*, i32** %arrayidx, align 4, !tbaa !7, !noalias !11 + %3 = call i32* @llvm.noalias.p0i32.p0i8.p0p0i32.i32(i32* %2, i8* null, i32** %arrayidx, i32 0, metadata !2), !tbaa !7, !noalias !11 + %arrayidx1 = getelementptr inbounds i32, i32* %3, i32 2 + %4 = load i32, i32* %arrayidx1, align 4, !tbaa !12, !noalias !11 + %add = add nsw i32 %4, 1 + %5 = call i32** @llvm.noalias.p0p0i32.p0i8.p0p0p0i32.i32(i32** %_p, i8* %0, i32*** null, i32 0, metadata !5), !tbaa !7, !noalias !11 + %6 = load i32*, i32** %5, align 4, !tbaa !7, !noalias !11 + %7 = call i32* @llvm.noalias.p0i32.p0i8.p0p0i32.i32(i32* %6, i8* null, i32** %5, i32 0, metadata !2), !tbaa !7, !noalias !11 + store i32 %add, i32* %7, align 4, !tbaa !12, !noalias !11 + ret void +} + +; CHECK-LABEL: @test_rr( +; CHECK-NEXT: entry: +; CHECK-NEXT: %0 = call i8* @llvm.noalias.decl.p0i8.p0p0p0i32.i32(i32*** null, i32 0, metadata !5) +; CHECK-NEXT: %1 = call i32** @llvm.side.noalias.p0p0i32.p0i8.p0p0p0i32.p0p0p0i32.i32(i32** %_p, i8* %0, i32*** null, i32*** undef, i32 0, metadata !5), !tbaa !7, !noalias !11 +; CHECK-NEXT: %arrayidx = getelementptr inbounds i32*, i32** %_p, i32 1 +; CHECK-NEXT: %2 = load i32*, i32** %arrayidx, noalias_sidechannel i32** %1, align 4, !tbaa !7, !noalias !11 +; CHECK-NEXT: %3 = call i32* @llvm.side.noalias.p0i32.p0i8.p0p0i32.p0p0i32.i32(i32* %2, i8* null, i32** %arrayidx, i32** %1, i32 0, metadata !2), !tbaa !7, !noalias !11 +; CHECK-NEXT: %arrayidx1 = getelementptr inbounds i32, i32* %2, i32 2 +; CHECK-NEXT: %4 = load i32, i32* %arrayidx1, noalias_sidechannel i32* %3, align 4, !tbaa !12, !noalias !11 +; CHECK-NEXT: %add = add nsw i32 %4, 1 +; CHECK-NEXT: %5 = load i32*, i32** %_p, noalias_sidechannel i32** %1, align 4, !tbaa !7, !noalias !11 +; CHECK-NEXT: %6 = call i32* @llvm.side.noalias.p0i32.p0i8.p0p0i32.p0p0i32.i32(i32* %5, i8* null, i32** %_p, i32** %1, i32 0, metadata !2), !tbaa !7, !noalias !11 +; CHECK-NEXT: store i32 %add, i32* %5, noalias_sidechannel i32* %6, align 4, !tbaa !12, !noalias !11 +; CHECK-NEXT: ret void +; CHECK-NEXT: } + +; Function Attrs: argmemonly nounwind speculatable +declare i32* @llvm.noalias.p0i32.p0i8.p0p0i32.i32(i32*, i8*, i32**, i32, metadata) #1 + +; Function Attrs: argmemonly nounwind +declare i8* @llvm.noalias.decl.p0i8.p0p0p0i32.i32(i32***, i32, metadata) #2 + +; Function Attrs: argmemonly nounwind speculatable +declare i32** @llvm.noalias.p0p0i32.p0i8.p0p0p0i32.i32(i32**, i8*, i32***, i32, metadata) #1 + +attributes #0 = { nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #1 = { argmemonly nounwind speculatable } +attributes #2 = { argmemonly nounwind } + +!llvm.module.flags = !{!0} +!llvm.ident = !{!1} + +!0 = !{i32 1, !"wchar_size", i32 4} +!1 = !{!"clang"} +!2 = !{!3} +!3 = distinct !{!3, !4, !"test_rr: unknown scope"} +!4 = distinct !{!4, !"test_rr"} +!5 = !{!6} +!6 = distinct !{!6, !4, !"test_rr: rprp"} +!7 = !{!8, !8, i64 0, i64 4} +!8 = !{!9, i64 4, !"any pointer"} +!9 = !{!10, i64 1, !"omnipotent char"} +!10 = !{!"Simple C/C++ TBAA"} +!11 = !{!6, !3} +!12 = !{!13, !13, i64 0, i64 4} +!13 = !{!9, i64 4, !"int"} Index: llvm/test/Transforms/PropagateAndConvertNoAlias/inlined.ll =================================================================== --- /dev/null +++ llvm/test/Transforms/PropagateAndConvertNoAlias/inlined.ll @@ -0,0 +1,69 @@ +; RUN: opt < %s -convert-noalias -verify -S | FileCheck %s +; RUN: opt < %s -passes=convert-noalias,verify -S | FileCheck %s + +target datalayout = "e-p:64:64:64-p1:16:16:16-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-n8:16:32:64" + +; Function Attrs: nounwind +define dso_local void @test01(i32* %_pA) local_unnamed_addr #0 { +entry: + %0 = call i8* @llvm.noalias.decl.p0i8.p0p0i32.i32(i32** null, i32 0, metadata !2) + %1 = call i32* @llvm.side.noalias.p0i32.p0i8.p0p0i32.p0p0i32.i32(i32* %_pA, i8* %0, i32** null, i32** undef, i32 0, metadata !2), !tbaa !5, !noalias !2 + store i32 41, i32* %_pA, noalias_sidechannel i32* %1, align 4, !tbaa !9, !noalias !2 + %.guard = call i32* @llvm.noalias.arg.guard.p0i32.p0i32(i32* %_pA, i32* %1) + %2 = call i8* @llvm.noalias.decl.p0i8.p0p0i32.i32(i32** null, i32 0, metadata !11) #5, !noalias !2 + %3 = call i32* @llvm.side.noalias.p0i32.p0i8.p0p0i32.p0p0i32.i32(i32* %.guard, i8* %2, i32** null, i32** undef, i32 0, metadata !11) #3, !tbaa !5, !noalias !14 + store i32 42, i32* %.guard, noalias_sidechannel i32* %3, align 4, !tbaa !9, !noalias !14 + %.guard.guard.guard.guard.i = call i32* @llvm.noalias.arg.guard.p0i32.p0i32(i32* %.guard, i32* %3) #1 + store i32 43, i32* %.guard.guard.guard.guard.i, noalias_sidechannel i32* undef, align 4, !tbaa !9, !noalias !2 + ret void +} + +; CHECK-LABEL: @test01( +; CHECK-NEXT: entry: +; CHECK-NEXT: %0 = call i8* @llvm.noalias.decl.p0i8.p0p0i32.i32(i32** null, i32 0, metadata !2) +; CHECK-NEXT: %1 = call i32* @llvm.side.noalias.p0i32.p0i8.p0p0i32.p0p0i32.i32(i32* %_pA, i8* %0, i32** null, i32** undef, i32 0, metadata !2), !tbaa !5, !noalias !2 +; CHECK-NEXT: store i32 41, i32* %_pA, noalias_sidechannel i32* %1, align 4, !tbaa !9, !noalias !2 +; CHECK-NEXT: %2 = call i8* @llvm.noalias.decl.p0i8.p0p0i32.i32(i32** null, i32 0, metadata !11) #5, !noalias !2 +; CHECK-NEXT: %3 = call i32* @llvm.side.noalias.p0i32.p0i8.p0p0i32.p0p0i32.i32(i32* %1, i8* %2, i32** null, i32** undef, i32 0, metadata !11) #3, !tbaa !5, !noalias !14 +; CHECK-NEXT: store i32 42, i32* %_pA, noalias_sidechannel i32* %3, align 4, !tbaa !9, !noalias !14 +; CHECK-NEXT: store i32 43, i32* %_pA, noalias_sidechannel i32* %3, align 4, !tbaa !9, !noalias !2 +; CHECK-NEXT: ret void +; CHECK-NEXT: } + +; Function Attrs: argmemonly nounwind +declare i8* @llvm.noalias.decl.p0i8.p0p0i32.i32(i32**, i32, metadata) #1 + +; Function Attrs: argmemonly nounwind speculatable +declare i32* @llvm.noalias.p0i32.p0i8.p0p0i32.i32(i32*, i8*, i32**, i32, metadata) #2 + +; Function Attrs: nounwind readnone speculatable +declare i32* @llvm.side.noalias.p0i32.p0i8.p0p0i32.p0p0i32.i32(i32*, i8*, i32**, i32**, i32, metadata) #3 + +; Function Attrs: nounwind readnone +declare i32* @llvm.noalias.arg.guard.p0i32.p0i32(i32*, i32*) #4 + +attributes #0 = { nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #1 = { argmemonly nounwind } +attributes #2 = { argmemonly nounwind speculatable } +attributes #3 = { nounwind readnone speculatable } +attributes #4 = { nounwind readnone } +attributes #5 = { nounwind } + +!llvm.module.flags = !{!0} +!llvm.ident = !{!1} + +!0 = !{i32 1, !"wchar_size", i32 4} +!1 = !{!"clang"} +!2 = !{!3} +!3 = distinct !{!3, !4, !"test01: p1"} +!4 = distinct !{!4, !"test01"} +!5 = !{!6, !6, i64 0, i64 4} +!6 = !{!7, i64 4, !"any pointer"} +!7 = !{!8, i64 1, !"omnipotent char"} +!8 = !{!"Simple C/C++ TBAA"} +!9 = !{!10, !10, i64 0, i64 4} +!10 = !{!7, i64 4, !"int"} +!11 = !{!12} +!12 = distinct !{!12, !13, !"passP: pA"} +!13 = distinct !{!13, !"passP"} +!14 = !{!12, !3} Index: llvm/test/Transforms/PropagateAndConvertNoAlias/reduced01.ll =================================================================== --- /dev/null +++ llvm/test/Transforms/PropagateAndConvertNoAlias/reduced01.ll @@ -0,0 +1,74 @@ +; RUN: opt < %s -convert-noalias -verify -S | FileCheck %s +; RUN: opt < %s -passes=convert-noalias,verify -S | FileCheck %s +; RUN: opt < %s -convert-noalias -verify -convert-noalias -verify -S | FileCheck %s + +target datalayout = "e-i8:8:8-i16:16:16-i32:32:32-i64:32:32-f16:16:16-f32:32:32-f64:32:32-p:32:32:32:32:8-s0:32:32-a0:0:32-S32-n16:32-v128:32:32-P0-p0:32:32:32:32:8" + +%struct.a = type { i8 } + +; Function Attrs: noreturn +define dso_local void @_Z3fooPii(i32* %_f, i32 %g) local_unnamed_addr #0 { +entry: + %0 = call i8* @llvm.noalias.decl.p0i8.p0p0i32.i32(i32** null, i32 0, metadata !2) + %1 = call i32* @llvm.side.noalias.p0i32.p0i8.p0p0i32.p0p0i32.i32(i32* %_f, i8* %0, i32** null, i32** undef, i32 0, metadata !2), !tbaa !5, !noalias !2 + %2 = bitcast i32* %1 to %struct.a* + %3 = bitcast i32* %_f to %struct.a* + br label %for.cond + +for.cond: ; preds = %for.cond, %entry + %side.h.0 = phi %struct.a* [ %2, %entry ], [ %h.0.guard, %for.cond ] + %h.0 = phi %struct.a* [ %3, %entry ], [ %h.0.guard, %for.cond ] + %h.0.guard = call %struct.a* @llvm.noalias.arg.guard.p0s_struct.as.p0s_struct.as(%struct.a* %h.0, %struct.a* %side.h.0) + %4 = getelementptr inbounds %struct.a, %struct.a* %h.0, i32 0, i32 0 + %.unpack = load i8, i8* %4, noalias_sidechannel %struct.a* %side.h.0, align 1, !noalias !2 + %5 = insertvalue %struct.a undef, i8 %.unpack, 0 + %call = call i32 @_Z1b1a(%struct.a %5), !noalias !2 + br label %for.cond +} +; CHECK: define dso_local void @_Z3fooPii(i32* %_f, i32 %g) local_unnamed_addr #0 { +; CHECK-NEXT: entry: +; CHECK-NEXT: %0 = call i8* @llvm.noalias.decl.p0i8.p0p0i32.i32(i32** null, i32 0, metadata !2) +; CHECK-NEXT: %1 = call i32* @llvm.side.noalias.p0i32.p0i8.p0p0i32.p0p0i32.i32(i32* %_f, i8* %0, i32** null, i32** undef, i32 0, metadata !2), !tbaa !5, !noalias !2 +; CHECK-NEXT: %2 = bitcast i32* %1 to %struct.a* +; CHECK-NEXT: %3 = bitcast i32* %_f to %struct.a* +; CHECK-NEXT: br label %for.cond +; CHECK: for.cond: ; preds = %for.cond, %entry +; CHECK-NEXT: %side.h.0 = phi %struct.a* [ %2, %entry ], [ %side.h.0, %for.cond ] +; CHECK-NEXT: %side.h.01 = phi %struct.a* [ %3, %entry ], [ %side.h.0, %for.cond ] +; CHECK-NEXT: %h.0 = phi %struct.a* [ %3, %entry ], [ %h.0, %for.cond ] +; CHECK-NEXT: %4 = getelementptr inbounds %struct.a, %struct.a* %h.0, i32 0, i32 0 +; CHECK-NEXT: %.unpack = load i8, i8* %4, noalias_sidechannel %struct.a* %side.h.0, align 1, !noalias !2 +; CHECK-NEXT: %5 = insertvalue %struct.a undef, i8 %.unpack, 0 +; CHECK-NEXT: %call = call i32 @_Z1b1a(%struct.a %5), !noalias !2 +; CHECK-NEXT: br label %for.cond + + +; Function Attrs: argmemonly nounwind +declare i8* @llvm.noalias.decl.p0i8.p0p0i32.i32(i32**, i32, metadata) #1 + +declare dso_local i32 @_Z1b1a(%struct.a) local_unnamed_addr #2 + +; Function Attrs: nounwind readnone +declare %struct.a* @llvm.noalias.arg.guard.p0s_struct.as.p0s_struct.as(%struct.a*, %struct.a*) #3 + +; Function Attrs: nounwind readnone speculatable +declare i32* @llvm.side.noalias.p0i32.p0i8.p0p0i32.p0p0i32.i32(i32*, i8*, i32**, i32**, i32, metadata) #4 + +attributes #0 = { noreturn "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #1 = { argmemonly nounwind } +attributes #2 = { "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #3 = { nounwind readnone } +attributes #4 = { nounwind readnone speculatable } + +!llvm.module.flags = !{!0} +!llvm.ident = !{!1} + +!0 = !{i32 1, !"wchar_size", i32 4} +!1 = !{!"clang version 7.0.0 "} +!2 = !{!3} +!3 = distinct !{!3, !4, !"_Z3fooPii: f"} +!4 = distinct !{!4, !"_Z3fooPii"} +!5 = !{!6, !6, i64 0} +!6 = !{!"any pointer", !7, i64 0} +!7 = !{!"omnipotent char", !8, i64 0} +!8 = !{!"Simple C++ TBAA"} Index: llvm/test/Transforms/PropagateAndConvertNoAlias/select_and_phi.ll =================================================================== --- /dev/null +++ llvm/test/Transforms/PropagateAndConvertNoAlias/select_and_phi.ll @@ -0,0 +1,208 @@ +; RUN: opt < %s -convert-noalias -verify -S | FileCheck %s +; RUN: opt < %s -passes=convert-noalias,verify -S | FileCheck %s +; RUN: opt < %s -convert-noalias -verify -convert-noalias -verify -S | FileCheck %s + +; Function Attrs: nounwind +define dso_local void @test_phi01(i32* %_pA, i32** %_pB, i32 %n) #0 { +entry: + %0 = call i8* @llvm.noalias.decl.p0i8.p0p0i32.i32(i32** null, i32 0, metadata !2) + %1 = call i8* @llvm.noalias.decl.p0i8.p0p0i32.i32(i32** null, i32 0, metadata !5) + %arrayidx = getelementptr inbounds i32*, i32** %_pB, i32 2 + %2 = load i32*, i32** %arrayidx, align 4, !tbaa !7, !noalias !11 + %3 = call i32* @llvm.noalias.p0i32.p0i8.p0p0i32.i32(i32* %_pA, i8* %0, i32** null, i32 0, metadata !2), !tbaa !7, !noalias !11 + %4 = load i32*, i32** %_pB, align 4, !tbaa !7, !noalias !11 + %arrayidx2 = getelementptr inbounds i32*, i32** %_pB, i32 1 + %5 = load i32*, i32** %arrayidx2, align 4, !tbaa !7, !noalias !11 + br label %for.cond + +for.cond: ; preds = %for.body, %entry + %pTmp00.0 = phi i32* [ %4, %entry ], [ %pTmp01.0, %for.body ] + %pTmp01.0 = phi i32* [ %5, %entry ], [ %3, %for.body ] + %i.0 = phi i32 [ 0, %entry ], [ %inc, %for.body ] + %cmp = icmp slt i32 %i.0, %n + br i1 %cmp, label %for.body, label %for.cond.cleanup + +for.cond.cleanup: ; preds = %for.cond + %6 = call i32* @llvm.noalias.p0i32.p0i8.p0p0i32.i32(i32* %_pA, i8* %0, i32** null, i32 0, metadata !2), !tbaa !7, !noalias !11 + store i32 99, i32* %6, align 4, !tbaa !12, !noalias !11 + store i32 42, i32* %pTmp00.0, align 4, !tbaa !12, !noalias !11 + %cmp5 = icmp sgt i32 %n, 5 + %7 = call i32* @llvm.noalias.p0i32.p0i8.p0p0i32.i32(i32* %2, i8* %1, i32** null, i32 0, metadata !5) + %cond = select i1 %cmp5, i32* %pTmp00.0, i32* %7 + store i32 43, i32* %cond, align 4, !tbaa !12, !noalias !11 + ret void + +for.body: ; preds = %for.cond + %arrayidx3 = getelementptr inbounds i32, i32* %pTmp01.0, i32 1 + %8 = load i32, i32* %arrayidx3, align 4, !tbaa !12, !noalias !11 + %arrayidx4 = getelementptr inbounds i32, i32* %pTmp00.0, i32 1 + store i32 %8, i32* %arrayidx4, align 4, !tbaa !12, !noalias !11 + %inc = add nsw i32 %i.0, 1 + br label %for.cond +} + +; CHECK-LABEL: @test_phi01( +; CHECK-NEXT: entry: +; CHECK-NEXT: %0 = call i8* @llvm.noalias.decl.p0i8.p0p0i32.i32(i32** null, i32 0, metadata !2) +; CHECK-NEXT: %1 = call i8* @llvm.noalias.decl.p0i8.p0p0i32.i32(i32** null, i32 0, metadata !5) +; CHECK-NEXT: %arrayidx = getelementptr inbounds i32*, i32** %_pB, i32 2 +; CHECK-NEXT: %2 = load i32*, i32** %arrayidx, noalias_sidechannel i32** undef, align 4, !tbaa !7, !noalias !11 +; CHECK-NEXT: %3 = call i32* @llvm.side.noalias.p0i32.p0i8.p0p0i32.p0p0i32.i32(i32* %_pA, i8* %0, i32** null, i32** undef, i32 0, metadata !2), !tbaa !7, !noalias !11 +; CHECK-NEXT: %4 = load i32*, i32** %_pB, noalias_sidechannel i32** undef, align 4, !tbaa !7, !noalias !11 +; CHECK-NEXT: %arrayidx2 = getelementptr inbounds i32*, i32** %_pB, i32 1 +; CHECK-NEXT: %5 = load i32*, i32** %arrayidx2, noalias_sidechannel i32** undef, align 4, !tbaa !7, !noalias !11 +; CHECK-NEXT: br label %for.cond +; CHECK: for.cond: +; CHECK-NEXT: %side.pTmp00.0 = phi i32* [ %4, %entry ], [ %side.pTmp01.0, %for.body ] +; CHECK-NEXT: %pTmp00.0 = phi i32* [ %4, %entry ], [ %pTmp01.0, %for.body ] +; CHECK-NEXT: %side.pTmp01.0 = phi i32* [ %5, %entry ], [ %3, %for.body ] +; CHECK-NEXT: %pTmp01.0 = phi i32* [ %5, %entry ], [ %_pA, %for.body ] +; CHECK-NEXT: %i.0 = phi i32 [ 0, %entry ], [ %inc, %for.body ] +; CHECK-NEXT: %cmp = icmp slt i32 %i.0, %n +; CHECK-NEXT: br i1 %cmp, label %for.body, label %for.cond.cleanup +; CHECK: for.cond.cleanup: +; CHECK-NEXT: store i32 99, i32* %_pA, noalias_sidechannel i32* %3, align 4, !tbaa !12, !noalias !11 +; CHECK-NEXT: store i32 42, i32* %pTmp00.0, noalias_sidechannel i32* %side.pTmp00.0, align 4, !tbaa !12, !noalias !11 +; CHECK-NEXT: %cmp5 = icmp sgt i32 %n, 5 +; CHECK-NEXT: %6 = call i32* @llvm.side.noalias.p0i32.p0i8.p0p0i32.p0p0i32.i32(i32* %2, i8* %1, i32** null, i32** undef, i32 0, metadata !5) +; CHECK-NEXT: %side.cond = select i1 %cmp5, i32* %side.pTmp00.0, i32* %6 +; CHECK-NEXT: %cond = select i1 %cmp5, i32* %pTmp00.0, i32* %2 +; CHECK-NEXT: store i32 43, i32* %cond, noalias_sidechannel i32* %side.cond, align 4, !tbaa !12, !noalias !11 +; CHECK-NEXT: ret void +; CHECK: for.body: +; CHECK-NEXT: %arrayidx3 = getelementptr inbounds i32, i32* %pTmp01.0, i32 1 +; CHECK-NEXT: %7 = load i32, i32* %arrayidx3, noalias_sidechannel i32* %side.pTmp01.0, align 4, !tbaa !12, !noalias !11 +; CHECK-NEXT: %arrayidx4 = getelementptr inbounds i32, i32* %pTmp00.0, i32 1 +; CHECK-NEXT: store i32 %7, i32* %arrayidx4, noalias_sidechannel i32* %side.pTmp00.0, align 4, !tbaa !12, !noalias !11 +; CHECK-NEXT: %inc = add nsw i32 %i.0, 1 +; CHECK-NEXT: br label %for.cond +; CHECK-NEXT: } + + +; Function Attrs: nounwind +define dso_local void @test_phi02(i32* %_pA, i32** %_pB, i32 %n) #0 { +entry: + %0 = call i8* @llvm.noalias.decl.p0i8.p0p0i32.i32(i32** null, i32 0, metadata !14) + %1 = call i8* @llvm.noalias.decl.p0i8.p0p0i32.i32(i32** null, i32 0, metadata !17) + %arrayidx = getelementptr inbounds i32*, i32** %_pB, i32 2 + %2 = load i32*, i32** %arrayidx, align 4, !tbaa !7, !noalias !19 + %3 = call i32* @llvm.noalias.p0i32.p0i8.p0p0i32.i32(i32* %_pA, i8* %0, i32** null, i32 0, metadata !14), !tbaa !7, !noalias !19 + %4 = load i32*, i32** %_pB, align 4, !tbaa !7, !noalias !19 + %arrayidx2 = getelementptr inbounds i32*, i32** %_pB, i32 1 + %5 = load i32*, i32** %arrayidx2, align 4, !tbaa !7, !noalias !19 + br label %for.cond + +for.cond: ; preds = %for.body, %entry + %pTmp00.0 = phi i32* [ %4, %entry ], [ %pTmp01.0, %for.body ] + %pTmp01.0 = phi i32* [ %5, %entry ], [ %3, %for.body ] + %i.0 = phi i32 [ 0, %entry ], [ %inc, %for.body ] + %cmp = icmp slt i32 %i.0, %n + br i1 %cmp, label %for.body, label %for.cond.cleanup + +for.cond.cleanup: ; preds = %for.cond + %6 = call i32* @llvm.noalias.p0i32.p0i8.p0p0i32.i32(i32* %_pA, i8* %0, i32** null, i32 0, metadata !14), !tbaa !7, !noalias !19 + %7 = bitcast i32* %6 to i16* + store i16 99, i16* %7, align 2, !tbaa !20, !noalias !19 + %8 = bitcast i32* %pTmp00.0 to i16* + store i16 42, i16* %8, align 2, !tbaa !20, !noalias !19 + %cmp5 = icmp sgt i32 %n, 5 + %9 = call i32* @llvm.noalias.p0i32.p0i8.p0p0i32.i32(i32* %2, i8* %1, i32** null, i32 0, metadata !17) + %cond = select i1 %cmp5, i32* %pTmp00.0, i32* %9 + %10 = bitcast i32* %cond to i16* + store i16 43, i16* %10, align 2, !tbaa !20, !noalias !19 + ret void + +for.body: ; preds = %for.cond + %11 = bitcast i32* %pTmp01.0 to i16* + %arrayidx3 = getelementptr inbounds i16, i16* %11, i32 1 + %12 = load i16, i16* %arrayidx3, align 2, !tbaa !20, !noalias !19 + %13 = bitcast i32* %pTmp00.0 to i16* + %arrayidx4 = getelementptr inbounds i16, i16* %13, i32 1 + store i16 %12, i16* %arrayidx4, align 2, !tbaa !20, !noalias !19 + %inc = add nsw i32 %i.0, 1 + br label %for.cond +} + +; CHECK-LABEL: @test_phi02( +; CHECK-NEXT: entry: +; CHECK-NEXT: %0 = call i8* @llvm.noalias.decl.p0i8.p0p0i32.i32(i32** null, i32 0, metadata !14) +; CHECK-NEXT: %1 = call i8* @llvm.noalias.decl.p0i8.p0p0i32.i32(i32** null, i32 0, metadata !17) +; CHECK-NEXT: %arrayidx = getelementptr inbounds i32*, i32** %_pB, i32 2 +; CHECK-NEXT: %2 = load i32*, i32** %arrayidx, noalias_sidechannel i32** undef, align 4, !tbaa !7, !noalias !19 +; CHECK-NEXT: %3 = call i32* @llvm.side.noalias.p0i32.p0i8.p0p0i32.p0p0i32.i32(i32* %_pA, i8* %0, i32** null, i32** undef, i32 0, metadata !14), !tbaa !7, !noalias !19 +; CHECK-NEXT: %4 = load i32*, i32** %_pB, noalias_sidechannel i32** undef, align 4, !tbaa !7, !noalias !19 +; CHECK-NEXT: %arrayidx2 = getelementptr inbounds i32*, i32** %_pB, i32 1 +; CHECK-NEXT: %5 = load i32*, i32** %arrayidx2, noalias_sidechannel i32** undef, align 4, !tbaa !7, !noalias !19 +; CHECK-NEXT: br label %for.cond +; CHECK: for.cond: +; CHECK-NEXT: %side.pTmp00.0 = phi i32* [ %4, %entry ], [ %side.pTmp01.0, %for.body ] +; CHECK-NEXT: %pTmp00.0 = phi i32* [ %4, %entry ], [ %pTmp01.0, %for.body ] +; CHECK-NEXT: %side.pTmp01.0 = phi i32* [ %5, %entry ], [ %3, %for.body ] +; CHECK-NEXT: %pTmp01.0 = phi i32* [ %5, %entry ], [ %_pA, %for.body ] +; CHECK-NEXT: %i.0 = phi i32 [ 0, %entry ], [ %inc, %for.body ] +; CHECK-NEXT: %6 = bitcast i32* %side.pTmp00.0 to i16* +; CHECK-NEXT: %7 = bitcast i32* %side.pTmp01.0 to i16* +; CHECK-NEXT: %cmp = icmp slt i32 %i.0, %n +; CHECK-NEXT: br i1 %cmp, label %for.body, label %for.cond.cleanup +; CHECK: for.cond.cleanup: +; CHECK-NEXT: %8 = bitcast i32* %3 to i16* +; CHECK-NEXT: %9 = bitcast i32* %_pA to i16* +; CHECK-NEXT: store i16 99, i16* %9, noalias_sidechannel i16* %8, align 2, !tbaa !20, !noalias !19 +; CHECK-NEXT: %10 = bitcast i32* %pTmp00.0 to i16* +; CHECK-NEXT: store i16 42, i16* %10, noalias_sidechannel i16* %6, align 2, !tbaa !20, !noalias !19 +; CHECK-NEXT: %cmp5 = icmp sgt i32 %n, 5 +; CHECK-NEXT: %11 = call i32* @llvm.side.noalias.p0i32.p0i8.p0p0i32.p0p0i32.i32(i32* %2, i8* %1, i32** null, i32** undef, i32 0, metadata !17) +; CHECK-NEXT: %side.cond = select i1 %cmp5, i32* %side.pTmp00.0, i32* %11 +; CHECK-NEXT: %12 = bitcast i32* %side.cond to i16* +; CHECK-NEXT: %cond = select i1 %cmp5, i32* %pTmp00.0, i32* %2 +; CHECK-NEXT: %13 = bitcast i32* %cond to i16* +; CHECK-NEXT: store i16 43, i16* %13, noalias_sidechannel i16* %12, align 2, !tbaa !20, !noalias !19 +; CHECK-NEXT: ret void +; CHECK: for.body: +; CHECK-NEXT: %14 = bitcast i32* %pTmp01.0 to i16* +; CHECK-NEXT: %arrayidx3 = getelementptr inbounds i16, i16* %14, i32 1 +; CHECK-NEXT: %15 = load i16, i16* %arrayidx3, noalias_sidechannel i16* %7, align 2, !tbaa !20, !noalias !19 +; CHECK-NEXT: %16 = bitcast i32* %pTmp00.0 to i16* +; CHECK-NEXT: %arrayidx4 = getelementptr inbounds i16, i16* %16, i32 1 +; CHECK-NEXT: store i16 %15, i16* %arrayidx4, noalias_sidechannel i16* %6, align 2, !tbaa !20, !noalias !19 +; CHECK-NEXT: %inc = add nsw i32 %i.0, 1 +; CHECK-NEXT: br label %for.cond +; CHECK-NEXT: } + + + +; Function Attrs: argmemonly nounwind +declare i8* @llvm.noalias.decl.p0i8.p0p0i32.i32(i32**, i32, metadata) #1 + +; Function Attrs: argmemonly nounwind speculatable +declare i32* @llvm.noalias.p0i32.p0i8.p0p0i32.i32(i32*, i8*, i32**, i32, metadata) #2 + +attributes #0 = { nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #1 = { argmemonly nounwind } +attributes #2 = { argmemonly nounwind speculatable } + +!llvm.module.flags = !{!0} +!llvm.ident = !{!1} + +!0 = !{i32 1, !"wchar_size", i32 4} +!1 = !{!"clang version 10.0.0 (sgasip@krachtcs10:/u/sgasip/ipd/repositories/llvm_ipd 6068136b7c40a5bb7a7291dd621185a209673e09)"} +!2 = !{!3} +!3 = distinct !{!3, !4, !"test_phi01: rpTmp"} +!4 = distinct !{!4, !"test_phi01"} +!5 = !{!6} +!6 = distinct !{!6, !4, !"test_phi01: rp2"} +!7 = !{!8, !8, i64 0, i64 4} +!8 = !{!9, i64 4, !"any pointer"} +!9 = !{!10, i64 1, !"omnipotent char"} +!10 = !{!"Simple C/C++ TBAA"} +!11 = !{!3, !6} +!12 = !{!13, !13, i64 0, i64 4} +!13 = !{!9, i64 4, !"int"} +!14 = !{!15} +!15 = distinct !{!15, !16, !"test_phi02: rpTmp"} +!16 = distinct !{!16, !"test_phi02"} +!17 = !{!18} +!18 = distinct !{!18, !16, !"test_phi02: rp2"} +!19 = !{!15, !18} +!20 = !{!21, !21, i64 0, i64 2} +!21 = !{!9, i64 2, !"short"} Index: llvm/test/Transforms/PropagateAndConvertNoAlias/struct.ll =================================================================== --- /dev/null +++ llvm/test/Transforms/PropagateAndConvertNoAlias/struct.ll @@ -0,0 +1,117 @@ +; RUN: opt < %s -convert-noalias -verify -S | FileCheck %s +; RUN: opt < %s -passes=convert-noalias,verify -S | FileCheck %s +target datalayout = "e-i8:8:8-i16:16:16-i32:32:32-i64:32:32-f16:16:16-f32:32:32-f64:32:32-p:32:32:32:32:8-s0:32:32-a0:0:32-S32-n16:32-v128:32:32-P0-p0:32:32:32:32:8" + +%struct.FOO = type { i32*, i32*, i32* } + +; Function Attrs: nounwind +define dso_local void @test_rFOO(%struct.FOO* %_pFOO, i32* %_pA) #0 !noalias !2 { +entry: + %tmp = alloca %struct.FOO, align 4 + %0 = call i8* @llvm.noalias.decl.p0i8.p0p0s_struct.FOOs.i32(%struct.FOO** null, i32 0, metadata !5) + %1 = call i8* @llvm.noalias.decl.p0i8.p0p0i32.i32(i32** null, i32 0, metadata !7) + %2 = call i8* @llvm.noalias.decl.p0i8.p0p0i32.i32(i32** null, i32 4, metadata !7) + %3 = call i8* @llvm.noalias.decl.p0i8.p0p0i32.i32(i32** null, i32 8, metadata !7) + %4 = call %struct.FOO* @llvm.noalias.p0s_struct.FOOs.p0i8.p0p0s_struct.FOOs.i32(%struct.FOO* %_pFOO, i8* %0, %struct.FOO** null, i32 0, metadata !5), !tbaa !9, !noalias !13 + %5 = call %struct.FOO* @llvm.noalias.copy.guard.p0s_struct.FOOs.p0i8(%struct.FOO* %4, i8* null, metadata !14, metadata !2) + %6 = load %struct.FOO, %struct.FOO* %5, align 4, !noalias !13 + %.fca.0.extract = extractvalue %struct.FOO %6, 0 + %.fca.1.extract = extractvalue %struct.FOO %6, 1 + %.fca.2.extract = extractvalue %struct.FOO %6, 2 + %7 = call i32* @llvm.noalias.p0i32.p0i8.p0p0i32.i32(i32* %.fca.0.extract, i8* %1, i32** null, i32 0, metadata !7), !tbaa !18, !noalias !13 + %8 = call i32* @llvm.noalias.p0i32.p0i8.p0p0i32.i32(i32* %.fca.1.extract, i8* %2, i32** null, i32 4, metadata !7), !tbaa !20, !noalias !13 + %9 = load i32, i32* %8, align 4, !tbaa !21, !noalias !13 + store i32 %9, i32* %7, align 4, !tbaa !21, !noalias !13 + %10 = call i32* @llvm.noalias.p0i32.p0i8.p0p0i32.i32(i32* %_pA, i8* %1, i32** null, i32 0, metadata !7), !tbaa !18, !noalias !13 + store i32 42, i32* %10, align 4, !tbaa !21, !noalias !13 + %.fca.0.load.noalias = call i32* @llvm.noalias.p0i32.p0i8.p0p0i32.i32(i32* %_pA, i8* %1, i32** null, i32 0, metadata !7) + %.fca.0.insert = insertvalue %struct.FOO undef, i32* %.fca.0.load.noalias, 0 + %.fca.1.load.noalias = call i32* @llvm.noalias.p0i32.p0i8.p0p0i32.i32(i32* %.fca.1.extract, i8* %2, i32** null, i32 4, metadata !7) + %.fca.1.insert = insertvalue %struct.FOO %.fca.0.insert, i32* %.fca.1.load.noalias, 1 + %.fca.2.load.noalias = call i32* @llvm.noalias.p0i32.p0i8.p0p0i32.i32(i32* %.fca.2.extract, i8* %3, i32** null, i32 8, metadata !7) + %.fca.2.insert = insertvalue %struct.FOO %.fca.1.insert, i32* %.fca.2.load.noalias, 2 + call void @fum(%struct.FOO %.fca.2.insert), !noalias !13 + ret void +} + +; CHECK-LABEL: @test_rFOO( +; CHECK-NEXT: entry: +; CHECK-NEXT: %tmp = alloca %struct.FOO, align 4 +; CHECK-NEXT: %0 = call i8* @llvm.noalias.decl.p0i8.p0p0s_struct.FOOs.i32(%struct.FOO** null, i32 0, metadata !5) +; CHECK-NEXT: %1 = call i8* @llvm.noalias.decl.p0i8.p0p0i32.i32(i32** null, i32 0, metadata !7) +; CHECK-NEXT: %2 = call i8* @llvm.noalias.decl.p0i8.p0p0i32.i32(i32** null, i32 4, metadata !7) +; CHECK-NEXT: %3 = call i8* @llvm.noalias.decl.p0i8.p0p0i32.i32(i32** null, i32 8, metadata !7) +; CHECK-NEXT: %4 = call %struct.FOO* @llvm.side.noalias.p0s_struct.FOOs.p0i8.p0p0s_struct.FOOs.p0p0s_struct.FOOs.i32(%struct.FOO* %_pFOO, i8* %0, %struct.FOO** null, %struct.FOO** undef, i32 0, metadata !5), !tbaa !9, !noalias !13 +; CHECK-NEXT: %.guard = call %struct.FOO* @llvm.noalias.arg.guard.p0s_struct.FOOs.p0s_struct.FOOs(%struct.FOO* %_pFOO, %struct.FOO* %4) +; CHECK-NEXT: %5 = call %struct.FOO* @llvm.noalias.copy.guard.p0s_struct.FOOs.p0i8(%struct.FOO* %.guard, i8* null, metadata !14, metadata !2) +; CHECK-NEXT: %6 = load %struct.FOO, %struct.FOO* %5, noalias_sidechannel %struct.FOO* undef, align 4, !noalias !13 +; CHECK-NEXT: %.fca.0.extract = extractvalue %struct.FOO %6, 0 +; CHECK-NEXT: %.fca.1.extract = extractvalue %struct.FOO %6, 1 +; CHECK-NEXT: %.fca.2.extract = extractvalue %struct.FOO %6, 2 +; CHECK-NEXT: %7 = call i32* @llvm.side.noalias.p0i32.p0i8.p0p0i32.p0p0i32.i32(i32* %.fca.0.extract, i8* %1, i32** null, i32** undef, i32 0, metadata !7), !tbaa !18, !noalias !13 +; CHECK-NEXT: %8 = call i32* @llvm.side.noalias.p0i32.p0i8.p0p0i32.p0p0i32.i32(i32* %.fca.1.extract, i8* %2, i32** null, i32** undef, i32 4, metadata !7), !tbaa !20, !noalias !13 +; CHECK-NEXT: %9 = load i32, i32* %.fca.1.extract, noalias_sidechannel i32* %8, align 4, !tbaa !21, !noalias !13 +; CHECK-NEXT: store i32 %9, i32* %.fca.0.extract, noalias_sidechannel i32* %7, align 4, !tbaa !21, !noalias !13 +; CHECK-NEXT: %10 = call i32* @llvm.side.noalias.p0i32.p0i8.p0p0i32.p0p0i32.i32(i32* %_pA, i8* %1, i32** null, i32** undef, i32 0, metadata !7), !tbaa !18, !noalias !13 +; CHECK-NEXT: store i32 42, i32* %_pA, noalias_sidechannel i32* %10, align 4, !tbaa !21, !noalias !13 +; CHECK-NEXT: %.fca.0.load.noalias.guard = call i32* @llvm.noalias.arg.guard.p0i32.p0i32(i32* %_pA, i32* %10) +; CHECK-NEXT: %.fca.0.insert = insertvalue %struct.FOO undef, i32* %.fca.0.load.noalias.guard, 0 +; CHECK-NEXT: %.fca.1.load.noalias.guard = call i32* @llvm.noalias.arg.guard.p0i32.p0i32(i32* %.fca.1.extract, i32* %8) +; CHECK-NEXT: %.fca.1.insert = insertvalue %struct.FOO %.fca.0.insert, i32* %.fca.1.load.noalias.guard, 1 +; CHECK-NEXT: %11 = call i32* @llvm.side.noalias.p0i32.p0i8.p0p0i32.p0p0i32.i32(i32* %.fca.2.extract, i8* %3, i32** null, i32** undef, i32 8, metadata !7) +; CHECK-NEXT: %.fca.2.load.noalias.guard = call i32* @llvm.noalias.arg.guard.p0i32.p0i32(i32* %.fca.2.extract, i32* %11) +; CHECK-NEXT: %.fca.2.insert = insertvalue %struct.FOO %.fca.1.insert, i32* %.fca.2.load.noalias.guard, 2 +; CHECK-NEXT: call void @fum(%struct.FOO %.fca.2.insert), !noalias !13 +; CHECK-NEXT: ret void +; CHECK-NEXT: } + + +; Function Attrs: argmemonly nounwind speculatable +declare %struct.FOO* @llvm.noalias.p0s_struct.FOOs.p0i8.p0p0s_struct.FOOs.i32(%struct.FOO*, i8*, %struct.FOO**, i32, metadata) #1 + +; Function Attrs: nounwind readnone +declare %struct.FOO* @llvm.noalias.copy.guard.p0s_struct.FOOs.p0i8(%struct.FOO*, i8*, metadata, metadata) #2 + +; Function Attrs: argmemonly nounwind speculatable +declare i32* @llvm.noalias.p0i32.p0i8.p0p0i32.i32(i32*, i8*, i32**, i32, metadata) #1 + +; Function Attrs: argmemonly nounwind +declare i8* @llvm.noalias.decl.p0i8.p0p0i32.i32(i32**, i32, metadata) #3 + +; Function Attrs: argmemonly nounwind +declare i8* @llvm.noalias.decl.p0i8.p0p0s_struct.FOOs.i32(%struct.FOO**, i32, metadata) #3 + +declare dso_local void @fum(%struct.FOO) #4 + +attributes #0 = { nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #1 = { argmemonly nounwind speculatable } +attributes #2 = { nounwind readnone } +attributes #3 = { argmemonly nounwind } +attributes #4 = { "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } + +!llvm.module.flags = !{!0} +!llvm.ident = !{!1} + +!0 = !{i32 1, !"wchar_size", i32 4} +!1 = !{!"clang"} +!2 = !{!3} +!3 = distinct !{!3, !4, !"test_rFOO: unknown scope"} +!4 = distinct !{!4, !"test_rFOO"} +!5 = !{!6} +!6 = distinct !{!6, !4, !"test_rFOO: rpFOO"} +!7 = !{!8} +!8 = distinct !{!8, !4, !"test_rFOO: tmp"} +!9 = !{!10, !10, i64 0, i64 4} +!10 = !{!11, i64 4, !"any pointer"} +!11 = !{!12, i64 1, !"omnipotent char"} +!12 = !{!"Simple C/C++ TBAA"} +!13 = !{!6, !8, !3} +!14 = !{!15, !16, !17} +!15 = !{i64 -1, i64 0} +!16 = !{i64 -1, i64 1} +!17 = !{i64 -1, i64 2} +!18 = !{!19, !10, i64 0, i64 4} +!19 = !{!11, i64 12, !"FOO", !10, i64 0, i64 4, !10, i64 4, i64 4, !10, i64 8, i64 4} +!20 = !{!19, !10, i64 4, i64 4} +!21 = !{!22, !22, i64 0, i64 4} +!22 = !{!11, i64 4, !"int"} Index: llvm/test/Transforms/SLPVectorizer/noalias.ll =================================================================== --- /dev/null +++ llvm/test/Transforms/SLPVectorizer/noalias.ll @@ -0,0 +1,55 @@ +; RUN: opt -S < %s -slp-vectorizer -slp-max-reg-size=128 -slp-min-reg-size=128 | FileCheck %s + +; SLP vectorization across a @llvm.side.noalias and side channel + +; Function Attrs: inaccessiblememonly nounwind willreturn +declare void @llvm.sideeffect() #0 + +define void @test(float* %p) { +; CHECK-LABEL: @test( +; CHECK-NEXT: [[P1_DECL:%.*]] = tail call i8* @llvm.noalias.decl.p0i8.p0p0f32.i32(float** null, i32 0, metadata !0) +; CHECK-NEXT: [[P0:%.*]] = getelementptr float, float* [[P:%.*]], i64 0 +; CHECK-NEXT: [[P1:%.*]] = getelementptr float, float* [[P]], i64 1 +; CHECK-NEXT: [[SIDE_P1:%.*]] = tail call float* @llvm.side.noalias.p0f32.p0i8.p0p0f32.p0p0f32.i32(float* [[P1]], i8* [[P1_DECL]], float** null, float** undef, i32 0, metadata !0), !noalias !0 +; CHECK-NEXT: [[P2:%.*]] = getelementptr float, float* [[P]], i64 2 +; CHECK-NEXT: [[P3:%.*]] = getelementptr float, float* [[P]], i64 3 +; CHECK-NEXT: call void @llvm.sideeffect() +; CHECK-NEXT: [[TMP1:%.*]] = bitcast float* [[P0]] to <4 x float>* +; CHECK-NEXT: [[TMP2:%.*]] = load <4 x float>, <4 x float>* [[TMP1]], align 4 +; CHECK-NEXT: call void @llvm.sideeffect() +; CHECK-NEXT: [[TMP3:%.*]] = bitcast float* [[P0]] to <4 x float>* +; CHECK-NEXT: store <4 x float> [[TMP2]], <4 x float>* [[TMP3]], align 4 +; CHECK-NEXT: ret void +; + %p1.decl = tail call i8* @llvm.noalias.decl.p0i8.p0p0f32.i32(float** null, i32 0, metadata !0) + %p0 = getelementptr float, float* %p, i64 0 + %p1 = getelementptr float, float* %p, i64 1 + %side.p1 = tail call float* @llvm.side.noalias.p0f32.p0i8.p0p0f32.p0p0f32.i32(float* %p1, i8* %p1.decl, float** null, float** undef, i32 0, metadata !0), !noalias !0 + %p2 = getelementptr float, float* %p, i64 2 + %p3 = getelementptr float, float* %p, i64 3 + %l0 = load float, float* %p0, !noalias !0 + %l1 = load float, float* %p1, noalias_sidechannel float* %side.p1, !noalias !0 + %l2 = load float, float* %p2, !noalias !0 + call void @llvm.sideeffect() + %l3 = load float, float* %p3, !noalias !0 + store float %l0, float* %p0, !noalias !0 + call void @llvm.sideeffect() + store float %l1, float* %p1, noalias_sidechannel float* %side.p1, !noalias !0 + store float %l2, float* %p2, !noalias !0 + store float %l3, float* %p3, !noalias !0 + ret void +} + +; Function Attrs: argmemonly nounwind +declare i8* @llvm.noalias.decl.p0i8.p0p0f32.i32(float**, i32, metadata) #1 + +; Function Attrs: nounwind readnone speculatable +declare float* @llvm.side.noalias.p0f32.p0i8.p0p0f32.p0p0f32.i32(float*, i8*, float**, float**, i32, metadata) #2 + +attributes #0 = { inaccessiblememonly nounwind willreturn } +attributes #1 = { argmemonly nounwind } +attributes #2 = { nounwind readnone speculatable } + +!0 = !{!1} +!1 = distinct !{!1, !2, !"test_f: p"} +!2 = distinct !{!2, !"test_f"} Index: llvm/test/Transforms/SROA/noalias.ll =================================================================== --- /dev/null +++ llvm/test/Transforms/SROA/noalias.ll @@ -0,0 +1,302 @@ +; RUN: opt < %s -sroa -S | FileCheck %s +; RUN: opt < %s -passes=sroa -S | FileCheck %s + +target datalayout = "e-p:64:64:64-p1:16:16:16-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-n8:16:32:64" + + +%struct.FOO = type { i32*, i32*, i32* } + +; Function Attrs: nounwind +define dso_local void @test_ri(i32* %_p) #0 { +entry: + %_p.addr = alloca i32*, align 4 + %rp = alloca i32*, align 4 + store i32* %_p, i32** %_p.addr, align 4, !tbaa !2, !noalias !6 + store i32* undef, i32** %rp, align 4, !noalias !6 + %0 = bitcast i32** %rp to i8* + call void @llvm.lifetime.start.p0i8(i64 4, i8* %0) #4, !noalias !6 + %1 = call i8* @llvm.noalias.decl.p0i8.p0p0i32.i64(i32** %rp, i64 0, metadata !6) + %2 = load i32*, i32** %_p.addr, align 4, !tbaa !2, !noalias !6 + store i32* %2, i32** %rp, align 4, !tbaa !2, !noalias !6 + %3 = load i32*, i32** %rp, align 4, !tbaa !2, !noalias !6 + %4 = call i32* @llvm.noalias.p0i32.p0i8.p0p0i32.i64(i32* %3, i8* %1, i32** %rp, i64 0, metadata !6), !tbaa !2, !noalias !6 + store i32 42, i32* %4, align 4, !tbaa !9, !noalias !6 + %5 = bitcast i32** %rp to i8* + call void @llvm.lifetime.end.p0i8(i64 4, i8* %5) #4 + ret void +} + +; CHECK-LABEL: @test_ri( +; CHECK-NOT: alloca +; CHECK: %0 = call i8* @llvm.noalias.decl.p0i8.p0p0i32.i64(i32** null, i64 0, metadata !2) +; CHECK: %1 = call i32* @llvm.noalias.p0i32.p0i8.p0p0i32.i64(i32* %_p, i8* %0, i32** null, i64 0, metadata !2), !tbaa !5, !noalias !2 + +; Function Attrs: nounwind +define dso_local void @test_ra(i32* %_p) #0 { +entry: + %_p.addr = alloca i32*, align 4 + %rp = alloca [3 x i32*], align 4 + store i32* %_p, i32** %_p.addr, align 4, !tbaa !2, !noalias !11 + store [3 x i32*] undef, [3 x i32*]* %rp, align 4, !noalias !11 + %0 = bitcast [3 x i32*]* %rp to i8* + call void @llvm.lifetime.start.p0i8(i64 12, i8* %0) #4, !noalias !11 + %1 = call i8* @llvm.noalias.decl.p0i8.p0a3p0i32.i64([3 x i32*]* %rp, i64 0, metadata !11) + %arrayinit.begin = getelementptr inbounds [3 x i32*], [3 x i32*]* %rp, i32 0, i32 0 + %2 = load i32*, i32** %_p.addr, align 4, !tbaa !2, !noalias !11 + store i32* %2, i32** %arrayinit.begin, align 4, !tbaa !2, !noalias !11 + %arrayinit.element = getelementptr inbounds i32*, i32** %arrayinit.begin, i32 1 + %3 = load i32*, i32** %_p.addr, align 4, !tbaa !2, !noalias !11 + %add.ptr = getelementptr inbounds i32, i32* %3, i32 1 + store i32* %add.ptr, i32** %arrayinit.element, align 4, !tbaa !2, !noalias !11 + %arrayinit.element1 = getelementptr inbounds i32*, i32** %arrayinit.element, i32 1 + %4 = load i32*, i32** %_p.addr, align 4, !tbaa !2, !noalias !11 + %add.ptr2 = getelementptr inbounds i32, i32* %4, i32 2 + store i32* %add.ptr2, i32** %arrayinit.element1, align 4, !tbaa !2, !noalias !11 + %arrayidx = getelementptr inbounds [3 x i32*], [3 x i32*]* %rp, i32 0, i32 0 + %5 = load i32*, i32** %arrayidx, align 4, !tbaa !2, !noalias !11 + %6 = call i32* @llvm.noalias.p0i32.p0i8.p0p0i32.i64(i32* %5, i8* %1, i32** %arrayidx, i64 0, metadata !11), !tbaa !2, !noalias !11 + store i32 42, i32* %6, align 4, !tbaa !9, !noalias !11 + %arrayidx3 = getelementptr inbounds [3 x i32*], [3 x i32*]* %rp, i32 0, i32 1 + %7 = load i32*, i32** %arrayidx3, align 4, !tbaa !2, !noalias !11 + %8 = call i32* @llvm.noalias.p0i32.p0i8.p0p0i32.i64(i32* %7, i8* %1, i32** %arrayidx3, i64 0, metadata !11), !tbaa !2, !noalias !11 + store i32 43, i32* %8, align 4, !tbaa !9, !noalias !11 + %arrayidx4 = getelementptr inbounds [3 x i32*], [3 x i32*]* %rp, i32 0, i32 2 + %9 = load i32*, i32** %arrayidx4, align 4, !tbaa !2, !noalias !11 + %10 = call i32* @llvm.noalias.p0i32.p0i8.p0p0i32.i64(i32* %9, i8* %1, i32** %arrayidx4, i64 0, metadata !11), !tbaa !2, !noalias !11 + store i32 44, i32* %10, align 4, !tbaa !9, !noalias !11 + %11 = bitcast [3 x i32*]* %rp to i8* + call void @llvm.lifetime.end.p0i8(i64 12, i8* %11) #4 + ret void +} + +; CHECK-LABEL: @test_ra( +; CHECK-NOT: alloca +; CHECK: %0 = call i8* @llvm.noalias.decl.p0i8.p0p0i32.i64(i32** null, i64 0, metadata !11) +; CHECK: %1 = call i8* @llvm.noalias.decl.p0i8.p0p0i32.i64(i32** null, i64 8, metadata !11) +; CHECK: %2 = call i8* @llvm.noalias.decl.p0i8.p0p0i32.i64(i32** null, i64 16, metadata !11) +; CHECK: %3 = call i32* @llvm.noalias.p0i32.p0i8.p0p0i32.i64(i32* %_p, i8* %0, i32** null, i64 0, metadata !11), !tbaa !5, !noalias !11 +; CHECK: %4 = call i32* @llvm.noalias.p0i32.p0i8.p0p0i32.i64(i32* %add.ptr, i8* %1, i32** null, i64 8, metadata !11), !tbaa !5, !noalias !11 +; CHECK: %5 = call i32* @llvm.noalias.p0i32.p0i8.p0p0i32.i64(i32* %add.ptr2, i8* %2, i32** null, i64 16, metadata !11), !tbaa !5, !noalias !11 + +; Function Attrs: argmemonly nounwind +declare i8* @llvm.noalias.decl.p0i8.p0a3p0i32.i64([3 x i32*]*, i64, metadata) #1 + +; Function Attrs: nounwind +define dso_local void @test_rs(i32* %_p) #0 { +entry: + %_p.addr = alloca i32*, align 4 + %foo = alloca %struct.FOO, align 4 + store i32* %_p, i32** %_p.addr, align 4, !tbaa !2, !noalias !14 + store %struct.FOO undef, %struct.FOO* %foo, align 4, !noalias !14 + %0 = bitcast %struct.FOO* %foo to i8* + call void @llvm.lifetime.start.p0i8(i64 12, i8* %0) #4, !noalias !14 + %1 = call i8* @llvm.noalias.decl.p0i8.p0s_struct.FOOs.i64(%struct.FOO* %foo, i64 0, metadata !14) + %mP0 = getelementptr inbounds %struct.FOO, %struct.FOO* %foo, i32 0, i32 0 + %2 = load i32*, i32** %_p.addr, align 4, !tbaa !2, !noalias !14 + store i32* %2, i32** %mP0, align 4, !tbaa !17, !noalias !14 + %mP1 = getelementptr inbounds %struct.FOO, %struct.FOO* %foo, i32 0, i32 1 + %3 = load i32*, i32** %_p.addr, align 4, !tbaa !2, !noalias !14 + %add.ptr = getelementptr inbounds i32, i32* %3, i32 1 + store i32* %add.ptr, i32** %mP1, align 4, !tbaa !19, !noalias !14 + %mP2 = getelementptr inbounds %struct.FOO, %struct.FOO* %foo, i32 0, i32 2 + %4 = load i32*, i32** %_p.addr, align 4, !tbaa !2, !noalias !14 + %add.ptr1 = getelementptr inbounds i32, i32* %4, i32 2 + store i32* %add.ptr1, i32** %mP2, align 4, !tbaa !20, !noalias !14 + %mP02 = getelementptr inbounds %struct.FOO, %struct.FOO* %foo, i32 0, i32 0 + %5 = load i32*, i32** %mP02, align 4, !tbaa !17, !noalias !14 + %6 = call i32* @llvm.noalias.p0i32.p0i8.p0p0i32.i64(i32* %5, i8* %1, i32** %mP02, i64 0, metadata !14), !tbaa !17, !noalias !14 + store i32 42, i32* %6, align 4, !tbaa !9, !noalias !14 + %mP13 = getelementptr inbounds %struct.FOO, %struct.FOO* %foo, i32 0, i32 1 + %7 = load i32*, i32** %mP13, align 4, !tbaa !19, !noalias !14 + %8 = call i32* @llvm.noalias.p0i32.p0i8.p0p0i32.i64(i32* %7, i8* %1, i32** %mP13, i64 0, metadata !14), !tbaa !19, !noalias !14 + store i32 43, i32* %8, align 4, !tbaa !9, !noalias !14 + %mP24 = getelementptr inbounds %struct.FOO, %struct.FOO* %foo, i32 0, i32 2 + %9 = load i32*, i32** %mP24, align 4, !tbaa !20, !noalias !14 + %10 = call i32* @llvm.noalias.p0i32.p0i8.p0p0i32.i64(i32* %9, i8* %1, i32** %mP24, i64 0, metadata !14), !tbaa !20, !noalias !14 + store i32 44, i32* %10, align 4, !tbaa !9, !noalias !14 + %11 = bitcast %struct.FOO* %foo to i8* + call void @llvm.lifetime.end.p0i8(i64 12, i8* %11) #4 + ret void +} + +; CHECK-LABEL: @test_rs( +; CHECK-NOT: alloca +; CHECK: %0 = call i8* @llvm.noalias.decl.p0i8.p0p0i32.i64(i32** null, i64 0, metadata !14) +; CHECK: %1 = call i8* @llvm.noalias.decl.p0i8.p0p0i32.i64(i32** null, i64 8, metadata !14) +; CHECK: %2 = call i8* @llvm.noalias.decl.p0i8.p0p0i32.i64(i32** null, i64 16, metadata !14) +; CHECK: %3 = call i32* @llvm.noalias.p0i32.p0i8.p0p0i32.i64(i32* %_p, i8* %0, i32** null, i64 0, metadata !14), !tbaa !17, !noalias !14 +; CHECK: %4 = call i32* @llvm.noalias.p0i32.p0i8.p0p0i32.i64(i32* %add.ptr, i8* %1, i32** null, i64 8, metadata !14), !tbaa !19, !noalias !14 +; CHECK: %5 = call i32* @llvm.noalias.p0i32.p0i8.p0p0i32.i64(i32* %add.ptr1, i8* %2, i32** null, i64 16, metadata !14), !tbaa !20, !noalias !14 + +; Function Attrs: argmemonly nounwind speculatable +define dso_local void @test_ri_inlined(i32* %_p) local_unnamed_addr #2 !noalias !21 { +entry: + %rp = alloca i32*, align 4 + %0 = bitcast i32** %rp to i8* + call void @llvm.lifetime.start.p0i8(i64 4, i8* nonnull %0) #4, !noalias !24 + %1 = call i8* @llvm.noalias.decl.p0i8.p0p0i32.i64(i32** nonnull %rp, i64 0, metadata !27), !noalias !21 + store i32* %_p, i32** %rp, noalias_sidechannel i32** undef, align 4, !noalias !24 + %2 = load i32*, i32** %rp, noalias_sidechannel i32** undef, align 4, !noalias !28 + %3 = call i32* @llvm.side.noalias.p0i32.p0i8.p0p0i32.p0p0i32.i64(i32* %2, i8* %1, i32** %rp, i32** undef, i64 0, metadata !27) #4, !noalias !28 + store i32 42, i32* %2, noalias_sidechannel i32* %3, align 4, !noalias !28 + call void @llvm.lifetime.end.p0i8(i64 4, i8* nonnull %0) #4, !noalias !21 + ret void +} + +; CHECK-LABEL: @test_ri_inlined( +; CHECK-NOT: alloca +; CHECK: %0 = call i8* @llvm.noalias.decl.p0i8.p0p0i32.i64(i32** null, i64 0, metadata !24) +; CHECK: %1 = call i32* @llvm.side.noalias.p0i32.p0i8.p0p0i32.p0p0i32.i64(i32* %_p, i8* %0, i32** null, i32** undef, i64 0, metadata !24){{.*}}, !noalias !27 + +; Function Attrs: argmemonly nounwind speculatable +define dso_local void @test_ra_inlined(i32* %_p) local_unnamed_addr #2 !noalias !29 { +entry: + %rp = alloca [3 x i32*], align 4 + %.fca.0.gep = getelementptr inbounds [3 x i32*], [3 x i32*]* %rp, i32 0, i32 0 + %0 = bitcast [3 x i32*]* %rp to i8* + call void @llvm.lifetime.start.p0i8(i64 12, i8* nonnull %0) #4, !noalias !32 + %1 = call i8* @llvm.noalias.decl.p0i8.p0a3p0i32.i64([3 x i32*]* nonnull %rp, i64 0, metadata !35), !noalias !29 + store i32* %_p, i32** %.fca.0.gep, noalias_sidechannel i32** undef, align 4, !noalias !32 + %arrayinit.element = getelementptr inbounds [3 x i32*], [3 x i32*]* %rp, i32 0, i32 1 + %add.ptr = getelementptr inbounds i32, i32* %_p, i32 1 + store i32* %add.ptr, i32** %arrayinit.element, noalias_sidechannel i32** undef, align 4, !noalias !32 + %arrayinit.element1 = getelementptr inbounds [3 x i32*], [3 x i32*]* %rp, i32 0, i32 2 + %add.ptr2 = getelementptr inbounds i32, i32* %_p, i32 2 + store i32* %add.ptr2, i32** %arrayinit.element1, noalias_sidechannel i32** undef, align 4, !noalias !32 + %2 = load i32*, i32** %.fca.0.gep, noalias_sidechannel i32** undef, align 4, !noalias !36 + %3 = call i32* @llvm.side.noalias.p0i32.p0i8.p0p0i32.p0p0i32.i64(i32* %2, i8* %1, i32** %.fca.0.gep, i32** undef, i64 0, metadata !35) #4, !noalias !36 + store i32 42, i32* %2, noalias_sidechannel i32* %3, align 4, !noalias !36 + %arrayidx1.i = getelementptr inbounds i32*, i32** %.fca.0.gep, i32 1 + %4 = load i32*, i32** %arrayidx1.i, noalias_sidechannel i32** undef, align 4, !noalias !36 + %5 = call i32* @llvm.side.noalias.p0i32.p0i8.p0p0i32.p0p0i32.i64(i32* %4, i8* %1, i32** nonnull %arrayidx1.i, i32** undef, i64 0, metadata !35) #4, !noalias !36 + store i32 43, i32* %4, noalias_sidechannel i32* %5, align 4, !noalias !36 + %arrayidx2.i = getelementptr inbounds i32*, i32** %.fca.0.gep, i32 2 + %6 = load i32*, i32** %arrayidx2.i, noalias_sidechannel i32** undef, align 4, !noalias !36 + %7 = call i32* @llvm.side.noalias.p0i32.p0i8.p0p0i32.p0p0i32.i64(i32* %6, i8* %1, i32** nonnull %arrayidx2.i, i32** undef, i64 0, metadata !35) #4, !noalias !36 + store i32 44, i32* %6, noalias_sidechannel i32* %7, align 4, !noalias !36 + call void @llvm.lifetime.end.p0i8(i64 12, i8* nonnull %0) #4, !noalias !29 + ret void +} + +; CHECK-LABEL: @test_ra_inlined( +; CHECK-NOT: alloca +; CHECK: %0 = call i8* @llvm.noalias.decl.p0i8.p0p0i32.i64(i32** null, i64 0, metadata !31) +; CHECK: %1 = call i8* @llvm.noalias.decl.p0i8.p0p0i32.i64(i32** null, i64 8, metadata !31) +; CHECK: %2 = call i8* @llvm.noalias.decl.p0i8.p0p0i32.i64(i32** null, i64 16, metadata !31) +; CHECK: %3 = call i32* @llvm.side.noalias.p0i32.p0i8.p0p0i32.p0p0i32.i64(i32* %_p, i8* %0, i32** null, i32** undef, i64 0, metadata !31){{.*}}, !noalias !34 +; CHECK: %4 = call i32* @llvm.side.noalias.p0i32.p0i8.p0p0i32.p0p0i32.i64(i32* %add.ptr, i8* %1, i32** nonnull null, i32** undef, i64 8, metadata !31){{.*}}, !noalias !34 +; CHECK: %5 = call i32* @llvm.side.noalias.p0i32.p0i8.p0p0i32.p0p0i32.i64(i32* %add.ptr2, i8* %2, i32** nonnull null, i32** undef, i64 16, metadata !31){{.*}}, !noalias !34 + + +; Function Attrs: argmemonly nounwind speculatable +define dso_local void @test_rs_inlined(i32* %_p) local_unnamed_addr #2 !noalias !37 { +entry: + %foo = alloca %struct.FOO, align 4 + %.fca.0.gep = getelementptr inbounds %struct.FOO, %struct.FOO* %foo, i32 0, i32 0 + %.fca.1.gep = getelementptr inbounds %struct.FOO, %struct.FOO* %foo, i32 0, i32 1 + %.fca.2.gep = getelementptr inbounds %struct.FOO, %struct.FOO* %foo, i32 0, i32 2 + %0 = bitcast %struct.FOO* %foo to i8* + call void @llvm.lifetime.start.p0i8(i64 12, i8* nonnull %0) #4, !noalias !40 + %1 = call i8* @llvm.noalias.decl.p0i8.p0s_struct.FOOs.i64(%struct.FOO* nonnull %foo, i64 0, metadata !43), !noalias !37 + store i32* %_p, i32** %.fca.0.gep, noalias_sidechannel i32** undef, align 4, !noalias !40 + %add.ptr = getelementptr inbounds i32, i32* %_p, i32 1 + store i32* %add.ptr, i32** %.fca.1.gep, noalias_sidechannel i32** undef, align 4, !noalias !40 + %add.ptr1 = getelementptr inbounds i32, i32* %_p, i32 2 + store i32* %add.ptr1, i32** %.fca.2.gep, noalias_sidechannel i32** undef, align 4, !noalias !40 + %mP0.i = getelementptr inbounds %struct.FOO, %struct.FOO* %foo, i32 0, i32 0 + %2 = load i32*, i32** %mP0.i, noalias_sidechannel i32** undef, align 4, !noalias !44 + %3 = call i32* @llvm.side.noalias.p0i32.p0i8.p0p0i32.p0p0i32.i64(i32* %2, i8* %1, i32** %mP0.i, i32** undef, i64 0, metadata !43) #4, !noalias !44 + store i32 42, i32* %2, noalias_sidechannel i32* %3, align 4, !noalias !44 + %mP1.i = getelementptr inbounds %struct.FOO, %struct.FOO* %foo, i32 0, i32 1 + %4 = load i32*, i32** %mP1.i, noalias_sidechannel i32** undef, align 4, !noalias !44 + %5 = call i32* @llvm.side.noalias.p0i32.p0i8.p0p0i32.p0p0i32.i64(i32* %4, i8* %1, i32** nonnull %mP1.i, i32** undef, i64 0, metadata !43) #4, !noalias !44 + store i32 43, i32* %4, noalias_sidechannel i32* %5, align 4, !noalias !44 + %mP2.i = getelementptr inbounds %struct.FOO, %struct.FOO* %foo, i32 0, i32 2 + %6 = load i32*, i32** %mP2.i, noalias_sidechannel i32** undef, align 4, !noalias !44 + %7 = call i32* @llvm.side.noalias.p0i32.p0i8.p0p0i32.p0p0i32.i64(i32* %6, i8* %1, i32** nonnull %mP2.i, i32** undef, i64 0, metadata !43) #4, !noalias !44 + store i32 44, i32* %6, noalias_sidechannel i32* %7, align 4, !noalias !44 + call void @llvm.lifetime.end.p0i8(i64 12, i8* nonnull %0) #4, !noalias !37 + ret void +} + +; CHECK-LABEL: @test_rs_inlined( +; CHECK-NOT: alloca +; CHECK: %0 = call i8* @llvm.noalias.decl.p0i8.p0p0i32.i64(i32** null, i64 0, metadata !38) +; CHECK: %1 = call i8* @llvm.noalias.decl.p0i8.p0p0i32.i64(i32** null, i64 8, metadata !38) +; CHECK: %2 = call i8* @llvm.noalias.decl.p0i8.p0p0i32.i64(i32** null, i64 16, metadata !38) +; CHECK: %3 = call i32* @llvm.side.noalias.p0i32.p0i8.p0p0i32.p0p0i32.i64(i32* %_p, i8* %0, i32** null, i32** undef, i64 0, metadata !38){{.*}}, !noalias !41 +; CHECK: %4 = call i32* @llvm.side.noalias.p0i32.p0i8.p0p0i32.p0p0i32.i64(i32* %add.ptr, i8* %1, i32** nonnull null, i32** undef, i64 8, metadata !38){{.*}}, !noalias !41 +; CHECK: %5 = call i32* @llvm.side.noalias.p0i32.p0i8.p0p0i32.p0p0i32.i64(i32* %add.ptr1, i8* %2, i32** nonnull null, i32** undef, i64 16, metadata !38){{.*}}, !noalias !41 + +; Function Attrs: argmemonly nounwind +declare i8* @llvm.noalias.decl.p0i8.p0s_struct.FOOs.i64(%struct.FOO*, i64, metadata) #1 + +; Function Attrs: argmemonly nounwind +declare void @llvm.lifetime.start.p0i8(i64 immarg, i8* nocapture) #1 + +; Function Attrs: argmemonly nounwind +declare i8* @llvm.noalias.decl.p0i8.p0p0i32.i64(i32**, i64, metadata) #1 + +; Function Attrs: argmemonly nounwind speculatable +declare i32* @llvm.noalias.p0i32.p0i8.p0p0i32.i64(i32*, i8*, i32**, i64, metadata) #2 + +; Function Attrs: argmemonly nounwind +declare void @llvm.lifetime.end.p0i8(i64 immarg, i8* nocapture) #1 + +; Function Attrs: nounwind readnone speculatable +declare i32* @llvm.side.noalias.p0i32.p0i8.p0p0i32.p0p0i32.i64(i32*, i8*, i32**, i32**, i64, metadata) #3 + +attributes #0 = { nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #1 = { argmemonly nounwind } +attributes #2 = { argmemonly nounwind speculatable } +attributes #3 = { nounwind readnone speculatable } +attributes #4 = { nounwind } + +!llvm.module.flags = !{!0} +!llvm.ident = !{!1} + +!0 = !{i32 1, !"wchar_size", i32 4} +!1 = !{!"clang"} +!2 = !{!3, !3, i64 0, i64 4} +!3 = !{!4, i64 4, !"any pointer"} +!4 = !{!5, i64 1, !"omnipotent char"} +!5 = !{!"Simple C/C++ TBAA"} +!6 = !{!7} +!7 = distinct !{!7, !8, !"test_ri: rp"} +!8 = distinct !{!8, !"test_ri"} +!9 = !{!10, !10, i64 0, i64 4} +!10 = !{!4, i64 4, !"int"} +!11 = !{!12} +!12 = distinct !{!12, !13, !"test_ra: rp"} +!13 = distinct !{!13, !"test_ra"} +!14 = !{!15} +!15 = distinct !{!15, !16, !"test_rs: foo"} +!16 = distinct !{!16, !"test_rs"} +!17 = !{!18, !3, i64 0, i64 4} +!18 = !{!4, i64 12, !"FOO", !3, i64 0, i64 4, !3, i64 4, i64 4, !3, i64 8, i64 4} +!19 = !{!18, !3, i64 4, i64 4} +!20 = !{!18, !3, i64 8, i64 4} +!21 = !{!22} +!22 = distinct !{!22, !23, !"test_ri_inlined: unknown scope"} +!23 = distinct !{!23, !"test_ri_inlined"} +!24 = !{!25, !22} +!25 = distinct !{!25, !26, !"test_ri_inlined: rp"} +!26 = distinct !{!26, !"test_ri_inlined"} +!27 = !{!25} +!28 = !{!22, !25, !22} +!29 = !{!30} +!30 = distinct !{!30, !31, !"test_ra_inlined: unknown scope"} +!31 = distinct !{!31, !"test_ra_inlined"} +!32 = !{!33, !30} +!33 = distinct !{!33, !34, !"test_ra_inlined: rp"} +!34 = distinct !{!34, !"test_ra_inlined"} +!35 = !{!33} +!36 = !{!30, !33, !30} +!37 = !{!38} +!38 = distinct !{!38, !39, !"test_rs_inlined: unknown scope"} +!39 = distinct !{!39, !"test_rs_inlined"} +!40 = !{!41, !38} +!41 = distinct !{!41, !42, !"test_rs_inlined: foo"} +!42 = distinct !{!42, !"test_rs_inlined"} +!43 = !{!41} +!44 = !{!38, !41, !38} Index: llvm/unittests/IR/IRBuilderTest.cpp =================================================================== --- llvm/unittests/IR/IRBuilderTest.cpp +++ llvm/unittests/IR/IRBuilderTest.cpp @@ -12,6 +12,7 @@ #include "llvm/IR/DataLayout.h" #include "llvm/IR/Function.h" #include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/Intrinsics.h" #include "llvm/IR/LLVMContext.h" #include "llvm/IR/MDBuilder.h" #include "llvm/IR/Module.h" @@ -293,6 +294,202 @@ EXPECT_FALSE(verifyModule(*M)); } +TEST_F(IRBuilderTest, NoAlias) { + IRBuilder<> Builder(BB); + AllocaInst *Var1 = Builder.CreateAlloca(Builder.getInt8Ty()); + + AllocaInst *VarP = Builder.CreateAlloca(Builder.getInt8PtrTy()); + Builder.CreateStore(Var1, VarP); + + MDBuilder MDB(BB->getContext()); + MDNode *NewDomain = + MDB.createAnonymousAliasScopeDomain("Test Domain"); + MDNode *AScope = + MDB.createAnonymousAliasScope(NewDomain, "Test Scope"); + + CallInst *NAD = + cast(Builder.CreateNoAliasDeclaration(VarP, AScope)); + Value *Pointer = Builder.CreateLoad(VarP); + CallInst *NAP = + cast(Builder.CreateNoAliasPointer(Pointer, NAD, VarP, AScope)); + + // llvm.noalias.decl + EXPECT_EQ(NAD->getArgOperand(Intrinsic::NoAliasDeclAllocaArg), VarP); + EXPECT_EQ( + cast(NAD->getArgOperand(Intrinsic::NoAliasDeclObjIdArg)) + ->getZExtValue(), + 0ull); + EXPECT_EQ( + cast(NAD->getArgOperand(Intrinsic::NoAliasDeclScopeArg)) + ->getMetadata(), + AScope); + + IntrinsicInst *II_NAD = dyn_cast(NAD); + ASSERT_TRUE(II_NAD != nullptr); + EXPECT_EQ(II_NAD->getIntrinsicID(), Intrinsic::noalias_decl); + + EXPECT_TRUE(NAD->onlyAccessesArgMemory()); + EXPECT_TRUE(NAD->doesNotThrow()); + + // llvm.noalias + EXPECT_EQ(NAP->getArgOperand(0), Pointer); + EXPECT_EQ(NAP->getArgOperand(Intrinsic::NoAliasNoAliasDeclArg), NAD); + EXPECT_EQ(NAP->getArgOperand(Intrinsic::NoAliasIdentifyPArg), VarP); + EXPECT_EQ( + cast(NAP->getArgOperand(Intrinsic::NoAliasScopeArg)) + ->getMetadata(), + AScope); + EXPECT_EQ( + cast(NAP->getArgOperand(Intrinsic::NoAliasIdentifyPObjIdArg)) + ->getZExtValue(), + 0ull); // default object id is 0 + + IntrinsicInst *II_NAP = dyn_cast(NAP); + ASSERT_TRUE(II_NAP != nullptr); + EXPECT_EQ(II_NAP->getIntrinsicID(), Intrinsic::noalias); + + EXPECT_TRUE(NAP->onlyAccessesArgMemory()); + EXPECT_TRUE(NAP->doesNotThrow()); +} + +TEST_F(IRBuilderTest, SideNoAliasAndArgGuard) { + IRBuilder<> Builder(BB); + AllocaInst *Var1 = Builder.CreateAlloca(Builder.getInt8Ty()); + + AllocaInst *VarP = Builder.CreateAlloca(Builder.getInt8PtrTy()); + Builder.CreateStore(Var1, VarP); + + MDBuilder MDB(BB->getContext()); + MDNode *NewDomain = MDB.createAnonymousAliasScopeDomain("Test Domain"); + MDNode *AScope = MDB.createAnonymousAliasScope(NewDomain, "Test Scope"); + + CallInst *NAD = + cast(Builder.CreateNoAliasDeclaration(VarP, AScope)); + Value *Pointer = Builder.CreateLoad(VarP); + CallInst *SNAP = cast(Builder.CreateSideNoAliasPlain( + Pointer, NAD, VarP, llvm::ConstantPointerNull::get(VarP->getType()), + NAD->getArgOperand(Intrinsic::NoAliasDeclObjIdArg), AScope)); + + CallInst *NAG = cast(Builder.CreateNoAliasArgGuard(Pointer, SNAP)); + + // create load on Pointer, with SNAP side channel + + // llvm.side.noalias + EXPECT_EQ(SNAP->getArgOperand(0), Pointer); + EXPECT_EQ(SNAP->getArgOperand(Intrinsic::SideNoAliasNoAliasDeclArg), NAD); + EXPECT_EQ(SNAP->getArgOperand(Intrinsic::SideNoAliasIdentifyPArg), VarP); + EXPECT_EQ( + cast(SNAP->getArgOperand(Intrinsic::SideNoAliasScopeArg)) + ->getMetadata(), + AScope); + ASSERT_TRUE(cast( + SNAP->getArgOperand(Intrinsic::SideNoAliasIdentifyPSideChannelArg))); + EXPECT_EQ(cast( + SNAP->getArgOperand(Intrinsic::SideNoAliasIdentifyPObjIdArg)) + ->getZExtValue(), + 0ull); // default object id is 0 + + IntrinsicInst *II_SNAP = dyn_cast(SNAP); + ASSERT_TRUE(II_SNAP != nullptr); + EXPECT_EQ(II_SNAP->getIntrinsicID(), Intrinsic::side_noalias); + + EXPECT_TRUE(SNAP->doesNotAccessMemory()); + EXPECT_TRUE(SNAP->doesNotThrow()); + + // llvm.noalias.arg.guard + EXPECT_EQ(NAG->getArgOperand(0), Pointer); + EXPECT_EQ(NAG->getArgOperand(1), SNAP); + + EXPECT_TRUE(NAG->doesNotAccessMemory()); + EXPECT_TRUE(NAG->doesNotThrow()); +} + +TEST_F(IRBuilderTest, NoAliasCopyGuard) { + // // roughly equivalent code for: + // struct FOO { char* __reststrict p; } + // char copy_restrict(struct FOO* p) { + // struct FOO local=*p; // Introduces llvm.noalias.copy.guard + // return *local.p; // NOTE: this part is omitted + // } + + IRBuilder<> Builder(BB); + StructType *ST = + StructType::create(BB->getContext(), {Builder.getInt8PtrTy()}); + + // pointer to incoming struct + AllocaInst *VarIn = Builder.CreateAlloca(ST->getPointerTo()); + + // local struct + AllocaInst *Var1 = Builder.CreateAlloca(ST); + // pointer to local struct + AllocaInst *VarP = Builder.CreateAlloca(ST->getPointerTo()); + Builder.CreateStore(Var1, VarP); + + MDBuilder MDB(BB->getContext()); + MDNode *NewDomain = MDB.createAnonymousAliasScopeDomain("Test Domain"); + MDNode *AScope = MDB.createAnonymousAliasScope(NewDomain, "Test Scope"); + + CallInst *NAD = + cast(Builder.CreateNoAliasDeclaration(VarP, AScope)); + (void)NAD; // not used as we don't do the 'return *local.p' here + + auto *SrcPointer = Builder.CreateLoad(VarIn); + // one entry of length 2, containing {-1, 0} : + const int64_t Indices[] = {2, -1, 0}; + auto Int8PtrNull = ConstantPointerNull::get(Builder.getInt8PtrTy()); + CallInst *GP = cast(Builder.CreateNoAliasCopyGuard( + SrcPointer, + Int8PtrNull, // out-of-function __restrict declaration + Indices, // For struct FOO* f, all f[X].p (X ~ -1, .p ~ 0) are restrict + AScope)); + auto *Copy = Builder.CreateMemCpy( + Var1, 4, GP, 4, + BB->getParent()->getParent()->getDataLayout().getTypeStoreSize(ST)); + (void)Copy; + + // llvm.side.noalias + EXPECT_EQ(GP->getArgOperand(Intrinsic::NoAliasCopyGuardIdentifyPBaseObject), + SrcPointer); + EXPECT_EQ(GP->getArgOperand(Intrinsic::NoAliasCopyGuardNoAliasDeclArg), + Int8PtrNull); + + // hmm. how to easily check that we are pointing to a { { -1, 0 } } array ? + EXPECT_TRUE(cast(GP->getArgOperand( + Intrinsic::NoAliasCopyGuardIndicesArg)) != nullptr); + EXPECT_EQ(cast( + GP->getArgOperand(Intrinsic::NoAliasCopyGuardScopeArg)) + ->getMetadata(), + AScope); + + EXPECT_TRUE(GP->doesNotAccessMemory()); + EXPECT_TRUE(GP->doesNotThrow()); +} + +TEST_F(IRBuilderTest, SideChannel) { + IRBuilder<> Builder(BB); + + auto *L = Builder.CreateLoad(GV->getValueType(), GV); + EXPECT_TRUE(!L->hasNoaliasSideChannelOperand()); + + auto *S = Builder.CreateStore(L, GV); + EXPECT_TRUE(!S->hasNoaliasSideChannelOperand()); + + L->setNoaliasSideChannelOperand(GV); + EXPECT_TRUE(L->hasNoaliasSideChannelOperand()); + + S->setNoaliasSideChannelOperand(GV); + EXPECT_TRUE(S->hasNoaliasSideChannelOperand()); + + EXPECT_EQ(L->getNoaliasSideChannelOperand(), GV); + EXPECT_EQ(S->getNoaliasSideChannelOperand(), GV); + + L->removeNoaliasSideChannelOperand(); + EXPECT_TRUE(!L->hasNoaliasSideChannelOperand()); + + S->removeNoaliasSideChannelOperand(); + EXPECT_TRUE(!S->hasNoaliasSideChannelOperand()); +} + TEST_F(IRBuilderTest, Lifetime) { IRBuilder<> Builder(BB); AllocaInst *Var1 = Builder.CreateAlloca(Builder.getInt8Ty());