Index: clang/include/clang/AST/Type.h =================================================================== --- clang/include/clang/AST/Type.h +++ clang/include/clang/AST/Type.h @@ -822,6 +822,19 @@ /// Returns true if it is not a class or if the class might not be dynamic. bool mayBeNotDynamicClass() const; + /// Returns true if this is a restrict pointer or contains a restrict pointer. + /// NOTE: A pointer to a restrict pointer does not count. + bool isRestrictOrContainsRestrictMembers() const; + + /// Get the encoded indices, describing where in memory restrict pointers are + /// located. + std::vector getRestrictIndices() const; + +private: + void getRestrictIndices(std::vector &OutEncodedIndices, + SmallVector &InCurrentIndices) const; + +public: // Don't promise in the API that anything besides 'const' can be // easily added. Index: clang/include/clang/Basic/CodeGenOptions.def =================================================================== --- clang/include/clang/Basic/CodeGenOptions.def +++ clang/include/clang/Basic/CodeGenOptions.def @@ -194,6 +194,8 @@ CODEGENOPT(RelaxedAliasing , 1, 0) ///< Set when -fno-strict-aliasing is enabled. CODEGENOPT(StructPathTBAA , 1, 0) ///< Whether or not to use struct-path TBAA. CODEGENOPT(NewStructPathTBAA , 1, 0) ///< Whether or not to use enhanced struct-path TBAA. +CODEGENOPT(FullRestrict , 1, 0) ///< Set when -ffull-restrict is enabled. +CODEGENOPT(NoNoAliasArgAttr , 1, 0) ///< Set when -fno-noalias-arguments is specified. CODEGENOPT(SaveTempLabels , 1, 0) ///< Save temporary labels. 
CODEGENOPT(SanitizeAddressUseAfterScope , 1, 0) ///< Enable use-after-scope detection ///< in AddressSanitizer Index: clang/include/clang/Driver/Options.td =================================================================== --- clang/include/clang/Driver/Options.td +++ clang/include/clang/Driver/Options.td @@ -992,6 +992,15 @@ defm fixed_point : OptInFFlag<"fixed-point", "Enable", "Disable", " fixed point types">; defm cxx_static_destructors : OptOutFFlag<"c++-static-destructors", "", "Disable C++ static destructor registration">; +def ffull_restrict : Flag<["-"], "ffull-restrict">, Group, + Flags<[CoreOption, CC1Option]>, + HelpText<"Enable full restrict support">; +def fno_full_restrict : Flag<["-"], "fno-full-restrict">, Group, + Flags<[CoreOption, CC1Option]>, + HelpText<"Disable full restrict support, falling back to the legacy support">; +def fno_noalias_arguments : Flag<["-"], "fno-noalias-arguments">, + Group, Flags<[CoreOption, CC1Option]>, + HelpText<"Do not map restrict arguments onto noalias attribute">; def fsymbol_partition_EQ : Joined<["-"], "fsymbol-partition=">, Group, Flags<[CC1Option]>; Index: clang/lib/AST/Type.cpp =================================================================== --- clang/lib/AST/Type.cpp +++ clang/lib/AST/Type.cpp @@ -100,6 +100,65 @@ return !ClassDecl || ClassDecl->mayBeNonDynamicClass(); } +bool QualType::isRestrictOrContainsRestrictMembers() const { + if (isRestrictQualified()) { + return true; + } + + const Type *BaseElementType = getCanonicalType()->getBaseElementTypeUnsafe(); + assert(!isa(BaseElementType)); + + if (const RecordType *RecTy = dyn_cast(BaseElementType)) { + RecordDecl *RD = RecTy->getDecl(); + for (FieldDecl *FD : RD->fields()) { + if (FD->getType().isRestrictOrContainsRestrictMembers()) { + return true; + } + } + } + + return false; +} + +std::vector QualType::getRestrictIndices() const { + // -1 represents an unbounded array + std::vector EncodedIndices; + SmallVector BaseIndices = {-1}; + + 
getRestrictIndices(EncodedIndices, BaseIndices); + + return EncodedIndices; +} + +void QualType::getRestrictIndices( + std::vector &OutEncodedIndices, + SmallVector &InCurrentIndices) const { + if (isRestrictQualified()) { + OutEncodedIndices.push_back(InCurrentIndices.size()); + OutEncodedIndices.insert(OutEncodedIndices.end(), InCurrentIndices.begin(), + InCurrentIndices.end()); + + return; + } + + QualType CannonTy = getCanonicalType(); + if (const auto *RecTy = CannonTy->getAs()) { + RecordDecl *RD = RecTy->getDecl(); + InCurrentIndices.push_back(0); + for (FieldDecl *FD : RD->fields()) { + FD->getType().getRestrictIndices(OutEncodedIndices, InCurrentIndices); + InCurrentIndices.back()++; + } + InCurrentIndices.pop_back(); + } else if (const auto *ArrayTy = dyn_cast(CannonTy)) { + // Note: we use -1 for bounded and unbounded arrays + InCurrentIndices.push_back(-1); // -1 indicates that any index is fine + ArrayTy->getElementType().getRestrictIndices(OutEncodedIndices, + InCurrentIndices); + InCurrentIndices.pop_back(); + } +} + bool QualType::isConstant(QualType T, const ASTContext &Ctx) { if (T.isConstQualified()) return true; Index: clang/lib/CodeGen/Address.h =================================================================== --- clang/lib/CodeGen/Address.h +++ clang/lib/CodeGen/Address.h @@ -39,6 +39,13 @@ return Pointer; } + /// Replace the current pointer of the address with a new pointer. + void adaptPointer(llvm::Value *newPointer) { + assert(Pointer->getType() == newPointer->getType() && + "Address: changing the pointer must not change the type"); + Pointer = newPointer; + } + /// Return the type of the pointer value. 
llvm::PointerType *getType() const { return llvm::cast(getPointer()->getType()); Index: clang/lib/CodeGen/CGCall.cpp =================================================================== --- clang/lib/CodeGen/CGCall.cpp +++ clang/lib/CodeGen/CGCall.cpp @@ -2584,7 +2584,10 @@ } // Set 'noalias' if an argument type has the `restrict` qualifier. - if (Arg->getType().isRestrictQualified()) + // For accurate full restrict support, we should not annotate arguments + // with noalias. The noalias attribute is too strong. + if (Arg->getType().isRestrictQualified() && + (!CGM.getCodeGenOpts().NoNoAliasArgAttr)) AI->addAttr(llvm::Attribute::NoAlias); } @@ -4553,12 +4556,28 @@ ArgInfo.getDirectOffset() == 0) { assert(NumIRArgs == 1); llvm::Value *V; - if (!I->isAggregate()) + if (!I->isAggregate()) { V = I->getKnownRValue().getScalarVal(); - else - V = Builder.CreateLoad( - I->hasLValue() ? I->getKnownLValue().getAddress(*this) - : I->getKnownRValue().getAggregateAddress()); + } else { + Address Addr = I->hasLValue() + ? I->getKnownLValue().getAddress(*this) + : I->getKnownRValue().getAggregateAddress(); + if (I->getType().isRestrictOrContainsRestrictMembers() && + CGM.getCodeGenOpts().FullRestrict) { + // protect a load of an aggregate with restrict member pointers with + // an llvm.noalias.copy.guard. + // NOTE: also see CodeGenFunction::EmitAggregateCopy(); + auto NoAliasScopeMD = + getExistingOrUnknownNoAliasScope(Addr.getPointer()); + auto NoAliasDecl = getExistingNoAliasDeclOrNullptr(NoAliasScopeMD); + Addr.adaptPointer(Builder.CreateNoAliasCopyGuard( + Addr.getPointer(), NoAliasDecl, + I->getType().getRestrictIndices(), NoAliasScopeMD)); + } + llvm::LoadInst *LD = + Builder.CreateLoad(Addr, I->getType().isVolatileQualified()); + V = LD; + } // Implement swifterror by copying into a new swifterror argument. // We'll write back in the normal path out of the call. @@ -4640,6 +4659,22 @@ } else { // In the simple case, just pass the coerced loaded value. 
assert(NumIRArgs == 1); + if (Src.getElementType() == ArgInfo.getCoerceToType()) { + QualType Ty = I->getType(); + if (Ty.isRestrictOrContainsRestrictMembers() && + CGM.getCodeGenOpts().FullRestrict) { + // Protect a load of an aggregate with restrict member pointers with + // an llvm.noalias.copy.guard + // NOTE: also see CodeGenFunction::EmitAggregateCopy(); + auto NoAliasScopeMD = + getExistingOrUnknownNoAliasScope(Src.getPointer()); + auto NoAliasDecl = getExistingNoAliasDeclOrNullptr(NoAliasScopeMD); + Src.adaptPointer(Builder.CreateNoAliasCopyGuard( + Src.getPointer(), NoAliasDecl, Ty.getRestrictIndices(), + NoAliasScopeMD)); + } + } + llvm::Value *Load = CreateCoercedLoad(Src, ArgInfo.getCoerceToType(), *this); Index: clang/lib/CodeGen/CGDecl.cpp =================================================================== --- clang/lib/CodeGen/CGDecl.cpp +++ clang/lib/CodeGen/CGDecl.cpp @@ -36,6 +36,7 @@ #include "llvm/IR/DataLayout.h" #include "llvm/IR/GlobalVariable.h" #include "llvm/IR/Intrinsics.h" +#include "llvm/IR/MDBuilder.h" #include "llvm/IR/Type.h" using namespace clang; @@ -1289,6 +1290,7 @@ /// These turn into simple stack objects, or GlobalValues depending on target. void CodeGenFunction::EmitAutoVarDecl(const VarDecl &D) { AutoVarEmission emission = EmitAutoVarAlloca(D); + EmitAutoVarNoAlias(emission); EmitAutoVarInit(emission); EmitAutoVarCleanups(emission); } @@ -1880,6 +1882,126 @@ type.isVolatileQualified(), Builder, constant); } +// For all local restrict-qualified local variables, we create a noalias +// metadata scope. This scope is used to identify each restrict-qualified +// variable and the other memory accesses within the scope where its aliasing +// assumptions apply. The scope metadata is stored in the NoAliasAddrMap map +// where the pointer to the local variable is the key in the map. +// One variable can contain multiple restrict pointers. All of them are +// represented by a single scope. 
+void CodeGenFunction::EmitNoAliasDecl(const VarDecl &D, Address Loc) { + // Don't emit noalias intrinsics unless we're optimizing. + if (CGM.getCodeGenOpts().OptimizationLevel == 0) + return; + + if (!CGM.getCodeGenOpts().FullRestrict) + return; + + QualType type = D.getType(); + + // Emit a noalias intrinsic for restrict-qualified variables. + if (!type.isRestrictOrContainsRestrictMembers()) + return; + + // Only emit a llvm.noalias.decl if the address is an alloca. + if (!isa(Loc.getPointer()->stripPointerCasts())) + return; + + // NOTE: keep in sync with (clang) CGDecl: getExistingOrUnknownNoAliasScope + // NOTE: keep in sync with (clang) CGDecl: EmitAutoVarNoAlias/EmitNoAliasDecl + // NOTE: keep in sync with (llvm) InlineFunction: CloneAliasScopeMetadata + llvm::MDBuilder MDB(CurFn->getContext()); + if (!NoAliasDomain) + NoAliasDomain = MDB.createAnonymousAliasScopeDomain(CurFn->getName()); + + std::string Name = (llvm::Twine(CurFn->getName()) + ": " + D.getName()).str(); + + llvm::MDNode *Scope = MDB.createAnonymousAliasScope(NoAliasDomain, Name); + addNoAliasScope(Scope); + + SmallVector ScopeListEntries(1, Scope); + llvm::MDNode *ScopeList = + llvm::MDNode::get(CurFn->getContext(), ScopeListEntries); + + NoAliasAddrMap[Loc.getPointer()] = ScopeList; + + if (HaveInsertPoint()) { + NoAliasDeclMap[ScopeList] = + Builder.CreateNoAliasDeclaration(Loc.getPointer(), ScopeList); + } +} + +llvm::MDNode * +CodeGenFunction::getExistingOrUnknownNoAliasScope(llvm::Value *Ptr) { + auto NAI = NoAliasAddrMap.find( + Ptr->stripInBoundsOffsets()); // make sure to find the base object + + if (NAI != NoAliasAddrMap.end()) { + return NAI->second; + } + if (!NoAliasUnknownScope) { + // NOTE: keep in sync with (clang) CGDecl:getExistingOrUnknownNoAliasScope + // NOTE: keep in sync with (clang) CGDecl:EmitAutoVarNoAlias/EmitNoAliasDecl + // NOTE: keep in sync with (llvm) InlineFunction:CloneAliasScopeMetadata + + // The unknown scope is used when we cannot yet pinpoint the exact 
scope. + llvm::MDBuilder MDB(CurFn->getContext()); + if (!NoAliasDomain) + NoAliasDomain = MDB.createAnonymousAliasScopeDomain(CurFn->getName()); + std::string Name = + (llvm::Twine(CurFn->getName()) + ": unknown scope").str(); + + llvm::MDNode *Scope = MDB.createAnonymousAliasScope(NoAliasDomain, Name); + FnNoAliasInfo.addNoAliasScope(Scope); // keep this at function level + + SmallVector ScopeListEntries(1, Scope); + NoAliasUnknownScope = + llvm::MDNode::get(CurFn->getContext(), ScopeListEntries); + CurFn->setMetadata("noalias", NoAliasUnknownScope); + } + + return NoAliasUnknownScope; +} + +llvm::Value * +CodeGenFunction::getExistingNoAliasDeclOrNullptr(llvm::MDNode *NoAliasScopeMD) { + llvm::Value *&NoAliasDecl = NoAliasDeclMap[NoAliasScopeMD]; + if (NoAliasDecl == nullptr) { + NoAliasDecl = llvm::ConstantPointerNull::get( + llvm::Type::getInt8PtrTy(getLLVMContext())); + } + return NoAliasDecl; +} + +void CodeGenFunction::EmitAutoVarNoAlias(const AutoVarEmission &emission) { + assert(emission.Variable && "emission was not valid!"); + + // Don't emit noalias intrinsics unless we're optimizing. + if (CGM.getCodeGenOpts().OptimizationLevel == 0) + return; + + if (!CGM.getCodeGenOpts().FullRestrict) + return; + + const VarDecl &D = *emission.Variable; + + // Early exit: emission.getObjectAddress(..) can introduce extra code. + // Only do it if it is needed + if (!D.getType().isRestrictOrContainsRestrictMembers()) + return; + + // Check whether this is a byref variable that's potentially + // captured and moved by its own initializer. If so, we'll need to + // emit the initializer first, then copy into the variable. + const Expr *Init = D.getInit(); + bool capturedByInit = + Init && emission.IsEscapingByRef && isCapturedBy(D, Init); + + Address Loc = + capturedByInit ? emission.Addr : emission.getObjectAddress(*this); + EmitNoAliasDecl(D, Loc); +} + /// Emit an expression as an initializer for an object (variable, field, etc.) /// at the given location. 
The expression is not necessarily the normal /// initializer for the object, and the address is not necessarily @@ -2478,6 +2600,9 @@ llvm::Value *ArgVal = (DoStore ? Arg.getDirectValue() : nullptr); LValue lv = MakeAddrLValue(DeclPtr, Ty); + + EmitNoAliasDecl(D, DeclPtr); + if (IsScalar) { Qualifiers qs = Ty.getQualifiers(); if (Qualifiers::ObjCLifetime lt = qs.getObjCLifetime()) { Index: clang/lib/CodeGen/CGExpr.cpp =================================================================== --- clang/lib/CodeGen/CGExpr.cpp +++ clang/lib/CodeGen/CGExpr.cpp @@ -30,10 +30,12 @@ #include "clang/Basic/SourceManager.h" #include "llvm/ADT/Hashing.h" #include "llvm/ADT/StringExtras.h" +#include "llvm/IR/Constants.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/Intrinsics.h" #include "llvm/IR/LLVMContext.h" #include "llvm/IR/MDBuilder.h" +#include "llvm/IR/Metadata.h" #include "llvm/Support/ConvertUTF.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/Path.h" @@ -1727,6 +1729,24 @@ if (llvm::MDNode *RangeInfo = getRangeForLoadFromType(Ty)) Load->setMetadata(llvm::LLVMContext::MD_range, RangeInfo); + // If this is a load from a restrict-qualified variable, then we have pointer + // aliasing assumptions that can be applied to the pointer value being loaded. 
+ if (Ty.isRestrictQualified() && CGM.getCodeGenOpts().FullRestrict) { + auto NoAliasScopeMD = getExistingOrUnknownNoAliasScope(Addr.getPointer()); + auto NoAliasDecl = getExistingNoAliasDeclOrNullptr(NoAliasScopeMD); + auto *NoAliasLoad = Builder.CreateNoAliasPointer( + Load, NoAliasDecl, Addr.getPointer(), NoAliasScopeMD); + + // The llvm.noalias intrinsic can make use of the available alias info + llvm::AAMDNodes AAMetadata; + Load->getAAMetadata(AAMetadata); + NoAliasLoad->setAAMetadata(AAMetadata); + + // ..as well as the local restrict scope + // In both cases, this is about the 'P.addr'(getOperand(2) of llvm.noalias) + recordMemoryInstruction(NoAliasLoad); + return EmitFromMemory(NoAliasLoad, Ty); + } return EmitFromMemory(Load, Ty); } Index: clang/lib/CodeGen/CGExprAgg.cpp =================================================================== --- clang/lib/CodeGen/CGExprAgg.cpp +++ clang/lib/CodeGen/CGExprAgg.cpp @@ -1948,6 +1948,17 @@ } } + // Guard copies of structs containing restrict pointers + if (Ty.isRestrictOrContainsRestrictMembers() && + CGM.getCodeGenOpts().FullRestrict) { + // NOTE: also see CodeGenFunction::EmitCall() + auto NoAliasScopeMD = getExistingOrUnknownNoAliasScope(SrcPtr.getPointer()); + auto NoAliasDecl = getExistingNoAliasDeclOrNullptr(NoAliasScopeMD); + SrcPtr.adaptPointer(Builder.CreateNoAliasCopyGuard( + SrcPtr.getPointer(), NoAliasDecl, Ty.getRestrictIndices(), + NoAliasScopeMD)); + } + if (getLangOpts().CUDAIsDevice) { if (Ty->isCUDADeviceBuiltinSurfaceType()) { if (getTargetHooks().emitCUDADeviceBuiltinSurfaceDeviceCopy(*this, Dest, @@ -2036,6 +2047,8 @@ } auto Inst = Builder.CreateMemCpy(DestPtr, SrcPtr, SizeVal, isVolatile); + // track noalias scope for memcpy + recordMemoryInstruction(Inst); // Determine the metadata to describe the position of any padding in this // memcpy, as well as the TBAA tags for the members of the struct, in case Index: clang/lib/CodeGen/CGStmt.cpp 
=================================================================== --- clang/lib/CodeGen/CGStmt.cpp +++ clang/lib/CodeGen/CGStmt.cpp @@ -390,6 +390,28 @@ return true; } +bool CodeGenFunction::hasLocalRestrictVars(const CompoundStmt &S, + FunctionArgList *Args) { + // We may have restrict-qualified variables, but if we're not optimizing, we + // don't do anything special with them. + if (CGM.getCodeGenOpts().OptimizationLevel == 0) + return false; + + if (Args) + for (const auto *VD : *Args) + if (VD->getType().isRestrictOrContainsRestrictMembers()) + return true; + + for (const auto *C : S.body()) + if (const auto *DS = dyn_cast(C)) + for (const auto *I : DS->decls()) + if (const auto *VD = dyn_cast(I)) + if (VD->getType().isRestrictOrContainsRestrictMembers()) + return true; + + return false; +} + /// EmitCompoundStmt - Emit a compound statement {..} node. If GetLast is true, /// this captures the expression result of the last sub-statement and returns it /// (for use by the statement expression extension). @@ -399,7 +421,7 @@ "LLVM IR generation of compound statement ('{}')"); // Keep track of the current cleanup stack depth, including debug scopes. - LexicalScope Scope(*this, S.getSourceRange()); + LexicalScope Scope(*this, S.getSourceRange(), hasLocalRestrictVars(S)); return EmitCompoundStmtWithoutScope(S, GetLast, AggSlot); } @@ -603,6 +625,24 @@ } } +// For all of the instructions generated for this lexical scope that access +// memory, add the noalias metadata associated with any block-local +// restrict-qualified pointers from this scope. 
+void CodeGenFunction::LexicalNoAliasInfo::addNoAliasMD() { + if (MemoryInsts.empty() || NoAliasScopes.empty()) + return; + + llvm::MDNode *NewScopeList = llvm::MDNode::get( + MemoryInsts[0]->getParent()->getContext(), NoAliasScopes); + + for (auto &I : MemoryInsts) + I->setMetadata( + llvm::LLVMContext::MD_noalias, + llvm::MDNode::concatenate(I->getMetadata(llvm::LLVMContext::MD_noalias), + NewScopeList)); + + MemoryInsts.clear(); +} void CodeGenFunction::EmitLabelStmt(const LabelStmt &S) { EmitLabel(S.getDecl()); Index: clang/lib/CodeGen/CodeGenFunction.h =================================================================== --- clang/lib/CodeGen/CodeGenFunction.h +++ clang/lib/CodeGen/CodeGenFunction.h @@ -37,6 +37,7 @@ #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/MapVector.h" #include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/TinyPtrVector.h" #include "llvm/Frontend/OpenMP/OMPIRBuilder.h" #include "llvm/IR/ValueHandle.h" #include "llvm/Support/Debug.h" @@ -298,8 +299,7 @@ /// CGBuilder insert helper. This function is called after an /// instruction is created using Builder. void InsertHelper(llvm::Instruction *I, const llvm::Twine &Name, - llvm::BasicBlock *BB, - llvm::BasicBlock::iterator InsertPt) const; + llvm::BasicBlock *BB, llvm::BasicBlock::iterator InsertPt); /// CurFuncDecl - Holds the Decl for the current outermost /// non-closure context. @@ -814,11 +814,54 @@ } }; + // Check if any local variable or argument contains a (direct) restrict + // pointer. + bool hasLocalRestrictVars(const CompoundStmt &S, + FunctionArgList *Args = nullptr); + + // Get the associated NoAliasScope for the Ptr (cast and geps will be + // stripped). If there is no associated scope returns the 'unknown function' + // scope. 
(which is created on demand) + llvm::MDNode *getExistingOrUnknownNoAliasScope(llvm::Value *Ptr); + + // Get the associated declaration + llvm::Value *getExistingNoAliasDeclOrNullptr(llvm::MDNode *NoAliasScopeMD); + + // The noalias scopes used to tag pointer values assigned to block-local + // restrict-qualified variables, and the memory-accessing instructions within + // this lexical scope to which the associated pointer-aliasing assumptions + // might apply. One of these will exist for each lexical scope. + struct LexicalNoAliasInfo { + bool RecordMemoryInsts; + llvm::TinyPtrVector MemoryInsts; + llvm::TinyPtrVector NoAliasScopes; + + LexicalNoAliasInfo(bool RecordMemoryInsts = false) + : RecordMemoryInsts(RecordMemoryInsts) {} + + void recordMemoryInsts() { RecordMemoryInsts = true; } + + void recordMemoryInstruction(llvm::Instruction *I) { + if (RecordMemoryInsts) + MemoryInsts.push_back(I); + } + + void addNoAliasScope(llvm::MDNode *Scope) { + assert(RecordMemoryInsts && + "Adding noalias scope but not recording memory accesses!"); + NoAliasScopes.push_back(Scope); + } + + void addNoAliasMD(); + }; + + LexicalNoAliasInfo FnNoAliasInfo; + // Cleanup stack depth of the RunCleanupsScope that was pushed most recently. EHScopeStack::stable_iterator CurrentCleanupScopeDepth = EHScopeStack::stable_end(); - class LexicalScope : public RunCleanupsScope { + class LexicalScope : public RunCleanupsScope, public LexicalNoAliasInfo { SourceRange Range; SmallVector Labels; LexicalScope *ParentScope; @@ -828,8 +871,10 @@ public: /// Enter a new cleanup scope. 
- explicit LexicalScope(CodeGenFunction &CGF, SourceRange Range) - : RunCleanupsScope(CGF), Range(Range), ParentScope(CGF.CurLexicalScope) { + explicit LexicalScope(CodeGenFunction &CGF, SourceRange Range, + bool RecordMemoryInsts = false) + : RunCleanupsScope(CGF), LexicalNoAliasInfo(RecordMemoryInsts), + Range(Range), ParentScope(CGF.CurLexicalScope) { CGF.CurLexicalScope = this; if (CGDebugInfo *DI = CGF.getDebugInfo()) DI->EmitLexicalBlockStart(CGF.Builder, Range.getBegin()); @@ -840,6 +885,19 @@ Labels.push_back(label); } + // If we have block-local restrict-qualified pointers, we need to keep + // track of the memory-accessing instructions in the blocks where such + // pointers are declared (including lexical scopes that are children of + // those blocks) so that we can later add the appropriate metadata. Record + // this instruction and do the same in any parent scopes. + void recordMemoryInstruction(llvm::Instruction *I) { + LexicalNoAliasInfo::recordMemoryInstruction(I); + if (ParentScope) + ParentScope->recordMemoryInstruction(I); + else + CGF.FnNoAliasInfo.recordMemoryInstruction(I); + } + /// Exit this cleanup scope, emitting any accumulated /// cleanups. ~LexicalScope() { @@ -857,6 +915,8 @@ /// Force the emission of cleanups now, instead of waiting /// until this object is destroyed. void ForceCleanup() { + addNoAliasMD(); + CGF.CurLexicalScope = ParentScope; RunCleanupsScope::ForceCleanup(); @@ -873,6 +933,23 @@ typedef llvm::DenseMap DeclMapTy; + // Record this instruction for the purpose of later adding noalias metadata, + // if applicable, in order to support block-local restrict-qualified + // pointers. 
+ void recordMemoryInstruction(llvm::Instruction *I) { + if (CurLexicalScope) + CurLexicalScope->recordMemoryInstruction(I); + else + FnNoAliasInfo.recordMemoryInstruction(I); + } + + void addNoAliasScope(llvm::MDNode *Scope) { + if (CurLexicalScope) + CurLexicalScope->addNoAliasScope(Scope); + else + FnNoAliasInfo.addNoAliasScope(Scope); + } + /// The class used to assign some variables some temporarily addresses. class OMPMapVars { DeclMapTy SavedLocals; @@ -1835,6 +1912,19 @@ void EmitOpenCLKernelMetadata(const FunctionDecl *FD, llvm::Function *Fn); + /// The noalias domain metadata for this function. + llvm::MDNode *NoAliasDomain = nullptr; + + /// A map between the addresses of local restrict-qualified variables and + /// their noalias scope. + llvm::DenseMap NoAliasAddrMap; + + /// A map between the noalias scope and its declaration + llvm::DenseMap NoAliasDeclMap; + + /// The node representing 'out-of-function' scope + llvm::MDNode *NoAliasUnknownScope = nullptr; + public: CodeGenFunction(CodeGenModule &cgm, bool suppressNewContext=false); ~CodeGenFunction(); @@ -2982,6 +3072,9 @@ void emitAutoVarTypeCleanup(const AutoVarEmission &emission, QualType::DestructionKind dtorKind); + void EmitNoAliasDecl(const VarDecl &D, Address Loc); + void EmitAutoVarNoAlias(const AutoVarEmission &emission); + /// Emits the alloca and debug information for the size expressions for each /// dimension of an array. It registers the association of its (1-dimensional) /// QualTypes and size expression's debug node, so that CGDebugInfo can Index: clang/lib/CodeGen/CodeGenFunction.cpp =================================================================== --- clang/lib/CodeGen/CodeGenFunction.cpp +++ clang/lib/CodeGen/CodeGenFunction.cpp @@ -690,6 +690,11 @@ CurFn = Fn; CurFnInfo = &FnInfo; assert(CurFn->isDeclaration() && "Function already has body?"); + NoAliasUnknownScope = nullptr; // make sure we start without a function scope + + // Always track memory instructions. 
When a restrict usage is encountered, + // they will be annotated with the necessary scopes. + FnNoAliasInfo.recordMemoryInsts(); // If this function has been blacklisted for any of the enabled sanitizers, // disable the sanitizer for the function. @@ -1143,10 +1148,15 @@ void CodeGenFunction::EmitFunctionBody(const Stmt *Body) { incrementProfileCounter(Body); - if (const CompoundStmt *S = dyn_cast(Body)) + if (const CompoundStmt *S = dyn_cast(Body)) { EmitCompoundStmtWithoutScope(*S); - else + + // Now that we're done with the block, add noalias metadata if we had any + // block-local restrict-qualified pointers. + FnNoAliasInfo.addNoAliasMD(); + } else { EmitStmt(Body); + } } /// When instrumenting to collect profile data, the counts for some blocks @@ -2235,10 +2245,18 @@ void CodeGenFunction::InsertHelper(llvm::Instruction *I, const llvm::Twine &Name, llvm::BasicBlock *BB, - llvm::BasicBlock::iterator InsertPt) const { + llvm::BasicBlock::iterator InsertPt) { LoopStack.InsertHelper(I); if (IsSanitizerScope) CGM.getSanitizerMetadata()->disableSanitizerForInstruction(I); + + // When we have block-local restrict-qualified pointers, we need to record + // all memory-accessing instructions (i.e. any kind of instruction for which + // AA::getModRefInfo might return something other than NoModRef) so that they + // can be tagged with noalias metadata with noalias scopes corresponding to + // the applicable restrict-qualified pointers. 
+ if (I->mayReadOrWriteMemory()) + recordMemoryInstruction(I); } void CGBuilderInserter::InsertHelper( Index: clang/lib/Driver/ToolChains/Clang.cpp =================================================================== --- clang/lib/Driver/ToolChains/Clang.cpp +++ clang/lib/Driver/ToolChains/Clang.cpp @@ -4504,6 +4504,15 @@ options::OPT_fdelete_null_pointer_checks, false)) CmdArgs.push_back("-fno-delete-null-pointer-checks"); + if (Arg *FullRestrictArg = Args.getLastArg(options::OPT_ffull_restrict, + options::OPT_fno_full_restrict)) { + if (FullRestrictArg->getOption().matches(options::OPT_ffull_restrict)) { + // Enable inlining support for noalias (currently disabled by default) + CmdArgs.push_back("-mllvm"); + CmdArgs.push_back("-use-noalias-intrinsic-during-inlining"); + } + } + // LLVM Code Generator Options. if (Args.hasArg(options::OPT_frewrite_map_file) || Index: clang/lib/Frontend/CompilerInvocation.cpp =================================================================== --- clang/lib/Frontend/CompilerInvocation.cpp +++ clang/lib/Frontend/CompilerInvocation.cpp @@ -848,6 +848,14 @@ Opts.StructPathTBAA = !Args.hasArg(OPT_no_struct_path_tbaa); Opts.NewStructPathTBAA = !Args.hasArg(OPT_no_struct_path_tbaa) && Args.hasArg(OPT_new_struct_path_tbaa); + if (Arg *FullRestrictArg = + Args.getLastArg(OPT_ffull_restrict, OPT_fno_full_restrict)) { + Opts.FullRestrict = + FullRestrictArg->getOption().matches(OPT_ffull_restrict); + } + // Keep this behind an option - the alias analysis still works better + // with the noalias attribute on arguments. 
+ Opts.NoNoAliasArgAttr = Args.hasArg(OPT_fno_noalias_arguments); Opts.FineGrainedBitfieldAccesses = Args.hasFlag(OPT_ffine_grained_bitfield_accesses, OPT_fno_fine_grained_bitfield_accesses, false); Index: clang/test/CodeGen/restrict/arg_reuse.c =================================================================== --- /dev/null +++ clang/test/CodeGen/restrict/arg_reuse.c @@ -0,0 +1,36 @@ +// RUN: %clang_cc1 -triple x86_64-apple-darwin -O2 -ffull-restrict -fno-noalias-arguments %s -emit-llvm -o - | FileCheck %s + +// NOTE: use -fno-noalias-arguments to block mapping restrict arguments onto the 'noalias' +// attribute, which is too strong for restrict + +// A number of testcases from our wiki (2018/6/7_llvm_restrict_examples) +// As llvm/clang treat __restrict differently in following cases: +int test_arg_restrict_vs_local_restrict_01(int *__restrict pA, int *pB, int *pC) { + int *tmp = pA; + *tmp = 42; + pA = pB; + *pA = 43; + *pC = 99; + return *tmp; // fail: needs a load !!! (either 42 or 43) +} + +// CHECK: @test_arg_restrict_vs_local_restrict_01 +// CHECK-NOT: ret i32 42 +// CHECK-NOT: ret i32 43 +// CHECK-NOT: ret i32 99 + +int test_arg_restrict_vs_local_restrict_02(int *pA_, int *pB, int *pC) { + int *__restrict pA; + pA = pA_; + int *tmp = pA; + *tmp = 42; + pA = pB; + *pA = 43; + *pC = 99; + return *tmp; // needs a load !!! 
(either 42 or 43) +} + +// CHECK: @test_arg_restrict_vs_local_restrict_02 +// CHECK-NOT: ret i32 42 +// CHECK-NOT: ret i32 43 +// CHECK-NOT: ret i32 99 Index: clang/test/CodeGen/restrict/array.c =================================================================== --- /dev/null +++ clang/test/CodeGen/restrict/array.c @@ -0,0 +1,53 @@ +// RUN: %clang_cc1 -triple x86_64-apple-darwin -O1 -disable-llvm-optzns -ffull-restrict %s -emit-llvm -o - | FileCheck %s + +int r; +void ex1(int *); + +void test_FOO_local(int *pA, int *pB, int *pC) { + int *restrict tmp[3] = {pA, pB, pC}; + *tmp[0] = 42; + *tmp[1] = 43; +} + +// CHECK-LABEL: void @test_FOO_local( +// CHECK: [[tmp:%.*]] = alloca [3 x i32*], align 16 +// CHECK: [[TMP1:%.*]] = call i8* @llvm.noalias.decl.p0i8.p0a3p0i32.i64([3 x i32*]* [[tmp]], i64 0, metadata [[TAG_6:!.*]]) +// CHECK: [[arrayidx_begin:%.*]] = getelementptr inbounds [3 x i32*], [3 x i32*]* [[tmp]], i64 0, i64 0 +// CHECK: [[arrayidx:%.*]] = getelementptr inbounds [3 x i32*], [3 x i32*]* [[tmp]], i64 0, i64 0 +// CHECK: [[TMP5:%.*]] = load i32*, i32** [[arrayidx]], align 16, !tbaa [[TAG_2:!.*]], !noalias [[TAG_6]] +// CHECK: [[TMP6:%.*]] = call i32* @llvm.noalias.p0i32.p0i8.p0p0i32.i64(i32* [[TMP5]], i8* [[TMP1]], i32** [[arrayidx]], i64 0, metadata [[TAG_6]]), !tbaa [[TAG_2]], !noalias [[TAG_6]] +// CHECK: store i32 42, i32* [[TMP6]], align 4, !tbaa [[TAG_9:!.*]], !noalias [[TAG_6]] +// CHECK: [[arrayidx2:%.*]] = getelementptr inbounds [3 x i32*], [3 x i32*]* [[tmp]], i64 0, i64 1 +// CHECK: [[TMP7:%.*]] = load i32*, i32** [[arrayidx2]], align 8, !tbaa [[TAG_2]], !noalias [[TAG_6]] +// CHECK: [[TMP8:%.*]] = call i32* @llvm.noalias.p0i32.p0i8.p0p0i32.i64(i32* [[TMP7]], i8* [[TMP1]], i32** [[arrayidx2]], i64 0, metadata [[TAG_6]]), !tbaa [[TAG_2]], !noalias [[TAG_6]] +// CHECK: store i32 43, i32* [[TMP8]], align 4, !tbaa [[TAG_9]], !noalias [[TAG_6]] +// CHECK: ret void + +void test_FOO_p(int *restrict p) { + *p = 42; +} + +// define void @test_FOO_p(i32* 
noalias %p) #0 { +// CHECK-LABEL: void @test_FOO_p( +// CHECK: [[p_addr:%.*]] = alloca i32*, align 8 +// CHECK-NEXT: [[TMP0:%.*]] = call i8* @llvm.noalias.decl.p0i8.p0p0i32.i64(i32** [[p_addr]], i64 0, metadata [[TAG_11:!.*]]) +// CHECK-NEXT: store i32* [[p:%.*]], i32** [[p_addr]], align 8, !tbaa [[TAG_2:!.*]], !noalias [[TAG_11]] +// CHECK-NEXT: [[TMP1:%.*]] = load i32*, i32** [[p_addr]], align 8, !tbaa [[TAG_2]], !noalias [[TAG_11]] +// CHECK-NEXT: [[TMP2:%.*]] = call i32* @llvm.noalias.p0i32.p0i8.p0p0i32.i64(i32* [[TMP1]], i8* [[TMP0]], i32** [[p_addr]], i64 0, metadata [[TAG_11]]), !tbaa [[TAG_2]], !noalias [[TAG_11]] +// CHECK-NEXT: store i32 42, i32* [[TMP2]], align 4, !tbaa [[TAG_9:!.*]], !noalias [[TAG_11]] +// CHECK-NEXT: ret void + +void test_FOO_pp(int *restrict *p) { + *p[0] = 42; +} + +// define void @test_FOO_pp(i32** %p) #0 !noalias !14 { +// CHECK: void @test_FOO_pp(i32** [[p:%.*]]) #0 !noalias [[TAG_14:!.*]] { +// CHECK: [[p_addr:%.*]] = alloca i32**, align 8 +// CHECK-NEXT: store i32** [[p]], i32*** [[p_addr]], align 8, !tbaa [[TAG_2:!.*]], !noalias [[TAG_14:!.*]] +// CHECK-NEXT: [[TMP0:%.*]] = load i32**, i32*** [[p_addr]], align 8, !tbaa [[TAG_2]], !noalias [[TAG_14]] +// CHECK-NEXT: [[arrayidx:%.*]] = getelementptr inbounds i32*, i32** [[TMP0]], i64 0 +// CHECK-NEXT: [[TMP1:%.*]] = load i32*, i32** [[arrayidx]], align 8, !tbaa [[TAG_2]], !noalias [[TAG_14]] +// CHECK-NEXT: [[TMP2:%.*]] = call i32* @llvm.noalias.p0i32.p0i8.p0p0i32.i64(i32* [[TMP1]], i8* null, i32** [[arrayidx]], i64 0, metadata [[TAG_14]]), !tbaa [[TAG_2]], !noalias [[TAG_14]] +// CHECK-NEXT: store i32 42, i32* [[TMP2]], align 4, !tbaa [[TAG_9:!.*]], !noalias [[TAG_14]] +// CHECK-NEXT: ret void Index: clang/test/CodeGen/restrict/basic.c =================================================================== --- /dev/null +++ clang/test/CodeGen/restrict/basic.c @@ -0,0 +1,129 @@ +// RUN: %clang_cc1 -triple x86_64-apple-darwin -O1 -disable-llvm-optzns -ffull-restrict %s -emit-llvm -o 
- | FileCheck %s + +int r; +void ex1(int *); + +int *a; +int *foo() { + int *restrict x = a; + return x; + + // CHECK-LABEL: i32* @foo( + // CHECK: [[x:%.*]] = alloca i32*, align 8 + // CHECK: [[TMP1:%.*]] = call i8* @llvm.noalias.decl.p0i8.p0p0i32.i64(i32** [[x]], i64 0, metadata [[TAG_2:!.*]]) + // CHECK: [[TMP3:%.*]] = load i32*, i32** [[x]], align 8, !tbaa !5, !noalias [[TAG_2]] + // CHECK: [[TMP4:%.*]] = call i32* @llvm.noalias.p0i32.p0i8.p0p0i32.i64(i32* [[TMP3]], i8* [[TMP1]], i32** [[x]], i64 0, metadata !2), !tbaa !5, !noalias [[TAG_2]] + // CHECK: ret i32* [[TMP4]] +} + +int *a2; +int *foo1(int b) { + int *restrict x; + + // CHECK-LABEL: define i32* @foo1(i32 %b) + // CHECK: [[b_addr:%.*]] = alloca i32, align 4 + // CHECK: [[x:%.*]] = alloca i32*, align 8 + // CHECK: [[x2:%.*]] = alloca i32*, align 8 + // CHECK: [[TMP1:%.*]] = call i8* @llvm.noalias.decl.p0i8.p0p0i32.i64(i32** [[x]], i64 0, metadata [[TAG_x:!.*]]) + + if (b) { + x = a; + r += *x; + ex1(x); + + // CHECK: [[TMP3:%.*]] = load i32*, i32** @a, align 8, !tbaa [[TAG_5:!.*]], !noalias [[TAG_x_x2:!.*]] + // CHECK: store i32* [[TMP3]], i32** [[x]], align 8, !tbaa [[TAG_5]], !noalias [[TAG_x_x2]] + + // CHECK: [[TMP4:%.*]] = load i32*, i32** [[x]], align 8, !tbaa [[TAG_5]], !noalias [[TAG_x_x2]] + // CHECK: [[TMP5:%.*]] = call i32* @llvm.noalias.p0i32.p0i8.p0p0i32.i64(i32* [[TMP4]], i8* [[TMP1]], i32** [[x]], i64 0, metadata [[TAG_x]]), !tbaa [[TAG_5]], !noalias [[TAG_x_x2]] + // CHECK: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4, !tbaa [[TAG_9:!.*]], !noalias [[TAG_x_x2]] + // CHECK: [[TMP7:%.*]] = load i32, i32* @r, align 4, !tbaa [[TAG_9]], !noalias [[TAG_x_x2]] + // CHECK: [[add:%.*]] = add nsw i32 [[TMP7]], [[TMP6]] + // CHECK: store i32 [[add]], i32* @r, align 4, !tbaa [[TAG_9]], !noalias [[TAG_x_x2]] + + // CHECK: [[TMP8:%.*]] = load i32*, i32** [[x]], align 8, !tbaa [[TAG_5]], !noalias [[TAG_x_x2]] + // CHECK: [[TMP9:%.*]] = call i32* @llvm.noalias.p0i32.p0i8.p0p0i32.i64(i32* [[TMP8]], 
i8* [[TMP1]], i32** [[x]], i64 0, metadata [[TAG_x]]), !tbaa [[TAG_5]], !noalias [[TAG_x_x2]] + // CHECK: call void @ex1(i32* [[TMP9]]), !noalias [[TAG_x_x2]] + + ++x; + *x = r; + ex1(x); + + // CHECK: [[TMP10:%.*]] = load i32*, i32** [[x]], align 8, !tbaa [[TAG_5]], !noalias [[TAG_x_x2]] + // CHECK: [[TMP11:%.*]] = call i32* @llvm.noalias.p0i32.p0i8.p0p0i32.i64(i32* [[TMP10]], i8* [[TMP1]], i32** [[x]], i64 0, metadata [[TAG_x]]), !tbaa [[TAG_5]], !noalias [[TAG_x_x2]] + // CHECK: [[incdec_ptr:%.*]] = getelementptr inbounds i32, i32* [[TMP11]], i32 1 + // CHECK: store i32* [[incdec_ptr]], i32** [[x]], align 8, !tbaa [[TAG_5]], !noalias [[TAG_x_x2]] + + // CHECK: [[TMP12:%.*]] = load i32, i32* @r, align 4, !tbaa [[TAG_9]], !noalias [[TAG_x_x2]] + // CHECK: [[TMP13:%.*]] = load i32*, i32** [[x]], align 8, !tbaa [[TAG_5]], !noalias [[TAG_x_x2]] + // CHECK: [[TMP14:%.*]] = call i32* @llvm.noalias.p0i32.p0i8.p0p0i32.i64(i32* [[TMP13]], i8* [[TMP1]], i32** [[x]], i64 0, metadata [[TAG_x]]), !tbaa [[TAG_5]], !noalias [[TAG_x_x2]] + // CHECK: store i32 [[TMP12]], i32* [[TMP14]], align 4, !tbaa [[TAG_9]], !noalias [[TAG_x_x2]] + + // CHECK: [[TMP15:%.*]] = load i32*, i32** [[x]], align 8, !tbaa [[TAG_5]], !noalias [[TAG_x_x2]] + // CHECK: [[TMP16:%.*]] = call i32* @llvm.noalias.p0i32.p0i8.p0p0i32.i64(i32* [[TMP15]], i8* [[TMP1]], i32** [[x]], i64 0, metadata [[TAG_x]]), !tbaa [[TAG_5]], !noalias [[TAG_x_x2]] + // CHECK: call void @ex1(i32* [[TMP16]]), !noalias [[TAG_x_x2]] + + x += b; + *x = r; + ex1(x); + + // CHECK: [[TMP17:%.*]] = load i32, i32* [[b_addr]], align 4, !tbaa [[TAG_9]], !noalias [[TAG_x_x2]] + // CHECK: [[TMP18:%.*]] = load i32*, i32** [[x]], align 8, !tbaa [[TAG_5]], !noalias [[TAG_x_x2]] + // CHECK: [[TMP19:%.*]] = call i32* @llvm.noalias.p0i32.p0i8.p0p0i32.i64(i32* [[TMP18]], i8* [[TMP1]], i32** [[x]], i64 0, metadata [[TAG_x]]), !tbaa [[TAG_5]], !noalias [[TAG_x_x2]] + // CHECK: [[idx_ext:%.*]] = sext i32 [[TMP17]] to i64 + // CHECK: [[add_ptr:%.*]] = 
getelementptr inbounds i32, i32* [[TMP19]], i64 [[idx_ext]] + // CHECK: store i32* [[add_ptr]], i32** [[x]], align 8, !tbaa [[TAG_5]], !noalias [[TAG_x_x2]] + + // CHECK: [[TMP20:%.*]] = load i32, i32* @r, align 4, !tbaa [[TAG_9]], !noalias [[TAG_x_x2]] + // CHECK: [[TMP21:%.*]] = load i32*, i32** [[x]], align 8, !tbaa [[TAG_5]], !noalias [[TAG_x_x2]] + // CHECK: [[TMP22:%.*]] = call i32* @llvm.noalias.p0i32.p0i8.p0p0i32.i64(i32* [[TMP21]], i8* [[TMP1]], i32** [[x]], i64 0, metadata [[TAG_x]]), !tbaa [[TAG_5]], !noalias [[TAG_x_x2]] + // CHECK: store i32 [[TMP20]], i32* [[TMP22]], align 4, !tbaa [[TAG_9]], !noalias [[TAG_x_x2]] + + // CHECK: [[TMP23:%.*]] = load i32*, i32** [[x]], align 8, !tbaa [[TAG_5]], !noalias [[TAG_x_x2]] + // CHECK: [[TMP24:%.*]] = call i32* @llvm.noalias.p0i32.p0i8.p0p0i32.i64(i32* [[TMP23]], i8* [[TMP1]], i32** [[x]], i64 0, metadata [[TAG_x]]), !tbaa [[TAG_5]], !noalias [[TAG_x_x2]] + // CHECK: call void @ex1(i32* [[TMP24]]), !noalias [[TAG_x_x2]] + + int *restrict x2 = a2; + *x2 = r; + ex1(x2); + + // CHECK: [[TMP26:%.*]] = call i8* @llvm.noalias.decl.p0i8.p0p0i32.i64(i32** [[x2]], i64 0, metadata [[TAG_x2:!.*]]) + // CHECK: [[TMP27:%.*]] = load i32*, i32** @a2, align 8, !tbaa [[TAG_5]], !noalias [[TAG_x_x2]] + // CHECK: store i32* [[TMP27]], i32** [[x2]], align 8, !tbaa [[TAG_5]], !noalias [[TAG_x_x2]] + + // CHECK: [[TMP28:%.*]] = load i32, i32* @r, align 4, !tbaa [[TAG_9]], !noalias [[TAG_x_x2]] + // CHECK: [[TMP29:%.*]] = load i32*, i32** [[x2]], align 8, !tbaa [[TAG_5]], !noalias [[TAG_x_x2]] + // CHECK: [[TMP30:%.*]] = call i32* @llvm.noalias.p0i32.p0i8.p0p0i32.i64(i32* [[TMP29]], i8* [[TMP26]], i32** [[x2]], i64 0, metadata [[TAG_x2]]), !tbaa [[TAG_5]], !noalias [[TAG_x_x2]] + // CHECK: store i32 [[TMP28]], i32* [[TMP30]], align 4, !tbaa [[TAG_9]], !noalias [[TAG_x_x2]] + + // CHECK: [[TMP31:%.*]] = load i32*, i32** [[x2]], align 8, !tbaa [[TAG_5]], !noalias [[TAG_x_x2]] + // CHECK: [[TMP32:%.*]] = call i32* 
@llvm.noalias.p0i32.p0i8.p0p0i32.i64(i32* [[TMP31]], i8* [[TMP26]], i32** [[x2]], i64 0, metadata [[TAG_x2]]), !tbaa [[TAG_5]], !noalias [[TAG_x_x2]] + // CHECK: call void @ex1(i32* [[TMP32]]), !noalias [[TAG_x_x2]] + } else { + x = a2; + r += *x; + // CHECK: [[TMP34:%.*]] = load i32*, i32** @a2, align 8, !tbaa [[TAG_5]], !noalias [[TAG_x]] + // CHECK: store i32* [[TMP34]], i32** [[x]], align 8, !tbaa [[TAG_5]], !noalias [[TAG_x]] + + // CHECK: [[TMP35:%.*]] = load i32*, i32** [[x]], align 8, !tbaa [[TAG_5]], !noalias [[TAG_x]] + // CHECK: [[TMP36:%.*]] = call i32* @llvm.noalias.p0i32.p0i8.p0p0i32.i64(i32* [[TMP35]], i8* [[TMP1]], i32** [[x]], i64 0, metadata [[TAG_x]]), !tbaa [[TAG_5]], !noalias [[TAG_x]] + // CHECK: [[TMP37:%.*]] = load i32, i32* [[TMP36]], align 4, !tbaa [[TAG_9]], !noalias [[TAG_x]] + // CHECK: [[TMP38:%.*]] = load i32, i32* @r, align 4, !tbaa [[TAG_9]], !noalias [[TAG_x]] + // CHECK: [[add1:%.*]] = add nsw i32 [[TMP38]], [[TMP37]] + // CHECK: store i32 [[add1]], i32* @r, align 4, !tbaa [[TAG_9]], !noalias [[TAG_x]] + } + + return x; + // CHECK: [[TMP39:%.*]] = load i32*, i32** [[x]], align 8, !tbaa [[TAG_5]], !noalias [[TAG_x]] + // CHECK: [[TMP40:%.*]] = call i32* @llvm.noalias.p0i32.p0i8.p0p0i32.i64(i32* [[TMP39]], i8* [[TMP1]], i32** [[x]], i64 0, metadata [[TAG_x]]), !tbaa [[TAG_5]], !noalias [[TAG_x]] + // CHECK: ret i32* [[TMP40]] +} + +int *bar() { + int *x = a; + return x; + + // CHECK-LABEL: define i32* @bar() + // CHECK-NOT: noalias + // CHECK: ret i32* +} Index: clang/test/CodeGen/restrict/basic_opt_01.c =================================================================== --- /dev/null +++ clang/test/CodeGen/restrict/basic_opt_01.c @@ -0,0 +1,137 @@ +// RUN: %clang_cc1 -triple x86_64-apple-darwin -O2 -ffull-restrict %s -emit-llvm -o - | FileCheck %s + +// Rough test to verify basic functionality of the 'restrict member pointer' rewrite + +// CHECK: @test01A +// CHECK: ret i32 42 +int test01A(int *pA, int *pB) { + int *__restrict prA; 
+ prA = pA; + + *prA = 42; + *pB = 43; + return *prA; +} + +// CHECK: @test01B +// CHECK: ret i32 42 +int test01B(int *__restrict prA, int *pB) { + *prA = 42; + *pB = 43; + return *prA; +} + +// CHECK: @test02A +// CHECK: ret i32 42 +int test02A(int b, int *pA, char *pB, int *pC) { + int *__restrict prA; + prA = pA; + char *__restrict prB; + prB = pB; + char *lp = b ? (char *)prA : (char *)prB; + + *lp = 42; + *pC = 43; + return *lp; +} + +// CHECK: @test02B +// CHECK: ret i32 42 +int test02B(int b, int *__restrict prA, char *__restrict prB, int *pC) { + char *lp = b ? (char *)prA : (char *)prB; + + *lp = 42; + *pC = 43; + return *lp; +} + +// CHECK: @test03 +// CHECK: ret i32 42 +int test03(int n, int *pA, char *pB, int *pC) { + do { + int *__restrict prA; + prA = pA; + + *prA = 42; + *pC = 43; + } while (n--); + return *pA; +} + +// CHECK: @test04A0 +// CHECK: ret i32 42 +int test04A0(int n, int *pA, char *pB, int *pC) { + int *__restrict prA; + do { + prA = pA; + + *prA = 42; + *pC = 43; + } while (n--); + return *prA; +} + +// CHECK: @test04A1 +// CHECK: ret i32 42 +int test04A1(int n, int *pA, char *pB, int *pC) { + int *__restrict prA; + prA = pA; + do { + prA = pA; + + *prA = 42; + *pC = 43; + } while (n--); + return *prA; +} + +// CHECK: @test04B0 +// CHECK: ret i32 42 +int test04B0(int n, int *__restrict prA, char *pB, int *pC) { + do { + prA = prA; + + *prA = 42; + *pC = 43; + } while (n--); + return *prA; +} + +// CHECK: @test04B1 +// CHECK: ret i32 42 +int test04B1(int n, int *__restrict prA, char *pB, int *pC) { + prA = prA; + do { + prA = prA; + + *prA = 42; + *pC = 43; + } while (n--); + return *prA; +} + +// CHECK: @test05A +// CHECK: ret i32 42 +int test05A(int n, int *pA, char *pB, int *pC) { + int *__restrict prA; + prA = pA; + do { + prA++; + + *prA = 42; + *pC = 43; + } while (n--); + return *prA; +} + +// CHECK: @test05B +// CHECK: ret i32 42 +int test05B(int n, int *__restrict prA, char *pB, int *pC) { + do { + prA++; + + *prA = 42; + *pC = 
43; + } while (n--); + return *prA; +} Index: clang/test/CodeGen/restrict/basic_opt_02.c =================================================================== --- /dev/null +++ clang/test/CodeGen/restrict/basic_opt_02.c @@ -0,0 +1,52 @@ +// RUN: %clang_cc1 -triple x86_64-apple-darwin -O2 -ffull-restrict %s -emit-llvm -o - | FileCheck %s + +int foo1(int *a, int *restrict b, int c) { + *a = 0; + *b = c; + return *a; // OK: returns 0 +} +// CHECK: @foo1 +// CHECK: ret i32 0 + +int foo2(int *a, int *restrict b, int c) { + int *bc = b + c; + *a = 0; + *bc = c; // OK: bc keeps the restrictness + return *a; // returns 0 +} +// CHECK: @foo2 +// CHECK: ret i32 0 + +static int *copy(int *b) { return b; } + +int foo3(int *a, int *restrict b, int c) { + int *bc = copy(b); // a fix to support this is in the works + *a = 0; + *bc = c; + return *a; +} +// CHECK: @foo3 +// CHECK: ret i32 0 + +// Finally: +inline void update(int *p, int c) { *p = c; } + +int foo6(int *a, int *b, int c) { + int *restrict bc = b; // local restrict + *a = 0; + update(bc, c); // Oops: inlining loses local restrict annotation + return *a; +} + +// CHECK: @foo6 +// CHECK: ret i32 0 + +// Notice the difference with: +int foo7(int *a, int *restrict b, int c) { + *a = 0; + update(b, c); // restrict argument preserved after inlining. + return *a; // returns 0 +} + +// CHECK: @foo7 +// CHECK: ret i32 0 Index: clang/test/CodeGen/restrict/basic_opt_03.c =================================================================== --- /dev/null +++ clang/test/CodeGen/restrict/basic_opt_03.c @@ -0,0 +1,73 @@ +// RUN: %clang_cc1 -triple x86_64-apple-darwin -O2 -ffull-restrict %s -emit-llvm -o - | FileCheck %s +// XFAIL: * + +// NOTE: SROA needs to be able to see through llvm.noalias. This is introduced in case of returning of larger structs. 
+struct A { + int a, b, c, d, e, f, g, h; +}; +struct A constructIt(int a) { + struct A tmp = {a, a, a, a, a, a, a, a}; + return tmp; +} +int test_sroa01a(unsigned c) { + int tmp = 0; + for (int i = 0; i < c; ++i) { + struct A a = constructIt(i); + tmp = tmp + a.e; + } + return tmp; +} + +// CHECK: @test_sroa01a +// CHECK: FIXME + +int test_sroa01b(unsigned c) { + int tmp = 0; + for (int i = 0; i < c; ++i) { + struct A a = {i, i, i, i, i, i, i, i}; + tmp = tmp + a.e; + } + return tmp; +} + +// CHECK: @test_sroa01b +// CHECK: FIXME + +int test_sroa01c(unsigned c) { + int tmp = 0; + for (int i = 0; i < c; ++i) { + int *__restrict dummy; // should not influence optimizations ! + struct A a = {i, i, i, i, i, i, i, i}; + tmp = tmp + a.e; + } + return tmp; +} + +// CHECK: @test_sroa01c +// CHECK: FIXME + +int test_sroa02a(unsigned c) { + int tmp = 0; + struct A a; + for (int i = 0; i < c; ++i) { + a = constructIt(i); + tmp = tmp + a.e; + } + return tmp; +} + +// CHECK: @test_sroa02a +// CHECK: FIXME + +int test_sroa02b(unsigned c) { + struct A a; + int tmp = 0; + for (int i = 0; i < c; ++i) { + a = (struct A){i, i, i, i, i, i, i, i}; + tmp = tmp + a.e; + } + return tmp; +} + +// CHECK: @test_sroa02b +// CHECK: FIXME Index: clang/test/CodeGen/restrict/basic_opt_04.c =================================================================== --- /dev/null +++ clang/test/CodeGen/restrict/basic_opt_04.c @@ -0,0 +1,296 @@ +// RUN: %clang_cc1 -triple x86_64-apple-darwin -O2 -ffull-restrict %s -emit-llvm -o - | FileCheck %s + +// verify effect of restrict on optimizations +void dummy_restrict01_n(int *p) { + if (0) { + *p = 0xdeadbeef; + } +} + +void dummy_restrict01_a(int *__restrict p) { + if (0) { + *p = 0xdeadbeef; + } +} + +void dummy_restrict01_r(int *p_) { + int *__restrict p = p_; + if (0) { + *p = 0xdeadbeef; + } +} + +void dummy_restrict01_R(int *p_) { + int *__restrict p; + p = p_; + if (0) { + *p = 0xdeadbeef; + } +} + +void dummy_restrict02_n(int *p) { + p++; + if (0) { +
*p = 0xdeadbeef; + } +} + +void dummy_restrict02_a(int *__restrict p) { + p++; + if (0) { + *p = 0xdeadbeef; + } +} + +void dummy_restrict02_r(int *p_) { + int *__restrict p = p_; + p++; + if (0) { + *p = 0xdeadbeef; + } +} + +void dummy_restrict02_R(int *p_) { + int *__restrict p; + p = p_; + p++; + if (0) { + *p = 0xdeadbeef; + } +} + +// --------------------------------- + +int test01_n(int *pA, int c) { + if (0) { + *pA = 0xdeadbeef; + } + return c; +} + +// CHECK: @test01_n +// CHECK-NOT: .noalias +// CHECK: ret i32 + +int test01_a(int *__restrict pA, int c) { + if (0) { + *pA = 0xdeadbeef; + } + return c; +} + +// CHECK: @test01_a +// CHECK-NOT: .noalias +// CHECK: ret i32 + +int test01_r(int *pA_, int c) { + int *__restrict pA = pA_; + if (0) { + *pA = 0xdeadbeef; + } + return c; +} +// CHECK: @test01_r +// CHECK-NOT: .noalias +// CHECK: ret i32 + +int test01_R(int *pA_, int c) { + int *__restrict pA; + pA = pA_; + if (0) { + *pA = 0xdeadbeef; + } + return c; +} +// CHECK: @test01_R +// CHECK-NOT: .noalias +// CHECK: ret i32 + +int test01_nn(int *pA, int c) { + dummy_restrict01_n(pA); + return c; +} +// CHECK: @test01_nn +// CHECK-NOT: .noalias +// CHECK: ret i32 + +int test01_na(int *pA, int c) { + dummy_restrict01_a(pA); + return c; +} +// CHECK: @test01_na +// CHECK-NOT: .noalias +// CHECK: ret i32 + +int test01_nr(int *pA, int c) { + dummy_restrict01_r(pA); + return c; +} +// CHECK: @test01_nr +// CHECK-NOT: .noalias +// CHECK: ret i32 + +int test01_nR(int *pA, int c) { + dummy_restrict01_R(pA); + return c; +} +// CHECK: @test01_nR +// CHECK-NOT: .noalias +// CHECK: ret i32 + +// ---------------------------------- +int test02_n(int *pA, int c) { + pA++; + if (0) { + *pA = 0xdeadbeef; + } + return c; +} +// CHECK: @test02_n +// CHECK-NOT: .noalias +// CHECK: ret i32 + +int test02_a(int *__restrict pA, int c) { + pA++; + if (0) { + *pA = 0xdeadbeef; + } + return c; +} +// CHECK: @test02_a +// CHECK-NOT: .noalias +// CHECK: ret i32 + +int test02_r(int *pA_, 
int c) { + int *__restrict pA = pA_; + pA++; + if (0) { + *pA = 0xdeadbeef; + } + return c; +} +// CHECK: @test02_r +// CHECK-NOT: .noalias +// CHECK: ret i32 + +int test02_R(int *pA_, int c) { + int *__restrict pA; + pA = pA_; + pA++; + if (0) { + *pA = 0xdeadbeef; + } + return c; +} +// CHECK: @test02_R +// CHECK-NOT: .noalias +// CHECK: ret i32 + +int test02_nn(int *pA, int c) { + dummy_restrict02_n(pA); + return c; +} +// CHECK: @test02_nn +// CHECK-NOT: .noalias +// CHECK: ret i32 + +int test02_na(int *pA, int c) { + dummy_restrict02_a(pA); + return c; +} +// CHECK: @test02_na +// CHECK-NOT: .noalias +// CHECK: ret i32 + +int test02_nr(int *pA, int c) { + dummy_restrict02_r(pA); + return c; +} +// CHECK: @test02_nr +// CHECK-NOT: .noalias +// CHECK: ret i32 + +int test02_nR(int *pA, int c) { + dummy_restrict02_R(pA); + return c; +} +// CHECK: @test02_nR +// CHECK-NOT: .noalias +// CHECK: ret i32 + +int test11_n(int *pA) { + unsigned total = 0; + for (int i = 0; i < 10; ++i) { + int *p = pA; + total = total + 1; + } + return total; +} +// CHECK: @test11_n +// CHECK-NOT: .noalias +// CHECK: ret i32 + +int test11_lr(int *pA) { + unsigned total = 0; + for (int i = 0; i < 10; ++i) { + int *__restrict p = pA; + total = total + 1; + } + return total; +} +// CHECK: @test11_lr +// CHECK-NOT: .noalias +// CHECK: ret i32 + +int test11_lR(int *pA) { + unsigned total = 0; + for (int i = 0; i < 10; ++i) { + int *__restrict p; + p = pA; + total = total + 1; + } + return total; +} +// CHECK: @test11_lR +// CHECK-NOT: .noalias +// CHECK: ret i32 + +int test12_n(int *pA) { + unsigned total = 0; + for (int i = 0; i < 10; ++i) { + int *p = pA; + p++; + total = total + 1; + } + return total; +} +// CHECK: @test12_n +// CHECK-NOT: .noalias +// CHECK: ret i32 + +int test12_lr(int *pA) { + unsigned total = 0; + for (int i = 0; i < 10; ++i) { + int *__restrict p = pA; + p++; + total = total + 1; + } + return total; +} +// CHECK: @test12_lr +// CHECK-NOT: .noalias +// CHECK: ret i32 + 
+int test12_lR(int *pA) { + unsigned total = 0; + for (int i = 0; i < 10; ++i) { + int *__restrict p; + p = pA; + p++; + total = total + 1; + } + return total; +} +// CHECK: @test12_lR +// CHECK-NOT: .noalias +// CHECK: ret i32 Index: clang/test/CodeGen/restrict/escape_through_volatile.c =================================================================== --- /dev/null +++ clang/test/CodeGen/restrict/escape_through_volatile.c @@ -0,0 +1,31 @@ +// RUN: %clang_cc1 -triple x86_64-apple-darwin -O2 -ffull-restrict %s -emit-llvm -o - | FileCheck %s + +// a volatile pointer can confuse llvm: (p2 depends on p0) +int test_escape_through_volatile_01(int *a_p0) { + int *__restrict p0; + p0 = a_p0; + int *volatile p1 = p0; + int *p2 = p1; + *p0 = 42; + *p2 = 99; + + return *p0; // 42 or 99 +} + +// CHECK: @test_escape_through_volatile_01 +// either a reload or 99, but must never be 42 +// CHECK-NOT: ret i32 42 + +// but not in: +int test_escape_through_volatile_02(int *__restrict p0) { + int *volatile p1 = p0; + int *p2 = p1; + *p0 = 42; + *p2 = 99; + + return *p0; // 42 or 99 +} + +// CHECK: @test_escape_through_volatile_02 +// either a reload or 99, but must never be 42 +// CHECK-NOT: ret i32 42 Index: clang/test/CodeGen/restrict/inlining_01.c =================================================================== --- /dev/null +++ clang/test/CodeGen/restrict/inlining_01.c @@ -0,0 +1,156 @@ +// sfg-check: check resulting chains +// RUN: %clang_cc1 -triple x86_64-apple-darwin -O2 -ffull-restrict %s -emit-llvm -o - | FileCheck %s + +// check how restrict propagation wrt inlining works + +//#define INLINE __attribute__((alwaysinline)) +//#define INLINE inline +#define INLINE + +// Variations: +// - n : no restrict +// - a : argument restrict +// - R : local restrict + +#define ARGRESTRICT_n +#define ARGRESTRICT_a __restrict +#define ARGRESTRICT_R + +#define LOCALRESTRICT_n(T, A, B) T A = B +#define LOCALRESTRICT_a(T, A, B) T A = B +#define LOCALRESTRICT_R(T, A, B) T __restrict A = B 
+ +#define CREATE_ALL(CS) \ + CS(n, n) \ + CS(a, n) \ + CS(n, a) \ + CS(a, a) \ + CS(R, n) \ + CS(n, R) \ + CS(R, R) + +#define CREATE_SET(A, B) \ + INLINE int set_##A##B(int *ARGRESTRICT_##A pA, int *ARGRESTRICT_##B pB) { \ + LOCALRESTRICT_##A(int *, lpA, pA); \ + LOCALRESTRICT_##B(int *, lpB, pB); \ + *lpA = 42; \ + *lpB = 99; \ + return *lpA; \ + } + +#define CREATE_CALL_SET1(A, B) \ + int test01_nn_call_set_##A##B(int *pA, int *pB) { \ + set_##A##B(pA, pB); \ + return *pA; \ + } + +#define CREATE_CALL_SET2(A, B) \ + int test02_##A##B##_call_set_##A##B(int *ARGRESTRICT_##A pA, int *ARGRESTRICT_##B pB) { \ + LOCALRESTRICT_##A(int *, lpA, pA); \ + LOCALRESTRICT_##B(int *, lpB, pB); \ + set_##A##B(lpA, lpB); \ + return *lpA; \ + } + +#define CREATE_CALL_SET3(A, B) \ + int test03_##A##B##_call_set_nn(int *ARGRESTRICT_##A pA, int *ARGRESTRICT_##B pB) { \ + LOCALRESTRICT_##A(int *, lpA, pA); \ + LOCALRESTRICT_##B(int *, lpB, pB); \ + set_nn(lpA, lpB); \ + return *lpA; \ + } + +CREATE_ALL(CREATE_SET) +CREATE_ALL(CREATE_CALL_SET1) +CREATE_ALL(CREATE_CALL_SET2) +CREATE_ALL(CREATE_CALL_SET3) + +// CHECK-LABEL: @set_nn( +// CHECK-NOT: ret i32 42 + +// CHECK-LABEL: @set_an( +// CHECK: ret i32 42 + +// CHECK-LABEL: @set_na( +// CHECK: ret i32 42 + +// CHECK-LABEL: @set_aa( +// CHECK: ret i32 42 + +// CHECK-LABEL: @set_Rn( +// CHECK: ret i32 42 + +// CHECK-LABEL: @set_nR( +// CHECK: ret i32 42 + +// CHECK-LABEL: @set_RR( +// CHECK: ret i32 42 + +// CHECK-LABEL: @test01_nn_call_set_nn( +// CHECK-NOT: ret i32 42 + +//@ NOTE: missed store-load propagation +// CHECK-LABEL: @test01_nn_call_set_an( +// CHECK-NOT: ret i32 42 + +//@ NOTE: missed store-load propagation +// CHECK-LABEL: @test01_nn_call_set_na( +// CHECK-NOT: ret i32 42 + +//@ NOTE: missed store-load propagation +// CHECK-LABEL: @test01_nn_call_set_aa( +// CHECK-NOT: ret i32 42 + +//@ NOTE: missed store-load propagation +// CHECK-LABEL: @test01_nn_call_set_Rn( +// CHECK-NOT: ret i32 42 + +//@ NOTE: missed store-load
propagation +// CHECK-LABEL: @test01_nn_call_set_nR( +// CHECK-NOT: ret i32 42 + +//@ NOTE: missed store-load propagation +// CHECK-LABEL: @test01_nn_call_set_RR( +// CHECK-NOT: ret i32 42 + +// CHECK-LABEL: @test02_nn_call_set_nn( +// CHECK-NOT: ret i32 42 + +// CHECK-LABEL: @test02_an_call_set_an( +// CHECK: ret i32 42 + +// CHECK-LABEL: @test02_na_call_set_na( +// CHECK: ret i32 42 + +// CHECK-LABEL: @test02_aa_call_set_aa( +// CHECK: ret i32 42 + +// CHECK-LABEL: @test02_Rn_call_set_Rn( +// CHECK: ret i32 42 + +// CHECK-LABEL: @test02_nR_call_set_nR( +// CHECK: ret i32 42 + +// CHECK-LABEL: @test02_RR_call_set_RR( +// CHECK: ret i32 42 + +// CHECK-LABEL: @test03_nn_call_set_nn( +// CHECK-NOT: ret i32 42 + +// CHECK-LABEL: @test03_an_call_set_nn( +// CHECK: ret i32 42 + +// CHECK-LABEL: @test03_na_call_set_nn( +// CHECK: ret i32 42 + +// CHECK-LABEL: @test03_aa_call_set_nn( +// CHECK: ret i32 42 + +// CHECK-LABEL: @test03_Rn_call_set_nn( +// CHECK: ret i32 42 + +// CHECK-LABEL: @test03_nR_call_set_nn( +// CHECK: ret i32 42 + +// CHECK-LABEL: @test03_RR_call_set_nn( +// CHECK: ret i32 42 Index: clang/test/CodeGen/restrict/inlining_02.c =================================================================== --- /dev/null +++ clang/test/CodeGen/restrict/inlining_02.c @@ -0,0 +1,144 @@ +// sfg-check: check resulting chains +// RUN: %clang_cc1 -triple x86_64-apple-darwin -O2 -ffull-restrict %s -emit-llvm -o - | FileCheck %s + +// Base test for restrict propagation with inlining + +//#define INLINE __attribute__((alwaysinline)) +//#define INLINE inline +#define INLINE + +void set_nnn(int *pA, int *pB, int *dummy) { + int *lpA; + lpA = pA; + int *lpB; + lpB = pB; + int *ldummy; + ldummy = dummy; + *lpA = 42; + *lpB = 43; + + *ldummy = 99; +} +// CHECK-LABEL: @set_nnn( + +void set_nna(int *pA, int *pB, int *__restrict dummy) { + int *lpA; + lpA = pA; + int *lpB; + lpB = pB; + int *ldummy; + ldummy = dummy; + *lpA = 42; + *lpB = 43; + + *ldummy = 99; +} +// CHECK-LABEL: 
@set_nna( + +void set_nnr(int *pA, int *pB, int *dummy) { + int *lpA; + lpA = pA; + int *lpB; + lpB = pB; + int *__restrict ldummy; + ldummy = dummy; + + *lpA = 42; + *lpB = 43; + + *ldummy = 99; +} +// CHECK-LABEL: @set_nnr( + +int test_rr_nnn(int *pA, int *pB, int *dummy) { + int *__restrict lpA; + lpA = pA; + int *__restrict lpB; + lpB = pB; + + set_nnn(lpA, lpB, dummy); + return *lpA; +} + +// CHECK-LABEL: @test_rr_nnn( +// CHECK: ret i32 42 + +int test_rr_nna(int *pA, int *pB, int *dummy) { + int *__restrict lpA; + lpA = pA; + int *__restrict lpB; + lpB = pB; + + set_nna(lpA, lpB, dummy); + return *lpA; +} + +// CHECK-LABEL: @test_rr_nna( +// CHECK: ret i32 42 + +int test_rr_nnr(int *pA, int *pB, int *dummy) { + int *__restrict lpA; + lpA = pA; + int *__restrict lpB; + lpB = pB; + + set_nnr(lpA, lpB, dummy); + return *lpA; +} + +// CHECK-LABEL: @test_rr_nnr( +// CHECK: ret i32 42 + +// ----------------------------------------------------------- + +int test_rr_local_nnn(int *pA, int *pB, int *dummy) { + int *__restrict lpA; + lpA = pA; + int *__restrict lpB; + lpB = pB; + int *ldummy; + ldummy = dummy; + + *lpA = 10; + { + int *l2pA; + l2pA = lpA; + int *l2pB; + l2pB = lpB; + int *l2dummy; + l2dummy = ldummy; + *l2pA = 42; + *l2pB = 43; + + *l2dummy = 99; + } + return *lpA; +} +// CHECK-LABEL: @test_rr_local_nnn( +// CHECK: ret i32 42 + +int test_rr_local_nnr(int *pA, int *pB, int *dummy) { + int *__restrict lpA; + lpA = pA; + int *__restrict lpB; + lpB = pB; + int *ldummy; + ldummy = dummy; + + *lpA = 10; + { + int *l2pA; + l2pA = lpA; + int *l2pB; + l2pB = lpB; + int *__restrict l2dummy; + l2dummy = ldummy; + *l2pA = 42; + *l2pB = 43; + + *l2dummy = 99; + } + return *lpA; +} +// CHECK-LABEL: @test_rr_local_nnr( +// CHECK: ret i32 42 Index: clang/test/CodeGen/restrict/provenance.noalias_reduction_01.c =================================================================== --- /dev/null +++ clang/test/CodeGen/restrict/provenance.noalias_reduction_01.c @@ -0,0 
+1,197 @@ +// RUN: %clang_cc1 -triple x86_64-apple-darwin -O2 -ffull-restrict %s -emit-llvm -o - | FileCheck %s + +// Check that unnecessary llvm.provenance.noalias calls are collapsed + +int *test01(int *p, int n) { + int *__restrict rp; + rp = p; + for (int i = 0; i < n; ++i) { + *rp = 10; + rp++; + rp++; + rp++; + rp++; + } + return rp; +} + +// CHECK-LABEL: @test01( +// CHECK: = tail call i8* @llvm.noalias.decl.p0i8.p0p0i32.i64 +// CHECK: = tail call i32* @llvm.provenance.noalias.p0i32.p0i8.p0p0i32.p0p0i32.i64 +// CHECK: = tail call i32* @llvm.provenance.noalias.p0i32.p0i8.p0p0i32.p0p0i32.i64 +// CHECK: = tail call i32* @llvm.provenance.noalias.p0i32.p0i8.p0p0i32.p0p0i32.i64 +// CHECK-NOT: llvm.provenance.noalias + +int *test02(int *p, int n) { + int *__restrict rp; + rp = p; + rp++; + rp++; + rp++; + rp++; + for (int i = 0; i < n; ++i) { + *rp = 10; + rp++; + rp++; + rp++; + rp++; + } + return rp; +} + +// CHECK-LABEL: @test02( +// CHECK: = tail call i8* @llvm.noalias.decl.p0i8.p0p0i32.i64 +// CHECK: = tail call i32* @llvm.provenance.noalias.p0i32.p0i8.p0p0i32.p0p0i32.i64 +// CHECK-NOT: llvm.provenance.noalias + +int *test03(int *p, int n) { + int *__restrict rp; + rp = p; + rp++; + rp++; + rp++; + rp++; + for (int i = 0; i < n; ++i) { + *rp = 10; + rp++; + rp++; + if (*rp == 42) { + rp++; + rp++; + } + rp++; + rp++; + } + return rp; +} + +// CHECK-LABEL: @test03( +// CHECK: = tail call i8* @llvm.noalias.decl.p0i8.p0p0i32.i64 +// CHECK: = tail call i32* @llvm.provenance.noalias.p0i32.p0i8.p0p0i32.p0p0i32.i64 +// CHECK-NOT: llvm.provenance.noalias + +int *test04(int *p, int n) { + int *__restrict rp; + rp = p; + rp++; + rp++; + rp++; + rp++; + for (int i = 0; i < n; ++i) { + *rp = 10; + rp++; + rp++; + switch (*rp) { + default: + rp++; + case 10: + rp++; + case 20: + rp++; + case 30: + rp++; + break; + } + if (*rp == 42) { + rp++; + rp++; + } + rp++; + rp++; + } + return rp; +} + +// CHECK-LABEL: @test04( +// CHECK: = tail call i8* 
@llvm.noalias.decl.p0i8.p0p0i32.i64 +// CHECK: = tail call i32* @llvm.provenance.noalias.p0i32.p0i8.p0p0i32.p0p0i32.i64 +// CHECK-NOT: llvm.provenance.noalias + +int *test05(int *p, int n) { + int *__restrict rp; + rp = p; + rp++; + rp++; + rp++; + rp++; + for (int i = 0; i < n; ++i) { + *rp = 10; + rp++; + rp++; + switch (*rp) { + default: + rp++; + case 10: + rp++; + case 20: + rp++; + case 30: + rp++; + break; + } + if (*rp == 42) { + rp++; + rp++; + } + rp++; + rp++; + } + return rp; +} + +// CHECK-LABEL: @test05( +// CHECK: = tail call i8* @llvm.noalias.decl.p0i8.p0p0i32.i64 +// CHECK: = tail call i32* @llvm.provenance.noalias.p0i32.p0i8.p0p0i32.p0p0i32.i64 +// CHECK-NOT: llvm.provenance.noalias + +int *test06(int *p, int n) { + int *__restrict rp1; + rp1 = p; + // llvm.provenance.noalias rp1 (p) + + { + int *__restrict rp; + rp = p; + // llvm.provenance.noalias rp (p) + // llvm.provenance.noalias rp (rp1) + rp++; + rp++; + rp++; + rp++; + for (int i = 0; i < n; ++i) { + *rp = 10; + rp = rp1; + rp++; + rp++; + + switch (*rp) { + default: + rp++; + case 10: + rp++; + case 20: + rp++; + case 30: + rp++; + break; + } + if (*rp == 42) { + rp++; + rp++; + } + rp++; + rp++; + } + return rp; + } +} + +// CHECK-LABEL: @test06( +// CHECK: = tail call i8* @llvm.noalias.decl.p0i8.p0p0i32.i64 +// CHECK: = tail call i8* @llvm.noalias.decl.p0i8.p0p0i32.i64 +// CHECK: = tail call i32* @llvm.provenance.noalias.p0i32.p0i8.p0p0i32.p0p0i32.i64 +// CHECK: = tail call i32* @llvm.provenance.noalias.p0i32.p0i8.p0p0i32.p0p0i32.i64 +// CHECK: = tail call i32* @llvm.provenance.noalias.p0i32.p0i8.p0p0i32.p0p0i32.i64 +// CHECK-NOT: llvm.provenance.noalias + +// CHECK: declare i32* @llvm.provenance.noalias.p0i32.p0i8.p0p0i32.p0p0i32.i64 Index: clang/test/CodeGen/restrict/struct.c =================================================================== --- /dev/null +++ clang/test/CodeGen/restrict/struct.c @@ -0,0 +1,138 @@ +// RUN: %clang_cc1 -triple x86_64-apple-darwin -O1 
-disable-llvm-optzns -ffull-restrict %s -emit-llvm -o - | FileCheck %s + +int r; +void ex1(int *); + +struct FOO { + int *restrict rp0; + int *restrict rp1; + int *restrict rp2; +}; + +void test_FOO_local(int *pA, int *pB, int *pC) { + struct FOO tmp = {pA, pB, pC}; + *tmp.rp0 = 42; + *tmp.rp1 = 43; +} +// CHECK-LABEL: void @test_FOO_local( +// CHECK: [[tmp:%.*]] = alloca %struct.FOO, align 8 +// CHECK: [[TMP1:%.*]] = call i8* @llvm.noalias.decl.p0i8.p0s_struct.FOOs.i64(%struct.FOO* [[tmp]], i64 0, metadata [[TAG_6:!.*]]) +// CHECK: [[rp0:%.*]] = getelementptr inbounds %struct.FOO, %struct.FOO* [[tmp]], i32 0, i32 0 +// CHECK: [[rp01:%.*]] = getelementptr inbounds %struct.FOO, %struct.FOO* [[tmp]], i32 0, i32 0 +// CHECK: [[TMP5:%.*]] = load i32*, i32** [[rp01]], align 8, !tbaa [[TAG_9:!.*]], !noalias [[TAG_6]] +// CHECK: [[TMP6:%.*]] = call i32* @llvm.noalias.p0i32.p0i8.p0p0i32.i64(i32* [[TMP5]], i8* [[TMP1]], i32** [[rp01]], i64 0, metadata [[TAG_6]]), !tbaa [[TAG_9]], !noalias [[TAG_6]] +// CHECK: store i32 42, i32* [[TMP6]], align 4, !tbaa [[TAG_13:!.*]], !noalias [[TAG_6]] +// CHECK: [[rp12:%.*]] = getelementptr inbounds %struct.FOO, %struct.FOO* [[tmp]], i32 0, i32 1 +// CHECK: [[TMP7:%.*]] = load i32*, i32** [[rp12]], align 8, !tbaa [[TAG_11:!.*]], !noalias [[TAG_6]] +// CHECK: [[TMP8:%.*]] = call i32* @llvm.noalias.p0i32.p0i8.p0p0i32.i64(i32* [[TMP7]], i8* [[TMP1]], i32** [[rp12]], i64 0, metadata [[TAG_6]]), !tbaa [[TAG_11]], !noalias [[TAG_6]] +// CHECK: store i32 43, i32* [[TMP8]], align 4, !tbaa [[TAG_13]], !noalias [[TAG_6]] +// CHECK: ret void + +void test_FOO_arg_pointer(struct FOO *p) { + *p->rp0 = 42; + *p->rp1 = 43; +} + +// define void @test_FOO_arg_pointer(%struct.FOO* %p) #0 !noalias !15 { +// CHECK: void @test_FOO_arg_pointer(%struct.FOO* [[p:%.*]]) #0 !noalias [[TAG_15:!.*]] { +// CHECK: [[p_addr:%.*]] = alloca %struct.FOO*, align 8 +// CHECK-NEXT: store %struct.FOO* [[p]], %struct.FOO** [[p_addr]], align 8, !tbaa [[TAG_2:!.*]], !noalias 
[[TAG_15]] +// CHECK-NEXT: [[TMP0:%.*]] = load %struct.FOO*, %struct.FOO** [[p_addr]], align 8, !tbaa [[TAG_2]], !noalias [[TAG_15]] +// CHECK-NEXT: [[rp0:%.*]] = getelementptr inbounds %struct.FOO, %struct.FOO* [[TMP0]], i32 0, i32 0 +// CHECK-NEXT: [[TMP1:%.*]] = load i32*, i32** [[rp0]], align 8, !tbaa [[TAG_9:!.*]], !noalias [[TAG_15]] +// CHECK-NEXT: [[TMP2:%.*]] = call i32* @llvm.noalias.p0i32.p0i8.p0p0i32.i64(i32* [[TMP1]], i8* null, i32** [[rp0]], i64 0, metadata [[TAG_15]]), !tbaa [[TAG_9]], !noalias [[TAG_15]] +// CHECK-NEXT: store i32 42, i32* [[TMP2]], align 4, !tbaa [[TAG_13:!.*]], !noalias [[TAG_15]] +// CHECK-NEXT: [[TMP3:%.*]] = load %struct.FOO*, %struct.FOO** [[p_addr]], align 8, !tbaa [[TAG_2]], !noalias [[TAG_15]] +// CHECK-NEXT: [[rp1:%.*]] = getelementptr inbounds %struct.FOO, %struct.FOO* [[TMP3]], i32 0, i32 1 +// CHECK-NEXT: [[TMP4:%.*]] = load i32*, i32** [[rp1]], align 8, !tbaa [[TAG_11:!.*]], !noalias [[TAG_15]] +// CHECK-NEXT: [[TMP5:%.*]] = call i32* @llvm.noalias.p0i32.p0i8.p0p0i32.i64(i32* [[TMP4]], i8* null, i32** [[rp1]], i64 0, metadata [[TAG_15]]), !tbaa [[TAG_11]], !noalias [[TAG_15]] +// CHECK-NEXT: store i32 43, i32* [[TMP5]], align 4, !tbaa [[TAG_13]], !noalias [[TAG_15]] +// CHECK-NEXT: ret void + +void test_FOO_arg_value(struct FOO p) { + *p.rp0 = 42; + *p.rp1 = 43; +} +// NOTE: the struct is mapped 'byval', the scope will be introduced after inlining. 
+ +// define void @test_FOO_arg_value(%struct.FOO* byval(%struct.FOO) align 8 %p) #0 !noalias !18 { +// CHECK: void @test_FOO_arg_value(%struct.FOO* byval(%struct.FOO) align 8 %p) #0 !noalias [[TAG_18:!.*]] { +// CHECK: [[rp0:%.*]] = getelementptr inbounds %struct.FOO, %struct.FOO* [[p:%.*]], i32 0, i32 0 +// CHECK-NEXT: [[TMP0:%.*]] = load i32*, i32** [[rp0]], align 8, !tbaa [[TAG_9:!.*]], !noalias [[TAG_18]] +// CHECK-NEXT: [[TMP1:%.*]] = call i32* @llvm.noalias.p0i32.p0i8.p0p0i32.i64(i32* [[TMP0]], i8* null, i32** [[rp0]], i64 0, metadata [[TAG_18]]), !tbaa [[TAG_9]], !noalias [[TAG_18]] +// CHECK-NEXT: store i32 42, i32* [[TMP1]], align 4, !tbaa [[TAG_13:!.*]], !noalias [[TAG_18]] +// CHECK-NEXT: [[rp1:%.*]] = getelementptr inbounds %struct.FOO, %struct.FOO* [[p]], i32 0, i32 1 +// CHECK-NEXT: [[TMP2:%.*]] = load i32*, i32** [[rp1]], align 8, !tbaa [[TAG_11:!.*]], !noalias [[TAG_18]] +// CHECK-NEXT: [[TMP3:%.*]] = call i32* @llvm.noalias.p0i32.p0i8.p0p0i32.i64(i32* [[TMP2]], i8* null, i32** [[rp1]], i64 0, metadata [[TAG_18]]), !tbaa [[TAG_11]], !noalias [[TAG_18]] +// CHECK-NEXT: store i32 43, i32* [[TMP3]], align 4, !tbaa [[TAG_13]], !noalias [[TAG_18]] +// CHECK-NEXT: ret void + +struct FOO test_FOO_pass(struct FOO p) { + return p; +} + +// define void @test_FOO_pass(%struct.FOO* noalias sret align 8 %agg.result, %struct.FOO* byval(%struct.FOO) align 8 %p) #0 !noalias !21 { +// CHECK: void @test_FOO_pass(%struct.FOO* noalias sret align 8 %agg.result, %struct.FOO* byval(%struct.FOO) align 8 %p) #0 !noalias [[TAG_21:!.*]] { +// CHECK: [[TMP0:%.*]] = call %struct.FOO* @llvm.noalias.copy.guard.p0s_struct.FOOs.p0i8(%struct.FOO* [[p:%.*]], i8* null, metadata [[TAG_24:!.*]], metadata [[TAG_21]]) +// CHECK-NEXT: [[TMP1:%.*]] = bitcast %struct.FOO* [[agg_result:%.*]] to i8* +// CHECK-NEXT: [[TMP2:%.*]] = bitcast %struct.FOO* [[TMP0]] to i8* +// CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i64 24, i1 false), !tbaa.struct 
[[TAG_28:!.*]], !noalias [[TAG_21]] +// CHECK-NEXT: ret void + +struct FUM { + struct FOO m; +}; + +void test_FUM_local(int *pA, int *pB, int *pC) { + struct FUM tmp = {{pA, pB, pC}}; + *tmp.m.rp0 = 42; + *tmp.m.rp1 = 43; +} + +// CHECK-LABEL: void @test_FUM_local( +// CHECK: [[TMP1:%.*]] = call i8* @llvm.noalias.decl.p0i8.p0s_struct.FUMs.i64(%struct.FUM* [[tmp]], i64 0, metadata [[TAG_29:!.*]]) +// CHECK: [[TMP6:%.*]] = call i32* @llvm.noalias.p0i32.p0i8.p0p0i32.i64(i32* [[TMP5:%.*]], i8* [[TMP1]], i32** [[rp02:%.*]], i64 0, metadata [[TAG_29]]), !tbaa [[TAG_32:!.*]], !noalias [[TAG_29]] +// CHECK: [[TMP8:%.*]] = call i32* @llvm.noalias.p0i32.p0i8.p0p0i32.i64(i32* [[TMP7:%.*]], i8* [[TMP1]], i32** [[rp14:%.*]], i64 0, metadata [[TAG_29]]), !tbaa [[TAG_34:!.*]], !noalias [[TAG_29]] + +void test_FUM_arg_pointer(struct FUM *p) { + *p->m.rp0 = 42; + *p->m.rp1 = 43; +} +// define void @test_FUM_arg_pointer(%struct.FUM* %p) #0 !noalias !35 { +// CHECK: void @test_FUM_arg_pointer(%struct.FUM* [[p:%.*]]) #0 !noalias [[TAG_35:!.*]] { +// CHECK: [[TMP2:%.*]] = call i32* @llvm.noalias.p0i32.p0i8.p0p0i32.i64(i32* [[TMP1:%.*]], i8* null, i32** [[rp0:%.*]], i64 0, metadata [[TAG_35]]), !tbaa [[TAG_32:!.*]], !noalias [[TAG_35]] +// CHECK: [[TMP5:%.*]] = call i32* @llvm.noalias.p0i32.p0i8.p0p0i32.i64(i32* [[TMP4:%.*]], i8* null, i32** [[rp1:%.*]], i64 0, metadata [[TAG_35]]), !tbaa [[TAG_34:!.*]], !noalias [[TAG_35]] + +void test_FUM_arg_value(struct FUM p) { + *p.m.rp0 = 42; + *p.m.rp1 = 43; +} + +// define void @test_FUM_arg_value(%struct.FUM* byval(%struct.FUM) align 8 %p) #0 !noalias !38 { +// CHECK: void @test_FUM_arg_value(%struct.FUM* byval(%struct.FUM) align 8 %p) #0 !noalias [[TAG_38:!.*]] { +// CHECK: [[TMP1:%.*]] = call i32* @llvm.noalias.p0i32.p0i8.p0p0i32.i64(i32* [[TMP0:%.*]], i8* null, i32** [[rp0:%.*]], i64 0, metadata [[TAG_38]]), !tbaa [[TAG_32:!.*]], !noalias [[TAG_38]] +// CHECK: [[TMP3:%.*]] = call i32* @llvm.noalias.p0i32.p0i8.p0p0i32.i64(i32* [[TMP2:%.*]], 
i8* null, i32** [[rp1:%.*]], i64 0, metadata [[TAG_38]]), !tbaa [[TAG_34:!.*]], !noalias [[TAG_38]] + +struct FUM test_FUM_pass(struct FUM p) { + return p; +} + +// define void @test_FUM_pass(%struct.FUM* noalias sret align 8 %agg.result, %struct.FUM* byval(%struct.FUM) align 8 %p) #0 !noalias !41 { +// CHECK: void @test_FUM_pass(%struct.FUM* noalias sret align 8 %agg.result, %struct.FUM* byval(%struct.FUM) align 8 %p) #0 !noalias [[TAG_41:!.*]] { +// CHECK: [[TMP0:%.*]] = call %struct.FUM* @llvm.noalias.copy.guard.p0s_struct.FUMs.p0i8(%struct.FUM* [[p:%.*]], i8* null, metadata [[TAG_44:!.*]], metadata [[TAG_41]]) +// CHECK-NEXT: [[TMP1:%.*]] = bitcast %struct.FUM* [[agg_result:%.*]] to i8* +// CHECK-NEXT: [[TMP2:%.*]] = bitcast %struct.FUM* [[TMP0]] to i8* +// CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i64 24, i1 false), !tbaa.struct [[TAG_28:!.*]], !noalias [[TAG_41]] +// CHECK-NEXT: ret void + +// indices for llvm.noalias.copy.guard + +// CHECK: [[TAG_24]] = !{[[TAG_25:!.*]], [[TAG_26:!.*]], [[TAG_27:!.*]]} +// CHECK: [[TAG_25]] = !{i32 -1, i32 0} +// CHECK: [[TAG_26]] = !{i32 -1, i32 1} +// CHECK: [[TAG_27]] = !{i32 -1, i32 2} + +// CHECK: [[TAG_44]] = !{[[TAG_45:!.*]], [[TAG_46:!.*]], [[TAG_47:!.*]]} +// CHECK: [[TAG_45]] = !{i32 -1, i32 0, i32 0} +// CHECK: [[TAG_46]] = !{i32 -1, i32 0, i32 1} +// CHECK: [[TAG_47]] = !{i32 -1, i32 0, i32 2} Index: clang/test/CodeGen/restrict/struct_member_01.c =================================================================== --- /dev/null +++ clang/test/CodeGen/restrict/struct_member_01.c @@ -0,0 +1,73 @@ +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -O2 -ffull-restrict %s -emit-llvm -o - | FileCheck %s +// RUN: %clang_cc1 -triple i386-unknown-linux-gnu -O2 -ffull-restrict %s -emit-llvm -o - | FileCheck %s + +struct FOO { + int *__restrict pA; + int *__restrict pB; +}; + +int test00a(int *pA, int *pB) { + int *__restrict rpA; + int *__restrict rpB; + rpA = pA; + rpB = 
pB; + + *rpA = 42; + *rpB = 43; + return *rpA; +} + +// CHECK-LABEL: @test00a( +// CHECK: ret i32 42 + +int test00b(int *pA, int *pB) { + int *__restrict rp[2]; + rp[0] = pA; + rp[1] = pB; + + *rp[0] = 42; + *rp[1] = 43; + return *rp[0]; +} + +// CHECK-LABEL: @test00b( +// CHECK: ret i32 42 + +int test01(struct FOO *p0, struct FOO *p1) { + *p0->pA = 42; + *p1->pA = 43; + + return *p0->pA; // 42 or 43 +} +// CHECK-LABEL: @test01( +// CHECK-NOT: ret i32 42 + +int test11(struct FOO *p0, struct FOO *p1) { + *p0->pA = 42; + *p1->pB = 43; + + return *p0->pA; // 42 +} + +// CHECK-LABEL: @test11( +// CHECK: ret i32 42 + +int test21(struct FOO p0, struct FOO p1) { + *p0.pA = 42; + *p1.pB = 43; + + return *p0.pA; // 42 +} + +// CHECK-LABEL: @test21( +// CHECK: ret i32 42 + +int test31(struct FOO *p0, struct FOO *__restrict p1) { + *p0->pA = 42; + *p1->pA = 43; + + return *p0->pA; // 42 +} + +// CHECK-LABEL: @test31( +// CHECK: ret i32 42 Index: clang/test/CodeGen/restrict/struct_member_02.c =================================================================== --- /dev/null +++ clang/test/CodeGen/restrict/struct_member_02.c @@ -0,0 +1,110 @@ +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -O2 -ffull-restrict -disable-llvm-passes %s -emit-llvm -o - | FileCheck %s +// RUN: %clang_cc1 -triple i386-unknown-linux-gnu -O2 -ffull-restrict -disable-llvm-passes %s -emit-llvm -o - | FileCheck %s + +//#define __restrict volatile + +// this test checks for what variables a restrict scope is created + +struct FOO_ipp { + int **p; +}; + +struct FOO_irpp { + int *__restrict *p; +}; + +struct FOO_iprp { + int dummy; + int **__restrict p; +}; + +struct FOO_irprp { + int *__restrict *__restrict p; +}; + +struct FOO_NESTED { + struct FOO_iprp m; +}; + +struct FOO_NESTED_A { + struct FOO_iprp m[2][3][4]; +}; + +typedef struct FOO_NESTED FUM; +typedef int *__restrict t_irp; + +int foo(int **p) { + struct FOO_ipp m1; // no + struct FOO_irpp m2; // no + struct FOO_iprp m3; // yes + struct 
FOO_irprp m4; // yes + struct FOO_NESTED m5; // yes + struct FOO_NESTED m6[2][4][5]; // yes + struct FOO_NESTED_A m7[2][4][5]; // yes + t_irp p0; // yes + FUM m8[3]; // yes + int *a1[2]; // no + int **a2[2]; // no + int *__restrict *a3[2]; // no + int **__restrict a4[2]; // yes + int **__restrict a5[2][3][4]; // yes + int *__restrict *a6; // no + m1.p = p; + m2.p = p; + m3.p = p; + m4.p = p; + a1[0] = *p; + a1[1] = *p; + + return **m1.p + **m2.p + **m3.p + **m4.p + *a1[0] + *a1[1]; +} + +// check the scopes of various variables +// CHECK-LABEL: @foo( + +// the local variables: +// CHECK: alloca +// CHECK: alloca +// CHECK: alloca +// CHECK: alloca +// CHECK: alloca +// CHECK: alloca +// CHECK: alloca +// CHECK: alloca +// CHECK: alloca +// CHECK: alloca +// CHECK: alloca +// CHECK: alloca +// CHECK: alloca +// CHECK: alloca +// CHECK: alloca +// CHECK: alloca +// CHECK-NOT: alloca + +// the local variables that have a restrict scope: +// CHECK: llvm.noalias.decl +// CHECK: llvm.noalias.decl +// CHECK: llvm.noalias.decl +// CHECK: llvm.noalias.decl +// CHECK: llvm.noalias.decl +// CHECK: llvm.noalias.decl +// CHECK: llvm.noalias.decl +// CHECK: llvm.noalias.decl +// CHECK: llvm.noalias.decl +// CHECK-NOT: llvm.noalias.decl +// CHECK: ret i32 + +// the restrict related metadata +// CHECK: foo: unknown scope +// CHECK-NEXT: foo +// CHECK-NOT: foo: +// CHECK: foo: m3 +// CHECK-NEXT: foo: m4 +// CHECK-NEXT: foo: m5 +// CHECK-NEXT: foo: m6 +// CHECK-NEXT: foo: m7 +// CHECK-NEXT: foo: p0 +// CHECK-NEXT: foo: m8 +// CHECK-NEXT: foo: a4 +// CHECK-NEXT: foo: a5 +// CHECK-NOT: foo: Index: clang/test/CodeGen/restrict/struct_member_03.c =================================================================== --- /dev/null +++ clang/test/CodeGen/restrict/struct_member_03.c @@ -0,0 +1,100 @@ +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -O2 -ffull-restrict %s -emit-llvm -o - | FileCheck %s +// RUN: %clang_cc1 -triple i386-unknown-linux-gnu -O2 -ffull-restrict %s -emit-llvm -o - 
| FileCheck %s + +struct FOO { + int *__restrict pA; + int *__restrict pB; +}; + +int test10(int *pA, int *pB) { + struct FOO rp; + rp.pA = pA; + rp.pB = pB; + + *rp.pA = 42; + *rp.pB = 99; + + return *rp.pA; //42 +} +// CHECK-LABEL: @test10( +// CHECK: ret i32 42 + +int test11(struct FOO rp) { + *rp.pA = 42; + *rp.pB = 99; + + return *rp.pA; //42 +} +// CHECK-LABEL: @test11( +// CHECK: ret i32 42 + +int test12(struct FOO *rp) { + *rp->pA = 42; + *rp->pB = 99; + + return *rp->pA; //42 +} +// CHECK-LABEL: @test12( +// CHECK: ret i32 42 + +int test20(int *pA, int *pB) { + struct FOO rp0; + struct FOO rp1; + rp0.pB = pA; + rp1.pB = pB; + + *rp0.pB = 42; + *rp1.pB = 99; + + return *rp0.pB; //42 +} +// CHECK-LABEL: @test20( +// CHECK: ret i32 42 + +int test21(struct FOO rp0, struct FOO rp1) { + *rp0.pB = 42; + *rp1.pB = 99; + + return *rp0.pB; //42 +} +// CHECK-LABEL: @test21( +// CHECK: ret i32 42 + +int test22(struct FOO *rp0, struct FOO *rp1) { + *rp0->pB = 42; + *rp1->pB = 99; + + return *rp0->pB; // needs load +} +// CHECK-LABEL: @test22( +// CHECK-NOT: ret i32 42 +// CHECK-NOT: ret i32 99 + +int test23(struct FOO *rp0, struct FOO *rp1) { + *rp0->pA = 42; + *rp1->pB = 99; + + return *rp0->pA; // 42, rp0->pA and rp1->pB are not overlapping +} +// CHECK-LABEL: @test23( +// CHECK: ret i32 42 + +int test24(struct FOO *__restrict rp0, struct FOO *rp1) { + *rp0->pB = 42; + *rp1->pB = 99; + + return *rp0->pB; // 42 +} +// CHECK-LABEL: @test24( +// CHECK: ret i32 42 + +int test25(struct FOO *p0, struct FOO *rp1) { + struct FOO *__restrict rp0; + rp0 = p0; + *rp0->pB = 42; + *rp1->pB = 99; + + return *rp0->pB; // 42 +} +// CHECK-LABEL: @test25( +// CHECK: ret i32 42 Index: clang/test/CodeGen/restrict/struct_member_04.c =================================================================== --- /dev/null +++ clang/test/CodeGen/restrict/struct_member_04.c @@ -0,0 +1,168 @@ +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -O2 -ffull-restrict %s -emit-llvm -o - | FileCheck %s 
+// RUN: %clang_cc1 -triple i386-unknown-linux-gnu -O2 -ffull-restrict %s -emit-llvm -o - | FileCheck %s + +// restrict member pointers and inlining - basic functionality test + +struct FOO { + int *__restrict pA; + int *__restrict pB; +}; + +void setFOO(struct FOO *p) { + *p->pA = 42; + *p->pB = 43; +} + +int test10(int *pA, int *pB, int *pC) { + + *pA = 40; + { + struct FOO rp; + rp.pA = pA; + rp.pB = pB; + + setFOO(&rp); + + *pC = 99; + return *rp.pA; // 42 + } +} + +// CHECK-LABEL: @test10( +// CHECK: ret i32 42 + +int test11(int *pA, int *pB, int *pC) { + + *pA = 40; + { + struct FOO rp; + rp.pA = pA; + rp.pB = pB; + + setFOO(&rp); + + *pC = 99; + } + return *pA; // should be 42, but llvm does not see it +} + +// CHECK-LABEL: @test11( +// CHECK-NOT: ret i32 40 +// CHECK-NOT: ret i32 42 +// CHECK-NOT: ret i32 43 +// CHECK-NOT: ret i32 99 + +int test12(int *pA, int *pB, int *pC) { + + *pA = 40; + { + struct FOO rp; + rp.pA = pA; + rp.pB = pB; + + setFOO(&rp); + } + + *pC = 99; + return *pA; // 42 or 99 +} + +// CHECK-LABEL: @test12( +// CHECK-NOT: ret i32 40 +// CHECK-NOT: ret i32 42 +// CHECK-NOT: ret i32 43 +// CHECK-NOT: ret i32 99 + +// out-of-function scope +int getFOO(struct FOO *p) { + return *p->pA; +} + +// fully defined +int test20(int *pA, int *pC) { + *pA = 42; + { + struct FOO rp; + rp.pA = pA; + *pC = 99; + return getFOO(&rp); + } +} +// CHECK-LABEL: @test20( +// CHECK: ret i32 42 + +// fully defined +int test21(int *pA, int *pC) { + *pA = 42; + *pC = 99; + { + struct FOO rp; + rp.pA = pA; + return getFOO(&rp); + } +} + +// CHECK-LABEL: @test21( +// CHECK: ret i32 % + +// mixed defined +int test22(int *pA, struct FOO *pB0, int b0, int *pC) { + *pA = 42; + { + struct FOO rp; + rp.pA = pA; + *pC = 99; + return getFOO(b0 ? 
&rp : pB0); + } +} +// CHECK-LABEL: @test22( +// CHECK: ret i32 % + +// mixed-mixed defined +int test23(int *pA, struct FOO *pB0, int b0, struct FOO *pB1, int b1, int *pC) { + *pA = 41; + { + struct FOO rp; + rp.pA = pA; + *pC = 98; + return test22(pA, b1 ? &rp : pB0, b0, pC); + } +} +// CHECK-LABEL: @test23( +// CHECK: ret i32 % + +// fully defined +int test24(int *pA, int *pB0, int b0, int *pB1, int b1, int *pC) { + *pA = 40; + { + struct FOO fb0; + fb0.pA = pB0; + { + struct FOO fb1; + fb1.pA = pB1; + + return test23(pA, &fb0, b0, &fb1, b1, pC); + } + } +} + +// CHECK-LABEL: @test24( +// CHECK: ret i32 % + +int test25(int *pA, int b0, int b1, int *pC) { + *pA = 40; + { + struct FOO fb0; + fb0.pA = pA; + { + struct FOO fb1; + fb1.pA = pA; + + return test23(pA, &fb0, b0, &fb1, b1, pC); + } + } +} + +// CHECK-LABEL: @test25( +// FIXME: should be: ret i32 42 +// CHECK: ret i32 % Index: clang/test/CodeGen/restrict/struct_member_05.c =================================================================== --- /dev/null +++ clang/test/CodeGen/restrict/struct_member_05.c @@ -0,0 +1,107 @@ +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -O2 -ffull-restrict %s -emit-llvm -o - | FileCheck %s +// RUN: %clang_cc1 -triple i386-unknown-linux-gnu -O2 -ffull-restrict %s -emit-llvm -o - | FileCheck %s + +// restrict member pointers and inlining - basic functionality test + +struct FOO { + int *__restrict pA; + int *__restrict pB; +}; + +struct FOO_plain { + int *pA; + int *pB; +}; + +int test10(int *pA, int *pB, int *pC) { + + *pA = 40; + { + struct FOO rp; + rp.pA = pA; + rp.pB = pB; + + { + struct FOO *p = &rp; + *p->pA = 42; + *p->pB = 43; + } + + *pC = 99; + return *rp.pA; // 42 + } +} + +// CHECK-LABEL: @test10( +// CHECK: ret i32 42 + +int test11(int *pA, int *pB, int *pC) { + + *pA = 40; + { + struct FOO rp; + rp.pA = pA; + rp.pB = pB; + + { + *rp.pA = 42; + *rp.pB = 43; + } + + *pC = 99; + return *rp.pA; // 42 + } +} + +// CHECK-LABEL: @test11( +// CHECK: ret i32 42 + 
+int test12a(int *pA, int *pB, int *pC, struct FOO *pF) { + + *pA = 40; + { + struct FOO rp; + rp.pA = pA; + rp.pB = pB; + + { + struct FOO *p = pF ? pF : &rp; + *p->pA = 42; + *p->pB = 43; + } + + *pC = 99; + return *rp.pA; // 42 or 40 + } +} + +// CHECK-LABEL: @test12a( +// CHECK-NOT: ret i32 40 +// CHECK-NOT: ret i32 42 +// CHECK-NOT: ret i32 43 +// CHECK-NOT: ret i32 99 + +int test12b(int *pA, int *pB, int *pC, struct FOO_plain *pF) { + + *pA = 40; + { + struct FOO_plain rp; + rp.pA = pA; + rp.pB = pB; + + { + struct FOO_plain *p = pF ? pF : &rp; + *p->pA = 42; + *p->pB = 43; + } + + *pC = 99; + return *rp.pA; // 42 or 40 or 99 or ... + } +} + +// CHECK-LABEL: @test12b( +// CHECK-NOT: ret i32 40 +// CHECK-NOT: ret i32 42 +// CHECK-NOT: ret i32 43 +// CHECK-NOT: ret i32 99 Index: clang/test/CodeGen/restrict/struct_member_06.c =================================================================== --- /dev/null +++ clang/test/CodeGen/restrict/struct_member_06.c @@ -0,0 +1,108 @@ +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -O2 -ffull-restrict %s -emit-llvm -o - | FileCheck %s +// RUN: %clang_cc1 -triple i386-unknown-linux-gnu -O2 -ffull-restrict %s -emit-llvm -o - | FileCheck %s + +// restrict member pointers and inlining - basic functionality test + +struct FOO { + int *__restrict p; +}; + +struct FOO_plain { + int *pA; + int *pB; +}; + +int test01_p_pp(int c, int *pA, int *pB) { + int *__restrict rpA; + rpA = pA; + int *__restrict rpB; + rpB = pB; + + int *p = c ? rpA : rpB; + + return *p; +} +// CHECK-LABEL: @test01_p_pp( +// CHECK: @llvm.noalias.decl +// CHECK-NEXT: @llvm.noalias.decl +// CHECK-NEXT: icmp +// CHECK-NEXT: @llvm.provenance.noalias +// CHECK-NEXT: @llvm.provenance.noalias +// CHECK-NEXT: select +// CHECK-NEXT: select +// CHECK-NEXT: load +// CHECK-NEXT: ret i32 + +int test01_p_ss(int c, int *pA, int *pB) { + struct FOO spA; + spA.p = pA; + struct FOO spB; + spB.p = pB; + + int *p = c ? 
spA.p : spB.p; + + return *p; +} +// CHECK-LABEL: @test01_p_ss( +// CHECK: @llvm.noalias.decl +// CHECK-NEXT: @llvm.noalias.decl +// CHECK-NEXT: icmp +// CHECK-NEXT: @llvm.provenance.noalias +// CHECK-NEXT: @llvm.provenance.noalias +// CHECK-NEXT: select +// CHECK-NEXT: select +// CHECK-NEXT: load +// CHECK-NEXT: ret i32 + +int test01_s_ss(int c, int *pA, int *pB) { + struct FOO spA; + spA.p = pA; + struct FOO spB; + spB.p = pB; + + { + struct FOO p = c ? spA : spB; + + return *p.p; + } +} +// CHECK-LABEL: @test01_s_ss( +// CHECK: @llvm.noalias.decl +// CHECK-NEXT: @llvm.noalias.decl +// CHECK-NEXT: @llvm.noalias.decl +// CHECK-NEXT: icmp +// CHECK-NEXT: @llvm.provenance.noalias +// CHECK-NEXT: @llvm.provenance.noalias +// CHECK-NEXT: select +// CHECK-NEXT: select +// CHECK-NEXT: @llvm.provenance.noalias +// CHECK-NEXT: load +// CHECK-NEXT: ret i32 + +// FIXME: this one currently results in bad code :( +int test01_ps_ss(int c, int *pA, int *pB) { + struct FOO spA; + spA.p = pA; + struct FOO spB; + spB.p = pB; + + struct FOO *p = c ? &spA : &spB; + + return *p->p; +} +// CHECK-LABEL: @test01_ps_ss( +// CHECK: ret i32 + +int test01_ps_psps(int c, struct FOO *ppA, struct FOO *ppB) { + struct FOO *p = c ? 
ppA : ppB; + + return *p->p; +} +// CHECK-LABEL: @test01_ps_psps( +// CHECK: icmp +// CHECK-NEXT: select +// CHECK-NEXT: getelementptr +// CHECK-NEXT: load +// CHECK-NEXT: llvm.provenance.noalias +// CHECK-NEXT: load +// CHECK-NEXT: ret i32 Index: clang/test/CodeGen/restrict/struct_member_07.c =================================================================== --- /dev/null +++ clang/test/CodeGen/restrict/struct_member_07.c @@ -0,0 +1,60 @@ +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -O2 -ffull-restrict %s -emit-llvm -o - | FileCheck %s +// RUN: %clang_cc1 -triple i386-unknown-linux-gnu -O2 -ffull-restrict %s -emit-llvm -o - | FileCheck %s + +struct FOO { + int *__restrict pA; +}; + +struct BAR { + int *pA; +}; + +static void adaptFOO(struct FOO *p) { + *p->pA = 99; +} + +static void adaptBAR(struct BAR *p) { + *p->pA = 99; +} + +static void adaptInt(int *p) { + *p = 99; +} + +// has 'unknown scope': caller: no, callee: yes +int test10(int *pA, struct FOO *pB) { + *pA = 42; + adaptFOO(pB); + return *pA; +} +// CHECK-LABEL: @test10( +// CHECK: ret i32 42 + +// has 'unknown scope': caller: yes, callee: no +int test11(int *pA, struct FOO *pB) { + *pB->pA = 42; + adaptInt(pA); + return *pB->pA; +} +// CHECK-LABEL: @test11( +// CHECK: ret i32 42 + +// has 'unknown scope': caller: no, callee: no +int test12(int *pA, struct BAR *pB) { + *pA = 42; + adaptBAR(pB); + return *pA; +} +// CHECK-LABEL: @test12( +// CHECK-NOT: ret i32 42 + +// has 'unknown scope': caller: yes, callee: yes +int test13(int *pA, struct FOO *pB) { + *pB->pA = 41; // introduce 'unknown scope' + *pA = 42; + adaptFOO(pB); + return *pA; +} + +// CHECK-LABEL: @test13( +// CHECK: ret i32 42 Index: clang/test/CodeGen/restrict/struct_member_08.cpp =================================================================== --- /dev/null +++ clang/test/CodeGen/restrict/struct_member_08.cpp @@ -0,0 +1,162 @@ +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -O2 -ffull-restrict %s -emit-llvm -o - | 
FileCheck %s --check-prefixes=CHECK,CHECK64 +// RUN: %clang_cc1 -triple i386-unknown-linux-gnu -O2 -ffull-restrict %s -emit-llvm -o - | FileCheck %s --check-prefixes=CHECK,CHECK32 + +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -O2 -ffull-restrict %s -emit-llvm -o - -DDUMMY_BEFORE | FileCheck %s --check-prefixes=CHECK,CHECK64 +// RUN: %clang_cc1 -triple i386-unknown-linux-gnu -O2 -ffull-restrict %s -emit-llvm -o - -DDUMMY_BEFORE | FileCheck %s --check-prefixes=CHECK,CHECK32 + +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -O2 -ffull-restrict %s -emit-llvm -o - -DDUMMY_AFTER | FileCheck %s --check-prefixes=CHECK,CHECK64 +// RUN: %clang_cc1 -triple i386-unknown-linux-gnu -O2 -ffull-restrict %s -emit-llvm -o - -DDUMMY_AFTER | FileCheck %s --check-prefixes=CHECK,CHECK32 + +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -O2 -ffull-restrict %s -emit-llvm -o - -DDUMMY_BEFORE -DDUMMY_AFTER | FileCheck %s --check-prefixes=CHECK,CHECK64 +// RUN: %clang_cc1 -triple i386-unknown-linux-gnu -O2 -ffull-restrict %s -emit-llvm -o - -DDUMMY_BEFORE -DDUMMY_AFTER | FileCheck %s --check-prefixes=CHECK,CHECK32 + +// NOTE: this test is in C++ mode + +struct Fum { + Fum(unsigned long long d) { + ptr1 = ((int *)(d & 0xffffffff)); + ptr2 = ((int *)((d >> 32) & 0xffffffff)); + } + Fum(const Fum &) = default; + +#ifdef DUMMY_BEFORE + int *dummyb0; + int *dummyb1; +#endif + int *__restrict ptr1; + int *__restrict ptr2; +#ifdef DUMMY_AFTER + int *dummya0; + int *dummya1; +#endif +}; + +static Fum pass(Fum d) { return d; } + +int test_Fum_01(unsigned long long data, int *p1) { + Fum tmp = {data}; + + int *p0 = tmp.ptr1; + + *p0 = 42; + *p1 = 99; + return *p0; +} +// CHECK-LABEL: @_Z11test_Fum_01yPi +// CHECK-NOT: alloca +// CHECK: ret i32 42 + +int test_Fum_02(unsigned long long data) { + Fum tmp = {data}; + + int *p0 = tmp.ptr1; + int *p1 = tmp.ptr2; + + *p0 = 42; + *p1 = 99; + return *p0; +} +// CHECK-LABEL: @_Z11test_Fum_02y +// CHECK-NOT: alloca +// CHECK: ret i32 42 + +int 
test_Fum_pass_01(unsigned long long data, int *p1) { + Fum tmp = {data}; + + int *p0 = pass(tmp).ptr1; + + *p0 = 42; + *p1 = 99; + return *p0; +} +// CHECK-LABEL: @_Z16test_Fum_pass_01yPi +// CHECK-NOT: alloca +// CHECK: ret i32 42 + +int test_Fum_pass_02(unsigned long long data) { + Fum tmp = {data}; + + int *p0 = pass(tmp).ptr1; + int *p1 = pass(tmp).ptr2; + + *p0 = 42; + *p1 = 99; + return *p0; +} +// CHECK-LABEL: @_Z16test_Fum_pass_02y +// CHECK-NOT: alloca +// CHECK: ret i32 42 + +int test_Fum_pass_03(unsigned long long data) { + Fum tmp = {data}; + + int *b0 = tmp.ptr1; + *b0 = 42; + + int *p0 = pass(tmp).ptr1; + + *p0 = 99; + return *b0; // 99 +} +// CHECK-LABEL: @_Z16test_Fum_pass_03y +// CHECK-NOT: alloca +// CHECK-NOT: ret i32 42 +// CHECK: ret i32 99 + +int test_Fum_pass_04(unsigned long long data, int *px) { + Fum tmp = {data}; + + int *b0 = tmp.ptr1; + *b0 = 42; + tmp.ptr1 = px; + + int *p0 = pass(tmp).ptr1; + + *p0 = 99; + return *b0; // 42 or 99 +} +// CHECK-LABEL: @_Z16test_Fum_pass_04yPi +// CHECK-NOT: alloca +// CHECK-NOT: ret i32 42 +// CHECK-NOT: ret i32 99 +// CHECK: ret i32 % + +class S { +public: + S(int *d) : data(d) {} + int *getData() { return data; } + +private: + int *__restrict__ data; +}; + +int test_S__01(int *pA, long N) { + int *__restrict__ x = pA; + + *x = 42; + { + S s(x + N); + *s.getData() = 99; + } + return *x; // N could be 0 +} + +// CHECK-LABEL: @_Z10test_S__01Pil +// CHECK-NOT: alloca +// CHECK-NOT: ret i32 42 +// CHECK: ret i32 % + +int test_S__02(int *pA, long N) { + int *__restrict__ x = pA; + + *x = 42; + { + S s(x + N); + *s.getData() = 99; + return *x; // restrict rules say that N cannot be 0 + } +} + +// CHECK-LABEL: @_Z10test_S__02Pil +// CHECK-NOT: alloca +// CHECK: ret i32 42