Index: lib/CodeGen/CGDecl.cpp
===================================================================
--- lib/CodeGen/CGDecl.cpp
+++ lib/CodeGen/CGDecl.cpp
@@ -27,6 +27,7 @@
 #include "llvm/IR/DataLayout.h"
 #include "llvm/IR/GlobalVariable.h"
 #include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/MDBuilder.h"
 #include "llvm/IR/Type.h"
 using namespace clang;
 using namespace CodeGen;
@@ -843,6 +844,7 @@
 /// These turn into simple stack objects, or GlobalValues depending on target.
 void CodeGenFunction::EmitAutoVarDecl(const VarDecl &D) {
   AutoVarEmission emission = EmitAutoVarAlloca(D);
+  EmitAutoVarNoAlias(emission);
   EmitAutoVarInit(emission);
   EmitAutoVarCleanups(emission);
 }
@@ -1196,6 +1198,53 @@
   }
 }
 
+/// Give a restrict-qualified local variable its own noalias scope and record
+/// the scope list in NoAliasAddrMap, keyed by the variable's address.  Does
+/// nothing when not optimizing or when the variable is not restrict-qualified.
+void CodeGenFunction::EmitAutoVarNoAlias(const AutoVarEmission &emission) {
+  assert(emission.Variable && "emission was not valid!");
+
+  // Don't emit noalias intrinsics unless we're optimizing.
+  if (CGM.getCodeGenOpts().OptimizationLevel == 0)
+    return;
+
+  const VarDecl &D = *emission.Variable;
+  QualType type = D.getType();
+
+  // Only restrict-qualified variables get a noalias scope.
+  if (!type.isRestrictQualified())
+    return;
+
+  // Every scope created for this function lives in one lazily-created domain.
+  llvm::MDBuilder MDB(CurFn->getContext());
+  if (!NoAliasDomain)
+    NoAliasDomain = MDB.createAnonymousAliasScopeDomain(CurFn->getName());
+
+  // The scope is named "<function>: <variable>".
+  std::string Name = CurFn->getName().str();
+  Name += ": ";
+  Name += D.getName();
+
+  llvm::MDNode *Scope =
+      MDB.createAnonymousAliasScope(NoAliasDomain, Name);
+  addNoAliasScope(Scope);
+
+  SmallVector<llvm::Metadata *, 1> ScopeListEntries(1, Scope);
+  llvm::MDNode *ScopeList =
+      llvm::MDNode::get(CurFn->getContext(), ScopeListEntries);
+
+  // Check whether this is a byref variable that's potentially captured by
+  // its own initializer; if so, key the map on emission.Address instead of
+  // the object address.  A variable without an initializer cannot be
+  // captured by it, so don't hand a null expression to isCapturedBy.
+  const Expr *Init = D.getInit();
+  bool capturedByInit = Init && emission.IsByRef && isCapturedBy(D, Init);
+
+  llvm::Value *Loc =
+      capturedByInit ?
emission.Address : emission.getObjectAddress(*this);
+  NoAliasAddrMap[Loc] = ScopeList;
+}
+
 /// Emit an expression as an initializer for a variable at the given
 /// location.  The expression is not necessarily the normal
 /// initializer for the variable, and the address is not necessarily
Index: lib/CodeGen/CGExpr.cpp
===================================================================
--- lib/CodeGen/CGExpr.cpp
+++ lib/CodeGen/CGExpr.cpp
@@ -1280,6 +1280,12 @@
 
   Value = EmitToMemory(Value, Ty);
 
+  // If this address belongs to a restrict-qualified local, pass the value
+  // being stored through a noalias intrinsic tagged with its scope list.
+  auto NAI = NoAliasAddrMap.find(Addr);
+  if (NAI != NoAliasAddrMap.end())
+    Value = Builder.CreateNoAlias(Value, NAI->second);
+
   if (Ty->isAtomicType() ||
       (!isInit && typeIsSuitableForInlineAtomic(Ty, Volatile))) {
     EmitAtomicStore(RValue::get(Value),
Index: lib/CodeGen/CGStmt.cpp
===================================================================
--- lib/CodeGen/CGStmt.cpp
+++ lib/CodeGen/CGStmt.cpp
@@ -16,6 +16,7 @@
 #include "CodeGenModule.h"
 #include "TargetInfo.h"
+#include "clang/AST/RecursiveASTVisitor.h"
 #include "clang/AST/StmtVisitor.h"
 #include "clang/Basic/PrettyStackTrace.h"
 #include "clang/Basic/TargetInfo.h"
 #include "clang/Sema/LoopHint.h"
@@ -260,6 +261,40 @@
   return true;
 }
 
+namespace {
+/// Looks for restrict-qualified variable declarations in a statement; a scope
+/// containing one needs its memory instructions recorded.
+struct RestrictFinder : RecursiveASTVisitor<RestrictFinder> {
+  bool FoundRestrictDecl;
+  RestrictFinder() : FoundRestrictDecl(false) {}
+
+  // Blocks and lambdas are handled as separate functions, so we need not
+  // traverse them in the parent context.
+  bool TraverseBlockExpr(BlockExpr *BE) { return true; }
+  bool TraverseLambdaBody(LambdaExpr *LE) { return true; }
+  bool TraverseCapturedStmt(CapturedStmt *CS) { return true; }
+
+  bool VisitVarDecl(VarDecl *VD) {
+    if (VD->getType().isRestrictQualified())
+      FoundRestrictDecl = true;
+    // Returning false aborts the traversal; stop only once a match is found.
+    return !FoundRestrictDecl;
+  }
+};
+}
+
+bool CodeGenFunction::hasLocalRestrictVars(const CompoundStmt &S) {
+  // We may have restrict-qualified variables, but if we're not optimizing, we
+  // don't do anything special with them.
+  if (CGM.getCodeGenOpts().OptimizationLevel == 0)
+    return false;
+
+  RestrictFinder Finder;
+  // TraverseStmt takes a non-const Stmt pointer, hence the const_cast.
+  Finder.TraverseStmt(const_cast<CompoundStmt *>(&S));
+  return Finder.FoundRestrictDecl;
+}
+
 /// EmitCompoundStmt - Emit a compound statement {..} node.  If GetLast is true,
 /// this captures the expression result of the last sub-statement and returns it
 /// (for use by the statement expression extension).
@@ -269,7 +304,7 @@
                              "LLVM IR generation of compound statement ('{}')");
 
   // Keep track of the current cleanup stack depth, including debug scopes.
-  LexicalScope Scope(*this, S.getSourceRange());
+  LexicalScope Scope(*this, S.getSourceRange(), hasLocalRestrictVars(S));
 
   return EmitCompoundStmtWithoutScope(S, GetLast, AggSlot);
 }
@@ -450,6 +485,24 @@
   }
 }
 
+/// Append this scope's noalias scope list to the !noalias metadata of every
+/// memory instruction recorded for it, then forget the recorded instructions.
+void CodeGenFunction::LexicalNoAliasInfo::addNoAliasMD() {
+  // Without both recorded instructions and scopes there is nothing to attach.
+  if (NoAliasScopes.empty() || MemoryInsts.empty())
+    return;
+
+  llvm::LLVMContext &Ctx = MemoryInsts.front()->getParent()->getContext();
+  llvm::MDNode *ScopeList = llvm::MDNode::get(Ctx, NoAliasScopes);
+
+  for (auto &Inst : MemoryInsts) {
+    llvm::MDNode *Existing = Inst->getMetadata(llvm::LLVMContext::MD_noalias);
+    Inst->setMetadata(llvm::LLVMContext::MD_noalias,
+                      llvm::MDNode::concatenate(Existing, ScopeList));
+  }
+
+  MemoryInsts.clear();
+}
 
 void CodeGenFunction::EmitLabelStmt(const LabelStmt &S) {
   EmitLabel(S.getDecl());
Index: lib/CodeGen/CodeGenFunction.h
===================================================================
--- lib/CodeGen/CodeGenFunction.h
+++ lib/CodeGen/CodeGenFunction.h
@@ -138,7 +138,7 @@
   /// instruction is created using Builder.
   void InsertHelper(llvm::Instruction *I, const llvm::Twine &Name,
                     llvm::BasicBlock *BB,
-                    llvm::BasicBlock::iterator InsertPt) const;
+                    llvm::BasicBlock::iterator InsertPt);
 
   /// CurFuncDecl - Holds the Decl for the current outermost
   /// non-closure context.
@@ -488,7 +488,45 @@
     }
   };
 
-  class LexicalScope : public RunCleanupsScope {
+  /// Whether \p S contains a restrict-qualified variable declaration; always
+  /// false when not optimizing.
+  bool hasLocalRestrictVars(const CompoundStmt &S);
+
+  /// Bookkeeping used to attach !noalias metadata: the memory instructions
+  /// emitted while recording was enabled, and the noalias scopes to put on
+  /// them.
+  struct LexicalNoAliasInfo {
+    bool RecordMemoryInsts;
+    // NOTE(review): raw pointers are stored here; confirm that a recorded
+    // instruction cannot be erased before addNoAliasMD() runs.
+    SmallVector<llvm::Instruction *, 8> MemoryInsts;
+    SmallVector<llvm::Metadata *, 8> NoAliasScopes;
+
+    explicit LexicalNoAliasInfo(bool RMI = false) : RecordMemoryInsts(RMI) {}
+
+    /// Start recording memory instructions.
+    void recordMemoryInsts() {
+      RecordMemoryInsts = true;
+    }
+
+    /// Remember \p I if recording is enabled.
+    void recordMemoryInstruction(llvm::Instruction *I) {
+      if (RecordMemoryInsts)
+        MemoryInsts.push_back(I);
+    }
+
+    /// Add \p Scope to the scopes that addNoAliasMD() will attach.
+    void addNoAliasScope(llvm::MDNode *Scope) {
+      assert(RecordMemoryInsts &&
+             "Adding noalias scope but not recording memory accesses!");
+      NoAliasScopes.push_back(Scope);
+    }
+
+    /// Attach the collected scopes to every recorded instruction.
+    void addNoAliasMD();
+  } FnNoAliasInfo;
+
+  class LexicalScope : public RunCleanupsScope, public LexicalNoAliasInfo {
     SourceRange Range;
     SmallVector<const LabelDecl*, 4> Labels;
     LexicalScope *ParentScope;
@@ -498,8 +536,10 @@
 
   public:
     /// \brief Enter a new cleanup scope.
-    explicit LexicalScope(CodeGenFunction &CGF, SourceRange Range)
-      : RunCleanupsScope(CGF), Range(Range), ParentScope(CGF.CurLexicalScope) {
+    explicit LexicalScope(CodeGenFunction &CGF, SourceRange Range,
+                          bool RMI = false)
+      : RunCleanupsScope(CGF), LexicalNoAliasInfo(RMI), Range(Range),
+        ParentScope(CGF.CurLexicalScope) {
       CGF.CurLexicalScope = this;
       if (CGDebugInfo *DI = CGF.getDebugInfo())
         DI->EmitLexicalBlockStart(CGF.Builder, Range.getBegin());
@@ -510,6 +550,16 @@
       Labels.push_back(label);
     }
 
+    /// Record \p I in this scope and in every enclosing scope, ending with
+    /// the function-level FnNoAliasInfo.
+    void recordMemoryInstruction(llvm::Instruction *I) {
+      LexicalNoAliasInfo::recordMemoryInstruction(I);
+      if (ParentScope)
+        ParentScope->recordMemoryInstruction(I);
+      else
+        CGF.FnNoAliasInfo.recordMemoryInstruction(I);
+    }
+
     /// \brief Exit this cleanup scope, emitting any accumulated
     /// cleanups.
     ~LexicalScope() {
@@ -527,6 +577,8 @@
     /// \brief Force the emission of cleanups now, instead of waiting
     /// until this object is destroyed.
     void ForceCleanup() {
+      addNoAliasMD();
+
       CGF.CurLexicalScope = ParentScope;
       RunCleanupsScope::ForceCleanup();
 
@@ -537,6 +589,28 @@
     void rescopeLabels();
   };
 
+  /// Record \p I with the innermost lexical scope, or with the function-level
+  /// info when no lexical scope is active.
+  void recordMemoryInstruction(llvm::Instruction *I) {
+    if (CurLexicalScope)
+      CurLexicalScope->recordMemoryInstruction(I);
+    else
+      FnNoAliasInfo.recordMemoryInstruction(I);
+  }
+
+  /// Add \p Scope to the innermost lexical scope, or to the function-level
+  /// info when no lexical scope is active.
+  // NOTE(review): the receiving scope asserts that it is recording, and only
+  // EmitCompoundStmt and EmitFunctionBody turn recording on here; confirm that
+  // a restrict-qualified declaration in any other scope (e.g. a for-loop init
+  // statement) cannot reach this.
+  void addNoAliasScope(llvm::MDNode *Scope) {
+    if (CurLexicalScope)
+      CurLexicalScope->addNoAliasScope(Scope);
+    else
+      FnNoAliasInfo.addNoAliasScope(Scope);
+  }
+
   /// \brief The scope used to remap some variables as private in the OpenMP
   /// loop body (or other captured region emitted without outlining), and to
   /// restore old vars back on exit.
@@ -1040,6 +1114,12 @@
   void EmitOpenCLKernelMetadata(const FunctionDecl *FD,
                                 llvm::Function *Fn);
 
+  /// The noalias domain metadata for this function.
+  llvm::MDNode *NoAliasDomain;
+  /// A map between the addresses of local restrict-qualified variables and
+  /// their noalias scope list.
+  llvm::DenseMap<llvm::Value *, llvm::MDNode *> NoAliasAddrMap;
+
 public:
   CodeGenFunction(CodeGenModule &cgm, bool suppressNewContext=false);
   ~CodeGenFunction();
@@ -1924,6 +2004,9 @@
   void emitAutoVarTypeCleanup(const AutoVarEmission &emission,
                               QualType::DestructionKind dtorKind);
 
+  /// Set up the noalias scope for a restrict-qualified local variable.
+  void EmitAutoVarNoAlias(const AutoVarEmission &emission);
+
   void EmitStaticVarDecl(const VarDecl &D,
                          llvm::GlobalValue::LinkageTypes Linkage);
 
Index: lib/CodeGen/CodeGenFunction.cpp
===================================================================
--- lib/CodeGen/CodeGenFunction.cpp
+++ lib/CodeGen/CodeGenFunction.cpp
@@ -54,7 +54,7 @@
       CXXDefaultInitExprThis(nullptr), CXXStructorImplicitParamDecl(nullptr),
       CXXStructorImplicitParamValue(nullptr), OutermostConditional(nullptr),
       CurLexicalScope(nullptr), TerminateLandingPad(nullptr),
-      TerminateHandler(nullptr), TrapBB(nullptr) {
+      TerminateHandler(nullptr), TrapBB(nullptr), NoAliasDomain(nullptr) {
   if (!suppressNewContext)
     CGM.getCXXABI().getMangleContext().startNewFunction();
 
@@ -774,10 +774,16 @@
 void
CodeGenFunction::EmitFunctionBody(FunctionArgList &Args,
                                        const Stmt *Body) {
   incrementProfileCounter(Body);
-  if (const CompoundStmt *S = dyn_cast<CompoundStmt>(Body))
+  if (const CompoundStmt *S = dyn_cast<CompoundStmt>(Body)) {
+    if (hasLocalRestrictVars(*S))
+      FnNoAliasInfo.recordMemoryInsts();
+
     EmitCompoundStmtWithoutScope(*S);
-  else
+
+    FnNoAliasInfo.addNoAliasMD();
+  } else {
     EmitStmt(Body);
+  }
 }
 
 /// When instrumenting to collect profile data, the counts for some blocks
@@ -1731,10 +1737,15 @@
 void CodeGenFunction::InsertHelper(llvm::Instruction *I,
                                    const llvm::Twine &Name,
                                    llvm::BasicBlock *BB,
-                                   llvm::BasicBlock::iterator InsertPt) const {
+                                   llvm::BasicBlock::iterator InsertPt) {
   LoopStack.InsertHelper(I);
   if (IsSanitizerScope)
     CGM.getSanitizerMetadata()->disableSanitizerForInstruction(I);
+
+  // Remember memory-accessing instructions so that addNoAliasMD() can tag
+  // them with !noalias metadata later.
+  if (I->mayReadOrWriteMemory())
+    recordMemoryInstruction(I);
 }
 
 template <bool PreserveNames>
Index: test/CodeGen/noalias.c
===================================================================
--- /dev/null
+++ test/CodeGen/noalias.c
@@ -0,0 +1,83 @@
+// RUN: %clang_cc1 -triple x86_64-apple-darwin -O1 -disable-llvm-optzns %s -emit-llvm -o - | FileCheck %s
+
+int r;
+void ex1(int *);
+
+int *a;
+int *foo() {
+  int * restrict x = a;
+  return x;
+
+// CHECK-LABEL: define i32* @foo()
+// CHECK: [[x_addr_foo1:%[a-z0-9_.]+]] = alloca i32*
+// CHECK: [[x_foo1:%[a-z0-9_.]+]] = load i32*, i32** @a{{.*}}, !noalias [[TAG_foo1:!.*]]
+// CHECK: [[x_a_foo1:%[a-z0-9_.]+]] = call i32* @llvm.noalias.p0i32(i32* [[x_foo1]], metadata [[TAG_foo1]])
+// CHECK: store i32* [[x_a_foo1]], i32** [[x_addr_foo1]]{{.*}}, !noalias [[TAG_foo1]]
+}
+
+int *a2;
+int *foo1(int b) {
+  int * restrict x;
+
+// CHECK-LABEL: define i32* @foo1(i32 %b)
+// CHECK: [[x_addr_foo2:%[a-z0-9_.]+]] = alloca i32*
+// CHECK: [[x2_addr_foo2:%[a-z0-9_.]+]] = alloca i32*
+
+  if (b) {
+    x = a;
+    r += *x;
+    ex1(x);
+
+// CHECK: [[x_foo2:%[a-z0-9_.]+]] = load i32*, i32** @a{{.*}}, !noalias [[x_x2_tag_foo2:!.*]]
+// CHECK: [[x_a_foo2:%[a-z0-9_.]+]] = call i32* @llvm.noalias.p0i32(i32* [[x_foo2]], metadata [[x_tag_foo2:!.*]])
+// CHECK: store i32* [[x_a_foo2]], i32** [[x_addr_foo2]]{{.*}}, !noalias [[x_x2_tag_foo2]]
+// CHECK: call void @ex1
+
+    ++x;
+    *x = r;
+    ex1(x);
+
+// CHECK: [[old_x_foo2:%[a-z0-9_.]+]] = load i32*, i32** [[x_addr_foo2]]{{.*}}, !noalias [[x_x2_tag_foo2]]
+// CHECK: [[x_foo2:%[a-z0-9_.]+]] = getelementptr inbounds i32, i32* [[old_x_foo2]], i32 1
+// CHECK: [[x_a_foo2:%[a-z0-9_.]+]] = call i32* @llvm.noalias.p0i32(i32* [[x_foo2]], metadata [[x_tag_foo2]])
+// CHECK: store i32* [[x_a_foo2]], i32** [[x_addr_foo2]]{{.*}}, !noalias [[x_x2_tag_foo2]]
+// CHECK: call void @ex1
+
+    x += b;
+    *x = r;
+    ex1(x);
+
+// CHECK: [[old_x_foo2:%[a-z0-9_.]+]] = load i32*, i32** [[x_addr_foo2]]{{.*}}, !noalias [[x_x2_tag_foo2]]
+// CHECK: [[x_foo2:%[a-z0-9_.]+]] = getelementptr inbounds i32, i32* [[old_x_foo2]], i64
+// CHECK: [[x_a_foo2:%[a-z0-9_.]+]] = call i32* @llvm.noalias.p0i32(i32* [[x_foo2]], metadata [[x_tag_foo2]])
+// CHECK: store i32* [[x_a_foo2]], i32** [[x_addr_foo2]]{{.*}}, !noalias [[x_x2_tag_foo2]]
+// CHECK: call void @ex1
+
+    int * restrict x2 = a2;
+    *x2 = r;
+    ex1(x2);
+
+// CHECK: [[x2_foo2:%[a-z0-9_.]+]] = load i32*, i32** @a2{{.*}}, !noalias [[x_x2_tag_foo2]]
+// CHECK: [[x2_a_foo2:%[a-z0-9_.]+]] = call i32* @llvm.noalias.p0i32(i32* [[x2_foo2]], metadata [[x2_tag_foo2:!.*]])
+// CHECK: store i32* [[x2_a_foo2]], i32** [[x2_addr_foo2]]{{.*}}, !noalias [[x_x2_tag_foo2]]
+// CHECK: call void @ex1
+  } else {
+    x = a2;
+    r += *x;
+
+// CHECK: [[x_foo2:%[a-z0-9_.]+]] = load i32*, i32** @a2{{.*}}, !noalias [[x_tag_foo2]]
+// CHECK: [[x_a_foo2:%[a-z0-9_.]+]] = call i32* @llvm.noalias.p0i32(i32* [[x_foo2]], metadata [[x_tag_foo2]])
+// CHECK: store i32* [[x_a_foo2]], i32** [[x_addr_foo2]]{{.*}}, !noalias [[x_tag_foo2]]
+  }
+
+  return x;
+}
+
+int *bar() {
+  int * x = a;
+  return x;
+
+// CHECK-LABEL: define i32* @bar()
+// CHECK-NOT: noalias
+// CHECK: ret i32*
+}
+