Index: lib/CodeGen/CGDecl.cpp =================================================================== --- lib/CodeGen/CGDecl.cpp +++ lib/CodeGen/CGDecl.cpp @@ -32,6 +32,7 @@ #include "llvm/IR/DataLayout.h" #include "llvm/IR/GlobalVariable.h" #include "llvm/IR/Intrinsics.h" +#include "llvm/IR/MDBuilder.h" #include "llvm/IR/Type.h" using namespace clang; @@ -1124,6 +1125,8 @@ if (D.hasAttr()) EmitVarAnnotations(&D, address.getPointer()); + EmitNoAliasScope(emission); + // Make sure we call @llvm.lifetime.end. if (emission.useLifetimeMarkers()) EHStack.pushCleanup(NormalEHLifetimeMarker, @@ -1301,6 +1304,52 @@ } } +// For all local restrict-qualified local variables, we create a noalias +// metadata scope. This scope is used to identify each restrict-qualified +// variable and the other memory accesses within the scope where its aliasing +// assumptions apply. The scope metadata is stored in the NoAliasAddrMap map +// where the pointer to the local variable is the key in the map. +void CodeGenFunction::EmitNoAliasScope(const AutoVarEmission &emission) { + assert(emission.Variable && "emission was not valid!"); + + // Don't emit noalias intrinsics unless we're optimizing. + if (CGM.getCodeGenOpts().OptimizationLevel == 0) + return; + + const VarDecl &D = *emission.Variable; + QualType type = D.getType(); + + // Emit a noalias intrinsic for restrict-qualified variables. + if (!type.isRestrictQualified()) + return; + + llvm::MDBuilder MDB(CurFn->getContext()); + if (!NoAliasDomain) + NoAliasDomain = MDB.createAnonymousAliasScopeDomain(CurFn->getName()); + + std::string Name = CurFn->getName(); + Name += ": "; + Name += D.getName(); + + llvm::MDNode *Scope = + MDB.createAnonymousAliasScope(NoAliasDomain, Name); + addNoAliasScope(Scope); + + SmallVector ScopeListEntries(1, Scope); + llvm::MDNode *ScopeList = + llvm::MDNode::get(CurFn->getContext(), ScopeListEntries); + + // Check whether this is a byref variable that's potentially + // captured and moved by its own initializer. If so, we'll need to + // emit the initializer first, then copy into the variable. + const Expr *Init = D.getInit(); + bool capturedByInit = emission.IsByRef && isCapturedBy(D, Init); + + Address Loc = + capturedByInit ? emission.Addr : emission.getObjectAddress(*this); + NoAliasAddrMap[Loc.getPointer()] = ScopeList; +} + /// Emit an expression as an initializer for a variable at the given /// location. The expression is not necessarily the normal /// initializer for the variable, and the address is not necessarily Index: lib/CodeGen/CGExpr.cpp =================================================================== --- lib/CodeGen/CGExpr.cpp +++ lib/CodeGen/CGExpr.cpp @@ -1571,6 +1571,13 @@ Value = EmitToMemory(Value, Ty); + // If this is an assignment to a restrict-qualified local variable, then we + // have pointer aliasing assumptions that can be applied to the pointer value + // being stored. + auto NAI = NoAliasAddrMap.find(Addr.getPointer()); + if (NAI != NoAliasAddrMap.end()) + Value = Builder.CreateNoAliasPointer(Value, NAI->second); + LValue AtomicLValue = LValue::MakeAddr(Addr, Ty, getContext(), BaseInfo, TBAAInfo); if (Ty->isAtomicType() || Index: lib/CodeGen/CGStmt.cpp =================================================================== --- lib/CodeGen/CGStmt.cpp +++ lib/CodeGen/CGStmt.cpp @@ -357,6 +357,22 @@ return true; } +bool CodeGenFunction::hasLocalRestrictVars(const CompoundStmt &S) { + // We may have restrict-qualified variables, but if we're not optimizing, we + // don't do anything special with them. + if (CGM.getCodeGenOpts().OptimizationLevel == 0) + return false; + + for (const auto *C : S.body()) + if (const auto *DS = dyn_cast(C)) + for (const auto *I : DS->decls()) + if (const auto *VD = dyn_cast(I)) + if (VD->getType().isRestrictQualified()) + return true; + + return false; +} + /// EmitCompoundStmt - Emit a compound statement {..} node. If GetLast is true, /// this captures the expression result of the last sub-statement and returns it /// (for use by the statement expression extension). @@ -366,7 +382,7 @@ "LLVM IR generation of compound statement ('{}')"); // Keep track of the current cleanup stack depth, including debug scopes. - LexicalScope Scope(*this, S.getSourceRange()); + LexicalScope Scope(*this, S.getSourceRange(), hasLocalRestrictVars(S)); return EmitCompoundStmtWithoutScope(S, GetLast, AggSlot); } @@ -548,6 +564,35 @@ } } +// For all of the instructions generated for this lexical scope that access +// memory, add the noalias metadata associated with any block-local +// restrict-qualified pointers from this scope. +void CodeGenFunction::LexicalNoAliasInfo::addNoAliasMD() { + if (MemoryInsts.empty() || NoAliasScopes.empty()) + return; + + llvm::MDNode *NewScopeList = + llvm::MDNode::get(MemoryInsts[0]->getParent()->getContext(), + NoAliasScopes); + + // Note that we're adding here metadata on all memory-accessing instructions + // in this lexical scope. Perhaps surprisingly, as provided by the C + // specification, this is even true for instructions corresponding to + // accesses before the declaration of the associated restrict-qualified + // variables. This list will include all memory instructions from this scope + // as well as from all inner scopes. There's a callback from inserting an + // instruction which adds all memory instructions to every active lexical + // scope that's recording them. + + for (auto &I : MemoryInsts) + I->setMetadata( + llvm::LLVMContext::MD_noalias, + llvm::MDNode::concatenate(I->getMetadata( + llvm::LLVMContext::MD_noalias), + NewScopeList)); + + MemoryInsts.clear(); +} void CodeGenFunction::EmitLabelStmt(const LabelStmt &S) { EmitLabel(S.getDecl()); Index: lib/CodeGen/CodeGenFunction.h =================================================================== --- lib/CodeGen/CodeGenFunction.h +++ lib/CodeGen/CodeGenFunction.h @@ -35,6 +35,7 @@ #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/TinyPtrVector.h" #include "llvm/IR/ValueHandle.h" #include "llvm/Support/Debug.h" #include "llvm/Transforms/Utils/SanitizerStats.h" @@ -200,7 +201,7 @@ /// instruction is created using Builder. void InsertHelper(llvm::Instruction *I, const llvm::Twine &Name, llvm::BasicBlock *BB, - llvm::BasicBlock::iterator InsertPt) const; + llvm::BasicBlock::iterator InsertPt); /// CurFuncDecl - Holds the Decl for the current outermost /// non-closure context. @@ -629,21 +630,77 @@ } }; - class LexicalScope : public RunCleanupsScope { + bool hasLocalRestrictVars(const CompoundStmt &S); + + // The noalias scopes used to tag pointer values assigned to block-local + // restrict-qualified variables, and the memory-accessing instructions within + // this lexical scope to which the associated pointer-aliasing assumptions + // might apply. One of these will exist for each lexical scope. + struct LexicalNoAliasInfo { + bool RecordMemoryInsts; + llvm::TinyPtrVector MemoryInsts; + llvm::TinyPtrVector NoAliasScopes; + + LexicalNoAliasInfo(bool RecordMemoryInsts = false) + : RecordMemoryInsts(RecordMemoryInsts) {} + + void recordMemoryInsts() { + RecordMemoryInsts = true; + } + + bool isRecordingMemoryInsts() const { + return RecordMemoryInsts; + } + + void recordMemoryInstruction(llvm::Instruction *I) { + if (RecordMemoryInsts) + MemoryInsts.push_back(I); + } + + void addNoAliasScope(llvm::MDNode *Scope) { + assert(RecordMemoryInsts && + "Adding noalias scope but not recording memory accesses!"); + NoAliasScopes.push_back(Scope); + } + + void addNoAliasMD(); + }; + + LexicalNoAliasInfo FnNoAliasInfo; + + class LexicalScope : public RunCleanupsScope, public LexicalNoAliasInfo { SourceRange Range; SmallVector Labels; LexicalScope *ParentScope; + // Cache whether this scope, or any parent scope, is recording memory + // instructions (used for later annotating accesses in blocks with local + // restrict-qualified variables). If this is false, the code can bypass a + // walk up the scope chain for each memory access. + bool IsRecordingMemoryInsts; + LexicalScope(const LexicalScope &) = delete; void operator=(const LexicalScope &) = delete; public: /// \brief Enter a new cleanup scope. - explicit LexicalScope(CodeGenFunction &CGF, SourceRange Range) - : RunCleanupsScope(CGF), Range(Range), ParentScope(CGF.CurLexicalScope) { + explicit LexicalScope(CodeGenFunction &CGF, SourceRange Range, + bool RecordMemoryInsts = false) + : RunCleanupsScope(CGF), LexicalNoAliasInfo(RecordMemoryInsts), + Range(Range), ParentScope(CGF.CurLexicalScope) { CGF.CurLexicalScope = this; if (CGDebugInfo *DI = CGF.getDebugInfo()) DI->EmitLexicalBlockStart(CGF.Builder, Range.getBegin()); + + IsRecordingMemoryInsts = RecordMemoryInsts; + if (ParentScope) + IsRecordingMemoryInsts |= ParentScope->isRecordingMemoryInsts(); + else + IsRecordingMemoryInsts |= CGF.FnNoAliasInfo.isRecordingMemoryInsts(); + } + + bool isRecordingMemoryInsts() const { + return IsRecordingMemoryInsts; } void addLabel(const LabelDecl *label) { @@ -651,6 +708,23 @@ Labels.push_back(label); } + // Record the given memory access instruction in this and all of its + // enclosing scopes. When we close this scope, we'll add the list of + // (metadata corresponding to) all the restrict-qualified local variables + // it declared to all the memory accesses which occurred within it. + // Recording is only enabled under optimization, and it's disabled in a + // scope unless it actually declares any local restrict variables. + void recordMemoryInstruction(llvm::Instruction *I) { + if (!IsRecordingMemoryInsts) + return; + + LexicalNoAliasInfo::recordMemoryInstruction(I); + if (ParentScope) + ParentScope->recordMemoryInstruction(I); + else + CGF.FnNoAliasInfo.recordMemoryInstruction(I); + } + /// \brief Exit this cleanup scope, emitting any accumulated /// cleanups. ~LexicalScope() { @@ -668,6 +742,8 @@ /// \brief Force the emission of cleanups now, instead of waiting /// until this object is destroyed. void ForceCleanup() { + addNoAliasMD(); + CGF.CurLexicalScope = ParentScope; RunCleanupsScope::ForceCleanup(); @@ -684,6 +760,23 @@ typedef llvm::DenseMap DeclMapTy; + // Record this instruction for the purpose of later adding noalias metadata, + // is applicible, in order to support block-local restrict-qualified + // pointers. + void recordMemoryInstruction(llvm::Instruction *I) { + if (CurLexicalScope) + CurLexicalScope->recordMemoryInstruction(I); + else + FnNoAliasInfo.recordMemoryInstruction(I); + } + + void addNoAliasScope(llvm::MDNode *Scope) { + if (CurLexicalScope) + CurLexicalScope->addNoAliasScope(Scope); + else + FnNoAliasInfo.addNoAliasScope(Scope); + } + /// \brief The scope used to remap some variables as private in the OpenMP /// loop body (or other captured region emitted without outlining), and to /// restore old vars back on exit. @@ -1431,6 +1524,12 @@ void EmitOpenCLKernelMetadata(const FunctionDecl *FD, llvm::Function *Fn); + /// The noalias domain metadata for this function. + llvm::MDNode *NoAliasDomain; + /// A map between the addresses of local restrict-qualified variables and + /// their noalias scope. + llvm::DenseMap NoAliasAddrMap; + public: CodeGenFunction(CodeGenModule &cgm, bool suppressNewContext=false); ~CodeGenFunction(); @@ -2468,6 +2567,8 @@ void emitAutoVarTypeCleanup(const AutoVarEmission &emission, QualType::DestructionKind dtorKind); + void EmitNoAliasScope(const AutoVarEmission &emission); + void EmitStaticVarDecl(const VarDecl &D, llvm::GlobalValue::LinkageTypes Linkage); Index: lib/CodeGen/CodeGenFunction.cpp =================================================================== --- lib/CodeGen/CodeGenFunction.cpp +++ lib/CodeGen/CodeGenFunction.cpp @@ -80,7 +80,8 @@ CurLexicalScope(nullptr), TerminateLandingPad(nullptr), TerminateHandler(nullptr), TrapBB(nullptr), ShouldEmitLifetimeMarkers( - shouldEmitLifetimeMarkers(CGM.getCodeGenOpts(), CGM.getLangOpts())) { + shouldEmitLifetimeMarkers(CGM.getCodeGenOpts(), CGM.getLangOpts())), + NoAliasDomain(nullptr) { if (!suppressNewContext) CGM.getCXXABI().getMangleContext().startNewFunction(); @@ -1050,10 +1051,25 @@ void CodeGenFunction::EmitFunctionBody(FunctionArgList &Args, const Stmt *Body) { incrementProfileCounter(Body); - if (const CompoundStmt *S = dyn_cast(Body)) + if (const CompoundStmt *S = dyn_cast(Body)) { + // Block-local restrict-qualified pointers have the property of implying + // exclusivity of access within their block, even with respect to accesses + // in their block prior to the point of declaraction of the + // restrict-qualified pointer. We don't want to incur the expense + // associated with recording memory-accessing instructions when we won't + // have any block-local restrict-qualified pointers, so we check for any + // relevant declaractions first. + if (hasLocalRestrictVars(*S)) + FnNoAliasInfo.recordMemoryInsts(); + EmitCompoundStmtWithoutScope(*S); - else + + // Now that we're done with the block, add noalias metadata if we had any + // block-local restrict-qualified pointers. + FnNoAliasInfo.addNoAliasMD(); + } else { EmitStmt(Body); + } } /// When instrumenting to collect profile data, the counts for some blocks @@ -2101,10 +2117,18 @@ void CodeGenFunction::InsertHelper(llvm::Instruction *I, const llvm::Twine &Name, llvm::BasicBlock *BB, - llvm::BasicBlock::iterator InsertPt) const { + llvm::BasicBlock::iterator InsertPt) { LoopStack.InsertHelper(I); if (IsSanitizerScope) CGM.getSanitizerMetadata()->disableSanitizerForInstruction(I); + + // When we have block-local restrict-qualified pointers, we need to record + // all memory-accessing instructions (i.e. any kind of instruction for which + // AA::getModRefInfo might return something other than NoModRef) so that they + // can be tagged with noalias metadata with noalias scopes corresponding to + // the applicable restrict-qualified pointers. + if (I->mayReadOrWriteMemory()) + recordMemoryInstruction(I); } void CGBuilderInserter::InsertHelper( Index: test/CodeGen/arm_neon_intrinsics.c =================================================================== --- test/CodeGen/arm_neon_intrinsics.c +++ test/CodeGen/arm_neon_intrinsics.c @@ -1,7 +1,10 @@ // RUN: %clang_cc1 -triple thumbv7s-apple-darwin -target-abi apcs-gnu\ // RUN: -target-cpu swift -fallow-half-arguments-and-returns -ffreestanding \ -// RUN: -disable-O0-optnone -emit-llvm -o - %s \ +// RUN: -disable-O0-optnone -mllvm -use-noalias-intrinsic-during-inlining=0 -emit-llvm -o - %s \ // RUN: | opt -S -mem2reg | FileCheck %s +// FIXME: We should update the checks here to accept the llvm.noalias intrinsic +// that results from inlining the called always_inline functions (and remove +// the -use-noalias-intrinsic-during-inlining=0 above). #include Index: test/CodeGen/noalias.c =================================================================== --- /dev/null +++ test/CodeGen/noalias.c @@ -0,0 +1,83 @@ +// RUN: %clang_cc1 -triple x86_64-apple-darwin -O1 -disable-llvm-optzns %s -emit-llvm -o - | FileCheck %s + +int r; +void ex1(int *); + +int *a; +int *foo() { + int * restrict x = a; + return x; + +// CHECK-LABEL: define i32* @foo() +// CHECK: [[x_addr_foo1:%[a-z0-9_.]+]] = alloca i32* +// CHECK: [[x_foo1:%[a-z0-9_.]+]] = load i32*, i32** @a{{.*}}, !noalias [[TAG_foo1:!.*]] +// CHECK: [[x_a_foo1:%[a-z0-9_.]+]] = call i32* @llvm.noalias.p0i32(i32* [[x_foo1]], metadata [[TAG_foo1]]) +// CHECK: store i32* [[x_a_foo1]], i32** [[x_addr_foo1]]{{.*}}, !noalias [[TAG_foo1]] +} + +int *a2; +int *foo1(int b) { + int * restrict x; + +// CHECK-LABEL: define i32* @foo1(i32 %b) +// CHECK: [[x_addr_foo2:%[a-z0-9_.]+]] = alloca i32* +// CHECK: [[x2_addr_foo2:%[a-z0-9_.]+]] = alloca i32* + + if (b) { + x = a; + r += *x; + ex1(x); + +// CHECK: [[x_foo2:%[a-z0-9_.]+]] = load i32*, i32** @a{{.*}}, !noalias [[x_x2_tag_foo2:!.*]] +// CHECK: [[x_a_foo2:%[a-z0-9_.]+]] = call i32* @llvm.noalias.p0i32(i32* [[x_foo2]], metadata [[x_tag_foo2:!.*]]) +// CHECK: store i32* [[x_a_foo2]], i32** [[x_addr_foo2]]{{.*}}, !noalias [[x_x2_tag_foo2]] +// CHECK: call void @ex1 + ++x; + *x = r; + ex1(x); + +// CHECK: [[old_x_foo2:%[a-z0-9_.]+]] = load i32*, i32** [[x_addr_foo2]]{{.*}}, !noalias [[x_x2_tag_foo2]] +// CHECK: [[x_foo2:%[a-z0-9_.]+]] = getelementptr inbounds i32, i32* [[old_x_foo2]], i32 1 +// CHECK: [[x_a_foo2:%[a-z0-9_.]+]] = call i32* @llvm.noalias.p0i32(i32* [[x_foo2]], metadata [[x_tag_foo2]]) +// CHECK: store i32* [[x_a_foo2]], i32** [[x_addr_foo2]]{{.*}}, !noalias [[x_x2_tag_foo2]] +// CHECK: call void @ex1 + + x += b; + *x = r; + ex1(x); + +// CHECK: [[old_x_foo2:%[a-z0-9_.]+]] = load i32*, i32** [[x_addr_foo2]]{{.*}}, !noalias [[x_x2_tag_foo2]] +// CHECK: [[x_foo2:%[a-z0-9_.]+]] = getelementptr inbounds i32, i32* [[old_x_foo2]], i64 +// CHECK: [[x_a_foo2:%[a-z0-9_.]+]] = call i32* @llvm.noalias.p0i32(i32* [[x_foo2]], metadata [[x_tag_foo2]]) +// CHECK: store i32* [[x_a_foo2]], i32** [[x_addr_foo2]]{{.*}}, !noalias [[x_x2_tag_foo2]] +// CHECK: call void @ex1 + + int * restrict x2 = a2; + *x2 = r; + ex1(x2); + +// CHECK: [[x2_foo2:%[a-z0-9_.]+]] = load i32*, i32** @a2{{.*}}, !noalias [[x_x2_tag_foo2]] +// CHECK: [[x2_a_foo2:%[a-z0-9_.]+]] = call i32* @llvm.noalias.p0i32(i32* [[x2_foo2]], metadata [[x2_tag_foo2:!.*]]) +// CHECK: store i32* [[x2_a_foo2]], i32** [[x2_addr_foo2]]{{.*}}, !noalias [[x_x2_tag_foo2]] +// CHECK: call void @ex1 + } else { + x = a2; + r += *x; + +// CHECK: [[x_foo2:%[a-z0-9_.]+]] = load i32*, i32** @a2{{.*}}, !noalias [[x_tag_foo2]] +// CHECK: [[x_a_foo2:%[a-z0-9_.]+]] = call i32* @llvm.noalias.p0i32(i32* [[x_foo2]], metadata [[x_tag_foo2]]) +// CHECK: store i32* [[x_a_foo2]], i32** [[x_addr_foo2]]{{.*}}, !noalias [[x_tag_foo2]] + } + + return x; +} + +int *bar() { + int * x = a; + return x; + +// CHECK-LABEL: define i32* @bar() +// CHECK-NOT: noalias +// CHECK: ret i32* +} + Index: test/OpenMP/taskloop_firstprivate_codegen.cpp =================================================================== --- test/OpenMP/taskloop_firstprivate_codegen.cpp +++ test/OpenMP/taskloop_firstprivate_codegen.cpp @@ -284,7 +284,8 @@ // CHECK: [[PRIV_S_ARR_ADDR:%.+]] = alloca [2 x [[S_DOUBLE_TY]]]*, // CHECK: [[PRIV_VEC_ADDR:%.+]] = alloca [2 x i32]*, // CHECK: [[PRIV_SIVAR_ADDR:%.+]] = alloca i32*, -// CHECK: store void (i8*, ...)* bitcast (void ([[PRIVATES_MAIN_TY]]*, [[S_DOUBLE_TY]]**, i32**, [2 x [[S_DOUBLE_TY]]]**, [2 x i32]**, i32**)* [[PRIVATES_MAP_FN]] to void (i8*, ...)*), void (i8*, ...)** [[MAP_FN_ADDR:%.+]], +// CHECK: [[PRIV_NAFP:%.+]] = call void (i8*, ...)* @llvm.noalias.p0f_isVoidp0i8varargf(void (i8*, ...)* bitcast (void ([[PRIVATES_MAIN_TY]]*, [[S_DOUBLE_TY]]**, i32**, [2 x [[S_DOUBLE_TY]]]**, [2 x i32]**, i32**)* [[PRIVATES_MAP_FN]] to void (i8*, ...)*), +// CHECK: store void (i8*, ...)* [[PRIV_NAFP]], void (i8*, ...)** [[MAP_FN_ADDR:%.+]], // CHECK: [[MAP_FN:%.+]] = load void (i8*, ...)*, void (i8*, ...)** [[MAP_FN_ADDR]], // CHECK: call void (i8*, ...) [[MAP_FN]](i8* %{{.+}}, [[S_DOUBLE_TY]]** [[PRIV_VAR_ADDR]], i32** [[PRIV_T_VAR_ADDR]], [2 x [[S_DOUBLE_TY]]]** [[PRIV_S_ARR_ADDR]], [2 x i32]** [[PRIV_VEC_ADDR]], i32** [[PRIV_SIVAR_ADDR]]) @@ -441,7 +442,8 @@ // CHECK-DAG: [[PRIV_VEC_ADDR:%.+]] = alloca [2 x i32]*, // CHECK-DAG: [[PRIV_S_ARR_ADDR:%.+]] = alloca [2 x [[S_INT_TY]]]*, // CHECK-DAG: [[PRIV_VAR_ADDR:%.+]] = alloca [[S_INT_TY]]*, -// CHECK: store void (i8*, ...)* bitcast (void ([[PRIVATES_TMAIN_TY]]*, i32**, [2 x i32]**, [2 x [[S_INT_TY]]]**, [[S_INT_TY]]**)* [[PRIVATES_MAP_FN]] to void (i8*, ...)*), void (i8*, ...)** [[MAP_FN_ADDR:%.+]], +// CHECK: [[PRIV_NAFP:%.+]] = call void (i8*, ...)* @llvm.noalias.p0f_isVoidp0i8varargf(void (i8*, ...)* bitcast (void ([[PRIVATES_TMAIN_TY]]*, i32**, [2 x i32]**, [2 x [[S_INT_TY]]]**, [[S_INT_TY]]**)* [[PRIVATES_MAP_FN]] to void (i8*, ...)*), +// CHECK: store void (i8*, ...)* [[PRIV_NAFP]], void (i8*, ...)** [[MAP_FN_ADDR:%.+]], // CHECK: [[MAP_FN:%.+]] = load void (i8*, ...)*, void (i8*, ...)** [[MAP_FN_ADDR]], // CHECK: call void (i8*, ...) [[MAP_FN]](i8* %{{.+}}, i32** [[PRIV_T_VAR_ADDR]], [2 x i32]** [[PRIV_VEC_ADDR]], [2 x [[S_INT_TY]]]** [[PRIV_S_ARR_ADDR]], [[S_INT_TY]]** [[PRIV_VAR_ADDR]]) // CHECK: [[PRIV_T_VAR:%.+]] = load i32*, i32** [[PRIV_T_VAR_ADDR]], Index: test/OpenMP/taskloop_lastprivate_codegen.cpp =================================================================== --- test/OpenMP/taskloop_lastprivate_codegen.cpp +++ test/OpenMP/taskloop_lastprivate_codegen.cpp @@ -257,7 +257,8 @@ // CHECK: [[PRIV_S_ARR_ADDR:%.+]] = alloca [2 x [[S_DOUBLE_TY]]]*, // CHECK: [[PRIV_VEC_ADDR:%.+]] = alloca [2 x i32]*, // CHECK: [[PRIV_SIVAR_ADDR:%.+]] = alloca i32*, -// CHECK: store void (i8*, ...)* bitcast (void ([[PRIVATES_MAIN_TY]]*, [[S_DOUBLE_TY]]**, i32**, [2 x [[S_DOUBLE_TY]]]**, [2 x i32]**, i32**)* [[PRIVATES_MAP_FN]] to void (i8*, ...)*), void (i8*, ...)** [[MAP_FN_ADDR:%.+]], +// CHECK: [[PRIV_NAFP:%.+]] = call void (i8*, ...)* @llvm.noalias.p0f_isVoidp0i8varargf(void (i8*, ...)* bitcast (void ([[PRIVATES_MAIN_TY]]*, [[S_DOUBLE_TY]]**, i32**, [2 x [[S_DOUBLE_TY]]]**, [2 x i32]**, i32**)* [[PRIVATES_MAP_FN]] to void (i8*, ...)*), +// CHECK: store void (i8*, ...)* [[PRIV_NAFP]], void (i8*, ...)** [[MAP_FN_ADDR:%.+]], // CHECK: [[MAP_FN:%.+]] = load void (i8*, ...)*, void (i8*, ...)** [[MAP_FN_ADDR]], // CHECK: call void (i8*, ...) [[MAP_FN]](i8* %{{.+}}, [[S_DOUBLE_TY]]** [[PRIV_VAR_ADDR]], i32** [[PRIV_T_VAR_ADDR]], [2 x [[S_DOUBLE_TY]]]** [[PRIV_S_ARR_ADDR]], [2 x i32]** [[PRIV_VEC_ADDR]], i32** [[PRIV_SIVAR_ADDR]]) @@ -425,7 +426,8 @@ // CHECK-DAG: [[PRIV_VEC_ADDR:%.+]] = alloca [2 x i32]*, // CHECK-DAG: [[PRIV_S_ARR_ADDR:%.+]] = alloca [2 x [[S_INT_TY]]]*, // CHECK-DAG: [[PRIV_VAR_ADDR:%.+]] = alloca [[S_INT_TY]]*, -// CHECK: store void (i8*, ...)* bitcast (void ([[PRIVATES_TMAIN_TY]]*, i32**, [2 x i32]**, [2 x [[S_INT_TY]]]**, [[S_INT_TY]]**)* [[PRIVATES_MAP_FN]] to void (i8*, ...)*), void (i8*, ...)** [[MAP_FN_ADDR:%.+]], +// CHECK: [[PRIV_NAFP:%.+]] = call void (i8*, ...)* @llvm.noalias.p0f_isVoidp0i8varargf(void (i8*, ...)* bitcast (void ([[PRIVATES_TMAIN_TY]]*, i32**, [2 x i32]**, [2 x [[S_INT_TY]]]**, [[S_INT_TY]]**)* [[PRIVATES_MAP_FN]] to void (i8*, ...)*), +// CHECK: store void (i8*, ...)* [[PRIV_NAFP]], void (i8*, ...)** [[MAP_FN_ADDR:%.+]], // CHECK: [[MAP_FN:%.+]] = load void (i8*, ...)*, void (i8*, ...)** [[MAP_FN_ADDR]], // CHECK: call void (i8*, ...) [[MAP_FN]](i8* %{{.+}}, i32** [[PRIV_T_VAR_ADDR]], [2 x i32]** [[PRIV_VEC_ADDR]], [2 x [[S_INT_TY]]]** [[PRIV_S_ARR_ADDR]], [[S_INT_TY]]** [[PRIV_VAR_ADDR]]) // CHECK: [[PRIV_T_VAR:%.+]] = load i32*, i32** [[PRIV_T_VAR_ADDR]], Index: test/OpenMP/taskloop_private_codegen.cpp =================================================================== --- test/OpenMP/taskloop_private_codegen.cpp +++ test/OpenMP/taskloop_private_codegen.cpp @@ -222,7 +222,8 @@ // CHECK: [[PRIV_S_ARR_ADDR:%.+]] = alloca [2 x [[S_DOUBLE_TY]]]*, // CHECK: [[PRIV_VEC_ADDR:%.+]] = alloca [2 x i32]*, // CHECK: [[PRIV_SIVAR_ADDR:%.+]] = alloca i32*, -// CHECK: store void (i8*, ...)* bitcast (void ([[PRIVATES_MAIN_TY]]*, [[S_DOUBLE_TY]]**, i32**, [2 x [[S_DOUBLE_TY]]]**, [2 x i32]**, i32**)* [[PRIVATES_MAP_FN]] to void (i8*, ...)*), void (i8*, ...)** [[MAP_FN_ADDR:%.+]], +// CHECK: [[PRIV_NAFP:%.+]] = call void (i8*, ...)* @llvm.noalias.p0f_isVoidp0i8varargf(void (i8*, ...)* bitcast (void ([[PRIVATES_MAIN_TY]]*, [[S_DOUBLE_TY]]**, i32**, [2 x [[S_DOUBLE_TY]]]**, [2 x i32]**, i32**)* [[PRIVATES_MAP_FN]] to void (i8*, ...)*), +// CHECK: store void (i8*, ...)* [[PRIV_NAFP]], void (i8*, ...)** [[MAP_FN_ADDR:%.+]], // CHECK: [[MAP_FN:%.+]] = load void (i8*, ...)*, void (i8*, ...)** [[MAP_FN_ADDR]], // CHECK: call void (i8*, ...) [[MAP_FN]](i8* %{{.+}}, [[S_DOUBLE_TY]]** [[PRIV_VAR_ADDR]], i32** [[PRIV_T_VAR_ADDR]], [2 x [[S_DOUBLE_TY]]]** [[PRIV_S_ARR_ADDR]], [2 x i32]** [[PRIV_VEC_ADDR]], i32** [[PRIV_SIVAR_ADDR]]) // CHECK: [[PRIV_VAR:%.+]] = load [[S_DOUBLE_TY]]*, [[S_DOUBLE_TY]]** [[PRIV_VAR_ADDR]], @@ -350,7 +351,8 @@ // CHECK-DAG: [[PRIV_VEC_ADDR:%.+]] = alloca [2 x i32]*, // CHECK-DAG: [[PRIV_S_ARR_ADDR:%.+]] = alloca [2 x [[S_INT_TY]]]*, // CHECK-DAG: [[PRIV_VAR_ADDR:%.+]] = alloca [[S_INT_TY]]*, -// CHECK: store void (i8*, ...)* bitcast (void ([[PRIVATES_TMAIN_TY]]*, i32**, [2 x i32]**, [2 x [[S_INT_TY]]]**, [[S_INT_TY]]**)* [[PRIVATES_MAP_FN]] to void (i8*, ...)*), void (i8*, ...)** [[MAP_FN_ADDR:%.+]], +// CHECK: [[PRIV_NAFP:%.+]] = call void (i8*, ...)* @llvm.noalias.p0f_isVoidp0i8varargf(void (i8*, ...)* bitcast (void ([[PRIVATES_TMAIN_TY]]*, i32**, [2 x i32]**, [2 x [[S_INT_TY]]]**, [[S_INT_TY]]**)* [[PRIVATES_MAP_FN]] to void (i8*, ...)*), +// CHECK: store void (i8*, ...)* [[PRIV_NAFP]], void (i8*, ...)** [[MAP_FN_ADDR:%.+]], // CHECK: [[MAP_FN:%.+]] = load void (i8*, ...)*, void (i8*, ...)** [[MAP_FN_ADDR]], // CHECK: call void (i8*, ...) [[MAP_FN]](i8* %{{.+}}, i32** [[PRIV_T_VAR_ADDR]], [2 x i32]** [[PRIV_VEC_ADDR]], [2 x [[S_INT_TY]]]** [[PRIV_S_ARR_ADDR]], [[S_INT_TY]]** [[PRIV_VAR_ADDR]]) // CHECK: [[PRIV_T_VAR:%.+]] = load i32*, i32** [[PRIV_T_VAR_ADDR]], Index: test/OpenMP/taskloop_simd_firstprivate_codegen.cpp =================================================================== --- test/OpenMP/taskloop_simd_firstprivate_codegen.cpp +++ test/OpenMP/taskloop_simd_firstprivate_codegen.cpp @@ -284,7 +284,8 @@ // CHECK: [[PRIV_S_ARR_ADDR:%.+]] = alloca [2 x [[S_DOUBLE_TY]]]*, // CHECK: [[PRIV_VEC_ADDR:%.+]] = alloca [2 x i32]*, // CHECK: [[PRIV_SIVAR_ADDR:%.+]] = alloca i32*, -// CHECK: store void (i8*, ...)* bitcast (void ([[PRIVATES_MAIN_TY]]*, [[S_DOUBLE_TY]]**, i32**, [2 x [[S_DOUBLE_TY]]]**, [2 x i32]**, i32**)* [[PRIVATES_MAP_FN]] to void (i8*, ...)*), void (i8*, ...)** [[MAP_FN_ADDR:%.+]], +// CHECK: [[PRIV_NAFP:%.+]] = call void (i8*, ...)* @llvm.noalias.p0f_isVoidp0i8varargf(void (i8*, ...)* bitcast (void ([[PRIVATES_MAIN_TY]]*, [[S_DOUBLE_TY]]**, i32**, [2 x [[S_DOUBLE_TY]]]**, [2 x i32]**, i32**)* [[PRIVATES_MAP_FN]] to void (i8*, ...)*), +// CHECK: store void (i8*, ...)* [[PRIV_NAFP]], void (i8*, ...)** [[MAP_FN_ADDR:%.+]], // CHECK: [[MAP_FN:%.+]] = load void (i8*, ...)*, void (i8*, ...)** [[MAP_FN_ADDR]], // CHECK: call void (i8*, ...) [[MAP_FN]](i8* %{{.+}}, [[S_DOUBLE_TY]]** [[PRIV_VAR_ADDR]], i32** [[PRIV_T_VAR_ADDR]], [2 x [[S_DOUBLE_TY]]]** [[PRIV_S_ARR_ADDR]], [2 x i32]** [[PRIV_VEC_ADDR]], i32** [[PRIV_SIVAR_ADDR]]) @@ -441,7 +442,8 @@ // CHECK-DAG: [[PRIV_VEC_ADDR:%.+]] = alloca [2 x i32]*, // CHECK-DAG: [[PRIV_S_ARR_ADDR:%.+]] = alloca [2 x [[S_INT_TY]]]*, // CHECK-DAG: [[PRIV_VAR_ADDR:%.+]] = alloca [[S_INT_TY]]*, -// CHECK: store void (i8*, ...)* bitcast (void ([[PRIVATES_TMAIN_TY]]*, i32**, [2 x i32]**, [2 x [[S_INT_TY]]]**, [[S_INT_TY]]**)* [[PRIVATES_MAP_FN]] to void (i8*, ...)*), void (i8*, ...)** [[MAP_FN_ADDR:%.+]], +// CHECK: [[PRIV_NAFP:%.+]] = call void (i8*, ...)* @llvm.noalias.p0f_isVoidp0i8varargf(void (i8*, ...)* bitcast (void ([[PRIVATES_TMAIN_TY]]*, i32**, [2 x i32]**, [2 x [[S_INT_TY]]]**, [[S_INT_TY]]**)* [[PRIVATES_MAP_FN]] to void (i8*, ...)*), +// CHECK: store void (i8*, ...)* [[PRIV_NAFP]], void (i8*, ...)** [[MAP_FN_ADDR:%.+]], // CHECK: [[MAP_FN:%.+]] = load void (i8*, ...)*, void (i8*, ...)** [[MAP_FN_ADDR]], // CHECK: call void (i8*, ...) [[MAP_FN]](i8* %{{.+}}, i32** [[PRIV_T_VAR_ADDR]], [2 x i32]** [[PRIV_VEC_ADDR]], [2 x [[S_INT_TY]]]** [[PRIV_S_ARR_ADDR]], [[S_INT_TY]]** [[PRIV_VAR_ADDR]]) // CHECK: [[PRIV_T_VAR:%.+]] = load i32*, i32** [[PRIV_T_VAR_ADDR]], Index: test/OpenMP/taskloop_simd_lastprivate_codegen.cpp =================================================================== --- test/OpenMP/taskloop_simd_lastprivate_codegen.cpp +++ test/OpenMP/taskloop_simd_lastprivate_codegen.cpp @@ -257,7 +257,8 @@ // CHECK: [[PRIV_S_ARR_ADDR:%.+]] = alloca [2 x [[S_DOUBLE_TY]]]*, // CHECK: [[PRIV_VEC_ADDR:%.+]] = alloca [2 x i32]*, // CHECK: [[PRIV_SIVAR_ADDR:%.+]] = alloca i32*, -// CHECK: store void (i8*, ...)* bitcast (void ([[PRIVATES_MAIN_TY]]*, [[S_DOUBLE_TY]]**, i32**, [2 x [[S_DOUBLE_TY]]]**, [2 x i32]**, i32**)* [[PRIVATES_MAP_FN]] to void (i8*, ...)*), void (i8*, ...)** [[MAP_FN_ADDR:%.+]], +// CHECK: [[PRIV_NAFP:%.+]] = call void (i8*, ...)* @llvm.noalias.p0f_isVoidp0i8varargf(void (i8*, ...)* bitcast (void ([[PRIVATES_MAIN_TY]]*, [[S_DOUBLE_TY]]**, i32**, [2 x [[S_DOUBLE_TY]]]**, [2 x i32]**, i32**)* [[PRIVATES_MAP_FN]] to void (i8*, ...)*), +// CHECK: store void (i8*, ...)* [[PRIV_NAFP]], void (i8*, ...)** [[MAP_FN_ADDR:%.+]], // CHECK: [[MAP_FN:%.+]] = load void (i8*, ...)*, void (i8*, ...)** [[MAP_FN_ADDR]], // CHECK: call void (i8*, ...) [[MAP_FN]](i8* %{{.+}}, [[S_DOUBLE_TY]]** [[PRIV_VAR_ADDR]], i32** [[PRIV_T_VAR_ADDR]], [2 x [[S_DOUBLE_TY]]]** [[PRIV_S_ARR_ADDR]], [2 x i32]** [[PRIV_VEC_ADDR]], i32** [[PRIV_SIVAR_ADDR]]) @@ -425,7 +426,8 @@ // CHECK-DAG: [[PRIV_VEC_ADDR:%.+]] = alloca [2 x i32]*, // CHECK-DAG: [[PRIV_S_ARR_ADDR:%.+]] = alloca [2 x [[S_INT_TY]]]*, // CHECK-DAG: [[PRIV_VAR_ADDR:%.+]] = alloca [[S_INT_TY]]*, -// CHECK: store void (i8*, ...)* bitcast (void ([[PRIVATES_TMAIN_TY]]*, i32**, [2 x i32]**, [2 x [[S_INT_TY]]]**, [[S_INT_TY]]**)* [[PRIVATES_MAP_FN]] to void (i8*, ...)*), void (i8*, ...)** [[MAP_FN_ADDR:%.+]], +// CHECK: [[PRIV_NAFP:%.+]] = call void (i8*, ...)* @llvm.noalias.p0f_isVoidp0i8varargf(void (i8*, ...)* bitcast (void ([[PRIVATES_TMAIN_TY]]*, i32**, [2 x i32]**, [2 x [[S_INT_TY]]]**, [[S_INT_TY]]**)* [[PRIVATES_MAP_FN]] to void (i8*, ...)*), +// CHECK: store void (i8*, ...)* [[PRIV_NAFP]], void (i8*, ...)** [[MAP_FN_ADDR:%.+]], // CHECK: [[MAP_FN:%.+]] = load void (i8*, ...)*, void (i8*, ...)** [[MAP_FN_ADDR]], // CHECK: call void (i8*, ...) [[MAP_FN]](i8* %{{.+}}, i32** [[PRIV_T_VAR_ADDR]], [2 x i32]** [[PRIV_VEC_ADDR]], [2 x [[S_INT_TY]]]** [[PRIV_S_ARR_ADDR]], [[S_INT_TY]]** [[PRIV_VAR_ADDR]]) // CHECK: [[PRIV_T_VAR:%.+]] = load i32*, i32** [[PRIV_T_VAR_ADDR]], Index: test/OpenMP/taskloop_simd_private_codegen.cpp =================================================================== --- test/OpenMP/taskloop_simd_private_codegen.cpp +++ test/OpenMP/taskloop_simd_private_codegen.cpp @@ -222,7 +222,8 @@ // CHECK: [[PRIV_S_ARR_ADDR:%.+]] = alloca [2 x [[S_DOUBLE_TY]]]*, // CHECK: [[PRIV_VEC_ADDR:%.+]] = alloca [2 x i32]*, // CHECK: [[PRIV_SIVAR_ADDR:%.+]] = alloca i32*, -// CHECK: store void (i8*, ...)* bitcast (void ([[PRIVATES_MAIN_TY]]*, [[S_DOUBLE_TY]]**, i32**, [2 x [[S_DOUBLE_TY]]]**, [2 x i32]**, i32**)* [[PRIVATES_MAP_FN]] to void (i8*, ...)*), void (i8*, ...)** [[MAP_FN_ADDR:%.+]], +// CHECK: [[PRIV_NAFP:%.+]] = call void (i8*, ...)* @llvm.noalias.p0f_isVoidp0i8varargf(void (i8*, ...)* bitcast (void ([[PRIVATES_MAIN_TY]]*, [[S_DOUBLE_TY]]**, i32**, [2 x [[S_DOUBLE_TY]]]**, [2 x i32]**, i32**)* [[PRIVATES_MAP_FN]] to void (i8*, ...)*), +// CHECK: store void (i8*, ...)* [[PRIV_NAFP]], void (i8*, ...)** [[MAP_FN_ADDR:%.+]], // CHECK: [[MAP_FN:%.+]] = load void (i8*, ...)*, void (i8*, ...)** [[MAP_FN_ADDR]], // CHECK: call void (i8*, ...) [[MAP_FN]](i8* %{{.+}}, [[S_DOUBLE_TY]]** [[PRIV_VAR_ADDR]], i32** [[PRIV_T_VAR_ADDR]], [2 x [[S_DOUBLE_TY]]]** [[PRIV_S_ARR_ADDR]], [2 x i32]** [[PRIV_VEC_ADDR]], i32** [[PRIV_SIVAR_ADDR]]) // CHECK: [[PRIV_VAR:%.+]] = load [[S_DOUBLE_TY]]*, [[S_DOUBLE_TY]]** [[PRIV_VAR_ADDR]], @@ -350,7 +351,8 @@ // CHECK-DAG: [[PRIV_VEC_ADDR:%.+]] = alloca [2 x i32]*, // CHECK-DAG: [[PRIV_S_ARR_ADDR:%.+]] = alloca [2 x [[S_INT_TY]]]*, // CHECK-DAG: [[PRIV_VAR_ADDR:%.+]] = alloca [[S_INT_TY]]*, -// CHECK: store void (i8*, ...)* bitcast (void ([[PRIVATES_TMAIN_TY]]*, i32**, [2 x i32]**, [2 x [[S_INT_TY]]]**, [[S_INT_TY]]**)* [[PRIVATES_MAP_FN]] to void (i8*, ...)*), void (i8*, ...)** [[MAP_FN_ADDR:%.+]], +// CHECK: [[PRIV_NAFP:%.+]] = call void (i8*, ...)* @llvm.noalias.p0f_isVoidp0i8varargf(void (i8*, ...)* bitcast (void ([[PRIVATES_TMAIN_TY]]*, i32**, [2 x i32]**, [2 x [[S_INT_TY]]]**, [[S_INT_TY]]**)* [[PRIVATES_MAP_FN]] to void (i8*, ...)*), +// CHECK: store void (i8*, ...)* [[PRIV_NAFP]], void (i8*, ...)** [[MAP_FN_ADDR:%.+]], // CHECK: [[MAP_FN:%.+]] = load void (i8*, ...)*, void (i8*, ...)** [[MAP_FN_ADDR]], // CHECK: call void (i8*, ...) [[MAP_FN]](i8* %{{.+}}, i32** [[PRIV_T_VAR_ADDR]], [2 x i32]** [[PRIV_VEC_ADDR]], [2 x [[S_INT_TY]]]** [[PRIV_S_ARR_ADDR]], [[S_INT_TY]]** [[PRIV_VAR_ADDR]]) // CHECK: [[PRIV_T_VAR:%.+]] = load i32*, i32** [[PRIV_T_VAR_ADDR]],