diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.cpp b/clang/lib/CodeGen/CGOpenMPRuntime.cpp --- a/clang/lib/CodeGen/CGOpenMPRuntime.cpp +++ b/clang/lib/CodeGen/CGOpenMPRuntime.cpp @@ -1429,10 +1429,11 @@ llvm::Value *CGOpenMPRuntime::emitUpdateLocation(CodeGenFunction &CGF, SourceLocation Loc, unsigned Flags) { + uint32_t SrcLocStrSize; llvm::Constant *SrcLocStr; if (CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo || Loc.isInvalid()) { - SrcLocStr = OMPBuilder.getOrCreateDefaultSrcLocStr(); + SrcLocStr = OMPBuilder.getOrCreateDefaultSrcLocStr(SrcLocStrSize); } else { std::string FunctionName; if (const auto *FD = dyn_cast_or_null(CGF.CurFuncDecl)) @@ -1441,12 +1442,12 @@ const char *FileName = PLoc.getFilename(); unsigned Line = PLoc.getLine(); unsigned Column = PLoc.getColumn(); - SrcLocStr = - OMPBuilder.getOrCreateSrcLocStr(FunctionName, FileName, Line, Column); + SrcLocStr = OMPBuilder.getOrCreateSrcLocStr(FunctionName, FileName, Line, + Column, SrcLocStrSize); } unsigned Reserved2Flags = getDefaultLocationReserved2Flags(); - return OMPBuilder.getOrCreateIdent(SrcLocStr, llvm::omp::IdentFlag(Flags), - Reserved2Flags); + return OMPBuilder.getOrCreateIdent( + SrcLocStr, SrcLocStrSize, llvm::omp::IdentFlag(Flags), Reserved2Flags); } llvm::Value *CGOpenMPRuntime::getThreadID(CodeGenFunction &CGF, @@ -1457,10 +1458,11 @@ if (CGM.getLangOpts().OpenMPIRBuilder) { SmallString<128> Buffer; OMPBuilder.updateToLocation(CGF.Builder.saveIP()); + uint32_t SrcLocStrSize; auto *SrcLocStr = OMPBuilder.getOrCreateSrcLocStr( - getIdentStringFromSourceLocation(CGF, Loc, Buffer)); + getIdentStringFromSourceLocation(CGF, Loc, Buffer), SrcLocStrSize); return OMPBuilder.getOrCreateThreadID( - OMPBuilder.getOrCreateIdent(SrcLocStr)); + OMPBuilder.getOrCreateIdent(SrcLocStr, SrcLocStrSize)); } llvm::Value *ThreadID = nullptr; @@ -9527,8 +9529,9 @@ emitMappingInformation(CodeGenFunction &CGF, llvm::OpenMPIRBuilder &OMPBuilder, MappableExprsHandler::MappingExprInfo &MapExprs) { + uint32_t SrcLocStrSize; if (!MapExprs.getMapDecl() && !MapExprs.getMapExpr()) - return OMPBuilder.getOrCreateDefaultSrcLocStr(); + return OMPBuilder.getOrCreateDefaultSrcLocStr(SrcLocStrSize); SourceLocation Loc; if (!MapExprs.getMapDecl() && MapExprs.getMapExpr()) { @@ -9552,7 +9555,8 @@ PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc); return OMPBuilder.getOrCreateSrcLocStr(PLoc.getFilename(), ExprName, - PLoc.getLine(), PLoc.getColumn()); + PLoc.getLine(), PLoc.getColumn(), + SrcLocStrSize); } /// Emit the arrays used to pass the captures and map information to the diff --git a/clang/test/OpenMP/barrier_codegen.cpp b/clang/test/OpenMP/barrier_codegen.cpp --- a/clang/test/OpenMP/barrier_codegen.cpp +++ b/clang/test/OpenMP/barrier_codegen.cpp @@ -19,8 +19,8 @@ #define HEADER // CHECK: [[IDENT_T:%.+]] = type { i32, i32, i32, i32, i8* } -// CHECK-DAG: [[EXPLICIT_BARRIER_LOC:@.+]] = {{.+}} [[IDENT_T]] { i32 0, i32 34, i32 0, i32 0, i8* getelementptr inbounds ([{{[0-9]+}} x i8], [{{[0-9]+}} x i8]* @{{.+}}, i32 0, i32 0) } -// CHECK-DAG: [[LOC:@.+]] = {{.+}} [[IDENT_T]] { i32 0, i32 2, i32 0, i32 0, i8* getelementptr inbounds ([{{[0-9]+}} x i8], [{{[0-9]+}} x i8]* @{{.+}}, i32 0, i32 0) } +// CHECK-DAG: [[EXPLICIT_BARRIER_LOC:@.+]] = {{.+}} [[IDENT_T]] { i32 0, i32 34, i32 0, i32 {{[0-9]+}}, i8* getelementptr inbounds ([{{[0-9]+}} x i8], [{{[0-9]+}} x i8]* @{{.+}}, i32 0, i32 0) } +// CHECK-DAG: [[LOC:@.+]] = {{.+}} [[IDENT_T]] { i32 0, i32 2, i32 0, i32 {{[0-9]+}}, i8* getelementptr inbounds ([{{[0-9]+}} x i8], [{{[0-9]+}} x i8]* @{{.+}}, i32 0, i32 0) } void foo() {} diff --git a/clang/test/OpenMP/for_codegen.cpp b/clang/test/OpenMP/for_codegen.cpp --- a/clang/test/OpenMP/for_codegen.cpp +++ b/clang/test/OpenMP/for_codegen.cpp @@ -22,8 +22,8 @@ // PROF-INSTR-PATH: constant [25 x i8] c"for_codegen-test.profraw\00" // CHECK: [[IDENT_T_TY:%.+]] = type { i32, i32, i32, i32, i8* } -// CHECK-DAG: [[IMPLICIT_BARRIER_LOC:@.+]] = private unnamed_addr constant %{{.+}} { i32 0, i32 66, i32 0, i32 0, i8* -// CHECK-DAG: [[LOOP_LOC:@.+]] = private unnamed_addr constant %{{.+}} { i32 0, i32 514, i32 0, i32 0, i8* +// CHECK-DAG: [[IMPLICIT_BARRIER_LOC:@.+]] = private unnamed_addr constant %{{.+}} { i32 0, i32 66, i32 0, i32 {{[0-9]+}}, i8* +// CHECK-DAG: [[LOOP_LOC:@.+]] = private unnamed_addr constant %{{.+}} { i32 0, i32 514, i32 0, i32 {{[0-9]+}}, i8* // CHECK-DAG: [[I:@.+]] ={{.*}} global i8 1, // CHECK-DAG: [[J:@.+]] ={{.*}} global i8 2, // CHECK-DAG: [[K:@.+]] ={{.*}} global i8 3, diff --git a/clang/test/OpenMP/nvptx_SPMD_codegen.cpp b/clang/test/OpenMP/nvptx_SPMD_codegen.cpp --- a/clang/test/OpenMP/nvptx_SPMD_codegen.cpp +++ b/clang/test/OpenMP/nvptx_SPMD_codegen.cpp @@ -11,13 +11,13 @@ int a; // CHECK-NOT: @__omp_offloading_{{.+}}_exec_mode = weak constant i8 1 -// CHECK-DAG: [[DISTR_LIGHT:@.+]] = private unnamed_addr constant %struct.ident_t { i32 0, i32 2050, i32 3, i32 0, i8* getelementptr inbounds -// CHECK-DAG: [[FOR_LIGHT:@.+]] = private unnamed_addr constant %struct.ident_t { i32 0, i32 514, i32 3, i32 0, i8* getelementptr inbounds -// CHECK-DAG: [[LIGHT:@.+]] = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 3, i32 0, i8* getelementptr inbounds -// CHECK-DAG: [[DISTR_FULL:@.+]] = private unnamed_addr constant %struct.ident_t { i32 0, i32 2050, i32 1, i32 0, i8* getelementptr inbounds -// CHECK-DAG: [[FULL:@.+]] = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 1, i32 0, i8* getelementptr inbounds -// CHECK-DAG: [[BAR_LIGHT:@.+]] = private unnamed_addr constant %struct.ident_t { i32 0, i32 66, i32 3, i32 0, i8* getelementptr inbounds -// CHECK-DAG: [[BAR_FULL:@.+]] = private unnamed_addr constant %struct.ident_t { i32 0, i32 66, i32 1, i32 0, i8* getelementptr inbounds +// CHECK-DAG: [[DISTR_LIGHT:@.+]] = private unnamed_addr constant %struct.ident_t { i32 0, i32 2050, i32 3, i32 {{[0-9]+}}, i8* getelementptr inbounds +// CHECK-DAG: [[FOR_LIGHT:@.+]] = private unnamed_addr constant %struct.ident_t { i32 0, i32 514, i32 3, i32 {{[0-9]+}}, i8* getelementptr inbounds +// CHECK-DAG: [[LIGHT:@.+]] = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 3, i32 {{[0-9]+}}, i8* getelementptr inbounds +// CHECK-DAG: [[DISTR_FULL:@.+]] = private unnamed_addr constant %struct.ident_t { i32 0, i32 2050, i32 1, i32 {{[0-9]+}}, i8* getelementptr inbounds +// CHECK-DAG: [[FULL:@.+]] = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 1, i32 {{[0-9]+}}, i8* getelementptr inbounds +// CHECK-DAG: [[BAR_LIGHT:@.+]] = private unnamed_addr constant %struct.ident_t { i32 0, i32 66, i32 3, i32 {{[0-9]+}}, i8* getelementptr inbounds +// CHECK-DAG: [[BAR_FULL:@.+]] = private unnamed_addr constant %struct.ident_t { i32 0, i32 66, i32 1, i32 {{[0-9]+}}, i8* getelementptr inbounds // CHECK-NOT: @__omp_offloading_{{.+}}_exec_mode = weak constant i8 1 void foo() { diff --git a/clang/test/OpenMP/parallel_num_threads_codegen.cpp b/clang/test/OpenMP/parallel_num_threads_codegen.cpp --- a/clang/test/OpenMP/parallel_num_threads_codegen.cpp +++ b/clang/test/OpenMP/parallel_num_threads_codegen.cpp @@ -15,7 +15,7 @@ // CHECK-DAG: [[IDENT_T_TY:%.+]] = type { i32, i32, i32, i32, i8* } // CHECK-DAG: [[S_TY:%.+]] = type { [[INTPTR_T_TY:i[0-9]+]], [[INTPTR_T_TY]], [[INTPTR_T_TY]] } // CHECK-DAG: [[STR:@.+]] = private unnamed_addr constant [23 x i8] c";unknown;unknown;0;0;;\00" -// CHECK-DAG: [[DEF_LOC_2:@.+]] = private unnamed_addr constant [[IDENT_T_TY]] { i32 0, i32 2, i32 0, i32 0, i8* getelementptr inbounds ([23 x i8], [23 x i8]* [[STR]], i32 0, i32 0) } +// CHECK-DAG: [[DEF_LOC_2:@.+]] = private unnamed_addr constant [[IDENT_T_TY]] { i32 0, i32 2, i32 0, i32 22, i8* getelementptr inbounds ([23 x i8], [23 x i8]* [[STR]], i32 0, i32 0) } void foo(); diff --git a/clang/test/OpenMP/parallel_proc_bind_codegen.cpp b/clang/test/OpenMP/parallel_proc_bind_codegen.cpp --- a/clang/test/OpenMP/parallel_proc_bind_codegen.cpp +++ b/clang/test/OpenMP/parallel_proc_bind_codegen.cpp @@ -14,7 +14,7 @@ // CHECK-DAG: [[IDENT_T_TY:%.+]] = type { i32, i32, i32, i32, i8* } // CHECK-DAG: [[STR:@.+]] = private unnamed_addr constant [23 x i8] c";unknown;unknown;0;0;;\00" -// CHECK-DAG: [[DEF_LOC_2:@.+]] = private unnamed_addr constant [[IDENT_T_TY]] { i32 0, i32 2, i32 0, i32 0, i8* getelementptr inbounds ([23 x i8], [23 x i8]* [[STR]], i32 0, i32 0) } +// CHECK-DAG: [[DEF_LOC_2:@.+]] = private unnamed_addr constant [[IDENT_T_TY]] { i32 0, i32 2, i32 0, i32 22, i8* getelementptr inbounds ([23 x i8], [23 x i8]* [[STR]], i32 0, i32 0) } void foo(); diff --git a/clang/test/OpenMP/parallel_proc_bind_primary_codegen.cpp b/clang/test/OpenMP/parallel_proc_bind_primary_codegen.cpp --- a/clang/test/OpenMP/parallel_proc_bind_primary_codegen.cpp +++ b/clang/test/OpenMP/parallel_proc_bind_primary_codegen.cpp @@ -15,7 +15,7 @@ // CHECK-DAG: [[IDENT_T_TY:%.+]] = type { i32, i32, i32, i32, i8* } // CHECK-DAG: [[STR:@.+]] = private unnamed_addr constant [23 x i8] c";unknown;unknown;0;0;;\00" -// CHECK-DAG: [[DEF_LOC_2:@.+]] = private unnamed_addr constant [[IDENT_T_TY]] { i32 0, i32 2, i32 0, i32 0, i8* getelementptr inbounds ([23 x i8], [23 x i8]* [[STR]], i32 0, i32 0) } +// CHECK-DAG: [[DEF_LOC_2:@.+]] = private unnamed_addr constant [[IDENT_T_TY]] { i32 0, i32 2, i32 0, i32 22, i8* getelementptr inbounds ([23 x i8], [23 x i8]* [[STR]], i32 0, i32 0) } void foo(); diff --git a/clang/test/OpenMP/sections_codegen.cpp b/clang/test/OpenMP/sections_codegen.cpp --- a/clang/test/OpenMP/sections_codegen.cpp +++ b/clang/test/OpenMP/sections_codegen.cpp @@ -9,8 +9,8 @@ // expected-no-diagnostics #ifndef HEADER #define HEADER -// CHECK-DAG: [[IMPLICIT_BARRIER_SECTIONS_LOC:@.+]] = private unnamed_addr constant %{{.+}} { i32 0, i32 194, i32 0, i32 0, i8* -// CHECK-DAG: [[SECTIONS_LOC:@.+]] = private unnamed_addr constant %{{.+}} { i32 0, i32 1026, i32 0, i32 0, i8* +// CHECK-DAG: [[IMPLICIT_BARRIER_SECTIONS_LOC:@.+]] = private unnamed_addr constant %{{.+}} { i32 0, i32 194, i32 0, i32 {{[0-9]+}}, i8* +// CHECK-DAG: [[SECTIONS_LOC:@.+]] = private unnamed_addr constant %{{.+}} { i32 0, i32 1026, i32 0, i32 {{[0-9]+}}, i8* // CHECK-LABEL: foo void foo() { extern void mayThrow(); mayThrow(); }; // CHECK-LABEL: bar diff --git a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h --- a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h +++ b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h @@ -663,28 +663,31 @@ Function *getOrCreateRuntimeFunctionPtr(omp::RuntimeFunction FnID); /// Return the (LLVM-IR) string describing the source location \p LocStr. - Constant *getOrCreateSrcLocStr(StringRef LocStr); + Constant *getOrCreateSrcLocStr(StringRef LocStr, uint32_t &SrcLocStrSize); /// Return the (LLVM-IR) string describing the default source location. - Constant *getOrCreateDefaultSrcLocStr(); + Constant *getOrCreateDefaultSrcLocStr(uint32_t &SrcLocStrSize); /// Return the (LLVM-IR) string describing the source location identified by /// the arguments. Constant *getOrCreateSrcLocStr(StringRef FunctionName, StringRef FileName, - unsigned Line, unsigned Column); + unsigned Line, unsigned Column, + uint32_t &SrcLocStrSize); /// Return the (LLVM-IR) string describing the DebugLoc \p DL. Use \p F as /// fallback if \p DL does not specify the function name. - Constant *getOrCreateSrcLocStr(DebugLoc DL, Function *F = nullptr); + Constant *getOrCreateSrcLocStr(DebugLoc DL, uint32_t &SrcLocStrSize, + Function *F = nullptr); /// Return the (LLVM-IR) string describing the source location \p Loc. - Constant *getOrCreateSrcLocStr(const LocationDescription &Loc); + Constant *getOrCreateSrcLocStr(const LocationDescription &Loc, + uint32_t &SrcLocStrSize); /// Return an ident_t* encoding the source location \p SrcLocStr and \p Flags. /// TODO: Create a enum class for the Reserve2Flags - Value *getOrCreateIdent(Constant *SrcLocStr, - omp::IdentFlag Flags = omp::IdentFlag(0), - unsigned Reserve2Flags = 0); + Constant *getOrCreateIdent(Constant *SrcLocStr, uint32_t SrcLocStrSize, + omp::IdentFlag Flags = omp::IdentFlag(0), + unsigned Reserve2Flags = 0); /// Create a global flag \p Namein the module with initial value \p Value. GlobalValue *createGlobalFlag(unsigned Value, StringRef Name); @@ -754,7 +757,7 @@ StringMap SrcLocStrMap; /// Map to remember existing ident_t*. - DenseMap, Value *> IdentMap; + DenseMap, Constant *> IdentMap; /// Helper that contains information about regions we need to outline /// during finalization. diff --git a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp --- a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp +++ b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp @@ -21,7 +21,9 @@ #include "llvm/Analysis/ScalarEvolution.h" #include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/IR/CFG.h" +#include "llvm/IR/Constants.h" #include "llvm/IR/DebugInfo.h" +#include "llvm/IR/GlobalVariable.h" #include "llvm/IR/IRBuilder.h" #include "llvm/IR/MDBuilder.h" #include "llvm/IR/PassManager.h" @@ -37,6 +39,7 @@ #include "llvm/Transforms/Utils/ModuleUtils.h" #include "llvm/Transforms/Utils/UnrollLoop.h" +#include #include #define DEBUG_TYPE "openmp-ir-builder" @@ -255,19 +258,21 @@ return GV; } -Value *OpenMPIRBuilder::getOrCreateIdent(Constant *SrcLocStr, - IdentFlag LocFlags, - unsigned Reserve2Flags) { +Constant *OpenMPIRBuilder::getOrCreateIdent(Constant *SrcLocStr, + uint32_t SrcLocStrSize, + IdentFlag LocFlags, + unsigned Reserve2Flags) { // Enable "C-mode". LocFlags |= OMP_IDENT_FLAG_KMPC; - Value *&Ident = + Constant *&Ident = IdentMap[{SrcLocStr, uint64_t(LocFlags) << 31 | Reserve2Flags}]; if (!Ident) { Constant *I32Null = ConstantInt::getNullValue(Int32); - Constant *IdentData[] = { - I32Null, ConstantInt::get(Int32, uint32_t(LocFlags)), - ConstantInt::get(Int32, Reserve2Flags), I32Null, SrcLocStr}; + Constant *IdentData[] = {I32Null, + ConstantInt::get(Int32, uint32_t(LocFlags)), + ConstantInt::get(Int32, Reserve2Flags), + ConstantInt::get(Int32, SrcLocStrSize), SrcLocStr}; Constant *Initializer = ConstantStruct::get(OpenMPIRBuilder::Ident, IdentData); @@ -290,10 +295,12 @@ } } - return Builder.CreatePointerCast(Ident, IdentPtr); + return ConstantExpr::getPointerBitCastOrAddrSpaceCast(Ident, IdentPtr); } -Constant *OpenMPIRBuilder::getOrCreateSrcLocStr(StringRef LocStr) { +Constant *OpenMPIRBuilder::getOrCreateSrcLocStr(StringRef LocStr, + uint32_t &SrcLocStrSize) { + SrcLocStrSize = LocStr.size(); Constant *&SrcLocStr = SrcLocStrMap[LocStr]; if (!SrcLocStr) { Constant *Initializer = @@ -314,8 +321,8 @@ Constant *OpenMPIRBuilder::getOrCreateSrcLocStr(StringRef FunctionName, StringRef FileName, - unsigned Line, - unsigned Column) { + unsigned Line, unsigned Column, + uint32_t &SrcLocStrSize) { SmallString<128> Buffer; Buffer.push_back(';'); Buffer.append(FileName); @@ -327,17 +334,21 @@ Buffer.append(std::to_string(Column)); Buffer.push_back(';'); Buffer.push_back(';'); - return getOrCreateSrcLocStr(Buffer.str()); + return getOrCreateSrcLocStr(Buffer.str(), SrcLocStrSize); } -Constant *OpenMPIRBuilder::getOrCreateDefaultSrcLocStr() { - return getOrCreateSrcLocStr(";unknown;unknown;0;0;;"); +Constant * +OpenMPIRBuilder::getOrCreateDefaultSrcLocStr(uint32_t &SrcLocStrSize) { + StringRef UnknownLoc = ";unknown;unknown;0;0;;"; + return getOrCreateSrcLocStr(UnknownLoc, SrcLocStrSize); } -Constant *OpenMPIRBuilder::getOrCreateSrcLocStr(DebugLoc DL, Function *F) { +Constant *OpenMPIRBuilder::getOrCreateSrcLocStr(DebugLoc DL, + uint32_t &SrcLocStrSize, + Function *F) { DILocation *DIL = DL.get(); if (!DIL) - return getOrCreateDefaultSrcLocStr(); + return getOrCreateDefaultSrcLocStr(SrcLocStrSize); StringRef FileName = M.getName(); if (DIFile *DIF = DIL->getFile()) if (Optional Source = DIF->getSource()) @@ -346,12 +357,13 @@ if (Function.empty() && F) Function = F->getName(); return getOrCreateSrcLocStr(Function, FileName, DIL->getLine(), - DIL->getColumn()); + DIL->getColumn(), SrcLocStrSize); } -Constant * -OpenMPIRBuilder::getOrCreateSrcLocStr(const LocationDescription &Loc) { - return getOrCreateSrcLocStr(Loc.DL, Loc.IP.getBlock()->getParent()); +Constant *OpenMPIRBuilder::getOrCreateSrcLocStr(const LocationDescription &Loc, + uint32_t &SrcLocStrSize) { + return getOrCreateSrcLocStr(Loc.DL, SrcLocStrSize, + Loc.IP.getBlock()->getParent()); } Value *OpenMPIRBuilder::getOrCreateThreadID(Value *Ident) { @@ -393,9 +405,11 @@ break; } - Constant *SrcLocStr = getOrCreateSrcLocStr(Loc); - Value *Args[] = {getOrCreateIdent(SrcLocStr, BarrierLocFlags), - getOrCreateThreadID(getOrCreateIdent(SrcLocStr))}; + uint32_t SrcLocStrSize; + Constant *SrcLocStr = getOrCreateSrcLocStr(Loc, SrcLocStrSize); + Value *Args[] = { + getOrCreateIdent(SrcLocStr, SrcLocStrSize, BarrierLocFlags), + getOrCreateThreadID(getOrCreateIdent(SrcLocStr, SrcLocStrSize))}; // If we are in a cancellable parallel region, barriers are cancellation // points. @@ -441,8 +455,9 @@ llvm_unreachable("Unknown cancel kind!"); } - Constant *SrcLocStr = getOrCreateSrcLocStr(Loc); - Value *Ident = getOrCreateIdent(SrcLocStr); + uint32_t SrcLocStrSize; + Constant *SrcLocStr = getOrCreateSrcLocStr(Loc, SrcLocStrSize); + Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize); Value *Args[] = {Ident, getOrCreateThreadID(Ident), CancelKind}; Value *Result = Builder.CreateCall( getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_cancel), Args); @@ -513,8 +528,9 @@ if (!updateToLocation(Loc)) return Loc.IP; - Constant *SrcLocStr = getOrCreateSrcLocStr(Loc); - Value *Ident = getOrCreateIdent(SrcLocStr); + uint32_t SrcLocStrSize; + Constant *SrcLocStr = getOrCreateSrcLocStr(Loc, SrcLocStrSize); + Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize); Value *ThreadID = getOrCreateThreadID(Ident); if (NumThreads) { @@ -871,8 +887,9 @@ void OpenMPIRBuilder::emitFlush(const LocationDescription &Loc) { // Build call void __kmpc_flush(ident_t *loc) - Constant *SrcLocStr = getOrCreateSrcLocStr(Loc); - Value *Args[] = {getOrCreateIdent(SrcLocStr)}; + uint32_t SrcLocStrSize; + Constant *SrcLocStr = getOrCreateSrcLocStr(Loc, SrcLocStrSize); + Value *Args[] = {getOrCreateIdent(SrcLocStr, SrcLocStrSize)}; Builder.CreateCall(getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_flush), Args); } @@ -886,8 +903,9 @@ void OpenMPIRBuilder::emitTaskwaitImpl(const LocationDescription &Loc) { // Build call kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32 // global_tid); - Constant *SrcLocStr = getOrCreateSrcLocStr(Loc); - Value *Ident = getOrCreateIdent(SrcLocStr); + uint32_t SrcLocStrSize; + Constant *SrcLocStr = getOrCreateSrcLocStr(Loc, SrcLocStrSize); + Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize); Value *Args[] = {Ident, getOrCreateThreadID(Ident)}; // Ignore return result until untied tasks are supported. @@ -903,8 +921,9 @@ void OpenMPIRBuilder::emitTaskyieldImpl(const LocationDescription &Loc) { // Build call __kmpc_omp_taskyield(loc, thread_id, 0); - Constant *SrcLocStr = getOrCreateSrcLocStr(Loc); - Value *Ident = getOrCreateIdent(SrcLocStr); + uint32_t SrcLocStrSize; + Constant *SrcLocStr = getOrCreateSrcLocStr(Loc, SrcLocStrSize); + Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize); Constant *I32Null = ConstantInt::getNullValue(Int32); Value *Args[] = {Ident, getOrCreateThreadID(Ident), I32Null}; @@ -1114,14 +1133,16 @@ Module *Module = Func->getParent(); Value *RedArrayPtr = Builder.CreateBitCast(RedArray, Builder.getInt8PtrTy(), "red.array.ptr"); - Constant *SrcLocStr = getOrCreateSrcLocStr(Loc); + uint32_t SrcLocStrSize; + Constant *SrcLocStr = getOrCreateSrcLocStr(Loc, SrcLocStrSize); bool CanGenerateAtomic = llvm::all_of(ReductionInfos, [](const ReductionInfo &RI) { return RI.AtomicReductionGen; }); - Value *Ident = getOrCreateIdent( - SrcLocStr, CanGenerateAtomic ? IdentFlag::OMP_IDENT_FLAG_ATOMIC_REDUCE - : IdentFlag(0)); + Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize, + CanGenerateAtomic + ? IdentFlag::OMP_IDENT_FLAG_ATOMIC_REDUCE + : IdentFlag(0)); Value *ThreadId = getOrCreateThreadID(Ident); Constant *NumVariables = Builder.getInt32(NumReductions); const DataLayout &DL = Module->getDataLayout(); @@ -1235,8 +1256,9 @@ return Loc.IP; Directive OMPD = Directive::OMPD_master; - Constant *SrcLocStr = getOrCreateSrcLocStr(Loc); - Value *Ident = getOrCreateIdent(SrcLocStr); + uint32_t SrcLocStrSize; + Constant *SrcLocStr = getOrCreateSrcLocStr(Loc, SrcLocStrSize); + Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize); Value *ThreadId = getOrCreateThreadID(Ident); Value *Args[] = {Ident, ThreadId}; @@ -1258,8 +1280,9 @@ return Loc.IP; Directive OMPD = Directive::OMPD_masked; - Constant *SrcLocStr = getOrCreateSrcLocStr(Loc); - Value *Ident = getOrCreateIdent(SrcLocStr); + uint32_t SrcLocStrSize; + Constant *SrcLocStr = getOrCreateSrcLocStr(Loc, SrcLocStrSize); + Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize); Value *ThreadId = getOrCreateThreadID(Ident); Value *Args[] = {Ident, ThreadId, Filter}; Value *ArgsEnd[] = {Ident, ThreadId}; @@ -1480,8 +1503,9 @@ Builder.restoreIP(CLI->getPreheaderIP()); Builder.SetCurrentDebugLocation(DL); - Constant *SrcLocStr = getOrCreateSrcLocStr(DL); - Value *SrcLoc = getOrCreateIdent(SrcLocStr); + uint32_t SrcLocStrSize; + Constant *SrcLocStr = getOrCreateSrcLocStr(DL, SrcLocStrSize); + Value *SrcLoc = getOrCreateIdent(SrcLocStr, SrcLocStrSize); // Declare useful OpenMP runtime functions. Value *IV = CLI->getIndVar(); @@ -1608,8 +1632,9 @@ // Set up the source location value for OpenMP runtime. Builder.SetCurrentDebugLocation(DL); - Constant *SrcLocStr = getOrCreateSrcLocStr(DL); - Value *SrcLoc = getOrCreateIdent(SrcLocStr); + uint32_t SrcLocStrSize; + Constant *SrcLocStr = getOrCreateSrcLocStr(DL, SrcLocStrSize); + Value *SrcLoc = getOrCreateIdent(SrcLocStr, SrcLocStrSize); // Declare useful OpenMP runtime functions. Value *IV = CLI->getIndVar(); @@ -2379,8 +2404,9 @@ if (!updateToLocation(Loc)) return Loc.IP; - Constant *SrcLocStr = getOrCreateSrcLocStr(Loc); - Value *Ident = getOrCreateIdent(SrcLocStr); + uint32_t SrcLocStrSize; + Constant *SrcLocStr = getOrCreateSrcLocStr(Loc, SrcLocStrSize); + Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize); Value *ThreadId = getOrCreateThreadID(Ident); llvm::Value *DidItLD = Builder.CreateLoad(Builder.getInt32Ty(), DidIt); @@ -2407,8 +2433,9 @@ } Directive OMPD = Directive::OMPD_single; - Constant *SrcLocStr = getOrCreateSrcLocStr(Loc); - Value *Ident = getOrCreateIdent(SrcLocStr); + uint32_t SrcLocStrSize; + Constant *SrcLocStr = getOrCreateSrcLocStr(Loc, SrcLocStrSize); + Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize); Value *ThreadId = getOrCreateThreadID(Ident); Value *Args[] = {Ident, ThreadId}; @@ -2436,8 +2463,9 @@ return Loc.IP; Directive OMPD = Directive::OMPD_critical; - Constant *SrcLocStr = getOrCreateSrcLocStr(Loc); - Value *Ident = getOrCreateIdent(SrcLocStr); + uint32_t SrcLocStrSize; + Constant *SrcLocStr = getOrCreateSrcLocStr(Loc, SrcLocStrSize); + Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize); Value *ThreadId = getOrCreateThreadID(Ident); Value *LockVar = getOMPCriticalRegionLock(CriticalName); Value *Args[] = {Ident, ThreadId, LockVar}; @@ -2486,8 +2514,9 @@ Value *DependBaseAddrGEP = Builder.CreateInBoundsGEP( ArrI64Ty, ArgsBase, {Builder.getInt64(0), Builder.getInt64(0)}); - Constant *SrcLocStr = getOrCreateSrcLocStr(Loc); - Value *Ident = getOrCreateIdent(SrcLocStr); + uint32_t SrcLocStrSize; + Constant *SrcLocStr = getOrCreateSrcLocStr(Loc, SrcLocStrSize); + Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize); Value *ThreadId = getOrCreateThreadID(Ident); Value *Args[] = {Ident, ThreadId, DependBaseAddrGEP}; @@ -2512,8 +2541,9 @@ Instruction *ExitCall = nullptr; if (IsThreads) { - Constant *SrcLocStr = getOrCreateSrcLocStr(Loc); - Value *Ident = getOrCreateIdent(SrcLocStr); + uint32_t SrcLocStrSize; + Constant *SrcLocStr = getOrCreateSrcLocStr(Loc, SrcLocStrSize); + Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize); Value *ThreadId = getOrCreateThreadID(Ident); Value *Args[] = {Ident, ThreadId}; @@ -2718,8 +2748,9 @@ IRBuilder<>::InsertPointGuard IPG(Builder); Builder.restoreIP(Loc.IP); - Constant *SrcLocStr = getOrCreateSrcLocStr(Loc); - Value *Ident = getOrCreateIdent(SrcLocStr); + uint32_t SrcLocStrSize; + Constant *SrcLocStr = getOrCreateSrcLocStr(Loc, SrcLocStrSize); + Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize); Value *ThreadId = getOrCreateThreadID(Ident); Value *Args[] = {ThreadId, Size, Allocator}; @@ -2734,8 +2765,9 @@ IRBuilder<>::InsertPointGuard IPG(Builder); Builder.restoreIP(Loc.IP); - Constant *SrcLocStr = getOrCreateSrcLocStr(Loc); - Value *Ident = getOrCreateIdent(SrcLocStr); + uint32_t SrcLocStrSize; + Constant *SrcLocStr = getOrCreateSrcLocStr(Loc, SrcLocStrSize); + Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize); Value *ThreadId = getOrCreateThreadID(Ident); Value *Args[] = {ThreadId, Addr, Allocator}; Function *Fn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_free); @@ -2748,8 +2780,9 @@ IRBuilder<>::InsertPointGuard IPG(Builder); Builder.restoreIP(Loc.IP); - Constant *SrcLocStr = getOrCreateSrcLocStr(Loc); - Value *Ident = getOrCreateIdent(SrcLocStr); + uint32_t SrcLocStrSize; + Constant *SrcLocStr = getOrCreateSrcLocStr(Loc, SrcLocStrSize); + Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize); Value *ThreadId = getOrCreateThreadID(Ident); Constant *ThreadPrivateCache = getOrCreateOMPInternalVariable(Int8PtrPtr, Name); @@ -2767,8 +2800,9 @@ if (!updateToLocation(Loc)) return Loc.IP; - Constant *SrcLocStr = getOrCreateSrcLocStr(Loc); - Value *Ident = getOrCreateIdent(SrcLocStr); + uint32_t SrcLocStrSize; + Constant *SrcLocStr = getOrCreateSrcLocStr(Loc, SrcLocStrSize); + Constant *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize); ConstantInt *IsSPMDVal = ConstantInt::getSigned( IntegerType::getInt8Ty(Int8->getContext()), IsSPMD ? OMP_TGT_EXEC_MODE_SPMD : OMP_TGT_EXEC_MODE_GENERIC); @@ -2820,8 +2854,9 @@ if (!updateToLocation(Loc)) return; - Constant *SrcLocStr = getOrCreateSrcLocStr(Loc); - Value *Ident = getOrCreateIdent(SrcLocStr); + uint32_t SrcLocStrSize; + Constant *SrcLocStr = getOrCreateSrcLocStr(Loc, SrcLocStrSize); + Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize); ConstantInt *IsSPMDVal = ConstantInt::getSigned( IntegerType::getInt8Ty(Int8->getContext()), IsSPMD ? OMP_TGT_EXEC_MODE_SPMD : OMP_TGT_EXEC_MODE_GENERIC); diff --git a/llvm/lib/Transforms/IPO/OpenMPOpt.cpp b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp --- a/llvm/lib/Transforms/IPO/OpenMPOpt.cpp +++ b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp @@ -1590,8 +1590,10 @@ &F.getEntryBlock(), F.getEntryBlock().begin())); // Create a fallback location if non was found. // TODO: Use the debug locations of the calls instead. - Constant *Loc = OMPInfoCache.OMPBuilder.getOrCreateDefaultSrcLocStr(); - Ident = OMPInfoCache.OMPBuilder.getOrCreateIdent(Loc); + uint32_t SrcLocStrSize; + Constant *Loc = + OMPInfoCache.OMPBuilder.getOrCreateDefaultSrcLocStr(SrcLocStrSize); + Ident = OMPInfoCache.OMPBuilder.getOrCreateIdent(Loc, SrcLocStrSize); } return Ident; } @@ -3221,8 +3223,11 @@ OpenMPIRBuilder::LocationDescription Loc( InsertPointTy(ParentBB, ParentBB->end()), DL); OMPInfoCache.OMPBuilder.updateToLocation(Loc); - auto *SrcLocStr = OMPInfoCache.OMPBuilder.getOrCreateSrcLocStr(Loc); - Value *Ident = OMPInfoCache.OMPBuilder.getOrCreateIdent(SrcLocStr); + uint32_t SrcLocStrSize; + auto *SrcLocStr = + OMPInfoCache.OMPBuilder.getOrCreateSrcLocStr(Loc, SrcLocStrSize); + Value *Ident = + OMPInfoCache.OMPBuilder.getOrCreateIdent(SrcLocStr, SrcLocStrSize); BranchInst::Create(RegionCheckTidBB, ParentBB)->setDebugLoc(DL); // Add check for Tid in RegionCheckTidBB diff --git a/llvm/test/Transforms/OpenMP/deduplication.ll b/llvm/test/Transforms/OpenMP/deduplication.ll --- a/llvm/test/Transforms/OpenMP/deduplication.ll +++ b/llvm/test/Transforms/OpenMP/deduplication.ll @@ -19,7 +19,6 @@ ; CHECK-DAG: @.str0 = private unnamed_addr constant [23 x i8] c";unknown;unknown;0;0;;\00", align 1 ; CHECK-DAG: @.str1 = private unnamed_addr constant [23 x i8] c";file001;loc0001;0;0;;\00", align 1 ; CHECK-DAG: @.str2 = private unnamed_addr constant [23 x i8] c";file002;loc0002;0;0;;\00", align 1 -; CHECK-DAG: @3 = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 0, i8* getelementptr inbounds ([23 x i8], [23 x i8]* @.str0, i32 0, i32 0) }, align 8 ; UTC_ARGS: --enable diff --git a/llvm/test/Transforms/OpenMP/get_hardware_num_threads_in_block_fold.ll b/llvm/test/Transforms/OpenMP/get_hardware_num_threads_in_block_fold.ll --- a/llvm/test/Transforms/OpenMP/get_hardware_num_threads_in_block_fold.ll +++ b/llvm/test/Transforms/OpenMP/get_hardware_num_threads_in_block_fold.ll @@ -13,7 +13,7 @@ ; CHECK: @[[KERNEL1_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 3 ; CHECK: @[[KERNEL2_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 3 ; CHECK: @[[GLOB0:[0-9]+]] = private unnamed_addr constant [23 x i8] c" -; CHECK: @[[GLOB1:[0-9]+]] = private unnamed_addr constant [[STRUCT_IDENT_T:%.*]] { i32 0, i32 2, i32 0, i32 0, i8* getelementptr inbounds ([23 x i8], [23 x i8]* @[[GLOB0]], i32 0, i32 0) }, align 8 +; CHECK: @[[GLOB1:[0-9]+]] = private unnamed_addr constant [[STRUCT_IDENT_T:%.*]] { i32 0, i32 2, i32 0, i32 22, i8* getelementptr inbounds ([23 x i8], [23 x i8]* @[[GLOB0]], i32 0, i32 0) }, align 8 ;. define weak void @kernel0() #0 { ; CHECK-LABEL: define {{[^@]+}}@kernel0 diff --git a/llvm/test/Transforms/OpenMP/parallel_region_merging.ll b/llvm/test/Transforms/OpenMP/parallel_region_merging.ll --- a/llvm/test/Transforms/OpenMP/parallel_region_merging.ll +++ b/llvm/test/Transforms/OpenMP/parallel_region_merging.ll @@ -4694,10 +4694,10 @@ ; CHECK1-NEXT: entry: ; CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 ; CHECK1-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 -; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1:[0-9]+]]) +; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2:[0-9]+]]) ; CHECK1-NEXT: br label [[OMP_PARALLEL:%.*]] ; CHECK1: omp_parallel: -; CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @merge..omp_par to void (i32*, i32*, ...)*), i32* [[A_ADDR]]) +; CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @merge..omp_par to void (i32*, i32*, ...)*), i32* [[A_ADDR]]) ; CHECK1-NEXT: br label [[OMP_PAR_OUTLINED_EXIT:%.*]] ; CHECK1: omp.par.outlined.exit: ; CHECK1-NEXT: br label [[OMP_PAR_EXIT_SPLIT:%.*]] @@ -4719,8 +4719,8 @@ ; CHECK1-NEXT: br label [[OMP_PAR_MERGED:%.*]] ; CHECK1: omp.par.merged: ; CHECK1-NEXT: call void @.omp_outlined.(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) -; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -; CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM]]) +; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) +; CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM]]) ; CHECK1-NEXT: call void @.omp_outlined..1(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) ; CHECK1-NEXT: br label [[ENTRY_SPLIT:%.*]] ; CHECK1: entry.split: @@ -4753,7 +4753,7 @@ ; CHECK1-SAME: (i32 [[A:%.*]]) local_unnamed_addr { ; CHECK1-NEXT: entry: ; CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -; CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]]) +; CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1:[0-9]+]]) ; CHECK1-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 ; CHECK1-NEXT: call void @__kmpc_push_proc_bind(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 [[TMP0]], i32 noundef 3) ; CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..2 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) @@ -4837,10 +4837,10 @@ ; CHECK1-NEXT: entry: ; CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 ; CHECK1-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 -; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) ; CHECK1-NEXT: br label [[OMP_PARALLEL:%.*]] ; CHECK1: omp_parallel: -; CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @merge_seq..omp_par to void (i32*, i32*, ...)*), i32* [[A_ADDR]]) +; CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @merge_seq..omp_par to void (i32*, i32*, ...)*), i32* [[A_ADDR]]) ; CHECK1-NEXT: br label [[OMP_PAR_OUTLINED_EXIT:%.*]] ; CHECK1: omp.par.outlined.exit: ; CHECK1-NEXT: br label [[OMP_PAR_EXIT_SPLIT:%.*]] @@ -4864,15 +4864,15 @@ ; CHECK1-NEXT: br label [[OMP_PAR_MERGED:%.*]] ; CHECK1: omp.par.merged: ; CHECK1-NEXT: call void @.omp_outlined..8(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) -; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -; CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM2]]) -; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -; CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]]) +; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) +; CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM2]]) +; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) +; CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]]) ; CHECK1-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0 ; CHECK1-NEXT: br i1 [[TMP2]], label [[OMP_REGION_BODY:%.*]], label [[OMP_REGION_END:%.*]] ; CHECK1: omp_region.end: -; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -; CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM1]]) +; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) +; CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM1]]) ; CHECK1-NEXT: br label [[OMP_PAR_MERGED_SPLIT_SPLIT:%.*]] ; CHECK1: omp.par.merged.split.split: ; CHECK1-NEXT: call void @.omp_outlined..9(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) @@ -4893,7 +4893,7 @@ ; CHECK1: omp.par.merged.split: ; CHECK1-NEXT: br label [[OMP_REGION_BODY_SPLIT:%.*]] ; CHECK1: omp_region.body.split: -; CHECK1-NEXT: call void @__kmpc_end_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]]) +; CHECK1-NEXT: call void @__kmpc_end_master(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]]) ; CHECK1-NEXT: br label [[OMP_REGION_END]] ; CHECK1: omp.par.outlined.exit.exitStub: ; CHECK1-NEXT: ret void @@ -4921,11 +4921,11 @@ ; CHECK1-NEXT: [[F_RELOADED:%.*]] = alloca float, align 4 ; CHECK1-NEXT: [[F_ADDR:%.*]] = alloca float, align 4 ; CHECK1-NEXT: store float [[F]], float* [[F_ADDR]], align 4 -; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) ; CHECK1-NEXT: store float [[F]], float* [[F_RELOADED]], align 4 ; CHECK1-NEXT: br label [[OMP_PARALLEL:%.*]] ; CHECK1: omp_parallel: -; CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, float*, float*, float*)* @merge_seq_float..omp_par to void (i32*, i32*, ...)*), float* [[F_RELOADED]], float* [[F_ADDR]], float* [[P]]) +; CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, float*, float*, float*)* @merge_seq_float..omp_par to void (i32*, i32*, ...)*), float* [[F_RELOADED]], float* [[F_ADDR]], float* [[P]]) ; CHECK1-NEXT: br label [[OMP_PAR_OUTLINED_EXIT:%.*]] ; CHECK1: omp.par.outlined.exit: ; CHECK1-NEXT: br label [[OMP_PAR_EXIT_SPLIT:%.*]] @@ -4948,15 +4948,15 @@ ; CHECK1-NEXT: br label [[OMP_PAR_MERGED:%.*]] ; CHECK1: omp.par.merged: ; CHECK1-NEXT: call void @.omp_outlined..10(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], float* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[F_ADDR]]) -; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -; CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM2]]) -; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -; CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]]) +; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) +; CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM2]]) +; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) +; CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]]) ; CHECK1-NEXT: [[TMP3:%.*]] = icmp ne i32 [[TMP2]], 0 ; CHECK1-NEXT: br i1 [[TMP3]], label [[OMP_REGION_BODY:%.*]], label [[OMP_REGION_END:%.*]] ; CHECK1: omp_region.end: -; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -; CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM1]]) +; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) +; CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM1]]) ; CHECK1-NEXT: br label [[OMP_PAR_MERGED_SPLIT_SPLIT:%.*]] ; CHECK1: omp.par.merged.split.split: ; CHECK1-NEXT: call void @.omp_outlined..11(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], float* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[F_ADDR]]) @@ -4976,7 +4976,7 @@ ; CHECK1: omp.par.merged.split: ; CHECK1-NEXT: br label [[OMP_REGION_BODY_SPLIT:%.*]] ; CHECK1: omp_region.body.split: -; CHECK1-NEXT: call void @__kmpc_end_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]]) +; CHECK1-NEXT: call void @__kmpc_end_master(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]]) ; CHECK1-NEXT: br label [[OMP_REGION_END]] ; CHECK1: omp.par.outlined.exit.exitStub: ; CHECK1-NEXT: ret void @@ -5006,10 +5006,10 @@ ; CHECK1-NEXT: [[A_CASTED_SROA_0_0_INSERT_EXT_SEQ_OUTPUT_ALLOC:%.*]] = alloca i64, align 8 ; CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 ; CHECK1-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 -; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) ; CHECK1-NEXT: br label [[OMP_PARALLEL:%.*]] ; CHECK1: omp_parallel: -; CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i64*)* @merge_seq_firstprivate..omp_par to void (i32*, i32*, ...)*), i32* [[A_ADDR]], i64* [[A_CASTED_SROA_0_0_INSERT_EXT_SEQ_OUTPUT_ALLOC]]) +; CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i64*)* @merge_seq_firstprivate..omp_par to void (i32*, i32*, ...)*), i32* [[A_ADDR]], i64* [[A_CASTED_SROA_0_0_INSERT_EXT_SEQ_OUTPUT_ALLOC]]) ; CHECK1-NEXT: br label [[OMP_PAR_OUTLINED_EXIT:%.*]] ; CHECK1: omp.par.outlined.exit: ; CHECK1-NEXT: br label [[OMP_PAR_EXIT_SPLIT:%.*]] @@ -5033,15 +5033,15 @@ ; CHECK1-NEXT: br label [[OMP_PAR_MERGED:%.*]] ; CHECK1: omp.par.merged: ; CHECK1-NEXT: call void @.omp_outlined..12(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) -; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -; CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM2]]) -; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -; CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]]) +; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) +; CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM2]]) +; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) +; CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]]) ; CHECK1-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0 ; CHECK1-NEXT: br i1 [[TMP2]], label [[OMP_REGION_BODY:%.*]], label [[OMP_REGION_END:%.*]] ; CHECK1: omp_region.end: -; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -; CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM1]]) +; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) +; CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM1]]) ; CHECK1-NEXT: br label [[OMP_PAR_MERGED_SPLIT_SPLIT:%.*]] ; CHECK1: omp.par.merged.split.split: ; CHECK1-NEXT: [[A_CASTED_SROA_0_0_INSERT_EXT_SEQ_OUTPUT_LOAD:%.*]] = load i64, i64* [[A_CASTED_SROA_0_0_INSERT_EXT_SEQ_OUTPUT_ALLOC]], align 8 @@ -5065,7 +5065,7 @@ ; CHECK1: omp.par.merged.split: ; CHECK1-NEXT: br label [[OMP_REGION_BODY_SPLIT:%.*]] ; CHECK1: omp_region.body.split: -; CHECK1-NEXT: call void @__kmpc_end_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]]) +; CHECK1-NEXT: call void @__kmpc_end_master(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]]) ; CHECK1-NEXT: br label [[OMP_REGION_END]] ; CHECK1: omp.par.outlined.exit.exitStub: ; CHECK1-NEXT: ret void @@ -5092,10 +5092,10 @@ ; CHECK1-NEXT: entry: ; CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 ; CHECK1-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 -; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) ; CHECK1-NEXT: br label [[OMP_PARALLEL:%.*]] ; CHECK1: omp_parallel: -; CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @merge_seq_sink_lt..omp_par to void (i32*, i32*, ...)*), i32* [[A_ADDR]]) +; CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @merge_seq_sink_lt..omp_par to void (i32*, i32*, ...)*), i32* [[A_ADDR]]) ; CHECK1-NEXT: br label [[OMP_PAR_OUTLINED_EXIT:%.*]] ; CHECK1: omp.par.outlined.exit: ; CHECK1-NEXT: br label [[OMP_PAR_EXIT_SPLIT:%.*]] @@ -5118,15 +5118,15 @@ ; CHECK1-NEXT: br label [[OMP_PAR_MERGED:%.*]] ; CHECK1: omp.par.merged: ; CHECK1-NEXT: call void @.omp_outlined..14(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) -; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -; CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM2]]) -; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -; CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]]) +; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) +; CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM2]]) +; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) +; CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]]) ; CHECK1-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0 ; CHECK1-NEXT: br i1 [[TMP2]], label [[OMP_REGION_BODY:%.*]], label [[OMP_REGION_END:%.*]] ; CHECK1: omp_region.end: -; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -; CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM1]]) +; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) +; CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM1]]) ; CHECK1-NEXT: br label [[OMP_PAR_MERGED_SPLIT_SPLIT:%.*]] ; CHECK1: omp.par.merged.split.split: ; CHECK1-NEXT: call void @.omp_outlined..15(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) @@ -5150,7 +5150,7 @@ ; CHECK1: omp.par.merged.split: ; CHECK1-NEXT: br label [[OMP_REGION_BODY_SPLIT:%.*]] ; CHECK1: omp_region.body.split: -; CHECK1-NEXT: call void @__kmpc_end_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]]) +; CHECK1-NEXT: call void @__kmpc_end_master(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]]) ; CHECK1-NEXT: br label [[OMP_REGION_END]] ; CHECK1: omp.par.outlined.exit.exitStub: ; CHECK1-NEXT: ret void @@ -5179,13 +5179,13 @@ ; CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 ; CHECK1-NEXT: [[B:%.*]] = alloca i32, align 4 ; CHECK1-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 -; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) ; CHECK1-NEXT: store i32 [[A]], i32* [[A_RELOADED]], align 4 ; CHECK1-NEXT: br label [[OMP_PARALLEL:%.*]] ; CHECK1: omp_parallel: ; CHECK1-NEXT: [[LT_CAST3:%.*]] = bitcast i32* [[B]] to i8* ; CHECK1-NEXT: call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[LT_CAST3]]) -; CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i32*, i32*)* @merge_seq_par_use..omp_par to void (i32*, i32*, ...)*), i32* [[A_RELOADED]], i32* [[A_ADDR]], i32* [[B]]) +; CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i32*, i32*)* @merge_seq_par_use..omp_par to void (i32*, i32*, ...)*), i32* [[A_RELOADED]], i32* [[A_ADDR]], i32* [[B]]) ; CHECK1-NEXT: br label [[OMP_PAR_OUTLINED_EXIT:%.*]] ; CHECK1: omp.par.outlined.exit: ; CHECK1-NEXT: br label [[OMP_PAR_EXIT_SPLIT:%.*]] @@ -5210,15 +5210,15 @@ ; CHECK1-NEXT: br label [[OMP_PAR_MERGED:%.*]] ; CHECK1: omp.par.merged: ; CHECK1-NEXT: call void @.omp_outlined..16(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) -; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -; CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM2]]) -; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -; CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]]) +; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) +; CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM2]]) +; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) +; CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]]) ; CHECK1-NEXT: [[TMP3:%.*]] = icmp ne i32 [[TMP2]], 0 ; CHECK1-NEXT: br i1 [[TMP3]], label [[OMP_REGION_BODY:%.*]], label [[OMP_REGION_END:%.*]] ; CHECK1: omp_region.end: -; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -; CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM1]]) +; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) +; CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM1]]) ; CHECK1-NEXT: br label [[OMP_PAR_MERGED_SPLIT_SPLIT:%.*]] ; CHECK1: omp.par.merged.split.split: ; CHECK1-NEXT: call void @.omp_outlined..17(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[B]]) @@ -5239,7 +5239,7 @@ ; CHECK1: omp.par.merged.split: ; CHECK1-NEXT: br label [[OMP_REGION_BODY_SPLIT:%.*]] ; CHECK1: omp_region.body.split: -; CHECK1-NEXT: call void @__kmpc_end_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]]) +; CHECK1-NEXT: call void @__kmpc_end_master(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]]) ; CHECK1-NEXT: br label [[OMP_REGION_END]] ; CHECK1: omp.par.outlined.exit.exitStub: ; CHECK1-NEXT: ret void @@ -5270,10 +5270,10 @@ ; CHECK1-NEXT: [[CANCEL2_ADDR:%.*]] = alloca i32, align 4 ; CHECK1-NEXT: store i32 [[CANCEL1]], i32* [[CANCEL1_ADDR]], align 4 ; CHECK1-NEXT: store i32 [[CANCEL2]], i32* [[CANCEL2_ADDR]], align 4 -; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) ; CHECK1-NEXT: br label [[OMP_PARALLEL:%.*]] ; CHECK1: omp_parallel: -; CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i32*)* @merge_cancellable_regions..omp_par to void (i32*, i32*, ...)*), i32* [[CANCEL1_ADDR]], i32* [[CANCEL2_ADDR]]) +; CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i32*)* @merge_cancellable_regions..omp_par to void (i32*, i32*, ...)*), i32* [[CANCEL1_ADDR]], i32* [[CANCEL2_ADDR]]) ; CHECK1-NEXT: br label [[OMP_PAR_OUTLINED_EXIT:%.*]] ; CHECK1: omp.par.outlined.exit: ; CHECK1-NEXT: br label [[OMP_PAR_EXIT_SPLIT:%.*]] @@ -5295,8 +5295,8 @@ ; CHECK1-NEXT: br label [[OMP_PAR_MERGED:%.*]] ; CHECK1: omp.par.merged: ; CHECK1-NEXT: call void @.omp_outlined..18(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[CANCEL1_ADDR]]) -; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -; CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]]) +; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) +; CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM]]) ; CHECK1-NEXT: call void @.omp_outlined..19(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[CANCEL2_ADDR]]) ; CHECK1-NEXT: br label [[ENTRY_SPLIT:%.*]] ; CHECK1: entry.split: @@ -5345,11 +5345,11 @@ ; CHECK1-NEXT: [[CANCEL2_ADDR:%.*]] = alloca i32, align 4 ; CHECK1-NEXT: store i32 [[CANCEL1]], i32* [[CANCEL1_ADDR]], align 4 ; CHECK1-NEXT: store i32 [[CANCEL2]], i32* [[CANCEL2_ADDR]], align 4 -; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) ; CHECK1-NEXT: store i32 [[CANCEL1]], i32* [[CANCEL1_RELOADED]], align 4 ; CHECK1-NEXT: br label [[OMP_PARALLEL:%.*]] ; CHECK1: omp_parallel: -; CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i32*, i32*)* @merge_cancellable_regions_seq..omp_par to void (i32*, i32*, ...)*), i32* [[CANCEL1_RELOADED]], i32* [[CANCEL1_ADDR]], i32* [[CANCEL2_ADDR]]) +; CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i32*, i32*)* @merge_cancellable_regions_seq..omp_par to void (i32*, i32*, ...)*), i32* [[CANCEL1_RELOADED]], i32* [[CANCEL1_ADDR]], i32* [[CANCEL2_ADDR]]) ; CHECK1-NEXT: br label [[OMP_PAR_OUTLINED_EXIT:%.*]] ; CHECK1: omp.par.outlined.exit: ; CHECK1-NEXT: br label [[OMP_PAR_EXIT_SPLIT:%.*]] @@ -5372,15 +5372,15 @@ ; CHECK1-NEXT: br label [[OMP_PAR_MERGED:%.*]] ; CHECK1: omp.par.merged: ; CHECK1-NEXT: call void @.omp_outlined..20(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[CANCEL1_ADDR]]) -; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -; CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM2]]) -; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -; CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]]) +; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) +; CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM2]]) +; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) +; CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]]) ; CHECK1-NEXT: [[TMP3:%.*]] = icmp ne i32 [[TMP2]], 0 ; CHECK1-NEXT: br i1 [[TMP3]], label [[OMP_REGION_BODY:%.*]], label [[OMP_REGION_END:%.*]] ; CHECK1: omp_region.end: -; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -; CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM1]]) +; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) +; CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM1]]) ; CHECK1-NEXT: br label [[OMP_PAR_MERGED_SPLIT_SPLIT:%.*]] ; CHECK1: omp.par.merged.split.split: ; CHECK1-NEXT: call void @.omp_outlined..21(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[CANCEL2_ADDR]]) @@ -5401,7 +5401,7 @@ ; CHECK1: omp.par.merged.split: ; CHECK1-NEXT: br label [[OMP_REGION_BODY_SPLIT:%.*]] ; CHECK1: omp_region.body.split: -; CHECK1-NEXT: call void @__kmpc_end_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]]) +; CHECK1-NEXT: call void @__kmpc_end_master(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]]) ; CHECK1-NEXT: br label [[OMP_REGION_END]] ; CHECK1: omp.par.outlined.exit.exitStub: ; CHECK1-NEXT: ret void @@ -5440,10 +5440,10 @@ ; CHECK1-NEXT: entry: ; CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 ; CHECK1-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 -; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) ; CHECK1-NEXT: br label [[OMP_PARALLEL:%.*]] ; CHECK1: omp_parallel: -; CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @merge_3..omp_par to void (i32*, i32*, ...)*), i32* [[A_ADDR]]) +; CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @merge_3..omp_par to void (i32*, i32*, ...)*), i32* [[A_ADDR]]) ; CHECK1-NEXT: br label [[OMP_PAR_OUTLINED_EXIT:%.*]] ; CHECK1: omp.par.outlined.exit: ; CHECK1-NEXT: br label [[OMP_PAR_EXIT_SPLIT:%.*]] @@ -5465,11 +5465,11 @@ ; CHECK1-NEXT: br label [[OMP_PAR_MERGED:%.*]] ; CHECK1: omp.par.merged: ; CHECK1-NEXT: call void @.omp_outlined..22(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) -; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -; CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]]) +; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) +; CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM]]) ; CHECK1-NEXT: call void @.omp_outlined..23(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) -; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -; CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM1]]) +; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) +; CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM1]]) ; CHECK1-NEXT: call void @.omp_outlined..24(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) ; CHECK1-NEXT: br label [[ENTRY_SPLIT:%.*]] ; CHECK1: entry.split: @@ -5514,11 +5514,11 @@ ; CHECK1-NEXT: [[ADD_SEQ_OUTPUT_ALLOC:%.*]] = alloca i32, align 4 ; CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 ; CHECK1-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 -; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM7:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM7:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) ; CHECK1-NEXT: store i32 [[A]], i32* [[A_RELOADED]], align 4 ; CHECK1-NEXT: br label [[OMP_PARALLEL:%.*]] ; CHECK1: omp_parallel: -; CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i32*, i32*, i32*)* @merge_3_seq..omp_par to void (i32*, i32*, ...)*), i32* [[A_RELOADED]], i32* [[A_ADDR]], i32* [[ADD_SEQ_OUTPUT_ALLOC]], i32* [[ADD1_SEQ_OUTPUT_ALLOC]]) +; CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i32*, i32*, i32*)* @merge_3_seq..omp_par to void (i32*, i32*, ...)*), i32* [[A_RELOADED]], i32* [[A_ADDR]], i32* [[ADD_SEQ_OUTPUT_ALLOC]], i32* [[ADD1_SEQ_OUTPUT_ALLOC]]) ; CHECK1-NEXT: br label [[OMP_PAR_OUTLINED_EXIT:%.*]] ; CHECK1: omp.par.outlined.exit: ; CHECK1-NEXT: br label [[OMP_PAR_EXIT_SPLIT:%.*]] @@ -5543,27 +5543,27 @@ ; CHECK1-NEXT: br label [[OMP_PAR_MERGED:%.*]] ; CHECK1: omp.par.merged: ; CHECK1-NEXT: call void @.omp_outlined..25(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) -; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -; CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM2]]) -; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -; CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]]) +; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) +; CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM2]]) +; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) +; CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]]) ; CHECK1-NEXT: [[TMP3:%.*]] = icmp ne i32 [[TMP2]], 0 ; CHECK1-NEXT: br i1 [[TMP3]], label [[OMP_REGION_BODY:%.*]], label [[OMP_REGION_END:%.*]] ; CHECK1: omp_region.end: -; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -; CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM1]]) +; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) +; CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM1]]) ; CHECK1-NEXT: br label [[OMP_PAR_MERGED_SPLIT_SPLIT:%.*]] ; CHECK1: omp.par.merged.split.split: ; CHECK1-NEXT: call void @.omp_outlined..26(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) -; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM4:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -; CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM4]]) -; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM3:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -; CHECK1-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM3]]) +; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM4:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) +; CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM4]]) +; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM3:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) +; CHECK1-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM3]]) ; CHECK1-NEXT: [[TMP5:%.*]] = icmp ne i32 [[TMP4]], 0 ; CHECK1-NEXT: br i1 [[TMP5]], label [[OMP_REGION_BODY5:%.*]], label [[OMP_REGION_END4:%.*]] ; CHECK1: omp_region.end4: -; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM6:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -; CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM6]]) +; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM6:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) +; CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM6]]) ; CHECK1-NEXT: br label [[OMP_PAR_MERGED_SPLIT_SPLIT_SPLIT_SPLIT:%.*]] ; CHECK1: omp.par.merged.split.split.split.split: ; CHECK1-NEXT: call void @.omp_outlined..27(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) @@ -5584,7 +5584,7 @@ ; CHECK1: omp.par.merged.split.split.split: ; CHECK1-NEXT: br label [[OMP_REGION_BODY5_SPLIT:%.*]] ; CHECK1: omp_region.body5.split: -; CHECK1-NEXT: call void @__kmpc_end_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM3]]) +; CHECK1-NEXT: call void @__kmpc_end_master(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM3]]) ; CHECK1-NEXT: br label [[OMP_REGION_END4]] ; CHECK1: omp_region.body: ; CHECK1-NEXT: br label [[SEQ_PAR_MERGED:%.*]] @@ -5595,7 +5595,7 @@ ; CHECK1: omp.par.merged.split: ; CHECK1-NEXT: br label [[OMP_REGION_BODY_SPLIT:%.*]] ; CHECK1: omp_region.body.split: -; CHECK1-NEXT: call void @__kmpc_end_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]]) +; CHECK1-NEXT: call void @__kmpc_end_master(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]]) ; CHECK1-NEXT: br label [[OMP_REGION_END]] ; CHECK1: omp.par.outlined.exit.exitStub: ; CHECK1-NEXT: ret void @@ -5741,10 +5741,10 @@ ; CHECK1-NEXT: entry: ; CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 ; CHECK1-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 -; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) ; CHECK1-NEXT: br label [[OMP_PARALLEL:%.*]] ; CHECK1: omp_parallel: -; CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @merge_2_unmergable_1..omp_par to void (i32*, i32*, ...)*), i32* [[A_ADDR]]) +; CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @merge_2_unmergable_1..omp_par to void (i32*, i32*, ...)*), i32* [[A_ADDR]]) ; CHECK1-NEXT: br label [[OMP_PAR_OUTLINED_EXIT:%.*]] ; CHECK1: omp.par.outlined.exit: ; CHECK1-NEXT: br label [[OMP_PAR_EXIT_SPLIT:%.*]] @@ -5768,8 +5768,8 @@ ; CHECK1-NEXT: br label [[OMP_PAR_MERGED:%.*]] ; CHECK1: omp.par.merged: ; CHECK1-NEXT: call void @.omp_outlined..37(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) -; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -; CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]]) +; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) +; CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM]]) ; CHECK1-NEXT: call void @.omp_outlined..38(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) ; CHECK1-NEXT: br label [[ENTRY_SPLIT:%.*]] ; CHECK1: entry.split: @@ -5811,10 +5811,10 @@ ; CHECK2-NEXT: entry: ; CHECK2-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 ; CHECK2-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 -; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1:[0-9]+]]) +; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2:[0-9]+]]) ; CHECK2-NEXT: br label [[OMP_PARALLEL:%.*]] ; CHECK2: omp_parallel: -; CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @merge..omp_par to void (i32*, i32*, ...)*), i32* [[A_ADDR]]) +; CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @merge..omp_par to void (i32*, i32*, ...)*), i32* [[A_ADDR]]) ; CHECK2-NEXT: br label [[OMP_PAR_OUTLINED_EXIT:%.*]] ; CHECK2: omp.par.outlined.exit: ; CHECK2-NEXT: br label [[OMP_PAR_EXIT_SPLIT:%.*]] @@ -5836,8 +5836,8 @@ ; CHECK2-NEXT: br label [[OMP_PAR_MERGED:%.*]] ; CHECK2: omp.par.merged: ; CHECK2-NEXT: call void @.omp_outlined.(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) -; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -; CHECK2-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM]]) +; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) +; CHECK2-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM]]) ; CHECK2-NEXT: call void @.omp_outlined..1(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) ; CHECK2-NEXT: br label [[ENTRY_SPLIT:%.*]] ; CHECK2: entry.split: @@ -5870,7 +5870,7 @@ ; CHECK2-SAME: (i32 [[A:%.*]]) local_unnamed_addr { ; CHECK2-NEXT: entry: ; CHECK2-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -; CHECK2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]]) +; CHECK2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1:[0-9]+]]) ; CHECK2-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 ; CHECK2-NEXT: call void @__kmpc_push_proc_bind(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 [[TMP0]], i32 noundef 3) ; CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..2 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) @@ -5954,10 +5954,10 @@ ; CHECK2-NEXT: entry: ; CHECK2-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 ; CHECK2-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 -; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) ; CHECK2-NEXT: br label [[OMP_PARALLEL:%.*]] ; CHECK2: omp_parallel: -; CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @merge_seq..omp_par to void (i32*, i32*, ...)*), i32* [[A_ADDR]]) +; CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @merge_seq..omp_par to void (i32*, i32*, ...)*), i32* [[A_ADDR]]) ; CHECK2-NEXT: br label [[OMP_PAR_OUTLINED_EXIT:%.*]] ; CHECK2: omp.par.outlined.exit: ; CHECK2-NEXT: br label [[OMP_PAR_EXIT_SPLIT:%.*]] @@ -5981,15 +5981,15 @@ ; CHECK2-NEXT: br label [[OMP_PAR_MERGED:%.*]] ; CHECK2: omp.par.merged: ; CHECK2-NEXT: call void @.omp_outlined..8(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) -; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -; CHECK2-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM2]]) -; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -; CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]]) +; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) +; CHECK2-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM2]]) +; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) +; CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]]) ; CHECK2-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0 ; CHECK2-NEXT: br i1 [[TMP2]], label [[OMP_REGION_BODY:%.*]], label [[OMP_REGION_END:%.*]] ; CHECK2: omp_region.end: -; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -; CHECK2-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM1]]) +; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) +; CHECK2-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM1]]) ; CHECK2-NEXT: br label [[OMP_PAR_MERGED_SPLIT_SPLIT:%.*]] ; CHECK2: omp.par.merged.split.split: ; CHECK2-NEXT: call void @.omp_outlined..9(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) @@ -6010,7 +6010,7 @@ ; CHECK2: omp.par.merged.split: ; CHECK2-NEXT: br label [[OMP_REGION_BODY_SPLIT:%.*]] ; CHECK2: omp_region.body.split: -; CHECK2-NEXT: call void @__kmpc_end_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]]) +; CHECK2-NEXT: call void @__kmpc_end_master(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]]) ; CHECK2-NEXT: br label [[OMP_REGION_END]] ; CHECK2: omp.par.outlined.exit.exitStub: ; CHECK2-NEXT: ret void @@ -6038,11 +6038,11 @@ ; CHECK2-NEXT: [[F_RELOADED:%.*]] = alloca float, align 4 ; CHECK2-NEXT: [[F_ADDR:%.*]] = alloca float, align 4 ; CHECK2-NEXT: store float [[F]], float* [[F_ADDR]], align 4 -; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) ; CHECK2-NEXT: store float [[F]], float* [[F_RELOADED]], align 4 ; CHECK2-NEXT: br label [[OMP_PARALLEL:%.*]] ; CHECK2: omp_parallel: -; CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, float*, float*, float*)* @merge_seq_float..omp_par to void (i32*, i32*, ...)*), float* [[F_RELOADED]], float* [[F_ADDR]], float* [[P]]) +; CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, float*, float*, float*)* @merge_seq_float..omp_par to void (i32*, i32*, ...)*), float* [[F_RELOADED]], float* [[F_ADDR]], float* [[P]]) ; CHECK2-NEXT: br label [[OMP_PAR_OUTLINED_EXIT:%.*]] ; CHECK2: omp.par.outlined.exit: ; CHECK2-NEXT: br label [[OMP_PAR_EXIT_SPLIT:%.*]] @@ -6065,15 +6065,15 @@ ; CHECK2-NEXT: br label [[OMP_PAR_MERGED:%.*]] ; CHECK2: omp.par.merged: ; CHECK2-NEXT: call void @.omp_outlined..10(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], float* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[F_ADDR]]) -; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -; CHECK2-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM2]]) -; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -; CHECK2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]]) +; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) +; CHECK2-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM2]]) +; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) +; CHECK2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]]) ; CHECK2-NEXT: [[TMP3:%.*]] = icmp ne i32 [[TMP2]], 0 ; CHECK2-NEXT: br i1 [[TMP3]], label [[OMP_REGION_BODY:%.*]], label [[OMP_REGION_END:%.*]] ; CHECK2: omp_region.end: -; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -; CHECK2-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM1]]) +; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) +; CHECK2-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM1]]) ; CHECK2-NEXT: br label [[OMP_PAR_MERGED_SPLIT_SPLIT:%.*]] ; CHECK2: omp.par.merged.split.split: ; CHECK2-NEXT: call void @.omp_outlined..11(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], float* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[F_ADDR]]) @@ -6093,7 +6093,7 @@ ; CHECK2: omp.par.merged.split: ; CHECK2-NEXT: br label [[OMP_REGION_BODY_SPLIT:%.*]] ; CHECK2: omp_region.body.split: -; CHECK2-NEXT: call void @__kmpc_end_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]]) +; CHECK2-NEXT: call void @__kmpc_end_master(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]]) ; CHECK2-NEXT: br label [[OMP_REGION_END]] ; CHECK2: omp.par.outlined.exit.exitStub: ; CHECK2-NEXT: ret void @@ -6123,10 +6123,10 @@ ; CHECK2-NEXT: [[A_CASTED_SROA_0_0_INSERT_EXT_SEQ_OUTPUT_ALLOC:%.*]] = alloca i64, align 8 ; CHECK2-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 ; CHECK2-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 -; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) ; CHECK2-NEXT: br label [[OMP_PARALLEL:%.*]] ; CHECK2: omp_parallel: -; CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i64*)* @merge_seq_firstprivate..omp_par to void (i32*, i32*, ...)*), i32* [[A_ADDR]], i64* [[A_CASTED_SROA_0_0_INSERT_EXT_SEQ_OUTPUT_ALLOC]]) +; CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i64*)* @merge_seq_firstprivate..omp_par to void (i32*, i32*, ...)*), i32* [[A_ADDR]], i64* [[A_CASTED_SROA_0_0_INSERT_EXT_SEQ_OUTPUT_ALLOC]]) ; CHECK2-NEXT: br label [[OMP_PAR_OUTLINED_EXIT:%.*]] ; CHECK2: omp.par.outlined.exit: ; CHECK2-NEXT: br label [[OMP_PAR_EXIT_SPLIT:%.*]] @@ -6150,15 +6150,15 @@ ; CHECK2-NEXT: br label [[OMP_PAR_MERGED:%.*]] ; CHECK2: omp.par.merged: ; CHECK2-NEXT: call void @.omp_outlined..12(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) -; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -; CHECK2-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM2]]) -; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -; CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]]) +; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) +; CHECK2-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM2]]) +; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) +; CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]]) ; CHECK2-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0 ; CHECK2-NEXT: br i1 [[TMP2]], label [[OMP_REGION_BODY:%.*]], label [[OMP_REGION_END:%.*]] ; CHECK2: omp_region.end: -; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -; CHECK2-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM1]]) +; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) +; CHECK2-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM1]]) ; CHECK2-NEXT: br label [[OMP_PAR_MERGED_SPLIT_SPLIT:%.*]] ; CHECK2: omp.par.merged.split.split: ; CHECK2-NEXT: [[A_CASTED_SROA_0_0_INSERT_EXT_SEQ_OUTPUT_LOAD:%.*]] = load i64, i64* [[A_CASTED_SROA_0_0_INSERT_EXT_SEQ_OUTPUT_ALLOC]], align 8 @@ -6182,7 +6182,7 @@ ; CHECK2: omp.par.merged.split: ; CHECK2-NEXT: br label [[OMP_REGION_BODY_SPLIT:%.*]] ; CHECK2: omp_region.body.split: -; CHECK2-NEXT: call void @__kmpc_end_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]]) +; CHECK2-NEXT: call void @__kmpc_end_master(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]]) ; CHECK2-NEXT: br label [[OMP_REGION_END]] ; CHECK2: omp.par.outlined.exit.exitStub: ; CHECK2-NEXT: ret void @@ -6209,10 +6209,10 @@ ; CHECK2-NEXT: entry: ; CHECK2-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 ; CHECK2-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 -; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) ; CHECK2-NEXT: br label [[OMP_PARALLEL:%.*]] ; CHECK2: omp_parallel: -; CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @merge_seq_sink_lt..omp_par to void (i32*, i32*, ...)*), i32* [[A_ADDR]]) +; CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @merge_seq_sink_lt..omp_par to void (i32*, i32*, ...)*), i32* [[A_ADDR]]) ; CHECK2-NEXT: br label [[OMP_PAR_OUTLINED_EXIT:%.*]] ; CHECK2: omp.par.outlined.exit: ; CHECK2-NEXT: br label [[OMP_PAR_EXIT_SPLIT:%.*]] @@ -6235,15 +6235,15 @@ ; CHECK2-NEXT: br label [[OMP_PAR_MERGED:%.*]] ; CHECK2: omp.par.merged: ; CHECK2-NEXT: call void @.omp_outlined..14(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) -; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -; CHECK2-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM2]]) -; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -; CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]]) +; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) +; CHECK2-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM2]]) +; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) +; CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]]) ; CHECK2-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0 ; CHECK2-NEXT: br i1 [[TMP2]], label [[OMP_REGION_BODY:%.*]], label [[OMP_REGION_END:%.*]] ; CHECK2: omp_region.end: -; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -; CHECK2-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM1]]) +; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) +; CHECK2-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM1]]) ; CHECK2-NEXT: br label [[OMP_PAR_MERGED_SPLIT_SPLIT:%.*]] ; CHECK2: omp.par.merged.split.split: ; CHECK2-NEXT: call void @.omp_outlined..15(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) @@ -6267,7 +6267,7 @@ ; CHECK2: omp.par.merged.split: ; CHECK2-NEXT: br label [[OMP_REGION_BODY_SPLIT:%.*]] ; CHECK2: omp_region.body.split: -; CHECK2-NEXT: call void @__kmpc_end_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]]) +; CHECK2-NEXT: call void @__kmpc_end_master(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]]) ; CHECK2-NEXT: br label [[OMP_REGION_END]] ; CHECK2: omp.par.outlined.exit.exitStub: ; CHECK2-NEXT: ret void @@ -6296,13 +6296,13 @@ ; CHECK2-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 ; CHECK2-NEXT: [[B:%.*]] = alloca i32, align 4 ; CHECK2-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 -; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) ; CHECK2-NEXT: store i32 [[A]], i32* [[A_RELOADED]], align 4 ; CHECK2-NEXT: br label [[OMP_PARALLEL:%.*]] ; CHECK2: omp_parallel: ; CHECK2-NEXT: [[LT_CAST3:%.*]] = bitcast i32* [[B]] to i8* ; CHECK2-NEXT: call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[LT_CAST3]]) -; CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i32*, i32*)* @merge_seq_par_use..omp_par to void (i32*, i32*, ...)*), i32* [[A_RELOADED]], i32* [[A_ADDR]], i32* [[B]]) +; CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i32*, i32*)* @merge_seq_par_use..omp_par to void (i32*, i32*, ...)*), i32* [[A_RELOADED]], i32* [[A_ADDR]], i32* [[B]]) ; CHECK2-NEXT: br label [[OMP_PAR_OUTLINED_EXIT:%.*]] ; CHECK2: omp.par.outlined.exit: ; CHECK2-NEXT: br label [[OMP_PAR_EXIT_SPLIT:%.*]] @@ -6327,15 +6327,15 @@ ; CHECK2-NEXT: br label [[OMP_PAR_MERGED:%.*]] ; CHECK2: omp.par.merged: ; CHECK2-NEXT: call void @.omp_outlined..16(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) -; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -; CHECK2-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM2]]) -; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -; CHECK2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]]) +; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) +; CHECK2-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM2]]) +; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) +; CHECK2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]]) ; CHECK2-NEXT: [[TMP3:%.*]] = icmp ne i32 [[TMP2]], 0 ; CHECK2-NEXT: br i1 [[TMP3]], label [[OMP_REGION_BODY:%.*]], label [[OMP_REGION_END:%.*]] ; CHECK2: omp_region.end: -; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -; CHECK2-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM1]]) +; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) +; CHECK2-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM1]]) ; CHECK2-NEXT: br label [[OMP_PAR_MERGED_SPLIT_SPLIT:%.*]] ; CHECK2: omp.par.merged.split.split: ; CHECK2-NEXT: call void @.omp_outlined..17(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[B]]) @@ -6356,7 +6356,7 @@ ; CHECK2: omp.par.merged.split: ; CHECK2-NEXT: br label [[OMP_REGION_BODY_SPLIT:%.*]] ; CHECK2: omp_region.body.split: -; CHECK2-NEXT: call void @__kmpc_end_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]]) +; CHECK2-NEXT: call void @__kmpc_end_master(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]]) ; CHECK2-NEXT: br label [[OMP_REGION_END]] ; CHECK2: omp.par.outlined.exit.exitStub: ; CHECK2-NEXT: ret void @@ -6387,10 +6387,10 @@ ; CHECK2-NEXT: [[CANCEL2_ADDR:%.*]] = alloca i32, align 4 ; CHECK2-NEXT: store i32 [[CANCEL1]], i32* [[CANCEL1_ADDR]], align 4 ; CHECK2-NEXT: store i32 [[CANCEL2]], i32* [[CANCEL2_ADDR]], align 4 -; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) ; CHECK2-NEXT: br label [[OMP_PARALLEL:%.*]] ; CHECK2: omp_parallel: -; CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i32*)* @merge_cancellable_regions..omp_par to void (i32*, i32*, ...)*), i32* [[CANCEL1_ADDR]], i32* [[CANCEL2_ADDR]]) +; CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i32*)* @merge_cancellable_regions..omp_par to void (i32*, i32*, ...)*), i32* [[CANCEL1_ADDR]], i32* [[CANCEL2_ADDR]]) ; CHECK2-NEXT: br label [[OMP_PAR_OUTLINED_EXIT:%.*]] ; CHECK2: omp.par.outlined.exit: ; CHECK2-NEXT: br label [[OMP_PAR_EXIT_SPLIT:%.*]] @@ -6412,8 +6412,8 @@ ; CHECK2-NEXT: br label [[OMP_PAR_MERGED:%.*]] ; CHECK2: omp.par.merged: ; CHECK2-NEXT: call void @.omp_outlined..18(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[CANCEL1_ADDR]]) -; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -; CHECK2-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]]) +; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) +; CHECK2-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM]]) ; CHECK2-NEXT: call void @.omp_outlined..19(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[CANCEL2_ADDR]]) ; CHECK2-NEXT: br label [[ENTRY_SPLIT:%.*]] ; CHECK2: entry.split: @@ -6462,11 +6462,11 @@ ; CHECK2-NEXT: [[CANCEL2_ADDR:%.*]] = alloca i32, align 4 ; CHECK2-NEXT: store i32 [[CANCEL1]], i32* [[CANCEL1_ADDR]], align 4 ; CHECK2-NEXT: store i32 [[CANCEL2]], i32* [[CANCEL2_ADDR]], align 4 -; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) ; CHECK2-NEXT: store i32 [[CANCEL1]], i32* [[CANCEL1_RELOADED]], align 4 ; CHECK2-NEXT: br label [[OMP_PARALLEL:%.*]] ; CHECK2: omp_parallel: -; CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i32*, i32*)* @merge_cancellable_regions_seq..omp_par to void (i32*, i32*, ...)*), i32* [[CANCEL1_RELOADED]], i32* [[CANCEL1_ADDR]], i32* [[CANCEL2_ADDR]]) +; CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i32*, i32*)* @merge_cancellable_regions_seq..omp_par to void (i32*, i32*, ...)*), i32* [[CANCEL1_RELOADED]], i32* [[CANCEL1_ADDR]], i32* [[CANCEL2_ADDR]]) ; CHECK2-NEXT: br label [[OMP_PAR_OUTLINED_EXIT:%.*]] ; CHECK2: omp.par.outlined.exit: ; CHECK2-NEXT: br label [[OMP_PAR_EXIT_SPLIT:%.*]] @@ -6489,15 +6489,15 @@ ; CHECK2-NEXT: br label [[OMP_PAR_MERGED:%.*]] ; CHECK2: omp.par.merged: ; CHECK2-NEXT: call void @.omp_outlined..20(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[CANCEL1_ADDR]]) -; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -; CHECK2-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM2]]) -; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -; CHECK2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]]) +; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) +; CHECK2-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM2]]) +; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) +; CHECK2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]]) ; CHECK2-NEXT: [[TMP3:%.*]] = icmp ne i32 [[TMP2]], 0 ; CHECK2-NEXT: br i1 [[TMP3]], label [[OMP_REGION_BODY:%.*]], label [[OMP_REGION_END:%.*]] ; CHECK2: omp_region.end: -; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -; CHECK2-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM1]]) +; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) +; CHECK2-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM1]]) ; CHECK2-NEXT: br label [[OMP_PAR_MERGED_SPLIT_SPLIT:%.*]] ; CHECK2: omp.par.merged.split.split: ; CHECK2-NEXT: call void @.omp_outlined..21(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[CANCEL2_ADDR]]) @@ -6518,7 +6518,7 @@ ; CHECK2: omp.par.merged.split: ; CHECK2-NEXT: br label [[OMP_REGION_BODY_SPLIT:%.*]] ; CHECK2: omp_region.body.split: -; CHECK2-NEXT: call void @__kmpc_end_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]]) +; CHECK2-NEXT: call void @__kmpc_end_master(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]]) ; CHECK2-NEXT: br label [[OMP_REGION_END]] ; CHECK2: omp.par.outlined.exit.exitStub: ; CHECK2-NEXT: ret void @@ -6557,10 +6557,10 @@ ; CHECK2-NEXT: entry: ; CHECK2-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 ; CHECK2-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 -; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) ; CHECK2-NEXT: br label [[OMP_PARALLEL:%.*]] ; CHECK2: omp_parallel: -; CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @merge_3..omp_par to void (i32*, i32*, ...)*), i32* [[A_ADDR]]) +; CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @merge_3..omp_par to void (i32*, i32*, ...)*), i32* [[A_ADDR]]) ; CHECK2-NEXT: br label [[OMP_PAR_OUTLINED_EXIT:%.*]] ; CHECK2: omp.par.outlined.exit: ; CHECK2-NEXT: br label [[OMP_PAR_EXIT_SPLIT:%.*]] @@ -6582,11 +6582,11 @@ ; CHECK2-NEXT: br label [[OMP_PAR_MERGED:%.*]] ; CHECK2: omp.par.merged: ; CHECK2-NEXT: call void @.omp_outlined..22(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) -; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -; CHECK2-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]]) +; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) +; CHECK2-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM]]) ; CHECK2-NEXT: call void @.omp_outlined..23(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) -; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -; CHECK2-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM1]]) +; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) +; CHECK2-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM1]]) ; CHECK2-NEXT: call void @.omp_outlined..24(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) ; CHECK2-NEXT: br label [[ENTRY_SPLIT:%.*]] ; CHECK2: entry.split: @@ -6631,11 +6631,11 @@ ; CHECK2-NEXT: [[ADD_SEQ_OUTPUT_ALLOC:%.*]] = alloca i32, align 4 ; CHECK2-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 ; CHECK2-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 -; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM7:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM7:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) ; CHECK2-NEXT: store i32 [[A]], i32* [[A_RELOADED]], align 4 ; CHECK2-NEXT: br label [[OMP_PARALLEL:%.*]] ; CHECK2: omp_parallel: -; CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i32*, i32*, i32*)* @merge_3_seq..omp_par to void (i32*, i32*, ...)*), i32* [[A_RELOADED]], i32* [[A_ADDR]], i32* [[ADD_SEQ_OUTPUT_ALLOC]], i32* [[ADD1_SEQ_OUTPUT_ALLOC]]) +; CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i32*, i32*, i32*)* @merge_3_seq..omp_par to void (i32*, i32*, ...)*), i32* [[A_RELOADED]], i32* [[A_ADDR]], i32* [[ADD_SEQ_OUTPUT_ALLOC]], i32* [[ADD1_SEQ_OUTPUT_ALLOC]]) ; CHECK2-NEXT: br label [[OMP_PAR_OUTLINED_EXIT:%.*]] ; CHECK2: omp.par.outlined.exit: ; CHECK2-NEXT: br label [[OMP_PAR_EXIT_SPLIT:%.*]] @@ -6660,27 +6660,27 @@ ; CHECK2-NEXT: br label [[OMP_PAR_MERGED:%.*]] ; CHECK2: omp.par.merged: ; CHECK2-NEXT: call void @.omp_outlined..25(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) -; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -; CHECK2-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM2]]) -; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -; CHECK2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]]) +; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) +; CHECK2-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM2]]) +; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) +; CHECK2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]]) ; CHECK2-NEXT: [[TMP3:%.*]] = icmp ne i32 [[TMP2]], 0 ; CHECK2-NEXT: br i1 [[TMP3]], label [[OMP_REGION_BODY:%.*]], label [[OMP_REGION_END:%.*]] ; CHECK2: omp_region.end: -; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -; CHECK2-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM1]]) +; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) +; CHECK2-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM1]]) ; CHECK2-NEXT: br label [[OMP_PAR_MERGED_SPLIT_SPLIT:%.*]] ; CHECK2: omp.par.merged.split.split: ; CHECK2-NEXT: call void @.omp_outlined..26(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) -; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM4:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -; CHECK2-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM4]]) -; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM3:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -; CHECK2-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM3]]) +; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM4:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) +; CHECK2-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM4]]) +; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM3:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) +; CHECK2-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM3]]) ; CHECK2-NEXT: [[TMP5:%.*]] = icmp ne i32 [[TMP4]], 0 ; CHECK2-NEXT: br i1 [[TMP5]], label [[OMP_REGION_BODY5:%.*]], label [[OMP_REGION_END4:%.*]] ; CHECK2: omp_region.end4: -; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM6:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -; CHECK2-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM6]]) +; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM6:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) +; CHECK2-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM6]]) ; CHECK2-NEXT: br label [[OMP_PAR_MERGED_SPLIT_SPLIT_SPLIT_SPLIT:%.*]] ; CHECK2: omp.par.merged.split.split.split.split: ; CHECK2-NEXT: call void @.omp_outlined..27(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) @@ -6701,7 +6701,7 @@ ; CHECK2: omp.par.merged.split.split.split: ; CHECK2-NEXT: br label [[OMP_REGION_BODY5_SPLIT:%.*]] ; CHECK2: omp_region.body5.split: -; CHECK2-NEXT: call void @__kmpc_end_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM3]]) +; CHECK2-NEXT: call void @__kmpc_end_master(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM3]]) ; CHECK2-NEXT: br label [[OMP_REGION_END4]] ; CHECK2: omp_region.body: ; CHECK2-NEXT: br label [[SEQ_PAR_MERGED:%.*]] @@ -6712,7 +6712,7 @@ ; CHECK2: omp.par.merged.split: ; CHECK2-NEXT: br label [[OMP_REGION_BODY_SPLIT:%.*]] ; CHECK2: omp_region.body.split: -; CHECK2-NEXT: call void @__kmpc_end_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]]) +; CHECK2-NEXT: call void @__kmpc_end_master(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]]) ; CHECK2-NEXT: br label [[OMP_REGION_END]] ; CHECK2: omp.par.outlined.exit.exitStub: ; CHECK2-NEXT: ret void @@ -6858,10 +6858,10 @@ ; CHECK2-NEXT: entry: ; CHECK2-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 ; CHECK2-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 -; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) ; CHECK2-NEXT: br label [[OMP_PARALLEL:%.*]] ; CHECK2: omp_parallel: -; CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @merge_2_unmergable_1..omp_par to void (i32*, i32*, ...)*), i32* [[A_ADDR]]) +; CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @merge_2_unmergable_1..omp_par to void (i32*, i32*, ...)*), i32* [[A_ADDR]]) ; CHECK2-NEXT: br label [[OMP_PAR_OUTLINED_EXIT:%.*]] ; CHECK2: omp.par.outlined.exit: ; CHECK2-NEXT: br label [[OMP_PAR_EXIT_SPLIT:%.*]] @@ -6885,8 +6885,8 @@ ; CHECK2-NEXT: br label [[OMP_PAR_MERGED:%.*]] ; CHECK2: omp.par.merged: ; CHECK2-NEXT: call void @.omp_outlined..37(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) -; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -; CHECK2-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]]) +; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) +; CHECK2-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM]]) ; CHECK2-NEXT: call void @.omp_outlined..38(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) ; CHECK2-NEXT: br label [[ENTRY_SPLIT:%.*]] ; CHECK2: entry.split: diff --git a/llvm/test/Transforms/OpenMP/remove_globalization.ll b/llvm/test/Transforms/OpenMP/remove_globalization.ll --- a/llvm/test/Transforms/OpenMP/remove_globalization.ll +++ b/llvm/test/Transforms/OpenMP/remove_globalization.ll @@ -32,7 +32,7 @@ ; CHECK-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* nonnull null, i8 1, i1 false, i1 true) ; CHECK-NEXT: call void @foo() #[[ATTR4:[0-9]+]] ; CHECK-NEXT: call void @bar() #[[ATTR4]] -; CHECK-NEXT: call void @unknown_no_openmp() +; CHECK-NEXT: call void @unknown_no_openmp() #[[ATTR3:[0-9]+]] ; CHECK-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* nonnull null, i8 1, i1 true) ; CHECK-NEXT: ret void ; @@ -41,7 +41,7 @@ ; CHECK-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* nonnull null, i8 1, i1 false, i1 true) ; CHECK-DISABLED-NEXT: call void @foo() #[[ATTR4:[0-9]+]] ; CHECK-DISABLED-NEXT: call void @bar() #[[ATTR4]] -; CHECK-DISABLED-NEXT: call void @unknown_no_openmp() +; CHECK-DISABLED-NEXT: call void @unknown_no_openmp() #[[ATTR3:[0-9]+]] ; CHECK-DISABLED-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* nonnull null, i8 1, i1 true) ; CHECK-DISABLED-NEXT: ret void ; @@ -183,14 +183,14 @@ ; CHECK: attributes #[[ATTR0]] = { nosync nounwind } ; CHECK: attributes #[[ATTR1]] = { nofree nosync nounwind readnone willreturn } ; CHECK: attributes #[[ATTR2]] = { nofree nosync nounwind willreturn writeonly } -; CHECK: attributes #[[ATTR3:[0-9]+]] = { "llvm.assume"="omp_no_openmp" } +; CHECK: attributes #[[ATTR3]] = { "llvm.assume"="omp_no_openmp" } ; CHECK: attributes #[[ATTR4]] = { nounwind } ; CHECK: attributes #[[ATTR5]] = { nosync nounwind writeonly } ;. ; CHECK-DISABLED: attributes #[[ATTR0]] = { nosync nounwind } ; CHECK-DISABLED: attributes #[[ATTR1]] = { nofree nosync nounwind readnone willreturn } ; CHECK-DISABLED: attributes #[[ATTR2]] = { nofree nosync nounwind willreturn writeonly } -; CHECK-DISABLED: attributes #[[ATTR3:[0-9]+]] = { "llvm.assume"="omp_no_openmp" } +; CHECK-DISABLED: attributes #[[ATTR3]] = { "llvm.assume"="omp_no_openmp" } ; CHECK-DISABLED: attributes #[[ATTR4]] = { nounwind } ; CHECK-DISABLED: attributes #[[ATTR5]] = { nosync nounwind writeonly } ;. diff --git a/llvm/test/Transforms/OpenMP/spmdization.ll b/llvm/test/Transforms/OpenMP/spmdization.ll --- a/llvm/test/Transforms/OpenMP/spmdization.ll +++ b/llvm/test/Transforms/OpenMP/spmdization.ll @@ -113,6 +113,7 @@ ; AMDGPU: @[[__OMP_OFFLOADING_FD02_2044372E_DO_NOT_SPMDIZE_TARGET_L65_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 1 ; AMDGPU: @[[__OMP_OFFLOADING_FD02_2044372E_DO_NOT_SPMDIZE_TASK_L74_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 1 ; AMDGPU: @[[LLVM_COMPILER_USED:[a-zA-Z0-9_$"\\.-]+]] = appending global [6 x i8*] [i8* @__omp_offloading_fd02_2044372e_sequential_loop_l5_exec_mode, i8* @__omp_offloading_fd02_2044372e_sequential_loop_to_stack_var_l20_exec_mode, i8* @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_l35_exec_mode, i8* @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_guarded_l50_exec_mode, i8* @__omp_offloading_fd02_2044372e_do_not_spmdize_target_l65_exec_mode, i8* @__omp_offloading_fd02_2044372e_do_not_spmdize_task_l74_exec_mode], section "llvm.metadata" +; AMDGPU: @[[GLOB2:[0-9]+]] = private unnamed_addr constant [[STRUCT_IDENT_T:%.*]] { i32 0, i32 2, i32 0, i32 22, i8* getelementptr inbounds ([23 x i8], [23 x i8]* @[[GLOB0]], i32 0, i32 0) }, align 8 ; AMDGPU: @[[X:[a-zA-Z0-9_$"\\.-]+]] = internal addrspace(3) global [4 x i8] undef, align 4 ; AMDGPU: @[[X_1:[a-zA-Z0-9_$"\\.-]+]] = internal addrspace(3) global [4 x i8] undef, align 4 ; AMDGPU: @[[__OMP_OUTLINED__9_WRAPPER_ID:[a-zA-Z0-9_$"\\.-]+]] = private constant i8 undef @@ -126,6 +127,7 @@ ; NVPTX: @[[__OMP_OFFLOADING_FD02_2044372E_DO_NOT_SPMDIZE_TARGET_L65_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 1 ; NVPTX: @[[__OMP_OFFLOADING_FD02_2044372E_DO_NOT_SPMDIZE_TASK_L74_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 1 ; NVPTX: @[[LLVM_COMPILER_USED:[a-zA-Z0-9_$"\\.-]+]] = appending global [6 x i8*] [i8* @__omp_offloading_fd02_2044372e_sequential_loop_l5_exec_mode, i8* @__omp_offloading_fd02_2044372e_sequential_loop_to_stack_var_l20_exec_mode, i8* @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_l35_exec_mode, i8* @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_guarded_l50_exec_mode, i8* @__omp_offloading_fd02_2044372e_do_not_spmdize_target_l65_exec_mode, i8* @__omp_offloading_fd02_2044372e_do_not_spmdize_task_l74_exec_mode], section "llvm.metadata" +; NVPTX: @[[GLOB2:[0-9]+]] = private unnamed_addr constant [[STRUCT_IDENT_T:%.*]] { i32 0, i32 2, i32 0, i32 22, i8* getelementptr inbounds ([23 x i8], [23 x i8]* @[[GLOB0]], i32 0, i32 0) }, align 8 ; NVPTX: @[[X:[a-zA-Z0-9_$"\\.-]+]] = internal addrspace(3) global [4 x i8] undef, align 4 ; NVPTX: @[[X1:[a-zA-Z0-9_$"\\.-]+]] = internal addrspace(3) global [4 x i8] undef, align 4 ; NVPTX: @[[__OMP_OUTLINED__9_WRAPPER_ID:[a-zA-Z0-9_$"\\.-]+]] = private constant i8 undef @@ -1469,7 +1471,7 @@ ; AMDGPU: region.guarded.end: ; AMDGPU-NEXT: br label [[REGION_BARRIER]] ; AMDGPU: region.barrier: -; AMDGPU-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) +; AMDGPU-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]]) ; AMDGPU-NEXT: br label [[REGION_EXIT:%.*]] ; AMDGPU: region.exit: ; AMDGPU-NEXT: br label [[FOR_COND:%.*]] @@ -1505,7 +1507,7 @@ ; NVPTX: region.guarded.end: ; NVPTX-NEXT: br label [[REGION_BARRIER]] ; NVPTX: region.barrier: -; NVPTX-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) +; NVPTX-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]]) ; NVPTX-NEXT: br label [[REGION_EXIT:%.*]] ; NVPTX: region.exit: ; NVPTX-NEXT: br label [[FOR_COND:%.*]] diff --git a/llvm/test/Transforms/OpenMP/spmdization_assumes.ll b/llvm/test/Transforms/OpenMP/spmdization_assumes.ll --- a/llvm/test/Transforms/OpenMP/spmdization_assumes.ll +++ b/llvm/test/Transforms/OpenMP/spmdization_assumes.ll @@ -25,6 +25,7 @@ ; CHECK: @[[GLOB1:[0-9]+]] = private unnamed_addr constant [[STRUCT_IDENT_T:%.*]] { i32 0, i32 2, i32 0, i32 0, i8* getelementptr inbounds ([23 x i8], [23 x i8]* @[[GLOB0]], i32 0, i32 0) }, align 8 ; CHECK: @[[__OMP_OFFLOADING_FD02_404433C2_MAIN_L5_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 3 ; CHECK: @[[LLVM_COMPILER_USED:[a-zA-Z0-9_$"\\.-]+]] = appending global [1 x i8*] [i8* @__omp_offloading_fd02_404433c2_main_l5_exec_mode], section "llvm.metadata" +; CHECK: @[[GLOB2:[0-9]+]] = private unnamed_addr constant [[STRUCT_IDENT_T:%.*]] { i32 0, i32 2, i32 0, i32 22, i8* getelementptr inbounds ([23 x i8], [23 x i8]* @[[GLOB0]], i32 0, i32 0) }, align 8 ;. define weak void @__omp_offloading_fd02_404433c2_main_l5(double* nonnull align 8 dereferenceable(8) %x) local_unnamed_addr #0 { ; CHECK-LABEL: define {{[^@]+}}@__omp_offloading_fd02_404433c2_main_l5 @@ -50,7 +51,7 @@ ; CHECK: region.guarded.end: ; CHECK-NEXT: br label [[REGION_BARRIER]] ; CHECK: region.barrier: -; CHECK-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) +; CHECK-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]]) ; CHECK-NEXT: br label [[REGION_EXIT:%.*]] ; CHECK: region.exit: ; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds [0 x i8*], [0 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 diff --git a/llvm/test/Transforms/OpenMP/spmdization_guarding.ll b/llvm/test/Transforms/OpenMP/spmdization_guarding.ll --- a/llvm/test/Transforms/OpenMP/spmdization_guarding.ll +++ b/llvm/test/Transforms/OpenMP/spmdization_guarding.ll @@ -47,6 +47,7 @@ ; CHECK: @[[GLOB1:[0-9]+]] = private unnamed_addr constant [[STRUCT_IDENT_T:%.*]] { i32 0, i32 2, i32 0, i32 0, i8* getelementptr inbounds ([23 x i8], [23 x i8]* @[[GLOB0]], i32 0, i32 0) }, align 8 ; CHECK: @[[__OMP_OFFLOADING_2A_FBFA7A_SEQUENTIAL_LOOP_L6_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 3 ; CHECK: @[[LLVM_COMPILER_USED:[a-zA-Z0-9_$"\\.-]+]] = appending global [1 x i8*] [i8* @__omp_offloading_2a_fbfa7a_sequential_loop_l6_exec_mode], section "llvm.metadata" +; CHECK: @[[GLOB2:[0-9]+]] = private unnamed_addr constant [[STRUCT_IDENT_T:%.*]] { i32 0, i32 2, i32 0, i32 22, i8* getelementptr inbounds ([23 x i8], [23 x i8]* @[[GLOB0]], i32 0, i32 0) }, align 8 ;. ; CHECK-DISABLED: @[[GLOB0:[0-9]+]] = private unnamed_addr constant [23 x i8] c" ; CHECK-DISABLED: @[[GLOB1:[0-9]+]] = private unnamed_addr constant [[STRUCT_IDENT_T:%.*]] { i32 0, i32 2, i32 0, i32 0, i8* getelementptr inbounds ([23 x i8], [23 x i8]* @[[GLOB0]], i32 0, i32 0) }, align 8 @@ -81,7 +82,7 @@ ; CHECK: region.guarded.end: ; CHECK-NEXT: br label [[REGION_BARRIER]] ; CHECK: region.barrier: -; CHECK-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) +; CHECK-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]]) ; CHECK-NEXT: br label [[REGION_EXIT:%.*]] ; CHECK: region.exit: ; CHECK-NEXT: br label [[FOR_COND_I:%.*]] @@ -105,7 +106,7 @@ ; CHECK: region.guarded.end1: ; CHECK-NEXT: br label [[REGION_BARRIER2]] ; CHECK: region.barrier2: -; CHECK-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) +; CHECK-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* @[[GLOB2]], i32 [[TMP4]]) ; CHECK-NEXT: br label [[REGION_EXIT3]] ; CHECK: region.exit3: ; CHECK-NEXT: [[INC_I]] = add nuw nsw i32 [[I_0_I]], 1 @@ -126,7 +127,7 @@ ; CHECK: region.guarded.end6: ; CHECK-NEXT: br label [[REGION_BARRIER7]] ; CHECK: region.barrier7: -; CHECK-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* @[[GLOB1]], i32 [[TMP6]]) +; CHECK-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* @[[GLOB2]], i32 [[TMP6]]) ; CHECK-NEXT: br label [[REGION_EXIT8:%.*]] ; CHECK: region.exit8: ; CHECK-NEXT: [[CALL8_I:%.*]] = call i32 @no_openmp(i32* nonnull [[X]]) #[[ATTR7]], !noalias !8 @@ -143,7 +144,7 @@ ; CHECK: region.guarded.end11: ; CHECK-NEXT: br label [[REGION_BARRIER12]] ; CHECK: region.barrier12: -; CHECK-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* @[[GLOB1]], i32 [[TMP8]]) +; CHECK-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* @[[GLOB2]], i32 [[TMP8]]) ; CHECK-NEXT: br label [[REGION_EXIT13:%.*]] ; CHECK: region.exit13: ; CHECK-NEXT: [[CALL11_I:%.*]] = call i32 @no_openmp(i32* nonnull [[X]]) #[[ATTR7]], !noalias !8 @@ -160,7 +161,7 @@ ; CHECK: region.guarded.end16: ; CHECK-NEXT: br label [[REGION_BARRIER17]] ; CHECK: region.barrier17: -; CHECK-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]]) +; CHECK-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* @[[GLOB2]], i32 [[TMP10]]) ; CHECK-NEXT: br label [[REGION_EXIT18:%.*]] ; CHECK: region.exit18: ; CHECK-NEXT: [[CALL14_I:%.*]] = call i32 @no_openmp(i32* nonnull [[X]]) #[[ATTR7]], !noalias !8