diff --git a/llvm/include/llvm/IR/OpenMPKinds.def b/llvm/include/llvm/IR/OpenMPKinds.def
--- a/llvm/include/llvm/IR/OpenMPKinds.def
+++ b/llvm/include/llvm/IR/OpenMPKinds.def
@@ -167,6 +167,39 @@
 __OMP_RTL(__kmpc_global_thread_num, false, Int32, IdentPtr)
 __OMP_RTL(__kmpc_fork_call, true, Void, IdentPtr, Int32, ParallelTaskPtr)
 __OMP_RTL(omp_get_thread_num, false, Int32, )
+__OMP_RTL(omp_get_num_threads, false, Int32, )
+__OMP_RTL(omp_get_max_threads, false, Int32, )
+__OMP_RTL(omp_in_parallel, false, Int32, )
+__OMP_RTL(omp_get_dynamic, false, Int32, )
+__OMP_RTL(omp_get_cancellation, false, Int32, )
+__OMP_RTL(omp_get_nested, false, Int32, )
+__OMP_RTL(omp_get_schedule, false, Void, Int32Ptr, Int32Ptr)
+__OMP_RTL(omp_get_thread_limit, false, Int32, )
+__OMP_RTL(omp_get_supported_active_levels, false, Int32, )
+__OMP_RTL(omp_get_max_active_levels, false, Int32, )
+__OMP_RTL(omp_get_level, false, Int32, )
+// omp_get_ancestor_thread_num and omp_get_team_size take one i32 argument
+// (the level being queried).
+__OMP_RTL(omp_get_ancestor_thread_num, false, Int32, Int32)
+__OMP_RTL(omp_get_team_size, false, Int32, Int32)
+__OMP_RTL(omp_get_active_level, false, Int32, )
+__OMP_RTL(omp_in_final, false, Int32, )
+__OMP_RTL(omp_get_proc_bind, false, Int32, )
+__OMP_RTL(omp_get_num_places, false, Int32, )
+__OMP_RTL(omp_get_num_procs, false, Int32, )
+__OMP_RTL(omp_get_place_proc_ids, false, Void, Int32, Int32Ptr)
+__OMP_RTL(omp_get_place_num, false, Int32, )
+__OMP_RTL(omp_get_partition_num_places, false, Int32, )
+// omp_get_partition_place_nums returns void and takes an i32 pointer that
+// receives the place numbers.
+__OMP_RTL(omp_get_partition_place_nums, false, Void, Int32Ptr)
+
+// Setters: void return, i32 argument(s).
+__OMP_RTL(omp_set_num_threads, false, Void, Int32)
+__OMP_RTL(omp_set_dynamic, false, Void, Int32)
+__OMP_RTL(omp_set_nested, false, Void, Int32)
+__OMP_RTL(omp_set_schedule, false, Void, Int32, Int32)
+__OMP_RTL(omp_set_max_active_levels, false, Void, Int32)
 
 #undef __OMP_RTL
 #undef OMP_RTL
@@ -190,6 +223,125 @@
                 AttributeSet(EnumAttr(ReadOnly), EnumAttr(NoUnwind),
                              EnumAttr(NoFree), EnumAttr(NoSync)),
                 AttributeSet(), {})
+__OMP_RTL_ATTRS(omp_get_num_threads,
+                AttributeSet(EnumAttr(ReadOnly), EnumAttr(NoUnwind),
+                             EnumAttr(NoFree), EnumAttr(NoSync)),
+                AttributeSet(), {})
+__OMP_RTL_ATTRS(omp_get_max_threads,
+                AttributeSet(EnumAttr(ReadOnly), EnumAttr(NoUnwind),
+                             EnumAttr(NoFree), EnumAttr(NoSync)),
+                AttributeSet(), {})
+__OMP_RTL_ATTRS(omp_in_parallel,
+                AttributeSet(EnumAttr(ReadOnly), EnumAttr(NoUnwind),
+                             EnumAttr(NoFree), EnumAttr(NoSync)),
+                AttributeSet(), {})
+__OMP_RTL_ATTRS(omp_get_dynamic,
+                AttributeSet(EnumAttr(ReadOnly), EnumAttr(NoUnwind),
+                             EnumAttr(NoFree), EnumAttr(NoSync)),
+                AttributeSet(), {})
+__OMP_RTL_ATTRS(omp_get_cancellation,
+                AttributeSet(EnumAttr(ReadOnly), EnumAttr(NoUnwind),
+                             EnumAttr(NoFree), EnumAttr(NoSync)),
+                AttributeSet(), {})
+__OMP_RTL_ATTRS(omp_get_nested,
+                AttributeSet(EnumAttr(ReadOnly), EnumAttr(NoUnwind),
+                             EnumAttr(NoFree), EnumAttr(NoSync)),
+                AttributeSet(), {})
+__OMP_RTL_ATTRS(omp_get_schedule,
+                AttributeSet(EnumAttr(NoUnwind), EnumAttr(NoFree),
+                             EnumAttr(NoSync)),
+                AttributeSet(),
+                ArrayRef<AttributeSet>(
+                    {AttributeSet(EnumAttr(NoCapture), EnumAttr(WriteOnly)),
+                     AttributeSet(EnumAttr(NoCapture), EnumAttr(WriteOnly))}))
+__OMP_RTL_ATTRS(omp_get_thread_limit,
+                AttributeSet(EnumAttr(ReadOnly), EnumAttr(NoUnwind),
+                             EnumAttr(NoFree), EnumAttr(NoSync)),
+                AttributeSet(), {})
+__OMP_RTL_ATTRS(omp_get_supported_active_levels,
+                AttributeSet(EnumAttr(ReadOnly), EnumAttr(NoUnwind),
+                             EnumAttr(NoFree), EnumAttr(NoSync)),
+                AttributeSet(), {})
+__OMP_RTL_ATTRS(omp_get_max_active_levels,
+                AttributeSet(EnumAttr(ReadOnly), EnumAttr(NoUnwind),
+                             EnumAttr(NoFree), EnumAttr(NoSync)),
+                AttributeSet(), {})
+__OMP_RTL_ATTRS(omp_get_level,
+                AttributeSet(EnumAttr(ReadOnly), EnumAttr(NoUnwind),
+                             EnumAttr(NoFree), EnumAttr(NoSync)),
+                AttributeSet(), {})
+__OMP_RTL_ATTRS(omp_get_ancestor_thread_num,
+                AttributeSet(EnumAttr(ReadOnly), EnumAttr(NoUnwind),
+                             EnumAttr(NoFree), EnumAttr(NoSync)),
+                AttributeSet(), {})
+__OMP_RTL_ATTRS(omp_get_team_size,
+                AttributeSet(EnumAttr(ReadOnly), EnumAttr(NoUnwind),
+                             EnumAttr(NoFree), EnumAttr(NoSync)),
+                AttributeSet(), {})
+__OMP_RTL_ATTRS(omp_get_active_level,
+                AttributeSet(EnumAttr(ReadOnly), EnumAttr(NoUnwind),
+                             EnumAttr(NoFree), EnumAttr(NoSync)),
+                AttributeSet(), {})
+__OMP_RTL_ATTRS(omp_in_final,
+                AttributeSet(EnumAttr(ReadOnly), EnumAttr(NoUnwind),
+                             EnumAttr(NoFree), EnumAttr(NoSync)),
+                AttributeSet(), {})
+__OMP_RTL_ATTRS(omp_get_proc_bind,
+                AttributeSet(EnumAttr(ReadOnly), EnumAttr(NoUnwind),
+                             EnumAttr(NoFree), EnumAttr(NoSync)),
+                AttributeSet(), {})
+__OMP_RTL_ATTRS(omp_get_num_places,
+                AttributeSet(EnumAttr(ReadOnly), EnumAttr(NoUnwind),
+                             EnumAttr(NoFree), EnumAttr(NoSync)),
+                AttributeSet(), {})
+__OMP_RTL_ATTRS(omp_get_num_procs,
+                AttributeSet(EnumAttr(ReadOnly), EnumAttr(NoUnwind),
+                             EnumAttr(NoFree), EnumAttr(NoSync)),
+                AttributeSet(), {})
+__OMP_RTL_ATTRS(
+    omp_get_place_proc_ids,
+    AttributeSet(EnumAttr(NoUnwind), EnumAttr(NoFree), EnumAttr(NoSync)),
+    AttributeSet(),
+    ArrayRef<AttributeSet>({AttributeSet(), AttributeSet(EnumAttr(NoCapture),
+                                                         EnumAttr(WriteOnly))}))
+__OMP_RTL_ATTRS(omp_get_place_num,
+                AttributeSet(EnumAttr(ReadOnly), EnumAttr(NoUnwind),
+                             EnumAttr(NoFree), EnumAttr(NoSync)),
+                AttributeSet(), {})
+__OMP_RTL_ATTRS(omp_get_partition_num_places,
+                AttributeSet(EnumAttr(ReadOnly), EnumAttr(NoUnwind),
+                             EnumAttr(NoFree), EnumAttr(NoSync)),
+                AttributeSet(), {})
+// NOTE(review): omp_get_partition_place_nums receives an i32* buffer (see its
+// declaration in add_attributes.ll); ReadOnly matches what that test expects
+// today, but confirm it should not mirror omp_get_place_proc_ids instead.
+__OMP_RTL_ATTRS(omp_get_partition_place_nums,
+                AttributeSet(EnumAttr(ReadOnly), EnumAttr(NoUnwind),
+                             EnumAttr(NoFree), EnumAttr(NoSync)),
+                AttributeSet(), {})
+
+
+// Setter routines are modelled as WriteOnly.
+__OMP_RTL_ATTRS(omp_set_num_threads,
+                AttributeSet(EnumAttr(WriteOnly), EnumAttr(NoUnwind),
+                             EnumAttr(NoFree), EnumAttr(NoSync)),
+                AttributeSet(), {})
+__OMP_RTL_ATTRS(omp_set_dynamic,
+                AttributeSet(EnumAttr(WriteOnly), EnumAttr(NoUnwind),
+                             EnumAttr(NoFree), EnumAttr(NoSync)),
+                AttributeSet(), {})
+__OMP_RTL_ATTRS(omp_set_nested,
+                AttributeSet(EnumAttr(WriteOnly), EnumAttr(NoUnwind),
+                             EnumAttr(NoFree), EnumAttr(NoSync)),
+                AttributeSet(), {})
+__OMP_RTL_ATTRS(omp_set_schedule,
+                AttributeSet(EnumAttr(WriteOnly), EnumAttr(NoUnwind),
+                             EnumAttr(NoFree), EnumAttr(NoSync)),
+                AttributeSet(), {})
+__OMP_RTL_ATTRS(omp_set_max_active_levels,
+                AttributeSet(EnumAttr(WriteOnly), EnumAttr(NoUnwind),
+                             EnumAttr(NoFree), EnumAttr(NoSync)),
+                AttributeSet(), {})
 
 #undef __OMP_RTL_ATTRS
 #undef OMP_RTL_ATTRS
diff --git a/llvm/lib/Transforms/IPO/OpenMPOpt.cpp b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp
--- a/llvm/lib/Transforms/IPO/OpenMPOpt.cpp
+++ b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp
@@ -19,6 +19,7 @@
 #include "llvm/Analysis/CallGraphSCCPass.h"
 #include "llvm/IR/CallSite.h"
 #include "llvm/IR/OpenMPConstants.h"
+#include "llvm/IR/OpenMPIRBuilder.h"
 #include "llvm/Transforms/IPO.h"
 
 using namespace llvm;
@@ -41,7 +42,9 @@
             SmallPtrSetImpl<Function *> &ModuleSlice,
             function_ref CGUpdater = nullptr)
       : M(*(*SCC.begin())->getParent()), SCC(SCC), ModuleSlice(ModuleSlice),
-        CGUpdater(CGUpdater) {}
+        OMPBuilder(M), CGUpdater(CGUpdater) {
+    OMPBuilder.initialize();
+  }
 
   /// Generic information that describes a runtime function
   struct RuntimeFunctionInfo {
@@ -72,7 +75,6 @@
   };
 
   bool run() {
-    initializeTypes(M);
     if (!initializeRuntimeFunctions(M)) {
       DisableOpenMPOptimizations = true;
       return false;
@@ -104,6 +106,30 @@
       Changed |= deduplicateRuntimeCalls(
           *F, RFIs[OMPRTL___kmpc_global_thread_num], GTIdArg);
       Changed |= deduplicateRuntimeCalls(*F, RFIs[OMPRTL_omp_get_thread_num]);
+      // NOTE(review): omp_get_ancestor_thread_num and omp_get_team_size are
+      // called with an i32 argument and omp_get_partition_place_nums with an
+      // output buffer (see add_attributes.ll); confirm deduplicateRuntimeCalls
+      // never merges calls whose arguments differ.
+      Changed |= deduplicateRuntimeCalls(*F, RFIs[OMPRTL_omp_get_num_threads]);
+      Changed |= deduplicateRuntimeCalls(*F, RFIs[OMPRTL_omp_in_parallel]);
+      Changed |= deduplicateRuntimeCalls(*F, RFIs[OMPRTL_omp_get_cancellation]);
+      Changed |= deduplicateRuntimeCalls(*F, RFIs[OMPRTL_omp_get_thread_limit]);
+      Changed |= deduplicateRuntimeCalls(
+          *F, RFIs[OMPRTL_omp_get_supported_active_levels]);
+      Changed |= deduplicateRuntimeCalls(*F, RFIs[OMPRTL_omp_get_level]);
+      Changed |=
+          deduplicateRuntimeCalls(*F, RFIs[OMPRTL_omp_get_ancestor_thread_num]);
+      Changed |= deduplicateRuntimeCalls(*F, RFIs[OMPRTL_omp_get_team_size]);
+      Changed |= deduplicateRuntimeCalls(*F, RFIs[OMPRTL_omp_get_active_level]);
+      Changed |= deduplicateRuntimeCalls(*F, RFIs[OMPRTL_omp_in_final]);
+      Changed |= deduplicateRuntimeCalls(*F, RFIs[OMPRTL_omp_get_proc_bind]);
+      Changed |= deduplicateRuntimeCalls(*F, RFIs[OMPRTL_omp_get_num_places]);
+      Changed |= deduplicateRuntimeCalls(*F, RFIs[OMPRTL_omp_get_num_procs]);
+      Changed |= deduplicateRuntimeCalls(*F, RFIs[OMPRTL_omp_get_place_num]);
+      Changed |= deduplicateRuntimeCalls(
+          *F, RFIs[OMPRTL_omp_get_partition_num_places]);
+      Changed |= deduplicateRuntimeCalls(
+          *F, RFIs[OMPRTL_omp_get_partition_place_nums]);
     }
 
     return Changed;
@@ -224,6 +250,8 @@
     unsigned NumUses = 0;
     if (!RFI.Declaration)
       return NumUses;
+    // Let the OpenMPIRBuilder annotate the declaration for this function kind.
+    OMPBuilder.addAttributes(RFI.Kind, *RFI.Declaration);
     for (Use &U : RFI.Declaration->uses()) {
       if (Instruction *UserI = dyn_cast<Instruction>(U.getUser())) {
         if (ModuleSlice.count(UserI->getFunction())) {
@@ -272,6 +300,9 @@
   /// The slice of the module we are allowed to look at.
   SmallPtrSetImpl<Function *> &ModuleSlice;
 
+  /// An OpenMP-IR-Builder instance
+  OpenMPIRBuilder OMPBuilder;
+
   /// Callback to update the call graph, the first argument is a removed call,
   /// the second an optional replacement call.
   function_ref CGUpdater;
diff --git a/llvm/test/Transforms/OpenMP/add_attributes.ll b/llvm/test/Transforms/OpenMP/add_attributes.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/Transforms/OpenMP/add_attributes.ll
@@ -0,0 +1,696 @@
+; RUN: opt < %s -S -openmpopt | FileCheck %s
+; RUN: opt < %s -S -passes=openmpopt | FileCheck %s
+;
+; TODO: Not all omp_XXXX methods are known to the OpenMPIRBuilder/OpenMPOpt.
+;
+target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
+
+%struct.omp_lock_t = type { i8* }
+%struct.omp_nest_lock_t = type { i8* }
+; Calls each of the runtime routines declared below once; integer, double, pointer and size results are passed on to the use_* helpers.
+define void @call_all(i32 %schedule, %struct.omp_lock_t* %lock, i32 %lock_hint, %struct.omp_nest_lock_t* %nest_lock, i32 %i, i8* %s, i64 %st, i8* %vp, double %d, i32 %proc_bind, i64 %allocator_handle, i8* %cp, i64 %event_handle, i32 %pause_resource) {
+entry:
+  %schedule.addr = alloca i32, align 4
+  %lock.addr = alloca %struct.omp_lock_t*, align 8
+  %lock_hint.addr = alloca i32, align 4
+  %nest_lock.addr = alloca %struct.omp_nest_lock_t*, align 8
+  %i.addr = alloca i32, align 4
+  %s.addr = alloca i8*, align 8
+  %st.addr = alloca i64, align 8
+  %vp.addr = alloca i8*, align 8
+  %d.addr = alloca double, align 8
+  %proc_bind.addr = alloca i32, align 4
+  %allocator_handle.addr = alloca i64, align 8
+  %cp.addr = alloca i8*, align 8
+  %event_handle.addr = alloca i64, align 8
+  %pause_resource.addr = alloca i32, align 4
+  store i32 %schedule, i32* %schedule.addr, align 4
+  store %struct.omp_lock_t* %lock, %struct.omp_lock_t** %lock.addr, align 8
+  store i32 %lock_hint, i32* %lock_hint.addr, align 4
+  store %struct.omp_nest_lock_t* %nest_lock, %struct.omp_nest_lock_t** %nest_lock.addr, align 8
+  store i32 %i, i32* %i.addr, align 4
+  store i8* %s, i8** %s.addr, align 8
+  store i64 %st, i64* %st.addr, align 8
+  store i8* %vp, i8** %vp.addr, align 8
+  store double %d, double* %d.addr, align 8
+  store i32 %proc_bind, i32* %proc_bind.addr, align 4
+  store i64 %allocator_handle, i64* %allocator_handle.addr, align 8
+  store i8* %cp, i8** %cp.addr, align 8
+  store i64 %event_handle, i64* %event_handle.addr, align 8
+  store i32 %pause_resource, i32* %pause_resource.addr, align 4
+  call void @omp_set_num_threads(i32 0)
+  call void @omp_set_dynamic(i32 0)
+  call void @omp_set_nested(i32 0)
+  call void @omp_set_max_active_levels(i32 0)
+  %0 = load i32, i32* %schedule.addr, align 4
+  call void @omp_set_schedule(i32 %0, i32 0)
+  %call = call i32 @omp_get_num_threads()
+  store i32 %call, i32* %i.addr, align 4
+  %1 = load i32, i32* %i.addr, align 4
+  call void @use_int(i32 %1)
+  %call1 = call i32 @omp_get_dynamic()
+  store i32 %call1, i32* %i.addr, align 4
+  %2 = load i32, i32* %i.addr, align 4
+  call void @use_int(i32 %2)
+  %call2 = call i32 @omp_get_nested()
+  store i32 %call2, i32* %i.addr, align 4
+  %3 = load i32, i32* %i.addr, align 4
+  call void @use_int(i32 %3)
+  %call3 = call i32 @omp_get_max_threads()
+  store i32 %call3, i32* %i.addr, align 4
+  %4 = load i32, i32* %i.addr, align 4
+  call void @use_int(i32 %4)
+  %call4 = call i32 @omp_get_thread_num()
+  store i32 %call4, i32* %i.addr, align 4
+  %5 = load i32, i32* %i.addr, align 4
+  call void @use_int(i32 %5)
+  %call5 = call i32 @omp_get_num_procs()
+  store i32 %call5, i32* %i.addr, align 4
+  %6 = load i32, i32* %i.addr, align 4
+  call void @use_int(i32 %6)
+  %call6 = call i32 @omp_in_parallel()
+  store i32 %call6, i32* %i.addr, align 4
+  %7 = load i32, i32* %i.addr, align 4
+  call void @use_int(i32 %7)
+  %call7 = call i32 @omp_in_final()
+  store i32 %call7, i32* %i.addr, align 4
+  %8 = load i32, i32* %i.addr, align 4
+  call void @use_int(i32 %8)
+  %call8 = call i32 @omp_get_active_level()
+  store i32 %call8, i32* %i.addr, align 4
+  %9 = load i32, i32* %i.addr, align 4
+  call void @use_int(i32 %9)
+  %call9 = call i32 @omp_get_level()
+  store i32 %call9, i32* %i.addr, align 4
+  %10 = load i32, i32* %i.addr, align 4
+  call void @use_int(i32 %10)
+  %call10 = call i32 @omp_get_ancestor_thread_num(i32 0)
+  store i32 %call10, i32* %i.addr, align 4
+  %11 = load i32, i32* %i.addr, align 4
+  call void @use_int(i32 %11)
+  %call11 = call i32 @omp_get_team_size(i32 0)
+  store i32 %call11, i32* %i.addr, align 4
+  %12 = load i32, i32* %i.addr, align 4
+  call void @use_int(i32 %12)
+  %call12 = call i32 @omp_get_thread_limit()
+  store i32 %call12, i32* %i.addr, align 4
+  %13 = load i32, i32* %i.addr, align 4
+  call void @use_int(i32 %13)
+  %call13 = call i32 @omp_get_max_active_levels()
+  store i32 %call13, i32* %i.addr, align 4
+  %14 = load i32, i32* %i.addr, align 4
+  call void @use_int(i32 %14)
+  call void @omp_get_schedule(i32* %schedule.addr, i32* %i.addr)
+  %call14 = call i32 @omp_get_max_task_priority()
+  store i32 %call14, i32* %i.addr, align 4
+  %15 = load i32, i32* %i.addr, align 4
+  call void @use_int(i32 %15)
+  %16 = load %struct.omp_lock_t*, %struct.omp_lock_t** %lock.addr, align 8
+  call void @omp_init_lock(%struct.omp_lock_t* %16)
+  %17 = load %struct.omp_lock_t*, %struct.omp_lock_t** %lock.addr, align 8
+  call void @omp_set_lock(%struct.omp_lock_t* %17)
+  %18 = load %struct.omp_lock_t*, %struct.omp_lock_t** %lock.addr, align 8
+  call void @omp_unset_lock(%struct.omp_lock_t* %18)
+  %19 = load %struct.omp_lock_t*, %struct.omp_lock_t** %lock.addr, align 8
+  call void @omp_destroy_lock(%struct.omp_lock_t* %19)
+  %20 = load %struct.omp_lock_t*, %struct.omp_lock_t** %lock.addr, align 8
+  %call15 = call i32 @omp_test_lock(%struct.omp_lock_t* %20)
+  store i32 %call15, i32* %i.addr, align 4
+  %21 = load i32, i32* %i.addr, align 4
+  call void @use_int(i32 %21)
+  %22 = load %struct.omp_nest_lock_t*, %struct.omp_nest_lock_t** %nest_lock.addr, align 8
+  call void @omp_init_nest_lock(%struct.omp_nest_lock_t* %22)
+  %23 = load %struct.omp_nest_lock_t*, %struct.omp_nest_lock_t** %nest_lock.addr, align 8
+  call void @omp_set_nest_lock(%struct.omp_nest_lock_t* %23)
+  %24 = load %struct.omp_nest_lock_t*, %struct.omp_nest_lock_t** %nest_lock.addr, align 8
+  call void @omp_unset_nest_lock(%struct.omp_nest_lock_t* %24)
+  %25 = load %struct.omp_nest_lock_t*, %struct.omp_nest_lock_t** %nest_lock.addr, align 8
+  call void @omp_destroy_nest_lock(%struct.omp_nest_lock_t* %25)
+  %26 = load %struct.omp_nest_lock_t*, %struct.omp_nest_lock_t** %nest_lock.addr, align 8
+  %call16 = call i32 @omp_test_nest_lock(%struct.omp_nest_lock_t* %26)
+  store i32 %call16, i32* %i.addr, align 4
+  %27 = load i32, i32* %i.addr, align 4
+  call void @use_int(i32 %27)
+  %28 = load %struct.omp_lock_t*, %struct.omp_lock_t** %lock.addr, align 8
+  %29 = load i32, i32* %lock_hint.addr, align 4
+  call void @omp_init_lock_with_hint(%struct.omp_lock_t* %28, i32 %29)
+  %30 = load %struct.omp_nest_lock_t*, %struct.omp_nest_lock_t** %nest_lock.addr, align 8
+  %31 = load i32, i32* %lock_hint.addr, align 4
+  call void @omp_init_nest_lock_with_hint(%struct.omp_nest_lock_t* %30, i32 %31)
+  %call17 = call double @omp_get_wtime()
+  store double %call17, double* %d.addr, align 8
+  %32 = load double, double* %d.addr, align 8
+  call void @use_double(double %32)
+  %call18 = call double @omp_get_wtick()
+  store double %call18, double* %d.addr, align 8
+  %33 = load double, double* %d.addr, align 8
+  call void @use_double(double %33)
+  %call19 = call i32 @omp_get_default_device()
+  store i32 %call19, i32* %i.addr, align 4
+  %34 = load i32, i32* %i.addr, align 4
+  call void @use_int(i32 %34)
+  call void @omp_set_default_device(i32 0)
+  %call20 = call i32 @omp_is_initial_device()
+  store i32 %call20, i32* %i.addr, align 4
+  %35 = load i32, i32* %i.addr, align 4
+  call void @use_int(i32 %35)
+  %call21 = call i32 @omp_get_num_devices()
+  store i32 %call21, i32* %i.addr, align 4
+  %36 = load i32, i32* %i.addr, align 4
+  call void @use_int(i32 %36)
+  %call22 = call i32 @omp_get_num_teams()
+  store i32 %call22, i32* %i.addr, align 4
+  %37 = load i32, i32* %i.addr, align 4
+  call void @use_int(i32 %37)
+  %call23 = call i32 @omp_get_team_num()
+  store i32 %call23, i32* %i.addr, align 4
+  %38 = load i32, i32* %i.addr, align 4
+  call void @use_int(i32 %38)
+  %call24 = call i32 @omp_get_cancellation()
+  store i32 %call24, i32* %i.addr, align 4
+  %39 = load i32, i32* %i.addr, align 4
+  call void @use_int(i32 %39)
+  %call25 = call i32 @omp_get_initial_device()
+  store i32 %call25, i32* %i.addr, align 4
+  %40 = load i32, i32* %i.addr, align 4
+  call void @use_int(i32 %40)
+  %41 = load i64, i64* %st.addr, align 8
+  %42 = load i32, i32* %i.addr, align 4
+  %call26 = call i8* @omp_target_alloc(i64 %41, i32 %42)
+  store i8* %call26, i8** %vp.addr, align 8
+  %43 = load i8*, i8** %vp.addr, align 8
+  call void @use_voidptr(i8* %43)
+  %44 = load i8*, i8** %vp.addr, align 8
+  %45 = load i32, i32* %i.addr, align 4
+  call void @omp_target_free(i8* %44, i32 %45)
+  %46 = load i8*, i8** %vp.addr, align 8
+  %47 = load i32, i32* %i.addr, align 4
+  %call27 = call i32 @omp_target_is_present(i8* %46, i32 %47)
+  store i32 %call27, i32* %i.addr, align 4
+  %48 = load i32, i32* %i.addr, align 4
+  call void @use_int(i32 %48)
+  %49 = load i8*, i8** %vp.addr, align 8
+  %50 = load i8*, i8** %vp.addr, align 8
+  %51 = load i64, i64* %st.addr, align 8
+  %52 = load i64, i64* %st.addr, align 8
+  %53 = load i64, i64* %st.addr, align 8
+  %54 = load i32, i32* %i.addr, align 4
+  %55 = load i32, i32* %i.addr, align 4
+  %call28 = call i32 @omp_target_memcpy(i8* %49, i8* %50, i64 %51, i64 %52, i64 %53, i32 %54, i32 %55)
+  store i32 %call28, i32* %i.addr, align 4
+  %56 = load i32, i32* %i.addr, align 4
+  call void @use_int(i32 %56)
+  %57 = load i8*, i8** %vp.addr, align 8
+  %58 = load i8*, i8** %vp.addr, align 8
+  %59 = load i64, i64* %st.addr, align 8
+  %60 = load i64, i64* %st.addr, align 8
+  %61 = load i32, i32* %i.addr, align 4
+  %call29 = call i32 @omp_target_associate_ptr(i8* %57, i8* %58, i64 %59, i64 %60, i32 %61)
+  store i32 %call29, i32* %i.addr, align 4
+  %62 = load i32, i32* %i.addr, align 4
+  call void @use_int(i32 %62)
+  %63 = load i8*, i8** %vp.addr, align 8
+  %64 = load i32, i32* %i.addr, align 4
+  %call30 = call i32 @omp_target_disassociate_ptr(i8* %63, i32 %64)
+  store i32 %call30, i32* %i.addr, align 4
+  %65 = load i32, i32* %i.addr, align 4
+  call void @use_int(i32 %65)
+  %call31 = call i32 @omp_get_device_num()
+  store i32 %call31, i32* %i.addr, align 4
+  %66 = load i32, i32* %i.addr, align 4
+  call void @use_int(i32 %66)
+  %call32 = call i32 @omp_get_proc_bind()
+  store i32 %call32, i32* %proc_bind.addr, align 4
+  %call33 = call i32 @omp_get_num_places()
+  store i32 %call33, i32* %i.addr, align 4
+  %67 = load i32, i32* %i.addr, align 4
+  call void @use_int(i32 %67)
+  %call34 = call i32 @omp_get_place_num_procs(i32 0)
+  store i32 %call34, i32* %i.addr, align 4
+  %68 = load i32, i32* %i.addr, align 4
+  call void @use_int(i32 %68)
+  %69 = load i32, i32* %i.addr, align 4
+  call void @omp_get_place_proc_ids(i32 %69, i32* %i.addr)
+  %call35 = call i32 @omp_get_place_num()
+  store i32 %call35, i32* %i.addr, align 4
+  %70 = load i32, i32* %i.addr, align 4
+  call void @use_int(i32 %70)
+  %call36 = call i32 @omp_get_partition_num_places()
+  store i32 %call36, i32* %i.addr, align 4
+  %71 = load i32, i32* %i.addr, align 4
+  call void @use_int(i32 %71)
+  call void @omp_get_partition_place_nums(i32* %i.addr)
+  %72 = load i32, i32* %i.addr, align 4
+  %73 = load i32, i32* %i.addr, align 4
+  %74 = load i8*, i8** %vp.addr, align 8
+  %call37 = call i32 @omp_control_tool(i32 %72, i32 %73, i8* %74)
+  store i32 %call37, i32* %i.addr, align 4
+  %75 = load i32, i32* %i.addr, align 4
+  call void @use_int(i32 %75)
+  %76 = load i64, i64* %allocator_handle.addr, align 8
+  call void @omp_destroy_allocator(i64 %76)
+  %77 = load i64, i64* %allocator_handle.addr, align 8
+  call void @omp_set_default_allocator(i64 %77)
+  %call38 = call i64 @omp_get_default_allocator()
+  store i64 %call38, i64* %allocator_handle.addr, align 8
+  %78 = load i64, i64* %st.addr, align 8
+  %79 = load i64, i64* %allocator_handle.addr, align 8
+  %call39 = call i8* @omp_alloc(i64 %78, i64 %79)
+  store i8* %call39, i8** %vp.addr, align 8
+  %80 = load i8*, i8** %vp.addr, align 8
+  call void @use_voidptr(i8* %80)
+  %81 = load i8*, i8** %vp.addr, align 8
+  %82 = load i64, i64* %allocator_handle.addr, align 8
+  call void @omp_free(i8* %81, i64 %82)
+  %83 = load i64, i64* %st.addr, align 8
+  %84 = load i64, i64* %allocator_handle.addr, align 8
+  %call40 = call i8* @omp_alloc(i64 %83, i64 %84)
+  store i8* %call40, i8** %vp.addr, align 8
+  %85 = load i8*, i8** %vp.addr, align 8
+  call void @use_voidptr(i8* %85)
+  %86 = load i8*, i8** %vp.addr, align 8
+  %87 = load i64, i64* %allocator_handle.addr, align 8
+  call void @omp_free(i8* %86, i64 %87)
+  %88 = load i8*, i8** %s.addr, align 8
+  call void @ompc_set_affinity_format(i8* %88)
+  %89 = load i8*, i8** %cp.addr, align 8
+  %90 = load i64, i64* %st.addr, align 8
+  %call41 = call i64 @ompc_get_affinity_format(i8* %89, i64 %90)
+  store i64 %call41, i64* %st.addr, align 8
+  %91 = load i64, i64* %st.addr, align 8
+  call void @use_sizet(i64 %91)
+  %92 = load i8*, i8** %s.addr, align 8
+  call void @ompc_display_affinity(i8* %92)
+  %93 = load i8*, i8** %cp.addr, align 8
+  %94 = load i64, i64* %st.addr, align 8
+  %95 = load i8*, i8** %s.addr, align 8
+  %call42 = call i64 @ompc_capture_affinity(i8* %93, i64 %94, i8* %95)
+  store i64 %call42, i64* %st.addr, align 8
+  %96 = load i64, i64* %st.addr, align 8
+  call void @use_sizet(i64 %96)
+  %97 = load i64, i64* %event_handle.addr, align 8
+  call void @omp_fulfill_event(i64 %97)
+  %98 = load i32, i32* %pause_resource.addr, align 4
+  %99 = load i32, i32* %i.addr, align 4
+  %call43 = call i32 @omp_pause_resource(i32 %98, i32 %99)
+  store i32 %call43, i32* %i.addr, align 4
+  %100 = load i32, i32* %i.addr, align 4
+  call void @use_int(i32 %100)
+  %101 = load i32, i32* %pause_resource.addr, align 4
+  %call44 = call i32 @omp_pause_resource_all(i32 %101)
+  store i32 %call44, i32* %i.addr, align 4
+  %102 = load i32, i32* %i.addr, align 4
+  call void @use_int(i32 %102)
+  %call45 = call i32 @omp_get_supported_active_levels()
+  store i32 %call45, i32* %i.addr, align 4
+  %103 = load i32, i32* %i.addr, align 4
+  call void @use_int(i32 %103)
+  ret void
+}
+
+declare dso_local void @omp_set_num_threads(i32)
+
+declare dso_local void @omp_set_dynamic(i32)
+
+declare dso_local void @omp_set_nested(i32)
+
+declare dso_local void @omp_set_max_active_levels(i32)
+
+declare dso_local void @omp_set_schedule(i32, i32)
+
+declare dso_local i32 @omp_get_num_threads()
+
+declare dso_local void @use_int(i32)
+
+declare dso_local i32 @omp_get_dynamic()
+
+declare dso_local i32 @omp_get_nested()
+
+declare dso_local i32 @omp_get_max_threads()
+
+declare dso_local i32 @omp_get_thread_num()
+
+declare dso_local i32 @omp_get_num_procs()
+
+declare dso_local i32 @omp_in_parallel()
+
+declare dso_local i32 @omp_in_final()
+
+declare dso_local i32 @omp_get_active_level()
+
+declare dso_local i32 @omp_get_level()
+
+declare dso_local i32 @omp_get_ancestor_thread_num(i32)
+
+declare dso_local i32 @omp_get_team_size(i32)
+
+declare dso_local i32 @omp_get_thread_limit()
+
+declare dso_local i32 @omp_get_max_active_levels()
+
+declare dso_local void @omp_get_schedule(i32*, i32*)
+
+declare dso_local i32 @omp_get_max_task_priority()
+
+declare dso_local void @omp_init_lock(%struct.omp_lock_t*)
+
+declare dso_local void @omp_set_lock(%struct.omp_lock_t*)
+
+declare dso_local void @omp_unset_lock(%struct.omp_lock_t*)
+
+declare dso_local void @omp_destroy_lock(%struct.omp_lock_t*)
+
+declare dso_local i32 @omp_test_lock(%struct.omp_lock_t*)
+
+declare dso_local void @omp_init_nest_lock(%struct.omp_nest_lock_t*)
+
+declare dso_local void @omp_set_nest_lock(%struct.omp_nest_lock_t*)
+
+declare dso_local void @omp_unset_nest_lock(%struct.omp_nest_lock_t*)
+
+declare dso_local void @omp_destroy_nest_lock(%struct.omp_nest_lock_t*)
+
+declare dso_local i32 @omp_test_nest_lock(%struct.omp_nest_lock_t*)
+
+declare dso_local void @omp_init_lock_with_hint(%struct.omp_lock_t*, i32)
+
+declare dso_local void @omp_init_nest_lock_with_hint(%struct.omp_nest_lock_t*, i32)
+
+declare dso_local double @omp_get_wtime()
+
+declare dso_local void @use_double(double)
+
+declare dso_local double @omp_get_wtick()
+
+declare dso_local i32 @omp_get_default_device()
+
+declare dso_local void @omp_set_default_device(i32)
+
+declare dso_local i32 @omp_is_initial_device()
+
+declare dso_local i32 @omp_get_num_devices()
+
+declare dso_local i32 @omp_get_num_teams()
+
+declare dso_local i32 @omp_get_team_num()
+
+declare dso_local i32 @omp_get_cancellation()
+
+declare dso_local i32 @omp_get_initial_device()
+
+declare dso_local i8* @omp_target_alloc(i64, i32)
+
+declare dso_local void @use_voidptr(i8*)
+
+declare dso_local void @omp_target_free(i8*, i32)
+
+declare dso_local i32 @omp_target_is_present(i8*, i32)
+
+declare dso_local i32 @omp_target_memcpy(i8*, i8*, i64, i64, i64, i32, i32)
+
+declare dso_local i32 @omp_target_associate_ptr(i8*, i8*, i64, i64, i32)
+
+declare dso_local i32 @omp_target_disassociate_ptr(i8*, i32)
+
+declare dso_local i32 @omp_get_device_num()
+
+declare dso_local i32 @omp_get_proc_bind()
+
+declare dso_local i32 @omp_get_num_places()
+
+declare dso_local i32 @omp_get_place_num_procs(i32)
+
+declare dso_local void @omp_get_place_proc_ids(i32, i32*)
+
+declare dso_local i32 @omp_get_place_num()
+
+declare dso_local i32 @omp_get_partition_num_places()
+
+declare dso_local void @omp_get_partition_place_nums(i32*)
+
+declare dso_local i32 @omp_control_tool(i32, i32, i8*)
+
+declare dso_local void @omp_destroy_allocator(i64)
+
+declare dso_local void @omp_set_default_allocator(i64)
+
+declare dso_local i64 @omp_get_default_allocator()
+
+declare dso_local i8* @omp_alloc(i64, i64)
+
+declare dso_local void @omp_free(i8*, i64)
+
+declare dso_local void @ompc_set_affinity_format(i8*)
+
+declare dso_local i64 @ompc_get_affinity_format(i8*, i64)
+
+declare dso_local void @use_sizet(i64)
+
+declare dso_local void @ompc_display_affinity(i8*)
+
+declare dso_local i64 @ompc_capture_affinity(i8*, i64, i8*)
+
+declare dso_local void @omp_fulfill_event(i64)
+
+declare dso_local i32 @omp_pause_resource(i32, i32)
+
+declare dso_local i32 @omp_pause_resource_all(i32)
+
+declare dso_local i32 @omp_get_supported_active_levels()
+; Expected declarations in the output: some carry a "Function Attrs" line with the listed attributes, every other one must be printed without such a line.
+; CHECK: ; Function Attrs: nofree nosync nounwind writeonly
+; CHECK-NEXT: declare dso_local void @omp_set_num_threads(i32)
+
+; CHECK: ; Function Attrs: nofree nosync nounwind writeonly
+; CHECK-NEXT: declare dso_local void @omp_set_dynamic(i32)
+
+; CHECK: ; Function Attrs: nofree nosync nounwind writeonly
+; CHECK-NEXT: declare dso_local void @omp_set_nested(i32)
+
+; CHECK: ; Function Attrs: nofree nosync nounwind writeonly
+; CHECK-NEXT: declare dso_local void @omp_set_max_active_levels(i32)
+
+; CHECK: ; Function Attrs: nofree nosync nounwind writeonly
+; CHECK-NEXT: declare dso_local void @omp_set_schedule(i32, i32)
+
+; CHECK: ; Function Attrs: nofree nosync nounwind readonly
+; CHECK-NEXT: declare dso_local i32 @omp_get_num_threads() #1
+
+; CHECK-NOT: Function Attrs
+; CHECK: declare dso_local void @use_int(i32)
+
+; CHECK: ; Function Attrs: nofree nosync nounwind readonly
+; CHECK-NEXT: declare dso_local i32 @omp_get_dynamic() #1
+
+; CHECK: ; Function Attrs: nofree nosync nounwind readonly
+; CHECK-NEXT: declare dso_local i32 @omp_get_nested() #1
+
+; CHECK: ; Function Attrs: nofree nosync nounwind readonly
+; CHECK-NEXT: declare dso_local i32 @omp_get_max_threads() #1
+
+; CHECK: ; Function Attrs: nofree nosync nounwind readonly
+; CHECK-NEXT: declare dso_local i32 @omp_get_thread_num() #1
+
+; CHECK: ; Function Attrs: nofree nosync nounwind readonly
+; CHECK-NEXT: declare dso_local i32 @omp_get_num_procs() #1
+
+; CHECK: ; Function Attrs: nofree nosync nounwind readonly
+; CHECK-NEXT: declare dso_local i32 @omp_in_parallel() #1
+
+; CHECK: ; Function Attrs: nofree nosync nounwind readonly
+; CHECK-NEXT: declare dso_local i32 @omp_in_final() #1
+
+; CHECK: ; Function Attrs: nofree nosync nounwind readonly
+; CHECK-NEXT: declare dso_local i32 @omp_get_active_level() #1
+
+; CHECK: ; Function Attrs: nofree nosync nounwind readonly
+; CHECK-NEXT: declare dso_local i32 @omp_get_level() #1
+
+; CHECK: ; Function Attrs: nofree nosync nounwind readonly
+; CHECK-NEXT: declare dso_local i32 @omp_get_ancestor_thread_num(i32) #1
+
+; CHECK: ; Function Attrs: nofree nosync nounwind readonly
+; CHECK-NEXT: declare dso_local i32 @omp_get_team_size(i32) #1
+
+; CHECK: ; Function Attrs: nofree nosync nounwind readonly
+; CHECK-NEXT: declare dso_local i32 @omp_get_thread_limit() #1
+
+; CHECK: ; Function Attrs: nofree nosync nounwind readonly
+; CHECK-NEXT: declare dso_local i32 @omp_get_max_active_levels() #1
+
+; CHECK: ; Function Attrs: nofree nosync nounwind
+; CHECK-NEXT: declare dso_local void @omp_get_schedule(i32* nocapture writeonly, i32* nocapture writeonly) #2
+
+; CHECK-NOT: Function Attrs
+; CHECK: declare dso_local i32 @omp_get_max_task_priority()
+
+; CHECK-NOT: Function Attrs
+; CHECK: declare dso_local void @omp_init_lock(%struct.omp_lock_t*)
+
+; CHECK-NOT: Function Attrs
+; CHECK: declare dso_local void @omp_set_lock(%struct.omp_lock_t*)
+
+; CHECK-NOT: Function Attrs
+; CHECK: declare dso_local void @omp_unset_lock(%struct.omp_lock_t*)
+
+; CHECK-NOT: Function Attrs
+; CHECK: declare dso_local void @omp_destroy_lock(%struct.omp_lock_t*)
+
+; CHECK-NOT: Function Attrs
+; CHECK: declare dso_local i32 @omp_test_lock(%struct.omp_lock_t*)
+
+; CHECK-NOT: Function Attrs
+; CHECK: declare dso_local void @omp_init_nest_lock(%struct.omp_nest_lock_t*)
+
+; CHECK-NOT: Function Attrs
+; CHECK: declare dso_local void @omp_set_nest_lock(%struct.omp_nest_lock_t*)
+
+; CHECK-NOT: Function Attrs
+; CHECK: declare dso_local void @omp_unset_nest_lock(%struct.omp_nest_lock_t*)
+
+; CHECK-NOT: Function Attrs
+; CHECK: declare dso_local void @omp_destroy_nest_lock(%struct.omp_nest_lock_t*)
+
+; CHECK-NOT: Function Attrs
+; CHECK: declare dso_local i32 @omp_test_nest_lock(%struct.omp_nest_lock_t*)
+
+; CHECK-NOT: Function Attrs
+; CHECK: declare dso_local void @omp_init_lock_with_hint(%struct.omp_lock_t*, i32)
+
+; CHECK-NOT: Function Attrs
+; CHECK: declare dso_local void @omp_init_nest_lock_with_hint(%struct.omp_nest_lock_t*, i32)
+
+; CHECK-NOT: Function Attrs
+; CHECK: declare dso_local double @omp_get_wtime()
+
+; CHECK-NOT: Function Attrs
+; CHECK: declare dso_local void @use_double(double)
+
+; CHECK-NOT: Function Attrs
+; CHECK: declare dso_local double @omp_get_wtick()
+
+; CHECK-NOT: Function Attrs
+; CHECK: declare dso_local i32 @omp_get_default_device()
+
+; CHECK-NOT: Function Attrs
+; CHECK: declare dso_local void @omp_set_default_device(i32)
+
+; CHECK-NOT: Function Attrs
+; CHECK: declare dso_local i32 @omp_is_initial_device()
+
+; CHECK-NOT: Function Attrs
+; CHECK: declare dso_local i32 @omp_get_num_devices()
+
+; CHECK-NOT: Function Attrs
+; CHECK: declare dso_local i32 @omp_get_num_teams()
+
+; CHECK-NOT: Function Attrs
+; CHECK: declare dso_local i32 @omp_get_team_num()
+
+; CHECK: ; Function Attrs: nofree nosync nounwind readonly
+; CHECK-NEXT: declare dso_local i32 @omp_get_cancellation() #1
+
+; CHECK-NOT: Function Attrs
+; CHECK: declare dso_local i32 @omp_get_initial_device()
+
+; CHECK-NOT: Function Attrs
+; CHECK: declare dso_local i8* @omp_target_alloc(i64, i32)
+
+; CHECK-NOT: Function Attrs
+; CHECK: declare dso_local void @use_voidptr(i8*)
+
+; CHECK-NOT: Function Attrs
+; CHECK: declare dso_local void @omp_target_free(i8*, i32)
+
+; CHECK-NOT: Function Attrs
+; CHECK: declare dso_local i32 @omp_target_is_present(i8*, i32)
+
+; CHECK-NOT: Function Attrs
+; CHECK: declare dso_local i32 @omp_target_memcpy(i8*, i8*, i64, i64, i64, i32, i32)
+
+; CHECK-NOT: Function Attrs
+; CHECK: declare dso_local i32 @omp_target_associate_ptr(i8*, i8*, i64, i64, i32)
+
+; CHECK-NOT: Function Attrs
+; CHECK: declare dso_local i32 @omp_target_disassociate_ptr(i8*, i32)
+
+; CHECK-NOT: Function Attrs
+; CHECK: declare dso_local i32 @omp_get_device_num()
+
+; CHECK: ; Function Attrs: nofree nosync nounwind readonly
+; CHECK-NEXT: declare dso_local i32 @omp_get_proc_bind() #1
+
+; CHECK: ; Function Attrs: nofree nosync nounwind readonly
+; CHECK-NEXT: declare dso_local i32 @omp_get_num_places() #1
+
+; CHECK-NOT: Function Attrs
+; CHECK: declare dso_local i32 @omp_get_place_num_procs(i32)
+
+; CHECK: ; Function Attrs: nofree nosync nounwind
+; CHECK-NEXT: declare dso_local void @omp_get_place_proc_ids(i32, i32* nocapture writeonly) #2
+
+; CHECK: ; Function Attrs: nofree nosync nounwind readonly
+; CHECK-NEXT: declare dso_local i32 @omp_get_place_num() #1
+
+; CHECK: ; Function Attrs: nofree nosync nounwind readonly
+; CHECK-NEXT: declare dso_local i32 @omp_get_partition_num_places() #1
+; NOTE(review): the next expectation puts readonly on a routine that is handed an i32* buffer (compare omp_get_place_proc_ids above) - confirm this is intended.
+; CHECK: ; Function Attrs: nofree nosync nounwind readonly
+; CHECK-NEXT: declare dso_local void @omp_get_partition_place_nums(i32*) #1
+
+; CHECK-NOT: Function Attrs
+; CHECK: declare dso_local i32 @omp_control_tool(i32, i32, i8*)
+
+; CHECK-NOT: Function Attrs
+; CHECK: declare dso_local void @omp_destroy_allocator(i64)
+
+; CHECK-NOT: Function Attrs
+; CHECK: declare dso_local void @omp_set_default_allocator(i64)
+
+; CHECK-NOT: Function Attrs
+; CHECK: declare dso_local i64 @omp_get_default_allocator()
+
+; CHECK-NOT: Function Attrs
+; CHECK: declare dso_local i8* @omp_alloc(i64, i64)
+
+; CHECK-NOT: Function Attrs
+; CHECK: declare dso_local void @omp_free(i8*, i64)
+
+; CHECK-NOT: Function Attrs
+; CHECK: declare dso_local void @ompc_set_affinity_format(i8*)
+
+; CHECK-NOT: Function Attrs
+; CHECK: declare dso_local i64 @ompc_get_affinity_format(i8*, i64)
+
+; CHECK-NOT: Function Attrs
+; CHECK: declare dso_local void @use_sizet(i64)
+
+; CHECK-NOT: Function Attrs
+; CHECK: declare dso_local void @ompc_display_affinity(i8*)
+
+; CHECK-NOT: Function Attrs
+; CHECK: declare dso_local i64 @ompc_capture_affinity(i8*, i64, i8*)
+
+; CHECK-NOT: Function Attrs
+; CHECK: declare dso_local void @omp_fulfill_event(i64)
+
+; CHECK-NOT: Function Attrs
+; CHECK: declare dso_local i32 @omp_pause_resource(i32, i32)
+
+; CHECK-NOT: Function Attrs
+; CHECK: declare dso_local i32 @omp_pause_resource_all(i32)
+
+; CHECK: ; Function Attrs: nofree nosync nounwind readonly
+; CHECK-NEXT: declare dso_local i32 @omp_get_supported_active_levels() #1
+
+; CHECK-NOT: Function Attrs
+; CHECK: declare void @__kmpc_barrier(%struct.ident_t*, i32)
+
+; CHECK-NOT: Function Attrs
+; CHECK: declare i32 @__kmpc_cancel_barrier(%struct.ident_t*, i32)
+
+; CHECK: ; Function Attrs: nofree nosync nounwind readonly
+; CHECK-NEXT: declare i32 @__kmpc_global_thread_num(%struct.ident_t*) #1
+
+; CHECK-NOT: Function Attrs
+; CHECK: declare void @__kmpc_fork_call(%struct.ident_t*, i32, %struct.ident_t*, ...)