diff --git a/llvm/lib/Transforms/IPO/OpenMPOpt.cpp b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp --- a/llvm/lib/Transforms/IPO/OpenMPOpt.cpp +++ b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp @@ -497,6 +497,16 @@ /// one we abort as the kernel is malformed. CallBase *KernelDeinitCB = nullptr; + /// A map from a function to its constant return value. If the value is + /// nullptr, the function cannot be folded. + SmallDenseMap FoldableFunctions; + + /// Flag to indicate if the associated function is a kernel entry. + bool IsKernelEntry = false; + + /// State to track what kernel entries can reach the associated function. + BooleanStateWithPtrSetVector ReachingKernelEntries; + /// Abstract State interface ///{ @@ -517,6 +527,7 @@ IsAtFixpoint = true; SPMDCompatibilityTracker.indicatePessimisticFixpoint(); ReachedUnknownParallelRegions.indicatePessimisticFixpoint(); + ReachingKernelEntries.indicatePessimisticFixpoint(); return ChangeStatus::CHANGED; } @@ -537,6 +548,11 @@ return false; if (ReachedUnknownParallelRegions != RHS.ReachedUnknownParallelRegions) return false; + if (ReachingKernelEntries != RHS.ReachingKernelEntries) + return false; + if (FoldableFunctions != RHS.FoldableFunctions) + return false; + return true; } @@ -566,6 +582,7 @@ SPMDCompatibilityTracker ^= KIS.SPMDCompatibilityTracker; ReachedKnownParallelRegions ^= KIS.ReachedKnownParallelRegions; ReachedUnknownParallelRegions ^= KIS.ReachedUnknownParallelRegions; + ReachingKernelEntries ^= KIS.ReachingKernelEntries; return *this; } @@ -2725,6 +2742,10 @@ if (!OMPInfoCache.Kernels.count(Fn)) return; + // Add itself to the reaching kernel and set IsKernelEntry. + ReachingKernelEntries.insert(Fn); + IsKernelEntry = true; + OMPInformationCache::RuntimeFunctionInfo &InitRFI = OMPInfoCache.RFIs[OMPRTL___kmpc_target_init]; OMPInformationCache::RuntimeFunctionInfo &DeinitRFI = @@ -2826,21 +2847,36 @@ /// Modify the IR based on the KernelInfoState as the fixpoint iteration is /// finished now. 
ChangeStatus manifest(Attributor &A) override {
+    ChangeStatus Change = ChangeStatus::UNCHANGED;
+
+    // Fold all calls for which a replacement constant was recorded.
+    for (std::pair<CallBase *, Constant *> &P : FoldableFunctions) {
+      if (P.second == nullptr)
+        continue;
+      // Ask the Attributor to replace the call with the constant and delete it.
+      A.changeValueAfterManifest(*P.first, *P.second);
+      A.deleteAfterManifest(*P.first);
+
+      Change = ChangeStatus::CHANGED;
+    }
+
     // If we are not looking at a kernel with __kmpc_target_init and
     // __kmpc_target_deinit call we cannot actually manifest the information.
     if (!KernelInitCB || !KernelDeinitCB)
-      return ChangeStatus::UNCHANGED;
+      return Change;
 
     // Known SPMD-mode kernels need no manifest changes.
     if (SPMDCompatibilityTracker.isKnown())
-      return ChangeStatus::UNCHANGED;
+      return Change;
 
     // If we can we change the execution mode to SPMD-mode otherwise we build a
     // custom state machine.
     if (!changeToSPMDMode(A))
-      buildCustomStateMachine(A);
+      Change = Change | buildCustomStateMachine(A);
+    else
+      Change = ChangeStatus::CHANGED;
 
-    return ChangeStatus::CHANGED;
+    return Change;
   }
 
   bool changeToSPMDMode(Attributor &A) {
@@ -3203,6 +3239,13 @@
     if (!A.checkForAllReadWriteInstructions(CheckRWInst, *this))
       SPMDCompatibilityTracker.indicatePessimisticFixpoint();
 
+    if (!IsKernelEntry)
+      updateReachingKernelEntries(A);
+
+    // Update info regarding execution mode. Run this unconditionally so that
+    // stale constants are dropped once ReachingKernelEntries is pessimistic.
+    updateSPMDFolding(A);
+
     // Callback to check a call instruction.
     auto CheckCallInst = [&](Instruction &I) {
       auto &CB = cast<CallBase>(I);
@@ -3210,6 +3253,19 @@
           *this, IRPosition::callsite_function(CB), DepClassTy::OPTIONAL);
       if (CBAA.getState().isValidState())
         getState() ^= CBAA.getState();
+
+      Function *Callee = CB.getCalledFunction();
+      if (Callee) {
+        // We need to propagate information to the callee, but since the
+        // construction of AA always starts with kernel entries, we have to
+        // create AAKernelInfoFunction for all called functions. However, here
+        // the caller doesn't depend on the callee.
+        // TODO: We might want to change the dependence here later if we need
+        // information from callee to caller.
+        A.getOrCreateAAFor<AAKernelInfo>(IRPosition::function(*Callee), this,
+                                         DepClassTy::NONE);
+      }
+
       return true;
     };
 
@@ -3219,6 +3275,94 @@
     return StateBefore == getState() ? ChangeStatus::UNCHANGED
                                      : ChangeStatus::CHANGED;
   }
+
+private:
+  /// Update info regarding reaching kernels.
+  ///
+  /// Combines the ReachingKernelEntries of every caller of the associated
+  /// function into this state. The state becomes pessimistic if a caller's
+  /// state is invalid or the walk over all call sites fails.
+  void updateReachingKernelEntries(Attributor &A) {
+    auto PredCallSite = [&](AbstractCallSite ACS) {
+      Function *Caller = ACS.getInstruction()->getFunction();
+
+      assert(Caller && "Caller is nullptr");
+
+      // Register a required dependence so this attribute is updated again
+      // whenever the caller's reaching kernel entries change.
+      auto &CAA = A.getOrCreateAAFor<AAKernelInfo>(
+          IRPosition::function(*Caller), this, DepClassTy::REQUIRED);
+      if (CAA.isValidState()) {
+        ReachingKernelEntries ^= CAA.ReachingKernelEntries;
+        return true;
+      }
+
+      // We lost track of the caller of the associated function, any kernel
+      // could reach now.
+      ReachingKernelEntries.indicatePessimisticFixpoint();
+
+      return true;
+    };
+
+    bool AllCallSitesKnown;
+    if (!A.checkForAllCallSites(PredCallSite, *this,
+                                true /* RequireAllCallSites */,
+                                AllCallSitesKnown))
+      ReachingKernelEntries.indicatePessimisticFixpoint();
+  }
+
+  /// Update information regarding folding SPMD mode function calls.
+  ///
+  /// Sets the constant recorded in FoldableFunctions for every call to
+  /// __kmpc_is_spmd_exec_mode: 1, 0, or nullptr if it cannot be folded.
+  void updateSPMDFolding(Attributor &A) {
+    unsigned Count = 0;
+
+    for (Kernel K : ReachingKernelEntries) {
+      // Nothing can be folded once the reaching kernels are unknown.
+      if (!ReachingKernelEntries.isAssumed())
+        break;
+
+      auto &AA = A.getAAFor<AAKernelInfo>(*this, IRPosition::function(*K),
+                                          DepClassTy::REQUIRED);
+
+      if (!AA.isValidState()) {
+        ReachingKernelEntries.indicatePessimisticFixpoint();
+        break;
+      }
+
+      if (AA.SPMDCompatibilityTracker.isAssumed())
+        ++Count;
+    }
+
+    // Assume reaching kernels are in a mixture of SPMD and non-SPMD mode.
+    // Update all function calls to __kmpc_is_spmd_exec_mode to nullptr.
+    Constant *C = nullptr;
+
+    if (ReachingKernelEntries.isAssumed()) {
+      auto &Ctx = getAnchorValue().getContext();
+
+      if (Count == 0) {
+        // All reaching kernels are in non-SPMD mode. Update all function
+        // calls to __kmpc_is_spmd_exec_mode to 0.
+        C = ConstantInt::get(Type::getInt8Ty(Ctx), 0);
+      } else if (Count == ReachingKernelEntries.size()) {
+        // All reaching kernels are in SPMD mode. Update all function calls to
+        // __kmpc_is_spmd_exec_mode to 1.
+        C = ConstantInt::get(Type::getInt8Ty(Ctx), 1);
+      }
+    }
+
+    auto &OMPInfoCache = static_cast<OMPInformationCache &>(A.getInfoCache());
+    OMPInformationCache::RuntimeFunctionInfo &IsSPMDExecModeRFI =
+        OMPInfoCache.RFIs[OMPRTL___kmpc_is_spmd_exec_mode];
+
+    for (std::pair<CallBase *, Constant *> &P : FoldableFunctions) {
+      CallBase *CB = P.first;
+      if (CB->getCalledFunction() == IsSPMDExecModeRFI.Declaration)
+        P.second = C;
+    }
+  }
 };
 
 /// The call site kernel info abstract attribute, basically, what can we say
diff --git a/llvm/test/Transforms/OpenMP/is_spmd_exec_mode_foldable.ll b/llvm/test/Transforms/OpenMP/is_spmd_exec_mode_foldable.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/Transforms/OpenMP/is_spmd_exec_mode_foldable.ll
@@ -0,0 +1,292 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --check-globals
+; RUN: opt -S -passes=openmp-opt < %s | FileCheck %s
+target triple = "nvptx64"
+
+%struct.ident_t = type { i32, i32, i32, i32, i8* }
+
+@__omp_offloading_fd00_22cf4bf_foo_l4_exec_mode = weak constant i8 0
+@__omp_offloading_fd00_22cf4bf_foo_l9_exec_mode = weak constant i8 0
+@llvm.compiler.used = appending global [2 x i8*] [i8* @__omp_offloading_fd00_22cf4bf_foo_l4_exec_mode, i8* @__omp_offloading_fd00_22cf4bf_foo_l9_exec_mode], section "llvm.metadata"
+@execution_param = internal local_unnamed_addr addrspace(3) global i32 undef, align 4
+
+; Function Attrs: convergent noinline norecurse nounwind optnone
+;.
+; CHECK: @[[__OMP_OFFLOADING_FD00_22CF4BF_FOO_L4_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 0 +; CHECK: @[[__OMP_OFFLOADING_FD00_22CF4BF_FOO_L9_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 0 +; CHECK: @[[LLVM_COMPILER_USED:[a-zA-Z0-9_$"\\.-]+]] = appending global [2 x i8*] [i8* @__omp_offloading_fd00_22cf4bf_foo_l4_exec_mode, i8* @__omp_offloading_fd00_22cf4bf_foo_l9_exec_mode], section "llvm.metadata" +; CHECK: @[[EXECUTION_PARAM:[a-zA-Z0-9_$"\\.-]+]] = internal local_unnamed_addr addrspace(3) global i32 undef, align 4 +;. +define weak void @__omp_offloading_fd00_22cf4bf_foo_l4() #0 { +; CHECK-LABEL: define {{[^@]+}}@__omp_offloading_fd00_22cf4bf_foo_l4 +; CHECK-SAME: () #[[ATTR0:[0-9]+]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* null, i1 true, i1 false, i1 false) +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* null) +; CHECK-NEXT: call void @__omp_outlined__(i32* null, i32* null) #[[ATTR4:[0-9]+]] +; CHECK-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* null, i1 true, i1 false) +; CHECK-NEXT: ret void +; +entry: + %0 = call i32 @__kmpc_target_init(%struct.ident_t* null, i1 true, i1 false, i1 false) + %1 = call i32 @__kmpc_global_thread_num(%struct.ident_t* null) + call void @__omp_outlined__(i32* null, i32* null) #4 + call void @__kmpc_target_deinit(%struct.ident_t* null, i1 true, i1 false) + ret void +} + +; Function Attrs: convergent noinline norecurse nounwind optnone +define internal void @__omp_outlined__(i32* noalias %.global_tid., i32* noalias %.bound_tid.) 
#0 { +; CHECK-LABEL: define {{[^@]+}}@__omp_outlined__ +; CHECK-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* null, i32 0, i32 91, i32* null, i32* null, i32* null, i32* null, i32 1, i32 0) +; CHECK-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* null, i32 0, i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i64, i64)* @__omp_outlined__1 to i8*), i8* null, i8** null, i64 2) +; CHECK-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* null, i32 0) +; CHECK-NEXT: ret void +; +entry: + call void @__kmpc_for_static_init_4(%struct.ident_t* null, i32 0, i32 91, i32* null, i32* null, i32* null, i32* null, i32 1, i32 0) + call void @__kmpc_parallel_51(%struct.ident_t* null, i32 0, i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i64, i64)* @__omp_outlined__1 to i8*), i8* null, i8** null, i64 2) + call void @__kmpc_for_static_fini(%struct.ident_t* null, i32 0) + ret void +} + +; Function Attrs: convergent noinline norecurse nounwind optnone +define internal void @__omp_outlined__1(i32* noalias %.global_tid., i32* noalias %.bound_tid., i64 %.previous.lb., i64 %.previous.ub.) 
#0 { +; CHECK-LABEL: define {{[^@]+}}@__omp_outlined__1 +; CHECK-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i64 [[DOTPREVIOUS_LB_:%.*]], i64 [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* null, i32 0, i32 33, i32* null, i32* null, i32* null, i32* null, i32 1, i32 1) +; CHECK-NEXT: call void bitcast (void (...)* @bar to void ()*)() #[[ATTR5:[0-9]+]] +; CHECK-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* null, i32 0) +; CHECK-NEXT: ret void +; +entry: + call void @__kmpc_for_static_init_4(%struct.ident_t* null, i32 0, i32 33, i32* null, i32* null, i32* null, i32* null, i32 1, i32 1) + call void bitcast (void (...)* @bar to void ()*)() #5 + call void @__kmpc_for_static_fini(%struct.ident_t* null, i32 0) + ret void +} + +; Function Attrs: convergent +declare void @bar(...) #1 + +; Function Attrs: convergent noinline norecurse nounwind optnone +define weak void @__omp_offloading_fd00_22cf4bf_foo_l9() #0 { +; CHECK-LABEL: define {{[^@]+}}@__omp_offloading_fd00_22cf4bf_foo_l9 +; CHECK-SAME: () #[[ATTR0]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* null, i1 true, i1 false, i1 false) +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* null) +; CHECK-NEXT: call void @__omp_outlined__2(i32* null, i32* null) #[[ATTR4]] +; CHECK-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* null, i1 true, i1 false) +; CHECK-NEXT: ret void +; +entry: + %0 = call i32 @__kmpc_target_init(%struct.ident_t* null, i1 true, i1 false, i1 false) + %1 = call i32 @__kmpc_global_thread_num(%struct.ident_t* null) + call void @__omp_outlined__2(i32* null, i32* null) #4 + call void @__kmpc_target_deinit(%struct.ident_t* null, i1 true, i1 false) + ret void +} + +; Function Attrs: convergent noinline norecurse nounwind optnone +define internal void @__omp_outlined__2(i32* noalias 
%.global_tid., i32* noalias %.bound_tid.) #0 { +; CHECK-LABEL: define {{[^@]+}}@__omp_outlined__2 +; CHECK-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* null, i32 0, i32 91, i32* null, i32* null, i32* null, i32* null, i32 1, i32 0) +; CHECK-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* null, i32 0, i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i64, i64)* @__omp_outlined__3 to i8*), i8* null, i8** null, i64 2) +; CHECK-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* null, i32 0) +; CHECK-NEXT: ret void +; +entry: + call void @__kmpc_for_static_init_4(%struct.ident_t* null, i32 0, i32 91, i32* null, i32* null, i32* null, i32* null, i32 1, i32 0) + call void @__kmpc_parallel_51(%struct.ident_t* null, i32 0, i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i64, i64)* @__omp_outlined__3 to i8*), i8* null, i8** null, i64 2) + call void @__kmpc_for_static_fini(%struct.ident_t* null, i32 0) + ret void +} + +; Function Attrs: convergent noinline norecurse nounwind optnone +define internal void @__omp_outlined__3(i32* noalias %.global_tid., i32* noalias %.bound_tid., i64 %.previous.lb., i64 %.previous.ub.) 
#0 { +; CHECK-LABEL: define {{[^@]+}}@__omp_outlined__3 +; CHECK-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i64 [[DOTPREVIOUS_LB_:%.*]], i64 [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* null, i32 0, i32 33, i32* null, i32* null, i32* null, i32* null, i32 1, i32 1) +; CHECK-NEXT: call void bitcast (void (...)* @bar to void ()*)() #[[ATTR5]] +; CHECK-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* null, i32 0) +; CHECK-NEXT: ret void +; +entry: + call void @__kmpc_for_static_init_4(%struct.ident_t* null, i32 0, i32 33, i32* null, i32* null, i32* null, i32* null, i32 1, i32 1) + call void bitcast (void (...)* @bar to void ()*)() #5 + call void @__kmpc_for_static_fini(%struct.ident_t* null, i32 0) + ret void +} + +; Function Attrs: convergent nounwind mustprogress +define internal void @__kmpc_for_static_init_4(%struct.ident_t* %loc, i32 %global_tid, i32 %schedtype, i32* nocapture writeonly %plastiter, i32* nocapture %plower, i32* nocapture %pupper, i32* nocapture %pstride, i32 %incr, i32 %chunk) #2 { +; CHECK-LABEL: define {{[^@]+}}@__kmpc_for_static_init_4 +; CHECK-SAME: (%struct.ident_t* [[LOC:%.*]], i32 [[GLOBAL_TID:%.*]], i32 [[SCHEDTYPE:%.*]], i32* nocapture writeonly [[PLASTITER:%.*]], i32* nocapture [[PLOWER:%.*]], i32* nocapture [[PUPPER:%.*]], i32* nocapture [[PSTRIDE:%.*]], i32 [[INCR:%.*]], i32 [[CHUNK:%.*]]) #[[ATTR2:[0-9]+]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[CALL_I:%.*]] = call i32 @_Z21GetNumberOfOmpThreadsb(i1 zeroext true) #[[ATTR6:[0-9]+]] +; CHECK-NEXT: [[CMP:%.*]] = icmp ne i32 [[CALL_I]], 0 +; CHECK-NEXT: br i1 [[CMP]], label [[THEN:%.*]], label [[END:%.*]] +; CHECK: then: +; CHECK-NEXT: [[CALL_I_1:%.*]] = call i32 @_Z21GetNumberOfOmpThreadsb(i1 zeroext false) #[[ATTR6]] +; CHECK-NEXT: br label [[END]] +; CHECK: end: +; CHECK-NEXT: ret void +; +entry: + %call = call zeroext i1 
@_Z13checkSPMDModeP5ident(%struct.ident_t* %loc) #6 + %call.i = call i32 @_Z21GetNumberOfOmpThreadsb(i1 zeroext %call) #6 + %cmp = icmp ne i32 %call.i, 0 + br i1 %cmp, label %then, label %end + +then: + %call.i.1 = call i32 @_Z21GetNumberOfOmpThreadsb(i1 zeroext false) #6 + br label %end + +end: + ret void +} + +declare i32 @_Z21GetNumberOfOmpThreadsb(i1 zeroext) + +; Function Attrs: convergent nounwind mustprogress +define internal zeroext i1 @_Z13checkSPMDModeP5ident(%struct.ident_t* readonly %loc) local_unnamed_addr #2 { +entry: + %call10 = call signext i8 @__kmpc_is_spmd_exec_mode() #6 + %tobool11 = icmp ne i8 %call10, 0 + ret i1 %tobool11 +} + +; Function Attrs: convergent nofree norecurse nosync nounwind readonly willreturn mustprogress +define internal signext i8 @__kmpc_is_spmd_exec_mode() local_unnamed_addr #3 { +entry: + %0 = load i32, i32* addrspacecast (i32 addrspace(3)* @execution_param to i32*), align 4, !tbaa !12 + %1 = trunc i32 %0 to i8 + %2 = and i8 %1, 1 + %3 = xor i8 %2, 1 + ret i8 %3 +} + +; Function Attrs: convergent +declare void @__kmpc_for_static_fini(%struct.ident_t* nocapture, i32) #1 + +; Function Attrs: convergent +declare i32 @__kmpc_target_init(%struct.ident_t*, i1 zeroext, i1 zeroext, i1 zeroext) #1 + +; Function Attrs: convergent +declare void @__kmpc_target_deinit(%struct.ident_t* nocapture readnone, i1 zeroext, i1 zeroext) #1 + +; Function Attrs: convergent nounwind mustprogress +define internal i32 @__kmpc_global_thread_num(%struct.ident_t* nocapture readnone %loc) #2 { +; CHECK-LABEL: define {{[^@]+}}@__kmpc_global_thread_num +; CHECK-SAME: (%struct.ident_t* nocapture readnone [[LOC:%.*]]) #[[ATTR2]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: ret i32 0 +; +entry: + %call = call i32 @_Z14GetOmpThreadIdv() #6 + ret i32 %call +} + +; Function Attrs: convergent nounwind mustprogress +define internal i32 @_Z14GetOmpThreadIdv() local_unnamed_addr #2 { +entry: + %call4 = call signext i8 @__kmpc_is_spmd_exec_mode() #6 + %tobool5.not = icmp 
eq i8 %call4, 0 + br i1 %tobool5.not, label %if.else7, label %cleanup + +if.else7: ; preds = %entry + br label %cleanup + +cleanup: ; preds = %if.else7, %entry + %retval.0 = phi i32 [ 0, %entry ], [ 1, %if.else7 ] + ret i32 %retval.0 +} + +; Function Attrs: convergent nounwind mustprogress +define internal void @__kmpc_parallel_51(%struct.ident_t* %ident, i32 %global_tid, i32 %if_expr, i32 %num_threads, i32 %proc_bind, i8* %fn, i8* %wrapper_fn, i8** %args, i64 %nargs) #2 { +; CHECK-LABEL: define {{[^@]+}}@__kmpc_parallel_51 +; CHECK-SAME: (%struct.ident_t* [[IDENT:%.*]], i32 [[GLOBAL_TID:%.*]], i32 [[IF_EXPR:%.*]], i32 [[NUM_THREADS:%.*]], i32 [[PROC_BIND:%.*]], i8* [[FN:%.*]], i8* [[WRAPPER_FN:%.*]], i8** [[ARGS:%.*]], i64 [[NARGS:%.*]]) #[[ATTR2]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: br label [[IF_THEN:%.*]] +; CHECK: if.then: +; CHECK-NEXT: br label [[IF_THEN6:%.*]] +; CHECK: if.then6: +; CHECK-NEXT: br label [[CLEANUP:%.*]] +; CHECK: cleanup: +; CHECK-NEXT: ret void +; +entry: + %call1 = call signext i8 @__kmpc_is_spmd_exec_mode() #6 + %conv2 = sext i8 %call1 to i32 + %cmp = icmp sgt i32 %conv2, 0 + br i1 %cmp, label %if.then, label %cleanup + +if.then: ; preds = %entry + %call4 = call signext i8 @__kmpc_is_spmd_exec_mode() #6 + %tobool5.not = icmp eq i8 %call4, 0 + br i1 %tobool5.not, label %cleanup, label %if.then6 + +if.then6: ; preds = %if.then + br label %cleanup + +cleanup: ; preds = %if.then6, %if.then, %entry + ret void +} + +attributes #0 = { convergent noinline norecurse nounwind optnone "frame-pointer"="all" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="sm_75" "target-features"="+ptx72,+sm_75" } +attributes #1 = { convergent "frame-pointer"="all" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="sm_75" "target-features"="+ptx72,+sm_75" } +attributes #2 = { convergent nounwind mustprogress "frame-pointer"="all" "min-legal-vector-width"="0" "no-trapping-math"="true" 
"stack-protector-buffer-size"="8" "target-cpu"="sm_75" "target-features"="+ptx61,+sm_75" } +attributes #3 = { convergent nofree norecurse nosync nounwind readonly willreturn mustprogress "frame-pointer"="all" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="sm_75" "target-features"="+ptx61,+sm_75" } +attributes #4 = { nounwind } +attributes #5 = { convergent } +attributes #6 = { convergent nounwind } + +!llvm.module.flags = !{!0, !1, !2, !3, !4, !5} +!omp_offload.info = !{!6, !7} +!nvvm.annotations = !{!8, !9} +!llvm.ident = !{!10, !11, !10, !10, !10, !10, !10, !10, !10, !10, !10, !10, !10, !10, !10, !10} + +!0 = !{i32 2, !"SDK Version", [2 x i32] [i32 11, i32 2]} +!1 = !{i32 1, !"wchar_size", i32 4} +!2 = !{i32 7, !"openmp", i32 50} +!3 = !{i32 7, !"openmp-device", i32 50} +!4 = !{i32 7, !"PIC Level", i32 2} +!5 = !{i32 7, !"frame-pointer", i32 2} +!6 = !{i32 0, i32 64768, i32 36500671, !"foo", i32 9, i32 1} +!7 = !{i32 0, i32 64768, i32 36500671, !"foo", i32 4, i32 0} +!8 = !{void ()* @__omp_offloading_fd00_22cf4bf_foo_l4, !"kernel", i32 1} +!9 = !{void ()* @__omp_offloading_fd00_22cf4bf_foo_l9, !"kernel", i32 1} +!10 = !{!"clang version 13.0.0"} +!11 = !{!"clang version 3.8.0 (tags/RELEASE_380/final)"} +!12 = !{!13, !13, i64 0} +!13 = !{!"int", !14, i64 0} +!14 = !{!"omnipotent char", !15, i64 0} +!15 = !{!"Simple C++ TBAA"} +;. 
+; CHECK: attributes #[[ATTR0]] = { convergent noinline norecurse nounwind optnone "frame-pointer"="all" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="sm_75" "target-features"="+ptx72,+sm_75" } +; CHECK: attributes #[[ATTR1:[0-9]+]] = { convergent "frame-pointer"="all" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="sm_75" "target-features"="+ptx72,+sm_75" } +; CHECK: attributes #[[ATTR2]] = { convergent nounwind mustprogress "frame-pointer"="all" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="sm_75" "target-features"="+ptx61,+sm_75" } +; CHECK: attributes #[[ATTR3:[0-9]+]] = { convergent nounwind "frame-pointer"="all" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="sm_75" "target-features"="+ptx72,+sm_75" } +; CHECK: attributes #[[ATTR4]] = { nounwind } +; CHECK: attributes #[[ATTR5]] = { convergent } +; CHECK: attributes #[[ATTR6]] = { convergent nounwind } +;. +; CHECK: [[META0:![0-9]+]] = !{i32 2, !"SDK Version", [2 x i32] [i32 11, i32 2]} +; CHECK: [[META1:![0-9]+]] = !{i32 1, !"wchar_size", i32 4} +; CHECK: [[META2:![0-9]+]] = !{i32 7, !"openmp", i32 50} +; CHECK: [[META3:![0-9]+]] = !{i32 7, !"openmp-device", i32 50} +; CHECK: [[META4:![0-9]+]] = !{i32 7, !"PIC Level", i32 2} +; CHECK: [[META5:![0-9]+]] = !{i32 7, !"frame-pointer", i32 2} +; CHECK: [[META6:![0-9]+]] = !{i32 0, i32 64768, i32 36500671, !"foo", i32 9, i32 1} +; CHECK: [[META7:![0-9]+]] = !{i32 0, i32 64768, i32 36500671, !"foo", i32 4, i32 0} +; CHECK: [[META8:![0-9]+]] = !{void ()* @__omp_offloading_fd00_22cf4bf_foo_l4, !"kernel", i32 1} +; CHECK: [[META9:![0-9]+]] = !{void ()* @__omp_offloading_fd00_22cf4bf_foo_l9, !"kernel", i32 1} +; CHECK: [[META10:![0-9]+]] = !{!"clang version 13.0.0"} +; CHECK: [[META11:![0-9]+]] = !{!"clang version 3.8.0 (tags/RELEASE_380/final)"} +;. 
diff --git a/llvm/test/Transforms/OpenMP/is_spmd_exec_mode_unfoldable.ll b/llvm/test/Transforms/OpenMP/is_spmd_exec_mode_unfoldable.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Transforms/OpenMP/is_spmd_exec_mode_unfoldable.ll @@ -0,0 +1,289 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --check-globals +; RUN: opt -S -passes=openmp-opt < %s | FileCheck %s +target triple = "nvptx64" + +%struct.ident_t = type { i32, i32, i32, i32, i8* } + +@__omp_offloading_fd00_22cf4bf_foo_l4_exec_mode = weak constant i8 1 +@__omp_offloading_fd00_22cf4bf_foo_l9_exec_mode = weak constant i8 0 +@llvm.compiler.used = appending global [2 x i8*] [i8* @__omp_offloading_fd00_22cf4bf_foo_l4_exec_mode, i8* @__omp_offloading_fd00_22cf4bf_foo_l9_exec_mode], section "llvm.metadata" +@execution_param = internal local_unnamed_addr addrspace(3) global i32 undef, align 4 + +; Function Attrs: convergent noinline norecurse nounwind optnone +;. +; CHECK: @[[__OMP_OFFLOADING_FD00_22CF4BF_FOO_L4_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 1 +; CHECK: @[[__OMP_OFFLOADING_FD00_22CF4BF_FOO_L9_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 0 +; CHECK: @[[LLVM_COMPILER_USED:[a-zA-Z0-9_$"\\.-]+]] = appending global [2 x i8*] [i8* @__omp_offloading_fd00_22cf4bf_foo_l4_exec_mode, i8* @__omp_offloading_fd00_22cf4bf_foo_l9_exec_mode], section "llvm.metadata" +; CHECK: @[[EXECUTION_PARAM:[a-zA-Z0-9_$"\\.-]+]] = internal local_unnamed_addr addrspace(3) global i32 undef, align 4 +;. 
+define weak void @__omp_offloading_fd00_22cf4bf_foo_l4() #0 { +; CHECK-LABEL: define {{[^@]+}}@__omp_offloading_fd00_22cf4bf_foo_l4 +; CHECK-SAME: () #[[ATTR0:[0-9]+]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* null, i1 false, i1 true, i1 true) +; CHECK-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 +; CHECK-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] +; CHECK: user_code.entry: +; CHECK-NEXT: call void bitcast (void (...)* @bar to void ()*)() #[[ATTR5:[0-9]+]] +; CHECK-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* null, i1 false, i1 true) +; CHECK-NEXT: ret void +; CHECK: worker.exit: +; CHECK-NEXT: ret void +; +entry: + %0 = call i32 @__kmpc_target_init(%struct.ident_t* null, i1 false, i1 true, i1 true) + %exec_user_code = icmp eq i32 %0, -1 + br i1 %exec_user_code, label %user_code.entry, label %worker.exit + +user_code.entry: ; preds = %entry + call void bitcast (void (...)* @bar to void ()*)() #4 + call void @__kmpc_target_deinit(%struct.ident_t* null, i1 false, i1 true) + ret void + +worker.exit: ; preds = %entry + ret void +} + +; Function Attrs: convergent +declare void @bar(...) 
#1 + +; Function Attrs: convergent noinline norecurse nounwind optnone +define weak void @__omp_offloading_fd00_22cf4bf_foo_l9() #0 { +; CHECK-LABEL: define {{[^@]+}}@__omp_offloading_fd00_22cf4bf_foo_l9 +; CHECK-SAME: () #[[ATTR0]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 +; CHECK-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 +; CHECK-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* null, i1 true, i1 false, i1 false) +; CHECK-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 +; CHECK-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] +; CHECK: user_code.entry: +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* null) +; CHECK-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 +; CHECK-NEXT: call void @__omp_outlined__(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR6:[0-9]+]] +; CHECK-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* null, i1 true, i1 false) +; CHECK-NEXT: ret void +; CHECK: worker.exit: +; CHECK-NEXT: ret void +; +entry: + %.zero.addr = alloca i32, align 4 + %.threadid_temp. 
= alloca i32, align 4 + store i32 0, i32* %.zero.addr, align 4 + %0 = call i32 @__kmpc_target_init(%struct.ident_t* null, i1 true, i1 false, i1 false) + %exec_user_code = icmp eq i32 %0, -1 + br i1 %exec_user_code, label %user_code.entry, label %worker.exit + +user_code.entry: ; preds = %entry + %1 = call i32 @__kmpc_global_thread_num(%struct.ident_t* null) + store i32 %1, i32* %.threadid_temp., align 4 + call void @__omp_outlined__(i32* %.threadid_temp., i32* %.zero.addr) #5 + call void @__kmpc_target_deinit(%struct.ident_t* null, i1 true, i1 false) + ret void + +worker.exit: ; preds = %entry + ret void +} + +; Function Attrs: convergent noinline norecurse nounwind optnone +define internal void @__omp_outlined__(i32* noalias %.global_tid., i32* noalias %.bound_tid.) #0 { +; CHECK-LABEL: define {{[^@]+}}@__omp_outlined__ +; CHECK-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* null, i32 0, i32 91, i32* null, i32* null, i32* null, i32* null, i32 1, i32 0) +; CHECK-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* null, i32 0, i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i64, i64)* @__omp_outlined__1 to i8*), i8* null, i8** null, i64 2) +; CHECK-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* null, i32 0) +; CHECK-NEXT: ret void +; +entry: + call void @__kmpc_for_static_init_4(%struct.ident_t* null, i32 0, i32 91, i32* null, i32* null, i32* null, i32* null, i32 1, i32 0) + call void @__kmpc_parallel_51(%struct.ident_t* null, i32 0, i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i64, i64)* @__omp_outlined__1 to i8*), i8* null, i8** null, i64 2) + call void @__kmpc_for_static_fini(%struct.ident_t* null, i32 0) + ret void +} + +; Function Attrs: nounwind readnone +declare i32 @llvm.nvvm.read.ptx.sreg.ntid.x() #2 + +; Function Attrs: convergent noinline norecurse nounwind optnone +define internal void 
@__omp_outlined__1(i32* noalias %.global_tid., i32* noalias %.bound_tid., i64 %.previous.lb., i64 %.previous.ub.) #0 { +; CHECK-LABEL: define {{[^@]+}}@__omp_outlined__1 +; CHECK-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i64 [[DOTPREVIOUS_LB_:%.*]], i64 [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* null, i32 0, i32 33, i32* null, i32* null, i32* null, i32* null, i32 1, i32 1) +; CHECK-NEXT: call void bitcast (void (...)* @bar to void ()*)() #[[ATTR5]] +; CHECK-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* null, i32 0) +; CHECK-NEXT: ret void +; +entry: + call void @__kmpc_for_static_init_4(%struct.ident_t* null, i32 0, i32 33, i32* null, i32* null, i32* null, i32* null, i32 1, i32 1) + call void bitcast (void (...)* @bar to void ()*)() #4 + call void @__kmpc_for_static_fini(%struct.ident_t* null, i32 0) + ret void +} + +; Function Attrs: convergent nounwind mustprogress +define internal void @__kmpc_for_static_init_4(%struct.ident_t* %loc, i32 %global_tid, i32 %schedtype, i32* nocapture writeonly %plastiter, i32* nocapture %plower, i32* nocapture %pupper, i32* nocapture %pstride, i32 %incr, i32 %chunk) #3 { +; CHECK-LABEL: define {{[^@]+}}@__kmpc_for_static_init_4 +; CHECK-SAME: (%struct.ident_t* [[LOC:%.*]], i32 [[GLOBAL_TID:%.*]], i32 [[SCHEDTYPE:%.*]], i32* nocapture writeonly [[PLASTITER:%.*]], i32* nocapture [[PLOWER:%.*]], i32* nocapture [[PUPPER:%.*]], i32* nocapture [[PSTRIDE:%.*]], i32 [[INCR:%.*]], i32 [[CHUNK:%.*]]) #[[ATTR3:[0-9]+]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[CALL_I:%.*]] = call i32 @_Z21GetNumberOfOmpThreadsb(i1 zeroext true) +; CHECK-NEXT: [[CMP:%.*]] = icmp ne i32 [[CALL_I]], 0 +; CHECK-NEXT: br i1 [[CMP]], label [[IF_THEN_I:%.*]], label [[SW_BB1_I:%.*]] +; CHECK: if.then.i: +; CHECK-NEXT: br label [[SW_BB1_I]] +; CHECK: sw.bb1.i: +; CHECK-NEXT: ret void +; +entry: + %call = call zeroext i1 
@_Z13checkSPMDModeP5ident(%struct.ident_t* %loc) + %call.i = call i32 @_Z21GetNumberOfOmpThreadsb(i1 zeroext %call) + %cmp = icmp ne i32 %call.i, 0 + br i1 %cmp, label %if.then.i, label %sw.bb1.i + +if.then.i: ; preds = %entry + br label %sw.bb1.i + +sw.bb1.i: ; preds = %if.then.i, %entry + ret void +} + +declare i32 @_Z21GetNumberOfOmpThreadsb(i1 zeroext) + +; Function Attrs: nounwind readnone +define internal zeroext i1 @_Z13checkSPMDModeP5ident(%struct.ident_t* readonly %loc) local_unnamed_addr #2 { +entry: + %call10 = call signext i8 @__kmpc_is_spmd_exec_mode() #6 + %tobool11 = icmp ne i8 %call10, 0 + ret i1 %tobool11 +} + +; Function Attrs: convergent nounwind mustprogress +define internal signext i8 @__kmpc_is_spmd_exec_mode() local_unnamed_addr #3 { +entry: + %0 = load i32, i32* addrspacecast (i32 addrspace(3)* @execution_param to i32*), align 4, !tbaa !12 + %1 = trunc i32 %0 to i8 + %2 = and i8 %1, 1 + %3 = xor i8 %2, 1 + ret i8 %3 +} + +; Function Attrs: convergent +declare void @__kmpc_for_static_fini(%struct.ident_t* nocapture, i32) #1 + +; Function Attrs: convergent +declare i32 @__kmpc_target_init(%struct.ident_t*, i1 zeroext, i1 zeroext, i1 zeroext) #1 + +; Function Attrs: convergent +declare void @__kmpc_target_deinit(%struct.ident_t* nocapture readnone, i1 zeroext, i1 zeroext) #1 + +; Function Attrs: nounwind readnone +define internal i32 @__kmpc_global_thread_num(%struct.ident_t* nocapture readnone %loc) #2 { +; CHECK-LABEL: define {{[^@]+}}@__kmpc_global_thread_num +; CHECK-SAME: (%struct.ident_t* nocapture readnone [[LOC:%.*]]) #[[ATTR2:[0-9]+]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: ret i32 0 +; +entry: + %call = call i32 @_Z14GetOmpThreadIdv() #6 + ret i32 %call +} + +; Function Attrs: nounwind readnone +define internal i32 @_Z14GetOmpThreadIdv() local_unnamed_addr #2 { +entry: + %call4 = call signext i8 @__kmpc_is_spmd_exec_mode() #6 + %tobool5.not = icmp eq i8 %call4, 0 + br i1 %tobool5.not, label %if.else7, label %cleanup + +if.else7: ; 
preds = %entry + br label %cleanup + +cleanup: ; preds = %if.else7, %entry + %retval.0 = phi i32 [ 0, %entry ], [ 1, %if.else7 ] + ret i32 %retval.0 +} + +; Function Attrs: nounwind readnone +define internal void @__kmpc_parallel_51(%struct.ident_t* %ident, i32 %global_tid, i32 %if_expr, i32 %num_threads, i32 %proc_bind, i8* %fn, i8* %wrapper_fn, i8** %args, i64 %nargs) #2 { +; CHECK-LABEL: define {{[^@]+}}@__kmpc_parallel_51 +; CHECK-SAME: (%struct.ident_t* [[IDENT:%.*]], i32 [[GLOBAL_TID:%.*]], i32 [[IF_EXPR:%.*]], i32 [[NUM_THREADS:%.*]], i32 [[PROC_BIND:%.*]], i8* [[FN:%.*]], i8* [[WRAPPER_FN:%.*]], i8** [[ARGS:%.*]], i64 [[NARGS:%.*]]) #[[ATTR2]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: br label [[IF_THEN:%.*]] +; CHECK: if.then: +; CHECK-NEXT: br label [[IF_THEN6:%.*]] +; CHECK: if.then6: +; CHECK-NEXT: br label [[CLEANUP:%.*]] +; CHECK: cleanup: +; CHECK-NEXT: ret void +; +entry: + %call1 = call signext i8 @__kmpc_is_spmd_exec_mode() #6 + %conv2 = sext i8 %call1 to i32 + %cmp = icmp sgt i32 %conv2, 0 + br i1 %cmp, label %if.then, label %cleanup + +if.then: ; preds = %entry + %call4 = call signext i8 @__kmpc_is_spmd_exec_mode() #6 + %tobool5.not = icmp eq i8 %call4, 0 + br i1 %tobool5.not, label %cleanup, label %if.then6 + +if.then6: ; preds = %if.then + br label %cleanup + +cleanup: ; preds = %if.then6, %if.then, %entry + ret void +} + +attributes #0 = { convergent noinline norecurse nounwind optnone "frame-pointer"="all" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="sm_75" "target-features"="+ptx72,+sm_75" } +attributes #1 = { convergent "frame-pointer"="all" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="sm_75" "target-features"="+ptx72,+sm_75" } +attributes #2 = { nounwind readnone } +attributes #3 = { convergent nounwind mustprogress "frame-pointer"="all" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="sm_75" 
"target-features"="+ptx61,+sm_75" } +attributes #4 = { convergent } +attributes #5 = { nounwind } +attributes #6 = { convergent nofree norecurse nosync nounwind readnone willreturn mustprogress "frame-pointer"="all" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="sm_75" "target-features"="+ptx61,+sm_75" } + +!llvm.module.flags = !{!0, !1, !2, !3, !4, !5} +!omp_offload.info = !{!6, !7} +!nvvm.annotations = !{!8, !9} +!llvm.ident = !{!10, !11, !10, !10, !10, !10, !10, !10, !10, !10, !10, !10, !10, !10, !10, !10} + +!0 = !{i32 2, !"SDK Version", [2 x i32] [i32 11, i32 2]} +!1 = !{i32 1, !"wchar_size", i32 4} +!2 = !{i32 7, !"openmp", i32 50} +!3 = !{i32 7, !"openmp-device", i32 50} +!4 = !{i32 7, !"PIC Level", i32 2} +!5 = !{i32 7, !"frame-pointer", i32 2} +!6 = !{i32 0, i32 64768, i32 36500671, !"foo", i32 9, i32 1} +!7 = !{i32 0, i32 64768, i32 36500671, !"foo", i32 4, i32 0} +!8 = !{void ()* @__omp_offloading_fd00_22cf4bf_foo_l4, !"kernel", i32 1} +!9 = !{void ()* @__omp_offloading_fd00_22cf4bf_foo_l9, !"kernel", i32 1} +!10 = !{!"clang version 13.0.0"} +!11 = !{!"clang version 3.8.0 (tags/RELEASE_380/final)"} +!12 = !{!13, !13, i64 0} +!13 = !{!"int", !14, i64 0} +!14 = !{!"omnipotent char", !15, i64 0} +!15 = !{!"Simple C++ TBAA"} +;. 
+; CHECK: attributes #[[ATTR0]] = { convergent noinline norecurse nounwind optnone "frame-pointer"="all" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="sm_75" "target-features"="+ptx72,+sm_75" } +; CHECK: attributes #[[ATTR1:[0-9]+]] = { convergent "frame-pointer"="all" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="sm_75" "target-features"="+ptx72,+sm_75" } +; CHECK: attributes #[[ATTR2]] = { nounwind readnone } +; CHECK: attributes #[[ATTR3]] = { convergent nounwind mustprogress "frame-pointer"="all" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="sm_75" "target-features"="+ptx61,+sm_75" } +; CHECK: attributes #[[ATTR4:[0-9]+]] = { convergent nounwind "frame-pointer"="all" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="sm_75" "target-features"="+ptx72,+sm_75" } +; CHECK: attributes #[[ATTR5]] = { convergent } +; CHECK: attributes #[[ATTR6]] = { nounwind } +;. +; CHECK: [[META0:![0-9]+]] = !{i32 2, !"SDK Version", [2 x i32] [i32 11, i32 2]} +; CHECK: [[META1:![0-9]+]] = !{i32 1, !"wchar_size", i32 4} +; CHECK: [[META2:![0-9]+]] = !{i32 7, !"openmp", i32 50} +; CHECK: [[META3:![0-9]+]] = !{i32 7, !"openmp-device", i32 50} +; CHECK: [[META4:![0-9]+]] = !{i32 7, !"PIC Level", i32 2} +; CHECK: [[META5:![0-9]+]] = !{i32 7, !"frame-pointer", i32 2} +; CHECK: [[META6:![0-9]+]] = !{i32 0, i32 64768, i32 36500671, !"foo", i32 9, i32 1} +; CHECK: [[META7:![0-9]+]] = !{i32 0, i32 64768, i32 36500671, !"foo", i32 4, i32 0} +; CHECK: [[META8:![0-9]+]] = !{void ()* @__omp_offloading_fd00_22cf4bf_foo_l4, !"kernel", i32 1} +; CHECK: [[META9:![0-9]+]] = !{void ()* @__omp_offloading_fd00_22cf4bf_foo_l9, !"kernel", i32 1} +; CHECK: [[META10:![0-9]+]] = !{!"clang version 13.0.0"} +; CHECK: [[META11:![0-9]+]] = !{!"clang version 3.8.0 (tags/RELEASE_380/final)"} +;.