diff --git a/llvm/include/llvm/IR/Assumptions.h b/llvm/include/llvm/IR/Assumptions.h --- a/llvm/include/llvm/IR/Assumptions.h +++ b/llvm/include/llvm/IR/Assumptions.h @@ -21,6 +21,7 @@ namespace llvm { class Function; +class CallBase; /// The key we use for assumption attributes. constexpr StringRef AssumptionAttrKey = "llvm.assume"; @@ -45,6 +46,10 @@ /// Return true if \p F has the assumption \p AssumptionStr attached. bool hasAssumption(Function &F, const KnownAssumptionString &AssumptionStr); +/// Return true if \p CB or the callee has the assumption \p AssumptionStr +/// attached. +bool hasAssumption(CallBase &CB, const KnownAssumptionString &AssumptionStr); + } // namespace llvm #endif diff --git a/llvm/lib/IR/Assumptions.cpp b/llvm/lib/IR/Assumptions.cpp --- a/llvm/lib/IR/Assumptions.cpp +++ b/llvm/lib/IR/Assumptions.cpp @@ -11,12 +11,13 @@ #include "llvm/IR/Assumptions.h" #include "llvm/IR/Attributes.h" #include "llvm/IR/Function.h" +#include "llvm/IR/InstrTypes.h" using namespace llvm; -bool llvm::hasAssumption(Function &F, - const KnownAssumptionString &AssumptionStr) { - const Attribute &A = F.getFnAttribute(AssumptionAttrKey); +namespace { +bool hasAssumption(const Attribute &A, + const KnownAssumptionString &AssumptionStr) { if (!A.isValid()) return false; assert(A.isStringAttribute() && "Expected a string attribute!"); @@ -28,6 +29,23 @@ return Assumption == AssumptionStr; }); } +} // namespace + +bool llvm::hasAssumption(Function &F, + const KnownAssumptionString &AssumptionStr) { + const Attribute &A = F.getFnAttribute(AssumptionAttrKey); + return ::hasAssumption(A, AssumptionStr); +} + +bool llvm::hasAssumption(CallBase &CB, + const KnownAssumptionString &AssumptionStr) { + if (Function *F = CB.getCalledFunction()) + if (hasAssumption(*F, AssumptionStr)) + return true; + + const Attribute &A = CB.getFnAttr(AssumptionAttrKey); + return ::hasAssumption(A, AssumptionStr); +} StringSet<> llvm::KnownAssumptionStrings({ "omp_no_openmp", // OpenMP 5.1 diff --git a/llvm/lib/Transforms/IPO/OpenMPOpt.cpp b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp --- a/llvm/lib/Transforms/IPO/OpenMPOpt.cpp +++ b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp @@ -3705,13 +3705,15 @@ Function *Callee = getAssociatedFunction(); // Helper to lookup an assumption string. - auto HasAssumption = [](Function *Fn, StringRef AssumptionStr) { - return Fn && hasAssumption(*Fn, AssumptionStr); + auto HasAssumption = [](CallBase *CB, StringRef AssumptionStr) { + return CB && hasAssumption(*CB, AssumptionStr); }; // Check for SPMD-mode assumptions. - if (HasAssumption(Callee, "ompx_spmd_amenable")) + if (HasAssumption(&CB, "ompx_spmd_amenable")) { SPMDCompatibilityTracker.indicateOptimisticFixpoint(); + indicateOptimisticFixpoint(); + } // First weed out calls we do not care about, that is readonly/readnone // calls, intrinsics, and "no_openmp" calls. Neither of these can reach a @@ -3733,8 +3735,8 @@ // Unknown callees might contain parallel regions, except if they have // an appropriate assumption attached. - if (!(HasAssumption(Callee, "omp_no_openmp") || - HasAssumption(Callee, "omp_no_parallelism"))) + if (!(HasAssumption(&CB, "omp_no_openmp") || + HasAssumption(&CB, "omp_no_parallelism"))) ReachedUnknownParallelRegions.insert(&CB); // If SPMDCompatibilityTracker is not fixed, we need to give up on the diff --git a/llvm/test/Transforms/OpenMP/spmdization_assumes.ll b/llvm/test/Transforms/OpenMP/spmdization_assumes.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Transforms/OpenMP/spmdization_assumes.ll @@ -0,0 +1,168 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --check-globals +; RUN: opt -S -passes=openmp-opt < %s | FileCheck %s + +; void foo(double x) { +; #pragma omp target map(from:x) +; { +; x = sin(M_PI); +; #pragma omp parallel +; { } +; } +; } + +target triple = "nvptx64" + +%struct.ident_t = type { i32, i32, i32, i32, i8* } + +@0 = private unnamed_addr constant [23 x i8] c";unknown;unknown;0;0;;\00", align 1 +@1 = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 0, i8* getelementptr inbounds ([23 x i8], [23 x i8]* @0, i32 0, i32 0) }, align 8 +@__omp_offloading_fd02_404433c2_main_l5_exec_mode = weak constant i8 1 +@llvm.compiler.used = appending global [1 x i8*] [i8* @__omp_offloading_fd02_404433c2_main_l5_exec_mode], section "llvm.metadata" + +; Function Attrs: alwaysinline convergent norecurse nounwind +;. +; CHECK: @[[GLOB0:[0-9]+]] = private unnamed_addr constant [23 x i8] c" +; CHECK: @[[GLOB1:[0-9]+]] = private unnamed_addr constant [[STRUCT_IDENT_T:%.*]] { i32 0, i32 2, i32 0, i32 0, i8* getelementptr inbounds ([23 x i8], [23 x i8]* @[[GLOB0]], i32 0, i32 0) }, align 8 +; CHECK: @[[__OMP_OFFLOADING_FD02_404433C2_MAIN_L5_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 2 +; CHECK: @[[LLVM_COMPILER_USED:[a-zA-Z0-9_$"\\.-]+]] = appending global [1 x i8*] [i8* @__omp_offloading_fd02_404433c2_main_l5_exec_mode], section "llvm.metadata" +;. +define weak void @__omp_offloading_fd02_404433c2_main_l5(double* nonnull align 8 dereferenceable(8) %x) local_unnamed_addr #0 { +; CHECK-LABEL: define {{[^@]+}}@__omp_offloading_fd02_404433c2_main_l5 +; CHECK-SAME: (double* nonnull align 8 dereferenceable(8) [[X:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8 +; CHECK-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* nonnull @[[GLOB1]], i1 true, i1 false, i1 false) #[[ATTR3:[0-9]+]] +; CHECK-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 +; CHECK-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[COMMON_RET:%.*]] +; CHECK: common.ret: +; CHECK-NEXT: ret void +; CHECK: user_code.entry: +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* nonnull @[[GLOB1]]) #[[ATTR3]] +; CHECK-NEXT: [[CALL_I:%.*]] = call double @__nv_sin(double 0x400921FB54442D18) #[[ATTR7:[0-9]+]] +; CHECK-NEXT: br label [[REGION_CHECK_TID:%.*]] +; CHECK: region.check.tid: +; CHECK-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_get_hardware_thread_id_in_block() +; CHECK-NEXT: [[TMP3:%.*]] = icmp eq i32 [[TMP2]], 0 +; CHECK-NEXT: br i1 [[TMP3]], label [[REGION_GUARDED:%.*]], label [[REGION_BARRIER:%.*]] +; CHECK: region.guarded: +; CHECK-NEXT: store double [[CALL_I]], double* [[X]], align 8, !tbaa [[TBAA8:![0-9]+]] +; CHECK-NEXT: br label [[REGION_GUARDED_END:%.*]] +; CHECK: region.guarded.end: +; CHECK-NEXT: br label [[REGION_BARRIER]] +; CHECK: region.barrier: +; CHECK-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) +; CHECK-NEXT: br label [[REGION_EXIT:%.*]] +; CHECK: region.exit: +; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds [0 x i8*], [0 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 +; CHECK-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* nonnull @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__ to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined___wrapper to i8*), i8** nonnull [[TMP4]], i64 0) #[[ATTR3]] +; CHECK-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* nonnull @[[GLOB1]], i1 true, i1 false) #[[ATTR3]] +; CHECK-NEXT: br label [[COMMON_RET]] +; +entry: + %captured_vars_addrs = alloca [0 x i8*], align 8 + %0 = call i32 @__kmpc_target_init(%struct.ident_t* nonnull @1, i1 false, i1 true, i1 true) #3 + %exec_user_code = icmp eq i32 %0, -1 + br i1 %exec_user_code, label %user_code.entry, label %common.ret + +common.ret: ; preds = %entry, %user_code.entry + ret void + +user_code.entry: ; preds = %entry + %1 = call i32 @__kmpc_global_thread_num(%struct.ident_t* nonnull @1) + %call.i = call double @__nv_sin(double 0x400921FB54442D18) #6 + store double %call.i, double* %x, align 8, !tbaa !8 + %2 = getelementptr inbounds [0 x i8*], [0 x i8*]* %captured_vars_addrs, i64 0, i64 0 + call void @__kmpc_parallel_51(%struct.ident_t* nonnull @1, i32 %1, i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__ to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined___wrapper to i8*), i8** nonnull %2, i64 0) #3 + call void @__kmpc_target_deinit(%struct.ident_t* nonnull @1, i1 false, i1 true) #3 + br label %common.ret +} + +declare i32 @__kmpc_target_init(%struct.ident_t*, i1, i1, i1) local_unnamed_addr + +; Function Attrs: alwaysinline mustprogress nofree norecurse nosync nounwind readnone willreturn +define internal void @__omp_outlined__(i32* noalias nocapture %.global_tid., i32* noalias nocapture %.bound_tid.) #1 { +; CHECK-LABEL: define {{[^@]+}}@__omp_outlined__ +; CHECK-SAME: (i32* noalias nocapture [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture [[DOTBOUND_TID_:%.*]]) #[[ATTR1:[0-9]+]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: ret void +; +entry: + ret void +} + +; Function Attrs: norecurse nounwind +define internal void @__omp_outlined___wrapper(i16 zeroext %0, i32 %1) #2 { +; CHECK-LABEL: define {{[^@]+}}@__omp_outlined___wrapper +; CHECK-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR2:[0-9]+]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8 +; CHECK-NEXT: call void @__kmpc_get_shared_variables(i8*** nonnull [[GLOBAL_ARGS]]) #[[ATTR3]] +; CHECK-NEXT: ret void +; +entry: + %global_args = alloca i8**, align 8 + call void @__kmpc_get_shared_variables(i8*** nonnull %global_args) #3 + ret void +} + +declare void @__kmpc_get_shared_variables(i8***) local_unnamed_addr + +; Function Attrs: nounwind +declare i32 @__kmpc_global_thread_num(%struct.ident_t*) local_unnamed_addr #3 + +; Function Attrs: alwaysinline +declare void @__kmpc_parallel_51(%struct.ident_t*, i32, i32, i32, i32, i8*, i8*, i8**, i64) local_unnamed_addr #4 + +declare void @__kmpc_target_deinit(%struct.ident_t*, i1, i1) local_unnamed_addr + +; Function Attrs: convergent +declare double @__nv_sin(double) local_unnamed_addr #5 + +attributes #0 = { alwaysinline convergent norecurse nounwind "frame-pointer"="all" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" } +attributes #1 = { alwaysinline mustprogress nofree norecurse nosync nounwind readnone willreturn "frame-pointer"="all" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" } +attributes #2 = { norecurse nounwind "frame-pointer"="all" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" } +attributes #3 = { nounwind } +attributes #4 = { alwaysinline } +attributes #5 = { convergent "frame-pointer"="all" "no-trapping-math"="true" "stack-protector-buffer-size"="8" } +attributes #6 = { convergent nounwind "llvm.assume"="ompx_spmd_amenable" } + +!omp_offload.info = !{!0} +!nvvm.annotations = !{!1} +!llvm.module.flags = !{!2, !3, !4, !5, !6} +!llvm.ident = !{!7} + +!0 = !{i32 0, i32 64770, i32 1078211522, !"main", i32 5, i32 0} +!1 = !{void (double*)* @__omp_offloading_fd02_404433c2_main_l5, !"kernel", i32 1} +!2 = !{i32 1, !"wchar_size", i32 4} +!3 = !{i32 7, !"openmp", i32 50} +!4 = !{i32 7, !"openmp-device", i32 50} +!5 = !{i32 7, !"PIC Level", i32 2} +!6 = !{i32 7, !"frame-pointer", i32 2} +!7 = !{!"clang version 14.0.0"} +!8 = !{!9, !9, i64 0} +!9 = !{!"double", !10, i64 0} +!10 = !{!"omnipotent char", !11, i64 0} +!11 = !{!"Simple C/C++ TBAA"} +;. +; CHECK: attributes #[[ATTR0]] = { alwaysinline convergent norecurse nounwind "frame-pointer"="all" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" } +; CHECK: attributes #[[ATTR1]] = { alwaysinline mustprogress nofree norecurse nosync nounwind readnone willreturn "frame-pointer"="all" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" } +; CHECK: attributes #[[ATTR2]] = { norecurse nounwind "frame-pointer"="all" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" } +; CHECK: attributes #[[ATTR3]] = { nounwind } +; CHECK: attributes #[[ATTR4:[0-9]+]] = { alwaysinline } +; CHECK: attributes #[[ATTR5:[0-9]+]] = { convergent "frame-pointer"="all" "no-trapping-math"="true" "stack-protector-buffer-size"="8" } +; CHECK: attributes #[[ATTR6:[0-9]+]] = { convergent nounwind } +; CHECK: attributes #[[ATTR7]] = { convergent nounwind "llvm.assume"="ompx_spmd_amenable" } +;. +; CHECK: [[META0:![0-9]+]] = !{i32 0, i32 64770, i32 1078211522, !"main", i32 5, i32 0} +; CHECK: [[META1:![0-9]+]] = !{void (double*)* @__omp_offloading_fd02_404433c2_main_l5, !"kernel", i32 1} +; CHECK: [[META2:![0-9]+]] = !{i32 1, !"wchar_size", i32 4} +; CHECK: [[META3:![0-9]+]] = !{i32 7, !"openmp", i32 50} +; CHECK: [[META4:![0-9]+]] = !{i32 7, !"openmp-device", i32 50} +; CHECK: [[META5:![0-9]+]] = !{i32 7, !"PIC Level", i32 2} +; CHECK: [[META6:![0-9]+]] = !{i32 7, !"frame-pointer", i32 2} +; CHECK: [[META7:![0-9]+]] = !{!"clang version 14.0.0"} +; CHECK: [[TBAA8]] = !{!9, !9, i64 0} +; CHECK: [[META9:![0-9]+]] = !{!"double", !10, i64 0} +; CHECK: [[META10:![0-9]+]] = !{!"omnipotent char", !11, i64 0} +; CHECK: [[META11:![0-9]+]] = !{!"Simple C/C++ TBAA"} +;.