diff --git a/clang/test/OpenMP/remarks_parallel_in_multiple_target_state_machines.c b/clang/test/OpenMP/remarks_parallel_in_multiple_target_state_machines.c --- a/clang/test/OpenMP/remarks_parallel_in_multiple_target_state_machines.c +++ b/clang/test/OpenMP/remarks_parallel_in_multiple_target_state_machines.c @@ -8,37 +8,28 @@ void bar1(void) { #pragma omp parallel // #0 - // all-remark@#0 {{Found a parallel region that is called in a target region but not part of a combined target construct nor nested inside a target construct without intermediate code. This can lead to excessive register usage for unrelated target regions in the same translation unit due to spurious call edges assumed by ptxas.}} - // safe-remark@#0 {{Parallel region is used in unknown ways; will not attempt to rewrite the state machine.}} - // force-remark@#0 {{Specialize parallel region that is only reached from a single target region to avoid spurious call edges and excessive register usage in other target regions. (parallel region ID: __omp_outlined__2_wrapper, kernel ID: }} + // safe-remark@#0 {{Parallel region is used in unknown ways. Will not attempt to rewrite the state machine.}} { } } void bar2(void) { #pragma omp parallel // #1 - // all-remark@#1 {{Found a parallel region that is called in a target region but not part of a combined target construct nor nested inside a target construct without intermediate code. This can lead to excessive register usage for unrelated target regions in the same translation unit due to spurious call edges assumed by ptxas.}} - // safe-remark@#1 {{Parallel region is used in unknown ways; will not attempt to rewrite the state machine.}} - // force-remark@#1 {{Specialize parallel region that is only reached from a single target region to avoid spurious call edges and excessive register usage in other target regions. (parallel region ID: __omp_outlined__6_wrapper, kernel ID: }} + // safe-remark@#1 {{Parallel region is used in unknown ways. 
Will not attempt to rewrite the state machine.}} { } } void foo1(void) { #pragma omp target teams // #2 - // all-remark@#2 {{Generic-mode kernel is executed with a customized state machine [3 known parallel regions] (good).}} - // all-remark@#2 {{Target region containing the parallel region that is specialized. (parallel region ID: __omp_outlined__1_wrapper, kernel ID: __omp_offloading}} - // all-remark@#2 {{Target region containing the parallel region that is specialized. (parallel region ID: __omp_outlined__2_wrapper, kernel ID: __omp_offloading}} + // all-remark@#2 {{Rewriting generic-mode kernel with a customized state machine.}} + { - baz(); // all-remark {{Kernel will be executed in generic-mode due to this potential side-effect, consider to add `__attribute__((assume("ompx_spmd_amenable")))` to the called function '_Z3bazv'.}} + baz(); // all-remark {{Value has potential side effects preventing SPMD-mode execution. Add `__attribute__((assume("ompx_spmd_amenable")))` to the called function to override.}} #pragma omp parallel // #3 - // all-remark@#3 {{Found a parallel region that is called in a target region but not part of a combined target construct nor nested inside a target construct without intermediate code. This can lead to excessive register usage for unrelated target regions in the same translation unit due to spurious call edges assumed by ptxas.}} - // all-remark@#3 {{Specialize parallel region that is only reached from a single target region to avoid spurious call edges and excessive register usage in other target regions. (parallel region ID: __omp_outlined__1_wrapper, kernel ID: __omp_offloading}} { } bar1(); #pragma omp parallel // #4 - // all-remark@#4 {{Found a parallel region that is called in a target region but not part of a combined target construct nor nested inside a target construct without intermediate code. 
This can lead to excessive register usage for unrelated target regions in the same translation unit due to spurious call edges assumed by ptxas.}} - // all-remark@#4 {{Specialize parallel region that is only reached from a single target region to avoid spurious call edges and excessive register usage in other target regions. (parallel region ID: __omp_outlined__2_wrapper, kernel ID: __omp_offloading}} { } } @@ -46,21 +37,15 @@ void foo2(void) { #pragma omp target teams // #5 - // all-remark@#5 {{Generic-mode kernel is executed with a customized state machine [4 known parallel regions] (good).}} - // all-remark@#5 {{Target region containing the parallel region that is specialized. (parallel region ID: __omp_outlined__5_wrapper, kernel ID: __omp_offloading}} - // all-remark@#5 {{Target region containing the parallel region that is specialized. (parallel region ID: __omp_outlined__4_wrapper, kernel ID: __omp_offloading}} + // all-remark@#5 {{Rewriting generic-mode kernel with a customized state machine.}} { - baz(); // all-remark {{Kernel will be executed in generic-mode due to this potential side-effect, consider to add `__attribute__((assume("ompx_spmd_amenable")))` to the called function '_Z3bazv'.}} + baz(); // all-remark {{Value has potential side effects preventing SPMD-mode execution. Add `__attribute__((assume("ompx_spmd_amenable")))` to the called function to override.}} #pragma omp parallel // #6 - // all-remark@#6 {{Found a parallel region that is called in a target region but not part of a combined target construct nor nested inside a target construct without intermediate code. This can lead to excessive register usage for unrelated target regions in the same translation unit due to spurious call edges assumed by ptxas.}} - // all-remark@#6 {{Specialize parallel region that is only reached from a single target region to avoid spurious call edges and excessive register usage in other target regions. 
(parallel region ID: __omp_outlined__4_wrapper, kernel ID: __omp_offloading}} { } bar1(); bar2(); #pragma omp parallel // #7 - // all-remark@#7 {{Found a parallel region that is called in a target region but not part of a combined target construct nor nested inside a target construct without intermediate code. This can lead to excessive register usage for unrelated target regions in the same translation unit due to spurious call edges assumed by ptxas.}} - // all-remark@#7 {{Specialize parallel region that is only reached from a single target region to avoid spurious call edges and excessive register usage in other target regions. (parallel region ID: __omp_outlined__5_wrapper, kernel ID: __omp_offloading}} { } bar1(); @@ -70,21 +55,15 @@ void foo3(void) { #pragma omp target teams // #8 - // all-remark@#8 {{Generic-mode kernel is executed with a customized state machine [4 known parallel regions] (good).}} - // all-remark@#8 {{Target region containing the parallel region that is specialized. (parallel region ID: __omp_outlined__7_wrapper, kernel ID: __omp_offloading}} - // all-remark@#8 {{Target region containing the parallel region that is specialized. (parallel region ID: __omp_outlined__8_wrapper, kernel ID: __omp_offloading}} + // all-remark@#8 {{Rewriting generic-mode kernel with a customized state machine.}} { - baz(); // all-remark {{Kernel will be executed in generic-mode due to this potential side-effect, consider to add `__attribute__((assume("ompx_spmd_amenable")))` to the called function '_Z3bazv'.}} + baz(); // all-remark {{Value has potential side effects preventing SPMD-mode execution. Add `__attribute__((assume("ompx_spmd_amenable")))` to the called function to override.}} #pragma omp parallel // #9 - // all-remark@#9 {{Found a parallel region that is called in a target region but not part of a combined target construct nor nested inside a target construct without intermediate code. 
This can lead to excessive register usage for unrelated target regions in the same translation unit due to spurious call edges assumed by ptxas.}} - // all-remark@#9 {{Specialize parallel region that is only reached from a single target region to avoid spurious call edges and excessive register usage in other target regions. (parallel region ID: __omp_outlined__7_wrapper, kernel ID: __omp_offloading}} { } bar1(); bar2(); #pragma omp parallel // #10 - // all-remark@#10 {{Found a parallel region that is called in a target region but not part of a combined target construct nor nested inside a target construct without intermediate code. This can lead to excessive register usage for unrelated target regions in the same translation unit due to spurious call edges assumed by ptxas.}} - // all-remark@#10 {{Specialize parallel region that is only reached from a single target region to avoid spurious call edges and excessive register usage in other target regions. (parallel region ID: __omp_outlined__8_wrapper, kernel ID: __omp_offloading}} { } bar1(); @@ -104,5 +83,4 @@ } } -// all-remark@* 5 {{OpenMP runtime call __kmpc_global_thread_num moved to beginning of OpenMP region}} // all-remark@* 9 {{OpenMP runtime call __kmpc_global_thread_num deduplicated}} diff --git a/clang/test/OpenMP/remarks_parallel_in_target_state_machine.c b/clang/test/OpenMP/remarks_parallel_in_target_state_machine.c --- a/clang/test/OpenMP/remarks_parallel_in_target_state_machine.c +++ b/clang/test/OpenMP/remarks_parallel_in_target_state_machine.c @@ -8,28 +8,21 @@ void bar(void) { #pragma omp parallel // #1 \ - // expected-remark@#1 {{Found a parallel region that is called in a target region but not part of a combined target construct nor nested inside a target construct without intermediate code. 
This can lead to excessive register usage for unrelated target regions in the same translation unit due to spurious call edges assumed by ptxas.}} \ - // expected-remark@#1 {{Parallel region is used in unknown ways; will not attempt to rewrite the state machine.}} + // expected-remark@#1 {{Parallel region is used in unknown ways. Will not attempt to rewrite the state machine.}} { } } void foo(void) { -#pragma omp target teams // #2 \ - // expected-remark@#2 {{Generic-mode kernel is executed with a customized state machine [3 known parallel regions] (good).}} - // expected-remark@#2 {{Target region containing the parallel region that is specialized. (parallel region ID: __omp_outlined__1_wrapper, kernel ID: __omp_offloading}} \ - // expected-remark@#2 {{Target region containing the parallel region that is specialized. (parallel region ID: __omp_outlined__2_wrapper, kernel ID: __omp_offloading}} +#pragma omp target teams // #2 + // expected-remark@#2 {{Rewriting generic-mode kernel with a customized state machine.}} { - baz(); // expected-remark {{Kernel will be executed in generic-mode due to this potential side-effect, consider to add `__attribute__((assume("ompx_spmd_amenable")))` to the called function '_Z3bazv'.}} -#pragma omp parallel // #3 \ - // expected-remark@#3 {{Found a parallel region that is called in a target region but not part of a combined target construct nor nested inside a target construct without intermediate code. This can lead to excessive register usage for unrelated target regions in the same translation unit due to spurious call edges assumed by ptxas.}} \ - // expected-remark@#3 {{Specialize parallel region that is only reached from a single target region to avoid spurious call edges and excessive register usage in other target regions. (parallel region ID: __omp_outlined__1_wrapper, kernel ID: __omp_offloading}} + baz(); // expected-remark {{Value has potential side effects preventing SPMD-mode execution. 
Add `__attribute__((assume("ompx_spmd_amenable")))` to the called function to override.}} +#pragma omp parallel { } bar(); -#pragma omp parallel // #4 \ - // expected-remark@#4 {{Found a parallel region that is called in a target region but not part of a combined target construct nor nested inside a target construct without intermediate code. This can lead to excessive register usage for unrelated target regions in the same translation unit due to spurious call edges assumed by ptxas.}} \ - // expected-remark@#4 {{Specialize parallel region that is only reached from a single target region to avoid spurious call edges and excessive register usage in other target regions. (parallel region ID: __omp_outlined__2_wrapper, kernel ID: __omp_offloading}} +#pragma omp parallel { } } @@ -47,5 +40,4 @@ } } -// expected-remark@* {{OpenMP runtime call __kmpc_global_thread_num moved to beginning of OpenMP region}} // expected-remark@* {{OpenMP runtime call __kmpc_global_thread_num deduplicated}} diff --git a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp --- a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp +++ b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp @@ -5328,10 +5328,10 @@ // Emit a missed remark if this is missed OpenMP globalization. auto Remark = [&](OptimizationRemarkMissed ORM) { - return ORM << "Could not move globalized variable to the stack as " - "variable is potentially captured in call; mark " - "parameter as " - "`__attribute__((noescape))` to override."; + return ORM + << "Could not move globalized variable to the stack. " + "Variable is potentially captured in call. 
Mark " + "parameter as `__attribute__((noescape))` to override."; }; if (AI.LibraryFunctionId == LibFunc___kmpc_alloc_shared) diff --git a/llvm/lib/Transforms/IPO/OpenMPOpt.cpp b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp --- a/llvm/lib/Transforms/IPO/OpenMPOpt.cpp +++ b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp @@ -403,6 +403,7 @@ { \ SmallVector ArgsTypes({__VA_ARGS__}); \ Function *F = M.getFunction(_Name); \ + RTLFunctions.insert(F); \ if (declMatchesRTFTypes(F, OMPBuilder._ReturnType, ArgsTypes)) { \ RuntimeFunctionIDMap[F] = _Enum; \ auto &RFI = RFIs[_Enum]; \ @@ -431,6 +432,9 @@ /// Collection of known kernels (\see Kernel) in the module. SmallPtrSetImpl &Kernels; + + /// Collection of known OpenMP runtime functions. + DenseSet RTLFunctions; }; template @@ -935,16 +939,14 @@ assert(MergableCIs.size() > 1 && "Assumed multiple mergable CIs"); auto Remark = [&](OptimizationRemark OR) { - OR << "Parallel region at " - << ore::NV("OpenMPParallelMergeFront", - MergableCIs.front()->getDebugLoc()) - << " merged with parallel regions at "; + OR << "Parallel region merged with parallel region" + << (MergableCIs.size() > 2 ? 
"s" : "") << " at "; for (auto *CI : llvm::drop_begin(MergableCIs)) { OR << ore::NV("OpenMPParallelMerge", CI->getDebugLoc()); if (CI != MergableCIs.back()) OR << ", "; } - return OR; + return OR << "."; }; emitRemark(MergableCIs.front(), @@ -1035,17 +1037,6 @@ OMPD_parallel); } - auto Remark = [&](OptimizationRemark OR) { - return OR << "Parallel region at " - << ore::NV("OpenMPParallelMerge", CI->getDebugLoc()) - << " merged with " - << ore::NV("OpenMPParallelMergeFront", - MergableCIs.front()->getDebugLoc()); - }; - if (CI != MergableCIs.front()) - emitRemark(CI, "OpenMPParallelRegionMerging", - Remark); - CI->eraseFromParent(); } @@ -1211,9 +1202,7 @@ << CI->getCaller()->getName() << "\n"); auto Remark = [&](OptimizationRemark OR) { - return OR << "Parallel region in " - << ore::NV("OpenMPParallelDelete", CI->getCaller()->getName()) - << " deleted"; + return OR << "Removing parallel region with no side-effects."; }; emitRemark(CI, "OpenMPParallelRegionDeletion", Remark); @@ -1572,13 +1561,6 @@ if (!CanBeMoved(*CI)) continue; - auto Remark = [&](OptimizationRemark OR) { - return OR << "OpenMP runtime call " - << ore::NV("OpenMPOptRuntime", RFI.Name) - << " moved to beginning of OpenMP region"; - }; - emitRemark(&F, "OpenMPRuntimeCodeMotion", Remark); - CI->moveBefore(&*F.getEntryBlock().getFirstInsertionPt()); ReplVal = CI; break; @@ -1608,9 +1590,12 @@ auto Remark = [&](OptimizationRemark OR) { return OR << "OpenMP runtime call " - << ore::NV("OpenMPOptRuntime", RFI.Name) << " deduplicated"; + << ore::NV("OpenMPOptRuntime", RFI.Name) << " deduplicated."; }; - emitRemark(&F, "OpenMPRuntimeDeduplicated", Remark); + if (CI->getDebugLoc()) + emitRemark(CI, "OpenMPRuntimeDeduplicated", Remark); + else + emitRemark(&F, "OpenMPRuntimeDeduplicated", Remark); CGUpdater.removeCallSite(*CI); CI->replaceAllUsesWith(ReplVal); @@ -1791,8 +1776,7 @@ // See https://openmp.llvm.org/remarks/OptimizationRemarks.html auto Remark = [&](OptimizationRemarkAnalysis ORA) { - return ORA 
- << "[OMP100] Potentially unknown OpenMP target region caller"; + return ORA << "Potentially unknown OpenMP target region caller."; }; emitRemark(&F, "OMP100", Remark); @@ -1886,33 +1870,18 @@ if (!KernelParallelUse) continue; - { - auto Remark = [&](OptimizationRemarkAnalysis ORA) { - return ORA << "Found a parallel region that is called in a target " - "region but not part of a combined target construct nor " - "nested inside a target construct without intermediate " - "code. This can lead to excessive register usage for " - "unrelated target regions in the same translation unit " - "due to spurious call edges assumed by ptxas."; - }; - emitRemark(F, "OpenMPParallelRegionInNonSPMD", - Remark); - } - // If this ever hits, we should investigate. // TODO: Checking the number of uses is not a necessary restriction and // should be lifted. if (UnknownUse || NumDirectCalls != 1 || ToBeReplacedStateMachineUses.size() > 2) { - { - auto Remark = [&](OptimizationRemarkAnalysis ORA) { - return ORA << "Parallel region is used in " - << (UnknownUse ? "unknown" : "unexpected") - << " ways; will not attempt to rewrite the state machine."; - }; - emitRemark( - F, "OpenMPParallelRegionInNonSPMD", Remark); - } + auto Remark = [&](OptimizationRemarkAnalysis ORA) { + return ORA << "Parallel region is used in " + << (UnknownUse ? "unknown" : "unexpected") + << " ways. Will not attempt to rewrite the state machine."; + }; + emitRemark(F, "OpenMPParallelRegionInNonSPMD", + Remark); continue; } @@ -1920,16 +1889,12 @@ // up if the function is not called from a unique kernel. 
Kernel K = getUniqueKernelFor(*F); if (!K) { - { - auto Remark = [&](OptimizationRemarkAnalysis ORA) { - return ORA << "Parallel region is not known to be called from a " - "unique single target region, maybe the surrounding " - "function has external linkage?; will not attempt to " - "rewrite the state machine use."; - }; - emitRemark( - F, "OpenMPParallelRegionInMultipleKernesl", Remark); - } + auto Remark = [&](OptimizationRemarkAnalysis ORA) { + return ORA << "Parallel region is not called from a unique kernel. " + "Will not attempt to rewrite the state machine."; + }; + emitRemark( + F, "OpenMPParallelRegionInMultipleKernesl", Remark); continue; } @@ -1938,29 +1903,6 @@ // function pointer by a new global symbol for identification purposes. This // ensures only direct calls to the function are left. - { - auto RemarkParalleRegion = [&](OptimizationRemarkAnalysis ORA) { - return ORA << "Specialize parallel region that is only reached from a " - "single target region to avoid spurious call edges and " - "excessive register usage in other target regions. " - "(parallel region ID: " - << ore::NV("OpenMPParallelRegion", F->getName()) - << ", kernel ID: " - << ore::NV("OpenMPTargetRegion", K->getName()) << ")"; - }; - emitRemark(F, "OpenMPParallelRegionInNonSPMD", - RemarkParalleRegion); - auto RemarkKernel = [&](OptimizationRemarkAnalysis ORA) { - return ORA << "Target region containing the parallel region that is " - "specialized. (parallel region ID: " - << ore::NV("OpenMPParallelRegion", F->getName()) - << ", kernel ID: " - << ore::NV("OpenMPTargetRegion", K->getName()) << ")"; - }; - emitRemark(K, "OpenMPParallelRegionInNonSPMD", - RemarkKernel); - } - Module &M = *F->getParent(); Type *Int8Ty = Type::getInt8Ty(M.getContext()); @@ -2637,7 +2579,7 @@ return OR << "Replaced globalized variable with " << ore::NV("SharedMemory", AllocSize->getZExtValue()) << ((AllocSize->getZExtValue() != 1) ? 
" bytes " : " byte ") - << "of shared memory"; + << "of shared memory."; }; A.emitRemark(CB, "OpenMPReplaceGlobalization", Remark); @@ -2860,19 +2802,24 @@ } bool changeToSPMDMode(Attributor &A) { + auto &OMPInfoCache = static_cast(A.getInfoCache()); + if (!SPMDCompatibilityTracker.isAssumed()) { for (Instruction *NonCompatibleI : SPMDCompatibilityTracker) { if (!NonCompatibleI) continue; + + // Skip diagnostics on calls to known OpenMP runtime functions for now. + if (auto *CB = dyn_cast(NonCompatibleI)) + if (OMPInfoCache.RTLFunctions.contains(CB->getCalledFunction())) + continue; + auto Remark = [&](OptimizationRemarkAnalysis ORA) { - ORA << "Kernel will be executed in generic-mode due to this " - "potential side-effect"; - if (auto *CI = dyn_cast(NonCompatibleI)) { - if (Function *F = CI->getCalledFunction()) - ORA << ", consider to add " - "`__attribute__((assume(\"ompx_spmd_amenable\")))`" - " to the called function '" - << F->getName() << "'"; + ORA << "Value has potential side effects preventing SPMD-mode " + "execution"; + if (isa(NonCompatibleI)) { + ORA << ". 
Add `__attribute__((assume(\"ompx_spmd_amenable\")))` to " + "the called function to override"; } return ORA << "."; }; @@ -2915,7 +2862,7 @@ ++NumOpenMPTargetRegionKernelsSPMD; auto Remark = [&](OptimizationRemark OR) { - return OR << "Generic-mode kernel is changed to SPMD-mode."; + return OR << "Transformed generic-mode kernel to SPMD-mode."; }; A.emitRemark(KernelInitCB, "OpenMPKernelSPMDMode", Remark); @@ -2960,8 +2907,7 @@ ++NumOpenMPTargetRegionKernelsWithoutStateMachine; auto Remark = [&](OptimizationRemark OR) { - return OR << "Generic-mode kernel is executed without state machine " - "(good)"; + return OR << "Removing unused state machine from generic-mode kernel."; }; A.emitRemark( KernelInitCB, "OpenMPKernelWithoutStateMachine", Remark); @@ -2974,28 +2920,19 @@ ++NumOpenMPTargetRegionKernelsCustomStateMachineWithoutFallback; auto Remark = [&](OptimizationRemark OR) { - return OR << "Generic-mode kernel is executed with a customized state " - "machine [" - << ore::NV("ParallelRegions", - ReachedKnownParallelRegions.size()) - << " known parallel regions] (good)."; + return OR << "Rewriting generic-mode kernel with a customized state " + "machine."; }; A.emitRemark( KernelInitCB, "OpenMPKernelWithCustomizedStateMachine", Remark); } else { ++NumOpenMPTargetRegionKernelsCustomStateMachineWithFallback; - auto Remark = [&](OptimizationRemark OR) { + auto Remark = [&](OptimizationRemarkAnalysis OR) { return OR << "Generic-mode kernel is executed with a customized state " - "machine that requires a fallback [" - << ore::NV("ParallelRegions", - ReachedKnownParallelRegions.size()) - << " known parallel regions, " - << ore::NV("UnknownParallelRegions", - ReachedUnknownParallelRegions.size()) - << " unkown parallel regions] (bad)."; + "machine that requires a fallback."; }; - A.emitRemark( + A.emitRemark( KernelInitCB, "OpenMPKernelWithCustomizedStateMachineAndFallback", Remark); @@ -3004,11 +2941,9 @@ if (!UnknownParallelRegionCB) continue; auto Remark = 
[&](OptimizationRemarkAnalysis ORA) { - return ORA - << "State machine fallback caused by this call. If it is a " - "false positive, use " - "`__attribute__((assume(\"omp_no_openmp\")))` " - "(or \"omp_no_parallelism\")."; + return ORA << "Call may contain unknown parallel regions. Use " + << "`__attribute__((assume(\"omp_no_parallelism\")))` to " + "override."; }; A.emitRemark( UnknownParallelRegionCB, diff --git a/llvm/test/Transforms/OpenMP/custom_state_machines_remarks.ll b/llvm/test/Transforms/OpenMP/custom_state_machines_remarks.ll --- a/llvm/test/Transforms/OpenMP/custom_state_machines_remarks.ll +++ b/llvm/test/Transforms/OpenMP/custom_state_machines_remarks.ll @@ -1,10 +1,11 @@ ; RUN: opt -passes=openmp-opt -pass-remarks=openmp-opt -pass-remarks-missed=openmp-opt -pass-remarks-analysis=openmp-opt -disable-output < %s 2>&1 | FileCheck %s target triple = "nvptx64" -; CHECK: remark: llvm/test/Transforms/OpenMP/custom_state_machines_remarks.c:11:1: Generic-mode kernel is executed with a customized state machine that requires a fallback [1 known parallel regions, 2 unkown parallel regions] (bad) -; CHECK: remark: llvm/test/Transforms/OpenMP/custom_state_machines_remarks.c:13:5: State machine fallback caused by this call. If it is a false positive, use `__attribute__((assume("omp_no_openmp")))` (or "omp_no_parallelism") -; CHECK: remark: llvm/test/Transforms/OpenMP/custom_state_machines_remarks.c:15:5: State machine fallback caused by this call. If it is a false positive, use `__attribute__((assume("omp_no_openmp")))` (or "omp_no_parallelism") -; CHECK: remark: llvm/test/Transforms/OpenMP/custom_state_machines_remarks.c:20:1: Generic-mode kernel is executed with a customized state machine [1 known parallel regions] (good) +; CHECK: remark: llvm/test/Transforms/OpenMP/custom_state_machines_remarks.c:11:1: Generic-mode kernel is executed with a customized state machine that requires a fallback. 
+; CHECK: remark: llvm/test/Transforms/OpenMP/custom_state_machines_remarks.c:13:5: Call may contain unknown parallel regions. Use `__attribute__((assume("omp_no_parallelism")))` to override. +; CHECK: remark: llvm/test/Transforms/OpenMP/custom_state_machines_remarks.c:15:5: Call may contain unknown parallel regions. Use `__attribute__((assume("omp_no_parallelism")))` to override. +; CHECK: remark: llvm/test/Transforms/OpenMP/custom_state_machines_remarks.c:20:1: Rewriting generic-mode kernel with a customized state machine. + ;; void unknown(void); ;; void known(void) { diff --git a/llvm/test/Transforms/OpenMP/deduplication_remarks.ll b/llvm/test/Transforms/OpenMP/deduplication_remarks.ll --- a/llvm/test/Transforms/OpenMP/deduplication_remarks.ll +++ b/llvm/test/Transforms/OpenMP/deduplication_remarks.ll @@ -10,9 +10,8 @@ @0 = private unnamed_addr global %struct.ident_t { i32 0, i32 34, i32 0, i32 0, i8* getelementptr inbounds ([23 x i8], [23 x i8]* @.str0, i32 0, i32 0) }, align 8 @.str0 = private unnamed_addr constant [23 x i8] c";unknown;unknown;0;0;;\00", align 1 -; CHECK: remark: deduplication_remarks.c:4:0: OpenMP runtime call __kmpc_global_thread_num moved to beginning of OpenMP region -; CHECK: remark: deduplication_remarks.c:4:0: OpenMP runtime call __kmpc_global_thread_num deduplicated -; CHECK: remark: deduplication_remarks.c:4:0: OpenMP runtime call __kmpc_global_thread_num deduplicated +; CHECK: remark: deduplication_remarks.c:7:10: OpenMP runtime call __kmpc_global_thread_num deduplicated +; CHECK: remark: deduplication_remarks.c:9:10: OpenMP runtime call __kmpc_global_thread_num deduplicated define dso_local void @deduplicate() local_unnamed_addr !dbg !14 { %1 = tail call i32 @__kmpc_global_thread_num(%struct.ident_t* nonnull @0), !dbg !21 call void @useI32(i32 %1), !dbg !23 diff --git a/llvm/test/Transforms/OpenMP/globalization_remarks.ll b/llvm/test/Transforms/OpenMP/globalization_remarks.ll --- 
a/llvm/test/Transforms/OpenMP/globalization_remarks.ll +++ b/llvm/test/Transforms/OpenMP/globalization_remarks.ll @@ -4,7 +4,7 @@ target datalayout = "e-i64:64-i128:128-v16:16-v32:32-n16:32:64" target triple = "nvptx64" -; CHECK: remark: globalization_remarks.c:5:7: Could not move globalized variable to the stack as variable is potentially captured in call; mark parameter as `__attribute__((noescape))` to override. +; CHECK: remark: globalization_remarks.c:5:7: Could not move globalized variable to the stack. Variable is potentially captured in call. Mark parameter as `__attribute__((noescape))` to override. ; CHECK: remark: globalization_remarks.c:5:7: Found thread data sharing on the GPU. Expect degraded performance due to data globalization. %struct.ident_t = type { i32, i32, i32, i32, i8* } diff --git a/llvm/test/Transforms/OpenMP/parallel_deletion_remarks.ll b/llvm/test/Transforms/OpenMP/parallel_deletion_remarks.ll --- a/llvm/test/Transforms/OpenMP/parallel_deletion_remarks.ll +++ b/llvm/test/Transforms/OpenMP/parallel_deletion_remarks.ll @@ -23,9 +23,9 @@ ; ; This will delete all but the first parallel region -; CHECK: remark: parallel_deletion_remarks.c:10:1: Parallel region in delete_parallel deleted -; CHECK: remark: parallel_deletion_remarks.c:12:1: Parallel region in delete_parallel deleted -; CHECK: remark: parallel_deletion_remarks.c:14:1: Parallel region in delete_parallel deleted +; CHECK: remark: parallel_deletion_remarks.c:10:1: Removing parallel region with no side-effects. +; CHECK: remark: parallel_deletion_remarks.c:12:1: Removing parallel region with no side-effects. +; CHECK: remark: parallel_deletion_remarks.c:14:1: Removing parallel region with no side-effects. define dso_local void @delete_parallel() local_unnamed_addr !dbg !15 { call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* nonnull @0, i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined. 
to void (i32*, i32*, ...)*)), !dbg !18 call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* nonnull @0, i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined..2 to void (i32*, i32*, ...)*)), !dbg !19 diff --git a/llvm/test/Transforms/OpenMP/remove_globalization.ll b/llvm/test/Transforms/OpenMP/remove_globalization.ll --- a/llvm/test/Transforms/OpenMP/remove_globalization.ll +++ b/llvm/test/Transforms/OpenMP/remove_globalization.ll @@ -4,7 +4,7 @@ target datalayout = "e-i64:64-i128:128-v16:16-v32:32-n16:32:64" target triple = "nvptx64" -; CHECK-REMARKS: remark: remove_globalization.c:4:2: Could not move globalized variable to the stack as variable is potentially captured in call; mark parameter as `__attribute__((noescape))` to override. +; CHECK-REMARKS: remark: remove_globalization.c:4:2: Could not move globalized variable to the stack. Variable is potentially captured in call. Mark parameter as `__attribute__((noescape))` to override. ; CHECK-REMARKS: remark: remove_globalization.c:2:2: Moving globalized variable to the stack. ; CHECK-REMARKS: remark: remove_globalization.c:6:2: Moving globalized variable to the stack. ; CHECK-REMARKS: remark: remove_globalization.c:4:2: Found thread data sharing on the GPU. Expect degraded performance due to data globalization. 
diff --git a/llvm/test/Transforms/OpenMP/spmdization_remarks.ll b/llvm/test/Transforms/OpenMP/spmdization_remarks.ll --- a/llvm/test/Transforms/OpenMP/spmdization_remarks.ll +++ b/llvm/test/Transforms/OpenMP/spmdization_remarks.ll @@ -1,12 +1,13 @@ ; RUN: opt -passes=openmp-opt -pass-remarks=openmp-opt -pass-remarks-missed=openmp-opt -pass-remarks-analysis=openmp-opt -disable-output < %s 2>&1 | FileCheck %s target triple = "nvptx64" -; CHECK: remark: llvm/test/Transforms/OpenMP/spmdization_remarks.c:13:5: Kernel will be executed in generic-mode due to this potential side-effect, consider to add `__attribute__((assume("ompx_spmd_amenable")))` to the called function 'unknown'. -; CHECK: remark: llvm/test/Transforms/OpenMP/spmdization_remarks.c:15:5: Kernel will be executed in generic-mode due to this potential side-effect, consider to add `__attribute__((assume("ompx_spmd_amenable")))` to the called function 'unknown'. -; CHECK: remark: llvm/test/Transforms/OpenMP/spmdization_remarks.c:11:1: Generic-mode kernel is executed with a customized state machine that requires a fallback [1 known parallel regions, 2 unkown parallel regions] (bad). -; CHECK: remark: llvm/test/Transforms/OpenMP/spmdization_remarks.c:13:5: State machine fallback caused by this call. If it is a false positive, use `__attribute__((assume("omp_no_openmp")))` (or "omp_no_parallelism"). -; CHECK: remark: llvm/test/Transforms/OpenMP/spmdization_remarks.c:15:5: State machine fallback caused by this call. If it is a false positive, use `__attribute__((assume("omp_no_openmp")))` (or "omp_no_parallelism"). -; CHECK: remark: llvm/test/Transforms/OpenMP/spmdization_remarks.c:20:1: Generic-mode kernel is changed to SPMD-mode. +; CHECK: remark: llvm/test/Transforms/OpenMP/spmdization_remarks.c:13:5: Value has potential side effects preventing SPMD-mode execution. Add `__attribute__((assume("ompx_spmd_amenable")))` to the called function to override. 
+; CHECK: remark: llvm/test/Transforms/OpenMP/spmdization_remarks.c:15:5: Value has potential side effects preventing SPMD-mode execution. Add `__attribute__((assume("ompx_spmd_amenable")))` to the called function to override. +; CHECK: remark: llvm/test/Transforms/OpenMP/spmdization_remarks.c:11:1: Generic-mode kernel is executed with a customized state machine that requires a fallback. +; CHECK: remark: llvm/test/Transforms/OpenMP/spmdization_remarks.c:13:5: Call may contain unknown parallel regions. Use `__attribute__((assume("omp_no_parallelism")))` to override. +; CHECK: remark: llvm/test/Transforms/OpenMP/spmdization_remarks.c:15:5: Call may contain unknown parallel regions. Use `__attribute__((assume("omp_no_parallelism")))` to override. +; CHECK: remark: llvm/test/Transforms/OpenMP/spmdization_remarks.c:20:1: Transformed generic-mode kernel to SPMD-mode. + ;; void unknown(void); ;; void known(void) {