Summary of this patch (free-form preamble; everything before the first
"diff --git" header is descriptive only):

  * Attributor.cpp: after the fixpoint loop, if the iteration counter exceeded
    MaxFixedPointIterations and the worklist is still non-empty, emit a
    missed-optimization remark with the ID "FixedPoint" on the function
    associated with the first abstract attribute left in the worklist.
  * OpenMPOpt.cpp: add the hidden option -openmp-opt-max-iterations
    (initial value 256) and use it, instead of the literal 128, as the
    iteration limit whenever isOpenMPDevice(M) holds; other modules keep 32.
  * OpenMPOpt.cpp: the state machine rewrite now returns
    ChangeStatus::UNCHANGED when ReachedKnownParallelRegions is not in a valid
    state, where it previously asserted.
  * remove_globalization.ll: add a RUN line with -openmp-opt-max-iterations=1
    and a CHECK-FIXPOINT line for the new remark; the hand-written check lines
    are fenced with UTC_ARGS --disable/--enable.

diff --git a/llvm/lib/Transforms/IPO/Attributor.cpp b/llvm/lib/Transforms/IPO/Attributor.cpp
--- a/llvm/lib/Transforms/IPO/Attributor.cpp
+++ b/llvm/lib/Transforms/IPO/Attributor.cpp
@@ -1415,6 +1415,16 @@
   } while (!Worklist.empty() && (IterationCounter++ < MaxFixedPointIterations ||
                                  VerifyMaxFixpointIterations));
 
+  if (IterationCounter > MaxFixedPointIterations && !Worklist.empty()) {
+    auto Remark = [&](OptimizationRemarkMissed ORM) {
+      return ORM << "Attributor did not reach a fixpoint after "
+                 << ore::NV("Iterations", MaxFixedPointIterations)
+                 << " iterations.";
+    };
+    Function *F = Worklist.front()->getIRPosition().getAssociatedFunction();
+    emitRemark<OptimizationRemarkMissed>(F, "FixedPoint", Remark);
+  }
+
   LLVM_DEBUG(dbgs() << "\n[Attributor] Fixpoint iteration done after: "
                     << IterationCounter << "/" << MaxFixpointIterations
                     << " iterations\n");
diff --git a/llvm/lib/Transforms/IPO/OpenMPOpt.cpp b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp
--- a/llvm/lib/Transforms/IPO/OpenMPOpt.cpp
+++ b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp
@@ -109,6 +109,11 @@
     cl::desc("Enables more verbose remarks."), cl::Hidden,
     cl::init(false));
 
+static cl::opt<unsigned>
+    SetFixpointIterations("openmp-opt-max-iterations", cl::Hidden,
+                          cl::desc("Maximal number of attributor iterations."),
+                          cl::init(256));
+
 STATISTIC(NumOpenMPRuntimeCallsDeduplicated,
           "Number of OpenMP runtime calls deduplicated");
 STATISTIC(NumOpenMPParallelRegionsDeleted,
@@ -3348,8 +3353,9 @@
     if (DisableOpenMPOptStateMachineRewrite)
       return ChangeStatus::UNCHANGED;
 
-    assert(ReachedKnownParallelRegions.isValidState() &&
-           "Custom state machine with invalid parallel region states?");
+    // Don't rewrite the state machine if we are not in a valid state.
+    if (!ReachedKnownParallelRegions.isValidState())
+      return ChangeStatus::UNCHANGED;
 
     const int InitModeArgNo = 1;
     const int InitUseStateMachineArgNo = 2;
@@ -4583,7 +4589,8 @@
   SetVector<Function *> Functions(SCC.begin(), SCC.end());
   OMPInformationCache InfoCache(M, AG, Allocator, /*CGSCC*/ Functions, Kernels);
 
-  unsigned MaxFixpointIterations = (isOpenMPDevice(M)) ? 128 : 32;
+  unsigned MaxFixpointIterations =
+      (isOpenMPDevice(M)) ? SetFixpointIterations : 32;
   Attributor A(Functions, InfoCache, CGUpdater, nullptr, true, false,
                MaxFixpointIterations, OREGetter, DEBUG_TYPE);
 
@@ -4646,7 +4653,8 @@
   OMPInformationCache InfoCache(*(Functions.back()->getParent()), AG, Allocator,
                                 /*CGSCC*/ Functions, Kernels);
 
-  unsigned MaxFixpointIterations = (isOpenMPDevice(M)) ? 128 : 32;
+  unsigned MaxFixpointIterations =
+      (isOpenMPDevice(M)) ? SetFixpointIterations : 32;
   Attributor A(Functions, InfoCache, CGUpdater, nullptr, false, true,
                MaxFixpointIterations, OREGetter, DEBUG_TYPE);
 
@@ -4716,7 +4724,8 @@
                                   Allocator,
                                   /*CGSCC*/ Functions, Kernels);
 
-    unsigned MaxFixpointIterations = (isOpenMPDevice(M)) ? 128 : 32;
+    unsigned MaxFixpointIterations =
+        (isOpenMPDevice(M)) ? SetFixpointIterations : 32;
     Attributor A(Functions, InfoCache, CGUpdater, nullptr, false, true,
                  MaxFixpointIterations, OREGetter, DEBUG_TYPE);
 
diff --git a/llvm/test/Transforms/OpenMP/remove_globalization.ll b/llvm/test/Transforms/OpenMP/remove_globalization.ll
--- a/llvm/test/Transforms/OpenMP/remove_globalization.ll
+++ b/llvm/test/Transforms/OpenMP/remove_globalization.ll
@@ -1,14 +1,18 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --check-globals
 ; RUN: opt -S -passes=openmp-opt < %s | FileCheck %s
 ; RUN: opt -passes=openmp-opt -pass-remarks=openmp-opt -pass-remarks-missed=openmp-opt -disable-output < %s 2>&1 | FileCheck %s -check-prefix=CHECK-REMARKS
+; RUN: opt -passes=openmp-opt -pass-remarks-missed=openmp-opt -openmp-opt-max-iterations=1 -disable-output < %s 2>&1 | FileCheck %s -check-prefix=CHECK-FIXPOINT
 ; RUN: opt -openmp-opt-disable-deglobalization -S -passes=openmp-opt < %s | FileCheck %s --check-prefix=CHECK-DISABLED
 target datalayout = "e-i64:64-i128:128-v16:16-v32:32-n16:32:64"
 target triple = "nvptx64"
 
+; UTC_ARGS: --disable
 ; CHECK-REMARKS: remark: remove_globalization.c:4:2: Could not move globalized variable to the stack. Variable is potentially captured in call. Mark parameter as `__attribute__((noescape))` to override.
 ; CHECK-REMARKS: remark: remove_globalization.c:2:2: Moving globalized variable to the stack.
 ; CHECK-REMARKS: remark: remove_globalization.c:6:2: Moving globalized variable to the stack.
 ; CHECK-REMARKS: remark: remove_globalization.c:4:2: Found thread data sharing on the GPU. Expect degraded performance due to data globalization.
+; CHECK-FIXPOINT: Attributor did not reach a fixpoint after 1 iterations.
+; UTC_ARGS: --enable
 
 @S = external local_unnamed_addr global i8*
 