Changeset View
Standalone View
llvm/lib/Transforms/IPO/OpenMPOpt.cpp
Show All 32 Lines | |||||
#define DEBUG_TYPE "openmp-opt" | #define DEBUG_TYPE "openmp-opt" | ||||
static cl::opt<bool> DisableOpenMPOptimizations( | static cl::opt<bool> DisableOpenMPOptimizations( | ||||
"openmp-opt-disable", cl::ZeroOrMore, | "openmp-opt-disable", cl::ZeroOrMore, | ||||
cl::desc("Disable OpenMP specific optimizations."), cl::Hidden, | cl::desc("Disable OpenMP specific optimizations."), cl::Hidden, | ||||
cl::init(false)); | cl::init(false)); | ||||
static cl::opt<bool> PrintICVValues("openmp-print-icv-values", cl::init(false), | |||||
cl::Hidden); | |||||
STATISTIC(NumOpenMPRuntimeCallsDeduplicated, | STATISTIC(NumOpenMPRuntimeCallsDeduplicated, | ||||
"Number of OpenMP runtime calls deduplicated"); | "Number of OpenMP runtime calls deduplicated"); | ||||
STATISTIC(NumOpenMPParallelRegionsDeleted, | STATISTIC(NumOpenMPParallelRegionsDeleted, | ||||
"Number of OpenMP parallel regions deleted"); | "Number of OpenMP parallel regions deleted"); | ||||
STATISTIC(NumOpenMPRuntimeFunctionsIdentified, | STATISTIC(NumOpenMPRuntimeFunctionsIdentified, | ||||
"Number of OpenMP runtime functions identified"); | "Number of OpenMP runtime functions identified"); | ||||
STATISTIC(NumOpenMPRuntimeFunctionUsesIdentified, | STATISTIC(NumOpenMPRuntimeFunctionUsesIdentified, | ||||
"Number of OpenMP runtime function uses identified"); | "Number of OpenMP runtime function uses identified"); | ||||
Show All 9 Lines | |||||
struct OMPInformationCache : public InformationCache { | struct OMPInformationCache : public InformationCache { | ||||
OMPInformationCache(Module &M, AnalysisGetter &AG, | OMPInformationCache(Module &M, AnalysisGetter &AG, | ||||
BumpPtrAllocator &Allocator, SetVector<Function *> *CGSCC, | BumpPtrAllocator &Allocator, SetVector<Function *> *CGSCC, | ||||
SmallPtrSetImpl<Function *> &ModuleSlice) | SmallPtrSetImpl<Function *> &ModuleSlice) | ||||
: InformationCache(M, AG, Allocator, CGSCC), ModuleSlice(ModuleSlice), | : InformationCache(M, AG, Allocator, CGSCC), ModuleSlice(ModuleSlice), | ||||
OMPBuilder(M) { | OMPBuilder(M) { | ||||
initializeTypes(M); | initializeTypes(M); | ||||
initializeRuntimeFunctions(); | initializeRuntimeFunctions(); | ||||
initializeInternalControlVars(); | |||||
OMPBuilder.initialize(); | OMPBuilder.initialize(); | ||||
} | } | ||||
/// Generic information that describes an internal control variable. | |||||
struct InternalControlVarInfo { | |||||
/// The kind, as described by InternalControlVar enum. | |||||
InternalControlVar Kind; | |||||
/// The name of the ICV. | |||||
StringRef Name; | |||||
/// Environment variable associated with this ICV. | |||||
StringRef EnvVarName; | |||||
/// Initial value kind. | |||||
ICVInitValue InitKind; | |||||
/// Initial value. | |||||
ConstantInt *InitValue; | |||||
/// Setter RTL function associated with this ICV. | |||||
RuntimeFunction Setter; | |||||
/// Getter RTL function associated with this ICV. | |||||
RuntimeFunction Getter; | |||||
/// RTL Function corresponding to the override clause of this ICV | |||||
RuntimeFunction Clause; | |||||
}; | |||||
/// Generic information that describes a runtime function | /// Generic information that describes a runtime function | ||||
struct RuntimeFunctionInfo { | struct RuntimeFunctionInfo { | ||||
/// The kind, as described by the RuntimeFunction enum. | /// The kind, as described by the RuntimeFunction enum. | ||||
RuntimeFunction Kind; | RuntimeFunction Kind; | ||||
/// The name of the function. | /// The name of the function. | ||||
StringRef Name; | StringRef Name; | ||||
▲ Show 20 Lines • Show All 82 Lines • ▼ Show 20 Lines | struct OMPInformationCache : public InformationCache { | ||||
/// An OpenMP-IR-Builder instance | /// An OpenMP-IR-Builder instance | ||||
OpenMPIRBuilder OMPBuilder; | OpenMPIRBuilder OMPBuilder; | ||||
/// Map from runtime function kind to the runtime function description. | /// Map from runtime function kind to the runtime function description. | ||||
EnumeratedArray<RuntimeFunctionInfo, RuntimeFunction, | EnumeratedArray<RuntimeFunctionInfo, RuntimeFunction, | ||||
RuntimeFunction::OMPRTL___last> | RuntimeFunction::OMPRTL___last> | ||||
RFIs; | RFIs; | ||||
/// Map from ICV kind to the ICV description. | |||||
EnumeratedArray<InternalControlVarInfo, InternalControlVar, | |||||
InternalControlVar::ICV___last> | |||||
ICVs; | |||||
/// Helper to initialize all internal control variable information for those | |||||
/// defined in OMPKinds.def. | |||||
void initializeInternalControlVars() { | |||||
#define ICV_RT_SET(_Name, RTL) \ | |||||
{ \ | |||||
auto &ICV = ICVs[_Name]; \ | |||||
ICV.Setter = RTL; \ | |||||
} | |||||
#define ICV_RT_GET(Name, RTL) \ | |||||
{ \ | |||||
auto &ICV = ICVs[Name]; \ | |||||
ICV.Getter = RTL; \ | |||||
} | |||||
#define ICV_DATA_ENV(Enum, _Name, _EnvVarName, Init) \ | |||||
{ \ | |||||
auto &ICV = ICVs[Enum]; \ | |||||
ICV.Name = _Name; \ | |||||
ICV.Kind = Enum; \ | |||||
ICV.InitKind = Init; \ | |||||
ICV.EnvVarName = _EnvVarName; \ | |||||
switch (ICV.InitKind) { \ | |||||
case IMPLEMENTATION_DEFINED: \ | |||||
ICV.InitValue = nullptr; \ | |||||
break; \ | |||||
case ZERO: \ | |||||
ICV.InitValue = \ | |||||
ConstantInt::get(Type::getInt32Ty(Int32->getContext()), 0); \ | |||||
hoyFB: Hello,
Looks like the `Int32` type with the full name `llvm::omp::types::Int32` is a global… | |||||
sstefan1AuthorUnsubmitted Not really sure, but turns out types were initialized twice, maybe that was the problem. I guess I could also get module context explicitly , instead of using Int32->getContext(). Anyway, I'll try to build with thinLTO and report back. sstefan1: Not really sure, but turns out types were initialized twice, maybe that was the problem. I… | |||||
jdoerfertUnsubmitted Not Done ReplyInline Actions
That is a problem. Can you fix that first. I would not be surprised if the LTO error goes away. jdoerfert: > Not really sure, but turns out types were initialized twice
That is a problem. Can you fix… | |||||
sstefan1AuthorUnsubmitted @hoyFB I wasn't able to reproduce this locally, but I committed a potential fix: rG61238d2690a6ebdf3c4f3f68f39101fac30263a7 Can you check again if the problem is still there? sstefan1: @hoyFB I wasn't able to reproduce this locally, but I committed a potential fix… | |||||
christyleeUnsubmitted Not Done ReplyInline Actions@sstefan1 Thanks! I'll check and report back. christylee: @sstefan1 Thanks! I'll check and report back. | |||||
hoyFBUnsubmitted Not Done ReplyInline Actions@sstefan1 Thanks for the quick response. I applied the fix and the problem is still there: #0 0x0000000001474c2f llvm::sys::PrintStackTrace(llvm::raw_ostream&) /home/hoy/local/server-llvm/llvm-project/llvm/lib/Support/Unix/Signals.inc:564:13 #1 0x0000000001472dd0 llvm::sys::RunSignalHandlers() /home/hoy/local/server-llvm/llvm-project/llvm/lib/Support/Signals.cpp:69:18 #2 0x00000000014751cc SignalHandler(int) /home/hoy/local/server-llvm/llvm-project/llvm/lib/Support/Unix/Signals.inc:396:3 #3 0x00007fb6ac7cc630 __restore_rt (/lib64/libpthread.so.0+0xf630) #4 0x0000000003107065 llvm::Type::getInt32Ty(llvm::LLVMContext&) /home/hoy/local/server-llvm/llvm-project/llvm/lib/IR/Type.cpp:186:66 #5 0x0000000002462754 (anonymous namespace)::OMPInformationCache::initializeInternalControlVars() /home/hoy/local/server-llvm/llvm-project/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def:326:1 #6 0x0000000002462754 (anonymous namespace)::OMPInformationCache::OMPInformationCache(llvm::Module&, llvm::AnalysisGetter&, llvm::BumpPtrAllocatorImpl<llvm::MallocAllocator, 4096ul, 4096ul, 128ul>&, llvm::SetVector<llvm::Function*, std::vector<llvm::Function*, std::allocator<llvm::Function*> >, llvm::DenseSet<llvm::Function*, llvm::DenseMapInfo<llvm::Function*> > >*, llvm::SmallPtrSetImpl<llvm::Function*>&) /home/hoy/local/server-llvm/llvm-project/llvm/lib/Transforms/IPO/OpenMPOpt.cpp:69:5 #7 0x0000000002484e0f llvm::SmallVectorTemplateCommon<llvm::Function*, void>::begin() /home/hoy/local/server-llvm/llvm-project/llvm/include/llvm/ADT/SmallVector.h:152:45 #8 0x0000000002484e0f (anonymous namespace)::OpenMPOpt::OpenMPOpt(llvm::SmallVectorImpl<llvm::Function*>&, llvm::CallGraphUpdater&, llvm::function_ref<llvm::OptimizationRemarkEmitter& (llvm::Function*)>, (anonymous namespace)::OMPInformationCache&) /home/hoy/local/server-llvm/llvm-project/llvm/lib/Transforms/IPO/OpenMPOpt.cpp:334:18 #9 0x0000000002484e0f (anonymous namespace)::OpenMPOptLegacyPass::runOnSCC(llvm::CallGraphSCC&) /home/hoy/local/server-llvm/llvm-project/llvm/lib/Transforms/IPO/OpenMPOpt.cpp:832:15 It seems we still have a race condition. The Int32 type can be initialized by a module and then consumed by another module simultaneously. Your early proposal to use the explicit module context to create a constant value instead of using Int32.getContext() which can be from whatever module that first initializes the type sounds good to me. Thanks. hoyFB: @sstefan1 Thanks for the quick response. I applied the fix and the problem is still there… | |||||
sstefan1AuthorUnsubmitted Can you provide some details on how to reproduce this? I wasn't able to. I would like to be able to test this locally. sstefan1: Can you provide some details on how to reproduce this? I wasn't able to. I would like to be… | |||||
hoyFBUnsubmitted Not Done ReplyInline ActionsThe issue happened to one of our internal large projects with many files having OpenMP usage. The project is built with 40 threads in thinLTO mode. I guess it doesn't reproduce with smaller projects. You may want to print out the Int32.getContext() object as well as the Module object for each ConstantInt::get(Type::getInt32Ty(Int32->getContext()), 0); call and see if they match. Two modules creating constants in the same context may cause a problem. hoyFB: The issue happened to one of our internal large projects with many files having OpenMP usage. | |||||
jdoerfertUnsubmitted Not Done ReplyInline ActionsI think I understand now. Two processes spin up OMPIRBuilder or OpenMPOpt passes at the same time, both of those initialize global types, we created a race on these globals and all bets are off. I assume (thin)LTO has a single llvm::Context so we "just" need to prevent two threads from initializing at the same time. LLVM has a RAII mutex in llvm/include/llvm/Support/Mutex.h (SmartScopedLock) that we should place in a (new) initializer function (which calls the initializer functions we call now in the constructor) to prevent the race. jdoerfert: I think I understand now. Two processes spin up OMPIRBuilder or OpenMPOpt passes at the same… | |||||
hoyFBUnsubmitted Not Done ReplyInline ActionsYeah, that's the problem. The global OMP types are initialized without lock protection with a module local context: Even if they are lock protected, the module local context passed to them can still be used to create a constant by another module in parallel. Module constants should be created in sequence. hoyFB: Yeah, that's the problem.
The global OMP types are initialized without lock protection with a… | |||||
jdoerfertUnsubmitted Not Done ReplyInline Actions
This is not up to the pass. The race in question here is home made but llvm::Context has to be thread safe itself if you use it in parallel, right? This is not the only pass creating constants and types. If, however, thinLTO uses different contexts per module, we have a different problem here. If the latter is the case we need to keep the Types as part of the OpenMPIRBuilder object which is "local" to one lvm::Context. @sstefan1 is going to look into the race tomorrow. I think it might be the best thing to move the types into the OpenMPIRBuilder. That will require some rewrite but make it race free and working with multiple contexts in parallel. jdoerfert: > Even if they are lock protected, the module local context passed to them can still be used to… | |||||
hoyFBUnsubmitted Not Done ReplyInline ActionsIf I understand correctly, functions of one module are always compiled sequentially, even in thinLTO mode. Different modules can be compiled in parallel. A module should have a unique context. Constants and types created during the compilation of one module are appended to the module's context in sequence. With the global Int32 type owning a context of an arbitrary module, creating multiple constants in that context from different threads concurrently result in a race. I think moving the types into OpenMPIRBuilder objects should fix the issue. If you look at this code: https://github.com/llvm/llvm-project/blob/15440191b57237eafb18600ac653b9a0023db391/llvm/lib/IR/Constants.cpp#L776 , the constants are added to a DenseMap object with no lock protection. As long as they are added sequentially, it should be fine. The problem should be thinLTO-specific. In full LTO mode, a dummy module is created to cover all functions which are then compiled sequentially. In thinLTO mode, an input file is one module owning its own context, and they are compiled in parallel. hoyFB: If I understand correctly, functions of one module are always compiled sequentially, even in… | |||||
break; \ | |||||
case FALSE: \ | |||||
ICV.InitValue = ConstantInt::getFalse(Int1->getContext()); \ | |||||
break; \ | |||||
case LAST: \ | |||||
break; \ | |||||
} \ | |||||
} | |||||
#include "llvm/Frontend/OpenMP/OMPKinds.def" | |||||
} | |||||
/// Returns true if the function declaration \p F matches the runtime | /// Returns true if the function declaration \p F matches the runtime | ||||
/// function types, that is, return type \p RTFRetType, and argument types | /// function types, that is, return type \p RTFRetType, and argument types | ||||
/// \p RTFArgTypes. | /// \p RTFArgTypes. | ||||
static bool declMatchesRTFTypes(Function *F, Type *RTFRetType, | static bool declMatchesRTFTypes(Function *F, Type *RTFRetType, | ||||
SmallVector<Type *, 8> &RTFArgTypes) { | SmallVector<Type *, 8> &RTFArgTypes) { | ||||
// TODO: We should output information to the user (under debug output | // TODO: We should output information to the user (under debug output | ||||
// and via remarks). | // and via remarks). | ||||
▲ Show 20 Lines • Show All 89 Lines • ▼ Show 20 Lines | struct OpenMPOpt { | ||||
/// Run all OpenMP optimizations on the underlying SCC/ModuleSlice. | /// Run all OpenMP optimizations on the underlying SCC/ModuleSlice. | ||||
bool run() { | bool run() { | ||||
bool Changed = false; | bool Changed = false; | ||||
LLVM_DEBUG(dbgs() << TAG << "Run on SCC with " << SCC.size() | LLVM_DEBUG(dbgs() << TAG << "Run on SCC with " << SCC.size() | ||||
<< " functions in a slice with " | << " functions in a slice with " | ||||
<< OMPInfoCache.ModuleSlice.size() << " functions\n"); | << OMPInfoCache.ModuleSlice.size() << " functions\n"); | ||||
/// Print initial ICV values for testing. | |||||
/// FIXME: This should be done from the Attributor once it is added. | |||||
if (PrintICVValues) { | |||||
InternalControlVar ICVs[] = {ICV_nthreads, ICV_active_levels, ICV_cancel}; | |||||
for (Function *F : OMPInfoCache.ModuleSlice) { | |||||
for (auto ICV : ICVs) { | |||||
auto ICVInfo = OMPInfoCache.ICVs[ICV]; | |||||
auto Remark = [&](OptimizationRemark OR) { | |||||
return OR << "OpenMP ICV " << ore::NV("OpenMPICV", ICVInfo.Name) | |||||
<< " Value: " | |||||
<< (ICVInfo.InitValue | |||||
? ICVInfo.InitValue->getValue().toString(10, true) | |||||
: "IMPLEMENTATION_DEFINED"); | |||||
}; | |||||
emitRemarkOnFunction(F, "OpenMPICVTracker", Remark); | |||||
} | |||||
} | |||||
} | |||||
Changed |= deduplicateRuntimeCalls(); | Changed |= deduplicateRuntimeCalls(); | ||||
Changed |= deleteParallelRegions(); | Changed |= deleteParallelRegions(); | ||||
return Changed; | return Changed; | ||||
} | } | ||||
/// Return the call if \p U is a callee use in a regular call. If \p RFI is | /// Return the call if \p U is a callee use in a regular call. If \p RFI is | ||||
/// given it has to be the callee or a nullptr is returned. | /// given it has to be the callee or a nullptr is returned. | ||||
▲ Show 20 Lines • Show All 318 Lines • ▼ Show 20 Lines | void emitRemark(Instruction *Inst, StringRef RemarkName, | ||||
RemarkCallBack &&RemarkCB) { | RemarkCallBack &&RemarkCB) { | ||||
Function *F = Inst->getParent()->getParent(); | Function *F = Inst->getParent()->getParent(); | ||||
auto &ORE = OREGetter(F); | auto &ORE = OREGetter(F); | ||||
ORE.emit( | ORE.emit( | ||||
[&]() { return RemarkCB(RemarkKind(DEBUG_TYPE, RemarkName, Inst)); }); | [&]() { return RemarkCB(RemarkKind(DEBUG_TYPE, RemarkName, Inst)); }); | ||||
} | } | ||||
/// Emit a remark on a function. Since only OptimizationRemark is supporting | |||||
/// this, it can't be made generic. | |||||
void emitRemarkOnFunction( | |||||
Function *F, StringRef RemarkName, | |||||
function_ref<OptimizationRemark(OptimizationRemark &&)> &&RemarkCB) { | |||||
auto &ORE = OREGetter(F); | |||||
ORE.emit([&]() { | |||||
return RemarkCB(OptimizationRemark(DEBUG_TYPE, RemarkName, F)); | |||||
}); | |||||
} | |||||
/// The underyling module. | /// The underyling module. | ||||
Module &M; | Module &M; | ||||
/// The SCC we are operating on. | /// The SCC we are operating on. | ||||
SmallVectorImpl<Function *> &SCC; | SmallVectorImpl<Function *> &SCC; | ||||
/// Callback to update the call graph, the first argument is a removed call, | /// Callback to update the call graph, the first argument is a removed call, | ||||
/// the second an optional replacement call. | /// the second an optional replacement call. | ||||
▲ Show 20 Lines • Show All 141 Lines • Show Last 20 Lines |
Hello,
Looks like the Int32 type with the full name llvm::omp::types::Int32 is a global variable. Creating a value using it may cause a race condition in thinLTO mode. Although Int32 is initialized with a module specific context, but it may or never get re-created per module. Please correct me if I misunderstand anything. Thanks.
Below is the my failing call stack in thinLTO mode:
PLEASE submit a bug report to https://bugs.llvm.org/ and include the crash backtrace.
Stack dump:
0. Running pass 'CallGraph Pass Manager' on module ...'.
#0 0x00000000014a3a8f llvm::sys::PrintStackTrace(llvm::raw_ostream&) /home/hoy/local/server-llvm/llvm-project/llvm/lib/Support/Unix/Signals.inc:564:13
#1 0x00000000014a1c30 llvm::sys::RunSignalHandlers() /home/hoy/local/server-llvm/llvm-project/llvm/lib/Support/Signals.cpp:69:18
#2 0x00000000014a402c SignalHandler(int) /home/hoy/local/server-llvm/llvm-project/llvm/lib/Support/Unix/Signals.inc:396:3
#3 0x00007f00baf0b630 __restore_rt (/lib64/libpthread.so.0+0xf630)
#4 0x00000000030afd08 llvm::ConstantInt::get(llvm::LLVMContext&, llvm::APInt const&) /home/hoy/local/server-llvm/llvm-project/llvm/lib/IR/Constants.cpp:776:40
#5 0x00000000030b2a9b llvm::ConstantInt::get(llvm::IntegerType*, unsigned long, bool) /home/hoy/local/server-llvm/llvm-project/llvm/lib/IR/Constants.cpp:797:10
#6 0x00000000024ccc56 (anonymous namespace)::OMPInformationCache::initializeInternalControlVars() /home/hoy/local/server-llvm/llvm-project/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def:326:1
#7 0x00000000024ccc56 (anonymous namespace)::OMPInformationCache::OMPInformationCache(llvm::Module&, llvm::AnalysisGetter&, llvm::BumpPtrAllocatorImpl<llvm::MallocAllocator, 4096ul, 4096ul, 128ul>&, llvm::SetVector<llvm::Function*, std::vector<llvm::Function*, std::allocator<llvm::Function*> >, llvm::DenseSet<llvm::Function*, llvm::DenseMapInfo<llvm::Function*> > >*, llvm::SmallPtrSetImpl<llvm::Function*>&) /home/hoy/local/server-llvm/llvm-project/llvm/lib/Transforms/IPO/OpenMPOpt.cpp:69:0
#8 0x00000000024f10ff llvm::SmallVectorTemplateCommon<llvm::Function*, void>::begin() /home/hoy/local/server-llvm/llvm-project/llvm/include/llvm/ADT/SmallVector.h:152:45
#9 0x00000000024f10ff (anonymous namespace)::OpenMPOpt::OpenMPOpt(llvm::SmallVectorImpl<llvm::Function*>&, llvm::CallGraphUpdater&, llvm::function_ref<llvm::OptimizationRemarkEmitter& (llvm::Function*)>, (anonymous namespace)::OMPInformationCache&) /home/hoy/local/server-llvm/llvm-project/llvm/lib/Transforms/IPO/OpenMPOpt.cpp:336:0
#10 0x00000000024f10ff (anonymous namespace)::OpenMPOptLegacyPass::runOnSCC(llvm::CallGraphSCC&) /home/hoy/local/server-llvm/llvm-project/llvm/lib/Transforms/IPO/OpenMPOpt.cpp:834:0