Changeset View
Standalone View
llvm/lib/Transforms/IPO/OpenMPOpt.cpp
Show First 20 Lines • Show All 1,757 Lines • ▼ Show 20 Lines | bool runAttributor(bool IsModulePass) { | |||||||||
ChangeStatus Changed = A.run(); | ChangeStatus Changed = A.run(); | |||||||||
LLVM_DEBUG(dbgs() << "[Attributor] Done with " << SCC.size() | LLVM_DEBUG(dbgs() << "[Attributor] Done with " << SCC.size() | |||||||||
<< " functions, result: " << Changed << ".\n"); | << " functions, result: " << Changed << ".\n"); | |||||||||
return Changed == ChangeStatus::CHANGED; | return Changed == ChangeStatus::CHANGED; | |||||||||
} | } | |||||||||
/// Register an AAFoldRuntimeCall for every regular call to the runtime
/// function \p RF within the SCC.
jdoerfert: copy and paste. | ||||||||||
void registerFoldRuntimeCall(RuntimeFunction RF); | ||||||||||
/// Populate the Attributor with abstract attribute opportunities in the | ||||||||||
/// function. | ||||||||||
void registerAAs(bool IsModulePass); | void registerAAs(bool IsModulePass); | |||||||||
}; | }; | |||||||||
Kernel OpenMPOpt::getUniqueKernelFor(Function &F) { | Kernel OpenMPOpt::getUniqueKernelFor(Function &F) { | |||||||||
if (!OMPInfoCache.ModuleSlice.count(&F)) | if (!OMPInfoCache.ModuleSlice.count(&F)) | |||||||||
return nullptr; | return nullptr; | |||||||||
// Use a scope to keep the lifetime of the CachedKernel short. | // Use a scope to keep the lifetime of the CachedKernel short. | |||||||||
▲ Show 20 Lines • Show All 1,577 Lines • ▼ Show 20 Lines | void initialize(Attributor &A) override { | |||||||||
const unsigned int WrapperFunctionArgNo = 6; | const unsigned int WrapperFunctionArgNo = 6; | |||||||||
RuntimeFunction RF = It->getSecond(); | RuntimeFunction RF = It->getSecond(); | |||||||||
switch (RF) { | switch (RF) { | |||||||||
// All the functions we know are compatible with SPMD mode. | // All the functions we know are compatible with SPMD mode. | |||||||||
case OMPRTL___kmpc_is_spmd_exec_mode: | case OMPRTL___kmpc_is_spmd_exec_mode: | |||||||||
case OMPRTL___kmpc_for_static_fini: | case OMPRTL___kmpc_for_static_fini: | |||||||||
case OMPRTL___kmpc_global_thread_num: | case OMPRTL___kmpc_global_thread_num: | |||||||||
case OMPRTL___kmpc_get_hardware_num_threads_in_block: | ||||||||||
case OMPRTL___kmpc_get_hardware_num_blocks: | ||||||||||
case OMPRTL___kmpc_single: | case OMPRTL___kmpc_single: | |||||||||
case OMPRTL___kmpc_end_single: | case OMPRTL___kmpc_end_single: | |||||||||
case OMPRTL___kmpc_master: | case OMPRTL___kmpc_master: | |||||||||
case OMPRTL___kmpc_end_master: | case OMPRTL___kmpc_end_master: | |||||||||
case OMPRTL___kmpc_barrier: | case OMPRTL___kmpc_barrier: | |||||||||
break; | break; | |||||||||
case OMPRTL___kmpc_for_static_init_4: | case OMPRTL___kmpc_for_static_init_4: | |||||||||
case OMPRTL___kmpc_for_static_init_4u: | case OMPRTL___kmpc_for_static_init_4u: | |||||||||
▲ Show 20 Lines • Show All 137 Lines • ▼ Show 20 Lines | A.registerSimplificationCallback( | |||||||||
A.recordDependence(*this, *AA, DepClassTy::OPTIONAL); | A.recordDependence(*this, *AA, DepClassTy::OPTIONAL); | |||||||||
} | } | |||||||||
return SimplifiedValue; | return SimplifiedValue; | |||||||||
}); | }); | |||||||||
} | } | |||||||||
/// Run the folding routine that matches the runtime function kind of the
/// associated call site and report whether the assumed result changed.
ChangeStatus updateImpl(Attributor &A) override {
  // Exactly one folding routine runs per update, so its status is the
  // status of the whole update.
  switch (RFKind) {
  case OMPRTL___kmpc_is_spmd_exec_mode:
    return foldIsSPMDExecMode(A);
  case OMPRTL___kmpc_get_hardware_num_threads_in_block:
    return foldHardwareNumThreads(A);
  case OMPRTL___kmpc_get_hardware_num_blocks:
    return foldHardwareNumTeams(A);
  default:
    // Only the runtime functions listed above get this attribute registered.
    llvm_unreachable("Unhandled OpenMP runtime function!");
  }
}
ChangeStatus manifest(Attributor &A) override { | ChangeStatus manifest(Attributor &A) override { | |||||||||
Show All 14 Lines | private: | |||||||||
ChangeStatus foldIsSPMDExecMode(Attributor &A) { | ChangeStatus foldIsSPMDExecMode(Attributor &A) { | |||||||||
BooleanState StateBefore = getState(); | BooleanState StateBefore = getState(); | |||||||||
unsigned AssumedSPMDCount = 0, KnownSPMDCount = 0; | unsigned AssumedSPMDCount = 0, KnownSPMDCount = 0; | |||||||||
unsigned AssumedNonSPMDCount = 0, KnownNonSPMDCount = 0; | unsigned AssumedNonSPMDCount = 0, KnownNonSPMDCount = 0; | |||||||||
auto &CallerKernelInfoAA = A.getAAFor<AAKernelInfo>( | auto &CallerKernelInfoAA = A.getAAFor<AAKernelInfo>( | |||||||||
*this, IRPosition::function(*getAnchorScope()), DepClassTy::REQUIRED); | *this, IRPosition::function(*getAnchorScope()), DepClassTy::REQUIRED); | |||||||||
// What actual kernels reach this function | ||||||||||
for (Kernel K : CallerKernelInfoAA.ReachingKernelEntries) { | for (Kernel K : CallerKernelInfoAA.ReachingKernelEntries) { | |||||||||
auto &AA = A.getAAFor<AAKernelInfo>(*this, IRPosition::function(*K), | auto &AA = A.getAAFor<AAKernelInfo>(*this, IRPosition::function(*K), | |||||||||
DepClassTy::REQUIRED); | DepClassTy::REQUIRED); | |||||||||
if (!AA.isValidState()) { | if (!AA.isValidState()) { | |||||||||
SimplifiedValue = nullptr; | SimplifiedValue = nullptr; | |||||||||
return indicatePessimisticFixpoint(); | return indicatePessimisticFixpoint(); | |||||||||
} | } | |||||||||
Show All 37 Lines | if (KnownSPMDCount || AssumedSPMDCount) { | |||||||||
// calls to __kmpc_is_spmd_exec_mode to 0. | // calls to __kmpc_is_spmd_exec_mode to 0. | |||||||||
SimplifiedValue = ConstantInt::get(Type::getInt8Ty(Ctx), false); | SimplifiedValue = ConstantInt::get(Type::getInt8Ty(Ctx), false); | |||||||||
} | } | |||||||||
return getState() == StateBefore ? ChangeStatus::UNCHANGED | return getState() == StateBefore ? ChangeStatus::UNCHANGED | |||||||||
: ChangeStatus::CHANGED; | : ChangeStatus::CHANGED; | |||||||||
} | } | |||||||||
/// Fold __kmpc_get_hardware_num_blocks into a constant if possible. | ||||||||||
JonChesterfieldUnsubmitted I haven't read through this part, but if we can only fold it to a constant sometimes, we shouldn't mark the calls that survive noinline, as that'll be expensive for the cases that this pass misses. JonChesterfield: I haven't read through this part, but if we can only fold it to a constant sometimes, we… | ||||||||||
jdoerfertUnsubmitted We are about to remove noinline from known runtime functions such that we can keep them around until we get to OpenMP-Opt as calls. This will have the effect we want without any drawbacks. Thus, adding noinline in the runtime will be totally fine. jdoerfert: We are about to remove noinline from known runtime functions such that we can keep them around… | ||||||||||
/// The value is an attribute in the kernel | ||||||||||
jdoerfert: | ||||||||||
ChangeStatus foldHardwareNumTeams(Attributor &A) { | ||||||||||
// Specialize only if all the calls agree with the number of teams | ||||||||||
int32_t CurrentNumTeams = -1; | ||||||||||
BooleanState StateBefore = getState(); | ||||||||||
auto &CallerKernelInfoAA = A.getAAFor<AAKernelInfo>( | ||||||||||
*this, IRPosition::function(*getAnchorScope()), DepClassTy::REQUIRED); | ||||||||||
if (!CallerKernelInfoAA.ReachingKernelEntries.isValid()) return indicatePessimisticFixpoint(); also below jdoerfert: ```
if (!CallerKernelInfoAA.ReachingKernelEntries.isValid())
return… | ||||||||||
// What actual kernels reach this function | ||||||||||
for (Kernel K : CallerKernelInfoAA.ReachingKernelEntries) { | ||||||||||
if (K->hasFnAttribute("NumTeams")) { | ||||||||||
int32_t NumT = | ||||||||||
Lint: Pre-merge checks clang-format: please reformat the code - int32_t NumT = + int32_t NumT = Lint: Pre-merge checks: clang-format: please reformat the code
```
- int32_t NumT =
+ int32_t NumT =
``` | ||||||||||
std::stoi(K->getFnAttribute("NumTeams").getValueAsString().str()); | ||||||||||
if (CurrentNumTeams != -1 && CurrentNumTeams != NumT) { | ||||||||||
SimplifiedValue = nullptr; | ||||||||||
return indicatePessimisticFixpoint(); | ||||||||||
} else { | ||||||||||
Lint: Pre-merge checks clang-tidy: warning: do not use 'else' after 'return' [llvm-else-after-return] Lint: Pre-merge checks: clang-tidy: warning: do not use 'else' after 'return' [llvm-else-after-return]
[[https://github. | ||||||||||
tianshilei1992Unsubmitted No need to have else here because the code above already returns. tianshilei1992: No need to have `else` here because the code above already returns. | ||||||||||
CurrentNumTeams = NumT; | ||||||||||
} | ||||||||||
use early exit instead, return ... jdoerfert: use early exit instead,
if (!...)
return ...
| ||||||||||
} else { | ||||||||||
tianshilei1992Unsubmitted no need for else as well tianshilei1992: no need for `else` as well | ||||||||||
// TODO: No attribute, then default? | ||||||||||
tianshilei1992Unsubmitted Directly indicate pessimistic state because we don't know clearly what the number is. tianshilei1992: Directly indicate pessimistic state because we don't know clearly what the number is. | ||||||||||
} | ||||||||||
You should set CurrentAttrValue = NextAttrValue at the end of the loop. jdoerfert: You should set CurrentAttrValue = NextAttrValue at the end of the loop. | ||||||||||
} | ||||||||||
if (CurrentNumTeams != -1) { | ||||||||||
auto &Ctx = getAnchorValue().getContext(); | ||||||||||
SimplifiedValue = | ||||||||||
Lint: Pre-merge checks clang-format: please reformat the code - SimplifiedValue = - ConstantInt::get(Type::getInt8Ty(Ctx), CurrentNumTeams); + SimplifiedValue = ConstantInt::get(Type::getInt8Ty(Ctx), CurrentNumTeams); Lint: Pre-merge checks: clang-format: please reformat the code
```
- SimplifiedValue =
- ConstantInt… | ||||||||||
jdoerfert: | ||||||||||
ConstantInt::get(Type::getInt8Ty(Ctx), CurrentNumTeams); | ||||||||||
no reaching kernels is fine, keep it at none (which is the default) and just return UNCHANGED. jdoerfert: no reaching kernels is fine, keep it at none (which is the default) and just return UNCHANGED. | ||||||||||
} | ||||||||||
make this function take the string attribute such that we can have a single one and not two. jdoerfert: make this function take the string attribute such that we can have a single one and not two. | ||||||||||
return getState() == StateBefore ? ChangeStatus::UNCHANGED | ||||||||||
tianshilei1992Unsubmitted This has to be updated as SimplifiedValue is not part of BooleanState. Refer to foldIsSPMDExecMode. tianshilei1992: This has to be updated as `SimplifiedValue` is not part of `BooleanState`. Refer to… | ||||||||||
: ChangeStatus::CHANGED; | ||||||||||
} | ||||||||||
/// Fold __kmpc_get_hardware_num_threads_in_block into a constant if possible. | ||||||||||
/// The value is an attribute in the kernel | ||||||||||
ChangeStatus foldHardwareNumThreads(Attributor &A) { | ||||||||||
tianshilei1992Unsubmitted Update accordingly tianshilei1992: Update accordingly | ||||||||||
// Specialize only if all the calls agree with the number of threads | ||||||||||
int32_t CurrentNumThreads = -1; | ||||||||||
BooleanState StateBefore = getState(); | ||||||||||
auto &CallerKernelInfoAA = A.getAAFor<AAKernelInfo>( | ||||||||||
*this, IRPosition::function(*getAnchorScope()), DepClassTy::REQUIRED); | ||||||||||
// What actual kernels reach this function | ||||||||||
for (Kernel K : CallerKernelInfoAA.ReachingKernelEntries) { | ||||||||||
if (K->hasFnAttribute("NumThreads")) { | ||||||||||
int32_t NumT = | ||||||||||
Lint: Pre-merge checks clang-format: please reformat the code - int32_t NumT = + int32_t NumT = Lint: Pre-merge checks: clang-format: please reformat the code
```
- int32_t NumT =
+ int32_t NumT =
``` | ||||||||||
std::stoi(K->getFnAttribute("NumThreads").getValueAsString().str()); | ||||||||||
if (CurrentNumThreads != -1 && CurrentNumThreads != NumT) { | ||||||||||
SimplifiedValue = nullptr; | ||||||||||
return indicatePessimisticFixpoint(); | ||||||||||
} else { | ||||||||||
Lint: Pre-merge checks clang-tidy: warning: do not use 'else' after 'return' [llvm-else-after-return] Lint: Pre-merge checks: clang-tidy: warning: do not use 'else' after 'return' [llvm-else-after-return]
[[https://github. | ||||||||||
CurrentNumThreads = NumT; | ||||||||||
} | ||||||||||
} else { | ||||||||||
// TODO: No attribute, then default? | ||||||||||
} | ||||||||||
} | ||||||||||
if (CurrentNumThreads != -1) { | ||||||||||
auto &Ctx = getAnchorValue().getContext(); | ||||||||||
SimplifiedValue = | ||||||||||
Lint: Pre-merge checks clang-format: please reformat the code - SimplifiedValue = + SimplifiedValue = Lint: Pre-merge checks: clang-format: please reformat the code
```
- SimplifiedValue =
+ SimplifiedValue =… | ||||||||||
ConstantInt::get(Type::getInt8Ty(Ctx), CurrentNumThreads); | ||||||||||
} | ||||||||||
Lint: Pre-merge checks clang-format: please reformat the code - + Lint: Pre-merge checks: clang-format: please reformat the code
```
-
+
``` | ||||||||||
return getState() == StateBefore ? ChangeStatus::UNCHANGED | ||||||||||
: ChangeStatus::CHANGED; | ||||||||||
} | ||||||||||
/// An optional value the associated value is assumed to fold to. That is, we | /// An optional value the associated value is assumed to fold to. That is, we | |||||||||
/// assume the associated value (which is a call) can be replaced by this | /// assume the associated value (which is a call) can be replaced by this | |||||||||
/// simplified value. | /// simplified value. | |||||||||
Optional<Value *> SimplifiedValue; | Optional<Value *> SimplifiedValue; | |||||||||
/// The runtime function kind of the callee of the associated call site. | /// The runtime function kind of the callee of the associated call site. | |||||||||
RuntimeFunction RFKind; | RuntimeFunction RFKind; | |||||||||
}; | }; | |||||||||
} // namespace | } // namespace | |||||||||
/// Register an AAFoldRuntimeCall for every regular call to the runtime
/// function \p RF found in the SCC.
void OpenMPOpt::registerFoldRuntimeCall(RuntimeFunction RF) {
  auto &RFI = OMPInfoCache.RFIs[RF];

  auto CreateFoldingAA = [&](Use &U, Function &) {
    // Uses that are not regular calls of the runtime function are skipped.
    if (CallInst *Call = OpenMPOpt::getCallIfRegularCall(U, &RFI)) {
      // Create the attribute without a querying AA and without an update
      // right after initialization.
      // NOTE(review): a reviewer asked for IRPosition::callsite_returned here
      // to match trunk -- confirm against the positions AAFoldRuntimeCall
      // accepts before changing it.
      A.getOrCreateAAFor<AAFoldRuntimeCall>(
          IRPosition::callsite_function(*Call), /* QueryingAA */ nullptr,
          DepClassTy::NONE, /* ForceUpdate */ false,
          /* UpdateAfterInit */ false);
    }
    // Always false; presumably tells foreachUse the use was left in place --
    // TODO confirm.
    return false;
  };

  RFI.foreachUse(SCC, CreateFoldingAA);
}
void OpenMPOpt::registerAAs(bool IsModulePass) { | void OpenMPOpt::registerAAs(bool IsModulePass) { | |||||||||
if (SCC.empty()) | if (SCC.empty()) | |||||||||
return; | return; | |||||||||
if (IsModulePass) { | if (IsModulePass) { | |||||||||
// Ensure we create the AAKernelInfo AAs first and without triggering an | // Ensure we create the AAKernelInfo AAs first and without triggering an | |||||||||
// update. This will make sure we register all value simplification | // update. This will make sure we register all value simplification | |||||||||
// callbacks before any other AA has the chance to create an AAValueSimplify | // callbacks before any other AA has the chance to create an AAValueSimplify | |||||||||
// or similar. | // or similar. | |||||||||
for (Function *Kernel : OMPInfoCache.Kernels) | for (Function *Kernel : OMPInfoCache.Kernels) { | |||||||||
A.getOrCreateAAFor<AAKernelInfo>( | A.getOrCreateAAFor<AAKernelInfo>( | |||||||||
IRPosition::function(*Kernel), /* QueryingAA */ nullptr, | IRPosition::function(*Kernel), /* QueryingAA */ nullptr, | |||||||||
DepClassTy::NONE, /* ForceUpdate */ false, | DepClassTy::NONE, /* ForceUpdate */ false, | |||||||||
/* UpdateAfterInit */ false); | /* UpdateAfterInit */ false); | |||||||||
} | ||||||||||
auto &IsSPMDRFI = OMPInfoCache.RFIs[OMPRTL___kmpc_is_spmd_exec_mode]; | registerFoldRuntimeCall(OMPRTL___kmpc_is_spmd_exec_mode); | |||||||||
IsSPMDRFI.foreachUse(SCC, [&](Use &U, Function &) { | registerFoldRuntimeCall(OMPRTL___kmpc_get_hardware_num_threads_in_block); | |||||||||
CallInst *CI = OpenMPOpt::getCallIfRegularCall(U, &IsSPMDRFI); | registerFoldRuntimeCall(OMPRTL___kmpc_get_hardware_num_blocks); | |||||||||
if (!CI) | ||||||||||
return false; | ||||||||||
A.getOrCreateAAFor<AAFoldRuntimeCall>( | ||||||||||
IRPosition::callsite_function(*CI), /* QueryingAA */ nullptr, | ||||||||||
DepClassTy::NONE, /* ForceUpdate */ false, | ||||||||||
/* UpdateAfterInit */ false); | ||||||||||
return false; | ||||||||||
}); | ||||||||||
} | } | |||||||||
// Create CallSite AA for all Getters. | // Create CallSite AA for all Getters. | |||||||||
for (int Idx = 0; Idx < OMPInfoCache.ICVs.size() - 1; ++Idx) { | for (int Idx = 0; Idx < OMPInfoCache.ICVs.size() - 1; ++Idx) { | |||||||||
auto ICVInfo = OMPInfoCache.ICVs[static_cast<InternalControlVar>(Idx)]; | auto ICVInfo = OMPInfoCache.ICVs[static_cast<InternalControlVar>(Idx)]; | |||||||||
auto &GetterRFI = OMPInfoCache.RFIs[ICVInfo.Getter]; | auto &GetterRFI = OMPInfoCache.RFIs[ICVInfo.Getter]; | |||||||||
▲ Show 20 Lines • Show All 395 Lines • Show Last 20 Lines |
copy and paste.