Changeset View
Standalone View
llvm/lib/Analysis/LoopAccessAnalysis.cpp
Show All 22 Lines | |||||
#include "llvm/ADT/SmallSet.h" | #include "llvm/ADT/SmallSet.h" | ||||
#include "llvm/ADT/SmallVector.h" | #include "llvm/ADT/SmallVector.h" | ||||
#include "llvm/ADT/iterator_range.h" | #include "llvm/ADT/iterator_range.h" | ||||
#include "llvm/Analysis/AliasAnalysis.h" | #include "llvm/Analysis/AliasAnalysis.h" | ||||
#include "llvm/Analysis/AliasSetTracker.h" | #include "llvm/Analysis/AliasSetTracker.h" | ||||
#include "llvm/Analysis/LoopAnalysisManager.h" | #include "llvm/Analysis/LoopAnalysisManager.h" | ||||
#include "llvm/Analysis/LoopInfo.h" | #include "llvm/Analysis/LoopInfo.h" | ||||
#include "llvm/Analysis/MemoryLocation.h" | #include "llvm/Analysis/MemoryLocation.h" | ||||
#include "llvm/Analysis/OptimizationRemarkEmitter.h" | |||||
#include "llvm/Analysis/ScalarEvolution.h" | #include "llvm/Analysis/ScalarEvolution.h" | ||||
#include "llvm/Analysis/ScalarEvolutionExpressions.h" | #include "llvm/Analysis/ScalarEvolutionExpressions.h" | ||||
#include "llvm/Analysis/TargetLibraryInfo.h" | #include "llvm/Analysis/TargetLibraryInfo.h" | ||||
#include "llvm/Analysis/ValueTracking.h" | #include "llvm/Analysis/ValueTracking.h" | ||||
#include "llvm/Analysis/VectorUtils.h" | #include "llvm/Analysis/VectorUtils.h" | ||||
#include "llvm/IR/BasicBlock.h" | #include "llvm/IR/BasicBlock.h" | ||||
#include "llvm/IR/Constants.h" | #include "llvm/IR/Constants.h" | ||||
#include "llvm/IR/DataLayout.h" | #include "llvm/IR/DataLayout.h" | ||||
#include "llvm/IR/DebugLoc.h" | #include "llvm/IR/DebugLoc.h" | ||||
#include "llvm/IR/DerivedTypes.h" | #include "llvm/IR/DerivedTypes.h" | ||||
#include "llvm/IR/DiagnosticInfo.h" | #include "llvm/IR/DiagnosticInfo.h" | ||||
#include "llvm/IR/Dominators.h" | #include "llvm/IR/Dominators.h" | ||||
#include "llvm/IR/Function.h" | #include "llvm/IR/Function.h" | ||||
#include "llvm/IR/InstrTypes.h" | #include "llvm/IR/InstrTypes.h" | ||||
#include "llvm/IR/Instruction.h" | #include "llvm/IR/Instruction.h" | ||||
#include "llvm/IR/Instructions.h" | #include "llvm/IR/Instructions.h" | ||||
#include "llvm/IR/IntrinsicInst.h" | |||||
#include "llvm/IR/Operator.h" | #include "llvm/IR/Operator.h" | ||||
#include "llvm/IR/PassManager.h" | #include "llvm/IR/PassManager.h" | ||||
#include "llvm/IR/Type.h" | #include "llvm/IR/Type.h" | ||||
#include "llvm/IR/Value.h" | #include "llvm/IR/Value.h" | ||||
#include "llvm/IR/ValueHandle.h" | #include "llvm/IR/ValueHandle.h" | ||||
#include "llvm/InitializePasses.h" | #include "llvm/InitializePasses.h" | ||||
#include "llvm/Pass.h" | #include "llvm/Pass.h" | ||||
#include "llvm/Support/Casting.h" | #include "llvm/Support/Casting.h" | ||||
▲ Show 20 Lines • Show All 522 Lines • ▼ Show 20 Lines | public: | ||||
/// We decided that no dependence analysis would be used. Reset the state. | /// We decided that no dependence analysis would be used. Reset the state. | ||||
void resetDepChecks(MemoryDepChecker &DepChecker) { | void resetDepChecks(MemoryDepChecker &DepChecker) { | ||||
CheckDeps.clear(); | CheckDeps.clear(); | ||||
DepChecker.clearDependences(); | DepChecker.clearDependences(); | ||||
} | } | ||||
MemAccessInfoList &getDependenciesToCheck() { return CheckDeps; } | MemAccessInfoList &getDependenciesToCheck() { return CheckDeps; } | ||||
private: | private: | ||||
sdesmalen: UncomputablePtr doesn't really need a separate variable in AccessAnalysis, since it's only set… | |||||
Done, new patch: https://reviews.llvm.org/D115873 malharJ: Done, new patch: https://reviews.llvm.org/D115873 | |||||
typedef SetVector<MemAccessInfo> PtrAccessSet; | typedef SetVector<MemAccessInfo> PtrAccessSet; | ||||
Remark instead of 'insight'? david-arm: Remark instead of 'insight'? | |||||
Use /// for all comments. Also, does this need to be public after the recent changes? fhahn: Use `///` for all comments. Also, does this need to be public after the recent changes? | |||||
It was being accessed from outside the class on line 2045. I've moved it to private now and added a public getter instead. malharJ: It was being accessed from outside the class on line 2045.
I've moved it to private now and… | |||||
/// Go over all memory access and check whether runtime pointer checks | /// Go over all memory access and check whether runtime pointer checks | ||||
/// are needed and build sets of dependency check candidates. | /// are needed and build sets of dependency check candidates. | ||||
void processMemAccesses(); | void processMemAccesses(); | ||||
/// Set of all accesses. | /// Set of all accesses. | ||||
PtrAccessSet Accesses; | PtrAccessSet Accesses; | ||||
/// The loop being checked. | /// The loop being checked. | ||||
▲ Show 20 Lines • Show All 166 Lines • ▼ Show 20 Lines | if (NumWritePtrChecks == 0 || | ||||
"Can only skip updating CanDoRT below, if all entries in AS " | "Can only skip updating CanDoRT below, if all entries in AS " | ||||
"are reads or there is at most 1 entry"); | "are reads or there is at most 1 entry"); | ||||
continue; | continue; | ||||
} | } | ||||
for (auto &Access : AccessInfos) { | for (auto &Access : AccessInfos) { | ||||
if (!createCheckForAccess(RtCheck, Access, StridesMap, DepSetId, TheLoop, | if (!createCheckForAccess(RtCheck, Access, StridesMap, DepSetId, TheLoop, | ||||
RunningDepId, ASId, ShouldCheckWrap, false)) { | RunningDepId, ASId, ShouldCheckWrap, false)) { | ||||
LLVM_DEBUG(dbgs() << "LAA: Can't find bounds for ptr:" | LLVM_DEBUG(dbgs() << "LAA: Can't find bounds for ptr:" | ||||
If computing the bounds fails here, we may retry creating checks by adding assumptions below (see line 806). I think it could happen that we have multiple uncomputable pointer bounds here, but for some of them we may be able to actually compute bounds below. Should we remove the ones we can compute bounds below from the set? fhahn: If computing the bounds fails here, we may retry creating checks by adding assumptions below… | |||||
Done. thanks for that suggestion. malharJ: Done. thanks for that suggestion. | |||||
<< *Access.getPointer() << '\n'); | << *Access.getPointer() << '\n'); | ||||
Retries.push_back(Access); | Retries.push_back(Access); | ||||
CanDoAliasSetRT = false; | CanDoAliasSetRT = false; | ||||
} | } | ||||
} | } | ||||
// Note that this function computes CanDoRT and MayNeedRTCheck | // Note that this function computes CanDoRT and MayNeedRTCheck | ||||
// independently. For example CanDoRT=false, MayNeedRTCheck=false means that | // independently. For example CanDoRT=false, MayNeedRTCheck=false means that | ||||
▲ Show 20 Lines • Show All 971 Lines • ▼ Show 20 Lines | while (AI != AE) { | ||||
assert(*I1 != *I2); | assert(*I1 != *I2); | ||||
if (*I1 > *I2) | if (*I1 > *I2) | ||||
std::swap(A, B); | std::swap(A, B); | ||||
Dependence::DepType Type = | Dependence::DepType Type = | ||||
isDependent(*A.first, A.second, *B.first, B.second, Strides); | isDependent(*A.first, A.second, *B.first, B.second, Strides); | ||||
mergeInStatus(Dependence::isSafeForVectorization(Type)); | mergeInStatus(Dependence::isSafeForVectorization(Type)); | ||||
// Gather dependences unless we accumulated MaxDependences | // Gather dependences unless we accumulated MaxDependences | ||||
Why call it UnsafeDependences if it is supposed to only contain unknown dependences? fhahn: Why call it `UnsafeDependences` if it is supposed to only contain unknown dependences? | |||||
I think UnsafeDependences contains both unknown and known unsafe dependences. malharJ: I think UnsafeDependences contains both unknown and known unsafe dependences.
I think the… | |||||
// dependences. In that case return as soon as we find the first | // dependences. In that case return as soon as we find the first | ||||
// unsafe dependence. This puts a limit on this quadratic | // unsafe dependence. This puts a limit on this quadratic | ||||
// algorithm. | // algorithm. | ||||
Not Done ReplyInline ActionsCan there only be a single unsafe/unknown dependence? Or can there be more? sdesmalen: Can there only be a single unsafe/unknown dependence? Or can there be more? | |||||
There can be more. But we are only emitting (as a remark) the first one found. malharJ: There can be more.
But we are only emitting (as a remark) the first one found. | |||||
if (RecordDependences) { | if (RecordDependences) { | ||||
if (Type != Dependence::NoDep) | if (Type != Dependence::NoDep) | ||||
Remark instead of 'insight'? david-arm: Remark instead of 'insight'? | |||||
Dependences.push_back(Dependence(A.second, B.second, Type)); | Dependences.push_back(Dependence(A.second, B.second, Type)); | ||||
It shows here that the dependences are already collected. You can iterate Dependences to find the dependence that isn't safe, so that you don't have to add UnsafeDependence and maintain that separately. sdesmalen: It shows here that the dependences are already collected. You can iterate `Dependences` to find… | |||||
Done. malharJ: Done.
That's a good point, thanks ! | |||||
if (Dependences.size() >= MaxDependences) { | if (Dependences.size() >= MaxDependences) { | ||||
RecordDependences = false; | RecordDependences = false; | ||||
Dependences.clear(); | Dependences.clear(); | ||||
LLVM_DEBUG(dbgs() | LLVM_DEBUG(dbgs() | ||||
<< "Too many dependences, stopped recording\n"); | << "Too many dependences, stopped recording\n"); | ||||
} | } | ||||
} | } | ||||
▲ Show 20 Lines • Show All 65 Lines • ▼ Show 20 Lines | if (isa<SCEVCouldNotCompute>(ExitCount)) { | ||||
return false; | return false; | ||||
} | } | ||||
return true; | return true; | ||||
} | } | ||||
void LoopAccessInfo::analyzeLoop(AAResults *AA, LoopInfo *LI, | void LoopAccessInfo::analyzeLoop(AAResults *AA, LoopInfo *LI, | ||||
const TargetLibraryInfo *TLI, | const TargetLibraryInfo *TLI, | ||||
DominatorTree *DT) { | DominatorTree *DT) { | ||||
Not Done ReplyInline ActionsI guess this can be more briefly written as: Loc = I->getDebugLoc(); if (auto *PtrI = dyn_cast_or_null<Instruction>(getPointerOperand(I))) Loc = PtrI->getDebugLoc(); (at which point it probably doesn't require a separate function anymore, especially since it has only one use). sdesmalen: I guess this can be more briefly written as:
Loc = I->getDebugLoc();
if (auto *PtrI =… | |||||
I've removed this function and inlined the code since there is only one usage. malharJ: I've removed this function and inlined the code since there is only one usage. | |||||
typedef SmallPtrSet<Value*, 16> ValueSet; | typedef SmallPtrSet<Value*, 16> ValueSet; | ||||
// Holds the Load and Store instructions. | // Holds the Load and Store instructions. | ||||
SmallVector<LoadInst *, 16> Loads; | SmallVector<LoadInst *, 16> Loads; | ||||
SmallVector<StoreInst *, 16> Stores; | SmallVector<StoreInst *, 16> Stores; | ||||
Not Done ReplyInline ActionsThis seems like a case that should be added to getPointerOperand ? sdesmalen: This seems like a case that should be added to `getPointerOperand` ? | |||||
// Holds all the different accesses in the loop. | // Holds all the different accesses in the loop. | ||||
unsigned NumReads = 0; | unsigned NumReads = 0; | ||||
unsigned NumReadWrites = 0; | unsigned NumReadWrites = 0; | ||||
bool HasComplexMemInst = false; | bool HasComplexMemInst = false; | ||||
// A runtime check is only legal to insert if there are no convergent calls. | // A runtime check is only legal to insert if there are no convergent calls. | ||||
▲ Show 20 Lines • Show All 214 Lines • ▼ Show 20 Lines | void LoopAccessInfo::analyzeLoop(AAResults *AA, LoopInfo *LI, | ||||
bool CanDoRTIfNeeded = Accesses.canCheckPtrAtRT(*PtrRtChecking, PSE->getSE(), | bool CanDoRTIfNeeded = Accesses.canCheckPtrAtRT(*PtrRtChecking, PSE->getSE(), | ||||
TheLoop, SymbolicStrides); | TheLoop, SymbolicStrides); | ||||
if (!CanDoRTIfNeeded) { | if (!CanDoRTIfNeeded) { | ||||
recordAnalysis("CantIdentifyArrayBounds") << "cannot identify array bounds"; | recordAnalysis("CantIdentifyArrayBounds") << "cannot identify array bounds"; | ||||
LLVM_DEBUG(dbgs() << "LAA: We can't vectorize because we can't find " | LLVM_DEBUG(dbgs() << "LAA: We can't vectorize because we can't find " | ||||
<< "the array bounds.\n"); | << "the array bounds.\n"); | ||||
CanVecMem = false; | CanVecMem = false; | ||||
return; | return; | ||||
} | } | ||||
Not Done ReplyInline ActionsUncomputablePtr and the mechanism around it seems entirely redundant, because it doesn't add any information that is used anywhere. The only reason that UncomputablePtr is collected is to avoid printing "Cannot identify array bounds" when it gets to the then-block of if (!CanDoRTIfNeeded) { ... }. The information is redundant, because when UncomputablePtr is set, then CanDoRT is false, and vice-versa. sdesmalen: `UncomputablePtr` and the mechanism around it seems entirely redundant, because it doesn't add… | |||||
Ok, I agree but there's an issue here .. LoopAccessInfo::recordAnalysis() cant be called from within the function ( AccessAnalysis::canCheckPtrAtRT() ). Perhaps one way to do it would be for AccessAnalysis::canCheckPtrAtRT() to accept a parameter malharJ: Ok, I agree but there's an issue here ..
LoopAccessInfo::recordAnalysis() cant be called from… | |||||
It now passes in I as the instruction, but the debug location of the remark seems unchanged in the test. What value is this adding? sdesmalen: It now passes in `I` as the instruction, but the debug location of the remark seems unchanged… | |||||
This was essentially an issue in the original test, I have corrected the issue in my new patch: malharJ: This was essentially an issue in the original test,
The debug info (!35) used by the loop was… | |||||
LLVM_DEBUG( | LLVM_DEBUG( | ||||
Just leave the name as CantIdentifyArrayBounds ? sdesmalen: Just leave the name as `CantIdentifyArrayBounds` ? | |||||
dbgs() << "LAA: May be able to perform a memory runtime check if needed.\n"); | dbgs() << "LAA: May be able to perform a memory runtime check if needed.\n"); | ||||
nit: The capitalisation of cannot -> Cannot seems unnecessary. sdesmalen: nit: The capitalisation of `cannot -> Cannot` seems unnecessary. | |||||
CanVecMem = true; | CanVecMem = true; | ||||
if (Accesses.isDependencyCheckNeeded()) { | if (Accesses.isDependencyCheckNeeded()) { | ||||
LLVM_DEBUG(dbgs() << "LAA: Checking memory dependencies\n"); | LLVM_DEBUG(dbgs() << "LAA: Checking memory dependencies\n"); | ||||
CanVecMem = DepChecker->areDepsSafe( | CanVecMem = DepChecker->areDepsSafe( | ||||
DependentAccesses, Accesses.getDependenciesToCheck(), SymbolicStrides); | DependentAccesses, Accesses.getDependenciesToCheck(), SymbolicStrides); | ||||
MaxSafeDepDistBytes = DepChecker->getMaxSafeDepDistBytes(); | MaxSafeDepDistBytes = DepChecker->getMaxSafeDepDistBytes(); | ||||
Show All 11 Lines | if (!CanVecMem && DepChecker->shouldRetryWithRuntimeCheck()) { | ||||
SymbolicStrides, true); | SymbolicStrides, true); | ||||
// Check that we found the bounds for the pointer. | // Check that we found the bounds for the pointer. | ||||
if (!CanDoRTIfNeeded) { | if (!CanDoRTIfNeeded) { | ||||
recordAnalysis("CantCheckMemDepsAtRunTime") | recordAnalysis("CantCheckMemDepsAtRunTime") | ||||
<< "cannot check memory dependencies at runtime"; | << "cannot check memory dependencies at runtime"; | ||||
LLVM_DEBUG(dbgs() << "LAA: Can't vectorize with memory checks\n"); | LLVM_DEBUG(dbgs() << "LAA: Can't vectorize with memory checks\n"); | ||||
CanVecMem = false; | CanVecMem = false; | ||||
return; | return; | ||||
Not Done ReplyInline ActionsIs this case not already covered by the recordAnalysis call above? It seems like a mechanism for doing this already exists (recordAnalysis), it may be worth extending that to handle multiple reports (there is currently a limitation that it uses a single Report variable, but perhaps that could be made into a vector of reports which can be appended to). sdesmalen: Is this case not already covered by the `recordAnalysis` call above?
It seems like a mechanism… | |||||
I agree it has been covered by the call to recordAnalysis() at line 2070 ... Regarding the second issue, I'm not sure myself why recordAnalysis has Report as scalar and not a vector but that is not really a part of my change ... I just tried to re-use it. malharJ: I agree it has been covered by the call to `recordAnalysis()` at line 2070 ...
I've now moved… | |||||
unnecessary change. sdesmalen: unnecessary change. | |||||
} | } | ||||
CanVecMem = true; | CanVecMem = true; | ||||
} | } | ||||
} | } | ||||
if (HasConvergentOp) { | if (HasConvergentOp) { | ||||
recordAnalysis("CantInsertRuntimeCheckWithConvergent") | recordAnalysis("CantInsertRuntimeCheckWithConvergent") | ||||
<< "cannot add control dependency to convergent operation"; | << "cannot add control dependency to convergent operation"; | ||||
LLVM_DEBUG(dbgs() << "LAA: We can't vectorize because a runtime check " | LLVM_DEBUG(dbgs() << "LAA: We can't vectorize because a runtime check " | ||||
"would be needed with a convergent operation\n"); | "would be needed with a convergent operation\n"); | ||||
CanVecMem = false; | CanVecMem = false; | ||||
return; | return; | ||||
} | } | ||||
if (CanVecMem) | if (CanVecMem) | ||||
LLVM_DEBUG( | LLVM_DEBUG( | ||||
dbgs() << "LAA: No unsafe dependent memory operations in loop. We" | dbgs() << "LAA: No unsafe dependent memory operations in loop. We" | ||||
<< (PtrRtChecking->Need ? "" : " don't") | << (PtrRtChecking->Need ? "" : " don't") | ||||
<< " need runtime memory checks.\n"); | << " need runtime memory checks.\n"); | ||||
else { | else { | ||||
recordAnalysis("UnsafeMemDep") | auto Deps = getDepChecker().getDependences(); | ||||
why was this remark removed? As far as i can see, this message is not covered by the new memory report alban.bridonneau: why was this remark removed? As far as i can see, this message is not covered by the new memory… | |||||
if (!Deps) | |||||
It seems like it would make things easier to read if the logic would be in a separate function, with proper documentation what it is supposed to do? fhahn: It seems like it would make things easier to read if the logic would be in a separate function… | |||||
return; | |||||
auto found = std::find_if( | |||||
nit: variable names here start with upper cases fhahn: nit: variable names here start with upper cases | |||||
Deps->begin(), Deps->end(), [](const MemoryDepChecker::Dependence &D) { | |||||
return MemoryDepChecker::Dependence::isSafeForVectorization(D.Type) != | |||||
MemoryDepChecker::VectorizationSafetyStatus::Safe; | |||||
These changes here obfuscate the report that's generated by LAA when running loop(print-access-info). The information printed was: Loop access info in function 'store_with_pointer_phi_incoming_phi': loop.header: The compiler can't determine the cause of the issue. Dependences: Unknown: %v8 = load double, double* %arrayidx, align 8 -> store double %mul16, double* %ptr.2, align 8 The information that is added by the remark is: Loop access info in function 'store_with_pointer_phi_incoming_phi': loop.header: Report: unsafe dependent memory operations in loop. Use #pragma loop distribute(enable) to allow loop distribution to attempt to isolate the offending operations into a separate loop Unknown data dependence. Memory location is the same as accessed at <UNKNOWN LOCATION>. The compiler can't determine the cause of the issue. Dependences: Unknown: %v8 = load double, double* %arrayidx, align 8 -> store double %mul16, double* %ptr.2, align 8 The extra information here isn't particularly useful, especially because it doesn't have any line information. Perhaps the code should disable the extra info if the location is unknown/unavailable. sdesmalen: These changes here obfuscate the report that's generated by LAA when running `loop(print-access… | |||||
Thanks for pointing this out. Done. malharJ: Thanks for pointing this out. Done. | |||||
}); | |||||
if (found == Deps->end()) | |||||
return; | |||||
MemoryDepChecker::Dependence Dep = *found; | |||||
LLVM_DEBUG(dbgs() << "LAA: unsafe dependent memory operations in loop\n"); | |||||
// Emit detailed remarks for each unsafe dependence | |||||
Is this true? Unless I miss something, this emits a remark for the first unsafe dependence? fhahn: Is this true? Unless I miss something, this emits a remark for the first unsafe dependence? | |||||
you are correct, the comment is no longer correct. I've updated it now in the new function. malharJ: you are correct, the comment is no longer correct. I've updated it now in the new function. | |||||
DebugLoc SourceLoc; | |||||
if (Instruction *I = Dep.getSource(*this)) { | |||||
SourceLoc = I->getDebugLoc(); | |||||
if (auto *DD = dyn_cast<Instruction>( | |||||
isa<MemSetInst>(I) ? cast<MemSetInst>(I)->getRawDest() | |||||
: getPointerOperand(I))) | |||||
SourceLoc = DD->getDebugLoc(); | |||||
} | |||||
OptimizationRemarkAnalysis &R = | |||||
recordAnalysis("UnsafeDep", Dep.getDestination(*this)) | |||||
<< "unsafe dependent memory operations in loop. Use " | << "unsafe dependent memory operations in loop. Use " | ||||
"#pragma loop distribute(enable) to allow loop distribution " | "#pragma loop distribute(enable) to allow loop distribution " | ||||
"to attempt to isolate the offending operations into a separate " | "to attempt to isolate the offending operations into a separate " | ||||
"loop"; | "loop"; | ||||
LLVM_DEBUG(dbgs() << "LAA: unsafe dependent memory operations in loop\n"); | |||||
switch (Dep.Type) { | |||||
case MemoryDepChecker::Dependence::NoDep: | |||||
case MemoryDepChecker::Dependence::Forward: | |||||
case MemoryDepChecker::Dependence::BackwardVectorizable: | |||||
llvm_unreachable("Unexpected dependency"); | |||||
nit: dependence instead of dependency, to be in line with the terminology used elsewhere in the file? fhahn: nit: dependence instead of dependency, to be in line with the terminology used elsewhere in the… | |||||
case MemoryDepChecker::Dependence::Backward: | |||||
Can this be an assert? sdesmalen: Can this be an assert? | |||||
I guess not ... Looking at the definition of MemoryDepChecker::getDependences(), malharJ: I guess not ...
Looking at the definition of `MemoryDepChecker::getDependences()`,
it can… | |||||
Not Done ReplyInline Actionsnit: can be removed if you address my comment below. sdesmalen: nit: can be removed if you address my comment below. | |||||
R << "\nBackward loop carried data dependence."; | |||||
Not Done ReplyInline Actionsnit: Please remove the newline. sdesmalen: nit: Please remove the newline. | |||||
can we keep this newline ? The current text (Note: it was not introduced by this patch) is too long already:
malharJ: can we keep this newline ?
The current text (Note: it was not introduced by this patch) is too… | |||||
Instead of using + to concatenate two strings, let this be handled by raw_ostream using << . sdesmalen: Instead of using `+` to concatenate two strings, let this be handled by raw_ostream using `<<` . | |||||
break; | |||||
case MemoryDepChecker::Dependence::ForwardButPreventsForwarding: | |||||
R << "\nForward loop carried data dependence that prevents " | |||||
"store-to-load forwarding."; | |||||
break; | |||||
case MemoryDepChecker::Dependence:: | |||||
Can this be an assert? sdesmalen: Can this be an assert? | |||||
I dont think so ... We will still need the check to see if we can find a dependency type that is unsafe for vectorization. Just the presence of Deps is not sufficient to say above will be satisfied, because it can contain malharJ: I dont think so ...
We will still need the check to see if we can find a dependency type that… | |||||
BackwardVectorizableButPreventsForwarding: | |||||
R << "\nBackward loop carried data dependence that prevents " | |||||
"store-to-load forwarding."; | |||||
Not Done ReplyInline Actionsnit: if (!Deps || llvm::find_if(Deps, [](const MemoryDepChecker::Dependence &D) { .. }) == Deps->end()) return; sdesmalen: nit:
if (!Deps ||
llvm::find_if(Deps, [](const MemoryDepChecker::Dependence &D) { .. | |||||
break; | |||||
case MemoryDepChecker::Dependence::Unknown: | |||||
R << "\nUnknown data dependence."; | |||||
nit: please move this code below the switch sdesmalen: nit: please move this code below the switch | |||||
break; | |||||
} | |||||
if (SourceLoc) | |||||
R << " Memory location is the same as accessed at " | |||||
nit: single-use variable, can be inlined in the next statement. sdesmalen: nit: single-use variable, can be inlined in the next statement. | |||||
Not Done ReplyInline ActionsThis dyn_cast will cause a segfault if the value returned by getPointerOperand is a nullptr. This needs to be dyn_cast_or_null. It would be good to have a test for this case. Also, there is no test for the MemSetInst either. Can you add one? sdesmalen: This dyn_cast will cause a segfault if the value returned by `getPointerOperand` is a `nullptr`. | |||||
Ok, I've changed it to dyn_cast_or_null (I think you had already mentioned it an earlier review comment but I missed it). Also, I tried writing a test case for memset case and I found that it doesnt even make sense to have a case here for memset. if (!St) { recordAnalysis("CantVectorizeInstruction", St) << "instruction cannot be vectorized"; HasComplexMemInst = true; continue; } So it looks like a different remark is emitted and an early exit is taken because memset is considered as a "complex" memory instruction. considering the above, I have removed the memset part from the patch. malharJ: Ok, I've changed it to `dyn_cast_or_null` (I think you had already mentioned it an earlier… | |||||
<< ore::NV("Location", SourceLoc); | |||||
} | } | ||||
} | } | ||||
bool LoopAccessInfo::blockNeedsPredication(BasicBlock *BB, Loop *TheLoop, | bool LoopAccessInfo::blockNeedsPredication(BasicBlock *BB, Loop *TheLoop, | ||||
DominatorTree *DT) { | DominatorTree *DT) { | ||||
assert(TheLoop->contains(BB) && "Unknown block used"); | assert(TheLoop->contains(BB) && "Unknown block used"); | ||||
// Blocks that do not dominate the latch need predication. | // Blocks that do not dominate the latch need predication. | ||||
Show All 9 Lines | OptimizationRemarkAnalysis &LoopAccessInfo::recordAnalysis(StringRef RemarkName, | ||||
DebugLoc DL = TheLoop->getStartLoc(); | DebugLoc DL = TheLoop->getStartLoc(); | ||||
if (I) { | if (I) { | ||||
CodeRegion = I->getParent(); | CodeRegion = I->getParent(); | ||||
// If there is no debug location attached to the instruction, revert back to | // If there is no debug location attached to the instruction, revert back to | ||||
// using the loop's. | // using the loop's. | ||||
if (I->getDebugLoc()) | if (I->getDebugLoc()) | ||||
DL = I->getDebugLoc(); | DL = I->getDebugLoc(); | ||||
} | } | ||||
Report = std::make_unique<OptimizationRemarkAnalysis>(DEBUG_TYPE, RemarkName, DL, | Report = std::make_unique<OptimizationRemarkAnalysis>(DEBUG_TYPE, RemarkName, DL, | ||||
CodeRegion); | CodeRegion); | ||||
return *Report; | return *Report; | ||||
} | } | ||||
bool LoopAccessInfo::isUniform(Value *V) const { | bool LoopAccessInfo::isUniform(Value *V) const { | ||||
Not Done ReplyInline Actionsnit: this is more concisely written as: if (Instruction *I = Dep.getSource(*this)) { DebugLoc SourceLoc = I->getDebugLoc(); if (auto *DD = dyn_cast_or_null<Instruction>(getPointerOperand(I))) SourceLoc = DD->getDebugLoc(); R << " Memory location is the same as accessed at " << ore::NV("Location", SourceLoc); } sdesmalen: nit: this is more concisely written as:
if (Instruction *I = Dep.getSource(*this)) {… | |||||
auto *SE = PSE->getSE(); | auto *SE = PSE->getSE(); | ||||
// Since we rely on SCEV for uniformity, if the type is not SCEVable, it is | // Since we rely on SCEV for uniformity, if the type is not SCEVable, it is | ||||
// never considered uniform. | // never considered uniform. | ||||
// TODO: Is this really what we want? Even without FP SCEV, we may want some | // TODO: Is this really what we want? Even without FP SCEV, we may want some | ||||
// trivially loop-invariant FP values to be considered uniform. | // trivially loop-invariant FP values to be considered uniform. | ||||
if (!SE->isSCEVable(V->getType())) | if (!SE->isSCEVable(V->getType())) | ||||
return false; | return false; | ||||
return (SE->isLoopInvariant(SE->getSCEV(V), TheLoop)); | return (SE->isLoopInvariant(SE->getSCEV(V), TheLoop)); | ||||
} | } | ||||
void LoopAccessInfo::collectStridedAccess(Value *MemAccess) { | void LoopAccessInfo::collectStridedAccess(Value *MemAccess) { | ||||
Value *Ptr = getLoadStorePointerOperand(MemAccess); | Value *Ptr = getLoadStorePointerOperand(MemAccess); | ||||
if (!Ptr) | if (!Ptr) | ||||
return; | return; | ||||
Value *Stride = getStrideFromPointer(Ptr, PSE->getSE(), TheLoop); | Value *Stride = getStrideFromPointer(Ptr, PSE->getSE(), TheLoop); | ||||
if (!Stride) | if (!Stride) | ||||
return; | return; | ||||
LLVM_DEBUG(dbgs() << "LAA: Found a strided access that is a candidate for " | LLVM_DEBUG(dbgs() << "LAA: Found a strided access that is a candidate for " | ||||
"versioning:"); | "versioning:"); | ||||
LLVM_DEBUG(dbgs() << " Ptr: " << *Ptr << " Stride: " << *Stride << "\n"); | LLVM_DEBUG(dbgs() << " Ptr: " << *Ptr << " Stride: " << *Stride << "\n"); | ||||
// Avoid adding the "Stride == 1" predicate when we know that | // Avoid adding the "Stride == 1" predicate when we know that | ||||
nit: please start your comments with capitalisation and end with a period. sdesmalen: nit: please start your comments with capitalisation and end with a period. | |||||
// Stride >= Trip-Count. Such a predicate will effectively optimize a single | // Stride >= Trip-Count. Such a predicate will effectively optimize a single | ||||
// or zero iteration loop, as Trip-Count <= Stride == 1. | // or zero iteration loop, as Trip-Count <= Stride == 1. | ||||
// | // | ||||
Not Done ReplyInline Actionsnit: Maybe it's worth moving the recordAnalysis call to before the for loop, i.e. OptimizationRemarkAnalysis R = recordAnalysis("UnknownArrayBounds", I); for (...) david-arm: nit: Maybe it's worth moving the recordAnalysis call to before the `for` loop, i.e. | |||||
Unfortunately that can't be done because Instruction* I is being declared inside the for-loop. malharJ: Unfortunately that can't be done because `Instruction* I` is being declared inside the for-loop. | |||||
// TODO: We are currently not making a very informed decision on when it is | // TODO: We are currently not making a very informed decision on when it is | ||||
// beneficial to apply stride versioning. It might make more sense that the | // beneficial to apply stride versioning. It might make more sense that the | ||||
// users of this analysis (such as the vectorizer) will trigger it, based on | // users of this analysis (such as the vectorizer) will trigger it, based on | ||||
// their specific cost considerations; For example, in cases where stride | // their specific cost considerations; For example, in cases where stride | ||||
// versioning does not help resolving memory accesses/dependences, the | // versioning does not help resolving memory accesses/dependences, the | ||||
// vectorizer should evaluate the cost of the runtime test, and the benefit | // vectorizer should evaluate the cost of the runtime test, and the benefit | ||||
// of various possible stride specializations, considering the alternatives | // of various possible stride specializations, considering the alternatives | ||||
// of using gather/scatters (if available). | // of using gather/scatters (if available). | ||||
▲ Show 20 Lines • Show All 159 Lines • Show Last 20 Lines |
UncomputablePtr doesn't really need a separate variable in AccessAnalysis, since it's only set by one function, and used directly by the function that calls it.
You can change canCheckPtrAtRT to take Value **UncomputablePtr = nullptr, and in canCheckPtrAtRT assign the value if the value is requested:
Where you call it, you can have:
Can you split this change out into a separate patch with a test that demonstrates the change?