lib/Transforms/Scalar/LoopIdiomRecognize.cpp
(lines elided) | public: | ||||
bool runOnLoop(Loop *L); | bool runOnLoop(Loop *L); | ||||
private: | private: | ||||
typedef SmallVector<StoreInst *, 8> StoreList; | typedef SmallVector<StoreInst *, 8> StoreList; | ||||
typedef MapVector<Value *, StoreList> StoreListMap; | typedef MapVector<Value *, StoreList> StoreListMap; | ||||
StoreListMap StoreRefsForMemset; | StoreListMap StoreRefsForMemset; | ||||
StoreListMap StoreRefsForMemsetPattern; | StoreListMap StoreRefsForMemsetPattern; | ||||
StoreList StoreRefsForMemcpy; | StoreListMap StoreRefsForMemcpy; | ||||
bool HasMemset; | bool HasMemset; | ||||
bool HasMemsetPattern; | bool HasMemsetPattern; | ||||
bool HasMemcpy; | bool HasMemcpy; | ||||
/// Return code for isLegalStore() | /// Return code for isLegalStore() | ||||
enum LegalStoreKind { | enum LegalStoreKind { | ||||
None = 0, | None = 0, | ||||
Memset, | Memset, | ||||
MemsetPattern, | MemsetPattern, | ||||
Memcpy, | Memcpy, | ||||
UnorderedAtomicMemcpy, | UnorderedAtomicMemcpy, | ||||
DontUse // Dummy retval never to be used. Allows catching errors in retval | DontUse // Dummy retval never to be used. Allows catching errors in retval | ||||
// handling. | // handling. | ||||
}; | }; | ||||
/// \name Countable Loop Idiom Handling | /// \name Countable Loop Idiom Handling | ||||
/// @{ | /// @{ | ||||
bool runOnCountableLoop(); | bool runOnCountableLoop(); | ||||
bool runOnLoopBlock(BasicBlock *BB, const SCEV *BECount, | bool runOnLoopBlock(BasicBlock *BB, const SCEV *BECount, | ||||
SmallVectorImpl<BasicBlock *> &ExitBlocks); | SmallVectorImpl<BasicBlock *> &ExitBlocks); | ||||
void collectStores(BasicBlock *BB); | void collectStores(BasicBlock *BB); | ||||
LegalStoreKind isLegalStore(StoreInst *SI); | LegalStoreKind isLegalStore(StoreInst *SI); | ||||
bool processLoopStores(SmallVectorImpl<StoreInst *> &SL, const SCEV *BECount, | bool processLoopStores(SmallVectorImpl<StoreInst *> &SL, const SCEV *BECount, | ||||
bool ForMemset); | LegalStoreKind MemIdiom); | ||||
haicheng: I think you can pass in an enum which has value ForMemset, ForMemsetPattern, ForMemcpy like LegalStoreKind above, or just use LegalStoreKind if possible. | |||||
bool processLoopMemSet(MemSetInst *MSI, const SCEV *BECount); | bool processLoopMemSet(MemSetInst *MSI, const SCEV *BECount); | ||||
bool processLoopStridedStore(Value *DestPtr, unsigned StoreSize, | bool processLoopStridedStore(Value *DestPtr, unsigned StoreSize, | ||||
unsigned StoreAlignment, Value *StoredVal, | unsigned StoreAlignment, Value *StoredVal, | ||||
Instruction *TheStore, | Instruction *TheStore, | ||||
SmallPtrSetImpl<Instruction *> &Stores, | SmallPtrSetImpl<Instruction *> &Stores, | ||||
const SCEVAddRecExpr *Ev, const SCEV *BECount, | const SCEVAddRecExpr *Ev, const SCEV *BECount, | ||||
bool NegStride, bool IsLoopMemset = false); | bool NegStride, LegalStoreKind MemIdiom, | ||||
bool processLoopStoreOfLoopLoad(StoreInst *SI, const SCEV *BECount); | bool IsLoopMemset = false); | ||||
haicheng: I think we should use MemIdiom and IsLoopMemset can be removed. | |||||
DIVYA: But when processLoopStridedStore() is called from processLoopMemSet(), IsLoopMemset is set to true, and when it is called from processLoopStores(), it is set to false, even though both can have MemIdiom set to LegalStoreKind::Memset. So using MemIdiom alone would not be sufficient; we need IsLoopMemset to indicate a memset which can be promoted to a large memset. | |||||
haicheng: Yes, you are right. I think we can move the check avoidLIRForMultiBlockLoop() much earlier so that we don't need this flag and we can save compilation time. But we can do that in the next patch. | |||||
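Editorial aside: a minimal, compilable sketch of the two situations this thread distinguishes. It is not part of the patch; the comments name this patch's helpers only to indicate which path would handle each loop.

```
#include <cstddef>
#include <cstring>

void store_loop(char *dst, std::size_t n) {
  for (std::size_t i = 0; i < n; ++i)
    dst[i] = 0;                  // plain strided stores: the processLoopStores() path
}

void memset_loop(char (*rows)[64], std::size_t n) {
  for (std::size_t i = 0; i < n; ++i)
    std::memset(rows[i], 0, 64); // an existing memset that may widen into one large
                                 // memset: the processLoopMemSet() path, which is the
                                 // case that needs IsLoopMemset=true
}
```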
bool avoidLIRForMultiBlockLoop(bool IsMemset = false, | bool avoidLIRForMultiBlockLoop(bool IsMemset = false, | ||||
bool IsLoopMemset = false); | bool IsLoopMemset = false); | ||||
/// @} | /// @} | ||||
/// \name Noncountable Loop Idiom Handling | /// \name Noncountable Loop Idiom Handling | ||||
/// @{ | /// @{ | ||||
bool runOnNoncountableLoop(); | bool runOnNoncountableLoop(); | ||||
bool recognizePopcount(); | bool recognizePopcount(); | ||||
void transformLoopToPopcount(BasicBlock *PreCondBB, Instruction *CntInst, | void transformLoopToPopcount(BasicBlock *PreCondBB, Instruction *CntInst, | ||||
PHINode *CntPhi, Value *Var); | PHINode *CntPhi, Value *Var); | ||||
bool recognizeAndInsertCTLZ(); | bool recognizeAndInsertCTLZ(); | ||||
mgrang: Extra newline. | |||||
void transformLoopToCountable(BasicBlock *PreCondBB, Instruction *CntInst, | void transformLoopToCountable(BasicBlock *PreCondBB, Instruction *CntInst, | ||||
PHINode *CntPhi, Value *Var, const DebugLoc DL, | PHINode *CntPhi, Value *Var, const DebugLoc DL, | ||||
bool ZeroCheck, bool IsCntPhiUsedOutsideLoop); | bool ZeroCheck, bool IsCntPhiUsedOutsideLoop); | ||||
/// @} | /// @} | ||||
}; | }; | ||||
class LoopIdiomRecognizeLegacyPass : public LoopPass { | class LoopIdiomRecognizeLegacyPass : public LoopPass { | ||||
(lines elided) | LoopIdiomRecognize::isLegalStore(StoreInst *SI) { | ||||
// Don't touch volatile stores. | // Don't touch volatile stores. | ||||
if (SI->isVolatile()) | if (SI->isVolatile()) | ||||
return LegalStoreKind::None; | return LegalStoreKind::None; | ||||
// We only want simple or unordered-atomic stores. | // We only want simple or unordered-atomic stores. | ||||
if (!SI->isUnordered()) | if (!SI->isUnordered()) | ||||
return LegalStoreKind::None; | return LegalStoreKind::None; | ||||
// Don't convert stores of non-integral pointer types to memsets (which stores | // Don't convert stores of non-integral pointer types to memsets (which stores | ||||
mgrang: Extra newline. | |||||
// integers). | // integers). | ||||
if (DL->isNonIntegralPointerType(SI->getValueOperand()->getType())) | if (DL->isNonIntegralPointerType(SI->getValueOperand()->getType())) | ||||
return LegalStoreKind::None; | return LegalStoreKind::None; | ||||
// Avoid merging nontemporal stores. | // Avoid merging nontemporal stores. | ||||
if (SI->getMetadata(LLVMContext::MD_nontemporal)) | if (SI->getMetadata(LLVMContext::MD_nontemporal)) | ||||
return LegalStoreKind::None; | return LegalStoreKind::None; | ||||
(lines elided) | return LegalStoreKind::Memset; | ||||
StorePtr->getType()->getPointerAddressSpace() == 0 && | StorePtr->getType()->getPointerAddressSpace() == 0 && | ||||
(PatternValue = getMemSetPatternValue(StoredVal, DL))) { | (PatternValue = getMemSetPatternValue(StoredVal, DL))) { | ||||
// It looks like we can use PatternValue! | // It looks like we can use PatternValue! | ||||
return LegalStoreKind::MemsetPattern; | return LegalStoreKind::MemsetPattern; | ||||
} | } | ||||
// Otherwise, see if the store can be turned into a memcpy. | // Otherwise, see if the store can be turned into a memcpy. | ||||
if (HasMemcpy) { | if (HasMemcpy) { | ||||
// Check to see if the stride matches the size of the store. If so, then we | |||||
// know that every byte is touched in the loop. | |||||
APInt Stride = getStoreStride(StoreEv); | |||||
unsigned StoreSize = getStoreSizeInBytes(SI, DL); | |||||
if (StoreSize != Stride && StoreSize != -Stride) | |||||
return LegalStoreKind::None; | |||||
// The store must be feeding a non-volatile load. | // The store must be feeding a non-volatile load. | ||||
LoadInst *LI = dyn_cast<LoadInst>(SI->getValueOperand()); | LoadInst *LI = dyn_cast<LoadInst>(SI->getValueOperand()); | ||||
// Only allow non-volatile loads | // Only allow non-volatile loads | ||||
if (!LI || LI->isVolatile()) | if (!LI || LI->isVolatile()) | ||||
return LegalStoreKind::None; | return LegalStoreKind::None; | ||||
// Only allow simple or unordered-atomic loads | // Only allow simple or unordered-atomic loads | ||||
if (!LI->isUnordered()) | if (!LI->isUnordered()) | ||||
(lines elided) | if (HasMemcpy) { | ||||
// The store and load must share the same stride. | // The store and load must share the same stride. | ||||
if (StoreEv->getOperand(1) != LoadEv->getOperand(1)) | if (StoreEv->getOperand(1) != LoadEv->getOperand(1)) | ||||
return LegalStoreKind::None; | return LegalStoreKind::None; | ||||
// Success. This store can be converted into a memcpy. | // Success. This store can be converted into a memcpy. | ||||
UnorderedAtomic = UnorderedAtomic || LI->isAtomic(); | UnorderedAtomic = UnorderedAtomic || LI->isAtomic(); | ||||
return UnorderedAtomic ? LegalStoreKind::UnorderedAtomicMemcpy | return UnorderedAtomic ? LegalStoreKind::UnorderedAtomicMemcpy | ||||
: LegalStoreKind::Memcpy; | : LegalStoreKind::Memcpy; | ||||
} | } | ||||
mgrang: Ditto. | |||||
// This store can't be transformed into a memset/memcpy. | // This store can't be transformed into a memset/memcpy. | ||||
return LegalStoreKind::None; | return LegalStoreKind::None; | ||||
} | } | ||||
void LoopIdiomRecognize::collectStores(BasicBlock *BB) { | void LoopIdiomRecognize::collectStores(BasicBlock *BB) { | ||||
StoreRefsForMemset.clear(); | StoreRefsForMemset.clear(); | ||||
StoreRefsForMemsetPattern.clear(); | StoreRefsForMemsetPattern.clear(); | ||||
StoreRefsForMemcpy.clear(); | StoreRefsForMemcpy.clear(); | ||||
(lines elided) | case LegalStoreKind::Memset: { | ||||
StoreRefsForMemset[Ptr].push_back(SI); | StoreRefsForMemset[Ptr].push_back(SI); | ||||
} break; | } break; | ||||
case LegalStoreKind::MemsetPattern: { | case LegalStoreKind::MemsetPattern: { | ||||
// Find the base pointer. | // Find the base pointer. | ||||
Value *Ptr = GetUnderlyingObject(SI->getPointerOperand(), *DL); | Value *Ptr = GetUnderlyingObject(SI->getPointerOperand(), *DL); | ||||
StoreRefsForMemsetPattern[Ptr].push_back(SI); | StoreRefsForMemsetPattern[Ptr].push_back(SI); | ||||
} break; | } break; | ||||
case LegalStoreKind::Memcpy: | case LegalStoreKind::Memcpy: | ||||
case LegalStoreKind::UnorderedAtomicMemcpy: | case LegalStoreKind::UnorderedAtomicMemcpy: { | ||||
StoreRefsForMemcpy.push_back(SI); | // Find the base pointer. | ||||
break; | Value *Ptr = GetUnderlyingObject(SI->getPointerOperand(), *DL); | ||||
StoreRefsForMemcpy[Ptr].push_back(SI); | |||||
} break; | |||||
default: | default: | ||||
assert(false && "unhandled return value"); | assert(false && "unhandled return value"); | ||||
break; | break; | ||||
} | } | ||||
mgrang: Ditto. | |||||
} | } | ||||
} | } | ||||
/// runOnLoopBlock - Process the specified block, which lives in a counted loop | /// runOnLoopBlock - Process the specified block, which lives in a counted loop | ||||
/// with the specified backedge count. This block is known to be in the current | /// with the specified backedge count. This block is known to be in the current | ||||
/// loop and not in any subloops. | /// loop and not in any subloops. | ||||
bool LoopIdiomRecognize::runOnLoopBlock( | bool LoopIdiomRecognize::runOnLoopBlock( | ||||
BasicBlock *BB, const SCEV *BECount, | BasicBlock *BB, const SCEV *BECount, | ||||
SmallVectorImpl<BasicBlock *> &ExitBlocks) { | SmallVectorImpl<BasicBlock *> &ExitBlocks) { | ||||
// We can only promote stores in this block if they are unconditionally | // We can only promote stores in this block if they are unconditionally | ||||
// executed in the loop. For a block to be unconditionally executed, it has | // executed in the loop. For a block to be unconditionally executed, it has | ||||
// to dominate all the exit blocks of the loop. Verify this now. | // to dominate all the exit blocks of the loop. Verify this now. | ||||
for (unsigned i = 0, e = ExitBlocks.size(); i != e; ++i) | for (unsigned i = 0, e = ExitBlocks.size(); i != e; ++i) | ||||
if (!DT->dominates(BB, ExitBlocks[i])) | if (!DT->dominates(BB, ExitBlocks[i])) | ||||
return false; | return false; | ||||
bool MadeChange = false; | bool MadeChange = false; | ||||
// Look for store instructions, which may be optimized to memset/memcpy. | // Look for store instructions, which may be optimized to memset/memcpy. | ||||
collectStores(BB); | collectStores(BB); | ||||
// Look for a single store or sets of stores with a common base, which can be | // Look for a single store or sets of stores with a common base, which can be | ||||
// optimized into a memset (memset_pattern). The latter most commonly happens | // optimized into a memset (memset_pattern). The latter most commonly happens | ||||
// with structs and handunrolled loops. | // with structs and handunrolled loops. | ||||
for (auto &SL : StoreRefsForMemset) | for (auto &SL : StoreRefsForMemset) | ||||
MadeChange |= processLoopStores(SL.second, BECount, true); | MadeChange |= | ||||
processLoopStores(SL.second, BECount, LegalStoreKind::Memset); | |||||
for (auto &SL : StoreRefsForMemsetPattern) | for (auto &SL : StoreRefsForMemsetPattern) | ||||
MadeChange |= processLoopStores(SL.second, BECount, false); | MadeChange |= | ||||
processLoopStores(SL.second, BECount, LegalStoreKind::MemsetPattern); | |||||
// Optimize the store into a memcpy, if it feeds an similarly strided load. | // Look for a single store with a corresponding load or a set of stores with a | ||||
// common base (the corresponding loads of those stores should also have a | |||||
// common base), which can be optimized into a memcpy. | |||||
haicheng: I think this comment should be changed. | |||||
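Editorial aside: compilable versions of the source loops the new comment describes (assumed shapes, not taken from the patch). Whether the transform actually fires also depends on alias analysis proving that the loaded and stored ranges do not overlap.

```
#include <cstddef>

struct S { int a; int b; };

void copy_ints(int *A, const int *B, std::size_t n) {
  for (std::size_t i = 0; i < n; ++i)
    A[i] = B[i];                 // single store fed by a load with the same stride
}

void copy_structs(S *A, const S *B, std::size_t n) {
  for (std::size_t i = 0; i < n; ++i) {
    A[i].a = B[i].a;             // consecutive stores with a common base,
    A[i].b = B[i].b;             // fed by consecutive loads with a common base
  }
}
```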
for (auto &SI : StoreRefsForMemcpy) | for (auto &SI : StoreRefsForMemcpy) | ||||
MadeChange |= processLoopStoreOfLoopLoad(SI, BECount); | MadeChange |= | ||||
processLoopStores(SI.second, BECount, LegalStoreKind::Memcpy); | |||||
for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E;) { | for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E;) { | ||||
Instruction *Inst = &*I++; | Instruction *Inst = &*I++; | ||||
// Look for memset instructions, which may be optimized to a larger memset. | // Look for memset instructions, which may be optimized to a larger memset. | ||||
if (MemSetInst *MSI = dyn_cast<MemSetInst>(Inst)) { | if (MemSetInst *MSI = dyn_cast<MemSetInst>(Inst)) { | ||||
WeakTrackingVH InstPtr(&*I); | WeakTrackingVH InstPtr(&*I); | ||||
if (!processLoopMemSet(MSI, BECount)) | if (!processLoopMemSet(MSI, BECount)) | ||||
continue; | continue; | ||||
MadeChange = true; | MadeChange = true; | ||||
// If processing the memset invalidated our iterator, start over from the | // If processing the memset invalidated our iterator, start over from the | ||||
// top of the block. | // top of the block. | ||||
if (!InstPtr) | if (!InstPtr) | ||||
I = BB->begin(); | I = BB->begin(); | ||||
continue; | continue; | ||||
} | } | ||||
} | } | ||||
return MadeChange; | return MadeChange; | ||||
} | } | ||||
/// processLoopStores - See if this store(s) can be promoted to a memset. | /// processLoopStores - See if this store(s) can be promoted to a memset or | ||||
/// memcpy. | |||||
bool LoopIdiomRecognize::processLoopStores(SmallVectorImpl<StoreInst *> &SL, | bool LoopIdiomRecognize::processLoopStores(SmallVectorImpl<StoreInst *> &SL, | ||||
const SCEV *BECount, | const SCEV *BECount, | ||||
bool ForMemset) { | LegalStoreKind MemIdiom) { | ||||
// Try to find consecutive stores that can be transformed into memsets. | // Try to find consecutive stores that can be transformed into memsets. | ||||
SetVector<StoreInst *> Heads, Tails; | SetVector<StoreInst *> Heads, Tails; | ||||
SmallDenseMap<StoreInst *, StoreInst *> ConsecutiveChain; | SmallDenseMap<StoreInst *, StoreInst *> ConsecutiveChain; | ||||
// Do a quadratic search on all of the given stores and find | // Do a quadratic search on all of the given stores and find | ||||
// all of the pairs of stores that follow each other. | // all of the pairs of stores that follow each other. | ||||
SmallVector<unsigned, 16> IndexQueue; | SmallVector<unsigned, 16> IndexQueue; | ||||
for (unsigned i = 0, e = SL.size(); i < e; ++i) { | for (unsigned i = 0, e = SL.size(); i < e; ++i) { | ||||
if (MemIdiom == LegalStoreKind::Memcpy) | |||||
assert(SL[i]->isUnordered() && | |||||
"Expected only non-volatile non-ordered stores."); | |||||
else | |||||
assert(SL[i]->isSimple() && "Expected only non-volatile stores."); | assert(SL[i]->isSimple() && "Expected only non-volatile stores."); | ||||
Value *FirstStoredVal = SL[i]->getValueOperand(); | Value *FirstStoredVal = SL[i]->getValueOperand(); | ||||
Value *FirstStorePtr = SL[i]->getPointerOperand(); | Value *FirstStorePtr = SL[i]->getPointerOperand(); | ||||
const SCEVAddRecExpr *FirstStoreEv = | const SCEVAddRecExpr *FirstStoreEv = | ||||
cast<SCEVAddRecExpr>(SE->getSCEV(FirstStorePtr)); | cast<SCEVAddRecExpr>(SE->getSCEV(FirstStorePtr)); | ||||
APInt FirstStride = getStoreStride(FirstStoreEv); | APInt FirstStride = getStoreStride(FirstStoreEv); | ||||
unsigned FirstStoreSize = getStoreSizeInBytes(SL[i], DL); | unsigned FirstStoreSize = getStoreSizeInBytes(SL[i], DL); | ||||
LoadInst *FirstStoreLoad; | |||||
Value *FirstLoadPtr; | |||||
if (MemIdiom == LegalStoreKind::Memcpy) { | |||||
FirstStoreLoad = cast<LoadInst>(FirstStoredVal); | |||||
haicheng: SL[i]->getValueOperand() => FirstStoredVal | |||||
assert(FirstStoreLoad->isUnordered() && | |||||
"Expected only non-volatile non-ordered loads."); | |||||
FirstLoadPtr = | |||||
GetUnderlyingObject(FirstStoreLoad->getPointerOperand(), *DL); | |||||
} | |||||
// See if we can optimize just this store in isolation. | // See if we can optimize just this store in isolation. | ||||
if (FirstStride == FirstStoreSize || -FirstStride == FirstStoreSize) { | if (FirstStride == FirstStoreSize || -FirstStride == FirstStoreSize) { | ||||
Heads.insert(SL[i]); | Heads.insert(SL[i]); | ||||
continue; | continue; | ||||
} | } | ||||
Value *FirstSplatValue = nullptr; | Value *FirstSplatValue = nullptr; | ||||
Constant *FirstPatternValue = nullptr; | Constant *FirstPatternValue = nullptr; | ||||
if (ForMemset) | if (MemIdiom == LegalStoreKind::Memset || | ||||
MemIdiom == LegalStoreKind::MemsetPattern) { | |||||
if (MemIdiom == LegalStoreKind::Memset) | |||||
FirstSplatValue = isBytewiseValue(FirstStoredVal); | FirstSplatValue = isBytewiseValue(FirstStoredVal); | ||||
else | else | ||||
FirstPatternValue = getMemSetPatternValue(FirstStoredVal, DL); | FirstPatternValue = getMemSetPatternValue(FirstStoredVal, DL); | ||||
assert((FirstSplatValue || FirstPatternValue) && | assert((FirstSplatValue || FirstPatternValue) && | ||||
"Expected either splat value or pattern value."); | "Expected either splat value or pattern value."); | ||||
} | |||||
IndexQueue.clear(); | IndexQueue.clear(); | ||||
// If a store has multiple consecutive store candidates, search Stores | // If a store has multiple consecutive store candidates, search Stores | ||||
// array according to the sequence: from i+1 to e, then from i-1 to 0. | // array according to the sequence: from i+1 to e, then from i-1 to 0. | ||||
// This is because usually pairing with immediate succeeding or preceding | // This is because usually pairing with immediate succeeding or preceding | ||||
// candidate create the best chance to find memset opportunity. | // candidate create the best chance to find memset opportunity. | ||||
unsigned j = 0; | unsigned j = 0; | ||||
for (j = i + 1; j < e; ++j) | for (j = i + 1; j < e; ++j) | ||||
IndexQueue.push_back(j); | IndexQueue.push_back(j); | ||||
for (j = i; j > 0; --j) | for (j = i; j > 0; --j) | ||||
IndexQueue.push_back(j - 1); | IndexQueue.push_back(j - 1); | ||||
for (auto &k : IndexQueue) { | for (auto &k : IndexQueue) { | ||||
if (MemIdiom == LegalStoreKind::Memcpy) | |||||
assert(SL[k]->isUnordered() && | |||||
"Expected only non-volatile non-ordered stores."); | |||||
else | |||||
assert(SL[k]->isSimple() && "Expected only non-volatile stores."); | assert(SL[k]->isSimple() && "Expected only non-volatile stores."); | ||||
Value *SecondStorePtr = SL[k]->getPointerOperand(); | Value *SecondStorePtr = SL[k]->getPointerOperand(); | ||||
const SCEVAddRecExpr *SecondStoreEv = | const SCEVAddRecExpr *SecondStoreEv = | ||||
cast<SCEVAddRecExpr>(SE->getSCEV(SecondStorePtr)); | cast<SCEVAddRecExpr>(SE->getSCEV(SecondStorePtr)); | ||||
APInt SecondStride = getStoreStride(SecondStoreEv); | APInt SecondStride = getStoreStride(SecondStoreEv); | ||||
if (FirstStride != SecondStride) | if (FirstStride != SecondStride) | ||||
continue; | continue; | ||||
Value *SecondStoredVal = SL[k]->getValueOperand(); | Value *SecondStoredVal = SL[k]->getValueOperand(); | ||||
Value *SecondSplatValue = nullptr; | Value *SecondSplatValue = nullptr; | ||||
Constant *SecondPatternValue = nullptr; | Constant *SecondPatternValue = nullptr; | ||||
LoadInst *SecondStoreLoad; | |||||
if (ForMemset) | if (MemIdiom == LegalStoreKind::Memset || | ||||
MemIdiom == LegalStoreKind::MemsetPattern) { | |||||
if (MemIdiom == LegalStoreKind::Memset) | |||||
SecondSplatValue = isBytewiseValue(SecondStoredVal); | SecondSplatValue = isBytewiseValue(SecondStoredVal); | ||||
else | else | ||||
SecondPatternValue = getMemSetPatternValue(SecondStoredVal, DL); | SecondPatternValue = getMemSetPatternValue(SecondStoredVal, DL); | ||||
assert((SecondSplatValue || SecondPatternValue) && | assert((SecondSplatValue || SecondPatternValue) && | ||||
"Expected either splat value or pattern value."); | "Expected either splat value or pattern value."); | ||||
} else if (MemIdiom == LegalStoreKind::Memcpy) { | |||||
haicheng: Please add a comment to make it clear it is for memcpy, or add if (memcpy), or write this part as a switch. | |||||
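Editorial aside: a stand-alone sketch of the switch shape haicheng suggests, using a local stand-in enum rather than the pass's real types; the commented lines only indicate where the existing work would go.

```
enum class StoreKind { Memset, MemsetPattern, Memcpy };

// Dispatch once on the idiom kind instead of repeated if/else tests.
void classifySecondStore(StoreKind MemIdiom) {
  switch (MemIdiom) {
  case StoreKind::Memset:
    // SecondSplatValue = isBytewiseValue(SecondStoredVal);
    break;
  case StoreKind::MemsetPattern:
    // SecondPatternValue = getMemSetPatternValue(SecondStoredVal, DL);
    break;
  case StoreKind::Memcpy:
    // SecondStoreLoad = cast<LoadInst>(SecondStoredVal);
    break;
  }
}
```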
SecondStoreLoad = cast<LoadInst>(SecondStoredVal); | |||||
haicheng: SL[k]->getValueOperand() => SecondStoredVal | |||||
assert(SecondStoreLoad->isUnordered() && | |||||
"Expected only non-volatile non-ordered loads."); | |||||
Value *SecondLoadPtr = | |||||
GetUnderlyingObject(SecondStoreLoad->getPointerOperand(), *DL); | |||||
if (FirstLoadPtr != SecondLoadPtr) | |||||
continue; | |||||
if (SL[i]->isAtomic() || SL[k]->isAtomic() || | |||||
FirstStoreLoad->isAtomic() || SecondStoreLoad->isAtomic()) | |||||
return false; | |||||
} | |||||
if (isConsecutiveAccess(SL[i], SL[k], *DL, *SE, false)) { | if (isConsecutiveAccess(SL[i], SL[k], *DL, *SE, false)) { | ||||
if (ForMemset) { | if (MemIdiom == LegalStoreKind::Memset) { | ||||
if (FirstSplatValue != SecondSplatValue) | if (FirstSplatValue != SecondSplatValue) | ||||
continue; | continue; | ||||
} else { | } else if (MemIdiom == LegalStoreKind::MemsetPattern) { | ||||
if (FirstPatternValue != SecondPatternValue) | if (FirstPatternValue != SecondPatternValue) | ||||
continue; | continue; | ||||
} else if (MemIdiom == LegalStoreKind::Memcpy) { | |||||
if (!isConsecutiveAccess(FirstStoreLoad, SecondStoreLoad, *DL, *SE, | |||||
false)) | |||||
continue; | |||||
} | } | ||||
Tails.insert(SL[k]); | Tails.insert(SL[k]); | ||||
Heads.insert(SL[i]); | Heads.insert(SL[i]); | ||||
ConsecutiveChain[SL[i]] = SL[k]; | ConsecutiveChain[SL[i]] = SL[k]; | ||||
break; | break; | ||||
} | } | ||||
} | } | ||||
} | } | ||||
haicheng: clang-format here | |||||
// We may run into multiple chains that merge into a single chain. We mark the | // We may run into multiple chains that merge into a single chain. We mark the | ||||
// stores that we transformed so that we don't visit the same store twice. | // stores that we transformed so that we don't visit the same store twice. | ||||
SmallPtrSet<Value *, 16> TransformedStores; | SmallPtrSet<Value *, 16> TransformedStores; | ||||
bool Changed = false; | bool Changed = false; | ||||
// For stores that start but don't end a link in the chain: | // For stores that start but don't end a link in the chain: | ||||
for (SetVector<StoreInst *>::iterator it = Heads.begin(), e = Heads.end(); | for (SetVector<StoreInst *>::iterator it = Heads.begin(), e = Heads.end(); | ||||
it != e; ++it) { | it != e; ++it) { | ||||
if (Tails.count(*it)) | if (Tails.count(*it)) | ||||
continue; | continue; | ||||
// We found a store instr that starts a chain. Now follow the chain and try | // We found a store instr that starts a chain. Now follow the chain and try | ||||
// to transform it. | // to transform it. | ||||
SmallPtrSet<Instruction *, 8> AdjacentStores; | SmallPtrSet<Instruction *, 8> AdjacentStores; | ||||
StoreInst *I = *it; | StoreInst *I = *it; | ||||
StoreInst *HeadStore = I; | StoreInst *HeadStore = I; | ||||
unsigned StoreSize = 0; | unsigned StoreSize = 0; | ||||
// Collect the chain into a list. | // Collect the chain into a list. | ||||
while (Tails.count(I) || Heads.count(I)) { | while (Tails.count(I) || Heads.count(I)) { | ||||
if (TransformedStores.count(I)) | if (TransformedStores.count(I)) | ||||
break; | break; | ||||
AdjacentStores.insert(I); | AdjacentStores.insert(I); | ||||
StoreSize += getStoreSizeInBytes(I, DL); | StoreSize += getStoreSizeInBytes(I, DL); | ||||
// Move to the next value in the chain. | // Move to the next value in the chain. | ||||
I = ConsecutiveChain[I]; | I = ConsecutiveChain[I]; | ||||
} | } | ||||
Value *StoredVal = HeadStore->getValueOperand(); | Value *StoredVal = HeadStore->getValueOperand(); | ||||
Value *StorePtr = HeadStore->getPointerOperand(); | Value *StorePtr = HeadStore->getPointerOperand(); | ||||
const SCEVAddRecExpr *StoreEv = cast<SCEVAddRecExpr>(SE->getSCEV(StorePtr)); | const SCEVAddRecExpr *StoreEv = cast<SCEVAddRecExpr>(SE->getSCEV(StorePtr)); | ||||
haicheng: I think the code here can be refactored with the code above. | |||||
APInt Stride = getStoreStride(StoreEv); | APInt Stride = getStoreStride(StoreEv); | ||||
// Check to see if the stride matches the size of the stores. If so, then | // Check to see if the stride matches the size of the stores. If so, then | ||||
// we know that every byte is touched in the loop. | // we know that every byte is touched in the loop. | ||||
if (StoreSize != Stride && StoreSize != -Stride) | if (StoreSize != Stride && StoreSize != -Stride) | ||||
continue; | continue; | ||||
bool NegStride = StoreSize == -Stride; | bool NegStride = StoreSize == -Stride; | ||||
if (processLoopStridedStore(StorePtr, StoreSize, HeadStore->getAlignment(), | if (processLoopStridedStore(StorePtr, StoreSize, HeadStore->getAlignment(), | ||||
StoredVal, HeadStore, AdjacentStores, StoreEv, | StoredVal, HeadStore, AdjacentStores, StoreEv, | ||||
BECount, NegStride)) { | BECount, NegStride, MemIdiom)) { | ||||
TransformedStores.insert(AdjacentStores.begin(), AdjacentStores.end()); | TransformedStores.insert(AdjacentStores.begin(), AdjacentStores.end()); | ||||
Changed = true; | Changed = true; | ||||
} | } | ||||
} | } | ||||
return Changed; | return Changed; | ||||
} | } | ||||
/// processLoopMemSet - See if this memset can be promoted to a large memset. | /// processLoopMemSet - See if this memset can be promoted to a large memset. | ||||
bool LoopIdiomRecognize::processLoopMemSet(MemSetInst *MSI, | bool LoopIdiomRecognize::processLoopMemSet(MemSetInst *MSI, | ||||
haicheng: I think processLoopStoreOfLoopLoad() can be refactored with processLoopStridedStore(); then you don't need if...else... here. | |||||
DIVYA: The processLoopStoreOfLoopLoad() function was already present, so I haven't refactored it with processLoopStridedStore() in this patch. I can do that in the next patch. | |||||
haicheng: I think you can go ahead. | |||||
DIVYA: So should I do it in this patch or the next one? | |||||
haicheng: I think in this patch. | |||||
const SCEV *BECount) { | const SCEV *BECount) { | ||||
// We can only handle non-volatile memsets with a constant size. | // We can only handle non-volatile memsets with a constant size. | ||||
if (MSI->isVolatile() || !isa<ConstantInt>(MSI->getLength())) | if (MSI->isVolatile() || !isa<ConstantInt>(MSI->getLength())) | ||||
return false; | return false; | ||||
// If we're not allowed to hack on memset, we fail. | // If we're not allowed to hack on memset, we fail. | ||||
if (!HasMemset) | if (!HasMemset) | ||||
return false; | return false; | ||||
(lines elided) | bool LoopIdiomRecognize::processLoopMemSet(MemSetInst *MSI, | ||||
if (!SplatValue || !CurLoop->isLoopInvariant(SplatValue)) | if (!SplatValue || !CurLoop->isLoopInvariant(SplatValue)) | ||||
return false; | return false; | ||||
SmallPtrSet<Instruction *, 1> MSIs; | SmallPtrSet<Instruction *, 1> MSIs; | ||||
MSIs.insert(MSI); | MSIs.insert(MSI); | ||||
bool NegStride = SizeInBytes == -Stride; | bool NegStride = SizeInBytes == -Stride; | ||||
return processLoopStridedStore(Pointer, (unsigned)SizeInBytes, | return processLoopStridedStore(Pointer, (unsigned)SizeInBytes, | ||||
MSI->getAlignment(), SplatValue, MSI, MSIs, Ev, | MSI->getAlignment(), SplatValue, MSI, MSIs, Ev, | ||||
BECount, NegStride, /*IsLoopMemset=*/true); | BECount, NegStride, LegalStoreKind::Memset, /*IsLoopMemset=*/true); | ||||
} | } | ||||
/// mayLoopAccessLocation - Return true if the specified loop might access the | /// mayLoopAccessLocation - Return true if the specified loop might access the | ||||
/// specified pointer location, which is a loop-strided access. The 'Access' | /// specified pointer location, which is a loop-strided access. The 'Access' | ||||
/// argument specifies what the verboten forms of access are (read or write). | /// argument specifies what the verboten forms of access are (read or write). | ||||
static bool | static bool | ||||
mayLoopAccessLocation(Value *Ptr, ModRefInfo Access, Loop *L, | mayLoopAccessLocation(Value *Ptr, ModRefInfo Access, Loop *L, | ||||
const SCEV *BECount, unsigned StoreSize, | const SCEV *BECount, unsigned StoreSize, | ||||
(lines elided) | static const SCEV *getStartForNegStride(const SCEV *Start, const SCEV *BECount, | ||||
if (StoreSize != 1) | if (StoreSize != 1) | ||||
Index = SE->getMulExpr(Index, SE->getConstant(IntPtr, StoreSize), | Index = SE->getMulExpr(Index, SE->getConstant(IntPtr, StoreSize), | ||||
SCEV::FlagNUW); | SCEV::FlagNUW); | ||||
return SE->getMinusSCEV(Start, Index); | return SE->getMinusSCEV(Start, Index); | ||||
} | } | ||||
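Editorial aside: a minimal example of the negative-stride case that getStartForNegStride exists for; the stores walk downward, so the expanded start address must be re-based to the low end of the range.

```
#include <cstddef>

void zero_backwards(char *dst, std::size_t n) {
  for (std::size_t i = n; i-- > 0;)
    dst[i] = 0;   // stride is -1 element per iteration, yet every byte is covered
}
```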
/// processLoopStridedStore - We see a strided store of some value. If we can | /// processLoopStridedStore - We see a strided store of some value. If we can | ||||
/// transform this into a memset or memset_pattern in the loop preheader, do so. | /// transform this into a memset or memset_pattern in the loop preheader, do so. | ||||
/// Otherwise, If there are one or multiple stored values, which are strided | |||||
/// loads in the same loop with the same stride, (with the stores being | |||||
haicheng: a space between stride and comma. | |||||
/// consecutive and having a common base, similarly the loads also being | |||||
/// consecutive) then this may be transformed into a memcpy.This kicks in for | |||||
/// stuff like | |||||
/// for (i) A[i] = B[i]; | |||||
/// | |||||
/// for (i) | |||||
/// { | |||||
/// A[i].a = B[i].a | |||||
/// A[i].b = B[i].b | |||||
/// } | |||||
bool LoopIdiomRecognize::processLoopStridedStore( | bool LoopIdiomRecognize::processLoopStridedStore( | ||||
Value *DestPtr, unsigned StoreSize, unsigned StoreAlignment, | Value *DestPtr, unsigned StoreSize, unsigned StoreAlignment, | ||||
Value *StoredVal, Instruction *TheStore, | Value *StoredVal, Instruction *TheStore, | ||||
SmallPtrSetImpl<Instruction *> &Stores, const SCEVAddRecExpr *Ev, | SmallPtrSetImpl<Instruction *> &Stores, const SCEVAddRecExpr *Ev, | ||||
const SCEV *BECount, bool NegStride, bool IsLoopMemset) { | const SCEV *BECount, bool NegStride, LegalStoreKind MemIdiom, | ||||
Value *SplatValue = isBytewiseValue(StoredVal); | bool IsLoopMemset) { | ||||
Value *SplatValue; | |||||
Constant *PatternValue = nullptr; | Constant *PatternValue = nullptr; | ||||
if (MemIdiom == LegalStoreKind::Memset || | |||||
MemIdiom == LegalStoreKind::MemsetPattern) { | |||||
if (!SplatValue) | if (MemIdiom == LegalStoreKind::Memset) | ||||
SplatValue = isBytewiseValue(StoredVal); | |||||
else if (MemIdiom == LegalStoreKind::MemsetPattern) | |||||
PatternValue = getMemSetPatternValue(StoredVal, DL); | PatternValue = getMemSetPatternValue(StoredVal, DL); | ||||
assert((SplatValue || PatternValue) && | assert((SplatValue || PatternValue) && | ||||
"Expected either splat value or pattern value."); | "Expected either splat value or pattern value."); | ||||
} | |||||
haicheng: This part can be written as
```
if (Memset)
  SplatValue =
else if (memsetpattern)
  Patternvalue =
```
// The trip count of the loop and the base pointer of the addrec SCEV is | // The trip count of the loop and the base pointer of the addrec SCEV is | ||||
// guaranteed to be loop invariant, which means that it should dominate the | // guaranteed to be loop invariant, which means that it should dominate the | ||||
// header. This allows us to insert code for it in the preheader. | // header. This allows us to insert code for it in the preheader. | ||||
unsigned DestAS = DestPtr->getType()->getPointerAddressSpace(); | unsigned DestAS = DestPtr->getType()->getPointerAddressSpace(); | ||||
BasicBlock *Preheader = CurLoop->getLoopPreheader(); | BasicBlock *Preheader = CurLoop->getLoopPreheader(); | ||||
IRBuilder<> Builder(Preheader->getTerminator()); | IRBuilder<> Builder(Preheader->getTerminator()); | ||||
SCEVExpander Expander(*SE, *DL, "loop-idiom"); | SCEVExpander Expander(*SE, *DL, "loop-idiom"); | ||||
Show All 20 Lines | bool LoopIdiomRecognize::processLoopStridedStore( | ||||
if (mayLoopAccessLocation(BasePtr, MRI_ModRef, CurLoop, BECount, StoreSize, | if (mayLoopAccessLocation(BasePtr, MRI_ModRef, CurLoop, BECount, StoreSize, | ||||
*AA, Stores)) { | *AA, Stores)) { | ||||
Expander.clear(); | Expander.clear(); | ||||
// If we generated new code for the base pointer, clean up. | // If we generated new code for the base pointer, clean up. | ||||
RecursivelyDeleteTriviallyDeadInstructions(BasePtr, TLI); | RecursivelyDeleteTriviallyDeadInstructions(BasePtr, TLI); | ||||
return false; | return false; | ||||
} | } | ||||
if (MemIdiom == LegalStoreKind::Memset || | |||||
MemIdiom == LegalStoreKind::MemsetPattern) { | |||||
if (avoidLIRForMultiBlockLoop(/*IsMemset=*/true, IsLoopMemset)) | if (avoidLIRForMultiBlockLoop(/*IsMemset=*/true, IsLoopMemset)) | ||||
return false; | return false; | ||||
} | |||||
LoadInst *StoreLoadInst; | |||||
Value *LoadPtr; | |||||
const SCEVAddRecExpr *LoadEv; | |||||
const SCEV *LdStart; | |||||
Value *LoadBasePtr; | |||||
if (MemIdiom == LegalStoreKind::Memcpy) { | |||||
StoreLoadInst = cast<LoadInst>(StoredVal); | |||||
LoadPtr = GetUnderlyingObject(StoreLoadInst->getPointerOperand(), *DL); | |||||
LoadEv = | |||||
cast<SCEVAddRecExpr>(SE->getSCEV(StoreLoadInst->getPointerOperand())); | |||||
LdStart = LoadEv->getStart(); | |||||
unsigned LdAS = LoadPtr->getType()->getPointerAddressSpace(); | |||||
// Handle negative strided loops. | |||||
if (NegStride) | |||||
LdStart = getStartForNegStride(LdStart, BECount, IntPtr, StoreSize, SE); | |||||
// TODO: ideally we should still be able to generate memcpy if SCEV expander | |||||
// is taught to generate the dependencies at the latest point. | |||||
if (!isSafeToExpand(LdStart, *SE)) | |||||
return false; | |||||
// For a memcpy, we have to make sure that the input array is not being | |||||
// mutated by the loop. | |||||
LoadBasePtr = Expander.expandCodeFor(LdStart, Builder.getInt8PtrTy(LdAS), | |||||
Preheader->getTerminator()); | |||||
if (mayLoopAccessLocation(LoadBasePtr, MRI_Mod, CurLoop, BECount, StoreSize, | |||||
*AA, Stores)) { | |||||
Expander.clear(); | |||||
// If we generated new code for the base pointer, clean up. | |||||
RecursivelyDeleteTriviallyDeadInstructions(LoadBasePtr, TLI); | |||||
RecursivelyDeleteTriviallyDeadInstructions(BasePtr, TLI); | |||||
return false; | |||||
} | |||||
if (avoidLIRForMultiBlockLoop()) | |||||
return false; | |||||
} | |||||
// Okay, everything looks good, insert the memset. | // Okay, everything looks good, insert the memset or memcpy. | ||||
haicheng: not only memset here | |||||
// The # stored bytes is (BECount+1)*Size. Expand the trip count out to | // The # stored bytes is (BECount+1)*Size. Expand the trip count out to | ||||
// pointer size if it isn't already. | // pointer size if it isn't already. | ||||
BECount = SE->getTruncateOrZeroExtend(BECount, IntPtr); | BECount = SE->getTruncateOrZeroExtend(BECount, IntPtr); | ||||
const SCEV *NumBytesS = | const SCEV *NumBytesS = | ||||
SE->getAddExpr(BECount, SE->getOne(IntPtr), SCEV::FlagNUW); | SE->getAddExpr(BECount, SE->getOne(IntPtr), SCEV::FlagNUW); | ||||
if (StoreSize != 1) { | if (StoreSize != 1) { | ||||
NumBytesS = SE->getMulExpr(NumBytesS, SE->getConstant(IntPtr, StoreSize), | NumBytesS = SE->getMulExpr(NumBytesS, SE->getConstant(IntPtr, StoreSize), | ||||
SCEV::FlagNUW); | SCEV::FlagNUW); | ||||
} | } | ||||
// TODO: ideally we should still be able to generate memset if SCEV expander | // TODO: ideally we should still be able to generate memset if SCEV expander | ||||
// is taught to generate the dependencies at the latest point. | // is taught to generate the dependencies at the latest point. | ||||
if (!isSafeToExpand(NumBytesS, *SE)) | if (!isSafeToExpand(NumBytesS, *SE)) | ||||
return false; | return false; | ||||
Value *NumBytes = | Value *NumBytes = | ||||
Expander.expandCodeFor(NumBytesS, IntPtr, Preheader->getTerminator()); | Expander.expandCodeFor(NumBytesS, IntPtr, Preheader->getTerminator()); | ||||
CallInst *NewCall; | CallInst *NewCall; | ||||
if (SplatValue) { | |||||
if (MemIdiom == LegalStoreKind::Memset) { | |||||
NewCall = | NewCall = | ||||
Builder.CreateMemSet(BasePtr, SplatValue, NumBytes, StoreAlignment); | Builder.CreateMemSet(BasePtr, SplatValue, NumBytes, StoreAlignment); | ||||
} else { | } else if (MemIdiom == LegalStoreKind::MemsetPattern) { | ||||
// Everything is emitted in default address space | // Everything is emitted in default address space | ||||
Type *Int8PtrTy = DestInt8PtrTy; | Type *Int8PtrTy = DestInt8PtrTy; | ||||
Module *M = TheStore->getModule(); | Module *M = TheStore->getModule(); | ||||
Value *MSP = | Value *MSP = | ||||
M->getOrInsertFunction("memset_pattern16", Builder.getVoidTy(), | M->getOrInsertFunction("memset_pattern16", Builder.getVoidTy(), | ||||
Int8PtrTy, Int8PtrTy, IntPtr); | Int8PtrTy, Int8PtrTy, IntPtr); | ||||
inferLibFuncAttributes(*M->getFunction("memset_pattern16"), *TLI); | inferLibFuncAttributes(*M->getFunction("memset_pattern16"), *TLI); | ||||
// Otherwise we should form a memset_pattern16. PatternValue is known to be | // Otherwise we should form a memset_pattern16. PatternValue is known to be | ||||
// an constant array of 16-bytes. Plop the value into a mergable global. | // an constant array of 16-bytes. Plop the value into a mergable global. | ||||
GlobalVariable *GV = new GlobalVariable(*M, PatternValue->getType(), true, | GlobalVariable *GV = new GlobalVariable(*M, PatternValue->getType(), true, | ||||
GlobalValue::PrivateLinkage, | GlobalValue::PrivateLinkage, | ||||
PatternValue, ".memset_pattern"); | PatternValue, ".memset_pattern"); | ||||
GV->setUnnamedAddr(GlobalValue::UnnamedAddr::Global); // Ok to merge these. | GV->setUnnamedAddr(GlobalValue::UnnamedAddr::Global); // Ok to merge these. | ||||
GV->setAlignment(16); | GV->setAlignment(16); | ||||
Value *PatternPtr = ConstantExpr::getBitCast(GV, Int8PtrTy); | Value *PatternPtr = ConstantExpr::getBitCast(GV, Int8PtrTy); | ||||
NewCall = Builder.CreateCall(MSP, {BasePtr, PatternPtr, NumBytes}); | NewCall = Builder.CreateCall(MSP, {BasePtr, PatternPtr, NumBytes}); | ||||
} | |||||
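Editorial aside: the kind of store the memset_pattern16 branch above targets, assuming a target that provides memset_pattern16 (for example Darwin). The stored value is loop-invariant but not a single repeated byte, so a plain memset cannot encode it.

```
#include <cstddef>
#include <cstdint>

void fill_pattern(std::uint32_t *p, std::size_t n) {
  for (std::size_t i = 0; i < n; ++i)
    p[i] = 0x12345678u;   // not bytewise-splattable, so memset won't do;
                          // a repeating 16-byte pattern can represent it
}
```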
mgrang: Extra / in comments. Also check comment spacing. | |||||
haicheng: I think we need to emphasize that loads and stores are consecutive. | |||||
haicheng: Please check the format. | |||||
DEBUG(dbgs() << " Formed memset: " << *NewCall << "\n" | } else if (MemIdiom == LegalStoreKind::Memcpy) { | ||||
mgrang: Use i != e instead of i < e. | |||||
haicheng: What if FirstStoreLoad is nullptr? | |||||
haicheng: Then you can use cast instead of dyn_cast | |||||
haicheng: memset? | |||||
<< " from store to: " << *Ev << " at: " << *TheStore << "\n"); | unsigned Align = std::min(StoreAlignment, StoreLoadInst->getAlignment()); | ||||
NewCall->setDebugLoc(TheStore->getDebugLoc()); | if (!TheStore->isAtomic() && !StoreLoadInst->isAtomic()) | ||||
NewCall = Builder.CreateMemCpy(BasePtr, LoadBasePtr, NumBytes, Align); | |||||
// Okay, the memset has been formed. Zap the original store and anything that | |||||
// feeds into it. | |||||
for (auto *I : Stores) | |||||
deleteDeadInstruction(I); | |||||
++NumMemSet; | |||||
return true; | |||||
} | |||||
/// If the stored value is a strided load in the same loop with the same stride | |||||
/// this may be transformable into a memcpy. This kicks in for stuff like | |||||
/// for (i) A[i] = B[i]; | |||||
bool LoopIdiomRecognize::processLoopStoreOfLoopLoad(StoreInst *SI, | |||||
const SCEV *BECount) { | |||||
assert(SI->isUnordered() && "Expected only non-volatile non-ordered stores."); | |||||
Value *StorePtr = SI->getPointerOperand(); | |||||
const SCEVAddRecExpr *StoreEv = cast<SCEVAddRecExpr>(SE->getSCEV(StorePtr)); | |||||
APInt Stride = getStoreStride(StoreEv); | |||||
unsigned StoreSize = getStoreSizeInBytes(SI, DL); | |||||
bool NegStride = StoreSize == -Stride; | |||||
// The store must be feeding a non-volatile load. | |||||
LoadInst *LI = cast<LoadInst>(SI->getValueOperand()); | |||||
assert(LI->isUnordered() && "Expected only non-volatile non-ordered loads."); | |||||
// See if the pointer expression is an AddRec like {base,+,1} on the current | |||||
// loop, which indicates a strided load. If we have something else, it's a | |||||
// random load we can't handle. | |||||
const SCEVAddRecExpr *LoadEv = | |||||
cast<SCEVAddRecExpr>(SE->getSCEV(LI->getPointerOperand())); | |||||
// The trip count of the loop and the base pointer of the addrec SCEV is | |||||
// guaranteed to be loop invariant, which means that it should dominate the | |||||
// header. This allows us to insert code for it in the preheader. | |||||
BasicBlock *Preheader = CurLoop->getLoopPreheader(); | |||||
IRBuilder<> Builder(Preheader->getTerminator()); | |||||
SCEVExpander Expander(*SE, *DL, "loop-idiom"); | |||||
const SCEV *StrStart = StoreEv->getStart(); | |||||
unsigned StrAS = SI->getPointerAddressSpace(); | |||||
Type *IntPtrTy = Builder.getIntPtrTy(*DL, StrAS); | |||||
// Handle negative strided loops. | |||||
if (NegStride) | |||||
StrStart = getStartForNegStride(StrStart, BECount, IntPtrTy, StoreSize, SE); | |||||
// Okay, we have a strided store "p[i]" of a loaded value. We can turn | |||||
// this into a memcpy in the loop preheader now if we want. However, this | |||||
// would be unsafe to do if there is anything else in the loop that may read | |||||
// or write the memory region we're storing to. This includes the load that | |||||
// feeds the stores. Check for an alias by generating the base address and | |||||
// checking everything. | |||||
Value *StoreBasePtr = Expander.expandCodeFor( | |||||
StrStart, Builder.getInt8PtrTy(StrAS), Preheader->getTerminator()); | |||||
SmallPtrSet<Instruction *, 1> Stores; | |||||
Stores.insert(SI); | |||||
if (mayLoopAccessLocation(StoreBasePtr, MRI_ModRef, CurLoop, BECount, | |||||
StoreSize, *AA, Stores)) { | |||||
Expander.clear(); | |||||
// If we generated new code for the base pointer, clean up. | |||||
RecursivelyDeleteTriviallyDeadInstructions(StoreBasePtr, TLI); | |||||
return false; | |||||
} | |||||
const SCEV *LdStart = LoadEv->getStart(); | |||||
unsigned LdAS = LI->getPointerAddressSpace(); | |||||
// Handle negative strided loops. | |||||
if (NegStride) | |||||
LdStart = getStartForNegStride(LdStart, BECount, IntPtrTy, StoreSize, SE); | |||||
// For a memcpy, we have to make sure that the input array is not being | |||||
// mutated by the loop. | |||||
Value *LoadBasePtr = Expander.expandCodeFor( | |||||
LdStart, Builder.getInt8PtrTy(LdAS), Preheader->getTerminator()); | |||||
if (mayLoopAccessLocation(LoadBasePtr, MRI_Mod, CurLoop, BECount, StoreSize, | |||||
*AA, Stores)) { | |||||
Expander.clear(); | |||||
// If we generated new code for the base pointer, clean up. | |||||
RecursivelyDeleteTriviallyDeadInstructions(LoadBasePtr, TLI); | |||||
RecursivelyDeleteTriviallyDeadInstructions(StoreBasePtr, TLI); | |||||
return false; | |||||
} | |||||
if (avoidLIRForMultiBlockLoop()) | |||||
return false; | |||||
// Okay, everything is safe, we can transform this! | |||||
// The # stored bytes is (BECount+1)*Size. Expand the trip count out to | |||||
// pointer size if it isn't already. | |||||
BECount = SE->getTruncateOrZeroExtend(BECount, IntPtrTy); | |||||
const SCEV *NumBytesS = | |||||
SE->getAddExpr(BECount, SE->getOne(IntPtrTy), SCEV::FlagNUW); | |||||
if (StoreSize != 1) | |||||
NumBytesS = SE->getMulExpr(NumBytesS, SE->getConstant(IntPtrTy, StoreSize), | |||||
SCEV::FlagNUW); | |||||
Value *NumBytes = | |||||
Expander.expandCodeFor(NumBytesS, IntPtrTy, Preheader->getTerminator()); | |||||
unsigned Align = std::min(SI->getAlignment(), LI->getAlignment()); | |||||
CallInst *NewCall = nullptr; | |||||
// Check whether to generate an unordered atomic memcpy: | |||||
// If the load or store are atomic, then they must neccessarily be unordered | |||||
// by previous checks. | |||||
if (!SI->isAtomic() && !LI->isAtomic()) | |||||
NewCall = Builder.CreateMemCpy(StoreBasePtr, LoadBasePtr, NumBytes, Align); | |||||
else { | else { | ||||
// We cannot allow unaligned ops for unordered load/store, so reject | // We cannot allow unaligned ops for unordered load/store, so reject | ||||
// anything where the alignment isn't at least the element size. | // anything where the alignment isn't at least the element size. | ||||
if (Align < StoreSize) | if (Align < StoreSize) | ||||
return false; | return false; | ||||
// If the element.atomic memcpy is not lowered into explicit | // If the element.atomic memcpy is not lowered into explicit | ||||
// loads/stores later, then it will be lowered into an element-size | // loads/stores later, then it will be lowered into an element-size | ||||
// specific lib call. If the lib call doesn't exist for our store size, then | // specific lib call. If the lib call doesn't exist for our store size, then | ||||
// we shouldn't generate the memcpy. | // we shouldn't generate the memcpy. | ||||
if (StoreSize > TTI->getAtomicMemIntrinsicMaxElementSize()) | if (StoreSize > TTI->getAtomicMemIntrinsicMaxElementSize()) | ||||
return false; | return false; | ||||
NewCall = Builder.CreateElementUnorderedAtomicMemCpy( | NewCall = Builder.CreateElementUnorderedAtomicMemCpy(BasePtr, LoadBasePtr, | ||||
StoreBasePtr, LoadBasePtr, NumBytes, StoreSize); | NumBytes, StoreSize); | ||||
// Propagate alignment info onto the pointer args. Note that unordered | // Propagate alignment info onto the pointer args. Note that unordered | ||||
// atomic loads/stores are *required* by the spec to have an alignment | // atomic loads/stores are *required* by the spec to have an alignment | ||||
// but non-atomic loads/stores may not. | // but non-atomic loads/stores may not. | ||||
mgrang: This loop looks a bit hacky. Can this be re-written in a better way? | |||||
NewCall->addParamAttr(0, Attribute::getWithAlignment(NewCall->getContext(), | NewCall->addParamAttr(0, Attribute::getWithAlignment( | ||||
SI->getAlignment())); | NewCall->getContext(), StoreAlignment)); | ||||
NewCall->addParamAttr(1, Attribute::getWithAlignment(NewCall->getContext(), | NewCall->addParamAttr( | ||||
mgrang: Check indentation. | |||||
LI->getAlignment())); | 1, Attribute::getWithAlignment(NewCall->getContext(), | ||||
StoreLoadInst->getAlignment())); | |||||
} | |||||
} | } | ||||
haicheng: I think here can be written as
if (Memset)
else if (MemsetPattern)
else (Memcpy)
not nested if | |||||
NewCall->setDebugLoc(SI->getDebugLoc()); | |||||
NewCall->setDebugLoc(TheStore->getDebugLoc()); | |||||
haicheng: memset? | |||||
if (MemIdiom == LegalStoreKind::Memset || | |||||
MemIdiom == LegalStoreKind::MemsetPattern){ | |||||
DEBUG(dbgs() << " Formed memset: " << *NewCall << "\n" | |||||
<< " from store to: " << *Ev << " at: " << *TheStore | |||||
<< "\n"); | |||||
++NumMemSet; | |||||
} | |||||
haicheng: Same thing here. | |||||
DIVYA: This will be checked in the isLegalStore() function itself. | |||||
else if (MemIdiom == LegalStoreKind::Memcpy){ | |||||
haicheng: What if some Loads/Stores are atomic and some are not? | |||||
DEBUG(dbgs() << " Formed memcpy: " << *NewCall << "\n" | DEBUG(dbgs() << " Formed memcpy: " << *NewCall << "\n" | ||||
haicheng: This can be changed to if (memset) | |||||
<< " from load ptr=" << *LoadEv << " at: " << *LI << "\n" | << " from load ptr=" << *LoadEv << " at: " << *StoreLoadInst | ||||
<< " from store ptr=" << *StoreEv << " at: " << *SI << "\n"); | << "\n" | ||||
<< " from store ptr=" << *Ev << " at: " << *TheStore | |||||
haicheng: this can be changed to else if (memset_pattern) | |||||
// Okay, the memcpy has been formed. Zap the original store and anything that | << "\n"); | ||||
// feeds into it. | |||||
deleteDeadInstruction(SI); | |||||
++NumMemCpy; | ++NumMemCpy; | ||||
} | |||||
haicheng: Can we just return from here? | |||||
// Okay, the memset or memcpy has been formed. Zap the original store and | |||||
// anything that feeds into it. | |||||
for (auto *I : Stores) | |||||
deleteDeadInstruction(I); | |||||
haicheng: I think you can remove this one or change it to an assert. At this point, I think we know the stores have the same strides and loads have the same strides as their users (stores), so the strides of the loads should be the same. | |||||
return true; | return true; | ||||
} | } | ||||
// When compiling for codesize we avoid idiom recognition for a multi-block loop | // When compiling for codesize we avoid idiom recognition for a multi-block loop | ||||
// unless it is a loop_memset idiom or a memset/memcpy idiom in a nested loop. | // unless it is a loop_memset idiom or a memset/memcpy idiom in a nested loop. | ||||
// | // | ||||
bool LoopIdiomRecognize::avoidLIRForMultiBlockLoop(bool IsMemset, | bool LoopIdiomRecognize::avoidLIRForMultiBlockLoop(bool IsMemset, | ||||
bool IsLoopMemset) { | bool IsLoopMemset) { | ||||
if (ApplyCodeSizeHeuristics && CurLoop->getNumBlocks() > 1) { | if (ApplyCodeSizeHeuristics && CurLoop->getNumBlocks() > 1) { | ||||
if (!CurLoop->getParentLoop() && (!IsMemset || !IsLoopMemset)) { | if (!CurLoop->getParentLoop() && (!IsMemset || !IsLoopMemset)) { | ||||
DEBUG(dbgs() << " " << CurLoop->getHeader()->getParent()->getName() | DEBUG(dbgs() << " " << CurLoop->getHeader()->getParent()->getName() | ||||
<< " : LIR " << (IsMemset ? "Memset" : "Memcpy") | << " : LIR " << (IsMemset ? "Memset" : "Memcpy") | ||||
<< " avoided: multi-block top-level loop\n"); | << " avoided: multi-block top-level loop\n"); | ||||
return true; | return true; | ||||
} | } | ||||
} | } | ||||
return false; | return false; | ||||
} | } | ||||
mgrang: Newline removed. | |||||
bool LoopIdiomRecognize::runOnNoncountableLoop() { | bool LoopIdiomRecognize::runOnNoncountableLoop() { | ||||
return recognizePopcount() || recognizeAndInsertCTLZ(); | return recognizePopcount() || recognizeAndInsertCTLZ(); | ||||
} | } | ||||
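Editorial aside: example loops for the two noncountable idioms named here, with shapes assumed from the function names; the exact patterns matched are defined by the code that follows.

```
unsigned popcount_loop(unsigned x) {
  unsigned cnt = 0;
  while (x) {
    x &= x - 1;   // clear the lowest set bit each iteration
    ++cnt;
  }
  return cnt;     // countable in closed form as ctpop(x)
}

unsigned bitlen_loop(unsigned x) {
  unsigned cnt = 0;
  while (x) {
    x >>= 1;      // shift out one bit per iteration
    ++cnt;
  }
  return cnt;     // expressible as bitwidth - ctlz(x) for nonzero x
}
```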
/// Check if the given conditional branch is based on the comparison between | /// Check if the given conditional branch is based on the comparison between | ||||
/// a variable and zero, and if the variable is non-zero, the control yields to | /// a variable and zero, and if the variable is non-zero, the control yields to | ||||
haicheng: I think after creating newCall, setDebugLoc and debug dumping can be refactored too. | |||||
/// the loop entry. If the branch matches the behavior, the variable involved | /// the loop entry. If the branch matches the behavior, the variable involved | ||||
/// in the comparison is returned. This function will be called to see if the | /// in the comparison is returned. This function will be called to see if the | ||||
/// precondition and postcondition of the loop are in desirable form. | /// precondition and postcondition of the loop are in desirable form. | ||||
static Value *matchCondition(BranchInst *BI, BasicBlock *LoopEntry) { | static Value *matchCondition(BranchInst *BI, BasicBlock *LoopEntry) { | ||||
if (!BI || !BI->isConditional()) | if (!BI || !BI->isConditional()) | ||||
return nullptr; | return nullptr; | ||||
ICmpInst *Cond = dyn_cast<ICmpInst>(BI->getCondition()); | ICmpInst *Cond = dyn_cast<ICmpInst>(BI->getCondition()); | ||||
(lines elided) | static CallInst *createPopcntIntrinsic(IRBuilder<> &IRBuilder, Value *Val, | ||||
CI->setDebugLoc(DL); | CI->setDebugLoc(DL); | ||||
return CI; | return CI; | ||||
} | } | ||||
static CallInst *createCTLZIntrinsic(IRBuilder<> &IRBuilder, Value *Val, | static CallInst *createCTLZIntrinsic(IRBuilder<> &IRBuilder, Value *Val, | ||||
const DebugLoc &DL, bool ZeroCheck) { | const DebugLoc &DL, bool ZeroCheck) { | ||||
Value *Ops[] = {Val, ZeroCheck ? IRBuilder.getTrue() : IRBuilder.getFalse()}; | Value *Ops[] = {Val, ZeroCheck ? IRBuilder.getTrue() : IRBuilder.getFalse()}; | ||||
Type *Tys[] = {Val->getType()}; | Type *Tys[] = {Val->getType()}; | ||||
mgrang: Newline removed. | |||||
Module *M = IRBuilder.GetInsertBlock()->getParent()->getParent(); | Module *M = IRBuilder.GetInsertBlock()->getParent()->getParent(); | ||||
Value *Func = Intrinsic::getDeclaration(M, Intrinsic::ctlz, Tys); | Value *Func = Intrinsic::getDeclaration(M, Intrinsic::ctlz, Tys); | ||||
CallInst *CI = IRBuilder.CreateCall(Func, Ops); | CallInst *CI = IRBuilder.CreateCall(Func, Ops); | ||||
CI->setDebugLoc(DL); | CI->setDebugLoc(DL); | ||||
return CI; | return CI; | ||||
} | } | ||||