Diff 113115

include/llvm/Transforms/Utils/LoopUtils.h

	Show First 20 Lines • Show All 417 Lines • ▼ Show 20 Lines
	/// dominated by the specified block, and that are in the current loop) in			/// dominated by the specified block, and that are in the current loop) in
	/// reverse depth first order w.r.t the DominatorTree. This allows us to visit			/// reverse depth first order w.r.t the DominatorTree. This allows us to visit
	/// uses before definitions, allowing us to sink a loop body in one pass without			/// uses before definitions, allowing us to sink a loop body in one pass without
	/// iteration. Takes DomTreeNode, AliasAnalysis, LoopInfo, DominatorTree,			/// iteration. Takes DomTreeNode, AliasAnalysis, LoopInfo, DominatorTree,
	/// DataLayout, TargetLibraryInfo, Loop, AliasSet information for all			/// DataLayout, TargetLibraryInfo, Loop, AliasSet information for all
	/// instructions of the loop and loop safety information as			/// instructions of the loop and loop safety information as
	/// arguments. Diagnostics is emitted via \p ORE. It returns changed status.			/// arguments. Diagnostics is emitted via \p ORE. It returns changed status.
	bool sinkRegion(DomTreeNode , AliasAnalysis , LoopInfo , DominatorTree ,			bool sinkRegion(DomTreeNode , AliasAnalysis , LoopInfo , DominatorTree ,
	TargetLibraryInfo , Loop , AliasSetTracker *,			TargetLibraryInfo , TargetTransformInfo , Loop *,
	LoopSafetyInfo , OptimizationRemarkEmitter ORE);			AliasSetTracker , LoopSafetyInfo ,
				OptimizationRemarkEmitter *ORE);

	/// \brief Walk the specified region of the CFG (defined by all blocks			/// \brief Walk the specified region of the CFG (defined by all blocks
	/// dominated by the specified block, and that are in the current loop) in depth			/// dominated by the specified block, and that are in the current loop) in depth
	/// first order w.r.t the DominatorTree. This allows us to visit definitions			/// first order w.r.t the DominatorTree. This allows us to visit definitions
	/// before uses, allowing us to hoist a loop body in one pass without iteration.			/// before uses, allowing us to hoist a loop body in one pass without iteration.
	/// Takes DomTreeNode, AliasAnalysis, LoopInfo, DominatorTree, DataLayout,			/// Takes DomTreeNode, AliasAnalysis, LoopInfo, DominatorTree, DataLayout,
	/// TargetLibraryInfo, Loop, AliasSet information for all instructions of the			/// TargetLibraryInfo, Loop, AliasSet information for all instructions of the
	/// loop and loop safety information as arguments. Diagnostics is emitted via \p			/// loop and loop safety information as arguments. Diagnostics is emitted via \p
	▲ Show 20 Lines • Show All 106 Lines • Show Last 20 Lines

lib/Transforms/Scalar/LICM.cpp

Show First 20 Lines • Show All 83 Lines • ▼ Show 20 Lines

static cl::opt<uint32_t> MaxNumUsesTraversed(		static cl::opt<uint32_t> MaxNumUsesTraversed(
"licm-max-num-uses-traversed", cl::Hidden, cl::init(8),		"licm-max-num-uses-traversed", cl::Hidden, cl::init(8),
cl::desc("Max num uses visited for identifying load "		cl::desc("Max num uses visited for identifying load "
"invariance in loop using invariant start (default = 8)"));		"invariance in loop using invariant start (default = 8)"));

static bool inSubLoop(BasicBlock BB, Loop CurLoop, LoopInfo *LI);		static bool inSubLoop(BasicBlock BB, Loop CurLoop, LoopInfo *LI);
static bool isNotUsedInLoop(const Instruction &I, const Loop *CurLoop,		static bool isNotUsedInLoop(const Instruction &I, const Loop *CurLoop,
const LoopSafetyInfo *SafetyInfo);		const LoopSafetyInfo *SafetyInfo,
		TargetTransformInfo *TTI,
		bool &ContainFoldableUsersInLoop);
static bool hoist(Instruction &I, const DominatorTree DT, const Loop CurLoop,		static bool hoist(Instruction &I, const DominatorTree DT, const Loop CurLoop,
const LoopSafetyInfo *SafetyInfo,		const LoopSafetyInfo *SafetyInfo,
OptimizationRemarkEmitter *ORE);		OptimizationRemarkEmitter *ORE);
static bool sink(Instruction &I, const LoopInfo LI, const DominatorTree DT,		static bool sink(Instruction &I, const LoopInfo LI, const DominatorTree DT,
const Loop CurLoop, AliasSetTracker CurAST,		const Loop CurLoop, AliasSetTracker CurAST,
const LoopSafetyInfo *SafetyInfo,		const LoopSafetyInfo *SafetyInfo,
OptimizationRemarkEmitter *ORE);		OptimizationRemarkEmitter *ORE,
		bool ContainFoldableUsersInLoop);
static bool isSafeToExecuteUnconditionally(Instruction &Inst,		static bool isSafeToExecuteUnconditionally(Instruction &Inst,
const DominatorTree *DT,		const DominatorTree *DT,
const Loop *CurLoop,		const Loop *CurLoop,
const LoopSafetyInfo *SafetyInfo,		const LoopSafetyInfo *SafetyInfo,
OptimizationRemarkEmitter *ORE,		OptimizationRemarkEmitter *ORE,
const Instruction *CtxI = nullptr);		const Instruction *CtxI = nullptr);
static bool pointerInvalidatedByLoop(Value *V, uint64_t Size,		static bool pointerInvalidatedByLoop(Value *V, uint64_t Size,
const AAMDNodes &AAInfo,		const AAMDNodes &AAInfo,
AliasSetTracker *CurAST);		AliasSetTracker *CurAST);
static Instruction *		static Instruction *
CloneInstructionInExitBlock(Instruction &I, BasicBlock &ExitBlock, PHINode &PN,		CloneInstructionInExitBlock(Instruction &I, BasicBlock &ExitBlock, PHINode &PN,
const LoopInfo *LI,		const LoopInfo *LI,
const LoopSafetyInfo *SafetyInfo);		const LoopSafetyInfo *SafetyInfo);

namespace {		namespace {
struct LoopInvariantCodeMotion {		struct LoopInvariantCodeMotion {
bool runOnLoop(Loop L, AliasAnalysis AA, LoopInfo LI, DominatorTree DT,		bool runOnLoop(Loop L, AliasAnalysis AA, LoopInfo LI, DominatorTree DT,
TargetLibraryInfo TLI, ScalarEvolution SE,		TargetLibraryInfo TLI, TargetTransformInfo TTI,
OptimizationRemarkEmitter *ORE, bool DeleteAST);		ScalarEvolution SE, OptimizationRemarkEmitter ORE,
		bool DeleteAST);

DenseMap<Loop , AliasSetTracker > &getLoopToAliasSetMap() {		DenseMap<Loop , AliasSetTracker > &getLoopToAliasSetMap() {
return LoopToAliasSetMap;		return LoopToAliasSetMap;
}		}

private:		private:
DenseMap<Loop , AliasSetTracker > LoopToAliasSetMap;		DenseMap<Loop , AliasSetTracker > LoopToAliasSetMap;

Show All 23 Lines	bool runOnLoop(Loop *L, LPPassManager &LPM) override {
// pass. Function analyses need to be preserved across loop transformations		// pass. Function analyses need to be preserved across loop transformations
// but ORE cannot be preserved (see comment before the pass definition).		// but ORE cannot be preserved (see comment before the pass definition).
OptimizationRemarkEmitter ORE(L->getHeader()->getParent());		OptimizationRemarkEmitter ORE(L->getHeader()->getParent());
return LICM.runOnLoop(L,		return LICM.runOnLoop(L,
&getAnalysis<AAResultsWrapperPass>().getAAResults(),		&getAnalysis<AAResultsWrapperPass>().getAAResults(),
&getAnalysis<LoopInfoWrapperPass>().getLoopInfo(),		&getAnalysis<LoopInfoWrapperPass>().getLoopInfo(),
&getAnalysis<DominatorTreeWrapperPass>().getDomTree(),		&getAnalysis<DominatorTreeWrapperPass>().getDomTree(),
&getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(),		&getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(),
		&getAnalysis<TargetTransformInfoWrapperPass>().getTTI(
		*L->getHeader()->getParent()),
SE ? &SE->getSE() : nullptr, &ORE, false);		SE ? &SE->getSE() : nullptr, &ORE, false);
}		}

/// This transformation requires natural loop information & requires that		/// This transformation requires natural loop information & requires that
/// loop preheaders be inserted into the CFG...		/// loop preheaders be inserted into the CFG...
///		///
void getAnalysisUsage(AnalysisUsage &AU) const override {		void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.setPreservesCFG();		AU.setPreservesCFG();
AU.addRequired<TargetLibraryInfoWrapperPass>();		AU.addRequired<TargetLibraryInfoWrapperPass>();
		AU.addRequired<TargetTransformInfoWrapperPass>();
getLoopAnalysisUsage(AU);		getLoopAnalysisUsage(AU);
}		}

using llvm::Pass::doFinalization;		using llvm::Pass::doFinalization;

bool doFinalization() override {		bool doFinalization() override {
assert(LICM.getLoopToAliasSetMap().empty() &&		assert(LICM.getLoopToAliasSetMap().empty() &&
"Didn't free loop alias sets");		"Didn't free loop alias sets");
Show All 24 Lines	PreservedAnalyses LICMPass::run(Loop &L, LoopAnalysisManager &AM,

auto ORE = FAM.getCachedResult<OptimizationRemarkEmitterAnalysis>(F);		auto ORE = FAM.getCachedResult<OptimizationRemarkEmitterAnalysis>(F);
// FIXME: This should probably be optional rather than required.		// FIXME: This should probably be optional rather than required.
if (!ORE)		if (!ORE)
report_fatal_error("LICM: OptimizationRemarkEmitterAnalysis not "		report_fatal_error("LICM: OptimizationRemarkEmitterAnalysis not "
"cached at a higher level");		"cached at a higher level");

LoopInvariantCodeMotion LICM;		LoopInvariantCodeMotion LICM;
if (!LICM.runOnLoop(&L, &AR.AA, &AR.LI, &AR.DT, &AR.TLI, &AR.SE, ORE, true))		if (!LICM.runOnLoop(&L, &AR.AA, &AR.LI, &AR.DT, &AR.TLI, &AR.TTI, &AR.SE, ORE,
		true))
return PreservedAnalyses::all();		return PreservedAnalyses::all();

auto PA = getLoopPassPreservedAnalyses();		auto PA = getLoopPassPreservedAnalyses();
PA.preserveSet<CFGAnalyses>();		PA.preserveSet<CFGAnalyses>();
return PA;		return PA;
}		}

char LegacyLICMPass::ID = 0;		char LegacyLICMPass::ID = 0;
INITIALIZE_PASS_BEGIN(LegacyLICMPass, "licm", "Loop Invariant Code Motion",		INITIALIZE_PASS_BEGIN(LegacyLICMPass, "licm", "Loop Invariant Code Motion",
false, false)		false, false)
INITIALIZE_PASS_DEPENDENCY(LoopPass)		INITIALIZE_PASS_DEPENDENCY(LoopPass)
INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)		INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
		INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
INITIALIZE_PASS_END(LegacyLICMPass, "licm", "Loop Invariant Code Motion", false,		INITIALIZE_PASS_END(LegacyLICMPass, "licm", "Loop Invariant Code Motion", false,
false)		false)

Pass *llvm::createLICMPass() { return new LegacyLICMPass(); }		Pass *llvm::createLICMPass() { return new LegacyLICMPass(); }

/// Hoist expressions out of the specified loop. Note, alias info for inner		/// Hoist expressions out of the specified loop. Note, alias info for inner
/// loop is not preserved so it is not a good idea to run LICM multiple		/// loop is not preserved so it is not a good idea to run LICM multiple
/// times on one loop.		/// times on one loop.
/// We should delete AST for inner loops in the new pass manager to avoid		/// We should delete AST for inner loops in the new pass manager to avoid
/// memory leak.		/// memory leak.
///		///
bool LoopInvariantCodeMotion::runOnLoop(Loop L, AliasAnalysis AA,		bool LoopInvariantCodeMotion::runOnLoop(
LoopInfo LI, DominatorTree DT,		Loop L, AliasAnalysis AA, LoopInfo LI, DominatorTree DT,
TargetLibraryInfo *TLI,		TargetLibraryInfo TLI, TargetTransformInfo TTI, ScalarEvolution *SE,
ScalarEvolution *SE,		OptimizationRemarkEmitter *ORE, bool DeleteAST) {
OptimizationRemarkEmitter *ORE,
bool DeleteAST) {
bool Changed = false;		bool Changed = false;

assert(L->isLCSSAForm(*DT) && "Loop is not in LCSSA form.");		assert(L->isLCSSAForm(*DT) && "Loop is not in LCSSA form.");

AliasSetTracker *CurAST = collectAliasInfoForLoop(L, LI, AA);		AliasSetTracker *CurAST = collectAliasInfoForLoop(L, LI, AA);

// Get the preheader block to move instructions into...		// Get the preheader block to move instructions into...
BasicBlock *Preheader = L->getLoopPreheader();		BasicBlock *Preheader = L->getLoopPreheader();

// Compute loop safety information.		// Compute loop safety information.
LoopSafetyInfo SafetyInfo;		LoopSafetyInfo SafetyInfo;
computeLoopSafetyInfo(&SafetyInfo, L);		computeLoopSafetyInfo(&SafetyInfo, L);

// We want to visit all of the instructions in this loop... that are not parts		// We want to visit all of the instructions in this loop... that are not parts
// of our subloops (they have already had their invariants hoisted out of		// of our subloops (they have already had their invariants hoisted out of
// their loop, into this loop, so there is no need to process the BODIES of		// their loop, into this loop, so there is no need to process the BODIES of
// the subloops).		// the subloops).
//		//
// Traverse the body of the loop in depth first order on the dominator tree so		// Traverse the body of the loop in depth first order on the dominator tree so
// that we are guaranteed to see definitions before we see uses. This allows		// that we are guaranteed to see definitions before we see uses. This allows
// us to sink instructions in one pass, without iteration. After sinking		// us to sink instructions in one pass, without iteration. After sinking
// instructions, we perform another pass to hoist them out of the loop.		// instructions, we perform another pass to hoist them out of the loop.
//		//
if (L->hasDedicatedExits())		if (L->hasDedicatedExits())
Changed \|= sinkRegion(DT->getNode(L->getHeader()), AA, LI, DT, TLI, L,		Changed \|= sinkRegion(DT->getNode(L->getHeader()), AA, LI, DT, TLI, TTI, L,
CurAST, &SafetyInfo, ORE);		CurAST, &SafetyInfo, ORE);
if (Preheader)		if (Preheader)
Changed \|= hoistRegion(DT->getNode(L->getHeader()), AA, LI, DT, TLI, L,		Changed \|= hoistRegion(DT->getNode(L->getHeader()), AA, LI, DT, TLI, L,
CurAST, &SafetyInfo, ORE);		CurAST, &SafetyInfo, ORE);

// Now that all loop invariants have been removed from the loop, promote any		// Now that all loop invariants have been removed from the loop, promote any
// memory references to scalars that we can.		// memory references to scalars that we can.
// Don't sink stores from loops without dedicated block exits. Exits		// Don't sink stores from loops without dedicated block exits. Exits
▲ Show 20 Lines • Show All 83 Lines • ▼ Show 20 Lines
}		}

/// Walk the specified region of the CFG (defined by all blocks dominated by		/// Walk the specified region of the CFG (defined by all blocks dominated by
/// the specified block, and that are in the current loop) in reverse depth		/// the specified block, and that are in the current loop) in reverse depth
/// first order w.r.t the DominatorTree. This allows us to visit uses before		/// first order w.r.t the DominatorTree. This allows us to visit uses before
/// definitions, allowing us to sink a loop body in one pass without iteration.		/// definitions, allowing us to sink a loop body in one pass without iteration.
///		///
bool llvm::sinkRegion(DomTreeNode N, AliasAnalysis AA, LoopInfo *LI,		bool llvm::sinkRegion(DomTreeNode N, AliasAnalysis AA, LoopInfo *LI,
DominatorTree DT, TargetLibraryInfo TLI, Loop *CurLoop,		DominatorTree DT, TargetLibraryInfo TLI,
		TargetTransformInfo TTI, Loop CurLoop,
AliasSetTracker CurAST, LoopSafetyInfo SafetyInfo,		AliasSetTracker CurAST, LoopSafetyInfo SafetyInfo,
OptimizationRemarkEmitter *ORE) {		OptimizationRemarkEmitter *ORE) {

// Verify inputs.		// Verify inputs.
assert(N != nullptr && AA != nullptr && LI != nullptr && DT != nullptr &&		assert(N != nullptr && AA != nullptr && LI != nullptr && DT != nullptr &&
CurLoop != nullptr && CurAST != nullptr && SafetyInfo != nullptr &&		CurLoop != nullptr && CurAST != nullptr && SafetyInfo != nullptr &&
"Unexpected input to sinkRegion");		"Unexpected input to sinkRegion");

Show All 24 Lines	for (BasicBlock::iterator II = BB->end(); II != BB->begin();) {
continue;		continue;
}		}

// Check to see if we can sink this instruction to the exit blocks		// Check to see if we can sink this instruction to the exit blocks
// of the loop. We can do this if the all users of the instruction are		// of the loop. We can do this if the all users of the instruction are
// outside of the loop. In this case, it doesn't even matter if the		// outside of the loop. In this case, it doesn't even matter if the
// operands of the instruction are loop invariant.		// operands of the instruction are loop invariant.
//		//
if (isNotUsedInLoop(I, CurLoop, SafetyInfo) &&		bool ContainFoldableUsersInLoop = false;
		if (isNotUsedInLoop(I, CurLoop, SafetyInfo, TTI,
		ContainFoldableUsersInLoop) &&
canSinkOrHoistInst(I, AA, DT, CurLoop, CurAST, SafetyInfo, ORE)) {		canSinkOrHoistInst(I, AA, DT, CurLoop, CurAST, SafetyInfo, ORE)) {
		if (!ContainFoldableUsersInLoop)
++II;		++II;
Changed \|= sink(I, LI, DT, CurLoop, CurAST, SafetyInfo, ORE);		Changed \|= sink(I, LI, DT, CurLoop, CurAST, SafetyInfo, ORE,
		ContainFoldableUsersInLoop);
}		}
}		}
}		}
return Changed;		return Changed;
}		}

/// Walk the specified region of the CFG (defined by all blocks dominated by		/// Walk the specified region of the CFG (defined by all blocks dominated by
/// the specified block, and that are in the current loop) in depth first		/// the specified block, and that are in the current loop) in depth first
▲ Show 20 Lines • Show All 275 Lines • ▼ Show 20 Lines
static bool isTriviallyReplacablePHI(const PHINode &PN, const Instruction &I) {		static bool isTriviallyReplacablePHI(const PHINode &PN, const Instruction &I) {
for (const Value *IncValue : PN.incoming_values())		for (const Value *IncValue : PN.incoming_values())
if (IncValue != &I)		if (IncValue != &I)
return false;		return false;

return true;		return true;
}		}

		static bool isFoldableInLoop(const Instruction I, const Instruction UserI,
		const TargetTransformInfo *TTI) {
		/// FIXME: for now we only check if the addressing mode defined by a GEP is
		hfinkelUnsubmitted Done Reply Inline Actions This isn't a very good FIXME because it doesn't explain what you might fix about it. Are there other things for which we might check? hfinkel: This isn't a very good FIXME because it doesn't explain what you might fix about it. Are there…
		/// directly foldable into a load.
		const GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(I);
		if (GEP && isa<LoadInst>(UserI) && (I->getParent() == UserI->getParent())) {
		bmakamUnsubmitted Not Done Reply Inline Actions Do we need to check I->getParent() == UserI->getParent()? We already check if CurLoop->contains(UserI) right? bmakam: Do we need to check I->getParent() == UserI->getParent()? We already check if CurLoop->contains…
		junbumlAuthorUnsubmitted Not Done Reply Inline Actions Even in the same loop, if they are in different blocks, the current ISel may not fold the GEP into the load. junbuml: Even in the same loop, if they are in different blocks, the current ISel may not fold the GEP…
		SmallVector<const Value *, 4> Indices;
		hfinkelUnsubmitted Not Done Reply Inline Actions This can just be: return TTI->getUserCost(GEP) == TargetTransformInfo::TCC_Free; (this code here to call getGEPCost seems to duplicate the implementation logic of getUserCost) On that note, you might not even have to restrict this to GEPs used by Loads, but rather, you could allow all zero-cost instructions. hfinkel: This can just be: return TTI->getUserCost(GEP) == TargetTransformInfo::TCC_Free; (this code…
		hfinkelUnsubmitted Not Done Reply Inline Actions (Also, I think that using all 'Free' instructions has the benefit of being correlated with the unrolling cost of the loop - these are the instructions that won't increase the unrolling cost of the loop). hfinkel: (Also, I think that using all 'Free' instructions has the benefit of being correlated with the…
		junbumlAuthorUnsubmitted Not Done Reply Inline Actions I don't think Free from getUserCost guarantees that the instruction is always folded away. So, I specifically check for a GEP which could be a legal addressing mode and which is used in a load / store in the same block, expecting isel to fold it into its users. junbuml: I don't think Free from getUserCost guarantees that the instruction is always folded away. So…
		hfinkelUnsubmitted Not Done Reply Inline Actions I don't think Free from getUserCost guarantee that the instruction is folded away always. Yes, it should be. Free means free. If not, then there's something wrong with the cost model that we should fix. It might be possible to allow this for all other zero-cost instructions. However, I'm not perfectly sure if this is good or needed for all other free instructions. For example, I'm not clear if sinking a free trunc is needed? However, in GEP case, by sinking a GEP, we can decouple the users of the GEP: one in loop and one in outside of the loop so that the one in loop will be folded in isel if they are in the same block. I don't understand how the advantages, or disadvantages, of doing this for a free truncate are different from a free GEP. In both cases, we decouple things inside the loop from outside the loop allowing the folding to take place later. I think we need extensive tests before opening up this for all other free instructions, and isolating this for GEP as a first step would make review process easy. Then please run tests (I assume you ran tests for this, as proposed, too). This makes the review harder. LICM is part of our canonicalization process, and we need to have an understandable canonical form. The more that this turns into a patchwork of heuristics, the harder it is to figure out what our canonical form is. "We always decouple free instructions" is easy to explain. "We sometimes decouple GEPs if they happen to be used in certain ways" is harder. hfinkel: > I don't think Free from getUserCost guarantee that the instruction is folded away always.
		junbumlAuthorUnsubmitted Not Done Reply Inline Actions Use getUserCost() directly, instead of getGEPCost(). It might be possible to allow this for all other zero-cost instructions. However, I'm not perfectly sure if this is good or needed for all other free instructions. For example, I'm not clear if sinking a free trunc is needed? However, in the GEP case, by sinking a GEP, we can decouple the users of the GEP: one in the loop and one outside of the loop, so that the one in the loop will be folded in isel if they are in the same block. I think we need extensive tests before opening this up for all other free instructions, and isolating this for GEP as a first step would make the review process easy. junbuml: Use getUserCost() directly, instead of getGEPCost(). It might be possible to allow this for all…
		for (auto I = GEP->idx_begin(); I != GEP->idx_end(); ++I)
		Indices.push_back(*I);
		return TTI->getGEPCost(GEP->getSourceElementType(),
		GEP->getPointerOperand(),
		junbumlAuthorUnsubmitted Not Done Reply Inline Actions As r314923 was reverted, we cannot simply use getUserCost for all instructions because it optimistically assumes that a GEP will fold into an addressing mode regardless of its users. I don't think we can rely on this optimistic assumption here. To handle GEPs properly in this change, we can check the GEP's users here directly, or we can add a function in TTI to see if an instruction is really foldable. junbuml: As r314923 was reverted, we cannot simply use getUserCost for all instructions because it…
		Indices) == TargetTransformInfo::TCC_Free;
		}
		return false;
		}

/// Return true if the only users of this instruction are outside of		/// Return true if the only users of this instruction are outside of
/// the loop. If this is true, we can sink the instruction to the exit		/// the loop. If this is true, we can sink the instruction to the exit
/// blocks of the loop.		/// blocks of the loop.
///		///
		/// We also return true if the instruction is foldable in the loop at isel time
		/// (e.g., a GEP can be folded into a load as an addressing mode in the loop).
static bool isNotUsedInLoop(const Instruction &I, const Loop *CurLoop,		static bool isNotUsedInLoop(const Instruction &I, const Loop *CurLoop,
const LoopSafetyInfo *SafetyInfo) {		const LoopSafetyInfo *SafetyInfo,
		TargetTransformInfo *TTI,
		bool &ContainFoldableUsersInLoop) {
const auto &BlockColors = SafetyInfo->BlockColors;		const auto &BlockColors = SafetyInfo->BlockColors;
for (const User *U : I.users()) {		for (const User *U : I.users()) {
const Instruction *UI = cast<Instruction>(U);		const Instruction *UI = cast<Instruction>(U);
if (const PHINode *PN = dyn_cast<PHINode>(UI)) {		if (const PHINode *PN = dyn_cast<PHINode>(UI)) {
const BasicBlock *BB = PN->getParent();		const BasicBlock *BB = PN->getParent();
// We cannot sink uses in catchswitches.		// We cannot sink uses in catchswitches.
if (isa<CatchSwitchInst>(BB->getTerminator()))		if (isa<CatchSwitchInst>(BB->getTerminator()))
return false;		return false;
Show All 21 Lines	if (const PHINode *PN = dyn_cast<PHINode>(UI)) {
for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)		for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
if (PN->getIncomingValue(i) == &I)		if (PN->getIncomingValue(i) == &I)
if (CurLoop->contains(PN->getIncomingBlock(i)))		if (CurLoop->contains(PN->getIncomingBlock(i)))
return false;		return false;

continue;		continue;
}		}

if (CurLoop->contains(UI))		if (CurLoop->contains(UI)) {
		// Check if the instruction is foldable with its user in the loop.
		if (!ContainFoldableUsersInLoop && isFoldableInLoop(&I, UI, TTI)) {
		hfinkelUnsubmitted Done Reply Inline Actions The test for `!ContainFoldableUsersInLoop` limits us to looking for only one foldable user within the loop. Why? hfinkel: The test for `!ContainFoldableUsersInLoop` limits us to looking for only one foldable user…
		ContainFoldableUsersInLoop = true;
		continue;
		}
return false;		return false;
}		}
		}
return true;		return true;
}		}

static Instruction *		static Instruction *
CloneInstructionInExitBlock(Instruction &I, BasicBlock &ExitBlock, PHINode &PN,		CloneInstructionInExitBlock(Instruction &I, BasicBlock &ExitBlock, PHINode &PN,
const LoopInfo *LI,		const LoopInfo *LI,
const LoopSafetyInfo *SafetyInfo) {		const LoopSafetyInfo *SafetyInfo) {
Instruction *New;		Instruction *New;
▲ Show 20 Lines • Show All 56 Lines • ▼ Show 20 Lines
/// When an instruction is found to only be used outside of the loop, this		/// When an instruction is found to only be used outside of the loop, this
/// function moves it to the exit blocks and patches up SSA form as needed.		/// function moves it to the exit blocks and patches up SSA form as needed.
/// This method is guaranteed to remove the original instruction from its		/// This method is guaranteed to remove the original instruction from its
/// position, and may either delete it or move it to outside of the loop.		/// position, and may either delete it or move it to outside of the loop.
///		///
static bool sink(Instruction &I, const LoopInfo LI, const DominatorTree DT,		static bool sink(Instruction &I, const LoopInfo LI, const DominatorTree DT,
const Loop CurLoop, AliasSetTracker CurAST,		const Loop CurLoop, AliasSetTracker CurAST,
const LoopSafetyInfo *SafetyInfo,		const LoopSafetyInfo *SafetyInfo,
OptimizationRemarkEmitter *ORE) {		OptimizationRemarkEmitter *ORE,
		bool ContainFoldableUsersInLoop) {
DEBUG(dbgs() << "LICM sinking instruction: " << I << "\n");		DEBUG(dbgs() << "LICM sinking instruction: " << I << "\n");
ORE->emit(OptimizationRemark(DEBUG_TYPE, "InstSunk", &I)		ORE->emit(OptimizationRemark(DEBUG_TYPE, "InstSunk", &I)
<< "sinking " << ore::NV("Inst", &I));		<< "sinking " << ore::NV("Inst", &I));
bool Changed = false;		bool Changed = false;
if (isa<LoadInst>(I))		if (isa<LoadInst>(I))
++NumMovedLoads;		++NumMovedLoads;
else if (isa<CallInst>(I))		else if (isa<CallInst>(I))
++NumMovedCalls;		++NumMovedCalls;
++NumSunk;		++NumSunk;
Changed = true;		Changed = true;

#ifndef NDEBUG		#ifndef NDEBUG
SmallVector<BasicBlock *, 32> ExitBlocks;		SmallVector<BasicBlock *, 32> ExitBlocks;
CurLoop->getUniqueExitBlocks(ExitBlocks);		CurLoop->getUniqueExitBlocks(ExitBlocks);
SmallPtrSet<BasicBlock *, 32> ExitBlockSet(ExitBlocks.begin(),		SmallPtrSet<BasicBlock *, 32> ExitBlockSet(ExitBlocks.begin(),
ExitBlocks.end());		ExitBlocks.end());
#endif		#endif

// Clones of this instruction. Don't create more than one per exit block!		// Clones of this instruction. Don't create more than one per exit block!
SmallDenseMap<BasicBlock , Instruction , 32> SunkCopies;		SmallDenseMap<BasicBlock , Instruction , 32> SunkCopies;
		SmallPtrSet<Instruction *, 2> UsersToBeRemoved;

// If this instruction is only used outside of the loop, then all users are		// If this instruction is only used outside of the loop, then all users are
// PHI nodes in exit blocks due to LCSSA form. Just RAUW them with clones of		// PHI nodes in exit blocks due to LCSSA form. Just RAUW them with clones of
// the instruction.		// the instruction.
while (!I.use_empty()) {		for (Value::user_iterator UI = I.user_begin(), UE = I.user_end(); UI != UE;) {
Value::user_iterator UI = I.user_begin();
auto User = cast<Instruction>(UI);		auto User = cast<Instruction>(UI);
		Use &U = UI.getUse();
		++UI;

		if (CurLoop->contains(User) \|\| UsersToBeRemoved.count(User))
		continue;

if (!DT->isReachableFromEntry(User->getParent())) {		if (!DT->isReachableFromEntry(User->getParent())) {
User->replaceUsesOfWith(&I, UndefValue::get(I.getType()));		User->replaceUsesOfWith(&I, UndefValue::get(I.getType()));
continue;		continue;
}		}
// The user must be a PHI node.		// The user must be a PHI node.
PHINode *PN = cast<PHINode>(User);		PHINode *PN = cast<PHINode>(User);

// Surprisingly, instructions can be used outside of loops without any		// Surprisingly, instructions can be used outside of loops without any
// exits. This can only happen in PHI nodes if the incoming block is		// exits. This can only happen in PHI nodes if the incoming block is
// unreachable.		// unreachable.
Use &U = UI.getUse();
BasicBlock *BB = PN->getIncomingBlock(U);		BasicBlock *BB = PN->getIncomingBlock(U);
if (!DT->isReachableFromEntry(BB)) {		if (!DT->isReachableFromEntry(BB)) {
U = UndefValue::get(I.getType());		U = UndefValue::get(I.getType());
continue;		continue;
}		}

BasicBlock *ExitBlock = PN->getParent();		BasicBlock *ExitBlock = PN->getParent();
assert(ExitBlockSet.count(ExitBlock) &&		assert(ExitBlockSet.count(ExitBlock) &&
"The LCSSA PHI is not in an exit block!");		"The LCSSA PHI is not in an exit block!");

Instruction *New;		Instruction *New;
auto It = SunkCopies.find(ExitBlock);		auto It = SunkCopies.find(ExitBlock);
if (It != SunkCopies.end())		if (It != SunkCopies.end())
New = It->second;		New = It->second;
else		else
New = SunkCopies[ExitBlock] =		New = SunkCopies[ExitBlock] =
CloneInstructionInExitBlock(I, ExitBlock, PN, LI, SafetyInfo);		CloneInstructionInExitBlock(I, ExitBlock, PN, LI, SafetyInfo);

		UsersToBeRemoved.insert(PN);
PN->replaceAllUsesWith(New);		PN->replaceAllUsesWith(New);
PN->eraseFromParent();
}		}

		for (auto *User : UsersToBeRemoved)
		User->eraseFromParent();

		if (!ContainFoldableUsersInLoop) {
CurAST->deleteValue(&I);		CurAST->deleteValue(&I);
I.eraseFromParent();		I.eraseFromParent();
		}

return Changed;		return Changed;
}		}

/// When an instruction is found to only use loop invariant operands that		/// When an instruction is found to only use loop invariant operands that
/// is safe to hoist, this instruction is called to do the dirty work.		/// is safe to hoist, this instruction is called to do the dirty work.
///		///
static bool hoist(Instruction &I, const DominatorTree DT, const Loop CurLoop,		static bool hoist(Instruction &I, const DominatorTree DT, const Loop CurLoop,
const LoopSafetyInfo *SafetyInfo,		const LoopSafetyInfo *SafetyInfo,
▲ Show 20 Lines • Show All 529 Lines • Show Last 20 Lines

test/Transforms/LICM/sink-foldable.ll

This file was added.

				; RUN: opt < %s -licm -S \| FileCheck %s
				target triple = "aarch64--linux-gnueabi"

				; CHECK-LABEL:@test1
				; CHECK-LABEL:loopexit1:
				; CHECK: %[[PHI:.+]] = phi i8** [ %arrayidx0, %if.end ]
				; CHECK: getelementptr inbounds i8, i8* %[[PHI]], i64 1

				define i8 @test1(i32 %j, i8 readonly %P, i8* readnone %Q) {
				entry:
				%cmp0 = icmp slt i32 0, %j
				br i1 %cmp0, label %for.body.lr.ph, label %return

				for.body.lr.ph:
				br label %for.body

				for.body:
				%P.addr = phi i8** [ %P, %for.body.lr.ph ], [ %arrayidx0, %if.end ]
				%i0 = phi i32 [ 0, %for.body.lr.ph ], [ %i.add, %if.end]

				%i0.ext = sext i32 %i0 to i64
				%arrayidx0 = getelementptr inbounds i8, i8* %P.addr, i64 %i0.ext
				%l0 = load i8, i8* %arrayidx0, align 8
				%cmp1 = icmp ugt i8* %l0, %Q
				br i1 %cmp1, label %loopexit0, label %if.end

				if.end: ; preds = %for.body
				%arrayidx1 = getelementptr inbounds i8, i8* %arrayidx0, i64 1
				%l1 = load i8, i8* %arrayidx1, align 8
				%cmp4 = icmp ugt i8* %l1, %Q
				%i.add = add nsw i32 %i0, 2
				br i1 %cmp4, label %loopexit1, label %for.body

				loopexit0:
				%p1 = phi i8** [%arrayidx0, %for.body]
				br label %return

				loopexit1:
				%p2 = phi i8** [%arrayidx1, %if.end]
				br label %return

				return:
				%retval.0 = phi i8** [ %p1, %loopexit0 ], [%p2, %loopexit1], [ null, %entry ]
				ret i8** %retval.0
				}

				; CHECK-LABEL: @test2
				; CHECK-LABEL: loopexit2:
				; CHECK: %[[PHI:.]] = phi i8* [ %add.ptr, %if.end ]
				; CHECK: getelementptr inbounds i8, i8* %[[PHI]]

				define i8 @test2(i32 %j, i8 readonly %P, i8* readnone %Q) {

				entry:
				br label %for.body

				for.cond:
				%i.addr.0 = phi i32 [ %add, %if.end ]
				%P.addr.0 = phi i8** [ %add.ptr, %if.end ]
				%cmp = icmp slt i32 %i.addr.0, %j
				br i1 %cmp, label %for.body, label %loopexit0

				for.body:
				%P.addr = phi i8** [ %P, %entry ], [ %P.addr.0, %for.cond ]
				%i.addr = phi i32 [ 0, %entry ], [ %i.addr.0, %for.cond ]

				%idx.ext = sext i32 %i.addr to i64
				%add.ptr = getelementptr inbounds i8, i8* %P.addr, i64 %idx.ext
				%l0 = load i8, i8* %add.ptr, align 8

				%cmp1 = icmp ugt i8* %l0, %Q
				br i1 %cmp1, label %loopexit1, label %if.end

				if.end:
				%add.i = add i32 %i.addr, 1
				%idx2.ext = sext i32 %add.i to i64
				%arrayidx2 = getelementptr inbounds i8, i8* %add.ptr, i64 %idx2.ext
				%l1 = load i8, i8* %arrayidx2, align 8
				%cmp2 = icmp ugt i8* %l1, %Q
				%add = add nsw i32 %add.i, 1
				br i1 %cmp2, label %loopexit2, label %for.cond

				loopexit0:
				%p0 = phi i8** [ null, %for.cond ]
				br label %return

				loopexit1:
				%p1 = phi i8** [ %add.ptr, %for.body ]
				br label %return

				loopexit2:
				%p2 = phi i8** [ %arrayidx2, %if.end ]
				br label %return

				return:
				%retval.0 = phi i8** [ %p1, %loopexit1 ], [ %p2, %loopexit2 ], [ %p0, %loopexit0 ]
				ret i8** %retval.0
				}

This is an archive of the discontinued LLVM Phabricator instance.

[LICM] Allow sinking when foldable in loop
ClosedPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 113115

include/llvm/Transforms/Utils/LoopUtils.h

lib/Transforms/Scalar/LICM.cpp

test/Transforms/LICM/sink-foldable.ll

This is an archive of the discontinued LLVM Phabricator instance.

[LICM] Allow sinking when foldable in loopClosedPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 113115

include/llvm/Transforms/Utils/LoopUtils.h

lib/Transforms/Scalar/LICM.cpp

test/Transforms/LICM/sink-foldable.ll

[LICM] Allow sinking when foldable in loop
ClosedPublic