Diff 170944

llvm/lib/Transforms/IPO/HotColdSplitting.cpp

Show First 20 Lines • Show All 51 Lines • ▼ Show 20 Lines
#include "llvm/Transforms/Utils/Local.h"		#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/SSAUpdater.h"		#include "llvm/Transforms/Utils/SSAUpdater.h"
#include "llvm/Transforms/Utils/ValueMapper.h"		#include "llvm/Transforms/Utils/ValueMapper.h"
#include <algorithm>		#include <algorithm>
#include <cassert>		#include <cassert>

#define DEBUG_TYPE "hotcoldsplit"		#define DEBUG_TYPE "hotcoldsplit"

STATISTIC(NumColdSESEFound,		STATISTIC(NumColdRegionsFound, "Number of cold regions found.");
"Number of cold single entry single exit (SESE) regions found.");		STATISTIC(NumColdRegionsOutlined, "Number of cold regions outlined.");
STATISTIC(NumColdSESEOutlined,
"Number of cold single entry single exit (SESE) regions outlined.");

using namespace llvm;		using namespace llvm;

static cl::opt<bool> EnableStaticAnalyis("hot-cold-static-analysis",		static cl::opt<bool> EnableStaticAnalyis("hot-cold-static-analysis",
cl::init(true), cl::Hidden);		cl::init(true), cl::Hidden);


namespace {		namespace {

struct PostDomTree : PostDomTreeBase<BasicBlock> {		struct PostDomTree : PostDomTreeBase<BasicBlock> {
PostDomTree(Function &F) { recalculate(F); }		PostDomTree(Function &F) { recalculate(F); }
};		};

typedef DenseSet<const BasicBlock *> DenseSetBB;		/// A sequence of basic blocks.
typedef DenseMap<const BasicBlock *, uint64_t> DenseMapBBInt;		///
		/// A 0-sized SmallVector is slightly cheaper to move than a std::vector.
// From: https://reviews.llvm.org/D22558		using BlockSequence = SmallVector<BasicBlock *, 0>;
// Exit is not part of the region.
static bool isSingleEntrySingleExit(BasicBlock *Entry, const BasicBlock *Exit,
DominatorTree *DT, PostDomTree *PDT,
SmallVectorImpl<BasicBlock *> &Region) {
if (!DT->dominates(Entry, Exit))
return false;

if (!PDT->dominates(Exit, Entry))
return false;

for (auto I = df_begin(Entry), E = df_end(Entry); I != E;) {
if (*I == Exit) {
I.skipChildren();
continue;
}
if (!DT->dominates(Entry, *I))
return false;
Region.push_back(*I);
++I;
}
return true;
}

// Same as blockEndsInUnreachable in CodeGen/BranchFolding.cpp. Do not modify		// Same as blockEndsInUnreachable in CodeGen/BranchFolding.cpp. Do not modify
// this function unless you modify the MBB version as well.		// this function unless you modify the MBB version as well.
//		//
/// A no successor, non-return block probably ends in unreachable and is cold.		/// A no successor, non-return block probably ends in unreachable and is cold.
/// Also consider a block that ends in an indirect branch to be a return block,		/// Also consider a block that ends in an indirect branch to be a return block,
/// since many targets use plain indirect branches to return.		/// since many targets use plain indirect branches to return.
bool blockEndsInUnreachable(const BasicBlock &BB) {		bool blockEndsInUnreachable(const BasicBlock &BB) {
Show All 33 Lines	if (const CallInst *CI = dyn_cast<CallInst>(&I)) {

// Assume that inline assembly is hot code.		// Assume that inline assembly is hot code.
if (isa<InlineAsm>(CI->getCalledValue()))		if (isa<InlineAsm>(CI->getCalledValue()))
return false;		return false;
}		}
return false;		return false;
}		}

static bool returnsOrHasSideEffects(const BasicBlock &BB) {		/// Check whether it's safe to outline \p BB.
const Instruction *I = BB.getTerminator();		static bool mayExtractBlock(const BasicBlock &BB) {
if (isa<ReturnInst>(I) \|\| isa<IndirectBrInst>(I) \|\| isa<InvokeInst>(I))		return !BB.hasAddressTaken();
return true;		}

for (const Instruction &I : BB)		/// Identify the maximal region of cold blocks which includes \p SinkBB.
if (const CallInst *CI = dyn_cast<CallInst>(&I)) {		///
if (CI->hasFnAttr(Attribute::NoReturn))		/// Include all blocks post-dominated by \p SinkBB, \p SinkBB itself, and all
return true;		/// blocks dominated by \p SinkBB. Exclude all other blocks, and blocks which
		/// cannot be outlined.
if (isa<InlineAsm>(CI->getCalledValue()))		///
return true;		/// Return an empty sequence if the cold region is too small to outline, or if
		/// the cold region has no warm predecessors.
		static BlockSequence
		findMaximalColdRegion(BasicBlock &SinkBB, DominatorTree &DT, PostDomTree &PDT) {
		// The maximal cold region.
		BlockSequence ColdRegion = {};

		// The ancestor farthest-away from SinkBB, and also post-dominated by it.
		BasicBlock *MaxAncestor = &SinkBB;
		unsigned MaxAncestorHeight = 0;

		// Visit SinkBB's ancestors using inverse DFS.
		auto PredIt = ++idf_begin(&SinkBB);
		auto PredEnd = idf_end(&SinkBB);
		while (PredIt != PredEnd) {
		BasicBlock &PredBB = **PredIt;
		bool SinkPostDom = PDT.dominates(&SinkBB, &PredBB);

		// If SinkBB does not post-dominate a predecessor, do not mark the
		// predecessor (or any of its predecessors) cold.
		if (!SinkPostDom \|\| !mayExtractBlock(PredBB)) {
		PredIt.skipChildren();
		continue;
}		}

return false;		// Keep track of the post-dominated ancestor farthest away from the sink.
		unsigned AncestorHeight = PredIt.getPathLength();
		if (AncestorHeight > MaxAncestorHeight) {
		MaxAncestor = &PredBB;
		MaxAncestorHeight = AncestorHeight;
}		}

static DenseSetBB getHotBlocks(Function &F) {		ColdRegion.push_back(&PredBB);
		++PredIt;
// Mark all cold basic blocks.
DenseSetBB ColdBlocks;
for (BasicBlock &BB : F)
if (unlikelyExecuted(BB)) {
LLVM_DEBUG(llvm::dbgs() << "\nForward propagation marks cold: " << BB);
ColdBlocks.insert((const BasicBlock *)&BB);
}		}

// Forward propagation: basic blocks are hot when they are reachable from the		// CodeExtractor requires that all blocks to be extracted must be dominated
// beginning of the function through a path that does not contain cold blocks.		// by the first block to be extracted.
SmallVector<const BasicBlock *, 8> WL;		//
DenseSetBB HotBlocks;		// To avoid spurious or repeated outlining, require that the max ancestor
		// has a predecessor. By construction this predecessor is not in the cold
		// region, i.e. its existence implies we don't outline the whole function.
		//
		// TODO: If MaxAncestor has no predecessors, we may be able to outline the
		// second largest cold region that has a predecessor.
		[Inline comment, done] sebpop: When the MaxAncestor has no predecessors, we may be able to outline the second-largest cold region that has a predecessor. Maybe add a comment about this.
		[Inline reply, not done] vsk (author): Good point, I'll add a comment.
		if (pred_empty(MaxAncestor) \|\|
		MaxAncestor->getSinglePredecessor() == MaxAncestor)
		[Inline comment, not done] sebpop: Maybe add a FIXME note: the blocks not dominated by the max ancestor may be extracted as other smaller cold regions. Not sure how much benefit that would give...
		[Inline reply, not done] vsk (author): Good point. Although... marking outlined calls as noreturn when appropriate and outlining more than once per function could achieve most of the win.
		return {};

		// Filter out predecessors not dominated by the max ancestor.
		//
		// TODO: Blocks not dominated by the max ancestor could be extracted as
		// other cold regions. Marking outlined calls as noreturn when appropriate
		// and outlining more than once per function could achieve most of the win.
		auto EraseIt = remove_if(ColdRegion, [&](BasicBlock *PredBB) {
		return PredBB != MaxAncestor && !DT.dominates(MaxAncestor, PredBB);
		});
		ColdRegion.erase(EraseIt, ColdRegion.end());

const BasicBlock *It = &F.front();		// Add SinkBB to the cold region.
if (!ColdBlocks.count(It)) {		ColdRegion.push_back(&SinkBB);
HotBlocks.insert(It);
// Breadth First Search to mark edges reachable from hot.
WL.push_back(It);
while (WL.size() > 0) {
It = WL.pop_back_val();

for (const BasicBlock *Succ : successors(It)) {		// Ensure that the first extracted block is the max ancestor.
// Do not visit blocks that are cold.		if (ColdRegion[0] != MaxAncestor) {
if (!ColdBlocks.count(Succ) && !HotBlocks.count(Succ)) {		auto AncestorIt = find(ColdRegion, MaxAncestor);
HotBlocks.insert(Succ);		*AncestorIt = ColdRegion[0];
WL.push_back(Succ);		ColdRegion[0] = MaxAncestor;
}		}
}
}		// Find all successors of SinkBB dominated by SinkBB using DFS.
		auto SuccIt = ++df_begin(&SinkBB);
		auto SuccEnd = df_end(&SinkBB);
		while (SuccIt != SuccEnd) {
		BasicBlock &SuccBB = **SuccIt;
		bool SinkDom = DT.dominates(&SinkBB, &SuccBB);

		// If SinkBB does not dominate a successor, do not mark the successor (or
		// any of its successors) cold.
		if (!SinkDom \|\| !mayExtractBlock(SuccBB)) {
		SuccIt.skipChildren();
		continue;
}		}

assert(WL.empty() && "work list should be empty");		ColdRegion.push_back(&SuccBB);
		++SuccIt;
		}

DenseMapBBInt NumHotSuccessors;		// TODO: Consider outlining regions with just 1 block, but more than some
// Back propagation: when all successors of a basic block are cold, the		// threshold of instructions.
		[Inline comment, done] sebpop: Agreed. I think the threshold can be pretty low given that the code is cold. You could even try to remove the condition (ColdRegion.size() == 1) and see what happens: code size and perf.
		[Inline reply, not done] vsk (author): I have tried it out internally and saw a substantial increase in outlining, but haven't looked into performance impact yet. I'll send a separate patch when I have more data.
// basic block is cold as well.		if (ColdRegion.size() == 1)
for (BasicBlock &BBRef : F) {		return {};
const BasicBlock *BB = &BBRef;
if (HotBlocks.count(BB)) {
// Keep a count of hot successors for every hot block.
NumHotSuccessors[BB] = 0;
for (const BasicBlock *Succ : successors(BB))
if (!ColdBlocks.count(Succ))
NumHotSuccessors[BB] += 1;

// Add to work list the blocks with all successors cold. Those are the		return ColdRegion;
// root nodes in the next loop, where we will move those blocks from
// HotBlocks to ColdBlocks and iterate over their predecessors.
if (NumHotSuccessors[BB] == 0)
WL.push_back(BB);
}
}		}

while (WL.size() > 0) {		/// Get the largest cold region in \p F.
It = WL.pop_back_val();		static BlockSequence getLargestColdRegion(Function &F, ProfileSummaryInfo &PSI,
if (ColdBlocks.count(It))		BlockFrequencyInfo *BFI,
		DominatorTree &DT, PostDomTree &PDT) {
		// Keep track of the largest cold region.
		BlockSequence LargestColdRegion = {};

		for (BasicBlock &BB : F) {
		// Identify cold blocks.
		if (!mayExtractBlock(BB))
		continue;
		bool Cold =
		PSI.isColdBB(&BB, BFI) \|\| (EnableStaticAnalyis && unlikelyExecuted(BB));
		if (!Cold)
continue;		continue;

// Do not back-propagate to blocks that return or have side effects.		LLVM_DEBUG({
if (returnsOrHasSideEffects(*It))		dbgs() << "Found cold block:\n";
		BB.dump();
		});

		// Find a maximal cold region we can outline.
		BlockSequence ColdRegion = findMaximalColdRegion(BB, DT, PDT);
		if (ColdRegion.empty()) {
		LLVM_DEBUG(dbgs() << " Skipping (block not profitable to extract)\n");
continue;		continue;
		}

// Move the block from HotBlocks to ColdBlocks.		++NumColdRegionsFound;
LLVM_DEBUG(llvm::dbgs() << "\nBack propagation marks cold: " << *It);
HotBlocks.erase(It);
ColdBlocks.insert(It);

// Iterate over the predecessors.		LLVM_DEBUG({
for (const BasicBlock *Pred : predecessors(It)) {		llvm::dbgs() << "Identified cold region with " << ColdRegion.size()
if (HotBlocks.count(Pred)) {		<< " blocks:\n";
NumHotSuccessors[Pred] -= 1;		for (BasicBlock *BB : ColdRegion)
		BB->dump();
		});

// If Pred has no more hot successors, add it to the work list.		// TODO: Outline more than one region.
if (NumHotSuccessors[Pred] == 0)		if (ColdRegion.size() > LargestColdRegion.size())
WL.push_back(Pred);		LargestColdRegion = std::move(ColdRegion);
}
}
}		}

return HotBlocks;		return LargestColdRegion;
}		}

class HotColdSplitting {		class HotColdSplitting {
public:		public:
HotColdSplitting(ProfileSummaryInfo *ProfSI,		HotColdSplitting(ProfileSummaryInfo *ProfSI,
function_ref<BlockFrequencyInfo *(Function &)> GBFI,		function_ref<BlockFrequencyInfo *(Function &)> GBFI,
function_ref<TargetTransformInfo &(Function &)> GTTI,		function_ref<TargetTransformInfo &(Function &)> GTTI,
std::function<OptimizationRemarkEmitter &(Function &)> *GORE)		std::function<OptimizationRemarkEmitter &(Function &)> *GORE)
: PSI(ProfSI), GetBFI(GBFI), GetTTI(GTTI), GetORE(GORE) {}		: PSI(ProfSI), GetBFI(GBFI), GetTTI(GTTI), GetORE(GORE) {}
bool run(Module &M);		bool run(Module &M);

private:		private:
bool shouldOutlineFrom(const Function &F) const;		bool shouldOutlineFrom(const Function &F) const;
const Function *outlineColdBlocks(Function &F, const DenseSetBB &ColdBlock,		Function *extractColdRegion(const BlockSequence &Region, DominatorTree &DT,
DominatorTree *DT, PostDomTree *PDT);		BlockFrequencyInfo *BFI,
Function *extractColdRegion(const SmallVectorImpl<BasicBlock *> &Region,
DominatorTree *DT, BlockFrequencyInfo *BFI,
OptimizationRemarkEmitter &ORE);		OptimizationRemarkEmitter &ORE);
bool isOutlineCandidate(const SmallVectorImpl<BasicBlock *> &Region,
const BasicBlock *Exit) const {
if (!Exit)
return false;

// Regions with landing pads etc.
for (const BasicBlock *BB : Region) {
if (BB->isEHPad() \|\| BB->hasAddressTaken())
return false;
}
return true;
}
SmallPtrSet<const Function *, 2> OutlinedFunctions;		SmallPtrSet<const Function *, 2> OutlinedFunctions;
ProfileSummaryInfo *PSI;		ProfileSummaryInfo *PSI;
function_ref<BlockFrequencyInfo *(Function &)> GetBFI;		function_ref<BlockFrequencyInfo *(Function &)> GetBFI;
function_ref<TargetTransformInfo &(Function &)> GetTTI;		function_ref<TargetTransformInfo &(Function &)> GetTTI;
std::function<OptimizationRemarkEmitter &(Function &)> *GetORE;		std::function<OptimizationRemarkEmitter &(Function &)> *GetORE;
};		};

class HotColdSplittingLegacyPass : public ModulePass {		class HotColdSplittingLegacyPass : public ModulePass {
Show All 20 Lines
bool HotColdSplitting::shouldOutlineFrom(const Function &F) const {		bool HotColdSplitting::shouldOutlineFrom(const Function &F) const {
// Do not try to outline again from an already outlined cold function.		// Do not try to outline again from an already outlined cold function.
if (OutlinedFunctions.count(&F))		if (OutlinedFunctions.count(&F))
return false;		return false;

if (F.size() <= 2)		if (F.size() <= 2)
return false;		return false;

		// TODO: Consider only skipping functions marked `optnone` or `cold`.

if (F.hasAddressTaken())		if (F.hasAddressTaken())
return false;		return false;

if (F.hasFnAttribute(Attribute::AlwaysInline))		if (F.hasFnAttribute(Attribute::AlwaysInline))
return false;		return false;

if (F.hasFnAttribute(Attribute::NoInline))		if (F.hasFnAttribute(Attribute::NoInline))
return false;		return false;

if (F.getCallingConv() == CallingConv::Cold)		if (F.getCallingConv() == CallingConv::Cold)
return false;		return false;

if (PSI->isFunctionEntryCold(&F))		if (PSI->isFunctionEntryCold(&F))
return false;		return false;
return true;		return true;
}		}

Function *		Function *HotColdSplitting::extractColdRegion(const BlockSequence &Region,
HotColdSplitting::extractColdRegion(const SmallVectorImpl<BasicBlock *> &Region,		DominatorTree &DT,
DominatorTree *DT, BlockFrequencyInfo *BFI,		BlockFrequencyInfo *BFI,
OptimizationRemarkEmitter &ORE) {		OptimizationRemarkEmitter &ORE) {
assert(!Region.empty());		assert(!Region.empty());
LLVM_DEBUG(for (auto *BB : Region)		LLVM_DEBUG(for (auto *BB : Region)
llvm::dbgs() << "\nExtracting: " << *BB;);		llvm::dbgs() << "\nExtracting: " << *BB;);

// TODO: Pass BFI and BPI to update profile information.		// TODO: Pass BFI and BPI to update profile information.
CodeExtractor CE(Region, DT);		CodeExtractor CE(Region, &DT);

SetVector<Value *> Inputs, Outputs, Sinks;		SetVector<Value *> Inputs, Outputs, Sinks;
CE.findInputsOutputs(Inputs, Outputs, Sinks);		CE.findInputsOutputs(Inputs, Outputs, Sinks);

// Do not extract regions that have live exit variables.		// Do not extract regions that have live exit variables.
if (Outputs.size() > 0)		if (Outputs.size() > 0) {
		LLVM_DEBUG(llvm::dbgs() << "Not outlining; live outputs\n");
return nullptr;		return nullptr;
		}

		// TODO: Run MergeBasicBlockIntoOnlyPred on the outlined function.
Function *OrigF = Region[0]->getParent();		Function *OrigF = Region[0]->getParent();
if (Function *OutF = CE.extractCodeRegion()) {		if (Function *OutF = CE.extractCodeRegion()) {
User *U = *OutF->user_begin();		User *U = *OutF->user_begin();
CallInst *CI = cast<CallInst>(U);		CallInst *CI = cast<CallInst>(U);
CallSite CS(CI);		CallSite CS(CI);
NumColdSESEOutlined++;		NumColdRegionsOutlined++;
if (GetTTI(OutF).useColdCCForColdCall(OutF)) {		if (GetTTI(OutF).useColdCCForColdCall(OutF)) {
OutF->setCallingConv(CallingConv::Cold);		OutF->setCallingConv(CallingConv::Cold);
CS.setCallingConv(CallingConv::Cold);		CS.setCallingConv(CallingConv::Cold);
}		}
CI->setIsNoInline();		CI->setIsNoInline();

// Try to make the outlined code as small as possible on the assumption		// Try to make the outlined code as small as possible on the assumption
// that it's cold.		// that it's cold.
Show All 15 Lines	ORE.emit([&]() {
return OptimizationRemarkMissed(DEBUG_TYPE, "ExtractFailed",		return OptimizationRemarkMissed(DEBUG_TYPE, "ExtractFailed",
&*Region[0]->begin())		&*Region[0]->begin())
<< "Failed to extract region at block "		<< "Failed to extract region at block "
<< ore::NV("Block", Region.front());		<< ore::NV("Block", Region.front());
});		});
return nullptr;		return nullptr;
}		}

// Return the function created after outlining, nullptr otherwise.
const Function *HotColdSplitting::outlineColdBlocks(Function &F,
const DenseSetBB &HotBlocks,
DominatorTree *DT,
PostDomTree *PDT) {
auto BFI = GetBFI(F);
auto &ORE = (*GetORE)(F);
// Walking the dominator tree allows us to find the largest
// cold region.
BasicBlock *Begin = DT->getRootNode()->getBlock();

// Early return if the beginning of the function has been marked cold,
// otherwise all the function gets outlined.
if (PSI->isColdBB(Begin, BFI) \|\| !HotBlocks.count(Begin))
return nullptr;

for (auto I = df_begin(Begin), E = df_end(Begin); I != E; ++I) {
BasicBlock *BB = *I;
if (PSI->isColdBB(BB, BFI) \|\| !HotBlocks.count(BB)) {
SmallVector<BasicBlock *, 4> ValidColdRegion, Region;
BasicBlock *Exit = (*PDT)[BB]->getIDom()->getBlock();
BasicBlock *ExitColdRegion = nullptr;

// Estimated cold region between a BB and its dom-frontier.
while (Exit && isSingleEntrySingleExit(BB, Exit, DT, PDT, Region) &&
isOutlineCandidate(Region, Exit)) {
ExitColdRegion = Exit;
ValidColdRegion = Region;
Region.clear();
// Update Exit recursively to its dom-frontier.
Exit = (*PDT)[Exit]->getIDom()->getBlock();
}
if (ExitColdRegion) {
// Do not outline a region with only one block.
if (ValidColdRegion.size() == 1)
continue;

++NumColdSESEFound;
ValidColdRegion.push_back(ExitColdRegion);
// Candidate for outlining. FIXME: Continue outlining.
return extractColdRegion(ValidColdRegion, DT, BFI, ORE);
}
}
}
return nullptr;
}

bool HotColdSplitting::run(Module &M) {		bool HotColdSplitting::run(Module &M) {
		bool Changed = false;
for (auto &F : M) {		for (auto &F : M) {
if (!shouldOutlineFrom(F))		if (!shouldOutlineFrom(F)) {
		LLVM_DEBUG(llvm::dbgs() << "Not outlining in " << F.getName() << "\n");
continue;		continue;
		}

		LLVM_DEBUG(llvm::dbgs() << "Outlining in " << F.getName() << "\n");
DominatorTree DT(F);		DominatorTree DT(F);
PostDomTree PDT(F);		PostDomTree PDT(F);
PDT.recalculate(F);		PDT.recalculate(F);
DenseSetBB HotBlocks;		BlockFrequencyInfo *BFI = GetBFI(F);
if (EnableStaticAnalyis) // Static analysis of cold blocks.
HotBlocks = getHotBlocks(F);

const Function *Outlined = outlineColdBlocks(F, HotBlocks, &DT, &PDT);		BlockSequence ColdRegion = getLargestColdRegion(F, *PSI, BFI, DT, PDT);
if (Outlined)		if (ColdRegion.empty())
		continue;

		OptimizationRemarkEmitter &ORE = (*GetORE)(F);
		Function *Outlined = extractColdRegion(ColdRegion, DT, BFI, ORE);
		if (Outlined) {
OutlinedFunctions.insert(Outlined);		OutlinedFunctions.insert(Outlined);
		Changed = true;
}		}
return true;		}
		return Changed;
}		}

bool HotColdSplittingLegacyPass::runOnModule(Module &M) {		bool HotColdSplittingLegacyPass::runOnModule(Module &M) {
if (skipModule(M))		if (skipModule(M))
return false;		return false;
ProfileSummaryInfo *PSI =		ProfileSummaryInfo *PSI =
getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();		getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
auto GTTI = [this](Function &F) -> TargetTransformInfo & {		auto GTTI = [this](Function &F) -> TargetTransformInfo & {
▲ Show 20 Lines • Show All 58 Lines • Show Last 20 Lines

llvm/lib/Transforms/Utils/CodeExtractor.cpp

Show First 20 Lines • Show All 1,261 Lines • ▼ Show 20 Lines	for (BasicBlock::iterator I = header->begin(); isa<PHINode>(I); ++I) {
for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)		for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
if (!Blocks.count(PN->getIncomingBlock(i)))		if (!Blocks.count(PN->getIncomingBlock(i)))
PN->setIncomingBlock(i, newFuncRoot);		PN->setIncomingBlock(i, newFuncRoot);
}		}

// Look at all successors of the codeReplacer block. If any of these blocks		// Look at all successors of the codeReplacer block. If any of these blocks
// had PHI nodes in them, we need to update the "from" block to be the code		// had PHI nodes in them, we need to update the "from" block to be the code
// replacer, not the original block in the extracted region.		// replacer, not the original block in the extracted region.
std::vector<BasicBlock *> Succs(succ_begin(codeReplacer),		for (BasicBlock *SuccBB : successors(codeReplacer)) {
succ_end(codeReplacer));		for (PHINode &PN : SuccBB->phis()) {
for (unsigned i = 0, e = Succs.size(); i != e; ++i)		Value *IncomingCodeReplacerVal = nullptr;
for (BasicBlock::iterator I = Succs[i]->begin(); isa<PHINode>(I); ++I) {		SmallVector<unsigned, 2> IncomingValsToRemove;
PHINode *PN = cast<PHINode>(I);		for (unsigned I = 0, E = PN.getNumIncomingValues(); I != E; ++I) {
std::set<BasicBlock*> ProcessedPreds;		BasicBlock *IncomingBB = PN.getIncomingBlock(I);
for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
if (Blocks.count(PN->getIncomingBlock(i))) {		// Ignore incoming values from outside of the extracted region.
if (ProcessedPreds.insert(PN->getIncomingBlock(i)).second)		if (!Blocks.count(IncomingBB))
PN->setIncomingBlock(i, codeReplacer);		continue;
else {
// There were multiple entries in the PHI for this block, now there		// Ensure that there is only one incoming value from codeReplacer.
// is only one, so remove the duplicated entries.		if (!IncomingCodeReplacerVal) {
PN->removeIncomingValue(i, false);		PN.setIncomingBlock(I, codeReplacer);
--i; --e;		IncomingCodeReplacerVal = PN.getIncomingValue(I);
		} else {
		assert(IncomingCodeReplacerVal == PN.getIncomingValue(I) &&
		"PHI has two incompatbile incoming values from codeRepl");
		IncomingValsToRemove.push_back(I);
}		}
}		}

		for (unsigned I : reverse(IncomingValsToRemove))
		PN.removeIncomingValue(I, /*DeletePHIIfEmpty=*/false);
		}
}		}

// Erase debug info intrinsics. Variable updates within the new function are		// Erase debug info intrinsics. Variable updates within the new function are
// invisible to debuggers. This could be improved by defining a DISubprogram		// invisible to debuggers. This could be improved by defining a DISubprogram
// for the new function.		// for the new function.
for (BasicBlock &BB : *newFunction) {		for (BasicBlock &BB : *newFunction) {
auto BlockIt = BB.begin();		auto BlockIt = BB.begin();
while (BlockIt != BB.end()) {		while (BlockIt != BB.end()) {
Instruction *Inst = &*BlockIt;		Instruction *Inst = &*BlockIt;
Show All 10 Lines

llvm/test/Transforms/HotColdSplit/do-not-split.ll

This file was moved from llvm/test/Transforms/HotColdSplit/split-cold-1.ll.

	; RUN: opt -hotcoldsplit -S < %s \| FileCheck %s			; RUN: opt -hotcoldsplit -S < %s \| FileCheck %s
	; RUN: opt -passes=hotcoldsplit -S < %s \| FileCheck %s			; RUN: opt -passes=hotcoldsplit -S < %s \| FileCheck %s

	; Check that the function is not split. Outlined function is called from a			; Check that these functions are not split. Outlined functions are called from a
	; basic block named codeRepl.			; basic block named codeRepl.

				; The cold region is too small to split.
	; CHECK-LABEL: @foo			; CHECK-LABEL: @foo
	; CHECK-NOT: codeRepl			; CHECK-NOT: codeRepl
	define void @foo() {			define void @foo() {
	entry:			entry:
	br i1 undef, label %if.then, label %if.end			br i1 undef, label %if.then, label %if.end

	if.then: ; preds = %entry			if.then: ; preds = %entry
	unreachable			unreachable

	if.end: ; preds = %entry			if.end: ; preds = %entry
	br label %if.then12			br label %if.then12

	if.then12: ; preds = %if.end			if.then12: ; preds = %if.end
	br label %cleanup40			br label %cleanup40

	cleanup40: ; preds = %if.then12			cleanup40: ; preds = %if.then12
	br label %return			br label %return

	return: ; preds = %cleanup40			return: ; preds = %cleanup40
	ret void			ret void
	}			}

	; Check that the function is not split. We used to outline the full function.			; Make sure we don't try to outline the entire function.

	; CHECK-LABEL: @fun			; CHECK-LABEL: @fun
	; CHECK-NOT: codeRepl			; CHECK-NOT: codeRepl

	define void @fun() {			define void @fun() {
	entry:			entry:
	br i1 undef, label %if.then, label %if.end			br i1 undef, label %if.then, label %if.end

	if.then: ; preds = %entry			if.then: ; preds = %entry
	br label %if.end			br label %if.end

	if.end: ; preds = %entry			if.end: ; preds = %entry
	ret void			ret void
	}			}

				; Don't outline infinite loops.
				; CHECK-LABEL: @infinite_loop
				; CHECK-NOT: codeRepl
				define void @infinite_loop() {
				entry:
				br label %loop

				loop:
				call void @sink()
				br label %loop
				}

				declare void @sink() cold

llvm/test/Transforms/HotColdSplit/duplicate-phi-preds-crash.ll

This file was added.

				; RUN: opt -S -hotcoldsplit < %s \| FileCheck %s

				target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
				target triple = "x86_64-apple-macosx10.14.0"

				declare void @sideeffect(i64)

				declare i8* @realloc(i8* %ptr, i64 %size)

				declare void @free(i8* %ptr)

				declare void @sink() cold

				; CHECK-LABEL: define {{.*}}@realloc2(
				; CHECK: call {{.*}}@sideeffect(
				; CHECK: call {{.*}}@realloc(
				; CHECK-LABEL: codeRepl:
				; CHECK-NEXT: call {{.*}}@realloc2_if.then2(i64 %size, i8* %ptr)
				; CHECK-LABEL: cleanup:
				; CHECK-NEXT: phi i8* [ null, %if.then ], [ null, %codeRepl ], [ %call, %if.end ]
				define i8* @realloc2(i8* %ptr, i64 %size) {
				entry:
				%0 = add i64 %size, -1
				%1 = icmp ugt i64 %0, 184549375
				br i1 %1, label %if.then, label %if.end

				if.then: ; preds = %entry
				call void @sideeffect(i64 %size)
				br label %cleanup

				if.end: ; preds = %entry
				%call = call i8* @realloc(i8* %ptr, i64 %size)
				%tobool1 = icmp eq i8* %call, null
				br i1 %tobool1, label %if.then2, label %cleanup

				if.then2: ; preds = %if.end
				call void @sideeffect(i64 %size)
				call void @sink()
				%tobool3 = icmp eq i8* %ptr, null
				br i1 %tobool3, label %cleanup, label %if.then4

				if.then4: ; preds = %if.then2
				call void @free(i8* %ptr)
				br label %cleanup

				cleanup: ; preds = %if.end, %if.then4, %if.then2, %if.then
				%retval.0 = phi i8* [ null, %if.then ], [ null, %if.then2 ], [ null, %if.then4 ], [ %call, %if.end ]
				ret i8* %retval.0
				}

				; CHECK-LABEL: define {{.*}}@realloc2_if.then2(
				; CHECK: call {{.*}}@sideeffect
				; CHECK: call {{.*}}@sink
				; CHECK: call {{.*}}@free

llvm/test/Transforms/HotColdSplit/multiple-exits.ll

This file was added.

				; RUN: opt -S -hotcoldsplit < %s \| FileCheck %s

				; Source:
				;
				; extern void sideeffect(int);
				; extern void __attribute__((cold)) sink();
				; void foo(int cond) {
				; if (cond) { //< Start outlining here.
				; sink();
				; if (cond > 10)
				; goto exit1;
				; else
				; goto exit2;
				; }
				; exit1:
				; sideeffect(1);
				; return;
				; exit2:
				; sideeffect(2);
				; return;
				; }

				target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
				target triple = "x86_64-apple-macosx10.14.0"

				; CHECK-LABEL: define {{.*}}@foo(
				; CHECK: br i1 {{.*}}, label %exit1, label %codeRepl
				; CHECK-LABEL: codeRepl:
				; CHECK: [[targetBlock:%.*]] = call i1 @foo_if.then(
				; CHECK-NEXT: br i1 [[targetBlock]], label %exit1, label %[[return:.*]]
				; CHECK-LABEL: exit1:
				; CHECK: call {{.*}}@sideeffect(i32 1)
				; CHECK: [[return]]:
				; CHECK-NEXT: ret void
				define void @foo(i32 %cond) {
				entry:
				%tobool = icmp eq i32 %cond, 0
				br i1 %tobool, label %exit1, label %if.then

				if.then: ; preds = %entry
				tail call void (...) @sink()
				%cmp = icmp sgt i32 %cond, 10
				br i1 %cmp, label %exit1, label %exit2

				exit1: ; preds = %entry, %if.then
				call void @sideeffect(i32 1)
				br label %return

				exit2: ; preds = %if.then
				call void @sideeffect(i32 2)
				br label %return

				return: ; preds = %exit2, %exit1
				ret void
				}

				; CHECK-LABEL: define {{.*}}@foo_if.then(
				; TODO: Eliminate this unnecessary unconditional branch.
				; CHECK: br
				; CHECK: [[exit1Stub:.*]]:
				; CHECK-NEXT: ret i1 true
				; CHECK: [[returnStub:.*]]:
				; CHECK-NEXT: ret i1 false
				; CHECK: call {{.*}}@sink
				; CHECK-NEXT: [[cmp:%.*]] = icmp
				; CHECK-NEXT: br i1 [[cmp]], label %[[exit1Stub]], label %exit2
				; CHECK-LABEL: exit2:
				; CHECK-NEXT: call {{.*}}@sideeffect(i32 2)
				; CHECK-NEXT: br label %[[returnStub]]

				declare void @sink(...) cold

				declare void @sideeffect(i32)

llvm/test/Transforms/HotColdSplit/outline-if-then-else.ll

This file was added.

				; RUN: opt -S -hotcoldsplit < %s \| FileCheck %s

				; Source:
				;
				; extern void sideeffect(int);
				; extern void __attribute__((cold)) sink();
				; void foo(int cond) {
				; if (cond) { //< Start outlining here.
				; if (cond > 10)
				; sideeffect(0);
				; else
				; sideeffect(1);
				; sink();
				; }
				; sideeffect(2);
				; }

				target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
				target triple = "x86_64-apple-macosx10.14.0"

				; CHECK-LABEL: define {{.*}}@foo(
				; CHECK: br i1 {{.*}}, label %codeRepl, label %if.end2
				; CHECK-LABEL: codeRepl:
				; CHECK-NEXT: call void @foo_if.then
				; CHECK-LABEL: if.end2:
				; CHECK: call void @sideeffect(i32 2)
				define void @foo(i32 %cond) {
				entry:
				%cond.addr = alloca i32
				store i32 %cond, i32* %cond.addr
				%0 = load i32, i32* %cond.addr
				%tobool = icmp ne i32 %0, 0
				br i1 %tobool, label %if.then, label %if.end2

				if.then: ; preds = %entry
				%1 = load i32, i32* %cond.addr
				%cmp = icmp sgt i32 %1, 10
				br i1 %cmp, label %if.then1, label %if.else

				if.then1: ; preds = %if.then
				call void @sideeffect(i32 0)
				br label %if.end

				if.else: ; preds = %if.then
				call void @sideeffect(i32 1)
				br label %if.end

				if.end: ; preds = %if.else, %if.then1
				call void (...) @sink()
				ret void

				if.end2: ; preds = %entry
				call void @sideeffect(i32 2)
				ret void
				}

				; CHECK-LABEL: define {{.*}}@foo_if.then(
				; CHECK: call {{.*}}@sideeffect
				; CHECK: call {{.*}}@sideeffect
				; CHECK: call {{.*}}@sink

				declare void @sideeffect(i32)

				declare void @sink(...) cold

llvm/test/Transforms/HotColdSplit/outline-while-loop.ll

This file was added.

				; RUN: opt -S -hotcoldsplit < %s \| FileCheck %s

				; Source:
				;
				; extern void sideeffect(int);
				; extern void __attribute__((cold)) sink();
				; void foo(int cond) {
				; if (cond) { //< Start outlining here.
				; while (cond > 10) {
				; --cond;
				; sideeffect(0);
				; }
				; sink();
				; }
				; sideeffect(1);
				; }

				target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
				target triple = "x86_64-apple-macosx10.14.0"

				; CHECK-LABEL: define {{.*}}@foo(
				; CHECK: br i1 {{.*}}, label %if.end, label %codeRepl
				; CHECK-LABEL: codeRepl:
				; CHECK-NEXT: call void @foo_while.cond.preheader
				; CHECK-LABEL: if.end:
				; CHECK: call void @sideeffect(i32 1)
				define void @foo(i32 %cond) {
				entry:
				%tobool = icmp eq i32 %cond, 0
				br i1 %tobool, label %if.end, label %while.cond.preheader

				while.cond.preheader: ; preds = %entry
				%cmp3 = icmp sgt i32 %cond, 10
				br i1 %cmp3, label %while.body.preheader, label %while.end

				while.body.preheader: ; preds = %while.cond.preheader
				br label %while.body

				while.body: ; preds = %while.body.preheader, %while.body
				%cond.addr.04 = phi i32 [ %dec, %while.body ], [ %cond, %while.body.preheader ]
				%dec = add nsw i32 %cond.addr.04, -1
				tail call void @sideeffect(i32 0) #3
				%cmp = icmp sgt i32 %dec, 10
				br i1 %cmp, label %while.body, label %while.end.loopexit

				while.end.loopexit: ; preds = %while.body
				br label %while.end

				while.end: ; preds = %while.end.loopexit, %while.cond.preheader
				tail call void (...) @sink()
				ret void

				if.end: ; preds = %entry
				tail call void @sideeffect(i32 1)
				ret void
				}

				; CHECK-LABEL: define {{.*}}@foo_while.cond.preheader(
				; CHECK: phi i32
				; CHECK-NEXT: add nsw i32
				; CHECK-NEXT: call {{.*}}@sideeffect
				; CHECK-NEXT: icmp
				; CHECK-NEXT: br

				declare void @sideeffect(i32)

				declare void @sink(...) cold

llvm/test/Transforms/HotColdSplit/split-cold-1.ll

This file was moved to llvm/test/Transforms/HotColdSplit/do-not-split.ll.

This is an archive of the discontinued LLVM Phabricator instance.

[HotColdSplitting] Identify larger cold regions using domtree queries
ClosedPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 170944

llvm/lib/Transforms/IPO/HotColdSplitting.cpp

llvm/lib/Transforms/Utils/CodeExtractor.cpp

llvm/test/Transforms/HotColdSplit/do-not-split.ll

llvm/test/Transforms/HotColdSplit/duplicate-phi-preds-crash.ll

llvm/test/Transforms/HotColdSplit/multiple-exits.ll

llvm/test/Transforms/HotColdSplit/outline-if-then-else.ll

llvm/test/Transforms/HotColdSplit/outline-while-loop.ll

llvm/test/Transforms/HotColdSplit/split-cold-1.ll

This is an archive of the discontinued LLVM Phabricator instance.

[HotColdSplitting] Identify larger cold regions using domtree queriesClosedPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 170944

llvm/lib/Transforms/IPO/HotColdSplitting.cpp

llvm/lib/Transforms/Utils/CodeExtractor.cpp

llvm/test/Transforms/HotColdSplit/do-not-split.ll

llvm/test/Transforms/HotColdSplit/duplicate-phi-preds-crash.ll

llvm/test/Transforms/HotColdSplit/multiple-exits.ll

llvm/test/Transforms/HotColdSplit/outline-if-then-else.ll

llvm/test/Transforms/HotColdSplit/outline-while-loop.ll

llvm/test/Transforms/HotColdSplit/split-cold-1.ll

[HotColdSplitting] Identify larger cold regions using domtree queries
ClosedPublic