Diff 170999

llvm/trunk/lib/Transforms/IPO/HotColdSplitting.cpp

Show First 20 Lines • Show All 51 Lines • ▼ Show 20 Lines
#include "llvm/Transforms/Utils/Local.h"		#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/SSAUpdater.h"		#include "llvm/Transforms/Utils/SSAUpdater.h"
#include "llvm/Transforms/Utils/ValueMapper.h"		#include "llvm/Transforms/Utils/ValueMapper.h"
#include <algorithm>		#include <algorithm>
#include <cassert>		#include <cassert>

#define DEBUG_TYPE "hotcoldsplit"		#define DEBUG_TYPE "hotcoldsplit"

STATISTIC(NumColdSESEFound,		STATISTIC(NumColdRegionsFound, "Number of cold regions found.");
"Number of cold single entry single exit (SESE) regions found.");		STATISTIC(NumColdRegionsOutlined, "Number of cold regions outlined.");
STATISTIC(NumColdSESEOutlined,
"Number of cold single entry single exit (SESE) regions outlined.");

using namespace llvm;		using namespace llvm;

static cl::opt<bool> EnableStaticAnalyis("hot-cold-static-analysis",		static cl::opt<bool> EnableStaticAnalyis("hot-cold-static-analysis",
cl::init(true), cl::Hidden);		cl::init(true), cl::Hidden);


namespace {		namespace {

struct PostDomTree : PostDomTreeBase<BasicBlock> {		struct PostDomTree : PostDomTreeBase<BasicBlock> {
PostDomTree(Function &F) { recalculate(F); }		PostDomTree(Function &F) { recalculate(F); }
};		};

typedef DenseSet<const BasicBlock *> DenseSetBB;		/// A sequence of basic blocks.
typedef DenseMap<const BasicBlock *, uint64_t> DenseMapBBInt;		///
		/// A 0-sized SmallVector is slightly cheaper to move than a std::vector.
// From: https://reviews.llvm.org/D22558		using BlockSequence = SmallVector<BasicBlock *, 0>;
// Exit is not part of the region.
static bool isSingleEntrySingleExit(BasicBlock Entry, const BasicBlock Exit,
DominatorTree DT, PostDomTree PDT,
SmallVectorImpl<BasicBlock *> &Region) {
if (!DT->dominates(Entry, Exit))
return false;

if (!PDT->dominates(Exit, Entry))
return false;

for (auto I = df_begin(Entry), E = df_end(Entry); I != E;) {
if (*I == Exit) {
I.skipChildren();
continue;
}
if (!DT->dominates(Entry, *I))
return false;
Region.push_back(*I);
++I;
}
return true;
}

// Same as blockEndsInUnreachable in CodeGen/BranchFolding.cpp. Do not modify		// Same as blockEndsInUnreachable in CodeGen/BranchFolding.cpp. Do not modify
// this function unless you modify the MBB version as well.		// this function unless you modify the MBB version as well.
//		//
/// A no successor, non-return block probably ends in unreachable and is cold.		/// A no successor, non-return block probably ends in unreachable and is cold.
/// Also consider a block that ends in an indirect branch to be a return block,		/// Also consider a block that ends in an indirect branch to be a return block,
/// since many targets use plain indirect branches to return.		/// since many targets use plain indirect branches to return.
bool blockEndsInUnreachable(const BasicBlock &BB) {		bool blockEndsInUnreachable(const BasicBlock &BB) {
Show All 33 Lines	if (const CallInst *CI = dyn_cast<CallInst>(&I)) {

// Assume that inline assembly is hot code.		// Assume that inline assembly is hot code.
if (isa<InlineAsm>(CI->getCalledValue()))		if (isa<InlineAsm>(CI->getCalledValue()))
return false;		return false;
}		}
return false;		return false;
}		}

static bool returnsOrHasSideEffects(const BasicBlock &BB) {		/// Check whether it's safe to outline \p BB.
const Instruction *I = BB.getTerminator();		static bool mayExtractBlock(const BasicBlock &BB) {
if (isa<ReturnInst>(I) \|\| isa<IndirectBrInst>(I) \|\| isa<InvokeInst>(I))		return !BB.hasAddressTaken();
return true;		}

for (const Instruction &I : BB)		/// Identify the maximal region of cold blocks which includes \p SinkBB.
if (const CallInst *CI = dyn_cast<CallInst>(&I)) {		///
if (CI->hasFnAttr(Attribute::NoReturn))		/// Include all blocks post-dominated by \p SinkBB, \p SinkBB itself, and all
return true;		/// blocks dominated by \p SinkBB. Exclude all other blocks, and blocks which
		/// cannot be outlined.
if (isa<InlineAsm>(CI->getCalledValue()))		///
return true;		/// Return an empty sequence if the cold region is too small to outline, or if
		/// the cold region has no warm predecessors.
		static BlockSequence
		findMaximalColdRegion(BasicBlock &SinkBB, DominatorTree &DT, PostDomTree &PDT) {
		// The maximal cold region.
		BlockSequence ColdRegion = {};

		// The ancestor farthest-away from SinkBB, and also post-dominated by it.
		BasicBlock *MaxAncestor = &SinkBB;
		unsigned MaxAncestorHeight = 0;

		// Visit SinkBB's ancestors using inverse DFS.
		auto PredIt = ++idf_begin(&SinkBB);
		auto PredEnd = idf_end(&SinkBB);
		while (PredIt != PredEnd) {
		BasicBlock &PredBB = **PredIt;
		bool SinkPostDom = PDT.dominates(&SinkBB, &PredBB);

		// If SinkBB does not post-dominate a predecessor, do not mark the
		// predecessor (or any of its predecessors) cold.
		if (!SinkPostDom \|\| !mayExtractBlock(PredBB)) {
		PredIt.skipChildren();
		continue;
}		}

return false;		// Keep track of the post-dominated ancestor farthest away from the sink.
		unsigned AncestorHeight = PredIt.getPathLength();
		if (AncestorHeight > MaxAncestorHeight) {
		MaxAncestor = &PredBB;
		MaxAncestorHeight = AncestorHeight;
}		}

static DenseSetBB getHotBlocks(Function &F) {		ColdRegion.push_back(&PredBB);
		++PredIt;
// Mark all cold basic blocks.
DenseSetBB ColdBlocks;
for (BasicBlock &BB : F)
if (unlikelyExecuted(BB)) {
LLVM_DEBUG(llvm::dbgs() << "\nForward propagation marks cold: " << BB);
ColdBlocks.insert((const BasicBlock *)&BB);
}		}

// Forward propagation: basic blocks are hot when they are reachable from the		// CodeExtractor requires that all blocks to be extracted must be dominated
// beginning of the function through a path that does not contain cold blocks.		// by the first block to be extracted.
SmallVector<const BasicBlock *, 8> WL;		//
DenseSetBB HotBlocks;		// To avoid spurious or repeated outlining, require that the max ancestor
		// has a predecessor. By construction this predecessor is not in the cold
		// region, i.e. its existence implies we don't outline the whole function.
		//
		// TODO: If MaxAncestor has no predecessors, we may be able to outline the
		// second largest cold region that has a predecessor.
		if (pred_empty(MaxAncestor) \|\|
		MaxAncestor->getSinglePredecessor() == MaxAncestor)
		return {};

		// Filter out predecessors not dominated by the max ancestor.
		//
		// TODO: Blocks not dominated by the max ancestor could be extracted as
		// other cold regions. Marking outlined calls as noreturn when appropriate
		// and outlining more than once per function could achieve most of the win.
		auto EraseIt = remove_if(ColdRegion, [&](BasicBlock *PredBB) {
		return PredBB != MaxAncestor && !DT.dominates(MaxAncestor, PredBB);
		});
		ColdRegion.erase(EraseIt, ColdRegion.end());

const BasicBlock *It = &F.front();		// Add SinkBB to the cold region.
if (!ColdBlocks.count(It)) {		ColdRegion.push_back(&SinkBB);
HotBlocks.insert(It);
// Breadth First Search to mark edges reachable from hot.
WL.push_back(It);
while (WL.size() > 0) {
It = WL.pop_back_val();

for (const BasicBlock *Succ : successors(It)) {		// Ensure that the first extracted block is the max ancestor.
// Do not visit blocks that are cold.		if (ColdRegion[0] != MaxAncestor) {
if (!ColdBlocks.count(Succ) && !HotBlocks.count(Succ)) {		auto AncestorIt = find(ColdRegion, MaxAncestor);
HotBlocks.insert(Succ);		*AncestorIt = ColdRegion[0];
WL.push_back(Succ);		ColdRegion[0] = MaxAncestor;
}		}
}
}		// Find all successors of SinkBB dominated by SinkBB using DFS.
		auto SuccIt = ++df_begin(&SinkBB);
		auto SuccEnd = df_end(&SinkBB);
		while (SuccIt != SuccEnd) {
		BasicBlock &SuccBB = **SuccIt;
		bool SinkDom = DT.dominates(&SinkBB, &SuccBB);

		// If SinkBB does not dominate a successor, do not mark the successor (or
		// any of its successors) cold.
		if (!SinkDom \|\| !mayExtractBlock(SuccBB)) {
		SuccIt.skipChildren();
		continue;
}		}

assert(WL.empty() && "work list should be empty");		ColdRegion.push_back(&SuccBB);
		++SuccIt;
		}

DenseMapBBInt NumHotSuccessors;		// TODO: Consider outlining regions with just 1 block, but more than some
// Back propagation: when all successors of a basic block are cold, the		// threshold of instructions.
// basic block is cold as well.		if (ColdRegion.size() == 1)
for (BasicBlock &BBRef : F) {		return {};
const BasicBlock *BB = &BBRef;
if (HotBlocks.count(BB)) {
// Keep a count of hot successors for every hot block.
NumHotSuccessors[BB] = 0;
for (const BasicBlock *Succ : successors(BB))
if (!ColdBlocks.count(Succ))
NumHotSuccessors[BB] += 1;

// Add to work list the blocks with all successors cold. Those are the		return ColdRegion;
// root nodes in the next loop, where we will move those blocks from
// HotBlocks to ColdBlocks and iterate over their predecessors.
if (NumHotSuccessors[BB] == 0)
WL.push_back(BB);
}
}		}

while (WL.size() > 0) {		/// Get the largest cold region in \p F.
It = WL.pop_back_val();		static BlockSequence getLargestColdRegion(Function &F, ProfileSummaryInfo &PSI,
if (ColdBlocks.count(It))		BlockFrequencyInfo *BFI,
		DominatorTree &DT, PostDomTree &PDT) {
		// Keep track of the largest cold region.
		BlockSequence LargestColdRegion = {};

		for (BasicBlock &BB : F) {
		// Identify cold blocks.
		if (!mayExtractBlock(BB))
		continue;
		bool Cold =
		PSI.isColdBB(&BB, BFI) \|\| (EnableStaticAnalyis && unlikelyExecuted(BB));
		if (!Cold)
continue;		continue;

// Do not back-propagate to blocks that return or have side effects.		LLVM_DEBUG({
if (returnsOrHasSideEffects(*It))		dbgs() << "Found cold block:\n";
		BB.dump();
		});

		// Find a maximal cold region we can outline.
		BlockSequence ColdRegion = findMaximalColdRegion(BB, DT, PDT);
		if (ColdRegion.empty()) {
		LLVM_DEBUG(dbgs() << " Skipping (block not profitable to extract)\n");
continue;		continue;
		}

// Move the block from HotBlocks to ColdBlocks.		++NumColdRegionsFound;
LLVM_DEBUG(llvm::dbgs() << "\nBack propagation marks cold: " << *It);
HotBlocks.erase(It);
ColdBlocks.insert(It);

// Iterate over the predecessors.		LLVM_DEBUG({
for (const BasicBlock *Pred : predecessors(It)) {		llvm::dbgs() << "Identified cold region with " << ColdRegion.size()
if (HotBlocks.count(Pred)) {		<< " blocks:\n";
NumHotSuccessors[Pred] -= 1;		for (BasicBlock *BB : ColdRegion)
		BB->dump();
		});

// If Pred has no more hot successors, add it to the work list.		// TODO: Outline more than one region.
if (NumHotSuccessors[Pred] == 0)		if (ColdRegion.size() > LargestColdRegion.size())
WL.push_back(Pred);		LargestColdRegion = std::move(ColdRegion);
}
}
}		}

return HotBlocks;		return LargestColdRegion;
}		}

class HotColdSplitting {		class HotColdSplitting {
public:		public:
HotColdSplitting(ProfileSummaryInfo *ProfSI,		HotColdSplitting(ProfileSummaryInfo *ProfSI,
function_ref<BlockFrequencyInfo *(Function &)> GBFI,		function_ref<BlockFrequencyInfo *(Function &)> GBFI,
function_ref<TargetTransformInfo &(Function &)> GTTI,		function_ref<TargetTransformInfo &(Function &)> GTTI,
std::function<OptimizationRemarkEmitter &(Function &)> *GORE)		std::function<OptimizationRemarkEmitter &(Function &)> *GORE)
: PSI(ProfSI), GetBFI(GBFI), GetTTI(GTTI), GetORE(GORE) {}		: PSI(ProfSI), GetBFI(GBFI), GetTTI(GTTI), GetORE(GORE) {}
bool run(Module &M);		bool run(Module &M);

private:		private:
bool shouldOutlineFrom(const Function &F) const;		bool shouldOutlineFrom(const Function &F) const;
const Function *outlineColdBlocks(Function &F, const DenseSetBB &ColdBlock,		Function *extractColdRegion(const BlockSequence &Region, DominatorTree &DT,
DominatorTree *DT, PostDomTree *PDT);		BlockFrequencyInfo *BFI,
Function *extractColdRegion(const SmallVectorImpl<BasicBlock *> &Region,
DominatorTree *DT, BlockFrequencyInfo *BFI,
OptimizationRemarkEmitter &ORE, unsigned Count);		OptimizationRemarkEmitter &ORE, unsigned Count);
bool isOutlineCandidate(const SmallVectorImpl<BasicBlock *> &Region,
const BasicBlock *Exit) const {
if (!Exit)
return false;

// Regions with landing pads etc.
for (const BasicBlock *BB : Region) {
if (BB->isEHPad() \|\| BB->hasAddressTaken())
return false;
}
return true;
}
SmallPtrSet<const Function *, 2> OutlinedFunctions;		SmallPtrSet<const Function *, 2> OutlinedFunctions;
ProfileSummaryInfo *PSI;		ProfileSummaryInfo *PSI;
function_ref<BlockFrequencyInfo *(Function &)> GetBFI;		function_ref<BlockFrequencyInfo *(Function &)> GetBFI;
function_ref<TargetTransformInfo &(Function &)> GetTTI;		function_ref<TargetTransformInfo &(Function &)> GetTTI;
std::function<OptimizationRemarkEmitter &(Function &)> *GetORE;		std::function<OptimizationRemarkEmitter &(Function &)> *GetORE;
};		};

class HotColdSplittingLegacyPass : public ModulePass {		class HotColdSplittingLegacyPass : public ModulePass {
Show All 20 Lines
bool HotColdSplitting::shouldOutlineFrom(const Function &F) const {		bool HotColdSplitting::shouldOutlineFrom(const Function &F) const {
// Do not try to outline again from an already outlined cold function.		// Do not try to outline again from an already outlined cold function.
if (OutlinedFunctions.count(&F))		if (OutlinedFunctions.count(&F))
return false;		return false;

if (F.size() <= 2)		if (F.size() <= 2)
return false;		return false;

		// TODO: Consider only skipping functions marked `optnone` or `cold`.

if (F.hasAddressTaken())		if (F.hasAddressTaken())
return false;		return false;

if (F.hasFnAttribute(Attribute::AlwaysInline))		if (F.hasFnAttribute(Attribute::AlwaysInline))
return false;		return false;

if (F.hasFnAttribute(Attribute::NoInline))		if (F.hasFnAttribute(Attribute::NoInline))
return false;		return false;

if (F.getCallingConv() == CallingConv::Cold)		if (F.getCallingConv() == CallingConv::Cold)
return false;		return false;

if (PSI->isFunctionEntryCold(&F))		if (PSI->isFunctionEntryCold(&F))
return false;		return false;
return true;		return true;
}		}

Function *HotColdSplitting::extractColdRegion(		Function *HotColdSplitting::extractColdRegion(const BlockSequence &Region,
const SmallVectorImpl<BasicBlock *> &Region, DominatorTree *DT,		DominatorTree &DT,
BlockFrequencyInfo *BFI, OptimizationRemarkEmitter &ORE, unsigned Count) {		BlockFrequencyInfo *BFI,
		OptimizationRemarkEmitter &ORE,
		unsigned Count) {
assert(!Region.empty());		assert(!Region.empty());
LLVM_DEBUG(for (auto *BB : Region)		LLVM_DEBUG(for (auto *BB : Region)
llvm::dbgs() << "\nExtracting: " << *BB;);		llvm::dbgs() << "\nExtracting: " << *BB;);

// TODO: Pass BFI and BPI to update profile information.		// TODO: Pass BFI and BPI to update profile information.
CodeExtractor CE(Region, DT, /* AggregateArgs */ false, /* BFI */ nullptr,		CodeExtractor CE(Region, &DT, /* AggregateArgs */ false, /* BFI */ nullptr,
/* BPI */ nullptr, /* AllowVarArgs */ false,		/* BPI */ nullptr, /* AllowVarArgs */ false,
/* AllowAlloca */ false,		/* AllowAlloca */ false,
/* Suffix */ "cold." + std::to_string(Count));		/* Suffix */ "cold." + std::to_string(Count));

SetVector<Value *> Inputs, Outputs, Sinks;		SetVector<Value *> Inputs, Outputs, Sinks;
CE.findInputsOutputs(Inputs, Outputs, Sinks);		CE.findInputsOutputs(Inputs, Outputs, Sinks);

// Do not extract regions that have live exit variables.		// Do not extract regions that have live exit variables.
if (Outputs.size() > 0)		if (Outputs.size() > 0) {
		LLVM_DEBUG(llvm::dbgs() << "Not outlining; live outputs\n");
return nullptr;		return nullptr;
		}

		// TODO: Run MergeBasicBlockIntoOnlyPred on the outlined function.
Function *OrigF = Region[0]->getParent();		Function *OrigF = Region[0]->getParent();
if (Function *OutF = CE.extractCodeRegion()) {		if (Function *OutF = CE.extractCodeRegion()) {
User *U = *OutF->user_begin();		User *U = *OutF->user_begin();
CallInst *CI = cast<CallInst>(U);		CallInst *CI = cast<CallInst>(U);
CallSite CS(CI);		CallSite CS(CI);
NumColdSESEOutlined++;		NumColdRegionsOutlined++;
if (GetTTI(OutF).useColdCCForColdCall(OutF)) {		if (GetTTI(OutF).useColdCCForColdCall(OutF)) {
OutF->setCallingConv(CallingConv::Cold);		OutF->setCallingConv(CallingConv::Cold);
CS.setCallingConv(CallingConv::Cold);		CS.setCallingConv(CallingConv::Cold);
}		}
CI->setIsNoInline();		CI->setIsNoInline();

// Try to make the outlined code as small as possible on the assumption		// Try to make the outlined code as small as possible on the assumption
// that it's cold.		// that it's cold.
Show All 15 Lines	ORE.emit([&]() {
return OptimizationRemarkMissed(DEBUG_TYPE, "ExtractFailed",		return OptimizationRemarkMissed(DEBUG_TYPE, "ExtractFailed",
&*Region[0]->begin())		&*Region[0]->begin())
<< "Failed to extract region at block "		<< "Failed to extract region at block "
<< ore::NV("Block", Region.front());		<< ore::NV("Block", Region.front());
});		});
return nullptr;		return nullptr;
}		}

// Return the function created after outlining, nullptr otherwise.
//
/// Search \p F for a cold single-entry single-exit region and try to extract
/// the first acceptable one into its own function.
///
/// \param F         Function to outline from.
/// \param HotBlocks Blocks considered hot; a block is treated as cold when it
///                  is missing from this set or when PSI reports it cold.
/// \param DT        Dominator tree of \p F; its root is where the walk starts.
/// \param PDT       Post-dominator tree of \p F, used to pick candidate exits.
/// \returns the result of extractColdRegion for the first candidate region,
///          or nullptr when the entry block is cold or no candidate is found.
const Function *HotColdSplitting::outlineColdBlocks(Function &F,
                                                    const DenseSetBB &HotBlocks,
                                                    DominatorTree *DT,
                                                    PostDomTree *PDT) {
  auto BFI = GetBFI(F);
  auto &ORE = (*GetORE)(F);
  // Walking the dominator tree allows us to find the largest
  // cold region.
  BasicBlock *Begin = DT->getRootNode()->getBlock();

  // Early return if the beginning of the function has been marked cold,
  // otherwise all the function gets outlined.
  if (PSI->isColdBB(Begin, BFI) || !HotBlocks.count(Begin))
    return nullptr;

  for (auto I = df_begin(Begin), E = df_end(Begin); I != E; ++I) {
    BasicBlock *BB = *I;
    if (PSI->isColdBB(BB, BFI) || !HotBlocks.count(BB)) {
      SmallVector<BasicBlock *, 4> ValidColdRegion, Region;
      // First candidate exit: the immediate post-dominator of the cold block.
      // NOTE(review): this dereferences the post-dominator tree node for BB
      // without a null check - confirm every visited block has a node.
      BasicBlock *Exit = (*PDT)[BB]->getIDom()->getBlock();
      BasicBlock *ExitColdRegion = nullptr;

      // Estimated cold region between a BB and its dom-frontier.
      while (Exit && isSingleEntrySingleExit(BB, Exit, DT, PDT, Region) &&
             isOutlineCandidate(Region, Exit)) {
        // Remember the last region that passed both checks, then try to grow
        // it by moving the exit one step up the post-dominator tree.
        ExitColdRegion = Exit;
        ValidColdRegion = Region;
        Region.clear();
        // Update Exit recursively to its dom-frontier.
        Exit = (*PDT)[Exit]->getIDom()->getBlock();
      }
      if (ExitColdRegion) {
        // Do not outline a region with only one block.
        if (ValidColdRegion.size() == 1)
          continue;

        ++NumColdSESEFound;
        // The exit block is appended to the blocks handed to the extractor.
        ValidColdRegion.push_back(ExitColdRegion);
        // Candidate for outlining. FIXME: Continue outlining.
        return extractColdRegion(ValidColdRegion, DT, BFI, ORE, /* Count */ 1);
      }
    }
  }
  return nullptr;
}

bool HotColdSplitting::run(Module &M) {		bool HotColdSplitting::run(Module &M) {
		bool Changed = false;
for (auto &F : M) {		for (auto &F : M) {
if (!shouldOutlineFrom(F))		if (!shouldOutlineFrom(F)) {
		LLVM_DEBUG(llvm::dbgs() << "Not outlining in " << F.getName() << "\n");
continue;		continue;
		}

		LLVM_DEBUG(llvm::dbgs() << "Outlining in " << F.getName() << "\n");
DominatorTree DT(F);		DominatorTree DT(F);
PostDomTree PDT(F);		PostDomTree PDT(F);
PDT.recalculate(F);		PDT.recalculate(F);
DenseSetBB HotBlocks;		BlockFrequencyInfo *BFI = GetBFI(F);
if (EnableStaticAnalyis) // Static analysis of cold blocks.
HotBlocks = getHotBlocks(F);

const Function *Outlined = outlineColdBlocks(F, HotBlocks, &DT, &PDT);		BlockSequence ColdRegion = getLargestColdRegion(F, *PSI, BFI, DT, PDT);
if (Outlined)		if (ColdRegion.empty())
		continue;

		OptimizationRemarkEmitter &ORE = (*GetORE)(F);
		Function *Outlined =
		extractColdRegion(ColdRegion, DT, BFI, ORE, /*Count=*/1);
		if (Outlined) {
OutlinedFunctions.insert(Outlined);		OutlinedFunctions.insert(Outlined);
		Changed = true;
}		}
return true;		}
		return Changed;
}		}

bool HotColdSplittingLegacyPass::runOnModule(Module &M) {		bool HotColdSplittingLegacyPass::runOnModule(Module &M) {
if (skipModule(M))		if (skipModule(M))
return false;		return false;
ProfileSummaryInfo *PSI =		ProfileSummaryInfo *PSI =
getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();		getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
auto GTTI = [this](Function &F) -> TargetTransformInfo & {		auto GTTI = [this](Function &F) -> TargetTransformInfo & {
▲ Show 20 Lines • Show All 58 Lines • Show Last 20 Lines

llvm/trunk/lib/Transforms/Utils/CodeExtractor.cpp

Show First 20 Lines • Show All 1,267 Lines • ▼ Show 20 Lines	for (BasicBlock::iterator I = header->begin(); isa<PHINode>(I); ++I) {
for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)		for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
if (!Blocks.count(PN->getIncomingBlock(i)))		if (!Blocks.count(PN->getIncomingBlock(i)))
PN->setIncomingBlock(i, newFuncRoot);		PN->setIncomingBlock(i, newFuncRoot);
}		}

// Look at all successors of the codeReplacer block. If any of these blocks		// Look at all successors of the codeReplacer block. If any of these blocks
// had PHI nodes in them, we need to update the "from" block to be the code		// had PHI nodes in them, we need to update the "from" block to be the code
// replacer, not the original block in the extracted region.		// replacer, not the original block in the extracted region.
std::vector<BasicBlock *> Succs(succ_begin(codeReplacer),		for (BasicBlock *SuccBB : successors(codeReplacer)) {
succ_end(codeReplacer));		for (PHINode &PN : SuccBB->phis()) {
for (unsigned i = 0, e = Succs.size(); i != e; ++i)		Value *IncomingCodeReplacerVal = nullptr;
for (BasicBlock::iterator I = Succs[i]->begin(); isa<PHINode>(I); ++I) {		SmallVector<unsigned, 2> IncomingValsToRemove;
PHINode *PN = cast<PHINode>(I);		for (unsigned I = 0, E = PN.getNumIncomingValues(); I != E; ++I) {
std::set<BasicBlock*> ProcessedPreds;		BasicBlock *IncomingBB = PN.getIncomingBlock(I);
for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
if (Blocks.count(PN->getIncomingBlock(i))) {		// Ignore incoming values from outside of the extracted region.
if (ProcessedPreds.insert(PN->getIncomingBlock(i)).second)		if (!Blocks.count(IncomingBB))
PN->setIncomingBlock(i, codeReplacer);		continue;
else {
// There were multiple entries in the PHI for this block, now there		// Ensure that there is only one incoming value from codeReplacer.
// is only one, so remove the duplicated entries.		if (!IncomingCodeReplacerVal) {
PN->removeIncomingValue(i, false);		PN.setIncomingBlock(I, codeReplacer);
--i; --e;		IncomingCodeReplacerVal = PN.getIncomingValue(I);
		} else {
		assert(IncomingCodeReplacerVal == PN.getIncomingValue(I) &&
		"PHI has two incompatbile incoming values from codeRepl");
		IncomingValsToRemove.push_back(I);
}		}
}		}

		for (unsigned I : reverse(IncomingValsToRemove))
		PN.removeIncomingValue(I, /*DeletePHIIfEmpty=*/false);
		}
}		}

// Erase debug info intrinsics. Variable updates within the new function are		// Erase debug info intrinsics. Variable updates within the new function are
// invisible to debuggers. This could be improved by defining a DISubprogram		// invisible to debuggers. This could be improved by defining a DISubprogram
// for the new function.		// for the new function.
for (BasicBlock &BB : *newFunction) {		for (BasicBlock &BB : *newFunction) {
auto BlockIt = BB.begin();		auto BlockIt = BB.begin();
while (BlockIt != BB.end()) {		while (BlockIt != BB.end()) {
Instruction *Inst = &*BlockIt;		Instruction *Inst = &*BlockIt;
Show All 10 Lines

llvm/trunk/test/Transforms/HotColdSplit/do-not-split.ll

				; RUN: opt -hotcoldsplit -S < %s \| FileCheck %s
				; RUN: opt -passes=hotcoldsplit -S < %s \| FileCheck %s

				; Check that these functions are not split. Outlined functions are called from a
				; basic block named codeRepl.

				; The cold region is too small to split.
				; CHECK-LABEL: @foo
				; CHECK-NOT: codeRepl
				define void @foo() {
				entry:
				br i1 undef, label %if.then, label %if.end

				if.then: ; preds = %entry
				unreachable

				if.end: ; preds = %entry
				br label %if.then12

				if.then12: ; preds = %if.end
				br label %cleanup40

				cleanup40: ; preds = %if.then12
				br label %return

				return: ; preds = %cleanup40
				ret void
				}

				; Make sure we don't try to outline the entire function.
				; CHECK-LABEL: @fun
				; CHECK-NOT: codeRepl
				define void @fun() {
				entry:
				br i1 undef, label %if.then, label %if.end

				if.then: ; preds = %entry
				br label %if.end

				if.end: ; preds = %if.then, %entry
				ret void
				}

				; Don't outline infinite loops.
				; CHECK-LABEL: @infinite_loop
				; CHECK-NOT: codeRepl
				define void @infinite_loop() {
				entry:
				br label %loop

				loop:
				call void @sink()
				br label %loop
				}

				declare void @sink() cold

llvm/trunk/test/Transforms/HotColdSplit/duplicate-phi-preds-crash.ll

				; RUN: opt -S -hotcoldsplit < %s \| FileCheck %s

				target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
				target triple = "x86_64-apple-macosx10.14.0"

				declare void @sideeffect(i64)

				declare i8* @realloc(i8* %ptr, i64 %size)

				declare void @free(i8* %ptr)

				declare void @sink() cold

				; CHECK-LABEL: define {{.*}}@realloc2(
				; CHECK: call {{.*}}@sideeffect(
				; CHECK: call {{.*}}@realloc(
				; CHECK-LABEL: codeRepl:
				; CHECK-NEXT: call {{.*}}@realloc2.cold.1(i64 %size, i8* %ptr)
				; CHECK-LABEL: cleanup:
				; CHECK-NEXT: phi i8* [ null, %if.then ], [ null, %codeRepl ], [ %call, %if.end ]
				define i8* @realloc2(i8* %ptr, i64 %size) {
				entry:
				%0 = add i64 %size, -1
				%1 = icmp ugt i64 %0, 184549375
				br i1 %1, label %if.then, label %if.end

				if.then: ; preds = %entry
				call void @sideeffect(i64 %size)
				br label %cleanup

				if.end: ; preds = %entry
				%call = call i8* @realloc(i8* %ptr, i64 %size)
				%tobool1 = icmp eq i8* %call, null
				br i1 %tobool1, label %if.then2, label %cleanup

				if.then2: ; preds = %if.end
				call void @sideeffect(i64 %size)
				call void @sink()
				%tobool3 = icmp eq i8* %ptr, null
				br i1 %tobool3, label %cleanup, label %if.then4

				if.then4: ; preds = %if.then2
				call void @free(i8* %ptr)
				br label %cleanup

				cleanup: ; preds = %if.end, %if.then4, %if.then2, %if.then
				%retval.0 = phi i8* [ null, %if.then ], [ null, %if.then2 ], [ null, %if.then4 ], [ %call, %if.end ]
				ret i8* %retval.0
				}

				; CHECK-LABEL: define {{.*}}@realloc2.cold.1(
				; CHECK: call {{.*}}@sideeffect
				; CHECK: call {{.*}}@sink
				; CHECK: call {{.*}}@free

llvm/trunk/test/Transforms/HotColdSplit/multiple-exits.ll

				; RUN: opt -S -hotcoldsplit < %s \| FileCheck %s

				; Source:
				;
				; extern void sideeffect(int);
				; extern void __attribute__((cold)) sink();
				; void foo(int cond) {
				; if (cond) { //< Start outlining here.
				; sink();
				; if (cond > 10)
				; goto exit1;
				; else
				; goto exit2;
				; }
				; exit1:
				; sideeffect(1);
				; return;
				; exit2:
				; sideeffect(2);
				; return;
				; }

				target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
				target triple = "x86_64-apple-macosx10.14.0"

				; CHECK-LABEL: define {{.*}}@foo(
				; CHECK: br i1 {{.*}}, label %exit1, label %codeRepl
				; CHECK-LABEL: codeRepl:
				; CHECK: [[targetBlock:%.*]] = call i1 @foo.cold.1(
				; CHECK-NEXT: br i1 [[targetBlock]], label %exit1, label %[[return:.*]]
				; CHECK-LABEL: exit1:
				; CHECK: call {{.*}}@sideeffect(i32 1)
				; CHECK: [[return]]:
				; CHECK-NEXT: ret void
				define void @foo(i32 %cond) {
				entry:
				%tobool = icmp eq i32 %cond, 0
				br i1 %tobool, label %exit1, label %if.then

				if.then: ; preds = %entry
				tail call void (...) @sink()
				%cmp = icmp sgt i32 %cond, 10
				br i1 %cmp, label %exit1, label %exit2

				exit1: ; preds = %entry, %if.then
				call void @sideeffect(i32 1)
				br label %return

				exit2: ; preds = %if.then
				call void @sideeffect(i32 2)
				br label %return

				return: ; preds = %exit2, %exit1
				ret void
				}

				; CHECK-LABEL: define {{.*}}@foo.cold.1(
				; TODO: Eliminate this unnecessary unconditional branch.
				; CHECK: br
				; CHECK: [[exit1Stub:.*]]:
				; CHECK-NEXT: ret i1 true
				; CHECK: [[returnStub:.*]]:
				; CHECK-NEXT: ret i1 false
				; CHECK: call {{.*}}@sink
				; CHECK-NEXT: [[cmp:%.*]] = icmp
				; CHECK-NEXT: br i1 [[cmp]], label %[[exit1Stub]], label %exit2
				; CHECK-LABEL: exit2:
				; CHECK-NEXT: call {{.*}}@sideeffect(i32 2)
				; CHECK-NEXT: br label %[[returnStub]]

				declare void @sink(...) cold

				declare void @sideeffect(i32)

llvm/trunk/test/Transforms/HotColdSplit/outline-if-then-else.ll

				; RUN: opt -S -hotcoldsplit < %s \| FileCheck %s

				; Source:
				;
				; extern void sideeffect(int);
				; extern void __attribute__((cold)) sink();
				; void foo(int cond) {
				; if (cond) { //< Start outlining here.
				; if (cond > 10)
				; sideeffect(0);
				; else
				; sideeffect(1);
				; sink();
				; }
				; sideeffect(2);
				; }

				target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
				target triple = "x86_64-apple-macosx10.14.0"

				; CHECK-LABEL: define {{.*}}@foo(
				; CHECK: br i1 {{.*}}, label %codeRepl, label %if.end2
				; CHECK-LABEL: codeRepl:
				; CHECK-NEXT: call void @foo.cold.1
				; CHECK-LABEL: if.end2:
				; CHECK: call void @sideeffect(i32 2)
				define void @foo(i32 %cond) {
				entry:
				%cond.addr = alloca i32
				store i32 %cond, i32* %cond.addr
				%0 = load i32, i32* %cond.addr
				%tobool = icmp ne i32 %0, 0
				br i1 %tobool, label %if.then, label %if.end2

				if.then: ; preds = %entry
				%1 = load i32, i32* %cond.addr
				%cmp = icmp sgt i32 %1, 10
				br i1 %cmp, label %if.then1, label %if.else

				if.then1: ; preds = %if.then
				call void @sideeffect(i32 0)
				br label %if.end

				if.else: ; preds = %if.then
				call void @sideeffect(i32 1)
				br label %if.end

				if.end: ; preds = %if.else, %if.then1
				call void (...) @sink()
				ret void

				if.end2: ; preds = %entry
				call void @sideeffect(i32 2)
				ret void
				}

				; CHECK-LABEL: define {{.*}}@foo.cold.1
				; CHECK: call {{.*}}@sideeffect
				; CHECK: call {{.*}}@sideeffect
				; CHECK: call {{.*}}@sink

				declare void @sideeffect(i32)

				declare void @sink(...) cold

llvm/trunk/test/Transforms/HotColdSplit/outline-while-loop.ll

				; RUN: opt -S -hotcoldsplit < %s \| FileCheck %s

				; Source:
				;
				; extern void sideeffect(int);
				; extern void __attribute__((cold)) sink();
				; void foo(int cond) {
				; if (cond) { //< Start outlining here.
				; while (cond > 10) {
				; --cond;
				; sideeffect(0);
				; }
				; sink();
				; }
				; sideeffect(1);
				; }

				target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
				target triple = "x86_64-apple-macosx10.14.0"

				; CHECK-LABEL: define {{.*}}@foo(
				; CHECK: br i1 {{.*}}, label %if.end, label %codeRepl
				; CHECK-LABEL: codeRepl:
				; CHECK-NEXT: call void @foo.cold.1
				; CHECK-LABEL: if.end:
				; CHECK: call void @sideeffect(i32 1)
				define void @foo(i32 %cond) {
				entry:
				%tobool = icmp eq i32 %cond, 0
				br i1 %tobool, label %if.end, label %while.cond.preheader

				while.cond.preheader: ; preds = %entry
				%cmp3 = icmp sgt i32 %cond, 10
				br i1 %cmp3, label %while.body.preheader, label %while.end

				while.body.preheader: ; preds = %while.cond.preheader
				br label %while.body

				while.body: ; preds = %while.body.preheader, %while.body
				%cond.addr.04 = phi i32 [ %dec, %while.body ], [ %cond, %while.body.preheader ]
				%dec = add nsw i32 %cond.addr.04, -1
				tail call void @sideeffect(i32 0) #3
				%cmp = icmp sgt i32 %dec, 10
				br i1 %cmp, label %while.body, label %while.end.loopexit

				while.end.loopexit: ; preds = %while.body
				br label %while.end

				while.end: ; preds = %while.end.loopexit, %while.cond.preheader
				tail call void (...) @sink()
				ret void

				if.end: ; preds = %entry
				tail call void @sideeffect(i32 1)
				ret void
				}

				; CHECK-LABEL: define {{.*}}@foo.cold.1
				; CHECK: phi i32
				; CHECK-NEXT: add nsw i32
				; CHECK-NEXT: call {{.*}}@sideeffect
				; CHECK-NEXT: icmp
				; CHECK-NEXT: br

				declare void @sideeffect(i32)

				declare void @sink(...) cold

llvm/trunk/test/Transforms/HotColdSplit/split-cold-1.ll

	; RUN: opt -hotcoldsplit -S < %s \| FileCheck %s
	; RUN: opt -passes=hotcoldsplit -S < %s \| FileCheck %s

	; Check that the function is not split. If it were, the outlined function
	; would be called from a basic block named codeRepl.

	; CHECK-LABEL: @foo
	; CHECK-NOT: codeRepl
	; Test input. entry branches on undef either to if.then, which ends in
	; unreachable, or to if.end, which reaches the single ret through a chain of
	; unconditional branches.
	define void @foo() {
	entry:
	br i1 undef, label %if.then, label %if.end

	if.then: ; preds = %entry
	; This block never returns to the caller.
	unreachable

	if.end: ; preds = %entry
	br label %if.then12

	if.then12: ; preds = %if.end
	br label %cleanup40

	cleanup40: ; preds = %if.then12
	br label %return

	return: ; preds = %cleanup40
	ret void
	}

	; Check that the function is not split. We used to outline the full function.

	; CHECK-LABEL: @fun
	; CHECK-NOT: codeRepl

	; Test input. entry branches on undef either to if.then or straight to
	; if.end; if.then contains only a branch to if.end, which returns.
	define void @fun() {
	entry:
	br i1 undef, label %if.then, label %if.end

	if.then: ; preds = %entry
	br label %if.end

	if.end: ; preds = %if.then, %entry
	ret void
	}

llvm/trunk/unittests/Transforms/Utils/CodeExtractorTest.cpp

	Show All 15 Lines
	#include "llvm/IR/Verifier.h"			#include "llvm/IR/Verifier.h"
	#include "llvm/IRReader/IRReader.h"			#include "llvm/IRReader/IRReader.h"
	#include "llvm/Support/SourceMgr.h"			#include "llvm/Support/SourceMgr.h"
	#include "gtest/gtest.h"			#include "gtest/gtest.h"

	using namespace llvm;			using namespace llvm;

	namespace {			namespace {
	TEST(CodeExtractor, ExitStub) {			TEST(CodeExtractor, DISABLED_ExitStub) {
	LLVMContext Ctx;			LLVMContext Ctx;
	SMDiagnostic Err;			SMDiagnostic Err;
	std::unique_ptr<Module> M(parseAssemblyString(R"invalid(			std::unique_ptr<Module> M(parseAssemblyString(R"invalid(
	define i32 @foo(i32 %x, i32 %y, i32 %z) {			define i32 @foo(i32 %x, i32 %y, i32 %z) {
	header:			header:
	%0 = icmp ugt i32 %x, %y			%0 = icmp ugt i32 %x, %y
	br i1 %0, label %body1, label %body2			br i1 %0, label %body1, label %body2

	body1:			body1:
	%1 = add i32 %z, 2			%1 = add i32 %z, 2
	br label %notExtracted			br label %notExtracted

	body2:			body2:
	%2 = mul i32 %z, 7			%2 = mul i32 %z, 7
	br label %notExtracted			br label %notExtracted

	notExtracted:			notExtracted:
	%3 = phi i32 [ %1, %body1 ], [ %2, %body2 ]			%3 = phi i32 [ %1, %body1 ], [ %2, %body2 ]
	%4 = add i32 %3, %x			%4 = add i32 %3, %x
	ret i32 %4			ret i32 %4
	}			}
	)invalid",			)invalid",
	Err, Ctx));			Err, Ctx));

				// CodeExtractor miscompiles this function. There appear to be some issues
				// with the handling of outlined regions with live output values.
				//
				// In the original function, CE adds two reloads in the codeReplacer block:
				//
				// codeRepl: ; preds = %header
				// call void @foo_header.split(i32 %z, i32 %x, i32 %y, i32* %.loc, i32* %.loc1)
				// %.reload = load i32, i32* %.loc
				// %.reload2 = load i32, i32* %.loc1
				// br label %notExtracted
				//
				// These reloads must flow into the notExtracted block:
				//
				// notExtracted: ; preds = %codeRepl
				// %0 = phi i32 [ %.reload, %codeRepl ], [ %.reload2, %body2 ]
				//
				// The problem is that the PHI node in notExtracted now has an incoming
				// value from a BasicBlock that's in a different function.

	Function *Func = M->getFunction("foo");			Function *Func = M->getFunction("foo");
	SmallVector<BasicBlock *, 3> Candidates;			SmallVector<BasicBlock *, 3> Candidates;
	for (auto &BB : *Func) {			for (auto &BB : *Func) {
	if (BB.getName() == "body1")			if (BB.getName() == "body1")
	Candidates.push_back(&BB);			Candidates.push_back(&BB);
	if (BB.getName() == "body2")			if (BB.getName() == "body2")
	Candidates.push_back(&BB);			Candidates.push_back(&BB);
	}			}
	Show All 13 Lines

This is an archive of the discontinued LLVM Phabricator instance.

[HotColdSplitting] Identify larger cold regions using domtree queries
ClosedPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 170999

llvm/trunk/lib/Transforms/IPO/HotColdSplitting.cpp

llvm/trunk/lib/Transforms/Utils/CodeExtractor.cpp

llvm/trunk/test/Transforms/HotColdSplit/do-not-split.ll

llvm/trunk/test/Transforms/HotColdSplit/duplicate-phi-preds-crash.ll

llvm/trunk/test/Transforms/HotColdSplit/multiple-exits.ll

llvm/trunk/test/Transforms/HotColdSplit/outline-if-then-else.ll

llvm/trunk/test/Transforms/HotColdSplit/outline-while-loop.ll

llvm/trunk/test/Transforms/HotColdSplit/split-cold-1.ll

llvm/trunk/unittests/Transforms/Utils/CodeExtractorTest.cpp

This is an archive of the discontinued LLVM Phabricator instance.

[HotColdSplitting] Identify larger cold regions using domtree queries (Closed, Public)

Details

Diff Detail

Event Timeline

Revision Contents

Diff 170999

llvm/trunk/lib/Transforms/IPO/HotColdSplitting.cpp

llvm/trunk/lib/Transforms/Utils/CodeExtractor.cpp

llvm/trunk/test/Transforms/HotColdSplit/do-not-split.ll

llvm/trunk/test/Transforms/HotColdSplit/duplicate-phi-preds-crash.ll

llvm/trunk/test/Transforms/HotColdSplit/multiple-exits.ll

llvm/trunk/test/Transforms/HotColdSplit/outline-if-then-else.ll

llvm/trunk/test/Transforms/HotColdSplit/outline-while-loop.ll

llvm/trunk/test/Transforms/HotColdSplit/split-cold-1.ll

llvm/trunk/unittests/Transforms/Utils/CodeExtractorTest.cpp

[HotColdSplitting] Identify larger cold regions using domtree queries
ClosedPublic