This is an archive of the discontinued LLVM Phabricator instance.

[HotColdSplitting] Allow outlining single-block cold regions
ClosedPublic

Authored by vsk on Oct 29 2018, 11:52 AM.

Download Raw Diff

Details

Reviewers

sebpop
hiraditya
tejohnson

Commits

rGdd4be53b20a8: [HotColdSplitting] Allow outlining single-block cold regions
rL345524: [HotColdSplitting] Allow outlining single-block cold regions

Summary

It can be profitable to outline single-block cold regions because they
may be large.

Allow outlining single-block regions if they contain at least some threshold
number of non-debug, non-terminator instructions. I chose 3 as the threshold
after experimenting with several internal frameworks.

In practice, reducing the threshold further did not give much
improvement, whereas increasing it resulted in substantial regressions.

Diff Detail

Repository: rL LLVM

Event Timeline

vsk created this revision. Oct 29 2018, 11:52 AM

Ok, thanks!

This revision is now accepted and ready to land. Oct 29 2018, 12:02 PM

Closed by commit rL345524: [HotColdSplitting] Allow outlining single-block cold regions (authored by vedantk). · Explain Why · Oct 29 2018, 12:18 PM

This revision was automatically updated to reflect the committed changes.

junbuml added a subscriber: junbuml. Oct 29 2018, 1:02 PM

junbuml added inline comments.

llvm/trunk/lib/Transforms/IPO/HotColdSplitting.cpp
145	Don't you think using TTI.getInstructionCost(&I, TargetTransformInfo::TCK_CodeSize) is the right thing to do instead of counting the number of instructions?
245	Why don't you use MinOutliningInstCount directly in hasMinimumInstCount() instead of passing it as a parameter?

vsk added inline comments. Oct 29 2018, 2:01 PM

llvm/trunk/lib/Transforms/IPO/HotColdSplitting.cpp
145	Thanks for suggesting this. I'll try this out and report back (with a patch if all goes well).
245	Good point, I'll address this in a follow-up.

Revision Contents

Path

Size

llvm/

trunk/

lib/

Transforms/

IPO/

HotColdSplitting.cpp

23 lines

test/

Transforms/

HotColdSplit/

do-not-split.ll

64 lines

minsize.ll

23 lines

split-out-dbg-val-of-arg.ll

34 lines

Diff 171551

llvm/trunk/lib/Transforms/IPO/HotColdSplitting.cpp

Show All 25 Lines
#include "llvm/IR/BasicBlock.h"		#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/CFG.h"		#include "llvm/IR/CFG.h"
#include "llvm/IR/DataLayout.h"		#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DiagnosticInfo.h"		#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/Dominators.h"		#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"		#include "llvm/IR/Function.h"
#include "llvm/IR/Instruction.h"		#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"		#include "llvm/IR/Instructions.h"
		#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Metadata.h"		#include "llvm/IR/Metadata.h"
#include "llvm/IR/Module.h"		#include "llvm/IR/Module.h"
#include "llvm/IR/PassManager.h"		#include "llvm/IR/PassManager.h"
#include "llvm/IR/Type.h"		#include "llvm/IR/Type.h"
#include "llvm/IR/Use.h"		#include "llvm/IR/Use.h"
#include "llvm/IR/User.h"		#include "llvm/IR/User.h"
#include "llvm/IR/Value.h"		#include "llvm/IR/Value.h"
#include "llvm/Pass.h"		#include "llvm/Pass.h"
Show All 18 Lines
STATISTIC(NumColdRegionsFound, "Number of cold regions found.");		STATISTIC(NumColdRegionsFound, "Number of cold regions found.");
STATISTIC(NumColdRegionsOutlined, "Number of cold regions outlined.");		STATISTIC(NumColdRegionsOutlined, "Number of cold regions outlined.");

using namespace llvm;		using namespace llvm;

static cl::opt<bool> EnableStaticAnalyis("hot-cold-static-analysis",		static cl::opt<bool> EnableStaticAnalyis("hot-cold-static-analysis",
cl::init(true), cl::Hidden);		cl::init(true), cl::Hidden);

		static cl::opt<unsigned> MinOutliningInstCount(
		"min-outlining-inst-count", cl::init(3), cl::Hidden,
		cl::desc("Minimum number of instructions needed for a single-block region "
		"to be an outlining candidate"));

namespace {		namespace {

struct PostDomTree : PostDomTreeBase<BasicBlock> {		struct PostDomTree : PostDomTreeBase<BasicBlock> {
PostDomTree(Function &F) { recalculate(F); }		PostDomTree(Function &F) { recalculate(F); }
};		};

/// A sequence of basic blocks.		/// A sequence of basic blocks.
▲ Show 20 Lines • Show All 49 Lines • ▼ Show 20 Lines	static bool unlikelyExecuted(const BasicBlock &BB) {
return false;		return false;
}		}

/// Check whether it's safe to outline \p BB.		/// Check whether it's safe to outline \p BB.
static bool mayExtractBlock(const BasicBlock &BB) {		static bool mayExtractBlock(const BasicBlock &BB) {
return !BB.hasAddressTaken();		return !BB.hasAddressTaken();
}		}

		/// Check whether \p BB has at least \p Min non-debug, non-terminator
		/// instructions.
		static bool hasMinimumInstCount(const BasicBlock &BB, unsigned Min) {
		unsigned Count = 0;
		for (const Instruction &I : BB) {
		if (isa<DbgInfoIntrinsic>(&I) \|\| &I == BB.getTerminator())
		continue;
		if (++Count >= Min)
		junbumlUnsubmitted Not Done Reply Inline Actions Don't you think using TTI.getInstructionCost(&I, TargetTransformInfo::TCK_CodeSize) is the right thing to do instead of counting the number of instruction? junbuml: Don't you think using TTI.getInstructionCost(&I, TargetTransformInfo::TCK_CodeSize) is the…
		vskAuthorUnsubmitted Not Done Reply Inline Actions Thanks for suggesting this. I'll try this out and report back (with a patch if all goes well). vsk: Thanks for suggesting this. I'll try this out and report back (with a patch if all goes well).
		return true;
		}
		return false;
		}

/// Identify the maximal region of cold blocks which includes \p SinkBB.		/// Identify the maximal region of cold blocks which includes \p SinkBB.
///		///
/// Include all blocks post-dominated by \p SinkBB, \p SinkBB itself, and all		/// Include all blocks post-dominated by \p SinkBB, \p SinkBB itself, and all
/// blocks dominated by \p SinkBB. Exclude all other blocks, and blocks which		/// blocks dominated by \p SinkBB. Exclude all other blocks, and blocks which
/// cannot be outlined.		/// cannot be outlined.
///		///
/// Return an empty sequence if the cold region is too small to outline, or if		/// Return an empty sequence if the cold region is too small to outline, or if
/// the cold region has no warm predecessors.		/// the cold region has no warm predecessors.
▲ Show 20 Lines • Show All 77 Lines • ▼ Show 20 Lines	if (!SinkDom \|\| !mayExtractBlock(SuccBB)) {
SuccIt.skipChildren();		SuccIt.skipChildren();
continue;		continue;
}		}

ColdRegion.push_back(&SuccBB);		ColdRegion.push_back(&SuccBB);
++SuccIt;		++SuccIt;
}		}

// TODO: Consider outlining regions with just 1 block, but more than some		if (ColdRegion.size() == 1 &&
// threshold of instructions.		!hasMinimumInstCount(*ColdRegion[0], MinOutliningInstCount))
		junbumlUnsubmitted Not Done Reply Inline Actions Why don't you use MinOutliningInstCount directly in hasMinimumInstCount() instead of passing as a parameter? junbuml: Why don't you use MinOutliningInstCount directly in hasMinimumInstCount() instead of passing as…
		vskAuthorUnsubmitted Not Done Reply Inline Actions Good point, I'll address this in a follow-up. vsk: Good point, I'll address this in a follow-up.
if (ColdRegion.size() == 1)
return {};		return {};

return ColdRegion;		return ColdRegion;
}		}

/// Get the largest cold region in \p F.		/// Get the largest cold region in \p F.
static BlockSequence getLargestColdRegion(Function &F, ProfileSummaryInfo &PSI,		static BlockSequence getLargestColdRegion(Function &F, ProfileSummaryInfo &PSI,
BlockFrequencyInfo *BFI,		BlockFrequencyInfo *BFI,
▲ Show 20 Lines • Show All 266 Lines • Show Last 20 Lines

llvm/trunk/test/Transforms/HotColdSplit/do-not-split.ll

	; RUN: opt -hotcoldsplit -S < %s \| FileCheck %s			; RUN: opt -hotcoldsplit -S < %s \| FileCheck %s
	; RUN: opt -passes=hotcoldsplit -S < %s \| FileCheck %s			; RUN: opt -passes=hotcoldsplit -S < %s \| FileCheck %s

	; Check that these functions are not split. Outlined functions are called from a			; Check that these functions are not split. Outlined functions are called from a
	; basic block named codeRepl.			; basic block named codeRepl.

	; The cold region is too small to split.			; The cold region is too small to split.
	; CHECK-LABEL: @foo			; CHECK-LABEL: @foo
	; CHECK-NOT: codeRepl			; CHECK-NOT: foo.cold.1
	define void @foo() {			define void @foo() {
	entry:			entry:
	br i1 undef, label %if.then, label %if.end			br i1 undef, label %if.then, label %if.end

	if.then: ; preds = %entry			if.then: ; preds = %entry
	unreachable			unreachable

	if.end: ; preds = %entry			if.end: ; preds = %entry
	br label %if.then12			ret void
				}

	if.then12: ; preds = %if.end			; The cold region is still too small to split.
	br label %cleanup40			; CHECK-LABEL: @bar
				; CHECK-NOT: bar.cold.1
				define void @bar() {
				entry:
				br i1 undef, label %if.then, label %if.end

	cleanup40: ; preds = %if.then12			if.then: ; preds = %entry
	br label %return			call void @sink()
				call void @sink()
				ret void

	return: ; preds = %cleanup40			if.end: ; preds = %entry
	ret void			ret void
	}			}

	; Make sure we don't try to outline the entire function.			; Make sure we don't try to outline the entire function.
	; CHECK-LABEL: @fun			; CHECK-LABEL: @fun
	; CHECK-NOT: codeRepl			; CHECK-NOT: fun.cold.1
	define void @fun() {			define void @fun() {
	entry:			entry:
	br i1 undef, label %if.then, label %if.end			br i1 undef, label %if.then, label %if.end

	if.then: ; preds = %entry			if.then: ; preds = %entry
	br label %if.end			br label %if.end

	if.end: ; preds = %entry			if.end: ; preds = %entry
	ret void			ret void
	}			}

	; Don't outline infinite loops.			; Don't outline infinite loops.
	; CHECK-LABEL: @infinite_loop			; CHECK-LABEL: @infinite_loop
	; CHECK-NOT: codeRepl			; CHECK-NOT: infinite_loop.cold.1
	define void @infinite_loop() {			define void @infinite_loop() {
	entry:			entry:
	br label %loop			br label %loop

	loop:			loop:
	call void @sink()			call void @sink()
				call void @sink()
				call void @sink()
	br label %loop			br label %loop
	}			}

				; Don't count debug intrinsics towards the outlining threshold.
				; CHECK-LABEL: @dont_count_debug_intrinsics
				; CHECK-NOT: dont_count_debug_intrinsics.cold.1
				define void @dont_count_debug_intrinsics(i32 %arg1) !dbg !6 {
				entry:
				%var = add i32 0, 0, !dbg !11
				br i1 undef, label %if.then, label %if.end

				if.then: ; preds = %entry
				ret void

				if.end: ; preds = %entry
				call void @llvm.dbg.value(metadata i32 %arg1, metadata !9, metadata !DIExpression()), !dbg !11
				call void @llvm.dbg.value(metadata i32 %arg1, metadata !9, metadata !DIExpression()), !dbg !11
				call void @sink()
				ret void
				}

				declare void @llvm.dbg.value(metadata, metadata, metadata)

	declare void @sink() cold			declare void @sink() cold

				!llvm.dbg.cu = !{!0}
				!llvm.debugify = !{!3, !4}
				!llvm.module.flags = !{!5}

				!0 = distinct !DICompileUnit(language: DW_LANG_C, file: !1, producer: "debugify", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, enums: !2)
				!1 = !DIFile(filename: "<stdin>", directory: "/")
				!2 = !{}
				!3 = !{i32 7}
				!4 = !{i32 1}
				!5 = !{i32 2, !"Debug Info Version", i32 3}
				!6 = distinct !DISubprogram(name: "dont_count_debug_intrinsics", linkageName: "dont_count_debug_intrinsics", scope: null, file: !1, line: 1, type: !7, isLocal: false, isDefinition: true, scopeLine: 1, isOptimized: true, unit: !0, retainedNodes: !8)
				!7 = !DISubroutineType(types: !2)
				!8 = !{!9}
				!9 = !DILocalVariable(name: "1", scope: !6, file: !1, line: 1, type: !10)
				!10 = !DIBasicType(name: "ty32", size: 32, encoding: DW_ATE_unsigned)
				!11 = !DILocation(line: 1, column: 1, scope: !6)

llvm/trunk/test/Transforms/HotColdSplit/minsize.ll

	; RUN: opt -hotcoldsplit -S < %s \| FileCheck %s			; RUN: opt -hotcoldsplit -S < %s \| FileCheck %s

	; CHECK-LABEL: @fun			; CHECK-LABEL: @fun
	; CHECK: codeRepl:			; CHECK: call void @fun.cold.1
	; CHECK-NEXT: call void @fun.cold.1

	define void @fun() {			define void @fun() {
	entry:			entry:
	br i1 undef, label %if.then, label %if.else			br i1 undef, label %if.then, label %if.else

	if.then:			if.then:
	ret void			ret void

	if.else:			if.else:
	br label %if.then4			call void @sink()
				call void @sink()
	if.then4:			call void @sink()
	br i1 undef, label %if.then5, label %if.end			ret void

	if.then5:
	br label %cleanup

	if.end:
	br label %cleanup

	cleanup:
	%cleanup.dest.slot.0 = phi i32 [ 1, %if.then5 ], [ 0, %if.end ]
	unreachable
	}			}

				declare void @sink() cold

	; CHECK: define {{.}} @fun.cold.1{{.}}#[[outlined_func_attr:[0-9]+]]			; CHECK: define {{.}} @fun.cold.1{{.}}#[[outlined_func_attr:[0-9]+]]
	; CHECK: attributes #[[outlined_func_attr]] = { {{.*}}minsize			; CHECK: attributes #[[outlined_func_attr]] = { {{.*}}minsize

llvm/trunk/test/Transforms/HotColdSplit/split-out-dbg-val-of-arg.ll

	; RUN: opt -hotcoldsplit -S < %s \| FileCheck %s			; RUN: opt -hotcoldsplit -S < %s \| FileCheck %s

	; CHECK-LABEL: define {{.*}}@foo.cold			; CHECK-LABEL: define {{.*}}@foo.cold
	; CHECK-NOT: llvm.dbg.value			; CHECK-NOT: llvm.dbg.value

	define void @foo(i32 %arg1) !dbg !6 {			define void @foo(i32 %arg1) !dbg !6 {
	entry:			entry:
	%var = add i32 0, 0, !dbg !11			%var = add i32 0, 0, !dbg !11
	br i1 undef, label %if.then, label %if.end, !dbg !12			br i1 undef, label %if.then, label %if.end

	if.then: ; preds = %entry			if.then: ; preds = %entry
	ret void, !dbg !13			ret void

	if.end: ; preds = %entry			if.end: ; preds = %entry
	call void @llvm.dbg.value(metadata i32 %arg1, metadata !9, metadata !DIExpression()), !dbg !11			call void @llvm.dbg.value(metadata i32 %arg1, metadata !9, metadata !DIExpression()), !dbg !11
	br label %if.then12, !dbg !14			call void @sink()
				call void @sink()
	if.then12: ; preds = %if.end			call void @sink()
	br label %cleanup40, !dbg !15			ret void

	cleanup40: ; preds = %if.then12
	br i1 undef, label %if.then5, label %if.end1, !dbg !16

	if.then5:
	br label %return, !dbg !17

	if.end1:
	br label %return, !dbg !18

	return: ; preds = %cleanup40
	unreachable, !dbg !19
	}			}

	declare void @llvm.dbg.value(metadata, metadata, metadata)			declare void @llvm.dbg.value(metadata, metadata, metadata)

				declare void @sink() cold

	!llvm.dbg.cu = !{!0}			!llvm.dbg.cu = !{!0}
	!llvm.debugify = !{!3, !4}			!llvm.debugify = !{!3, !4}
	!llvm.module.flags = !{!5}			!llvm.module.flags = !{!5}

	!0 = distinct !DICompileUnit(language: DW_LANG_C, file: !1, producer: "debugify", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, enums: !2)			!0 = distinct !DICompileUnit(language: DW_LANG_C, file: !1, producer: "debugify", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, enums: !2)
	!1 = !DIFile(filename: "<stdin>", directory: "/")			!1 = !DIFile(filename: "<stdin>", directory: "/")
	!2 = !{}			!2 = !{}
	!3 = !{i32 7}			!3 = !{i32 7}
	!4 = !{i32 1}			!4 = !{i32 1}
	!5 = !{i32 2, !"Debug Info Version", i32 3}			!5 = !{i32 2, !"Debug Info Version", i32 3}
	!6 = distinct !DISubprogram(name: "foo", linkageName: "foo", scope: null, file: !1, line: 1, type: !7, isLocal: false, isDefinition: true, scopeLine: 1, isOptimized: true, unit: !0, retainedNodes: !8)			!6 = distinct !DISubprogram(name: "foo", linkageName: "foo", scope: null, file: !1, line: 1, type: !7, isLocal: false, isDefinition: true, scopeLine: 1, isOptimized: true, unit: !0, retainedNodes: !8)
	!7 = !DISubroutineType(types: !2)			!7 = !DISubroutineType(types: !2)
	!8 = !{!9}			!8 = !{!9}
	!9 = !DILocalVariable(name: "1", scope: !6, file: !1, line: 1, type: !10)			!9 = !DILocalVariable(name: "1", scope: !6, file: !1, line: 1, type: !10)
	!10 = !DIBasicType(name: "ty32", size: 32, encoding: DW_ATE_unsigned)			!10 = !DIBasicType(name: "ty32", size: 32, encoding: DW_ATE_unsigned)
	!11 = !DILocation(line: 1, column: 1, scope: !6)			!11 = !DILocation(line: 1, column: 1, scope: !6)
	!12 = !DILocation(line: 2, column: 1, scope: !6)
	!13 = !DILocation(line: 3, column: 1, scope: !6)
	!14 = !DILocation(line: 4, column: 1, scope: !6)
	!15 = !DILocation(line: 5, column: 1, scope: !6)
	!16 = !DILocation(line: 6, column: 1, scope: !6)
	!17 = !DILocation(line: 7, column: 1, scope: !6)
	!18 = !DILocation(line: 8, column: 1, scope: !6)
	!19 = !DILocation(line: 9, column: 1, scope: !6)