diff --git a/llvm/lib/Transforms/IPO/HotColdSplitting.cpp b/llvm/lib/Transforms/IPO/HotColdSplitting.cpp
--- a/llvm/lib/Transforms/IPO/HotColdSplitting.cpp
+++ b/llvm/lib/Transforms/IPO/HotColdSplitting.cpp
@@ -101,7 +101,11 @@
   return !(isa<ReturnInst>(I) || isa<IndirectBrInst>(I));
 }
 
-bool unlikelyExecuted(BasicBlock &BB) {
+/// Return true if static heuristics suggest \p BB is unlikely to be executed.
+/// \p PSI and \p BFI are only consulted for a block ending in a noreturn
+/// longjmp-like call. \p BFI may be null; \p PSI is dereferenced otherwise.
+bool unlikelyExecuted(BasicBlock &BB, ProfileSummaryInfo *PSI,
+                      BlockFrequencyInfo *BFI) {
   // Exception handling blocks are unlikely executed.
   if (BB.isEHPad() || isa<ResumeInst>(BB.getTerminator()))
     return true;
@@ -114,12 +118,24 @@
     return true;
 
   // The block is cold if it has an unreachable terminator, unless it's
-  // preceded by a call to a (possibly warm) noreturn call (e.g. longjmp).
+  // preceded by a call to a (possibly warm) noreturn call (e.g. longjmp);
+  // in the case of a longjmp, if the block is cold according to
+  // profile information, we mark it as unlikely to be executed as well.
   if (blockEndsInUnreachable(BB)) {
+    LLVM_DEBUG(dbgs() << "Block ends in unreachable: " << BB.getName() << "\n");
     if (auto *CI =
             dyn_cast_or_null<CallInst>(BB.getTerminator()->getPrevNode()))
-      if (CI->hasFnAttr(Attribute::NoReturn))
-        return false;
+      if (CI->hasFnAttr(Attribute::NoReturn)) {
+        // Only a longjmp is assumed to be possibly warm. An indirect call
+        // has no known callee (getCalledFunction() returns null), so it is
+        // conservatively treated as if it could be a longjmp.
+        bool MayBeLongjmp = true;
+        if (auto *II = dyn_cast<IntrinsicInst>(CI))
+          MayBeLongjmp = II->getIntrinsicID() == Intrinsic::eh_sjlj_longjmp;
+        else if (const Function *Callee = CI->getCalledFunction())
+          MayBeLongjmp = Callee->getName().contains("longjmp");
+        return !MayBeLongjmp || (BFI && PSI->isColdBlock(&BB, BFI));
+      }
     return true;
   }
 
@@ -575,7 +591,7 @@
       continue;
 
     bool Cold = (BFI && PSI->isColdBlock(BB, BFI)) ||
-                (EnableStaticAnalyis && unlikelyExecuted(*BB));
+                (EnableStaticAnalyis && unlikelyExecuted(*BB, PSI, BFI));
     if (!Cold)
       continue;
 
diff --git a/llvm/test/Transforms/HotColdSplit/sjlj-nosplit.ll b/llvm/test/Transforms/HotColdSplit/sjlj-nosplit.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/Transforms/HotColdSplit/sjlj-nosplit.ll
@@ -0,0 +1,103 @@
+; RUN: opt -hotcoldsplit -S < %s | FileCheck %s
+target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+@c = dso_local global i32 1, align 4
+@buf = dso_local global [20 x i8*] zeroinitializer, align 16
+
+; CHECK-LABEL: @f
+; CHECK-NOT: f.cold.1
+define dso_local void @f() #0 {
+entry:
+  %i = alloca i32, align 4
+  %j = alloca i32, align 4
+  %k = alloca i32, align 4
+  %0 = load i32, i32* @c, align 4
+  %tobool = icmp ne i32 %0, 0
+  br i1 %tobool, label %if.then, label %if.else
+
+if.then: ; preds = %entry
+  ret void
+
+if.else: ; preds = %entry
+  %1 = load i32, i32* @c, align 4
+  %inc = add nsw i32 %1, 1
+  store i32 %inc, i32* @c, align 4
+  %2 = load i32, i32* @c, align 4
+  %inc1 = add nsw i32 %2, 1
+  store i32 %inc1, i32* @c, align 4
+  %3 = load i32, i32* @c, align 4
+  %inc2 = add nsw i32 %3, 1
+  store i32 %inc2, i32* @c, align 4
+  %4 = load i32, i32* @c, align 4
+  %inc3 = add nsw i32 %4, 1
+  store i32 %inc3, i32* @c, align 4
+  %5 = load i32, i32* @c, align 4
+  %dec = add nsw i32 %5, -1
+  store i32 %dec, i32* @c, align 4
+  %6 = load i32, i32* @c, align 4
+  %dec4 = add nsw i32 %6, -1
+  store i32 %dec4, i32* @c, align 4
+  %7 = load i32, i32* @c, align 4
+  %inc5 = add nsw i32 %7, 1
+  store i32 %inc5, i32* @c, align 4
+  %8 = load i32, i32* @c, align 4
+  %inc6 = add nsw i32 %8, 1
+  store i32 %inc6, i32* @c, align 4
+  %9 = load i32, i32* @c, align 4
+  %add = add nsw i32 %9, 1
+  store i32 %add, i32* %i, align 4
+  %10 = load i32, i32* %i, align 4
+  %sub = sub nsw i32 %10, 1
+  store i32 %sub, i32* %j, align 4
+  %11 = load i32, i32* %i, align 4
+  %add7 = add nsw i32 %11, 2
+  store i32 %add7, i32* %k, align 4
+  call void @llvm.eh.sjlj.longjmp(i8* bitcast ([20 x i8*]* @buf to i8*)) ; callee is noreturn (attributes #1); this module has no profile metadata
+  unreachable
+}
+
+declare void @llvm.eh.sjlj.longjmp(i8*) #1
+
+; CHECK-LABEL: @main
+; CHECK-NOT: main.cold.1
+define dso_local i32 @main() #0 {
+entry:
+  %retval = alloca i32, align 4
+  %i = alloca i32, align 4
+  store i32 0, i32* %retval, align 4
+  store i32 0, i32* %i, align 4
+  %0 = call i8* @llvm.frameaddress.p0i8(i32 0)
+  store i8* %0, i8** getelementptr inbounds ([20 x i8*], [20 x i8*]* @buf, i64 0, i64 0), align 16
+  %1 = call i8* @llvm.stacksave()
+  store i8* %1, i8** getelementptr inbounds ([20 x i8*], [20 x i8*]* @buf, i64 0, i64 2), align 16
+  %2 = call i32 @llvm.eh.sjlj.setjmp(i8* bitcast ([20 x i8*]* @buf to i8*))
+  %tobool = icmp ne i32 %2, 0
+  br i1 %tobool, label %if.then, label %if.end
+
+if.then: ; preds = %entry
+  store i32 1, i32* %retval, align 4
+  br label %return
+
+if.end: ; preds = %entry
+  call void @f()
+  store i32 0, i32* %retval, align 4
+  br label %return
+
+return: ; preds = %if.end, %if.then
+  %3 = load i32, i32* %retval, align 4
+  ret i32 %3
+}
+
+declare i8* @llvm.frameaddress.p0i8(i32 immarg) #2
+
+declare i8* @llvm.stacksave() #3
+
+declare i32 @llvm.eh.sjlj.setjmp(i8*) #3
+
+attributes #0 = { nounwind uwtable }
+attributes #1 = { noreturn nounwind }
+attributes #2 = { nounwind readnone }
+attributes #3 = { nounwind }
+
+
diff --git a/llvm/test/Transforms/HotColdSplit/sjlj-split.ll b/llvm/test/Transforms/HotColdSplit/sjlj-split.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/Transforms/HotColdSplit/sjlj-split.ll
@@ -0,0 +1,136 @@
+; RUN: opt -profile-summary-cold-count=0 -hotcoldsplit -S < %s | FileCheck %s
+target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+@c = dso_local global i32 1, align 4
+@buf = dso_local global [20 x i8*] zeroinitializer, align 16
+
+; CHECK-LABEL: @f
+; CHECK: f.cold.1
+define dso_local void @f() #0 !prof !31 {
+entry:
+  %i = alloca i32, align 4
+  %j = alloca i32, align 4
+  %k = alloca i32, align 4
+  %0 = load i32, i32* @c, align 4
+  %tobool = icmp ne i32 %0, 0
+  br i1 %tobool, label %if.then, label %if.else, !prof !32 ; branch_weights in !32 are 1 and 0
+
+if.then: ; preds = %entry
+  ret void
+
+if.else: ; preds = %entry
+  %1 = load i32, i32* @c, align 4
+  %inc = add i32 %1, 1
+  store i32 %inc, i32* @c, align 4
+  %2 = load i32, i32* @c, align 4
+  %inc1 = add i32 %2, 1
+  store i32 %inc1, i32* @c, align 4
+  %3 = load i32, i32* @c, align 4
+  %inc2 = add i32 %3, 1
+  store i32 %inc2, i32* @c, align 4
+  %4 = load i32, i32* @c, align 4
+  %inc3 = add i32 %4, 1
+  store i32 %inc3, i32* @c, align 4
+  %5 = load i32, i32* @c, align 4
+  %dec = add i32 %5, -1
+  store i32 %dec, i32* @c, align 4
+  %6 = load i32, i32* @c, align 4
+  %dec4 = add i32 %6, -1
+  store i32 %dec4, i32* @c, align 4
+  %7 = load i32, i32* @c, align 4
+  %inc5 = add i32 %7, 1
+  store i32 %inc5, i32* @c, align 4
+  %8 = load i32, i32* @c, align 4
+  %inc6 = add i32 %8, 1
+  store i32 %inc6, i32* @c, align 4
+  %9 = load i32, i32* @c, align 4
+  %add = add i32 %9, 1
+  store i32 %add, i32* %i, align 4
+  %10 = load i32, i32* %i, align 4
+  %sub = sub i32 %10, 1
+  store i32 %sub, i32* %j, align 4
+  %11 = load i32, i32* %i, align 4
+  %add7 = add i32 %11, 2
+  store i32 %add7, i32* %k, align 4
+  call void @llvm.eh.sjlj.longjmp(i8* bitcast ([20 x i8*]* @buf to i8*)) ; callee is noreturn (attributes #1)
+  unreachable
+}
+
+declare void @llvm.eh.sjlj.longjmp(i8*) #1
+
+define dso_local i32 @main() #0 !prof !31 {
+entry:
+  %retval = alloca i32, align 4
+  %i = alloca i32, align 4
+  store i32 0, i32* %retval, align 4
+  store i32 0, i32* %i, align 4
+  %0 = call i8* @llvm.frameaddress.p0i8(i32 0)
+  store i8* %0, i8** getelementptr inbounds ([20 x i8*], [20 x i8*]* @buf, i64 0, i64 0), align 16
+  %1 = call i8* @llvm.stacksave()
+  store i8* %1, i8** getelementptr inbounds ([20 x i8*], [20 x i8*]* @buf, i64 0, i64 2), align 16
+  %2 = call i32 @llvm.eh.sjlj.setjmp(i8* bitcast ([20 x i8*]* @buf to i8*))
+  %tobool = icmp ne i32 %2, 0
+  br i1 %tobool, label %if.then, label %if.end, !prof !33 ; branch_weights in !33 are 0 and 1
+
+if.then: ; preds = %entry
+  store i32 1, i32* %retval, align 4
+  br label %return
+
+if.end: ; preds = %entry
+  call void @f()
+  store i32 0, i32* %retval, align 4
+  br label %return
+
+return: ; preds = %if.end, %if.then
+  %3 = load i32, i32* %retval, align 4
+  ret i32 %3
+}
+
+declare i8* @llvm.frameaddress.p0i8(i32 immarg) #2
+
+declare i8* @llvm.stacksave() #3
+
+declare i32 @llvm.eh.sjlj.setjmp(i8*) #3
+
+attributes #0 = { inlinehint nounwind uwtable }
+attributes #1 = { noreturn nounwind }
+attributes #2 = { nounwind readnone }
+attributes #3 = { nounwind }
+
+!llvm.module.flags = !{!0, !1}
+
+!0 = !{i32 1, !"wchar_size", i32 4}
+!1 = !{i32 1, !"ProfileSummary", !2}
+!2 = !{!3, !4, !5, !6, !7, !8, !9, !10, !11, !12}
+!3 = !{!"ProfileFormat", !"InstrProf"}
+!4 = !{!"TotalCount", i64 2}
+!5 = !{!"MaxCount", i64 1}
+!6 = !{!"MaxInternalCount", i64 1}
+!7 = !{!"MaxFunctionCount", i64 1}
+!8 = !{!"NumCounts", i64 4}
+!9 = !{!"NumFunctions", i64 2}
+!10 = !{!"IsPartialProfile", i64 0}
+!11 = !{!"PartialProfileRatio", double 0.000000e+00}
+!12 = !{!"DetailedSummary", !13}
+!13 = !{!14, !15, !16, !17, !18, !19, !20, !21, !22, !23, !24, !25, !26, !27, !28, !29}
+!14 = !{i32 10000, i64 0, i32 0}
+!15 = !{i32 100000, i64 0, i32 0}
+!16 = !{i32 200000, i64 0, i32 0}
+!17 = !{i32 300000, i64 0, i32 0}
+!18 = !{i32 400000, i64 0, i32 0}
+!19 = !{i32 500000, i64 1, i32 2}
+!20 = !{i32 600000, i64 1, i32 2}
+!21 = !{i32 700000, i64 1, i32 2}
+!22 = !{i32 800000, i64 1, i32 2}
+!23 = !{i32 900000, i64 1, i32 2}
+!24 = !{i32 950000, i64 1, i32 2}
+!25 = !{i32 990000, i64 1, i32 2}
+!26 = !{i32 999000, i64 1, i32 2}
+!27 = !{i32 999900, i64 1, i32 2}
+!28 = !{i32 999990, i64 1, i32 2}
+!29 = !{i32 999999, i64 1, i32 2}
+!31 = !{!"function_entry_count", i64 1} ; attached to both @f and @main
+!32 = !{!"branch_weights", i32 1, i32 0} ; attached to the conditional branch in @f
+!33 = !{!"branch_weights", i32 0, i32 1} ; attached to the conditional branch in @main
+
diff --git a/llvm/test/Transforms/HotColdSplit/split-assert-fail.ll b/llvm/test/Transforms/HotColdSplit/split-assert-fail.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/Transforms/HotColdSplit/split-assert-fail.ll
@@ -0,0 +1,47 @@
+; REQUIRES: asserts
+; RUN: opt -S -instsimplify -hotcoldsplit -debug < %s 2>&1 | FileCheck %s
+; RUN: opt -instcombine -hotcoldsplit -instsimplify %s -o /dev/null
+
+target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+@.str = private unnamed_addr constant [2 x i8] c"0\00", align 1
+@.str.1 = private unnamed_addr constant [14 x i8] c"assert-fail.c\00", align 1
+@__PRETTY_FUNCTION__.main = private unnamed_addr constant [15 x i8] c"int main(void)\00", align 1
+
+; CHECK: @f
+; CHECK-LABEL: codeRepl:
+; CHECK }
+; CHECK: define {{.*}}@f.cold.1()
+; CHECK-LABEL: newFuncRoot:
+; CHECK: br label %if.then
+
+; Function Attrs: nounwind willreturn
+define i32 @f() #0 {
+entry:
+  %retval = alloca i32, align 4
+  %i = alloca i32, align 4
+  store i32 0, i32* %retval, align 4
+  store i32 0, i32* %i, align 4
+  %0 = load i32, i32* %i, align 4
+  %cmp = icmp eq i32 %0, 2
+  br i1 %cmp, label %if.then, label %if.end
+
+if.then: ; preds = %entry
+  call void @__assert_fail(i8* getelementptr inbounds ([2 x i8], [2 x i8]* @.str, i64 0, i64 0), i8* getelementptr inbounds ([14 x i8], [14 x i8]* @.str.1, i64 0, i64 0), i32 10, i8* getelementptr inbounds ([15 x i8], [15 x i8]* @__PRETTY_FUNCTION__.main, i64 0, i64 0)) #1 ; call site and callee both carry #1 (noreturn nounwind)
+  unreachable
+
+if.end: ; preds = %entry
+  %1 = load i32, i32* %i, align 4
+  %add = add nsw i32 %1, 1
+  store i32 %add, i32* %i, align 4
+  %2 = load i32, i32* %i, align 4
+  ret i32 %2
+}
+
+; Function Attrs: noreturn nounwind
+declare dso_local void @__assert_fail(i8*, i8*, i32, i8*) #1
+
+attributes #0 = { nounwind willreturn }
+attributes #1 = { noreturn nounwind }
+