diff --git a/llvm/lib/Transforms/IPO/HotColdSplitting.cpp b/llvm/lib/Transforms/IPO/HotColdSplitting.cpp --- a/llvm/lib/Transforms/IPO/HotColdSplitting.cpp +++ b/llvm/lib/Transforms/IPO/HotColdSplitting.cpp @@ -119,6 +119,7 @@ // in the case of a longjmp, if the block is cold according to // profile information, we mark it as unlikely to be executed as well. if (blockEndsInUnreachable(BB)) { + LLVM_DEBUG(dbgs() << "Block ends in unreachable: " << BB.getName() << "\n"); if (auto *CI = dyn_cast_or_null(BB.getTerminator()->getPrevNode())) if (CI->hasFnAttr(Attribute::NoReturn)) { @@ -126,6 +127,7 @@ return (II->getIntrinsicID() != Intrinsic::eh_sjlj_longjmp) || (BFI && PSI->isColdBlock(&BB, BFI)); return !CI->getCalledFunction()->getName().startswith("longjmp") || + !CI->getCalledFunction()->getName().endswith("longjmp") || (BFI && PSI->isColdBlock(&BB, BFI)); } return true; diff --git a/llvm/test/Transforms/HotColdSplit/outline-unless-longjmp.ll b/llvm/test/Transforms/HotColdSplit/outline-unless-longjmp.ll deleted file mode 100644 --- a/llvm/test/Transforms/HotColdSplit/outline-unless-longjmp.ll +++ /dev/null @@ -1,66 +0,0 @@ -; REQUIRES: asserts -; RUN: opt -S -hotcoldsplit -debug < %s 2>&1 | FileCheck %s -; RUN: opt -hotcoldsplit %s -o /dev/null - -target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" -target triple = "x86_64-unknown-linux-gnu" - -%"class.std::ios_base::Init" = type { i8 } -%struct.__jmp_buf_tag = type { [8 x i64], i32, %struct.__sigset_t } -%struct.__sigset_t = type { [16 x i64] } - -@jbuf = dso_local global [1 x %struct.__jmp_buf_tag] zeroinitializer, align 16 - -; CHECK: @f -; CHECK-NOT: {{.*}}@f.cold.1 - - -declare dso_local i32 @__cxa_atexit(void (i8*)*, i8*, i8*) #3 - -define dso_local void @_Z1ai(i32 %i) #4 { -entry: - %i.addr = alloca i32, align 4 - store i32 %i, i32* %i.addr, align 4 - %0 = load i32, i32* %i.addr, align 4 - %add = add nsw i32 %0, 1 - call void @longjmp(%struct.__jmp_buf_tag* getelementptr inbounds ([1 x %struct.__jmp_buf_tag], [1 x %struct.__jmp_buf_tag]* @jbuf, i64 0, i64 0), i32 %add) #8 - unreachable -} - -declare dso_local void @longjmp(%struct.__jmp_buf_tag*, i32) #5 - -define dso_local i32 @f() #6 { -entry: - %retval = alloca i32, align 4 - %i = alloca i32, align 4 - store i32 0, i32* %retval, align 4 - store volatile i32 0, i32* %i, align 4 - %call = call i32 @_setjmp(%struct.__jmp_buf_tag* getelementptr inbounds ([1 x %struct.__jmp_buf_tag], [1 x %struct.__jmp_buf_tag]* @jbuf, i64 0, i64 0)) #9 - %cmp = icmp ne i32 %call, 3 - br i1 %cmp, label %if.then, label %if.end - -if.then: ; preds = %entry - %0 = load volatile i32, i32* %i, align 4 - %inc = add nsw i32 %0, 1 - store volatile i32 %inc, i32* %i, align 4 - call void @_Z1ai(i32 %0) #10 - unreachable - -if.end: ; preds = %entry - ret i32 0 -} - -; Function Attrs: nounwind returns_twice -declare dso_local i32 @_setjmp(%struct.__jmp_buf_tag*) #7 - - -attributes #3 = { nounwind } -attributes #4 = { noinline noreturn nounwind uwtable } -attributes #5 = { noreturn nounwind } -attributes #6 = { noinline norecurse nounwind uwtable } -attributes #7 = { nounwind returns_twice } -attributes #8 = { noreturn nounwind } -attributes #9 = { nounwind returns_twice } -attributes #10 = { noreturn } - - diff --git a/llvm/test/Transforms/HotColdSplit/sjlj-nosplit.ll b/llvm/test/Transforms/HotColdSplit/sjlj-nosplit.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Transforms/HotColdSplit/sjlj-nosplit.ll @@ -0,0 +1,103 @@ +; RUN: opt -hotcoldsplit -S < %s | FileCheck %s +target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +@c = dso_local global i32 1, align 4 +@buf = dso_local global [20 x i8*] zeroinitializer, align 16 + +; CHECK-LABEL: @f +; CHECK-NOT: f.cold.1 +define dso_local void @f() #0 { +entry: + %i = alloca i32, align 4 + %j = alloca i32, align 4 + %k = alloca i32, align 4 + %0 = load i32, i32* @c, align 4 + %tobool = icmp ne i32 %0, 0 + br i1 %tobool, label %if.then, label %if.else + +if.then: ; preds = %entry + ret void + +if.else: ; preds = %entry + %1 = load i32, i32* @c, align 4 + %inc = add nsw i32 %1, 1 + store i32 %inc, i32* @c, align 4 + %2 = load i32, i32* @c, align 4 + %inc1 = add nsw i32 %2, 1 + store i32 %inc1, i32* @c, align 4 + %3 = load i32, i32* @c, align 4 + %inc2 = add nsw i32 %3, 1 + store i32 %inc2, i32* @c, align 4 + %4 = load i32, i32* @c, align 4 + %inc3 = add nsw i32 %4, 1 + store i32 %inc3, i32* @c, align 4 + %5 = load i32, i32* @c, align 4 + %dec = add nsw i32 %5, -1 + store i32 %dec, i32* @c, align 4 + %6 = load i32, i32* @c, align 4 + %dec4 = add nsw i32 %6, -1 + store i32 %dec4, i32* @c, align 4 + %7 = load i32, i32* @c, align 4 + %inc5 = add nsw i32 %7, 1 + store i32 %inc5, i32* @c, align 4 + %8 = load i32, i32* @c, align 4 + %inc6 = add nsw i32 %8, 1 + store i32 %inc6, i32* @c, align 4 + %9 = load i32, i32* @c, align 4 + %add = add nsw i32 %9, 1 + store i32 %add, i32* %i, align 4 + %10 = load i32, i32* %i, align 4 + %sub = sub nsw i32 %10, 1 + store i32 %sub, i32* %j, align 4 + %11 = load i32, i32* %i, align 4 + %add7 = add nsw i32 %11, 2 + store i32 %add7, i32* %k, align 4 + call void @llvm.eh.sjlj.longjmp(i8* bitcast ([20 x i8*]* @buf to i8*)) + unreachable +} + +declare void @llvm.eh.sjlj.longjmp(i8*) #1 + +; CHECK-LABEL: @main +; CHECK-NOT: main.cold.1 +define dso_local i32 @main() #0 { +entry: + %retval = alloca i32, align 4 + %i = alloca i32, align 4 + store i32 0, i32* %retval, align 4 + store i32 0, i32* %i, align 4 + %0 = call i8* @llvm.frameaddress.p0i8(i32 0) + store i8* %0, i8** getelementptr inbounds ([20 x i8*], [20 x i8*]* @buf, i64 0, i64 0), align 16 + %1 = call i8* @llvm.stacksave() + store i8* %1, i8** getelementptr inbounds ([20 x i8*], [20 x i8*]* @buf, i64 0, i64 2), align 16 + %2 = call i32 @llvm.eh.sjlj.setjmp(i8* bitcast ([20 x i8*]* @buf to i8*)) + %tobool = icmp ne i32 %2, 0 + br i1 %tobool, label %if.then, label %if.end + +if.then: ; preds = %entry + store i32 1, i32* %retval, align 4 + br label %return + +if.end: ; preds = %entry + call void @f() + store i32 0, i32* %retval, align 4 + br label %return + +return: ; preds = %if.end, %if.then + %3 = load i32, i32* %retval, align 4 + ret i32 %3 +} + +declare i8* @llvm.frameaddress.p0i8(i32 immarg) #2 + +declare i8* @llvm.stacksave() #3 + +declare i32 @llvm.eh.sjlj.setjmp(i8*) #3 + +attributes #0 = { nounwind uwtable } +attributes #1 = { noreturn nounwind } +attributes #2 = { nounwind readnone } +attributes #3 = { nounwind } + + diff --git a/llvm/test/Transforms/HotColdSplit/sjlj-split.ll b/llvm/test/Transforms/HotColdSplit/sjlj-split.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Transforms/HotColdSplit/sjlj-split.ll @@ -0,0 +1,136 @@ +; RUN: opt -profile-summary-cold-count=0 -hotcoldsplit -S < %s | FileCheck %s +target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +@c = dso_local global i32 1, align 4 +@buf = dso_local global [20 x i8*] zeroinitializer, align 16 + +; CHECK-LABEL: @f +; CHECK: f.cold.1 +define dso_local void @f() #0 !prof !31 { +entry: + %i = alloca i32, align 4 + %j = alloca i32, align 4 + %k = alloca i32, align 4 + %0 = load i32, i32* @c, align 4 + %tobool = icmp ne i32 %0, 0 + br i1 %tobool, label %if.then, label %if.else, !prof !32 + +if.then: ; preds = %entry + ret void + +if.else: ; preds = %entry + %1 = load i32, i32* @c, align 4 + %inc = add i32 %1, 1 + store i32 %inc, i32* @c, align 4 + %2 = load i32, i32* @c, align 4 + %inc1 = add i32 %2, 1 + store i32 %inc1, i32* @c, align 4 + %3 = load i32, i32* @c, align 4 + %inc2 = add i32 %3, 1 + store i32 %inc2, i32* @c, align 4 + %4 = load i32, i32* @c, align 4 + %inc3 = add i32 %4, 1 + store i32 %inc3, i32* @c, align 4 + %5 = load i32, i32* @c, align 4 + %dec = add i32 %5, -1 + store i32 %dec, i32* @c, align 4 + %6 = load i32, i32* @c, align 4 + %dec4 = add i32 %6, -1 + store i32 %dec4, i32* @c, align 4 + %7 = load i32, i32* @c, align 4 + %inc5 = add i32 %7, 1 + store i32 %inc5, i32* @c, align 4 + %8 = load i32, i32* @c, align 4 + %inc6 = add i32 %8, 1 + store i32 %inc6, i32* @c, align 4 + %9 = load i32, i32* @c, align 4 + %add = add i32 %9, 1 + store i32 %add, i32* %i, align 4 + %10 = load i32, i32* %i, align 4 + %sub = sub i32 %10, 1 + store i32 %sub, i32* %j, align 4 + %11 = load i32, i32* %i, align 4 + %add7 = add i32 %11, 2 + store i32 %add7, i32* %k, align 4 + call void @llvm.eh.sjlj.longjmp(i8* bitcast ([20 x i8*]* @buf to i8*)) + unreachable +} + +declare void @llvm.eh.sjlj.longjmp(i8*) #1 + +define dso_local i32 @main() #0 !prof !31 { +entry: + %retval = alloca i32, align 4 + %i = alloca i32, align 4 + store i32 0, i32* %retval, align 4 + store i32 0, i32* %i, align 4 + %0 = call i8* @llvm.frameaddress.p0i8(i32 0) + store i8* %0, i8** getelementptr inbounds ([20 x i8*], [20 x i8*]* @buf, i64 0, i64 0), align 16 + %1 = call i8* @llvm.stacksave() + store i8* %1, i8** getelementptr inbounds ([20 x i8*], [20 x i8*]* @buf, i64 0, i64 2), align 16 + %2 = call i32 @llvm.eh.sjlj.setjmp(i8* bitcast ([20 x i8*]* @buf to i8*)) + %tobool = icmp ne i32 %2, 0 + br i1 %tobool, label %if.then, label %if.end, !prof !33 + +if.then: ; preds = %entry + store i32 1, i32* %retval, align 4 + br label %return + +if.end: ; preds = %entry + call void @f() + store i32 0, i32* %retval, align 4 + br label %return + +return: ; preds = %if.end, %if.then + %3 = load i32, i32* %retval, align 4 + ret i32 %3 +} + +declare i8* @llvm.frameaddress.p0i8(i32 immarg) #2 + +declare i8* @llvm.stacksave() #3 + +declare i32 @llvm.eh.sjlj.setjmp(i8*) #3 + +attributes #0 = { inlinehint nounwind uwtable } +attributes #1 = { noreturn nounwind } +attributes #2 = { nounwind readnone } +attributes #3 = { nounwind } + +!llvm.module.flags = !{!0, !1} + +!0 = !{i32 1, !"wchar_size", i32 4} +!1 = !{i32 1, !"ProfileSummary", !2} +!2 = !{!3, !4, !5, !6, !7, !8, !9, !10, !11, !12} +!3 = !{!"ProfileFormat", !"InstrProf"} +!4 = !{!"TotalCount", i64 2} +!5 = !{!"MaxCount", i64 1} +!6 = !{!"MaxInternalCount", i64 1} +!7 = !{!"MaxFunctionCount", i64 1} +!8 = !{!"NumCounts", i64 4} +!9 = !{!"NumFunctions", i64 2} +!10 = !{!"IsPartialProfile", i64 0} +!11 = !{!"PartialProfileRatio", double 0.000000e+00} +!12 = !{!"DetailedSummary", !13} +!13 = !{!14, !15, !16, !17, !18, !19, !20, !21, !22, !23, !24, !25, !26, !27, !28, !29} +!14 = !{i32 10000, i64 0, i32 0} +!15 = !{i32 100000, i64 0, i32 0} +!16 = !{i32 200000, i64 0, i32 0} +!17 = !{i32 300000, i64 0, i32 0} +!18 = !{i32 400000, i64 0, i32 0} +!19 = !{i32 500000, i64 1, i32 2} +!20 = !{i32 600000, i64 1, i32 2} +!21 = !{i32 700000, i64 1, i32 2} +!22 = !{i32 800000, i64 1, i32 2} +!23 = !{i32 900000, i64 1, i32 2} +!24 = !{i32 950000, i64 1, i32 2} +!25 = !{i32 990000, i64 1, i32 2} +!26 = !{i32 999000, i64 1, i32 2} +!27 = !{i32 999900, i64 1, i32 2} +!28 = !{i32 999990, i64 1, i32 2} +!29 = !{i32 999999, i64 1, i32 2} +!31 = !{!"function_entry_count", i64 1} +!32 = !{!"branch_weights", i32 1, i32 0} +!33 = !{!"branch_weights", i32 0, i32 1} +