Index: llvm/lib/Transforms/IPO/PartialInlining.cpp
===================================================================
--- llvm/lib/Transforms/IPO/PartialInlining.cpp
+++ llvm/lib/Transforms/IPO/PartialInlining.cpp
@@ -416,7 +416,9 @@
 
   auto IsSingleEntry = [](SmallVectorImpl<BasicBlock *> &BlockList) {
-    BasicBlock *Dom = BlockList.front();
-    return BlockList.size() > 1 && Dom->hasNPredecessors(1);
+    // A region consisting of a single block is a valid candidate as well.
+    // Check for emptiness before calling front(), so the size test actually
+    // guards the access instead of running after it.
+    return !BlockList.empty() && BlockList.front()->hasNPredecessors(1);
   };
 
   auto IsSingleExit =
Index: llvm/test/Transforms/PartialInlining/switch_stmt.ll
===================================================================
--- /dev/null
+++ llvm/test/Transforms/PartialInlining/switch_stmt.ll
@@ -0,0 +1,132 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -passes="partial-inliner" -skip-partial-inlining-cost-analysis -S < %s | FileCheck %s
+; RUN: opt -partial-inliner -skip-partial-inlining-cost-analysis -S < %s | FileCheck %s
+
+; Single-block regions must be eligible for outlining: the cold switch cases
+; %sw.bb and %sw.bb2 of @callee are each expected to be extracted into their
+; own function when @callee is partially inlined into @caller.
+
+define dso_local signext i32 @callee(i32 signext %c1, i32 signext %c2) !prof !30 {
+; CHECK-LABEL: @callee(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[RC:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    store i32 0, i32* [[RC]], align 4
+; CHECK-NEXT:    switch i32 [[C1:%.*]], label [[SW_DEFAULT:%.*]] [
+; CHECK-NEXT:    i32 0, label [[SW_BB:%.*]]
+; CHECK-NEXT:    i32 1, label [[SW_BB1:%.*]]
+; CHECK-NEXT:    i32 2, label [[SW_BB2:%.*]]
+; CHECK-NEXT:    ], !prof !31
+; CHECK:       sw.bb:
+; CHECK-NEXT:    store i32 1, i32* [[RC]], align 4
+; CHECK-NEXT:    br label [[SW_EPILOG:%.*]]
+; CHECK:       sw.bb1:
+; CHECK-NEXT:    store i32 2, i32* [[RC]], align 4
+; CHECK-NEXT:    br label [[SW_EPILOG]]
+; CHECK:       sw.bb2:
+; CHECK-NEXT:    store i32 4, i32* [[RC]], align 4
+; CHECK-NEXT:    br label [[SW_EPILOG]]
+; CHECK:       sw.default:
+; CHECK-NEXT:    store i32 [[C2:%.*]], i32* [[RC]], align 4
+; CHECK-NEXT:    br label [[SW_EPILOG]]
+; CHECK:       sw.epilog:
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[RC]], align 4
+; CHECK-NEXT:    ret i32 [[TMP0]]
+;
+entry:
+  %rc = alloca i32, align 4
+  store i32 0, i32* %rc, align 4
+  switch i32 %c1, label %sw.default [
+    i32 0, label %sw.bb
+    i32 1, label %sw.bb1
+    i32 2, label %sw.bb2
+  ], !prof !31
+
+sw.bb: ;; cold
+  store i32 1, i32* %rc, align 4
+  br label %sw.epilog
+
+sw.bb1:
+  store i32 2, i32* %rc, align 4
+  br label %sw.epilog
+
+sw.bb2: ;; cold
+  store i32 4, i32* %rc, align 4
+  br label %sw.epilog
+
+sw.default:
+  store i32 %c2, i32* %rc, align 4
+  br label %sw.epilog
+
+sw.epilog:
+  %0 = load i32, i32* %rc, align 4
+  ret i32 %0
+}
+
+define dso_local signext i32 @caller(i32 signext %c) !prof !30 {
+; CHECK-LABEL: @caller(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[RC_I:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    [[TMP0:%.*]] = bitcast i32* [[RC_I]] to i8*
+; CHECK-NEXT:    call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP0]])
+; CHECK-NEXT:    store i32 0, i32* [[RC_I]], align 4
+; CHECK-NEXT:    switch i32 [[C:%.*]], label [[SW_DEFAULT_I:%.*]] [
+; CHECK-NEXT:    i32 0, label [[CODEREPL_I:%.*]]
+; CHECK-NEXT:    i32 1, label [[SW_BB1_I:%.*]]
+; CHECK-NEXT:    i32 2, label [[CODEREPL1_I:%.*]]
+; CHECK-NEXT:    ], !prof !31
+; CHECK:       codeRepl.i:
+; CHECK-NEXT:    call void @callee.1.sw.bb(i32* [[RC_I]])
+; CHECK-NEXT:    br label [[CALLEE_1_EXIT:%.*]]
+; CHECK:       sw.bb1.i:
+; CHECK-NEXT:    store i32 2, i32* [[RC_I]], align 4
+; CHECK-NEXT:    br label [[CALLEE_1_EXIT]]
+; CHECK:       codeRepl1.i:
+; CHECK-NEXT:    call void @callee.1.sw.bb2(i32* [[RC_I]])
+; CHECK-NEXT:    br label [[CALLEE_1_EXIT]]
+; CHECK:       sw.default.i:
+; CHECK-NEXT:    store i32 [[C]], i32* [[RC_I]], align 4
+; CHECK-NEXT:    br label [[CALLEE_1_EXIT]]
+; CHECK:       callee.1.exit:
+; CHECK-NEXT:    [[TMP1:%.*]] = load i32, i32* [[RC_I]], align 4
+; CHECK-NEXT:    [[TMP2:%.*]] = bitcast i32* [[RC_I]] to i8*
+; CHECK-NEXT:    call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP2]])
+;
+entry:
+  %0 = call signext i32 @callee(i32 signext %c, i32 signext %c)
+  ret i32 %0
+}
+
+!llvm.module.flags = !{!0, !1}
+
+!0 = !{i32 1, !"wchar_size", i32 4}
+!1 = !{i32 1, !"ProfileSummary", !2}
+!2 = !{!3, !4, !5, !6, !7, !8, !9, !10, !11, !12}
+!3 = !{!"ProfileFormat", !"InstrProf"}
+!4 = !{!"TotalCount", i64 2}
+!5 = !{!"MaxCount", i64 1000}
+!6 = !{!"MaxInternalCount", i64 1000}
+!7 = !{!"MaxFunctionCount", i64 1000}
+!8 = !{!"NumCounts", i64 4}
+!9 = !{!"NumFunctions", i64 2}
+!10 = !{!"IsPartialProfile", i64 0}
+!11 = !{!"PartialProfileRatio", double 0.000000e+00}
+!12 = !{!"DetailedSummary", !13}
+!13 = !{!14, !15, !16, !17, !18, !19, !20, !21, !22, !23, !24, !25, !26, !27, !28, !29}
+!14 = !{i32 10000, i64 0, i32 0}
+!15 = !{i32 100000, i64 0, i32 0}
+!16 = !{i32 200000, i64 0, i32 0}
+!17 = !{i32 300000, i64 0, i32 0}
+!18 = !{i32 400000, i64 0, i32 0}
+!19 = !{i32 500000, i64 1, i32 2}
+!20 = !{i32 600000, i64 1, i32 2}
+!21 = !{i32 700000, i64 1, i32 2}
+!22 = !{i32 800000, i64 1, i32 2}
+!23 = !{i32 900000, i64 1, i32 2}
+!24 = !{i32 950000, i64 1, i32 2}
+!25 = !{i32 990000, i64 1, i32 2}
+!26 = !{i32 999000, i64 1, i32 2}
+!27 = !{i32 999900, i64 1, i32 2}
+!28 = !{i32 999990, i64 1, i32 2}
+!29 = !{i32 999999, i64 1, i32 2}
+!30 = !{!"function_entry_count", i64 1000}
+!31 = !{!"branch_weights", i32 500, i32 10, i32 150, i32 40}