Index: llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h =================================================================== --- llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h +++ llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h @@ -288,7 +288,7 @@ BitTestInfo C, uint32_t W) : First(F), Range(R), SValue(SV), Reg(Rg), RegVT(RgVT), Emitted(E), ContiguousRange(CR), Parent(P), Default(D), Cases(std::move(C)), - Weight(W) {} + Weight(W), DefaultWeight(0) {} APInt First; APInt Range; const Value *SValue; @@ -300,6 +300,7 @@ MachineBasicBlock *Default; BitTestInfo Cases; uint32_t Weight; + uint32_t DefaultWeight; }; /// Minimum jump table density, in percent. @@ -341,6 +342,7 @@ CaseClusterIt LastCluster; const ConstantInt *GE; const ConstantInt *LT; + uint32_t DefaultWeight; }; typedef SmallVector SwitchWorkList; Index: llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp =================================================================== --- llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -1924,8 +1924,7 @@ MachineBasicBlock* MBB = B.Cases[0].ThisBB; - uint32_t DefaultWeight = getEdgeWeight(SwitchBB, B.Default); - addSuccessorWithWeight(SwitchBB, B.Default, DefaultWeight); + addSuccessorWithWeight(SwitchBB, B.Default, B.DefaultWeight); addSuccessorWithWeight(SwitchBB, MBB, B.Weight); SDValue BrRange = DAG.getNode(ISD::BRCOND, dl, @@ -8037,7 +8036,8 @@ } // Compute total weight. - uint32_t UnhandledWeights = 0; + uint32_t DefaultWeight = W.DefaultWeight; + uint32_t UnhandledWeights = DefaultWeight; for (CaseClusterIt I = W.FirstCluster; I <= W.LastCluster; ++I) { UnhandledWeights += I->Weight; assert(UnhandledWeights >= I->Weight && "Weight overflow!"); @@ -8067,14 +8067,24 @@ MachineBasicBlock *JumpMBB = JT->MBB; CurMF->insert(BBI, JumpMBB); - // Collect the sum of weights of outgoing edges from JumpMBB, which will - // be the edge weight on CurMBB->JumpMBB. - uint32_t JumpWeight = 0; - for (auto Succ : JumpMBB->successors()) - JumpWeight += getEdgeWeight(JumpMBB, Succ); - uint32_t FallthruWeight = getEdgeWeight(CurMBB, Fallthrough); + uint32_t JumpWeight = I->Weight; + uint32_t FallthroughWeight = UnhandledWeights; - addSuccessorWithWeight(CurMBB, Fallthrough, FallthruWeight); + // If Fallthrough is a target of the jump table, we evenly distribute + // the weight on the edge to Fallthrough to successors of CurMBB. + // Also update the weight on the edge from JumpMBB to Fallthrough. + for (MachineBasicBlock::succ_iterator SI = JumpMBB->succ_begin(), + SE = JumpMBB->succ_end(); + SI != SE; ++SI) { + if (*SI == Fallthrough) { + JumpWeight += DefaultWeight / 2; + FallthroughWeight -= DefaultWeight / 2; + JumpMBB->setSuccWeight(SI, DefaultWeight / 2); + break; + } + } + + addSuccessorWithWeight(CurMBB, Fallthrough, FallthroughWeight); addSuccessorWithWeight(CurMBB, JumpMBB, JumpWeight); // The jump table header will be inserted in our current block, do the @@ -8101,8 +8111,17 @@ BTB->Parent = CurMBB; BTB->Default = Fallthrough; - // If we're in the right place, emit the bit test header header right now. - if (CurMBB ==SwitchMBB) { + BTB->DefaultWeight = UnhandledWeights; + // If the cases in bit test don't form a contiguous range, we evenly + // distribute the weight on the edge to Fallthrough to two successors + // of CurMBB. + if (!BTB->ContiguousRange) { + BTB->Weight += DefaultWeight / 2; + BTB->DefaultWeight -= DefaultWeight / 2; + } + + // If we're in the right place, emit the bit test header right now. + if (CurMBB == SwitchMBB) { visitBitTestHeader(*BTB, SwitchMBB); BTB->Emitted = true; } @@ -8167,8 +8186,8 @@ // Mehlhorn "Nearly Optimal Binary Search Trees" (1975). CaseClusterIt LastLeft = W.FirstCluster; CaseClusterIt FirstRight = W.LastCluster; - uint32_t LeftWeight = LastLeft->Weight; - uint32_t RightWeight = FirstRight->Weight; + uint32_t LeftWeight = LastLeft->Weight + W.DefaultWeight / 2; + uint32_t RightWeight = FirstRight->Weight + W.DefaultWeight / 2; // Move LastLeft and FirstRight towards each other from opposite directions to // find a partitioning of the clusters which balances the weight on both @@ -8254,7 +8273,8 @@ } else { LeftMBB = FuncInfo.MF->CreateMachineBasicBlock(W.MBB->getBasicBlock()); FuncInfo.MF->insert(BBI, LeftMBB); - WorkList.push_back({LeftMBB, FirstLeft, LastLeft, W.GE, Pivot}); + WorkList.push_back( + {LeftMBB, FirstLeft, LastLeft, W.GE, Pivot, W.DefaultWeight / 2}); // Put Cond in a virtual register to make it available from the new blocks. ExportFromCurrentBlock(Cond); } @@ -8269,7 +8289,8 @@ } else { RightMBB = FuncInfo.MF->CreateMachineBasicBlock(W.MBB->getBasicBlock()); FuncInfo.MF->insert(BBI, RightMBB); - WorkList.push_back({RightMBB, FirstRight, LastRight, Pivot, W.LT}); + WorkList.push_back( + {RightMBB, FirstRight, LastRight, Pivot, W.LT, W.DefaultWeight / 2}); // Put Cond in a virtual register to make it available from the new blocks. ExportFromCurrentBlock(Cond); } @@ -8370,7 +8391,8 @@ SwitchWorkList WorkList; CaseClusterIt First = Clusters.begin(); CaseClusterIt Last = Clusters.end() - 1; - WorkList.push_back({SwitchMBB, First, Last, nullptr, nullptr}); + uint32_t DefaultWeight = getEdgeWeight(SwitchMBB, DefaultMBB); + WorkList.push_back({SwitchMBB, First, Last, nullptr, nullptr, DefaultWeight}); while (!WorkList.empty()) { SwitchWorkListItem W = WorkList.back(); Index: llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp =================================================================== --- llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp +++ llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp @@ -1490,9 +1490,7 @@ CodeGenAndEmitDAG(); } - uint32_t UnhandledWeight = 0; - for (unsigned j = 0, ej = SDB->BitTestCases[i].Cases.size(); j != ej; ++j) - UnhandledWeight += SDB->BitTestCases[i].Cases[j].ExtraWeight; + uint32_t UnhandledWeight = SDB->BitTestCases[i].Weight; for (unsigned j = 0, ej = SDB->BitTestCases[i].Cases.size(); j != ej; ++j) { UnhandledWeight -= SDB->BitTestCases[i].Cases[j].ExtraWeight; Index: llvm/trunk/test/CodeGen/AArch64/aarch64-deferred-spilling.ll =================================================================== --- llvm/trunk/test/CodeGen/AArch64/aarch64-deferred-spilling.ll +++ llvm/trunk/test/CodeGen/AArch64/aarch64-deferred-spilling.ll @@ -7,9 +7,9 @@ ; ; CHECK: // %if.then.120 ; -; REGULAR: str w22, [sp, #[[OFFSET:[0-9]+]]] // 4-byte Folded Spill -; Check that w22 wouldn't need to be spilled since it is never reused. -; REGULAR-NOT: {{[wx]}}22{{,?}} +; REGULAR: str w21, [sp, #[[OFFSET:[0-9]+]]] // 4-byte Folded Spill +; Check that w21 wouldn't need to be spilled since it is never reused. +; REGULAR-NOT: {{[wx]}}21{{,?}} ; ; Check that w22 is used to carry a value through the call. ; DEFERRED-NOT: str {{[wx]}}22, @@ -22,8 +22,8 @@ ; DEFERRED: mov {{[wx][0-9]+}}, {{[wx]}}22 ; DEFERRED-NOT: ldr {{[wx]}}22, ; -; REGULAR-NOT: {{[wx]}}22{{,?}} -; REGUAL: ldr w22, [sp, #[[OFFSET]]] // 4-byte Folded Reload +; REGULAR-NOT: {{[wx]}}21{{,?}} +; REGULAR: ldr w21, [sp, #[[OFFSET]]] // 4-byte Folded Reload ; ; End of the basic block we are interested in. ; CHECK: b Index: llvm/trunk/test/CodeGen/ARM/jump-table-islands.ll =================================================================== --- llvm/trunk/test/CodeGen/ARM/jump-table-islands.ll +++ llvm/trunk/test/CodeGen/ARM/jump-table-islands.ll @@ -13,7 +13,7 @@ ; CHECK: .long LBB{{[0-9]+_[0-9]+}}-[[JUMP_TABLE]] ; CHECK: [[SKIP_TABLE]]: -; CHECK: add pc, lr, {{r[0-9]+}} +; CHECK: add pc, {{r[0-9]+}}, {{r[0-9]+}} br i1 %tst, label %simple, label %complex simple: Index: llvm/trunk/test/CodeGen/Generic/MachineBranchProb.ll =================================================================== --- llvm/trunk/test/CodeGen/Generic/MachineBranchProb.ll +++ llvm/trunk/test/CodeGen/Generic/MachineBranchProb.ll @@ -16,9 +16,9 @@ i64 5, label %sw.bb1 ], !prof !0 ; CHECK: BB#0: derived from LLVM BB %entry -; CHECK: Successors according to CFG: BB#2(64) BB#4(14) +; CHECK: Successors according to CFG: BB#2(64) BB#4(21) ; CHECK: BB#4: derived from LLVM BB %entry -; CHECK: Successors according to CFG: BB#1(10) BB#5(4) +; CHECK: Successors according to CFG: BB#1(10) BB#5(11) ; CHECK: BB#5: derived from LLVM BB %entry ; CHECK: Successors according to CFG: BB#1(4) BB#3(7) Index: llvm/trunk/test/CodeGen/Thumb2/v8_IT_3.ll =================================================================== --- llvm/trunk/test/CodeGen/Thumb2/v8_IT_3.ll +++ llvm/trunk/test/CodeGen/Thumb2/v8_IT_3.ll @@ -35,7 +35,7 @@ br i1 %tmp4, label %bb1, label %bb8 bb1: -; CHECK: %bb6 +; CHECK: %entry ; CHECK: it eq ; CHECK-NEXT: ldreq ; CHECK-NEXT: it eq @@ -54,8 +54,9 @@ bb4: ; CHECK-PIC: cmp ; CHECK-PIC: cmp +; CHECK-PIC: cmp ; CHECK-PIC-NEXT: bne -; CHECK-PIC-NEXT: %bb4 +; CHECK-PIC: %bb6 ; CHECK-PIC-NEXT: movs ; CHECK-PIC-NEXT: add ; CHECK-PIC-NEXT: pop Index: llvm/trunk/test/CodeGen/Thumb2/v8_IT_5.ll =================================================================== --- llvm/trunk/test/CodeGen/Thumb2/v8_IT_5.ll +++ llvm/trunk/test/CodeGen/Thumb2/v8_IT_5.ll @@ -7,9 +7,11 @@ ; CHECK-NEXT: %if.else163 ; CHECK-NEXT: mov.w ; CHECK-NEXT: b +; CHECK: [[JUMPTARGET]]:{{.*}}%if.else173 +; CHECK-NEXT: mov.w +; CHECK-NEXT: pop ; CHECK-NEXT: %if.else145 ; CHECK-NEXT: mov.w -; CHECK: [[JUMPTARGET]]:{{.*}}%if.else173 %struct.hc = type { i32, i32, i32, i32 } Index: llvm/trunk/test/CodeGen/X86/2006-10-19-SwitchUnnecessaryBranching.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/2006-10-19-SwitchUnnecessaryBranching.ll +++ llvm/trunk/test/CodeGen/X86/2006-10-19-SwitchUnnecessaryBranching.ll @@ -6,8 +6,8 @@ define i32 @test(i32 %argc, i8** %argv) nounwind { entry: ; CHECK: cmpl $2 -; CHECK-NEXT: jne -; CHECK-NEXT: %bb2 +; CHECK-NEXT: je +; CHECK-NEXT: %entry switch i32 %argc, label %UnifiedReturnBlock [ i32 1, label %bb Index: llvm/trunk/test/CodeGen/X86/switch-bt.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/switch-bt.ll +++ llvm/trunk/test/CodeGen/X86/switch-bt.ll @@ -5,7 +5,7 @@ ; CHECK: movabsq $2305843009482129440, %r ; CHECK-NEXT: btq %rax, %r -; CHECK-NEXT: jae +; CHECK-NEXT: jb ; CHECK: movl $671088640, %e ; CHECK-NEXT: btq %rax, %r ; CHECK-NEXT: jae @@ -145,13 +145,13 @@ ; CHECK: cmpl $10 ; CHECK: je ; CHECK: cmpl $20 +; CHECK: je +; CHECK: cmpl $30 ; CHECK: jne ; CHECK: cmpl $40 ; CHECK: je ; CHECK: cmpl $50 -; CHECK: jne -; CHECK: cmpl $30 -; CHECK: jne +; CHECK: je ; CHECK: cmpl $60 ; CHECK: jne } Index: llvm/trunk/test/CodeGen/X86/switch-edge-weight.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/switch-edge-weight.ll +++ llvm/trunk/test/CodeGen/X86/switch-edge-weight.ll @@ -1,19 +1,20 @@ ; RUN: llc -march=x86-64 -print-machineinstrs=expand-isel-pseudos %s -o /dev/null 2>&1 | FileCheck %s +declare void @foo(i32) ; CHECK-LABEL: test define void @test(i32 %x) nounwind { entry: switch i32 %x, label %sw.default [ - i32 54, label %sw.bb - i32 55, label %sw.bb - i32 56, label %sw.bb - i32 58, label %sw.bb - i32 67, label %sw.bb - i32 68, label %sw.bb - i32 134, label %sw.bb - i32 140, label %sw.bb + i32 1, label %sw.bb + i32 155, label %sw.bb + i32 156, label %sw.bb + i32 157, label %sw.bb + i32 158, label %sw.bb + i32 159, label %sw.bb + i32 1134, label %sw.bb + i32 1140, label %sw.bb ], !prof !1 sw.bb: @@ -30,12 +31,199 @@ ; Check if weights are correctly assigned to edges generated from switch ; statement. ; +; CHECK: BB#0: +; BB#0 to BB#4: [0, 1133] (65 = 60 + 5) +; BB#0 to BB#5: [1134, UINT32_MAX] (25 = 20 + 5) +; CHECK: Successors according to CFG: BB#4(65) BB#5(25) +; ; CHECK: BB#4: -; CHECK: Successors according to CFG: BB#1(10) BB#6(10) +; BB#4 to BB#1: [155, 159] (50) +; BB#4 to BB#5: [0, 1133] - [155, 159] (15 = 10 + 5) +; CHECK: Successors according to CFG: BB#1(50) BB#7(15) +; +; CHECK: BB#5: +; BB#5 to BB#1: {1140} (10) +; BB#5 to BB#6: [1134, UINT32_MAX] - {1140} (15 = 10 + 5) +; CHECK: Successors according to CFG: BB#1(10) BB#6(15) +; ; CHECK: BB#6: -; CHECK: Successors according to CFG: BB#1(10) BB#2(10) +; BB#6 to BB#1: {1134} (10) +; BB#6 to BB#2: [1134, UINT32_MAX] - {1134, 1140} (5) +; CHECK: Successors according to CFG: BB#1(10) BB#2(5) } -declare void @foo(i32) +; CHECK-LABEL: test2 + +define void @test2(i32 %x) nounwind { +entry: + +; In this switch statement, there is an edge from jump table to default +; statement. + + switch i32 %x, label %sw.default [ + i32 1, label %sw.bb + i32 10, label %sw.bb2 + i32 11, label %sw.bb3 + i32 12, label %sw.bb4 + i32 13, label %sw.bb5 + i32 14, label %sw.bb5 + ], !prof !3 + +sw.bb: + call void @foo(i32 0) + br label %sw.epilog + +sw.bb2: + call void @foo(i32 2) + br label %sw.epilog + +sw.bb3: + call void @foo(i32 3) + br label %sw.epilog + +sw.bb4: + call void @foo(i32 4) + br label %sw.epilog + +sw.bb5: + call void @foo(i32 5) + br label %sw.epilog + +sw.default: + call void @foo(i32 1) + br label %sw.epilog + +sw.epilog: + ret void + +; Check if weights are correctly assigned to edges generated from switch +; statement. +; +; CHECK: BB#0: +; BB#0 to BB#6: {0} + [15, UINT32_MAX] (5) +; BB#0 to BB#8: [1, 14] (jump table) (65 = 60 + 5) +; CHECK: Successors according to CFG: BB#6(5) BB#8(65) +; +; CHECK: BB#8: +; BB#8 to BB#1: {1} (10) +; BB#8 to BB#6: [2, 9] (5) +; BB#8 to BB#2: {10} (10) +; BB#8 to BB#3: {11} (10) +; BB#8 to BB#4: {12} (10) +; BB#8 to BB#5: {13, 14} (20) +; CHECK: Successors according to CFG: BB#1(10) BB#6(5) BB#2(10) BB#3(10) BB#4(10) BB#5(20) +} + +; CHECK-LABEL: test3 + +define void @test3(i32 %x) nounwind { +entry: + +; In this switch statement, there is no edge from jump table to default +; statement. + + switch i32 %x, label %sw.default [ + i32 10, label %sw.bb + i32 11, label %sw.bb2 + i32 12, label %sw.bb3 + i32 13, label %sw.bb4 + i32 14, label %sw.bb5 + ], !prof !2 + +sw.bb: + call void @foo(i32 0) + br label %sw.epilog + +sw.bb2: + call void @foo(i32 2) + br label %sw.epilog + +sw.bb3: + call void @foo(i32 3) + br label %sw.epilog + +sw.bb4: + call void @foo(i32 4) + br label %sw.epilog + +sw.bb5: + call void @foo(i32 5) + br label %sw.epilog + +sw.default: + call void @foo(i32 1) + br label %sw.epilog + +sw.epilog: + ret void + +; Check if weights are correctly assigned to edges generated from switch +; statement. +; +; CHECK: BB#0: +; BB#0 to BB#6: [0, 9] + [15, UINT32_MAX] {10} +; BB#0 to BB#8: [10, 14] (jump table) (50) +; CHECK: Successors according to CFG: BB#6(10) BB#8(50) +; +; CHECK: BB#8: +; BB#8 to BB#1: {10} (10) +; BB#8 to BB#2: {11} (10) +; BB#8 to BB#3: {12} (10) +; BB#8 to BB#4: {13} (10) +; BB#8 to BB#5: {14} (10) +; CHECK: Successors according to CFG: BB#1(10) BB#2(10) BB#3(10) BB#4(10) BB#5(10) +} + +; CHECK-LABEL: test4 + +define void @test4(i32 %x) nounwind { +entry: + +; In this switch statement, there is no edge from bit test to default basic +; block. + + switch i32 %x, label %sw.default [ + i32 1, label %sw.bb + i32 111, label %sw.bb2 + i32 112, label %sw.bb3 + i32 113, label %sw.bb3 + i32 114, label %sw.bb2 + i32 115, label %sw.bb2 + ], !prof !3 + +sw.bb: + call void @foo(i32 0) + br label %sw.epilog + +sw.bb2: + call void @foo(i32 2) + br label %sw.epilog + +sw.bb3: + call void @foo(i32 3) + br label %sw.epilog + +sw.default: + call void @foo(i32 1) + br label %sw.epilog + +sw.epilog: + ret void + +; Check if weights are correctly assigned to edges generated from switch +; statement. +; +; CHECK: BB#0: +; BB#0 to BB#6: [0, 110] + [116, UINT32_MAX] (20) +; BB#0 to BB#7: [111, 115] (bit test) (50) +; CHECK: Successors according to CFG: BB#6(20) BB#7(50) +; +; CHECK: BB#7: +; BB#7 to BB#2: {111, 114, 115} (30) +; BB#7 to BB#3: {112, 113} (20) +; CHECK: Successors according to CFG: BB#2(30) BB#3(20) +} !1 = !{!"branch_weights", i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10} +!2 = !{!"branch_weights", i32 10, i32 10, i32 10, i32 10, i32 10, i32 10} +!3 = !{!"branch_weights", i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10} Index: llvm/trunk/test/CodeGen/X86/switch-order-weight.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/switch-order-weight.ll +++ llvm/trunk/test/CodeGen/X86/switch-order-weight.ll @@ -13,8 +13,8 @@ ; CHECK-LABEL: test1: ; CHECK-NOT: unr ; CHECK: cmpl $10 -; CHECK: bar ; CHECK: cmpl $20 +; CHECK: bar if.then: tail call void @unr(i32 23) noreturn nounwind Index: llvm/trunk/test/CodeGen/X86/switch.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/switch.ll +++ llvm/trunk/test/CodeGen/X86/switch.ll @@ -51,7 +51,7 @@ ; CHECK-LABEL: simple_ranges ; CHECK: leal -100 ; CHECK: cmpl $4 -; CHECK: jae +; CHECK: jb ; CHECK: cmpl $3 ; CHECK: ja