diff --git a/llvm/test/CodeGen/X86/switch-phi-const.ll b/llvm/test/CodeGen/X86/switch-phi-const.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/X86/switch-phi-const.ll @@ -0,0 +1,186 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc %s -o - -mtriple=x86_64-- | FileCheck %s +@g = global i32 0 +@effect = global i32 0 + +define void @switch_phi_const(i32 %x) { +; CHECK-LABEL: switch_phi_const: +; CHECK: # %bb.0: # %bb0 +; CHECK-NEXT: # kill: def $edi killed $edi def $rdi +; CHECK-NEXT: decl %edi +; CHECK-NEXT: cmpl $54, %edi +; CHECK-NEXT: ja .LBB0_8 +; CHECK-NEXT: # %bb.1: # %bb0 +; CHECK-NEXT: movl $42, %eax +; CHECK-NEXT: movl $13, %edx +; CHECK-NEXT: movl $5, %esi +; CHECK-NEXT: movl $1, %ecx +; CHECK-NEXT: jmpq *.LJTI0_0(,%rdi,8) +; CHECK-NEXT: .LBB0_2: # %case_7 +; CHECK-NEXT: movq g@GOTPCREL(%rip), %rax +; CHECK-NEXT: movl (%rax), %ecx +; CHECK-NEXT: movq effect@GOTPCREL(%rip), %rax +; CHECK-NEXT: movl $7, (%rax) +; CHECK-NEXT: .LBB0_3: # %case_1_loop +; CHECK-NEXT: movq effect@GOTPCREL(%rip), %rax +; CHECK-NEXT: movl $1, (%rax) +; CHECK-NEXT: movl %ecx, %esi +; CHECK-NEXT: .LBB0_4: # %case_5 +; CHECK-NEXT: movq effect@GOTPCREL(%rip), %rax +; CHECK-NEXT: movl $5, (%rax) +; CHECK-NEXT: movl %esi, %edx +; CHECK-NEXT: .LBB0_5: # %case_13 +; CHECK-NEXT: movq effect@GOTPCREL(%rip), %rax +; CHECK-NEXT: movl $13, (%rax) +; CHECK-NEXT: movl %edx, %eax +; CHECK-NEXT: .LBB0_6: # %case_42 +; CHECK-NEXT: movq effect@GOTPCREL(%rip), %rcx +; CHECK-NEXT: movl %eax, (%rcx) +; CHECK-NEXT: movl $55, %eax +; CHECK-NEXT: .LBB0_7: # %case_55 +; CHECK-NEXT: movq effect@GOTPCREL(%rip), %rcx +; CHECK-NEXT: movl %eax, (%rcx) +; CHECK-NEXT: .LBB0_8: # %default +; CHECK-NEXT: retq +bb0: + switch i32 %x, label %default [ + i32 1, label %case_1_loop + i32 5, label %case_5 + i32 7, label %case_7 + i32 13, label %case_13 + i32 42, label %case_42 + i32 55, label %case_55 + ] + +case_1_loop: + ; We should replace 1 with %x + %x0 = phi i32 [ 1, %bb0 ], [ %x5, %case_7 ] + store i32 1, i32* @effect, align 4 + br label %case_5 + +case_5: + ; We should replace 5 with %x + %x1 = phi i32 [ 5, %bb0 ], [ %x0, %case_1_loop ] + store i32 5, i32* @effect, align 4 + br label %case_13 + +case_13: + ; We should replace 13 with %x + %x2 = phi i32 [ 13, %bb0 ], [ %x1, %case_5 ] + store i32 13, i32* @effect, align 4 + br label %case_42 + +case_42: + ; We should replace 42 with %x + %x3 = phi i32 [ 42, %bb0 ], [ %x2, %case_13 ] + store i32 %x3, i32* @effect, align 4 + br label %case_55 + +case_55: + ; We must not replace any of the PHI arguments! 
+ %x4 = phi i32 [ 42, %bb0 ], [ 55, %case_42 ] + store i32 %x4, i32* @effect, align 4 + br label %default + +case_7: + %x5 = load i32, i32* @g, align 4 + store i32 7, i32* @effect, align 4 + br label %case_1_loop + +default: + ret void +} + +@g64 = global i64 0 +@effect64 = global i64 0 + +define void @switch_trunc_phi_const(i32 %x) { +; CHECK-LABEL: switch_trunc_phi_const: +; CHECK: # %bb.0: # %bb0 +; CHECK-NEXT: movl $3895, %r8d # imm = 0xF37 +; CHECK-NEXT: movl $42, %esi +; CHECK-NEXT: movl $13, %edx +; CHECK-NEXT: movl $5, %eax +; CHECK-NEXT: movl $1, %ecx +; CHECK-NEXT: decb %dil +; CHECK-NEXT: movzbl %dil, %edi +; CHECK-NEXT: cmpb $54, %dil +; CHECK-NEXT: ja .LBB1_8 +; CHECK-NEXT: # %bb.1: # %bb0 +; CHECK-NEXT: jmpq *.LJTI1_0(,%rdi,8) +; CHECK-NEXT: .LBB1_8: # %default +; CHECK-NEXT: retq +; CHECK-NEXT: .LBB1_2: # %case_1_loop +; CHECK-NEXT: movq effect64@GOTPCREL(%rip), %rax +; CHECK-NEXT: movq $1, (%rax) +; CHECK-NEXT: movq %rcx, %rax +; CHECK-NEXT: .LBB1_3: # %case_5 +; CHECK-NEXT: movq effect64@GOTPCREL(%rip), %rcx +; CHECK-NEXT: movq $5, (%rcx) +; CHECK-NEXT: movq %rax, %rdx +; CHECK-NEXT: .LBB1_4: # %case_13 +; CHECK-NEXT: movq effect64@GOTPCREL(%rip), %rax +; CHECK-NEXT: movq $13, (%rax) +; CHECK-NEXT: movq %rdx, %rsi +; CHECK-NEXT: .LBB1_5: # %case_42 +; CHECK-NEXT: movq effect64@GOTPCREL(%rip), %rax +; CHECK-NEXT: movq %rsi, (%rax) +; CHECK-NEXT: movl $55, %r8d +; CHECK-NEXT: .LBB1_6: # %case_55 +; CHECK-NEXT: movq effect64@GOTPCREL(%rip), %rax +; CHECK-NEXT: movq %r8, (%rax) +; CHECK-NEXT: .LBB1_7: # %case_7 +; CHECK-NEXT: movq g64@GOTPCREL(%rip), %rax +; CHECK-NEXT: movq (%rax), %rcx +; CHECK-NEXT: movq effect64@GOTPCREL(%rip), %rax +; CHECK-NEXT: movq $7, (%rax) +; CHECK-NEXT: jmp .LBB1_2 bb0: + %x_trunc = trunc i32 %x to i8 + switch i8 %x_trunc, label %default [ + i8 1, label %case_1_loop + i8 5, label %case_5 + i8 7, label %case_7 + i8 13, label %case_13 + i8 42, label %case_42 + i8 55, label %case_55 + ] + +case_1_loop: + ; We should replace 1 with %x + %x0 = phi i64 [ 1, %bb0 ], [ %x5, %case_7 ] + store i64 1, i64* @effect64, align 4 + br label %case_5 + +case_5: + ; We should replace 5 with %x + %x1 = phi i64 [ 5, %bb0 ], [ %x0, %case_1_loop ] + store i64 5, i64* @effect64, align 4 + br label %case_13 + +case_13: + ; We should replace 13 with %x + %x2 = phi i64 [ 13, %bb0 ], [ %x1, %case_5 ] + store i64 13, i64* @effect64, align 4 + br label %case_42 + +case_42: + ; We should replace 42 with %x + %x3 = phi i64 [ 42, %bb0 ], [ %x2, %case_13 ] + store i64 %x3, i64* @effect64, align 4 + br label %case_55 + +case_55: + ; We must not replace any of the PHI arguments! (3895 == 0xf00 + 55) + %x4 = phi i64 [ 3895, %bb0 ], [ 55, %case_42 ] + store i64 %x4, i64* @effect64, align 4 + br label %case_7 + +case_7: + %x5 = load i64, i64* @g64, align 4 + store i64 7, i64* @effect64, align 4 + br label %case_1_loop + +default: + ret void +} diff --git a/llvm/test/CodeGen/X86/switch.ll b/llvm/test/CodeGen/X86/switch.ll --- a/llvm/test/CodeGen/X86/switch.ll +++ b/llvm/test/CodeGen/X86/switch.ll @@ -1,9 +1,57 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mtriple=x86_64-linux-gnu %s -o - -jump-table-density=40 -switch-peel-threshold=101 -verify-machineinstrs | FileCheck %s ; RUN: llc -mtriple=x86_64-linux-gnu %s -o - -O0 -jump-table-density=40 -verify-machineinstrs | FileCheck --check-prefix=NOOPT %s declare void @g(i32) +; Should be lowered as a jump table, both with and without optimization. 
define void @basic(i32 %x) { +; CHECK-LABEL: basic: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: # kill: def $edi killed $edi def $rdi +; CHECK-NEXT: decl %edi +; CHECK-NEXT: cmpl $4, %edi +; CHECK-NEXT: ja .LBB0_4 +; CHECK-NEXT: # %bb.1: # %entry +; CHECK-NEXT: jmpq *.LJTI0_0(,%rdi,8) +; CHECK-NEXT: .LBB0_3: # %bb2 +; CHECK-NEXT: movl $1, %edi +; CHECK-NEXT: jmp g@PLT # TAILCALL +; CHECK-NEXT: .LBB0_4: # %return +; CHECK-NEXT: retq +; CHECK-NEXT: .LBB0_2: # %bb0 +; CHECK-NEXT: xorl %edi, %edi +; CHECK-NEXT: jmp g@PLT # TAILCALL +; +; NOOPT-LABEL: basic: +; NOOPT: # %bb.0: # %entry +; NOOPT-NEXT: pushq %rax +; NOOPT-NEXT: .cfi_def_cfa_offset 16 +; NOOPT-NEXT: movl %edi, %eax +; NOOPT-NEXT: decl %eax +; NOOPT-NEXT: movl %eax, %ecx +; NOOPT-NEXT: movq %rcx, (%rsp) # 8-byte Spill +; NOOPT-NEXT: subl $4, %eax +; NOOPT-NEXT: ja .LBB0_4 +; NOOPT-NEXT: # %bb.5: # %entry +; NOOPT-NEXT: movq (%rsp), %rax # 8-byte Reload +; NOOPT-NEXT: movq .LJTI0_0(,%rax,8), %rax +; NOOPT-NEXT: jmpq *%rax +; NOOPT-NEXT: .LBB0_1: # %bb0 +; NOOPT-NEXT: xorl %edi, %edi +; NOOPT-NEXT: callq g@PLT +; NOOPT-NEXT: jmp .LBB0_4 +; NOOPT-NEXT: .LBB0_2: # %bb1 +; NOOPT-NEXT: movl $1, %edi +; NOOPT-NEXT: callq g@PLT +; NOOPT-NEXT: jmp .LBB0_4 +; NOOPT-NEXT: .LBB0_3: # %bb2 +; NOOPT-NEXT: movl $1, %edi +; NOOPT-NEXT: callq g@PLT +; NOOPT-NEXT: .LBB0_4: # %return +; NOOPT-NEXT: popq %rax +; NOOPT-NEXT: .cfi_def_cfa_offset 8 +; NOOPT-NEXT: retq entry: switch i32 %x, label %return [ i32 3, label %bb0 @@ -15,23 +63,73 @@ bb1: tail call void @g(i32 1) br label %return bb2: tail call void @g(i32 1) br label %return return: ret void - -; Lowered as a jump table, both with and without optimization. -; CHECK-LABEL: basic -; CHECK: decl -; CHECK: cmpl $4 -; CHECK: ja -; CHECK: jmpq *.LJTI -; NOOPT-LABEL: basic -; NOOPT: decl -; NOOPT: subl $4 -; NOOPT: ja -; NOOPT: movq .LJTI -; NOOPT: jmpq } ; Should never be lowered as a jump table because of the attribute define void @basic_nojumptable(i32 %x) "no-jump-tables"="true" { +; CHECK-LABEL: basic_nojumptable: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: cmpl $3, %edi +; CHECK-NEXT: jg .LBB1_4 +; CHECK-NEXT: # %bb.1: # %entry +; CHECK-NEXT: cmpl $1, %edi +; CHECK-NEXT: je .LBB1_7 +; CHECK-NEXT: # %bb.2: # %entry +; CHECK-NEXT: cmpl $3, %edi +; CHECK-NEXT: jne .LBB1_6 +; CHECK-NEXT: # %bb.3: # %bb0 +; CHECK-NEXT: xorl %edi, %edi +; CHECK-NEXT: jmp g@PLT # TAILCALL +; CHECK-NEXT: .LBB1_4: # %entry +; CHECK-NEXT: cmpl $4, %edi +; CHECK-NEXT: je .LBB1_7 +; CHECK-NEXT: # %bb.5: # %entry +; CHECK-NEXT: cmpl $5, %edi +; CHECK-NEXT: jne .LBB1_6 +; CHECK-NEXT: .LBB1_7: # %bb2 +; CHECK-NEXT: movl $1, %edi +; CHECK-NEXT: jmp g@PLT # TAILCALL +; CHECK-NEXT: .LBB1_6: # %return +; CHECK-NEXT: retq +; +; NOOPT-LABEL: basic_nojumptable: +; NOOPT: # %bb.0: # %entry +; NOOPT-NEXT: pushq %rax +; NOOPT-NEXT: .cfi_def_cfa_offset 16 +; NOOPT-NEXT: movl %edi, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill +; NOOPT-NEXT: subl $1, %edi +; NOOPT-NEXT: je .LBB1_2 +; NOOPT-NEXT: jmp .LBB1_5 +; NOOPT-NEXT: .LBB1_5: # %entry +; NOOPT-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload +; NOOPT-NEXT: subl $3, %eax +; NOOPT-NEXT: je .LBB1_1 +; NOOPT-NEXT: jmp .LBB1_6 +; NOOPT-NEXT: .LBB1_6: # %entry +; NOOPT-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload +; NOOPT-NEXT: subl $4, %eax +; NOOPT-NEXT: je .LBB1_2 +; NOOPT-NEXT: jmp .LBB1_7 +; NOOPT-NEXT: .LBB1_7: # %entry +; NOOPT-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload +; NOOPT-NEXT: subl $5, %eax +; NOOPT-NEXT: je .LBB1_3 +; NOOPT-NEXT: jmp .LBB1_4 +; 
NOOPT-NEXT: .LBB1_1: # %bb0 +; NOOPT-NEXT: xorl %edi, %edi +; NOOPT-NEXT: callq g@PLT +; NOOPT-NEXT: jmp .LBB1_4 +; NOOPT-NEXT: .LBB1_2: # %bb1 +; NOOPT-NEXT: movl $1, %edi +; NOOPT-NEXT: callq g@PLT +; NOOPT-NEXT: jmp .LBB1_4 +; NOOPT-NEXT: .LBB1_3: # %bb2 +; NOOPT-NEXT: movl $1, %edi +; NOOPT-NEXT: callq g@PLT +; NOOPT-NEXT: .LBB1_4: # %return +; NOOPT-NEXT: popq %rax +; NOOPT-NEXT: .cfi_def_cfa_offset 8 +; NOOPT-NEXT: retq entry: switch i32 %x, label %return [ i32 3, label %bb0 @@ -43,14 +141,56 @@ bb1: tail call void @g(i32 1) br label %return bb2: tail call void @g(i32 1) br label %return return: ret void - -; Lowered as a jump table, both with and without optimization. -; CHECK-LABEL: basic_nojumptable -; CHECK-NOT: jmpq *.LJTI } ; Should be lowered as a jump table because of the attribute define void @basic_nojumptable_false(i32 %x) "no-jump-tables"="false" { +; CHECK-LABEL: basic_nojumptable_false: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: # kill: def $edi killed $edi def $rdi +; CHECK-NEXT: decl %edi +; CHECK-NEXT: cmpl $4, %edi +; CHECK-NEXT: ja .LBB2_4 +; CHECK-NEXT: # %bb.1: # %entry +; CHECK-NEXT: jmpq *.LJTI2_0(,%rdi,8) +; CHECK-NEXT: .LBB2_3: # %bb2 +; CHECK-NEXT: movl $1, %edi +; CHECK-NEXT: jmp g@PLT # TAILCALL +; CHECK-NEXT: .LBB2_4: # %return +; CHECK-NEXT: retq +; CHECK-NEXT: .LBB2_2: # %bb0 +; CHECK-NEXT: xorl %edi, %edi +; CHECK-NEXT: jmp g@PLT # TAILCALL +; +; NOOPT-LABEL: basic_nojumptable_false: +; NOOPT: # %bb.0: # %entry +; NOOPT-NEXT: pushq %rax +; NOOPT-NEXT: .cfi_def_cfa_offset 16 +; NOOPT-NEXT: movl %edi, %eax +; NOOPT-NEXT: decl %eax +; NOOPT-NEXT: movl %eax, %ecx +; NOOPT-NEXT: movq %rcx, (%rsp) # 8-byte Spill +; NOOPT-NEXT: subl $4, %eax +; NOOPT-NEXT: ja .LBB2_4 +; NOOPT-NEXT: # %bb.5: # %entry +; NOOPT-NEXT: movq (%rsp), %rax # 8-byte Reload +; NOOPT-NEXT: movq .LJTI2_0(,%rax,8), %rax +; NOOPT-NEXT: jmpq *%rax +; NOOPT-NEXT: .LBB2_1: # %bb0 +; NOOPT-NEXT: xorl %edi, %edi +; NOOPT-NEXT: callq g@PLT +; NOOPT-NEXT: jmp .LBB2_4 +; NOOPT-NEXT: .LBB2_2: # %bb1 +; NOOPT-NEXT: movl $1, %edi +; NOOPT-NEXT: callq g@PLT +; NOOPT-NEXT: jmp .LBB2_4 +; NOOPT-NEXT: .LBB2_3: # %bb2 +; NOOPT-NEXT: movl $1, %edi +; NOOPT-NEXT: callq g@PLT +; NOOPT-NEXT: .LBB2_4: # %return +; NOOPT-NEXT: popq %rax +; NOOPT-NEXT: .cfi_def_cfa_offset 8 +; NOOPT-NEXT: retq entry: switch i32 %x, label %return [ i32 3, label %bb0 @@ -62,17 +202,55 @@ bb1: tail call void @g(i32 1) br label %return bb2: tail call void @g(i32 1) br label %return return: ret void - -; Lowered as a jump table, both with and without optimization. -; CHECK-LABEL: basic_nojumptable_false -; CHECK: decl -; CHECK: cmpl $4 -; CHECK: ja -; CHECK: jmpq *.LJTI } +; Should be lowered to two range checks. +; We do this even at -O0, because it's cheap and makes codegen faster. 
define void @simple_ranges(i32 %x) { +; CHECK-LABEL: simple_ranges: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: # kill: def $edi killed $edi def $rdi +; CHECK-NEXT: leal -100(%rdi), %eax +; CHECK-NEXT: cmpl $4, %eax +; CHECK-NEXT: jb .LBB3_3 +; CHECK-NEXT: # %bb.1: # %entry +; CHECK-NEXT: cmpl $3, %edi +; CHECK-NEXT: ja .LBB3_4 +; CHECK-NEXT: # %bb.2: # %bb0 +; CHECK-NEXT: xorl %edi, %edi +; CHECK-NEXT: jmp g@PLT # TAILCALL +; CHECK-NEXT: .LBB3_3: # %bb1 +; CHECK-NEXT: movl $1, %edi +; CHECK-NEXT: jmp g@PLT # TAILCALL +; CHECK-NEXT: .LBB3_4: # %return +; CHECK-NEXT: retq +; +; NOOPT-LABEL: simple_ranges: +; NOOPT: # %bb.0: # %entry +; NOOPT-NEXT: pushq %rax +; NOOPT-NEXT: .cfi_def_cfa_offset 16 +; NOOPT-NEXT: movl %edi, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill +; NOOPT-NEXT: subl $4, %edi +; NOOPT-NEXT: jb .LBB3_1 +; NOOPT-NEXT: jmp .LBB3_4 +; NOOPT-NEXT: .LBB3_4: # %entry +; NOOPT-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload +; NOOPT-NEXT: addl $-100, %eax +; NOOPT-NEXT: subl $4, %eax +; NOOPT-NEXT: jb .LBB3_2 +; NOOPT-NEXT: jmp .LBB3_3 +; NOOPT-NEXT: .LBB3_1: # %bb0 +; NOOPT-NEXT: xorl %edi, %edi +; NOOPT-NEXT: callq g@PLT +; NOOPT-NEXT: jmp .LBB3_3 +; NOOPT-NEXT: .LBB3_2: # %bb1 +; NOOPT-NEXT: movl $1, %edi +; NOOPT-NEXT: callq g@PLT +; NOOPT-NEXT: .LBB3_3: # %return +; NOOPT-NEXT: popq %rax +; NOOPT-NEXT: .cfi_def_cfa_offset 8 +; NOOPT-NEXT: retq entry: switch i32 %x, label %return [ i32 0, label %bb0 @@ -87,28 +265,73 @@ bb0: tail call void @g(i32 0) br label %return bb1: tail call void @g(i32 1) br label %return return: ret void - - - -; Should be lowered to two range checks. -; CHECK-LABEL: simple_ranges -; CHECK: leal -100 -; CHECK: cmpl $4 -; CHECK: jb -; CHECK: cmpl $3 -; CHECK: ja - -; We do this even at -O0, because it's cheap and makes codegen faster. -; NOOPT-LABEL: simple_ranges -; NOOPT: subl $4 -; NOOPT: jb -; NOOPT: addl $-100 -; NOOPT: subl $4 -; NOOPT: jb } +; Cases 0-5 could be lowered with two bit tests, +; but with 6-8, the whole switch is suitable for a jump table. 
define void @jt_is_better(i32 %x) { +; CHECK-LABEL: jt_is_better: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: cmpl $8, %edi +; CHECK-NEXT: ja .LBB4_7 +; CHECK-NEXT: # %bb.1: # %entry +; CHECK-NEXT: movl %edi, %eax +; CHECK-NEXT: jmpq *.LJTI4_0(,%rax,8) +; CHECK-NEXT: .LBB4_2: # %bb0 +; CHECK-NEXT: xorl %edi, %edi +; CHECK-NEXT: jmp g@PLT # TAILCALL +; CHECK-NEXT: .LBB4_3: # %bb1 +; CHECK-NEXT: movl $1, %edi +; CHECK-NEXT: jmp g@PLT # TAILCALL +; CHECK-NEXT: .LBB4_7: # %return +; CHECK-NEXT: retq +; CHECK-NEXT: .LBB4_4: # %bb2 +; CHECK-NEXT: movl $2, %edi +; CHECK-NEXT: jmp g@PLT # TAILCALL +; CHECK-NEXT: .LBB4_5: # %bb3 +; CHECK-NEXT: movl $3, %edi +; CHECK-NEXT: jmp g@PLT # TAILCALL +; CHECK-NEXT: .LBB4_6: # %bb4 +; CHECK-NEXT: movl $4, %edi +; CHECK-NEXT: jmp g@PLT # TAILCALL +; +; NOOPT-LABEL: jt_is_better: +; NOOPT: # %bb.0: # %entry +; NOOPT-NEXT: pushq %rax +; NOOPT-NEXT: .cfi_def_cfa_offset 16 +; NOOPT-NEXT: movl %edi, %eax +; NOOPT-NEXT: # kill: def $rax killed $eax +; NOOPT-NEXT: movq %rax, (%rsp) # 8-byte Spill +; NOOPT-NEXT: subl $8, %edi +; NOOPT-NEXT: ja .LBB4_6 +; NOOPT-NEXT: # %bb.7: # %entry +; NOOPT-NEXT: movq (%rsp), %rax # 8-byte Reload +; NOOPT-NEXT: movq .LJTI4_0(,%rax,8), %rax +; NOOPT-NEXT: jmpq *%rax +; NOOPT-NEXT: .LBB4_1: # %bb0 +; NOOPT-NEXT: xorl %edi, %edi +; NOOPT-NEXT: callq g@PLT +; NOOPT-NEXT: jmp .LBB4_6 +; NOOPT-NEXT: .LBB4_2: # %bb1 +; NOOPT-NEXT: movl $1, %edi +; NOOPT-NEXT: callq g@PLT +; NOOPT-NEXT: jmp .LBB4_6 +; NOOPT-NEXT: .LBB4_3: # %bb2 +; NOOPT-NEXT: movl $2, %edi +; NOOPT-NEXT: callq g@PLT +; NOOPT-NEXT: jmp .LBB4_6 +; NOOPT-NEXT: .LBB4_4: # %bb3 +; NOOPT-NEXT: movl $3, %edi +; NOOPT-NEXT: callq g@PLT +; NOOPT-NEXT: jmp .LBB4_6 +; NOOPT-NEXT: .LBB4_5: # %bb4 +; NOOPT-NEXT: movl $4, %edi +; NOOPT-NEXT: callq g@PLT +; NOOPT-NEXT: .LBB4_6: # %return +; NOOPT-NEXT: popq %rax +; NOOPT-NEXT: .cfi_def_cfa_offset 8 +; NOOPT-NEXT: retq entry: switch i32 %x, label %return [ i32 0, label %bb0 @@ -128,17 +351,101 @@ bb3: tail call void @g(i32 3) br label %return bb4: tail call void @g(i32 4) br label %return return: ret void - -; Cases 0-5 could be lowered with two bit tests, -; but with 6-8, the whole switch is suitable for a jump table. -; CHECK-LABEL: jt_is_better -; CHECK: cmpl $8 -; CHECK: ja -; CHECK: jmpq *.LJTI } +; This could be lowered as a jump table, but bit tests are more efficient. +; The bit test on 2,5,8 is unnecessary as all cases cover the range [0, 8]. +; The range check guarantees that cases other than 0,3,6 and 1,4,7 must be +; in 2,5,8. 
define void @bt_is_better(i32 %x) { +; CHECK-LABEL: bt_is_better: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: cmpl $8, %edi +; CHECK-NEXT: ja .LBB5_4 +; CHECK-NEXT: # %bb.1: # %entry +; CHECK-NEXT: movl $73, %eax +; CHECK-NEXT: btl %edi, %eax +; CHECK-NEXT: jb .LBB5_5 +; CHECK-NEXT: # %bb.2: # %entry +; CHECK-NEXT: movl $146, %eax +; CHECK-NEXT: btl %edi, %eax +; CHECK-NEXT: jae .LBB5_3 +; CHECK-NEXT: # %bb.6: # %bb1 +; CHECK-NEXT: movl $1, %edi +; CHECK-NEXT: jmp g@PLT # TAILCALL +; CHECK-NEXT: .LBB5_5: # %bb0 +; CHECK-NEXT: xorl %edi, %edi +; CHECK-NEXT: jmp g@PLT # TAILCALL +; CHECK-NEXT: .LBB5_3: # %bb2 +; CHECK-NEXT: movl $2, %edi +; CHECK-NEXT: jmp g@PLT # TAILCALL +; CHECK-NEXT: .LBB5_4: # %return +; CHECK-NEXT: retq +; +; NOOPT-LABEL: bt_is_better: +; NOOPT: # %bb.0: # %entry +; NOOPT-NEXT: pushq %rax +; NOOPT-NEXT: .cfi_def_cfa_offset 16 +; NOOPT-NEXT: movl %edi, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill +; NOOPT-NEXT: testl %edi, %edi +; NOOPT-NEXT: je .LBB5_1 +; NOOPT-NEXT: jmp .LBB5_5 +; NOOPT-NEXT: .LBB5_5: # %entry +; NOOPT-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload +; NOOPT-NEXT: subl $1, %eax +; NOOPT-NEXT: je .LBB5_2 +; NOOPT-NEXT: jmp .LBB5_6 +; NOOPT-NEXT: .LBB5_6: # %entry +; NOOPT-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload +; NOOPT-NEXT: subl $2, %eax +; NOOPT-NEXT: je .LBB5_3 +; NOOPT-NEXT: jmp .LBB5_7 +; NOOPT-NEXT: .LBB5_7: # %entry +; NOOPT-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload +; NOOPT-NEXT: subl $3, %eax +; NOOPT-NEXT: je .LBB5_1 +; NOOPT-NEXT: jmp .LBB5_8 +; NOOPT-NEXT: .LBB5_8: # %entry +; NOOPT-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload +; NOOPT-NEXT: subl $4, %eax +; NOOPT-NEXT: je .LBB5_2 +; NOOPT-NEXT: jmp .LBB5_9 +; NOOPT-NEXT: .LBB5_9: # %entry +; NOOPT-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload +; NOOPT-NEXT: subl $5, %eax +; NOOPT-NEXT: je .LBB5_3 +; NOOPT-NEXT: jmp .LBB5_10 +; NOOPT-NEXT: .LBB5_10: # %entry +; NOOPT-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload +; NOOPT-NEXT: subl $6, %eax +; NOOPT-NEXT: je .LBB5_1 +; NOOPT-NEXT: jmp .LBB5_11 +; NOOPT-NEXT: .LBB5_11: # %entry +; NOOPT-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload +; NOOPT-NEXT: subl $7, %eax +; NOOPT-NEXT: je .LBB5_2 +; NOOPT-NEXT: jmp .LBB5_12 +; NOOPT-NEXT: .LBB5_12: # %entry +; NOOPT-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload +; NOOPT-NEXT: subl $8, %eax +; NOOPT-NEXT: je .LBB5_3 +; NOOPT-NEXT: jmp .LBB5_4 +; NOOPT-NEXT: .LBB5_1: # %bb0 +; NOOPT-NEXT: xorl %edi, %edi +; NOOPT-NEXT: callq g@PLT +; NOOPT-NEXT: jmp .LBB5_4 +; NOOPT-NEXT: .LBB5_2: # %bb1 +; NOOPT-NEXT: movl $1, %edi +; NOOPT-NEXT: callq g@PLT +; NOOPT-NEXT: jmp .LBB5_4 +; NOOPT-NEXT: .LBB5_3: # %bb2 +; NOOPT-NEXT: movl $2, %edi +; NOOPT-NEXT: callq g@PLT +; NOOPT-NEXT: .LBB5_4: # %return +; NOOPT-NEXT: popq %rax +; NOOPT-NEXT: .cfi_def_cfa_offset 8 +; NOOPT-NEXT: retq entry: switch i32 %x, label %return [ i32 0, label %bb0 @@ -155,25 +462,98 @@ bb1: tail call void @g(i32 1) br label %return bb2: tail call void @g(i32 2) br label %return return: ret void - -; This could be lowered as a jump table, but bit tests is more efficient. -; CHECK-LABEL: bt_is_better -; The bit test on 2,5,8 is unnecessary as all cases cover the rage [0, 8]. -; The range check guarantees that cases other than 0,3,6 and 1,4,7 must be -; in 2,5,8. 
-; -; 73 = 2^0 + 2^3 + 2^6 -; CHECK: movl $73 -; CHECK: btl -; 146 = 2^1 + 2^4 + 2^7 -; CHECK: movl $146 -; CHECK: btl -; 292 = 2^2 + 2^5 + 2^8 -; CHECK-NOT: movl $292 -; CHECK-NOT: btl } +; This will also be lowered as bit test, but as the range [0,8] is not fully +; covered (5 missing), the default statement can be jumped to and we end up +; with one more branch. define void @bt_is_better2(i32 %x) { +; CHECK-LABEL: bt_is_better2: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: cmpl $8, %edi +; CHECK-NEXT: ja .LBB6_7 +; CHECK-NEXT: # %bb.1: # %entry +; CHECK-NEXT: movl $73, %eax +; CHECK-NEXT: btl %edi, %eax +; CHECK-NEXT: jb .LBB6_5 +; CHECK-NEXT: # %bb.2: # %entry +; CHECK-NEXT: movl $146, %eax +; CHECK-NEXT: btl %edi, %eax +; CHECK-NEXT: jae .LBB6_3 +; CHECK-NEXT: # %bb.6: # %bb1 +; CHECK-NEXT: movl $1, %edi +; CHECK-NEXT: jmp g@PLT # TAILCALL +; CHECK-NEXT: .LBB6_5: # %bb0 +; CHECK-NEXT: xorl %edi, %edi +; CHECK-NEXT: jmp g@PLT # TAILCALL +; CHECK-NEXT: .LBB6_3: # %entry +; CHECK-NEXT: movl $260, %eax # imm = 0x104 +; CHECK-NEXT: btl %edi, %eax +; CHECK-NEXT: jae .LBB6_7 +; CHECK-NEXT: # %bb.4: # %bb2 +; CHECK-NEXT: movl $2, %edi +; CHECK-NEXT: jmp g@PLT # TAILCALL +; CHECK-NEXT: .LBB6_7: # %return +; CHECK-NEXT: retq +; +; NOOPT-LABEL: bt_is_better2: +; NOOPT: # %bb.0: # %entry +; NOOPT-NEXT: pushq %rax +; NOOPT-NEXT: .cfi_def_cfa_offset 16 +; NOOPT-NEXT: movl %edi, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill +; NOOPT-NEXT: testl %edi, %edi +; NOOPT-NEXT: je .LBB6_1 +; NOOPT-NEXT: jmp .LBB6_5 +; NOOPT-NEXT: .LBB6_5: # %entry +; NOOPT-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload +; NOOPT-NEXT: subl $1, %eax +; NOOPT-NEXT: je .LBB6_2 +; NOOPT-NEXT: jmp .LBB6_6 +; NOOPT-NEXT: .LBB6_6: # %entry +; NOOPT-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload +; NOOPT-NEXT: subl $2, %eax +; NOOPT-NEXT: je .LBB6_3 +; NOOPT-NEXT: jmp .LBB6_7 +; NOOPT-NEXT: .LBB6_7: # %entry +; NOOPT-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload +; NOOPT-NEXT: subl $3, %eax +; NOOPT-NEXT: je .LBB6_1 +; NOOPT-NEXT: jmp .LBB6_8 +; NOOPT-NEXT: .LBB6_8: # %entry +; NOOPT-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload +; NOOPT-NEXT: subl $4, %eax +; NOOPT-NEXT: je .LBB6_2 +; NOOPT-NEXT: jmp .LBB6_9 +; NOOPT-NEXT: .LBB6_9: # %entry +; NOOPT-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload +; NOOPT-NEXT: subl $6, %eax +; NOOPT-NEXT: je .LBB6_1 +; NOOPT-NEXT: jmp .LBB6_10 +; NOOPT-NEXT: .LBB6_10: # %entry +; NOOPT-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload +; NOOPT-NEXT: subl $7, %eax +; NOOPT-NEXT: je .LBB6_2 +; NOOPT-NEXT: jmp .LBB6_11 +; NOOPT-NEXT: .LBB6_11: # %entry +; NOOPT-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload +; NOOPT-NEXT: subl $8, %eax +; NOOPT-NEXT: je .LBB6_3 +; NOOPT-NEXT: jmp .LBB6_4 +; NOOPT-NEXT: .LBB6_1: # %bb0 +; NOOPT-NEXT: xorl %edi, %edi +; NOOPT-NEXT: callq g@PLT +; NOOPT-NEXT: jmp .LBB6_4 +; NOOPT-NEXT: .LBB6_2: # %bb1 +; NOOPT-NEXT: movl $1, %edi +; NOOPT-NEXT: callq g@PLT +; NOOPT-NEXT: jmp .LBB6_4 +; NOOPT-NEXT: .LBB6_3: # %bb2 +; NOOPT-NEXT: movl $2, %edi +; NOOPT-NEXT: callq g@PLT +; NOOPT-NEXT: .LBB6_4: # %return +; NOOPT-NEXT: popq %rax +; NOOPT-NEXT: .cfi_def_cfa_offset 8 +; NOOPT-NEXT: retq entry: switch i32 %x, label %return [ i32 0, label %bb0 @@ -189,24 +569,95 @@ bb1: tail call void @g(i32 1) br label %return bb2: tail call void @g(i32 2) br label %return return: ret void - -; This will also be lowered as bit test, but as the range [0,8] is not fully -; covered (5 missing), the default statement can be jumped 
to and we end up -; with one more branch. -; CHECK-LABEL: bt_is_better2 -; -; 73 = 2^0 + 2^3 + 2^6 -; CHECK: movl $73 -; CHECK: btl -; 146 = 2^1 + 2^4 + 2^7 -; CHECK: movl $146 -; CHECK: btl -; 260 = 2^2 + 2^8 -; CHECK: movl $260 -; CHECK: btl } define void @bt_is_better3(i32 %x) { +; CHECK-LABEL: bt_is_better3: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: cmpl $18, %edi +; CHECK-NEXT: ja .LBB7_7 +; CHECK-NEXT: # %bb.1: # %entry +; CHECK-NEXT: movl $74752, %eax # imm = 0x12400 +; CHECK-NEXT: btl %edi, %eax +; CHECK-NEXT: jb .LBB7_5 +; CHECK-NEXT: # %bb.2: # %entry +; CHECK-NEXT: movl $149504, %eax # imm = 0x24800 +; CHECK-NEXT: btl %edi, %eax +; CHECK-NEXT: jae .LBB7_3 +; CHECK-NEXT: # %bb.6: # %bb1 +; CHECK-NEXT: movl $1, %edi +; CHECK-NEXT: jmp g@PLT # TAILCALL +; CHECK-NEXT: .LBB7_5: # %bb0 +; CHECK-NEXT: xorl %edi, %edi +; CHECK-NEXT: jmp g@PLT # TAILCALL +; CHECK-NEXT: .LBB7_3: # %entry +; CHECK-NEXT: movl $266240, %eax # imm = 0x41000 +; CHECK-NEXT: btl %edi, %eax +; CHECK-NEXT: jae .LBB7_7 +; CHECK-NEXT: # %bb.4: # %bb2 +; CHECK-NEXT: movl $2, %edi +; CHECK-NEXT: jmp g@PLT # TAILCALL +; CHECK-NEXT: .LBB7_7: # %return +; CHECK-NEXT: retq +; +; NOOPT-LABEL: bt_is_better3: +; NOOPT: # %bb.0: # %entry +; NOOPT-NEXT: pushq %rax +; NOOPT-NEXT: .cfi_def_cfa_offset 16 +; NOOPT-NEXT: movl %edi, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill +; NOOPT-NEXT: subl $10, %edi +; NOOPT-NEXT: je .LBB7_1 +; NOOPT-NEXT: jmp .LBB7_5 +; NOOPT-NEXT: .LBB7_5: # %entry +; NOOPT-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload +; NOOPT-NEXT: subl $11, %eax +; NOOPT-NEXT: je .LBB7_2 +; NOOPT-NEXT: jmp .LBB7_6 +; NOOPT-NEXT: .LBB7_6: # %entry +; NOOPT-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload +; NOOPT-NEXT: subl $12, %eax +; NOOPT-NEXT: je .LBB7_3 +; NOOPT-NEXT: jmp .LBB7_7 +; NOOPT-NEXT: .LBB7_7: # %entry +; NOOPT-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload +; NOOPT-NEXT: subl $13, %eax +; NOOPT-NEXT: je .LBB7_1 +; NOOPT-NEXT: jmp .LBB7_8 +; NOOPT-NEXT: .LBB7_8: # %entry +; NOOPT-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload +; NOOPT-NEXT: subl $14, %eax +; NOOPT-NEXT: je .LBB7_2 +; NOOPT-NEXT: jmp .LBB7_9 +; NOOPT-NEXT: .LBB7_9: # %entry +; NOOPT-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload +; NOOPT-NEXT: subl $16, %eax +; NOOPT-NEXT: je .LBB7_1 +; NOOPT-NEXT: jmp .LBB7_10 +; NOOPT-NEXT: .LBB7_10: # %entry +; NOOPT-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload +; NOOPT-NEXT: subl $17, %eax +; NOOPT-NEXT: je .LBB7_2 +; NOOPT-NEXT: jmp .LBB7_11 +; NOOPT-NEXT: .LBB7_11: # %entry +; NOOPT-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload +; NOOPT-NEXT: subl $18, %eax +; NOOPT-NEXT: je .LBB7_3 +; NOOPT-NEXT: jmp .LBB7_4 +; NOOPT-NEXT: .LBB7_1: # %bb0 +; NOOPT-NEXT: xorl %edi, %edi +; NOOPT-NEXT: callq g@PLT +; NOOPT-NEXT: jmp .LBB7_4 +; NOOPT-NEXT: .LBB7_2: # %bb1 +; NOOPT-NEXT: movl $1, %edi +; NOOPT-NEXT: callq g@PLT +; NOOPT-NEXT: jmp .LBB7_4 +; NOOPT-NEXT: .LBB7_3: # %bb2 +; NOOPT-NEXT: movl $2, %edi +; NOOPT-NEXT: callq g@PLT +; NOOPT-NEXT: .LBB7_4: # %return +; NOOPT-NEXT: popq %rax +; NOOPT-NEXT: .cfi_def_cfa_offset 8 +; NOOPT-NEXT: retq entry: switch i32 %x, label %return [ i32 10, label %bb0 @@ -227,21 +678,89 @@ ; [0, 8], as each value in the range [10, 18] can be represented by bits in a ; word. Then we still need a branch to jump to the default statement for the ; range [0, 10). 
-; CHECK-LABEL: bt_is_better3 -; ; 74752 = 2^10 + 2^13 + 2^16 -; CHECK: movl $74752 -; CHECK: btl ; 149504 = 2^11 + 2^14 + 2^17 -; CHECK: movl $149504 -; CHECK: btl ; 266240 = 2^12 + 2^15 + 2^18 -; CHECK: movl $266240 -; CHECK: btl } +; Should pivot around 400 for two subtrees of equal size. define void @optimal_pivot1(i32 %x) { +; CHECK-LABEL: optimal_pivot1: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: cmpl $399, %edi # imm = 0x18F +; CHECK-NEXT: jg .LBB8_5 +; CHECK-NEXT: # %bb.1: # %entry +; CHECK-NEXT: cmpl $100, %edi +; CHECK-NEXT: je .LBB8_8 +; CHECK-NEXT: # %bb.2: # %entry +; CHECK-NEXT: cmpl $200, %edi +; CHECK-NEXT: je .LBB8_9 +; CHECK-NEXT: # %bb.3: # %entry +; CHECK-NEXT: cmpl $300, %edi # imm = 0x12C +; CHECK-NEXT: je .LBB8_8 +; CHECK-NEXT: .LBB8_4: # %return +; CHECK-NEXT: retq +; CHECK-NEXT: .LBB8_5: # %entry +; CHECK-NEXT: cmpl $400, %edi # imm = 0x190 +; CHECK-NEXT: je .LBB8_9 +; CHECK-NEXT: # %bb.6: # %entry +; CHECK-NEXT: cmpl $600, %edi # imm = 0x258 +; CHECK-NEXT: je .LBB8_9 +; CHECK-NEXT: # %bb.7: # %entry +; CHECK-NEXT: cmpl $500, %edi # imm = 0x1F4 +; CHECK-NEXT: jne .LBB8_4 +; CHECK-NEXT: .LBB8_8: # %bb0 +; CHECK-NEXT: xorl %edi, %edi +; CHECK-NEXT: jmp g@PLT # TAILCALL +; CHECK-NEXT: .LBB8_9: # %bb1 +; CHECK-NEXT: movl $1, %edi +; CHECK-NEXT: jmp g@PLT # TAILCALL +; +; NOOPT-LABEL: optimal_pivot1: +; NOOPT: # %bb.0: # %entry +; NOOPT-NEXT: pushq %rax +; NOOPT-NEXT: .cfi_def_cfa_offset 16 +; NOOPT-NEXT: movl %edi, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill +; NOOPT-NEXT: subl $100, %edi +; NOOPT-NEXT: je .LBB8_1 +; NOOPT-NEXT: jmp .LBB8_4 +; NOOPT-NEXT: .LBB8_4: # %entry +; NOOPT-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload +; NOOPT-NEXT: subl $200, %eax +; NOOPT-NEXT: je .LBB8_2 +; NOOPT-NEXT: jmp .LBB8_5 +; NOOPT-NEXT: .LBB8_5: # %entry +; NOOPT-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload +; NOOPT-NEXT: subl $300, %eax # imm = 0x12C +; NOOPT-NEXT: je .LBB8_1 +; NOOPT-NEXT: jmp .LBB8_6 +; NOOPT-NEXT: .LBB8_6: # %entry +; NOOPT-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload +; NOOPT-NEXT: subl $400, %eax # imm = 0x190 +; NOOPT-NEXT: je .LBB8_2 +; NOOPT-NEXT: jmp .LBB8_7 +; NOOPT-NEXT: .LBB8_7: # %entry +; NOOPT-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload +; NOOPT-NEXT: subl $500, %eax # imm = 0x1F4 +; NOOPT-NEXT: je .LBB8_1 +; NOOPT-NEXT: jmp .LBB8_8 +; NOOPT-NEXT: .LBB8_8: # %entry +; NOOPT-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload +; NOOPT-NEXT: subl $600, %eax # imm = 0x258 +; NOOPT-NEXT: je .LBB8_2 +; NOOPT-NEXT: jmp .LBB8_3 +; NOOPT-NEXT: .LBB8_1: # %bb0 +; NOOPT-NEXT: xorl %edi, %edi +; NOOPT-NEXT: callq g@PLT +; NOOPT-NEXT: jmp .LBB8_3 +; NOOPT-NEXT: .LBB8_2: # %bb1 +; NOOPT-NEXT: movl $1, %edi +; NOOPT-NEXT: callq g@PLT +; NOOPT-NEXT: .LBB8_3: # %return +; NOOPT-NEXT: popq %rax +; NOOPT-NEXT: .cfi_def_cfa_offset 8 +; NOOPT-NEXT: retq entry: switch i32 %x, label %return [ i32 100, label %bb0 @@ -255,15 +774,157 @@ bb0: tail call void @g(i32 0) br label %return bb1: tail call void @g(i32 1) br label %return return: ret void - -; Should pivot around 400 for two subtrees of equal size. -; CHECK-LABEL: optimal_pivot1 -; CHECK-NOT: cmpl -; CHECK: cmpl $399 } +; Should pivot around 300 for two subtrees with two jump tables each. 
define void @optimal_pivot2(i32 %x) { +; CHECK-LABEL: optimal_pivot2: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: # kill: def $edi killed $edi def $rdi +; CHECK-NEXT: cmpl $299, %edi # imm = 0x12B +; CHECK-NEXT: jg .LBB9_4 +; CHECK-NEXT: # %bb.1: # %entry +; CHECK-NEXT: leal -100(%rdi), %eax +; CHECK-NEXT: cmpl $3, %eax +; CHECK-NEXT: jbe .LBB9_12 +; CHECK-NEXT: # %bb.2: # %entry +; CHECK-NEXT: addl $-200, %edi +; CHECK-NEXT: cmpl $3, %edi +; CHECK-NEXT: ja .LBB9_11 +; CHECK-NEXT: # %bb.3: # %entry +; CHECK-NEXT: jmpq *.LJTI9_1(,%rdi,8) +; CHECK-NEXT: .LBB9_4: # %entry +; CHECK-NEXT: leal -300(%rdi), %eax +; CHECK-NEXT: cmpl $3, %eax +; CHECK-NEXT: jbe .LBB9_13 +; CHECK-NEXT: # %bb.5: # %entry +; CHECK-NEXT: addl $-400, %edi # imm = 0xFE70 +; CHECK-NEXT: cmpl $3, %edi +; CHECK-NEXT: ja .LBB9_11 +; CHECK-NEXT: # %bb.6: # %entry +; CHECK-NEXT: jmpq *.LJTI9_3(,%rdi,8) +; CHECK-NEXT: .LBB9_12: # %entry +; CHECK-NEXT: jmpq *.LJTI9_0(,%rax,8) +; CHECK-NEXT: .LBB9_13: # %entry +; CHECK-NEXT: jmpq *.LJTI9_2(,%rax,8) +; CHECK-NEXT: .LBB9_7: # %bb0 +; CHECK-NEXT: xorl %edi, %edi +; CHECK-NEXT: jmp g@PLT # TAILCALL +; CHECK-NEXT: .LBB9_8: # %bb1 +; CHECK-NEXT: movl $1, %edi +; CHECK-NEXT: jmp g@PLT # TAILCALL +; CHECK-NEXT: .LBB9_9: # %bb2 +; CHECK-NEXT: movl $2, %edi +; CHECK-NEXT: jmp g@PLT # TAILCALL +; CHECK-NEXT: .LBB9_10: # %bb3 +; CHECK-NEXT: movl $3, %edi +; CHECK-NEXT: jmp g@PLT # TAILCALL +; CHECK-NEXT: .LBB9_11: # %return +; CHECK-NEXT: retq +; +; NOOPT-LABEL: optimal_pivot2: +; NOOPT: # %bb.0: # %entry +; NOOPT-NEXT: pushq %rax +; NOOPT-NEXT: .cfi_def_cfa_offset 16 +; NOOPT-NEXT: movl %edi, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill +; NOOPT-NEXT: subl $100, %edi +; NOOPT-NEXT: je .LBB9_1 +; NOOPT-NEXT: jmp .LBB9_6 +; NOOPT-NEXT: .LBB9_6: # %entry +; NOOPT-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload +; NOOPT-NEXT: subl $101, %eax +; NOOPT-NEXT: je .LBB9_2 +; NOOPT-NEXT: jmp .LBB9_7 +; NOOPT-NEXT: .LBB9_7: # %entry +; NOOPT-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload +; NOOPT-NEXT: subl $102, %eax +; NOOPT-NEXT: je .LBB9_3 +; NOOPT-NEXT: jmp .LBB9_8 +; NOOPT-NEXT: .LBB9_8: # %entry +; NOOPT-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload +; NOOPT-NEXT: subl $103, %eax +; NOOPT-NEXT: je .LBB9_4 +; NOOPT-NEXT: jmp .LBB9_9 +; NOOPT-NEXT: .LBB9_9: # %entry +; NOOPT-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload +; NOOPT-NEXT: subl $200, %eax +; NOOPT-NEXT: je .LBB9_1 +; NOOPT-NEXT: jmp .LBB9_10 +; NOOPT-NEXT: .LBB9_10: # %entry +; NOOPT-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload +; NOOPT-NEXT: subl $201, %eax +; NOOPT-NEXT: je .LBB9_2 +; NOOPT-NEXT: jmp .LBB9_11 +; NOOPT-NEXT: .LBB9_11: # %entry +; NOOPT-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload +; NOOPT-NEXT: subl $202, %eax +; NOOPT-NEXT: je .LBB9_3 +; NOOPT-NEXT: jmp .LBB9_12 +; NOOPT-NEXT: .LBB9_12: # %entry +; NOOPT-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload +; NOOPT-NEXT: subl $203, %eax +; NOOPT-NEXT: je .LBB9_4 +; NOOPT-NEXT: jmp .LBB9_13 +; NOOPT-NEXT: .LBB9_13: # %entry +; NOOPT-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload +; NOOPT-NEXT: subl $300, %eax # imm = 0x12C +; NOOPT-NEXT: je .LBB9_1 +; NOOPT-NEXT: jmp .LBB9_14 +; NOOPT-NEXT: .LBB9_14: # %entry +; NOOPT-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload +; NOOPT-NEXT: subl $301, %eax # imm = 0x12D +; NOOPT-NEXT: je .LBB9_2 +; NOOPT-NEXT: jmp .LBB9_15 +; NOOPT-NEXT: .LBB9_15: # %entry +; NOOPT-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload +; NOOPT-NEXT: 
subl $302, %eax # imm = 0x12E +; NOOPT-NEXT: je .LBB9_3 +; NOOPT-NEXT: jmp .LBB9_16 +; NOOPT-NEXT: .LBB9_16: # %entry +; NOOPT-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload +; NOOPT-NEXT: subl $303, %eax # imm = 0x12F +; NOOPT-NEXT: je .LBB9_4 +; NOOPT-NEXT: jmp .LBB9_17 +; NOOPT-NEXT: .LBB9_17: # %entry +; NOOPT-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload +; NOOPT-NEXT: subl $400, %eax # imm = 0x190 +; NOOPT-NEXT: je .LBB9_1 +; NOOPT-NEXT: jmp .LBB9_18 +; NOOPT-NEXT: .LBB9_18: # %entry +; NOOPT-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload +; NOOPT-NEXT: subl $401, %eax # imm = 0x191 +; NOOPT-NEXT: je .LBB9_2 +; NOOPT-NEXT: jmp .LBB9_19 +; NOOPT-NEXT: .LBB9_19: # %entry +; NOOPT-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload +; NOOPT-NEXT: subl $402, %eax # imm = 0x192 +; NOOPT-NEXT: je .LBB9_3 +; NOOPT-NEXT: jmp .LBB9_20 +; NOOPT-NEXT: .LBB9_20: # %entry +; NOOPT-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload +; NOOPT-NEXT: subl $403, %eax # imm = 0x193 +; NOOPT-NEXT: je .LBB9_4 +; NOOPT-NEXT: jmp .LBB9_5 +; NOOPT-NEXT: .LBB9_1: # %bb0 +; NOOPT-NEXT: xorl %edi, %edi +; NOOPT-NEXT: callq g@PLT +; NOOPT-NEXT: jmp .LBB9_5 +; NOOPT-NEXT: .LBB9_2: # %bb1 +; NOOPT-NEXT: movl $1, %edi +; NOOPT-NEXT: callq g@PLT +; NOOPT-NEXT: jmp .LBB9_5 +; NOOPT-NEXT: .LBB9_3: # %bb2 +; NOOPT-NEXT: movl $2, %edi +; NOOPT-NEXT: callq g@PLT +; NOOPT-NEXT: jmp .LBB9_5 +; NOOPT-NEXT: .LBB9_4: # %bb3 +; NOOPT-NEXT: movl $3, %edi +; NOOPT-NEXT: callq g@PLT +; NOOPT-NEXT: .LBB9_5: # %return +; NOOPT-NEXT: popq %rax +; NOOPT-NEXT: .cfi_def_cfa_offset 8 +; NOOPT-NEXT: retq entry: switch i32 %x, label %return [ i32 100, label %bb0 i32 101, label %bb1 i32 102, label %bb2 i32 103, label %bb3 @@ -277,19 +938,105 @@ bb2: tail call void @g(i32 2) br label %return bb3: tail call void @g(i32 3) br label %return return: ret void - -; Should pivot around 300 for two subtrees with two jump tables each. -; CHECK-LABEL: optimal_pivot2 -; CHECK-NOT: cmpl -; CHECK: cmpl $299 -; CHECK: jmpq *.LJTI -; CHECK: jmpq *.LJTI -; CHECK: jmpq *.LJTI -; CHECK: jmpq *.LJTI } +; Splitting in the largest gap (between 6 and 12) would yield suboptimal result. +; Expecting a jump table from 5 to 15. +; At -O0, we don't build jump tables for only parts of a switch. 
define void @optimal_jump_table1(i32 %x) { +; CHECK-LABEL: optimal_jump_table1: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: # kill: def $edi killed $edi def $rdi +; CHECK-NEXT: leal -5(%rdi), %eax +; CHECK-NEXT: cmpl $10, %eax +; CHECK-NEXT: ja .LBB10_1 +; CHECK-NEXT: # %bb.9: # %entry +; CHECK-NEXT: jmpq *.LJTI10_0(,%rax,8) +; CHECK-NEXT: .LBB10_3: # %bb1 +; CHECK-NEXT: movl $1, %edi +; CHECK-NEXT: jmp g@PLT # TAILCALL +; CHECK-NEXT: .LBB10_1: # %entry +; CHECK-NEXT: testl %edi, %edi +; CHECK-NEXT: jne .LBB10_8 +; CHECK-NEXT: # %bb.2: # %bb0 +; CHECK-NEXT: xorl %edi, %edi +; CHECK-NEXT: jmp g@PLT # TAILCALL +; CHECK-NEXT: .LBB10_8: # %return +; CHECK-NEXT: retq +; CHECK-NEXT: .LBB10_4: # %bb2 +; CHECK-NEXT: movl $2, %edi +; CHECK-NEXT: jmp g@PLT # TAILCALL +; CHECK-NEXT: .LBB10_5: # %bb3 +; CHECK-NEXT: movl $3, %edi +; CHECK-NEXT: jmp g@PLT # TAILCALL +; CHECK-NEXT: .LBB10_6: # %bb4 +; CHECK-NEXT: movl $4, %edi +; CHECK-NEXT: jmp g@PLT # TAILCALL +; CHECK-NEXT: .LBB10_7: # %bb5 +; CHECK-NEXT: movl $5, %edi +; CHECK-NEXT: jmp g@PLT # TAILCALL +; +; NOOPT-LABEL: optimal_jump_table1: +; NOOPT: # %bb.0: # %entry +; NOOPT-NEXT: pushq %rax +; NOOPT-NEXT: .cfi_def_cfa_offset 16 +; NOOPT-NEXT: movl %edi, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill +; NOOPT-NEXT: testl %edi, %edi +; NOOPT-NEXT: je .LBB10_1 +; NOOPT-NEXT: jmp .LBB10_8 +; NOOPT-NEXT: .LBB10_8: # %entry +; NOOPT-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload +; NOOPT-NEXT: subl $5, %eax +; NOOPT-NEXT: je .LBB10_2 +; NOOPT-NEXT: jmp .LBB10_9 +; NOOPT-NEXT: .LBB10_9: # %entry +; NOOPT-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload +; NOOPT-NEXT: subl $6, %eax +; NOOPT-NEXT: je .LBB10_3 +; NOOPT-NEXT: jmp .LBB10_10 +; NOOPT-NEXT: .LBB10_10: # %entry +; NOOPT-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload +; NOOPT-NEXT: subl $12, %eax +; NOOPT-NEXT: je .LBB10_4 +; NOOPT-NEXT: jmp .LBB10_11 +; NOOPT-NEXT: .LBB10_11: # %entry +; NOOPT-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload +; NOOPT-NEXT: subl $13, %eax +; NOOPT-NEXT: je .LBB10_5 +; NOOPT-NEXT: jmp .LBB10_12 +; NOOPT-NEXT: .LBB10_12: # %entry +; NOOPT-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload +; NOOPT-NEXT: subl $15, %eax +; NOOPT-NEXT: je .LBB10_6 +; NOOPT-NEXT: jmp .LBB10_7 +; NOOPT-NEXT: .LBB10_1: # %bb0 +; NOOPT-NEXT: xorl %edi, %edi +; NOOPT-NEXT: callq g@PLT +; NOOPT-NEXT: jmp .LBB10_7 +; NOOPT-NEXT: .LBB10_2: # %bb1 +; NOOPT-NEXT: movl $1, %edi +; NOOPT-NEXT: callq g@PLT +; NOOPT-NEXT: jmp .LBB10_7 +; NOOPT-NEXT: .LBB10_3: # %bb2 +; NOOPT-NEXT: movl $2, %edi +; NOOPT-NEXT: callq g@PLT +; NOOPT-NEXT: jmp .LBB10_7 +; NOOPT-NEXT: .LBB10_4: # %bb3 +; NOOPT-NEXT: movl $3, %edi +; NOOPT-NEXT: callq g@PLT +; NOOPT-NEXT: jmp .LBB10_7 +; NOOPT-NEXT: .LBB10_5: # %bb4 +; NOOPT-NEXT: movl $4, %edi +; NOOPT-NEXT: callq g@PLT +; NOOPT-NEXT: jmp .LBB10_7 +; NOOPT-NEXT: .LBB10_6: # %bb5 +; NOOPT-NEXT: movl $5, %edi +; NOOPT-NEXT: callq g@PLT +; NOOPT-NEXT: .LBB10_7: # %return +; NOOPT-NEXT: popq %rax +; NOOPT-NEXT: .cfi_def_cfa_offset 8 +; NOOPT-NEXT: retq entry: switch i32 %x, label %return [ i32 0, label %bb0 @@ -306,32 +1053,107 @@ bb4: tail call void @g(i32 4) br label %return bb5: tail call void @g(i32 5) br label %return return: ret void - -; Splitting in the largest gap (between 6 and 12) would yield suboptimal result. -; Expecting a jump table from 5 to 15. -; CHECK-LABEL: optimal_jump_table1 -; CHECK: leal -5 -; CHECK: cmpl $10 -; CHECK: jmpq *.LJTI - -; At -O0, we don't build jump tables for only parts of a switch. 
-; NOOPT-LABEL: optimal_jump_table1 -; NOOPT: testl %edi, %edi -; NOOPT: je -; NOOPT: subl $5, [[REG:%e[abcd][xi]]] -; NOOPT: je -; NOOPT: subl $6, [[REG]] -; NOOPT: je -; NOOPT: subl $12, [[REG]] -; NOOPT: je -; NOOPT: subl $13, [[REG]] -; NOOPT: je -; NOOPT: subl $15, [[REG]] -; NOOPT: je } +; Partitioning the cases to the minimum number of dense sets is not good enough. +; This can be partitioned as {0,1,2,9},{14,15} or {0,1,2},{9,14,15}. The former +; should be preferred. Expecting a table from 0-9. define void @optimal_jump_table2(i32 %x) { +; CHECK-LABEL: optimal_jump_table2: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: cmpl $9, %edi +; CHECK-NEXT: ja .LBB11_1 +; CHECK-NEXT: # %bb.10: # %entry +; CHECK-NEXT: movl %edi, %eax +; CHECK-NEXT: jmpq *.LJTI11_0(,%rax,8) +; CHECK-NEXT: .LBB11_4: # %bb0 +; CHECK-NEXT: xorl %edi, %edi +; CHECK-NEXT: jmp g@PLT # TAILCALL +; CHECK-NEXT: .LBB11_1: # %entry +; CHECK-NEXT: cmpl $14, %edi +; CHECK-NEXT: je .LBB11_8 +; CHECK-NEXT: # %bb.2: # %entry +; CHECK-NEXT: cmpl $15, %edi +; CHECK-NEXT: jne .LBB11_9 +; CHECK-NEXT: # %bb.3: # %bb5 +; CHECK-NEXT: movl $5, %edi +; CHECK-NEXT: jmp g@PLT # TAILCALL +; CHECK-NEXT: .LBB11_9: # %return +; CHECK-NEXT: retq +; CHECK-NEXT: .LBB11_5: # %bb1 +; CHECK-NEXT: movl $1, %edi +; CHECK-NEXT: jmp g@PLT # TAILCALL +; CHECK-NEXT: .LBB11_6: # %bb2 +; CHECK-NEXT: movl $2, %edi +; CHECK-NEXT: jmp g@PLT # TAILCALL +; CHECK-NEXT: .LBB11_7: # %bb3 +; CHECK-NEXT: movl $3, %edi +; CHECK-NEXT: jmp g@PLT # TAILCALL +; CHECK-NEXT: .LBB11_8: # %bb4 +; CHECK-NEXT: movl $4, %edi +; CHECK-NEXT: jmp g@PLT # TAILCALL +; +; NOOPT-LABEL: optimal_jump_table2: +; NOOPT: # %bb.0: # %entry +; NOOPT-NEXT: pushq %rax +; NOOPT-NEXT: .cfi_def_cfa_offset 16 +; NOOPT-NEXT: movl %edi, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill +; NOOPT-NEXT: testl %edi, %edi +; NOOPT-NEXT: je .LBB11_1 +; NOOPT-NEXT: jmp .LBB11_8 +; NOOPT-NEXT: .LBB11_8: # %entry +; NOOPT-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload +; NOOPT-NEXT: subl $1, %eax +; NOOPT-NEXT: je .LBB11_2 +; NOOPT-NEXT: jmp .LBB11_9 +; NOOPT-NEXT: .LBB11_9: # %entry +; NOOPT-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload +; NOOPT-NEXT: subl $2, %eax +; NOOPT-NEXT: je .LBB11_3 +; NOOPT-NEXT: jmp .LBB11_10 +; NOOPT-NEXT: .LBB11_10: # %entry +; NOOPT-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload +; NOOPT-NEXT: subl $9, %eax +; NOOPT-NEXT: je .LBB11_4 +; NOOPT-NEXT: jmp .LBB11_11 +; NOOPT-NEXT: .LBB11_11: # %entry +; NOOPT-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload +; NOOPT-NEXT: subl $14, %eax +; NOOPT-NEXT: je .LBB11_5 +; NOOPT-NEXT: jmp .LBB11_12 +; NOOPT-NEXT: .LBB11_12: # %entry +; NOOPT-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload +; NOOPT-NEXT: subl $15, %eax +; NOOPT-NEXT: je .LBB11_6 +; NOOPT-NEXT: jmp .LBB11_7 +; NOOPT-NEXT: .LBB11_1: # %bb0 +; NOOPT-NEXT: xorl %edi, %edi +; NOOPT-NEXT: callq g@PLT +; NOOPT-NEXT: jmp .LBB11_7 +; NOOPT-NEXT: .LBB11_2: # %bb1 +; NOOPT-NEXT: movl $1, %edi +; NOOPT-NEXT: callq g@PLT +; NOOPT-NEXT: jmp .LBB11_7 +; NOOPT-NEXT: .LBB11_3: # %bb2 +; NOOPT-NEXT: movl $2, %edi +; NOOPT-NEXT: callq g@PLT +; NOOPT-NEXT: jmp .LBB11_7 +; NOOPT-NEXT: .LBB11_4: # %bb3 +; NOOPT-NEXT: movl $3, %edi +; NOOPT-NEXT: callq g@PLT +; NOOPT-NEXT: jmp .LBB11_7 +; NOOPT-NEXT: .LBB11_5: # %bb4 +; NOOPT-NEXT: movl $4, %edi +; NOOPT-NEXT: callq g@PLT +; NOOPT-NEXT: jmp .LBB11_7 +; NOOPT-NEXT: .LBB11_6: # %bb5 +; NOOPT-NEXT: movl $5, %edi +; NOOPT-NEXT: callq g@PLT +; NOOPT-NEXT: .LBB11_7: # %return +; NOOPT-NEXT: popq %rax +; 
NOOPT-NEXT: .cfi_def_cfa_offset 8 +; NOOPT-NEXT: retq entry: switch i32 %x, label %return [ i32 0, label %bb0 @@ -348,17 +1170,113 @@ bb4: tail call void @g(i32 4) br label %return bb5: tail call void @g(i32 5) br label %return return: ret void - -; Partitioning the cases to the minimum number of dense sets is not good enough. -; This can be partitioned as {0,1,2,9},{14,15} or {0,1,2},{9,14,15}. The former -; should be preferred. Expecting a table from 0-9. -; CHECK-LABEL: optimal_jump_table2 -; CHECK: cmpl $9 -; CHECK: jmpq *.LJTI } +; Splitting to maximize left-right density sum and gap size would split this +; between 3 and 10, and then between 20 and 25. It's better to build a table +; from 1-20. define void @optimal_jump_table3(i32 %x) { +; CHECK-LABEL: optimal_jump_table3: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: # kill: def $edi killed $edi def $rdi +; CHECK-NEXT: leal -1(%rdi), %eax +; CHECK-NEXT: cmpl $19, %eax +; CHECK-NEXT: ja .LBB12_1 +; CHECK-NEXT: # %bb.3: # %entry +; CHECK-NEXT: jmpq *.LJTI12_0(,%rax,8) +; CHECK-NEXT: .LBB12_4: # %bb0 +; CHECK-NEXT: xorl %edi, %edi +; CHECK-NEXT: jmp g@PLT # TAILCALL +; CHECK-NEXT: .LBB12_5: # %bb1 +; CHECK-NEXT: movl $1, %edi +; CHECK-NEXT: jmp g@PLT # TAILCALL +; CHECK-NEXT: .LBB12_6: # %bb2 +; CHECK-NEXT: movl $2, %edi +; CHECK-NEXT: jmp g@PLT # TAILCALL +; CHECK-NEXT: .LBB12_7: # %bb3 +; CHECK-NEXT: movl $3, %edi +; CHECK-NEXT: jmp g@PLT # TAILCALL +; CHECK-NEXT: .LBB12_1: # %entry +; CHECK-NEXT: cmpl $25, %edi +; CHECK-NEXT: jne .LBB12_8 +; CHECK-NEXT: # %bb.2: # %bb4 +; CHECK-NEXT: movl $4, %edi +; CHECK-NEXT: jmp g@PLT # TAILCALL +; CHECK-NEXT: .LBB12_8: # %return +; CHECK-NEXT: retq +; +; NOOPT-LABEL: optimal_jump_table3: +; NOOPT: # %bb.0: # %entry +; NOOPT-NEXT: pushq %rax +; NOOPT-NEXT: .cfi_def_cfa_offset 16 +; NOOPT-NEXT: movl %edi, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill +; NOOPT-NEXT: subl $1, %edi +; NOOPT-NEXT: je .LBB12_1 +; NOOPT-NEXT: jmp .LBB12_7 +; NOOPT-NEXT: .LBB12_7: # %entry +; NOOPT-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload +; NOOPT-NEXT: subl $2, %eax +; NOOPT-NEXT: je .LBB12_2 +; NOOPT-NEXT: jmp .LBB12_8 +; NOOPT-NEXT: .LBB12_8: # %entry +; NOOPT-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload +; NOOPT-NEXT: subl $3, %eax +; NOOPT-NEXT: je .LBB12_3 +; NOOPT-NEXT: jmp .LBB12_9 +; NOOPT-NEXT: .LBB12_9: # %entry +; NOOPT-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload +; NOOPT-NEXT: subl $10, %eax +; NOOPT-NEXT: je .LBB12_4 +; NOOPT-NEXT: jmp .LBB12_10 +; NOOPT-NEXT: .LBB12_10: # %entry +; NOOPT-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload +; NOOPT-NEXT: subl $13, %eax +; NOOPT-NEXT: je .LBB12_1 +; NOOPT-NEXT: jmp .LBB12_11 +; NOOPT-NEXT: .LBB12_11: # %entry +; NOOPT-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload +; NOOPT-NEXT: subl $14, %eax +; NOOPT-NEXT: je .LBB12_2 +; NOOPT-NEXT: jmp .LBB12_12 +; NOOPT-NEXT: .LBB12_12: # %entry +; NOOPT-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload +; NOOPT-NEXT: subl $15, %eax +; NOOPT-NEXT: je .LBB12_3 +; NOOPT-NEXT: jmp .LBB12_13 +; NOOPT-NEXT: .LBB12_13: # %entry +; NOOPT-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload +; NOOPT-NEXT: subl $20, %eax +; NOOPT-NEXT: je .LBB12_4 +; NOOPT-NEXT: jmp .LBB12_14 +; NOOPT-NEXT: .LBB12_14: # %entry +; NOOPT-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload +; NOOPT-NEXT: subl $25, %eax +; NOOPT-NEXT: je .LBB12_5 +; NOOPT-NEXT: jmp .LBB12_6 +; NOOPT-NEXT: .LBB12_1: # %bb0 +; NOOPT-NEXT: xorl %edi, %edi +; NOOPT-NEXT: callq g@PLT +; NOOPT-NEXT: 
jmp .LBB12_6 +; NOOPT-NEXT: .LBB12_2: # %bb1 +; NOOPT-NEXT: movl $1, %edi +; NOOPT-NEXT: callq g@PLT +; NOOPT-NEXT: jmp .LBB12_6 +; NOOPT-NEXT: .LBB12_3: # %bb2 +; NOOPT-NEXT: movl $2, %edi +; NOOPT-NEXT: callq g@PLT +; NOOPT-NEXT: jmp .LBB12_6 +; NOOPT-NEXT: .LBB12_4: # %bb3 +; NOOPT-NEXT: movl $3, %edi +; NOOPT-NEXT: callq g@PLT +; NOOPT-NEXT: jmp .LBB12_6 +; NOOPT-NEXT: .LBB12_5: # %bb4 +; NOOPT-NEXT: movl $4, %edi +; NOOPT-NEXT: callq g@PLT +; NOOPT-NEXT: .LBB12_6: # %return +; NOOPT-NEXT: popq %rax +; NOOPT-NEXT: .cfi_def_cfa_offset 8 +; NOOPT-NEXT: retq entry: switch i32 %x, label %return [ i32 1, label %bb0 @@ -377,18 +1295,75 @@ bb3: tail call void @g(i32 3) br label %return bb4: tail call void @g(i32 4) br label %return return: ret void - -; Splitting to maximize left-right density sum and gap size would split this -; between 3 and 10, and then between 20 and 25. It's better to build a table -; from 1-20. -; CHECK-LABEL: optimal_jump_table3 -; CHECK: leal -1 -; CHECK: cmpl $19 -; CHECK: jmpq *.LJTI } %struct.S = type { %struct.S*, i32 } + +; This will be lowered to a comparison with 4 and then bit tests. Make sure +; that the phi node in %header gets a value from the comparison block. define void @phi_node_trouble(%struct.S* %s) { +; CHECK-LABEL: phi_node_trouble: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: .p2align 4, 0x90 +; CHECK-NEXT: .LBB13_1: # %header +; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: testq %rdi, %rdi +; CHECK-NEXT: je .LBB13_5 +; CHECK-NEXT: # %bb.2: # %loop +; CHECK-NEXT: # in Loop: Header=BB13_1 Depth=1 +; CHECK-NEXT: movq (%rdi), %rdi +; CHECK-NEXT: movl 8(%rdi), %eax +; CHECK-NEXT: cmpl $4, %eax +; CHECK-NEXT: je .LBB13_1 +; CHECK-NEXT: # %bb.3: # %loop +; CHECK-NEXT: addl $-25, %eax +; CHECK-NEXT: cmpl $44, %eax +; CHECK-NEXT: ja .LBB13_5 +; CHECK-NEXT: # %bb.4: # %loop +; CHECK-NEXT: movabsq $17592186046465, %rcx # imm = 0x100000000801 +; CHECK-NEXT: btq %rax, %rcx +; CHECK-NEXT: .LBB13_5: # %exit2 +; CHECK-NEXT: retq +; +; NOOPT-LABEL: phi_node_trouble: +; NOOPT: # %bb.0: # %entry +; NOOPT-NEXT: movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; NOOPT-NEXT: jmp .LBB13_1 +; NOOPT-NEXT: .LBB13_1: # %header +; NOOPT-NEXT: # =>This Inner Loop Header: Depth=1 +; NOOPT-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload +; NOOPT-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; NOOPT-NEXT: cmpq $0, %rax +; NOOPT-NEXT: je .LBB13_3 +; NOOPT-NEXT: # %bb.2: # %loop +; NOOPT-NEXT: # in Loop: Header=BB13_1 Depth=1 +; NOOPT-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload +; NOOPT-NEXT: movq (%rax), %rax +; NOOPT-NEXT: movl 8(%rax), %ecx +; NOOPT-NEXT: movl %ecx, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill +; NOOPT-NEXT: subl $4, %ecx +; NOOPT-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; NOOPT-NEXT: je .LBB13_1 +; NOOPT-NEXT: jmp .LBB13_5 +; NOOPT-NEXT: .LBB13_5: # %loop +; NOOPT-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload +; NOOPT-NEXT: subl $25, %eax +; NOOPT-NEXT: je .LBB13_4 +; NOOPT-NEXT: jmp .LBB13_6 +; NOOPT-NEXT: .LBB13_6: # %loop +; NOOPT-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload +; NOOPT-NEXT: subl $36, %eax +; NOOPT-NEXT: je .LBB13_4 +; NOOPT-NEXT: jmp .LBB13_7 +; NOOPT-NEXT: .LBB13_7: # %loop +; NOOPT-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload +; NOOPT-NEXT: subl $69, %eax +; NOOPT-NEXT: je .LBB13_4 +; NOOPT-NEXT: jmp .LBB13_3 +; NOOPT-NEXT: .LBB13_3: # %exit +; NOOPT-NEXT: retq +; NOOPT-NEXT: .LBB13_4: # %exit2 +; NOOPT-NEXT: retq entry: br label 
%header header: @@ -410,17 +1385,23 @@ ret void exit2: ret void - -; This will be lowered to a comparison with 4 and then bit tests. Make sure -; that the phi node in %header gets a value from the comparison block. -; CHECK-LABEL: phi_node_trouble -; CHECK: movq (%[[REG1:[a-z]+]]), %[[REG1]] -; CHECK: movl 8(%[[REG1]]), %[[REG2:[a-z]+]] -; CHECK: cmpl $4, %[[REG2]] } +; Branch directly to the default. +; (In optimized builds the switch is removed earlier.) define void @default_only(i32 %x) { +; CHECK-LABEL: default_only: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: retq +; +; NOOPT-LABEL: default_only: +; NOOPT: # %bb.0: # %entry +; NOOPT-NEXT: jmp .LBB14_2 +; NOOPT-NEXT: .LBB14_1: # %return +; NOOPT-NEXT: retq +; NOOPT-NEXT: .LBB14_2: # %sw +; NOOPT-NEXT: jmp .LBB14_1 entry: br label %sw return: @@ -428,17 +1409,63 @@ sw: switch i32 %x, label %return [ ] - -; Branch directly to the default. -; (In optimized builds the switch is removed earlier.) -; NOOPT-LABEL: default_only -; NOOPT: .LBB[[L:[A-Z0-9_]+]]: -; NOOPT-NEXT: retq -; NOOPT: jmp .LBB[[L]] } +; Don't infloop on jump tables where the upper bound is the max value of the +; input type (in this case 127). define void @int_max_table_cluster(i8 %x) { +; CHECK-LABEL: int_max_table_cluster: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addb $64, %dil +; CHECK-NEXT: cmpb $-65, %dil +; CHECK-NEXT: ja .LBB15_4 +; CHECK-NEXT: # %bb.1: # %entry +; CHECK-NEXT: movzbl %dil, %eax +; CHECK-NEXT: jmpq *.LJTI15_0(,%rax,8) +; CHECK-NEXT: .LBB15_2: # %bb0 +; CHECK-NEXT: xorl %edi, %edi +; CHECK-NEXT: jmp g@PLT # TAILCALL +; CHECK-NEXT: .LBB15_3: # %bb3 +; CHECK-NEXT: movl $1, %edi +; CHECK-NEXT: jmp g@PLT # TAILCALL +; CHECK-NEXT: .LBB15_4: # %return +; CHECK-NEXT: retq +; +; NOOPT-LABEL: int_max_table_cluster: +; NOOPT: # %bb.0: # %entry +; NOOPT-NEXT: pushq %rax +; NOOPT-NEXT: .cfi_def_cfa_offset 16 +; NOOPT-NEXT: movb %dil, %al +; NOOPT-NEXT: addb $64, %al +; NOOPT-NEXT: movzbl %al, %ecx +; NOOPT-NEXT: # kill: def $rcx killed $ecx +; NOOPT-NEXT: movq %rcx, (%rsp) # 8-byte Spill +; NOOPT-NEXT: subb $-65, %al +; NOOPT-NEXT: ja .LBB15_5 +; NOOPT-NEXT: # %bb.6: # %entry +; NOOPT-NEXT: movq (%rsp), %rax # 8-byte Reload +; NOOPT-NEXT: movq .LJTI15_0(,%rax,8), %rax +; NOOPT-NEXT: jmpq *%rax +; NOOPT-NEXT: .LBB15_1: # %bb0 +; NOOPT-NEXT: xorl %edi, %edi +; NOOPT-NEXT: callq g@PLT +; NOOPT-NEXT: jmp .LBB15_5 +; NOOPT-NEXT: .LBB15_2: # %bb1 +; NOOPT-NEXT: movl $1, %edi +; NOOPT-NEXT: callq g@PLT +; NOOPT-NEXT: jmp .LBB15_5 +; NOOPT-NEXT: .LBB15_3: # %bb2 +; NOOPT-NEXT: movl $1, %edi +; NOOPT-NEXT: callq g@PLT +; NOOPT-NEXT: jmp .LBB15_5 +; NOOPT-NEXT: .LBB15_4: # %bb3 +; NOOPT-NEXT: movl $1, %edi +; NOOPT-NEXT: callq g@PLT +; NOOPT-NEXT: .LBB15_5: # %return +; NOOPT-NEXT: popq %rax +; NOOPT-NEXT: .cfi_def_cfa_offset 8 +; NOOPT-NEXT: retq entry: switch i8 %x, label %return [ i8 0, label %bb0 i8 1, label %bb0 i8 2, label %bb0 i8 3, label %bb0 @@ -493,15 +1520,103 @@ bb2: tail call void @g(i32 1) br label %return bb3: tail call void @g(i32 1) br label %return return: ret void - -; Don't infloop on jump tables where the upper bound is the max value of the -; input type (in this case 127). -; CHECK-LABEL: int_max_table_cluster -; CHECK: jmpq *.LJTI } +; Cases 1,4,7 have a very large branch weight (which shouldn't overflow), so +; their bit test should come first. 0,3,6 and 2,5,8,9 both have a weight of 20, +; but the latter set has more cases, so should be tested for earlier. The bit +; test on 0,3,6 is unnecessary as all cases cover the range [0, 9]. 
The range +; check guarantees that cases other than 1,4,7 and 2,5,8,9 must be in 0,3,6. define void @bt_order_by_weight(i32 %x) { +; CHECK-LABEL: bt_order_by_weight: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: cmpl $9, %edi +; CHECK-NEXT: ja .LBB16_6 +; CHECK-NEXT: # %bb.1: # %entry +; CHECK-NEXT: movl $146, %eax +; CHECK-NEXT: btl %edi, %eax +; CHECK-NEXT: jae .LBB16_2 +; CHECK-NEXT: # %bb.4: # %bb1 +; CHECK-NEXT: movl $1, %edi +; CHECK-NEXT: jmp g@PLT # TAILCALL +; CHECK-NEXT: .LBB16_2: # %entry +; CHECK-NEXT: movl $804, %eax # imm = 0x324 +; CHECK-NEXT: btl %edi, %eax +; CHECK-NEXT: jae .LBB16_3 +; CHECK-NEXT: # %bb.5: # %bb2 +; CHECK-NEXT: movl $2, %edi +; CHECK-NEXT: jmp g@PLT # TAILCALL +; CHECK-NEXT: .LBB16_3: # %bb0 +; CHECK-NEXT: xorl %edi, %edi +; CHECK-NEXT: jmp g@PLT # TAILCALL +; CHECK-NEXT: .LBB16_6: # %return +; CHECK-NEXT: retq +; +; NOOPT-LABEL: bt_order_by_weight: +; NOOPT: # %bb.0: # %entry +; NOOPT-NEXT: pushq %rax +; NOOPT-NEXT: .cfi_def_cfa_offset 16 +; NOOPT-NEXT: movl %edi, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill +; NOOPT-NEXT: testl %edi, %edi +; NOOPT-NEXT: je .LBB16_1 +; NOOPT-NEXT: jmp .LBB16_5 +; NOOPT-NEXT: .LBB16_5: # %entry +; NOOPT-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload +; NOOPT-NEXT: subl $1, %eax +; NOOPT-NEXT: je .LBB16_2 +; NOOPT-NEXT: jmp .LBB16_6 +; NOOPT-NEXT: .LBB16_6: # %entry +; NOOPT-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload +; NOOPT-NEXT: subl $2, %eax +; NOOPT-NEXT: je .LBB16_3 +; NOOPT-NEXT: jmp .LBB16_7 +; NOOPT-NEXT: .LBB16_7: # %entry +; NOOPT-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload +; NOOPT-NEXT: subl $3, %eax +; NOOPT-NEXT: je .LBB16_1 +; NOOPT-NEXT: jmp .LBB16_8 +; NOOPT-NEXT: .LBB16_8: # %entry +; NOOPT-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload +; NOOPT-NEXT: subl $4, %eax +; NOOPT-NEXT: je .LBB16_2 +; NOOPT-NEXT: jmp .LBB16_9 +; NOOPT-NEXT: .LBB16_9: # %entry +; NOOPT-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload +; NOOPT-NEXT: subl $5, %eax +; NOOPT-NEXT: je .LBB16_3 +; NOOPT-NEXT: jmp .LBB16_10 +; NOOPT-NEXT: .LBB16_10: # %entry +; NOOPT-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload +; NOOPT-NEXT: subl $6, %eax +; NOOPT-NEXT: je .LBB16_1 +; NOOPT-NEXT: jmp .LBB16_11 +; NOOPT-NEXT: .LBB16_11: # %entry +; NOOPT-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload +; NOOPT-NEXT: subl $7, %eax +; NOOPT-NEXT: je .LBB16_2 +; NOOPT-NEXT: jmp .LBB16_12 +; NOOPT-NEXT: .LBB16_12: # %entry +; NOOPT-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload +; NOOPT-NEXT: addl $-8, %eax +; NOOPT-NEXT: subl $2, %eax +; NOOPT-NEXT: jb .LBB16_3 +; NOOPT-NEXT: jmp .LBB16_4 +; NOOPT-NEXT: .LBB16_1: # %bb0 +; NOOPT-NEXT: xorl %edi, %edi +; NOOPT-NEXT: callq g@PLT +; NOOPT-NEXT: jmp .LBB16_4 +; NOOPT-NEXT: .LBB16_2: # %bb1 +; NOOPT-NEXT: movl $1, %edi +; NOOPT-NEXT: callq g@PLT +; NOOPT-NEXT: jmp .LBB16_4 +; NOOPT-NEXT: .LBB16_3: # %bb2 +; NOOPT-NEXT: movl $2, %edi +; NOOPT-NEXT: callq g@PLT +; NOOPT-NEXT: .LBB16_4: # %return +; NOOPT-NEXT: popq %rax +; NOOPT-NEXT: .cfi_def_cfa_offset 8 +; NOOPT-NEXT: retq entry: switch i32 %x, label %return [ i32 0, label %bb0 @@ -519,23 +1634,6 @@ bb1: tail call void @g(i32 1) br label %return bb2: tail call void @g(i32 2) br label %return return: ret void - -; Cases 1,4,7 have a very large branch weight (which shouldn't overflow), so -; their bit test should come first. 0,3,6 and 2,5,8,9 both have a weight of 20, -; but the latter set has more cases, so should be tested for earlier. 
The bit -; test on 0,3,6 is unnecessary as all cases cover the range [0, 9]. The range -; check guarantees that cases other than 1,4,7 and 2,5,8,9 must be in 0,3,6. - -; CHECK-LABEL: bt_order_by_weight -; 146 = 2^1 + 2^4 + 2^7 -; CHECK: movl $146 -; CHECK: btl -; 292 = 2^2 + 2^5 + 2^8 + 2^9 -; CHECK: movl $804 -; CHECK: btl -; 73 = 2^0 + 2^3 + 2^6 -; CHECK-NOT: movl $73 -; CHECK-NOT: btl } !1 = !{!"branch_weights", @@ -548,7 +1646,58 @@ ; Cases 2,5,8,9: i32 0, i32 0, i32 0, i32 20} + +; Case 200 has the highest weight and should come first. 100 and 300 have the +; same weight, but 300 goes to the 'next' block, so should be last. define void @order_by_weight_and_fallthrough(i32 %x) { +; CHECK-LABEL: order_by_weight_and_fallthrough: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: cmpl $200, %edi +; CHECK-NEXT: jne .LBB17_1 +; CHECK-NEXT: .LBB17_3: # %bb0 +; CHECK-NEXT: xorl %edi, %edi +; CHECK-NEXT: jmp g@PLT # TAILCALL +; CHECK-NEXT: .LBB17_1: # %entry +; CHECK-NEXT: cmpl $100, %edi +; CHECK-NEXT: je .LBB17_4 +; CHECK-NEXT: # %bb.2: # %entry +; CHECK-NEXT: cmpl $300, %edi # imm = 0x12C +; CHECK-NEXT: je .LBB17_3 +; CHECK-NEXT: # %bb.5: # %return +; CHECK-NEXT: retq +; CHECK-NEXT: .LBB17_4: # %bb1 +; CHECK-NEXT: movl $1, %edi +; CHECK-NEXT: jmp g@PLT # TAILCALL +; +; NOOPT-LABEL: order_by_weight_and_fallthrough: +; NOOPT: # %bb.0: # %entry +; NOOPT-NEXT: pushq %rax +; NOOPT-NEXT: .cfi_def_cfa_offset 16 +; NOOPT-NEXT: movl %edi, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill +; NOOPT-NEXT: subl $100, %edi +; NOOPT-NEXT: je .LBB17_2 +; NOOPT-NEXT: jmp .LBB17_4 +; NOOPT-NEXT: .LBB17_4: # %entry +; NOOPT-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload +; NOOPT-NEXT: subl $200, %eax +; NOOPT-NEXT: je .LBB17_1 +; NOOPT-NEXT: jmp .LBB17_5 +; NOOPT-NEXT: .LBB17_5: # %entry +; NOOPT-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload +; NOOPT-NEXT: subl $300, %eax # imm = 0x12C +; NOOPT-NEXT: jne .LBB17_3 +; NOOPT-NEXT: jmp .LBB17_1 +; NOOPT-NEXT: .LBB17_1: # %bb0 +; NOOPT-NEXT: xorl %edi, %edi +; NOOPT-NEXT: callq g@PLT +; NOOPT-NEXT: jmp .LBB17_3 +; NOOPT-NEXT: .LBB17_2: # %bb1 +; NOOPT-NEXT: movl $1, %edi +; NOOPT-NEXT: callq g@PLT +; NOOPT-NEXT: .LBB17_3: # %return +; NOOPT-NEXT: popq %rax +; NOOPT-NEXT: .cfi_def_cfa_offset 8 +; NOOPT-NEXT: retq entry: switch i32 %x, label %return [ i32 100, label %bb1 @@ -558,13 +1707,6 @@ bb0: tail call void @g(i32 0) br label %return bb1: tail call void @g(i32 1) br label %return return: ret void - -; Case 200 has the highest weight and should come first. 100 and 300 have the -; same weight, but 300 goes to the 'next' block, so should be last. -; CHECK-LABEL: order_by_weight_and_fallthrough -; CHECK: cmpl $200 -; CHECK: cmpl $100 -; CHECK: cmpl $300 } !2 = !{!"branch_weights", @@ -578,7 +1720,111 @@ i32 10} +; Make sure to pick a pivot in the middle also with zero-weight cases. 
define void @zero_weight_tree(i32 %x) { +; CHECK-LABEL: zero_weight_tree: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: cmpl $29, %edi +; CHECK-NEXT: jg .LBB18_5 +; CHECK-NEXT: # %bb.1: # %entry +; CHECK-NEXT: testl %edi, %edi +; CHECK-NEXT: jne .LBB18_2 +; CHECK-NEXT: # %bb.9: # %bb0 +; CHECK-NEXT: xorl %edi, %edi +; CHECK-NEXT: jmp g@PLT # TAILCALL +; CHECK-NEXT: .LBB18_5: # %entry +; CHECK-NEXT: cmpl $50, %edi +; CHECK-NEXT: jne .LBB18_6 +; CHECK-NEXT: # %bb.12: # %bb5 +; CHECK-NEXT: movl $5, %edi +; CHECK-NEXT: jmp g@PLT # TAILCALL +; CHECK-NEXT: .LBB18_2: # %entry +; CHECK-NEXT: cmpl $10, %edi +; CHECK-NEXT: je .LBB18_10 +; CHECK-NEXT: # %bb.3: # %entry +; CHECK-NEXT: cmpl $20, %edi +; CHECK-NEXT: jne .LBB18_13 +; CHECK-NEXT: # %bb.4: # %bb2 +; CHECK-NEXT: movl $2, %edi +; CHECK-NEXT: jmp g@PLT # TAILCALL +; CHECK-NEXT: .LBB18_6: # %entry +; CHECK-NEXT: cmpl $30, %edi +; CHECK-NEXT: je .LBB18_11 +; CHECK-NEXT: # %bb.7: # %entry +; CHECK-NEXT: cmpl $40, %edi +; CHECK-NEXT: je .LBB18_8 +; CHECK-NEXT: .LBB18_13: # %return +; CHECK-NEXT: retq +; CHECK-NEXT: .LBB18_10: # %bb1 +; CHECK-NEXT: movl $1, %edi +; CHECK-NEXT: jmp g@PLT # TAILCALL +; CHECK-NEXT: .LBB18_11: # %bb3 +; CHECK-NEXT: movl $3, %edi +; CHECK-NEXT: jmp g@PLT # TAILCALL +; CHECK-NEXT: .LBB18_8: # %bb4 +; CHECK-NEXT: movl $4, %edi +; CHECK-NEXT: jmp g@PLT # TAILCALL +; +; NOOPT-LABEL: zero_weight_tree: +; NOOPT: # %bb.0: # %entry +; NOOPT-NEXT: pushq %rax +; NOOPT-NEXT: .cfi_def_cfa_offset 16 +; NOOPT-NEXT: movl %edi, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill +; NOOPT-NEXT: testl %edi, %edi +; NOOPT-NEXT: je .LBB18_1 +; NOOPT-NEXT: jmp .LBB18_8 +; NOOPT-NEXT: .LBB18_8: # %entry +; NOOPT-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload +; NOOPT-NEXT: subl $10, %eax +; NOOPT-NEXT: je .LBB18_2 +; NOOPT-NEXT: jmp .LBB18_9 +; NOOPT-NEXT: .LBB18_9: # %entry +; NOOPT-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload +; NOOPT-NEXT: subl $20, %eax +; NOOPT-NEXT: je .LBB18_3 +; NOOPT-NEXT: jmp .LBB18_10 +; NOOPT-NEXT: .LBB18_10: # %entry +; NOOPT-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload +; NOOPT-NEXT: subl $30, %eax +; NOOPT-NEXT: je .LBB18_4 +; NOOPT-NEXT: jmp .LBB18_11 +; NOOPT-NEXT: .LBB18_11: # %entry +; NOOPT-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload +; NOOPT-NEXT: subl $40, %eax +; NOOPT-NEXT: je .LBB18_5 +; NOOPT-NEXT: jmp .LBB18_12 +; NOOPT-NEXT: .LBB18_12: # %entry +; NOOPT-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload +; NOOPT-NEXT: subl $50, %eax +; NOOPT-NEXT: je .LBB18_6 +; NOOPT-NEXT: jmp .LBB18_7 +; NOOPT-NEXT: .LBB18_1: # %bb0 +; NOOPT-NEXT: xorl %edi, %edi +; NOOPT-NEXT: callq g@PLT +; NOOPT-NEXT: jmp .LBB18_7 +; NOOPT-NEXT: .LBB18_2: # %bb1 +; NOOPT-NEXT: movl $1, %edi +; NOOPT-NEXT: callq g@PLT +; NOOPT-NEXT: jmp .LBB18_7 +; NOOPT-NEXT: .LBB18_3: # %bb2 +; NOOPT-NEXT: movl $2, %edi +; NOOPT-NEXT: callq g@PLT +; NOOPT-NEXT: jmp .LBB18_7 +; NOOPT-NEXT: .LBB18_4: # %bb3 +; NOOPT-NEXT: movl $3, %edi +; NOOPT-NEXT: callq g@PLT +; NOOPT-NEXT: jmp .LBB18_7 +; NOOPT-NEXT: .LBB18_5: # %bb4 +; NOOPT-NEXT: movl $4, %edi +; NOOPT-NEXT: callq g@PLT +; NOOPT-NEXT: jmp .LBB18_7 +; NOOPT-NEXT: .LBB18_6: # %bb5 +; NOOPT-NEXT: movl $5, %edi +; NOOPT-NEXT: callq g@PLT +; NOOPT-NEXT: .LBB18_7: # %return +; NOOPT-NEXT: popq %rax +; NOOPT-NEXT: .cfi_def_cfa_offset 8 +; NOOPT-NEXT: retq entry: switch i32 %x, label %return [ i32 0, label %bb0 @@ -595,17 +1841,149 @@ bb4: tail call void @g(i32 4) br label %return bb5: tail call void @g(i32 5) br label %return return: ret void - -; 
Make sure to pick a pivot in the middle also with zero-weight cases. -; CHECK-LABEL: zero_weight_tree -; CHECK-NOT: cmpl -; CHECK: cmpl $29 } !3 = !{!"branch_weights", i32 1, i32 10, i32 0, i32 0, i32 0, i32 0, i32 10} +; Without branch probabilities, the pivot would be 40, since that would yield +; equal-sized sub-trees. When taking weights into account, case 70 becomes the +; pivot. Since there is room for 3 cases in a leaf, cases 50 and 60 are also +; included in the right-hand side because that doesn't reduce their rank. define void @left_leaning_weight_balanced_tree(i32 %x) { +; CHECK-LABEL: left_leaning_weight_balanced_tree: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: cmpl $49, %edi +; CHECK-NEXT: jle .LBB19_1 +; CHECK-NEXT: # %bb.11: # %entry +; CHECK-NEXT: cmpl $70, %edi +; CHECK-NEXT: jne .LBB19_12 +; CHECK-NEXT: .LBB19_14: # %bb6 +; CHECK-NEXT: movl $6, %edi +; CHECK-NEXT: jmp g@PLT # TAILCALL +; CHECK-NEXT: .LBB19_1: # %entry +; CHECK-NEXT: cmpl $9, %edi +; CHECK-NEXT: jg .LBB19_4 +; CHECK-NEXT: # %bb.2: # %entry +; CHECK-NEXT: testl %edi, %edi +; CHECK-NEXT: jne .LBB19_18 +; CHECK-NEXT: # %bb.3: # %bb0 +; CHECK-NEXT: xorl %edi, %edi +; CHECK-NEXT: jmp g@PLT # TAILCALL +; CHECK-NEXT: .LBB19_4: # %entry +; CHECK-NEXT: cmpl $29, %edi +; CHECK-NEXT: jg .LBB19_8 +; CHECK-NEXT: # %bb.5: # %entry +; CHECK-NEXT: cmpl $10, %edi +; CHECK-NEXT: je .LBB19_15 +; CHECK-NEXT: # %bb.6: # %entry +; CHECK-NEXT: cmpl $20, %edi +; CHECK-NEXT: jne .LBB19_18 +; CHECK-NEXT: # %bb.7: # %bb2 +; CHECK-NEXT: movl $2, %edi +; CHECK-NEXT: jmp g@PLT # TAILCALL +; CHECK-NEXT: .LBB19_12: # %entry +; CHECK-NEXT: cmpl $50, %edi +; CHECK-NEXT: je .LBB19_17 +; CHECK-NEXT: # %bb.13: # %entry +; CHECK-NEXT: cmpl $60, %edi +; CHECK-NEXT: je .LBB19_14 +; CHECK-NEXT: jmp .LBB19_18 +; CHECK-NEXT: .LBB19_8: # %entry +; CHECK-NEXT: cmpl $30, %edi +; CHECK-NEXT: je .LBB19_16 +; CHECK-NEXT: # %bb.9: # %entry +; CHECK-NEXT: cmpl $40, %edi +; CHECK-NEXT: jne .LBB19_18 +; CHECK-NEXT: # %bb.10: # %bb4 +; CHECK-NEXT: movl $4, %edi +; CHECK-NEXT: jmp g@PLT # TAILCALL +; CHECK-NEXT: .LBB19_15: # %bb1 +; CHECK-NEXT: movl $1, %edi +; CHECK-NEXT: jmp g@PLT # TAILCALL +; CHECK-NEXT: .LBB19_16: # %bb3 +; CHECK-NEXT: movl $3, %edi +; CHECK-NEXT: jmp g@PLT # TAILCALL +; CHECK-NEXT: .LBB19_17: # %bb5 +; CHECK-NEXT: movl $5, %edi +; CHECK-NEXT: jmp g@PLT # TAILCALL +; CHECK-NEXT: .LBB19_18: # %return +; CHECK-NEXT: retq +; +; NOOPT-LABEL: left_leaning_weight_balanced_tree: +; NOOPT: # %bb.0: # %entry +; NOOPT-NEXT: pushq %rax +; NOOPT-NEXT: .cfi_def_cfa_offset 16 +; NOOPT-NEXT: movl %edi, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill +; NOOPT-NEXT: testl %edi, %edi +; NOOPT-NEXT: je .LBB19_1 +; NOOPT-NEXT: jmp .LBB19_9 +; NOOPT-NEXT: .LBB19_9: # %entry +; NOOPT-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload +; NOOPT-NEXT: subl $10, %eax +; NOOPT-NEXT: je .LBB19_2 +; NOOPT-NEXT: jmp .LBB19_10 +; NOOPT-NEXT: .LBB19_10: # %entry +; NOOPT-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload +; NOOPT-NEXT: subl $20, %eax +; NOOPT-NEXT: je .LBB19_3 +; NOOPT-NEXT: jmp .LBB19_11 +; NOOPT-NEXT: .LBB19_11: # %entry +; NOOPT-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload +; NOOPT-NEXT: subl $30, %eax +; NOOPT-NEXT: je .LBB19_4 +; NOOPT-NEXT: jmp .LBB19_12 +; NOOPT-NEXT: .LBB19_12: # %entry +; NOOPT-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload +; NOOPT-NEXT: subl $40, %eax +; NOOPT-NEXT: je .LBB19_5 +; NOOPT-NEXT: jmp .LBB19_13 +; NOOPT-NEXT: .LBB19_13: # %entry +; NOOPT-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 
4-byte Reload +; NOOPT-NEXT: subl $50, %eax +; NOOPT-NEXT: je .LBB19_6 +; NOOPT-NEXT: jmp .LBB19_14 +; NOOPT-NEXT: .LBB19_14: # %entry +; NOOPT-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload +; NOOPT-NEXT: subl $60, %eax +; NOOPT-NEXT: je .LBB19_7 +; NOOPT-NEXT: jmp .LBB19_15 +; NOOPT-NEXT: .LBB19_15: # %entry +; NOOPT-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload +; NOOPT-NEXT: subl $70, %eax +; NOOPT-NEXT: je .LBB19_7 +; NOOPT-NEXT: jmp .LBB19_8 +; NOOPT-NEXT: .LBB19_1: # %bb0 +; NOOPT-NEXT: xorl %edi, %edi +; NOOPT-NEXT: callq g@PLT +; NOOPT-NEXT: jmp .LBB19_8 +; NOOPT-NEXT: .LBB19_2: # %bb1 +; NOOPT-NEXT: movl $1, %edi +; NOOPT-NEXT: callq g@PLT +; NOOPT-NEXT: jmp .LBB19_8 +; NOOPT-NEXT: .LBB19_3: # %bb2 +; NOOPT-NEXT: movl $2, %edi +; NOOPT-NEXT: callq g@PLT +; NOOPT-NEXT: jmp .LBB19_8 +; NOOPT-NEXT: .LBB19_4: # %bb3 +; NOOPT-NEXT: movl $3, %edi +; NOOPT-NEXT: callq g@PLT +; NOOPT-NEXT: jmp .LBB19_8 +; NOOPT-NEXT: .LBB19_5: # %bb4 +; NOOPT-NEXT: movl $4, %edi +; NOOPT-NEXT: callq g@PLT +; NOOPT-NEXT: jmp .LBB19_8 +; NOOPT-NEXT: .LBB19_6: # %bb5 +; NOOPT-NEXT: movl $5, %edi +; NOOPT-NEXT: callq g@PLT +; NOOPT-NEXT: jmp .LBB19_8 +; NOOPT-NEXT: .LBB19_7: # %bb6 +; NOOPT-NEXT: movl $6, %edi +; NOOPT-NEXT: callq g@PLT +; NOOPT-NEXT: .LBB19_8: # %return +; NOOPT-NEXT: popq %rax +; NOOPT-NEXT: .cfi_def_cfa_offset 8 +; NOOPT-NEXT: retq entry: switch i32 %x, label %return [ i32 0, label %bb0 @@ -626,21 +2004,145 @@ bb6: tail call void @g(i32 6) br label %return bb7: tail call void @g(i32 7) br label %return return: ret void - -; Without branch probabilities, the pivot would be 40, since that would yield -; equal-sized sub-trees. When taking weights into account, case 70 becomes the -; pivot. Since there is room for 3 cases in a leaf, cases 50 and 60 are also -; included in the right-hand side because that doesn't reduce their rank. - -; CHECK-LABEL: left_leaning_weight_balanced_tree -; CHECK-NOT: cmpl -; CHECK: cmpl $49 } !4 = !{!"branch_weights", i32 1, i32 10, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1000} +; Same as the previous test, except case 50 has higher rank to the left than it +; would have on the right. Case 60 would have the same rank on both sides, so is +; moved into the leaf. 
define void @left_leaning_weight_balanced_tree2(i32 %x) { +; CHECK-LABEL: left_leaning_weight_balanced_tree2: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: cmpl $59, %edi +; CHECK-NEXT: jle .LBB20_1 +; CHECK-NEXT: # %bb.10: # %entry +; CHECK-NEXT: cmpl $70, %edi +; CHECK-NEXT: jne .LBB20_11 +; CHECK-NEXT: .LBB20_12: # %bb6 +; CHECK-NEXT: movl $6, %edi +; CHECK-NEXT: jmp g@PLT # TAILCALL +; CHECK-NEXT: .LBB20_1: # %entry +; CHECK-NEXT: cmpl $29, %edi +; CHECK-NEXT: jle .LBB20_2 +; CHECK-NEXT: # %bb.6: # %entry +; CHECK-NEXT: cmpl $50, %edi +; CHECK-NEXT: jne .LBB20_7 +; CHECK-NEXT: # %bb.16: # %bb5 +; CHECK-NEXT: movl $5, %edi +; CHECK-NEXT: jmp g@PLT # TAILCALL +; CHECK-NEXT: .LBB20_11: # %entry +; CHECK-NEXT: cmpl $60, %edi +; CHECK-NEXT: je .LBB20_12 +; CHECK-NEXT: jmp .LBB20_17 +; CHECK-NEXT: .LBB20_2: # %entry +; CHECK-NEXT: testl %edi, %edi +; CHECK-NEXT: jne .LBB20_3 +; CHECK-NEXT: # %bb.13: # %bb0 +; CHECK-NEXT: xorl %edi, %edi +; CHECK-NEXT: jmp g@PLT # TAILCALL +; CHECK-NEXT: .LBB20_3: # %entry +; CHECK-NEXT: cmpl $10, %edi +; CHECK-NEXT: je .LBB20_14 +; CHECK-NEXT: # %bb.4: # %entry +; CHECK-NEXT: cmpl $20, %edi +; CHECK-NEXT: jne .LBB20_17 +; CHECK-NEXT: # %bb.5: # %bb2 +; CHECK-NEXT: movl $2, %edi +; CHECK-NEXT: jmp g@PLT # TAILCALL +; CHECK-NEXT: .LBB20_7: # %entry +; CHECK-NEXT: cmpl $30, %edi +; CHECK-NEXT: je .LBB20_15 +; CHECK-NEXT: # %bb.8: # %entry +; CHECK-NEXT: cmpl $40, %edi +; CHECK-NEXT: jne .LBB20_17 +; CHECK-NEXT: # %bb.9: # %bb4 +; CHECK-NEXT: movl $4, %edi +; CHECK-NEXT: jmp g@PLT # TAILCALL +; CHECK-NEXT: .LBB20_14: # %bb1 +; CHECK-NEXT: movl $1, %edi +; CHECK-NEXT: jmp g@PLT # TAILCALL +; CHECK-NEXT: .LBB20_15: # %bb3 +; CHECK-NEXT: movl $3, %edi +; CHECK-NEXT: jmp g@PLT # TAILCALL +; CHECK-NEXT: .LBB20_17: # %return +; CHECK-NEXT: retq +; +; NOOPT-LABEL: left_leaning_weight_balanced_tree2: +; NOOPT: # %bb.0: # %entry +; NOOPT-NEXT: pushq %rax +; NOOPT-NEXT: .cfi_def_cfa_offset 16 +; NOOPT-NEXT: movl %edi, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill +; NOOPT-NEXT: testl %edi, %edi +; NOOPT-NEXT: je .LBB20_1 +; NOOPT-NEXT: jmp .LBB20_9 +; NOOPT-NEXT: .LBB20_9: # %entry +; NOOPT-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload +; NOOPT-NEXT: subl $10, %eax +; NOOPT-NEXT: je .LBB20_2 +; NOOPT-NEXT: jmp .LBB20_10 +; NOOPT-NEXT: .LBB20_10: # %entry +; NOOPT-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload +; NOOPT-NEXT: subl $20, %eax +; NOOPT-NEXT: je .LBB20_3 +; NOOPT-NEXT: jmp .LBB20_11 +; NOOPT-NEXT: .LBB20_11: # %entry +; NOOPT-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload +; NOOPT-NEXT: subl $30, %eax +; NOOPT-NEXT: je .LBB20_4 +; NOOPT-NEXT: jmp .LBB20_12 +; NOOPT-NEXT: .LBB20_12: # %entry +; NOOPT-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload +; NOOPT-NEXT: subl $40, %eax +; NOOPT-NEXT: je .LBB20_5 +; NOOPT-NEXT: jmp .LBB20_13 +; NOOPT-NEXT: .LBB20_13: # %entry +; NOOPT-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload +; NOOPT-NEXT: subl $50, %eax +; NOOPT-NEXT: je .LBB20_6 +; NOOPT-NEXT: jmp .LBB20_14 +; NOOPT-NEXT: .LBB20_14: # %entry +; NOOPT-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload +; NOOPT-NEXT: subl $60, %eax +; NOOPT-NEXT: je .LBB20_7 +; NOOPT-NEXT: jmp .LBB20_15 +; NOOPT-NEXT: .LBB20_15: # %entry +; NOOPT-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload +; NOOPT-NEXT: subl $70, %eax +; NOOPT-NEXT: je .LBB20_7 +; NOOPT-NEXT: jmp .LBB20_8 +; NOOPT-NEXT: .LBB20_1: # %bb0 +; NOOPT-NEXT: xorl %edi, %edi +; NOOPT-NEXT: callq g@PLT +; NOOPT-NEXT: jmp .LBB20_8 +; NOOPT-NEXT: .LBB20_2: # 
%bb1 +; NOOPT-NEXT: movl $1, %edi +; NOOPT-NEXT: callq g@PLT +; NOOPT-NEXT: jmp .LBB20_8 +; NOOPT-NEXT: .LBB20_3: # %bb2 +; NOOPT-NEXT: movl $2, %edi +; NOOPT-NEXT: callq g@PLT +; NOOPT-NEXT: jmp .LBB20_8 +; NOOPT-NEXT: .LBB20_4: # %bb3 +; NOOPT-NEXT: movl $3, %edi +; NOOPT-NEXT: callq g@PLT +; NOOPT-NEXT: jmp .LBB20_8 +; NOOPT-NEXT: .LBB20_5: # %bb4 +; NOOPT-NEXT: movl $4, %edi +; NOOPT-NEXT: callq g@PLT +; NOOPT-NEXT: jmp .LBB20_8 +; NOOPT-NEXT: .LBB20_6: # %bb5 +; NOOPT-NEXT: movl $5, %edi +; NOOPT-NEXT: callq g@PLT +; NOOPT-NEXT: jmp .LBB20_8 +; NOOPT-NEXT: .LBB20_7: # %bb6 +; NOOPT-NEXT: movl $6, %edi +; NOOPT-NEXT: callq g@PLT +; NOOPT-NEXT: .LBB20_8: # %return +; NOOPT-NEXT: popq %rax +; NOOPT-NEXT: .cfi_def_cfa_offset 8 +; NOOPT-NEXT: retq entry: switch i32 %x, label %return [ i32 0, label %bb0 @@ -661,20 +2163,143 @@ bb6: tail call void @g(i32 6) br label %return bb7: tail call void @g(i32 7) br label %return return: ret void - -; Same as the previous test, except case 50 has higher rank to the left than it -; would have on the right. Case 60 would have the same rank on both sides, so is -; moved into the leaf. - -; CHECK-LABEL: left_leaning_weight_balanced_tree2 -; CHECK-NOT: cmpl -; CHECK: cmpl $59 } !5 = !{!"branch_weights", i32 1, i32 10, i32 1, i32 1, i32 1, i32 1, i32 90, i32 70, i32 1000} +; Analogous to left_leaning_weight_balanced_tree. define void @right_leaning_weight_balanced_tree(i32 %x) { +; CHECK-LABEL: right_leaning_weight_balanced_tree: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: cmpl $19, %edi +; CHECK-NEXT: jg .LBB21_4 +; CHECK-NEXT: # %bb.1: # %entry +; CHECK-NEXT: testl %edi, %edi +; CHECK-NEXT: jne .LBB21_2 +; CHECK-NEXT: # %bb.13: # %bb0 +; CHECK-NEXT: xorl %edi, %edi +; CHECK-NEXT: jmp g@PLT # TAILCALL +; CHECK-NEXT: .LBB21_4: # %entry +; CHECK-NEXT: cmpl $49, %edi +; CHECK-NEXT: jle .LBB21_5 +; CHECK-NEXT: # %bb.9: # %entry +; CHECK-NEXT: cmpl $70, %edi +; CHECK-NEXT: jne .LBB21_10 +; CHECK-NEXT: .LBB21_12: # %bb6 +; CHECK-NEXT: movl $6, %edi +; CHECK-NEXT: jmp g@PLT # TAILCALL +; CHECK-NEXT: .LBB21_5: # %entry +; CHECK-NEXT: cmpl $20, %edi +; CHECK-NEXT: je .LBB21_14 +; CHECK-NEXT: # %bb.6: # %entry +; CHECK-NEXT: cmpl $30, %edi +; CHECK-NEXT: je .LBB21_15 +; CHECK-NEXT: # %bb.7: # %entry +; CHECK-NEXT: cmpl $40, %edi +; CHECK-NEXT: jne .LBB21_17 +; CHECK-NEXT: # %bb.8: # %bb4 +; CHECK-NEXT: movl $4, %edi +; CHECK-NEXT: jmp g@PLT # TAILCALL +; CHECK-NEXT: .LBB21_10: # %entry +; CHECK-NEXT: cmpl $50, %edi +; CHECK-NEXT: je .LBB21_16 +; CHECK-NEXT: # %bb.11: # %entry +; CHECK-NEXT: cmpl $60, %edi +; CHECK-NEXT: je .LBB21_12 +; CHECK-NEXT: jmp .LBB21_17 +; CHECK-NEXT: .LBB21_14: # %bb2 +; CHECK-NEXT: movl $2, %edi +; CHECK-NEXT: jmp g@PLT # TAILCALL +; CHECK-NEXT: .LBB21_15: # %bb3 +; CHECK-NEXT: movl $3, %edi +; CHECK-NEXT: jmp g@PLT # TAILCALL +; CHECK-NEXT: .LBB21_2: # %entry +; CHECK-NEXT: cmpl $10, %edi +; CHECK-NEXT: jne .LBB21_17 +; CHECK-NEXT: # %bb.3: # %bb1 +; CHECK-NEXT: movl $1, %edi +; CHECK-NEXT: jmp g@PLT # TAILCALL +; CHECK-NEXT: .LBB21_17: # %return +; CHECK-NEXT: retq +; CHECK-NEXT: .LBB21_16: # %bb5 +; CHECK-NEXT: movl $5, %edi +; CHECK-NEXT: jmp g@PLT # TAILCALL +; +; NOOPT-LABEL: right_leaning_weight_balanced_tree: +; NOOPT: # %bb.0: # %entry +; NOOPT-NEXT: pushq %rax +; NOOPT-NEXT: .cfi_def_cfa_offset 16 +; NOOPT-NEXT: movl %edi, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill +; NOOPT-NEXT: testl %edi, %edi +; NOOPT-NEXT: je .LBB21_1 +; NOOPT-NEXT: jmp .LBB21_9 +; NOOPT-NEXT: .LBB21_9: # %entry +; NOOPT-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %eax 
# 4-byte Reload +; NOOPT-NEXT: subl $10, %eax +; NOOPT-NEXT: je .LBB21_2 +; NOOPT-NEXT: jmp .LBB21_10 +; NOOPT-NEXT: .LBB21_10: # %entry +; NOOPT-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload +; NOOPT-NEXT: subl $20, %eax +; NOOPT-NEXT: je .LBB21_3 +; NOOPT-NEXT: jmp .LBB21_11 +; NOOPT-NEXT: .LBB21_11: # %entry +; NOOPT-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload +; NOOPT-NEXT: subl $30, %eax +; NOOPT-NEXT: je .LBB21_4 +; NOOPT-NEXT: jmp .LBB21_12 +; NOOPT-NEXT: .LBB21_12: # %entry +; NOOPT-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload +; NOOPT-NEXT: subl $40, %eax +; NOOPT-NEXT: je .LBB21_5 +; NOOPT-NEXT: jmp .LBB21_13 +; NOOPT-NEXT: .LBB21_13: # %entry +; NOOPT-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload +; NOOPT-NEXT: subl $50, %eax +; NOOPT-NEXT: je .LBB21_6 +; NOOPT-NEXT: jmp .LBB21_14 +; NOOPT-NEXT: .LBB21_14: # %entry +; NOOPT-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload +; NOOPT-NEXT: subl $60, %eax +; NOOPT-NEXT: je .LBB21_7 +; NOOPT-NEXT: jmp .LBB21_15 +; NOOPT-NEXT: .LBB21_15: # %entry +; NOOPT-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload +; NOOPT-NEXT: subl $70, %eax +; NOOPT-NEXT: je .LBB21_7 +; NOOPT-NEXT: jmp .LBB21_8 +; NOOPT-NEXT: .LBB21_1: # %bb0 +; NOOPT-NEXT: xorl %edi, %edi +; NOOPT-NEXT: callq g@PLT +; NOOPT-NEXT: jmp .LBB21_8 +; NOOPT-NEXT: .LBB21_2: # %bb1 +; NOOPT-NEXT: movl $1, %edi +; NOOPT-NEXT: callq g@PLT +; NOOPT-NEXT: jmp .LBB21_8 +; NOOPT-NEXT: .LBB21_3: # %bb2 +; NOOPT-NEXT: movl $2, %edi +; NOOPT-NEXT: callq g@PLT +; NOOPT-NEXT: jmp .LBB21_8 +; NOOPT-NEXT: .LBB21_4: # %bb3 +; NOOPT-NEXT: movl $3, %edi +; NOOPT-NEXT: callq g@PLT +; NOOPT-NEXT: jmp .LBB21_8 +; NOOPT-NEXT: .LBB21_5: # %bb4 +; NOOPT-NEXT: movl $4, %edi +; NOOPT-NEXT: callq g@PLT +; NOOPT-NEXT: jmp .LBB21_8 +; NOOPT-NEXT: .LBB21_6: # %bb5 +; NOOPT-NEXT: movl $5, %edi +; NOOPT-NEXT: callq g@PLT +; NOOPT-NEXT: jmp .LBB21_8 +; NOOPT-NEXT: .LBB21_7: # %bb6 +; NOOPT-NEXT: movl $6, %edi +; NOOPT-NEXT: callq g@PLT +; NOOPT-NEXT: .LBB21_8: # %return +; NOOPT-NEXT: popq %rax +; NOOPT-NEXT: .cfi_def_cfa_offset 8 +; NOOPT-NEXT: retq entry: switch i32 %x, label %return [ i32 0, label %bb0 @@ -695,18 +2320,106 @@ bb6: tail call void @g(i32 6) br label %return bb7: tail call void @g(i32 7) br label %return return: ret void - -; Analogous to left_leaning_weight_balanced_tree. - -; CHECK-LABEL: right_leaning_weight_balanced_tree -; CHECK-NOT: cmpl -; CHECK: cmpl $19 } !6 = !{!"branch_weights", i32 1, i32 1000, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 10} +; If the tree were balanced based on number of clusters, {0-3,100} would go on +; the left and {200,300} on the right. However, the jump table weights as much +; as its components, so 100 is selected as the pivot. 
define void @jump_table_affects_balance(i32 %x) { +; CHECK-LABEL: jump_table_affects_balance: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: cmpl $99, %edi +; CHECK-NEXT: jg .LBB22_3 +; CHECK-NEXT: # %bb.1: # %entry +; CHECK-NEXT: cmpl $3, %edi +; CHECK-NEXT: ja .LBB22_10 +; CHECK-NEXT: # %bb.2: # %entry +; CHECK-NEXT: movl %edi, %eax +; CHECK-NEXT: jmpq *.LJTI22_0(,%rax,8) +; CHECK-NEXT: .LBB22_9: # %bb3 +; CHECK-NEXT: movl $3, %edi +; CHECK-NEXT: jmp g@PLT # TAILCALL +; CHECK-NEXT: .LBB22_3: # %entry +; CHECK-NEXT: cmpl $300, %edi # imm = 0x12C +; CHECK-NEXT: je .LBB22_8 +; CHECK-NEXT: # %bb.4: # %entry +; CHECK-NEXT: cmpl $200, %edi +; CHECK-NEXT: je .LBB22_7 +; CHECK-NEXT: # %bb.5: # %entry +; CHECK-NEXT: cmpl $100, %edi +; CHECK-NEXT: jne .LBB22_10 +; CHECK-NEXT: .LBB22_6: # %bb0 +; CHECK-NEXT: xorl %edi, %edi +; CHECK-NEXT: jmp g@PLT # TAILCALL +; CHECK-NEXT: .LBB22_8: # %bb2 +; CHECK-NEXT: movl $2, %edi +; CHECK-NEXT: jmp g@PLT # TAILCALL +; CHECK-NEXT: .LBB22_7: # %bb1 +; CHECK-NEXT: movl $1, %edi +; CHECK-NEXT: jmp g@PLT # TAILCALL +; CHECK-NEXT: .LBB22_10: # %return +; CHECK-NEXT: retq +; +; NOOPT-LABEL: jump_table_affects_balance: +; NOOPT: # %bb.0: # %entry +; NOOPT-NEXT: pushq %rax +; NOOPT-NEXT: .cfi_def_cfa_offset 16 +; NOOPT-NEXT: movl %edi, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill +; NOOPT-NEXT: testl %edi, %edi +; NOOPT-NEXT: je .LBB22_1 +; NOOPT-NEXT: jmp .LBB22_6 +; NOOPT-NEXT: .LBB22_6: # %entry +; NOOPT-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload +; NOOPT-NEXT: subl $1, %eax +; NOOPT-NEXT: je .LBB22_2 +; NOOPT-NEXT: jmp .LBB22_7 +; NOOPT-NEXT: .LBB22_7: # %entry +; NOOPT-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload +; NOOPT-NEXT: subl $2, %eax +; NOOPT-NEXT: je .LBB22_3 +; NOOPT-NEXT: jmp .LBB22_8 +; NOOPT-NEXT: .LBB22_8: # %entry +; NOOPT-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload +; NOOPT-NEXT: subl $3, %eax +; NOOPT-NEXT: je .LBB22_4 +; NOOPT-NEXT: jmp .LBB22_9 +; NOOPT-NEXT: .LBB22_9: # %entry +; NOOPT-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload +; NOOPT-NEXT: subl $100, %eax +; NOOPT-NEXT: je .LBB22_1 +; NOOPT-NEXT: jmp .LBB22_10 +; NOOPT-NEXT: .LBB22_10: # %entry +; NOOPT-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload +; NOOPT-NEXT: subl $200, %eax +; NOOPT-NEXT: je .LBB22_2 +; NOOPT-NEXT: jmp .LBB22_11 +; NOOPT-NEXT: .LBB22_11: # %entry +; NOOPT-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload +; NOOPT-NEXT: subl $300, %eax # imm = 0x12C +; NOOPT-NEXT: je .LBB22_3 +; NOOPT-NEXT: jmp .LBB22_5 +; NOOPT-NEXT: .LBB22_1: # %bb0 +; NOOPT-NEXT: xorl %edi, %edi +; NOOPT-NEXT: callq g@PLT +; NOOPT-NEXT: jmp .LBB22_5 +; NOOPT-NEXT: .LBB22_2: # %bb1 +; NOOPT-NEXT: movl $1, %edi +; NOOPT-NEXT: callq g@PLT +; NOOPT-NEXT: jmp .LBB22_5 +; NOOPT-NEXT: .LBB22_3: # %bb2 +; NOOPT-NEXT: movl $2, %edi +; NOOPT-NEXT: callq g@PLT +; NOOPT-NEXT: jmp .LBB22_5 +; NOOPT-NEXT: .LBB22_4: # %bb3 +; NOOPT-NEXT: movl $3, %edi +; NOOPT-NEXT: callq g@PLT +; NOOPT-NEXT: .LBB22_5: # %return +; NOOPT-NEXT: popq %rax +; NOOPT-NEXT: .cfi_def_cfa_offset 8 +; NOOPT-NEXT: retq entry: switch i32 %x, label %return [ ; Jump table: @@ -724,17 +2437,58 @@ bb2: tail call void @g(i32 2) br label %return bb3: tail call void @g(i32 3) br label %return return: ret void - -; CHECK-LABEL: jump_table_affects_balance -; If the tree were balanced based on number of clusters, {0-3,100} would go on -; the left and {200,300} on the right. However, the jump table weights as much -; as its components, so 100 is selected as the pivot. 
- -; CHECK-NOT: cmpl -; CHECK: cmpl $99 } +; Don't assert due to truncating the bitwidth (64) to i4 when checking +; that the bit-test range fits in a word. define void @pr23738(i4 %x) { +; CHECK-LABEL: pr23738: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: # kill: def $edi killed $edi def $rdi +; CHECK-NEXT: movl %edi, %eax +; CHECK-NEXT: andb $15, %al +; CHECK-NEXT: cmpb $11, %al +; CHECK-NEXT: ja .LBB23_2 +; CHECK-NEXT: # %bb.1: # %entry +; CHECK-NEXT: andl $15, %edi +; CHECK-NEXT: movl $2051, %eax # imm = 0x803 +; CHECK-NEXT: btq %rdi, %rax +; CHECK-NEXT: jae .LBB23_2 +; CHECK-NEXT: # %bb.3: # %bb1 +; CHECK-NEXT: movl $1, %edi +; CHECK-NEXT: jmp g@PLT # TAILCALL +; CHECK-NEXT: .LBB23_2: # %bb0 +; CHECK-NEXT: xorl %edi, %edi +; CHECK-NEXT: jmp g@PLT # TAILCALL +; +; NOOPT-LABEL: pr23738: +; NOOPT: # %bb.0: # %entry +; NOOPT-NEXT: pushq %rax +; NOOPT-NEXT: .cfi_def_cfa_offset 16 +; NOOPT-NEXT: movb %dil, %al +; NOOPT-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill +; NOOPT-NEXT: andb $15, %al +; NOOPT-NEXT: subb $11, %al +; NOOPT-NEXT: je .LBB23_2 +; NOOPT-NEXT: jmp .LBB23_4 +; NOOPT-NEXT: .LBB23_4: # %entry +; NOOPT-NEXT: movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload +; NOOPT-NEXT: andb $15, %al +; NOOPT-NEXT: subb $2, %al +; NOOPT-NEXT: jb .LBB23_2 +; NOOPT-NEXT: jmp .LBB23_1 +; NOOPT-NEXT: .LBB23_1: # %bb0 +; NOOPT-NEXT: xorl %edi, %edi +; NOOPT-NEXT: callq g@PLT +; NOOPT-NEXT: jmp .LBB23_3 +; NOOPT-NEXT: .LBB23_2: # %bb1 +; NOOPT-NEXT: movl $1, %edi +; NOOPT-NEXT: callq g@PLT +; NOOPT-NEXT: .LBB23_3: # %return +; NOOPT-NEXT: popq %rax +; NOOPT-NEXT: .cfi_def_cfa_offset 8 +; NOOPT-NEXT: retq entry: switch i4 %x, label %bb0 [ i4 0, label %bb1 @@ -744,12 +2498,73 @@ bb0: tail call void @g(i32 0) br label %return bb1: tail call void @g(i32 1) br label %return return: ret void -; Don't assert due to truncating the bitwidth (64) to i4 when checking -; that the bit-test range fits in a word. } +; The switch is lowered with bit tests. Since the case range is contiguous, the +; second bit test is redundant and can be skipped. Check that we don't update +; the phi node with an incoming value from the MBB of the skipped bit test +; (-verify-machineinstrs catches this).
define i32 @pr27135(i32 %i) { +; CHECK-LABEL: pr27135: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xorl %eax, %eax +; CHECK-NEXT: testb %al, %al +; CHECK-NEXT: jne .LBB24_5 +; CHECK-NEXT: # %bb.1: # %sw +; CHECK-NEXT: movl $1, %eax +; CHECK-NEXT: addl $-96, %edi +; CHECK-NEXT: cmpl $5, %edi +; CHECK-NEXT: jbe .LBB24_2 +; CHECK-NEXT: .LBB24_5: # %end +; CHECK-NEXT: retq +; CHECK-NEXT: .LBB24_2: # %sw +; CHECK-NEXT: movl $19, %eax +; CHECK-NEXT: btl %edi, %eax +; CHECK-NEXT: jae .LBB24_3 +; CHECK-NEXT: # %bb.4: # %sw.bb2 +; CHECK-NEXT: .LBB24_3: # %sw.bb +; +; NOOPT-LABEL: pr27135: +; NOOPT: # %bb.0: # %entry +; NOOPT-NEXT: movl %edi, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill +; NOOPT-NEXT: xorl %eax, %eax +; NOOPT-NEXT: # implicit-def: $cl +; NOOPT-NEXT: testb $1, %cl +; NOOPT-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill +; NOOPT-NEXT: jne .LBB24_1 +; NOOPT-NEXT: jmp .LBB24_4 +; NOOPT-NEXT: .LBB24_1: # %sw +; NOOPT-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload +; NOOPT-NEXT: movl $1, %ecx +; NOOPT-NEXT: movl %ecx, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill +; NOOPT-NEXT: addl $-96, %eax +; NOOPT-NEXT: subl $2, %eax +; NOOPT-NEXT: jb .LBB24_3 +; NOOPT-NEXT: jmp .LBB24_5 +; NOOPT-NEXT: .LBB24_5: # %sw +; NOOPT-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload +; NOOPT-NEXT: addl $-98, %eax +; NOOPT-NEXT: subl $2, %eax +; NOOPT-NEXT: jb .LBB24_2 +; NOOPT-NEXT: jmp .LBB24_6 +; NOOPT-NEXT: .LBB24_6: # %sw +; NOOPT-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload +; NOOPT-NEXT: subl $100, %eax +; NOOPT-NEXT: je .LBB24_3 +; NOOPT-NEXT: jmp .LBB24_7 +; NOOPT-NEXT: .LBB24_7: # %sw +; NOOPT-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload +; NOOPT-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %ecx # 4-byte Reload +; NOOPT-NEXT: subl $101, %ecx +; NOOPT-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill +; NOOPT-NEXT: jne .LBB24_4 +; NOOPT-NEXT: jmp .LBB24_2 +; NOOPT-NEXT: .LBB24_2: # %sw.bb +; NOOPT-NEXT: .LBB24_3: # %sw.bb2 +; NOOPT-NEXT: .LBB24_4: # %end +; NOOPT-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload +; NOOPT-NEXT: retq entry: br i1 undef, label %sw, label %end sw: @@ -768,18 +2583,50 @@ end: %p = phi i32 [ 1, %sw ], [ 0, %entry ] ret i32 %p - -; CHECK-LABEL: pr27135: -; The switch is lowered with bit tests. Since the case range is contiguous, the -; second bit test is redundant and can be skipped. Check that we don't update -; the phi node with an incoming value from the MBB of the skipped bit test -; (-verify-machine-instrs cathces this). -; CHECK: btl -; CHECK-NOT: btl } +; Since the default is unreachable, either cluster will be reached. +; Only one comparison should be emitted. 
define void @range_with_unreachable_fallthrough(i32 %i) { +; CHECK-LABEL: range_with_unreachable_fallthrough: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addl $-4, %edi +; CHECK-NEXT: cmpl $3, %edi +; CHECK-NEXT: jae .LBB25_1 +; CHECK-NEXT: # %bb.2: # %bb2 +; CHECK-NEXT: movl $1, %edi +; CHECK-NEXT: jmp g@PLT # TAILCALL +; CHECK-NEXT: .LBB25_1: # %bb1 +; CHECK-NEXT: xorl %edi, %edi +; CHECK-NEXT: jmp g@PLT # TAILCALL +; +; NOOPT-LABEL: range_with_unreachable_fallthrough: +; NOOPT: # %bb.0: # %entry +; NOOPT-NEXT: pushq %rax +; NOOPT-NEXT: .cfi_def_cfa_offset 16 +; NOOPT-NEXT: movl %edi, %eax +; NOOPT-NEXT: decl %eax +; NOOPT-NEXT: subl $3, %eax +; NOOPT-NEXT: jb .LBB25_1 +; NOOPT-NEXT: jmp .LBB25_5 +; NOOPT-NEXT: .LBB25_5: # %entry +; NOOPT-NEXT: jmp .LBB25_2 +; NOOPT-NEXT: .LBB25_1: # %bb1 +; NOOPT-NEXT: xorl %edi, %edi +; NOOPT-NEXT: callq g@PLT +; NOOPT-NEXT: jmp .LBB25_4 +; NOOPT-NEXT: .LBB25_2: # %bb2 +; NOOPT-NEXT: movl $1, %edi +; NOOPT-NEXT: callq g@PLT +; NOOPT-NEXT: jmp .LBB25_4 +; NOOPT-NEXT: # %bb.3: # %default +; NOOPT-NEXT: .cfi_def_cfa_offset 8 +; NOOPT-NEXT: .LBB25_4: # %return +; NOOPT-NEXT: .cfi_def_cfa_offset 16 +; NOOPT-NEXT: popq %rax +; NOOPT-NEXT: .cfi_def_cfa_offset 8 +; NOOPT-NEXT: retq entry: switch i32 %i, label %default [ i32 1, label %bb1 @@ -795,12 +2642,127 @@ return: ret void +} -; CHECK-LABEL: range_with_unreachable_fallthrough: -; Since the default is unreachable, either cluster will be reached. -; Only one comparison should be emitted. -; CHECK: cmpl -; CHECK-NOT: cmpl -; CHECK: jmp g -; CHECK: jmp g + +; A switch over a small type (i8) should be extended to avoid truncation +; instructions. +define void @switch_i8(i32 %a) { +; CHECK-LABEL: switch_i8: +; CHECK: # %bb.0: +; CHECK-NEXT: # kill: def $edi killed $edi def $rdi +; CHECK-NEXT: andb $127, %dil +; CHECK-NEXT: leal -1(%rdi), %eax +; CHECK-NEXT: cmpb $8, %al +; CHECK-NEXT: ja .LBB26_1 +; CHECK-NEXT: # %bb.10: +; CHECK-NEXT: movzbl %al, %eax +; CHECK-NEXT: jmpq *.LJTI26_0(,%rax,8) +; CHECK-NEXT: .LBB26_4: # %bb0 +; CHECK-NEXT: xorl %edi, %edi +; CHECK-NEXT: jmp g@PLT # TAILCALL +; CHECK-NEXT: .LBB26_1: +; CHECK-NEXT: cmpb $13, %dil +; CHECK-NEXT: je .LBB26_8 +; CHECK-NEXT: # %bb.2: +; CHECK-NEXT: cmpb $42, %dil +; CHECK-NEXT: jne .LBB26_9 +; CHECK-NEXT: # %bb.3: # %bb5 +; CHECK-NEXT: movl $5, %edi +; CHECK-NEXT: jmp g@PLT # TAILCALL +; CHECK-NEXT: .LBB26_9: # %return +; CHECK-NEXT: retq +; CHECK-NEXT: .LBB26_5: # %bb1 +; CHECK-NEXT: movl $1, %edi +; CHECK-NEXT: jmp g@PLT # TAILCALL +; CHECK-NEXT: .LBB26_6: # %bb2 +; CHECK-NEXT: movl $2, %edi +; CHECK-NEXT: jmp g@PLT # TAILCALL +; CHECK-NEXT: .LBB26_7: # %bb3 +; CHECK-NEXT: movl $3, %edi +; CHECK-NEXT: jmp g@PLT # TAILCALL +; CHECK-NEXT: .LBB26_8: # %bb4 +; CHECK-NEXT: movl $4, %edi +; CHECK-NEXT: jmp g@PLT # TAILCALL +; +; NOOPT-LABEL: switch_i8: +; NOOPT: # %bb.0: +; NOOPT-NEXT: pushq %rax +; NOOPT-NEXT: .cfi_def_cfa_offset 16 +; NOOPT-NEXT: movb %dil, %al +; NOOPT-NEXT: andb $127, %al +; NOOPT-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill +; NOOPT-NEXT: subb $1, %al +; NOOPT-NEXT: je .LBB26_1 +; NOOPT-NEXT: jmp .LBB26_8 +; NOOPT-NEXT: .LBB26_8: +; NOOPT-NEXT: movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload +; NOOPT-NEXT: subb $3, %al +; NOOPT-NEXT: je .LBB26_2 +; NOOPT-NEXT: jmp .LBB26_9 +; NOOPT-NEXT: .LBB26_9: +; NOOPT-NEXT: movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload +; NOOPT-NEXT: subb $7, %al +; NOOPT-NEXT: je .LBB26_3 +; NOOPT-NEXT: jmp .LBB26_10 +; NOOPT-NEXT: .LBB26_10: +; NOOPT-NEXT: movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 
1-byte Reload +; NOOPT-NEXT: subb $9, %al +; NOOPT-NEXT: je .LBB26_4 +; NOOPT-NEXT: jmp .LBB26_11 +; NOOPT-NEXT: .LBB26_11: +; NOOPT-NEXT: movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload +; NOOPT-NEXT: subb $13, %al +; NOOPT-NEXT: je .LBB26_5 +; NOOPT-NEXT: jmp .LBB26_12 +; NOOPT-NEXT: .LBB26_12: +; NOOPT-NEXT: movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload +; NOOPT-NEXT: subb $42, %al +; NOOPT-NEXT: je .LBB26_6 +; NOOPT-NEXT: jmp .LBB26_7 +; NOOPT-NEXT: .LBB26_1: # %bb0 +; NOOPT-NEXT: xorl %edi, %edi +; NOOPT-NEXT: callq g@PLT +; NOOPT-NEXT: jmp .LBB26_7 +; NOOPT-NEXT: .LBB26_2: # %bb1 +; NOOPT-NEXT: movl $1, %edi +; NOOPT-NEXT: callq g@PLT +; NOOPT-NEXT: jmp .LBB26_7 +; NOOPT-NEXT: .LBB26_3: # %bb2 +; NOOPT-NEXT: movl $2, %edi +; NOOPT-NEXT: callq g@PLT +; NOOPT-NEXT: jmp .LBB26_7 +; NOOPT-NEXT: .LBB26_4: # %bb3 +; NOOPT-NEXT: movl $3, %edi +; NOOPT-NEXT: callq g@PLT +; NOOPT-NEXT: jmp .LBB26_7 +; NOOPT-NEXT: .LBB26_5: # %bb4 +; NOOPT-NEXT: movl $4, %edi +; NOOPT-NEXT: callq g@PLT +; NOOPT-NEXT: jmp .LBB26_7 +; NOOPT-NEXT: .LBB26_6: # %bb5 +; NOOPT-NEXT: movl $5, %edi +; NOOPT-NEXT: callq g@PLT +; NOOPT-NEXT: .LBB26_7: # %return +; NOOPT-NEXT: popq %rax +; NOOPT-NEXT: .cfi_def_cfa_offset 8 +; NOOPT-NEXT: retq + %and = and i32 %a, 127 + %trunc = trunc i32 %and to i8 + switch i8 %trunc, label %return [ + i8 1, label %bb0 + i8 3, label %bb1 + i8 7, label %bb2 + i8 9, label %bb3 + i8 13, label %bb4 + i8 42, label %bb5 + ] +bb0: tail call void @g(i32 0) br label %return +bb1: tail call void @g(i32 1) br label %return +bb2: tail call void @g(i32 2) br label %return +bb3: tail call void @g(i32 3) br label %return +bb4: tail call void @g(i32 4) br label %return +bb5: tail call void @g(i32 5) br label %return +return: + ret void }