This is not meant as an actual code review to get this patch submitted, but rather as a basis for further discussion.
It is an experiment toward a solution for http://llvm.org/PR22230.
With this patch, LLVM generates pretty nice code for the example from the bug report (see below). Obviously, it is far from complete or correct.
## -----------------------------------------------------------------------
## Z1fPhP1A   (IR name @_Z1fPhP1A; demangles to f(unsigned char*, A*))
##
## Compiler-generated x86-64 listing: AT&T syntax, Mach-O, System V AMD64.
##
## In:     %rdi = byte pointer. It is incremented once before the first
##                load, so the first byte examined is at offset 1.
##         %rsi = base address handed (with an offset) to the callee.
## Out:    none. There is no exit path; the scan loop never terminates.
## Calls:  __Z6assignPj (demangles to assign(unsigned int*)), %rdi = arg.
## Live across the call (all callee-saved, spilled in the prologue):
##         %rbx = scan cursor
##         %r14 = base address (copy of %rsi)
##         %r15 = address of the jump table LJTI0_0
##
## Per byte b read through the cursor:
##         b == 0..3  ->  assign(base + 4*b), then advance the cursor
##         b  > 3     ->  just advance the cursor
##
## NOTE(review): the exported label below has no leading underscores while
## the callee symbol __Z6assignPj does. For IR name @_Z1fPhP1A the Mach-O
## symbol would normally be __Z1fPhP1A, so the prefix was presumably lost
## when this listing was pasted. Confirm before linking against it.
## -----------------------------------------------------------------------
        .section        __TEXT,__text,regular,pure_instructions
        .macosx_version_min 10, 10
        .globl  Z1fPhP1A
        .align  4, 0x90
Z1fPhP1A:                               ## @_Z1fPhP1A
        .cfi_startproc
## BB#0:                                ## %entry
        pushq   %rbp
Ltmp0:
        .cfi_def_cfa_offset 16
Ltmp1:
        .cfi_offset %rbp, -16
        movq    %rsp, %rbp
Ltmp2:
        .cfi_def_cfa_register %rbp
        pushq   %r15
        pushq   %r14
        pushq   %rbx
        pushq   %rax                    ## value is never read back: 5th push keeps
                                        ## %rsp 16-byte aligned at the callq below
Ltmp3:
        .cfi_offset %rbx, -40
Ltmp4:
        .cfi_offset %r14, -32
Ltmp5:
        .cfi_offset %r15, -24
        movq    %rsi, %r14              ## r14 = base address
        movq    %rdi, %rbx              ## rbx = scan cursor
        incq    %rbx                    ## scanning starts at byte 1
        leaq    LJTI0_0(%rip), %r15     ## table base, loaded once outside the loop
        jmp     LBB0_1

        .align  4, 0x90
LBB0_6:                                 ## %for.cond.backedge
                                        ##   in Loop: Header=BB0_1 Depth=1
        incq    %rbx                    ## advance to the next byte
LBB0_1:                                 ## %for.cond
                                        ## =>This Inner Loop Header: Depth=1
        movzbl  (%rbx), %eax            ## rax = current byte, zero-extended
        cmpq    $3, %rax
        ja      LBB0_6                  ## unsigned > 3: no case, skip the byte
## BB#2:                                ## %for.cond
                                        ##   in Loop: Header=BB0_1 Depth=1
        movslq  (%r15,%rax,4), %rax     ## signed 32-bit offset of the case label
        addq    %r15, %rax              ## offset + table base = absolute target
        jmpq    *%rax

LBB0_3:                                 ## %if.then
                                        ##   in Loop: Header=BB0_1 Depth=1
        movq    %r14, %rdi              ## case 0: arg = base
        jmp     LBB0_5
LBB0_4:                                 ## %if.then4
                                        ##   in Loop: Header=BB0_1 Depth=1
        leaq    4(%r14), %rdi           ## case 1: arg = base + 4
        jmp     LBB0_5
LBB0_7:                                 ## %if.then8
                                        ##   in Loop: Header=BB0_1 Depth=1
        leaq    8(%r14), %rdi           ## case 2: arg = base + 8
        jmp     LBB0_5
LBB0_8:                                 ## %if.then12
                                        ##   in Loop: Header=BB0_1 Depth=1
        leaq    12(%r14), %rdi          ## case 3: arg = base + 12; falls through
LBB0_5:                                 ## %for.cond.backedge
                                        ##   in Loop: Header=BB0_1 Depth=1
        callq   __Z6assignPj
        jmp     LBB0_6
        .cfi_endproc

## Jump table: one 32-bit entry per case value 0..3, each holding the
## distance from the table start (LJTI0_0) to that case's label.
        .align  2, 0x90
L0_0_set_3 = LBB0_3-LJTI0_0
L0_0_set_4 = LBB0_4-LJTI0_0
L0_0_set_7 = LBB0_7-LJTI0_0
L0_0_set_8 = LBB0_8-LJTI0_0
LJTI0_0:
        .long   L0_0_set_3
        .long   L0_0_set_4
        .long   L0_0_set_7
        .long   L0_0_set_8

.subsections_via_symbols
This test checks that an expensive operation is not rematerialized.