Improves loop code generation. All targets are affected, but most of the benefit is seen on X86. The patch produces shorter code in a number of cases by allowing the Loop Strength Reduction (LSR) algorithm to consider both the direct and the swapped forms of compare-against-zero instructions, which increases the opportunities to find an overall better LSR solution. When two LSR solutions have equal cost, the patch also honours the direction of the loop induction variable as written in the user's source code, which in practice also tends to result in a better solution.
The patch broke a number of regression tests because of inherent test fragility, not because the behaviour those tests are intended to check regressed. I fixed the affected CodeGen tests for the ARM and X86 architectures.
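An example of code improved by this patch (in the loop preheader, the `addl`/`pushl $20`/`popl`/`subl` sequence that computes 20 - count is replaced by a `movl` plus a single `leal` that computes count - 20, and the loop counter is incremented, as in the source, instead of decremented):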
int func(void); void func2(void); void LSRTest(int count) { count += func(); for ( ; count != 20; ++count ) { func2(); } }
Before:
.section __TEXT,__text,regular,pure_instructions .macosx_version_min 10, 12 .globl _LSRTest _LSRTest: .cfi_startproc pushl %ebp .cfi_def_cfa_offset 8 .cfi_offset %ebp, -8 movl %esp, %ebp .cfi_def_cfa_register %ebp pushl %esi pushl %eax .cfi_offset %esi, -12 calll _func addl 8(%ebp), %eax pushl $20 popl %esi subl %eax, %esi jmp LBB0_1 LBB0_2: calll _func2 decl %esi LBB0_1: testl %esi, %esi jne LBB0_2 addl $4, %esp popl %esi popl %ebp retl .cfi_endproc
After:
.section __TEXT,__text,regular,pure_instructions .macosx_version_min 10, 12 .globl _LSRTest _LSRTest: .cfi_startproc pushl %ebp .cfi_def_cfa_offset 8 .cfi_offset %ebp, -8 movl %esp, %ebp .cfi_def_cfa_register %ebp pushl %esi pushl %eax .cfi_offset %esi, -12 movl 8(%ebp), %esi calll _func leal -20(%eax,%esi), %esi jmp LBB0_1 LBB0_2: calll _func2 incl %esi LBB0_1: testl %esi, %esi jne LBB0_2 addl $4, %esp popl %esi popl %ebp retl .cfi_endproc