For the below case, pre-inc prep think it's a good candidate to use pre-inc for the bucket, but 64bit integer load/store update (pre-inc) instruction on Power requires the displacement field should be DS-form (4's multiple). Since it can't satisfy the constraint, we have to do some fix ups later. As below, the original load/stores could be well-form, it makes things worse.
unsigned long long result = 0;
unsigned long long foo(char *p, unsigned long long n) {
for (unsigned long long i = 0; i < n; i++) {
unsigned long long x1 = *(unsigned long long *)(p - 50000 + i);
unsigned long long x2 = *(unsigned long long *)(p - 61024 + i);
unsigned long long x3 = *(unsigned long long *)(p - 62048 + i);
unsigned long long x4 = *(unsigned long long *)(p - 64096 + i);
result *= x1 * x2 * x3 * x4;
}
return result;
}The current instruction sequence:
.LBB0_2:
addi 4, 5, -11023
addi 7, 5, 1
addi 8, 5, -12047
addi 5, 5, -14095
ld 9, 0(7)
ld 4, 0(4)
ld 8, 0(8)
ld 5, 0(5)
mulld 4, 4, 9
mulld 4, 4, 8
mulld 4, 4, 5
mr 5, 7
mulld 3, 4, 3
bdnz .LBB0_2With the proposed fix:
.LBB0_2:
ld 4, 14096(5)
ld 7, 3072(5)
ld 8, 2048(5)
ld 9, 0(5)
addi 5, 5, 1
mulld 4, 7, 4
mulld 4, 4, 8
mulld 4, 4, 9
mulld 3, 4, 3
bdnz .LBB0_2For i64 loads/stores, the proposed fix is to check whether the displacement fits in a 16-bit signed field but isn't a multiple of 4, if so we don't make it as candidate any more.