Clang doesn't correctly generate loads in the presence of the -mno-unaligned-access flag. At -O0 it produces:
$ clang --target=aarch64-arm-none-eabi -mno-unaligned-access -O0 -S test.c -o-
foo:
stp x29, x30, [sp, #-16]!
mov x29, sp
sub sp, sp, #32 // =32
adrp x8, a1
add x8, x8, :lo12:a1
movz x2, #0x5
sub x9, x29, #8 // =8
add x10, sp, #16 // =16
str x0, [sp, #16]
mov x0, x9
mov x1, x10
str x8, [sp, #8] // 8-byte Folded Spill
bl memcpy
ldur w11, [x29, #-7]
ldr x8, [sp, #8] // 8-byte Folded Reload
ldur w12, [x8, #1]
add w11, w12, w11
stur w11, [x8, #1]
mov w0, w11
mov sp, x29
ldp x29, x30, [sp], #16
ret
At -O1 and above, it produces correct code, using ldrb instructions to access non-aligned data:
clang --target=aarch64-arm-none-eabi -mno-unaligned-access -O1 -S test.c -o-
foo:
adrp x8, a1
add x8, x8, :lo12:a1
ldrb w9, [x8, #1]!
ldrb w10, [x8, #3]
ldrb w11, [x8, #2]
ldrb w12, [x8, #1]
bfi w11, w10, #8, #8
lsr x10, x0, #8
bfi w9, w12, #8, #8
bfi w9, w11, #16, #16
add w0, w9, w10
strb w0, [x8]
lsr w9, w0, #24
lsr w10, w0, #16
lsr w11, w0, #8
strb w9, [x8, #3]
strb w10, [x8, #2]
strb w11, [x8, #1]
ret
The root cause seems to be that fast-isel does not handle unaligned accesses correctly under -mno-unaligned-access:
clang --target=aarch64-arm-none-eabi -mno-unaligned-access -O1 -mllvm -fast-isel -S test.c -o-
foo:
adrp x8, a1
add x8, x8, :lo12:a1
ldur w9, [x8, #1]
lsr x10, x0, #8
add w0, w9, w10
stur w0, [x8, #1]
ret