diff --git a/llvm/test/CodeGen/ARM/fp16-return-pr60510.ll b/llvm/test/CodeGen/ARM/fp16-return-pr60510.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/ARM/fp16-return-pr60510.ll
@@ -0,0 +1,150 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+
+; No FP16/BF16
+; RUN: llc -mtriple=arm-none-eabi -float-abi=soft -mattr=+armv8.2-a,+fp-armv8,-fullfp16,-bf16,-neon %s -o - | FileCheck %s --check-prefixes=NO-FP16-SOFT
+; R UN: llc -mtriple=thumb-none-eabi -float-abi=soft -mattr=+armv8.2-a,+fp-armv8,-fullfp16,-bf16,-neon %s -o - | FileCheck %s --check-prefixes=NO-FP16-SOFT
+; RUN: llc -mtriple=arm-none-eabi -mattr=+armv8.2-a,+fp-armv8,-fullfp16,-bf16,-neon %s -o - | FileCheck %s --check-prefixes=NO-FP16-SOFT
+; R UN: llc -mtriple=thumb-none-eabi -mattr=+armv8.2-a,+fp-armv8,-fullfp16,-bf16,-neon %s -o - | FileCheck %s --check-prefixes=NO-FP16-SOFT
+; RUN: llc -mtriple=arm-none-eabihf -mattr=+armv8.2-a,+fp-armv8,-fullfp16,-bf16,-neon %s -o - | FileCheck %s --check-prefixes=NO-FP16-HARD
+; R UN: llc -mtriple=thumb-none-eabihf -mattr=+armv8.2-a,+fp-armv8,-fullfp16,-bf16,-neon %s -o - | FileCheck %s --check-prefixes=NO-FP16-HARD
+
+; FP16/BF16
+; RUN: llc -mtriple=arm-none-eabi -float-abi=soft -mattr=+armv8.2-a,+fp-armv8,+fullfp16,+bf16,-neon %s -o - | FileCheck %s --check-prefixes=FP16-SOFT
+; R UN: llc -mtriple=thumb-none-eabi -float-abi=soft -mattr=+armv8.2-a,+fp-armv8,+fullfp16,+bf16,-neon %s -o - | FileCheck %s --check-prefixes=FP16-SOFT
+; RUN: llc -mtriple=arm-none-eabi -mattr=+armv8.2-a,+fp-armv8,+fullfp16,+bf16,-neon %s -o - | FileCheck %s --check-prefixes=FP16-SOFT
+; R UN: llc -mtriple=thumb-none-eabi -mattr=+armv8.2-a,+fp-armv8,+fullfp16,+bf16,-neon %s -o - | FileCheck %s --check-prefixes=FP16-SOFT
+; RUN: llc -mtriple=arm-none-eabihf -mattr=+armv8.2-a,+fp-armv8,+fullfp16,+bf16,-neon %s -o - | FileCheck %s --check-prefixes=FP16-HARD
+; R UN: llc -mtriple=thumb-none-eabihf -mattr=+armv8.2-a,+fp-armv8,+fullfp16,+bf16,-neon %s -o - | FileCheck %s --check-prefixes=FP16-HARD
+
+; NOTE(review): the Thumb invocations above are spelled `R UN`, so they are not
+; picked up as test commands. This looks deliberate (presumably the checks
+; below were generated from the ARM-mode runs only) - confirm before re-enabling.
+
+; PR60510 showed a bug where the return from `*_inner` was getting "lost" by an
+; optimisation, and a garbage value was being left in `s0`.
+; NOTE(review): the FP16-HARD checks for fp16_out_call never copy s0 aside
+; between the two calls, so they appear to record that miscompile as-is;
+; regenerate them once the fix lands.
+
+declare dso_local float @other(float) nounwind
+
+declare dso_local half @fp16_inner() nounwind
+
+define half @fp16_out_call(float %arg) nounwind {
+; NO-FP16-SOFT-LABEL: fp16_out_call:
+; NO-FP16-SOFT:         @ %bb.0:
+; NO-FP16-SOFT-NEXT:    .save {r4, r5, r11, lr}
+; NO-FP16-SOFT-NEXT:    push {r4, r5, r11, lr}
+; NO-FP16-SOFT-NEXT:    mov r4, r0
+; NO-FP16-SOFT-NEXT:    bl fp16_inner
+; NO-FP16-SOFT-NEXT:    mov r5, r0
+; NO-FP16-SOFT-NEXT:    mov r0, r4
+; NO-FP16-SOFT-NEXT:    bl other
+; NO-FP16-SOFT-NEXT:    mov r0, r5
+; NO-FP16-SOFT-NEXT:    pop {r4, r5, r11, pc}
+;
+; NO-FP16-HARD-LABEL: fp16_out_call:
+; NO-FP16-HARD:         @ %bb.0:
+; NO-FP16-HARD-NEXT:    .save {r11, lr}
+; NO-FP16-HARD-NEXT:    push {r11, lr}
+; NO-FP16-HARD-NEXT:    .vsave {d8, d9}
+; NO-FP16-HARD-NEXT:    vpush {d8, d9}
+; NO-FP16-HARD-NEXT:    vmov.f32 s16, s0
+; NO-FP16-HARD-NEXT:    bl fp16_inner
+; NO-FP16-HARD-NEXT:    vmov.f32 s18, s0
+; NO-FP16-HARD-NEXT:    vmov.f32 s0, s16
+; NO-FP16-HARD-NEXT:    bl other
+; NO-FP16-HARD-NEXT:    vmov.f32 s0, s18
+; NO-FP16-HARD-NEXT:    vpop {d8, d9}
+; NO-FP16-HARD-NEXT:    pop {r11, pc}
+;
+; FP16-SOFT-LABEL: fp16_out_call:
+; FP16-SOFT:         @ %bb.0:
+; FP16-SOFT-NEXT:    .save {r4, r5, r11, lr}
+; FP16-SOFT-NEXT:    push {r4, r5, r11, lr}
+; FP16-SOFT-NEXT:    mov r4, r0
+; FP16-SOFT-NEXT:    bl fp16_inner
+; FP16-SOFT-NEXT:    mov r5, r0
+; FP16-SOFT-NEXT:    mov r0, r4
+; FP16-SOFT-NEXT:    bl other
+; FP16-SOFT-NEXT:    vmov.f16 s0, r5
+; FP16-SOFT-NEXT:    vmov r0, s0
+; FP16-SOFT-NEXT:    pop {r4, r5, r11, pc}
+;
+; FP16-HARD-LABEL: fp16_out_call:
+; FP16-HARD:         @ %bb.0:
+; FP16-HARD-NEXT:    .save {r11, lr}
+; FP16-HARD-NEXT:    push {r11, lr}
+; FP16-HARD-NEXT:    .vsave {d8}
+; FP16-HARD-NEXT:    vpush {d8}
+; FP16-HARD-NEXT:    vmov.f32 s16, s0
+; FP16-HARD-NEXT:    bl fp16_inner
+; FP16-HARD-NEXT:    vmov.f32 s0, s16
+; FP16-HARD-NEXT:    bl other
+; FP16-HARD-NEXT:    vpop {d8}
+; FP16-HARD-NEXT:    pop {r11, pc}
+  %call = call half @fp16_inner()
+  %call1 = call float @other(float %arg)
+  ret half %call
+}
+
+declare dso_local bfloat @bf_inner() nounwind
+
+define bfloat @bf_out_call(float %arg) nounwind {
+; NO-FP16-SOFT-LABEL: bf_out_call:
+; NO-FP16-SOFT:         @ %bb.0:
+; NO-FP16-SOFT-NEXT:    .save {r4, r5, r11, lr}
+; NO-FP16-SOFT-NEXT:    push {r4, r5, r11, lr}
+; NO-FP16-SOFT-NEXT:    mov r4, r0
+; NO-FP16-SOFT-NEXT:    bl bf_inner
+; NO-FP16-SOFT-NEXT:    mov r5, r0
+; NO-FP16-SOFT-NEXT:    mov r0, r4
+; NO-FP16-SOFT-NEXT:    bl other
+; NO-FP16-SOFT-NEXT:    mov r0, r5
+; NO-FP16-SOFT-NEXT:    pop {r4, r5, r11, pc}
+;
+; NO-FP16-HARD-LABEL: bf_out_call:
+; NO-FP16-HARD:         @ %bb.0:
+; NO-FP16-HARD-NEXT:    .save {r11, lr}
+; NO-FP16-HARD-NEXT:    push {r11, lr}
+; NO-FP16-HARD-NEXT:    .vsave {d8, d9}
+; NO-FP16-HARD-NEXT:    vpush {d8, d9}
+; NO-FP16-HARD-NEXT:    vmov.f32 s16, s0
+; NO-FP16-HARD-NEXT:    bl bf_inner
+; NO-FP16-HARD-NEXT:    vmov.f32 s18, s0
+; NO-FP16-HARD-NEXT:    vmov.f32 s0, s16
+; NO-FP16-HARD-NEXT:    bl other
+; NO-FP16-HARD-NEXT:    vmov.f32 s0, s18
+; NO-FP16-HARD-NEXT:    vpop {d8, d9}
+; NO-FP16-HARD-NEXT:    pop {r11, pc}
+;
+; FP16-SOFT-LABEL: bf_out_call:
+; FP16-SOFT:         @ %bb.0:
+; FP16-SOFT-NEXT:    .save {r4, r5, r11, lr}
+; FP16-SOFT-NEXT:    push {r4, r5, r11, lr}
+; FP16-SOFT-NEXT:    mov r4, r0
+; FP16-SOFT-NEXT:    bl bf_inner
+; FP16-SOFT-NEXT:    mov r5, r0
+; FP16-SOFT-NEXT:    mov r0, r4
+; FP16-SOFT-NEXT:    bl other
+; FP16-SOFT-NEXT:    mov r0, r5
+; FP16-SOFT-NEXT:    pop {r4, r5, r11, pc}
+;
+; FP16-HARD-LABEL: bf_out_call:
+; FP16-HARD:         @ %bb.0:
+; FP16-HARD-NEXT:    .save {r11, lr}
+; FP16-HARD-NEXT:    push {r11, lr}
+; FP16-HARD-NEXT:    .vsave {d8, d9}
+; FP16-HARD-NEXT:    vpush {d8, d9}
+; FP16-HARD-NEXT:    vmov.f32 s16, s0
+; FP16-HARD-NEXT:    bl bf_inner
+; FP16-HARD-NEXT:    vmov.f32 s18, s0
+; FP16-HARD-NEXT:    vmov.f32 s0, s16
+; FP16-HARD-NEXT:    bl other
+; FP16-HARD-NEXT:    vmov.f32 s0, s18
+; FP16-HARD-NEXT:    vpop {d8, d9}
+; FP16-HARD-NEXT:    pop {r11, pc}
+  %call = call bfloat @bf_inner()
+  %call1 = call float @other(float %arg)
+  ret bfloat %call
+}