Index: lib/Target/ARM/ARMCallingConv.td
===================================================================
--- lib/Target/ARM/ARMCallingConv.td
+++ lib/Target/ARM/ARMCallingConv.td
@@ -30,8 +30,8 @@
   CCIfSwiftError<CCIfType<[i32], CCAssignToReg<[R8]>>>,
 
   // Handle all vector types as either f64 or v2f64.
-  CCIfType<[v1i64, v2i32, v4i16, v8i8, v2f32], CCBitConvertToType<f64>>,
-  CCIfType<[v2i64, v4i32, v8i16, v16i8, v4f32], CCBitConvertToType<v2f64>>,
+  CCIfType<[v1i64, v2i32, v4i16, v4f16, v8i8, v2f32], CCBitConvertToType<f64>>,
+  CCIfType<[v2i64, v4i32, v8i16, v8f16, v16i8, v4f32], CCBitConvertToType<v2f64>>,
 
   // f64 and v2f64 are passed in adjacent GPRs, possibly split onto the stack
   CCIfType<[f64, v2f64], CCCustom<"CC_ARM_APCS_Custom_f64">>,
@@ -56,8 +56,8 @@
   CCIfSwiftError<CCIfType<[i32], CCAssignToReg<[R8]>>>,
 
   // Handle all vector types as either f64 or v2f64.
-  CCIfType<[v1i64, v2i32, v4i16, v8i8, v2f32], CCBitConvertToType<f64>>,
-  CCIfType<[v2i64, v4i32, v8i16, v16i8, v4f32], CCBitConvertToType<v2f64>>,
+  CCIfType<[v1i64, v2i32, v4i16, v4f16, v8i8, v2f32], CCBitConvertToType<f64>>,
+  CCIfType<[v2i64, v4i32, v8i16, v8f16, v16i8, v4f32], CCBitConvertToType<v2f64>>,
 
   CCIfType<[f64, v2f64], CCCustom<"RetCC_ARM_APCS_Custom_f64">>,
 
@@ -71,8 +71,8 @@
 let Entry = 1 in
 def FastCC_ARM_APCS : CallingConv<[
   // Handle all vector types as either f64 or v2f64.
-  CCIfType<[v1i64, v2i32, v4i16, v8i8, v2f32], CCBitConvertToType<f64>>,
-  CCIfType<[v2i64, v4i32, v8i16, v16i8, v4f32], CCBitConvertToType<v2f64>>,
+  CCIfType<[v1i64, v2i32, v4i16, v4f16, v8i8, v2f32], CCBitConvertToType<f64>>,
+  CCIfType<[v2i64, v4i32, v8i16, v8f16, v16i8, v4f32], CCBitConvertToType<v2f64>>,
 
   CCIfType<[v2f64], CCAssignToReg<[Q0, Q1, Q2, Q3]>>,
   CCIfType<[f64], CCAssignToReg<[D0, D1, D2, D3, D4, D5, D6, D7]>>,
@@ -91,8 +91,8 @@
 let Entry = 1 in
 def RetFastCC_ARM_APCS : CallingConv<[
   // Handle all vector types as either f64 or v2f64.
-  CCIfType<[v1i64, v2i32, v4i16, v8i8, v2f32], CCBitConvertToType<f64>>,
-  CCIfType<[v2i64, v4i32, v8i16, v16i8, v4f32], CCBitConvertToType<v2f64>>,
+  CCIfType<[v1i64, v2i32, v4i16, v4f16, v8i8, v2f32], CCBitConvertToType<f64>>,
+  CCIfType<[v2i64, v4i32, v8i16, v8f16, v16i8, v4f32], CCBitConvertToType<v2f64>>,
 
   CCIfType<[v2f64], CCAssignToReg<[Q0, Q1, Q2, Q3]>>,
   CCIfType<[f64], CCAssignToReg<[D0, D1, D2, D3, D4, D5, D6, D7]>>,
@@ -108,8 +108,8 @@
 let Entry = 1 in
 def CC_ARM_APCS_GHC : CallingConv<[
   // Handle all vector types as either f64 or v2f64.
-  CCIfType<[v1i64, v2i32, v4i16, v8i8, v2f32], CCBitConvertToType<f64>>,
-  CCIfType<[v2i64, v4i32, v8i16, v16i8, v4f32], CCBitConvertToType<v2f64>>,
+  CCIfType<[v1i64, v2i32, v4i16, v4f16, v8i8, v2f32], CCBitConvertToType<f64>>,
+  CCIfType<[v2i64, v4i32, v8i16, v8f16, v16i8, v4f32], CCBitConvertToType<v2f64>>,
 
   CCIfType<[v2f64], CCAssignToReg<[Q4, Q5]>>,
   CCIfType<[f64], CCAssignToReg<[D8, D9, D10, D11]>>,
@@ -165,8 +165,8 @@
   CCIfNest<CCAssignToReg<[R12]>>,
 
   // Handle all vector types as either f64 or v2f64.
-  CCIfType<[v1i64, v2i32, v4i16, v8i8, v2f32], CCBitConvertToType<f64>>,
-  CCIfType<[v2i64, v4i32, v8i16, v16i8, v4f32], CCBitConvertToType<v2f64>>,
+  CCIfType<[v1i64, v2i32, v4i16, v4f16, v8i8, v2f32], CCBitConvertToType<f64>>,
+  CCIfType<[v2i64, v4i32, v8i16, v8f16, v16i8, v4f32], CCBitConvertToType<v2f64>>,
 
   // Pass SwiftSelf in a callee saved register.
   CCIfSwiftSelf<CCIfType<[i32], CCAssignToReg<[R10]>>>,
@@ -182,8 +182,8 @@
 let Entry = 1 in
 def RetCC_ARM_AAPCS : CallingConv<[
   // Handle all vector types as either f64 or v2f64.
-  CCIfType<[v1i64, v2i32, v4i16, v8i8, v2f32], CCBitConvertToType<f64>>,
-  CCIfType<[v2i64, v4i32, v8i16, v16i8, v4f32], CCBitConvertToType<v2f64>>,
+  CCIfType<[v1i64, v2i32, v4i16, v4f16, v8i8, v2f32], CCBitConvertToType<f64>>,
+  CCIfType<[v2i64, v4i32, v8i16, v8f16, v16i8, v4f32], CCBitConvertToType<v2f64>>,
 
   // Pass SwiftSelf in a callee saved register.
   CCIfSwiftSelf<CCIfType<[i32], CCAssignToReg<[R10]>>>,
@@ -208,8 +208,8 @@
   CCIfByVal<CCPassByVal<4, 4>>,
 
   // Handle all vector types as either f64 or v2f64.
-  CCIfType<[v1i64, v2i32, v4i16, v8i8, v2f32], CCBitConvertToType<f64>>,
-  CCIfType<[v2i64, v4i32, v8i16, v16i8, v4f32], CCBitConvertToType<v2f64>>,
+  CCIfType<[v1i64, v2i32, v4i16, v4f16, v8i8, v2f32], CCBitConvertToType<f64>>,
+  CCIfType<[v2i64, v4i32, v8i16, v8f16, v16i8, v4f32], CCBitConvertToType<v2f64>>,
 
   // Pass SwiftSelf in a callee saved register.
   CCIfSwiftSelf<CCIfType<[i32], CCAssignToReg<[R10]>>>,
@@ -230,8 +230,8 @@
 let Entry = 1 in
 def RetCC_ARM_AAPCS_VFP : CallingConv<[
   // Handle all vector types as either f64 or v2f64.
-  CCIfType<[v1i64, v2i32, v4i16, v8i8, v2f32], CCBitConvertToType<f64>>,
-  CCIfType<[v2i64, v4i32, v8i16, v16i8, v4f32], CCBitConvertToType<v2f64>>,
+  CCIfType<[v1i64, v2i32, v4i16, v4f16, v8i8, v2f32], CCBitConvertToType<f64>>,
+  CCIfType<[v2i64, v4i32, v8i16, v8f16, v16i8, v4f32], CCBitConvertToType<v2f64>>,
 
   // Pass SwiftSelf in a callee saved register.
   CCIfSwiftSelf<CCIfType<[i32], CCAssignToReg<[R10]>>>,
Index: test/CodeGen/ARM/fp16-vector-argument.ll
===================================================================
--- /dev/null
+++ test/CodeGen/ARM/fp16-vector-argument.ll
@@ -0,0 +1,490 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=armv8a -mattr=+armv8.2-a,+fullfp16,+neon -target-abi=apcs-gnu -float-abi=soft -o - %s | FileCheck %s --check-prefix=SOFT
+; RUN: llc -mtriple=armv8a -mattr=+armv8.2-a,+fullfp16,+neon -target-abi=aapcs-gnu -float-abi=soft -o - %s | FileCheck %s --check-prefix=SOFTA
+; RUN: llc -mtriple=armv8a -mattr=+armv8.2-a,+fullfp16,+neon -target-abi=apcs-gnu -float-abi=hard -o - %s | FileCheck %s --check-prefix=HARD
+; RUN: llc -mtriple=armv8a -mattr=+armv8.2-a,+fullfp16,+neon -target-abi=aapcs-gnu -float-abi=hard -o - %s | FileCheck %s --check-prefix=HARDA
+; RUN: llc -mtriple=armeb-eabi -mattr=+armv8.2-a,+fullfp16,+neon -target-abi=apcs-gnu -float-abi=soft -o - %s | FileCheck %s --check-prefix=SOFTEB
+; RUN: llc -mtriple=armeb-eabi -mattr=+armv8.2-a,+fullfp16,+neon -target-abi=aapcs-gnu -float-abi=soft -o - %s | FileCheck %s --check-prefix=SOFTAEB
+; RUN: llc -mtriple=armeb-eabi -mattr=+armv8.2-a,+fullfp16,+neon -target-abi=apcs-gnu -float-abi=hard -o - %s | FileCheck %s --check-prefix=HARDEB
+; RUN: llc -mtriple=armeb-eabi -mattr=+armv8.2-a,+fullfp16,+neon -target-abi=aapcs-gnu -float-abi=hard -o - %s | FileCheck %s --check-prefix=HARDAEB
+
+declare <4 x half> @llvm.fabs.v4f16(<4 x half>)
+declare <8 x half> @llvm.fabs.v8f16(<8 x half>)
+declare void @use(double, float, <4 x half>, i16, <8 x half>)
+define <4 x half> @test_vabs_f16(<4 x half> %a) {
+; SOFT-LABEL: test_vabs_f16:
+; SOFT: @ %bb.0: @ %entry
+; SOFT-NEXT: vmov d16, r0, r1
+; SOFT-NEXT: vabs.f16 d16, d16
+; SOFT-NEXT: vmov r0, r1, d16
+; SOFT-NEXT: bx lr
+;
+; SOFTA-LABEL: test_vabs_f16:
+; SOFTA: @ %bb.0: @ %entry
+; SOFTA-NEXT: vmov d16, r0, r1
+; SOFTA-NEXT: vabs.f16 d16, d16
+; SOFTA-NEXT: vmov r0, r1, d16
+; SOFTA-NEXT: bx lr
+;
+; HARD-LABEL: test_vabs_f16:
+; HARD: @ %bb.0: @ %entry
+; HARD-NEXT: vmov d16, r0, r1
+; HARD-NEXT: vabs.f16 d16, d16
+; HARD-NEXT: vmov r0, r1, d16
+; HARD-NEXT: bx lr
+;
+; HARDA-LABEL: test_vabs_f16:
+; HARDA: @ %bb.0: @ %entry
+; HARDA-NEXT: vabs.f16 d0, d0
+; HARDA-NEXT: bx lr
+;
+; SOFTEB-LABEL: test_vabs_f16:
+; SOFTEB: @ %bb.0: @ %entry
+; SOFTEB-NEXT: vmov d16, r1, r0
+; SOFTEB-NEXT: vrev64.16 d16, d16
+; SOFTEB-NEXT: vabs.f16 d16, d16
+; SOFTEB-NEXT: vrev64.16 d16, d16
+; SOFTEB-NEXT: vmov r1, r0, d16
+; SOFTEB-NEXT: bx lr
+;
+; SOFTAEB-LABEL: test_vabs_f16:
+; SOFTAEB: @ %bb.0: @ %entry
+; SOFTAEB-NEXT: vmov d16, r1, r0
+; SOFTAEB-NEXT: vrev64.16 d16, d16
+; SOFTAEB-NEXT: vabs.f16 d16, d16
+; SOFTAEB-NEXT: vrev64.16 d16, d16
+; SOFTAEB-NEXT: vmov r1, r0, d16
+; SOFTAEB-NEXT: bx lr
+;
+; HARDEB-LABEL: test_vabs_f16:
+; HARDEB: @ %bb.0: @ %entry
+; HARDEB-NEXT: vmov d16, r1, r0
+; HARDEB-NEXT: vrev64.16 d16, d16
+; HARDEB-NEXT: vabs.f16 d16, d16
+; HARDEB-NEXT: vrev64.16 d16, d16
+; HARDEB-NEXT: vmov r1, r0, d16
+; HARDEB-NEXT: bx lr
+;
+; HARDAEB-LABEL: test_vabs_f16:
+; HARDAEB: @ %bb.0: @ %entry
+; HARDAEB-NEXT: vrev64.16 d16, d0
+; HARDAEB-NEXT: vabs.f16 d16, d16
+; HARDAEB-NEXT: vrev64.16 d0, d16
+; HARDAEB-NEXT: bx lr
+entry:
+  %vabs1.i = tail call <4 x half> @llvm.fabs.v4f16(<4 x half> %a)
+  ret <4 x half> %vabs1.i
+}
+
+
+define <8 x half> @test2_vabs_f16(<8 x half> %a) {
+; SOFT-LABEL: test2_vabs_f16:
+; SOFT: @ %bb.0: @ %entry
+; SOFT-NEXT: vmov d17, r2, r3
+; SOFT-NEXT: vmov d16, r0, r1
+; SOFT-NEXT: vabs.f16 q8, q8
+; SOFT-NEXT: vmov r0, r1, d16
+; SOFT-NEXT: vmov r2, r3, d17
+; SOFT-NEXT: bx lr
+;
+; SOFTA-LABEL: test2_vabs_f16:
+; SOFTA: @ %bb.0: @ %entry
+; SOFTA-NEXT: vmov d17, r2, r3
+; SOFTA-NEXT: vmov d16, r0, r1
+; SOFTA-NEXT: vabs.f16 q8, q8
+; SOFTA-NEXT: vmov r0, r1, d16
+; SOFTA-NEXT: vmov r2, r3, d17
+; SOFTA-NEXT: bx lr
+;
+; HARD-LABEL: test2_vabs_f16:
+; HARD: @ %bb.0: @ %entry
+; HARD-NEXT: vmov d17, r2, r3
+; HARD-NEXT: vmov d16, r0, r1
+; HARD-NEXT: vabs.f16 q8, q8
+; HARD-NEXT: vmov r0, r1, d16
+; HARD-NEXT: vmov r2, r3, d17
+; HARD-NEXT: bx lr
+;
+; HARDA-LABEL: test2_vabs_f16:
+; HARDA: @ %bb.0: @ %entry
+; HARDA-NEXT: vabs.f16 q0, q0
+; HARDA-NEXT: bx lr
+;
+; SOFTEB-LABEL: test2_vabs_f16:
+; SOFTEB: @ %bb.0: @ %entry
+; SOFTEB-NEXT: vmov d17, r3, r2
+; SOFTEB-NEXT: vmov d16, r1, r0
+; SOFTEB-NEXT: vrev64.16 q8, q8
+; SOFTEB-NEXT: vabs.f16 q8, q8
+; SOFTEB-NEXT: vrev64.16 q8, q8
+; SOFTEB-NEXT: vmov r1, r0, d16
+; SOFTEB-NEXT: vmov r3, r2, d17
+; SOFTEB-NEXT: bx lr
+;
+; SOFTAEB-LABEL: test2_vabs_f16:
+; SOFTAEB: @ %bb.0: @ %entry
+; SOFTAEB-NEXT: vmov d17, r3, r2
+; SOFTAEB-NEXT: vmov d16, r1, r0
+; SOFTAEB-NEXT: vrev64.16 q8, q8
+; SOFTAEB-NEXT: vabs.f16 q8, q8
+; SOFTAEB-NEXT: vrev64.16 q8, q8
+; SOFTAEB-NEXT: vmov r1, r0, d16
+; SOFTAEB-NEXT: vmov r3, r2, d17
+; SOFTAEB-NEXT: bx lr
+;
+; HARDEB-LABEL: test2_vabs_f16:
+; HARDEB: @ %bb.0: @ %entry
+; HARDEB-NEXT: vmov d17, r3, r2
+; HARDEB-NEXT: vmov d16, r1, r0
+; HARDEB-NEXT: vrev64.16 q8, q8
+; HARDEB-NEXT: vabs.f16 q8, q8
+; HARDEB-NEXT: vrev64.16 q8, q8
+; HARDEB-NEXT: vmov r1, r0, d16
+; HARDEB-NEXT: vmov r3, r2, d17
+; HARDEB-NEXT: bx lr
+;
+; HARDAEB-LABEL: test2_vabs_f16:
+; HARDAEB: @ %bb.0: @ %entry
+; HARDAEB-NEXT: vrev64.16 q8, q0
+; HARDAEB-NEXT: vabs.f16 q8, q8
+; HARDAEB-NEXT: vrev64.16 q0, q8
+; HARDAEB-NEXT: bx lr
+entry:
+  %vabs1.i = tail call <8 x half> @llvm.fabs.v8f16(<8 x half> %a)
+  ret <8 x half> %vabs1.i
+}
+
+define void @test(double, float, i16, <4 x half>, <8 x half>) {
+; SOFT-LABEL: test:
+; SOFT: @ %bb.0: @ %entry
+; SOFT-NEXT: push {r4, r5, lr}
+; SOFT-NEXT: sub sp, sp, #24
+; SOFT-NEXT: mov r5, r3
+; SOFT-NEXT: add r3, sp, #44
+; SOFT-NEXT: vld1.32 {d16, d17}, [r3]
+; SOFT-NEXT: add r3, sp, #8
+; SOFT-NEXT: vabs.f16 q8, q8
+; SOFT-NEXT: vst1.32 {d16, d17}, [r3]
+; SOFT-NEXT: vldr d16, [sp, #36]
+; SOFT-NEXT: vabs.f16 d16, d16
+; SOFT-NEXT: vmov r3, r4, d16
+; SOFT-NEXT: strd r4, r5, [sp]
+; SOFT-NEXT: bl use
+; SOFT-NEXT: add sp, sp, #24
+; SOFT-NEXT: pop {r4, r5, pc}
+;
+; SOFTA-LABEL: test:
+; SOFTA: @ %bb.0: @ %entry
+; SOFTA-NEXT: push {r11, lr}
+; SOFTA-NEXT: sub sp, sp, #32
+; SOFTA-NEXT: vldr d16, [sp, #40]
+; SOFTA-NEXT: mov r12, #16
+; SOFTA-NEXT: vabs.f16 d16, d16
+; SOFTA-NEXT: mov lr, sp
+; SOFTA-NEXT: vst1.16 {d16}, [lr:64], r12
+; SOFTA-NEXT: add r12, sp, #48
+; SOFTA-NEXT: vld1.64 {d16, d17}, [r12]
+; SOFTA-NEXT: vabs.f16 q8, q8
+; SOFTA-NEXT: str r3, [sp, #8]
+; SOFTA-NEXT: vst1.64 {d16, d17}, [lr]
+; SOFTA-NEXT: bl use
+; SOFTA-NEXT: add sp, sp, #32
+; SOFTA-NEXT: pop {r11, pc}
+;
+; HARD-LABEL: test:
+; HARD: @ %bb.0: @ %entry
+; HARD-NEXT: push {r4, r5, lr}
+; HARD-NEXT: sub sp, sp, #24
+; HARD-NEXT: mov r5, r3
+; HARD-NEXT: add r3, sp, #44
+; HARD-NEXT: vld1.32 {d16, d17}, [r3]
+; HARD-NEXT: add r3, sp, #8
+; HARD-NEXT: vabs.f16 q8, q8
+; HARD-NEXT: vst1.32 {d16, d17}, [r3]
+; HARD-NEXT: vldr d16, [sp, #36]
+; HARD-NEXT: vabs.f16 d16, d16
+; HARD-NEXT: vmov r3, r4, d16
+; HARD-NEXT: strd r4, r5, [sp]
+; HARD-NEXT: bl use
+; HARD-NEXT: add sp, sp, #24
+; HARD-NEXT: pop {r4, r5, pc}
+;
+; HARDA-LABEL: test:
+; HARDA: @ %bb.0: @ %entry
+; HARDA-NEXT: vabs.f16 q2, q2
+; HARDA-NEXT: vabs.f16 d2, d2
+; HARDA-NEXT: b use
+;
+; SOFTEB-LABEL: test:
+; SOFTEB: @ %bb.0: @ %entry
+; SOFTEB-NEXT: .save {r4, r5, r7, lr}
+; SOFTEB-NEXT: push {r4, r5, r7, lr}
+; SOFTEB-NEXT: .pad #24
+; SOFTEB-NEXT: sub sp, sp, #24
+; SOFTEB-NEXT: vldr d18, [sp, #40]
+; SOFTEB-NEXT: mov r7, r3
+; SOFTEB-NEXT: add r3, sp, #48
+; SOFTEB-NEXT: add r5, sp, #8
+; SOFTEB-NEXT: vrev64.16 d18, d18
+; SOFTEB-NEXT: vabs.f16 d18, d18
+; SOFTEB-NEXT: vldmia r3, {d16, d17}
+; SOFTEB-NEXT: vrev64.16 q8, q8
+; SOFTEB-NEXT: vrev64.16 d18, d18
+; SOFTEB-NEXT: vabs.f16 q8, q8
+; SOFTEB-NEXT: vmov r4, r3, d18
+; SOFTEB-NEXT: vrev64.16 q8, q8
+; SOFTEB-NEXT: vstmia r5, {d16, d17}
+; SOFTEB-NEXT: stm sp, {r4, r7}
+; SOFTEB-NEXT: bl use
+; SOFTEB-NEXT: add sp, sp, #24
+; SOFTEB-NEXT: pop {r4, r5, r7, pc}
+;
+; SOFTAEB-LABEL: test:
+; SOFTAEB: @ %bb.0: @ %entry
+; SOFTAEB-NEXT: .save {r11, lr}
+; SOFTAEB-NEXT: push {r11, lr}
+; SOFTAEB-NEXT: .pad #32
+; SOFTAEB-NEXT: sub sp, sp, #32
+; SOFTAEB-NEXT: vldr d16, [sp, #40]
+; SOFTAEB-NEXT: mov r12, #16
+; SOFTAEB-NEXT: mov lr, sp
+; SOFTAEB-NEXT: str r3, [sp, #8]
+; SOFTAEB-NEXT: vrev64.16 d16, d16
+; SOFTAEB-NEXT: vabs.f16 d16, d16
+; SOFTAEB-NEXT: vst1.16 {d16}, [lr:64], r12
+; SOFTAEB-NEXT: add r12, sp, #48
+; SOFTAEB-NEXT: vld1.64 {d16, d17}, [r12]
+; SOFTAEB-NEXT: vrev64.16 q8, q8
+; SOFTAEB-NEXT: vabs.f16 q8, q8
+; SOFTAEB-NEXT: vrev64.16 q8, q8
+; SOFTAEB-NEXT: vst1.64 {d16, d17}, [lr]
+; SOFTAEB-NEXT: bl use
+; SOFTAEB-NEXT: add sp, sp, #32
+; SOFTAEB-NEXT: pop {r11, pc}
+;
+; HARDEB-LABEL: test:
+; HARDEB: @ %bb.0: @ %entry
+; HARDEB-NEXT: .save {r4, r5, r7, lr}
+; HARDEB-NEXT: push {r4, r5, r7, lr}
+; HARDEB-NEXT: .pad #24
+; HARDEB-NEXT: sub sp, sp, #24
+; HARDEB-NEXT: vldr d18, [sp, #40]
+; HARDEB-NEXT: mov r7, r3
+; HARDEB-NEXT: add r3, sp, #48
+; HARDEB-NEXT: add r5, sp, #8
+; HARDEB-NEXT: vrev64.16 d18, d18
+; HARDEB-NEXT: vabs.f16 d18, d18
+; HARDEB-NEXT: vldmia r3, {d16, d17}
+; HARDEB-NEXT: vrev64.16 q8, q8
+; HARDEB-NEXT: vrev64.16 d18, d18
+; HARDEB-NEXT: vabs.f16 q8, q8
+; HARDEB-NEXT: vmov r4, r3, d18
+; HARDEB-NEXT: vrev64.16 q8, q8
+; HARDEB-NEXT: vstmia r5, {d16, d17}
+; HARDEB-NEXT: stm sp, {r4, r7}
+; HARDEB-NEXT: bl use
+; HARDEB-NEXT: add sp, sp, #24
+; HARDEB-NEXT: pop {r4, r5, r7, pc}
+;
+; HARDAEB-LABEL: test:
+; HARDAEB: @ %bb.0: @ %entry
+; HARDAEB-NEXT: vrev64.16 d16, d2
+; HARDAEB-NEXT: vabs.f16 d16, d16
+; HARDAEB-NEXT: vrev64.16 d2, d16
+; HARDAEB-NEXT: vrev64.16 q8, q2
+; HARDAEB-NEXT: vabs.f16 q8, q8
+; HARDAEB-NEXT: vrev64.16 q2, q8
+; HARDAEB-NEXT: b use
+entry:
+  %5 = tail call <4 x half> @llvm.fabs.v4f16(<4 x half> %3)
+  %6 = tail call <8 x half> @llvm.fabs.v8f16(<8 x half> %4)
+  tail call void @use(double %0, float %1, <4 x half> %5, i16 %2, <8 x half> %6)
+  ret void
+}
+
+define void @many_args_test(double, float, i16, <4 x half>, <8 x half>, <8 x half>, <8 x half>) {
+; SOFT-LABEL: many_args_test:
+; SOFT: @ %bb.0: @ %entry
+; SOFT-NEXT: push {r4, r5, lr}
+; SOFT-NEXT: sub sp, sp, #24
+; SOFT-NEXT: mov r5, r3
+; SOFT-NEXT: add r3, sp, #76
+; SOFT-NEXT: vld1.32 {d16, d17}, [r3]
+; SOFT-NEXT: add r3, sp, #44
+; SOFT-NEXT: vabs.f16 q8, q8
+; SOFT-NEXT: vld1.32 {d18, d19}, [r3]
+; SOFT-NEXT: add r3, sp, #60
+; SOFT-NEXT: ldr r4, [sp, #40]
+; SOFT-NEXT: vadd.f16 q8, q8, q9
+; SOFT-NEXT: vld1.32 {d18, d19}, [r3]
+; SOFT-NEXT: add r3, sp, #8
+; SOFT-NEXT: vmul.f16 q8, q9, q8
+; SOFT-NEXT: vst1.32 {d16, d17}, [r3]
+; SOFT-NEXT: ldr r3, [sp, #36]
+; SOFT-NEXT: strd r4, r5, [sp]
+; SOFT-NEXT: bl use
+; SOFT-NEXT: add sp, sp, #24
+; SOFT-NEXT: pop {r4, r5, pc}
+;
+; SOFTA-LABEL: many_args_test:
+; SOFTA: @ %bb.0: @ %entry
+; SOFTA-NEXT: push {r11, lr}
+; SOFTA-NEXT: sub sp, sp, #32
+; SOFTA-NEXT: add r12, sp, #80
+; SOFTA-NEXT: mov lr, sp
+; SOFTA-NEXT: vld1.64 {d16, d17}, [r12]
+; SOFTA-NEXT: add r12, sp, #48
+; SOFTA-NEXT: vabs.f16 q8, q8
+; SOFTA-NEXT: vld1.64 {d18, d19}, [r12]
+; SOFTA-NEXT: add r12, sp, #64
+; SOFTA-NEXT: str r3, [sp, #8]
+; SOFTA-NEXT: vadd.f16 q8, q8, q9
+; SOFTA-NEXT: vld1.64 {d18, d19}, [r12]
+; SOFTA-NEXT: mov r12, #16
+; SOFTA-NEXT: vmul.f16 q8, q9, q8
+; SOFTA-NEXT: vldr d18, [sp, #40]
+; SOFTA-NEXT: vst1.16 {d18}, [lr:64], r12
+; SOFTA-NEXT: vst1.64 {d16, d17}, [lr]
+; SOFTA-NEXT: bl use
+; SOFTA-NEXT: add sp, sp, #32
+; SOFTA-NEXT: pop {r11, pc}
+;
+; HARD-LABEL: many_args_test:
+; HARD: @ %bb.0: @ %entry
+; HARD-NEXT: push {r4, r5, lr}
+; HARD-NEXT: sub sp, sp, #24
+; HARD-NEXT: mov r5, r3
+; HARD-NEXT: add r3, sp, #76
+; HARD-NEXT: vld1.32 {d16, d17}, [r3]
+; HARD-NEXT: add r3, sp, #44
+; HARD-NEXT: vabs.f16 q8, q8
+; HARD-NEXT: vld1.32 {d18, d19}, [r3]
+; HARD-NEXT: add r3, sp, #60
+; HARD-NEXT: ldr r4, [sp, #40]
+; HARD-NEXT: vadd.f16 q8, q8, q9
+; HARD-NEXT: vld1.32 {d18, d19}, [r3]
+; HARD-NEXT: add r3, sp, #8
+; HARD-NEXT: vmul.f16 q8, q9, q8
+; HARD-NEXT: vst1.32 {d16, d17}, [r3]
+; HARD-NEXT: ldr r3, [sp, #36]
+; HARD-NEXT: strd r4, r5, [sp]
+; HARD-NEXT: bl use
+; HARD-NEXT: add sp, sp, #24
+; HARD-NEXT: pop {r4, r5, pc}
+;
+; HARDA-LABEL: many_args_test:
+; HARDA: @ %bb.0: @ %entry
+; HARDA-NEXT: mov r1, sp
+; HARDA-NEXT: vld1.64 {d16, d17}, [r1]
+; HARDA-NEXT: vabs.f16 q8, q8
+; HARDA-NEXT: vadd.f16 q8, q8, q2
+; HARDA-NEXT: vmul.f16 q2, q3, q8
+; HARDA-NEXT: b use
+;
+; SOFTEB-LABEL: many_args_test:
+; SOFTEB: @ %bb.0: @ %entry
+; SOFTEB-NEXT: .save {r4, r5, lr}
+; SOFTEB-NEXT: push {r4, r5, lr}
+; SOFTEB-NEXT: .pad #24
+; SOFTEB-NEXT: sub sp, sp, #24
+; SOFTEB-NEXT: mov r5, r3
+; SOFTEB-NEXT: add r3, sp, #76
+; SOFTEB-NEXT: ldr r4, [sp, #40]
+; SOFTEB-NEXT: vldmia r3, {d16, d17}
+; SOFTEB-NEXT: add r3, sp, #44
+; SOFTEB-NEXT: vldmia r3, {d18, d19}
+; SOFTEB-NEXT: add r3, sp, #60
+; SOFTEB-NEXT: vrev64.16 q8, q8
+; SOFTEB-NEXT: strd r4, r5, [sp]
+; SOFTEB-NEXT: add r5, sp, #8
+; SOFTEB-NEXT: vabs.f16 q8, q8
+; SOFTEB-NEXT: vrev64.16 q9, q9
+; SOFTEB-NEXT: vadd.f16 q8, q8, q9
+; SOFTEB-NEXT: vldmia r3, {d18, d19}
+; SOFTEB-NEXT: ldr r3, [sp, #36]
+; SOFTEB-NEXT: vrev64.16 q9, q9
+; SOFTEB-NEXT: vmul.f16 q8, q9, q8
+; SOFTEB-NEXT: vrev64.16 q8, q8
+; SOFTEB-NEXT: vstmia r5, {d16, d17}
+; SOFTEB-NEXT: bl use
+; SOFTEB-NEXT: add sp, sp, #24
+; SOFTEB-NEXT: pop {r4, r5, pc}
+;
+; SOFTAEB-LABEL: many_args_test:
+; SOFTAEB: @ %bb.0: @ %entry
+; SOFTAEB-NEXT: .save {r11, lr}
+; SOFTAEB-NEXT: push {r11, lr}
+; SOFTAEB-NEXT: .pad #32
+; SOFTAEB-NEXT: sub sp, sp, #32
+; SOFTAEB-NEXT: vldr d16, [sp, #40]
+; SOFTAEB-NEXT: mov r12, #16
+; SOFTAEB-NEXT: mov lr, sp
+; SOFTAEB-NEXT: str r3, [sp, #8]
+; SOFTAEB-NEXT: vrev64.16 d16, d16
+; SOFTAEB-NEXT: vst1.16 {d16}, [lr:64], r12
+; SOFTAEB-NEXT: add r12, sp, #80
+; SOFTAEB-NEXT: vld1.64 {d16, d17}, [r12]
+; SOFTAEB-NEXT: add r12, sp, #48
+; SOFTAEB-NEXT: vrev64.16 q8, q8
+; SOFTAEB-NEXT: vabs.f16 q8, q8
+; SOFTAEB-NEXT: vld1.64 {d18, d19}, [r12]
+; SOFTAEB-NEXT: add r12, sp, #64
+; SOFTAEB-NEXT: vrev64.16 q9, q9
+; SOFTAEB-NEXT: vadd.f16 q8, q8, q9
+; SOFTAEB-NEXT: vld1.64 {d18, d19}, [r12]
+; SOFTAEB-NEXT: vrev64.16 q9, q9
+; SOFTAEB-NEXT: vmul.f16 q8, q9, q8
+; SOFTAEB-NEXT: vrev64.16 q8, q8
+; SOFTAEB-NEXT: vst1.64 {d16, d17}, [lr]
+; SOFTAEB-NEXT: bl use
+; SOFTAEB-NEXT: add sp, sp, #32
+; SOFTAEB-NEXT: pop {r11, pc}
+;
+; HARDEB-LABEL: many_args_test:
+; HARDEB: @ %bb.0: @ %entry
+; HARDEB-NEXT: .save {r4, r5, lr}
+; HARDEB-NEXT: push {r4, r5, lr}
+; HARDEB-NEXT: .pad #24
+; HARDEB-NEXT: sub sp, sp, #24
+; HARDEB-NEXT: mov r5, r3
+; HARDEB-NEXT: add r3, sp, #76
+; HARDEB-NEXT: ldr r4, [sp, #40]
+; HARDEB-NEXT: vldmia r3, {d16, d17}
+; HARDEB-NEXT: add r3, sp, #44
+; HARDEB-NEXT: vldmia r3, {d18, d19}
+; HARDEB-NEXT: add r3, sp, #60
+; HARDEB-NEXT: vrev64.16 q8, q8
+; HARDEB-NEXT: strd r4, r5, [sp]
+; HARDEB-NEXT: add r5, sp, #8
+; HARDEB-NEXT: vabs.f16 q8, q8
+; HARDEB-NEXT: vrev64.16 q9, q9
+; HARDEB-NEXT: vadd.f16 q8, q8, q9
+; HARDEB-NEXT: vldmia r3, {d18, d19}
+; HARDEB-NEXT: ldr r3, [sp, #36]
+; HARDEB-NEXT: vrev64.16 q9, q9
+; HARDEB-NEXT: vmul.f16 q8, q9, q8
+; HARDEB-NEXT: vrev64.16 q8, q8
+; HARDEB-NEXT: vstmia r5, {d16, d17}
+; HARDEB-NEXT: bl use
+; HARDEB-NEXT: add sp, sp, #24
+; HARDEB-NEXT: pop {r4, r5, pc}
+;
+; HARDAEB-LABEL: many_args_test:
+; HARDAEB: @ %bb.0: @ %entry
+; HARDAEB-NEXT: mov r1, sp
+; HARDAEB-NEXT: vld1.64 {d16, d17}, [r1]
+; HARDAEB-NEXT: vrev64.16 q8, q8
+; HARDAEB-NEXT: vabs.f16 q8, q8
+; HARDAEB-NEXT: vrev64.16 q9, q2
+; HARDAEB-NEXT: vadd.f16 q8, q8, q9
+; HARDAEB-NEXT: vrev64.16 q9, q3
+; HARDAEB-NEXT: vmul.f16 q8, q9, q8
+; HARDAEB-NEXT: vrev64.16 q2, q8
+; HARDAEB-NEXT: b use
+entry:
+  %7 = tail call <8 x half> @llvm.fabs.v8f16(<8 x half> %6)
+  %8 = fadd <8 x half> %7, %4
+  %9 = fmul <8 x half> %5, %8
+  tail call void @use(double %0, float %1, <4 x half> %3, i16 %2, <8 x half> %9)
+  ret void
+}
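
Note: the TableGen change reuses the existing "bit-convert vectors to f64/v2f64" rules, so half-precision vectors inherit the same registers and stack slots as the other 64-bit and 128-bit NEON types; the checks above show r0/r1 (or r0-r3) for soft float-abi and d0/q0 for hard float-abi. A minimal standalone sketch of the kind of input that exercises the new v4f16 rule (illustrative only, not part of the patch; the function and file names are made up):

; reproducer.ll -- a <4 x half> crossing a call boundary forces the
; calling-convention code to classify the type; before this patch the
; APCS/AAPCS tables had no matching CCIfType row for it.
define <4 x half> @add_v4f16(<4 x half> %x, <4 x half> %y) {
entry:
  %s = fadd <4 x half> %x, %y
  ret <4 x half> %s
}
; Try: llc -mtriple=armv8a -mattr=+fullfp16,+neon -float-abi=hard -o - reproducer.ll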