Index: lib/Target/ARM/ARMCallingConv.td =================================================================== --- lib/Target/ARM/ARMCallingConv.td +++ lib/Target/ARM/ARMCallingConv.td @@ -30,8 +30,8 @@ CCIfSwiftError>>, // Handle all vector types as either f64 or v2f64. - CCIfType<[v1i64, v2i32, v4i16, v8i8, v2f32], CCBitConvertToType>, - CCIfType<[v2i64, v4i32, v8i16, v16i8, v4f32], CCBitConvertToType>, + CCIfType<[v1i64, v2i32, v4i16, v4f16, v8i8, v2f32], CCBitConvertToType>, + CCIfType<[v2i64, v4i32, v8i16, v8f16, v16i8, v4f32], CCBitConvertToType>, // f64 and v2f64 are passed in adjacent GPRs, possibly split onto the stack CCIfType<[f64, v2f64], CCCustom<"CC_ARM_APCS_Custom_f64">>, @@ -56,8 +56,8 @@ CCIfSwiftError>>, // Handle all vector types as either f64 or v2f64. - CCIfType<[v1i64, v2i32, v4i16, v8i8, v2f32], CCBitConvertToType>, - CCIfType<[v2i64, v4i32, v8i16, v16i8, v4f32], CCBitConvertToType>, + CCIfType<[v1i64, v2i32, v4i16, v4f16, v8i8, v2f32], CCBitConvertToType>, + CCIfType<[v2i64, v4i32, v8i16, v8f16, v16i8, v4f32], CCBitConvertToType>, CCIfType<[f64, v2f64], CCCustom<"RetCC_ARM_APCS_Custom_f64">>, @@ -71,8 +71,8 @@ let Entry = 1 in def FastCC_ARM_APCS : CallingConv<[ // Handle all vector types as either f64 or v2f64. - CCIfType<[v1i64, v2i32, v4i16, v8i8, v2f32], CCBitConvertToType>, - CCIfType<[v2i64, v4i32, v8i16, v16i8, v4f32], CCBitConvertToType>, + CCIfType<[v1i64, v2i32, v4i16, v4f16, v8i8, v2f32], CCBitConvertToType>, + CCIfType<[v2i64, v4i32, v8i16, v8f16, v16i8, v4f32], CCBitConvertToType>, CCIfType<[v2f64], CCAssignToReg<[Q0, Q1, Q2, Q3]>>, CCIfType<[f64], CCAssignToReg<[D0, D1, D2, D3, D4, D5, D6, D7]>>, @@ -91,8 +91,8 @@ let Entry = 1 in def RetFastCC_ARM_APCS : CallingConv<[ // Handle all vector types as either f64 or v2f64. - CCIfType<[v1i64, v2i32, v4i16, v8i8, v2f32], CCBitConvertToType>, - CCIfType<[v2i64, v4i32, v8i16, v16i8, v4f32], CCBitConvertToType>, + CCIfType<[v1i64, v2i32, v4i16, v4f16, v8i8, v2f32], CCBitConvertToType>, + CCIfType<[v2i64, v4i32, v8i16, v8f16, v16i8, v4f32], CCBitConvertToType>, CCIfType<[v2f64], CCAssignToReg<[Q0, Q1, Q2, Q3]>>, CCIfType<[f64], CCAssignToReg<[D0, D1, D2, D3, D4, D5, D6, D7]>>, @@ -108,8 +108,8 @@ let Entry = 1 in def CC_ARM_APCS_GHC : CallingConv<[ // Handle all vector types as either f64 or v2f64. - CCIfType<[v1i64, v2i32, v4i16, v8i8, v2f32], CCBitConvertToType>, - CCIfType<[v2i64, v4i32, v8i16, v16i8, v4f32], CCBitConvertToType>, + CCIfType<[v1i64, v2i32, v4i16, v4f16, v8i8, v2f32], CCBitConvertToType>, + CCIfType<[v2i64, v4i32, v8i16, v8f16, v16i8, v4f32], CCBitConvertToType>, CCIfType<[v2f64], CCAssignToReg<[Q4, Q5]>>, CCIfType<[f64], CCAssignToReg<[D8, D9, D10, D11]>>, @@ -165,8 +165,8 @@ CCIfNest>, // Handle all vector types as either f64 or v2f64. - CCIfType<[v1i64, v2i32, v4i16, v4f16, v8i8, v2f32], CCBitConvertToType>, - CCIfType<[v2i64, v4i32, v8i16, v8f16, v16i8, v4f32], CCBitConvertToType>, + CCIfType<[v1i64, v2i32, v4i16, v4f16, v4f16, v8i8, v2f32], CCBitConvertToType>, + CCIfType<[v2i64, v4i32, v8i16, v8f16, v8f16, v16i8, v4f32], CCBitConvertToType>, // Pass SwiftSelf in a callee saved register. CCIfSwiftSelf>>, @@ -182,8 +182,8 @@ let Entry = 1 in def RetCC_ARM_AAPCS : CallingConv<[ // Handle all vector types as either f64 or v2f64. - CCIfType<[v1i64, v2i32, v4i16, v4f16, v8i8, v2f32], CCBitConvertToType>, - CCIfType<[v2i64, v4i32, v8i16, v8f16,v16i8, v4f32], CCBitConvertToType>, + CCIfType<[v1i64, v2i32, v4i16, v4f16, v4f16, v8i8, v2f32], CCBitConvertToType>, + CCIfType<[v2i64, v4i32, v8i16, v8f16, v8f16,v16i8, v4f32], CCBitConvertToType>, // Pass SwiftSelf in a callee saved register. CCIfSwiftSelf>>, @@ -208,8 +208,8 @@ CCIfByVal>, // Handle all vector types as either f64 or v2f64. - CCIfType<[v1i64, v2i32, v4i16, v4f16, v8i8, v2f32], CCBitConvertToType>, - CCIfType<[v2i64, v4i32, v8i16, v8f16, v16i8, v4f32], CCBitConvertToType>, + CCIfType<[v1i64, v2i32, v4i16, v4f16, v4f16, v8i8, v2f32], CCBitConvertToType>, + CCIfType<[v2i64, v4i32, v8i16, v8f16, v8f16, v16i8, v4f32], CCBitConvertToType>, // Pass SwiftSelf in a callee saved register. CCIfSwiftSelf>>, @@ -230,8 +230,8 @@ let Entry = 1 in def RetCC_ARM_AAPCS_VFP : CallingConv<[ // Handle all vector types as either f64 or v2f64. - CCIfType<[v1i64, v2i32, v4i16, v4f16, v8i8, v2f32], CCBitConvertToType>, - CCIfType<[v2i64, v4i32, v8i16, v8f16, v16i8, v4f32], CCBitConvertToType>, + CCIfType<[v1i64, v2i32, v4i16, v4f16, v4f16, v8i8, v2f32], CCBitConvertToType>, + CCIfType<[v2i64, v4i32, v8i16, v8f16, v8f16, v16i8, v4f32], CCBitConvertToType>, // Pass SwiftSelf in a callee saved register. CCIfSwiftSelf>>, Index: test/CodeGen/ARM/fp16-vector-argument.ll =================================================================== --- /dev/null +++ test/CodeGen/ARM/fp16-vector-argument.ll @@ -0,0 +1,56 @@ +; RUN: llc -mtriple=armv8.2a -mattr=+armv8.2-a,+fullfp16,+neon -target-abi=apcs-gnu %s | FileCheck %s --ckeck-prefix=EL +; RUN: llc -mtriple=armeb -mattr=+armv8.2-a,+fullfp16,+neon -target-abi=apcs-gnu %s | FileCheck %s --check-prefix=EB +declare <4 x half> @llvm.fabs.v4f16(<4 x half>) +declare <8 x half> @llvm.fabs.v8f16(<8 x half>) + +define <4 x half> @test_vabs_f16(<4 x half> %a) { +; EL-LABEL: test_vabs_f16: +; EL: .fnstart +; EL-NEXT: @ %bb.0: +; EL-NEXT: vmov d16, r0, r1 +; EL-NEXT: vabs.f16 d16, d16 +; EL-NEXT: vmov r0, r1, d16 +; EL-NEXT: bx lr + +; EB-LABEL: test_vabs_f16: +; EB: .fnstart +; EB-NEXT: @ %bb.0: +; EB-NEXT: vmov d16, r1, r0 +; EB-NEXT: vrev64.16 d16, d16 +; EB-NEXT: vabs.f16 d16, d16 +; EB-NEXT: vrev64.16 d16, d16 +; EB-NEXT: vmov r1, r0, d16 +; EB-NEXT: bx lr + +entry: + %vabs1.i = tail call <4 x half> @llvm.fabs.v4f16(<4 x half> %a) + ret <4 x half> %vabs1.i +} + +define <8 x half> @test2_vabs_f16(<8 x half> %a) { +; EL-LABEL: test2_vabs_f16: +; EL: .fnstart +; EL-NEXT: @ %bb.0: +; EL-NEXT: vmov d17, r2, r3 +; EL-NEXT: vmov d16, r0, r1 +; EL-NEXT: vabs.f16 q8, q8 +; EL-NEXT: vmov r0, r1, d16 +; EL-NEXT: vmov r2, r3, d17 +; EL-NEXT: bx lr + +; EB-NEXT: test2_vabs_f16: +; EB-NEXT: .fnstart +; EB-NEXT: @ %bb.0: +; EB-NEXT: vmov d17, r3, r2 +; EB-NEXT: vmov d16, r1, r0 +; EB-NEXT: vrev64.16 q8, q8 +; EB-NEXT: vabs.f16 q8, q8 +; EB-NEXT: vrev64.16 q8, q8 +; EB-NEXT: vmov r1, r0, d16 +; EB-NEXT: vmov r3, r2, d17 +; EB-NEXT: bx lr +entry: + %vabs1.i = tail call <8 x half> @llvm.fabs.v8f16(<8 x half> %a) + ret <8 x half> %vabs1.i +} +