Index: llvm/include/llvm/IR/IntrinsicsAArch64.td
===================================================================
--- llvm/include/llvm/IR/IntrinsicsAArch64.td
+++ llvm/include/llvm/IR/IntrinsicsAArch64.td
@@ -779,7 +779,6 @@
     : Intrinsic<[LLVMHalfElementsVectorType<0>],
                 [llvm_anyvector_ty],
                 [IntrNoMem]>;
-
   // This class of intrinsics are not intended to be useful within LLVM IR but
   // are instead here to support some of the more regid parts of the ACLE.
   class Builtin_SVCVT<string name, LLVMType OUT, LLVMType IN>
@@ -811,6 +810,12 @@
 
 def int_aarch64_sve_fcvtzs_i32f16 : Builtin_SVCVT<"svcvt_s32_f16_m", llvm_nxv4i32_ty, llvm_nxv8f16_ty>;
 
+//
+// Floating-point arithmetic
+//
+def int_aarch64_sve_recps: AdvSIMD_2VectorArg_Intrinsic;
+def int_aarch64_sve_rsqrts: AdvSIMD_2VectorArg_Intrinsic;
+
 //
 // Predicate operations
 //
Index: llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
===================================================================
--- llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
+++ llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
@@ -139,11 +139,11 @@
   defm FDIV_ZPmZ   : sve_fp_2op_p_zds<0b1101, "fdiv">;
 
   defm FADD_ZZZ    : sve_fp_3op_u_zd<0b000, "fadd",    fadd>;
-  defm FSUB_ZZZ    : sve_fp_3op_u_zd<0b001, "fsub",    null_frag>;
-  defm FMUL_ZZZ    : sve_fp_3op_u_zd<0b010, "fmul",    null_frag>;
+  defm FSUB_ZZZ    : sve_fp_3op_u_zd<0b001, "fsub",    fsub>;
+  defm FMUL_ZZZ    : sve_fp_3op_u_zd<0b010, "fmul",    fmul>;
   defm FTSMUL_ZZZ  : sve_fp_3op_u_zd<0b011, "ftsmul",  null_frag>;
-  defm FRECPS_ZZZ  : sve_fp_3op_u_zd<0b110, "frecps",  null_frag>;
-  defm FRSQRTS_ZZZ : sve_fp_3op_u_zd<0b111, "frsqrts", null_frag>;
+  defm FRECPS_ZZZ  : sve_fp_3op_u_zd<0b110, "frecps",  int_aarch64_sve_recps>;
+  defm FRSQRTS_ZZZ : sve_fp_3op_u_zd<0b111, "frsqrts", int_aarch64_sve_rsqrts>;
 
   defm FTSSEL_ZZZ : sve_int_bin_cons_misc_0_b<"ftssel">;
 
Index: llvm/test/CodeGen/AArch64/sve-fp.ll
===================================================================
--- llvm/test/CodeGen/AArch64/sve-fp.ll
+++ llvm/test/CodeGen/AArch64/sve-fp.ll
@@ -23,3 +23,107 @@
   %res = fadd <vscale x 2 x double> %a, %b
   ret <vscale x 2 x double> %res
 }
+
+define <vscale x 8 x half> @fsub_h(<vscale x 8 x half> %a, <vscale x 8 x half> %b) {
+; CHECK-LABEL: fsub_h:
+; CHECK: fsub z0.h, z0.h, z1.h
+; CHECK-NEXT: ret
+  %res = fsub <vscale x 8 x half> %a, %b
+  ret <vscale x 8 x half> %res
+}
+
+define <vscale x 4 x float> @fsub_s(<vscale x 4 x float> %a, <vscale x 4 x float> %b) {
+; CHECK-LABEL: fsub_s:
+; CHECK: fsub z0.s, z0.s, z1.s
+; CHECK-NEXT: ret
+  %res = fsub <vscale x 4 x float> %a, %b
+  ret <vscale x 4 x float> %res
+}
+
+define <vscale x 2 x double> @fsub_d(<vscale x 2 x double> %a, <vscale x 2 x double> %b) {
+; CHECK-LABEL: fsub_d:
+; CHECK: fsub z0.d, z0.d, z1.d
+; CHECK-NEXT: ret
+  %res = fsub <vscale x 2 x double> %a, %b
+  ret <vscale x 2 x double> %res
+}
+
+define <vscale x 8 x half> @fmul_h(<vscale x 8 x half> %a, <vscale x 8 x half> %b) {
+; CHECK-LABEL: fmul_h:
+; CHECK: fmul z0.h, z0.h, z1.h
+; CHECK-NEXT: ret
+  %res = fmul <vscale x 8 x half> %a, %b
+  ret <vscale x 8 x half> %res
+}
+
+define <vscale x 4 x float> @fmul_s(<vscale x 4 x float> %a, <vscale x 4 x float> %b) {
+; CHECK-LABEL: fmul_s:
+; CHECK: fmul z0.s, z0.s, z1.s
+; CHECK-NEXT: ret
+  %res = fmul <vscale x 4 x float> %a, %b
+  ret <vscale x 4 x float> %res
+}
+
+define <vscale x 2 x double> @fmul_d(<vscale x 2 x double> %a, <vscale x 2 x double> %b) {
+; CHECK-LABEL: fmul_d:
+; CHECK: fmul z0.d, z0.d, z1.d
+; CHECK-NEXT: ret
+  %res = fmul <vscale x 2 x double> %a, %b
+  ret <vscale x 2 x double> %res
+}
+
+define <vscale x 8 x half> @frecps_h(<vscale x 8 x half> %a, <vscale x 8 x half> %b) {
+; CHECK-LABEL: frecps_h:
+; CHECK: frecps z0.h, z0.h, z1.h
+; CHECK-NEXT: ret
+  %res = call <vscale x 8 x half> @llvm.aarch64.sve.recps.nxv8f16(<vscale x 8 x half> %a, <vscale x 8 x half> %b)
+  ret <vscale x 8 x half> %res
+}
+
+define <vscale x 4 x float> @frecps_s(<vscale x 4 x float> %a, <vscale x 4 x float> %b) {
+; CHECK-LABEL: frecps_s:
+; CHECK: frecps z0.s, z0.s, z1.s
+; CHECK-NEXT: ret
+  %res = call <vscale x 4 x float> @llvm.aarch64.sve.recps.nxv4f32(<vscale x 4 x float> %a, <vscale x 4 x float> %b)
+  ret <vscale x 4 x float> %res
+}
+
+define <vscale x 2 x double> @frecps_d(<vscale x 2 x double> %a, <vscale x 2 x double> %b) {
+; CHECK-LABEL: frecps_d:
+; CHECK: frecps z0.d, z0.d, z1.d
+; CHECK-NEXT: ret
+  %res = call <vscale x 2 x double> @llvm.aarch64.sve.recps.nxv2f64(<vscale x 2 x double> %a, <vscale x 2 x double> %b)
+  ret <vscale x 2 x double> %res
+}
+
+define <vscale x 8 x half> @frsqrts_h(<vscale x 8 x half> %a, <vscale x 8 x half> %b) {
+; CHECK-LABEL: frsqrts_h:
+; CHECK: frsqrts z0.h, z0.h, z1.h
+; CHECK-NEXT: ret
+  %res = call <vscale x 8 x half> @llvm.aarch64.sve.rsqrts.nxv8f16(<vscale x 8 x half> %a, <vscale x 8 x half> %b)
+  ret <vscale x 8 x half> %res
+}
+
+define <vscale x 4 x float> @frsqrts_s(<vscale x 4 x float> %a, <vscale x 4 x float> %b) {
+; CHECK-LABEL: frsqrts_s:
+; CHECK: frsqrts z0.s, z0.s, z1.s
+; CHECK-NEXT: ret
+  %res = call <vscale x 4 x float> @llvm.aarch64.sve.rsqrts.nxv4f32(<vscale x 4 x float> %a, <vscale x 4 x float> %b)
+  ret <vscale x 4 x float> %res
+}
+
+define <vscale x 2 x double> @frsqrts_d(<vscale x 2 x double> %a, <vscale x 2 x double> %b) {
+; CHECK-LABEL: frsqrts_d:
+; CHECK: frsqrts z0.d, z0.d, z1.d
+; CHECK-NEXT: ret
+  %res = call <vscale x 2 x double> @llvm.aarch64.sve.rsqrts.nxv2f64(<vscale x 2 x double> %a, <vscale x 2 x double> %b)
+  ret <vscale x 2 x double> %res
+}
+
+declare <vscale x 8 x half> @llvm.aarch64.sve.recps.nxv8f16(<vscale x 8 x half>, <vscale x 8 x half>)
+declare <vscale x 4 x float> @llvm.aarch64.sve.recps.nxv4f32(<vscale x 4 x float>, <vscale x 4 x float>)
+declare <vscale x 2 x double> @llvm.aarch64.sve.recps.nxv2f64(<vscale x 2 x double>, <vscale x 2 x double>)
+
+declare <vscale x 8 x half> @llvm.aarch64.sve.rsqrts.nxv8f16(<vscale x 8 x half>, <vscale x 8 x half>)
+declare <vscale x 4 x float> @llvm.aarch64.sve.rsqrts.nxv4f32(<vscale x 4 x float>, <vscale x 4 x float>)
+declare <vscale x 2 x double> @llvm.aarch64.sve.rsqrts.nxv2f64(<vscale x 2 x double>, <vscale x 2 x double>)
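
Usage note (not part of the patch): FRECPS and FRSQRTS are the architectural Newton-Raphson "step" instructions, so the new unpredicated intrinsics are expected to be paired with an initial estimate and an fmul per refinement iteration rather than used on their own. Below is a minimal IR sketch of one reciprocal refinement step; it assumes %est already holds an estimate of 1.0/%x produced elsewhere (for example by a reciprocal-estimate instruction), and the function name @recip_step is illustrative only.

; One Newton-Raphson step: x_{n+1} = x_n * (2.0 - d * x_n)
define <vscale x 4 x float> @recip_step(<vscale x 4 x float> %x, <vscale x 4 x float> %est) {
  ; frecps computes (2.0 - x * est) per element
  %step = call <vscale x 4 x float> @llvm.aarch64.sve.recps.nxv4f32(<vscale x 4 x float> %x, <vscale x 4 x float> %est)
  ; refine the estimate: est * (2.0 - x * est)
  %res = fmul <vscale x 4 x float> %est, %step
  ret <vscale x 4 x float> %res
}

declare <vscale x 4 x float> @llvm.aarch64.sve.recps.nxv4f32(<vscale x 4 x float>, <vscale x 4 x float>)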