Index: llvm/lib/Target/AArch64/AArch64InstrFormats.td
===================================================================
--- llvm/lib/Target/AArch64/AArch64InstrFormats.td
+++ llvm/lib/Target/AArch64/AArch64InstrFormats.td
@@ -1157,6 +1157,18 @@
   return Imm.isExactlyValue(+0.0);
 }]>;
 
+def fpimm_half : FPImmLeaf<fAny, [{
+  return Imm.isExactlyValue(+0.5);
+}]>;
+
+def fpimm_one : FPImmLeaf<fAny, [{
+  return Imm.isExactlyValue(+1.0);
+}]>;
+
+def fpimm_two : FPImmLeaf<fAny, [{
+  return Imm.isExactlyValue(+2.0);
+}]>;
+
 // Vector lane operands
 class AsmVectorIndex<int Min, int Max, string NamePrefix=""> : AsmOperandClass {
   let Name = NamePrefix # "IndexRange" # Min # "_" # Max;
Index: llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
===================================================================
--- llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
+++ llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
@@ -253,14 +253,109 @@
   defm FRECPE_ZZ  : sve_fp_2op_u_zd<0b110, "frecpe",  int_aarch64_sve_frecpe_x>;
   defm FRSQRTE_ZZ : sve_fp_2op_u_zd<0b111, "frsqrte", int_aarch64_sve_frsqrte_x>;
 
-  defm FADD_ZPmI   : sve_fp_2op_i_p_zds<0b000, "fadd",   sve_fpimm_half_one>;
-  defm FSUB_ZPmI   : sve_fp_2op_i_p_zds<0b001, "fsub",   sve_fpimm_half_one>;
-  defm FMUL_ZPmI   : sve_fp_2op_i_p_zds<0b010, "fmul",   sve_fpimm_half_two>;
-  defm FSUBR_ZPmI  : sve_fp_2op_i_p_zds<0b011, "fsubr",  sve_fpimm_half_one>;
-  defm FMAXNM_ZPmI : sve_fp_2op_i_p_zds<0b100, "fmaxnm", sve_fpimm_zero_one>;
-  defm FMINNM_ZPmI : sve_fp_2op_i_p_zds<0b101, "fminnm", sve_fpimm_zero_one>;
-  defm FMAX_ZPmI   : sve_fp_2op_i_p_zds<0b110, "fmax",   sve_fpimm_zero_one>;
-  defm FMIN_ZPmI   : sve_fp_2op_i_p_zds<0b111, "fmin",   sve_fpimm_zero_one>;
+  defm FADD_ZPmI   : sve_fp_2op_i_p_zds<0b000, "fadd",   "FADD_ZPZI",   sve_fpimm_half_one>;
+  defm FSUB_ZPmI   : sve_fp_2op_i_p_zds<0b001, "fsub",   "FSUB_ZPZI",   sve_fpimm_half_one>;
+  defm FMUL_ZPmI   : sve_fp_2op_i_p_zds<0b010, "fmul",   "FMUL_ZPZI",   sve_fpimm_half_two>;
+  defm FSUBR_ZPmI  : sve_fp_2op_i_p_zds<0b011, "fsubr",  "FSUBR_ZPZI",  sve_fpimm_half_one>;
+  defm FMAXNM_ZPmI : sve_fp_2op_i_p_zds<0b100, "fmaxnm", "FMAXNM_ZPZI", sve_fpimm_zero_one>;
+  defm FMINNM_ZPmI : sve_fp_2op_i_p_zds<0b101, "fminnm", "FMINNM_ZPZI", sve_fpimm_zero_one>;
+  defm FMAX_ZPmI   : sve_fp_2op_i_p_zds<0b110, "fmax",   "FMAX_ZPZI",   sve_fpimm_zero_one>;
+  defm FMIN_ZPmI   : sve_fp_2op_i_p_zds<0b111, "fmin",   "FMIN_ZPZI",   sve_fpimm_zero_one>;
+
+  defm FADD_ZPZI   : sve_fp_2op_i_p_zds_zx<sve_fpimm_half_one>;
+  defm FSUB_ZPZI   : sve_fp_2op_i_p_zds_zx<sve_fpimm_half_one>;
+  defm FMUL_ZPZI   : sve_fp_2op_i_p_zds_zx<sve_fpimm_half_two>;
+  defm FSUBR_ZPZI  : sve_fp_2op_i_p_zds_zx<sve_fpimm_half_one>;
+  defm FMAXNM_ZPZI : sve_fp_2op_i_p_zds_zx<sve_fpimm_zero_one>;
+  defm FMINNM_ZPZI : sve_fp_2op_i_p_zds_zx<sve_fpimm_zero_one>;
+  defm FMAX_ZPZI   : sve_fp_2op_i_p_zds_zx<sve_fpimm_zero_one>;
+  defm FMIN_ZPZI   : sve_fp_2op_i_p_zds_zx<sve_fpimm_zero_one>;
+
+  /// Compact single bit fp immediates: the instruction's 1-bit immediate
+  /// selects between the two legal constants A (encoded as 0) and B
+  /// (encoded as 1).
+  multiclass intrinsic_compact_fp_immediates<string I, string IZ, string IX,
+                                             FPImmLeaf A, FPImmLeaf B,
+                                             SDPatternOperator op,
+                                             SDPatternOperator ir_op = null_frag> {
+    def : Pat<(nxv8f16 (op (nxv8i1 PPR_3b:$Pg),
+                           (nxv8f16 ZPR:$Zs1),
+                           (nxv8f16 (AArch64dup (f16 A))))),
+              (!cast<Instruction>(I # "_H") PPR_3b:$Pg, ZPR:$Zs1, 0)>;
+    def : Pat<(nxv8f16 (op (nxv8i1 PPR_3b:$Pg),
+                           (nxv8f16 ZPR:$Zs1),
+                           (nxv8f16 (AArch64dup (f16 B))))),
+              (!cast<Instruction>(I # "_H") PPR_3b:$Pg, ZPR:$Zs1, 1)>;
+    def : Pat<(nxv4f32 (op (nxv4i1 PPR_3b:$Pg),
+                           (nxv4f32 ZPR:$Zs1),
+                           (nxv4f32 (AArch64dup (f32 A))))),
+              (!cast<Instruction>(I # "_S") PPR_3b:$Pg, ZPR:$Zs1, 0)>;
+    def : Pat<(nxv4f32 (op (nxv4i1 PPR_3b:$Pg),
+                           (nxv4f32 ZPR:$Zs1),
+                           (nxv4f32 (AArch64dup (f32 B))))),
+              (!cast<Instruction>(I # "_S") PPR_3b:$Pg, ZPR:$Zs1, 1)>;
+    def : Pat<(nxv2f64 (op (nxv2i1 PPR_3b:$Pg),
+                           (nxv2f64 ZPR:$Zs1),
+                           (nxv2f64 (AArch64dup (f64 A))))),
+              (!cast<Instruction>(I # "_D") PPR_3b:$Pg, ZPR:$Zs1, 0)>;
+    def : Pat<(nxv2f64 (op (nxv2i1 PPR_3b:$Pg),
+                           (nxv2f64 ZPR:$Zs1),
+                           (nxv2f64 (AArch64dup (f64 B))))),
+              (!cast<Instruction>(I # "_D") PPR_3b:$Pg, ZPR:$Zs1, 1)>;
+
+    def : Pat<(nxv8f16 (ir_op (nxv8f16 ZPR:$Zs1),
+                              (nxv8f16 (AArch64dup (f16 A))))),
+              (!cast<Instruction>(IX # "_H") (PTRUE_H 31), ZPR:$Zs1, 0)>;
+    def : Pat<(nxv8f16 (ir_op (nxv8f16 ZPR:$Zs1),
+                              (nxv8f16 (AArch64dup (f16 B))))),
+              (!cast<Instruction>(IX # "_H") (PTRUE_H 31), ZPR:$Zs1, 1)>;
+    def : Pat<(nxv4f32 (ir_op (nxv4f32 ZPR:$Zs1),
+                              (nxv4f32 (AArch64dup (f32 A))))),
+              (!cast<Instruction>(IX # "_S") (PTRUE_S 31), ZPR:$Zs1, 0)>;
+    def : Pat<(nxv4f32 (ir_op (nxv4f32 ZPR:$Zs1),
+                              (nxv4f32 (AArch64dup (f32 B))))),
+              (!cast<Instruction>(IX # "_S") (PTRUE_S 31), ZPR:$Zs1, 1)>;
+    def : Pat<(nxv2f64 (ir_op (nxv2f64 ZPR:$Zs1),
+                              (nxv2f64 (AArch64dup (f64 A))))),
+              (!cast<Instruction>(IX # "_D") (PTRUE_D 31), ZPR:$Zs1, 0)>;
+    def : Pat<(nxv2f64 (ir_op (nxv2f64 ZPR:$Zs1),
+                              (nxv2f64 (AArch64dup (f64 B))))),
+              (!cast<Instruction>(IX # "_D") (PTRUE_D 31), ZPR:$Zs1, 1)>;
+
+    let AddedComplexity = 2 in {
+      // When the intrinsic is combined with a SELECT that zeroes the
+      // inactive lanes, use the zeroing pseudo.
+      def : Pat<(nxv8f16 (op nxv8i1:$Pg,
+                             (vselect nxv8i1:$Pg, nxv8f16:$Zs1, (SVEDup0)),
+                             (nxv8f16 (AArch64dup (f16 A))))),
+                (!cast<Instruction>(IZ # "_H") $Pg, $Zs1, 0)>;
+      def : Pat<(nxv8f16 (op nxv8i1:$Pg,
+                             (vselect nxv8i1:$Pg, nxv8f16:$Zs1, (SVEDup0)),
+                             (nxv8f16 (AArch64dup (f16 B))))),
+                (!cast<Instruction>(IZ # "_H") $Pg, $Zs1, 1)>;
+      def : Pat<(nxv4f32 (op nxv4i1:$Pg,
+                             (vselect nxv4i1:$Pg, nxv4f32:$Zs1, (SVEDup0)),
+                             (nxv4f32 (AArch64dup (f32 A))))),
+                (!cast<Instruction>(IZ # "_S") $Pg, $Zs1, 0)>;
+      def : Pat<(nxv4f32 (op nxv4i1:$Pg,
+                             (vselect nxv4i1:$Pg, nxv4f32:$Zs1, (SVEDup0)),
+                             (nxv4f32 (AArch64dup (f32 B))))),
+                (!cast<Instruction>(IZ # "_S") $Pg, $Zs1, 1)>;
+      def : Pat<(nxv2f64 (op nxv2i1:$Pg,
+                             (vselect nxv2i1:$Pg, nxv2f64:$Zs1, (SVEDup0)),
+                             (nxv2f64 (AArch64dup (f64 A))))),
+                (!cast<Instruction>(IZ # "_D") $Pg, $Zs1, 0)>;
+      def : Pat<(nxv2f64 (op nxv2i1:$Pg,
+                             (vselect nxv2i1:$Pg, nxv2f64:$Zs1, (SVEDup0)),
+                             (nxv2f64 (AArch64dup (f64 B))))),
+                (!cast<Instruction>(IZ # "_D") $Pg, $Zs1, 1)>;
+    }
+  }
+
+  defm : intrinsic_compact_fp_immediates<"FADD_ZPmI",   "FADD_ZPZI_ZERO",   "FADD_ZPZI_UNDEF",   fpimm_half, fpimm_one, int_aarch64_sve_fadd, fadd>;
+  defm : intrinsic_compact_fp_immediates<"FSUB_ZPmI",   "FSUB_ZPZI_ZERO",   "FSUB_ZPZI_UNDEF",   fpimm_half, fpimm_one, int_aarch64_sve_fsub, fsub>;
+  defm : intrinsic_compact_fp_immediates<"FSUBR_ZPmI",  "FSUBR_ZPZI_ZERO",  "FSUBR_ZPZI_UNDEF",  fpimm_half, fpimm_one, int_aarch64_sve_fsubr>;
+  defm : intrinsic_compact_fp_immediates<"FMUL_ZPmI",   "FMUL_ZPZI_ZERO",   "FMUL_ZPZI_UNDEF",   fpimm_half, fpimm_two, int_aarch64_sve_fmul, fmul>;
+  defm : intrinsic_compact_fp_immediates<"FMAX_ZPmI",   "FMAX_ZPZI_ZERO",   "FMAX_ZPZI_UNDEF",   fpimm0,     fpimm_one, int_aarch64_sve_fmax>;
+  defm : intrinsic_compact_fp_immediates<"FMIN_ZPmI",   "FMIN_ZPZI_ZERO",   "FMIN_ZPZI_UNDEF",   fpimm0,     fpimm_one, int_aarch64_sve_fmin>;
+  defm : intrinsic_compact_fp_immediates<"FMAXNM_ZPmI", "FMAXNM_ZPZI_ZERO", "FMAXNM_ZPZI_UNDEF", fpimm0,     fpimm_one, int_aarch64_sve_fmaxnm>;
+  defm : intrinsic_compact_fp_immediates<"FMINNM_ZPmI", "FMINNM_ZPZI_ZERO", "FMINNM_ZPZI_UNDEF", fpimm0,     fpimm_one, int_aarch64_sve_fminnm>;
 
   defm FADD_ZPmZ : sve_fp_2op_p_zds<0b0000, "fadd", "FADD_ZPZZ", int_aarch64_sve_fadd, DestructiveBinaryComm>;
   defm FSUB_ZPmZ : sve_fp_2op_p_zds<0b0001, "fsub", "FSUB_ZPZZ", int_aarch64_sve_fsub, DestructiveBinaryCommWithRev, "FSUBR_ZPmZ", 1>;
Index: llvm/lib/Target/AArch64/SVEInstrFormats.td
===================================================================
--- llvm/lib/Target/AArch64/SVEInstrFormats.td
+++ llvm/lib/Target/AArch64/SVEInstrFormats.td
@@ -1512,10 +1512,23 @@
   let ElementSize = zprty.ElementSize;
 }
 
-multiclass sve_fp_2op_i_p_zds<bits<3> opc, string asm, Operand imm_ty> {
-  def _H : sve_fp_2op_i_p_zds<0b01, opc, asm, ZPR16, imm_ty>;
-  def _S : sve_fp_2op_i_p_zds<0b10, opc, asm, ZPR32, imm_ty>;
-  def _D : sve_fp_2op_i_p_zds<0b11, opc, asm, ZPR64, imm_ty>;
+multiclass sve_fp_2op_i_p_zds<bits<3> opc, string asm, string Ps,
+                              Operand imm_ty> {
+  let DestructiveInstType = DestructiveBinaryImm in {
+  def _H : SVEPseudo2Instr<Ps # "_H", 1>, sve_fp_2op_i_p_zds<0b01, opc, asm, ZPR16, imm_ty>;
+  def _S : SVEPseudo2Instr<Ps # "_S", 1>, sve_fp_2op_i_p_zds<0b10, opc, asm, ZPR32, imm_ty>;
+  def _D : SVEPseudo2Instr<Ps # "_D", 1>, sve_fp_2op_i_p_zds<0b11, opc, asm, ZPR64, imm_ty>;
+  }
+}
+
+// Pseudos for the immediate forms: _UNDEF_* may leave the inactive lanes
+// undefined, _ZERO_* zeroes them before the operation.
+multiclass sve_fp_2op_i_p_zds_zx<Operand imm_ty> {
+  def _UNDEF_H : PredTwoOpImmPseudo<NAME # "_H", ZPR16, imm_ty, FalseLanesUndef>;
+  def _UNDEF_S : PredTwoOpImmPseudo<NAME # "_S", ZPR32, imm_ty, FalseLanesUndef>;
+  def _UNDEF_D : PredTwoOpImmPseudo<NAME # "_D", ZPR64, imm_ty, FalseLanesUndef>;
+
+  def _ZERO_H : PredTwoOpImmPseudo<NAME # "_H", ZPR16, imm_ty, FalseLanesZero>;
+  def _ZERO_S : PredTwoOpImmPseudo<NAME # "_S", ZPR32, imm_ty, FalseLanesZero>;
+  def _ZERO_D : PredTwoOpImmPseudo<NAME # "_D", ZPR64, imm_ty, FalseLanesZero>;
 }
 
 class sve_fp_2op_p_zds<bits<2> sz, bits<4> opc, string asm,
Index: llvm/test/CodeGen/AArch64/sve-fp-immediates-merging.ll
===================================================================
--- /dev/null
+++ llvm/test/CodeGen/AArch64/sve-fp-immediates-merging.ll
@@ -0,0 +1,1411 @@
+; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s
+
+;
+; FADD
+;
+
+define <vscale x 8 x half> @fadd_h_immhalf(<vscale x 8 x i1> %pg, <vscale x 8 x half> %a) {
+; CHECK-LABEL: fadd_h_immhalf:
+; CHECK: movprfx z0.h, p0/z, z0.h
+; CHECK-NEXT: fadd z0.h, p0/m, z0.h, #0.5
+; CHECK-NEXT: ret
+  %elt = insertelement <vscale x 8 x half> undef, half 0.500000e+00, i32 0
+  %splat = shufflevector <vscale x 8 x half> %elt, <vscale x 8 x half> undef, <vscale x 8 x i32> zeroinitializer
+  %a_z = select <vscale x 8 x i1> %pg, <vscale x 8 x half> %a, <vscale x 8 x half> zeroinitializer
+  %out = call <vscale x 8 x half> @llvm.aarch64.sve.fadd.nxv8f16(<vscale x 8 x i1> %pg, <vscale x 8 x half> %a_z, <vscale x 8 x half> %splat)
+  ret <vscale x 8 x half> %out
+}
+
+define <vscale x 8 x half> @fadd_h_immhalf_undef(<vscale x 8 x i1> %pg, <vscale x 8 x half> %a) {
+; CHECK-LABEL: fadd_h_immhalf_undef:
+; CHECK-NOT: movprfx
+; CHECK: fadd z0.h, p0/m, z0.h, #0.5
+; CHECK-NEXT: ret
+  %elt = insertelement <vscale x 8 x half> undef, half 0.500000e+00, i32 0
+  %splat = shufflevector <vscale x 8 x half> %elt, <vscale x 8 x half> undef, <vscale x 8 x i32> zeroinitializer
+  %a_z = select <vscale x 8 x i1> %pg, <vscale x 8 x half> %a, <vscale x 8 x half> undef
+  %out = call <vscale x 8 x half> @llvm.aarch64.sve.fadd.nxv8f16(<vscale x 8 x i1> %pg, <vscale x 8 x half> %a_z, <vscale x 8 x half> %splat)
+  ret <vscale x 8 x half> %out
+}
+
+define <vscale x 8 x half> @fadd_h_immone(<vscale x 8 x i1> %pg, <vscale x 8 x half> %a) {
+; CHECK-LABEL: fadd_h_immone:
+; CHECK: movprfx z0.h, p0/z, z0.h
+; CHECK-NEXT: fadd z0.h, p0/m, z0.h, #1.0
+; CHECK-NEXT: ret
+  %elt = insertelement <vscale x 8 x half> undef, half 1.000000e+00, i32 0
+  %splat = shufflevector <vscale x 8 x half> %elt, <vscale x 8 x half> undef, <vscale x 8 x i32> zeroinitializer
+  %a_z = select <vscale x 8 x i1> %pg, <vscale x 8 x half> %a, <vscale x 8 x half> zeroinitializer
+  %out = call <vscale x 8 x half> @llvm.aarch64.sve.fadd.nxv8f16(<vscale x 8 x i1> %pg, <vscale x 8 x half> %a_z, <vscale x 8 x half> %splat)
+  ret <vscale x 8 x half> %out
+}
+
+define <vscale x 8 x half> @fadd_h_immone_undef(<vscale x 8 x i1> %pg, <vscale x 8 x half> %a) {
+; CHECK-LABEL: fadd_h_immone_undef:
+; CHECK-NOT: movprfx
+; CHECK: fadd z0.h, p0/m, z0.h, #1.0
+; CHECK-NEXT: ret
+  %elt = insertelement <vscale x 8 x half> undef, half 1.000000e+00, i32 0
+  %splat = shufflevector <vscale x 8 x half> %elt, <vscale x 8 x half> undef, <vscale x 8 x i32> zeroinitializer
+  %a_z = select <vscale x 8 x i1> %pg, <vscale x 8 x half> %a, <vscale x 8 x half> undef
+  %out = call <vscale x 8 x half> @llvm.aarch64.sve.fadd.nxv8f16(<vscale x 8 x i1> %pg, <vscale x 8 x half> %a_z, <vscale x 8 x half> %splat)
+  ret <vscale x 8 x half> %out
+}
+
+define <vscale x 4 x float> @fadd_s_immhalf(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a) {
+; CHECK-LABEL: fadd_s_immhalf:
+; CHECK: movprfx z0.s, p0/z, z0.s
+; CHECK-NEXT: fadd z0.s, p0/m, z0.s, #0.5
+; CHECK-NEXT: ret
+  %elt = insertelement <vscale x 4 x float> undef, float 0.500000e+00, i32 0
+  %splat = shufflevector <vscale x 4 x float> %elt, <vscale x 4 x float> undef, <vscale x 4 x i32> zeroinitializer
+  %a_z = select <vscale x 4 x i1> %pg, <vscale x 4 x float> %a, <vscale x 4 x float> zeroinitializer
+  %out = call <vscale x 4 x float> @llvm.aarch64.sve.fadd.nxv4f32(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a_z, <vscale x 4 x float> %splat)
+  ret <vscale x 4 x float> %out
+}
+
+define <vscale x 4 x float> @fadd_s_immhalf_undef(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a) {
+; CHECK-LABEL: fadd_s_immhalf_undef:
+; CHECK-NOT: movprfx
+; CHECK: fadd z0.s, p0/m, z0.s, #0.5
+; CHECK-NEXT: ret
+  %elt = insertelement <vscale x 4 x float> undef, float 0.500000e+00, i32 0
+  %splat = shufflevector <vscale x 4 x float> %elt, <vscale x 4 x float> undef, <vscale x 4 x i32> zeroinitializer
+  %a_z = select <vscale x 4 x i1> %pg, <vscale x 4 x float> %a, <vscale x 4 x float> undef
+  %out = call <vscale x 4 x float> @llvm.aarch64.sve.fadd.nxv4f32(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a_z, <vscale x 4 x float> %splat)
+  ret <vscale x 4 x float> %out
+}
+
+define <vscale x 4 x float> @fadd_s_immone(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a) {
+; CHECK-LABEL: fadd_s_immone:
+; CHECK: movprfx z0.s, p0/z, z0.s
+; CHECK-NEXT: fadd z0.s, p0/m, z0.s, #1.0
+; CHECK-NEXT: ret
+  %elt = insertelement <vscale x 4 x float> undef, float 1.000000e+00, i32 0
+  %splat = shufflevector <vscale x 4 x float> %elt, <vscale x 4 x float> undef, <vscale x 4 x i32> zeroinitializer
+  %a_z = select <vscale x 4 x i1> %pg, <vscale x 4 x float> %a, <vscale x 4 x float> zeroinitializer
+  %out = call <vscale x 4 x float> @llvm.aarch64.sve.fadd.nxv4f32(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a_z, <vscale x 4 x float> %splat)
+  ret <vscale x 4 x float> %out
+}
+
+define <vscale x 4 x float> @fadd_s_immone_undef(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a) {
+; CHECK-LABEL: fadd_s_immone_undef:
+; CHECK-NOT: movprfx
+; CHECK: fadd z0.s, p0/m, z0.s, #1.0
+; CHECK-NEXT: ret
+  %elt = insertelement <vscale x 4 x float> undef, float 1.000000e+00, i32 0
+  %splat = shufflevector <vscale x 4 x float> %elt, <vscale x 4 x float> undef, <vscale x 4 x i32> zeroinitializer
+  %a_z = select <vscale x 4 x i1> %pg, <vscale x 4 x float> %a, <vscale x 4 x float> undef
+  %out = call <vscale x 4 x float> @llvm.aarch64.sve.fadd.nxv4f32(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a_z, <vscale x 4 x float> %splat)
+  ret <vscale x 4 x float> %out
+}
+
+define <vscale x 2 x double> @fadd_d_immhalf(<vscale x 2 x i1> %pg, <vscale x 2 x double> %a) {
+; CHECK-LABEL: fadd_d_immhalf:
+; CHECK: movprfx z0.d, p0/z, z0.d
+; CHECK-NEXT: fadd z0.d, p0/m, z0.d, #0.5
+; CHECK-NEXT: ret
+  %elt = insertelement <vscale x 2 x double> undef, double 0.500000e+00, i32 0
+  %splat = shufflevector <vscale x 2 x double> %elt, <vscale x 2 x double> undef, <vscale x 2 x i32> zeroinitializer
+  %a_z = select <vscale x 2 x i1> %pg, <vscale x 2 x double> %a, <vscale x 2 x double> zeroinitializer
+  %out = call <vscale x 2 x double> @llvm.aarch64.sve.fadd.nxv2f64(<vscale x 2 x i1> %pg, <vscale x 2 x double> %a_z, <vscale x 2 x double> %splat)
+  ret <vscale x 2 x double> %out
+}
+
+define <vscale x 2 x double> @fadd_d_immhalf_undef(<vscale x 2 x i1> %pg, <vscale x 2 x double> %a) {
+; CHECK-LABEL: fadd_d_immhalf_undef:
+; CHECK-NOT: movprfx
+; CHECK: fadd z0.d, p0/m, z0.d, #0.5
+; CHECK-NEXT: ret
+  %elt = insertelement <vscale x 2 x double> undef, double 0.500000e+00, i32 0
+  %splat = shufflevector <vscale x 2 x double> %elt, <vscale x 2 x double> undef, <vscale x 2 x i32> zeroinitializer
+  %a_z = select <vscale x 2 x i1> %pg, <vscale x 2 x double> %a, <vscale x 2 x double> undef
+  %out = call <vscale x 2 x double> @llvm.aarch64.sve.fadd.nxv2f64(<vscale x 2 x i1> %pg, <vscale x 2 x double> %a_z, <vscale x 2 x double> %splat)
+  ret <vscale x 2 x double> %out
+}
+
+define <vscale x 2 x double> @fadd_d_immone(<vscale x 2 x i1> %pg, <vscale x 2 x double> %a) {
+; CHECK-LABEL: fadd_d_immone:
+; CHECK: movprfx z0.d, p0/z, z0.d
+; CHECK-NEXT: fadd z0.d, p0/m, z0.d, #1.0
+; CHECK-NEXT: ret
+  %elt = insertelement <vscale x 2 x double> undef, double 1.000000e+00, i32 0
+  %splat = shufflevector <vscale x 2 x double> %elt, <vscale x 2 x double> undef, <vscale x 2 x i32> zeroinitializer
+  %a_z = select <vscale x 2 x i1> %pg, <vscale x 2 x double> %a, <vscale x 2 x double> zeroinitializer
+  %out = call <vscale x 2 x double> @llvm.aarch64.sve.fadd.nxv2f64(<vscale x 2 x i1> %pg, <vscale x 2 x double> %a_z, <vscale x 2 x double> %splat)
+  ret <vscale x 2 x double> %out
+}
+
+define <vscale x 2 x double> @fadd_d_immone_undef(<vscale x 2 x i1> %pg, <vscale x 2 x double> %a) {
+; CHECK-LABEL: fadd_d_immone_undef:
+; CHECK-NOT: movprfx
+; CHECK: fadd z0.d, p0/m, z0.d, #1.0
+; CHECK-NEXT: ret
+  %elt = insertelement <vscale x 2 x double> undef, double 1.000000e+00, i32 0
+  %splat = shufflevector <vscale x 2 x double> %elt, <vscale x 2 x double> undef, <vscale x 2 x i32> zeroinitializer
+  %a_z = select <vscale x 2 x i1> %pg, <vscale x 2 x double> %a, <vscale x 2 x double> undef
+  %out = call <vscale x 2 x double> @llvm.aarch64.sve.fadd.nxv2f64(<vscale x 2 x i1> %pg, <vscale x 2 x double> %a_z, <vscale x 2 x double> %splat)
+  ret <vscale x 2 x double> %out
+}
+
+;
+; FMAX
+;
+
+define <vscale x 8 x half> @fmax_h_immzero(<vscale x 8 x i1> %pg, <vscale x 8 x half> %a) {
+; CHECK-LABEL: fmax_h_immzero:
+; CHECK: movprfx z0.h, p0/z, z0.h
+; CHECK-NEXT: fmax z0.h, p0/m, z0.h, #0.0
+; CHECK-NEXT: ret
+  %elt = insertelement <vscale x 8 x half> undef, half 0.000000e+00, i32 0
+  %splat = shufflevector <vscale x 8 x half> %elt, <vscale x 8 x half> undef, <vscale x 8 x i32> zeroinitializer
+  %a_z = select <vscale x 8 x i1> %pg, <vscale x 8 x half> %a, <vscale x 8 x half> zeroinitializer
+  %out = call <vscale x 8 x half> @llvm.aarch64.sve.fmax.nxv8f16(<vscale x 8 x i1> %pg, <vscale x 8 x half> %a_z, <vscale x 8 x half> %splat)
+  ret <vscale x 8 x half> %out
+}
+
+define <vscale x 8 x half> @fmax_h_immzero_undef(<vscale x 8 x i1> %pg, <vscale x 8 x half> %a) {
+; CHECK-LABEL: fmax_h_immzero_undef:
+; CHECK-NOT: movprfx
+; CHECK: fmax z0.h, p0/m, z0.h, #0.0
+; CHECK-NEXT: ret
+  %elt = insertelement <vscale x 8 x half> undef, half 0.000000e+00, i32 0
+  %splat = shufflevector <vscale x 8 x half> %elt, <vscale x 8 x half> undef, <vscale x 8 x i32> zeroinitializer
+  %a_z = select <vscale x 8 x i1> %pg, <vscale x 8 x half> %a, <vscale x 8 x half> undef
+  %out = call <vscale x 8 x half> @llvm.aarch64.sve.fmax.nxv8f16(<vscale x 8 x i1> %pg, <vscale x 8 x half> %a_z, <vscale x 8 x half> %splat)
+  ret <vscale x 8 x half> %out
+}
+
+define <vscale x 8 x half> @fmax_h_immone(<vscale x 8 x i1> %pg, <vscale x 8 x half> %a) {
+; CHECK-LABEL: fmax_h_immone:
+; CHECK: movprfx z0.h, p0/z, z0.h
+; CHECK-NEXT: fmax z0.h, p0/m, z0.h, #1.0
+; CHECK-NEXT: ret
+  %elt = insertelement <vscale x 8 x half> undef, half 1.000000e+00, i32 0
+  %splat = shufflevector <vscale x 8 x half> %elt, <vscale x 8 x half> undef, <vscale x 8 x i32> zeroinitializer
+  %a_z = select <vscale x 8 x i1> %pg, <vscale x 8 x half> %a, <vscale x 8 x half> zeroinitializer
+  %out = call <vscale x 8 x half> @llvm.aarch64.sve.fmax.nxv8f16(<vscale x 8 x i1> %pg, <vscale x 8 x half> %a_z, <vscale x 8 x half> %splat)
+  ret <vscale x 8 x half> %out
+}
+
+define <vscale x 8 x half> @fmax_h_immone_undef(<vscale x 8 x i1> %pg, <vscale x 8 x half> %a) {
+; CHECK-LABEL: fmax_h_immone_undef:
+; CHECK-NOT: movprfx
+; CHECK: fmax z0.h, p0/m, z0.h, #1.0
+; CHECK-NEXT: ret
+  %elt = insertelement <vscale x 8 x half> undef, half 1.000000e+00, i32 0
+  %splat = shufflevector <vscale x 8 x half> %elt, <vscale x 8 x half> undef, <vscale x 8 x i32> zeroinitializer
+  %a_z = select <vscale x 8 x i1> %pg, <vscale x 8 x half> %a, <vscale x 8 x half> undef
+  %out = call <vscale x 8 x half> @llvm.aarch64.sve.fmax.nxv8f16(<vscale x 8 x i1> %pg, <vscale x 8 x half> %a_z, <vscale x 8 x half> %splat)
+  ret <vscale x 8 x half> %out
+}
+
+define <vscale x 4 x float> @fmax_s_immzero(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a) {
+; CHECK-LABEL: fmax_s_immzero:
+; CHECK: movprfx z0.s, p0/z, z0.s
+; CHECK-NEXT: fmax z0.s, p0/m, z0.s, #0.0
+; CHECK-NEXT: ret
+  %elt = insertelement <vscale x 4 x float> undef, float 0.000000e+00, i32 0
+  %splat = shufflevector <vscale x 4 x float> %elt, <vscale x 4 x float> undef, <vscale x 4 x i32> zeroinitializer
+  %a_z = select <vscale x 4 x i1> %pg, <vscale x 4 x float> %a, <vscale x 4 x float> zeroinitializer
+  %out = call <vscale x 4 x float> @llvm.aarch64.sve.fmax.nxv4f32(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a_z, <vscale x 4 x float> %splat)
+  ret <vscale x 4 x float> %out
+}
+
+define <vscale x 4 x float> @fmax_s_immzero_undef(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a) {
+; CHECK-LABEL: fmax_s_immzero_undef:
+; CHECK-NOT: movprfx
+; CHECK: fmax z0.s, p0/m, z0.s, #0.0
+; CHECK-NEXT: ret
+  %elt = insertelement <vscale x 4 x float> undef, float 0.000000e+00, i32 0
+  %splat = shufflevector <vscale x 4 x float> %elt, <vscale x 4 x float> undef, <vscale x 4 x i32> zeroinitializer
+  %a_z = select <vscale x 4 x i1> %pg, <vscale x 4 x float> %a, <vscale x 4 x float> undef
+  %out = call <vscale x 4 x float> @llvm.aarch64.sve.fmax.nxv4f32(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a_z, <vscale x 4 x float> %splat)
+  ret <vscale x 4 x float> %out
+}
+
+define <vscale x 4 x float> @fmax_s_immone(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a) {
+; CHECK-LABEL: fmax_s_immone:
+; CHECK: movprfx z0.s, p0/z, z0.s
+; CHECK-NEXT: fmax z0.s, p0/m, z0.s, #1.0
+; CHECK-NEXT: ret
+  %elt = insertelement <vscale x 4 x float> undef, float 1.000000e+00, i32 0
+  %splat = shufflevector <vscale x 4 x float> %elt, <vscale x 4 x float> undef, <vscale x 4 x i32> zeroinitializer
+  %a_z = select <vscale x 4 x i1> %pg, <vscale x 4 x float> %a, <vscale x 4 x float> zeroinitializer
+  %out = call <vscale x 4 x float> @llvm.aarch64.sve.fmax.nxv4f32(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a_z, <vscale x 4 x float> %splat)
+  ret <vscale x 4 x float> %out
+}
+
+define <vscale x 4 x float> @fmax_s_immone_undef(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a) {
+; CHECK-LABEL: fmax_s_immone_undef:
+; CHECK-NOT: movprfx
+; CHECK: fmax z0.s, p0/m, z0.s, #1.0
+; CHECK-NEXT: ret
+  %elt = insertelement <vscale x 4 x float> undef, float 1.000000e+00, i32 0
+  %splat = shufflevector <vscale x 4 x float> %elt, <vscale x 4 x float> undef, <vscale x 4 x i32> zeroinitializer
+  %a_z = select <vscale x 4 x i1> %pg, <vscale x 4 x float> %a, <vscale x 4 x float> undef
+  %out = call <vscale x 4 x float> @llvm.aarch64.sve.fmax.nxv4f32(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a_z, <vscale x 4 x float> %splat)
+  ret <vscale x 4 x float> %out
+}
+
+define <vscale x 2 x double> @fmax_d_immzero(<vscale x 2 x i1> %pg, <vscale x 2 x double> %a) {
+; CHECK-LABEL: fmax_d_immzero:
+; CHECK: movprfx z0.d, p0/z, z0.d
+; CHECK-NEXT: fmax z0.d, p0/m, z0.d, #0.0
+; CHECK-NEXT: ret
+  %elt = insertelement <vscale x 2 x double> undef, double 0.000000e+00, i32 0
+  %splat = shufflevector <vscale x 2 x double> %elt, <vscale x 2 x double> undef, <vscale x 2 x i32> zeroinitializer
+  %a_z = select <vscale x 2 x i1> %pg, <vscale x 2 x double> %a, <vscale x 2 x double> zeroinitializer
+  %out = call <vscale x 2 x double> @llvm.aarch64.sve.fmax.nxv2f64(<vscale x 2 x i1> %pg, <vscale x 2 x double> %a_z, <vscale x 2 x double> %splat)
+  ret <vscale x 2 x double> %out
+}
+
+define <vscale x 2 x double> @fmax_d_immzero_undef(<vscale x 2 x i1> %pg, <vscale x 2 x double> %a) {
+; CHECK-LABEL: fmax_d_immzero_undef:
+; CHECK-NOT: movprfx
+; CHECK: fmax z0.d, p0/m, z0.d, #0.0
+; CHECK-NEXT: ret
+  %elt = insertelement <vscale x 2 x double> undef, double 0.000000e+00, i32 0
+  %splat = shufflevector <vscale x 2 x double> %elt, <vscale x 2 x double> undef, <vscale x 2 x i32> zeroinitializer
+  %a_z = select <vscale x 2 x i1> %pg, <vscale x 2 x double> %a, <vscale x 2 x double> undef
+  %out = call <vscale x 2 x double> @llvm.aarch64.sve.fmax.nxv2f64(<vscale x 2 x i1> %pg, <vscale x 2 x double> %a_z, <vscale x 2 x double> %splat)
+  ret <vscale x 2 x double> %out
+}
+
+define <vscale x 2 x double> @fmax_d_immone(<vscale x 2 x i1> %pg, <vscale x 2 x double> %a) {
+; CHECK-LABEL: fmax_d_immone:
+; CHECK: movprfx z0.d, p0/z, z0.d
+; CHECK-NEXT: fmax z0.d, p0/m, z0.d, #1.0
+; CHECK-NEXT: ret
+  %elt = insertelement <vscale x 2 x double> undef, double 1.000000e+00, i32 0
+  %splat = shufflevector <vscale x 2 x double> %elt, <vscale x 2 x double> undef, <vscale x 2 x i32> zeroinitializer
+  %a_z = select <vscale x 2 x i1> %pg, <vscale x 2 x double> %a, <vscale x 2 x double> zeroinitializer
+  %out = call <vscale x 2 x double> @llvm.aarch64.sve.fmax.nxv2f64(<vscale x 2 x i1> %pg, <vscale x 2 x double> %a_z, <vscale x 2 x double> %splat)
+  ret <vscale x 2 x double> %out
+}
+
+define <vscale x 2 x double> @fmax_d_immone_undef(<vscale x 2 x i1> %pg, <vscale x 2 x double> %a) {
+; CHECK-LABEL: fmax_d_immone_undef:
+; CHECK-NOT: movprfx
+; CHECK: fmax z0.d, p0/m, z0.d, #1.0
+; CHECK-NEXT: ret
+  %elt = insertelement <vscale x 2 x double> undef, double 1.000000e+00, i32 0
+  %splat = shufflevector <vscale x 2 x double> %elt, <vscale x 2 x double> undef, <vscale x 2 x i32> zeroinitializer
+  %a_z = select <vscale x 2 x i1> %pg, <vscale x 2 x double> %a, <vscale x 2 x double> undef
+  %out = call <vscale x 2 x double> @llvm.aarch64.sve.fmax.nxv2f64(<vscale x 2 x i1> %pg, <vscale x 2 x double> %a_z, <vscale x 2 x double> %splat)
+  ret <vscale x 2 x double> %out
+}
+
+;
+; FMAXNM
+;
+
+define <vscale x 8 x half> @fmaxnm_h_immzero(<vscale x 8 x i1> %pg, <vscale x 8 x half> %a) {
+; CHECK-LABEL: fmaxnm_h_immzero:
+; CHECK: movprfx z0.h, p0/z, z0.h
+; CHECK-NEXT: fmaxnm z0.h, p0/m, z0.h, #0.0
+; CHECK-NEXT: ret
+  %elt = insertelement <vscale x 8 x half> undef, half 0.000000e+00, i32 0
+  %splat = shufflevector <vscale x 8 x half> %elt, <vscale x 8 x half> undef, <vscale x 8 x i32> zeroinitializer
+  %a_z = select <vscale x 8 x i1> %pg, <vscale x 8 x half> %a, <vscale x 8 x half> zeroinitializer
+  %out = call <vscale x 8 x half> @llvm.aarch64.sve.fmaxnm.nxv8f16(<vscale x 8 x i1> %pg, <vscale x 8 x half> %a_z, <vscale x 8 x half> %splat)
+  ret <vscale x 8 x half> %out
+}
+
+define <vscale x 8 x half> @fmaxnm_h_immzero_undef(<vscale x 8 x i1> %pg, <vscale x 8 x half> %a) {
+; CHECK-LABEL: fmaxnm_h_immzero_undef:
+; CHECK-NOT: movprfx
+; CHECK: fmaxnm z0.h, p0/m, z0.h, #0.0
+; CHECK-NEXT: ret
+  %elt = insertelement <vscale x 8 x half> undef, half 0.000000e+00, i32 0
+  %splat = shufflevector <vscale x 8 x half> %elt, <vscale x 8 x half> undef, <vscale x 8 x i32> zeroinitializer
+  %a_z = select <vscale x 8 x i1> %pg, <vscale x 8 x half> %a, <vscale x 8 x half> undef
+  %out = call <vscale x 8 x half> @llvm.aarch64.sve.fmaxnm.nxv8f16(<vscale x 8 x i1> %pg, <vscale x 8 x half> %a_z, <vscale x 8 x half> %splat)
+  ret <vscale x 8 x half> %out
+}
+
+define <vscale x 8 x half> @fmaxnm_h_immone(<vscale x 8 x i1> %pg, <vscale x 8 x half> %a) {
+; CHECK-LABEL: fmaxnm_h_immone:
+; CHECK: movprfx z0.h, p0/z, z0.h
+; CHECK-NEXT: fmaxnm z0.h, p0/m, z0.h, #1.0
+; CHECK-NEXT: ret
+  %elt = insertelement <vscale x 8 x half> undef, half 1.000000e+00, i32 0
+  %splat = shufflevector <vscale x 8 x half> %elt, <vscale x 8 x half> undef, <vscale x 8 x i32> zeroinitializer
+  %a_z = select <vscale x 8 x i1> %pg, <vscale x 8 x half> %a, <vscale x 8 x half> zeroinitializer
+  %out = call <vscale x 8 x half> @llvm.aarch64.sve.fmaxnm.nxv8f16(<vscale x 8 x i1> %pg, <vscale x 8 x half> %a_z, <vscale x 8 x half> %splat)
+  ret <vscale x 8 x half> %out
+}
+
+define <vscale x 8 x half> @fmaxnm_h_immone_undef(<vscale x 8 x i1> %pg, <vscale x 8 x half> %a) {
+; CHECK-LABEL: fmaxnm_h_immone_undef:
+; CHECK-NOT: movprfx
+; CHECK: fmaxnm z0.h, p0/m, z0.h, #1.0
+; CHECK-NEXT: ret
+  %elt = insertelement <vscale x 8 x half> undef, half 1.000000e+00, i32 0
+  %splat = shufflevector <vscale x 8 x half> %elt, <vscale x 8 x half> undef, <vscale x 8 x i32> zeroinitializer
+  %a_z = select <vscale x 8 x i1> %pg, <vscale x 8 x half> %a, <vscale x 8 x half> undef
+  %out = call <vscale x 8 x half> @llvm.aarch64.sve.fmaxnm.nxv8f16(<vscale x 8 x i1> %pg, <vscale x 8 x half> %a_z, <vscale x 8 x half> %splat)
+  ret <vscale x 8 x half> %out
+}
+
+define <vscale x 4 x float> @fmaxnm_s_immzero(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a) {
+; CHECK-LABEL: fmaxnm_s_immzero:
+; CHECK: movprfx z0.s, p0/z, z0.s
+; CHECK-NEXT: fmaxnm z0.s, p0/m, z0.s, #0.0
+; CHECK-NEXT: ret
+  %elt = insertelement <vscale x 4 x float> undef, float 0.000000e+00, i32 0
+  %splat = shufflevector <vscale x 4 x float> %elt, <vscale x 4 x float> undef, <vscale x 4 x i32> zeroinitializer
+  %a_z = select <vscale x 4 x i1> %pg, <vscale x 4 x float> %a, <vscale x 4 x float> zeroinitializer
+  %out = call <vscale x 4 x float> @llvm.aarch64.sve.fmaxnm.nxv4f32(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a_z, <vscale x 4 x float> %splat)
+  ret <vscale x 4 x float> %out
+}
+
+define <vscale x 4 x float> @fmaxnm_s_immzero_undef(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a) {
+; CHECK-LABEL: fmaxnm_s_immzero_undef:
+; CHECK-NOT: movprfx
+; CHECK: fmaxnm z0.s, p0/m, z0.s, #0.0
+; CHECK-NEXT: ret
+  %elt = insertelement <vscale x 4 x float> undef, float 0.000000e+00, i32 0
+  %splat = shufflevector <vscale x 4 x float> %elt, <vscale x 4 x float> undef, <vscale x 4 x i32> zeroinitializer
+  %a_z = select <vscale x 4 x i1> %pg, <vscale x 4 x float> %a, <vscale x 4 x float> undef
+  %out = call <vscale x 4 x float> @llvm.aarch64.sve.fmaxnm.nxv4f32(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a_z, <vscale x 4 x float> %splat)
+  ret <vscale x 4 x float> %out
+}
+
+define <vscale x 4 x float> @fmaxnm_s_immone(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a) {
+; CHECK-LABEL: fmaxnm_s_immone:
+; CHECK: movprfx z0.s, p0/z, z0.s
+; CHECK-NEXT: fmaxnm z0.s, p0/m, z0.s, #1.0
+; CHECK-NEXT: ret
+  %elt = insertelement <vscale x 4 x float> undef, float 1.000000e+00, i32 0
+  %splat = shufflevector <vscale x 4 x float> %elt, <vscale x 4 x float> undef, <vscale x 4 x i32> zeroinitializer
+  %a_z = select <vscale x 4 x i1> %pg, <vscale x 4 x float> %a, <vscale x 4 x float> zeroinitializer
+  %out = call <vscale x 4 x float> @llvm.aarch64.sve.fmaxnm.nxv4f32(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a_z, <vscale x 4 x float> %splat)
+  ret <vscale x 4 x float> %out
+}
+
+define <vscale x 4 x float> @fmaxnm_s_immone_undef(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a) {
+; CHECK-LABEL: fmaxnm_s_immone_undef:
+; CHECK-NOT: movprfx
+; CHECK: fmaxnm z0.s, p0/m, z0.s, #1.0
+; CHECK-NEXT: ret
+  %elt = insertelement <vscale x 4 x float> undef, float 1.000000e+00, i32 0
+  %splat = shufflevector <vscale x 4 x float> %elt, <vscale x 4 x float> undef, <vscale x 4 x i32> zeroinitializer
+  %a_z = select <vscale x 4 x i1> %pg, <vscale x 4 x float> %a, <vscale x 4 x float> undef
+  %out = call <vscale x 4 x float> @llvm.aarch64.sve.fmaxnm.nxv4f32(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a_z, <vscale x 4 x float> %splat)
+  ret <vscale x 4 x float> %out
+}
+
+define <vscale x 2 x double> @fmaxnm_d_immzero(<vscale x 2 x i1> %pg, <vscale x 2 x double> %a) {
+; CHECK-LABEL: fmaxnm_d_immzero:
+; CHECK: movprfx z0.d, p0/z, z0.d
+; CHECK-NEXT: fmaxnm z0.d, p0/m, z0.d, #0.0
+; CHECK-NEXT: ret
+  %elt = insertelement <vscale x 2 x double> undef, double 0.000000e+00, i32 0
+  %splat = shufflevector <vscale x 2 x double> %elt, <vscale x 2 x double> undef, <vscale x 2 x i32> zeroinitializer
+  %a_z = select <vscale x 2 x i1> %pg, <vscale x 2 x double> %a, <vscale x 2 x double> zeroinitializer
+  %out = call <vscale x 2 x double> @llvm.aarch64.sve.fmaxnm.nxv2f64(<vscale x 2 x i1> %pg, <vscale x 2 x double> %a_z, <vscale x 2 x double> %splat)
+  ret <vscale x 2 x double> %out
+}
+
+define <vscale x 2 x double> @fmaxnm_d_immzero_undef(<vscale x 2 x i1> %pg, <vscale x 2 x double> %a) {
+; CHECK-LABEL: fmaxnm_d_immzero_undef:
+; CHECK-NOT: movprfx
+; CHECK: fmaxnm z0.d, p0/m, z0.d, #0.0
+; CHECK-NEXT: ret
+  %elt = insertelement <vscale x 2 x double> undef, double 0.000000e+00, i32 0
+  %splat = shufflevector <vscale x 2 x double> %elt, <vscale x 2 x double> undef, <vscale x 2 x i32> zeroinitializer
+  %a_z = select <vscale x 2 x i1> %pg, <vscale x 2 x double> %a, <vscale x 2 x double> undef
+  %out = call <vscale x 2 x double> @llvm.aarch64.sve.fmaxnm.nxv2f64(<vscale x 2 x i1> %pg, <vscale x 2 x double> %a_z, <vscale x 2 x double> %splat)
+  ret <vscale x 2 x double> %out
+}
+
+define <vscale x 2 x double> @fmaxnm_d_immone(<vscale x 2 x i1> %pg, <vscale x 2 x double> %a) {
+; CHECK-LABEL: fmaxnm_d_immone:
+; CHECK: movprfx z0.d, p0/z, z0.d
+; CHECK-NEXT: fmaxnm z0.d, p0/m, z0.d, #1.0
+; CHECK-NEXT: ret
+  %elt = insertelement <vscale x 2 x double> undef, double 1.000000e+00, i32 0
+  %splat = shufflevector <vscale x 2 x double> %elt, <vscale x 2 x double> undef, <vscale x 2 x i32> zeroinitializer
+  %a_z = select <vscale x 2 x i1> %pg, <vscale x 2 x double> %a, <vscale x 2 x double> zeroinitializer
+  %out = call <vscale x 2 x double> @llvm.aarch64.sve.fmaxnm.nxv2f64(<vscale x 2 x i1> %pg, <vscale x 2 x double> %a_z, <vscale x 2 x double> %splat)
+  ret <vscale x 2 x double> %out
+}
+
+define <vscale x 2 x double> @fmaxnm_d_immone_undef(<vscale x 2 x i1> %pg, <vscale x 2 x double> %a) {
+; CHECK-LABEL: fmaxnm_d_immone_undef:
+; CHECK-NOT: movprfx
+; CHECK: fmaxnm z0.d, p0/m, z0.d, #1.0
+; CHECK-NEXT: ret
+  %elt = insertelement <vscale x 2 x double> undef, double 1.000000e+00, i32 0
+  %splat = shufflevector <vscale x 2 x double> %elt, <vscale x 2 x double> undef, <vscale x 2 x i32> zeroinitializer
+  %a_z = select <vscale x 2 x i1> %pg, <vscale x 2 x double> %a, <vscale x 2 x double> undef
+  %out = call <vscale x 2 x double> @llvm.aarch64.sve.fmaxnm.nxv2f64(<vscale x 2 x i1> %pg, <vscale x 2 x double> %a_z, <vscale x 2 x double> %splat)
+  ret <vscale x 2 x double> %out
+}
+
+;
+; FMIN
+;
+
+define <vscale x 8 x half> @fmin_h_immzero(<vscale x 8 x i1> %pg, <vscale x 8 x half> %a) {
+; CHECK-LABEL: fmin_h_immzero:
+; CHECK: movprfx z0.h, p0/z, z0.h
+; CHECK-NEXT: fmin z0.h, p0/m, z0.h, #0.0
+; CHECK-NEXT: ret
+  %elt = insertelement <vscale x 8 x half> undef, half 0.000000e+00, i32 0
+  %splat = shufflevector <vscale x 8 x half> %elt, <vscale x 8 x half> undef, <vscale x 8 x i32> zeroinitializer
+  %a_z = select <vscale x 8 x i1> %pg, <vscale x 8 x half> %a, <vscale x 8 x half> zeroinitializer
+  %out = call <vscale x 8 x half> @llvm.aarch64.sve.fmin.nxv8f16(<vscale x 8 x i1> %pg, <vscale x 8 x half> %a_z, <vscale x 8 x half> %splat)
+  ret <vscale x 8 x half> %out
+}
+
+define <vscale x 8 x half> @fmin_h_immzero_undef(<vscale x 8 x i1> %pg, <vscale x 8 x half> %a) {
+; CHECK-LABEL: fmin_h_immzero_undef:
+; CHECK-NOT: movprfx
+; CHECK: fmin z0.h, p0/m, z0.h, #0.0
+; CHECK-NEXT: ret
+  %elt = insertelement <vscale x 8 x half> undef, half 0.000000e+00, i32 0
+  %splat = shufflevector <vscale x 8 x half> %elt, <vscale x 8 x half> undef, <vscale x 8 x i32> zeroinitializer
+  %a_z = select <vscale x 8 x i1> %pg, <vscale x 8 x half> %a, <vscale x 8 x half> undef
+  %out = call <vscale x 8 x half> @llvm.aarch64.sve.fmin.nxv8f16(<vscale x 8 x i1> %pg, <vscale x 8 x half> %a_z, <vscale x 8 x half> %splat)
+  ret <vscale x 8 x half> %out
+}
+
+define <vscale x 8 x half> @fmin_h_immone(<vscale x 8 x i1> %pg, <vscale x 8 x half> %a) {
+; CHECK-LABEL: fmin_h_immone:
+; CHECK: movprfx z0.h, p0/z, z0.h
+; CHECK-NEXT: fmin z0.h, p0/m, z0.h, #1.0
+; CHECK-NEXT: ret
+  %elt = insertelement <vscale x 8 x half> undef, half 1.000000e+00, i32 0
+  %splat = shufflevector <vscale x 8 x half> %elt, <vscale x 8 x half> undef, <vscale x 8 x i32> zeroinitializer
+  %a_z = select <vscale x 8 x i1> %pg, <vscale x 8 x half> %a, <vscale x 8 x half> zeroinitializer
+  %out = call <vscale x 8 x half> @llvm.aarch64.sve.fmin.nxv8f16(<vscale x 8 x i1> %pg, <vscale x 8 x half> %a_z, <vscale x 8 x half> %splat)
+  ret <vscale x 8 x half> %out
+}
+
+define <vscale x 8 x half> @fmin_h_immone_undef(<vscale x 8 x i1> %pg, <vscale x 8 x half> %a) {
+; CHECK-LABEL: fmin_h_immone_undef:
+; CHECK-NOT: movprfx
+; CHECK: fmin z0.h, p0/m, z0.h, #1.0
+; CHECK-NEXT: ret
+  %elt = insertelement <vscale x 8 x half> undef, half 1.000000e+00, i32 0
+  %splat = shufflevector <vscale x 8 x half> %elt, <vscale x 8 x half> undef, <vscale x 8 x i32> zeroinitializer
+  %a_z = select <vscale x 8 x i1> %pg, <vscale x 8 x half> %a, <vscale x 8 x half> undef
+  %out = call <vscale x 8 x half> @llvm.aarch64.sve.fmin.nxv8f16(<vscale x 8 x i1> %pg, <vscale x 8 x half> %a_z, <vscale x 8 x half> %splat)
+  ret <vscale x 8 x half> %out
+}
+
+define <vscale x 4 x float> @fmin_s_immzero(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a) {
+; CHECK-LABEL: fmin_s_immzero:
+; CHECK: movprfx z0.s, p0/z, z0.s
+; CHECK-NEXT: fmin z0.s, p0/m, z0.s, #0.0
+; CHECK-NEXT: ret
+  %elt = insertelement <vscale x 4 x float> undef, float 0.000000e+00, i32 0
+  %splat = shufflevector <vscale x 4 x float> %elt, <vscale x 4 x float> undef, <vscale x 4 x i32> zeroinitializer
+  %a_z = select <vscale x 4 x i1> %pg, <vscale x 4 x float> %a, <vscale x 4 x float> zeroinitializer
+  %out = call <vscale x 4 x float> @llvm.aarch64.sve.fmin.nxv4f32(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a_z, <vscale x 4 x float> %splat)
+  ret <vscale x 4 x float> %out
+}
+
+define <vscale x 4 x float> @fmin_s_immzero_undef(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a) {
+; CHECK-LABEL: fmin_s_immzero_undef:
+; CHECK-NOT: movprfx
+; CHECK: fmin z0.s, p0/m, z0.s, #0.0
+; CHECK-NEXT: ret
+  %elt = insertelement <vscale x 4 x float> undef, float 0.000000e+00, i32 0
+  %splat = shufflevector <vscale x 4 x float> %elt, <vscale x 4 x float> undef, <vscale x 4 x i32> zeroinitializer
+  %a_z = select <vscale x 4 x i1> %pg, <vscale x 4 x float> %a, <vscale x 4 x float> undef
+  %out = call <vscale x 4 x float> @llvm.aarch64.sve.fmin.nxv4f32(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a_z, <vscale x 4 x float> %splat)
+  ret <vscale x 4 x float> %out
+}
+
+define <vscale x 4 x float> @fmin_s_immone(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a) {
+; CHECK-LABEL: fmin_s_immone:
+; CHECK: movprfx z0.s, p0/z, z0.s
+; CHECK-NEXT: fmin z0.s, p0/m, z0.s, #1.0
+; CHECK-NEXT: ret
+  %elt = insertelement <vscale x 4 x float> undef, float 1.000000e+00, i32 0
+  %splat = shufflevector <vscale x 4 x float> %elt, <vscale x 4 x float> undef, <vscale x 4 x i32> zeroinitializer
+  %a_z = select <vscale x 4 x i1> %pg, <vscale x 4 x float> %a, <vscale x 4 x float> zeroinitializer
+  %out = call <vscale x 4 x float> @llvm.aarch64.sve.fmin.nxv4f32(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a_z, <vscale x 4 x float> %splat)
+  ret <vscale x 4 x float> %out
+}
+
+define <vscale x 4 x float> @fmin_s_immone_undef(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a) {
+; CHECK-LABEL: fmin_s_immone_undef:
+; CHECK-NOT: movprfx
+; CHECK: fmin z0.s, p0/m, z0.s, #1.0
+; CHECK-NEXT: ret
+  %elt = insertelement <vscale x 4 x float> undef, float 1.000000e+00, i32 0
+  %splat = shufflevector <vscale x 4 x float> %elt, <vscale x 4 x float> undef, <vscale x 4 x i32> zeroinitializer
+  %a_z = select <vscale x 4 x i1> %pg, <vscale x 4 x float> %a, <vscale x 4 x float> undef
+  %out = call <vscale x 4 x float> @llvm.aarch64.sve.fmin.nxv4f32(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a_z, <vscale x 4 x float> %splat)
+  ret <vscale x 4 x float> %out
+}
+
+define <vscale x 2 x double> @fmin_d_immzero(<vscale x 2 x i1> %pg, <vscale x 2 x double> %a) {
+; CHECK-LABEL: fmin_d_immzero:
+; CHECK: movprfx z0.d, p0/z, z0.d
+; CHECK-NEXT: fmin z0.d, p0/m, z0.d, #0.0
+; CHECK-NEXT: ret
+  %elt = insertelement <vscale x 2 x double> undef, double 0.000000e+00, i32 0
+  %splat = shufflevector <vscale x 2 x double> %elt, <vscale x 2 x double> undef, <vscale x 2 x i32> zeroinitializer
+  %a_z = select <vscale x 2 x i1> %pg, <vscale x 2 x double> %a, <vscale x 2 x double> zeroinitializer
+  %out = call <vscale x 2 x double> @llvm.aarch64.sve.fmin.nxv2f64(<vscale x 2 x i1> %pg, <vscale x 2 x double> %a_z, <vscale x 2 x double> %splat)
+  ret <vscale x 2 x double> %out
+}
+
+define <vscale x 2 x double> @fmin_d_immzero_undef(<vscale x 2 x i1> %pg, <vscale x 2 x double> %a) {
+; CHECK-LABEL: fmin_d_immzero_undef:
+; CHECK-NOT: movprfx
+; CHECK: fmin z0.d, p0/m, z0.d, #0.0
+; CHECK-NEXT: ret
+  %elt = insertelement <vscale x 2 x double> undef, double 0.000000e+00, i32 0
+  %splat = shufflevector <vscale x 2 x double> %elt, <vscale x 2 x double> undef, <vscale x 2 x i32> zeroinitializer
+  %a_z = select <vscale x 2 x i1> %pg, <vscale x 2 x double> %a, <vscale x 2 x double> undef
+  %out = call <vscale x 2 x double> @llvm.aarch64.sve.fmin.nxv2f64(<vscale x 2 x i1> %pg, <vscale x 2 x double> %a_z, <vscale x 2 x double> %splat)
+  ret <vscale x 2 x double> %out
+}
+
+define <vscale x 2 x double> @fmin_d_immone(<vscale x 2 x i1> %pg, <vscale x 2 x double> %a) {
+; CHECK-LABEL: fmin_d_immone:
+; CHECK: movprfx z0.d, p0/z, z0.d
+; CHECK-NEXT: fmin z0.d, p0/m, z0.d, #1.0
+; CHECK-NEXT: ret
+  %elt = insertelement <vscale x 2 x double> undef, double 1.000000e+00, i32 0
+  %splat = shufflevector <vscale x 2 x double> %elt, <vscale x 2 x double> undef, <vscale x 2 x i32> zeroinitializer
+  %a_z = select <vscale x 2 x i1> %pg, <vscale x 2 x double> %a, <vscale x 2 x double> zeroinitializer
+  %out = call <vscale x 2 x double> @llvm.aarch64.sve.fmin.nxv2f64(<vscale x 2 x i1> %pg, <vscale x 2 x double> %a_z, <vscale x 2 x double> %splat)
+  ret <vscale x 2 x double> %out
+}
+
+define <vscale x 2 x double> @fmin_d_immone_undef(<vscale x 2 x i1> %pg, <vscale x 2 x double> %a) {
+; CHECK-LABEL: fmin_d_immone_undef:
+; CHECK-NOT: movprfx
+; CHECK: fmin z0.d, p0/m, z0.d, #1.0
+; CHECK-NEXT: ret
+  %elt = insertelement <vscale x 2 x double> undef, double 1.000000e+00, i32 0
+  %splat = shufflevector <vscale x 2 x double> %elt, <vscale x 2 x double> undef, <vscale x 2 x i32> zeroinitializer
+  %a_z = select <vscale x 2 x i1> %pg, <vscale x 2 x double> %a, <vscale x 2 x double> undef
+  %out = call <vscale x 2 x double> @llvm.aarch64.sve.fmin.nxv2f64(<vscale x 2 x i1> %pg, <vscale x 2 x double> %a_z, <vscale x 2 x double> %splat)
+  ret <vscale x 2 x double> %out
+}
+
+;
+; FMINNM
+;
+
+define <vscale x 8 x half> @fminnm_h_immzero(<vscale x 8 x i1> %pg, <vscale x 8 x half> %a) {
+; CHECK-LABEL: fminnm_h_immzero:
+; CHECK: movprfx z0.h, p0/z, z0.h
+; CHECK-NEXT: fminnm z0.h, p0/m, z0.h, #0.0
+; CHECK-NEXT: ret
+  %elt = insertelement <vscale x 8 x half> undef, half 0.000000e+00, i32 0
+  %splat = shufflevector <vscale x 8 x half> %elt, <vscale x 8 x half> undef, <vscale x 8 x i32> zeroinitializer
+  %a_z = select <vscale x 8 x i1> %pg, <vscale x 8 x half> %a, <vscale x 8 x half> zeroinitializer
+  %out = call <vscale x 8 x half> @llvm.aarch64.sve.fminnm.nxv8f16(<vscale x 8 x i1> %pg, <vscale x 8 x half> %a_z, <vscale x 8 x half> %splat)
+  ret <vscale x 8 x half> %out
+}
+
+define <vscale x 8 x half> @fminnm_h_immzero_undef(<vscale x 8 x i1> %pg, <vscale x 8 x half> %a) {
+; CHECK-LABEL: fminnm_h_immzero_undef:
+; CHECK-NOT: movprfx
+; CHECK: fminnm z0.h, p0/m, z0.h, #0.0
+; CHECK-NEXT: ret
+  %elt = insertelement <vscale x 8 x half> undef, half 0.000000e+00, i32 0
+  %splat = shufflevector <vscale x 8 x half> %elt, <vscale x 8 x half> undef, <vscale x 8 x i32> zeroinitializer
+  %a_z = select <vscale x 8 x i1> %pg, <vscale x 8 x half> %a, <vscale x 8 x half> undef
+  %out = call <vscale x 8 x half> @llvm.aarch64.sve.fminnm.nxv8f16(<vscale x 8 x i1> %pg, <vscale x 8 x half> %a_z, <vscale x 8 x half> %splat)
+  ret <vscale x 8 x half> %out
+}
+
+define <vscale x 8 x half> @fminnm_h_immone(<vscale x 8 x i1> %pg, <vscale x 8 x half> %a) {
+; CHECK-LABEL: fminnm_h_immone:
+; CHECK: movprfx z0.h, p0/z, z0.h
+; CHECK-NEXT: fminnm z0.h, p0/m, z0.h, #1.0
+; CHECK-NEXT: ret
+  %elt = insertelement <vscale x 8 x half> undef, half 1.000000e+00, i32 0
+  %splat = shufflevector <vscale x 8 x half> %elt, <vscale x 8 x half> undef, <vscale x 8 x i32> zeroinitializer
+  %a_z = select <vscale x 8 x i1> %pg, <vscale x 8 x half> %a, <vscale x 8 x half> zeroinitializer
+  %out = call <vscale x 8 x half> @llvm.aarch64.sve.fminnm.nxv8f16(<vscale x 8 x i1> %pg, <vscale x 8 x half> %a_z, <vscale x 8 x half> %splat)
+  ret <vscale x 8 x half> %out
+}
+
+define <vscale x 8 x half> @fminnm_h_immone_undef(<vscale x 8 x i1> %pg, <vscale x 8 x half> %a) {
+; CHECK-LABEL: fminnm_h_immone_undef:
+; CHECK-NOT: movprfx
+; CHECK: fminnm z0.h, p0/m, z0.h, #1.0
+; CHECK-NEXT: ret
+  %elt = insertelement <vscale x 8 x half> undef, half 1.000000e+00, i32 0
+  %splat = shufflevector <vscale x 8 x half> %elt, <vscale x 8 x half> undef, <vscale x 8 x i32> zeroinitializer
+  %a_z = select <vscale x 8 x i1> %pg, <vscale x 8 x half> %a, <vscale x 8 x half> undef
+  %out = call <vscale x 8 x half> @llvm.aarch64.sve.fminnm.nxv8f16(<vscale x 8 x i1> %pg, <vscale x 8 x half> %a_z, <vscale x 8 x half> %splat)
+  ret <vscale x 8 x half> %out
+}
+
+define <vscale x 4 x float> @fminnm_s_immzero(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a) {
+; CHECK-LABEL: fminnm_s_immzero:
+; CHECK: movprfx z0.s, p0/z, z0.s
+; CHECK-NEXT: fminnm z0.s, p0/m, z0.s, #0.0
+; CHECK-NEXT: ret
+  %elt = insertelement <vscale x 4 x float> undef, float 0.000000e+00, i32 0
+  %splat = shufflevector <vscale x 4 x float> %elt, <vscale x 4 x float> undef, <vscale x 4 x i32> zeroinitializer
+  %a_z = select <vscale x 4 x i1> %pg, <vscale x 4 x float> %a, <vscale x 4 x float> zeroinitializer
+  %out = call <vscale x 4 x float> @llvm.aarch64.sve.fminnm.nxv4f32(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a_z, <vscale x 4 x float> %splat)
+  ret <vscale x 4 x float> %out
+}
+
+define <vscale x 4 x float> @fminnm_s_immzero_undef(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a) {
+; CHECK-LABEL: fminnm_s_immzero_undef:
+; CHECK-NOT: movprfx
+; CHECK: fminnm z0.s, p0/m, z0.s, #0.0
+; CHECK-NEXT: ret
+  %elt = insertelement <vscale x 4 x float> undef, float 0.000000e+00, i32 0
+  %splat = shufflevector <vscale x 4 x float> %elt, <vscale x 4 x float> undef, <vscale x 4 x i32> zeroinitializer
+  %a_z = select <vscale x 4 x i1> %pg, <vscale x 4 x float> %a, <vscale x 4 x float> undef
+  %out = call <vscale x 4 x float> @llvm.aarch64.sve.fminnm.nxv4f32(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a_z, <vscale x 4 x float> %splat)
+  ret <vscale x 4 x float> %out
+}
+
+define <vscale x 4 x float> @fminnm_s_immone(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a) {
+; CHECK-LABEL: fminnm_s_immone:
+; CHECK: movprfx z0.s, p0/z, z0.s
+; CHECK-NEXT: fminnm z0.s, p0/m, z0.s, #1.0
+; CHECK-NEXT: ret
+  %elt = insertelement <vscale x 4 x float> undef, float 1.000000e+00, i32 0
+  %splat = shufflevector <vscale x 4 x float> %elt, <vscale x 4 x float> undef, <vscale x 4 x i32> zeroinitializer
+  %a_z = select <vscale x 4 x i1> %pg, <vscale x 4 x float> %a, <vscale x 4 x float> zeroinitializer
+  %out = call <vscale x 4 x float> @llvm.aarch64.sve.fminnm.nxv4f32(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a_z, <vscale x 4 x float> %splat)
+  ret <vscale x 4 x float> %out
+}
+
+define <vscale x 4 x float> @fminnm_s_immone_undef(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a) {
+; CHECK-LABEL: fminnm_s_immone_undef:
+; CHECK-NOT: movprfx
+; CHECK: fminnm z0.s, p0/m, z0.s, #1.0
+; CHECK-NEXT: ret
+  %elt = insertelement <vscale x 4 x float> undef, float 1.000000e+00, i32 0
+  %splat = shufflevector <vscale x 4 x float> %elt, <vscale x 4 x float> undef, <vscale x 4 x i32> zeroinitializer
+  %a_z = select <vscale x 4 x i1> %pg, <vscale x 4 x float> %a, <vscale x 4 x float> undef
+  %out = call <vscale x 4 x float> @llvm.aarch64.sve.fminnm.nxv4f32(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a_z, <vscale x 4 x float> %splat)
+  ret <vscale x 4 x float> %out
+}
+
+define <vscale x 2 x double> @fminnm_d_immzero(<vscale x 2 x i1> %pg, <vscale x 2 x double> %a) {
+; CHECK-LABEL: fminnm_d_immzero:
+; CHECK: movprfx z0.d, p0/z, z0.d
+; CHECK-NEXT: fminnm z0.d, p0/m, z0.d, #0.0
+; CHECK-NEXT: ret
+  %elt = insertelement <vscale x 2 x double> undef, double 0.000000e+00, i32 0
+  %splat = shufflevector <vscale x 2 x double> %elt, <vscale x 2 x double> undef, <vscale x 2 x i32> zeroinitializer
+  %a_z = select <vscale x 2 x i1> %pg, <vscale x 2 x double> %a, <vscale x 2 x double> zeroinitializer
+  %out = call <vscale x 2 x double> @llvm.aarch64.sve.fminnm.nxv2f64(<vscale x 2 x i1> %pg, <vscale x 2 x double> %a_z, <vscale x 2 x double> %splat)
+  ret <vscale x 2 x double> %out
+}
+
+define <vscale x 2 x double> @fminnm_d_immzero_undef(<vscale x 2 x i1> %pg, <vscale x 2 x double> %a) {
+; CHECK-LABEL: fminnm_d_immzero_undef:
+; CHECK-NOT: movprfx
+; CHECK: fminnm z0.d, p0/m, z0.d, #0.0
+; CHECK-NEXT: ret
+  %elt = insertelement <vscale x 2 x double> undef, double 0.000000e+00, i32 0
+  %splat = shufflevector <vscale x 2 x double> %elt, <vscale x 2 x double> undef, <vscale x 2 x i32> zeroinitializer
+  %a_z = select <vscale x 2 x i1> %pg, <vscale x 2 x double> %a, <vscale x 2 x double> undef
+  %out = call <vscale x 2 x double> @llvm.aarch64.sve.fminnm.nxv2f64(<vscale x 2 x i1> %pg, <vscale x 2 x double> %a_z, <vscale x 2 x double> %splat)
+  ret <vscale x 2 x double> %out
+}
+
+define <vscale x 2 x double> @fminnm_d_immone(<vscale x 2 x i1> %pg, <vscale x 2 x double> %a) {
+; CHECK-LABEL: fminnm_d_immone:
+; CHECK: movprfx z0.d, p0/z, z0.d
+; CHECK-NEXT: fminnm z0.d, p0/m, z0.d, #1.0
+; CHECK-NEXT: ret
+  %elt = insertelement <vscale x 2 x double> undef, double 1.000000e+00, i32 0
+  %splat = shufflevector <vscale x 2 x double> %elt, <vscale x 2 x double> undef, <vscale x 2 x i32> zeroinitializer
+  %a_z = select <vscale x 2 x i1> %pg, <vscale x 2 x double> %a, <vscale x 2 x double> zeroinitializer
+  %out = call <vscale x 2 x double> @llvm.aarch64.sve.fminnm.nxv2f64(<vscale x 2 x i1> %pg, <vscale x 2 x double> %a_z, <vscale x 2 x double> %splat)
+  ret <vscale x 2 x double> %out
+}
+
+define <vscale x 2 x double> @fminnm_d_immone_undef(<vscale x 2 x i1> %pg, <vscale x 2 x double> %a) {
+; CHECK-LABEL: fminnm_d_immone_undef:
+; CHECK-NOT: movprfx
+; CHECK: fminnm z0.d, p0/m, z0.d, #1.0
+; CHECK-NEXT: ret
+  %elt = insertelement <vscale x 2 x double> undef, double 1.000000e+00, i32 0
+  %splat = shufflevector <vscale x 2 x double> %elt, <vscale x 2 x double> undef, <vscale x 2 x i32> zeroinitializer
+  %a_z = select <vscale x 2 x i1> %pg, <vscale x 2 x double> %a, <vscale x 2 x double> undef
+  %out = call <vscale x 2 x double> @llvm.aarch64.sve.fminnm.nxv2f64(<vscale x 2 x i1> %pg, <vscale x 2 x double> %a_z, <vscale x 2 x double> %splat)
+  ret <vscale x 2 x double> %out
+}
+
+;
+; FMUL
+;
+
+define <vscale x 8 x half> @fmul_h_immhalf(<vscale x 8 x i1> %pg, <vscale x 8 x half> %a) {
+; CHECK-LABEL: fmul_h_immhalf:
+; CHECK: movprfx z0.h, p0/z, z0.h
+; CHECK-NEXT: fmul z0.h, p0/m, z0.h, #0.5
+; CHECK-NEXT: ret
+  %elt = insertelement <vscale x 8 x half> undef, half 0.500000e+00, i32 0
+  %splat = shufflevector <vscale x 8 x half> %elt, <vscale x 8 x half> undef, <vscale x 8 x i32> zeroinitializer
+  %a_z = select <vscale x 8 x i1> %pg, <vscale x 8 x half> %a, <vscale x 8 x half> zeroinitializer
+  %out = call <vscale x 8 x half> @llvm.aarch64.sve.fmul.nxv8f16(<vscale x 8 x i1> %pg, <vscale x 8 x half> %a_z, <vscale x 8 x half> %splat)
+  ret <vscale x 8 x half> %out
+}
+
+define <vscale x 8 x half> @fmul_h_immhalf_undef(<vscale x 8 x i1> %pg, <vscale x 8 x half> %a) {
+; CHECK-LABEL: fmul_h_immhalf_undef:
+; CHECK-NOT: movprfx
+; CHECK: fmul z0.h, p0/m, z0.h, #0.5
+; CHECK-NEXT: ret
+  %elt = insertelement <vscale x 8 x half> undef, half 0.500000e+00, i32 0
+  %splat = shufflevector <vscale x 8 x half> %elt, <vscale x 8 x half> undef, <vscale x 8 x i32> zeroinitializer
+  %a_z = select <vscale x 8 x i1> %pg, <vscale x 8 x half> %a, <vscale x 8 x half> undef
+  %out = call <vscale x 8 x half> @llvm.aarch64.sve.fmul.nxv8f16(<vscale x 8 x i1> %pg, <vscale x 8 x half> %a_z, <vscale x 8 x half> %splat)
+  ret <vscale x 8 x half> %out
+}
+
+define <vscale x 8 x half> @fmul_h_immtwo(<vscale x 8 x i1> %pg, <vscale x 8 x half> %a) {
+; CHECK-LABEL: fmul_h_immtwo:
+; CHECK: movprfx z0.h, p0/z, z0.h
+; CHECK-NEXT: fmul z0.h, p0/m, z0.h, #2.0
+; CHECK-NEXT: ret
+  %elt = insertelement <vscale x 8 x half> undef, half 2.000000e+00, i32 0
+  %splat = shufflevector <vscale x 8 x half> %elt, <vscale x 8 x half> undef, <vscale x 8 x i32> zeroinitializer
+  %a_z = select <vscale x 8 x i1> %pg, <vscale x 8 x half> %a, <vscale x 8 x half> zeroinitializer
+  %out = call <vscale x 8 x half> @llvm.aarch64.sve.fmul.nxv8f16(<vscale x 8 x i1> %pg, <vscale x 8 x half> %a_z, <vscale x 8 x half> %splat)
+  ret <vscale x 8 x half> %out
+}
+
+define <vscale x 8 x half> @fmul_h_immtwo_undef(<vscale x 8 x i1> %pg, <vscale x 8 x half> %a) {
+; CHECK-LABEL: fmul_h_immtwo_undef:
+; CHECK-NOT: movprfx
+; CHECK: fmul z0.h, p0/m, z0.h, #2.0
+; CHECK-NEXT: ret
+  %elt = insertelement <vscale x 8 x half> undef, half 2.000000e+00, i32 0
+  %splat = shufflevector <vscale x 8 x half> %elt, <vscale x 8 x half> undef, <vscale x 8 x i32> zeroinitializer
+  %a_z = select <vscale x 8 x i1> %pg, <vscale x 8 x half> %a, <vscale x 8 x half> undef
+  %out = call <vscale x 8 x half> @llvm.aarch64.sve.fmul.nxv8f16(<vscale x 8 x i1> %pg, <vscale x 8 x half> %a_z, <vscale x 8 x half> %splat)
+  ret <vscale x 8 x half> %out
+}
+
+define <vscale x 4 x float> @fmul_s_immhalf(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a) {
+; CHECK-LABEL: fmul_s_immhalf:
+; CHECK: movprfx z0.s, p0/z, z0.s
+; CHECK-NEXT: fmul z0.s, p0/m, z0.s, #0.5
+; CHECK-NEXT: ret
+  %elt = insertelement <vscale x 4 x float> undef, float 0.500000e+00, i32 0
+  %splat = shufflevector <vscale x 4 x float> %elt, <vscale x 4 x float> undef, <vscale x 4 x i32> zeroinitializer
+  %a_z = select <vscale x 4 x i1> %pg, <vscale x 4 x float> %a, <vscale x 4 x float> zeroinitializer
+  %out = call <vscale x 4 x float> @llvm.aarch64.sve.fmul.nxv4f32(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a_z, <vscale x 4 x float> %splat)
+  ret <vscale x 4 x float> %out
+}
+
+define <vscale x 4 x float> @fmul_s_immhalf_undef(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a) {
+; CHECK-LABEL: fmul_s_immhalf_undef:
+; CHECK-NOT: movprfx
+; CHECK: fmul z0.s, p0/m, z0.s, #0.5
+; CHECK-NEXT: ret
+  %elt = insertelement <vscale x 4 x float> undef, float 0.500000e+00, i32 0
+  %splat = shufflevector <vscale x 4 x float> %elt, <vscale x 4 x float> undef, <vscale x 4 x i32> zeroinitializer
+  %a_z = select <vscale x 4 x i1> %pg, <vscale x 4 x float> %a, <vscale x 4 x float> undef
+  %out = call <vscale x 4 x float> @llvm.aarch64.sve.fmul.nxv4f32(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a_z, <vscale x 4 x float> %splat)
+  ret <vscale x 4 x float> %out
+}
+
+define <vscale x 4 x float> @fmul_s_immtwo(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a) {
+; CHECK-LABEL: fmul_s_immtwo:
+; CHECK: movprfx z0.s, p0/z, z0.s
+; CHECK-NEXT: fmul z0.s, p0/m, z0.s, #2.0
+; CHECK-NEXT: ret
+  %elt = insertelement <vscale x 4 x float> undef, float 2.000000e+00, i32 0
+  %splat = shufflevector <vscale x 4 x float> %elt, <vscale x 4 x float> undef, <vscale x 4 x i32> zeroinitializer
+  %a_z = select <vscale x 4 x i1> %pg, <vscale x 4 x float> %a, <vscale x 4 x float> zeroinitializer
+  %out = call <vscale x 4 x float> @llvm.aarch64.sve.fmul.nxv4f32(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a_z, <vscale x 4 x float> %splat)
+  ret <vscale x 4 x float> %out
+}
+
+define <vscale x 4 x float> @fmul_s_immtwo_undef(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a) {
+; CHECK-LABEL: fmul_s_immtwo_undef:
+; CHECK-NOT: movprfx
+; CHECK: fmul z0.s, p0/m, z0.s, #2.0
+; CHECK-NEXT: ret
+  %elt = insertelement <vscale x 4 x float> undef, float 2.000000e+00, i32 0
+  %splat = shufflevector <vscale x 4 x float> %elt, <vscale x 4 x float> undef, <vscale x 4 x i32> zeroinitializer
+  %a_z = select <vscale x 4 x i1> %pg, <vscale x 4 x float> %a, <vscale x 4 x float> undef
+  %out = call <vscale x 4 x float> @llvm.aarch64.sve.fmul.nxv4f32(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a_z, <vscale x 4 x float> %splat)
+  ret <vscale x 4 x float> %out
+}
+
+define <vscale x 2 x double> @fmul_d_immhalf(<vscale x 2 x i1> %pg, <vscale x 2 x double> %a) {
+; CHECK-LABEL: fmul_d_immhalf:
+; CHECK: movprfx z0.d, p0/z, z0.d
+; CHECK-NEXT: fmul z0.d, p0/m, z0.d, #0.5
+; CHECK-NEXT: ret
+  %elt = insertelement <vscale x 2 x double> undef, double 0.500000e+00, i32 0
+  %splat = shufflevector <vscale x 2 x double> %elt, <vscale x 2 x double> undef, <vscale x 2 x i32> zeroinitializer
+  %a_z = select <vscale x 2 x i1> %pg, <vscale x 2 x double> %a, <vscale x 2 x double> zeroinitializer
+  %out = call <vscale x 2 x double> @llvm.aarch64.sve.fmul.nxv2f64(<vscale x 2 x i1> %pg, <vscale x 2 x double> %a_z, <vscale x 2 x double> %splat)
+  ret <vscale x 2 x double> %out
+}
+
+define <vscale x 2 x double> @fmul_d_immhalf_undef(<vscale x 2 x i1> %pg, <vscale x 2 x double> %a) {
+; CHECK-LABEL: fmul_d_immhalf_undef:
+; CHECK-NOT: movprfx
+; CHECK: fmul z0.d, p0/m, z0.d, #0.5
+; CHECK-NEXT: ret
+  %elt = insertelement <vscale x 2 x double> undef, double 0.500000e+00, i32 0
+  %splat = shufflevector <vscale x 2 x double> %elt, <vscale x 2 x double> undef, <vscale x 2 x i32> zeroinitializer
+  %a_z = select <vscale x 2 x i1> %pg, <vscale x 2 x double> %a, <vscale x 2 x double> undef
+  %out = call <vscale x 2 x double> @llvm.aarch64.sve.fmul.nxv2f64(<vscale x 2 x i1> %pg, <vscale x 2 x double> %a_z, <vscale x 2 x double> %splat)
+  ret <vscale x 2 x double> %out
+}
+
+define <vscale x 2 x double> @fmul_d_immtwo(<vscale x 2 x i1> %pg, <vscale x 2 x double> %a) {
+; CHECK-LABEL: fmul_d_immtwo:
+; CHECK: movprfx z0.d, p0/z, z0.d
+; CHECK-NEXT: fmul z0.d, p0/m, z0.d, #2.0
+; CHECK-NEXT: ret
+  %elt = insertelement <vscale x 2 x double> undef, double 2.000000e+00, i32 0
+  %splat = shufflevector <vscale x 2 x double> %elt, <vscale x 2 x double> undef, <vscale x 2 x i32> zeroinitializer
+  %a_z = select <vscale x 2 x i1> %pg, <vscale x 2 x double> %a, <vscale x 2 x double> zeroinitializer
+  %out = call <vscale x 2 x double> @llvm.aarch64.sve.fmul.nxv2f64(<vscale x 2 x i1> %pg, <vscale x 2 x double> %a_z, <vscale x 2 x double> %splat)
+  ret <vscale x 2 x double> %out
+}
+
+define <vscale x 2 x double> @fmul_d_immtwo_undef(<vscale x 2 x i1> %pg, <vscale x 2 x double> %a) {
+; CHECK-LABEL: fmul_d_immtwo_undef:
+; CHECK-NOT: movprfx
+; CHECK: fmul z0.d, p0/m, z0.d, #2.0
+; CHECK-NEXT: ret
+  %elt = insertelement <vscale x 2 x double> undef, double 2.000000e+00, i32 0
+  %splat = shufflevector <vscale x 2 x double> %elt, <vscale x 2 x double> undef, <vscale x 2 x i32> zeroinitializer
+  %a_z = select <vscale x 2 x i1> %pg, <vscale x 2 x double> %a, <vscale x 2 x double> undef
+  %out = call <vscale x 2 x double> @llvm.aarch64.sve.fmul.nxv2f64(<vscale x 2 x i1> %pg, <vscale x 2 x double> %a_z, <vscale x 2 x double> %splat)
+  ret <vscale x 2 x double> %out
+}
+
+;
+; FSUB
+;
+
+define <vscale x 8 x half> @fsub_h_immhalf(<vscale x 8 x i1> %pg, <vscale x 8 x half> %a) {
+; CHECK-LABEL: fsub_h_immhalf:
+; CHECK: movprfx z0.h, p0/z, z0.h
+; CHECK-NEXT: fsub z0.h, p0/m, z0.h, #0.5
+; CHECK-NEXT: ret
+  %elt = insertelement <vscale x 8 x half> undef, half 0.500000e+00, i32 0
+  %splat = shufflevector <vscale x 8 x half> %elt, <vscale x 8 x half> undef, <vscale x 8 x i32> zeroinitializer
+  %a_z = select <vscale x 8 x i1> %pg, <vscale x 8 x half> %a, <vscale x 8 x half> zeroinitializer
+  %out = call <vscale x 8 x half> @llvm.aarch64.sve.fsub.nxv8f16(<vscale x 8 x i1> %pg, <vscale x 8 x half> %a_z, <vscale x 8 x half> %splat)
+  ret <vscale x 8 x half> %out
+}
+
+define <vscale x 8 x half> @fsub_h_immhalf_undef(<vscale x 8 x i1> %pg, <vscale x 8 x half> %a) {
+; CHECK-LABEL: fsub_h_immhalf_undef:
+; CHECK-NOT: movprfx
+; CHECK: fsub z0.h, p0/m, z0.h, #0.5
+; CHECK-NEXT: ret
+  %elt = insertelement <vscale x 8 x half> undef, half 0.500000e+00, i32 0
+  %splat = shufflevector <vscale x 8 x half> %elt, <vscale x 8 x half> undef, <vscale x 8 x i32> zeroinitializer
+  %a_z = select <vscale x 8 x i1> %pg, <vscale x 8 x half> %a, <vscale x 8 x half> undef
+  %out = call <vscale x 8 x half> @llvm.aarch64.sve.fsub.nxv8f16(<vscale x 8 x i1> %pg, <vscale x 8 x half> %a_z, <vscale x 8 x half> %splat)
+  ret <vscale x 8 x half> %out
+}
+
+define <vscale x 8 x half> @fsub_h_immone(<vscale x 8 x i1> %pg, <vscale x 8 x half> %a) {
+; CHECK-LABEL: fsub_h_immone:
+; CHECK: movprfx z0.h, p0/z, z0.h
+; CHECK-NEXT: fsub z0.h, p0/m, z0.h, #1.0
+; CHECK-NEXT: ret
+  %elt = insertelement <vscale x 8 x half> undef, half 1.000000e+00, i32 0
+  %splat = shufflevector <vscale x 8 x half> %elt, <vscale x 8 x half> undef, <vscale x 8 x i32> zeroinitializer
+  %a_z = select <vscale x 8 x i1> %pg, <vscale x 8 x half> %a, <vscale x 8 x half> zeroinitializer
+  %out = call <vscale x 8 x half> @llvm.aarch64.sve.fsub.nxv8f16(<vscale x 8 x i1> %pg, <vscale x 8 x half> %a_z, <vscale x 8 x half> %splat)
+  ret <vscale x 8 x half> %out
+}
+
+define <vscale x 8 x half> @fsub_h_immone_undef(<vscale x 8 x i1> %pg, <vscale x 8 x half> %a) {
+; CHECK-LABEL: fsub_h_immone_undef:
+; CHECK-NOT: movprfx
+; CHECK: fsub z0.h, p0/m, z0.h, #1.0
+; CHECK-NEXT: ret
+  %elt = insertelement <vscale x 8 x half> undef, half 1.000000e+00, i32 0
+  %splat = shufflevector <vscale x 8 x half> %elt, <vscale x 8 x half> undef, <vscale x 8 x i32> zeroinitializer
+  %a_z = select <vscale x 8 x i1> %pg, <vscale x 8 x half> %a, <vscale x 8 x half> undef
+  %out = call <vscale x 8 x half> @llvm.aarch64.sve.fsub.nxv8f16(<vscale x 8 x i1> %pg, <vscale x 8 x half> %a_z, <vscale x 8 x half> %splat)
+  ret <vscale x 8 x half> %out
+}
+
+define <vscale x 4 x float> @fsub_s_immhalf(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a) {
+; CHECK-LABEL: fsub_s_immhalf:
+; CHECK: movprfx z0.s, p0/z, z0.s
+; CHECK-NEXT: fsub z0.s, p0/m, z0.s, #0.5
+; CHECK-NEXT: ret
+  %elt = insertelement <vscale x 4 x float> undef, float 0.500000e+00, i32 0
+  %splat = shufflevector <vscale x 4 x float> %elt, <vscale x 4 x float> undef, <vscale x 4 x i32> zeroinitializer
+  %a_z = select <vscale x 4 x i1> %pg, <vscale x 4 x float> %a, <vscale x 4 x float> zeroinitializer
+  %out = call <vscale x 4 x float> @llvm.aarch64.sve.fsub.nxv4f32(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a_z, <vscale x 4 x float> %splat)
+  ret <vscale x 4 x float> %out
+}
+
+define <vscale x 4 x float> @fsub_s_immhalf_undef(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a) {
+; CHECK-LABEL: fsub_s_immhalf_undef:
+; CHECK-NOT: movprfx
+; CHECK: fsub z0.s, p0/m, z0.s, #0.5
+; CHECK-NEXT: ret
+  %elt = insertelement <vscale x 4 x float> undef, float 0.500000e+00, i32 0
+  %splat = shufflevector <vscale x 4 x float> %elt, <vscale x 4 x float> undef, <vscale x 4 x i32> zeroinitializer
+  %a_z = select <vscale x 4 x i1> %pg, <vscale x 4 x float> %a, <vscale x 4 x float> undef
+  %out = call <vscale x 4 x float> @llvm.aarch64.sve.fsub.nxv4f32(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a_z, <vscale x 4 x float> %splat)
+  ret <vscale x 4 x float> %out
+}
+
+define <vscale x 4 x float> @fsub_s_immone(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a) {
+; CHECK-LABEL: fsub_s_immone:
+; CHECK: movprfx z0.s, p0/z, z0.s
+; CHECK-NEXT: fsub z0.s, p0/m, z0.s, #1.0
+; CHECK-NEXT: ret
+  %elt = insertelement <vscale x 4 x float> undef, float 1.000000e+00, i32 0
+  %splat = shufflevector <vscale x 4 x float> %elt, <vscale x 4 x float> undef, <vscale x 4 x i32> zeroinitializer
+  %a_z = select <vscale x 4 x i1> %pg, <vscale x 4 x float> %a, <vscale x 4 x float> zeroinitializer
+  %out = call <vscale x 4 x float> @llvm.aarch64.sve.fsub.nxv4f32(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a_z, <vscale x 4 x float> %splat)
+  ret <vscale x 4 x float> %out
+}
+
+define <vscale x 4 x float> @fsub_s_immone_undef(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a) {
+; CHECK-LABEL: fsub_s_immone_undef:
+; CHECK-NOT: movprfx
+; CHECK: fsub z0.s, p0/m, z0.s, #1.0
+; CHECK-NEXT: ret
+  %elt = insertelement <vscale x 4 x float> undef, float 1.000000e+00, i32 0
+  %splat = shufflevector <vscale x 4 x float> %elt, <vscale x 4 x float> undef, <vscale x 4 x i32> zeroinitializer
+  %a_z = select <vscale x 4 x i1> %pg, <vscale x 4 x float> %a, <vscale x 4 x float> undef
+  %out = call <vscale x 4 x float> @llvm.aarch64.sve.fsub.nxv4f32(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a_z, <vscale x 4 x float> %splat)
+  ret <vscale x 4 x float> %out
+}
+
+define <vscale x 2 x double> @fsub_d_immhalf(<vscale x 2 x i1> %pg, <vscale x 2 x double> %a) {
+; CHECK-LABEL: fsub_d_immhalf:
+; CHECK: movprfx z0.d, p0/z, z0.d
+; CHECK-NEXT: fsub z0.d, p0/m, z0.d, #0.5
+; CHECK-NEXT: ret
+  %elt = insertelement <vscale x 2 x double> undef, double 0.500000e+00, i32 0
+  %splat = shufflevector <vscale x 2 x double> %elt, <vscale x 2 x double> undef, <vscale x 2 x i32> zeroinitializer
+  %a_z = select <vscale x 2 x i1> %pg, <vscale x 2 x double> %a, <vscale x 2 x double> zeroinitializer
+  %out = call <vscale x 2 x double> @llvm.aarch64.sve.fsub.nxv2f64(<vscale x 2 x i1> %pg, <vscale x 2 x double> %a_z, <vscale x 2 x double> %splat)
+  ret <vscale x 2 x double> %out
+}
+
+define <vscale x 2 x double> @fsub_d_immhalf_undef(<vscale x 2 x i1> %pg, <vscale x 2 x double> %a) {
+; CHECK-LABEL: fsub_d_immhalf_undef:
+; CHECK-NOT: movprfx
+; CHECK: fsub z0.d, p0/m, z0.d, #0.5
+; CHECK-NEXT: ret
+  %elt = insertelement <vscale x 2 x double> undef, double 0.500000e+00, i32 0
+  %splat = shufflevector <vscale x 2 x double> %elt, <vscale x 2 x double> undef, <vscale x 2 x i32> zeroinitializer
+  %a_z = select <vscale x 2 x i1> %pg, <vscale x 2 x double> %a, <vscale x 2 x double> undef
+  %out = call <vscale x 2 x double> @llvm.aarch64.sve.fsub.nxv2f64(<vscale x 2 x i1> %pg, <vscale x 2 x double> %a_z, <vscale x 2 x double> %splat)
+  ret <vscale x 2 x double> %out
+}
+
+define <vscale x 2 x double> @fsub_d_immone(<vscale x 2 x i1> %pg, <vscale x 2 x double> %a) {
+; CHECK-LABEL: fsub_d_immone:
+; CHECK: movprfx z0.d, p0/z, z0.d
+; CHECK-NEXT: fsub z0.d, p0/m, z0.d, #1.0
+; CHECK-NEXT: ret
+  %elt = insertelement <vscale x 2 x double> undef, double 1.000000e+00, i32 0
+  %splat = shufflevector <vscale x 2 x double> %elt, <vscale x 2 x double> undef, <vscale x 2 x i32> zeroinitializer
+  %a_z = select <vscale x 2 x i1> %pg, <vscale x 2 x double> %a, <vscale x 2 x double> zeroinitializer
+  %out = call <vscale x 2 x double> @llvm.aarch64.sve.fsub.nxv2f64(<vscale x 2 x i1> %pg, <vscale x 2 x double> %a_z, <vscale x 2 x double> %splat)
+  ret <vscale x 2 x double> %out
+}
+
+define <vscale x 2 x double> @fsub_d_immone_undef(<vscale x 2 x i1> %pg, <vscale x 2 x double> %a) {
+; CHECK-LABEL: fsub_d_immone_undef:
+; CHECK-NOT: movprfx
+; CHECK: fsub z0.d, p0/m, z0.d, #1.0
+; CHECK-NEXT: ret
+  %elt = insertelement <vscale x 2 x double> undef, double 1.000000e+00, i32 0
+  %splat = shufflevector <vscale x 2 x double> %elt, <vscale x 2 x double> undef, <vscale x 2 x i32> zeroinitializer
+  %a_z = select <vscale x 2 x i1> %pg, <vscale x 2 x double> %a, <vscale x 2 x double> undef
+  %out = call <vscale x 2 x double> @llvm.aarch64.sve.fsub.nxv2f64(<vscale x 2 x i1> %pg, <vscale x 2 x double> %a_z, <vscale x 2 x double> %splat)
+  ret <vscale x 2 x double> %out
+}
+
+;
+; FSUBR
+;
+
+define <vscale x 8 x half> @fsubr_h_immhalf(<vscale x 8 x i1> %pg, <vscale x 8 x half> %a) {
+; CHECK-LABEL: fsubr_h_immhalf:
+; CHECK: movprfx z0.h, p0/z, z0.h
+; CHECK-NEXT: fsubr z0.h, p0/m, z0.h, #0.5
+; CHECK-NEXT: ret
+  %elt = insertelement <vscale x 8 x half> undef, half 0.500000e+00, i32 0
+  %splat = shufflevector <vscale x 8 x half> %elt, <vscale x 8 x half> undef, <vscale x 8 x i32> zeroinitializer
+  %a_z = select <vscale x 8 x i1> %pg, <vscale x 8 x half> %a, <vscale x 8 x half> zeroinitializer
+  %out = call <vscale x 8 x half> @llvm.aarch64.sve.fsubr.nxv8f16(<vscale x 8 x i1> %pg, <vscale x 8 x half> %a_z, <vscale x 8 x half> %splat)
+  ret <vscale x 8 x half> %out
+}
+
+define <vscale x 8 x half> @fsubr_h_immhalf_undef(<vscale x 8 x i1> %pg, <vscale x 8 x half> %a) {
+; CHECK-LABEL: fsubr_h_immhalf_undef:
+; CHECK-NOT: movprfx
+; CHECK: fsubr z0.h, p0/m, z0.h, #0.5
+; CHECK-NEXT: ret
+  %elt = insertelement <vscale x 8 x half> undef, half 0.500000e+00, i32 0
+  %splat = shufflevector <vscale x 8 x half> %elt, <vscale x 8 x half> undef, <vscale x 8 x i32> zeroinitializer
+  %a_z = select <vscale x 8 x i1> %pg, <vscale x 8 x half> %a, <vscale x 8 x half> undef
+  %out = call <vscale x 8 x half> @llvm.aarch64.sve.fsubr.nxv8f16(<vscale x 8 x i1> %pg, <vscale x 8 x half> %a_z, <vscale x 8 x half> %splat)
+  ret <vscale x 8 x half> %out
+}
+
+define <vscale x 8 x half> @fsubr_h_immone(<vscale x 8 x i1> %pg, <vscale x 8 x half> %a) {
+; CHECK-LABEL: fsubr_h_immone:
+; CHECK: movprfx z0.h, p0/z, z0.h
+; CHECK-NEXT: fsubr z0.h, p0/m, z0.h, #1.0
+; CHECK-NEXT: ret
+  %elt = insertelement <vscale x 8 x half> undef, half 1.000000e+00, i32 0
+  %splat = shufflevector <vscale x 8 x half> %elt, <vscale x 8 x half> undef, <vscale x 8 x i32> zeroinitializer
+  %a_z = select <vscale x 8 x i1> %pg, <vscale x 8 x half> %a, <vscale x 8 x half> zeroinitializer
+  %out = call <vscale x 8 x half> @llvm.aarch64.sve.fsubr.nxv8f16(<vscale x 8 x i1> %pg, <vscale x 8 x half> %a_z, <vscale x 8 x half> %splat)
+  ret <vscale x 8 x half> %out
+}
+
+define <vscale x 8 x half> @fsubr_h_immone_undef(<vscale x 8 x i1> %pg, <vscale x 8 x half> %a) {
+; CHECK-LABEL: fsubr_h_immone_undef:
+; CHECK-NOT: movprfx
+; CHECK: fsubr z0.h, p0/m, z0.h, #1.0
+; CHECK-NEXT: ret
+  %elt = insertelement <vscale x 8 x half> undef, half 1.000000e+00, i32 0
+  %splat = shufflevector <vscale x 8 x half> %elt, <vscale x 8 x half> undef, <vscale x 8 x i32> zeroinitializer
+  %a_z = select <vscale x 8 x i1> %pg, <vscale x 8 x half> %a, <vscale x 8 x half> undef
+  %out = call <vscale x 8 x half> @llvm.aarch64.sve.fsubr.nxv8f16(<vscale x 8 x i1> %pg, <vscale x 8 x half> %a_z, <vscale x 8 x half> %splat)
+  ret <vscale x 8 x half> %out
+}
+
+define <vscale x 4 x float> @fsubr_s_immhalf(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a) {
+; CHECK-LABEL: fsubr_s_immhalf:
+; CHECK: movprfx z0.s, p0/z, z0.s
+; CHECK-NEXT: fsubr z0.s, p0/m, z0.s, #0.5
+; CHECK-NEXT: ret
+  %elt = insertelement <vscale x 4 x float> undef, float 0.500000e+00, i32 0
+  %splat = shufflevector <vscale x 4 x float> %elt, <vscale x 4 x float> undef, <vscale x 4 x i32> zeroinitializer
+  %a_z = select <vscale x 4 x i1> %pg, <vscale x 4 x float> %a, <vscale x 4 x float> zeroinitializer
+  %out = call <vscale x 4 x float> @llvm.aarch64.sve.fsubr.nxv4f32(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a_z, <vscale x 4 x float> %splat)
+  ret <vscale x 4 x float> %out
+}
+
+define <vscale x 4 x float> @fsubr_s_immhalf_undef(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a) {
+; CHECK-LABEL: fsubr_s_immhalf_undef:
+; CHECK-NOT: movprfx
+; CHECK: fsubr z0.s, p0/m, z0.s, #0.5
+; CHECK-NEXT: ret
+  %elt = insertelement <vscale x 4 x float> undef, float 0.500000e+00, i32 0
+  %splat = shufflevector <vscale x 4 x float> %elt, <vscale x 4 x float> undef, <vscale x 4 x i32> zeroinitializer
+  %a_z = select <vscale x 4 x i1> %pg, <vscale x 4 x float> %a, <vscale x 4 x float> undef
+  %out = call <vscale x 4 x float> @llvm.aarch64.sve.fsubr.nxv4f32(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a_z, <vscale x 4 x float> %splat)
+  ret <vscale x 4 x float> %out
+}
+
+define <vscale x 4 x float> @fsubr_s_immone(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a) {
+; CHECK-LABEL: fsubr_s_immone:
+; CHECK: movprfx z0.s, p0/z, z0.s
+; CHECK-NEXT: fsubr z0.s, p0/m, z0.s, #1.0
+; CHECK-NEXT: ret
+  %elt = insertelement <vscale x 4 x float> undef, float 1.000000e+00, i32 0
+  %splat = shufflevector <vscale x 4 x float> %elt, <vscale x 4 x float> undef, <vscale x 4 x i32> zeroinitializer
+  %a_z = select <vscale x 4 x i1> %pg, <vscale x 4 x float> %a, <vscale x 4 x float> zeroinitializer
+  %out = call <vscale x 4 x float> @llvm.aarch64.sve.fsubr.nxv4f32(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a_z, <vscale x 4 x float> %splat)
+  ret <vscale x 4 x float> %out
+}
+
+define <vscale x 4 x float> @fsubr_s_immone_undef(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a) {
+; CHECK-LABEL: fsubr_s_immone_undef:
+; CHECK-NOT: movprfx
+; CHECK: fsubr z0.s, p0/m, z0.s, #1.0
+; CHECK-NEXT: ret
+  %elt = insertelement <vscale x 4 x float> undef, float 1.000000e+00, i32 0
+  %splat = shufflevector <vscale x 4 x float> %elt, <vscale x 4 x float> undef, <vscale x 4 x i32> zeroinitializer
+  %a_z = select <vscale x 4 x i1> %pg, <vscale x 4 x float> %a, <vscale x 4 x float> undef
+  %out = call <vscale x 4 x float> @llvm.aarch64.sve.fsubr.nxv4f32(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a_z, <vscale x 4 x float> %splat)
+  ret <vscale x 4 x float> %out
+}
+
+define <vscale x 2 x double> @fsubr_d_immhalf(<vscale x 2 x i1> %pg, <vscale x 2 x double> %a) {
+; CHECK-LABEL: fsubr_d_immhalf:
+; CHECK: movprfx z0.d, p0/z, z0.d
+; CHECK-NEXT: fsubr z0.d, p0/m, z0.d, #0.5
+; CHECK-NEXT: ret
+  %elt = insertelement <vscale x 2 x double> undef, double 0.500000e+00, i32 0
+  %splat = shufflevector <vscale x 2 x double> %elt, <vscale x 2 x double> undef, <vscale x 2 x i32> zeroinitializer
+  %a_z = select <vscale x 2 x i1> %pg, <vscale x 2 x double> %a, <vscale x 2 x double> zeroinitializer
+  %out = call <vscale x 2 x double> @llvm.aarch64.sve.fsubr.nxv2f64(<vscale x 2 x i1> %pg, <vscale x 2 x double> %a_z, <vscale x 2 x double> %splat)
+  ret <vscale x 2 x double> %out
+}
+
+define <vscale x 2 x double> @fsubr_d_immhalf_undef(<vscale x 2 x i1> %pg, <vscale x 2 x double> %a) {
+; CHECK-LABEL: fsubr_d_immhalf_undef:
+; CHECK-NOT: movprfx
+; CHECK: fsubr z0.d, p0/m, z0.d, #0.5
+; CHECK-NEXT: ret
+  %elt = insertelement <vscale x 2 x double> undef, double 0.500000e+00, i32 0
+  %splat = shufflevector <vscale x 2 x double> %elt, <vscale x 2 x double> undef, <vscale x 2 x i32> zeroinitializer
+  %a_z = select <vscale x 2 x i1> %pg, <vscale x 2 x double> %a, <vscale x 2 x double> undef
+  %out = call <vscale x 2 x double> @llvm.aarch64.sve.fsubr.nxv2f64(<vscale x 2 x i1> %pg, <vscale x 2 x double> %a_z, <vscale x 2 x double> %splat)
+  ret <vscale x 2 x double> %out
+}
+
+define <vscale x 2 x double> @fsubr_d_immone(<vscale x 2 x i1> %pg, <vscale x 2 x double> %a) {
+; CHECK-LABEL: fsubr_d_immone:
+; CHECK: movprfx z0.d, p0/z, z0.d
+; CHECK-NEXT: fsubr z0.d, p0/m, z0.d, #1.0
+; CHECK-NEXT: ret
+  %elt = insertelement <vscale x 2 x double> undef, double 1.000000e+00, i32 0
+  %splat = shufflevector <vscale x 2 x double> %elt, <vscale x 2 x double> undef, <vscale x 2 x i32> zeroinitializer
+  %a_z = select <vscale x 2 x i1> %pg, <vscale x 2 x double> %a, <vscale x 2 x double> zeroinitializer
+  %out = call <vscale x 2 x double> @llvm.aarch64.sve.fsubr.nxv2f64(<vscale x 2 x i1> %pg, <vscale x 2 x double> %a_z, <vscale x 2 x double> %splat)
+  ret <vscale x 2 x double> %out
+}
+
+define <vscale x 2 x double> @fsubr_d_immone_undef(<vscale x 2 x i1> %pg, <vscale x 2 x double> %a) {
+; CHECK-LABEL: fsubr_d_immone_undef:
+; CHECK-NOT: movprfx
+; CHECK: fsubr z0.d, p0/m, z0.d, #1.0
+; CHECK-NEXT: ret
+  %elt = insertelement <vscale x 2 x double> undef, double 1.000000e+00, i32 0
+  %splat = shufflevector <vscale x 2 x double> %elt, <vscale x 2 x double> undef, <vscale x 2 x i32> zeroinitializer
+  %a_z = select <vscale x 2 x i1> %pg, <vscale x 2 x double> %a, <vscale x 2 x double> undef
+  %out = call <vscale x 2 x double> @llvm.aarch64.sve.fsubr.nxv2f64(<vscale x 2 x i1> %pg, <vscale x 2 x double> %a_z, <vscale x 2 x double> %splat)
+  ret <vscale x 2 x double> %out
+}
+
+; Arithmetic intrinsic declarations
+
+declare <vscale x 8 x half> @llvm.aarch64.sve.fadd.nxv8f16(<vscale x 8 x i1>, <vscale x 8 x half>, <vscale x 8 x half>)
+declare <vscale x 4 x float> @llvm.aarch64.sve.fadd.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>, <vscale x 4 x float>)
+declare <vscale x 2 x double> @llvm.aarch64.sve.fadd.nxv2f64(<vscale x 2 x i1>, <vscale x 2 x double>, <vscale x 2 x double>)
+
+declare <vscale x 8 x half> @llvm.aarch64.sve.fmax.nxv8f16(<vscale x 8 x i1>, <vscale x 8 x half>, <vscale x 8 x half>)
+declare <vscale x 4 x float> @llvm.aarch64.sve.fmax.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>, <vscale x 4 x float>)
+declare <vscale x 2 x double> @llvm.aarch64.sve.fmax.nxv2f64(<vscale x 2 x i1>, <vscale x 2 x double>, <vscale x 2 x double>)
+
+declare <vscale x 8 x half> @llvm.aarch64.sve.fmaxnm.nxv8f16(<vscale x 8 x i1>, <vscale x 8 x half>, <vscale x 8 x half>)
+declare <vscale x 4 x float> @llvm.aarch64.sve.fmaxnm.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>, <vscale x 4 x float>)
+declare <vscale x 2 x double> @llvm.aarch64.sve.fmaxnm.nxv2f64(<vscale x 2 x i1>, <vscale x 2 x double>, <vscale x 2 x double>)
+
+declare <vscale x 8 x half> @llvm.aarch64.sve.fmin.nxv8f16(<vscale x 8 x i1>, <vscale x 8 x half>, <vscale x 8 x half>)
+declare <vscale x 4 x float> @llvm.aarch64.sve.fmin.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>, <vscale x 4 x float>)
+declare <vscale x 2 x double> @llvm.aarch64.sve.fmin.nxv2f64(<vscale x 2 x i1>, <vscale x 2 x double>, <vscale x 2 x double>)
+
+declare <vscale x 8 x half> @llvm.aarch64.sve.fminnm.nxv8f16(<vscale x 8 x i1>, <vscale x 8 x half>, <vscale x 8 x half>)
+declare <vscale x 4 x float> @llvm.aarch64.sve.fminnm.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>, <vscale x 4 x float>)
+declare <vscale x 2 x double> @llvm.aarch64.sve.fminnm.nxv2f64(<vscale x 2 x i1>, <vscale x 2 x double>, <vscale x 2 x double>)
+
+declare <vscale x 8 x half> @llvm.aarch64.sve.fmul.nxv8f16(<vscale x 8 x i1>, <vscale x 8 x half>, <vscale x 8 x half>)
+declare <vscale x 4 x float> @llvm.aarch64.sve.fmul.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>, <vscale x 4 x float>)
+declare <vscale x 2 x double> @llvm.aarch64.sve.fmul.nxv2f64(<vscale x 2 x i1>, <vscale x 2 x double>, <vscale x 2 x double>)
+
+declare <vscale x 8 x half> @llvm.aarch64.sve.fsub.nxv8f16(<vscale x 8 x i1>, <vscale x 8 x half>, <vscale x 8 x half>)
+declare <vscale x 4 x float> @llvm.aarch64.sve.fsub.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>, <vscale x 4 x float>)
+declare <vscale x 2 x double> @llvm.aarch64.sve.fsub.nxv2f64(<vscale x 2 x i1>, <vscale x 2 x double>, <vscale x 2 x double>)
+
+declare <vscale x 8 x half> @llvm.aarch64.sve.fsubr.nxv8f16(<vscale x 8 x i1>, <vscale x 8 x half>, <vscale x 8 x half>)
+declare <vscale x 4 x float> @llvm.aarch64.sve.fsubr.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>, <vscale x 4 x float>)
+declare <vscale x 2 x double> @llvm.aarch64.sve.fsubr.nxv2f64(<vscale x 2 x i1>, <vscale x 2 x double>, <vscale x 2 x double>)