diff --git a/llvm/lib/Target/AArch64/AArch64InstrFormats.td b/llvm/lib/Target/AArch64/AArch64InstrFormats.td --- a/llvm/lib/Target/AArch64/AArch64InstrFormats.td +++ b/llvm/lib/Target/AArch64/AArch64InstrFormats.td @@ -1206,6 +1206,18 @@ return Imm.isExactlyValue(+0.0); }]>; +def fpimm_half : FPImmLeaf; + +def fpimm_one : FPImmLeaf; + +def fpimm_two : FPImmLeaf; + def gi_fpimm16 : GICustomOperandRenderer<"renderFPImm16">, GISDNodeXFormEquiv; def gi_fpimm32 : GICustomOperandRenderer<"renderFPImm32">, diff --git a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td --- a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td +++ b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td @@ -399,14 +399,34 @@ defm FRECPE_ZZ : sve_fp_2op_u_zd<0b110, "frecpe", int_aarch64_sve_frecpe_x>; defm FRSQRTE_ZZ : sve_fp_2op_u_zd<0b111, "frsqrte", int_aarch64_sve_frsqrte_x>; - defm FADD_ZPmI : sve_fp_2op_i_p_zds<0b000, "fadd", sve_fpimm_half_one>; - defm FSUB_ZPmI : sve_fp_2op_i_p_zds<0b001, "fsub", sve_fpimm_half_one>; - defm FMUL_ZPmI : sve_fp_2op_i_p_zds<0b010, "fmul", sve_fpimm_half_two>; - defm FSUBR_ZPmI : sve_fp_2op_i_p_zds<0b011, "fsubr", sve_fpimm_half_one>; - defm FMAXNM_ZPmI : sve_fp_2op_i_p_zds<0b100, "fmaxnm", sve_fpimm_zero_one>; - defm FMINNM_ZPmI : sve_fp_2op_i_p_zds<0b101, "fminnm", sve_fpimm_zero_one>; - defm FMAX_ZPmI : sve_fp_2op_i_p_zds<0b110, "fmax", sve_fpimm_zero_one>; - defm FMIN_ZPmI : sve_fp_2op_i_p_zds<0b111, "fmin", sve_fpimm_zero_one>; + defm FADD_ZPmI : sve_fp_2op_i_p_zds<0b000, "fadd", "FADD_ZPZI", sve_fpimm_half_one>; + defm FSUB_ZPmI : sve_fp_2op_i_p_zds<0b001, "fsub", "FSUB_ZPZI", sve_fpimm_half_one>; + defm FMUL_ZPmI : sve_fp_2op_i_p_zds<0b010, "fmul", "FMUL_ZPZI", sve_fpimm_half_two>; + defm FSUBR_ZPmI : sve_fp_2op_i_p_zds<0b011, "fsubr", "FSUBR_ZPZI", sve_fpimm_half_one>; + defm FMAXNM_ZPmI : sve_fp_2op_i_p_zds<0b100, "fmaxnm", "FMAXNM_ZPZI", sve_fpimm_zero_one>; + defm FMINNM_ZPmI : sve_fp_2op_i_p_zds<0b101, 
"fminnm", "FMINNM_ZPZI", sve_fpimm_zero_one>; + defm FMAX_ZPmI : sve_fp_2op_i_p_zds<0b110, "fmax", "FMAX_ZPZI", sve_fpimm_zero_one>; + defm FMIN_ZPmI : sve_fp_2op_i_p_zds<0b111, "fmin", "FMIN_ZPZI", sve_fpimm_zero_one>; + + defm FADD_ZPZI : sve_fp_2op_i_p_zds_zx; + defm FSUB_ZPZI : sve_fp_2op_i_p_zds_zx; + defm FMUL_ZPZI : sve_fp_2op_i_p_zds_zx; + defm FSUBR_ZPZI : sve_fp_2op_i_p_zds_zx; + defm FMAXNM_ZPZI : sve_fp_2op_i_p_zds_zx; + defm FMINNM_ZPZI : sve_fp_2op_i_p_zds_zx; + defm FMAX_ZPZI : sve_fp_2op_i_p_zds_zx; + defm FMIN_ZPZI : sve_fp_2op_i_p_zds_zx; + + let Predicates = [HasSVE, UseExperimentalZeroingPseudos] in { + defm FADD_ZPZI : sve_fp_2op_i_p_zds_zx_zeroing; + defm FSUB_ZPZI : sve_fp_2op_i_p_zds_zx_zeroing; + defm FMUL_ZPZI : sve_fp_2op_i_p_zds_zx_zeroing; + defm FSUBR_ZPZI : sve_fp_2op_i_p_zds_zx_zeroing; + defm FMAXNM_ZPZI : sve_fp_2op_i_p_zds_zx_zeroing; + defm FMINNM_ZPZI : sve_fp_2op_i_p_zds_zx_zeroing; + defm FMAX_ZPZI : sve_fp_2op_i_p_zds_zx_zeroing; + defm FMIN_ZPZI : sve_fp_2op_i_p_zds_zx_zeroing; + } defm FADD_ZPmZ : sve_fp_2op_p_zds<0b0000, "fadd", "FADD_ZPZZ", int_aarch64_sve_fadd, DestructiveBinaryComm>; defm FSUB_ZPmZ : sve_fp_2op_p_zds<0b0001, "fsub", "FSUB_ZPZZ", int_aarch64_sve_fsub, DestructiveBinaryCommWithRev, "FSUBR_ZPmZ">; @@ -2231,6 +2251,74 @@ (DUP_ZR_D GPR64:$index)), GPR64:$src)>; + /// Compact single bit fp immediates + multiclass intrinsic_compact_fp_immediates_patterns { + def : Pat<(ftype (op (itype PPR_3b:$Pg), + (ftype ZPR:$Zs1), + (ftype (AArch64dup (sftype A))))), + (!cast(I) PPR_3b:$Pg, ZPR:$Zs1, imm)>; + def : Pat<(ftype (ir_op (itype (AArch64ptrue 31)), + (ftype ZPR:$Zs1), + (ftype (AArch64dup (sftype A))))), + (!cast(IX) (PTRUE_S 31), ZPR:$Zs1, imm)>; + } + + multiclass intrinsic_compact_fp_immediates { + defm : intrinsic_compact_fp_immediates_patterns; + defm : intrinsic_compact_fp_immediates_patterns; + defm : intrinsic_compact_fp_immediates_patterns; + defm : intrinsic_compact_fp_immediates_patterns; + defm : 
intrinsic_compact_fp_immediates_patterns; + defm : intrinsic_compact_fp_immediates_patterns; + } + + defm : intrinsic_compact_fp_immediates<"FADD_ZPmI", "FADD_ZPZI_ZERO", "FADD_ZPZI_UNDEF", fpimm_half, fpimm_one, int_aarch64_sve_fadd, AArch64fadd_p>; + defm : intrinsic_compact_fp_immediates<"FSUB_ZPmI", "FSUB_ZPZI_ZERO", "FSUB_ZPZI_UNDEF", fpimm_half, fpimm_one, int_aarch64_sve_fsub, AArch64fsub_p>; + defm : intrinsic_compact_fp_immediates<"FSUBR_ZPmI", "FSUBR_ZPZI_ZERO", "FSUBR_ZPZI_UNDEF", fpimm_half, fpimm_one, int_aarch64_sve_fsubr>; + defm : intrinsic_compact_fp_immediates<"FMUL_ZPmI", "FMUL_ZPZI_ZERO", "FMUL_ZPZI_UNDEF", fpimm_half, fpimm_two, int_aarch64_sve_fmul, AArch64fmul_p>; + defm : intrinsic_compact_fp_immediates<"FMAX_ZPmI", "FMAX_ZPZI_ZERO", "FMAX_ZPZI_UNDEF", fpimm0, fpimm_one, int_aarch64_sve_fmax, AArch64fmax_p>; + defm : intrinsic_compact_fp_immediates<"FMIN_ZPmI", "FMIN_ZPZI_ZERO", "FMIN_ZPZI_UNDEF", fpimm0, fpimm_one, int_aarch64_sve_fmin, AArch64fmin_p>; + defm : intrinsic_compact_fp_immediates<"FMAXNM_ZPmI","FMAXNM_ZPZI_ZERO","FMAXNM_ZPZI_UNDEF", fpimm0, fpimm_one, int_aarch64_sve_fmaxnm, AArch64fmaxnm_p>; + defm : intrinsic_compact_fp_immediates<"FMINNM_ZPmI","FMINNM_ZPZI_ZERO","FMINNM_ZPZI_UNDEF", fpimm0, fpimm_one, int_aarch64_sve_fminnm, AArch64fminnm_p>; + + multiclass intrinsic_compact_fp_immediates_patterns_zeroing { + let AddedComplexity = 2 in { + def : Pat<(ftype (op itype:$Pg, + (vselect itype:$Pg, ftype:$Zs1, (SVEDup0)), + (ftype (AArch64dup (sftype A))))), + (!cast(IZ) $Pg, $Zs1, imm)>; + } + } + + multiclass intrinsic_compact_fp_immediates_zeroing { + defm : intrinsic_compact_fp_immediates_patterns_zeroing; + defm : intrinsic_compact_fp_immediates_patterns_zeroing; + defm : intrinsic_compact_fp_immediates_patterns_zeroing; + defm : intrinsic_compact_fp_immediates_patterns_zeroing; + defm : intrinsic_compact_fp_immediates_patterns_zeroing; + defm : intrinsic_compact_fp_immediates_patterns_zeroing; + } + + let Predicates = 
[UseExperimentalZeroingPseudos] in { + defm : intrinsic_compact_fp_immediates_zeroing<"FADD_ZPZI_ZERO", fpimm_half, fpimm_one, int_aarch64_sve_fadd>; + defm : intrinsic_compact_fp_immediates_zeroing<"FSUB_ZPZI_ZERO", fpimm_half, fpimm_one, int_aarch64_sve_fsub>; + defm : intrinsic_compact_fp_immediates_zeroing<"FSUBR_ZPZI_ZERO", fpimm_half, fpimm_one, int_aarch64_sve_fsubr>; + defm : intrinsic_compact_fp_immediates_zeroing<"FMUL_ZPZI_ZERO", fpimm_half, fpimm_two, int_aarch64_sve_fmul>; + defm : intrinsic_compact_fp_immediates_zeroing<"FMAX_ZPZI_ZERO", fpimm0, fpimm_one, int_aarch64_sve_fmax>; + defm : intrinsic_compact_fp_immediates_zeroing<"FMIN_ZPZI_ZERO", fpimm0, fpimm_one, int_aarch64_sve_fmin>; + defm : intrinsic_compact_fp_immediates_zeroing<"FMAXNM_ZPZI_ZERO", fpimm0, fpimm_one, int_aarch64_sve_fmaxnm>; + defm : intrinsic_compact_fp_immediates_zeroing<"FMINNM_ZPZI_ZERO", fpimm0, fpimm_one, int_aarch64_sve_fminnm>; + } + // Insert FP scalar into vector with scalar index def : Pat<(nxv8f16 (vector_insert (nxv8f16 ZPR:$vec), (f16 FPR16:$src), GPR64:$index)), (CPY_ZPmV_H ZPR:$vec, diff --git a/llvm/lib/Target/AArch64/SVEInstrFormats.td b/llvm/lib/Target/AArch64/SVEInstrFormats.td --- a/llvm/lib/Target/AArch64/SVEInstrFormats.td +++ b/llvm/lib/Target/AArch64/SVEInstrFormats.td @@ -1623,10 +1623,12 @@ let ElementSize = zprty.ElementSize; } -multiclass sve_fp_2op_i_p_zds opc, string asm, Operand imm_ty> { - def _H : sve_fp_2op_i_p_zds<0b01, opc, asm, ZPR16, imm_ty>; - def _S : sve_fp_2op_i_p_zds<0b10, opc, asm, ZPR32, imm_ty>; - def _D : sve_fp_2op_i_p_zds<0b11, opc, asm, ZPR64, imm_ty>; +multiclass sve_fp_2op_i_p_zds opc, string asm, string Ps, Operand imm_ty> { + let DestructiveInstType = DestructiveBinaryImm in { + def _H : SVEPseudo2Instr, sve_fp_2op_i_p_zds<0b01, opc, asm, ZPR16, imm_ty>; + def _S : SVEPseudo2Instr, sve_fp_2op_i_p_zds<0b10, opc, asm, ZPR32, imm_ty>; + def _D : SVEPseudo2Instr, sve_fp_2op_i_p_zds<0b11, opc, asm, ZPR64, imm_ty>; + } } class 
sve_fp_2op_p_zds sz, bits<4> opc, string asm, @@ -8089,3 +8091,16 @@ def : SVE_Shift_DupImm_Pred_Pat(NAME # _UNDEF_S)>; def : SVE_Shift_DupImm_Pred_Pat(NAME # _UNDEF_D)>; } + +multiclass sve_fp_2op_i_p_zds_zx { + def _UNDEF_H : PredTwoOpImmPseudo; + def _UNDEF_S : PredTwoOpImmPseudo; + def _UNDEF_D : PredTwoOpImmPseudo; +} + + +multiclass sve_fp_2op_i_p_zds_zx_zeroing { + def _ZERO_H : PredTwoOpImmPseudo; + def _ZERO_S : PredTwoOpImmPseudo; + def _ZERO_D : PredTwoOpImmPseudo; +} \ No newline at end of file diff --git a/llvm/test/CodeGen/AArch64/sve-fp-immediates-merging.ll b/llvm/test/CodeGen/AArch64/sve-fp-immediates-merging.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/sve-fp-immediates-merging.ll @@ -0,0 +1,1856 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s | FileCheck %s + +target triple = "aarch64-unknown-linux-gnu" + +; +; FADD +; + +define @fadd_h_immhalf( %a) #0 { +; CHECK-LABEL: fadd_h_immhalf: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: fadd z0.h, p0/m, z0.h, #0.5 +; CHECK-NEXT: ret + %elt = insertelement undef, half 0.500000e+00, i32 0 + %splat = shufflevector %elt, undef, zeroinitializer + %out = fadd %a, %splat + ret %out +} + +define @fadd_h_immhalf_acle( %pg, %a) #0 { +; CHECK-LABEL: fadd_h_immhalf_acle: +; CHECK: // %bb.0: +; CHECK-NEXT: fadd z0.h, p0/m, z0.h, #0.5 +; CHECK-NEXT: ret + %elt = insertelement undef, half 0.500000e+00, i32 0 + %splat = shufflevector %elt, undef, zeroinitializer + %out = call @llvm.aarch64.sve.fadd.nxv8f16( %pg, + %a, + %splat) + ret %out +} + +define @fadd_h_immhalf_acle_zero( %pg, %a) #1 { +; CHECK-LABEL: fadd_h_immhalf_acle_zero: +; CHECK: // %bb.0: +; CHECK-NEXT: movprfx z0.h, p0/z, z0.h +; CHECK-NEXT: fadd z0.h, p0/m, z0.h, #0.5 +; CHECK-NEXT: ret + %elt = insertelement undef, half 0.500000e+00, i32 0 + %splat = shufflevector %elt, undef, zeroinitializer + %a_z = select %pg, %a, zeroinitializer + %out = call 
@llvm.aarch64.sve.fadd.nxv8f16( %pg, + %a_z, + %splat) + ret %out +} + +define @fadd_h_immone( %a) #0 { +; CHECK-LABEL: fadd_h_immone: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: fadd z0.h, p0/m, z0.h, #1.0 +; CHECK-NEXT: ret + %elt = insertelement undef, half 1.000000e+00, i32 0 + %splat = shufflevector %elt, undef, zeroinitializer + %out = fadd %a, %splat + ret %out +} + +define @fadd_h_immone_acle( %pg, %a) #0 { +; CHECK-LABEL: fadd_h_immone_acle: +; CHECK: // %bb.0: +; CHECK-NEXT: fadd z0.h, p0/m, z0.h, #1.0 +; CHECK-NEXT: ret + %elt = insertelement undef, half 1.000000e+00, i32 0 + %splat = shufflevector %elt, undef, zeroinitializer + %out = call @llvm.aarch64.sve.fadd.nxv8f16( %pg, + %a, + %splat) + ret %out +} + +define @fadd_h_immone_acle_zero( %pg, %a) #1 { +; CHECK-LABEL: fadd_h_immone_acle_zero: +; CHECK: // %bb.0: +; CHECK-NEXT: movprfx z0.h, p0/z, z0.h +; CHECK-NEXT: fadd z0.h, p0/m, z0.h, #1.0 +; CHECK-NEXT: ret + %elt = insertelement undef, half 1.000000e+00, i32 0 + %splat = shufflevector %elt, undef, zeroinitializer + %a_z = select %pg, %a, zeroinitializer + %out = call @llvm.aarch64.sve.fadd.nxv8f16( %pg, + %a_z, + %splat) + ret %out +} + +define @fadd_s_immhalf( %a) #0 { +; CHECK-LABEL: fadd_s_immhalf: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: fadd z0.s, p0/m, z0.s, #0.5 +; CHECK-NEXT: ret + %elt = insertelement undef, float 0.500000e+00, i32 0 + %splat = shufflevector %elt, undef, zeroinitializer + %out = fadd %a, %splat + ret %out +} + +define @fadd_s_immhalf_acle( %pg, %a) #0 { +; CHECK-LABEL: fadd_s_immhalf_acle: +; CHECK: // %bb.0: +; CHECK-NEXT: fadd z0.s, p0/m, z0.s, #0.5 +; CHECK-NEXT: ret + %elt = insertelement undef, float 0.500000e+00, i32 0 + %splat = shufflevector %elt, undef, zeroinitializer + %out = call @llvm.aarch64.sve.fadd.nxv4f32( %pg, + %a, + %splat) + ret %out +} + +define @fadd_s_immhalf_acle_zero( %pg, %a) #1 { +; CHECK-LABEL: fadd_s_immhalf_acle_zero: +; CHECK: // %bb.0: +; CHECK-NEXT: 
movprfx z0.s, p0/z, z0.s +; CHECK-NEXT: fadd z0.s, p0/m, z0.s, #0.5 +; CHECK-NEXT: ret + %elt = insertelement undef, float 0.500000e+00, i32 0 + %splat = shufflevector %elt, undef, zeroinitializer + %a_z = select %pg, %a, zeroinitializer + %out = call @llvm.aarch64.sve.fadd.nxv4f32( %pg, + %a_z, + %splat) + ret %out +} + +define @fadd_s_immone( %a) #0 { +; CHECK-LABEL: fadd_s_immone: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: fadd z0.s, p0/m, z0.s, #1.0 +; CHECK-NEXT: ret + %elt = insertelement undef, float 1.000000e+00, i32 0 + %splat = shufflevector %elt, undef, zeroinitializer + %out = fadd %a, %splat + ret %out +} + +define @fadd_s_immone_acle( %pg, %a) #0 { +; CHECK-LABEL: fadd_s_immone_acle: +; CHECK: // %bb.0: +; CHECK-NEXT: fadd z0.s, p0/m, z0.s, #1.0 +; CHECK-NEXT: ret + %elt = insertelement undef, float 1.000000e+00, i32 0 + %splat = shufflevector %elt, undef, zeroinitializer + %out = call @llvm.aarch64.sve.fadd.nxv4f32( %pg, + %a, + %splat) + ret %out +} + +define @fadd_s_immone_acle_zero( %pg, %a) #1 { +; CHECK-LABEL: fadd_s_immone_acle_zero: +; CHECK: // %bb.0: +; CHECK-NEXT: movprfx z0.s, p0/z, z0.s +; CHECK-NEXT: fadd z0.s, p0/m, z0.s, #1.0 +; CHECK-NEXT: ret + %elt = insertelement undef, float 1.000000e+00, i32 0 + %splat = shufflevector %elt, undef, zeroinitializer + %a_z = select %pg, %a, zeroinitializer + %out = call @llvm.aarch64.sve.fadd.nxv4f32( %pg, + %a_z, + %splat) + ret %out +} + +define @fadd_d_immhalf( %a) #0 { +; CHECK-LABEL: fadd_d_immhalf: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: fadd z0.d, p0/m, z0.d, #0.5 +; CHECK-NEXT: ret + %elt = insertelement undef, double 0.500000e+00, i32 0 + %splat = shufflevector %elt, undef, zeroinitializer + %out = fadd %a, %splat + ret %out +} + +define @fadd_d_immhalf_acle( %pg, %a) #0 { +; CHECK-LABEL: fadd_d_immhalf_acle: +; CHECK: // %bb.0: +; CHECK-NEXT: fadd z0.d, p0/m, z0.d, #0.5 +; CHECK-NEXT: ret + %elt = insertelement undef, double 0.500000e+00, i32 0 + 
%splat = shufflevector %elt, undef, zeroinitializer + %out = call @llvm.aarch64.sve.fadd.nxv2f64( %pg, + %a, + %splat) + ret %out +} + +define @fadd_d_immhalf_acle_zero( %pg, %a) #1 { +; CHECK-LABEL: fadd_d_immhalf_acle_zero: +; CHECK: // %bb.0: +; CHECK-NEXT: movprfx z0.d, p0/z, z0.d +; CHECK-NEXT: fadd z0.d, p0/m, z0.d, #0.5 +; CHECK-NEXT: ret + %elt = insertelement undef, double 0.500000e+00, i32 0 + %splat = shufflevector %elt, undef, zeroinitializer + %a_z = select %pg, %a, zeroinitializer + %out = call @llvm.aarch64.sve.fadd.nxv2f64( %pg, + %a_z, + %splat) + ret %out +} + +define @fadd_d_immone( %a) #0 { +; CHECK-LABEL: fadd_d_immone: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: fadd z0.d, p0/m, z0.d, #1.0 +; CHECK-NEXT: ret + %elt = insertelement undef, double 1.000000e+00, i32 0 + %splat = shufflevector %elt, undef, zeroinitializer + %out = fadd %a, %splat + ret %out +} + +define @fadd_d_immone_acle( %pg, %a) #0 { +; CHECK-LABEL: fadd_d_immone_acle: +; CHECK: // %bb.0: +; CHECK-NEXT: fadd z0.d, p0/m, z0.d, #1.0 +; CHECK-NEXT: ret + %elt = insertelement undef, double 1.000000e+00, i32 0 + %splat = shufflevector %elt, undef, zeroinitializer + %out = call @llvm.aarch64.sve.fadd.nxv2f64( %pg, + %a, + %splat) + ret %out +} + +define @fadd_d_immone_acle_zero( %pg, %a) #1 { +; CHECK-LABEL: fadd_d_immone_acle_zero: +; CHECK: // %bb.0: +; CHECK-NEXT: movprfx z0.d, p0/z, z0.d +; CHECK-NEXT: fadd z0.d, p0/m, z0.d, #1.0 +; CHECK-NEXT: ret + %elt = insertelement undef, double 1.000000e+00, i32 0 + %splat = shufflevector %elt, undef, zeroinitializer + %a_z = select %pg, %a, zeroinitializer + %out = call @llvm.aarch64.sve.fadd.nxv2f64( %pg, + %a_z, + %splat) + ret %out +} + +; +; FMAX +; + +define @fmax_h_immzero( %a) #0 { +; CHECK-LABEL: fmax_h_immzero: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: fmax z0.h, p0/m, z0.h, #0.0 +; CHECK-NEXT: ret + %elt = insertelement undef, half 0.000000e+00, i32 0 + %splat = shufflevector %elt, undef, 
zeroinitializer + %out = call @llvm.maximum.nxv8f16( %a, %splat) + ret %out +} + +define @fmax_h_immzero_acle( %pg, %a) #0 { +; CHECK-LABEL: fmax_h_immzero_acle: +; CHECK: // %bb.0: +; CHECK-NEXT: fmax z0.h, p0/m, z0.h, #0.0 +; CHECK-NEXT: ret + %elt = insertelement undef, half 0.000000e+00, i32 0 + %splat = shufflevector %elt, undef, zeroinitializer + %out = call @llvm.aarch64.sve.fmax.nxv8f16( %pg, + %a, + %splat) + ret %out +} + +define @fmax_h_immzero_acle_zero( %pg, %a) #1 { +; CHECK-LABEL: fmax_h_immzero_acle_zero: +; CHECK: // %bb.0: +; CHECK-NEXT: movprfx z0.h, p0/z, z0.h +; CHECK-NEXT: fmax z0.h, p0/m, z0.h, #0.0 +; CHECK-NEXT: ret + %elt = insertelement undef, half 0.000000e+00, i32 0 + %splat = shufflevector %elt, undef, zeroinitializer + %a_z = select %pg, %a, zeroinitializer + %out = call @llvm.aarch64.sve.fmax.nxv8f16( %pg, + %a_z, + %splat) + ret %out +} + +define @fmax_h_immone( %a) #0 { +; CHECK-LABEL: fmax_h_immone: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: fmax z0.h, p0/m, z0.h, #1.0 +; CHECK-NEXT: ret + %elt = insertelement undef, half 1.000000e+00, i32 0 + %splat = shufflevector %elt, undef, zeroinitializer + %out = call @llvm.maximum.nxv8f16( %a, %splat) + ret %out +} + +define @fmax_h_immone_acle( %pg, %a) #0 { +; CHECK-LABEL: fmax_h_immone_acle: +; CHECK: // %bb.0: +; CHECK-NEXT: fmax z0.h, p0/m, z0.h, #1.0 +; CHECK-NEXT: ret + %elt = insertelement undef, half 1.000000e+00, i32 0 + %splat = shufflevector %elt, undef, zeroinitializer + %out = call @llvm.aarch64.sve.fmax.nxv8f16( %pg, + %a, + %splat) + ret %out +} + +define @fmax_h_immone_acle_zero( %pg, %a) #1 { +; CHECK-LABEL: fmax_h_immone_acle_zero: +; CHECK: // %bb.0: +; CHECK-NEXT: movprfx z0.h, p0/z, z0.h +; CHECK-NEXT: fmax z0.h, p0/m, z0.h, #1.0 +; CHECK-NEXT: ret + %elt = insertelement undef, half 1.000000e+00, i32 0 + %splat = shufflevector %elt, undef, zeroinitializer + %a_z = select %pg, %a, zeroinitializer + %out = call @llvm.aarch64.sve.fmax.nxv8f16( %pg, + 
%a_z, + %splat) + ret %out +} + +define @fmax_s_immzero( %a) #0 { +; CHECK-LABEL: fmax_s_immzero: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: fmax z0.s, p0/m, z0.s, #0.0 +; CHECK-NEXT: ret + %elt = insertelement undef, float 0.000000e+00, i32 0 + %splat = shufflevector %elt, undef, zeroinitializer + %out = call @llvm.maximum.nxv4f32( %a, %splat) + ret %out +} + +define @fmax_s_immzero_acle( %pg, %a) #0 { +; CHECK-LABEL: fmax_s_immzero_acle: +; CHECK: // %bb.0: +; CHECK-NEXT: fmax z0.s, p0/m, z0.s, #0.0 +; CHECK-NEXT: ret + %elt = insertelement undef, float 0.000000e+00, i32 0 + %splat = shufflevector %elt, undef, zeroinitializer + %out = call @llvm.aarch64.sve.fmax.nxv4f32( %pg, + %a, + %splat) + ret %out +} + +define @fmax_s_immzero_acle_zero( %pg, %a) #1 { +; CHECK-LABEL: fmax_s_immzero_acle_zero: +; CHECK: // %bb.0: +; CHECK-NEXT: movprfx z0.s, p0/z, z0.s +; CHECK-NEXT: fmax z0.s, p0/m, z0.s, #0.0 +; CHECK-NEXT: ret + %elt = insertelement undef, float 0.000000e+00, i32 0 + %splat = shufflevector %elt, undef, zeroinitializer + %a_z = select %pg, %a, zeroinitializer + %out = call @llvm.aarch64.sve.fmax.nxv4f32( %pg, + %a_z, + %splat) + ret %out +} + +define @fmax_s_immone( %a) #0 { +; CHECK-LABEL: fmax_s_immone: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: fmax z0.s, p0/m, z0.s, #1.0 +; CHECK-NEXT: ret + %elt = insertelement undef, float 1.000000e+00, i32 0 + %splat = shufflevector %elt, undef, zeroinitializer + %out = call @llvm.maximum.nxv4f32( %a, %splat) + ret %out +} + +define @fmax_s_immone_acle( %pg, %a) #0 { +; CHECK-LABEL: fmax_s_immone_acle: +; CHECK: // %bb.0: +; CHECK-NEXT: fmax z0.s, p0/m, z0.s, #1.0 +; CHECK-NEXT: ret + %elt = insertelement undef, float 1.000000e+00, i32 0 + %splat = shufflevector %elt, undef, zeroinitializer + %out = call @llvm.aarch64.sve.fmax.nxv4f32( %pg, + %a, + %splat) + ret %out +} + +define @fmax_s_immone_acle_zero( %pg, %a) #1 { +; CHECK-LABEL: fmax_s_immone_acle_zero: +; CHECK: // %bb.0: +; 
CHECK-NEXT: movprfx z0.s, p0/z, z0.s +; CHECK-NEXT: fmax z0.s, p0/m, z0.s, #1.0 +; CHECK-NEXT: ret + %elt = insertelement undef, float 1.000000e+00, i32 0 + %splat = shufflevector %elt, undef, zeroinitializer + %a_z = select %pg, %a, zeroinitializer + %out = call @llvm.aarch64.sve.fmax.nxv4f32( %pg, + %a_z, + %splat) + ret %out +} + +define @fmax_d_immzero( %a) #0 { +; CHECK-LABEL: fmax_d_immzero: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: fmax z0.d, p0/m, z0.d, #0.0 +; CHECK-NEXT: ret + %elt = insertelement undef, double 0.000000e+00, i32 0 + %splat = shufflevector %elt, undef, zeroinitializer + %out = call @llvm.maximum.nxv2f64( %a, %splat) + ret %out +} + +define @fmax_d_immzero_acle( %pg, %a) #0 { +; CHECK-LABEL: fmax_d_immzero_acle: +; CHECK: // %bb.0: +; CHECK-NEXT: fmax z0.d, p0/m, z0.d, #0.0 +; CHECK-NEXT: ret + %elt = insertelement undef, double 0.000000e+00, i32 0 + %splat = shufflevector %elt, undef, zeroinitializer + %out = call @llvm.aarch64.sve.fmax.nxv2f64( %pg, + %a, + %splat) + ret %out +} + +define @fmax_d_immzero_acle_zero( %pg, %a) #1 { +; CHECK-LABEL: fmax_d_immzero_acle_zero: +; CHECK: // %bb.0: +; CHECK-NEXT: movprfx z0.d, p0/z, z0.d +; CHECK-NEXT: fmax z0.d, p0/m, z0.d, #0.0 +; CHECK-NEXT: ret + %elt = insertelement undef, double 0.000000e+00, i32 0 + %splat = shufflevector %elt, undef, zeroinitializer + %a_z = select %pg, %a, zeroinitializer + %out = call @llvm.aarch64.sve.fmax.nxv2f64( %pg, + %a_z, + %splat) + ret %out +} + +define @fmax_d_immone( %a) #0 { +; CHECK-LABEL: fmax_d_immone: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: fmax z0.d, p0/m, z0.d, #1.0 +; CHECK-NEXT: ret + %elt = insertelement undef, double 1.000000e+00, i32 0 + %splat = shufflevector %elt, undef, zeroinitializer + %out = call @llvm.maximum.nxv2f64( %a, %splat) + ret %out +} + +define @fmax_d_immone_acle( %pg, %a) #0 { +; CHECK-LABEL: fmax_d_immone_acle: +; CHECK: // %bb.0: +; CHECK-NEXT: fmax z0.d, p0/m, z0.d, #1.0 +; CHECK-NEXT: 
ret + %elt = insertelement undef, double 1.000000e+00, i32 0 + %splat = shufflevector %elt, undef, zeroinitializer + %out = call @llvm.aarch64.sve.fmax.nxv2f64( %pg, + %a, + %splat) + ret %out +} + +define @fmax_d_immone_acle_zero( %pg, %a) #1 { +; CHECK-LABEL: fmax_d_immone_acle_zero: +; CHECK: // %bb.0: +; CHECK-NEXT: movprfx z0.d, p0/z, z0.d +; CHECK-NEXT: fmax z0.d, p0/m, z0.d, #1.0 +; CHECK-NEXT: ret + %elt = insertelement undef, double 1.000000e+00, i32 0 + %splat = shufflevector %elt, undef, zeroinitializer + %a_z = select %pg, %a, zeroinitializer + %out = call @llvm.aarch64.sve.fmax.nxv2f64( %pg, + %a_z, + %splat) + ret %out +} + +; +; FMAXNM +; + +define @fmaxnm_h_immzero( %a) #0 { +; CHECK-LABEL: fmaxnm_h_immzero: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: fmaxnm z0.h, p0/m, z0.h, #0.0 +; CHECK-NEXT: ret + %elt = insertelement undef, half 0.000000e+00, i32 0 + %splat = shufflevector %elt, undef, zeroinitializer + %out = call @llvm.maxnum.nxv8f16( %a, %splat) + ret %out +} + +define @fmaxnm_h_immzero_acle( %pg, %a) #0 { +; CHECK-LABEL: fmaxnm_h_immzero_acle: +; CHECK: // %bb.0: +; CHECK-NEXT: fmaxnm z0.h, p0/m, z0.h, #0.0 +; CHECK-NEXT: ret + %elt = insertelement undef, half 0.000000e+00, i32 0 + %splat = shufflevector %elt, undef, zeroinitializer + %out = call @llvm.aarch64.sve.fmaxnm.nxv8f16( %pg, + %a, + %splat) + ret %out +} + +define @fmaxnm_h_immzero_acle_zero( %pg, %a) #1 { +; CHECK-LABEL: fmaxnm_h_immzero_acle_zero: +; CHECK: // %bb.0: +; CHECK-NEXT: movprfx z0.h, p0/z, z0.h +; CHECK-NEXT: fmaxnm z0.h, p0/m, z0.h, #0.0 +; CHECK-NEXT: ret + %elt = insertelement undef, half 0.000000e+00, i32 0 + %splat = shufflevector %elt, undef, zeroinitializer + %a_z = select %pg, %a, zeroinitializer + %out = call @llvm.aarch64.sve.fmaxnm.nxv8f16( %pg, + %a_z, + %splat) + ret %out +} + +define @fmaxnm_h_immone( %a) #0 { +; CHECK-LABEL: fmaxnm_h_immone: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: fmaxnm z0.h, p0/m, z0.h, #1.0 +; 
CHECK-NEXT: ret + %elt = insertelement undef, half 1.000000e+00, i32 0 + %splat = shufflevector %elt, undef, zeroinitializer + %out = call @llvm.maxnum.nxv8f16( %a, %splat) + ret %out +} + +define @fmaxnm_h_immone_acle( %pg, %a) #0 { +; CHECK-LABEL: fmaxnm_h_immone_acle: +; CHECK: // %bb.0: +; CHECK-NEXT: fmaxnm z0.h, p0/m, z0.h, #1.0 +; CHECK-NEXT: ret + %elt = insertelement undef, half 1.000000e+00, i32 0 + %splat = shufflevector %elt, undef, zeroinitializer + %out = call @llvm.aarch64.sve.fmaxnm.nxv8f16( %pg, + %a, + %splat) + ret %out +} + +define @fmaxnm_h_immone_acle_zero( %pg, %a) #1 { +; CHECK-LABEL: fmaxnm_h_immone_acle_zero: +; CHECK: // %bb.0: +; CHECK-NEXT: movprfx z0.h, p0/z, z0.h +; CHECK-NEXT: fmaxnm z0.h, p0/m, z0.h, #1.0 +; CHECK-NEXT: ret + %elt = insertelement undef, half 1.000000e+00, i32 0 + %splat = shufflevector %elt, undef, zeroinitializer + %a_z = select %pg, %a, zeroinitializer + %out = call @llvm.aarch64.sve.fmaxnm.nxv8f16( %pg, + %a_z, + %splat) + ret %out +} + +define @fmaxnm_s_immzero( %a) #0 { +; CHECK-LABEL: fmaxnm_s_immzero: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: fmaxnm z0.s, p0/m, z0.s, #0.0 +; CHECK-NEXT: ret + %elt = insertelement undef, float 0.000000e+00, i32 0 + %splat = shufflevector %elt, undef, zeroinitializer + %out = call @llvm.maxnum.nxv4f32( %a, %splat) + ret %out +} + +define @fmaxnm_s_immzero_acle( %pg, %a) #0 { +; CHECK-LABEL: fmaxnm_s_immzero_acle: +; CHECK: // %bb.0: +; CHECK-NEXT: fmaxnm z0.s, p0/m, z0.s, #0.0 +; CHECK-NEXT: ret + %elt = insertelement undef, float 0.000000e+00, i32 0 + %splat = shufflevector %elt, undef, zeroinitializer + %out = call @llvm.aarch64.sve.fmaxnm.nxv4f32( %pg, + %a, + %splat) + ret %out +} + +define @fmaxnm_s_immzero_acle_zero( %pg, %a) #1 { +; CHECK-LABEL: fmaxnm_s_immzero_acle_zero: +; CHECK: // %bb.0: +; CHECK-NEXT: movprfx z0.s, p0/z, z0.s +; CHECK-NEXT: fmaxnm z0.s, p0/m, z0.s, #0.0 +; CHECK-NEXT: ret + %elt = insertelement undef, float 0.000000e+00, i32 0 + 
%splat = shufflevector %elt, undef, zeroinitializer + %a_z = select %pg, %a, zeroinitializer + %out = call @llvm.aarch64.sve.fmaxnm.nxv4f32( %pg, + %a_z, + %splat) + ret %out +} + +define @fmaxnm_s_immone( %a) #0 { +; CHECK-LABEL: fmaxnm_s_immone: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: fmaxnm z0.s, p0/m, z0.s, #1.0 +; CHECK-NEXT: ret + %elt = insertelement undef, float 1.000000e+00, i32 0 + %splat = shufflevector %elt, undef, zeroinitializer + %out = call @llvm.maxnum.nxv4f32( %a, %splat) + ret %out +} + +define @fmaxnm_s_immone_acle( %pg, %a) #0 { +; CHECK-LABEL: fmaxnm_s_immone_acle: +; CHECK: // %bb.0: +; CHECK-NEXT: fmaxnm z0.s, p0/m, z0.s, #1.0 +; CHECK-NEXT: ret + %elt = insertelement undef, float 1.000000e+00, i32 0 + %splat = shufflevector %elt, undef, zeroinitializer + %out = call @llvm.aarch64.sve.fmaxnm.nxv4f32( %pg, + %a, + %splat) + ret %out +} + +define @fmaxnm_s_immone_acle_zero( %pg, %a) #1 { +; CHECK-LABEL: fmaxnm_s_immone_acle_zero: +; CHECK: // %bb.0: +; CHECK-NEXT: movprfx z0.s, p0/z, z0.s +; CHECK-NEXT: fmaxnm z0.s, p0/m, z0.s, #1.0 +; CHECK-NEXT: ret + %elt = insertelement undef, float 1.000000e+00, i32 0 + %splat = shufflevector %elt, undef, zeroinitializer + %a_z = select %pg, %a, zeroinitializer + %out = call @llvm.aarch64.sve.fmaxnm.nxv4f32( %pg, + %a_z, + %splat) + ret %out +} + +define @fmaxnm_d_immzero( %a) #0 { +; CHECK-LABEL: fmaxnm_d_immzero: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: fmaxnm z0.d, p0/m, z0.d, #0.0 +; CHECK-NEXT: ret + %elt = insertelement undef, double 0.000000e+00, i32 0 + %splat = shufflevector %elt, undef, zeroinitializer + %out = call @llvm.maxnum.nxv2f64( %a, %splat) + ret %out +} + +define @fmaxnm_d_immzero_acle( %pg, %a) #0 { +; CHECK-LABEL: fmaxnm_d_immzero_acle: +; CHECK: // %bb.0: +; CHECK-NEXT: fmaxnm z0.d, p0/m, z0.d, #0.0 +; CHECK-NEXT: ret + %elt = insertelement undef, double 0.000000e+00, i32 0 + %splat = shufflevector %elt, undef, zeroinitializer + %out = call 
@llvm.aarch64.sve.fmaxnm.nxv2f64( %pg, + %a, + %splat) + ret %out +} + +define @fmaxnm_d_immzero_acle_zero( %pg, %a) #1 { +; CHECK-LABEL: fmaxnm_d_immzero_acle_zero: +; CHECK: // %bb.0: +; CHECK-NEXT: movprfx z0.d, p0/z, z0.d +; CHECK-NEXT: fmaxnm z0.d, p0/m, z0.d, #0.0 +; CHECK-NEXT: ret + %elt = insertelement undef, double 0.000000e+00, i32 0 + %splat = shufflevector %elt, undef, zeroinitializer + %a_z = select %pg, %a, zeroinitializer + %out = call @llvm.aarch64.sve.fmaxnm.nxv2f64( %pg, + %a_z, + %splat) + ret %out +} + +define @fmaxnm_d_immone( %a) #0 { +; CHECK-LABEL: fmaxnm_d_immone: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: fmaxnm z0.d, p0/m, z0.d, #1.0 +; CHECK-NEXT: ret + %elt = insertelement undef, double 1.000000e+00, i32 0 + %splat = shufflevector %elt, undef, zeroinitializer + %out = call @llvm.maxnum.nxv2f64( %a, %splat) + ret %out +} + +define @fmaxnm_d_immone_acle( %pg, %a) #0 { +; CHECK-LABEL: fmaxnm_d_immone_acle: +; CHECK: // %bb.0: +; CHECK-NEXT: fmaxnm z0.d, p0/m, z0.d, #1.0 +; CHECK-NEXT: ret + %elt = insertelement undef, double 1.000000e+00, i32 0 + %splat = shufflevector %elt, undef, zeroinitializer + %out = call @llvm.aarch64.sve.fmaxnm.nxv2f64( %pg, + %a, + %splat) + ret %out +} + +define @fmaxnm_d_immone_acle_zero( %pg, %a) #1 { +; CHECK-LABEL: fmaxnm_d_immone_acle_zero: +; CHECK: // %bb.0: +; CHECK-NEXT: movprfx z0.d, p0/z, z0.d +; CHECK-NEXT: fmaxnm z0.d, p0/m, z0.d, #1.0 +; CHECK-NEXT: ret + %elt = insertelement undef, double 1.000000e+00, i32 0 + %splat = shufflevector %elt, undef, zeroinitializer + %a_z = select %pg, %a, zeroinitializer + %out = call @llvm.aarch64.sve.fmaxnm.nxv2f64( %pg, + %a_z, + %splat) + ret %out +} + +; +; FMIN +; + +define @fmin_h_immzero( %a) #0 { +; CHECK-LABEL: fmin_h_immzero: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: fmin z0.h, p0/m, z0.h, #0.0 +; CHECK-NEXT: ret + %elt = insertelement undef, half 0.000000e+00, i32 0 + %splat = shufflevector %elt, undef, zeroinitializer 
+ %out = call @llvm.minimum.nxv8f16( %a, %splat) + ret %out +} + +define @fmin_h_immzero_acle( %pg, %a) #0 { +; CHECK-LABEL: fmin_h_immzero_acle: +; CHECK: // %bb.0: +; CHECK-NEXT: fmin z0.h, p0/m, z0.h, #0.0 +; CHECK-NEXT: ret + %elt = insertelement undef, half 0.000000e+00, i32 0 + %splat = shufflevector %elt, undef, zeroinitializer + %out = call @llvm.aarch64.sve.fmin.nxv8f16( %pg, + %a, + %splat) + ret %out +} + +define @fmin_h_immzero_acle_zero( %pg, %a) #1 { +; CHECK-LABEL: fmin_h_immzero_acle_zero: +; CHECK: // %bb.0: +; CHECK-NEXT: movprfx z0.h, p0/z, z0.h +; CHECK-NEXT: fmin z0.h, p0/m, z0.h, #0.0 +; CHECK-NEXT: ret + %elt = insertelement undef, half 0.000000e+00, i32 0 + %splat = shufflevector %elt, undef, zeroinitializer + %a_z = select %pg, %a, zeroinitializer + %out = call @llvm.aarch64.sve.fmin.nxv8f16( %pg, + %a_z, + %splat) + ret %out +} + +define @fmin_h_immone( %a) #0 { +; CHECK-LABEL: fmin_h_immone: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: fmin z0.h, p0/m, z0.h, #1.0 +; CHECK-NEXT: ret + %elt = insertelement undef, half 1.000000e+00, i32 0 + %splat = shufflevector %elt, undef, zeroinitializer + %out = call @llvm.minimum.nxv8f16( %a, %splat) + ret %out +} + +define @fmin_h_immone_acle( %pg, %a) #0 { +; CHECK-LABEL: fmin_h_immone_acle: +; CHECK: // %bb.0: +; CHECK-NEXT: fmin z0.h, p0/m, z0.h, #1.0 +; CHECK-NEXT: ret + %elt = insertelement undef, half 1.000000e+00, i32 0 + %splat = shufflevector %elt, undef, zeroinitializer + %out = call @llvm.aarch64.sve.fmin.nxv8f16( %pg, + %a, + %splat) + ret %out +} + +define @fmin_h_immone_acle_zero( %pg, %a) #1 { +; CHECK-LABEL: fmin_h_immone_acle_zero: +; CHECK: // %bb.0: +; CHECK-NEXT: movprfx z0.h, p0/z, z0.h +; CHECK-NEXT: fmin z0.h, p0/m, z0.h, #1.0 +; CHECK-NEXT: ret + %elt = insertelement undef, half 1.000000e+00, i32 0 + %splat = shufflevector %elt, undef, zeroinitializer + %a_z = select %pg, %a, zeroinitializer + %out = call @llvm.aarch64.sve.fmin.nxv8f16( %pg, + %a_z, + %splat) + 
ret %out +} + +define @fmin_s_immzero( %a) #0 { +; CHECK-LABEL: fmin_s_immzero: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: fmin z0.s, p0/m, z0.s, #0.0 +; CHECK-NEXT: ret + %elt = insertelement undef, float 0.000000e+00, i32 0 + %splat = shufflevector %elt, undef, zeroinitializer + %out = call @llvm.minimum.nxv4f32( %a, %splat) + ret %out +} + +define @fmin_s_immzero_acle( %pg, %a) #0 { +; CHECK-LABEL: fmin_s_immzero_acle: +; CHECK: // %bb.0: +; CHECK-NEXT: fmin z0.s, p0/m, z0.s, #0.0 +; CHECK-NEXT: ret + %elt = insertelement undef, float 0.000000e+00, i32 0 + %splat = shufflevector %elt, undef, zeroinitializer + %out = call @llvm.aarch64.sve.fmin.nxv4f32( %pg, + %a, + %splat) + ret %out +} + +define @fmin_s_immzero_acle_zero( %pg, %a) #1 { +; CHECK-LABEL: fmin_s_immzero_acle_zero: +; CHECK: // %bb.0: +; CHECK-NEXT: movprfx z0.s, p0/z, z0.s +; CHECK-NEXT: fmin z0.s, p0/m, z0.s, #0.0 +; CHECK-NEXT: ret + %elt = insertelement undef, float 0.000000e+00, i32 0 + %splat = shufflevector %elt, undef, zeroinitializer + %a_z = select %pg, %a, zeroinitializer + %out = call @llvm.aarch64.sve.fmin.nxv4f32( %pg, + %a_z, + %splat) + ret %out +} + +define @fmin_s_immone( %a) #0 { +; CHECK-LABEL: fmin_s_immone: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: fmin z0.s, p0/m, z0.s, #1.0 +; CHECK-NEXT: ret + %elt = insertelement undef, float 1.000000e+00, i32 0 + %splat = shufflevector %elt, undef, zeroinitializer + %out = call @llvm.minimum.nxv4f32( %a, %splat) + ret %out +} + +define @fmin_s_immone_acle( %pg, %a) #0 { +; CHECK-LABEL: fmin_s_immone_acle: +; CHECK: // %bb.0: +; CHECK-NEXT: fmin z0.s, p0/m, z0.s, #1.0 +; CHECK-NEXT: ret + %elt = insertelement undef, float 1.000000e+00, i32 0 + %splat = shufflevector %elt, undef, zeroinitializer + %out = call @llvm.aarch64.sve.fmin.nxv4f32( %pg, + %a, + %splat) + ret %out +} + +define @fmin_s_immone_acle_zero( %pg, %a) #1 { +; CHECK-LABEL: fmin_s_immone_acle_zero: +; CHECK: // %bb.0: +; CHECK-NEXT: 
movprfx z0.s, p0/z, z0.s +; CHECK-NEXT: fmin z0.s, p0/m, z0.s, #1.0 +; CHECK-NEXT: ret + %elt = insertelement undef, float 1.000000e+00, i32 0 + %splat = shufflevector %elt, undef, zeroinitializer + %a_z = select %pg, %a, zeroinitializer + %out = call @llvm.aarch64.sve.fmin.nxv4f32( %pg, + %a_z, + %splat) + ret %out +} + +define @fmin_d_immzero( %a) #0 { +; CHECK-LABEL: fmin_d_immzero: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: fmin z0.d, p0/m, z0.d, #0.0 +; CHECK-NEXT: ret + %elt = insertelement undef, double 0.000000e+00, i32 0 + %splat = shufflevector %elt, undef, zeroinitializer + %out = call @llvm.minimum.nxv2f64( %a, %splat) + ret %out +} + +define @fmin_d_immzero_acle( %pg, %a) #0 { +; CHECK-LABEL: fmin_d_immzero_acle: +; CHECK: // %bb.0: +; CHECK-NEXT: fmin z0.d, p0/m, z0.d, #0.0 +; CHECK-NEXT: ret + %elt = insertelement undef, double 0.000000e+00, i32 0 + %splat = shufflevector %elt, undef, zeroinitializer + %out = call @llvm.aarch64.sve.fmin.nxv2f64( %pg, + %a, + %splat) + ret %out +} + +define @fmin_d_immzero_acle_zero( %pg, %a) #1 { +; CHECK-LABEL: fmin_d_immzero_acle_zero: +; CHECK: // %bb.0: +; CHECK-NEXT: movprfx z0.d, p0/z, z0.d +; CHECK-NEXT: fmin z0.d, p0/m, z0.d, #0.0 +; CHECK-NEXT: ret + %elt = insertelement undef, double 0.000000e+00, i32 0 + %splat = shufflevector %elt, undef, zeroinitializer + %a_z = select %pg, %a, zeroinitializer + %out = call @llvm.aarch64.sve.fmin.nxv2f64( %pg, + %a_z, + %splat) + ret %out +} + +define @fmin_d_immone( %a) #0 { +; CHECK-LABEL: fmin_d_immone: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: fmin z0.d, p0/m, z0.d, #1.0 +; CHECK-NEXT: ret + %elt = insertelement undef, double 1.000000e+00, i32 0 + %splat = shufflevector %elt, undef, zeroinitializer + %out = call @llvm.minimum.nxv2f64( %a, %splat) + ret %out +} + +define @fmin_d_immone_acle( %pg, %a) #0 { +; CHECK-LABEL: fmin_d_immone_acle: +; CHECK: // %bb.0: +; CHECK-NEXT: fmin z0.d, p0/m, z0.d, #1.0 +; CHECK-NEXT: ret + %elt = 
insertelement undef, double 1.000000e+00, i32 0 + %splat = shufflevector %elt, undef, zeroinitializer + %out = call @llvm.aarch64.sve.fmin.nxv2f64( %pg, + %a, + %splat) + ret %out +} + +define @fmin_d_immone_acle_zero( %pg, %a) #1 { +; CHECK-LABEL: fmin_d_immone_acle_zero: +; CHECK: // %bb.0: +; CHECK-NEXT: movprfx z0.d, p0/z, z0.d +; CHECK-NEXT: fmin z0.d, p0/m, z0.d, #1.0 +; CHECK-NEXT: ret + %elt = insertelement undef, double 1.000000e+00, i32 0 + %splat = shufflevector %elt, undef, zeroinitializer + %a_z = select %pg, %a, zeroinitializer + %out = call @llvm.aarch64.sve.fmin.nxv2f64( %pg, + %a_z, + %splat) + ret %out +} + +; +; FMINNM +; + +define @fminnm_h_immzero( %a) #0 { +; CHECK-LABEL: fminnm_h_immzero: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: fminnm z0.h, p0/m, z0.h, #0.0 +; CHECK-NEXT: ret + %elt = insertelement undef, half 0.000000e+00, i32 0 + %splat = shufflevector %elt, undef, zeroinitializer + %out = call @llvm.minnum.nxv8f16( %a, %splat) + ret %out +} + +define @fminnm_h_immzero_acle( %pg, %a) #0 { +; CHECK-LABEL: fminnm_h_immzero_acle: +; CHECK: // %bb.0: +; CHECK-NEXT: fminnm z0.h, p0/m, z0.h, #0.0 +; CHECK-NEXT: ret + %elt = insertelement undef, half 0.000000e+00, i32 0 + %splat = shufflevector %elt, undef, zeroinitializer + %out = call @llvm.aarch64.sve.fminnm.nxv8f16( %pg, + %a, + %splat) + ret %out +} + +define @fminnm_h_immzero_acle_zero( %pg, %a) #1 { +; CHECK-LABEL: fminnm_h_immzero_acle_zero: +; CHECK: // %bb.0: +; CHECK-NEXT: movprfx z0.h, p0/z, z0.h +; CHECK-NEXT: fminnm z0.h, p0/m, z0.h, #0.0 +; CHECK-NEXT: ret + %elt = insertelement undef, half 0.000000e+00, i32 0 + %splat = shufflevector %elt, undef, zeroinitializer + %a_z = select %pg, %a, zeroinitializer + %out = call @llvm.aarch64.sve.fminnm.nxv8f16( %pg, + %a_z, + %splat) + ret %out +} + +define @fminnm_h_immone( %a) #0 { +; CHECK-LABEL: fminnm_h_immone: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: fminnm z0.h, p0/m, z0.h, #1.0 +; CHECK-NEXT: 
ret + %elt = insertelement undef, half 1.000000e+00, i32 0 + %splat = shufflevector %elt, undef, zeroinitializer + %out = call @llvm.minnum.nxv8f16( %a, %splat) + ret %out +} + +define @fminnm_h_immone_acle( %pg, %a) #0 { +; CHECK-LABEL: fminnm_h_immone_acle: +; CHECK: // %bb.0: +; CHECK-NEXT: fminnm z0.h, p0/m, z0.h, #1.0 +; CHECK-NEXT: ret + %elt = insertelement undef, half 1.000000e+00, i32 0 + %splat = shufflevector %elt, undef, zeroinitializer + %out = call @llvm.aarch64.sve.fminnm.nxv8f16( %pg, + %a, + %splat) + ret %out +} + +define @fminnm_h_immone_acle_zero( %pg, %a) #1 { +; CHECK-LABEL: fminnm_h_immone_acle_zero: +; CHECK: // %bb.0: +; CHECK-NEXT: movprfx z0.h, p0/z, z0.h +; CHECK-NEXT: fminnm z0.h, p0/m, z0.h, #1.0 +; CHECK-NEXT: ret + %elt = insertelement undef, half 1.000000e+00, i32 0 + %splat = shufflevector %elt, undef, zeroinitializer + %a_z = select %pg, %a, zeroinitializer + %out = call @llvm.aarch64.sve.fminnm.nxv8f16( %pg, + %a_z, + %splat) + ret %out +} + +define @fminnm_s_immzero( %a) #0 { +; CHECK-LABEL: fminnm_s_immzero: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: fminnm z0.s, p0/m, z0.s, #0.0 +; CHECK-NEXT: ret + %elt = insertelement undef, float 0.000000e+00, i32 0 + %splat = shufflevector %elt, undef, zeroinitializer + %out = call @llvm.minnum.nxv4f32( %a, %splat) + ret %out +} + +define @fminnm_s_immzero_acle( %pg, %a) #0 { +; CHECK-LABEL: fminnm_s_immzero_acle: +; CHECK: // %bb.0: +; CHECK-NEXT: fminnm z0.s, p0/m, z0.s, #0.0 +; CHECK-NEXT: ret + %elt = insertelement undef, float 0.000000e+00, i32 0 + %splat = shufflevector %elt, undef, zeroinitializer + %out = call @llvm.aarch64.sve.fminnm.nxv4f32( %pg, + %a, + %splat) + ret %out +} + +define @fminnm_s_immzero_acle_zero( %pg, %a) #1 { +; CHECK-LABEL: fminnm_s_immzero_acle_zero: +; CHECK: // %bb.0: +; CHECK-NEXT: movprfx z0.s, p0/z, z0.s +; CHECK-NEXT: fminnm z0.s, p0/m, z0.s, #0.0 +; CHECK-NEXT: ret + %elt = insertelement undef, float 0.000000e+00, i32 0 + %splat = 
shufflevector %elt, undef, zeroinitializer + %a_z = select %pg, %a, zeroinitializer + %out = call @llvm.aarch64.sve.fminnm.nxv4f32( %pg, + %a_z, + %splat) + ret %out +} + +define @fminnm_s_immone( %a) #0 { +; CHECK-LABEL: fminnm_s_immone: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: fminnm z0.s, p0/m, z0.s, #1.0 +; CHECK-NEXT: ret + %elt = insertelement undef, float 1.000000e+00, i32 0 + %splat = shufflevector %elt, undef, zeroinitializer + %out = call @llvm.minnum.nxv4f32( %a, %splat) + ret %out +} + +define @fminnm_s_immone_acle( %pg, %a) #0 { +; CHECK-LABEL: fminnm_s_immone_acle: +; CHECK: // %bb.0: +; CHECK-NEXT: fminnm z0.s, p0/m, z0.s, #1.0 +; CHECK-NEXT: ret + %elt = insertelement undef, float 1.000000e+00, i32 0 + %splat = shufflevector %elt, undef, zeroinitializer + %out = call @llvm.aarch64.sve.fminnm.nxv4f32( %pg, + %a, + %splat) + ret %out +} + +define @fminnm_s_immone_acle_zero( %pg, %a) #1 { +; CHECK-LABEL: fminnm_s_immone_acle_zero: +; CHECK: // %bb.0: +; CHECK-NEXT: movprfx z0.s, p0/z, z0.s +; CHECK-NEXT: fminnm z0.s, p0/m, z0.s, #1.0 +; CHECK-NEXT: ret + %elt = insertelement undef, float 1.000000e+00, i32 0 + %splat = shufflevector %elt, undef, zeroinitializer + %a_z = select %pg, %a, zeroinitializer + %out = call @llvm.aarch64.sve.fminnm.nxv4f32( %pg, + %a_z, + %splat) + ret %out +} + +define @fminnm_d_immzero( %a) #0 { +; CHECK-LABEL: fminnm_d_immzero: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: fminnm z0.d, p0/m, z0.d, #0.0 +; CHECK-NEXT: ret + %elt = insertelement undef, double 0.000000e+00, i32 0 + %splat = shufflevector %elt, undef, zeroinitializer + %out = call @llvm.minnum.nxv2f64( %a, %splat) + ret %out +} + +define @fminnm_d_immzero_acle( %pg, %a) #0 { +; CHECK-LABEL: fminnm_d_immzero_acle: +; CHECK: // %bb.0: +; CHECK-NEXT: fminnm z0.d, p0/m, z0.d, #0.0 +; CHECK-NEXT: ret + %elt = insertelement undef, double 0.000000e+00, i32 0 + %splat = shufflevector %elt, undef, zeroinitializer + %out = call 
@llvm.aarch64.sve.fminnm.nxv2f64( %pg, + %a, + %splat) + ret %out +} + +define @fminnm_d_immzero_acle_zero( %pg, %a) #1 { +; CHECK-LABEL: fminnm_d_immzero_acle_zero: +; CHECK: // %bb.0: +; CHECK-NEXT: movprfx z0.d, p0/z, z0.d +; CHECK-NEXT: fminnm z0.d, p0/m, z0.d, #0.0 +; CHECK-NEXT: ret + %elt = insertelement undef, double 0.000000e+00, i32 0 + %splat = shufflevector %elt, undef, zeroinitializer + %a_z = select %pg, %a, zeroinitializer + %out = call @llvm.aarch64.sve.fminnm.nxv2f64( %pg, + %a_z, + %splat) + ret %out +} + +define @fminnm_d_immone( %a) #0 { +; CHECK-LABEL: fminnm_d_immone: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: fminnm z0.d, p0/m, z0.d, #1.0 +; CHECK-NEXT: ret + %elt = insertelement undef, double 1.000000e+00, i32 0 + %splat = shufflevector %elt, undef, zeroinitializer + %out = call @llvm.minnum.nxv2f64( %a, %splat) + ret %out +} + +define @fminnm_d_immone_acle( %pg, %a) #0 { +; CHECK-LABEL: fminnm_d_immone_acle: +; CHECK: // %bb.0: +; CHECK-NEXT: fminnm z0.d, p0/m, z0.d, #1.0 +; CHECK-NEXT: ret + %elt = insertelement undef, double 1.000000e+00, i32 0 + %splat = shufflevector %elt, undef, zeroinitializer + %out = call @llvm.aarch64.sve.fminnm.nxv2f64( %pg, + %a, + %splat) + ret %out +} + +define @fminnm_d_immone_acle_zero( %pg, %a) #1 { +; CHECK-LABEL: fminnm_d_immone_acle_zero: +; CHECK: // %bb.0: +; CHECK-NEXT: movprfx z0.d, p0/z, z0.d +; CHECK-NEXT: fminnm z0.d, p0/m, z0.d, #1.0 +; CHECK-NEXT: ret + %elt = insertelement undef, double 1.000000e+00, i32 0 + %splat = shufflevector %elt, undef, zeroinitializer + %a_z = select %pg, %a, zeroinitializer + %out = call @llvm.aarch64.sve.fminnm.nxv2f64( %pg, + %a_z, + %splat) + ret %out +} + +; +; FMUL +; + +define @fmul_h_immhalf( %a) #0 { +; CHECK-LABEL: fmul_h_immhalf: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: fmul z0.h, p0/m, z0.h, #0.5 +; CHECK-NEXT: ret + %elt = insertelement undef, half 0.500000e+00, i32 0 + %splat = shufflevector %elt, undef, zeroinitializer 
+ %out = fmul %a, %splat + ret %out +} + +define @fmul_h_immhalf_acle( %pg, %a) #0 { +; CHECK-LABEL: fmul_h_immhalf_acle: +; CHECK: // %bb.0: +; CHECK-NEXT: fmul z0.h, p0/m, z0.h, #0.5 +; CHECK-NEXT: ret + %elt = insertelement undef, half 0.500000e+00, i32 0 + %splat = shufflevector %elt, undef, zeroinitializer + %out = call @llvm.aarch64.sve.fmul.nxv8f16( %pg, + %a, + %splat) + ret %out +} + +define @fmul_h_immhalf_acle_zero( %pg, %a) #1 { +; CHECK-LABEL: fmul_h_immhalf_acle_zero: +; CHECK: // %bb.0: +; CHECK-NEXT: movprfx z0.h, p0/z, z0.h +; CHECK-NEXT: fmul z0.h, p0/m, z0.h, #0.5 +; CHECK-NEXT: ret + %elt = insertelement undef, half 0.500000e+00, i32 0 + %splat = shufflevector %elt, undef, zeroinitializer + %a_z = select %pg, %a, zeroinitializer + %out = call @llvm.aarch64.sve.fmul.nxv8f16( %pg, + %a_z, + %splat) + ret %out +} + +define @fmul_h_immtwo( %a) #0 { +; CHECK-LABEL: fmul_h_immtwo: +; CHECK: // %bb.0: +; CHECK-NEXT: fadd z0.h, z0.h, z0.h +; CHECK-NEXT: ret + %elt = insertelement undef, half 2.000000e+00, i32 0 + %splat = shufflevector %elt, undef, zeroinitializer + %out = fmul %a, %splat + ret %out +} + +define @fmul_h_immtwo_acle( %pg, %a) #0 { +; CHECK-LABEL: fmul_h_immtwo_acle: +; CHECK: // %bb.0: +; CHECK-NEXT: fmul z0.h, p0/m, z0.h, #2.0 +; CHECK-NEXT: ret + %elt = insertelement undef, half 2.000000e+00, i32 0 + %splat = shufflevector %elt, undef, zeroinitializer + %out = call @llvm.aarch64.sve.fmul.nxv8f16( %pg, + %a, + %splat) + ret %out +} + +define @fmul_h_immtwo_acle_zero( %pg, %a) #1 { +; CHECK-LABEL: fmul_h_immtwo_acle_zero: +; CHECK: // %bb.0: +; CHECK-NEXT: movprfx z0.h, p0/z, z0.h +; CHECK-NEXT: fmul z0.h, p0/m, z0.h, #2.0 +; CHECK-NEXT: ret + %elt = insertelement undef, half 2.000000e+00, i32 0 + %splat = shufflevector %elt, undef, zeroinitializer + %a_z = select %pg, %a, zeroinitializer + %out = call @llvm.aarch64.sve.fmul.nxv8f16( %pg, + %a_z, + %splat) + ret %out +} + +define @fmul_s_immhalf( %a) #0 { +; CHECK-LABEL: fmul_s_immhalf: 
+; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: fmul z0.s, p0/m, z0.s, #0.5 +; CHECK-NEXT: ret + %elt = insertelement undef, float 0.500000e+00, i32 0 + %splat = shufflevector %elt, undef, zeroinitializer + %out = fmul %a, %splat + ret %out +} + +define @fmul_s_immhalf_acle( %pg, %a) #0 { +; CHECK-LABEL: fmul_s_immhalf_acle: +; CHECK: // %bb.0: +; CHECK-NEXT: fmul z0.s, p0/m, z0.s, #0.5 +; CHECK-NEXT: ret + %elt = insertelement undef, float 0.500000e+00, i32 0 + %splat = shufflevector %elt, undef, zeroinitializer + %out = call @llvm.aarch64.sve.fmul.nxv4f32( %pg, + %a, + %splat) + ret %out +} + +define @fmul_s_immhalf_acle_zero( %pg, %a) #1 { +; CHECK-LABEL: fmul_s_immhalf_acle_zero: +; CHECK: // %bb.0: +; CHECK-NEXT: movprfx z0.s, p0/z, z0.s +; CHECK-NEXT: fmul z0.s, p0/m, z0.s, #0.5 +; CHECK-NEXT: ret + %elt = insertelement undef, float 0.500000e+00, i32 0 + %splat = shufflevector %elt, undef, zeroinitializer + %a_z = select %pg, %a, zeroinitializer + %out = call @llvm.aarch64.sve.fmul.nxv4f32( %pg, + %a_z, + %splat) + ret %out +} + +define @fmul_s_immtwo( %a) #0 { +; CHECK-LABEL: fmul_s_immtwo: +; CHECK: // %bb.0: +; CHECK-NEXT: fadd z0.s, z0.s, z0.s +; CHECK-NEXT: ret + %elt = insertelement undef, float 2.000000e+00, i32 0 + %splat = shufflevector %elt, undef, zeroinitializer + %out = fmul %a, %splat + ret %out +} + +define @fmul_s_immtwo_acle( %pg, %a) #0 { +; CHECK-LABEL: fmul_s_immtwo_acle: +; CHECK: // %bb.0: +; CHECK-NEXT: fmul z0.s, p0/m, z0.s, #2.0 +; CHECK-NEXT: ret + %elt = insertelement undef, float 2.000000e+00, i32 0 + %splat = shufflevector %elt, undef, zeroinitializer + %out = call @llvm.aarch64.sve.fmul.nxv4f32( %pg, + %a, + %splat) + ret %out +} + +define @fmul_s_immtwo_acle_zero( %pg, %a) #1 { +; CHECK-LABEL: fmul_s_immtwo_acle_zero: +; CHECK: // %bb.0: +; CHECK-NEXT: movprfx z0.s, p0/z, z0.s +; CHECK-NEXT: fmul z0.s, p0/m, z0.s, #2.0 +; CHECK-NEXT: ret + %elt = insertelement undef, float 2.000000e+00, i32 0 + %splat = 
shufflevector %elt, undef, zeroinitializer + %a_z = select %pg, %a, zeroinitializer + %out = call @llvm.aarch64.sve.fmul.nxv4f32( %pg, + %a_z, + %splat) + ret %out +} + +define @fmul_d_immhalf( %a) #0 { +; CHECK-LABEL: fmul_d_immhalf: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: fmul z0.d, p0/m, z0.d, #0.5 +; CHECK-NEXT: ret + %elt = insertelement undef, double 0.500000e+00, i32 0 + %splat = shufflevector %elt, undef, zeroinitializer + %out = fmul %a, %splat + ret %out +} + +define @fmul_d_immhalf_acle( %pg, %a) #0 { +; CHECK-LABEL: fmul_d_immhalf_acle: +; CHECK: // %bb.0: +; CHECK-NEXT: fmul z0.d, p0/m, z0.d, #0.5 +; CHECK-NEXT: ret + %elt = insertelement undef, double 0.500000e+00, i32 0 + %splat = shufflevector %elt, undef, zeroinitializer + %out = call @llvm.aarch64.sve.fmul.nxv2f64( %pg, + %a, + %splat) + ret %out +} + +define @fmul_d_immhalf_acle_zero( %pg, %a) #1 { +; CHECK-LABEL: fmul_d_immhalf_acle_zero: +; CHECK: // %bb.0: +; CHECK-NEXT: movprfx z0.d, p0/z, z0.d +; CHECK-NEXT: fmul z0.d, p0/m, z0.d, #0.5 +; CHECK-NEXT: ret + %elt = insertelement undef, double 0.500000e+00, i32 0 + %splat = shufflevector %elt, undef, zeroinitializer + %a_z = select %pg, %a, zeroinitializer + %out = call @llvm.aarch64.sve.fmul.nxv2f64( %pg, + %a_z, + %splat) + ret %out +} + +define @fmul_d_immtwo( %a) #0 { +; CHECK-LABEL: fmul_d_immtwo: +; CHECK: // %bb.0: +; CHECK-NEXT: fadd z0.d, z0.d, z0.d +; CHECK-NEXT: ret + %elt = insertelement undef, double 2.000000e+00, i32 0 + %splat = shufflevector %elt, undef, zeroinitializer + %out = fmul %a, %splat + ret %out +} + +define @fmul_d_immtwo_acle( %pg, %a) #0 { +; CHECK-LABEL: fmul_d_immtwo_acle: +; CHECK: // %bb.0: +; CHECK-NEXT: fmul z0.d, p0/m, z0.d, #2.0 +; CHECK-NEXT: ret + %elt = insertelement undef, double 2.000000e+00, i32 0 + %splat = shufflevector %elt, undef, zeroinitializer + %out = call @llvm.aarch64.sve.fmul.nxv2f64( %pg, + %a, + %splat) + ret %out +} + +define @fmul_d_immtwo_acle_zero( %pg, %a) #1 { +; 
CHECK-LABEL: fmul_d_immtwo_acle_zero: +; CHECK: // %bb.0: +; CHECK-NEXT: movprfx z0.d, p0/z, z0.d +; CHECK-NEXT: fmul z0.d, p0/m, z0.d, #2.0 +; CHECK-NEXT: ret + %elt = insertelement undef, double 2.000000e+00, i32 0 + %splat = shufflevector %elt, undef, zeroinitializer + %a_z = select %pg, %a, zeroinitializer + %out = call @llvm.aarch64.sve.fmul.nxv2f64( %pg, + %a_z, + %splat) + ret %out +} + +; +; FSUB +; + +define @fsub_h_immhalf( %a) #0 { +; CHECK-LABEL: fsub_h_immhalf: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: fsub z0.h, p0/m, z0.h, #0.5 +; CHECK-NEXT: ret + %elt = insertelement undef, half 0.500000e+00, i32 0 + %splat = shufflevector %elt, undef, zeroinitializer + %out = fsub %a, %splat + ret %out +} + +define @fsub_h_immhalf_acle( %pg, %a) #0 { +; CHECK-LABEL: fsub_h_immhalf_acle: +; CHECK: // %bb.0: +; CHECK-NEXT: fsub z0.h, p0/m, z0.h, #0.5 +; CHECK-NEXT: ret + %elt = insertelement undef, half 0.500000e+00, i32 0 + %splat = shufflevector %elt, undef, zeroinitializer + %out = call @llvm.aarch64.sve.fsub.nxv8f16( %pg, + %a, + %splat) + ret %out +} + +define @fsub_h_immhalf_acle_zero( %pg, %a) #1 { +; CHECK-LABEL: fsub_h_immhalf_acle_zero: +; CHECK: // %bb.0: +; CHECK-NEXT: movprfx z0.h, p0/z, z0.h +; CHECK-NEXT: fsub z0.h, p0/m, z0.h, #0.5 +; CHECK-NEXT: ret + %elt = insertelement undef, half 0.500000e+00, i32 0 + %splat = shufflevector %elt, undef, zeroinitializer + %a_z = select %pg, %a, zeroinitializer + %out = call @llvm.aarch64.sve.fsub.nxv8f16( %pg, + %a_z, + %splat) + ret %out +} + +define @fsub_h_immone( %a) #0 { +; CHECK-LABEL: fsub_h_immone: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: fsub z0.h, p0/m, z0.h, #1.0 +; CHECK-NEXT: ret + %elt = insertelement undef, half 1.000000e+00, i32 0 + %splat = shufflevector %elt, undef, zeroinitializer + %out = fsub %a, %splat + ret %out +} + +define @fsub_h_immone_acle( %pg, %a) #0 { +; CHECK-LABEL: fsub_h_immone_acle: +; CHECK: // %bb.0: +; CHECK-NEXT: fsub z0.h, p0/m, z0.h, 
#1.0 +; CHECK-NEXT: ret + %elt = insertelement undef, half 1.000000e+00, i32 0 + %splat = shufflevector %elt, undef, zeroinitializer + %out = call @llvm.aarch64.sve.fsub.nxv8f16( %pg, + %a, + %splat) + ret %out +} + +define @fsub_h_immone_acle_zero( %pg, %a) #1 { +; CHECK-LABEL: fsub_h_immone_acle_zero: +; CHECK: // %bb.0: +; CHECK-NEXT: movprfx z0.h, p0/z, z0.h +; CHECK-NEXT: fsub z0.h, p0/m, z0.h, #1.0 +; CHECK-NEXT: ret + %elt = insertelement undef, half 1.000000e+00, i32 0 + %splat = shufflevector %elt, undef, zeroinitializer + %a_z = select %pg, %a, zeroinitializer + %out = call @llvm.aarch64.sve.fsub.nxv8f16( %pg, + %a_z, + %splat) + ret %out +} + +define @fsub_s_immhalf( %a) #0 { +; CHECK-LABEL: fsub_s_immhalf: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: fsub z0.s, p0/m, z0.s, #0.5 +; CHECK-NEXT: ret + %elt = insertelement undef, float 0.500000e+00, i32 0 + %splat = shufflevector %elt, undef, zeroinitializer + %out = fsub %a, %splat + ret %out +} + +define @fsub_s_immhalf_acle( %pg, %a) #0 { +; CHECK-LABEL: fsub_s_immhalf_acle: +; CHECK: // %bb.0: +; CHECK-NEXT: fsub z0.s, p0/m, z0.s, #0.5 +; CHECK-NEXT: ret + %elt = insertelement undef, float 0.500000e+00, i32 0 + %splat = shufflevector %elt, undef, zeroinitializer + %out = call @llvm.aarch64.sve.fsub.nxv4f32( %pg, + %a, + %splat) + ret %out +} + +define @fsub_s_immhalf_acle_zero( %pg, %a) #1 { +; CHECK-LABEL: fsub_s_immhalf_acle_zero: +; CHECK: // %bb.0: +; CHECK-NEXT: movprfx z0.s, p0/z, z0.s +; CHECK-NEXT: fsub z0.s, p0/m, z0.s, #0.5 +; CHECK-NEXT: ret + %elt = insertelement undef, float 0.500000e+00, i32 0 + %splat = shufflevector %elt, undef, zeroinitializer + %a_z = select %pg, %a, zeroinitializer + %out = call @llvm.aarch64.sve.fsub.nxv4f32( %pg, + %a_z, + %splat) + ret %out +} + +define @fsub_s_immone( %a) #0 { +; CHECK-LABEL: fsub_s_immone: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: fsub z0.s, p0/m, z0.s, #1.0 +; CHECK-NEXT: ret + %elt = insertelement undef, float 
1.000000e+00, i32 0 + %splat = shufflevector %elt, undef, zeroinitializer + %out = fsub %a, %splat + ret %out +} + +define @fsub_s_immone_acle( %pg, %a) #0 { +; CHECK-LABEL: fsub_s_immone_acle: +; CHECK: // %bb.0: +; CHECK-NEXT: fsub z0.s, p0/m, z0.s, #1.0 +; CHECK-NEXT: ret + %elt = insertelement undef, float 1.000000e+00, i32 0 + %splat = shufflevector %elt, undef, zeroinitializer + %out = call @llvm.aarch64.sve.fsub.nxv4f32( %pg, + %a, + %splat) + ret %out +} + +define @fsub_s_immone_acle_zero( %pg, %a) #1 { +; CHECK-LABEL: fsub_s_immone_acle_zero: +; CHECK: // %bb.0: +; CHECK-NEXT: movprfx z0.s, p0/z, z0.s +; CHECK-NEXT: fsub z0.s, p0/m, z0.s, #1.0 +; CHECK-NEXT: ret + %elt = insertelement undef, float 1.000000e+00, i32 0 + %splat = shufflevector %elt, undef, zeroinitializer + %a_z = select %pg, %a, zeroinitializer + %out = call @llvm.aarch64.sve.fsub.nxv4f32( %pg, + %a_z, + %splat) + ret %out +} + +define @fsub_d_immhalf( %a) #0 { +; CHECK-LABEL: fsub_d_immhalf: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: fsub z0.d, p0/m, z0.d, #0.5 +; CHECK-NEXT: ret + %elt = insertelement undef, double 0.500000e+00, i32 0 + %splat = shufflevector %elt, undef, zeroinitializer + %out = fsub %a, %splat + ret %out +} + +define @fsub_d_immhalf_acle( %pg, %a) #0 { +; CHECK-LABEL: fsub_d_immhalf_acle: +; CHECK: // %bb.0: +; CHECK-NEXT: fsub z0.d, p0/m, z0.d, #0.5 +; CHECK-NEXT: ret + %elt = insertelement undef, double 0.500000e+00, i32 0 + %splat = shufflevector %elt, undef, zeroinitializer + %out = call @llvm.aarch64.sve.fsub.nxv2f64( %pg, + %a, + %splat) + ret %out +} + +define @fsub_d_immhalf_acle_zero( %pg, %a) #1 { +; CHECK-LABEL: fsub_d_immhalf_acle_zero: +; CHECK: // %bb.0: +; CHECK-NEXT: movprfx z0.d, p0/z, z0.d +; CHECK-NEXT: fsub z0.d, p0/m, z0.d, #0.5 +; CHECK-NEXT: ret + %elt = insertelement undef, double 0.500000e+00, i32 0 + %splat = shufflevector %elt, undef, zeroinitializer + %a_z = select %pg, %a, zeroinitializer + %out = call 
@llvm.aarch64.sve.fsub.nxv2f64( %pg, + %a_z, + %splat) + ret %out +} + +define @fsub_d_immone( %a) #0 { +; CHECK-LABEL: fsub_d_immone: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: fsub z0.d, p0/m, z0.d, #1.0 +; CHECK-NEXT: ret + %elt = insertelement undef, double 1.000000e+00, i32 0 + %splat = shufflevector %elt, undef, zeroinitializer + %out = fsub %a, %splat + ret %out +} + +define @fsub_d_immone_acle( %pg, %a) #0 { +; CHECK-LABEL: fsub_d_immone_acle: +; CHECK: // %bb.0: +; CHECK-NEXT: fsub z0.d, p0/m, z0.d, #1.0 +; CHECK-NEXT: ret + %elt = insertelement undef, double 1.000000e+00, i32 0 + %splat = shufflevector %elt, undef, zeroinitializer + %out = call @llvm.aarch64.sve.fsub.nxv2f64( %pg, + %a, + %splat) + ret %out +} + +define @fsub_d_immone_acle_zero( %pg, %a) #1 { +; CHECK-LABEL: fsub_d_immone_acle_zero: +; CHECK: // %bb.0: +; CHECK-NEXT: movprfx z0.d, p0/z, z0.d +; CHECK-NEXT: fsub z0.d, p0/m, z0.d, #1.0 +; CHECK-NEXT: ret + %elt = insertelement undef, double 1.000000e+00, i32 0 + %splat = shufflevector %elt, undef, zeroinitializer + %a_z = select %pg, %a, zeroinitializer + %out = call @llvm.aarch64.sve.fsub.nxv2f64( %pg, + %a_z, + %splat) + ret %out +} + +; +; FSUBR +; + +define @fsubr_h_immhalf( %pg, %a) #1 { +; CHECK-LABEL: fsubr_h_immhalf: +; CHECK: // %bb.0: +; CHECK-NEXT: movprfx z0.h, p0/z, z0.h +; CHECK-NEXT: fsubr z0.h, p0/m, z0.h, #0.5 +; CHECK-NEXT: ret + %elt = insertelement undef, half 0.500000e+00, i32 0 + %splat = shufflevector %elt, undef, zeroinitializer + %a_z = select %pg, %a, zeroinitializer + %out = call @llvm.aarch64.sve.fsubr.nxv8f16( %pg, + %a_z, + %splat) + ret %out +} + +define @fsubr_h_immone( %pg, %a) #1 { +; CHECK-LABEL: fsubr_h_immone: +; CHECK: // %bb.0: +; CHECK-NEXT: movprfx z0.h, p0/z, z0.h +; CHECK-NEXT: fsubr z0.h, p0/m, z0.h, #1.0 +; CHECK-NEXT: ret + %elt = insertelement undef, half 1.000000e+00, i32 0 + %splat = shufflevector %elt, undef, zeroinitializer + %a_z = select %pg, %a, zeroinitializer + %out = 
call @llvm.aarch64.sve.fsubr.nxv8f16( %pg, + %a_z, + %splat) + ret %out +} + +define @fsubr_s_immhalf( %pg, %a) #1 { +; CHECK-LABEL: fsubr_s_immhalf: +; CHECK: // %bb.0: +; CHECK-NEXT: movprfx z0.s, p0/z, z0.s +; CHECK-NEXT: fsubr z0.s, p0/m, z0.s, #0.5 +; CHECK-NEXT: ret + %elt = insertelement undef, float 0.500000e+00, i32 0 + %splat = shufflevector %elt, undef, zeroinitializer + %a_z = select %pg, %a, zeroinitializer + %out = call @llvm.aarch64.sve.fsubr.nxv4f32( %pg, + %a_z, + %splat) + ret %out +} + +define @fsubr_s_immone( %pg, %a) #1 { +; CHECK-LABEL: fsubr_s_immone: +; CHECK: // %bb.0: +; CHECK-NEXT: movprfx z0.s, p0/z, z0.s +; CHECK-NEXT: fsubr z0.s, p0/m, z0.s, #1.0 +; CHECK-NEXT: ret + %elt = insertelement undef, float 1.000000e+00, i32 0 + %splat = shufflevector %elt, undef, zeroinitializer + %a_z = select %pg, %a, zeroinitializer + %out = call @llvm.aarch64.sve.fsubr.nxv4f32( %pg, + %a_z, + %splat) + ret %out +} + +define @fsubr_d_immhalf( %pg, %a) #1 { +; CHECK-LABEL: fsubr_d_immhalf: +; CHECK: // %bb.0: +; CHECK-NEXT: movprfx z0.d, p0/z, z0.d +; CHECK-NEXT: fsubr z0.d, p0/m, z0.d, #0.5 +; CHECK-NEXT: ret + %elt = insertelement undef, double 0.500000e+00, i32 0 + %splat = shufflevector %elt, undef, zeroinitializer + %a_z = select %pg, %a, zeroinitializer + %out = call @llvm.aarch64.sve.fsubr.nxv2f64( %pg, + %a_z, + %splat) + ret %out +} + +define @fsubr_d_immone( %pg, %a) #1 { +; CHECK-LABEL: fsubr_d_immone: +; CHECK: // %bb.0: +; CHECK-NEXT: movprfx z0.d, p0/z, z0.d +; CHECK-NEXT: fsubr z0.d, p0/m, z0.d, #1.0 +; CHECK-NEXT: ret + %elt = insertelement undef, double 1.000000e+00, i32 0 + %splat = shufflevector %elt, undef, zeroinitializer + %a_z = select %pg, %a, zeroinitializer + %out = call @llvm.aarch64.sve.fsubr.nxv2f64( %pg, + %a_z, + %splat) + ret %out +} + +;; Arithmetic intrinsic declarations + +declare @llvm.maximum.nxv8f16(, ) +declare @llvm.maximum.nxv4f32(, ) +declare @llvm.maximum.nxv2f64(, ) + +declare @llvm.maxnum.nxv8f16(, ) +declare 
<vscale x 4 x float> @llvm.maxnum.nxv4f32(<vscale x 4 x float>, <vscale x 4 x float>)
+declare <vscale x 2 x double> @llvm.maxnum.nxv2f64(<vscale x 2 x double>, <vscale x 2 x double>)
+
+declare <vscale x 8 x half> @llvm.minimum.nxv8f16(<vscale x 8 x half>, <vscale x 8 x half>)
+declare <vscale x 4 x float> @llvm.minimum.nxv4f32(<vscale x 4 x float>, <vscale x 4 x float>)
+declare <vscale x 2 x double> @llvm.minimum.nxv2f64(<vscale x 2 x double>, <vscale x 2 x double>)
+
+declare <vscale x 8 x half> @llvm.minnum.nxv8f16(<vscale x 8 x half>, <vscale x 8 x half>)
+declare <vscale x 4 x float> @llvm.minnum.nxv4f32(<vscale x 4 x float>, <vscale x 4 x float>)
+declare <vscale x 2 x double> @llvm.minnum.nxv2f64(<vscale x 2 x double>, <vscale x 2 x double>)
+
+declare <vscale x 8 x half> @llvm.aarch64.sve.fadd.nxv8f16(<vscale x 8 x i1>, <vscale x 8 x half>, <vscale x 8 x half>)
+declare <vscale x 4 x float> @llvm.aarch64.sve.fadd.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>, <vscale x 4 x float>)
+declare <vscale x 2 x double> @llvm.aarch64.sve.fadd.nxv2f64(<vscale x 2 x i1>, <vscale x 2 x double>, <vscale x 2 x double>)
+
+declare <vscale x 8 x half> @llvm.aarch64.sve.fmax.nxv8f16(<vscale x 8 x i1>, <vscale x 8 x half>, <vscale x 8 x half>)
+declare <vscale x 4 x float> @llvm.aarch64.sve.fmax.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>, <vscale x 4 x float>)
+declare <vscale x 2 x double> @llvm.aarch64.sve.fmax.nxv2f64(<vscale x 2 x i1>, <vscale x 2 x double>, <vscale x 2 x double>)
+
+declare <vscale x 8 x half> @llvm.aarch64.sve.fmaxnm.nxv8f16(<vscale x 8 x i1>, <vscale x 8 x half>, <vscale x 8 x half>)
+declare <vscale x 4 x float> @llvm.aarch64.sve.fmaxnm.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>, <vscale x 4 x float>)
+declare <vscale x 2 x double> @llvm.aarch64.sve.fmaxnm.nxv2f64(<vscale x 2 x i1>, <vscale x 2 x double>, <vscale x 2 x double>)
+
+declare <vscale x 8 x half> @llvm.aarch64.sve.fmin.nxv8f16(<vscale x 8 x i1>, <vscale x 8 x half>, <vscale x 8 x half>)
+declare <vscale x 4 x float> @llvm.aarch64.sve.fmin.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>, <vscale x 4 x float>)
+declare <vscale x 2 x double> @llvm.aarch64.sve.fmin.nxv2f64(<vscale x 2 x i1>, <vscale x 2 x double>, <vscale x 2 x double>)
+
+declare <vscale x 8 x half> @llvm.aarch64.sve.fminnm.nxv8f16(<vscale x 8 x i1>, <vscale x 8 x half>, <vscale x 8 x half>)
+declare <vscale x 4 x float> @llvm.aarch64.sve.fminnm.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>, <vscale x 4 x float>)
+declare <vscale x 2 x double> @llvm.aarch64.sve.fminnm.nxv2f64(<vscale x 2 x i1>, <vscale x 2 x double>, <vscale x 2 x double>)
+
+declare <vscale x 8 x half> @llvm.aarch64.sve.fmul.nxv8f16(<vscale x 8 x i1>, <vscale x 8 x half>, <vscale x 8 x half>)
+declare <vscale x 4 x float> @llvm.aarch64.sve.fmul.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>, <vscale x 4 x float>)
+declare <vscale x 2 x double> @llvm.aarch64.sve.fmul.nxv2f64(<vscale x 2 x i1>, <vscale x 2 x double>, <vscale x 2 x double>)
+
+declare <vscale x 8 x half> @llvm.aarch64.sve.fsub.nxv8f16(<vscale x 8 x i1>, <vscale x 8 x half>, <vscale x 8 x half>)
+declare <vscale x 4 x float> @llvm.aarch64.sve.fsub.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>, <vscale x 4 x float>)
+declare <vscale x 2 x double> @llvm.aarch64.sve.fsub.nxv2f64(<vscale x 2 x i1>, <vscale x 2 x double>, <vscale x 2 x double>)
+
+declare <vscale x 8 x half> @llvm.aarch64.sve.fsubr.nxv8f16(<vscale x 8 x i1>, <vscale x 8 x half>, <vscale x 8 x half>)
+declare <vscale x 4 x float> @llvm.aarch64.sve.fsubr.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>, <vscale x 4 x float>)
+declare <vscale x 2 x double> @llvm.aarch64.sve.fsubr.nxv2f64(<vscale x 2 x i1>, <vscale x 2 x double>, <vscale x 2 x double>)
+
+attributes #0 = { "target-features"="+sve" }
+attributes #1 = { "target-features"="+sve,+use-experimental-zeroing-pseudos" }