diff --git a/llvm/lib/Target/AArch64/SVEInstrFormats.td b/llvm/lib/Target/AArch64/SVEInstrFormats.td
--- a/llvm/lib/Target/AArch64/SVEInstrFormats.td
+++ b/llvm/lib/Target/AArch64/SVEInstrFormats.td
@@ -2529,7 +2529,7 @@
   let Inst{4-0} = Zd;
 
   let Constraints = "$Zd = $_Zd";
-  let DestructiveInstType = DestructiveOther;
+  let DestructiveInstType = DestructiveUnaryPassthru;
   let ElementSize = Sz;
 }
 
@@ -2578,9 +2578,12 @@
 }
 
 multiclass sve_fp_2op_p_zd_HSD<bits<3> opc, string asm, SDPatternOperator op> {
-  def _H : sve_fp_2op_p_zd<{ 0b01, opc }, asm, ZPR16, ZPR16, ElementSizeH>;
-  def _S : sve_fp_2op_p_zd<{ 0b10, opc }, asm, ZPR32, ZPR32, ElementSizeS>;
-  def _D : sve_fp_2op_p_zd<{ 0b11, opc }, asm, ZPR64, ZPR64, ElementSizeD>;
+  def _H : sve_fp_2op_p_zd<{ 0b01, opc }, asm, ZPR16, ZPR16, ElementSizeH>,
+           SVEPseudo2Instr<NAME # _H, 1>;
+  def _S : sve_fp_2op_p_zd<{ 0b10, opc }, asm, ZPR32, ZPR32, ElementSizeS>,
+           SVEPseudo2Instr<NAME # _S, 1>;
+  def _D : sve_fp_2op_p_zd<{ 0b11, opc }, asm, ZPR64, ZPR64, ElementSizeD>,
+           SVEPseudo2Instr<NAME # _D, 1>;
 
   def : SVE_1_Op_Passthru_Pat<nxv8f16, op, nxv8i1, nxv8f16, !cast<Instruction>(NAME # _H)>;
   def : SVE_1_Op_Passthru_Pat<nxv4f16, op, nxv4i1, nxv4f16, !cast<Instruction>(NAME # _H)>;
@@ -2588,6 +2591,17 @@
   def : SVE_1_Op_Passthru_Pat<nxv4f32, op, nxv4i1, nxv4f32, !cast<Instruction>(NAME # _S)>;
   def : SVE_1_Op_Passthru_Pat<nxv2f32, op, nxv2i1, nxv2f32, !cast<Instruction>(NAME # _S)>;
   def : SVE_1_Op_Passthru_Pat<nxv2f64, op, nxv2i1, nxv2f64, !cast<Instruction>(NAME # _D)>;
+
+  def _UNDEF_H : PredOneOpPassthruPseudo<NAME # _H, ZPR16>;
+  def _UNDEF_S : PredOneOpPassthruPseudo<NAME # _S, ZPR32>;
+  def _UNDEF_D : PredOneOpPassthruPseudo<NAME # _D, ZPR64>;
+
+  defm : SVE_1_Op_PassthruUndef_Pat<nxv8f16, op, nxv8i1, nxv8f16, !cast<Pseudo>(NAME # _UNDEF_H)>;
+  defm : SVE_1_Op_PassthruUndef_Pat<nxv4f16, op, nxv4i1, nxv4f16, !cast<Pseudo>(NAME # _UNDEF_H)>;
+  defm : SVE_1_Op_PassthruUndef_Pat<nxv2f16, op, nxv2i1, nxv2f16, !cast<Pseudo>(NAME # _UNDEF_H)>;
+  defm : SVE_1_Op_PassthruUndef_Pat<nxv4f32, op, nxv4i1, nxv4f32, !cast<Pseudo>(NAME # _UNDEF_S)>;
+  defm : SVE_1_Op_PassthruUndef_Pat<nxv2f32, op, nxv2i1, nxv2f32, !cast<Pseudo>(NAME # _UNDEF_S)>;
+  defm : SVE_1_Op_PassthruUndef_Pat<nxv2f64, op, nxv2i1, nxv2f64, !cast<Pseudo>(NAME # _UNDEF_D)>;
 }
 
 multiclass sve2_fp_flogb<string asm, SDPatternOperator op> {
diff --git a/llvm/test/CodeGen/AArch64/sve-unary-movprfx.ll b/llvm/test/CodeGen/AArch64/sve-unary-movprfx.ll
--- a/llvm/test/CodeGen/AArch64/sve-unary-movprfx.ll
+++ b/llvm/test/CodeGen/AArch64/sve-unary-movprfx.ll
@@ -604,6 +604,180 @@
   ret <vscale x 2 x double> %ret
 }
 
+;
+; FSQRT (sve_fp_2op_p_zd_HSD)
+;
+
+define <vscale x 8 x half> @fsqrt_f16(<vscale x 8 x half> %a, <vscale x 8 x half> %b) #0 {
+; CHECK-LABEL: fsqrt_f16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.h
+; CHECK-NEXT:    movprfx z0, z1
+; CHECK-NEXT:    fsqrt z0.h, p0/m, z1.h
+; CHECK-NEXT:    ret
+  %ret = tail call <vscale x 8 x half> @llvm.sqrt.nxv8f16(<vscale x 8 x half> %b)
+  ret <vscale x 8 x half> %ret
+}
+
+define <vscale x 8 x half> @fsqrt_f16_dupreg(<vscale x 8 x half> %a) #0 {
+; CHECK-LABEL: fsqrt_f16_dupreg:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.h
+; CHECK-NEXT:    fsqrt z0.h, p0/m, z0.h
+; CHECK-NEXT:    ret
+  %ret = tail call <vscale x 8 x half> @llvm.sqrt.nxv8f16(<vscale x 8 x half> %a)
+  ret <vscale x 8 x half> %ret
+}
+
+define <vscale x 8 x half> @fsqrt_f16_undef(<vscale x 8 x half> %a, <vscale x 8 x half> %b) #0 {
+; CHECK-LABEL: fsqrt_f16_undef:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.h
+; CHECK-NEXT:    movprfx z0, z1
+; CHECK-NEXT:    fsqrt z0.h, p0/m, z1.h
+; CHECK-NEXT:    ret
+  %pg = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
+  %ret = tail call <vscale x 8 x half> @llvm.aarch64.sve.fsqrt.nxv8f16(<vscale x 8 x half> undef, <vscale x 8 x i1> %pg, <vscale x 8 x half> %b)
+  ret <vscale x 8 x half> %ret
+}
+
+define <vscale x 8 x half> @fsqrt_f16_active(<vscale x 8 x half> %a, <vscale x 8 x half> %b) #0 {
+; CHECK-LABEL: fsqrt_f16_active:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.h
+; CHECK-NEXT:    movprfx z0, z1
+; CHECK-NEXT:    fsqrt z0.h, p0/m, z1.h
+; CHECK-NEXT:    ret
+  %pg = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
+  %ret = tail call <vscale x 8 x half> @llvm.aarch64.sve.fsqrt.nxv8f16(<vscale x 8 x half> %a, <vscale x 8 x i1> %pg, <vscale x 8 x half> %b)
+  ret <vscale x 8 x half> %ret
+}
+
+define <vscale x 8 x half> @fsqrt_f16_not_active(<vscale x 8 x half> %a, <vscale x 8 x half> %b) #0 {
+; CHECK-LABEL: fsqrt_f16_not_active:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.d
+; CHECK-NEXT:    fsqrt z0.h, p0/m, z1.h
+; CHECK-NEXT:    ret
+  %pg = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
+  %pg.to = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> %pg)
+  %pg.from = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> %pg.to)
+  %ret = tail call <vscale x 8 x half> @llvm.aarch64.sve.fsqrt.nxv8f16(<vscale x 8 x half> %a, <vscale x 8 x i1> %pg.from, <vscale x 8 x half> %b)
+  ret <vscale x 8 x half> %ret
+}
+
+define <vscale x 4 x float> @fsqrt_f32(<vscale x 4 x float> %a, <vscale x 4 x float> %b) #0 {
+; CHECK-LABEL: fsqrt_f32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.s
+; CHECK-NEXT:    movprfx z0, z1
+; CHECK-NEXT:    fsqrt z0.s, p0/m, z1.s
+; CHECK-NEXT:    ret
+  %ret = tail call <vscale x 4 x float> @llvm.sqrt.nxv4f32(<vscale x 4 x float> %b)
+  ret <vscale x 4 x float> %ret
+}
+
+define <vscale x 4 x float> @fsqrt_f32_dupreg(<vscale x 4 x float> %a) #0 {
+; CHECK-LABEL: fsqrt_f32_dupreg:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.s
+; CHECK-NEXT:    fsqrt z0.s, p0/m, z0.s
+; CHECK-NEXT:    ret
+  %ret = tail call <vscale x 4 x float> @llvm.sqrt.nxv4f32(<vscale x 4 x float> %a)
+  ret <vscale x 4 x float> %ret
+}
+
+define <vscale x 4 x float> @fsqrt_f32_undef(<vscale x 4 x float> %a, <vscale x 4 x float> %b) #0 {
+; CHECK-LABEL: fsqrt_f32_undef:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.s
+; CHECK-NEXT:    movprfx z0, z1
+; CHECK-NEXT:    fsqrt z0.s, p0/m, z1.s
+; CHECK-NEXT:    ret
+  %pg = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
+  %ret = tail call <vscale x 4 x float> @llvm.aarch64.sve.fsqrt.nxv4f32(<vscale x 4 x float> undef, <vscale x 4 x i1> %pg, <vscale x 4 x float> %b)
+  ret <vscale x 4 x float> %ret
+}
+
+define <vscale x 4 x float> @fsqrt_f32_active(<vscale x 4 x float> %a, <vscale x 4 x float> %b) #0 {
+; CHECK-LABEL: fsqrt_f32_active:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.s
+; CHECK-NEXT:    movprfx z0, z1
+; CHECK-NEXT:    fsqrt z0.s, p0/m, z1.s
+; CHECK-NEXT:    ret
+  %pg = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
+  %ret = tail call <vscale x 4 x float> @llvm.aarch64.sve.fsqrt.nxv4f32(<vscale x 4 x float> %a, <vscale x 4 x i1> %pg, <vscale x 4 x float> %b)
+  ret <vscale x 4 x float> %ret
+}
+
+define <vscale x 4 x float> @fsqrt_f32_not_active(<vscale x 4 x float> %a, <vscale x 4 x float> %b) #0 {
+; CHECK-LABEL: fsqrt_f32_not_active:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.d
+; CHECK-NEXT:    fsqrt z0.s, p0/m, z1.s
+; CHECK-NEXT:    ret
+  %pg = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
+  %pg.to = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> %pg)
+  %pg.from = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg.to)
+  %ret = tail call <vscale x 4 x float> @llvm.aarch64.sve.fsqrt.nxv4f32(<vscale x 4 x float> %a, <vscale x 4 x i1> %pg.from, <vscale x 4 x float> %b)
+  ret <vscale x 4 x float> %ret
+}
+
+define <vscale x 2 x double> @fsqrt_f64(<vscale x 2 x double> %a, <vscale x 2 x double> %b) #0 {
+; CHECK-LABEL: fsqrt_f64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.d
+; CHECK-NEXT:    movprfx z0, z1
+; CHECK-NEXT:    fsqrt z0.d, p0/m, z1.d
+; CHECK-NEXT:    ret
+  %ret = tail call <vscale x 2 x double> @llvm.sqrt.nxv2f64(<vscale x 2 x double> %b)
+  ret <vscale x 2 x double> %ret
+}
+
+define <vscale x 2 x double> @fsqrt_f64_dupreg(<vscale x 2 x double> %a) #0 {
+; CHECK-LABEL: fsqrt_f64_dupreg:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.d
+; CHECK-NEXT:    fsqrt z0.d, p0/m, z0.d
+; CHECK-NEXT:    ret
+  %ret = tail call <vscale x 2 x double> @llvm.sqrt.nxv2f64(<vscale x 2 x double> %a)
+  ret <vscale x 2 x double> %ret
+}
+
+define <vscale x 2 x double> @fsqrt_f64_undef(<vscale x 2 x double> %a, <vscale x 2 x double> %b) #0 {
+; CHECK-LABEL: fsqrt_f64_undef:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.d
+; CHECK-NEXT:    movprfx z0, z1
+; CHECK-NEXT:    fsqrt z0.d, p0/m, z1.d
+; CHECK-NEXT:    ret
+  %pg = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
+  %ret = tail call <vscale x 2 x double> @llvm.aarch64.sve.fsqrt.nxv2f64(<vscale x 2 x double> undef, <vscale x 2 x i1> %pg, <vscale x 2 x double> %b)
+  ret <vscale x 2 x double> %ret
+}
+
+define <vscale x 2 x double> @fsqrt_f64_active(<vscale x 2 x double> %a, <vscale x 2 x double> %b) #0 {
+; CHECK-LABEL: fsqrt_f64_active:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.d
+; CHECK-NEXT:    movprfx z0, z1
+; CHECK-NEXT:    fsqrt z0.d, p0/m, z1.d
+; CHECK-NEXT:    ret
+  %pg = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
+  %ret = tail call <vscale x 2 x double> @llvm.aarch64.sve.fsqrt.nxv2f64(<vscale x 2 x double> %a, <vscale x 2 x i1> %pg, <vscale x 2 x double> %b)
+  ret <vscale x 2 x double> %ret
+}
+
+define <vscale x 2 x double> @fsqrt_f64_not_active(<vscale x 2 x double> %a, <vscale x 2 x double> %b, <vscale x 2 x i1> %pg) #0 {
+; CHECK-LABEL: fsqrt_f64_not_active:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fsqrt z0.d, p0/m, z1.d
+; CHECK-NEXT:    ret
+  %ret = tail call <vscale x 2 x double> @llvm.aarch64.sve.fsqrt.nxv2f64(<vscale x 2 x double> %a, <vscale x 2 x i1> %pg, <vscale x 2 x double> %b)
+  ret <vscale x 2 x double> %ret
+}
+
 ;
 ; SXTB (sve_int_un_pred_arit_0_h)
 ;
@@ -988,6 +1162,14 @@
 declare <vscale x 4 x float> @llvm.fabs.nxv4f32(<vscale x 4 x float>)
 declare <vscale x 2 x double> @llvm.fabs.nxv2f64(<vscale x 2 x double>)
 
+declare <vscale x 8 x half> @llvm.aarch64.sve.fsqrt.nxv8f16(<vscale x 8 x half>, <vscale x 8 x i1>, <vscale x 8 x half>)
+declare <vscale x 4 x float> @llvm.aarch64.sve.fsqrt.nxv4f32(<vscale x 4 x float>, <vscale x 4 x i1>, <vscale x 4 x float>)
+declare <vscale x 2 x double> @llvm.aarch64.sve.fsqrt.nxv2f64(<vscale x 2 x double>, <vscale x 2 x i1>, <vscale x 2 x double>)
+
+declare <vscale x 8 x half> @llvm.sqrt.nxv8f16(<vscale x 8 x half>)
+declare <vscale x 4 x float> @llvm.sqrt.nxv4f32(<vscale x 4 x float>)
+declare <vscale x 2 x double> @llvm.sqrt.nxv2f64(<vscale x 2 x double>)
+
 declare <vscale x 8 x i16> @llvm.aarch64.sve.sxtb.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i1>, <vscale x 8 x i16>)
 declare <vscale x 4 x i32> @llvm.aarch64.sve.sxtb.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i1>, <vscale x 4 x i32>)
 declare <vscale x 2 x i64> @llvm.aarch64.sve.sxtb.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i1>, <vscale x 2 x i64>)