diff --git a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td --- a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td +++ b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td @@ -2518,18 +2518,25 @@ defm UQSUBR_ZPmZ : sve2_int_arith_pred<0b111110, "uqsubr", int_aarch64_sve_uqsubr>; // SVE2 saturating/rounding bitwise shift left (predicated) - defm SRSHL_ZPmZ : sve2_int_arith_pred<0b000100, "srshl", int_aarch64_sve_srshl>; - defm URSHL_ZPmZ : sve2_int_arith_pred<0b000110, "urshl", int_aarch64_sve_urshl>; - defm SRSHLR_ZPmZ : sve2_int_arith_pred<0b001100, "srshlr", null_frag>; - defm URSHLR_ZPmZ : sve2_int_arith_pred<0b001110, "urshlr", null_frag>; - defm SQSHL_ZPmZ : sve2_int_arith_pred<0b010000, "sqshl", int_aarch64_sve_sqshl>; - defm UQSHL_ZPmZ : sve2_int_arith_pred<0b010010, "uqshl", int_aarch64_sve_uqshl>; - defm SQRSHL_ZPmZ : sve2_int_arith_pred<0b010100, "sqrshl", int_aarch64_sve_sqrshl>; - defm UQRSHL_ZPmZ : sve2_int_arith_pred<0b010110, "uqrshl", int_aarch64_sve_uqrshl>; - defm SQSHLR_ZPmZ : sve2_int_arith_pred<0b011000, "sqshlr", null_frag>; - defm UQSHLR_ZPmZ : sve2_int_arith_pred<0b011010, "uqshlr", null_frag>; - defm SQRSHLR_ZPmZ : sve2_int_arith_pred<0b011100, "sqrshlr", null_frag>; - defm UQRSHLR_ZPmZ : sve2_int_arith_pred<0b011110, "uqrshlr", null_frag>; + defm SRSHL_ZPmZ : sve2_int_arith_pred<0b000100, "srshl", int_aarch64_sve_srshl, "SRSHL_ZPZZ", DestructiveBinaryCommWithRev, "SRSHLR_ZPmZ">; + defm URSHL_ZPmZ : sve2_int_arith_pred<0b000110, "urshl", int_aarch64_sve_urshl, "URSHL_ZPZZ", DestructiveBinaryCommWithRev, "URSHLR_ZPmZ">; + defm SRSHLR_ZPmZ : sve2_int_arith_pred<0b001100, "srshlr", null_frag, "SRSHLR_ZPZZ", DestructiveBinaryCommWithRev, "SRSHL_ZPmZ", /*isReverseInstr*/ 1>; + defm URSHLR_ZPmZ : sve2_int_arith_pred<0b001110, "urshlr", null_frag, "URSHLR_ZPZZ", DestructiveBinaryCommWithRev, "URSHL_ZPmZ", /*isReverseInstr*/ 1>; + defm SQSHL_ZPmZ : sve2_int_arith_pred<0b010000, "sqshl", int_aarch64_sve_sqshl, "SQSHL_ZPZZ", DestructiveBinaryCommWithRev, "SQSHLR_ZPmZ">; + defm UQSHL_ZPmZ : sve2_int_arith_pred<0b010010, "uqshl", int_aarch64_sve_uqshl, "UQSHL_ZPZZ", DestructiveBinaryCommWithRev, "UQSHLR_ZPmZ">; + defm SQRSHL_ZPmZ : sve2_int_arith_pred<0b010100, "sqrshl", int_aarch64_sve_sqrshl, "SQRSHL_ZPZZ", DestructiveBinaryCommWithRev, "SQRSHLR_ZPmZ">; + defm UQRSHL_ZPmZ : sve2_int_arith_pred<0b010110, "uqrshl", int_aarch64_sve_uqrshl, "UQRSHL_ZPZZ", DestructiveBinaryCommWithRev, "UQRSHLR_ZPmZ">; + defm SQSHLR_ZPmZ : sve2_int_arith_pred<0b011000, "sqshlr", null_frag, "SQSHLR_ZPZZ", DestructiveBinaryCommWithRev, "SQSHL_ZPmZ", /*isReverseInstr*/ 1>; + defm UQSHLR_ZPmZ : sve2_int_arith_pred<0b011010, "uqshlr", null_frag, "UQSHLR_ZPZZ", DestructiveBinaryCommWithRev, "UQSHL_ZPmZ", /*isReverseInstr*/ 1>; + defm SQRSHLR_ZPmZ : sve2_int_arith_pred<0b011100, "sqrshlr", null_frag, "SQRSHLR_ZPZZ", DestructiveBinaryCommWithRev, "SQRSHL_ZPmZ", /*isReverseInstr*/ 1>; + defm UQRSHLR_ZPmZ : sve2_int_arith_pred<0b011110, "uqrshlr", null_frag, "UQRSHLR_ZPZZ", DestructiveBinaryCommWithRev, "UQRSHL_ZPmZ", /*isReverseInstr*/ 1>; + + defm SRSHL_ZPZZ : sve2_int_bin_pred_all_active_bhsd; + defm URSHL_ZPZZ : sve2_int_bin_pred_all_active_bhsd; + defm SQSHL_ZPZZ : sve2_int_bin_pred_all_active_bhsd; + defm UQSHL_ZPZZ : sve2_int_bin_pred_all_active_bhsd; + defm SQRSHL_ZPZZ : sve2_int_bin_pred_all_active_bhsd; + defm UQRSHL_ZPZZ : sve2_int_bin_pred_all_active_bhsd; let Predicates = [HasSVE2, UseExperimentalZeroingPseudos] in { defm SQSHL_ZPZI : sve_int_bin_pred_shift_imm_left_zeroing_bhsd; diff --git a/llvm/lib/Target/AArch64/SVEInstrFormats.td b/llvm/lib/Target/AArch64/SVEInstrFormats.td --- a/llvm/lib/Target/AArch64/SVEInstrFormats.td +++ b/llvm/lib/Target/AArch64/SVEInstrFormats.td @@ -372,6 +372,12 @@ : Pat<(vtd (op (pt (SVEAllActive)), vt1:$Op1, vt2:$Op2)), (inst $Op1, $Op2)>; +class SVE_2_Op_Pred_All_Active_Pt +: Pat<(vtd (op (pt (SVEAllActive:$Op1)), vt1:$Op2, vt2:$Op3)), + (inst $Op1, $Op2, $Op3)>; + class SVE_3_Op_Pat : Pat<(vtd (op vt1:$Op1, vt2:$Op2, vt3:$Op3)), @@ -3167,11 +3173,20 @@ let ElementSize = zprty.ElementSize; } -multiclass sve2_int_arith_pred opc, string asm, SDPatternOperator op> { - def _B : sve2_int_arith_pred<0b00, opc, asm, ZPR8>; - def _H : sve2_int_arith_pred<0b01, opc, asm, ZPR16>; - def _S : sve2_int_arith_pred<0b10, opc, asm, ZPR32>; - def _D : sve2_int_arith_pred<0b11, opc, asm, ZPR64>; +multiclass sve2_int_arith_pred opc, string asm, SDPatternOperator op, + string Ps = "", + DestructiveInstTypeEnum flags=DestructiveOther, + string revname="", bit isReverseInstr=0> { + let DestructiveInstType = flags in { + def _B : sve2_int_arith_pred<0b00, opc, asm, ZPR8>, + SVEPseudo2Instr, SVEInstr2Rev; + def _H : sve2_int_arith_pred<0b01, opc, asm, ZPR16>, + SVEPseudo2Instr, SVEInstr2Rev; + def _S : sve2_int_arith_pred<0b10, opc, asm, ZPR32>, + SVEPseudo2Instr, SVEInstr2Rev; + def _D : sve2_int_arith_pred<0b11, opc, asm, ZPR64>, + SVEPseudo2Instr, SVEInstr2Rev; + } def : SVE_3_Op_Pat(NAME # _B)>; def : SVE_3_Op_Pat(NAME # _H)>; @@ -3179,6 +3194,18 @@ def : SVE_3_Op_Pat(NAME # _D)>; } +multiclass sve2_int_bin_pred_all_active_bhsd { + def _UNDEF_B : PredTwoOpPseudo; + def _UNDEF_H : PredTwoOpPseudo; + def _UNDEF_S : PredTwoOpPseudo; + def _UNDEF_D : PredTwoOpPseudo; + + def : SVE_2_Op_Pred_All_Active_Pt(NAME # _UNDEF_B)>; + def : SVE_2_Op_Pred_All_Active_Pt(NAME # _UNDEF_H)>; + def : SVE_2_Op_Pred_All_Active_Pt(NAME # _UNDEF_S)>; + def : SVE_2_Op_Pred_All_Active_Pt(NAME # _UNDEF_D)>; +} + class sve2_int_sadd_long_accum_pairwise sz, bit U, string asm, ZPRRegOp zprty1, ZPRRegOp zprty2> : I<(outs zprty1:$Zda), (ins PPR3bAny:$Pg, zprty1:$_Zda, zprty2:$Zn), diff --git a/llvm/test/CodeGen/AArch64/sve2-intrinsics-uniform-dsp.ll b/llvm/test/CodeGen/AArch64/sve2-intrinsics-uniform-dsp.ll --- a/llvm/test/CodeGen/AArch64/sve2-intrinsics-uniform-dsp.ll +++ b/llvm/test/CodeGen/AArch64/sve2-intrinsics-uniform-dsp.ll @@ -706,6 +706,69 @@ ret %out } +; +; SQRSHLR +; + +define @sqrshlr_i8( %a, %b) { +; CHECK-LABEL: sqrshlr_i8: +; CHECK: ptrue p0.b +; CHECK-NEXT: sqrshlr z0.b, p0/m, z0.b, z1.b +; CHECK-NEXT: ret + %pg = call @llvm.aarch64.sve.ptrue.nxv16i1(i32 31) + %out = call @llvm.aarch64.sve.sqrshl.nxv16i8( %pg, + %b, + %a) + ret %out +} + +define @sqrshlr_i16( %a, %b) { +; CHECK-LABEL: sqrshlr_i16: +; CHECK: ptrue p0.h +; CHECK-NEXT: sqrshlr z0.h, p0/m, z0.h, z1.h +; CHECK-NEXT: ret + %pg = call @llvm.aarch64.sve.ptrue.nxv8i1(i32 31) + %out = call @llvm.aarch64.sve.sqrshl.nxv8i16( %pg, + %b, + %a) + ret %out +} + +define @sqrshlr_i32( %a, %b) { +; CHECK-LABEL: sqrshlr_i32: +; CHECK: ptrue p0.s +; CHECK-NEXT: sqrshlr z0.s, p0/m, z0.s, z1.s +; CHECK-NEXT: ret + %pg = call @llvm.aarch64.sve.ptrue.nxv4i1(i32 31) + %out = call @llvm.aarch64.sve.sqrshl.nxv4i32( %pg, + %b, + %a) + ret %out +} + +define @sqrshlr_i64( %a, %b) { +; CHECK-LABEL: sqrshlr_i64: +; CHECK: ptrue p0.d +; CHECK-NEXT: sqrshlr z0.d, p0/m, z0.d, z1.d +; CHECK-NEXT: ret + %pg = call @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) + %out = call @llvm.aarch64.sve.sqrshl.nxv2i64( %pg, + %b, + %a) + ret %out +} + +define @sqrshlr_i64_noptrue( %pg, %a, %b) { +; CHECK-LABEL: sqrshlr_i64_noptrue: +; CHECK: sqrshl z1.d, p0/m, z1.d, z0.d +; CHECK-NEXT: mov z0.d, z1.d +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.sqrshl.nxv2i64( %pg, + %b, + %a) + ret %out +} + ; ; SQSHL (Vectors) ; @@ -750,6 +813,69 @@ ret %out } +; +; SQSHLR +; + +define @sqshlr_i8( %a, %b) { +; CHECK-LABEL: sqshlr_i8: +; CHECK: ptrue p0.b +; CHECK-NEXT: sqshlr z0.b, p0/m, z0.b, z1.b +; CHECK-NEXT: ret + %pg = call @llvm.aarch64.sve.ptrue.nxv16i1(i32 31) + %out = call @llvm.aarch64.sve.sqshl.nxv16i8( %pg, + %b, + %a) + ret %out +} + +define @sqshlr_i16( %a, %b) { +; CHECK-LABEL: sqshlr_i16: +; CHECK: ptrue p0.h +; CHECK-NEXT: sqshlr z0.h, p0/m, z0.h, z1.h +; CHECK-NEXT: ret + %pg = call @llvm.aarch64.sve.ptrue.nxv8i1(i32 31) + %out = call @llvm.aarch64.sve.sqshl.nxv8i16( %pg, + %b, + %a) + ret %out +} + +define @sqshlr_i32( %a, %b) { +; CHECK-LABEL: sqshlr_i32: +; CHECK: ptrue p0.s +; CHECK-NEXT: sqshlr z0.s, p0/m, z0.s, z1.s +; CHECK-NEXT: ret + %pg = call @llvm.aarch64.sve.ptrue.nxv4i1(i32 31) + %out = call @llvm.aarch64.sve.sqshl.nxv4i32( %pg, + %b, + %a) + ret %out +} + +define @sqshlr_i64( %a, %b) { +; CHECK-LABEL: sqshlr_i64: +; CHECK: ptrue p0.d +; CHECK-NEXT: sqshlr z0.d, p0/m, z0.d, z1.d +; CHECK-NEXT: ret + %pg = call @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) + %out = call @llvm.aarch64.sve.sqshl.nxv2i64( %pg, + %b, + %a) + ret %out +} + +define @sqshlr_i64_noptrue( %pg, %a, %b) { +; CHECK-LABEL: sqshlr_i64_noptrue: +; CHECK: sqshl z1.d, p0/m, z1.d, z0.d +; CHECK-NEXT: mov z0.d, z1.d +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.sqshl.nxv2i64( %pg, + %b, + %a) + ret %out +} + ; ; SQSHL (Scalar) ; @@ -1110,6 +1236,69 @@ ret %out } +; +; SRSHLR +; + +define @srshlr_i8( %a, %b) { +; CHECK-LABEL: srshlr_i8: +; CHECK: ptrue p0.b +; CHECK-NEXT: srshlr z0.b, p0/m, z0.b, z1.b +; CHECK-NEXT: ret + %pg = call @llvm.aarch64.sve.ptrue.nxv16i1(i32 31) + %out = call @llvm.aarch64.sve.srshl.nxv16i8( %pg, + %b, + %a) + ret %out +} + +define @srshlr_i16( %a, %b) { +; CHECK-LABEL: srshlr_i16: +; CHECK: ptrue p0.h +; CHECK-NEXT: srshlr z0.h, p0/m, z0.h, z1.h +; CHECK-NEXT: ret + %pg = call @llvm.aarch64.sve.ptrue.nxv8i1(i32 31) + %out = call @llvm.aarch64.sve.srshl.nxv8i16( %pg, + %b, + %a) + ret %out +} + +define @srshlr_i32( %a, %b) { +; CHECK-LABEL: srshlr_i32: +; CHECK: ptrue p0.s +; CHECK-NEXT: srshlr z0.s, p0/m, z0.s, z1.s +; CHECK-NEXT: ret + %pg = call @llvm.aarch64.sve.ptrue.nxv4i1(i32 31) + %out = call @llvm.aarch64.sve.srshl.nxv4i32( %pg, + %b, + %a) + ret %out +} + +define @srshlr_i64( %a, %b) { +; CHECK-LABEL: srshlr_i64: +; CHECK: ptrue p0.d +; CHECK-NEXT: srshlr z0.d, p0/m, z0.d, z1.d +; CHECK-NEXT: ret + %pg = call @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) + %out = call @llvm.aarch64.sve.srshl.nxv2i64( %pg, + %b, + %a) + ret %out +} + +define @srshlr_i64_noptrue( %pg, %a, %b) { +; CHECK-LABEL: srshlr_i64_noptrue: +; CHECK: srshl z1.d, p0/m, z1.d, z0.d +; CHECK-NEXT: mov z0.d, z1.d +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.srshl.nxv2i64( %pg, + %b, + %a) + ret %out +} + ; ; SRSHR ; @@ -1550,6 +1739,69 @@ ret %out } +; +; UQRSHLR +; + +define @uqrshlr_i8( %a, %b) { +; CHECK-LABEL: uqrshlr_i8: +; CHECK: ptrue p0.b +; CHECK-NEXT: uqrshlr z0.b, p0/m, z0.b, z1.b +; CHECK-NEXT: ret + %pg = call @llvm.aarch64.sve.ptrue.nxv16i1(i32 31) + %out = call @llvm.aarch64.sve.uqrshl.nxv16i8( %pg, + %b, + %a) + ret %out +} + +define @uqrshlr_i16( %a, %b) { +; CHECK-LABEL: uqrshlr_i16: +; CHECK: ptrue p0.h +; CHECK-NEXT: uqrshlr z0.h, p0/m, z0.h, z1.h +; CHECK-NEXT: ret + %pg = call @llvm.aarch64.sve.ptrue.nxv8i1(i32 31) + %out = call @llvm.aarch64.sve.uqrshl.nxv8i16( %pg, + %b, + %a) + ret %out +} + +define @uqrshlr_i32( %a, %b) { +; CHECK-LABEL: uqrshlr_i32: +; CHECK: ptrue p0.s +; CHECK-NEXT: uqrshlr z0.s, p0/m, z0.s, z1.s +; CHECK-NEXT: ret + %pg = call @llvm.aarch64.sve.ptrue.nxv4i1(i32 31) + %out = call @llvm.aarch64.sve.uqrshl.nxv4i32( %pg, + %b, + %a) + ret %out +} + +define @uqrshlr_i64( %a, %b) { +; CHECK-LABEL: uqrshlr_i64: +; CHECK: ptrue p0.d +; CHECK-NEXT: uqrshlr z0.d, p0/m, z0.d, z1.d +; CHECK-NEXT: ret + %pg = call @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) + %out = call @llvm.aarch64.sve.uqrshl.nxv2i64( %pg, + %b, + %a) + ret %out +} + +define @uqrshlr_i64_noptrue( %pg, %a, %b) { +; CHECK-LABEL: uqrshlr_i64_noptrue: +; CHECK: uqrshl z1.d, p0/m, z1.d, z0.d +; CHECK-NEXT: mov z0.d, z1.d +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.uqrshl.nxv2i64( %pg, + %b, + %a) + ret %out +} + ; ; UQSHL (Vectors) ; @@ -1594,6 +1846,69 @@ ret %out } +; +; UQSHLR +; + +define @uqshlr_i8( %a, %b) { +; CHECK-LABEL: uqshlr_i8: +; CHECK: ptrue p0.b +; CHECK-NEXT: uqshlr z0.b, p0/m, z0.b, z1.b +; CHECK-NEXT: ret + %pg = call @llvm.aarch64.sve.ptrue.nxv16i1(i32 31) + %out = call @llvm.aarch64.sve.uqshl.nxv16i8( %pg, + %b, + %a) + ret %out +} + +define @uqshlr_i16( %a, %b) { +; CHECK-LABEL: uqshlr_i16: +; CHECK: ptrue p0.h +; CHECK-NEXT: uqshlr z0.h, p0/m, z0.h, z1.h +; CHECK-NEXT: ret + %pg = call @llvm.aarch64.sve.ptrue.nxv8i1(i32 31) + %out = call @llvm.aarch64.sve.uqshl.nxv8i16( %pg, + %b, + %a) + ret %out +} + +define @uqshlr_i32( %a, %b) { +; CHECK-LABEL: uqshlr_i32: +; CHECK: ptrue p0.s +; CHECK-NEXT: uqshlr z0.s, p0/m, z0.s, z1.s +; CHECK-NEXT: ret + %pg = call @llvm.aarch64.sve.ptrue.nxv4i1(i32 31) + %out = call @llvm.aarch64.sve.uqshl.nxv4i32( %pg, + %b, + %a) + ret %out +} + +define @uqshlr_i64( %a, %b) { +; CHECK-LABEL: uqshlr_i64: +; CHECK: ptrue p0.d +; CHECK-NEXT: uqshlr z0.d, p0/m, z0.d, z1.d +; CHECK-NEXT: ret + %pg = call @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) + %out = call @llvm.aarch64.sve.uqshl.nxv2i64( %pg, + %b, + %a) + ret %out +} + +define @uqshlr_i64_noptrue( %pg, %a, %b) { +; CHECK-LABEL: uqshlr_i64_noptrue: +; CHECK: uqshl z1.d, p0/m, z1.d, z0.d +; CHECK-NEXT: mov z0.d, z1.d +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.uqshl.nxv2i64( %pg, + %b, + %a) + ret %out +} + ; ; UQSHL (Scalar) ; @@ -1880,6 +2195,69 @@ ret %out } +; +; URSHLR +; + +define @urshlr_i8( %a, %b) { +; CHECK-LABEL: urshlr_i8: +; CHECK: ptrue p0.b +; CHECK-NEXT: urshlr z0.b, p0/m, z0.b, z1.b +; CHECK-NEXT: ret + %pg = call @llvm.aarch64.sve.ptrue.nxv16i1(i32 31) + %out = call @llvm.aarch64.sve.urshl.nxv16i8( %pg, + %b, + %a) + ret %out +} + +define @urshlr_i16( %a, %b) { +; CHECK-LABEL: urshlr_i16: +; CHECK: ptrue p0.h +; CHECK-NEXT: urshlr z0.h, p0/m, z0.h, z1.h +; CHECK-NEXT: ret + %pg = call @llvm.aarch64.sve.ptrue.nxv8i1(i32 31) + %out = call @llvm.aarch64.sve.urshl.nxv8i16( %pg, + %b, + %a) + ret %out +} + +define @urshlr_i32( %a, %b) { +; CHECK-LABEL: urshlr_i32: +; CHECK: ptrue p0.s +; CHECK-NEXT: urshlr z0.s, p0/m, z0.s, z1.s +; CHECK-NEXT: ret + %pg = call @llvm.aarch64.sve.ptrue.nxv4i1(i32 31) + %out = call @llvm.aarch64.sve.urshl.nxv4i32( %pg, + %b, + %a) + ret %out +} + +define @urshlr_i64( %a, %b) { +; CHECK-LABEL: urshlr_i64: +; CHECK: ptrue p0.d +; CHECK-NEXT: urshlr z0.d, p0/m, z0.d, z1.d +; CHECK-NEXT: ret + %pg = call @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) + %out = call @llvm.aarch64.sve.urshl.nxv2i64( %pg, + %b, + %a) + ret %out +} + +define @urshlr_i64_noptrue( %pg, %a, %b) { +; CHECK-LABEL: urshlr_i64_noptrue: +; CHECK: urshl z1.d, p0/m, z1.d, z0.d +; CHECK-NEXT: mov z0.d, z1.d +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.urshl.nxv2i64( %pg, + %b, + %a) + ret %out +} + ; ; URSHR ; @@ -2289,3 +2667,8 @@ declare @llvm.aarch64.sve.usra.nxv8i16(, , i32) declare @llvm.aarch64.sve.usra.nxv4i32(, , i32) declare @llvm.aarch64.sve.usra.nxv2i64(, , i32) + +declare @llvm.aarch64.sve.ptrue.nxv16i1(i32) +declare @llvm.aarch64.sve.ptrue.nxv8i1(i32) +declare @llvm.aarch64.sve.ptrue.nxv4i1(i32) +declare @llvm.aarch64.sve.ptrue.nxv2i1(i32)