diff --git a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
--- a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
@@ -323,7 +323,7 @@
   defm ADD_ZI   : sve_int_arith_imm0<0b000, "add", add>;
   defm SUB_ZI   : sve_int_arith_imm0<0b001, "sub", sub>;
-  defm SUBR_ZI  : sve_int_arith_imm0_subr<0b011, "subr", sub>;
+  defm SUBR_ZI  : sve_int_arith_imm0_subr<0b011, "subr", sub, int_aarch64_sve_subr>;
   defm SQADD_ZI : sve_int_arith_imm0<0b100, "sqadd", saddsat>;
   defm UQADD_ZI : sve_int_arith_imm0<0b101, "uqadd", uaddsat>;
   defm SQSUB_ZI : sve_int_arith_imm0<0b110, "sqsub", ssubsat>;
diff --git a/llvm/lib/Target/AArch64/SVEInstrFormats.td b/llvm/lib/Target/AArch64/SVEInstrFormats.td
--- a/llvm/lib/Target/AArch64/SVEInstrFormats.td
+++ b/llvm/lib/Target/AArch64/SVEInstrFormats.td
@@ -366,6 +366,11 @@
   : Pat<(vt (op (vt zprty:$Op1), (vt (AArch64dup (it (cpx i32:$imm, i32:$shift)))))),
         (inst $Op1, i32:$imm, i32:$shift)>;

+class SVE_1_Op_Imm_OptLsl_Pat_All_Active<ValueType vt, ValueType pt, SDPatternOperator op,
+                                         ZPRRegOp zprty, ValueType it, ComplexPattern cpx, Instruction inst>
+  : Pat<(vt (op (pt (SVEAllActive)), (vt zprty:$Op1), (vt (AArch64dup (it (cpx i32:$imm, i32:$shift)))))),
+        (inst $Op1, i32:$imm, i32:$shift)>;
+
 class SVE_1_Op_Imm_Arith_All_Active<ValueType vt, ValueType pt, SDPatternOperator op,
                                     ZPRRegOp zprty, ValueType it, ComplexPattern cpx, Instruction inst>
   : Pat<(vt (op (pt (SVEAllActive)), (vt zprty:$Op1), (vt (AArch64dup (it (cpx i32:$imm)))))),
@@ -4171,7 +4176,7 @@
   def : SVE_1_Op_Imm_OptLsl_Pat<nxv2i64, op, ZPR64, i64, SVEAddSubImm64Pat, !cast<Instruction>(NAME # _D)>;
 }

-multiclass sve_int_arith_imm0_subr<bits<3> opc, string asm, SDPatternOperator op> {
+multiclass sve_int_arith_imm0_subr<bits<3> opc, string asm, SDPatternOperator op, SDPatternOperator int_op> {
   def _B : sve_int_arith_imm0<0b00, opc, asm, ZPR8,  addsub_imm8_opt_lsl_i8>;
   def _H : sve_int_arith_imm0<0b01, opc, asm, ZPR16, addsub_imm8_opt_lsl_i16>;
   def _S : sve_int_arith_imm0<0b10, opc, asm, ZPR32, addsub_imm8_opt_lsl_i32>;
@@ -4181,6 +4186,11 @@
   def : SVE_1_Op_Imm_OptLsl_Reverse_Pat<nxv8i16, op, ZPR16, i32, SVEAddSubImm16Pat, !cast<Instruction>(NAME # _H)>;
   def : SVE_1_Op_Imm_OptLsl_Reverse_Pat<nxv4i32, op, ZPR32, i32, SVEAddSubImm32Pat, !cast<Instruction>(NAME # _S)>;
   def : SVE_1_Op_Imm_OptLsl_Reverse_Pat<nxv2i64, op, ZPR64, i64, SVEAddSubImm64Pat, !cast<Instruction>(NAME # _D)>;
+
+  def : SVE_1_Op_Imm_OptLsl_Pat_All_Active<nxv16i8, nxv16i1, int_op, ZPR8,  i32, SVEAddSubImm8Pat,  !cast<Instruction>(NAME # _B)>;
+  def : SVE_1_Op_Imm_OptLsl_Pat_All_Active<nxv8i16, nxv8i1,  int_op, ZPR16, i32, SVEAddSubImm16Pat, !cast<Instruction>(NAME # _H)>;
+  def : SVE_1_Op_Imm_OptLsl_Pat_All_Active<nxv4i32, nxv4i1,  int_op, ZPR32, i32, SVEAddSubImm32Pat, !cast<Instruction>(NAME # _S)>;
+  def : SVE_1_Op_Imm_OptLsl_Pat_All_Active<nxv2i64, nxv2i1,  int_op, ZPR64, i64, SVEAddSubImm64Pat, !cast<Instruction>(NAME # _D)>;
 }

 class sve_int_arith_imm<bits<2> sz8_64, bits<6> opc, string asm,
diff --git a/llvm/test/CodeGen/AArch64/sve-intrinsics-int-arith-imm.ll b/llvm/test/CodeGen/AArch64/sve-intrinsics-int-arith-imm.ll
--- a/llvm/test/CodeGen/AArch64/sve-intrinsics-int-arith-imm.ll
+++ b/llvm/test/CodeGen/AArch64/sve-intrinsics-int-arith-imm.ll
@@ -260,6 +260,162 @@
   ret <vscale x 2 x i64> %out
 }

+; SUBR
+
+define <vscale x 16 x i8> @subr_i8(<vscale x 16 x i8> %a) {
+; CHECK-LABEL: subr_i8:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    subr z0.b, z0.b, #127
+; CHECK-NEXT:    ret
+  %pg = call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
+  %elt = insertelement <vscale x 16 x i8> undef, i8 127, i32 0
+  %splat = shufflevector <vscale x 16 x i8> %elt, <vscale x 16 x i8> undef, <vscale x 16 x i32> zeroinitializer
+  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.subr.nxv16i8(<vscale x 16 x i1> %pg,
+                                                                <vscale x 16 x i8> %a,
+                                                                <vscale x 16 x i8> %splat)
+  ret <vscale x 16 x i8> %out
+}
+
+define <vscale x 8 x i16> @subr_i16(<vscale x 8 x i16> %a) {
+; CHECK-LABEL: subr_i16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    subr z0.h, z0.h, #127
+; CHECK-NEXT:    ret
+  %pg = call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
+  %elt = insertelement <vscale x 8 x i16> undef, i16 127, i32 0
+  %splat = shufflevector <vscale x 8 x i16> %elt, <vscale x 8 x i16> undef, <vscale x 8 x i32> zeroinitializer
+  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.subr.nxv8i16(<vscale x 8 x i1> %pg,
+                                                                <vscale x 8 x i16> %a,
+                                                                <vscale x 8 x i16> %splat)
+  ret <vscale x 8 x i16> %out
+}
+
+define <vscale x 8 x i16> @subr_i16_out_of_range(<vscale x 8 x i16> %a) {
+; CHECK-LABEL: subr_i16_out_of_range:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov w8, #257
+; CHECK-NEXT:    ptrue p0.h
+; CHECK-NEXT:    mov z1.h, w8
+; CHECK-NEXT:    subr z0.h, p0/m, z0.h, z1.h
+; CHECK-NEXT:    ret
+  %pg = call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
+  %elt = insertelement <vscale x 8 x i16> undef, i16 257, i32 0
+  %splat = shufflevector <vscale x 8 x i16> %elt, <vscale x 8 x i16> undef, <vscale x 8 x i32> zeroinitializer
+  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.subr.nxv8i16(<vscale x 8 x i1> %pg,
+                                                                <vscale x 8 x i16> %a,
+                                                                <vscale x 8 x i16> %splat)
+  ret <vscale x 8 x i16> %out
+}
+
+define <vscale x 4 x i32> @subr_i32(<vscale x 4 x i32> %a) {
+; CHECK-LABEL: subr_i32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    subr z0.s, z0.s, #127
+; CHECK-NEXT:    ret
+  %pg = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
+  %elt = insertelement <vscale x 4 x i32> undef, i32 127, i32 0
+  %splat = shufflevector <vscale x 4 x i32> %elt, <vscale x 4 x i32> undef, <vscale x 4 x i32> zeroinitializer
+  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.subr.nxv4i32(<vscale x 4 x i1> %pg,
+                                                                <vscale x 4 x i32> %a,
+                                                                <vscale x 4 x i32> %splat)
+  ret <vscale x 4 x i32> %out
+}
+
+define <vscale x 4 x i32> @subr_i32_out_of_range(<vscale x 4 x i32> %a) {
+; CHECK-LABEL: subr_i32_out_of_range:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov w8, #257
+; CHECK-NEXT:    ptrue p0.s
+; CHECK-NEXT:    mov z1.s, w8
+; CHECK-NEXT:    subr z0.s, p0/m, z0.s, z1.s
+; CHECK-NEXT:    ret
+  %pg = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
+  %elt = insertelement <vscale x 4 x i32> undef, i32 257, i32 0
+  %splat = shufflevector <vscale x 4 x i32> %elt, <vscale x 4 x i32> undef, <vscale x 4 x i32> zeroinitializer
+  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.subr.nxv4i32(<vscale x 4 x i1> %pg,
+                                                                <vscale x 4 x i32> %a,
+                                                                <vscale x 4 x i32> %splat)
+  ret <vscale x 4 x i32> %out
+}
+
+define <vscale x 2 x i64> @subr_i64(<vscale x 2 x i64> %a) {
+; CHECK-LABEL: subr_i64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    subr z0.d, z0.d, #127
+; CHECK-NEXT:    ret
+  %pg = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
+  %elt = insertelement <vscale x 2 x i64> undef, i64 127, i64 0
+  %splat = shufflevector <vscale x 2 x i64> %elt, <vscale x 2 x i64> undef, <vscale x 2 x i32> zeroinitializer
+  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.subr.nxv2i64(<vscale x 2 x i1> %pg,
+                                                                <vscale x 2 x i64> %a,
+                                                                <vscale x 2 x i64> %splat)
+  ret <vscale x 2 x i64> %out
+}
+
+define <vscale x 2 x i64> @subr_i64_out_of_range(<vscale x 2 x i64> %a) {
+; CHECK-LABEL: subr_i64_out_of_range:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov w8, #257
+; CHECK-NEXT:    ptrue p0.d
+; CHECK-NEXT:    mov z1.d, x8
+; CHECK-NEXT:    subr z0.d, p0/m, z0.d, z1.d
+; CHECK-NEXT:    ret
+  %pg = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
+  %elt = insertelement <vscale x 2 x i64> undef, i64 257, i64 0
+  %splat = shufflevector <vscale x 2 x i64> %elt, <vscale x 2 x i64> undef, <vscale x 2 x i32> zeroinitializer
+  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.subr.nxv2i64(<vscale x 2 x i1> %pg,
+                                                                <vscale x 2 x i64> %a,
+                                                                <vscale x 2 x i64> %splat)
+  ret <vscale x 2 x i64> %out
+}
+
+; As subr_i32 but where pg is i8 based and thus compatible for i32.
+define <vscale x 4 x i32> @subr_i32_ptrue_all_b(<vscale x 4 x i32> %a) #0 {
+; CHECK-LABEL: subr_i32_ptrue_all_b:
+; CHECK: subr z0.s, z0.s, #1
+; CHECK-NEXT: ret
+  %pg.b = tail call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
+  %pg.s = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg.b)
+  %b = tail call <vscale x 4 x i32> @llvm.aarch64.sve.dup.x.nxv4i32(i32 1)
+  %out = tail call <vscale x 4 x i32> @llvm.aarch64.sve.subr.nxv4i32(<vscale x 4 x i1> %pg.s,
+                                                                     <vscale x 4 x i32> %a,
+                                                                     <vscale x 4 x i32> %b)
+  ret <vscale x 4 x i32> %out
+}
+
+; As subr_i32 but where pg is i16 based and thus compatible for i32.
+define <vscale x 4 x i32> @subr_i32_ptrue_all_h(<vscale x 4 x i32> %a) #0 {
+; CHECK-LABEL: subr_i32_ptrue_all_h:
+; CHECK: subr z0.s, z0.s, #1
+; CHECK-NEXT: ret
+  %pg.h = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
+  %pg.b = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> %pg.h)
+  %pg.s = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg.b)
+  %b = tail call <vscale x 4 x i32> @llvm.aarch64.sve.dup.x.nxv4i32(i32 1)
+  %out = tail call <vscale x 4 x i32> @llvm.aarch64.sve.subr.nxv4i32(<vscale x 4 x i1> %pg.s,
+                                                                     <vscale x 4 x i32> %a,
+                                                                     <vscale x 4 x i32> %b)
+  ret <vscale x 4 x i32> %out
+}
+
+; As subr_i32 but where pg is i64 based, which is not compatible for i32 and
+; thus inactive lanes are important and the immediate form cannot be used.
+define <vscale x 4 x i32> @subr_i32_ptrue_all_d(<vscale x 4 x i32> %a) #0 {
+; CHECK-LABEL: subr_i32_ptrue_all_d:
+; CHECK-DAG: ptrue [[PG:p[0-9]+]].d
+; CHECK-DAG: mov [[DUP:z[0-9]+]].s, #1
+; CHECK-DAG: subr z0.s, [[PG]]/m, z0.s, [[DUP]].s
+; CHECK-NEXT: ret
+  %pg.d = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
+  %pg.b = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> %pg.d)
+  %pg.s = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg.b)
+  %b = tail call <vscale x 4 x i32> @llvm.aarch64.sve.dup.x.nxv4i32(i32 1)
+  %out = tail call <vscale x 4 x i32> @llvm.aarch64.sve.subr.nxv4i32(<vscale x 4 x i1> %pg.s,
+                                                                     <vscale x 4 x i32> %a,
+                                                                     <vscale x 4 x i32> %b)
+  ret <vscale x 4 x i32> %out
+}
+
 ; SMAX

 define <vscale x 16 x i8> @smax_i8(<vscale x 16 x i8> %a) {
@@ -1873,6 +2029,11 @@
 declare <vscale x 4 x i32> @llvm.aarch64.sve.sub.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
 declare <vscale x 2 x i64> @llvm.aarch64.sve.sub.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)

+declare <vscale x 16 x i8> @llvm.aarch64.sve.subr.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
+declare <vscale x 8 x i16> @llvm.aarch64.sve.subr.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
+declare <vscale x 4 x i32> @llvm.aarch64.sve.subr.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
+declare <vscale x 2 x i64> @llvm.aarch64.sve.subr.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)
+
 declare <vscale x 16 x i8> @llvm.aarch64.sve.sqadd.x.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>)
 declare <vscale x 8 x i16> @llvm.aarch64.sve.sqadd.x.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>)
 declare <vscale x 4 x i32> @llvm.aarch64.sve.sqadd.x.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>)
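
Note (not part of the patch): the addsub_imm8_opt_lsl_* operands used above also accept an 8-bit immediate shifted left by 8, so the new all-active patterns should cover splats such as 256 as well. Below is a minimal illustrative IR sketch only; the function name is made up, the llc invocation is assumed to follow the test file's usual style (llc -mtriple=aarch64-linux-gnu -mattr=+sve), and the expected output (the immediate form of subr for .h elements, printed as #256 or as #1, lsl #8 depending on the operand printer) is not asserted here.

; Illustrative sketch only: an all-active predicate with a splat of 256 (= 1 << 8),
; which should be encodable via the shifted-immediate case of SVEAddSubImm16Pat.
declare <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32)
declare <vscale x 8 x i16> @llvm.aarch64.sve.subr.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)

define <vscale x 8 x i16> @subr_i16_shifted_imm_sketch(<vscale x 8 x i16> %a) {
  %pg = call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
  %elt = insertelement <vscale x 8 x i16> undef, i16 256, i32 0
  %splat = shufflevector <vscale x 8 x i16> %elt, <vscale x 8 x i16> undef, <vscale x 8 x i32> zeroinitializer
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.subr.nxv8i16(<vscale x 8 x i1> %pg,
                                                                <vscale x 8 x i16> %a,
                                                                <vscale x 8 x i16> %splat)
  ret <vscale x 8 x i16> %out
}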