diff --git a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
--- a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
@@ -3162,8 +3162,8 @@
   // SVE2 bitwise shift right and accumulate
   defm SSRA_ZZI  : sve2_int_bin_accum_shift_imm_right<0b00, "ssra",  AArch64ssra>;
   defm USRA_ZZI  : sve2_int_bin_accum_shift_imm_right<0b01, "usra",  AArch64usra>;
-  defm SRSRA_ZZI : sve2_int_bin_accum_shift_imm_right<0b10, "srsra", int_aarch64_sve_srsra>;
-  defm URSRA_ZZI : sve2_int_bin_accum_shift_imm_right<0b11, "ursra", int_aarch64_sve_ursra>;
+  defm SRSRA_ZZI : sve2_int_bin_accum_shift_imm_right<0b10, "srsra", int_aarch64_sve_srsra, int_aarch64_sve_srshr>;
+  defm URSRA_ZZI : sve2_int_bin_accum_shift_imm_right<0b11, "ursra", int_aarch64_sve_ursra, int_aarch64_sve_urshr>;
 
   // SVE2 complex integer add
   defm CADD_ZZI   : sve2_int_cadd<0b0, "cadd",   int_aarch64_sve_cadd_x>;
diff --git a/llvm/lib/Target/AArch64/SVEInstrFormats.td b/llvm/lib/Target/AArch64/SVEInstrFormats.td
--- a/llvm/lib/Target/AArch64/SVEInstrFormats.td
+++ b/llvm/lib/Target/AArch64/SVEInstrFormats.td
@@ -527,6 +527,12 @@
 : Pat<(vtd (op vt1:$Op1, vt2:$Op2, vt3:$Op3)),
       (inst $Op2, $Op3, $Op1)>;
 
+class SVE_Shift_Add_All_Active_Pat<ValueType vtd, SDPatternOperator op, ValueType pt,
+                                   ValueType vt1, ValueType vt2, ValueType vt3,
+                                   Instruction inst>
+: Pat<(vtd (add vt1:$Op1, (op (pt (SVEAllActive)), vt2:$Op2, vt3:$Op3))),
+      (inst $Op1, $Op2, $Op3)>;
+
 //===----------------------------------------------------------------------===//
 // SVE pattern match helpers.
 //===----------------------------------------------------------------------===//
@@ -3802,7 +3808,8 @@
 }
 
 multiclass sve2_int_bin_accum_shift_imm_right<bits<2> opc, string asm,
-                                              SDPatternOperator op> {
+                                              SDPatternOperator op,
+                                              SDPatternOperator shift_op = null_frag> {
   def _B : sve2_int_bin_accum_shift_imm<{0,0,0,1}, opc, asm, ZPR8, vecshiftR8>;
   def _H : sve2_int_bin_accum_shift_imm<{0,0,1,?}, opc, asm, ZPR16, vecshiftR16> {
     let Inst{19} = imm{3};
@@ -3819,6 +3826,11 @@
   def : SVE_3_Op_Imm_Pat<nxv8i16, op, nxv8i1, nxv8i16, i32, tvecshiftR16, !cast<Instruction>(NAME # _H)>;
   def : SVE_3_Op_Imm_Pat<nxv4i32, op, nxv4i1, nxv4i32, i32, tvecshiftR32, !cast<Instruction>(NAME # _S)>;
   def : SVE_3_Op_Imm_Pat<nxv2i64, op, nxv2i1, nxv2i64, i32, tvecshiftR64, !cast<Instruction>(NAME # _D)>;
+
+  def : SVE_Shift_Add_All_Active_Pat<nxv16i8, shift_op, nxv16i1, nxv16i8, nxv16i8, i32, !cast<Instruction>(NAME # _B)>;
+  def : SVE_Shift_Add_All_Active_Pat<nxv8i16, shift_op, nxv8i1, nxv8i16, nxv8i16, i32, !cast<Instruction>(NAME # _H)>;
+  def : SVE_Shift_Add_All_Active_Pat<nxv4i32, shift_op, nxv4i1, nxv4i32, nxv4i32, i32, !cast<Instruction>(NAME # _S)>;
+  def : SVE_Shift_Add_All_Active_Pat<nxv2i64, shift_op, nxv2i1, nxv2i64, nxv2i64, i32, !cast<Instruction>(NAME # _D)>;
 }
 
 class sve2_int_cadd<bits<2> sz, bit opc, string asm, ZPRRegOp zprty>
diff --git a/llvm/test/CodeGen/AArch64/sve2-rsra.ll b/llvm/test/CodeGen/AArch64/sve2-rsra.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/sve2-rsra.ll
@@ -0,0 +1,114 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s | FileCheck %s
+
+target triple = "aarch64-unknown-linux-gnu"
+
+; URSRA
+
+define <vscale x 16 x i8> @ursra_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) #0 {
+; CHECK-LABEL: ursra_i8:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ursra z0.b, z1.b, #1
+; CHECK-NEXT:    ret
+  %pg = call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
+  %shift = call <vscale x 16 x i8> @llvm.aarch64.sve.urshr.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %b, i32 1)
+  %add = add <vscale x 16 x i8> %a, %shift
+  ret <vscale x 16 x i8> %add
+}
+
+define <vscale x 8 x i16> @ursra_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) #0 {
+; CHECK-LABEL: ursra_i16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ursra z0.h, z1.h, #2
+; CHECK-NEXT:    ret
+  %pg = call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
+  %shift = call <vscale x 8 x i16> @llvm.aarch64.sve.urshr.nxv8i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %b, i32 2)
+  %add = add <vscale x 8 x i16> %a, %shift
+  ret <vscale x 8 x i16> %add
+}
+
+define <vscale x 4 x i32> @ursra_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) #0 {
+; CHECK-LABEL: ursra_i32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ursra z0.s, z1.s, #3
+; CHECK-NEXT:    ret
+  %pg = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
+  %shift = call <vscale x 4 x i32> @llvm.aarch64.sve.urshr.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %b, i32 3)
+  %add = add <vscale x 4 x i32> %a, %shift
+  ret <vscale x 4 x i32> %add
+}
+
+define <vscale x 2 x i64> @ursra_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) #0 {
+; CHECK-LABEL: ursra_i64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ursra z0.d, z1.d, #4
+; CHECK-NEXT:    ret
+  %pg = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
+  %shift = call <vscale x 2 x i64> @llvm.aarch64.sve.urshr.nxv2i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %b, i32 4)
+  %add = add <vscale x 2 x i64> %a, %shift
+  ret <vscale x 2 x i64> %add
+}
+
+; SRSRA
+
+define <vscale x 16 x i8> @srsra_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) #0 {
+; CHECK-LABEL: srsra_i8:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    srsra z0.b, z1.b, #1
+; CHECK-NEXT:    ret
+  %pg = call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
+  %shift = call <vscale x 16 x i8> @llvm.aarch64.sve.srshr.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %b, i32 1)
+  %add = add <vscale x 16 x i8> %a, %shift
+  ret <vscale x 16 x i8> %add
+}
+
+define <vscale x 8 x i16> @srsra_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) #0 {
+; CHECK-LABEL: srsra_i16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    srsra z0.h, z1.h, #2
+; CHECK-NEXT:    ret
+  %pg = call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
+  %shift = call <vscale x 8 x i16> @llvm.aarch64.sve.srshr.nxv8i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %b, i32 2)
+  %add = add <vscale x 8 x i16> %a, %shift
+  ret <vscale x 8 x i16> %add
+}
+
+define <vscale x 4 x i32> @srsra_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) #0 {
+; CHECK-LABEL: srsra_i32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    srsra z0.s, z1.s, #3
+; CHECK-NEXT:    ret
+  %pg = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
+  %shift = call <vscale x 4 x i32> @llvm.aarch64.sve.srshr.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %b, i32 3)
+  %add = add <vscale x 4 x i32> %a, %shift
+  ret <vscale x 4 x i32> %add
+}
+
+define <vscale x 2 x i64> @srsra_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) #0 {
+; CHECK-LABEL: srsra_i64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    srsra z0.d, z1.d, #4
+; CHECK-NEXT:    ret
+  %pg = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
+  %shift = call <vscale x 2 x i64> @llvm.aarch64.sve.srshr.nxv2i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %b, i32 4)
+  %add = add <vscale x 2 x i64> %a, %shift
+  ret <vscale x 2 x i64> %add
+}
+
+
+declare <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 immarg)
+declare <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 immarg)
+declare <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 immarg)
+declare <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 immarg)
+
+declare <vscale x 16 x i8> @llvm.aarch64.sve.urshr.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, i32)
+declare <vscale x 8 x i16> @llvm.aarch64.sve.urshr.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, i32)
+declare <vscale x 4 x i32> @llvm.aarch64.sve.urshr.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, i32)
+declare <vscale x 2 x i64> @llvm.aarch64.sve.urshr.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, i32)
+
+declare <vscale x 16 x i8> @llvm.aarch64.sve.srshr.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, i32)
+declare <vscale x 8 x i16> @llvm.aarch64.sve.srshr.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, i32)
+declare <vscale x 4 x i32> @llvm.aarch64.sve.srshr.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, i32)
+declare <vscale x 2 x i64> @llvm.aarch64.sve.srshr.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, i32)
+
+attributes #0 = { "target-features"="+sve,+sve2" }