diff --git a/llvm/include/llvm/IR/IntrinsicsAArch64.td b/llvm/include/llvm/IR/IntrinsicsAArch64.td
--- a/llvm/include/llvm/IR/IntrinsicsAArch64.td
+++ b/llvm/include/llvm/IR/IntrinsicsAArch64.td
@@ -828,6 +828,17 @@ let TargetPrefix = "aarch64" in {  // All intrinsics start with "llvm.aarch64.".
 
+class AdvSIMD_SVE_Int_Reduce_Intrinsic
+  : Intrinsic<[llvm_anyint_ty],
+              [LLVMScalarOrSameVectorWidth<1, llvm_i1_ty>,
+               llvm_anyvector_ty],
+              [IntrNoMem]>;
+
+class AdvSIMD_SVE_SADDV_Reduce_Intrinsic
+  : Intrinsic<[llvm_i64_ty],
+              [LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
+               llvm_anyvector_ty],
+              [IntrNoMem]>;
 
 class AdvSIMD_Pred2VectorArg_Intrinsic
   : Intrinsic<[llvm_anyvector_ty],
@@ -875,6 +886,18 @@
 def int_aarch64_sve_mla : AdvSIMD_Pred3VectorArg_Intrinsic;
 def int_aarch64_sve_mls : AdvSIMD_Pred3VectorArg_Intrinsic;
 
+def int_aarch64_sve_saddv : AdvSIMD_SVE_SADDV_Reduce_Intrinsic;
+def int_aarch64_sve_uaddv : AdvSIMD_SVE_SADDV_Reduce_Intrinsic;
+
+def int_aarch64_sve_smaxv : AdvSIMD_SVE_Int_Reduce_Intrinsic;
+def int_aarch64_sve_umaxv : AdvSIMD_SVE_Int_Reduce_Intrinsic;
+def int_aarch64_sve_sminv : AdvSIMD_SVE_Int_Reduce_Intrinsic;
+def int_aarch64_sve_uminv : AdvSIMD_SVE_Int_Reduce_Intrinsic;
+
+def int_aarch64_sve_orv : AdvSIMD_SVE_Int_Reduce_Intrinsic;
+def int_aarch64_sve_eorv : AdvSIMD_SVE_Int_Reduce_Intrinsic;
+def int_aarch64_sve_andv : AdvSIMD_SVE_Int_Reduce_Intrinsic;
+
 def int_aarch64_sve_abs : AdvSIMD_Merged1VectorArg_Intrinsic;
 def int_aarch64_sve_neg : AdvSIMD_Merged1VectorArg_Intrinsic;
 
diff --git a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
--- a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
@@ -53,15 +53,15 @@
   defm MLS_ZPmZZ : sve_int_mlas_vvv_pred<0b1, "mls", int_aarch64_sve_mls>;
 
   // SVE predicated integer reductions.
-  defm SADDV_VPZ : sve_int_reduce_0_saddv<0b000, "saddv">;
-  defm UADDV_VPZ : sve_int_reduce_0_uaddv<0b001, "uaddv">;
-  defm SMAXV_VPZ : sve_int_reduce_1<0b000, "smaxv">;
-  defm UMAXV_VPZ : sve_int_reduce_1<0b001, "umaxv">;
-  defm SMINV_VPZ : sve_int_reduce_1<0b010, "sminv">;
-  defm UMINV_VPZ : sve_int_reduce_1<0b011, "uminv">;
-  defm ORV_VPZ : sve_int_reduce_2<0b000, "orv">;
-  defm EORV_VPZ : sve_int_reduce_2<0b001, "eorv">;
-  defm ANDV_VPZ : sve_int_reduce_2<0b010, "andv">;
+  defm SADDV_VPZ : sve_int_reduce_0_saddv<0b000, "saddv", int_aarch64_sve_saddv>;
+  defm UADDV_VPZ : sve_int_reduce_0_uaddv<0b001, "uaddv", int_aarch64_sve_uaddv>;
+  defm SMAXV_VPZ : sve_int_reduce_1<0b000, "smaxv", int_aarch64_sve_smaxv>;
+  defm UMAXV_VPZ : sve_int_reduce_1<0b001, "umaxv", int_aarch64_sve_umaxv>;
+  defm SMINV_VPZ : sve_int_reduce_1<0b010, "sminv", int_aarch64_sve_sminv>;
+  defm UMINV_VPZ : sve_int_reduce_1<0b011, "uminv", int_aarch64_sve_uminv>;
+  defm ORV_VPZ : sve_int_reduce_2<0b000, "orv", int_aarch64_sve_orv>;
+  defm EORV_VPZ : sve_int_reduce_2<0b001, "eorv", int_aarch64_sve_eorv>;
+  defm ANDV_VPZ : sve_int_reduce_2<0b010, "andv", int_aarch64_sve_andv>;
 
   defm ORR_ZI : sve_int_log_imm<0b00, "orr", "orn">;
   defm EOR_ZI : sve_int_log_imm<0b01, "eor", "eon">;
 
diff --git a/llvm/lib/Target/AArch64/SVEInstrFormats.td b/llvm/lib/Target/AArch64/SVEInstrFormats.td
--- a/llvm/lib/Target/AArch64/SVEInstrFormats.td
+++ b/llvm/lib/Target/AArch64/SVEInstrFormats.td
@@ -5693,31 +5693,46 @@
   let Inst{4-0} = Vd;
 }
 
-multiclass sve_int_reduce_0_saddv<bits<3> opc, string asm> {
+multiclass sve_int_reduce_0_saddv<bits<3> opc, string asm, SDPatternOperator op> {
   def _B : sve_int_reduce<0b00, 0b00, opc, asm, ZPR8, FPR64>;
   def _H : sve_int_reduce<0b01, 0b00, opc, asm, ZPR16, FPR64>;
   def _S : sve_int_reduce<0b10, 0b00, opc, asm, ZPR32, FPR64>;
+
+  def : SVE_2_Op_Pat<i64, op, nxv16i1, nxv16i8, !cast<Instruction>(NAME # _B)>;
+  def : SVE_2_Op_Pat<i64, op, nxv8i1,  nxv8i16, !cast<Instruction>(NAME # _H)>;
+  def : SVE_2_Op_Pat<i64, op, nxv4i1,  nxv4i32, !cast<Instruction>(NAME # _S)>;
 }
 
-multiclass sve_int_reduce_0_uaddv<bits<3> opc, string asm> {
+multiclass sve_int_reduce_0_uaddv<bits<3> opc, string asm, SDPatternOperator op> {
   def _B : sve_int_reduce<0b00, 0b00, opc, asm, ZPR8, FPR64>;
   def _H : sve_int_reduce<0b01, 0b00, opc, asm, ZPR16, FPR64>;
   def _S : sve_int_reduce<0b10, 0b00, opc, asm, ZPR32, FPR64>;
   def _D : sve_int_reduce<0b11, 0b00, opc, asm, ZPR64, FPR64>;
+
+  def : SVE_2_Op_Pat<i64, op, nxv16i1, nxv16i8, !cast<Instruction>(NAME # _B)>;
+  def : SVE_2_Op_Pat<i64, op, nxv8i1,  nxv8i16, !cast<Instruction>(NAME # _H)>;
+  def : SVE_2_Op_Pat<i64, op, nxv4i1,  nxv4i32, !cast<Instruction>(NAME # _S)>;
+  def : SVE_2_Op_Pat<i64, op, nxv2i1,  nxv2i64, !cast<Instruction>(NAME # _D)>;
 }
 
-multiclass sve_int_reduce_1<bits<3> opc, string asm> {
+multiclass sve_int_reduce_1<bits<3> opc, string asm, SDPatternOperator op> {
   def _B : sve_int_reduce<0b00, 0b01, opc, asm, ZPR8, FPR8>;
   def _H : sve_int_reduce<0b01, 0b01, opc, asm, ZPR16, FPR16>;
   def _S : sve_int_reduce<0b10, 0b01, opc, asm, ZPR32, FPR32>;
   def _D : sve_int_reduce<0b11, 0b01, opc, asm, ZPR64, FPR64>;
+
+  def : SVE_2_Op_Pat<i32, op, nxv4i1, nxv4i32, !cast<Instruction>(NAME # _S)>;
+  def : SVE_2_Op_Pat<i64, op, nxv2i1, nxv2i64, !cast<Instruction>(NAME # _D)>;
 }
 
-multiclass sve_int_reduce_2<bits<3> opc, string asm> {
+multiclass sve_int_reduce_2<bits<3> opc, string asm, SDPatternOperator op> {
   def _B : sve_int_reduce<0b00, 0b11, opc, asm, ZPR8, FPR8>;
   def _H : sve_int_reduce<0b01, 0b11, opc, asm, ZPR16, FPR16>;
   def _S : sve_int_reduce<0b10, 0b11, opc, asm, ZPR32, FPR32>;
   def _D : sve_int_reduce<0b11, 0b11, opc, asm, ZPR64, FPR64>;
+
+  def : SVE_2_Op_Pat<i32, op, nxv4i1, nxv4i32, !cast<Instruction>(NAME # _S)>;
+  def : SVE_2_Op_Pat<i64, op, nxv2i1, nxv2i64, !cast<Instruction>(NAME # _D)>;
 }
 
 class sve_int_movprfx_pred<bits<2> sz8_32, bits<3> opc, string asm,
diff --git a/llvm/test/CodeGen/AArch64/sve-int-reduce-pred.ll b/llvm/test/CodeGen/AArch64/sve-int-reduce-pred.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/sve-int-reduce-pred.ll
@@ -0,0 +1,237 @@
+; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s
+
+define i64 @saddv_i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a) {
+; CHECK-LABEL: saddv_i8:
+; CHECK: saddv d[[REDUCE:[0-9]+]], p0, z0.b
+; CHECK: fmov x0, d[[REDUCE]]
+; CHECK-NEXT: ret
+  %out = call i64 @llvm.aarch64.sve.saddv.nxv16i8(<vscale x 16 x i1> %pg,
+                                                  <vscale x 16 x i8> %a)
+  ret i64 %out
+}
+
+define i64 @saddv_i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a) {
+; CHECK-LABEL: saddv_i16:
+; CHECK: saddv d[[REDUCE:[0-9]+]], p0, z0.h
+; CHECK: fmov x0, d[[REDUCE]]
+; CHECK-NEXT: ret
+  %out = call i64 @llvm.aarch64.sve.saddv.nxv8i16(<vscale x 8 x i1> %pg,
+                                                  <vscale x 8 x i16> %a)
+  ret i64 %out
+}
+
+
+define i64 @saddv_i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a) {
+; CHECK-LABEL: saddv_i32:
+; CHECK: saddv d[[REDUCE:[0-9]+]], p0, z0.s
+; CHECK: fmov x0, d[[REDUCE]]
+; CHECK-NEXT: ret
+  %out = call i64 @llvm.aarch64.sve.saddv.nxv4i32(<vscale x 4 x i1> %pg,
+                                                  <vscale x 4 x i32> %a)
+  ret i64 %out
+}
+
+define i64 @uaddv_i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a) {
+; CHECK-LABEL: uaddv_i8:
+; CHECK: uaddv d[[REDUCE:[0-9]+]], p0, z0.b
+; CHECK: fmov x0, d[[REDUCE]]
+; CHECK-NEXT: ret
+  %out = call i64 @llvm.aarch64.sve.uaddv.nxv16i8(<vscale x 16 x i1> %pg,
+                                                  <vscale x 16 x i8> %a)
+  ret i64 %out
+}
+
+define i64 @uaddv_i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a) {
+; CHECK-LABEL: uaddv_i16:
+; CHECK: uaddv d[[REDUCE:[0-9]+]], p0, z0.h
+; CHECK: fmov x0, d[[REDUCE]]
+; CHECK-NEXT: ret
+  %out = call i64 @llvm.aarch64.sve.uaddv.nxv8i16(<vscale x 8 x i1> %pg,
+                                                  <vscale x 8 x i16> %a)
+  ret i64 %out
+}
+
+
+define i64 @uaddv_i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a) {
+; CHECK-LABEL: uaddv_i32:
+; CHECK: uaddv d[[REDUCE:[0-9]+]], p0, z0.s
+; CHECK: fmov x0, d[[REDUCE]]
+; CHECK-NEXT: ret
+  %out = call i64 @llvm.aarch64.sve.uaddv.nxv4i32(<vscale x 4 x i1> %pg,
+                                                  <vscale x 4 x i32> %a)
+  ret i64 %out
+}
+
+define i64 @uaddv_i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a) {
+; CHECK-LABEL: uaddv_i64:
+; CHECK: uaddv d[[REDUCE:[0-9]+]], p0, z0.d
+; CHECK: fmov x0, d[[REDUCE]]
+; CHECK-NEXT: ret
+  %out = call i64 @llvm.aarch64.sve.uaddv.nxv2i64(<vscale x 2 x i1> %pg,
+                                                  <vscale x 2 x i64> %a)
+  ret i64 %out
+}
+
+define i32 @smaxv_i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a) {
+; CHECK-LABEL: smaxv_i32:
+; CHECK: smaxv s[[REDUCE:[0-9]+]], p0, z0.s
+; CHECK: fmov w0, s[[REDUCE]]
+; CHECK-NEXT: ret
+  %out = call i32 @llvm.aarch64.sve.smaxv.nxv4i32(<vscale x 4 x i1> %pg,
+                                                  <vscale x 4 x i32> %a)
+  ret i32 %out
+}
+
+define i64 @smaxv_i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a) {
+; CHECK-LABEL: smaxv_i64:
+; CHECK: smaxv d[[REDUCE:[0-9]+]], p0, z0.d
+; CHECK: fmov x0, d[[REDUCE]]
+; CHECK-NEXT: ret
+  %out = call i64 @llvm.aarch64.sve.smaxv.nxv2i64(<vscale x 2 x i1> %pg,
+                                                  <vscale x 2 x i64> %a)
+  ret i64 %out
+}
+
+define i32 @umaxv_i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a) {
+; CHECK-LABEL: umaxv_i32:
+; CHECK: umaxv s[[REDUCE:[0-9]+]], p0, z0.s
+; CHECK: fmov w0, s[[REDUCE]]
+; CHECK-NEXT: ret
+  %out = call i32 @llvm.aarch64.sve.umaxv.nxv4i32(<vscale x 4 x i1> %pg,
+                                                  <vscale x 4 x i32> %a)
+  ret i32 %out
+}
+
+define i64 @umaxv_i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a) {
+; CHECK-LABEL: umaxv_i64:
+; CHECK: umaxv d[[REDUCE:[0-9]+]], p0, z0.d
+; CHECK: fmov x0, d[[REDUCE]]
+; CHECK-NEXT: ret
+  %out = call i64 @llvm.aarch64.sve.umaxv.nxv2i64(<vscale x 2 x i1> %pg,
+                                                  <vscale x 2 x i64> %a)
+  ret i64 %out
+}
+
+define i32 @sminv_i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a) {
+; CHECK-LABEL: sminv_i32:
+; CHECK: sminv s[[REDUCE:[0-9]+]], p0, z0.s
+; CHECK: fmov w0, s[[REDUCE]]
+; CHECK-NEXT: ret
+  %out = call i32 @llvm.aarch64.sve.sminv.nxv4i32(<vscale x 4 x i1> %pg,
+                                                  <vscale x 4 x i32> %a)
+  ret i32 %out
+}
+
+define i64 @sminv_i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a) {
+; CHECK-LABEL: sminv_i64:
+; CHECK: sminv d[[REDUCE:[0-9]+]], p0, z0.d
+; CHECK: fmov x0, d[[REDUCE]]
+; CHECK-NEXT: ret
+  %out = call i64 @llvm.aarch64.sve.sminv.nxv2i64(<vscale x 2 x i1> %pg,
+                                                  <vscale x 2 x i64> %a)
+  ret i64 %out
+}
+
+define i32 @uminv_i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a) {
+; CHECK-LABEL: uminv_i32:
+; CHECK: uminv s[[REDUCE:[0-9]+]], p0, z0.s
+; CHECK: fmov w0, s[[REDUCE]]
+; CHECK-NEXT: ret
+  %out = call i32 @llvm.aarch64.sve.uminv.nxv4i32(<vscale x 4 x i1> %pg,
+                                                  <vscale x 4 x i32> %a)
+  ret i32 %out
+}
+
+define i64 @uminv_i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a) {
+; CHECK-LABEL: uminv_i64:
+; CHECK: uminv d[[REDUCE:[0-9]+]], p0, z0.d
+; CHECK: fmov x0, d[[REDUCE]]
+; CHECK-NEXT: ret
+  %out = call i64 @llvm.aarch64.sve.uminv.nxv2i64(<vscale x 2 x i1> %pg,
+                                                  <vscale x 2 x i64> %a)
+  ret i64 %out
+}
+
+define i32 @orv_i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a) {
+; CHECK-LABEL: orv_i32:
+; CHECK: orv s[[REDUCE:[0-9]+]], p0, z0.s
+; CHECK: fmov w0, s[[REDUCE]]
+; CHECK-NEXT: ret
+  %out = call i32 @llvm.aarch64.sve.orv.nxv4i32(<vscale x 4 x i1> %pg,
+                                                <vscale x 4 x i32> %a)
+  ret i32 %out
+}
+
+define i64 @orv_i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a) {
+; CHECK-LABEL: orv_i64:
+; CHECK: orv d[[REDUCE:[0-9]+]], p0, z0.d
+; CHECK: fmov x0, d[[REDUCE]]
+; CHECK-NEXT: ret
+  %out = call i64 @llvm.aarch64.sve.orv.nxv2i64(<vscale x 2 x i1> %pg,
+                                                <vscale x 2 x i64> %a)
+  ret i64 %out
+}
+
+define i32 @eorv_i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a) {
+; CHECK-LABEL: eorv_i32:
+; CHECK: eorv s[[REDUCE:[0-9]+]], p0, z0.s
+; CHECK: fmov w0, s[[REDUCE]]
+; CHECK-NEXT: ret
+  %out = call i32 @llvm.aarch64.sve.eorv.nxv4i32(<vscale x 4 x i1> %pg,
+                                                 <vscale x 4 x i32> %a)
+  ret i32 %out
+}
+
+define i64 @eorv_i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a) {
+; CHECK-LABEL: eorv_i64:
+; CHECK: eorv d[[REDUCE:[0-9]+]], p0, z0.d
+; CHECK: fmov x0, d[[REDUCE]]
+; CHECK-NEXT: ret
+  %out = call i64 @llvm.aarch64.sve.eorv.nxv2i64(<vscale x 2 x i1> %pg,
+                                                 <vscale x 2 x i64> %a)
+  ret i64 %out
+}
+
+define i32 @andv_i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a) {
+; CHECK-LABEL: andv_i32:
+; CHECK: andv s[[REDUCE:[0-9]+]], p0, z0.s
+; CHECK: fmov w0, s[[REDUCE]]
+; CHECK-NEXT: ret
+  %out = call i32 @llvm.aarch64.sve.andv.nxv4i32(<vscale x 4 x i1> %pg,
+                                                 <vscale x 4 x i32> %a)
+  ret i32 %out
+}
+
+define i64 @andv_i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a) {
+; CHECK-LABEL: andv_i64:
+; CHECK: andv d[[REDUCE:[0-9]+]], p0, z0.d
+; CHECK: fmov x0, d[[REDUCE]]
+; CHECK-NEXT: ret
+  %out = call i64 @llvm.aarch64.sve.andv.nxv2i64(<vscale x 2 x i1> %pg,
+                                                 <vscale x 2 x i64> %a)
+  ret i64 %out
+}
+
+declare i64 @llvm.aarch64.sve.saddv.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>)
+declare i64 @llvm.aarch64.sve.saddv.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>)
+declare i64 @llvm.aarch64.sve.saddv.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>)
+
+declare i64 @llvm.aarch64.sve.uaddv.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>)
+declare i64 @llvm.aarch64.sve.uaddv.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>)
+declare i64 @llvm.aarch64.sve.uaddv.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>)
+declare i64 @llvm.aarch64.sve.uaddv.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>)
+
+declare i32 @llvm.aarch64.sve.smaxv.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>)
+declare i64 @llvm.aarch64.sve.smaxv.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>)
+declare i32 @llvm.aarch64.sve.umaxv.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>)
+declare i64 @llvm.aarch64.sve.umaxv.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>)
+declare i32 @llvm.aarch64.sve.sminv.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>)
+declare i64 @llvm.aarch64.sve.sminv.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>)
+declare i32 @llvm.aarch64.sve.uminv.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>)
+declare i64 @llvm.aarch64.sve.uminv.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>)
+declare i32 @llvm.aarch64.sve.orv.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>)
+declare i64 @llvm.aarch64.sve.orv.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>)
+declare i32 @llvm.aarch64.sve.eorv.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>)
+declare i64 @llvm.aarch64.sve.eorv.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>)
+declare i32 @llvm.aarch64.sve.andv.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>)
+declare i64 @llvm.aarch64.sve.andv.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>)