diff --git a/llvm/include/llvm/IR/IntrinsicsAArch64.td b/llvm/include/llvm/IR/IntrinsicsAArch64.td
--- a/llvm/include/llvm/IR/IntrinsicsAArch64.td
+++ b/llvm/include/llvm/IR/IntrinsicsAArch64.td
@@ -1191,6 +1191,10 @@
 def int_aarch64_sve_umin : AdvSIMD_Pred2VectorArg_Intrinsic;
 def int_aarch64_sve_sabd : AdvSIMD_Pred2VectorArg_Intrinsic;
 def int_aarch64_sve_uabd : AdvSIMD_Pred2VectorArg_Intrinsic;
+def int_aarch64_sve_smax_base : AdvSIMD_2VectorArg_Intrinsic;
+def int_aarch64_sve_smin_base : AdvSIMD_2VectorArg_Intrinsic;
+def int_aarch64_sve_umax_base : AdvSIMD_2VectorArg_Intrinsic;
+def int_aarch64_sve_umin_base : AdvSIMD_2VectorArg_Intrinsic;
 def int_aarch64_sve_mad : AdvSIMD_Pred3VectorArg_Intrinsic;
 def int_aarch64_sve_msb : AdvSIMD_Pred3VectorArg_Intrinsic;
diff --git a/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp b/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
--- a/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
@@ -271,6 +271,10 @@
   bool SelectSVEAddSubImm(SDValue N, MVT VT, SDValue &Imm, SDValue &Shift);

   bool SelectSVELogicalImm(SDValue N, MVT VT, SDValue &Imm);
+
+  bool SelectSVESignedArithImm(SDValue N, SDValue &Imm);
+
+  bool SelectSVEArithImm(SDValue N, SDValue &Imm);
 };
 } // end anonymous namespace

@@ -2910,9 +2914,34 @@
   return false;
 }

+bool AArch64DAGToDAGISel::SelectSVESignedArithImm(SDValue N, SDValue &Imm) {
+  if (auto CNode = dyn_cast<ConstantSDNode>(N)) {
+    int64_t ImmVal = CNode->getSExtValue();
+    SDLoc DL(N);
+    if (ImmVal >= -127 && ImmVal < 127) {
+      Imm = CurDAG->getTargetConstant(ImmVal, DL, MVT::i32);
+      return true;
+    }
+  }
+  return false;
+}
+
+bool AArch64DAGToDAGISel::SelectSVEArithImm(SDValue N, SDValue &Imm) {
+  if (auto CNode = dyn_cast<ConstantSDNode>(N)) {
+    uint64_t ImmVal = CNode->getSExtValue();
+    SDLoc DL(N);
+    ImmVal = ImmVal & 0xFF;
+    if (ImmVal < 256) {
+      Imm = CurDAG->getTargetConstant(ImmVal, DL, MVT::i32);
+      return true;
+    }
+  }
+  return false;
+}
+
 bool AArch64DAGToDAGISel::SelectSVELogicalImm(SDValue N, MVT VT, SDValue &Imm) {
   if (auto CNode = dyn_cast<ConstantSDNode>(N)) {
-    uint64_t ImmVal = CNode->getZExtValue();
+    int64_t ImmVal = CNode->getZExtValue();
     SDLoc DL(N);

     // Shift mask depending on type size.
diff --git a/llvm/lib/Target/AArch64/AArch64InstrFormats.td b/llvm/lib/Target/AArch64/AArch64InstrFormats.td
--- a/llvm/lib/Target/AArch64/AArch64InstrFormats.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrFormats.td
@@ -305,7 +305,7 @@
 }

 def SImm8Operand : SImmOperand<8>;
-def simm8 : Operand<i64>, ImmLeaf<i64, [{ return Imm >= -128 && Imm < 127; }]> {
+def simm8 : Operand<i32>, ImmLeaf<i32, [{ return Imm >= -128 && Imm < 127; }]> {
   let ParserMatchClass = SImm8Operand;
   let DecoderMethod = "DecodeSImm<8>";
 }
diff --git a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
--- a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
@@ -130,10 +130,10 @@
   defm EOR_ZI : sve_int_log_imm<0b01, "eor", "eon", xor>;
   defm AND_ZI : sve_int_log_imm<0b10, "and", "bic", and>;

-  defm SMAX_ZI : sve_int_arith_imm1<0b00, "smax", simm8>;
-  defm SMIN_ZI : sve_int_arith_imm1<0b10, "smin", simm8>;
-  defm UMAX_ZI : sve_int_arith_imm1<0b01, "umax", imm0_255>;
-  defm UMIN_ZI : sve_int_arith_imm1<0b11, "umin", imm0_255>;
+  defm SMAX_ZI : sve_int_arith_imm1<0b00, "smax", int_aarch64_sve_smax_base>;
+  defm SMIN_ZI : sve_int_arith_imm1<0b10, "smin", int_aarch64_sve_smin_base>;
+  defm UMAX_ZI : sve_int_arith_imm1_unsigned<0b01, "umax", int_aarch64_sve_umax_base>;
+  defm UMIN_ZI : sve_int_arith_imm1_unsigned<0b11, "umin", int_aarch64_sve_umin_base>;

   defm MUL_ZI : sve_int_arith_imm2<"mul">;
   defm MUL_ZPmZ : sve_int_bin_pred_arit_2<0b000, "mul", int_aarch64_sve_mul>;
diff --git a/llvm/lib/Target/AArch64/SVEInstrFormats.td b/llvm/lib/Target/AArch64/SVEInstrFormats.td
--- a/llvm/lib/Target/AArch64/SVEInstrFormats.td
+++ b/llvm/lib/Target/AArch64/SVEInstrFormats.td
@@ -212,6 +212,8 @@
 def SVELogicalImm32Pat : ComplexPattern<i64, 1, "SelectSVELogicalImm<MVT::i32>", []>;
 def SVELogicalImm64Pat : ComplexPattern<i64, 1, "SelectSVELogicalImm<MVT::i64>", []>;

+def SVEArithImmPat : ComplexPattern<i32, 1, "SelectSVEArithImm", []>;
+def SVESArithImmPat : ComplexPattern<i32, 1, "SelectSVESignedArithImm", []>;

 class SVEExactFPImm<string Suffix, string ValA, string ValB> : AsmOperandClass {
   let Name = "SVEExactFPImmOperand" # Suffix;
@@ -317,6 +319,11 @@
   : Pat<(vt (op (vt zprty:$Op1), (vt (AArch64dup (it (cpx i32:$imm, i32:$shift)))))),
         (inst $Op1, i32:$imm, i32:$shift)>;

+class SVE_1_Op_Imm_Arith_Pat<ValueType vt, SDPatternOperator op, ZPRRegOp zprty,
+                             ValueType it, ComplexPattern cpx, Instruction inst>
+  : Pat<(vt (op (vt zprty:$Op1), (vt (AArch64dup (it (cpx i32:$imm)))))),
+        (inst $Op1, i32:$imm)>;
+
 class SVE_1_Op_Imm_Log_Pat<ValueType vt, SDPatternOperator op, ZPRRegOp zprty,
                            ValueType it, ComplexPattern cpx, Instruction inst>
   : Pat<(vt (op (vt zprty:$Op1), (vt (AArch64dup (it (cpx i64:$imm)))))),
         (inst $Op1, i64:$imm)>;
@@ -3506,11 +3513,28 @@
   let ElementSize = ElementSizeNone;
 }

-multiclass sve_int_arith_imm1<bits<2> opc, string asm, Operand immtype> {
-  def _B : sve_int_arith_imm<0b00, { 0b1010, opc }, asm, ZPR8, immtype>;
-  def _H : sve_int_arith_imm<0b01, { 0b1010, opc }, asm, ZPR16, immtype>;
-  def _S : sve_int_arith_imm<0b10, { 0b1010, opc }, asm, ZPR32, immtype>;
-  def _D : sve_int_arith_imm<0b11, { 0b1010, opc }, asm, ZPR64, immtype>;
+multiclass sve_int_arith_imm1<bits<2> opc, string asm, SDPatternOperator op> {
+  def _B : sve_int_arith_imm<0b00, { 0b1010, opc }, asm, ZPR8, simm8>;
+  def _H : sve_int_arith_imm<0b01, { 0b1010, opc }, asm, ZPR16, simm8>;
+  def _S : sve_int_arith_imm<0b10, { 0b1010, opc }, asm, ZPR32, simm8>;
+  def _D : sve_int_arith_imm<0b11, { 0b1010, opc }, asm, ZPR64, simm8>;
+
+  def : SVE_1_Op_Imm_Arith_Pat<nxv16i8, op, ZPR8, i32, SVESArithImmPat, !cast<Instruction>(NAME # _B)>;
+  def : SVE_1_Op_Imm_Arith_Pat<nxv8i16, op, ZPR16, i32, SVESArithImmPat, !cast<Instruction>(NAME # _H)>;
+  def : SVE_1_Op_Imm_Arith_Pat<nxv4i32, op, ZPR32, i32, SVESArithImmPat, !cast<Instruction>(NAME # _S)>;
+  def : SVE_1_Op_Imm_Arith_Pat<nxv2i64, op, ZPR64, i64, SVESArithImmPat, !cast<Instruction>(NAME # _D)>;
+}
+
+multiclass sve_int_arith_imm1_unsigned<bits<2> opc, string asm, SDPatternOperator op> {
+  def _B : sve_int_arith_imm<0b00, { 0b1010, opc }, asm, ZPR8, imm0_255>;
+  def _H : sve_int_arith_imm<0b01, { 0b1010, opc }, asm, ZPR16, imm0_255>;
+  def _S : sve_int_arith_imm<0b10, { 0b1010, opc }, asm, ZPR32, imm0_255>;
+  def _D : sve_int_arith_imm<0b11, { 0b1010, opc }, asm, ZPR64, imm0_255>;
+
+  def : SVE_1_Op_Imm_Arith_Pat<nxv16i8, op, ZPR8, i32, SVEArithImmPat, !cast<Instruction>(NAME # _B)>;
+  def : SVE_1_Op_Imm_Arith_Pat<nxv8i16, op, ZPR16, i32, SVEArithImmPat, !cast<Instruction>(NAME # _H)>;
+  def : SVE_1_Op_Imm_Arith_Pat<nxv4i32, op, ZPR32, i32, SVEArithImmPat, !cast<Instruction>(NAME # _S)>;
+  def : SVE_1_Op_Imm_Arith_Pat<nxv2i64, op, ZPR64, i64, SVEArithImmPat, !cast<Instruction>(NAME # _D)>;
 }

 multiclass sve_int_arith_imm2<string asm> {
diff --git a/llvm/test/CodeGen/AArch64/sve-int-arith-imm.ll b/llvm/test/CodeGen/AArch64/sve-int-arith-imm.ll
new file
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/sve-int-arith-imm.ll
@@ -0,0 +1,350 @@
+; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s
+
+;
+; SMAX
+;
+define <vscale x 16 x i8> @smax_i8_pos(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
+; CHECK-LABEL: smax_i8_pos
+; CHECK: smax z0.b, z0.b, #27
+; CHECK-NEXT: ret
+  %elt = insertelement <vscale x 16 x i8> undef, i8 27, i32 0
+  %splat = shufflevector <vscale x 16 x i8> %elt, <vscale x 16 x i8> undef, <vscale x 16 x i32> zeroinitializer
+  %res = call <vscale x 16 x i8> @llvm.aarch64.sve.smax.base.nxv16i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %splat)
+  ret <vscale x 16 x i8> %res
+}
+
+define <vscale x 16 x i8> @smax_i8_neg(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
+; CHECK-LABEL: smax_i8_neg
+; CHECK: smax z0.b, z0.b, #-58
+; CHECK-NEXT: ret
+  %elt = insertelement <vscale x 16 x i8> undef, i8 -58, i32 0
+  %splat = shufflevector <vscale x 16 x i8> %elt, <vscale x 16 x i8> undef, <vscale x 16 x i32> zeroinitializer
+  %res = call <vscale x 16 x i8> @llvm.aarch64.sve.smax.base.nxv16i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %splat)
+  ret <vscale x 16 x i8> %res
+}
+
+define <vscale x 8 x i16> @smax_i16_pos(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
+; CHECK-LABEL: smax_i16_pos
+; CHECK: smax z0.h, z0.h, #27
+; CHECK-NEXT: ret
+  %elt = insertelement <vscale x 8 x i16> undef, i16 27, i32 0
+  %splat = shufflevector <vscale x 8 x i16> %elt, <vscale x 8 x i16> undef, <vscale x 8 x i32> zeroinitializer
+  %res = call <vscale x 8 x i16> @llvm.aarch64.sve.smax.base.nxv8i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %splat)
+  ret <vscale x 8 x i16> %res
+}
+
+define <vscale x 8 x i16> @smax_i16_neg(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
+; CHECK-LABEL: smax_i16_neg
+; CHECK: smax z0.h, z0.h, #-58
+; CHECK-NEXT: ret
+  %elt = insertelement <vscale x 8 x i16> undef, i16 -58, i32 0
+  %splat = shufflevector <vscale x 8 x i16> %elt, <vscale x 8 x i16> undef, <vscale x 8 x i32> zeroinitializer
+  %res = call <vscale x 8 x i16> @llvm.aarch64.sve.smax.base.nxv8i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %splat)
+  ret <vscale x 8 x i16> %res
+}
+
+define <vscale x 4 x i32> @smax_i32_pos(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
+; CHECK-LABEL: smax_i32_pos
+; CHECK: smax z0.s, z0.s, #27
+; CHECK-NEXT: ret
+  %elt = insertelement <vscale x 4 x i32> undef, i32 27, i32 0
+  %splat = shufflevector <vscale x 4 x i32> %elt, <vscale x 4 x i32> undef, <vscale x 4 x i32> zeroinitializer
+  %res = call <vscale x 4 x i32> @llvm.aarch64.sve.smax.base.nxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %splat)
+  ret <vscale x 4 x i32> %res
+}
+
+define <vscale x 4 x i32> @smax_i32_neg(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
+; CHECK-LABEL: smax_i32_neg
+; CHECK: smax z0.s, z0.s, #-58
+; CHECK-NEXT: ret
+  %elt = insertelement <vscale x 4 x i32> undef, i32 -58, i32 0
+  %splat = shufflevector <vscale x 4 x i32> %elt, <vscale x 4 x i32> undef, <vscale x 4 x i32> zeroinitializer
+  %res = call <vscale x 4 x i32> @llvm.aarch64.sve.smax.base.nxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %splat)
+  ret <vscale x 4 x i32> %res
+}
+
+define <vscale x 2 x i64> @smax_i64_pos(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
+; CHECK-LABEL: smax_i64_pos
+; CHECK: smax z0.d, z0.d, #27
+; CHECK-NEXT: ret
+  %elt = insertelement <vscale x 2 x i64> undef, i64 27, i32 0
+  %splat = shufflevector <vscale x 2 x i64> %elt, <vscale x 2 x i64> undef, <vscale x 2 x i32> zeroinitializer
+  %res = call <vscale x 2 x i64> @llvm.aarch64.sve.smax.base.nxv2i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %splat)
+  ret <vscale x 2 x i64> %res
+}
+
+define <vscale x 2 x i64> @smax_i64_neg(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
+; CHECK-LABEL: smax_i64_neg
+; CHECK: smax z0.d, z0.d, #-58
+; CHECK-NEXT: ret
+  %elt = insertelement <vscale x 2 x i64> undef, i64 -58, i32 0
+  %splat = shufflevector <vscale x 2 x i64> %elt, <vscale x 2 x i64> undef, <vscale x 2 x i32> zeroinitializer
+  %res = call <vscale x 2 x i64> @llvm.aarch64.sve.smax.base.nxv2i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %splat)
+  ret <vscale x 2 x i64> %res
+}
+
+;
+; SMIN
+;
+define <vscale x 16 x i8> @smin_i8_pos(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
+; CHECK-LABEL: smin_i8_pos
+; CHECK: smin z0.b, z0.b, #27
+; CHECK-NEXT: ret
+  %elt = insertelement <vscale x 16 x i8> undef, i8 27, i32 0
+  %splat = shufflevector <vscale x 16 x i8> %elt, <vscale x 16 x i8> undef, <vscale x 16 x i32> zeroinitializer
+  %res = call <vscale x 16 x i8> @llvm.aarch64.sve.smin.base.nxv16i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %splat)
+  ret <vscale x 16 x i8> %res
+}
+
+define <vscale x 16 x i8> @smin_i8_neg(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
+; CHECK-LABEL: smin_i8_neg
+; CHECK: smin z0.b, z0.b, #-58
+; CHECK-NEXT: ret
+  %elt = insertelement <vscale x 16 x i8> undef, i8 -58, i32 0
+  %splat = shufflevector <vscale x 16 x i8> %elt, <vscale x 16 x i8> undef, <vscale x 16 x i32> zeroinitializer
+  %res = call <vscale x 16 x i8> @llvm.aarch64.sve.smin.base.nxv16i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %splat)
+  ret <vscale x 16 x i8> %res
+}
+
+define <vscale x 8 x i16> @smin_i16_pos(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
+; CHECK-LABEL: smin_i16_pos
+; CHECK: smin z0.h, z0.h, #27
+; CHECK-NEXT: ret
+  %elt = insertelement <vscale x 8 x i16> undef, i16 27, i32 0
+  %splat = shufflevector <vscale x 8 x i16> %elt, <vscale x 8 x i16> undef, <vscale x 8 x i32> zeroinitializer
+  %res = call <vscale x 8 x i16> @llvm.aarch64.sve.smin.base.nxv8i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %splat)
+  ret <vscale x 8 x i16> %res
+}
+
+define <vscale x 8 x i16> @smin_i16_neg(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
+; CHECK-LABEL: smin_i16_neg
+; CHECK: smin z0.h, z0.h, #-58
+; CHECK-NEXT: ret
+  %elt = insertelement <vscale x 8 x i16> undef, i16 -58, i32 0
+  %splat = shufflevector <vscale x 8 x i16> %elt, <vscale x 8 x i16> undef, <vscale x 8 x i32> zeroinitializer
+  %res = call <vscale x 8 x i16> @llvm.aarch64.sve.smin.base.nxv8i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %splat)
+  ret <vscale x 8 x i16> %res
+}
+
+define <vscale x 4 x i32> @smin_i32_pos(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
+; CHECK-LABEL: smin_i32_pos
+; CHECK: smin z0.s, z0.s, #27
+; CHECK-NEXT: ret
+  %elt = insertelement <vscale x 4 x i32> undef, i32 27, i32 0
+  %splat = shufflevector <vscale x 4 x i32> %elt, <vscale x 4 x i32> undef, <vscale x 4 x i32> zeroinitializer
+  %res = call <vscale x 4 x i32> @llvm.aarch64.sve.smin.base.nxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %splat)
+  ret <vscale x 4 x i32> %res
+}
+
+define <vscale x 4 x i32> @smin_i32_neg(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
+; CHECK-LABEL: smin_i32_neg
+; CHECK: smin z0.s, z0.s, #-58
+; CHECK-NEXT: ret
+  %elt = insertelement <vscale x 4 x i32> undef, i32 -58, i32 0
+  %splat = shufflevector <vscale x 4 x i32> %elt, <vscale x 4 x i32> undef, <vscale x 4 x i32> zeroinitializer
+  %res = call <vscale x 4 x i32> @llvm.aarch64.sve.smin.base.nxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %splat)
+  ret <vscale x 4 x i32> %res
+}
+
+define <vscale x 2 x i64> @smin_i64_pos(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
+; CHECK-LABEL: smin_i64_pos
+; CHECK: smin z0.d, z0.d, #27
+; CHECK-NEXT: ret
+  %elt = insertelement <vscale x 2 x i64> undef, i64 27, i32 0
+  %splat = shufflevector <vscale x 2 x i64> %elt, <vscale x 2 x i64> undef, <vscale x 2 x i32> zeroinitializer
+  %res = call <vscale x 2 x i64> @llvm.aarch64.sve.smin.base.nxv2i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %splat)
+  ret <vscale x 2 x i64> %res
+}
+
+define <vscale x 2 x i64> @smin_i64_neg(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
+; CHECK-LABEL: smin_i64_neg
+; CHECK: smin z0.d, z0.d, #-58
+; CHECK-NEXT: ret
+  %elt = insertelement <vscale x 2 x i64> undef, i64 -58, i32 0
+  %splat = shufflevector <vscale x 2 x i64> %elt, <vscale x 2 x i64> undef, <vscale x 2 x i32> zeroinitializer
+  %res = call <vscale x 2 x i64> @llvm.aarch64.sve.smin.base.nxv2i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %splat)
+  ret <vscale x 2 x i64> %res
+}
+
+;
+; UMAX
+;
+define <vscale x 16 x i8> @umax_i8_pos(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
+; CHECK-LABEL: umax_i8_pos
+; CHECK: umax z0.b, z0.b, #27
+; CHECK-NEXT: ret
+  %elt = insertelement <vscale x 16 x i8> undef, i8 27, i32 0
+  %splat = shufflevector <vscale x 16 x i8> %elt, <vscale x 16 x i8> undef, <vscale x 16 x i32> zeroinitializer
+  %res = call <vscale x 16 x i8> @llvm.aarch64.sve.umax.base.nxv16i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %splat)
+  ret <vscale x 16 x i8> %res
+}
+
+define <vscale x 16 x i8> @umax_i8_large(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
+; CHECK-LABEL: umax_i8_large
+; CHECK: umax z0.b, z0.b, #129
+; CHECK-NEXT: ret
+  %elt = insertelement <vscale x 16 x i8> undef, i8 129, i32 0
+  %splat = shufflevector <vscale x 16 x i8> %elt, <vscale x 16 x i8> undef, <vscale x 16 x i32> zeroinitializer
+  %res = call <vscale x 16 x i8> @llvm.aarch64.sve.umax.base.nxv16i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %splat)
+  ret <vscale x 16 x i8> %res
+}
+
+define <vscale x 8 x i16> @umax_i16_pos(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
+; CHECK-LABEL: umax_i16_pos
+; CHECK: umax z0.h, z0.h, #27
+; CHECK-NEXT: ret
+  %elt = insertelement <vscale x 8 x i16> undef, i16 27, i32 0
+  %splat = shufflevector <vscale x 8 x i16> %elt, <vscale x 8 x i16> undef, <vscale x 8 x i32> zeroinitializer
+  %res = call <vscale x 8 x i16> @llvm.aarch64.sve.umax.base.nxv8i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %splat)
+  ret <vscale x 8 x i16> %res
+}
+
+define <vscale x 8 x i16> @umax_i16_large(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
+; CHECK-LABEL: umax_i16_large
+; CHECK: umax z0.h, z0.h, #129
+; CHECK-NEXT: ret
+  %elt = insertelement <vscale x 8 x i16> undef, i16 129, i32 0
+  %splat = shufflevector <vscale x 8 x i16> %elt, <vscale x 8 x i16> undef, <vscale x 8 x i32> zeroinitializer
+  %res = call <vscale x 8 x i16> @llvm.aarch64.sve.umax.base.nxv8i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %splat)
+  ret <vscale x 8 x i16> %res
+}
+
+define <vscale x 4 x i32> @umax_i32_pos(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
+; CHECK-LABEL: umax_i32_pos
+; CHECK: umax z0.s, z0.s, #27
+; CHECK-NEXT: ret
+  %elt = insertelement <vscale x 4 x i32> undef, i32 27, i32 0
+  %splat = shufflevector <vscale x 4 x i32> %elt, <vscale x 4 x i32> undef, <vscale x 4 x i32> zeroinitializer
+  %res = call <vscale x 4 x i32> @llvm.aarch64.sve.umax.base.nxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %splat)
+  ret <vscale x 4 x i32> %res
+}
+
+define <vscale x 4 x i32> @umax_i32_large(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
+; CHECK-LABEL: umax_i32_large
+; CHECK: umax z0.s, z0.s, #129
+; CHECK-NEXT: ret
+  %elt = insertelement <vscale x 4 x i32> undef, i32 129, i32 0
+  %splat = shufflevector <vscale x 4 x i32> %elt, <vscale x 4 x i32> undef, <vscale x 4 x i32> zeroinitializer
+  %res = call <vscale x 4 x i32> @llvm.aarch64.sve.umax.base.nxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %splat)
+  ret <vscale x 4 x i32> %res
+}
+
+define <vscale x 2 x i64> @umax_i64_pos(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
+; CHECK-LABEL: umax_i64_pos
+; CHECK: umax z0.d, z0.d, #27
+; CHECK-NEXT: ret
+  %elt = insertelement <vscale x 2 x i64> undef, i64 27, i32 0
+  %splat = shufflevector <vscale x 2 x i64> %elt, <vscale x 2 x i64> undef, <vscale x 2 x i32> zeroinitializer
+  %res = call <vscale x 2 x i64> @llvm.aarch64.sve.umax.base.nxv2i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %splat)
+  ret <vscale x 2 x i64> %res
+}
+
+define <vscale x 2 x i64> @umax_i64_large(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
+; CHECK-LABEL: umax_i64_large
+; CHECK: umax z0.d, z0.d, #129
+; CHECK-NEXT: ret
+  %elt = insertelement <vscale x 2 x i64> undef, i64 129, i32 0
+  %splat = shufflevector <vscale x 2 x i64> %elt, <vscale x 2 x i64> undef, <vscale x 2 x i32> zeroinitializer
+  %res = call <vscale x 2 x i64> @llvm.aarch64.sve.umax.base.nxv2i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %splat)
+  ret <vscale x 2 x i64> %res
+}
+
+;
+; UMIN
+;
+define <vscale x 16 x i8> @umin_i8_pos(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
+; CHECK-LABEL: umin_i8_pos
+; CHECK: umin z0.b, z0.b, #27
+; CHECK-NEXT: ret
+  %elt = insertelement <vscale x 16 x i8> undef, i8 27, i32 0
+  %splat = shufflevector <vscale x 16 x i8> %elt, <vscale x 16 x i8> undef, <vscale x 16 x i32> zeroinitializer
+  %res = call <vscale x 16 x i8> @llvm.aarch64.sve.umin.base.nxv16i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %splat)
+  ret <vscale x 16 x i8> %res
+}
+
+define <vscale x 16 x i8> @umin_i8_large(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
+; CHECK-LABEL: umin_i8_large
+; CHECK: umin z0.b, z0.b, #129
+; CHECK-NEXT: ret
+  %elt = insertelement <vscale x 16 x i8> undef, i8 129, i32 0
+  %splat = shufflevector <vscale x 16 x i8> %elt, <vscale x 16 x i8> undef, <vscale x 16 x i32> zeroinitializer
+  %res = call <vscale x 16 x i8> @llvm.aarch64.sve.umin.base.nxv16i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %splat)
+  ret <vscale x 16 x i8> %res
+}
+
+define <vscale x 8 x i16> @umin_i16_pos(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
+; CHECK-LABEL: umin_i16_pos
+; CHECK: umin z0.h, z0.h, #27
+; CHECK-NEXT: ret
+  %elt = insertelement <vscale x 8 x i16> undef, i16 27, i32 0
+  %splat = shufflevector <vscale x 8 x i16> %elt, <vscale x 8 x i16> undef, <vscale x 8 x i32> zeroinitializer
+  %res = call <vscale x 8 x i16> @llvm.aarch64.sve.umin.base.nxv8i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %splat)
+  ret <vscale x 8 x i16> %res
+}
+
+define <vscale x 8 x i16> @umin_i16_large(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
+; CHECK-LABEL: umin_i16_large
+; CHECK: umin z0.h, z0.h, #129
+; CHECK-NEXT: ret
+  %elt = insertelement <vscale x 8 x i16> undef, i16 129, i32 0
+  %splat = shufflevector <vscale x 8 x i16> %elt, <vscale x 8 x i16> undef, <vscale x 8 x i32> zeroinitializer
+  %res = call <vscale x 8 x i16> @llvm.aarch64.sve.umin.base.nxv8i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %splat)
+  ret <vscale x 8 x i16> %res
+}
+
+define <vscale x 4 x i32> @umin_i32_pos(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
+; CHECK-LABEL: umin_i32_pos
+; CHECK: umin z0.s, z0.s, #27
+; CHECK-NEXT: ret
+  %elt = insertelement <vscale x 4 x i32> undef, i32 27, i32 0
+  %splat = shufflevector <vscale x 4 x i32> %elt, <vscale x 4 x i32> undef, <vscale x 4 x i32> zeroinitializer
+  %res = call <vscale x 4 x i32> @llvm.aarch64.sve.umin.base.nxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %splat)
+  ret <vscale x 4 x i32> %res
+}
+
+define <vscale x 4 x i32> @umin_i32_large(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
+; CHECK-LABEL: umin_i32_large
+; CHECK: umin z0.s, z0.s, #129
+; CHECK-NEXT: ret
+  %elt = insertelement <vscale x 4 x i32> undef, i32 129, i32 0
+  %splat = shufflevector <vscale x 4 x i32> %elt, <vscale x 4 x i32> undef, <vscale x 4 x i32> zeroinitializer
+  %res = call <vscale x 4 x i32> @llvm.aarch64.sve.umin.base.nxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %splat)
+  ret <vscale x 4 x i32> %res
+}
+
+define <vscale x 2 x i64> @umin_i64_pos(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
+; CHECK-LABEL: umin_i64_pos
+; CHECK: umin z0.d, z0.d, #27
+; CHECK-NEXT: ret
+  %elt = insertelement <vscale x 2 x i64> undef, i64 27, i32 0
+  %splat = shufflevector <vscale x 2 x i64> %elt, <vscale x 2 x i64> undef, <vscale x 2 x i32> zeroinitializer
+  %res = call <vscale x 2 x i64> @llvm.aarch64.sve.umin.base.nxv2i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %splat)
+  ret <vscale x 2 x i64> %res
+}
+
+define <vscale x 2 x i64> @umin_i64_large(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
+; CHECK-LABEL: umin_i64_large
+; CHECK: umin z0.d, z0.d, #129
+; CHECK-NEXT: ret
+  %elt = insertelement <vscale x 2 x i64> undef, i64 129, i32 0
+  %splat = shufflevector <vscale x 2 x i64> %elt, <vscale x 2 x i64> undef, <vscale x 2 x i32> zeroinitializer
+  %res = call <vscale x 2 x i64> @llvm.aarch64.sve.umin.base.nxv2i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %splat)
+  ret <vscale x 2 x i64> %res
+}
+
+declare <vscale x 16 x i8> @llvm.aarch64.sve.smax.base.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>)
+declare <vscale x 8 x i16> @llvm.aarch64.sve.smax.base.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>)
+declare <vscale x 4 x i32> @llvm.aarch64.sve.smax.base.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>)
+declare <vscale x 2 x i64> @llvm.aarch64.sve.smax.base.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>)
+declare <vscale x 16 x i8> @llvm.aarch64.sve.smin.base.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>)
+declare <vscale x 8 x i16> @llvm.aarch64.sve.smin.base.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>)
+declare <vscale x 4 x i32> @llvm.aarch64.sve.smin.base.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>)
+declare <vscale x 2 x i64> @llvm.aarch64.sve.smin.base.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>)
+declare <vscale x 16 x i8> @llvm.aarch64.sve.umax.base.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>)
+declare <vscale x 8 x i16> @llvm.aarch64.sve.umax.base.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>)
+declare <vscale x 4 x i32> @llvm.aarch64.sve.umax.base.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>)
+declare <vscale x 2 x i64> @llvm.aarch64.sve.umax.base.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>)
+declare <vscale x 16 x i8> @llvm.aarch64.sve.umin.base.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>)
+declare <vscale x 8 x i16> @llvm.aarch64.sve.umin.base.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>)
+declare <vscale x 4 x i32> @llvm.aarch64.sve.umin.base.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>)
+declare <vscale x 2 x i64> @llvm.aarch64.sve.umin.base.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>)