diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.h b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.h
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
@@ -114,6 +114,8 @@
   FCVTZS_MERGE_PASSTHRU,
   SIGN_EXTEND_INREG_MERGE_PASSTHRU,
   ZERO_EXTEND_INREG_MERGE_PASSTHRU,
+  ABS_MERGE_PASSTHRU,
+  NEG_MERGE_PASSTHRU,
   SETCC_MERGE_ZERO,
@@ -812,6 +814,7 @@
                         SDValue ThisVal) const;
 
   SDValue LowerSTORE(SDValue Op, SelectionDAG &DAG) const;
+  SDValue LowerABS(SDValue Op, SelectionDAG &DAG) const;
   SDValue LowerMGATHER(SDValue Op, SelectionDAG &DAG) const;
   SDValue LowerMSCATTER(SDValue Op, SelectionDAG &DAG) const;
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -187,6 +187,8 @@
   case AArch64ISD::CTLZ_MERGE_PASSTHRU:
   case AArch64ISD::CTPOP_MERGE_PASSTHRU:
   case AArch64ISD::DUP_MERGE_PASSTHRU:
+  case AArch64ISD::ABS_MERGE_PASSTHRU:
+  case AArch64ISD::NEG_MERGE_PASSTHRU:
   case AArch64ISD::FNEG_MERGE_PASSTHRU:
   case AArch64ISD::SIGN_EXTEND_INREG_MERGE_PASSTHRU:
   case AArch64ISD::ZERO_EXTEND_INREG_MERGE_PASSTHRU:
@@ -1097,6 +1099,7 @@
       setOperationAction(ISD::SHL, VT, Custom);
       setOperationAction(ISD::SRL, VT, Custom);
      setOperationAction(ISD::SRA, VT, Custom);
+      setOperationAction(ISD::ABS, VT, Custom);
       setOperationAction(ISD::VECREDUCE_ADD, VT, Custom);
       setOperationAction(ISD::VECREDUCE_AND, VT, Custom);
       setOperationAction(ISD::VECREDUCE_OR, VT, Custom);
@@ -1345,6 +1348,7 @@
   setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom);
 
   // Lower fixed length vector operations to scalable equivalents.
+  setOperationAction(ISD::ABS, VT, Custom);
   setOperationAction(ISD::ADD, VT, Custom);
   setOperationAction(ISD::AND, VT, Custom);
   setOperationAction(ISD::ANY_EXTEND, VT, Custom);
@@ -1743,6 +1747,8 @@
     MAKE_CASE(AArch64ISD::FSQRT_MERGE_PASSTHRU)
     MAKE_CASE(AArch64ISD::FRECPX_MERGE_PASSTHRU)
     MAKE_CASE(AArch64ISD::FABS_MERGE_PASSTHRU)
+    MAKE_CASE(AArch64ISD::ABS_MERGE_PASSTHRU)
+    MAKE_CASE(AArch64ISD::NEG_MERGE_PASSTHRU)
     MAKE_CASE(AArch64ISD::SETCC_MERGE_ZERO)
     MAKE_CASE(AArch64ISD::ADC)
     MAKE_CASE(AArch64ISD::SBC)
@@ -3661,6 +3667,12 @@
   case Intrinsic::aarch64_sve_fabs:
     return DAG.getNode(AArch64ISD::FABS_MERGE_PASSTHRU, dl, Op.getValueType(),
                        Op.getOperand(2), Op.getOperand(3), Op.getOperand(1));
+  case Intrinsic::aarch64_sve_abs:
+    return DAG.getNode(AArch64ISD::ABS_MERGE_PASSTHRU, dl, Op.getValueType(),
+                       Op.getOperand(2), Op.getOperand(3), Op.getOperand(1));
+  case Intrinsic::aarch64_sve_neg:
+    return DAG.getNode(AArch64ISD::NEG_MERGE_PASSTHRU, dl, Op.getValueType(),
+                       Op.getOperand(2), Op.getOperand(3), Op.getOperand(1));
   case Intrinsic::aarch64_sve_convert_to_svbool: {
     EVT OutVT = Op.getValueType();
     EVT InVT = Op.getOperand(1).getValueType();
@@ -4163,9 +4175,12 @@
 }
 
 // Generate SUBS and CSEL for integer abs.
-static SDValue LowerABS(SDValue Op, SelectionDAG &DAG) {
+SDValue AArch64TargetLowering::LowerABS(SDValue Op, SelectionDAG &DAG) const {
   MVT VT = Op.getSimpleValueType();
 
+  if (VT.isVector())
+    return LowerToPredicatedOp(Op, DAG, AArch64ISD::ABS_MERGE_PASSTHRU);
+
   SDLoc DL(Op);
   SDValue Neg = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT),
                             Op.getOperand(0));
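For context: the scalar path that remains after the new vector bail-out is the branchless abs that the "Generate SUBS and CSEL" comment describes. Below is a minimal, standalone C++ model of that lowering; it is not part of the patch, the function name is illustrative, and the unsigned negate is only there to keep the C++ model free of signed-overflow UB:

  #include <cstdint>
  #include <cstdio>

  // Models the scalar ISD::ABS lowering above: compute Neg = 0 - x (the SUBS),
  // then select between x and Neg on the sign of x (the CSEL).
  static int64_t absViaSubsCsel(int64_t X) {
    int64_t Neg = static_cast<int64_t>(0u - static_cast<uint64_t>(X));
    return X < 0 ? Neg : X;
  }

  int main() {
    printf("%lld\n", static_cast<long long>(absViaSubsCsel(-42))); // prints 42
  }
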
diff --git a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
--- a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
@@ -205,6 +205,8 @@
 def AArch64cnt_mt  : SDNode<"AArch64ISD::CTPOP_MERGE_PASSTHRU", SDT_AArch64Arith>;
 def AArch64fneg_mt : SDNode<"AArch64ISD::FNEG_MERGE_PASSTHRU", SDT_AArch64Arith>;
 def AArch64fabs_mt : SDNode<"AArch64ISD::FABS_MERGE_PASSTHRU", SDT_AArch64Arith>;
+def AArch64abs_mt  : SDNode<"AArch64ISD::ABS_MERGE_PASSTHRU", SDT_AArch64Arith>;
+def AArch64neg_mt  : SDNode<"AArch64ISD::NEG_MERGE_PASSTHRU", SDT_AArch64Arith>;
 def AArch64sxt_mt  : SDNode<"AArch64ISD::SIGN_EXTEND_INREG_MERGE_PASSTHRU", SDT_AArch64IntExtend>;
 def AArch64uxt_mt  : SDNode<"AArch64ISD::ZERO_EXTEND_INREG_MERGE_PASSTHRU", SDT_AArch64IntExtend>;
 def AArch64frintp_mt : SDNode<"AArch64ISD::FCEIL_MERGE_PASSTHRU", SDT_AArch64Arith>;
@@ -376,8 +378,8 @@
   defm UXTH_ZPmZ : sve_int_un_pred_arit_0_w<0b011, "uxth", AArch64uxt_mt>;
   defm SXTW_ZPmZ : sve_int_un_pred_arit_0_d<0b100, "sxtw", AArch64sxt_mt>;
   defm UXTW_ZPmZ : sve_int_un_pred_arit_0_d<0b101, "uxtw", AArch64uxt_mt>;
-  defm ABS_ZPmZ  : sve_int_un_pred_arit_0< 0b110, "abs", int_aarch64_sve_abs>;
-  defm NEG_ZPmZ  : sve_int_un_pred_arit_0< 0b111, "neg", int_aarch64_sve_neg>;
+  defm ABS_ZPmZ  : sve_int_un_pred_arit_0< 0b110, "abs", AArch64abs_mt>;
+  defm NEG_ZPmZ  : sve_int_un_pred_arit_0< 0b111, "neg", AArch64neg_mt>;
 
   defm CLS_ZPmZ  : sve_int_un_pred_arit_1< 0b000, "cls", AArch64cls_mt>;
   defm CLZ_ZPmZ  : sve_int_un_pred_arit_1< 0b001, "clz", AArch64clz_mt>;
diff --git a/llvm/lib/Target/AArch64/SVEInstrFormats.td b/llvm/lib/Target/AArch64/SVEInstrFormats.td
--- a/llvm/lib/Target/AArch64/SVEInstrFormats.td
+++ b/llvm/lib/Target/AArch64/SVEInstrFormats.td
@@ -3777,10 +3777,10 @@
   def _S : sve_int_un_pred_arit<0b10, { opc, 0b0 }, asm, ZPR32>;
   def _D : sve_int_un_pred_arit<0b11, { opc, 0b0 }, asm, ZPR64>;
 
-  def : SVE_3_Op_Pat<nxv16i8, op, nxv16i1, nxv16i8, !cast<Instruction>(NAME # _B)>;
-  def : SVE_3_Op_Pat<nxv8i16, op, nxv8i1, nxv8i16, !cast<Instruction>(NAME # _H)>;
-  def : SVE_3_Op_Pat<nxv4i32, op, nxv4i1, nxv4i32, !cast<Instruction>(NAME # _S)>;
-  def : SVE_3_Op_Pat<nxv2i64, op, nxv2i1, nxv2i64, !cast<Instruction>(NAME # _D)>;
+  def : SVE_1_Op_Passthru_Pat<nxv16i8, op, nxv16i1, nxv16i8, !cast<Instruction>(NAME # _B)>;
+  def : SVE_1_Op_Passthru_Pat<nxv8i16, op, nxv8i1, nxv8i16, !cast<Instruction>(NAME # _H)>;
+  def : SVE_1_Op_Passthru_Pat<nxv4i32, op, nxv4i1, nxv4i32, !cast<Instruction>(NAME # _S)>;
+  def : SVE_1_Op_Passthru_Pat<nxv2i64, op, nxv2i1, nxv2i64, !cast<Instruction>(NAME # _D)>;
 }
 
 multiclass sve_int_un_pred_arit_0_h<bits<3> opc, string asm,
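The switch from SVE_3_Op_Pat to SVE_1_Op_Passthru_Pat reflects the merging semantics of the new nodes: lanes where the governing predicate is true take the operation's result, and the remaining lanes come from the passthru operand (the register tied to the destination of "abs z, p/m, z"). A scalar-loop sketch of that behaviour, assuming only the architectural definition of merging predication:

  #include <array>
  #include <cstdint>
  #include <cstdlib>

  // Per-lane model of a merging predicated ABS such as "abs z0.s, p0/m, z1.s":
  // predicate-true lanes get |src|, the remaining lanes keep the passthru.
  template <std::size_t N>
  std::array<int32_t, N> mergingAbs(const std::array<bool, N> &Pg,
                                    const std::array<int32_t, N> &Passthru,
                                    const std::array<int32_t, N> &Src) {
    std::array<int32_t, N> Out;
    for (std::size_t I = 0; I != N; ++I)
      Out[I] = Pg[I] ? std::abs(Src[I]) : Passthru[I];
    return Out;
  }
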
diff --git a/llvm/test/CodeGen/AArch64/sve-fixed-length-int-arith.ll b/llvm/test/CodeGen/AArch64/sve-fixed-length-int-arith.ll
--- a/llvm/test/CodeGen/AArch64/sve-fixed-length-int-arith.ll
+++ b/llvm/test/CodeGen/AArch64/sve-fixed-length-int-arith.ll
@@ -1053,4 +1053,314 @@
   ret void
 }
 
+;
+; ABS
+;
+
+; Don't use SVE for 64-bit vectors.
+define <8 x i8> @abs_v8i8(<8 x i8> %op1) #0 {
+; CHECK-LABEL: abs_v8i8:
+; CHECK: abs v0.8b, v0.8b
+; CHECK: ret
+  %res = call <8 x i8> @llvm.abs.v8i8(<8 x i8> %op1, i1 false)
+  ret <8 x i8> %res
+}
+
+; Don't use SVE for 128-bit vectors.
+define <16 x i8> @abs_v16i8(<16 x i8> %op1) #0 {
+; CHECK-LABEL: abs_v16i8:
+; CHECK: abs v0.16b, v0.16b
+; CHECK: ret
+  %res = call <16 x i8> @llvm.abs.v16i8(<16 x i8> %op1, i1 false)
+  ret <16 x i8> %res
+}
+
+define void @abs_v32i8(<32 x i8>* %a) #0 {
+; CHECK-LABEL: abs_v32i8:
+; CHECK: ptrue [[PG:p[0-9]+]].b, vl[[#min(VBYTES,32)]]
+; CHECK-DAG: ld1b { [[OP1:z[0-9]+]].b }, [[PG]]/z, [x0]
+; CHECK: abs [[RES:z[0-9]+]].b, [[PG]]/m, [[OP1]].b
+; CHECK: st1b { [[RES]].b }, [[PG]], [x0]
+; CHECK: ret
+  %op1 = load <32 x i8>, <32 x i8>* %a
+  %res = call <32 x i8> @llvm.abs.v32i8(<32 x i8> %op1, i1 false)
+  store <32 x i8> %res, <32 x i8>* %a
+  ret void
+}
+
+define void @abs_v64i8(<64 x i8>* %a) #0 {
+; CHECK-LABEL: abs_v64i8:
+; CHECK: ptrue [[PG:p[0-9]+]].b, vl[[#min(VBYTES,64)]]
+; CHECK-DAG: ld1b { [[OP1:z[0-9]+]].b }, [[PG]]/z, [x0]
+; CHECK: abs [[RES:z[0-9]+]].b, [[PG]]/m, [[OP1]].b
+; CHECK: st1b { [[RES]].b }, [[PG]], [x0]
+; CHECK: ret
+  %op1 = load <64 x i8>, <64 x i8>* %a
+  %res = call <64 x i8> @llvm.abs.v64i8(<64 x i8> %op1, i1 false)
+  store <64 x i8> %res, <64 x i8>* %a
+  ret void
+}
+
+define void @abs_v128i8(<128 x i8>* %a) #0 {
+; CHECK-LABEL: abs_v128i8:
+; CHECK: ptrue [[PG:p[0-9]+]].b, vl[[#min(VBYTES,128)]]
+; CHECK-DAG: ld1b { [[OP1:z[0-9]+]].b }, [[PG]]/z, [x0]
+; CHECK: abs [[RES:z[0-9]+]].b, [[PG]]/m, [[OP1]].b
+; CHECK: st1b { [[RES]].b }, [[PG]], [x0]
+; CHECK: ret
+  %op1 = load <128 x i8>, <128 x i8>* %a
+  %res = call <128 x i8> @llvm.abs.v128i8(<128 x i8> %op1, i1 false)
+  store <128 x i8> %res, <128 x i8>* %a
+  ret void
+}
+
+define void @abs_v256i8(<256 x i8>* %a) #0 {
+; CHECK-LABEL: abs_v256i8:
+; CHECK: ptrue [[PG:p[0-9]+]].b, vl[[#min(VBYTES,256)]]
+; CHECK-DAG: ld1b { [[OP1:z[0-9]+]].b }, [[PG]]/z, [x0]
+; CHECK: abs [[RES:z[0-9]+]].b, [[PG]]/m, [[OP1]].b
+; CHECK: st1b { [[RES]].b }, [[PG]], [x0]
+; CHECK: ret
+  %op1 = load <256 x i8>, <256 x i8>* %a
+  %res = call <256 x i8> @llvm.abs.v256i8(<256 x i8> %op1, i1 false)
+  store <256 x i8> %res, <256 x i8>* %a
+  ret void
+}
+
+; Don't use SVE for 64-bit vectors.
+define <4 x i16> @abs_v4i16(<4 x i16> %op1) #0 {
+; CHECK-LABEL: abs_v4i16:
+; CHECK: abs v0.4h, v0.4h
+; CHECK: ret
+  %res = call <4 x i16> @llvm.abs.v4i16(<4 x i16> %op1, i1 false)
+  ret <4 x i16> %res
+}
+
+; Don't use SVE for 128-bit vectors.
+define <8 x i16> @abs_v8i16(<8 x i16> %op1) #0 {
+; CHECK-LABEL: abs_v8i16:
+; CHECK: abs v0.8h, v0.8h
+; CHECK: ret
+  %res = call <8 x i16> @llvm.abs.v8i16(<8 x i16> %op1, i1 false)
+  ret <8 x i16> %res
+}
+
+define void @abs_v16i16(<16 x i16>* %a) #0 {
+; CHECK-LABEL: abs_v16i16:
+; CHECK: ptrue [[PG:p[0-9]+]].h, vl[[#min(div(VBYTES,2),16)]]
+; CHECK-DAG: ld1h { [[OP1:z[0-9]+]].h }, [[PG]]/z, [x0]
+; CHECK: abs [[RES:z[0-9]+]].h, [[PG]]/m, [[OP1]].h
+; CHECK: st1h { [[RES]].h }, [[PG]], [x0]
+; CHECK: ret
+  %op1 = load <16 x i16>, <16 x i16>* %a
+  %res = call <16 x i16> @llvm.abs.v16i16(<16 x i16> %op1, i1 false)
+  store <16 x i16> %res, <16 x i16>* %a
+  ret void
+}
+
+define void @abs_v32i16(<32 x i16>* %a) #0 {
+; CHECK-LABEL: abs_v32i16:
+; CHECK: ptrue [[PG:p[0-9]+]].h, vl[[#min(div(VBYTES,2),32)]]
+; CHECK-DAG: ld1h { [[OP1:z[0-9]+]].h }, [[PG]]/z, [x0]
+; CHECK: abs [[RES:z[0-9]+]].h, [[PG]]/m, [[OP1]].h
+; CHECK: st1h { [[RES]].h }, [[PG]], [x0]
+; CHECK: ret
+  %op1 = load <32 x i16>, <32 x i16>* %a
+  %res = call <32 x i16> @llvm.abs.v32i16(<32 x i16> %op1, i1 false)
+  store <32 x i16> %res, <32 x i16>* %a
+  ret void
+}
+
+define void @abs_v64i16(<64 x i16>* %a) #0 {
+; CHECK-LABEL: abs_v64i16:
+; CHECK: ptrue [[PG:p[0-9]+]].h, vl[[#min(div(VBYTES,2),64)]]
+; CHECK-DAG: ld1h { [[OP1:z[0-9]+]].h }, [[PG]]/z, [x0]
+; CHECK: abs [[RES:z[0-9]+]].h, [[PG]]/m, [[OP1]].h
+; CHECK: st1h { [[RES]].h }, [[PG]], [x0]
+; CHECK: ret
+  %op1 = load <64 x i16>, <64 x i16>* %a
+  %res = call <64 x i16> @llvm.abs.v64i16(<64 x i16> %op1, i1 false)
+  store <64 x i16> %res, <64 x i16>* %a
+  ret void
+}
+
+define void @abs_v128i16(<128 x i16>* %a) #0 {
+; CHECK-LABEL: abs_v128i16:
+; CHECK: ptrue [[PG:p[0-9]+]].h, vl[[#min(div(VBYTES,2),128)]]
+; CHECK-DAG: ld1h { [[OP1:z[0-9]+]].h }, [[PG]]/z, [x0]
+; CHECK: abs [[RES:z[0-9]+]].h, [[PG]]/m, [[OP1]].h
+; CHECK: st1h { [[RES]].h }, [[PG]], [x0]
+; CHECK: ret
+  %op1 = load <128 x i16>, <128 x i16>* %a
+  %res = call <128 x i16> @llvm.abs.v128i16(<128 x i16> %op1, i1 false)
+  store <128 x i16> %res, <128 x i16>* %a
+  ret void
+}
+
+; Don't use SVE for 64-bit vectors.
+define <2 x i32> @abs_v2i32(<2 x i32> %op1) #0 {
+; CHECK-LABEL: abs_v2i32:
+; CHECK: abs v0.2s, v0.2s
+; CHECK: ret
+  %res = call <2 x i32> @llvm.abs.v2i32(<2 x i32> %op1, i1 false)
+  ret <2 x i32> %res
+}
+
+; Don't use SVE for 128-bit vectors.
+define <4 x i32> @abs_v4i32(<4 x i32> %op1) #0 {
+; CHECK-LABEL: abs_v4i32:
+; CHECK: abs v0.4s, v0.4s
+; CHECK: ret
+  %res = call <4 x i32> @llvm.abs.v4i32(<4 x i32> %op1, i1 false)
+  ret <4 x i32> %res
+}
+
+define void @abs_v8i32(<8 x i32>* %a) #0 {
+; CHECK-LABEL: abs_v8i32:
+; CHECK: ptrue [[PG:p[0-9]+]].s, vl[[#min(div(VBYTES,4),8)]]
+; CHECK-DAG: ld1w { [[OP1:z[0-9]+]].s }, [[PG]]/z, [x0]
+; CHECK: abs [[RES:z[0-9]+]].s, [[PG]]/m, [[OP1]].s
+; CHECK: st1w { [[RES]].s }, [[PG]], [x0]
+; CHECK: ret
+  %op1 = load <8 x i32>, <8 x i32>* %a
+  %res = call <8 x i32> @llvm.abs.v8i32(<8 x i32> %op1, i1 false)
+  store <8 x i32> %res, <8 x i32>* %a
+  ret void
+}
+
+define void @abs_v16i32(<16 x i32>* %a) #0 {
+; CHECK-LABEL: abs_v16i32:
+; CHECK: ptrue [[PG:p[0-9]+]].s, vl[[#min(div(VBYTES,4),16)]]
+; CHECK-DAG: ld1w { [[OP1:z[0-9]+]].s }, [[PG]]/z, [x0]
+; CHECK: abs [[RES:z[0-9]+]].s, [[PG]]/m, [[OP1]].s
+; CHECK: st1w { [[RES]].s }, [[PG]], [x0]
+; CHECK: ret
+  %op1 = load <16 x i32>, <16 x i32>* %a
+  %res = call <16 x i32> @llvm.abs.v16i32(<16 x i32> %op1, i1 false)
+  store <16 x i32> %res, <16 x i32>* %a
+  ret void
+}
+
+define void @abs_v32i32(<32 x i32>* %a) #0 {
+; CHECK-LABEL: abs_v32i32:
+; CHECK: ptrue [[PG:p[0-9]+]].s, vl[[#min(div(VBYTES,4),32)]]
+; CHECK-DAG: ld1w { [[OP1:z[0-9]+]].s }, [[PG]]/z, [x0]
+; CHECK: abs [[RES:z[0-9]+]].s, [[PG]]/m, [[OP1]].s
+; CHECK: st1w { [[RES]].s }, [[PG]], [x0]
+; CHECK: ret
+  %op1 = load <32 x i32>, <32 x i32>* %a
+  %res = call <32 x i32> @llvm.abs.v32i32(<32 x i32> %op1, i1 false)
+  store <32 x i32> %res, <32 x i32>* %a
+  ret void
+}
+
+define void @abs_v64i32(<64 x i32>* %a) #0 {
+; CHECK-LABEL: abs_v64i32:
+; CHECK: ptrue [[PG:p[0-9]+]].s, vl[[#min(div(VBYTES,4),64)]]
+; CHECK-DAG: ld1w { [[OP1:z[0-9]+]].s }, [[PG]]/z, [x0]
+; CHECK: abs [[RES:z[0-9]+]].s, [[PG]]/m, [[OP1]].s
+; CHECK: st1w { [[RES]].s }, [[PG]], [x0]
+; CHECK: ret
+  %op1 = load <64 x i32>, <64 x i32>* %a
+  %res = call <64 x i32> @llvm.abs.v64i32(<64 x i32> %op1, i1 false)
+  store <64 x i32> %res, <64 x i32>* %a
+  ret void
+}
+
+; Don't use SVE for 64-bit vectors.
+define <1 x i64> @abs_v1i64(<1 x i64> %op1) #0 {
+; CHECK-LABEL: abs_v1i64:
+; CHECK: abs d0, d0
+; CHECK: ret
+  %res = call <1 x i64> @llvm.abs.v1i64(<1 x i64> %op1, i1 false)
+  ret <1 x i64> %res
+}
+
+; Don't use SVE for 128-bit vectors.
+define <2 x i64> @abs_v2i64(<2 x i64> %op1) #0 {
+; CHECK-LABEL: abs_v2i64:
+; CHECK: abs v0.2d, v0.2d
+; CHECK: ret
+  %res = call <2 x i64> @llvm.abs.v2i64(<2 x i64> %op1, i1 false)
+  ret <2 x i64> %res
+}
+
+define void @abs_v4i64(<4 x i64>* %a) #0 {
+; CHECK-LABEL: abs_v4i64:
+; CHECK: ptrue [[PG:p[0-9]+]].d, vl[[#min(div(VBYTES,8),4)]]
+; CHECK-DAG: ld1d { [[OP1:z[0-9]+]].d }, [[PG]]/z, [x0]
+; CHECK: abs [[RES:z[0-9]+]].d, [[PG]]/m, [[OP1]].d
+; CHECK: st1d { [[RES]].d }, [[PG]], [x0]
+; CHECK: ret
+  %op1 = load <4 x i64>, <4 x i64>* %a
+  %res = call <4 x i64> @llvm.abs.v4i64(<4 x i64> %op1, i1 false)
+  store <4 x i64> %res, <4 x i64>* %a
+  ret void
+}
+
+define void @abs_v8i64(<8 x i64>* %a) #0 {
+; CHECK-LABEL: abs_v8i64:
+; CHECK: ptrue [[PG:p[0-9]+]].d, vl[[#min(div(VBYTES,8),8)]]
+; CHECK-DAG: ld1d { [[OP1:z[0-9]+]].d }, [[PG]]/z, [x0]
+; CHECK: abs [[RES:z[0-9]+]].d, [[PG]]/m, [[OP1]].d
+; CHECK: st1d { [[RES]].d }, [[PG]], [x0]
+; CHECK: ret
+  %op1 = load <8 x i64>, <8 x i64>* %a
+  %res = call <8 x i64> @llvm.abs.v8i64(<8 x i64> %op1, i1 false)
+  store <8 x i64> %res, <8 x i64>* %a
+  ret void
+}
+
+define void @abs_v16i64(<16 x i64>* %a) #0 {
+; CHECK-LABEL: abs_v16i64:
+; CHECK: ptrue [[PG:p[0-9]+]].d, vl[[#min(div(VBYTES,8),16)]]
+; CHECK-DAG: ld1d { [[OP1:z[0-9]+]].d }, [[PG]]/z, [x0]
+; CHECK: abs [[RES:z[0-9]+]].d, [[PG]]/m, [[OP1]].d
+; CHECK: st1d { [[RES]].d }, [[PG]], [x0]
+; CHECK: ret
+  %op1 = load <16 x i64>, <16 x i64>* %a
+  %res = call <16 x i64> @llvm.abs.v16i64(<16 x i64> %op1, i1 false)
+  store <16 x i64> %res, <16 x i64>* %a
+  ret void
+}
+
+define void @abs_v32i64(<32 x i64>* %a) #0 {
+; CHECK-LABEL: abs_v32i64:
+; CHECK: ptrue [[PG:p[0-9]+]].d, vl[[#min(div(VBYTES,8),32)]]
+; CHECK-DAG: ld1d { [[OP1:z[0-9]+]].d }, [[PG]]/z, [x0]
+; CHECK: abs [[RES:z[0-9]+]].d, [[PG]]/m, [[OP1]].d
+; CHECK: st1d { [[RES]].d }, [[PG]], [x0]
+; CHECK: ret
+  %op1 = load <32 x i64>, <32 x i64>* %a
+  %res = call <32 x i64> @llvm.abs.v32i64(<32 x i64> %op1, i1 false)
+  store <32 x i64> %res, <32 x i64>* %a
+  ret void
+}
+
+declare <8 x i8> @llvm.abs.v8i8(<8 x i8>, i1)
+declare <16 x i8> @llvm.abs.v16i8(<16 x i8>, i1)
+declare <32 x i8> @llvm.abs.v32i8(<32 x i8>, i1)
+declare <64 x i8> @llvm.abs.v64i8(<64 x i8>, i1)
+declare <128 x i8> @llvm.abs.v128i8(<128 x i8>, i1)
+declare <256 x i8> @llvm.abs.v256i8(<256 x i8>, i1)
+declare <4 x i16> @llvm.abs.v4i16(<4 x i16>, i1)
+declare <8 x i16> @llvm.abs.v8i16(<8 x i16>, i1)
+declare <16 x i16> @llvm.abs.v16i16(<16 x i16>, i1)
+declare <32 x i16> @llvm.abs.v32i16(<32 x i16>, i1)
+declare <64 x i16> @llvm.abs.v64i16(<64 x i16>, i1)
+declare <128 x i16> @llvm.abs.v128i16(<128 x i16>, i1)
+declare <2 x i32> @llvm.abs.v2i32(<2 x i32>, i1)
+declare <4 x i32> @llvm.abs.v4i32(<4 x i32>, i1)
+declare <8 x i32> @llvm.abs.v8i32(<8 x i32>, i1)
+declare <16 x i32> @llvm.abs.v16i32(<16 x i32>, i1)
+declare <32 x i32> @llvm.abs.v32i32(<32 x i32>, i1)
+declare <64 x i32> @llvm.abs.v64i32(<64 x i32>, i1)
+declare <1 x i64> @llvm.abs.v1i64(<1 x i64>, i1)
+declare <2 x i64> @llvm.abs.v2i64(<2 x i64>, i1)
+declare <4 x i64> @llvm.abs.v4i64(<4 x i64>, i1)
+declare <8 x i64> @llvm.abs.v8i64(<8 x i64>, i1)
+declare <16 x i64> @llvm.abs.v16i64(<16 x i64>, i1)
+declare <32 x i64> @llvm.abs.v32i64(<32 x i64>, i1)
+
 attributes #0 = { "target-features"="+sve" }
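The vl[[#min(...)]] expressions in the CHECK lines above compute the expected ptrue element count: the number of elements the configured SVE register holds, capped at the fixed-length type's element count (VBYTES is a width-in-bytes value established by the test's RUN lines, which sit outside this hunk). A small sketch of that arithmetic, with hypothetical names:

  #include <algorithm>
  #include <cstdio>

  // Expected "ptrue pN.<T>, vl<K>" count when a fixed-length vector is
  // lowered to SVE: min(register bytes / element bytes, fixed element count).
  static unsigned expectedVL(unsigned VBytes, unsigned EltBytes, unsigned NumElts) {
    return std::min(VBytes / EltBytes, NumElts);
  }

  int main() {
    printf("%u\n", expectedVL(32, 2, 16)); // 256-bit SVE with <16 x i16>: vl16
  }
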
diff --git a/llvm/test/CodeGen/AArch64/sve-int-arith.ll b/llvm/test/CodeGen/AArch64/sve-int-arith.ll
--- a/llvm/test/CodeGen/AArch64/sve-int-arith.ll
+++ b/llvm/test/CodeGen/AArch64/sve-int-arith.ll
@@ -77,6 +77,81 @@
   ret <vscale x 2 x i64> %res
 }
 
+define <vscale x 16 x i8> @abs_nxv16i8(<vscale x 16 x i8> %a) {
+; CHECK-LABEL: abs_nxv16i8:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.b
+; CHECK-NEXT:    abs z0.b, p0/m, z0.b
+; CHECK-NEXT:    ret
+  %res = call <vscale x 16 x i8> @llvm.abs.nxv16i8(<vscale x 16 x i8> %a, i1 false)
+  ret <vscale x 16 x i8> %res
+}
+
+define <vscale x 8 x i16> @abs_nxv8i16(<vscale x 8 x i16> %a) {
+; CHECK-LABEL: abs_nxv8i16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.h
+; CHECK-NEXT:    abs z0.h, p0/m, z0.h
+; CHECK-NEXT:    ret
+  %res = call <vscale x 8 x i16> @llvm.abs.nxv8i16(<vscale x 8 x i16> %a, i1 false)
+  ret <vscale x 8 x i16> %res
+}
+
+define <vscale x 4 x i32> @abs_nxv4i32(<vscale x 4 x i32> %a) {
+; CHECK-LABEL: abs_nxv4i32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.s
+; CHECK-NEXT:    abs z0.s, p0/m, z0.s
+; CHECK-NEXT:    ret
+  %res = call <vscale x 4 x i32> @llvm.abs.nxv4i32(<vscale x 4 x i32> %a, i1 false)
+  ret <vscale x 4 x i32> %res
+}
+
+define <vscale x 2 x i64> @abs_nxv2i64(<vscale x 2 x i64> %a) {
+; CHECK-LABEL: abs_nxv2i64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.d
+; CHECK-NEXT:    abs z0.d, p0/m, z0.d
+; CHECK-NEXT:    ret
+  %res = call <vscale x 2 x i64> @llvm.abs.nxv2i64(<vscale x 2 x i64> %a, i1 false)
+  ret <vscale x 2 x i64> %res
+}
+
+define <vscale x 4 x i16> @abs_nxv4i16(<vscale x 4 x i16> %a) {
+; CHECK-LABEL: abs_nxv4i16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.s
+; CHECK-NEXT:    sxth z0.s, p0/m, z0.s
+; CHECK-NEXT:    abs z0.s, p0/m, z0.s
+; CHECK-NEXT:    ret
+  %res = call <vscale x 4 x i16> @llvm.abs.nxv4i16(<vscale x 4 x i16> %a, i1 false)
+  ret <vscale x 4 x i16> %res
+}
+
+define <vscale x 32 x i8> @abs_nxv32i8(<vscale x 32 x i8> %a) {
+; CHECK-LABEL: abs_nxv32i8:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.b
+; CHECK-NEXT:    abs z0.b, p0/m, z0.b
+; CHECK-NEXT:    abs z1.b, p0/m, z1.b
+; CHECK-NEXT:    ret
+  %res = call <vscale x 32 x i8> @llvm.abs.nxv32i8(<vscale x 32 x i8> %a, i1 false)
+  ret <vscale x 32 x i8> %res
+}
+
+define <vscale x 8 x i64> @abs_nxv8i64(<vscale x 8 x i64> %a) {
+; CHECK-LABEL: abs_nxv8i64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.d
+; CHECK-NEXT:    abs z0.d, p0/m, z0.d
+; CHECK-NEXT:    abs z1.d, p0/m, z1.d
+; CHECK-NEXT:    abs z2.d, p0/m, z2.d
+; CHECK-NEXT:    abs z3.d, p0/m, z3.d
+; CHECK-NEXT:    ret
+  %res = call <vscale x 8 x i64> @llvm.abs.nxv8i64(<vscale x 8 x i64> %a, i1 false)
+  ret <vscale x 8 x i64> %res
+}
+
 define <vscale x 2 x i64> @sqadd_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
 ; CHECK-LABEL: sqadd_i64:
 ; CHECK:       // %bb.0:
@@ -281,3 +356,11 @@
 declare <vscale x 8 x i16> @llvm.usub.sat.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>)
 declare <vscale x 4 x i32> @llvm.usub.sat.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>)
 declare <vscale x 2 x i64> @llvm.usub.sat.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>)
+
+declare <vscale x 32 x i8> @llvm.abs.nxv32i8(<vscale x 32 x i8>, i1)
+declare <vscale x 16 x i8> @llvm.abs.nxv16i8(<vscale x 16 x i8>, i1)
+declare <vscale x 4 x i16> @llvm.abs.nxv4i16(<vscale x 4 x i16>, i1)
+declare <vscale x 8 x i16> @llvm.abs.nxv8i16(<vscale x 8 x i16>, i1)
+declare <vscale x 4 x i32> @llvm.abs.nxv4i32(<vscale x 4 x i32>, i1)
+declare <vscale x 8 x i64> @llvm.abs.nxv8i64(<vscale x 8 x i64>, i1)
+declare <vscale x 2 x i64> @llvm.abs.nxv2i64(<vscale x 2 x i64>, i1)
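All of these tests pass i1 false as llvm.abs's second argument, meaning an INT_MIN input is not poison and the result simply wraps back to INT_MIN. A standalone C++ model of that flag's semantics (independent of this patch; the helper name is made up):

  #include <cassert>
  #include <cstdint>
  #include <limits>

  // llvm.abs(x, /*is_int_min_poison=*/false): |x|, with INT_MIN wrapping to
  // itself. Unsigned arithmetic keeps the model free of signed-overflow UB.
  static int32_t absNonPoison(int32_t X) {
    uint32_t U = static_cast<uint32_t>(X);
    return static_cast<int32_t>(X < 0 ? 0u - U : U);
  }

  int main() {
    assert(absNonPoison(-7) == 7);
    assert(absNonPoison(std::numeric_limits<int32_t>::min()) ==
           std::numeric_limits<int32_t>::min());
  }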