diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.h b/llvm/lib/Target/RISCV/RISCVISelLowering.h
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.h
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.h
@@ -445,6 +445,7 @@
   SDValue lowerFPVECREDUCE(SDValue Op, SelectionDAG &DAG) const;
   SDValue lowerINSERT_SUBVECTOR(SDValue Op, SelectionDAG &DAG) const;
   SDValue lowerEXTRACT_SUBVECTOR(SDValue Op, SelectionDAG &DAG) const;
+  SDValue lowerABS(SDValue Op, SelectionDAG &DAG) const;
   SDValue lowerFixedLengthVectorLoadToRVV(SDValue Op, SelectionDAG &DAG) const;
   SDValue lowerFixedLengthVectorStoreToRVV(SDValue Op, SelectionDAG &DAG) const;
   SDValue lowerFixedLengthVectorSetccToRVV(SDValue Op, SelectionDAG &DAG) const;
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -431,6 +431,9 @@
       setOperationAction(ISD::UMIN, VT, Legal);
       setOperationAction(ISD::UMAX, VT, Legal);
 
+      if (!Subtarget.is64Bit() && VT.getVectorElementType() == MVT::i64)
+        setOperationAction(ISD::ABS, VT, Custom);
+
       setOperationAction(ISD::ROTL, VT, Expand);
       setOperationAction(ISD::ROTR, VT, Expand);
 
@@ -575,6 +578,7 @@
       setOperationAction(ISD::SMAX, VT, Custom);
       setOperationAction(ISD::UMIN, VT, Custom);
      setOperationAction(ISD::UMAX, VT, Custom);
+      setOperationAction(ISD::ABS, VT, Custom);
 
       setOperationAction(ISD::MULHS, VT, Custom);
       setOperationAction(ISD::MULHU, VT, Custom);
@@ -1591,6 +1595,8 @@
     return lowerToScalableOp(Op, DAG, RISCVISD::UMIN_VL);
   case ISD::UMAX:
     return lowerToScalableOp(Op, DAG, RISCVISD::UMAX_VL);
+  case ISD::ABS:
+    return lowerABS(Op, DAG);
   case ISD::VSELECT:
     return lowerFixedLengthVectorSelectToRVV(Op, DAG);
   }
@@ -2901,6 +2907,43 @@
   return lowerToScalableOp(Op, DAG, VecOpc, /*HasMask*/ true);
 }
 
+// Lower vector ABS to smax(X, sub(0, X)).
+SDValue RISCVTargetLowering::lowerABS(SDValue Op, SelectionDAG &DAG) const {
+  SDLoc DL(Op);
+  MVT VT = Op.getSimpleValueType();
+  SDValue X = Op.getOperand(0);
+
+  // For scalable vectors we just need to deal with i64 on RV32 since the
+  // default expansion crashes in getConstant.
+  if (VT.isScalableVector()) {
+    assert(!Subtarget.is64Bit() && VT.getVectorElementType() == MVT::i64 &&
+           "Unexpected custom lowering!");
+    SDValue SplatZero = DAG.getNode(RISCVISD::SPLAT_VECTOR_I64, DL, VT,
+                                    DAG.getConstant(0, DL, MVT::i32));
+    SDValue NegX = DAG.getNode(ISD::SUB, DL, VT, SplatZero, X);
+    return DAG.getNode(ISD::SMAX, DL, VT, X, NegX);
+  }
+
+  assert(VT.isFixedLengthVector() && "Unexpected type");
+
+  MVT ContainerVT =
+      RISCVTargetLowering::getContainerForFixedLengthVector(DAG, VT, Subtarget);
+  X = convertToScalableVector(ContainerVT, X, DAG, Subtarget);
+
+  SDValue Mask, VL;
+  std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
+
+  SDValue SplatZero =
+      DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
+                  DAG.getConstant(0, DL, Subtarget.getXLenVT()));
+  SDValue NegX =
+      DAG.getNode(RISCVISD::SUB_VL, DL, ContainerVT, SplatZero, X, Mask, VL);
+  SDValue Max =
+      DAG.getNode(RISCVISD::SMAX_VL, DL, ContainerVT, X, NegX, Mask, VL);
+
+  return convertFromScalableVector(VT, Max, DAG, Subtarget);
+}
+
 SDValue RISCVTargetLowering::lowerFixedLengthVectorSelectToRVV(
     SDValue Op, SelectionDAG &DAG) const {
   MVT VT = Op.getSimpleValueType();
diff --git a/llvm/test/CodeGen/RISCV/rvv/abs-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/abs-sdnode.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/abs-sdnode.ll
@@ -0,0 +1,198 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -verify-machineinstrs < %s | FileCheck %s
+
+declare <vscale x 1 x i16> @llvm.abs.nxv1i16(<vscale x 1 x i16>, i1)
+
+define <vscale x 1 x i16> @vabs_nxv1i16(<vscale x 1 x i16> %v) {
+; CHECK-LABEL: vabs_nxv1i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e16,mf4,ta,mu
+; CHECK-NEXT:    vrsub.vi v25, v8, 0
+; CHECK-NEXT:    vmax.vv v8, v8, v25
+; CHECK-NEXT:    ret
+  %r = call <vscale x 1 x i16> @llvm.abs.nxv1i16(<vscale x 1 x i16> %v, i1 false)
+  ret <vscale x 1 x i16> %r
+}
+
+declare <vscale x 2 x i16> @llvm.abs.nxv2i16(<vscale x 2 x i16>, i1)
+
+define <vscale x 2 x i16> @vabs_nxv2i16(<vscale x 2 x i16> %v) {
+; CHECK-LABEL: vabs_nxv2i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e16,mf2,ta,mu
+; CHECK-NEXT:    vrsub.vi v25, v8, 0
+; CHECK-NEXT:    vmax.vv v8, v8, v25
+; CHECK-NEXT:    ret
+  %r = call <vscale x 2 x i16> @llvm.abs.nxv2i16(<vscale x 2 x i16> %v, i1 false)
+  ret <vscale x 2 x i16> %r
+}
+
+declare <vscale x 4 x i16> @llvm.abs.nxv4i16(<vscale x 4 x i16>, i1)
+
+define <vscale x 4 x i16> @vabs_nxv4i16(<vscale x 4 x i16> %v) {
+; CHECK-LABEL: vabs_nxv4i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e16,m1,ta,mu
+; CHECK-NEXT:    vrsub.vi v25, v8, 0
+; CHECK-NEXT:    vmax.vv v8, v8, v25
+; CHECK-NEXT:    ret
+  %r = call <vscale x 4 x i16> @llvm.abs.nxv4i16(<vscale x 4 x i16> %v, i1 false)
+  ret <vscale x 4 x i16> %r
+}
+
+declare <vscale x 8 x i16> @llvm.abs.nxv8i16(<vscale x 8 x i16>, i1)
+
+define <vscale x 8 x i16> @vabs_nxv8i16(<vscale x 8 x i16> %v) {
+; CHECK-LABEL: vabs_nxv8i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e16,m2,ta,mu
+; CHECK-NEXT:    vrsub.vi v26, v8, 0
+; CHECK-NEXT:    vmax.vv v8, v8, v26
+; CHECK-NEXT:    ret
+  %r = call <vscale x 8 x i16> @llvm.abs.nxv8i16(<vscale x 8 x i16> %v, i1 false)
+  ret <vscale x 8 x i16> %r
+}
+
+declare <vscale x 16 x i16> @llvm.abs.nxv16i16(<vscale x 16 x i16>, i1)
+
+define <vscale x 16 x i16> @vabs_nxv16i16(<vscale x 16 x i16> %v) {
+; CHECK-LABEL: vabs_nxv16i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e16,m4,ta,mu
+; CHECK-NEXT:    vrsub.vi v28, v8, 0
+; CHECK-NEXT:    vmax.vv v8, v8, v28
+; CHECK-NEXT:    ret
+  %r = call <vscale x 16 x i16> @llvm.abs.nxv16i16(<vscale x 16 x i16> %v, i1 false)
+  ret <vscale x 16 x i16> %r
+}
+
+declare <vscale x 32 x i16> @llvm.abs.nxv32i16(<vscale x 32 x i16>, i1)
+
+define <vscale x 32 x i16> @vabs_nxv32i16(<vscale x 32 x i16> %v) {
+; CHECK-LABEL: vabs_nxv32i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e16,m8,ta,mu
+; CHECK-NEXT:    vrsub.vi v16, v8, 0
+; CHECK-NEXT:    vmax.vv v8, v8, v16
+; CHECK-NEXT:    ret
+  %r = call <vscale x 32 x i16> @llvm.abs.nxv32i16(<vscale x 32 x i16> %v, i1 false)
+  ret <vscale x 32 x i16> %r
+}
+
+declare <vscale x 1 x i32> @llvm.abs.nxv1i32(<vscale x 1 x i32>, i1)
+
+define <vscale x 1 x i32> @vabs_nxv1i32(<vscale x 1 x i32> %v) {
+; CHECK-LABEL: vabs_nxv1i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e32,mf2,ta,mu
+; CHECK-NEXT:    vrsub.vi v25, v8, 0
+; CHECK-NEXT:    vmax.vv v8, v8, v25
+; CHECK-NEXT:    ret
+  %r = call <vscale x 1 x i32> @llvm.abs.nxv1i32(<vscale x 1 x i32> %v, i1 false)
+  ret <vscale x 1 x i32> %r
+}
+
+declare <vscale x 2 x i32> @llvm.abs.nxv2i32(<vscale x 2 x i32>, i1)
+
+define <vscale x 2 x i32> @vabs_nxv2i32(<vscale x 2 x i32> %v) {
+; CHECK-LABEL: vabs_nxv2i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e32,m1,ta,mu
+; CHECK-NEXT:    vrsub.vi v25, v8, 0
+; CHECK-NEXT:    vmax.vv v8, v8, v25
+; CHECK-NEXT:    ret
+  %r = call <vscale x 2 x i32> @llvm.abs.nxv2i32(<vscale x 2 x i32> %v, i1 false)
+  ret <vscale x 2 x i32> %r
+}
+
+declare <vscale x 4 x i32> @llvm.abs.nxv4i32(<vscale x 4 x i32>, i1)
+
+define <vscale x 4 x i32> @vabs_nxv4i32(<vscale x 4 x i32> %v) {
+; CHECK-LABEL: vabs_nxv4i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e32,m2,ta,mu
+; CHECK-NEXT:    vrsub.vi v26, v8, 0
+; CHECK-NEXT:    vmax.vv v8, v8, v26
+; CHECK-NEXT:    ret
+  %r = call <vscale x 4 x i32> @llvm.abs.nxv4i32(<vscale x 4 x i32> %v, i1 false)
+  ret <vscale x 4 x i32> %r
+}
+
+declare <vscale x 8 x i32> @llvm.abs.nxv8i32(<vscale x 8 x i32>, i1)
+
+define <vscale x 8 x i32> @vabs_nxv8i32(<vscale x 8 x i32> %v) {
+; CHECK-LABEL: vabs_nxv8i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e32,m4,ta,mu
+; CHECK-NEXT:    vrsub.vi v28, v8, 0
+; CHECK-NEXT:    vmax.vv v8, v8, v28
+; CHECK-NEXT:    ret
+  %r = call <vscale x 8 x i32> @llvm.abs.nxv8i32(<vscale x 8 x i32> %v, i1 false)
+  ret <vscale x 8 x i32> %r
+}
+
+declare <vscale x 16 x i32> @llvm.abs.nxv16i32(<vscale x 16 x i32>, i1)
+
+define <vscale x 16 x i32> @vabs_nxv16i32(<vscale x 16 x i32> %v) {
+; CHECK-LABEL: vabs_nxv16i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e32,m8,ta,mu
+; CHECK-NEXT:    vrsub.vi v16, v8, 0
+; CHECK-NEXT:    vmax.vv v8, v8, v16
+; CHECK-NEXT:    ret
+  %r = call <vscale x 16 x i32> @llvm.abs.nxv16i32(<vscale x 16 x i32> %v, i1 false)
+  ret <vscale x 16 x i32> %r
+}
+
+declare <vscale x 1 x i64> @llvm.abs.nxv1i64(<vscale x 1 x i64>, i1)
+
+define <vscale x 1 x i64> @vabs_nxv1i64(<vscale x 1 x i64> %v) {
+; CHECK-LABEL: vabs_nxv1i64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e64,m1,ta,mu
+; CHECK-NEXT:    vrsub.vi v25, v8, 0
+; CHECK-NEXT:    vmax.vv v8, v8, v25
+; CHECK-NEXT:    ret
+  %r = call <vscale x 1 x i64> @llvm.abs.nxv1i64(<vscale x 1 x i64> %v, i1 false)
+  ret <vscale x 1 x i64> %r
+}
+
+declare <vscale x 2 x i64> @llvm.abs.nxv2i64(<vscale x 2 x i64>, i1)
+
+define <vscale x 2 x i64> @vabs_nxv2i64(<vscale x 2 x i64> %v) {
+; CHECK-LABEL: vabs_nxv2i64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e64,m2,ta,mu
+; CHECK-NEXT:    vrsub.vi v26, v8, 0
+; CHECK-NEXT:    vmax.vv v8, v8, v26
+; CHECK-NEXT:    ret
+  %r = call <vscale x 2 x i64> @llvm.abs.nxv2i64(<vscale x 2 x i64> %v, i1 false)
+  ret <vscale x 2 x i64> %r
+}
+
+declare <vscale x 4 x i64> @llvm.abs.nxv4i64(<vscale x 4 x i64>, i1)
+
+define <vscale x 4 x i64> @vabs_nxv4i64(<vscale x 4 x i64> %v) {
+; CHECK-LABEL: vabs_nxv4i64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e64,m4,ta,mu
+; CHECK-NEXT:    vrsub.vi v28, v8, 0
+; CHECK-NEXT:    vmax.vv v8, v8, v28
+; CHECK-NEXT:    ret
+  %r = call <vscale x 4 x i64> @llvm.abs.nxv4i64(<vscale x 4 x i64> %v, i1 false)
+  ret <vscale x 4 x i64> %r
+}
+
+declare <vscale x 8 x i64> @llvm.abs.nxv8i64(<vscale x 8 x i64>, i1)
+
+define <vscale x 8 x i64> @vabs_nxv8i64(<vscale x 8 x i64> %v) {
+; CHECK-LABEL: vabs_nxv8i64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e64,m8,ta,mu
+; CHECK-NEXT:    vrsub.vi v16, v8, 0
+; CHECK-NEXT:    vmax.vv v8, v8, v16
+; CHECK-NEXT:    ret
+  %r = call <vscale x 8 x i64> @llvm.abs.nxv8i64(<vscale x 8 x i64> %v, i1 false)
+  ret <vscale x 8 x i64> %r
+}
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-abs.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-abs.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-abs.ll
@@ -0,0 +1,254 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -verify-machineinstrs -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=2 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX2
+; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -verify-machineinstrs -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=2 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX2
+; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -verify-machineinstrs -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX1-RV32
+; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -verify-machineinstrs -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX1-RV64
+
+define void @abs_v16i8(<16 x i8>* %x, <16 x i8>* %y) {
+; CHECK-LABEL: abs_v16i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli a1, 16, e8,m1,ta,mu
+; CHECK-NEXT:    vle8.v v25, (a0)
+; CHECK-NEXT:    vrsub.vi v26, v25, 0
+; CHECK-NEXT:    vmax.vv v25, v25, v26
+; CHECK-NEXT:    vse8.v v25, (a0)
+; CHECK-NEXT:    ret
+  %a = load <16 x i8>, <16 x i8>* %x
+  %b = load <16 x i8>, <16 x i8>* %y
+  %c = call <16 x i8> @llvm.abs.v16i8(<16 x i8> %a, i1 false)
+  store <16 x i8> %c, <16 x i8>* %x
+  ret void
+}
+declare <16 x i8> @llvm.abs.v16i8(<16 x i8>, i1)
+
+define void @abs_v8i16(<8 x i16>* %x, <8 x i16>* %y) {
+; CHECK-LABEL: abs_v8i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli a1, 8, e16,m1,ta,mu
+; CHECK-NEXT:    vle16.v v25, (a0)
+; CHECK-NEXT:    vrsub.vi v26, v25, 0
+; CHECK-NEXT:    vmax.vv v25, v25, v26
+; CHECK-NEXT:    vse16.v v25, (a0)
+; CHECK-NEXT:    ret
+  %a = load <8 x i16>, <8 x i16>* %x
+  %b = load <8 x i16>, <8 x i16>* %y
+  %c = call <8 x i16> @llvm.abs.v8i16(<8 x i16> %a, i1 false)
+  store <8 x i16> %c, <8 x i16>* %x
+  ret void
+}
+declare <8 x i16> @llvm.abs.v8i16(<8 x i16>, i1)
+
+define void @abs_v4i32(<4 x i32>* %x, <4 x i32>* %y) {
+; CHECK-LABEL: abs_v4i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli a1, 4, e32,m1,ta,mu
+; CHECK-NEXT:    vle32.v v25, (a0)
+; CHECK-NEXT:    vrsub.vi v26, v25, 0
+; CHECK-NEXT:    vmax.vv v25, v25, v26
+; CHECK-NEXT:    vse32.v v25, (a0)
+; CHECK-NEXT:    ret
+  %a = load <4 x i32>, <4 x i32>* %x
+  %b = load <4 x i32>, <4 x i32>* %y
+  %c = call <4 x i32> @llvm.abs.v4i32(<4 x i32> %a, i1 false)
+  store <4 x i32> %c, <4 x i32>* %x
+  ret void
+}
+declare <4 x i32> @llvm.abs.v4i32(<4 x i32>, i1)
+
+define void @abs_v2i64(<2 x i64>* %x, <2 x i64>* %y) {
+; CHECK-LABEL: abs_v2i64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli a1, 2, e64,m1,ta,mu
+; CHECK-NEXT:    vle64.v v25, (a0)
+; CHECK-NEXT:    vrsub.vi v26, v25, 0
+; CHECK-NEXT:    vmax.vv v25, v25, v26
+; CHECK-NEXT:    vse64.v v25, (a0)
+; CHECK-NEXT:    ret
+  %a = load <2 x i64>, <2 x i64>* %x
+  %b = load <2 x i64>, <2 x i64>* %y
+  %c = call <2 x i64> @llvm.abs.v2i64(<2 x i64> %a, i1 false)
+  store <2 x i64> %c, <2 x i64>* %x
+  ret void
+}
+declare <2 x i64> @llvm.abs.v2i64(<2 x i64>, i1)
+
+define void @abs_v32i8(<32 x i8>* %x, <32 x i8>* %y) {
+; LMULMAX2-LABEL: abs_v32i8:
+; LMULMAX2:       # %bb.0:
+; LMULMAX2-NEXT:    addi a1, zero, 32
+; LMULMAX2-NEXT:    vsetvli a1, a1, e8,m2,ta,mu
+; LMULMAX2-NEXT:    vle8.v v26, (a0)
+; LMULMAX2-NEXT:    vrsub.vi v28, v26, 0
+; LMULMAX2-NEXT:    vmax.vv v26, v26, v28
+; LMULMAX2-NEXT:    vse8.v v26, (a0)
+; LMULMAX2-NEXT:    ret
+;
+; LMULMAX1-RV32-LABEL: abs_v32i8:
+; LMULMAX1-RV32:       # %bb.0:
+; LMULMAX1-RV32-NEXT:    vsetivli a1, 16, e8,m1,ta,mu
+; LMULMAX1-RV32-NEXT:    addi a1, a0, 16
+; LMULMAX1-RV32-NEXT:    vle8.v v25, (a1)
+; LMULMAX1-RV32-NEXT:    vle8.v v26, (a0)
+; LMULMAX1-RV32-NEXT:    vrsub.vi v27, v25, 0
+; LMULMAX1-RV32-NEXT:    vmax.vv v25, v25, v27
+; LMULMAX1-RV32-NEXT:    vrsub.vi v27, v26, 0
+; LMULMAX1-RV32-NEXT:    vmax.vv v26, v26, v27
+; LMULMAX1-RV32-NEXT:    vse8.v v26, (a0)
+; LMULMAX1-RV32-NEXT:    vse8.v v25, (a1)
+; LMULMAX1-RV32-NEXT:    ret
+;
+; LMULMAX1-RV64-LABEL: abs_v32i8:
+; LMULMAX1-RV64:       # %bb.0:
+; LMULMAX1-RV64-NEXT:    vsetivli a1, 16, e8,m1,ta,mu
+; LMULMAX1-RV64-NEXT:    addi a1, a0, 16
+; LMULMAX1-RV64-NEXT:    vle8.v v25, (a1)
+; LMULMAX1-RV64-NEXT:    vle8.v v26, (a0)
+; LMULMAX1-RV64-NEXT:    vrsub.vi v27, v25, 0
+; LMULMAX1-RV64-NEXT:    vmax.vv v25, v25, v27
+; LMULMAX1-RV64-NEXT:    vrsub.vi v27, v26, 0
+; LMULMAX1-RV64-NEXT:    vmax.vv v26, v26, v27
+; LMULMAX1-RV64-NEXT:    vse8.v v26, (a0)
+; LMULMAX1-RV64-NEXT:    vse8.v v25, (a1)
+; LMULMAX1-RV64-NEXT:    ret
+  %a = load <32 x i8>, <32 x i8>* %x
+  %b = load <32 x i8>, <32 x i8>* %y
+  %c = call <32 x i8> @llvm.abs.v32i8(<32 x i8> %a, i1 false)
+  store <32 x i8> %c, <32 x i8>* %x
+  ret void
+}
+declare <32 x i8> @llvm.abs.v32i8(<32 x i8>, i1)
+
+define void @abs_v16i16(<16 x i16>* %x, <16 x i16>* %y) {
+; LMULMAX2-LABEL: abs_v16i16:
+; LMULMAX2:       # %bb.0:
+; LMULMAX2-NEXT:    vsetivli a1, 16, e16,m2,ta,mu
+; LMULMAX2-NEXT:    vle16.v v26, (a0)
+; LMULMAX2-NEXT:    vrsub.vi v28, v26, 0
+; LMULMAX2-NEXT:    vmax.vv v26, v26, v28
+; LMULMAX2-NEXT:    vse16.v v26, (a0)
+; LMULMAX2-NEXT:    ret
+;
+; LMULMAX1-RV32-LABEL: abs_v16i16:
+; LMULMAX1-RV32:       # %bb.0:
+; LMULMAX1-RV32-NEXT:    vsetivli a1, 8, e16,m1,ta,mu
+; LMULMAX1-RV32-NEXT:    addi a1, a0, 16
+; LMULMAX1-RV32-NEXT:    vle16.v v25, (a1)
+; LMULMAX1-RV32-NEXT:    vle16.v v26, (a0)
+; LMULMAX1-RV32-NEXT:    vrsub.vi v27, v25, 0
+; LMULMAX1-RV32-NEXT:    vmax.vv v25, v25, v27
+; LMULMAX1-RV32-NEXT:    vrsub.vi v27, v26, 0
+; LMULMAX1-RV32-NEXT:    vmax.vv v26, v26, v27
+; LMULMAX1-RV32-NEXT:    vse16.v v26, (a0)
+; LMULMAX1-RV32-NEXT:    vse16.v v25, (a1)
+; LMULMAX1-RV32-NEXT:    ret
+;
+; LMULMAX1-RV64-LABEL: abs_v16i16:
+; LMULMAX1-RV64:       # %bb.0:
+; LMULMAX1-RV64-NEXT:    vsetivli a1, 8, e16,m1,ta,mu
+; LMULMAX1-RV64-NEXT:    addi a1, a0, 16
+; LMULMAX1-RV64-NEXT:    vle16.v v25, (a1)
+; LMULMAX1-RV64-NEXT:    vle16.v v26, (a0)
+; LMULMAX1-RV64-NEXT:    vrsub.vi v27, v25, 0
+; LMULMAX1-RV64-NEXT:    vmax.vv v25, v25, v27
+; LMULMAX1-RV64-NEXT:    vrsub.vi v27, v26, 0
+; LMULMAX1-RV64-NEXT:    vmax.vv v26, v26, v27
+; LMULMAX1-RV64-NEXT:    vse16.v v26, (a0)
+; LMULMAX1-RV64-NEXT:    vse16.v v25, (a1)
+; LMULMAX1-RV64-NEXT:    ret
+  %a = load <16 x i16>, <16 x i16>* %x
+  %b = load <16 x i16>, <16 x i16>* %y
+  %c = call <16 x i16> @llvm.abs.v16i16(<16 x i16> %a, i1 false)
+  store <16 x i16> %c, <16 x i16>* %x
+  ret void
+}
+declare <16 x i16> @llvm.abs.v16i16(<16 x i16>, i1)
+
+define void @abs_v8i32(<8 x i32>* %x, <8 x i32>* %y) {
+; LMULMAX2-LABEL: abs_v8i32:
+; LMULMAX2:       # %bb.0:
+; LMULMAX2-NEXT:    vsetivli a1, 8, e32,m2,ta,mu
+; LMULMAX2-NEXT:    vle32.v v26, (a0)
+; LMULMAX2-NEXT:    vrsub.vi v28, v26, 0
+; LMULMAX2-NEXT:    vmax.vv v26, v26, v28
+; LMULMAX2-NEXT:    vse32.v v26, (a0)
+; LMULMAX2-NEXT:    ret
+;
+; LMULMAX1-RV32-LABEL: abs_v8i32:
+; LMULMAX1-RV32:       # %bb.0:
+; LMULMAX1-RV32-NEXT:    vsetivli a1, 4, e32,m1,ta,mu
+; LMULMAX1-RV32-NEXT:    addi a1, a0, 16
+; LMULMAX1-RV32-NEXT:    vle32.v v25, (a1)
+; LMULMAX1-RV32-NEXT:    vle32.v v26, (a0)
+; LMULMAX1-RV32-NEXT:    vrsub.vi v27, v25, 0
+; LMULMAX1-RV32-NEXT:    vmax.vv v25, v25, v27
+; LMULMAX1-RV32-NEXT:    vrsub.vi v27, v26, 0
+; LMULMAX1-RV32-NEXT:    vmax.vv v26, v26, v27
+; LMULMAX1-RV32-NEXT:    vse32.v v26, (a0)
+; LMULMAX1-RV32-NEXT:    vse32.v v25, (a1)
+; LMULMAX1-RV32-NEXT:    ret
+;
+; LMULMAX1-RV64-LABEL: abs_v8i32:
+; LMULMAX1-RV64:       # %bb.0:
+; LMULMAX1-RV64-NEXT:    vsetivli a1, 4, e32,m1,ta,mu
+; LMULMAX1-RV64-NEXT:    addi a1, a0, 16
+; LMULMAX1-RV64-NEXT:    vle32.v v25, (a1)
+; LMULMAX1-RV64-NEXT:    vle32.v v26, (a0)
+; LMULMAX1-RV64-NEXT:    vrsub.vi v27, v25, 0
+; LMULMAX1-RV64-NEXT:    vmax.vv v25, v25, v27
+; LMULMAX1-RV64-NEXT:    vrsub.vi v27, v26, 0
+; LMULMAX1-RV64-NEXT:    vmax.vv v26, v26, v27
+; LMULMAX1-RV64-NEXT:    vse32.v v26, (a0)
+; LMULMAX1-RV64-NEXT:    vse32.v v25, (a1)
+; LMULMAX1-RV64-NEXT:    ret
+  %a = load <8 x i32>, <8 x i32>* %x
+  %b = load <8 x i32>, <8 x i32>* %y
+  %c = call <8 x i32> @llvm.abs.v8i32(<8 x i32> %a, i1 false)
+  store <8 x i32> %c, <8 x i32>* %x
+  ret void
+}
+declare <8 x i32> @llvm.abs.v8i32(<8 x i32>, i1)
+
+define void @abs_v4i64(<4 x i64>* %x, <4 x i64>* %y) {
+; LMULMAX2-LABEL: abs_v4i64:
+; LMULMAX2:       # %bb.0:
+; LMULMAX2-NEXT:    vsetivli a1, 4, e64,m2,ta,mu
+; LMULMAX2-NEXT:    vle64.v v26, (a0)
+; LMULMAX2-NEXT:    vrsub.vi v28, v26, 0
+; LMULMAX2-NEXT:    vmax.vv v26, v26, v28
+; LMULMAX2-NEXT:    vse64.v v26, (a0)
+; LMULMAX2-NEXT:    ret
+;
+; LMULMAX1-RV32-LABEL: abs_v4i64:
+; LMULMAX1-RV32:       # %bb.0:
+; LMULMAX1-RV32-NEXT:    vsetivli a1, 2, e64,m1,ta,mu
+; LMULMAX1-RV32-NEXT:    addi a1, a0, 16
+; LMULMAX1-RV32-NEXT:    vle64.v v25, (a1)
+; LMULMAX1-RV32-NEXT:    vle64.v v26, (a0)
+; LMULMAX1-RV32-NEXT:    vrsub.vi v27, v25, 0
+; LMULMAX1-RV32-NEXT:    vmax.vv v25, v25, v27
+; LMULMAX1-RV32-NEXT:    vrsub.vi v27, v26, 0
+; LMULMAX1-RV32-NEXT:    vmax.vv v26, v26, v27
+; LMULMAX1-RV32-NEXT:    vse64.v v26, (a0)
+; LMULMAX1-RV32-NEXT:    vse64.v v25, (a1)
+; LMULMAX1-RV32-NEXT:    ret
+;
+; LMULMAX1-RV64-LABEL: abs_v4i64:
+; LMULMAX1-RV64:       # %bb.0:
+; LMULMAX1-RV64-NEXT:    vsetivli a1, 2, e64,m1,ta,mu
+; LMULMAX1-RV64-NEXT:    addi a1, a0, 16
+; LMULMAX1-RV64-NEXT:    vle64.v v25, (a1)
+; LMULMAX1-RV64-NEXT:    vle64.v v26, (a0)
+; LMULMAX1-RV64-NEXT:    vrsub.vi v27, v25, 0
+; LMULMAX1-RV64-NEXT:    vmax.vv v25, v25, v27
+; LMULMAX1-RV64-NEXT:    vrsub.vi v27, v26, 0
+; LMULMAX1-RV64-NEXT:    vmax.vv v26, v26, v27
+; LMULMAX1-RV64-NEXT:    vse64.v v26, (a0)
+; LMULMAX1-RV64-NEXT:    vse64.v v25, (a1)
+; LMULMAX1-RV64-NEXT:    ret
+  %a = load <4 x i64>, <4 x i64>* %x
+  %b = load <4 x i64>, <4 x i64>* %y
+  %c = call <4 x i64> @llvm.abs.v4i64(<4 x i64> %a, i1 false)
+  store <4 x i64> %c, <4 x i64>* %x
+  ret void
+}
+declare <4 x i64> @llvm.abs.v4i64(<4 x i64>, i1)