Index: llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
===================================================================
--- llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -10500,6 +10500,82 @@
                      DAG.getConstant(0, dl, MVT::i64));
 }
 
+static SDValue tryConvertSVEWideCompare(SDNode *N, unsigned ReplacementIID,
+                                        bool Invert, SelectionDAG &DAG) {
+  SDValue Comparator = N->getOperand(3);
+  if (Comparator.getOpcode() == AArch64ISD::DUP ||
+      Comparator.getOpcode() == ISD::SPLAT_VECTOR) {
+    unsigned IID = getIntrinsicID(N);
+    EVT VT = N->getValueType(0);
+    EVT CmpVT = N->getOperand(2).getValueType();
+    SDValue Pred = N->getOperand(1);
+    SDLoc DL(N);
+
+    switch (IID) {
+    default:
+      llvm_unreachable("Called with wrong intrinsic!");
+      break;
+
+    // Signed comparisons
+    case Intrinsic::aarch64_sve_cmpeq_wide:
+    case Intrinsic::aarch64_sve_cmpne_wide:
+    case Intrinsic::aarch64_sve_cmpge_wide:
+    case Intrinsic::aarch64_sve_cmpgt_wide:
+    case Intrinsic::aarch64_sve_cmplt_wide:
+    case Intrinsic::aarch64_sve_cmple_wide: {
+      if (auto *CN = dyn_cast<ConstantSDNode>(Comparator.getOperand(0))) {
+        int64_t ImmVal = CN->getSExtValue();
+
+        if (ImmVal >= -16 && ImmVal <= 15) {
+          SDValue Imm = DAG.getConstant(ImmVal, DL, MVT::i32);
+          SDValue Splat = DAG.getNode(AArch64ISD::DUP, DL, CmpVT, Imm);
+          SDValue ID = DAG.getTargetConstant(ReplacementIID, DL, MVT::i64);
+          SDValue Op0, Op1;
+          if (Invert) {
+            Op0 = Splat;
+            Op1 = N->getOperand(2);
+          } else {
+            Op0 = N->getOperand(2);
+            Op1 = Splat;
+          }
+          return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, VT,
+                             ID, Pred, Op0, Op1);
+        }
+      }
+      break;
+    }
+    // Unsigned comparisons
+    case Intrinsic::aarch64_sve_cmphs_wide:
+    case Intrinsic::aarch64_sve_cmphi_wide:
+    case Intrinsic::aarch64_sve_cmplo_wide:
+    case Intrinsic::aarch64_sve_cmpls_wide: {
+      if (auto *CN = dyn_cast<ConstantSDNode>(Comparator.getOperand(0))) {
+        uint64_t ImmVal = CN->getZExtValue();
+
+        if (ImmVal <= 127) {
+          SDValue Imm = DAG.getConstant(ImmVal, DL, MVT::i32);
+          SDValue Splat = DAG.getNode(AArch64ISD::DUP, DL, CmpVT, Imm);
+          SDValue ID = DAG.getTargetConstant(ReplacementIID, DL, MVT::i64);
+          SDValue Op0, Op1;
+          if (Invert) {
+            Op0 = Splat;
+            Op1 = N->getOperand(2);
+          } else {
+            Op0 = N->getOperand(2);
+            Op1 = Splat;
+          }
+          return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, VT,
+                             ID, Pred, Op0, Op1);
+        }
+      }
+      break;
+    }
+    }
+  }
+
+  return SDValue();
+}
+
 static SDValue performIntrinsicCombine(SDNode *N,
                                        TargetLowering::DAGCombinerInfo &DCI,
                                        const AArch64Subtarget *Subtarget) {
@@ -10554,6 +10630,34 @@
   case Intrinsic::aarch64_crc32h:
   case Intrinsic::aarch64_crc32ch:
     return tryCombineCRC32(0xffff, N, DAG);
+  case Intrinsic::aarch64_sve_cmpeq_wide:
+    return tryConvertSVEWideCompare(N, Intrinsic::aarch64_sve_cmpeq,
+                                    false, DAG);
+  case Intrinsic::aarch64_sve_cmpne_wide:
+    return tryConvertSVEWideCompare(N, Intrinsic::aarch64_sve_cmpne,
+                                    false, DAG);
+  case Intrinsic::aarch64_sve_cmpge_wide:
+    return tryConvertSVEWideCompare(N, Intrinsic::aarch64_sve_cmpge,
+                                    false, DAG);
+  case Intrinsic::aarch64_sve_cmpgt_wide:
+    return tryConvertSVEWideCompare(N, Intrinsic::aarch64_sve_cmpgt,
+                                    false, DAG);
+  case Intrinsic::aarch64_sve_cmplt_wide:
+    return tryConvertSVEWideCompare(N, Intrinsic::aarch64_sve_cmpgt,
+                                    true, DAG);
+  case Intrinsic::aarch64_sve_cmple_wide:
+    return tryConvertSVEWideCompare(N, Intrinsic::aarch64_sve_cmpge,
+                                    true, DAG);
+  case Intrinsic::aarch64_sve_cmphs_wide:
+    return tryConvertSVEWideCompare(N, Intrinsic::aarch64_sve_cmphs,
+                                    false, DAG);
+  case Intrinsic::aarch64_sve_cmphi_wide:
+    return tryConvertSVEWideCompare(N, Intrinsic::aarch64_sve_cmphi,
+                                    false, DAG);
+  case Intrinsic::aarch64_sve_cmplo_wide:
+    return tryConvertSVEWideCompare(N, Intrinsic::aarch64_sve_cmphi, true, DAG);
+  case Intrinsic::aarch64_sve_cmpls_wide:
+    return tryConvertSVEWideCompare(N, Intrinsic::aarch64_sve_cmphs, true, DAG);
   }
   return SDValue();
 }
Index: llvm/test/CodeGen/AArch64/sve-intrinsics-int-compares-with-imm.ll
===================================================================
--- llvm/test/CodeGen/AArch64/sve-intrinsics-int-compares-with-imm.ll
+++ llvm/test/CodeGen/AArch64/sve-intrinsics-int-compares-with-imm.ll
@@ -30,6 +42,18 @@
   ret <vscale x 16 x i1> %out
 }
 
+define <vscale x 16 x i1> @wide_cmpeq_b(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a) {
+; CHECK-LABEL: wide_cmpeq_b
+; CHECK: cmpeq p0.b, p0/z, z0.b, #4
+; CHECK-NEXT: ret
+  %elt = insertelement <vscale x 2 x i64> undef, i64 4, i32 0
+  %splat = shufflevector <vscale x 2 x i64> %elt, <vscale x 2 x i64> undef, <vscale x 2 x i32> zeroinitializer
+  %out = call <vscale x 16 x i1> @llvm.aarch64.sve.cmpeq.wide.nxv16i8(<vscale x 16 x i1> %pg,
+                                                                      <vscale x 16 x i8> %a,
+                                                                      <vscale x 2 x i64> %splat)
+  ret <vscale x 16 x i1> %out
+}
+
 define <vscale x 8 x i1> @ir_cmpeq_h(<vscale x 8 x i16> %a) {
 ; CHECK-LABEL: ir_cmpeq_h
 ; CHECK: cmpeq p0.h, p0/z, z0.h, #-16
@@ -52,6 +64,18 @@
   ret <vscale x 8 x i1> %out
 }
 
+define <vscale x 8 x i1> @wide_cmpeq_h(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a) {
+; CHECK-LABEL: wide_cmpeq_h
+; CHECK: cmpeq p0.h, p0/z, z0.h, #-16
+; CHECK-NEXT: ret
+  %elt = insertelement <vscale x 2 x i64> undef, i64 -16, i32 0
+  %splat = shufflevector <vscale x 2 x i64> %elt, <vscale x 2 x i64> undef, <vscale x 2 x i32> zeroinitializer
+  %out = call <vscale x 8 x i1> @llvm.aarch64.sve.cmpeq.wide.nxv8i16(<vscale x 8 x i1> %pg,
+                                                                     <vscale x 8 x i16> %a,
+                                                                     <vscale x 2 x i64> %splat)
+  ret <vscale x 8 x i1> %out
+}
+
 define <vscale x 4 x i1> @ir_cmpeq_s(<vscale x 4 x i32> %a) {
 ; CHECK-LABEL: ir_cmpeq_s
 ; CHECK: cmpeq p0.s, p0/z, z0.s, #15
@@ -74,6 +98,18 @@
   ret <vscale x 4 x i1> %out
 }
 
+define <vscale x 4 x i1> @wide_cmpeq_s(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a) {
+; CHECK-LABEL: wide_cmpeq_s
+; CHECK: cmpeq p0.s, p0/z, z0.s, #15
+; CHECK-NEXT: ret
+  %elt = insertelement <vscale x 2 x i64> undef, i64 15, i32 0
+  %splat = shufflevector <vscale x 2 x i64> %elt, <vscale x 2 x i64> undef, <vscale x 2 x i32> zeroinitializer
+  %out = call <vscale x 4 x i1> @llvm.aarch64.sve.cmpeq.wide.nxv4i32(<vscale x 4 x i1> %pg,
+                                                                     <vscale x 4 x i32> %a,
+                                                                     <vscale x 2 x i64> %splat)
+  ret <vscale x 4 x i1> %out
+}
+
 define <vscale x 2 x i1> @ir_cmpeq_d(<vscale x 2 x i64> %a) {
 ; CHECK-LABEL: ir_cmpeq_d
 ; CHECK: cmpeq p0.d, p0/z, z0.d, #0
@@ -122,6 +158,18 @@
   ret <vscale x 16 x i1> %out
 }
 
+define <vscale x 16 x i1> @wide_cmpge_b(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a) {
+; CHECK-LABEL: wide_cmpge_b
+; CHECK: cmpge p0.b, p0/z, z0.b, #4
+; CHECK-NEXT: ret
+  %elt = insertelement <vscale x 2 x i64> undef, i64 4, i32 0
+  %splat = shufflevector <vscale x 2 x i64> %elt, <vscale x 2 x i64> undef, <vscale x 2 x i32> zeroinitializer
+  %out = call <vscale x 16 x i1> @llvm.aarch64.sve.cmpge.wide.nxv16i8(<vscale x 16 x i1> %pg,
+                                                                      <vscale x 16 x i8> %a,
+                                                                      <vscale x 2 x i64> %splat)
+  ret <vscale x 16 x i1> %out
+}
+
 define <vscale x 8 x i1> @ir_cmpge_h(<vscale x 8 x i16> %a) {
 ; CHECK-LABEL: ir_cmpge_h
 ; CHECK: cmpge p0.h, p0/z, z0.h, #-16
@@ -144,6 +192,18 @@
   ret <vscale x 8 x i1> %out
 }
 
+define <vscale x 8 x i1> @wide_cmpge_h(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a) {
+; CHECK-LABEL: wide_cmpge_h
+; CHECK: cmpge p0.h, p0/z, z0.h, #-16
+; CHECK-NEXT: ret
+  %elt = insertelement <vscale x 2 x i64> undef, i64 -16, i32 0
+  %splat = shufflevector <vscale x 2 x i64> %elt, <vscale x 2 x i64> undef, <vscale x 2 x i32> zeroinitializer
+  %out = call <vscale x 8 x i1> @llvm.aarch64.sve.cmpge.wide.nxv8i16(<vscale x 8 x i1> %pg,
+                                                                     <vscale x 8 x i16> %a,
+                                                                     <vscale x 2 x i64> %splat)
+  ret <vscale x 8 x i1> %out
+}
+
 define <vscale x 4 x i1> @ir_cmpge_s(<vscale x 4 x i32> %a) {
 ; CHECK-LABEL: ir_cmpge_s
 ; CHECK: cmpge p0.s, p0/z, z0.s, #15
@@ -166,6 +226,18 @@
   ret <vscale x 4 x i1> %out
 }
 
+define <vscale x 4 x i1> @wide_cmpge_s(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a) {
+; CHECK-LABEL: wide_cmpge_s
+; CHECK: cmpge p0.s, p0/z, z0.s, #15
+; CHECK-NEXT: ret
+  %elt = insertelement <vscale x 2 x i64> undef, i64 15, i32 0
+  %splat = shufflevector <vscale x 2 x i64> %elt, <vscale x 2 x i64> undef, <vscale x 2 x i32> zeroinitializer
+  %out = call <vscale x 4 x i1> @llvm.aarch64.sve.cmpge.wide.nxv4i32(<vscale x 4 x i1> %pg,
+                                                                     <vscale x 4 x i32> %a,
+                                                                     <vscale x 2 x i64> %splat)
+  ret <vscale x 4 x i1> %out
+}
+
 define <vscale x 2 x i1> @ir_cmpge_d(<vscale x 2 x i64> %a) {
 ; CHECK-LABEL: ir_cmpge_d
 ; CHECK: cmpge p0.d, p0/z, z0.d, #0
@@ -214,6 +286,18 @@
   ret <vscale x 16 x i1> %out
 }
 
+define <vscale x 16 x i1> @wide_cmpgt_b(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a) {
+; CHECK-LABEL: wide_cmpgt_b
+; CHECK: cmpgt p0.b, p0/z, z0.b, #4
+; CHECK-NEXT: ret
+  %elt = insertelement <vscale x 2 x i64> undef, i64 4, i32 0
+  %splat = shufflevector <vscale x 2 x i64> %elt, <vscale x 2 x i64> undef, <vscale x 2 x i32> zeroinitializer
+  %out = call <vscale x 16 x i1> @llvm.aarch64.sve.cmpgt.wide.nxv16i8(<vscale x 16 x i1> %pg,
+                                                                      <vscale x 16 x i8> %a,
+                                                                      <vscale x 2 x i64> %splat)
+  ret <vscale x 16 x i1> %out
+}
+
 define <vscale x 8 x i1> @ir_cmpgt_h(<vscale x 8 x i16> %a) {
 ; CHECK-LABEL: ir_cmpgt_h
 ; CHECK: cmpgt p0.h, p0/z, z0.h, #-16
@@ -236,6 +320,18 @@
   ret <vscale x 8 x i1> %out
 }
 
+define <vscale x 8 x i1> @wide_cmpgt_h(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a) {
+; CHECK-LABEL: wide_cmpgt_h
+; CHECK: cmpgt p0.h, p0/z, z0.h, #-16
+; CHECK-NEXT: ret
+  %elt = insertelement <vscale x 2 x i64> undef, i64 -16, i32 0
+  %splat = shufflevector <vscale x 2 x i64> %elt, <vscale x 2 x i64> undef, <vscale x 2 x i32> zeroinitializer
+  %out = call <vscale x 8 x i1> @llvm.aarch64.sve.cmpgt.wide.nxv8i16(<vscale x 8 x i1> %pg,
+                                                                     <vscale x 8 x i16> %a,
+                                                                     <vscale x 2 x i64> %splat)
+  ret <vscale x 8 x i1> %out
+}
+
 define <vscale x 4 x i1> @ir_cmpgt_s(<vscale x 4 x i32> %a) {
 ; CHECK-LABEL: ir_cmpgt_s
 ; CHECK: cmpgt p0.s, p0/z, z0.s, #15
@@ -258,6 +354,18 @@
   ret <vscale x 4 x i1> %out
 }
 
+define <vscale x 4 x i1> @wide_cmpgt_s(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a) {
+; CHECK-LABEL: wide_cmpgt_s
+; CHECK: cmpgt p0.s, p0/z, z0.s, #15
+; CHECK-NEXT: ret
+  %elt = insertelement <vscale x 2 x i64> undef, i64 15, i32 0
+  %splat = shufflevector <vscale x 2 x i64> %elt, <vscale x 2 x i64> undef, <vscale x 2 x i32> zeroinitializer
+  %out = call <vscale x 4 x i1> @llvm.aarch64.sve.cmpgt.wide.nxv4i32(<vscale x 4 x i1> %pg,
+                                                                     <vscale x 4 x i32> %a,
+                                                                     <vscale x 2 x i64> %splat)
+  ret <vscale x 4 x i1> %out
+}
+
 define <vscale x 2 x i1> @ir_cmpgt_d(<vscale x 2 x i64> %a) {
 ; CHECK-LABEL: ir_cmpgt_d
 ; CHECK: cmpgt p0.d, p0/z, z0.d, #0
@@ -306,6 +414,18 @@
   ret <vscale x 16 x i1> %out
 }
 
+define <vscale x 16 x i1> @wide_cmple_b(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a) {
+; CHECK-LABEL: wide_cmple_b
+; CHECK: cmple p0.b, p0/z, z0.b, #4
+; CHECK-NEXT: ret
+  %elt = insertelement <vscale x 2 x i64> undef, i64 4, i32 0
+  %splat = shufflevector <vscale x 2 x i64> %elt, <vscale x 2 x i64> undef, <vscale x 2 x i32> zeroinitializer
+  %out = call <vscale x 16 x i1> @llvm.aarch64.sve.cmple.wide.nxv16i8(<vscale x 16 x i1> %pg,
+                                                                      <vscale x 16 x i8> %a,
+                                                                      <vscale x 2 x i64> %splat)
+  ret <vscale x 16 x i1> %out
+}
+
 define <vscale x 8 x i1> @ir_cmple_h(<vscale x 8 x i16> %a) {
 ; CHECK-LABEL: ir_cmple_h
 ; CHECK: cmple p0.h, p0/z, z0.h, #-16
@@ -328,6 +448,18 @@
   ret <vscale x 8 x i1> %out
 }
 
+define <vscale x 8 x i1> @wide_cmple_h(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a) {
+; CHECK-LABEL: wide_cmple_h
+; CHECK: cmple p0.h, p0/z, z0.h, #-16
+; CHECK-NEXT: ret
+  %elt = insertelement <vscale x 2 x i64> undef, i64 -16, i32 0
+  %splat = shufflevector <vscale x 2 x i64> %elt, <vscale x 2 x i64> undef, <vscale x 2 x i32> zeroinitializer
+  %out = call <vscale x 8 x i1> @llvm.aarch64.sve.cmple.wide.nxv8i16(<vscale x 8 x i1> %pg,
+                                                                     <vscale x 8 x i16> %a,
+                                                                     <vscale x 2 x i64> %splat)
+  ret <vscale x 8 x i1> %out
+}
+
 define <vscale x 4 x i1> @ir_cmple_s(<vscale x 4 x i32> %a) {
 ; CHECK-LABEL: ir_cmple_s
 ; CHECK: cmple p0.s, p0/z, z0.s, #15
@@ -350,6 +482,18 @@
   ret <vscale x 4 x i1> %out
 }
 
+define <vscale x 4 x i1> @wide_cmple_s(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a) {
+; CHECK-LABEL: wide_cmple_s
+; CHECK: cmple p0.s, p0/z, z0.s, #15
+; CHECK-NEXT: ret
+  %elt = insertelement <vscale x 2 x i64> undef, i64 15, i32 0
+  %splat = shufflevector <vscale x 2 x i64> %elt, <vscale x 2 x i64> undef, <vscale x 2 x i32> zeroinitializer
+  %out = call <vscale x 4 x i1> @llvm.aarch64.sve.cmple.wide.nxv4i32(<vscale x 4 x i1> %pg,
+                                                                     <vscale x 4 x i32> %a,
+                                                                     <vscale x 2 x i64> %splat)
+  ret <vscale x 4 x i1> %out
+}
+
 define <vscale x 2 x i1> @ir_cmple_d(<vscale x 2 x i64> %a) {
 ; CHECK-LABEL: ir_cmple_d
 ; CHECK: cmple p0.d, p0/z, z0.d, #0
@@ -398,6 +542,18 @@
   ret <vscale x 16 x i1> %out
 }
 
+define <vscale x 16 x i1> @wide_cmplt_b(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a) {
+; CHECK-LABEL: wide_cmplt_b
+; CHECK: cmplt p0.b, p0/z, z0.b, #4
+; CHECK-NEXT: ret
+  %elt = insertelement <vscale x 2 x i64> undef, i64 4, i32 0
+  %splat = shufflevector <vscale x 2 x i64> %elt, <vscale x 2 x i64> undef, <vscale x 2 x i32> zeroinitializer
+  %out = call <vscale x 16 x i1> @llvm.aarch64.sve.cmplt.wide.nxv16i8(<vscale x 16 x i1> %pg,
+                                                                      <vscale x 16 x i8> %a,
+                                                                      <vscale x 2 x i64> %splat)
+  ret <vscale x 16 x i1> %out
+}
+
 define <vscale x 8 x i1> @ir_cmplt_h(<vscale x 8 x i16> %a) {
 ; CHECK-LABEL: ir_cmplt_h
 ; CHECK: cmplt p0.h, p0/z, z0.h, #-16
@@ -420,6 +576,18 @@
   ret <vscale x 8 x i1> %out
 }
 
+define <vscale x 8 x i1> @wide_cmplt_h(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a) {
+; CHECK-LABEL: wide_cmplt_h
+; CHECK: cmplt p0.h, p0/z, z0.h, #-16
+; CHECK-NEXT: ret
+  %elt = insertelement <vscale x 2 x i64> undef, i64 -16, i32 0
+  %splat = shufflevector <vscale x 2 x i64> %elt, <vscale x 2 x i64> undef, <vscale x 2 x i32> zeroinitializer
+  %out = call <vscale x 8 x i1> @llvm.aarch64.sve.cmplt.wide.nxv8i16(<vscale x 8 x i1> %pg,
+                                                                     <vscale x 8 x i16> %a,
+                                                                     <vscale x 2 x i64> %splat)
+  ret <vscale x 8 x i1> %out
+}
+
 define <vscale x 4 x i1> @ir_cmplt_s(<vscale x 4 x i32> %a) {
 ; CHECK-LABEL: ir_cmplt_s
 ; CHECK: cmplt p0.s, p0/z, z0.s, #15
@@ -442,6 +610,18 @@
   ret <vscale x 4 x i1> %out
 }
 
+define <vscale x 4 x i1> @wide_cmplt_s(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a) {
+; CHECK-LABEL: wide_cmplt_s
+; CHECK: cmplt p0.s, p0/z, z0.s, #15
+; CHECK-NEXT: ret
+  %elt = insertelement <vscale x 2 x i64> undef, i64 15, i32 0
+  %splat = shufflevector <vscale x 2 x i64> %elt, <vscale x 2 x i64> undef, <vscale x 2 x i32> zeroinitializer
+  %out = call <vscale x 4 x i1> @llvm.aarch64.sve.cmplt.wide.nxv4i32(<vscale x 4 x i1> %pg,
+                                                                     <vscale x 4 x i32> %a,
+                                                                     <vscale x 2 x i64> %splat)
+  ret <vscale x 4 x i1> %out
+}
+
 define <vscale x 2 x i1> @ir_cmplt_d(<vscale x 2 x i64> %a) {
 ; CHECK-LABEL: ir_cmplt_d
 ; CHECK: cmplt p0.d, p0/z, z0.d, #0
@@ -490,6 +670,18 @@
   ret <vscale x 16 x i1> %out
 }
 
+define <vscale x 16 x i1> @wide_cmpne_b(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a) {
+; CHECK-LABEL: wide_cmpne_b
+; CHECK: cmpne p0.b, p0/z, z0.b, #4
+; CHECK-NEXT: ret
+  %elt = insertelement <vscale x 2 x i64> undef, i64 4, i32 0
+  %splat = shufflevector <vscale x 2 x i64> %elt, <vscale x 2 x i64> undef, <vscale x 2 x i32> zeroinitializer
+  %out = call <vscale x 16 x i1> @llvm.aarch64.sve.cmpne.wide.nxv16i8(<vscale x 16 x i1> %pg,
+                                                                      <vscale x 16 x i8> %a,
+                                                                      <vscale x 2 x i64> %splat)
+  ret <vscale x 16 x i1> %out
+}
+
 define <vscale x 8 x i1> @ir_cmpne_h(<vscale x 8 x i16> %a) {
 ; CHECK-LABEL: ir_cmpne_h
 ; CHECK: cmpne p0.h, p0/z, z0.h, #-16
@@ -512,6 +704,18 @@
   ret <vscale x 8 x i1> %out
 }
 
+define <vscale x 8 x i1> @wide_cmpne_h(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a) {
+; CHECK-LABEL: wide_cmpne_h
+; CHECK: cmpne p0.h, p0/z, z0.h, #-16
+; CHECK-NEXT: ret
+  %elt = insertelement <vscale x 2 x i64> undef, i64 -16, i32 0
+  %splat = shufflevector <vscale x 2 x i64> %elt, <vscale x 2 x i64> undef, <vscale x 2 x i32> zeroinitializer
+  %out = call <vscale x 8 x i1> @llvm.aarch64.sve.cmpne.wide.nxv8i16(<vscale x 8 x i1> %pg,
+                                                                     <vscale x 8 x i16> %a,
+                                                                     <vscale x 2 x i64> %splat)
+  ret <vscale x 8 x i1> %out
+}
+
 define <vscale x 4 x i1> @ir_cmpne_s(<vscale x 4 x i32> %a) {
 ; CHECK-LABEL: ir_cmpne_s
 ; CHECK: cmpne p0.s, p0/z, z0.s, #15
@@ -534,6 +738,18 @@
   ret <vscale x 4 x i1> %out
 }
 
+define <vscale x 4 x i1> @wide_cmpne_s(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a) {
+; CHECK-LABEL: wide_cmpne_s
+; CHECK: cmpne p0.s, p0/z, z0.s, #15
+; CHECK-NEXT: ret
+  %elt = insertelement <vscale x 2 x i64> undef, i64 15, i32 0
+  %splat = shufflevector <vscale x 2 x i64> %elt, <vscale x 2 x i64> undef, <vscale x 2 x i32> zeroinitializer
+  %out = call <vscale x 4 x i1> @llvm.aarch64.sve.cmpne.wide.nxv4i32(<vscale x 4 x i1> %pg,
+                                                                     <vscale x 4 x i32> %a,
+                                                                     <vscale x 2 x i64> %splat)
+  ret <vscale x 4 x i1> %out
+}
+
 define <vscale x 2 x i1> @ir_cmpne_d(<vscale x 2 x i64> %a) {
 ; CHECK-LABEL: ir_cmpne_d
 ; CHECK: cmpne p0.d, p0/z, z0.d, #0
@@ -586,6 +802,18 @@
   ret <vscale x 16 x i1> %out
 }
 
+define <vscale x 16 x i1> @wide_cmphi_b(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a) {
+; CHECK-LABEL: wide_cmphi_b
+; CHECK: cmphi p0.b, p0/z, z0.b, #4
+; CHECK-NEXT: ret
+  %elt = insertelement <vscale x 2 x i64> undef, i64 4, i32 0
+  %splat = shufflevector <vscale x 2 x i64> %elt, <vscale x 2 x i64> undef, <vscale x 2 x i32> zeroinitializer
+  %out = call <vscale x 16 x i1> @llvm.aarch64.sve.cmphi.wide.nxv16i8(<vscale x 16 x i1> %pg,
+                                                                      <vscale x 16 x i8> %a,
+                                                                      <vscale x 2 x i64> %splat)
+  ret <vscale x 16 x i1> %out
+}
+
 define <vscale x 8 x i1> @ir_cmphi_h(<vscale x 8 x i16> %a) {
 ; CHECK-LABEL: ir_cmphi_h
 ; CHECK: cmphi p0.h, p0/z, z0.h, #0
@@ -608,6 +836,18 @@
   ret <vscale x 8 x i1> %out
 }
 
+define <vscale x 8 x i1> @wide_cmphi_h(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a) {
+; CHECK-LABEL: wide_cmphi_h
+; CHECK: cmphi p0.h, p0/z, z0.h, #0
+; CHECK-NEXT: ret
+  %elt = insertelement <vscale x 2 x i64> undef, i64 0, i32 0
+  %splat = shufflevector <vscale x 2 x i64> %elt, <vscale x 2 x i64> undef, <vscale x 2 x i32> zeroinitializer
+  %out = call <vscale x 8 x i1> @llvm.aarch64.sve.cmphi.wide.nxv8i16(<vscale x 8 x i1> %pg,
+                                                                     <vscale x 8 x i16> %a,
+                                                                     <vscale x 2 x i64> %splat)
+  ret <vscale x 8 x i1> %out
+}
+
 define <vscale x 4 x i1> @ir_cmphi_s(<vscale x 4 x i32> %a) {
 ; CHECK-LABEL: ir_cmphi_s
 ; CHECK: cmphi p0.s, p0/z, z0.s, #68
@@ -630,6 +870,18 @@
   ret <vscale x 4 x i1> %out
 }
 
+define <vscale x 4 x i1> @wide_cmphi_s(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a) {
+; CHECK-LABEL: wide_cmphi_s
+; CHECK: cmphi p0.s, p0/z, z0.s, #68
+; CHECK-NEXT: ret
+  %elt = insertelement <vscale x 2 x i64> undef, i64 68, i32 0
+  %splat = shufflevector <vscale x 2 x i64> %elt, <vscale x 2 x i64> undef, <vscale x 2 x i32> zeroinitializer
+  %out = call <vscale x 4 x i1> @llvm.aarch64.sve.cmphi.wide.nxv4i32(<vscale x 4 x i1> %pg,
+                                                                     <vscale x 4 x i32> %a,
+                                                                     <vscale x 2 x i64> %splat)
+  ret <vscale x 4 x i1> %out
+}
+
 define <vscale x 2 x i1> @ir_cmphi_d(<vscale x 2 x i64> %a) {
 ; CHECK-LABEL: ir_cmphi_d
 ; CHECK: cmphi p0.d, p0/z, z0.d, #127
@@ -678,6 +930,18 @@
   ret <vscale x 16 x i1> %out
 }
 
+define <vscale x 16 x i1> @wide_cmphs_b(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a) {
+; CHECK-LABEL: wide_cmphs_b
+; CHECK: cmphs p0.b, p0/z, z0.b, #4
+; CHECK-NEXT: ret
+  %elt = insertelement <vscale x 2 x i64> undef, i64 4, i32 0
+  %splat = shufflevector <vscale x 2 x i64> %elt, <vscale x 2 x i64> undef, <vscale x 2 x i32> zeroinitializer
+  %out = call <vscale x 16 x i1> @llvm.aarch64.sve.cmphs.wide.nxv16i8(<vscale x 16 x i1> %pg,
+                                                                      <vscale x 16 x i8> %a,
+                                                                      <vscale x 2 x i64> %splat)
+  ret <vscale x 16 x i1> %out
+}
+
 define <vscale x 8 x i1> @ir_cmphs_h(<vscale x 8 x i16> %a) {
 ; CHECK-LABEL: ir_cmphs_h
 ; CHECK: cmphs p0.h, p0/z, z0.h, #0
@@ -700,6 +964,18 @@
   ret <vscale x 8 x i1> %out
 }
 
+define <vscale x 8 x i1> @wide_cmphs_h(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a) {
+; CHECK-LABEL: wide_cmphs_h
+; CHECK: cmphs p0.h, p0/z, z0.h, #0
+; CHECK-NEXT: ret
+  %elt = insertelement <vscale x 2 x i64> undef, i64 0, i32 0
+  %splat = shufflevector <vscale x 2 x i64> %elt, <vscale x 2 x i64> undef, <vscale x 2 x i32> zeroinitializer
+  %out = call <vscale x 8 x i1> @llvm.aarch64.sve.cmphs.wide.nxv8i16(<vscale x 8 x i1> %pg,
+                                                                     <vscale x 8 x i16> %a,
+                                                                     <vscale x 2 x i64> %splat)
+  ret <vscale x 8 x i1> %out
+}
+
 define <vscale x 4 x i1> @ir_cmphs_s(<vscale x 4 x i32> %a) {
 ; CHECK-LABEL: ir_cmphs_s
 ; CHECK: cmphs p0.s, p0/z, z0.s, #68
@@ -722,6 +998,18 @@
   ret <vscale x 4 x i1> %out
 }
 
+define <vscale x 4 x i1> @wide_cmphs_s(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a) {
+; CHECK-LABEL: wide_cmphs_s
+; CHECK: cmphs p0.s, p0/z, z0.s, #68
+; CHECK-NEXT: ret
+  %elt = insertelement <vscale x 2 x i64> undef, i64 68, i32 0
+  %splat = shufflevector <vscale x 2 x i64> %elt, <vscale x 2 x i64> undef, <vscale x 2 x i32> zeroinitializer
+  %out = call <vscale x 4 x i1> @llvm.aarch64.sve.cmphs.wide.nxv4i32(<vscale x 4 x i1> %pg,
+                                                                     <vscale x 4 x i32> %a,
+                                                                     <vscale x 2 x i64> %splat)
+  ret <vscale x 4 x i1> %out
+}
+
 define <vscale x 2 x i1> @ir_cmphs_d(<vscale x 2 x i64> %a) {
 ; CHECK-LABEL: ir_cmphs_d
 ; CHECK: cmphs p0.d, p0/z, z0.d, #127
@@ -770,6 +1058,28 @@
   ret <vscale x 16 x i1> %out
 }
 
+define <vscale x 16 x i1> @wide_cmplo_b(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a) {
+; CHECK-LABEL: wide_cmplo_b
+; CHECK: cmplo p0.b, p0/z, z0.b, #4
+; CHECK-NEXT: ret
+  %elt = insertelement <vscale x 2 x i64> undef, i64 4, i32 0
+  %splat = shufflevector <vscale x 2 x i64> %elt, <vscale x 2 x i64> undef, <vscale x 2 x i32> zeroinitializer
+  %out = call <vscale x 16 x i1> @llvm.aarch64.sve.cmplo.wide.nxv16i8(<vscale x 16 x i1> %pg,
+                                                                      <vscale x 16 x i8> %a,
+                                                                      <vscale x 2 x i64> %splat)
+  ret <vscale x 16 x i1> %out
+}
+
+define <vscale x 8 x i1> @ir_cmplo_h(<vscale x 8 x i16> %a) {
+; CHECK-LABEL: ir_cmplo_h
+; CHECK: cmplo p0.h, p0/z, z0.h, #0
+; CHECK-NEXT: ret
+  %elt = insertelement <vscale x 8 x i16> undef, i16 0, i32 0
+  %splat = shufflevector <vscale x 8 x i16> %elt, <vscale x 8 x i16> undef, <vscale x 8 x i32> zeroinitializer
+  %out = icmp ult <vscale x 8 x i16> %a, %splat
+  ret <vscale x 8 x i1> %out
+}
+
 define <vscale x 8 x i1> @int_cmplo_h(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a) {
 ; CHECK-LABEL: int_cmplo_h
 ; CHECK: cmplo p0.h, p0/z, z0.h, #0
@@ -782,6 +1092,18 @@
   ret <vscale x 8 x i1> %out
 }
 
+define <vscale x 8 x i1> @wide_cmplo_h(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a) {
+; CHECK-LABEL: wide_cmplo_h
+; CHECK: cmplo p0.h, p0/z, z0.h, #0
+; CHECK-NEXT: ret
+  %elt = insertelement <vscale x 2 x i64> undef, i64 0, i32 0
+  %splat = shufflevector <vscale x 2 x i64> %elt, <vscale x 2 x i64> undef, <vscale x 2 x i32> zeroinitializer
+  %out = call <vscale x 8 x i1> @llvm.aarch64.sve.cmplo.wide.nxv8i16(<vscale x 8 x i1> %pg,
+                                                                     <vscale x 8 x i16> %a,
+                                                                     <vscale x 2 x i64> %splat)
+  ret <vscale x 8 x i1> %out
+}
+
 define <vscale x 4 x i1> @ir_cmplo_s(<vscale x 4 x i32> %a) {
 ; CHECK-LABEL: ir_cmplo_s
 ; CHECK: cmplo p0.s, p0/z, z0.s, #68
@@ -804,6 +1126,18 @@
   ret <vscale x 4 x i1> %out
 }
 
+define <vscale x 4 x i1> @wide_cmplo_s(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a) {
+; CHECK-LABEL: wide_cmplo_s
+; CHECK: cmplo p0.s, p0/z, z0.s, #68
+; CHECK-NEXT: ret
+  %elt = insertelement <vscale x 2 x i64> undef, i64 68, i32 0
+  %splat = shufflevector <vscale x 2 x i64> %elt, <vscale x 2 x i64> undef, <vscale x 2 x i32> zeroinitializer
+  %out = call <vscale x 4 x i1> @llvm.aarch64.sve.cmplo.wide.nxv4i32(<vscale x 4 x i1> %pg,
+                                                                     <vscale x 4 x i32> %a,
+                                                                     <vscale x 2 x i64> %splat)
+  ret <vscale x 4 x i1> %out
+}
+
 define <vscale x 2 x i1> @ir_cmplo_d(<vscale x 2 x i64> %a) {
 ; CHECK-LABEL: ir_cmplo_d
 ; CHECK: cmplo p0.d, p0/z, z0.d, #127
@@ -852,6 +1186,18 @@
   ret <vscale x 16 x i1> %out
 }
 
+define <vscale x 16 x i1> @wide_cmpls_b(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a) {
+; CHECK-LABEL: wide_cmpls_b
+; CHECK: cmpls p0.b, p0/z, z0.b, #4
+; CHECK-NEXT: ret
+  %elt = insertelement <vscale x 2 x i64> undef, i64 4, i32 0
+  %splat = shufflevector <vscale x 2 x i64> %elt, <vscale x 2 x i64> undef, <vscale x 2 x i32> zeroinitializer
+  %out = call <vscale x 16 x i1> @llvm.aarch64.sve.cmpls.wide.nxv16i8(<vscale x 16 x i1> %pg,
+                                                                      <vscale x 16 x i8> %a,
+                                                                      <vscale x 2 x i64> %splat)
+  ret <vscale x 16 x i1> %out
+}
+
 define <vscale x 8 x i1> @ir_cmpls_h(<vscale x 8 x i16> %a) {
 ; CHECK-LABEL: ir_cmpls_h
 ; CHECK: cmpls p0.h, p0/z, z0.h, #0
@@ -874,6 +1220,18 @@
   ret <vscale x 8 x i1> %out
 }
 
+define <vscale x 8 x i1> @wide_cmpls_h(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a) {
+; CHECK-LABEL: wide_cmpls_h
+; CHECK: cmpls p0.h, p0/z, z0.h, #0
+; CHECK-NEXT: ret
+  %elt = insertelement <vscale x 2 x i64> undef, i64 0, i32 0
+  %splat = shufflevector <vscale x 2 x i64> %elt, <vscale x 2 x i64> undef, <vscale x 2 x i32> zeroinitializer
+  %out = call <vscale x 8 x i1> @llvm.aarch64.sve.cmpls.wide.nxv8i16(<vscale x 8 x i1> %pg,
+                                                                     <vscale x 8 x i16> %a,
+                                                                     <vscale x 2 x i64> %splat)
+  ret <vscale x 8 x i1> %out
+}
+
 define <vscale x 4 x i1> @ir_cmpls_s(<vscale x 4 x i32> %a) {
 ; CHECK-LABEL: ir_cmpls_s
 ; CHECK: cmpls p0.s, p0/z, z0.s, #68
@@ -896,6 +1254,18 @@
   ret <vscale x 4 x i1> %out
 }
 
+define <vscale x 4 x i1> @wide_cmpls_s(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a) {
+; CHECK-LABEL: wide_cmpls_s
+; CHECK: cmpls p0.s, p0/z, z0.s, #68
+; CHECK-NEXT: ret
+  %elt = insertelement <vscale x 2 x i64> undef, i64 68, i32 0
+  %splat = shufflevector <vscale x 2 x i64> %elt, <vscale x 2 x i64> undef, <vscale x 2 x i32> zeroinitializer
+  %out = call <vscale x 4 x i1> @llvm.aarch64.sve.cmpls.wide.nxv4i32(<vscale x 4 x i1> %pg,
+                                                                     <vscale x 4 x i32> %a,
+                                                                     <vscale x 2 x i64> %splat)
+  ret <vscale x 4 x i1> %out
+}
+
 define <vscale x 2 x i1> @ir_cmpls_d(<vscale x 2 x i64> %a) {
 ; CHECK-LABEL: ir_cmpls_d
 ; CHECK: cmpls p0.d, p0/z, z0.d, #127
@@ -922,28 +1292,62 @@
 declare <vscale x 8 x i1> @llvm.aarch64.sve.cmpeq.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
 declare <vscale x 4 x i1> @llvm.aarch64.sve.cmpeq.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
 declare <vscale x 2 x i1> @llvm.aarch64.sve.cmpeq.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)
+declare <vscale x 16 x i1> @llvm.aarch64.sve.cmpeq.wide.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 2 x i64>)
+declare <vscale x 8 x i1> @llvm.aarch64.sve.cmpeq.wide.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 2 x i64>)
+declare <vscale x 4 x i1> @llvm.aarch64.sve.cmpeq.wide.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 2 x i64>)
 
 declare <vscale x 16 x i1> @llvm.aarch64.sve.cmpge.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
 declare <vscale x 8 x i1> @llvm.aarch64.sve.cmpge.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
 declare <vscale x 4 x i1> @llvm.aarch64.sve.cmpge.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
 declare <vscale x 2 x i1> @llvm.aarch64.sve.cmpge.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)
+declare <vscale x 16 x i1> @llvm.aarch64.sve.cmpge.wide.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 2 x i64>)
+declare <vscale x 8 x i1> @llvm.aarch64.sve.cmpge.wide.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 2 x i64>)
+declare <vscale x 4 x i1> @llvm.aarch64.sve.cmpge.wide.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 2 x i64>)
 
 declare <vscale x 16 x i1> @llvm.aarch64.sve.cmpgt.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
 declare <vscale x 8 x i1> @llvm.aarch64.sve.cmpgt.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
 declare <vscale x 4 x i1> @llvm.aarch64.sve.cmpgt.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
 declare <vscale x 2 x i1> @llvm.aarch64.sve.cmpgt.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)
+declare <vscale x 16 x i1> @llvm.aarch64.sve.cmpgt.wide.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 2 x i64>)
+declare <vscale x 8 x i1> @llvm.aarch64.sve.cmpgt.wide.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 2 x i64>)
+declare <vscale x 4 x i1> @llvm.aarch64.sve.cmpgt.wide.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 2 x i64>)
 
 declare <vscale x 16 x i1> @llvm.aarch64.sve.cmphi.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
 declare <vscale x 8 x i1> @llvm.aarch64.sve.cmphi.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
 declare <vscale x 4 x i1> @llvm.aarch64.sve.cmphi.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
 declare <vscale x 2 x i1> @llvm.aarch64.sve.cmphi.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)
+declare <vscale x 16 x i1> @llvm.aarch64.sve.cmphi.wide.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 2 x i64>)
+declare <vscale x 8 x i1> @llvm.aarch64.sve.cmphi.wide.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 2 x i64>)
+declare <vscale x 4 x i1> @llvm.aarch64.sve.cmphi.wide.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 2 x i64>)
 
 declare <vscale x 16 x i1> @llvm.aarch64.sve.cmphs.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
 declare <vscale x 8 x i1> @llvm.aarch64.sve.cmphs.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
 declare <vscale x 4 x i1> @llvm.aarch64.sve.cmphs.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
 declare <vscale x 2 x i1> @llvm.aarch64.sve.cmphs.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)
+declare <vscale x 16 x i1> @llvm.aarch64.sve.cmphs.wide.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 2 x i64>)
+declare <vscale x 8 x i1> @llvm.aarch64.sve.cmphs.wide.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 2 x i64>)
+declare <vscale x 4 x i1> @llvm.aarch64.sve.cmphs.wide.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 2 x i64>)
+
+declare <vscale x 16 x i1> @llvm.aarch64.sve.cmple.wide.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 2 x i64>)
+declare <vscale x 8 x i1> @llvm.aarch64.sve.cmple.wide.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 2 x i64>)
+declare <vscale x 4 x i1> @llvm.aarch64.sve.cmple.wide.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 2 x i64>)
+
+declare <vscale x 16 x i1> @llvm.aarch64.sve.cmplo.wide.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 2 x i64>)
+declare <vscale x 8 x i1> @llvm.aarch64.sve.cmplo.wide.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 2 x i64>)
+declare <vscale x 4 x i1> @llvm.aarch64.sve.cmplo.wide.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 2 x i64>)
+
+declare <vscale x 16 x i1> @llvm.aarch64.sve.cmpls.wide.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 2 x i64>)
+declare <vscale x 8 x i1> @llvm.aarch64.sve.cmpls.wide.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 2 x i64>)
+declare <vscale x 4 x i1> @llvm.aarch64.sve.cmpls.wide.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 2 x i64>)
+
+declare <vscale x 16 x i1> @llvm.aarch64.sve.cmplt.wide.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 2 x i64>)
+declare <vscale x 8 x i1> @llvm.aarch64.sve.cmplt.wide.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 2 x i64>)
+declare <vscale x 4 x i1> @llvm.aarch64.sve.cmplt.wide.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 2 x i64>)
 
 declare <vscale x 16 x i1> @llvm.aarch64.sve.cmpne.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
 declare <vscale x 8 x i1> @llvm.aarch64.sve.cmpne.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
 declare <vscale x 4 x i1> @llvm.aarch64.sve.cmpne.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
 declare <vscale x 2 x i1> @llvm.aarch64.sve.cmpne.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)
+declare <vscale x 16 x i1> @llvm.aarch64.sve.cmpne.wide.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 2 x i64>)
+declare <vscale x 8 x i1> @llvm.aarch64.sve.cmpne.wide.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 2 x i64>)
+declare <vscale x 4 x i1> @llvm.aarch64.sve.cmpne.wide.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 2 x i64>)