diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -4647,6 +4647,45 @@
                        DAG.getTargetConstant(Pattern, DL, MVT::i32));
 }
 
+static SDValue optimizeWhile(SDValue Op, SelectionDAG &DAG, bool IsSigned,
+                             bool IsLess, bool IsEqual) {
+  if (!isa<ConstantSDNode>(Op.getOperand(1)) ||
+      !isa<ConstantSDNode>(Op.getOperand(2)))
+    return SDValue();
+
+  SDLoc dl(Op);
+  APInt X = Op.getConstantOperandAPInt(1);
+  APInt Y = Op.getConstantOperandAPInt(2);
+  APInt NumActiveElems;
+  bool Overflow;
+  if (IsLess)
+    NumActiveElems = IsSigned ? Y.ssub_ov(X, Overflow) : Y.usub_ov(X, Overflow);
+  else
+    NumActiveElems = IsSigned ? X.ssub_ov(Y, Overflow) : X.usub_ov(Y, Overflow);
+
+  if (Overflow)
+    return SDValue();
+
+  if (IsEqual) {
+    APInt One(NumActiveElems.getBitWidth(), 1, IsSigned);
+    NumActiveElems = IsSigned ? NumActiveElems.sadd_ov(One, Overflow)
+                              : NumActiveElems.uadd_ov(One, Overflow);
+    if (Overflow)
+      return SDValue();
+  }
+
+  std::optional<unsigned> PredPattern =
+      getSVEPredPatternFromNumElements(NumActiveElems.getZExtValue());
+  unsigned MinSVEVectorSize = std::max(
+      DAG.getSubtarget<AArch64Subtarget>().getMinSVEVectorSizeInBits(), 128u);
+  unsigned ElementSize = 128 / Op.getValueType().getVectorMinNumElements();
+  if (PredPattern != std::nullopt &&
+      NumActiveElems.getZExtValue() <= (MinSVEVectorSize / ElementSize))
+    return getPTrue(DAG, dl, Op.getValueType(), *PredPattern);
+
+  return SDValue();
+}
+
 // Returns a safe bitcast between two scalable vector predicates, where
 // any newly created lanes from a widening bitcast are defined as zero.
 static SDValue getSVEPredicateBitCast(EVT VT, SDValue Op, SelectionDAG &DAG) {
@@ -4899,22 +4938,30 @@
                        Op.getOperand(1))));
     return SDValue();
   }
-  case Intrinsic::aarch64_sve_whilelo: {
-    if (isa<ConstantSDNode>(Op.getOperand(1)) &&
-        isa<ConstantSDNode>(Op.getOperand(2))) {
-      unsigned MinSVEVectorSize =
-          std::max(Subtarget->getMinSVEVectorSizeInBits(), 128u);
-      unsigned ElementSize = 128 / Op.getValueType().getVectorMinNumElements();
-      unsigned NumActiveElems =
-          Op.getConstantOperandVal(2) - Op.getConstantOperandVal(1);
-      std::optional<unsigned> PredPattern =
-          getSVEPredPatternFromNumElements(NumActiveElems);
-      if ((PredPattern != std::nullopt) &&
-          NumActiveElems <= (MinSVEVectorSize / ElementSize))
-        return getPTrue(DAG, dl, Op.getValueType(), *PredPattern);
-    }
-    return SDValue();
-  }
+  case Intrinsic::aarch64_sve_whilelo:
+    return optimizeWhile(Op, DAG, /*IsSigned=*/false, /*IsLess=*/true,
+                         /*IsEqual=*/false);
+  case Intrinsic::aarch64_sve_whilelt:
+    return optimizeWhile(Op, DAG, /*IsSigned=*/true, /*IsLess=*/true,
+                         /*IsEqual=*/false);
+  case Intrinsic::aarch64_sve_whilels:
+    return optimizeWhile(Op, DAG, /*IsSigned=*/false, /*IsLess=*/true,
+                         /*IsEqual=*/true);
+  case Intrinsic::aarch64_sve_whilele:
+    return optimizeWhile(Op, DAG, /*IsSigned=*/true, /*IsLess=*/true,
+                         /*IsEqual=*/true);
+  case Intrinsic::aarch64_sve_whilege:
+    return optimizeWhile(Op, DAG, /*IsSigned=*/true, /*IsLess=*/false,
+                         /*IsEqual=*/true);
+  case Intrinsic::aarch64_sve_whilegt:
+    return optimizeWhile(Op, DAG, /*IsSigned=*/true, /*IsLess=*/false,
+                         /*IsEqual=*/false);
+  case Intrinsic::aarch64_sve_whilehs:
+    return optimizeWhile(Op, DAG, /*IsSigned=*/false, /*IsLess=*/false,
+                         /*IsEqual=*/true);
+  case Intrinsic::aarch64_sve_whilehi:
+    return optimizeWhile(Op, DAG, /*IsSigned=*/false, /*IsLess=*/false,
+                         /*IsEqual=*/false);
   case Intrinsic::aarch64_sve_sunpkhi:
     return DAG.getNode(AArch64ISD::SUNPKHI, dl, Op.getValueType(),
                        Op.getOperand(1));
diff --git a/llvm/test/CodeGen/AArch64/sve-intrinsics-while.ll
b/llvm/test/CodeGen/AArch64/sve-intrinsics-while.ll --- a/llvm/test/CodeGen/AArch64/sve-intrinsics-while.ll +++ b/llvm/test/CodeGen/AArch64/sve-intrinsics-while.ll @@ -78,6 +78,70 @@ ret %out } +define @whilele_d_ii_dont_fold_to_ptrue_larger_than_minvec() { +; CHECK-LABEL: whilele_d_ii_dont_fold_to_ptrue_larger_than_minvec: +; CHECK: // %bb.0: +; CHECK-NEXT: mov w8, #3 +; CHECK-NEXT: whilele p0.d, xzr, x8 +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.whilele.nxv2i1.i64(i64 0, i64 3) + ret %out +} + +define @whilele_b_ii() { +; CHECK-LABEL: whilele_b_ii: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: ptrue p0.b, vl6 +; CHECK-NEXT: ret +entry: + %out = call @llvm.aarch64.sve.whilele.nxv16i1.i64(i64 -2, i64 3) + ret %out +} + +define @whilele_b_ii_dont_fold_to_ptrue_nonexistent_vl9() { +; CHECK-LABEL: whilele_b_ii_dont_fold_to_ptrue_nonexistent_vl9: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: mov w8, #9 +; CHECK-NEXT: whilele p0.b, xzr, x8 +; CHECK-NEXT: ret +entry: + %out = call @llvm.aarch64.sve.whilele.nxv16i1.i64(i64 0, i64 9) + ret %out +} + +define @whilele_b_vl_maximum() vscale_range(16, 16) { +; CHECK-LABEL: whilele_b_vl_maximum: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.b, vl256 +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.whilele.nxv16i1.i64(i64 0, i64 255) + ret %out +} + +define @whilele_b_ii_dont_fold_to_ptrue_overflow() { +; CHECK-LABEL: whilele_b_ii_dont_fold_to_ptrue_overflow: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: mov w8, #2 +; CHECK-NEXT: mov w9, #2147483647 +; CHECK-NEXT: movk w8, #32768, lsl #16 +; CHECK-NEXT: whilele p0.b, w9, w8 +; CHECK-NEXT: ret +entry: + %out = call @llvm.aarch64.sve.whilele.nxv16i1.i32(i32 2147483647, i32 -2147483646) + ret %out +} + +define @whilele_b_ii_dont_fold_to_ptrue_increment_overflow() { +; CHECK-LABEL: whilele_b_ii_dont_fold_to_ptrue_increment_overflow: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: mov w8, #2147483647 +; CHECK-NEXT: whilele p0.b, wzr, w8 +; CHECK-NEXT: ret +entry: + %out = call 
@llvm.aarch64.sve.whilele.nxv16i1.i32(i32 0, i32 2147483647) + ret %out +} + ; ; WHILELO ; @@ -154,6 +218,58 @@ ret %out } +define @whilelo_d_ii_dont_fold_to_ptrue_larger_than_minvec() { +; CHECK-LABEL: whilelo_d_ii_dont_fold_to_ptrue_larger_than_minvec: +; CHECK: // %bb.0: +; CHECK-NEXT: mov w8, #3 +; CHECK-NEXT: whilelo p0.d, xzr, x8 +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.whilelo.nxv2i1.i64(i64 0, i64 3) + ret %out +} + +define @whilelo_b_ii() { +; CHECK-LABEL: whilelo_b_ii: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: ptrue p0.b, vl6 +; CHECK-NEXT: ret +entry: + %out = call @llvm.aarch64.sve.whilelo.nxv16i1.i64(i64 2, i64 8) + ret %out +} + +define @whilelo_b_ii_dont_fold_to_ptrue_nonexistent_vl9() { +; CHECK-LABEL: whilelo_b_ii_dont_fold_to_ptrue_nonexistent_vl9: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: mov w8, #9 +; CHECK-NEXT: whilelo p0.b, xzr, x8 +; CHECK-NEXT: ret +entry: + %out = call @llvm.aarch64.sve.whilelo.nxv16i1.i64(i64 0, i64 9) + ret %out +} + +define @whilelo_b_vl_maximum() vscale_range(16, 16) { +; CHECK-LABEL: whilelo_b_vl_maximum: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.b, vl256 +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.whilelo.nxv16i1.i64(i64 0, i64 256) + ret %out +} + +define @whilelo_b_ii_dont_fold_to_ptrue_overflow() { +; CHECK-LABEL: whilelo_b_ii_dont_fold_to_ptrue_overflow: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: mov w8, #6 +; CHECK-NEXT: mov w9, #-1 +; CHECK-NEXT: whilelo p0.b, w9, w8 +; CHECK-NEXT: ret +entry: + %out = call @llvm.aarch64.sve.whilelo.nxv16i1.i32(i32 4294967295, i32 6) + ret %out +} + ; ; WHILELS ; @@ -230,6 +346,69 @@ ret %out } +define @whilels_d_ii_dont_fold_to_ptrue_larger_than_minvec() { +; CHECK-LABEL: whilels_d_ii_dont_fold_to_ptrue_larger_than_minvec: +; CHECK: // %bb.0: +; CHECK-NEXT: mov w8, #3 +; CHECK-NEXT: whilels p0.d, xzr, x8 +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.whilels.nxv2i1.i64(i64 0, i64 3) + ret %out +} + +define @whilels_b_ii() { +; CHECK-LABEL: 
whilels_b_ii: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: ptrue p0.b, vl7 +; CHECK-NEXT: ret +entry: + %out = call @llvm.aarch64.sve.whilels.nxv16i1.i64(i64 2, i64 8) + ret %out +} + +define @whilels_b_ii_dont_fold_to_ptrue_nonexistent_vl9() { +; CHECK-LABEL: whilels_b_ii_dont_fold_to_ptrue_nonexistent_vl9: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: mov w8, #9 +; CHECK-NEXT: whilels p0.b, xzr, x8 +; CHECK-NEXT: ret +entry: + %out = call @llvm.aarch64.sve.whilels.nxv16i1.i64(i64 0, i64 9) + ret %out +} + +define @whilels_b_ii_vl_maximum() vscale_range(16, 16) { +; CHECK-LABEL: whilels_b_ii_vl_maximum: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.b, vl256 +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.whilels.nxv16i1.i64(i64 0, i64 255) + ret %out +} + +define @whilels_b_ii_dont_fold_to_ptrue_overflow() { +; CHECK-LABEL: whilels_b_ii_dont_fold_to_ptrue_overflow: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: mov w8, #6 +; CHECK-NEXT: mov w9, #-1 +; CHECK-NEXT: whilels p0.b, w9, w8 +; CHECK-NEXT: ret +entry: + %out = call @llvm.aarch64.sve.whilels.nxv16i1.i32(i32 4294967295, i32 6) + ret %out +} + +define @whilels_b_ii_dont_fold_to_ptrue_increment_overflow() { +; CHECK-LABEL: whilels_b_ii_dont_fold_to_ptrue_increment_overflow: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: mov w8, #-1 +; CHECK-NEXT: whilels p0.b, wzr, w8 +; CHECK-NEXT: ret +entry: + %out = call @llvm.aarch64.sve.whilels.nxv16i1.i32(i32 0, i32 4294967295) + ret %out +} + ; ; WHILELT ; @@ -306,6 +485,59 @@ ret %out } +define @whilelt_d_ii_dont_fold_to_ptrue_larger_than_minvec() { +; CHECK-LABEL: whilelt_d_ii_dont_fold_to_ptrue_larger_than_minvec: +; CHECK: // %bb.0: +; CHECK-NEXT: mov w8, #3 +; CHECK-NEXT: whilelt p0.d, xzr, x8 +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.whilelt.nxv2i1.i64(i64 0, i64 3) + ret %out +} + +define @whilelt_b_ii() { +; CHECK-LABEL: whilelt_b_ii: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: ptrue p0.b, vl5 +; CHECK-NEXT: ret +entry: + %out = call 
@llvm.aarch64.sve.whilelt.nxv16i1.i64(i64 -2, i64 3) + ret %out +} + +define @whilelt_b_ii_dont_fold_to_ptrue_nonexistent_vl9() { +; CHECK-LABEL: whilelt_b_ii_dont_fold_to_ptrue_nonexistent_vl9: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: mov w8, #9 +; CHECK-NEXT: whilelt p0.b, xzr, x8 +; CHECK-NEXT: ret +entry: + %out = call @llvm.aarch64.sve.whilelt.nxv16i1.i64(i64 0, i64 9) + ret %out +} + +define @whilelt_b_ii_vl_maximum() vscale_range(16, 16) { +; CHECK-LABEL: whilelt_b_ii_vl_maximum: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.b, vl256 +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.whilelt.nxv16i1.i64(i64 0, i64 256) + ret %out +} + +define @whilelt_b_ii_dont_fold_to_ptrue_overflow() { +; CHECK-LABEL: whilelt_b_ii_dont_fold_to_ptrue_overflow: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: mov w8, #2 +; CHECK-NEXT: mov w9, #2147483647 +; CHECK-NEXT: movk w8, #32768, lsl #16 +; CHECK-NEXT: whilelt p0.b, w9, w8 +; CHECK-NEXT: ret +entry: + %out = call @llvm.aarch64.sve.whilelt.nxv16i1.i32(i32 2147483647, i32 -2147483646) + ret %out +} + declare @llvm.aarch64.sve.whilele.nxv16i1.i32(i32, i32) declare @llvm.aarch64.sve.whilele.nxv16i1.i64(i64, i64) declare @llvm.aarch64.sve.whilele.nxv8i1.i32(i32, i32) diff --git a/llvm/test/CodeGen/AArch64/sve2-intrinsics-while.ll b/llvm/test/CodeGen/AArch64/sve2-intrinsics-while.ll --- a/llvm/test/CodeGen/AArch64/sve2-intrinsics-while.ll +++ b/llvm/test/CodeGen/AArch64/sve2-intrinsics-while.ll @@ -78,6 +78,71 @@ ret %out } +define @whilege_d_ii_dont_fold_to_ptrue_larger_than_minvec() { +; CHECK-LABEL: whilege_d_ii_dont_fold_to_ptrue_larger_than_minvec: +; CHECK: // %bb.0: +; CHECK-NEXT: mov w8, #3 +; CHECK-NEXT: whilege p0.d, x8, xzr +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.whilege.nxv2i1.i64(i64 3, i64 0) + ret %out +} + +define @whilege_b_ii() { +; CHECK-LABEL: whilege_b_ii: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: ptrue p0.b, vl6 +; CHECK-NEXT: ret +entry: + %out = call 
@llvm.aarch64.sve.whilege.nxv16i1.i32(i32 3, i32 -2) + ret %out +} + +define @whilege_b_ii_dont_fold_to_ptrue_nonexistent_vl9() { +; CHECK-LABEL: whilege_b_ii_dont_fold_to_ptrue_nonexistent_vl9: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: mov w8, #9 +; CHECK-NEXT: whilege p0.b, x8, xzr +; CHECK-NEXT: ret +entry: + %out = call @llvm.aarch64.sve.whilege.nxv16i1.i64(i64 9, i64 0) + ret %out +} + +define @whilege_b_ii_vl_maximum() vscale_range(16, 16) { +; CHECK-LABEL: whilege_b_ii_vl_maximum: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.b, vl256 +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.whilege.nxv16i1.i64(i64 255, i64 0) + ret %out +} + +define @whilege_b_ii_dont_fold_to_ptrue_overflow() { +; CHECK-LABEL: whilege_b_ii_dont_fold_to_ptrue_overflow: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: mov w8, #2 +; CHECK-NEXT: mov w9, #2147483647 +; CHECK-NEXT: movk w8, #32768, lsl #16 +; CHECK-NEXT: whilege p0.b, w9, w8 +; CHECK-NEXT: ret +entry: + %out = call @llvm.aarch64.sve.whilege.nxv16i1.i32(i32 2147483647, i32 -2147483646) + ret %out +} + +define @whilege_b_ii_dont_fold_to_ptrue_increment_overflow() { +; CHECK-LABEL: whilege_b_ii_dont_fold_to_ptrue_increment_overflow: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: mov w8, #2147483647 +; CHECK-NEXT: mov w9, #-2147483641 +; CHECK-NEXT: whilege p0.b, w9, w8 +; CHECK-NEXT: ret +entry: + %out = call @llvm.aarch64.sve.whilege.nxv16i1.i32(i32 -2147483641, i32 2147483647) + ret %out +} + ; ; WHILEHS ; @@ -154,6 +219,69 @@ ret %out } +define @whilehs_d_ii_dont_fold_to_ptrue_larger_than_minvec() { +; CHECK-LABEL: whilehs_d_ii_dont_fold_to_ptrue_larger_than_minvec: +; CHECK: // %bb.0: +; CHECK-NEXT: mov w8, #3 +; CHECK-NEXT: whilehs p0.d, x8, xzr +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.whilehs.nxv2i1.i64(i64 3, i64 0) + ret %out +} + +define @whilehs_b_ii() { +; CHECK-LABEL: whilehs_b_ii: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: ptrue p0.b, vl7 +; CHECK-NEXT: ret +entry: + %out = call 
@llvm.aarch64.sve.whilehs.nxv16i1.i64(i64 8, i64 2) + ret %out +} + +define @whilehs_b_ii_dont_fold_to_ptrue_nonexistent_vl9() { +; CHECK-LABEL: whilehs_b_ii_dont_fold_to_ptrue_nonexistent_vl9: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: mov w8, #9 +; CHECK-NEXT: whilehs p0.b, x8, xzr +; CHECK-NEXT: ret +entry: + %out = call @llvm.aarch64.sve.whilehs.nxv16i1.i64(i64 9, i64 0) + ret %out +} + +define @whilehs_b_ii_vl_maximum() vscale_range(16, 16) { +; CHECK-LABEL: whilehs_b_ii_vl_maximum: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.b, vl256 +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.whilehs.nxv16i1.i64(i64 255, i64 0) + ret %out +} + +define @whilehs_b_ii_dont_fold_to_ptrue_overflow() { +; CHECK-LABEL: whilehs_b_ii_dont_fold_to_ptrue_overflow: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: mov w8, #-1 +; CHECK-NEXT: mov w9, #6 +; CHECK-NEXT: whilehs p0.b, w9, w8 +; CHECK-NEXT: ret +entry: + %out = call @llvm.aarch64.sve.whilehs.nxv16i1.i32(i32 6, i32 4294967295) + ret %out +} + +define @whilehs_b_ii_dont_fold_to_ptrue_increment_overflow() { +; CHECK-LABEL: whilehs_b_ii_dont_fold_to_ptrue_increment_overflow: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: mov w8, #-1 +; CHECK-NEXT: whilehs p0.b, w8, wzr +; CHECK-NEXT: ret +entry: + %out = call @llvm.aarch64.sve.whilehs.nxv16i1.i32(i32 4294967295, i32 0) + ret %out +} + ; ; WHILEGT ; @@ -230,6 +358,58 @@ ret %out } +define @whilegt_d_ii_dont_fold_to_ptrue_larger_than_minvec() { +; CHECK-LABEL: whilegt_d_ii_dont_fold_to_ptrue_larger_than_minvec: +; CHECK: // %bb.0: +; CHECK-NEXT: mov w8, #3 +; CHECK-NEXT: whilegt p0.d, x8, xzr +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.whilegt.nxv2i1.i64(i64 3, i64 0) + ret %out +} + +define @whilegt_b_ii() { +; CHECK-LABEL: whilegt_b_ii: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: ptrue p0.b, vl5 +; CHECK-NEXT: ret +entry: + %out = call @llvm.aarch64.sve.whilegt.nxv16i1.i32(i32 3, i32 -2) + ret %out +} + +define @whilegt_b_ii_fold_to_ptrue_nonexistent_vl9() { +; 
CHECK-LABEL: whilegt_b_ii_fold_to_ptrue_nonexistent_vl9: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: mov w8, #9 +; CHECK-NEXT: whilegt p0.b, x8, xzr +; CHECK-NEXT: ret +entry: + %out = call @llvm.aarch64.sve.whilegt.nxv16i1.i64(i64 9, i64 0) + ret %out +} + +define @whilegt_b_ii_vl_maximum() vscale_range(16, 16) { +; CHECK-LABEL: whilegt_b_ii_vl_maximum: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.b, vl256 +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.whilegt.nxv16i1.i64(i64 256, i64 0) + ret %out +} + +define @whilegt_b_ii_dont_fold_to_ptrue_overflow() { +; CHECK-LABEL: whilegt_b_ii_dont_fold_to_ptrue_overflow: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: mov w8, #2147483647 +; CHECK-NEXT: mov w9, #-2147483641 +; CHECK-NEXT: whilegt p0.b, w9, w8 +; CHECK-NEXT: ret +entry: + %out = call @llvm.aarch64.sve.whilegt.nxv16i1.i32(i32 -2147483641, i32 2147483647) + ret %out +} + ; ; WHILEHI ; @@ -306,6 +486,58 @@ ret %out } +define @whilehi_d_ii_dont_fold_to_ptrue_larger_than_minvec() { +; CHECK-LABEL: whilehi_d_ii_dont_fold_to_ptrue_larger_than_minvec: +; CHECK: // %bb.0: +; CHECK-NEXT: mov w8, #3 +; CHECK-NEXT: whilehi p0.d, x8, xzr +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.whilehi.nxv2i1.i64(i64 3, i64 0) + ret %out +} + +define @whilehi_b_ii() { +; CHECK-LABEL: whilehi_b_ii: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: ptrue p0.b, vl6 +; CHECK-NEXT: ret +entry: + %out = call @llvm.aarch64.sve.whilehi.nxv16i1.i64(i64 8, i64 2) + ret %out +} + +define @whilehi_b_ii_dont_fold_to_ptrue_nonexistent_vl9() { +; CHECK-LABEL: whilehi_b_ii_dont_fold_to_ptrue_nonexistent_vl9: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: mov w8, #9 +; CHECK-NEXT: whilehi p0.b, x8, xzr +; CHECK-NEXT: ret +entry: + %out = call @llvm.aarch64.sve.whilehi.nxv16i1.i64(i64 9, i64 0) + ret %out +} + +define @whilehi_b_ii_vl_maximum() vscale_range(16, 16) { +; CHECK-LABEL: whilehi_b_ii_vl_maximum: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.b, vl256 +; CHECK-NEXT: ret + %out = call 
@llvm.aarch64.sve.whilehi.nxv16i1.i64(i64 256, i64 0)
+  ret %out
+}
+
+define <vscale x 16 x i1> @whilehi_b_ii_dont_fold_to_ptrue_overflow() {
+; CHECK-LABEL: whilehi_b_ii_dont_fold_to_ptrue_overflow:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: mov w8, #-1
+; CHECK-NEXT: mov w9, #7
+; CHECK-NEXT: whilehi p0.b, w9, w8
+; CHECK-NEXT: ret
+entry:
+  %out = call <vscale x 16 x i1> @llvm.aarch64.sve.whilehi.nxv16i1.i32(i32 7, i32 4294967295)
+  ret %out
+}
+
 declare <vscale x 16 x i1> @llvm.aarch64.sve.whilege.nxv16i1.i32(i32, i32)
 declare <vscale x 16 x i1> @llvm.aarch64.sve.whilege.nxv16i1.i64(i64, i64)
 declare <vscale x 8 x i1> @llvm.aarch64.sve.whilege.nxv8i1.i32(i32, i32)