Index: llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
===================================================================
--- llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -4307,6 +4307,51 @@
   }
 }
 
+static Optional<unsigned>
+getSVEPTruePredNumElements(unsigned IntNo, SDValue Op,
+                           unsigned MinSVEVectorSize) {
+  bool Overflow = false;
+  bool IsSigned = (IntNo == Intrinsic::aarch64_sve_whilelt ||
+                   IntNo == Intrinsic::aarch64_sve_whilele ||
+                   IntNo == Intrinsic::aarch64_sve_whilegt ||
+                   IntNo == Intrinsic::aarch64_sve_whilege);
+  bool IsLess = (IntNo == Intrinsic::aarch64_sve_whilelo ||
+                 IntNo == Intrinsic::aarch64_sve_whilelt ||
+                 IntNo == Intrinsic::aarch64_sve_whilels ||
+                 IntNo == Intrinsic::aarch64_sve_whilele);
+  bool IsOpEqualOrSame = (IntNo == Intrinsic::aarch64_sve_whilels ||
+                          IntNo == Intrinsic::aarch64_sve_whilele ||
+                          IntNo == Intrinsic::aarch64_sve_whilege ||
+                          IntNo == Intrinsic::aarch64_sve_whilehs);
+  APInt NumActiveElems;
+  unsigned ElementSize = 128 / Op.getValueType().getVectorMinNumElements();
+  if (IsLess) {
+    NumActiveElems = IsSigned ? Op.getConstantOperandAPInt(2).ssub_ov(
+                                    Op.getConstantOperandAPInt(1), Overflow)
+                              : Op.getConstantOperandAPInt(2).usub_ov(
+                                    Op.getConstantOperandAPInt(1), Overflow);
+  } else {
+    NumActiveElems = IsSigned ? Op.getConstantOperandAPInt(1).ssub_ov(
+                                    Op.getConstantOperandAPInt(2), Overflow)
+                              : Op.getConstantOperandAPInt(1).usub_ov(
+                                    Op.getConstantOperandAPInt(2), Overflow);
+  }
+  if (Overflow)
+    return None;
+  if (IsOpEqualOrSame) {
+    APInt One(NumActiveElems.getBitWidth(), 1, IsSigned);
+    NumActiveElems = IsSigned ? NumActiveElems.sadd_ov(One, Overflow)
+                              : NumActiveElems.uadd_ov(One, Overflow);
+  }
+  Optional<unsigned> PredPattern =
+      getSVEPredPatternFromNumElements(NumActiveElems.getZExtValue());
+  if (PredPattern != None && !Overflow &&
+      NumActiveElems.getZExtValue() <= (MinSVEVectorSize / ElementSize))
+    return PredPattern;
+
+  return None;
+}
+
 static SDValue addRequiredExtensionForVectorMULL(SDValue N, SelectionDAG &DAG,
                                                  const EVT &OrigTy,
                                                  const EVT &ExtTy,
@@ -4807,18 +4852,21 @@
                        Op.getOperand(1))));
     return SDValue();
   }
-  case Intrinsic::aarch64_sve_whilelo: {
+  case Intrinsic::aarch64_sve_whilelo:
+  case Intrinsic::aarch64_sve_whilelt:
+  case Intrinsic::aarch64_sve_whilels:
+  case Intrinsic::aarch64_sve_whilele:
+  case Intrinsic::aarch64_sve_whilege:
+  case Intrinsic::aarch64_sve_whilegt:
+  case Intrinsic::aarch64_sve_whilehs:
+  case Intrinsic::aarch64_sve_whilehi: {
     if (isa<ConstantSDNode>(Op.getOperand(1)) &&
         isa<ConstantSDNode>(Op.getOperand(2))) {
       unsigned MinSVEVectorSize =
           std::max(Subtarget->getMinSVEVectorSizeInBits(), 128u);
-      unsigned ElementSize = 128 / Op.getValueType().getVectorMinNumElements();
-      unsigned NumActiveElems =
-          Op.getConstantOperandVal(2) - Op.getConstantOperandVal(1);
       Optional<unsigned> PredPattern =
-          getSVEPredPatternFromNumElements(NumActiveElems);
-      if ((PredPattern != None) &&
-          NumActiveElems <= (MinSVEVectorSize / ElementSize))
+          getSVEPTruePredNumElements(IntNo, Op, MinSVEVectorSize);
+      if (PredPattern != None)
         return getPTrue(DAG, dl, Op.getValueType(), *PredPattern);
     }
     return SDValue();
Index: llvm/test/CodeGen/AArch64/sve-intrinsics-while.ll
===================================================================
--- llvm/test/CodeGen/AArch64/sve-intrinsics-while.ll
+++ llvm/test/CodeGen/AArch64/sve-intrinsics-while.ll
@@ -78,6 +78,46 @@
   ret <vscale x 2 x i1> %out
 }
 
+define <vscale x 2 x i1> @whilele_d_ii() {
+; CHECK-LABEL: whilele_d_ii:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov w8, #3
+; CHECK-NEXT:    whilele p0.d, xzr, x8
+; CHECK-NEXT:    ret
+  %out = call <vscale x 2 x i1> @llvm.aarch64.sve.whilele.nxv2i1.i64(i64 0, i64 3)
+  ret <vscale x 2 x i1> %out
+}
+
+define <vscale x 16 x i1> @whilele_b_ii1() {
+; CHECK-LABEL: whilele_b_ii1:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ptrue p0.b, vl6
+; CHECK-NEXT:    ret
+entry:
+  %out = call <vscale x 16 x i1> @llvm.aarch64.sve.whilele.nxv16i1.i64(i64 -2, i64 3)
+  ret <vscale x 16 x i1> %out
+}
+
+define <vscale x 16 x i1> @whilele_b_ii2() {
+; CHECK-LABEL: whilele_b_ii2:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    mov w8, #9
+; CHECK-NEXT:    whilele p0.b, xzr, x8
+; CHECK-NEXT:    ret
+entry:
+  %out = call <vscale x 16 x i1> @llvm.aarch64.sve.whilele.nxv16i1.i64(i64 0, i64 9)
+  ret <vscale x 16 x i1> %out
+}
+
+define <vscale x 16 x i1> @whilele_b_ii3() vscale_range(16, 16) {
+; CHECK-LABEL: whilele_b_ii3:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.b, vl256
+; CHECK-NEXT:    ret
+  %out = call <vscale x 16 x i1> @llvm.aarch64.sve.whilele.nxv16i1.i64(i64 0, i64 255)
+  ret <vscale x 16 x i1> %out
+}
+
 ;
 ; WHILELO
 ;
@@ -154,6 +194,58 @@
   ret <vscale x 2 x i1> %out
 }
 
+define <vscale x 2 x i1> @whilelo_d_ii() {
+; CHECK-LABEL: whilelo_d_ii:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov w8, #3
+; CHECK-NEXT:    whilelo p0.d, xzr, x8
+; CHECK-NEXT:    ret
+  %out = call <vscale x 2 x i1> @llvm.aarch64.sve.whilelo.nxv2i1.i64(i64 0, i64 3)
+  ret <vscale x 2 x i1> %out
+}
+
+define <vscale x 16 x i1> @whilelo_b_ii1() {
+; CHECK-LABEL: whilelo_b_ii1:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ptrue p0.b, vl6
+; CHECK-NEXT:    ret
+entry:
+  %out = call <vscale x 16 x i1> @llvm.aarch64.sve.whilelo.nxv16i1.i64(i64 2, i64 8)
+  ret <vscale x 16 x i1> %out
+}
+
+define <vscale x 16 x i1> @whilelo_b_ii2() {
+; CHECK-LABEL: whilelo_b_ii2:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    mov w8, #9
+; CHECK-NEXT:    whilelo p0.b, xzr, x8
+; CHECK-NEXT:    ret
+entry:
+  %out = call <vscale x 16 x i1> @llvm.aarch64.sve.whilelo.nxv16i1.i64(i64 0, i64 9)
+  ret <vscale x 16 x i1> %out
+}
+
+define <vscale x 16 x i1> @whilelo_b_ii3() vscale_range(16, 16) {
+; CHECK-LABEL: whilelo_b_ii3:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.b, vl256
+; CHECK-NEXT:    ret
+  %out = call <vscale x 16 x i1> @llvm.aarch64.sve.whilelo.nxv16i1.i64(i64 0, i64 256)
+  ret <vscale x 16 x i1> %out
+}
+
+define <vscale x 16 x i1> @whilelo_b_ii4() {
+; CHECK-LABEL: whilelo_b_ii4:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    mov w8, #3
+; CHECK-NEXT:    mov x9, #-2
+; CHECK-NEXT:    whilelo p0.b, x9, x8
+; CHECK-NEXT:    ret
+entry:
+  %out = call <vscale x 16 x i1> @llvm.aarch64.sve.whilelo.nxv16i1.i64(i64 -2, i64 3)
+  ret <vscale x 16 x i1> %out
+}
+
 ;
 ; WHILELS
 ;
@@ -230,6 +322,58 @@
   ret <vscale x 2 x i1> %out
 }
 
+define <vscale x 2 x i1> @whilels_d_ii() {
+; CHECK-LABEL: whilels_d_ii:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov w8, #3
+; CHECK-NEXT:    whilels p0.d, xzr, x8
+; CHECK-NEXT:    ret
+  %out = call <vscale x 2 x i1> @llvm.aarch64.sve.whilels.nxv2i1.i64(i64 0, i64 3)
+  ret <vscale x 2 x i1> %out
+}
+
+define <vscale x 16 x i1> @whilels_b_ii1() {
+; CHECK-LABEL: whilels_b_ii1:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ptrue p0.b, vl7
+; CHECK-NEXT:    ret
+entry:
+  %out = call <vscale x 16 x i1> @llvm.aarch64.sve.whilels.nxv16i1.i64(i64 2, i64 8)
+  ret <vscale x 16 x i1> %out
+}
+
+define <vscale x 16 x i1> @whilels_b_ii2() {
+; CHECK-LABEL: whilels_b_ii2:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    mov w8, #9
+; CHECK-NEXT:    whilels p0.b, xzr, x8
+; CHECK-NEXT:    ret
+entry:
+  %out = call <vscale x 16 x i1> @llvm.aarch64.sve.whilels.nxv16i1.i64(i64 0, i64 9)
+  ret <vscale x 16 x i1> %out
+}
+
+define <vscale x 16 x i1> @whilels_b_ii3() vscale_range(16, 16) {
+; CHECK-LABEL: whilels_b_ii3:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.b, vl256
+; CHECK-NEXT:    ret
+  %out = call <vscale x 16 x i1> @llvm.aarch64.sve.whilels.nxv16i1.i64(i64 0, i64 255)
+  ret <vscale x 16 x i1> %out
+}
+
+define <vscale x 16 x i1> @whilels_b_ii4() {
+; CHECK-LABEL: whilels_b_ii4:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    mov w8, #3
+; CHECK-NEXT:    mov x9, #-2
+; CHECK-NEXT:    whilels p0.b, x9, x8
+; CHECK-NEXT:    ret
+entry:
+  %out = call <vscale x 16 x i1> @llvm.aarch64.sve.whilels.nxv16i1.i64(i64 -2, i64 3)
+  ret <vscale x 16 x i1> %out
+}
+
 ;
 ; WHILELT
 ;
@@ -306,6 +450,46 @@
   ret <vscale x 2 x i1> %out
 }
 
+define <vscale x 2 x i1> @whilelt_d_ii() {
+; CHECK-LABEL: whilelt_d_ii:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov w8, #3
+; CHECK-NEXT:    whilelt p0.d, xzr, x8
+; CHECK-NEXT:    ret
+  %out = call <vscale x 2 x i1> @llvm.aarch64.sve.whilelt.nxv2i1.i64(i64 0, i64 3)
+  ret <vscale x 2 x i1> %out
+}
+
+define <vscale x 16 x i1> @whilelt_b_ii1() {
+; CHECK-LABEL: whilelt_b_ii1:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ptrue p0.b, vl5
+; CHECK-NEXT:    ret
+entry:
+  %out = call <vscale x 16 x i1> @llvm.aarch64.sve.whilelt.nxv16i1.i64(i64 -2, i64 3)
+  ret <vscale x 16 x i1> %out
+}
+
+define <vscale x 16 x i1> @whilelt_b_ii2() {
+; CHECK-LABEL: whilelt_b_ii2:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    mov w8, #9
+; CHECK-NEXT:    whilelt p0.b, xzr, x8
+; CHECK-NEXT:    ret
+entry:
+  %out = call <vscale x 16 x i1> @llvm.aarch64.sve.whilelt.nxv16i1.i64(i64 0, i64 9)
+  ret <vscale x 16 x i1> %out
+}
+
+define <vscale x 16 x i1> @whilelt_b_ii3() vscale_range(16, 16) {
+; CHECK-LABEL: whilelt_b_ii3:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.b, vl256
+; CHECK-NEXT:    ret
+  %out = call <vscale x 16 x i1> @llvm.aarch64.sve.whilelt.nxv16i1.i64(i64 0, i64 256)
+  ret <vscale x 16 x i1> %out
+}
+
 declare <vscale x 16 x i1> @llvm.aarch64.sve.whilele.nxv16i1.i32(i32, i32)
 declare <vscale x 16 x i1> @llvm.aarch64.sve.whilele.nxv16i1.i64(i64, i64)
 declare <vscale x 8 x i1> @llvm.aarch64.sve.whilele.nxv8i1.i32(i32, i32)
Index: llvm/test/CodeGen/AArch64/sve2-intrinsics-while.ll
===================================================================
--- llvm/test/CodeGen/AArch64/sve2-intrinsics-while.ll
+++ llvm/test/CodeGen/AArch64/sve2-intrinsics-while.ll
@@ -78,6 +78,46 @@
   ret <vscale x 2 x i1> %out
 }
 
+define <vscale x 2 x i1> @whilege_d_ii() {
+; CHECK-LABEL: whilege_d_ii:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov w8, #3
+; CHECK-NEXT:    whilege p0.d, x8, xzr
+; CHECK-NEXT:    ret
+  %out = call <vscale x 2 x i1> @llvm.aarch64.sve.whilege.nxv2i1.i64(i64 3, i64 0)
+  ret <vscale x 2 x i1> %out
+}
+
+define <vscale x 16 x i1> @whilege_b_ii1() {
+; CHECK-LABEL: whilege_b_ii1:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ptrue p0.b, vl6
+; CHECK-NEXT:    ret
+entry:
+  %out = call <vscale x 16 x i1> @llvm.aarch64.sve.whilege.nxv16i1.i32(i32 3, i32 -2)
+  ret <vscale x 16 x i1> %out
+}
+
+define <vscale x 16 x i1> @whilege_b_ii2() {
+; CHECK-LABEL: whilege_b_ii2:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    mov w8, #9
+; CHECK-NEXT:    whilege p0.b, x8, xzr
+; CHECK-NEXT:    ret
+entry:
+  %out = call <vscale x 16 x i1> @llvm.aarch64.sve.whilege.nxv16i1.i64(i64 9, i64 0)
+  ret <vscale x 16 x i1> %out
+}
+
+define <vscale x 16 x i1> @whilege_b_ii3() vscale_range(16, 16) {
+; CHECK-LABEL: whilege_b_ii3:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.b, vl256
+; CHECK-NEXT:    ret
+  %out = call <vscale x 16 x i1> @llvm.aarch64.sve.whilege.nxv16i1.i64(i64 255, i64 0)
+  ret <vscale x 16 x i1> %out
+}
+
 ;
 ; WHILEHS
 ;
@@ -154,6 +194,58 @@
   ret <vscale x 2 x i1> %out
 }
 
+define <vscale x 2 x i1> @whilehs_d_ii() {
+; CHECK-LABEL: whilehs_d_ii:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov w8, #3
+; CHECK-NEXT:    whilehs p0.d, x8, xzr
+; CHECK-NEXT:    ret
+  %out = call <vscale x 2 x i1> @llvm.aarch64.sve.whilehs.nxv2i1.i64(i64 3, i64 0)
+  ret <vscale x 2 x i1> %out
+}
+
+define <vscale x 16 x i1> @whilehs_b_ii1() {
+; CHECK-LABEL: whilehs_b_ii1:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ptrue p0.b, vl7
+; CHECK-NEXT:    ret
+entry:
+  %out = call <vscale x 16 x i1> @llvm.aarch64.sve.whilehs.nxv16i1.i64(i64 8, i64 2)
+  ret <vscale x 16 x i1> %out
+}
+
+define <vscale x 16 x i1> @whilehs_b_ii2() {
+; CHECK-LABEL: whilehs_b_ii2:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    mov w8, #9
+; CHECK-NEXT:    whilehs p0.b, x8, xzr
+; CHECK-NEXT:    ret
+entry:
+  %out = call <vscale x 16 x i1> @llvm.aarch64.sve.whilehs.nxv16i1.i64(i64 9, i64 0)
+  ret <vscale x 16 x i1> %out
+}
+
+define <vscale x 16 x i1> @whilehs_b_ii3() vscale_range(16, 16) {
+; CHECK-LABEL: whilehs_b_ii3:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.b, vl256
+; CHECK-NEXT:    ret
+  %out = call <vscale x 16 x i1> @llvm.aarch64.sve.whilehs.nxv16i1.i64(i64 255, i64 0)
+  ret <vscale x 16 x i1> %out
+}
+
+define <vscale x 16 x i1> @whilehs_b_ii4() {
+; CHECK-LABEL: whilehs_b_ii4:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    mov w8, #3
+; CHECK-NEXT:    mov x9, #-2
+; CHECK-NEXT:    whilehs p0.b, x9, x8
+; CHECK-NEXT:    ret
+entry:
+  %out = call <vscale x 16 x i1> @llvm.aarch64.sve.whilehs.nxv16i1.i64(i64 -2, i64 3)
+  ret <vscale x 16 x i1> %out
+}
+
 ;
 ; WHILEGT
 ;
@@ -306,6 +398,58 @@
   ret <vscale x 2 x i1> %out
 }
 
+define <vscale x 2 x i1> @whilehi_d_ii() {
+; CHECK-LABEL: whilehi_d_ii:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov w8, #3
+; CHECK-NEXT:    whilehi p0.d, x8, xzr
+; CHECK-NEXT:    ret
+  %out = call <vscale x 2 x i1> @llvm.aarch64.sve.whilehi.nxv2i1.i64(i64 3, i64 0)
+  ret <vscale x 2 x i1> %out
+}
+
+define <vscale x 16 x i1> @whilehi_b_ii1() {
+; CHECK-LABEL: whilehi_b_ii1:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ptrue p0.b, vl6
+; CHECK-NEXT:    ret
+entry:
+  %out = call <vscale x 16 x i1> @llvm.aarch64.sve.whilehi.nxv16i1.i64(i64 8, i64 2)
+  ret <vscale x 16 x i1> %out
+}
+
+define <vscale x 16 x i1> @whilehi_b_ii2() {
+; CHECK-LABEL: whilehi_b_ii2:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    mov w8, #9
+; CHECK-NEXT:    whilehi p0.b, x8, xzr
+; CHECK-NEXT:    ret
+entry:
+  %out = call <vscale x 16 x i1> @llvm.aarch64.sve.whilehi.nxv16i1.i64(i64 9, i64 0)
+  ret <vscale x 16 x i1> %out
+}
+
+define <vscale x 16 x i1> @whilehi_b_ii3() vscale_range(16, 16) {
+; CHECK-LABEL: whilehi_b_ii3:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.b, vl256
+; CHECK-NEXT:    ret
+  %out = call <vscale x 16 x i1> @llvm.aarch64.sve.whilehi.nxv16i1.i64(i64 256, i64 0)
+  ret <vscale x 16 x i1> %out
+}
+
+define <vscale x 16 x i1> @whilehi_b_ii4() {
+; CHECK-LABEL: whilehi_b_ii4:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    mov w8, #3
+; CHECK-NEXT:    mov x9, #-2
+; CHECK-NEXT:    whilehi p0.b, x9, x8
+; CHECK-NEXT:    ret
+entry:
+  %out = call <vscale x 16 x i1> @llvm.aarch64.sve.whilehi.nxv16i1.i64(i64 -2, i64 3)
+  ret <vscale x 16 x i1> %out
+}
+
 declare <vscale x 16 x i1> @llvm.aarch64.sve.whilege.nxv16i1.i32(i32, i32)
 declare <vscale x 16 x i1> @llvm.aarch64.sve.whilege.nxv16i1.i64(i64, i64)
 declare <vscale x 8 x i1> @llvm.aarch64.sve.whilege.nxv8i1.i32(i32, i32)
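Note on the expected patterns in the tests above (reviewer addition, not part of the patch): the exclusive forms (whilelo, whilelt, whilehi, whilegt) fold to a ptrue of op2 - op1 active lanes (op1 - op2 for the decreasing forms), and the inclusive forms (whilels, whilele, whilehs, whilege) add one. For example, whilele(-2, 3) covers 3 - (-2) + 1 = 6 lanes and becomes ptrue p0.b, vl6, while whilelt(-2, 3) covers 5 lanes and becomes vl5. The standalone C++ sketch below reproduces that count; whileActiveLanes is an invented name for this note, not an LLVM API, and it only approximates the overflow handling the patch performs with APInt::ssub_ov/usub_ov.

// while_count_sketch.cpp - standalone illustration only; the names below are
// invented for this note and do not exist in LLVM.
#include <cstdint>
#include <optional>

// Number of active predicate lanes produced by a WHILE* instruction with
// constant scalar bounds, or nullopt when the predicate would be all false.
std::optional<uint64_t> whileActiveLanes(int64_t Op1, int64_t Op2,
                                         bool IsSigned, bool IsLess,
                                         bool Inclusive) {
  // Increasing forms (lo/ls/lt/le) count Op2 - Op1; decreasing forms
  // (hi/hs/gt/ge) count Op1 - Op2.
  int64_t Hi = IsLess ? Op2 : Op1;
  int64_t Lo = IsLess ? Op1 : Op2;

  // Compare in the intrinsic's own signedness; an empty predicate is not
  // worth folding to a ptrue.
  if (IsSigned ? (Hi < Lo)
               : (static_cast<uint64_t>(Hi) < static_cast<uint64_t>(Lo)))
    return std::nullopt;

  // The difference fits in 64 bits for both signedness interpretations.
  uint64_t Count = static_cast<uint64_t>(Hi) - static_cast<uint64_t>(Lo);
  if (Inclusive) // le/ls/ge/hs also cover the end element
    ++Count;
  return Count; // caller still checks Count against the VL and known patterns
}

// Examples matching the tests: whileActiveLanes(-2, 3, true, true, true) == 6
// (whilele -> vl6); whileActiveLanes(8, 2, false, false, false) == 6
// (whilehi -> vl6); whileActiveLanes(2, 8, false, true, true) == 7
// (whilels -> vl7).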