Index: llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
===================================================================
--- llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -4813,12 +4813,139 @@
       unsigned MinSVEVectorSize =
           std::max(Subtarget->getMinSVEVectorSizeInBits(), 128u);
       unsigned ElementSize = 128 / Op.getValueType().getVectorMinNumElements();
-      unsigned NumActiveElems =
-          Op.getConstantOperandVal(2) - Op.getConstantOperandVal(1);
+      bool Overflow;
+      APInt NumActiveElems = Op.getConstantOperandAPInt(2).usub_ov(
+          Op.getConstantOperandAPInt(1), Overflow);
       Optional<unsigned> PredPattern =
-          getSVEPredPatternFromNumElements(NumActiveElems);
+          getSVEPredPatternFromNumElements(NumActiveElems.getZExtValue());
+      if ((PredPattern != None) && !Overflow &&
+          NumActiveElems.getZExtValue() <= (MinSVEVectorSize / ElementSize))
+        return getPTrue(DAG, dl, Op.getValueType(), *PredPattern);
+    }
+    return SDValue();
+  }
+  case Intrinsic::aarch64_sve_whilelt: {
+    if (isa<ConstantSDNode>(Op.getOperand(1)) &&
+        isa<ConstantSDNode>(Op.getOperand(2))) {
+      unsigned MinSVEVectorSize =
+          std::max(Subtarget->getMinSVEVectorSizeInBits(), 128u);
+      unsigned ElementSize = 128 / Op.getValueType().getVectorMinNumElements();
+      APInt NumActiveElems =
+          Op.getConstantOperandAPInt(2) - Op.getConstantOperandAPInt(1);
+      Optional<unsigned> PredPattern =
+          getSVEPredPatternFromNumElements(NumActiveElems.getZExtValue());
+      if ((PredPattern != None) &&
+          NumActiveElems.getZExtValue() <= (MinSVEVectorSize / ElementSize))
+        return getPTrue(DAG, dl, Op.getValueType(), *PredPattern);
+    }
+    return SDValue();
+  }
+  case Intrinsic::aarch64_sve_whilels: {
+    if (isa<ConstantSDNode>(Op.getOperand(1)) &&
+        isa<ConstantSDNode>(Op.getOperand(2))) {
+      unsigned MinSVEVectorSize =
+          std::max(Subtarget->getMinSVEVectorSizeInBits(), 128u);
+      unsigned ElementSize = 128 / Op.getValueType().getVectorMinNumElements();
+      bool Overflow;
+      APInt Op1 = Op.getConstantOperandAPInt(1);
+      APInt NumActiveElems =
+          Op.getConstantOperandAPInt(2).usub_ov(Op1, Overflow);
+      if (Overflow)
+        return SDValue();
+      APInt One(NumActiveElems.getBitWidth(), 1, false);
+      NumActiveElems = NumActiveElems.uadd_ov(One, Overflow);
+      Optional<unsigned> PredPattern =
+          getSVEPredPatternFromNumElements(NumActiveElems.getZExtValue());
+      if ((PredPattern != None) && !Overflow &&
+          NumActiveElems.getZExtValue() <= (MinSVEVectorSize / ElementSize))
+        return getPTrue(DAG, dl, Op.getValueType(), *PredPattern);
+    }
+    return SDValue();
+  }
+  case Intrinsic::aarch64_sve_whilele: {
+    if (isa<ConstantSDNode>(Op.getOperand(1)) &&
+        isa<ConstantSDNode>(Op.getOperand(2))) {
+      unsigned MinSVEVectorSize =
+          std::max(Subtarget->getMinSVEVectorSizeInBits(), 128u);
+      unsigned ElementSize = 128 / Op.getValueType().getVectorMinNumElements();
+      APInt NumActiveElems =
+          Op.getConstantOperandAPInt(2) - Op.getConstantOperandAPInt(1) + 1;
+      Optional<unsigned> PredPattern =
+          getSVEPredPatternFromNumElements(NumActiveElems.getZExtValue());
       if ((PredPattern != None) &&
-          NumActiveElems <= (MinSVEVectorSize / ElementSize))
+          NumActiveElems.getZExtValue() <= (MinSVEVectorSize / ElementSize))
+        return getPTrue(DAG, dl, Op.getValueType(), *PredPattern);
+    }
+    return SDValue();
+  }
+  case Intrinsic::aarch64_sve_whilege: {
+    if (isa<ConstantSDNode>(Op.getOperand(1)) &&
+        isa<ConstantSDNode>(Op.getOperand(2))) {
+      unsigned MinSVEVectorSize =
+          std::max(Subtarget->getMinSVEVectorSizeInBits(), 128u);
+      unsigned ElementSize = 128 / Op.getValueType().getVectorMinNumElements();
+      APInt NumActiveElems =
+          Op.getConstantOperandAPInt(1) - Op.getConstantOperandAPInt(2) + 1;
+      Optional<unsigned> PredPattern =
+          getSVEPredPatternFromNumElements(NumActiveElems.getZExtValue());
+      if ((PredPattern != None) &&
+          NumActiveElems.getZExtValue() <= (MinSVEVectorSize / ElementSize))
+        return getPTrue(DAG, dl, Op.getValueType(), *PredPattern);
+    }
+    return SDValue();
+  }
+  case Intrinsic::aarch64_sve_whilegt: {
+    if (isa<ConstantSDNode>(Op.getOperand(1)) &&
+        isa<ConstantSDNode>(Op.getOperand(2))) {
+      unsigned MinSVEVectorSize =
+          std::max(Subtarget->getMinSVEVectorSizeInBits(), 128u);
+      unsigned ElementSize = 128 / Op.getValueType().getVectorMinNumElements();
+      APInt NumActiveElems =
+          Op.getConstantOperandAPInt(1) - Op.getConstantOperandAPInt(2);
+      Optional<unsigned> PredPattern =
+          getSVEPredPatternFromNumElements(NumActiveElems.getZExtValue());
+      if ((PredPattern != None) &&
+          NumActiveElems.getZExtValue() <= (MinSVEVectorSize / ElementSize))
+        return getPTrue(DAG, dl, Op.getValueType(), *PredPattern);
+    }
+    return SDValue();
+  }
+  case Intrinsic::aarch64_sve_whilehs: {
+    if (isa<ConstantSDNode>(Op.getOperand(1)) &&
+        isa<ConstantSDNode>(Op.getOperand(2))) {
+      unsigned MinSVEVectorSize =
+          std::max(Subtarget->getMinSVEVectorSizeInBits(), 128u);
+      unsigned ElementSize = 128 / Op.getValueType().getVectorMinNumElements();
+      bool Overflow;
+      APInt Op2 = Op.getConstantOperandAPInt(2);
+      APInt NumActiveElems =
+          Op.getConstantOperandAPInt(1).usub_ov(Op2, Overflow);
+      if (Overflow)
+        return SDValue();
+      APInt One(NumActiveElems.getBitWidth(), 1, false);
+      NumActiveElems = NumActiveElems.uadd_ov(One, Overflow);
+      Optional<unsigned> PredPattern =
+          getSVEPredPatternFromNumElements(NumActiveElems.getZExtValue());
+      if ((PredPattern != None) && !Overflow &&
+          NumActiveElems.getZExtValue() <= (MinSVEVectorSize / ElementSize))
+        return getPTrue(DAG, dl, Op.getValueType(), *PredPattern);
+    }
+    return SDValue();
+  }
+  case Intrinsic::aarch64_sve_whilehi: {
+    if (isa<ConstantSDNode>(Op.getOperand(1)) &&
+        isa<ConstantSDNode>(Op.getOperand(2))) {
+      unsigned MinSVEVectorSize =
+          std::max(Subtarget->getMinSVEVectorSizeInBits(), 128u);
+      unsigned ElementSize = 128 / Op.getValueType().getVectorMinNumElements();
+      bool Overflow;
+      APInt Op2 = Op.getConstantOperandAPInt(2);
+      APInt NumActiveElems =
+          Op.getConstantOperandAPInt(1).usub_ov(Op2, Overflow);
+      Optional<unsigned> PredPattern =
+          getSVEPredPatternFromNumElements(NumActiveElems.getZExtValue());
+      if ((PredPattern != None) && !Overflow &&
+          NumActiveElems.getZExtValue() <= (MinSVEVectorSize / ElementSize))
         return getPTrue(DAG, dl, Op.getValueType(), *PredPattern);
     }
     return SDValue();
Index: llvm/test/CodeGen/AArch64/sve-intrinsics-while.ll
===================================================================
--- llvm/test/CodeGen/AArch64/sve-intrinsics-while.ll
+++ llvm/test/CodeGen/AArch64/sve-intrinsics-while.ll
@@ -78,6 +78,46 @@
   ret <vscale x 2 x i1> %out
 }
 
+define <vscale x 2 x i1> @whilele_d_ii() {
+; CHECK-LABEL: whilele_d_ii:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov w8, #3
+; CHECK-NEXT:    whilele p0.d, xzr, x8
+; CHECK-NEXT:    ret
+  %out = call <vscale x 2 x i1> @llvm.aarch64.sve.whilele.nxv2i1.i64(i64 0, i64 3)
+  ret <vscale x 2 x i1> %out
+}
+
+define <vscale x 16 x i1> @whilele_b_ii1() {
+; CHECK-LABEL: whilele_b_ii1:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ptrue p0.b, vl6
+; CHECK-NEXT:    ret
+entry:
+  %out = call <vscale x 16 x i1> @llvm.aarch64.sve.whilele.nxv16i1.i64(i64 -2, i64 3)
+  ret <vscale x 16 x i1> %out
+}
+
+define <vscale x 16 x i1> @whilele_b_ii2() {
+; CHECK-LABEL: whilele_b_ii2:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    mov w8, #9
+; CHECK-NEXT:    whilele p0.b, xzr, x8
+; CHECK-NEXT:    ret
+entry:
+  %out = call <vscale x 16 x i1> @llvm.aarch64.sve.whilele.nxv16i1.i64(i64 0, i64 9)
+  ret <vscale x 16 x i1> %out
+}
+
+define <vscale x 16 x i1> @whilele_b_ii3() vscale_range(16, 16) {
+; CHECK-LABEL: whilele_b_ii3:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.b, vl256
+; CHECK-NEXT:    ret
+  %out = call <vscale x 16 x i1> @llvm.aarch64.sve.whilele.nxv16i1.i64(i64 0, i64 255)
+  ret <vscale x 16 x i1> %out
+}
+
 ;
 ; WHILELO
 ;
@@ -154,6 +194,58 @@
   ret <vscale x 2 x i1> %out
 }
 
+define <vscale x 2 x i1> @whilelo_d_ii() {
+; CHECK-LABEL: whilelo_d_ii:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov w8, #3
+; CHECK-NEXT:    whilelo p0.d, xzr, x8
+; CHECK-NEXT:    ret
+  %out = call <vscale x 2 x i1> @llvm.aarch64.sve.whilelo.nxv2i1.i64(i64 0, i64 3)
+  ret <vscale x 2 x i1> %out
+}
+
+define <vscale x 16 x i1> @whilelo_b_ii1() {
+; CHECK-LABEL: whilelo_b_ii1:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ptrue p0.b, vl6
+; CHECK-NEXT:    ret
+entry:
+  %out = call <vscale x 16 x i1> @llvm.aarch64.sve.whilelo.nxv16i1.i64(i64 2, i64 8)
+  ret <vscale x 16 x i1> %out
+}
+
+define <vscale x 16 x i1> @whilelo_b_ii2() {
+; CHECK-LABEL: whilelo_b_ii2:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    mov w8, #9
+; CHECK-NEXT:    whilelo p0.b, xzr, x8
+; CHECK-NEXT:    ret
+entry:
+  %out = call <vscale x 16 x i1> @llvm.aarch64.sve.whilelo.nxv16i1.i64(i64 0, i64 9)
+  ret <vscale x 16 x i1> %out
+}
+
+define <vscale x 16 x i1> @whilelo_b_ii3() vscale_range(16, 16) {
+; CHECK-LABEL: whilelo_b_ii3:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.b, vl256
+; CHECK-NEXT:    ret
+  %out = call <vscale x 16 x i1> @llvm.aarch64.sve.whilelo.nxv16i1.i64(i64 0, i64 256)
+  ret <vscale x 16 x i1> %out
+}
+
+define <vscale x 16 x i1> @whilelo_b_ii4() {
+; CHECK-LABEL: whilelo_b_ii4:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    mov w8, #3
+; CHECK-NEXT:    mov x9, #-2
+; CHECK-NEXT:    whilelo p0.b, x9, x8
+; CHECK-NEXT:    ret
+entry:
+  %out = call <vscale x 16 x i1> @llvm.aarch64.sve.whilelo.nxv16i1.i64(i64 -2, i64 3)
+  ret <vscale x 16 x i1> %out
+}
+
 ;
 ; WHILELS
 ;
@@ -230,6 +322,58 @@
   ret <vscale x 2 x i1> %out
 }
 
+define <vscale x 2 x i1> @whilels_d_ii() {
+; CHECK-LABEL: whilels_d_ii:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov w8, #3
+; CHECK-NEXT:    whilels p0.d, xzr, x8
+; CHECK-NEXT:    ret
+  %out = call <vscale x 2 x i1> @llvm.aarch64.sve.whilels.nxv2i1.i64(i64 0, i64 3)
+  ret <vscale x 2 x i1> %out
+}
+
+define <vscale x 16 x i1> @whilels_b_ii1() {
+; CHECK-LABEL: whilels_b_ii1:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ptrue p0.b, vl7
+; CHECK-NEXT:    ret
+entry:
+  %out = call <vscale x 16 x i1> @llvm.aarch64.sve.whilels.nxv16i1.i64(i64 2, i64 8)
+  ret <vscale x 16 x i1> %out
+}
+
+define <vscale x 16 x i1> @whilels_b_ii2() {
+; CHECK-LABEL: whilels_b_ii2:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    mov w8, #9
+; CHECK-NEXT:    whilels p0.b, xzr, x8
+; CHECK-NEXT:    ret
+entry:
+  %out = call <vscale x 16 x i1> @llvm.aarch64.sve.whilels.nxv16i1.i64(i64 0, i64 9)
+  ret <vscale x 16 x i1> %out
+}
+
+define <vscale x 16 x i1> @whilels_b_ii3() vscale_range(16, 16) {
+; CHECK-LABEL: whilels_b_ii3:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.b, vl256
+; CHECK-NEXT:    ret
+  %out = call <vscale x 16 x i1> @llvm.aarch64.sve.whilels.nxv16i1.i64(i64 0, i64 255)
+  ret <vscale x 16 x i1> %out
+}
+
+define <vscale x 16 x i1> @whilels_b_ii4() {
+; CHECK-LABEL: whilels_b_ii4:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    mov w8, #3
+; CHECK-NEXT:    mov x9, #-2
+; CHECK-NEXT:    whilels p0.b, x9, x8
+; CHECK-NEXT:    ret
+entry:
+  %out = call <vscale x 16 x i1> @llvm.aarch64.sve.whilels.nxv16i1.i64(i64 -2, i64 3)
+  ret <vscale x 16 x i1> %out
+}
+
 ;
 ; WHILELT
 ;
@@ -306,6 +450,46 @@
   ret <vscale x 2 x i1> %out
 }
 
+define <vscale x 2 x i1> @whilelt_d_ii() {
+; CHECK-LABEL: whilelt_d_ii:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov w8, #3
+; CHECK-NEXT:    whilelt p0.d, xzr, x8
+; CHECK-NEXT:    ret
+  %out = call <vscale x 2 x i1> @llvm.aarch64.sve.whilelt.nxv2i1.i64(i64 0, i64 3)
+  ret <vscale x 2 x i1> %out
+}
+
+define <vscale x 16 x i1> @whilelt_b_ii1() {
+; CHECK-LABEL: whilelt_b_ii1:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ptrue p0.b, vl5
+; CHECK-NEXT:    ret
+entry:
+  %out = call <vscale x 16 x i1> @llvm.aarch64.sve.whilelt.nxv16i1.i64(i64 -2, i64 3)
+  ret <vscale x 16 x i1> %out
+}
+
+define <vscale x 16 x i1> @whilelt_b_ii2() {
+; CHECK-LABEL: whilelt_b_ii2:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    mov w8, #9
+; CHECK-NEXT:    whilelt p0.b, xzr, x8
+; CHECK-NEXT:    ret
+entry:
+  %out = call <vscale x 16 x i1> @llvm.aarch64.sve.whilelt.nxv16i1.i64(i64 0, i64 9)
+  ret <vscale x 16 x i1> %out
+}
+
+define <vscale x 16 x i1> @whilelt_b_ii3() vscale_range(16, 16) {
+; CHECK-LABEL: whilelt_b_ii3:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.b, vl256
+; CHECK-NEXT:    ret
+  %out = call <vscale x 16 x i1> @llvm.aarch64.sve.whilelt.nxv16i1.i64(i64 0, i64 256)
+  ret <vscale x 16 x i1> %out
+}
+
 declare <vscale x 16 x i1> @llvm.aarch64.sve.whilele.nxv16i1.i32(i32, i32)
 declare <vscale x 16 x i1> @llvm.aarch64.sve.whilele.nxv16i1.i64(i64, i64)
 declare <vscale x 8 x i1> @llvm.aarch64.sve.whilele.nxv8i1.i32(i32, i32)
Index: llvm/test/CodeGen/AArch64/sve2-intrinsics-while.ll
===================================================================
--- llvm/test/CodeGen/AArch64/sve2-intrinsics-while.ll
+++ llvm/test/CodeGen/AArch64/sve2-intrinsics-while.ll
@@ -78,6 +78,46 @@
   ret <vscale x 2 x i1> %out
 }
 
+define <vscale x 2 x i1> @whilege_d_ii() {
+; CHECK-LABEL: whilege_d_ii:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov w8, #3
+; CHECK-NEXT:    whilege p0.d, x8, xzr
+; CHECK-NEXT:    ret
+  %out = call <vscale x 2 x i1> @llvm.aarch64.sve.whilege.nxv2i1.i64(i64 3, i64 0)
+  ret <vscale x 2 x i1> %out
+}
+
+define <vscale x 16 x i1> @whilege_b_ii1() {
+; CHECK-LABEL: whilege_b_ii1:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ptrue p0.b, vl6
+; CHECK-NEXT:    ret
+entry:
+  %out = call <vscale x 16 x i1> @llvm.aarch64.sve.whilege.nxv16i1.i32(i32 3, i32 -2)
+  ret <vscale x 16 x i1> %out
+}
+
+define <vscale x 16 x i1> @whilege_b_ii2() {
+; CHECK-LABEL: whilege_b_ii2:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    mov w8, #9
+; CHECK-NEXT:    whilege p0.b, x8, xzr
+; CHECK-NEXT:    ret
+entry:
+  %out = call <vscale x 16 x i1> @llvm.aarch64.sve.whilege.nxv16i1.i64(i64 9, i64 0)
+  ret <vscale x 16 x i1> %out
+}
+
+define <vscale x 16 x i1> @whilege_b_ii3() vscale_range(16, 16) {
+; CHECK-LABEL: whilege_b_ii3:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.b, vl256
+; CHECK-NEXT:    ret
+  %out = call <vscale x 16 x i1> @llvm.aarch64.sve.whilege.nxv16i1.i64(i64 255, i64 0)
+  ret <vscale x 16 x i1> %out
+}
+
 ;
 ; WHILEHS
 ;
@@ -154,6 +194,58 @@
   ret <vscale x 2 x i1> %out
 }
 
+define <vscale x 2 x i1> @whilehs_d_ii() {
+; CHECK-LABEL: whilehs_d_ii:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov w8, #3
+; CHECK-NEXT:    whilehs p0.d, x8, xzr
+; CHECK-NEXT:    ret
+  %out = call <vscale x 2 x i1> @llvm.aarch64.sve.whilehs.nxv2i1.i64(i64 3, i64 0)
+  ret <vscale x 2 x i1> %out
+}
+
+define <vscale x 16 x i1> @whilehs_b_ii1() {
+; CHECK-LABEL: whilehs_b_ii1:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ptrue p0.b, vl7
+; CHECK-NEXT:    ret
+entry:
+  %out = call <vscale x 16 x i1> @llvm.aarch64.sve.whilehs.nxv16i1.i64(i64 8, i64 2)
+  ret <vscale x 16 x i1> %out
+}
+
+define <vscale x 16 x i1> @whilehs_b_ii2() {
+; CHECK-LABEL: whilehs_b_ii2:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    mov w8, #9
+; CHECK-NEXT:    whilehs p0.b, x8, xzr
+; CHECK-NEXT:    ret
+entry:
+  %out = call <vscale x 16 x i1> @llvm.aarch64.sve.whilehs.nxv16i1.i64(i64 9, i64 0)
+  ret <vscale x 16 x i1> %out
+}
+
+define <vscale x 16 x i1> @whilehs_b_ii3() vscale_range(16, 16) {
+; CHECK-LABEL: whilehs_b_ii3:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.b, vl256
+; CHECK-NEXT:    ret
+  %out = call <vscale x 16 x i1> @llvm.aarch64.sve.whilehs.nxv16i1.i64(i64 255, i64 0)
+  ret <vscale x 16 x i1> %out
+}
+
+define <vscale x 16 x i1> @whilehs_b_ii4() {
+; CHECK-LABEL: whilehs_b_ii4:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    mov w8, #3
+; CHECK-NEXT:    mov x9, #-2
+; CHECK-NEXT:    whilehs p0.b, x9, x8
+; CHECK-NEXT:    ret
+entry:
+  %out = call <vscale x 16 x i1> @llvm.aarch64.sve.whilehs.nxv16i1.i64(i64 -2, i64 3)
+  ret <vscale x 16 x i1> %out
+}
+
 ;
 ; WHILEGT
 ;
@@ -306,6 +398,58 @@
   ret <vscale x 2 x i1> %out
 }
 
+define <vscale x 2 x i1> @whilehi_d_ii() {
+; CHECK-LABEL: whilehi_d_ii:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov w8, #3
+; CHECK-NEXT:    whilehi p0.d, x8, xzr
+; CHECK-NEXT:    ret
+  %out = call <vscale x 2 x i1> @llvm.aarch64.sve.whilehi.nxv2i1.i64(i64 3, i64 0)
+  ret <vscale x 2 x i1> %out
+}
+
+define <vscale x 16 x i1> @whilehi_b_ii1() {
+; CHECK-LABEL: whilehi_b_ii1:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ptrue p0.b, vl6
+; CHECK-NEXT:    ret
+entry:
+  %out = call <vscale x 16 x i1> @llvm.aarch64.sve.whilehi.nxv16i1.i64(i64 8, i64 2)
+  ret <vscale x 16 x i1> %out
+}
+
+define <vscale x 16 x i1> @whilehi_b_ii2() {
+; CHECK-LABEL: whilehi_b_ii2:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    mov w8, #9
+; CHECK-NEXT:    whilehi p0.b, x8, xzr
+; CHECK-NEXT:    ret
+entry:
+  %out = call <vscale x 16 x i1> @llvm.aarch64.sve.whilehi.nxv16i1.i64(i64 9, i64 0)
+  ret <vscale x 16 x i1> %out
+}
+
+define <vscale x 16 x i1> @whilehi_b_ii3() vscale_range(16, 16) {
+; CHECK-LABEL: whilehi_b_ii3:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.b, vl256
+; CHECK-NEXT:    ret
+  %out = call <vscale x 16 x i1> @llvm.aarch64.sve.whilehi.nxv16i1.i64(i64 256, i64 0)
+  ret <vscale x 16 x i1> %out
+}
+
+define <vscale x 16 x i1> @whilehi_b_ii4() {
+; CHECK-LABEL: whilehi_b_ii4:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    mov w8, #3
+; CHECK-NEXT:    mov x9, #-2
+; CHECK-NEXT:    whilehi p0.b, x9, x8
+; CHECK-NEXT:    ret
+entry:
+  %out = call <vscale x 16 x i1> @llvm.aarch64.sve.whilehi.nxv16i1.i64(i64 -2, i64 3)
+  ret <vscale x 16 x i1> %out
+}
+
 declare <vscale x 16 x i1> @llvm.aarch64.sve.whilege.nxv16i1.i32(i32, i32)
 declare <vscale x 16 x i1> @llvm.aarch64.sve.whilege.nxv16i1.i64(i64, i64)
 declare <vscale x 8 x i1> @llvm.aarch64.sve.whilege.nxv8i1.i32(i32, i32)