Index: llvm/include/llvm/IR/IntrinsicsAArch64.td =================================================================== --- llvm/include/llvm/IR/IntrinsicsAArch64.td +++ llvm/include/llvm/IR/IntrinsicsAArch64.td @@ -2683,4 +2683,13 @@ : DefaultAttrsIntrinsic<[llvm_anyvector_ty], [LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, LLVMMatchType<0>, llvm_i32_ty]>; + + // + // Predicate-pair intrinsics + // + foreach cmp = ["ge", "gt", "hi", "hs", "le", "lo", "ls", "lt"] in { + def int_aarch64_sve_while # cmp # _x2 + : DefaultAttrsIntrinsic<[llvm_anyvector_ty, LLVMMatchType<0>], + [llvm_i64_ty, llvm_i64_ty], [IntrNoMem]>; + } } Index: llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp =================================================================== --- llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp +++ llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp @@ -356,6 +356,7 @@ void SelectPredicatedLoad(SDNode *N, unsigned NumVecs, unsigned Scale, unsigned Opc_rr, unsigned Opc_ri, bool IsIntr = false); + void SelectWhilePair(SDNode *N, unsigned Opc); bool SelectAddrModeFrameIndexSVE(SDValue N, SDValue &Base, SDValue &OffImm); /// SVE Reg+Imm addressing mode. @@ -1688,6 +1689,64 @@ return std::make_tuple(IsRegReg ? Opc_rr : Opc_ri, NewBase, NewOffset); } +enum class SelectTypeKind { + Int1 = 0, +}; + +/// Returns the entry of \p Opcodes that matches scalable vector type \p VT, +/// or 0 if there is none. \p Opcodes is expected to hold the opcodes for +/// { 8-bit, 16-bit, 32-bit, 64-bit } element types, in this order. 
+template +static unsigned SelectOpcodeFromVT(EVT VT, ArrayRef Opcodes) { + // Only scalable vector VTs can match; anything else yields no opcode. + if (!VT.isScalableVector()) + return 0; + + EVT EltVT = VT.getVectorElementType(); + switch (Kind) { + case SelectTypeKind::Int1: + if (EltVT != MVT::i1) + return 0; + break; + } + + unsigned Offset; + switch (VT.getVectorMinNumElements()) { + case 16: // 8-bit elements: Opcodes[0] + Offset = 0; + break; + case 8: // 16-bit elements: Opcodes[1] + Offset = 1; + break; + case 4: // 32-bit elements: Opcodes[2] + Offset = 2; + break; + case 2: // 64-bit elements: Opcodes[3] + Offset = 3; + break; + default: + return 0; + } + + return (Opcodes.size() <= Offset) ? 0 : Opcodes[Offset]; +} + +void AArch64DAGToDAGISel::SelectWhilePair(SDNode *N, unsigned Opc) { + SDLoc DL(N); + EVT VT = N->getValueType(0); + + SDValue Ops[] = {N->getOperand(1), N->getOperand(2)}; + + SDNode *WhilePair = CurDAG->getMachineNode(Opc, DL, MVT::Untyped, Ops); + SDValue SuperReg = SDValue(WhilePair, 0); + + for (unsigned I = 0; I < 2; ++I) + ReplaceUses(SDValue(N, I), CurDAG->getTargetExtractSubreg( + AArch64::psub0 + I, DL, VT, SuperReg)); + + CurDAG->RemoveDeadNode(N); +} + void AArch64DAGToDAGISel::SelectPredicatedLoad(SDNode *N, unsigned NumVecs, unsigned Scale, unsigned Opc_ri, unsigned Opc_rr, bool IsIntr) { @@ -4623,6 +4682,62 @@ if (tryMULLV64LaneV128(IntNo, Node)) return; break; + case Intrinsic::aarch64_sve_whilege_x2: + if (auto Op = SelectOpcodeFromVT( + Node->getValueType(0), + {AArch64::WHILEGE_2PXX_B, AArch64::WHILEGE_2PXX_H, + AArch64::WHILEGE_2PXX_S, AArch64::WHILEGE_2PXX_D})) + SelectWhilePair(Node, Op); + return; + case Intrinsic::aarch64_sve_whilegt_x2: + if (auto Op = SelectOpcodeFromVT( + Node->getValueType(0), + {AArch64::WHILEGT_2PXX_B, AArch64::WHILEGT_2PXX_H, + AArch64::WHILEGT_2PXX_S, AArch64::WHILEGT_2PXX_D})) + SelectWhilePair(Node, Op); + return; + case Intrinsic::aarch64_sve_whilehi_x2: + if (auto Op = SelectOpcodeFromVT( + Node->getValueType(0), + {AArch64::WHILEHI_2PXX_B, AArch64::WHILEHI_2PXX_H, + AArch64::WHILEHI_2PXX_S, 
AArch64::WHILEHI_2PXX_D})) + SelectWhilePair(Node, Op); + return; + case Intrinsic::aarch64_sve_whilehs_x2: + if (auto Op = SelectOpcodeFromVT( + Node->getValueType(0), + {AArch64::WHILEHS_2PXX_B, AArch64::WHILEHS_2PXX_H, + AArch64::WHILEHS_2PXX_S, AArch64::WHILEHS_2PXX_D})) + SelectWhilePair(Node, Op); + return; + case Intrinsic::aarch64_sve_whilele_x2: + if (auto Op = SelectOpcodeFromVT( + Node->getValueType(0), + {AArch64::WHILELE_2PXX_B, AArch64::WHILELE_2PXX_H, + AArch64::WHILELE_2PXX_S, AArch64::WHILELE_2PXX_D})) + SelectWhilePair(Node, Op); + return; + case Intrinsic::aarch64_sve_whilelo_x2: + if (auto Op = SelectOpcodeFromVT( + Node->getValueType(0), + {AArch64::WHILELO_2PXX_B, AArch64::WHILELO_2PXX_H, + AArch64::WHILELO_2PXX_S, AArch64::WHILELO_2PXX_D})) + SelectWhilePair(Node, Op); + return; + case Intrinsic::aarch64_sve_whilels_x2: + if (auto Op = SelectOpcodeFromVT( + Node->getValueType(0), + {AArch64::WHILELS_2PXX_B, AArch64::WHILELS_2PXX_H, + AArch64::WHILELS_2PXX_S, AArch64::WHILELS_2PXX_D})) + SelectWhilePair(Node, Op); + return; + case Intrinsic::aarch64_sve_whilelt_x2: + if (auto Op = SelectOpcodeFromVT( + Node->getValueType(0), + {AArch64::WHILELT_2PXX_B, AArch64::WHILELT_2PXX_H, + AArch64::WHILELT_2PXX_S, AArch64::WHILELT_2PXX_D})) + SelectWhilePair(Node, Op); + return; } break; } Index: llvm/lib/Target/AArch64/AArch64ISelLowering.cpp =================================================================== --- llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -287,6 +287,14 @@ case Intrinsic::aarch64_sve_fcmpge: case Intrinsic::aarch64_sve_fcmpgt: case Intrinsic::aarch64_sve_fcmpuo: + case Intrinsic::aarch64_sve_whilege_x2: + case Intrinsic::aarch64_sve_whilegt_x2: + case Intrinsic::aarch64_sve_whilehi_x2: + case Intrinsic::aarch64_sve_whilehs_x2: + case Intrinsic::aarch64_sve_whilele_x2: + case Intrinsic::aarch64_sve_whilelo_x2: + case Intrinsic::aarch64_sve_whilels_x2: + case 
Intrinsic::aarch64_sve_whilelt_x2: return true; } } Index: llvm/test/CodeGen/AArch64/sve2p1-intrinsics-while-pp.ll =================================================================== --- /dev/null +++ llvm/test/CodeGen/AArch64/sve2p1-intrinsics-while-pp.ll @@ -0,0 +1,663 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=aarch64 -mattr=+sve2p1 < %s | FileCheck %s + +; == WHILEGE == + +define @whilege_x2_nxv16i1(i64 %m, i64 %n) nounwind { +; CHECK-LABEL: whilege_x2_nxv16i1: +; CHECK: // %bb.0: +; CHECK-NEXT: whilege { p0.b, p1.b }, x0, x1 +; CHECK-NEXT: // kill: def $p0 killed $p0 killed $p0_p1 +; CHECK-NEXT: ret + %pp = call { , } @llvm.aarch64.sve.whilege.x2.nxv16i1(i64 %m, i64 %n) + %res = extractvalue {, } %pp, 0 + ret %res +} + +define @whilege_x2_nxv8i1(i64 %m, i64 %n) nounwind { +; CHECK-LABEL: whilege_x2_nxv8i1: +; CHECK: // %bb.0: +; CHECK-NEXT: whilege { p0.h, p1.h }, x0, x1 +; CHECK-NEXT: // kill: def $p0 killed $p0 killed $p0_p1 +; CHECK-NEXT: ret + %pp = call { , } @llvm.aarch64.sve.whilege.x2.nxv8i1(i64 %m, i64 %n) + %res = extractvalue {, } %pp, 0 + ret %res +} + +define @whilege_x2_nxv4i1(i64 %m, i64 %n) nounwind { +; CHECK-LABEL: whilege_x2_nxv4i1: +; CHECK: // %bb.0: +; CHECK-NEXT: whilege { p0.s, p1.s }, x0, x1 +; CHECK-NEXT: // kill: def $p0 killed $p0 killed $p0_p1 +; CHECK-NEXT: ret + %pp = call { , } @llvm.aarch64.sve.whilege.x2.nxv4i1(i64 %m, i64 %n) + %res = extractvalue {, } %pp, 0 + ret %res +} + +define @whilege_x2_nxv2i1(i64 %m, i64 %n) nounwind { +; CHECK-LABEL: whilege_x2_nxv2i1: +; CHECK: // %bb.0: +; CHECK-NEXT: whilege { p0.d, p1.d }, x0, x1 +; CHECK-NEXT: // kill: def $p0 killed $p0 killed $p0_p1 +; CHECK-NEXT: ret + %pp = call { , } @llvm.aarch64.sve.whilege.x2.nxv2i1(i64 %m, i64 %n) + %res = extractvalue {, } %pp, 0 + ret %res +} + + +; == WHILEGT == + +define @whilegt_x2_nxv16i1(i64 %m, i64 %n) nounwind { +; CHECK-LABEL: whilegt_x2_nxv16i1: +; CHECK: // %bb.0: +; CHECK-NEXT: 
whilegt { p0.b, p1.b }, x0, x1 +; CHECK-NEXT: // kill: def $p0 killed $p0 killed $p0_p1 +; CHECK-NEXT: ret + %pp = call { , } @llvm.aarch64.sve.whilegt.x2.nxv16i1(i64 %m, i64 %n) + %res = extractvalue {, } %pp, 0 + ret %res +} + +define @whilegt_x2_nxv8i1(i64 %m, i64 %n) nounwind { +; CHECK-LABEL: whilegt_x2_nxv8i1: +; CHECK: // %bb.0: +; CHECK-NEXT: whilegt { p0.h, p1.h }, x0, x1 +; CHECK-NEXT: // kill: def $p0 killed $p0 killed $p0_p1 +; CHECK-NEXT: ret + %pp = call { , } @llvm.aarch64.sve.whilegt.x2.nxv8i1(i64 %m, i64 %n) + %res = extractvalue {, } %pp, 0 + ret %res +} + +define @whilegt_x2_nxv4i1(i64 %m, i64 %n) nounwind { +; CHECK-LABEL: whilegt_x2_nxv4i1: +; CHECK: // %bb.0: +; CHECK-NEXT: whilegt { p0.s, p1.s }, x0, x1 +; CHECK-NEXT: // kill: def $p0 killed $p0 killed $p0_p1 +; CHECK-NEXT: ret + %pp = call { , } @llvm.aarch64.sve.whilegt.x2.nxv4i1(i64 %m, i64 %n) + %res = extractvalue {, } %pp, 0 + ret %res +} + +define @whilegt_x2_nxv2i1(i64 %m, i64 %n) nounwind { +; CHECK-LABEL: whilegt_x2_nxv2i1: +; CHECK: // %bb.0: +; CHECK-NEXT: whilegt { p0.d, p1.d }, x0, x1 +; CHECK-NEXT: // kill: def $p0 killed $p0 killed $p0_p1 +; CHECK-NEXT: ret + %pp = call { , } @llvm.aarch64.sve.whilegt.x2.nxv2i1(i64 %m, i64 %n) + %res = extractvalue {, } %pp, 0 + ret %res +} + + +; == WHILEHI == + +define @whilehi_x2_nxv16i1(i64 %m, i64 %n) nounwind { +; CHECK-LABEL: whilehi_x2_nxv16i1: +; CHECK: // %bb.0: +; CHECK-NEXT: whilehi { p0.b, p1.b }, x0, x1 +; CHECK-NEXT: // kill: def $p0 killed $p0 killed $p0_p1 +; CHECK-NEXT: ret + %pp = call { , } @llvm.aarch64.sve.whilehi.x2.nxv16i1(i64 %m, i64 %n) + %res = extractvalue {, } %pp, 0 + ret %res +} + +define @whilehi_x2_nxv8i1(i64 %m, i64 %n) nounwind { +; CHECK-LABEL: whilehi_x2_nxv8i1: +; CHECK: // %bb.0: +; CHECK-NEXT: whilehi { p0.h, p1.h }, x0, x1 +; CHECK-NEXT: // kill: def $p0 killed $p0 killed $p0_p1 +; CHECK-NEXT: ret + %pp = call { , } @llvm.aarch64.sve.whilehi.x2.nxv8i1(i64 %m, i64 %n) + %res = extractvalue {, } %pp, 0 + 
ret %res +} + +define @whilehi_x2_nxv4i1(i64 %m, i64 %n) nounwind { +; CHECK-LABEL: whilehi_x2_nxv4i1: +; CHECK: // %bb.0: +; CHECK-NEXT: whilehi { p0.s, p1.s }, x0, x1 +; CHECK-NEXT: // kill: def $p0 killed $p0 killed $p0_p1 +; CHECK-NEXT: ret + %pp = call { , } @llvm.aarch64.sve.whilehi.x2.nxv4i1(i64 %m, i64 %n) + %res = extractvalue {, } %pp, 0 + ret %res +} + +define @whilehi_x2_nxv2i1(i64 %m, i64 %n) nounwind { +; CHECK-LABEL: whilehi_x2_nxv2i1: +; CHECK: // %bb.0: +; CHECK-NEXT: whilehi { p0.d, p1.d }, x0, x1 +; CHECK-NEXT: // kill: def $p0 killed $p0 killed $p0_p1 +; CHECK-NEXT: ret + %pp = call { , } @llvm.aarch64.sve.whilehi.x2.nxv2i1(i64 %m, i64 %n) + %res = extractvalue {, } %pp, 0 + ret %res +} + + +; == WHILEHS == + +define @whilehs_x2_nxv16i1(i64 %m, i64 %n) nounwind { +; CHECK-LABEL: whilehs_x2_nxv16i1: +; CHECK: // %bb.0: +; CHECK-NEXT: whilehs { p0.b, p1.b }, x0, x1 +; CHECK-NEXT: // kill: def $p0 killed $p0 killed $p0_p1 +; CHECK-NEXT: ret + %pp = call { , } @llvm.aarch64.sve.whilehs.x2.nxv16i1(i64 %m, i64 %n) + %res = extractvalue {, } %pp, 0 + ret %res +} + +define @whilehs_x2_nxv8i1(i64 %m, i64 %n) nounwind { +; CHECK-LABEL: whilehs_x2_nxv8i1: +; CHECK: // %bb.0: +; CHECK-NEXT: whilehs { p0.h, p1.h }, x0, x1 +; CHECK-NEXT: // kill: def $p0 killed $p0 killed $p0_p1 +; CHECK-NEXT: ret + %pp = call { , } @llvm.aarch64.sve.whilehs.x2.nxv8i1(i64 %m, i64 %n) + %res = extractvalue {, } %pp, 0 + ret %res +} + +define @whilehs_x2_nxv4i1(i64 %m, i64 %n) nounwind { +; CHECK-LABEL: whilehs_x2_nxv4i1: +; CHECK: // %bb.0: +; CHECK-NEXT: whilehs { p0.s, p1.s }, x0, x1 +; CHECK-NEXT: // kill: def $p0 killed $p0 killed $p0_p1 +; CHECK-NEXT: ret + %pp = call { , } @llvm.aarch64.sve.whilehs.x2.nxv4i1(i64 %m, i64 %n) + %res = extractvalue {, } %pp, 0 + ret %res +} + +define @whilehs_x2_nxv2i1(i64 %m, i64 %n) nounwind { +; CHECK-LABEL: whilehs_x2_nxv2i1: +; CHECK: // %bb.0: +; CHECK-NEXT: whilehs { p0.d, p1.d }, x0, x1 +; CHECK-NEXT: // kill: def $p0 killed $p0 
killed $p0_p1 +; CHECK-NEXT: ret + %pp = call { , } @llvm.aarch64.sve.whilehs.x2.nxv2i1(i64 %m, i64 %n) + %res = extractvalue {, } %pp, 0 + ret %res +} + + +; == WHILELE == + +define @whilele_x2_nxv16i1(i64 %m, i64 %n) nounwind { +; CHECK-LABEL: whilele_x2_nxv16i1: +; CHECK: // %bb.0: +; CHECK-NEXT: whilele { p0.b, p1.b }, x0, x1 +; CHECK-NEXT: // kill: def $p0 killed $p0 killed $p0_p1 +; CHECK-NEXT: ret + %pp = call { , } @llvm.aarch64.sve.whilele.x2.nxv16i1(i64 %m, i64 %n) + %res = extractvalue {, } %pp, 0 + ret %res +} + +define @whilele_x2_nxv8i1(i64 %m, i64 %n) nounwind { +; CHECK-LABEL: whilele_x2_nxv8i1: +; CHECK: // %bb.0: +; CHECK-NEXT: whilele { p0.h, p1.h }, x0, x1 +; CHECK-NEXT: // kill: def $p0 killed $p0 killed $p0_p1 +; CHECK-NEXT: ret + %pp = call { , } @llvm.aarch64.sve.whilele.x2.nxv8i1(i64 %m, i64 %n) + %res = extractvalue {, } %pp, 0 + ret %res +} + +define @whilele_x2_nxv4i1(i64 %m, i64 %n) nounwind { +; CHECK-LABEL: whilele_x2_nxv4i1: +; CHECK: // %bb.0: +; CHECK-NEXT: whilele { p0.s, p1.s }, x0, x1 +; CHECK-NEXT: // kill: def $p0 killed $p0 killed $p0_p1 +; CHECK-NEXT: ret + %pp = call { , } @llvm.aarch64.sve.whilele.x2.nxv4i1(i64 %m, i64 %n) + %res = extractvalue {, } %pp, 0 + ret %res +} + +define @whilele_x2_nxv2i1(i64 %m, i64 %n) nounwind { +; CHECK-LABEL: whilele_x2_nxv2i1: +; CHECK: // %bb.0: +; CHECK-NEXT: whilele { p0.d, p1.d }, x0, x1 +; CHECK-NEXT: // kill: def $p0 killed $p0 killed $p0_p1 +; CHECK-NEXT: ret + %pp = call { , } @llvm.aarch64.sve.whilele.x2.nxv2i1(i64 %m, i64 %n) + %res = extractvalue {, } %pp, 0 + ret %res +} + + +; == WHILELO == + +define @whilelo_x2_nxv16i1(i64 %m, i64 %n) nounwind { +; CHECK-LABEL: whilelo_x2_nxv16i1: +; CHECK: // %bb.0: +; CHECK-NEXT: whilelo { p0.b, p1.b }, x0, x1 +; CHECK-NEXT: // kill: def $p0 killed $p0 killed $p0_p1 +; CHECK-NEXT: ret + %pp = call { , } @llvm.aarch64.sve.whilelo.x2.nxv16i1(i64 %m, i64 %n) + %res = extractvalue {, } %pp, 0 + ret %res +} + +define @whilelo_x2_nxv8i1(i64 %m, 
i64 %n) nounwind { +; CHECK-LABEL: whilelo_x2_nxv8i1: +; CHECK: // %bb.0: +; CHECK-NEXT: whilelo { p0.h, p1.h }, x0, x1 +; CHECK-NEXT: // kill: def $p0 killed $p0 killed $p0_p1 +; CHECK-NEXT: ret + %pp = call { , } @llvm.aarch64.sve.whilelo.x2.nxv8i1(i64 %m, i64 %n) + %res = extractvalue {, } %pp, 0 + ret %res +} + +define @whilelo_x2_nxv4i1(i64 %m, i64 %n) nounwind { +; CHECK-LABEL: whilelo_x2_nxv4i1: +; CHECK: // %bb.0: +; CHECK-NEXT: whilelo { p0.s, p1.s }, x0, x1 +; CHECK-NEXT: // kill: def $p0 killed $p0 killed $p0_p1 +; CHECK-NEXT: ret + %pp = call { , } @llvm.aarch64.sve.whilelo.x2.nxv4i1(i64 %m, i64 %n) + %res = extractvalue {, } %pp, 0 + ret %res +} + +define @whilelo_x2_nxv2i1(i64 %m, i64 %n) nounwind { +; CHECK-LABEL: whilelo_x2_nxv2i1: +; CHECK: // %bb.0: +; CHECK-NEXT: whilelo { p0.d, p1.d }, x0, x1 +; CHECK-NEXT: // kill: def $p0 killed $p0 killed $p0_p1 +; CHECK-NEXT: ret + %pp = call { , } @llvm.aarch64.sve.whilelo.x2.nxv2i1(i64 %m, i64 %n) + %res = extractvalue {, } %pp, 0 + ret %res +} + + +; == WHILELS == + +define @whilels_x2_nxv16i1(i64 %m, i64 %n) nounwind { +; CHECK-LABEL: whilels_x2_nxv16i1: +; CHECK: // %bb.0: +; CHECK-NEXT: whilels { p0.b, p1.b }, x0, x1 +; CHECK-NEXT: // kill: def $p0 killed $p0 killed $p0_p1 +; CHECK-NEXT: ret + %pp = call { , } @llvm.aarch64.sve.whilels.x2.nxv16i1(i64 %m, i64 %n) + %res = extractvalue {, } %pp, 0 + ret %res +} + +define @whilels_x2_nxv8i1(i64 %m, i64 %n) nounwind { +; CHECK-LABEL: whilels_x2_nxv8i1: +; CHECK: // %bb.0: +; CHECK-NEXT: whilels { p0.h, p1.h }, x0, x1 +; CHECK-NEXT: // kill: def $p0 killed $p0 killed $p0_p1 +; CHECK-NEXT: ret + %pp = call { , } @llvm.aarch64.sve.whilels.x2.nxv8i1(i64 %m, i64 %n) + %res = extractvalue {, } %pp, 0 + ret %res +} + +define @whilels_x2_nxv4i1(i64 %m, i64 %n) nounwind { +; CHECK-LABEL: whilels_x2_nxv4i1: +; CHECK: // %bb.0: +; CHECK-NEXT: whilels { p0.s, p1.s }, x0, x1 +; CHECK-NEXT: // kill: def $p0 killed $p0 killed $p0_p1 +; CHECK-NEXT: ret + %pp = call { , } 
@llvm.aarch64.sve.whilels.x2.nxv4i1(i64 %m, i64 %n) + %res = extractvalue {, } %pp, 0 + ret %res +} + +define @whilels_x2_nxv2i1(i64 %m, i64 %n) nounwind { +; CHECK-LABEL: whilels_x2_nxv2i1: +; CHECK: // %bb.0: +; CHECK-NEXT: whilels { p0.d, p1.d }, x0, x1 +; CHECK-NEXT: // kill: def $p0 killed $p0 killed $p0_p1 +; CHECK-NEXT: ret + %pp = call { , } @llvm.aarch64.sve.whilels.x2.nxv2i1(i64 %m, i64 %n) + %res = extractvalue {, } %pp, 0 + ret %res +} + + +; == WHILELT == + +define @whilelt_x2_nxv16i1(i64 %m, i64 %n) nounwind { +; CHECK-LABEL: whilelt_x2_nxv16i1: +; CHECK: // %bb.0: +; CHECK-NEXT: whilelt { p0.b, p1.b }, x0, x1 +; CHECK-NEXT: // kill: def $p0 killed $p0 killed $p0_p1 +; CHECK-NEXT: ret + %pp = call { , } @llvm.aarch64.sve.whilelt.x2.nxv16i1(i64 %m, i64 %n) + %res = extractvalue {, } %pp, 0 + ret %res +} + +define @whilelt_x2_nxv8i1(i64 %m, i64 %n) nounwind { +; CHECK-LABEL: whilelt_x2_nxv8i1: +; CHECK: // %bb.0: +; CHECK-NEXT: whilelt { p0.h, p1.h }, x0, x1 +; CHECK-NEXT: // kill: def $p0 killed $p0 killed $p0_p1 +; CHECK-NEXT: ret + %pp = call { , } @llvm.aarch64.sve.whilelt.x2.nxv8i1(i64 %m, i64 %n) + %res = extractvalue {, } %pp, 0 + ret %res +} + +define @whilelt_x2_nxv4i1(i64 %m, i64 %n) nounwind { +; CHECK-LABEL: whilelt_x2_nxv4i1: +; CHECK: // %bb.0: +; CHECK-NEXT: whilelt { p0.s, p1.s }, x0, x1 +; CHECK-NEXT: // kill: def $p0 killed $p0 killed $p0_p1 +; CHECK-NEXT: ret + %pp = call { , } @llvm.aarch64.sve.whilelt.x2.nxv4i1(i64 %m, i64 %n) + %res = extractvalue {, } %pp, 0 + ret %res +} + +define @whilelt_x2_nxv2i1(i64 %m, i64 %n) nounwind { +; CHECK-LABEL: whilelt_x2_nxv2i1: +; CHECK: // %bb.0: +; CHECK-NEXT: whilelt { p0.d, p1.d }, x0, x1 +; CHECK-NEXT: // kill: def $p0 killed $p0 killed $p0_p1 +; CHECK-NEXT: ret + %pp = call { , } @llvm.aarch64.sve.whilelt.x2.nxv2i1(i64 %m, i64 %n) + %res = extractvalue {, } %pp, 0 + ret %res +} + + +; Test that we get good code quality when using while in combination with other intrinsics + +define 
@codegen_whilege_b16_x2(i64 noundef %op1, i64 noundef %op2) nounwind { +; CHECK-LABEL: codegen_whilege_b16_x2: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: whilege { p0.h, p1.h }, x0, x1 +; CHECK-NEXT: ret +entry: + %0 = tail call { , } @llvm.aarch64.sve.whilege.x2.nxv8i1(i64 %op1, i64 %op2) + %1 = extractvalue { , } %0, 0 + %2 = tail call @llvm.aarch64.sve.convert.to.svbool.nxv8i1( %1) + %3 = tail call @llvm.vector.insert.nxv32i1.nxv16i1( poison, %2, i64 0) + %4 = extractvalue { , } %0, 1 + %5 = tail call @llvm.aarch64.sve.convert.to.svbool.nxv8i1( %4) + %6 = tail call @llvm.vector.insert.nxv32i1.nxv16i1( %3, %5, i64 16) + ret %6 +} + +define @codegen_whilegt_b32_x2(i64 noundef %op1, i64 noundef %op2) nounwind { +; CHECK-LABEL: codegen_whilegt_b32_x2: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: whilegt { p0.s, p1.s }, x0, x1 +; CHECK-NEXT: ret +entry: + %0 = tail call { , } @llvm.aarch64.sve.whilegt.x2.nxv4i1(i64 %op1, i64 %op2) + %1 = extractvalue { , } %0, 0 + %2 = tail call @llvm.aarch64.sve.convert.to.svbool.nxv4i1( %1) + %3 = tail call @llvm.vector.insert.nxv32i1.nxv16i1( poison, %2, i64 0) + %4 = extractvalue { , } %0, 1 + %5 = tail call @llvm.aarch64.sve.convert.to.svbool.nxv4i1( %4) + %6 = tail call @llvm.vector.insert.nxv32i1.nxv16i1( %3, %5, i64 16) + ret %6 +} + +define @codegen_whilehi_b64_x2(i64 noundef %op1, i64 noundef %op2) nounwind { +; CHECK-LABEL: codegen_whilehi_b64_x2: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: whilehi { p0.d, p1.d }, x0, x1 +; CHECK-NEXT: ret +entry: + %0 = tail call { , } @llvm.aarch64.sve.whilehi.x2.nxv2i1(i64 %op1, i64 %op2) + %1 = extractvalue { , } %0, 0 + %2 = tail call @llvm.aarch64.sve.convert.to.svbool.nxv2i1( %1) + %3 = tail call @llvm.vector.insert.nxv32i1.nxv16i1( poison, %2, i64 0) + %4 = extractvalue { , } %0, 1 + %5 = tail call @llvm.aarch64.sve.convert.to.svbool.nxv2i1( %4) + %6 = tail call @llvm.vector.insert.nxv32i1.nxv16i1( %3, %5, i64 16) + ret %6 +} + +define @codegen_whilehs_b16_x2(i64 noundef %op1, 
i64 noundef %op2) nounwind { +; CHECK-LABEL: codegen_whilehs_b16_x2: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: whilehs { p0.h, p1.h }, x0, x1 +; CHECK-NEXT: ret +entry: + %0 = tail call { , } @llvm.aarch64.sve.whilehs.x2.nxv8i1(i64 %op1, i64 %op2) + %1 = extractvalue { , } %0, 0 + %2 = tail call @llvm.aarch64.sve.convert.to.svbool.nxv8i1( %1) + %3 = tail call @llvm.vector.insert.nxv32i1.nxv16i1( poison, %2, i64 0) + %4 = extractvalue { , } %0, 1 + %5 = tail call @llvm.aarch64.sve.convert.to.svbool.nxv8i1( %4) + %6 = tail call @llvm.vector.insert.nxv32i1.nxv16i1( %3, %5, i64 16) + ret %6 +} + +define @codegen_whilele_b32_x2(i64 noundef %op1, i64 noundef %op2) nounwind { +; CHECK-LABEL: codegen_whilele_b32_x2: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: whilele { p0.s, p1.s }, x0, x1 +; CHECK-NEXT: ret +entry: + %0 = tail call { , } @llvm.aarch64.sve.whilele.x2.nxv4i1(i64 %op1, i64 %op2) + %1 = extractvalue { , } %0, 0 + %2 = tail call @llvm.aarch64.sve.convert.to.svbool.nxv4i1( %1) + %3 = tail call @llvm.vector.insert.nxv32i1.nxv16i1( poison, %2, i64 0) + %4 = extractvalue { , } %0, 1 + %5 = tail call @llvm.aarch64.sve.convert.to.svbool.nxv4i1( %4) + %6 = tail call @llvm.vector.insert.nxv32i1.nxv16i1( %3, %5, i64 16) + ret %6 +} + +define @codegen_whilelo_b64_x2(i64 noundef %op1, i64 noundef %op2) nounwind { +; CHECK-LABEL: codegen_whilelo_b64_x2: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: whilelo { p0.d, p1.d }, x0, x1 +; CHECK-NEXT: ret +entry: + %0 = tail call { , } @llvm.aarch64.sve.whilelo.x2.nxv2i1(i64 %op1, i64 %op2) + %1 = extractvalue { , } %0, 0 + %2 = tail call @llvm.aarch64.sve.convert.to.svbool.nxv2i1( %1) + %3 = tail call @llvm.vector.insert.nxv32i1.nxv16i1( poison, %2, i64 0) + %4 = extractvalue { , } %0, 1 + %5 = tail call @llvm.aarch64.sve.convert.to.svbool.nxv2i1( %4) + %6 = tail call @llvm.vector.insert.nxv32i1.nxv16i1( %3, %5, i64 16) + ret %6 +} + +define @codegen_whilels_b16_x2(i64 noundef %op1, i64 noundef %op2) nounwind { +; 
CHECK-LABEL: codegen_whilels_b16_x2: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: whilels { p0.h, p1.h }, x0, x1 +; CHECK-NEXT: ret +entry: + %0 = tail call { , } @llvm.aarch64.sve.whilels.x2.nxv8i1(i64 %op1, i64 %op2) + %1 = extractvalue { , } %0, 0 + %2 = tail call @llvm.aarch64.sve.convert.to.svbool.nxv8i1( %1) + %3 = tail call @llvm.vector.insert.nxv32i1.nxv16i1( poison, %2, i64 0) + %4 = extractvalue { , } %0, 1 + %5 = tail call @llvm.aarch64.sve.convert.to.svbool.nxv8i1( %4) + %6 = tail call @llvm.vector.insert.nxv32i1.nxv16i1( %3, %5, i64 16) + ret %6 +} + +define @codegen_whilelt_b32_x2(i64 noundef %op1, i64 noundef %op2) nounwind { +; CHECK-LABEL: codegen_whilelt_b32_x2: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: whilelt { p0.s, p1.s }, x0, x1 +; CHECK-NEXT: ret +entry: + %0 = tail call { , } @llvm.aarch64.sve.whilelt.x2.nxv4i1(i64 %op1, i64 %op2) + %1 = extractvalue { , } %0, 0 + %2 = tail call @llvm.aarch64.sve.convert.to.svbool.nxv4i1( %1) + %3 = tail call @llvm.vector.insert.nxv32i1.nxv16i1( poison, %2, i64 0) + %4 = extractvalue { , } %0, 1 + %5 = tail call @llvm.aarch64.sve.convert.to.svbool.nxv4i1( %4) + %6 = tail call @llvm.vector.insert.nxv32i1.nxv16i1( %3, %5, i64 16) + ret %6 +} + + +; == Test that we use predicate registers starting at a multiple of 2 == + +define @whilege_x2_nxv16i1_reg_off( %p0, i64 %m, i64 %n) nounwind { +; CHECK-LABEL: whilege_x2_nxv16i1_reg_off: +; CHECK: // %bb.0: +; CHECK-NEXT: whilege { p2.b, p3.b }, x0, x1 +; CHECK-NEXT: and p0.b, p2/z, p2.b, p0.b +; CHECK-NEXT: ret + %pp = call { , } @llvm.aarch64.sve.whilege.x2.nxv16i1(i64 %m, i64 %n) + %part1 = extractvalue {, } %pp, 0 + %res = and %part1, %p0 + ret %res +} + +define @whilegt_x2_nxv8i1_reg_off( %p0, i64 %m, i64 %n) nounwind { +; CHECK-LABEL: whilegt_x2_nxv8i1_reg_off: +; CHECK: // %bb.0: +; CHECK-NEXT: whilegt { p2.h, p3.h }, x0, x1 +; CHECK-NEXT: and p0.b, p2/z, p2.b, p0.b +; CHECK-NEXT: ret + %pp = call { , } @llvm.aarch64.sve.whilegt.x2.nxv8i1(i64 %m, i64 %n) + 
%part1 = extractvalue {, } %pp, 0 + %res = and %part1, %p0 + ret %res +} + +define @whilehi_x2_nxv4i1_reg_off( %p0, i64 %m, i64 %n) nounwind { +; CHECK-LABEL: whilehi_x2_nxv4i1_reg_off: +; CHECK: // %bb.0: +; CHECK-NEXT: whilehi { p2.s, p3.s }, x0, x1 +; CHECK-NEXT: and p0.b, p2/z, p2.b, p0.b +; CHECK-NEXT: ret + %pp = call { , } @llvm.aarch64.sve.whilehi.x2.nxv4i1(i64 %m, i64 %n) + %part1 = extractvalue {, } %pp, 0 + %res = and %part1, %p0 + ret %res +} + +define @whilehs_x2_nxv2i1_reg_off( %p0, i64 %m, i64 %n) nounwind { +; CHECK-LABEL: whilehs_x2_nxv2i1_reg_off: +; CHECK: // %bb.0: +; CHECK-NEXT: whilehs { p2.d, p3.d }, x0, x1 +; CHECK-NEXT: and p0.b, p2/z, p2.b, p0.b +; CHECK-NEXT: ret + %pp = call { , } @llvm.aarch64.sve.whilehs.x2.nxv2i1(i64 %m, i64 %n) + %part1 = extractvalue {, } %pp, 0 + %res = and %part1, %p0 + ret %res +} + +define @whilele_x2_nxv16i1_reg_off( %p0, i64 %m, i64 %n) nounwind { +; CHECK-LABEL: whilele_x2_nxv16i1_reg_off: +; CHECK: // %bb.0: +; CHECK-NEXT: whilele { p2.b, p3.b }, x0, x1 +; CHECK-NEXT: and p0.b, p2/z, p2.b, p0.b +; CHECK-NEXT: ret + %pp = call { , } @llvm.aarch64.sve.whilele.x2.nxv16i1(i64 %m, i64 %n) + %part1 = extractvalue {, } %pp, 0 + %res = and %part1, %p0 + ret %res +} + +define @whilelo_x2_nxv8i1_reg_off( %p0, i64 %m, i64 %n) nounwind { +; CHECK-LABEL: whilelo_x2_nxv8i1_reg_off: +; CHECK: // %bb.0: +; CHECK-NEXT: whilelo { p2.h, p3.h }, x0, x1 +; CHECK-NEXT: and p0.b, p2/z, p2.b, p0.b +; CHECK-NEXT: ret + %pp = call { , } @llvm.aarch64.sve.whilelo.x2.nxv8i1(i64 %m, i64 %n) + %part1 = extractvalue {, } %pp, 0 + %res = and %part1, %p0 + ret %res +} + +define @whilels_x2_nxv4i1_reg_off( %p0, i64 %m, i64 %n) nounwind { +; CHECK-LABEL: whilels_x2_nxv4i1_reg_off: +; CHECK: // %bb.0: +; CHECK-NEXT: whilels { p2.s, p3.s }, x0, x1 +; CHECK-NEXT: and p0.b, p2/z, p2.b, p0.b +; CHECK-NEXT: ret + %pp = call { , } @llvm.aarch64.sve.whilels.x2.nxv4i1(i64 %m, i64 %n) + %part1 = extractvalue {, } %pp, 0 + %res = and %part1, %p0 + ret 
%res +} + +define @whilelt_x2_nxv2i1_reg_off( %p0, i64 %m, i64 %n) nounwind { +; CHECK-LABEL: whilelt_x2_nxv2i1_reg_off: +; CHECK: // %bb.0: +; CHECK-NEXT: whilelt { p2.d, p3.d }, x0, x1 +; CHECK-NEXT: and p0.b, p2/z, p2.b, p0.b +; CHECK-NEXT: ret + %pp = call { , } @llvm.aarch64.sve.whilelt.x2.nxv2i1(i64 %m, i64 %n) + %part1 = extractvalue {, } %pp, 0 + %res = and %part1, %p0 + ret %res +} + +; == WHILEGE == +declare { , } @llvm.aarch64.sve.whilege.x2.nxv16i1(i64, i64) +declare { , } @llvm.aarch64.sve.whilege.x2.nxv8i1(i64, i64) +declare { , } @llvm.aarch64.sve.whilege.x2.nxv4i1(i64, i64) +declare { , } @llvm.aarch64.sve.whilege.x2.nxv2i1(i64, i64) + +; == WHILEGT == +declare { , } @llvm.aarch64.sve.whilegt.x2.nxv16i1(i64, i64) +declare { , } @llvm.aarch64.sve.whilegt.x2.nxv8i1(i64, i64) +declare { , } @llvm.aarch64.sve.whilegt.x2.nxv4i1(i64, i64) +declare { , } @llvm.aarch64.sve.whilegt.x2.nxv2i1(i64, i64) + +; == WHILEHI == +declare { , } @llvm.aarch64.sve.whilehi.x2.nxv16i1(i64, i64) +declare { , } @llvm.aarch64.sve.whilehi.x2.nxv8i1(i64, i64) +declare { , } @llvm.aarch64.sve.whilehi.x2.nxv4i1(i64, i64) +declare { , } @llvm.aarch64.sve.whilehi.x2.nxv2i1(i64, i64) + +; == WHILEHS == +declare { , } @llvm.aarch64.sve.whilehs.x2.nxv16i1(i64, i64) +declare { , } @llvm.aarch64.sve.whilehs.x2.nxv8i1(i64, i64) +declare { , } @llvm.aarch64.sve.whilehs.x2.nxv4i1(i64, i64) +declare { , } @llvm.aarch64.sve.whilehs.x2.nxv2i1(i64, i64) + +; == WHILELE == +declare { , } @llvm.aarch64.sve.whilele.x2.nxv16i1(i64, i64) +declare { , } @llvm.aarch64.sve.whilele.x2.nxv8i1(i64, i64) +declare { , } @llvm.aarch64.sve.whilele.x2.nxv4i1(i64, i64) +declare { , } @llvm.aarch64.sve.whilele.x2.nxv2i1(i64, i64) + +; == WHILELO == +declare { , } @llvm.aarch64.sve.whilelo.x2.nxv16i1(i64, i64) +declare { , } @llvm.aarch64.sve.whilelo.x2.nxv8i1(i64, i64) +declare { , } @llvm.aarch64.sve.whilelo.x2.nxv4i1(i64, i64) +declare { , } @llvm.aarch64.sve.whilelo.x2.nxv2i1(i64, i64) + +; == WHILELS == 
+declare { , } @llvm.aarch64.sve.whilels.x2.nxv16i1(i64, i64) +declare { , } @llvm.aarch64.sve.whilels.x2.nxv8i1(i64, i64) +declare { , } @llvm.aarch64.sve.whilels.x2.nxv4i1(i64, i64) +declare { , } @llvm.aarch64.sve.whilels.x2.nxv2i1(i64, i64) + +; == WHILELT == +declare { , } @llvm.aarch64.sve.whilelt.x2.nxv16i1(i64, i64) +declare { , } @llvm.aarch64.sve.whilelt.x2.nxv8i1(i64, i64) +declare { , } @llvm.aarch64.sve.whilelt.x2.nxv4i1(i64, i64) +declare { , } @llvm.aarch64.sve.whilelt.x2.nxv2i1(i64, i64) + +; == SVBOOL CONVERSION == +declare @llvm.aarch64.sve.convert.to.svbool.nxv2i1() +declare @llvm.aarch64.sve.convert.to.svbool.nxv4i1() +declare @llvm.aarch64.sve.convert.to.svbool.nxv8i1() + +; == VECTOR INSERTS == +declare @llvm.vector.insert.nxv32i1.nxv16i1(, , i64 immarg)