diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -11372,9 +11372,10 @@ // Stop if more than one members are non-undef. if (NumDefs > 1) break; + VTs.push_back(EVT::getVectorVT(*DAG.getContext(), VT.getVectorElementType(), - X.getValueType().getVectorNumElements())); + X.getValueType().getVectorElementCount())); } if (NumDefs == 0) @@ -18794,6 +18795,11 @@ static SDValue combineConcatVectorOfExtracts(SDNode *N, SelectionDAG &DAG) { EVT VT = N->getValueType(0); EVT OpVT = N->getOperand(0).getValueType(); + + // We currently can't generate an appropriate shuffle for a scalable vector. + if (VT.isScalableVector()) + return SDValue(); + int NumElts = VT.getVectorNumElements(); int NumOpElts = OpVT.getVectorNumElements(); @@ -19054,11 +19060,14 @@ return V; // Type legalization of vectors and DAG canonicalization of SHUFFLE_VECTOR - // nodes often generate nop CONCAT_VECTOR nodes. - // Scan the CONCAT_VECTOR operands and look for a CONCAT operations that - // place the incoming vectors at the exact same location. + // nodes often generate nop CONCAT_VECTOR nodes. Scan the CONCAT_VECTOR + // operands and look for a CONCAT operations that place the incoming vectors + // at the exact same location. + // + // For scalable vectors, EXTRACT_SUBVECTOR indexes are implicitly scaled. SDValue SingleSource = SDValue(); - unsigned PartNumElem = N->getOperand(0).getValueType().getVectorNumElements(); + unsigned PartNumElem = + N->getOperand(0).getValueType().getVectorElementCount().Min; for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) { SDValue Op = N->getOperand(i); diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp @@ -2151,7 +2151,7 @@ EVT InVT = Lo.getValueType(); EVT OutVT = EVT::getVectorVT(*DAG.getContext(), ResVT.getVectorElementType(), - InVT.getVectorNumElements()); + InVT.getVectorElementCount()); if (N->isStrictFPOpcode()) { Lo = DAG.getNode(N->getOpcode(), dl, { OutVT, MVT::Other }, @@ -2559,12 +2559,12 @@ SDValue InVec = N->getOperand(OpNo); EVT InVT = InVec->getValueType(0); EVT OutVT = N->getValueType(0); - unsigned NumElements = OutVT.getVectorNumElements(); + ElementCount NumElements = OutVT.getVectorElementCount(); bool IsFloat = OutVT.isFloatingPoint(); // Widening should have already made sure this is a power-two vector // if we're trying to split it at all. assert() that's true, just in case. - assert(!(NumElements & 1) && "Splitting vector, but not in half!"); + assert(!(NumElements.Min & 1) && "Splitting vector, but not in half!"); unsigned InElementSize = InVT.getScalarSizeInBits(); unsigned OutElementSize = OutVT.getScalarSizeInBits(); diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -932,8 +932,11 @@ setOperationAction(ISD::SHL, VT, Custom); setOperationAction(ISD::SRL, VT, Custom); setOperationAction(ISD::SRA, VT, Custom); - if (VT.getScalarType() == MVT::i1) + if (VT.getScalarType() == MVT::i1) { setOperationAction(ISD::SETCC, VT, Custom); + setOperationAction(ISD::TRUNCATE, VT, Custom); + setOperationAction(ISD::CONCAT_VECTORS, VT, Legal); + } } } @@ -8858,6 +8861,16 @@ SelectionDAG &DAG) const { EVT VT = Op.getValueType(); + if (VT.getScalarType() == MVT::i1) { + // Lower i1 truncate to `(x & 1) != 0`. + SDLoc dl(Op); + EVT OpVT = Op.getOperand(0).getValueType(); + SDValue Zero = DAG.getConstant(0, dl, OpVT); + SDValue One = DAG.getConstant(1, dl, OpVT); + SDValue And = DAG.getNode(ISD::AND, dl, OpVT, Op.getOperand(0), One); + return DAG.getSetCC(dl, VT, And, Zero, ISD::SETNE); + } + if (!VT.isVector() || VT.isScalableVector()) return Op; diff --git a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td --- a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td +++ b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td @@ -1109,6 +1109,13 @@ defm TRN1_PPP : sve_int_perm_bin_perm_pp<0b100, "trn1", AArch64trn1>; defm TRN2_PPP : sve_int_perm_bin_perm_pp<0b101, "trn2", AArch64trn2>; + def : Pat<(nxv4i1 (concat_vectors nxv2i1:$p1, nxv2i1:$p2)), + (UZP1_PPP_S $p1, $p2)>; + def : Pat<(nxv8i1 (concat_vectors nxv4i1:$p1, nxv4i1:$p2)), + (UZP1_PPP_H $p1, $p2)>; + def : Pat<(nxv16i1 (concat_vectors nxv8i1:$p1, nxv8i1:$p2)), + (UZP1_PPP_B $p1, $p2)>; + defm CMPHS_PPzZZ : sve_int_cmp_0<0b000, "cmphs", SETUGE, SETULE>; defm CMPHI_PPzZZ : sve_int_cmp_0<0b001, "cmphi", SETUGT, SETULT>; defm CMPGE_PPzZZ : sve_int_cmp_0<0b100, "cmpge", SETGE, SETLE>; diff --git a/llvm/test/CodeGen/AArch64/sve-trunc.ll b/llvm/test/CodeGen/AArch64/sve-trunc.ll --- a/llvm/test/CodeGen/AArch64/sve-trunc.ll +++ b/llvm/test/CodeGen/AArch64/sve-trunc.ll @@ -59,3 +59,123 @@ %out = trunc %in to ret %out } + +; Truncating to i1 requires convert it to a cmp + +define @trunc_i64toi1( %in) { +; CHECK-LABEL: trunc_i64toi1: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: and z0.d, z0.d, #0x1 +; CHECK-NEXT: cmpne p0.d, p0/z, z0.d, #0 +; CHECK-NEXT: ret +entry: + %out = trunc %in to + ret %out +} + +define @trunc_i64toi1_split( %in) { +; CHECK-LABEL: trunc_i64toi1_split: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: and z1.d, z1.d, #0x1 +; CHECK-NEXT: and z0.d, z0.d, #0x1 +; CHECK-NEXT: cmpne p1.d, p0/z, z1.d, #0 +; CHECK-NEXT: cmpne p0.d, p0/z, z0.d, #0 +; CHECK-NEXT: uzp1 p0.s, p0.s, p1.s +; CHECK-NEXT: ret +entry: + %out = trunc %in to + ret %out +} + +define @trunc_i64toi1_split2( %in) { +; CHECK-LABEL: trunc_i64toi1_split2: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: and z3.d, z3.d, #0x1 +; CHECK-NEXT: and z2.d, z2.d, #0x1 +; CHECK-NEXT: and z1.d, z1.d, #0x1 +; CHECK-NEXT: and z0.d, z0.d, #0x1 +; CHECK-NEXT: cmpne p1.d, p0/z, z3.d, #0 +; CHECK-NEXT: cmpne p2.d, p0/z, z2.d, #0 +; CHECK-NEXT: uzp1 p1.s, p2.s, p1.s +; CHECK-NEXT: cmpne p2.d, p0/z, z1.d, #0 +; CHECK-NEXT: cmpne p0.d, p0/z, z0.d, #0 +; CHECK-NEXT: uzp1 p0.s, p0.s, p2.s +; CHECK-NEXT: uzp1 p0.h, p0.h, p1.h +; CHECK-NEXT: ret +entry: + %out = trunc %in to + ret %out +} + +define @trunc_i64toi1_split3( %in) { +; CHECK-LABEL: trunc_i64toi1_split3: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: and z7.d, z7.d, #0x1 +; CHECK-NEXT: and z6.d, z6.d, #0x1 +; CHECK-NEXT: and z5.d, z5.d, #0x1 +; CHECK-NEXT: and z4.d, z4.d, #0x1 +; CHECK-NEXT: and z3.d, z3.d, #0x1 +; CHECK-NEXT: and z2.d, z2.d, #0x1 +; CHECK-NEXT: cmpne p1.d, p0/z, z7.d, #0 +; CHECK-NEXT: cmpne p2.d, p0/z, z6.d, #0 +; CHECK-NEXT: cmpne p3.d, p0/z, z5.d, #0 +; CHECK-NEXT: cmpne p4.d, p0/z, z4.d, #0 +; CHECK-NEXT: and z1.d, z1.d, #0x1 +; CHECK-NEXT: and z0.d, z0.d, #0x1 +; CHECK-NEXT: uzp1 p1.s, p2.s, p1.s +; CHECK-NEXT: cmpne p2.d, p0/z, z3.d, #0 +; CHECK-NEXT: uzp1 p3.s, p4.s, p3.s +; CHECK-NEXT: cmpne p4.d, p0/z, z2.d, #0 +; CHECK-NEXT: uzp1 p2.s, p4.s, p2.s +; CHECK-NEXT: cmpne p4.d, p0/z, z1.d, #0 +; CHECK-NEXT: cmpne p0.d, p0/z, z0.d, #0 +; CHECK-NEXT: uzp1 p0.s, p0.s, p4.s +; CHECK-NEXT: uzp1 p1.h, p3.h, p1.h +; CHECK-NEXT: uzp1 p0.h, p0.h, p2.h +; CHECK-NEXT: uzp1 p0.b, p0.b, p1.b +; CHECK-NEXT: ret +entry: + %out = trunc %in to + ret %out +} + + +define @trunc_i32toi1( %in) { +; CHECK-LABEL: trunc_i32toi1: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: and z0.s, z0.s, #0x1 +; CHECK-NEXT: cmpne p0.s, p0/z, z0.s, #0 +; CHECK-NEXT: ret +entry: + %out = trunc %in to + ret %out +} + +define @trunc_i16toi1( %in) { +; CHECK-LABEL: trunc_i16toi1: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: ptrue p0.h +; CHECK-NEXT: and z0.h, z0.h, #0x1 +; CHECK-NEXT: cmpne p0.h, p0/z, z0.h, #0 +; CHECK-NEXT: ret +entry: + %out = trunc %in to + ret %out +} + +define @trunc_i8toi1( %in) { +; CHECK-LABEL: trunc_i8toi1: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: ptrue p0.b +; CHECK-NEXT: and z0.b, z0.b, #0x1 +; CHECK-NEXT: cmpne p0.b, p0/z, z0.b, #0 +; CHECK-NEXT: ret +entry: + %out = trunc %in to + ret %out +}