Index: llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
===================================================================
--- llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -12195,6 +12195,63 @@
   return true;
 }
 
+static inline EVT getNaturalIntSVETypeWithMatchingElementCount(EVT VT) {
+  if (!VT.isScalableVector())
+    return EVT();
+
+  switch (VT.getVectorMinNumElements()) {
+  default:
+    return EVT();
+  case 16:
+    return VT.changeVectorElementType(MVT::i8);
+  case 8:
+    return VT.changeVectorElementType(MVT::i16);
+  case 4:
+    return VT.changeVectorElementType(MVT::i32);
+  case 2:
+    return VT.changeVectorElementType(MVT::i64);
+  }
+}
+
+static inline EVT getNaturalIntSVETypeWithMatchingElementType(EVT VT) {
+  if (!VT.isScalableVector())
+    return EVT();
+
+  switch (VT.getVectorElementType().getSimpleVT().SimpleTy) {
+  default:
+    return EVT();
+  case MVT::i8:
+    return MVT::nxv16i8;
+  case MVT::i16:
+    return MVT::nxv8i16;
+  case MVT::i32:
+    return MVT::nxv4i32;
+  case MVT::i64:
+    return MVT::nxv2i64;
+  }
+}
+
+static inline EVT getNaturalPredSVETypeWithMatchingElementType(EVT VT) {
+  if (!VT.isScalableVector())
+    return EVT();
+
+  switch (VT.getVectorElementType().getSimpleVT().SimpleTy) {
+  default:
+    return EVT();
+  case MVT::i8:
+    return MVT::nxv16i1;
+  case MVT::f16:
+  case MVT::i16:
+    return MVT::nxv8i1;
+  case MVT::f32:
+  case MVT::i32:
+    return MVT::nxv4i1;
+  case MVT::f64:
+  case MVT::i64:
+    return MVT::nxv2i1;
+  }
+}
+
 /// getTgtMemIntrinsic - Represent NEON load and store intrinsics as
 /// MemIntrinsicNodes. The associated MachineMemOperands record the alignment
 /// specified in the intrinsic calls.
@@ -16000,6 +16057,33 @@
   SDValue Pred = N->getOperand(1);
   SDValue VecToReduce = N->getOperand(2);
+  EVT DataVT = VecToReduce.getValueType();
+
+  // Bitwise OR/AND reductions of i1s can be expressed using a PTEST.
+  if (SDValue Res = tryLowerPredTestReduction(N, Opc, DAG))
+    return Res;
+
+  if (DataVT.getSizeInBits().getKnownMinSize() < AArch64::SVEBitsPerBlock) {
+    // The following does no real work but will allow instruction selection.
+
+    // Promote the element type.
+    EVT VT1 = getNaturalIntSVETypeWithMatchingElementCount(DataVT);
+    VecToReduce = DAG.getNode(ISD::ANY_EXTEND, DL, VT1, VecToReduce);
+
+    // Cast back to the original element type.
+    EVT VT2 = getNaturalIntSVETypeWithMatchingElementType(DataVT);
+    VecToReduce = DAG.getNode(ISD::BITCAST, DL, VT2, VecToReduce);
+
+    // Cast predicate to match the original element type.
+    EVT VT3 = getNaturalPredSVETypeWithMatchingElementType(DataVT);
+    Pred = DAG.getNode(AArch64ISD::REINTERPRET_CAST, DL, VT3, Pred);
+
+    DataVT = VecToReduce.getValueType();
+  }
+
+  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+  if (!TLI.isTypeLegal(DataVT))
+    return SDValue();
 
   // NOTE: The integer reduction's result type is not always linked to the
   // operand's element type so we construct it from the intrinsic's result type.
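
For reference (not part of the patch itself), the trace below shows how the three helpers compose for the nxv2i8 case exercised by the new test; the type values follow directly from the switch tables above:

    // Worked example, assuming DataVT = nxv2i8 (known-min size 16 bits < SVEBitsPerBlock):
    //   VT1 = getNaturalIntSVETypeWithMatchingElementCount(nxv2i8) == nxv2i64
    //         -> ANY_EXTEND promotes the i8 elements to i64. At the register level the
    //            i8 elements already sit in 64-bit containers, which is why the comment
    //            says this does no real work.
    //   VT2 = getNaturalIntSVETypeWithMatchingElementType(nxv2i8)  == nxv16i8
    //         -> BITCAST reinterprets the full-width vector as 16 x i8.
    //   VT3 = getNaturalPredSVETypeWithMatchingElementType(nxv2i8) == nxv16i1
    //         -> REINTERPRET_CAST widens the governing predicate to match.
    // DataVT is then nxv16i8, which passes the isTypeLegal check, so the reduction
    // can proceed to instruction selection.
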
Index: llvm/test/CodeGen/AArch64/sve-across.ll
===================================================================
--- /dev/null
+++ llvm/test/CodeGen/AArch64/sve-across.ll
@@ -0,0 +1,16 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=aarch64--linux-gnu -mattr=+sve < %s | FileCheck %s
+
+define i8 @andv_nxv2i8(<vscale x 2 x i1> %pg, <vscale x 2 x i8> %a) {
+; CHECK-LABEL: andv_nxv2i8:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    pfalse p0.b
+; CHECK-NEXT:    mov z0.d, #0 // =0x0
+; CHECK-NEXT:    andv b0, p0, z0.b
+; CHECK-NEXT:    fmov w0, s0
+; CHECK-NEXT:    ret
+  %res = call i8 @llvm.aarch64.sve.andv.nxv2i8(<vscale x 2 x i1> zeroinitializer, <vscale x 2 x i8> zeroinitializer)
+  ret i8 %res
+}
+
+declare i8 @llvm.aarch64.sve.andv.nxv2i8(<vscale x 2 x i1>, <vscale x 2 x i8>)
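
If coverage is later extended to the other narrow types handled by the new lowering (e.g. nxv2i16 or nxv4i8), the tests would follow the same shape as the sketch below. This is only an illustrative sketch and not part of the patch: the function name is made up, the nxv2i16 overload of the andv intrinsic is assumed to be valid, and CHECK lines are intentionally omitted because they should be regenerated from an actual llc run with utils/update_llc_test_checks.py rather than written by hand.

    define i16 @andv_nxv2i16(<vscale x 2 x i1> %pg, <vscale x 2 x i16> %a) {
      %res = call i16 @llvm.aarch64.sve.andv.nxv2i16(<vscale x 2 x i1> %pg, <vscale x 2 x i16> %a)
      ret i16 %res
    }

    declare i16 @llvm.aarch64.sve.andv.nxv2i16(<vscale x 2 x i1>, <vscale x 2 x i16>)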