diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
@@ -2259,16 +2259,40 @@
   SDLoc dl(N);
   SDValue Op = PromoteIntOpVectorReduction(N, N->getOperand(0));
 
-  EVT EltVT = Op.getValueType().getVectorElementType();
-  EVT VT = N->getValueType(0);
+  EVT OrigEltVT = N->getOperand(0).getValueType().getVectorElementType();
+  EVT InVT = Op.getValueType();
+  EVT EltVT = InVT.getVectorElementType();
+  EVT ResVT = N->getValueType(0);
+  unsigned Opcode = N->getOpcode();
 
-  if (VT.bitsGE(EltVT))
-    return DAG.getNode(N->getOpcode(), SDLoc(N), VT, Op);
+  // An i1 vecreduce_xor is equivalent to vecreduce_add, use that instead if
+  // vecreduce_xor is not legal
+  if (Opcode == ISD::VECREDUCE_XOR && OrigEltVT == MVT::i1 &&
+      !TLI.isOperationLegalOrCustom(ISD::VECREDUCE_XOR, InVT) &&
+      TLI.isOperationLegalOrCustom(ISD::VECREDUCE_ADD, InVT))
+    Opcode = ISD::VECREDUCE_ADD;
+
+  // An i1 vecreduce_or is equivalent to vecreduce_umax, use that instead if
+  // vecreduce_or is not legal
+  else if (Opcode == ISD::VECREDUCE_OR && OrigEltVT == MVT::i1 &&
+           !TLI.isOperationLegalOrCustom(ISD::VECREDUCE_OR, InVT) &&
+           TLI.isOperationLegalOrCustom(ISD::VECREDUCE_UMAX, InVT))
+    Opcode = ISD::VECREDUCE_UMAX;
+
+  // An i1 vecreduce_and is equivalent to vecreduce_umin, use that instead if
+  // vecreduce_and is not legal
+  else if (Opcode == ISD::VECREDUCE_AND && OrigEltVT == MVT::i1 &&
+           !TLI.isOperationLegalOrCustom(ISD::VECREDUCE_AND, InVT) &&
+           TLI.isOperationLegalOrCustom(ISD::VECREDUCE_UMIN, InVT))
+    Opcode = ISD::VECREDUCE_UMIN;
+
+  if (ResVT.bitsGE(EltVT))
+    return DAG.getNode(Opcode, SDLoc(N), ResVT, Op);
 
   // Result size must be >= element size. If this is not the case after
   // promotion, also promote the result type and then truncate.
-  SDValue Reduce = DAG.getNode(N->getOpcode(), dl, EltVT, Op);
-  return DAG.getNode(ISD::TRUNCATE, dl, VT, Reduce);
+  SDValue Reduce = DAG.getNode(Opcode, dl, EltVT, Op);
+  return DAG.getNode(ISD::TRUNCATE, dl, ResVT, Reduce);
 }
 
 SDValue DAGTypeLegalizer::PromoteIntOp_VP_REDUCE(SDNode *N, unsigned OpNo) {
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -14863,16 +14863,6 @@
   if (!DAG.getTargetLoweringInfo().isTypeLegal(VT))
     return SDValue();
 
-  // Although NEON has no EORV instruction, when only the least significant bit
-  // is required the operation is synonymous with ADDV.
-  if (LHS.getOpcode() == ISD::VECREDUCE_XOR && isOneConstant(RHS) &&
-      LHS.getOperand(0).getValueType().isFixedLengthVector() &&
-      LHS.hasOneUse()) {
-    SDLoc DL(N);
-    SDValue ADDV = DAG.getNode(ISD::VECREDUCE_ADD, DL, VT, LHS.getOperand(0));
-    return DAG.getNode(ISD::AND, DL, VT, ADDV, RHS);
-  }
-
   if (VT.isScalableVector())
     return performSVEAndCombine(N, DCI);
 
diff --git a/llvm/test/CodeGen/AArch64/reduce-and.ll b/llvm/test/CodeGen/AArch64/reduce-and.ll
--- a/llvm/test/CodeGen/AArch64/reduce-and.ll
+++ b/llvm/test/CodeGen/AArch64/reduce-and.ll
@@ -20,10 +20,8 @@
 define i1 @test_redand_v2i1(<2 x i1> %a) {
 ; CHECK-LABEL: test_redand_v2i1:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
-; CHECK-NEXT:    mov w8, v0.s[1]
-; CHECK-NEXT:    fmov w9, s0
-; CHECK-NEXT:    and w8, w9, w8
+; CHECK-NEXT:    uminp v0.2s, v0.2s, v0.2s
+; CHECK-NEXT:    fmov w8, s0
 ; CHECK-NEXT:    and w0, w8, #0x1
 ; CHECK-NEXT:    ret
 ;
@@ -43,14 +41,8 @@
 define i1 @test_redand_v4i1(<4 x i1> %a) {
 ; CHECK-LABEL: test_redand_v4i1:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
-; CHECK-NEXT:    umov w8, v0.h[1]
-; CHECK-NEXT:    umov w9, v0.h[0]
-; CHECK-NEXT:    umov w10, v0.h[2]
-; CHECK-NEXT:    umov w11, v0.h[3]
-; CHECK-NEXT:    and w8, w9, w8
-; CHECK-NEXT:    and w8, w8, w10
-; CHECK-NEXT:    and w8, w8, w11
+; CHECK-NEXT:    uminv h0, v0.4h
+; CHECK-NEXT:    fmov w8, s0
 ; CHECK-NEXT:    and w0, w8, #0x1
 ; CHECK-NEXT:    ret
 ;
@@ -76,22 +68,8 @@
 define i1 @test_redand_v8i1(<8 x i1> %a) {
 ; CHECK-LABEL: test_redand_v8i1:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
-; CHECK-NEXT:    umov w8, v0.b[1]
-; CHECK-NEXT:    umov w9, v0.b[0]
-; CHECK-NEXT:    umov w10, v0.b[2]
-; CHECK-NEXT:    umov w11, v0.b[3]
-; CHECK-NEXT:    umov w12, v0.b[4]
-; CHECK-NEXT:    umov w13, v0.b[5]
-; CHECK-NEXT:    and w8, w9, w8
-; CHECK-NEXT:    umov w9, v0.b[6]
-; CHECK-NEXT:    and w8, w8, w10
-; CHECK-NEXT:    umov w10, v0.b[7]
-; CHECK-NEXT:    and w8, w8, w11
-; CHECK-NEXT:    and w8, w8, w12
-; CHECK-NEXT:    and w8, w8, w13
-; CHECK-NEXT:    and w8, w8, w9
-; CHECK-NEXT:    and w8, w8, w10
+; CHECK-NEXT:    uminv b0, v0.8b
+; CHECK-NEXT:    fmov w8, s0
 ; CHECK-NEXT:    and w0, w8, #0x1
 ; CHECK-NEXT:    ret
 ;
@@ -129,23 +107,8 @@
 define i1 @test_redand_v16i1(<16 x i1> %a) {
 ; CHECK-LABEL: test_redand_v16i1:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    ext v1.16b, v0.16b, v0.16b, #8
-; CHECK-NEXT:    and v0.8b, v0.8b, v1.8b
-; CHECK-NEXT:    umov w8, v0.b[1]
-; CHECK-NEXT:    umov w9, v0.b[0]
-; CHECK-NEXT:    umov w10, v0.b[2]
-; CHECK-NEXT:    umov w11, v0.b[3]
-; CHECK-NEXT:    umov w12, v0.b[4]
-; CHECK-NEXT:    and w8, w9, w8
-; CHECK-NEXT:    umov w9, v0.b[5]
-; CHECK-NEXT:    and w8, w8, w10
-; CHECK-NEXT:    umov w10, v0.b[6]
-; CHECK-NEXT:    and w8, w8, w11
-; CHECK-NEXT:    umov w11, v0.b[7]
-; CHECK-NEXT:    and w8, w8, w12
-; CHECK-NEXT:    and w8, w8, w9
-; CHECK-NEXT:    and w8, w8, w10
-; CHECK-NEXT:    and w8, w8, w11
+; CHECK-NEXT:    uminv b0, v0.16b
+; CHECK-NEXT:    fmov w8, s0
 ; CHECK-NEXT:    and w0, w8, #0x1
 ; CHECK-NEXT:    ret
 ;
@@ -203,6 +166,67 @@
   ret i1 %or_result
 }
 
+define <16 x i1> @test_redand_ins_v16i1(<16 x i1> %a) {
+; CHECK-LABEL: test_redand_ins_v16i1:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    uminv b0, v0.16b
+; CHECK-NEXT:    ret
+;
+; GISEL-LABEL: test_redand_ins_v16i1:
+; GISEL:       // %bb.0:
+; GISEL-NEXT:    mov b1, v0.b[1]
+; GISEL-NEXT:    mov b2, v0.b[2]
+; GISEL-NEXT:    mov b3, v0.b[3]
+; GISEL-NEXT:    mov b4, v0.b[4]
+; GISEL-NEXT:    mov b5, v0.b[5]
+; GISEL-NEXT:    mov b6, v0.b[6]
+; GISEL-NEXT:    mov b7, v0.b[7]
+; GISEL-NEXT:    fmov w8, s0
+; GISEL-NEXT:    fmov w9, s1
+; GISEL-NEXT:    fmov w10, s2
+; GISEL-NEXT:    fmov w11, s3
+; GISEL-NEXT:    mov b16, v0.b[8]
+; GISEL-NEXT:    mov b17, v0.b[9]
+; GISEL-NEXT:    mov b18, v0.b[10]
+; GISEL-NEXT:    mov b19, v0.b[11]
+; GISEL-NEXT:    and w8, w8, w9
+; GISEL-NEXT:    and w9, w10, w11
+; GISEL-NEXT:    fmov w10, s4
+; GISEL-NEXT:    fmov w11, s5
+; GISEL-NEXT:    fmov w12, s6
+; GISEL-NEXT:    fmov w13, s7
+; GISEL-NEXT:    mov b20, v0.b[12]
+; GISEL-NEXT:    mov b21, v0.b[13]
+; GISEL-NEXT:    mov b22, v0.b[14]
+; GISEL-NEXT:    mov b23, v0.b[15]
+; GISEL-NEXT:    and w10, w10, w11
+; GISEL-NEXT:    and w11, w12, w13
+; GISEL-NEXT:    fmov w12, s16
+; GISEL-NEXT:    fmov w13, s17
+; GISEL-NEXT:    fmov w14, s18
+; GISEL-NEXT:    fmov w15, s19
+; GISEL-NEXT:    fmov w16, s22
+; GISEL-NEXT:    fmov w17, s23
+; GISEL-NEXT:    and w8, w8, w9
+; GISEL-NEXT:    and w12, w12, w13
+; GISEL-NEXT:    and w9, w10, w11
+; GISEL-NEXT:    and w13, w14, w15
+; GISEL-NEXT:    fmov w14, s20
+; GISEL-NEXT:    fmov w15, s21
+; GISEL-NEXT:    and w10, w12, w13
+; GISEL-NEXT:    and w8, w8, w9
+; GISEL-NEXT:    and w14, w14, w15
+; GISEL-NEXT:    and w15, w16, w17
+; GISEL-NEXT:    and w11, w14, w15
+; GISEL-NEXT:    and w9, w10, w11
+; GISEL-NEXT:    and w8, w8, w9
+; GISEL-NEXT:    fmov s0, w8
+; GISEL-NEXT:    ret
+  %and_result = call i1 @llvm.vector.reduce.and.v16i1(<16 x i1> %a)
+  %ins = insertelement <16 x i1> poison, i1 %and_result, i64 0
+  ret <16 x i1> %ins
+}
+
 define i8 @test_redand_v1i8(<1 x i8> %a) {
 ; CHECK-LABEL: test_redand_v1i8:
 ; CHECK:       // %bb.0:
diff --git a/llvm/test/CodeGen/AArch64/reduce-or.ll b/llvm/test/CodeGen/AArch64/reduce-or.ll
--- a/llvm/test/CodeGen/AArch64/reduce-or.ll
+++ b/llvm/test/CodeGen/AArch64/reduce-or.ll
@@ -20,10 +20,8 @@
 define i1 @test_redor_v2i1(<2 x i1> %a) {
 ; CHECK-LABEL: test_redor_v2i1:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
-; CHECK-NEXT:    mov w8, v0.s[1]
-; CHECK-NEXT:    fmov w9, s0
-; CHECK-NEXT:    orr w8, w9, w8
+; CHECK-NEXT:    umaxp v0.2s, v0.2s, v0.2s
+; CHECK-NEXT:    fmov w8, s0
 ; CHECK-NEXT:    and w0, w8, #0x1
 ; CHECK-NEXT:    ret
 ;
@@ -43,14 +41,8 @@
 define i1 @test_redor_v4i1(<4 x i1> %a) {
 ; CHECK-LABEL: test_redor_v4i1:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
-; CHECK-NEXT:    umov w8, v0.h[1]
-; CHECK-NEXT:    umov w9, v0.h[0]
-; CHECK-NEXT:    umov w10, v0.h[2]
-; CHECK-NEXT:    umov w11, v0.h[3]
-; CHECK-NEXT:    orr w8, w9, w8
-; CHECK-NEXT:    orr w8, w8, w10
-; CHECK-NEXT:    orr w8, w8, w11
+; CHECK-NEXT:    umaxv h0, v0.4h
+; CHECK-NEXT:    fmov w8, s0
 ; CHECK-NEXT:    and w0, w8, #0x1
 ; CHECK-NEXT:    ret
 ;
@@ -76,22 +68,8 @@
 define i1 @test_redor_v8i1(<8 x i1> %a) {
 ; CHECK-LABEL: test_redor_v8i1:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
-; CHECK-NEXT:    umov w8, v0.b[1]
-; CHECK-NEXT:    umov w9, v0.b[0]
-; CHECK-NEXT:    umov w10, v0.b[2]
-; CHECK-NEXT:    umov w11, v0.b[3]
-; CHECK-NEXT:    umov w12, v0.b[4]
-; CHECK-NEXT:    umov w13, v0.b[5]
-; CHECK-NEXT:    orr w8, w9, w8
-; CHECK-NEXT:    umov w9, v0.b[6]
-; CHECK-NEXT:    orr w8, w8, w10
-; CHECK-NEXT:    umov w10, v0.b[7]
-; CHECK-NEXT:    orr w8, w8, w11
-; CHECK-NEXT:    orr w8, w8, w12
-; CHECK-NEXT:    orr w8, w8, w13
-; CHECK-NEXT:    orr w8, w8, w9
-; CHECK-NEXT:    orr w8, w8, w10
+; CHECK-NEXT:    umaxv b0, v0.8b
+; CHECK-NEXT:    fmov w8, s0
 ; CHECK-NEXT:    and w0, w8, #0x1
 ; CHECK-NEXT:    ret
 ;
@@ -129,23 +107,8 @@
 define i1 @test_redor_v16i1(<16 x i1> %a) {
 ; CHECK-LABEL: test_redor_v16i1:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    ext v1.16b, v0.16b, v0.16b, #8
-; CHECK-NEXT:    orr v0.8b, v0.8b, v1.8b
-; CHECK-NEXT:    umov w8, v0.b[1]
-; CHECK-NEXT:    umov w9, v0.b[0]
-; CHECK-NEXT:    umov w10, v0.b[2]
-; CHECK-NEXT:    umov w11, v0.b[3]
-; CHECK-NEXT:    umov w12, v0.b[4]
-; CHECK-NEXT:    orr w8, w9, w8
-; CHECK-NEXT:    umov w9, v0.b[5]
-; CHECK-NEXT:    orr w8, w8, w10
-; CHECK-NEXT:    umov w10, v0.b[6]
-; CHECK-NEXT:    orr w8, w8, w11
-; CHECK-NEXT:    umov w11, v0.b[7]
-; CHECK-NEXT:    orr w8, w8, w12
-; CHECK-NEXT:    orr w8, w8, w9
-; CHECK-NEXT:    orr w8, w8, w10
-; CHECK-NEXT:    orr w8, w8, w11
+; CHECK-NEXT:    umaxv b0, v0.16b
+; CHECK-NEXT:    fmov w8, s0
 ; CHECK-NEXT:    and w0, w8, #0x1
 ; CHECK-NEXT:    ret
 ;
@@ -203,6 +166,67 @@
   ret i1 %or_result
 }
 
+define <16 x i1> @test_redor_ins_v16i1(<16 x i1> %a) {
+; CHECK-LABEL: test_redor_ins_v16i1:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    umaxv b0, v0.16b
+; CHECK-NEXT:    ret
+;
+; GISEL-LABEL: test_redor_ins_v16i1:
+; GISEL:       // %bb.0:
+; GISEL-NEXT:    mov b1, v0.b[1]
+; GISEL-NEXT:    mov b2, v0.b[2]
+; GISEL-NEXT:    mov b3, v0.b[3]
+; GISEL-NEXT:    mov b4, v0.b[4]
+; GISEL-NEXT:    mov b5, v0.b[5]
+; GISEL-NEXT:    mov b6, v0.b[6]
+; GISEL-NEXT:    mov b7, v0.b[7]
+; GISEL-NEXT:    fmov w8, s0
+; GISEL-NEXT:    fmov w9, s1
+; GISEL-NEXT:    fmov w10, s2
+; GISEL-NEXT:    fmov w11, s3
+; GISEL-NEXT:    mov b16, v0.b[8]
+; GISEL-NEXT:    mov b17, v0.b[9]
+; GISEL-NEXT:    mov b18, v0.b[10]
+; GISEL-NEXT:    mov b19, v0.b[11]
+; GISEL-NEXT:    orr w8, w8, w9
+; GISEL-NEXT:    orr w9, w10, w11
+; GISEL-NEXT:    fmov w10, s4
+; GISEL-NEXT:    fmov w11, s5
+; GISEL-NEXT:    fmov w12, s6
+; GISEL-NEXT:    fmov w13, s7
+; GISEL-NEXT:    mov b20, v0.b[12]
+; GISEL-NEXT:    mov b21, v0.b[13]
+; GISEL-NEXT:    mov b22, v0.b[14]
+; GISEL-NEXT:    mov b23, v0.b[15]
+; GISEL-NEXT:    orr w10, w10, w11
+; GISEL-NEXT:    orr w11, w12, w13
+; GISEL-NEXT:    fmov w12, s16
+; GISEL-NEXT:    fmov w13, s17
+; GISEL-NEXT:    fmov w14, s18
+; GISEL-NEXT:    fmov w15, s19
+; GISEL-NEXT:    fmov w16, s22
+; GISEL-NEXT:    fmov w17, s23
+; GISEL-NEXT:    orr w8, w8, w9
+; GISEL-NEXT:    orr w12, w12, w13
+; GISEL-NEXT:    orr w9, w10, w11
+; GISEL-NEXT:    orr w13, w14, w15
+; GISEL-NEXT:    fmov w14, s20
+; GISEL-NEXT:    fmov w15, s21
+; GISEL-NEXT:    orr w10, w12, w13
+; GISEL-NEXT:    orr w8, w8, w9
+; GISEL-NEXT:    orr w14, w14, w15
+; GISEL-NEXT:    orr w15, w16, w17
+; GISEL-NEXT:    orr w11, w14, w15
+; GISEL-NEXT:    orr w9, w10, w11
+; GISEL-NEXT:    orr w8, w8, w9
+; GISEL-NEXT:    fmov s0, w8
+; GISEL-NEXT:    ret
+  %or_result = call i1 @llvm.vector.reduce.or.v16i1(<16 x i1> %a)
+  %ins = insertelement <16 x i1> poison, i1 %or_result, i64 0
+  ret <16 x i1> %ins
+}
+
 define i8 @test_redor_v1i8(<1 x i8> %a) {
 ; CHECK-LABEL: test_redor_v1i8:
 ; CHECK:       // %bb.0:
diff --git a/llvm/test/CodeGen/AArch64/reduce-xor.ll b/llvm/test/CodeGen/AArch64/reduce-xor.ll
--- a/llvm/test/CodeGen/AArch64/reduce-xor.ll
+++ b/llvm/test/CodeGen/AArch64/reduce-xor.ll
@@ -165,6 +165,67 @@
   ret i1 %or_result
 }
 
+define <16 x i1> @test_redxor_ins_v16i1(<16 x i1> %a) {
+; CHECK-LABEL: test_redxor_ins_v16i1:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    addv b0, v0.16b
+; CHECK-NEXT:    ret
+;
+; GISEL-LABEL: test_redxor_ins_v16i1:
+; GISEL:       // %bb.0:
+; GISEL-NEXT:    mov b1, v0.b[1]
+; GISEL-NEXT:    mov b2, v0.b[2]
+; GISEL-NEXT:    mov b3, v0.b[3]
+; GISEL-NEXT:    mov b4, v0.b[4]
+; GISEL-NEXT:    mov b5, v0.b[5]
+; GISEL-NEXT:    mov b6, v0.b[6]
+; GISEL-NEXT:    mov b7, v0.b[7]
+; GISEL-NEXT:    fmov w8, s0
+; GISEL-NEXT:    fmov w9, s1
+; GISEL-NEXT:    fmov w10, s2
+; GISEL-NEXT:    fmov w11, s3
+; GISEL-NEXT:    mov b16, v0.b[8]
+; GISEL-NEXT:    mov b17, v0.b[9]
+; GISEL-NEXT:    mov b18, v0.b[10]
+; GISEL-NEXT:    mov b19, v0.b[11]
+; GISEL-NEXT:    eor w8, w8, w9
+; GISEL-NEXT:    eor w9, w10, w11
+; GISEL-NEXT:    fmov w10, s4
+; GISEL-NEXT:    fmov w11, s5
+; GISEL-NEXT:    fmov w12, s6
+; GISEL-NEXT:    fmov w13, s7
+; GISEL-NEXT:    mov b20, v0.b[12]
+; GISEL-NEXT:    mov b21, v0.b[13]
+; GISEL-NEXT:    mov b22, v0.b[14]
+; GISEL-NEXT:    mov b23, v0.b[15]
+; GISEL-NEXT:    eor w10, w10, w11
+; GISEL-NEXT:    eor w11, w12, w13
+; GISEL-NEXT:    fmov w12, s16
+; GISEL-NEXT:    fmov w13, s17
+; GISEL-NEXT:    fmov w14, s18
+; GISEL-NEXT:    fmov w15, s19
+; GISEL-NEXT:    fmov w16, s22
+; GISEL-NEXT:    fmov w17, s23
+; GISEL-NEXT:    eor w8, w8, w9
+; GISEL-NEXT:    eor w12, w12, w13
+; GISEL-NEXT:    eor w9, w10, w11
+; GISEL-NEXT:    eor w13, w14, w15
+; GISEL-NEXT:    fmov w14, s20
+; GISEL-NEXT:    fmov w15, s21
+; GISEL-NEXT:    eor w10, w12, w13
+; GISEL-NEXT:    eor w8, w8, w9
+; GISEL-NEXT:    eor w14, w14, w15
+; GISEL-NEXT:    eor w15, w16, w17
+; GISEL-NEXT:    eor w11, w14, w15
+; GISEL-NEXT:    eor w9, w10, w11
+; GISEL-NEXT:    eor w8, w8, w9
+; GISEL-NEXT:    fmov s0, w8
+; GISEL-NEXT:    ret
+  %xor_result = call i1 @llvm.vector.reduce.xor.v16i1(<16 x i1> %a)
+  %ins = insertelement <16 x i1> poison, i1 %xor_result, i64 0
+  ret <16 x i1> %ins
+}
+
 define i8 @test_redxor_v1i8(<1 x i8> %a) {
 ; CHECK-LABEL: test_redxor_v1i8:
 ; CHECK:       // %bb.0:
diff --git a/llvm/test/CodeGen/AArch64/vecreduce-and-legalization.ll b/llvm/test/CodeGen/AArch64/vecreduce-and-legalization.ll
--- a/llvm/test/CodeGen/AArch64/vecreduce-and-legalization.ll
+++ b/llvm/test/CodeGen/AArch64/vecreduce-and-legalization.ll
@@ -140,14 +140,8 @@
 define i1 @test_v4i1(<4 x i1> %a) nounwind {
 ; CHECK-LABEL: test_v4i1:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
-; CHECK-NEXT:    umov w8, v0.h[1]
-; CHECK-NEXT:    umov w9, v0.h[0]
-; CHECK-NEXT:    umov w10, v0.h[2]
-; CHECK-NEXT:    umov w11, v0.h[3]
-; CHECK-NEXT:    and w8, w9, w8
-; CHECK-NEXT:    and w8, w8, w10
-; CHECK-NEXT:    and w8, w8, w11
+; CHECK-NEXT:    uminv h0, v0.4h
+; CHECK-NEXT:    fmov w8, s0
 ; CHECK-NEXT:    and w0, w8, #0x1
 ; CHECK-NEXT:    ret
   %b = call i1 @llvm.vector.reduce.and.v4i1(<4 x i1> %a)
diff --git a/llvm/test/CodeGen/AArch64/vecreduce-umax-legalization.ll b/llvm/test/CodeGen/AArch64/vecreduce-umax-legalization.ll
--- a/llvm/test/CodeGen/AArch64/vecreduce-umax-legalization.ll
+++ b/llvm/test/CodeGen/AArch64/vecreduce-umax-legalization.ll
@@ -142,14 +142,8 @@
 define i1 @test_v4i1(<4 x i1> %a) nounwind {
 ; CHECK-LABEL: test_v4i1:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
-; CHECK-NEXT:    umov w8, v0.h[1]
-; CHECK-NEXT:    umov w9, v0.h[0]
-; CHECK-NEXT:    umov w10, v0.h[2]
-; CHECK-NEXT:    umov w11, v0.h[3]
-; CHECK-NEXT:    orr w8, w9, w8
-; CHECK-NEXT:    orr w8, w8, w10
-; CHECK-NEXT:    orr w8, w8, w11
+; CHECK-NEXT:    umaxv h0, v0.4h
+; CHECK-NEXT:    fmov w8, s0
 ; CHECK-NEXT:    and w0, w8, #0x1
 ; CHECK-NEXT:    ret
   %b = call i1 @llvm.vector.reduce.umax.v4i1(<4 x i1> %a)
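
Note on the equivalences the legalizer change relies on (not part of the patch): for i1 elements, an XOR reduction is addition modulo 2, so only bit 0 of an ADD reduction is needed; an OR reduction is the unsigned maximum and an AND reduction the unsigned minimum over 0/1 values. A minimal standalone C++ sketch that exhaustively checks these identities for every possible 8-lane i1 vector follows; the names and the 8-lane width are illustrative only.

// Illustrative standalone check: verifies the i1 reduction identities
//   vecreduce_xor(v) == vecreduce_add(v) & 1   (xor is addition mod 2)
//   vecreduce_or(v)  == vecreduce_umax(v)      (max over 0/1 values)
//   vecreduce_and(v) == vecreduce_umin(v)      (min over 0/1 values)
#include <algorithm>
#include <cassert>
#include <cstdint>

int main() {
  const int Lanes = 8; // hypothetical lane count; any width works the same way
  for (uint32_t Bits = 0; Bits < (1u << Lanes); ++Bits) {
    unsigned Xor = 0, Or = 0, And = 1, Add = 0, UMax = 0, UMin = 1;
    for (int I = 0; I < Lanes; ++I) {
      unsigned B = (Bits >> I) & 1; // lane I of the i1 vector
      Xor ^= B;
      Or |= B;
      And &= B;
      Add += B;
      UMax = std::max(UMax, B);
      UMin = std::min(UMin, B);
    }
    assert(Xor == (Add & 1)); // VECREDUCE_XOR -> VECREDUCE_ADD, keep bit 0
    assert(Or == UMax);       // VECREDUCE_OR  -> VECREDUCE_UMAX
    assert(And == UMin);      // VECREDUCE_AND -> VECREDUCE_UMIN
  }
  return 0;
}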