diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -18193,7 +18193,9 @@ Op0ExtV, Op1ExtV, Op->getOperand(2)); } -static SDValue performSETCCCombine(SDNode *N, SelectionDAG &DAG) { +static SDValue performSETCCCombine(SDNode *N, + TargetLowering::DAGCombinerInfo &DCI, + SelectionDAG &DAG) { assert(N->getOpcode() == ISD::SETCC && "Unexpected opcode!"); SDValue LHS = N->getOperand(0); SDValue RHS = N->getOperand(1); @@ -18236,6 +18238,21 @@ } } + // setcc (iN (bitcast (vNi1 X))), 0, (eq|ne) + // ==> setcc (iN (zext (i1 (vecreduce_or (vNi1 X))))), 0, (eq|ne) + if (DCI.isBeforeLegalize() && VT.isScalarInteger() && + (Cond == ISD::SETEQ || Cond == ISD::SETNE) && isNullConstant(RHS) && + LHS->getOpcode() == ISD::BITCAST) { + EVT ToVT = LHS->getValueType(0); + EVT FromVT = LHS->getOperand(0).getValueType(); + if (FromVT.isFixedLengthVector() && + FromVT.getVectorElementType() == MVT::i1) { + LHS = DAG.getNode(ISD::VECREDUCE_OR, DL, MVT::i1, LHS->getOperand(0)); + LHS = DAG.getNode(ISD::ZERO_EXTEND, DL, ToVT, LHS); + return DAG.getSetCC(DL, VT, LHS, RHS, Cond); + } + } + return SDValue(); } @@ -19378,7 +19395,7 @@ case ISD::VSELECT: return performVSelectCombine(N, DCI.DAG); case ISD::SETCC: - return performSETCCCombine(N, DAG); + return performSETCCCombine(N, DCI, DAG); case ISD::LOAD: if (performTBISimplification(N->getOperand(1), DCI, DAG)) return SDValue(N, 0); diff --git a/llvm/test/CodeGen/AArch64/dag-combine-setcc.ll b/llvm/test/CodeGen/AArch64/dag-combine-setcc.ll --- a/llvm/test/CodeGen/AArch64/dag-combine-setcc.ll +++ b/llvm/test/CodeGen/AArch64/dag-combine-setcc.ll @@ -4,34 +4,11 @@ define i1 @combine_setcc_eq_vecreduce_or_v8i1(<8 x i8> %a) { ; CHECK-LABEL: combine_setcc_eq_vecreduce_or_v8i1: ; CHECK: // %bb.0: -; CHECK-NEXT: sub sp, sp, #16 -; CHECK-NEXT: .cfi_def_cfa_offset 16 ; CHECK-NEXT: cmeq v0.8b, v0.8b, #0 -; CHECK-NEXT: umov w8, v0.b[1] -; CHECK-NEXT: umov w10, v0.b[2] -; CHECK-NEXT: umov w9, v0.b[0] -; CHECK-NEXT: umov w11, v0.b[3] -; CHECK-NEXT: umov w12, v0.b[4] -; CHECK-NEXT: umov w13, v0.b[5] -; CHECK-NEXT: umov w14, v0.b[6] -; CHECK-NEXT: and w8, w8, #0x1 -; CHECK-NEXT: and w10, w10, #0x1 -; CHECK-NEXT: and w9, w9, #0x1 -; CHECK-NEXT: and w11, w11, #0x1 -; CHECK-NEXT: bfi w9, w8, #1, #1 -; CHECK-NEXT: and w8, w12, #0x1 -; CHECK-NEXT: bfi w9, w10, #2, #1 -; CHECK-NEXT: and w10, w13, #0x1 -; CHECK-NEXT: bfi w9, w11, #3, #1 -; CHECK-NEXT: umov w11, v0.b[7] -; CHECK-NEXT: bfi w9, w8, #4, #1 -; CHECK-NEXT: and w8, w14, #0x1 -; CHECK-NEXT: bfi w9, w10, #5, #1 -; CHECK-NEXT: orr w8, w9, w8, lsl #6 -; CHECK-NEXT: orr w8, w8, w11, lsl #7 -; CHECK-NEXT: tst w8, #0xff -; CHECK-NEXT: cset w0, eq -; CHECK-NEXT: add sp, sp, #16 +; CHECK-NEXT: umaxv b0, v0.8b +; CHECK-NEXT: fmov w8, s0 +; CHECK-NEXT: mvn w8, w8 +; CHECK-NEXT: and w0, w8, #0x1 ; CHECK-NEXT: ret %cmp1 = icmp eq <8 x i8> %a, zeroinitializer %cast = bitcast <8 x i1> %cmp1 to i8 @@ -42,58 +19,11 @@ define i1 @combine_setcc_eq_vecreduce_or_v16i1(<16 x i8> %a) { ; CHECK-LABEL: combine_setcc_eq_vecreduce_or_v16i1: ; CHECK: // %bb.0: -; CHECK-NEXT: sub sp, sp, #16 -; CHECK-NEXT: .cfi_def_cfa_offset 16 ; CHECK-NEXT: cmeq v0.16b, v0.16b, #0 -; CHECK-NEXT: umov w8, v0.b[1] -; CHECK-NEXT: umov w10, v0.b[2] -; CHECK-NEXT: umov w9, v0.b[0] -; CHECK-NEXT: umov w11, v0.b[3] -; CHECK-NEXT: umov w12, v0.b[4] -; CHECK-NEXT: umov w13, v0.b[5] -; CHECK-NEXT: and w8, w8, #0x1 -; CHECK-NEXT: and w10, w10, #0x1 -; CHECK-NEXT: and w9, w9, #0x1 -; CHECK-NEXT: and w11, w11, #0x1 -; CHECK-NEXT: and w12, w12, #0x1 -; CHECK-NEXT: and w13, w13, #0x1 -; CHECK-NEXT: bfi w9, w8, #1, #1 -; CHECK-NEXT: umov w8, v0.b[6] -; CHECK-NEXT: bfi w9, w10, #2, #1 -; CHECK-NEXT: umov w10, v0.b[7] -; CHECK-NEXT: bfi w9, w11, #3, #1 -; CHECK-NEXT: umov w11, v0.b[8] -; CHECK-NEXT: bfi w9, w12, #4, #1 -; CHECK-NEXT: umov w12, v0.b[9] -; CHECK-NEXT: and w8, w8, #0x1 -; CHECK-NEXT: bfi w9, w13, #5, #1 -; CHECK-NEXT: umov w13, v0.b[10] -; CHECK-NEXT: and w10, w10, #0x1 -; CHECK-NEXT: orr w8, w9, w8, lsl #6 -; CHECK-NEXT: umov w9, v0.b[11] -; CHECK-NEXT: and w11, w11, #0x1 -; CHECK-NEXT: orr w8, w8, w10, lsl #7 -; CHECK-NEXT: umov w10, v0.b[12] -; CHECK-NEXT: and w12, w12, #0x1 -; CHECK-NEXT: orr w8, w8, w11, lsl #8 -; CHECK-NEXT: umov w11, v0.b[13] -; CHECK-NEXT: and w13, w13, #0x1 -; CHECK-NEXT: orr w8, w8, w12, lsl #9 -; CHECK-NEXT: umov w12, v0.b[14] -; CHECK-NEXT: and w9, w9, #0x1 -; CHECK-NEXT: orr w8, w8, w13, lsl #10 -; CHECK-NEXT: and w10, w10, #0x1 -; CHECK-NEXT: orr w8, w8, w9, lsl #11 -; CHECK-NEXT: and w9, w11, #0x1 -; CHECK-NEXT: umov w11, v0.b[15] -; CHECK-NEXT: orr w8, w8, w10, lsl #12 -; CHECK-NEXT: and w10, w12, #0x1 -; CHECK-NEXT: orr w8, w8, w9, lsl #13 -; CHECK-NEXT: orr w8, w8, w10, lsl #14 -; CHECK-NEXT: orr w8, w8, w11, lsl #15 -; CHECK-NEXT: tst w8, #0xffff -; CHECK-NEXT: cset w0, eq -; CHECK-NEXT: add sp, sp, #16 +; CHECK-NEXT: umaxv b0, v0.16b +; CHECK-NEXT: fmov w8, s0 +; CHECK-NEXT: mvn w8, w8 +; CHECK-NEXT: and w0, w8, #0x1 ; CHECK-NEXT: ret %cmp1 = icmp eq <16 x i8> %a, zeroinitializer %cast = bitcast <16 x i1> %cmp1 to i16 @@ -104,106 +34,13 @@ define i1 @combine_setcc_eq_vecreduce_or_v32i1(<32 x i8> %a) { ; CHECK-LABEL: combine_setcc_eq_vecreduce_or_v32i1: ; CHECK: // %bb.0: -; CHECK-NEXT: sub sp, sp, #16 -; CHECK-NEXT: .cfi_def_cfa_offset 16 ; CHECK-NEXT: cmeq v1.16b, v1.16b, #0 ; CHECK-NEXT: cmeq v0.16b, v0.16b, #0 -; CHECK-NEXT: umov w8, v1.b[1] -; CHECK-NEXT: umov w10, v1.b[2] -; CHECK-NEXT: umov w9, v1.b[0] -; CHECK-NEXT: umov w11, v1.b[3] -; CHECK-NEXT: umov w12, v1.b[4] -; CHECK-NEXT: umov w13, v1.b[5] -; CHECK-NEXT: umov w14, v1.b[6] -; CHECK-NEXT: and w8, w8, #0x1 -; CHECK-NEXT: umov w15, v1.b[7] -; CHECK-NEXT: and w10, w10, #0x1 -; CHECK-NEXT: and w9, w9, #0x1 -; CHECK-NEXT: and w11, w11, #0x1 -; CHECK-NEXT: and w12, w12, #0x1 -; CHECK-NEXT: bfi w9, w8, #1, #1 -; CHECK-NEXT: umov w8, v1.b[8] -; CHECK-NEXT: bfi w9, w10, #2, #1 -; CHECK-NEXT: and w13, w13, #0x1 -; CHECK-NEXT: umov w10, v1.b[9] -; CHECK-NEXT: bfi w9, w11, #3, #1 -; CHECK-NEXT: bfi w9, w12, #4, #1 -; CHECK-NEXT: and w14, w14, #0x1 -; CHECK-NEXT: umov w11, v1.b[10] -; CHECK-NEXT: bfi w9, w13, #5, #1 -; CHECK-NEXT: and w15, w15, #0x1 -; CHECK-NEXT: umov w13, v0.b[1] -; CHECK-NEXT: orr w9, w9, w14, lsl #6 -; CHECK-NEXT: and w8, w8, #0x1 -; CHECK-NEXT: orr w9, w9, w15, lsl #7 -; CHECK-NEXT: umov w15, v0.b[2] -; CHECK-NEXT: and w10, w10, #0x1 -; CHECK-NEXT: umov w14, v0.b[0] -; CHECK-NEXT: orr w8, w9, w8, lsl #8 -; CHECK-NEXT: umov w9, v0.b[3] -; CHECK-NEXT: and w11, w11, #0x1 -; CHECK-NEXT: orr w8, w8, w10, lsl #9 -; CHECK-NEXT: umov w10, v0.b[4] -; CHECK-NEXT: and w13, w13, #0x1 -; CHECK-NEXT: orr w8, w8, w11, lsl #10 -; CHECK-NEXT: and w11, w15, #0x1 -; CHECK-NEXT: umov w15, v0.b[5] -; CHECK-NEXT: and w14, w14, #0x1 -; CHECK-NEXT: and w9, w9, #0x1 -; CHECK-NEXT: bfi w14, w13, #1, #1 -; CHECK-NEXT: and w10, w10, #0x1 -; CHECK-NEXT: umov w13, v0.b[6] -; CHECK-NEXT: bfi w14, w11, #2, #1 -; CHECK-NEXT: umov w11, v1.b[12] -; CHECK-NEXT: bfi w14, w9, #3, #1 -; CHECK-NEXT: and w9, w15, #0x1 -; CHECK-NEXT: umov w12, v1.b[11] -; CHECK-NEXT: bfi w14, w10, #4, #1 -; CHECK-NEXT: umov w10, v0.b[7] -; CHECK-NEXT: umov w15, v0.b[8] -; CHECK-NEXT: and w13, w13, #0x1 -; CHECK-NEXT: bfi w14, w9, #5, #1 -; CHECK-NEXT: and w9, w11, #0x1 -; CHECK-NEXT: orr w11, w14, w13, lsl #6 -; CHECK-NEXT: umov w13, v0.b[9] -; CHECK-NEXT: and w12, w12, #0x1 -; CHECK-NEXT: and w10, w10, #0x1 -; CHECK-NEXT: and w14, w15, #0x1 -; CHECK-NEXT: umov w15, v0.b[10] -; CHECK-NEXT: orr w8, w8, w12, lsl #11 -; CHECK-NEXT: orr w10, w11, w10, lsl #7 -; CHECK-NEXT: orr w8, w8, w9, lsl #12 -; CHECK-NEXT: orr w9, w10, w14, lsl #8 -; CHECK-NEXT: and w10, w13, #0x1 -; CHECK-NEXT: umov w11, v0.b[11] -; CHECK-NEXT: and w12, w15, #0x1 -; CHECK-NEXT: umov w13, v0.b[12] -; CHECK-NEXT: orr w9, w9, w10, lsl #9 -; CHECK-NEXT: umov w14, v1.b[13] -; CHECK-NEXT: orr w9, w9, w12, lsl #10 -; CHECK-NEXT: umov w12, v0.b[13] -; CHECK-NEXT: umov w10, v1.b[14] -; CHECK-NEXT: and w11, w11, #0x1 -; CHECK-NEXT: umov w15, v0.b[14] -; CHECK-NEXT: and w13, w13, #0x1 -; CHECK-NEXT: orr w9, w9, w11, lsl #11 -; CHECK-NEXT: and w14, w14, #0x1 -; CHECK-NEXT: umov w11, v1.b[15] -; CHECK-NEXT: orr w9, w9, w13, lsl #12 -; CHECK-NEXT: and w12, w12, #0x1 -; CHECK-NEXT: umov w13, v0.b[15] -; CHECK-NEXT: and w10, w10, #0x1 -; CHECK-NEXT: and w15, w15, #0x1 -; CHECK-NEXT: orr w8, w8, w14, lsl #13 -; CHECK-NEXT: orr w9, w9, w12, lsl #13 -; CHECK-NEXT: orr w8, w8, w10, lsl #14 -; CHECK-NEXT: orr w9, w9, w15, lsl #14 -; CHECK-NEXT: orr w8, w8, w11, lsl #15 -; CHECK-NEXT: orr w9, w9, w13, lsl #15 -; CHECK-NEXT: orr w8, w9, w8 -; CHECK-NEXT: tst w8, #0xffff -; CHECK-NEXT: cset w0, eq -; CHECK-NEXT: add sp, sp, #16 +; CHECK-NEXT: orr v0.16b, v0.16b, v1.16b +; CHECK-NEXT: umaxv b0, v0.16b +; CHECK-NEXT: fmov w8, s0 +; CHECK-NEXT: mvn w8, w8 +; CHECK-NEXT: and w0, w8, #0x1 ; CHECK-NEXT: ret %cmp1 = icmp eq <32 x i8> %a, zeroinitializer %cast = bitcast <32 x i1> %cmp1 to i32 @@ -214,202 +51,17 @@ define i1 @combine_setcc_eq_vecreduce_or_v64i1(<64 x i8> %a) { ; CHECK-LABEL: combine_setcc_eq_vecreduce_or_v64i1: ; CHECK: // %bb.0: -; CHECK-NEXT: sub sp, sp, #16 -; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: cmeq v2.16b, v2.16b, #0 ; CHECK-NEXT: cmeq v3.16b, v3.16b, #0 ; CHECK-NEXT: cmeq v1.16b, v1.16b, #0 -; CHECK-NEXT: umov w8, v3.b[1] -; CHECK-NEXT: umov w10, v3.b[2] -; CHECK-NEXT: umov w9, v3.b[0] -; CHECK-NEXT: umov w11, v3.b[3] -; CHECK-NEXT: umov w12, v3.b[4] -; CHECK-NEXT: umov w13, v3.b[5] -; CHECK-NEXT: umov w14, v3.b[6] -; CHECK-NEXT: and w8, w8, #0x1 -; CHECK-NEXT: umov w15, v3.b[7] -; CHECK-NEXT: and w10, w10, #0x1 -; CHECK-NEXT: and w9, w9, #0x1 -; CHECK-NEXT: and w11, w11, #0x1 -; CHECK-NEXT: and w12, w12, #0x1 -; CHECK-NEXT: bfi w9, w8, #1, #1 -; CHECK-NEXT: umov w16, v3.b[8] -; CHECK-NEXT: bfi w9, w10, #2, #1 -; CHECK-NEXT: and w13, w13, #0x1 -; CHECK-NEXT: umov w17, v3.b[9] -; CHECK-NEXT: bfi w9, w11, #3, #1 -; CHECK-NEXT: umov w8, v3.b[10] -; CHECK-NEXT: bfi w9, w12, #4, #1 -; CHECK-NEXT: and w14, w14, #0x1 -; CHECK-NEXT: umov w10, v3.b[11] -; CHECK-NEXT: bfi w9, w13, #5, #1 -; CHECK-NEXT: and w15, w15, #0x1 -; CHECK-NEXT: umov w11, v3.b[12] -; CHECK-NEXT: and w16, w16, #0x1 -; CHECK-NEXT: orr w9, w9, w14, lsl #6 -; CHECK-NEXT: umov w12, v3.b[13] -; CHECK-NEXT: and w14, w17, #0x1 -; CHECK-NEXT: orr w9, w9, w15, lsl #7 -; CHECK-NEXT: umov w13, v3.b[14] -; CHECK-NEXT: and w8, w8, #0x1 -; CHECK-NEXT: orr w9, w9, w16, lsl #8 -; CHECK-NEXT: and w10, w10, #0x1 -; CHECK-NEXT: orr w9, w9, w14, lsl #9 -; CHECK-NEXT: and w11, w11, #0x1 -; CHECK-NEXT: orr w8, w9, w8, lsl #10 -; CHECK-NEXT: umov w9, v1.b[1] -; CHECK-NEXT: orr w8, w8, w10, lsl #11 -; CHECK-NEXT: and w10, w12, #0x1 -; CHECK-NEXT: orr w8, w8, w11, lsl #12 -; CHECK-NEXT: and w11, w13, #0x1 -; CHECK-NEXT: umov w12, v1.b[2] -; CHECK-NEXT: orr w8, w8, w10, lsl #13 -; CHECK-NEXT: umov w10, v1.b[0] -; CHECK-NEXT: orr w8, w8, w11, lsl #14 -; CHECK-NEXT: umov w11, v1.b[3] -; CHECK-NEXT: and w9, w9, #0x1 -; CHECK-NEXT: umov w13, v1.b[4] -; CHECK-NEXT: umov w14, v1.b[5] -; CHECK-NEXT: and w12, w12, #0x1 -; CHECK-NEXT: umov w15, v3.b[15] -; CHECK-NEXT: and w10, w10, #0x1 -; CHECK-NEXT: bfi w10, w9, #1, #1 -; CHECK-NEXT: and w9, w11, #0x1 -; CHECK-NEXT: and w11, w13, #0x1 -; CHECK-NEXT: bfi w10, w12, #2, #1 -; CHECK-NEXT: umov w13, v1.b[6] -; CHECK-NEXT: and w12, w14, #0x1 -; CHECK-NEXT: bfi w10, w9, #3, #1 -; CHECK-NEXT: umov w9, v1.b[7] -; CHECK-NEXT: bfi w10, w11, #4, #1 -; CHECK-NEXT: umov w11, v1.b[8] -; CHECK-NEXT: bfi w10, w12, #5, #1 -; CHECK-NEXT: umov w14, v1.b[10] -; CHECK-NEXT: and w12, w13, #0x1 -; CHECK-NEXT: umov w13, v1.b[9] -; CHECK-NEXT: and w9, w9, #0x1 -; CHECK-NEXT: orr w8, w8, w15, lsl #15 -; CHECK-NEXT: orr w10, w10, w12, lsl #6 -; CHECK-NEXT: and w11, w11, #0x1 -; CHECK-NEXT: orr w9, w10, w9, lsl #7 -; CHECK-NEXT: umov w10, v1.b[11] -; CHECK-NEXT: orr w9, w9, w11, lsl #8 -; CHECK-NEXT: and w11, w13, #0x1 -; CHECK-NEXT: umov w13, v1.b[12] -; CHECK-NEXT: and w12, w14, #0x1 -; CHECK-NEXT: orr w9, w9, w11, lsl #9 -; CHECK-NEXT: umov w11, v1.b[13] -; CHECK-NEXT: and w10, w10, #0x1 -; CHECK-NEXT: orr w9, w9, w12, lsl #10 -; CHECK-NEXT: cmeq v2.16b, v2.16b, #0 -; CHECK-NEXT: orr w9, w9, w10, lsl #11 -; CHECK-NEXT: and w10, w13, #0x1 -; CHECK-NEXT: and w11, w11, #0x1 -; CHECK-NEXT: umov w12, v2.b[1] -; CHECK-NEXT: orr w9, w9, w10, lsl #12 -; CHECK-NEXT: umov w10, v2.b[2] -; CHECK-NEXT: umov w14, v2.b[0] -; CHECK-NEXT: orr w9, w9, w11, lsl #13 -; CHECK-NEXT: umov w11, v2.b[3] -; CHECK-NEXT: umov w15, v2.b[4] -; CHECK-NEXT: and w12, w12, #0x1 -; CHECK-NEXT: umov w16, v2.b[5] -; CHECK-NEXT: and w10, w10, #0x1 -; CHECK-NEXT: umov w13, v1.b[14] -; CHECK-NEXT: and w14, w14, #0x1 -; CHECK-NEXT: and w11, w11, #0x1 -; CHECK-NEXT: bfi w14, w12, #1, #1 -; CHECK-NEXT: and w12, w15, #0x1 -; CHECK-NEXT: umov w15, v2.b[6] -; CHECK-NEXT: bfi w14, w10, #2, #1 -; CHECK-NEXT: umov w10, v2.b[7] -; CHECK-NEXT: bfi w14, w11, #3, #1 -; CHECK-NEXT: and w11, w16, #0x1 -; CHECK-NEXT: and w13, w13, #0x1 -; CHECK-NEXT: bfi w14, w12, #4, #1 -; CHECK-NEXT: and w12, w15, #0x1 -; CHECK-NEXT: umov w16, v2.b[11] -; CHECK-NEXT: bfi w14, w11, #5, #1 -; CHECK-NEXT: and w10, w10, #0x1 -; CHECK-NEXT: orr w9, w9, w13, lsl #14 -; CHECK-NEXT: umov w13, v2.b[8] -; CHECK-NEXT: orr w12, w14, w12, lsl #6 -; CHECK-NEXT: umov w11, v1.b[15] -; CHECK-NEXT: orr w10, w12, w10, lsl #7 -; CHECK-NEXT: umov w12, v2.b[9] -; CHECK-NEXT: umov w14, v2.b[10] -; CHECK-NEXT: and w16, w16, #0x1 ; CHECK-NEXT: cmeq v0.16b, v0.16b, #0 -; CHECK-NEXT: and w13, w13, #0x1 -; CHECK-NEXT: orr w9, w9, w11, lsl #15 -; CHECK-NEXT: and w11, w12, #0x1 -; CHECK-NEXT: umov w12, v0.b[1] -; CHECK-NEXT: orr w10, w10, w13, lsl #8 -; CHECK-NEXT: and w13, w14, #0x1 -; CHECK-NEXT: umov w14, v0.b[0] -; CHECK-NEXT: orr w10, w10, w11, lsl #9 -; CHECK-NEXT: umov w11, v0.b[2] -; CHECK-NEXT: orr w10, w10, w13, lsl #10 -; CHECK-NEXT: umov w13, v0.b[3] -; CHECK-NEXT: and w12, w12, #0x1 -; CHECK-NEXT: umov w15, v0.b[4] -; CHECK-NEXT: umov w17, v0.b[5] -; CHECK-NEXT: and w14, w14, #0x1 -; CHECK-NEXT: orr w10, w10, w16, lsl #11 -; CHECK-NEXT: and w11, w11, #0x1 -; CHECK-NEXT: bfi w14, w12, #1, #1 -; CHECK-NEXT: and w12, w13, #0x1 -; CHECK-NEXT: umov w16, v2.b[13] -; CHECK-NEXT: and w13, w15, #0x1 -; CHECK-NEXT: umov w15, v0.b[6] -; CHECK-NEXT: bfi w14, w11, #2, #1 -; CHECK-NEXT: umov w11, v2.b[12] -; CHECK-NEXT: bfi w14, w12, #3, #1 -; CHECK-NEXT: and w12, w17, #0x1 -; CHECK-NEXT: bfi w14, w13, #4, #1 -; CHECK-NEXT: umov w13, v0.b[7] -; CHECK-NEXT: umov w17, v0.b[8] -; CHECK-NEXT: and w15, w15, #0x1 -; CHECK-NEXT: bfi w14, w12, #5, #1 -; CHECK-NEXT: and w11, w11, #0x1 -; CHECK-NEXT: orr w12, w14, w15, lsl #6 -; CHECK-NEXT: umov w14, v0.b[9] -; CHECK-NEXT: and w13, w13, #0x1 -; CHECK-NEXT: orr w10, w10, w11, lsl #12 -; CHECK-NEXT: and w15, w17, #0x1 -; CHECK-NEXT: umov w17, v0.b[10] -; CHECK-NEXT: orr w12, w12, w13, lsl #7 -; CHECK-NEXT: umov w13, v0.b[11] -; CHECK-NEXT: orr w11, w12, w15, lsl #8 -; CHECK-NEXT: and w12, w14, #0x1 -; CHECK-NEXT: umov w15, v0.b[12] -; CHECK-NEXT: and w16, w16, #0x1 -; CHECK-NEXT: and w14, w17, #0x1 -; CHECK-NEXT: orr w11, w11, w12, lsl #9 -; CHECK-NEXT: umov w12, v2.b[14] -; CHECK-NEXT: and w13, w13, #0x1 -; CHECK-NEXT: orr w11, w11, w14, lsl #10 -; CHECK-NEXT: umov w14, v0.b[13] -; CHECK-NEXT: umov w17, v0.b[14] -; CHECK-NEXT: and w15, w15, #0x1 -; CHECK-NEXT: orr w11, w11, w13, lsl #11 -; CHECK-NEXT: umov w13, v2.b[15] -; CHECK-NEXT: orr w11, w11, w15, lsl #12 -; CHECK-NEXT: umov w15, v0.b[15] -; CHECK-NEXT: and w14, w14, #0x1 -; CHECK-NEXT: and w12, w12, #0x1 -; CHECK-NEXT: and w17, w17, #0x1 -; CHECK-NEXT: orr w10, w10, w16, lsl #13 -; CHECK-NEXT: orr w11, w11, w14, lsl #13 -; CHECK-NEXT: orr w10, w10, w12, lsl #14 -; CHECK-NEXT: orr w11, w11, w17, lsl #14 -; CHECK-NEXT: orr w10, w10, w13, lsl #15 -; CHECK-NEXT: orr w11, w11, w15, lsl #15 -; CHECK-NEXT: orr w8, w9, w8 -; CHECK-NEXT: orr w9, w11, w10 -; CHECK-NEXT: orr w8, w9, w8 -; CHECK-NEXT: tst w8, #0xffff -; CHECK-NEXT: cset w0, eq -; CHECK-NEXT: add sp, sp, #16 +; CHECK-NEXT: orr v1.16b, v1.16b, v3.16b +; CHECK-NEXT: orr v0.16b, v0.16b, v2.16b +; CHECK-NEXT: orr v0.16b, v0.16b, v1.16b +; CHECK-NEXT: umaxv b0, v0.16b +; CHECK-NEXT: fmov w8, s0 +; CHECK-NEXT: mvn w8, w8 +; CHECK-NEXT: and w0, w8, #0x1 ; CHECK-NEXT: ret %cmp1 = icmp eq <64 x i8> %a, zeroinitializer %cast = bitcast <64 x i1> %cmp1 to i64 @@ -420,34 +72,10 @@ define i1 @combine_setcc_ne_vecreduce_or_v8i1(<8 x i8> %a) { ; CHECK-LABEL: combine_setcc_ne_vecreduce_or_v8i1: ; CHECK: // %bb.0: -; CHECK-NEXT: sub sp, sp, #16 -; CHECK-NEXT: .cfi_def_cfa_offset 16 ; CHECK-NEXT: cmtst v0.8b, v0.8b, v0.8b -; CHECK-NEXT: umov w8, v0.b[1] -; CHECK-NEXT: umov w10, v0.b[2] -; CHECK-NEXT: umov w9, v0.b[0] -; CHECK-NEXT: umov w11, v0.b[3] -; CHECK-NEXT: umov w12, v0.b[4] -; CHECK-NEXT: umov w13, v0.b[5] -; CHECK-NEXT: umov w14, v0.b[6] -; CHECK-NEXT: and w8, w8, #0x1 -; CHECK-NEXT: and w10, w10, #0x1 -; CHECK-NEXT: and w9, w9, #0x1 -; CHECK-NEXT: and w11, w11, #0x1 -; CHECK-NEXT: bfi w9, w8, #1, #1 -; CHECK-NEXT: and w8, w12, #0x1 -; CHECK-NEXT: bfi w9, w10, #2, #1 -; CHECK-NEXT: and w10, w13, #0x1 -; CHECK-NEXT: bfi w9, w11, #3, #1 -; CHECK-NEXT: umov w11, v0.b[7] -; CHECK-NEXT: bfi w9, w8, #4, #1 -; CHECK-NEXT: and w8, w14, #0x1 -; CHECK-NEXT: bfi w9, w10, #5, #1 -; CHECK-NEXT: orr w8, w9, w8, lsl #6 -; CHECK-NEXT: orr w8, w8, w11, lsl #7 -; CHECK-NEXT: tst w8, #0xff -; CHECK-NEXT: cset w0, ne -; CHECK-NEXT: add sp, sp, #16 +; CHECK-NEXT: umaxv b0, v0.8b +; CHECK-NEXT: fmov w8, s0 +; CHECK-NEXT: and w0, w8, #0x1 ; CHECK-NEXT: ret %cmp1 = icmp ne <8 x i8> %a, zeroinitializer %cast = bitcast <8 x i1> %cmp1 to i8 @@ -458,58 +86,10 @@ define i1 @combine_setcc_ne_vecreduce_or_v16i1(<16 x i8> %a) { ; CHECK-LABEL: combine_setcc_ne_vecreduce_or_v16i1: ; CHECK: // %bb.0: -; CHECK-NEXT: sub sp, sp, #16 -; CHECK-NEXT: .cfi_def_cfa_offset 16 ; CHECK-NEXT: cmtst v0.16b, v0.16b, v0.16b -; CHECK-NEXT: umov w8, v0.b[1] -; CHECK-NEXT: umov w10, v0.b[2] -; CHECK-NEXT: umov w9, v0.b[0] -; CHECK-NEXT: umov w11, v0.b[3] -; CHECK-NEXT: umov w12, v0.b[4] -; CHECK-NEXT: umov w13, v0.b[5] -; CHECK-NEXT: and w8, w8, #0x1 -; CHECK-NEXT: and w10, w10, #0x1 -; CHECK-NEXT: and w9, w9, #0x1 -; CHECK-NEXT: and w11, w11, #0x1 -; CHECK-NEXT: and w12, w12, #0x1 -; CHECK-NEXT: and w13, w13, #0x1 -; CHECK-NEXT: bfi w9, w8, #1, #1 -; CHECK-NEXT: umov w8, v0.b[6] -; CHECK-NEXT: bfi w9, w10, #2, #1 -; CHECK-NEXT: umov w10, v0.b[7] -; CHECK-NEXT: bfi w9, w11, #3, #1 -; CHECK-NEXT: umov w11, v0.b[8] -; CHECK-NEXT: bfi w9, w12, #4, #1 -; CHECK-NEXT: umov w12, v0.b[9] -; CHECK-NEXT: and w8, w8, #0x1 -; CHECK-NEXT: bfi w9, w13, #5, #1 -; CHECK-NEXT: umov w13, v0.b[10] -; CHECK-NEXT: and w10, w10, #0x1 -; CHECK-NEXT: orr w8, w9, w8, lsl #6 -; CHECK-NEXT: umov w9, v0.b[11] -; CHECK-NEXT: and w11, w11, #0x1 -; CHECK-NEXT: orr w8, w8, w10, lsl #7 -; CHECK-NEXT: umov w10, v0.b[12] -; CHECK-NEXT: and w12, w12, #0x1 -; CHECK-NEXT: orr w8, w8, w11, lsl #8 -; CHECK-NEXT: umov w11, v0.b[13] -; CHECK-NEXT: and w13, w13, #0x1 -; CHECK-NEXT: orr w8, w8, w12, lsl #9 -; CHECK-NEXT: umov w12, v0.b[14] -; CHECK-NEXT: and w9, w9, #0x1 -; CHECK-NEXT: orr w8, w8, w13, lsl #10 -; CHECK-NEXT: and w10, w10, #0x1 -; CHECK-NEXT: orr w8, w8, w9, lsl #11 -; CHECK-NEXT: and w9, w11, #0x1 -; CHECK-NEXT: umov w11, v0.b[15] -; CHECK-NEXT: orr w8, w8, w10, lsl #12 -; CHECK-NEXT: and w10, w12, #0x1 -; CHECK-NEXT: orr w8, w8, w9, lsl #13 -; CHECK-NEXT: orr w8, w8, w10, lsl #14 -; CHECK-NEXT: orr w8, w8, w11, lsl #15 -; CHECK-NEXT: tst w8, #0xffff -; CHECK-NEXT: cset w0, ne -; CHECK-NEXT: add sp, sp, #16 +; CHECK-NEXT: umaxv b0, v0.16b +; CHECK-NEXT: fmov w8, s0 +; CHECK-NEXT: and w0, w8, #0x1 ; CHECK-NEXT: ret %cmp1 = icmp ne <16 x i8> %a, zeroinitializer %cast = bitcast <16 x i1> %cmp1 to i16 @@ -520,106 +100,11 @@ define i1 @combine_setcc_ne_vecreduce_or_v32i1(<32 x i8> %a) { ; CHECK-LABEL: combine_setcc_ne_vecreduce_or_v32i1: ; CHECK: // %bb.0: -; CHECK-NEXT: sub sp, sp, #16 -; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: cmtst v1.16b, v1.16b, v1.16b +; CHECK-NEXT: orr v0.16b, v0.16b, v1.16b ; CHECK-NEXT: cmtst v0.16b, v0.16b, v0.16b -; CHECK-NEXT: umov w8, v1.b[1] -; CHECK-NEXT: umov w10, v1.b[2] -; CHECK-NEXT: umov w9, v1.b[0] -; CHECK-NEXT: umov w11, v1.b[3] -; CHECK-NEXT: umov w12, v1.b[4] -; CHECK-NEXT: umov w13, v1.b[5] -; CHECK-NEXT: umov w14, v1.b[6] -; CHECK-NEXT: umov w15, v1.b[7] -; CHECK-NEXT: and w8, w8, #0x1 -; CHECK-NEXT: and w10, w10, #0x1 -; CHECK-NEXT: and w9, w9, #0x1 -; CHECK-NEXT: and w11, w11, #0x1 -; CHECK-NEXT: and w12, w12, #0x1 -; CHECK-NEXT: and w13, w13, #0x1 -; CHECK-NEXT: bfi w9, w8, #1, #1 -; CHECK-NEXT: umov w8, v1.b[8] -; CHECK-NEXT: bfi w9, w10, #2, #1 -; CHECK-NEXT: umov w10, v1.b[9] -; CHECK-NEXT: bfi w9, w11, #3, #1 -; CHECK-NEXT: and w14, w14, #0x1 -; CHECK-NEXT: bfi w9, w12, #4, #1 -; CHECK-NEXT: umov w11, v1.b[10] -; CHECK-NEXT: bfi w9, w13, #5, #1 -; CHECK-NEXT: and w15, w15, #0x1 -; CHECK-NEXT: umov w13, v0.b[1] -; CHECK-NEXT: orr w9, w9, w14, lsl #6 -; CHECK-NEXT: and w8, w8, #0x1 -; CHECK-NEXT: orr w9, w9, w15, lsl #7 -; CHECK-NEXT: umov w15, v0.b[2] -; CHECK-NEXT: and w10, w10, #0x1 -; CHECK-NEXT: umov w14, v0.b[0] -; CHECK-NEXT: orr w8, w9, w8, lsl #8 -; CHECK-NEXT: umov w9, v0.b[3] -; CHECK-NEXT: and w11, w11, #0x1 -; CHECK-NEXT: orr w8, w8, w10, lsl #9 -; CHECK-NEXT: umov w10, v0.b[4] -; CHECK-NEXT: and w13, w13, #0x1 -; CHECK-NEXT: orr w8, w8, w11, lsl #10 -; CHECK-NEXT: umov w11, v0.b[5] -; CHECK-NEXT: and w15, w15, #0x1 -; CHECK-NEXT: and w14, w14, #0x1 -; CHECK-NEXT: and w9, w9, #0x1 -; CHECK-NEXT: bfi w14, w13, #1, #1 -; CHECK-NEXT: and w10, w10, #0x1 -; CHECK-NEXT: umov w13, v0.b[6] -; CHECK-NEXT: bfi w14, w15, #2, #1 -; CHECK-NEXT: bfi w14, w9, #3, #1 -; CHECK-NEXT: and w9, w11, #0x1 -; CHECK-NEXT: umov w12, v1.b[11] -; CHECK-NEXT: umov w15, v1.b[12] -; CHECK-NEXT: bfi w14, w10, #4, #1 -; CHECK-NEXT: umov w10, v0.b[7] -; CHECK-NEXT: and w11, w13, #0x1 -; CHECK-NEXT: umov w13, v0.b[8] -; CHECK-NEXT: bfi w14, w9, #5, #1 -; CHECK-NEXT: orr w11, w14, w11, lsl #6 -; CHECK-NEXT: umov w14, v0.b[9] -; CHECK-NEXT: and w12, w12, #0x1 -; CHECK-NEXT: and w9, w15, #0x1 -; CHECK-NEXT: and w10, w10, #0x1 -; CHECK-NEXT: umov w15, v0.b[10] -; CHECK-NEXT: and w13, w13, #0x1 -; CHECK-NEXT: orr w8, w8, w12, lsl #11 -; CHECK-NEXT: orr w10, w11, w10, lsl #7 -; CHECK-NEXT: orr w8, w8, w9, lsl #12 -; CHECK-NEXT: orr w9, w10, w13, lsl #8 -; CHECK-NEXT: and w10, w14, #0x1 -; CHECK-NEXT: umov w11, v0.b[11] -; CHECK-NEXT: and w12, w15, #0x1 -; CHECK-NEXT: umov w13, v0.b[12] -; CHECK-NEXT: orr w9, w9, w10, lsl #9 -; CHECK-NEXT: umov w14, v1.b[13] -; CHECK-NEXT: orr w9, w9, w12, lsl #10 -; CHECK-NEXT: umov w12, v0.b[13] -; CHECK-NEXT: umov w10, v1.b[14] -; CHECK-NEXT: and w11, w11, #0x1 -; CHECK-NEXT: umov w15, v0.b[14] -; CHECK-NEXT: and w13, w13, #0x1 -; CHECK-NEXT: orr w9, w9, w11, lsl #11 -; CHECK-NEXT: and w14, w14, #0x1 -; CHECK-NEXT: umov w11, v1.b[15] -; CHECK-NEXT: orr w9, w9, w13, lsl #12 -; CHECK-NEXT: and w12, w12, #0x1 -; CHECK-NEXT: umov w13, v0.b[15] -; CHECK-NEXT: and w10, w10, #0x1 -; CHECK-NEXT: and w15, w15, #0x1 -; CHECK-NEXT: orr w8, w8, w14, lsl #13 -; CHECK-NEXT: orr w9, w9, w12, lsl #13 -; CHECK-NEXT: orr w8, w8, w10, lsl #14 -; CHECK-NEXT: orr w9, w9, w15, lsl #14 -; CHECK-NEXT: orr w8, w8, w11, lsl #15 -; CHECK-NEXT: orr w9, w9, w13, lsl #15 -; CHECK-NEXT: orr w8, w9, w8 -; CHECK-NEXT: tst w8, #0xffff -; CHECK-NEXT: cset w0, ne -; CHECK-NEXT: add sp, sp, #16 +; CHECK-NEXT: umaxv b0, v0.16b +; CHECK-NEXT: fmov w8, s0 +; CHECK-NEXT: and w0, w8, #0x1 ; CHECK-NEXT: ret %cmp1 = icmp ne <32 x i8> %a, zeroinitializer %cast = bitcast <32 x i1> %cmp1 to i32 @@ -630,202 +115,13 @@ define i1 @combine_setcc_ne_vecreduce_or_v64i1(<64 x i8> %a) { ; CHECK-LABEL: combine_setcc_ne_vecreduce_or_v64i1: ; CHECK: // %bb.0: -; CHECK-NEXT: sub sp, sp, #16 -; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: cmtst v3.16b, v3.16b, v3.16b -; CHECK-NEXT: cmtst v1.16b, v1.16b, v1.16b -; CHECK-NEXT: cmtst v2.16b, v2.16b, v2.16b -; CHECK-NEXT: umov w8, v3.b[1] -; CHECK-NEXT: umov w10, v3.b[2] -; CHECK-NEXT: umov w9, v3.b[0] -; CHECK-NEXT: umov w11, v3.b[3] -; CHECK-NEXT: umov w12, v3.b[4] -; CHECK-NEXT: umov w13, v3.b[5] -; CHECK-NEXT: umov w14, v3.b[6] -; CHECK-NEXT: and w8, w8, #0x1 -; CHECK-NEXT: umov w15, v3.b[7] -; CHECK-NEXT: and w10, w10, #0x1 -; CHECK-NEXT: and w9, w9, #0x1 -; CHECK-NEXT: and w11, w11, #0x1 -; CHECK-NEXT: and w12, w12, #0x1 -; CHECK-NEXT: bfi w9, w8, #1, #1 -; CHECK-NEXT: umov w16, v3.b[8] -; CHECK-NEXT: bfi w9, w10, #2, #1 -; CHECK-NEXT: and w13, w13, #0x1 -; CHECK-NEXT: umov w17, v3.b[9] -; CHECK-NEXT: bfi w9, w11, #3, #1 -; CHECK-NEXT: umov w8, v3.b[10] -; CHECK-NEXT: bfi w9, w12, #4, #1 -; CHECK-NEXT: and w14, w14, #0x1 -; CHECK-NEXT: umov w10, v3.b[11] -; CHECK-NEXT: bfi w9, w13, #5, #1 -; CHECK-NEXT: and w15, w15, #0x1 -; CHECK-NEXT: umov w11, v3.b[12] -; CHECK-NEXT: orr w9, w9, w14, lsl #6 -; CHECK-NEXT: and w16, w16, #0x1 -; CHECK-NEXT: umov w12, v3.b[13] -; CHECK-NEXT: orr w9, w9, w15, lsl #7 -; CHECK-NEXT: and w17, w17, #0x1 -; CHECK-NEXT: and w8, w8, #0x1 -; CHECK-NEXT: orr w9, w9, w16, lsl #8 -; CHECK-NEXT: umov w14, v1.b[1] -; CHECK-NEXT: and w10, w10, #0x1 -; CHECK-NEXT: orr w9, w9, w17, lsl #9 -; CHECK-NEXT: and w11, w11, #0x1 -; CHECK-NEXT: orr w8, w9, w8, lsl #10 -; CHECK-NEXT: umov w15, v1.b[0] -; CHECK-NEXT: and w9, w12, #0x1 -; CHECK-NEXT: orr w8, w8, w10, lsl #11 -; CHECK-NEXT: umov w10, v1.b[2] -; CHECK-NEXT: orr w8, w8, w11, lsl #12 -; CHECK-NEXT: umov w11, v1.b[3] -; CHECK-NEXT: orr w8, w8, w9, lsl #13 -; CHECK-NEXT: and w9, w14, #0x1 -; CHECK-NEXT: umov w12, v1.b[4] -; CHECK-NEXT: and w14, w15, #0x1 -; CHECK-NEXT: umov w15, v1.b[5] -; CHECK-NEXT: and w10, w10, #0x1 -; CHECK-NEXT: umov w13, v3.b[14] -; CHECK-NEXT: bfi w14, w9, #1, #1 -; CHECK-NEXT: and w9, w11, #0x1 -; CHECK-NEXT: and w11, w12, #0x1 -; CHECK-NEXT: umov w12, v1.b[6] -; CHECK-NEXT: bfi w14, w10, #2, #1 -; CHECK-NEXT: umov w10, v1.b[7] -; CHECK-NEXT: bfi w14, w9, #3, #1 -; CHECK-NEXT: and w9, w15, #0x1 -; CHECK-NEXT: bfi w14, w11, #4, #1 -; CHECK-NEXT: umov w15, v1.b[11] -; CHECK-NEXT: and w11, w12, #0x1 -; CHECK-NEXT: umov w12, v1.b[8] -; CHECK-NEXT: bfi w14, w9, #5, #1 -; CHECK-NEXT: umov w9, v1.b[9] -; CHECK-NEXT: orr w11, w14, w11, lsl #6 -; CHECK-NEXT: and w10, w10, #0x1 -; CHECK-NEXT: umov w14, v1.b[10] -; CHECK-NEXT: and w13, w13, #0x1 -; CHECK-NEXT: and w12, w12, #0x1 -; CHECK-NEXT: orr w10, w11, w10, lsl #7 -; CHECK-NEXT: and w9, w9, #0x1 -; CHECK-NEXT: umov w11, v1.b[12] -; CHECK-NEXT: orr w10, w10, w12, lsl #8 -; CHECK-NEXT: and w12, w15, #0x1 -; CHECK-NEXT: orr w9, w10, w9, lsl #9 -; CHECK-NEXT: and w10, w14, #0x1 -; CHECK-NEXT: orr w8, w8, w13, lsl #14 -; CHECK-NEXT: umov w13, v3.b[15] -; CHECK-NEXT: umov w14, v1.b[13] -; CHECK-NEXT: orr w9, w9, w10, lsl #10 -; CHECK-NEXT: and w10, w11, #0x1 -; CHECK-NEXT: orr w9, w9, w12, lsl #11 -; CHECK-NEXT: umov w12, v2.b[2] -; CHECK-NEXT: umov w15, v2.b[4] -; CHECK-NEXT: orr w9, w9, w10, lsl #12 -; CHECK-NEXT: umov w10, v2.b[1] -; CHECK-NEXT: orr w8, w8, w13, lsl #15 -; CHECK-NEXT: and w11, w14, #0x1 -; CHECK-NEXT: umov w13, v2.b[0] -; CHECK-NEXT: umov w14, v2.b[3] -; CHECK-NEXT: and w12, w12, #0x1 -; CHECK-NEXT: umov w16, v2.b[5] -; CHECK-NEXT: and w10, w10, #0x1 -; CHECK-NEXT: orr w9, w9, w11, lsl #13 -; CHECK-NEXT: umov w11, v1.b[14] -; CHECK-NEXT: and w13, w13, #0x1 -; CHECK-NEXT: and w14, w14, #0x1 -; CHECK-NEXT: bfi w13, w10, #1, #1 -; CHECK-NEXT: umov w10, v2.b[6] -; CHECK-NEXT: bfi w13, w12, #2, #1 -; CHECK-NEXT: and w12, w15, #0x1 -; CHECK-NEXT: bfi w13, w14, #3, #1 -; CHECK-NEXT: and w14, w16, #0x1 -; CHECK-NEXT: umov w15, v2.b[7] -; CHECK-NEXT: umov w16, v2.b[8] -; CHECK-NEXT: bfi w13, w12, #4, #1 -; CHECK-NEXT: and w10, w10, #0x1 -; CHECK-NEXT: bfi w13, w14, #5, #1 -; CHECK-NEXT: umov w12, v1.b[15] -; CHECK-NEXT: orr w10, w13, w10, lsl #6 -; CHECK-NEXT: and w11, w11, #0x1 -; CHECK-NEXT: and w13, w15, #0x1 -; CHECK-NEXT: umov w15, v2.b[9] -; CHECK-NEXT: and w14, w16, #0x1 -; CHECK-NEXT: umov w16, v2.b[10] +; CHECK-NEXT: orr v1.16b, v1.16b, v3.16b +; CHECK-NEXT: orr v0.16b, v0.16b, v2.16b +; CHECK-NEXT: orr v0.16b, v0.16b, v1.16b ; CHECK-NEXT: cmtst v0.16b, v0.16b, v0.16b -; CHECK-NEXT: orr w9, w9, w11, lsl #14 -; CHECK-NEXT: orr w10, w10, w13, lsl #7 -; CHECK-NEXT: orr w9, w9, w12, lsl #15 -; CHECK-NEXT: and w11, w15, #0x1 -; CHECK-NEXT: orr w10, w10, w14, lsl #8 -; CHECK-NEXT: umov w12, v0.b[1] -; CHECK-NEXT: and w13, w16, #0x1 -; CHECK-NEXT: umov w14, v0.b[0] -; CHECK-NEXT: orr w10, w10, w11, lsl #9 -; CHECK-NEXT: umov w11, v0.b[2] -; CHECK-NEXT: orr w10, w10, w13, lsl #10 -; CHECK-NEXT: umov w13, v0.b[3] -; CHECK-NEXT: umov w15, v0.b[4] -; CHECK-NEXT: and w12, w12, #0x1 -; CHECK-NEXT: umov w17, v0.b[5] -; CHECK-NEXT: and w14, w14, #0x1 -; CHECK-NEXT: umov w16, v2.b[11] -; CHECK-NEXT: and w11, w11, #0x1 -; CHECK-NEXT: orr w8, w9, w8 -; CHECK-NEXT: bfi w14, w12, #1, #1 -; CHECK-NEXT: and w12, w13, #0x1 -; CHECK-NEXT: and w13, w15, #0x1 -; CHECK-NEXT: umov w15, v0.b[6] -; CHECK-NEXT: bfi w14, w11, #2, #1 -; CHECK-NEXT: umov w11, v2.b[12] -; CHECK-NEXT: bfi w14, w12, #3, #1 -; CHECK-NEXT: and w12, w17, #0x1 -; CHECK-NEXT: bfi w14, w13, #4, #1 -; CHECK-NEXT: umov w13, v0.b[7] -; CHECK-NEXT: umov w17, v0.b[8] -; CHECK-NEXT: and w15, w15, #0x1 -; CHECK-NEXT: bfi w14, w12, #5, #1 -; CHECK-NEXT: and w16, w16, #0x1 -; CHECK-NEXT: orr w12, w14, w15, lsl #6 -; CHECK-NEXT: umov w14, v0.b[9] -; CHECK-NEXT: and w13, w13, #0x1 -; CHECK-NEXT: and w11, w11, #0x1 -; CHECK-NEXT: and w15, w17, #0x1 -; CHECK-NEXT: umov w17, v0.b[10] -; CHECK-NEXT: orr w10, w10, w16, lsl #11 -; CHECK-NEXT: orr w12, w12, w13, lsl #7 -; CHECK-NEXT: orr w10, w10, w11, lsl #12 -; CHECK-NEXT: orr w11, w12, w15, lsl #8 -; CHECK-NEXT: and w12, w14, #0x1 -; CHECK-NEXT: umov w13, v0.b[11] -; CHECK-NEXT: and w14, w17, #0x1 -; CHECK-NEXT: umov w15, v0.b[12] -; CHECK-NEXT: orr w11, w11, w12, lsl #9 -; CHECK-NEXT: umov w16, v2.b[13] -; CHECK-NEXT: orr w11, w11, w14, lsl #10 -; CHECK-NEXT: umov w14, v0.b[13] -; CHECK-NEXT: umov w12, v2.b[14] -; CHECK-NEXT: and w13, w13, #0x1 -; CHECK-NEXT: umov w17, v0.b[14] -; CHECK-NEXT: and w15, w15, #0x1 -; CHECK-NEXT: orr w11, w11, w13, lsl #11 -; CHECK-NEXT: and w16, w16, #0x1 -; CHECK-NEXT: umov w13, v2.b[15] -; CHECK-NEXT: orr w11, w11, w15, lsl #12 -; CHECK-NEXT: and w14, w14, #0x1 -; CHECK-NEXT: umov w15, v0.b[15] -; CHECK-NEXT: and w12, w12, #0x1 -; CHECK-NEXT: and w17, w17, #0x1 -; CHECK-NEXT: orr w10, w10, w16, lsl #13 -; CHECK-NEXT: orr w11, w11, w14, lsl #13 -; CHECK-NEXT: orr w10, w10, w12, lsl #14 -; CHECK-NEXT: orr w11, w11, w17, lsl #14 -; CHECK-NEXT: orr w10, w10, w13, lsl #15 -; CHECK-NEXT: orr w11, w11, w15, lsl #15 -; CHECK-NEXT: orr w9, w11, w10 -; CHECK-NEXT: orr w8, w9, w8 -; CHECK-NEXT: tst w8, #0xffff -; CHECK-NEXT: cset w0, ne -; CHECK-NEXT: add sp, sp, #16 +; CHECK-NEXT: umaxv b0, v0.16b +; CHECK-NEXT: fmov w8, s0 +; CHECK-NEXT: and w0, w8, #0x1 ; CHECK-NEXT: ret %cmp1 = icmp ne <64 x i8> %a, zeroinitializer %cast = bitcast <64 x i1> %cmp1 to i64