Index: llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
===================================================================
--- llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -18193,6 +18193,27 @@
     }
   }
 
+  // setcc (iN (bitcast (vNi1 X))), 0, eq
+  //   ==> setcc (iN (zext (i1 (vecreduce_or (vNi1 X))))), 0, eq
+  // where N = (8, 16).
+  //
+  // The bitcast is zero iff no lane of X is set. (OR-reducing a per-lane
+  // "X == 0" compare would instead mean "some lane is clear".)
+  if (Cond == ISD::SETEQ && isNullConstant(RHS) &&
+      LHS->getOpcode() == ISD::BITCAST) {
+    // Use EVT: getSimpleValueType() asserts on extended types such as i24.
+    EVT ToVT = LHS->getValueType(0);
+    EVT FromVT = LHS->getOperand(0).getValueType();
+    if ((ToVT == MVT::i8 && FromVT == MVT::v8i1) ||
+        (ToVT == MVT::i16 && FromVT == MVT::v16i1)) {
+      SDValue AnyLaneSet =
+          DAG.getNode(ISD::VECREDUCE_OR, DL, MVT::i1, LHS->getOperand(0));
+      SDValue Widened = DAG.getNode(ISD::ZERO_EXTEND, DL, ToVT, AnyLaneSet);
+      // Keep the node's own result type rather than hard-coding i1.
+      return DAG.getSetCC(DL, N->getValueType(0), Widened, RHS, Cond);
+    }
+  }
+
   return SDValue();
 }
 
NOTE(review): the CHECK lines added in the test hunks below were generated
for the earlier form of this combine. "eor with #1, then umaxv" appears to
test "some lane is clear" rather than "all lanes are clear", so they do not
describe `icmp eq iN (bitcast X), 0`. Regenerate them with
llvm/utils/update_llc_test_checks.py once the combine above is applied.

Index: llvm/test/CodeGen/AArch64/dag-combine-setcc.ll
===================================================================
--- llvm/test/CodeGen/AArch64/dag-combine-setcc.ll
+++ llvm/test/CodeGen/AArch64/dag-combine-setcc.ll
@@ -4,35 +4,9 @@
 define <8 x i1> @v8i1(<8 x i1> %a) {
 ; CHECK-LABEL: v8i1:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    sub sp, sp, #16
-; CHECK-NEXT:    .cfi_def_cfa_offset 16
-; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
-; CHECK-NEXT:    umov w8, v0.b[1]
-; CHECK-NEXT:    umov w10, v0.b[2]
-; CHECK-NEXT:    umov w9, v0.b[0]
-; CHECK-NEXT:    umov w11, v0.b[3]
-; CHECK-NEXT:    umov w12, v0.b[4]
-; CHECK-NEXT:    umov w13, v0.b[5]
-; CHECK-NEXT:    umov w14, v0.b[6]
-; CHECK-NEXT:    and w8, w8, #0x1
-; CHECK-NEXT:    and w10, w10, #0x1
-; CHECK-NEXT:    and w9, w9, #0x1
-; CHECK-NEXT:    and w11, w11, #0x1
-; CHECK-NEXT:    bfi w9, w8, #1, #1
-; CHECK-NEXT:    and w8, w12, #0x1
-; CHECK-NEXT:    bfi w9, w10, #2, #1
-; CHECK-NEXT:    and w10, w13, #0x1
-; CHECK-NEXT:    bfi w9, w11, #3, #1
-; CHECK-NEXT:    umov w11, v0.b[7]
-; CHECK-NEXT:    bfi w9, w8, #4, #1
-; CHECK-NEXT:    and w8, w14, #0x1
-; CHECK-NEXT:    bfi w9, w10, #5, #1
-; CHECK-NEXT:    orr w8, w9, w8, lsl #6
-; CHECK-NEXT:    orr w8, w8, w11, lsl #7
-; CHECK-NEXT:    tst w8, #0xff
-; CHECK-NEXT:    cset w8, eq
-; CHECK-NEXT:    fmov s0, w8
-; CHECK-NEXT:    add sp, sp, #16
+; CHECK-NEXT:    movi v1.8b, #1
+; CHECK-NEXT:    eor v0.8b, v0.8b, v1.8b
+; CHECK-NEXT:    umaxv b0, v0.8b
 ; CHECK-NEXT:    ret
   %cast = bitcast <8 x i1> %a to i8
   %cmp = icmp eq i8 %cast, zeroinitializer
@@ -43,58 +17,9 @@
 define <16 x i1> @v16i1(<16 x i1> %a) {
 ; CHECK-LABEL: v16i1:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    sub sp, sp, #16
-; CHECK-NEXT:    .cfi_def_cfa_offset 16
-; CHECK-NEXT:    umov w8, v0.b[1]
-; CHECK-NEXT:    umov w10, v0.b[2]
-; CHECK-NEXT:    umov w9, v0.b[0]
-; CHECK-NEXT:    umov w11, v0.b[3]
-; CHECK-NEXT:    umov w12, v0.b[4]
-; CHECK-NEXT:    umov w13, v0.b[5]
-; CHECK-NEXT:    and w8, w8, #0x1
-; CHECK-NEXT:    and w10, w10, #0x1
-; CHECK-NEXT:    and w9, w9, #0x1
-; CHECK-NEXT:    and w11, w11, #0x1
-; CHECK-NEXT:    and w12, w12, #0x1
-; CHECK-NEXT:    and w13, w13, #0x1
-; CHECK-NEXT:    bfi w9, w8, #1, #1
-; CHECK-NEXT:    umov w8, v0.b[6]
-; CHECK-NEXT:    bfi w9, w10, #2, #1
-; CHECK-NEXT:    umov w10, v0.b[7]
-; CHECK-NEXT:    bfi w9, w11, #3, #1
-; CHECK-NEXT:    umov w11, v0.b[8]
-; CHECK-NEXT:    bfi w9, w12, #4, #1
-; CHECK-NEXT:    umov w12, v0.b[9]
-; CHECK-NEXT:    and w8, w8, #0x1
-; CHECK-NEXT:    bfi w9, w13, #5, #1
-; CHECK-NEXT:    umov w13, v0.b[10]
-; CHECK-NEXT:    and w10, w10, #0x1
-; CHECK-NEXT:    orr w8, w9, w8, lsl #6
-; CHECK-NEXT:    umov w9, v0.b[11]
-; CHECK-NEXT:    and w11, w11, #0x1
-; CHECK-NEXT:    orr w8, w8, w10, lsl #7
-; CHECK-NEXT:    umov w10, v0.b[12]
-; CHECK-NEXT:    and w12, w12, #0x1
-; CHECK-NEXT:    orr w8, w8, w11, lsl #8
-; CHECK-NEXT:    umov w11, v0.b[13]
-; CHECK-NEXT:    and w13, w13, #0x1
-; CHECK-NEXT:    orr w8, w8, w12, lsl #9
-; CHECK-NEXT:    umov w12, v0.b[14]
-; CHECK-NEXT:    and w9, w9, #0x1
-; CHECK-NEXT:    orr w8, w8, w13, lsl #10
-; CHECK-NEXT:    and w10, w10, #0x1
-; CHECK-NEXT:    orr w8, w8, w9, lsl #11
-; CHECK-NEXT:    and w9, w11, #0x1
-; CHECK-NEXT:    umov w11, v0.b[15]
-; CHECK-NEXT:    orr w8, w8, w10, lsl #12
-; CHECK-NEXT:    and w10, w12, #0x1
-; CHECK-NEXT:    orr w8, w8, w9, lsl #13
-; CHECK-NEXT:    orr w8, w8, w10, lsl #14
-; CHECK-NEXT:    orr w8, w8, w11, lsl #15
-; CHECK-NEXT:    tst w8, #0xffff
-; CHECK-NEXT:    cset w8, eq
-; CHECK-NEXT:    fmov s0, w8
-; CHECK-NEXT:    add sp, sp, #16
+; CHECK-NEXT:    movi v1.16b, #1
+; CHECK-NEXT:    eor v0.16b, v0.16b, v1.16b
+; CHECK-NEXT:    umaxv b0, v0.16b
 ; CHECK-NEXT:    ret
   %cast = bitcast <16 x i1> %a to i16
   %cmp = icmp eq i16 %cast, zeroinitializer