diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -17052,12 +17052,55 @@
   return performCONDCombine(N, DCI, DAG, 2, 3);
 }
 
+// Try to re-use an already extended operand of a v16i8 SetCC feeding an
+// extended select. Doing so avoids requiring another full extension of the
+// SET_CC result when lowering the select.
+static SDValue tryToWidenSetCCOperands(SDNode *Op, SelectionDAG &DAG) {
+  if (Op->getOperand(0).getValueType() != MVT::v16i8)
+    return SDValue();
+
+  if (any_of(Op->uses(), [](const SDNode *N) {
+        return N->getOpcode() != ISD::VSELECT ||
+               N->getValueType(0) != MVT::v16i32;
+      }))
+    return SDValue();
+
+  APInt V;
+  if (!ISD::isConstantSplatVector(Op->getOperand(1).getNode(), V))
+    return SDValue();
+
+  SDLoc DL(Op);
+  SDValue Op0ExtV;
+  SDValue Op1ExtV;
+  ISD::CondCode CC = cast<CondCodeSDNode>(Op->getOperand(2))->get();
+  // Check if the first operand of the SET_CC is already extended. If it is,
+  // split the SET_CC and re-use the extended version of the operand.
+  SDNode *Op0SExt = DAG.getNodeIfExists(
+      ISD::SIGN_EXTEND, DAG.getVTList(MVT::v16i32), Op->getOperand(0));
+  SDNode *Op0ZExt = DAG.getNodeIfExists(
+      ISD::ZERO_EXTEND, DAG.getVTList(MVT::v16i32), Op->getOperand(0));
+  if (Op0SExt && isSignedIntSetCC(CC)) {
+    Op0ExtV = SDValue(Op0SExt, 0);
+    Op1ExtV = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::v16i32, Op->getOperand(1));
+  } else if (Op0ZExt && isUnsignedIntSetCC(CC)) {
+    Op0ExtV = SDValue(Op0ZExt, 0);
+    Op1ExtV = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::v16i32, Op->getOperand(1));
+  } else
+    return SDValue();
+
+  return DAG.getNode(ISD::SETCC, DL, MVT::v16i1, Op0ExtV, Op1ExtV,
+                     Op->getOperand(2));
+}
+
 static SDValue performSETCCCombine(SDNode *N, SelectionDAG &DAG) {
   assert(N->getOpcode() == ISD::SETCC && "Unexpected opcode!");
   SDValue LHS = N->getOperand(0);
   SDValue RHS = N->getOperand(1);
   ISD::CondCode Cond = cast<CondCodeSDNode>(N->getOperand(2))->get();
 
+  if (SDValue V = tryToWidenSetCCOperands(N, DAG))
+    return V;
+
   // setcc (csel 0, 1, cond, X), 1, ne ==> csel 0, 1, !cond, X
   if (Cond == ISD::SETNE && isOneConstant(RHS) &&
       LHS->getOpcode() == AArch64ISD::CSEL &&
@@ -18738,7 +18781,6 @@
   case ISD::VECREDUCE_UMIN:
     Results.push_back(LowerVECREDUCE(SDValue(N, 0), DAG));
     return;
-
   case ISD::CTPOP:
     if (SDValue Result = LowerCTPOP(SDValue(N, 0), DAG))
       Results.push_back(Result);
diff --git a/llvm/test/CodeGen/AArch64/vselect-ext.ll b/llvm/test/CodeGen/AArch64/vselect-ext.ll
--- a/llvm/test/CodeGen/AArch64/vselect-ext.ll
+++ b/llvm/test/CodeGen/AArch64/vselect-ext.ll
@@ -85,24 +85,21 @@
 define <16 x i32> @same_zext_used_in_cmp_unsigned_pred_and_select(<16 x i8> %a) {
 ; CHECK-LABEL: same_zext_used_in_cmp_unsigned_pred_and_select:
 ; CHECK:       ; %bb.0: ; %entry
-; CHECK-NEXT:    movi.16b v1, #10
-; CHECK-NEXT:    ushll.8h v2, v0, #0
-; CHECK-NEXT:    ushll2.8h v3, v0, #0
-; CHECK-NEXT:    ushll.4s v4, v2, #0
-; CHECK-NEXT:    cmhi.16b v0, v0, v1
-; CHECK-NEXT:    ushll.4s v5, v3, #0
-; CHECK-NEXT:    ushll2.4s v1, v3, #0
-; CHECK-NEXT:    sshll.8h v3, v0, #0
-; CHECK-NEXT:    sshll2.8h v0, v0, #0
-; CHECK-NEXT:    ushll2.4s v2, v2, #0
-; CHECK-NEXT:    sshll.4s v6, v3, #0
-; CHECK-NEXT:    sshll.4s v7, v0, #0
-; CHECK-NEXT:    sshll2.4s v0, v0, #0
-; CHECK-NEXT:    sshll2.4s v16, v3, #0
-; CHECK-NEXT:    and.16b v3, v1, v0
-; CHECK-NEXT:    and.16b v1, v2, v16
-; CHECK-NEXT:    and.16b v2, v5, v7
-; CHECK-NEXT:    and.16b v0, v4, v6
+; CHECK-NEXT:    movi.4s v1, #10
+; CHECK-NEXT:    ushll2.8h v2, v0, #0
+; CHECK-NEXT:    ushll.8h v0, v0, #0
+; CHECK-NEXT:    ushll2.4s v3, v2, #0
+; CHECK-NEXT:    ushll2.4s v4, v0, #0
+; CHECK-NEXT:    ushll.4s v0, v0, #0
+; CHECK-NEXT:    ushll.4s v2, v2, #0
+; CHECK-NEXT:    cmhi.4s v5, v0, v1
+; CHECK-NEXT:    cmhi.4s v6, v2, v1
+; CHECK-NEXT:    cmhi.4s v7, v3, v1
+; CHECK-NEXT:    cmhi.4s v1, v4, v1
+; CHECK-NEXT:    and.16b v3, v3, v7
+; CHECK-NEXT:    and.16b v1, v4, v1
+; CHECK-NEXT:    and.16b v2, v2, v6
+; CHECK-NEXT:    and.16b v0, v0, v5
 ; CHECK-NEXT:    ret
 entry:
   %ext = zext <16 x i8> %a to <16 x i32>
@@ -114,24 +111,21 @@
 define <16 x i32> @same_sext_used_in_cmp_signed_pred_and_select(<16 x i8> %a) {
 ; CHECK-LABEL: same_sext_used_in_cmp_signed_pred_and_select:
 ; CHECK:       ; %bb.0: ; %entry
-; CHECK-NEXT:    movi.16b v1, #10
-; CHECK-NEXT:    sshll.8h v3, v0, #0
+; CHECK-NEXT:    movi.4s v1, #10
 ; CHECK-NEXT:    sshll2.8h v2, v0, #0
-; CHECK-NEXT:    cmgt.16b v0, v0, v1
-; CHECK-NEXT:    ext.16b v1, v3, v3, #8
-; CHECK-NEXT:    sshll.8h v5, v0, #0
-; CHECK-NEXT:    sshll2.8h v0, v0, #0
-; CHECK-NEXT:    ext.16b v4, v2, v2, #8
-; CHECK-NEXT:    ext.16b v6, v5, v5, #8
-; CHECK-NEXT:    ext.16b v7, v0, v0, #8
-; CHECK-NEXT:    and.8b v0, v2, v0
-; CHECK-NEXT:    sshll.4s v2, v0, #0
-; CHECK-NEXT:    and.8b v0, v3, v5
-; CHECK-NEXT:    and.8b v1, v1, v6
-; CHECK-NEXT:    and.8b v3, v4, v7
+; CHECK-NEXT:    sshll.8h v0, v0, #0
+; CHECK-NEXT:    sshll2.4s v3, v2, #0
+; CHECK-NEXT:    sshll2.4s v4, v0, #0
 ; CHECK-NEXT:    sshll.4s v0, v0, #0
-; CHECK-NEXT:    sshll.4s v1, v1, #0
-; CHECK-NEXT:    sshll.4s v3, v3, #0
+; CHECK-NEXT:    sshll.4s v2, v2, #0
+; CHECK-NEXT:    cmgt.4s v5, v0, v1
+; CHECK-NEXT:    cmgt.4s v6, v2, v1
+; CHECK-NEXT:    cmgt.4s v7, v3, v1
+; CHECK-NEXT:    cmgt.4s v1, v4, v1
+; CHECK-NEXT:    and.16b v3, v3, v7
+; CHECK-NEXT:    and.16b v1, v4, v1
+; CHECK-NEXT:    and.16b v2, v2, v6
+; CHECK-NEXT:    and.16b v0, v0, v5
 ; CHECK-NEXT:    ret
 entry:
   %ext = sext <16 x i8> %a to <16 x i32>
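
For reference, a minimal IR sketch of the pattern the new combine targets (hypothetical function name and constant, modeled on the zext test above; not part of the patch): the icmp and the select both consume the same zero-extension of %a, so the combine rewrites the setcc to compare the already-widened v16i32 values, and the resulting v16i1 mask no longer needs its own sign-extension chain when the vselect is lowered.

define <16 x i32> @zext_reused_by_cmp_and_select(<16 x i8> %a) {
entry:
  ; %a is widened once; the combine finds this existing ZERO_EXTEND node
  ; via DAG.getNodeIfExists.
  %ext = zext <16 x i8> %a to <16 x i32>
  ; Unsigned predicate + existing zext + splat constant RHS: all the
  ; preconditions checked by tryToWidenSetCCOperands hold here.
  %cmp = icmp ugt <16 x i8> %a, <i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10>
  %sel = select <16 x i1> %cmp, <16 x i32> %ext, <16 x i32> zeroinitializer
  ret <16 x i32> %sel
}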