diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -18026,6 +18026,63 @@
   return performCONDCombine(N, DCI, DAG, 2, 3);
 }
 
+// Try to re-use an already extended operand of a vector SetCC feeding an
+// extended select. Doing so avoids requiring another full extension of the
+// SET_CC result when lowering the select.
+//
+// \p Op is the SETCC node to inspect. On success, returns a new SETCC over the
+// widened operands whose type is the selects' result type with i1 elements;
+// otherwise returns an empty SDValue.
+static SDValue tryToWidenSetCCOperands(SDNode *Op, SelectionDAG &DAG) {
+  EVT Op0MVT = Op->getOperand(0).getValueType();
+  if (!Op0MVT.isVector() || Op->use_empty())
+    return SDValue();
+
+  // Make sure that all uses of Op are VSELECTs with result matching types where
+  // the result type has a larger element type than the SetCC operand.
+  SDNode *FirstUse = *Op->use_begin();
+  if (FirstUse->getOpcode() != ISD::VSELECT)
+    return SDValue();
+  EVT UseMVT = FirstUse->getValueType(0);
+  if (UseMVT.getScalarSizeInBits() <= Op0MVT.getScalarSizeInBits())
+    return SDValue();
+  if (any_of(Op->uses(), [&UseMVT](const SDNode *N) {
+        return N->getOpcode() != ISD::VSELECT || N->getValueType(0) != UseMVT;
+      }))
+    return SDValue();
+
+  // Only comparisons against a constant splat are handled. The splat value
+  // itself is not needed; V merely receives it.
+  APInt V;
+  if (!ISD::isConstantSplatVector(Op->getOperand(1).getNode(), V))
+    return SDValue();
+
+  SDLoc DL(Op);
+  SDValue Op0ExtV;
+  SDValue Op1ExtV;
+  ISD::CondCode CC = cast<CondCodeSDNode>(Op->getOperand(2))->get();
+  // Check if the first operand of the SET_CC is already extended. If it is,
+  // split the SET_CC and re-use the extended version of the operand.
+  SDNode *Op0SExt = DAG.getNodeIfExists(ISD::SIGN_EXTEND, DAG.getVTList(UseMVT),
+                                        Op->getOperand(0));
+  SDNode *Op0ZExt = DAG.getNodeIfExists(ISD::ZERO_EXTEND, DAG.getVTList(UseMVT),
+                                        Op->getOperand(0));
+  // The kind of extension must agree with the signedness of the predicate;
+  // operand 1 is extended the same way as operand 0.
+  if (Op0SExt && isSignedIntSetCC(CC)) {
+    Op0ExtV = SDValue(Op0SExt, 0);
+    Op1ExtV = DAG.getNode(ISD::SIGN_EXTEND, DL, UseMVT, Op->getOperand(1));
+  } else if (Op0ZExt && isUnsignedIntSetCC(CC)) {
+    Op0ExtV = SDValue(Op0ZExt, 0);
+    Op1ExtV = DAG.getNode(ISD::ZERO_EXTEND, DL, UseMVT, Op->getOperand(1));
+  } else {
+    return SDValue();
+  }
+
+  return DAG.getNode(ISD::SETCC, DL, UseMVT.changeVectorElementType(MVT::i1),
+                     Op0ExtV, Op1ExtV, Op->getOperand(2));
+}
+
 static SDValue performSETCCCombine(SDNode *N, SelectionDAG &DAG) {
   assert(N->getOpcode() == ISD::SETCC && "Unexpected opcode!");
   SDValue LHS = N->getOperand(0);
@@ -18034,6 +18091,9 @@
   SDLoc DL(N);
   EVT VT = N->getValueType(0);
 
+  if (SDValue V = tryToWidenSetCCOperands(N, DAG))
+    return V;
+
   // setcc (csel 0, 1, cond, X), 1, ne ==> csel 0, 1, !cond, X
   if (Cond == ISD::SETNE && isOneConstant(RHS) &&
       LHS->getOpcode() == AArch64ISD::CSEL &&
diff --git a/llvm/test/CodeGen/AArch64/vselect-ext.ll b/llvm/test/CodeGen/AArch64/vselect-ext.ll
--- a/llvm/test/CodeGen/AArch64/vselect-ext.ll
+++ b/llvm/test/CodeGen/AArch64/vselect-ext.ll
@@ -85,26 +85,23 @@
 define <8 x i64> @same_zext_used_in_cmp_unsigned_pred_and_select_v8i64(<8 x i8> %a) {
 ; CHECK-LABEL: same_zext_used_in_cmp_unsigned_pred_and_select_v8i64:
 ; CHECK: ; %bb.0:
-; CHECK-NEXT: movi.8b v1, #10
-; CHECK-NEXT: ushll.8h v2, v0, #0
-; CHECK-NEXT: cmhi.8b v0, v0, v1
-; CHECK-NEXT: ushll.4s v1, v2, #0
-; CHECK-NEXT: sshll.8h v0, v0, #0
-; CHECK-NEXT: ushll2.4s v2, v2, #0
-; CHECK-NEXT: sshll.4s v3, v0, #0
-; CHECK-NEXT: sshll2.4s v0, v0, #0
-; CHECK-NEXT: ushll.2d v4, v1, #0
-; CHECK-NEXT: ushll.2d v5, v2, #0
-; CHECK-NEXT: ushll2.2d v1, v1, #0
-; CHECK-NEXT: ushll2.2d v2, v2, #0
-; CHECK-NEXT: sshll.2d v6, v3, #0
-; CHECK-NEXT: sshll.2d v7, v0, #0
-; CHECK-NEXT: 
sshll2.2d v0, v0, #0 -; CHECK-NEXT: sshll2.2d v16, v3, #0 -; CHECK-NEXT: and.16b v3, v2, v0 -; CHECK-NEXT: and.16b v1, v1, v16 -; CHECK-NEXT: and.16b v2, v5, v7 -; CHECK-NEXT: and.16b v0, v4, v6 +; CHECK-NEXT: ushll.8h v0, v0, #0 +; CHECK-NEXT: mov w8, #10 +; CHECK-NEXT: ushll2.4s v2, v0, #0 +; CHECK-NEXT: ushll.4s v0, v0, #0 +; CHECK-NEXT: dup.2d v1, x8 +; CHECK-NEXT: ushll2.2d v3, v2, #0 +; CHECK-NEXT: ushll2.2d v4, v0, #0 +; CHECK-NEXT: ushll.2d v0, v0, #0 +; CHECK-NEXT: ushll.2d v2, v2, #0 +; CHECK-NEXT: cmhi.2d v5, v0, v1 +; CHECK-NEXT: cmhi.2d v6, v2, v1 +; CHECK-NEXT: cmhi.2d v7, v3, v1 +; CHECK-NEXT: cmhi.2d v1, v4, v1 +; CHECK-NEXT: and.16b v3, v3, v7 +; CHECK-NEXT: and.16b v1, v4, v1 +; CHECK-NEXT: and.16b v2, v2, v6 +; CHECK-NEXT: and.16b v0, v0, v5 ; CHECK-NEXT: ret %ext = zext <8 x i8> %a to <8 x i64> %cmp = icmp ugt <8 x i8> %a, @@ -116,24 +113,21 @@ define <16 x i32> @same_zext_used_in_cmp_unsigned_pred_and_select_v16i32(<16 x i8> %a) { ; CHECK-LABEL: same_zext_used_in_cmp_unsigned_pred_and_select_v16i32: ; CHECK: ; %bb.0: -; CHECK-NEXT: movi.16b v1, #10 -; CHECK-NEXT: ushll.8h v2, v0, #0 -; CHECK-NEXT: ushll2.8h v3, v0, #0 -; CHECK-NEXT: ushll.4s v4, v2, #0 -; CHECK-NEXT: cmhi.16b v0, v0, v1 -; CHECK-NEXT: ushll.4s v5, v3, #0 -; CHECK-NEXT: ushll2.4s v1, v3, #0 -; CHECK-NEXT: sshll.8h v3, v0, #0 -; CHECK-NEXT: sshll2.8h v0, v0, #0 -; CHECK-NEXT: ushll2.4s v2, v2, #0 -; CHECK-NEXT: sshll.4s v6, v3, #0 -; CHECK-NEXT: sshll.4s v7, v0, #0 -; CHECK-NEXT: sshll2.4s v0, v0, #0 -; CHECK-NEXT: sshll2.4s v16, v3, #0 -; CHECK-NEXT: and.16b v3, v1, v0 -; CHECK-NEXT: and.16b v1, v2, v16 -; CHECK-NEXT: and.16b v2, v5, v7 -; CHECK-NEXT: and.16b v0, v4, v6 +; CHECK-NEXT: movi.4s v1, #10 +; CHECK-NEXT: ushll2.8h v2, v0, #0 +; CHECK-NEXT: ushll.8h v0, v0, #0 +; CHECK-NEXT: ushll2.4s v3, v2, #0 +; CHECK-NEXT: ushll2.4s v4, v0, #0 +; CHECK-NEXT: ushll.4s v0, v0, #0 +; CHECK-NEXT: ushll.4s v2, v2, #0 +; CHECK-NEXT: cmhi.4s v5, v0, v1 +; CHECK-NEXT: cmhi.4s v6, v2, v1 +; 
CHECK-NEXT: cmhi.4s v7, v3, v1 +; CHECK-NEXT: cmhi.4s v1, v4, v1 +; CHECK-NEXT: and.16b v3, v3, v7 +; CHECK-NEXT: and.16b v1, v4, v1 +; CHECK-NEXT: and.16b v2, v2, v6 +; CHECK-NEXT: and.16b v0, v0, v5 ; CHECK-NEXT: ret %ext = zext <16 x i8> %a to <16 x i32> %cmp = icmp ugt <16 x i8> %a, @@ -144,16 +138,14 @@ define <8 x i32> @same_zext_used_in_cmp_unsigned_pred_and_select_v8i32(<8 x i8> %a) { ; CHECK-LABEL: same_zext_used_in_cmp_unsigned_pred_and_select_v8i32: ; CHECK: ; %bb.0: -; CHECK-NEXT: movi.8b v1, #10 -; CHECK-NEXT: ushll.8h v2, v0, #0 -; CHECK-NEXT: ushll.4s v3, v2, #0 -; CHECK-NEXT: cmhi.8b v0, v0, v1 -; CHECK-NEXT: ushll2.4s v1, v2, #0 -; CHECK-NEXT: sshll.8h v0, v0, #0 -; CHECK-NEXT: sshll2.4s v2, v0, #0 -; CHECK-NEXT: sshll.4s v0, v0, #0 -; CHECK-NEXT: and.16b v1, v1, v2 -; CHECK-NEXT: and.16b v0, v3, v0 +; CHECK-NEXT: movi.4s v1, #10 +; CHECK-NEXT: ushll.8h v0, v0, #0 +; CHECK-NEXT: ushll2.4s v2, v0, #0 +; CHECK-NEXT: ushll.4s v0, v0, #0 +; CHECK-NEXT: cmhi.4s v3, v2, v1 +; CHECK-NEXT: cmhi.4s v4, v0, v1 +; CHECK-NEXT: and.16b v1, v2, v3 +; CHECK-NEXT: and.16b v0, v0, v4 ; CHECK-NEXT: ret %ext = zext <8 x i8> %a to <8 x i32> %cmp = icmp ugt <8 x i8> %a, @@ -174,14 +166,13 @@ ; check-next: ret ; CHECK-LABEL: same_zext_used_in_cmp_unsigned_pred_and_select_v8i32_2: ; CHECK: ; %bb.0: -; CHECK-NEXT: movi.8h v1, #10 -; CHECK-NEXT: ushll.4s v2, v0, #0 -; CHECK-NEXT: cmhi.8h v1, v0, v1 -; CHECK-NEXT: ushll2.4s v0, v0, #0 -; CHECK-NEXT: sshll2.4s v3, v1, #0 -; CHECK-NEXT: sshll.4s v4, v1, #0 -; CHECK-NEXT: and.16b v1, v0, v3 -; CHECK-NEXT: and.16b v0, v2, v4 +; CHECK-NEXT: movi.4s v1, #10 +; CHECK-NEXT: ushll2.4s v2, v0, #0 +; CHECK-NEXT: ushll.4s v0, v0, #0 +; CHECK-NEXT: cmhi.4s v3, v2, v1 +; CHECK-NEXT: cmhi.4s v4, v0, v1 +; CHECK-NEXT: and.16b v1, v2, v3 +; CHECK-NEXT: and.16b v0, v0, v4 ; CHECK-NEXT: ret %ext = zext <8 x i16> %a to <8 x i32> %cmp = icmp ugt <8 x i16> %a, @@ -237,11 +228,10 @@ ; check-next: ret ; CHECK-LABEL: 
same_zext_used_in_cmp_unsigned_pred_and_select_v4i32: ; CHECK: ; %bb.0: -; CHECK-NEXT: movi.4h v1, #10 -; CHECK-NEXT: ushll.4s v2, v0, #0 -; CHECK-NEXT: cmhi.4h v0, v0, v1 -; CHECK-NEXT: sshll.4s v0, v0, #0 -; CHECK-NEXT: and.16b v0, v2, v0 +; CHECK-NEXT: movi.4s v1, #10 +; CHECK-NEXT: ushll.4s v0, v0, #0 +; CHECK-NEXT: cmhi.4s v1, v0, v1 +; CHECK-NEXT: and.16b v0, v0, v1 ; CHECK-NEXT: ret %ext = zext <4 x i16> %a to <4 x i32> %cmp = icmp ugt <4 x i16> %a, @@ -334,24 +324,21 @@ define <16 x i32> @same_sext_used_in_cmp_signed_pred_and_select_v16i32(<16 x i8> %a) { ; CHECK-LABEL: same_sext_used_in_cmp_signed_pred_and_select_v16i32: ; CHECK: ; %bb.0: ; %entry -; CHECK-NEXT: movi.16b v1, #10 -; CHECK-NEXT: sshll.8h v3, v0, #0 +; CHECK-NEXT: movi.4s v1, #10 ; CHECK-NEXT: sshll2.8h v2, v0, #0 -; CHECK-NEXT: cmgt.16b v0, v0, v1 -; CHECK-NEXT: ext.16b v1, v3, v3, #8 -; CHECK-NEXT: sshll.8h v5, v0, #0 -; CHECK-NEXT: sshll2.8h v0, v0, #0 -; CHECK-NEXT: ext.16b v4, v2, v2, #8 -; CHECK-NEXT: ext.16b v6, v5, v5, #8 -; CHECK-NEXT: ext.16b v7, v0, v0, #8 -; CHECK-NEXT: and.8b v0, v2, v0 -; CHECK-NEXT: sshll.4s v2, v0, #0 -; CHECK-NEXT: and.8b v0, v3, v5 -; CHECK-NEXT: and.8b v1, v1, v6 -; CHECK-NEXT: and.8b v3, v4, v7 +; CHECK-NEXT: sshll.8h v0, v0, #0 +; CHECK-NEXT: sshll2.4s v3, v2, #0 +; CHECK-NEXT: sshll2.4s v4, v0, #0 ; CHECK-NEXT: sshll.4s v0, v0, #0 -; CHECK-NEXT: sshll.4s v1, v1, #0 -; CHECK-NEXT: sshll.4s v3, v3, #0 +; CHECK-NEXT: sshll.4s v2, v2, #0 +; CHECK-NEXT: cmgt.4s v5, v0, v1 +; CHECK-NEXT: cmgt.4s v6, v2, v1 +; CHECK-NEXT: cmgt.4s v7, v3, v1 +; CHECK-NEXT: cmgt.4s v1, v4, v1 +; CHECK-NEXT: and.16b v3, v3, v7 +; CHECK-NEXT: and.16b v1, v4, v1 +; CHECK-NEXT: and.16b v2, v2, v6 +; CHECK-NEXT: and.16b v0, v0, v5 ; CHECK-NEXT: ret entry: %ext = sext <16 x i8> %a to <16 x i32> @@ -363,14 +350,13 @@ define <8 x i32> @same_sext_used_in_cmp_signed_pred_and_select_v8i32(<8 x i16> %a) { ; CHECK-LABEL: same_sext_used_in_cmp_signed_pred_and_select_v8i32: ; CHECK: ; %bb.0: ; 
%entry -; CHECK-NEXT: movi.8h v1, #10 -; CHECK-NEXT: ext.16b v2, v0, v0, #8 -; CHECK-NEXT: cmgt.8h v1, v0, v1 -; CHECK-NEXT: ext.16b v3, v1, v1, #8 -; CHECK-NEXT: and.8b v0, v0, v1 +; CHECK-NEXT: movi.4s v1, #10 +; CHECK-NEXT: sshll2.4s v2, v0, #0 ; CHECK-NEXT: sshll.4s v0, v0, #0 -; CHECK-NEXT: and.8b v1, v2, v3 -; CHECK-NEXT: sshll.4s v1, v1, #0 +; CHECK-NEXT: cmgt.4s v3, v2, v1 +; CHECK-NEXT: cmgt.4s v4, v0, v1 +; CHECK-NEXT: and.16b v1, v2, v3 +; CHECK-NEXT: and.16b v0, v0, v4 ; CHECK-NEXT: ret entry: %ext = sext <8 x i16> %a to <8 x i32>