diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -17061,6 +17061,39 @@ return performCONDCombine(N, DCI, DAG, 2, 3); } +// If there is a node zero extending V to v16i32 either through an explicit +// ZERO_EXTEND node or implicitly through a shuffle and cast, return the node of +// the extension result. Otherwise return nullptr. +static SDNode *getZExtOrEquivalentShuffleCast(SDValue V, SelectionDAG &DAG) { + if (auto *ZExt = + DAG.getNodeIfExists(ISD::ZERO_EXTEND, DAG.getVTList(MVT::v16i32), V)) + return ZExt; + + // Check if there's a sequence of nodes implicitly zero extending V. That is + // v = CONCAT_VECTORS V, undef, undef, undef + // mask = BUILD_VECTOR <64, 64, 64, 0, 64, 64, 64,1,....> + // v2 = VECTOR_SHUFFLE mask, v, <0, undef, undef, .....> + // res = BIT_CAST v2 to v16i32 + auto Und = DAG.getUNDEF(MVT::v16i8); + auto *Concat = DAG.getNodeIfExists( + ISD::CONCAT_VECTORS, DAG.getVTList(MVT::v64i8), {V, Und, Und, Und}); + if (!Concat) + return nullptr; + + SmallVector<int> NewMask(64, 64); + for (unsigned i = 0; i < 16; i++) + NewMask[i * 4 + 3] = i; + + SDValue Undef = DAG.getUNDEF(MVT::i8); + SmallVector<SDValue> Constants(64, Undef); + Constants[0] = DAG.getConstant(0, SDLoc(), MVT::i8); + SDValue ZeroSplat = + DAG.getNode(ISD::BUILD_VECTOR, SDLoc(), MVT::v64i8, Constants); + auto Shuffle = DAG.getVectorShuffle(MVT::v64i8, SDLoc(), SDValue(Concat, 0), + ZeroSplat, NewMask); + return DAG.getNodeIfExists(ISD::BITCAST, DAG.getVTList(MVT::v16i32), Shuffle); +} + // Try to re-use an already extended operand of a v16i8 SetCC feeding a // extended select. Doing so avoids requiring another full extension of the // SET_CC result when lowering the select. @@ -17086,8 +17119,7 @@ // split the SET_CC and re-use the extended version of the operand. 
SDNode *Op0SExt = DAG.getNodeIfExists( ISD::SIGN_EXTEND, DAG.getVTList(MVT::v16i32), Op->getOperand(0)); - SDNode *Op0ZExt = DAG.getNodeIfExists( - ISD::ZERO_EXTEND, DAG.getVTList(MVT::v16i32), Op->getOperand(0)); + SDNode *Op0ZExt = getZExtOrEquivalentShuffleCast(Op->getOperand(0), DAG); if (Op0SExt && isSignedIntSetCC(CC)) { Op0ExtV = SDValue(Op0SExt, 0); Op1ExtV = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::v16i32, Op->getOperand(1)); diff --git a/llvm/test/CodeGen/AArch64/vselect-ext.ll b/llvm/test/CodeGen/AArch64/vselect-ext.ll --- a/llvm/test/CodeGen/AArch64/vselect-ext.ll +++ b/llvm/test/CodeGen/AArch64/vselect-ext.ll @@ -200,7 +200,7 @@ ; CHECK-NEXT: adrp x11, lCPI7_2@PAGE ; CHECK-NEXT: Lloh3: ; CHECK-NEXT: adrp x12, lCPI7_3@PAGE -; CHECK-NEXT: movi.2d v1, #0xffffffffffffffff +; CHECK-NEXT: movi.2d v1, #0x0000ff000000ff ; CHECK-NEXT: mov x8, xzr ; CHECK-NEXT: movi.2d v3, #0000000000000000 ; CHECK-NEXT: Lloh4: @@ -216,23 +216,20 @@ ; CHECK-NEXT: ldr q4, [x0, x8] ; CHECK-NEXT: add x8, x8, #16 ; CHECK-NEXT: cmp x8, #128 -; CHECK-NEXT: cmgt.16b v7, v4, v1 -; CHECK-NEXT: tbl.16b v16, { v3, v4 }, v0 -; CHECK-NEXT: tbl.16b v17, { v3, v4 }, v2 -; CHECK-NEXT: sshll2.8h v20, v7, #0 -; CHECK-NEXT: tbl.16b v18, { v3, v4 }, v5 -; CHECK-NEXT: sshll2.4s v21, v20, #0 -; CHECK-NEXT: sshll.4s v20, v20, #0 -; CHECK-NEXT: tbl.16b v19, { v3, v4 }, v6 -; CHECK-NEXT: sshll.8h v7, v7, #0 -; CHECK-NEXT: and.16b v16, v16, v21 -; CHECK-NEXT: and.16b v17, v17, v20 -; CHECK-NEXT: stp q17, q16, [x1, #32] -; CHECK-NEXT: sshll2.4s v16, v7, #0 -; CHECK-NEXT: sshll.4s v7, v7, #0 -; CHECK-NEXT: and.16b v16, v18, v16 -; CHECK-NEXT: and.16b v7, v19, v7 -; CHECK-NEXT: stp q7, q16, [x1], #64 +; CHECK-NEXT: tbl.16b v7, { v3, v4 }, v0 +; CHECK-NEXT: tbl.16b v16, { v3, v4 }, v2 +; CHECK-NEXT: tbl.16b v17, { v3, v4 }, v5 +; CHECK-NEXT: tbl.16b v18, { v3, v4 }, v6 +; CHECK-NEXT: cmhi.4s v19, v1, v7 +; CHECK-NEXT: cmhi.4s v20, v1, v16 +; CHECK-NEXT: cmhi.4s v21, v1, v17 +; CHECK-NEXT: cmhi.4s v22, v1, v18 
+; CHECK-NEXT: and.16b v7, v7, v19 +; CHECK-NEXT: and.16b v16, v16, v20 +; CHECK-NEXT: stp q16, q7, [x1, #32] +; CHECK-NEXT: and.16b v7, v17, v21 +; CHECK-NEXT: and.16b v16, v18, v22 +; CHECK-NEXT: stp q16, q7, [x1], #64 ; CHECK-NEXT: b.ne LBB7_1 ; CHECK-NEXT: ; %bb.2: ; %exit ; CHECK-NEXT: ret @@ -273,7 +270,7 @@ ; CHECK-NEXT: adrp x11, lCPI8_2@PAGE ; CHECK-NEXT: Lloh11: ; CHECK-NEXT: adrp x12, lCPI8_3@PAGE -; CHECK-NEXT: movi.2d v1, #0xffffffffffffffff +; CHECK-NEXT: movi.2d v1, #0x0000ff000000ff ; CHECK-NEXT: mov x8, xzr ; CHECK-NEXT: movi.2d v3, #0000000000000000 ; CHECK-NEXT: Lloh12: @@ -289,23 +286,20 @@ ; CHECK-NEXT: ldr q4, [x0, x8] ; CHECK-NEXT: add x8, x8, #16 ; CHECK-NEXT: cmp x8, #128 -; CHECK-NEXT: cmgt.16b v7, v4, v1 -; CHECK-NEXT: tbl.16b v16, { v3, v4 }, v0 -; CHECK-NEXT: tbl.16b v17, { v3, v4 }, v2 -; CHECK-NEXT: sshll2.8h v20, v7, #0 -; CHECK-NEXT: tbl.16b v18, { v3, v4 }, v5 -; CHECK-NEXT: sshll2.4s v21, v20, #0 -; CHECK-NEXT: sshll.4s v20, v20, #0 -; CHECK-NEXT: tbl.16b v19, { v3, v4 }, v6 -; CHECK-NEXT: sshll.8h v7, v7, #0 -; CHECK-NEXT: and.16b v16, v16, v21 -; CHECK-NEXT: and.16b v17, v17, v20 -; CHECK-NEXT: stp q17, q16, [x1, #32] -; CHECK-NEXT: sshll2.4s v16, v7, #0 -; CHECK-NEXT: sshll.4s v7, v7, #0 -; CHECK-NEXT: and.16b v16, v18, v16 -; CHECK-NEXT: and.16b v7, v19, v7 -; CHECK-NEXT: stp q7, q16, [x1], #64 +; CHECK-NEXT: tbl.16b v7, { v3, v4 }, v0 +; CHECK-NEXT: tbl.16b v16, { v3, v4 }, v2 +; CHECK-NEXT: tbl.16b v17, { v3, v4 }, v5 +; CHECK-NEXT: tbl.16b v18, { v3, v4 }, v6 +; CHECK-NEXT: cmhi.4s v19, v1, v7 +; CHECK-NEXT: cmhi.4s v20, v1, v16 +; CHECK-NEXT: cmhi.4s v21, v1, v17 +; CHECK-NEXT: cmhi.4s v22, v1, v18 +; CHECK-NEXT: and.16b v7, v7, v19 +; CHECK-NEXT: and.16b v16, v16, v20 +; CHECK-NEXT: stp q16, q7, [x1, #32] +; CHECK-NEXT: and.16b v7, v17, v21 +; CHECK-NEXT: and.16b v16, v18, v22 +; CHECK-NEXT: stp q16, q7, [x1], #64 ; CHECK-NEXT: b.ne LBB8_1 ; CHECK-NEXT: ; %bb.2: ; %exit ; CHECK-NEXT: ret