diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -15561,6 +15561,21 @@
   return SDValue();
 }
 
+// Combine for AArch64ISD::UUNPKLO / AArch64ISD::UUNPKHI nodes: when the
+// vector being unpacked is undef, the node is replaced by an undef of its
+// own result type. Returns an empty SDValue when no fold applies.
+static SDValue performUnpackCombine(SDNode *N, SelectionDAG &DAG) {
+  assert((N->getOpcode() == AArch64ISD::UUNPKHI ||
+          N->getOpcode() == AArch64ISD::UUNPKLO) &&
+         "Unexpected Opcode!");
+
+  // uunpklo/hi undef -> undef
+  if (N->getOperand(0).isUndef())
+    return DAG.getUNDEF(N->getValueType(0));
+
+  return SDValue();
+}
+
 static SDValue performUzpCombine(SDNode *N, SelectionDAG &DAG) {
   SDLoc DL(N);
   SDValue Op0 = N->getOperand(0);
@@ -17227,6 +17242,9 @@
     return performNVCASTCombine(N);
   case AArch64ISD::SPLICE:
     return performSpliceCombine(N, DAG);
+  case AArch64ISD::UUNPKLO:
+  case AArch64ISD::UUNPKHI:
+    return performUnpackCombine(N, DAG);
   case AArch64ISD::UZP1:
     return performUzpCombine(N, DAG);
   case AArch64ISD::SETCC_MERGE_ZERO:
diff --git a/llvm/test/CodeGen/AArch64/sve-insert-vector.ll b/llvm/test/CodeGen/AArch64/sve-insert-vector.ll
--- a/llvm/test/CodeGen/AArch64/sve-insert-vector.ll
+++ b/llvm/test/CodeGen/AArch64/sve-insert-vector.ll
@@ -404,8 +404,7 @@
 define <vscale x 3 x i32> @insert_nxv3i32_nxv2i32(<vscale x 2 x i32> %sv0) {
 ; CHECK-LABEL: insert_nxv3i32_nxv2i32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    uunpkhi z1.d, z0.s
-; CHECK-NEXT:    uzp1 z0.s, z0.s, z1.s
+; CHECK-NEXT:    uzp1 z0.s, z0.s, z0.s
 ; CHECK-NEXT:    ret
   %v0 = call <vscale x 3 x i32> @llvm.experimental.vector.insert.nxv3i32.nxv2i32(<vscale x 3 x i32> undef, <vscale x 2 x i32> %sv0, i64 0)
   ret <vscale x 3 x i32> %v0
@@ -443,13 +442,9 @@
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
 ; CHECK-NEXT:    addvl sp, sp, #-2
-; CHECK-NEXT:    uzp1 z0.s, z0.s, z1.s
 ; CHECK-NEXT:    ptrue p0.s
-; CHECK-NEXT:    uunpklo z1.d, z0.s
-; CHECK-NEXT:    ptrue p1.d
+; CHECK-NEXT:    uzp1 z0.s, z0.s, z1.s
 ; CHECK-NEXT:    st1w { z0.s }, p0, [sp]
-; CHECK-NEXT:    st1w { z1.d }, p1, [sp, #2, mul vl]
-; CHECK-NEXT:    ld1w { z0.s }, p0/z, [sp]
 ; CHECK-NEXT:    ld1w { z1.s }, p0/z, [sp, #1, mul vl]
 ; CHECK-NEXT:    addvl sp, sp, #2
 ; CHECK-NEXT:    ldr x29, [sp], #16 // 8-byte Folded Reload