diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
@@ -908,6 +908,7 @@
   SDValue WidenVecRes_CONCAT_VECTORS(SDNode* N);
   SDValue WidenVecRes_EXTEND_VECTOR_INREG(SDNode* N);
   SDValue WidenVecRes_EXTRACT_SUBVECTOR(SDNode* N);
+  SDValue WidenVecRes_INSERT_SUBVECTOR(SDNode *N);
   SDValue WidenVecRes_INSERT_VECTOR_ELT(SDNode* N);
   SDValue WidenVecRes_LOAD(SDNode* N);
   SDValue WidenVecRes_MLOAD(MaskedLoadSDNode* N);
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
@@ -3038,6 +3038,9 @@
   case ISD::BITCAST: Res = WidenVecRes_BITCAST(N); break;
   case ISD::BUILD_VECTOR: Res = WidenVecRes_BUILD_VECTOR(N); break;
   case ISD::CONCAT_VECTORS: Res = WidenVecRes_CONCAT_VECTORS(N); break;
+  case ISD::INSERT_SUBVECTOR:
+    Res = WidenVecRes_INSERT_SUBVECTOR(N);
+    break;
   case ISD::EXTRACT_SUBVECTOR: Res = WidenVecRes_EXTRACT_SUBVECTOR(N); break;
   case ISD::INSERT_VECTOR_ELT: Res = WidenVecRes_INSERT_VECTOR_ELT(N); break;
   case ISD::LOAD: Res = WidenVecRes_LOAD(N); break;
@@ -4059,6 +4062,18 @@
   return DAG.getBuildVector(WidenVT, dl, Ops);
 }
 
+/// Rebuild an INSERT_SUBVECTOR node whose result type is being widened.
+/// Operand 0 is swapped for the value GetWidenedVector returns for it; the
+/// subvector (operand 1) and index (operand 2) are passed through unchanged.
+/// The new node's type is what TLI.getTypeToTransformTo gives for the result.
+SDValue DAGTypeLegalizer::WidenVecRes_INSERT_SUBVECTOR(SDNode *N) {
+  SDLoc DL(N);
+  EVT WideVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+  SDValue WideVec = GetWidenedVector(N->getOperand(0));
+  return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, WideVT, WideVec,
+                     N->getOperand(1), N->getOperand(2));
+}
+
 SDValue DAGTypeLegalizer::WidenVecRes_EXTRACT_SUBVECTOR(SDNode *N) {
   EVT VT = N->getValueType(0);
   EVT EltVT = VT.getVectorElementType();
diff --git a/llvm/test/CodeGen/AArch64/sve-insert-vector.ll
b/llvm/test/CodeGen/AArch64/sve-insert-vector.ll
--- a/llvm/test/CodeGen/AArch64/sve-insert-vector.ll
+++ b/llvm/test/CodeGen/AArch64/sve-insert-vector.ll
@@ -399,3 +399,59 @@
 
 declare <vscale x 8 x i16> @llvm.experimental.vector.insert.nxv8i16.nxv2i16(<vscale x 8 x i16>, <vscale x 2 x i16>, i64)
 declare <vscale x 8 x half> @llvm.experimental.vector.insert.nxv8f16.nxv2f16(<vscale x 8 x half>, <vscale x 2 x half>, i64)
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;; Unpacked types that need result widening
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+;; Insert a <vscale x 2 x i32> subvector at index 0 of an undef <vscale x 3 x i32>.
+define <vscale x 3 x i32> @insert_nxv3i32_nxv2i32(<vscale x 2 x i32> %sv0) {
+; CHECK-LABEL: insert_nxv3i32_nxv2i32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: uunpkhi z1.d, z0.s
+; CHECK-NEXT: uzp1 z0.s, z0.s, z1.s
+; CHECK-NEXT: ret
+  %v0 = call <vscale x 3 x i32> @llvm.experimental.vector.insert.nxv3i32.nxv2i32(<vscale x 3 x i32> undef, <vscale x 2 x i32> %sv0, i64 0)
+  ret <vscale x 3 x i32> %v0
+}
+
+;; FIXME(review): %sv1 is never used and %v1 is dead because the function
+;; returns %v0, so the insert at index 2 is not exercised and the CHECK lines
+;; are identical to the single-insert case above. Presumably the second call
+;; should insert %sv1 and the function should return %v1; the CHECK lines
+;; would then need regenerating (e.g. with utils/update_llc_test_checks.py).
+define <vscale x 6 x i32> @insert_nxv6i32_nxv2i32(<vscale x 2 x i32> %sv0, <vscale x 2 x i32> %sv1) {
+; CHECK-LABEL: insert_nxv6i32_nxv2i32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: uunpkhi z1.d, z0.s
+; CHECK-NEXT: uzp1 z0.s, z0.s, z1.s
+; CHECK-NEXT: ret
+  %v0 = call <vscale x 6 x i32> @llvm.experimental.vector.insert.nxv6i32.nxv2i32(<vscale x 6 x i32> undef, <vscale x 2 x i32> %sv0, i64 0)
+  %v1 = call <vscale x 6 x i32> @llvm.experimental.vector.insert.nxv6i32.nxv2i32(<vscale x 6 x i32> %v0, <vscale x 2 x i32> %sv0, i64 2)
+  ret <vscale x 6 x i32> %v0
+}
+
+;; This only works because the input vector is undef and index is zero
+define <vscale x 6 x i32> @insert_nxv6i32_nxv3i32(<vscale x 3 x i32> %sv0) {
+; CHECK-LABEL: insert_nxv6i32_nxv3i32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ret
+  %v0 = call <vscale x 6 x i32> @llvm.experimental.vector.insert.nxv6i32.nxv3i32(<vscale x 6 x i32> undef, <vscale x 3 x i32> %sv0, i64 0)
+  ret <vscale x 6 x i32> %v0
+}
+
+;; Build a <vscale x 12 x i32> from three <vscale x 4 x i32> subvectors inserted at indices 0, 4 and 8.
+define <vscale x 12 x i32> @insert_nxv12i32_nxv4i32(<vscale x 4 x i32> %sv0, <vscale x 4 x i32> %sv1, <vscale x 4 x i32> %sv2) {
+; CHECK-LABEL: insert_nxv12i32_nxv4i32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ret
+  %v0 = call <vscale x 12 x i32> @llvm.experimental.vector.insert.nxv12i32.nxv4i32(<vscale x 12 x i32> undef, <vscale x 4 x i32> %sv0, i64 0)
+  %v1 = call <vscale x 12 x i32> @llvm.experimental.vector.insert.nxv12i32.nxv4i32(<vscale x 12 x i32> %v0, <vscale x 4 x i32> %sv1, i64 4)
+  %v2 = call <vscale x 12 x i32> @llvm.experimental.vector.insert.nxv12i32.nxv4i32(<vscale x 12 x i32> %v1, <vscale x 4 x i32> %sv2, i64 8)
+  ret <vscale x 12 x i32> %v2
+}
+
+declare <vscale x 3 x i32> @llvm.experimental.vector.insert.nxv3i32.nxv2i32(<vscale x 3 x i32>, <vscale x 2 x i32>, i64)
+declare <vscale x 6 x i32> @llvm.experimental.vector.insert.nxv6i32.nxv2i32(<vscale x 6 x i32>, <vscale x 2 x i32>, i64)
+declare <vscale x 6 x i32> @llvm.experimental.vector.insert.nxv6i32.nxv3i32(<vscale x 6 x i32>, <vscale x 3 x i32>, i64)
+declare <vscale x 12 x i32> @llvm.experimental.vector.insert.nxv12i32.nxv4i32(<vscale x 12 x i32>, <vscale x 4 x i32>, i64)