/*
 * Patch summary (leading commentary; everything below the first "diff --git"
 * header is the original patch text, unchanged).
 *
 * Purpose: teach the SelectionDAG type legalizer's operand-widening path
 * about ISD::INSERT_SUBVECTOR.
 *
 * Files touched:
 *   - llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
 *       Declares SDValue WidenVecOp_INSERT_SUBVECTOR(SDNode *N) next to the
 *       other WidenVecOp_* declarations.
 *   - llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
 *       Adds a "case ISD::INSERT_SUBVECTOR" that assigns
 *       Res = WidenVecOp_INSERT_SUBVECTOR(N), alongside the existing
 *       WidenVecOp_* cases, and adds the definition of
 *       DAGTypeLegalizer::WidenVecOp_INSERT_SUBVECTOR.
 *   - llvm/test/CodeGen/AArch64/sve-insert-vector.ll
 *       Adds insert_nxv1i32_nxv4i32_undef and insert_nxv1i16_nxv6i16_undef,
 *       whose CHECK lines expect "mov z0.s, #1" / "mov z0.h, #1" followed by
 *       "ret", plus declarations of the two intrinsics they call.
 *   - llvm/test/CodeGen/AArch64/sve-vector-splat.ll
 *       Adds sve_splat_1xi32, whose CHECK lines expect "mov z0.s, w0"
 *       followed by "ret".
 *
 * Behaviour of the new WidenVecOp_INSERT_SUBVECTOR(SDNode *N), as written:
 *   1. Reads SubVec from operand 1 and InVec from operand 0 of N.
 *   2. For each of the two, if getTypeAction() of its value type is
 *      TargetLowering::TypeWidenVector, replaces it with GetWidenedVector().
 *   3. If the two value types are then equal, InVec is undef, and the
 *      constant operand 2 of N is 0, returns SubVec.
 *   4. In every other case calls report_fatal_error("Don't know how to widen
 *      the operands for INSERT_SUBVECTOR"); there is no fallback lowering.
 *
 * NOTE(review): this copy of the patch looks whitespace-collapsed (whole
 * hunks run together on one physical line), so the hunk line counts cannot
 * be checked against it -- confirm against the original patch before
 * applying.
 * NOTE(review): the LLVM IR lines appear to be missing their vector type
 * tokens (e.g. "define @insert_...", "insertelement undef",
 * "declare ...(, , i64)"); presumably scalable-vector types written in angle
 * brackets were stripped during extraction -- TODO confirm and restore from
 * the original patch rather than by inference.
 */
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h @@ -928,6 +928,7 @@ SDValue WidenVecOp_CONCAT_VECTORS(SDNode *N); SDValue WidenVecOp_EXTEND(SDNode *N); SDValue WidenVecOp_EXTRACT_VECTOR_ELT(SDNode *N); + SDValue WidenVecOp_INSERT_SUBVECTOR(SDNode *N); SDValue WidenVecOp_EXTRACT_SUBVECTOR(SDNode *N); SDValue WidenVecOp_STORE(SDNode* N); SDValue WidenVecOp_MSTORE(SDNode* N, unsigned OpNo); diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp @@ -4520,6 +4520,7 @@ case ISD::BITCAST: Res = WidenVecOp_BITCAST(N); break; case ISD::CONCAT_VECTORS: Res = WidenVecOp_CONCAT_VECTORS(N); break; + case ISD::INSERT_SUBVECTOR: Res = WidenVecOp_INSERT_SUBVECTOR(N); break; case ISD::EXTRACT_SUBVECTOR: Res = WidenVecOp_EXTRACT_SUBVECTOR(N); break; case ISD::EXTRACT_VECTOR_ELT: Res = WidenVecOp_EXTRACT_VECTOR_ELT(N); break; case ISD::STORE: Res = WidenVecOp_STORE(N); break; @@ -4834,6 +4835,24 @@ return DAG.getBuildVector(VT, dl, Ops); } +SDValue DAGTypeLegalizer::WidenVecOp_INSERT_SUBVECTOR(SDNode *N) { + SDValue SubVec = N->getOperand(1); + SDValue InVec = N->getOperand(0); + + if (getTypeAction(InVec.getValueType()) == TargetLowering::TypeWidenVector) + InVec = GetWidenedVector(InVec); + + if (getTypeAction(SubVec.getValueType()) == TargetLowering::TypeWidenVector) + SubVec = GetWidenedVector(SubVec); + + if (SubVec.getValueType() == InVec.getValueType() && InVec.isUndef() && + N->getConstantOperandVal(2) == 0) + return SubVec; + + report_fatal_error("Don't know how to widen the operands for " + "INSERT_SUBVECTOR"); +} + SDValue DAGTypeLegalizer::WidenVecOp_EXTRACT_SUBVECTOR(SDNode *N) { SDValue InOp = 
GetWidenedVector(N->getOperand(0)); return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N), diff --git a/llvm/test/CodeGen/AArch64/sve-insert-vector.ll b/llvm/test/CodeGen/AArch64/sve-insert-vector.ll --- a/llvm/test/CodeGen/AArch64/sve-insert-vector.ll +++ b/llvm/test/CodeGen/AArch64/sve-insert-vector.ll @@ -246,6 +246,33 @@ } +; Insert subvectors that need widening + +define @insert_nxv1i32_nxv4i32_undef() nounwind { +; CHECK-LABEL: insert_nxv1i32_nxv4i32_undef: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: mov z0.s, #1 // =0x1 +; CHECK-NEXT: ret +entry: + %0 = insertelement undef, i32 1, i32 0 + %subvec = shufflevector %0, undef, zeroinitializer + %retval = call @llvm.experimental.vector.insert.nxv4i32.nxv1i32( undef, %subvec, i64 0) + ret %retval +} + +define @insert_nxv1i16_nxv6i16_undef() nounwind { +; CHECK-LABEL: insert_nxv1i16_nxv6i16_undef: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: mov z0.h, #1 // =0x1 +; CHECK-NEXT: ret +entry: + %0 = insertelement undef, i16 1, i32 0 + %subvec = shufflevector %0, undef, zeroinitializer + %retval = call @llvm.experimental.vector.insert.nxv6i16.nxv1i16( undef, %subvec, i64 0) + ret %retval +} + + declare @llvm.experimental.vector.insert.nxv2i64.v2i64(, <2 x i64>, i64) declare @llvm.experimental.vector.insert.nxv4i32.v4i32(, <4 x i32>, i64) declare @llvm.experimental.vector.insert.nxv8i16.v8i16(, <8 x i16>, i64) @@ -253,3 +280,5 @@ declare @llvm.experimental.vector.insert.nxv8i64.nxv16i64(, , i64) declare @llvm.experimental.vector.insert.v2i64.nxv16i64(, <2 x i64>, i64) +declare @llvm.experimental.vector.insert.nxv4i32.nxv1i32(, , i64) +declare @llvm.experimental.vector.insert.nxv6i16.nxv1i16(, , i64) diff --git a/llvm/test/CodeGen/AArch64/sve-vector-splat.ll b/llvm/test/CodeGen/AArch64/sve-vector-splat.ll --- a/llvm/test/CodeGen/AArch64/sve-vector-splat.ll +++ b/llvm/test/CodeGen/AArch64/sve-vector-splat.ll @@ -132,6 +132,16 @@ ;; Widen/split splats of wide vector types. 
+define @sve_splat_1xi32(i32 %val) { +; CHECK-LABEL: sve_splat_1xi32: +; CHECK: mov z0.s, w0 +; CHECK-NEXT: ret +entry: + %ins = insertelement undef, i32 %val, i32 0 + %splat = shufflevector %ins, undef, zeroinitializer + ret %splat +} + define @sve_splat_12xi32(i32 %val) { ; CHECK-LABEL: @sve_splat_12xi32 ; CHECK: mov z0.s, w0