Index: llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h =================================================================== --- llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h +++ llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h @@ -928,6 +928,7 @@ SDValue WidenVecOp_CONCAT_VECTORS(SDNode *N); SDValue WidenVecOp_EXTEND(SDNode *N); SDValue WidenVecOp_EXTRACT_VECTOR_ELT(SDNode *N); + SDValue WidenVecOp_INSERT_SUBVECTOR(SDNode *N); SDValue WidenVecOp_EXTRACT_SUBVECTOR(SDNode *N); SDValue WidenVecOp_STORE(SDNode* N); SDValue WidenVecOp_MSTORE(SDNode* N, unsigned OpNo); Index: llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp =================================================================== --- llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp +++ llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp @@ -4520,6 +4520,7 @@ case ISD::BITCAST: Res = WidenVecOp_BITCAST(N); break; case ISD::CONCAT_VECTORS: Res = WidenVecOp_CONCAT_VECTORS(N); break; + case ISD::INSERT_SUBVECTOR: Res = WidenVecOp_INSERT_SUBVECTOR(N); break; case ISD::EXTRACT_SUBVECTOR: Res = WidenVecOp_EXTRACT_SUBVECTOR(N); break; case ISD::EXTRACT_VECTOR_ELT: Res = WidenVecOp_EXTRACT_VECTOR_ELT(N); break; case ISD::STORE: Res = WidenVecOp_STORE(N); break; @@ -4834,6 +4835,24 @@ return DAG.getBuildVector(VT, dl, Ops); } +SDValue DAGTypeLegalizer::WidenVecOp_INSERT_SUBVECTOR(SDNode *N) { + SDValue SubVec = N->getOperand(1); + SDValue InVec = N->getOperand(0); + + if (getTypeAction(InVec.getValueType()) == TargetLowering::TypeWidenVector) + InVec = GetWidenedVector(InVec); + + if (getTypeAction(SubVec.getValueType()) == TargetLowering::TypeWidenVector) + SubVec = GetWidenedVector(SubVec); + + if (SubVec.getValueType() == InVec.getValueType() && InVec.isUndef() && + N->getConstantOperandVal(2) == 0) + return SubVec; + + report_fatal_error("Don't know how to widen the operands for " + "INSERT_SUBVECTOR"); +} + SDValue DAGTypeLegalizer::WidenVecOp_EXTRACT_SUBVECTOR(SDNode *N) { SDValue InOp = 
GetWidenedVector(N->getOperand(0)); return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N), Index: llvm/test/CodeGen/AArch64/sve-calling-convention-mixed.ll =================================================================== --- llvm/test/CodeGen/AArch64/sve-calling-convention-mixed.ll +++ llvm/test/CodeGen/AArch64/sve-calling-convention-mixed.ll @@ -187,6 +187,17 @@ ret double %x0 } +define <vscale x 1 x i32> @foo7() { +; CHECK-LABEL: foo7: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: mov z0.s, #1 // =0x1 +; CHECK-NEXT: ret +entry: + %0 = insertelement <vscale x 1 x i32> undef, i32 1, i32 0 + %1 = shufflevector <vscale x 1 x i32> %0, <vscale x 1 x i32> undef, <vscale x 1 x i32> zeroinitializer + ret <vscale x 1 x i32> %1 +} + declare float @callee1(float, <vscale x 8 x double>, <vscale x 8 x double>, <vscale x 8 x double>) declare float @callee2(i32, i32, i32, i32, i32, i32, i32, i32, float, <vscale x 8 x double>, <vscale x 8 x double>) declare float @callee3(float, float, <vscale x 8 x double>, <vscale x 6 x double>, <vscale x 2 x double>) Index: llvm/test/CodeGen/AArch64/sve-insert-vector.ll =================================================================== --- llvm/test/CodeGen/AArch64/sve-insert-vector.ll +++ llvm/test/CodeGen/AArch64/sve-insert-vector.ll @@ -246,6 +246,33 @@ } +; Insert subvectors that need widening + +define <vscale x 4 x i32> @insert_nxv1i32_nxv4i32_undef() nounwind { +; CHECK-LABEL: insert_nxv1i32_nxv4i32_undef: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: mov z0.s, #1 // =0x1 +; CHECK-NEXT: ret +entry: + %0 = insertelement <vscale x 1 x i32> undef, i32 1, i32 0 + %subvec = shufflevector <vscale x 1 x i32> %0, <vscale x 1 x i32> undef, <vscale x 1 x i32> zeroinitializer + %retval = call <vscale x 4 x i32> @llvm.experimental.vector.insert.nxv4i32.nxv1i32(<vscale x 4 x i32> undef, <vscale x 1 x i32> %subvec, i64 0) + ret <vscale x 4 x i32> %retval +} + +define <vscale x 6 x i16> @insert_nxv1i16_nxv6i16_undef() nounwind { +; CHECK-LABEL: insert_nxv1i16_nxv6i16_undef: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: mov z0.h, #1 // =0x1 +; CHECK-NEXT: ret +entry: + %0 = insertelement <vscale x 1 x i16> undef, i16 1, i32 0 + %subvec = shufflevector <vscale x 1 x i16> %0, <vscale x 1 x i16> undef, <vscale x 1 x i32> zeroinitializer + %retval = call <vscale x 6 x i16> @llvm.experimental.vector.insert.nxv6i16.nxv1i16(<vscale x 6 x i16> undef, <vscale x 1 x i16> %subvec, i64 0) + ret <vscale x 6 x i16> %retval +} + + declare <vscale x 2 x i64> @llvm.experimental.vector.insert.nxv2i64.v2i64(<vscale x 2 x i64>, <2 x i64>, i64) declare <vscale x 4 x i32> @llvm.experimental.vector.insert.nxv4i32.v4i32(<vscale x 4 x i32>, <4 x i32>, i64) 
declare <vscale x 8 x i16> @llvm.experimental.vector.insert.nxv8i16.v8i16(<vscale x 8 x i16>, <8 x i16>, i64) @@ -253,3 +280,5 @@ declare <vscale x 16 x i64> @llvm.experimental.vector.insert.nxv8i64.nxv16i64(<vscale x 16 x i64>, <vscale x 8 x i64>, i64) declare <vscale x 16 x i64> @llvm.experimental.vector.insert.v2i64.nxv16i64(<vscale x 16 x i64>, <2 x i64>, i64) +declare <vscale x 4 x i32> @llvm.experimental.vector.insert.nxv4i32.nxv1i32(<vscale x 4 x i32>, <vscale x 1 x i32>, i64) +declare <vscale x 6 x i16> @llvm.experimental.vector.insert.nxv6i16.nxv1i16(<vscale x 6 x i16>, <vscale x 1 x i16>, i64)