# Review summary. These lines precede the first "diff --git" header and are
# not part of any hunk.
#
# llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
#   DAGTypeLegalizer::WidenVecOp_INSERT_SUBVECTOR:
#   * InVec (operand 0) is no longer replaced by its widened form.
#   * SubVec (operand 1) is still replaced by GetWidenedVector(SubVec) when
#     its type action is TypeWidenVector.
#   * The handled case changes from "SubVec type == InVec type" to
#     "SubVec type .knownBitsLE(VT)", where VT is the node's result type. It
#     still requires InVec.isUndef() and a constant operand 2 equal to 0.
#   * In that case the function now returns a new ISD::INSERT_SUBVECTOR node
#     of type VT built from (InVec, SubVec, operand 2), instead of returning
#     SubVec itself.
#   * Every other case still ends in report_fatal_error.
#
# llvm/test/CodeGen/AArch64/sve-insert-vector.ll
#   * Adds the test insert_nxv1f32_nxv4f32_undef, whose CHECK lines expect a
#     single uzp1 followed by ret, and adds the matching declaration of
#     llvm.experimental.vector.insert.nxv4f32.nxv1f32.
#
# NOTE(review): the IR lines below have empty type positions (for example
# "(, , i64)" and "define @..."). Presumably the scalable-vector types were
# dropped when this text was extracted; they are left exactly as found.
# Confirm against the upstream test file before applying the second diff.
#
# Blank-line placement in the last hunk is inferred from its "-731,6 +741,7"
# header (two blank context lines are required) -- TODO confirm.
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
@@ -5786,18 +5786,17 @@
 }
 
 SDValue DAGTypeLegalizer::WidenVecOp_INSERT_SUBVECTOR(SDNode *N) {
+  EVT VT = N->getValueType(0);
   SDValue SubVec = N->getOperand(1);
   SDValue InVec = N->getOperand(0);
 
-  if (getTypeAction(InVec.getValueType()) == TargetLowering::TypeWidenVector)
-    InVec = GetWidenedVector(InVec);
-
   if (getTypeAction(SubVec.getValueType()) == TargetLowering::TypeWidenVector)
     SubVec = GetWidenedVector(SubVec);
 
-  if (SubVec.getValueType() == InVec.getValueType() && InVec.isUndef() &&
+  if (SubVec.getValueType().knownBitsLE(VT) && InVec.isUndef() &&
       N->getConstantOperandVal(2) == 0)
-    return SubVec;
+    return DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N), VT, InVec, SubVec,
+                       N->getOperand(2));
 
   report_fatal_error("Don't know how to widen the operands for "
                      "INSERT_SUBVECTOR");
diff --git a/llvm/test/CodeGen/AArch64/sve-insert-vector.ll b/llvm/test/CodeGen/AArch64/sve-insert-vector.ll
--- a/llvm/test/CodeGen/AArch64/sve-insert-vector.ll
+++ b/llvm/test/CodeGen/AArch64/sve-insert-vector.ll
@@ -285,6 +285,16 @@
   ret %retval
 }
 
+define @insert_nxv1f32_nxv4f32_undef( %subvec) nounwind {
+; CHECK-LABEL: insert_nxv1f32_nxv4f32_undef:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: uzp1 z0.s, z0.s, z0.s
+; CHECK-NEXT: ret
+entry:
+  %retval = call @llvm.experimental.vector.insert.nxv4f32.nxv1f32( undef, %subvec, i64 0)
+  ret %retval
+}
+
 ; This tests promotion of the input operand to INSERT_SUBVECTOR.
 define @insert_nxv8i16_nxv2i16( %vec, %in) nounwind {
 ; CHECK-LABEL: insert_nxv8i16_nxv2i16:
@@ -731,6 +741,7 @@
 
 declare @llvm.experimental.vector.insert.nxv8f16.nxv4f16(, , i64)
 declare @llvm.experimental.vector.insert.nxv3f32.nxv2f32(, , i64)
+declare @llvm.experimental.vector.insert.nxv4f32.nxv1f32(, , i64)
 declare @llvm.experimental.vector.insert.nxv4f32.nxv2f32(, , i64)
 
 declare @llvm.experimental.vector.insert.nxv2i1.v8i1(, <8 x i1>, i64)