diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
@@ -6241,13 +6241,34 @@ SDValue DAGTypeLegalizer::WidenVecOp_INSERT_SUBVECTOR(SDNode *N) {
   EVT VT = N->getValueType(0);
   SDValue SubVec = N->getOperand(1);
+  EVT SubVT = SubVec.getValueType();
   SDValue InVec = N->getOperand(0);
 
   if (getTypeAction(SubVec.getValueType()) == TargetLowering::TypeWidenVector)
     SubVec = GetWidenedVector(SubVec);
 
-  if (SubVec.getValueType().knownBitsLE(VT) && InVec.isUndef() &&
-      N->getConstantOperandVal(2) == 0)
+  // Whether or not all the elements of SubVec will be inserted into valid
+  // indices of VT.
+  bool IndicesValid = false;
+  // If we statically know that VT can fit SubVT, the indices are valid.
+  if (VT.knownBitsGE(SubVT))
+    IndicesValid = true;
+  else if (VT.isScalableVector() && SubVT.isFixedLengthVector()) {
+    // Otherwise, if we're inserting a fixed vector into a scalable vector and
+    // we know the minimum vscale we can work out if it's valid ourselves.
+    Attribute Attr = DAG.getMachineFunction().getFunction().getFnAttribute(
+        Attribute::VScaleRange);
+    if (Attr.isValid()) {
+      unsigned VScaleMin = Attr.getVScaleRangeMin();
+      if (VT.getSizeInBits().getKnownMinValue() * VScaleMin >=
+          SubVT.getFixedSizeInBits())
+        IndicesValid = true;
+    }
+  }
+
+  // We need to make sure that the indices are still valid, otherwise we might
+  // widen what was previously well-defined to something undefined.
+  if (IndicesValid && InVec.isUndef() && N->getConstantOperandVal(2) == 0)
     return DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N), VT, InVec, SubVec,
                        N->getOperand(2));
 
diff --git a/llvm/test/CodeGen/RISCV/rvv/insert-subvector.ll b/llvm/test/CodeGen/RISCV/rvv/insert-subvector.ll
--- a/llvm/test/CodeGen/RISCV/rvv/insert-subvector.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/insert-subvector.ll
@@ -495,6 +495,18 @@
   ret void
 }
 
+; We should be able to widen the <3 x i64> subvector to a <4 x i64> here because
+; we know that the minimum vscale is 2
+define <vscale x 2 x i64> @insert_nxv2i64_nxv3i64(<3 x i64> %sv) #0 {
+; CHECK-LABEL: insert_nxv2i64_nxv3i64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    ret
+  %vec = call <vscale x 2 x i64> @llvm.vector.insert.nxv2i64.v3i64(<vscale x 2 x i64> undef, <3 x i64> %sv, i64 0)
+  ret <vscale x 2 x i64> %vec
+}
+
+attributes #0 = { vscale_range(2,1024) }
+
 declare <vscale x 4 x i1> @llvm.vector.insert.nxv1i1.nxv4i1(<vscale x 4 x i1>, <vscale x 1 x i1>, i64)
 declare <vscale x 32 x i1> @llvm.vector.insert.nxv8i1.nxv32i1(<vscale x 32 x i1>, <vscale x 8 x i1>, i64)
 
@@ -512,3 +524,5 @@
 declare <vscale x 16 x i32> @llvm.vector.insert.nxv2i32.nxv16i32(<vscale x 16 x i32>, <vscale x 2 x i32>, i64 %idx)
 declare <vscale x 16 x i32> @llvm.vector.insert.nxv4i32.nxv16i32(<vscale x 16 x i32>, <vscale x 4 x i32>, i64 %idx)
 declare <vscale x 16 x i32> @llvm.vector.insert.nxv8i32.nxv16i32(<vscale x 16 x i32>, <vscale x 8 x i32>, i64 %idx)
+
+declare <vscale x 2 x i64> @llvm.vector.insert.nxv2i64.v3i64(<vscale x 2 x i64>, <3 x i64>, i64 %idx)
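
For context, a short worked example of the size arithmetic behind the new IndicesValid check, using the numbers from the test above (illustrative only, not part of the patch; the function name @widen_insert_example is made up): <vscale x 2 x i64> has a known minimum size of 2 * 64 = 128 bits, and vscale_range(2,1024) gives VScaleMin = 2, so 128 * 2 = 256 bits >= 192 bits for <3 x i64>. Every element of the subvector therefore lands at a valid index of the scalable vector, and the INSERT_SUBVECTOR can be widened without turning a well-defined insert into an out-of-bounds one.

; Minimal standalone sketch of the pattern the check accepts.
; @widen_insert_example is a hypothetical name, not from the patch.
define <vscale x 2 x i64> @widen_insert_example(<3 x i64> %sv) vscale_range(2,1024) {
  ; VT        = <vscale x 2 x i64> -> known minimum size = 2 * 64 = 128 bits
  ; SubVT     = <3 x i64>          -> fixed size         = 3 * 64 = 192 bits
  ; VScaleMin = 2, so 128 * 2 = 256 >= 192: all inserted elements stay at
  ; valid indices even after the subvector is widened to <4 x i64>.
  %vec = call <vscale x 2 x i64> @llvm.vector.insert.nxv2i64.v3i64(<vscale x 2 x i64> undef, <3 x i64> %sv, i64 0)
  ret <vscale x 2 x i64> %vec
}

declare <vscale x 2 x i64> @llvm.vector.insert.nxv2i64.v3i64(<vscale x 2 x i64>, <3 x i64>, i64)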