Scale sub-vector pointer offsets by vscale for scalable sub-vectors.

clampDynamicVectorIndex now receives the sub-vector size as an ElementCount
(SubEC) rather than a plain unsigned, asserts that a scalable sub-vector is
never indexed within a fixed-length vector, and only enters the
scalable-vector path shown here when the vector is scalable and the
sub-vector is not.  The caller in the second hunk now clamps the index
unconditionally and, when the sub-vector is scalable, multiplies the index by
vscale before multiplying by the element size in bytes.  The element-type
assert message no longer says "fixed vector", because the check now also runs
for scalable sub-vectors.  A new AArch64 SVE test, insert_nxv8f16_nxv2f16,
covers the scalable case.

NOTE(review): this patch text was rebuilt from a copy whose line breaks,
indentation and <...> tokens had been lost.  Blank context lines, leading
whitespace, and the type on the `ret` context line of the first test hunk
are reconstructions - confirm against the tree before applying.

diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
--- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -7810,13 +7810,15 @@
 
 static SDValue clampDynamicVectorIndex(SelectionDAG &DAG, SDValue Idx,
                                        EVT VecVT, const SDLoc &dl,
-                                       unsigned NumSubElts) {
-  if (!VecVT.isScalableVector() && isa<ConstantSDNode>(Idx))
-    return Idx;
+                                       ElementCount SubEC) {
+  assert(!(SubEC.isScalable() && VecVT.isFixedLengthVector()) &&
+         "Cannot index a scalable vector within a fixed-width vector");
 
-  EVT IdxVT = Idx.getValueType();
   unsigned NElts = VecVT.getVectorMinNumElements();
-  if (VecVT.isScalableVector()) {
+  unsigned NumSubElts = SubEC.getKnownMinValue();
+  EVT IdxVT = Idx.getValueType();
+
+  if (VecVT.isScalableVector() && !SubEC.isScalable()) {
     // If this is a constant index and we know the value plus the number of the
     // elements in the subvector minus one is less than the minimum number of
     // elements then it's safe to return Idx.
@@ -7863,16 +7865,16 @@
   unsigned EltSize = EltVT.getFixedSizeInBits() / 8; // FIXME: should be ABI size.
   assert(EltSize * 8 == EltVT.getFixedSizeInBits() &&
          "Converting bits to bytes lost precision");
-
-  // Scalable vectors don't need clamping as these are checked at compile time
-  if (SubVecVT.isFixedLengthVector()) {
-    assert(SubVecVT.getVectorElementType() == EltVT &&
-           "Sub-vector must be a fixed vector with matching element type");
-    Index = clampDynamicVectorIndex(DAG, Index, VecVT, dl,
-                                    SubVecVT.getVectorNumElements());
-  }
+  assert(SubVecVT.getVectorElementType() == EltVT &&
+         "Sub-vector must be a vector with matching element type");
+  Index = clampDynamicVectorIndex(DAG, Index, VecVT, dl,
+                                  SubVecVT.getVectorElementCount());
 
   EVT IdxVT = Index.getValueType();
+  if (SubVecVT.isScalableVector())
+    Index =
+        DAG.getNode(ISD::MUL, dl, IdxVT, Index,
+                    DAG.getVScale(dl, IdxVT, APInt(IdxVT.getSizeInBits(), 1)));
 
   Index = DAG.getNode(ISD::MUL, dl, IdxVT, Index,
                       DAG.getConstant(EltSize, dl, IdxVT));
diff --git a/llvm/test/CodeGen/AArch64/sve-insert-vector.ll b/llvm/test/CodeGen/AArch64/sve-insert-vector.ll
--- a/llvm/test/CodeGen/AArch64/sve-insert-vector.ll
+++ b/llvm/test/CodeGen/AArch64/sve-insert-vector.ll
@@ -313,6 +313,24 @@
   ret <vscale x 8 x i16> %r
 }
 
+; Test that the index is scaled by vscale if the subvector is scalable.
+define <vscale x 8 x half> @insert_nxv8f16_nxv2f16(<vscale x 8 x half> %vec, <vscale x 2 x half> %in) nounwind {
+; CHECK-LABEL: insert_nxv8f16_nxv2f16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-NEXT:    addvl sp, sp, #-1
+; CHECK-NEXT:    ptrue p0.h
+; CHECK-NEXT:    ptrue p1.d
+; CHECK-NEXT:    mov x8, sp
+; CHECK-NEXT:    st1h { z0.h }, p0, [sp]
+; CHECK-NEXT:    st1h { z1.d }, p1, [x8, #1, mul vl]
+; CHECK-NEXT:    ld1h { z0.h }, p0/z, [sp]
+; CHECK-NEXT:    addvl sp, sp, #1
+; CHECK-NEXT:    ldr x29, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT:    ret
+  %r = call <vscale x 8 x half> @llvm.experimental.vector.insert.nxv8f16.nxv2f16(<vscale x 8 x half> %vec, <vscale x 2 x half> %in, i64 2)
+  ret <vscale x 8 x half> %r
+}
 
 ; Fixed length clamping
 
@@ -379,3 +397,5 @@
 
 declare <vscale x 6 x i16> @llvm.experimental.vector.insert.nxv6i16.nxv1i16(<vscale x 6 x i16>, <vscale x 1 x i16>, i64)
 declare <vscale x 8 x i16> @llvm.experimental.vector.insert.nxv8i16.nxv2i16(<vscale x 8 x i16>, <vscale x 2 x i16>, i64)
+
+declare <vscale x 8 x half> @llvm.experimental.vector.insert.nxv8f16.nxv2f16(<vscale x 8 x half>, <vscale x 2 x half>, i64)