diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
@@ -1273,20 +1273,24 @@
   EVT VecVT = Vec.getValueType();
   EVT LoVT = Lo.getValueType();
-  unsigned VecElems = VecVT.getVectorNumElements();
-  unsigned SubElems = SubVec.getValueType().getVectorNumElements();
-  unsigned LoElems = LoVT.getVectorNumElements();
+  EVT SubVecVT = SubVec.getValueType();
+  unsigned VecElems = VecVT.getVectorMinNumElements();
+  unsigned SubElems = SubVecVT.getVectorMinNumElements();
+  unsigned LoElems = LoVT.getVectorMinNumElements();
 
   // If we know the index is in the first half, and we know the subvector
   // doesn't cross the boundary between the halves, we can avoid spilling the
   // vector, and insert into the lower half of the split vector directly.
-  // Similarly if the subvector is fully in the high half.
   unsigned IdxVal = cast<ConstantSDNode>(Idx)->getZExtValue();
   if (IdxVal + SubElems <= LoElems) {
     Lo = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, LoVT, Lo, SubVec, Idx);
     return;
   }
-  if (IdxVal >= LoElems && IdxVal + SubElems <= VecElems) {
+  // Similarly if the subvector is fully in the high half, but mind that we
+  // can't tell whether a fixed-length subvector is fully within the high half
+  // of a scalable vector.
+  if (VecVT.isScalableVector() == SubVecVT.isScalableVector() &&
+      IdxVal >= LoElems && IdxVal + SubElems <= VecElems) {
     Hi = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, Hi.getValueType(), Hi, SubVec,
                      DAG.getVectorIdxConstant(IdxVal - LoElems, dl));
     return;
@@ -1315,13 +1319,12 @@
                        SmallestAlign);
 
   // Increment the pointer to the other part.
-  unsigned IncrementSize = Lo.getValueSizeInBits() / 8;
-  StackPtr =
-      DAG.getMemBasePlusOffset(StackPtr, TypeSize::Fixed(IncrementSize), dl);
+  auto *Load = cast<LoadSDNode>(Lo);
+  MachinePointerInfo MPI = Load->getPointerInfo();
+  IncrementPointer(Load, LoVT, MPI, StackPtr);
 
   // Load the Hi part from the stack slot.
-  Hi = DAG.getLoad(Hi.getValueType(), dl, Store, StackPtr,
-                   PtrInfo.getWithOffset(IncrementSize), SmallestAlign);
+  Hi = DAG.getLoad(Hi.getValueType(), dl, Store, StackPtr, MPI, SmallestAlign);
 }
 
 void DAGTypeLegalizer::SplitVecRes_FPOWI(SDNode *N, SDValue &Lo,
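Note on the hunk above: with scalable types the element counts queried here are minimum counts, so IdxVal + SubElems <= LoElems still proves the subvector fits in the low half (the runtime low half is never smaller than its minimum size), but IdxVal >= LoElems no longer proves a fixed-length subvector starts in the high half, since the low half may hold vscale * LoElems elements at run time. What follows is a minimal standalone sketch of that decision on plain integers, not the patch's actual code; the names pickHalf and Half are illustrative only.

#include <cstdint>

// Which half of a split vector can take the INSERT_SUBVECTOR directly?
// "Neither" means the legalizer has to go through a stack temporary instead.
enum class Half { Lo, Hi, Neither };

Half pickHalf(uint64_t IdxVal, unsigned SubElems, unsigned LoElems,
              unsigned VecElems, bool VecIsScalable, bool SubIsScalable) {
  // Safe for any mix of fixed/scalable: the runtime low half is never
  // smaller than its minimum element count LoElems.
  if (IdxVal + SubElems <= LoElems)
    return Half::Lo;
  // Only claim the high half when both types scale the same way; a fixed
  // subvector at index >= LoElems can still land in the low half of a
  // scalable vector once vscale > 1.
  if (VecIsScalable == SubIsScalable && IdxVal >= LoElems &&
      IdxVal + SubElems <= VecElems)
    return Half::Hi;
  return Half::Neither;
}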
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -2524,6 +2524,9 @@
   case ISD::EXTRACT_SUBVECTOR: {
     // Offset the demanded elts by the subvector index.
     SDValue Src = V.getOperand(0);
+    // We don't support scalable vectors at the moment.
+    if (Src.getValueType().isScalableVector())
+      return false;
     uint64_t Idx = V.getConstantOperandVal(1);
     unsigned NumSrcElts = Src.getValueType().getVectorNumElements();
     APInt UndefSrcElts;
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert-subvector.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert-subvector.ll
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert-subvector.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert-subvector.ll
@@ -553,6 +553,99 @@
   ret %c
 }
 
+declare <vscale x 16 x i64> @llvm.experimental.vector.insert.v2i64.nxv16i64(<vscale x 16 x i64>, <2 x i64>, i64)
+
+define void @insert_v2i64_nxv16i64(<2 x i64>* %psv0, <2 x i64>* %psv1, <vscale x 16 x i64>* %out) {
+; CHECK-LABEL: insert_v2i64_nxv16i64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli a3, 2, e64,m1,ta,mu
+; CHECK-NEXT:    vle64.v v8, (a0)
+; CHECK-NEXT:    vle64.v v16, (a1)
+; CHECK-NEXT:    vsetivli a0, 6, e64,m8,tu,mu
+; CHECK-NEXT:    vslideup.vi v8, v16, 4
+; CHECK-NEXT:    vs8r.v v8, (a2)
+; CHECK-NEXT:    ret
+  %sv0 = load <2 x i64>, <2 x i64>* %psv0
+  %sv1 = load <2 x i64>, <2 x i64>* %psv1
+  %v0 = call <vscale x 16 x i64> @llvm.experimental.vector.insert.v2i64.nxv16i64(<vscale x 16 x i64> undef, <2 x i64> %sv0, i64 0)
+  %v = call <vscale x 16 x i64> @llvm.experimental.vector.insert.v2i64.nxv16i64(<vscale x 16 x i64> %v0, <2 x i64> %sv1, i64 4)
+  store <vscale x 16 x i64> %v, <vscale x 16 x i64>* %out
+  ret void
+}
+
+define void @insert_v2i64_nxv16i64_lo0(<2 x i64>* %psv, <vscale x 16 x i64>* %out) {
+; CHECK-LABEL: insert_v2i64_nxv16i64_lo0:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli a2, 2, e64,m1,ta,mu
+; CHECK-NEXT:    vle64.v v8, (a0)
+; CHECK-NEXT:    vs8r.v v8, (a1)
+; CHECK-NEXT:    ret
+  %sv = load <2 x i64>, <2 x i64>* %psv
+  %v = call <vscale x 16 x i64> @llvm.experimental.vector.insert.v2i64.nxv16i64(<vscale x 16 x i64> undef, <2 x i64> %sv, i64 0)
+  store <vscale x 16 x i64> %v, <vscale x 16 x i64>* %out
+  ret void
+}
+
+define void @insert_v2i64_nxv16i64_lo2(<2 x i64>* %psv, <vscale x 16 x i64>* %out) {
+; CHECK-LABEL: insert_v2i64_nxv16i64_lo2:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli a2, 2, e64,m1,ta,mu
+; CHECK-NEXT:    vle64.v v8, (a0)
+; CHECK-NEXT:    vsetivli a0, 4, e64,m8,ta,mu
+; CHECK-NEXT:    vslideup.vi v16, v8, 2
+; CHECK-NEXT:    vs8r.v v16, (a1)
+; CHECK-NEXT:    ret
+  %sv = load <2 x i64>, <2 x i64>* %psv
+  %v = call <vscale x 16 x i64> @llvm.experimental.vector.insert.v2i64.nxv16i64(<vscale x 16 x i64> undef, <2 x i64> %sv, i64 2)
+  store <vscale x 16 x i64> %v, <vscale x 16 x i64>* %out
+  ret void
+}
+
+; Check we don't mistakenly optimize this: we don't know whether this is
+; inserted into the low or high split vector.
+define void @insert_v2i64_nxv16i64_hi(<2 x i64>* %psv, <vscale x 16 x i64>* %out) {
+; CHECK-LABEL: insert_v2i64_nxv16i64_hi:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    csrr a2, vlenb
+; CHECK-NEXT:    slli a2, a2, 4
+; CHECK-NEXT:    sub sp, sp, a2
+; CHECK-NEXT:    vsetivli a2, 2, e64,m1,ta,mu
+; CHECK-NEXT:    vle64.v v25, (a0)
+; CHECK-NEXT:    csrr a0, vlenb
+; CHECK-NEXT:    srli a0, a0, 3
+; CHECK-NEXT:    slli a2, a0, 4
+; CHECK-NEXT:    addi a2, a2, -1
+; CHECK-NEXT:    addi a3, zero, 8
+; CHECK-NEXT:    bltu a2, a3, .LBB29_2
+; CHECK-NEXT:  # %bb.1:
+; CHECK-NEXT:    addi a2, zero, 8
+; CHECK-NEXT:  .LBB29_2:
+; CHECK-NEXT:    slli a2, a2, 3
+; CHECK-NEXT:    addi a3, sp, 16
+; CHECK-NEXT:    add a2, a3, a2
+; CHECK-NEXT:    vsetivli a4, 2, e64,m1,ta,mu
+; CHECK-NEXT:    vse64.v v25, (a2)
+; CHECK-NEXT:    slli a0, a0, 6
+; CHECK-NEXT:    add a2, a3, a0
+; CHECK-NEXT:    vl8re64.v v8, (a2)
+; CHECK-NEXT:    addi a2, sp, 16
+; CHECK-NEXT:    vl8re64.v v16, (a2)
+; CHECK-NEXT:    add a0, a1, a0
+; CHECK-NEXT:    vs8r.v v8, (a0)
+; CHECK-NEXT:    vs8r.v v16, (a1)
+; CHECK-NEXT:    csrr a0, vlenb
+; CHECK-NEXT:    slli a0, a0, 4
+; CHECK-NEXT:    add sp, sp, a0
+; CHECK-NEXT:    addi sp, sp, 16
+; CHECK-NEXT:    ret
+  %sv = load <2 x i64>, <2 x i64>* %psv
+  %v = call <vscale x 16 x i64> @llvm.experimental.vector.insert.v2i64.nxv16i64(<vscale x 16 x i64> undef, <2 x i64> %sv, i64 8)
+  store <vscale x 16 x i64> %v, <vscale x 16 x i64>* %out
+  ret void
+}
+
 declare <8 x i1> @llvm.experimental.vector.insert.v4i1.v8i1(<8 x i1>, <4 x i1>, i64)
 declare <32 x i1> @llvm.experimental.vector.insert.v8i1.v32i1(<32 x i1>, <8 x i1>, i64)
diff --git a/llvm/test/CodeGen/RISCV/rvv/insert-subvector.ll b/llvm/test/CodeGen/RISCV/rvv/insert-subvector.ll
--- a/llvm/test/CodeGen/RISCV/rvv/insert-subvector.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/insert-subvector.ll
@@ -458,6 +458,46 @@
   ret %vec
 }
 
+declare <vscale x 16 x i64> @llvm.experimental.vector.insert.nxv8i64.nxv16i64(<vscale x 16 x i64>, <vscale x 8 x i64>, i64)
+
+define void @insert_nxv8i64_nxv16i64(<vscale x 8 x i64> %sv0, <vscale x 8 x i64> %sv1, <vscale x 16 x i64>* %out) {
+; CHECK-LABEL: insert_nxv8i64_nxv16i64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vs8r.v v8, (a0)
+; CHECK-NEXT:    csrr a1, vlenb
+; CHECK-NEXT:    slli a1, a1, 3
+; CHECK-NEXT:    add a0, a0, a1
+; CHECK-NEXT:    vs8r.v v16, (a0)
+; CHECK-NEXT:    ret
+  %v0 = call <vscale x 16 x i64> @llvm.experimental.vector.insert.nxv8i64.nxv16i64(<vscale x 16 x i64> undef, <vscale x 8 x i64> %sv0, i64 0)
+  %v = call <vscale x 16 x i64> @llvm.experimental.vector.insert.nxv8i64.nxv16i64(<vscale x 16 x i64> %v0, <vscale x 8 x i64> %sv1, i64 8)
+  store <vscale x 16 x i64> %v, <vscale x 16 x i64>* %out
+  ret void
+}
+
+define void @insert_nxv8i64_nxv16i64_lo(<vscale x 8 x i64> %sv0, <vscale x 16 x i64>* %out) {
+; CHECK-LABEL: insert_nxv8i64_nxv16i64_lo:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vs8r.v v8, (a0)
+; CHECK-NEXT:    ret
+  %v = call <vscale x 16 x i64> @llvm.experimental.vector.insert.nxv8i64.nxv16i64(<vscale x 16 x i64> undef, <vscale x 8 x i64> %sv0, i64 0)
+  store <vscale x 16 x i64> %v, <vscale x 16 x i64>* %out
+  ret void
+}
+
+define void @insert_nxv8i64_nxv16i64_hi(<vscale x 8 x i64> %sv0, <vscale x 16 x i64>* %out) {
+; CHECK-LABEL: insert_nxv8i64_nxv16i64_hi:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    csrr a1, vlenb
+; CHECK-NEXT:    slli a1, a1, 3
+; CHECK-NEXT:    add a0, a0, a1
+; CHECK-NEXT:    vs8r.v v8, (a0)
+; CHECK-NEXT:    ret
+  %v = call <vscale x 16 x i64> @llvm.experimental.vector.insert.nxv8i64.nxv16i64(<vscale x 16 x i64> undef, <vscale x 8 x i64> %sv0, i64 8)
+  store <vscale x 16 x i64> %v, <vscale x 16 x i64>* %out
+  ret void
+}
+
 declare <vscale x 4 x i1> @llvm.experimental.vector.insert.nxv1i1.nxv4i1(<vscale x 4 x i1>, <vscale x 1 x i1>, i64)
 declare <vscale x 32 x i1> @llvm.experimental.vector.insert.nxv8i1.nxv32i1(<vscale x 32 x i1>, <vscale x 8 x i1>, i64)
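As a concrete illustration of why insert_v2i64_nxv16i64_hi has to go through a stack temporary while insert_nxv8i64_nxv16i64_hi does not: splitting <vscale x 16 x i64> yields two <vscale x 8 x i64> halves whose runtime length is vscale * 8, so a fixed <2 x i64> inserted at element index 8 may touch either half depending on vscale. The standalone sketch below just walks that arithmetic for a few vscale values; it is not part of the patch.

#include <cstdio>

int main() {
  // <vscale x 16 x i64> splits into two <vscale x 8 x i64> halves, so the
  // high half starts at runtime element index vscale * 8. The _hi test
  // inserts a fixed <2 x i64> at element index 8; which half that touches
  // depends on vscale, so it can't be resolved when the type is split.
  const unsigned IdxVal = 8, SubElems = 2;
  const unsigned VScales[] = {1, 2, 4};
  for (unsigned VScale : VScales) {
    unsigned LoRuntimeElems = VScale * 8;
    bool InLowHalf = IdxVal + SubElems <= LoRuntimeElems;
    printf("vscale=%u: index %u lands in the %s half\n", VScale, IdxVal,
           InLowHalf ? "low" : "high");
  }
  return 0;
}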