diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
@@ -841,6 +841,7 @@
   SDValue SplitVecOp_TruncateHelper(SDNode *N);
 
   SDValue SplitVecOp_BITCAST(SDNode *N);
+  SDValue SplitVecOp_INSERT_SUBVECTOR(SDNode *N, unsigned OpNo);
   SDValue SplitVecOp_EXTRACT_SUBVECTOR(SDNode *N);
   SDValue SplitVecOp_EXTRACT_VECTOR_ELT(SDNode *N);
   SDValue SplitVecOp_ExtVecInRegOp(SDNode *N);
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
@@ -2061,6 +2061,7 @@
   case ISD::SETCC:             Res = SplitVecOp_VSETCC(N); break;
   case ISD::BITCAST:           Res = SplitVecOp_BITCAST(N); break;
   case ISD::EXTRACT_SUBVECTOR: Res = SplitVecOp_EXTRACT_SUBVECTOR(N); break;
+  case ISD::INSERT_SUBVECTOR:  Res = SplitVecOp_INSERT_SUBVECTOR(N, OpNo); break;
   case ISD::EXTRACT_VECTOR_ELT:Res = SplitVecOp_EXTRACT_VECTOR_ELT(N); break;
   case ISD::CONCAT_VECTORS:    Res = SplitVecOp_CONCAT_VECTORS(N); break;
   case ISD::TRUNCATE:
@@ -2277,6 +2278,38 @@
                  JoinIntegers(Lo, Hi));
 }
 
+SDValue DAGTypeLegalizer::SplitVecOp_INSERT_SUBVECTOR(SDNode *N,
+                                                      unsigned OpNo) {
+  assert(OpNo == 1 && "Invalid OpNo; can only split SubVec.");
+  // We know that the result type is legal.
+  EVT ResVT = N->getValueType(0);
+
+  if (ResVT.isFixedLengthVector() &&
+      N->getOperand(0).getValueType().isFixedLengthVector() &&
+      N->getOperand(1).getValueType().isScalableVector())
+    report_fatal_error("Inserting a scalable vector into a fixed-length vector "
+                       "is not yet supported.");
+
+  SDValue Vec = N->getOperand(0);
+  SDValue SubVec = N->getOperand(1);
+  SDValue Idx = N->getOperand(2);
+  SDLoc dl(N);
+
+  SDValue Lo, Hi;
+  GetSplitVector(SubVec, Lo, Hi);
+
+  uint64_t IdxVal = cast<ConstantSDNode>(Idx)->getZExtValue();
+  uint64_t LoElts = Lo.getValueType().getVectorMinNumElements();
+
+  SDValue FirstInsertion =
+      DAG.getNode(ISD::INSERT_SUBVECTOR, dl, ResVT, Vec, Lo, Idx);
+  SDValue SecondInsertion =
+      DAG.getNode(ISD::INSERT_SUBVECTOR, dl, ResVT, FirstInsertion, Hi,
+                  DAG.getVectorIdxConstant(IdxVal + LoElts, dl));
+
+  return SecondInsertion;
+}
+
 SDValue DAGTypeLegalizer::SplitVecOp_EXTRACT_SUBVECTOR(SDNode *N) {
   // We know that the extracted result type is legal.
   EVT SubVT = N->getValueType(0);
diff --git a/llvm/test/CodeGen/AArch64/split-vector-insert.ll b/llvm/test/CodeGen/AArch64/split-vector-insert.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/split-vector-insert.ll
@@ -0,0 +1,56 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -o - -mtriple=aarch64-- -mcpu=a64fx -debug-only=legalize-types 2>&1 | FileCheck %s --check-prefix=CHECK-LEGALIZATION
+; RUN: llc < %s -o - -mtriple=aarch64-- -mcpu=a64fx | FileCheck %s --check-prefix=CHECK-CODEGEN
+
+declare void @_Z5svtblu12__SVUint64_tu12__SVUint64_t(<vscale x 2 x i64>, <vscale x 2 x i64>)
+declare <vscale x 2 x i64> @llvm.experimental.vector.insert.nxv2i64.v8i64(<vscale x 2 x i64>, <8 x i64>, i64)
+
+define void @foo() {
+; CHECK-LEGALIZATION: Legally typed node: [[T1:t[0-9]+]]: nxv2i64 = insert_subvector undef:nxv2i64, [[T2:t[0-9]+]], Constant:i64<0>
+; CHECK-LEGALIZATION: Legally typed node: [[T3:t[0-9]+]]: nxv2i64 = insert_subvector [[T1]], [[T2]], Constant:i64<2>
+; CHECK-LEGALIZATION: Legally typed node: [[T4:t[0-9]+]]: nxv2i64 = insert_subvector [[T3]], [[T2]], Constant:i64<4>
+; CHECK-LEGALIZATION: Legally typed node: [[T5:t[0-9]+]]: nxv2i64 = insert_subvector [[T4]], [[T2]], Constant:i64<6>
+
+; CHECK-CODEGEN-LABEL: foo:
+; CHECK-CODEGEN:       // %bb.0:
+; CHECK-CODEGEN-NEXT:    stp x29, x30, [sp, #-16]! // 16-byte Folded Spill
+; CHECK-CODEGEN-NEXT:    addvl sp, sp, #-3
+; CHECK-CODEGEN-NEXT:    .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x18, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 24 * VG
+; CHECK-CODEGEN-NEXT:    .cfi_offset w30, -8
+; CHECK-CODEGEN-NEXT:    .cfi_offset w29, -16
+; CHECK-CODEGEN-NEXT:    cntd x9
+; CHECK-CODEGEN-NEXT:    sub x9, x9, #1 // =1
+; CHECK-CODEGEN-NEXT:    mov w8, #2
+; CHECK-CODEGEN-NEXT:    cmp x9, #2 // =2
+; CHECK-CODEGEN-NEXT:    csel x8, x9, x8, lo
+; CHECK-CODEGEN-NEXT:    movi v0.2d, #0000000000000000
+; CHECK-CODEGEN-NEXT:    ptrue p0.d
+; CHECK-CODEGEN-NEXT:    mov x10, sp
+; CHECK-CODEGEN-NEXT:    lsl x8, x8, #3
+; CHECK-CODEGEN-NEXT:    st1d { z0.d }, p0, [sp]
+; CHECK-CODEGEN-NEXT:    str q0, [x10, x8]
+; CHECK-CODEGEN-NEXT:    addvl x10, sp, #1
+; CHECK-CODEGEN-NEXT:    ld1d { z1.d }, p0/z, [sp]
+; CHECK-CODEGEN-NEXT:    mov w8, #4
+; CHECK-CODEGEN-NEXT:    cmp x9, #4 // =4
+; CHECK-CODEGEN-NEXT:    csel x8, x9, x8, lo
+; CHECK-CODEGEN-NEXT:    lsl x8, x8, #3
+; CHECK-CODEGEN-NEXT:    st1d { z1.d }, p0, [sp, #1, mul vl]
+; CHECK-CODEGEN-NEXT:    str q0, [x10, x8]
+; CHECK-CODEGEN-NEXT:    addvl x10, sp, #2
+; CHECK-CODEGEN-NEXT:    ld1d { z1.d }, p0/z, [sp, #1, mul vl]
+; CHECK-CODEGEN-NEXT:    mov w8, #6
+; CHECK-CODEGEN-NEXT:    cmp x9, #6 // =6
+; CHECK-CODEGEN-NEXT:    csel x8, x9, x8, lo
+; CHECK-CODEGEN-NEXT:    lsl x8, x8, #3
+; CHECK-CODEGEN-NEXT:    st1d { z1.d }, p0, [sp, #2, mul vl]
+; CHECK-CODEGEN-NEXT:    str q0, [x10, x8]
+; CHECK-CODEGEN-NEXT:    ld1d { z1.d }, p0/z, [sp, #2, mul vl]
+; CHECK-CODEGEN-NEXT:    bl _Z5svtblu12__SVUint64_tu12__SVUint64_t
+; CHECK-CODEGEN-NEXT:    addvl sp, sp, #3
+; CHECK-CODEGEN-NEXT:    ldp x29, x30, [sp], #16 // 16-byte Folded Reload
+; CHECK-CODEGEN-NEXT:    ret
+  %castScalableSve = call <vscale x 2 x i64> @llvm.experimental.vector.insert.nxv2i64.v8i64(<vscale x 2 x i64> undef, <8 x i64> zeroinitializer, i64 0)
+  call void @_Z5svtblu12__SVUint64_tu12__SVUint64_t(<vscale x 2 x i64> undef, <vscale x 2 x i64> %castScalableSve)
+  ret void
+}