diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h @@ -841,6 +841,7 @@ SDValue SplitVecOp_TruncateHelper(SDNode *N); SDValue SplitVecOp_BITCAST(SDNode *N); + SDValue SplitVecOp_INSERT_SUBVECTOR(SDNode *N, unsigned OpNo); SDValue SplitVecOp_EXTRACT_SUBVECTOR(SDNode *N); SDValue SplitVecOp_EXTRACT_VECTOR_ELT(SDNode *N); SDValue SplitVecOp_ExtVecInRegOp(SDNode *N); diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp @@ -2062,6 +2062,7 @@ case ISD::SETCC: Res = SplitVecOp_VSETCC(N); break; case ISD::BITCAST: Res = SplitVecOp_BITCAST(N); break; case ISD::EXTRACT_SUBVECTOR: Res = SplitVecOp_EXTRACT_SUBVECTOR(N); break; + case ISD::INSERT_SUBVECTOR: Res = SplitVecOp_INSERT_SUBVECTOR(N, OpNo); break; case ISD::EXTRACT_VECTOR_ELT:Res = SplitVecOp_EXTRACT_VECTOR_ELT(N); break; case ISD::CONCAT_VECTORS: Res = SplitVecOp_CONCAT_VECTORS(N); break; case ISD::TRUNCATE: @@ -2278,6 +2279,32 @@ JoinIntegers(Lo, Hi)); } +SDValue DAGTypeLegalizer::SplitVecOp_INSERT_SUBVECTOR(SDNode *N, + unsigned OpNo) { + assert(OpNo == 1 && "Invalid OpNo; can only split SubVec."); + // We know that the result type is legal. 
+ EVT ResVT = N->getValueType(0); + + SDValue Vec = N->getOperand(0); + SDValue SubVec = N->getOperand(1); + SDValue Idx = N->getOperand(2); + SDLoc dl(N); + + SDValue Lo, Hi; + GetSplitVector(SubVec, Lo, Hi); + + uint64_t IdxVal = cast<ConstantSDNode>(Idx)->getZExtValue(); + uint64_t LoElts = Lo.getValueType().getVectorMinNumElements(); + + SDValue FirstInsertion = + DAG.getNode(ISD::INSERT_SUBVECTOR, dl, ResVT, Vec, Lo, Idx); + SDValue SecondInsertion = + DAG.getNode(ISD::INSERT_SUBVECTOR, dl, ResVT, FirstInsertion, Hi, + DAG.getVectorIdxConstant(IdxVal + LoElts, dl)); + + return SecondInsertion; +} + SDValue DAGTypeLegalizer::SplitVecOp_EXTRACT_SUBVECTOR(SDNode *N) { // We know that the extracted result type is legal. EVT SubVT = N->getValueType(0); diff --git a/llvm/test/CodeGen/AArch64/split-vector-insert.ll b/llvm/test/CodeGen/AArch64/split-vector-insert.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/split-vector-insert.ll @@ -0,0 +1,115 @@ +; RUN: llc < %s -debug-only=legalize-types 2>&1 | FileCheck %s --check-prefix=CHECK-LEGALIZATION +; RUN: llc < %s | FileCheck %s +; REQUIRES: asserts + +target triple = "aarch64-unknown-linux-gnu" +attributes #0 = {"target-features"="+sve"} + +declare <vscale x 2 x i64> @llvm.experimental.vector.insert.nxv2i64.v8i64(<vscale x 2 x i64>, <8 x i64>, i64) +declare <vscale x 2 x double> @llvm.experimental.vector.insert.nxv2f64.v8f64(<vscale x 2 x double>, <8 x double>, i64) + +define <vscale x 2 x i64> @test_nxv2i64_v8i64(<vscale x 2 x i64> %a, <8 x i64> %b) #0 { +; CHECK-LEGALIZATION: Legally typed node: [[T1:t[0-9]+]]: nxv2i64 = insert_subvector {{t[0-9]+}}, {{t[0-9]+}}, Constant:i64<0> +; CHECK-LEGALIZATION: Legally typed node: [[T2:t[0-9]+]]: nxv2i64 = insert_subvector [[T1]], {{t[0-9]+}}, Constant:i64<2> +; CHECK-LEGALIZATION: Legally typed node: [[T3:t[0-9]+]]: nxv2i64 = insert_subvector [[T2]], {{t[0-9]+}}, Constant:i64<4> +; CHECK-LEGALIZATION: Legally typed node: [[T4:t[0-9]+]]: nxv2i64 = insert_subvector [[T3]], {{t[0-9]+}}, Constant:i64<6> + +; CHECK-LABEL: test_nxv2i64_v8i64: +; CHECK: // %bb.0: +; CHECK-NEXT: str x29, 
[sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: addvl sp, sp, #-4 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x20, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 32 * VG +; CHECK-NEXT: .cfi_offset w29, -16 +; CHECK-NEXT: cntd x8 +; CHECK-NEXT: sub x8, x8, #1 // =1 +; CHECK-NEXT: cmp x8, #0 // =0 +; CHECK-NEXT: csel x10, x8, xzr, lo +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: mov x9, sp +; CHECK-NEXT: lsl x10, x10, #3 +; CHECK-NEXT: st1d { z0.d }, p0, [sp] +; CHECK-NEXT: str q1, [x9, x10] +; CHECK-NEXT: addvl x10, sp, #1 +; CHECK-NEXT: ld1d { z0.d }, p0/z, [sp] +; CHECK-NEXT: mov w9, #2 +; CHECK-NEXT: cmp x8, #2 // =2 +; CHECK-NEXT: csel x9, x8, x9, lo +; CHECK-NEXT: lsl x9, x9, #3 +; CHECK-NEXT: st1d { z0.d }, p0, [sp, #1, mul vl] +; CHECK-NEXT: str q2, [x10, x9] +; CHECK-NEXT: addvl x10, sp, #2 +; CHECK-NEXT: ld1d { z0.d }, p0/z, [sp, #1, mul vl] +; CHECK-NEXT: mov w9, #4 +; CHECK-NEXT: cmp x8, #4 // =4 +; CHECK-NEXT: csel x9, x8, x9, lo +; CHECK-NEXT: lsl x9, x9, #3 +; CHECK-NEXT: st1d { z0.d }, p0, [sp, #2, mul vl] +; CHECK-NEXT: str q3, [x10, x9] +; CHECK-NEXT: addvl x10, sp, #3 +; CHECK-NEXT: ld1d { z0.d }, p0/z, [sp, #2, mul vl] +; CHECK-NEXT: mov w9, #6 +; CHECK-NEXT: cmp x8, #6 // =6 +; CHECK-NEXT: csel x8, x8, x9, lo +; CHECK-NEXT: lsl x8, x8, #3 +; CHECK-NEXT: st1d { z0.d }, p0, [sp, #3, mul vl] +; CHECK-NEXT: str q4, [x10, x8] +; CHECK-NEXT: ld1d { z0.d }, p0/z, [sp, #3, mul vl] +; CHECK-NEXT: addvl sp, sp, #4 +; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret + %r = call <vscale x 2 x i64> @llvm.experimental.vector.insert.nxv2i64.v8i64(<vscale x 2 x i64> %a, <8 x i64> %b, i64 0) + ret <vscale x 2 x i64> %r +} + +define <vscale x 2 x double> @test_nxv2f64_v8f64(<vscale x 2 x double> %a, <8 x double> %b) #0 { +; CHECK-LEGALIZATION: Legally typed node: [[T1:t[0-9]+]]: nxv2f64 = insert_subvector {{t[0-9]+}}, {{t[0-9]+}}, Constant:i64<0> +; CHECK-LEGALIZATION: Legally typed node: [[T2:t[0-9]+]]: nxv2f64 = insert_subvector [[T1]], {{t[0-9]+}}, Constant:i64<2> +; CHECK-LEGALIZATION: Legally typed node: 
[[T3:t[0-9]+]]: nxv2f64 = insert_subvector [[T2]], {{t[0-9]+}}, Constant:i64<4> +; CHECK-LEGALIZATION: Legally typed node: [[T4:t[0-9]+]]: nxv2f64 = insert_subvector [[T3]], {{t[0-9]+}}, Constant:i64<6> + +; CHECK-LABEL: test_nxv2f64_v8f64: +; CHECK: // %bb.0: +; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: addvl sp, sp, #-4 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x20, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 32 * VG +; CHECK-NEXT: .cfi_offset w29, -16 +; CHECK-NEXT: cntd x8 +; CHECK-NEXT: sub x8, x8, #1 // =1 +; CHECK-NEXT: cmp x8, #0 // =0 +; CHECK-NEXT: csel x10, x8, xzr, lo +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: mov x9, sp +; CHECK-NEXT: lsl x10, x10, #3 +; CHECK-NEXT: st1d { z0.d }, p0, [sp] +; CHECK-NEXT: str q1, [x9, x10] +; CHECK-NEXT: addvl x10, sp, #1 +; CHECK-NEXT: ld1d { z0.d }, p0/z, [sp] +; CHECK-NEXT: mov w9, #2 +; CHECK-NEXT: cmp x8, #2 // =2 +; CHECK-NEXT: csel x9, x8, x9, lo +; CHECK-NEXT: lsl x9, x9, #3 +; CHECK-NEXT: st1d { z0.d }, p0, [sp, #1, mul vl] +; CHECK-NEXT: str q2, [x10, x9] +; CHECK-NEXT: addvl x10, sp, #2 +; CHECK-NEXT: ld1d { z0.d }, p0/z, [sp, #1, mul vl] +; CHECK-NEXT: mov w9, #4 +; CHECK-NEXT: cmp x8, #4 // =4 +; CHECK-NEXT: csel x9, x8, x9, lo +; CHECK-NEXT: lsl x9, x9, #3 +; CHECK-NEXT: st1d { z0.d }, p0, [sp, #2, mul vl] +; CHECK-NEXT: str q3, [x10, x9] +; CHECK-NEXT: addvl x10, sp, #3 +; CHECK-NEXT: ld1d { z0.d }, p0/z, [sp, #2, mul vl] +; CHECK-NEXT: mov w9, #6 +; CHECK-NEXT: cmp x8, #6 // =6 +; CHECK-NEXT: csel x8, x8, x9, lo +; CHECK-NEXT: lsl x8, x8, #3 +; CHECK-NEXT: st1d { z0.d }, p0, [sp, #3, mul vl] +; CHECK-NEXT: str q4, [x10, x8] +; CHECK-NEXT: ld1d { z0.d }, p0/z, [sp, #3, mul vl] +; CHECK-NEXT: addvl sp, sp, #4 +; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret + %r = call <vscale x 2 x double> @llvm.experimental.vector.insert.nxv2f64.v8f64(<vscale x 2 x double> %a, <8 x double> %b, i64 0) + ret <vscale x 2 x double> %r +}