diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h @@ -841,6 +841,7 @@ SDValue SplitVecOp_TruncateHelper(SDNode *N); SDValue SplitVecOp_BITCAST(SDNode *N); + SDValue SplitVecOp_INSERT_SUBVECTOR(SDNode *N, unsigned OpNo); SDValue SplitVecOp_EXTRACT_SUBVECTOR(SDNode *N); SDValue SplitVecOp_EXTRACT_VECTOR_ELT(SDNode *N); SDValue SplitVecOp_ExtVecInRegOp(SDNode *N); diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp @@ -2062,6 +2062,7 @@ case ISD::SETCC: Res = SplitVecOp_VSETCC(N); break; case ISD::BITCAST: Res = SplitVecOp_BITCAST(N); break; case ISD::EXTRACT_SUBVECTOR: Res = SplitVecOp_EXTRACT_SUBVECTOR(N); break; + case ISD::INSERT_SUBVECTOR: Res = SplitVecOp_INSERT_SUBVECTOR(N, OpNo); break; case ISD::EXTRACT_VECTOR_ELT:Res = SplitVecOp_EXTRACT_VECTOR_ELT(N); break; case ISD::CONCAT_VECTORS: Res = SplitVecOp_CONCAT_VECTORS(N); break; case ISD::TRUNCATE: @@ -2278,6 +2279,32 @@ JoinIntegers(Lo, Hi)); } +SDValue DAGTypeLegalizer::SplitVecOp_INSERT_SUBVECTOR(SDNode *N, + unsigned OpNo) { + assert(OpNo == 1 && "Invalid OpNo; can only split SubVec."); + // We know that the result type is legal. 
+ EVT ResVT = N->getValueType(0); + + SDValue Vec = N->getOperand(0); + SDValue SubVec = N->getOperand(1); + SDValue Idx = N->getOperand(2); + SDLoc dl(N); + + SDValue Lo, Hi; + GetSplitVector(SubVec, Lo, Hi); + + uint64_t IdxVal = cast(Idx)->getZExtValue(); + uint64_t LoElts = Lo.getValueType().getVectorMinNumElements(); + + SDValue FirstInsertion = + DAG.getNode(ISD::INSERT_SUBVECTOR, dl, ResVT, Vec, Lo, Idx); + SDValue SecondInsertion = + DAG.getNode(ISD::INSERT_SUBVECTOR, dl, ResVT, FirstInsertion, Hi, + DAG.getVectorIdxConstant(IdxVal + LoElts, dl)); + + return SecondInsertion; +} + SDValue DAGTypeLegalizer::SplitVecOp_EXTRACT_SUBVECTOR(SDNode *N) { // We know that the extracted result type is legal. EVT SubVT = N->getValueType(0); diff --git a/llvm/test/CodeGen/AArch64/split-vector-insert.ll b/llvm/test/CodeGen/AArch64/split-vector-insert.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/split-vector-insert.ll @@ -0,0 +1,459 @@ +; RUN: llc < %s -o - -mtriple=aarch64-- -mcpu=a64fx -debug-only=legalize-types 2>&1 | FileCheck %s --check-prefix=CHECK-LEGALIZATION +; RUN: llc < %s -o - -mtriple=aarch64-- -mcpu=a64fx | FileCheck %s --check-prefix=CHECK-CODEGEN +; REQUIRES: asserts + +declare void @do_something_a(, ) +declare @llvm.experimental.vector.insert.nxv2i64.v8i64(, <8 x i64>, i64) + +declare void @do_something_b(, ) +declare @llvm.experimental.vector.insert.nxv2f64.v8f64(, <8 x double>, i64) + +declare void @do_something_c(, ) +declare @llvm.experimental.vector.insert.nxv2i64.v32i64(, <32 x i64>, i64) + +declare void @do_something_d(, ) +declare @llvm.experimental.vector.insert.nxv2f64.v32f64(, <32 x double>, i64) + +define void @test_nxv2i64_v8i64() { +; CHECK-LEGALIZATION: Legally typed node: [[T1:t[0-9]+]]: nxv2i64 = insert_subvector undef:nxv2i64, [[T2:t[0-9]+]], Constant:i64<0> +; CHECK-LEGALIZATION: Legally typed node: [[T3:t[0-9]+]]: nxv2i64 = insert_subvector [[T1]], [[T2]], Constant:i64<2> +; CHECK-LEGALIZATION: Legally typed 
node: [[T4:t[0-9]+]]: nxv2i64 = insert_subvector [[T3]], [[T2]], Constant:i64<4>
+; CHECK-LEGALIZATION: Legally typed node: [[T5:t[0-9]+]]: nxv2i64 = insert_subvector [[T4]], [[T2]], Constant:i64<6>
+
+; CHECK-CODEGEN-LABEL: test_nxv2i64_v8i64:
+; CHECK-CODEGEN: // %bb.0:
+; CHECK-CODEGEN-NEXT: stp x29, x30, [sp, #-16]! // 16-byte Folded Spill
+; CHECK-CODEGEN-NEXT: addvl sp, sp, #-3
+; CHECK-CODEGEN-NEXT: .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x18, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 24 * VG
+; CHECK-CODEGEN-NEXT: .cfi_offset w30, -8
+; CHECK-CODEGEN-NEXT: .cfi_offset w29, -16
+; CHECK-CODEGEN-NEXT: cntd x9
+; CHECK-CODEGEN-NEXT: sub x9, x9, #1 // =1
+; CHECK-CODEGEN-NEXT: mov w8, #2
+; CHECK-CODEGEN-NEXT: cmp x9, #2 // =2
+; CHECK-CODEGEN-NEXT: csel x8, x9, x8, lo
+; CHECK-CODEGEN-NEXT: movi v0.2d, #0000000000000000
+; CHECK-CODEGEN-NEXT: ptrue p0.d
+; CHECK-CODEGEN-NEXT: mov x10, sp
+; CHECK-CODEGEN-NEXT: lsl x8, x8, #3
+; CHECK-CODEGEN-NEXT: st1d { z0.d }, p0, [sp]
+; CHECK-CODEGEN-NEXT: str q0, [x10, x8]
+; CHECK-CODEGEN-NEXT: addvl x10, sp, #1
+; CHECK-CODEGEN-NEXT: ld1d { z1.d }, p0/z, [sp]
+; CHECK-CODEGEN-NEXT: mov w8, #4
+; CHECK-CODEGEN-NEXT: cmp x9, #4 // =4
+; CHECK-CODEGEN-NEXT: csel x8, x9, x8, lo
+; CHECK-CODEGEN-NEXT: lsl x8, x8, #3
+; CHECK-CODEGEN-NEXT: st1d { z1.d }, p0, [sp, #1, mul vl]
+; CHECK-CODEGEN-NEXT: str q0, [x10, x8]
+; CHECK-CODEGEN-NEXT: addvl x10, sp, #2
+; CHECK-CODEGEN-NEXT: ld1d { z1.d }, p0/z, [sp, #1, mul vl]
+; CHECK-CODEGEN-NEXT: mov w8, #6
+; CHECK-CODEGEN-NEXT: cmp x9, #6 // =6
+; CHECK-CODEGEN-NEXT: csel x8, x9, x8, lo
+; CHECK-CODEGEN-NEXT: lsl x8, x8, #3
+; CHECK-CODEGEN-NEXT: st1d { z1.d }, p0, [sp, #2, mul vl]
+; CHECK-CODEGEN-NEXT: str q0, [x10, x8]
+; CHECK-CODEGEN-NEXT: ld1d { z1.d }, p0/z, [sp, #2, mul vl]
+; CHECK-CODEGEN-NEXT: bl do_something_a
+; CHECK-CODEGEN-NEXT: addvl sp, sp, #3
+; CHECK-CODEGEN-NEXT: ldp x29, x30, [sp], #16 // 16-byte Folded Reload
+; CHECK-CODEGEN-NEXT: ret
+  %castScalableSve = call <vscale x 2 x i64> @llvm.experimental.vector.insert.nxv2i64.v8i64(<vscale x 2 x i64> undef, <8 x i64> zeroinitializer, i64 0)
+  call void @do_something_a(<vscale x 2 x i64> undef, <vscale x 2 x i64> %castScalableSve)
+  ret void
+}
+
+define void @test_nxv2f64_v8f64() {
+; CHECK-LEGALIZATION: Legally typed node: [[T6:t[0-9]+]]: nxv2f64 = insert_subvector undef:nxv2f64, [[T7:t[0-9]+]], Constant:i64<0>
+; CHECK-LEGALIZATION: Legally typed node: [[T8:t[0-9]+]]: nxv2f64 = insert_subvector [[T6]], [[T7]], Constant:i64<2>
+; CHECK-LEGALIZATION: Legally typed node: [[T9:t[0-9]+]]: nxv2f64 = insert_subvector [[T8]], [[T7]], Constant:i64<4>
+; CHECK-LEGALIZATION: Legally typed node: [[T10:t[0-9]+]]: nxv2f64 = insert_subvector [[T9]], [[T7]], Constant:i64<6>
+
+; CHECK-CODEGEN-LABEL: test_nxv2f64_v8f64:
+; CHECK-CODEGEN: // %bb.0:
+; CHECK-CODEGEN-NEXT: stp x29, x30, [sp, #-16]! // 16-byte Folded Spill
+; CHECK-CODEGEN-NEXT: addvl sp, sp, #-3
+; CHECK-CODEGEN-NEXT: .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x18, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 24 * VG
+; CHECK-CODEGEN-NEXT: .cfi_offset w30, -8
+; CHECK-CODEGEN-NEXT: .cfi_offset w29, -16
+; CHECK-CODEGEN-NEXT: cntd x9
+; CHECK-CODEGEN-NEXT: sub x9, x9, #1 // =1
+; CHECK-CODEGEN-NEXT: mov w8, #2
+; CHECK-CODEGEN-NEXT: cmp x9, #2 // =2
+; CHECK-CODEGEN-NEXT: csel x8, x9, x8, lo
+; CHECK-CODEGEN-NEXT: movi v0.2d, #0000000000000000
+; CHECK-CODEGEN-NEXT: ptrue p0.d
+; CHECK-CODEGEN-NEXT: mov x10, sp
+; CHECK-CODEGEN-NEXT: lsl x8, x8, #3
+; CHECK-CODEGEN-NEXT: st1d { z0.d }, p0, [sp]
+; CHECK-CODEGEN-NEXT: str q0, [x10, x8]
+; CHECK-CODEGEN-NEXT: addvl x10, sp, #1
+; CHECK-CODEGEN-NEXT: ld1d { z1.d }, p0/z, [sp]
+; CHECK-CODEGEN-NEXT: mov w8, #4
+; CHECK-CODEGEN-NEXT: cmp x9, #4 // =4
+; CHECK-CODEGEN-NEXT: csel x8, x9, x8, lo
+; CHECK-CODEGEN-NEXT: lsl x8, x8, #3
+; CHECK-CODEGEN-NEXT: st1d { z1.d }, p0, [sp, #1, mul vl]
+; CHECK-CODEGEN-NEXT: str q0, [x10, x8]
+; CHECK-CODEGEN-NEXT: addvl x10, sp, #2
+; CHECK-CODEGEN-NEXT: ld1d { z1.d }, p0/z, [sp,
#1, mul vl]
+; CHECK-CODEGEN-NEXT: mov w8, #6
+; CHECK-CODEGEN-NEXT: cmp x9, #6 // =6
+; CHECK-CODEGEN-NEXT: csel x8, x9, x8, lo
+; CHECK-CODEGEN-NEXT: lsl x8, x8, #3
+; CHECK-CODEGEN-NEXT: st1d { z1.d }, p0, [sp, #2, mul vl]
+; CHECK-CODEGEN-NEXT: str q0, [x10, x8]
+; CHECK-CODEGEN-NEXT: ld1d { z1.d }, p0/z, [sp, #2, mul vl]
+; CHECK-CODEGEN-NEXT: bl do_something_b
+; CHECK-CODEGEN-NEXT: addvl sp, sp, #3
+; CHECK-CODEGEN-NEXT: ldp x29, x30, [sp], #16 // 16-byte Folded Reload
+; CHECK-CODEGEN-NEXT: ret
+  %castScalableSve = call <vscale x 2 x double> @llvm.experimental.vector.insert.nxv2f64.v8f64(<vscale x 2 x double> undef, <8 x double> zeroinitializer, i64 0)
+  call void @do_something_b(<vscale x 2 x double> undef, <vscale x 2 x double> %castScalableSve)
+  ret void
+}
+
+define void @test_nxv2i64_v32i64() {
+; CHECK-LEGALIZATION: Legally typed node: [[T11:t[0-9]+]]: nxv2i64 = insert_subvector undef:nxv2i64, [[T12:t[0-9]+]], Constant:i64<0>
+; CHECK-LEGALIZATION: Legally typed node: [[T13:t[0-9]+]]: nxv2i64 = insert_subvector [[T11]], [[T12]], Constant:i64<2>
+; CHECK-LEGALIZATION: Legally typed node: [[T14:t[0-9]+]]: nxv2i64 = insert_subvector [[T13]], [[T12]], Constant:i64<4>
+; CHECK-LEGALIZATION: Legally typed node: [[T15:t[0-9]+]]: nxv2i64 = insert_subvector [[T14]], [[T12]], Constant:i64<6>
+; CHECK-LEGALIZATION: Legally typed node: [[T16:t[0-9]+]]: nxv2i64 = insert_subvector [[T15]], [[T12]], Constant:i64<8>
+; CHECK-LEGALIZATION: Legally typed node: [[T17:t[0-9]+]]: nxv2i64 = insert_subvector [[T16]], [[T12]], Constant:i64<10>
+; CHECK-LEGALIZATION: Legally typed node: [[T18:t[0-9]+]]: nxv2i64 = insert_subvector [[T17]], [[T12]], Constant:i64<12>
+; CHECK-LEGALIZATION: Legally typed node: [[T19:t[0-9]+]]: nxv2i64 = insert_subvector [[T18]], [[T12]], Constant:i64<14>
+; CHECK-LEGALIZATION: Legally typed node: [[T20:t[0-9]+]]: nxv2i64 = insert_subvector [[T19]], [[T12]], Constant:i64<16>
+; CHECK-LEGALIZATION: Legally typed node: [[T21:t[0-9]+]]: nxv2i64 = insert_subvector [[T20]], [[T12]], Constant:i64<18>
+; CHECK-LEGALIZATION: Legally typed
node: [[T22:t[0-9]+]]: nxv2i64 = insert_subvector [[T21]], [[T12]], Constant:i64<20>
+; CHECK-LEGALIZATION: Legally typed node: [[T23:t[0-9]+]]: nxv2i64 = insert_subvector [[T22]], [[T12]], Constant:i64<22>
+; CHECK-LEGALIZATION: Legally typed node: [[T24:t[0-9]+]]: nxv2i64 = insert_subvector [[T23]], [[T12]], Constant:i64<24>
+; CHECK-LEGALIZATION: Legally typed node: [[T25:t[0-9]+]]: nxv2i64 = insert_subvector [[T24]], [[T12]], Constant:i64<26>
+; CHECK-LEGALIZATION: Legally typed node: [[T26:t[0-9]+]]: nxv2i64 = insert_subvector [[T25]], [[T12]], Constant:i64<28>
+; CHECK-LEGALIZATION: Legally typed node: [[T27:t[0-9]+]]: nxv2i64 = insert_subvector [[T26]], [[T12]], Constant:i64<30>
+
+; CHECK-CODEGEN-LABEL: test_nxv2i64_v32i64:
+; CHECK-CODEGEN: // %bb.0:
+; CHECK-CODEGEN-NEXT: stp x29, x30, [sp, #-16]! // 16-byte Folded Spill
+; CHECK-CODEGEN-NEXT: addvl sp, sp, #-15
+; CHECK-CODEGEN-NEXT: .cfi_escape 0x0f, 0x0d, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0xf8, 0x00, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 120 * VG
+; CHECK-CODEGEN-NEXT: .cfi_offset w30, -8
+; CHECK-CODEGEN-NEXT: .cfi_offset w29, -16
+; CHECK-CODEGEN-NEXT: cntd x8
+; CHECK-CODEGEN-NEXT: sub x8, x8, #1 // =1
+; CHECK-CODEGEN-NEXT: mov w9, #2
+; CHECK-CODEGEN-NEXT: cmp x8, #2 // =2
+; CHECK-CODEGEN-NEXT: csel x9, x8, x9, lo
+; CHECK-CODEGEN-NEXT: movi v0.2d, #0000000000000000
+; CHECK-CODEGEN-NEXT: ptrue p0.d
+; CHECK-CODEGEN-NEXT: mov x10, sp
+; CHECK-CODEGEN-NEXT: lsl x9, x9, #3
+; CHECK-CODEGEN-NEXT: st1d { z0.d }, p0, [sp]
+; CHECK-CODEGEN-NEXT: str q0, [x10, x9]
+; CHECK-CODEGEN-NEXT: addvl x10, sp, #1
+; CHECK-CODEGEN-NEXT: ld1d { z1.d }, p0/z, [sp]
+; CHECK-CODEGEN-NEXT: mov w9, #4
+; CHECK-CODEGEN-NEXT: cmp x8, #4 // =4
+; CHECK-CODEGEN-NEXT: csel x9, x8, x9, lo
+; CHECK-CODEGEN-NEXT: lsl x9, x9, #3
+; CHECK-CODEGEN-NEXT: st1d { z1.d }, p0, [sp, #1, mul vl]
+; CHECK-CODEGEN-NEXT: str q0, [x10, x9]
+; CHECK-CODEGEN-NEXT: addvl x10, sp, #2
+; CHECK-CODEGEN-NEXT: ld1d { z1.d }, p0/z, [sp, #1, mul vl]
+; CHECK-CODEGEN-NEXT: mov w9, #6
+; CHECK-CODEGEN-NEXT: cmp x8, #6 // =6
+; CHECK-CODEGEN-NEXT: csel x9, x8, x9, lo
+; CHECK-CODEGEN-NEXT: lsl x9, x9, #3
+; CHECK-CODEGEN-NEXT: st1d { z1.d }, p0, [sp, #2, mul vl]
+; CHECK-CODEGEN-NEXT: str q0, [x10, x9]
+; CHECK-CODEGEN-NEXT: addvl x10, sp, #3
+; CHECK-CODEGEN-NEXT: ld1d { z1.d }, p0/z, [sp, #2, mul vl]
+; CHECK-CODEGEN-NEXT: mov w9, #8
+; CHECK-CODEGEN-NEXT: cmp x8, #8 // =8
+; CHECK-CODEGEN-NEXT: csel x9, x8, x9, lo
+; CHECK-CODEGEN-NEXT: lsl x9, x9, #3
+; CHECK-CODEGEN-NEXT: st1d { z1.d }, p0, [sp, #3, mul vl]
+; CHECK-CODEGEN-NEXT: str q0, [x10, x9]
+; CHECK-CODEGEN-NEXT: addvl x10, sp, #4
+; CHECK-CODEGEN-NEXT: ld1d { z1.d }, p0/z, [sp, #3, mul vl]
+; CHECK-CODEGEN-NEXT: mov w9, #10
+; CHECK-CODEGEN-NEXT: cmp x8, #10 // =10
+; CHECK-CODEGEN-NEXT: csel x9, x8, x9, lo
+; CHECK-CODEGEN-NEXT: lsl x9, x9, #3
+; CHECK-CODEGEN-NEXT: st1d { z1.d }, p0, [sp, #4, mul vl]
+; CHECK-CODEGEN-NEXT: str q0, [x10, x9]
+; CHECK-CODEGEN-NEXT: addvl x10, sp, #5
+; CHECK-CODEGEN-NEXT: ld1d { z1.d }, p0/z, [sp, #4, mul vl]
+; CHECK-CODEGEN-NEXT: mov w9, #12
+; CHECK-CODEGEN-NEXT: cmp x8, #12 // =12
+; CHECK-CODEGEN-NEXT: csel x9, x8, x9, lo
+; CHECK-CODEGEN-NEXT: lsl x9, x9, #3
+; CHECK-CODEGEN-NEXT: st1d { z1.d }, p0, [sp, #5, mul vl]
+; CHECK-CODEGEN-NEXT: str q0, [x10, x9]
+; CHECK-CODEGEN-NEXT: addvl x10, sp, #6
+; CHECK-CODEGEN-NEXT: ld1d { z1.d }, p0/z, [sp, #5, mul vl]
+; CHECK-CODEGEN-NEXT: mov w9, #14
+; CHECK-CODEGEN-NEXT: cmp x8, #14 // =14
+; CHECK-CODEGEN-NEXT: csel x9, x8, x9, lo
+; CHECK-CODEGEN-NEXT: lsl x9, x9, #3
+; CHECK-CODEGEN-NEXT: st1d { z1.d }, p0, [sp, #6, mul vl]
+; CHECK-CODEGEN-NEXT: str q0, [x10, x9]
+; CHECK-CODEGEN-NEXT: addvl x10, sp, #7
+; CHECK-CODEGEN-NEXT: ld1d { z1.d }, p0/z, [sp, #6, mul vl]
+; CHECK-CODEGEN-NEXT: mov w9, #16
+; CHECK-CODEGEN-NEXT: cmp x8, #16 // =16
+; CHECK-CODEGEN-NEXT: csel x9, x8, x9, lo
+; CHECK-CODEGEN-NEXT: lsl x9, x9, #3
+; CHECK-CODEGEN-NEXT: st1d { z1.d }, p0, [sp, #7, mul vl]
+; CHECK-CODEGEN-NEXT: str q0, [x10, x9]
+; CHECK-CODEGEN-NEXT: addvl x10, sp, #8
+; CHECK-CODEGEN-NEXT: ld1d { z1.d }, p0/z, [sp, #7, mul vl]
+; CHECK-CODEGEN-NEXT: mov w9, #18
+; CHECK-CODEGEN-NEXT: cmp x8, #18 // =18
+; CHECK-CODEGEN-NEXT: csel x9, x8, x9, lo
+; CHECK-CODEGEN-NEXT: lsl x9, x9, #3
+; CHECK-CODEGEN-NEXT: addvl x11, sp, #1
+; CHECK-CODEGEN-NEXT: st1d { z1.d }, p0, [x11, #7, mul vl]
+; CHECK-CODEGEN-NEXT: str q0, [x10, x9]
+; CHECK-CODEGEN-NEXT: addvl x10, sp, #9
+; CHECK-CODEGEN-NEXT: addvl x11, sp, #1
+; CHECK-CODEGEN-NEXT: ld1d { z1.d }, p0/z, [x11, #7, mul vl]
+; CHECK-CODEGEN-NEXT: mov w9, #20
+; CHECK-CODEGEN-NEXT: cmp x8, #20 // =20
+; CHECK-CODEGEN-NEXT: csel x9, x8, x9, lo
+; CHECK-CODEGEN-NEXT: lsl x9, x9, #3
+; CHECK-CODEGEN-NEXT: addvl x11, sp, #2
+; CHECK-CODEGEN-NEXT: st1d { z1.d }, p0, [x11, #7, mul vl]
+; CHECK-CODEGEN-NEXT: str q0, [x10, x9]
+; CHECK-CODEGEN-NEXT: addvl x10, sp, #10
+; CHECK-CODEGEN-NEXT: addvl x11, sp, #2
+; CHECK-CODEGEN-NEXT: ld1d { z1.d }, p0/z, [x11, #7, mul vl]
+; CHECK-CODEGEN-NEXT: mov w9, #22
+; CHECK-CODEGEN-NEXT: cmp x8, #22 // =22
+; CHECK-CODEGEN-NEXT: csel x9, x8, x9, lo
+; CHECK-CODEGEN-NEXT: lsl x9, x9, #3
+; CHECK-CODEGEN-NEXT: addvl x11, sp, #3
+; CHECK-CODEGEN-NEXT: st1d { z1.d }, p0, [x11, #7, mul vl]
+; CHECK-CODEGEN-NEXT: str q0, [x10, x9]
+; CHECK-CODEGEN-NEXT: addvl x10, sp, #11
+; CHECK-CODEGEN-NEXT: addvl x11, sp, #3
+; CHECK-CODEGEN-NEXT: ld1d { z1.d }, p0/z, [x11, #7, mul vl]
+; CHECK-CODEGEN-NEXT: mov w9, #24
+; CHECK-CODEGEN-NEXT: cmp x8, #24 // =24
+; CHECK-CODEGEN-NEXT: csel x9, x8, x9, lo
+; CHECK-CODEGEN-NEXT: lsl x9, x9, #3
+; CHECK-CODEGEN-NEXT: addvl x11, sp, #4
+; CHECK-CODEGEN-NEXT: st1d { z1.d }, p0, [x11, #7, mul vl]
+; CHECK-CODEGEN-NEXT: str q0, [x10, x9]
+; CHECK-CODEGEN-NEXT: addvl x10, sp, #12
+; CHECK-CODEGEN-NEXT: addvl x11, sp, #4
+; CHECK-CODEGEN-NEXT: ld1d { z1.d }, p0/z, [x11, #7, mul vl]
+; CHECK-CODEGEN-NEXT: mov w9, #26
+; CHECK-CODEGEN-NEXT: cmp x8, #26 // =26
+; CHECK-CODEGEN-NEXT: csel x9, x8, x9, lo
+; CHECK-CODEGEN-NEXT: lsl x9, x9, #3
+; CHECK-CODEGEN-NEXT: addvl x11, sp, #5
+; CHECK-CODEGEN-NEXT: st1d { z1.d }, p0, [x11, #7, mul vl]
+; CHECK-CODEGEN-NEXT: str q0, [x10, x9]
+; CHECK-CODEGEN-NEXT: addvl x10, sp, #13
+; CHECK-CODEGEN-NEXT: addvl x11, sp, #5
+; CHECK-CODEGEN-NEXT: ld1d { z1.d }, p0/z, [x11, #7, mul vl]
+; CHECK-CODEGEN-NEXT: mov w9, #28
+; CHECK-CODEGEN-NEXT: cmp x8, #28 // =28
+; CHECK-CODEGEN-NEXT: csel x9, x8, x9, lo
+; CHECK-CODEGEN-NEXT: lsl x9, x9, #3
+; CHECK-CODEGEN-NEXT: addvl x11, sp, #6
+; CHECK-CODEGEN-NEXT: st1d { z1.d }, p0, [x11, #7, mul vl]
+; CHECK-CODEGEN-NEXT: str q0, [x10, x9]
+; CHECK-CODEGEN-NEXT: addvl x10, sp, #14
+; CHECK-CODEGEN-NEXT: addvl x11, sp, #6
+; CHECK-CODEGEN-NEXT: ld1d { z1.d }, p0/z, [x11, #7, mul vl]
+; CHECK-CODEGEN-NEXT: mov w9, #30
+; CHECK-CODEGEN-NEXT: cmp x8, #30 // =30
+; CHECK-CODEGEN-NEXT: csel x8, x8, x9, lo
+; CHECK-CODEGEN-NEXT: lsl x8, x8, #3
+; CHECK-CODEGEN-NEXT: addvl x9, sp, #7
+; CHECK-CODEGEN-NEXT: st1d { z1.d }, p0, [x9, #7, mul vl]
+; CHECK-CODEGEN-NEXT: str q0, [x10, x8]
+; CHECK-CODEGEN-NEXT: addvl x8, sp, #7
+; CHECK-CODEGEN-NEXT: ld1d { z1.d }, p0/z, [x8, #7, mul vl]
+; CHECK-CODEGEN-NEXT: bl do_something_c
+; CHECK-CODEGEN-NEXT: addvl sp, sp, #15
+; CHECK-CODEGEN-NEXT: ldp x29, x30, [sp], #16 // 16-byte Folded Reload
+; CHECK-CODEGEN-NEXT: ret
+  %castScalableSve = call <vscale x 2 x i64> @llvm.experimental.vector.insert.nxv2i64.v32i64(<vscale x 2 x i64> undef, <32 x i64> zeroinitializer, i64 0)
+  call void @do_something_c(<vscale x 2 x i64> undef, <vscale x 2 x i64> %castScalableSve)
+  ret void
+}
+
+define void @test_nxv2f64_v32f64() {
+; CHECK-LEGALIZATION: Legally typed node: [[T28:t[0-9]+]]: nxv2f64 = insert_subvector undef:nxv2f64, [[T29:t[0-9]+]], Constant:i64<0>
+; CHECK-LEGALIZATION: Legally typed node: [[T30:t[0-9]+]]: nxv2f64 = insert_subvector [[T28]], [[T29]], Constant:i64<2>
+; CHECK-LEGALIZATION: Legally typed node: [[T31:t[0-9]+]]: nxv2f64 = insert_subvector
[[T30]], [[T29]], Constant:i64<4>
+; CHECK-LEGALIZATION: Legally typed node: [[T32:t[0-9]+]]: nxv2f64 = insert_subvector [[T31]], [[T29]], Constant:i64<6>
+; CHECK-LEGALIZATION: Legally typed node: [[T33:t[0-9]+]]: nxv2f64 = insert_subvector [[T32]], [[T29]], Constant:i64<8>
+; CHECK-LEGALIZATION: Legally typed node: [[T34:t[0-9]+]]: nxv2f64 = insert_subvector [[T33]], [[T29]], Constant:i64<10>
+; CHECK-LEGALIZATION: Legally typed node: [[T35:t[0-9]+]]: nxv2f64 = insert_subvector [[T34]], [[T29]], Constant:i64<12>
+; CHECK-LEGALIZATION: Legally typed node: [[T36:t[0-9]+]]: nxv2f64 = insert_subvector [[T35]], [[T29]], Constant:i64<14>
+; CHECK-LEGALIZATION: Legally typed node: [[T37:t[0-9]+]]: nxv2f64 = insert_subvector [[T36]], [[T29]], Constant:i64<16>
+; CHECK-LEGALIZATION: Legally typed node: [[T38:t[0-9]+]]: nxv2f64 = insert_subvector [[T37]], [[T29]], Constant:i64<18>
+; CHECK-LEGALIZATION: Legally typed node: [[T39:t[0-9]+]]: nxv2f64 = insert_subvector [[T38]], [[T29]], Constant:i64<20>
+; CHECK-LEGALIZATION: Legally typed node: [[T40:t[0-9]+]]: nxv2f64 = insert_subvector [[T39]], [[T29]], Constant:i64<22>
+; CHECK-LEGALIZATION: Legally typed node: [[T41:t[0-9]+]]: nxv2f64 = insert_subvector [[T40]], [[T29]], Constant:i64<24>
+; CHECK-LEGALIZATION: Legally typed node: [[T42:t[0-9]+]]: nxv2f64 = insert_subvector [[T41]], [[T29]], Constant:i64<26>
+; CHECK-LEGALIZATION: Legally typed node: [[T43:t[0-9]+]]: nxv2f64 = insert_subvector [[T42]], [[T29]], Constant:i64<28>
+; CHECK-LEGALIZATION: Legally typed node: [[T44:t[0-9]+]]: nxv2f64 = insert_subvector [[T43]], [[T29]], Constant:i64<30>
+
+; CHECK-CODEGEN-LABEL: test_nxv2f64_v32f64:
+; CHECK-CODEGEN: // %bb.0:
+; CHECK-CODEGEN-NEXT: stp x29, x30, [sp, #-16]! // 16-byte Folded Spill
+; CHECK-CODEGEN-NEXT: addvl sp, sp, #-15
+; CHECK-CODEGEN-NEXT: .cfi_escape 0x0f, 0x0d, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0xf8, 0x00, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 120 * VG
+; CHECK-CODEGEN-NEXT: .cfi_offset w30, -8
+; CHECK-CODEGEN-NEXT: .cfi_offset w29, -16
+; CHECK-CODEGEN-NEXT: cntd x8
+; CHECK-CODEGEN-NEXT: sub x8, x8, #1 // =1
+; CHECK-CODEGEN-NEXT: mov w9, #2
+; CHECK-CODEGEN-NEXT: cmp x8, #2 // =2
+; CHECK-CODEGEN-NEXT: csel x9, x8, x9, lo
+; CHECK-CODEGEN-NEXT: movi v0.2d, #0000000000000000
+; CHECK-CODEGEN-NEXT: ptrue p0.d
+; CHECK-CODEGEN-NEXT: mov x10, sp
+; CHECK-CODEGEN-NEXT: lsl x9, x9, #3
+; CHECK-CODEGEN-NEXT: st1d { z0.d }, p0, [sp]
+; CHECK-CODEGEN-NEXT: str q0, [x10, x9]
+; CHECK-CODEGEN-NEXT: addvl x10, sp, #1
+; CHECK-CODEGEN-NEXT: ld1d { z1.d }, p0/z, [sp]
+; CHECK-CODEGEN-NEXT: mov w9, #4
+; CHECK-CODEGEN-NEXT: cmp x8, #4 // =4
+; CHECK-CODEGEN-NEXT: csel x9, x8, x9, lo
+; CHECK-CODEGEN-NEXT: lsl x9, x9, #3
+; CHECK-CODEGEN-NEXT: st1d { z1.d }, p0, [sp, #1, mul vl]
+; CHECK-CODEGEN-NEXT: str q0, [x10, x9]
+; CHECK-CODEGEN-NEXT: addvl x10, sp, #2
+; CHECK-CODEGEN-NEXT: ld1d { z1.d }, p0/z, [sp, #1, mul vl]
+; CHECK-CODEGEN-NEXT: mov w9, #6
+; CHECK-CODEGEN-NEXT: cmp x8, #6 // =6
+; CHECK-CODEGEN-NEXT: csel x9, x8, x9, lo
+; CHECK-CODEGEN-NEXT: lsl x9, x9, #3
+; CHECK-CODEGEN-NEXT: st1d { z1.d }, p0, [sp, #2, mul vl]
+; CHECK-CODEGEN-NEXT: str q0, [x10, x9]
+; CHECK-CODEGEN-NEXT: addvl x10, sp, #3
+; CHECK-CODEGEN-NEXT: ld1d { z1.d }, p0/z, [sp, #2, mul vl]
+; CHECK-CODEGEN-NEXT: mov w9, #8
+; CHECK-CODEGEN-NEXT: cmp x8, #8 // =8
+; CHECK-CODEGEN-NEXT: csel x9, x8, x9, lo
+; CHECK-CODEGEN-NEXT: lsl x9, x9, #3
+; CHECK-CODEGEN-NEXT: st1d { z1.d }, p0, [sp, #3, mul vl]
+; CHECK-CODEGEN-NEXT: str q0, [x10, x9]
+; CHECK-CODEGEN-NEXT: addvl x10, sp, #4
+; CHECK-CODEGEN-NEXT: ld1d { z1.d }, p0/z, [sp, #3, mul vl]
+; CHECK-CODEGEN-NEXT: mov w9, #10
+; CHECK-CODEGEN-NEXT: cmp x8, #10 // =10
+; CHECK-CODEGEN-NEXT: csel x9, x8, x9, lo
+; CHECK-CODEGEN-NEXT: lsl x9, x9, #3
+; CHECK-CODEGEN-NEXT: st1d { z1.d }, p0, [sp, #4, mul vl]
+; CHECK-CODEGEN-NEXT: str q0, [x10, x9]
+; CHECK-CODEGEN-NEXT: addvl x10, sp, #5
+; CHECK-CODEGEN-NEXT: ld1d { z1.d }, p0/z, [sp, #4, mul vl]
+; CHECK-CODEGEN-NEXT: mov w9, #12
+; CHECK-CODEGEN-NEXT: cmp x8, #12 // =12
+; CHECK-CODEGEN-NEXT: csel x9, x8, x9, lo
+; CHECK-CODEGEN-NEXT: lsl x9, x9, #3
+; CHECK-CODEGEN-NEXT: st1d { z1.d }, p0, [sp, #5, mul vl]
+; CHECK-CODEGEN-NEXT: str q0, [x10, x9]
+; CHECK-CODEGEN-NEXT: addvl x10, sp, #6
+; CHECK-CODEGEN-NEXT: ld1d { z1.d }, p0/z, [sp, #5, mul vl]
+; CHECK-CODEGEN-NEXT: mov w9, #14
+; CHECK-CODEGEN-NEXT: cmp x8, #14 // =14
+; CHECK-CODEGEN-NEXT: csel x9, x8, x9, lo
+; CHECK-CODEGEN-NEXT: lsl x9, x9, #3
+; CHECK-CODEGEN-NEXT: st1d { z1.d }, p0, [sp, #6, mul vl]
+; CHECK-CODEGEN-NEXT: str q0, [x10, x9]
+; CHECK-CODEGEN-NEXT: addvl x10, sp, #7
+; CHECK-CODEGEN-NEXT: ld1d { z1.d }, p0/z, [sp, #6, mul vl]
+; CHECK-CODEGEN-NEXT: mov w9, #16
+; CHECK-CODEGEN-NEXT: cmp x8, #16 // =16
+; CHECK-CODEGEN-NEXT: csel x9, x8, x9, lo
+; CHECK-CODEGEN-NEXT: lsl x9, x9, #3
+; CHECK-CODEGEN-NEXT: st1d { z1.d }, p0, [sp, #7, mul vl]
+; CHECK-CODEGEN-NEXT: str q0, [x10, x9]
+; CHECK-CODEGEN-NEXT: addvl x10, sp, #8
+; CHECK-CODEGEN-NEXT: ld1d { z1.d }, p0/z, [sp, #7, mul vl]
+; CHECK-CODEGEN-NEXT: mov w9, #18
+; CHECK-CODEGEN-NEXT: cmp x8, #18 // =18
+; CHECK-CODEGEN-NEXT: csel x9, x8, x9, lo
+; CHECK-CODEGEN-NEXT: lsl x9, x9, #3
+; CHECK-CODEGEN-NEXT: addvl x11, sp, #1
+; CHECK-CODEGEN-NEXT: st1d { z1.d }, p0, [x11, #7, mul vl]
+; CHECK-CODEGEN-NEXT: str q0, [x10, x9]
+; CHECK-CODEGEN-NEXT: addvl x10, sp, #9
+; CHECK-CODEGEN-NEXT: addvl x11, sp, #1
+; CHECK-CODEGEN-NEXT: ld1d { z1.d }, p0/z, [x11, #7, mul vl]
+; CHECK-CODEGEN-NEXT: mov w9, #20
+; CHECK-CODEGEN-NEXT: cmp x8, #20 // =20
+; CHECK-CODEGEN-NEXT: csel x9, x8, x9, lo
+; CHECK-CODEGEN-NEXT: lsl x9, x9, #3
+; CHECK-CODEGEN-NEXT: addvl x11, sp, #2
+; CHECK-CODEGEN-NEXT: st1d { z1.d }, p0, [x11, #7, mul vl]
+; CHECK-CODEGEN-NEXT: str q0, [x10, x9]
+; CHECK-CODEGEN-NEXT: addvl x10, sp, #10
+; CHECK-CODEGEN-NEXT: addvl x11, sp, #2
+; CHECK-CODEGEN-NEXT: ld1d { z1.d }, p0/z, [x11, #7, mul vl]
+; CHECK-CODEGEN-NEXT: mov w9, #22
+; CHECK-CODEGEN-NEXT: cmp x8, #22 // =22
+; CHECK-CODEGEN-NEXT: csel x9, x8, x9, lo
+; CHECK-CODEGEN-NEXT: lsl x9, x9, #3
+; CHECK-CODEGEN-NEXT: addvl x11, sp, #3
+; CHECK-CODEGEN-NEXT: st1d { z1.d }, p0, [x11, #7, mul vl]
+; CHECK-CODEGEN-NEXT: str q0, [x10, x9]
+; CHECK-CODEGEN-NEXT: addvl x10, sp, #11
+; CHECK-CODEGEN-NEXT: addvl x11, sp, #3
+; CHECK-CODEGEN-NEXT: ld1d { z1.d }, p0/z, [x11, #7, mul vl]
+; CHECK-CODEGEN-NEXT: mov w9, #24
+; CHECK-CODEGEN-NEXT: cmp x8, #24 // =24
+; CHECK-CODEGEN-NEXT: csel x9, x8, x9, lo
+; CHECK-CODEGEN-NEXT: lsl x9, x9, #3
+; CHECK-CODEGEN-NEXT: addvl x11, sp, #4
+; CHECK-CODEGEN-NEXT: st1d { z1.d }, p0, [x11, #7, mul vl]
+; CHECK-CODEGEN-NEXT: str q0, [x10, x9]
+; CHECK-CODEGEN-NEXT: addvl x10, sp, #12
+; CHECK-CODEGEN-NEXT: addvl x11, sp, #4
+; CHECK-CODEGEN-NEXT: ld1d { z1.d }, p0/z, [x11, #7, mul vl]
+; CHECK-CODEGEN-NEXT: mov w9, #26
+; CHECK-CODEGEN-NEXT: cmp x8, #26 // =26
+; CHECK-CODEGEN-NEXT: csel x9, x8, x9, lo
+; CHECK-CODEGEN-NEXT: lsl x9, x9, #3
+; CHECK-CODEGEN-NEXT: addvl x11, sp, #5
+; CHECK-CODEGEN-NEXT: st1d { z1.d }, p0, [x11, #7, mul vl]
+; CHECK-CODEGEN-NEXT: str q0, [x10, x9]
+; CHECK-CODEGEN-NEXT: addvl x10, sp, #13
+; CHECK-CODEGEN-NEXT: addvl x11, sp, #5
+; CHECK-CODEGEN-NEXT: ld1d { z1.d }, p0/z, [x11, #7, mul vl]
+; CHECK-CODEGEN-NEXT: mov w9, #28
+; CHECK-CODEGEN-NEXT: cmp x8, #28 // =28
+; CHECK-CODEGEN-NEXT: csel x9, x8, x9, lo
+; CHECK-CODEGEN-NEXT: lsl x9, x9, #3
+; CHECK-CODEGEN-NEXT: addvl x11, sp, #6
+; CHECK-CODEGEN-NEXT: st1d { z1.d }, p0, [x11, #7, mul vl]
+; CHECK-CODEGEN-NEXT: str q0, [x10, x9]
+; CHECK-CODEGEN-NEXT: addvl x10, sp, #14
+; CHECK-CODEGEN-NEXT: addvl x11, sp, #6
+; CHECK-CODEGEN-NEXT: ld1d { z1.d }, p0/z, [x11, #7, mul vl]
+; CHECK-CODEGEN-NEXT: mov w9, #30
+; CHECK-CODEGEN-NEXT: cmp x8, #30 // =30
+; CHECK-CODEGEN-NEXT: csel x8, x8, x9, lo
+; CHECK-CODEGEN-NEXT: lsl x8, x8, #3
+; CHECK-CODEGEN-NEXT: addvl x9, sp, #7
+; CHECK-CODEGEN-NEXT: st1d { z1.d }, p0, [x9, #7, mul vl]
+; CHECK-CODEGEN-NEXT: str q0, [x10, x8]
+; CHECK-CODEGEN-NEXT: addvl x8, sp, #7
+; CHECK-CODEGEN-NEXT: ld1d { z1.d }, p0/z, [x8, #7, mul vl]
+; CHECK-CODEGEN-NEXT: bl do_something_d
+; CHECK-CODEGEN-NEXT: addvl sp, sp, #15
+; CHECK-CODEGEN-NEXT: ldp x29, x30, [sp], #16 // 16-byte Folded Reload
+; CHECK-CODEGEN-NEXT: ret
+  %castScalableSve = call <vscale x 2 x double> @llvm.experimental.vector.insert.nxv2f64.v32f64(<vscale x 2 x double> undef, <32 x double> zeroinitializer, i64 0)
+  call void @do_something_d(<vscale x 2 x double> undef, <vscale x 2 x double> %castScalableSve)
+  ret void
+}