Index: llvm/test/CodeGen/AArch64/sve-insert-vector.ll
===================================================================
--- llvm/test/CodeGen/AArch64/sve-insert-vector.ll
+++ llvm/test/CodeGen/AArch64/sve-insert-vector.ll
@@ -129,7 +129,130 @@
   ret <vscale x 16 x i8> %retval
 }
+
+; Insert subvectors into illegal vectors
+
+define void @insert_nxv8i64_nxv16i64(<vscale x 8 x i64> %sv0, <vscale x 8 x i64> %sv1, <vscale x 16 x i64>* %out) {
+; CHECK-LABEL: insert_nxv8i64_nxv16i64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.d
+; CHECK-NEXT:    st1d { z7.d }, p0, [x0, #7, mul vl]
+; CHECK-NEXT:    st1d { z6.d }, p0, [x0, #6, mul vl]
+; CHECK-NEXT:    st1d { z5.d }, p0, [x0, #5, mul vl]
+; CHECK-NEXT:    st1d { z4.d }, p0, [x0, #4, mul vl]
+; CHECK-NEXT:    st1d { z3.d }, p0, [x0, #3, mul vl]
+; CHECK-NEXT:    st1d { z2.d }, p0, [x0, #2, mul vl]
+; CHECK-NEXT:    st1d { z1.d }, p0, [x0, #1, mul vl]
+; CHECK-NEXT:    st1d { z0.d }, p0, [x0]
+; CHECK-NEXT:    ret
+  %v0 = call <vscale x 16 x i64> @llvm.experimental.vector.insert.nxv8i64.nxv16i64(<vscale x 16 x i64> undef, <vscale x 8 x i64> %sv0, i64 0)
+  %v = call <vscale x 16 x i64> @llvm.experimental.vector.insert.nxv8i64.nxv16i64(<vscale x 16 x i64> %v0, <vscale x 8 x i64> %sv1, i64 8)
+  store <vscale x 16 x i64> %v, <vscale x 16 x i64>* %out
+  ret void
+}
+
+define void @insert_nxv8i64_nxv16i64_lo(<vscale x 8 x i64> %sv0, <vscale x 16 x i64>* %out) {
+; CHECK-LABEL: insert_nxv8i64_nxv16i64_lo:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.d
+; CHECK-NEXT:    st1d { z3.d }, p0, [x0, #3, mul vl]
+; CHECK-NEXT:    st1d { z2.d }, p0, [x0, #2, mul vl]
+; CHECK-NEXT:    st1d { z1.d }, p0, [x0, #1, mul vl]
+; CHECK-NEXT:    st1d { z0.d }, p0, [x0]
+; CHECK-NEXT:    ret
+  %v = call <vscale x 16 x i64> @llvm.experimental.vector.insert.nxv8i64.nxv16i64(<vscale x 16 x i64> undef, <vscale x 8 x i64> %sv0, i64 0)
+  store <vscale x 16 x i64> %v, <vscale x 16 x i64>* %out
+  ret void
+}
+
+define void @insert_nxv8i64_nxv16i64_hi(<vscale x 8 x i64> %sv0, <vscale x 16 x i64>* %out) {
+; CHECK-LABEL: insert_nxv8i64_nxv16i64_hi:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.d
+; CHECK-NEXT:    st1d { z3.d }, p0, [x0, #7, mul vl]
+; CHECK-NEXT:    st1d { z2.d }, p0, [x0, #6, mul vl]
+; CHECK-NEXT:    st1d { z1.d }, p0, [x0, #5, mul vl]
+; CHECK-NEXT:    st1d { z0.d }, p0, [x0, #4, mul vl]
+; CHECK-NEXT:    ret
+  %v = call <vscale x 16 x i64> @llvm.experimental.vector.insert.nxv8i64.nxv16i64(<vscale x 16 x i64> undef, <vscale x 8 x i64> %sv0, i64 8)
+  store <vscale x 16 x i64> %v, <vscale x 16 x i64>* %out
+  ret void
+}
+
+define void @insert_v2i64_nxv16i64(<2 x i64>* %psv0, <2 x i64>* %psv1, <vscale x 16 x i64>* %out) {
+; CHECK-LABEL: insert_v2i64_nxv16i64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-NEXT:    addvl sp, sp, #-4
+; CHECK-NEXT:    .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x20, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 32 * VG
+; CHECK-NEXT:    .cfi_offset w29, -16
+; CHECK-NEXT:    ldr q0, [x0]
+; CHECK-NEXT:    ldr q1, [x1]
+; CHECK-NEXT:    ptrue p0.d
+; CHECK-NEXT:    mov x8, sp
+; CHECK-NEXT:    st1d { z0.d }, p0, [sp]
+; CHECK-NEXT:    str q1, [sp, #32]
+; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x8, #1, mul vl]
+; CHECK-NEXT:    ld1d { z1.d }, p0/z, [x8, #2, mul vl]
+; CHECK-NEXT:    ld1d { z2.d }, p0/z, [x8, #3, mul vl]
+; CHECK-NEXT:    ld1d { z3.d }, p0/z, [sp]
+; CHECK-NEXT:    st1d { z2.d }, p0, [x2, #3, mul vl]
+; CHECK-NEXT:    st1d { z1.d }, p0, [x2, #2, mul vl]
+; CHECK-NEXT:    st1d { z0.d }, p0, [x2, #1, mul vl]
+; CHECK-NEXT:    st1d { z3.d }, p0, [x2]
+; CHECK-NEXT:    addvl sp, sp, #4
+; CHECK-NEXT:    ldr x29, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT:    ret
+  %sv0 = load <2 x i64>, <2 x i64>* %psv0
+  %sv1 = load <2 x i64>, <2 x i64>* %psv1
+  %v0 = call <vscale x 16 x i64> @llvm.experimental.vector.insert.v2i64.nxv16i64(<vscale x 16 x i64> undef, <2 x i64> %sv0, i64 0)
+  %v = call <vscale x 16 x i64> @llvm.experimental.vector.insert.v2i64.nxv16i64(<vscale x 16 x i64> %v0, <2 x i64> %sv1, i64 4)
+  store <vscale x 16 x i64> %v, <vscale x 16 x i64>* %out
+  ret void
+}
+
+define void @insert_v2i64_nxv16i64_lo0(<2 x i64>* %psv, <vscale x 16 x i64>* %out) {
+; CHECK-LABEL: insert_v2i64_nxv16i64_lo0:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ldr q0, [x0]
+; CHECK-NEXT:    ptrue p0.d
+; CHECK-NEXT:    st1d { z0.d }, p0, [x1]
+; CHECK-NEXT:    ret
+  %sv = load <2 x i64>, <2 x i64>* %psv
+  %v = call <vscale x 16 x i64> @llvm.experimental.vector.insert.v2i64.nxv16i64(<vscale x 16 x i64> undef, <2 x i64> %sv, i64 0)
+  store <vscale x 16 x i64> %v, <vscale x 16 x i64>* %out
+  ret void
+}
+
+define void @insert_v2i64_nxv16i64_lo2(<2 x i64>* %psv, <vscale x 16 x i64>* %out) {
+; CHECK-LABEL: insert_v2i64_nxv16i64_lo2:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-NEXT:    addvl sp, sp, #-2
+; CHECK-NEXT:    .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 16 * VG
+; CHECK-NEXT:    .cfi_offset w29, -16
+; CHECK-NEXT:    ldr q0, [x0]
+; CHECK-NEXT:    ptrue p0.d
+; CHECK-NEXT:    mov x8, sp
+; CHECK-NEXT:    str q0, [sp, #16]
+; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x8, #1, mul vl]
+; CHECK-NEXT:    ld1d { z1.d }, p0/z, [sp]
+; CHECK-NEXT:    st1d { z0.d }, p0, [x1, #1, mul vl]
+; CHECK-NEXT:    st1d { z1.d }, p0, [x1]
+; CHECK-NEXT:    addvl sp, sp, #2
+; CHECK-NEXT:    ldr x29, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT:    ret
+  %sv = load <2 x i64>, <2 x i64>* %psv
+  %v = call <vscale x 16 x i64> @llvm.experimental.vector.insert.v2i64.nxv16i64(<vscale x 16 x i64> undef, <2 x i64> %sv, i64 2)
+  store <vscale x 16 x i64> %v, <vscale x 16 x i64>* %out
+  ret void
+}
+
+
 
 declare <vscale x 2 x i64> @llvm.experimental.vector.insert.nxv2i64.v2i64(<vscale x 2 x i64>, <2 x i64>, i64)
 declare <vscale x 4 x i32> @llvm.experimental.vector.insert.nxv4i32.v4i32(<vscale x 4 x i32>, <4 x i32>, i64)
 declare <vscale x 8 x i16> @llvm.experimental.vector.insert.nxv8i16.v8i16(<vscale x 8 x i16>, <8 x i16>, i64)
 declare <vscale x 16 x i8> @llvm.experimental.vector.insert.nxv16i8.v16i8(<vscale x 16 x i8>, <16 x i8>, i64)
+
+declare <vscale x 16 x i64> @llvm.experimental.vector.insert.nxv8i64.nxv16i64(<vscale x 16 x i64>, <vscale x 8 x i64>, i64)
+declare <vscale x 16 x i64> @llvm.experimental.vector.insert.v2i64.nxv16i64(<vscale x 16 x i64>, <2 x i64>, i64)